From ede11e339f7bee3d47f70fa9e59ec0b3e0985414 Mon Sep 17 00:00:00 2001 From: Hweinstock Date: Wed, 17 Jun 2026 16:40:37 +0000 Subject: [PATCH 01/11] release: nys summit --- .github/workflows/agent-restricted.yml | 2 +- .github/workflows/build-and-test.yml | 4 +- .github/workflows/ci-failure-issue.yml | 2 +- .github/workflows/cleanup-pr-tarballs.yml | 2 +- .github/workflows/codeql.yml | 4 +- .github/workflows/e2e-tests-full.yml | 35 +- .github/workflows/e2e-tests.yml | 106 +- .github/workflows/lint.yml | 17 +- .github/workflows/pr-security-review.yml | 388 ++++-- .github/workflows/pr-size.yml | 2 +- .github/workflows/pr-tarball.yml | 7 +- .github/workflows/pr-title.yml | 10 +- .github/workflows/prerelease-tarball.yml | 84 -- .../workflows/release-main-and-preview.yml | 403 ++++--- .github/workflows/release.yml | 79 +- .../workflows/slack-issue-notification.yml | 53 + .../workflows/slack-open-prs-notification.yml | 2 +- .github/workflows/strands-command.yml | 2 +- .github/workflows/sync-from-public.yml | 110 -- .github/workflows/sync-preview.yml | 191 +++ README.md | 48 +- docs/ab-tests.md | 154 +++ docs/batch-evaluation.md | 45 +- docs/commands.md | 177 +-- docs/config-bundles.md | 14 +- docs/connector-config-templates/README.md | 71 ++ .../confluence.json | 16 + .../google-drive.json | 12 + docs/connector-config-templates/onedrive.json | 16 + .../sharepoint.json | 14 + .../web-crawler.json | 16 + docs/knowledge-bases.md | 295 +++++ docs/recommendations.md | 30 +- e2e-tests/README.md | 1 - e2e-tests/ab-test-config-bundle.test.ts | 209 ---- e2e-tests/ab-test-target-based.test.ts | 317 ----- e2e-tests/archive-lifecycle.test.ts | 28 +- e2e-tests/config-bundle-eval-rec.test.ts | 22 +- e2e-tests/global-setup.ts | 19 + e2e-tests/guardrail-block.test.ts | 233 ++++ e2e-tests/utils/recommendation-cleanup.ts | 62 + .../add-remove-ab-test-target-based.test.ts | 461 -------- integ-tests/add-remove-ab-test.test.ts | 183 --- integ-tests/add-remove-config-bundle.test.ts | 4 +- 
integ-tests/add-remove-gateway.test.ts | 24 +- .../add-remove-online-insights.test.ts | 329 ++++++ integ-tests/promote-ab-test.test.ts | 161 +++ integ-tests/recommendation.test.ts | 151 --- integ-tests/run-ab-test.test.ts | 116 ++ integ-tests/run-insights.test.ts | 207 ++++ .../run-recommendation-from-insights.test.ts | 72 ++ npm-shrinkwrap.json | 517 +++----- package.json | 14 +- schemas/agentcore.schema.v1.json | 201 +--- scripts/run-e2e-local.sh | 11 + .../assets.snapshot.test.ts.snap | 990 ++++++++++++++-- src/assets/agents/AGENTS.md | 14 + src/assets/cdk/bin/cdk.ts | 62 +- src/assets/cdk/lib/cdk-stack.ts | 33 +- src/assets/cdk/test/cdk.test.ts | 3 +- src/assets/container/python/Dockerfile | 4 + .../strands/capabilities/memory/session.py | 5 +- .../strands/capabilities/memory/session.py | 5 +- src/assets/python/http/strands/base/main.py | 465 +++++++- .../http/strands/base/mcp_client/client.py | 44 +- .../python/http/strands/base/model/load.py | 6 +- .../python/http/strands/base/pyproject.toml | 5 +- .../http/strands/base/skills/fetcher.py | 279 +++++ .../hooks/execution_limits.py | 54 + .../strands/capabilities/memory/session.py | 5 +- .../aws/__tests__/agentcore-ab-tests.test.ts | 6 - .../agentcore-batch-evaluation.test.ts | 357 ++++++ .../agentcore-config-bundles.test.ts | 84 ++ .../aws/__tests__/agentcore-harness.test.ts | 141 +-- .../__tests__/agentcore-http-gateways.test.ts | 235 ---- .../agentcore-recommendation.test.ts | 76 ++ src/cli/aws/__tests__/agentcore.test.ts | 18 + src/cli/aws/__tests__/bedrock-agent.test.ts | 262 +++++ src/cli/aws/agentcore-ab-tests.ts | 16 +- src/cli/aws/agentcore-batch-evaluation.ts | 52 +- src/cli/aws/agentcore-config-bundles.ts | 1 + src/cli/aws/agentcore-control.ts | 38 +- src/cli/aws/agentcore-harness.ts | 190 +-- src/cli/aws/agentcore-http-gateways.ts | 512 -------- src/cli/aws/agentcore-payments.ts | 20 +- src/cli/aws/agentcore-recommendation.ts | 14 +- src/cli/aws/agentcore.ts | 12 +- src/cli/aws/bedrock-agent.ts | 182 
+++ src/cli/aws/index.ts | 13 +- src/cli/cdk/toolkit-lib/wrapper.ts | 12 +- src/cli/cli.ts | 22 +- .../__tests__/outputs-config-bundles.test.ts | 82 ++ .../__tests__/outputs-extended.test.ts | 54 + .../__tests__/outputs-harness.test.ts | 138 +++ .../cloudformation/__tests__/outputs.test.ts | 75 +- .../__tests__/parse-kb-outputs.test.ts | 69 ++ src/cli/cloudformation/outputs.ts | 321 ++++- src/cli/commands/abtest/command.ts | 199 ---- src/cli/commands/abtest/index.ts | 1 - .../add/__tests__/add-gateway-target.test.ts | 7 +- .../add/__tests__/add-knowledge-base.test.ts | 106 ++ .../add/__tests__/auth-options.test.ts | 160 +++ .../harness-privatelink-guard.test.ts | 191 +++ .../add/__tests__/skill-action.test.ts | 209 ++++ .../commands/add/__tests__/validate.test.ts | 148 +++ src/cli/commands/add/auth-options.ts | 142 ++- src/cli/commands/add/skill-action.ts | 140 +++ src/cli/commands/add/skill-command.ts | 78 ++ src/cli/commands/add/tool-action.ts | 40 + src/cli/commands/add/tool-command.ts | 7 + src/cli/commands/add/types.ts | 44 +- src/cli/commands/add/validate.ts | 350 +++++- .../archive/__tests__/command.test.ts | 347 +----- src/cli/commands/archive/command.tsx | 106 +- .../commands/batch-evaluations/command.tsx | 57 + src/cli/commands/batch-evaluations/index.ts | 1 + src/cli/commands/config-bundle/command.tsx | 37 +- .../commands/create/__tests__/create.test.ts | 38 + .../create/__tests__/harness-validate.test.ts | 24 + .../create/__tests__/validate.test.ts | 26 + src/cli/commands/create/command.tsx | 41 +- src/cli/commands/create/harness-action.ts | 4 + src/cli/commands/create/harness-validate.ts | 18 +- src/cli/commands/create/types.ts | 2 + src/cli/commands/create/validate.ts | 13 +- .../__tests__/harness-version-drift.test.ts | 35 + src/cli/commands/deploy/actions.ts | 410 +++---- src/cli/commands/deploy/command.tsx | 14 + src/cli/commands/dev/command.tsx | 2 +- .../commands/exec/__tests__/command.test.ts | 61 + src/cli/commands/exec/command.tsx | 12 +- 
.../export/__tests__/harness-action.test.ts | 42 + .../export/__tests__/harness-mapper.test.ts | 1038 +++++++++++++++++ src/cli/commands/export/constants.ts | 19 + src/cli/commands/export/harness-action.ts | 384 ++++++ src/cli/commands/export/harness-mapper.ts | 956 +++++++++++++++ src/cli/commands/export/harness-resolver.ts | 95 ++ src/cli/commands/export/index.ts | 83 ++ src/cli/commands/export/types.ts | 78 ++ .../__tests__/import-gateway-flow.test.ts | 15 + .../__tests__/import-gateway-spec.test.ts | 47 +- .../__tests__/import-gateway-targets.test.ts | 362 ------ .../import/__tests__/import-gateway.test.ts | 42 +- .../import/__tests__/import-no-deploy.test.ts | 14 + .../__tests__/import-online-eval.test.ts | 4 +- .../__tests__/import-runtime-handler.test.ts | 1 + .../import/__tests__/jwt-authorizer.test.ts | 5 +- src/cli/commands/import/constants.ts | 2 + src/cli/commands/import/import-gateway.ts | 214 ++-- .../invoke/__tests__/action-gateway.test.ts | 98 ++ .../__tests__/build-harness-base-opts.test.ts | 56 + .../commands/invoke/__tests__/command.test.ts | 25 + src/cli/commands/invoke/action.ts | 224 +++- src/cli/commands/invoke/command.tsx | 74 +- src/cli/commands/invoke/types.ts | 12 +- .../commands/logs/__tests__/action.test.ts | 8 +- .../commands/pause/__tests__/promote.test.ts | 59 - src/cli/commands/pause/command.tsx | 283 ++--- src/cli/commands/pause/index.ts | 2 +- src/cli/commands/pause/promote-utils.ts | 28 - src/cli/commands/promote/command.tsx | 34 + src/cli/commands/promote/index.ts | 1 + src/cli/commands/recommendations/command.tsx | 78 +- .../__tests__/remove-gateway-target.test.ts | 7 +- .../remove/__tests__/remove-gateway.test.ts | 8 +- .../remove/__tests__/skill-command.test.ts | 147 +++ src/cli/commands/remove/command.tsx | 2 +- src/cli/commands/remove/skill-command.ts | 138 +++ src/cli/commands/remove/types.ts | 3 +- src/cli/commands/run/command.tsx | 843 +++++++++---- .../commands/status/__tests__/action.test.ts | 451 ++++++- 
.../__tests__/format-knowledge-base.test.ts | 117 ++ src/cli/commands/status/action.ts | 254 +++- src/cli/commands/status/command.tsx | 47 +- .../commands/status/format-knowledge-base.ts | 94 ++ src/cli/commands/stop/command.tsx | 67 +- src/cli/commands/stop/index.ts | 2 +- src/cli/commands/validate/action.ts | 12 + src/cli/commands/view/JobDetailScreen.tsx | 109 ++ src/cli/commands/view/command.tsx | 129 ++ src/cli/commands/view/index.ts | 1 + src/cli/constants.ts | 9 +- .../__tests__/checks-extended.test.ts | 20 +- src/cli/feature-flags.ts | 2 + src/cli/logging/remove-logger.ts | 3 +- .../ab-test/__tests__/promote.test.ts | 270 ----- src/cli/operations/ab-test/promote.ts | 124 -- .../agent/generate/write-agent-to-project.ts | 2 +- .../archive/__tests__/archive-storage.test.ts | 130 --- src/cli/operations/archive/archive-storage.ts | 43 - src/cli/operations/archive/index.ts | 1 - .../__tests__/managed-memory-notice.test.ts | 78 ++ .../__tests__/post-deploy-ab-tests.test.ts | 660 ----------- .../post-deploy-config-bundles.test.ts | 654 ----------- .../post-deploy-http-gateways.test.ts | 471 -------- .../post-deploy-knowledge-bases.test.ts | 165 +++ .../deploy/__tests__/preflight.test.ts | 57 +- .../__tests__/harness-deployer.test.ts | 466 -------- .../__tests__/harness-mapper.test.ts | 753 ------------ .../imperative/deployers/harness-deployer.ts | 391 ------- .../imperative/deployers/harness-mapper.ts | 433 ------- .../deploy/imperative/deployers/index.ts | 2 - src/cli/operations/deploy/imperative/index.ts | 18 - .../operations/deploy/imperative/manager.ts | 110 -- src/cli/operations/deploy/imperative/types.ts | 32 - src/cli/operations/deploy/index.ts | 19 +- .../deploy/managed-memory-notice.ts | 47 + .../operations/deploy/post-deploy-ab-tests.ts | 721 ------------ .../deploy/post-deploy-config-bundles.ts | 348 ------ .../deploy/post-deploy-http-gateways.ts | 652 ----------- .../deploy/post-deploy-knowledge-bases.ts | 134 +++ src/cli/operations/deploy/preflight.ts 
| 93 +- src/cli/operations/deploy/teardown.ts | 61 +- .../operations/dev/__tests__/config.test.ts | 42 +- .../dev/__tests__/sse-transform.test.ts | 111 ++ src/cli/operations/dev/invoke.ts | 14 +- src/cli/operations/dev/sse-transform.ts | 45 + .../__tests__/resolve-ui-dist-dir.test.ts | 3 +- src/cli/operations/dev/web-ui/api-types.ts | 20 +- .../dev/web-ui/handlers/invocations.ts | 12 +- .../dev/web-ui/handlers/resources.ts | 27 + src/cli/operations/eval/batch-eval-storage.ts | 91 -- .../operations/eval/run-batch-evaluation.ts | 428 ------- .../__tests__/fetch-gateway-token.test.ts | 1 + .../fetch-access/fetch-gateway-token.ts | 3 +- .../operations/fetch-access/list-gateways.ts | 12 +- .../harness/__tests__/orphan.test.ts | 92 ++ src/cli/operations/harness/orphan.ts | 76 ++ src/cli/operations/harness/skill-utils.ts | 37 + .../operations/ingest/__tests__/index.test.ts | 296 +++++ src/cli/operations/ingest/index.ts | 215 ++++ src/cli/operations/insights/index.ts | 10 + .../operations/insights/insights-storage.ts | 58 + src/cli/operations/insights/run-insights.ts | 231 ++++ src/cli/operations/insights/types.ts | 46 + .../__tests__/resolve-agent-context.test.ts | 1 + .../ab-test/__tests__/build-options.test.ts | 190 +++ .../jobs/ab-test/__tests__/format.test.ts | 47 + .../jobs/ab-test/__tests__/promote.test.ts | 434 +++++++ .../operations/jobs/ab-test/build-options.ts | 158 +++ src/cli/operations/jobs/ab-test/format.ts | 85 ++ src/cli/operations/jobs/ab-test/handler.ts | 466 ++++++++ src/cli/operations/jobs/ab-test/promote.ts | 207 ++++ src/cli/operations/jobs/ab-test/resolve.ts | 246 ++++ .../jobs/batch-evaluation/build-source.ts | 73 ++ .../jobs/batch-evaluation/dataset-phase1.ts | 109 ++ .../jobs/batch-evaluation/format.ts | 49 + .../jobs/batch-evaluation/handler.ts | 229 ++++ src/cli/operations/jobs/index.ts | 46 + .../jobs/insights/__tests__/handler.test.ts | 24 + src/cli/operations/jobs/insights/format.ts | 75 ++ src/cli/operations/jobs/insights/handler.ts | 212 
++++ .../__tests__/apply-to-bundle.test.ts | 6 +- .../__tests__/auto-name.test.ts | 39 + .../__tests__/fetch-session-spans.test.ts | 2 +- .../__tests__/input-validation.test.ts | 155 +++ .../recommendation/__tests__/refresh.test.ts | 102 ++ .../recommendation/apply-to-bundle.ts | 8 +- .../jobs/recommendation/build-config.ts | 310 +++++ .../recommendation/fetch-session-spans.ts | 4 +- .../operations/jobs/recommendation/format.ts | 63 + .../operations/jobs/recommendation/handler.ts | 336 ++++++ .../jobs/shared/__tests__/constants.test.ts | 93 ++ .../jobs/shared/__tests__/engine.test.ts | 248 ++++ .../jobs/shared/__tests__/region.test.ts | 41 + .../jobs/shared/__tests__/storage.test.ts | 102 ++ src/cli/operations/jobs/shared/constants.ts | 99 ++ src/cli/operations/jobs/shared/engine.ts | 231 ++++ src/cli/operations/jobs/shared/format.ts | 13 + src/cli/operations/jobs/shared/region.ts | 36 + .../jobs/shared/resolve-agent-state.ts | 21 + src/cli/operations/jobs/shared/storage.ts | 114 ++ src/cli/operations/jobs/shared/types.ts | 456 ++++++++ src/cli/operations/jobs/shared/wait.ts | 38 + .../__tests__/agentic-retrieve-upsert.test.ts | 59 + .../__tests__/connector-config.test.ts | 119 ++ .../__tests__/hydrate-data-sources.test.ts | 110 ++ .../__tests__/templates.test.ts | 18 + .../knowledge-base/agentic-retrieve-upsert.ts | 42 + .../knowledge-base/connector-config.ts | 116 ++ .../knowledge-base/hydrate-data-sources.ts | 79 ++ .../mcp/__tests__/create-mcp-utils.test.ts | 4 + .../__tests__/recommendation-storage.test.ts | 136 --- .../__tests__/run-recommendation.test.ts | 720 ------------ .../operations/recommendation/constants.ts | 11 - src/cli/operations/recommendation/index.ts | 18 - .../recommendation/recommendation-storage.ts | 84 -- .../recommendation/run-recommendation.ts | 623 ---------- src/cli/operations/recommendation/types.ts | 65 -- .../remove/__tests__/remove-agent-ops.test.ts | 1 + .../__tests__/remove-identity-ops.test.ts | 1 + 
src/cli/primitives/ABTestPrimitive.ts | 732 ------------ src/cli/primitives/AgentPrimitive.tsx | 5 +- src/cli/primitives/ConfigBundlePrimitive.ts | 13 +- src/cli/primitives/EvaluatorPrimitive.ts | 4 +- src/cli/primitives/GatewayPrimitive.ts | 64 +- src/cli/primitives/GatewayTargetPrimitive.ts | 755 +++++++++++- src/cli/primitives/HarnessPrimitive.ts | 848 +++++++++++++- src/cli/primitives/KnowledgeBasePrimitive.ts | 688 +++++++++++ .../primitives/OnlineEvalConfigPrimitive.ts | 55 +- src/cli/primitives/OnlineInsightsPrimitive.ts | 270 +++++ .../primitives/PaymentConnectorPrimitive.ts | 4 +- src/cli/primitives/PaymentManagerPrimitive.ts | 4 +- src/cli/primitives/PolicyEnginePrimitive.ts | 38 +- src/cli/primitives/PolicyPrimitive.ts | 165 ++- .../__tests__/ABTestPrimitive.test.ts | 289 ----- .../__tests__/GatewayPrimitive.test.ts | 2 +- .../__tests__/GatewayTargetPrimitive.test.ts | 412 +++++++ .../__tests__/HarnessPrimitive.remove.test.ts | 271 +++++ .../__tests__/KnowledgeBasePrimitive.test.ts | 743 ++++++++++++ .../__tests__/OnlineInsightsPrimitive.test.ts | 36 + .../PaymentConnectorPrimitive.test.ts | 2 + .../__tests__/PaymentManagerPrimitive.test.ts | 2 + .../__tests__/PolicyPrimitive.test.ts | 111 ++ .../primitives/__tests__/auth-utils.test.ts | 2 +- .../__tests__/wirePaymentCapability.test.ts | 2 + src/cli/primitives/auth-utils.ts | 15 +- src/cli/primitives/constants.ts | 6 + src/cli/primitives/index.ts | 2 - src/cli/primitives/registry.ts | 9 +- src/cli/project.ts | 2 +- src/cli/telemetry/schemas/command-run.ts | 61 +- src/cli/telemetry/schemas/common-shapes.ts | 27 +- src/cli/templates/BaseRenderer.ts | 13 +- .../templates/__tests__/BaseRenderer.test.ts | 2 +- src/cli/templates/render.ts | 28 + src/cli/templates/types.ts | 57 + src/cli/tui/App.tsx | 112 +- .../__tests__/app-command-coverage.test.ts | 54 + src/cli/tui/components/DeployStatus.tsx | 10 +- src/cli/tui/components/ResourceGraph.tsx | 58 +- src/cli/tui/components/TextInput.tsx | 6 +- 
.../__tests__/DeployStatus.test.tsx | 15 + .../__tests__/ResourceGraph.test.tsx | 1 + .../components/__tests__/TextInput.test.tsx | 8 +- .../jwt-config/DomainOverridesManager.tsx | 219 ++++ .../components/jwt-config/JwtConfigInput.tsx | 167 ++- src/cli/tui/components/jwt-config/index.ts | 1 + src/cli/tui/components/jwt-config/types.ts | 46 + .../components/jwt-config/useJwtConfigFlow.ts | 160 ++- src/cli/tui/copy.ts | 59 +- .../tui/hooks/__tests__/useDevDeploy.test.tsx | 38 +- .../__tests__/usePanelNavigation.test.tsx | 2 +- .../tui/hooks/__tests__/useRemove.test.tsx | 9 + src/cli/tui/hooks/index.ts | 8 +- src/cli/tui/hooks/useCreateABTest.ts | 93 -- src/cli/tui/hooks/useCreateMcp.ts | 78 ++ src/cli/tui/hooks/useCreateOnlineEval.ts | 4 + src/cli/tui/hooks/useDevServer.ts | 2 +- src/cli/tui/hooks/useRemove.ts | 41 +- src/cli/tui/render.ts | 9 +- .../screens/ab-test/ABTestDetailScreen.tsx | 637 ---------- .../screens/ab-test/ABTestPickerScreen.tsx | 90 -- src/cli/tui/screens/ab-test/AddABTestFlow.tsx | 281 ----- .../tui/screens/ab-test/AddABTestScreen.tsx | 914 --------------- .../screens/ab-test/RemoveABTestScreen.tsx | 26 - .../ab-test/TargetBasedABTestScreen.tsx | 712 ----------- .../tui/screens/ab-test/VariantConfigForm.tsx | 268 ----- .../__tests__/useAddABTestWizard.test.tsx | 286 ----- .../__tests__/useTargetBasedWizard.test.tsx | 319 ----- src/cli/tui/screens/ab-test/index.ts | 4 - src/cli/tui/screens/ab-test/types.ts | 89 -- .../tui/screens/ab-test/useAddABTestWizard.ts | 324 ----- .../screens/ab-test/useTargetBasedWizard.ts | 188 --- src/cli/tui/screens/add/AddFlow.tsx | 78 +- src/cli/tui/screens/add/AddScreen.tsx | 36 +- .../screens/add/__tests__/AddScreen.test.tsx | 22 +- src/cli/tui/screens/agent/AddAgentScreen.tsx | 5 +- .../ConfigBundleHubScreen.tsx | 14 +- .../config-bundle-hub/useConfigBundleHub.ts | 17 +- .../config-bundle/AddConfigBundleFlow.tsx | 4 +- .../config-bundle/AddConfigBundleScreen.tsx | 24 + .../useAddConfigBundleWizard.test.tsx | 167 
+++ .../tui/screens/config-bundle/constants.ts | 15 + src/cli/tui/screens/config-bundle/types.ts | 5 +- .../config-bundle/useAddConfigBundleWizard.ts | 40 +- src/cli/tui/screens/create/CreateScreen.tsx | 2 +- src/cli/tui/screens/create/useCreateFlow.ts | 1 + src/cli/tui/screens/deploy/DeployScreen.tsx | 9 + src/cli/tui/screens/deploy/useDeployFlow.ts | 625 +++++----- src/cli/tui/screens/eval/EvalHubScreen.tsx | 23 +- .../tui/screens/export/ExportHarnessFlow.tsx | 196 ++++ .../screens/export/ExportHarnessScreen.tsx | 137 +++ src/cli/tui/screens/export/index.ts | 1 + src/cli/tui/screens/export/types.ts | 14 + .../screens/export/useExportHarnessWizard.ts | 63 + .../screens/generate/__tests__/types.test.ts | 45 + src/cli/tui/screens/generate/types.ts | 17 +- .../tui/screens/generate/useGenerateWizard.ts | 5 +- .../tui/screens/harness/AddHarnessFlow.tsx | 62 +- .../tui/screens/harness/AddHarnessScreen.tsx | 711 ++++++++++- src/cli/tui/screens/harness/types.ts | 160 ++- .../screens/harness/useAddHarnessWizard.ts | 446 ++++++- src/cli/tui/screens/import/ArnInputScreen.tsx | 3 +- .../insights-jobs/InsightsJobsScreen.tsx | 383 ++++++ src/cli/tui/screens/insights-jobs/index.ts | 1 + .../screens/job-detail/ABTestDetailView.tsx | 235 ++++ .../job-detail/BatchEvalDetailView.tsx | 157 +++ .../job-detail/RecommendationDetailView.tsx | 132 +++ src/cli/tui/screens/job-detail/helpers.ts | 32 + src/cli/tui/screens/job-detail/index.ts | 4 + .../knowledge-base/AddKnowledgeBaseFlow.tsx | 196 ++++ .../knowledge-base/AddKnowledgeBaseScreen.tsx | 506 ++++++++ .../__tests__/AddKnowledgeBaseFlow.test.tsx | 134 +++ .../__tests__/AddKnowledgeBaseScreen.test.tsx | 238 ++++ .../__tests__/groupDataSources.test.ts | 49 + .../__tests__/inline-connector-config.test.ts | 97 ++ .../knowledge-base/groupDataSources.ts | 49 + src/cli/tui/screens/knowledge-base/index.ts | 2 + .../knowledge-base/inline-connector-config.ts | Bin 0 -> 3191 bytes src/cli/tui/screens/knowledge-base/types.ts | 43 + 
.../logs/__tests__/LogsScreen.test.tsx | 9 + .../tui/screens/mcp/AddGatewayTargetFlow.tsx | 79 +- .../screens/mcp/AddGatewayTargetScreen.tsx | 419 ++++++- .../mcp/__tests__/discriminated-union.test.ts | 31 + .../tui/screens/mcp/__tests__/types.test.ts | 14 +- src/cli/tui/screens/mcp/types.ts | 152 ++- .../screens/mcp/useAddGatewayTargetWizard.ts | 203 +++- .../screens/online-eval/AddOnlineEvalFlow.tsx | 8 - .../online-eval/AddOnlineEvalScreen.tsx | 149 ++- .../online-eval/OnlineEvalDashboard.tsx | 2 +- src/cli/tui/screens/online-eval/types.ts | 10 + .../online-eval/useAddOnlineEvalWizard.ts | 71 +- .../online-insights/AddOnlineInsightsFlow.tsx | 145 +++ .../AddOnlineInsightsScreen.tsx | 216 ++++ src/cli/tui/screens/online-insights/index.ts | 2 + src/cli/tui/screens/online-insights/types.ts | 49 + .../useAddOnlineInsightsWizard.ts | 106 ++ src/cli/tui/screens/policy/AddPolicyFlow.tsx | 84 +- .../tui/screens/policy/AddPolicyScreen.tsx | 346 +++++- .../policy/__tests__/synthesize-cedar.test.ts | 128 ++ .../useAddPolicyWizard.render.test.tsx | 76 ++ .../tui/screens/policy/synthesize-cedar.ts | 58 + src/cli/tui/screens/policy/types.ts | 174 ++- .../tui/screens/policy/useAddPolicyWizard.ts | 135 ++- .../recommendation/RecommendationFlow.tsx | 302 +---- .../RecommendationHistoryScreen.tsx | 194 +-- .../recommendation/RecommendationScreen.tsx | 24 + .../RecommendationsHubScreen.tsx | 6 +- src/cli/tui/screens/recommendation/types.ts | 10 +- .../recommendation/useRecommendationWizard.ts | 17 +- src/cli/tui/screens/remove/RemoveFlow.tsx | 274 +++-- .../remove/RemoveKnowledgeBaseScreen.tsx | 26 + src/cli/tui/screens/remove/RemoveScreen.tsx | 38 +- .../remove/__tests__/RemoveScreen.test.tsx | 60 +- src/cli/tui/screens/remove/index.ts | 1 + .../run-ab-test/ABTestJobsHistoryScreen.tsx | 196 ++++ .../tui/screens/run-ab-test/RunABTestFlow.tsx | 805 +++++++++++++ src/cli/tui/screens/run-ab-test/index.ts | 2 + src/cli/tui/screens/run-ab-test/types.ts | 37 + 
.../run-eval/BatchEvalHistoryScreen.tsx | 281 ++--- .../tui/screens/run-eval/RunBatchEvalFlow.tsx | 686 ++++++----- .../tui/screens/run-eval/RunIngestFlow.tsx | 669 +++++++++++ src/cli/tui/screens/run-eval/RunScreen.tsx | 31 +- src/cli/tui/screens/run-eval/index.ts | 1 + .../screens/run-insights/RunInsightsFlow.tsx | 148 +++ .../run-insights/RunInsightsScreen.tsx | 221 ++++ src/cli/tui/screens/run-insights/index.ts | 2 + src/cli/tui/screens/run-insights/types.ts | 81 ++ .../run-insights/useRunInsightsWizard.ts | 151 +++ .../tui/screens/view/ViewTypePickerScreen.tsx | 40 + src/cli/tui/screens/view/index.ts | 1 + .../screens/web-search/AddWebSearchFlow.tsx | 88 ++ .../screens/web-search/AddWebSearchScreen.tsx | 158 +++ src/cli/tui/screens/web-search/index.ts | 2 + src/cli/tui/screens/web-search/types.ts | 10 + src/lib/errors/types.ts | 27 + src/lib/schemas/io/config-io.ts | 1 - src/schema/__tests__/constants.test.ts | 51 + src/schema/constants.ts | 28 + src/schema/llm-compacted/agentcore.ts | 36 +- src/schema/llm-compacted/mcp.ts | 31 +- .../__tests__/agentcore-project.test.ts | 222 ++++ .../schemas/__tests__/deployed-state.test.ts | 69 ++ src/schema/schemas/__tests__/mcp-defs.test.ts | 5 +- src/schema/schemas/__tests__/mcp.test.ts | 305 +++++ .../__tests__/online-eval-config.test.ts | 69 ++ .../__tests__/private-endpoint.test.ts | 232 ++++ src/schema/schemas/agentcore-project.ts | 141 ++- src/schema/schemas/auth.ts | 148 ++- src/schema/schemas/deployed-state.ts | 65 +- src/schema/schemas/mcp-defs.ts | 7 +- src/schema/schemas/mcp.ts | 508 +++++++- .../primitives/__tests__/ab-test.test.ts | 7 - .../primitives/__tests__/evaluator.test.ts | 43 + .../primitives/__tests__/harness.test.ts | 502 +++++++- .../primitives/__tests__/http-gateway.test.ts | 82 -- .../__tests__/knowledge-base.test.ts | 203 ++++ src/schema/schemas/primitives/ab-test.ts | 2 - src/schema/schemas/primitives/harness.ts | 350 +++++- src/schema/schemas/primitives/http-gateway.ts | 41 - 
src/schema/schemas/primitives/index.ts | 13 +- .../schemas/primitives/knowledge-base.ts | 122 ++ .../schemas/primitives/online-eval-config.ts | 74 +- src/schema/schemas/primitives/policy.ts | 8 + 495 files changed, 41701 insertions(+), 22027 deletions(-) delete mode 100644 .github/workflows/prerelease-tarball.yml create mode 100644 .github/workflows/slack-issue-notification.yml delete mode 100644 .github/workflows/sync-from-public.yml create mode 100644 .github/workflows/sync-preview.yml create mode 100644 docs/ab-tests.md create mode 100644 docs/connector-config-templates/README.md create mode 100644 docs/connector-config-templates/confluence.json create mode 100644 docs/connector-config-templates/google-drive.json create mode 100644 docs/connector-config-templates/onedrive.json create mode 100644 docs/connector-config-templates/sharepoint.json create mode 100644 docs/connector-config-templates/web-crawler.json create mode 100644 docs/knowledge-bases.md delete mode 100644 e2e-tests/ab-test-config-bundle.test.ts delete mode 100644 e2e-tests/ab-test-target-based.test.ts create mode 100644 e2e-tests/guardrail-block.test.ts create mode 100644 e2e-tests/utils/recommendation-cleanup.ts delete mode 100644 integ-tests/add-remove-ab-test-target-based.test.ts delete mode 100644 integ-tests/add-remove-ab-test.test.ts create mode 100644 integ-tests/add-remove-online-insights.test.ts create mode 100644 integ-tests/promote-ab-test.test.ts create mode 100644 integ-tests/run-ab-test.test.ts create mode 100644 integ-tests/run-insights.test.ts create mode 100644 integ-tests/run-recommendation-from-insights.test.ts create mode 100644 src/assets/python/http/strands/base/skills/fetcher.py create mode 100644 src/assets/python/http/strands/capabilities/execution-limits/hooks/execution_limits.py create mode 100644 src/cli/aws/__tests__/agentcore-batch-evaluation.test.ts create mode 100644 src/cli/aws/__tests__/agentcore-config-bundles.test.ts delete mode 100644 
src/cli/aws/__tests__/agentcore-http-gateways.test.ts create mode 100644 src/cli/aws/__tests__/bedrock-agent.test.ts delete mode 100644 src/cli/aws/agentcore-http-gateways.ts create mode 100644 src/cli/aws/bedrock-agent.ts create mode 100644 src/cli/cloudformation/__tests__/outputs-config-bundles.test.ts create mode 100644 src/cli/cloudformation/__tests__/outputs-harness.test.ts create mode 100644 src/cli/cloudformation/__tests__/parse-kb-outputs.test.ts delete mode 100644 src/cli/commands/abtest/command.ts delete mode 100644 src/cli/commands/abtest/index.ts create mode 100644 src/cli/commands/add/__tests__/add-knowledge-base.test.ts create mode 100644 src/cli/commands/add/__tests__/harness-privatelink-guard.test.ts create mode 100644 src/cli/commands/add/__tests__/skill-action.test.ts create mode 100644 src/cli/commands/add/skill-action.ts create mode 100644 src/cli/commands/add/skill-command.ts create mode 100644 src/cli/commands/batch-evaluations/command.tsx create mode 100644 src/cli/commands/batch-evaluations/index.ts create mode 100644 src/cli/commands/deploy/__tests__/harness-version-drift.test.ts create mode 100644 src/cli/commands/export/__tests__/harness-action.test.ts create mode 100644 src/cli/commands/export/__tests__/harness-mapper.test.ts create mode 100644 src/cli/commands/export/constants.ts create mode 100644 src/cli/commands/export/harness-action.ts create mode 100644 src/cli/commands/export/harness-mapper.ts create mode 100644 src/cli/commands/export/harness-resolver.ts create mode 100644 src/cli/commands/export/index.ts create mode 100644 src/cli/commands/export/types.ts delete mode 100644 src/cli/commands/import/__tests__/import-gateway-targets.test.ts create mode 100644 src/cli/commands/invoke/__tests__/action-gateway.test.ts create mode 100644 src/cli/commands/invoke/__tests__/build-harness-base-opts.test.ts delete mode 100644 src/cli/commands/pause/__tests__/promote.test.ts delete mode 100644 src/cli/commands/pause/promote-utils.ts create 
mode 100644 src/cli/commands/promote/command.tsx create mode 100644 src/cli/commands/promote/index.ts create mode 100644 src/cli/commands/remove/__tests__/skill-command.test.ts create mode 100644 src/cli/commands/remove/skill-command.ts create mode 100644 src/cli/commands/status/__tests__/format-knowledge-base.test.ts create mode 100644 src/cli/commands/status/format-knowledge-base.ts create mode 100644 src/cli/commands/view/JobDetailScreen.tsx create mode 100644 src/cli/commands/view/command.tsx create mode 100644 src/cli/commands/view/index.ts delete mode 100644 src/cli/operations/ab-test/__tests__/promote.test.ts delete mode 100644 src/cli/operations/ab-test/promote.ts delete mode 100644 src/cli/operations/archive/__tests__/archive-storage.test.ts delete mode 100644 src/cli/operations/archive/archive-storage.ts delete mode 100644 src/cli/operations/archive/index.ts create mode 100644 src/cli/operations/deploy/__tests__/managed-memory-notice.test.ts delete mode 100644 src/cli/operations/deploy/__tests__/post-deploy-ab-tests.test.ts delete mode 100644 src/cli/operations/deploy/__tests__/post-deploy-config-bundles.test.ts delete mode 100644 src/cli/operations/deploy/__tests__/post-deploy-http-gateways.test.ts create mode 100644 src/cli/operations/deploy/__tests__/post-deploy-knowledge-bases.test.ts delete mode 100644 src/cli/operations/deploy/imperative/deployers/__tests__/harness-deployer.test.ts delete mode 100644 src/cli/operations/deploy/imperative/deployers/__tests__/harness-mapper.test.ts delete mode 100644 src/cli/operations/deploy/imperative/deployers/harness-deployer.ts delete mode 100644 src/cli/operations/deploy/imperative/deployers/harness-mapper.ts delete mode 100644 src/cli/operations/deploy/imperative/deployers/index.ts delete mode 100644 src/cli/operations/deploy/imperative/index.ts delete mode 100644 src/cli/operations/deploy/imperative/manager.ts delete mode 100644 src/cli/operations/deploy/imperative/types.ts create mode 100644 
src/cli/operations/deploy/managed-memory-notice.ts delete mode 100644 src/cli/operations/deploy/post-deploy-ab-tests.ts delete mode 100644 src/cli/operations/deploy/post-deploy-config-bundles.ts delete mode 100644 src/cli/operations/deploy/post-deploy-http-gateways.ts create mode 100644 src/cli/operations/deploy/post-deploy-knowledge-bases.ts create mode 100644 src/cli/operations/dev/__tests__/sse-transform.test.ts create mode 100644 src/cli/operations/dev/sse-transform.ts delete mode 100644 src/cli/operations/eval/batch-eval-storage.ts delete mode 100644 src/cli/operations/eval/run-batch-evaluation.ts create mode 100644 src/cli/operations/harness/__tests__/orphan.test.ts create mode 100644 src/cli/operations/harness/orphan.ts create mode 100644 src/cli/operations/harness/skill-utils.ts create mode 100644 src/cli/operations/ingest/__tests__/index.test.ts create mode 100644 src/cli/operations/ingest/index.ts create mode 100644 src/cli/operations/insights/index.ts create mode 100644 src/cli/operations/insights/insights-storage.ts create mode 100644 src/cli/operations/insights/run-insights.ts create mode 100644 src/cli/operations/insights/types.ts create mode 100644 src/cli/operations/jobs/ab-test/__tests__/build-options.test.ts create mode 100644 src/cli/operations/jobs/ab-test/__tests__/format.test.ts create mode 100644 src/cli/operations/jobs/ab-test/__tests__/promote.test.ts create mode 100644 src/cli/operations/jobs/ab-test/build-options.ts create mode 100644 src/cli/operations/jobs/ab-test/format.ts create mode 100644 src/cli/operations/jobs/ab-test/handler.ts create mode 100644 src/cli/operations/jobs/ab-test/promote.ts create mode 100644 src/cli/operations/jobs/ab-test/resolve.ts create mode 100644 src/cli/operations/jobs/batch-evaluation/build-source.ts create mode 100644 src/cli/operations/jobs/batch-evaluation/dataset-phase1.ts create mode 100644 src/cli/operations/jobs/batch-evaluation/format.ts create mode 100644 
src/cli/operations/jobs/batch-evaluation/handler.ts create mode 100644 src/cli/operations/jobs/index.ts create mode 100644 src/cli/operations/jobs/insights/__tests__/handler.test.ts create mode 100644 src/cli/operations/jobs/insights/format.ts create mode 100644 src/cli/operations/jobs/insights/handler.ts rename src/cli/operations/{ => jobs}/recommendation/__tests__/apply-to-bundle.test.ts (97%) create mode 100644 src/cli/operations/jobs/recommendation/__tests__/auto-name.test.ts rename src/cli/operations/{ => jobs}/recommendation/__tests__/fetch-session-spans.test.ts (99%) create mode 100644 src/cli/operations/jobs/recommendation/__tests__/input-validation.test.ts create mode 100644 src/cli/operations/jobs/recommendation/__tests__/refresh.test.ts rename src/cli/operations/{ => jobs}/recommendation/apply-to-bundle.ts (94%) create mode 100644 src/cli/operations/jobs/recommendation/build-config.ts rename src/cli/operations/{ => jobs}/recommendation/fetch-session-spans.ts (97%) create mode 100644 src/cli/operations/jobs/recommendation/format.ts create mode 100644 src/cli/operations/jobs/recommendation/handler.ts create mode 100644 src/cli/operations/jobs/shared/__tests__/constants.test.ts create mode 100644 src/cli/operations/jobs/shared/__tests__/engine.test.ts create mode 100644 src/cli/operations/jobs/shared/__tests__/region.test.ts create mode 100644 src/cli/operations/jobs/shared/__tests__/storage.test.ts create mode 100644 src/cli/operations/jobs/shared/constants.ts create mode 100644 src/cli/operations/jobs/shared/engine.ts create mode 100644 src/cli/operations/jobs/shared/format.ts create mode 100644 src/cli/operations/jobs/shared/region.ts create mode 100644 src/cli/operations/jobs/shared/resolve-agent-state.ts create mode 100644 src/cli/operations/jobs/shared/storage.ts create mode 100644 src/cli/operations/jobs/shared/types.ts create mode 100644 src/cli/operations/jobs/shared/wait.ts create mode 100644 
src/cli/operations/knowledge-base/__tests__/agentic-retrieve-upsert.test.ts create mode 100644 src/cli/operations/knowledge-base/__tests__/connector-config.test.ts create mode 100644 src/cli/operations/knowledge-base/__tests__/hydrate-data-sources.test.ts create mode 100644 src/cli/operations/knowledge-base/__tests__/templates.test.ts create mode 100644 src/cli/operations/knowledge-base/agentic-retrieve-upsert.ts create mode 100644 src/cli/operations/knowledge-base/connector-config.ts create mode 100644 src/cli/operations/knowledge-base/hydrate-data-sources.ts delete mode 100644 src/cli/operations/recommendation/__tests__/recommendation-storage.test.ts delete mode 100644 src/cli/operations/recommendation/__tests__/run-recommendation.test.ts delete mode 100644 src/cli/operations/recommendation/constants.ts delete mode 100644 src/cli/operations/recommendation/index.ts delete mode 100644 src/cli/operations/recommendation/recommendation-storage.ts delete mode 100644 src/cli/operations/recommendation/run-recommendation.ts delete mode 100644 src/cli/operations/recommendation/types.ts delete mode 100644 src/cli/primitives/ABTestPrimitive.ts create mode 100644 src/cli/primitives/KnowledgeBasePrimitive.ts create mode 100644 src/cli/primitives/OnlineInsightsPrimitive.ts delete mode 100644 src/cli/primitives/__tests__/ABTestPrimitive.test.ts create mode 100644 src/cli/primitives/__tests__/GatewayTargetPrimitive.test.ts create mode 100644 src/cli/primitives/__tests__/HarnessPrimitive.remove.test.ts create mode 100644 src/cli/primitives/__tests__/KnowledgeBasePrimitive.test.ts create mode 100644 src/cli/primitives/__tests__/OnlineInsightsPrimitive.test.ts create mode 100644 src/cli/primitives/__tests__/PolicyPrimitive.test.ts create mode 100644 src/cli/tui/__tests__/app-command-coverage.test.ts create mode 100644 src/cli/tui/components/jwt-config/DomainOverridesManager.tsx delete mode 100644 src/cli/tui/hooks/useCreateABTest.ts delete mode 100644 
src/cli/tui/screens/ab-test/ABTestDetailScreen.tsx delete mode 100644 src/cli/tui/screens/ab-test/ABTestPickerScreen.tsx delete mode 100644 src/cli/tui/screens/ab-test/AddABTestFlow.tsx delete mode 100644 src/cli/tui/screens/ab-test/AddABTestScreen.tsx delete mode 100644 src/cli/tui/screens/ab-test/RemoveABTestScreen.tsx delete mode 100644 src/cli/tui/screens/ab-test/TargetBasedABTestScreen.tsx delete mode 100644 src/cli/tui/screens/ab-test/VariantConfigForm.tsx delete mode 100644 src/cli/tui/screens/ab-test/__tests__/useAddABTestWizard.test.tsx delete mode 100644 src/cli/tui/screens/ab-test/__tests__/useTargetBasedWizard.test.tsx delete mode 100644 src/cli/tui/screens/ab-test/index.ts delete mode 100644 src/cli/tui/screens/ab-test/types.ts delete mode 100644 src/cli/tui/screens/ab-test/useAddABTestWizard.ts delete mode 100644 src/cli/tui/screens/ab-test/useTargetBasedWizard.ts create mode 100644 src/cli/tui/screens/config-bundle/__tests__/useAddConfigBundleWizard.test.tsx create mode 100644 src/cli/tui/screens/config-bundle/constants.ts create mode 100644 src/cli/tui/screens/export/ExportHarnessFlow.tsx create mode 100644 src/cli/tui/screens/export/ExportHarnessScreen.tsx create mode 100644 src/cli/tui/screens/export/index.ts create mode 100644 src/cli/tui/screens/export/types.ts create mode 100644 src/cli/tui/screens/export/useExportHarnessWizard.ts create mode 100644 src/cli/tui/screens/generate/__tests__/types.test.ts create mode 100644 src/cli/tui/screens/insights-jobs/InsightsJobsScreen.tsx create mode 100644 src/cli/tui/screens/insights-jobs/index.ts create mode 100644 src/cli/tui/screens/job-detail/ABTestDetailView.tsx create mode 100644 src/cli/tui/screens/job-detail/BatchEvalDetailView.tsx create mode 100644 src/cli/tui/screens/job-detail/RecommendationDetailView.tsx create mode 100644 src/cli/tui/screens/job-detail/helpers.ts create mode 100644 src/cli/tui/screens/job-detail/index.ts create mode 100644 
src/cli/tui/screens/knowledge-base/AddKnowledgeBaseFlow.tsx create mode 100644 src/cli/tui/screens/knowledge-base/AddKnowledgeBaseScreen.tsx create mode 100644 src/cli/tui/screens/knowledge-base/__tests__/AddKnowledgeBaseFlow.test.tsx create mode 100644 src/cli/tui/screens/knowledge-base/__tests__/AddKnowledgeBaseScreen.test.tsx create mode 100644 src/cli/tui/screens/knowledge-base/__tests__/groupDataSources.test.ts create mode 100644 src/cli/tui/screens/knowledge-base/__tests__/inline-connector-config.test.ts create mode 100644 src/cli/tui/screens/knowledge-base/groupDataSources.ts create mode 100644 src/cli/tui/screens/knowledge-base/index.ts create mode 100644 src/cli/tui/screens/knowledge-base/inline-connector-config.ts create mode 100644 src/cli/tui/screens/knowledge-base/types.ts create mode 100644 src/cli/tui/screens/online-insights/AddOnlineInsightsFlow.tsx create mode 100644 src/cli/tui/screens/online-insights/AddOnlineInsightsScreen.tsx create mode 100644 src/cli/tui/screens/online-insights/index.ts create mode 100644 src/cli/tui/screens/online-insights/types.ts create mode 100644 src/cli/tui/screens/online-insights/useAddOnlineInsightsWizard.ts create mode 100644 src/cli/tui/screens/policy/__tests__/synthesize-cedar.test.ts create mode 100644 src/cli/tui/screens/policy/__tests__/useAddPolicyWizard.render.test.tsx create mode 100644 src/cli/tui/screens/policy/synthesize-cedar.ts create mode 100644 src/cli/tui/screens/remove/RemoveKnowledgeBaseScreen.tsx create mode 100644 src/cli/tui/screens/run-ab-test/ABTestJobsHistoryScreen.tsx create mode 100644 src/cli/tui/screens/run-ab-test/RunABTestFlow.tsx create mode 100644 src/cli/tui/screens/run-ab-test/index.ts create mode 100644 src/cli/tui/screens/run-ab-test/types.ts create mode 100644 src/cli/tui/screens/run-eval/RunIngestFlow.tsx create mode 100644 src/cli/tui/screens/run-insights/RunInsightsFlow.tsx create mode 100644 src/cli/tui/screens/run-insights/RunInsightsScreen.tsx create mode 100644 
src/cli/tui/screens/run-insights/index.ts create mode 100644 src/cli/tui/screens/run-insights/types.ts create mode 100644 src/cli/tui/screens/run-insights/useRunInsightsWizard.ts create mode 100644 src/cli/tui/screens/view/ViewTypePickerScreen.tsx create mode 100644 src/cli/tui/screens/view/index.ts create mode 100644 src/cli/tui/screens/web-search/AddWebSearchFlow.tsx create mode 100644 src/cli/tui/screens/web-search/AddWebSearchScreen.tsx create mode 100644 src/cli/tui/screens/web-search/index.ts create mode 100644 src/cli/tui/screens/web-search/types.ts create mode 100644 src/schema/schemas/__tests__/online-eval-config.test.ts create mode 100644 src/schema/schemas/__tests__/private-endpoint.test.ts delete mode 100644 src/schema/schemas/primitives/__tests__/http-gateway.test.ts create mode 100644 src/schema/schemas/primitives/__tests__/knowledge-base.test.ts delete mode 100644 src/schema/schemas/primitives/http-gateway.ts create mode 100644 src/schema/schemas/primitives/knowledge-base.ts diff --git a/.github/workflows/agent-restricted.yml b/.github/workflows/agent-restricted.yml index d229919a4..c53838a9c 100644 --- a/.github/workflows/agent-restricted.yml +++ b/.github/workflows/agent-restricted.yml @@ -68,7 +68,7 @@ jobs: - name: Generate GitHub App Token id: app-token - uses: actions/create-github-app-token@v1 + uses: actions/create-github-app-token@v3 with: app-id: ${{ vars.APP_ID }} private-key: ${{ secrets.APP_PRIVATE_KEY }} diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 341ab0f4c..9a0eed3fb 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -2,9 +2,9 @@ name: Build and Test on: push: - branches: ['main'] + branches: ['main', 'feat/**'] pull_request: - branches: ['main'] + branches: ['main', 'feat/**'] permissions: contents: read diff --git a/.github/workflows/ci-failure-issue.yml b/.github/workflows/ci-failure-issue.yml index 443fc61a5..1ce4c5ce5 100644 --- 
a/.github/workflows/ci-failure-issue.yml +++ b/.github/workflows/ci-failure-issue.yml @@ -26,7 +26,7 @@ jobs: - name: Generate GitHub App Token id: app-token - uses: actions/create-github-app-token@v1 + uses: actions/create-github-app-token@v3 with: app-id: ${{ vars.APP_ID }} private-key: ${{ secrets.APP_PRIVATE_KEY }} diff --git a/.github/workflows/cleanup-pr-tarballs.yml b/.github/workflows/cleanup-pr-tarballs.yml index 10a35be2c..2d898348d 100644 --- a/.github/workflows/cleanup-pr-tarballs.yml +++ b/.github/workflows/cleanup-pr-tarballs.yml @@ -16,7 +16,7 @@ jobs: steps: - name: Generate GitHub App Token id: app-token - uses: actions/create-github-app-token@v1 + uses: actions/create-github-app-token@v3 with: app-id: ${{ vars.APP_ID }} private-key: ${{ secrets.APP_PRIVATE_KEY }} diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 0b3f65d25..8882e4342 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -4,9 +4,9 @@ on: push: branches: ['main'] pull_request: - branches: ['main'] + branches: ['main', 'feat/**'] pull_request_target: - branches: ['main'] + branches: ['main', 'feat/**'] # Cancel in-progress runs for PRs; never cancel runs on main (merges should not abort each other) concurrency: diff --git a/.github/workflows/e2e-tests-full.yml b/.github/workflows/e2e-tests-full.yml index 0962bfce8..e20a6b037 100644 --- a/.github/workflows/e2e-tests-full.yml +++ b/.github/workflows/e2e-tests-full.yml @@ -5,8 +5,11 @@ on: aws_region: description: 'AWS region for deployment' default: 'us-east-1' + cdk_branch: + description: 'Branch of CDK constructs repo to build from' + default: 'main' push: - branches: [main] + branches: [main, 'feat/**'] env: AGENTCORE_TELEMETRY_DISABLED: '1' @@ -32,7 +35,7 @@ jobs: steps: - uses: actions/checkout@v6 with: - ref: ${{ github.event_name == 'workflow_dispatch' && github.ref || 'main' }} + persist-credentials: false - uses: actions/setup-node@v6 with: node-version: '20.x' @@ -51,7 
+54,7 @@ jobs: id: aws run: echo "account_id=$(aws sts get-caller-identity --query Account --output text)" >> "$GITHUB_OUTPUT" - name: Get API keys from Secrets Manager - uses: aws-actions/aws-secretsmanager-get-secrets@v2 + uses: aws-actions/aws-secretsmanager-get-secrets@v3 with: secret-ids: | E2E,${{ secrets.E2E_SECRET_ARN }} @@ -63,14 +66,34 @@ jobs: BUILD_PREVIEW: ${{ matrix.cli-build == 'preview' && '1' || '0' }} - name: Generate GitHub App Token id: app-token - uses: actions/create-github-app-token@v1 + uses: actions/create-github-app-token@v3 with: app-id: ${{ vars.APP_ID }} private-key: ${{ secrets.APP_PRIVATE_KEY }} owner: aws - - name: Build CDK package from main + - name: Build CDK package run: | - git clone --depth 1 "https://x-access-token:${CDK_REPO_TOKEN}@github.com/${CDK_REPO}.git" /tmp/cdk-repo + if [ -n "${{ inputs.cdk_branch }}" ] && [ "${{ inputs.cdk_branch }}" != "main" ]; then + CDK_BRANCH="${{ inputs.cdk_branch }}" + elif [ "${{ github.ref_name }}" != "main" ]; then + CDK_BRANCH="main" + REPO_URL="https://x-access-token:${CDK_REPO_TOKEN}@github.com/${CDK_REPO}.git" + # Check if a branch exists on the CDK repo with the same name + if git ls-remote --exit-code --heads "$REPO_URL" "${{ github.ref_name }}" > /dev/null 2>&1; then + CDK_BRANCH="${{ github.ref_name }}" + else + # Check if a branch exists with every _ replaced by -. 
(legacy support for summit branch) + ALT="${{ github.ref_name }}" + ALT="${ALT//_/-}" + if git ls-remote --exit-code --heads "$REPO_URL" "$ALT" > /dev/null 2>&1; then + CDK_BRANCH="$ALT" + fi + fi + else + CDK_BRANCH="main" + fi + echo "Using CDK branch: $CDK_BRANCH" + git clone --depth 1 --branch "$CDK_BRANCH" "https://x-access-token:${CDK_REPO_TOKEN}@github.com/${CDK_REPO}.git" /tmp/cdk-repo cd /tmp/cdk-repo npm ci npm run build diff --git a/.github/workflows/e2e-tests.yml b/.github/workflows/e2e-tests.yml index d44763efe..2d13211d0 100644 --- a/.github/workflows/e2e-tests.yml +++ b/.github/workflows/e2e-tests.yml @@ -5,11 +5,8 @@ on: aws_region: description: 'AWS region for deployment' default: 'us-east-1' - cdk_branch: - description: 'CDK repo branch to build from (default: main)' - default: 'main' pull_request_target: - branches: [main] + branches: [main, feat/**] concurrency: group: e2e-${{ github.event.pull_request.number || github.ref }} @@ -50,8 +47,6 @@ jobs: runs-on: ubuntu-latest environment: e2e-testing timeout-minutes: 30 - env: - AGENTCORE_TELEMETRY_DISABLED: '1' steps: - uses: actions/checkout@v6 with: @@ -75,7 +70,7 @@ jobs: id: aws run: echo "account_id=$(aws sts get-caller-identity --query Account --output text)" >> "$GITHUB_OUTPUT" - name: Get API keys from Secrets Manager - uses: aws-actions/aws-secretsmanager-get-secrets@v2 + uses: aws-actions/aws-secretsmanager-get-secrets@v3 with: secret-ids: | E2E,${{ secrets.E2E_SECRET_ARN }} @@ -83,102 +78,45 @@ jobs: - name: Generate GitHub App Token id: app-token - uses: actions/create-github-app-token@v1 + uses: actions/create-github-app-token@v3 with: app-id: ${{ vars.APP_ID }} private-key: ${{ secrets.APP_PRIVATE_KEY }} owner: aws - # Clone CDK repo for bundle script (requires App token for private repo access) - - name: Clone CDK repo + - name: Build CDK package from main run: | - CDK_BRANCH="${{ inputs.cdk_branch || 'main' }}" - echo "Cloning CDK from branch: $CDK_BRANCH" - git clone --depth 1 
--branch "$CDK_BRANCH" "https://x-access-token:${CDK_REPO_TOKEN}@github.com/${CDK_REPO}.git" /tmp/cdk-repo + git clone --depth 1 "https://x-access-token:${CDK_REPO_TOKEN}@github.com/${CDK_REPO}.git" /tmp/cdk-repo + cd /tmp/cdk-repo + npm ci + npm run build + TARBALL=$(npm pack --pack-destination "$RUNNER_TEMP" | tail -1) + echo "CDK_TARBALL=$RUNNER_TEMP/$TARBALL" >> "$GITHUB_ENV" env: CDK_REPO_TOKEN: ${{ steps.app-token.outputs.token }} CDK_REPO: ${{ secrets.CDK_REPO_NAME }} - run: npm ci - - - name: Bundle GA and preview tarballs - run: | - npm run bundle - GA_TARBALL=$(ls aws-agentcore-*.tgz | grep -v preview | head -1) - PREVIEW_TARBALL=$(ls aws-agentcore-*-preview-*.tgz | head -1) - echo "GA_TARBALL=$PWD/$GA_TARBALL" >> "$GITHUB_ENV" - echo "PREVIEW_TARBALL=$PWD/$PREVIEW_TARBALL" >> "$GITHUB_ENV" - env: - AGENTCORE_CDK_PATH: /tmp/cdk-repo - - - name: Install GA CLI globally - run: npm install -g "$GA_TARBALL" + - run: npm run build + - name: Install CLI globally + run: npm install -g "$(npm pack | tail -1)" - name: Detect changed e2e test files id: changed run: | BASE_SHA=${{ github.event.pull_request.base.sha || 'HEAD~1' }} - # If any helper file changed, run all e2e tests - HELPERS_CHANGED=$(git diff --name-only "$BASE_SHA"..HEAD -- 'e2e-tests/*.ts' \ - | grep -v '\.test\.ts$' | head -1) - if [ -n "$HELPERS_CHANGED" ]; then - GA_EXTRA=$(find e2e-tests -name '*.test.ts' \ - | grep -v '^e2e-tests/strands-bedrock\.test\.ts$' \ - | grep -v '^e2e-tests/payment-strands-bedrock\.test\.ts$' \ - | grep -v '^e2e-tests/harness-' \ - | tr '\n' ' ') - HARNESS_EXTRA=$(find e2e-tests -name 'harness-*.test.ts' \ - | grep -v '^e2e-tests/harness-bedrock\.test\.ts$' \ - | tr '\n' ' ') - else - GA_EXTRA=$(git diff --name-only "$BASE_SHA"..HEAD -- 'e2e-tests/*.test.ts' \ - | grep -v '^e2e-tests/strands-bedrock\.test\.ts$' \ - | grep -v '^e2e-tests/payment-strands-bedrock\.test\.ts$' \ - | grep -v '^e2e-tests/harness-' \ - | tr '\n' ' ') - HARNESS_EXTRA=$(git diff --name-only 
"$BASE_SHA"..HEAD -- 'e2e-tests/harness-*.test.ts' \ - | grep -v '^e2e-tests/harness-bedrock\.test\.ts$' \ - | tr '\n' ' ') - fi - echo "ga_extra=$GA_EXTRA" >> "$GITHUB_OUTPUT" - echo "harness_extra=$HARNESS_EXTRA" >> "$GITHUB_OUTPUT" - echo "GA extra tests: ${GA_EXTRA:-none}" - echo "Harness extra tests: ${HARNESS_EXTRA:-none}" - - - name: Run E2E tests (GA) - env: - AWS_ACCOUNT_ID: ${{ steps.aws.outputs.account_id }} - AWS_REGION: ${{ inputs.aws_region || 'us-east-1' }} - ANTHROPIC_API_KEY: ${{ env.E2E_ANTHROPIC_API_KEY }} - OPENAI_API_KEY: ${{ env.E2E_OPENAI_API_KEY }} - GEMINI_API_KEY: ${{ env.E2E_GEMINI_API_KEY }} - E2E_EFS_ACCESS_POINT_ARN: ${{ env.E2E_EFS_ACCESS_POINT_ARN }} - E2E_S3_ACCESS_POINT_ARN: ${{ env.E2E_S3_ACCESS_POINT_ARN }} - E2E_FILESYSTEM_SUBNET_ID: ${{ env.E2E_FILESYSTEM_SUBNET_ID }} - E2E_FILESYSTEM_SECURITY_GROUP_ID: ${{ env.E2E_FILESYSTEM_SECURITY_GROUP_ID }} - # CoinbaseCDP testnet creds for payment-strands-bedrock.test.ts. Sourced from - # the same E2E secret (keys CDP_API_KEY_ID / CDP_API_KEY_SECRET / CDP_WALLET_SECRET), - # which parse-json-secrets surfaces as E2E_CDP_*; remapped here to the unprefixed - # names the test reads. Absent on forks -> test self-skips via its hasCdpCreds gate. 
- CDP_API_KEY_ID: ${{ env.E2E_CDP_API_KEY_ID }} - CDP_API_KEY_SECRET: ${{ env.E2E_CDP_API_KEY_SECRET }} - CDP_WALLET_SECRET: ${{ env.E2E_CDP_WALLET_SECRET }} - run: - npx vitest run --project e2e e2e-tests/strands-bedrock.test.ts e2e-tests/payment-strands-bedrock.test.ts ${{ - steps.changed.outputs.ga_extra }} - - - name: Install preview CLI globally - run: npm install -g "$PREVIEW_TARBALL" + CHANGED=$(git diff --name-only "$BASE_SHA"..HEAD -- 'e2e-tests/*.test.ts' \ + | grep -v '^e2e-tests/strands-bedrock\.test\.ts$' \ + | tr '\n' ' ') + echo "extra_tests=$CHANGED" >> "$GITHUB_OUTPUT" + echo "Changed e2e tests: ${CHANGED:-none}" - - name: Run E2E tests (preview/harness) + - name: Run E2E tests env: AWS_ACCOUNT_ID: ${{ steps.aws.outputs.account_id }} AWS_REGION: ${{ inputs.aws_region || 'us-east-1' }} ANTHROPIC_API_KEY: ${{ env.E2E_ANTHROPIC_API_KEY }} OPENAI_API_KEY: ${{ env.E2E_OPENAI_API_KEY }} GEMINI_API_KEY: ${{ env.E2E_GEMINI_API_KEY }} - E2E_EFS_ACCESS_POINT_ARN: ${{ env.E2E_EFS_ACCESS_POINT_ARN }} - E2E_S3_ACCESS_POINT_ARN: ${{ env.E2E_S3_ACCESS_POINT_ARN }} - E2E_FILESYSTEM_SUBNET_ID: ${{ env.E2E_FILESYSTEM_SUBNET_ID }} - E2E_FILESYSTEM_SECURITY_GROUP_ID: ${{ env.E2E_FILESYSTEM_SECURITY_GROUP_ID }} - BUILD_PREVIEW: '1' - run: npx vitest run --project e2e e2e-tests/harness-bedrock.test.ts ${{ steps.changed.outputs.harness_extra }} + CDK_TARBALL: ${{ env.CDK_TARBALL }} + # Always run strands-bedrock as baseline, plus any e2e test files changed in the PR + run: npx vitest run --project e2e e2e-tests/strands-bedrock.test.ts ${{ steps.changed.outputs.extra_tests }} diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 6b522474c..53c7c2ca6 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -2,9 +2,9 @@ name: Quality and Safety Checks on: push: - branches: ['main'] + branches: ['main', 'feat/**'] pull_request: - branches: ['main'] + branches: ['main', 'feat/**'] permissions: contents: read @@ -29,7 +29,7 @@ jobs: 
uses: actions/cache/save@v5 with: path: node_modules - key: node-modules-${{ hashFiles('npm-shrinkwrap.json') }} + key: node-modules-${{ hashFiles('package-lock.json') }} format: needs: setup @@ -42,7 +42,7 @@ jobs: - uses: actions/cache/restore@v5 with: path: node_modules - key: node-modules-${{ hashFiles('npm-shrinkwrap.json') }} + key: node-modules-${{ hashFiles('package-lock.json') }} - run: npm run format:check lint: @@ -56,7 +56,7 @@ jobs: - uses: actions/cache/restore@v5 with: path: node_modules - key: node-modules-${{ hashFiles('npm-shrinkwrap.json') }} + key: node-modules-${{ hashFiles('package-lock.json') }} - run: npm run lint security: @@ -70,7 +70,7 @@ jobs: - uses: actions/cache/restore@v5 with: path: node_modules - key: node-modules-${{ hashFiles('npm-shrinkwrap.json') }} + key: node-modules-${{ hashFiles('package-lock.json') }} - run: npm run security:audit secrets: @@ -84,7 +84,7 @@ jobs: - uses: actions/cache/restore@v5 with: path: node_modules - key: node-modules-${{ hashFiles('npm-shrinkwrap.json') }} + key: node-modules-${{ hashFiles('package-lock.json') }} - run: npm run secrets:check typecheck: @@ -98,11 +98,10 @@ jobs: - uses: actions/cache/restore@v5 with: path: node_modules - key: node-modules-${{ hashFiles('npm-shrinkwrap.json') }} + key: node-modules-${{ hashFiles('package-lock.json') }} - run: npm run typecheck schema-check: - if: ${{ !contains(github.event.pull_request.labels.*.name, 'release') }} runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 diff --git a/.github/workflows/pr-security-review.yml b/.github/workflows/pr-security-review.yml index 697cdc7b1..ea11529e1 100644 --- a/.github/workflows/pr-security-review.yml +++ b/.github/workflows/pr-security-review.yml @@ -1,15 +1,29 @@ name: Claude Security Review +# This workflow inlines the security-review prompt rather than calling the +# bundled /security-review slash command. 
The bundled skill silently bombs +# whenever the runner's clone gets shallowed mid-run (claude-code-action's +# restoreConfigFromBase does this on every PR by design — see +# https://github.com/anthropics/claude-code-action/blob/v1/src/github/operations/restore-config.ts), +# because its first action is `git diff origin/HEAD...` and a shallow clone +# has no merge base. Computing the diff ourselves before the action starts +# eliminates that whole class of failure. + on: pull_request_target: types: [opened, reopened, synchronize, labeled] + # Only review PRs targeting our two long-lived release branches. PRs + # into short-lived feature branches don't need a security gate — they + # get reviewed when those features are merged into main or + # feat/summit_release. + branches: + - main + - feat/summit_release workflow_dispatch: inputs: pr_number: description: - 'PR number to review (note: workflow_dispatch will NOT post inline comments — the action only attaches the - inline-comment MCP server on PR-context events. Use this only for end-to-end smoke-testing the prompt - plumbing.)' + PR number to review (workflow_dispatch will NOT post inline comments — use only for prompt smoke tests) required: true type: string @@ -45,10 +59,6 @@ jobs: uses: actions/github-script@v9 with: script: | - // pull_request_target opened/reopened/synchronize: gate on the PR author - // (auto-runs on maintainer-authored PRs; community PRs need the label path below). - // pull_request_target labeled (safe-to-review): gate on the labeler (sender) - // so a maintainer applying the label authorizes the run on a community PR. const isLabel = context.payload.action === 'labeled'; const user = isLabel ? 
context.payload.sender.login @@ -60,35 +70,25 @@ jobs: team_slug: 'agentcore-cli-devs', username: user, }); - console.log(`${reason} is a member of agentcore-cli-devs`); core.setOutput('authorized', 'true'); - } catch (teamError) { + } catch { try { const { data } = await github.rest.repos.getCollaboratorPermissionLevel({ owner: context.repo.owner, repo: context.repo.repo, username: user, }); - const hasWriteAccess = ['write', 'admin'].includes(data.permission); - if (hasWriteAccess) { - console.log(`${reason} has write access (${data.permission})`); - core.setOutput('authorized', 'true'); - } else { - console.log(`${reason} does not have write access (${data.permission}) — skipping review`); - core.setOutput('authorized', 'false'); - } - } catch (collabError) { - console.log(`${reason} authorization check failed (${collabError.status}) — skipping review`); + core.setOutput('authorized', ['write', 'admin'].includes(data.permission) ? 'true' : 'false'); + } catch { core.setOutput('authorized', 'false'); } } - - name: Auto-authorize workflow_dispatch id: dispatch-auth if: github.event_name == 'workflow_dispatch' run: echo "authorized=true" >> "$GITHUB_OUTPUT" - security-review: + review: needs: authorize if: needs.authorize.outputs.authorized == 'true' runs-on: ubuntu-latest @@ -96,9 +96,6 @@ jobs: env: AWS_REGION: us-west-2 steps: - # Generate the GitHub App token first so every subsequent github-script step can - # use it. The default GITHUB_TOKEN is read-only on pull_request_target / - # pull_request_review events from forks, which makes label/comment writes 403. 
- name: Generate GitHub App token id: app-token uses: actions/create-github-app-token@v1 @@ -106,7 +103,7 @@ jobs: app-id: ${{ vars.APP_ID }} private-key: ${{ secrets.APP_PRIVATE_KEY }} - - name: Resolve PR number + - name: Resolve PR id: pr uses: actions/github-script@v9 env: @@ -114,10 +111,9 @@ jobs: with: github-token: ${{ steps.app-token.outputs.token }} script: | - const num = - context.eventName === 'workflow_dispatch' - ? parseInt(process.env.PR_NUMBER_INPUT, 10) - : context.payload.pull_request.number; + const num = context.eventName === 'workflow_dispatch' + ? parseInt(process.env.PR_NUMBER_INPUT, 10) + : context.payload.pull_request.number; const { data: pr } = await github.rest.pulls.get({ owner: context.repo.owner, repo: context.repo.repo, @@ -127,7 +123,7 @@ jobs: core.setOutput('head_sha', pr.head.sha); core.setOutput('base_ref', pr.base.ref); - - name: Add claude-security-reviewing label + - name: Add reviewing label uses: actions/github-script@v9 env: PR_NUMBER: ${{ steps.pr.outputs.number }} @@ -148,7 +144,7 @@ jobs: repo: context.repo.repo, name: 'claude-security-reviewing', color: 'D73A4A', - description: 'Claude Code /security-review in progress', + description: 'Claude Code security review in progress', }); } } @@ -163,104 +159,249 @@ jobs: uses: actions/checkout@v6 with: ref: ${{ steps.pr.outputs.head_sha }} - # The bundled /security-review skill runs `git diff origin/HEAD...` so we need - # the base branch locally too. fetch-depth: 0 grabs the full history. fetch-depth: 0 - - name: Prepare base ref for /security-review skill + - name: Compute diff + id: diff env: BASE_REF: ${{ steps.pr.outputs.base_ref }} run: | set -euo pipefail - # The bundled /security-review skill's SessionStart hook runs - # `git diff --name-only origin/HEAD...` as its first command. Two - # things have to be true for that to succeed: - # 1. origin/HEAD has to be a valid ref. 
actions/checkout doesn't - # set up the remote's symbolic HEAD, so we point it at the PR's - # base branch. - # 2. The PR head and origin/ have to share a merge base in - # the local clone. actions/checkout@v6 with `ref: ` - # fetches the head's history but on fork PRs may NOT fetch - # origin/ into the clone — leaving `git diff origin/HEAD...` - # to fail with "no merge base", which silently bombs the skill - # (num_turns=0, model never invoked) and would otherwise look - # like a clean review with zero findings. - # Fetching origin/ explicitly closes that gap. + # Compute the diff *before* claude-code-action shallows the repo. + # The action's restoreConfigFromBase() runs `git fetch --depth=1` + # against the base branch on startup, which strips history and + # would break any base-vs-head diff after that point. Doing it + # here means the model gets a frozen artifact that can't be + # invalidated by anything the action does later. git fetch --no-tags origin "+refs/heads/$BASE_REF:refs/remotes/origin/$BASE_REF" - git remote set-head origin "$BASE_REF" - git symbolic-ref refs/remotes/origin/HEAD - - # Sanity: ensure the merge base actually resolves before we hand off - # to the skill. If it doesn't, fail loudly here rather than letting - # the skill silently bail. - if ! git merge-base "origin/$BASE_REF" HEAD >/dev/null; then - echo "::error::No merge base between HEAD and origin/$BASE_REF — /security-review cannot compute a diff." 
- exit 1 - fi - echo "Merge base: $(git merge-base "origin/$BASE_REF" HEAD)" - echo "Files changed: $(git diff --name-only "origin/$BASE_REF...HEAD" | wc -l)" + git diff "origin/$BASE_REF...HEAD" > /tmp/pr.diff + BYTES=$(wc -c < /tmp/pr.diff) + FILES=$(git diff --name-only "origin/$BASE_REF...HEAD" | wc -l | tr -d ' ') + echo "bytes=$BYTES" >> "$GITHUB_OUTPUT" + echo "files=$FILES" >> "$GITHUB_OUTPUT" + echo "Diff: $BYTES bytes across $FILES files" + + - name: Build prompt + if: steps.diff.outputs.bytes != '0' + run: | + set -euo pipefail + mkdir -p "$RUNNER_TEMP/prompt" + cat > "$RUNNER_TEMP/prompt/prompt.md" <<'PROMPT_EOF' + You are performing a HIGH-CONFIDENCE security code review of a pull + request. The complete diff is at `/tmp/pr.diff` — read it first + using the Read tool. That file is the ground truth for what the + PR changes; do not run `git diff` or any other git commands. To + understand context — callers of a changed function, existing + sanitization patterns, the project's threat model — use Grep, + Glob, and Read against the repository working tree. Do not use + Bash. + + # OBJECTIVE + + Identify HIGH-CONFIDENCE security vulnerabilities newly introduced + by this PR that have real exploitation potential. This is NOT a + general code review. Focus ONLY on security implications added by + the PR. Do not comment on pre-existing issues. + + # CRITICAL INSTRUCTIONS + + 1. MINIMIZE FALSE POSITIVES: Only flag issues where you are >80% + confident of actual exploitability. + 2. AVOID NOISE: Skip theoretical issues, style concerns, or + low-impact findings. + 3. FOCUS ON IMPACT: Prioritize vulnerabilities that lead to + unauthorized access, data breach, or system compromise. + 4. 
DO NOT report any of: + - Denial of service / resource exhaustion / rate limiting + - Secrets at rest on disk (handled by other tooling) + - Memory consumption or CPU exhaustion + + # CATEGORIES TO EXAMINE + + **Input validation**: SQL injection, command injection, XXE, + template injection, NoSQL injection, path traversal. + **AuthN/AuthZ**: authentication bypass, privilege escalation, + session/JWT flaws, authorization-logic bypasses. + **Crypto & secrets**: hardcoded keys/passwords/tokens, weak + algorithms, improper key storage, weak randomness, certificate + validation bypass. + **Code execution**: deserialization RCE (pickle, YAML, etc.), + eval injection, XSS (reflected/stored/DOM) — only in unsafe paths + (see precedents). + **Data exposure**: sensitive logging, PII handling violations, + API leakage, debug-info exposure. + + A finding can still be HIGH severity if only exploitable from the + local network. + + # METHODOLOGY + + Phase 1 — Repository context: identify existing security + libraries/frameworks, sanitization patterns, the project's threat + model. Use search tools. + + Phase 2 — Comparative analysis: compare new changes against + established patterns; flag deviations and net-new attack surface. + + Phase 3 — Vulnerability assessment: for each modified file, + trace user input → sensitive operations, look for unsafe privilege + boundary crossings, identify injection points. + + # FALSE-POSITIVE FILTER (apply hard) - - name: Configure AWS credentials (OIDC) + Read the code (Read/Grep/Glob); do not run commands to reproduce + or write files. + + HARD EXCLUSIONS — drop any finding matching: + 1. DoS / resource exhaustion. + 2. Secrets/credentials on disk if otherwise secured. + 3. Rate limiting or service overload. + 4. Memory/CPU exhaustion. + 5. Missing input validation on non-security-critical fields. + 6. Input sanitization in GitHub Actions workflows unless clearly + triggerable via untrusted input. + 7. 
Lack of hardening; only flag concrete vulns. + 8. Theoretical race conditions or timing attacks. + 9. Outdated third-party libraries (handled separately). + 10. Memory-safety issues in memory-safe languages (Rust, Go, + JS/TS, Python). + 11. Files that are unit tests or test-only. + 12. Log spoofing — un-sanitized user input to logs is not a vuln. + 13. SSRF that only controls the path (host/protocol control is + required). + 14. User-controlled content in AI system prompts is not a vuln. + 15. Regex injection. + 16. Regex DoS. + 17. Insecure documentation (.md and similar). + 18. Lack of audit logs. + + PRECEDENTS: + 1. Plaintext-logging high-value secrets IS a vuln; logging URLs + is assumed safe. + 2. UUIDs are unguessable and need no validation. + 3. Env vars and CLI flags are trusted inputs. + 4. Resource leaks (memory, fd) are not vulns. + 5. Tabnabbing, XS-Leaks, prototype pollution, open redirects: + only with extremely high confidence. + 6. React / Angular: do not report XSS in components or .tsx files + unless using `dangerouslySetInnerHTML`, + `bypassSecurityTrustHtml`, or equivalents. + 7. GitHub Actions workflow vulns: only when a concrete attack + path through untrusted input exists. + 8. Missing AuthN/AuthZ in client-side code is not a vuln — + validation is the server's job. + 9. MEDIUM findings only when obvious and concrete. + 10. .ipynb notebook vulns: only with a concrete attack path + through untrusted input. + 11. Logging non-PII data is not a vuln. Only flag when the data + is secrets, passwords, or PII. + 12. Command injection in shell scripts: only when there is a + concrete attack path through untrusted input. + + For each surviving finding, score confidence 1–10: + - 1–3: low / likely noise — drop + - 4–6: medium — drop unless obvious and concrete + - 7–10: high — keep + + # PROCESS + + Run this in three steps, exactly: + + 1. Spawn a Task sub-agent to identify candidate vulnerabilities. 
+ Pass the full instructions above (objective, categories, + methodology, hard exclusions, precedents). Have it return a + structured list of candidates with file/line/category/ + description/exploit/fix. + + 2. For EACH candidate from step 1, spawn an independent Task + sub-agent IN PARALLEL to adversarially verify it. Each + verifier gets the full FALSE-POSITIVE FILTER above and is + told to default to "drop" if uncertain. Each returns a + confidence score 1–10. + + 3. Drop any finding with confidence < 8. For every finding that + survives, call: + + mcp__github_inline_comment__create_inline_comment + + with `{ path, line, body }` pointing at the exact file and + line in the diff. The body should follow: + + **[SEVERITY]: [category]** + + [description and exploit scenario] + + **Recommendation:** [fix] + + Do NOT post a single summary comment listing all findings — + the workflow handles a top-level summary after this run + completes. If zero findings survive step 3, exit without + calling any tool. + + Begin. + PROMPT_EOF + + - name: Configure AWS credentials + if: steps.diff.outputs.bytes != '0' uses: aws-actions/configure-aws-credentials@v6 with: role-to-assume: ${{ secrets.BEDROCK_SECURITY_REVIEW_ROLE_ARN }} aws-region: us-west-2 - - name: Run Claude Code security review + - name: Load prompt into env + id: load-prompt + if: steps.diff.outputs.bytes != '0' + # The action only accepts `prompt:` (a string), not a file path — + # passing prompt_file silently no-ops, leaves the action with no + # trigger, and skips the run with "No trigger found". Read the + # built prompt into an environment variable so we can pass it + # inline below. Using GITHUB_ENV with a randomized heredoc + # sentinel is the standard Actions idiom for multi-line values.
+ env: + PROMPT_FILE: ${{ runner.temp }}/prompt/prompt.md + run: | + set -euo pipefail + DELIM="EOF_$(uuidgen)" + { + echo "PROMPT_BODY<<$DELIM" + cat "$PROMPT_FILE" + echo "$DELIM" + } >> "$GITHUB_ENV" + + - name: Run Claude Code id: review + if: steps.diff.outputs.bytes != '0' uses: anthropics/claude-code-action@v1 with: github_token: ${{ steps.app-token.outputs.token }} use_bedrock: 'true' - # The Claude Code SDK that ships with the action has /security-review bundled - # as a slash command. Invoking it directly lets the skill drive its own - # `git diff origin/HEAD...`, sub-task fan-out, and false-positive filtering - # without us re-implementing any of that. We append a short tail telling the - # action to use the inline-comment MCP tool for findings. - prompt: | - /security-review - - For each finding, call mcp__github_inline_comment__create_inline_comment with - { path, line, body } pointing at the exact file and line in the diff. Do NOT - post a single summary comment listing all findings — the workflow handles a - top-level summary after this run completes. If there are no findings, exit - without calling any tool. + prompt: ${{ env.PROMPT_BODY }} show_full_output: 'true' - # Allow-listing this MCP tool name is what tells the action to register the - # github_inline_comment MCP server. See anthropics/claude-code-action - # src/mcp/install-mcp-server.ts. + # Read/Grep/Glob let the model explore the repo for context + # (existing sanitization patterns, threat model, callers of a + # changed function). Task is needed for the sub-agents spawned + # in PROCESS steps 1–2 of the prompt (candidate finder and parallel verifiers). The github_inline_comment MCP tool is + # the output channel; allow-listing it is also what tells the + # action to attach the inline-comment MCP server. Bash is + # intentionally NOT allowed: the prompt forbids running + # commands, and keeping Bash off the list makes the diff at + # /tmp/pr.diff the only ground truth (no `git diff` re-runs + # against a possibly-shallow clone).
claude_args: >- - --model us.anthropic.claude-opus-4-7 --max-turns 30 --allowedTools - mcp__github_inline_comment__create_inline_comment + --model us.anthropic.claude-opus-4-7 --max-turns 60 --allowedTools "Read Grep Glob Task + mcp__github_inline_comment__create_inline_comment" - - name: Verify model actually ran + - name: Verify model ran productively id: model-ran - # The action exits 0 even when the model was never invoked (e.g. a - # SessionStart hook errored before the first turn). Treating that as - # success would let the workflow falsely report "no high-confidence - # findings" — that's exactly what happened on PR #1474, where the - # `/security-review` skill's first `git diff origin/HEAD...` hit a - # "no merge base" error, the SDK still returned `subtype: success` - # with `num_turns: 0` and zero tokens, and the buffer was empty. - # - # The action writes its full SDK transcript to - # ${RUNNER_TEMP}/claude-execution-output.json. We pull the final - # result envelope and require that the model actually took turns and - # spent tokens. If it didn't, mark the run as not-actually-completed - # so the summary comment uses the failure branch instead of pretending - # there were no findings. - if: steps.review.conclusion == 'success' || steps.review.conclusion == 'failure' + if: + steps.diff.outputs.bytes != '0' && (steps.review.conclusion == 'success' || steps.review.conclusion == + 'failure') env: - # The action exposes its full SDK transcript path as the - # `execution_file` step output (a JSON array of stream events; the - # final element is the result envelope). We fall back to the known - # path under RUNNER_TEMP if for some reason the output is missing. OUTPUT_JSON: ${{ steps.review.outputs.execution_file || format('{0}/claude-execution-output.json', runner.temp) }} run: | set -euo pipefail if [ ! -s "$OUTPUT_JSON" ]; then - echo "::warning::No claude-execution-output.json found at $OUTPUT_JSON; cannot verify the model ran." 
+ echo "::warning::No execution transcript at $OUTPUT_JSON — cannot verify" echo "ran=unknown" >> "$GITHUB_OUTPUT" echo "num_turns=0" >> "$GITHUB_OUTPUT" exit 0 @@ -271,18 +412,20 @@ jobs: echo "num_turns=$NUM_TURNS, is_error=$IS_ERROR, output_tokens=$OUTPUT_TOKENS" echo "num_turns=$NUM_TURNS" >> "$GITHUB_OUTPUT" if [ "$IS_ERROR" = "true" ] || [ "$NUM_TURNS" = "0" ] || [ "$OUTPUT_TOKENS" = "0" ]; then - echo "::error::Claude Code SDK reported success but the model never ran productively (num_turns=$NUM_TURNS, output_tokens=$OUTPUT_TOKENS, is_error=$IS_ERROR). The /security-review skill likely bailed before analysis (e.g. SessionStart hook error). Refusing to report 'no findings'." + echo "::group::Last messages from SDK transcript" + jq -r '.[] | select(.type == "user" or .type == "system") | .message.content // .subtype' "$OUTPUT_JSON" | tail -40 + echo "::endgroup::" + echo "::error::Model did not run productively (turns=$NUM_TURNS, output_tokens=$OUTPUT_TOKENS, is_error=$IS_ERROR)" echo "ran=false" >> "$GITHUB_OUTPUT" exit 1 fi echo "ran=true" >> "$GITHUB_OUTPUT" - - name: Count buffered findings + - name: Count findings id: findings - # Only count if the review step actually ran (success or failure - both produce - # a meaningful buffer state). Skip on cancellation/skip so we don't lie about - # "no findings" when Bedrock was never invoked. - if: steps.review.conclusion == 'success' || steps.review.conclusion == 'failure' + if: + steps.diff.outputs.bytes != '0' && (steps.review.conclusion == 'success' || steps.review.conclusion == + 'failure') run: | set -euo pipefail BUFFER=/tmp/inline-comments-buffer.jsonl @@ -294,10 +437,7 @@ jobs: echo "count=$COUNT" >> "$GITHUB_OUTPUT" echo "Buffered findings: $COUNT" - - name: Post security review summary comment - # Always post some kind of summary so the PR shows the run happened, but branch on - # the review step's conclusion so a cancelled/skipped run doesn't get reported as - # "no findings". 
+ - name: Post summary if: always() uses: actions/github-script@v9 env: @@ -306,6 +446,8 @@ jobs: REVIEW_CONCLUSION: ${{ steps.review.conclusion }} MODEL_RAN: ${{ steps.model-ran.outputs.ran }} NUM_TURNS: ${{ steps.model-ran.outputs.num_turns }} + DIFF_BYTES: ${{ steps.diff.outputs.bytes }} + DIFF_FILES: ${{ steps.diff.outputs.files }} RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} with: github-token: ${{ steps.app-token.outputs.token }} @@ -316,25 +458,21 @@ jobs: const modelRan = process.env.MODEL_RAN || 'unknown'; const numTurns = process.env.NUM_TURNS || '0'; const runUrl = process.env.RUN_URL; + const diffBytes = parseInt(process.env.DIFF_BYTES || '0', 10); let body; - if (modelRan !== 'true') { - // Two cases land here, both unsafe to report as "no findings": - // - 'false': SDK exited 0 but transcript shows the model never ran productively - // (e.g. /security-review's SessionStart hook errored before the first turn). - // - 'unknown': claude-execution-output.json was missing, so we couldn't verify - // the model ran at all. Treat as not-verified rather than silently green. - body = `**Claude Security Review:** the review did not actually analyze this PR (model took ${numTurns} turn${numTurns === '1' ? '' : 's'} — the skill likely failed during setup). See the [run](${runUrl}) for details; a later push or re-run is needed for a real review.`; + if (diffBytes === 0) { + body = `**Claude Security Review:** PR has an empty diff against base — nothing to review. ([run](${runUrl}))`; + } else if (modelRan !== 'true') { + body = `**Claude Security Review:** the review did not analyze this PR (model took ${numTurns} turn${numTurns === '1' ? '' : 's'}). See the [run](${runUrl}) for details; a later push or re-run is needed.`; } else if (conclusion === 'success') { - body = - count > 0 - ? `**Claude Security Review:** posted ${count} inline finding${count === 1 ? '' : 's'} on this PR. 
([run](${runUrl}))` - : `**Claude Security Review:** no high-confidence findings. ([run](${runUrl}))`; + body = count > 0 + ? `**Claude Security Review:** posted ${count} inline finding${count === 1 ? '' : 's'} on this PR. ([run](${runUrl}))` + : `**Claude Security Review:** no high-confidence findings. ([run](${runUrl}))`; } else if (conclusion === 'failure') { body = `**Claude Security Review:** the review run failed before completing. See the [run](${runUrl}) for details.`; } else { - // cancelled / skipped — analysis didn't run, do NOT claim "no findings" - body = `**Claude Security Review:** the review run was ${conclusion} before the analysis could complete (likely superseded or interrupted). See the [run](${runUrl}); a later run on this PR will replace this status.`; + body = `**Claude Security Review:** the review run was ${conclusion} before analysis could complete. See the [run](${runUrl}); a later run on this PR will replace this status.`; } await github.rest.issues.createComment({ @@ -344,7 +482,7 @@ jobs: body, }); - - name: Remove claude-security-reviewing label + - name: Remove reviewing label if: always() uses: actions/github-script@v9 env: diff --git a/.github/workflows/pr-size.yml b/.github/workflows/pr-size.yml index 792f0d728..2f3e588e3 100644 --- a/.github/workflows/pr-size.yml +++ b/.github/workflows/pr-size.yml @@ -4,7 +4,7 @@ name: PR Size Check and Label # Safe because this workflow only reads PR metadata — it never checks out untrusted code. 
on: pull_request_target: - branches: [main] + branches: [main, feat/**] jobs: label-size: diff --git a/.github/workflows/pr-tarball.yml b/.github/workflows/pr-tarball.yml index 65725c087..5da53aa4a 100644 --- a/.github/workflows/pr-tarball.yml +++ b/.github/workflows/pr-tarball.yml @@ -1,7 +1,7 @@ name: PR Tarball on: pull_request_target: - branches: [main] + branches: [main, feat/**] permissions: contents: write @@ -52,7 +52,7 @@ jobs: echo "name=$TARBALL_NAME" >> $GITHUB_OUTPUT - name: Generate GitHub App Token id: app-token - uses: actions/create-github-app-token@v1 + uses: actions/create-github-app-token@v3 with: app-id: ${{ vars.APP_ID }} private-key: ${{ secrets.APP_PRIVATE_KEY }} @@ -90,6 +90,5 @@ jobs: ### How to install ```bash - gh release download pr-${{ github.event.pull_request.number }}-tarball --repo ${{ github.repository }} --pattern "*.tgz" --dir /tmp/pr-tarball - npm install -g /tmp/pr-tarball/${{ steps.tarball.outputs.name }} + npm install ${{ steps.release.outputs.url }} ``` diff --git a/.github/workflows/pr-title.yml b/.github/workflows/pr-title.yml index 694bbe851..873becbd3 100644 --- a/.github/workflows/pr-title.yml +++ b/.github/workflows/pr-title.yml @@ -2,7 +2,7 @@ name: Validate PR Title on: pull_request_target: - branches: [main] + branches: [main, feat/**] types: [opened, edited, synchronize, reopened] permissions: @@ -38,9 +38,11 @@ jobs: The subject "{subject}" found in the pull request title "{title}" must start with a lowercase letter. 
Example: "feat: add deploy command" - validateSingleCommit: false - # Skip validation for bot/dependency/release PRs + # Validate the commit message when a PR has a single commit, since + # GitHub suggests using it as the merge commit message on squash-merge + validateSingleCommit: true + validateSingleCommitMatchesPrTitle: true + # Skip validation for bot/dependency PRs ignoreLabels: | bot dependencies - release diff --git a/.github/workflows/prerelease-tarball.yml b/.github/workflows/prerelease-tarball.yml deleted file mode 100644 index daddcd7c8..000000000 --- a/.github/workflows/prerelease-tarball.yml +++ /dev/null @@ -1,84 +0,0 @@ -name: Prerelease Tarball - -on: - push: - branches: [main] - # Manually trigger to pull in the latest CDK constructs changes - workflow_dispatch: - -permissions: - contents: write - -concurrency: - group: prerelease-tarball - cancel-in-progress: true - -jobs: - prerelease-tarball: - runs-on: ubuntu-latest - timeout-minutes: 15 - env: - TARBALL_BASE: agentcore-cli-prerelease - steps: - - uses: actions/checkout@v6 - - uses: actions/setup-node@v6 - with: - node-version: '20.x' - cache: 'npm' - - uses: astral-sh/setup-uv@v7 - - name: Generate GitHub App Token - id: app-token - uses: actions/create-github-app-token@v1 - with: - app-id: ${{ vars.APP_ID }} - private-key: ${{ secrets.APP_PRIVATE_KEY }} - owner: aws - - name: Clone CDK repo - run: | - git clone --depth 1 "https://x-access-token:${CDK_REPO_TOKEN}@github.com/${CDK_REPO}.git" /tmp/cdk-repo - env: - CDK_REPO_TOKEN: ${{ steps.app-token.outputs.token }} - CDK_REPO: ${{ secrets.CDK_REPO_NAME }} - - name: Compute version suffix - id: version - run: | - CLI_SHA=$(git rev-parse --short=5 HEAD) - CDK_SHA=$(git -C /tmp/cdk-repo rev-parse --short=5 HEAD) - SUFFIX="${CLI_SHA}-${CDK_SHA}" - echo "suffix=$SUFFIX" >> $GITHUB_OUTPUT - echo "Version suffix: $SUFFIX" - - run: npm run bundle - env: - AGENTCORE_CDK_PATH: /tmp/cdk-repo - AGENTCORE_TARBALL_OUTPUT: ${{ env.TARBALL_BASE }} - 
AGENTCORE_TARBALL_VERSION_SUFFIX: ${{ steps.version.outputs.suffix }} - - name: Create or update prerelease - env: - GH_TOKEN: ${{ steps.app-token.outputs.token }} - VERSION_SUFFIX: ${{ steps.version.outputs.suffix }} - run: | - TAG="prerelease" - - # Delete existing release if it exists (to update the tarballs) - gh release delete "$TAG" --yes --cleanup-tag 2>/dev/null || true - - # Create a new pre-release with both tarballs - gh release create "$TAG" \ - "${TARBALL_BASE}.tgz" \ - "${TARBALL_BASE}-preview.tgz" \ - --title "Prerelease" \ - --notes "Auto-generated tarballs from the latest commit on main. - - Version: \`${VERSION_SUFFIX}\` (cli-cdk) - - **GA build** (no harness features): - \`\`\` - npm install -g https://github.com/aws/agentcore-cli/releases/download/prerelease/${TARBALL_BASE}.tgz - \`\`\` - - **Preview build** (harness features enabled): - \`\`\` - npm install -g https://github.com/aws/agentcore-cli/releases/download/prerelease/${TARBALL_BASE}-preview.tgz - \`\`\`" \ - --prerelease \ - --target "${{ github.sha }}" diff --git a/.github/workflows/release-main-and-preview.yml b/.github/workflows/release-main-and-preview.yml index 6a99f94f0..751a5167f 100644 --- a/.github/workflows/release-main-and-preview.yml +++ b/.github/workflows/release-main-and-preview.yml @@ -1,18 +1,10 @@ -name: Release Main and Preview +name: Release Both (Main + Preview) on: workflow_dispatch: inputs: - release_target: - description: 'What to release' - required: true - type: choice - options: - - both - - main-only - - preview-only main_bump_type: - description: 'Main version bump (ignored for preview-only)' + description: 'Main branch version bump' required: true type: choice options: @@ -20,13 +12,11 @@ on: - minor - major preview_bump_type: - description: 'Preview version bump (ignored for main-only)' + description: 'Preview branch version bump (prerelease with preview tag)' required: true type: choice options: - prerelease - - minor - - major main_changelog: 
description: 'Main changelog entry (optional)' required: false @@ -36,7 +26,7 @@ on: required: false type: string dry_run: - description: 'Dry run — create PR but skip npm publish' + description: 'Dry run — create PRs but skip npm publish' required: false type: boolean default: false @@ -45,27 +35,64 @@ permissions: contents: write pull-requests: write -env: - AGENTCORE_TELEMETRY_DISABLED: '1' - jobs: # ═══════════════════════════════════════════════════════════════════ - # Step 1 — Prepare release (bump both versions, single PR) + # Preflight — verify preview contains all of main # ═══════════════════════════════════════════════════════════════════ - prepare-release: - name: Prepare Release + preflight: + name: Preflight Checks + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v6 + with: + fetch-depth: 0 + + - name: Verify running from main + run: | + if [[ "${{ github.ref }}" != "refs/heads/main" ]]; then + echo "❌ This workflow must be run from the main branch." + exit 1 + fi + + - name: Verify preview contains all of main + run: | + git fetch origin preview + MAIN_SHA=$(git rev-parse HEAD) + MERGE_BASE=$(git merge-base HEAD origin/preview) + + if [[ "$MAIN_SHA" != "$MERGE_BASE" ]]; then + echo "❌ preview branch does not contain all of main." + echo "" + echo "Main HEAD: $MAIN_SHA" + echo "Merge base: $MERGE_BASE" + echo "" + echo "The sync-preview workflow should have merged automatically." + echo "If it failed due to conflicts, resolve manually:" + echo " git checkout preview && git merge main && git push origin preview" + echo "" + echo "Then re-run this workflow." 
+ exit 1 + fi + + echo "✅ preview contains all of main" + + # ═══════════════════════════════════════════════════════════════════ + # Step 1 — Prepare main release (bump, PR) + # ═══════════════════════════════════════════════════════════════════ + prepare-main: + name: Prepare Main Release + needs: preflight runs-on: ubuntu-latest outputs: - main_version: ${{ steps.bump-main.outputs.version || steps.current-main.outputs.version }} - preview_version: ${{ steps.bump-preview.outputs.version }} - branch: ${{ steps.create-pr.outputs.branch }} - release_target: ${{ github.event.inputs.release_target }} + version: ${{ steps.bump.outputs.version }} + branch: ${{ steps.bump.outputs.branch }} steps: - name: Checkout main uses: actions/checkout@v6 with: - ref: ${{ github.ref_name }} + ref: main fetch-depth: 0 - uses: actions/setup-node@v6 @@ -82,9 +109,8 @@ jobs: - run: npm ci - - name: Bump main version - id: bump-main - if: inputs.release_target != 'preview-only' + - name: Bump version + id: bump env: BUMP_TYPE: ${{ github.event.inputs.main_bump_type }} CHANGELOG_INPUT: ${{ github.event.inputs.main_changelog }} @@ -97,42 +123,98 @@ jobs: NEW_VERSION=$(node -p "require('./package.json').version") echo "version=$NEW_VERSION" >> $GITHUB_OUTPUT + echo "branch=release/v$NEW_VERSION" >> $GITHUB_OUTPUT echo "📦 Main version: $NEW_VERSION" - - name: Output current main version (preview-only) - id: current-main - if: inputs.release_target == 'preview-only' + - name: Regenerate JSON schema + run: | + npm run build + node scripts/generate-schema.mjs + npx prettier --write schemas/ + + - name: Update snapshots + run: npm run test:update-snapshots + + - name: Generate GitHub App Token + id: app-token + uses: actions/create-github-app-token@v3 + with: + app-id: ${{ vars.APP_ID }} + private-key: ${{ secrets.APP_PRIVATE_KEY }} + + - name: Create release branch and PR + env: + GH_TOKEN: ${{ steps.app-token.outputs.token }} + NEW_VERSION: ${{ steps.bump.outputs.version }} + run: | + 
BRANCH_NAME="release/v$NEW_VERSION" + git ls-remote --exit-code --heads origin $BRANCH_NAME && git push origin --delete $BRANCH_NAME || true + git show-ref --verify --quiet refs/heads/$BRANCH_NAME && git branch -D $BRANCH_NAME || true + + git checkout -b $BRANCH_NAME + git add -A + git commit -m "chore: bump version to $NEW_VERSION" + git push origin $BRANCH_NAME + + gh pr create \ + --base main \ + --head "$BRANCH_NAME" \ + --title "Release v$NEW_VERSION" \ + --body "## Release v$NEW_VERSION (main) + + Part of a coordinated main + preview release. + + ### Checklist + - [ ] Review CHANGELOG.md + - [ ] All CI checks passing + - [ ] Merge this PR before approving the publish step" + + # ═══════════════════════════════════════════════════════════════════ + # Step 2 — Prepare preview release (bump, PR) + # ═══════════════════════════════════════════════════════════════════ + prepare-preview: + name: Prepare Preview Release + needs: preflight + runs-on: ubuntu-latest + outputs: + version: ${{ steps.bump.outputs.version }} + branch: ${{ steps.bump.outputs.branch }} + + steps: + - name: Checkout preview + uses: actions/checkout@v6 + with: + ref: preview + fetch-depth: 0 + + - uses: actions/setup-node@v6 + with: + node-version: 20.x + + - name: Install uv + uses: astral-sh/setup-uv@v7 + + - name: Configure git run: | - echo "version=$(node -p "require('./package.json').version")" >> $GITHUB_OUTPUT + git config --global user.name "github-actions[bot]" + git config --global user.email "github-actions[bot]@users.noreply.github.com" + + - run: npm ci - - name: Bump preview version - id: bump-preview - if: inputs.release_target != 'main-only' + - name: Bump version + id: bump env: - BUMP_TYPE: ${{ github.event.inputs.preview_bump_type }} + CHANGELOG_INPUT: ${{ github.event.inputs.preview_changelog }} run: | - CURRENT_VERSION=$(node -p "require('./preview-version.json').version") - echo "Current preview version: $CURRENT_VERSION" - - NEW_VERSION=$(node -e " - const current = 
require('./preview-version.json').version; - const bumpType = process.env.BUMP_TYPE; - const parts = current.match(/^(\d+)\.(\d+)\.(\d+)(?:-preview\.(\d+))?$/); - if (!parts) { console.error('Cannot parse version:', current); process.exit(1); } - let [, major, minor, patch, pre] = parts.map((v, i) => i > 0 && i < 5 ? parseInt(v || '0') : v); - if (bumpType === 'major') { major++; minor = 0; patch = 0; pre = 1; } - else if (bumpType === 'minor') { minor++; patch = 0; pre = 1; } - else { pre = (pre || 0) + 1; } - console.log(major + '.' + minor + '.' + patch + '-preview.' + pre); - ") - - node -e " - const fs = require('fs'); - const data = { version: '$NEW_VERSION' }; - fs.writeFileSync('preview-version.json', JSON.stringify(data, null, 2) + '\n'); - " + BUMP_CMD="npx tsx scripts/bump-version.ts prerelease --prerelease-tag preview" + if [ -n "$CHANGELOG_INPUT" ]; then + BUMP_CMD="$BUMP_CMD --changelog \"$CHANGELOG_INPUT\"" + fi + eval $BUMP_CMD + NEW_VERSION=$(node -p "require('./package.json').version") echo "version=$NEW_VERSION" >> $GITHUB_OUTPUT + echo "branch=release/v$NEW_VERSION" >> $GITHUB_OUTPUT echo "📦 Preview version: $NEW_VERSION" - name: Regenerate JSON schema @@ -146,144 +228,158 @@ jobs: - name: Generate GitHub App Token id: app-token - uses: actions/create-github-app-token@v1 + uses: actions/create-github-app-token@v3 with: app-id: ${{ vars.APP_ID }} private-key: ${{ secrets.APP_PRIVATE_KEY }} - name: Create release branch and PR - id: create-pr env: GH_TOKEN: ${{ steps.app-token.outputs.token }} - MAIN_VERSION: ${{ steps.bump-main.outputs.version || steps.current-main.outputs.version }} - PREVIEW_VERSION: ${{ steps.bump-preview.outputs.version }} - RELEASE_TARGET: ${{ github.event.inputs.release_target }} + NEW_VERSION: ${{ steps.bump.outputs.version }} run: | - # Build branch name based on what we're releasing - if [ "$RELEASE_TARGET" = "main-only" ]; then - BRANCH_NAME="release/v${MAIN_VERSION}" - TITLE="Release v$MAIN_VERSION" - 
COMMIT_MSG="chore: bump main to $MAIN_VERSION" - elif [ "$RELEASE_TARGET" = "preview-only" ]; then - BRANCH_NAME="release/preview-v${PREVIEW_VERSION}" - TITLE="Release preview v$PREVIEW_VERSION" - COMMIT_MSG="chore: bump preview to $PREVIEW_VERSION" - else - BRANCH_NAME="release/v${MAIN_VERSION}+preview.${PREVIEW_VERSION}" - TITLE="Release v$MAIN_VERSION + preview v$PREVIEW_VERSION" - COMMIT_MSG="chore: bump main to $MAIN_VERSION, preview to $PREVIEW_VERSION" - fi - echo "branch=$BRANCH_NAME" >> $GITHUB_OUTPUT - - git ls-remote --exit-code --heads origin "$BRANCH_NAME" && git push origin --delete "$BRANCH_NAME" || true - git show-ref --verify --quiet "refs/heads/$BRANCH_NAME" && git branch -D "$BRANCH_NAME" || true + BRANCH_NAME="release/v$NEW_VERSION" + git ls-remote --exit-code --heads origin $BRANCH_NAME && git push origin --delete $BRANCH_NAME || true + git show-ref --verify --quiet refs/heads/$BRANCH_NAME && git branch -D $BRANCH_NAME || true - git checkout -b "$BRANCH_NAME" + git checkout -b $BRANCH_NAME git add -A - git commit -m "$COMMIT_MSG" - git push origin "$BRANCH_NAME" + git commit -m "chore: bump version to $NEW_VERSION" + git push origin $BRANCH_NAME - # Build PR body - BODY="## $TITLE + gh pr create \ + --base preview \ + --head "$BRANCH_NAME" \ + --title "Release v$NEW_VERSION (preview)" \ + --body "## Release v$NEW_VERSION (preview) - | Package | Version | npm Tag | - |---------|---------|---------|" - if [ "$RELEASE_TARGET" != "preview-only" ]; then - BODY="$BODY - | @aws/agentcore | $MAIN_VERSION | latest |" - fi - if [ "$RELEASE_TARGET" != "main-only" ]; then - BODY="$BODY - | @aws/agentcore | $PREVIEW_VERSION | preview |" - fi - BODY="$BODY + Part of a coordinated main + preview release. 
### Checklist - [ ] Review CHANGELOG.md - [ ] All CI checks passing - [ ] Merge this PR before approving the publish step" - gh pr create \ - --base "${{ github.ref_name }}" \ - --head "$BRANCH_NAME" \ - --label release \ - --title "$TITLE" \ - --body "$BODY" + # ═══════════════════════════════════════════════════════════════════ + # Step 3 — Build and test both + # ═══════════════════════════════════════════════════════════════════ + test-main: + name: Test Main + needs: prepare-main + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6 + with: + ref: release/v${{ needs.prepare-main.outputs.version }} + - uses: actions/setup-node@v6 + with: + node-version: 20.x + - name: Configure git + run: | + git config --global user.name "github-actions[bot]" + git config --global user.email "github-actions[bot]@users.noreply.github.com" + - run: curl -LsSf https://astral.sh/uv/install.sh | sh + - run: npm ci + - run: npm run lint + - run: npm run typecheck + - run: npm run build + - run: npm run test:unit + + test-preview: + name: Test Preview + needs: prepare-preview + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6 + with: + ref: release/v${{ needs.prepare-preview.outputs.version }} + - uses: actions/setup-node@v6 + with: + node-version: 20.x + - name: Configure git + run: | + git config --global user.name "github-actions[bot]" + git config --global user.email "github-actions[bot]@users.noreply.github.com" + - run: curl -LsSf https://astral.sh/uv/install.sh | sh + - run: npm ci + - run: npm run lint + - run: npm run typecheck + - run: npm run build + - run: npm run test:unit # ═══════════════════════════════════════════════════════════════════ - # Step 2 — Manual approval gate + # Step 4 — Manual approval gate # ═══════════════════════════════════════════════════════════════════ release-approval: - name: Release Approval - needs: [prepare-release] + name: Release Approval (Both) + needs: [test-main, test-preview, prepare-main, prepare-preview] 
runs-on: ubuntu-latest environment: name: npm-publish-approval steps: - name: Approval checkpoint env: - MAIN_VERSION: ${{ needs.prepare-release.outputs.main_version }} - PREVIEW_VERSION: ${{ needs.prepare-release.outputs.preview_version }} + MAIN_VERSION: ${{ needs.prepare-main.outputs.version }} + PREVIEW_VERSION: ${{ needs.prepare-preview.outputs.version }} run: | + echo "✅ Both builds and tests passed" + echo "" echo "📦 Main version: $MAIN_VERSION (npm tag: latest)" echo "📦 Preview version: $PREVIEW_VERSION (npm tag: preview)" echo "" echo "⚠️ MANUAL APPROVAL REQUIRED" echo "" echo "Before approving:" - echo "1. Verify PR CI checks are passing" - echo "2. Merge the release PR to main" - echo "3. Verify the PR is merged" + echo "1. Merge the main release PR (release/v$MAIN_VERSION → main)" + echo "2. Merge the preview release PR (release/v$PREVIEW_VERSION → preview)" + echo "3. Verify both PRs are merged" # ═══════════════════════════════════════════════════════════════════ - # Step 3 — Verify PR merged + # Step 5 — Verify both PRs merged before any publish # ═══════════════════════════════════════════════════════════════════ - verify-merge: - name: Verify PR Merged - needs: [prepare-release, release-approval] + verify-merges: + name: Verify Both PRs Merged + needs: [prepare-main, prepare-preview, release-approval] if: ${{ !inputs.dry_run }} runs-on: ubuntu-latest steps: - name: Checkout uses: actions/checkout@v6 with: - ref: ${{ github.ref_name }} fetch-depth: 0 - name: Verify main version - if: needs.prepare-release.outputs.release_target != 'preview-only' env: - EXPECTED: ${{ needs.prepare-release.outputs.main_version }} + EXPECTED: ${{ needs.prepare-main.outputs.version }} run: | - git fetch origin ${{ github.ref_name }} - ACTUAL=$(git show origin/${{ github.ref_name }}:package.json | node -p "JSON.parse(require('fs').readFileSync('/dev/stdin','utf8')).version") + git fetch origin main + ACTUAL=$(git show origin/main:package.json | node -p 
"JSON.parse(require('fs').readFileSync('/dev/stdin','utf8')).version") if [ "$ACTUAL" != "$EXPECTED" ]; then - echo "❌ Release PR not merged yet!" - echo "Expected main version: $EXPECTED, Got: $ACTUAL" + echo "❌ Main release PR not merged yet!" + echo "Expected: $EXPECTED, Got: $ACTUAL" exit 1 fi echo "✅ Main version verified: $ACTUAL" - name: Verify preview version - if: needs.prepare-release.outputs.release_target != 'main-only' env: - EXPECTED: ${{ needs.prepare-release.outputs.preview_version }} + EXPECTED: ${{ needs.prepare-preview.outputs.version }} run: | - ACTUAL=$(git show origin/${{ github.ref_name }}:preview-version.json | node -p "JSON.parse(require('fs').readFileSync('/dev/stdin','utf8')).version") + git fetch origin preview + ACTUAL=$(git show origin/preview:package.json | node -p "JSON.parse(require('fs').readFileSync('/dev/stdin','utf8')).version") if [ "$ACTUAL" != "$EXPECTED" ]; then - echo "❌ Release PR not merged yet!" - echo "Expected preview version: $EXPECTED, Got: $ACTUAL" + echo "❌ Preview release PR not merged yet!" 
+ echo "Expected: $EXPECTED, Got: $ACTUAL" exit 1 fi echo "✅ Preview version verified: $ACTUAL" # ═══════════════════════════════════════════════════════════════════ - # Step 4a — Publish main to npm (tag: latest) + # Step 6a — Publish main to npm (tag: latest) # ═══════════════════════════════════════════════════════════════════ publish-main: name: Publish Main (@latest) - needs: [prepare-release, verify-merge] - if: inputs.release_target != 'preview-only' + needs: [prepare-main, verify-merges] runs-on: ubuntu-latest environment: name: npm-publish @@ -296,7 +392,7 @@ jobs: - name: Checkout main uses: actions/checkout@v6 with: - ref: ${{ github.ref_name }} + ref: main fetch-depth: 0 - uses: actions/setup-node@v6 @@ -313,7 +409,7 @@ jobs: - name: Tag and release env: - VERSION: ${{ needs.prepare-release.outputs.main_version }} + VERSION: ${{ needs.prepare-main.outputs.version }} run: | git config --global user.name "github-actions[bot]" git config --global user.email "github-actions[bot]@users.noreply.github.com" @@ -323,24 +419,23 @@ jobs: - name: Create GitHub Release uses: softprops/action-gh-release@v3 with: - tag_name: v${{ needs.prepare-release.outputs.main_version }} - name: AgentCore CLI v${{ needs.prepare-release.outputs.main_version }} + tag_name: v${{ needs.prepare-main.outputs.version }} + name: AgentCore CLI v${{ needs.prepare-main.outputs.version }} generate_release_notes: true prerelease: false body: | ## Installation ```bash - npm install -g @aws/agentcore@${{ needs.prepare-release.outputs.main_version }} + npm install -g @aws/agentcore@${{ needs.prepare-main.outputs.version }} ``` # ═══════════════════════════════════════════════════════════════════ - # Step 4b — Publish preview to npm (tag: preview) + # Step 6b — Publish preview to npm (tag: preview) # ═══════════════════════════════════════════════════════════════════ publish-preview: name: Publish Preview (@preview) - needs: [prepare-release, verify-merge] - if: inputs.release_target != 
'main-only' + needs: [prepare-preview, verify-merges] runs-on: ubuntu-latest environment: name: npm-publish @@ -350,10 +445,10 @@ jobs: contents: write steps: - - name: Checkout main + - name: Checkout preview uses: actions/checkout@v6 with: - ref: ${{ github.ref_name }} + ref: preview fetch-depth: 0 - uses: actions/setup-node@v6 @@ -363,30 +458,14 @@ jobs: - run: npm install -g npm@11.5.1 - run: npm ci - - - name: Set preview version in package.json - env: - VERSION: ${{ needs.prepare-release.outputs.preview_version }} - run: | - node -e " - const fs = require('fs'); - const pkg = JSON.parse(fs.readFileSync('package.json', 'utf8')); - pkg.version = process.env.VERSION; - fs.writeFileSync('package.json', JSON.stringify(pkg, null, 2) + '\n'); - " - echo "Set package.json version to $VERSION for preview publish" - - - name: Build package - env: - BUILD_PREVIEW: '1' - run: npm run build + - run: npm run build - name: Publish to npm run: npm publish --access public --provenance --tag preview - name: Tag and release env: - VERSION: ${{ needs.prepare-release.outputs.preview_version }} + VERSION: ${{ needs.prepare-preview.outputs.version }} run: | git config --global user.name "github-actions[bot]" git config --global user.email "github-actions[bot]@users.noreply.github.com" @@ -396,8 +475,8 @@ jobs: - name: Create GitHub Release uses: softprops/action-gh-release@v3 with: - tag_name: v${{ needs.prepare-release.outputs.preview_version }} - name: AgentCore CLI v${{ needs.prepare-release.outputs.preview_version }} (Preview) + tag_name: v${{ needs.prepare-preview.outputs.version }} + name: AgentCore CLI v${{ needs.prepare-preview.outputs.version }} (Preview) generate_release_notes: true prerelease: true body: | @@ -412,14 +491,14 @@ jobs: # ═══════════════════════════════════════════════════════════════════ summary: name: Release Summary - needs: [prepare-release, publish-main, publish-preview] - if: always() && !cancelled() + needs: [prepare-main, prepare-preview, 
publish-main, publish-preview] + if: always() runs-on: ubuntu-latest steps: - name: Summary env: - MAIN_VERSION: ${{ needs.prepare-release.outputs.main_version }} - PREVIEW_VERSION: ${{ needs.prepare-release.outputs.preview_version }} + MAIN_VERSION: ${{ needs.prepare-main.outputs.version }} + PREVIEW_VERSION: ${{ needs.prepare-preview.outputs.version }} MAIN_STATUS: ${{ needs.publish-main.result }} PREVIEW_STATUS: ${{ needs.publish-preview.result }} run: | diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 9984ad891..a7bfb6416 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -8,6 +8,8 @@ on: required: true type: choice options: + - preview + - preview-major - patch - minor - major @@ -33,28 +35,13 @@ jobs: outputs: version: ${{ steps.bump.outputs.version }} branch: ${{ steps.bump.outputs.branch }} - dist_tag: ${{ steps.release-meta.outputs.dist_tag }} - base_branch: ${{ steps.release-meta.outputs.base_branch }} steps: - - name: Determine release metadata - id: release-meta + - name: Validate running from main run: | - BRANCH_NAME="${{ github.ref_name }}" - VERSION_BUMP="${{ github.event.inputs.bump_type }}" - - if [[ "$BRANCH_NAME" == "main" ]]; then - echo "dist_tag=latest" >> $GITHUB_OUTPUT - echo "base_branch=main" >> $GITHUB_OUTPUT - else - if [[ "$VERSION_BUMP" != "prerelease" ]]; then - echo "❌ ERROR: Only the prerelease bump type is allowed from non-main branches." 
- echo "Current branch: $BRANCH_NAME, bump type: $VERSION_BUMP" - exit 1 - fi - echo "dist_tag=preview" >> $GITHUB_OUTPUT - echo "base_branch=$BRANCH_NAME" >> $GITHUB_OUTPUT - echo "ℹ️ Publishing preview release from branch: $BRANCH_NAME" + if [[ "${{ github.ref }}" != "refs/heads/main" ]]; then + echo "⚠️ WARNING: Running from ${{ github.ref }}" + echo "⚠️ Production releases should only run from main branch" fi - name: Checkout code @@ -162,7 +149,7 @@ jobs: - name: Generate GitHub App Token id: app-token - uses: actions/create-github-app-token@v1 + uses: actions/create-github-app-token@v3 with: app-id: ${{ vars.APP_ID }} private-key: ${{ secrets.APP_PRIVATE_KEY }} @@ -171,40 +158,46 @@ jobs: env: GH_TOKEN: ${{ steps.app-token.outputs.token }} NEW_VERSION: ${{ steps.bump.outputs.version }} - BASE_BRANCH: ${{ steps.release-meta.outputs.base_branch }} - DIST_TAG: ${{ steps.release-meta.outputs.dist_tag }} + GITHUB_REF: ${{ github.ref }} GITHUB_ACTOR: ${{ github.actor }} run: | BRANCH_NAME="release/v$NEW_VERSION" - RELEASE_TYPE="Production" - if [ "$DIST_TAG" != "latest" ]; then - RELEASE_TYPE="Preview (npm tag: $DIST_TAG)" + WARNING_TEXT="" + if [ "$GITHUB_REF" != "refs/heads/main" ]; then + WARNING_TEXT="**WARNING**: Not running from main branch!" + else + WARNING_TEXT="✅ Running from main branch" fi gh pr create \ - --base "$BASE_BRANCH" \ + --base main \ --head "$BRANCH_NAME" \ - --label release \ --title "Release v$NEW_VERSION" \ - --body "## Release v$NEW_VERSION + --body "## 🚀 Release v$NEW_VERSION This PR was automatically created by the release workflow. - **Release type:** $RELEASE_TYPE - **Base branch:** $BASE_BRANCH - - ### Pre-merge Checklist + ### ⚠️ Pre-merge Checklist - [ ] Review CHANGELOG.md - ensure it has meaningful release notes - [ ] Verify version numbers are correct in all files - [ ] All CI checks are passing - ### Release Process + ### 📝 How to improve changelog + If the auto-generated changelog isn't good enough: + 1. 
Edit CHANGELOG.md in this PR + 2. Commit the changes + 3. Then approve and merge + + ### 🔄 Release Process After merging this PR: 1. Package will be built and tested 2. **Manual approval required** before publishing to npm 3. GitHub release and tag created after publication + ### 🚨 Running from: $GITHUB_REF + $WARNING_TEXT + --- *Triggered by @$GITHUB_ACTOR*" @@ -299,6 +292,7 @@ jobs: name: Publish to npm needs: [prepare-release, release-approval] runs-on: ubuntu-latest + if: github.ref == 'refs/heads/main' environment: name: npm-publish url: https://www.npmjs.com/package/@aws/agentcore @@ -307,15 +301,14 @@ jobs: contents: write # Required to push git tags steps: - - name: Checkout base branch (AFTER PR merge) + - name: Checkout latest main (AFTER PR merge) uses: actions/checkout@v6 with: - ref: ${{ needs.prepare-release.outputs.base_branch }} + ref: main fetch-depth: 0 - name: Verify we have the merged code run: | - echo "Branch: ${{ needs.prepare-release.outputs.base_branch }}" echo "Current version in package.json:" cat package.json | grep '"version"' echo "" @@ -350,9 +343,8 @@ jobs: env: VERSION: ${{ steps.version.outputs.version }} EXPECTED_VERSION: ${{ needs.prepare-release.outputs.version }} - BASE_BRANCH: ${{ needs.prepare-release.outputs.base_branch }} run: | - echo "Version in $BASE_BRANCH: $VERSION" + echo "Version in main branch: $VERSION" echo "Expected version from PR: $EXPECTED_VERSION" if [ "$VERSION" != "$EXPECTED_VERSION" ]; then @@ -360,15 +352,15 @@ jobs: echo "❌ ERROR: Version mismatch!" echo "" echo "The release PR has NOT been merged yet." - echo "$BASE_BRANCH has: $VERSION" + echo "Main branch has: $VERSION" echo "Release PR has: $EXPECTED_VERSION" echo "" - echo "Please MERGE the release PR first, then approve this deployment." + echo "👉 Please MERGE the release PR first, then approve this deployment." 
echo "" exit 1 fi - echo "Version matches - PR was merged correctly" + echo "✅ Version matches - PR was merged correctly" - name: Install dependencies run: npm ci @@ -377,13 +369,10 @@ jobs: run: npm run build - name: Publish to npm (using OIDC trusted publishing) - env: - DIST_TAG: ${{ needs.prepare-release.outputs.dist_tag }} run: | echo "Publishing with OIDC trusted publishing..." echo "No NPM_TOKEN needed - using GitHub OIDC" - echo "Dist tag: $DIST_TAG" - npm publish --access public --provenance --tag "$DIST_TAG" + npm publish --access public --provenance --tag latest - name: Create and push tag env: diff --git a/.github/workflows/slack-issue-notification.yml b/.github/workflows/slack-issue-notification.yml new file mode 100644 index 000000000..eca83b08b --- /dev/null +++ b/.github/workflows/slack-issue-notification.yml @@ -0,0 +1,53 @@ +name: Slack Issue Notification + +on: + issues: + types: [opened] + +permissions: {} + +jobs: + notify-slack: + runs-on: ubuntu-latest + steps: + - name: Send issue details to Slack + # Attacker-controlled fields are passed through env: rather than + # interpolated into the YAML payload, to prevent workflow injection. + # Schema is uniform across event types: every workflow sends the + # same 20 keys so Slack-side branching on event_type is reliable. + # For issue_opened, the issue_* fields carry the data and the + # pr_*/comment_* fields are empty. 
+ env: + REPOSITORY: ${{ github.repository }} + CREATED_AT: ${{ github.event.issue.created_at }} + ISSUE_NUMBER: ${{ github.event.issue.number }} + ISSUE_TITLE: ${{ github.event.issue.title }} + ISSUE_URL: ${{ github.event.issue.html_url }} + ISSUE_AUTHOR: ${{ github.event.issue.user.login }} + ISSUE_BODY: ${{ github.event.issue.body }} + LABELS: ${{ join(github.event.issue.labels.*.name, ', ') }} + uses: slackapi/slack-github-action@v3.0.3 + with: + webhook: ${{ secrets.SLACK_WEBHOOK_URL }} + webhook-type: webhook-trigger + payload: | + event_type: "issue_opened" + repository: "${{ env.REPOSITORY }}" + created_at: "${{ env.CREATED_AT }}" + issue_number: "${{ env.ISSUE_NUMBER }}" + issue_title: ${{ toJSON(env.ISSUE_TITLE) }} + issue_url: "${{ env.ISSUE_URL }}" + issue_author: "${{ env.ISSUE_AUTHOR }}" + issue_body: ${{ toJSON(env.ISSUE_BODY) }} + labels: ${{ toJSON(env.LABELS) }} + pr_number: "" + pr_title: "" + pr_url: "" + pr_author: "" + pr_state: "" + pr_closed_at: "" + pr_merged_at: "" + comment_id: "" + comment_url: "" + comment_author: "" + comment_body: "" diff --git a/.github/workflows/slack-open-prs-notification.yml b/.github/workflows/slack-open-prs-notification.yml index 68dd1df49..be684d4b3 100644 --- a/.github/workflows/slack-open-prs-notification.yml +++ b/.github/workflows/slack-open-prs-notification.yml @@ -40,7 +40,7 @@ jobs: ); - name: Send open PRs summary to Slack - uses: slackapi/slack-github-action@v2.1.1 + uses: slackapi/slack-github-action@v3.0.3 with: webhook: ${{ secrets.SLACK_OPEN_PRS_WEBHOOK_URL }} webhook-type: webhook-trigger diff --git a/.github/workflows/strands-command.yml b/.github/workflows/strands-command.yml index dcf768206..73c848d65 100644 --- a/.github/workflows/strands-command.yml +++ b/.github/workflows/strands-command.yml @@ -96,7 +96,7 @@ jobs: - name: Generate GitHub App Token id: app-token - uses: actions/create-github-app-token@v1 + uses: actions/create-github-app-token@v3 with: app-id: ${{ vars.APP_ID }} private-key: 
${{ secrets.APP_PRIVATE_KEY }} diff --git a/.github/workflows/sync-from-public.yml b/.github/workflows/sync-from-public.yml deleted file mode 100644 index a2ec14c0d..000000000 --- a/.github/workflows/sync-from-public.yml +++ /dev/null @@ -1,110 +0,0 @@ -name: Sync from Public Repo - -on: - schedule: - - cron: '0 */6 * * *' # Every 6 hours - workflow_dispatch: # Manual trigger via Actions tab - -permissions: - contents: write - pull-requests: write - -jobs: - sync: - runs-on: ubuntu-latest - steps: - - name: Generate GitHub App Token - id: app-token - uses: actions/create-github-app-token@v1 - with: - app-id: ${{ vars.APP_ID }} - private-key: ${{ secrets.APP_PRIVATE_KEY }} - - - uses: actions/checkout@v4 - with: - fetch-depth: 0 - token: ${{ steps.app-token.outputs.token }} - - - name: Configure Git - run: | - git config user.name "github-actions[bot]" - git config user.email "github-actions[bot]@users.noreply.github.com" - - - name: Fetch public main - run: | - git remote add public https://github.com/aws/agentcore-cli.git - git fetch public main - - - name: Sync main with public/main - run: | - git checkout -B main origin/main - - # Check if public/main is already merged - if git merge-base --is-ancestor public/main HEAD; then - echo "✅ main is already up to date with public/main" - exit 0 - fi - - # Merge but exclude .github/workflows/ (GITHUB_TOKEN lacks workflow permission) - if git merge public/main --no-commit --no-ff; then - git checkout HEAD -- .github/workflows/ 2>/dev/null || true - git commit -m "chore: sync main with public/main" - git push origin main - echo "✅ main synced successfully" - else - echo "⚠️ Conflict detected in main" - - # Capture conflicted files before aborting - conflicted_files=$(git diff --name-only --diff-filter=U 2>/dev/null || echo "Unable to determine conflicted files") - git merge --abort - - # Check if a sync PR already exists - existing_pr=$(gh pr list --base "main" --search "Merge public/main" --state open --json number --jq 
'.[0].number' 2>/dev/null || echo "") - - if [ -n "$existing_pr" ]; then - echo "ℹ️ PR #$existing_pr already exists, skipping" - exit 0 - fi - - conflict_branch="sync-conflict-main-$(date +%Y%m%d-%H%M%S)" - git checkout -b "$conflict_branch" - - git merge public/main --no-commit --no-ff || true - git checkout HEAD -- .github/workflows/ 2>/dev/null || true - git add -A - git commit -m "chore: sync main with public/main (conflicts present) - - This automated sync detected merge conflicts that require manual resolution. - - Source: public/main (https://github.com/aws/agentcore-cli) - Target: main - - Please resolve conflicts and merge this PR." || true - - git push origin "$conflict_branch" - - gh pr create \ - --title "🔀 [Sync Conflict] Merge public/main → main" \ - --body "## Automated Sync Conflict - - This PR was automatically created because merging \`public/main\` into \`main\` encountered conflicts. - - **Source:** \`main\` from [aws/agentcore-cli](https://github.com/aws/agentcore-cli) - **Target:** \`main\` - - ### Action Required - 1. \`git fetch origin && git checkout $conflict_branch\` - 2. Resolve merge conflicts - 3. \`git add . && git commit\` - 4. \`git push origin $conflict_branch\` - 5. 
Merge this PR - - ### Files with Conflicts - \`\`\` - $conflicted_files - \`\`\`" \ - --base "main" \ - --head "$conflict_branch" || echo "⚠️ Failed to create PR" - fi - env: - GH_TOKEN: ${{ steps.app-token.outputs.token }} diff --git a/.github/workflows/sync-preview.yml b/.github/workflows/sync-preview.yml new file mode 100644 index 000000000..14f0add13 --- /dev/null +++ b/.github/workflows/sync-preview.yml @@ -0,0 +1,191 @@ +name: Sync Preview with Main + +on: + push: + branches: [main] + +concurrency: + group: sync-preview + cancel-in-progress: false + +permissions: + contents: write + pull-requests: write + +jobs: + sync: + name: Merge main into preview + runs-on: ubuntu-latest + steps: + - name: Generate GitHub App Token + id: app-token + uses: actions/create-github-app-token@v3 + with: + app-id: ${{ vars.APP_ID }} + private-key: ${{ secrets.APP_PRIVATE_KEY }} + + - name: Checkout preview + uses: actions/checkout@v6 + with: + ref: preview + fetch-depth: 0 + token: ${{ steps.app-token.outputs.token }} + + - name: Configure git + run: | + git config --global user.name "github-actions[bot]" + git config --global user.email "github-actions[bot]@users.noreply.github.com" + + - name: Check if sync needed + id: check + run: | + git fetch origin main + MAIN_SHA=$(git rev-parse origin/main) + MERGE_BASE=$(git merge-base HEAD origin/main) + + if [[ "$MAIN_SHA" == "$MERGE_BASE" ]]; then + echo "✅ preview already contains all of main" + echo "needed=false" >> $GITHUB_OUTPUT + else + echo "needed=true" >> $GITHUB_OUTPUT + fi + + - name: Skip if already synced + if: steps.check.outputs.needed == 'false' + run: echo "Nothing to sync." 
+ + - name: Merge main into preview + if: steps.check.outputs.needed == 'true' + id: merge + run: | + # Save preview's version before merge so we can restore it after + PREVIEW_VERSION=$(node -p "require('./package.json').version") + echo "preview_version=$PREVIEW_VERSION" >> $GITHUB_OUTPUT + + if git merge origin/main --no-edit -m "chore: merge main into preview"; then + echo "status=clean" >> $GITHUB_OUTPUT + else + # preview carries a higher version string than main (e.g. 1.0.0-preview.X vs 0.13.X). + # This means package.json/package-lock.json almost always conflict on the version field. + # Accept main's content here; the version is restored in the next step. + for f in package.json package-lock.json; do + if git diff --name-only --diff-filter=U | grep -qx "$f"; then + git checkout --theirs "$f" + git add "$f" + echo " ↳ resolved $f conflict (accepted main, will restore version)" + fi + done + + # Check if all conflicts are now resolved + if [[ -z "$(git diff --name-only --diff-filter=U)" ]]; then + git commit --no-edit -m "chore: merge main into preview" + echo "status=clean" >> $GITHUB_OUTPUT + else + echo "status=conflict" >> $GITHUB_OUTPUT + fi + fi + + - name: Restore preview-owned files + if: steps.merge.outputs.status == 'clean' + run: | + # These files are auto-generated during preview releases and must not + # be overwritten by main's versions (schema-check CI will reject changes + # to schemas/, and CHANGELOG.md tracks preview releases separately). + PREVIEW_HEAD=$(git rev-parse HEAD^1) + for f in schemas/agentcore.schema.v1.json CHANGELOG.md; do + if git show "$PREVIEW_HEAD:$f" > /dev/null 2>&1; then + git show "$PREVIEW_HEAD:$f" > "$f" + git add "$f" + echo " ↳ restored preview's $f" + fi + done + if ! 
git diff --cached --quiet; then + git commit -m "chore: restore preview-owned files (schema, changelog)" + fi + + - name: Restore preview version and push + if: steps.merge.outputs.status == 'clean' + run: | + PREVIEW_VERSION="${{ steps.merge.outputs.preview_version }}" + CURRENT_VERSION=$(node -p "require('./package.json').version") + + if [[ "$CURRENT_VERSION" != "$PREVIEW_VERSION" ]]; then + PREVIEW_VERSION="$PREVIEW_VERSION" node -e " + const fs = require('fs'); + const pkg = JSON.parse(fs.readFileSync('package.json', 'utf8')); + pkg.version = process.env.PREVIEW_VERSION; + fs.writeFileSync('package.json', JSON.stringify(pkg, null, 2) + '\n'); + " + if [[ -f package-lock.json ]]; then + PREVIEW_VERSION="$PREVIEW_VERSION" node -e " + const fs = require('fs'); + const lock = JSON.parse(fs.readFileSync('package-lock.json', 'utf8')); + lock.version = process.env.PREVIEW_VERSION; + if (lock.packages && lock.packages['']) { + lock.packages[''].version = process.env.PREVIEW_VERSION; + } + fs.writeFileSync('package-lock.json', JSON.stringify(lock, null, 2) + '\n'); + " + fi + git add package.json + [[ -f package-lock.json ]] && git add package-lock.json + git commit -m "chore: restore preview version ($PREVIEW_VERSION)" + fi + + git push origin HEAD:preview + echo "✅ main merged into preview and pushed" + + - name: Create PR for conflict resolution + if: steps.merge.outputs.status == 'conflict' + env: + GH_TOKEN: ${{ steps.app-token.outputs.token }} + run: | + # Check if there's already an open sync PR (match by branch prefix, not title search) + COUNT=$(gh pr list --base preview --state open --json headRefName \ + --jq '[.[] | select(.headRefName | startswith("sync-preview/"))] | length') + if [[ "$COUNT" != "0" ]]; then + echo "ℹ️ Sync PR already open — skipping duplicate." 
+ exit 0 + fi + + # Abort the failed merge and redo on a branch for the PR + git merge --abort + + BRANCH="sync-preview/merge-main-$(date +%Y%m%d-%H%M%S)" + git checkout -b "$BRANCH" + git merge origin/main --no-edit -m "chore: merge main into preview (conflicts need resolution)" || true + git add -A + git commit --no-edit -m "chore: merge main into preview (conflicts need resolution)" || true + git push origin "$BRANCH" + + GH_USER=$(gh api "/repos/${{ github.repository }}/commits/$(git rev-parse origin/main)" --jq '.author.login // empty' 2>/dev/null || echo "") + MENTION="" + if [[ -n "$GH_USER" ]]; then + MENTION="cc @${GH_USER}" + fi + + gh pr create \ + --base preview \ + --head "$BRANCH" \ + --title "sync-preview: merge main into preview (conflicts)" \ + --body "$(cat < + \`\`\` + 2. Search for conflict markers and resolve them: + \`\`\`bash + grep -rn '<<<<<<< HEAD' . + \`\`\` + 3. Keep preview-specific values (package version, preview tests, etc.) — accept main's changes for everything else. + 4. Commit and push, then merge this PR. 
+ + ${MENTION} + + _Opened automatically by the sync-preview workflow._ + BODY + )" diff --git a/README.md b/README.md index 0dfb08eea..32b3ab3dc 100644 --- a/README.md +++ b/README.md @@ -109,20 +109,20 @@ agentcore invoke ### Evaluations -| Command | Description | -| ----------------------- | ------------------------------------------------ | -| `add evaluator` | Add a custom LLM-as-a-Judge evaluator | -| `add online-eval` | Add continuous evaluation for live traffic | -| `run eval` | Run on-demand evaluation against agent traces | -| `run batch-evaluation` | Run evaluators across all sessions [preview] | -| `run recommendation` | Optimize prompts and tool descriptions [preview] | -| `evals history` | View past eval run results | -| `pause online-eval` | Pause a deployed online eval config | -| `resume online-eval` | Resume a paused online eval config | -| `stop batch-evaluation` | Stop a running batch evaluation [preview] | -| `logs evals` | Stream or search online eval logs | - -### Config Bundles [preview] +| Command | Description | +| ----------------------- | --------------------------------------------- | +| `add evaluator` | Add a custom LLM-as-a-Judge evaluator | +| `add online-eval` | Add continuous evaluation for live traffic | +| `run eval` | Run on-demand evaluation against agent traces | +| `run batch-evaluation` | Run evaluators across all sessions | +| `run recommendation` | Optimize prompts and tool descriptions | +| `evals history` | View past eval run results | +| `pause online-eval` | Pause a deployed online eval config | +| `resume online-eval` | Resume a paused online eval config | +| `stop batch-evaluation` | Stop a running batch evaluation | +| `logs evals` | Stream or search online eval logs | + +### Config Bundles | Command | Description | | ------------------- | ----------------------------------------- | @@ -133,6 +133,18 @@ agentcore invoke > Create agents with `--with-config-bundle` to auto-wire config bundle support into the 
generated template. +### A/B Tests + +| Command | Description | +| ----------------- | -------------------------------------------------------------- | +| `run ab-test` | Start an A/B test (config-bundle or target-based) on a gateway | +| `view ab-test` | List A/B test jobs or view one in detail | +| `pause ab-test` | Pause traffic splitting for a running test | +| `resume ab-test` | Resume a paused test | +| `stop ab-test` | Stop a running test (terminal) | +| `promote ab-test` | Apply the winning variant to `agentcore.json` | +| `archive ab-test` | Delete the test on the service and clear local history | + ### Utilities | Command | Description | @@ -185,11 +197,13 @@ Projects use JSON schema files in the `agentcore/` directory: - [CLI Commands Reference](docs/commands.md) - Full command reference for scripting and CI/CD - [Configuration](docs/configuration.md) - Schema reference for config files - [Evaluations](docs/evals.md) - Evaluators, on-demand evals, and online monitoring -- [Batch Evaluation](docs/batch-evaluation.md) - Run evaluators across sessions at scale [preview] -- [Recommendations](docs/recommendations.md) - Optimize prompts and tool descriptions [preview] -- [Config Bundles](docs/config-bundles.md) - Versioned runtime configurations [preview] +- [Batch Evaluation](docs/batch-evaluation.md) - Run evaluators across sessions at scale +- [Recommendations](docs/recommendations.md) - Optimize prompts and tool descriptions +- [A/B Tests](docs/ab-tests.md) - Split traffic between variants and promote the winner +- [Config Bundles](docs/config-bundles.md) - Versioned runtime configurations - [Frameworks](docs/frameworks.md) - Supported frameworks and model providers - [Gateway](docs/gateway.md) - Gateway setup, targets, and authentication +- [Knowledge Bases](docs/knowledge-bases.md) - Managed Bedrock Knowledge Bases wired to gateways - [Memory](docs/memory.md) - Memory strategies and sharing - [Local Development](docs/local-development.md) - Dev server 
and debugging - [Feedback](docs/feedback.md) - Submit feedback from your terminal diff --git a/docs/ab-tests.md b/docs/ab-tests.md new file mode 100644 index 000000000..b8e8d51de --- /dev/null +++ b/docs/ab-tests.md @@ -0,0 +1,154 @@ +# A/B Tests + +A/B tests split live traffic through a gateway between a **control** variant and a **treatment** variant, then use +online evaluation configs to measure which performs better. When you have a winner, `promote` applies it to your project +config. + +A/B tests are **fire-and-forget jobs** (like `run recommendation` and `run batch-evaluation`): you start one with +`run ab-test`, then manage its lifecycle with `view` / `pause` / `resume` / `stop` / `promote` / `archive`. They are +**not** declared in `agentcore.json` and are not created by `deploy` — the gateway, its targets, and any config bundles +must already be deployed first. + +## Two modes + +| Mode | Compares | Variant inputs | +| ------------------------- | ------------------------------------------- | ---------------------------------------------------------------------------------------------------------- | +| `config-bundle` (default) | Two **versions of the same config bundle** | `--control-bundle`/`--control-version`, `--treatment-bundle`/`--treatment-version`, shared `--online-eval` | +| `target-based` | Two **gateway targets** (runtime endpoints) | `--control-target`/`--treatment-target`, `--control-online-eval`/`--treatment-online-eval` | + +Each A/B test needs its **own gateway**, and only one test can be RUNNING per gateway at a time. 
+ +## Quick Start + +```bash +# Config-bundle mode: compare two versions of one bundle (50/50 split) +agentcore run ab-test \ + -n PromptTest \ + -g MyGateway \ + --mode config-bundle \ + -r MyAgent \ + --control-bundle MyBundle --control-version <control-version> \ + --treatment-bundle MyBundle --treatment-version <treatment-version> \ + --online-eval MyEvalConfig + +# Target-based mode: compare two gateway targets +agentcore run ab-test \ + -n TargetTest \ + -g MyGateway \ + --mode target-based \ + -r MyAgent \ + --control-target prodTarget \ + --treatment-target stagingTarget \ + --control-online-eval ctrlEval \ + --treatment-online-eval treatEval +``` + +A test is enabled (RUNNING) on create by default. Pass `--disable-on-create` to create it stopped. + +## `run ab-test` options + +| Flag | Description | +| -------------------------------- | ------------------------------------------------------------------------------------------- | +| `-n, --name <name>` | Name for the A/B test | +| `-g, --gateway <name>` | Gateway name (must already be deployed) | +| `-m, --mode <mode>` | `config-bundle` (default) or `target-based` | +| `-r, --runtime <name>` | Runtime name (recorded as the agent) | +| `--control-weight <n>` | Control traffic weight 0–100 (default 50) | +| `--treatment-weight <n>` | Treatment traffic weight 0–100 (default 50) | +| `--max-duration-days <days>` | Auto-stop the test after this many days | +| `--role-arn <arn>` | Execution role ARN (auto-created if omitted) | +| `--disable-on-create` | Create the test without starting it (default: enabled) | +| `--gateway-filter <path>` | Restrict the test to a single gateway target path (e.g. 
`/orders/*`); applies to both modes | +| `--region <region>` | AWS region (auto-detected if omitted) | +| `--wait` | Block until the test reaches a terminal state | +| `--json` | JSON output | +| **config-bundle mode** | | +| `--control-bundle <name>` | Control bundle name or ARN | +| `--control-version <version>` | Control bundle version (or `LATEST`) | +| `--treatment-bundle <name>` | Treatment bundle name or ARN | +| `--treatment-version <version>` | Treatment bundle version (or `LATEST`) | +| `--online-eval <name>` | Shared online eval config name or ARN | +| `--traffic-header <header>` | Route traffic on this header instead of by weight | +| **target-based mode** | | +| `--control-target <name>` | Control gateway-target name | +| `--treatment-target <name>` | Treatment gateway-target name | +| `--control-online-eval <name>` | Online eval for the control endpoint (required) | +| `--treatment-online-eval <name>` | Online eval for the treatment endpoint (required) | + +Names must start with a letter and contain only letters, digits, underscores, and hyphens (max 48 characters). 
+ +## Lifecycle + +All lifecycle commands take the test's **job ID** via `-i, --id` (get it from `run ab-test --json` or `view ab-test`): + +```bash +# List all A/B test jobs, or view one in detail +agentcore view ab-test +agentcore view ab-test --json + +# Pause / resume traffic splitting +agentcore pause ab-test -i <job-id> +agentcore resume ab-test -i <job-id> + +# Stop the test (terminal) +agentcore stop ab-test -i <job-id> + +# Apply the winning variant to agentcore.json, then deploy to roll it out +agentcore promote ab-test -i <job-id> +agentcore deploy + +# Remove the job from local history (and the test from the service) +agentcore archive ab-test -i <job-id> +``` + +### Promote + +`promote` writes the winning (treatment) variant into `agentcore.json`: + +- **config-bundle mode** — control and treatment must be two **versions of the same bundle**; promote adopts the + treatment version's components into that bundle. Promoting across two _different_ bundles is rejected. 
+- **target-based mode** — control adopts the treatment endpoint: when both are named endpoints of the same runtime, + control's endpoint version is bumped to the treatment's; otherwise the control target is repointed to the treatment's + runtime/endpoint. + +Promote does not deploy — review the change and run `agentcore deploy` to roll it out. + +## Invocation URL + +`view ab-test <job-id>` shows an **Invocation URL** derived from the test's gateway. Send traffic there and the gateway +splits it between the variants per the configured weights: + +``` +https://<gateway-id>.gateway.bedrock-agentcore.<region>.amazonaws.com/<path>/invocations +``` + +(target-based uses the control target's path; config-bundle uses the agent name.) + +## Results + +`view ab-test <job-id>` shows, once the online evals have scored enough traffic, per-evaluator metrics: the control mean, +each treatment's mean with percent change, and a significance marker. `--json` includes the same under +`results.evaluatorMetrics`, plus `status`, `executionStatus`, `variants`, and `invocationUrl`. + +## Local history + +Job records are saved under `.cli/jobs/ab-tests/`. Browse them in the TUI: + +```bash +agentcore +# Navigate to: Run → A/B Tests (or View → A/B Tests) +``` + +## TUI Wizard + +Run `agentcore` → Run → A/B Test for a guided flow: + +1. Select mode (config-bundle or target-based) +2. Select the gateway +3. Pick control + treatment variants (bundle versions, or gateway targets) +4. Select online eval config(s) +5. Optionally set a gateway filter +6. Name the test and confirm + +Selecting a test from the A/B Tests list shows its detail (status, variants, invocation URL, results) with keybindings +to pause/resume/stop/promote/debug. 
diff --git a/docs/batch-evaluation.md b/docs/batch-evaluation.md index ea13d3707..afd4744d4 100644 --- a/docs/batch-evaluation.md +++ b/docs/batch-evaluation.md @@ -1,4 +1,4 @@ -# Batch Evaluation [preview] +# Batch Evaluation Batch evaluation runs evaluators across all agent sessions in CloudWatch, producing per-session scores and aggregate metrics. Use it to measure agent quality over time, compare before/after prompt changes, or validate ground truth @@ -13,6 +13,9 @@ agentcore run batch-evaluation -r MyAgent -e Builtin.Correctness # Multiple evaluators agentcore run batch-evaluation -r MyAgent -e Builtin.Correctness Builtin.Helpfulness Builtin.Faithfulness +# Reference evaluators by ARN (custom or cross-account) +agentcore run batch-evaluation -r MyAgent --evaluator-arn arn:aws:bedrock-agentcore:us-west-2:123456789012:evaluator/MyCustomEval + # JSON output for scripting agentcore run batch-evaluation -r MyAgent -e Builtin.Helpfulness --json ``` @@ -90,6 +93,27 @@ All fields inside `inline` are optional — include only what's relevant: - `expectedTrajectory` — tool call sequence evaluated by `Builtin.TrajectoryExactOrderMatch` - `turns` — input/expected-response pairs evaluated by `Builtin.Correctness` +## Dataset-Driven Evaluation + +Instead of scoring historical CloudWatch traces, drive the evaluation from a **dataset** — the CLI invokes the agent +with each dataset scenario, then scores the results: + +```bash +# Use the local DRAFT dataset file +agentcore run batch-evaluation -r MyAgent -e Builtin.Correctness --dataset MyScenarios + +# Use a published dataset version +agentcore run batch-evaluation -r MyAgent -e Builtin.Correctness --dataset MyScenarios --dataset-version 1 +``` + +| Flag | Description | +| ----------------------------- | -------------------------------------------------------------------- | +| `--dataset ` | Dataset name — invoke the agent with its scenarios instead of traces | +| `--dataset-version ` | Dataset version (`N` or `DRAFT`; omit 
to use the local file) | + +Add and edit datasets with `agentcore add dataset` and `agentcore dataset publish-version`. The number of scored +sessions equals the number of scenarios in the dataset. + ## Custom Name ```bash @@ -98,6 +122,21 @@ agentcore run batch-evaluation -r MyAgent -e Builtin.Helpfulness -n "weekly_qual Names must start with a letter and contain only letters, digits, and underscores (max 48 characters). +## Encrypting Results with KMS + +By default, batch evaluation results are encrypted with an AWS-managed key. To encrypt them with your own customer +managed key (CMK), pass its ARN with `--kms-key`: + +```bash +agentcore run batch-evaluation \ + -r MyAgent \ + -e Builtin.Correctness \ + --kms-key arn:aws:kms:us-west-2:111122223333:key/12345678-1234-1234-1234-123456789012 +``` + +The key must be in the same region as the evaluation, and the calling principal (and the AgentCore service) must have +`kms:Encrypt`/`kms:GenerateDataKey` permissions on it. Omit the flag to use the AWS-managed key. + ## Stopping a Running Evaluation ```bash @@ -112,11 +151,11 @@ The CLI shows scores grouped by evaluator with average scores after the run comp ### Local history -Results are saved in `.cli/eval-job-results/`. View past runs via the TUI: +Job records are saved in `.cli/jobs/batch-eval-results/`. View past runs via the TUI: ```bash agentcore -# Navigate to: Evals → Batch Evaluation History +# Navigate to: Run → Batch Evaluation History (or View → Batch Evaluation) ``` ### JSON output diff --git a/docs/commands.md b/docs/commands.md index 6c6881692..84f8856ac 100644 --- a/docs/commands.md +++ b/docs/commands.md @@ -96,7 +96,7 @@ agentcore create \ | `--idle-timeout ` | Idle session timeout in seconds | | `--max-lifetime ` | Max instance lifetime in seconds | | `--session-storage-mount-path ` | Absolute mount path for session filesystem storage under `/mnt` (e.g. 
`/mnt/data`) | -| `--with-config-bundle` | [preview] Create a config bundle wired into the generated agent template | +| `--with-config-bundle` | Create a config bundle wired into the generated agent template | | `--output-dir ` | Output directory | | `--skip-git` | Skip git initialization | | `--skip-python-setup` | Skip venv setup | @@ -314,7 +314,7 @@ agentcore add agent \ | `--client-secret ` | OAuth client secret | | `--request-header-allowlist ` | Comma-separated list of inbound header names to forward to the agent. `X-*` names (e.g. `X-Api-Key`, `X-Custom-Signature`) pass through unchanged; bare names without an `X-` prefix are auto-prefixed with the legacy `X-Amzn-Bedrock-AgentCore-Runtime-Custom-` prefix for backward compatibility. | | `--session-storage-mount-path ` | Absolute mount path for session filesystem storage (e.g. `/mnt/session-storage`) | -| `--with-config-bundle` | [preview] Wire a config bundle into the generated agent template | +| `--with-config-bundle` | Wire a config bundle into the generated agent template | | `--idle-timeout ` | Idle session timeout in seconds | | `--max-lifetime ` | Max instance lifetime in seconds | | `--json` | JSON output | @@ -745,8 +745,8 @@ agentcore add dataset \ ### add config-bundle -[preview] Add a configuration bundle. Config bundles snapshot system prompts, tool descriptions, and runtime config so -they can be versioned and used as A/B test arms. +Add a configuration bundle. Config bundles snapshot system prompts, tool descriptions, and runtime config so they can be +versioned and used as A/B test arms. ```bash agentcore add config-bundle \ @@ -765,40 +765,6 @@ agentcore add config-bundle \ | `--commit-message ` | Commit message for this version | | `--json` | JSON output | -### add ab-test - -[preview] Add an A/B test. Two modes: `config-bundle` (default; split traffic between two bundle versions) and -`target-based` (split traffic between two HTTP gateway targets). 
- -```bash -agentcore add ab-test \ - --name PromptComparison \ - --runtime MyAgent \ - --control-bundle ProdBundle --control-version 5 \ - --treatment-bundle ExperimentalBundle --treatment-version 2 \ - --control-weight 80 --treatment-weight 20 \ - --enable -``` - -| Flag | Description | -| --------------------------- | --------------------------------------------------------- | -| `--mode ` | `config-bundle` (default) or `target-based` | -| `--name ` | AB test name | -| `--description ` | AB test description | -| `--role-arn ` | IAM role ARN (auto-created if omitted) | -| `--control-weight ` | Traffic weight for control (1–100) | -| `--treatment-weight ` | Traffic weight for treatment (1–100) | -| `--gateway ` | HTTP gateway name | -| `--enable` | Enable the AB test on creation | -| `--runtime ` | (config-bundle mode) Runtime agent to A/B test | -| `--control-bundle ` | (config-bundle mode) Control config bundle name or ARN | -| `--control-version ` | (config-bundle mode) Control config bundle version | -| `--treatment-bundle ` | (config-bundle mode) Treatment config bundle name or ARN | -| `--treatment-version ` | (config-bundle mode) Treatment config bundle version | -| `--online-eval ` | (config-bundle mode) Online evaluation config name or ARN | -| `--traffic-header ` | (config-bundle mode) Header name for traffic routing | -| `--json` | JSON output | - ### remove Remove resources from project. @@ -816,7 +782,6 @@ agentcore remove policy --name AdminAccess --engine MyPolicyEngine agentcore remove runtime-endpoint --name prod agentcore remove dataset --name MyDataset agentcore remove config-bundle --name MyBundle -agentcore remove ab-test --name PromptComparison agentcore remove payment-manager --name MyManager -y agentcore remove payment-connector --name MyCDPConnector --manager MyManager -y @@ -1020,7 +985,7 @@ agentcore run eval \ ### run batch-evaluation -[preview] Run evaluators in batch across all agent sessions found in CloudWatch. 
+Run evaluators in batch across all agent sessions found in CloudWatch. ```bash # Single evaluator across recent sessions @@ -1044,6 +1009,7 @@ agentcore run batch-evaluation \ | ----------------------------- | ------------------------------------------------------------------------------------------------------ | | `-r, --runtime ` | Runtime name from project config | | `-e, --evaluator ` | Evaluator name(s) — `Builtin.*` IDs | +| `--evaluator-arn ` | Evaluator ARN(s) — use instead of `-e` when referencing evaluators by ARN | | `-n, --name ` | Name for the batch evaluation (auto-generated if omitted) | | `-d, --lookback-days ` | Lookback window in days | | `-s, --session-ids ` | Specific session IDs to evaluate | @@ -1052,11 +1018,12 @@ agentcore run batch-evaluation \ | `--endpoint ` | Runtime endpoint name (e.g. `PROMPT_V1`); defaults to `AGENTCORE_RUNTIME_ENDPOINT` env, then `DEFAULT` | | `--dataset ` | Dataset name — invoke agent with dataset scenarios before batch evaluation | | `--dataset-version ` | Dataset version (omit for local file, or `N`/`DRAFT`) | +| `--kms-key ` | KMS key ARN for encrypting batch evaluation results (default: AWS-managed key) | | `--json` | JSON output | ### run recommendation -[preview] Optimize a system prompt or tool descriptions using agent traces as the signal. +Optimize a system prompt or tool descriptions using agent traces as the signal. ```bash # Optimize a system prompt from an inline string @@ -1104,11 +1071,12 @@ agentcore run recommendation \ | `-s, --session-id ` | Limit trace collection to specific session IDs | | `-n, --run ` | Run name prefix for the recommendation | | `--region ` | AWS region | +| `--kms-key ` | KMS key ARN for encrypting recommendation results (default: AWS-managed key) | | `--json` | JSON output | ### recommendations history -[preview] Show past recommendation runs saved locally. +Show past recommendation runs saved locally. 
```bash agentcore recommendations history @@ -1119,6 +1087,51 @@ agentcore recommendations history --json | -------- | ----------- | | `--json` | JSON output | +### run ab-test + +Start an A/B test comparing two config-bundle versions or two gateway targets through a gateway. A fire-and-forget job — +manage it afterward with `view` / `pause` / `resume` / `stop` / `promote` / `archive ab-test`. The gateway (and its +targets / config bundles) must already be deployed. See [docs/ab-tests.md](ab-tests.md). + +```bash +# config-bundle mode (default): two versions of one bundle +agentcore run ab-test -n PromptTest -g MyGateway -r MyAgent \ + --control-bundle MyBundle --control-version \ + --treatment-bundle MyBundle --treatment-version \ + --online-eval MyEvalConfig + +# target-based mode: two gateway targets +agentcore run ab-test -n TargetTest -g MyGateway --mode target-based -r MyAgent \ + --control-target prodTarget --treatment-target stagingTarget \ + --control-online-eval ctrlEval --treatment-online-eval treatEval +``` + +| Flag | Description | +| -------------------------------- | ------------------------------------------------------------ | +| `-n, --name ` | A/B test name (letters/digits/`_`/`-`, max 48) | +| `-g, --gateway ` | Gateway name (must already be deployed) | +| `-m, --mode ` | `config-bundle` (default) or `target-based` | +| `-r, --runtime ` | Runtime name (recorded as the agent) | +| `--control-weight ` | Control traffic weight 0–100 (default 50) | +| `--treatment-weight ` | Treatment traffic weight 0–100 (default 50) | +| `--max-duration-days ` | Auto-stop after this many days | +| `--role-arn ` | Execution role ARN (auto-created if omitted) | +| `--disable-on-create` | Create without starting (default: enabled) | +| `--gateway-filter ` | Restrict to a single gateway target path (both modes) | +| `--wait` | Block until terminal state | +| `--region ` | AWS region | +| `--json` | JSON output | +| `--control-bundle ` | (config-bundle) Control bundle 
name or ARN | +| `--control-version ` | (config-bundle) Control bundle version (or `LATEST`) | +| `--treatment-bundle ` | (config-bundle) Treatment bundle name or ARN | +| `--treatment-version ` | (config-bundle) Treatment bundle version (or `LATEST`) | +| `--online-eval ` | (config-bundle) Shared online eval config name or ARN | +| `--traffic-header ` | (config-bundle) Route traffic on this header | +| `--control-target ` | (target-based) Control gateway-target name | +| `--treatment-target ` | (target-based) Treatment gateway-target name | +| `--control-online-eval ` | (target-based) Online eval for control endpoint (required) | +| `--treatment-online-eval ` | (target-based) Online eval for treatment endpoint (required) | + ### evals history View past on-demand eval run results. @@ -1197,8 +1210,8 @@ Stop a running batch evaluation or a deployed A/B test. agentcore stop batch-evaluation -i agentcore stop batch-evaluation -i --json -# Stop a deployed A/B test (permanent) -agentcore stop ab-test PromptComparison +# Stop a running A/B test (terminal) +agentcore stop ab-test -i ``` #### `stop batch-evaluation` @@ -1211,15 +1224,15 @@ agentcore stop ab-test PromptComparison #### `stop ab-test` -| Argument / Flag | Description | -| ------------------- | ------------ | -| `` | AB test name | -| `--region ` | AWS region | -| `--json` | JSON output | +| Flag | Description | +| ------------------- | ------------------------------------- | +| `-i, --id ` | A/B test ID to stop | +| `--region ` | AWS region (auto-detected if omitted) | +| `--json` | JSON output | ### archive -[preview] Archive (delete) a batch evaluation or recommendation on the service and clear local history. Irreversible. +Archive (delete) a batch evaluation, recommendation, or A/B test on the service and clear local history. Irreversible. 
```bash # Archive a batch evaluation @@ -1228,46 +1241,64 @@ agentcore archive batch-evaluation -i --region us-west-2 --json # Archive a recommendation agentcore archive recommendation -i + +# Archive an A/B test +agentcore archive ab-test -i ``` -Both `archive batch-evaluation` and `archive recommendation` accept the same flags: +`archive batch-evaluation`, `archive recommendation`, and `archive ab-test` accept the same flags: -| Flag | Description | -| ------------------- | -------------------------------------------- | -| `-i, --id ` | ID of the batch evaluation or recommendation | -| `--region ` | AWS region (auto-detected if omitted) | -| `--json` | JSON output | +| Flag | Description | +| ------------------- | ------------------------------------------------------- | +| `-i, --id ` | ID of the batch evaluation, recommendation, or A/B test | +| `--region ` | AWS region (auto-detected if omitted) | +| `--json` | JSON output | -### ab-test +### view -[preview] View A/B test details and results. +View job history and details. Works for all four job types — `recommendation`, `batch-evaluation`, `ab-test`, and +`insights`. With no `[id]` it lists every job of that type; with an `[id]` it shows that job's detail (status, inputs, +and results). Without `--json` the command launches the interactive TUI; with `--json` it prints a machine-readable +record (the `ab-test` detail also includes `invocationUrl`). 
```bash -agentcore ab-test PromptComparison -agentcore ab-test PromptComparison --json +# List all jobs of a type +agentcore view recommendation +agentcore view batch-evaluation +agentcore view ab-test +agentcore view insights + +# Detail for one job (JSON is non-interactive) +agentcore view recommendation --json +agentcore view batch-evaluation --json +agentcore view ab-test --json # JSON includes invocationUrl + results ``` -| Argument / Flag | Description | -| ------------------- | ------------ | -| `` | AB test name | -| `--region ` | AWS region | -| `--json` | JSON output | +Each `view ` subcommand accepts the same argument and flags: + +| Argument / Flag | Description | +| ------------------- | ------------------------------------------ | +| `[id]` | Job ID (omit to list all jobs of the type) | +| `--region ` | AWS region (auto-detected if omitted) | +| `--json` | JSON output (non-interactive) | + +A/B tests are also paused/resumed/promoted by ID — see [docs/ab-tests.md](ab-tests.md) for the full lifecycle. ### config-bundle -[preview] Manage configuration bundles. Use the bundle name from `agentcore.json`, not the bundle ID. Aliased as `cb`. +Manage configuration bundles. Use the bundle name from `agentcore.json`, not the bundle ID. Aliased as `cb`. 
```bash # List version history -agentcore config-bundle versions --bundle MyBundle -agentcore cb versions --bundle MyBundle --latest-per-branch --json +agentcore config-bundle versions --name MyBundle +agentcore cb versions --name MyBundle --latest-per-branch --json # Diff two versions -agentcore config-bundle diff --bundle MyBundle --from --to +agentcore config-bundle diff --name MyBundle --from --to # Create a new branch from an existing version agentcore config-bundle create-branch \ - --bundle MyBundle \ + --name MyBundle \ --branch experimental \ --from \ --commit-message "Branch off prod for experiments" @@ -1277,7 +1308,7 @@ agentcore config-bundle create-branch \ | Flag | Description | | --------------------- | ------------------------------------------------------ | -| `--bundle ` | Bundle name as defined in `agentcore.json` | +| `--name ` | Bundle name as defined in `agentcore.json` | | `--branch ` | Filter by branch name | | `--latest-per-branch` | Show only the latest version per branch | | `--created-by ` | Filter by creator name (e.g. 
`user`, `recommendation`) | @@ -1288,7 +1319,7 @@ agentcore config-bundle create-branch \ | Flag | Description | | ------------------- | --------------------------------------------- | -| `--bundle ` | Bundle name | +| `--name ` | Bundle name | | `--from ` | Source version ID (from `cb versions --json`) | | `--to ` | Target version ID (from `cb versions --json`) | | `--region ` | AWS region override | @@ -1298,7 +1329,7 @@ agentcore config-bundle create-branch \ | Flag | Description | | ------------------------- | ----------------------------------------------------- | -| `--bundle ` | Bundle name | +| `--name ` | Bundle name | | `--branch ` | Name for the new branch | | `--from ` | Parent version ID to branch from (defaults to latest) | | `--commit-message ` | Commit message for the branch point | diff --git a/docs/config-bundles.md b/docs/config-bundles.md index 890ad7aaf..f8505294d 100644 --- a/docs/config-bundles.md +++ b/docs/config-bundles.md @@ -1,4 +1,4 @@ -# Configuration Bundles [preview] +# Configuration Bundles Config bundles are versioned configurations that store your agent's runtime settings — system prompt, tool descriptions, model parameters, or any custom keys. Instead of hardcoding values in your agent code, your agent reads its config at @@ -59,7 +59,7 @@ On deploy, the CLI creates or updates the config bundle in the API and stores th ### List versions ```bash -agentcore cb versions --bundle MyBundle +agentcore cb versions --name MyBundle ``` Shows version history grouped by branch with commit messages, timestamps, and parent lineage. 
@@ -67,13 +67,13 @@ Shows version history grouped by branch with commit messages, timestamps, and pa ### Diff two versions ```bash -agentcore cb diff --bundle MyBundle --from --to +agentcore cb diff --name MyBundle --from --to ``` ### Create a branch ```bash -agentcore cb create-branch --bundle MyBundle --branch experiment-1 +agentcore cb create-branch --name MyBundle --branch experiment-1 ``` Creates a new branch from the latest version (or a specific version with `--from`). @@ -108,7 +108,7 @@ automatically. All commands support `--json` for scripting: ```bash -agentcore cb versions --bundle MyBundle --json -agentcore cb diff --bundle MyBundle --from v1 --to v2 --json -agentcore cb create-branch --bundle MyBundle --branch exp-1 --json +agentcore cb versions --name MyBundle --json +agentcore cb diff --name MyBundle --from v1 --to v2 --json +agentcore cb create-branch --name MyBundle --branch exp-1 --json ``` diff --git a/docs/connector-config-templates/README.md b/docs/connector-config-templates/README.md new file mode 100644 index 000000000..9208bea93 --- /dev/null +++ b/docs/connector-config-templates/README.md @@ -0,0 +1,71 @@ +# Connector config templates + +Sample `--connector-config` JSON files for non-S3 FMKB data sources. Copy the one matching your source, fill in the real +host/tenant/secret values (and replace any `<...>` placeholders), then: + + agentcore add knowledge-base --name my-kb \ + --data-source-type web-crawler \ + --connector-config ./web-crawler.json + +The CLI copies the file under `app//` and stores the relative path in `agentcore.json`. The JSON is passed +through to the Bedrock DataSource's `connectorParameters` verbatim — Bedrock validates field values, not the CLI, so +typos in enum values surface as a `FAILED` DataSource on first deploy. 
+ +## `--data-source-type` → wire `type` mapping + +| Flag value | Wire `type` | Auth required | +| -------------- | ------------- | --------------------------- | +| `web-crawler` | `WEB` | No | +| `confluence` | `CONFLUENCE` | Secrets Manager `secretArn` | +| `sharepoint` | `SHAREPOINT` | Secrets Manager `secretArn` | +| `onedrive` | `ONEDRIVE` | Secrets Manager `secretArn` | +| `google-drive` | `GOOGLEDRIVE` | Secrets Manager `secretArn` | + +For the auth connectors, set the secret ARN under the connector's `connectionConfiguration.secretArn`. The KB +service role is granted `secretsmanager:GetSecretValue` on it at deploy. + +## Field-value gotchas + +Bedrock validates connector-config field values when it creates the DataSource. The CLI doesn't pre-validate enum values +— if you typo one, the DataSource lands in `FAILED` state on first deploy and the failure reason cites the exact +constraint. A few that bite customers: + +### Web Crawler `syncScope` + +Valid values: `PATH_SPECIFIC`, `SUB_DOMAINS`, `ALL_DOMAINS`, `DOMAINS_ONLY`. Any other value (including the +intuitive-sounding `HOST_ONLY`) fails on creation. Pick the scope that matches your seed URLs: + +- **`PATH_SPECIFIC`** — crawl only URLs that share the path prefix of each seed URL. Most restrictive. +- **`SUB_DOMAINS`** — crawl seed hosts and their subdomains. +- **`ALL_DOMAINS`** — crawl any URL reachable from the seed; only the seed list bounds the crawl. Broadest. +- **`DOMAINS_ONLY`** — crawl only the exact host(s) of the seed URLs. No subdomains, no offsite. + +### Auth connectors require a real `secretArn` + +The placeholder ARN values in the templates fail validation at deploy. Create the secret first +(`aws secretsmanager create-secret ...`), then paste its ARN into the config file before running +`agentcore add knowledge-base`. + +### Web Crawler `seedUrls` + +Must be a non-empty array of fully-qualified `https://` URLs.
Values without a scheme, or `http://` for hosts that +require TLS, fail at first crawl rather than at create-time. + +## Diagnosing a `FAILED` DataSource + +```bash +agentcore status --type knowledge-base --name +``` + +The drill-down view surfaces the failure reason from Bedrock. For a deeper look: + +```bash +aws bedrock-agent get-data-source \ + --knowledge-base-id \ + --data-source-id \ + --region us-west-2 \ + --query 'dataSource.failureReasons' +``` + +Fix the value in the JSON file under `app//.json`, then `agentcore deploy` to update the DataSource and +re-trigger ingestion. diff --git a/docs/connector-config-templates/confluence.json b/docs/connector-config-templates/confluence.json new file mode 100644 index 000000000..4928ac72b --- /dev/null +++ b/docs/connector-config-templates/confluence.json @@ -0,0 +1,16 @@ +{ + "type": "CONFLUENCE", + "version": "1", + "connectionConfiguration": { + "hostUrl": "https://your-domain.atlassian.net", + "authType": "OAUTH2", + "type": "SAAS", + "secretArn": "arn:aws:secretsmanager:us-west-2:111122223333:secret:confluence-creds", + "rotateSecret": false + }, + "dataEntityConfiguration": { + "crawlPage": true, + "crawlBlog": false, + "crawlPageAttachment": true + } +} diff --git a/docs/connector-config-templates/google-drive.json b/docs/connector-config-templates/google-drive.json new file mode 100644 index 000000000..46be07271 --- /dev/null +++ b/docs/connector-config-templates/google-drive.json @@ -0,0 +1,12 @@ +{ + "type": "GOOGLEDRIVE", + "version": "1", + "connectionConfiguration": { + "authType": "SERVICE_ACCOUNT", + "secretArn": "arn:aws:secretsmanager:us-west-2:111122223333:secret:gdrive-creds" + }, + "dataEntityConfiguration": { + "crawlMyDrive": true, + "crawlSharedDrives": false + } +} diff --git a/docs/connector-config-templates/onedrive.json b/docs/connector-config-templates/onedrive.json new file mode 100644 index 000000000..d454ff156 --- /dev/null +++ b/docs/connector-config-templates/onedrive.json @@ -0,0 
+1,16 @@ +{ + "type": "ONEDRIVE", + "version": "1", + "connectionConfiguration": { + "tenantId": "00000000-0000-0000-0000-000000000000", + "authType": "OAUTH2", + "secretArn": "arn:aws:secretsmanager:us-west-2:111122223333:secret:onedrive-creds" + }, + "entitySelectionConfiguration": { + "userSelectionMethod": "USER_EMAILS", + "userEmails": ["user@your-domain.com"] + }, + "dataEntityConfiguration": { + "crawlPersonalDrives": true + } +} diff --git a/docs/connector-config-templates/sharepoint.json b/docs/connector-config-templates/sharepoint.json new file mode 100644 index 000000000..03d3cff6a --- /dev/null +++ b/docs/connector-config-templates/sharepoint.json @@ -0,0 +1,14 @@ +{ + "type": "SHAREPOINT", + "version": "1", + "connectionConfiguration": { + "tenantId": "00000000-0000-0000-0000-000000000000", + "authType": "OAUTH2_APP", + "secretArn": "arn:aws:secretsmanager:us-west-2:111122223333:secret:sharepoint-creds" + }, + "dataEntityConfiguration": { + "siteUrls": ["https://your-tenant.sharepoint.com/sites/your-site"], + "crawlFiles": true, + "crawlPages": true + } +} diff --git a/docs/connector-config-templates/web-crawler.json b/docs/connector-config-templates/web-crawler.json new file mode 100644 index 000000000..2bc36489a --- /dev/null +++ b/docs/connector-config-templates/web-crawler.json @@ -0,0 +1,16 @@ +{ + "type": "WEB", + "version": "1", + "connectionConfiguration": { + "authType": "NO_AUTH", + "seedUrls": ["https://docs.example.com/"], + "siteMapUrls": [] + }, + "crawlConfiguration": { + "syncScope": "PATH_SPECIFIC", + "crawlDepth": 2, + "maxCrawledUrlsPerMinute": 300, + "maxLinksPerUrl": 100, + "crawlAttachments": false + } +} diff --git a/docs/knowledge-bases.md b/docs/knowledge-bases.md new file mode 100644 index 000000000..6a5284990 --- /dev/null +++ b/docs/knowledge-bases.md @@ -0,0 +1,295 @@ +# Knowledge Bases + +A Knowledge Base (KB) ingests documents from one or more data sources and exposes a managed `retrieve` tool to your +agent through a 
gateway. The CLI provisions the underlying Bedrock fully-managed Knowledge Base, its data sources, and +its IAM service role; you only describe the corpus and the gateway you want it wired to. + +## Quick Start + +The simplest path mirrors the gateway flow: set up the KB and gateway before adding the agent so the generated agent +code is wired to call `retrieve` against the KB through the gateway. + +```bash +# 1. Create a project +agentcore create --name MyProject --defaults +cd MyProject + +# 2. Add a gateway +agentcore add gateway --name docs-gw + +# 3. Add a knowledge base, wired to the gateway +agentcore add knowledge-base \ + --name docs \ + --source s3://my-corpus-bucket/manuals/ \ + --gateway docs-gw + +# 4. Create an agent (automatically wired to the gateway) +agentcore add agent --name MyAgent --framework Strands --model-provider Bedrock + +# 5. Deploy +agentcore deploy -y +``` + +The deploy creates the KB and its data sources, kicks off an initial ingestion job, and exposes a `retrieve` tool on +`docs-gw` that your agent can call. + +## Adding a Knowledge Base + +Three forms work, and they compose: + +```bash +# Interactive — drops into the TUI wizard +agentcore add knowledge-base + +# Non-interactive — required flags only +agentcore add knowledge-base --name docs --source s3://bucket/prefix/ + +# Append a second source to an existing KB (idempotent) +agentcore add knowledge-base --name docs --source s3://bucket/another/ +``` + +Re-invoking `add knowledge-base` with an existing `--name` appends data sources rather than creating a duplicate KB. + +### Wiring to a gateway + +Pass `--gateway <name>` to attach the KB to a gateway. The CLI creates two connector targets on that gateway: + +- a per-KB `bedrock-knowledge-bases` target named after the KB (single-KB Retrieve), and +- a shared `bedrock-agentic-retrieve` target named `<gateway-name>-agentic` that fans out across every KB on the gateway.
+ +```bash +agentcore add knowledge-base --name docs --source s3://bucket/ --gateway docs-gw +``` + +If `docs-gw` doesn't exist yet, run `agentcore add gateway --name docs-gw` first. The KB add fails fast if the gateway +is missing. + +### Multiple data sources per KB + +Repeat `--source` (S3) or `--connector-config` (non-S3) on the same `--name` invocation, or call `add knowledge-base` +multiple times with the same name: + +```bash +agentcore add knowledge-base --name docs \ + --source s3://bucket/manuals/ \ + --source s3://bucket/changelog.md +``` + +Each source becomes its own data source under the KB and gets its own ingestion job. + +## Data Source Types + +`--data-source-type` selects the kind of data source. Defaults to `s3`. Supported values: + +| Type | Flag value | Required input | Notes | +| ------------ | -------------- | --------------------------- | ---------------------------------------------------------- | +| Amazon S3 | `s3` (default) | `--source ` | Bucket must be in the same account; `s3://bucket[/prefix]` | +| Web Crawler | `web-crawler` | `--connector-config ` | Crawls one or more seed URLs | +| Confluence | `confluence` | `--connector-config ` | Requires Secrets Manager credentials | +| SharePoint | `sharepoint` | `--connector-config ` | Requires Secrets Manager credentials | +| OneDrive | `onedrive` | `--connector-config ` | Requires Secrets Manager credentials | +| Google Drive | `google-drive` | `--connector-config ` | Requires Secrets Manager credentials | + +### S3 sources + +Pass an S3 URI. The bucket must live in the same AWS account where you're deploying; cross-account buckets are not +supported by this connector. + +```bash +agentcore add knowledge-base --name docs \ + --source s3://corpus-bucket-123456789012/manuals/ +``` + +The KB service role is granted `s3:GetObject` and `s3:ListBucket` on every bucket referenced by an S3 data source, +scoped to the deploying account via an `aws:ResourceAccount` condition. 
Permissions are bucket-scoped, not prefix-scoped +— a KB pointed at `s3://bucket/foo/` can read all of `bucket`. Split into separate buckets if you need prefix-level +isolation. + +### Non-S3 connector sources + +For Web Crawler, Confluence, SharePoint, OneDrive, and Google Drive, you supply a JSON connector-config file. Templates +live at [`docs/connector-config-templates/`](connector-config-templates/) — copy the matching one, fill in the real +host/tenant/secret values, then: + +```bash +agentcore add knowledge-base --name web-docs \ + --data-source-type web-crawler \ + --connector-config ./web-crawler.json +``` + +The CLI copies the file under `app//` and stores the relative path in `agentcore.json`. The JSON contents are +passed verbatim to the Bedrock DataSource's `connectorParameters`. + +Auth-bearing connectors (Confluence, SharePoint, OneDrive, Google Drive) require a Secrets Manager `secretArn` in the +config. The KB service role is granted `secretsmanager:GetSecretValue` on that secret at deploy. + +You can mix data source types on a single KB by repeating `add knowledge-base` with the same `--name`: + +```bash +agentcore add knowledge-base --name docs --source s3://corpus/manuals/ +agentcore add knowledge-base --name docs --data-source-type web-crawler --connector-config ./crawler.json +``` + +## Wiring an External Knowledge Base + +To wire an existing Bedrock KB that this project does not own (created elsewhere, owned by another team), use the +gateway-target primitive directly — there is no `agentcore add knowledge-base` path for external KBs: + +```bash +agentcore add gateway-target \ + --type connector \ + --connector bedrock-knowledge-bases \ + --knowledge-base-id <10-CHAR-KB-ID> \ + --gateway docs-gw \ + --name external-docs +``` + +This writes only to `agentCoreGateways[].targets[]` — no `knowledgeBases[]` entry, no IAM role, no managed ingestion. +The KB lives wherever it lives; the project just adds a Retrieve target on top of it. 
+ +## Ingestion + +`agentcore deploy` automatically kicks off an ingestion job on every data source after the CFN stack finishes. To +re-trigger a manual ingestion later (after updating corpus contents, fixing permissions, etc.): + +```bash +# Ingest all data sources on a KB +agentcore run ingest --name docs + +# Ingest a specific data source on the KB +agentcore run ingest --name docs --data-source s3://corpus/manuals/ + +# JSON output for scripting +agentcore run ingest --name docs --json +``` + +Bedrock allows only one concurrent ingestion job per KB; the CLI retries with backoff if a job is already running. + +## Status + +```bash +# All KBs in the project +agentcore status --type knowledge-base + +# Drill into one KB +agentcore status --type knowledge-base --name docs + +# JSON output +agentcore status --type knowledge-base --json +``` + +The drill-down view shows per-data-source ingestion state, document counts (scanned, indexed, failed), and any +troubleshooting hints if ingestion failed (most early failures are bucket permissions, file format, or an expired +secret). + +## Removing a Knowledge Base + +```bash +agentcore remove knowledge-base --name docs +``` + +The remove preview shows everything that will be cleaned up: + +- the KB and its data sources from `knowledgeBases[]`, +- the per-KB Retrieve target on the wired gateway, and +- the entry from the gateway's shared `agentic-retrieve` target — and the agentic target itself if this was the last KB + on the gateway. + +`agentcore deploy` after the remove cleanly tears down the CFN resources. 
+ +## Configuration Reference + +In `agentcore.json`: + +```json +{ + "knowledgeBases": [ + { + "name": "docs", + "description": "Product manuals", + "gateway": "docs-gw", + "dataSources": [ + { "type": "S3", "uri": "s3://corpus-bucket/manuals/" }, + { "type": "WEB", "connectorConfigFile": "app/docs/web-crawler.json" } + ] + } + ], + "agentCoreGateways": [ + { + "name": "docs-gw", + "targets": [ + { + "name": "docs", + "targetType": "connector", + "connectorId": "bedrock-knowledge-bases", + "knowledgeBaseId": "docs" + }, + { + "name": "docs-gw-agentic", + "targetType": "connector", + "connectorId": "bedrock-agentic-retrieve", + "knowledgeBaseIds": ["docs"] + } + ] + } + ] +} +``` + +`knowledgeBaseId` on a connector target accepts either a project KB name (an entry in `knowledgeBases[]`) or a literal +10-character external KB ID. The two formats can never collide because real Bedrock KB IDs are 10 uppercase alphanumeric +chars and project names start with a letter and may include dashes/underscores. + +After deploy, `agentcore/.cli/deployed-state.json` carries the resolved Bedrock KB ID and per-data-source IDs: + +```json +{ + "targets": { + "default": { + "resources": { + "knowledgeBases": { + "docs": { + "knowledgeBaseId": "ABCDEFGHIJ", + "knowledgeBaseArn": "arn:aws:bedrock:us-west-2:111122223333:knowledge-base/ABCDEFGHIJ", + "dataSources": { + "s3://corpus-bucket/manuals/": "ABC1234567" + } + } + } + } + } + } +} +``` + +## Common Issues + +**"Gateway 'X' not found in agentcore.json"** — add the gateway first with `agentcore add gateway --name X` before +attaching the KB to it. The CLI never auto-creates a gateway from `add knowledge-base` non-interactively. + +**Ingestion shows `FAILED` immediately after deploy** — for S3 sources, most early failures are: the bucket doesn't +exist, the bucket is in a different AWS account, the file format is unsupported, or the file size exceeds 50 MB. 
+`agentcore status --type knowledge-base --name <kb-name>` shows the troubleshooting hints inline. + +**DataSource itself in `FAILED` state right after deploy (non-S3 connectors)** — Bedrock validates the +`connectorParameters` you wrote in the JSON file and rejects bad enum values, missing fields, or unreachable seed URLs. +Surface the exact reason with: + +```bash +aws bedrock-agent get-data-source \ + --knowledge-base-id <KB_ID> \ + --data-source-id <DATA_SOURCE_ID> \ + --region us-west-2 \ + --query 'dataSource.failureReasons' +``` + +The most common Web Crawler trip-up is `crawlConfiguration.syncScope` — only `PATH_SPECIFIC`, `SUB_DOMAINS`, +`ALL_DOMAINS`, and `DOMAINS_ONLY` are accepted. See +[`docs/connector-config-templates/README.md`](connector-config-templates/README.md) for the full list of value gotchas. +Edit `app/<kb-name>/<connector>.json`, then `agentcore deploy` to update the DataSource and re-trigger ingestion. + +**"Duplicate data source in this invocation"** — you passed the same `--source` URI twice on one call. Drop the +duplicate. + +**"Connector config files X and Y would both be stored as 'app/<kb-name>/<filename>'"** — two of your connector configs share +a filename. Rename one before passing both. diff --git a/docs/recommendations.md b/docs/recommendations.md index c5a5c4ac3..b6d25f9c7 100644 --- a/docs/recommendations.md +++ b/docs/recommendations.md @@ -1,4 +1,4 @@ -# Recommendations [preview] +# Recommendations Recommendations optimize your agent's system prompt or tool descriptions using historical traces as signal. The recommendation service analyzes how your agent performed, then produces an improved version scored by an evaluator. @@ -93,16 +93,36 @@ agentcore run recommendation ... --session-id agentcore run recommendation ... --spans-file ./traces.json ``` +## Encrypting Results with KMS + +By default, recommendation results are encrypted with an AWS-managed key. 
To encrypt them with your own customer managed +key (CMK), pass its ARN with `--kms-key`: + +```bash +agentcore run recommendation \ + -t system-prompt \ + -r MyAgent \ + -e Builtin.Correctness \ + --inline "You are a helpful assistant" \ + --kms-key arn:aws:kms:us-west-2:111122223333:key/12345678-1234-1234-1234-123456789012 +``` + +The key must be in the same region as the recommendation, and the calling principal (and the AgentCore service) must +have `kms:Encrypt`/`kms:GenerateDataKey` permissions on it. Omit the flag to use the AWS-managed key. + ## JSON Output ```bash agentcore run recommendation -r MyAgent -e Builtin.Helpfulness --type system-prompt --inline "..." --json ``` -Returns `recommendationId`, `status`, and `result` with `systemPromptRecommendationResult.recommendedSystemPrompt` or +Recommendations are fire-and-forget jobs: `run recommendation` returns `recommendationId` and an initial `status` +(`PENDING`/`IN_PROGRESS`) — the optimized `result` is **not** available immediately. Pass `--wait` to block until the +job finishes, or check later with `agentcore view recommendation --json`, which returns the completed `result` with +`systemPromptRecommendationResult.recommendedSystemPrompt` (and `explanation`) or `toolDescriptionRecommendationResult.tools`. -When using `--bundle-name`, the result also includes `configurationBundle.versionId` — the new bundle version. +When using `--bundle-name`, the completed result also includes `configurationBundle.versionId` — the new bundle version. ## End-to-End Workflow: Recommendation → Config Bundle → Invoke @@ -136,11 +156,11 @@ When using `--bundle-name`, the result also includes `configurationBundle.versio ## Viewing History -Results are saved in `.cli/recommendations/`. View past runs via the TUI: +Job records are saved in `.cli/jobs/recommendations/`. 
View past runs via the TUI: ```bash agentcore -# Navigate to: Recommendations → History +# Navigate to: Recommendations → History (or View → Recommendation) ``` ## TUI Wizard diff --git a/e2e-tests/README.md b/e2e-tests/README.md index 45090b613..3cdfaed5f 100644 --- a/e2e-tests/README.md +++ b/e2e-tests/README.md @@ -111,7 +111,6 @@ Framework/model combination tests: `{framework}-{model}.test.ts` Feature lifecycle tests: describe what the test exercises end-to-end -- `ab-test-target-based.test.ts` - `dev-lifecycle.test.ts` - `evals-lifecycle.test.ts` diff --git a/e2e-tests/ab-test-config-bundle.test.ts b/e2e-tests/ab-test-config-bundle.test.ts deleted file mode 100644 index cec0a9cc0..000000000 --- a/e2e-tests/ab-test-config-bundle.test.ts +++ /dev/null @@ -1,209 +0,0 @@ -import { parseJsonOutput, retry } from '../src/test-utils/index.js'; -import { - baseCanRun, - hasAws, - installCdkTarball, - runAgentCoreCLI, - teardownE2EProject, - writeAwsTargets, -} from './e2e-helper.js'; -import { randomUUID } from 'node:crypto'; -import { mkdir, rm } from 'node:fs/promises'; -import { tmpdir } from 'node:os'; -import { join } from 'node:path'; -import { afterAll, beforeAll, describe, expect, it } from 'vitest'; - -const canRun = baseCanRun && hasAws; - -describe.sequential('e2e: config-bundle AB test lifecycle', () => { - let testDir: string; - let projectPath: string; - const agentName = `E2eCfgAB${String(Date.now()).slice(-8)}`; - const abTestName = 'ConfigBundleABTest'; - const evalName = 'BundleEvaluator'; - const onlineEvalName = 'BundleOnlineEval'; - - beforeAll(async () => { - if (!canRun) return; - - testDir = join(tmpdir(), `agentcore-e2e-cfg-ab-${randomUUID()}`); - await mkdir(testDir, { recursive: true }); - - const result = await runAgentCoreCLI( - [ - 'create', - '--name', - agentName, - '--language', - 'Python', - '--framework', - 'Strands', - '--model-provider', - 'Bedrock', - '--memory', - 'none', - '--json', - ], - testDir - ); - expect(result.exitCode, 
`Create failed: ${result.stderr}`).toBe(0); - projectPath = (parseJsonOutput(result.stdout) as { projectPath: string }).projectPath; - - await writeAwsTargets(projectPath); - installCdkTarball(projectPath); - }, 300000); - - afterAll(async () => { - if (projectPath && hasAws) { - await teardownE2EProject(projectPath, agentName, 'Bedrock'); - } - if (testDir) await rm(testDir, { recursive: true, force: true, maxRetries: 3, retryDelay: 1000 }); - }, 600000); - - const run = (args: string[]) => runAgentCoreCLI(args, projectPath); - - it.skipIf(!canRun)( - 'adds evaluator and online eval config', - async () => { - let result = await run([ - 'add', - 'evaluator', - '--name', - evalName, - '--level', - 'SESSION', - '--model', - 'us.anthropic.claude-sonnet-4-5-20250929-v1:0', - '--instructions', - 'Evaluate session quality. Context: {context}', - '--json', - ]); - expect(result.exitCode, `Add evaluator failed: ${result.stdout}`).toBe(0); - - result = await run([ - 'add', - 'online-eval', - '--name', - onlineEvalName, - '--runtime', - agentName, - '--evaluator', - evalName, - '--sampling-rate', - '100', - '--enable-on-create', - '--json', - ]); - expect(result.exitCode, `Add online-eval failed: ${result.stdout}`).toBe(0); - }, - 60000 - ); - - it.skipIf(!canRun)( - 'deploys agent before AB test (needed for config bundles)', - async () => { - await retry( - async () => { - const result = await run(['deploy', '--yes', '--json']); - expect(result.exitCode, `Initial deploy failed`).toBe(0); - const json = parseJsonOutput(result.stdout) as { success: boolean }; - expect(json.success).toBe(true); - }, - 2, - 30000 - ); - }, - 600000 - ); - - it.skipIf(!canRun)( - 'adds config-bundle AB test with 90/10 split', - async () => { - // Use placeholder bundle ARNs that satisfy the service format constraints. - // Real config bundles would be created separately; these test the AB test wiring. - const region = process.env.AWS_REGION ?? 
'us-east-1'; - const account = process.env.AWS_ACCOUNT_ID ?? '000000000000'; - const controlBundle = `arn:aws:bedrock-agentcore:${region}:${account}:configuration-bundle/control-bundle-AbCdEfGhIj`; - const treatmentBundle = `arn:aws:bedrock-agentcore:${region}:${account}:configuration-bundle/treatment-bundle-AbCdEfGhIj`; - - const result = await run([ - 'add', - 'ab-test', - '--mode', - 'config-bundle', - '--name', - abTestName, - '--runtime', - agentName, - '--control-bundle', - controlBundle, - '--control-version', - '00000000-0000-0000-0000-000000000001', - '--treatment-bundle', - treatmentBundle, - '--treatment-version', - '00000000-0000-0000-0000-000000000002', - '--control-weight', - '90', - '--treatment-weight', - '10', - '--online-eval', - onlineEvalName, - '--json', - ]); - expect(result.exitCode, `Add AB test failed: ${result.stdout}`).toBe(0); - const json = parseJsonOutput(result.stdout) as { success: boolean; abTestName: string }; - expect(json.success).toBe(true); - expect(json.abTestName).toBe(abTestName); - }, - 60000 - ); - - it.skipIf(!canRun)( - 'status shows AB test in config', - async () => { - const result = await run(['status', '--json']); - expect(result.exitCode, `Status failed: ${result.stderr}`).toBe(0); - - const json = parseJsonOutput(result.stdout) as { - success: boolean; - resources: { resourceType: string; name: string; deploymentState: string }[]; - }; - expect(json.success).toBe(true); - - // Agent should be deployed - const agent = json.resources.find(r => r.resourceType === 'agent' && r.name === agentName); - expect(agent, `Agent "${agentName}" should appear in status`).toBeDefined(); - expect(agent!.deploymentState).toBe('deployed'); - }, - 120000 - ); - - it.skipIf(!canRun)( - 'invokes the deployed agent', - async () => { - await retry( - async () => { - const result = await run(['invoke', '--prompt', 'Say hello', '--runtime', agentName, '--json']); - expect(result.exitCode, `Invoke failed: ${result.stderr}`).toBe(0); - const 
json = parseJsonOutput(result.stdout) as { success: boolean }; - expect(json.success).toBe(true); - }, - 3, - 15000 - ); - }, - 180000 - ); - - it.skipIf(!canRun)( - 'removes config-bundle AB test', - async () => { - const result = await run(['remove', 'ab-test', '--name', abTestName, '--json']); - expect(result.exitCode, `Remove failed: ${result.stderr}`).toBe(0); - const json = parseJsonOutput(result.stdout) as Record; - expect(json).toHaveProperty('success', true); - }, - 60000 - ); -}); diff --git a/e2e-tests/ab-test-target-based.test.ts b/e2e-tests/ab-test-target-based.test.ts deleted file mode 100644 index 274ee447a..000000000 --- a/e2e-tests/ab-test-target-based.test.ts +++ /dev/null @@ -1,317 +0,0 @@ -import { parseJsonOutput, retry } from '../src/test-utils/index.js'; -import { - baseCanRun, - hasAws, - installCdkTarball, - runAgentCoreCLI, - teardownE2EProject, - writeAwsTargets, -} from './e2e-helper.js'; -import { randomUUID } from 'node:crypto'; -import { mkdir, rm } from 'node:fs/promises'; -import { tmpdir } from 'node:os'; -import { join } from 'node:path'; -import { afterAll, beforeAll, describe, expect, it } from 'vitest'; - -const canRun = baseCanRun && hasAws; - -describe.sequential('e2e: target-based AB test lifecycle', () => { - let testDir: string; - let projectPath: string; - const agentName = `E2eTargAB${String(Date.now()).slice(-8)}`; - const abTestName = 'TargetABTest'; - const evalName = 'ABTestEvaluator'; - const controlEvalName = 'ControlEvalConfig'; - const treatmentEvalName = 'TreatmentEvalConfig'; - - beforeAll(async () => { - if (!canRun) return; - - testDir = join(tmpdir(), `agentcore-e2e-target-ab-${randomUUID()}`); - await mkdir(testDir, { recursive: true }); - - const result = await runAgentCoreCLI( - [ - 'create', - '--name', - agentName, - '--language', - 'Python', - '--framework', - 'Strands', - '--model-provider', - 'Bedrock', - '--memory', - 'none', - '--json', - ], - testDir - ); - expect(result.exitCode, `Create failed: 
${result.stderr}`).toBe(0); - projectPath = (parseJsonOutput(result.stdout) as { projectPath: string }).projectPath; - - await writeAwsTargets(projectPath); - installCdkTarball(projectPath); - }, 300000); - - afterAll(async () => { - if (projectPath && hasAws) { - await teardownE2EProject(projectPath, agentName, 'Bedrock'); - } - if (testDir) await rm(testDir, { recursive: true, force: true, maxRetries: 3, retryDelay: 1000 }); - }, 600000); - - const run = (args: string[]) => runAgentCoreCLI(args, projectPath); - - it.skipIf(!canRun)( - 'adds runtime endpoints (prod v1, staging v1)', - async () => { - let result = await run([ - 'add', - 'runtime-endpoint', - '--runtime', - agentName, - '--endpoint', - 'prod', - '--version', - '1', - '--json', - ]); - expect(result.exitCode, `Add prod endpoint failed: ${result.stdout}`).toBe(0); - - result = await run([ - 'add', - 'runtime-endpoint', - '--runtime', - agentName, - '--endpoint', - 'staging', - '--version', - '1', - '--json', - ]); - expect(result.exitCode, `Add staging endpoint failed: ${result.stdout}`).toBe(0); - }, - 60000 - ); - - it.skipIf(!canRun)( - 'adds evaluator and per-variant online eval configs', - async () => { - let result = await run([ - 'add', - 'evaluator', - '--name', - evalName, - '--level', - 'SESSION', - '--model', - 'us.anthropic.claude-sonnet-4-5-20250929-v1:0', - '--instructions', - 'Evaluate quality. 
Context: {context}', - '--json', - ]); - expect(result.exitCode, `Add evaluator failed: ${result.stdout}`).toBe(0); - - result = await run([ - 'add', - 'online-eval', - '--name', - controlEvalName, - '--runtime', - agentName, - '--evaluator', - evalName, - '--sampling-rate', - '100', - '--endpoint', - 'prod', - '--enable-on-create', - '--json', - ]); - expect(result.exitCode, `Add control online-eval failed: ${result.stdout}`).toBe(0); - - result = await run([ - 'add', - 'online-eval', - '--name', - treatmentEvalName, - '--runtime', - agentName, - '--evaluator', - evalName, - '--sampling-rate', - '100', - '--endpoint', - 'staging', - '--enable-on-create', - '--json', - ]); - expect(result.exitCode, `Add treatment online-eval failed: ${result.stdout}`).toBe(0); - }, - 60000 - ); - - it.skipIf(!canRun)( - 'adds target-based AB test with 90/10 split', - async () => { - const result = await run([ - 'add', - 'ab-test', - '--mode', - 'target-based', - '--name', - abTestName, - '--runtime', - agentName, - '--gateway', - `${abTestName}-gw`, - '--control-endpoint', - 'prod', - '--treatment-endpoint', - 'staging', - '--control-weight', - '90', - '--treatment-weight', - '10', - '--control-online-eval', - controlEvalName, - '--treatment-online-eval', - treatmentEvalName, - '--enable', - '--json', - ]); - expect(result.exitCode, `Add AB test failed: ${result.stdout}`).toBe(0); - const json = parseJsonOutput(result.stdout) as { success: boolean; abTestName: string }; - expect(json.success).toBe(true); - expect(json.abTestName).toBe(abTestName); - }, - 60000 - ); - - it.skipIf(!canRun)( - 'deploys project (creates gateway, targets, AB test, eval configs)', - async () => { - await retry( - async () => { - const result = await run(['deploy', '--yes', '--json']); - expect(result.exitCode, `Deploy failed (stderr: ${result.stderr})`).toBe(0); - const json = parseJsonOutput(result.stdout) as { success: boolean }; - expect(json.success).toBe(true); - }, - 2, - 30000 - ); - }, - 600000 - 
); - - it.skipIf(!canRun)( - 'AB test reaches RUNNING status after deploy', - async () => { - await retry( - async () => { - const result = await run(['ab-test', abTestName, '--json']); - expect(result.exitCode, `ab-test lookup failed: ${result.stdout} ${result.stderr}`).toBe(0); - const json = parseJsonOutput(result.stdout) as { executionStatus: string }; - expect(json.executionStatus, 'AB test should be RUNNING after deploy').toBe('RUNNING'); - }, - 12, - 15000 - ); - }, - 300000 - ); - - it.skipIf(!canRun)( - 'status shows all resources deployed', - async () => { - await retry( - async () => { - const result = await run(['status', '--json']); - expect(result.exitCode, `Status failed: ${result.stderr}`).toBe(0); - - const json = parseJsonOutput(result.stdout) as { - success: boolean; - resources: { resourceType: string; name: string; deploymentState: string; invocationUrl?: string }[]; - }; - expect(json.success).toBe(true); - - // Agent should be deployed - const agent = json.resources.find(r => r.resourceType === 'agent' && r.name === agentName); - expect(agent, `Agent "${agentName}" should appear in status`).toBeDefined(); - expect(agent!.deploymentState).toBe('deployed'); - - // AB test should be deployed (HTTP gateways are not surfaced as top-level status resources) - const abTest = json.resources.find(r => r.resourceType === 'ab-test' && r.name === abTestName); - expect(abTest, `AB test "${abTestName}" should appear in status`).toBeDefined(); - expect(abTest!.deploymentState).toBe('deployed'); - // invocationUrl proves the HTTP gateway was deployed and wired up correctly - expect(abTest!.invocationUrl, 'AB test should have a gateway invocation URL').toBeTruthy(); - }, - 3, - 15000 - ); - }, - 120000 - ); - - it.skipIf(!canRun)( - 'pauses AB test', - async () => { - await retry( - async () => { - const result = await run(['pause', 'ab-test', abTestName, '--json']); - expect(result.exitCode, `Pause failed: ${result.stderr}`).toBe(0); - const json = 
parseJsonOutput(result.stdout) as Record; - expect(json).toHaveProperty('success', true); - expect(json).toHaveProperty('executionStatus', 'PAUSED'); - }, - 3, - 10000 - ); - }, - 120000 - ); - - it.skipIf(!canRun)( - 'resumes AB test', - async () => { - await retry( - async () => { - const result = await run(['resume', 'ab-test', abTestName, '--json']); - expect(result.exitCode, `Resume failed: ${result.stderr}`).toBe(0); - const json = parseJsonOutput(result.stdout) as Record; - expect(json).toHaveProperty('success', true); - expect(json).toHaveProperty('executionStatus', 'RUNNING'); - }, - 3, - 10000 - ); - }, - 120000 - ); - - it.skipIf(!canRun)( - 'promotes AB test (updates agentcore.json)', - async () => { - const result = await run(['promote', 'ab-test', abTestName, '--json']); - expect(result.exitCode, `Promote failed: ${result.stdout} ${result.stderr}`).toBe(0); - const json = parseJsonOutput(result.stdout) as Record; - expect(json).toHaveProperty('success', true); - expect(json).toHaveProperty('promoted', true); - }, - 120000 - ); - - it.skipIf(!canRun)( - 'removes AB test from config', - async () => { - const result = await run(['remove', 'ab-test', '--name', abTestName, '--delete-gateway', '--json']); - expect(result.exitCode, `Remove failed: ${result.stderr}`).toBe(0); - const json = parseJsonOutput(result.stdout) as Record; - expect(json).toHaveProperty('success', true); - }, - 60000 - ); -}); diff --git a/e2e-tests/archive-lifecycle.test.ts b/e2e-tests/archive-lifecycle.test.ts index dbe0a053e..83b949a97 100644 --- a/e2e-tests/archive-lifecycle.test.ts +++ b/e2e-tests/archive-lifecycle.test.ts @@ -136,9 +136,9 @@ describe.sequential('e2e: archive command lifecycle', () => { ); const json = parseJsonOutput(result.stdout) as Record; expect(json).toHaveProperty('success', true); - expect(json.batchEvaluationId).toBeTruthy(); + expect(json.id).toBeTruthy(); expect(json.status).not.toBe('FAILED'); - batchEvaluationId = json.batchEvaluationId as string; + 
batchEvaluationId = json.id as string; }, 6, 15000 @@ -182,8 +182,8 @@ describe.sequential('e2e: archive command lifecycle', () => { expect(result.exitCode, `recommendation failed (stdout: ${result.stdout}, stderr: ${result.stderr})`).toBe(0); const json = parseJsonOutput(result.stdout) as Record; expect(json).toHaveProperty('success', true); - expect(json.recommendationId).toBeTruthy(); - recommendationId = json.recommendationId as string; + expect(json.id).toBeTruthy(); + recommendationId = json.id as string; }, 6, 30000 @@ -225,9 +225,7 @@ describe.sequential('e2e: archive command lifecycle', () => { const json = parseJsonOutput(result.stdout) as Record; expect(json).toHaveProperty('success', true); - expect(json.batchEvaluationId).toBe(batchEvaluationId); - expect(json).toHaveProperty('localCliHistoryDeleted', true); - expect(json.localDeleteWarning).toBeUndefined(); + expect(json.id).toBe(batchEvaluationId); }, 120000 ); @@ -291,9 +289,7 @@ describe.sequential('e2e: archive command lifecycle', () => { const json = parseJsonOutput(result.stdout) as Record; expect(json).toHaveProperty('success', true); - expect(json.recommendationId).toBe(recommendationId); - expect(json).toHaveProperty('localCliHistoryDeleted', true); - expect(json.localDeleteWarning).toBeUndefined(); + expect(json.id).toBe(recommendationId); }, 120000 ); @@ -307,18 +303,6 @@ describe.sequential('e2e: archive command lifecycle', () => { 30000 ); - it.skipIf(!canRun)( - 'recommendations history no longer includes the archived entry', - async () => { - const result = await run(['recommendations', 'history', '--json']); - expect(result.exitCode, `recommendations history failed: ${result.stderr}`).toBe(0); - const json = parseJsonOutput(result.stdout) as { recommendations: { recommendationId: string }[] }; - const ids = (json.recommendations ?? 
[]).map(r => r.recommendationId); - expect(ids).not.toContain(recommendationId); - }, - 60000 - ); - it.skipIf(!canRun)( 'archiving the same recommendation again returns success false (already deleted)', async () => { diff --git a/e2e-tests/config-bundle-eval-rec.test.ts b/e2e-tests/config-bundle-eval-rec.test.ts index 01e3287bf..88c505dba 100644 --- a/e2e-tests/config-bundle-eval-rec.test.ts +++ b/e2e-tests/config-bundle-eval-rec.test.ts @@ -224,7 +224,7 @@ describe.sequential('e2e: config bundles, batch evaluation, and recommendations' it.skipIf(!canRun)( 'config-bundle versions lists the deployed version', async () => { - const result = await run(['config-bundle', 'versions', '--bundle', bundleName, '--json']); + const result = await run(['config-bundle', 'versions', '--name', bundleName, '--json']); expect(result.exitCode, `cb versions failed: ${result.stderr}`).toBe(0); const json = parseJsonOutput(result.stdout) as { @@ -243,7 +243,7 @@ describe.sequential('e2e: config bundles, batch evaluation, and recommendations' it.skipIf(!canRun)( 'config-bundle versions supports --branch filter', async () => { - const result = await run(['config-bundle', 'versions', '--bundle', bundleName, '--branch', 'mainline', '--json']); + const result = await run(['config-bundle', 'versions', '--name', bundleName, '--branch', 'mainline', '--json']); expect(result.exitCode, `cb versions --branch failed: ${result.stderr}`).toBe(0); const json = parseJsonOutput(result.stdout) as { @@ -302,7 +302,7 @@ describe.sequential('e2e: config bundles, batch evaluation, and recommendations' it.skipIf(!canRun)( 'config-bundle versions shows both versions after update', async () => { - const result = await run(['config-bundle', 'versions', '--bundle', bundleName, '--json']); + const result = await run(['config-bundle', 'versions', '--name', bundleName, '--json']); expect(result.exitCode, `cb versions failed: ${result.stderr}`).toBe(0); const json = parseJsonOutput(result.stdout) as { @@ -318,7 
+318,7 @@ describe.sequential('e2e: config bundles, batch evaluation, and recommendations' 'config-bundle diff shows changes between versions', async () => { // Get the latest two versions - const versionsResult = await run(['config-bundle', 'versions', '--bundle', bundleName, '--json']); + const versionsResult = await run(['config-bundle', 'versions', '--name', bundleName, '--json']); const versionsJson = parseJsonOutput(versionsResult.stdout) as { versions: { versionId: string }[]; }; @@ -330,7 +330,7 @@ describe.sequential('e2e: config bundles, batch evaluation, and recommendations' const result = await run([ 'config-bundle', 'diff', - '--bundle', + '--name', bundleName, '--from', oldestVersion, @@ -375,7 +375,7 @@ describe.sequential('e2e: config bundles, batch evaluation, and recommendations' ); const json = parseJsonOutput(result.stdout) as Record; expect(json).toHaveProperty('success', true); - expect(json).toHaveProperty('batchEvaluationId'); + expect(json).toHaveProperty('id'); expect(json.status).toBeDefined(); expect(json.status).not.toBe('FAILED'); }, @@ -497,7 +497,7 @@ describe.sequential('e2e: config bundles, batch evaluation, and recommendations' expect(result.exitCode, `recommendation failed (stdout: ${result.stdout}, stderr: ${result.stderr})`).toBe(0); const json = parseJsonOutput(result.stdout) as Record; expect(json).toHaveProperty('success', true); - expect(json).toHaveProperty('recommendationId'); + expect(json).toHaveProperty('id'); expect(json.result).toBeDefined(); expect(json.result).not.toBe(''); expect(json.result).not.toBeNull(); @@ -534,7 +534,7 @@ describe.sequential('e2e: config bundles, batch evaluation, and recommendations' expect(result.exitCode, `recommendation from file failed: ${result.stdout}`).toBe(0); const json = parseJsonOutput(result.stdout) as Record; expect(json).toHaveProperty('success', true); - expect(json).toHaveProperty('recommendationId'); + expect(json).toHaveProperty('id'); }, 6, 30000 @@ -565,7 +565,7 @@ 
describe.sequential('e2e: config bundles, batch evaluation, and recommendations' expect(result.exitCode, `tool-desc recommendation failed: ${result.stdout}`).toBe(0); const json = parseJsonOutput(result.stdout) as Record; expect(json).toHaveProperty('success', true); - expect(json).toHaveProperty('recommendationId'); + expect(json).toHaveProperty('id'); }, 6, 30000 @@ -578,7 +578,7 @@ describe.sequential('e2e: config bundles, batch evaluation, and recommendations' 'runs recommendation with config bundle source via CLI', async () => { // Get the latest version ID for the bundle - const versionsResult = await run(['config-bundle', 'versions', '--bundle', bundleName, '--json']); + const versionsResult = await run(['config-bundle', 'versions', '--name', bundleName, '--json']); const versionsJson = parseJsonOutput(versionsResult.stdout) as { versions: { versionId: string }[]; }; @@ -607,7 +607,7 @@ describe.sequential('e2e: config bundles, batch evaluation, and recommendations' expect(result.exitCode, `bundle recommendation failed: ${result.stdout}`).toBe(0); const json = parseJsonOutput(result.stdout) as Record; expect(json).toHaveProperty('success', true); - expect(json).toHaveProperty('recommendationId'); + expect(json).toHaveProperty('id'); }, 6, 30000 diff --git a/e2e-tests/global-setup.ts b/e2e-tests/global-setup.ts index 0f93f2857..3e02ba745 100644 --- a/e2e-tests/global-setup.ts +++ b/e2e-tests/global-setup.ts @@ -1,6 +1,8 @@ import { cleanupStaleCredentialProviders } from './utils/credential-provider-cleanup'; import { getLogger } from './utils/logger'; +import { cleanupStaleRecommendations } from './utils/recommendation-cleanup'; import { cleanUpOldStacks } from './utils/stack-cleanup'; +import { BedrockAgentCoreClient } from '@aws-sdk/client-bedrock-agentcore'; import { BedrockAgentCoreControlClient } from '@aws-sdk/client-bedrock-agentcore-control'; import { CloudFormationClient } from '@aws-sdk/client-cloudformation'; import type { TestProject } from 
'vitest/node';
@@ -47,6 +49,23 @@ export default async function setup(_project: TestProject): Promise<() => void>
     bedrockCPClient.destroy();
   }
 
+  // Recommendations are capped at 5 active per account. Failed e2e runs leak ACTIVE
+  // recommendations that never reach a terminal state, so the next run 402s on every
+  // StartRecommendation across all shards. Reap leftover e2e recs before starting.
+  logger.info(`cleaning up stale active recommendations...`);
+  const bedrockDPClient = new BedrockAgentCoreClient({ region: region, maxAttempts: 10 });
+  try {
+    await cleanupStaleRecommendations(bedrockDPClient, logger.child('recommendation-cleanup'), {
+      minAgeMs: 30 * 60 * 1000,
+      prefix: 'E2e',
+    });
+  } catch (e) {
+    logger.error(String(e));
+    logger.warn(`failed to clean up stale recommendations`);
+  } finally {
+    bedrockDPClient.destroy();
+  }
+
   logger.info(`setup finished in ${(Date.now() - startTime) / 1000} seconds`);
 
   return function teardown(): void {
diff --git a/e2e-tests/guardrail-block.test.ts b/e2e-tests/guardrail-block.test.ts
new file mode 100644
index 000000000..abd335bb1
--- /dev/null
+++ b/e2e-tests/guardrail-block.test.ts
@@ -0,0 +1,233 @@
+import { type RunResult, hasAwsCredentials, parseJsonOutput, prereqs, retry } from '../src/test-utils/index.js';
+import { installCdkTarball, runAgentCoreCLI, writeAwsTargets } from './e2e-helper.js';
+import { randomUUID } from 'node:crypto';
+import { mkdir, readFile, rm } from 'node:fs/promises';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';
+import { afterAll, beforeAll, describe, expect, it } from 'vitest';
+
+const hasAws = hasAwsCredentials();
+
+// The AWS::BedrockAgentCore::Policy CFN resource type is not yet generally
+// released, so `agentcore deploy` cannot synth/provision the policy and this
+// end-to-end suite cannot pass. Skip the whole suite until the resource type
+// is released, then drop SUITE_DISABLED to re-enable.
+const SUITE_DISABLED = true;
+const canRun = !SUITE_DISABLED && prereqs.npm && prereqs.git && prereqs.uv && hasAws;
+
+/**
+ * e2e: policy engine blocks a gateway invoke via CFN-deployed forbid policy.
+ *
+ * This test manually wires what the (removed) "secure mode" used to do automatically:
+ *   1. create a Strands/Bedrock project (agent runtime)
+ *   2. add a Cedar policy engine
+ *   3. add a gateway referencing the engine in ENFORCE mode (authorizer AWS_IAM)
+ *   4. add an http-runtime gateway target pointing at the agent runtime
+ *   5. add a blanket forbid policy scoped to AgentCore::Gateway
+ *   6. deploy via CFN (runtime + gateway + engine + policy all provisioned)
+ *   7. invoke through the gateway — assert the request is BLOCKED (403)
+ *
+ * The blanket `forbid(principal, action, resource is AgentCore::Gateway);` policy blocks ALL
+ * requests through the gateway, proving the policy engine ENFORCE mechanism works end-to-end.
+ */
+describe.skipIf(!canRun).sequential('e2e: policy engine blocks gateway invoke', () => {
+  const suffix = Date.now().toString().slice(-8);
+  const agentName = `E2eGrd${suffix}`;
+  const gatewayName = 'grdgw';
+  const targetName = 'grdtarget';
+  const engineName = 'grdengine';
+  const policyName = 'denyall';
+
+  let projectPath: string;
+  let testDir: string;
+
+  beforeAll(async () => {
+    if (!canRun) return;
+
+    testDir = join(tmpdir(), `agentcore-e2e-guardrail-${randomUUID()}`);
+    await mkdir(testDir, { recursive: true });
+
+    const createResult = await runAgentCoreCLI(
+      [
+        'create',
+        '--name',
+        agentName,
+        '--language',
+        'Python',
+        '--framework',
+        'Strands',
+        '--model-provider',
+        'Bedrock',
+        '--memory',
+        'none',
+        '--json',
+      ],
+      testDir
+    );
+    expect(createResult.exitCode, `Create failed: ${createResult.stderr}`).toBe(0);
+    projectPath = (parseJsonOutput(createResult.stdout) as { projectPath: string }).projectPath;
+
+    await writeAwsTargets(projectPath);
+    installCdkTarball(projectPath);
+  }, 600_000);
+
+  afterAll(async () => {
+    if (projectPath && hasAws) {
+      await runAgentCoreCLI(['remove', 'all', '--json'], projectPath);
+      const deployResult = await runAgentCoreCLI(['deploy', '--yes', '--json'], projectPath);
+      if (deployResult.exitCode !== 0) {
+        console.warn('Teardown deploy failed:', deployResult.stderr);
+      }
+    }
+    if (testDir) await rm(testDir, { recursive: true, force: true, maxRetries: 3, retryDelay: 1000 });
+  }, 600_000);
+
+  const run = (args: string[]): Promise<RunResult> => runAgentCoreCLI(args, projectPath);
+
+  const assertSuccess = (result: RunResult, label: string): void => {
+    expect(result.exitCode, `${label} failed: ${result.stderr}`).toBe(0);
+    const json = parseJsonOutput(result.stdout) as { success: boolean };
+    expect(json.success, `${label} should report success`).toBe(true);
+  };
+
+  // ── Manual wiring (the steps secure mode used to perform) ─────────────
+
+  it.skipIf(!canRun)(
+    'adds a policy engine',
+    async () => {
+      const result = await run(['add', 'policy-engine', '--name', engineName, '--json']);
+      assertSuccess(result, 'add policy-engine');
+    },
+    60_000
+  );
+
+  it.skipIf(!canRun)(
+    'adds a gateway referencing the policy engine in ENFORCE mode',
+    async () => {
+      const result = await run([
+        'add',
+        'gateway',
+        '--name',
+        gatewayName,
+        '--protocol-type',
+        'None',
+        '--authorizer-type',
+        'AWS_IAM',
+        '--policy-engine',
+        engineName,
+        '--policy-engine-mode',
+        'ENFORCE',
+        '--json',
+      ]);
+      assertSuccess(result, 'add gateway');
+    },
+    60_000
+  );
+
+  it.skipIf(!canRun)(
+    'adds an http-runtime target pointing at the agent runtime',
+    async () => {
+      const result = await run([
+        'add',
+        'gateway-target',
+        '--name',
+        targetName,
+        '--gateway',
+        gatewayName,
+        '--type',
+        'http-runtime',
+        '--runtime',
+        agentName,
+        '--json',
+      ]);
+      assertSuccess(result, 'add gateway-target');
+    },
+    60_000
+  );
+
+  it.skipIf(!canRun)(
+    'adds a forbid-all policy scoped to AgentCore::Gateway',
+    async () => {
+      const result = await run([
+        'add',
+        'policy',
+        '--name',
+        policyName,
+        '--engine',
+        engineName,
+        '--statement',
+        'forbid(principal, action, resource is AgentCore::Gateway);',
+        '--validation-mode',
+        'IGNORE_ALL_FINDINGS',
+        '--json',
+      ]);
+      assertSuccess(result, 'add policy');
+    },
+    60_000
+  );
+
+  // ── Deploy via CFN ────────────────────────────────────────────────────
+
+  it.skipIf(!canRun)(
+    'deploys runtime + gateway + policy engine + policy via CFN',
+    async () => {
+      await retry(
+        async () => {
+          const result = await run(['deploy', '--yes', '--json']);
+          if (result.exitCode !== 0) {
+            console.log('Deploy stdout:', result.stdout);
+            console.log('Deploy stderr:', result.stderr);
+          }
+          expect(result.exitCode, `Deploy failed (stderr: ${result.stderr})`).toBe(0);
+          const json = parseJsonOutput(result.stdout) as { success: boolean };
+          expect(json.success, 'Deploy should report success').toBe(true);
+        },
+        2,
+        30_000
+      );
+
+      // Confirm the gateway is deployed (state shape inferred from the reads below; gatewayId type assumed string — TODO confirm)
+      const statePath = join(projectPath, 'agentcore', '.cli', 'deployed-state.json');
+      const state = JSON.parse(await readFile(statePath, 'utf-8')) as {
+        targets: Record<string, { resources?: { gateways?: Record<string, { gatewayId: string }> } }>;
+      };
+      const gateways = Object.values(state.targets).flatMap(t => Object.values(t.resources?.gateways ??
{}));
+      expect(gateways.length, 'Gateway should be present in deployed state').toBeGreaterThan(0);
+      expect(gateways[0]!.gatewayId, 'Gateway should have an ID').toBeTruthy();
+    },
+    600_000
+  );
+
+  // ── Invoke through the gateway ──────────────────────────────────────────
+
+  it.skipIf(!canRun)(
+    'invoke through the gateway is blocked by the forbid-all policy',
+    async () => {
+      await retry(
+        async () => {
+          const result = await run([
+            'invoke',
+            '--gateway',
+            gatewayName,
+            '--gateway-target-name',
+            targetName,
+            '--prompt',
+            '{"message": "hello"}',
+            '--json',
+          ]);
+
+          console.log('Policy-blocked invoke stdout:', result.stdout);
+          console.log('Policy-blocked invoke stderr:', result.stderr);
+
+          const json = parseJsonOutput(result.stdout) as { success: boolean; error?: string };
+          expect(json.success, `Invoke should be blocked but got: ${JSON.stringify(json)}`).toBe(false);
+          expect(json.error, 'Block error message should be present').toBeTruthy();
+          expect(json.error!, `Error should indicate policy denial, got: ${json.error}`).toMatch(/denied|policy|403/i);
+        },
+        3,
+        15_000
+      );
+    },
+    180_000
+  );
+});
diff --git a/e2e-tests/utils/recommendation-cleanup.ts b/e2e-tests/utils/recommendation-cleanup.ts
new file mode 100644
index 000000000..ab8f5b4b4
--- /dev/null
+++ b/e2e-tests/utils/recommendation-cleanup.ts
@@ -0,0 +1,82 @@
+import type { Logger } from './logger';
+import {
+  type BedrockAgentCoreClient,
+  DeleteRecommendationCommand,
+  RecommendationStatus,
+  type RecommendationSummary,
+  paginateListRecommendations,
+} from '@aws-sdk/client-bedrock-agentcore';
+
+/** Statuses this cleanup treats as "active" when deciding what to delete. */
+const ACTIVE_STATUSES: ReadonlySet<RecommendationStatus> = new Set([
+  RecommendationStatus.PENDING,
+  RecommendationStatus.IN_PROGRESS,
+]);
+
+/** A summary whose id and name are both present, so it can be deleted and logged. */
+type DeletableRecommendation = RecommendationSummary & { recommendationId: string; name: string };
+
+/**
+ * Delete one recommendation and log the outcome.
+ *
+ * Best-effort: a failed delete is logged as a warning and never rethrown, so the
+ * returned promise always resolves.
+ */
+async function deleteRecommendation(
+  client: BedrockAgentCoreClient,
+  logger: Logger,
+  recommendationId: string,
+  name: string
+): Promise<void> {
+  try {
+    await client.send(new DeleteRecommendationCommand({ recommendationId }));
+    logger.info(`Deleted stale recommendation: ${name} (${recommendationId})`);
+  } catch (error) {
+    // A caught value is not guaranteed to be an Error; only read name/message when it is.
+    const detail = error instanceof Error ? `${error.name}:${error.message}` : String(error);
+    logger.warn(`Failed to delete recommendation ${name} (${recommendationId}): ${detail}`);
+  }
+}
+
+/**
+ * Delete e2e recommendations that are still active beyond `minAgeMs` and match `prefix`.
+ *
+ * The recommendation service caps active recommendations at 5/account. Failed e2e runs
+ * can leak ACTIVE recommendations that never reach a terminal state, exhausting the slots
+ * and causing every subsequent StartRecommendation call to 402 across all shards.
+ *
+ * Recommendation has no Stop API — DeleteRecommendation is the cancel.
+ *
+ * @param client - client used both to list and to delete recommendations
+ * @param logger - receives one info or warn line per deletion attempt
+ * @param options.minAgeMs - only recommendations created more than this many ms ago are deleted
+ * @param options.prefix - only recommendations whose name starts with this string are deleted
+ */
+export async function cleanupStaleRecommendations(
+  client: BedrockAgentCoreClient,
+  logger: Logger,
+  options: {
+    minAgeMs: number;
+    prefix: string;
+  }
+): Promise<void> {
+  const cutoff = new Date(Date.now() - options.minAgeMs);
+
+  for await (const page of paginateListRecommendations({ client }, {})) {
+    const summaries: RecommendationSummary[] = page.recommendationSummaries ?? [];
+    // The type guard narrows id/name to `string`; a summary missing either is skipped
+    // rather than being passed to DeleteRecommendation as `undefined`.
+    const stale = summaries.filter(
+      (r): r is DeletableRecommendation =>
+        r.recommendationId !== undefined &&
+        r.name !== undefined &&
+        r.name.startsWith(options.prefix) &&
+        r.status !== undefined &&
+        ACTIVE_STATUSES.has(r.status) &&
+        r.createdAt !== undefined &&
+        r.createdAt < cutoff
+    );
+
+    await Promise.all(stale.map(r => deleteRecommendation(client, logger, r.recommendationId, r.name)));
+  }
+}
diff --git a/integ-tests/add-remove-ab-test-target-based.test.ts b/integ-tests/add-remove-ab-test-target-based.test.ts
deleted file mode 100644
index 8a77b1f06..000000000
--- a/integ-tests/add-remove-ab-test-target-based.test.ts
+++ /dev/null
@@ -1,461 +0,0 @@
-import {
-  type TestProject,
-  createTestProject,
-  parseJsonOutput,
-  readProjectConfig,
-  runCLI,
-} from '../src/test-utils/index.js';
-import { afterAll, beforeAll, describe, expect, it } from 'vitest';
-
-async function runSuccess(args: string[], cwd: string) {
-  const result = await runCLI(args, cwd);
-  expect(result.exitCode, `stdout: ${result.stdout}, stderr: ${result.stderr}`).toBe(0);
-  const json: unknown = parseJsonOutput(result.stdout);
-  expect(json).toHaveProperty('success', true);
-  return json as Record;
-}
-
-async function runFailure(args: string[], cwd: string) {
-  const result = await runCLI(args, cwd);
-  expect(result.exitCode).toBe(1);
-  const json: unknown = parseJsonOutput(result.stdout);
-  expect(json).toHaveProperty('success', false);
-  expect(json).toHaveProperty('error');
-  return json as Record;
-}
-
-describe('integration: add and remove target-based ab-test', () => {
-  let project: TestProject;
-  const gatewayName = 'my-test-gw';
-
-  beforeAll(async () => {
-    project = await createTestProject({
-      name: 'TargetABTest',
-      language: 'Python',
-      framework: 'Strands',
-      modelProvider: 'Bedrock',
-      memory: 'none',
-    });
-
-    // Add runtime endpoints (prod and staging) for the agent
-    await runSuccess(
-      ['add', 'runtime-endpoint', '--runtime', project.agentName, '--endpoint', 'prod', '--version', '1', '--json'],
-      project.projectPath
-    );
-    await runSuccess(
-      ['add',
'runtime-endpoint', '--runtime', project.agentName, '--endpoint', 'staging', '--version', '1', '--json'], - project.projectPath - ); - - // Add an evaluator and two online eval configs (one per variant) - await runSuccess( - [ - 'add', - 'evaluator', - '--name', - 'TestEval', - '--level', - 'SESSION', - '--model', - 'us.anthropic.claude-sonnet-4-5-20250929-v1:0', - '--instructions', - 'Evaluate quality. Context: {context}', - '--json', - ], - project.projectPath - ); - await runSuccess( - [ - 'add', - 'online-eval', - '--name', - 'ControlEval', - '--runtime', - project.agentName, - '--evaluator', - 'TestEval', - '--sampling-rate', - '100', - '--endpoint', - 'prod', - '--json', - ], - project.projectPath - ); - await runSuccess( - [ - 'add', - 'online-eval', - '--name', - 'TreatmentEval', - '--runtime', - project.agentName, - '--evaluator', - 'TestEval', - '--sampling-rate', - '100', - '--endpoint', - 'staging', - '--json', - ], - project.projectPath - ); - }, 120000); - - afterAll(async () => { - await project.cleanup(); - }); - - it('adds target-based AB test with --control-endpoint and --treatment-endpoint', async () => { - const json = await runSuccess( - [ - 'add', - 'ab-test', - '--mode', - 'target-based', - '--name', - 'TargetTest1', - '--runtime', - project.agentName, - '--gateway', - gatewayName, - '--control-endpoint', - 'prod', - '--treatment-endpoint', - 'staging', - '--control-weight', - '90', - '--treatment-weight', - '10', - '--control-online-eval', - 'ControlEval', - '--treatment-online-eval', - 'TreatmentEval', - '--json', - ], - project.projectPath - ); - - expect(json.abTestName).toBe('TargetTest1'); - - // Verify agentcore.json has correct mode, targets, gateway auto-created - const spec = await readProjectConfig(project.projectPath); - const abTest = spec.abTests?.find((t: { name: string }) => t.name === 'TargetTest1'); - expect(abTest).toBeDefined(); - expect(abTest!.mode).toBe('target-based'); - expect(abTest!.variants).toHaveLength(2); - 
expect(abTest!.variants[0]!.name).toBe('C'); - expect(abTest!.variants[0]!.weight).toBe(90); - expect(abTest!.variants[0]!.variantConfiguration.target).toBeDefined(); - expect(abTest!.variants[0]!.variantConfiguration.target!.targetName).toBe(`${project.agentName}-prod`); - expect(abTest!.variants[1]!.name).toBe('T1'); - expect(abTest!.variants[1]!.weight).toBe(10); - expect(abTest!.variants[1]!.variantConfiguration.target!.targetName).toBe(`${project.agentName}-staging`); - expect(abTest!.gatewayRef).toBe(`{{gateway:${gatewayName}}}`); - - // Verify gateway was auto-created with targets - const gw = spec.httpGateways?.find((g: { name: string }) => g.name === gatewayName); - expect(gw, 'HTTP gateway should have been auto-created').toBeDefined(); - expect(gw!.targets).toBeDefined(); - expect(gw!.targets!.length).toBeGreaterThanOrEqual(2); - - const controlTarget = gw!.targets!.find((t: { name: string }) => t.name === `${project.agentName}-prod`); - expect(controlTarget).toBeDefined(); - expect(controlTarget!.qualifier).toBe('prod'); - - const treatmentTarget = gw!.targets!.find((t: { name: string }) => t.name === `${project.agentName}-staging`); - expect(treatmentTarget).toBeDefined(); - expect(treatmentTarget!.qualifier).toBe('staging'); - - // Verify per-variant evaluation config - const evalConfig = abTest!.evaluationConfig; - expect('perVariantOnlineEvaluationConfig' in evalConfig).toBe(true); - if ('perVariantOnlineEvaluationConfig' in evalConfig) { - expect(evalConfig.perVariantOnlineEvaluationConfig).toHaveLength(2); - const controlEval = evalConfig.perVariantOnlineEvaluationConfig.find( - (p: { treatmentName: string }) => p.treatmentName === 'C' - ); - expect(controlEval?.onlineEvaluationConfigArn).toBe('ControlEval'); - const treatmentEval = evalConfig.perVariantOnlineEvaluationConfig.find( - (p: { treatmentName: string }) => p.treatmentName === 'T1' - ); - expect(treatmentEval?.onlineEvaluationConfigArn).toBe('TreatmentEval'); - } - }); - - it('adds 
target-based AB test with existing gateway', async () => { - // TargetTest1 already created the gateway — reuse it - const json = await runSuccess( - [ - 'add', - 'ab-test', - '--mode', - 'target-based', - '--name', - 'TargetTest2', - '--runtime', - project.agentName, - '--gateway', - gatewayName, - '--control-endpoint', - 'prod', - '--treatment-endpoint', - 'staging', - '--control-weight', - '50', - '--treatment-weight', - '50', - '--control-online-eval', - 'ControlEval', - '--treatment-online-eval', - 'TreatmentEval', - '--json', - ], - project.projectPath - ); - - expect(json.abTestName).toBe('TargetTest2'); - - const spec = await readProjectConfig(project.projectPath); - // Gateway should still exist (reused, not duplicated) - const gateways = spec.httpGateways?.filter((g: { name: string }) => g.name === gatewayName); - expect(gateways).toHaveLength(1); - }); - - it('rejects duplicate AB test name', async () => { - const json = await runFailure( - [ - 'add', - 'ab-test', - '--mode', - 'target-based', - '--name', - 'TargetTest1', - '--runtime', - project.agentName, - '--gateway', - gatewayName, - '--control-endpoint', - 'prod', - '--treatment-endpoint', - 'staging', - '--control-weight', - '50', - '--treatment-weight', - '50', - '--control-online-eval', - 'ControlEval', - '--treatment-online-eval', - 'TreatmentEval', - '--json', - ], - project.projectPath - ); - - expect(json.error).toContain('already exists'); - }); - - it('rejects weights that do not sum to 100', async () => { - const json = await runFailure( - [ - 'add', - 'ab-test', - '--mode', - 'target-based', - '--name', - 'BadWeights', - '--runtime', - project.agentName, - '--gateway', - gatewayName, - '--control-endpoint', - 'prod', - '--treatment-endpoint', - 'staging', - '--control-weight', - '80', - '--treatment-weight', - '80', - '--control-online-eval', - 'ControlEval', - '--treatment-online-eval', - 'TreatmentEval', - '--json', - ], - project.projectPath - ); - - expect(json.error).toBeDefined(); 
- }); - - it('errors when --control-endpoint is missing in target-based mode', async () => { - const json = await runFailure( - [ - 'add', - 'ab-test', - '--mode', - 'target-based', - '--name', - 'MissingControl', - '--runtime', - project.agentName, - '--gateway', - gatewayName, - '--treatment-endpoint', - 'staging', - '--control-weight', - '50', - '--treatment-weight', - '50', - '--control-online-eval', - 'ControlEval', - '--treatment-online-eval', - 'TreatmentEval', - '--json', - ], - project.projectPath - ); - - expect(json.error).toContain('--control-endpoint'); - }); - - it('errors when --runtime is missing in target-based mode', async () => { - const json = await runFailure( - [ - 'add', - 'ab-test', - '--mode', - 'target-based', - '--name', - 'MissingRuntime', - '--gateway', - gatewayName, - '--control-endpoint', - 'prod', - '--treatment-endpoint', - 'staging', - '--control-weight', - '50', - '--treatment-weight', - '50', - '--control-online-eval', - 'ControlEval', - '--treatment-online-eval', - 'TreatmentEval', - '--json', - ], - project.projectPath - ); - - expect(json.error).toContain('--runtime'); - }); - - it('errors when endpoint does not exist on runtime', async () => { - const json = await runFailure( - [ - 'add', - 'ab-test', - '--mode', - 'target-based', - '--name', - 'BadEndpoint', - '--runtime', - project.agentName, - '--gateway', - gatewayName, - '--control-endpoint', - 'nonexistent', - '--treatment-endpoint', - 'staging', - '--control-weight', - '50', - '--treatment-weight', - '50', - '--control-online-eval', - 'ControlEval', - '--treatment-online-eval', - 'TreatmentEval', - '--json', - ], - project.projectPath - ); - - expect(json.error).toContain('nonexistent'); - }); - - it('deprecated --control-qualifier still works as alias for --control-endpoint', async () => { - const json = await runSuccess( - [ - 'add', - 'ab-test', - '--mode', - 'target-based', - '--name', - 'QualifierAlias', - '--runtime', - project.agentName, - '--gateway', - 
gatewayName, - '--control-qualifier', - 'prod', - '--treatment-qualifier', - 'staging', - '--control-weight', - '60', - '--treatment-weight', - '40', - '--control-online-eval', - 'ControlEval', - '--treatment-online-eval', - 'TreatmentEval', - '--json', - ], - project.projectPath - ); - - expect(json.abTestName).toBe('QualifierAlias'); - - const spec = await readProjectConfig(project.projectPath); - const abTest = spec.abTests?.find((t: { name: string }) => t.name === 'QualifierAlias'); - expect(abTest).toBeDefined(); - expect(abTest!.mode).toBe('target-based'); - expect(abTest!.variants[0]!.variantConfiguration.target!.targetName).toBe(`${project.agentName}-prod`); - expect(abTest!.variants[1]!.variantConfiguration.target!.targetName).toBe(`${project.agentName}-staging`); - }); - - it('removes target-based AB test without --delete-gateway', async () => { - const json = await runSuccess(['remove', 'ab-test', '--name', 'TargetTest2', '--json'], project.projectPath); - expect(json.success).toBe(true); - - // Verify removal from agentcore.json - const spec = await readProjectConfig(project.projectPath); - const abTest = spec.abTests?.find((t: { name: string }) => t.name === 'TargetTest2'); - expect(abTest).toBeUndefined(); - - // Gateway should still exist (other AB tests reference it) - const gw = spec.httpGateways?.find((g: { name: string }) => g.name === gatewayName); - expect(gw, 'Gateway should still exist when other AB tests reference it').toBeDefined(); - }); - - it('removes target-based AB test with --delete-gateway flag', async () => { - // First remove QualifierAlias so only TargetTest1 is left referencing the gateway - await runSuccess(['remove', 'ab-test', '--name', 'QualifierAlias', '--json'], project.projectPath); - - // Now remove TargetTest1 with --delete-gateway - const json = await runSuccess( - ['remove', 'ab-test', '--name', 'TargetTest1', '--delete-gateway', '--json'], - project.projectPath - ); - expect(json.success).toBe(true); - - // Verify 
gateway was also removed (no other AB tests reference it) - const spec = await readProjectConfig(project.projectPath); - const gw = spec.httpGateways?.find((g: { name: string }) => g.name === gatewayName); - expect(gw, 'Gateway should be removed with --delete-gateway when no other AB tests reference it').toBeUndefined(); - }); - - it('remove returns error for non-existent test', async () => { - const json = await runFailure(['remove', 'ab-test', '--name', 'DoesNotExist', '--json'], project.projectPath); - expect(json.error).toContain('not found'); - }); -}); diff --git a/integ-tests/add-remove-ab-test.test.ts b/integ-tests/add-remove-ab-test.test.ts deleted file mode 100644 index 1fd1aa7bc..000000000 --- a/integ-tests/add-remove-ab-test.test.ts +++ /dev/null @@ -1,183 +0,0 @@ -import { - type TestProject, - createTestProject, - parseJsonOutput, - readProjectConfig, - runCLI, -} from '../src/test-utils/index.js'; -import { createTelemetryHelper } from '../src/test-utils/telemetry-helper.js'; -import { afterAll, beforeAll, describe, expect, it } from 'vitest'; - -const telemetry = createTelemetryHelper(); - -async function runSuccess(args: string[], cwd: string) { - const result = await runCLI(args, cwd); - expect(result.exitCode, `stdout: ${result.stdout}, stderr: ${result.stderr}`).toBe(0); - const json: unknown = parseJsonOutput(result.stdout); - expect(json).toHaveProperty('success', true); - return json as Record; -} - -async function runFailure(args: string[], cwd: string) { - const result = await runCLI(args, cwd); - expect(result.exitCode).toBe(1); - const json: unknown = parseJsonOutput(result.stdout); - expect(json).toHaveProperty('success', false); - expect(json).toHaveProperty('error'); - return json as Record; -} - -describe('integration: add and remove ab-test', () => { - let project: TestProject; - - beforeAll(async () => { - project = await createTestProject({ - language: 'Python', - framework: 'Strands', - modelProvider: 'Bedrock', - memory: 'none', 
- }); - }); - - afterAll(async () => { - await project.cleanup(); - telemetry.destroy(); - }); - - it('requires --name for JSON mode', async () => { - const json = await runFailure(['add', 'ab-test', '--json'], project.projectPath); - expect(json.error).toContain('--name'); - }); - - it('requires --runtime when --name is provided', async () => { - const json = await runFailure(['add', 'ab-test', '--name', 'Test1', '--json'], project.projectPath); - expect(json.error).toContain('--runtime'); - }); - - it('adds ab-test with all required flags', async () => { - const json = await runSuccess( - [ - 'add', - 'ab-test', - '--name', - 'MyIntegTest', - '--runtime', - project.agentName, - '--control-bundle', - 'arn:bundle:control', - '--control-version', - 'v1', - '--treatment-bundle', - 'arn:bundle:treatment', - '--treatment-version', - 'v1', - '--control-weight', - '80', - '--treatment-weight', - '20', - '--online-eval', - 'arn:eval:config', - '--json', - ], - project.projectPath - ); - - expect(json.abTestName).toBe('MyIntegTest'); - - // Verify it's in agentcore.json with correct structure - const spec = await readProjectConfig(project.projectPath); - const abTest = spec.abTests?.find((t: { name: string }) => t.name === 'MyIntegTest'); - expect(abTest).toBeDefined(); - expect(abTest!.variants).toHaveLength(2); - expect(abTest!.variants[0]!.name).toBe('C'); - expect(abTest!.variants[0]!.weight).toBe(80); - expect(abTest!.variants[1]!.name).toBe('T1'); - expect(abTest!.variants[1]!.weight).toBe(20); - }); - - it('rejects duplicate AB test name', async () => { - const json = await runFailure( - [ - 'add', - 'ab-test', - '--name', - 'MyIntegTest', - '--runtime', - project.agentName, - '--control-bundle', - 'arn:cb', - '--control-version', - 'v1', - '--treatment-bundle', - 'arn:tb', - '--treatment-version', - 'v1', - '--control-weight', - '50', - '--treatment-weight', - '50', - '--online-eval', - 'arn:eval', - '--json', - ], - project.projectPath - ); - - 
expect(json.error).toContain('already exists'); - }); - - it('rejects weights that do not sum to 100', async () => { - const json = await runFailure( - [ - 'add', - 'ab-test', - '--name', - 'BadWeights', - '--runtime', - project.agentName, - '--control-bundle', - 'arn:cb', - '--control-version', - 'v1', - '--treatment-bundle', - 'arn:tb', - '--treatment-version', - 'v1', - '--control-weight', - '80', - '--treatment-weight', - '80', - '--online-eval', - 'arn:eval', - '--json', - ], - project.projectPath - ); - - expect(json.error).toBeDefined(); - }); - - it('removes ab-test', async () => { - const result = await runCLI(['remove', 'ab-test', '--name', 'MyIntegTest', '--json'], project.projectPath, { - env: telemetry.env, - }); - expect(result.exitCode).toBe(0); - const json = JSON.parse(result.stdout); - expect(json.success).toBe(true); - - const spec = await readProjectConfig(project.projectPath); - const abTest = spec.abTests?.find((t: { name: string }) => t.name === 'MyIntegTest'); - expect(abTest).toBeUndefined(); - telemetry.assertMetricEmitted({ command: 'remove.ab-test', exit_reason: 'success' }); - }); - - it('remove returns error for non-existent test', async () => { - const result = await runCLI(['remove', 'ab-test', '--name', 'DoesNotExist', '--json'], project.projectPath, { - env: telemetry.env, - }); - expect(result.exitCode).toBe(1); - const json = JSON.parse(result.stdout); - expect(json.error).toContain('not found'); - telemetry.assertMetricEmitted({ command: 'remove.ab-test', exit_reason: 'failure' }); - }); -}); diff --git a/integ-tests/add-remove-config-bundle.test.ts b/integ-tests/add-remove-config-bundle.test.ts index c6c37c257..98c7d07a5 100644 --- a/integ-tests/add-remove-config-bundle.test.ts +++ b/integ-tests/add-remove-config-bundle.test.ts @@ -109,7 +109,9 @@ describe('integration: add and remove config-bundle', () => { const bundle = config.configBundles.find(b => b.name === 'FullOptsBundle'); expect(bundle).toBeDefined(); 
expect(bundle!.description).toBe('A bundle with all optional fields'); - expect(bundle!.branchName).toBe('feature-branch'); + // --branch is gated behind ENABLE_GATED_FEATURES; when off, silently defaults to mainline + const expectedBranch = process.env.ENABLE_GATED_FEATURES === '1' ? 'feature-branch' : 'mainline'; + expect(bundle!.branchName).toBe(expectedBranch); expect(bundle!.commitMessage).toBe('initial config'); }); diff --git a/integ-tests/add-remove-gateway.test.ts b/integ-tests/add-remove-gateway.test.ts index 8453c5e60..1e09bb0ba 100644 --- a/integ-tests/add-remove-gateway.test.ts +++ b/integ-tests/add-remove-gateway.test.ts @@ -27,7 +27,10 @@ describe('integration: add and remove gateway with external MCP server', () => { describe('gateway lifecycle', () => { it('adds a gateway', async () => { - const result = await runCLI(['add', 'gateway', '--name', gatewayName, '--json'], project.projectPath); + const result = await runCLI( + ['add', 'gateway', '--name', gatewayName, '--protocol-type', 'MCP', '--json'], + project.projectPath + ); expect(result.exitCode, `stdout: ${result.stdout}, stderr: ${result.stderr}`).toBe(0); const json = JSON.parse(result.stdout); @@ -136,7 +139,10 @@ describe('integration: add and remove gateway with OpenAPI schema target', () => describe('openApiSchema lifecycle', () => { it('adds a gateway', async () => { - const result = await runCLI(['add', 'gateway', '--name', gatewayName, '--json'], project.projectPath); + const result = await runCLI( + ['add', 'gateway', '--name', gatewayName, '--protocol-type', 'MCP', '--json'], + project.projectPath + ); expect(result.exitCode, `stdout: ${result.stdout}, stderr: ${result.stderr}`).toBe(0); const json = JSON.parse(result.stdout); @@ -262,7 +268,10 @@ describe('integration: add gateway with S3 URI schema target', () => { describe('S3 URI openApiSchema lifecycle', () => { it('adds a gateway and credential', async () => { - const result = await runCLI(['add', 'gateway', '--name', 
gatewayName, '--json'], project.projectPath); + const result = await runCLI( + ['add', 'gateway', '--name', gatewayName, '--protocol-type', 'MCP', '--json'], + project.projectPath + ); expect(result.exitCode, `stdout: ${result.stdout}, stderr: ${result.stderr}`).toBe(0); const credResult = await runCLI( @@ -331,7 +340,7 @@ describe('integration: add gateway with S3 URI and bucketOwnerAccountId', () => }); it('adds a gateway and target with --schema-s3-account', async () => { - await runCLI(['add', 'gateway', '--name', gatewayName, '--json'], project.projectPath); + await runCLI(['add', 'gateway', '--name', gatewayName, '--protocol-type', 'MCP', '--json'], project.projectPath); await runCLI( ['add', 'credential', '--name', 'CrossApiKey', '--api-key', 'test-key', '--json'], project.projectPath @@ -388,7 +397,10 @@ describe('integration: add gateway with Smithy model target', () => { describe('smithyModel lifecycle', () => { it('adds a gateway', async () => { - const result = await runCLI(['add', 'gateway', '--name', gatewayName, '--json'], project.projectPath); + const result = await runCLI( + ['add', 'gateway', '--name', gatewayName, '--protocol-type', 'MCP', '--json'], + project.projectPath + ); expect(result.exitCode, `stdout: ${result.stdout}, stderr: ${result.stderr}`).toBe(0); }); @@ -456,7 +468,7 @@ describe('integration: schema-based target validation errors', () => { beforeAll(async () => { project = await createTestProject({ noAgent: true }); - await runCLI(['add', 'gateway', '--name', gatewayName, '--json'], project.projectPath); + await runCLI(['add', 'gateway', '--name', gatewayName, '--protocol-type', 'MCP', '--json'], project.projectPath); }); afterAll(async () => { diff --git a/integ-tests/add-remove-online-insights.test.ts b/integ-tests/add-remove-online-insights.test.ts new file mode 100644 index 000000000..3d833c64d --- /dev/null +++ b/integ-tests/add-remove-online-insights.test.ts @@ -0,0 +1,329 @@ +import { + type TestProject, + 
createTestProject,
+  parseJsonOutput,
+  readProjectConfig,
+  runCLI,
+} from '../src/test-utils/index.js';
+import { createTelemetryHelper } from '../src/test-utils/telemetry-helper.js';
+import { afterAll, beforeAll, describe, expect, it } from 'vitest';
+
+const telemetry = createTelemetryHelper();
+
+async function runSuccess(args: string[], cwd: string) {
+  const result = await runCLI(args, cwd, { env: telemetry.env });
+  expect(result.exitCode, `stdout: ${result.stdout}, stderr: ${result.stderr}`).toBe(0);
+  const json: unknown = parseJsonOutput(result.stdout);
+  expect(json).toHaveProperty('success', true);
+  return json as Record<string, unknown>; // callers read ad-hoc fields (e.g. configName); value type assumed — TODO confirm
+}
+
+async function runFailure(args: string[], cwd: string) {
+  const result = await runCLI(args, cwd, { env: telemetry.env });
+  expect(result.exitCode).toBe(1);
+  const json: unknown = parseJsonOutput(result.stdout);
+  expect(json).toHaveProperty('success', false);
+  expect(json).toHaveProperty('error');
+  return json as Record<string, unknown>; // callers assert on `error`; value type assumed — TODO confirm
+}
+
+describe('integration: add and remove online-insights configs', () => {
+  let project: TestProject;
+
+  beforeAll(async () => {
+    project = await createTestProject({
+      language: 'Python',
+      framework: 'Strands',
+      modelProvider: 'Bedrock',
+      memory: 'none',
+    });
+  });
+
+  afterAll(async () => {
+    await project.cleanup();
+    telemetry.destroy();
+  });
+
+  describe('online-insights lifecycle', () => {
+    const configName = `IntegInsights${Date.now().toString().slice(-6)}`;
+    const insightId = 'Builtin.Insight.FailureAnalysis';
+
+    it('adds an online-insights config', async () => {
+      const json = await runSuccess(
+        [
+          'add',
+          'online-insights',
+          '--name',
+          configName,
+          '--runtime',
+          project.agentName,
+          '--insights',
+          insightId,
+          '--sampling-rate',
+          '50',
+          '--json',
+        ],
+        project.projectPath
+      );
+      expect(json.configName).toBe(configName);
+
+      const config = await readProjectConfig(project.projectPath);
+      const found = config.onlineEvalConfigs.find((c: { name: string }) => c.name ===
configName); + expect(found).toBeDefined(); + expect(found!.agent).toBe(project.agentName); + expect(found!.insights).toContain(insightId); + expect(found!.samplingRate).toBe(50); + expect(found!.evaluators).toBeUndefined(); + }); + + it('rejects duplicate online-insights config name', async () => { + const json = await runFailure( + [ + 'add', + 'online-insights', + '--name', + configName, + '--runtime', + project.agentName, + '--insights', + insightId, + '--sampling-rate', + '50', + '--json', + ], + project.projectPath + ); + expect(json.error).toContain('already exists'); + }); + + it('adds online-insights with clustering frequencies', async () => { + const clusterName = `ClusterInsights${Date.now().toString().slice(-6)}`; + const json = await runSuccess( + [ + 'add', + 'online-insights', + '--name', + clusterName, + '--runtime', + project.agentName, + '--insights', + insightId, + '--sampling-rate', + '100', + '--clustering-frequency', + 'DAILY', + 'WEEKLY', + '--json', + ], + project.projectPath + ); + expect(json.configName).toBe(clusterName); + + const config = await readProjectConfig(project.projectPath); + const found = config.onlineEvalConfigs.find((c: { name: string }) => c.name === clusterName); + expect(found).toBeDefined(); + expect(found!.clusteringConfig).toBeDefined(); + expect(found!.clusteringConfig!.frequencies).toContain('DAILY'); + expect(found!.clusteringConfig!.frequencies).toContain('WEEKLY'); + }); + + it('adds online-insights with --enable-on-create', async () => { + const enabledName = `EnabledInsights${Date.now().toString().slice(-6)}`; + const json = await runSuccess( + [ + 'add', + 'online-insights', + '--name', + enabledName, + '--runtime', + project.agentName, + '--insights', + insightId, + '--sampling-rate', + '75', + '--enable-on-create', + '--json', + ], + project.projectPath + ); + expect(json.configName).toBe(enabledName); + + const config = await readProjectConfig(project.projectPath); + const found = 
config.onlineEvalConfigs.find((c: { name: string }) => c.name === enabledName); + expect(found).toBeDefined(); + expect(found!.enableOnCreate).toBe(true); + }); + + it('adds online-insights with endpoint', async () => { + // First add an endpoint to the runtime + await runSuccess( + ['add', 'runtime-endpoint', '--runtime', project.agentName, '--endpoint', 'prod', '--version', '1', '--json'], + project.projectPath + ); + + const epName = `EPInsights${Date.now().toString().slice(-6)}`; + const json = await runSuccess( + [ + 'add', + 'online-insights', + '--name', + epName, + '--runtime', + project.agentName, + '--insights', + insightId, + '--sampling-rate', + '50', + '--endpoint', + 'prod', + '--json', + ], + project.projectPath + ); + expect(json.configName).toBe(epName); + + const config = await readProjectConfig(project.projectPath); + const found = config.onlineEvalConfigs.find((c: { name: string }) => c.name === epName); + expect(found).toBeDefined(); + expect(found!.endpoint).toBe('prod'); + }); + + it('rejects online-insights with non-existent endpoint', async () => { + const json = await runFailure( + [ + 'add', + 'online-insights', + '--name', + 'BadEP', + '--runtime', + project.agentName, + '--insights', + insightId, + '--sampling-rate', + '50', + '--endpoint', + 'nonexistent', + '--json', + ], + project.projectPath + ); + expect(json.error).toContain('nonexistent'); + }); + + it('removes the online-insights config', async () => { + await runSuccess(['remove', 'online-insights', '--name', configName, '--json'], project.projectPath); + + const config = await readProjectConfig(project.projectPath); + const found = config.onlineEvalConfigs.find( + (c: { name: string; insights?: string[] }) => c.name === configName && c.insights?.length + ); + expect(found).toBeUndefined(); + telemetry.assertMetricEmitted({ command: 'remove.online-insights', exit_reason: 'success' }); + }); + }); + + describe('error cases', () => { + it('rejects online-insights with missing 
--runtime', async () => { + const json = await runFailure( + [ + 'add', + 'online-insights', + '--name', + 'SomeConfig', + '--insights', + 'Builtin.Insight.FailureAnalysis', + '--sampling-rate', + '50', + '--json', + ], + project.projectPath + ); + expect(json.error).toContain('--runtime'); + }); + + it('rejects online-insights with missing --insights', async () => { + const json = await runFailure( + [ + 'add', + 'online-insights', + '--name', + 'SomeConfig', + '--runtime', + project.agentName, + '--sampling-rate', + '50', + '--json', + ], + project.projectPath + ); + expect(json.error).toContain('--insights'); + }); + + it('rejects online-insights with missing --sampling-rate', async () => { + const json = await runFailure( + [ + 'add', + 'online-insights', + '--name', + 'SomeConfig', + '--runtime', + project.agentName, + '--insights', + 'Builtin.Insight.FailureAnalysis', + '--json', + ], + project.projectPath + ); + expect(json.error).toContain('--sampling-rate'); + }); + + it('rejects online-insights with invalid sampling rate (too high)', async () => { + const json = await runFailure( + [ + 'add', + 'online-insights', + '--name', + 'SomeConfig', + '--runtime', + project.agentName, + '--insights', + 'Builtin.Insight.FailureAnalysis', + '--sampling-rate', + '200', + '--json', + ], + project.projectPath + ); + expect(json.error).toContain('sampling-rate'); + }); + + it('rejects online-insights with invalid sampling rate (too low)', async () => { + const json = await runFailure( + [ + 'add', + 'online-insights', + '--name', + 'SomeConfig', + '--runtime', + project.agentName, + '--insights', + 'Builtin.Insight.FailureAnalysis', + '--sampling-rate', + '0', + '--json', + ], + project.projectPath + ); + expect(json.error).toContain('sampling-rate'); + }); + + it('fails to remove non-existent online-insights config', async () => { + const json = await runFailure( + ['remove', 'online-insights', '--name', 'NonExistent', '--json'], + project.projectPath + ); + 
expect(json.error).toContain('not found'); + telemetry.assertMetricEmitted({ command: 'remove.online-insights', exit_reason: 'failure' }); + }); + }); +}); diff --git a/integ-tests/promote-ab-test.test.ts b/integ-tests/promote-ab-test.test.ts new file mode 100644 index 000000000..827038204 --- /dev/null +++ b/integ-tests/promote-ab-test.test.ts @@ -0,0 +1,161 @@ +import { promoteABTestConfig } from '../src/cli/operations/jobs/ab-test/promote.js'; +import type { ABTestJobRecord } from '../src/cli/operations/jobs/shared/types.js'; +import { ConfigIO } from '../src/lib'; +import { type TestProject, createTestProject } from '../src/test-utils/index.js'; +import { join } from 'node:path'; +import { afterAll, beforeAll, describe, expect, it } from 'vitest'; + +/** + * Integration coverage for A/B-test promotion against a REAL on-disk agentcore.json. + * + * Unit tests mock ConfigIO; here promote does the genuine read → mutate → write → re-validate + * round-trip through ConfigIO + the Zod schema, so a malformed write (e.g. an httpRuntime shape the + * schema rejects) would surface as a real failure. Covers the "promote everything" paths: + * - version-bump (same runtime, named endpoints) + * - repoint to a different runtime + * - repoint when the default (unnamed) endpoint is used + * + * promoteABTestConfig() constructs `new ConfigIO()` internally, which discovers the project via + * INIT_CWD/cwd — so each test points INIT_CWD at the temp project before calling it. + */ +describe('integration: ab-test promote (real config round-trip)', () => { + let project: TestProject; + let configIO: ConfigIO; + const originalInitCwd = process.env.INIT_CWD; + + beforeAll(async () => { + project = await createTestProject({ + language: 'Python', + framework: 'Strands', + modelProvider: 'Bedrock', + memory: 'none', + }); + // promote()'s internal `new ConfigIO()` resolves the project from INIT_CWD (walks up to the + // agentcore/ dir). 
The explicit ConfigIO below needs the agentcore/ dir as its baseDir. + process.env.INIT_CWD = project.projectPath; + configIO = new ConfigIO({ baseDir: join(project.projectPath, 'agentcore') }); + }); + + afterAll(async () => { + if (originalInitCwd === undefined) delete process.env.INIT_CWD; + else process.env.INIT_CWD = originalInitCwd; + await project.cleanup(); + }); + + // Build N schema-valid runtimes by cloning the project's real runtime (so build/entrypoint/etc. + // satisfy the Zod schema) and overriding name + endpoints. + async function makeRuntimes( + specs: { name: string; endpoints: Record }[] + ): Promise { + const spec = await configIO.readProjectSpec(); + const base = spec.runtimes[0]; + return specs.map(s => ({ ...base, name: s.name, endpoints: s.endpoints })); + } + + // Each test rewrites the runtimes + gateway from scratch so cases don't bleed into each other. + // httpRuntime targets require the gateway to have protocolType: "None". + async function seedProject(runtimes: unknown[], targets: unknown[]): Promise { + const spec = await configIO.readProjectSpec(); + const next = { + ...spec, + runtimes, + agentCoreGateways: [{ name: 'my-gw', protocolType: 'None', targets }], + }; + await configIO.writeProjectSpec(next as never); + } + + function record(): ABTestJobRecord { + return { + type: 'ab-test', + id: 'ab-integ', + arn: 'arn:aws:bedrock-agentcore:us-east-1:1:ab-test/ab-integ', + status: 'STOPPED', + lifecycleStatus: 'STOPPED', + createdAt: '2026-01-01T00:00:00Z', + agent: 'my-agent', + name: 'integTest', + mode: 'target-based', + gatewayArn: 'arn:aws:bedrock-agentcore:us-east-1:1:gateway/my-gw', + gatewayName: 'my-gw', + variants: [ + { name: 'C', weight: 50, targetName: 'ctrl-target' }, + { name: 'T1', weight: 50, targetName: 'treat-target' }, + ], + evaluationConfig: { onlineEvaluationConfigArn: 'arn:aws:eval:config' }, + }; + } + + it('version-bumps control to the treatment endpoint version (same runtime)', async () => { + await 
seedProject( + await makeRuntimes([{ name: 'my_runtime', endpoints: { control: { version: 1 }, treatment: { version: 7 } } }]), + [ + { + name: 'ctrl-target', + targetType: 'httpRuntime', + httpRuntime: { runtime: 'my_runtime', runtimeEndpoint: 'control' }, + }, + { + name: 'treat-target', + targetType: 'httpRuntime', + httpRuntime: { runtime: 'my_runtime', runtimeEndpoint: 'treatment' }, + }, + ] + ); + + const result = await promoteABTestConfig(record()); + expect(result.promoted).toBe(true); + + const after = await configIO.readProjectSpec(); + const rt = after.runtimes.find(r => r.name === 'my_runtime')!; + expect(rt.endpoints?.control?.version).toBe(7); + }); + + it('repoints control to a different treatment runtime', async () => { + await seedProject( + await makeRuntimes([ + { name: 'runtime_a', endpoints: { prod: { version: 1 } } }, + { name: 'runtime_b', endpoints: { prod: { version: 5 } } }, + ]), + [ + { + name: 'ctrl-target', + targetType: 'httpRuntime', + httpRuntime: { runtime: 'runtime_a', runtimeEndpoint: 'prod' }, + }, + { + name: 'treat-target', + targetType: 'httpRuntime', + httpRuntime: { runtime: 'runtime_b', runtimeEndpoint: 'prod' }, + }, + ] + ); + + const result = await promoteABTestConfig(record()); + expect(result.promoted).toBe(true); + + const after = await configIO.readProjectSpec(); + const ctrl = after.agentCoreGateways!.find(g => g.name === 'my-gw')!.targets!.find(t => t.name === 'ctrl-target')!; + expect(ctrl.httpRuntime!.runtime).toBe('runtime_b'); + expect(ctrl.httpRuntime!.runtimeEndpoint).toBe('prod'); + }); + + it('repoints control when variants use the default (unnamed) endpoint', async () => { + await seedProject( + await makeRuntimes([ + { name: 'runtime_a', endpoints: {} }, + { name: 'runtime_b', endpoints: {} }, + ]), + [ + { name: 'ctrl-target', targetType: 'httpRuntime', httpRuntime: { runtime: 'runtime_a' } }, + { name: 'treat-target', targetType: 'httpRuntime', httpRuntime: { runtime: 'runtime_b' } }, + ] + ); + + 
const result = await promoteABTestConfig(record()); + expect(result.promoted).toBe(true); + + const after = await configIO.readProjectSpec(); + const ctrl = after.agentCoreGateways!.find(g => g.name === 'my-gw')!.targets!.find(t => t.name === 'ctrl-target')!; + expect(ctrl.httpRuntime!.runtime).toBe('runtime_b'); + }); +}); diff --git a/integ-tests/recommendation.test.ts b/integ-tests/recommendation.test.ts index dc3037a3e..a79a153f5 100644 --- a/integ-tests/recommendation.test.ts +++ b/integ-tests/recommendation.test.ts @@ -70,53 +70,6 @@ describe('integration: run recommendation CLI validation', () => { }); describe('system-prompt recommendation input validation', () => { - it('fails when agent not deployed (inline input)', async () => { - const result = await runCLI( - [ - 'run', - 'recommendation', - '--runtime', - project.agentName, - '--evaluator', - 'Builtin.Faithfulness', - '--inline', - 'You are a helpful assistant.', - '--json', - ], - project.projectPath - ); - - expect(result.exitCode).toBe(1); - const json = parseJsonOutput(result.stdout) as Record; - expect(json.success).toBe(false); - expect(json.error).toContain('deployed'); - }); - - it('fails when agent not deployed (file input)', async () => { - const promptFile = join(project.projectPath, 'system-prompt.txt'); - await writeFile(promptFile, 'You are a helpful assistant for testing.'); - - const result = await runCLI( - [ - 'run', - 'recommendation', - '--runtime', - project.agentName, - '--evaluator', - 'Builtin.Faithfulness', - '--prompt-file', - promptFile, - '--json', - ], - project.projectPath - ); - - expect(result.exitCode).toBe(1); - const json = parseJsonOutput(result.stdout) as Record; - expect(json.success).toBe(false); - expect(json.error).toContain('deployed'); - }); - it('fails with non-existent prompt file', async () => { const result = await runCLI( [ @@ -137,61 +90,6 @@ describe('integration: run recommendation CLI validation', () => { }); }); - describe('tool-description 
recommendation input validation', () => { - it('fails when agent not deployed (tool-description type with --tools)', async () => { - const result = await runCLI( - [ - 'run', - 'recommendation', - '--type', - 'tool-description', - '--runtime', - project.agentName, - '--tools', - 'search:Searches the web for information', - '--tools', - 'calculator:Performs math calculations', - '--json', - ], - project.projectPath - ); - - expect(result.exitCode).toBe(1); - const json = parseJsonOutput(result.stdout) as Record; - expect(json.success).toBe(false); - expect(json.error).toContain('deployed'); - }); - }); - - describe('config bundle source validation', () => { - it('fails when bundle not found in deployed state', async () => { - const result = await runCLI( - [ - 'run', - 'recommendation', - '--runtime', - project.agentName, - '--evaluator', - 'Builtin.Faithfulness', - '--bundle-name', - 'NonExistentBundle', - '--bundle-version', - 'v1', - '--system-prompt-json-path', - 'systemPrompt', - '--json', - ], - project.projectPath - ); - - expect(result.exitCode).toBe(1); - const json = parseJsonOutput(result.stdout) as Record; - expect(json.success).toBe(false); - // Fails at agent resolution (not deployed) before bundle resolution - expect(json.error).toContain('deployed'); - }); - }); - describe('spans file validation', () => { it('fails when spans file does not exist', async () => { const result = await runCLI( @@ -238,53 +136,4 @@ describe('integration: run recommendation CLI validation', () => { expect(result.exitCode).toBe(1); }); }); - - describe('lookback and session options', () => { - it('accepts --lookback flag (fails at deploy check, not parsing)', async () => { - const result = await runCLI( - [ - 'run', - 'recommendation', - '--runtime', - project.agentName, - '--evaluator', - 'Builtin.Faithfulness', - '--inline', - 'You are a helpful assistant.', - '--lookback', - '14', - '--json', - ], - project.projectPath - ); - - expect(result.exitCode).toBe(1); - const 
json = parseJsonOutput(result.stdout) as Record; - expect(json.error).toContain('deployed'); - }); - - it('accepts --session-id flag (fails at deploy check, not parsing)', async () => { - const result = await runCLI( - [ - 'run', - 'recommendation', - '--runtime', - project.agentName, - '--evaluator', - 'Builtin.Faithfulness', - '--inline', - 'You are a helpful assistant.', - '--session-id', - 'sess-001', - 'sess-002', - '--json', - ], - project.projectPath - ); - - expect(result.exitCode).toBe(1); - const json = parseJsonOutput(result.stdout) as Record; - expect(json.error).toContain('deployed'); - }); - }); }); diff --git a/integ-tests/run-ab-test.test.ts b/integ-tests/run-ab-test.test.ts new file mode 100644 index 000000000..cce83e490 --- /dev/null +++ b/integ-tests/run-ab-test.test.ts @@ -0,0 +1,116 @@ +import { type TestProject, createTestProject, parseJsonOutput, runCLI } from '../src/test-utils/index.js'; +import { afterAll, beforeAll, describe, expect, it } from 'vitest'; + +/** + * Client-side CLI validation for `agentcore run ab-test` (the fire-and-forget jobs-model command + * that replaced the old `add/remove ab-test` primitive). No live AWS — every case here must fail + * fast on local validation before any API call. Mirrors integ-tests/recommendation.test.ts. 
+ */ +describe('integration: run ab-test CLI validation', () => { + let project: TestProject; + + beforeAll(async () => { + project = await createTestProject({ + language: 'Python', + framework: 'Strands', + modelProvider: 'Bedrock', + memory: 'none', + }); + }); + + afterAll(async () => { + await project.cleanup(); + }); + + describe('required flags', () => { + it('requires --name and --gateway in non-interactive (JSON) mode', async () => { + const result = await runCLI(['run', 'ab-test', '--json'], project.projectPath); + expect(result.exitCode).toBe(1); + const json = parseJsonOutput(result.stdout) as Record; + expect(json.success).toBe(false); + expect(json.error).toContain('--name'); + expect(json.error).toContain('--gateway'); + }); + + it('rejects an invalid --mode', async () => { + const result = await runCLI( + ['run', 'ab-test', '--name', 'MyTest', '--gateway', 'MyGw', '--mode', 'bogus-mode', '--json'], + project.projectPath + ); + expect(result.exitCode).toBe(1); + const json = parseJsonOutput(result.stdout) as Record; + expect(json.success).toBe(false); + expect(json.error).toContain('--mode'); + }); + }); + + describe('variant weight validation', () => { + it('rejects weights that do not sum to 100', async () => { + const result = await runCLI( + [ + 'run', + 'ab-test', + '--name', + 'MyTest', + '--gateway', + 'MyGw', + '--control-weight', + '60', + '--treatment-weight', + '60', + '--json', + ], + project.projectPath + ); + expect(result.exitCode).toBe(1); + const json = parseJsonOutput(result.stdout) as Record; + expect(json.success).toBe(false); + expect(json.error).toContain('sum to 100'); + }); + + it('rejects a non-integer / out-of-range weight', async () => { + const result = await runCLI( + [ + 'run', + 'ab-test', + '--name', + 'MyTest', + '--gateway', + 'MyGw', + '--control-weight', + '150', + '--treatment-weight', + '50', + '--json', + ], + project.projectPath + ); + expect(result.exitCode).toBe(1); + const json = 
parseJsonOutput(result.stdout) as Record; + expect(json.success).toBe(false); + expect(json.error).toContain('between 0 and 100'); + }); + }); + + describe('mode-specific required inputs', () => { + it('config-bundle mode requires control/treatment bundle names and versions', async () => { + const result = await runCLI( + ['run', 'ab-test', '--name', 'MyTest', '--gateway', 'MyGw', '--mode', 'config-bundle', '--json'], + project.projectPath + ); + expect(result.exitCode).toBe(1); + const json = parseJsonOutput(result.stdout) as Record; + expect(json.success).toBe(false); + }); + + it('target-based mode requires control/treatment targets', async () => { + const result = await runCLI( + ['run', 'ab-test', '--name', 'MyTest', '--gateway', 'MyGw', '--mode', 'target-based', '--json'], + project.projectPath + ); + expect(result.exitCode).toBe(1); + const json = parseJsonOutput(result.stdout) as Record; + expect(json.success).toBe(false); + }); + }); +}); diff --git a/integ-tests/run-insights.test.ts b/integ-tests/run-insights.test.ts new file mode 100644 index 000000000..3b9b46b83 --- /dev/null +++ b/integ-tests/run-insights.test.ts @@ -0,0 +1,207 @@ +import { type TestProject, createTestProject, parseJsonOutput, runCLI } from '../src/test-utils/index.js'; +import { createTelemetryHelper } from '../src/test-utils/telemetry-helper.js'; +import { afterAll, beforeAll, describe, expect, it } from 'vitest'; + +describe('integration: run insights command validation', () => { + let project: TestProject; + const telemetry = createTelemetryHelper(); + + beforeAll(async () => { + project = await createTestProject({ + language: 'Python', + framework: 'Strands', + modelProvider: 'Bedrock', + memory: 'none', + }); + }); + + afterAll(async () => { + await project.cleanup(); + telemetry.destroy(); + }); + + it('fails when agent is not deployed (no deployed state)', async () => { + const result = await runCLI(['run', 'insights', '--runtime', project.agentName, '--json'], 
project.projectPath, { + env: telemetry.env, + }); + expect(result.exitCode).toBe(1); + const json = parseJsonOutput(result.stdout) as Record; + expect(json.success).toBe(false); + expect(json.error).toBeTruthy(); + }); + + it('fails with --name that violates naming constraints', async () => { + const result = await runCLI( + ['run', 'insights', '--runtime', project.agentName, '--name', '123-invalid-start', '--json'], + project.projectPath, + { env: telemetry.env } + ); + expect(result.exitCode).toBe(1); + const json = parseJsonOutput(result.stdout) as Record; + expect(json.success).toBe(false); + expect(json.error).toBeTruthy(); + }); + + it('accepts --insights flag with custom insight IDs', async () => { + const result = await runCLI( + ['run', 'insights', '--runtime', project.agentName, '--insights', 'Builtin.Insight.FailureAnalysis', '--json'], + project.projectPath, + { env: telemetry.env } + ); + expect(result.exitCode).toBe(1); + const json = parseJsonOutput(result.stdout) as Record; + expect(json.error).not.toContain('--insights'); + }); + + it('accepts --evaluator flag for recommendation chaining', async () => { + const result = await runCLI( + ['run', 'insights', '--runtime', project.agentName, '--evaluator', 'Builtin.Accuracy', '--json'], + project.projectPath, + { env: telemetry.env } + ); + expect(result.exitCode).toBe(1); + const json = parseJsonOutput(result.stdout) as Record; + expect(json.error).not.toContain('--evaluator'); + }); + + it('accepts --lookback-days flag', async () => { + const result = await runCLI( + ['run', 'insights', '--runtime', project.agentName, '--lookback-days', '14', '--json'], + project.projectPath, + { env: telemetry.env } + ); + expect(result.exitCode).toBe(1); + const json = parseJsonOutput(result.stdout) as Record; + expect(json.error).not.toContain('--lookback-days'); + }); + + it('accepts --session-ids flag', async () => { + const result = await runCLI( + ['run', 'insights', '--runtime', project.agentName, 
'--session-ids', 'sess-001', 'sess-002', '--json'], + project.projectPath, + { env: telemetry.env } + ); + expect(result.exitCode).toBe(1); + const json = parseJsonOutput(result.stdout) as Record; + expect(json.error).not.toContain('--session-ids'); + }); + + it('accepts --online-eval-config-arn as data source (no --runtime needed)', async () => { + const result = await runCLI( + [ + 'run', + 'insights', + '--online-eval-config-arn', + 'arn:aws:bedrock:us-east-1:123456789012:online-evaluation-config/test', + '--json', + ], + project.projectPath, + { env: telemetry.env } + ); + expect(result.exitCode).toBe(1); + const json = parseJsonOutput(result.stdout) as Record; + expect(json.error).not.toContain('--runtime'); + expect(json.error).not.toContain('required'); + }); +}); + +describe('integration: view insights command', () => { + let project: TestProject; + const telemetry = createTelemetryHelper(); + + beforeAll(async () => { + project = await createTestProject({ + language: 'Python', + framework: 'Strands', + modelProvider: 'Bedrock', + memory: 'none', + }); + }); + + afterAll(async () => { + await project.cleanup(); + telemetry.destroy(); + }); + + it('returns empty list when no insights jobs exist', async () => { + const result = await runCLI(['view', 'insights', '--json'], project.projectPath, { env: telemetry.env }); + expect(result.exitCode).toBe(0); + const json = parseJsonOutput(result.stdout) as Record; + expect(json.success).toBe(true); + expect(json.insights).toEqual([]); + }); + + it('returns not-found for a non-existent insights job ID', async () => { + const result = await runCLI(['view', 'insights', 'nonexistent-id', '--json'], project.projectPath, { + env: telemetry.env, + }); + expect(result.exitCode).toBe(1); + const json = parseJsonOutput(result.stdout) as Record; + expect(json.success).toBe(false); + expect(json.error).toContain('not found'); + }); +}); + +describe('integration: pause/resume online-insights validation', () => { + let project: 
TestProject; + const telemetry = createTelemetryHelper(); + + beforeAll(async () => { + project = await createTestProject({ + language: 'Python', + framework: 'Strands', + modelProvider: 'Bedrock', + memory: 'none', + }); + }); + + afterAll(async () => { + await project.cleanup(); + telemetry.destroy(); + }); + + it('pause online-insights fails without name or --arn', async () => { + const result = await runCLI(['pause', 'online-insights', '--json'], project.projectPath, { env: telemetry.env }); + expect(result.exitCode).toBe(1); + const json = parseJsonOutput(result.stdout) as Record; + expect(json.success).toBe(false); + expect(json.error).toContain('name or --arn'); + }); + + it('resume online-insights fails without name or --arn', async () => { + const result = await runCLI(['resume', 'online-insights', '--json'], project.projectPath, { env: telemetry.env }); + expect(result.exitCode).toBe(1); + const json = parseJsonOutput(result.stdout) as Record; + expect(json.success).toBe(false); + expect(json.error).toContain('name or --arn'); + }); +}); + +describe('integration: archive insights validation', () => { + let project: TestProject; + const telemetry = createTelemetryHelper(); + + beforeAll(async () => { + project = await createTestProject({ + language: 'Python', + framework: 'Strands', + modelProvider: 'Bedrock', + memory: 'none', + }); + }); + + afterAll(async () => { + await project.cleanup(); + telemetry.destroy(); + }); + + it('archive insights fails for non-existent ID', async () => { + const result = await runCLI(['archive', 'insights', '--id', 'nonexistent-job-id', '--json'], project.projectPath, { + env: telemetry.env, + }); + expect(result.exitCode).toBe(1); + const json = parseJsonOutput(result.stdout) as Record; + expect(json.success).toBe(false); + expect(json.error).toContain('not found'); + }); +}); diff --git a/integ-tests/run-recommendation-from-insights.test.ts b/integ-tests/run-recommendation-from-insights.test.ts new file mode 100644 index 
000000000..1550d59b4 --- /dev/null +++ b/integ-tests/run-recommendation-from-insights.test.ts @@ -0,0 +1,72 @@ +import { type TestProject, createTestProject, parseJsonOutput, runCLI } from '../src/test-utils/index.js'; +import { createTelemetryHelper } from '../src/test-utils/telemetry-helper.js'; +import { afterAll, beforeAll, describe, expect, it } from 'vitest'; + +const telemetry = createTelemetryHelper(); + +describe('integration: run recommendation --from-insights', () => { + let project: TestProject; + + beforeAll(async () => { + project = await createTestProject({ + language: 'Python', + framework: 'Strands', + modelProvider: 'Bedrock', + memory: 'none', + }); + }); + + afterAll(async () => { + await project.cleanup(); + telemetry.destroy(); + }); + + it('accepts --from-insights flag (fails on missing insights job, not flag parsing)', async () => { + const result = await runCLI( + ['run', 'recommendation', '--runtime', project.agentName, '--from-insights', 'some-insights-job-id', '--json'], + project.projectPath, + { env: telemetry.env } + ); + expect(result.exitCode).toBe(1); + const json = parseJsonOutput(result.stdout) as Record; + expect(json.success).toBe(false); + // Should fail because the insights job doesn't exist, not because --from-insights is unrecognized + expect(json.error).not.toContain('Unknown option'); + expect(json.error).not.toContain('--from-insights'); + }); + + it('accepts --batch-evaluation-arn flag (fails on API, not flag parsing)', async () => { + const result = await runCLI( + [ + 'run', + 'recommendation', + '--runtime', + project.agentName, + '--batch-evaluation-arn', + 'arn:aws:bedrock:us-east-1:123456789012:batch-evaluation/test', + '--json', + ], + project.projectPath, + { env: telemetry.env } + ); + expect(result.exitCode).toBe(1); + const json = parseJsonOutput(result.stdout) as Record; + expect(json.success).toBe(false); + expect(json.error).not.toContain('Unknown option'); + 
expect(json.error).not.toContain('--batch-evaluation-arn'); + }); + + it('--from-insights makes --runtime and --evaluator optional', async () => { + const result = await runCLI( + ['run', 'recommendation', '--from-insights', 'some-insights-job-id', '--json'], + project.projectPath, + { env: telemetry.env } + ); + expect(result.exitCode).toBe(1); + const json = parseJsonOutput(result.stdout) as Record; + expect(json.success).toBe(false); + // Should NOT complain about missing --runtime or --evaluator + expect(json.error).not.toContain('--runtime'); + expect(json.error).not.toContain('--evaluator'); + }); +}); diff --git a/npm-shrinkwrap.json b/npm-shrinkwrap.json index 1f3fe3ea5..07c43ad44 100644 --- a/npm-shrinkwrap.json +++ b/npm-shrinkwrap.json @@ -1,12 +1,12 @@ { "name": "@aws/agentcore", - "version": "0.18.0", + "version": "0.19.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@aws/agentcore", - "version": "0.18.0", + "version": "0.19.0", "hasInstallScript": true, "license": "Apache-2.0", "dependencies": { @@ -15,15 +15,15 @@ "@aws-sdk/client-application-signals": "^3.1003.0", "@aws-sdk/client-bedrock": "^3.1012.0", "@aws-sdk/client-bedrock-agent": "^3.1012.0", - "@aws-sdk/client-bedrock-agentcore": "^3.1020.0", - "@aws-sdk/client-bedrock-agentcore-control": "^3.1054.0", + "@aws-sdk/client-bedrock-agentcore": "^3.1061.0", + "@aws-sdk/client-bedrock-agentcore-control": "^3.1061.0", "@aws-sdk/client-bedrock-runtime": "^3.893.0", "@aws-sdk/client-cloudformation": "^3.893.0", "@aws-sdk/client-cloudwatch-logs": "^3.893.0", - "@aws-sdk/client-efs": "^3.1049.0", + "@aws-sdk/client-efs": "^3.1067.0", "@aws-sdk/client-resource-groups-tagging-api": "^3.893.0", "@aws-sdk/client-s3": "^3.1012.0", - "@aws-sdk/client-s3files": "^3.1049.0", + "@aws-sdk/client-s3files": "^3.1067.0", "@aws-sdk/client-sts": "^3.893.0", "@aws-sdk/client-xray": "^3.1003.0", "@aws-sdk/credential-providers": "^3.893.0", @@ -834,53 +834,20 @@ } }, 
"node_modules/@aws-sdk/client-bedrock-agentcore": { - "version": "3.1037.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/client-bedrock-agentcore/-/client-bedrock-agentcore-3.1037.0.tgz", - "integrity": "sha512-8WmZulMmFnCWFuX2rDBoZdebCMmmrAi1VABsLgm4O73w3+s7tcON1YgspG9gTevuVRtOVdk1B6TLw2Mo8NBHSQ==", + "version": "3.1066.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/client-bedrock-agentcore/-/client-bedrock-agentcore-3.1066.0.tgz", + "integrity": "sha512-7wpBOVhp5zWi2Ngd726MJY5wN5QSX9hBDDGdc2kgZaOoW15iiA87atCbPGwjZTspTIBJH73737jgRIhT/O484A==", "license": "Apache-2.0", "dependencies": { "@aws-crypto/sha256-browser": "5.2.0", "@aws-crypto/sha256-js": "5.2.0", - "@aws-sdk/core": "^3.974.5", - "@aws-sdk/credential-provider-node": "^3.972.36", - "@aws-sdk/middleware-host-header": "^3.972.10", - "@aws-sdk/middleware-logger": "^3.972.10", - "@aws-sdk/middleware-recursion-detection": "^3.972.11", - "@aws-sdk/middleware-user-agent": "^3.972.35", - "@aws-sdk/region-config-resolver": "^3.972.13", - "@aws-sdk/types": "^3.973.8", - "@aws-sdk/util-endpoints": "^3.996.8", - "@aws-sdk/util-user-agent-browser": "^3.972.10", - "@aws-sdk/util-user-agent-node": "^3.973.21", - "@smithy/config-resolver": "^4.4.17", - "@smithy/core": "^3.23.17", - "@smithy/eventstream-serde-browser": "^4.2.14", - "@smithy/eventstream-serde-config-resolver": "^4.3.14", - "@smithy/eventstream-serde-node": "^4.2.14", - "@smithy/fetch-http-handler": "^5.3.17", - "@smithy/hash-node": "^4.2.14", - "@smithy/invalid-dependency": "^4.2.14", - "@smithy/middleware-content-length": "^4.2.14", - "@smithy/middleware-endpoint": "^4.4.32", - "@smithy/middleware-retry": "^4.5.5", - "@smithy/middleware-serde": "^4.2.20", - "@smithy/middleware-stack": "^4.2.14", - "@smithy/node-config-provider": "^4.3.14", - "@smithy/node-http-handler": "^4.6.1", - "@smithy/protocol-http": "^5.3.14", - "@smithy/smithy-client": "^4.12.13", - "@smithy/types": "^4.14.1", - "@smithy/url-parser": "^4.2.14", - 
"@smithy/util-base64": "^4.3.2", - "@smithy/util-body-length-browser": "^4.2.2", - "@smithy/util-body-length-node": "^4.2.3", - "@smithy/util-defaults-mode-browser": "^4.3.49", - "@smithy/util-defaults-mode-node": "^4.2.54", - "@smithy/util-endpoints": "^3.4.2", - "@smithy/util-middleware": "^4.2.14", - "@smithy/util-retry": "^4.3.4", - "@smithy/util-stream": "^4.5.25", - "@smithy/util-utf8": "^4.2.2", + "@aws-sdk/core": "^3.974.20", + "@aws-sdk/credential-provider-node": "^3.972.55", + "@aws-sdk/types": "^3.973.12", + "@smithy/core": "^3.24.6", + "@smithy/fetch-http-handler": "^5.4.6", + "@smithy/node-http-handler": "^4.7.6", + "@smithy/types": "^4.14.3", "tslib": "^2.6.2" }, "engines": { @@ -888,20 +855,20 @@ } }, "node_modules/@aws-sdk/client-bedrock-agentcore-control": { - "version": "3.1057.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/client-bedrock-agentcore-control/-/client-bedrock-agentcore-control-3.1057.0.tgz", - "integrity": "sha512-REASfgMI9i8k55OJSdSWn7rcoJIKllWMfffoR/tbu4+JLcbrV9j7uPKQg085d0w3Vx3NRrjoNlBjijq7W2dIeQ==", + "version": "3.1066.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/client-bedrock-agentcore-control/-/client-bedrock-agentcore-control-3.1066.0.tgz", + "integrity": "sha512-CKgcGkhskee4eqiCU8ZMsC1Ru4d0GoSzywEY9bhvCXgeNlFHrLpnO8jhaz3bIh4nDa0/4ABsTX5vyM/0Giz0wA==", "license": "Apache-2.0", "dependencies": { "@aws-crypto/sha256-browser": "5.2.0", "@aws-crypto/sha256-js": "5.2.0", - "@aws-sdk/core": "^3.974.15", - "@aws-sdk/credential-provider-node": "^3.972.47", - "@aws-sdk/types": "^3.973.9", - "@smithy/core": "^3.24.5", - "@smithy/fetch-http-handler": "^5.4.5", - "@smithy/node-http-handler": "^4.7.5", - "@smithy/types": "^4.14.2", + "@aws-sdk/core": "^3.974.20", + "@aws-sdk/credential-provider-node": "^3.972.55", + "@aws-sdk/types": "^3.973.12", + "@smithy/core": "^3.24.6", + "@smithy/fetch-http-handler": "^5.4.6", + "@smithy/node-http-handler": "^4.7.6", + "@smithy/types": "^4.14.3", "tslib": "^2.6.2" }, "engines": { 
@@ -1463,20 +1430,20 @@ } }, "node_modules/@aws-sdk/client-efs": { - "version": "3.1049.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/client-efs/-/client-efs-3.1049.0.tgz", - "integrity": "sha512-gFjP27S8OYbpm/HUrCcYriqTprD3bYBdbOP1eEtZkrKnDKE9GsX+hZiFRd/mFjzoEHcduK9Emtw7U3oNYrX4DA==", + "version": "3.1067.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/client-efs/-/client-efs-3.1067.0.tgz", + "integrity": "sha512-Bo7hR++vEnqvBHAGQSUkq/7OJhG2mA80Tf0gmi/yntfr/Zj+6D7/xsp0S5Qtk7eEcGXRauj+sbsvcq3ZbTyyfQ==", "license": "Apache-2.0", "dependencies": { "@aws-crypto/sha256-browser": "5.2.0", "@aws-crypto/sha256-js": "5.2.0", - "@aws-sdk/core": "^3.974.12", - "@aws-sdk/credential-provider-node": "^3.972.43", - "@aws-sdk/types": "^3.973.8", - "@smithy/core": "^3.24.2", - "@smithy/fetch-http-handler": "^5.4.2", - "@smithy/node-http-handler": "^4.7.2", - "@smithy/types": "^4.14.1", + "@aws-sdk/core": "^3.974.20", + "@aws-sdk/credential-provider-node": "^3.972.55", + "@aws-sdk/types": "^3.973.12", + "@smithy/core": "^3.24.6", + "@smithy/fetch-http-handler": "^5.4.6", + "@smithy/node-http-handler": "^4.7.6", + "@smithy/types": "^4.14.3", "tslib": "^2.6.2" }, "engines": { @@ -1859,20 +1826,20 @@ } }, "node_modules/@aws-sdk/client-s3files": { - "version": "3.1049.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/client-s3files/-/client-s3files-3.1049.0.tgz", - "integrity": "sha512-nqOZ5SGNmaaUV/AmTFulGVWzDclYt/1Yk/rPvbqdre40aBi+2rlY0EauVcSDszXzUc5AjPMNrOINQ9z3SXq1dA==", + "version": "3.1067.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/client-s3files/-/client-s3files-3.1067.0.tgz", + "integrity": "sha512-Kj++SPSxeryjDM/60Q++NoOnNePSYmpAyUNSl99llpvP+7V2htIIUSm/gSQxLgF199mL6DPjPqUxdFrpGDHlBg==", "license": "Apache-2.0", "dependencies": { "@aws-crypto/sha256-browser": "5.2.0", "@aws-crypto/sha256-js": "5.2.0", - "@aws-sdk/core": "^3.974.12", - "@aws-sdk/credential-provider-node": "^3.972.43", - "@aws-sdk/types": "^3.973.8", - "@smithy/core": "^3.24.2", - 
"@smithy/fetch-http-handler": "^5.4.2", - "@smithy/node-http-handler": "^4.7.2", - "@smithy/types": "^4.14.1", + "@aws-sdk/core": "^3.974.20", + "@aws-sdk/credential-provider-node": "^3.972.55", + "@aws-sdk/types": "^3.973.12", + "@smithy/core": "^3.24.6", + "@smithy/fetch-http-handler": "^5.4.6", + "@smithy/node-http-handler": "^4.7.6", + "@smithy/types": "^4.14.3", "tslib": "^2.6.2" }, "engines": { @@ -2132,17 +2099,17 @@ } }, "node_modules/@aws-sdk/core": { - "version": "3.974.15", - "resolved": "https://registry.npmjs.org/@aws-sdk/core/-/core-3.974.15.tgz", - "integrity": "sha512-UpA0rTGW/tHGITcCqHisbuuEPraYg9GG+mWmXjY5+RxZBMLGe6aL9oe0ix50LztwAcPIkGZLH0yWdMIkCM10hw==", + "version": "3.974.20", + "resolved": "https://registry.npmjs.org/@aws-sdk/core/-/core-3.974.20.tgz", + "integrity": "sha512-7sDi2B2N3mc3nf1nz6FyEx/FCrJ1N1QnBmraHHQNabFaeAh2IaOOLml48/rHOD1bICHgTRkbBgNTvUzEr5Z35g==", "license": "Apache-2.0", "dependencies": { - "@aws-sdk/types": "^3.973.9", - "@aws-sdk/xml-builder": "^3.972.26", + "@aws-sdk/types": "^3.973.12", + "@aws-sdk/xml-builder": "^3.972.29", "@aws/lambda-invoke-store": "^0.2.2", - "@smithy/core": "^3.24.5", - "@smithy/signature-v4": "^5.4.5", - "@smithy/types": "^4.14.2", + "@smithy/core": "^3.24.6", + "@smithy/signature-v4": "^5.4.6", + "@smithy/types": "^4.14.3", "bowser": "^2.11.0", "tslib": "^2.6.2" }, @@ -2214,15 +2181,15 @@ } }, "node_modules/@aws-sdk/credential-provider-env": { - "version": "3.972.41", - "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-env/-/credential-provider-env-3.972.41.tgz", - "integrity": "sha512-n1EbJ98yvPWWdHZZv8bRBMqqDQJrtgtxyJ4xLy2Uqrh25BCOZQ7nnS1CsFXvuH8r0b0KVHDZEGEH5FxmEMP8jg==", + "version": "3.972.46", + "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-env/-/credential-provider-env-3.972.46.tgz", + "integrity": "sha512-+GPXVS2srMOlH74S+SmC1gVuP2TvUZ0siuC0onKO93q+udP+M72dmY8wJfVQ5CX9z/9X5A1HHwz5yRIGBtskvQ==", "license": "Apache-2.0", "dependencies": { - 
"@aws-sdk/core": "^3.974.15", - "@aws-sdk/types": "^3.973.9", - "@smithy/core": "^3.24.5", - "@smithy/types": "^4.14.2", + "@aws-sdk/core": "^3.974.20", + "@aws-sdk/types": "^3.973.12", + "@smithy/core": "^3.24.6", + "@smithy/types": "^4.14.3", "tslib": "^2.6.2" }, "engines": { @@ -2230,17 +2197,17 @@ } }, "node_modules/@aws-sdk/credential-provider-http": { - "version": "3.972.43", - "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-http/-/credential-provider-http-3.972.43.tgz", - "integrity": "sha512-TT76RN1NkI9WoyZqCNxOw6/WBMF7pYOTJcXbMokNFU+euSG40Kaf/t/FhDACVZWP+43wEM6ZynIPIkzS1wR1iA==", + "version": "3.972.48", + "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-http/-/credential-provider-http-3.972.48.tgz", + "integrity": "sha512-fA5loSdlocacRxyUXtpoHSMuk5rsIKRDzQYVMnMxjcmFeZshaJlJ8lymy/hYKji6sne/UmNGj5pxuEs6kq/Qcg==", "license": "Apache-2.0", "dependencies": { - "@aws-sdk/core": "^3.974.15", - "@aws-sdk/types": "^3.973.9", - "@smithy/core": "^3.24.5", - "@smithy/fetch-http-handler": "^5.4.5", - "@smithy/node-http-handler": "^4.7.5", - "@smithy/types": "^4.14.2", + "@aws-sdk/core": "^3.974.20", + "@aws-sdk/types": "^3.973.12", + "@smithy/core": "^3.24.6", + "@smithy/fetch-http-handler": "^5.4.6", + "@smithy/node-http-handler": "^4.7.6", + "@smithy/types": "^4.14.3", "tslib": "^2.6.2" }, "engines": { @@ -2248,23 +2215,23 @@ } }, "node_modules/@aws-sdk/credential-provider-ini": { - "version": "3.972.46", - "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-ini/-/credential-provider-ini-3.972.46.tgz", - "integrity": "sha512-hvcgcwOiS0nb2XFb5Op1Pz/vYaWz5K8kKullziGpdNRuG0NwzRXseuPt2CoBqknHGaSPVesu1aOn2OcctEYdCA==", + "version": "3.972.53", + "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-ini/-/credential-provider-ini-3.972.53.tgz", + "integrity": "sha512-ZfdhIOR41q8TcWEnUac+gCOb+O2LBWdHLmjedXpXz4IEFW2ppNuFcm6p0sMTavpM+zD5TYfpH5Gp7guRyqSgsQ==", "license": "Apache-2.0", "dependencies": { - 
"@aws-sdk/core": "^3.974.15", - "@aws-sdk/credential-provider-env": "^3.972.41", - "@aws-sdk/credential-provider-http": "^3.972.43", - "@aws-sdk/credential-provider-login": "^3.972.45", - "@aws-sdk/credential-provider-process": "^3.972.41", - "@aws-sdk/credential-provider-sso": "^3.972.45", - "@aws-sdk/credential-provider-web-identity": "^3.972.45", - "@aws-sdk/nested-clients": "^3.997.13", - "@aws-sdk/types": "^3.973.9", - "@smithy/core": "^3.24.5", - "@smithy/credential-provider-imds": "^4.3.6", - "@smithy/types": "^4.14.2", + "@aws-sdk/core": "^3.974.20", + "@aws-sdk/credential-provider-env": "^3.972.46", + "@aws-sdk/credential-provider-http": "^3.972.48", + "@aws-sdk/credential-provider-login": "^3.972.52", + "@aws-sdk/credential-provider-process": "^3.972.46", + "@aws-sdk/credential-provider-sso": "^3.972.52", + "@aws-sdk/credential-provider-web-identity": "^3.972.52", + "@aws-sdk/nested-clients": "^3.997.20", + "@aws-sdk/types": "^3.973.12", + "@smithy/core": "^3.24.6", + "@smithy/credential-provider-imds": "^4.3.7", + "@smithy/types": "^4.14.3", "tslib": "^2.6.2" }, "engines": { @@ -2272,16 +2239,16 @@ } }, "node_modules/@aws-sdk/credential-provider-login": { - "version": "3.972.45", - "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-login/-/credential-provider-login-3.972.45.tgz", - "integrity": "sha512-MZQv4SNjByk1iOKmrqmzcUF/uCB05wjvEHyXKxmGQTUANTIVayX6HPUF0bzkWLvtnkH7sAn9kUCfkXbSpj9sDA==", + "version": "3.972.52", + "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-login/-/credential-provider-login-3.972.52.tgz", + "integrity": "sha512-9hu2oR0qH7Fst5Tzdx+UWxm+w5zCXtErTLtOOW5hwwQc170CLwOeniRxyFY6s9mHfGEfC5zFukNBdKBwJR8mhQ==", "license": "Apache-2.0", "dependencies": { - "@aws-sdk/core": "^3.974.15", - "@aws-sdk/nested-clients": "^3.997.13", - "@aws-sdk/types": "^3.973.9", - "@smithy/core": "^3.24.5", - "@smithy/types": "^4.14.2", + "@aws-sdk/core": "^3.974.20", + "@aws-sdk/nested-clients": "^3.997.20", + 
"@aws-sdk/types": "^3.973.12", + "@smithy/core": "^3.24.6", + "@smithy/types": "^4.14.3", "tslib": "^2.6.2" }, "engines": { @@ -2289,21 +2256,21 @@ } }, "node_modules/@aws-sdk/credential-provider-node": { - "version": "3.972.47", - "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-node/-/credential-provider-node-3.972.47.tgz", - "integrity": "sha512-HrId+C0DWA5qDIyLG64/kjUB2RNtPypxmABnIctK+TA1P1kHlOYoE/Wf5T5tKOMKgb08P7k/zNyhvfJ3lh5Oag==", + "version": "3.972.55", + "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-node/-/credential-provider-node-3.972.55.tgz", + "integrity": "sha512-zMGLa/dhESVqmCD7mmIFFKSwSFrJGScvCXcjvBZEVOOMauFS5JRQvLTMukFpMEFWiV6dTAlsen2ATDBulLPtbg==", "license": "Apache-2.0", "dependencies": { - "@aws-sdk/credential-provider-env": "^3.972.41", - "@aws-sdk/credential-provider-http": "^3.972.43", - "@aws-sdk/credential-provider-ini": "^3.972.46", - "@aws-sdk/credential-provider-process": "^3.972.41", - "@aws-sdk/credential-provider-sso": "^3.972.45", - "@aws-sdk/credential-provider-web-identity": "^3.972.45", - "@aws-sdk/types": "^3.973.9", - "@smithy/core": "^3.24.5", - "@smithy/credential-provider-imds": "^4.3.6", - "@smithy/types": "^4.14.2", + "@aws-sdk/credential-provider-env": "^3.972.46", + "@aws-sdk/credential-provider-http": "^3.972.48", + "@aws-sdk/credential-provider-ini": "^3.972.53", + "@aws-sdk/credential-provider-process": "^3.972.46", + "@aws-sdk/credential-provider-sso": "^3.972.52", + "@aws-sdk/credential-provider-web-identity": "^3.972.52", + "@aws-sdk/types": "^3.973.12", + "@smithy/core": "^3.24.6", + "@smithy/credential-provider-imds": "^4.3.7", + "@smithy/types": "^4.14.3", "tslib": "^2.6.2" }, "engines": { @@ -2311,15 +2278,15 @@ } }, "node_modules/@aws-sdk/credential-provider-process": { - "version": "3.972.41", - "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-process/-/credential-provider-process-3.972.41.tgz", - "integrity": 
"sha512-7I/n1zkysouLOWvkEhjNEP4vMnD2v4kzzr3/3QBdrripEpn7ap1/I5DF3Hou1SUqkKWo1f3oPGMyFAA1FAMvsQ==", + "version": "3.972.46", + "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-process/-/credential-provider-process-3.972.46.tgz", + "integrity": "sha512-VUoNFBIjWrUN8NbFiQiuxQEgFjvziAlBRPK+ddh27aj65gk0BYu6bLZnrdrNZwpW6vAihtSUtEMQ1PUJ32QRPA==", "license": "Apache-2.0", "dependencies": { - "@aws-sdk/core": "^3.974.15", - "@aws-sdk/types": "^3.973.9", - "@smithy/core": "^3.24.5", - "@smithy/types": "^4.14.2", + "@aws-sdk/core": "^3.974.20", + "@aws-sdk/types": "^3.973.12", + "@smithy/core": "^3.24.6", + "@smithy/types": "^4.14.3", "tslib": "^2.6.2" }, "engines": { @@ -2327,17 +2294,17 @@ } }, "node_modules/@aws-sdk/credential-provider-sso": { - "version": "3.972.45", - "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-sso/-/credential-provider-sso-3.972.45.tgz", - "integrity": "sha512-oHgbz/eFD8IKiksqDsz9ZMU4A59BpQq4QwJedBnGD80ZqYcHPPHZBwjBnxLVkB7iRVVHWpDclR8yWdD2PkQIUA==", + "version": "3.972.52", + "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-sso/-/credential-provider-sso-3.972.52.tgz", + "integrity": "sha512-nb2/n4o/HQf+FVpVbZe9vCTFngmuDoIsltMgLAtjixaKzvzhB4J8WSDFyWgnErgLHk55ctWH+I4PU+LIHhyffg==", "license": "Apache-2.0", "dependencies": { - "@aws-sdk/core": "^3.974.15", - "@aws-sdk/nested-clients": "^3.997.13", - "@aws-sdk/token-providers": "3.1056.0", - "@aws-sdk/types": "^3.973.9", - "@smithy/core": "^3.24.5", - "@smithy/types": "^4.14.2", + "@aws-sdk/core": "^3.974.20", + "@aws-sdk/nested-clients": "^3.997.20", + "@aws-sdk/token-providers": "3.1066.0", + "@aws-sdk/types": "^3.973.12", + "@smithy/core": "^3.24.6", + "@smithy/types": "^4.14.3", "tslib": "^2.6.2" }, "engines": { @@ -2345,16 +2312,16 @@ } }, "node_modules/@aws-sdk/credential-provider-web-identity": { - "version": "3.972.45", - "resolved": 
"https://registry.npmjs.org/@aws-sdk/credential-provider-web-identity/-/credential-provider-web-identity-3.972.45.tgz", - "integrity": "sha512-CDhzKdb2onv5bpnjn/acgdNmJOQthPDLsPizU7rZflsEcgMMp8Mlri+U5hdxf8ldvZJpvM3vLU6D56vfJm5AMQ==", + "version": "3.972.52", + "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-web-identity/-/credential-provider-web-identity-3.972.52.tgz", + "integrity": "sha512-lKj6aRSGbqLmpYmM24bY7a1Xmfcq2vkE3hv8CSPYfc1yCu0BPu/XEJ1L4Fm61MsU6ULLNSG8UGsffNoFUBjESA==", "license": "Apache-2.0", "dependencies": { - "@aws-sdk/core": "^3.974.15", - "@aws-sdk/nested-clients": "^3.997.13", - "@aws-sdk/types": "^3.973.9", - "@smithy/core": "^3.24.5", - "@smithy/types": "^4.14.2", + "@aws-sdk/core": "^3.974.20", + "@aws-sdk/nested-clients": "^3.997.20", + "@aws-sdk/types": "^3.973.12", + "@smithy/core": "^3.24.6", + "@smithy/types": "^4.14.3", "tslib": "^2.6.2" }, "engines": { @@ -2694,20 +2661,20 @@ } }, "node_modules/@aws-sdk/nested-clients": { - "version": "3.997.13", - "resolved": "https://registry.npmjs.org/@aws-sdk/nested-clients/-/nested-clients-3.997.13.tgz", - "integrity": "sha512-2pA6eyb5nSo/ZD2cayhOTEMoGQYgspq0RI05GDLkzQ3ajZ6isS6waV6E92Am/hz4LIlLUTrbwPLurJ/fuiHvkg==", + "version": "3.997.20", + "resolved": "https://registry.npmjs.org/@aws-sdk/nested-clients/-/nested-clients-3.997.20.tgz", + "integrity": "sha512-IYJuLpXp2DEILVQpQOy0PMpkftv0AHEOCn52o0atyOaumA0CdWQ3klPyXdViGYLbNpESsVFMVybvHUeZAuiGxA==", "license": "Apache-2.0", "dependencies": { "@aws-crypto/sha256-browser": "5.2.0", "@aws-crypto/sha256-js": "5.2.0", - "@aws-sdk/core": "^3.974.15", - "@aws-sdk/signature-v4-multi-region": "^3.996.30", - "@aws-sdk/types": "^3.973.9", - "@smithy/core": "^3.24.5", - "@smithy/fetch-http-handler": "^5.4.5", - "@smithy/node-http-handler": "^4.7.5", - "@smithy/types": "^4.14.2", + "@aws-sdk/core": "^3.974.20", + "@aws-sdk/signature-v4-multi-region": "^3.996.34", + "@aws-sdk/types": "^3.973.12", + "@smithy/core": "^3.24.6", + 
"@smithy/fetch-http-handler": "^5.4.6", + "@smithy/node-http-handler": "^4.7.6", + "@smithy/types": "^4.14.3", "tslib": "^2.6.2" }, "engines": { @@ -2731,14 +2698,14 @@ } }, "node_modules/@aws-sdk/signature-v4-multi-region": { - "version": "3.996.30", - "resolved": "https://registry.npmjs.org/@aws-sdk/signature-v4-multi-region/-/signature-v4-multi-region-3.996.30.tgz", - "integrity": "sha512-HULDLMVzkmTSEv6//7kx2kRevp/VYUpm8hJNNFbmhxDn0fUiGTxVcM9yg31TukvTq8nyOBDUN2gH0o5IRbKjdw==", + "version": "3.996.34", + "resolved": "https://registry.npmjs.org/@aws-sdk/signature-v4-multi-region/-/signature-v4-multi-region-3.996.34.tgz", + "integrity": "sha512-mx1L5qlumSOt/nKM3BFaHE2HVkWwz0i4Bw0pyYO42FfX/FeLlo8YI6csC0gSPprEk6fTIqI+CZN9RwUwKd5krQ==", "license": "Apache-2.0", "dependencies": { - "@aws-sdk/types": "^3.973.9", - "@smithy/signature-v4": "^5.4.5", - "@smithy/types": "^4.14.2", + "@aws-sdk/types": "^3.973.12", + "@smithy/signature-v4": "^5.4.6", + "@smithy/types": "^4.14.3", "tslib": "^2.6.2" }, "engines": { @@ -2746,16 +2713,16 @@ } }, "node_modules/@aws-sdk/token-providers": { - "version": "3.1056.0", - "resolved": "https://registry.npmjs.org/@aws-sdk/token-providers/-/token-providers-3.1056.0.tgz", - "integrity": "sha512-81duvlltQlsfn5K+o8zILcystBRdbT1G2JJYVCML5NZHBz4CL/zf+sAemCtBh/uh6RQUMyInGeZLQ7/8igZhbA==", + "version": "3.1066.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/token-providers/-/token-providers-3.1066.0.tgz", + "integrity": "sha512-UqEUJq7dqa44hneLDUcX7UJy95cg8YqEWyakRpvIPnrNS3Mq+UlQHgCDGu5pvwAPtlIW4qcYbvW6reG6++FyvA==", "license": "Apache-2.0", "dependencies": { - "@aws-sdk/core": "^3.974.15", - "@aws-sdk/nested-clients": "^3.997.13", - "@aws-sdk/types": "^3.973.9", - "@smithy/core": "^3.24.5", - "@smithy/types": "^4.14.2", + "@aws-sdk/core": "^3.974.20", + "@aws-sdk/nested-clients": "^3.997.20", + "@aws-sdk/types": "^3.973.12", + "@smithy/core": "^3.24.6", + "@smithy/types": "^4.14.3", "tslib": "^2.6.2" }, "engines": { @@ -2763,12 +2730,12 
@@ } }, "node_modules/@aws-sdk/types": { - "version": "3.973.9", - "resolved": "https://registry.npmjs.org/@aws-sdk/types/-/types-3.973.9.tgz", - "integrity": "sha512-kuBfgQVdcz5Bmapc4A13YbpVw/pXkesfhetcFYwbntqas8sF41OHyd4o28+/TG2ZQdHBsv90Lsu5y6oitvYCdg==", + "version": "3.973.12", + "resolved": "https://registry.npmjs.org/@aws-sdk/types/-/types-3.973.12.tgz", + "integrity": "sha512-43ajd1NF0RMgX5k0hxCNUyEdrtFUsb2aHT2QvpktSC/2Eyb2Jr/JPVqdp0XIoaHWikZJq5tNWSLO6kB5q2eMCA==", "license": "Apache-2.0", "dependencies": { - "@smithy/types": "^4.14.2", + "@smithy/types": "^4.14.3", "tslib": "^2.6.2" }, "engines": { @@ -4182,20 +4149,6 @@ "@opentelemetry/api": ">=1.3.0 <1.10.0" } }, - "node_modules/@opentelemetry/exporter-metrics-otlp-http/node_modules/protobufjs": { - "version": "8.2.0", - "resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-8.2.0.tgz", - "integrity": "sha512-oI+GC9iPxrQEr6wragljFKH46/r3rNsm6eg7F2fp6kBUMnf6/mesDRdBuF4gK+OyaKJ8N4C1B9s9cCeYdqFikg==", - "hasInstallScript": true, - "license": "BSD-3-Clause", - "dependencies": { - "@types/node": ">=13.7.0", - "long": "^5.0.0" - }, - "engines": { - "node": ">=12.0.0" - } - }, "node_modules/@opentelemetry/otlp-exporter-base": { "version": "0.215.0", "resolved": "https://registry.npmjs.org/@opentelemetry/otlp-exporter-base/-/otlp-exporter-base-0.215.0.tgz", @@ -4327,20 +4280,6 @@ "@opentelemetry/api": ">=1.3.0 <1.10.0" } }, - "node_modules/@opentelemetry/otlp-exporter-base/node_modules/protobufjs": { - "version": "8.2.0", - "resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-8.2.0.tgz", - "integrity": "sha512-oI+GC9iPxrQEr6wragljFKH46/r3rNsm6eg7F2fp6kBUMnf6/mesDRdBuF4gK+OyaKJ8N4C1B9s9cCeYdqFikg==", - "hasInstallScript": true, - "license": "BSD-3-Clause", - "dependencies": { - "@types/node": ">=13.7.0", - "long": "^5.0.0" - }, - "engines": { - "node": ">=12.0.0" - } - }, "node_modules/@opentelemetry/otlp-transformer": { "version": "0.213.0", "resolved": 
"https://registry.npmjs.org/@opentelemetry/otlp-transformer/-/otlp-transformer-0.213.0.tgz", @@ -4585,70 +4524,6 @@ "node": ">=18" } }, - "node_modules/@protobufjs/aspromise": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/@protobufjs/aspromise/-/aspromise-1.1.2.tgz", - "integrity": "sha512-j+gKExEuLmKwvz3OgROXtrJ2UG2x8Ch2YZUxahh+s1F2HZ+wAceUNLkvy6zKCPVRkU++ZWQrdxsUeQXmcg4uoQ==", - "license": "BSD-3-Clause" - }, - "node_modules/@protobufjs/base64": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/@protobufjs/base64/-/base64-1.1.2.tgz", - "integrity": "sha512-AZkcAA5vnN/v4PDqKyMR5lx7hZttPDgClv83E//FMNhR2TMcLUhfRUBHCmSl0oi9zMgDDqRUJkSxO3wm85+XLg==", - "license": "BSD-3-Clause" - }, - "node_modules/@protobufjs/codegen": { - "version": "2.0.5", - "resolved": "https://registry.npmjs.org/@protobufjs/codegen/-/codegen-2.0.5.tgz", - "integrity": "sha512-zgXFLzW3Ap33e6d0Wlj4MGIm6Ce8O89n/apUaGNB/jx+hw+ruWEp7EwGUshdLKVRCxZW12fp9r40E1mQrf/34g==", - "license": "BSD-3-Clause" - }, - "node_modules/@protobufjs/eventemitter": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/@protobufjs/eventemitter/-/eventemitter-1.1.0.tgz", - "integrity": "sha512-j9ednRT81vYJ9OfVuXG6ERSTdEL1xVsNgqpkxMsbIabzSo3goCjDIveeGv5d03om39ML71RdmrGNjG5SReBP/Q==", - "license": "BSD-3-Clause" - }, - "node_modules/@protobufjs/fetch": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/@protobufjs/fetch/-/fetch-1.1.0.tgz", - "integrity": "sha512-lljVXpqXebpsijW71PZaCYeIcE5on1w5DlQy5WH6GLbFryLUrBD4932W/E2BSpfRJWseIL4v/KPgBFxDOIdKpQ==", - "license": "BSD-3-Clause", - "dependencies": { - "@protobufjs/aspromise": "^1.1.1", - "@protobufjs/inquire": "^1.1.0" - } - }, - "node_modules/@protobufjs/float": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/@protobufjs/float/-/float-1.0.2.tgz", - "integrity": "sha512-Ddb+kVXlXst9d+R9PfTIxh1EdNkgoRe5tOX6t01f1lYWOvJnSPDBlG241QLzcyPdoNTsblLUdujGSE4RzrTZGQ==", - "license": "BSD-3-Clause" - }, - 
"node_modules/@protobufjs/inquire": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/@protobufjs/inquire/-/inquire-1.1.1.tgz", - "integrity": "sha512-mnzgDV26ueAvk7rsbt9L7bE0SuAoqyuys/sMMrmVcN5x9VsxpcG3rqAUSgDyLp0UZlmNfIbQ4fHfCtreVBk8Ew==", - "license": "BSD-3-Clause" - }, - "node_modules/@protobufjs/path": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/@protobufjs/path/-/path-1.1.2.tgz", - "integrity": "sha512-6JOcJ5Tm08dOHAbdR3GrvP+yUUfkjG5ePsHYczMFLq3ZmMkAD98cDgcT2iA1lJ9NVwFd4tH/iSSoe44YWkltEA==", - "license": "BSD-3-Clause" - }, - "node_modules/@protobufjs/pool": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/@protobufjs/pool/-/pool-1.1.0.tgz", - "integrity": "sha512-0kELaGSIDBKvcgS4zkjz1PeddatrjYcmMWOlAuAPwAeccUrPHdUqo/J6LiymHHEiJT5NrF1UVwxY14f+fy4WQw==", - "license": "BSD-3-Clause" - }, - "node_modules/@protobufjs/utf8": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/@protobufjs/utf8/-/utf8-1.1.1.tgz", - "integrity": "sha512-oOAWABowe8EAbMyWKM0tYDKi8Yaox52D+HWZhAIJqQXbqe0xI/GV7FhLWqlEKreMkfDjshR5FKgi3mnle0h6Eg==", - "license": "BSD-3-Clause" - }, "node_modules/@rolldown/binding-android-arm64": { "version": "1.0.3", "resolved": "https://registry.npmjs.org/@rolldown/binding-android-arm64/-/binding-android-arm64-1.0.3.tgz", @@ -5126,13 +5001,13 @@ } }, "node_modules/@smithy/core": { - "version": "3.24.5", - "resolved": "https://registry.npmjs.org/@smithy/core/-/core-3.24.5.tgz", - "integrity": "sha512-Kt8phUg45M15EjhYAbZ+fFikYneijLu9Liugz8ZsYz2i8j0hzGv27LWKpEHYRfvj+LyCOSijpcR/2i8RouV+cA==", + "version": "3.24.6", + "resolved": "https://registry.npmjs.org/@smithy/core/-/core-3.24.6.tgz", + "integrity": "sha512-wBXDRup6UU97VKyaiRo8AssnfStPtG0oAAfpq/bC0a1YYau8pM86YB4kM6ccoVi1mS8l/UHbn9oDM+7uozr/ug==", "license": "Apache-2.0", "dependencies": { "@aws-crypto/crc32": "5.2.0", - "@smithy/types": "^4.14.2", + "@smithy/types": "^4.14.3", "tslib": "^2.6.2" }, "engines": { @@ -5140,13 +5015,13 @@ } 
}, "node_modules/@smithy/credential-provider-imds": { - "version": "4.3.6", - "resolved": "https://registry.npmjs.org/@smithy/credential-provider-imds/-/credential-provider-imds-4.3.6.tgz", - "integrity": "sha512-tHhdiWZfG1ZIh2YcRfPJmY2gHcBmqbAzqm3ER4TIDFYsSEqTD5tICT7cgQ/kI8LRakxp12myOYyK68XPn7MnHw==", + "version": "4.3.8", + "resolved": "https://registry.npmjs.org/@smithy/credential-provider-imds/-/credential-provider-imds-4.3.8.tgz", + "integrity": "sha512-5cAM+KZC02sTqDt6NaLXyu50M/GNMd1eTzDVR8Lb0BBsVtu7RWHo47VPPEEv1vt3Yub6uzr+M5FHC+GtoT0USg==", "license": "Apache-2.0", "dependencies": { - "@smithy/core": "^3.24.5", - "@smithy/types": "^4.14.2", + "@smithy/core": "^3.24.6", + "@smithy/types": "^4.14.3", "tslib": "^2.6.2" }, "engines": { @@ -5224,13 +5099,13 @@ } }, "node_modules/@smithy/fetch-http-handler": { - "version": "5.4.5", - "resolved": "https://registry.npmjs.org/@smithy/fetch-http-handler/-/fetch-http-handler-5.4.5.tgz", - "integrity": "sha512-SK3VMeH0fibgdTg2QeB+O4p7Yy/2E5HBOHJeC58FshkDdeuX8lOgO7PfjYfLyPLP1ch55j91cQqKBzDS0mRjSQ==", + "version": "5.4.6", + "resolved": "https://registry.npmjs.org/@smithy/fetch-http-handler/-/fetch-http-handler-5.4.6.tgz", + "integrity": "sha512-FEwEYJ1jlBKdhe9TPzfghEi1bP55ZeEImlDkEa62bBBYzUcnB6RUCyuiS2mqKt6ZVjUbBgcNhzfIctH+Hevx9g==", "license": "Apache-2.0", "dependencies": { - "@smithy/core": "^3.24.5", - "@smithy/types": "^4.14.2", + "@smithy/core": "^3.24.6", + "@smithy/types": "^4.14.3", "tslib": "^2.6.2" }, "engines": { @@ -5418,13 +5293,13 @@ } }, "node_modules/@smithy/node-http-handler": { - "version": "4.7.5", - "resolved": "https://registry.npmjs.org/@smithy/node-http-handler/-/node-http-handler-4.7.5.tgz", - "integrity": "sha512-3dA9TQ+ybRSZ/m0wnbZhiBy4Dezjgq1Ib/ZZrYTpJDBgpoLLU/SDzZc/g0x0MNAdOJe1wPcM+x2PBRmoOur+Sw==", + "version": "4.7.7", + "resolved": "https://registry.npmjs.org/@smithy/node-http-handler/-/node-http-handler-4.7.7.tgz", + "integrity": 
"sha512-ZAFvHXrEk6K180EVhmZVg8GU5pUH5BSFqRs27JW3j1qEFx9YyYwWFx17x/MHcjALYimGAji7qEOlF1++be+G5A==", "license": "Apache-2.0", "dependencies": { - "@smithy/core": "^3.24.5", - "@smithy/types": "^4.14.2", + "@smithy/core": "^3.24.6", + "@smithy/types": "^4.14.3", "tslib": "^2.6.2" }, "engines": { @@ -5510,13 +5385,13 @@ } }, "node_modules/@smithy/signature-v4": { - "version": "5.4.5", - "resolved": "https://registry.npmjs.org/@smithy/signature-v4/-/signature-v4-5.4.5.tgz", - "integrity": "sha512-QBJKWGqIknH0dc9LWpfH1mkdokAx6iXYN3UcQ3eY6uIEyScuoQAhfl94ge7ozUy9WgFUdE8xsvwBjaYBbWmPNA==", + "version": "5.4.6", + "resolved": "https://registry.npmjs.org/@smithy/signature-v4/-/signature-v4-5.4.6.tgz", + "integrity": "sha512-Ojg4B6oIDlIr1R86xCDJt1zJWnYa0VINmqdjfe9qxWjdRivHalZ3iSlQgVqYbW0MdpFOC5XfHEWsnbmdnpIILQ==", "license": "Apache-2.0", "dependencies": { - "@smithy/core": "^3.24.5", - "@smithy/types": "^4.14.2", + "@smithy/core": "^3.24.6", + "@smithy/types": "^4.14.3", "tslib": "^2.6.2" }, "engines": { @@ -5542,9 +5417,9 @@ } }, "node_modules/@smithy/types": { - "version": "4.14.2", - "resolved": "https://registry.npmjs.org/@smithy/types/-/types-4.14.2.tgz", - "integrity": "sha512-P+otAxbV4CqBybp7EkcJCrig63yE2E7PuNVOmilVMRcx/O+QDzGULTrKsq4DV13gSfak9ObPrWaHl/9bL5YcWw==", + "version": "4.14.3", + "resolved": "https://registry.npmjs.org/@smithy/types/-/types-4.14.3.tgz", + "integrity": "sha512-YupL0ZWmFtJexUN2cHzkvvF/b9pKrtAIfT1o7/oY/Ppu8IYeZ+lDPM5vZdQJaSeA132dJCqojjGC9NhXeF71VQ==", "license": "Apache-2.0", "dependencies": { "tslib": "^2.6.2" @@ -6017,6 +5892,7 @@ "version": "25.5.0", "resolved": "https://registry.npmjs.org/@types/node/-/node-25.5.0.tgz", "integrity": "sha512-jp2P3tQMSxWugkCUKLRPVUpGaL5MVFwF8RDuSRztfwgN1wmqJeMSbKlnEtQqU8UrhTmzEmZdu2I6v2dpp7XIxw==", + "dev": true, "license": "MIT", "dependencies": { "undici-types": "~7.18.0" @@ -13740,24 +13616,12 @@ } }, "node_modules/protobufjs": { - "version": "7.5.8", - "resolved": 
"https://registry.npmjs.org/protobufjs/-/protobufjs-7.5.8.tgz", - "integrity": "sha512-dvpCIeLPbXZS/Ete7yLaO7RenOdken2NHKykBXbsaGxZT0UTltcarBciw+A78SRQs9iMAAVpsYA+l8b1hTePIA==", - "hasInstallScript": true, + "version": "8.6.3", + "resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-8.6.3.tgz", + "integrity": "sha512-alQyzT0j401LGBtwsqu6uprjR6pfNH1UJf9N6GBFMjIcd+HzTe0/HrjAbFCqun+zvnfLarrxAtMM2xvZ+kFZ5A==", "license": "BSD-3-Clause", "dependencies": { - "@protobufjs/aspromise": "^1.1.2", - "@protobufjs/base64": "^1.1.2", - "@protobufjs/codegen": "^2.0.5", - "@protobufjs/eventemitter": "^1.1.0", - "@protobufjs/fetch": "^1.1.0", - "@protobufjs/float": "^1.0.2", - "@protobufjs/inquire": "^1.1.1", - "@protobufjs/path": "^1.1.2", - "@protobufjs/pool": "^1.1.0", - "@protobufjs/utf8": "^1.1.1", - "@types/node": ">=13.7.0", - "long": "^5.0.0" + "long": "^5.3.2" }, "engines": { "node": ">=12.0.0" @@ -16152,6 +16016,7 @@ "version": "7.18.2", "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.18.2.tgz", "integrity": "sha512-AsuCzffGHJybSaRrmr5eHr81mwJU3kjw6M+uprWvCXiNeN9SOGwQ3Jn8jb8m3Z6izVgknn1R0FTCEAP2QrLY/w==", + "dev": true, "license": "MIT" }, "node_modules/unicorn-magic": { diff --git a/package.json b/package.json index 22db4c52d..5594dd505 100644 --- a/package.json +++ b/package.json @@ -80,15 +80,15 @@ "@aws-sdk/client-application-signals": "^3.1003.0", "@aws-sdk/client-bedrock": "^3.1012.0", "@aws-sdk/client-bedrock-agent": "^3.1012.0", - "@aws-sdk/client-bedrock-agentcore": "^3.1020.0", - "@aws-sdk/client-bedrock-agentcore-control": "^3.1054.0", + "@aws-sdk/client-bedrock-agentcore": "^3.1061.0", + "@aws-sdk/client-bedrock-agentcore-control": "^3.1061.0", "@aws-sdk/client-bedrock-runtime": "^3.893.0", "@aws-sdk/client-cloudformation": "^3.893.0", "@aws-sdk/client-cloudwatch-logs": "^3.893.0", - "@aws-sdk/client-efs": "^3.1049.0", + "@aws-sdk/client-efs": "^3.1067.0", "@aws-sdk/client-resource-groups-tagging-api": "^3.893.0", 
"@aws-sdk/client-s3": "^3.1012.0", - "@aws-sdk/client-s3files": "^3.1049.0", + "@aws-sdk/client-s3files": "^3.1067.0", "@aws-sdk/client-sts": "^3.893.0", "@aws-sdk/client-xray": "^3.1003.0", "@aws-sdk/credential-providers": "^3.893.0", @@ -162,13 +162,15 @@ "minimatch": "GHSA-7r86-cg39-jmmj, GHSA-23c5-xmqv-rm74: minimatch 10.0.0-10.2.2 has ReDoS vulnerabilities. Multiple transitive deps (eslint, typescript-eslint, eslint-plugin-import, eslint-plugin-react, prettier-plugin-sort-imports, aws-cdk-lib) pin older versions. Remove this override once upstream packages update their minimatch dependency to >=10.2.3.", "glob": "glob <12 is deprecated and emits npm install warnings (https://github.com/isaacs/node-glob). Pulled in transitively via archiver-utils@5.0.2 (latest), which still pins glob@^10.0.0. archiver-utils only uses glob.sync(pattern, options), which remains compatible in glob@13. Remove this override once archiver-utils updates its glob dependency.", "fast-xml-parser": "GHSA-8gc5-j5rx-235r, GHSA-jp2q-39xq-3w4g: fast-xml-parser <=5.5.6 has entity expansion bypass (CVE-2026-33036, CVE-2026-33349). Transitive via @aws-sdk/xml-builder. Remove once @aws-sdk updates to fast-xml-parser >=5.5.7.", - "@aws-sdk/xml-builder": "aws/aws-sdk-js-v3#7867: @aws-sdk/xml-builder <3.972.14 does not configure maxTotalExpansions on fast-xml-parser, causing 'Entity expansion limit exceeded' on large CloudFormation responses. Remove once @aws-sdk/client-* deps are bumped past 3.972.14." + "@aws-sdk/xml-builder": "aws/aws-sdk-js-v3#7867: @aws-sdk/xml-builder <3.972.14 does not configure maxTotalExpansions on fast-xml-parser, causing 'Entity expansion limit exceeded' on large CloudFormation responses. Remove once @aws-sdk/client-* deps are bumped past 3.972.14.", + "protobufjs": "GHSA-f38q-mgvj-vph7, GHSA-wcpc-wj8m-hjx6, GHSA-94rc-8x27-4472: protobufjs <=7.6.2 / 8.0.0-8.5.0 has property shadowing and DoS vulnerabilities. Transitive via @opentelemetry/otlp-transformer. 
Remove once @opentelemetry updates to protobufjs >=7.6.3 or >=8.5.1." }, "overrides": { "minimatch": "10.2.4", "glob": "^13.0.0", "fast-xml-parser": "5.5.7", - "@aws-sdk/xml-builder": "3.972.15" + "@aws-sdk/xml-builder": "3.972.15", + "protobufjs": ">=7.6.3" }, "engines": { "node": ">=20" diff --git a/schemas/agentcore.schema.v1.json b/schemas/agentcore.schema.v1.json index 36e417528..8780a3ded 100644 --- a/schemas/agentcore.schema.v1.json +++ b/schemas/agentcore.schema.v1.json @@ -284,76 +284,22 @@ "filesystemConfigurations": { "type": "array", "items": { - "anyOf": [ - { - "type": "object", - "properties": { - "sessionStorage": { - "type": "object", - "properties": { - "mountPath": { - "type": "string", - "minLength": 6, - "maxLength": 200, - "pattern": "^\\/mnt\\/[a-zA-Z0-9._-]+\\/?$" - } - }, - "required": ["mountPath"], - "additionalProperties": false - } - }, - "required": ["sessionStorage"], - "additionalProperties": false - }, - { - "type": "object", - "properties": { - "efsAccessPoint": { - "type": "object", - "properties": { - "accessPointArn": { - "type": "string", - "pattern": "^arn:aws[-a-z]*:elasticfilesystem:[a-z][a-z0-9-]*:[0-9]{12}:access-point\\/fsap-[0-9a-f]{8,40}$" - }, - "mountPath": { - "type": "string", - "minLength": 6, - "maxLength": 200, - "pattern": "^\\/mnt\\/[a-zA-Z0-9._-]+\\/?$" - } - }, - "required": ["accessPointArn", "mountPath"], - "additionalProperties": false - } - }, - "required": ["efsAccessPoint"], - "additionalProperties": false - }, - { + "type": "object", + "properties": { + "sessionStorage": { "type": "object", "properties": { - "s3FilesAccessPoint": { - "type": "object", - "properties": { - "accessPointArn": { - "type": "string", - "pattern": "^arn:aws[-a-z]*:s3files:[a-z][a-z0-9-]*:[0-9]{12}:file-system\\/fs-[0-9a-f]{17,40}\\/access-point\\/fsap-[0-9a-f]{17,40}$" - }, - "mountPath": { - "type": "string", - "minLength": 6, - "maxLength": 200, - "pattern": "^\\/mnt\\/[a-zA-Z0-9._-]+\\/?$" - } - }, - "required": 
["accessPointArn", "mountPath"], - "additionalProperties": false + "mountPath": { + "type": "string", + "pattern": "^\\/mnt\\/[^/]+$" } }, - "required": ["s3FilesAccessPoint"], + "required": ["mountPath"], "additionalProperties": false } - ] + }, + "required": ["sessionStorage"], + "additionalProperties": false } }, "endpoints": { @@ -604,27 +550,6 @@ }, "required": ["authorizerType", "name"], "additionalProperties": false - }, - { - "type": "object", - "properties": { - "authorizerType": { - "type": "string", - "const": "PaymentCredentialProvider" - }, - "name": { - "type": "string", - "minLength": 1, - "maxLength": 128, - "pattern": "^[a-zA-Z0-9\\-_]+$" - }, - "provider": { - "type": "string", - "enum": ["CoinbaseCDP", "StripePrivy"] - } - }, - "required": ["authorizerType", "name", "provider"], - "additionalProperties": false } ] } @@ -2325,112 +2250,6 @@ "required": ["name", "schemaType", "config"], "additionalProperties": false } - }, - "payments": { - "type": "array", - "items": { - "type": "object", - "properties": { - "name": { - "type": "string", - "minLength": 1, - "maxLength": 48, - "pattern": "^[a-zA-Z][a-zA-Z0-9]{0,47}$" - }, - "authorizerType": { - "default": "AWS_IAM", - "type": "string", - "enum": ["AWS_IAM", "CUSTOM_JWT"] - }, - "authorizerConfiguration": { - "type": "object", - "properties": { - "customJWTAuthorizer": { - "type": "object", - "properties": { - "discoveryUrl": { - "type": "string", - "format": "uri" - }, - "allowedClients": { - "type": "array", - "items": { - "type": "string" - } - }, - "allowedAudience": { - "type": "array", - "items": { - "type": "string" - } - }, - "allowedScopes": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "required": ["discoveryUrl"], - "additionalProperties": false - } - }, - "required": ["customJWTAuthorizer"], - "additionalProperties": false - }, - "connectors": { - "default": [], - "type": "array", - "items": { - "type": "object", - "properties": { - "name": { - "type": "string", - 
"minLength": 1, - "maxLength": 48, - "pattern": "^[a-zA-Z][a-zA-Z0-9_]{0,47}$" - }, - "provider": { - "default": "CoinbaseCDP", - "type": "string", - "enum": ["CoinbaseCDP", "StripePrivy"] - }, - "credentialName": { - "type": "string", - "minLength": 1 - } - }, - "required": ["name", "credentialName"], - "additionalProperties": false - } - }, - "description": { - "type": "string" - }, - "autoPayment": { - "default": true, - "type": "boolean" - }, - "defaultSpendLimit": { - "default": "10.00", - "type": "string" - }, - "paymentToolAllowlist": { - "type": "array", - "items": { - "type": "string" - } - }, - "networkPreferences": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "required": ["name"], - "additionalProperties": false - } } }, "required": ["name", "version"], diff --git a/scripts/run-e2e-local.sh b/scripts/run-e2e-local.sh index 6962c1bd2..26cd86385 100755 --- a/scripts/run-e2e-local.sh +++ b/scripts/run-e2e-local.sh @@ -8,6 +8,7 @@ # # Optional env vars: # AWS_REGION — defaults to us-east-1 +# CDK_REPO_PATH — local path to CDK constructs repo; if set, builds and uses it as CDK_TARBALL # BUILD_PREVIEW — set to 1 to build a preview CLI and run the harness e2e tests # (e2e-tests/harness-*.test.ts). Harness features are gated to preview # builds; without this they self-skip. 
The var is read at build time @@ -127,6 +128,16 @@ npm install -g "$TARBALL" echo "✅ Installed: $(agentcore --version)" echo "=== Running E2E tests ===" +if [[ -n "${CDK_REPO_PATH:-}" ]]; then + echo "=== Building CDK constructs from $CDK_REPO_PATH ===" + pushd "$CDK_REPO_PATH" > /dev/null + npm ci + npm run build + CDK_TARBALL_NAME=$(npm pack | tail -1) + export CDK_TARBALL="$CDK_REPO_PATH/$CDK_TARBALL_NAME" + popd > /dev/null + echo "✅ CDK_TARBALL=$CDK_TARBALL" +fi if [[ "$RUN_ALL" == "true" ]]; then echo "Running full e2e suite" npx vitest run --project e2e diff --git a/src/assets/__tests__/__snapshots__/assets.snapshot.test.ts.snap b/src/assets/__tests__/__snapshots__/assets.snapshot.test.ts.snap index 493893070..4b42f93a5 100644 --- a/src/assets/__tests__/__snapshots__/assets.snapshot.test.ts.snap +++ b/src/assets/__tests__/__snapshots__/assets.snapshot.test.ts.snap @@ -44,8 +44,8 @@ agentcore status # checks deployment status exports[`Assets Directory Snapshots > CDK assets > cdk/cdk/bin/cdk.ts should match snapshot 1`] = ` "#!/usr/bin/env node -import { AgentCoreStack } from '../lib/cdk-stack'; -import { ConfigIO, type AwsDeploymentTarget } from '@aws/agentcore-cdk'; +import { AgentCoreStack, type HarnessConfig } from '../lib/cdk-stack'; +import { ConfigIO, HarnessSpecSchema, type AwsDeploymentTarget } from '@aws/agentcore-cdk'; import { App, type Environment } from 'aws-cdk-lib'; import * as path from 'path'; import * as fs from 'fs'; @@ -101,40 +101,61 @@ async function main() { throw new Error('No deployment targets configured. Please define targets in agentcore/aws-targets.json'); } - // Read harness configs for role creation. + // Read harness configs: the full validated spec drives the CFN resource; the + // role-scoped fields drive the IAM role + container build. 
const projectRoot = path.resolve(configRoot, '..'); - const harnessConfigs: { - name: string; - executionRoleArn?: string; - memoryName?: string; - containerUri?: string; - hasDockerfile?: boolean; - dockerfile?: string; - codeLocation?: string; - tools?: { type: string; name: string }[]; - apiKeyArn?: string; - efsAccessPoints?: { accessPointArn: string; mountPath: string }[]; - s3AccessPoints?: { accessPointArn: string; mountPath: string }[]; - apiFormat?: 'converse_stream' | 'responses' | 'chat_completions'; - }[] = []; - for (const entry of specAny.harnesses ?? []) { + + // Read non-S3 KB connector-config files and pass their parsed contents to the + // L3 verbatim. The L3 does not read files; it expects the parsed + // connectorParameters keyed by the data source's connectorConfigFile path. + const connectorParametersByFile: Record> = {}; + for (const kb of specAny.knowledgeBases ?? []) { + for (const ds of kb.dataSources ?? []) { + if (ds.type !== 'S3' && ds.connectorConfigFile) { + const abs = path.resolve(projectRoot, ds.connectorConfigFile); + try { + connectorParametersByFile[ds.connectorConfigFile] = JSON.parse(fs.readFileSync(abs, 'utf-8')); + } catch (err) { + throw new Error( + \`Could not read connector config '\${ds.connectorConfigFile}' for knowledge base '\${kb.name}' at \${abs}: \${err instanceof Error ? err.message : err}\` + ); + } + } + } + } + + // Harness is preview-gated. The CLI bundle bakes the preview flag at build time and + // forwards it to this child process via AGENTCORE_PREVIEW (see toolkit-lib/wrapper.ts). + // This app is built separately and cannot see that build-time define, so it gates on the + // env var. Absent/anything-but-'1' defaults to off so a stale harnesses[] entry in a + // non-preview build never synthesizes an AWS::BedrockAgentCore::Harness resource. + const previewEnabled = process.env.AGENTCORE_PREVIEW === '1'; + + const harnessConfigs: HarnessConfig[] = []; + for (const entry of previewEnabled ? 
(specAny.harnesses ?? []) : []) { const harnessDir = path.resolve(projectRoot, entry.path); const harnessPath = path.resolve(harnessDir, 'harness.json'); try { - const harnessSpec = JSON.parse(fs.readFileSync(harnessPath, 'utf-8')); + const harnessSpec = HarnessSpecSchema.parse(JSON.parse(fs.readFileSync(harnessPath, 'utf-8'))); harnessConfigs.push({ name: entry.name, executionRoleArn: harnessSpec.executionRoleArn, - memoryName: harnessSpec.memory?.name, + // Only an \`existing\` memory ref carries a name to wire IAM against; managed memory is + // owned by the harness (no sibling) and disabled has none — both resolve to undefined. + memoryName: harnessSpec.memory?.mode === 'existing' ? harnessSpec.memory.name : undefined, containerUri: harnessSpec.containerUri, hasDockerfile: !!harnessSpec.dockerfile, dockerfile: harnessSpec.dockerfile, codeLocation: harnessSpec.dockerfile ? harnessDir : undefined, tools: harnessSpec.tools, + skills: harnessSpec.skills, apiKeyArn: harnessSpec.model?.apiKeyArn, efsAccessPoints: harnessSpec.efsAccessPoints, s3AccessPoints: harnessSpec.s3AccessPoints, apiFormat: harnessSpec.model?.apiFormat, + // Full spec + dir drive the AWS::BedrockAgentCore::Harness CFN resource. + spec: harnessSpec, + harnessDir, }); } catch (err) { throw new Error( @@ -201,6 +222,7 @@ async function main() { spec, mcpSpec, credentials, + connectorParametersByFile, harnesses: harnessConfigs.length > 0 ? 
harnessConfigs : undefined, paymentSpec, env, @@ -349,24 +371,18 @@ exports[`Assets Directory Snapshots > CDK assets > cdk/cdk/lib/cdk-stack.ts shou type AgentCoreProjectSpec, type AgentCoreMcpSpec, type CustomJWTAuthorizerConfig, + type HarnessDeploymentConfig, } from '@aws/agentcore-cdk'; import { CfnOutput, Stack, type StackProps } from 'aws-cdk-lib'; import * as iam from 'aws-cdk-lib/aws-iam'; import { Construct } from 'constructs'; -export interface HarnessConfig { - name: string; - executionRoleArn?: string; - memoryName?: string; - containerUri?: string; - hasDockerfile?: boolean; - dockerfile?: string; - codeLocation?: string; - tools?: { type: string; name: string }[]; - apiKeyArn?: string; - efsAccessPoints?: { accessPointArn: string; mountPath: string }[]; - s3AccessPoints?: { accessPointArn: string; mountPath: string }[]; -} +/** + * Harness deployment config: role-scoped fields (for IAM role + container build) + * plus the full validated spec + its config directory so the L3 construct can + * synthesize the AWS::BedrockAgentCore::Harness resource. + */ +export type HarnessConfig = HarnessDeploymentConfig; export interface PaymentConnectorSpec { name: string; @@ -402,6 +418,11 @@ export interface AgentCoreStackProps extends StackProps { * Harness role configurations. */ harnesses?: HarnessConfig[]; + /** + * Parsed connectorParameters for non-S3 KB data sources, keyed by + * connectorConfigFile path. Forwarded to AgentCoreApplication. + */ + connectorParametersByFile?: Record>; /** * Payment specifications with resolved credential provider ARNs. 
*/ @@ -441,7 +462,7 @@ export class AgentCoreStack extends Stack { constructor(scope: Construct, id: string, props: AgentCoreStackProps) { super(scope, id, props); - const { spec, mcpSpec, credentials, harnesses, paymentSpec } = props; + const { spec, mcpSpec, credentials, harnesses, connectorParametersByFile, paymentSpec } = props; // Create AgentCoreApplication with all agents and harness roles // eslint-disable-next-line @typescript-eslint/no-explicit-any @@ -449,6 +470,12 @@ export class AgentCoreStack extends Stack { if (harnesses?.length) { appProps.harnesses = harnesses; } + if (connectorParametersByFile && Object.keys(connectorParametersByFile).length > 0) { + appProps.connectorParametersByFile = connectorParametersByFile; + } + if (credentials) { + appProps.credentials = credentials; + } this.application = new AgentCoreApplication(this, 'Application', appProps as any); // Create AgentCoreMcp if there are gateways configured @@ -649,13 +676,14 @@ test('AgentCoreStack synthesizes with empty spec', () => { credentials: [], evaluators: [], onlineEvalConfigs: [], + configBundles: [], policyEngines: [], payments: [], - configBundles: [], agentCoreGateways: [], mcpRuntimeTools: [], unassignedTargets: [], datasets: [], + knowledgeBases: [], }, }); const template = Template.fromStack(stack); @@ -810,6 +838,8 @@ exports[`Assets Directory Snapshots > File listing > should match the expected f "python/http/strands/base/model/__init__.py", "python/http/strands/base/model/load.py", "python/http/strands/base/pyproject.toml", + "python/http/strands/base/skills/fetcher.py", + "python/http/strands/capabilities/execution-limits/hooks/execution_limits.py", "python/http/strands/capabilities/memory/__init__.py", "python/http/strands/capabilities/memory/session.py", "python/http/strands/capabilities/payments/__init__.py", @@ -2227,8 +2257,11 @@ def get_memory_session_manager(session_id: Optional[str], actor_id: str) -> Opti {{#if (includes memoryProviders.[0].strategies 
"USER_PREFERENCE")}} f"/users/{actor_id}/preferences": RetrievalConfig(top_k=3, relevance_score=0.5), {{/if}} +{{#if (includes memoryProviders.[0].strategies "EPISODIC")}} + f"/episodes/{actor_id}/{session_id}": RetrievalConfig(top_k=5, relevance_score=0.5), +{{/if}} {{#if (includes memoryProviders.[0].strategies "SUMMARIZATION")}} - f"/summaries/{actor_id}": RetrievalConfig(top_k=3, relevance_score=0.5), + f"/summaries/{actor_id}/{session_id}": RetrievalConfig(top_k=3, relevance_score=0.5), {{/if}} } {{/if}} @@ -3246,8 +3279,11 @@ def get_memory_session_manager(session_id: Optional[str], actor_id: str) -> Opti {{#if (includes memoryProviders.[0].strategies "USER_PREFERENCE")}} f"/users/{actor_id}/preferences": RetrievalConfig(top_k=3, relevance_score=0.5), {{/if}} +{{#if (includes memoryProviders.[0].strategies "EPISODIC")}} + f"/episodes/{actor_id}/{session_id}": RetrievalConfig(top_k=5, relevance_score=0.5), +{{/if}} {{#if (includes memoryProviders.[0].strategies "SUMMARIZATION")}} - f"/summaries/{actor_id}": RetrievalConfig(top_k=3, relevance_score=0.5), + f"/summaries/{actor_id}/{session_id}": RetrievalConfig(top_k=3, relevance_score=0.5), {{/if}} } {{/if}} @@ -5179,25 +5215,77 @@ Thumbs.db" exports[`Assets Directory Snapshots > Python framework assets > python/python/http/strands/base/main.py should match snapshot 1`] = ` "from typing import Any +{{#if inlineFunctionTools}} +import json +from strands.tools.tools import PythonAgentTool +from strands.types.tools import ToolResult, ToolUse +{{/if}} from strands import Agent, tool +{{#if hasSkillsFetcher}} +from strands import AgentSkills +{{#if hasFetchedSkills}} +from skills.fetcher import resolve_s3_skills, resolve_git_skills +{{/if}} +{{#if (some gitSkills "credentialArn")}} +from bedrock_agentcore.services.identity import IdentityClient +{{/if}} +{{/if}} +import asyncio +{{#if hasShell}} +import subprocess +{{/if}} +{{#if hasFileOperations}} +import os +{{/if}} +{{#if hasExecutionLimits}} +from 
strands.tools.executors import SequentialToolExecutor +from strands.types.exceptions import EventLoopException +from hooks.execution_limits import ExecutionLimitExceeded, ExecutionLimitsHook +{{/if}} {{#if hasConfigBundle}} from strands.hooks import HookProvider, HookRegistry, BeforeInvocationEvent, BeforeToolCallEvent +{{/if}} +{{#if truncationStrategy}} +{{#if (eq truncationStrategy "sliding_window")}} +from strands.agent.conversation_manager.sliding_window_conversation_manager import SlidingWindowConversationManager +{{/if}} +{{#if (eq truncationStrategy "summarization")}} +from strands.agent.conversation_manager.summarizing_conversation_manager import SummarizingConversationManager +{{/if}} +{{else}} +from strands.agent.conversation_manager.null_conversation_manager import NullConversationManager +{{/if}} +{{#if hasConfigBundle}} from bedrock_agentcore.runtime.context import BedrockAgentCoreContext {{/if}} +{{#if hasBrowser}} +from strands_tools.browser import AgentCoreBrowser +{{/if}} +{{#if hasCodeInterpreter}} +from strands_tools.code_interpreter import AgentCoreCodeInterpreter +{{/if}} from bedrock_agentcore.runtime import BedrockAgentCoreApp from model.load import load_model {{#if hasGateway}} from mcp_client.client import get_all_gateway_mcp_clients -{{else}} -from mcp_client.client import get_streamable_http_mcp_client {{/if}} +{{#if remoteMcpTools}} +from mcp_client.client import get_all_remote_mcp_clients +{{/if}} +{{#unless (or hasGateway remoteMcpTools)}} +{{#unless isExportHarness}} +from mcp_client.client import get_streamable_http_mcp_client +{{/unless}} +{{/unless}} {{#if hasMemory}} from memory.session import get_memory_session_manager {{/if}} -{{#if needsOs}} +{{#unless hasFileOperations}} +{{#if (or needsOs (some gitSkills "credentialArn"))}} import os {{/if}} +{{/unless}} {{#if hasPayment}} from capabilities.payments.payments import create_payments_plugin, PAYMENT_SYSTEM_PROMPT {{/if}} @@ -5205,22 +5293,35 @@ from 
capabilities.payments.payments import create_payments_plugin, PAYMENT_SYSTE app = BedrockAgentCoreApp() log = app.logger -# Define a Streamable HTTP MCP Client +{{#if (or hasGateway remoteMcpTools)}} +# Define MCP clients for all configured MCP servers (gateways and/or remote MCP) +mcp_clients = [] {{#if hasGateway}} -mcp_clients = get_all_gateway_mcp_clients() +mcp_clients += get_all_gateway_mcp_clients() +{{/if}} +{{#if remoteMcpTools}} +mcp_clients += get_all_remote_mcp_clients() +{{/if}} {{else}} +{{#unless isExportHarness}} +# Define a Streamable HTTP MCP Client mcp_clients = [get_streamable_http_mcp_client()] +{{/unless}} {{/if}} +{{#if systemPromptText}} +DEFAULT_SYSTEM_PROMPT = """{{escapePyStr systemPromptText}}""" +{{else}} DEFAULT_SYSTEM_PROMPT = """ You are a helpful assistant. Use tools when appropriate. -{{#if needsOs}} +{{#if needsOs}}{{#unless isExportHarness}} You have access to the following mounted filesystems. Use file_read, file_write, and list_files with full absolute paths: {{#if sessionStorageMountPath}}- {{sessionStorageMountPath}}: ephemeral session storage (lost when session ends) {{/if}}{{#each efsMounts}}- {{mountPath}}: EFS persistent storage (persists across sessions and agent restarts) {{/each}}{{#each s3Mounts}}- {{mountPath}}: S3 Files persistent storage (durable, backed by S3) -{{/each}}{{/if}} +{{/each}}{{/unless}}{{/if}} """ +{{/if}} {{#if hasConfigBundle}} DEFAULT_TOOL_DESC = "Return the sum of two numbers" @@ -5229,6 +5330,30 @@ DEFAULT_TOOL_DESC = "Return the sum of two numbers" # Define a collection of tools used by the model tools = [] +{{#if inlineFunctionTools}} +# Inline function tools — stop the agent loop so the tool call streams back to the caller +def _make_inline_tool(name: str, spec: dict) -> PythonAgentTool: + def _handler(tool: ToolUse, **kwargs: Any) -> ToolResult: + kwargs.get("request_state", {})["stop_event_loop"] = True + return {"toolUseId": tool["toolUseId"], "status": "success", "content": [{"text": " 
"}]} + _handler.__name__ = name + return PythonAgentTool(tool_name=name, tool_spec=spec, tool_func=_handler) + +{{#each inlineFunctionTools}} +_INLINE_SPEC_{{snakeCase name}} = { + "name": "{{name}}", + "description": {{safeJson description}}, + "inputSchema": {"json": json.loads({{pyJsonStr inputSchema}}) }, +} +tools.append(_make_inline_tool("{{name}}", _INLINE_SPEC_{{snakeCase name}})) +{{/each}} + +_INLINE_FUNCTION_NAMES = { {{#each inlineFunctionTools}}"{{name}}"{{#unless @last}}, {{/unless}}{{/each}} } + +{{else}} +_INLINE_FUNCTION_NAMES = set() + +{{#unless isExportHarness}} # Define a simple function tool {{#if hasConfigBundle}} @tool(description=DEFAULT_TOOL_DESC) @@ -5240,7 +5365,116 @@ def add_numbers(a: int, b: int) -> int: return a+b tools.append(add_numbers) -{{#if needsOs}} +{{/unless}} +{{/if}} +{{#if hasBrowser}} +tools.append(AgentCoreBrowser({{#if browserIdentifier}}identifier="{{browserIdentifier}}"{{/if}}).browser) +{{/if}} +{{#if hasCodeInterpreter}} +tools.append(AgentCoreCodeInterpreter({{#if codeInterpreterIdentifier}}identifier="{{codeInterpreterIdentifier}}"{{/if}}).code_interpreter) +{{/if}} +{{#if hasShell}} +@tool +def shell(command: str, timeout: int = 300) -> dict: + """Execute a bash command and return the results. + + Args: + command: The bash command to execute + timeout: Timeout in seconds (default: 300) + + Returns: + Dict with stdout, stderr, and exit_code + """ + result = subprocess.run( + command, shell=True, capture_output=True, text=True, timeout=timeout + ) + return {"stdout": result.stdout, "stderr": result.stderr, "exit_code": result.returncode} + +tools.append(shell) +{{/if}} +{{#if hasFileOperations}} +@tool +def file_operations( + command: str, + path: str, + old_str: str = None, + new_str: str = None, + file_text: str = None, + insert_line: int = None, + view_range: list = None, +) -> str: + """Text editor tool for viewing and modifying files. 
+ + Args: + command: The command to execute ("view", "str_replace", "create", "insert") + path: Path to the file or directory + old_str: Text to replace (for str_replace command) + new_str: Replacement text (for str_replace and insert commands) + file_text: Content for new file (for create command) + insert_line: Line number to insert after (for insert command) + view_range: [start_line, end_line] for viewing specific lines (for view command) + + Returns: + Result of the operation + """ + try: + if command == "view": + if not os.path.exists(path): + return f"Error: Path '{path}' does not exist" + if os.path.isdir(path): + return "\\n".join(os.listdir(path)) + with open(path) as f: + lines = f.read().splitlines() + if view_range: + start, end = view_range + start_idx = max(0, start - 1) + end_idx = len(lines) if end == -1 else min(len(lines), end) + lines = lines[start_idx:end_idx] + start_num = start_idx + 1 + else: + start_num = 1 + return "\\n".join(f"{start_num + i}: {line}" for i, line in enumerate(lines)) + elif command == "str_replace": + if old_str is None or new_str is None: + return "Error: str_replace requires both old_str and new_str parameters" + if not os.path.exists(path): + return f"Error: File '{path}' does not exist" + content = open(path).read() + if old_str not in content: + return "Error: Text not found in file" + count = content.count(old_str) + if count > 1: + return f"Error: Text appears {count} times in file. Please be more specific." 
+ open(path, "w").write(content.replace(old_str, new_str, 1)) + return f"Successfully replaced text in '{path}'" + elif command == "create": + if file_text is None: + return "Error: create requires file_text parameter" + os.makedirs(os.path.dirname(os.path.abspath(path)), exist_ok=True) + open(path, "w").write(file_text) + return f"Successfully created file '{path}'" + elif command == "insert": + if new_str is None or insert_line is None: + return "Error: insert requires both new_str and insert_line parameters" + if not os.path.exists(path): + return f"Error: File '{path}' does not exist" + lines = open(path).read().splitlines(True) + if insert_line == 0: + lines.insert(0, new_str + "\\n") + elif insert_line >= len(lines): + lines.append(new_str + "\\n") + else: + lines.insert(insert_line, new_str + "\\n") + open(path, "w").write("".join(lines)) + return f"Successfully inserted text in '{path}' at line {insert_line + 1}" + else: + return f"Error: Unknown command '{command}'" + except Exception as e: + return f"Error: {e}" + +tools.append(file_operations) +{{/if}} +{{#if needsOs}}{{#unless isExportHarness}} _MOUNT_PATHS = [ {{#if sessionStorageMountPath}}"{{sessionStorageMountPath}}",{{/if}} {{#each efsMounts}}"{{mountPath}}",{{/each}} @@ -5294,12 +5528,21 @@ def list_files(path: str) -> str: return f"Error listing '{path}': {e.strerror}" tools.extend([file_read, file_write, list_files]) -{{/if}} +{{/unless}}{{/if}} +{{#if (or hasGateway remoteMcpTools)}} +# Add MCP clients to tools +for mcp_client in mcp_clients: + if mcp_client: + tools.append(mcp_client) +{{else}} +{{#unless isExportHarness}} # Add MCP client to tools if available for mcp_client in mcp_clients: if mcp_client: tools.append(mcp_client) +{{/unless}} +{{/if}} {{#if hasConfigBundle}} @@ -5335,19 +5578,62 @@ class ConfigBundleHook(HookProvider): {{/if}} +def _make_conversation_manager(): +{{#if truncationStrategy}} +{{#if (eq truncationStrategy "sliding_window")}} +{{#if truncationConfig}} + return 
SlidingWindowConversationManager(**{{safeJson truncationConfig}}, per_turn=True) +{{else}} + return SlidingWindowConversationManager(per_turn=True) +{{/if}} +{{else}} +{{#if truncationConfig}} + return SummarizingConversationManager(**{{safeJson truncationConfig}}) +{{else}} + return SummarizingConversationManager() +{{/if}} +{{/if}} +{{else}} + return NullConversationManager() +{{/if}} + {{#if hasMemory}} {{#unless hasPayment}} def agent_factory(): cache = {} - def get_or_create_agent(session_id, user_id): - key = f"{session_id}/{user_id}" + def get_or_create_agent(session_id, user_id{{#if hasSkillsFetcher}}, skill_plugins=None{{/if}}): + {{#if actorId}} + _actor_id = "{{actorId}}" + {{else}} + _actor_id = user_id + {{/if}} + key = f"{session_id}/{_actor_id}" if key not in cache: cache[key] = Agent( model=load_model(), - session_manager=get_memory_session_manager(session_id, user_id), + session_manager=get_memory_session_manager(session_id, _actor_id), + conversation_manager=_make_conversation_manager(), system_prompt=DEFAULT_SYSTEM_PROMPT, - tools=tools{{#if hasConfigBundle}}, - hooks=[ConfigBundleHook()]{{/if}} + tools=tools, + {{#if hasSkillsFetcher}} + plugins=skill_plugins or None, + {{/if}} + {{#if hasExecutionLimits}} + tool_executor=SequentialToolExecutor(), + callback_handler=None, + {{/if}} + hooks=[ + {{#if hasExecutionLimits}} + ExecutionLimitsHook( + {{#if maxIterations}}max_iterations={{maxIterations}},{{/if}} + {{#if maxTokens}}max_tokens={{maxTokens}},{{/if}} + {{#if timeoutSeconds}}timeout_seconds={{timeoutSeconds}},{{/if}} + ), + {{/if}} + {{#if hasConfigBundle}} + ConfigBundleHook(), + {{/if}} + ], ) return cache[key] return get_or_create_agent @@ -5355,24 +5641,45 @@ get_or_create_agent = agent_factory() {{/unless}} {{else}} {{#if hasConfigBundle}} -def create_agent(): +def create_agent({{#if hasSkillsFetcher}}skill_plugins=None{{/if}}): return Agent( model=load_model(), system_prompt=DEFAULT_SYSTEM_PROMPT, tools=tools, + 
conversation_manager=_make_conversation_manager(), + {{#if hasSkillsFetcher}} + plugins=skill_plugins or None, + {{/if}} hooks=[ConfigBundleHook()], ) {{else}} {{#unless hasPayment}} _agent = None -def get_or_create_agent(): +def get_or_create_agent({{#if hasSkillsFetcher}}skill_plugins=None{{/if}}): global _agent if _agent is None: _agent = Agent( model=load_model(), system_prompt=DEFAULT_SYSTEM_PROMPT, tools=tools, + conversation_manager=_make_conversation_manager(), + {{#if hasSkillsFetcher}} + plugins=skill_plugins or None, + {{/if}} + {{#if hasExecutionLimits}} + tool_executor=SequentialToolExecutor(), + callback_handler=None, + {{/if}} + hooks=[ + {{#if hasExecutionLimits}} + ExecutionLimitsHook( + {{#if maxIterations}}max_iterations={{maxIterations}},{{/if}} + {{#if maxTokens}}max_tokens={{maxTokens}},{{/if}} + {{#if timeoutSeconds}}timeout_seconds={{timeoutSeconds}},{{/if}} + ), + {{/if}} + ], ) return _agent {{/unless}} @@ -5380,6 +5687,42 @@ def get_or_create_agent(): {{/if}} +def _extract_prompt(payload: dict): + """Accept harness-style messages[], tool_results[], or plain prompt string payloads.""" + if "messages" in payload: + return payload["messages"] + if "tool_results" in payload: + return [{"role": "user", "content": [{"toolResult": { + "toolUseId": tr["toolUseId"], + "status": tr.get("status", "success"), + "content": tr.get("content", []), + }} for tr in payload["tool_results"]]}] + return payload.get("prompt", "") + + +def _has_inline_function_call(messages) -> bool: + """Return True if messages contains an assistant toolUse for an inline function tool.""" + if not _INLINE_FUNCTION_NAMES or not isinstance(messages, list): + return False + for msg in messages: + if msg.get("role") == "assistant": + for block in msg.get("content", []): + if isinstance(block, dict) and block.get("toolUse", {}).get("name") in _INLINE_FUNCTION_NAMES: + return True + return False + + +def _is_inline_function_call(event: dict) -> bool: + """Check if a 
contentBlockStart event is for an inline function tool.""" + if not _INLINE_FUNCTION_NAMES: + return False + cbs = event.get("contentBlockStart", {}) + start = cbs.get("start", {}) + tool_use = start.get("toolUse") if isinstance(start, dict) else None + return tool_use is not None and tool_use.get("name") in _INLINE_FUNCTION_NAMES + + + @app.entrypoint async def invoke(payload, context): log.info("Invoking Agent.....") @@ -5391,23 +5734,52 @@ async def invoke(payload, context): payments_plugin = create_payments_plugin(user_id, instrument_id, session_id) plugins = [payments_plugin] if payments_plugin else [] {{/if}} +{{#if hasSkillsFetcher}} + skill_paths = [{{#each pathSkills}}{{safeJson this}}{{#unless @last}}, {{/unless}}{{/each}}] + {{#if s3Skills}} + s3_skill_sources = [{{#each s3Skills}}{{safeJson this}}{{#unless @last}}, {{/unless}}{{/each}}] + skill_paths.extend(await asyncio.to_thread(resolve_s3_skills, s3_skill_sources, None)) + {{/if}} + {{#if gitSkills}} + git_skill_sources = [ + {{#each gitSkills}} + dict(url={{safeJson this.url}}{{#if this.path}}, path={{safeJson this.path}}{{/if}}{{#if this.credentialArn}}, credentialArn={{safeJson this.credentialArn}}{{#if this.username}}, username={{safeJson this.username}}{{/if}}{{/if}}), + {{/each}} + ] + {{#if (some gitSkills "credentialArn")}} + _git_identity_client = IdentityClient(os.environ.get("AWS_REGION", os.environ.get("AWS_DEFAULT_REGION", "us-east-1"))) + {{else}} + _git_identity_client = None + {{/if}} + skill_paths.extend(await asyncio.to_thread(resolve_git_skills, git_skill_sources, _git_identity_client)) + {{/if}} + _skill_plugins = [AgentSkills(skills=skill_paths)] if skill_paths else [] +{{/if}} {{#if hasMemory}} {{#if hasPayment}} mem_session_id = getattr(context, 'session_id', 'default-session') + {{#if actorId}} + mem_user_id = "{{actorId}}" + {{else}} mem_user_id = getattr(context, 'user_id', 'default-user') + {{/if}} agent = Agent( model=load_model(), 
session_manager=get_memory_session_manager(mem_session_id, mem_user_id), system_prompt=DEFAULT_SYSTEM_PROMPT + PAYMENT_SYSTEM_PROMPT, tools=tools, - plugins=plugins,{{#if hasConfigBundle}} + plugins=plugins{{#if hasSkillsFetcher}} + _skill_plugins{{/if}},{{#if hasConfigBundle}} hooks=[ConfigBundleHook()],{{/if}} ) {{else}} session_id = getattr(context, 'session_id', 'default-session') + {{#if actorId}} + user_id = "{{actorId}}" + {{else}} user_id = getattr(context, 'user_id', 'default-user') - agent = get_or_create_agent(session_id, user_id) + {{/if}} + agent = get_or_create_agent(session_id, user_id{{#if hasSkillsFetcher}}, _skill_plugins{{/if}}) {{/if}} {{else}} {{#if hasPayment}} @@ -5415,25 +5787,100 @@ async def invoke(payload, context): model=load_model(), system_prompt=DEFAULT_SYSTEM_PROMPT + PAYMENT_SYSTEM_PROMPT, tools=tools, - plugins=plugins,{{#if hasConfigBundle}} + plugins=plugins{{#if hasSkillsFetcher}} + _skill_plugins{{/if}},{{#if hasConfigBundle}} hooks=[ConfigBundleHook()],{{/if}} ) {{else}} {{#if hasConfigBundle}} - agent = create_agent() + agent = create_agent({{#if hasSkillsFetcher}}_skill_plugins{{/if}}) {{else}} - agent = get_or_create_agent() + agent = get_or_create_agent({{#if hasSkillsFetcher}}_skill_plugins{{/if}}) {{/if}} {{/if}} {{/if}} - # Execute and format response - stream = agent.stream_async(payload.get("prompt")) + prompt = _extract_prompt(payload) + + {{#if inlineFunctionTools}} + # If Turn 2 carries the harness-style assistant(toolUse)+user(toolResult) pair, + # strip the placeholder turn Strands stored during Turn 1 so the real toolResult + # is injected cleanly — same protocol as the harness runtime. 
+ if _has_inline_function_call(prompt): + msgs = agent.messages + if len(msgs) >= 2 and any("toolResult" in b for b in msgs[-1].get("content", [])): + del msgs[-2:] + {{/if}} + + {{#if hasExecutionLimits}} + timeout_seconds = {{#if timeoutSeconds}}{{timeoutSeconds}}{{else}}None{{/if}} + timeout_fired = False + watchdog_task = None + if timeout_seconds is not None: + async def _timeout_watchdog(): + nonlocal timeout_fired + await asyncio.sleep(timeout_seconds) + timeout_fired = True + agent.cancel() + watchdog_task = asyncio.create_task(_timeout_watchdog()) - async for event in stream: - # Handle Text parts of the response - if "data" in event and isinstance(event["data"], str): - yield event["data"] + try: + {{#if inlineFunctionTools}} + hit_inline_function = False + {{/if}} + async for event in agent.stream_async( + prompt, + ): + if not isinstance(event, dict) or "event" not in event: + continue + cbs = event["event"].get("contentBlockStart") + if cbs is not None and not cbs.get("start"): + continue + {{#if inlineFunctionTools}} + if not hit_inline_function: + hit_inline_function = _is_inline_function_call(event["event"]) + {{/if}} + yield event + {{#if inlineFunctionTools}} + if hit_inline_function and "messageStop" in event["event"]: + return + {{/if}} + + if timeout_fired: + yield {"event": {"messageStop": {"stopReason": "timeout_exceeded"}}} + except EventLoopException as e: + if isinstance(e.original_exception, ExecutionLimitExceeded): + yield {"event": {"messageStop": {"stopReason": str(e.original_exception)}}} + return + raise + finally: + if watchdog_task is not None: + watchdog_task.cancel() + try: + await watchdog_task + except asyncio.CancelledError: + pass + {{else}} + {{#if inlineFunctionTools}} + hit_inline_function = False + {{/if}} + async for event in agent.stream_async( + prompt, + ): + if not isinstance(event, dict) or "event" not in event: + continue + cbs = event["event"].get("contentBlockStart") + if cbs is not None and not cbs.get("start"): 
+ continue + {{#if inlineFunctionTools}} + if not hit_inline_function: + hit_inline_function = _is_inline_function_call(event["event"]) + {{/if}} + yield event + {{#if inlineFunctionTools}} + if hit_inline_function and "messageStop" in event["event"]: + return + {{/if}} + {{/if}} if __name__ == "__main__": @@ -5478,10 +5925,14 @@ def _get_bearer_token_{{snakeCase name}}(*, access_token: str): {{#each gatewayProviders}} def get_{{snakeCase name}}_mcp_client() -> MCPClient | None: """Returns an MCP Client connected to the {{name}} gateway.""" + {{#if hardcodedUrl}} + url = {{safeJson hardcodedUrl}} + {{else}} url = os.environ.get("{{envVarName}}") if not url: logger.warning("{{envVarName}} not set — {{name}} gateway tools unavailable") return None + {{/if}} {{#if (eq authType "AWS_IAM")}} return MCPClient(lambda: aws_iam_streamablehttp_client(url, aws_service="bedrock-agentcore", aws_region=os.environ.get("AWS_REGION", os.environ.get("AWS_DEFAULT_REGION"))), prefix="{{snakeCase name}}") {{else if (eq authType "CUSTOM_JWT")}} @@ -5502,7 +5953,41 @@ def get_all_gateway_mcp_clients() -> list[MCPClient]: clients.append(client) {{/each}} return clients -{{else}} +{{/if}} +{{#if remoteMcpTools}} +{{#if (some remoteMcpTools "headerCredentials")}} +from bedrock_agentcore.identity.auth import requires_api_key +{{/if}} +{{#each remoteMcpTools}} +{{#if headerCredentials}} +{{#each headerCredentials}} +@requires_api_key(provider_name="{{credentialName}}") +def _get_{{snakeCase ../name}}_{{snakeCase headerKey}}_key(api_key: str) -> str: + """Fetch {{headerKey}} credential for {{../name}} from AgentCore Identity.""" + return api_key + +{{/each}} +{{/if}} +def get_{{snakeCase name}}_mcp_client() -> MCPClient | None: + """Returns an MCP Client for the {{name}} remote MCP server.""" + url = {{safeJson url}} + {{#if headerCredentials}} + if os.getenv("LOCAL_DEV") == "1": + headers = { {{#each headerCredentials}}{{safeJson headerKey}}: os.environ.get("{{envVarName}}", ""){{#unless 
@last}}, {{/unless}}{{/each}} } + else: + headers = { {{#each headerCredentials}}{{safeJson headerKey}}: _get_{{snakeCase ../name}}_{{snakeCase headerKey}}_key(){{#unless @last}}, {{/unless}}{{/each}} } + return MCPClient(lambda: streamablehttp_client(url, headers=headers)) + {{else}} + return MCPClient(lambda: streamablehttp_client(url)) + {{/if}} + +{{/each}} +def get_all_remote_mcp_clients() -> list[MCPClient]: + """Returns all configured remote MCP clients.""" + clients = [{{#each remoteMcpTools}}get_{{snakeCase name}}_mcp_client(){{#unless @last}}, {{/unless}}{{/each}}] + return [c for c in clients if c is not None] +{{/if}} +{{#unless (or hasGateway remoteMcpTools)}} {{#if isVpc}} # VPC mode: external MCP endpoints are not reachable without a NAT gateway. # Add an AgentCore Gateway with \`agentcore add gateway\`, or configure your own endpoint below. @@ -5511,6 +5996,7 @@ def get_streamable_http_mcp_client() -> MCPClient | None: """No MCP server configured. Add a gateway with \`agentcore add gateway\`.""" return None {{else}} +{{#unless isExportHarness}} # ExaAI provides information about code through web searches, crawling and code context searches through their platform. 
Requires no authentication EXAMPLE_MCP_ENDPOINT = "https://mcp.exa.ai/mcp" @@ -5518,8 +6004,9 @@ def get_streamable_http_mcp_client() -> MCPClient: """Returns an MCP Client compatible with Strands""" # to use an MCP server that supports bearer authentication, add headers={"Authorization": f"Bearer {access_token}"} return MCPClient(lambda: streamablehttp_client(EXAMPLE_MCP_ENDPOINT)) +{{/unless}} {{/if}} -{{/if}} +{{/unless}} " `; @@ -5535,7 +6022,7 @@ from strands.models.bedrock import BedrockModel def load_model() -> BedrockModel: """Get Bedrock model client using IAM credentials.""" - return BedrockModel(model_id="global.anthropic.claude-sonnet-4-5-20250929-v1:0") + return BedrockModel(model_id="{{#if modelId}}{{modelId}}{{else}}global.anthropic.claude-sonnet-4-5-20250929-v1:0{{/if}}") {{/if}} {{#if (eq modelProvider "Anthropic")}} import os @@ -5611,7 +6098,7 @@ def load_model() -> OpenAIModel: """Get authenticated OpenAI model client.""" return OpenAIModel( client_args={"api_key": _get_api_key()}, - model_id="gpt-4.1", + model_id="{{#if modelId}}{{modelId}}{{else}}gpt-4.1{{/if}}", ) {{/if}} {{#if (eq modelProvider "Gemini")}} @@ -5649,7 +6136,7 @@ def load_model() -> GeminiModel: """Get authenticated Gemini model client.""" return GeminiModel( client_args={"api_key": _get_api_key()}, - model_id="gemini-2.5-flash", + model_id="{{#if modelId}}{{modelId}}{{else}}gemini-2.5-flash{{/if}}", ) {{/if}} " @@ -5675,7 +6162,10 @@ dependencies = [ {{/if}}"mcp >= 1.19.0", {{#if (eq modelProvider "OpenAI")}}"openai >= 1.0.0", {{/if}}"strands-agents >= 1.15.0", - {{#if hasGateway}}{{#if (includes gatewayAuthTypes "AWS_IAM")}}"mcp-proxy-for-aws >= 1.1.0", + {{#if (or hasBrowser hasCodeInterpreter)}}"strands-agents-tools >= 0.1.0", + {{/if}}{{#if hasBrowser}}"nest-asyncio >= 1.5.0", + "playwright >= 1.42.0", + {{/if}}{{#if hasGateway}}{{#if (includes gatewayAuthTypes "AWS_IAM")}}"mcp-proxy-for-aws >= 1.1.0", {{/if}}{{/if}} ] @@ -5684,6 +6174,347 @@ packages = ["."] " `; 
+exports[`Assets Directory Snapshots > Python framework assets > python/python/http/strands/base/skills/fetcher.py should match snapshot 1`] = ` +""""Skill fetcher — downloads s3/git skills to local filesystem on first use. + +Resolved paths are passed to AgentSkills(skills=...) in main.py. +Cache directory: /.agents/skills/ — an absolute path under the system temp +directory (honors $TMPDIR, defaults to /tmp). The runtime working directory (e.g. +/var/task in a CodeZip runtime) is read-only, so the cache must live somewhere +guaranteed-writable. +""" + +import base64 +import hashlib +import json +import logging +import os +import shutil +import subprocess +import tempfile +from pathlib import Path +from typing import Optional + +logger = logging.getLogger(__name__) + +_SKILLS_BASE = Path(tempfile.gettempdir()) / ".agents" / "skills" +_GIT_TIMEOUT = 60 +_S3_MAX_SIZE_BYTES = 1 * 1024 * 1024 * 1024 # 1 GB + + +def _stable_hash(value: str) -> str: + return hashlib.sha256(value.encode()).hexdigest()[:12] + + +def _cleanup(path: Path) -> None: + """Remove a partially-created skill directory so retries don't see stale state.""" + shutil.rmtree(path, ignore_errors=True) + + +def _read_map(type_dir: Path) -> dict: + map_file = type_dir / ".map.json" + return json.loads(map_file.read_text()) if map_file.exists() else {} + + +def _write_map(type_dir: Path, mapping: dict) -> None: + type_dir.mkdir(parents=True, exist_ok=True) + (type_dir / ".map.json").write_text(json.dumps(mapping)) + + +def _resolve_cached(type_dir: Path, source_hash: str) -> Optional[str]: + """Return the cached skill directory for a source hash, or None if not on disk.""" + mapping = _read_map(type_dir) + dir_name = mapping.get(source_hash) + if dir_name and (type_dir / dir_name).exists(): + return str(type_dir / dir_name) + return None + + +def _read_skill_name(skill_dir: Path) -> str: + """Extract the skill name from SKILL.md YAML frontmatter.""" + content = (skill_dir / "SKILL.md").read_text() + if not 
content.startswith("---"): + raise ValueError(f"SKILL.md in {skill_dir} has no YAML frontmatter (must start with ---)") + parts = content.split("---", 2) + if len(parts) < 3: + raise ValueError(f"SKILL.md in {skill_dir} has malformed frontmatter (missing closing ---)") + for line in parts[1].strip().splitlines(): + if line.startswith("name:"): + name = line[len("name:"):].strip().strip("\\"'") + if name: + return name + raise ValueError(f"SKILL.md in {skill_dir} is missing a 'name' field in frontmatter") + + +def _pick_dir_name(type_dir: Path, name: str, source_hash: str) -> str: + """Pick a unique directory name, appending a hash suffix on collision.""" + if not (type_dir / name).exists(): + return name + return f"{name}-{source_hash[:8]}" + + +def _rename_and_cache_skill(type_dir: Path, temp_dir: Path, source_hash: str, skill_root: Path, + source_label: str = "") -> Path: + """Validate SKILL.md, rename the temp dir to the skill's declared name, and update the map. + + Raises ValueError if SKILL.md is missing or has invalid frontmatter. 
+ """ + if not (skill_root / "SKILL.md").exists(): + _cleanup(temp_dir) + hint = f" (source: {source_label})" if source_label else "" + raise ValueError(f"No SKILL.md found in fetched skill{hint}") + + name = _read_skill_name(skill_root) + dir_name = _pick_dir_name(type_dir, name, source_hash) + final_dir = type_dir / dir_name + if final_dir != temp_dir: + temp_dir.rename(final_dir) + + mapping = _read_map(type_dir) + mapping[source_hash] = dir_name + _write_map(type_dir, mapping) + return final_dir + + +def _fetch_s3_skill(source: str, s3_client=None) -> Path: + """Download an s3:// skill prefix and return the local directory.""" + uri = source if source.endswith("/") else source + "/" + source_hash = _stable_hash(uri) + type_dir = _SKILLS_BASE / "s3" + + cached = _resolve_cached(type_dir, source_hash) + if cached: + return Path(cached) + + import boto3 + client = s3_client or boto3.client("s3") + bucket, _, prefix = uri[len("s3://"):].partition("/") + if not bucket: + raise ValueError(f"Invalid S3 URI (no bucket): {uri}") + + temp_dir = type_dir / source_hash + _cleanup(temp_dir) + temp_dir.mkdir(parents=True, exist_ok=True) + temp_root = temp_dir.resolve() + + paginator = client.get_paginator("list_objects_v2") + total = 0 + for page in paginator.paginate(Bucket=bucket, Prefix=prefix): + for obj in page.get("Contents", []): + total += obj["Size"] + if total > _S3_MAX_SIZE_BYTES: + _cleanup(temp_dir) + raise ValueError(f"S3 skill {uri} exceeds 1 GB size limit") + rel = obj["Key"][len(prefix):].lstrip("/") + if not rel: + continue + dest = (temp_dir / rel).resolve() + if dest != temp_root and not str(dest).startswith(str(temp_root) + os.sep): + _cleanup(temp_dir) + raise ValueError(f"Path traversal detected in S3 key: {obj['Key']}") + dest.parent.mkdir(parents=True, exist_ok=True) + client.download_file(bucket, obj["Key"], str(dest)) + + if total == 0: + _cleanup(temp_dir) + raise ValueError(f"No files found at S3 URI: {uri}") + + return 
_rename_and_cache_skill(type_dir, temp_dir, source_hash, temp_dir, source_label=uri) + + +def _resolve_credential_arn(credential_arn: str, identity_client) -> str: + """Resolve a Token Vault API-key credential ARN to its secret value via AgentCore Identity. + + ARN format: arn:<partition>:bedrock-agentcore:<region>:<account>:token-vault/<vault>/apikeycredentialprovider/<name> + """ + from bedrock_agentcore.runtime.context import BedrockAgentCoreContext # noqa: PLC0415 + + provider_name = credential_arn.rsplit("/", 1)[-1] + if not provider_name: + raise ValueError(f"Invalid credential ARN: {credential_arn}") + workload_token = BedrockAgentCoreContext.get_workload_access_token() + if not workload_token: + raise ValueError("Credential ARN resolution requires a workload access token") + api_key = identity_client.dp_client.get_resource_api_key( + resourceCredentialProviderName=provider_name, + workloadIdentityToken=workload_token, + )["apiKey"] + if not api_key: + raise ValueError(f"Identity returned empty API key for provider: {provider_name}") + return api_key + + +def _build_git_auth_env(credential_arn: Optional[str], username: Optional[str], identity_client=None) -> dict: + """Build GIT_CONFIG_* env vars for HTTP Basic auth using a Token Vault credential ARN. + + Uses env vars instead of -c args to avoid leaking credentials in /proc/*/cmdline, + and so auth propagates to sub-commands (e.g. sparse-checkout triggering a fetch). + """ + if not credential_arn or not identity_client: + return {} + password = _resolve_credential_arn(credential_arn, identity_client) + user = username or "oauth2" + encoded = base64.b64encode(f"{user}:{password}".encode()).decode() + return { + "GIT_CONFIG_COUNT": "1", + "GIT_CONFIG_KEY_0": "http.extraHeader", + "GIT_CONFIG_VALUE_0": f"Authorization: Basic {encoded}", + } + + +def _fetch_git_skill(url: str, skill_path: str = "", credential_arn: Optional[str] = None, + username: Optional[str] = None, identity_client=None) -> Path: + """Shallow-clone a git skill repository and return the local skill directory. + + Returns the directory containing SKILL.md (the subdir itself for sparse checkouts). + """ + if skill_path and (os.path.isabs(skill_path) or ".." 
in Path(skill_path).parts): + raise ValueError(f"Path traversal detected in skill path: {skill_path}") + + source_hash = _stable_hash(f"{url}:{skill_path}") + type_dir = _SKILLS_BASE / "git" + + cached = _resolve_cached(type_dir, source_hash) + if cached: + return Path(cached) / skill_path if skill_path else Path(cached) + + temp_dir = type_dir / source_hash + _cleanup(temp_dir) + temp_dir.mkdir(parents=True, exist_ok=True) + + extra_env = _build_git_auth_env(credential_arn, username, identity_client) + git_env = {**os.environ, **extra_env} if extra_env else None + + try: + if skill_path: + subprocess.run( + ["git", "clone", "--depth", "1", "--filter=blob:none", "--sparse", url, str(temp_dir)], + check=True, timeout=_GIT_TIMEOUT, capture_output=True, env=git_env, + ) + subprocess.run( + ["git", "sparse-checkout", "set", skill_path], + check=True, timeout=_GIT_TIMEOUT, capture_output=True, cwd=str(temp_dir), env=git_env, + ) + else: + subprocess.run( + ["git", "clone", "--depth", "1", url, str(temp_dir)], + check=True, timeout=_GIT_TIMEOUT, capture_output=True, env=git_env, + ) + except Exception: + _cleanup(temp_dir) + raise + + if skill_path and not (temp_dir / skill_path).exists(): + _cleanup(temp_dir) + raise ValueError(f"Skill path '{skill_path}' not found in repository '{url}'") + + # SKILL.md lives inside the subdir for sparse checkouts. + skill_root = temp_dir / skill_path if skill_path else temp_dir + label = f"{url}:{skill_path}" if skill_path else url + final_dir = _rename_and_cache_skill(type_dir, temp_dir, source_hash, skill_root, source_label=label) + return final_dir / skill_path if skill_path else final_dir + + +def resolve_s3_skills(sources: list, s3_client=None) -> list: + """Resolve s3:// skill URIs to local filesystem paths. + + Any fetch failure raises and fails the invocation — a partial skill set + would silently run the agent without capabilities the harness declared. 
+ """ + paths = [] + for uri in sources: + try: + skill_dir = _fetch_s3_skill(uri, s3_client) + except Exception as e: + raise ValueError(f"Failed to resolve S3 skill '{uri}': {e}") from e + paths.append(str(skill_dir.resolve())) + return paths + + +def resolve_git_skills(sources: list, identity_client=None) -> list: + """Resolve git skill dicts to local filesystem paths. + + Each source is a dict with keys: url (required), path (optional), + credentialArn (optional), username (optional). + + Any fetch failure raises and fails the invocation — a partial skill set + would silently run the agent without capabilities the harness declared. + """ + paths = [] + for source in sources: + try: + skill_dir = _fetch_git_skill( + url=source["url"], + skill_path=source.get("path") or "", + credential_arn=source.get("credentialArn"), + username=source.get("username"), + identity_client=identity_client, + ) + except Exception as e: + raise ValueError(f"Failed to resolve git skill '{source.get('url', source)}': {e}") from e + paths.append(str(skill_dir.resolve())) + return paths +" +`; + +exports[`Assets Directory Snapshots > Python framework assets > python/python/http/strands/capabilities/execution-limits/hooks/execution_limits.py should match snapshot 1`] = ` +"import time +from typing import Optional + +from strands.hooks import BeforeModelCallEvent +from strands.hooks.registry import HookProvider, HookRegistry +from strands.types.exceptions import EventLoopException + + +class ExecutionLimitExceeded(Exception): + def __init__(self, message: str) -> None: + super().__init__(message) + + +class ExecutionLimitsHook(HookProvider): + def __init__( + self, + max_iterations: Optional[int] = None, + max_tokens: Optional[int] = None, + timeout_seconds: Optional[float] = None, + ) -> None: + self._max_iterations = max_iterations + self._max_tokens = max_tokens + self._timeout_seconds = timeout_seconds + self._iteration_count = 0 + self._start_time = time.monotonic() + + def 
register_hooks(self, registry: HookRegistry, **kwargs) -> None: + registry.add_callback(BeforeModelCallEvent, self._check_limits) + + def _check_limits(self, event: BeforeModelCallEvent) -> None: + self._iteration_count += 1 + + if self._max_iterations is not None and self._iteration_count > self._max_iterations: + raise EventLoopException( + ExecutionLimitExceeded(f"Max iterations exceeded: {self._max_iterations}") + ) + + if self._timeout_seconds is not None: + elapsed = time.monotonic() - self._start_time + if elapsed > self._timeout_seconds: + raise EventLoopException( + ExecutionLimitExceeded( + f"Timeout exceeded: {self._timeout_seconds}s (elapsed {elapsed:.1f}s)" + ) + ) + + if self._max_tokens is not None: + used = event.agent.event_loop_metrics.accumulated_usage.get("outputTokens", 0) + if used >= self._max_tokens: + raise EventLoopException( + ExecutionLimitExceeded( + f"Max output tokens exceeded: {used}/{self._max_tokens}" + ) + ) +" +`; + exports[`Assets Directory Snapshots > Python framework assets > python/python/http/strands/capabilities/memory/__init__.py should match snapshot 1`] = ` "# Package marker " @@ -5716,8 +6547,11 @@ def get_memory_session_manager(session_id: Optional[str], actor_id: str) -> Opti {{#if (includes memoryProviders.[0].strategies "USER_PREFERENCE")}} f"/users/{actor_id}/preferences": RetrievalConfig(top_k=3, relevance_score=0.5), {{/if}} +{{#if (includes memoryProviders.[0].strategies "EPISODIC")}} + f"/episodes/{actor_id}/{session_id}": RetrievalConfig(top_k=5, relevance_score=0.5), +{{/if}} {{#if (includes memoryProviders.[0].strategies "SUMMARIZATION")}} - f"/summaries/{actor_id}": RetrievalConfig(top_k=3, relevance_score=0.5), + f"/summaries/{actor_id}/{session_id}": RetrievalConfig(top_k=3, relevance_score=0.5), {{/if}} } {{/if}} @@ -6380,6 +7214,19 @@ When modifying JSON config files: 4. Use CloudFormation-safe names (alphanumeric, start with letter) 5. 
Run \`agentcore validate\` to verify changes +## Harness Export + +\`agentcore export harness\` converts a harness configuration into a deployable Strands Python agent under \`app//\`. + +**After every export, you MUST read \`app//EXPORT_NOTES.md\` before proceeding.** + +This file lists any manual follow-up items required before the agent will deploy or run correctly — missing files to create, IAM policies to add, or configuration steps the exporter could not automate. A clean export produces "No manual steps required." Complete every item in the file before running \`agentcore deploy\`. + +\`\`\`bash +agentcore export harness --name # generates app//EXPORT_NOTES.md +cat app//EXPORT_NOTES.md # read this before touching anything else +\`\`\` + ## CLI Commands | Command | Description | @@ -6387,6 +7234,7 @@ When modifying JSON config files: | \`agentcore create\` | Create a new project | | \`agentcore add \` | Add agent, memory, credential, gateway, evaluator, policy | | \`agentcore remove \` | Remove a resource | +| \`agentcore export harness\` | Export a harness to a Strands runtime agent | | \`agentcore dev\` | Run agent locally with hot-reload | | \`agentcore deploy\` | Deploy to AWS | | \`agentcore status\` | Show deployment status | diff --git a/src/assets/agents/AGENTS.md b/src/assets/agents/AGENTS.md index 2153de014..431ce6cc3 100644 --- a/src/assets/agents/AGENTS.md +++ b/src/assets/agents/AGENTS.md @@ -122,6 +122,19 @@ When modifying JSON config files: 4. Use CloudFormation-safe names (alphanumeric, start with letter) 5. Run `agentcore validate` to verify changes +## Harness Export + +`agentcore export harness` converts a harness configuration into a deployable Strands Python agent under `app//`. 
+ +**After every export, you MUST read `app//EXPORT_NOTES.md` before proceeding.** + +This file lists any manual follow-up items required before the agent will deploy or run correctly — missing files to create, IAM policies to add, or configuration steps the exporter could not automate. A clean export produces "No manual steps required." Complete every item in the file before running `agentcore deploy`. + +```bash +agentcore export harness --name # generates app//EXPORT_NOTES.md +cat app//EXPORT_NOTES.md # read this before touching anything else +``` + ## CLI Commands | Command | Description | @@ -129,6 +142,7 @@ When modifying JSON config files: | `agentcore create` | Create a new project | | `agentcore add ` | Add agent, memory, credential, gateway, evaluator, policy | | `agentcore remove ` | Remove a resource | +| `agentcore export harness` | Export a harness to a Strands runtime agent | | `agentcore dev` | Run agent locally with hot-reload | | `agentcore deploy` | Deploy to AWS | | `agentcore status` | Show deployment status | diff --git a/src/assets/cdk/bin/cdk.ts b/src/assets/cdk/bin/cdk.ts index cb68c06a3..44fc565ec 100644 --- a/src/assets/cdk/bin/cdk.ts +++ b/src/assets/cdk/bin/cdk.ts @@ -1,6 +1,6 @@ #!/usr/bin/env node -import { AgentCoreStack } from '../lib/cdk-stack'; -import { ConfigIO, type AwsDeploymentTarget } from '@aws/agentcore-cdk'; +import { AgentCoreStack, type HarnessConfig } from '../lib/cdk-stack'; +import { ConfigIO, HarnessSpecSchema, type AwsDeploymentTarget } from '@aws/agentcore-cdk'; import { App, type Environment } from 'aws-cdk-lib'; import * as path from 'path'; import * as fs from 'fs'; @@ -56,40 +56,61 @@ async function main() { throw new Error('No deployment targets configured. Please define targets in agentcore/aws-targets.json'); } - // Read harness configs for role creation. + // Read harness configs: the full validated spec drives the CFN resource; the + // role-scoped fields drive the IAM role + container build. 
const projectRoot = path.resolve(configRoot, '..'); - const harnessConfigs: { - name: string; - executionRoleArn?: string; - memoryName?: string; - containerUri?: string; - hasDockerfile?: boolean; - dockerfile?: string; - codeLocation?: string; - tools?: { type: string; name: string }[]; - apiKeyArn?: string; - efsAccessPoints?: { accessPointArn: string; mountPath: string }[]; - s3AccessPoints?: { accessPointArn: string; mountPath: string }[]; - apiFormat?: 'converse_stream' | 'responses' | 'chat_completions'; - }[] = []; - for (const entry of specAny.harnesses ?? []) { + + // Read non-S3 KB connector-config files and pass their parsed contents to the + // L3 verbatim. The L3 does not read files; it expects the parsed + // connectorParameters keyed by the data source's connectorConfigFile path. + const connectorParametersByFile: Record> = {}; + for (const kb of specAny.knowledgeBases ?? []) { + for (const ds of kb.dataSources ?? []) { + if (ds.type !== 'S3' && ds.connectorConfigFile) { + const abs = path.resolve(projectRoot, ds.connectorConfigFile); + try { + connectorParametersByFile[ds.connectorConfigFile] = JSON.parse(fs.readFileSync(abs, 'utf-8')); + } catch (err) { + throw new Error( + `Could not read connector config '${ds.connectorConfigFile}' for knowledge base '${kb.name}' at ${abs}: ${err instanceof Error ? err.message : err}` + ); + } + } + } + } + + // Harness is preview-gated. The CLI bundle bakes the preview flag at build time and + // forwards it to this child process via AGENTCORE_PREVIEW (see toolkit-lib/wrapper.ts). + // This app is built separately and cannot see that build-time define, so it gates on the + // env var. Absent/anything-but-'1' defaults to off so a stale harnesses[] entry in a + // non-preview build never synthesizes an AWS::BedrockAgentCore::Harness resource. + const previewEnabled = process.env.AGENTCORE_PREVIEW === '1'; + + const harnessConfigs: HarnessConfig[] = []; + for (const entry of previewEnabled ? (specAny.harnesses ?? 
[]) : []) { const harnessDir = path.resolve(projectRoot, entry.path); const harnessPath = path.resolve(harnessDir, 'harness.json'); try { - const harnessSpec = JSON.parse(fs.readFileSync(harnessPath, 'utf-8')); + const harnessSpec = HarnessSpecSchema.parse(JSON.parse(fs.readFileSync(harnessPath, 'utf-8'))); harnessConfigs.push({ name: entry.name, executionRoleArn: harnessSpec.executionRoleArn, - memoryName: harnessSpec.memory?.name, + // Only an `existing` memory ref carries a name to wire IAM against; managed memory is + // owned by the harness (no sibling) and disabled has none — both resolve to undefined. + memoryName: harnessSpec.memory?.mode === 'existing' ? harnessSpec.memory.name : undefined, containerUri: harnessSpec.containerUri, hasDockerfile: !!harnessSpec.dockerfile, dockerfile: harnessSpec.dockerfile, codeLocation: harnessSpec.dockerfile ? harnessDir : undefined, tools: harnessSpec.tools, + skills: harnessSpec.skills, apiKeyArn: harnessSpec.model?.apiKeyArn, efsAccessPoints: harnessSpec.efsAccessPoints, s3AccessPoints: harnessSpec.s3AccessPoints, apiFormat: harnessSpec.model?.apiFormat, + // Full spec + dir drive the AWS::BedrockAgentCore::Harness CFN resource. + spec: harnessSpec, + harnessDir, }); } catch (err) { throw new Error( @@ -156,6 +177,7 @@ async function main() { spec, mcpSpec, credentials, + connectorParametersByFile, harnesses: harnessConfigs.length > 0 ? 
harnessConfigs : undefined, paymentSpec, env, diff --git a/src/assets/cdk/lib/cdk-stack.ts b/src/assets/cdk/lib/cdk-stack.ts index f16f84555..3dac0669d 100644 --- a/src/assets/cdk/lib/cdk-stack.ts +++ b/src/assets/cdk/lib/cdk-stack.ts @@ -6,24 +6,18 @@ import { type AgentCoreProjectSpec, type AgentCoreMcpSpec, type CustomJWTAuthorizerConfig, + type HarnessDeploymentConfig, } from '@aws/agentcore-cdk'; import { CfnOutput, Stack, type StackProps } from 'aws-cdk-lib'; import * as iam from 'aws-cdk-lib/aws-iam'; import { Construct } from 'constructs'; -export interface HarnessConfig { - name: string; - executionRoleArn?: string; - memoryName?: string; - containerUri?: string; - hasDockerfile?: boolean; - dockerfile?: string; - codeLocation?: string; - tools?: { type: string; name: string }[]; - apiKeyArn?: string; - efsAccessPoints?: { accessPointArn: string; mountPath: string }[]; - s3AccessPoints?: { accessPointArn: string; mountPath: string }[]; -} +/** + * Harness deployment config: role-scoped fields (for IAM role + container build) + * plus the full validated spec + its config directory so the L3 construct can + * synthesize the AWS::BedrockAgentCore::Harness resource. + */ +export type HarnessConfig = HarnessDeploymentConfig; export interface PaymentConnectorSpec { name: string; @@ -59,6 +53,11 @@ export interface AgentCoreStackProps extends StackProps { * Harness role configurations. */ harnesses?: HarnessConfig[]; + /** + * Parsed connectorParameters for non-S3 KB data sources, keyed by + * connectorConfigFile path. Forwarded to AgentCoreApplication. + */ + connectorParametersByFile?: Record>; /** * Payment specifications with resolved credential provider ARNs. 
*/ @@ -98,7 +97,7 @@ export class AgentCoreStack extends Stack { constructor(scope: Construct, id: string, props: AgentCoreStackProps) { super(scope, id, props); - const { spec, mcpSpec, credentials, harnesses, paymentSpec } = props; + const { spec, mcpSpec, credentials, harnesses, connectorParametersByFile, paymentSpec } = props; // Create AgentCoreApplication with all agents and harness roles // eslint-disable-next-line @typescript-eslint/no-explicit-any @@ -106,6 +105,12 @@ export class AgentCoreStack extends Stack { if (harnesses?.length) { appProps.harnesses = harnesses; } + if (connectorParametersByFile && Object.keys(connectorParametersByFile).length > 0) { + appProps.connectorParametersByFile = connectorParametersByFile; + } + if (credentials) { + appProps.credentials = credentials; + } this.application = new AgentCoreApplication(this, 'Application', appProps as any); // Create AgentCoreMcp if there are gateways configured diff --git a/src/assets/cdk/test/cdk.test.ts b/src/assets/cdk/test/cdk.test.ts index 170a0fd90..8db318ada 100644 --- a/src/assets/cdk/test/cdk.test.ts +++ b/src/assets/cdk/test/cdk.test.ts @@ -14,13 +14,14 @@ test('AgentCoreStack synthesizes with empty spec', () => { credentials: [], evaluators: [], onlineEvalConfigs: [], + configBundles: [], policyEngines: [], payments: [], - configBundles: [], agentCoreGateways: [], mcpRuntimeTools: [], unassignedTargets: [], datasets: [], + knowledgeBases: [], }, }); const template = Template.fromStack(stack); diff --git a/src/assets/container/python/Dockerfile b/src/assets/container/python/Dockerfile index cb3569eff..265fd9a3e 100644 --- a/src/assets/container/python/Dockerfile +++ b/src/assets/container/python/Dockerfile @@ -1,5 +1,9 @@ FROM public.ecr.aws/docker/library/python:3.12-slim-trixie +{{#if gitSkills}} +RUN apt-get update && apt-get install -y --no-install-recommends git && rm -rf /var/lib/apt/lists/* + +{{/if}} RUN pip install --no-cache-dir uv ARG UV_DEFAULT_INDEX diff --git 
a/src/assets/python/a2a/strands/capabilities/memory/session.py b/src/assets/python/a2a/strands/capabilities/memory/session.py index 2b754424f..1a8b7e5a3 100644 --- a/src/assets/python/a2a/strands/capabilities/memory/session.py +++ b/src/assets/python/a2a/strands/capabilities/memory/session.py @@ -24,8 +24,11 @@ def get_memory_session_manager(session_id: Optional[str], actor_id: str) -> Opti {{#if (includes memoryProviders.[0].strategies "USER_PREFERENCE")}} f"/users/{actor_id}/preferences": RetrievalConfig(top_k=3, relevance_score=0.5), {{/if}} +{{#if (includes memoryProviders.[0].strategies "EPISODIC")}} + f"/episodes/{actor_id}/{session_id}": RetrievalConfig(top_k=5, relevance_score=0.5), +{{/if}} {{#if (includes memoryProviders.[0].strategies "SUMMARIZATION")}} - f"/summaries/{actor_id}": RetrievalConfig(top_k=3, relevance_score=0.5), + f"/summaries/{actor_id}/{session_id}": RetrievalConfig(top_k=3, relevance_score=0.5), {{/if}} } {{/if}} diff --git a/src/assets/python/agui/strands/capabilities/memory/session.py b/src/assets/python/agui/strands/capabilities/memory/session.py index 2b754424f..1a8b7e5a3 100644 --- a/src/assets/python/agui/strands/capabilities/memory/session.py +++ b/src/assets/python/agui/strands/capabilities/memory/session.py @@ -24,8 +24,11 @@ def get_memory_session_manager(session_id: Optional[str], actor_id: str) -> Opti {{#if (includes memoryProviders.[0].strategies "USER_PREFERENCE")}} f"/users/{actor_id}/preferences": RetrievalConfig(top_k=3, relevance_score=0.5), {{/if}} +{{#if (includes memoryProviders.[0].strategies "EPISODIC")}} + f"/episodes/{actor_id}/{session_id}": RetrievalConfig(top_k=5, relevance_score=0.5), +{{/if}} {{#if (includes memoryProviders.[0].strategies "SUMMARIZATION")}} - f"/summaries/{actor_id}": RetrievalConfig(top_k=3, relevance_score=0.5), + f"/summaries/{actor_id}/{session_id}": RetrievalConfig(top_k=3, relevance_score=0.5), {{/if}} } {{/if}} diff --git a/src/assets/python/http/strands/base/main.py 
b/src/assets/python/http/strands/base/main.py index 254550134..87a216c32 100644 --- a/src/assets/python/http/strands/base/main.py +++ b/src/assets/python/http/strands/base/main.py @@ -1,23 +1,75 @@ from typing import Any +{{#if inlineFunctionTools}} +import json +from strands.tools.tools import PythonAgentTool +from strands.types.tools import ToolResult, ToolUse +{{/if}} from strands import Agent, tool +{{#if hasSkillsFetcher}} +from strands import AgentSkills +{{#if hasFetchedSkills}} +from skills.fetcher import resolve_s3_skills, resolve_git_skills +{{/if}} +{{#if (some gitSkills "credentialArn")}} +from bedrock_agentcore.services.identity import IdentityClient +{{/if}} +{{/if}} +import asyncio +{{#if hasShell}} +import subprocess +{{/if}} +{{#if hasFileOperations}} +import os +{{/if}} +{{#if hasExecutionLimits}} +from strands.tools.executors import SequentialToolExecutor +from strands.types.exceptions import EventLoopException +from hooks.execution_limits import ExecutionLimitExceeded, ExecutionLimitsHook +{{/if}} {{#if hasConfigBundle}} from strands.hooks import HookProvider, HookRegistry, BeforeInvocationEvent, BeforeToolCallEvent +{{/if}} +{{#if truncationStrategy}} +{{#if (eq truncationStrategy "sliding_window")}} +from strands.agent.conversation_manager.sliding_window_conversation_manager import SlidingWindowConversationManager +{{/if}} +{{#if (eq truncationStrategy "summarization")}} +from strands.agent.conversation_manager.summarizing_conversation_manager import SummarizingConversationManager +{{/if}} +{{else}} +from strands.agent.conversation_manager.null_conversation_manager import NullConversationManager +{{/if}} +{{#if hasConfigBundle}} from bedrock_agentcore.runtime.context import BedrockAgentCoreContext {{/if}} +{{#if hasBrowser}} +from strands_tools.browser import AgentCoreBrowser +{{/if}} +{{#if hasCodeInterpreter}} +from strands_tools.code_interpreter import AgentCoreCodeInterpreter +{{/if}} from bedrock_agentcore.runtime import 
BedrockAgentCoreApp from model.load import load_model {{#if hasGateway}} from mcp_client.client import get_all_gateway_mcp_clients -{{else}} -from mcp_client.client import get_streamable_http_mcp_client {{/if}} +{{#if remoteMcpTools}} +from mcp_client.client import get_all_remote_mcp_clients +{{/if}} +{{#unless (or hasGateway remoteMcpTools)}} +{{#unless isExportHarness}} +from mcp_client.client import get_streamable_http_mcp_client +{{/unless}} +{{/unless}} {{#if hasMemory}} from memory.session import get_memory_session_manager {{/if}} -{{#if needsOs}} +{{#unless hasFileOperations}} +{{#if (or needsOs (some gitSkills "credentialArn"))}} import os {{/if}} +{{/unless}} {{#if hasPayment}} from capabilities.payments.payments import create_payments_plugin, PAYMENT_SYSTEM_PROMPT {{/if}} @@ -25,22 +77,35 @@ app = BedrockAgentCoreApp() log = app.logger -# Define a Streamable HTTP MCP Client +{{#if (or hasGateway remoteMcpTools)}} +# Define MCP clients for all configured MCP servers (gateways and/or remote MCP) +mcp_clients = [] {{#if hasGateway}} -mcp_clients = get_all_gateway_mcp_clients() +mcp_clients += get_all_gateway_mcp_clients() +{{/if}} +{{#if remoteMcpTools}} +mcp_clients += get_all_remote_mcp_clients() +{{/if}} {{else}} +{{#unless isExportHarness}} +# Define a Streamable HTTP MCP Client mcp_clients = [get_streamable_http_mcp_client()] +{{/unless}} {{/if}} +{{#if systemPromptText}} +DEFAULT_SYSTEM_PROMPT = """{{escapePyStr systemPromptText}}""" +{{else}} DEFAULT_SYSTEM_PROMPT = """ You are a helpful assistant. Use tools when appropriate. -{{#if needsOs}} +{{#if needsOs}}{{#unless isExportHarness}} You have access to the following mounted filesystems. 
def _make_inline_tool(name: str, spec: dict) -> PythonAgentTool:
    """Wrap an inline function spec in a placeholder ``PythonAgentTool``.

    The generated handler performs no work of its own: it flags the request
    state so the agent loop stops, and answers with a single-space text block
    so the tool call itself can be streamed back to the caller.

    Args:
        name: Tool name; also assigned to the handler's ``__name__``.
        spec: Tool spec dict handed to ``PythonAgentTool`` unchanged.

    Returns:
        The ``PythonAgentTool`` bound to the placeholder handler.
    """
    def _handler(tool: ToolUse, **kwargs: Any) -> ToolResult:
        # When no request_state is supplied the flag lands on a throwaway dict.
        request_state = kwargs.get("request_state", {})
        request_state["stop_event_loop"] = True
        placeholder_content = [{"text": " "}]
        return {"toolUseId": tool["toolUseId"], "status": "success", "content": placeholder_content}

    _handler.__name__ = name
    return PythonAgentTool(tool_name=name, tool_spec=spec, tool_func=_handler)
# Maintainer notes (kept out of the docstring on purpose: the docstring below is
# left unchanged because the @tool decorator is applied to this function and may
# consume it as the tool description).
#   * Every file handle is opened with a context manager so it is closed even
#     when a read/write raises.
#   * "insert" past the end of a file whose last line has no trailing newline
#     first terminates that line, so the inserted text lands on its own line.
#   * All failures are reported as "Error: ..." strings rather than raised.
@tool
def file_operations(
    command: str,
    path: str,
    old_str: str = None,
    new_str: str = None,
    file_text: str = None,
    insert_line: int = None,
    view_range: list = None,
) -> str:
    """Text editor tool for viewing and modifying files.

    Args:
        command: The command to execute ("view", "str_replace", "create", "insert")
        path: Path to the file or directory
        old_str: Text to replace (for str_replace command)
        new_str: Replacement text (for str_replace and insert commands)
        file_text: Content for new file (for create command)
        insert_line: Line number to insert after (for insert command)
        view_range: [start_line, end_line] for viewing specific lines (for view command)

    Returns:
        Result of the operation
    """
    try:
        if command == "view":
            if not os.path.exists(path):
                return f"Error: Path '{path}' does not exist"
            if os.path.isdir(path):
                return "\n".join(os.listdir(path))
            with open(path) as f:
                lines = f.read().splitlines()
            if view_range:
                start, end = view_range
                start_idx = max(0, start - 1)
                # An end of -1 means "through the last line".
                end_idx = len(lines) if end == -1 else min(len(lines), end)
                lines = lines[start_idx:end_idx]
                start_num = start_idx + 1
            else:
                start_num = 1
            return "\n".join(f"{start_num + i}: {line}" for i, line in enumerate(lines))
        elif command == "str_replace":
            if old_str is None or new_str is None:
                return "Error: str_replace requires both old_str and new_str parameters"
            if not os.path.exists(path):
                return f"Error: File '{path}' does not exist"
            with open(path) as f:
                content = f.read()
            if old_str not in content:
                return "Error: Text not found in file"
            count = content.count(old_str)
            # Refuse ambiguous edits: the target text must be unique in the file.
            if count > 1:
                return f"Error: Text appears {count} times in file. Please be more specific."
            with open(path, "w") as f:
                f.write(content.replace(old_str, new_str, 1))
            return f"Successfully replaced text in '{path}'"
        elif command == "create":
            if file_text is None:
                return "Error: create requires file_text parameter"
            os.makedirs(os.path.dirname(os.path.abspath(path)), exist_ok=True)
            with open(path, "w") as f:
                f.write(file_text)
            return f"Successfully created file '{path}'"
        elif command == "insert":
            if new_str is None or insert_line is None:
                return "Error: insert requires both new_str and insert_line parameters"
            if not os.path.exists(path):
                return f"Error: File '{path}' does not exist"
            with open(path) as f:
                lines = f.read().splitlines(True)
            if insert_line >= len(lines):
                # Appending: terminate an unterminated final line first so the
                # new text is not concatenated onto it.
                if lines and not lines[-1].endswith("\n"):
                    lines[-1] += "\n"
                lines.append(new_str + "\n")
            else:
                lines.insert(insert_line, new_str + "\n")
            with open(path, "w") as f:
                f.write("".join(lines))
            return f"Successfully inserted text in '{path}' at line {insert_line + 1}"
        else:
            return f"Error: Unknown command '{command}'"
    except Exception as e:
        return f"Error: {e}"
+{{else}} +{{#unless isExportHarness}} # Add MCP client to tools if available for mcp_client in mcp_clients: if mcp_client: tools.append(mcp_client) +{{/unless}} +{{/if}} {{#if hasConfigBundle}} @@ -155,19 +362,62 @@ def _override_tool_desc(self, event: BeforeToolCallEvent) -> None: {{/if}} +def _make_conversation_manager(): +{{#if truncationStrategy}} +{{#if (eq truncationStrategy "sliding_window")}} +{{#if truncationConfig}} + return SlidingWindowConversationManager(**{{safeJson truncationConfig}}, per_turn=True) +{{else}} + return SlidingWindowConversationManager(per_turn=True) +{{/if}} +{{else}} +{{#if truncationConfig}} + return SummarizingConversationManager(**{{safeJson truncationConfig}}) +{{else}} + return SummarizingConversationManager() +{{/if}} +{{/if}} +{{else}} + return NullConversationManager() +{{/if}} + {{#if hasMemory}} {{#unless hasPayment}} def agent_factory(): cache = {} - def get_or_create_agent(session_id, user_id): - key = f"{session_id}/{user_id}" + def get_or_create_agent(session_id, user_id{{#if hasSkillsFetcher}}, skill_plugins=None{{/if}}): + {{#if actorId}} + _actor_id = "{{actorId}}" + {{else}} + _actor_id = user_id + {{/if}} + key = f"{session_id}/{_actor_id}" if key not in cache: cache[key] = Agent( model=load_model(), - session_manager=get_memory_session_manager(session_id, user_id), + session_manager=get_memory_session_manager(session_id, _actor_id), + conversation_manager=_make_conversation_manager(), system_prompt=DEFAULT_SYSTEM_PROMPT, - tools=tools{{#if hasConfigBundle}}, - hooks=[ConfigBundleHook()]{{/if}} + tools=tools, + {{#if hasSkillsFetcher}} + plugins=skill_plugins or None, + {{/if}} + {{#if hasExecutionLimits}} + tool_executor=SequentialToolExecutor(), + callback_handler=None, + {{/if}} + hooks=[ + {{#if hasExecutionLimits}} + ExecutionLimitsHook( + {{#if maxIterations}}max_iterations={{maxIterations}},{{/if}} + {{#if maxTokens}}max_tokens={{maxTokens}},{{/if}} + {{#if 
timeoutSeconds}}timeout_seconds={{timeoutSeconds}},{{/if}} + ), + {{/if}} + {{#if hasConfigBundle}} + ConfigBundleHook(), + {{/if}} + ], ) return cache[key] return get_or_create_agent @@ -175,24 +425,45 @@ def get_or_create_agent(session_id, user_id): {{/unless}} {{else}} {{#if hasConfigBundle}} -def create_agent(): +def create_agent({{#if hasSkillsFetcher}}skill_plugins=None{{/if}}): return Agent( model=load_model(), system_prompt=DEFAULT_SYSTEM_PROMPT, tools=tools, + conversation_manager=_make_conversation_manager(), + {{#if hasSkillsFetcher}} + plugins=skill_plugins or None, + {{/if}} hooks=[ConfigBundleHook()], ) {{else}} {{#unless hasPayment}} _agent = None -def get_or_create_agent(): +def get_or_create_agent({{#if hasSkillsFetcher}}skill_plugins=None{{/if}}): global _agent if _agent is None: _agent = Agent( model=load_model(), system_prompt=DEFAULT_SYSTEM_PROMPT, tools=tools, + conversation_manager=_make_conversation_manager(), + {{#if hasSkillsFetcher}} + plugins=skill_plugins or None, + {{/if}} + {{#if hasExecutionLimits}} + tool_executor=SequentialToolExecutor(), + callback_handler=None, + {{/if}} + hooks=[ + {{#if hasExecutionLimits}} + ExecutionLimitsHook( + {{#if maxIterations}}max_iterations={{maxIterations}},{{/if}} + {{#if maxTokens}}max_tokens={{maxTokens}},{{/if}} + {{#if timeoutSeconds}}timeout_seconds={{timeoutSeconds}},{{/if}} + ), + {{/if}} + ], ) return _agent {{/unless}} @@ -200,6 +471,42 @@ def get_or_create_agent(): {{/if}} +def _extract_prompt(payload: dict): + """Accept harness-style messages[], tool_results[], or plain prompt string payloads.""" + if "messages" in payload: + return payload["messages"] + if "tool_results" in payload: + return [{"role": "user", "content": [{"toolResult": { + "toolUseId": tr["toolUseId"], + "status": tr.get("status", "success"), + "content": tr.get("content", []), + }} for tr in payload["tool_results"]]}] + return payload.get("prompt", "") + + +def _has_inline_function_call(messages) -> bool: + """Return 
def _is_inline_function_call(event: dict) -> bool:
    """Tell whether ``event`` starts a tool-use block for an inline function tool.

    Looks at ``event["contentBlockStart"]["start"]["toolUse"]["name"]`` and
    checks it against ``_INLINE_FUNCTION_NAMES``. Returns ``False`` when no
    inline tools are configured, when ``start`` is not a dict, or when the
    block carries no ``toolUse``.
    """
    if not _INLINE_FUNCTION_NAMES:
        return False

    block_start = event.get("contentBlockStart", {}).get("start", {})
    if not isinstance(block_start, dict):
        return False

    requested_tool = block_start.get("toolUse")
    if requested_tool is None:
        return False
    return requested_tool.get("name") in _INLINE_FUNCTION_NAMES
asyncio.to_thread(resolve_git_skills, git_skill_sources, _git_identity_client)) + {{/if}} + _skill_plugins = [AgentSkills(skills=skill_paths)] if skill_paths else [] +{{/if}} {{#if hasMemory}} {{#if hasPayment}} mem_session_id = getattr(context, 'session_id', 'default-session') + {{#if actorId}} + mem_user_id = "{{actorId}}" + {{else}} mem_user_id = getattr(context, 'user_id', 'default-user') + {{/if}} agent = Agent( model=load_model(), session_manager=get_memory_session_manager(mem_session_id, mem_user_id), system_prompt=DEFAULT_SYSTEM_PROMPT + PAYMENT_SYSTEM_PROMPT, tools=tools, - plugins=plugins,{{#if hasConfigBundle}} + plugins=plugins{{#if hasSkillsFetcher}} + _skill_plugins{{/if}},{{#if hasConfigBundle}} hooks=[ConfigBundleHook()],{{/if}} ) {{else}} session_id = getattr(context, 'session_id', 'default-session') + {{#if actorId}} + user_id = "{{actorId}}" + {{else}} user_id = getattr(context, 'user_id', 'default-user') - agent = get_or_create_agent(session_id, user_id) + {{/if}} + agent = get_or_create_agent(session_id, user_id{{#if hasSkillsFetcher}}, _skill_plugins{{/if}}) {{/if}} {{else}} {{#if hasPayment}} @@ -235,25 +571,100 @@ async def invoke(payload, context): model=load_model(), system_prompt=DEFAULT_SYSTEM_PROMPT + PAYMENT_SYSTEM_PROMPT, tools=tools, - plugins=plugins,{{#if hasConfigBundle}} + plugins=plugins{{#if hasSkillsFetcher}} + _skill_plugins{{/if}},{{#if hasConfigBundle}} hooks=[ConfigBundleHook()],{{/if}} ) {{else}} {{#if hasConfigBundle}} - agent = create_agent() + agent = create_agent({{#if hasSkillsFetcher}}_skill_plugins{{/if}}) {{else}} - agent = get_or_create_agent() + agent = get_or_create_agent({{#if hasSkillsFetcher}}_skill_plugins{{/if}}) {{/if}} {{/if}} {{/if}} - # Execute and format response - stream = agent.stream_async(payload.get("prompt")) + prompt = _extract_prompt(payload) + + {{#if inlineFunctionTools}} + # If Turn 2 carries the harness-style assistant(toolUse)+user(toolResult) pair, + # strip the placeholder turn Strands 
stored during Turn 1 so the real toolResult + # is injected cleanly — same protocol as the harness runtime. + if _has_inline_function_call(prompt): + msgs = agent.messages + if len(msgs) >= 2 and any("toolResult" in b for b in msgs[-1].get("content", [])): + del msgs[-2:] + {{/if}} + + {{#if hasExecutionLimits}} + timeout_seconds = {{#if timeoutSeconds}}{{timeoutSeconds}}{{else}}None{{/if}} + timeout_fired = False + watchdog_task = None + if timeout_seconds is not None: + async def _timeout_watchdog(): + nonlocal timeout_fired + await asyncio.sleep(timeout_seconds) + timeout_fired = True + agent.cancel() + watchdog_task = asyncio.create_task(_timeout_watchdog()) - async for event in stream: - # Handle Text parts of the response - if "data" in event and isinstance(event["data"], str): - yield event["data"] + try: + {{#if inlineFunctionTools}} + hit_inline_function = False + {{/if}} + async for event in agent.stream_async( + prompt, + ): + if not isinstance(event, dict) or "event" not in event: + continue + cbs = event["event"].get("contentBlockStart") + if cbs is not None and not cbs.get("start"): + continue + {{#if inlineFunctionTools}} + if not hit_inline_function: + hit_inline_function = _is_inline_function_call(event["event"]) + {{/if}} + yield event + {{#if inlineFunctionTools}} + if hit_inline_function and "messageStop" in event["event"]: + return + {{/if}} + + if timeout_fired: + yield {"event": {"messageStop": {"stopReason": "timeout_exceeded"}}} + except EventLoopException as e: + if isinstance(e.original_exception, ExecutionLimitExceeded): + yield {"event": {"messageStop": {"stopReason": str(e.original_exception)}}} + return + raise + finally: + if watchdog_task is not None: + watchdog_task.cancel() + try: + await watchdog_task + except asyncio.CancelledError: + pass + {{else}} + {{#if inlineFunctionTools}} + hit_inline_function = False + {{/if}} + async for event in agent.stream_async( + prompt, + ): + if not isinstance(event, dict) or "event" not in 
event: + continue + cbs = event["event"].get("contentBlockStart") + if cbs is not None and not cbs.get("start"): + continue + {{#if inlineFunctionTools}} + if not hit_inline_function: + hit_inline_function = _is_inline_function_call(event["event"]) + {{/if}} + yield event + {{#if inlineFunctionTools}} + if hit_inline_function and "messageStop" in event["event"]: + return + {{/if}} + {{/if}} if __name__ == "__main__": diff --git a/src/assets/python/http/strands/base/mcp_client/client.py b/src/assets/python/http/strands/base/mcp_client/client.py index 13dad314c..72987c456 100644 --- a/src/assets/python/http/strands/base/mcp_client/client.py +++ b/src/assets/python/http/strands/base/mcp_client/client.py @@ -29,10 +29,14 @@ def _get_bearer_token_{{snakeCase name}}(*, access_token: str): {{#each gatewayProviders}} def get_{{snakeCase name}}_mcp_client() -> MCPClient | None: """Returns an MCP Client connected to the {{name}} gateway.""" + {{#if hardcodedUrl}} + url = {{safeJson hardcodedUrl}} + {{else}} url = os.environ.get("{{envVarName}}") if not url: logger.warning("{{envVarName}} not set — {{name}} gateway tools unavailable") return None + {{/if}} {{#if (eq authType "AWS_IAM")}} return MCPClient(lambda: aws_iam_streamablehttp_client(url, aws_service="bedrock-agentcore", aws_region=os.environ.get("AWS_REGION", os.environ.get("AWS_DEFAULT_REGION"))), prefix="{{snakeCase name}}") {{else if (eq authType "CUSTOM_JWT")}} @@ -53,7 +57,41 @@ def get_all_gateway_mcp_clients() -> list[MCPClient]: clients.append(client) {{/each}} return clients -{{else}} +{{/if}} +{{#if remoteMcpTools}} +{{#if (some remoteMcpTools "headerCredentials")}} +from bedrock_agentcore.identity.auth import requires_api_key +{{/if}} +{{#each remoteMcpTools}} +{{#if headerCredentials}} +{{#each headerCredentials}} +@requires_api_key(provider_name="{{credentialName}}") +def _get_{{snakeCase ../name}}_{{snakeCase headerKey}}_key(api_key: str) -> str: + """Fetch {{headerKey}} credential for {{../name}} from 
AgentCore Identity.""" + return api_key + +{{/each}} +{{/if}} +def get_{{snakeCase name}}_mcp_client() -> MCPClient | None: + """Returns an MCP Client for the {{name}} remote MCP server.""" + url = {{safeJson url}} + {{#if headerCredentials}} + if os.getenv("LOCAL_DEV") == "1": + headers = { {{#each headerCredentials}}{{safeJson headerKey}}: os.environ.get("{{envVarName}}", ""){{#unless @last}}, {{/unless}}{{/each}} } + else: + headers = { {{#each headerCredentials}}{{safeJson headerKey}}: _get_{{snakeCase ../name}}_{{snakeCase headerKey}}_key(){{#unless @last}}, {{/unless}}{{/each}} } + return MCPClient(lambda: streamablehttp_client(url, headers=headers)) + {{else}} + return MCPClient(lambda: streamablehttp_client(url)) + {{/if}} + +{{/each}} +def get_all_remote_mcp_clients() -> list[MCPClient]: + """Returns all configured remote MCP clients.""" + clients = [{{#each remoteMcpTools}}get_{{snakeCase name}}_mcp_client(){{#unless @last}}, {{/unless}}{{/each}}] + return [c for c in clients if c is not None] +{{/if}} +{{#unless (or hasGateway remoteMcpTools)}} {{#if isVpc}} # VPC mode: external MCP endpoints are not reachable without a NAT gateway. # Add an AgentCore Gateway with `agentcore add gateway`, or configure your own endpoint below. @@ -62,6 +100,7 @@ def get_streamable_http_mcp_client() -> MCPClient | None: """No MCP server configured. Add a gateway with `agentcore add gateway`.""" return None {{else}} +{{#unless isExportHarness}} # ExaAI provides information about code through web searches, crawling and code context searches through their platform. 
Requires no authentication EXAMPLE_MCP_ENDPOINT = "https://mcp.exa.ai/mcp" @@ -69,5 +108,6 @@ def get_streamable_http_mcp_client() -> MCPClient: """Returns an MCP Client compatible with Strands""" # to use an MCP server that supports bearer authentication, add headers={"Authorization": f"Bearer {access_token}"} return MCPClient(lambda: streamablehttp_client(EXAMPLE_MCP_ENDPOINT)) +{{/unless}} {{/if}} -{{/if}} +{{/unless}} diff --git a/src/assets/python/http/strands/base/model/load.py b/src/assets/python/http/strands/base/model/load.py index 8954269e6..e1f013b89 100644 --- a/src/assets/python/http/strands/base/model/load.py +++ b/src/assets/python/http/strands/base/model/load.py @@ -4,7 +4,7 @@ def load_model() -> BedrockModel: """Get Bedrock model client using IAM credentials.""" - return BedrockModel(model_id="global.anthropic.claude-sonnet-4-5-20250929-v1:0") + return BedrockModel(model_id="{{#if modelId}}{{modelId}}{{else}}global.anthropic.claude-sonnet-4-5-20250929-v1:0{{/if}}") {{/if}} {{#if (eq modelProvider "Anthropic")}} import os @@ -80,7 +80,7 @@ def load_model() -> OpenAIModel: """Get authenticated OpenAI model client.""" return OpenAIModel( client_args={"api_key": _get_api_key()}, - model_id="gpt-4.1", + model_id="{{#if modelId}}{{modelId}}{{else}}gpt-4.1{{/if}}", ) {{/if}} {{#if (eq modelProvider "Gemini")}} @@ -118,6 +118,6 @@ def load_model() -> GeminiModel: """Get authenticated Gemini model client.""" return GeminiModel( client_args={"api_key": _get_api_key()}, - model_id="gemini-2.5-flash", + model_id="{{#if modelId}}{{modelId}}{{else}}gemini-2.5-flash{{/if}}", ) {{/if}} diff --git a/src/assets/python/http/strands/base/pyproject.toml b/src/assets/python/http/strands/base/pyproject.toml index 3722d0ea9..bed35447f 100644 --- a/src/assets/python/http/strands/base/pyproject.toml +++ b/src/assets/python/http/strands/base/pyproject.toml @@ -17,7 +17,10 @@ dependencies = [ {{/if}}"mcp >= 1.19.0", {{#if (eq modelProvider "OpenAI")}}"openai >= 1.0.0", 
{{/if}}"strands-agents >= 1.15.0", - {{#if hasGateway}}{{#if (includes gatewayAuthTypes "AWS_IAM")}}"mcp-proxy-for-aws >= 1.1.0", + {{#if (or hasBrowser hasCodeInterpreter)}}"strands-agents-tools >= 0.1.0", + {{/if}}{{#if hasBrowser}}"nest-asyncio >= 1.5.0", + "playwright >= 1.42.0", + {{/if}}{{#if hasGateway}}{{#if (includes gatewayAuthTypes "AWS_IAM")}}"mcp-proxy-for-aws >= 1.1.0", {{/if}}{{/if}} ] diff --git a/src/assets/python/http/strands/base/skills/fetcher.py b/src/assets/python/http/strands/base/skills/fetcher.py new file mode 100644 index 000000000..2f82cd6c2 --- /dev/null +++ b/src/assets/python/http/strands/base/skills/fetcher.py @@ -0,0 +1,279 @@ +"""Skill fetcher — downloads s3/git skills to local filesystem on first use. + +Resolved paths are passed to AgentSkills(skills=...) in main.py. +Cache directory: /.agents/skills/ — an absolute path under the system temp +directory (honors $TMPDIR, defaults to /tmp). The runtime working directory (e.g. +/var/task in a CodeZip runtime) is read-only, so the cache must live somewhere +guaranteed-writable. 
+""" + +import base64 +import hashlib +import json +import logging +import os +import shutil +import subprocess +import tempfile +from pathlib import Path +from typing import Optional + +logger = logging.getLogger(__name__) + +_SKILLS_BASE = Path(tempfile.gettempdir()) / ".agents" / "skills" +_GIT_TIMEOUT = 60 +_S3_MAX_SIZE_BYTES = 1 * 1024 * 1024 * 1024 # 1 GB + + +def _stable_hash(value: str) -> str: + return hashlib.sha256(value.encode()).hexdigest()[:12] + + +def _cleanup(path: Path) -> None: + """Remove a partially-created skill directory so retries don't see stale state.""" + shutil.rmtree(path, ignore_errors=True) + + +def _read_map(type_dir: Path) -> dict: + map_file = type_dir / ".map.json" + return json.loads(map_file.read_text()) if map_file.exists() else {} + + +def _write_map(type_dir: Path, mapping: dict) -> None: + type_dir.mkdir(parents=True, exist_ok=True) + (type_dir / ".map.json").write_text(json.dumps(mapping)) + + +def _resolve_cached(type_dir: Path, source_hash: str) -> Optional[str]: + """Return the cached skill directory for a source hash, or None if not on disk.""" + mapping = _read_map(type_dir) + dir_name = mapping.get(source_hash) + if dir_name and (type_dir / dir_name).exists(): + return str(type_dir / dir_name) + return None + + +def _read_skill_name(skill_dir: Path) -> str: + """Extract the skill name from SKILL.md YAML frontmatter.""" + content = (skill_dir / "SKILL.md").read_text() + if not content.startswith("---"): + raise ValueError(f"SKILL.md in {skill_dir} has no YAML frontmatter (must start with ---)") + parts = content.split("---", 2) + if len(parts) < 3: + raise ValueError(f"SKILL.md in {skill_dir} has malformed frontmatter (missing closing ---)") + for line in parts[1].strip().splitlines(): + if line.startswith("name:"): + name = line[len("name:"):].strip().strip("\"'") + if name: + return name + raise ValueError(f"SKILL.md in {skill_dir} is missing a 'name' field in frontmatter") + + +def _pick_dir_name(type_dir: Path, 
def _fetch_s3_skill(source: str, s3_client=None) -> Path:
    """Download an s3:// skill prefix and return the local directory.

    A cached copy (looked up by a hash of the normalised URI) is returned
    without touching S3. Otherwise every object under the prefix is downloaded
    into a temporary directory, which is then validated and renamed by
    ``_rename_and_cache_skill``.

    Args:
        source: ``s3://bucket/prefix`` URI; a trailing ``/`` is added if missing.
        s3_client: Optional S3 client. When omitted a default boto3 client is
            created; boto3 is only imported in that case.

    Returns:
        Path of the cached skill directory.

    Raises:
        ValueError: For a non-``s3://`` URI, a missing bucket, a listing larger
            than ``_S3_MAX_SIZE_BYTES``, a key that resolves outside the
            download directory, or a prefix that contains no files.
    """
    uri = source if source.endswith("/") else source + "/"
    if not uri.startswith("s3://"):
        raise ValueError(f"Invalid S3 URI (expected s3://bucket/prefix): {uri}")
    source_hash = _stable_hash(uri)
    type_dir = _SKILLS_BASE / "s3"

    cached = _resolve_cached(type_dir, source_hash)
    if cached:
        return Path(cached)

    bucket, _, prefix = uri[len("s3://"):].partition("/")
    if not bucket:
        raise ValueError(f"Invalid S3 URI (no bucket): {uri}")

    client = s3_client
    if not client:
        import boto3  # deferred: not needed when a client is injected
        client = boto3.client("s3")

    temp_dir = type_dir / source_hash
    _cleanup(temp_dir)
    temp_dir.mkdir(parents=True, exist_ok=True)
    temp_root = temp_dir.resolve()

    total_bytes = 0
    downloaded = 0
    try:
        paginator = client.get_paginator("list_objects_v2")
        for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
            for obj in page.get("Contents", []):
                total_bytes += obj["Size"]
                if total_bytes > _S3_MAX_SIZE_BYTES:
                    raise ValueError(f"S3 skill {uri} exceeds 1 GB size limit")
                rel = obj["Key"][len(prefix):].lstrip("/")
                # Skip the prefix object itself and "folder marker" keys ending
                # in "/": downloading a marker would create a regular file where
                # a directory is needed for the keys nested beneath it.
                if not rel or rel.endswith("/"):
                    continue
                dest = (temp_dir / rel).resolve()
                if dest != temp_root and not str(dest).startswith(str(temp_root) + os.sep):
                    raise ValueError(f"Path traversal detected in S3 key: {obj['Key']}")
                dest.parent.mkdir(parents=True, exist_ok=True)
                client.download_file(bucket, obj["Key"], str(dest))
                downloaded += 1
    except Exception:
        # Same policy as the git fetcher: never leave a partial download behind.
        _cleanup(temp_dir)
        raise

    if downloaded == 0:
        _cleanup(temp_dir)
        raise ValueError(f"No files found at S3 URI: {uri}")

    return _rename_and_cache_skill(type_dir, temp_dir, source_hash, temp_dir, source_label=uri)

def _fetch_git_skill(url: str, skill_path: str = "", credential_arn: Optional[str] = None,
                     username: Optional[str] = None, identity_client=None) -> Path:
    """Shallow-clone a git skill repository and return the local skill directory.

    Returns the directory containing SKILL.md (the subdir itself for sparse checkouts).

    Args:
        url: Repository URL handed to ``git clone``.
        skill_path: Optional repository-relative subdirectory holding the skill;
            when set, a sparse checkout limited to that path is performed.
        credential_arn: Optional credential ARN forwarded to ``_build_git_auth_env``.
        username: Optional username forwarded to ``_build_git_auth_env``.
        identity_client: Optional client forwarded to ``_build_git_auth_env``.

    Raises:
        ValueError: If ``skill_path`` is absolute or contains ``..``, or does
            not exist in the cloned repository.
        subprocess.CalledProcessError: If a git command exits non-zero
            (its stderr is logged before re-raising).
        subprocess.TimeoutExpired: If a git command exceeds ``_GIT_TIMEOUT``.
    """
    if skill_path and (os.path.isabs(skill_path) or ".." in Path(skill_path).parts):
        raise ValueError(f"Path traversal detected in skill path: {skill_path}")

    source_hash = _stable_hash(f"{url}:{skill_path}")
    type_dir = _SKILLS_BASE / "git"

    cached = _resolve_cached(type_dir, source_hash)
    if cached:
        return Path(cached) / skill_path if skill_path else Path(cached)

    # Resolve credentials before creating anything on disk so a failure here
    # leaves no empty directory behind.
    extra_env = _build_git_auth_env(credential_arn, username, identity_client)
    # GIT_TERMINAL_PROMPT=0 makes git fail immediately instead of waiting for
    # interactive credentials until the timeout expires.
    git_env = {**os.environ, "GIT_TERMINAL_PROMPT": "0", **extra_env}

    temp_dir = type_dir / source_hash
    _cleanup(temp_dir)
    temp_dir.mkdir(parents=True, exist_ok=True)

    def _git(args, cwd=None):
        subprocess.run(
            ["git", *args],
            check=True, timeout=_GIT_TIMEOUT, capture_output=True, cwd=cwd, env=git_env,
        )

    try:
        # "--" ends option parsing so a URL beginning with "-" cannot be
        # interpreted as a git option.
        if skill_path:
            _git(["clone", "--depth", "1", "--filter=blob:none", "--sparse", "--", url, str(temp_dir)])
            _git(["sparse-checkout", "set", skill_path], cwd=str(temp_dir))
        else:
            _git(["clone", "--depth", "1", "--", url, str(temp_dir)])
    except subprocess.CalledProcessError as e:
        _cleanup(temp_dir)
        # stderr is captured, and str(CalledProcessError) omits it; log it so
        # the real git failure is diagnosable.
        stderr = (e.stderr or b"").decode("utf-8", errors="replace").strip()
        logger.error("git exited with status %s while fetching skill: %s", e.returncode, stderr)
        raise
    except Exception:
        _cleanup(temp_dir)
        raise

    if skill_path and not (temp_dir / skill_path).exists():
        _cleanup(temp_dir)
        raise ValueError(f"Skill path '{skill_path}' not found in repository '{url}'")

    # SKILL.md lives inside the subdir for sparse checkouts.
    skill_root = temp_dir / skill_path if skill_path else temp_dir
    label = f"{url}:{skill_path}" if skill_path else url
    final_dir = _rename_and_cache_skill(type_dir, temp_dir, source_hash, skill_root, source_label=label)
    return final_dir / skill_path if skill_path else final_dir
+ """ + paths = [] + for uri in sources: + try: + skill_dir = _fetch_s3_skill(uri, s3_client) + except Exception as e: + raise ValueError(f"Failed to resolve S3 skill '{uri}': {e}") from e + paths.append(str(skill_dir.resolve())) + return paths + + +def resolve_git_skills(sources: list, identity_client=None) -> list: + """Resolve git skill dicts to local filesystem paths. + + Each source is a dict with keys: url (required), path (optional), + credentialArn (optional), username (optional). + + Any fetch failure raises and fails the invocation — a partial skill set + would silently run the agent without capabilities the harness declared. + """ + paths = [] + for source in sources: + try: + skill_dir = _fetch_git_skill( + url=source["url"], + skill_path=source.get("path") or "", + credential_arn=source.get("credentialArn"), + username=source.get("username"), + identity_client=identity_client, + ) + except Exception as e: + raise ValueError(f"Failed to resolve git skill '{source.get('url', source)}': {e}") from e + paths.append(str(skill_dir.resolve())) + return paths diff --git a/src/assets/python/http/strands/capabilities/execution-limits/hooks/execution_limits.py b/src/assets/python/http/strands/capabilities/execution-limits/hooks/execution_limits.py new file mode 100644 index 000000000..057f348d8 --- /dev/null +++ b/src/assets/python/http/strands/capabilities/execution-limits/hooks/execution_limits.py @@ -0,0 +1,54 @@ +import time +from typing import Optional + +from strands.hooks import BeforeModelCallEvent +from strands.hooks.registry import HookProvider, HookRegistry +from strands.types.exceptions import EventLoopException + + +class ExecutionLimitExceeded(Exception): + def __init__(self, message: str) -> None: + super().__init__(message) + + +class ExecutionLimitsHook(HookProvider): + def __init__( + self, + max_iterations: Optional[int] = None, + max_tokens: Optional[int] = None, + timeout_seconds: Optional[float] = None, + ) -> None: + self._max_iterations = 
def _check_limits(self, event: BeforeModelCallEvent) -> None:
    """Enforce the configured limits; registered for ``BeforeModelCallEvent``.

    Every call counts as one iteration. The iteration cap, the timeout
    (measured with the monotonic clock from the moment the hook was
    constructed) and the output-token budget are then checked in that order.
    The first violated limit aborts by raising ``EventLoopException``
    wrapping an ``ExecutionLimitExceeded``. Limits left as ``None`` are
    skipped.
    """

    def _abort(reason: str) -> None:
        # Single place that builds the wrapped exception for every limit.
        raise EventLoopException(ExecutionLimitExceeded(reason))

    self._iteration_count += 1

    iteration_cap = self._max_iterations
    if iteration_cap is not None and self._iteration_count > iteration_cap:
        _abort(f"Max iterations exceeded: {self._max_iterations}")

    time_budget = self._timeout_seconds
    if time_budget is not None:
        elapsed = time.monotonic() - self._start_time
        if elapsed > time_budget:
            _abort(f"Timeout exceeded: {self._timeout_seconds}s (elapsed {elapsed:.1f}s)")

    token_budget = self._max_tokens
    if token_budget is not None:
        # Only output tokens accumulated so far are counted against the budget.
        used = event.agent.event_loop_metrics.accumulated_usage.get("outputTokens", 0)
        if used >= token_budget:
            _abort(f"Max output tokens exceeded: {used}/{self._max_tokens}")
f"/summaries/{actor_id}/{session_id}": RetrievalConfig(top_k=3, relevance_score=0.5), {{/if}} } {{/if}} diff --git a/src/cli/aws/__tests__/agentcore-ab-tests.test.ts b/src/cli/aws/__tests__/agentcore-ab-tests.test.ts index 94dca3bdb..f0b21ccd8 100644 --- a/src/cli/aws/__tests__/agentcore-ab-tests.test.ts +++ b/src/cli/aws/__tests__/agentcore-ab-tests.test.ts @@ -136,15 +136,11 @@ describe('agentcore-ab-tests', () => { roleArn: 'arn:role', variants: [], evaluationConfig: { onlineEvaluationConfigArn: 'arn:eval' }, - trafficAllocationConfig: { routeOnHeader: { headerName: 'X-AB' } }, - maxDurationDays: 30, enableOnCreate: true, }); const body = JSON.parse(mockFetch.mock.calls[0]![1].body); expect(body.description).toBe('A description'); - expect(body.trafficAllocationConfig).toEqual({ routeOnHeader: { headerName: 'X-AB' } }); - expect(body.maxDurationDays).toBe(30); expect(body.enableOnCreate).toBe(true); }); @@ -249,14 +245,12 @@ describe('agentcore-ab-tests', () => { abTestId: 'abt-123', name: 'Updated', description: 'New desc', - maxDurationDays: 60, roleArn: 'arn:new-role', }); const body = JSON.parse(mockFetch.mock.calls[0]![1].body); expect(body.name).toBe('Updated'); expect(body.description).toBe('New desc'); - expect(body.maxDurationDays).toBe(60); expect(body.roleArn).toBe('arn:new-role'); }); }); diff --git a/src/cli/aws/__tests__/agentcore-batch-evaluation.test.ts b/src/cli/aws/__tests__/agentcore-batch-evaluation.test.ts new file mode 100644 index 000000000..262aad0e1 --- /dev/null +++ b/src/cli/aws/__tests__/agentcore-batch-evaluation.test.ts @@ -0,0 +1,357 @@ +import { + deleteBatchEvaluation, + getBatchEvaluation, + listBatchEvaluations, + startBatchEvaluation, + stopBatchEvaluation, +} from '../agentcore-batch-evaluation.js'; +import { beforeEach, describe, expect, it, vi } from 'vitest'; + +const mockFetch = vi.fn(); +vi.stubGlobal('fetch', mockFetch); + +vi.mock('../account', () => ({ + getCredentialProvider: vi.fn().mockReturnValue({ + accessKeyId: 
/**
 * Builds a minimal stand-in for a fetch `Response` carrying a JSON payload.
 *
 * @param body   Value returned by `json()` and serialised by `text()`.
 * @param status HTTP status code; `ok` is true only for 200–299.
 * @returns Object exposing `ok`, `status`, `headers`, `json` and `text`.
 */
function mockJsonResponse(body: unknown, status = 200) {
  const ok = 200 <= status && status < 300;
  // Fixed request-id header so callers that read it get a stable value.
  const headers = new Map([['x-amzn-requestid', 'test-request-id']]);
  const json = () => Promise.resolve(body);
  const text = () => Promise.resolve(JSON.stringify(body));

  return { ok, status, headers, json, text };
}
// Without a kmsKeyArn option the request body must not carry the field at all.
it('omits kmsKeyArn when not provided', async () => {
  const pendingEvaluation = {
    batchEvaluationId: 'batch-123',
    batchEvaluationArn: 'arn:batch-123',
    batchEvaluationName: 'MyBatchEval',
    status: 'PENDING',
  };
  mockFetch.mockResolvedValue(mockJsonResponse(pendingEvaluation));

  const cloudWatchLogs = {
    serviceNames: ['bedrock-agentcore'],
    logGroupNames: ['my-log-group'],
  };
  await startBatchEvaluation({
    region: 'us-west-2',
    name: 'MyBatchEval',
    evaluators: [{ evaluatorId: 'eval-1' }],
    dataSourceConfig: { cloudWatchLogs },
  });

  // Second argument of the first fetch call is the request init object.
  const sentBody = JSON.parse(mockFetch.mock.calls[0]![1].body);
  expect(sentBody.kmsKeyArn).toBeUndefined();
});
// A caller-supplied clientToken must be forwarded verbatim in the request body.
it('includes clientToken when provided', async () => {
  const pendingEvaluation = {
    batchEvaluationId: 'batch-123',
    batchEvaluationArn: 'arn:batch-123',
    batchEvaluationName: 'MyBatchEval',
    status: 'PENDING',
  };
  mockFetch.mockResolvedValue(mockJsonResponse(pendingEvaluation));

  const cloudWatchLogs = {
    serviceNames: ['bedrock-agentcore'],
    logGroupNames: ['my-log-group'],
  };
  await startBatchEvaluation({
    region: 'us-west-2',
    name: 'MyBatchEval',
    evaluators: [{ evaluatorId: 'eval-1' }],
    dataSourceConfig: { cloudWatchLogs },
    clientToken: 'token-abc',
  });

  // Second argument of the first fetch call is the request init object.
  const sentBody = JSON.parse(mockFetch.mock.calls[0]![1].body);
  expect(sentBody.clientToken).toBe('token-abc');
});
describe('getBatchEvaluation', () => {
  // Fixtures shared by both cases; each test spreads them so nothing is mutated across tests.
  const lookup = { region: 'us-west-2', batchEvaluationId: 'batch-123' };
  const kmsKeyArn = 'arn:aws:kms:us-west-2:123456789012:key/12345678-1234-1234-1234-123456789012';
  const completedEvaluation = {
    batchEvaluationId: 'batch-123',
    batchEvaluationArn: 'arn:batch-123',
    batchEvaluationName: 'MyBatchEval',
    status: 'COMPLETED',
  };

  it('sends GET to /evaluations/batch-evaluate/{id}', async () => {
    mockFetch.mockResolvedValue(mockJsonResponse({ ...completedEvaluation, kmsKeyArn }));

    const evaluation = await getBatchEvaluation({ ...lookup });

    // The API's `batchEvaluationName` is surfaced to callers as `name`.
    expect(evaluation.batchEvaluationId).toBe('batch-123');
    expect(evaluation.name).toBe('MyBatchEval');
    expect(evaluation.status).toBe('COMPLETED');
    expect(evaluation.kmsKeyArn).toBe(
      'arn:aws:kms:us-west-2:123456789012:key/12345678-1234-1234-1234-123456789012'
    );
    expect(mockFetch).toHaveBeenCalledWith(
      expect.stringContaining('/evaluations/batch-evaluate/batch-123'),
      expect.objectContaining({ method: 'GET' })
    );
  });

  it('returns undefined kmsKeyArn when not present in response', async () => {
    mockFetch.mockResolvedValue(mockJsonResponse({ ...completedEvaluation }));

    const evaluation = await getBatchEvaluation({ ...lookup });

    expect(evaluation.kmsKeyArn).toBeUndefined();
  });
});
describe('stopBatchEvaluation', () => {
  it('sends POST to /evaluations/batch-evaluate/{id}/stop', async () => {
    const stoppingEvaluation = { batchEvaluationId: 'batch-123', status: 'STOPPING' };
    mockFetch.mockResolvedValue(mockJsonResponse(stoppingEvaluation));

    const { batchEvaluationId, status } = await stopBatchEvaluation({
      region: 'us-west-2',
      batchEvaluationId: 'batch-123',
    });

    // Response fields are passed through to the caller.
    expect(batchEvaluationId).toBe('batch-123');
    expect(status).toBe('STOPPING');

    // The stop action is a POST on the `/stop` sub-resource of the evaluation.
    const expectedUrl = expect.stringContaining('/evaluations/batch-evaluate/batch-123/stop');
    const expectedInit = expect.objectContaining({ method: 'POST' });
    expect(mockFetch).toHaveBeenCalledWith(expectedUrl, expectedInit);
  });
});
/**
 * Builds a minimal stand-in for a fetch `Response` carrying a JSON payload.
 *
 * Includes the same `headers` map as the equivalent helper in the
 * batch-evaluation suite, so a code path that reads
 * `response.headers.get(...)` does not fail with a TypeError on the mock.
 * NOTE(review): presumably the shared request helper reads
 * `x-amzn-requestid` on error responses — confirm against the implementation.
 *
 * @param body   Value returned by `json()` and serialised by `text()`.
 * @param status HTTP status code; `ok` is true only for 200–299.
 * @returns Object exposing `ok`, `status`, `headers`, `json` and `text`.
 */
function mockJsonResponse(body: unknown, status = 200) {
  return {
    ok: status >= 200 && status < 300,
    status,
    headers: new Map([['x-amzn-requestid', 'test-request-id']]),
    json: () => Promise.resolve(body),
    text: () => Promise.resolve(JSON.stringify(body)),
  };
}
// An empty `bundles` list in the response is returned as an empty array.
it('returns empty array when no bundles exist', async () => {
  const emptyListing = mockJsonResponse({ bundles: [] });
  mockFetch.mockResolvedValue(emptyListing);

  const { bundles } = await listConfigurationBundles({ region: 'us-west-2' });

  expect(bundles).toEqual([]);
});
}], - tools: [{ type: 'agentcore_browser', name: 'browser' }], - maxIterations: 75, - }); - - expect(result.harness.harnessId).toBe('h-123'); - expect(mockRequest).toHaveBeenCalledWith( - expect.objectContaining({ - method: 'POST', - path: '/harnesses', - body: expect.objectContaining({ - harnessName: 'test', - executionRoleArn: 'arn:aws:iam::123:role/TestRole', - clientToken: expect.any(String), - model: { bedrockModelConfig: { modelId: 'us.anthropic.claude-sonnet-4-6-20250514-v1:0' } }, - systemPrompt: [{ text: 'You are helpful.' }], - tools: [{ type: 'agentcore_browser', name: 'browser' }], - maxIterations: 75, - }), - }) - ); - }); - - it('omits optional fields when not provided', async () => { - mockRequest.mockResolvedValue({ harness: { harnessId: 'h-1' } }); - - await createHarness({ - region: 'us-west-2', - harnessName: 'minimal', - executionRoleArn: 'arn:aws:iam::123:role/R', - }); - - const body = mockRequest.mock.calls[0]![0].body; - expect(body.model).toBeUndefined(); - expect(body.tools).toBeUndefined(); - expect(body.memory).toBeUndefined(); - expect(body.maxIterations).toBeUndefined(); - }); - }); - describe('getHarness', () => { it('sends GET /harnesses/{harnessId}', async () => { const harness = { harnessId: 'h-123', status: 'READY' }; @@ -105,46 +47,6 @@ describe('Harness control plane operations', () => { }); }); - describe('updateHarness', () => { - it('sends PATCH /harnesses/{harnessId}', async () => { - mockRequest.mockResolvedValue({ harness: { harnessId: 'h-123', status: 'UPDATING' } }); - - await updateHarness({ - region: 'us-west-2', - harnessId: 'h-123', - model: { bedrockModelConfig: { modelId: 'new-model' } }, - maxTokens: 4096, - }); - - expect(mockRequest).toHaveBeenCalledWith( - expect.objectContaining({ - method: 'PATCH', - path: '/harnesses/h-123', - body: expect.objectContaining({ - clientToken: expect.any(String), - model: { bedrockModelConfig: { modelId: 'new-model' } }, - maxTokens: 4096, - }), - }) - ); - }); - - it('passes 
nullable wrapper fields for memory and environmentArtifact', async () => { - mockRequest.mockResolvedValue({ harness: { harnessId: 'h-123' } }); - - await updateHarness({ - region: 'us-west-2', - harnessId: 'h-123', - memory: { optionalValue: null }, - environmentArtifact: { optionalValue: null }, - }); - - const body = mockRequest.mock.calls[0]![0].body; - expect(body.memory).toEqual({ optionalValue: null }); - expect(body.environmentArtifact).toEqual({ optionalValue: null }); - }); - }); - describe('deleteHarness', () => { it('sends DELETE /harnesses/{harnessId} with clientToken query param', async () => { mockRequest.mockResolvedValue({ harness: { harnessId: 'h-123', status: 'DELETING' } }); @@ -160,47 +62,6 @@ describe('Harness control plane operations', () => { ); }); }); - - describe('listHarnesses', () => { - it('sends GET /harnesses with query params', async () => { - mockRequest.mockResolvedValue({ - harnesses: [{ harnessId: 'h-1', harnessName: 'one' }], - nextToken: undefined, - }); - - const result = await listHarnesses({ region: 'us-west-2', maxResults: 10 }); - - expect(result.harnesses).toHaveLength(1); - expect(mockRequest).toHaveBeenCalledWith( - expect.objectContaining({ - method: 'GET', - path: '/harnesses', - query: { maxResults: '10' }, - }) - ); - }); - }); - - describe('listAllHarnesses', () => { - it('auto-paginates across multiple pages', async () => { - mockRequest - .mockResolvedValueOnce({ - harnesses: [{ harnessId: 'h-1' }], - nextToken: 'tok-1', - }) - .mockResolvedValueOnce({ - harnesses: [{ harnessId: 'h-2' }], - nextToken: undefined, - }); - - const all = await listAllHarnesses('us-west-2'); - - expect(all).toHaveLength(2); - expect(all[0]!.harnessId).toBe('h-1'); - expect(all[1]!.harnessId).toBe('h-2'); - expect(mockRequest).toHaveBeenCalledTimes(2); - }); - }); }); describe('invokeHarness (streaming)', () => { diff --git a/src/cli/aws/__tests__/agentcore-http-gateways.test.ts b/src/cli/aws/__tests__/agentcore-http-gateways.test.ts 
deleted file mode 100644 index f9ace9a7a..000000000 --- a/src/cli/aws/__tests__/agentcore-http-gateways.test.ts +++ /dev/null @@ -1,235 +0,0 @@ -import { createHttpGatewayTarget, getHttpGateway, listHttpGatewayTargets } from '../agentcore-http-gateways.js'; -import { beforeEach, describe, expect, it, vi } from 'vitest'; - -const mockFetch = vi.fn(); -vi.stubGlobal('fetch', mockFetch); - -vi.mock('../account', () => ({ - getCredentialProvider: vi.fn().mockReturnValue({ - accessKeyId: 'AKID', - secretAccessKey: 'SECRET', - sessionToken: 'TOKEN', - }), -})); - -vi.mock('@smithy/signature-v4', () => ({ - SignatureV4: class { - // eslint-disable-next-line @typescript-eslint/require-await - async sign(request: { headers: Record }) { - return { headers: { ...request.headers, Authorization: 'signed' } }; - } - }, -})); - -vi.mock('@aws-crypto/sha256-js', () => ({ - Sha256: class {}, -})); - -vi.mock('@aws-sdk/credential-provider-node', () => ({ - defaultProvider: vi.fn(), -})); - -function mockJsonResponse(body: unknown, status = 200) { - return { - ok: status >= 200 && status < 300, - status, - headers: new Map([['x-amzn-requestid', 'test-request-id']]), - json: () => Promise.resolve(body), - text: () => Promise.resolve(JSON.stringify(body)), - }; -} - -describe('agentcore-http-gateways', () => { - beforeEach(() => { - vi.clearAllMocks(); - }); - - describe('createHttpGatewayTarget', () => { - it('sends agentcoreRuntime in request body', async () => { - mockFetch.mockResolvedValue( - mockJsonResponse({ - targetId: 'tgt-001', - name: 'my-target', - status: 'CREATING', - }) - ); - - const result = await createHttpGatewayTarget({ - region: 'us-east-1', - gatewayId: 'gw-123', - targetName: 'my-target', - runtimeArn: 'arn:aws:bedrock-agentcore:us-east-1:123:runtime/rt-1', - qualifier: 'DEFAULT', - }); - - expect(result.targetId).toBe('tgt-001'); - expect(result.name).toBe('my-target'); - expect(mockFetch).toHaveBeenCalledTimes(1); - - const body = 
JSON.parse(mockFetch.mock.calls[0]![1].body); - expect(body.name).toBe('my-target'); - expect(body.targetConfiguration.http.agentcoreRuntime).toEqual({ - arn: 'arn:aws:bedrock-agentcore:us-east-1:123:runtime/rt-1', - qualifier: 'DEFAULT', - }); - expect(body.credentialProviderConfigurations).toEqual([{ credentialProviderType: 'GATEWAY_IAM_ROLE' }]); - expect(body.clientToken).toBeDefined(); - }); - - it('falls back to runtimeTargetConfiguration on ValidationException', async () => { - // First call fails with ValidationException - mockFetch.mockResolvedValueOnce({ - ok: false, - status: 400, - headers: new Map([['x-amzn-requestid', 'test-request-id']]), - text: () => Promise.resolve('ValidationException: Unknown field agentcoreRuntime'), - }); - // Second call (fallback) succeeds - mockFetch.mockResolvedValueOnce( - mockJsonResponse({ - targetId: 'tgt-002', - name: 'my-target', - status: 'CREATING', - }) - ); - - const result = await createHttpGatewayTarget({ - region: 'us-east-1', - gatewayId: 'gw-123', - targetName: 'my-target', - runtimeArn: 'arn:aws:bedrock-agentcore:us-east-1:123:runtime/rt-1', - }); - - expect(result.targetId).toBe('tgt-002'); - expect(mockFetch).toHaveBeenCalledTimes(2); - - // Second call should use runtimeTargetConfiguration - const fallbackBody = JSON.parse(mockFetch.mock.calls[1]![1].body); - expect(fallbackBody.targetConfiguration.http.runtimeTargetConfiguration).toEqual({ - arn: 'arn:aws:bedrock-agentcore:us-east-1:123:runtime/rt-1', - qualifier: 'DEFAULT', - }); - }); - - it('falls back to runtimeTargetConfiguration on 400 status', async () => { - // First call fails with 400 - mockFetch.mockResolvedValueOnce({ - ok: false, - status: 400, - headers: new Map([['x-amzn-requestid', 'test-request-id']]), - text: () => Promise.resolve('400 Bad Request'), - }); - // Second call (fallback) succeeds - mockFetch.mockResolvedValueOnce( - mockJsonResponse({ - targetId: 'tgt-003', - name: 'my-target', - status: 'CREATING', - }) - ); - - const 
result = await createHttpGatewayTarget({ - region: 'us-east-1', - gatewayId: 'gw-123', - targetName: 'my-target', - runtimeArn: 'arn:runtime', - }); - - expect(result.targetId).toBe('tgt-003'); - expect(mockFetch).toHaveBeenCalledTimes(2); - }); - - it('throws on non-validation errors (no fallback)', async () => { - mockFetch.mockResolvedValue({ - ok: false, - status: 500, - headers: new Map([['x-amzn-requestid', 'test-request-id']]), - text: () => Promise.resolve('Internal Server Error'), - }); - - await expect( - createHttpGatewayTarget({ - region: 'us-east-1', - gatewayId: 'gw-123', - targetName: 'my-target', - runtimeArn: 'arn:runtime', - }) - ).rejects.toThrow('Failed to create target'); - - // Only one call — no fallback attempt - expect(mockFetch).toHaveBeenCalledTimes(1); - }); - }); - - describe('getHttpGateway', () => { - it('returns gateway details', async () => { - mockFetch.mockResolvedValue( - mockJsonResponse({ - gatewayId: 'gw-123', - gatewayArn: 'arn:aws:bedrock-agentcore:us-east-1:123:gateway/gw-123', - gatewayUrl: 'https://gw-123.example.com', - name: 'my-gateway', - status: 'READY', - authorizerType: 'AWS_IAM', - roleArn: 'arn:aws:iam::123:role/GwRole', - createdAt: '2026-01-01T00:00:00Z', - updatedAt: '2026-01-02T00:00:00Z', - }) - ); - - const result = await getHttpGateway({ region: 'us-east-1', gatewayId: 'gw-123' }); - - expect(result.gatewayId).toBe('gw-123'); - expect(result.name).toBe('my-gateway'); - expect(result.status).toBe('READY'); - expect(result.gatewayUrl).toBe('https://gw-123.example.com'); - expect(mockFetch).toHaveBeenCalledWith( - expect.stringContaining('/gateways/gw-123'), - expect.objectContaining({ method: 'GET' }) - ); - }); - }); - - describe('listHttpGatewayTargets', () => { - it('returns targets array', async () => { - mockFetch.mockResolvedValue( - mockJsonResponse({ - targets: [ - { targetId: 'tgt-1', name: 'target-1', status: 'READY' }, - { targetId: 'tgt-2', name: 'target-2', status: 'CREATING' }, - ], - }) - ); - 
- const result = await listHttpGatewayTargets({ - region: 'us-east-1', - gatewayId: 'gw-123', - }); - - expect(result.targets).toHaveLength(2); - expect(result.targets[0]!.targetId).toBe('tgt-1'); - expect(result.targets[0]!.name).toBe('target-1'); - expect(result.targets[1]!.targetId).toBe('tgt-2'); - expect(mockFetch).toHaveBeenCalledWith( - expect.stringContaining('/gateways/gw-123/targets'), - expect.objectContaining({ method: 'GET' }) - ); - }); - - it('handles response with items field instead of targets', async () => { - mockFetch.mockResolvedValue( - mockJsonResponse({ - items: [{ targetId: 'tgt-1', name: 'target-1', status: 'READY' }], - }) - ); - - const result = await listHttpGatewayTargets({ - region: 'us-east-1', - gatewayId: 'gw-123', - }); - - expect(result.targets).toHaveLength(1); - expect(result.targets[0]!.targetId).toBe('tgt-1'); - }); - }); -}); diff --git a/src/cli/aws/__tests__/agentcore-recommendation.test.ts b/src/cli/aws/__tests__/agentcore-recommendation.test.ts index 1b330cf30..f27fdb6fb 100644 --- a/src/cli/aws/__tests__/agentcore-recommendation.test.ts +++ b/src/cli/aws/__tests__/agentcore-recommendation.test.ts @@ -171,6 +171,81 @@ describe('agentcore-recommendation', () => { expect(body.description).toBe('Test description'); }); + it('includes kmsKeyArn when provided', async () => { + mockFetch.mockResolvedValue( + mockJsonResponse({ + recommendationId: 'r1', + recommendationArn: 'arn:1', + name: 'MyRec', + type: 'SYSTEM_PROMPT_RECOMMENDATION', + status: 'PENDING', + }) + ); + + await startRecommendation({ + region: 'us-west-2', + name: 'MyRec', + type: 'SYSTEM_PROMPT_RECOMMENDATION', + recommendationConfig: { + systemPromptRecommendationConfig: { + systemPrompt: { text: '' }, + agentTraces: { + cloudwatchLogs: { + logGroupArns: [], + serviceNames: ['bedrock-agentcore'], + startTime: '2026-03-23T00:00:00.000Z', + endTime: '2026-03-30T00:00:00.000Z', + }, + }, + evaluationConfig: { + evaluators: [{ evaluatorArn: 
// Without a kmsKeyArn option the StartRecommendation body must not carry the field.
it('omits kmsKeyArn when not provided', async () => {
  const pendingRecommendation = {
    recommendationId: 'r1',
    recommendationArn: 'arn:1',
    name: 'MyRec',
    type: 'SYSTEM_PROMPT_RECOMMENDATION',
    status: 'PENDING',
  };
  mockFetch.mockResolvedValue(mockJsonResponse(pendingRecommendation));

  const cloudwatchLogs = {
    logGroupArns: [],
    serviceNames: ['bedrock-agentcore'],
    startTime: '2026-03-23T00:00:00.000Z',
    endTime: '2026-03-30T00:00:00.000Z',
  };
  const evaluationConfig = {
    evaluators: [{ evaluatorArn: 'arn:aws:bedrock-agentcore:::evaluator/Builtin.Helpfulness' }],
  };

  await startRecommendation({
    region: 'us-west-2',
    name: 'MyRec',
    type: 'SYSTEM_PROMPT_RECOMMENDATION',
    recommendationConfig: {
      systemPromptRecommendationConfig: {
        systemPrompt: { text: '' },
        agentTraces: { cloudwatchLogs },
        evaluationConfig,
      },
    },
  });

  // Second argument of the first fetch call is the request init object.
  const sentBody = JSON.parse(mockFetch.mock.calls[0]![1].body);
  expect(sentBody.kmsKeyArn).toBeUndefined();
});
// ConverseStream-shaped payloads: text lives at event.contentBlockDelta.delta.text.
it('extracts text delta from ConverseStream-shaped events', () => {
  const { content, error } = parseSSELine(
    'data: {"event": {"contentBlockDelta": {"delta": {"text": "Hello"}}}}'
  );
  expect(content).toBe('Hello');
  expect(error).toBeNull();
});

// A delta consisting only of whitespace is still content and must not be dropped.
it('preserves whitespace-only text deltas', () => {
  const { content, error } = parseSSELine(
    'data: {"event": {"contentBlockDelta": {"delta": {"text": " "}}}}'
  );
  expect(content).toBe(' ');
  expect(error).toBeNull();
});

// Events with no text delta (here: messageStop) yield neither content nor an error.
it('returns null for ConverseStream events without a text delta', () => {
  const { content, error } = parseSSELine(
    'data: {"event": {"messageStop": {"stopReason": "end_turn"}}}'
  );
  expect(content).toBeNull();
  expect(error).toBeNull();
});
class { + constructor(public readonly input: unknown) {} + }, +})); + +vi.mock('../account', () => ({ + getCredentialProvider: vi.fn().mockReturnValue({}), +})); + +describe('bedrock-agent wrapper', () => { + beforeEach(() => { + mockSend.mockReset(); + }); + + describe('getKnowledgeBase', () => { + it('returns the knowledge base when present', async () => { + mockSend.mockResolvedValueOnce({ + knowledgeBase: { knowledgeBaseId: 'KB123', name: 'docs', status: 'ACTIVE' }, + }); + const result = await getKnowledgeBase({ region: 'us-west-2', knowledgeBaseId: 'KB123' }); + expect(result?.knowledgeBaseId).toBe('KB123'); + expect(result?.status).toBe('ACTIVE'); + }); + + it('returns null when KB not found', async () => { + mockSend.mockRejectedValueOnce(Object.assign(new Error('not found'), { name: 'ResourceNotFoundException' })); + const result = await getKnowledgeBase({ region: 'us-west-2', knowledgeBaseId: 'KBMISSING' }); + expect(result).toBeNull(); + }); + + it('rethrows other errors', async () => { + mockSend.mockRejectedValueOnce(new Error('throttled')); + await expect(getKnowledgeBase({ region: 'us-west-2', knowledgeBaseId: 'KB1' })).rejects.toThrow('throttled'); + }); + }); + + describe('getDataSource', () => { + it('returns the data source when present', async () => { + mockSend.mockResolvedValueOnce({ + dataSource: { dataSourceId: 'DS1', knowledgeBaseId: 'KB1', name: 'ds', status: 'AVAILABLE' }, + }); + const result = await getDataSource({ region: 'us-west-2', knowledgeBaseId: 'KB1', dataSourceId: 'DS1' }); + expect(result?.dataSourceId).toBe('DS1'); + }); + + it('returns null when DS not found', async () => { + mockSend.mockRejectedValueOnce(Object.assign(new Error('not found'), { name: 'ResourceNotFoundException' })); + const result = await getDataSource({ + region: 'us-west-2', + knowledgeBaseId: 'KB1', + dataSourceId: 'MISSING', + }); + expect(result).toBeNull(); + }); + }); + + describe('listIngestionJobs', () => { + it('returns the list', async () => { + 
mockSend.mockResolvedValueOnce({ + ingestionJobSummaries: [{ ingestionJobId: 'IJ1', status: 'COMPLETE', startedAt: new Date('2026-05-01') }], + }); + const result = await listIngestionJobs({ + region: 'us-west-2', + knowledgeBaseId: 'KB1', + dataSourceId: 'DS1', + }); + expect(result).toHaveLength(1); + expect(result[0]?.ingestionJobId).toBe('IJ1'); + }); + + it('returns empty array when none', async () => { + mockSend.mockResolvedValueOnce({ ingestionJobSummaries: [] }); + const result = await listIngestionJobs({ + region: 'us-west-2', + knowledgeBaseId: 'KB1', + dataSourceId: 'DS1', + }); + expect(result).toEqual([]); + }); + + it('paginates through every page until nextToken is undefined', async () => { + mockSend.mockResolvedValueOnce({ + ingestionJobSummaries: [{ ingestionJobId: 'IJ1', status: 'COMPLETE', startedAt: new Date('2026-05-01') }], + nextToken: 'page2', + }); + mockSend.mockResolvedValueOnce({ + ingestionJobSummaries: [{ ingestionJobId: 'IJ2', status: 'COMPLETE', startedAt: new Date('2026-05-02') }], + nextToken: 'page3', + }); + mockSend.mockResolvedValueOnce({ + ingestionJobSummaries: [{ ingestionJobId: 'IJ3', status: 'IN_PROGRESS', startedAt: new Date('2026-05-03') }], + // no nextToken — last page + }); + + const result = await listIngestionJobs({ + region: 'us-west-2', + knowledgeBaseId: 'KB1', + dataSourceId: 'DS1', + }); + + expect(result).toHaveLength(3); + expect(result.map(s => s.ingestionJobId)).toEqual(['IJ1', 'IJ2', 'IJ3']); + expect(mockSend).toHaveBeenCalledTimes(3); + }); + + it('returns empty array on ResourceNotFoundException', async () => { + mockSend.mockRejectedValueOnce(Object.assign(new Error('not found'), { name: 'ResourceNotFoundException' })); + const result = await listIngestionJobs({ + region: 'us-west-2', + knowledgeBaseId: 'KB1', + dataSourceId: 'DS1', + }); + expect(result).toEqual([]); + }); + }); + + describe('listDataSources', () => { + it('returns the data source summaries', async () => { + 
mockSend.mockResolvedValueOnce({ + dataSourceSummaries: [ + { dataSourceId: 'DS1', name: 'a', status: 'AVAILABLE' }, + { dataSourceId: 'DS2', name: 'b', status: 'AVAILABLE' }, + ], + }); + const result = await listDataSources({ region: 'us-west-2', knowledgeBaseId: 'KB1' }); + expect(result).toHaveLength(2); + expect(result[0]?.dataSourceId).toBe('DS1'); + }); + + it('returns empty array when KB has no DSes', async () => { + mockSend.mockResolvedValueOnce({ dataSourceSummaries: [] }); + const result = await listDataSources({ region: 'us-west-2', knowledgeBaseId: 'KB1' }); + expect(result).toEqual([]); + }); + + it('returns empty array on ResourceNotFoundException', async () => { + mockSend.mockRejectedValueOnce(Object.assign(new Error('not found'), { name: 'ResourceNotFoundException' })); + const result = await listDataSources({ region: 'us-west-2', knowledgeBaseId: 'KB-MISSING' }); + expect(result).toEqual([]); + }); + + it('paginates through every page until nextToken is undefined', async () => { + mockSend.mockResolvedValueOnce({ + dataSourceSummaries: [{ dataSourceId: 'DS1', name: 'a', status: 'AVAILABLE' }], + nextToken: 'page2', + }); + mockSend.mockResolvedValueOnce({ + dataSourceSummaries: [{ dataSourceId: 'DS2', name: 'b', status: 'AVAILABLE' }], + nextToken: 'page3', + }); + mockSend.mockResolvedValueOnce({ + dataSourceSummaries: [{ dataSourceId: 'DS3', name: 'c', status: 'AVAILABLE' }], + }); + + const result = await listDataSources({ region: 'us-west-2', knowledgeBaseId: 'KB1' }); + + expect(result.map(s => s.dataSourceId)).toEqual(['DS1', 'DS2', 'DS3']); + expect(mockSend).toHaveBeenCalledTimes(3); + }); + }); + + describe('startIngestionJob', () => { + it('returns the ingestion job on success', async () => { + mockSend.mockResolvedValueOnce({ + ingestionJob: { ingestionJobId: 'IJ-NEW', status: 'STARTING' }, + }); + const result = await startIngestionJob({ + region: 'us-west-2', + knowledgeBaseId: 'KB1', + dataSourceId: 'DS1', + }); + 
expect(result.ingestionJobId).toBe('IJ-NEW'); + expect(result.status).toBe('STARTING'); + }); + + it('throws when KB not found', async () => { + mockSend.mockRejectedValueOnce(Object.assign(new Error('not found'), { name: 'ResourceNotFoundException' })); + await expect( + startIngestionJob({ region: 'us-west-2', knowledgeBaseId: 'KB-MISSING', dataSourceId: 'DS1' }) + ).rejects.toThrow(); + }); + + it('throws on validation errors verbatim', async () => { + mockSend.mockRejectedValueOnce(new Error('No documents to ingest')); + await expect( + startIngestionJob({ region: 'us-west-2', knowledgeBaseId: 'KB1', dataSourceId: 'DS1' }) + ).rejects.toThrow('No documents to ingest'); + }); + + it('throws when response has no ingestionJob', async () => { + mockSend.mockResolvedValueOnce({}); + await expect( + startIngestionJob({ region: 'us-west-2', knowledgeBaseId: 'KB1', dataSourceId: 'DS1' }) + ).rejects.toThrow(/no ingestion job/i); + }); + }); + + describe('getLatestIngestionJob', () => { + it('returns the most recently started job', async () => { + mockSend.mockResolvedValueOnce({ + ingestionJobSummaries: [ + { ingestionJobId: 'old', status: 'COMPLETE', startedAt: new Date('2026-01-01') }, + { ingestionJobId: 'new', status: 'IN_PROGRESS', startedAt: new Date('2026-05-01') }, + ], + }); + mockSend.mockResolvedValueOnce({ + ingestionJob: { + ingestionJobId: 'new', + status: 'IN_PROGRESS', + statistics: { numberOfDocumentsScanned: 10 }, + }, + }); + const result = await getLatestIngestionJob({ + region: 'us-west-2', + knowledgeBaseId: 'KB1', + dataSourceId: 'DS1', + }); + expect(result?.ingestionJobId).toBe('new'); + }); + + it('returns null when no jobs', async () => { + mockSend.mockResolvedValueOnce({ ingestionJobSummaries: [] }); + const result = await getLatestIngestionJob({ + region: 'us-west-2', + knowledgeBaseId: 'KB1', + dataSourceId: 'DS1', + }); + expect(result).toBeNull(); + }); + }); +}); diff --git a/src/cli/aws/agentcore-ab-tests.ts 
b/src/cli/aws/agentcore-ab-tests.ts index 12e5a9f83..d14a750b9 100644 --- a/src/cli/aws/agentcore-ab-tests.ts +++ b/src/cli/aws/agentcore-ab-tests.ts @@ -4,6 +4,7 @@ * Uses the AgentCore Evaluation DataPlane API (bedrock-agentcore) * with direct HTTP requests and SigV4 signing. */ +import { JobNotFoundError } from '../../lib'; import { getCredentialProvider } from './account'; import { dataPlaneEndpoint } from './stage-endpoint'; import { Sha256 } from '@aws-crypto/sha256-js'; @@ -93,8 +94,6 @@ export interface CreateABTestOptions { variants: ABTestVariant[]; evaluationConfig: ABTestEvaluationConfig; gatewayFilter?: GatewayFilter; - trafficAllocationConfig?: TrafficAllocationConfig; - maxDurationDays?: number; enableOnCreate?: boolean; } @@ -130,6 +129,7 @@ export interface GetABTestResult { currentRunId?: string; stopReason?: string; failureReason?: string; + errorDetails?: string[]; startedAt?: string; stoppedAt?: string; maxDurationExpiresAt?: string; @@ -146,9 +146,7 @@ export interface UpdateABTestOptions { name?: string; description?: string; variants?: ABTestVariant[]; - trafficAllocationConfig?: TrafficAllocationConfig; evaluationConfig?: ABTestEvaluationConfig; - maxDurationDays?: number; executionStatus?: 'PAUSED' | 'RUNNING' | 'STOPPED'; roleArn?: string; } @@ -247,7 +245,11 @@ async function signedRequestToEndpoint( if (!response.ok) { const errorBody = await response.text(); - throw new Error(`ABTest API error (${response.status}): ${errorBody}`); + const message = `ABTest API error (${response.status}): ${errorBody}`; + if (response.status === 404) { + throw new JobNotFoundError(message, { errorSource: 'service' }); + } + throw new Error(message); } if (response.status === 204) return {}; @@ -273,8 +275,6 @@ export async function createABTest(options: CreateABTestOptions): Promise { const body: Record = { batchEvaluationName: options.name, - evaluators: options.evaluators, dataSourceConfig: options.dataSourceConfig, }; + if (options.evaluators && 
options.evaluators.length > 0) { + body.evaluators = options.evaluators; + } + if (options.insights && options.insights.length > 0) { + body.insights = options.insights; + } if (options.evaluationMetadata) { body.evaluationMetadata = options.evaluationMetadata; } @@ -288,6 +329,9 @@ export async function startBatchEvaluation(options: StartBatchEvaluationOptions) if (options.clientToken) { body.clientToken = options.clientToken; } + if (options.kmsKeyArn) { + body.kmsKeyArn = options.kmsKeyArn; + } const { data } = await signedRequest({ region: options.region, @@ -328,8 +372,10 @@ export async function getBatchEvaluation(options: GetBatchEvaluationOptions): Pr dataSourceConfig: raw.dataSourceConfig as DataSourceConfig | undefined, outputConfig: raw.outputConfig as OutputConfig | undefined, evaluationResults: raw.evaluationResults as EvaluationResults | undefined, + failureAnalysisResult: raw.failureAnalysisResult as FailureAnalysisResult | undefined, errorDetails: raw.errorDetails as string[] | undefined, description: raw.description as string | undefined, + kmsKeyArn: raw.kmsKeyArn as string | undefined, }; } diff --git a/src/cli/aws/agentcore-config-bundles.ts b/src/cli/aws/agentcore-config-bundles.ts index 1c8ab5143..e2941acfe 100644 --- a/src/cli/aws/agentcore-config-bundles.ts +++ b/src/cli/aws/agentcore-config-bundles.ts @@ -114,6 +114,7 @@ export interface ConfigurationBundleSummary { bundleId: string; bundleName: string; description?: string; + createdAt?: number; } export interface ListConfigurationBundlesResult { diff --git a/src/cli/aws/agentcore-control.ts b/src/cli/aws/agentcore-control.ts index f8d548b3e..fd8f80052 100644 --- a/src/cli/aws/agentcore-control.ts +++ b/src/cli/aws/agentcore-control.ts @@ -1,5 +1,6 @@ import type { EvaluationLevel } from '../../schema/schemas/primitives/evaluator'; import { getCredentialProvider } from './account'; +import { controlPlaneEndpoint } from './stage-endpoint'; import { BedrockAgentCoreControlClient, 
GetAgentRuntimeCommand, @@ -20,13 +21,17 @@ import { /** * Create a shared BedrockAgentCoreControlClient for the given region. + * Respects AGENTCORE_STAGE env var for pre-release endpoint override. * Callers should create one client and reuse it across related operations * to benefit from connection pooling and credential caching. */ export function createControlClient(region: string): BedrockAgentCoreControlClient { + const stage = process.env.AGENTCORE_STAGE?.toLowerCase(); + const endpointOverride = stage === 'beta' || stage === 'gamma' ? controlPlaneEndpoint(region) : undefined; return new BedrockAgentCoreControlClient({ region, credentials: getCredentialProvider(), + ...(endpointOverride ? { endpoint: endpointOverride } : {}), }); } @@ -842,6 +847,7 @@ export interface GatewayDetail { }[]; }; }; + protocolType?: string; protocolConfiguration?: { mcp?: { searchType?: string }; }; @@ -948,6 +954,9 @@ export async function getGatewayDetail(options: { region: string; gatewayId: str const tags = await fetchTags(client, response.gatewayArn, 'gateway'); + // Service returns protocolType 'MCP' or null. Null = non-MCP gateway. + const protocolType = response.protocolType === 'MCP' ? 'MCP' : 'None'; + return { gatewayId: response.gatewayId ?? '', gatewayArn: response.gatewayArn ?? '', @@ -957,13 +966,14 @@ export async function getGatewayDetail(options: { region: string; gatewayId: str description: response.description, authorizerType: response.authorizerType ?? 'NONE', roleArn: response.roleArn, - authorizerConfiguration, - protocolConfiguration, + authorizerConfiguration: authorizerConfiguration, + protocolType: protocolType, + protocolConfiguration: protocolConfiguration, exceptionLevel: response.exceptionLevel, policyEngineConfiguration: response.policyEngineConfiguration ? { arn: response.policyEngineConfiguration.arn ?? '', mode: response.policyEngineConfiguration.mode ?? 
'' } : undefined, - tags, + tags: tags, }; } @@ -999,6 +1009,10 @@ export interface GatewayTargetDetail { smithyModel?: { s3?: { uri: string; bucketOwnerAccountId?: string }; inlinePayload?: string }; lambda?: { lambdaArn: string; toolSchema?: any }; }; + http?: { + agentcoreRuntime?: { runtimeArn: string; qualifier?: string }; + runtimeTargetConfiguration?: { runtimeArn: string; qualifier?: string }; + }; }; credentialProviderConfigurations?: { credentialProviderType: string; @@ -1133,6 +1147,24 @@ export async function getGatewayTargetDetail(options: { } } + if (response.targetConfiguration && 'http' in response.targetConfiguration) { + const http = (response.targetConfiguration as any).http; + targetConfiguration ??= {}; + targetConfiguration.http = {}; + if (http?.agentcoreRuntime) { + targetConfiguration.http.agentcoreRuntime = { + runtimeArn: http.agentcoreRuntime.arn ?? http.agentcoreRuntime.runtimeArn ?? '', + qualifier: http.agentcoreRuntime.qualifier, + }; + } + if (http?.runtimeTargetConfiguration) { + targetConfiguration.http.runtimeTargetConfiguration = { + runtimeArn: http.runtimeTargetConfiguration.arn ?? http.runtimeTargetConfiguration.runtimeArn ?? '', + qualifier: http.runtimeTargetConfiguration.qualifier, + }; + } + } + const credentialProviderConfigurations: GatewayTargetDetail['credentialProviderConfigurations'] = ( response.credentialProviderConfigurations ?? [] ).map((c: any) => ({ diff --git a/src/cli/aws/agentcore-harness.ts b/src/cli/aws/agentcore-harness.ts index 55f321f43..dd7c0e281 100644 --- a/src/cli/aws/agentcore-harness.ts +++ b/src/cli/aws/agentcore-harness.ts @@ -43,10 +43,23 @@ export interface GeminiModelConfig { maxTokens?: number; } +export interface LiteLlmModelConfig { + modelId: string; + apiKeyArn?: string; + /** Base URL for the third-party model provider's API endpoint. 
*/ + apiBase?: string; + temperature?: number; + topP?: number; + maxTokens?: number; + /** Provider-specific parameters passed through to the model provider unchanged. */ + additionalParams?: Record; +} + export interface HarnessModelConfiguration { bedrockModelConfig?: BedrockModelConfig; openAiModelConfig?: OpenAiModelConfig; geminiModelConfig?: GeminiModelConfig; + liteLlmModelConfig?: LiteLlmModelConfig; } export type HarnessSystemPrompt = { text: string }[]; @@ -59,9 +72,10 @@ export interface HarnessTool { config?: Record; } -export interface HarnessSkill { - path: string; -} +export type HarnessSkill = + | { path: string } + | { s3Uri: string } + | { gitUrl: string; path?: string; auth?: { credentialArn: string; username?: string } }; export interface HarnessAgentCoreMemoryConfiguration { arn: string; @@ -121,74 +135,6 @@ export interface Harness { updatedAt: string; } -export interface HarnessSummary { - harnessId: string; - harnessName: string; - arn: string; - status: HarnessStatus; - createdAt: string; - updatedAt: string; -} - -// ============================================================================ -// CreateHarness -// ============================================================================ - -export interface CreateHarnessOptions { - region: string; - harnessName: string; - executionRoleArn: string; - environment?: HarnessEnvironmentProvider; - environmentArtifact?: HarnessEnvironmentArtifact; - environmentVariables?: Record; - authorizerConfiguration?: Record; - model?: HarnessModelConfiguration; - systemPrompt?: HarnessSystemPrompt; - tools?: HarnessTool[]; - skills?: HarnessSkill[]; - allowedTools?: string[]; - memory?: HarnessMemoryConfiguration; - truncation?: HarnessTruncationConfiguration; - maxIterations?: number; - maxTokens?: number; - timeoutSeconds?: number; - tags?: Record; -} - -export interface CreateHarnessResult { - harness: Harness; -} - -export async function createHarness(options: CreateHarnessOptions): Promise { - 
const { region, ...rest } = options; - const client = new AgentCoreApiClient({ region, plane: 'control' }); - - const body: Record = { - harnessName: rest.harnessName, - clientToken: randomUUID(), - executionRoleArn: rest.executionRoleArn, - }; - - if (rest.environment) body.environment = rest.environment; - if (rest.environmentArtifact) body.environmentArtifact = rest.environmentArtifact; - if (rest.environmentVariables) body.environmentVariables = rest.environmentVariables; - if (rest.authorizerConfiguration) body.authorizerConfiguration = rest.authorizerConfiguration; - if (rest.model) body.model = rest.model; - if (rest.systemPrompt) body.systemPrompt = rest.systemPrompt; - if (rest.tools) body.tools = rest.tools; - if (rest.skills) body.skills = rest.skills; - if (rest.allowedTools) body.allowedTools = rest.allowedTools; - if (rest.memory) body.memory = rest.memory; - if (rest.truncation) body.truncation = rest.truncation; - if (rest.maxIterations != null) body.maxIterations = rest.maxIterations; - if (rest.maxTokens != null) body.maxTokens = rest.maxTokens; - if (rest.timeoutSeconds != null) body.timeoutSeconds = rest.timeoutSeconds; - if (rest.tags) body.tags = rest.tags; - - const result = await client.request({ method: 'POST', path: '/harnesses', body }); - return result as CreateHarnessResult; -} - // ============================================================================ // GetHarness // ============================================================================ @@ -208,64 +154,6 @@ export async function getHarness(options: GetHarnessOptions): Promise; - authorizerConfiguration?: { optionalValue: Record | null }; - model?: HarnessModelConfiguration; - systemPrompt?: HarnessSystemPrompt; - tools?: HarnessTool[]; - skills?: HarnessSkill[]; - allowedTools?: string[]; - memory?: { optionalValue: HarnessMemoryConfiguration | null }; - truncation?: HarnessTruncationConfiguration; - maxIterations?: number; - maxTokens?: number; - timeoutSeconds?: number; 
- tags?: Record; -} - -export interface UpdateHarnessResult { - harness: Harness; -} - -export async function updateHarness(options: UpdateHarnessOptions): Promise { - const { region, harnessId, ...rest } = options; - const client = new AgentCoreApiClient({ region, plane: 'control' }); - - const body: Record = { - clientToken: randomUUID(), - }; - - if (rest.executionRoleArn) body.executionRoleArn = rest.executionRoleArn; - if (rest.environment) body.environment = rest.environment; - if (rest.environmentArtifact !== undefined) body.environmentArtifact = rest.environmentArtifact; - if (rest.environmentVariables) body.environmentVariables = rest.environmentVariables; - if (rest.authorizerConfiguration !== undefined) body.authorizerConfiguration = rest.authorizerConfiguration; - if (rest.model) body.model = rest.model; - if (rest.systemPrompt) body.systemPrompt = rest.systemPrompt; - if (rest.tools) body.tools = rest.tools; - if (rest.skills) body.skills = rest.skills; - if (rest.allowedTools) body.allowedTools = rest.allowedTools; - if (rest.memory !== undefined) body.memory = rest.memory; - if (rest.truncation) body.truncation = rest.truncation; - if (rest.maxIterations != null) body.maxIterations = rest.maxIterations; - if (rest.maxTokens != null) body.maxTokens = rest.maxTokens; - if (rest.timeoutSeconds != null) body.timeoutSeconds = rest.timeoutSeconds; - if (rest.tags) body.tags = rest.tags; - - const result = await client.request({ method: 'PATCH', path: `/harnesses/${harnessId}`, body }); - return result as UpdateHarnessResult; -} - // ============================================================================ // DeleteHarness // ============================================================================ @@ -289,42 +177,14 @@ export async function deleteHarness(options: DeleteHarnessOptions): Promise { - const client = new AgentCoreApiClient({ region: options.region, plane: 'control' }); - const query: Record = {}; - if (options.maxResults != null) 
query.maxResults = String(options.maxResults); - if (options.nextToken) query.nextToken = options.nextToken; - - const result = await client.request({ method: 'GET', path: '/harnesses', query }); - return result as ListHarnessesResult; -} - -export async function listAllHarnesses(region: string): Promise { - const all: HarnessSummary[] = []; - let nextToken: string | undefined; - - do { - const result = await listHarnesses({ region, maxResults: 100, nextToken }); - all.push(...result.harnesses); - nextToken = result.nextToken; - } while (nextToken); - - return all; +/** + * True when a DeleteHarness error means the harness is already gone (HTTP 404) — which callers + * treat as success for cleanup. Keyed on the typed control-plane status code, NOT a substring of + * the message, so unrelated "...does not exist" errors (e.g. a dependent IAM role) are NOT + * misclassified as a missing harness. Shared by teardown and the orphan-remove path. + */ +export function isHarnessNotFoundError(err: unknown): boolean { + return err instanceof AgentCoreApiError && err.statusCode === 404; } // ============================================================================ diff --git a/src/cli/aws/agentcore-http-gateways.ts b/src/cli/aws/agentcore-http-gateways.ts deleted file mode 100644 index 22e45b588..000000000 --- a/src/cli/aws/agentcore-http-gateways.ts +++ /dev/null @@ -1,512 +0,0 @@ -/** - * AWS client wrappers for HTTP Gateway control plane operations. - * - * HTTP gateways are required for A/B testing because MCP gateways - * don't emit spans for treatment propagation. These wrappers use - * direct HTTP requests with SigV4 signing against the control plane. 
- */ -import { getCredentialProvider } from './account'; -import { controlPlaneEndpoint } from './stage-endpoint'; -import { Sha256 } from '@aws-crypto/sha256-js'; -import { defaultProvider } from '@aws-sdk/credential-provider-node'; -import { HttpRequest } from '@smithy/protocol-http'; -import { SignatureV4 } from '@smithy/signature-v4'; -import { randomUUID } from 'node:crypto'; - -// ============================================================================ -// Types -// ============================================================================ - -// ── Create Gateway ───────────────────────────────────────────────────────── - -export interface CreateHttpGatewayOptions { - region: string; - name: string; - roleArn: string; -} - -export interface CreateHttpGatewayResult { - gatewayId: string; - gatewayArn: string; - name: string; - status: string; -} - -// ── Create Gateway Target ────────────────────────────────────────────────── - -export interface CreateHttpGatewayTargetOptions { - region: string; - gatewayId: string; - targetName: string; - runtimeArn: string; - qualifier?: string; -} - -export interface CreateHttpGatewayTargetResult { - targetId: string; - name: string; - status: string; -} - -// ── Get Gateway ──────────────────────────────────────────────────────────── - -export interface GetHttpGatewayOptions { - region: string; - gatewayId: string; -} - -export interface GetHttpGatewayResult { - gatewayId: string; - gatewayArn: string; - gatewayUrl?: string; - name: string; - status: string; - authorizerType?: string; - roleArn?: string; - createdAt?: string; - updatedAt?: string; -} - -// ── Get Gateway Target ───────────────────────────────────────────────────── - -export interface GetHttpGatewayTargetOptions { - region: string; - gatewayId: string; - targetId: string; -} - -export interface GetHttpGatewayTargetResult { - targetId: string; - name: string; - status: string; - targetConfiguration?: unknown; - createdAt?: string; - updatedAt?: string; 
-} - -// ── List Gateways ────────────────────────────────────────────────────────── - -export interface ListHttpGatewaysOptions { - region: string; - maxResults?: number; - nextToken?: string; -} - -export interface HttpGatewaySummary { - gatewayId: string; - gatewayArn: string; - name: string; - status: string; -} - -export interface ListHttpGatewaysResult { - gateways: HttpGatewaySummary[]; - nextToken?: string; -} - -// ── List Gateway Targets ────────────────────────────────────────────────── - -export interface ListHttpGatewayTargetsOptions { - region: string; - gatewayId: string; - maxResults?: number; -} - -export interface HttpGatewayTargetSummary { - targetId: string; - name: string; - status: string; -} - -export interface ListHttpGatewayTargetsResult { - targets: HttpGatewayTargetSummary[]; -} - -// ── Delete Gateway Target ────────────────────────────────────────────────── - -export interface DeleteHttpGatewayTargetOptions { - region: string; - gatewayId: string; - targetId: string; -} - -// ── Delete Gateway ───────────────────────────────────────────────────────── - -export interface DeleteHttpGatewayOptions { - region: string; - gatewayId: string; -} - -// ── Wait for Target Ready ────────────────────────────────────────────────── - -export interface WaitForTargetReadyOptions { - region: string; - gatewayId: string; - targetId: string; - /** Maximum time to wait in milliseconds. Defaults to 120000 (120s). 
*/ - timeoutMs?: number; -} - -// ============================================================================ -// HTTP signing helper -// ============================================================================ - -async function signedRequest(options: { - region: string; - method: string; - path: string; - body?: string; -}): Promise { - const { region, method, path, body } = options; - const endpoint = controlPlaneEndpoint(region); - const url = new URL(path, endpoint); - - const query: Record = {}; - url.searchParams.forEach((value, key) => { - query[key] = value; - }); - - const request = new HttpRequest({ - method, - protocol: 'https:', - hostname: url.hostname, - path: url.pathname, - ...(Object.keys(query).length > 0 && { query }), - headers: { - 'Content-Type': 'application/json', - host: url.hostname, - }, - ...(body && { body }), - }); - - const credentials = getCredentialProvider() ?? defaultProvider(); - const service = 'bedrock-agentcore'; - const signer = new SignatureV4({ - service, - region, - credentials, - sha256: Sha256, - }); - - const signedReq = await signer.sign(request); - - const response = await fetch(`${endpoint}${path}`, { - method, - headers: signedReq.headers as Record, - ...(body && { body }), - }); - - if (!response.ok) { - const errorBody = await response.text(); - throw new Error(`HttpGateway API error (${response.status}): ${errorBody}`); - } - - if (response.status === 204) return {}; - return response.json(); -} - -// ============================================================================ -// Control Plane Operations -// ============================================================================ - -export async function createHttpGateway(options: CreateHttpGatewayOptions): Promise { - const body = JSON.stringify({ - name: options.name, - authorizerType: 'AWS_IAM', - roleArn: options.roleArn, - clientToken: randomUUID(), - }); - - try { - return (await signedRequest({ - region: options.region, - method: 'POST', - path: 
'/gateways', - body, - })) as CreateHttpGatewayResult; - } catch (err) { - throw new Error( - `Failed to create HTTP gateway "${options.name}": ${err instanceof Error ? err.message : String(err)}` - ); - } -} - -export async function createHttpGatewayTarget( - options: CreateHttpGatewayTargetOptions -): Promise { - const body = JSON.stringify({ - name: options.targetName, - clientToken: randomUUID(), - targetConfiguration: { - http: { - agentcoreRuntime: { - arn: options.runtimeArn, - qualifier: options.qualifier ?? 'DEFAULT', - }, - }, - }, - credentialProviderConfigurations: [{ credentialProviderType: 'GATEWAY_IAM_ROLE' }], - }); - - try { - return (await signedRequest({ - region: options.region, - method: 'POST', - path: `/gateways/${options.gatewayId}/targets`, - body, - })) as CreateHttpGatewayTargetResult; - } catch (err) { - // Fallback: retry with legacy field name if the new name is not yet supported - const msg = err instanceof Error ? err.message : String(err); - if (msg.includes('ValidationException') || msg.includes('400')) { - const legacyBody = JSON.stringify({ - name: options.targetName, - clientToken: randomUUID(), - targetConfiguration: { - http: { - runtimeTargetConfiguration: { - arn: options.runtimeArn, - qualifier: options.qualifier ?? 
'DEFAULT', - }, - }, - }, - credentialProviderConfigurations: [{ credentialProviderType: 'GATEWAY_IAM_ROLE' }], - }); - try { - return (await signedRequest({ - region: options.region, - method: 'POST', - path: `/gateways/${options.gatewayId}/targets`, - body: legacyBody, - })) as CreateHttpGatewayTargetResult; - } catch { - // Fall through to original error - } - } - throw new Error(`Failed to create target "${options.targetName}" in gateway ${options.gatewayId}: ${msg}`); - } -} - -export async function getHttpGateway(options: GetHttpGatewayOptions): Promise { - const data = await signedRequest({ - region: options.region, - method: 'GET', - path: `/gateways/${options.gatewayId}`, - }); - - return data as GetHttpGatewayResult; -} - -export async function getHttpGatewayTarget(options: GetHttpGatewayTargetOptions): Promise { - const data = await signedRequest({ - region: options.region, - method: 'GET', - path: `/gateways/${options.gatewayId}/targets/${options.targetId}`, - }); - - return data as GetHttpGatewayTargetResult; -} - -export async function listHttpGateways(options: ListHttpGatewaysOptions): Promise { - const params = new URLSearchParams(); - if (options.maxResults) params.set('maxResults', String(options.maxResults)); - if (options.nextToken) params.set('nextToken', options.nextToken); - const query = params.toString(); - - const data = await signedRequest({ - region: options.region, - method: 'GET', - path: `/gateways${query ? `?${query}` : ''}`, - }); - - const result = data as ListHttpGatewaysResult; - return { - gateways: result.gateways ?? [], - nextToken: result.nextToken, - }; -} - -/** - * List all HTTP gateways, paginating through all results. 
- */ -export async function listAllHttpGateways(options: { region: string }): Promise { - const all: HttpGatewaySummary[] = []; - let nextToken: string | undefined; - - do { - const result = await listHttpGateways({ region: options.region, maxResults: 100, nextToken }); - all.push(...result.gateways); - nextToken = result.nextToken; - } while (nextToken); - - return all; -} - -export async function listHttpGatewayTargets( - options: ListHttpGatewayTargetsOptions -): Promise { - const params = new URLSearchParams(); - if (options.maxResults) params.set('maxResults', String(options.maxResults)); - const query = params.toString(); - - const data = await signedRequest({ - region: options.region, - method: 'GET', - path: `/gateways/${options.gatewayId}/targets${query ? `?${query}` : ''}`, - }); - - const result = data as Record; - return { - targets: (result.items ?? result.targets ?? []) as HttpGatewayTargetSummary[], - }; -} - -export async function deleteHttpGatewayTarget( - options: DeleteHttpGatewayTargetOptions -): Promise<{ success: boolean; error?: string }> { - try { - await signedRequest({ - region: options.region, - method: 'DELETE', - path: `/gateways/${options.gatewayId}/targets/${options.targetId}`, - }); - - // Wait for target to be fully deleted before returning. - // Gateway deletion fails if targets still exist in DELETING state. - const timeoutMs = 60_000; - const startTime = Date.now(); - let delayMs = 2_000; - - while (Date.now() - startTime < timeoutMs) { - try { - await getHttpGatewayTarget({ - region: options.region, - gatewayId: options.gatewayId, - targetId: options.targetId, - }); - // Target still exists — keep waiting - } catch (err) { - const msg = err instanceof Error ? 
err.message : String(err); - if (msg.includes('(404)') || msg.includes('not found')) { - return { success: true }; // Target confirmed deleted - } - // Transient error — keep polling - } - - const remaining = timeoutMs - (Date.now() - startTime); - if (remaining <= 0) break; - await new Promise(resolve => setTimeout(resolve, Math.min(delayMs, remaining))); - delayMs = Math.min(delayMs * 2, 8_000); - } - - // Polling timed out — target may still be deleting - return { success: false, error: `Timed out waiting for target ${options.targetId} to be fully deleted` }; - } catch (err) { - return { success: false, error: err instanceof Error ? err.message : String(err) }; - } -} - -export async function deleteHttpGateway( - options: DeleteHttpGatewayOptions -): Promise<{ success: boolean; error?: string }> { - try { - await signedRequest({ - region: options.region, - method: 'DELETE', - path: `/gateways/${options.gatewayId}`, - }); - return { success: true }; - } catch (err) { - return { success: false, error: err instanceof Error ? err.message : String(err) }; - } -} - -/** Terminal states that indicate a resource will never become READY. */ -const TERMINAL_FAILURE_STATES = ['FAILED', 'CREATE_FAILED', 'UPDATE_FAILED', 'DELETING', 'DELETED'] as const; - -export async function waitForGatewayReady(options: { - region: string; - gatewayId: string; - timeoutMs?: number; -}): Promise { - const timeoutMs = options.timeoutMs ?? 
120_000; - const startTime = Date.now(); - let delayMs = 2_000; - - while (Date.now() - startTime < timeoutMs) { - const gateway = await getHttpGateway({ - region: options.region, - gatewayId: options.gatewayId, - }); - - if (gateway.status === 'READY') return gateway; - - if ((TERMINAL_FAILURE_STATES as readonly string[]).includes(gateway.status)) { - throw new Error( - `Gateway ${options.gatewayId} reached terminal state '${gateway.status}' and will not become READY` - ); - } - - const remaining = timeoutMs - (Date.now() - startTime); - if (remaining <= 0) break; - - await new Promise(resolve => setTimeout(resolve, Math.min(delayMs, remaining))); - delayMs = Math.min(delayMs * 2, 16_000); - } - - throw new Error( - `Timed out waiting for gateway ${options.gatewayId} to become READY after ${Math.round(timeoutMs / 1000)}s` - ); -} - -export async function waitForTargetReady(options: WaitForTargetReadyOptions): Promise { - const timeoutMs = options.timeoutMs ?? 120_000; - const startTime = Date.now(); - let delayMs = 2_000; - - while (Date.now() - startTime < timeoutMs) { - let target; - try { - target = await getHttpGatewayTarget({ - region: options.region, - gatewayId: options.gatewayId, - targetId: options.targetId, - }); - } catch (err) { - const msg = err instanceof Error ? 
err.message : String(err); - if (msg.includes('(404)')) { - throw new Error( - `Target ${options.targetId} not found during readiness poll — it may have been deleted externally` - ); - } - // Retry on transient server errors - if (/\(5\d\d\)/.test(msg)) { - // Continue polling — transient error - const remaining = timeoutMs - (Date.now() - startTime); - if (remaining <= 0) break; - await new Promise(resolve => setTimeout(resolve, delayMs)); - delayMs = Math.min(delayMs * 2, 16_000); - continue; - } - throw err; - } - - if (target.status === 'READY') return target; - - if ((TERMINAL_FAILURE_STATES as readonly string[]).includes(target.status)) { - throw new Error( - `Target ${options.targetId} in gateway ${options.gatewayId} reached terminal state '${target.status}' and will not become READY` - ); - } - - const remaining = timeoutMs - (Date.now() - startTime); - if (remaining <= 0) break; - - await new Promise(resolve => setTimeout(resolve, Math.min(delayMs, remaining))); - delayMs = Math.min(delayMs * 2, 16_000); - } - - throw new Error( - `Timed out waiting for target ${options.targetId} to become READY after ${Math.round(timeoutMs / 1000)}s` - ); -} diff --git a/src/cli/aws/agentcore-payments.ts b/src/cli/aws/agentcore-payments.ts index 42a07c343..bbd9d6272 100644 --- a/src/cli/aws/agentcore-payments.ts +++ b/src/cli/aws/agentcore-payments.ts @@ -5,7 +5,7 @@ * because the Payment APIs are not yet in the SDK client. 
*/ import { getCredentialProvider } from './account'; -import { serviceEndpoint } from './partition'; +import { controlPlaneEndpoint, dataPlaneEndpoint } from './stage-endpoint'; import { Sha256 } from '@aws-crypto/sha256-js'; import { defaultProvider } from '@aws-sdk/credential-provider-node'; import { HttpRequest } from '@smithy/protocol-http'; @@ -82,13 +82,6 @@ interface PaymentManagerDetail { // HTTP signing helper // ============================================================================ -function getControlPlaneEndpoint(region: string): string { - const stage = process.env.AGENTCORE_STAGE?.toLowerCase(); - if (stage === 'beta') return `https://beta.${region}.elcapcp.genesis-primitives.aws.dev`; - if (stage === 'gamma') return `https://gamma.${region}.elcapcp.genesis-primitives.aws.dev`; - return `https://${serviceEndpoint('bedrock-agentcore-control', region)}`; -} - async function signedRequest(options: { region: string; method: string; @@ -96,7 +89,7 @@ async function signedRequest(options: { body?: string; }): Promise { const { region, method, path, body } = options; - const endpoint = getControlPlaneEndpoint(region); + const endpoint = controlPlaneEndpoint(region); const url = new URL(path, endpoint); const query: Record = {}; @@ -317,13 +310,6 @@ export async function getPaymentManager(options: GetPaymentManagerOptions): Prom // Data Plane Operations (Payment Sessions) // ============================================================================ -function getDataPlaneEndpoint(region: string): string { - const stage = process.env.AGENTCORE_STAGE?.toLowerCase(); - if (stage === 'beta') return `https://beta.${region}.elcapdp.genesis-primitives.aws.dev`; - if (stage === 'gamma') return `https://gamma.${region}.elcapdp.genesis-primitives.aws.dev`; - return `https://${serviceEndpoint('bedrock-agentcore', region)}`; -} - async function signedDataPlaneRequest(options: { region: string; method: string; @@ -332,7 +318,7 @@ async function 
signedDataPlaneRequest(options: { extraHeaders?: Record; }): Promise { const { region, method, path, body, extraHeaders } = options; - const endpoint = getDataPlaneEndpoint(region); + const endpoint = dataPlaneEndpoint(region); const url = new URL(path, endpoint); const query: Record = {}; diff --git a/src/cli/aws/agentcore-recommendation.ts b/src/cli/aws/agentcore-recommendation.ts index 88ae0473a..2cdbea000 100644 --- a/src/cli/aws/agentcore-recommendation.ts +++ b/src/cli/aws/agentcore-recommendation.ts @@ -15,6 +15,7 @@ * StartRecommendation, poll via GetRecommendation, and stop via * DeleteRecommendation (stop-via-delete pattern). */ +import { JobNotFoundError } from '../../lib'; import { getCredentialProvider } from './account'; import { dataPlaneEndpoint } from './stage-endpoint'; import { Sha256 } from '@aws-crypto/sha256-js'; @@ -54,7 +55,7 @@ export interface SessionSpan { spanId: string; } -/** Agent trace source — inline spans or CloudWatch Logs. */ +/** Agent trace source — inline spans, CloudWatch Logs, or batch evaluation. */ export interface AgentTracesSource { sessionSpans?: SessionSpan[]; cloudwatchLogs?: { @@ -65,6 +66,9 @@ export interface AgentTracesSource { limit?: number; sessionIds?: string[]; }; + batchEvaluation?: { + batchEvaluationArn: string; + }; } /** Evaluation config — exactly one evaluator as objective signal (API constraint: min 1, max 1). 
*/ @@ -112,6 +116,7 @@ export interface RecommendationResultConfigurationBundle { export interface SystemPromptRecommendationResult { recommendedSystemPrompt?: string; configurationBundle?: RecommendationResultConfigurationBundle; + explanation?: string; errorCode?: string; errorMessage?: string; } @@ -119,6 +124,7 @@ export interface SystemPromptRecommendationResult { export interface ToolDescriptionRecommendationToolResult { toolName: string; recommendedToolDescription: string; + explanation?: string; } export interface ToolDescriptionRecommendationResult { @@ -265,7 +271,11 @@ async function signedRequest(options: { if (!response.ok) { const errorBody = await response.text(); - throw new Error(`Recommendation API error (${response.status}): ${errorBody} [requestId: ${requestId}]`); + const message = `Recommendation API error (${response.status}): ${errorBody} [requestId: ${requestId}]`; + if (response.status === 404) { + throw new JobNotFoundError(message, { errorSource: 'service' }); + } + throw new Error(message); } if (response.status === 204) return { data: {}, status: 204, requestId }; diff --git a/src/cli/aws/agentcore.ts b/src/cli/aws/agentcore.ts index ae7155d53..b91df356d 100644 --- a/src/cli/aws/agentcore.ts +++ b/src/cli/aws/agentcore.ts @@ -115,16 +115,22 @@ export function parseSSELine(line: string): { content: string | null; error: str if (!line.startsWith('data: ')) { return { content: null, error: null }; } - const content = line.slice(6); + const raw = line.slice(6); try { - const parsed: unknown = JSON.parse(content); + const parsed: unknown = JSON.parse(raw); if (typeof parsed === 'string') { return { content: parsed, error: null }; } else if (parsed && typeof parsed === 'object' && 'error' in parsed) { return { content: null, error: String((parsed as { error: unknown }).error) }; } + // ConverseStream-shaped event: extract text delta + const event = (parsed as { event?: { contentBlockDelta?: { delta?: { text?: string } } } })?.event; + const 
text = event?.contentBlockDelta?.delta?.text; + if (typeof text === 'string') { + return { content: text, error: null }; + } } catch { - return { content, error: null }; + return { content: raw, error: null }; } return { content: null, error: null }; } diff --git a/src/cli/aws/bedrock-agent.ts b/src/cli/aws/bedrock-agent.ts new file mode 100644 index 000000000..326adc197 --- /dev/null +++ b/src/cli/aws/bedrock-agent.ts @@ -0,0 +1,182 @@ +import { getCredentialProvider } from './account'; +import { + BedrockAgentClient, + type DataSource, + type DataSourceSummary, + GetDataSourceCommand, + GetIngestionJobCommand, + GetKnowledgeBaseCommand, + type IngestionJob, + type IngestionJobSummary, + type KnowledgeBase, + ListDataSourcesCommand, + ListIngestionJobsCommand, + StartIngestionJobCommand, +} from '@aws-sdk/client-bedrock-agent'; + +/** + * Region-scoped factory. Each call returns a fresh client; we don't pool because + * the CLI is a one-shot process and connection reuse provides marginal benefit. + */ +function makeClient(region: string): BedrockAgentClient { + return new BedrockAgentClient({ region, credentials: getCredentialProvider() }); +} + +function isNotFound(err: unknown): boolean { + if (!err || typeof err !== 'object') return false; + const name = (err as { name?: string }).name; + return name === 'ResourceNotFoundException' || name === 'NotFoundException'; +} + +export interface KnowledgeBaseLookup { + region: string; + knowledgeBaseId: string; +} + +/** + * Fetch a knowledge base by ID. Returns null if the KB doesn't exist; rethrows + * any other error so the caller can decide how to surface it. + */ +export async function getKnowledgeBase(opts: KnowledgeBaseLookup): Promise { + const client = makeClient(opts.region); + try { + const response = await client.send(new GetKnowledgeBaseCommand({ knowledgeBaseId: opts.knowledgeBaseId })); + return response.knowledgeBase ?? 
null; + } catch (err) { + if (isNotFound(err)) return null; + throw err; + } +} + +export interface DataSourceLookup extends KnowledgeBaseLookup { + dataSourceId: string; +} + +export async function getDataSource(opts: DataSourceLookup): Promise { + const client = makeClient(opts.region); + try { + const response = await client.send( + new GetDataSourceCommand({ knowledgeBaseId: opts.knowledgeBaseId, dataSourceId: opts.dataSourceId }) + ); + return response.dataSource ?? null; + } catch (err) { + if (isNotFound(err)) return null; + throw err; + } +} + +/** + * List ingestion jobs for a (KB, DS) pair. Returns an empty array if the + * (KB, DS) pair doesn't exist or has no jobs. + * + * Paginates through every page (loops until `nextToken` is undefined) so the + * caller sees the full job history; the service caps a single page at 100 + * summaries by default, and a busy KB can have far more than that. + * + * Accepts an optional pre-built client so callers (e.g. {@link getLatestIngestionJob}) + * can avoid re-resolving credentials and re-establishing the TCP session for + * follow-up calls in the same chain. + */ +export async function listIngestionJobs( + opts: DataSourceLookup, + client: BedrockAgentClient = makeClient(opts.region) +): Promise { + const summaries: IngestionJobSummary[] = []; + let nextToken: string | undefined; + try { + do { + const response = await client.send( + new ListIngestionJobsCommand({ + knowledgeBaseId: opts.knowledgeBaseId, + dataSourceId: opts.dataSourceId, + nextToken, + }) + ); + if (response.ingestionJobSummaries) { + summaries.push(...response.ingestionJobSummaries); + } + nextToken = response.nextToken; + } while (nextToken); + return summaries; + } catch (err) { + if (isNotFound(err)) return []; + throw err; + } +} + +/** + * List the data sources attached to a knowledge base. 
Used post-deploy to + * resolve data-source IDs once CFN has settled (the L3 emits per-KB outputs + * but not per-DS outputs, so we look them up by listing the KB's children). + * + * Bedrock paginates this API; this function exhausts every page so callers + * never see a partial DS list. + * + * Returns an empty array if the KB doesn't exist or has no DSes. + */ +export async function listDataSources(opts: KnowledgeBaseLookup): Promise { + const client = makeClient(opts.region); + const summaries: DataSourceSummary[] = []; + let nextToken: string | undefined; + try { + do { + const response = await client.send( + new ListDataSourcesCommand({ knowledgeBaseId: opts.knowledgeBaseId, nextToken }) + ); + if (response.dataSourceSummaries) { + summaries.push(...response.dataSourceSummaries); + } + nextToken = response.nextToken; + } while (nextToken); + return summaries; + } catch (err) { + if (isNotFound(err)) return []; + throw err; + } +} + +/** + * Start a fresh ingestion job for a (KB, DS) pair. Surfaces all service errors + * verbatim — the caller decides how to format user-facing messages (e.g. via + * IngestionError). + */ +export async function startIngestionJob(opts: DataSourceLookup): Promise { + const client = makeClient(opts.region); + const response = await client.send( + new StartIngestionJobCommand({ knowledgeBaseId: opts.knowledgeBaseId, dataSourceId: opts.dataSourceId }) + ); + if (!response.ingestionJob) { + throw new Error('StartIngestionJob succeeded but returned no ingestion job in the response'); + } + return response.ingestionJob; +} + +/** + * List ingestion jobs and fetch the most recently started one's full details. + * Returns null if no jobs have ever run for this DS. + * + * Reuses a single BedrockAgentClient across the list + get calls so we don't + * resolve credentials twice for one logical lookup. 
+ */ +export async function getLatestIngestionJob(opts: DataSourceLookup): Promise { + const client = makeClient(opts.region); + const summaries = await listIngestionJobs(opts, client); + if (summaries.length === 0) return null; + + const latest = summaries.reduce((best, current) => { + const bestStarted = best.startedAt?.getTime() ?? 0; + const currentStarted = current.startedAt?.getTime() ?? 0; + return currentStarted > bestStarted ? current : best; + }); + + if (!latest.ingestionJobId) return null; + + const response = await client.send( + new GetIngestionJobCommand({ + knowledgeBaseId: opts.knowledgeBaseId, + dataSourceId: opts.dataSourceId, + ingestionJobId: latest.ingestionJobId, + }) + ); + return response.ingestionJob ?? null; +} diff --git a/src/cli/aws/index.ts b/src/cli/aws/index.ts index 2a143628d..09851e678 100644 --- a/src/cli/aws/index.ts +++ b/src/cli/aws/index.ts @@ -28,30 +28,19 @@ export { } from './policy-generation'; export { AgentCoreApiClient, AgentCoreApiError, type ApiClientOptions, type ApiPlane } from './api-client'; export { - createHarness, getHarness, - updateHarness, deleteHarness, - listHarnesses, - listAllHarnesses, invokeHarness, type Harness, - type HarnessSummary, type HarnessStatus, type HarnessStreamEvent, type HarnessStopReason, type TokenUsage, type StreamMetrics, - type CreateHarnessOptions, - type CreateHarnessResult, type GetHarnessOptions, type GetHarnessResult, - type UpdateHarnessOptions, - type UpdateHarnessResult, type DeleteHarnessOptions, type DeleteHarnessResult, - type ListHarnessesOptions, - type ListHarnessesResult, type InvokeHarnessOptions, } from './agentcore-harness'; export { @@ -71,6 +60,8 @@ export { mcpInitSession, mcpListTools, mcpCallTool, + parseSSE, + extractResult, stopRuntimeSession, type ExecuteBashOptions, type ExecuteBashResult, diff --git a/src/cli/cdk/toolkit-lib/wrapper.ts b/src/cli/cdk/toolkit-lib/wrapper.ts index 50a05a307..0ba5a474e 100644 --- a/src/cli/cdk/toolkit-lib/wrapper.ts +++ 
b/src/cli/cdk/toolkit-lib/wrapper.ts @@ -1,6 +1,7 @@ import { CONFIG_DIR } from '../../../lib'; import { CDK_APP_ENTRY, CDK_PROJECT_DIR } from '../../constants'; import { isChangesetInProgressError } from '../../errors'; +import { isPreviewEnabled } from '../../feature-flags'; import type { CdkToolkitWrapperOptions, DeployOptions, DestroyOptions, DiffOptions, ListOptions } from './types'; import { BaseCredentials, @@ -107,9 +108,18 @@ export class CdkToolkitWrapper { sdkConfig, }); + // The vended CDK app (dist/bin/cdk.js) runs as a child process and cannot see the + // build-time `__PREVIEW__` define baked into this CLI bundle. Pass the preview state + // through the child env so bin/cdk.ts can gate preview-only resources (e.g. harnesses) + // off when preview is disabled. The toolkit overlays this on top of process.env, so + // PATH/AWS_PROFILE are preserved; env must be unconditional so the flag is never + // dropped when no region override is present (absent flag defaults to off). this.cloudAssemblySource = await this.toolkit.fromCdkApp(this.getCdkAppCommand(), { workingDirectory: this.projectDir, - ...(region && { env: { AWS_REGION: region, AWS_DEFAULT_REGION: region } }), + env: { + AGENTCORE_PREVIEW: isPreviewEnabled() ? 
'1' : '0', + ...(region && { AWS_REGION: region, AWS_DEFAULT_REGION: region }), + }, }); }); } diff --git a/src/cli/cli.ts b/src/cli/cli.ts index 38c87cf15..f7aa35f3b 100644 --- a/src/cli/cli.ts +++ b/src/cli/cli.ts @@ -1,8 +1,9 @@ import { getOrCreateInstallationId } from '../lib/schemas/io/global-config'; -import { registerABTestCommand } from './commands/abtest'; import { registerAdd } from './commands/add'; +import { registerAddSkill } from './commands/add/skill-command'; import { registerAddTool } from './commands/add/tool-command'; import { registerArchive } from './commands/archive'; +import { registerBatchEvaluations } from './commands/batch-evaluations'; import { registerConfig } from './commands/config'; import { registerConfigBundle } from './commands/config-bundle'; import { registerCreate } from './commands/create'; @@ -11,6 +12,7 @@ import { registerDeploy } from './commands/deploy'; import { registerDev } from './commands/dev'; import { registerEval } from './commands/eval'; import { registerExec } from './commands/exec'; +import { registerExport } from './commands/export'; import { registerFeedback } from './commands/feedback'; import { registerFetch } from './commands/fetch'; import { registerHelp } from './commands/help'; @@ -18,9 +20,10 @@ import { registerImport } from './commands/import'; import { registerInvoke } from './commands/invoke'; import { registerLogs } from './commands/logs'; import { registerPackage } from './commands/package'; -import { registerPause, registerPromote } from './commands/pause'; -import { registerRecommendations } from './commands/recommendations'; +import { registerPause } from './commands/pause'; +import { registerPromote } from './commands/promote'; import { registerRemove } from './commands/remove'; +import { registerRemoveSkill } from './commands/remove/skill-command'; import { registerRemoveTool } from './commands/remove/tool-command'; import { registerResume } from './commands/resume'; import { registerRun } 
from './commands/run'; @@ -30,6 +33,7 @@ import { registerTelemetry } from './commands/telemetry'; import { registerTraces } from './commands/traces'; import { registerUpdate } from './commands/update'; import { registerValidate } from './commands/validate'; +import { registerView } from './commands/view'; import { COMMAND_DESCRIPTIONS, PACKAGE_VERSION } from './constants'; import { isPreviewEnabled } from './feature-flags'; import { printPostCommandNotices, printTelemetryNotice } from './notices'; @@ -102,7 +106,8 @@ export function registerCommands(program: Command) { registerLogs(program); registerPackage(program); registerPause(program); - registerRecommendations(program); + registerView(program); + registerBatchEvaluations(program); const removeCmd = registerRemove(program); registerResume(program); registerRun(program); @@ -117,6 +122,10 @@ export function registerCommands(program: Command) { registerConfig(program); registerDataset(program); registerArchive(program); + // Register export command (preview-only) + if (isPreviewEnabled()) { + registerExport(program); + } // Register primitive subcommands (add agent, remove agent, add memory, etc.) 
for (const primitive of ALL_PRIMITIVES) { @@ -127,10 +136,9 @@ export function registerCommands(program: Command) { if (isPreviewEnabled()) { registerAddTool(addCmd); registerRemoveTool(removeCmd); + registerAddSkill(addCmd); + registerRemoveSkill(removeCmd); } - - // Register AB test detail command - registerABTestCommand(program); } export const main = async (argv: string[]) => { diff --git a/src/cli/cloudformation/__tests__/outputs-config-bundles.test.ts b/src/cli/cloudformation/__tests__/outputs-config-bundles.test.ts new file mode 100644 index 000000000..b6fb7d800 --- /dev/null +++ b/src/cli/cloudformation/__tests__/outputs-config-bundles.test.ts @@ -0,0 +1,82 @@ +import { parseConfigBundleOutputs } from '../outputs'; +import { describe, expect, it } from 'vitest'; + +describe('parseConfigBundleOutputs', () => { + it('parses BundleId, BundleArn, and VersionId from stack outputs', () => { + const outputs = { + ApplicationConfigBundleMyBundleIdOutputABC123: 'myBundle-abc123def', + ApplicationConfigBundleMyBundleArnOutputDEF456: + 'arn:aws:bedrock-agentcore:us-west-2:123456789012:configuration-bundle/myBundle-abc123def', + ApplicationConfigBundleMyBundleVersionIdOutput789: 'a1b2c3d4-e5f6-7890-abcd-ef1234567890', + }; + + const result = parseConfigBundleOutputs(outputs, ['MyBundle']); + + expect(result).toEqual({ + MyBundle: { + bundleId: 'myBundle-abc123def', + bundleArn: 'arn:aws:bedrock-agentcore:us-west-2:123456789012:configuration-bundle/myBundle-abc123def', + versionId: 'a1b2c3d4-e5f6-7890-abcd-ef1234567890', + }, + }); + }); + + it('parses multiple config bundles', () => { + const outputs = { + ApplicationConfigBundleFirstIdOutputAAA: 'first-id', + ApplicationConfigBundleFirstArnOutputBBB: 'arn:first', + ApplicationConfigBundleFirstVersionIdOutputCCC: 'version-1', + ApplicationConfigBundleSecondIdOutputDDD: 'second-id', + ApplicationConfigBundleSecondArnOutputEEE: 'arn:second', + ApplicationConfigBundleSecondVersionIdOutputFFF: 'version-2', + }; + + const 
result = parseConfigBundleOutputs(outputs, ['First', 'Second']); + + expect(Object.keys(result)).toHaveLength(2); + expect(result.First!.bundleId).toBe('first-id'); + expect(result.Second!.bundleId).toBe('second-id'); + }); + + it('skips bundle when Id output is missing', () => { + const outputs = { + ApplicationConfigBundleMyBundleArnOutputDEF: 'arn:test', + ApplicationConfigBundleMyBundleVersionIdOutput123: 'v1', + }; + + const result = parseConfigBundleOutputs(outputs, ['MyBundle']); + + expect(result).toEqual({}); + }); + + it('skips bundle when VersionId output is missing', () => { + const outputs = { + ApplicationConfigBundleMyBundleIdOutputABC: 'id-123', + ApplicationConfigBundleMyBundleArnOutputDEF: 'arn:test', + }; + + const result = parseConfigBundleOutputs(outputs, ['MyBundle']); + + expect(result).toEqual({}); + }); + + it('returns empty record when no matching outputs exist', () => { + const outputs = { + ApplicationAgentMyAgentRuntimeIdOutputXYZ: 'rt-123', + }; + + const result = parseConfigBundleOutputs(outputs, ['MyBundle']); + + expect(result).toEqual({}); + }); + + it('returns empty record for empty bundle names list', () => { + const outputs = { + ApplicationConfigBundleMyBundleIdOutputABC: 'id-123', + }; + + const result = parseConfigBundleOutputs(outputs, []); + + expect(result).toEqual({}); + }); +}); diff --git a/src/cli/cloudformation/__tests__/outputs-extended.test.ts b/src/cli/cloudformation/__tests__/outputs-extended.test.ts index 1f48faa96..f3d06fe58 100644 --- a/src/cli/cloudformation/__tests__/outputs-extended.test.ts +++ b/src/cli/cloudformation/__tests__/outputs-extended.test.ts @@ -213,6 +213,60 @@ describe('buildDeployedState', () => { expect(state.targets.default!.resources?.stackName).toBe('NewStack'); }); + describe('abTests carry-forward', () => { + const existingWithABTests = { + targets: { + default: { + resources: { + runtimes: {}, + stackName: 'Stack', + abTests: { + live_test: { abTestId: 'ab-live', abTestArn: 
'arn:ab-live' }, + stale_test: { abTestId: 'ab-stale', abTestArn: 'arn:ab-stale' }, + }, + }, + }, + }, + }; + + it('carries forward all abTests when abTestNames is omitted (legacy behavior)', () => { + const state = buildDeployedState({ + targetName: 'default', + stackName: 'Stack', + agents: {}, + gateways: {}, + existingState: existingWithABTests, + }); + expect(state.targets.default!.resources?.abTests).toEqual(existingWithABTests.targets.default.resources.abTests); + }); + + it('prunes abTests not present in the current project spec', () => { + const state = buildDeployedState({ + targetName: 'default', + stackName: 'Stack', + agents: {}, + gateways: {}, + existingState: existingWithABTests, + abTestNames: ['live_test'], + }); + expect(state.targets.default!.resources?.abTests).toEqual({ + live_test: { abTestId: 'ab-live', abTestArn: 'arn:ab-live' }, + }); + }); + + it('omits abTests entirely when none of the existing tests remain in the spec', () => { + const state = buildDeployedState({ + targetName: 'default', + stackName: 'Stack', + agents: {}, + gateways: {}, + existingState: existingWithABTests, + abTestNames: [], + }); + expect(state.targets.default!.resources?.abTests).toBeUndefined(); + }); + }); + it('includes identityKmsKeyArn when provided', () => { const state = buildDeployedState({ targetName: 'default', diff --git a/src/cli/cloudformation/__tests__/outputs-harness.test.ts b/src/cli/cloudformation/__tests__/outputs-harness.test.ts new file mode 100644 index 000000000..b97332c12 --- /dev/null +++ b/src/cli/cloudformation/__tests__/outputs-harness.test.ts @@ -0,0 +1,138 @@ +import type { DeployedState, HarnessDeployedState } from '../../../schema'; +import { toPascalId } from '../logical-ids'; +import { buildDeployedState, parseHarnessOutputs } from '../outputs'; +import { describe, expect, it, vi } from 'vitest'; + +/** Silent onWarn sink for cases where the warning is not under test. 
*/ +const noWarn = vi.fn(); + +/** Build the four CDK output keys for a harness, mirroring the L3's output naming. */ +function harnessOutputs( + name: string, + overrides: Partial> = {} +) { + const p = toPascalId('Harness', name); + const out: Record = {}; + const def = { + Id: `h-${name}`, + Arn: `arn:aws:bedrock-agentcore:us-west-2:111122223333:harness/h-${name}`, + Status: 'READY', + RoleRoleArn: `arn:aws:iam::111122223333:role/${name}`, + AgentRuntimeArn: `arn:aws:bedrock-agentcore:us-west-2:111122223333:runtime/rt-${name}`, + }; + const merged = { ...def, ...overrides }; + for (const [seg, val] of Object.entries(merged)) { + if (val === undefined) continue; + out[`Application${p}${seg}Output${seg}Hash`] = val; + } + return out; +} + +describe('parseHarnessOutputs', () => { + it('parses a complete harness into a cloudformation-marked record', () => { + const result = parseHarnessOutputs(harnessOutputs('h1'), ['h1'], noWarn); + expect(result.h1).toMatchObject({ + harnessId: 'h-h1', + status: 'READY', + provisioner: 'cloudformation', + }); + expect(result.h1!.agentRuntimeArn).toContain('runtime/rt-h1'); + }); + + it('warns and skips when a harness produced no outputs', () => { + const onWarn = vi.fn(); + const result = parseHarnessOutputs({}, ['ghost'], onWarn); + expect(result.ghost).toBeUndefined(); + expect(onWarn).toHaveBeenCalledTimes(1); + expect(onWarn.mock.calls[0]![0]).toContain('produced no CloudFormation outputs'); + }); + + it('warns naming the missing key when a harness is partially emitted', () => { + const onWarn = vi.fn(); + // Drop the RoleRoleArn output → partial. 
+ const outputs = harnessOutputs('h1', { RoleRoleArn: undefined }); + const result = parseHarnessOutputs(outputs, ['h1'], onWarn); + expect(result.h1).toBeUndefined(); + expect(onWarn).toHaveBeenCalledTimes(1); + expect(onWarn.mock.calls[0]![0]).toContain('RoleArn'); + }); + + it('defaults onWarn to console.warn without throwing', () => { + const spy = vi.spyOn(console, 'warn').mockImplementation(() => undefined); + parseHarnessOutputs({}, ['ghost']); + expect(spy).toHaveBeenCalled(); + spy.mockRestore(); + }); +}); + +describe('buildDeployedState — harness carry-forward', () => { + const orphan: HarnessDeployedState = { + harnessId: 'h-orphan', + harnessArn: 'arn:aws:bedrock-agentcore:us-west-2:111122223333:harness/h-orphan', + roleArn: 'arn:aws:iam::111122223333:role/orphan', + status: 'READY', + // no provisioner marker → orphan + }; + const markedExisting: HarnessDeployedState = { + harnessId: 'h-old-cfn', + harnessArn: 'arn:aws:bedrock-agentcore:us-west-2:111122223333:harness/h-old-cfn', + roleArn: 'arn:aws:iam::111122223333:role/oldcfn', + status: 'READY', + provisioner: 'cloudformation', + }; + + function existingStateWith(harnesses: Record): DeployedState { + return { targets: { default: { resources: { stackName: 'S', harnesses } } } }; + } + + it('preserves an existing orphan that the current outputs do not cover', () => { + const result = buildDeployedState({ + targetName: 'default', + stackName: 'S', + agents: {}, + gateways: {}, + existingState: existingStateWith({ legacy: orphan }), + harnesses: parseHarnessOutputs(harnessOutputs('h1'), ['h1'], noWarn), + }); + const harnesses = result.targets.default!.resources?.harnesses ?? 
{}; + expect(harnesses.legacy).toMatchObject({ harnessId: 'h-orphan' }); + expect(harnesses.h1).toMatchObject({ provisioner: 'cloudformation' }); + }); + + it('lets a freshly-parsed CFN record win on key conflict (carries the marker)', () => { + const result = buildDeployedState({ + targetName: 'default', + stackName: 'S', + agents: {}, + gateways: {}, + existingState: existingStateWith({ h1: orphan }), + harnesses: parseHarnessOutputs(harnessOutputs('h1'), ['h1'], noWarn), + }); + const h1 = result.targets.default!.resources?.harnesses?.h1; + expect(h1?.provisioner).toBe('cloudformation'); + expect(h1?.harnessId).toBe('h-h1'); + }); + + it('drops a previously CFN-managed harness that is no longer in the outputs (CFN deleted it)', () => { + const result = buildDeployedState({ + targetName: 'default', + stackName: 'S', + agents: {}, + gateways: {}, + existingState: existingStateWith({ removed: markedExisting }), + harnesses: {}, + }); + expect(result.targets.default!.resources?.harnesses).toBeUndefined(); + }); + + it('does not create a harnesses key when there are neither outputs nor existing orphans', () => { + const result = buildDeployedState({ + targetName: 'default', + stackName: 'S', + agents: {}, + gateways: {}, + harnesses: {}, + }); + expect(result.targets.default!.resources?.harnesses).toBeUndefined(); + }); +}); diff --git a/src/cli/cloudformation/__tests__/outputs.test.ts b/src/cli/cloudformation/__tests__/outputs.test.ts index 24f39b451..208ad95fc 100644 --- a/src/cli/cloudformation/__tests__/outputs.test.ts +++ b/src/cli/cloudformation/__tests__/outputs.test.ts @@ -1,11 +1,12 @@ import { buildDeployedState, parseGatewayOutputs, + parseHarnessOutputs, parseMemoryOutputs, parsePolicyEngineOutputs, parsePolicyOutputs, } from '../outputs'; -import { describe, expect, it } from 'vitest'; +import { describe, expect, it, vi } from 'vitest'; describe('buildDeployedState', () => { it('persists identityKmsKeyArn when provided', () => { @@ -292,6 +293,34 @@ 
describe('parseMemoryOutputs', () => { }); }); +describe('parseHarnessOutputs', () => { + const completeOutputs = (name: string, extra: Record = {}) => ({ + [`ApplicationHarness${name}IdOutputAAA`]: `h-${name}`, + [`ApplicationHarness${name}ArnOutputBBB`]: `arn:aws:bedrock-agentcore:us-east-1:1:harness/h-${name}`, + [`ApplicationHarness${name}StatusOutputCCC`]: 'READY', + [`ApplicationHarness${name}RoleRoleArnOutputDDD`]: 'arn:aws:iam::1:role/r', + ...extra, + }); + + it('captures harnessVersion from the Version output', () => { + const outputs = completeOutputs('Support', { ApplicationHarnessSupportVersionOutputEEE: '3' }); + const result = parseHarnessOutputs(outputs, ['Support'], vi.fn()); + expect(result.Support?.harnessVersion).toBe(3); + }); + + it('leaves harnessVersion unset when no Version output is present (legacy stack)', () => { + const result = parseHarnessOutputs(completeOutputs('Support'), ['Support'], vi.fn()); + expect(result.Support).toBeDefined(); + expect(result.Support?.harnessVersion).toBeUndefined(); + }); + + it('ignores a non-numeric Version output', () => { + const outputs = completeOutputs('Support', { ApplicationHarnessSupportVersionOutputEEE: 'not-a-number' }); + const result = parseHarnessOutputs(outputs, ['Support'], vi.fn()); + expect(result.Support?.harnessVersion).toBeUndefined(); + }); +}); + describe('parsePolicyEngineOutputs', () => { it('extracts policy engine outputs matching pattern', () => { const outputs = { @@ -504,32 +533,21 @@ describe('buildDeployedState carry-forward', () => { }); }); - it('carries forward httpGateways from existing state', () => { - const existingState = { - targets: { - default: { - resources: { - stackName: 'TestStack', - httpGateways: { - MyHttpGw: { - gatewayId: 'hgw-456', - gatewayArn: 'arn:aws:bedrock:us-east-1:123456789012:http-gateway/hgw-456', - }, - }, - }, - }, - }, - }; - + it('populates resources.gateways from httpGateways parameter', () => { const result = buildDeployedState({ 
targetName: 'default', stackName: 'TestStack', agents: {}, gateways: {}, - existingState, + httpGateways: { + MyHttpGw: { + gatewayId: 'hgw-456', + gatewayArn: 'arn:aws:bedrock:us-east-1:123456789012:http-gateway/hgw-456', + }, + }, }); - expect(result.targets.default!.resources?.httpGateways).toEqual({ + expect(result.targets.default!.resources?.gateways).toEqual({ MyHttpGw: { gatewayId: 'hgw-456', gatewayArn: 'arn:aws:bedrock:us-east-1:123456789012:http-gateway/hgw-456', @@ -560,26 +578,15 @@ describe('buildDeployedState carry-forward', () => { expect(result.targets.default!.resources?.abTests).toBeUndefined(); }); - it('does not carry forward empty httpGateways', () => { - const existingState = { - targets: { - default: { - resources: { - stackName: 'TestStack', - httpGateways: {}, - }, - }, - }, - }; - + it('does not populate resources.gateways when httpGateways param is empty', () => { const result = buildDeployedState({ targetName: 'default', stackName: 'TestStack', agents: {}, gateways: {}, - existingState, + httpGateways: {}, }); - expect(result.targets.default!.resources?.httpGateways).toBeUndefined(); + expect(result.targets.default!.resources?.gateways).toBeUndefined(); }); }); diff --git a/src/cli/cloudformation/__tests__/parse-kb-outputs.test.ts b/src/cli/cloudformation/__tests__/parse-kb-outputs.test.ts new file mode 100644 index 000000000..92b18363d --- /dev/null +++ b/src/cli/cloudformation/__tests__/parse-kb-outputs.test.ts @@ -0,0 +1,69 @@ +import { parseKnowledgeBaseDataSourceOutputs, parseKnowledgeBaseOutputs } from '../outputs'; +import { describe, expect, it } from 'vitest'; + +describe('parseKnowledgeBaseOutputs', () => { + it('hydrates dataSources[] from per-DS CFN outputs (L3 #234+)', () => { + const outputs = { + ApplicationKnowledgeBaseProductDocsIdOutput06769C35: 'KB1', + ApplicationKnowledgeBaseProductDocsArnOutput9B6F9B44: 'arn:aws:bedrock:us-west-2:0:knowledge-base/KB1', + 
ApplicationKnowledgeBaseProductDocsDataSource0IdOutput750CF2FE: 'DS-A', + ApplicationKnowledgeBaseProductDocsDataSource0UriOutput07D6B66D: 's3://bucket-a/docs/', + ApplicationKnowledgeBaseProductDocsDataSource1IdOutput9DF50FA0: 'DS-B', + ApplicationKnowledgeBaseProductDocsDataSource1UriOutputAA112233: 's3://bucket-b/', + }; + const result = parseKnowledgeBaseOutputs(outputs, ['product-docs']); + expect(result['product-docs']).toEqual({ + knowledgeBaseId: 'KB1', + knowledgeBaseArn: 'arn:aws:bedrock:us-west-2:0:knowledge-base/KB1', + dataSources: [ + { dataSourceId: 'DS-A', uri: 's3://bucket-a/docs/' }, + { dataSourceId: 'DS-B', uri: 's3://bucket-b/' }, + ], + }); + }); + + it('returns empty dataSources[] when per-DS outputs are absent (older L3)', () => { + const outputs = { + ApplicationKnowledgeBaseProductDocsIdOutput06769C35: 'KB1', + ApplicationKnowledgeBaseProductDocsArnOutput9B6F9B44: 'arn:x', + }; + const result = parseKnowledgeBaseOutputs(outputs, ['product-docs']); + expect(result['product-docs']?.dataSources).toEqual([]); + }); + + it('omits a KB whose Id/Arn outputs are missing entirely', () => { + const outputs = { + SomeOtherOutput: 'irrelevant', + }; + const result = parseKnowledgeBaseOutputs(outputs, ['product-docs']); + expect(result['product-docs']).toBeUndefined(); + }); +}); + +describe('parseKnowledgeBaseDataSourceOutputs', () => { + it('orders entries by index even when stack outputs come back unordered', () => { + const outputs = { + ApplicationKnowledgeBaseDocsDataSource2IdOutputAAAAAAAA: 'DS-2', + ApplicationKnowledgeBaseDocsDataSource0IdOutputBBBBBBBB: 'DS-0', + ApplicationKnowledgeBaseDocsDataSource1IdOutputCCCCCCCC: 'DS-1', + ApplicationKnowledgeBaseDocsDataSource0UriOutputDDDDDDDD: 's3://0/', + ApplicationKnowledgeBaseDocsDataSource1UriOutputEEEEEEEE: 's3://1/', + ApplicationKnowledgeBaseDocsDataSource2UriOutputFFFFFFFF: 's3://2/', + }; + expect(parseKnowledgeBaseDataSourceOutputs(outputs, 'docs')).toEqual([ + { dataSourceId: 'DS-0', uri: 
's3://0/' }, + { dataSourceId: 'DS-1', uri: 's3://1/' }, + { dataSourceId: 'DS-2', uri: 's3://2/' }, + ]); + }); + + it('drops orphan entries (Id without Uri or vice versa)', () => { + const outputs = { + ApplicationKnowledgeBaseDocsDataSource0IdOutputAAAAAAAA: 'DS-0', + // no DataSource0UriOutput + ApplicationKnowledgeBaseDocsDataSource1IdOutputBBBBBBBB: 'DS-1', + ApplicationKnowledgeBaseDocsDataSource1UriOutputCCCCCCCC: 's3://1/', + }; + expect(parseKnowledgeBaseDataSourceOutputs(outputs, 'docs')).toEqual([{ dataSourceId: 'DS-1', uri: 's3://1/' }]); + }); +}); diff --git a/src/cli/cloudformation/outputs.ts b/src/cli/cloudformation/outputs.ts index 132aa7d0b..d8463a023 100644 --- a/src/cli/cloudformation/outputs.ts +++ b/src/cli/cloudformation/outputs.ts @@ -1,8 +1,11 @@ import type { AgentCoreDeployedState, + ConfigBundleDeployedState, DatasetDeployedState, DeployedState, EvaluatorDeployedState, + HarnessDeployedState, + KnowledgeBaseDeployedState, MemoryDeployedState, OnlineEvalDeployedState, PaymentDeployedState, @@ -42,16 +45,26 @@ export async function getStackOutputs(region: string, stackName: string): Promis * Parse stack outputs into deployed state for gateways. 
* * Output key pattern for gateways: - * Gateway{GatewayName}UrlOutput{Hash} + * Gateway{GatewayName}(Id|Arn|Url)Output{Hash} + * + * Output key pattern for gateway targets: + * GatewayTarget{TargetName}IdOutput{Hash} * * Examples: * - GatewayMyGatewayUrlOutput3E11FAB4 + * - GatewayTargetMyTargetIdOutputA1B2C3D4 */ export function parseGatewayOutputs( outputs: StackOutputs, gatewaySpecs: Record -): Record { - const gateways: Record = {}; +): Record< + string, + { gatewayId: string; gatewayArn: string; gatewayUrl?: string; targets?: Record } +> { + const gateways: Record< + string, + { gatewayId: string; gatewayArn: string; gatewayUrl?: string; targets?: Record } + > = {}; // Map PascalCase gateway names to original names for lookup const gatewayNames = Object.keys(gatewaySpecs); @@ -59,8 +72,23 @@ export function parseGatewayOutputs( // Match pattern: Gateway{Name}{Type}Output{Hash} const outputPattern = /^Gateway(.+?)(Id|Arn|Url)Output/; + // Match pattern: GatewayTarget{TargetName}IdOutput{Hash} + const targetOutputPattern = /^GatewayTarget(.+?)IdOutput/; + + // Collect target outputs separately + const targetOutputs: { logicalTarget: string; targetId: string }[] = []; for (const [key, value] of Object.entries(outputs)) { + // Check target pattern first (more specific) to avoid false matches with gateway pattern + const targetMatch = targetOutputPattern.exec(key); + if (targetMatch) { + const logicalTarget = targetMatch[1]; + if (logicalTarget) { + targetOutputs.push({ logicalTarget, targetId: value }); + } + continue; + } + const match = outputPattern.exec(key); if (!match) continue; @@ -82,6 +110,32 @@ export function parseGatewayOutputs( } } + // Associate target outputs with gateways + // Build a map from PascalCase target name to [gatewayName, originalTargetName] + const targetToGateway = new Map(); + for (const gwName of gatewayNames) { + const gwSpec = gatewaySpecs[gwName]; + if ( + gwSpec && + typeof gwSpec === 'object' && + 'targets' in gwSpec && + 
Array.isArray((gwSpec as { targets?: unknown[] }).targets) + ) { + for (const target of (gwSpec as { targets: { name: string }[] }).targets) { + targetToGateway.set(toPascalId(target.name), { gatewayName: gwName, targetName: target.name }); + } + } + } + + for (const { logicalTarget, targetId } of targetOutputs) { + const mapping = targetToGateway.get(logicalTarget); + const gwState = mapping ? gateways[mapping.gatewayName] : undefined; + if (mapping && gwState) { + gwState.targets ??= {}; + gwState.targets[mapping.targetName] = { targetId }; + } + } + return gateways; } @@ -214,6 +268,81 @@ export function parseMemoryOutputs(outputs: StackOutputs, memoryNames: string[]) return memories; } +/** + * Parse stack outputs into deployed state for knowledge bases. + * + * Output key patterns (L3 ≥ #234): + * ApplicationKnowledgeBase{Pascal}(Id|Arn)Output{Hash} + * ApplicationKnowledgeBase{Pascal}DataSource{N}(Id|Uri)Output{Hash} + * + * Per-DS outputs are how we map URI → deployed DS id deterministically. For + * stacks deployed against an older L3 that pre-dates those outputs, the map + * comes back empty — callers fall back to ListDataSources. + * + * `sourcesHash` is populated separately by the post-deploy step. 
+ */ +export function parseKnowledgeBaseOutputs( + outputs: StackOutputs, + knowledgeBaseNames: string[] +): Record { + const knowledgeBases: Record = {}; + const outputKeys = Object.keys(outputs); + + for (const kbName of knowledgeBaseNames) { + const pascal = toPascalId('KnowledgeBase', kbName); + const idPrefix = `Application${pascal}IdOutput`; + const arnPrefix = `Application${pascal}ArnOutput`; + + const idKey = outputKeys.find(k => k.startsWith(idPrefix)); + const arnKey = outputKeys.find(k => k.startsWith(arnPrefix)); + + if (idKey && arnKey) { + knowledgeBases[kbName] = { + knowledgeBaseId: outputs[idKey]!, + knowledgeBaseArn: outputs[arnKey]!, + dataSources: parseKnowledgeBaseDataSourceOutputs(outputs, kbName), + }; + } + } + + return knowledgeBases; +} + +/** + * Parse the per-DataSource CFN outputs for a single KB into an ordered + * `[{dataSourceId, uri}]` array. Outputs are paired by index (DataSource{N}Id + * + DataSource{N}Uri) and sorted ascending by N so the result mirrors the + * local `dataSources[]` order from agentcore.json. + * + * Returns an empty array when no per-DS outputs are present (e.g. stack + * deployed against an older L3) — callers should fall back to a SDK listing. + */ +export function parseKnowledgeBaseDataSourceOutputs( + outputs: StackOutputs, + knowledgeBaseName: string +): { dataSourceId: string; uri: string }[] { + const pascal = toPascalId('KnowledgeBase', knowledgeBaseName); + const indexed = new Map(); + // Match `Application{Pascal}DataSource{N}IdOutput…` and `…UriOutput…`. + const pattern = new RegExp(`^Application${pascal}DataSource(\\d+)(Id|Uri)Output`); + + for (const [key, value] of Object.entries(outputs)) { + const match = pattern.exec(key); + if (!match) continue; + const idx = parseInt(match[1]!, 10); + const kind = match[2] as 'Id' | 'Uri'; + const slot = indexed.get(idx) ?? 
{}; + if (kind === 'Id') slot.dataSourceId = value; + else slot.uri = value; + indexed.set(idx, slot); + } + + return [...indexed.entries()] + .sort(([a], [b]) => a - b) + .map(([, slot]) => slot) + .filter((slot): slot is { dataSourceId: string; uri: string } => !!slot.dataSourceId && !!slot.uri); +} + /** * Parse stack outputs into deployed state for evaluators. * @@ -408,6 +537,103 @@ export function parseDatasetOutputs( return datasets; } +/** + * Parse CDK stack outputs for CFN-deployed harnesses into deployed-state records. + * + * The L3 AgentCoreApplication emits, per harness `${name}` (pascal = toPascalId('Harness', name)): + * ApplicationHarness{Pascal}{Id,Arn,Status,AgentRuntimeArn}Output + * and the execution role (AgentCoreHarnessRole) separately emits: + * ApplicationHarness{Pascal}RoleRoleArnOutput + * The 'Arn' harness prefix does not collide with 'RoleRoleArn' (next segment differs). + */ +export function parseHarnessOutputs( + outputs: StackOutputs, + harnessNames: string[], + onWarn: (message: string) => void = console.warn +): Record { + const harnesses: Record = {}; + const outputKeys = Object.keys(outputs); + + for (const harnessName of harnessNames) { + const pascal = toPascalId('Harness', harnessName); + const idKey = outputKeys.find(k => k.startsWith(`Application${pascal}IdOutput`)); + const arnKey = outputKeys.find(k => k.startsWith(`Application${pascal}ArnOutput`)); + const statusKey = outputKeys.find(k => k.startsWith(`Application${pascal}StatusOutput`)); + const runtimeArnKey = outputKeys.find(k => k.startsWith(`Application${pascal}AgentRuntimeArnOutput`)); + const roleArnKey = outputKeys.find(k => k.startsWith(`Application${pascal}RoleRoleArnOutput`)); + // Version is OPTIONAL: stacks deployed before the config-versioning change won't emit it, so it + // is never part of the required-set guard below — a missing Version just leaves harnessVersion unset. 
+ const versionKey = outputKeys.find(k => k.startsWith(`Application${pascal}VersionOutput`)); + const versionRaw = versionKey ? outputs[versionKey] : undefined; + const harnessVersion = versionRaw !== undefined && /^[0-9]+$/.test(versionRaw) ? Number(versionRaw) : undefined; + + // Id/Arn/Status/RoleArn are required for a complete CDK-managed harness record. + if (idKey && arnKey && statusKey && roleArnKey) { + harnesses[harnessName] = { + harnessId: outputs[idKey]!, + harnessArn: outputs[arnKey]!, + status: outputs[statusKey]!, + roleArn: outputs[roleArnKey]!, + ...(harnessVersion !== undefined && { harnessVersion }), + ...(runtimeArnKey && { agentRuntimeArn: outputs[runtimeArnKey] }), + provisioner: 'cloudformation', + }; + continue; + } + + // A spec'd harness that produced incomplete (or no) outputs is dropped from + // deployed-state, which silently removes it from `status`/`invoke`. Surface + // the gap so a partially-emitted or missing harness leaves a trace rather + // than vanishing without explanation. + const missing = [!idKey && 'Id', !arnKey && 'Arn', !statusKey && 'Status', !roleArnKey && 'RoleArn'].filter( + (v): v is string => typeof v === 'string' + ); + if (missing.length === 4) { + onWarn( + `Harness "${harnessName}" produced no CloudFormation outputs; it will not appear in ` + + `\`agentcore status\` or be invocable until the next successful deploy.` + ); + } else { + onWarn( + `Harness "${harnessName}" is missing CloudFormation output(s): ${missing.join(', ')}. ` + + `Skipping it in deployed-state — it will not appear in \`agentcore status\` or be invocable. 
` + + `Re-run \`agentcore deploy\`; if this persists, the harness stack output template may be malformed.` + ); + } + } + + return harnesses; +} + +export function parseConfigBundleOutputs( + outputs: StackOutputs, + bundleNames: string[] +): Record { + const bundles: Record = {}; + const outputKeys = Object.keys(outputs); + + for (const bundleName of bundleNames) { + const pascal = toPascalId('ConfigBundle', bundleName); + const idPrefix = `Application${pascal}IdOutput`; + const arnPrefix = `Application${pascal}ArnOutput`; + const versionPrefix = `Application${pascal}VersionIdOutput`; + + const idKey = outputKeys.find(k => k.startsWith(idPrefix)); + const arnKey = outputKeys.find(k => k.startsWith(arnPrefix)); + const versionKey = outputKeys.find(k => k.startsWith(versionPrefix)); + + if (idKey && arnKey && versionKey) { + bundles[bundleName] = { + bundleId: outputs[idKey]!, + bundleArn: outputs[arnKey]!, + versionId: outputs[versionKey]!, + }; + } + } + + return bundles; +} + /** * Strip underscores from a name to produce a valid CDK logical ID segment. * Must match the toCdkId() function in the vended cdk-stack.ts. @@ -480,6 +706,10 @@ export interface BuildDeployedStateOptions { stackName: string; agents: Record; gateways: Record; + httpGateways?: Record< + string, + { gatewayId: string; gatewayArn: string; gatewayUrl?: string; targets?: Record } + >; existingState?: DeployedState; identityKmsKeyArn?: string; credentials?: Record; @@ -489,20 +719,19 @@ export interface BuildDeployedStateOptions { policyEngines?: Record; policies?: Record; runtimeEndpoints?: Record; - harnesses?: Record< - string, - { - harnessId: string; - harnessArn: string; - roleArn: string; - status: string; - agentRuntimeArn?: string; - memoryArn?: string; - configHash?: string; - } - >; + harnesses?: Record; datasets?: Record; + configBundles?: Record; + knowledgeBases?: Record; payments?: Record; + /** + * Names of A/B tests currently declared in the project spec. 
AB test state is managed + * post-deploy (not via CFN outputs) and carried forward across deploys; passing the + * current spec names lets us prune entries for tests the user has since removed, so + * stale (e.g. preview) entries self-heal instead of lingering in deployed-state. + * If omitted, all existing AB test entries are carried forward unchanged. + */ + abTestNames?: string[]; } /** @@ -514,6 +743,7 @@ export function buildDeployedState(opts: BuildDeployedStateOptions): DeployedSta stackName, agents, gateways, + httpGateways, existingState, identityKmsKeyArn, credentials, @@ -525,7 +755,10 @@ export function buildDeployedState(opts: BuildDeployedStateOptions): DeployedSta runtimeEndpoints, harnesses, datasets, + configBundles, + knowledgeBases, payments, + abTestNames, } = opts; const targetState: TargetDeployedState = { resources: { @@ -545,6 +778,11 @@ export function buildDeployedState(opts: BuildDeployedStateOptions): DeployedSta }; } + // Add HTTP gateway state if HTTP gateways exist + if (httpGateways && Object.keys(httpGateways).length > 0) { + targetState.resources!.gateways = httpGateways; + } + // Add credential state if credentials exist if (credentials && Object.keys(credentials).length > 0) { targetState.resources!.credentials = credentials; @@ -569,27 +807,52 @@ export function buildDeployedState(opts: BuildDeployedStateOptions): DeployedSta targetState.resources!.datasets = datasets; } - // Carry forward config bundles from existing state (managed post-deploy, not via CFN outputs) - const existingConfigBundles = existingState?.targets?.[targetName]?.resources?.configBundles; - if (existingConfigBundles && Object.keys(existingConfigBundles).length > 0) { - targetState.resources!.configBundles = existingConfigBundles; + if (knowledgeBases && Object.keys(knowledgeBases).length > 0) { + targetState.resources!.knowledgeBases = knowledgeBases; } - // Carry forward AB tests from existing state (managed post-deploy, not via CFN outputs) + // Config 
bundles from CFN outputs (preferred) or carry forward from existing state (legacy) + if (configBundles && Object.keys(configBundles).length > 0) { + targetState.resources!.configBundles = configBundles; + } else { + const existingConfigBundles = existingState?.targets?.[targetName]?.resources?.configBundles; + if (existingConfigBundles && Object.keys(existingConfigBundles).length > 0) { + targetState.resources!.configBundles = existingConfigBundles; + } + } + + // Carry forward AB tests from existing state (managed post-deploy, not via CFN outputs). + // Prune entries for tests no longer declared in the project spec so stale (e.g. preview) + // entries self-heal. When abTestNames is undefined, carry forward everything unchanged. const existingABTests = existingState?.targets?.[targetName]?.resources?.abTests; if (existingABTests && Object.keys(existingABTests).length > 0) { - targetState.resources!.abTests = existingABTests; + const carriedABTests = + abTestNames === undefined + ? existingABTests + : Object.fromEntries(Object.entries(existingABTests).filter(([specName]) => abTestNames.includes(specName))); + if (Object.keys(carriedABTests).length > 0) { + targetState.resources!.abTests = carriedABTests; + } } - // Carry forward HTTP gateways from existing state (managed post-deploy, not via CFN outputs) - const existingHttpGateways = existingState?.targets?.[targetName]?.resources?.httpGateways; - if (existingHttpGateways && Object.keys(existingHttpGateways).length > 0) { - targetState.resources!.httpGateways = existingHttpGateways; + // Merge harness state. CFN-sourced records (freshly parsed, stamped + // `provisioner: 'cloudformation'`) are authoritative for every CDK-managed harness — they + // are re-parsed in full each deploy, so a CFN harness dropped from the spec correctly + // disappears here (CloudFormation deletes the resource). 
On top of that, carry forward any + // existing *orphan* record (imperative-build harness, no marker) that the current outputs + // don't cover, so it stays visible to detection/cleanup instead of silently vanishing. + // Only orphans are preserved — carrying forward stale marked records would resurrect a + // harness CloudFormation just deleted. + const existingHarnesses = existingState?.targets?.[targetName]?.resources?.harnesses ?? {}; + const carriedOrphans: Record = {}; + for (const [name, record] of Object.entries(existingHarnesses)) { + if (!harnesses?.[name] && record.provisioner !== 'cloudformation') { + carriedOrphans[name] = record; + } } - - // Add harness state if harnesses exist - if (harnesses && Object.keys(harnesses).length > 0) { - targetState.resources!.harnesses = harnesses; + const mergedHarnesses = { ...carriedOrphans, ...harnesses }; + if (Object.keys(mergedHarnesses).length > 0) { + targetState.resources!.harnesses = mergedHarnesses; } // Add payment state from CFN outputs (or preserve credential provider state) diff --git a/src/cli/commands/abtest/command.ts b/src/cli/commands/abtest/command.ts deleted file mode 100644 index cc236cdb3..000000000 --- a/src/cli/commands/abtest/command.ts +++ /dev/null @@ -1,199 +0,0 @@ -/** - * AB Test commands. - * - * `agentcore ab-test ` — fetches and displays full AB test details - * from the data plane API, including evaluation scores/metrics. 
- */ -import { ConfigIO } from '../../../lib'; -import { getABTest, listABTests } from '../../aws/agentcore-ab-tests'; -import type { GetABTestResult } from '../../aws/agentcore-ab-tests'; -import { dnsSuffix } from '../../aws/partition'; -import { getErrorMessage } from '../../errors'; -import type { Command } from '@commander-js/extra-typings'; - -// ============================================================================ -// Helpers -// ============================================================================ - -async function getRegion(cliRegion?: string): Promise { - if (cliRegion) return cliRegion; - try { - const configIO = new ConfigIO(); - const targets = await configIO.resolveAWSDeploymentTargets(); - if (targets.length > 0) return targets[0]!.region; - } catch { - // Fall through to env vars - } - return process.env.AWS_DEFAULT_REGION ?? process.env.AWS_REGION ?? 'us-east-1'; -} - -async function resolveABTestId( - testName: string, - region: string -): Promise<{ abTestId: string; region: string; error?: string }> { - let projectName: string | undefined; - try { - const configIO = new ConfigIO(); - const deployedState = await configIO.readDeployedState(); - const awsTargets = await configIO.readAWSDeploymentTargets(); - - try { - const projectSpec = await configIO.readProjectSpec(); - projectName = projectSpec.name; - } catch { - // Project spec unavailable - } - - for (const [targetName, target] of Object.entries(deployedState.targets ?? {})) { - const abTests = target.resources?.abTests; - if (abTests?.[testName]) { - const targetConfig = awsTargets.find(t => t.name === targetName); - const resolvedRegion = targetConfig?.region ?? 
region; - return { abTestId: abTests[testName].abTestId, region: resolvedRegion }; - } - } - } catch { - // No deployed state available - } - - try { - const result = await listABTests({ region, maxResults: 100 }); - // Match against both prefixed name ({projectName}_{testName}) and bare testName (backwards compat) - const prefixedName = projectName ? `${projectName}_${testName}` : undefined; - // eslint-disable-next-line @typescript-eslint/prefer-nullish-coalescing -- boolean OR, not nullish coalescing - const match = result.abTests.find(t => (prefixedName && t.name === prefixedName) || t.name === testName); - if (match) { - return { abTestId: match.abTestId, region }; - } - } catch { - // API call failed - } - - return { abTestId: '', region, error: `AB test "${testName}" not found in deployed state or API.` }; -} - -function gatewayUrlFromArn(arn: string): string { - const parts = arn.split(':'); - const region = parts[3]; - const gatewayId = parts[5]?.split('/')[1]; - if (region && gatewayId) { - return `https://${gatewayId}.gateway.bedrock-agentcore.${region}.${dnsSuffix(region)}`; - } - return arn; -} - -function formatABTestDetails(test: GetABTestResult): string { - const lines: string[] = []; - lines.push(`AB Test: ${test.name}`); - lines.push(` Status: ${test.status}`); - lines.push(` Execution: ${test.executionStatus}`); - lines.push(` Invocation URL: ${gatewayUrlFromArn(test.gatewayArn)}//invocations`); - lines.push( - ` Online Eval: ${'onlineEvaluationConfigArn' in test.evaluationConfig ? 
test.evaluationConfig.onlineEvaluationConfigArn : 'per-variant'}` - ); - if (test.description) lines.push(` Description: ${test.description}`); - - for (const variant of test.variants) { - const bundleRef = variant.variantConfiguration.configurationBundle; - const targetRef = variant.variantConfiguration.target; - if (targetRef) { - lines.push(` Variant ${variant.name}: weight=${variant.weight}, target=${targetRef.name}`); - } else if (bundleRef) { - lines.push( - ` Variant ${variant.name}: weight=${variant.weight}, bundle=${bundleRef.bundleArn}, version=${bundleRef.bundleVersion}` - ); - } - } - - // TODO(post-preview): Re-enable max duration display once configurable duration is launched. - // if (test.maxDurationDays) lines.push(` Max Duration: ${test.maxDurationDays} days`); - if (test.startedAt) lines.push(` Started: ${test.startedAt}`); - if (test.stoppedAt) lines.push(` Stopped: ${test.stoppedAt}`); - if (test.failureReason) lines.push(` Failure: ${test.failureReason}`); - - if (test.results) { - lines.push(' Results:'); - if (test.results.analysisTimestamp) { - lines.push(` Analysis Time: ${test.results.analysisTimestamp}`); - } - for (const metric of test.results.evaluatorMetrics) { - lines.push(` Evaluator: ${metric.evaluatorArn}`); - lines.push( - ` Control: samples=${metric.controlStats.sampleSize}, mean=${metric.controlStats.mean.toFixed(4)}` - ); - for (const vr of metric.variantResults) { - lines.push( - ` ${vr.treatmentName}: samples=${vr.sampleSize}, mean=${vr.mean.toFixed(4)}, significant=${vr.isSignificant}` - ); - if (vr.absoluteChange !== undefined) - lines.push(` Change: ${vr.absoluteChange.toFixed(4)} (${(vr.percentChange ?? 
0).toFixed(2)}%)`); - if (vr.pValue !== undefined) lines.push(` p-value: ${vr.pValue.toFixed(6)}`); - if (vr.confidenceInterval) { - lines.push( - ` CI: [${vr.confidenceInterval.lower?.toFixed(4)}, ${vr.confidenceInterval.upper?.toFixed(4)}]` - ); - } - } - } - } - - return lines.join('\n'); -} - -// ============================================================================ -// Command registration -// ============================================================================ - -export function registerABTestCommand(program: Command): void { - program - .command('ab-test') - .description('[preview] View A/B test details and results') - .argument('', 'AB test name') - .option('--region ', 'AWS region') - .option('--json', 'Output as JSON') - .action(async (name: string, cliOptions: { region?: string; json?: boolean }) => { - try { - const region = await getRegion(cliOptions.region); - const { abTestId, error } = await resolveABTestId(name, region); - if (error) { - if (cliOptions.json) { - console.log(JSON.stringify({ success: false, error })); - } else { - console.error(error); - } - process.exit(1); - } - const result = await getABTest({ region, abTestId }); - - if (cliOptions.json) { - console.log(JSON.stringify(result)); - process.exit(0); - } else if (process.stdout.isTTY) { - // Render TUI detail screen with key bindings - const [{ render }, { default: React }, { ABTestDetailScreen }] = await Promise.all([ - import('ink'), - import('react'), - import('../../tui/screens/ab-test'), - ]); - render( - React.createElement(ABTestDetailScreen, { - abTestId, - region, - onExit: () => process.exit(0), - }) - ); - return; - } else { - console.log(formatABTestDetails(result)); - process.exit(0); - } - } catch (error) { - if (cliOptions.json) { - console.log(JSON.stringify({ success: false, error: getErrorMessage(error) })); - } else { - console.error(`Error: ${getErrorMessage(error)}`); - } - process.exit(1); - } - }); -} diff --git a/src/cli/commands/abtest/index.ts 
b/src/cli/commands/abtest/index.ts deleted file mode 100644 index 0ff25efc5..000000000 --- a/src/cli/commands/abtest/index.ts +++ /dev/null @@ -1 +0,0 @@ -export { registerABTestCommand } from './command'; diff --git a/src/cli/commands/add/__tests__/add-gateway-target.test.ts b/src/cli/commands/add/__tests__/add-gateway-target.test.ts index e06bc0d83..828908868 100644 --- a/src/cli/commands/add/__tests__/add-gateway-target.test.ts +++ b/src/cli/commands/add/__tests__/add-gateway-target.test.ts @@ -22,8 +22,11 @@ describe('add gateway-target command', () => { } projectDir = join(testDir, projectName); - // Create gateway for tests - const gwResult = await runCLI(['add', 'gateway', '--name', gatewayName, '--json'], projectDir); + // Create gateway for tests with MCP protocol (required for mcpServer and lambdaFunctionArn targets) + const gwResult = await runCLI( + ['add', 'gateway', '--name', gatewayName, '--protocol-type', 'MCP', '--json'], + projectDir + ); if (gwResult.exitCode !== 0) { throw new Error(`Failed to create gateway: ${gwResult.stdout} ${gwResult.stderr}`); } diff --git a/src/cli/commands/add/__tests__/add-knowledge-base.test.ts b/src/cli/commands/add/__tests__/add-knowledge-base.test.ts new file mode 100644 index 000000000..a4f28a699 --- /dev/null +++ b/src/cli/commands/add/__tests__/add-knowledge-base.test.ts @@ -0,0 +1,106 @@ +import { readProjectConfig, runCLI } from '../../../../test-utils/index.js'; +import { randomUUID } from 'node:crypto'; +import { mkdir, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterAll, beforeAll, describe, expect, it } from 'vitest'; + +/** + * End-to-end coverage for the `--data-source-type` flag registered on + * `add knowledge-base`. Drives the built CLI so we exercise the actual + * commander registration (flag default, --connector-config threading) rather + * than calling add() directly — that path is unit-tested elsewhere. 
+ */ +// FMKB is gated behind ENABLE_GATED_FEATURES; pass it on every runCLI for these tests. +const GATED_ENV = { ENABLE_GATED_FEATURES: '1' }; + +describe('add knowledge-base command — --data-source-type flag', () => { + let testDir: string; + let projectDir: string; + + beforeAll(async () => { + testDir = join(tmpdir(), `agentcore-add-kb-${randomUUID()}`); + await mkdir(testDir, { recursive: true }); + const projectName = 'TestProj'; + const result = await runCLI(['create', '--name', projectName, '--no-agent'], testDir); + if (result.exitCode !== 0) { + throw new Error(`Failed to create project: ${result.stdout} ${result.stderr}`); + } + projectDir = join(testDir, projectName); + }); + + afterAll(async () => { + await rm(testDir, { recursive: true, force: true }); + }); + + it('defaults to S3 when --data-source-type is omitted', async () => { + const result = await runCLI( + ['add', 'knowledge-base', '--name', 'kb-default', '--source', 's3://my-bucket/data', '--json'], + projectDir, + { env: GATED_ENV } + ); + expect(result.exitCode, `stdout: ${result.stdout}, stderr: ${result.stderr}`).toBe(0); + const json = JSON.parse(result.stdout); + expect(json.success).toBe(true); + + const project = await readProjectConfig(projectDir); + const kb = project.knowledgeBases.find(k => k.name === 'kb-default'); + expect(kb).toBeDefined(); + expect(kb!.dataSources).toEqual([{ type: 'S3', uri: 's3://my-bucket/data' }]); + }); + + it('writes a connector data source when --data-source-type web-crawler is given', async () => { + const cfgPath = join(testDir, 'web-crawler.json'); + await writeFile( + cfgPath, + JSON.stringify({ + type: 'WEB', + connectionConfiguration: { authType: 'NO_AUTH' }, + seedUrls: ['https://example.com'], + }), + 'utf-8' + ); + + const result = await runCLI( + [ + 'add', + 'knowledge-base', + '--name', + 'kb-web', + '--data-source-type', + 'web-crawler', + '--connector-config', + cfgPath, + '--json', + ], + projectDir, + { env: GATED_ENV } + ); + 
expect(result.exitCode, `stdout: ${result.stdout}, stderr: ${result.stderr}`).toBe(0); + const json = JSON.parse(result.stdout); + expect(json.success).toBe(true); + + const project = await readProjectConfig(projectDir); + const kb = project.knowledgeBases.find(k => k.name === 'kb-web'); + expect(kb).toBeDefined(); + expect(kb!.dataSources).toHaveLength(1); + const ds = kb!.dataSources[0]!; + expect(ds.type).toBe('WEB'); + // Connector configs are copied into app/<kb-name>/ (here: app/kb-web/). + expect((ds as { connectorConfigFile?: string }).connectorConfigFile).toBe('app/kb-web/web-crawler.json'); + }); + + it('rejects --connector-config for the default S3 type', async () => { + const cfgPath = join(testDir, 'stray.json'); + await writeFile(cfgPath, JSON.stringify({ type: 'WEB' }), 'utf-8'); + + const result = await runCLI( + ['add', 'knowledge-base', '--name', 'kb-bad', '--connector-config', cfgPath, '--json'], + projectDir, + { env: GATED_ENV } + ); + expect(result.exitCode).toBe(1); + const json = JSON.parse(result.stdout); + expect(json.success).toBe(false); + }); +}); diff --git a/src/cli/commands/add/__tests__/auth-options.test.ts b/src/cli/commands/add/__tests__/auth-options.test.ts index dd702c11e..18f3ca78a 100644 --- a/src/cli/commands/add/__tests__/auth-options.test.ts +++ b/src/cli/commands/add/__tests__/auth-options.test.ts @@ -115,4 +115,164 @@ describe('validateJwtAuthorizerOptions', () => { valid: true, }); }); + + describe('PrivateLink inbound flags', () => { + it('accepts a lattice resource-config id', () => { + expect( + validateJwtAuthorizerOptions({ ...validBase, privateEndpointLatticeArn: 'rcfg-0123456789abcdefg' }) + ).toEqual({ valid: true }); + }); + + it('accepts a full managed-VPC endpoint', () => { + expect( + validateJwtAuthorizerOptions({ + ...validBase, + privateEndpointVpcId: 'vpc-0123456789abcdef0', + privateEndpointSubnets: 'subnet-0123456789abcdef0, subnet-0fedcba9876543210', + privateEndpointIpType: 'IPV4', + privateEndpointSecurityGroups: 
'sg-0123456789abcdef0', + privateEndpointRoutingDomain: 'example.internal', + }) + ).toEqual({ valid: true }); + }); + + it('rejects both lattice + vpc arms (mutually exclusive)', () => { + const result = validateJwtAuthorizerOptions({ + ...validBase, + privateEndpointLatticeArn: 'rcfg-0123456789abcdefg', + privateEndpointVpcId: 'vpc-0123456789abcdef0', + }); + expect(result.valid).toBe(false); + if (!result.valid) expect(result.error).toContain('mutually exclusive'); + }); + + it('rejects managed-VPC missing subnets', () => { + const result = validateJwtAuthorizerOptions({ + ...validBase, + privateEndpointVpcId: 'vpc-0123456789abcdef0', + privateEndpointIpType: 'IPV4', + }); + expect(result.valid).toBe(false); + if (!result.valid) expect(result.error).toContain('--private-endpoint-subnets is required'); + }); + + it('rejects managed-VPC missing ip-type', () => { + const result = validateJwtAuthorizerOptions({ + ...validBase, + privateEndpointVpcId: 'vpc-0123456789abcdef0', + privateEndpointSubnets: 'subnet-0123456789abcdef0', + }); + expect(result.valid).toBe(false); + if (!result.valid) expect(result.error).toContain('--private-endpoint-ip-type'); + }); + + it('rejects an invalid lattice id', () => { + const result = validateJwtAuthorizerOptions({ ...validBase, privateEndpointLatticeArn: 'nope' }); + expect(result.valid).toBe(false); + }); + + it('rejects VPC sub-flags without --private-endpoint-vpc-id', () => { + const result = validateJwtAuthorizerOptions({ ...validBase, privateEndpointSubnets: 'subnet-0123456789abcdef0' }); + expect(result.valid).toBe(false); + if (!result.valid) expect(result.error).toContain('require --private-endpoint-vpc-id'); + }); + + it('rejects more than 5 overrides', () => { + const overrides = JSON.stringify( + Array.from({ length: 6 }, (_, i) => ({ + domain: `d${i}.example.com`, + privateEndpoint: { + selfManagedLatticeResource: { resourceConfigurationIdentifier: 'rcfg-0123456789abcdefg' }, + }, + })) + ); + const result = 
validateJwtAuthorizerOptions({ ...validBase, privateEndpointOverrides: overrides }); + expect(result.valid).toBe(false); + if (!result.valid) expect(result.error).toContain('at most 5'); + }); + + it('rejects malformed overrides JSON', () => { + const result = validateJwtAuthorizerOptions({ ...validBase, privateEndpointOverrides: '{not json' }); + expect(result.valid).toBe(false); + if (!result.valid) expect(result.error).toContain('must be valid JSON'); + }); + + it('accepts valid overrides (with a matching base lattice endpoint)', () => { + const overrides = JSON.stringify([ + { + domain: 'api.example.com', + privateEndpoint: { + selfManagedLatticeResource: { resourceConfigurationIdentifier: 'rcfg-0123456789abcdefg' }, + }, + }, + ]); + expect( + validateJwtAuthorizerOptions({ + ...validBase, + privateEndpointLatticeArn: 'rcfg-0123456789abcdefg', + privateEndpointOverrides: overrides, + }) + ).toEqual({ valid: true }); + }); + + it('rejects overrides without a base private endpoint', () => { + const overrides = JSON.stringify([ + { + domain: 'api.example.com', + privateEndpoint: { + selfManagedLatticeResource: { resourceConfigurationIdentifier: 'rcfg-0123456789abcdefg' }, + }, + }, + ]); + const result = validateJwtAuthorizerOptions({ ...validBase, privateEndpointOverrides: overrides }); + expect(result.valid).toBe(false); + if (!result.valid) expect(result.error).toContain('requires a base private endpoint'); + }); + + it('rejects an override arm that mismatches the base arm (lattice base, vpc override)', () => { + const overrides = JSON.stringify([ + { + domain: 'api.example.com', + privateEndpoint: { + managedVpcResource: { + vpcIdentifier: 'vpc-0123456789abcdef0', + subnetIds: ['subnet-0123456789abcdef0'], + endpointIpAddressType: 'IPV4', + }, + }, + }, + ]); + const result = validateJwtAuthorizerOptions({ + ...validBase, + privateEndpointLatticeArn: 'rcfg-0123456789abcdefg', + privateEndpointOverrides: overrides, + }); + expect(result.valid).toBe(false); + 
if (!result.valid) expect(result.error).toContain('same kind as the base endpoint'); + }); + + it('rejects duplicate override domains', () => { + const overrides = JSON.stringify([ + { + domain: 'dup.example.com', + privateEndpoint: { + selfManagedLatticeResource: { resourceConfigurationIdentifier: 'rcfg-0123456789abcdefg' }, + }, + }, + { + domain: 'dup.example.com', + privateEndpoint: { + selfManagedLatticeResource: { resourceConfigurationIdentifier: 'rcfg-0123456789abcdefg' }, + }, + }, + ]); + const result = validateJwtAuthorizerOptions({ + ...validBase, + privateEndpointLatticeArn: 'rcfg-0123456789abcdefg', + privateEndpointOverrides: overrides, + }); + expect(result.valid).toBe(false); + if (!result.valid) expect(result.error).toContain('Duplicate private-endpoint override domain'); + }); + }); }); diff --git a/src/cli/commands/add/__tests__/harness-privatelink-guard.test.ts b/src/cli/commands/add/__tests__/harness-privatelink-guard.test.ts new file mode 100644 index 000000000..3795b3b09 --- /dev/null +++ b/src/cli/commands/add/__tests__/harness-privatelink-guard.test.ts @@ -0,0 +1,191 @@ +import type { AddHarnessCliOptions } from '../types'; +import { validateAddHarnessOptions } from '../validate'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; + +const base: AddHarnessCliOptions = { + name: 'h1', + modelProvider: 'bedrock', + modelId: 'us.anthropic.claude-haiku-4-5-20251001-v1:0', +}; + +const DISCOVERY = 'https://idp.example.com/.well-known/openid-configuration'; + +describe('validateAddHarnessOptions — PrivateLink authorizer guard', () => { + it('rejects --private-endpoint-* flags with AWS_IAM authorizer', () => { + const result = validateAddHarnessOptions({ + ...base, + authorizerType: 'AWS_IAM', + privateEndpointVpcId: 'vpc-0123456789abcdef0', + privateEndpointSubnets: 'subnet-0123456789abcdef0', + privateEndpointIpType: 'IPV4', + }); + expect(result.valid).toBe(false); + if (!result.valid) expect(result.error).toContain('only 
valid with CUSTOM_JWT'); + }); + + it('rejects a private-endpoint flag when no authorizer type is set', () => { + const result = validateAddHarnessOptions({ ...base, privateEndpointLatticeArn: 'rcfg-0123456789abcdefg' }); + expect(result.valid).toBe(false); + if (!result.valid) expect(result.error).toContain('only valid with CUSTOM_JWT'); + }); + + it('accepts a private-endpoint flag with CUSTOM_JWT authorizer', () => { + const result = validateAddHarnessOptions({ + ...base, + authorizerType: 'CUSTOM_JWT', + discoveryUrl: DISCOVERY, + allowedAudience: 'aud-1', + privateEndpointLatticeArn: 'rcfg-0123456789abcdefg', + }); + expect(result.valid).toBe(true); + }); + + it('does not flag a plain AWS_IAM harness (no PrivateLink flags)', () => { + const result = validateAddHarnessOptions({ ...base, authorizerType: 'AWS_IAM' }); + expect(result.valid).toBe(true); + }); +}); + +describe('validateAddHarnessOptions — memory flag coupling', () => { + it('rejects --memory-arn together with --memory-name (mutually exclusive)', () => { + const result = validateAddHarnessOptions({ + ...base, + memoryArn: 'arn:aws:bedrock-agentcore:us-west-2:123456789012:memory/Mem-aBcDeFgHiJ', + memoryName: 'mem', + }); + expect(result.valid).toBe(false); + if (!result.valid) expect(result.error).toContain('mutually exclusive'); + }); + + it('rejects --no-memory combined with --memory-arn', () => { + const result = validateAddHarnessOptions({ + ...base, + memory: false, + memoryArn: 'arn:aws:bedrock-agentcore:us-west-2:123456789012:memory/Mem-aBcDeFgHiJ', + }); + expect(result.valid).toBe(false); + if (!result.valid) expect(result.error).toContain('--no-memory'); + }); + + it('rejects --no-memory combined with a memory tuning flag', () => { + const result = validateAddHarnessOptions({ ...base, memory: false, memoryTopK: 5 }); + expect(result.valid).toBe(false); + if (!result.valid) expect(result.error).toContain('--no-memory'); + }); + + it('accepts --memory-name alone', () => { + 
expect(validateAddHarnessOptions({ ...base, memoryName: 'mem' }).valid).toBe(true); + }); +}); + +describe('validateAddHarnessOptions — gateway oauth flag coupling', () => { + it('rejects --gateway-grant-type without --gateway-outbound-auth oauth', () => { + const result = validateAddHarnessOptions({ + ...base, + tools: 'agentcore_gateway', + gatewayArn: 'arn:aws:bedrock-agentcore:us-west-2:123456789012:gateway/gw', + gatewayOutboundAuth: 'awsIam', + gatewayGrantType: 'CLIENT_CREDENTIALS', + }); + expect(result.valid).toBe(false); + if (!result.valid) expect(result.error).toContain('--gateway-outbound-auth oauth'); + }); + + it('rejects --gateway-custom-parameters when outbound-auth is absent', () => { + const result = validateAddHarnessOptions({ + ...base, + tools: 'agentcore_gateway', + gatewayArn: 'arn:aws:bedrock-agentcore:us-west-2:123456789012:gateway/gw', + gatewayCustomParameters: '{"audience":"x"}', + }); + expect(result.valid).toBe(false); + if (!result.valid) expect(result.error).toContain('--gateway-outbound-auth oauth'); + }); +}); + +describe('validateAddHarnessOptions — memory modes (gated OFF)', () => { + const prev = process.env.ENABLE_GATED_FEATURES; + beforeEach(() => { + delete process.env.ENABLE_GATED_FEATURES; + }); + afterEach(() => { + if (prev === undefined) delete process.env.ENABLE_GATED_FEATURES; + else process.env.ENABLE_GATED_FEATURES = prev; + }); + + it('rejects --memory-mode as not-yet-available', () => { + const r = validateAddHarnessOptions({ ...base, memoryMode: 'managed' }); + expect(r.valid).toBe(false); + if (!r.valid) expect(r.error).toContain('not yet available'); + }); + + it('rejects managed-only flags as not-yet-available', () => { + const r = validateAddHarnessOptions({ ...base, memoryStrategies: 'SEMANTIC' }); + expect(r.valid).toBe(false); + if (!r.valid) expect(r.error).toContain('not yet available'); + }); + + it('still accepts the legacy --memory-name reference', () => { + expect(validateAddHarnessOptions({ ...base, 
memoryName: 'mem' }).valid).toBe(true); + }); +}); + +describe('validateAddHarnessOptions — memory modes (gated ON)', () => { + const prev = process.env.ENABLE_GATED_FEATURES; + beforeEach(() => { + process.env.ENABLE_GATED_FEATURES = '1'; + }); + afterEach(() => { + if (prev === undefined) delete process.env.ENABLE_GATED_FEATURES; + else process.env.ENABLE_GATED_FEATURES = prev; + }); + + it('rejects --memory-mode existing with neither arn nor name', () => { + const r = validateAddHarnessOptions({ ...base, memoryMode: 'existing' }); + expect(r.valid).toBe(false); + if (!r.valid) expect(r.error).toContain('existing'); + }); + + it('rejects managed-only flags on existing mode', () => { + const r = validateAddHarnessOptions({ + ...base, + memoryMode: 'existing', + memoryArn: 'arn:aws:bedrock-agentcore:us-west-2:1:memory/m-aBcD012345', + memoryEventExpiryDays: 30, + }); + expect(r.valid).toBe(false); + if (!r.valid) expect(r.error).toContain('--memory-event-expiry-days'); + }); + + it('rejects an invalid managed strategy', () => { + const r = validateAddHarnessOptions({ ...base, memoryMode: 'managed', memoryStrategies: 'SEMANTIC,BOGUS' }); + expect(r.valid).toBe(false); + }); + + it('rejects CUSTOM as a managed strategy', () => { + const r = validateAddHarnessOptions({ ...base, memoryMode: 'managed', memoryStrategies: 'CUSTOM' }); + expect(r.valid).toBe(false); + }); + + it('rejects an invalid --memory-mode value', () => { + const r = validateAddHarnessOptions({ ...base, memoryMode: 'bogus' }); + expect(r.valid).toBe(false); + if (!r.valid) expect(r.error).toContain('Invalid --memory-mode'); + }); + + it('rejects --no-memory combined with --memory-mode managed', () => { + const r = validateAddHarnessOptions({ ...base, memory: false, memoryMode: 'managed' }); + expect(r.valid).toBe(false); + if (!r.valid) expect(r.error).toContain('--no-memory'); + }); + + it('accepts a clean managed config', () => { + expect( + validateAddHarnessOptions({ ...base, memoryMode: 
'managed', memoryStrategies: 'SEMANTIC,SUMMARIZATION' }).valid + ).toBe(true); + }); + + it('accepts managed as the implicit default (no memory flags)', () => { + expect(validateAddHarnessOptions({ ...base }).valid).toBe(true); + }); +}); diff --git a/src/cli/commands/add/__tests__/skill-action.test.ts b/src/cli/commands/add/__tests__/skill-action.test.ts new file mode 100644 index 000000000..3aaaa2d4c --- /dev/null +++ b/src/cli/commands/add/__tests__/skill-action.test.ts @@ -0,0 +1,209 @@ +import type { HarnessSpec } from '../../../../schema'; +import { handleAddSkill } from '../skill-action.js'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +const mockReadHarnessSpec = vi.fn(); +const mockWriteHarnessSpec = vi.fn(); +const mockReadProjectSpec = vi.fn(); + +vi.mock('../../../../lib/index.js', () => ({ + ConfigIO: class { + readHarnessSpec = mockReadHarnessSpec; + writeHarnessSpec = mockWriteHarnessSpec; + readProjectSpec = mockReadProjectSpec; + }, +})); + +function makeHarnessSpec(overrides: Partial<HarnessSpec> = {}): HarnessSpec { // test fixture: a default spec with no tools and no skills + return { + name: 'TestHarness', + model: { provider: 'bedrock', modelId: 'anthropic.claude-3-5-sonnet-20240620-v1:0' }, + tools: [], + skills: [], + ...overrides, // spread last so caller-supplied fields replace the defaults above + } as HarnessSpec; +} + +describe('handleAddSkill', () => { + beforeEach(() => { + mockReadHarnessSpec.mockReset(); + mockWriteHarnessSpec.mockReset(); + mockReadProjectSpec.mockReset(); + mockReadProjectSpec.mockResolvedValue({ + credentials: [{ name: 'my-git-cred', authorizerType: 'ApiKeyCredentialProvider' }], + }); + }); + + afterEach(() => { + vi.resetAllMocks(); + }); + + it('adds a path skill to harness', async () => { + mockReadHarnessSpec.mockResolvedValue(makeHarnessSpec()); + const result = await handleAddSkill({ harness: 'TestHarness', path: './my-skill' }); + expect(result.success).toBe(true); + expect(mockWriteHarnessSpec).toHaveBeenCalledWith( + 'TestHarness', + expect.objectContaining({ + skills: [{ path: './my-skill' }], + }) + ); + }); 
+ + it('adds an S3 skill to harness', async () => { + mockReadHarnessSpec.mockResolvedValue(makeHarnessSpec()); + const result = await handleAddSkill({ harness: 'TestHarness', s3: 's3://bucket/skill' }); + expect(result.success).toBe(true); + expect(mockWriteHarnessSpec).toHaveBeenCalledWith( + 'TestHarness', + expect.objectContaining({ + skills: [{ s3Uri: 's3://bucket/skill' }], + }) + ); + }); + + it('adds a git skill to harness', async () => { + mockReadHarnessSpec.mockResolvedValue(makeHarnessSpec()); + const result = await handleAddSkill({ + harness: 'TestHarness', + git: 'https://github.com/org/repo', + gitPath: 'skills/foo', + }); + expect(result.success).toBe(true); + expect(mockWriteHarnessSpec).toHaveBeenCalledWith( + 'TestHarness', + expect.objectContaining({ + skills: [{ gitUrl: 'https://github.com/org/repo', path: 'skills/foo' }], + }) + ); + }); + + it('adds a git skill with auth', async () => { + mockReadHarnessSpec.mockResolvedValue(makeHarnessSpec()); + const result = await handleAddSkill({ + harness: 'TestHarness', + git: 'https://github.com/org/repo', + credentialName: 'my-git-cred', + username: 'bot', + }); + expect(result.success).toBe(true); + expect(mockWriteHarnessSpec).toHaveBeenCalledWith( + 'TestHarness', + expect.objectContaining({ + skills: [{ gitUrl: 'https://github.com/org/repo', auth: { credentialName: 'my-git-cred', username: 'bot' } }], + }) + ); + }); + + it('fails when no source type provided', async () => { + const result = await handleAddSkill({ harness: 'TestHarness' }); + expect(result.success).toBe(false); + expect(!result.success && result.error.message).toContain('Exactly one'); + }); + + it('fails when multiple source types provided', async () => { + const result = await handleAddSkill({ harness: 'TestHarness', path: './x', s3: 's3://y' }); + expect(result.success).toBe(false); + expect(!result.success && result.error.message).toContain('Exactly one'); + }); + + it('fails when harness not found', async () => { + 
mockReadHarnessSpec.mockRejectedValue(new Error('not found')); + const result = await handleAddSkill({ harness: 'Missing', path: './x' }); + expect(result.success).toBe(false); + expect(!result.success && result.error.message).toContain('not found'); + }); + + it('fails when s3 URI does not start with s3://', async () => { + const result = await handleAddSkill({ harness: 'TestHarness', s3: 'bucket/skill' }); + expect(result.success).toBe(false); + expect(!result.success && result.error.message).toContain('s3://'); + }); + + it('fails when git URL does not start with https://', async () => { + const result = await handleAddSkill({ harness: 'TestHarness', git: 'git@github.com:org/repo.git' }); + expect(result.success).toBe(false); + expect(!result.success && result.error.message).toContain('https://'); + }); + + it('rejects duplicate path skill', async () => { + mockReadHarnessSpec.mockResolvedValue(makeHarnessSpec({ skills: [{ path: './my-skill' }] })); + const result = await handleAddSkill({ harness: 'TestHarness', path: './my-skill' }); + expect(result.success).toBe(false); + expect(!result.success && result.error.message).toContain('already exists'); + }); + + it('rejects duplicate s3 skill', async () => { + mockReadHarnessSpec.mockResolvedValue(makeHarnessSpec({ skills: [{ s3Uri: 's3://bucket/skill' }] })); + const result = await handleAddSkill({ harness: 'TestHarness', s3: 's3://bucket/skill' }); + expect(result.success).toBe(false); + expect(!result.success && result.error.message).toContain('already exists'); + }); + + it('rejects duplicate git skill', async () => { + mockReadHarnessSpec.mockResolvedValue( + makeHarnessSpec({ + skills: [{ gitUrl: 'https://github.com/org/repo' }], + }) + ); + const result = await handleAddSkill({ harness: 'TestHarness', git: 'https://github.com/org/repo' }); + expect(result.success).toBe(false); + expect(!result.success && result.error.message).toContain('already exists'); + }); + + it('rejects --git-path without --git', async 
() => { + const result = await handleAddSkill({ harness: 'TestHarness', path: './x', gitPath: 'sub' }); + expect(result.success).toBe(false); + expect(!result.success && result.error.message).toContain('--git'); + }); + + it('rejects --credential without --git', async () => { + const result = await handleAddSkill({ harness: 'TestHarness', path: './x', credentialName: 'my-cred' }); + expect(result.success).toBe(false); + expect(!result.success && result.error.message).toContain('--git'); + }); + + it('rejects --username without --git', async () => { + const result = await handleAddSkill({ harness: 'TestHarness', path: './x', username: 'bot' }); + expect(result.success).toBe(false); + expect(!result.success && result.error.message).toContain('--git'); + }); + + it('rejects credential that does not exist in project', async () => { + mockReadHarnessSpec.mockResolvedValue(makeHarnessSpec()); + mockReadProjectSpec.mockResolvedValue({ credentials: [] }); + const result = await handleAddSkill({ + harness: 'TestHarness', + git: 'https://github.com/org/repo', + credentialName: 'nonexistent', + }); + expect(result.success).toBe(false); + expect(!result.success && result.error.message).toContain('not found in project'); + }); + + it('rejects OAuth credential for git skill auth', async () => { + mockReadHarnessSpec.mockResolvedValue(makeHarnessSpec()); + mockReadProjectSpec.mockResolvedValue({ + credentials: [{ name: 'my-oauth-cred', authorizerType: 'OAuthCredentialProvider' }], + }); + const result = await handleAddSkill({ + harness: 'TestHarness', + git: 'https://github.com/org/repo', + credentialName: 'my-oauth-cred', + }); + expect(result.success).toBe(false); + expect(!result.success && result.error.message).toContain('ApiKeyCredentialProvider'); + }); + + it('returns clean error when project config is unreadable', async () => { + mockReadHarnessSpec.mockResolvedValue(makeHarnessSpec()); + mockReadProjectSpec.mockRejectedValue(new Error('Config file not found')); + const 
result = await handleAddSkill({ + harness: 'TestHarness', + git: 'https://github.com/org/repo', + credentialName: 'my-cred', + }); + expect(result.success).toBe(false); + expect(!result.success && result.error.message).toContain('Could not read project configuration'); + }); +}); diff --git a/src/cli/commands/add/__tests__/validate.test.ts b/src/cli/commands/add/__tests__/validate.test.ts index 070801f40..f28997edc 100644 --- a/src/cli/commands/add/__tests__/validate.test.ts +++ b/src/cli/commands/add/__tests__/validate.test.ts @@ -190,6 +190,25 @@ describe('validate', () => { expect(result.error?.includes('Strands')).toBeTruthy(); }); + it('rejects Python with the Vercel AI framework (TypeScript-only)', () => { + const result = validateAddAgentOptions({ + ...validAgentOptionsCreate, + language: 'Python', + framework: 'VercelAI', + }); + expect(result.valid).toBe(false); + expect(result.error?.includes('is not yet available for Python')).toBeTruthy(); + }); + + it('accepts TypeScript with the Vercel AI framework', () => { + const result = validateAddAgentOptions({ + ...validAgentOptionsCreate, + language: 'TypeScript', + framework: 'VercelAI', + }); + expect(result.valid).toBe(true); + }); + it('returns error for create path with Other language', () => { const result = validateAddAgentOptions({ ...validAgentOptionsCreate, language: 'Other' }); expect(result.valid).toBe(false); @@ -449,6 +468,43 @@ describe('validate', () => { const result = await validateAddGatewayTargetOptions({ ...validGatewayTargetOptions }); expect(result.valid).toBe(true); }); + + // Passthrough is gated behind ENABLE_GATED_FEATURES + describe('passthrough feature flag', () => { + const passthroughOpts: AddGatewayTargetOptions = { + name: 'pt-target', + type: 'passthrough', + gateway: 'my-gateway', + passthroughEndpoint: 'https://api.example.com', + } as AddGatewayTargetOptions; + const prevGated = process.env.ENABLE_GATED_FEATURES; // snapshot the incoming value so it is restored, not just cleared, after each test + afterEach(() => { + if (prevGated === undefined) delete process.env.ENABLE_GATED_FEATURES; + else process.env.ENABLE_GATED_FEATURES = prevGated; + }); + it('rejects passthrough when the flag is 
off', async () => { + delete process.env.ENABLE_GATED_FEATURES; + const result = await validateAddGatewayTargetOptions({ ...passthroughOpts }); + expect(result.valid).toBe(false); + expect(result.error).toBe('Passthrough targets are not yet available.'); + }); + + it('omits passthrough from the invalid-type error when the flag is off', async () => { + delete process.env.ENABLE_GATED_FEATURES; + const result = await validateAddGatewayTargetOptions({ + ...validGatewayTargetOptions, + type: 'bogus-type', + } as AddGatewayTargetOptions); + expect(result.valid).toBe(false); + expect(result.error).not.toContain('passthrough'); + }); + + it('allows passthrough when the flag is on', async () => { + process.env.ENABLE_GATED_FEATURES = '1'; + const result = await validateAddGatewayTargetOptions({ ...passthroughOpts }); + expect(result.valid).toBe(true); + }); + }); // AC20: type validation it('returns error when --type is missing', async () => { const options: AddGatewayTargetOptions = { @@ -940,6 +996,98 @@ describe('validate', () => { expect(result.valid).toBe(false); expect(result.error).toBe('--host is not applicable for MCP server targets'); }); + + // HTTP Runtime target validation + it('accepts valid http-runtime options with --runtime', async () => { + const result = await validateAddGatewayTargetOptions({ + name: 'my-http-target', + type: 'http-runtime', + runtime: 'my-agent', + gateway: 'my-gateway', + }); + expect(result.valid).toBe(true); + expect(result.error).toBeUndefined(); + }); + + it('rejects http-runtime without --runtime', async () => { + const result = await validateAddGatewayTargetOptions({ + name: 'my-http-target', + type: 'http-runtime', + gateway: 'my-gateway', + }); + expect(result.valid).toBe(false); + expect(result.error).toContain('--runtime is required'); + }); + + it('rejects http-runtime with --host', async () => { + const result = await validateAddGatewayTargetOptions({ + name: 'my-http-target', + type: 'http-runtime', + runtime: 'my-agent', 
+ gateway: 'my-gateway', + host: 'Lambda', + }); + expect(result.valid).toBe(false); + expect(result.error).toContain('not applicable for http-runtime type'); + }); + + it('rejects http-runtime with --rest-api-id', async () => { + const result = await validateAddGatewayTargetOptions({ + name: 'my-http-target', + type: 'http-runtime', + runtime: 'my-agent', + gateway: 'my-gateway', + restApiId: 'abc123', + }); + expect(result.valid).toBe(false); + expect(result.error).toContain('not applicable for http-runtime type'); + }); + + it('rejects http-runtime with --lambda-arn', async () => { + const result = await validateAddGatewayTargetOptions({ + name: 'my-http-target', + type: 'http-runtime', + runtime: 'my-agent', + gateway: 'my-gateway', + lambdaArn: 'arn:aws:lambda:us-east-1:123456789012:function:my-func', + }); + expect(result.valid).toBe(false); + expect(result.error).toContain('not applicable for http-runtime type'); + }); + + it('rejects http-runtime with --tool-schema-file', async () => { + const result = await validateAddGatewayTargetOptions({ + name: 'my-http-target', + type: 'http-runtime', + runtime: 'my-agent', + gateway: 'my-gateway', + toolSchemaFile: './tools.json', + }); + expect(result.valid).toBe(false); + expect(result.error).toContain('not applicable for http-runtime type'); + }); + + it('accepts http-runtime with --runtime-endpoint', async () => { + const result = await validateAddGatewayTargetOptions({ + name: 'my-http-target', + type: 'http-runtime', + runtime: 'my-agent', + runtimeEndpoint: 'LIVE', + gateway: 'my-gateway', + }); + expect(result.valid).toBe(true); + }); + + it('sets language to Other for http-runtime type', async () => { + const opts: AddGatewayTargetOptions = { + name: 'my-http-target', + type: 'http-runtime', + runtime: 'my-agent', + gateway: 'my-gateway', + }; + await validateAddGatewayTargetOptions(opts); + expect(opts.language).toBe('Other'); + }); }); describe('validateAddMemoryOptions', () => { diff --git 
a/src/cli/commands/add/auth-options.ts b/src/cli/commands/add/auth-options.ts index 60cb155d9..07b3e61f4 100644 --- a/src/cli/commands/add/auth-options.ts +++ b/src/cli/commands/add/auth-options.ts @@ -1,4 +1,4 @@ -import { CustomClaimValidationSchema } from '../../../schema'; +import { CustomClaimValidationSchema, PrivateEndpointOverrideSchema, PrivateEndpointSchema } from '../../../schema'; import type { ValidationResult } from './validate'; const OIDC_WELL_KNOWN_SUFFIX = '/.well-known/openid-configuration'; @@ -12,6 +12,15 @@ export interface JwtAuthorizerCliOptions { customClaims?: string; clientId?: string; clientSecret?: string; + // PrivateLink inbound (private endpoint for reaching the OIDC discovery URL). + privateEndpointLatticeArn?: string; + privateEndpointVpcId?: string; + privateEndpointSubnets?: string; + privateEndpointIpType?: string; + privateEndpointSecurityGroups?: string; + privateEndpointRoutingDomain?: string; + privateEndpointTags?: string; + privateEndpointOverrides?: string; } /** @@ -76,5 +85,136 @@ export function validateJwtAuthorizerOptions(options: JwtAuthorizerCliOptions): return { valid: false, error: 'Both --client-id and --client-secret must be provided together' }; } + const privateLinkResult = validatePrivateEndpointOptions(options); + if (!privateLinkResult.valid) return privateLinkResult; + + return { valid: true }; +} + +/** + * Validate PrivateLink inbound flags. The two endpoint arms (lattice / managed-vpc) are mutually + * exclusive; managed-vpc requires --private-endpoint-subnets + --private-endpoint-ip-type. Field + * formats and the ≤5 overrides/SG limits are checked by parsing against the Zod schemas (single + * source of truth) so the CLI and the deploy-time validation never diverge. 
+ */ +function validatePrivateEndpointOptions(options: JwtAuthorizerCliOptions): ValidationResult { + const hasLattice = !!options.privateEndpointLatticeArn?.trim(); // whitespace-only values count as "not provided" + const hasVpc = !!options.privateEndpointVpcId?.trim(); + + if (hasLattice && hasVpc) { + return { + valid: false, + error: + '--private-endpoint-lattice-arn and --private-endpoint-vpc-id are mutually exclusive (a private endpoint is one of VPC Lattice or a managed VPC endpoint)', + }; + } + + // VPC-arm sub-flags require the VPC arm. + const vpcSubFlags = [ + options.privateEndpointSubnets, + options.privateEndpointIpType, + options.privateEndpointSecurityGroups, + options.privateEndpointRoutingDomain, + options.privateEndpointTags, + ]; + if (!hasVpc && vpcSubFlags.some(f => f?.trim())) { + return { + valid: false, + error: '--private-endpoint-* VPC flags require --private-endpoint-vpc-id', + }; + } + + if (hasLattice) { + const result = PrivateEndpointSchema.safeParse({ + selfManagedLatticeResource: { resourceConfigurationIdentifier: options.privateEndpointLatticeArn }, + }); + if (!result.success) { + return { valid: false, error: `Invalid --private-endpoint-lattice-arn: ${result.error.issues[0]?.message}` }; + } + } + + if (hasVpc) { + if (!options.privateEndpointSubnets?.trim()) { + return { valid: false, error: '--private-endpoint-subnets is required with --private-endpoint-vpc-id' }; + } + if (!options.privateEndpointIpType?.trim()) { + return { + valid: false, + error: '--private-endpoint-ip-type (IPV4 or IPV6) is required with --private-endpoint-vpc-id', + }; + } + let tags: unknown; // stays undefined unless --private-endpoint-tags was supplied + if (options.privateEndpointTags) { + try { + tags = JSON.parse(options.privateEndpointTags); + } catch { + return { valid: false, error: '--private-endpoint-tags must be valid JSON' }; + } + } + const result = PrivateEndpointSchema.safeParse({ + managedVpcResource: { + vpcIdentifier: options.privateEndpointVpcId, + subnetIds: options.privateEndpointSubnets.split(',').map(s => s.trim()), // comma-separated; entries are trimmed, empty entries are passed through to the schema + 
endpointIpAddressType: options.privateEndpointIpType, + ...(options.privateEndpointSecurityGroups && { + securityGroupIds: options.privateEndpointSecurityGroups.split(',').map(s => s.trim()), + }), + ...(options.privateEndpointRoutingDomain && { routingDomain: options.privateEndpointRoutingDomain }), + ...(tags !== undefined && { tags }), + }, + }); + if (!result.success) { + return { valid: false, error: `Invalid managed-VPC private endpoint: ${result.error.issues[0]?.message}` }; + } + } + + if (options.privateEndpointOverrides) { + let parsed: unknown; + try { + parsed = JSON.parse(options.privateEndpointOverrides); + } catch { + return { valid: false, error: '--private-endpoint-overrides must be valid JSON' }; + } + if (!Array.isArray(parsed)) { + return { valid: false, error: '--private-endpoint-overrides must be a JSON array' }; + } + if (parsed.length > 5) { // entry cap is enforced here, before the base-endpoint and per-entry checks + return { valid: false, error: '--private-endpoint-overrides allows at most 5 entries' }; + } + // Coupling rules (mirror the AgentCore Identity service): overrides require a base endpoint, every + // override must use the same arm as the base, and override domains must be unique. + if (!hasLattice && !hasVpc) { + return { + valid: false, + error: + '--private-endpoint-overrides requires a base private endpoint (--private-endpoint-lattice-arn or --private-endpoint-vpc-id)', + }; + } + const baseArm = hasLattice ? 'selfManagedLatticeResource' : 'managedVpcResource'; + const seenDomains = new Set<string>(); // domains already claimed by an earlier override entry + for (const [i, entry] of parsed.entries()) { + const result = PrivateEndpointOverrideSchema.safeParse(entry); + if (!result.success) { + return { + valid: false, + error: `Invalid private-endpoint override at index ${i}: ${result.error.issues[0]?.message}`, + }; + } + const overrideArm = result.data.privateEndpoint.selfManagedLatticeResource + ? 
'selfManagedLatticeResource' + : 'managedVpcResource'; // an entry without the lattice arm is treated as the managed-VPC arm + if (overrideArm !== baseArm) { + return { + valid: false, + error: `Private-endpoint override at index ${i} must be the same kind as the base endpoint (all ${baseArm === 'selfManagedLatticeResource' ? 'VPC Lattice' : 'managed VPC'})`, + }; + } + if (seenDomains.has(result.data.domain)) { + return { valid: false, error: `Duplicate private-endpoint override domain: ${result.data.domain}` }; + } + seenDomains.add(result.data.domain); + } + } + return { valid: true }; } diff --git a/src/cli/commands/add/skill-action.ts b/src/cli/commands/add/skill-action.ts new file mode 100644 index 000000000..f87e64130 --- /dev/null +++ b/src/cli/commands/add/skill-action.ts @@ -0,0 +1,140 @@ +import { ConfigIO } from '../../../lib'; +import type { HarnessSpec } from '../../../schema'; +import { isGatedFeaturesEnabled } from '@/cli/feature-flags'; +import { getSkillKey, validateGitSkillCredential } from '@/cli/operations/harness/skill-utils'; +import { ValidationError } from '@/lib/errors/types'; +import type { Result } from '@/lib/result'; + +export interface AddSkillOptions { + harness: string; + path?: string; + s3?: string; + git?: string; + gitPath?: string; + credentialName?: string; + username?: string; + awsSkills?: string | true; +} + +export async function handleAddSkill( + options: AddSkillOptions +): Promise> { + const { harness } = options; + + const gitOnlyFlags = [ + options.gitPath && '--git-path', + options.credentialName && '--credential', + options.username && '--username', + ].filter(Boolean); + + if (gitOnlyFlags.length > 0 && !options.git) { + return { + success: false, + error: new ValidationError(`${gitOnlyFlags.join(', ')} can only be used with --git`), + }; + } + + if (options.awsSkills && !isGatedFeaturesEnabled()) { + return { + success: false, + error: new ValidationError('AWS skills are not yet available.'), + }; + } + + const sources = [options.path, options.s3, options.git, 
...(isGatedFeaturesEnabled() ? [options.awsSkills] : [])]; + const sourceCount = sources.filter(Boolean).length; + if (sourceCount !== 1) { + return { + success: false, + error: new ValidationError( + isGatedFeaturesEnabled() + ? 'Exactly one of --path, --s3, --git, or --aws-skills is required' + : 'Exactly one of --path, --s3, or --git is required' + ), + }; + } + + if (options.s3 && !options.s3.startsWith('s3://')) { + return { success: false, error: new ValidationError('--s3 must be an S3 URI starting with s3://') }; + } + + if (options.git && !options.git.startsWith('https://')) { + return { success: false, error: new ValidationError('--git must be an HTTPS URL starting with https://') }; + } + + const configIO = new ConfigIO(); + + let harnessSpec: HarnessSpec; + try { + harnessSpec = await configIO.readHarnessSpec(harness); + } catch { + return { + success: false, + error: new ValidationError( + `Harness '${harness}' not found. Check the name or run 'agentcore add harness' first.` + ), + }; + } + + if (options.credentialName) { + let project; + try { + project = await configIO.readProjectSpec(); + } catch { + return { + success: false, + error: new ValidationError(`Could not read project configuration. Ensure agentcore.json exists and is valid.`), + }; + } + const validation = validateGitSkillCredential(project, options.credentialName); + if (!validation.success) return validation; + } + + let skillEntry: HarnessSpec['skills'][number]; + let skillSource: string; + + if (options.path) { + skillEntry = { path: options.path }; + skillSource = options.path; + } else if (options.s3) { + skillEntry = { s3Uri: options.s3 }; + skillSource = options.s3; + } else if (options.git) { + skillEntry = { + gitUrl: options.git, + ...(options.gitPath && { path: options.gitPath }), + ...(options.credentialName && { + auth: { + credentialName: options.credentialName, + ...(options.username && { username: options.username }), + }, + }), + }; + skillSource = options.gitPath ? 
`${options.git} (path: ${options.gitPath})` : options.git; + } else { + const paths = + options.awsSkills === true + ? undefined + : options + .awsSkills!.split(',') + .map(s => s.trim()) + .filter(Boolean) + .sort(); + skillEntry = { awsSkills: { ...(paths && { paths }) } }; + skillSource = paths ? `aws-skills (${paths.join(', ')})` : 'aws-skills (all)'; + } + + const newKey = getSkillKey(skillEntry); + const isDuplicate = harnessSpec.skills.some(s => getSkillKey(s) === newKey); + if (isDuplicate) { + return { + success: false, + error: new ValidationError(`Skill '${skillSource}' already exists in harness '${harness}'`), + }; + } + + harnessSpec.skills.push(skillEntry); + await configIO.writeHarnessSpec(harness, harnessSpec); + + return { success: true, harnessName: harness, skillSource }; +} diff --git a/src/cli/commands/add/skill-command.ts b/src/cli/commands/add/skill-command.ts new file mode 100644 index 000000000..6f5e1fc00 --- /dev/null +++ b/src/cli/commands/add/skill-command.ts @@ -0,0 +1,78 @@ +import { findConfigRoot } from '../../../lib'; +import { getErrorMessage } from '../../errors'; +import { isGatedFeaturesEnabled } from '../../feature-flags'; +import { withCommandRunTelemetry } from '../../telemetry/cli-command-run.js'; +import { SkillSourceType, standardize } from '../../telemetry/schemas/common-shapes.js'; +import { handleAddSkill } from './skill-action'; +import { Option } from '@commander-js/extra-typings'; +import type { Command } from '@commander-js/extra-typings'; + +const awsSkillsOption = new Option( + '--aws-skills [paths]', + 'Add built-in AWS skills (comma-separated paths, or omit for all)' +); + +export function registerAddSkill(addCmd: Command): void { + addCmd + .command('skill') + .description('Add a skill to a harness') + .requiredOption('--harness ', 'Target harness name') + .option('--path ', 'Path to an installed skill in the environment') + .option('--s3 ', 'S3 URI (s3://bucket/path)') + .option('--git ', 'HTTPS git repository 
URL') + .option('--git-path ', 'Subdirectory within the git repo (for --git)') + .option('--credential ', 'Name of an API key credential in the project (for git auth)') + .option('--username ', 'Username for git auth (for --git)') + .addOption(isGatedFeaturesEnabled() ? awsSkillsOption : awsSkillsOption.hideHelp()) + .option('--json', 'Output as JSON') + .action(async cliOptions => { + if (!findConfigRoot()) { + console.error('No agentcore project found. Run `agentcore create` first.'); + process.exit(1); + } + + try { + const sourceType = cliOptions.awsSkills ? 'aws_skills' : cliOptions.git ? 'git' : cliOptions.s3 ? 's3' : 'path'; + const result = await withCommandRunTelemetry( + 'add.skill', + { skill_source_type: standardize(SkillSourceType, sourceType) }, + () => + handleAddSkill({ + harness: cliOptions.harness, + path: cliOptions.path, + s3: cliOptions.s3, + git: cliOptions.git, + gitPath: cliOptions.gitPath, + credentialName: cliOptions.credential, + username: cliOptions.username, + awsSkills: cliOptions.awsSkills, + }) + ); + + if (!result.success) { + if (cliOptions.json) { + console.log(JSON.stringify({ success: false, error: result.error.message })); + } else { + console.error(result.error.message); + } + process.exit(1); + } + + if (cliOptions.json) { + console.log( + JSON.stringify({ success: true, harnessName: result.harnessName, skillSource: result.skillSource }) + ); + } else { + console.log(`Added skill '${result.skillSource}' to harness '${result.harnessName}'.`); + console.log(`Run 'agentcore deploy' to apply changes.`); + } + } catch (error) { + if (cliOptions.json) { + console.log(JSON.stringify({ success: false, error: getErrorMessage(error) })); + } else { + console.error(getErrorMessage(error)); + } + process.exit(1); + } + }); +} diff --git a/src/cli/commands/add/tool-action.ts b/src/cli/commands/add/tool-action.ts index cced82d81..57df6f5cb 100644 --- a/src/cli/commands/add/tool-action.ts +++ b/src/cli/commands/add/tool-action.ts @@ -1,6 
+1,7 @@ import { ConfigIO } from '../../../lib'; import type { HarnessGatewayOutboundAuth, HarnessSpec } from '../../../schema'; import type { HarnessToolType } from '../../../schema/schemas/primitives/harness'; +import { readFileSync } from 'fs'; export interface AddToolOptions { harness: string; @@ -15,6 +16,10 @@ export interface AddToolOptions { providerArn?: string; scopes?: string; grantType?: string; + /** inline_function: tool description shown to the model. */ + description?: string; + /** inline_function: JSON Schema for the tool input, as a JSON string or @path/to/file.json. */ + inputSchema?: string; json?: boolean; } @@ -57,6 +62,39 @@ export async function handleAddTool(options: AddToolOptions): Promise | undefined; + if (toolType === 'inline_function') { + if (!options.description) { + return { success: false, error: '--description is required for inline_function tools' }; + } + if (!options.inputSchema) { + return { success: false, error: '--input-schema is required for inline_function tools' }; + } + let rawSchema = options.inputSchema; + if (rawSchema.startsWith('@')) { + const path = rawSchema.slice(1); + try { + rawSchema = readFileSync(path, 'utf-8'); + } catch { + return { success: false, error: `Could not read --input-schema file: ${path}` }; + } + } + let parsed: unknown; + try { + parsed = JSON.parse(rawSchema); + } catch { + return { success: false, error: '--input-schema is not valid JSON' }; + } + if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) { + return { success: false, error: '--input-schema must be a JSON object (a JSON Schema for the tool input)' }; + } + inlineInputSchema = parsed as Record; + } + let outboundAuth: HarnessGatewayOutboundAuth | undefined; if (options.outboundAuth !== undefined) { if (toolType !== 'agentcore_gateway') { @@ -167,6 +205,8 @@ export async function handleAddTool(options: AddToolOptions): Promise', 'OAuth grant type: CLIENT_CREDENTIALS or USER_FEDERATION (for --outbound-auth 
oauth)' ) + .option('--description ', 'Tool description shown to the model (required for inline_function)') + .option( + '--input-schema ', + 'JSON Schema for the tool input, as a JSON string or @path/to/schema.json (required for inline_function)' + ) .option('--json', 'Output as JSON') .action(async cliOptions => { if (!findConfigRoot()) { @@ -52,6 +57,8 @@ export function registerAddTool(addCmd: Command): void { providerArn: cliOptions.providerArn, scopes: cliOptions.scopes, grantType: cliOptions.grantType, + description: cliOptions.description, + inputSchema: cliOptions.inputSchema, json: cliOptions.json, }); diff --git a/src/cli/commands/add/types.ts b/src/cli/commands/add/types.ts index 6bb3b95b8..465c4d717 100644 --- a/src/cli/commands/add/types.ts +++ b/src/cli/commands/add/types.ts @@ -50,6 +50,7 @@ export interface AddAgentOptions extends VpcOptions { export interface AddGatewayOptions { name?: string; description?: string; + protocolType?: string; authorizerType?: GatewayAuthorizerType; discoveryUrl?: string; allowedAudience?: string; @@ -75,7 +76,7 @@ export interface AddGatewayTargetOptions { language?: 'Python' | 'TypeScript' | 'Other'; gateway?: string; host?: 'Lambda' | 'AgentCoreRuntime'; - outboundAuthType?: 'OAUTH' | 'API_KEY' | 'NONE'; + outboundAuthType?: 'OAUTH' | 'API_KEY' | 'NONE' | 'GATEWAY_IAM_ROLE' | 'JWT_PASSTHROUGH'; credentialName?: string; oauthClientId?: string; oauthClientSecret?: string; @@ -89,6 +90,27 @@ export interface AddGatewayTargetOptions { toolFilterMethods?: string; schema?: string; schemaS3Account?: string; + runtime?: string; + runtimeEndpoint?: string; + /** Connector id (for --type connector): bedrock-knowledge-bases | bedrock-agentic-retrieve | web-search. */ + connector?: string; + /** + * KB reference for --type connector — either a project KB name (entry in + * knowledgeBases[]) or a literal 10-char external KB ID. 
Repeatable when + * --connector is bedrock-agentic-retrieve (fan-out); single-valued for + * bedrock-knowledge-bases. Not applicable to --connector web-search. + */ + knowledgeBaseId?: string[]; + passthroughEndpoint?: string; + stickinessIdentifier?: string; + stickinessTimeout?: string; + signingService?: string; + signingRegion?: string; + /** + * Comma-separated list of domains to exclude from web search results. + * Only applies to --type web-search. + */ + excludeDomains?: string; json?: boolean; } @@ -120,6 +142,18 @@ export interface AddHarnessCliOptions { gatewayOutboundAuth?: string; gatewayProviderArn?: string; gatewayScopes?: string; + gatewayGrantType?: string; + gatewayCustomParameters?: string; + memoryMode?: string; + memoryStrategies?: string; + memoryEventExpiryDays?: number; + memoryEncryptionKeyArn?: string; + memoryName?: string; + memoryArn?: string; + memoryActorId?: string; + memoryMessagesCount?: number; + memoryTopK?: number; + memoryRelevanceScore?: number; authorizerType?: RuntimeAuthorizerType; discoveryUrl?: string; allowedAudience?: string; @@ -128,6 +162,14 @@ export interface AddHarnessCliOptions { customClaims?: string; clientId?: string; clientSecret?: string; + privateEndpointLatticeArn?: string; + privateEndpointVpcId?: string; + privateEndpointSubnets?: string; + privateEndpointIpType?: string; + privateEndpointSecurityGroups?: string; + privateEndpointRoutingDomain?: string; + privateEndpointTags?: string; + privateEndpointOverrides?: string; json?: boolean; } diff --git a/src/cli/commands/add/validate.ts b/src/cli/commands/add/validate.ts index 8b509b6e5..59c0d324b 100644 --- a/src/cli/commands/add/validate.ts +++ b/src/cli/commands/add/validate.ts @@ -2,6 +2,7 @@ import { ConfigIO, findConfigRoot } from '../../../lib'; import { AgentNameSchema, BuildTypeSchema, + CONNECTOR_ID_VALUES, DatasetNameSchema, DatasetSchemaTypeSchema, GatewayAuthorizerTypeSchema, @@ -15,12 +16,15 @@ import { StreamDeliveryResourcesSchema, 
TARGET_TYPE_AUTH_CONFIG, TargetLanguageSchema, + getFrameworksForLanguage, getSupportedFrameworksForProtocol, getSupportedModelProviders, + isFrameworkSupportedForLanguage, isValidKmsKeyArn, matchEnumValue, validateApiFormat, } from '../../../schema'; +import { isGatedFeaturesEnabled } from '../../feature-flags'; import { ARN_VALIDATION_MESSAGE, isValidArn } from '../shared/arn-utils'; import { validateHeaderAllowlist } from '../shared/header-utils'; import { MAX_INDEXED_KEYS, parseIndexedKeyArg } from '../shared/indexed-key-parser'; @@ -259,15 +263,16 @@ export function validateAddAgentOptions(options: AddAgentOptions): ValidationRes if (options.language === 'Other') { return { valid: false, error: 'Create path only supports Python or TypeScript' }; } + // Framework must ship a template for the chosen language (e.g. Vercel AI is + // TypeScript-only, the other open-source frameworks are Python-only). if ( - options.language === 'TypeScript' && - options.framework && - options.framework !== 'Strands' && - options.framework !== 'VercelAI' + (langResult.data === 'Python' || langResult.data === 'TypeScript') && + !isFrameworkSupportedForLanguage(langResult.data, fwResult.data) ) { + const supported = getFrameworksForLanguage(langResult.data).join(', '); return { valid: false, - error: `Framework ${options.framework} is not yet available for TypeScript. Only Strands and Vercel AI SDK are supported.`, + error: `Framework ${options.framework} is not yet available for ${langResult.data}. Supported: ${supported}.`, }; } @@ -394,11 +399,23 @@ export async function validateAddGatewayTargetOptions(options: AddGatewayTargetO return { valid: false, error: '--name is required' }; } + // passthrough is gated; omit it from advertised type lists when the flag is off. + const validTypeList = [ + 'mcp-server', + 'api-gateway', + 'open-api-schema', + 'smithy-model', + 'lambda-function-arn', + 'http-runtime', + 'connector', + ...(isGatedFeaturesEnabled() ? 
['passthrough'] : []), + 'web-search', + ].join(', '); + if (!options.type) { return { valid: false, - error: - '--type is required. Valid options: mcp-server, api-gateway, open-api-schema, smithy-model, lambda-function-arn', + error: `--type is required. Valid options: ${validTypeList}`, }; } @@ -408,16 +425,28 @@ export async function validateAddGatewayTargetOptions(options: AddGatewayTargetO 'open-api-schema': 'openApiSchema', 'smithy-model': 'smithyModel', 'lambda-function-arn': 'lambdaFunctionArn', + 'http-runtime': 'httpRuntime', + connector: 'connector', + passthrough: 'passthrough', + 'web-search': 'webSearch', }; const mappedType = typeMap[options.type]; if (!mappedType) { return { valid: false, - error: `Invalid type: ${options.type}. Valid options: mcp-server, api-gateway, open-api-schema, smithy-model, lambda-function-arn`, + error: `Invalid type: ${options.type}. Valid options: ${validTypeList}`, }; } options.type = mappedType; + // --exclude-domains is webSearch-target-only. Reject it on every other target type. 
+ if (mappedType !== 'webSearch' && options.excludeDomains) { + return { + valid: false, + error: '--exclude-domains only applies to --type web-search', + }; + } + // Gateway is required — a gateway target must be attached to a gateway if (!options.gateway) { return { @@ -565,6 +594,206 @@ export async function validateAddGatewayTargetOptions(options: AddGatewayTargetO return { valid: true }; } + // HTTP Runtime targets: validate early and return + if (mappedType === 'httpRuntime') { + if (!options.runtime) { + return { valid: false, error: '--runtime is required for http-runtime type' }; + } + if (options.language && options.language !== 'Other') { + return { valid: false, error: '--language is not applicable for http-runtime type' }; + } + + const HTTP_RUNTIME_DISALLOWED_OPTIONS = [ + 'host', + 'restApiId', + 'stage', + 'lambdaArn', + 'toolSchemaFile', + 'toolFilterPath', + 'toolFilterMethods', + 'schema', + ] as const; + + for (const opt of HTTP_RUNTIME_DISALLOWED_OPTIONS) { + if (options[opt]) { + return { + valid: false, + error: `--${opt.replace(/([A-Z])/g, '-$1').toLowerCase()} is not applicable for http-runtime type`, + }; + } + } + + // Map --runtime-endpoint to the endpoint field used by createHttpRuntimeTarget + if (options.runtimeEndpoint) { + options.endpoint = options.runtimeEndpoint; + } + + options.language = 'Other'; + return { valid: true }; + } + + // Web search targets (Amazon Web Search managed connector): validate early and return + if (mappedType === 'webSearch') { + const WEB_SEARCH_DISALLOWED_OPTIONS: [string, string][] = [ + ['connector', '--connector'], + ['knowledgeBaseId', '--knowledge-base-id'], + ['endpoint', '--endpoint'], + ['host', '--host'], + ['restApiId', '--rest-api-id'], + ['stage', '--stage'], + ['lambdaArn', '--lambda-arn'], + ['toolSchemaFile', '--tool-schema-file'], + ['toolFilterPath', '--tool-filter-path'], + ['toolFilterMethods', '--tool-filter-methods'], + ['schema', '--schema'], + ['schemaS3Account', 
'--schema-s3-account'], + ['outboundAuthType', '--outbound-auth'], + ['credentialName', '--credential-name'], + ['oauthClientId', '--oauth-client-id'], + ['oauthClientSecret', '--oauth-client-secret'], + ['oauthDiscoveryUrl', '--oauth-discovery-url'], + ['oauthScopes', '--oauth-scopes'], + ]; + for (const [key, flag] of WEB_SEARCH_DISALLOWED_OPTIONS) { + const v = (options as unknown as Record)[key]; + // knowledgeBaseId is a string[]; treat empty array as absent + const present = Array.isArray(v) ? v.length > 0 : !!v; + if (present) { + return { valid: false, error: `${flag} is not applicable for web-search type` }; + } + } + if (options.language && options.language !== 'Other') { + return { valid: false, error: '--language is not applicable for web-search type' }; + } + options.language = 'Other'; + return { valid: true }; + } + + // Connector targets (Bedrock KB, agentic-retrieve): validate early and return + if (mappedType === 'connector') { + const validConnectors = CONNECTOR_ID_VALUES.join(', '); + if (!options.connector) { + return { + valid: false, + error: `--connector is required for connector type. Valid: ${validConnectors}`, + }; + } + if (!(CONNECTOR_ID_VALUES as readonly string[]).includes(options.connector)) { + return { + valid: false, + error: `Invalid --connector "${options.connector}". Valid: ${validConnectors}`, + }; + } + if (!options.knowledgeBaseId || options.knowledgeBaseId.length === 0) { + return { + valid: false, + error: `--knowledge-base-id is required for --connector ${options.connector}`, + }; + } + if (options.connector === 'bedrock-knowledge-bases' && options.knowledgeBaseId.length > 1) { + return { + valid: false, + error: + '--knowledge-base-id may only be specified once for --connector bedrock-knowledge-bases. 
Use --connector bedrock-agentic-retrieve for fan-out.', + }; + } + const irrelevant: [string, string][] = [ + ['endpoint', '--endpoint'], + ['host', '--host'], + ['restApiId', '--rest-api-id'], + ['stage', '--stage'], + ['lambdaArn', '--lambda-arn'], + ['toolSchemaFile', '--tool-schema-file'], + ['toolFilterPath', '--tool-filter-path'], + ['toolFilterMethods', '--tool-filter-methods'], + ['outboundAuthType', '--outbound-auth'], + ['credentialName', '--credential-name'], + ['oauthClientId', '--oauth-client-id'], + ['oauthClientSecret', '--oauth-client-secret'], + ['oauthDiscoveryUrl', '--oauth-discovery-url'], + ['oauthScopes', '--oauth-scopes'], + ]; + for (const [key, flag] of irrelevant) { + if ((options as unknown as Record)[key]) { + return { valid: false, error: `${flag} is not applicable for connector type` }; + } + } + if (options.language && options.language !== 'Other') { + return { valid: false, error: '--language is not applicable for connector type' }; + } + options.language = 'Other'; + return { valid: true }; + } + + // Passthrough targets: validate early and return + if (mappedType === 'passthrough') { + if (!isGatedFeaturesEnabled()) { + return { valid: false, error: 'Passthrough targets are not yet available.' 
}; + } + const passthroughEndpoint = (options as Record).passthroughEndpoint; + if (!passthroughEndpoint) { + return { valid: false, error: '--passthrough-endpoint is required for passthrough type' }; + } + if (!/^https:\/\/[a-zA-Z0-9\-.]+(:[0-9]{1,5})?(\/.*)?$/.test(passthroughEndpoint)) { + return { valid: false, error: '--passthrough-endpoint must be a valid HTTPS URL' }; + } + if (options.language && options.language !== 'Other') { + return { valid: false, error: '--language is not applicable for passthrough type' }; + } + + const PASSTHROUGH_DISALLOWED_OPTIONS = [ + 'host', + 'restApiId', + 'stage', + 'lambdaArn', + 'toolSchemaFile', + 'toolFilterPath', + 'toolFilterMethods', + 'schema', + 'runtime', + 'runtimeEndpoint', + ] as const; + + for (const opt of PASSTHROUGH_DISALLOWED_OPTIONS) { + if (options[opt]) { + return { + valid: false, + error: `--${opt.replace(/([A-Z])/g, '-$1').toLowerCase()} is not applicable for passthrough type`, + }; + } + } + + const stickinessTimeoutRaw = (options as Record).stickinessTimeout; + if (stickinessTimeoutRaw) { + const timeout = parseInt(stickinessTimeoutRaw, 10); + if (isNaN(timeout) || timeout < 1 || timeout > 86400) { + return { valid: false, error: '--stickiness-timeout must be a number between 1 and 86400' }; + } + } + + // Validate outbound auth for passthrough + if (options.outboundAuthType) { + const normalizedAuth = options.outboundAuthType.toUpperCase().replace(/-/g, '_'); + if (normalizedAuth === 'GATEWAY_IAM_ROLE') { + // signingService validation is done in the primitive action handler + } else if (normalizedAuth === 'JWT_PASSTHROUGH') { + // No additional fields required + } else if (normalizedAuth === 'OAUTH') { + if (!options.credentialName) { + const hasInlineOAuth = !!(options.oauthClientId ?? options.oauthClientSecret ?? 
options.oauthDiscoveryUrl); + if (!hasInlineOAuth) { + return { valid: false, error: '--credential-name or inline OAuth fields required for OAUTH auth' }; + } + } + } else if (normalizedAuth !== 'NONE') { + return { valid: false, error: `Unsupported outbound auth type for passthrough: ${options.outboundAuthType}` }; + } + } + + options.language = 'Other'; + return { valid: true }; + } + // Validate outbound auth configuration if (options.outboundAuthType && options.outboundAuthType !== 'NONE') { const hasInlineOAuth = !!(options.oauthClientId ?? options.oauthClientSecret ?? options.oauthDiscoveryUrl); @@ -892,6 +1121,10 @@ const VALID_HARNESS_TOOLS = [ const VALID_GATEWAY_OUTBOUND_AUTH = ['awsIam', 'none', 'oauth'] as const; +const VALID_MEMORY_MODES = ['managed', 'existing', 'disabled'] as const; +// Managed harness memory excludes CUSTOM (only the four; CUSTOM is for standalone memory). +const VALID_MANAGED_STRATEGIES = ['SEMANTIC', 'SUMMARIZATION', 'USER_PREFERENCE', 'EPISODIC'] as const; + export function validateAddHarnessOptions(options: AddHarnessCliOptions): ValidationResult { if (options.apiFormat) { const provider = options.modelProvider ?? 'bedrock'; @@ -901,6 +1134,13 @@ export function validateAddHarnessOptions(options: AddHarnessCliOptions): Valida } } + // VPC network-mode coupling: reject --subnets/--security-groups when network mode isn't VPC + // (and require them when it is), instead of silently dropping them. Mirrors the agent path. + const vpcResult = validateVpcOptions(options); + if (!vpcResult.valid) { + return vpcResult; + } + if (options.tools) { const toolNames = options.tools.split(',').map(s => s.trim()); for (const tool of toolNames) { @@ -948,6 +1188,84 @@ export function validateAddHarnessOptions(options: AddHarnessCliOptions): Valida } } + // --gateway-grant-type / --gateway-custom-parameters only live on the oauth arm; reject them for + // any other (or absent) outbound-auth rather than silently dropping them at spec-build. 
+ if ( + (options.gatewayGrantType !== undefined || options.gatewayCustomParameters !== undefined) && + options.gatewayOutboundAuth !== 'oauth' + ) { + return { + valid: false, + error: '--gateway-grant-type and --gateway-custom-parameters are only valid with --gateway-outbound-auth oauth', + }; + } + + // Memory flag coupling. Commander sets `memory` to false for --no-memory. + const noMemory = options.memory === false; + const memoryTuningGiven = + options.memoryActorId !== undefined || + options.memoryMessagesCount !== undefined || + options.memoryTopK !== undefined || + options.memoryRelevanceScore !== undefined; + if (options.memoryArn && options.memoryName) { + return { valid: false, error: '--memory-arn and --memory-name are mutually exclusive' }; + } + if (noMemory && (options.memoryArn || options.memoryName || memoryTuningGiven)) { + return { + valid: false, + error: '--no-memory cannot be combined with --memory-arn, --memory-name, or memory tuning flags', + }; + } + + // Managed-memory mode validation — only when the gated feature is enabled. When gated off, any + // --memory-mode / managed-only flag is rejected up front as "not yet available". + const managedOnlyFlags = + options.memoryStrategies !== undefined || + options.memoryEventExpiryDays !== undefined || + options.memoryEncryptionKeyArn !== undefined; + if (!isGatedFeaturesEnabled()) { + if (options.memoryMode !== undefined || managedOnlyFlags) { + return { + valid: false, + error: + '--memory-mode and managed-memory flags (--memory-strategies, --memory-event-expiry-days, --memory-encryption-key-arn) are not yet available.', + }; + } + } else { + if (options.memoryMode && !VALID_MEMORY_MODES.includes(options.memoryMode as (typeof VALID_MEMORY_MODES)[number])) { + return { + valid: false, + error: `Invalid --memory-mode '${options.memoryMode}'. 
Use ${VALID_MEMORY_MODES.join(', ')}.`, + }; + } + if (noMemory && (options.memoryMode === 'managed' || options.memoryMode === 'existing')) { + return { valid: false, error: '--no-memory cannot be combined with --memory-mode managed/existing' }; + } + if (options.memoryMode === 'existing' && !options.memoryArn && !options.memoryName) { + return { valid: false, error: '--memory-mode existing requires --memory-arn or --memory-name' }; + } + if (managedOnlyFlags && options.memoryMode && options.memoryMode !== 'managed') { + return { + valid: false, + error: + '--memory-strategies, --memory-event-expiry-days, and --memory-encryption-key-arn are only valid with --memory-mode managed', + }; + } + if (options.memoryStrategies) { + const bad = options.memoryStrategies + .split(',') + .map(s => s.trim()) + .filter(Boolean) + .filter(s => !VALID_MANAGED_STRATEGIES.includes(s as (typeof VALID_MANAGED_STRATEGIES)[number])); + if (bad.length) { + return { + valid: false, + error: `Invalid managed memory strateg${bad.length > 1 ? 'ies' : 'y'}: ${bad.join(', ')}. Valid: ${VALID_MANAGED_STRATEGIES.join(', ')}`, + }; + } + } + } + if (options.authorizerType) { const authResult = RuntimeAuthorizerTypeSchema.safeParse(options.authorizerType); if (!authResult.success) { @@ -964,5 +1282,21 @@ export function validateAddHarnessOptions(options: AddHarnessCliOptions): Valida return { valid: false, error: 'OAuth client credentials are only valid with CUSTOM_JWT authorizer' }; } + // PrivateLink (private-endpoint) flags only apply to the CUSTOM_JWT inbound authorizer; reject + // them for any other authorizer rather than silently dropping the config (mirrors the OAuth guard). 
+ const hasPrivateEndpointFlag = [ + options.privateEndpointLatticeArn, + options.privateEndpointVpcId, + options.privateEndpointSubnets, + options.privateEndpointIpType, + options.privateEndpointSecurityGroups, + options.privateEndpointRoutingDomain, + options.privateEndpointTags, + options.privateEndpointOverrides, + ].some(Boolean); + if (hasPrivateEndpointFlag && options.authorizerType !== 'CUSTOM_JWT') { + return { valid: false, error: '--private-endpoint-* flags are only valid with CUSTOM_JWT authorizer' }; + } + return { valid: true }; } diff --git a/src/cli/commands/archive/__tests__/command.test.ts b/src/cli/commands/archive/__tests__/command.test.ts index 8d65c7099..29fa994d6 100644 --- a/src/cli/commands/archive/__tests__/command.test.ts +++ b/src/cli/commands/archive/__tests__/command.test.ts @@ -2,395 +2,112 @@ import { registerArchive } from '../command.js'; import { Command } from '@commander-js/extra-typings'; import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; -const mockDeleteBatchEvaluation = vi.fn(); -const mockDeleteRecommendation = vi.fn(); -const mockDeleteLocalBatchEvalRun = vi.fn(); -const mockDeleteLocalRecommendationRun = vi.fn(); +const mockArchive = vi.fn(); const mockRequireProject = vi.fn(); -const mockRender = vi.fn(); -const mockResolveAWSDeploymentTargets = vi.fn(); -vi.mock('../../../aws/agentcore-batch-evaluation', () => ({ - deleteBatchEvaluation: (...args: unknown[]) => mockDeleteBatchEvaluation(...args), -})); - -vi.mock('../../../aws/agentcore-recommendation', () => ({ - deleteRecommendation: (...args: unknown[]) => mockDeleteRecommendation(...args), -})); - -vi.mock('../../../operations/archive/archive-storage', () => ({ - deleteLocalBatchEvalRun: (...args: unknown[]) => mockDeleteLocalBatchEvalRun(...args), - deleteLocalRecommendationRun: (...args: unknown[]) => mockDeleteLocalRecommendationRun(...args), +vi.mock('../../../operations/jobs', () => ({ + createJobEngine: () => ({ archive: (...args: 
unknown[]) => mockArchive(...args) }), })); vi.mock('../../../tui/guards', () => ({ requireProject: (...args: unknown[]) => mockRequireProject(...args), })); -vi.mock('ink', () => ({ - render: (...args: unknown[]) => mockRender(...args), - Text: 'Text', +// runCliCommand owns process.exit; stub it to run fn() and surface failures as a throw. +vi.mock('../../../telemetry/cli-command-run', () => ({ + runCliCommand: async (_command: string, _json: boolean, fn: () => Promise) => { + await fn(); + }, })); vi.mock('../../../../lib', () => ({ ConfigIO: function () { - return { resolveAWSDeploymentTargets: () => mockResolveAWSDeploymentTargets() }; + return {}; }, })); -const batchEvalResult = { - batchEvaluationId: 'eval-abc-123', - batchEvaluationArn: 'arn:aws:bedrock:us-east-1:123456789:batch-evaluation/eval-abc-123', - status: 'DELETED', -}; - -const recommendationResult = { - recommendationId: 'rec-xyz-789', - status: 'DELETED', -}; - describe('registerArchive', () => { let program: Command; - let mockExit: ReturnType; let mockLog: ReturnType; beforeEach(() => { program = new Command(); program.exitOverride(); registerArchive(program); - - mockExit = vi.spyOn(process, 'exit').mockImplementation(() => { - throw new Error('process.exit'); - }); mockLog = vi.spyOn(console, 'log').mockImplementation(() => undefined); - - mockResolveAWSDeploymentTargets.mockResolvedValue([{ region: 'us-east-1' }]); - mockDeleteLocalBatchEvalRun.mockReturnValue(true); - mockDeleteLocalRecommendationRun.mockReturnValue(true); + mockArchive.mockResolvedValue({ success: true }); }); afterEach(() => { - mockExit.mockRestore(); mockLog.mockRestore(); vi.clearAllMocks(); }); describe('command registration', () => { - it('registers archive command', () => { - const archiveCmd = program.commands.find(c => c.name() === 'archive'); - expect(archiveCmd).toBeDefined(); - }); - - it('registers batch-evaluation subcommand', () => { + it('registers archive with both subcommands', () => { const archiveCmd 
= program.commands.find(c => c.name() === 'archive')!; - const batchCmd = archiveCmd.commands.find(c => c.name() === 'batch-evaluation'); - expect(batchCmd).toBeDefined(); - }); - - it('registers recommendation subcommand', () => { - const archiveCmd = program.commands.find(c => c.name() === 'archive')!; - const recCmd = archiveCmd.commands.find(c => c.name() === 'recommendation'); - expect(recCmd).toBeDefined(); + expect(archiveCmd).toBeDefined(); + expect(archiveCmd.commands.find(c => c.name() === 'batch-evaluation')).toBeDefined(); + expect(archiveCmd.commands.find(c => c.name() === 'recommendation')).toBeDefined(); }); }); describe('archive batch-evaluation', () => { it('rejects when --id is missing', async () => { await expect(program.parseAsync(['archive', 'batch-evaluation'], { from: 'user' })).rejects.toThrow(); - expect(mockDeleteBatchEvaluation).not.toHaveBeenCalled(); + expect(mockArchive).not.toHaveBeenCalled(); }); - it('calls deleteBatchEvaluation with the given id and auto-detected region', async () => { - mockDeleteBatchEvaluation.mockResolvedValue(batchEvalResult); - + it('calls engine.archive with the batch-evaluation type and id', async () => { await program.parseAsync(['archive', 'batch-evaluation', '--id', 'eval-abc-123'], { from: 'user' }); - - expect(mockDeleteBatchEvaluation).toHaveBeenCalledWith({ - region: 'us-east-1', - batchEvaluationId: 'eval-abc-123', - }); - }); - - it('uses --region when provided', async () => { - mockDeleteBatchEvaluation.mockResolvedValue(batchEvalResult); - - await program.parseAsync(['archive', 'batch-evaluation', '--id', 'eval-abc-123', '--region', 'eu-west-1'], { - from: 'user', - }); - - expect(mockDeleteBatchEvaluation).toHaveBeenCalledWith({ - region: 'eu-west-1', - batchEvaluationId: 'eval-abc-123', - }); + expect(mockArchive).toHaveBeenCalledWith('batch-evaluation', 'eval-abc-123'); }); - it('calls deleteLocalBatchEvalRun with the id', async () => { - 
mockDeleteBatchEvaluation.mockResolvedValue(batchEvalResult); - + it('calls requireProject', async () => { await program.parseAsync(['archive', 'batch-evaluation', '--id', 'eval-abc-123'], { from: 'user' }); - - expect(mockDeleteLocalBatchEvalRun).toHaveBeenCalledWith('eval-abc-123'); - }); - - it('outputs JSON on success with --json flag', async () => { - mockDeleteBatchEvaluation.mockResolvedValue(batchEvalResult); - - await program.parseAsync(['archive', 'batch-evaluation', '--id', 'eval-abc-123', '--json'], { from: 'user' }); - - expect(mockLog).toHaveBeenCalledTimes(1); - const output = JSON.parse(mockLog.mock.calls[0]![0]); - expect(output.success).toBe(true); - expect(output.batchEvaluationId).toBe('eval-abc-123'); - expect(output.status).toBe('DELETED'); - expect(output.localCliHistoryDeleted).toBe(true); - }); - - it('includes localCliHistoryDeleted: false in JSON when local file was not found', async () => { - mockDeleteBatchEvaluation.mockResolvedValue(batchEvalResult); - mockDeleteLocalBatchEvalRun.mockReturnValue(false); - - await program.parseAsync(['archive', 'batch-evaluation', '--id', 'eval-abc-123', '--json'], { from: 'user' }); - - const output = JSON.parse(mockLog.mock.calls[0]![0]); - expect(output.localCliHistoryDeleted).toBe(false); + expect(mockRequireProject).toHaveBeenCalled(); }); - it('includes localDeleteWarning in JSON and exits 0 when local delete throws', async () => { - mockDeleteBatchEvaluation.mockResolvedValue(batchEvalResult); - mockDeleteLocalBatchEvalRun.mockImplementation(() => { - throw new Error('Permission denied'); - }); - + it('outputs JSON on success with --json', async () => { await program.parseAsync(['archive', 'batch-evaluation', '--id', 'eval-abc-123', '--json'], { from: 'user' }); - const output = JSON.parse(mockLog.mock.calls[0]![0]); expect(output.success).toBe(true); - expect(output.localCliHistoryDeleted).toBe(false); - expect(output.localDeleteWarning).toBe('Permission denied'); - 
expect(mockExit).not.toHaveBeenCalled(); - }); - - it('prints warning and exits 0 when local delete throws without --json', async () => { - mockDeleteBatchEvaluation.mockResolvedValue(batchEvalResult); - mockDeleteLocalBatchEvalRun.mockImplementation(() => { - throw new Error('Permission denied'); - }); - - await program.parseAsync(['archive', 'batch-evaluation', '--id', 'eval-abc-123'], { from: 'user' }); - - const allOutput = mockLog.mock.calls.map((c: unknown[]) => String(c[0])).join('\n'); - expect(allOutput).toContain('Warning: could not clear local history: Permission denied'); - expect(mockExit).not.toHaveBeenCalled(); + expect(output.id).toBe('eval-abc-123'); }); it('prints human-readable success output without --json', async () => { - mockDeleteBatchEvaluation.mockResolvedValue(batchEvalResult); - await program.parseAsync(['archive', 'batch-evaluation', '--id', 'eval-abc-123'], { from: 'user' }); - const allOutput = mockLog.mock.calls.map((c: unknown[]) => String(c[0])).join('\n'); expect(allOutput).toContain('eval-abc-123'); - expect(allOutput).toContain('DELETED'); - }); - - it('does not call process.exit on success', async () => { - mockDeleteBatchEvaluation.mockResolvedValue(batchEvalResult); - - await program.parseAsync(['archive', 'batch-evaluation', '--id', 'eval-abc-123'], { from: 'user' }); - - expect(mockExit).not.toHaveBeenCalled(); - }); - - it('outputs JSON error when deleteBatchEvaluation throws and --json is set', async () => { - mockDeleteBatchEvaluation.mockRejectedValue(new Error('Service unavailable')); - - await expect( - program.parseAsync(['archive', 'batch-evaluation', '--id', 'eval-abc-123', '--json'], { from: 'user' }) - ).rejects.toThrow('process.exit'); - - const output = JSON.parse(mockLog.mock.calls[0]![0]); - expect(output.success).toBe(false); - expect(output.error).toBe('Service unavailable'); - }); - - it('renders error via ink when deleteBatchEvaluation throws without --json', async () => { - 
mockDeleteBatchEvaluation.mockRejectedValue(new Error('Service unavailable')); - - await expect( - program.parseAsync(['archive', 'batch-evaluation', '--id', 'eval-abc-123'], { from: 'user' }) - ).rejects.toThrow('process.exit'); - - expect(mockRender).toHaveBeenCalled(); - const renderArg = mockRender.mock.calls[0]![0]; - expect(JSON.stringify(renderArg)).toContain('Service unavailable'); + expect(allOutput).toContain('archived'); }); - it('exits with code 1 on error', async () => { - mockDeleteBatchEvaluation.mockRejectedValue(new Error('fail')); - + it('throws when engine.archive fails', async () => { + mockArchive.mockResolvedValue({ success: false, error: new Error('Service unavailable') }); await expect( program.parseAsync(['archive', 'batch-evaluation', '--id', 'eval-abc-123'], { from: 'user' }) - ).rejects.toThrow('process.exit'); - - expect(mockExit).toHaveBeenCalledWith(1); - }); - - it('calls requireProject', async () => { - mockDeleteBatchEvaluation.mockResolvedValue(batchEvalResult); - - await program.parseAsync(['archive', 'batch-evaluation', '--id', 'eval-abc-123'], { from: 'user' }); - - expect(mockRequireProject).toHaveBeenCalled(); + ).rejects.toThrow('Service unavailable'); }); }); describe('archive recommendation', () => { - it('rejects when --id is missing', async () => { - await expect(program.parseAsync(['archive', 'recommendation'], { from: 'user' })).rejects.toThrow(); - expect(mockDeleteRecommendation).not.toHaveBeenCalled(); - }); - - it('calls deleteRecommendation with the given id and auto-detected region', async () => { - mockDeleteRecommendation.mockResolvedValue(recommendationResult); - - await program.parseAsync(['archive', 'recommendation', '--id', 'rec-xyz-789'], { from: 'user' }); - - expect(mockDeleteRecommendation).toHaveBeenCalledWith({ - region: 'us-east-1', - recommendationId: 'rec-xyz-789', - }); - }); - - it('uses --region when provided', async () => { - mockDeleteRecommendation.mockResolvedValue(recommendationResult); - - 
await program.parseAsync(['archive', 'recommendation', '--id', 'rec-xyz-789', '--region', 'ap-southeast-1'], { - from: 'user', - }); - - expect(mockDeleteRecommendation).toHaveBeenCalledWith({ - region: 'ap-southeast-1', - recommendationId: 'rec-xyz-789', - }); - }); - - it('calls deleteLocalRecommendationRun with the id', async () => { - mockDeleteRecommendation.mockResolvedValue(recommendationResult); - + it('calls engine.archive with the recommendation type and id', async () => { await program.parseAsync(['archive', 'recommendation', '--id', 'rec-xyz-789'], { from: 'user' }); - - expect(mockDeleteLocalRecommendationRun).toHaveBeenCalledWith('rec-xyz-789'); + expect(mockArchive).toHaveBeenCalledWith('recommendation', 'rec-xyz-789'); }); - it('outputs JSON on success with --json flag', async () => { - mockDeleteRecommendation.mockResolvedValue(recommendationResult); - - await program.parseAsync(['archive', 'recommendation', '--id', 'rec-xyz-789', '--json'], { from: 'user' }); - - expect(mockLog).toHaveBeenCalledTimes(1); - const output = JSON.parse(mockLog.mock.calls[0]![0]); - expect(output.success).toBe(true); - expect(output.recommendationId).toBe('rec-xyz-789'); - expect(output.status).toBe('DELETED'); - expect(output.localCliHistoryDeleted).toBe(true); - }); - - it('includes localCliHistoryDeleted: false in JSON when local file was not found', async () => { - mockDeleteRecommendation.mockResolvedValue(recommendationResult); - mockDeleteLocalRecommendationRun.mockReturnValue(false); - - await program.parseAsync(['archive', 'recommendation', '--id', 'rec-xyz-789', '--json'], { from: 'user' }); - - const output = JSON.parse(mockLog.mock.calls[0]![0]); - expect(output.localCliHistoryDeleted).toBe(false); - }); - - it('includes localDeleteWarning in JSON and exits 0 when local delete throws', async () => { - mockDeleteRecommendation.mockResolvedValue(recommendationResult); - mockDeleteLocalRecommendationRun.mockImplementation(() => { - throw new Error('Permission 
denied'); - }); - + it('outputs JSON on success with --json', async () => { await program.parseAsync(['archive', 'recommendation', '--id', 'rec-xyz-789', '--json'], { from: 'user' }); - const output = JSON.parse(mockLog.mock.calls[0]![0]); expect(output.success).toBe(true); - expect(output.localCliHistoryDeleted).toBe(false); - expect(output.localDeleteWarning).toBe('Permission denied'); - expect(mockExit).not.toHaveBeenCalled(); - }); - - it('prints warning and exits 0 when local delete throws without --json', async () => { - mockDeleteRecommendation.mockResolvedValue(recommendationResult); - mockDeleteLocalRecommendationRun.mockImplementation(() => { - throw new Error('Permission denied'); - }); - - await program.parseAsync(['archive', 'recommendation', '--id', 'rec-xyz-789'], { from: 'user' }); - - const allOutput = mockLog.mock.calls.map((c: unknown[]) => String(c[0])).join('\n'); - expect(allOutput).toContain('Warning: could not clear local history: Permission denied'); - expect(mockExit).not.toHaveBeenCalled(); - }); - - it('prints human-readable success output without --json', async () => { - mockDeleteRecommendation.mockResolvedValue(recommendationResult); - - await program.parseAsync(['archive', 'recommendation', '--id', 'rec-xyz-789'], { from: 'user' }); - - const allOutput = mockLog.mock.calls.map((c: unknown[]) => String(c[0])).join('\n'); - expect(allOutput).toContain('rec-xyz-789'); - expect(allOutput).toContain('DELETED'); - }); - - it('does not call process.exit on success', async () => { - mockDeleteRecommendation.mockResolvedValue(recommendationResult); - - await program.parseAsync(['archive', 'recommendation', '--id', 'rec-xyz-789'], { from: 'user' }); - - expect(mockExit).not.toHaveBeenCalled(); - }); - - it('outputs JSON error when deleteRecommendation throws and --json is set', async () => { - mockDeleteRecommendation.mockRejectedValue(new Error('Not found')); - - await expect( - program.parseAsync(['archive', 'recommendation', '--id', 
'rec-xyz-789', '--json'], { from: 'user' }) - ).rejects.toThrow('process.exit'); - - const output = JSON.parse(mockLog.mock.calls[0]![0]); - expect(output.success).toBe(false); - expect(output.error).toBe('Not found'); - }); - - it('renders error via ink when deleteRecommendation throws without --json', async () => { - mockDeleteRecommendation.mockRejectedValue(new Error('Not found')); - - await expect( - program.parseAsync(['archive', 'recommendation', '--id', 'rec-xyz-789'], { from: 'user' }) - ).rejects.toThrow('process.exit'); - - expect(mockRender).toHaveBeenCalled(); - const renderArg = mockRender.mock.calls[0]![0]; - expect(JSON.stringify(renderArg)).toContain('Not found'); + expect(output.id).toBe('rec-xyz-789'); }); - it('exits with code 1 on error', async () => { - mockDeleteRecommendation.mockRejectedValue(new Error('fail')); - + it('throws when engine.archive fails', async () => { + mockArchive.mockResolvedValue({ success: false, error: new Error('Not found') }); await expect( program.parseAsync(['archive', 'recommendation', '--id', 'rec-xyz-789'], { from: 'user' }) - ).rejects.toThrow('process.exit'); - - expect(mockExit).toHaveBeenCalledWith(1); - }); - - it('calls requireProject', async () => { - mockDeleteRecommendation.mockResolvedValue(recommendationResult); - - await program.parseAsync(['archive', 'recommendation', '--id', 'rec-xyz-789'], { from: 'user' }); - - expect(mockRequireProject).toHaveBeenCalled(); + ).rejects.toThrow('Not found'); }); }); }); diff --git a/src/cli/commands/archive/command.tsx b/src/cli/commands/archive/command.tsx index 4cf042bee..d5aab6c03 100644 --- a/src/cli/commands/archive/command.tsx +++ b/src/cli/commands/archive/command.tsx @@ -1,61 +1,31 @@ -import { deleteBatchEvaluation } from '../../aws/agentcore-batch-evaluation'; -import { deleteRecommendation } from '../../aws/agentcore-recommendation'; -import { COMMAND_DESCRIPTIONS } from '../../constants'; -import { getErrorMessage } from '../../errors'; -import { 
deleteLocalBatchEvalRun, deleteLocalRecommendationRun } from '../../operations/archive/archive-storage'; +import { ConfigIO } from '../../../lib'; +import { createJobEngine } from '../../operations/jobs'; +import type { JobType } from '../../operations/jobs'; +import { runCliCommand } from '../../telemetry/cli-command-run'; +import { COMMAND_DESCRIPTIONS } from '../../tui/copy'; import { requireProject } from '../../tui/guards'; -import { getRegion } from '../shared/region-utils'; import type { Command } from '@commander-js/extra-typings'; -import { Text, render } from 'ink'; -import React from 'react'; -async function executeArchive( +/** Archive a job: delete it from the service and remove the local .cli record via the engine. */ +function executeArchive( + jobType: JobType, cliOptions: { id: string; region?: string; json?: boolean }, - config: { - serviceDelete: (id: string, region: string) => Promise; - localDelete: (id: string) => boolean; - getId: (result: T) => string; - successMessage: string; - } -): Promise { + label: string +): Promise { requireProject(); - try { - const region = await getRegion(cliOptions.region); - const result = await config.serviceDelete(cliOptions.id, region); - - let localCliHistoryDeleted = false; - let localDeleteWarning: string | undefined; - try { - localCliHistoryDeleted = config.localDelete(cliOptions.id); - } catch (err) { - localDeleteWarning = getErrorMessage(err); - } - - if (cliOptions.json) { - console.log( - JSON.stringify({ - success: true, - ...result, - localCliHistoryDeleted, - ...(localDeleteWarning && { localDeleteWarning }), - }) - ); - } else { - console.log(`\n${config.successMessage}`); - console.log(`ID: ${config.getId(result)}`); - console.log(`Status: ${result.status}`); - if (localCliHistoryDeleted) console.log(`Local history cleared.`); - if (localDeleteWarning) console.log(`Warning: could not clear local history: ${localDeleteWarning}`); - console.log(''); + return runCliCommand('archive.job', 
!!cliOptions.json, async () => { + const engine = createJobEngine(new ConfigIO()); + const result = await engine.archive(jobType, cliOptions.id); + if (!result.success) { + throw result.error; } - } catch (error) { if (cliOptions.json) { - console.log(JSON.stringify({ success: false, error: getErrorMessage(error) })); + console.log(JSON.stringify({ success: true, id: cliOptions.id })); } else { - render(Error: {getErrorMessage(error)}); + console.log(`\n✓ ${label} ${cliOptions.id} archived.\n`); } - process.exit(1); - } + return { job_type: jobType }; + }); } export const registerArchive = (program: Command) => { @@ -63,31 +33,41 @@ export const registerArchive = (program: Command) => { archiveCmd .command('batch-evaluation') - .description('[preview] Archive (delete) a batch evaluation on the service and clear local history') + .description('Archive a batch evaluation job record on the service and clear local history') .requiredOption('-i, --id ', 'Batch evaluation ID to archive') .option('--region ', 'AWS region (auto-detected if omitted)') .option('--json', 'Output as JSON') .action((cliOptions: { id: string; region?: string; json?: boolean }) => - executeArchive(cliOptions, { - serviceDelete: (id, region) => deleteBatchEvaluation({ region, batchEvaluationId: id }), - localDelete: deleteLocalBatchEvalRun, - getId: result => result.batchEvaluationId, - successMessage: 'Batch evaluation archived successfully', - }) + executeArchive('batch-evaluation', cliOptions, 'Batch evaluation') ); archiveCmd .command('recommendation') - .description('[preview] Archive (delete) a recommendation on the service and clear local history') + .description('Archive a recommendation job record on the service and clear local history') .requiredOption('-i, --id ', 'Recommendation ID to archive') .option('--region ', 'AWS region (auto-detected if omitted)') .option('--json', 'Output as JSON') .action((cliOptions: { id: string; region?: string; json?: boolean }) => - 
executeArchive(cliOptions, { - serviceDelete: (id, region) => deleteRecommendation({ region, recommendationId: id }), - localDelete: deleteLocalRecommendationRun, - getId: result => result.recommendationId, - successMessage: 'Recommendation archived successfully', - }) + executeArchive('recommendation', cliOptions, 'Recommendation') + ); + + archiveCmd + .command('ab-test') + .description('Archive (delete) an A/B test on the service and clear local history') + .requiredOption('-i, --id ', 'A/B test ID to archive') + .option('--region ', 'AWS region (auto-detected if omitted)') + .option('--json', 'Output as JSON') + .action((cliOptions: { id: string; region?: string; json?: boolean }) => + executeArchive('ab-test', cliOptions, 'A/B test') + ); + + archiveCmd + .command('insights') + .description('[preview] Archive an insights job record on the service and clear local history') + .requiredOption('-i, --id ', 'Insights job ID to archive') + .option('--region ', 'AWS region (auto-detected if omitted)') + .option('--json', 'Output as JSON') + .action((cliOptions: { id: string; region?: string; json?: boolean }) => + executeArchive('insights', cliOptions, 'Insights job') ); }; diff --git a/src/cli/commands/batch-evaluations/command.tsx b/src/cli/commands/batch-evaluations/command.tsx new file mode 100644 index 000000000..b4b4562ae --- /dev/null +++ b/src/cli/commands/batch-evaluations/command.tsx @@ -0,0 +1,57 @@ +import { ConfigIO, JobNotFoundError, serializeResult } from '../../../lib'; +import { createJobEngine } from '../../operations/jobs'; +import { printBatchEvaluationDetail, printBatchEvaluationHistory } from '../../operations/jobs/batch-evaluation/format'; +import { runCliCommand } from '../../telemetry/cli-command-run'; +import { COMMAND_DESCRIPTIONS } from '../../tui/copy'; +import { requireProject } from '../../tui/guards'; +import type { Command } from '@commander-js/extra-typings'; + +export const registerBatchEvaluations = (program: Command) => { + const 
cmd = program.command('batch-evaluations').description(COMMAND_DESCRIPTIONS.batchEvaluations); + + cmd + .command('history') + .description('List batch evaluation jobs (running jobs are refreshed from the service)') + .option('--json', 'Output as JSON') + .action((cliOptions: { json?: boolean }) => { + requireProject(); + return runCliCommand('job.history', !!cliOptions.json, async () => { + const engine = createJobEngine(new ConfigIO()); + const records = await engine.list({ type: 'batch-evaluation' }); + if (cliOptions.json) { + console.log( + JSON.stringify({ + success: true, + batchEvaluations: records, + }) + ); + } else { + printBatchEvaluationHistory(records); + } + return { job_type: 'batch-evaluation' }; + }); + }); + + // Bare positional on the group: `agentcore batch-evaluations ` shows one job. + // (No .description() here — that would override the group description shown in the command list.) + cmd + .argument('', 'Batch evaluation job ID to view') + .option('--json', 'Output as JSON') + .action((id: string, cliOptions: { json?: boolean }) => { + requireProject(); + return runCliCommand('job.get', !!cliOptions.json, async () => { + const engine = createJobEngine(new ConfigIO()); + const record = await engine.get('batch-evaluation', id); + if (!record) { + // Throw only — runCliCommand owns error output (single JSON line in --json, stderr otherwise). 
+ throw new JobNotFoundError(`Batch evaluation "${id}" not found.`); + } + if (cliOptions.json) { + console.log(JSON.stringify(serializeResult({ success: true, ...record }))); + } else { + printBatchEvaluationDetail(record); + } + return { job_type: 'batch-evaluation' }; + }); + }); +}; diff --git a/src/cli/commands/batch-evaluations/index.ts b/src/cli/commands/batch-evaluations/index.ts new file mode 100644 index 000000000..1f11554d6 --- /dev/null +++ b/src/cli/commands/batch-evaluations/index.ts @@ -0,0 +1 @@ +export { registerBatchEvaluations } from './command'; diff --git a/src/cli/commands/config-bundle/command.tsx b/src/cli/commands/config-bundle/command.tsx index ce72f2a4b..1fd118985 100644 --- a/src/cli/commands/config-bundle/command.tsx +++ b/src/cli/commands/config-bundle/command.tsx @@ -8,6 +8,7 @@ import type { ListConfigurationBundleVersionsFilter, } from '../../aws/agentcore-config-bundles'; import { getErrorMessage } from '../../errors'; +import { isGatedFeaturesEnabled } from '../../feature-flags'; import { deepDiff } from '../../operations/config-bundle/diff-versions'; import { resolveBundleByName } from '../../operations/config-bundle/resolve-bundle'; import { requireProject } from '../../tui/guards'; @@ -44,7 +45,7 @@ async function resolveRegion(): Promise { // ============================================================================ async function handleVersions(options: { - bundle: string; + name: string; branch?: string; latestPerBranch?: boolean; createdBy?: string; @@ -52,7 +53,7 @@ async function handleVersions(options: { json?: boolean; }) { const region = options.region ?? 
(await resolveRegion()); - const resolved = await resolveBundleByName(options.bundle, region); + const resolved = await resolveBundleByName(options.name, region); const filter: ListConfigurationBundleVersionsFilter = {}; if (options.branch) filter.branchName = options.branch; @@ -78,16 +79,16 @@ async function handleVersions(options: { // Sort by creation time, newest first allVersions.sort((a, b) => Number(b.versionCreatedAt) - Number(a.versionCreatedAt)); - return { versions: allVersions, bundleName: options.bundle, bundleId: resolved.bundleId }; + return { versions: allVersions, bundleName: options.name, bundleId: resolved.bundleId }; } // ============================================================================ // Diff // ============================================================================ -async function handleDiff(options: { bundle: string; from: string; to: string; region?: string }) { +async function handleDiff(options: { name: string; from: string; to: string; region?: string }) { const region = options.region ?? (await resolveRegion()); - const resolved = await resolveBundleByName(options.bundle, region); + const resolved = await resolveBundleByName(options.name, region); const [fromVersion, toVersion] = await Promise.all([ getConfigurationBundleVersion({ region, bundleId: resolved.bundleId, versionId: options.from }), @@ -107,13 +108,13 @@ export const registerConfigBundle = (program: Command) => { const cmd = program .command('config-bundle') .alias('cb') - .description('[preview] Manage configuration bundles (use bundle name from agentcore.json, not the ID)'); + .description('Manage configuration bundles (use bundle name from agentcore.json, not the ID)'); // --- versions --- cmd .command('versions') .description('List version history for a configuration bundle') - .requiredOption('--bundle ', 'Bundle name as defined in agentcore.json (e.g. "MyBundle")') + .requiredOption('--name ', 'Bundle name as defined in agentcore.json (e.g. 
"MyBundle")') .option('--branch ', 'Filter by branch name') .option('--latest-per-branch', 'Show only the latest version per branch') .option('--created-by ', 'Filter by creator name (e.g. "user", "recommendation")') @@ -121,7 +122,7 @@ export const registerConfigBundle = (program: Command) => { .option('--json', 'Output as JSON') .action( async (cliOptions: { - bundle: string; + name: string; branch?: string; latestPerBranch?: boolean; createdBy?: string; @@ -138,7 +139,7 @@ export const registerConfigBundle = (program: Command) => { } if (result.versions.length === 0) { - render(No versions found for bundle "{cliOptions.bundle}".); + render(No versions found for bundle "{cliOptions.name}".); return; } @@ -199,12 +200,12 @@ export const registerConfigBundle = (program: Command) => { cmd .command('diff') .description('Diff two versions of a configuration bundle (get version IDs from `cb versions`)') - .requiredOption('--bundle ', 'Bundle name as defined in agentcore.json (e.g. "MyBundle")') + .requiredOption('--name ', 'Bundle name as defined in agentcore.json (e.g. 
"MyBundle")') .requiredOption('--from ', 'Source version ID (from `config-bundle versions --json`)') .requiredOption('--to ', 'Target version ID (from `config-bundle versions --json`)') .option('--region ', 'AWS region override') .option('--json', 'Output as JSON') - .action(async (cliOptions: { bundle: string; from: string; to: string; region?: string; json?: boolean }) => { + .action(async (cliOptions: { name: string; from: string; to: string; region?: string; json?: boolean }) => { requireProject(); try { const result = await handleDiff(cliOptions); @@ -258,11 +259,12 @@ export const registerConfigBundle = (program: Command) => { } }); - // --- create-branch --- + // --- create-branch: gated until upstream CFN read-back bug is fixed for non-default branches --- + if (!isGatedFeaturesEnabled()) return cmd; cmd .command('create-branch') .description('Create a new branch on an existing configuration bundle') - .requiredOption('--bundle ', 'Bundle name as defined in agentcore.json (e.g. "MyBundle")') + .requiredOption('--name ', 'Bundle name as defined in agentcore.json (e.g. "MyBundle")') .requiredOption('--branch ', 'Name for the new branch') .option('--from ', 'Parent version ID to branch from (defaults to latest version)') .option('--commit-message ', 'Commit message for the branch point') @@ -270,7 +272,7 @@ export const registerConfigBundle = (program: Command) => { .option('--json', 'Output as JSON') .action( async (cliOptions: { - bundle: string; + name: string; branch: string; from?: string; commitMessage?: string; @@ -280,7 +282,7 @@ export const registerConfigBundle = (program: Command) => { requireProject(); try { const region = cliOptions.region ?? 
(await resolveRegion()); - const resolved = await resolveBundleByName(cliOptions.bundle, region); + const resolved = await resolveBundleByName(cliOptions.name, region); // Determine parent version let parentVersionId = cliOptions.from; @@ -291,7 +293,7 @@ export const registerConfigBundle = (program: Command) => { maxResults: 50, }); if (versions.versions.length === 0) { - throw new Error(`No versions found for bundle "${cliOptions.bundle}".`); + throw new Error(`No versions found for bundle "${cliOptions.name}".`); } // Sort descending by creation time to get the latest version const sorted = [...versions.versions].sort( @@ -314,6 +316,7 @@ export const registerConfigBundle = (program: Command) => { parentVersionIds: [parentVersionId], branchName: cliOptions.branch, commitMessage: cliOptions.commitMessage ?? `Create branch ${cliOptions.branch}`, + createdBy: { name: 'user' }, }); if (cliOptions.json) { @@ -324,7 +327,7 @@ export const registerConfigBundle = (program: Command) => { render( - Branch "{cliOptions.branch}" created on bundle "{cliOptions.bundle}" + Branch "{cliOptions.branch}" created on bundle "{cliOptions.name}" Version: {result.versionId} diff --git a/src/cli/commands/create/__tests__/create.test.ts b/src/cli/commands/create/__tests__/create.test.ts index a186c6e75..8506f8d39 100644 --- a/src/cli/commands/create/__tests__/create.test.ts +++ b/src/cli/commands/create/__tests__/create.test.ts @@ -194,6 +194,44 @@ describe('create command', () => { expect(projectSpec.name).toBe(projectName); expect(projectSpec.runtimes[0].name).toBe(agentName); }); + + it('uses --project-name for project and --name for agent resource', async () => { + const projectName = `AgentProj${Date.now().toString().slice(-6)}`; + const agentName = `AgentResource${randomUUID().replace(/-/g, '').slice(0, 16)}`; + const result = await runCLI( + [ + 'create', + '--project-name', + projectName, + '--name', + agentName, + '--language', + 'Python', + '--framework', + 'Strands', + 
'--model-provider', + 'Bedrock', + '--memory', + 'none', + '--skip-git', + '--skip-install', + '--json', + ], + testDir + ); + + expect(result.exitCode, `stdout: ${result.stdout}, stderr: ${result.stderr}`).toBe(0); + + const json = JSON.parse(result.stdout); + expect(json.success).toBe(true); + expect(json.projectPath).toMatch(new RegExp(`/${projectName}$`)); + expect(json.agentName).toBe(agentName); + expect(await exists(join(json.projectPath, 'app', agentName))).toBeTruthy(); + + const projectSpec = JSON.parse(await readFile(join(json.projectPath, 'agentcore/agentcore.json'), 'utf-8')); + expect(projectSpec.name).toBe(projectName); + expect(projectSpec.runtimes[0].name).toBe(agentName); + }); }); describe('--defaults', () => { diff --git a/src/cli/commands/create/__tests__/harness-validate.test.ts b/src/cli/commands/create/__tests__/harness-validate.test.ts index 61ab2d87c..a7f82c608 100644 --- a/src/cli/commands/create/__tests__/harness-validate.test.ts +++ b/src/cli/commands/create/__tests__/harness-validate.test.ts @@ -112,6 +112,30 @@ describe('validateCreateHarnessOptions - apiFormat', () => { }); }); +// ───────────────────────────────────────────────────────────────────────────── +// lite_llm provider +// ───────────────────────────────────────────────────────────────────────────── + +describe('validateCreateHarnessOptions - lite_llm', () => { + it('accepts lite_llm without an API key ARN (key is optional)', () => { + const result = validateCreateHarnessOptions({ ...baseOptions, modelProvider: 'lite_llm' }, makeCwd()); + expect(result.valid).toBe(true); + }); + + it('normalizes the litellm alias to lite_llm', () => { + const options = { ...baseOptions, modelProvider: 'litellm' }; + const result = validateCreateHarnessOptions(options, makeCwd()); + expect(result.valid).toBe(true); + expect(options.modelProvider).toBe('lite_llm'); + }); + + it('still requires an API key ARN for open_ai', () => { + const result = validateCreateHarnessOptions({ 
...baseOptions, modelProvider: 'open_ai' }, makeCwd()); + expect(result.valid).toBe(false); + expect(result.error).toContain('--api-key-arn is required'); + }); +}); + // ───────────────────────────────────────────────────────────────────────────── // EFS access point validation // ───────────────────────────────────────────────────────────────────────────── diff --git a/src/cli/commands/create/__tests__/validate.test.ts b/src/cli/commands/create/__tests__/validate.test.ts index 29c6caf76..e54687b59 100644 --- a/src/cli/commands/create/__tests__/validate.test.ts +++ b/src/cli/commands/create/__tests__/validate.test.ts @@ -148,6 +148,32 @@ describe('validateCreateOptions', () => { expect(result.error).toContain('is not yet available for TypeScript'); }); + it('rejects Python with the Vercel AI framework (TypeScript-only)', () => { + const result = validateCreateOptions( + { name: 'TestProjVercel', language: 'Python', framework: 'VercelAI', modelProvider: 'Bedrock', memory: 'none' }, + testDir + ); + expect(result.valid).toBe(false); + expect(result.error).toContain('is not yet available for Python'); + // Message lists the language's supported frameworks (derived from the matrix), not a hardcoded one + expect(result.error).toContain('Strands'); + expect(result.error).toContain('OpenAIAgents'); + }); + + it('accepts TypeScript with the Vercel AI framework', () => { + const result = validateCreateOptions( + { + name: 'TestProjVercelTs', + language: 'TypeScript', + framework: 'VercelAI', + modelProvider: 'Bedrock', + memory: 'none', + }, + testDir + ); + expect(result.valid).toBe(true); + }); + it('returns invalid for invalid framework', () => { const result = validateCreateOptions( { name: 'TestProj5', language: 'Python', framework: 'InvalidFW', modelProvider: 'Bedrock', memory: 'none' }, diff --git a/src/cli/commands/create/command.tsx b/src/cli/commands/create/command.tsx index b5a934b50..a147b4776 100644 --- a/src/cli/commands/create/command.tsx +++ 
b/src/cli/commands/create/command.tsx @@ -12,6 +12,7 @@ import { LIFECYCLE_TIMEOUT_MAX, LIFECYCLE_TIMEOUT_MIN } from '../../../schema'; import { ANSI, COMMAND_DESCRIPTIONS } from '../../constants'; import { getErrorMessage } from '../../errors'; import { isPreviewEnabled } from '../../feature-flags'; +import { ADDITIONAL_PARAMS_JSON_ERROR } from '../../primitives/constants'; import { harnessPrimitive } from '../../primitives/registry'; import { runCliCommand, withCommandRunTelemetry } from '../../telemetry/cli-command-run.js'; import { @@ -93,6 +94,8 @@ const AGENT_PATH_FLAGS = ['framework', 'language', 'build', 'protocol', 'type', const HARNESS_ONLY_FLAGS = [ 'modelId', 'apiKeyArn', + 'apiBase', + 'additionalParams', 'maxIterations', 'maxTokens', 'timeout', @@ -184,11 +187,21 @@ async function handleCreateHarnessCLI(options: CreateOptions): Promise { bedrock: 'global.anthropic.claude-sonnet-4-6', open_ai: 'gpt-5', gemini: 'gemini-2.5-flash', + lite_llm: 'anthropic/claude-sonnet-4-5', }; const modelId = options.modelId ?? defaultModelIds[provider] ?? 
'global.anthropic.claude-sonnet-4-6'; const containerOption = harnessPrimitive!.parseContainerFlag(options.container); + let additionalParams: Record | undefined; + if (options.additionalParams) { + try { + additionalParams = JSON.parse(options.additionalParams) as Record; + } catch { + throw new Error(ADDITIONAL_PARAMS_JSON_ERROR); + } + } + const { efsMounts: harnessEfsMounts, s3Mounts: harnessS3Mounts } = await resolveAndValidateFilesystemMounts( options, parseCommaSeparatedList @@ -201,6 +214,8 @@ async function handleCreateHarnessCLI(options: CreateOptions): Promise { modelProvider: provider, modelId, apiKeyArn: options.apiKeyArn, + apiBase: options.apiBase, + additionalParams, containerUri: containerOption.containerUri, dockerfilePath: containerOption.dockerfilePath, skipMemory: options.harnessMemory === false, @@ -430,7 +445,7 @@ export const registerCreate = (program: Command) => { (val: string, prev: string[]) => [...prev, val], [] as string[] ) - .option('--with-config-bundle', 'Create a config bundle wired into the agent template [preview] [non-interactive]') + .option('--with-config-bundle', 'Create a config bundle wired into the agent template [non-interactive]') .option('--output-dir

', 'Output directory (default: current directory) [non-interactive]') .option('--skip-git', 'Skip git repository initialization [non-interactive]') .option('--skip-python-setup', 'Skip Python virtual environment setup [non-interactive]') @@ -440,20 +455,22 @@ export const registerCreate = (program: Command) => { if (isPreviewEnabled()) { createCmd - .option('--model-id ', 'Model ID for harness [non-interactive] [preview]') - .option('--api-key-arn ', 'API key ARN for non-Bedrock harness providers [non-interactive] [preview]') - .option('--no-harness-memory', 'Skip auto-creating memory for harness [non-interactive] [preview]') - .option('--max-iterations ', 'Max agent loop iterations (harness) [non-interactive] [preview]') - .option('--max-tokens ', 'Max tokens per iteration (harness) [non-interactive] [preview]') - .option('--timeout ', 'Max execution duration in seconds (harness) [non-interactive] [preview]') + .option('--model-id ', 'Model ID for harness [non-interactive]') + .option('--api-key-arn ', 'API key ARN for non-Bedrock harness providers [non-interactive]') + .option('--api-base ', 'Base URL for the harness model provider API endpoint (lite_llm) [non-interactive]') .option( - '--truncation-strategy ', - 'Truncation strategy: sliding_window or summarization (harness) [non-interactive] [preview]' + '--additional-params ', + 'Provider-specific harness params as a JSON object (lite_llm) [non-interactive]' ) + .option('--no-harness-memory', 'Skip auto-creating memory for harness [non-interactive]') + .option('--max-iterations ', 'Max agent loop iterations (harness) [non-interactive]') + .option('--max-tokens ', 'Max tokens per iteration (harness) [non-interactive]') + .option('--timeout ', 'Max execution duration in seconds (harness) [non-interactive]') .option( - '--container ', - 'Container image URI or Dockerfile path (harness) [non-interactive] [preview]' - ); + '--truncation-strategy ', + 'Truncation strategy: sliding_window or summarization (harness) 
[non-interactive]' + ) + .option('--container ', 'Container image URI or Dockerfile path (harness) [non-interactive]'); } createCmd.action(async (rawOptions: Record) => { diff --git a/src/cli/commands/create/harness-action.ts b/src/cli/commands/create/harness-action.ts index 3354ae641..68cbfff6e 100644 --- a/src/cli/commands/create/harness-action.ts +++ b/src/cli/commands/create/harness-action.ts @@ -14,6 +14,8 @@ export interface CreateHarnessProjectOptions { modelId: string; apiFormat?: HarnessApiFormat; apiKeyArn?: string; + apiBase?: string; + additionalParams?: Record; skipMemory?: boolean; containerUri?: string; dockerfilePath?: string; @@ -62,6 +64,8 @@ export async function createProjectWithHarness(options: CreateHarnessProjectOpti modelId: options.modelId, apiFormat: options.apiFormat, apiKeyArn: options.apiKeyArn, + apiBase: options.apiBase, + additionalParams: options.additionalParams, containerUri: options.containerUri, dockerfilePath: options.dockerfilePath, skipMemory: options.skipMemory, diff --git a/src/cli/commands/create/harness-validate.ts b/src/cli/commands/create/harness-validate.ts index feedf0784..926cda032 100644 --- a/src/cli/commands/create/harness-validate.ts +++ b/src/cli/commands/create/harness-validate.ts @@ -1,5 +1,10 @@ -import { MAX_EFS_MOUNTS, MAX_S3_MOUNTS, validateApiFormat } from '../../../schema'; -import { HarnessNameSchema, ProjectNameSchema } from '../../../schema'; +import { + HarnessNameSchema, + MAX_EFS_MOUNTS, + MAX_S3_MOUNTS, + ProjectNameSchema, + validateApiFormat, +} from '../../../schema'; import { validateAccessPointMounts, validateEfsAccessPointArn, @@ -53,6 +58,9 @@ const MODEL_PROVIDER_MAPPING: Record = { Anthropic: 'bedrock', gemini: 'gemini', Gemini: 'gemini', + lite_llm: 'lite_llm', + litellm: 'lite_llm', + LiteLLM: 'lite_llm', }; export function normalizeHarnessModelProvider(raw: string): string | undefined { @@ -85,7 +93,7 @@ export function validateCreateHarnessOptions(options: CreateHarnessCliOptions, c 
if (!normalized) { return { valid: false, - error: `Invalid model provider: ${options.modelProvider}. Use bedrock, open_ai, or gemini`, + error: `Invalid model provider: ${options.modelProvider}. Use bedrock, open_ai, gemini, or lite_llm`, }; } options.modelProvider = normalized; @@ -96,10 +104,12 @@ export function validateCreateHarnessOptions(options: CreateHarnessCliOptions, c bedrock: 'global.anthropic.claude-sonnet-4-6', open_ai: 'gpt-5', gemini: 'gemini-2.5-flash', + lite_llm: 'anthropic/claude-sonnet-4-5', }; options.modelId ??= defaultModelIds[options.modelProvider] ?? 'global.anthropic.claude-sonnet-4-6'; - if (options.modelProvider !== 'bedrock' && !options.apiKeyArn) { + // open_ai and gemini require an API key; bedrock uses AWS credentials and lite_llm's key is optional. + if (options.modelProvider !== 'bedrock' && options.modelProvider !== 'lite_llm' && !options.apiKeyArn) { return { valid: false, error: `--api-key-arn is required for ${options.modelProvider} provider` }; } diff --git a/src/cli/commands/create/types.ts b/src/cli/commands/create/types.ts index 947160e2d..67e40b20f 100644 --- a/src/cli/commands/create/types.ts +++ b/src/cli/commands/create/types.ts @@ -34,6 +34,8 @@ export interface CreateOptions extends VpcOptions { // Harness-specific (preview only) modelId?: string; apiKeyArn?: string; + apiBase?: string; + additionalParams?: string; container?: string; harnessMemory?: boolean; maxIterations?: string; diff --git a/src/cli/commands/create/validate.ts b/src/cli/commands/create/validate.ts index 2a4030c20..ad6dac618 100644 --- a/src/cli/commands/create/validate.ts +++ b/src/cli/commands/create/validate.ts @@ -9,8 +9,10 @@ import { SDKFrameworkSchema, SessionStorageSchema, TargetLanguageSchema, + getFrameworksForLanguage, getSupportedFrameworksForProtocol, getSupportedModelProviders, + isFrameworkSupportedForLanguage, matchEnumValue, } from '../../../schema'; import type { ProtocolMode } from '../../../schema'; @@ -200,11 +202,16 @@ 
export function validateCreateOptions(options: CreateOptions, cwd?: string): Val return { valid: false, error: `Invalid model provider: ${options.modelProvider}` }; } - // TypeScript supports Strands and Vercel AI only - if (options.language === 'TypeScript' && fwResult.data !== 'Strands' && fwResult.data !== 'VercelAI') { + // Framework must ship a template for the chosen language (e.g. Vercel AI is + // TypeScript-only, the other open-source frameworks are Python-only). + if ( + (langResult.data === 'Python' || langResult.data === 'TypeScript') && + !isFrameworkSupportedForLanguage(langResult.data, fwResult.data) + ) { + const supported = getFrameworksForLanguage(langResult.data).join(', '); return { valid: false, - error: `Framework ${options.framework} is not yet available for TypeScript. Only Strands and Vercel AI SDK are supported.`, + error: `Framework ${options.framework} is not yet available for ${langResult.data}. Supported: ${supported}.`, }; } diff --git a/src/cli/commands/deploy/__tests__/harness-version-drift.test.ts b/src/cli/commands/deploy/__tests__/harness-version-drift.test.ts new file mode 100644 index 000000000..1a9b45a97 --- /dev/null +++ b/src/cli/commands/deploy/__tests__/harness-version-drift.test.ts @@ -0,0 +1,35 @@ +import { computeHarnessVersionDrift } from '../actions.js'; +import { describe, expect, it } from 'vitest'; + +describe('computeHarnessVersionDrift', () => { + it('reports a first deploy (no prior version) as deployed at the new version', () => { + const notes = computeHarnessVersionDrift(undefined, { h1: { harnessVersion: 1 } }); + expect(notes).toEqual([{ name: 'h1', from: undefined, to: 1 }]); + }); + + it('reports a version bump as from → to', () => { + const notes = computeHarnessVersionDrift({ h1: { harnessVersion: 2 } }, { h1: { harnessVersion: 3 } }); + expect(notes).toEqual([{ name: 'h1', from: 2, to: 3 }]); + }); + + it('emits no note when the version is unchanged', () => { + const notes = 
computeHarnessVersionDrift({ h1: { harnessVersion: 3 } }, { h1: { harnessVersion: 3 } }); + expect(notes).toEqual([]); + }); + + it('skips harnesses that emit no version (legacy stack)', () => { + const notes = computeHarnessVersionDrift(undefined, { h1: {} }); + expect(notes).toEqual([]); + }); + + it('handles multiple harnesses independently', () => { + const notes = computeHarnessVersionDrift( + { a: { harnessVersion: 1 }, b: { harnessVersion: 5 } }, + { a: { harnessVersion: 2 }, b: { harnessVersion: 5 }, c: { harnessVersion: 1 } } + ); + expect(notes).toEqual([ + { name: 'a', from: 1, to: 2 }, + { name: 'c', from: undefined, to: 1 }, + ]); + }); +}); diff --git a/src/cli/commands/deploy/actions.ts b/src/cli/commands/deploy/actions.ts index 404291381..6b2a5b30b 100644 --- a/src/cli/commands/deploy/actions.ts +++ b/src/cli/commands/deploy/actions.ts @@ -1,5 +1,5 @@ import { ConfigIO, ResourceNotFoundError, SecureCredentials, ValidationError, toError } from '../../../lib'; -import type { AgentCoreMcpSpec, DeployedState, HarnessDeployedState } from '../../../schema'; +import type { AgentCoreMcpSpec, DeployedState } from '../../../schema'; import { applyTargetRegionToEnv } from '../../aws'; import { validateAwsCredentials } from '../../aws/account'; import { CdkToolkitWrapper, createSwitchableIoHost } from '../../cdk/toolkit-lib'; @@ -8,9 +8,12 @@ import { buildDeployedState, getStackOutputs, parseAgentOutputs, + parseConfigBundleOutputs, parseDatasetOutputs, parseEvaluatorOutputs, parseGatewayOutputs, + parseHarnessOutputs, + parseKnowledgeBaseOutputs, parseMemoryOutputs, parseOnlineEvalOutputs, parsePaymentOutputs, @@ -19,9 +22,10 @@ import { parseRuntimeEndpointOutputs, } from '../../cloudformation'; import { getErrorMessage } from '../../errors'; -import { isPreviewEnabled } from '../../feature-flags'; +import { isGatedFeaturesEnabled, isPreviewEnabled } from '../../feature-flags'; import { ExecLogger } from '../../logging'; import { + 
MANAGED_MEMORY_DEPLOY_NOTICE, assertEnvFileExists, bootstrapEnvironment, buildCdkProject, @@ -31,6 +35,7 @@ import { getAllCredentials, hasIdentityApiProviders, hasIdentityOAuthProviders, + hasManagedMemoryHarness, performStackTeardown, setupApiKeyProviders, setupOAuth2Providers, @@ -40,20 +45,16 @@ import { } from '../../operations/deploy'; import { computeProjectDeployHash } from '../../operations/deploy/change-detection'; import { formatTargetStatus, getGatewayTargetStatuses } from '../../operations/deploy/gateway-status'; -import { type ImperativeDeployContext, createDeploymentManager } from '../../operations/deploy/imperative'; -import { deleteOrphanedABTests, setupABTests } from '../../operations/deploy/post-deploy-ab-tests'; -import { - resolveConfigBundleComponentKeys, - setupConfigBundles, -} from '../../operations/deploy/post-deploy-config-bundles'; import { syncDatasets } from '../../operations/deploy/post-deploy-datasets'; -import { setupHttpGateways } from '../../operations/deploy/post-deploy-http-gateways'; +import { autoIngestKnowledgeBases } from '../../operations/deploy/post-deploy-knowledge-bases'; import { enableOnlineEvalConfigs } from '../../operations/deploy/post-deploy-online-evals'; import { cleanupPaymentCredentialProviders, hasPaymentCredentialProviders, setupPaymentCredentialProviders, } from '../../operations/deploy/pre-deploy-identity'; +import { findOrphanHarnesses } from '../../operations/harness/orphan'; +import { hydrateKnowledgeBaseDataSources } from '../../operations/knowledge-base/hydrate-data-sources'; import { toStackName } from '../import/import-utils'; import type { DeployResult } from './types'; import { StackSelectionStrategy } from '@aws-cdk/toolkit-lib'; @@ -67,11 +68,32 @@ export interface ValidatedDeployOptions { onProgress?: (step: string, status: 'start' | 'success' | 'error') => void; onResourceEvent?: (message: string) => void; onDeployMessage?: (message: DeployMessage) => void; + /** Emit a one-shot, user-facing 
notice to the terminal mid-deploy (e.g. the managed-memory heads-up + * shown before the slow CFN apply). Always surfaced, independent of `verbose`. */ + onNotice?: (message: string) => void; } const AGENT_NEXT_STEPS = ['agentcore invoke', 'agentcore status']; const MEMORY_ONLY_NEXT_STEPS = ['agentcore add agent', 'agentcore status']; +/** + * Compute per-harness config-version drift between the previous deployed-state and the freshly-parsed + * harness outputs. Only harnesses that emit a version are considered; a changed (or first-seen) version + * yields a note. Pure + exported for unit testing. + */ +export function computeHarnessVersionDrift( + prev: Record | undefined, + next: Record +): { name: string; from?: number; to: number }[] { + const notes: { name: string; from?: number; to: number }[] = []; + for (const [name, rec] of Object.entries(next)) { + if (rec.harnessVersion === undefined) continue; + const from = prev?.[name]?.harnessVersion; + if (from !== rec.harnessVersion) notes.push({ name, from, to: rec.harnessVersion }); + } + return notes; +} + export async function runDiff( toolkitWrapper: CdkToolkitWrapper, stackName: string, @@ -163,6 +185,27 @@ export async function handleDeploy(options: ValidatedDeployOptions): Promise undefined); + for (const orphan of findOrphanHarnesses(preDeployState)) { + const warning = + `Harness "${orphan.name}" was created by the preview build and is not managed by ` + + `CloudFormation. This deploy won't touch it, and it keeps incurring cost. To migrate it ` + + `to GA, run \`agentcore remove harness ${orphan.name} --keep\` (deletes the old resource ` + + `but keeps it in agentcore.json), then deploy again so it's recreated under CloudFormation. 
` + + `Use --discard instead if you no longer want it.`; + logger.log(warning, 'warn'); + orphanWarnings.push(warning); + } + } + // Teardown confirmation: if this is a teardown deploy, require --yes if (context.isTeardownDeploy && !options.autoConfirm) { logger.finalize(false); @@ -413,6 +456,18 @@ export async function handleDeploy(options: ValidatedDeployOptions): Promise 0; const deployStepName = hasGateways ? 'Deploying gateways...' : 'Deploy to AWS'; @@ -438,38 +493,8 @@ export async function handleDeploy(options: ValidatedDeployOptions): Promise ({ targets: {} }) as DeployedState); - const teardownContext: ImperativeDeployContext = { - projectSpec: context.projectSpec, - target, - configIO, - deployedState: existingTeardownState, - onProgress: (step: string, status: 'start' | 'done' | 'error') => { - logger.log(`${step}: ${status}`); - }, - }; - - if (imperativeManager.hasDeployersForPhase('post-cdk', teardownContext)) { - startStep('Tear down imperative resources'); - const imperativeTeardown = await imperativeManager.teardownAll(teardownContext); - if (!imperativeTeardown.success) { - endStep('error', imperativeTeardown.error); - logger.finalize(false); - return { - success: false, - error: new Error(`Imperative teardown failed: ${imperativeTeardown.error}`), - logPath: logger.getRelativeLogPath(), - }; - } - endStep('success'); - } - } - - // Clean up imperative payment credential providers (CFN stack delete handles manager/connector/roles) + // Clean up imperative payment credential providers (CFN stack delete handles manager/connector/roles). + // Harnesses are part of the CloudFormation stack, so stack destroy handles them. 
const existingDeployedState = await configIO.readDeployedState().catch(() => undefined); const existingPayments = existingDeployedState?.targets?.[target.name]?.resources?.payments; if (existingPayments && Object.keys(existingPayments).length > 0) { @@ -560,20 +585,67 @@ export async function handleDeploy(options: ValidatedDeployOptions): Promise { - acc[gateway.name] = gateway; - return acc; - }, - {} as Record - ) ?? {}; - const gateways = parseGatewayOutputs(outputs, gatewaySpecs); + const allGatewaySpecs = mcpSpec?.agentCoreGateways ?? []; + const gatewaySpecs = allGatewaySpecs.reduce( + (acc, gateway) => { + acc[gateway.name] = gateway; + return acc; + }, + {} as Record + ); + const allGateways = parseGatewayOutputs(outputs, gatewaySpecs); + + // Split into MCP and HTTP gateways based on protocolType + const httpGatewayNames = new Set(allGatewaySpecs.filter(g => g.protocolType === 'None').map(g => g.name)); + const gateways: Record = {}; + const httpGateways: Record< + string, + { gatewayId: string; gatewayArn: string; gatewayUrl?: string; targets?: Record } + > = {}; + for (const [name, state] of Object.entries(allGateways)) { + if (httpGatewayNames.has(name)) { + httpGateways[name] = state; + } else { + gateways[name] = state; + } + } // Parse dataset outputs const datasetNames = (context.projectSpec.datasets ?? []).map(d => d.name); const datasets = parseDatasetOutputs(outputs, datasetNames); + // Parse config bundle outputs + const configBundleNames = (context.projectSpec.configBundles ?? []).map(b => b.name); + const configBundles = parseConfigBundleOutputs(outputs, configBundleNames); + + // Parse knowledge base outputs (CFN emits id+arn; DSes hydrated next via SDK). + const knowledgeBaseSpecs = context.projectSpec.knowledgeBases ?? 
[]; + const knowledgeBaseNames = knowledgeBaseSpecs.map(kb => kb.name); + const knowledgeBases = parseKnowledgeBaseOutputs(outputs, knowledgeBaseNames); + + if (knowledgeBaseNames.length > 0 && Object.keys(knowledgeBases).length !== knowledgeBaseNames.length) { + logger.log( + `Deployed-state missing outputs for ${ + knowledgeBaseNames.length - Object.keys(knowledgeBases).length + } knowledge base(s).`, + 'warn' + ); + } + + // Hydrate dataSources[] for each KB by listing DSes via bedrock-agent + // (the L3 doesn't emit per-DS CFN outputs). + if (Object.keys(knowledgeBases).length > 0) { + try { + await hydrateKnowledgeBaseDataSources({ + knowledgeBases, + knowledgeBaseSpecs, + region: target.region, + }); + } catch (err) { + logger.log(`Failed to hydrate knowledge base data sources: ${getErrorMessage(err)}`, 'warn'); + } + } + // Parse payment outputs from CFN stack const paymentSpecs = (context.projectSpec.payments ?? []).map(p => ({ name: p.name, @@ -589,49 +661,14 @@ export async function handleDeploy(options: ValidatedDeployOptions): Promise 0 ? parsePaymentOutputs(outputs, paymentSpecs) : undefined; - endStep('success'); - - // Post-CDK: deploy imperative resources (harness) — preview mode only - let deployedHarnesses: Record | undefined; - if (isPreviewEnabled()) { - const imperativeManager = createDeploymentManager(); - const existingImperativeState: DeployedState = await configIO.readDeployedState().catch(() => ({ targets: {} })); - const imperativeContext = { - projectSpec: context.projectSpec, - target, - configIO, - deployedState: existingImperativeState, - cdkOutputs: outputs, - onProgress: (step: string, status: 'start' | 'done' | 'error') => { - logger.log(`${step}: ${status}`); - }, - }; + // Parse harness outputs (harnesses are now part of the CloudFormation stack). 
+ // Preview-gated: when preview is off the vended app never synthesizes a harness + // (see bin/cdk.ts), so there are no outputs to parse — skip entirely to keep the + // gate complete and avoid warning on a harness that was intentionally not deployed. + const harnessNames = isPreviewEnabled() ? (context.projectSpec.harnesses ?? []).map(h => h.name) : []; + const deployedHarnesses = parseHarnessOutputs(outputs, harnessNames); - let harnessDeployError: string | undefined; - if (imperativeManager.hasDeployersForPhase('post-cdk', imperativeContext)) { - startStep('Deploy harnesses'); - const postCdkResult = await imperativeManager.runPhase('post-cdk', imperativeContext); - const harnessResult = postCdkResult.results.get('harness'); - if (harnessResult?.state) { - deployedHarnesses = harnessResult.state as Record; - } - if (!postCdkResult.success) { - endStep('error', postCdkResult.error); - harnessDeployError = postCdkResult.error; - } else { - endStep('success'); - } - } - - if (harnessDeployError) { - logger.finalize(false); - return { - success: false, - error: new Error(`Harness deployment failed: ${harnessDeployError}`), - logPath: logger.getRelativeLogPath(), - }; - } - } + endStep('success'); let deployHash: string | undefined; try { @@ -641,11 +678,14 @@ export async function handleDeploy(options: ValidatedDeployOptions): Promise undefined); + // Capture prior harness version records BEFORE the new state overwrites them, for the drift note. + const prevHarnessRecords = existingState?.targets?.[target.name]?.resources?.harnesses; let deployedState = buildDeployedState({ targetName: target.name, stackName, agents, gateways, + httpGateways: Object.keys(httpGateways).length > 0 ? 
httpGateways : undefined, existingState, identityKmsKeyArn, credentials: deployedCredentials, @@ -657,7 +697,10 @@ export async function handleDeploy(options: ValidatedDeployOptions): Promise t.name), }); if (deployHash) { @@ -687,6 +730,18 @@ export async function handleDeploy(options: ValidatedDeployOptions): Promise 0) { const gatewayUrls = Object.entries(gateways) @@ -766,137 +821,52 @@ export async function handleDeploy(options: ValidatedDeployOptions): Promise 0) { - const deleteResult = await deleteOrphanedABTests({ + // Post-deploy: auto-trigger ingestion for any KB whose data-source URIs + // changed since the last deploy (or has never been ingested before). + const knowledgeBaseSpecsForIngest = context.projectSpec.knowledgeBases ?? []; + if (knowledgeBaseSpecsForIngest.length > 0) { + startStep('Auto-ingest knowledge bases'); + const ingestResult = await autoIngestKnowledgeBases({ region: target.region, - projectSpec: context.projectSpec, - existingABTests: existingABTestsForCleanup, + knowledgeBases: knowledgeBaseSpecsForIngest, + deployedKnowledgeBases: deployedState.targets?.[target.name]?.resources?.knowledgeBases ?? 
{}, + previousKnowledgeBases: existingState?.targets?.[target.name]?.resources?.knowledgeBases, + targetName: target.name, + deployedState, + onProgress: msg => logger.log(msg), }); - if (deleteResult.hasErrors) { - const errors = deleteResult.results.filter(r => r.status === 'error'); - const errorMessages = errors.map(err => `"${err.testName}": ${err.error}`).join('; '); - logger.log(`AB test orphan cleanup warnings: ${errorMessages}`, 'warn'); - postDeployWarnings.push(...errors.map(err => `AB test "${err.testName}": ${err.error}`)); - } - - // Surface warnings (e.g., "AB test was stopped before deletion") - for (const r of deleteResult.results) { - if (r.warning) { - logger.log(r.warning, 'warn'); - postDeployWarnings.push(r.warning); - } - } - - // Update deployed state to remove deleted AB tests - if (deleteResult.results.some(r => r.status === 'deleted')) { - const updatedState = await configIO.readDeployedState().catch(() => deployedState); - const targetResources = updatedState.targets[target.name]?.resources; - if (targetResources?.abTests) { - for (const r of deleteResult.results) { - if (r.status === 'deleted') delete targetResources.abTests[r.testName]; + // Persist new sourcesHash values for KBs whose ingestion fired. + const targetResources = deployedState.targets[target.name]?.resources; + if (targetResources?.knowledgeBases) { + for (const r of ingestResult.results) { + if (r.status === 'started' && r.newSourcesHash) { + const record = targetResources.knowledgeBases[r.knowledgeBaseName]; + if (record) record.sourcesHash = r.newSourcesHash; } - await configIO.writeDeployedState(updatedState); - deployedState = updatedState; } + await configIO.writeDeployedState(deployedState); } - } - - // Post-deploy: Create/update HTTP gateways for AB tests (must run BEFORE config bundles - // because config bundle component keys may reference gateway ARNs) - const httpGatewaySpecs = context.projectSpec.httpGateways ?? 
[]; - const existingHttpGateways = deployedState.targets?.[target.name]?.resources?.httpGateways; - if (httpGatewaySpecs.length > 0 || Object.keys(existingHttpGateways ?? {}).length > 0) { - const deployedResources = deployedState.targets?.[target.name]?.resources; - const httpGatewayResult = await setupHttpGateways({ - region: target.region, - projectName: context.projectSpec.name, - projectSpec: context.projectSpec, - existingHttpGateways, - deployedResources, - }); - - // Always merge HTTP gateway state (even if empty, to clear deleted gateways) - const updatedState = await configIO.readDeployedState().catch(() => deployedState); - const targetResources = updatedState.targets[target.name]?.resources; - if (targetResources) { - targetResources.httpGateways = httpGatewayResult.httpGateways; - await configIO.writeDeployedState(updatedState); - deployedState = updatedState; - } - - if (httpGatewayResult.hasErrors) { - const errors = httpGatewayResult.results.filter(r => r.status === 'error'); - const errorMessages = errors.map(err => `"${err.gatewayName}": ${err.error}`).join('; '); - logger.log(`HTTP gateway setup warnings: ${errorMessages}`, 'warn'); - postDeployWarnings.push(...errors.map(err => `HTTP gateway "${err.gatewayName}": ${err.error}`)); - } - } - // Post-deploy: Create/update configuration bundles - const configBundleSpecs = context.projectSpec.configBundles ?? 
[]; - if (configBundleSpecs.length > 0) { - // Resolve component key placeholders (e.g., {{gateway:name}} → real ARN) - const resolvedProjectSpec = resolveConfigBundleComponentKeys(context.projectSpec, deployedState, target.name); - - const existingConfigBundles = deployedState.targets?.[target.name]?.resources?.configBundles; - const configBundleResult = await setupConfigBundles({ - region: target.region, - projectSpec: resolvedProjectSpec, - existingBundles: existingConfigBundles, - }); - - // Merge config bundle state into deployed state - if (Object.keys(configBundleResult.configBundles).length > 0) { - const updatedState = await configIO.readDeployedState().catch(() => deployedState); - const targetResources = updatedState.targets[target.name]?.resources; - if (targetResources) { - targetResources.configBundles = configBundleResult.configBundles; - await configIO.writeDeployedState(updatedState); - deployedState = updatedState; + // Log per-KB result so the user sees what happened. + for (const r of ingestResult.results) { + if (r.status === 'started') { + logger.log( + `Knowledge base "${r.knowledgeBaseName}": ingestion started for ${r.startedJobCount} data source(s)` + ); + } else if (r.status === 'skipped') { + logger.log(`Knowledge base "${r.knowledgeBaseName}": skipped (${r.reason})`); + } else { + logger.log(`Knowledge base "${r.knowledgeBaseName}": ${r.error}`, 'warn'); + postDeployWarnings.push(`Knowledge base "${r.knowledgeBaseName}": ${r.error}`); } } - - if (configBundleResult.hasErrors) { - const errors = configBundleResult.results.filter(r => r.status === 'error'); - const errorMessages = errors.map(err => `"${err.bundleName}": ${err.error}`).join('; '); - logger.log(`Config bundle setup warnings: ${errorMessages}`, 'warn'); - postDeployWarnings.push(...errors.map(err => `Config bundle "${err.bundleName}": ${err.error}`)); - } + endStep(ingestResult.hasErrors ? 
'error' : 'success'); } - // Post-deploy: Create/update AB tests - const abTestSpecs = context.projectSpec.abTests ?? []; - if (abTestSpecs.length > 0) { - const existingABTests = deployedState.targets?.[target.name]?.resources?.abTests; - const deployedResources = deployedState.targets?.[target.name]?.resources; - const abTestResult = await setupABTests({ - region: target.region, - projectSpec: context.projectSpec, - existingABTests, - deployedResources, - }); - - // Merge AB test state into deployed state - if (Object.keys(abTestResult.abTests).length > 0) { - const updatedState = await configIO.readDeployedState().catch(() => deployedState); - const targetResources = updatedState.targets[target.name]?.resources; - if (targetResources) { - targetResources.abTests = abTestResult.abTests; - await configIO.writeDeployedState(updatedState); - } - } - - if (abTestResult.hasErrors) { - const errors = abTestResult.results.filter(r => r.status === 'error'); - const errorMessages = errors.map(err => `"${err.testName}": ${err.error}`).join('; '); - logger.log(`AB test setup warnings: ${errorMessages}`, 'warn'); - postDeployWarnings.push(...errors.map(err => `AB test "${err.testName}": ${err.error}`)); - } - } + // Config bundles are now managed via CloudFormation; their state is parsed + // from stack outputs above (no post-deploy API step). AB tests are managed + // as fire-and-forget jobs (agentcore run ab-test), not via the deploy path. // Post-deploy: Enable CloudWatch Transaction Search (non-blocking, silent) const hasHarnesses = isPreviewEnabled() && (context.projectSpec.harnesses ?? []).length > 0; @@ -927,6 +897,10 @@ export async function handleDeploy(options: ValidatedDeployOptions): Promise 0 ? postDeployWarnings : undefined, + postDeployWarnings: allWarnings.length > 0 ? 
allWarnings : undefined, }; } catch (err: unknown) { logger.log(getErrorMessage(err), 'error'); @@ -948,13 +922,3 @@ export async function handleDeploy(options: ValidatedDeployOptions): Promise { } : undefined; + // One-shot user-facing notices (e.g. the managed-memory heads-up before the slow CFN apply). + // Always shown, independent of --progress/--verbose. Clear any active spinner line first so the + // multi-line notice prints cleanly, then resume the spinner frame on the next onProgress tick. + const onNotice = (message: string) => { + if (spinner) { + clearInterval(spinner); + spinner = undefined; + process.stdout.write('\r\x1b[K'); + } + console.log(`\n${message}\n`); + }; + const result = await handleDeploy({ target: options.target!, autoConfirm: options.yes, @@ -113,6 +125,7 @@ async function executeDeploy(options: DeployOptions): Promise { diff: options.diff, onProgress, onResourceEvent, + onNotice, }); if (spinner) { @@ -198,6 +211,7 @@ export const registerDeploy = (program: Command) => { target: cliOptions.target ?? 'default', progress: !cliOptions.json, }; + await handleDeployCLI(options as DeployOptions); } else if (cliOptions.diff) { // Diff-only: use TUI with diff mode diff --git a/src/cli/commands/dev/command.tsx b/src/cli/commands/dev/command.tsx index 7f01a8dfc..bec8ad94e 100644 --- a/src/cli/commands/dev/command.tsx +++ b/src/cli/commands/dev/command.tsx @@ -178,7 +178,7 @@ export const registerDev = (program: Command) => { .option('--exec', 'Execute a shell command in the running dev container (Container agents only) [non-interactive]') .option('--tool ', 'MCP tool name (used with "call-tool" prompt) [non-interactive]') .option('--input ', 'MCP tool arguments as JSON (used with --tool) [non-interactive]') - .option('--skip-deploy', 'Skip automatic resource deployment before starting dev server [preview]') + .option('--skip-deploy', 'Skip automatic resource deployment before starting dev server') .option( '-H, --header
', 'Custom header to forward to the agent (format: "Name: Value", repeatable) [non-interactive]', diff --git a/src/cli/commands/exec/__tests__/command.test.ts b/src/cli/commands/exec/__tests__/command.test.ts index 24b6d1f1d..c0c68ff77 100644 --- a/src/cli/commands/exec/__tests__/command.test.ts +++ b/src/cli/commands/exec/__tests__/command.test.ts @@ -1,6 +1,7 @@ // --------------------------------------------------------------------------- // Telemetry attribute correctness // --------------------------------------------------------------------------- +import { ANSI } from '../../../constants.js'; import { withCommandRunTelemetry } from '../../../telemetry/cli-command-run.js'; import { handleExecOneShot, handleShellSession } from '../action.js'; import { registerExec } from '../command.js'; @@ -549,3 +550,63 @@ describe('exec runExecLoop fatal error handling', () => { expect(mockExit).toHaveBeenCalledWith(1); }); }); + +// --------------------------------------------------------------------------- +// Picker renders in the alternate screen (regression: exec used to drop out of +// fullscreen and leave static frames behind in the normal buffer) +// --------------------------------------------------------------------------- + +describe('exec picker alternate-screen handling', () => { + let mockExit: ReturnType; + let writeSpy: ReturnType; + let writes: string[]; + + beforeEach(() => { + mockExit = vi.spyOn(process, 'exit').mockImplementation((_code?: string | number | null) => { + throw new Error(`process.exit(${_code})`); + }); + writes = []; + writeSpy = vi.spyOn(process.stdout, 'write').mockImplementation((chunk: string | Uint8Array) => { + writes.push(typeof chunk === 'string' ? 
chunk : chunk.toString()); + return true; + }); + }); + + afterEach(() => { + mockExit.mockRestore(); + writeSpy.mockRestore(); + vi.clearAllMocks(); + }); + + it('enters the alt screen for the picker and restores the normal buffer + cursor on select', async () => { + vi.mocked(handleShellSession).mockResolvedValue({ success: true }); + + const { ExecScreen } = await import('../../../tui/screens/exec/index.js'); + vi.mocked(ExecScreen).mockImplementation((_props: unknown) => null as unknown as React.ReactElement); + + // Render the picker, then immediately auto-select so the loop runs once and exits. + const { render } = await import('ink'); + vi.mocked(render).mockImplementation((_element: unknown) => { + const props = (_element as { props: { onSelect: (r: { runtimeArn: string; autoSelected: boolean }) => void } }) + .props; + void Promise.resolve().then(() => + props.onSelect({ runtimeArn: 'arn:aws:bedrock-agentcore:us-east-1:123:runtime/r', autoSelected: true }) + ); + return { unmount: vi.fn(), rerender: vi.fn(), clear: vi.fn(), cleanup: vi.fn(), waitUntilExit: vi.fn() }; + }); + + const program = new Command(); + program.exitOverride(); + registerExec(program); + + // The command calls process.exit when the picker loop finishes; the exact code + // is irrelevant here — we only care that the picker entered/restored the alt screen. + await expect(program.parseAsync(['exec', '--it'], { from: 'user' })).rejects.toThrow(/process\.exit/); + + // Entered the alt screen, then restored the normal buffer + cursor afterwards. 
+ expect(writes).toContain(ANSI.enterAltScreen); + expect(writes.some(w => w.includes(ANSI.exitAltScreen))).toBe(true); + expect(writes.some(w => w.includes(ANSI.showCursor))).toBe(true); + expect(writes.indexOf(ANSI.enterAltScreen)).toBeLessThan(writes.findIndex(w => w.includes(ANSI.exitAltScreen))); + }); +}); diff --git a/src/cli/commands/exec/command.tsx b/src/cli/commands/exec/command.tsx index 3a3ee3974..fc54c3116 100644 --- a/src/cli/commands/exec/command.tsx +++ b/src/cli/commands/exec/command.tsx @@ -1,5 +1,5 @@ import { findConfigRoot } from '../../../lib'; -import { COMMAND_DESCRIPTIONS } from '../../constants'; +import { ANSI, COMMAND_DESCRIPTIONS } from '../../constants'; import { getErrorMessage } from '../../errors'; import { withCommandRunTelemetry } from '../../telemetry/cli-command-run.js'; import { requireProject, requireTTY } from '../../tui/guards'; @@ -239,6 +239,13 @@ interface PickResult { } function pickAgent(): Promise { + // Render the picker in the alternate screen so it (and any inline error state, + // e.g. missing project config) repaints in place instead of leaking static + // frames into the normal buffer. Restore the normal buffer + cursor on exit; + // the PTY shell session that follows runs in the normal buffer. 
+ process.stdout.write(ANSI.enterAltScreen); + const restore = () => process.stdout.write(ANSI.exitAltScreen + ANSI.showCursor); + return new Promise(resolve => { let resolved = false; @@ -248,13 +255,14 @@ function pickAgent(): Promise { if (resolved) return; resolved = true; unmount(); - process.stdout.write('\x1b[2J\x1b[H'); + restore(); resolve({ runtimeArn: result.runtimeArn, sessionId: result.sessionId, autoSelected: result.autoSelected }); }} onExit={() => { if (!resolved) { resolved = true; unmount(); + restore(); resolve(null); } }} diff --git a/src/cli/commands/export/__tests__/harness-action.test.ts b/src/cli/commands/export/__tests__/harness-action.test.ts new file mode 100644 index 000000000..5823007a6 --- /dev/null +++ b/src/cli/commands/export/__tests__/harness-action.test.ts @@ -0,0 +1,42 @@ +import { CUSTOM_DOCKERFILE_NOTE_CATEGORY } from '../constants'; +import { buildCustomDockerfileNote, buildMissingDockerfileNote } from '../harness-action'; +import { describe, expect, it } from 'vitest'; + +// ============================================================================ +// Custom-Dockerfile export note +// ============================================================================ + +describe('buildCustomDockerfileNote', () => { + it('uses the custom-dockerfile category so EXPORT_NOTES is no longer empty', () => { + const note = buildCustomDockerfileNote('Harness.Dockerfile', 'MyHarnessAgent'); + expect(note.category).toBe(CUSTOM_DOCKERFILE_NOTE_CATEGORY); + }); + + it('references the actual dockerfile name and target agent path', () => { + const note = buildCustomDockerfileNote('Harness.Dockerfile', 'MyHarnessAgent'); + expect(note.message).toContain('Harness.Dockerfile'); + expect(note.message).toContain('app/MyHarnessAgent/Harness.Dockerfile'); + }); + + it('warns that the agent will not run as-is and provides the agent build layer', () => { + const note = buildCustomDockerfileNote('Custom.Dockerfile', 'AgentX'); + 
expect(note.message).toMatch(/will NOT run as-is/); + // The appended build layer must install deps, copy code, and set a startup command. + expect(note.message).toContain('uv sync --frozen --no-dev'); + expect(note.message).toContain('COPY --chown=bedrock_agentcore:bedrock_agentcore . .'); + expect(note.message).toContain('CMD ["opentelemetry-instrument", "python", "-m", "main"]'); + }); +}); + +// ============================================================================ +// Missing-Dockerfile export note +// ============================================================================ + +describe('buildMissingDockerfileNote', () => { + it('explains the referenced dockerfile is absent and where to create it', () => { + const note = buildMissingDockerfileNote('Harness.Dockerfile', 'MyHarness', 'MyHarnessAgent'); + expect(note.category).toMatch(/Dockerfile not found/); + expect(note.message).toContain('app/MyHarness/'); + expect(note.message).toContain('app/MyHarnessAgent/Harness.Dockerfile'); + }); +}); diff --git a/src/cli/commands/export/__tests__/harness-mapper.test.ts b/src/cli/commands/export/__tests__/harness-mapper.test.ts new file mode 100644 index 000000000..99bfed754 --- /dev/null +++ b/src/cli/commands/export/__tests__/harness-mapper.test.ts @@ -0,0 +1,1038 @@ +import type { HarnessSpec } from '../../../../schema/schemas/primitives/harness'; +import { + ALLOWED_TOOLS_NOTE_CATEGORY, + AWS_SKILLS_NOTE_CATEGORY, + BROWSER_CODZIP_NOTE_CATEGORY, + BROWSER_IAM_POLICY_NOTE_CATEGORY, + CODE_INTERPRETER_IAM_POLICY_NOTE_CATEGORY, + CONTAINER_URI_ECR_PULL_NOTE_CATEGORY, + CONTAINER_URI_NOTE_CATEGORY, + EXTERNAL_GATEWAY_NOTE_CATEGORY, + GATEWAY_IAM_POLICY_NOTE_CATEGORY, + GIT_SKILLS_CONTAINER_NOTE_CATEGORY, + MCP_HEADER_CREDS_NOTE_CATEGORY, + MEMORY_ARN_NOTE_CATEGORY, + PATH_SKILLS_NOTE_CATEGORY, + S3_SKILLS_IAM_POLICY_NOTE_CATEGORY, +} from '../constants'; +import { mapHarnessToExportConfig } from '../harness-mapper'; +import type { ResolvedHarnessContext } from 
'../types'; +import { describe, expect, it } from 'vitest'; + +// ============================================================================ +// Test helpers +// ============================================================================ + +function baseSpec(overrides: Partial = {}): HarnessSpec { + return { + name: 'TestHarness', + model: { provider: 'bedrock', modelId: 'global.anthropic.claude-sonnet-4-6' }, + tools: [], + skills: [], + ...overrides, + } as HarnessSpec; +} + +function baseContext( + specOverrides: Partial = {}, + contextOverrides: Partial = {} +): ResolvedHarnessContext { + return { + harnessName: 'TestHarness', + targetAgentName: 'TestAgent', + spec: baseSpec(specOverrides), + systemPrompt: 'You are helpful.', + projectSpec: { name: 'myproject', runtimes: [], memories: [], credentials: [], harnesses: [] } as any, + deployedResources: null, + configBaseDir: '/project/agentcore', + projectRoot: '/project', + exportNotes: [], + region: 'us-east-1', + ...contextOverrides, + }; +} + +function noteCategories(context: ResolvedHarnessContext): string[] { + return context.exportNotes.map(n => n.category); +} + +// ============================================================================ +// CodeZip + container suppression +// ============================================================================ + +describe('CodeZip suppression for container harnesses', () => { + it('throws when containerUri is set and build is CodeZip', () => { + const ctx = baseContext({ containerUri: '123.dkr.ecr.us-east-1.amazonaws.com/img:latest' }); + expect(() => mapHarnessToExportConfig(ctx, 'CodeZip')).toThrow(/containerUri.*requires a Container build/); + }); + + it('throws when dockerfile is set and build is CodeZip', () => { + const ctx = baseContext({ dockerfile: 'Dockerfile.custom' }); + expect(() => mapHarnessToExportConfig(ctx, 'CodeZip')).toThrow(/dockerfile.*requires a Container build/); + }); + + it('succeeds when containerUri is set and build is 
Container', () => { + const ctx = baseContext({ containerUri: '123.dkr.ecr.us-east-1.amazonaws.com/img:latest' }); + expect(() => mapHarnessToExportConfig(ctx, 'Container')).not.toThrow(); + }); + + it('succeeds when dockerfile is set and build is Container', () => { + const ctx = baseContext({ dockerfile: 'Dockerfile.custom' }); + expect(() => mapHarnessToExportConfig(ctx, 'Container')).not.toThrow(); + }); + + it('includes containerUri note for Container build', () => { + const ctx = baseContext({ containerUri: '123.dkr.ecr.us-east-1.amazonaws.com/img:latest' }); + mapHarnessToExportConfig(ctx, 'Container'); + expect(noteCategories(ctx)).toContain(CONTAINER_URI_NOTE_CATEGORY); + }); + + it('does not include containerUri note for plain CodeZip harness', () => { + const ctx = baseContext(); + mapHarnessToExportConfig(ctx, 'CodeZip'); + expect(noteCategories(ctx)).not.toContain(CONTAINER_URI_NOTE_CATEGORY); + }); + + it('includes ECR pull note when base image is a private ECR repository', () => { + const ctx = baseContext({ containerUri: '123456789012.dkr.ecr.us-east-1.amazonaws.com/my-base:latest' }); + mapHarnessToExportConfig(ctx, 'Container'); + expect(noteCategories(ctx)).toContain(CONTAINER_URI_ECR_PULL_NOTE_CATEGORY); + const note = ctx.exportNotes.find(n => n.category === CONTAINER_URI_ECR_PULL_NOTE_CATEGORY); + // Note carries the resolved ECR repo ARN and a working grantPull snippet. 
+ expect(note?.message).toContain('arn:aws:ecr:us-east-1:123456789012:repository/my-base'); + expect(note?.message).toContain('ContainerBuildProject.getOrCreate(this).role'); + }); + + it('does not include ECR pull note when base image is a public registry', () => { + const ctx = baseContext({ containerUri: 'public.ecr.aws/docker/library/python:3.12-slim' }); + mapHarnessToExportConfig(ctx, 'Container'); + expect(noteCategories(ctx)).toContain(CONTAINER_URI_NOTE_CATEGORY); + expect(noteCategories(ctx)).not.toContain(CONTAINER_URI_ECR_PULL_NOTE_CATEGORY); + }); +}); + +// ============================================================================ +// Browser tool — CodeZip exclusion + Container inclusion +// ============================================================================ + +describe('browser tool handling', () => { + const browserTool = { type: 'agentcore_browser' as const, name: 'browser' }; + + it('sets hasBrowser=false and emits CodeZip note for CodeZip build', () => { + const ctx = baseContext({ tools: [browserTool] }); + const { renderConfig } = mapHarnessToExportConfig(ctx, 'CodeZip'); + expect(renderConfig.hasBrowser).toBe(false); + expect(noteCategories(ctx)).toContain(BROWSER_CODZIP_NOTE_CATEGORY); + }); + + it('sets hasBrowser=true and emits IAM note for Container build', () => { + const ctx = baseContext({ tools: [browserTool] }); + const { renderConfig } = mapHarnessToExportConfig(ctx, 'Container'); + expect(renderConfig.hasBrowser).toBe(true); + expect(noteCategories(ctx)).toContain(BROWSER_IAM_POLICY_NOTE_CATEGORY); + }); + + it('CodeZip note re-export hint uses --name flag', () => { + const ctx = baseContext({ tools: [browserTool] }); + mapHarnessToExportConfig(ctx, 'CodeZip'); + const note = ctx.exportNotes.find(n => n.category === BROWSER_CODZIP_NOTE_CATEGORY)!; + expect(note.message).toContain('--name TestHarness'); + expect(note.message).not.toContain('--harness'); + }); + + it('IAM note uses default browser ARN when no custom 
browserArn', () => { + const ctx = baseContext({ tools: [browserTool] }); + mapHarnessToExportConfig(ctx, 'Container'); + const note = ctx.exportNotes.find(n => n.category === BROWSER_IAM_POLICY_NOTE_CATEGORY)!; + expect(note.message).toContain(':aws:browser/*'); + }); + + it('IAM note uses custom browserArn when provided', () => { + const ctx = baseContext({ + tools: [ + { + ...browserTool, + config: { + agentCoreBrowser: { browserArn: 'arn:aws:bedrock-agentcore:us-east-1:123:browser-custom/my_browser_id' }, + }, + }, + ], + }); + mapHarnessToExportConfig(ctx, 'Container'); + const note = ctx.exportNotes.find(n => n.category === BROWSER_IAM_POLICY_NOTE_CATEGORY)!; + expect(note.message).toContain('arn:aws:bedrock-agentcore:us-east-1:123:browser-custom/my_browser_id'); + }); +}); + +// ============================================================================ +// Code interpreter tool +// ============================================================================ + +describe('code interpreter tool handling', () => { + const ciTool = { type: 'agentcore_code_interpreter' as const, name: 'code-interpreter' }; + + it('sets hasCodeInterpreter=true for CodeZip build', () => { + const ctx = baseContext({ tools: [ciTool] }); + const { renderConfig } = mapHarnessToExportConfig(ctx, 'CodeZip'); + expect(renderConfig.hasCodeInterpreter).toBe(true); + }); + + it('emits IAM note with default ARN', () => { + const ctx = baseContext({ tools: [ciTool] }); + mapHarnessToExportConfig(ctx, 'CodeZip'); + const note = ctx.exportNotes.find(n => n.category === CODE_INTERPRETER_IAM_POLICY_NOTE_CATEGORY)!; + expect(note.message).toContain(':aws:code-interpreter/*'); + }); + + it('emits IAM note with custom codeInterpreterArn when provided', () => { + const ctx = baseContext({ + tools: [ + { + ...ciTool, + config: { + agentCoreCodeInterpreter: { + codeInterpreterArn: 'arn:aws:bedrock-agentcore:us-east-1:123:code-interpreter-custom/my_ci_id', + }, + }, + }, + ], + }); + 
mapHarnessToExportConfig(ctx, 'CodeZip'); + const note = ctx.exportNotes.find(n => n.category === CODE_INTERPRETER_IAM_POLICY_NOTE_CATEGORY)!; + expect(note.message).toContain('arn:aws:bedrock-agentcore:us-east-1:123:code-interpreter-custom/my_ci_id'); + }); +}); + +// ============================================================================ +// Custom tool identifier extraction (browserIdentifier / codeInterpreterIdentifier) +// ============================================================================ + +describe('custom tool identifier extraction', () => { + it('extracts browserIdentifier from browserArn', () => { + const ctx = baseContext({ + tools: [ + { + type: 'agentcore_browser' as const, + name: 'browser', + config: { + agentCoreBrowser: { browserArn: 'arn:aws:bedrock-agentcore:us-east-1:123:browser-custom/browser_abc123' }, + }, + }, + ], + }); + const { renderConfig } = mapHarnessToExportConfig(ctx, 'Container'); + expect(renderConfig.browserIdentifier).toBe('browser_abc123'); + }); + + it('sets browserIdentifier=undefined when no custom browserArn', () => { + const ctx = baseContext({ tools: [{ type: 'agentcore_browser' as const, name: 'browser' }] }); + const { renderConfig } = mapHarnessToExportConfig(ctx, 'Container'); + expect(renderConfig.browserIdentifier).toBeUndefined(); + }); + + it('extracts codeInterpreterIdentifier from codeInterpreterArn', () => { + const ctx = baseContext({ + tools: [ + { + type: 'agentcore_code_interpreter' as const, + name: 'ci', + config: { + agentCoreCodeInterpreter: { + codeInterpreterArn: 'arn:aws:bedrock-agentcore:us-east-1:123:code-interpreter-custom/ci_xyz789', + }, + }, + }, + ], + }); + const { renderConfig } = mapHarnessToExportConfig(ctx, 'CodeZip'); + expect(renderConfig.codeInterpreterIdentifier).toBe('ci_xyz789'); + }); + + it('sets codeInterpreterIdentifier=undefined when no custom codeInterpreterArn', () => { + const ctx = baseContext({ tools: [{ type: 'agentcore_code_interpreter' as const, name: 
'ci' }] }); + const { renderConfig } = mapHarnessToExportConfig(ctx, 'CodeZip'); + expect(renderConfig.codeInterpreterIdentifier).toBeUndefined(); + }); +}); + +// ============================================================================ +// allowedTools filtering +// ============================================================================ + +describe('allowedTools filtering', () => { + const browserTool = { type: 'agentcore_browser' as const, name: 'browser' }; + const ciTool = { type: 'agentcore_code_interpreter' as const, name: 'code-interpreter' }; + + it('excludes browser when not in allowedTools', () => { + const ctx = baseContext({ tools: [browserTool, ciTool], allowedTools: ['code-interpreter'] }); + const { renderConfig } = mapHarnessToExportConfig(ctx, 'Container'); + expect(renderConfig.hasBrowser).toBe(false); + expect(renderConfig.hasCodeInterpreter).toBe(true); + }); + + it('excludes code interpreter when not in allowedTools', () => { + const ctx = baseContext({ tools: [browserTool, ciTool], allowedTools: ['browser'] }); + const { renderConfig } = mapHarnessToExportConfig(ctx, 'Container'); + expect(renderConfig.hasBrowser).toBe(true); + expect(renderConfig.hasCodeInterpreter).toBe(false); + }); + + it('includes all tools when allowedTools is wildcard', () => { + const ctx = baseContext({ tools: [browserTool, ciTool], allowedTools: ['*'] }); + const { renderConfig } = mapHarnessToExportConfig(ctx, 'Container'); + expect(renderConfig.hasBrowser).toBe(true); + expect(renderConfig.hasCodeInterpreter).toBe(true); + }); + + it('emits allowedTools note when filter is not wildcard', () => { + const ctx = baseContext({ tools: [browserTool, ciTool], allowedTools: ['code-interpreter'] }); + mapHarnessToExportConfig(ctx, 'Container'); + expect(noteCategories(ctx)).toContain(ALLOWED_TOOLS_NOTE_CATEGORY); + }); + + it('does not emit allowedTools note when filter is wildcard', () => { + const ctx = baseContext({ tools: [browserTool, ciTool], allowedTools: 
['*'] }); + mapHarnessToExportConfig(ctx, 'Container'); + expect(noteCategories(ctx)).not.toContain(ALLOWED_TOOLS_NOTE_CATEGORY); + }); + + it('does not emit allowedTools note when no allowedTools set (defaults to wildcard)', () => { + const ctx = baseContext({ tools: [browserTool] }); + mapHarnessToExportConfig(ctx, 'Container'); + expect(noteCategories(ctx)).not.toContain(ALLOWED_TOOLS_NOTE_CATEGORY); + }); + + it('does not emit browser IAM note when browser excluded by allowedTools on Container build', () => { + const ctx = baseContext({ tools: [browserTool, ciTool], allowedTools: ['code-interpreter'] }); + mapHarnessToExportConfig(ctx, 'Container'); + expect(noteCategories(ctx)).not.toContain(BROWSER_IAM_POLICY_NOTE_CATEGORY); + }); + + it('does not emit code interpreter IAM note when CI excluded by allowedTools', () => { + const ctx = baseContext({ tools: [browserTool, ciTool], allowedTools: ['browser'] }); + mapHarnessToExportConfig(ctx, 'Container'); + expect(noteCategories(ctx)).not.toContain(CODE_INTERPRETER_IAM_POLICY_NOTE_CATEGORY); + }); +}); + +// ============================================================================ +// Truncation config translation +// ============================================================================ + +describe('truncation config translation', () => { + it('translates sliding_window messagesCount to window_size', () => { + const ctx = baseContext({ + truncation: { strategy: 'sliding_window', config: { slidingWindow: { messagesCount: 4 } } }, + }); + const { renderConfig } = mapHarnessToExportConfig(ctx, 'CodeZip'); + expect(renderConfig.truncationStrategy).toBe('sliding_window'); + expect(renderConfig.truncationConfig).toEqual({ window_size: 4 }); + }); + + it('returns undefined truncationConfig when no messagesCount', () => { + const ctx = baseContext({ truncation: { strategy: 'sliding_window' } }); + const { renderConfig } = mapHarnessToExportConfig(ctx, 'CodeZip'); + 
expect(renderConfig.truncationStrategy).toBe('sliding_window'); + expect(renderConfig.truncationConfig).toBeUndefined(); + }); + + it('translates all summarization fields to snake_case', () => { + const ctx = baseContext({ + truncation: { + strategy: 'summarization', + config: { + summarization: { + summaryRatio: 0.3, + preserveRecentMessages: 2, + summarizationSystemPrompt: 'Be concise.', + }, + }, + }, + }); + const { renderConfig } = mapHarnessToExportConfig(ctx, 'CodeZip'); + expect(renderConfig.truncationStrategy).toBe('summarization'); + expect(renderConfig.truncationConfig).toEqual({ + summary_ratio: 0.3, + preserve_recent_messages: 2, + summarization_system_prompt: 'Be concise.', + }); + }); + + it('translates partial summarization fields', () => { + const ctx = baseContext({ + truncation: { strategy: 'summarization', config: { summarization: { summaryRatio: 0.5 } } }, + }); + const { renderConfig } = mapHarnessToExportConfig(ctx, 'CodeZip'); + expect(renderConfig.truncationConfig).toEqual({ summary_ratio: 0.5 }); + }); + + it('returns undefined truncationConfig when summarization has no fields', () => { + const ctx = baseContext({ truncation: { strategy: 'summarization' } }); + const { renderConfig } = mapHarnessToExportConfig(ctx, 'CodeZip'); + expect(renderConfig.truncationConfig).toBeUndefined(); + }); + + it('returns undefined truncationStrategy when no truncation configured', () => { + const ctx = baseContext(); + const { renderConfig } = mapHarnessToExportConfig(ctx, 'CodeZip'); + expect(renderConfig.truncationStrategy).toBeUndefined(); + expect(renderConfig.truncationConfig).toBeUndefined(); + }); +}); + +// ============================================================================ +// Build type auto-detection +// ============================================================================ + +describe('build type auto-detection', () => { + it('defaults to CodeZip when no override and no container fields', () => { + const ctx = baseContext(); + 
const { renderConfig } = mapHarnessToExportConfig(ctx); + expect(renderConfig.buildType).toBe('CodeZip'); + }); + + it('defaults to Container when containerUri is present', () => { + const ctx = baseContext({ containerUri: '123.dkr.ecr.us-east-1.amazonaws.com/img:latest' }); + const { renderConfig } = mapHarnessToExportConfig(ctx); + expect(renderConfig.buildType).toBe('Container'); + }); + + it('defaults to Container when dockerfile is present', () => { + const ctx = baseContext({ dockerfile: 'Dockerfile' }); + const { renderConfig } = mapHarnessToExportConfig(ctx); + expect(renderConfig.buildType).toBe('Container'); + }); + + it('override takes precedence over spec fields', () => { + const ctx = baseContext({ containerUri: '123.dkr.ecr.us-east-1.amazonaws.com/img:latest' }); + // Container override — no throw since it matches the spec + const { renderConfig } = mapHarnessToExportConfig(ctx, 'Container'); + expect(renderConfig.buildType).toBe('Container'); + }); +}); + +// ============================================================================ +// Skills notes +// ============================================================================ + +describe('skills notes', () => { + it('emits path skills note when path skills present and build is CodeZip', () => { + const ctx = baseContext({ skills: [{ path: 'skills/my_skill' }] }); + mapHarnessToExportConfig(ctx, 'CodeZip'); + expect(noteCategories(ctx)).toContain(PATH_SKILLS_NOTE_CATEGORY); + }); + + it('does not emit path skills note for Container build', () => { + const ctx = baseContext({ skills: [{ path: 'skills/my_skill' }] }); + mapHarnessToExportConfig(ctx, 'Container'); + expect(noteCategories(ctx)).not.toContain(PATH_SKILLS_NOTE_CATEGORY); + }); + + it('emits git skills note for Container build', () => { + const ctx = baseContext({ skills: [{ gitUrl: 'https://github.com/org/repo' }] }); + mapHarnessToExportConfig(ctx, 'Container'); + 
expect(noteCategories(ctx)).toContain(GIT_SKILLS_CONTAINER_NOTE_CATEGORY); + }); + + it('does not emit git skills note for CodeZip build', () => { + const ctx = baseContext({ skills: [{ gitUrl: 'https://github.com/org/repo' }] }); + mapHarnessToExportConfig(ctx, 'CodeZip'); + expect(noteCategories(ctx)).not.toContain(GIT_SKILLS_CONTAINER_NOTE_CATEGORY); + }); + + it('emits s3 skills IAM note with GetObject + ListBucket ARNs for CodeZip', () => { + const ctx = baseContext({ skills: [{ s3Uri: 's3://my-bucket/skills/weather/' }] }, { targetAgentName: 'MyAgent' }); + mapHarnessToExportConfig(ctx, 'CodeZip'); + expect(noteCategories(ctx)).toContain(S3_SKILLS_IAM_POLICY_NOTE_CATEGORY); + const note = ctx.exportNotes.find(n => n.category === S3_SKILLS_IAM_POLICY_NOTE_CATEGORY); + // Object-level read scoped to the prefix + expect(note?.message).toContain("actions: ['s3:GetObject']"); + expect(note?.message).toContain("'arn:aws:s3:::my-bucket/skills/weather/*'"); + // List scoped to the bucket + expect(note?.message).toContain("actions: ['s3:ListBucket']"); + expect(note?.message).toContain("'arn:aws:s3:::my-bucket'"); + // Snippet targets the renamed agent + expect(note?.message).toContain("this.application.environments.get('MyAgent')"); + }); + + it('emits s3 skills IAM note for Container builds too (independent of build type)', () => { + const ctx = baseContext({ skills: [{ s3Uri: 's3://my-bucket/skills/weather/' }] }); + mapHarnessToExportConfig(ctx, 'Container'); + expect(noteCategories(ctx)).toContain(S3_SKILLS_IAM_POLICY_NOTE_CATEGORY); + }); + + it('uses bucket-root object ARN when the s3 URI has no prefix', () => { + const ctx = baseContext({ skills: [{ s3Uri: 's3://my-bucket' }] }); + mapHarnessToExportConfig(ctx, 'CodeZip'); + const note = ctx.exportNotes.find(n => n.category === S3_SKILLS_IAM_POLICY_NOTE_CATEGORY); + expect(note?.message).toContain("'arn:aws:s3:::my-bucket/*'"); + expect(note?.message).toContain("'arn:aws:s3:::my-bucket'"); + }); + + 
it('deduplicates ARNs across multiple s3 skills in the same bucket', () => { + const ctx = baseContext({ + skills: [{ s3Uri: 's3://shared/a/' }, { s3Uri: 's3://shared/b/' }], + }); + mapHarnessToExportConfig(ctx, 'CodeZip'); + const note = ctx.exportNotes.find(n => n.category === S3_SKILLS_IAM_POLICY_NOTE_CATEGORY); + // One ListBucket resource for the shared bucket, two distinct object ARNs + expect(note?.message).toContain("'arn:aws:s3:::shared/a/*'"); + expect(note?.message).toContain("'arn:aws:s3:::shared/b/*'"); + expect(note?.message.match(/arn:aws:s3:::shared'/g)?.length).toBe(1); + }); + + it('uses the GovCloud partition prefix for gov regions', () => { + const ctx = baseContext({ skills: [{ s3Uri: 's3://gov-bucket/skills/' }] }, { region: 'us-gov-west-1' }); + mapHarnessToExportConfig(ctx, 'CodeZip'); + const note = ctx.exportNotes.find(n => n.category === S3_SKILLS_IAM_POLICY_NOTE_CATEGORY); + expect(note?.message).toContain("'arn:aws-us-gov:s3:::gov-bucket/skills/*'"); + }); + + it('does not emit s3 skills IAM note when there are no s3 skills', () => { + const ctx = baseContext({ skills: [{ path: 'skills/local' }] }); + mapHarnessToExportConfig(ctx, 'CodeZip'); + expect(noteCategories(ctx)).not.toContain(S3_SKILLS_IAM_POLICY_NOTE_CATEGORY); + }); +}); + +// ============================================================================ +// skills render config mapping (new flat schema shape) +// ============================================================================ + +describe('skills render config mapping', () => { + it('maps path, s3, and git skills into the render config', () => { + const ctx = baseContext({ + skills: [ + { path: 'skills/local' }, + { s3Uri: 's3://bucket/skills/xlsx/' }, + { + gitUrl: 'https://github.com/org/repo', + path: 'skills/x', + auth: { credentialName: 'MyGitCred', username: 'me' }, + }, + ], + }); + const { renderConfig } = mapHarnessToExportConfig(ctx, 'Container'); + 
expect(renderConfig.pathSkills).toEqual(['skills/local']); + expect(renderConfig.s3Skills).toEqual(['s3://bucket/skills/xlsx/']); + expect(renderConfig.gitSkills).toEqual([ + { url: 'https://github.com/org/repo', path: 'skills/x', credentialArn: 'MyGitCred', username: 'me' }, + ]); + expect(renderConfig.hasFetchedSkills).toBe(true); + }); + + it('does not include AWS skills in path/s3/git render config arrays and emits export note', () => { + const ctx = baseContext({ + skills: [{ path: 'skills/local' }, { awsSkills: { paths: ['core-skills/*'] } }], + }); + const { renderConfig } = mapHarnessToExportConfig(ctx, 'Container'); + expect(renderConfig.pathSkills).toEqual(['skills/local']); + expect(renderConfig.s3Skills).toEqual([]); + expect(renderConfig.gitSkills).toEqual([]); + expect(renderConfig.hasFetchedSkills).toBe(false); + expect(noteCategories(ctx)).toContain(AWS_SKILLS_NOTE_CATEGORY); + const note = ctx.exportNotes.find(n => n.category === AWS_SKILLS_NOTE_CATEGORY); + expect(note?.message).toContain('core-skills/*'); + expect(note?.message).toContain('https://github.com/aws/agent-toolkit-for-aws/tree/main/skills'); + }); + + it('does not emit AWS skills note when no AWS skills are present', () => { + const ctx = baseContext({ + skills: [{ path: 'skills/local' }, { s3Uri: 's3://bucket/skill' }], + }); + mapHarnessToExportConfig(ctx, 'Container'); + expect(noteCategories(ctx)).not.toContain(AWS_SKILLS_NOTE_CATEGORY); + }); +}); + +// ============================================================================ +// model provider +// ============================================================================ + +describe('resolveModelProvider', () => { + it('rejects the lite_llm provider (unsupported by Strands export)', () => { + const ctx = baseContext({ model: { provider: 'lite_llm', modelId: 'some-model' } as never }); + expect(() => mapHarnessToExportConfig(ctx, 'CodeZip')).toThrow(/lite_llm.*does not support/); + }); +}); + +// 
============================================================================ +// extractToolIdentifier edge cases +// ============================================================================ + +describe('extractToolIdentifier edge cases', () => { + it('returns undefined when ARN has no slash', () => { + const ctx = baseContext({ + tools: [ + { + type: 'agentcore_browser' as const, + name: 'browser', + config: { agentCoreBrowser: { browserArn: 'arn:aws:bedrock-agentcore:us-east-1:123:noslash' } }, + }, + ], + }); + const { renderConfig } = mapHarnessToExportConfig(ctx, 'Container'); + expect(renderConfig.browserIdentifier).toBeUndefined(); + }); + + it('returns undefined when browserArn is empty string', () => { + const ctx = baseContext({ + tools: [ + { + type: 'agentcore_browser' as const, + name: 'browser', + config: { agentCoreBrowser: { browserArn: '' } }, + }, + ], + }); + const { renderConfig } = mapHarnessToExportConfig(ctx, 'Container'); + expect(renderConfig.browserIdentifier).toBeUndefined(); + }); +}); + +// ============================================================================ +// resolveTruncationConfig edge cases +// ============================================================================ + +describe('resolveTruncationConfig edge cases', () => { + it('returns undefined when sliding_window config has no slidingWindow key', () => { + const ctx = baseContext({ + truncation: { strategy: 'sliding_window', config: {} as any }, + }); + const { renderConfig } = mapHarnessToExportConfig(ctx, 'CodeZip'); + expect(renderConfig.truncationConfig).toBeUndefined(); + }); + + it('returns undefined for unknown strategy', () => { + const ctx = baseContext({ + truncation: { strategy: 'sliding_window', config: { unknownKey: { foo: 1 } } as any }, + }); + const { renderConfig } = mapHarnessToExportConfig(ctx, 'CodeZip'); + expect(renderConfig.truncationConfig).toBeUndefined(); + }); +}); + +// 
============================================================================
+// resolveMemoryProviders
+// ============================================================================
+
+/** Returns a fresh minimal project spec per call; `overrides` replace or add top-level keys. */
+function exportTestProjectSpec(overrides: Record<string, unknown> = {}): any {
+  return {
+    name: 'myproject',
+    runtimes: [],
+    memories: [],
+    credentials: [],
+    harnesses: [],
+    ...overrides,
+  };
+}
+
+describe('resolveMemoryProviders', () => {
+  it('returns empty providers when no memory configured', () => {
+    const mapped = mapHarnessToExportConfig(baseContext(), 'CodeZip');
+    expect(mapped.renderConfig.hasMemory).toBe(false);
+    expect(mapped.renderConfig.memoryProviders).toHaveLength(0);
+  });
+
+  it('resolves same-project memory by name with env var', () => {
+    const harnessCtx = baseContext(
+      { memory: { mode: 'existing', name: 'MyMemory' } },
+      { projectSpec: exportTestProjectSpec({ memories: [{ name: 'MyMemory', strategies: [] }] }) }
+    );
+    const { renderConfig: rendered } = mapHarnessToExportConfig(harnessCtx, 'CodeZip');
+    expect(rendered.hasMemory).toBe(true);
+    expect(rendered.memoryProviders).toHaveLength(1);
+    const firstProvider = rendered.memoryProviders?.at(0);
+    expect(firstProvider!.name).toBe('MyMemory');
+    expect(firstProvider!.envVarName).toBe('MEMORY_MYMEMORY_ID');
+  });
+
+  it('resolves memory by ARN via deployed state match', () => {
+    const deployedMemoryArn = 'arn:aws:bedrock-agentcore:us-east-1:123:memory/abc123';
+    const harnessCtx = baseContext(
+      { memory: { mode: 'existing', arn: deployedMemoryArn } },
+      {
+        deployedResources: {
+          memories: { DeployedMem: { memoryArn: deployedMemoryArn } },
+        } as any,
+        projectSpec: exportTestProjectSpec({ memories: [{ name: 'DeployedMem', strategies: [] }] }),
+      }
+    );
+    const { renderConfig: rendered } = mapHarnessToExportConfig(harnessCtx, 'CodeZip');
+    expect(rendered.hasMemory).toBe(true);
+    const firstProvider = rendered.memoryProviders?.at(0);
+    expect(firstProvider!.name).toBe('DeployedMem');
+    expect(firstProvider!.envVarName).toBe('MEMORY_DEPLOYEDMEM_ID');
+  });
+
+  it('falls back to MEMORY_ARN env var for external memory ARN and emits note', () => {
+    const harnessCtx = baseContext(
+      { memory: { mode: 'existing', arn: 'arn:aws:bedrock-agentcore:us-east-1:999:memory/external' } },
+      { deployedResources: null }
+    );
+    const { renderConfig: rendered } = mapHarnessToExportConfig(harnessCtx, 'CodeZip');
+    expect(rendered.hasMemory).toBe(true);
+    expect(rendered.memoryProviders?.at(0)!.envVarName).toBe('MEMORY_ARN');
+    expect(noteCategories(harnessCtx)).toContain(MEMORY_ARN_NOTE_CATEGORY);
+  });
+});
+
+// ============================================================================
+// model ID propagation
+// ============================================================================
+
+describe('model ID propagation to renderConfig', () => {
+  it('propagates the bedrock model ID', () => {
+    const harnessCtx = baseContext({ model: { provider: 'bedrock', modelId: 'anthropic.claude-3' } });
+    const mapped = mapHarnessToExportConfig(harnessCtx, 'CodeZip');
+    expect(mapped.renderConfig.modelId).toBe('anthropic.claude-3');
+  });
+
+  it('propagates the OpenAI model ID (does not hardcode gpt-4.1)', () => {
+    const harnessCtx = baseContext({
+      model: {
+        provider: 'open_ai',
+        modelId: 'gpt-4o',
+        apiKeyArn: 'arn:aws:secretsmanager:us-east-1:123:secret/openai',
+      },
+    });
+    const mapped = mapHarnessToExportConfig(harnessCtx, 'CodeZip');
+    expect(mapped.renderConfig.modelId).toBe('gpt-4o');
+  });
+
+  it('propagates the Gemini model ID (does not hardcode gemini-2.5-flash)', () => {
+    const harnessCtx = baseContext({
+      model: {
+        provider: 'gemini',
+        modelId: 'gemini-1.5-pro',
+        apiKeyArn: 'arn:aws:secretsmanager:us-east-1:123:secret/gemini',
+      },
+    });
+    const mapped = mapHarnessToExportConfig(harnessCtx, 'CodeZip');
+    expect(mapped.renderConfig.modelId).toBe('gemini-1.5-pro');
+  });
+});
+
+// ============================================================================
+// resolveIdentityProvider
+// ============================================================================
+
+describe('resolveIdentityProvider', () => {
+  it('returns no identity provider for bedrock model', () => {
+    const harnessCtx = baseContext({ model: { provider: 'bedrock', modelId: 'anthropic.claude-3' } });
+    const mapped = mapHarnessToExportConfig(harnessCtx, 'CodeZip');
+    expect(mapped.renderConfig.hasIdentity).toBe(false);
+    expect(mapped.renderConfig.identityProviders).toHaveLength(0);
+    expect(mapped.credentialEntry).toBeNull();
+  });
+
+  it('creates new credential entry for OpenAI model with apiKeyArn', () => {
+    const harnessCtx = baseContext({
+      model: {
+        provider: 'open_ai',
+        modelId: 'gpt-4o',
+        apiKeyArn: 'arn:aws:secretsmanager:us-east-1:123:secret/openai',
+      },
+    });
+    const mapped = mapHarnessToExportConfig(harnessCtx, 'CodeZip');
+    expect(mapped.renderConfig.hasIdentity).toBe(true);
+    expect(mapped.renderConfig.identityProviders).toHaveLength(1);
+    expect(mapped.credentialEntry).not.toBeNull();
+    expect(mapped.credentialEntry!.name).toContain('OpenAI');
+  });
+
+  it('creates new credential entry for Gemini model', () => {
+    const harnessCtx = baseContext({
+      model: {
+        provider: 'gemini',
+        modelId: 'gemini-1.5-pro',
+        apiKeyArn: 'arn:aws:secretsmanager:us-east-1:123:secret/gemini',
+      },
+    });
+    const mapped = mapHarnessToExportConfig(harnessCtx, 'CodeZip');
+    expect(mapped.renderConfig.hasIdentity).toBe(true);
+    expect(mapped.credentialEntry!.name).toContain('Gemini');
+  });
+
+  it('reuses existing credential when apiKeyArn matches project credential', () => {
+    const sharedKeyArn = 'arn:aws:secretsmanager:us-east-1:123:secret/openai';
+    const harnessCtx = baseContext(
+      { model: { provider: 'open_ai', modelId: 'gpt-4o', apiKeyArn: sharedKeyArn } },
+      {
+        projectSpec: exportTestProjectSpec({
+          credentials: [
+            { name: 'ExistingOpenAI', authorizerType: 'ApiKeyCredentialProvider', apiKeyArn: sharedKeyArn },
+          ],
+        }),
+      }
+    );
+    const mapped = mapHarnessToExportConfig(harnessCtx, 'CodeZip');
+    expect(mapped.renderConfig.identityProviders?.at(0)!.name).toBe('ExistingOpenAI');
+    // The credential is already in the project, so no new entry is produced.
+    expect(mapped.credentialEntry).toBeNull();
+  });
+
+  it('returns no identity when non-bedrock model has no apiKeyArn', () => {
+    const harnessCtx = baseContext({ model: { provider: 'open_ai', modelId: 'gpt-4o' } });
+    const mapped = mapHarnessToExportConfig(harnessCtx, 'CodeZip');
+    expect(mapped.renderConfig.hasIdentity).toBe(false);
+  });
+});
+
+// ============================================================================
+// resolveGatewayProviders
+// ============================================================================
+
+describe('resolveGatewayProviders', () => {
+  const gwArn = 'arn:aws:bedrock-agentcore:us-east-1:123456789012:gateway/gw-abc123';
+  const gwTool = {
+    type: 'agentcore_gateway' as const,
+    name: 'my-gateway',
+    config: { agentCoreGateway: { gatewayArn: gwArn } },
+  };
+
+  /** Context whose deployed state maps `MyGateway` to `gwArn`, with the given project gateway entry. */
+  const sameProjectGatewayContext = (gatewayEntry: Record<string, unknown>) =>
+    baseContext(
+      { tools: [gwTool] },
+      {
+        deployedResources: {
+          mcp: { gateways: { MyGateway: { gatewayArn: gwArn } } },
+        } as any,
+        projectSpec: exportTestProjectSpec({ agentCoreGateways: [gatewayEntry] }),
+      }
+    );
+
+  it('returns no gateway providers when no gateway tools', () => {
+    const mapped = mapHarnessToExportConfig(baseContext(), 'CodeZip');
+    expect(mapped.renderConfig.hasGateway).toBe(false);
+    expect(mapped.renderConfig.gatewayProviders).toHaveLength(0);
+  });
+
+  it('resolves same-project gateway via deployed state without IAM note', () => {
+    const harnessCtx = sameProjectGatewayContext({ name: 'MyGateway', authorizerType: 'AWS_IAM' });
+    const { renderConfig: rendered } = mapHarnessToExportConfig(harnessCtx, 'CodeZip');
+    expect(rendered.hasGateway).toBe(true);
+    const firstGateway = rendered.gatewayProviders?.at(0);
+    expect(firstGateway!.name).toBe('MyGateway');
+    expect(firstGateway!.authType).toBe('AWS_IAM');
+    expect(noteCategories(harnessCtx)).not.toContain(GATEWAY_IAM_POLICY_NOTE_CATEGORY);
+  });
+
+  it('resolves same-project CUSTOM_JWT gateway with discoveryUrl and scopes', () => {
+    const harnessCtx = sameProjectGatewayContext({
+      name: 'MyGateway',
+      authorizerType: 'CUSTOM_JWT',
+      authorizerConfiguration: {
+        customJwtAuthorizer: {
+          discoveryUrl: 'https://auth.example.com/.well-known/openid-configuration',
+          allowedScopes: ['read', 'write'],
+        },
+      },
+    });
+    const { renderConfig: rendered } = mapHarnessToExportConfig(harnessCtx, 'CodeZip');
+    const firstGateway = rendered.gatewayProviders.at(0);
+    expect(firstGateway?.authType).toBe('CUSTOM_JWT');
+    expect(firstGateway?.discoveryUrl).toBe('https://auth.example.com/.well-known/openid-configuration');
+    expect(firstGateway?.scopes).toBe('read write');
+    expect(noteCategories(harnessCtx)).not.toContain(GATEWAY_IAM_POLICY_NOTE_CATEGORY);
+  });
+
+  it('hardcodes URL for external gateway and emits external note + IAM note', () => {
+    const harnessCtx = baseContext({ tools: [gwTool] }, { deployedResources: null });
+    const { renderConfig: rendered } = mapHarnessToExportConfig(harnessCtx, 'CodeZip');
+    expect(rendered.hasGateway).toBe(true);
+    const firstGateway = rendered.gatewayProviders.at(0);
+    expect(firstGateway?.hardcodedUrl).toContain('gateway.bedrock-agentcore');
+    expect(noteCategories(harnessCtx)).toContain(EXTERNAL_GATEWAY_NOTE_CATEGORY);
+    expect(noteCategories(harnessCtx)).toContain(GATEWAY_IAM_POLICY_NOTE_CATEGORY);
+  });
+
+  it('excludes gateway tool filtered out by allowedTools', () => {
+    const harnessCtx = baseContext({ tools: [gwTool], allowedTools: ['other-tool'] });
+    const mapped = mapHarnessToExportConfig(harnessCtx, 'CodeZip');
+    expect(mapped.renderConfig.hasGateway).toBe(false);
+  });
+});
+
+// ============================================================================
+// resolveRemoteMcpTools
+// ============================================================================
+
+describe('resolveRemoteMcpTools', () => {
+  /** Fresh header-less remote MCP tool fixture for each test. */
+  const plainMcpTool = () => ({
+    type: 'remote_mcp' as const,
+    name: 'my-mcp',
+    config: { remoteMcp: { url: 'https://mcp.example.com/sse' } },
+  });
+
+  it('returns remote MCP tool with URL', () => {
+    const harnessCtx = baseContext({ tools: [plainMcpTool()] });
+    const { renderConfig: rendered } = mapHarnessToExportConfig(harnessCtx, 'CodeZip');
+    expect(rendered.remoteMcpTools).toHaveLength(1);
+    const firstTool = rendered.remoteMcpTools?.at(0);
+    expect(firstTool!.url).toBe('https://mcp.example.com/sse');
+    expect(firstTool!.name).toBe('my-mcp');
+  });
+
+  it('generates credential entries for MCP tools with headers', () => {
+    const harnessCtx = baseContext({
+      tools: [
+        {
+          type: 'remote_mcp' as const,
+          name: 'my-mcp',
+          config: {
+            remoteMcp: {
+              url: 'https://mcp.example.com/sse',
+              headers: { Authorization: 'Bearer secret-token' },
+            },
+          },
+        },
+      ],
+    });
+    const mapped = mapHarnessToExportConfig(harnessCtx, 'CodeZip');
+    expect(mapped.mcpCredentialEntries).toHaveLength(1);
+    const firstEntry = mapped.mcpCredentialEntries.at(0);
+    expect(firstEntry!.value).toBe('Bearer secret-token');
+    expect(firstEntry!.credential.name).toContain('Mcp');
+    expect(noteCategories(harnessCtx)).toContain(MCP_HEADER_CREDS_NOTE_CATEGORY);
+  });
+
+  it('returns no credential entries for MCP tools without headers', () => {
+    const harnessCtx = baseContext({ tools: [plainMcpTool()] });
+    const mapped = mapHarnessToExportConfig(harnessCtx, 'CodeZip');
+    expect(mapped.mcpCredentialEntries).toHaveLength(0);
+    expect(noteCategories(harnessCtx)).not.toContain(MCP_HEADER_CREDS_NOTE_CATEGORY);
+  });
+
+  it('excludes remote MCP tool filtered by allowedTools', () => {
+    const harnessCtx = baseContext({
+      tools: [plainMcpTool()],
+      allowedTools: ['other-tool'],
+    });
+    const mapped = mapHarnessToExportConfig(harnessCtx, 'CodeZip');
+    expect(mapped.renderConfig.remoteMcpTools).toHaveLength(0);
+  });
+});
+
+// 
============================================================================
+// resolveInlineFunctionTools
+// ============================================================================
+
+describe('resolveInlineFunctionTools', () => {
+  const sampleInlineTool = {
+    type: 'inline_function' as const,
+    name: 'my_tool',
+    config: {
+      inlineFunction: {
+        description: 'Does something',
+        inputSchema: { type: 'object', properties: { query: { type: 'string' } } },
+      },
+    },
+  };
+
+  it('includes inline function tool in renderConfig', () => {
+    const mapped = mapHarnessToExportConfig(baseContext({ tools: [sampleInlineTool] }), 'CodeZip');
+    expect(mapped.renderConfig.inlineFunctionTools).toHaveLength(1);
+    const firstTool = mapped.renderConfig.inlineFunctionTools?.at(0);
+    expect(firstTool!.name).toBe('my_tool');
+    expect(firstTool!.description).toBe('Does something');
+  });
+
+  it('excludes inline tool filtered by allowedTools', () => {
+    const harnessCtx = baseContext({ tools: [sampleInlineTool], allowedTools: ['other-tool'] });
+    const mapped = mapHarnessToExportConfig(harnessCtx, 'CodeZip');
+    expect(mapped.renderConfig.inlineFunctionTools).toHaveLength(0);
+  });
+
+  it('returns empty array when no inline tools', () => {
+    const mapped = mapHarnessToExportConfig(baseContext(), 'CodeZip');
+    expect(mapped.renderConfig.inlineFunctionTools).toHaveLength(0);
+  });
+});
+
+// ============================================================================
+// isBuiltinIncluded (shell / file_operations)
+// ============================================================================
+
+describe('isBuiltinIncluded (shell / file_operations)', () => {
+  // One row per case, registered in the original order. `allowedTools` left undefined means the
+  // harness spec omits the field; `fileOps` left undefined means that flag is not asserted.
+  const builtinCases: { title: string; allowedTools?: string[]; shell: boolean; fileOps?: boolean }[] = [
+    {
+      title: 'includes shell and file_operations when allowedTools is wildcard',
+      allowedTools: ['*'],
+      shell: true,
+      fileOps: true,
+    },
+    {
+      title: 'includes shell and file_operations when allowedTools is unset (defaults to wildcard)',
+      shell: true,
+      fileOps: true,
+    },
+    { title: 'includes shell via @builtin pattern', allowedTools: ['@builtin'], shell: true, fileOps: true },
+    { title: 'includes shell via @builtin/shell pattern', allowedTools: ['@builtin/shell'], shell: true, fileOps: false },
+    {
+      title: 'excludes both builtins when allowedTools only lists non-builtin tools',
+      allowedTools: ['some-tool'],
+      shell: false,
+      fileOps: false,
+    },
+    // Only @builtin or @builtin/shell patterns match builtins, not plain tool names
+    { title: 'plain "shell" name does not match the builtin/shell builtin', allowedTools: ['shell'], shell: false },
+  ];
+
+  for (const builtinCase of builtinCases) {
+    it(builtinCase.title, () => {
+      const harnessCtx =
+        builtinCase.allowedTools === undefined
+          ? baseContext()
+          : baseContext({ allowedTools: builtinCase.allowedTools });
+      const { renderConfig: rendered } = mapHarnessToExportConfig(harnessCtx, 'CodeZip');
+      expect(rendered.hasShell).toBe(builtinCase.shell);
+      if (builtinCase.fileOps !== undefined) {
+        expect(rendered.hasFileOperations).toBe(builtinCase.fileOps);
+      }
+    });
+  }
+});
diff --git a/src/cli/commands/export/constants.ts b/src/cli/commands/export/constants.ts
new file mode 100644
index 000000000..d06bcd300
--- /dev/null
+++ b/src/cli/commands/export/constants.ts
@@ -0,0 +1,19 @@
+export const EXPORT_NOTES_FILENAME = 'EXPORT_NOTES.md';
+
+export const DEFAULT_SYSTEM_PROMPT = 'You are a helpful assistant.';
+
+export const EXTERNAL_GATEWAY_NOTE_CATEGORY = 'External gateway ARNs hardcoded';
+export const MEMORY_ARN_NOTE_CATEGORY = 
'Memory ARN requires IAM policy';
+export const CONTAINER_URI_NOTE_CATEGORY = 'containerUri: verify Python in base image';
+export const CUSTOM_DOCKERFILE_NOTE_CATEGORY = 'Custom harness Dockerfile needs the agent build layer';
+export const CONTAINER_URI_ECR_PULL_NOTE_CATEGORY = 'containerUri base image requires ECR pull permission';
+export const ALLOWED_TOOLS_NOTE_CATEGORY = 'allowedTools: per-invocation overrides dropped';
+export const PATH_SKILLS_NOTE_CATEGORY = 'path skills require container filesystem';
+export const MCP_HEADER_CREDS_NOTE_CATEGORY = 'MCP tool header credentials';
+export const GIT_SKILLS_CONTAINER_NOTE_CATEGORY = 'git skills require git in container image';
+export const GATEWAY_IAM_POLICY_NOTE_CATEGORY = 'Gateway requires InvokeGateway IAM permission';
+export const BROWSER_IAM_POLICY_NOTE_CATEGORY = 'Browser tool requires IAM permissions';
+export const BROWSER_CODZIP_NOTE_CATEGORY = 'Browser tool requires Container build — excluded from CodeZip export';
+export const CODE_INTERPRETER_IAM_POLICY_NOTE_CATEGORY = 'Code interpreter tool requires IAM permissions';
+export const AWS_SKILLS_NOTE_CATEGORY = 'AWS skills omitted — not available outside managed harness';
+export const S3_SKILLS_IAM_POLICY_NOTE_CATEGORY = 'S3 skills require S3 read IAM permission';
diff --git a/src/cli/commands/export/harness-action.ts b/src/cli/commands/export/harness-action.ts
new file mode 100644
index 000000000..d95acdea9
--- /dev/null
+++ b/src/cli/commands/export/harness-action.ts
@@ -0,0 +1,384 @@
+import { AgentAlreadyExistsError, ConfigIO, setEnvVar } from '../../../lib';
+import { ExportHarnessError, ValidationError } from '../../../lib/errors/types';
+import { AgentNameSchema } from '../../../schema';
+import type { AgentEnvSpec, BuildType, Credential } from '../../../schema';
+import { getErrorMessage } from '../../errors';
+import type { AttributeRecorder } from '../../telemetry/cli-command-run.js';
+import { withCommandRunTelemetry } from '../../telemetry/cli-command-run.js';
+import type { CommandAttrs } from '../../telemetry/schemas/command-run.js';
+import {
+  BuildType as TelemetryBuildType,
+  ModelProvider as TelemetryModelProvider,
+  standardize,
+} from '../../telemetry/schemas/common-shapes.js';
+import { StrandsRenderer } from '../../templates/StrandsRenderer';
+import { CUSTOM_DOCKERFILE_NOTE_CATEGORY, EXPORT_NOTES_FILENAME } from './constants';
+import { mapHarnessToExportConfig } from './harness-mapper';
+import { resolveHarnessContext } from './harness-resolver';
+import type { ExportHarnessOptions, ExportNote, ResolvedHarnessContext } from './types';
+import { execSync } from 'node:child_process';
+import { copyFileSync, cpSync, existsSync, mkdirSync, readFileSync, rmSync, writeFileSync } from 'node:fs';
+import { basename, join } from 'node:path';
+
+/** Optional progress sink; `onProgress` receives a short message at the start of each export step. */
+export interface ExportHarnessProgress {
+  onProgress?: (message: string) => void;
+}
+
+/**
+ * Exports a harness as a standalone Strands agent inside the same project.
+ *
+ * Steps: validate options, resolve the harness context, map it to a render config, copy the
+ * harness Dockerfile and supporting files (or write a Dockerfile stub for `containerUri`),
+ * render the agent code, run `uv lock` for Container builds, register the agent in the project
+ * spec, write MCP header credential values via `setEnvVar`, and write EXPORT_NOTES.md.
+ *
+ * @param options  Export options; `name` is required, `targetAgentName` defaults to `${name}Agent`,
+ *                 `build` may override the build type (CodeZip or Container).
+ * @param progress Optional progress callback holder.
+ * @returns `{ success: true, agentName, agentPath, notesPath }` on success, otherwise
+ *          `{ success: false, error }`. Validation, resolution, mapping, render and project-write
+ *          failures are returned rather than thrown.
+ */
+export async function handleExportHarness(
+  options: ExportHarnessOptions,
+  progress?: ExportHarnessProgress
+): Promise<
+  { success: true; agentName: string; agentPath: string; notesPath: string } | { success: false; error: Error }
+> {
+  const log = (msg: string) => progress?.onProgress?.(msg);
+
+  return withCommandRunTelemetry(
+    'export.harness',
+    {} as CommandAttrs<'export.harness'>,
+    async (recorder: AttributeRecorder<CommandAttrs<'export.harness'>>) => {
+      const harnessName = options.name;
+      if (!harnessName) {
+        return { success: false as const, error: new ValidationError('--name is required in non-interactive mode') };
+      }
+
+      const targetAgentName = options.targetAgentName ?? `${harnessName}Agent`;
+      const parsedAgentName = AgentNameSchema.safeParse(targetAgentName);
+      if (!parsedAgentName.success) {
+        return {
+          success: false as const,
+          error: new ValidationError(
+            `Invalid --target-agent-name "${targetAgentName}": ${parsedAgentName.error.issues[0]?.message ?? 'invalid name'}`
+          ),
+        };
+      }
+
+      const buildOverride = options.build as BuildType | undefined;
+
+      // The cast above is unchecked, so validate the raw value before it reaches the mapper.
+      const VALID_BUILD_TYPES = new Set(['CodeZip', 'Container']);
+      if (buildOverride && !VALID_BUILD_TYPES.has(buildOverride)) {
+        return {
+          success: false as const,
+          error: new ValidationError(`Invalid --build value "${buildOverride}". Expected CodeZip or Container.`),
+        };
+      }
+
+      // 1. Resolve all on-disk inputs
+      log('Reading harness configuration');
+      let context: Awaited<ReturnType<typeof resolveHarnessContext>>;
+      try {
+        context = await resolveHarnessContext(harnessName, targetAgentName);
+      } catch (err) {
+        return { success: false as const, error: err instanceof Error ? err : new Error(String(err)) };
+      }
+
+      // 2. Map harness spec to render config + agent env spec
+      // The mapper throws (e.g. ValidationError when a CodeZip build is requested for a
+      // containerUri/dockerfile harness). Nothing has been written to disk yet, so surface it
+      // as a failed result like every other validation error instead of letting it escape.
+      log('Mapping to Strands template config');
+      let mapping: ReturnType<typeof mapHarnessToExportConfig>;
+      try {
+        mapping = mapHarnessToExportConfig(context, buildOverride);
+      } catch (err) {
+        return { success: false as const, error: err instanceof Error ? err : new Error(String(err)) };
+      }
+      const { renderConfig, agentEnvSpec, credentialEntry, mcpCredentialEntries } = mapping;
+
+      // The target directory is guaranteed not to pre-exist (resolveHarnessContext throws if it
+      // does), so anything written below is created by this export. Remove it on failure to avoid
+      // leaving an orphan directory with no matching agentcore.json entry.
+      const agentDir = join(context.projectRoot, 'app', targetAgentName);
+      const cleanupAgentDir = () => {
+        try {
+          rmSync(agentDir, { recursive: true, force: true });
+        } catch {
+          // best-effort cleanup — ignore failures
+        }
+      };
+
+      // 3. Copy Dockerfile + supporting harness directories (e.g. path_skill/, assets/)
+      // Harness files that are NOT copied: harness.json, system-prompt.md (regenerated by export)
+      const HARNESS_SKIP_FILES = new Set(['harness.json', 'system-prompt.md']);
+      if (context.spec.dockerfile) {
+        const harnessDir = join(context.projectRoot, 'app', harnessName);
+        const dockerfileSrc = join(harnessDir, context.spec.dockerfile);
+        if (existsSync(dockerfileSrc)) {
+          mkdirSync(agentDir, { recursive: true });
+          copyFileSync(dockerfileSrc, join(agentDir, context.spec.dockerfile));
+          context.exportNotes.push(buildCustomDockerfileNote(context.spec.dockerfile, targetAgentName));
+        } else {
+          context.exportNotes.push(buildMissingDockerfileNote(context.spec.dockerfile, harnessName, targetAgentName));
+        }
+        // Copy all non-file entries (directories) and non-skipped files from the harness dir
+        if (existsSync(harnessDir)) {
+          cpSync(harnessDir, agentDir, {
+            recursive: true,
+            // The filter matches on basename, so a skipped name is excluded at any depth.
+            filter: src => {
+              const name = basename(src);
+              return !HARNESS_SKIP_FILES.has(name);
+            },
+          });
+        }
+      }
+
+      // 4. Generate Dockerfile stub for containerUri
+      if (context.spec.containerUri && renderConfig.buildType === 'Container') {
+        mkdirSync(agentDir, { recursive: true });
+        writeDockerfileStub(agentDir, context.spec.containerUri);
+      }
+
+      // 5. Render Strands agent code
+      log('Rendering agent code');
+      try {
+        const renderer = new StrandsRenderer(renderConfig);
+        await renderer.render({ outputDir: context.projectRoot });
+      } catch (err) {
+        cleanupAgentDir();
+        return {
+          success: false as const,
+          error: new ExportHarnessError(
+            `Failed to render agent code for "${targetAgentName}": ${getErrorMessage(err)}`,
+            {
+              cause: err instanceof Error ? err : undefined,
+            }
+          ),
+        };
+      }
+
+      // 5b. Generate uv.lock for Container builds (required by the Dockerfile's uv sync step)
+      if (renderConfig.buildType === 'Container') {
+        log('Generating uv.lock for container build');
+        try {
+          execSync('uv lock', { cwd: agentDir, stdio: 'pipe' });
+        } catch {
+          // uv not installed or failed — add a note and continue; user can run manually
+          context.exportNotes.push({
+            category: 'uv.lock missing — run `uv lock` before deploying',
+            message:
+              `The container Dockerfile requires a uv.lock file. Run \`uv lock\` in ` +
+              `app/${targetAgentName}/ before running \`agentcore deploy\`.`,
+          });
+        }
+      }
+
+      // 6. Write agent to agentcore.json
+      log('Updating agentcore.json');
+      try {
+        await writeExportedAgentToProject(agentEnvSpec, context, credentialEntry, mcpCredentialEntries);
+      } catch (err) {
+        cleanupAgentDir();
+        return { success: false as const, error: err instanceof Error ? err : new Error(String(err)) };
+      }
+
+      // 6b. Write MCP header credential values to .env.local for local development
+      for (const { envVarName, value } of mcpCredentialEntries) {
+        await setEnvVar(envVarName, value, context.configBaseDir);
+      }
+
+      // 6c. Warn if no deploy targets are configured
+      const configIO = new ConfigIO({ baseDir: context.configBaseDir });
+      // A read failure is treated the same as "no targets" so the note is still emitted.
+      const targets = await configIO.readAWSDeploymentTargets().catch(() => []);
+      if (targets.length === 0) {
+        context.exportNotes.push({
+          category: 'No AWS deployment target configured',
+          message:
+            'aws-targets.json is empty — running `agentcore deploy` will fail with "Target \\"default\\" not found". ' +
+            'Add a deployment target first:\n\n' +
+            ' agentcore deploy (interactive mode will prompt for account/region)\n\n' +
+            'Or edit agentcore/aws-targets.json manually:\n\n' +
+            ' [{ "name": "default", "account": "", "region": "" }]',
+        });
+      }
+
+      // 7. Write EXPORT_NOTES.md
+      log('Writing EXPORT_NOTES.md');
+      writeExportNotes(context.exportNotes, harnessName, targetAgentName, agentDir);
+
+      // Record telemetry attrs after all work is done
+      recorder.set({
+        build_type: standardize(TelemetryBuildType, renderConfig.buildType ?? 'CodeZip'),
+        model_provider: standardize(TelemetryModelProvider, renderConfig.modelProvider),
+        has_memory: renderConfig.hasMemory,
+        has_gateway: renderConfig.hasGateway,
+        has_container: renderConfig.buildType === 'Container',
+        has_execution_limits: !!renderConfig.hasExecutionLimits,
+        notes_count: context.exportNotes.length,
+      });
+
+      return {
+        success: true as const,
+        agentName: targetAgentName,
+        agentPath: agentDir,
+        notesPath: join(agentDir, EXPORT_NOTES_FILENAME),
+      };
+    }
+  );
+}
+
+// ============================================================================
+// Write agent entry to agentcore.json
+// ============================================================================
+
+/**
+ * Appends the exported agent and any new credentials to the project spec and writes it back.
+ *
+ * Credentials are de-duplicated by name, so entries already present in the project are left alone.
+ *
+ * @throws AgentAlreadyExistsError when a runtime with the same name is already registered.
+ */
+async function writeExportedAgentToProject(
+  agentEnvSpec: AgentEnvSpec,
+  context: ResolvedHarnessContext,
+  credentialEntry: Credential | null,
+  mcpCredentialEntries: { credential: Credential }[]
+): Promise<void> {
+  const configIO = new ConfigIO({ baseDir: context.configBaseDir });
+  const project = await configIO.readProjectSpec();
+
+  if (project.runtimes.some(r => r.name === agentEnvSpec.name)) {
+    throw new AgentAlreadyExistsError(agentEnvSpec.name);
+  }
+
+  project.runtimes.push(agentEnvSpec);
+
+  if (credentialEntry && !project.credentials.some(c => c.name === credentialEntry.name)) {
+    project.credentials.push(credentialEntry);
+  }
+
+  for (const { credential } of mcpCredentialEntries) {
+    if (!project.credentials.some(c => c.name === credential.name)) {
+      project.credentials.push(credential);
+    }
+  }
+
+  await configIO.writeProjectSpec(project);
+}
+
+// ============================================================================
+// Dockerfile export notes
+// 
============================================================================
+
+/**
+ * Note emitted when a harness with a custom Dockerfile is exported.
+ *
+ * The harness Dockerfile describes an *execution environment* — the harness runtime overrides its
+ * ENTRYPOINT/CMD and supplies the agent. An exported agent, by contrast, is the entrypoint and must
+ * build/install its own deps and launch main.py. We cannot safely rewrite an arbitrary user
+ * Dockerfile (custom base image, WORKDIR, USER, apt packages), so we preserve it as-is and tell the
+ * user to append the agent build layer.
+ *
+ * The suggested layer creates the `bedrock_agentcore` user before referencing it in
+ * `COPY --chown` / `USER`, matching the stub written by `writeDockerfileStub`.
+ *
+ * @param dockerfile      Dockerfile name/path as declared on the harness spec.
+ * @param targetAgentName Name of the exported agent (used for the `app/<name>/` paths in the text).
+ */
+export function buildCustomDockerfileNote(dockerfile: string, targetAgentName: string): ExportNote {
+  return {
+    category: CUSTOM_DOCKERFILE_NOTE_CATEGORY,
+    message:
+      `The harness used a custom Dockerfile ("${dockerfile}") that describes its execution ` +
+      `environment. It has been copied to app/${targetAgentName}/${dockerfile} unchanged, but ` +
+      `the exported agent will NOT run as-is: a harness Dockerfile has no dependency install, code copy, or ` +
+      `startup command (the harness runtime supplied those). Add the Strands agent build layer to the end ` +
+      `of app/${targetAgentName}/${dockerfile} before \`agentcore deploy\` ` +
+      `(adjust if your base image is not Python 3.12+/uv, or already sets WORKDIR/USER):\n\n` +
+      ` WORKDIR /app\n` +
+      ` RUN pip install --no-cache-dir uv\n` +
+      ` RUN useradd -m -u 1000 bedrock_agentcore\n` +
+      ` COPY pyproject.toml uv.lock ./\n` +
+      ` RUN uv sync --frozen --no-dev --no-install-project\n` +
+      ` COPY --chown=bedrock_agentcore:bedrock_agentcore . .\n` +
+      ` RUN uv sync --frozen --no-dev\n` +
+      ` USER bedrock_agentcore\n` +
+      ` EXPOSE 8080 8000 9000\n` +
+      ` CMD ["opentelemetry-instrument", "python", "-m", "main"]\n\n` +
+      `Also ensure the build sets the runtime entrypoint to the generated main.py rather than ` +
+      `the harness's overridden entrypoint.`,
+  };
+}
+
+/** Note emitted when a harness references a dockerfile that does not exist on disk. */
+export function buildMissingDockerfileNote(
+  dockerfile: string,
+  harnessName: string,
+  targetAgentName: string
+): ExportNote {
+  return {
+    category: `Dockerfile not found — create ${dockerfile} before deploying`,
+    message:
+      `The harness references dockerfile: "${dockerfile}" but no such file exists in ` +
+      `app/${harnessName}/. Create a Dockerfile at app/${targetAgentName}/${dockerfile} ` +
+      `before running \`agentcore deploy\`.`,
+  };
+}
+
+// ============================================================================
+// Write EXPORT_NOTES.md
+// ============================================================================
+
+/**
+ * Reads the `strands-agents` lower-bound requirement from the agent's pyproject.toml.
+ *
+ * Accepts an optional extras group (e.g. `strands-agents[otel] >= 1.2`). Only `>=` specifiers are
+ * recognised; any other specifier, a missing file, or a read error yields the "version unknown" label.
+ *
+ * @returns `strands-agents >= X.Y` when found, otherwise `strands-agents (version unknown)`.
+ */
+function readStrandsVersion(agentDir: string): string {
+  try {
+    const pyproject = readFileSync(join(agentDir, 'pyproject.toml'), 'utf8');
+    const match = /strands-agents(?:\[[^\]]*\])?\s*(>=\s*[\d.]+)/.exec(pyproject);
+    return match ? `strands-agents ${match[1]}` : 'strands-agents (version unknown)';
+  } catch {
+    return 'strands-agents (version unknown)';
+  }
+}
+
+/**
+ * Writes EXPORT_NOTES.md into `agentDir`: a header block followed by one `###` section per note,
+ * or "No manual steps required." when there are none.
+ */
+function writeExportNotes(notes: ExportNote[], harnessName: string, agentName: string, agentDir: string): void {
+  // Date only (YYYY-MM-DD), taken from the UTC ISO timestamp.
+  const today = new Date().toISOString().split('T')[0];
+  const strandsVersion = readStrandsVersion(agentDir);
+  // NOTE(review): the source path is prefixed "agentcore/app/" while the generated path is "app/",
+  // and handleExportHarness reads the harness from <projectRoot>/app/<harnessName> — confirm which is intended.
+  const lines: string[] = [
+    `# Export Notes — ${harnessName} → ${agentName}`,
+    '',
+    `Exported on: ${today}`,
+    `Strands version: ${strandsVersion}`,
+    `Source harness: agentcore/app/${harnessName}/harness.json`,
+    `Generated agent: app/${agentName}/`,
+    '',
+  ];
+
+  if (notes.length === 0) {
+    lines.push('No manual steps required.');
+  } else {
+    lines.push('## Items requiring manual follow-up');
+    for (const note of notes) {
+      lines.push('');
+      lines.push(`### ${note.category}`);
+      lines.push(note.message);
+    }
+  }
+
+  lines.push('');
+
+  const outPath = join(agentDir, EXPORT_NOTES_FILENAME);
+  writeFileSync(outPath, lines.join('\n'), 'utf8');
+}
+
+// ============================================================================
+// Dockerfile stub for containerUri harnesses
+// ============================================================================
+
+/**
+ * Writes `<agentDir>/Dockerfile` that layers the generated agent on top of the harness's
+ * `containerUri` base image. An existing file at that path is overwritten.
+ */
+function writeDockerfileStub(agentDir: string, containerUri: string): void {
+  const content = [
+    `# Base image from the source harness: ${containerUri}`,
+    '# The generated Strands agent is layered on top. If the base image does not',
+    '# include Python 3.12+ or uv, add install steps before the COPY/RUN below.',
+    `FROM ${containerUri}`,
+    '',
+    'RUN pip install --no-cache-dir uv',
+    '',
+    'WORKDIR /app',
+    '',
+    'ARG UV_DEFAULT_INDEX',
+    'ARG UV_INDEX',
+    '',
+    'ENV UV_SYSTEM_PYTHON=1 \\',
+    ' UV_COMPILE_BYTECODE=1 \\',
+    ' UV_NO_PROGRESS=1 \\',
+    ' PYTHONUNBUFFERED=1 \\',
+    ' DOCKER_CONTAINER=1 \\',
+    ' UV_DEFAULT_INDEX=${UV_DEFAULT_INDEX} \\',
+    ' UV_INDEX=${UV_INDEX} \\',
+    ' PATH="/app/.venv/bin:$PATH"',
+    '',
+    'RUN useradd -m -u 1000 bedrock_agentcore',
+    '',
+    'COPY pyproject.toml uv.lock ./',
+    'RUN uv sync --frozen --no-dev --no-install-project',
+    '',
+    'COPY --chown=bedrock_agentcore:bedrock_agentcore . .',
+    'RUN uv sync --frozen --no-dev',
+    '',
+    'USER bedrock_agentcore',
+    '',
+    'EXPOSE 8080 8000 9000',
+    '',
+    'CMD ["opentelemetry-instrument", "python", "-m", "main"]',
+    '',
+  ].join('\n');
+
+  writeFileSync(join(agentDir, 'Dockerfile'), content, 'utf8');
+}
diff --git a/src/cli/commands/export/harness-mapper.ts b/src/cli/commands/export/harness-mapper.ts
new file mode 100644
index 000000000..988e3aa72
--- /dev/null
+++ b/src/cli/commands/export/harness-mapper.ts
@@ -0,0 +1,956 @@
+import { APP_DIR } from '../../../lib';
+import { ValidationError } from '../../../lib/errors/types';
+import type {
+  AgentEnvSpec,
+  BuildType,
+  Credential,
+  DirectoryPath,
+  FilePath,
+  MemoryStrategy,
+  MemoryStrategyType,
+  ModelProvider,
+} from '../../../schema';
+import type {
+  HarnessGatewayOutboundAuth,
+  HarnessSkill,
+  HarnessSkillAwsSkillsSource,
+  HarnessSkillGitSource,
+  HarnessSkillPathSource,
+  HarnessSkillS3Source,
+  HarnessSpec,
+  HarnessToolType,
+  HarnessTruncationConfig,
+} from '../../../schema/schemas/primitives/harness';
+import { arnPrefix, dnsSuffix } from '../../aws/partition';
+import { GatewayPrimitive } from '../../primitives/GatewayPrimitive';
+import {
+  computeDefaultCredentialEnvVarName,
+  computeManagedOAuthCredentialName,
+} from '../../primitives/credential-utils';
+import type {
+  AgentRenderConfig,
+  GatewayProviderRenderConfig,
+  IdentityProviderRenderConfig,
+  MemoryProviderRenderConfig,
+} from '../../templates/types';
+import { DEFAULT_PYTHON_ENTRYPOINT, DEFAULT_PYTHON_VERSION } from '../../tui/screens/generate/defaults';
+import { buildFilesystemConfigurations } from '../shared/filesystem-utils';
+import {
+  ALLOWED_TOOLS_NOTE_CATEGORY,
+  AWS_SKILLS_NOTE_CATEGORY,
+  BROWSER_CODZIP_NOTE_CATEGORY,
+  BROWSER_IAM_POLICY_NOTE_CATEGORY,
+  CODE_INTERPRETER_IAM_POLICY_NOTE_CATEGORY,
+  CONTAINER_URI_ECR_PULL_NOTE_CATEGORY,
+  CONTAINER_URI_NOTE_CATEGORY,
+  EXTERNAL_GATEWAY_NOTE_CATEGORY,
+  GATEWAY_IAM_POLICY_NOTE_CATEGORY,
+  
GIT_SKILLS_CONTAINER_NOTE_CATEGORY, + MCP_HEADER_CREDS_NOTE_CATEGORY, + MEMORY_ARN_NOTE_CATEGORY, + PATH_SKILLS_NOTE_CATEGORY, + S3_SKILLS_IAM_POLICY_NOTE_CATEGORY, +} from './constants'; +import type { HarnessMappingResult, ResolvedHarnessContext } from './types'; + +// ============================================================================ +// Public entry point +// ============================================================================ + +export function mapHarnessToExportConfig( + context: ResolvedHarnessContext, + buildOverride?: BuildType +): HarnessMappingResult { + const { spec, targetAgentName } = context; + + const buildType = resolveBuildType(spec, buildOverride); + + if (buildType === 'CodeZip' && (spec.containerUri || spec.dockerfile)) { + const what = spec.containerUri ? `containerUri (${spec.containerUri})` : `dockerfile (${spec.dockerfile})`; + throw new ValidationError( + `Harness "${spec.name}" uses ${what}, which requires a Container build. ` + `Re-export with --build Container.` + ); + } + + const modelProvider = resolveModelProvider(spec.model.provider); + const allowedToolPatterns = spec.allowedTools ?? 
['*']; + const identityResult = resolveIdentityProvider(spec, context); + const memoryResult = resolveMemoryProviders(spec, context); + const gatewayResult = resolveGatewayProviders(spec, context, allowedToolPatterns); + const hasGateway = gatewayResult.providers.length > 0; + addBrowserCodeInterpreterNotes(spec, allowedToolPatterns, buildType, context); + const hasExecutionLimits = + spec.maxIterations !== undefined || spec.maxTokens !== undefined || spec.timeoutSeconds !== undefined; + const hasSkillsFetcher = spec.skills.length > 0; + + // Static allowedTools filter — record note if not wildcard + if (!(allowedToolPatterns.length === 1 && allowedToolPatterns[0] === '*')) { + context.exportNotes.push({ + category: ALLOWED_TOOLS_NOTE_CATEGORY, + message: + 'The harness allowedTools filter has been applied statically at code-generation time. ' + + 'Tools excluded at export will not be available at runtime, and callers cannot override ' + + 'the tool list per invocation (unlike the harness).', + }); + } + + // Path skills note + const pathSkills = spec.skills.filter(s => isPathSkill(s)); + if (pathSkills.length > 0 && buildType === 'CodeZip') { + context.exportNotes.push({ + category: PATH_SKILLS_NOTE_CATEGORY, + message: + `The following skill paths must exist on the container filesystem at runtime: ${pathSkills.map(s => s.path).join(', ')}. ` + + 'For CodeZip builds, path skills are not supported — switch to a Container build and COPY the ' + + 'skill directory in your Dockerfile, or use s3/git skill variants.', + }); + } + + // git skills + Container: warn that git must be in the image + const gitSkills = spec.skills.filter(s => isGitSkill(s)); + if (gitSkills.length > 0 && buildType === 'Container') { + context.exportNotes.push({ + category: GIT_SKILLS_CONTAINER_NOTE_CATEGORY, + message: + 'The agent clones git skill repositories at runtime using `git`. The default Container base image ' + + '(`python:3.12-slim`) does not include git. 
Add it to your Dockerfile before deploying:\n\n' + + ' RUN apt-get update && apt-get install -y --no-install-recommends git && rm -rf /var/lib/apt/lists/*', + }); + } + + // s3 skills: the agent fetches skill files with boto3 at runtime, so the runtime + // execution role needs S3 read access. The managed harness fetched these service-side + // and never granted the customer role S3 permissions, so without this the exported + // agent fails at first invocation with an opaque S3 AccessDenied. Independent of build type. + const s3Skills = spec.skills.filter(isS3Skill); + if (s3Skills.length > 0) { + const arns = s3Skills + .map(s => parseS3SkillArns(s.s3Uri, context.region)) + .filter((a): a is NonNullable => a !== undefined); + if (arns.length > 0) { + const objectResources = [...new Set(arns.map(a => a.objectArn))]; + const bucketResources = [...new Set(arns.map(a => a.bucketArn))]; + const fmt = (rs: string[]) => rs.map(r => `'${r}'`).join(', '); + const agentName = context.targetAgentName ?? 'YourAgentName'; + context.exportNotes.push({ + category: S3_SKILLS_IAM_POLICY_NOTE_CATEGORY, + message: + `This agent downloads its S3 skills (${s3Skills.map(s => s.s3Uri).join(', ')}) at runtime with boto3. 
` + + `The exported runtime execution role is not automatically granted permission to read them, so the ` + + `agent will fail on its first invocation with an S3 AccessDenied.\n\n` + + `Add the following to agentcore/cdk/lib/cdk-stack.ts after \`this.application\` is created,\n` + + `replacing "${agentName}" if you renamed the agent:\n\n` + + ` const agentEnv = this.application.environments.get('${agentName}');\n` + + ` agentEnv?.runtime.role.addToPrincipalPolicy(\n` + + ` new iam.PolicyStatement({\n` + + ` actions: ['s3:GetObject'],\n` + + ` resources: [${fmt(objectResources)}],\n` + + ` })\n` + + ` );\n` + + ` agentEnv?.runtime.role.addToPrincipalPolicy(\n` + + ` new iam.PolicyStatement({\n` + + ` actions: ['s3:ListBucket'],\n` + + ` resources: [${fmt(bucketResources)}],\n` + + ` })\n` + + ` );`, + }); + } + } + + // AWS skills: managed-only feature, cannot export + const awsSkills = spec.skills.filter(s => isAwsSkill(s)); + if (awsSkills.length > 0) { + const patterns = awsSkills.map(s => s.awsSkills.paths?.join(', ') ?? 'all').join('; '); + context.exportNotes.push({ + category: AWS_SKILLS_NOTE_CATEGORY, + message: + `AWS skills are a managed harness feature and are not available in standalone Strands agents. ` + + `The following skill patterns have been omitted: ${patterns}. ` + + `You can copy the equivalent skills from https://github.com/aws/agent-toolkit-for-aws/tree/main/skills ` + + `into your project and load them as path or git skills instead.`, + }); + } + + // containerUri note + if (spec.containerUri) { + context.exportNotes.push({ + category: CONTAINER_URI_NOTE_CATEGORY, + message: + `The harness used a pre-built container image as its execution environment (${spec.containerUri}). ` + + 'The generated Dockerfile extends that image directly (FROM ) and layers the Strands ' + + 'agent code on top. 
If your base image does not include Python 3.12+ or uv, add an install step ' + + 'before the `uv sync` steps.', + }); + + // If the base image is a private ECR repository, CodeBuild needs pull access to it. + const baseImageEcrArn = ecrArnFromUri(spec.containerUri, context.region); + if (baseImageEcrArn) { + context.exportNotes.push({ + category: CONTAINER_URI_ECR_PULL_NOTE_CATEGORY, + message: + `The base image (${spec.containerUri}) is a private ECR repository. The CodeBuild project that ` + + `builds this agent's container is not automatically granted permission to pull it.\n\n` + + `Add the following to agentcore/cdk/lib/cdk-stack.ts after \`this.application\` is created:\n\n` + + ` import * as ecr from 'aws-cdk-lib/aws-ecr';\n` + + ` import { ContainerBuildProject } from '@aws/agentcore-cdk';\n\n` + + ` const baseRepo = ecr.Repository.fromRepositoryArn(this, 'BaseImageEcrRepository', '${baseImageEcrArn}');\n` + + ` baseRepo.grantPull(ContainerBuildProject.getOrCreate(this).role);`, + }); + } + } + + const mcpResolution = resolveRemoteMcpTools(spec, allowedToolPatterns, context); + + const renderConfig: AgentRenderConfig = { + name: targetAgentName, + sdkFramework: 'Strands', + targetLanguage: 'Python', + modelProvider, + hasMemory: memoryResult.providers.length > 0, + hasIdentity: identityResult.provider !== null, + hasGateway, + isVpc: spec.networkMode === 'VPC', + buildType, + memoryProviders: memoryResult.providers, + identityProviders: identityResult.provider ? 
[identityResult.provider] : [], + gatewayProviders: gatewayResult.providers, + gatewayAuthTypes: [...new Set(gatewayResult.providers.map(g => g.authType))], + protocol: 'HTTP', + dockerfile: resolveDockerfileName(spec, buildType), + enableOtel: true, + hasConfigBundle: false, + hasPayment: false, + // Execution limits — consumed by execution-limits capability template + maxIterations: spec.maxIterations, + maxTokens: spec.maxTokens, + timeoutSeconds: spec.timeoutSeconds, + // Truncation — consumed by main.py template + truncationStrategy: spec.truncation?.strategy, + truncationConfig: resolveTruncationConfig(spec.truncation), + // Remote MCP tools — consumed by mcp_client template + remoteMcpTools: mcpResolution.tools, + // Filesystem mounts (session storage, EFS, S3) — consumed by main.py/CDK templates + ...buildFilesystemRenderConfig(spec), + // Skills (path/s3/git) — consumed by main.py + skills/fetcher.py templates + ...buildSkillsRenderConfig(spec, hasSkillsFetcher), + // Inline + builtin + browser/code-interpreter tools (after allowedTools filter) + ...buildToolsRenderConfig(spec, allowedToolPatterns, buildType), + hasExecutionLimits, + isExportHarness: true, + modelId: spec.model.modelId, + // System prompt (written verbatim into main.py) + systemPromptText: context.systemPrompt, + actorId: spec.memory?.mode === 'existing' ? spec.memory.actorId : undefined, + }; + + const agentEnvSpec = buildAgentEnvSpec(context, targetAgentName, buildType); + + return { + renderConfig, + agentEnvSpec, + credentialEntry: identityResult.credentialEntry, + mcpCredentialEntries: mcpResolution.credentialEntries, + }; +} + +// ============================================================================ +// RenderConfig sub-builders +// ============================================================================ + +/** Filesystem mounts (session storage, EFS, S3) consumed by main.py and CDK templates. 
*/
+function buildFilesystemRenderConfig(
+  spec: HarnessSpec
+): Pick<AgentRenderConfig, 'sessionStorageMountPath' | 'efsMounts' | 's3Mounts' | 'needsOs'> {
+  // Project each access point down to the two fields the render config carries.
+  const efsMounts = (spec.efsAccessPoints ?? []).map(ap => ({
+    accessPointArn: ap.accessPointArn,
+    mountPath: ap.mountPath,
+  }));
+  const s3Mounts = (spec.s3AccessPoints ?? []).map(ap => ({
+    accessPointArn: ap.accessPointArn,
+    mountPath: ap.mountPath,
+  }));
+  return {
+    sessionStorageMountPath: spec.sessionStoragePath,
+    efsMounts,
+    s3Mounts,
+    // True as soon as any mount (session storage, EFS, or S3) is configured.
+    // NOTE(review): presumably gates OS-level setup in the generated main.py — confirm against the template.
+    needsOs: !!spec.sessionStoragePath || efsMounts.length > 0 || s3Mounts.length > 0,
+  };
+}
+
+/**
+ * Path/S3/git skills consumed by main.py and skills/fetcher.py templates.
+ *
+ * Skills are split by source kind using the isPathSkill / isS3Skill / isGitSkill guards.
+ * `hasSkillsFetcher` is passed through unchanged; `hasFetchedSkills` is true when at
+ * least one skill is an S3 or git skill.
+ */
+function buildSkillsRenderConfig(
+  spec: HarnessSpec,
+  hasSkillsFetcher: boolean
+): Pick<AgentRenderConfig, 'pathSkills' | 's3Skills' | 'gitSkills' | 'hasSkillsFetcher' | 'hasFetchedSkills'> {
+  return {
+    pathSkills: spec.skills.filter(isPathSkill).map(s => s.path),
+    s3Skills: spec.skills.filter(isS3Skill).map(s => s.s3Uri),
+    gitSkills: spec.skills.filter(isGitSkill).map(s => ({
+      url: s.gitUrl,
+      path: s.path,
+      // NOTE(review): `credentialArn` is populated from `auth.credentialName` — confirm the template expects a name here.
+      credentialArn: s.auth?.credentialName,
+      username: s.auth?.username,
+    })),
+    hasSkillsFetcher,
+    hasFetchedSkills: spec.skills.some(s => isS3Skill(s) || isGitSkill(s)),
+  };
+}
+
+/** Inline, builtin, and browser/code-interpreter tools (after allowedTools filter). */
+function buildToolsRenderConfig(
+  spec: HarnessSpec,
+  allowedToolPatterns: string[],
+  buildType: BuildType
+): Pick<
+  AgentRenderConfig,
+  | 'inlineFunctionTools'
+  | 'hasBrowser'
+  | 'browserIdentifier'
+  | 'hasCodeInterpreter'
+  | 'codeInterpreterIdentifier'
+  | 'hasShell'
+  | 'hasFileOperations'
+> {
+  return {
+    inlineFunctionTools: resolveInlineFunctionTools(spec, allowedToolPatterns),
+    // Browser requires a Container build (Playwright driver can't spawn subprocesses in CodeZip Lambda sandbox).
+ hasBrowser: isToolIncluded('agentcore_browser', spec, allowedToolPatterns) && buildType === 'Container', + browserIdentifier: extractToolIdentifier(spec, 'agentcore_browser', 'agentCoreBrowser', 'browserArn'), + hasCodeInterpreter: isToolIncluded('agentcore_code_interpreter', spec, allowedToolPatterns), + codeInterpreterIdentifier: extractToolIdentifier( + spec, + 'agentcore_code_interpreter', + 'agentCoreCodeInterpreter', + 'codeInterpreterArn' + ), + // Builtin tools — always available in the Harness runtime, included unless filtered out by allowedTools + hasShell: isBuiltinIncluded('shell', allowedToolPatterns), + hasFileOperations: isBuiltinIncluded('file_operations', allowedToolPatterns), + }; +} + +// ============================================================================ +// AgentEnvSpec builder +// ============================================================================ + +function buildAgentEnvSpec( + context: ResolvedHarnessContext, + targetAgentName: string, + buildType: BuildType +): AgentEnvSpec { + const { spec } = context; + const codeLocation = `${APP_DIR}/${targetAgentName}/` as DirectoryPath; + + const envVars = Object.entries(spec.environmentVariables ?? {}).map(([name, value]) => ({ name, value })); + + return { + name: targetAgentName, + build: buildType, + ...(resolveDockerfileName(spec, buildType) && { dockerfile: resolveDockerfileName(spec, buildType)! as FilePath }), + entrypoint: DEFAULT_PYTHON_ENTRYPOINT as FilePath, + codeLocation, + runtimeVersion: DEFAULT_PYTHON_VERSION, + networkMode: spec.networkMode ?? 
'PUBLIC', + protocol: 'HTTP', + ...(spec.networkMode === 'VPC' && spec.networkConfig && { networkConfig: spec.networkConfig }), + ...(spec.authorizerType && { authorizerType: spec.authorizerType }), + ...(spec.authorizerConfiguration && { authorizerConfiguration: spec.authorizerConfiguration }), + ...(spec.lifecycleConfig && { + lifecycleConfiguration: { + ...(spec.lifecycleConfig.idleRuntimeSessionTimeout !== undefined && { + idleRuntimeSessionTimeout: spec.lifecycleConfig.idleRuntimeSessionTimeout, + }), + ...(spec.lifecycleConfig.maxLifetime !== undefined && { + maxLifetime: spec.lifecycleConfig.maxLifetime, + }), + }, + }), + ...(spec.executionRoleArn && { executionRoleArn: spec.executionRoleArn }), + ...(envVars.length > 0 && { envVars }), + ...(spec.tags && { tags: spec.tags }), + ...buildFilesystemConfigurations(spec.sessionStoragePath, spec.efsAccessPoints, spec.s3AccessPoints), + }; +} + +// ============================================================================ +// Model provider +// ============================================================================ + +function resolveModelProvider(provider: 'bedrock' | 'open_ai' | 'gemini' | 'lite_llm'): ModelProvider { + switch (provider) { + case 'bedrock': + return 'Bedrock'; + case 'open_ai': + return 'OpenAI'; + case 'gemini': + return 'Gemini'; + case 'lite_llm': + throw new ValidationError( + 'Harness uses the "lite_llm" model provider, which the Strands export does not support. ' + + 'Switch the harness to bedrock, open_ai, or gemini before exporting.' 
+ ); + } +} + +// ============================================================================ +// Identity provider (non-Bedrock model credential) +// ============================================================================ + +interface IdentityResult { + provider: IdentityProviderRenderConfig | null; + credentialEntry: Credential | null; +} + +function resolveIdentityProvider(spec: HarnessSpec, context: ResolvedHarnessContext): IdentityResult { + if (spec.model.provider === 'bedrock') { + return { provider: null, credentialEntry: null }; + } + + const apiKeyArn = spec.model.apiKeyArn; + if (!apiKeyArn) { + return { provider: null, credentialEntry: null }; + } + + // Try to find an existing credential in the project that matches the ARN + const existing = context.projectSpec.credentials.find(c => { + if ('apiKeyArn' in c) return (c as { apiKeyArn?: string }).apiKeyArn === apiKeyArn; + if ('credentialProviderArn' in c) + return (c as { credentialProviderArn?: string }).credentialProviderArn === apiKeyArn; + return false; + }); + + if (existing) { + return { + provider: { name: existing.name, envVarName: computeDefaultCredentialEnvVarName(existing.name) }, + credentialEntry: null, // already in project + }; + } + + // Extract the credential provider name from a token-vault ARN if possible, otherwise + // synthesize one from the project name + provider. This ensures the deployed credential + // entry references the same provider that was used in the harness. + // ARN format: arn:aws:bedrock-agentcore:::token-vault//apikeycredentialprovider/ + const arnNameMatch = /\/apikeycredentialprovider\/([^/]+)$/.exec(apiKeyArn); + const credentialName = arnNameMatch + ? arnNameMatch[1]! 
+ : `${context.projectSpec.name}${resolveModelProvider(spec.model.provider)}`; + const credentialEntry: Credential = { + authorizerType: 'ApiKeyCredentialProvider', + name: credentialName, + }; + + return { + provider: { name: credentialName, envVarName: computeDefaultCredentialEnvVarName(credentialName) }, + credentialEntry, + }; +} + +// ============================================================================ +// Memory providers +// ============================================================================ + +interface MemoryResult { + providers: MemoryProviderRenderConfig[]; +} + +function resolveMemoryProviders(spec: HarnessSpec, context: ResolvedHarnessContext): MemoryResult { + // Only an `existing` reference resolves a concrete memory to wire as an env var. Managed memory's + // ARN is service-populated at deploy time (nothing to resolve at export); disabled has none. + if (spec.memory?.mode !== 'existing') return { providers: [] }; + + const { name: memName, arn: memArn } = spec.memory; + + if (memName) { + // Same-project memory by name + const memEntry = context.projectSpec.memories?.find(m => m.name === memName); + const strategies: MemoryStrategyType[] = (memEntry?.strategies ?? []).map((s: MemoryStrategy) => s.type); + const envVarName = `MEMORY_${memName.toUpperCase()}_ID`; + return { + providers: [{ name: memName, envVarName, strategies }], + }; + } + + if (memArn) { + // Try to cross-reference against deployed state + const deployedMemories = context.deployedResources?.memories ?? {}; + const match = Object.entries(deployedMemories).find(([, state]) => state.memoryArn === memArn); + if (match) { + const [resolvedName] = match; + const memEntry = context.projectSpec.memories?.find(m => m.name === resolvedName); + const strategies: MemoryStrategyType[] = (memEntry?.strategies ?? 
[]).map((s: MemoryStrategy) => s.type); + const envVarName = `MEMORY_${resolvedName.toUpperCase()}_ID`; + return { + providers: [{ name: resolvedName, envVarName, strategies }], + }; + } + + // External memory — hardcode ARN as env var + context.exportNotes.push({ + category: MEMORY_ARN_NOTE_CATEGORY, + message: + `The harness memory was referenced by ARN (${memArn}) and could not be matched to a ` + + 'same-project memory. A MEMORY_ARN env var will be used. Ensure the runtime IAM execution role ' + + 'has bedrock-agentcore:GetMemory and bedrock-agentcore:InvokeMemory on the above ARN.', + }); + return { + providers: [{ name: 'ExternalMemory', envVarName: 'MEMORY_ARN', strategies: [] }], + }; + } + + return { providers: [] }; +} + +// ============================================================================ +// Gateway providers +// ============================================================================ + +interface GatewayResult { + providers: GatewayProviderRenderConfig[]; +} + +function resolveGatewayProviders( + spec: HarnessSpec, + context: ResolvedHarnessContext, + allowedToolPatterns: string[] +): GatewayResult { + const providers: GatewayProviderRenderConfig[] = []; + + for (const tool of spec.tools) { + if (tool.type !== 'agentcore_gateway') continue; + if (!tool.config || !('agentCoreGateway' in tool.config)) continue; + if (!matchesAllowedTools(tool.name, allowedToolPatterns)) continue; + + const gwConfig = ( + tool.config as { agentCoreGateway: { gatewayArn: string; outboundAuth?: HarnessGatewayOutboundAuth } } + ).agentCoreGateway; + const gatewayArn = gwConfig.gatewayArn; + + // Try to find in deployed state (same-project gateway) + const deployedGateways = context.deployedResources?.mcp?.gateways ?? 
{}; + const deployedMatch = Object.entries(deployedGateways).find(([, state]) => state.gatewayArn === gatewayArn); + + if (deployedMatch) { + const [gatewayName] = deployedMatch; + const projectGateway = context.projectSpec.agentCoreGateways?.find(g => g.name === gatewayName); + const authType = projectGateway?.authorizerType ?? 'AWS_IAM'; + + const provider: GatewayProviderRenderConfig = { + name: gatewayName, + envVarName: GatewayPrimitive.computeDefaultGatewayEnvVarName(gatewayName), + authType, + }; + + if (authType === 'CUSTOM_JWT' && projectGateway?.authorizerConfiguration?.customJwtAuthorizer) { + const jwtConfig = projectGateway.authorizerConfiguration.customJwtAuthorizer; + provider.discoveryUrl = jwtConfig.discoveryUrl; + provider.credentialProviderName = computeManagedOAuthCredentialName(gatewayName); + const scopes = + 'allowedScopes' in jwtConfig ? (jwtConfig as { allowedScopes?: string[] }).allowedScopes : undefined; + if (scopes?.length) { + provider.scopes = scopes.join(' '); + } + } + + providers.push(provider); + // Same-project gateway: AgentCoreMcp.wireGatewayUrlsToAgents() auto-grants InvokeGateway + // to all runtime environments — no manual IAM step needed. + } else { + // External gateway — derive URL from ARN + const hardcodedUrl = deriveGatewayUrl(gatewayArn); + context.exportNotes.push({ + category: EXTERNAL_GATEWAY_NOTE_CATEGORY, + message: + `Gateway tool "${tool.name}" (ARN: ${gatewayArn}) was not found in this project's deployed state. ` + + `The URL has been hardcoded as "${hardcodedUrl}" in mcp_client/client.py. ` + + 'If the ARN changes (e.g. after re-deployment), update mcp_client/client.py manually.', + }); + + const outboundAuth = gwConfig.outboundAuth; + const authType = outboundAuth + ? 'oauth' in outboundAuth + ? 'CUSTOM_JWT' + : 'awsIam' in outboundAuth + ? 
'AWS_IAM'
+              : 'NONE'
+          : 'AWS_IAM';
+
+      if (authType === 'AWS_IAM') {
+        context.exportNotes.push({
+          category: GATEWAY_IAM_POLICY_NOTE_CATEGORY,
+          message:
+            `Gateway tool "${tool.name}" (ARN: ${gatewayArn}) uses AWS_IAM auth. ` +
+            `The exported runtime execution role is not automatically granted permission to invoke it.\n\n` +
+            `Add the following to agentcore/cdk/lib/cdk-stack.ts after \`this.application\` is created,\n` +
+            `replacing "YourAgentName" with the name of the exported agent (e.g. "${context.targetAgentName ?? 'MyHarnessAgent'}"):\n\n` +
+            ` const agentEnv = this.application.environments.get('${context.targetAgentName ?? 'YourAgentName'}');\n` +
+            ` agentEnv?.runtime.role.addToPrincipalPolicy(\n` +
+            ` new iam.PolicyStatement({\n` +
+            ` actions: ['bedrock-agentcore:InvokeGateway'],\n` +
+            ` resources: ['${gatewayArn}'],\n` +
+            ` })\n` +
+            ` );`,
+        });
+      }
+
+      const provider: GatewayProviderRenderConfig = {
+        name: tool.name,
+        envVarName: '',
+        authType,
+        hardcodedUrl,
+      };
+
+      if (authType === 'CUSTOM_JWT' && outboundAuth && 'oauth' in outboundAuth) {
+        provider.credentialProviderName = computeManagedOAuthCredentialName(tool.name);
+        const scopes = outboundAuth.oauth.scopes;
+        if (scopes?.length) {
+          provider.scopes = scopes.join(' ');
+        }
+      }
+
+      providers.push(provider);
+    }
+  }
+
+  return { providers };
+}
+
+// ============================================================================
+// Inline function tools
+// ============================================================================
+
+/** An inline_function tool as handed to the render config: name, description, and its input schema object. */
+interface InlineFunctionTool {
+  name: string;
+  description: string;
+  inputSchema: Record<string, unknown>;
+}
+
+/**
+ * Collect the harness's inline_function tools that pass the allowedTools filter.
+ *
+ * Entries whose config is missing or has no `inlineFunction` key are skipped, matching how
+ * the gateway and remote-MCP resolvers treat malformed tool entries, instead of throwing
+ * on the property access below.
+ */
+function resolveInlineFunctionTools(spec: HarnessSpec, allowedPatterns: string[]): InlineFunctionTool[] {
+  return spec.tools
+    .filter(t => t.type === 'inline_function')
+    .filter(t => matchesAllowedTools(t.name, allowedPatterns))
+    .filter(t => !!t.config && 'inlineFunction' in t.config)
+    .map(t => {
+      const cfg = (t.config as { inlineFunction: { description: string; inputSchema: Record<string, unknown> } })
+        .inlineFunction;
+      return { name: t.name, description: cfg.description, inputSchema: cfg.inputSchema };
+    });
+}
+
+// ============================================================================
+// Remote MCP tools
+// ============================================================================
+
+/** A remote MCP server entry; each header is represented by a credential reference rather than its raw value. */
+interface RemoteMcpTool {
+  name: string;
+  url: string;
+  headerCredentials?: { headerKey: string; credentialName: string; envVarName: string }[];
+}
+
+/** A credential synthesized for one MCP request header, with the env var name and the header's value. */
+interface McpCredentialEntry {
+  credential: Credential;
+  envVarName: string;
+  value: string;
+}
+
+interface RemoteMcpResolution {
+  tools: RemoteMcpTool[];
+  credentialEntries: McpCredentialEntry[];
+}
+
+function resolveRemoteMcpTools(
+  spec: HarnessSpec,
+  allowedPatterns: string[],
+  context: ResolvedHarnessContext
+): RemoteMcpResolution {
+  const tools: RemoteMcpTool[] = [];
+  const credentialEntries: McpCredentialEntry[] = [];
+
+  for (const tool of spec.tools) {
+    if (tool.type !== 'remote_mcp') continue;
+    if (!matchesAllowedTools(tool.name, allowedPatterns)) continue;
+    if (!tool.config || !('remoteMcp' in tool.config)) continue;
+
+    const cfg = (tool.config as { remoteMcp: { url: string; headers?: Record<string, string> } }).remoteMcp;
+    const headerKeys = Object.keys(cfg.headers ?? {});
+
+    let headerCredentials: RemoteMcpTool['headerCredentials'];
+    if (headerKeys.length > 0) {
+      headerCredentials = [];
+      const toolPrefix = tool.name.replace(/[^A-Za-z0-9]/g, '');
+      for (const hdr of headerKeys) {
+        const credName = `${context.projectSpec.name}Mcp${toolPrefix}${hdr.replace(/[^A-Za-z0-9]/g, '')}`;
+        const envVarName = computeDefaultCredentialEnvVarName(credName);
+        headerCredentials.push({ headerKey: hdr, credentialName: credName, envVarName });
+        credentialEntries.push({
+          credential: { authorizerType: 'ApiKeyCredentialProvider', name: credName },
+          envVarName,
+          value: cfg.headers![hdr] ??
'',
+        });
+      }
+      context.exportNotes.push({
+        category: MCP_HEADER_CREDS_NOTE_CATEGORY,
+        message:
+          `MCP tool "${tool.name}" has request headers managed via AgentCore Identity. ` +
+          `Credential entries added to agentcore.json; values written to agentcore/.env.local. ` +
+          `Credentials are provisioned automatically on \`agentcore deploy\`.\n\n` +
+          headerCredentials.map(h => ` ${h.credentialName} (env var: ${h.envVarName})`).join('\n'),
+      });
+    }
+
+    tools.push({ name: tool.name, url: cfg.url, headerCredentials });
+  }
+
+  return { tools, credentialEntries };
+}
+
+// ============================================================================
+// Helpers
+// ============================================================================
+
+/** An explicit override wins; otherwise a containerUri or dockerfile implies Container, else CodeZip. */
+function resolveBuildType(spec: HarnessSpec, override?: BuildType): BuildType {
+  if (override) return override;
+  if (spec.containerUri || spec.dockerfile) return 'Container';
+  return 'CodeZip';
+}
+
+/** Dockerfile name for Container builds: the spec's own dockerfile, or 'Dockerfile' when only a containerUri is set. */
+function resolveDockerfileName(spec: HarnessSpec, buildType: BuildType): string | undefined {
+  if (buildType !== 'Container') return undefined;
+  if (spec.dockerfile) return spec.dockerfile;
+  if (spec.containerUri) return 'Dockerfile'; // we generate it
+  return undefined;
+}
+
+/**
+ * Build the MCP endpoint URL for a gateway from its ARN.
+ * The region is the 4th colon-separated field; the gateway id is the last field with "gateway/" removed.
+ */
+function deriveGatewayUrl(gatewayArn: string): string {
+  // arn:aws:bedrock-agentcore:us-east-1:123456789012:gateway/abc123
+  const parts = gatewayArn.split(':');
+  // `||` rather than `??`: an ARN with an empty region field yields '' (not undefined),
+  // which would otherwise produce a host with an empty label ("...bedrock-agentcore..").
+  const region = parts[3] || 'us-east-1';
+  const resourcePart = parts[parts.length - 1] ?? '';
+  const gatewayId = resourcePart.replace('gateway/', '');
+  return `https://${gatewayId}.gateway.bedrock-agentcore.${region}.${dnsSuffix(region)}/mcp`;
+}
+
+/**
+ * Extract the ECR repository ARN from an ECR image URI.
+ * Returns undefined if the URI is not an ECR private registry URI.
+ *
+ * Handles formats:
+ *   <account>.dkr.ecr.<region>.amazonaws.com/<repo>:<tag>
+ *   <account>.dkr.ecr.<region>.amazonaws.com/<repo>@sha256:<digest>
+ */
+function ecrArnFromUri(uri: string, region?: string): string | undefined {
+  // Match private ECR URIs: <account>.dkr.ecr.<region>.<suffix>/<repo>[:<tag>|@<digest>]
+  const match = /^(\d{12})\.dkr\.ecr\.([^.]+)\.[^/]+\/([^:@]+)/.exec(uri);
+  if (!match) return undefined;
+  const account = match[1];
+  // The region captured from the URI takes precedence; `region` is only a fallback.
+  const ecrRegion = match[2] ?? region;
+  const repoName = match[3];
+  if (!ecrRegion || !repoName) return undefined;
+  return `${arnPrefix(ecrRegion)}:ecr:${ecrRegion}:${account}:repository/${repoName}`;
+}
+
+/** A path skill has `path` but no `gitUrl` (git skills also carry a `path`). */
+function isPathSkill(skill: HarnessSkill): skill is HarnessSkillPathSource {
+  return 'path' in skill && !('gitUrl' in skill);
+}
+
+function isS3Skill(skill: HarnessSkill): skill is HarnessSkillS3Source {
+  return 's3Uri' in skill;
+}
+
+/**
+ * Parse an s3:// skill URI into the bucket name and its object ARN.
+ * The exported agent fetches skills with boto3 at runtime (skills/fetcher.py),
+ * so the runtime execution role needs s3:ListBucket on the bucket and
+ * s3:GetObject on the objects under the prefix — permissions the managed
+ * harness never needed (it fetches skill files service-side).
+ *
+ * Returns undefined for a malformed URI (no bucket).
+ */
+function parseS3SkillArns(
+  s3Uri: string,
+  region?: string
+): { bucket: string; bucketArn: string; objectArn: string } | undefined {
+  const withoutScheme = s3Uri.replace(/^s3:\/\//, '');
+  const [bucket, ...prefixParts] = withoutScheme.split('/');
+  if (!bucket) return undefined;
+  // S3 ARNs are partition-qualified but region/account-less: arn:<partition>:s3:::<bucket>
+  const partition = arnPrefix(region ?? 'us-east-1'); // arnPrefix -> "arn:aws" | "arn:aws-us-gov" | "arn:aws-cn"
+  const bucketArn = `${partition}:s3:::${bucket}`;
+  const prefix = prefixParts.join('/').replace(/\/+$/, '');
+  const objectArn = prefix ?
`${bucketArn}/${prefix}/*` : `${bucketArn}/*`;
+  return { bucket, bucketArn, objectArn };
+}
+
+function isGitSkill(skill: HarnessSkill): skill is HarnessSkillGitSource {
+  return 'gitUrl' in skill;
+}
+
+function isAwsSkill(skill: HarnessSkill): skill is HarnessSkillAwsSkillsSource {
+  return 'awsSkills' in skill;
+}
+
+/** True when the spec has a tool of `toolType` and that tool's name passes the allowedTools patterns. Only the first tool of the type is considered. */
+function isToolIncluded(toolType: HarnessToolType, spec: HarnessSpec, allowedPatterns: string[]): boolean {
+  const tool = spec.tools.find(t => t.type === toolType);
+  if (!tool) return false;
+  return matchesAllowedTools(tool.name, allowedPatterns);
+}
+
+/**
+ * Decide whether `toolName` is permitted by any of the allowedTools `patterns`.
+ *
+ * - A literal '*' entry allows everything.
+ * - An exact string match allows the tool.
+ * - '@server' or '@server/tool' patterns are split into server and tool globs
+ *   ('@server' alone implies tool '*') and compared against either a flat
+ *   "server_tool" name or a qualified "server/tool" name.
+ * - Any other pattern is glob-matched against the whole name.
+ */
+function matchesAllowedTools(toolName: string, patterns: string[]): boolean {
+  if (patterns.includes('*')) return true;
+  // Mirrors Harness runtime _matches() logic: tool_name is "server/tool" qualified
+  for (const pattern of patterns) {
+    if (pattern === toolName) return true;
+    if (pattern.startsWith('@')) {
+      const slashIdx = pattern.indexOf('/', 1);
+      const pServer = slashIdx === -1 ? pattern.slice(1) : pattern.slice(1, slashIdx);
+      const pTool = slashIdx === -1 ? '*' : pattern.slice(slashIdx + 1);
+      const slashInName = toolName.indexOf('/');
+      if (slashInName === -1) {
+        // MCP tools stored as "server_tool" flat names — keep legacy behaviour
+        if (fnmatch(`${pServer}_${pTool}`, toolName)) return true;
+      } else {
+        // Qualified names like "builtin/shell"
+        const nameServer = toolName.slice(0, slashInName);
+        const nameTool = toolName.slice(slashInName + 1);
+        if (fnmatch(pServer, nameServer) && fnmatch(pTool, nameTool)) return true;
+      }
+    } else {
+      if (fnmatch(pattern, toolName)) return true;
+    }
+  }
+  return false;
+}
+
+/**
+ * Translate the harness truncation config into the snake_case keys used by the render config.
+ *
+ * - sliding_window: `{ window_size }` from `slidingWindow.messagesCount`, when set.
+ * - summarization: only the keys that are present, renamed via `keyMap`.
+ * Returns undefined when there is no config, no recognised strategy, or nothing to emit.
+ */
+function resolveTruncationConfig(truncation: HarnessTruncationConfig | undefined): Record<string, unknown> | undefined {
+  if (!truncation?.config) return undefined;
+  const { strategy, config } = truncation;
+  if (strategy === 'sliding_window' && 'slidingWindow' in config) {
+    const sw = config.slidingWindow;
+    return sw?.messagesCount !== undefined ? { window_size: sw.messagesCount } : undefined;
+  }
+  if (strategy === 'summarization' && 'summarization' in config) {
+    const s = config.summarization as Record<string, unknown>;
+    // camelCase harness key -> snake_case key in the emitted config
+    const keyMap: Record<string, string> = {
+      summaryRatio: 'summary_ratio',
+      preserveRecentMessages: 'preserve_recent_messages',
+      summarizationSystemPrompt: 'summarization_system_prompt',
+    };
+    const out = Object.fromEntries(
+      Object.entries(keyMap)
+        .filter(([k]) => s[k] !== undefined)
+        .map(([k, v]) => [v, s[k]])
+    );
+    return Object.keys(out).length > 0 ? out : undefined;
+  }
+  return undefined;
+}
+
+/**
+ * Return the trailing identifier of the ARN stored at `config[configKey][arnField]`
+ * on the first tool of `toolType`, or undefined when the tool, config key, ARN, or
+ * a '/' separator is missing.
+ */
+function extractToolIdentifier(
+  spec: HarnessSpec,
+  toolType: HarnessToolType,
+  configKey: string,
+  arnField: string
+): string | undefined {
+  const tool = spec.tools.find(t => t.type === toolType);
+  if (!tool?.config || !(configKey in tool.config)) return undefined;
+  const arn = (tool.config as Record<string, Record<string, string>>)[configKey]?.[arnField];
+  if (!arn) return undefined;
+  // ARN format: arn:aws:bedrock-agentcore:<region>:<account>:<resource-type>/<id>
+  const slashIdx = arn.lastIndexOf('/');
+  return slashIdx === -1 ? undefined : arn.slice(slashIdx + 1);
+}
+
+function isBuiltinIncluded(builtinName: string, patterns: string[]): boolean {
+  // Mirrors Harness runtime: builtins are keyed as "builtin/<name>", so @builtin or @builtin/<name> patterns
+  // (or a pattern written against the qualified "builtin/<name>" form) match.
+  // Plain "shell" does NOT match the "builtin/shell" builtin (it would match a tool literally named "shell").
+  return matchesAllowedTools(`builtin/${builtinName}`, patterns);
+}
+
+function addBrowserCodeInterpreterNotes(
+  spec: HarnessSpec,
+  allowedToolPatterns: string[],
+  buildType: BuildType,
+  context: ResolvedHarnessContext
+): void {
+  const agentName = context.targetAgentName;
+
+  if (isToolIncluded('agentcore_browser', spec, allowedToolPatterns)) {
+    if (buildType !== 'Container') {
+      context.exportNotes.push({
+        category: BROWSER_CODZIP_NOTE_CATEGORY,
+        message:
+          'The browser tool requires a Container build to run.
In a CodeZip (Lambda-style) runtime the ' + + 'Playwright node driver cannot be executed and the tool will fail at invocation time.\n\n' + + 'Re-export with `--build Container` to include browser tool support:\n\n' + + ` agentcore export harness --name ${spec.name} --target-agent-name ${agentName} --build Container`, + }); + } else { + const browserTool = spec.tools.find(t => t.type === 'agentcore_browser'); + const customArn = + browserTool?.config && 'agentCoreBrowser' in browserTool.config + ? (browserTool.config as { agentCoreBrowser: { browserArn?: string } }).agentCoreBrowser.browserArn + : undefined; + const resource = customArn ?? `arn:*:bedrock-agentcore:\${Stack.of(this).region}:aws:browser/*`; + context.exportNotes.push({ + category: BROWSER_IAM_POLICY_NOTE_CATEGORY, + message: + `The exported runtime execution role is not automatically granted permission to use the browser tool.\n\n` + + `Add the following to agentcore/cdk/lib/cdk-stack.ts after \`this.application\` is created:\n\n` + + ` const agentEnv = this.application.environments.get('${agentName}');\n` + + ` agentEnv?.runtime.role.addToPrincipalPolicy(\n` + + ` new iam.PolicyStatement({\n` + + ` actions: [\n` + + ` 'bedrock-agentcore:StartBrowserSession',\n` + + ` 'bedrock-agentcore:StopBrowserSession',\n` + + ` 'bedrock-agentcore:GetBrowserSession',\n` + + ` 'bedrock-agentcore:ListBrowserSessions',\n` + + ` 'bedrock-agentcore:UpdateBrowserStream',\n` + + ` 'bedrock-agentcore:ConnectBrowserAutomationStream',\n` + + ` 'bedrock-agentcore:ConnectBrowserLiveViewStream',\n` + + ` ],\n` + + ` resources: [\`${resource}\`],\n` + + ` })\n` + + ` );`, + }); + } + } + + if (isToolIncluded('agentcore_code_interpreter', spec, allowedToolPatterns)) { + const ciTool = spec.tools.find(t => t.type === 'agentcore_code_interpreter'); + const customArn = + ciTool?.config && 'agentCoreCodeInterpreter' in ciTool.config + ? 
(ciTool.config as { agentCoreCodeInterpreter: { codeInterpreterArn?: string } }).agentCoreCodeInterpreter + .codeInterpreterArn + : undefined; + const resource = customArn ?? `arn:*:bedrock-agentcore:\${Stack.of(this).region}:aws:code-interpreter/*`; + context.exportNotes.push({ + category: CODE_INTERPRETER_IAM_POLICY_NOTE_CATEGORY, + message: + `The exported runtime execution role is not automatically granted permission to use the code interpreter tool.\n\n` + + `Add the following to agentcore/cdk/lib/cdk-stack.ts after \`this.application\` is created:\n\n` + + ` const agentEnv = this.application.environments.get('${agentName}');\n` + + ` agentEnv?.runtime.role.addToPrincipalPolicy(\n` + + ` new iam.PolicyStatement({\n` + + ` actions: [\n` + + ` 'bedrock-agentcore:StartCodeInterpreterSession',\n` + + ` 'bedrock-agentcore:StopCodeInterpreterSession',\n` + + ` 'bedrock-agentcore:GetCodeInterpreterSession',\n` + + ` 'bedrock-agentcore:ListCodeInterpreterSessions',\n` + + ` 'bedrock-agentcore:InvokeCodeInterpreter',\n` + + ` ],\n` + + ` resources: [\`${resource}\`],\n` + + ` })\n` + + ` );`, + }); + } +} + +function fnmatch(pattern: string, str: string): boolean { + const re = new RegExp( + '^' + + pattern + .replace(/[.+^${}()|[\]\\]/g, '\\$&') + .replace(/\*/g, '.*') + .replace(/\?/g, '.') + + '$' + ); + return re.test(str); +} diff --git a/src/cli/commands/export/harness-resolver.ts b/src/cli/commands/export/harness-resolver.ts new file mode 100644 index 000000000..c88a3f1bd --- /dev/null +++ b/src/cli/commands/export/harness-resolver.ts @@ -0,0 +1,95 @@ +import { ConfigIO, requireConfigRoot } from '../../../lib'; +import { ValidationError } from '../../../lib/errors/types'; +import type { DeployedResourceState } from '../../../schema'; +import { DEFAULT_SYSTEM_PROMPT } from './constants'; +import type { ResolvedHarnessContext } from './types'; +import { existsSync, readFileSync } from 'node:fs'; +import { join } from 'node:path'; + +/** + * Read and validate all 
on-disk inputs for the harness export. + * Throws ValidationError for user-fixable problems. + */ +export async function resolveHarnessContext( + harnessName: string, + targetAgentName: string, + configBaseDir?: string +): Promise { + const baseDir = configBaseDir ?? requireConfigRoot(); + const configIO = new ConfigIO({ baseDir }); + const projectRoot = join(baseDir, '..'); + + // 1. Read project spec and validate harness exists before any harness file I/O + const projectSpec = await configIO.readProjectSpec(); + + const harnessEntry = projectSpec.harnesses?.find(h => h.name === harnessName); + if (!harnessEntry) { + throw new ValidationError( + `Harness "${harnessName}" not found in agentcore.json. Available harnesses: ${(projectSpec.harnesses ?? []).map(h => h.name).join(', ') || 'none'}` + ); + } + + // 2. Validate target agent name not already taken + if (projectSpec.runtimes.some(r => r.name === targetAgentName)) { + throw new ValidationError( + `A runtime agent named "${targetAgentName}" already exists. Choose a different --target-agent-name.` + ); + } + + // 2b. Validate the target directory does not already exist on disk. A leftover directory + // (e.g. from a removed agent or a prior failed export) has no runtime entry, so the + // check above would pass and the render/copy would silently overwrite it. + const targetAgentDir = join(projectRoot, 'app', targetAgentName); + if (existsSync(targetAgentDir)) { + throw new ValidationError( + `The directory "app/${targetAgentName}/" already exists. Remove it or choose a different --target-agent-name.` + ); + } + + // 3. Read harness spec + const spec = await configIO.readHarnessSpec(harnessName); + + // 4. 
Read system prompt — harness app files live in projectRoot/app// + const harnessDir = join(projectRoot, 'app', harnessName); + const systemPromptPath = join(harnessDir, 'system-prompt.md'); + let systemPrompt: string; + if (existsSync(systemPromptPath)) { + systemPrompt = readFileSync(systemPromptPath, 'utf8').trim(); + } else if (spec.systemPrompt) { + systemPrompt = spec.systemPrompt; + } else { + systemPrompt = DEFAULT_SYSTEM_PROMPT; + } + + // 5. Read deployed state (optional — absent before first deploy) + let deployedResources: DeployedResourceState | null = null; + let region: string | undefined; + try { + const deployedState = await configIO.readDeployedState(); + // Use the first target's resources (there is only one target per project) + const firstTarget = Object.values(deployedState.targets)[0]; + deployedResources = firstTarget?.resources ?? null; + } catch { + // File absent or unreadable — proceed without it + } + + try { + const targets = await configIO.readAWSDeploymentTargets(); + region = targets[0]?.region; + } catch { + // No targets configured yet + } + + return { + harnessName, + targetAgentName, + spec, + systemPrompt, + projectSpec, + deployedResources, + configBaseDir: baseDir, + projectRoot, + exportNotes: [], + region, + }; +} diff --git a/src/cli/commands/export/index.ts b/src/cli/commands/export/index.ts new file mode 100644 index 000000000..24587eb82 --- /dev/null +++ b/src/cli/commands/export/index.ts @@ -0,0 +1,83 @@ +import { serializeResult } from '../../../lib/result'; +import { ANSI, COMMAND_DESCRIPTIONS } from '../../constants'; +import { renderTUI } from '../../tui/render'; +import { handleExportHarness } from './harness-action'; +import type { Command } from '@commander-js/extra-typings'; + +const { green, red, cyan, dim, reset } = ANSI; + +export function registerExport(program: Command): void { + const exportCmd = program + .command('export') + .description(COMMAND_DESCRIPTIONS.export) + .showHelpAfterError() + 
.showSuggestionAfterError(); + + exportCmd + .command('harness') + .description('Export an in-project harness to a Python Strands runtime agent') + .option('--name ', 'Harness name [non-interactive]') + .option( + '--target-agent-name ', + 'Name for the generated runtime agent (default: Agent) [non-interactive]' + ) + .option('--build ', 'Build type: CodeZip or Container [non-interactive]') + .option('--json', 'Output results as JSON') + .action(async options => { + if (!options.name) { + if (options.json) { + console.log(JSON.stringify({ success: false, error: '--name is required in non-interactive mode' })); + process.exit(1); + } + await renderTUI({ initialRoute: { name: 'export-harness' }, actionOnBack: 'exit' }); + return; + } + + const steps: string[] = []; + let result: Awaited>; + try { + result = await handleExportHarness(options, { + onProgress: (message: string) => { + if (options.json) return; + steps.push(message); + console.log(`${green}[done]${reset} ${message}`); + }, + }); + } catch (err) { + if (options.json) { + console.log( + JSON.stringify({ success: false, error: { message: err instanceof Error ? err.message : String(err) } }) + ); + process.exit(1); + } + throw err; + } + + if (options.json) { + console.log(JSON.stringify(serializeResult(result))); + if (!result.success) process.exit(1); + return; + } + + if (!result.success) { + console.error(`\n${red}[error]${reset} Export failed: ${result.error.message}`); + process.exit(1); + } + + const targetAgentName = options.targetAgentName ?? 
`${options.name}Agent`; + + console.log(''); + console.log(`${green}Exported harness ${options.name} → runtime agent ${targetAgentName}${reset}`); + console.log(''); + console.log(`${dim}Generated:${reset}`); + console.log(` app/${targetAgentName}/ Python agent (Strands)`); + console.log(` agentcore/agentcore.json updated`); + console.log(` EXPORT_NOTES.md review for manual follow-up items`); + console.log(''); + console.log('Next steps:'); + console.log(''); + console.log(` ${cyan}agentcore deploy${reset} ${dim}Deploy the new runtime agent${reset}`); + console.log(` ${cyan}agentcore dev${reset} ${dim}Run the agent locally${reset}`); + console.log(''); + }); +} diff --git a/src/cli/commands/export/types.ts b/src/cli/commands/export/types.ts new file mode 100644 index 000000000..188f9d63b --- /dev/null +++ b/src/cli/commands/export/types.ts @@ -0,0 +1,78 @@ +import type { AgentCoreProjectSpec, Credential, DeployedResourceState, HarnessSpec } from '../../../schema'; +import type { + AgentRenderConfig, + GatewayProviderRenderConfig, + IdentityProviderRenderConfig, + MemoryProviderRenderConfig, +} from '../../templates/types'; + +// ============================================================================ +// CLI options +// ============================================================================ + +export interface ExportHarnessOptions { + name?: string; + targetAgentName?: string; + build?: string; + json?: boolean; +} + +// ============================================================================ +// Resolved context (all on-disk reads done before mapping) +// ============================================================================ + +export interface ResolvedHarnessContext { + harnessName: string; + targetAgentName: string; + spec: HarnessSpec; + systemPrompt: string; + projectSpec: AgentCoreProjectSpec; + /** First target's resources from deployed-state.json, or null when file absent */ + deployedResources: DeployedResourceState | null; + 
configBaseDir: string; + projectRoot: string; + exportNotes: ExportNote[]; + /** AWS region from the first deployment target, or undefined if not configured */ + region?: string; +} + +// ============================================================================ +// Export notes (collected during mapping, written to EXPORT_NOTES.md) +// ============================================================================ + +export interface ExportNote { + category: string; + message: string; +} + +// ============================================================================ +// Mapping output +// ============================================================================ + +export interface HarnessMappingResult { + renderConfig: AgentRenderConfig; + agentEnvSpec: import('../../../schema').AgentEnvSpec; + /** Model identity credential, if any */ + credentialEntry: Credential | null; + /** One credential entry per MCP header that carries a secret value */ + mcpCredentialEntries: { credential: Credential; envVarName: string; value: string }[]; +} + +// ============================================================================ +// Resolved sub-objects (internal to mapper) +// ============================================================================ + +export interface ResolvedGatewayProvider extends GatewayProviderRenderConfig { + /** True when the gateway was found in this project's deployed state */ + isSameProject: boolean; + /** Hardcoded URL used when gateway is external (not in deployed state) */ + hardcodedUrl?: string; +} + +export interface ResolvedMemoryProvider extends MemoryProviderRenderConfig { + isSameProject: boolean; +} + +export interface ResolvedIdentityProvider extends IdentityProviderRenderConfig { + credentialEntry: Credential | null; +} diff --git a/src/cli/commands/import/__tests__/import-gateway-flow.test.ts b/src/cli/commands/import/__tests__/import-gateway-flow.test.ts index 153ead77a..02628376d 100644 --- 
a/src/cli/commands/import/__tests__/import-gateway-flow.test.ts +++ b/src/cli/commands/import/__tests__/import-gateway-flow.test.ts @@ -176,12 +176,27 @@ function setupCommonMocks() { // ── Tests ──────────────────────────────────────────────────────────────────── describe('handleImportGateway', () => { + let originalAwsRegion: string | undefined; + let originalAwsDefaultRegion: string | undefined; + beforeEach(() => { + originalAwsRegion = process.env.AWS_REGION; + originalAwsDefaultRegion = process.env.AWS_DEFAULT_REGION; + process.env.AWS_REGION = REGION; + delete process.env.AWS_DEFAULT_REGION; vi.clearAllMocks(); setupCommonMocks(); }); afterEach(() => { + if (originalAwsRegion !== undefined) { + process.env.AWS_REGION = originalAwsRegion; + } else { + delete process.env.AWS_REGION; + } + if (originalAwsDefaultRegion !== undefined) { + process.env.AWS_DEFAULT_REGION = originalAwsDefaultRegion; + } vi.restoreAllMocks(); }); diff --git a/src/cli/commands/import/__tests__/import-gateway-spec.test.ts b/src/cli/commands/import/__tests__/import-gateway-spec.test.ts index 7c2963edf..2df382f8c 100644 --- a/src/cli/commands/import/__tests__/import-gateway-spec.test.ts +++ b/src/cli/commands/import/__tests__/import-gateway-spec.test.ts @@ -35,7 +35,7 @@ const emptyTargets: AgentCoreGatewayTarget[] = []; describe('toGatewaySpec – authorizer type mapping', () => { it('NONE authorizerType: no authorizerConfiguration in output', () => { const gw = makeGateway({ authorizerType: 'NONE' }); - const result = toGatewaySpec(gw, emptyTargets, 'my_gw'); + const result = toGatewaySpec({ gateway: gw, targets: emptyTargets, localName: 'my_gw' }); expect(result.authorizerType).toBe('NONE'); expect(result).not.toHaveProperty('authorizerConfiguration'); @@ -43,7 +43,7 @@ describe('toGatewaySpec – authorizer type mapping', () => { it('AWS_IAM authorizerType: maps to AWS_IAM, no authorizerConfiguration', () => { const gw = makeGateway({ authorizerType: 'AWS_IAM' }); - const result = 
toGatewaySpec(gw, emptyTargets, 'my_gw'); + const result = toGatewaySpec({ gateway: gw, targets: emptyTargets, localName: 'my_gw' }); expect(result.authorizerType).toBe('AWS_IAM'); expect(result).not.toHaveProperty('authorizerConfiguration'); @@ -61,7 +61,7 @@ describe('toGatewaySpec – authorizer type mapping', () => { }, }, }); - const result = toGatewaySpec(gw, emptyTargets, 'my_gw'); + const result = toGatewaySpec({ gateway: gw, targets: emptyTargets, localName: 'my_gw' }); expect(result.authorizerType).toBe('CUSTOM_JWT'); expect(result.authorizerConfiguration).toBeDefined(); @@ -100,7 +100,7 @@ describe('toGatewaySpec – authorizer type mapping', () => { }, }, }); - const result = toGatewaySpec(gw, emptyTargets, 'my_gw'); + const result = toGatewaySpec({ gateway: gw, targets: emptyTargets, localName: 'my_gw' }); const claims = result.authorizerConfiguration!.customJwtAuthorizer!.customClaims!; expect(claims).toHaveLength(2); @@ -130,7 +130,7 @@ describe('toGatewaySpec – authorizer type mapping', () => { }, }, }); - const result = toGatewaySpec(gw, emptyTargets, 'my_gw'); + const result = toGatewaySpec({ gateway: gw, targets: emptyTargets, localName: 'my_gw' }); const jwt = result.authorizerConfiguration!.customJwtAuthorizer!; expect(jwt).not.toHaveProperty('allowedAudience'); @@ -141,9 +141,8 @@ describe('toGatewaySpec – authorizer type mapping', () => { it('missing authorizerType: defaults to NONE', () => { const gw = makeGateway(); // Simulate undefined authorizerType by deleting after construction - // eslint-disable-next-line @typescript-eslint/no-explicit-any delete (gw as any).authorizerType; - const result = toGatewaySpec(gw, emptyTargets, 'my_gw'); + const result = toGatewaySpec({ gateway: gw, targets: emptyTargets, localName: 'my_gw' }); expect(result.authorizerType).toBe('NONE'); expect(result).not.toHaveProperty('authorizerConfiguration'); @@ -159,7 +158,7 @@ describe('toGatewaySpec – semantic search', () => { const gw = makeGateway({ 
protocolConfiguration: { mcp: { searchType: 'SEMANTIC' } }, }); - const result = toGatewaySpec(gw, emptyTargets, 'my_gw'); + const result = toGatewaySpec({ gateway: gw, targets: emptyTargets, localName: 'my_gw' }); expect(result.enableSemanticSearch).toBe(true); }); @@ -168,14 +167,14 @@ describe('toGatewaySpec – semantic search', () => { const gw = makeGateway({ protocolConfiguration: { mcp: { searchType: 'KEYWORD' } }, }); - const result = toGatewaySpec(gw, emptyTargets, 'my_gw'); + const result = toGatewaySpec({ gateway: gw, targets: emptyTargets, localName: 'my_gw' }); expect(result.enableSemanticSearch).toBe(false); }); it('protocolConfiguration missing: enableSemanticSearch is false', () => { const gw = makeGateway(); - const result = toGatewaySpec(gw, emptyTargets, 'my_gw'); + const result = toGatewaySpec({ gateway: gw, targets: emptyTargets, localName: 'my_gw' }); expect(result.enableSemanticSearch).toBe(false); }); @@ -188,21 +187,21 @@ describe('toGatewaySpec – semantic search', () => { describe('toGatewaySpec – exception level', () => { it('exceptionLevel=DEBUG: maps to DEBUG', () => { const gw = makeGateway({ exceptionLevel: 'DEBUG' }); - const result = toGatewaySpec(gw, emptyTargets, 'my_gw'); + const result = toGatewaySpec({ gateway: gw, targets: emptyTargets, localName: 'my_gw' }); expect(result.exceptionLevel).toBe('DEBUG'); }); it('exceptionLevel undefined: maps to NONE', () => { const gw = makeGateway({ exceptionLevel: undefined }); - const result = toGatewaySpec(gw, emptyTargets, 'my_gw'); + const result = toGatewaySpec({ gateway: gw, targets: emptyTargets, localName: 'my_gw' }); expect(result.exceptionLevel).toBe('NONE'); }); it('exceptionLevel other value: maps to NONE', () => { const gw = makeGateway({ exceptionLevel: 'VERBOSE' }); - const result = toGatewaySpec(gw, emptyTargets, 'my_gw'); + const result = toGatewaySpec({ gateway: gw, targets: emptyTargets, localName: 'my_gw' }); expect(result.exceptionLevel).toBe('NONE'); }); @@ -220,7 +219,7 
@@ describe('toGatewaySpec – policy engine', () => { mode: 'ENFORCE', }, }); - const result = toGatewaySpec(gw, emptyTargets, 'my_gw'); + const result = toGatewaySpec({ gateway: gw, targets: emptyTargets, localName: 'my_gw' }); expect(result.policyEngineConfiguration).toBeDefined(); expect(result.policyEngineConfiguration!.policyEngineName).toBe('my_policy_engine'); @@ -229,7 +228,7 @@ describe('toGatewaySpec – policy engine', () => { it('policyEngineConfiguration absent: field omitted', () => { const gw = makeGateway(); - const result = toGatewaySpec(gw, emptyTargets, 'my_gw'); + const result = toGatewaySpec({ gateway: gw, targets: emptyTargets, localName: 'my_gw' }); expect(result).not.toHaveProperty('policyEngineConfiguration'); }); @@ -242,7 +241,7 @@ describe('toGatewaySpec – policy engine', () => { describe('toGatewaySpec – other fields', () => { it('resourceName is always set to gateway.name', () => { const gw = makeGateway({ name: 'AwsGatewayName' }); - const result = toGatewaySpec(gw, emptyTargets, 'local_name'); + const result = toGatewaySpec({ gateway: gw, targets: emptyTargets, localName: 'local_name' }); expect(result.resourceName).toBe('AwsGatewayName'); expect(result.name).toBe('local_name'); @@ -250,49 +249,49 @@ describe('toGatewaySpec – other fields', () => { it('description present: included in output', () => { const gw = makeGateway({ description: 'My gateway description' }); - const result = toGatewaySpec(gw, emptyTargets, 'my_gw'); + const result = toGatewaySpec({ gateway: gw, targets: emptyTargets, localName: 'my_gw' }); expect(result.description).toBe('My gateway description'); }); it('description undefined: omitted from output', () => { const gw = makeGateway({ description: undefined }); - const result = toGatewaySpec(gw, emptyTargets, 'my_gw'); + const result = toGatewaySpec({ gateway: gw, targets: emptyTargets, localName: 'my_gw' }); expect(result).not.toHaveProperty('description'); }); it('tags present with entries: included in output', 
() => { const gw = makeGateway({ tags: { env: 'prod', team: 'platform' } }); - const result = toGatewaySpec(gw, emptyTargets, 'my_gw'); + const result = toGatewaySpec({ gateway: gw, targets: emptyTargets, localName: 'my_gw' }); expect(result.tags).toEqual({ env: 'prod', team: 'platform' }); }); it('tags empty object: omitted from output', () => { const gw = makeGateway({ tags: {} }); - const result = toGatewaySpec(gw, emptyTargets, 'my_gw'); + const result = toGatewaySpec({ gateway: gw, targets: emptyTargets, localName: 'my_gw' }); expect(result).not.toHaveProperty('tags'); }); it('tags undefined: omitted from output', () => { const gw = makeGateway({ tags: undefined }); - const result = toGatewaySpec(gw, emptyTargets, 'my_gw'); + const result = toGatewaySpec({ gateway: gw, targets: emptyTargets, localName: 'my_gw' }); expect(result).not.toHaveProperty('tags'); }); it('executionRoleArn: mapped from gateway.roleArn', () => { const gw = makeGateway({ roleArn: 'arn:aws:iam::123456789012:role/GatewayRole' }); - const result = toGatewaySpec(gw, emptyTargets, 'my_gw'); + const result = toGatewaySpec({ gateway: gw, targets: emptyTargets, localName: 'my_gw' }); expect(result.executionRoleArn).toBe('arn:aws:iam::123456789012:role/GatewayRole'); }); it('roleArn undefined: executionRoleArn omitted from output', () => { const gw = makeGateway({ roleArn: undefined }); - const result = toGatewaySpec(gw, emptyTargets, 'my_gw'); + const result = toGatewaySpec({ gateway: gw, targets: emptyTargets, localName: 'my_gw' }); expect(result).not.toHaveProperty('executionRoleArn'); }); @@ -302,7 +301,7 @@ describe('toGatewaySpec – other fields', () => { { name: 'target1', targetType: 'mcpServer', endpoint: 'https://mcp.example.com' }, ]; const gw = makeGateway(); - const result = toGatewaySpec(gw, targets, 'my_gw'); + const result = toGatewaySpec({ gateway: gw, targets, localName: 'my_gw' }); expect(result.targets).toBe(targets); expect(result.targets).toHaveLength(1); diff --git 
a/src/cli/commands/import/__tests__/import-gateway-targets.test.ts b/src/cli/commands/import/__tests__/import-gateway-targets.test.ts deleted file mode 100644 index 98284c297..000000000 --- a/src/cli/commands/import/__tests__/import-gateway-targets.test.ts +++ /dev/null @@ -1,362 +0,0 @@ -/** - * Import Gateway Target Mapping Unit Tests - * - * Covers toGatewayTargetSpec for non-mcpServer target types: - * - apiGateway: toolFilters, toolOverrides, outboundAuth - * - openApiSchema: S3 URI mapping, missing URI warning - * - smithyModel: S3 URI mapping, missing URI warning - * - lambda: lambdaFunctionArn mapping, missing ARN, inline-only schema - * - Unrecognized target type - */ -import type { GatewayTargetDetail } from '../../../aws/agentcore-control'; -import { toGatewayTargetSpec } from '../import-gateway'; -import { assert, describe, expect, it, vi } from 'vitest'; - -/** Helper to build a minimal GatewayTargetDetail with only the fields under test. */ -function baseDetail(overrides: Partial = {}): GatewayTargetDetail { - return { - targetId: 'tgt-001', - name: 'test_target', - status: 'READY', - ...overrides, - }; -} - -// ============================================================================ -// apiGateway target -// ============================================================================ - -describe('toGatewayTargetSpec — apiGateway', () => { - it('maps restApiId, stage, and toolFilters correctly', () => { - const detail = baseDetail({ - targetConfiguration: { - mcp: { - apiGateway: { - restApiId: 'abc123', - stage: 'prod', - apiGatewayToolConfiguration: { - toolFilters: [ - { filterPath: '/pets', methods: ['GET', 'POST'] }, - { filterPath: '/users', methods: ['GET'] }, - ], - }, - }, - }, - }, - }); - - const onProgress = vi.fn(); - const result = toGatewayTargetSpec(detail, new Map(), onProgress); - - assert(result.success); - expect(result.target!.name).toBe('test_target'); - expect(result.target!.targetType).toBe('apiGateway'); - - // 
eslint-disable-next-line @typescript-eslint/no-explicit-any - const apigw = (result.target as any).apiGateway; - expect(apigw.restApiId).toBe('abc123'); - expect(apigw.stage).toBe('prod'); - expect(apigw.apiGatewayToolConfiguration.toolFilters).toEqual([ - { filterPath: '/pets', methods: ['GET', 'POST'] }, - { filterPath: '/users', methods: ['GET'] }, - ]); - }); - - it('maps toolOverrides when present', () => { - const detail = baseDetail({ - targetConfiguration: { - mcp: { - apiGateway: { - restApiId: 'abc123', - stage: 'prod', - apiGatewayToolConfiguration: { - toolFilters: [], - toolOverrides: [ - { name: 'listPets', path: '/pets', method: 'GET', description: 'List all pets' }, - { name: 'createPet', path: '/pets', method: 'POST' }, - ], - }, - }, - }, - }, - }); - - const onProgress = vi.fn(); - const result = toGatewayTargetSpec(detail, new Map(), onProgress); - - assert(result.success); - // eslint-disable-next-line @typescript-eslint/no-explicit-any - const apigw = (result.target as any).apiGateway; - expect(apigw.apiGatewayToolConfiguration.toolOverrides).toEqual([ - { name: 'listPets', path: '/pets', method: 'GET', description: 'List all pets' }, - { name: 'createPet', path: '/pets', method: 'POST' }, - ]); - }); - - it('omits toolOverrides when not present', () => { - const detail = baseDetail({ - targetConfiguration: { - mcp: { - apiGateway: { - restApiId: 'abc123', - stage: 'prod', - apiGatewayToolConfiguration: { - toolFilters: [{ filterPath: '/pets', methods: ['GET'] }], - }, - }, - }, - }, - }); - - const onProgress = vi.fn(); - const result = toGatewayTargetSpec(detail, new Map(), onProgress); - - assert(result.success); - // eslint-disable-next-line @typescript-eslint/no-explicit-any - const apigw = (result.target as any).apiGateway; - expect(apigw.apiGatewayToolConfiguration.toolOverrides).toBeUndefined(); - }); - - it('returns outboundAuth when OAuth credential is configured', () => { - const providerArn = 
'arn:aws:bedrock-agentcore:us-west-2:123456789012:credential-provider/cred-001'; - const detail = baseDetail({ - targetConfiguration: { - mcp: { - apiGateway: { - restApiId: 'abc123', - stage: 'prod', - apiGatewayToolConfiguration: { toolFilters: [] }, - }, - }, - }, - credentialProviderConfigurations: [ - { - credentialProviderType: 'OAUTH', - credentialProvider: { - oauthCredentialProvider: { - providerArn, - scopes: ['read', 'write'], - }, - }, - }, - ], - }); - - const credentials = new Map([[providerArn, 'my_oauth_cred']]); - const onProgress = vi.fn(); - const result = toGatewayTargetSpec(detail, credentials, onProgress); - - assert(result.success); - expect(result.target!.outboundAuth).toEqual({ - type: 'OAUTH', - credentialName: 'my_oauth_cred', - scopes: ['read', 'write'], - }); - }); -}); - -// ============================================================================ -// openApiSchema target -// ============================================================================ - -describe('toGatewayTargetSpec — openApiSchema', () => { - it('maps S3 URI and bucketOwnerAccountId correctly', () => { - const detail = baseDetail({ - targetConfiguration: { - mcp: { - openApiSchema: { - s3: { uri: 's3://my-bucket/schema.yaml', bucketOwnerAccountId: '123456789012' }, - }, - }, - }, - }); - - const onProgress = vi.fn(); - const result = toGatewayTargetSpec(detail, new Map(), onProgress); - - assert(result.success); - expect(result.target!.name).toBe('test_target'); - expect(result.target!.targetType).toBe('openApiSchema'); - - // eslint-disable-next-line @typescript-eslint/no-explicit-any - const schemaSource = (result.target as any).schemaSource; - expect(schemaSource.s3.uri).toBe('s3://my-bucket/schema.yaml'); - expect(schemaSource.s3.bucketOwnerAccountId).toBe('123456789012'); - }); - - it('returns undefined and emits warning when S3 URI is missing', () => { - const detail = baseDetail({ - targetConfiguration: { - mcp: { - openApiSchema: { inlinePayload: 
'{"openapi":"3.0.0"}' }, - }, - }, - }); - - const onProgress = vi.fn(); - const result = toGatewayTargetSpec(detail, new Map(), onProgress); - - assert(result.success); - expect(result.target).toBeUndefined(); - expect(onProgress).toHaveBeenCalledWith(expect.stringContaining('(openApiSchema) has no S3 URI, skipping')); - }); -}); - -// ============================================================================ -// smithyModel target -// ============================================================================ - -describe('toGatewayTargetSpec — smithyModel', () => { - it('maps S3 URI correctly', () => { - const detail = baseDetail({ - targetConfiguration: { - mcp: { - smithyModel: { - s3: { uri: 's3://models-bucket/model.json' }, - }, - }, - }, - }); - - const onProgress = vi.fn(); - const result = toGatewayTargetSpec(detail, new Map(), onProgress); - - assert(result.success); - expect(result.target!.name).toBe('test_target'); - expect(result.target!.targetType).toBe('smithyModel'); - - // eslint-disable-next-line @typescript-eslint/no-explicit-any - const schemaSource = (result.target as any).schemaSource; - expect(schemaSource.s3.uri).toBe('s3://models-bucket/model.json'); - expect(schemaSource.s3.bucketOwnerAccountId).toBeUndefined(); - }); - - it('returns undefined and emits warning when S3 URI is missing', () => { - const detail = baseDetail({ - targetConfiguration: { - mcp: { - smithyModel: { inlinePayload: '{"smithy":"1.0"}' }, - }, - }, - }); - - const onProgress = vi.fn(); - const result = toGatewayTargetSpec(detail, new Map(), onProgress); - - assert(result.success); - expect(result.target).toBeUndefined(); - expect(onProgress).toHaveBeenCalledWith(expect.stringContaining('(smithyModel) has no S3 URI, skipping')); - }); -}); - -// ============================================================================ -// lambda target -// ============================================================================ - -describe('toGatewayTargetSpec — lambda', () 
=> { - it('maps lambda with S3 tool schema to lambdaFunctionArn type', () => { - const detail = baseDetail({ - targetConfiguration: { - mcp: { - lambda: { - lambdaArn: 'arn:aws:lambda:us-west-2:123456789012:function:my-func', - toolSchema: { s3: { uri: 's3://schemas/tools.json' } }, - }, - }, - }, - }); - - const onProgress = vi.fn(); - const result = toGatewayTargetSpec(detail, new Map(), onProgress); - - assert(result.success); - expect(result.target!.name).toBe('test_target'); - expect(result.target!.targetType).toBe('lambdaFunctionArn'); - - // eslint-disable-next-line @typescript-eslint/no-explicit-any - const lambdaConfig = (result.target as any).lambdaFunctionArn; - expect(lambdaConfig.lambdaArn).toBe('arn:aws:lambda:us-west-2:123456789012:function:my-func'); - expect(lambdaConfig.toolSchemaFile).toBe('s3://schemas/tools.json'); - }); - - it('returns undefined and emits warning when lambdaArn is missing', () => { - const detail = baseDetail({ - targetConfiguration: { - mcp: { - lambda: { - lambdaArn: '', - toolSchema: { s3: { uri: 's3://schemas/tools.json' } }, - }, - }, - }, - }); - - const onProgress = vi.fn(); - const result = toGatewayTargetSpec(detail, new Map(), onProgress); - - assert(result.success); - expect(result.target).toBeUndefined(); - expect(onProgress).toHaveBeenCalledWith(expect.stringContaining('(lambda) has no ARN, skipping')); - }); - - it('returns undefined and emits warning when lambda has inline schema only', () => { - const detail = baseDetail({ - targetConfiguration: { - mcp: { - lambda: { - lambdaArn: 'arn:aws:lambda:us-west-2:123456789012:function:my-func', - toolSchema: { inlinePayload: '{"tools":[]}' }, - }, - }, - }, - }); - - const onProgress = vi.fn(); - const result = toGatewayTargetSpec(detail, new Map(), onProgress); - - assert(result.success); - expect(result.target).toBeUndefined(); - expect(onProgress).toHaveBeenCalledWith( - expect.stringContaining('has inline tool schema, which cannot be imported') - ); - }); - - 
it('emits progress message for successful lambda mapping', () => { - const detail = baseDetail({ - targetConfiguration: { - mcp: { - lambda: { - lambdaArn: 'arn:aws:lambda:us-west-2:123456789012:function:my-func', - toolSchema: { s3: { uri: 's3://schemas/tools.json' } }, - }, - }, - }, - }); - - const onProgress = vi.fn(); - toGatewayTargetSpec(detail, new Map(), onProgress); - - expect(onProgress).toHaveBeenCalledWith(expect.stringContaining('Mapping compute-backed Lambda target')); - }); -}); - -// ============================================================================ -// Unrecognized target type -// ============================================================================ - -describe('toGatewayTargetSpec — unrecognized target type', () => { - it('returns undefined and emits warning when no known mcp type matches', () => { - const detail = baseDetail({ - targetConfiguration: { - mcp: {}, - }, - }); - - const onProgress = vi.fn(); - const result = toGatewayTargetSpec(detail, new Map(), onProgress); - - assert(result.success); - expect(result.target).toBeUndefined(); - expect(onProgress).toHaveBeenCalledWith(expect.stringContaining('unrecognized target type')); - }); -}); diff --git a/src/cli/commands/import/__tests__/import-gateway.test.ts b/src/cli/commands/import/__tests__/import-gateway.test.ts index eb7c67dd6..837993428 100644 --- a/src/cli/commands/import/__tests__/import-gateway.test.ts +++ b/src/cli/commands/import/__tests__/import-gateway.test.ts @@ -6,7 +6,7 @@ import { _resolveOutboundAuth as resolveOutboundAuth, _toGatewayTargetSpec as toGatewayTargetSpec, } from '../import-gateway'; -import { assert, describe, expect, it, vi } from 'vitest'; +import { describe, expect, it, vi } from 'vitest'; // ============================================================================ // Helpers @@ -39,13 +39,12 @@ describe('toGatewayTargetSpec — mcpServer targets', () => { const result = toGatewayTargetSpec(detail, credentials, onProgress); - 
assert(result.success); - expect(result.target).toEqual({ + expect(result).toEqual({ name: 'my-mcp-target', targetType: 'mcpServer', endpoint: 'https://example.com/mcp', }); - expect(result.target).not.toHaveProperty('outboundAuth'); + expect(result).not.toHaveProperty('outboundAuth'); expect(onProgress).not.toHaveBeenCalled(); }); @@ -74,8 +73,7 @@ describe('toGatewayTargetSpec — mcpServer targets', () => { const result = toGatewayTargetSpec(detail, credentials, onProgress); - assert(result.success); - expect(result.target).toEqual({ + expect(result).toEqual({ name: 'my-mcp-target', targetType: 'mcpServer', endpoint: 'https://example.com/mcp', @@ -111,8 +109,7 @@ describe('toGatewayTargetSpec — mcpServer targets', () => { const result = toGatewayTargetSpec(detail, credentials, onProgress); - assert(result.success); - expect(result.target).toEqual({ + expect(result).toEqual({ name: 'my-mcp-target', targetType: 'mcpServer', endpoint: 'https://example.com/mcp', @@ -123,7 +120,7 @@ describe('toGatewayTargetSpec — mcpServer targets', () => { }); }); - it('returns failure when OAuth credential not found in project', () => { + it('throws when OAuth credential not found in project', () => { const providerArn = 'arn:aws:bedrock:us-east-1:123456789012:credential-provider/missing-oauth'; const detail = makeDetail({ targetConfiguration: { @@ -146,12 +143,12 @@ describe('toGatewayTargetSpec — mcpServer targets', () => { const credentials = new Map(); const onProgress = vi.fn(); - const result = toGatewayTargetSpec(detail, credentials, onProgress); - assert(!result.success); - expect(result.error.message).toContain('uses an OAuth credential provider not found'); + expect(() => toGatewayTargetSpec(detail, credentials, onProgress)).toThrow( + 'uses an OAuth credential provider not found' + ); }); - it('returns failure when API_KEY credential not found in project', () => { + it('throws when API_KEY credential not found in project', () => { const providerArn = 
'arn:aws:bedrock:us-east-1:123456789012:credential-provider/missing-apikey'; const detail = makeDetail({ targetConfiguration: { @@ -173,9 +170,9 @@ describe('toGatewayTargetSpec — mcpServer targets', () => { const credentials = new Map(); const onProgress = vi.fn(); - const result = toGatewayTargetSpec(detail, credentials, onProgress); - assert(!result.success); - expect(result.error.message).toContain('uses an API Key credential provider not found'); + expect(() => toGatewayTargetSpec(detail, credentials, onProgress)).toThrow( + 'uses an API Key credential provider not found' + ); }); it('returns undefined and warns when target has no MCP configuration', () => { @@ -187,9 +184,8 @@ describe('toGatewayTargetSpec — mcpServer targets', () => { const result = toGatewayTargetSpec(detail, credentials, onProgress); - assert(result.success); - expect(result.target).toBeUndefined(); - expect(onProgress).toHaveBeenCalledWith(expect.stringContaining('no MCP configuration')); + expect(result).toBeUndefined(); + expect(onProgress).toHaveBeenCalledWith(expect.stringContaining('no MCP or HTTP configuration')); }); }); @@ -218,8 +214,7 @@ describe('resolveOutboundAuth — scopes handling', () => { const result = resolveOutboundAuth(detail, credentials, onProgress); - assert(result.success); - expect(result.auth).toEqual({ + expect(result).toEqual({ type: 'OAUTH', credentialName: 'scoped-cred', scopes: ['openid', 'profile', 'email'], @@ -246,11 +241,10 @@ describe('resolveOutboundAuth — scopes handling', () => { const result = resolveOutboundAuth(detail, credentials, onProgress); - assert(result.success); - expect(result.auth).toEqual({ + expect(result).toEqual({ type: 'OAUTH', credentialName: 'no-scope-cred', }); - expect(result.auth).not.toHaveProperty('scopes'); + expect(result).not.toHaveProperty('scopes'); }); }); diff --git a/src/cli/commands/import/__tests__/import-no-deploy.test.ts b/src/cli/commands/import/__tests__/import-no-deploy.test.ts index 001c551be..1bf8d5ebc 100644 
--- a/src/cli/commands/import/__tests__/import-no-deploy.test.ts +++ b/src/cli/commands/import/__tests__/import-no-deploy.test.ts @@ -464,6 +464,7 @@ agents: version: 1, runtimes: [], memories: [], + knowledgeBases: [], credentials: [], }) ); @@ -482,6 +483,7 @@ agents: version: 1, runtimes: [], memories: [], + knowledgeBases: [], credentials: [], }); mockWriteProjectSpec.mockResolvedValue(undefined); @@ -508,6 +510,7 @@ agents: version: 1, runtimes: [], memories: [], + knowledgeBases: [], credentials: [], }); mockWriteProjectSpec.mockResolvedValue(undefined); @@ -532,6 +535,7 @@ agents: version: 1, runtimes: [], memories: [], + knowledgeBases: [], credentials: [], }); mockWriteProjectSpec.mockResolvedValue(undefined); @@ -553,6 +557,7 @@ agents: version: 1, runtimes: [], memories: [], + knowledgeBases: [], credentials: [], }); mockWriteProjectSpec.mockResolvedValue(undefined); @@ -572,6 +577,7 @@ agents: version: 1, runtimes: [], memories: [], + knowledgeBases: [], credentials: [], }); mockWriteProjectSpec.mockResolvedValue(undefined); @@ -590,6 +596,7 @@ agents: version: 1, runtimes: [], memories: [], + knowledgeBases: [], credentials: [], }); mockWriteProjectSpec.mockResolvedValue(undefined); @@ -609,6 +616,7 @@ agents: version: 1, runtimes: [], memories: [], + knowledgeBases: [], credentials: [], }); mockWriteProjectSpec.mockResolvedValue(undefined); @@ -627,6 +635,7 @@ agents: version: 1, runtimes: [], memories: [], + knowledgeBases: [], credentials: [], }); mockWriteProjectSpec.mockResolvedValue(undefined); @@ -644,6 +653,7 @@ agents: version: 1, runtimes: [], memories: [], + knowledgeBases: [], credentials: [], }); mockWriteProjectSpec.mockResolvedValue(undefined); @@ -680,6 +690,7 @@ describe('handleImport: target resolution with null account/region', () => { version: 1, runtimes: [], memories: [], + knowledgeBases: [], credentials: [], }) ); @@ -722,6 +733,7 @@ agents: version: 1, runtimes: [], memories: [], + knowledgeBases: [], credentials: [], }); 
mockWriteProjectSpec.mockResolvedValue(undefined); @@ -768,6 +780,7 @@ agents: version: 1, runtimes: [], memories: [], + knowledgeBases: [], credentials: [], }); mockWriteProjectSpec.mockResolvedValue(undefined); @@ -812,6 +825,7 @@ agents: version: 1, runtimes: [], memories: [], + knowledgeBases: [], credentials: [], }); mockWriteProjectSpec.mockResolvedValue(undefined); diff --git a/src/cli/commands/import/__tests__/import-online-eval.test.ts b/src/cli/commands/import/__tests__/import-online-eval.test.ts index f3b454494..f98eb71ff 100644 --- a/src/cli/commands/import/__tests__/import-online-eval.test.ts +++ b/src/cli/commands/import/__tests__/import-online-eval.test.ts @@ -144,8 +144,8 @@ describe('toOnlineEvalConfigSpec', () => { assert(result.success); expect(result.config.evaluators).toHaveLength(2); - expect(result.config.evaluators[0]).toBe('local_eval'); - expect(result.config.evaluators[1]).toMatch(/^arn:/); + expect(result.config.evaluators![0]).toBe('local_eval'); + expect(result.config.evaluators![1]).toMatch(/^arn:/); }); }); diff --git a/src/cli/commands/import/__tests__/import-runtime-handler.test.ts b/src/cli/commands/import/__tests__/import-runtime-handler.test.ts index d8926feb9..799663e38 100644 --- a/src/cli/commands/import/__tests__/import-runtime-handler.test.ts +++ b/src/cli/commands/import/__tests__/import-runtime-handler.test.ts @@ -116,6 +116,7 @@ const defaultProjectSpec = { version: 1, runtimes: [], memories: [], + knowledgeBases: [], evaluators: [], onlineEvalConfigs: [], }; diff --git a/src/cli/commands/import/__tests__/jwt-authorizer.test.ts b/src/cli/commands/import/__tests__/jwt-authorizer.test.ts index 1da1bb33a..7ef187ebf 100644 --- a/src/cli/commands/import/__tests__/jwt-authorizer.test.ts +++ b/src/cli/commands/import/__tests__/jwt-authorizer.test.ts @@ -250,7 +250,7 @@ describe('handleImport: JWT authorizer passthrough', () => { fs.mkdirSync(configDir, { recursive: true }); fs.writeFileSync( path.join(configDir, 
'agentcore.json'), - JSON.stringify({ name: 'myproject', version: 1, runtimes: [], memories: [], credentials: [] }) + JSON.stringify({ name: 'myproject', version: 1, runtimes: [], memories: [], knowledgeBases: [], credentials: [] }) ); mockFindConfigRoot.mockReturnValue(configDir); }); @@ -265,6 +265,7 @@ describe('handleImport: JWT authorizer passthrough', () => { version: 1, runtimes: [], memories: [], + knowledgeBases: [], credentials: [], }); mockWriteProjectSpec.mockResolvedValue(undefined); @@ -293,6 +294,7 @@ describe('handleImport: JWT authorizer passthrough', () => { version: 1, runtimes: [], memories: [], + knowledgeBases: [], credentials: [], }); mockWriteProjectSpec.mockResolvedValue(undefined); @@ -315,6 +317,7 @@ describe('handleImport: JWT authorizer passthrough', () => { version: 1, runtimes: [], memories: [], + knowledgeBases: [], credentials: [], }); mockWriteProjectSpec.mockResolvedValue(undefined); diff --git a/src/cli/commands/import/constants.ts b/src/cli/commands/import/constants.ts index 35149fead..6219d278c 100644 --- a/src/cli/commands/import/constants.ts +++ b/src/cli/commands/import/constants.ts @@ -11,6 +11,7 @@ export const CFN_RESOURCE_IDENTIFIERS: Record = { 'AWS::BedrockAgentCore::GatewayTarget': ['GatewayIdentifier', 'TargetId'], 'AWS::BedrockAgentCore::Evaluator': ['EvaluatorId'], 'AWS::BedrockAgentCore::OnlineEvaluationConfig': ['OnlineEvaluationConfigId'], + 'AWS::BedrockAgentCore::Harness': ['HarnessId'], }; /** @@ -31,6 +32,7 @@ export const PRIMARY_RESOURCE_TYPES = [ 'AWS::BedrockAgentCore::CodeInterpreterCustom', 'AWS::BedrockAgentCore::Policy', 'AWS::BedrockAgentCore::PolicyEngine', + 'AWS::BedrockAgentCore::Harness', ]; /** diff --git a/src/cli/commands/import/import-gateway.ts b/src/cli/commands/import/import-gateway.ts index d50f14fdd..163a6ede2 100644 --- a/src/cli/commands/import/import-gateway.ts +++ b/src/cli/commands/import/import-gateway.ts @@ -1,5 +1,4 @@ -import { ValidationError, toError } from '../../../lib'; 
-import { type Result, failureResult } from '../../../lib/result.js'; +import { toError } from '../../../lib'; import type { AgentCoreGateway, AgentCoreGatewayTarget, @@ -45,28 +44,50 @@ import type { Command } from '@commander-js/extra-typings'; function toGatewayTargetSpec( detail: GatewayTargetDetail, credentials: Map, - onProgress: (msg: string) => void -): Result<{ target: AgentCoreGatewayTarget | undefined }> { + onProgress: (msg: string) => void, + runtimeArnToName?: Map +): AgentCoreGatewayTarget | undefined { + // Handle HTTP runtime targets + const http = detail.targetConfiguration?.http; + if (http) { + const runtimeConfig = http.agentcoreRuntime ?? http.runtimeTargetConfiguration; + if (runtimeConfig) { + const managedName = runtimeArnToName?.get(runtimeConfig.runtimeArn); + if (!managedName) { + onProgress( + `Error: Target "${detail.name}" references runtime "${runtimeConfig.runtimeArn}" which is not managed by this project. ` + + `Import the runtime first: agentcore import runtime --arn ${runtimeConfig.runtimeArn}` + ); + return undefined; + } + return { + name: detail.name, + targetType: 'httpRuntime', + httpRuntime: { + runtime: managedName, + ...(runtimeConfig.qualifier && { runtimeEndpoint: runtimeConfig.qualifier }), + }, + }; + } + onProgress(`Warning: Target "${detail.name}" has HTTP configuration but no runtime, skipping`); + return undefined; + } + const mcp = detail.targetConfiguration?.mcp; if (!mcp) { - onProgress(`Warning: Target "${detail.name}" has no MCP configuration, skipping`); - return { success: true, target: undefined }; + onProgress(`Warning: Target "${detail.name}" has no MCP or HTTP configuration, skipping`); + return undefined; } - const authResult = resolveOutboundAuth(detail, credentials, onProgress); - if (!authResult.success) return authResult; - const outboundAuth = authResult.auth; + const outboundAuth = resolveOutboundAuth(detail, credentials, onProgress); // MCP Server (external endpoint) if (mcp.mcpServer) { return 
{ - success: true, - target: { - name: detail.name, - targetType: 'mcpServer', - endpoint: mcp.mcpServer.endpoint, - ...(outboundAuth && { outboundAuth }), - }, + name: detail.name, + targetType: 'mcpServer', + endpoint: mcp.mcpServer.endpoint, + ...(outboundAuth && { outboundAuth }), }; } @@ -98,7 +119,7 @@ function toGatewayTargetSpec( ...(outboundAuth && { outboundAuth }), }; /* eslint-enable @typescript-eslint/no-explicit-any, @typescript-eslint/no-unsafe-assignment */ - return { success: true, target }; + return target; } // OpenAPI Schema @@ -106,22 +127,19 @@ function toGatewayTargetSpec( const schema = mcp.openApiSchema; if (schema.s3?.uri) { return { - success: true, - target: { - name: detail.name, - targetType: 'openApiSchema', - schemaSource: { - s3: { - uri: schema.s3.uri, - ...(schema.s3.bucketOwnerAccountId && { bucketOwnerAccountId: schema.s3.bucketOwnerAccountId }), - }, + name: detail.name, + targetType: 'openApiSchema', + schemaSource: { + s3: { + uri: schema.s3.uri, + ...(schema.s3.bucketOwnerAccountId && { bucketOwnerAccountId: schema.s3.bucketOwnerAccountId }), }, - ...(outboundAuth && { outboundAuth }), }, + ...(outboundAuth && { outboundAuth }), }; } onProgress(`Warning: Target "${detail.name}" (openApiSchema) has no S3 URI, skipping`); - return { success: true, target: undefined }; + return undefined; } // Smithy Model @@ -129,22 +147,19 @@ function toGatewayTargetSpec( const schema = mcp.smithyModel; if (schema.s3?.uri) { return { - success: true, - target: { - name: detail.name, - targetType: 'smithyModel', - schemaSource: { - s3: { - uri: schema.s3.uri, - ...(schema.s3.bucketOwnerAccountId && { bucketOwnerAccountId: schema.s3.bucketOwnerAccountId }), - }, + name: detail.name, + targetType: 'smithyModel', + schemaSource: { + s3: { + uri: schema.s3.uri, + ...(schema.s3.bucketOwnerAccountId && { bucketOwnerAccountId: schema.s3.bucketOwnerAccountId }), }, - ...(outboundAuth && { outboundAuth }), }, + ...(outboundAuth && { outboundAuth }), }; 
} onProgress(`Warning: Target "${detail.name}" (smithyModel) has no S3 URI, skipping`); - return { success: true, target: undefined }; + return undefined; } // Lambda (compute-backed) → map to lambdaFunctionArn @@ -152,7 +167,7 @@ function toGatewayTargetSpec( const lambdaArn = mcp.lambda.lambdaArn; if (!lambdaArn) { onProgress(`Warning: Target "${detail.name}" (lambda) has no ARN, skipping`); - return { success: true, target: undefined }; + return undefined; } // Extract tool schema S3 URI if available @@ -164,26 +179,23 @@ function toGatewayTargetSpec( if (s3Uri) { onProgress(`Mapping compute-backed Lambda target "${detail.name}" to lambdaFunctionArn type`); return { - success: true, - target: { - name: detail.name, - targetType: 'lambdaFunctionArn', - lambdaFunctionArn: { - lambdaArn, - toolSchemaFile: s3Uri, - }, - ...(outboundAuth && { outboundAuth }), + name: detail.name, + targetType: 'lambdaFunctionArn', + lambdaFunctionArn: { + lambdaArn, + toolSchemaFile: s3Uri, }, + ...(outboundAuth && { outboundAuth }), }; } // Lambda without S3 schema — can't import as lambdaFunctionArn since toolSchemaFile is required onProgress(`Warning: Target "${detail.name}" (lambda) has inline tool schema, which cannot be imported. 
Skipping.`); - return { success: true, target: undefined }; + return undefined; } onProgress(`Warning: Target "${detail.name}" has an unrecognized target type, skipping`); - return { success: true, target: undefined }; + return undefined; } /** @@ -193,9 +205,9 @@ function resolveOutboundAuth( detail: GatewayTargetDetail, credentials: Map, _onProgress: (msg: string) => void -): Result<{ auth: OutboundAuth | undefined }> { +): OutboundAuth | undefined { const configs = detail.credentialProviderConfigurations; - if (!configs || configs.length === 0) return { success: true, auth: undefined }; + if (!configs || configs.length === 0) return undefined; for (const config of configs) { if (config.credentialProviderType === 'OAUTH' && config.credentialProvider?.oauthCredentialProvider) { @@ -203,21 +215,16 @@ function resolveOutboundAuth( const credentialName = credentials.get(providerArn); if (credentialName) { return { - success: true, - auth: { - type: 'OAUTH', - credentialName, - ...(config.credentialProvider.oauthCredentialProvider.scopes?.length && { - scopes: config.credentialProvider.oauthCredentialProvider.scopes, - }), - }, + type: 'OAUTH', + credentialName, + ...(config.credentialProvider.oauthCredentialProvider.scopes?.length && { + scopes: config.credentialProvider.oauthCredentialProvider.scopes, + }), }; } - return failureResult( - new ValidationError( - `Target "${detail.name}" uses an OAuth credential provider not found in this project's deployed state. ` + - 'Import the credential first with `agentcore add credential` and re-run.' - ) + throw new Error( + `Target "${detail.name}" uses an OAuth credential provider not found in this project's deployed state. ` + + 'Import the credential first with `agentcore add credential` and re-run.' 
); } @@ -225,31 +232,30 @@ function resolveOutboundAuth( const providerArn = config.credentialProvider.apiKeyCredentialProvider.providerArn; const credentialName = credentials.get(providerArn); if (credentialName) { - return { success: true, auth: { type: 'API_KEY', credentialName } }; + return { type: 'API_KEY', credentialName }; } - return failureResult( - new ValidationError( - `Target "${detail.name}" uses an API Key credential provider not found in this project's deployed state. ` + - 'Import the credential first with `agentcore add credential` and re-run.' - ) + throw new Error( + `Target "${detail.name}" uses an API Key credential provider not found in this project's deployed state. ` + + 'Import the credential first with `agentcore add credential` and re-run.' ); } // GATEWAY_IAM_ROLE — no outbound auth needed } - return { success: true, auth: undefined }; + return undefined; } /** * Map GetGateway + GetGatewayTarget[] responses to CLI AgentCoreGateway schema. * @internal */ -export function toGatewaySpec( - gateway: GatewayDetail, - targets: AgentCoreGatewayTarget[], - localName: string -): AgentCoreGateway { +export function toGatewaySpec(options: { + gateway: GatewayDetail; + targets: AgentCoreGatewayTarget[]; + localName: string; +}): AgentCoreGateway { + const { gateway, targets, localName } = options; const authorizerType = (gateway.authorizerType ?? 'NONE') as GatewayAuthorizerType; let authorizerConfiguration: AuthorizerConfig | undefined; @@ -287,6 +293,9 @@ export function toGatewaySpec( }; } + // Service returns protocolType 'MCP' or null. Null = non-MCP gateway. + const protocolType = gateway.protocolType === 'MCP' ? 'MCP' : 'None'; + const enableSemanticSearch = gateway.protocolConfiguration?.mcp?.searchType === 'SEMANTIC'; const exceptionLevel: GatewayExceptionLevel = gateway.exceptionLevel === 'DEBUG' ? 
'DEBUG' : 'NONE'; @@ -304,6 +313,7 @@ export function toGatewaySpec( return { name: localName, resourceName: gateway.name, + ...(protocolType === 'None' && { protocolType: 'None' as const }), ...(gateway.description && { description: gateway.description }), targets, authorizerType, @@ -489,23 +499,47 @@ export async function handleImportGateway(options: ImportResourceOptions): Promi logger.startStep('Map gateway to project schema'); const credentialArnMap = await buildCredentialArnMap(ctx.configIO, targetName); + // Build reverse lookup: runtime ARN → local name (for managed target detection) + const runtimeArnToName = new Map(); + try { + const state = await ctx.configIO.readDeployedState(); + const deployedTarget = state?.targets?.[targetName]; + if (deployedTarget?.resources?.runtimes) { + for (const [name, rt] of Object.entries(deployedTarget.resources.runtimes)) { + runtimeArnToName.set(rt.runtimeArn, name); + } + } + } catch { + // No deployed state — httpRuntime targets without a matching runtime will be skipped + } + const mappedTargets: AgentCoreGatewayTarget[] = []; + const unmanagedRuntimeArns: string[] = []; for (const td of targetDetails) { - const mapped = toGatewayTargetSpec(td, credentialArnMap, onProgress); - if (!mapped.success) { - logger.endStep('error', mapped.error.message); - logger.finalize(false); - return { - ...mapped, - resourceType: 'gateway' as const, - resourceName: td.name, - logPath: logger.getRelativeLogPath(), - }; + const http = td.targetConfiguration?.http; + const runtimeConfig = http?.agentcoreRuntime ?? 
http?.runtimeTargetConfiguration; + if (http && runtimeConfig && !runtimeArnToName.has(runtimeConfig.runtimeArn)) { + unmanagedRuntimeArns.push(runtimeConfig.runtimeArn); + } + const mapped = toGatewayTargetSpec(td, credentialArnMap, onProgress, runtimeArnToName); + if (mapped) { + mappedTargets.push(mapped); } - if (mapped.target) mappedTargets.push(mapped.target); } - const gatewaySpec = toGatewaySpec(gatewayDetail, mappedTargets, localName); + if (unmanagedRuntimeArns.length > 0) { + const arns = [...new Set(unmanagedRuntimeArns)]; + const importCmds = arns.map(arn => ` agentcore import runtime --arn ${arn}`).join('\n'); + return failResult( + logger, + `Gateway has ${unmanagedRuntimeArns.length} httpRuntime target(s) referencing runtimes not managed by this project.\n` + + `Import the runtime(s) first:\n${importCmds}\n\nThen retry: agentcore import gateway --arn ${options.arn ?? gatewayDetail.gatewayArn}`, + 'gateway', + localName + ); + } + + const gatewaySpec = toGatewaySpec({ gateway: gatewayDetail, targets: mappedTargets, localName }); onProgress(`Mapped gateway with ${mappedTargets.length} target(s)`); if (mappedTargets.length < targetDetails.length) { onProgress( diff --git a/src/cli/commands/invoke/__tests__/action-gateway.test.ts b/src/cli/commands/invoke/__tests__/action-gateway.test.ts new file mode 100644 index 000000000..4cd4f2dab --- /dev/null +++ b/src/cli/commands/invoke/__tests__/action-gateway.test.ts @@ -0,0 +1,98 @@ +import type { InvokeContext } from '../action'; +import { handleInvoke } from '../action'; +import type { InvokeOptions } from '../types'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +// --------------------------------------------------------------------------- +// Gateway invoke output parsing. +// +// HTTP gateways stream back the same SSE envelope as a direct runtime invoke. 
+// Before this fix the gateway branch returned the raw HTTP body, so users saw +// `data: "..."` frames instead of clean text. This pins the wiring: the HTTP +// gateway response is run through parseSSE (mirroring invokeAgentRuntime). The +// parsing itself is covered by agentcore.test.ts — this guards that the gateway +// branch actually calls it rather than returning the body raw. +// --------------------------------------------------------------------------- + +vi.mock('../../../feature-flags', () => ({ + isPreviewEnabled: () => false, +})); + +vi.mock('../../../logging', () => ({ + InvokeLogger: class { + logFilePath = '/tmp/fake.log'; + logPrompt = vi.fn(); + logResponse = vi.fn(); + logError = vi.fn(); + logInfo = vi.fn(); + }, +})); + +function makeContext(): InvokeContext { + const gatewayName = 'lolo-gateway'; + return { + project: { + agentCoreGateways: [ + { + name: gatewayName, + authorizerType: 'AWS_IAM', + targets: [{ name: 'lolo-target', targetType: 'httpRuntime' }], + }, + ], + runtimes: [], + } as unknown as InvokeContext['project'], + deployedState: { + targets: { + default: { + resources: { + gateways: { + [gatewayName]: { + gatewayId: 'gw-123', + gatewayUrl: 'https://gw-123.gateway.example.com', + gatewayArn: 'arn:aws:bedrock-agentcore:us-east-1:111122223333:gateway/gw-123', + }, + }, + }, + }, + }, + } as unknown as InvokeContext['deployedState'], + awsTargets: [{ name: 'default', region: 'us-east-1' }] as unknown as InvokeContext['awsTargets'], + }; +} + +describe('handleInvoke — gateway output parsing', () => { + beforeEach(() => { + // SigV4 signing pulls credentials from the provider chain; give it something. 
+ process.env.AWS_ACCESS_KEY_ID = 'AKIAFAKE'; + process.env.AWS_SECRET_ACCESS_KEY = 'fakesecret'; + }); + + afterEach(() => { + vi.unstubAllGlobals(); + vi.clearAllMocks(); + delete process.env.AWS_ACCESS_KEY_ID; + delete process.env.AWS_SECRET_ACCESS_KEY; + }); + + it('parses multi-frame SSE into clean joined text (the lolo-gateway repro)', async () => { + vi.stubGlobal( + 'fetch', + vi.fn().mockResolvedValue({ + ok: true, + status: 200, + text: () => Promise.resolve('data: "Hello! How can I help you today"\n\ndata: "?"\n\n'), + }) + ); + + const options: InvokeOptions = { + gateway: 'lolo-gateway', + gatewayTarget: 'lolo-target', + targetName: 'default', + prompt: '{"message":"hello"}', + }; + const result = await handleInvoke(makeContext(), options); + + expect(result.success).toBe(true); + expect(result.response).toBe('Hello! How can I help you today?'); + }); +}); diff --git a/src/cli/commands/invoke/__tests__/build-harness-base-opts.test.ts b/src/cli/commands/invoke/__tests__/build-harness-base-opts.test.ts new file mode 100644 index 000000000..34bea7540 --- /dev/null +++ b/src/cli/commands/invoke/__tests__/build-harness-base-opts.test.ts @@ -0,0 +1,56 @@ +import { buildHarnessBaseOpts } from '../action.js'; +import type { InvokeOptions } from '../types.js'; +import { describe, expect, it } from 'vitest'; + +const base = { prompt: 'hi', targetName: 'default' } as InvokeOptions; + +describe('buildHarnessBaseOpts — model provider mapping', () => { + it('maps bedrock by default', () => { + const opts = buildHarnessBaseOpts({ ...base, modelProvider: 'bedrock', modelId: 'm' }); + expect(opts.model).toEqual({ bedrockModelConfig: { modelId: 'm' } }); + }); + + it('maps open_ai with apiKeyArn', () => { + const opts = buildHarnessBaseOpts({ ...base, modelProvider: 'open_ai', modelId: 'gpt', apiKeyArn: 'arn:key' }); + expect(opts.model).toEqual({ openAiModelConfig: { modelId: 'gpt', apiKeyArn: 'arn:key' } }); + }); + + it('maps gemini with apiKeyArn', () => { + const 
opts = buildHarnessBaseOpts({ ...base, modelProvider: 'gemini', modelId: 'g', apiKeyArn: 'arn:key' }); + expect(opts.model).toEqual({ geminiModelConfig: { modelId: 'g', apiKeyArn: 'arn:key' } }); + }); + + it('maps lite_llm with apiBase and additionalParams (apiKeyArn optional)', () => { + const opts = buildHarnessBaseOpts({ + ...base, + modelProvider: 'lite_llm', + modelId: 'anthropic/claude-sonnet-4-5', + apiBase: 'https://proxy.example.com/v1', + additionalParams: { reasoning_effort: 'high' }, + }); + expect(opts.model).toEqual({ + liteLlmModelConfig: { + modelId: 'anthropic/claude-sonnet-4-5', + apiBase: 'https://proxy.example.com/v1', + additionalParams: { reasoning_effort: 'high' }, + }, + }); + }); + + it('maps a keyless lite_llm override from only apiBase', () => { + const opts = buildHarnessBaseOpts({ + ...base, + modelProvider: 'lite_llm', + modelId: 'ollama/llama3', + apiBase: 'http://localhost:11434', + }); + expect(opts.model).toEqual({ + liteLlmModelConfig: { modelId: 'ollama/llama3', apiBase: 'http://localhost:11434' }, + }); + }); + + it('falls back to the harness spec provider when no override is given', () => { + const opts = buildHarnessBaseOpts({ ...base, apiBase: 'https://proxy' }, { provider: 'lite_llm', modelId: 'm' }); + expect(opts.model).toEqual({ liteLlmModelConfig: { modelId: 'm', apiBase: 'https://proxy' } }); + }); +}); diff --git a/src/cli/commands/invoke/__tests__/command.test.ts b/src/cli/commands/invoke/__tests__/command.test.ts index 5da2cd9ba..cf6c8d7ed 100644 --- a/src/cli/commands/invoke/__tests__/command.test.ts +++ b/src/cli/commands/invoke/__tests__/command.test.ts @@ -1,4 +1,5 @@ // Tests for invoke CLI mode — exitCode propagation and flag validation +import { isPreviewEnabled } from '../../../feature-flags'; import { handleInvoke } from '../action.js'; import { registerInvoke } from '../command.js'; import { resolvePrompt } from '../resolve-prompt.js'; @@ -127,4 +128,28 @@ describe('invoke CLI mode — exitCode propagation', 
() => { expect(exitCodes[0]).toBe(1); }); + + it('emits a JSON error envelope (not console.error) for bad --additional-params in --json mode', async () => { + // --additional-params is preview-gated; enable preview for this case. + vi.mocked(isPreviewEnabled).mockReturnValue(true); + const logged: string[] = []; + const erred: string[] = []; + vi.spyOn(console, 'log').mockImplementation((...args: unknown[]) => logged.push(args.join(' '))); + vi.spyOn(console, 'error').mockImplementation((...args: unknown[]) => erred.push(args.join(' '))); + + const program = new Command(); + program.exitOverride(); + registerInvoke(program); + + await program + .parseAsync(['invoke', '--harness', 'h', '--additional-params', '{not json}', '--json', 'hi'], { from: 'user' }) + .catch(_noop); + + expect(exitCodes[0]).toBe(1); + const out = logged.join(''); + expect(out).toContain('"success":false'); + expect(out).toContain('--additional-params must be a valid JSON object'); + // In --json mode the error must NOT go to console.error (would break JSON consumers). 
+ expect(erred.join('')).not.toContain('--additional-params'); + }); }); diff --git a/src/cli/commands/invoke/action.ts b/src/cli/commands/invoke/action.ts index 9c8584ca1..5b867eac5 100644 --- a/src/cli/commands/invoke/action.ts +++ b/src/cli/commands/invoke/action.ts @@ -4,6 +4,7 @@ import { DEFAULT_RUNTIME_USER_ID, buildAguiRunInput, executeBashCommand, + extractResult, getOrCreatePaymentSession, invokeA2ARuntime, invokeAgentRuntime, @@ -12,8 +13,10 @@ import { mcpCallTool, mcpInitSession, mcpListTools, + parseSSE, } from '../../aws'; import { invokeHarness } from '../../aws/agentcore-harness'; +import { dnsSuffix } from '../../aws/partition'; import { ANSI } from '../../constants'; import { isPreviewEnabled } from '../../feature-flags'; import { InvokeLogger } from '../../logging'; @@ -46,6 +49,196 @@ export async function loadInvokeConfig(configIO: ConfigIO = new ConfigIO()): Pro export async function handleInvoke(context: InvokeContext, options: InvokeOptions = {}): Promise { const { project, deployedState, awsTargets } = context; + // Gateway invoke: route through a deployed gateway + if (options.gateway) { + const targetNames = Object.keys(deployedState.targets); + if (targetNames.length === 0) { + return { + success: false, + error: new ResourceNotFoundError('No deployed targets found. Run `agentcore deploy` first.'), + }; + } + const gwSelectedTarget = options.targetName ?? targetNames[0]!; + const gwTargetState = deployedState.targets[gwSelectedTarget]; + const gwTargetConfig = awsTargets.find(t => t.name === gwSelectedTarget); + if (!gwTargetConfig) { + return { + success: false, + error: new ResourceNotFoundError(`Target config '${gwSelectedTarget}' not found in aws-targets`), + }; + } + const gwState = + gwTargetState?.resources?.gateways?.[options.gateway] ?? + gwTargetState?.resources?.mcp?.gateways?.[options.gateway]; + if (!gwState) { + return { + success: false, + error: new ResourceNotFoundError( + `Gateway '${options.gateway}' is not deployed. 
Run \`agentcore deploy\` first.` + ), + }; + } + + const gwSpec = project.agentCoreGateways?.find(g => g.name === options.gateway); + const isMcpGateway = gwSpec?.protocolType === 'MCP'; + + // Require bearer token for CUSTOM_JWT gateways + if (gwSpec?.authorizerType === 'CUSTOM_JWT' && !options.bearerToken) { + return { + success: false, + error: new ValidationError('Gateway requires --bearer-token (CUSTOM_JWT authorizer).'), + }; + } + + const region = gwTargetConfig.region; + const gatewayUrl = + gwState.gatewayUrl ?? + (() => { + const r = gwState.gatewayArn?.split(':')[3]; + return r ? `https://${gwState.gatewayId}.gateway.bedrock-agentcore.${r}.${dnsSuffix(r)}` : undefined; + })(); + + if (!gatewayUrl) { + return { success: false, error: new ValidationError('Could not determine gateway URL.') }; + } + + let invocationUrl: string; + let body: string; + + if (!isMcpGateway) { + // HTTP gateway: requires --target-name and --prompt + if (!options.gatewayTarget) { + const httpTargets = (gwSpec?.targets ?? []) + .filter((t: { targetType?: string }) => t.targetType === 'httpRuntime') + .map((t: { name: string }) => t.name); + return { + success: false, + error: new ValidationError( + `--target-name is required for HTTP gateways. 
Available targets: ${httpTargets.join(', ')}` + ), + }; + } + const targetSpec = gwSpec?.targets?.find( + (t: { name: string; targetType?: string }) => t.name === options.gatewayTarget + ); + if (!targetSpec) { + return { + success: false, + error: new ResourceNotFoundError( + `Target '${options.gatewayTarget}' not found on gateway '${options.gateway}'.` + ), + }; + } + if (targetSpec.targetType !== 'httpRuntime') { + return { + success: false, + error: new ValidationError(`Target '${options.gatewayTarget}' is not an httpRuntime target.`), + }; + } + invocationUrl = `${gatewayUrl}/${options.gatewayTarget}/invocations`; + if (!options.prompt) { + return { success: false, error: new ValidationError('--prompt is required for HTTP gateway invoke.') }; + } + // Wrap plain strings into {"prompt": "..."} so the body field matches both the + // agent template (reads payload.get("prompt")) and the guardrail form default + // data path (context.input.prompt). If the prompt is already valid JSON, pass + // it through as-is. + try { + JSON.parse(options.prompt); + body = options.prompt; + } catch { + body = JSON.stringify({ prompt: options.prompt }); + } + } else { + // MCP gateway: direct HTTP POST to gateway /mcp endpoint + if (!options.tool) { + return { + success: false, + error: new ValidationError('--tool is required for MCP gateway invoke.'), + }; + } + + invocationUrl = gatewayUrl.endsWith('/mcp') ? gatewayUrl : `${gatewayUrl}/mcp`; + body = JSON.stringify({ + jsonrpc: '2.0', + id: 1, + method: 'tools/call', + params: { + name: options.tool, + arguments: options.input ? 
(JSON.parse(options.input) as Record) : {}, + }, + }); + } + + const headers: Record = { + 'content-type': 'application/json', + ...(options.sessionId && { 'x-amz-bedrock-agentcore-session-id': options.sessionId }), + ...(isMcpGateway && { Accept: 'application/json, text/event-stream', 'Mcp-Protocol-Version': '2025-03-26' }), + }; + + let fetchHeaders = headers; + + if (gwSpec?.authorizerType === 'CUSTOM_JWT') { + // CUSTOM_JWT: use bearer token + fetchHeaders = { ...headers, authorization: `Bearer ${options.bearerToken}` }; + } else if (gwSpec?.authorizerType === 'AWS_IAM') { + // AWS_IAM: SigV4 sign the request + const { SignatureV4 } = await import('@smithy/signature-v4'); + const { Sha256 } = await import('@aws-crypto/sha256-js'); + const { fromNodeProviderChain } = await import('@aws-sdk/credential-providers'); + + const url = new URL(invocationUrl); + const signer = new SignatureV4({ + service: 'bedrock-agentcore', + region, + credentials: fromNodeProviderChain(), + sha256: Sha256, + }); + + const signed = await signer.sign({ + method: 'POST', + protocol: 'https:', + hostname: url.hostname, + path: url.pathname, + headers: { ...headers, host: url.hostname }, + body, + }); + + fetchHeaders = signed.headers as Record; + } + // NONE: no auth needed, use headers as-is + + const response = await fetch(invocationUrl, { + method: 'POST', + headers: fetchHeaders, + body, + }); + + if (!response.ok) { + const errText = await response.text(); + return { success: false, error: new Error(`Gateway invoke failed (${response.status}): ${errText}`) }; + } + + const responseText = await response.text(); + + // HTTP gateways stream back the same SSE / JSON envelope as a direct runtime + // invoke, so parse it the same way (mirrors invokeAgentRuntime) to render clean + // text instead of raw `data: "..."` frames. MCP gateways return JSON-RPC, which + // the caller renders as-is. + const responseBody = isMcpGateway + ? responseText + : responseText.includes('data: ') + ? 
parseSSE(responseText) + : extractResult(responseText); + + return { + success: true, + response: responseBody, + agentName: options.gatewayTarget ?? options.gateway, + targetName: gwSelectedTarget, + }; + } + // Preview: route to harness before runtime resolution if (isPreviewEnabled()) { const harnessEntries = project.harnesses ?? []; @@ -68,15 +261,15 @@ export async function handleInvoke(context: InvokeContext, options: InvokeOption ), }; } - const targetState = deployedState.targets[selectedTarget]; - const targetConfig = awsTargets.find(t => t.name === selectedTarget); - if (!targetConfig) { + const harnessTargetState = deployedState.targets[selectedTarget]; + const harnessTargetConfig = awsTargets.find(t => t.name === selectedTarget); + if (!harnessTargetConfig) { return { success: false, error: new ResourceNotFoundError(`Target config '${selectedTarget}' not found in aws-targets`), }; } - return handleHarnessInvoke(project, targetState, targetConfig, selectedTarget, options); + return handleHarnessInvoke(project, harnessTargetState, harnessTargetConfig, selectedTarget, options); } if (harnessEntries.length > 0 && project.runtimes.length > 0 && !options.agentName) { @@ -587,10 +780,12 @@ export function buildHarnessBaseOpts( harnessSpec?: Partial ): Partial { const baseOpts: Partial = {}; - if (options.modelId || options.modelProvider || options.apiKeyArn) { + if (options.modelId || options.modelProvider || options.apiKeyArn || options.apiBase || options.additionalParams) { const provider = options.modelProvider ?? harnessSpec?.provider; const modelId = options.modelId ?? harnessSpec?.modelId ?? ''; const apiKeyArn = options.apiKeyArn ?? harnessSpec?.apiKeyArn; + const apiBase = options.apiBase ?? harnessSpec?.apiBase; + const additionalParams = options.additionalParams ?? 
harnessSpec?.additionalParams; switch (provider) { case 'open_ai': baseOpts.model = { @@ -602,6 +797,16 @@ export function buildHarnessBaseOpts( geminiModelConfig: { modelId, ...(apiKeyArn && { apiKeyArn }) }, }; break; + case 'lite_llm': + baseOpts.model = { + liteLlmModelConfig: { + modelId, + ...(apiKeyArn && { apiKeyArn }), + ...(apiBase && { apiBase }), + ...(additionalParams && { additionalParams }), + }, + }; + break; default: baseOpts.model = { bedrockModelConfig: { modelId }, @@ -618,7 +823,14 @@ export function buildHarnessBaseOpts( if (options.maxIterations != null) baseOpts.maxIterations = options.maxIterations; if (options.maxTokens != null) baseOpts.maxTokens = options.maxTokens; if (options.harnessTimeout != null) baseOpts.timeoutSeconds = options.harnessTimeout; - if (options.skills) baseOpts.skills = options.skills.split(',').map(p => ({ path: p.trim() })); + if (options.skills) { + baseOpts.skills = options.skills.split(',').map(s => { + const trimmed = s.trim(); + if (trimmed.startsWith('s3://')) return { s3Uri: trimmed }; + if (trimmed.startsWith('https://')) return { gitUrl: trimmed }; + return { path: trimmed }; + }); + } if (options.systemPrompt) baseOpts.systemPrompt = [{ text: options.systemPrompt }]; if (options.allowedTools) baseOpts.allowedTools = options.allowedTools.split(',').map(t => t.trim()); if (options.actorId) baseOpts.actorId = options.actorId; diff --git a/src/cli/commands/invoke/command.tsx b/src/cli/commands/invoke/command.tsx index ae98d7159..42c9f697f 100644 --- a/src/cli/commands/invoke/command.tsx +++ b/src/cli/commands/invoke/command.tsx @@ -2,6 +2,7 @@ import { ValidationError, serializeResult } from '../../../lib'; import { COMMAND_DESCRIPTIONS } from '../../constants'; import { getErrorMessage } from '../../errors'; import { isPreviewEnabled } from '../../feature-flags'; +import { ADDITIONAL_PARAMS_JSON_ERROR } from '../../primitives/constants'; import { withCommandRunTelemetry } from 
'../../telemetry/cli-command-run.js'; import { renderTUI } from '../../tui'; import { requireProject, requireTTY } from '../../tui/guards'; @@ -139,6 +140,8 @@ export const registerInvoke = (program: Command) => { 'Read the prompt from a file (for long or structured payloads that exceed shell arg limits) [non-interactive]' ) .option('--runtime ', 'Select specific runtime [non-interactive]') + .option('--gateway ', 'Invoke through a gateway [non-interactive]') + .option('--gateway-target-name ', 'HTTP runtime target on the gateway [non-interactive]') .option('--target ', 'Select deployment target [non-interactive]') .option('--session-id ', 'Use specific session ID for conversation continuity') .option('--user-id ', 'User ID for runtime invocation (default: "default-user")') @@ -168,37 +171,32 @@ export const registerInvoke = (program: Command) => { if (isPreviewEnabled()) { invokeCmd - .option('--harness ', 'Select specific harness to invoke [non-interactive] [preview]') - .option('--harness-arn ', 'Invoke a harness by ARN (no project required) [non-interactive] [preview]') - .option( - '--region ', - 'AWS region (required with --harness-arn when no project) [non-interactive] [preview]' - ) - .option('--verbose', 'Print verbose streaming JSON events (harness only) [non-interactive] [preview]') - .option('--model-id ', 'Override model for this invocation (harness only) [non-interactive] [preview]') + .option('--harness ', 'Select specific harness to invoke [non-interactive]') + .option('--harness-arn ', 'Invoke a harness by ARN (no project required) [non-interactive]') + .option('--region ', 'AWS region (required with --harness-arn when no project) [non-interactive]') + .option('--verbose', 'Print verbose streaming JSON events (harness only) [non-interactive]') + .option('--model-id ', 'Override model for this invocation (harness only) [non-interactive]') .option( '--model-provider ', - 'Override model provider: bedrock, open_ai, gemini (harness only) 
[non-interactive] [preview]' - ) - .option( - '--api-key-arn ', - 'Override API key ARN for open_ai/gemini (harness only) [non-interactive] [preview]' + 'Override model provider: bedrock, open_ai, gemini, lite_llm (harness only) [non-interactive]' ) - .option('--tools ', 'Override tools, comma-separated (harness only) [non-interactive] [preview]') - .option('--max-iterations ', 'Override max iterations (harness only) [non-interactive] [preview]', parseInt) - .option('--max-tokens ', 'Override max tokens (harness only) [non-interactive] [preview]', parseInt) + .option('--api-key-arn ', 'Override API key ARN for open_ai/gemini (harness only) [non-interactive]') + .option('--api-base ', 'Override LiteLLM API base URL (harness only, lite_llm) [non-interactive]') .option( - '--harness-timeout ', - 'Override timeout seconds (harness only) [non-interactive] [preview]', - parseInt + '--additional-params ', + 'Override LiteLLM additional params as a JSON object (harness only, lite_llm) [non-interactive]' ) - .option('--skills ', 'Skills to use, comma-separated paths (harness only) [non-interactive] [preview]') - .option('--system-prompt ', 'Override system prompt (harness only) [non-interactive] [preview]') + .option('--tools ', 'Override tools, comma-separated (harness only) [non-interactive]') + .option('--max-iterations ', 'Override max iterations (harness only) [non-interactive]', parseInt) + .option('--max-tokens ', 'Override max tokens (harness only) [non-interactive]', parseInt) + .option('--harness-timeout ', 'Override timeout seconds (harness only) [non-interactive]', parseInt) .option( - '--allowed-tools ', - 'Override allowed tools, comma-separated (harness only) [non-interactive] [preview]' + '--skills ', + 'Skills override, comma-separated (path, s3://uri, or https://git-url). 
Git auth not supported here — configure via agentcore add skill [non-interactive]' ) - .option('--actor-id ', 'Override memory actor ID (harness only) [non-interactive] [preview]'); + .option('--system-prompt ', 'Override system prompt (harness only) [non-interactive]') + .option('--allowed-tools ', 'Override allowed tools, comma-separated (harness only) [non-interactive]') + .option('--actor-id ', 'Override memory actor ID (harness only) [non-interactive]'); } // Group the long flag list into labelled sections (mirrors `add ab-test`). @@ -271,13 +269,13 @@ MCP & Advanced [non-interactive] invokeCmd.addHelpText( 'after', ` -Harness [non-interactive] [preview] +Harness [non-interactive] --harness Select specific harness to invoke --harness-arn Invoke a harness by ARN (no project required) --region AWS region (required with --harness-arn) --verbose Print verbose streaming JSON events -Model & Runtime Overrides (harness only) [non-interactive] [preview] +Model & Runtime Overrides (harness only) [non-interactive] --model-id Override model --model-provider bedrock, open_ai, or gemini --api-key-arn API key ARN for open_ai/gemini @@ -300,6 +298,8 @@ Model & Runtime Overrides (harness only) [non-interactive] [preview] prompt?: string; promptFile?: string; runtime?: string; + gateway?: string; + gatewayTargetName?: string; target?: string; sessionId?: string; userId?: string; @@ -318,6 +318,8 @@ Model & Runtime Overrides (harness only) [non-interactive] [preview] modelId?: string; modelProvider?: string; apiKeyArn?: string; + apiBase?: string; + additionalParams?: string; tools?: string; maxIterations?: number; maxTokens?: number; @@ -366,8 +368,10 @@ Model & Runtime Overrides (harness only) [non-interactive] [preview] resolved.prompt !== undefined || cliOptions.json || cliOptions.target || + cliOptions.gatewayTargetName || cliOptions.stream || cliOptions.runtime || + cliOptions.gateway || cliOptions.tool || cliOptions.exec || cliOptions.bearerToken || @@ -404,9 +408,25 @@ 
Model & Runtime Overrides (harness only) [non-interactive] [preview] headers = parseHeaderFlags(cliOptions.header); } + let additionalParams: Record | undefined; + if (cliOptions.additionalParams) { + try { + additionalParams = JSON.parse(cliOptions.additionalParams) as Record; + } catch { + if (cliOptions.json) { + console.log(JSON.stringify({ success: false, error: ADDITIONAL_PARAMS_JSON_ERROR })); + } else { + console.error(ADDITIONAL_PARAMS_JSON_ERROR); + } + process.exit(1); + } + } + const options: InvokeOptions = { prompt: resolved.prompt, agentName: cliOptions.runtime, + gateway: cliOptions.gateway, + gatewayTarget: cliOptions.gatewayTargetName, targetName: cliOptions.target ?? 'default', sessionId: cliOptions.sessionId, userId: cliOptions.userId, @@ -425,6 +445,8 @@ Model & Runtime Overrides (harness only) [non-interactive] [preview] modelId: cliOptions.modelId, modelProvider: cliOptions.modelProvider, apiKeyArn: cliOptions.apiKeyArn, + apiBase: cliOptions.apiBase, + additionalParams, tools: cliOptions.tools, maxIterations: cliOptions.maxIterations, maxTokens: cliOptions.maxTokens, diff --git a/src/cli/commands/invoke/types.ts b/src/cli/commands/invoke/types.ts index d06364bfb..66367c019 100644 --- a/src/cli/commands/invoke/types.ts +++ b/src/cli/commands/invoke/types.ts @@ -5,6 +5,10 @@ export interface InvokeOptions { harnessName?: string; /** Direct harness ARN — bypasses project config and deployed state resolution */ harnessArn?: string; + /** Gateway name — invoke through a deployed gateway */ + gateway?: string; + /** Gateway target name (httpRuntime target on the gateway) */ + gatewayTarget?: string; /** AWS region (used with --harness-arn) */ region?: string; targetName?: string; @@ -31,10 +35,14 @@ export interface InvokeOptions { verbose?: boolean; /** Override model ID for this invocation (harness only) */ modelId?: string; - /** Override model provider for this invocation (harness only): bedrock, open_ai, gemini */ + /** Override model 
provider for this invocation (harness only): bedrock, open_ai, gemini, lite_llm */ modelProvider?: string; - /** Override API key ARN for this invocation (harness only, open_ai/gemini) */ + /** Override API key ARN for this invocation (harness only, open_ai/gemini; optional for lite_llm) */ apiKeyArn?: string; + /** Override LiteLLM API base URL for this invocation (harness only, lite_llm) */ + apiBase?: string; + /** Override LiteLLM additional params for this invocation (harness only, lite_llm; JSON object) */ + additionalParams?: Record; /** Override tools for this invocation (harness only, comma-separated) */ tools?: string; /** Override max iterations (harness only) */ diff --git a/src/cli/commands/logs/__tests__/action.test.ts b/src/cli/commands/logs/__tests__/action.test.ts index 06de367e1..d6f58d858 100644 --- a/src/cli/commands/logs/__tests__/action.test.ts +++ b/src/cli/commands/logs/__tests__/action.test.ts @@ -55,6 +55,7 @@ describe('resolveAgentContext', () => { }, ], memories: [], + knowledgeBases: [], credentials: [], evaluators: [], onlineEvalConfigs: [], @@ -62,7 +63,6 @@ describe('resolveAgentContext', () => { policyEngines: [], configBundles: [], abTests: [], - httpGateways: [], harnesses: [], datasets: [], payments: [], @@ -122,6 +122,7 @@ describe('resolveAgentContext', () => { }, ], memories: [], + knowledgeBases: [], credentials: [], evaluators: [], onlineEvalConfigs: [], @@ -129,7 +130,6 @@ describe('resolveAgentContext', () => { policyEngines: [], configBundles: [], abTests: [], - httpGateways: [], harnesses: [], datasets: [], payments: [], @@ -169,6 +169,7 @@ describe('resolveAgentContext', () => { }, ], memories: [], + knowledgeBases: [], credentials: [], evaluators: [], onlineEvalConfigs: [], @@ -176,7 +177,6 @@ describe('resolveAgentContext', () => { policyEngines: [], configBundles: [], abTests: [], - httpGateways: [], harnesses: [], datasets: [], payments: [], @@ -226,6 +226,7 @@ describe('resolveAgentContext', () => { managedBy: 
'CDK' as const, runtimes: [], memories: [], + knowledgeBases: [], credentials: [], evaluators: [], onlineEvalConfigs: [], @@ -233,7 +234,6 @@ describe('resolveAgentContext', () => { policyEngines: [], configBundles: [], abTests: [], - httpGateways: [], harnesses: [], datasets: [], payments: [], diff --git a/src/cli/commands/pause/__tests__/promote.test.ts b/src/cli/commands/pause/__tests__/promote.test.ts deleted file mode 100644 index 4b1ae200b..000000000 --- a/src/cli/commands/pause/__tests__/promote.test.ts +++ /dev/null @@ -1,59 +0,0 @@ -import { waitForRunningThenStop } from '../promote-utils.js'; -import { beforeEach, describe, expect, it, vi } from 'vitest'; - -const mockGetABTest = vi.fn(); -const mockUpdateABTest = vi.fn(); - -vi.mock('../../../aws/agentcore-ab-tests', () => ({ - getABTest: (...args: unknown[]) => mockGetABTest(...args), - updateABTest: (...args: unknown[]) => mockUpdateABTest(...args), -})); - -describe('waitForRunningThenStop', () => { - beforeEach(() => { - vi.clearAllMocks(); - mockUpdateABTest.mockResolvedValue({ executionStatus: 'STOPPED' }); - }); - - it('stops immediately when already RUNNING', async () => { - mockGetABTest.mockResolvedValue({ executionStatus: 'RUNNING' }); - - await waitForRunningThenStop('us-east-1', 'abt-123', 'MyTest', 3, 0); - - expect(mockGetABTest).toHaveBeenCalledTimes(1); - expect(mockUpdateABTest).toHaveBeenCalledWith({ - region: 'us-east-1', - abTestId: 'abt-123', - executionStatus: 'STOPPED', - }); - }); - - it('polls until RUNNING then stops', async () => { - mockGetABTest - .mockResolvedValueOnce({ executionStatus: 'UPDATING' }) - .mockResolvedValueOnce({ executionStatus: 'UPDATING' }) - .mockResolvedValueOnce({ executionStatus: 'RUNNING' }); - - await waitForRunningThenStop('us-east-1', 'abt-123', 'MyTest', 5, 0); - - expect(mockGetABTest).toHaveBeenCalledTimes(3); - expect(mockUpdateABTest).toHaveBeenCalledOnce(); - }); - - it('throws if AB test never reaches RUNNING', async () => { - 
mockGetABTest.mockResolvedValue({ executionStatus: 'UPDATING' }); - - await expect(waitForRunningThenStop('us-east-1', 'abt-123', 'MyTest', 3, 0)).rejects.toThrow( - 'did not reach RUNNING state' - ); - - expect(mockGetABTest).toHaveBeenCalledTimes(3); - expect(mockUpdateABTest).not.toHaveBeenCalled(); - }); - - it('includes current status in the error message', async () => { - mockGetABTest.mockResolvedValue({ executionStatus: 'STOPPED' }); - - await expect(waitForRunningThenStop('us-east-1', 'abt-123', 'MyTest', 2, 0)).rejects.toThrow('current: STOPPED'); - }); -}); diff --git a/src/cli/commands/pause/command.tsx b/src/cli/commands/pause/command.tsx index 5ef404ed7..d790fe191 100644 --- a/src/cli/commands/pause/command.tsx +++ b/src/cli/commands/pause/command.tsx @@ -1,13 +1,11 @@ import { ConfigIO, serializeResult } from '../../../lib'; -import { listABTests, updateABTest } from '../../aws/agentcore-ab-tests'; -import { stopBatchEvaluation } from '../../aws/agentcore-batch-evaluation'; -import { COMMAND_DESCRIPTIONS } from '../../constants'; import { getErrorMessage } from '../../errors'; import { handlePauseResume } from '../../operations/eval'; import type { OnlineEvalActionOptions } from '../../operations/eval'; +import { createJobEngine } from '../../operations/jobs'; +import { runCliCommand } from '../../telemetry/cli-command-run'; +import { COMMAND_DESCRIPTIONS } from '../../tui/copy'; import { requireProject } from '../../tui/guards'; -import { getRegion } from '../shared/region-utils'; -import { waitForRunningThenStop } from './promote-utils'; import type { Command } from '@commander-js/extra-typings'; import { Text, render } from 'ink'; import React from 'react'; @@ -72,176 +70,86 @@ function registerOnlineEvalSubcommand(parent: Command, action: 'pause' | 'resume }); } -async function resolveABTestId( - testName: string, - region: string -): Promise<{ abTestId: string; region: string; error?: string }> { - let projectName: string | undefined; - try { - 
const configIO = new ConfigIO(); - const deployedState = await configIO.readDeployedState(); - const awsTargets = await configIO.readAWSDeploymentTargets(); - - try { - const projectSpec = await configIO.readProjectSpec(); - projectName = projectSpec.name; - } catch { - // Project spec unavailable - } - - for (const [targetName, target] of Object.entries(deployedState.targets ?? {})) { - const abTests = target.resources?.abTests; - if (abTests?.[testName]) { - const targetConfig = awsTargets.find(t => t.name === targetName); - const resolvedRegion = targetConfig?.region ?? region; - return { abTestId: abTests[testName].abTestId, region: resolvedRegion }; - } - } - } catch { - // No deployed state - } - - try { - const result = await listABTests({ region, maxResults: 100 }); - // Match against both prefixed name ({projectName}_{testName}) and bare testName (backwards compat) - const prefixedName = projectName ? `${projectName}_${testName}` : undefined; - const match = - result.abTests.find(t => prefixedName != null && t.name === prefixedName) ?? - result.abTests.find(t => t.name === testName); - if (match) return { abTestId: match.abTestId, region }; - } catch { - // API call failed - } - - return { abTestId: '', region, error: `AB test "${testName}" not found in deployed state or API.` }; -} - function registerABTestSubcommand(parent: Command, action: 'pause' | 'resume') { - const executionStatus = action === 'pause' ? 'PAUSED' : 'RUNNING'; - const pastTense = action === 'pause' ? 'Paused' : 'Resumed'; + const verb = action === 'pause' ? 'Pause' : 'Resume'; parent .command('ab-test') - .description(`[preview] ${action === 'pause' ? 
'Pause' : 'Resume'} a deployed A/B test`) - .argument('', 'AB test name') - .option('--region ', 'AWS region') + .description(`${verb} a running A/B test`) + .requiredOption('-i, --id ', 'A/B test ID') + .option('--region ', 'AWS region (auto-detected if omitted)') .option('--json', 'Output as JSON') - .action(async (name: string, cliOptions: { region?: string; json?: boolean }) => { - try { - const region = await getRegion(cliOptions.region); - const { abTestId, error } = await resolveABTestId(name, region); - if (error) { - if (cliOptions.json) { - console.log(JSON.stringify({ success: false, error })); - } else { - console.error(error); - } - process.exit(1); - } - - const result = await updateABTest({ - region, - abTestId, - executionStatus, - }); - - if (cliOptions.json) { - console.log(JSON.stringify({ success: true, ...result })); - } else { - console.log(`${pastTense} AB test "${name}" (execution: ${result.executionStatus})`); + .action((cliOptions: { id: string; region?: string; json?: boolean }) => { + requireProject(); + + return runCliCommand(action === 'pause' ? 'pause.job' : 'resume.job', !!cliOptions.json, async () => { + const engine = createJobEngine(new ConfigIO()); + const result = + action === 'pause' + ? await engine.pause('ab-test', cliOptions.id) + : await engine.resume('ab-test', cliOptions.id); + if (!result.success) { + throw result.error; } - process.exit(0); - } catch (error) { if (cliOptions.json) { - console.log(JSON.stringify({ success: false, error: getErrorMessage(error) })); + console.log(JSON.stringify({ success: true, id: cliOptions.id })); } else { - console.error(`Error: ${getErrorMessage(error)}`); + console.log(`\n✓ A/B test ${cliOptions.id} ${action === 'pause' ? 
'paused' : 'resumed'}.\n`); } - process.exit(1); - } + return { job_type: 'ab-test' }; + }); }); } -export const registerPause = (program: Command) => { - const pauseCmd = program.command('pause').description(COMMAND_DESCRIPTIONS.pause); - registerOnlineEvalSubcommand(pauseCmd, 'pause'); - registerABTestSubcommand(pauseCmd, 'pause'); -}; - -export const registerResume = (program: Command) => { - const resumeCmd = program.command('resume').description(COMMAND_DESCRIPTIONS.resume); - registerOnlineEvalSubcommand(resumeCmd, 'resume'); - registerABTestSubcommand(resumeCmd, 'resume'); -}; - -export const registerStop = (program: Command) => { - const stopCmd = program.command('stop').description('Stop resources'); +function registerOnlineInsightsSubcommand(parent: Command, action: 'pause' | 'resume') { + const description = + action === 'pause' + ? '[preview] Pause a deployed online insights config. Use --arn to target configs outside the project.' + : '[preview] Resume a paused online insights config. Use --arn to target configs outside the project.'; + const pastTense = action === 'pause' ? 
'Paused' : 'Resumed'; - stopCmd - .command('ab-test') - .description('[preview] Stop a deployed A/B test permanently') - .argument('', 'AB test name') - .option('--region ', 'AWS region') + parent + .command('online-insights') + .description(description) + .argument('[name]', 'Config name from project (not needed with --arn)') + .option('--arn ', 'Online insights config ARN — operate without a project directory') + .option('--region ', 'AWS region override (auto-detected from ARN otherwise)') .option('--json', 'Output as JSON') - .action(async (name: string, cliOptions: { region?: string; json?: boolean }) => { - try { - const region = await getRegion(cliOptions.region); - const { abTestId, error } = await resolveABTestId(name, region); - if (error) { - if (cliOptions.json) { - console.log(JSON.stringify({ success: false, error })); - } else { - console.error(error); - } - process.exit(1); - } - - const result = await updateABTest({ - region, - abTestId, - executionStatus: 'STOPPED', - }); - - if (cliOptions.json) { - console.log(JSON.stringify({ success: true, ...result })); - } else { - console.log(`Stopped AB test "${name}" (execution: ${result.executionStatus})`); - } - process.exit(0); - } catch (error) { + .action(async (name: string | undefined, cliOptions: { arn?: string; region?: string; json?: boolean }) => { + if (!cliOptions.arn && !name) { + const error = 'Either a config name or --arn is required'; if (cliOptions.json) { - console.log(JSON.stringify({ success: false, error: getErrorMessage(error) })); + console.log(JSON.stringify({ success: false, error })); } else { - console.error(`Error: ${getErrorMessage(error)}`); + render({error}); } process.exit(1); } - }); - stopCmd - .command('batch-evaluation') - .description('[preview] Stop a running batch evaluation') - .requiredOption('-i, --id ', 'Batch evaluation ID to stop') - .option('--region ', 'AWS region (auto-detected if omitted)') - .option('--json', 'Output as JSON') - .action(async 
(cliOptions: { id: string; region?: string; json?: boolean }) => { - try { - const region = await getRegion(cliOptions.region); + if (!cliOptions.arn) { + requireProject(); + } + + const options: OnlineEvalActionOptions = { + name: name ?? '', + arn: cliOptions.arn, + region: cliOptions.region, + json: cliOptions.json, + }; - const result = await stopBatchEvaluation({ - region, - batchEvaluationId: cliOptions.id, - }); + try { + const result = await handlePauseResume(options, action); if (cliOptions.json) { - console.log(JSON.stringify({ success: true, ...result })); + console.log(JSON.stringify(serializeResult(result))); + } else if (result.success) { + const displayName = cliOptions.arn ? result.configId : name; + console.log(`${pastTense} online insights config "${displayName}" (status: ${result.executionStatus})`); } else { - console.log(`\nBatch evaluation stopped successfully`); - console.log(`ID: ${result.batchEvaluationId}`); - console.log(`Status: ${result.status}\n`); + render({result.error.message}); } - process.exit(0); + process.exit(result.success ? 
0 : 1); } catch (error) { if (cliOptions.json) { console.log(JSON.stringify({ success: false, error: getErrorMessage(error) })); @@ -251,71 +159,18 @@ export const registerStop = (program: Command) => { process.exit(1); } }); -}; - -export const registerPromote = (program: Command) => { - const promoteCmd = program.command('promote').description('Promote resources'); - - promoteCmd - .command('ab-test') - .description('Promote the winning treatment of an A/B test') - .argument('', 'AB test name') - .option('--region ', 'AWS region') - .option('--json', 'Output as JSON') - .action(async (name: string, cliOptions: { region?: string; json?: boolean }) => { - try { - const region = await getRegion(cliOptions.region); - const { abTestId, error } = await resolveABTestId(name, region); - if (error) { - if (cliOptions.json) { - console.log(JSON.stringify({ success: false, error })); - } else { - console.error(error); - } - process.exit(1); - } - - const result = await waitForRunningThenStop(region, abTestId, name); +} - // Apply promotion to agentcore.json - const { promoteABTestConfig } = await import('../../operations/ab-test/promote'); - let promoted = false; - let mode: string | undefined; - let promotionDetail = ''; - try { - const promoResult = await promoteABTestConfig(abTestId, name); - promoted = promoResult.promoted; - mode = promoResult.mode; - promotionDetail = promoResult.promotionDetail; - } catch { - // Config read/write failed - } +export const registerPause = (program: Command) => { + const pauseCmd = program.command('pause').description(COMMAND_DESCRIPTIONS.pause); + registerOnlineEvalSubcommand(pauseCmd, 'pause'); + registerOnlineInsightsSubcommand(pauseCmd, 'pause'); + registerABTestSubcommand(pauseCmd, 'pause'); +}; - if (cliOptions.json) { - console.log( - JSON.stringify({ - success: true, - ...result, - ...(mode && { mode }), - promoted, - ...(promotionDetail && { promotionDetail }), - }) - ); - } else { - console.log(`AB test "${name}" stopped.`); - 
if (promoted) { - console.log(`\n${promotionDetail}`); - console.log(`\nRun: agentcore deploy`); - } - } - process.exit(0); - } catch (error) { - if (cliOptions.json) { - console.log(JSON.stringify({ success: false, error: getErrorMessage(error) })); - } else { - console.error(`Error: ${getErrorMessage(error)}`); - } - process.exit(1); - } - }); +export const registerResume = (program: Command) => { + const resumeCmd = program.command('resume').description(COMMAND_DESCRIPTIONS.resume); + registerOnlineEvalSubcommand(resumeCmd, 'resume'); + registerOnlineInsightsSubcommand(resumeCmd, 'resume'); + registerABTestSubcommand(resumeCmd, 'resume'); }; diff --git a/src/cli/commands/pause/index.ts b/src/cli/commands/pause/index.ts index 1bc38e3be..858054fd2 100644 --- a/src/cli/commands/pause/index.ts +++ b/src/cli/commands/pause/index.ts @@ -1 +1 @@ -export { registerPause, registerPromote } from './command'; +export { registerPause } from './command'; diff --git a/src/cli/commands/pause/promote-utils.ts b/src/cli/commands/pause/promote-utils.ts deleted file mode 100644 index 9bca03f8a..000000000 --- a/src/cli/commands/pause/promote-utils.ts +++ /dev/null @@ -1,28 +0,0 @@ -import { getABTest, updateABTest } from '../../aws/agentcore-ab-tests'; -import type { UpdateABTestResult } from '../../aws/agentcore-ab-tests'; - -/** - * Poll until the AB test reaches RUNNING status, then stop it. - * Throws if the test never reaches RUNNING within the allotted attempts. 
- */ -export async function waitForRunningThenStop( - region: string, - abTestId: string, - name: string, - maxAttempts = 12, - delayMs = 10_000 -): Promise { - let currentStatus: string | undefined; - for (let attempt = 0; attempt < maxAttempts; attempt++) { - const current = await getABTest({ region, abTestId }); - currentStatus = current.executionStatus; - if (currentStatus === 'RUNNING') break; - await new Promise(resolve => setTimeout(resolve, delayMs)); - } - if (currentStatus !== 'RUNNING') { - throw new Error( - `AB test "${name}" did not reach RUNNING state after waiting (current: ${currentStatus}). Cannot promote.` - ); - } - return updateABTest({ region, abTestId, executionStatus: 'STOPPED' }); -} diff --git a/src/cli/commands/promote/command.tsx b/src/cli/commands/promote/command.tsx new file mode 100644 index 000000000..d7ff6dcf4 --- /dev/null +++ b/src/cli/commands/promote/command.tsx @@ -0,0 +1,34 @@ +import { ConfigIO } from '../../../lib'; +import { createJobEngine } from '../../operations/jobs'; +import { runCliCommand } from '../../telemetry/cli-command-run'; +import { requireProject } from '../../tui/guards'; +import type { Command } from '@commander-js/extra-typings'; + +export const registerPromote = (program: Command) => { + const promoteCmd = program.command('promote').description('Promote resources'); + + promoteCmd + .command('ab-test') + .description('Promote the winning treatment of an A/B test (stops the test and updates agentcore.json)') + .requiredOption('-i, --id ', 'A/B test ID') + .option('--region ', 'AWS region (auto-detected if omitted)') + .option('--json', 'Output as JSON') + .action((cliOptions: { id: string; region?: string; json?: boolean }) => { + requireProject(); + + return runCliCommand('promote.job', !!cliOptions.json, async () => { + const engine = createJobEngine(new ConfigIO()); + const result = await engine.promote('ab-test', cliOptions.id); + if (!result.success) { + throw result.error; + } + if (cliOptions.json) 
{ + console.log(JSON.stringify({ success: true, id: cliOptions.id })); + } else { + console.log(`\n✓ A/B test ${cliOptions.id} stopped and winning variant applied to agentcore.json.`); + console.log(`\nRun: agentcore deploy\n`); + } + return { job_type: 'ab-test' }; + }); + }); +}; diff --git a/src/cli/commands/promote/index.ts b/src/cli/commands/promote/index.ts new file mode 100644 index 000000000..e167015a9 --- /dev/null +++ b/src/cli/commands/promote/index.ts @@ -0,0 +1 @@ +export { registerPromote } from './command'; diff --git a/src/cli/commands/recommendations/command.tsx b/src/cli/commands/recommendations/command.tsx index cccbbc8f6..fec97617d 100644 --- a/src/cli/commands/recommendations/command.tsx +++ b/src/cli/commands/recommendations/command.tsx @@ -1,63 +1,57 @@ -import { COMMAND_DESCRIPTIONS } from '../../constants'; -import { getErrorMessage } from '../../errors'; -import { listAllRecommendations } from '../../operations/recommendation'; +import { ConfigIO, JobNotFoundError, serializeResult } from '../../../lib'; +import { createJobEngine } from '../../operations/jobs'; +import { printRecommendationDetail, printRecommendationHistory } from '../../operations/jobs/recommendation/format'; +import { runCliCommand } from '../../telemetry/cli-command-run'; +import { COMMAND_DESCRIPTIONS } from '../../tui/copy'; import { requireProject } from '../../tui/guards'; import type { Command } from '@commander-js/extra-typings'; -import { Text, render } from 'ink'; -import React from 'react'; export const registerRecommendations = (program: Command) => { const recCmd = program.command('recommendations').description(COMMAND_DESCRIPTIONS.recommendations); recCmd .command('history') - .description('Show past recommendation runs saved locally') + .description('List recommendation jobs (running jobs are refreshed from the service)') .option('--json', 'Output as JSON') .action((cliOptions: { json?: boolean }) => { requireProject(); - - try { - const records = 
listAllRecommendations(); - + return runCliCommand('job.history', !!cliOptions.json, async () => { + const engine = createJobEngine(new ConfigIO()); + const records = await engine.list({ type: 'recommendation' }); if (cliOptions.json) { - console.log(JSON.stringify({ success: true, recommendations: records })); - process.exit(0); - return; - } - - if (records.length === 0) { - console.log('No recommendation runs found. Run `agentcore run recommendation` to create one.'); - return; - } - - console.log( - `\n${'Date'.padEnd(22)} ${'Type'.padEnd(20)} ${'Agent'.padEnd(20)} ${'Recommendation ID'.padEnd(40)}` - ); - console.log('─'.repeat(105)); - - for (const record of records) { - const date = record.startedAt - ? new Date(record.startedAt).toLocaleString([], { - year: 'numeric', - month: 'short', - day: 'numeric', - hour: '2-digit', - minute: '2-digit', - }) - : 'unknown'; console.log( - `${date.padEnd(22)} ${(record.type ?? 'unknown').padEnd(20)} ${(record.agent ?? 'unknown').padEnd(20)} ${record.recommendationId.padEnd(40)}` + JSON.stringify({ + success: true, + recommendations: records, + }) ); + } else { + printRecommendationHistory(records); } + return { job_type: 'recommendation' }; + }); + }); - console.log(''); - } catch (error) { + // Bare positional on the group: `agentcore recommendations ` shows one job. + // (No .description() here — that would override the group description shown in the command list.) + recCmd + .argument('', 'Recommendation job ID to view') + .option('--json', 'Output as JSON') + .action((id: string, cliOptions: { json?: boolean }) => { + requireProject(); + return runCliCommand('job.get', !!cliOptions.json, async () => { + const engine = createJobEngine(new ConfigIO()); + const record = await engine.get('recommendation', id); + if (!record) { + // Throw only — runCliCommand owns error output (single JSON line in --json, stderr otherwise). 
+ throw new JobNotFoundError(`Recommendation "${id}" not found.`); + } if (cliOptions.json) { - console.log(JSON.stringify({ success: false, error: getErrorMessage(error) })); + console.log(JSON.stringify(serializeResult({ success: true, ...record }))); } else { - render(Error: {getErrorMessage(error)}); + printRecommendationDetail(record); } - process.exit(1); - } + return { job_type: 'recommendation' }; + }); }); }; diff --git a/src/cli/commands/remove/__tests__/remove-gateway-target.test.ts b/src/cli/commands/remove/__tests__/remove-gateway-target.test.ts index 21a6faa98..28559baf1 100644 --- a/src/cli/commands/remove/__tests__/remove-gateway-target.test.ts +++ b/src/cli/commands/remove/__tests__/remove-gateway-target.test.ts @@ -46,9 +46,12 @@ describe('remove gateway-target command', () => { describe('remove existing-endpoint target', () => { it('removes target from gateway', async () => { - // Create a fresh gateway + // Create a fresh gateway with MCP protocol (required for mcp-server targets) const tempGateway = `TempGw${Date.now()}`; - const gwResult = await runCLI(['add', 'gateway', '--name', tempGateway, '--json'], projectDir); + const gwResult = await runCLI( + ['add', 'gateway', '--name', tempGateway, '--protocol-type', 'MCP', '--json'], + projectDir + ); expect(gwResult.exitCode, `gateway add failed: ${gwResult.stdout}`).toBe(0); // Add a target to the gateway diff --git a/src/cli/commands/remove/__tests__/remove-gateway.test.ts b/src/cli/commands/remove/__tests__/remove-gateway.test.ts index cd21f3e56..63811b4e5 100644 --- a/src/cli/commands/remove/__tests__/remove-gateway.test.ts +++ b/src/cli/commands/remove/__tests__/remove-gateway.test.ts @@ -46,8 +46,8 @@ describe('remove gateway command', () => { throw new Error(`Failed to create agent: ${result.stdout} ${result.stderr}`); } - // Add gateway - result = await runCLI(['add', 'gateway', '--name', gatewayName, '--json'], projectDir); + // Add gateway with MCP protocol (required for mcp-server 
targets) + result = await runCLI(['add', 'gateway', '--name', gatewayName, '--protocol-type', 'MCP', '--json'], projectDir); if (result.exitCode !== 0) { throw new Error(`Failed to create gateway: ${result.stdout} ${result.stderr}`); } @@ -93,8 +93,8 @@ describe('remove gateway command', () => { }); it('removes gateway with targets attached', async () => { - // Re-add gateway since previous test may have removed it - await runCLI(['add', 'gateway', '--name', gatewayName, '--json'], projectDir); + // Re-add gateway since previous test may have removed it (with MCP protocol for mcp-server targets) + await runCLI(['add', 'gateway', '--name', gatewayName, '--protocol-type', 'MCP', '--json'], projectDir); // Add a target to the gateway const targetName = `target${Date.now()}`; diff --git a/src/cli/commands/remove/__tests__/skill-command.test.ts b/src/cli/commands/remove/__tests__/skill-command.test.ts new file mode 100644 index 000000000..77ae950a9 --- /dev/null +++ b/src/cli/commands/remove/__tests__/skill-command.test.ts @@ -0,0 +1,147 @@ +import type { HarnessSpec } from '../../../../schema'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +const mockReadHarnessSpec = vi.fn(); +const mockWriteHarnessSpec = vi.fn(); + +vi.mock('../../../../lib/index.js', () => ({ + ConfigIO: class { + readHarnessSpec = mockReadHarnessSpec; + writeHarnessSpec = mockWriteHarnessSpec; + }, + findConfigRoot: vi.fn(() => '/fake/path'), +})); + +function makeHarnessSpec(skills: HarnessSpec['skills'] = []): HarnessSpec { + return { + name: 'TestHarness', + model: { provider: 'bedrock', modelId: 'anthropic.claude-3-5-sonnet-20240620-v1:0' }, + tools: [], + skills, + } as HarnessSpec; +} + +describe('handleRemoveSkill', () => { + beforeEach(() => { + mockReadHarnessSpec.mockReset(); + mockWriteHarnessSpec.mockReset(); + }); + + afterEach(() => { + vi.resetAllMocks(); + }); + + it('removes a path skill by matching value', async () => { + 
mockReadHarnessSpec.mockResolvedValue(makeHarnessSpec([{ path: './skill-a' }, { path: './skill-b' }])); + const { handleRemoveSkill } = await import('../skill-command'); + const result = await handleRemoveSkill({ harness: 'TestHarness', path: './skill-a' }); + expect(result.success).toBe(true); + expect(mockWriteHarnessSpec).toHaveBeenCalledWith( + 'TestHarness', + expect.objectContaining({ + skills: [{ path: './skill-b' }], + }) + ); + }); + + it('removes an s3 skill by matching value', async () => { + mockReadHarnessSpec.mockResolvedValue(makeHarnessSpec([{ s3Uri: 's3://bucket/skill' }])); + const { handleRemoveSkill } = await import('../skill-command'); + const result = await handleRemoveSkill({ harness: 'TestHarness', s3: 's3://bucket/skill' }); + expect(result.success).toBe(true); + expect(mockWriteHarnessSpec).toHaveBeenCalledWith( + 'TestHarness', + expect.objectContaining({ + skills: [], + }) + ); + }); + + it('removes a git skill by matching URL and path', async () => { + mockReadHarnessSpec.mockResolvedValue(makeHarnessSpec([{ gitUrl: 'https://github.com/org/repo', path: 'sub' }])); + const { handleRemoveSkill } = await import('../skill-command'); + const result = await handleRemoveSkill({ + harness: 'TestHarness', + git: 'https://github.com/org/repo', + gitPath: 'sub', + }); + expect(result.success).toBe(true); + expect(mockWriteHarnessSpec).toHaveBeenCalledWith( + 'TestHarness', + expect.objectContaining({ + skills: [], + }) + ); + }); + + it('removes a git skill without path by URL only', async () => { + mockReadHarnessSpec.mockResolvedValue(makeHarnessSpec([{ gitUrl: 'https://github.com/org/repo' }])); + const { handleRemoveSkill } = await import('../skill-command'); + const result = await handleRemoveSkill({ harness: 'TestHarness', git: 'https://github.com/org/repo' }); + expect(result.success).toBe(true); + expect(mockWriteHarnessSpec).toHaveBeenCalledWith( + 'TestHarness', + expect.objectContaining({ + skills: [], + }) + ); + }); + + it('removes an 
aws skill by matching paths', async () => { + mockReadHarnessSpec.mockResolvedValue( + makeHarnessSpec([{ awsSkills: { paths: ['core-skills/*'] } }, { path: './other' }]) + ); + const { handleRemoveSkill } = await import('../skill-command'); + const result = await handleRemoveSkill({ harness: 'TestHarness', awsSkills: 'core-skills/*' }); + expect(result.success).toBe(true); + expect(mockWriteHarnessSpec).toHaveBeenCalledWith( + 'TestHarness', + expect.objectContaining({ + skills: [{ path: './other' }], + }) + ); + }); + + it('removes an aws skill with no paths (wildcard)', async () => { + mockReadHarnessSpec.mockResolvedValue(makeHarnessSpec([{ awsSkills: {} }])); + const { handleRemoveSkill } = await import('../skill-command'); + const result = await handleRemoveSkill({ harness: 'TestHarness', awsSkills: '*' }); + expect(result.success).toBe(true); + expect(mockWriteHarnessSpec).toHaveBeenCalledWith( + 'TestHarness', + expect.objectContaining({ + skills: [], + }) + ); + }); + + it('fails when skill not found', async () => { + mockReadHarnessSpec.mockResolvedValue(makeHarnessSpec([{ path: './other' }])); + const { handleRemoveSkill } = await import('../skill-command'); + const result = await handleRemoveSkill({ harness: 'TestHarness', path: './missing' }); + expect(result.success).toBe(false); + expect(!result.success && result.error.message).toContain('not found'); + }); + + it('fails when no source provided', async () => { + const { handleRemoveSkill } = await import('../skill-command'); + const result = await handleRemoveSkill({ harness: 'TestHarness' }); + expect(result.success).toBe(false); + expect(!result.success && result.error.message).toContain('Exactly one'); + }); + + it('fails when multiple sources provided', async () => { + const { handleRemoveSkill } = await import('../skill-command'); + const result = await handleRemoveSkill({ harness: 'TestHarness', path: './x', s3: 's3://y' }); + expect(result.success).toBe(false); + expect(!result.success && 
result.error.message).toContain('Exactly one'); + }); + + it('fails when harness not found', async () => { + mockReadHarnessSpec.mockRejectedValue(new Error('not found')); + const { handleRemoveSkill } = await import('../skill-command'); + const result = await handleRemoveSkill({ harness: 'Missing', path: './x' }); + expect(result.success).toBe(false); + expect(!result.success && result.error.message).toContain('not found'); + }); +}); diff --git a/src/cli/commands/remove/command.tsx b/src/cli/commands/remove/command.tsx index 496669ef2..b5e298047 100644 --- a/src/cli/commands/remove/command.tsx +++ b/src/cli/commands/remove/command.tsx @@ -73,6 +73,7 @@ async function handleRemoveAll(options: RemoveAllOptions): Promise }, runtimes: [], memories: [], + knowledgeBases: [], credentials: [], evaluators: [], onlineEvalConfigs: [], @@ -80,7 +81,6 @@ async function handleRemoveAll(options: RemoveAllOptions): Promise policyEngines: [], configBundles: [], abTests: [], - httpGateways: [], harnesses: [], datasets: [], payments: [], diff --git a/src/cli/commands/remove/skill-command.ts b/src/cli/commands/remove/skill-command.ts new file mode 100644 index 000000000..6fd4030fc --- /dev/null +++ b/src/cli/commands/remove/skill-command.ts @@ -0,0 +1,138 @@ +import { ConfigIO, findConfigRoot } from '../../../lib'; +import type { HarnessSpec } from '../../../schema'; +import { getErrorMessage } from '../../errors'; +import { withCommandRunTelemetry } from '../../telemetry/cli-command-run.js'; +import { buildGitSkillKey, getSkillKey } from '@/cli/operations/harness/skill-utils'; +import { ValidationError } from '@/lib/errors/types'; +import type { Result } from '@/lib/result'; +import type { Command } from '@commander-js/extra-typings'; + +export interface RemoveSkillOptions { + harness: string; + path?: string; + s3?: string; + git?: string; + gitPath?: string; + awsSkills?: string | true; +} + +export async function handleRemoveSkill( + options: RemoveSkillOptions +): Promise> { + 
const { harness } = options; + + const sourceCount = [options.path, options.s3, options.git, options.awsSkills].filter(Boolean).length; + if (sourceCount !== 1) { + return { + success: false, + error: new ValidationError( + 'Exactly one of --path, --s3, --git, or --aws-skills is required to identify the skill' + ), + }; + } + + const configIO = new ConfigIO(); + + let harnessSpec: HarnessSpec; + try { + harnessSpec = await configIO.readHarnessSpec(harness); + } catch { + return { success: false, error: new ValidationError(`Harness '${harness}' not found.`) }; + } + + let targetKey: string; + let skillSource: string; + if (options.path) { + targetKey = `path:${options.path}`; + skillSource = options.path; + } else if (options.s3) { + targetKey = `s3:${options.s3}`; + skillSource = options.s3; + } else if (options.git) { + targetKey = buildGitSkillKey(options.git, options.gitPath); + skillSource = options.gitPath ? `${options.git} (path: ${options.gitPath})` : options.git; + } else { + const awsPaths = + options.awsSkills === true + ? '*' + : options + .awsSkills!.split(',') + .map(s => s.trim()) + .filter(Boolean) + .sort() + .join(','); + targetKey = `awsSkills:${awsPaths}`; + skillSource = awsPaths === '*' ? 'aws-skills (all)' : `aws-skills (${awsPaths})`; + } + + const idx = harnessSpec.skills.findIndex(s => getSkillKey(s) === targetKey); + if (idx === -1) { + const hint = options.git && !options.gitPath ? ' If the skill has a sub-path, specify --git-path.' 
: ''; + return { + success: false, + error: new ValidationError(`Skill '${skillSource}' not found in harness '${harness}'.${hint}`), + }; + } + + harnessSpec.skills.splice(idx, 1); + await configIO.writeHarnessSpec(harness, harnessSpec); + + return { success: true, harnessName: harness, skillSource }; +} + +export function registerRemoveSkill(removeCmd: Command): void { + removeCmd + .command('skill') + .description('Remove a skill from a harness') + .requiredOption('--harness ', 'Target harness name') + .option('--path ', 'Path to an installed skill in the environment') + .option('--s3 ', 'S3 URI of skill to remove') + .option('--git ', 'Git URL of skill to remove') + .option('--git-path ', 'Subdirectory within the git repo (for --git)') + .option('--aws-skills [paths]', 'AWS skill paths to remove (comma-separated, or omit for wildcard)') + .option('--json', 'Output as JSON') + .action(async cliOptions => { + if (!findConfigRoot()) { + console.error('No agentcore project found. Run `agentcore create` first.'); + process.exit(1); + } + + try { + const result = await withCommandRunTelemetry('remove.skill', {}, () => + handleRemoveSkill({ + harness: cliOptions.harness, + path: cliOptions.path, + s3: cliOptions.s3, + git: cliOptions.git, + gitPath: cliOptions.gitPath, + awsSkills: cliOptions.awsSkills, + }) + ); + + if (!result.success) { + if (cliOptions.json) { + console.log(JSON.stringify({ success: false, error: result.error.message })); + } else { + console.error(result.error.message); + } + process.exit(1); + } + + if (cliOptions.json) { + console.log( + JSON.stringify({ success: true, harnessName: result.harnessName, skillSource: result.skillSource }) + ); + } else { + console.log(`Removed skill '${result.skillSource}' from harness '${result.harnessName}'.`); + console.log(`Run 'agentcore deploy' to apply changes.`); + } + } catch (error) { + if (cliOptions.json) { + console.log(JSON.stringify({ success: false, error: getErrorMessage(error) })); + } else { + 
console.error(getErrorMessage(error)); + } + process.exit(1); + } + }); +} diff --git a/src/cli/commands/remove/types.ts b/src/cli/commands/remove/types.ts index b829199d1..2e3ad6346 100644 --- a/src/cli/commands/remove/types.ts +++ b/src/cli/commands/remove/types.ts @@ -10,11 +10,12 @@ export type ResourceType = | 'credential' | 'evaluator' | 'online-eval' + | 'online-insights' | 'policy-engine' | 'policy' | 'config-bundle' - | 'ab-test' | 'dataset' + | 'knowledge-base' | 'payment-manager' | 'payment-connector'; diff --git a/src/cli/commands/run/command.tsx b/src/cli/commands/run/command.tsx index dcac6dac1..db036ce8d 100644 --- a/src/cli/commands/run/command.tsx +++ b/src/cli/commands/run/command.tsx @@ -1,20 +1,22 @@ -import { serializeResult } from '../../../lib'; +import { ConfigIO, ValidationError, findConfigRoot, serializeResult } from '../../../lib'; import type { RecommendationType } from '../../aws/agentcore-recommendation'; import { COMMAND_DESCRIPTIONS } from '../../constants'; import { getErrorMessage } from '../../errors'; import { handleRunEval } from '../../operations/eval'; import type { RunEvalOptions } from '../../operations/eval'; -import { saveBatchEvalRun } from '../../operations/eval/batch-eval-storage'; -import { runBatchEvaluationCommand } from '../../operations/eval/run-batch-evaluation'; +import { runKbIngestionByName } from '../../operations/ingest'; +import { createJobEngine, runDatasetPhase1, waitForTerminal } from '../../operations/jobs'; import type { - BatchEvaluationResult, - RunBatchEvaluationCommandResult, -} from '../../operations/eval/run-batch-evaluation'; -import { - applyRecommendationToBundle, - runRecommendationCommand, - saveRecommendationRun, -} from '../../operations/recommendation'; + ABTestJobRecord, + ABTestMode, + BatchEvaluationJobRecord, + InsightsJobRecord, + RecommendationJobRecord, + StartABTestJobOptions, + StartBatchEvaluationJobOptions, +} from '../../operations/jobs'; +import { printABTestDetail } from 
'../../operations/jobs/ab-test/format'; +import { runCliCommand } from '../../telemetry/cli-command-run'; import { requireProject } from '../../tui/guards'; import type { Command } from '@commander-js/extra-typings'; import { Text, render } from 'ink'; @@ -131,6 +133,18 @@ export const registerRun = (program: Command) => { process.exit(1); } + // --dataset-version only applies to dataset-driven runs. Passing it without --dataset + // would otherwise be silently ignored and the eval would fall back to trace-based evaluation. + if (cliOptions.datasetVersion && !cliOptions.dataset) { + const error = '--dataset-version requires --dataset (the version selects a version of that dataset)'; + if (cliOptions.json) { + console.log(JSON.stringify({ success: false, error })); + } else { + render({error}); + } + process.exit(1); + } + const options: RunEvalOptions = { agent: cliOptions.runtime, agentArn: cliOptions.runtimeArn, @@ -178,9 +192,10 @@ export const registerRun = (program: Command) => { runCmd .command('batch-evaluation') - .description('[preview] Run evaluators in batch across all agent sessions in CloudWatch') - .requiredOption('-r, --runtime ', 'Runtime name from project config') - .requiredOption('-e, --evaluator ', 'Evaluator name(s) — Builtin.* IDs') + .description('Run evaluators in batch across all agent sessions in CloudWatch') + .option('-r, --runtime ', 'Runtime name from project config [non-interactive]') + .option('-e, --evaluator ', 'Evaluator name(s) — Builtin.* IDs [non-interactive]') + .option('--evaluator-arn ', 'Evaluator ARN(s) — use instead of -e when referencing evaluators by ARN') .option('-n, --name ', 'Name for the batch evaluation (auto-generated if omitted)') .option('-d, --lookback-days ', 'Lookback window in days (filters sessions by time range)') .option('-s, --session-ids ', 'Specific session IDs to evaluate') @@ -195,11 +210,14 @@ export const registerRun = (program: Command) => { ) .option('--dataset ', 'Dataset name — invoke agent 
with dataset scenarios before batch evaluation') .option('--dataset-version ', 'Dataset version to use (omit for local file, or N/DRAFT)') + .option('--kms-key ', 'KMS key ARN for encrypting batch evaluation results') + .option('--wait', 'Block until the batch evaluation reaches a terminal state') .option('--json', 'Output as JSON') .action( async (cliOptions: { - runtime: string; - evaluator: string[]; + runtime?: string; + evaluator?: string[]; + evaluatorArn?: string[]; name?: string; lookbackDays?: string; sessionIds?: string[]; @@ -208,94 +226,231 @@ export const registerRun = (program: Command) => { endpoint?: string; dataset?: string; datasetVersion?: string; + kmsKey?: string; + wait?: boolean; json?: boolean; }) => { requireProject(); - try { - // Parse ground truth file if provided - let sessionMetadata: import('../../aws/agentcore-batch-evaluation').SessionMetadataEntry[] | undefined; + if (!cliOptions.runtime && !cliOptions.json) { + const { requireTTY } = await import('../../tui/guards/tty'); + requireTTY(); + const { RunBatchEvalFlow } = await import('../../tui/screens/run-eval/RunBatchEvalFlow'); + const { clear, unmount } = render( + { + clear(); + unmount(); + process.exit(0); + }} + /> + ); + return; + } + + if (!cliOptions.runtime || (!cliOptions.evaluator?.length && !cliOptions.evaluatorArn?.length)) { + const error = + '--runtime and at least one --evaluator or --evaluator-arn are required in non-interactive mode'; + if (cliOptions.json) { + console.log(JSON.stringify({ success: false, error })); + } else { + render({error}); + } + process.exit(1); + } + + // --dataset-version only applies to dataset-driven runs. Passing it without --dataset + // would otherwise be silently ignored and the job would fall back to trace-based evaluation. 
+ if (cliOptions.datasetVersion && !cliOptions.dataset) { + const error = '--dataset-version requires --dataset (the version selects a version of that dataset)'; + if (cliOptions.json) { + console.log(JSON.stringify({ success: false, error })); + } else { + render({error}); + } + process.exit(1); + } + + const log = (message: string) => { + if (!cliOptions.json) console.log(message); + }; + + await runCliCommand('run.job', !!cliOptions.json, async () => { + const engine = createJobEngine(new ConfigIO()); + + // Ground truth file (explicit sessionMetadata) + let sessionMetadata: StartBatchEvaluationJobOptions['sessionMetadata']; if (cliOptions.groundTruth) { const { readFileSync } = await import('node:fs'); - const gtContent = readFileSync(cliOptions.groundTruth, 'utf-8'); - const gtData = JSON.parse(gtContent) as Record; - // Accept either a raw array or an object with a sessionMetadata key - sessionMetadata = Array.isArray(gtData) - ? (gtData as import('../../aws/agentcore-batch-evaluation').SessionMetadataEntry[]) - : (gtData.sessionMetadata as import('../../aws/agentcore-batch-evaluation').SessionMetadataEntry[]); - if (!Array.isArray(sessionMetadata)) { + const gtData = JSON.parse(readFileSync(cliOptions.groundTruth, 'utf-8')) as Record; + const parsed = Array.isArray(gtData) ? gtData : gtData.sessionMetadata; + if (!Array.isArray(parsed)) { throw new Error( 'Ground truth file must be a JSON array of session metadata entries, or an object with a "sessionMetadata" key' ); } + sessionMetadata = parsed as StartBatchEvaluationJobOptions['sessionMetadata']; } const lookbackDays = cliOptions.lookbackDays ? 
parseInt(cliOptions.lookbackDays, 10) : undefined; - const result = await runBatchEvaluationCommand({ - agent: cliOptions.runtime, - evaluators: cliOptions.evaluator, + if (lookbackDays !== undefined && (isNaN(lookbackDays) || lookbackDays < 1 || lookbackDays > 90)) { + throw new Error('--lookback-days must be between 1 and 90'); + } + let sessionIds = cliOptions.sessionIds; + const datasetInfo = cliOptions.dataset + ? { id: cliOptions.dataset, version: cliOptions.datasetVersion ?? 'LOCAL' } + : undefined; + + // Dataset mode (Phase-1): invoke scenarios + wait for ingestion, then start (caller-side, blocking). + if (cliOptions.dataset) { + const phase1 = await runDatasetPhase1({ + agent: cliOptions.runtime!, + datasetName: cliOptions.dataset, + datasetVersion: cliOptions.datasetVersion, + endpoint: cliOptions.endpoint, + onProgress: (_phase, message) => log(message), + }); + if (!phase1.success) { + throw phase1.error; + } + sessionIds = [...(sessionIds ?? []), ...phase1.sessionIds]; + sessionMetadata = [...(sessionMetadata ?? []), ...phase1.sessionMetadata]; + } + + const evaluators = [...(cliOptions.evaluator ?? []), ...(cliOptions.evaluatorArn ?? [])]; + + const startResult = await engine.start('batch-evaluation', { + agent: cliOptions.runtime!, + evaluators, name: cliOptions.name, region: cliOptions.region, endpoint: cliOptions.endpoint, - sessionIds: cliOptions.sessionIds, + sessionIds, lookbackDays: lookbackDays && !isNaN(lookbackDays) ? lookbackDays : undefined, sessionMetadata, - dataset: cliOptions.dataset, - datasetVersion: cliOptions.datasetVersion, - onProgress: cliOptions.json - ? undefined - : (_status, message) => { - console.log(message); - }, + source: cliOptions.dataset ? 'dataset' : 'traces', + dataset: datasetInfo, + kmsKeyArn: cliOptions.kmsKey, + onProgress: cliOptions.json ? 
undefined : (_status, message) => console.log(message), }); + if (!startResult.success) { + throw startResult.error; + } + let record: BatchEvaluationJobRecord = startResult.record; - // Save results locally - if (result.success) { - try { - const datasetInfo = cliOptions.dataset - ? { - source: 'dataset', - dataset: { - id: cliOptions.dataset, - version: cliOptions.datasetVersion ?? 'LOCAL', - }, - } - : {}; - const filePath = saveBatchEvalRun({ result, ...datasetInfo }); - if (!cliOptions.json) { - console.log(`\nResults saved to: ${filePath}`); - } - } catch { - // Non-fatal — skip saving - } + if (cliOptions.wait) { + const final = await waitForTerminal(engine, 'batch-evaluation', record.id, { + onTick: status => log(`Status: ${status}`), + }); + if (final) record = final; } if (cliOptions.json) { - console.log(JSON.stringify(serializeResult(result))); - } else if (result.success) { - formatBatchEvalOutput(result); + console.log(JSON.stringify(serializeResult({ success: true, ...record }))); } else { - render({result.error.message}); - if (result.logFilePath) { - console.error(`\nLog: ${result.logFilePath}`); + console.log(`\n✓ Batch evaluation started: ${record.id} (${record.status})`); + printBatchEvalResult(record); + if (!cliOptions.wait) { + console.log(`\nNext: agentcore view batch-evaluation ${record.id}`); } + console.log(''); + } + return { job_type: 'batch-evaluation', has_wait: !!cliOptions.wait }; + }); + } + ); + + runCmd + .command('insights') + .description('[preview] Run failure analysis across agent sessions') + .option('-r, --runtime ', 'Runtime name from project config') + .option('--insights ', 'Insight type(s) (default: Builtin.Insight.FailureAnalysis)') + .option('-e, --evaluator ', 'Evaluator(s) to include (needed for chaining into recommendations)') + .option('--online-eval-config-arn ', 'Use an existing OnlineEvaluationConfig as session source') + .option('-d, --lookback-days ', 'Lookback window in days (default: 7)') + 
.option('--start-time ', 'Session filter start time') + .option('--end-time ', 'Session filter end time') + .option('-s, --session-ids ', 'Limit to specific session IDs') + .option('-n, --name ', 'Job name (auto-generated if omitted)') + .option('--wait', 'Block until the job reaches a terminal state') + .option('--region ', 'AWS region (auto-detected if omitted)') + .option('--endpoint ', 'Runtime endpoint name (e.g. PROMPT_V1)') + .option('--json', 'Output as JSON') + .action( + async (cliOptions: { + runtime?: string; + insights?: string[]; + evaluator?: string[]; + onlineEvalConfigArn?: string; + lookbackDays?: string; + startTime?: string; + endTime?: string; + sessionIds?: string[]; + name?: string; + wait?: boolean; + region?: string; + endpoint?: string; + json?: boolean; + }) => { + requireProject(); + + const log = (message: string) => { + if (!cliOptions.json) console.log(message); + }; + + await runCliCommand('run.job', !!cliOptions.json, async () => { + const engine = createJobEngine(new ConfigIO()); + + const insightIds = cliOptions.insights ?? ['Builtin.Insight.FailureAnalysis']; + const lookbackDays = cliOptions.lookbackDays ? parseInt(cliOptions.lookbackDays, 10) : undefined; + if (lookbackDays !== undefined && (isNaN(lookbackDays) || lookbackDays < 1 || lookbackDays > 90)) { + throw new Error('--lookback-days must be between 1 and 90'); + } + + const startResult = await engine.start('insights', { + agent: cliOptions.runtime, + insights: insightIds, + evaluators: cliOptions.evaluator, + onlineEvalConfigArn: cliOptions.onlineEvalConfigArn, + lookbackDays: lookbackDays && !isNaN(lookbackDays) ? lookbackDays : undefined, + startTime: cliOptions.startTime, + endTime: cliOptions.endTime, + sessionIds: cliOptions.sessionIds, + name: cliOptions.name, + region: cliOptions.region, + endpoint: cliOptions.endpoint, + onProgress: cliOptions.json ? 
undefined : (_status, message) => console.log(message), + }); + if (!startResult.success) { + throw startResult.error; + } + let record: InsightsJobRecord = startResult.record; + + if (cliOptions.wait) { + const final = await waitForTerminal(engine, 'insights', record.id, { + onTick: status => log(`Status: ${status}`), + }); + if (final) record = final; } - process.exit(result.success ? 0 : 1); - } catch (error) { if (cliOptions.json) { - console.log(JSON.stringify({ success: false, error: getErrorMessage(error) })); + console.log(JSON.stringify(serializeResult({ success: true, ...record }))); } else { - render(Error: {getErrorMessage(error)}); + console.log(`\n✓ Insights job started: ${record.id} (${record.status})`); + printInsightsResult(record); + if (!cliOptions.wait) { + console.log(`\nNext: agentcore insights results --id ${record.id}`); + } + console.log(''); } - process.exit(1); - } + return { job_type: 'insights', has_wait: !!cliOptions.wait }; + }); } ); runCmd .command('recommendation') - .description('[preview] Optimize a system prompt or tool descriptions using agent traces as signal') + .description('Optimize a system prompt or tool descriptions using agent traces as signal') .option('-t, --type ', 'What to optimize: system-prompt or tool-description (default: system-prompt)') .option('-r, --runtime ', 'Runtime name from project config') .option('-e, --evaluator ', 'Evaluator name — required for system-prompt (exactly one)') @@ -320,6 +475,10 @@ export const registerRun = (program: Command) => { .option('-s, --session-id ', 'Limit trace collection to specific session IDs') .option('-n, --run ', 'Run name prefix for the recommendation') .option('--region ', 'AWS region') + .option('--kms-key ', 'KMS key ARN for encrypting recommendation results') + .option('--from-insights ', 'Use a local insights run as trace source (resolves batch eval ARN)') + .option('--batch-evaluation-arn ', 'Use a batch evaluation ARN directly as trace source') + 
.option('--wait', 'Block until the recommendation reaches a terminal state') .option('--json', 'Output as JSON') .action( async (cliOptions: { @@ -338,10 +497,30 @@ export const registerRun = (program: Command) => { sessionId?: string[]; run?: string; region?: string; + kmsKey?: string; + fromInsights?: string; + batchEvaluationArn?: string; + wait?: boolean; json?: boolean; }) => { requireProject(); + if (!cliOptions.runtime && !cliOptions.json) { + const { requireTTY } = await import('../../tui/guards/tty'); + requireTTY(); + const { RecommendationFlow } = await import('../../tui/screens/recommendation/RecommendationFlow'); + const { clear, unmount } = render( + { + clear(); + unmount(); + process.exit(0); + }} + /> + ); + return; + } + const typeKey = cliOptions.type ?? 'system-prompt'; const recType = RECOMMENDATION_TYPE_MAP[typeKey]; if (!recType) { @@ -354,11 +533,12 @@ export const registerRun = (program: Command) => { process.exit(1); } + const isBatchEvalSource = !!(cliOptions.fromInsights ?? 
cliOptions.batchEvaluationArn); const agent = cliOptions.runtime; const evaluator = cliOptions.evaluator; - if (!agent) { - const error = '--runtime is required'; + if (!agent && !isBatchEvalSource) { + const error = '--runtime is required (unless --from-insights or --batch-evaluation-arn is provided)'; if (cliOptions.json) { console.log(JSON.stringify({ success: false, error })); } else { @@ -367,9 +547,9 @@ export const registerRun = (program: Command) => { process.exit(1); } - // Evaluator is required for system-prompt recs, optional for tool-description - if (recType === 'SYSTEM_PROMPT_RECOMMENDATION' && !evaluator) { - const error = '--evaluator is required for system-prompt recommendations'; + // Evaluator is required for system-prompt recs, optional for tool-description and batch-eval source + if (recType === 'SYSTEM_PROMPT_RECOMMENDATION' && !evaluator && !isBatchEvalSource) { + const error = '--evaluator is required for system-prompt recommendations (unless using --from-insights)'; if (cliOptions.json) { console.log(JSON.stringify({ success: false, error })); } else { @@ -378,36 +558,39 @@ export const registerRun = (program: Command) => { process.exit(1); } - try { - const inputSource = cliOptions.promptFile - ? ('file' as const) - : cliOptions.inline - ? ('inline' as const) - : cliOptions.bundleName - ? ('config-bundle' as const) - : ('inline' as const); - - const traceSource = cliOptions.spansFile + const inputSource = cliOptions.promptFile + ? ('file' as const) + : cliOptions.inline + ? ('inline' as const) + : cliOptions.bundleName + ? ('config-bundle' as const) + : ('inline' as const); + + const traceSource = isBatchEvalSource + ? ('batch-evaluation' as const) + : cliOptions.spansFile ? ('spans-file' as const) : cliOptions.sessionId ? 
('sessions' as const) : ('cloudwatch' as const); - // Parse --tool-desc-json-path pairs ("toolName:$.json.path") into structured format - const toolDescJsonPaths = cliOptions.toolDescJsonPath - ?.map(pair => { - const colonIdx = pair.indexOf(':'); - if (colonIdx <= 0) return undefined; - return { - toolName: pair.slice(0, colonIdx), - toolDescriptionJsonPath: pair.slice(colonIdx + 1), - }; - }) - .filter((p): p is { toolName: string; toolDescriptionJsonPath: string } => p !== undefined); - - const result = await runRecommendationCommand({ + // Parse --tool-desc-json-path pairs ("toolName:$.json.path") into structured format + const toolDescJsonPaths = cliOptions.toolDescJsonPath + ?.map(pair => { + const colonIdx = pair.indexOf(':'); + if (colonIdx <= 0) return undefined; + return { + toolName: pair.slice(0, colonIdx), + toolDescriptionJsonPath: pair.slice(colonIdx + 1), + }; + }) + .filter((p): p is { toolName: string; toolDescriptionJsonPath: string } => p !== undefined); + + await runCliCommand('run.job', !!cliOptions.json, async () => { + const engine = createJobEngine(new ConfigIO()); + const startResult = await engine.start('recommendation', { type: recType, - agent, + agent: agent ?? '', evaluators: evaluator ? [evaluator] : [], promptFile: cliOptions.promptFile, inlineContent: cliOptions.inline, @@ -419,114 +602,372 @@ export const registerRun = (program: Command) => { lookbackDays: parseInt(cliOptions.lookback, 10), sessionIds: cliOptions.sessionId, spansFile: cliOptions.spansFile, + fromInsights: cliOptions.fromInsights, + batchEvaluationArn: cliOptions.batchEvaluationArn, recommendationName: cliOptions.run, region: cliOptions.region, + kmsKeyArn: cliOptions.kmsKey, inputSource, traceSource, - onProgress: cliOptions.json - ? undefined - : (_status, message) => { - console.log(message); - }, + onProgress: cliOptions.json ? 
undefined : (_status, message) => console.log(message), }); - if (!result.success) { - if (cliOptions.json) { - console.log(JSON.stringify(serializeResult(result))); - } else { - render({result.error.message}); - if (result.logFilePath) { - console.error(`\nLog: ${result.logFilePath}`); - } - } - process.exit(1); + if (!startResult.success) { + throw startResult.error; } - - // Save results locally - let savedFilePath: string | undefined; - try { - if (result.recommendationId) { - savedFilePath = saveRecommendationRun( - result.recommendationId, - result, - recType, - agent, - evaluator ? [evaluator] : [] - ); - } - } catch { - // Non-fatal — skip saving + let record: RecommendationJobRecord = startResult.record; + + if (cliOptions.wait) { + const final = await waitForTerminal(engine, 'recommendation', record.id, { + onTick: status => { + if (!cliOptions.json) console.log(`Status: ${status}`); + }, + }); + if (final) record = final; } if (cliOptions.json) { - console.log(JSON.stringify(serializeResult(result))); + console.log(JSON.stringify(serializeResult({ success: true, ...record }))); } else { - console.log(`\nRecommendation ID: ${result.recommendationId}`); - - if (result.result) { - const sysResult = result.result.systemPromptRecommendationResult; - const toolResult = result.result.toolDescriptionRecommendationResult; - - if (sysResult) { - if (sysResult.recommendedSystemPrompt) { - console.log('\n+++ Recommended System Prompt +++'); - console.log(sysResult.recommendedSystemPrompt); - } - } else if (toolResult?.tools) { - for (const tool of toolResult.tools) { - console.log(`\nTool: ${tool.toolName}`); - console.log(`Recommended: ${tool.recommendedToolDescription}`); - } - } - } - - if (savedFilePath) { - console.log(`\nResults saved to: ${savedFilePath}`); - } - - // Sync local config bundle after server-side recommendation apply - if (inputSource === 'config-bundle' && cliOptions.bundleName && result.result && result.region) { - try { - const applyResult = 
await applyRecommendationToBundle({ - bundleName: cliOptions.bundleName, - result: result.result, - region: result.region, - }); - if (applyResult.success) { - console.log( - `\nA new config bundle version (${applyResult.newVersionId}) was created with the recommended changes.` - ); - console.log(`Local config for "${cliOptions.bundleName}" has been updated to match.`); - } else { - console.log(`\nCould not sync config bundle: ${applyResult.error.message}`); - } - } catch { - // Non-fatal — user can manually sync - } + console.log(`\n✓ Recommendation started: ${record.id} (${record.status})`); + printRecommendationResult(record); + if (!cliOptions.wait) { + console.log( + `\nNext: agentcore view recommendation ${record.id}` + + (inputSource === 'config-bundle' + ? ' — the new config bundle will be applied to agentcore.json automatically.' + : '') + ); } console.log(''); } + return { job_type: 'recommendation', has_wait: !!cliOptions.wait }; + }); + } + ); - process.exit(0); - } catch (error) { - if (cliOptions.json) { - console.log(JSON.stringify({ success: false, error: getErrorMessage(error) })); - } else { - render(Error: {getErrorMessage(error)}); + // ────────────────────────────────────────────────────────────────────── + // run ingest — manually trigger ingestion for a deployed knowledge base. + // + // Drift correction #4 from Plan C: 2-deep nesting (`run ingest`), not + // `run ingest knowledge-base`. KBs are the only ingestible resource for + // now; future ingestible types could add a --type flag. 
+ // ────────────────────────────────────────────────────────────────────── + runCmd + .command('ingest') + .description('Start a fresh ingestion job for every data source on a deployed knowledge base.') + .option('--name ', 'Knowledge base name (must exist in agentcore.json)') + .option('--target ', 'Deployment target name (defaults to "default")', 'default') + .option('--data-source ', 'Ingest only the data source with this URI (default: all sources)') + .option('--json', 'Output as JSON [non-interactive]') + .action(async (cliOptions: { name?: string; target?: string; dataSource?: string; json?: boolean }) => { + if (!findConfigRoot()) { + console.error('No agentcore project found. Run `agentcore create` first.'); + process.exit(1); + } + await runCliCommand('run.ingest', !!cliOptions.json, async () => { + if (!cliOptions.name) { + throw new ValidationError('A --name is required for `agentcore run ingest`.'); + } + const targetName = cliOptions.target ?? 'default'; + + const configIO = new ConfigIO(); + const [project, awsTargets, deployedState] = await Promise.all([ + configIO.readProjectSpec(), + configIO.readAWSDeploymentTargets(), + configIO.readDeployedState().catch(() => ({ targets: {} })), + ]); + + const kbExists = (project.knowledgeBases ?? []).some(kb => kb.name === cliOptions.name); + if (!kbExists) { + throw new ValidationError(`Knowledge base '${cliOptions.name}' is not in agentcore.json.`); + } + const target = awsTargets.find(t => t.name === targetName); + if (!target) { + throw new ValidationError(`Deployment target '${targetName}' is not in aws-targets.json.`); + } + + // Wire Ctrl+C → AbortController so the user can bail out of long + // retry sleeps cleanly. Progress lines go to stderr so --json stdout + // remains a single parseable object. 
+ const abortController = new AbortController(); + const onSigint = () => abortController.abort(); + process.once('SIGINT', onSigint); + let result; + try { + result = await runKbIngestionByName({ + knowledgeBaseName: cliOptions.name, + deployedState, + targetName, + region: target.region, + dataSourceUri: cliOptions.dataSource, + signal: abortController.signal, + onProgress: cliOptions.json ? undefined : msg => process.stderr.write(`${msg}\n`), + }); + } finally { + process.off('SIGINT', onSigint); + } + + if (!result.success) { + throw result.error; + } + + if (cliOptions.json) { + console.log(JSON.stringify({ success: true, startedJobs: result.startedJobs })); + } else { + console.log(`Started ingestion for '${cliOptions.name}' (${result.startedJobs.length} data source(s)):`); + for (const job of result.startedJobs) { + console.log(` ${job.uri} → ${job.ingestionJobId}`); } - process.exit(1); + console.log(`\nRun 'agentcore status' to track progress.`); } - } + + return { data_source_count: result.startedJobs.length }; + }); + }); + const abTestCmd = runCmd + .command('ab-test') + .description('Start an A/B test comparing two config-bundle or gateway-target variants') + // ── Shared options ── + .option('-n, --name ', 'Name for the A/B test [non-interactive]') + .option('-g, --gateway ', 'Gateway name (must be deployed) [non-interactive]') + .option('-m, --mode ', 'config-bundle | target-based (default: config-bundle)', 'config-bundle') + .option('--description ', 'Description') + .option('-r, --runtime ', 'Runtime name (recorded as the agent)') + .option('--control-weight ', 'Control traffic weight 0-100 (default: 50)', '50') + .option('--treatment-weight ', 'Treatment traffic weight 0-100 (default: 50)', '50') + .option('--role-arn ', 'Execution role ARN (auto-created if omitted)') + .option('--disable-on-create', 'Create without starting (default: enabled)') + .option('--region ', 'AWS region (auto-detected if omitted)') + .option('--wait', 'Block until 
terminal state') + .option('--json', 'Output as JSON') + // ── Config-bundle mode ── + .option('--control-bundle ', '[config-bundle] Control bundle name or ARN') + .option('--control-version ', '[config-bundle] Control bundle version (or LATEST)') + .option('--treatment-bundle ', '[config-bundle] Treatment bundle name or ARN') + .option('--treatment-version ', '[config-bundle] Treatment bundle version (or LATEST)') + .option('--online-eval ', '[config-bundle] Shared online eval config name or ARN') + // ── Target-based mode ── + .option('--control-target ', '[target-based] Control gateway-target name') + .option('--treatment-target ', '[target-based] Treatment gateway-target name') + .option('--control-online-eval ', '[target-based] Online eval for control endpoint (required)') + .option('--treatment-online-eval ', '[target-based] Online eval for treatment endpoint (required)') + .option( + '--gateway-filter ', + 'Single gateway target path pattern to scope the test (e.g. "/orders/*"). Applies to both modes. Optional; omit for no gateway filter.' 
); + + abTestCmd.addHelpText( + 'after', + ` +Config-bundle mode example: + agentcore run ab-test -n MyTest -g MyGateway \\ + --control-bundle PromptV1 --control-version LATEST \\ + --treatment-bundle PromptV2 --treatment-version LATEST \\ + --online-eval QualityEval + +Target-based mode example: + agentcore run ab-test -n MyTest -g MyGateway --mode target-based \\ + --control-target prod-target --treatment-target staging-target \\ + --control-online-eval ProdEval --treatment-online-eval StagingEval +` + ); + + abTestCmd.action( + async (cliOptions: { + name?: string; + gateway?: string; + mode: string; + description?: string; + runtime?: string; + controlBundle?: string; + controlVersion?: string; + treatmentBundle?: string; + treatmentVersion?: string; + onlineEval?: string; + controlTarget?: string; + treatmentTarget?: string; + controlOnlineEval?: string; + treatmentOnlineEval?: string; + gatewayFilter?: string; + controlWeight: string; + treatmentWeight: string; + roleArn?: string; + disableOnCreate?: boolean; + region?: string; + wait?: boolean; + json?: boolean; + }) => { + requireProject(); + + if (!cliOptions.name && !cliOptions.json) { + const { requireTTY } = await import('../../tui/guards/tty'); + requireTTY(); + const { RunABTestFlow } = await import('../../tui/screens/run-ab-test/RunABTestFlow'); + const { clear, unmount } = render( + { + clear(); + unmount(); + process.exit(0); + }} + /> + ); + return; + } + + if (!cliOptions.name || !cliOptions.gateway) { + const error = '--name and --gateway are required in non-interactive mode'; + if (cliOptions.json) { + console.log(JSON.stringify({ success: false, error })); + } else { + render({error}); + } + process.exit(1); + } + + if (cliOptions.mode !== 'config-bundle' && cliOptions.mode !== 'target-based') { + const error = `Invalid --mode "${cliOptions.mode}". 
Must be one of: config-bundle, target-based`; + if (cliOptions.json) { + console.log(JSON.stringify({ success: false, error })); + } else { + render({error}); + } + process.exit(1); + } + const mode: ABTestMode = cliOptions.mode; + + // Validate variant weights are integers in [0,100] and sum to 100. + const controlWeight = parseInt(cliOptions.controlWeight, 10); + const treatmentWeight = parseInt(cliOptions.treatmentWeight, 10); + const weightError = + isNaN(controlWeight) || controlWeight < 0 || controlWeight > 100 + ? `Invalid --control-weight "${cliOptions.controlWeight}". Must be an integer between 0 and 100.` + : isNaN(treatmentWeight) || treatmentWeight < 0 || treatmentWeight > 100 + ? `Invalid --treatment-weight "${cliOptions.treatmentWeight}". Must be an integer between 0 and 100.` + : controlWeight + treatmentWeight !== 100 + ? `Variant weights must sum to 100 (got ${controlWeight} + ${treatmentWeight} = ${controlWeight + treatmentWeight}).` + : undefined; + if (weightError) { + if (cliOptions.json) { + console.log(JSON.stringify({ success: false, error: weightError })); + } else { + render({weightError}); + } + process.exit(1); + } + + await runCliCommand('run.job', !!cliOptions.json, async () => { + const engine = createJobEngine(new ConfigIO()); + const startOpts: StartABTestJobOptions = { + name: cliOptions.name!, + mode, + description: cliOptions.description, + gateway: cliOptions.gateway!, + agent: cliOptions.runtime, + controlBundle: cliOptions.controlBundle, + controlVersion: cliOptions.controlVersion, + treatmentBundle: cliOptions.treatmentBundle, + treatmentVersion: cliOptions.treatmentVersion, + onlineEval: cliOptions.onlineEval, + runtime: cliOptions.runtime, + controlTarget: cliOptions.controlTarget, + treatmentTarget: cliOptions.treatmentTarget, + controlOnlineEval: cliOptions.controlOnlineEval, + treatmentOnlineEval: cliOptions.treatmentOnlineEval, + gatewayFilter: cliOptions.gatewayFilter, + controlWeight, + treatmentWeight, + 
enableOnCreate: !cliOptions.disableOnCreate, + region: cliOptions.region, + roleArn: cliOptions.roleArn, + onProgress: cliOptions.json ? undefined : (_status, message) => console.log(message), + }; + + const startResult = await engine.start('ab-test', startOpts); + if (!startResult.success) { + throw startResult.error; + } + let record: ABTestJobRecord = startResult.record; + + if (cliOptions.wait) { + const final = await waitForTerminal(engine, 'ab-test', record.id, { + onTick: status => { + if (!cliOptions.json) console.log(`Status: ${status}`); + }, + }); + if (final) record = final; + } + + if (cliOptions.json) { + console.log(JSON.stringify(serializeResult({ success: true, ...record }))); + } else { + console.log(`\n✓ A/B test started: ${record.id} (${record.status})`); + printABTestDetail(record); + if (!cliOptions.wait) { + console.log(`\nNext: agentcore view ab-test ${record.id}`); + } + console.log(''); + } + return { job_type: 'ab-test', has_wait: !!cliOptions.wait }; + }); + } + ); }; -function formatBatchEvalOutput(result: RunBatchEvaluationCommandResult): void { - console.log(`\nBatch Evaluation: ${result.name ?? result.batchEvaluationId}`); - console.log(`ID: ${result.batchEvaluationId}`); - console.log(`Status: ${result.status}`); +/** Print a recommendation's optimized artifact (system prompt / tool descriptions) when available. 
*/ +function printRecommendationResult(record: RecommendationJobRecord): void { + const sys = record.result?.systemPromptRecommendationResult; + const tool = record.result?.toolDescriptionRecommendationResult; + if (sys?.recommendedSystemPrompt) { + if (sys.explanation) { + console.log('\n--- Explanation ---'); + console.log(sys.explanation); + } + console.log('\n+++ Recommended System Prompt +++'); + console.log(sys.recommendedSystemPrompt); + } else if (tool?.tools?.length) { + for (const t of tool.tools) { + console.log(`\nTool: ${t.toolName}`); + if (t.explanation) { + console.log(`Explanation: ${t.explanation}`); + } + console.log(`Recommended: ${t.recommendedToolDescription}`); + } + } else if (record.status === 'FAILED') { + console.log(`\nError: ${record.failureDetail ?? record.statusReasons?.join('; ') ?? 'unknown'}`); + } + if (record.syncedVersionId) { + console.log(`\nNew config bundle version ${record.syncedVersionId} applied to agentcore.json.`); + } +} + +/** Print an insights job's failure analysis results. */ +function printInsightsResult(record: InsightsJobRecord): void { + const fa = record.failureAnalysisResult; + if (fa?.failureCategories?.length) { + console.log('\nFailure Analysis:'); + for (const cat of fa.failureCategories) { + console.log(` ${cat.failureCategoryName ?? 'Unknown'}: ${cat.failureCategoryDescription ?? ''}`); + if (cat.rootCauses?.length) { + for (const rc of cat.rootCauses) { + console.log(` - ${rc.rootCauseCategory ?? ''}: ${rc.rootCauseDescription ?? ''}`); + if (rc.recommendation) console.log(` Recommendation: ${rc.recommendation}`); + } + } + } + } else if (record.evaluationResults?.evaluatorSummaries?.length) { + console.log('\nEvaluation Results:'); + for (const s of record.evaluationResults.evaluatorSummaries) { + const avg = s.statistics?.averageScore; + console.log(` ${s.evaluatorId}: ${avg != null ? 
avg.toFixed(2) : 'N/A'}`); + } + } +} - // Show session stats from API if available - const evalResults = result.evaluationResults; +/** Print a batch evaluation's scores (server summaries preferred, CloudWatch per-session as fallback). */ +function printBatchEvalResult(record: BatchEvaluationJobRecord): void { + const evalResults = record.evaluationResults; if (evalResults) { const parts: string[] = []; if (evalResults.totalNumberOfSessions != null) parts.push(`${evalResults.totalNumberOfSessions} sessions`); @@ -535,11 +976,9 @@ function formatBatchEvalOutput(result: RunBatchEvaluationCommandResult): void { if (parts.length > 0) console.log(`Sessions: ${parts.join(', ')}`); } - console.log(''); - - // Prefer API evaluatorSummaries over local computation const summaries = evalResults?.evaluatorSummaries; if (summaries && summaries.length > 0) { + console.log('\nResults:'); for (const s of summaries) { const avg = s.statistics?.averageScore; const avgStr = avg != null ? avg.toFixed(2) : 'N/A'; @@ -547,35 +986,19 @@ function formatBatchEvalOutput(result: RunBatchEvaluationCommandResult): void { const evalCount = s.totalEvaluated != null ? ` [${s.totalEvaluated} evaluated]` : ''; console.log(` ${s.evaluatorId}: ${avgStr} avg${failSuffix}${evalCount}`); } - } else if (result.results.length > 0) { - // Fall back to local computation from CloudWatch results - const byEvaluator = new Map(); - for (const r of result.results) { + } else if (record.results?.length) { + console.log('\nResults:'); + const byEvaluator = new Map>(); + for (const r of record.results) { const group = byEvaluator.get(r.evaluatorId) ?? []; group.push(r); byEvaluator.set(r.evaluatorId, group); } - for (const [evalId, evalGroup] of byEvaluator) { - const scores = evalGroup.filter(r => !r.error).map(r => r.score!); + const scores = evalGroup.filter(r => !r.error && r.score != null).map(r => r.score!); const avg = scores.length > 0 ? 
scores.reduce((a, b) => a + b, 0) / scores.length : 0; const errors = evalGroup.filter(r => r.error).length; - const errorSuffix = errors > 0 ? ` (${errors} errors)` : ''; - - console.log(` ${evalId}: ${avg.toFixed(2)} avg${errorSuffix}`); - - for (const r of evalGroup) { - if (r.error) { - console.log(` ERROR: ${r.error.slice(0, 80)}`); - } else { - const labelStr = r.label ? ` (${r.label})` : ''; - console.log(` ${r.score?.toFixed(2)}${labelStr}`); - } - } + console.log(` ${evalId}: ${avg.toFixed(2)} avg${errors > 0 ? ` (${errors} errors)` : ''}`); } - } else { - console.log(' No evaluation results found.'); } - - console.log(''); } diff --git a/src/cli/commands/status/__tests__/action.test.ts b/src/cli/commands/status/__tests__/action.test.ts index 25520d9ce..367048370 100644 --- a/src/cli/commands/status/__tests__/action.test.ts +++ b/src/cli/commands/status/__tests__/action.test.ts @@ -8,6 +8,8 @@ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; const mockGetAgentRuntimeStatus = vi.fn(); const mockGetEvaluator = vi.fn(); const mockGetOnlineEvaluationConfig = vi.fn(); +const mockGetKnowledgeBase = vi.fn(); +const mockGetLatestIngestionJob = vi.fn(); vi.mock('../../../aws', () => ({ getAgentRuntimeStatus: (...args: unknown[]) => mockGetAgentRuntimeStatus(...args), @@ -18,17 +20,27 @@ vi.mock('../../../aws/agentcore-control', () => ({ getOnlineEvaluationConfig: (...args: unknown[]) => mockGetOnlineEvaluationConfig(...args), })); +vi.mock('../../../aws/bedrock-agent', () => ({ + getKnowledgeBase: (...args: unknown[]) => mockGetKnowledgeBase(...args), + getLatestIngestionJob: (...args: unknown[]) => mockGetLatestIngestionJob(...args), +})); + const mockIsPreviewEnabled = vi.fn(() => true); +const mockIsGatedFeaturesEnabled = vi.fn(() => true); vi.mock('../../../feature-flags', () => ({ isPreviewEnabled: () => mockIsPreviewEnabled(), + isGatedFeaturesEnabled: () => mockIsGatedFeaturesEnabled(), })); +const loggedLines: string[] = []; 
vi.mock('../../../logging', () => { return { ExecLogger: class { startStep = vi.fn(); endStep = vi.fn(); - log = vi.fn(); + log = vi.fn((line: string) => { + loggedLines.push(line); + }); finalize = vi.fn(); getRelativeLogPath = vi.fn().mockReturnValue('logs/status.log'); }, @@ -41,6 +53,7 @@ const baseProject: AgentCoreProjectSpec = { managedBy: 'CDK' as const, runtimes: [], memories: [], + knowledgeBases: [], credentials: [], } as unknown as AgentCoreProjectSpec; @@ -497,6 +510,51 @@ describe('computeResourceStatuses', () => { expect(harnessEntries).toHaveLength(0); }); + it('renders the config version (v{N}) on a deployed harness when gated features are enabled', () => { + const project = { + ...baseProject, + harnesses: [{ name: 'my-harness', path: 'harnesses/my-harness' }], + } as unknown as AgentCoreProjectSpec; + const resources: DeployedResourceState = { + harnesses: { + 'my-harness': { + harnessId: 'h-123', + harnessArn: 'arn:aws:bedrock:us-east-1:123456789:harness/h-123', + roleArn: 'arn:aws:iam::123456789:role/test', + status: 'READY', + harnessVersion: 3, + }, + }, + }; + + const result = computeResourceStatuses(project, resources); + const harnessEntry = result.find(r => r.resourceType === 'harness' && r.name === 'my-harness'); + expect(harnessEntry!.detail).toBe('v3'); + }); + + it('does not render the config version when gated features are disabled', () => { + mockIsGatedFeaturesEnabled.mockReturnValueOnce(false); + const project = { + ...baseProject, + harnesses: [{ name: 'my-harness', path: 'harnesses/my-harness' }], + } as unknown as AgentCoreProjectSpec; + const resources: DeployedResourceState = { + harnesses: { + 'my-harness': { + harnessId: 'h-123', + harnessArn: 'arn:aws:bedrock:us-east-1:123456789:harness/h-123', + roleArn: 'arn:aws:iam::123456789:role/test', + status: 'READY', + harnessVersion: 3, + }, + }, + }; + + const result = computeResourceStatuses(project, resources); + const harnessEntry = result.find(r => r.resourceType === 
'harness' && r.name === 'my-harness'); + expect(harnessEntry!.detail).toBeUndefined(); + }); + it('handles mixed deployed and local-only resources', () => { const project = { ...baseProject, @@ -538,6 +596,179 @@ describe('computeResourceStatuses', () => { const deployedCred = result.find(r => r.name === 'deployed-cred'); expect(deployedCred!.deploymentState).toBe('deployed'); }); + + it('marks knowledge-base as deployed when present in deployed-state', () => { + const project = { + ...baseProject, + knowledgeBases: [ + { + type: 'AgentCoreKnowledgeBase', + name: 'product-docs', + dataSources: [{ type: 'S3', uri: 's3://b/d/' }], + }, + ], + } as unknown as AgentCoreProjectSpec; + + const resources: DeployedResourceState = { + knowledgeBases: { + 'product-docs': { + knowledgeBaseId: 'KB1', + knowledgeBaseArn: 'arn:aws:bedrock:us-west-2:0:knowledge-base/KB1', + dataSources: [{ dataSourceId: 'DS1', uri: 's3://b/d/' }], + }, + }, + }; + + const result = computeResourceStatuses(project, resources); + const kbEntry = result.find(r => r.resourceType === 'knowledge-base' && r.name === 'product-docs'); + + expect(kbEntry).toBeDefined(); + expect(kbEntry!.deploymentState).toBe('deployed'); + expect(kbEntry!.identifier).toBe('arn:aws:bedrock:us-west-2:0:knowledge-base/KB1'); + expect(kbEntry!.detail).toBe('1 data source'); + }); + + it('marks knowledge-base as local-only when not in deployed-state', () => { + const project = { + ...baseProject, + knowledgeBases: [ + { + type: 'AgentCoreKnowledgeBase', + name: 'fresh-kb', + dataSources: [ + { type: 'S3', uri: 's3://b/a/' }, + { type: 'S3', uri: 's3://b/b/' }, + ], + }, + ], + } as unknown as AgentCoreProjectSpec; + + const result = computeResourceStatuses(project, undefined); + const kbEntry = result.find(r => r.resourceType === 'knowledge-base' && r.name === 'fresh-kb'); + + expect(kbEntry).toBeDefined(); + expect(kbEntry!.deploymentState).toBe('local-only'); + expect(kbEntry!.detail).toBe('2 data sources'); + }); + + 
it('marks knowledge-base as pending-removal when in deployed-state but not local', () => { + const project = baseProject; + const resources: DeployedResourceState = { + knowledgeBases: { + 'orphan-kb': { + knowledgeBaseId: 'KBOLD', + knowledgeBaseArn: 'arn:aws:bedrock:us-west-2:0:knowledge-base/KBOLD', + dataSources: [], + }, + }, + }; + + const result = computeResourceStatuses(project, resources); + const kbEntry = result.find(r => r.resourceType === 'knowledge-base' && r.name === 'orphan-kb'); + + expect(kbEntry).toBeDefined(); + expect(kbEntry!.deploymentState).toBe('pending-removal'); + }); + + it('surfaces gateway wiring on KB detail when a connector target references it', () => { + const project = { + ...baseProject, + agentCoreGateways: [ + { + name: 'main-gw', + targets: [ + { + name: 'docs', + targetType: 'connector', + connectorId: 'bedrock-knowledge-bases', + knowledgeBaseId: 'docs', + }, + ], + }, + ], + knowledgeBases: [ + { + type: 'AgentCoreKnowledgeBase', + name: 'docs', + dataSources: [{ type: 'S3', uri: 's3://b/d/' }], + }, + ], + } as unknown as AgentCoreProjectSpec; + + const result = computeResourceStatuses(project, undefined); + const kbEntry = result.find(r => r.resourceType === 'knowledge-base' && r.name === 'docs'); + expect(kbEntry?.detail).toBe('1 data source → gw:main-gw'); + }); + + it('annotates gateway detail with retrieve-target count', () => { + const project = { + ...baseProject, + agentCoreGateways: [ + { + name: 'main-gw', + targets: [ + { name: 't1', targetType: 'mcpServer' }, + { name: 'docs', targetType: 'connector', connectorId: 'bedrock-knowledge-bases', knowledgeBaseId: 'docs' }, + ], + }, + ], + } as unknown as AgentCoreProjectSpec; + + const result = computeResourceStatuses(project, undefined); + const gwEntry = result.find(r => r.resourceType === 'gateway' && r.name === 'main-gw'); + expect(gwEntry?.detail).toBe('2 targets (1 retrieve)'); + }); + + it('annotates gateway detail with both retrieve count and agentic 
fan-out', () => { + const project = { + ...baseProject, + agentCoreGateways: [ + { + name: 'main-gw', + targets: [ + { name: 'docs', targetType: 'connector', connectorId: 'bedrock-knowledge-bases', knowledgeBaseId: 'docs' }, + { name: 'hr', targetType: 'connector', connectorId: 'bedrock-knowledge-bases', knowledgeBaseId: 'hr' }, + { + name: 'main-gw-agentic', + targetType: 'connector', + connectorId: 'bedrock-agentic-retrieve', + knowledgeBaseIds: ['docs', 'hr'], + }, + ], + }, + ], + } as unknown as AgentCoreProjectSpec; + + const result = computeResourceStatuses(project, undefined); + const gwEntry = result.find(r => r.resourceType === 'gateway' && r.name === 'main-gw'); + expect(gwEntry?.detail).toBe('3 targets (2 retrieve, agentic ×2)'); + }); + + it('KB detail surfaces wiring from agentic-retrieve fan-out target', () => { + const project = { + ...baseProject, + agentCoreGateways: [ + { + name: 'main-gw', + targets: [ + { + name: 'main-gw-agentic', + targetType: 'connector', + connectorId: 'bedrock-agentic-retrieve', + knowledgeBaseIds: ['docs'], + }, + ], + }, + ], + knowledgeBases: [ + { type: 'AgentCoreKnowledgeBase', name: 'docs', dataSources: [{ type: 'S3', uri: 's3://b/d/' }] }, + ], + } as unknown as AgentCoreProjectSpec; + + const result = computeResourceStatuses(project, undefined); + const kbEntry = result.find(r => r.resourceType === 'knowledge-base' && r.name === 'docs'); + expect(kbEntry?.detail).toBe('1 data source → gw:main-gw'); + }); }); describe('handleProjectStatus — live enrichment', () => { @@ -732,6 +963,224 @@ describe('handleProjectStatus — live enrichment', () => { }); }); +describe('handleProjectStatus — knowledge base enrichment', () => { + beforeEach(() => { + mockGetKnowledgeBase.mockReset(); + mockGetLatestIngestionJob.mockReset(); + loggedLines.length = 0; + }); + + afterEach(() => vi.clearAllMocks()); + + function makeKbContext(): StatusContext { + return { + project: { + ...baseProject, + agentCoreGateways: [ + { + name: 
'main-gw', + targets: [ + { + name: 'docs', + targetType: 'connector', + connectorId: 'bedrock-knowledge-bases', + knowledgeBaseId: 'product-docs', + }, + ], + }, + ], + knowledgeBases: [ + { + type: 'AgentCoreKnowledgeBase', + name: 'product-docs', + dataSources: [ + { type: 'S3', uri: 's3://bucket/docs/' }, + { type: 'S3', uri: 's3://bucket/specs/' }, + ], + }, + ], + } as unknown as AgentCoreProjectSpec, + awsTargets: [{ name: 'dev', region: 'us-east-1', account: '123456789' }], + deployedState: { + targets: { + dev: { + resources: { + knowledgeBases: { + 'product-docs': { + knowledgeBaseId: 'KB1', + knowledgeBaseArn: 'arn:aws:bedrock:us-east-1:123456789:knowledge-base/KB1', + dataSources: [ + { dataSourceId: 'DS1', uri: 's3://bucket/docs/' }, + { dataSourceId: 'DS2', uri: 's3://bucket/specs/' }, + ], + }, + }, + }, + }, + }, + }, + } as unknown as StatusContext; + } + + it('fetches the latest ingestion job for every data source and renders all of them', async () => { + mockGetKnowledgeBase.mockResolvedValue({ knowledgeBaseId: 'KB1', status: 'ACTIVE' }); + mockGetLatestIngestionJob.mockImplementation(({ dataSourceId }: { dataSourceId: string }) => { + if (dataSourceId === 'DS1') { + return { + status: 'COMPLETE', + startedAt: new Date('2026-01-01T00:00:00Z'), + updatedAt: new Date('2026-01-01T00:05:00Z'), + statistics: { + numberOfDocumentsScanned: 10, + numberOfNewDocumentsIndexed: 8, + numberOfModifiedDocumentsIndexed: 1, + numberOfDocumentsFailed: 0, + numberOfDocumentsDeleted: 0, + }, + }; + } + return { + status: 'COMPLETE', + startedAt: new Date('2026-01-02T00:00:00Z'), + updatedAt: new Date('2026-01-02T00:03:00Z'), + statistics: { + numberOfDocumentsScanned: 5, + numberOfNewDocumentsIndexed: 5, + numberOfModifiedDocumentsIndexed: 0, + numberOfDocumentsFailed: 0, + numberOfDocumentsDeleted: 0, + }, + }; + }); + + // Drill into the named KB so the full per-DS block is rendered (the default + // view is now a one-line summary; see the dedicated 
summary/detail tests). + const result = await handleProjectStatus(makeKbContext(), { knowledgeBaseName: 'product-docs' }); + + assert(result.success); + + // A job was fetched for EACH data source, not just the first. + expect(mockGetLatestIngestionJob).toHaveBeenCalledTimes(2); + expect(mockGetLatestIngestionJob).toHaveBeenCalledWith({ + region: 'us-east-1', + knowledgeBaseId: 'KB1', + dataSourceId: 'DS1', + }); + expect(mockGetLatestIngestionJob).toHaveBeenCalledWith({ + region: 'us-east-1', + knowledgeBaseId: 'KB1', + dataSourceId: 'DS2', + }); + + // The rich block, logged line-by-line, includes BOTH data source URIs and + // their per-DS document counts plus the gateway wiring. + const block = loggedLines.join('\n'); + expect(block).toContain('s3://bucket/docs/'); + expect(block).toContain('s3://bucket/specs/'); + expect(block).toContain('10 scanned, 8 new indexed'); + expect(block).toContain('5 scanned, 5 new indexed'); + expect(block).toContain('main-gw'); + + // The structured detail stays a concise one-liner for TUI/JSON consumers. 
+ const kbEntry = result.resources.find( + (r: ResourceStatusEntry) => r.resourceType === 'knowledge-base' && r.name === 'product-docs' + ); + expect(kbEntry).toBeDefined(); + expect(kbEntry!.detail).toContain('Status: ACTIVE'); + expect(kbEntry!.detail).not.toContain('\n'); + }); + + it('renders a one-line summary (not the full per-DS block) when no knowledgeBaseName is given', async () => { + mockGetKnowledgeBase.mockResolvedValue({ knowledgeBaseId: 'KB1', status: 'ACTIVE' }); + mockGetLatestIngestionJob.mockResolvedValue({ + status: 'COMPLETE', + startedAt: new Date('2026-01-01T00:00:00Z'), + updatedAt: new Date('2026-01-01T00:05:00Z'), + statistics: { + numberOfDocumentsScanned: 10, + numberOfNewDocumentsIndexed: 8, + numberOfModifiedDocumentsIndexed: 0, + numberOfDocumentsFailed: 0, + numberOfDocumentsDeleted: 0, + }, + }); + + const result = await handleProjectStatus(makeKbContext()); + assert(result.success); + + const block = loggedLines.join('\n'); + // The summary rollup line is present (name + state + counts). + expect(block).toContain('product-docs: ✓ Ready'); + expect(block).toContain('2 data sources'); + expect(block).toContain('16 indexed'); + // The full multi-line block is NOT rendered by default. 
+ expect(block).not.toContain('Documents:'); + expect(block).not.toContain('s3://bucket/docs/'); + }); + + it('renders the full per-DS block when knowledgeBaseName matches', async () => { + mockGetKnowledgeBase.mockResolvedValue({ knowledgeBaseId: 'KB1', status: 'ACTIVE' }); + mockGetLatestIngestionJob.mockResolvedValue({ + status: 'COMPLETE', + startedAt: new Date('2026-01-01T00:00:00Z'), + updatedAt: new Date('2026-01-01T00:05:00Z'), + statistics: { + numberOfDocumentsScanned: 10, + numberOfNewDocumentsIndexed: 8, + numberOfModifiedDocumentsIndexed: 0, + numberOfDocumentsFailed: 0, + numberOfDocumentsDeleted: 0, + }, + }); + + const result = await handleProjectStatus(makeKbContext(), { knowledgeBaseName: 'product-docs' }); + assert(result.success); + + const block = loggedLines.join('\n'); + // The full multi-line block IS rendered for the named KB. + expect(block).toContain('Documents:'); + expect(block).toContain('s3://bucket/docs/'); + expect(block).toContain('s3://bucket/specs/'); + }); + + it('marks data sources with no ingestion job as never run', async () => { + mockGetKnowledgeBase.mockResolvedValue({ knowledgeBaseId: 'KB1', status: 'ACTIVE' }); + mockGetLatestIngestionJob.mockResolvedValue(null); + + // "never run" appears only in the full drill-down block. 
+ const result = await handleProjectStatus(makeKbContext(), { knowledgeBaseName: 'product-docs' }); + + assert(result.success); + expect(mockGetLatestIngestionJob).toHaveBeenCalledTimes(2); + expect(loggedLines.join('\n')).toContain('Ingestion: never run'); + }); + + it('flags KB as out of sync when the live KB is not found', async () => { + mockGetKnowledgeBase.mockResolvedValue(null); + + const result = await handleProjectStatus(makeKbContext()); + + assert(result.success); + const kbEntry = result.resources.find( + (r: ResourceStatusEntry) => r.resourceType === 'knowledge-base' && r.name === 'product-docs' + ); + expect(kbEntry!.detail).toContain('out of sync'); + expect(mockGetLatestIngestionJob).not.toHaveBeenCalled(); + }); + + it('sets error on KB when getKnowledgeBase throws', async () => { + mockGetKnowledgeBase.mockRejectedValue(new Error('AccessDenied')); + + const result = await handleProjectStatus(makeKbContext()); + + assert(result.success); + const kbEntry = result.resources.find( + (r: ResourceStatusEntry) => r.resourceType === 'knowledge-base' && r.name === 'product-docs' + ); + expect(kbEntry!.error).toBe('AccessDenied'); + }); +}); + describe('buildRuntimeInvocationUrl', () => { it('constructs the correct invocation URL with encoded ARN', () => { const url = buildRuntimeInvocationUrl( diff --git a/src/cli/commands/status/__tests__/format-knowledge-base.test.ts b/src/cli/commands/status/__tests__/format-knowledge-base.test.ts new file mode 100644 index 000000000..43fb51b16 --- /dev/null +++ b/src/cli/commands/status/__tests__/format-knowledge-base.test.ts @@ -0,0 +1,117 @@ +import { + type KbStatusDetail, + formatKnowledgeBaseDetail, + formatKnowledgeBaseSummaryLine, +} from '../format-knowledge-base'; +import { describe, expect, it } from 'vitest'; + +const base: KbStatusDetail = { + name: 'product-docs', + knowledgeBaseId: 'KB-ABC', + status: 'ACTIVE', + gatewayNames: ['main-gw'], + dataSources: [ + { + uri: 's3://bucket/docs/', + dataSourceId: 
'DS-1', + ingestion: { + status: 'COMPLETE', + startedAt: '2026-02-20T23:03:36Z', + completedAt: '2026-02-20T23:15:42Z', + scanned: 141, + indexed: 138, + modified: 0, + failed: 3, + deleted: 0, + }, + }, + ], +}; + +describe('formatKnowledgeBaseDetail', () => { + it('renders a multi-line block with per-DS state and tool line', () => { + const text = formatKnowledgeBaseDetail(base).join('\n'); + expect(text).toContain('KB-ABC'); + expect(text).toContain('s3://bucket/docs/'); + expect(text).toContain('138 new indexed'); + expect(text).toContain('Tools:'); + expect(text).toContain('retrieve'); + }); + + it('renders troubleshooting hints on ingestion failure', () => { + const ds0 = base.dataSources[0]!; + const failed: KbStatusDetail = { + ...base, + dataSources: [{ ...ds0, ingestion: { ...ds0.ingestion, status: 'FAILED' } }], + }; + const text = formatKnowledgeBaseDetail(failed).join('\n'); + expect(text).toContain('Next steps'); + expect(text).toMatch(/50MB|file format|s3:GetObject/i); + }); + + it('omits the tool line when not wired to any gateway', () => { + const standalone = { ...base, gatewayNames: [] }; + const text = formatKnowledgeBaseDetail(standalone).join('\n'); + expect(text).not.toContain('Tools:'); + }); + + it('shows "never run" when a data source has no ingestion', () => { + const noIngest = { ...base, dataSources: [{ uri: 's3://b/', dataSourceId: 'DS-9' }] }; + const text = formatKnowledgeBaseDetail(noIngest).join('\n'); + expect(text).toContain('never run'); + }); + + it('lists every data source for a multi-DS KB', () => { + const multi = { + ...base, + dataSources: [ + { uri: 's3://b/a/', dataSourceId: 'DS-1', ingestion: { status: 'COMPLETE', scanned: 1, indexed: 1 } }, + { uri: 's3://b/c/', dataSourceId: 'DS-2', ingestion: { status: 'IN_PROGRESS', scanned: 5, indexed: 0 } }, + ], + }; + const text = formatKnowledgeBaseDetail(multi).join('\n'); + expect(text).toContain('s3://b/a/'); + expect(text).toContain('s3://b/c/'); + }); +}); + 
+describe('formatKnowledgeBaseSummaryLine', () => { + it('renders name, Ready state, count, and indexed total for an ACTIVE KB with complete ingestion', () => { + const line = formatKnowledgeBaseSummaryLine(base); + expect(line).toContain('product-docs'); + expect(line).toContain('Ready'); + expect(line).toContain('1 data source'); + expect(line).toContain('138 indexed'); + }); + + it('shows Failed when any data source ingestion failed', () => { + const ds0 = base.dataSources[0]!; + const failed: KbStatusDetail = { + ...base, + dataSources: [{ ...ds0, ingestion: { ...ds0.ingestion, status: 'FAILED' } }], + }; + expect(formatKnowledgeBaseSummaryLine(failed)).toContain('Failed'); + }); + + it('shows Ingesting when a data source is in progress', () => { + const ds0 = base.dataSources[0]!; + const ingesting: KbStatusDetail = { + ...base, + dataSources: [{ ...ds0, ingestion: { ...ds0.ingestion, status: 'IN_PROGRESS' } }], + }; + expect(formatKnowledgeBaseSummaryLine(ingesting)).toContain('Ingesting'); + }); + + it('sums indexed across data sources for a multi-DS KB', () => { + const multi: KbStatusDetail = { + ...base, + dataSources: [ + { uri: 's3://b/a/', dataSourceId: 'DS-1', ingestion: { status: 'COMPLETE', indexed: 100 } }, + { uri: 's3://b/c/', dataSourceId: 'DS-2', ingestion: { status: 'COMPLETE', indexed: 50 } }, + ], + }; + const line = formatKnowledgeBaseSummaryLine(multi); + expect(line).toContain('2 data sources'); + expect(line).toContain('150 indexed'); + }); +}); diff --git a/src/cli/commands/status/action.ts b/src/cli/commands/status/action.ts index 057aa272b..d2337cb1f 100644 --- a/src/cli/commands/status/action.ts +++ b/src/cli/commands/status/action.ts @@ -4,12 +4,18 @@ import type { AgentCoreProjectSpec, AwsDeploymentTargets, DeployedResourceState, import { getAgentRuntimeStatus } from '../../aws'; import { getEvaluator, getOnlineEvaluationConfig } from '../../aws/agentcore-control'; import { getPaymentManager } from '../../aws/agentcore-payments'; 
-import { dnsSuffix } from '../../aws/partition'; +import { getKnowledgeBase, getLatestIngestionJob } from '../../aws/bedrock-agent'; import { getErrorMessage } from '../../errors'; -import { isPreviewEnabled } from '../../feature-flags'; +import { isGatedFeaturesEnabled, isPreviewEnabled } from '../../feature-flags'; import { ExecLogger } from '../../logging'; import type { ResourceDeploymentState } from './constants'; import { buildRuntimeInvocationUrl } from './constants'; +import { + type KbDataSourceDetail, + type KbStatusDetail, + formatKnowledgeBaseDetail, + formatKnowledgeBaseSummaryLine, +} from './format-knowledge-base'; export type { ResourceDeploymentState }; @@ -24,10 +30,10 @@ export interface ResourceStatusEntry { | 'policy-engine' | 'policy' | 'config-bundle' - | 'ab-test' | 'dataset' | 'harness' | 'runtime-endpoint' + | 'knowledge-base' | 'payment'; name: string; deploymentState: ResourceDeploymentState; @@ -126,30 +132,6 @@ function diffResourceSet({ return entries; } -/** - * Build the full gateway invocation URL for an AB test. - * Appends the runtime target name and /invocations path to the gateway base URL. - */ -function buildGatewayInvocationUrl( - gwState: { gatewayId: string; gatewayArn: string; gatewayUrl?: string }, - gwName: string, - project: AgentCoreProjectSpec -): string | undefined { - // Use stored URL or derive from ARN: arn:aws:bedrock-agentcore:{region}:{account}:gateway/{id} - const baseUrl = - gwState.gatewayUrl ?? - (() => { - const region = gwState.gatewayArn.split(':')[3]; - return region - ? `https://${gwState.gatewayId}.gateway.bedrock-agentcore.${region}.${dnsSuffix(region)}` - : undefined; - })(); - if (!baseUrl) return undefined; - const gwSpec = (project.httpGateways ?? 
[]).find(gw => gw.name === gwName); - if (!gwSpec) return baseUrl; - return `${baseUrl}/${gwSpec.runtimeRef}/invocations`; -} - export function computeResourceStatuses( project: AgentCoreProjectSpec, resources: DeployedResourceState | undefined @@ -183,11 +165,25 @@ export function computeResourceStatuses( const gateways = diffResourceSet({ resourceType: 'gateway', localItems: project.agentCoreGateways ?? [], - deployedRecord: resources?.mcp?.gateways ?? {}, + deployedRecord: { ...(resources?.mcp?.gateways ?? {}), ...(resources?.gateways ?? {}) }, getIdentifier: deployed => deployed.gatewayId, getLocalDetail: item => { - const count = item.targets?.length ?? 0; - return count > 0 ? `${count} target${count !== 1 ? 's' : ''}` : undefined; + const targets = item.targets ?? []; + if (targets.length === 0) return undefined; + const retrieveCount = targets.filter( + t => t.targetType === 'connector' && t.connectorId === 'bedrock-knowledge-bases' + ).length; + const agentic = targets.find(t => t.targetType === 'connector' && t.connectorId === 'bedrock-agentic-retrieve'); + const webSearchCount = targets.filter(t => t.targetType === 'webSearch').length; + const base = `${targets.length} target${targets.length !== 1 ? 's' : ''}`; + const parts: string[] = []; + if (retrieveCount > 0) parts.push(`${retrieveCount} retrieve`); + if (agentic) { + const fanOut = agentic.knowledgeBaseIds?.length ?? 0; + parts.push(`agentic ×${fanOut}`); + } + if (webSearchCount > 0) parts.push(`${webSearchCount} web-search`); + return parts.length > 0 ? `${base} (${parts.join(', ')})` : base; }, }); @@ -205,7 +201,7 @@ export function computeResourceStatuses( deployedRecord: resources?.onlineEvalConfigs ?? {}, getIdentifier: deployed => deployed.onlineEvaluationConfigArn, getLocalDetail: item => - `${item.evaluators.length} evaluator${item.evaluators.length !== 1 ? 's' : ''}, ${item.samplingRate}% sampling`, + `${(item.evaluators ?? []).length} evaluator${(item.evaluators ?? []).length !== 1 ? 
's' : ''}, ${item.samplingRate}% sampling`, }); const policyEngines = diffResourceSet({ @@ -252,29 +248,43 @@ export function computeResourceStatuses( getLocalDetail: item => item.schemaType, }); - const abTests = diffResourceSet({ - resourceType: 'ab-test', - localItems: project.abTests ?? [], - deployedRecord: resources?.abTests ?? {}, - getIdentifier: deployed => deployed.abTestArn, - getLocalDetail: item => item.description, - }); - - // Enrich deployed AB tests with gateway invocation URL - const httpGatewayState = resources?.httpGateways ?? {}; - for (const entry of abTests) { - if (entry.deploymentState !== 'deployed') continue; - const testSpec = (project.abTests ?? []).find(t => t.name === entry.name); - if (!testSpec) continue; - const gwMatch = /^\{\{gateway:(.+)\}\}$/.exec(testSpec.gatewayRef); - const gwName = gwMatch?.[1]; - if (!gwName) continue; - const gwState = httpGatewayState[gwName]; - if (!gwState) continue; - const url = buildGatewayInvocationUrl(gwState, gwName, project); - if (url) entry.invocationUrl = url; + // Reverse-index: KB name -> list of gateways with a connector target referencing it. + // Walks both knowledgeBaseId (single-KB Retrieve) and knowledgeBaseIds[] + // (agentic-retrieve fan-out) so a KB shows its wiring no matter which + // connector kind references it. + const kbToGateways = new Map>(); + const recordKbWiring = (kbRef: string, gatewayName: string): void => { + const set = kbToGateways.get(kbRef) ?? new Set(); + set.add(gatewayName); + kbToGateways.set(kbRef, set); + }; + for (const gw of project.agentCoreGateways ?? []) { + for (const t of gw.targets ?? []) { + if (t.targetType !== 'connector') continue; + if (t.knowledgeBaseId) recordKbWiring(t.knowledgeBaseId, gw.name); + for (const ref of t.knowledgeBaseIds ?? []) recordKbWiring(ref, gw.name); + } } + const knowledgeBases = diffResourceSet({ + resourceType: 'knowledge-base', + localItems: project.knowledgeBases ?? [], + deployedRecord: resources?.knowledgeBases ?? 
{}, + getIdentifier: deployed => deployed.knowledgeBaseArn, + getLocalDetail: item => { + const dsPart = `${item.dataSources.length} data source${item.dataSources.length === 1 ? '' : 's'}`; + // Wave 2: connector target binds the KB to a gateway. Project-owned + // KBs are stored by name on the connector target; external KBs are + // stored as a literal id (which won't match a knowledgeBases[] entry). + // Either way, we look up by name here — any extra hit (the spec's own + // gateway field) is fine to fold in. + const wiredGateways = new Set(kbToGateways.get(item.name) ?? []); + if (item.gateway) wiredGateways.add(item.gateway); + if (wiredGateways.size === 0) return dsPart; + return `${dsPart} → gw:${[...wiredGateways].join(',')}`; + }, + }); + // Flatten runtime endpoints for diffing against deployed state const localEndpoints: { name: string; agentName: string; version: number; description?: string }[] = []; for (const runtime of project.runtimes) { @@ -310,6 +320,15 @@ export function computeResourceStatuses( }) : []; + // Config version (gated): the harness Version is service-incremented and only known from deployed + // state, so enrich each entry's detail post-pass rather than via getLocalDetail (local spec has none). + if (isGatedFeaturesEnabled()) { + for (const entry of harnesses) { + const version = resources?.harnesses?.[entry.name]?.harnessVersion; + if (version !== undefined) entry.detail = entry.detail ? `${entry.detail} v${version}` : `v${version}`; + } + } + const payments = diffResourceSet({ resourceType: 'payment', localItems: project.payments ?? 
[], @@ -330,8 +349,8 @@ export function computeResourceStatuses( ...policyEngines, ...policies, ...datasets, + ...knowledgeBases, ...configBundles, - ...abTests, ...harnesses, ...payments, ]; @@ -339,7 +358,7 @@ export function computeResourceStatuses( export async function handleProjectStatus( context: StatusContext, - options: { targetName?: string } = {} + options: { targetName?: string; knowledgeBaseName?: string } = {} ): Promise { const logger = new ExecLogger({ command: 'status' }); const { project, deployedState, awsTargets } = context; @@ -508,6 +527,133 @@ export async function handleProjectStatus( logger.endStep(hasOnlineEvalErrors ? 'error' : 'success'); } + // Enrich deployed knowledge bases with live KB status + latest ingestion job stats + const kbStates = targetResources?.knowledgeBases ?? {}; + const deployedKbs = resources.filter( + e => e.resourceType === 'knowledge-base' && e.deploymentState === 'deployed' && kbStates[e.name] + ); + + if (deployedKbs.length > 0) { + logger.startStep(`Fetch knowledge base status (${deployedKbs.length} KB${deployedKbs.length !== 1 ? 's' : ''})`); + + // Reverse-index: KB spec name -> gateways whose connector targets + // reference it. Project-owned KBs are stored by *name* on connector + // targets (single-KB Retrieve on `knowledgeBaseId`, agentic-retrieve + // fan-out on `knowledgeBaseIds[]`), so we key by the spec name + // (entry.name) below. + const kbNameToGateways = new Map>(); + const recordKbWiring = (kbRef: string, gatewayName: string): void => { + const set = kbNameToGateways.get(kbRef) ?? new Set(); + set.add(gatewayName); + kbNameToGateways.set(kbRef, set); + }; + for (const gw of project.agentCoreGateways ?? []) { + for (const t of gw.targets ?? []) { + if (t.targetType !== 'connector') continue; + if (t.knowledgeBaseId) recordKbWiring(t.knowledgeBaseId, gw.name); + for (const ref of t.knowledgeBaseIds ?? 
[]) recordKbWiring(ref, gw.name); + } + } + + await Promise.all( + resources.map(async (entry, i) => { + if (entry.resourceType !== 'knowledge-base' || entry.deploymentState !== 'deployed') return; + + const kbState = kbStates[entry.name]; + if (!kbState) return; + + try { + const live = await getKnowledgeBase({ + region: targetConfig.region, + knowledgeBaseId: kbState.knowledgeBaseId, + }); + if (!live) { + const outOfSync = 'out of sync (KB deleted out of band)'; + const detail = entry.detail ? `${entry.detail} — ${outOfSync}` : outOfSync; + resources[i] = { ...entry, detail }; + logger.log(` ${entry.name}: KB ${kbState.knowledgeBaseId} not found`, 'error'); + return; + } + + // Fetch the latest ingestion job for EVERY data source, in parallel, + // and map each into a per-DS detail for the rich formatter. + const dataSources: KbDataSourceDetail[] = await Promise.all( + kbState.dataSources.map(async ds => { + const job = await getLatestIngestionJob({ + region: targetConfig.region, + knowledgeBaseId: kbState.knowledgeBaseId, + dataSourceId: ds.dataSourceId, + }); + if (!job) { + return { uri: ds.uri, dataSourceId: ds.dataSourceId }; + } + const stats = job.statistics ?? {}; + // 'COMPLETE' is the SDK's terminal success status for ingestion + // jobs; treat it as completed so the formatter shows a finish time. + const succeeded = job.status === 'COMPLETE'; + return { + uri: ds.uri, + dataSourceId: ds.dataSourceId, + ingestion: { + status: job.status, + startedAt: job.startedAt?.toISOString(), + updatedAt: job.updatedAt?.toISOString(), + completedAt: succeeded ? job.updatedAt?.toISOString() : undefined, + scanned: stats.numberOfDocumentsScanned, + indexed: stats.numberOfNewDocumentsIndexed, + modified: stats.numberOfModifiedDocumentsIndexed, + failed: stats.numberOfDocumentsFailed, + deleted: stats.numberOfDocumentsDeleted, + }, + }; + }) + ); + + const gatewayNames = [...(kbNameToGateways.get(entry.name) ?? 
new Set())]; + + const kbDetail: KbStatusDetail = { + name: entry.name, + knowledgeBaseId: kbState.knowledgeBaseId, + status: live.status, + gatewayNames, + dataSources, + }; + + // Render branch: with --name (knowledgeBaseName) we drill into the + // full multi-line block for the matched KB only; without it we emit a + // single summary rollup line per KB so `agentcore status` stays + // uncluttered when several KBs are deployed. Either way the structured + // `detail` below stays a concise one-liner because it is both rendered + // inline in the TUI and serialized in `--json` mode. + if (options.knowledgeBaseName) { + if (entry.name === options.knowledgeBaseName) { + for (const line of formatKnowledgeBaseDetail(kbDetail)) { + logger.log(line); + } + } + } else { + logger.log(formatKnowledgeBaseSummaryLine(kbDetail)); + } + + const firstWithJob = dataSources.find(ds => ds.ingestion); + const ingestionSummary = firstWithJob?.ingestion?.status + ? `Ingestion: ${firstWithJob.ingestion.status}` + : 'Ingestion: never run'; + const enriched = `Status: ${live.status ?? 'UNKNOWN'} — ${ingestionSummary}`; + const detail = entry.detail ? `${entry.detail} — ${enriched}` : enriched; + resources[i] = { ...entry, detail }; + } catch (error) { + const errorMsg = getErrorMessage(error); + resources[i] = { ...entry, error: errorMsg }; + logger.log(` ${entry.name}: ERROR - ${errorMsg}`, 'error'); + } + }) + ); + + const hasKbErrors = resources.some(r => r.resourceType === 'knowledge-base' && r.error); + logger.endStep(hasKbErrors ? 'error' : 'success'); + } + // Enrich deployed payment managers with live status const paymentStates = targetResources?.payments ?? 
{}; const deployedPayments = resources.filter( diff --git a/src/cli/commands/status/command.tsx b/src/cli/commands/status/command.tsx index b60276219..1d93e8162 100644 --- a/src/cli/commands/status/command.tsx +++ b/src/cli/commands/status/command.tsx @@ -25,8 +25,8 @@ const VALID_RESOURCE_TYPES = [ 'policy-engine', 'policy', 'config-bundle', - 'ab-test', 'dataset', + 'knowledge-base', ...(isPreviewEnabled() ? (['harness'] as const) : []), ] as const; const VALID_STATES = ['deployed', 'local-only', 'pending-removal'] as const; @@ -37,12 +37,13 @@ interface StatusCliOptions { type?: string; state?: string; runtime?: string; + name?: string; json?: boolean; } function filterResources( resources: ResourceStatusEntry[], - options: { type?: string; state?: string; runtime?: string } + options: { type?: string; state?: string; runtime?: string; name?: string } ): ResourceStatusEntry[] { let filtered = resources; @@ -58,6 +59,13 @@ function filterResources( filtered = filtered.filter(r => r.resourceType !== 'agent' || r.name === options.runtime); } + // --name drills into a single resource by name. It's meant for knowledge-base + // (paired with --type knowledge-base), but a bare --name still narrows the list + // by name across all types, which is sensible behaviour. 
+ if (options.name) { + filtered = filtered.filter(r => r.name === options.name); + } + return filtered; } @@ -71,6 +79,7 @@ export const registerStatus = (program: Command) => { .option('--type ', `Filter by resource type (${VALID_RESOURCE_TYPES.join(', ')})`) .option('--state ', 'Filter by deployment state (deployed, local-only, pending-removal)') .option('--runtime ', 'Filter to a specific runtime') + .option('--name ', 'Show details for a single resource by name (knowledge-base)') .option('--json', 'Output as JSON') .action(async (cliOptions: StatusCliOptions) => { requireProject(); @@ -139,7 +148,12 @@ export const registerStatus = (program: Command) => { // Default path: show all resource types with deployment state const result = await withCommandRunTelemetry('status', telemetryAttrs, async () => { const context = await loadStatusConfig(); - return handleProjectStatus(context, { targetName: cliOptions.target }); + // --name drives the KB drill-down (full block) vs the default summary + // line. Scope it to KB filtering: only thread it when the user hasn't + // narrowed to a different resource type. + const knowledgeBaseName = + cliOptions.name && (!cliOptions.type || cliOptions.type === 'knowledge-base') ? 
cliOptions.name : undefined; + return handleProjectStatus(context, { targetName: cliOptions.target, knowledgeBaseName }); }); if (!result.success) { @@ -168,8 +182,8 @@ export const registerStatus = (program: Command) => { const policyEngines = filtered.filter(r => r.resourceType === 'policy-engine'); const policies = filtered.filter(r => r.resourceType === 'policy'); const configBundles = filtered.filter(r => r.resourceType === 'config-bundle'); - const abTests = filtered.filter(r => r.resourceType === 'ab-test'); const datasets = filtered.filter(r => r.resourceType === 'dataset'); + const knowledgeBases = filtered.filter(r => r.resourceType === 'knowledge-base'); const harnesses = filtered.filter(r => r.resourceType === 'harness'); const payments = filtered.filter(r => r.resourceType === 'payment'); // TODO: Add http-gateway resource type when diffResourceSet for HTTP gateways is added to action.ts @@ -314,22 +328,6 @@ export const registerStatus = (program: Command) => { )} - {abTests.length > 0 && ( - - AB Tests - {abTests.map(entry => ( - - - {entry.invocationUrl && ( - - {' '}Invocation URL: {entry.invocationUrl} - - )} - - ))} - - )} - {datasets.length > 0 && ( Datasets @@ -381,6 +379,15 @@ export const registerStatus = (program: Command) => { )} + {knowledgeBases.length > 0 && ( + + Knowledge Bases + {knowledgeBases.map(entry => ( + + ))} + + )} + {payments.length > 0 && ( Payments diff --git a/src/cli/commands/status/format-knowledge-base.ts b/src/cli/commands/status/format-knowledge-base.ts new file mode 100644 index 000000000..2a82842b3 --- /dev/null +++ b/src/cli/commands/status/format-knowledge-base.ts @@ -0,0 +1,94 @@ +export interface KbIngestionDetail { + status?: string; + startedAt?: string; + updatedAt?: string; + completedAt?: string; + scanned?: number; + indexed?: number; + modified?: number; + failed?: number; + deleted?: number; +} + +export interface KbDataSourceDetail { + uri: string; + dataSourceId: string; + ingestion?: 
KbIngestionDetail; +} + +export interface KbStatusDetail { + name: string; + knowledgeBaseId: string; + status?: string; + gatewayNames: string[]; + dataSources: KbDataSourceDetail[]; +} + +const FAILURE_HINTS = [ + 'Next steps:', + ' → Retry ingestion: agentcore run ingest --name ', + ' → Common causes:', + ' • Document format not supported (.txt, .md, .html, .pdf, .doc, .csv, .xls)', + ' • File exceeds 50MB size limit', + ' • S3 bucket permissions — ensure the KB role has s3:GetObject access', + ' • Data source credentials expired (Confluence, SharePoint, etc.)', +]; + +/** Render the rich, multi-line KB status block per the DevEx spec. */ +export function formatKnowledgeBaseDetail(kb: KbStatusDetail): string[] { + const lines: string[] = []; + lines.push(`Knowledge Base: ${kb.name}`); + lines.push( + ` Knowledge Base: ${kb.status === 'ACTIVE' ? '✓' : '⟳'} ${kb.status ?? 'UNKNOWN'} (${kb.knowledgeBaseId})` + ); + + let anyFailed = false; + lines.push(` Data Sources (${kb.dataSources.length}):`); + for (const ds of kb.dataSources) { + const ing = ds.ingestion; + const mark = ing?.status === 'FAILED' ? '✗' : ing?.status === 'COMPLETE' || ing?.status === 'SUCCEEDED' ? '✓' : '⟳'; + lines.push(` ${mark} ${ds.uri} (${ds.dataSourceId})`); + if (ing) { + if (ing.status === 'FAILED') anyFailed = true; + lines.push(` Ingestion: ${ing.status ?? 'UNKNOWN'}`); + if (ing.startedAt) lines.push(` Started: ${ing.startedAt}`); + if (ing.completedAt) lines.push(` Completed: ${ing.completedAt}`); + else if (ing.updatedAt) lines.push(` Updated: ${ing.updatedAt}`); + lines.push( + ` Documents: ${ing.scanned ?? 0} scanned, ${ing.indexed ?? 0} new indexed, ${ing.modified ?? 0} modified, ${ing.failed ?? 0} failed, ${ing.deleted ?? 
0} deleted` + ); + } else { + lines.push(' Ingestion: never run'); + } + } + + if (kb.gatewayNames.length > 0) { + lines.push(` Gateways: ${kb.gatewayNames.join(', ')}`); + lines.push(' Tools: retrieve (available)'); + } + + if (anyFailed) { + lines.push(''); + lines.push(...FAILURE_HINTS.map(h => ` ${h}`)); + } + + return lines; +} + +/** One-line rollup for the summary view (no --name). */ +export function formatKnowledgeBaseSummaryLine(kb: KbStatusDetail): string { + const totalIndexed = kb.dataSources.reduce((n, ds) => n + (ds.ingestion?.indexed ?? 0), 0); + const anyFailed = kb.dataSources.some(ds => ds.ingestion?.status === 'FAILED'); + const ingesting = kb.dataSources.some( + ds => ds.ingestion && ['IN_PROGRESS', 'STARTING', 'SUBMITTED'].includes(ds.ingestion.status ?? '') + ); + const state = anyFailed + ? '✗ Failed' + : ingesting + ? '⟳ Ingesting' + : kb.status === 'ACTIVE' + ? '✓ Ready' + : (kb.status ?? 'Unknown'); + const dsCount = kb.dataSources.length; + return `${kb.name}: ${state} (${dsCount} data source${dsCount !== 1 ? 
's' : ''}, ${totalIndexed} indexed)`; +} diff --git a/src/cli/commands/stop/command.tsx b/src/cli/commands/stop/command.tsx index 75f9d96cf..acaaa45ff 100644 --- a/src/cli/commands/stop/command.tsx +++ b/src/cli/commands/stop/command.tsx @@ -1,45 +1,58 @@ -import { stopBatchEvaluation } from '../../aws/agentcore-batch-evaluation'; -import { COMMAND_DESCRIPTIONS } from '../../constants'; -import { getErrorMessage } from '../../errors'; -import { getRegion } from '../shared/region-utils'; +import { ConfigIO } from '../../../lib'; +import { createJobEngine } from '../../operations/jobs'; +import { runCliCommand } from '../../telemetry/cli-command-run'; +import { COMMAND_DESCRIPTIONS } from '../../tui/copy'; +import { requireProject } from '../../tui/guards'; import type { Command } from '@commander-js/extra-typings'; -import { Text, render } from 'ink'; -import React from 'react'; export const registerStop = (program: Command) => { const stopCmd = program.command('stop').description(COMMAND_DESCRIPTIONS.stop); stopCmd - .command('batch-evaluation') - .description('[preview] Stop a running batch evaluation') - .requiredOption('-i, --id ', 'Batch evaluation ID to stop') + .command('ab-test') + .description('Stop a running A/B test permanently') + .requiredOption('-i, --id ', 'A/B test ID to stop') .option('--region ', 'AWS region (auto-detected if omitted)') .option('--json', 'Output as JSON') - .action(async (cliOptions: { id: string; region?: string; json?: boolean }) => { - try { - const region = await getRegion(cliOptions.region); - - const result = await stopBatchEvaluation({ - region, - batchEvaluationId: cliOptions.id, - }); + .action((cliOptions: { id: string; region?: string; json?: boolean }) => { + requireProject(); + return runCliCommand('stop.job', !!cliOptions.json, async () => { + const engine = createJobEngine(new ConfigIO()); + const result = await engine.stop('ab-test', cliOptions.id); + if (!result.success) { + throw result.error; + } if 
(cliOptions.json) { - console.log(JSON.stringify({ success: true, ...result })); + console.log(JSON.stringify({ success: true, id: cliOptions.id })); } else { - console.log(`\nBatch evaluation stopped successfully`); - console.log(`ID: ${result.batchEvaluationId}`); - console.log(`Status: ${result.status}\n`); + console.log(`\n✓ A/B test ${cliOptions.id} stop requested.\n`); } + return { job_type: 'ab-test' }; + }); + }); + + stopCmd + .command('batch-evaluation') + .description('Stop a running batch evaluation') + .requiredOption('-i, --id ', 'Batch evaluation ID to stop') + .option('--region ', 'AWS region (auto-detected if omitted)') + .option('--json', 'Output as JSON') + .action((cliOptions: { id: string; region?: string; json?: boolean }) => { + requireProject(); - process.exit(0); - } catch (error) { + return runCliCommand('stop.job', !!cliOptions.json, async () => { + const engine = createJobEngine(new ConfigIO()); + const result = await engine.stop('batch-evaluation', cliOptions.id); + if (!result.success) { + throw result.error; + } if (cliOptions.json) { - console.log(JSON.stringify({ success: false, error: getErrorMessage(error) })); + console.log(JSON.stringify({ success: true, id: cliOptions.id })); } else { - render(Error: {getErrorMessage(error)}); + console.log(`\n✓ Batch evaluation ${cliOptions.id} stop requested.\n`); } - process.exit(1); - } + return { job_type: 'batch-evaluation' }; + }); }); }; diff --git a/src/cli/commands/stop/index.ts b/src/cli/commands/stop/index.ts index 1f1a5e1e2..3f55d16c9 100644 --- a/src/cli/commands/stop/index.ts +++ b/src/cli/commands/stop/index.ts @@ -1 +1 @@ -export { registerStop } from '../pause/command'; +export { registerStop } from './command'; diff --git a/src/cli/commands/validate/action.ts b/src/cli/commands/validate/action.ts index bcb0988d7..57ed7252b 100644 --- a/src/cli/commands/validate/action.ts +++ b/src/cli/commands/validate/action.ts @@ -10,6 +10,7 @@ import { readEnvFile, } from '../../../lib'; 
import type { Result } from '../../../lib/result'; +import { validateHarnessSpecs } from '../../operations/deploy/preflight'; import { computePaymentCredentialEnvVarNames, computeStripePrivyCredentialEnvVarNames, @@ -181,6 +182,17 @@ export async function handleValidate(options: ValidateOptions): Promise } } + // Validate each per-harness harness.json against HarnessSpecSchema. `validate` previously only + // checked agentcore.json, so a malformed harness spec (bad model provider, missing + // executionRoleArn, an out-of-CFN-bounds field) passed "validate" yet failed at deploy synth. + // Reuse the deploy-preflight helper so `validate` and `deploy` aggregate the SAME way (report + // every broken harness at once, not one-per-rerun). + try { + await validateHarnessSpecs(projectSpec, configRoot); + } catch (err) { + return { success: false, error: err instanceof Error ? err : new Error(String(err)) }; + } + return { success: true }; } diff --git a/src/cli/commands/view/JobDetailScreen.tsx b/src/cli/commands/view/JobDetailScreen.tsx new file mode 100644 index 000000000..444814f5f --- /dev/null +++ b/src/cli/commands/view/JobDetailScreen.tsx @@ -0,0 +1,109 @@ +import { ConfigIO } from '../../../lib'; +import { validateAwsCredentials } from '../../aws/account'; +import { getErrorMessage } from '../../errors'; +import { createJobEngine } from '../../operations/jobs'; +import type { JobRecord, JobType } from '../../operations/jobs'; +import { ErrorPrompt, Screen } from '../../tui/components'; +import { ABTestDetailView, BatchEvalDetailView, RecommendationDetailView } from '../../tui/screens/job-detail'; +import { Text } from 'ink'; +import React, { useCallback, useEffect, useMemo, useState } from 'react'; + +interface JobDetailScreenProps { + type: JobType; + id: string; + onExit: () => void; +} + +type State = { name: 'loading' } | { name: 'error'; message: string } | { name: 'loaded'; record: JobRecord }; + +export function JobDetailScreen({ type, id, onExit }: 
JobDetailScreenProps) { + const engine = useMemo(() => createJobEngine(new ConfigIO()), []); + const [state, setState] = useState({ name: 'loading' }); + + useEffect(() => { + let cancelled = false; + void (async () => { + try { + await validateAwsCredentials(); + } catch (err) { + if (!cancelled) setState({ name: 'error', message: `AWS credentials required: ${getErrorMessage(err)}` }); + return; + } + try { + const record = await engine.get(type, id); + if (!record) { + if (!cancelled) setState({ name: 'error', message: `Job "${id}" not found.` }); + return; + } + if (!cancelled) setState({ name: 'loaded', record }); + } catch (err) { + if (!cancelled) setState({ name: 'error', message: getErrorMessage(err) }); + } + })(); + return () => { + cancelled = true; + }; + }, [engine, type, id]); + + const handleUpdate = useCallback((updated: JobRecord) => { + setState({ name: 'loaded', record: updated }); + }, []); + + if (state.name === 'loading') { + return ( + + Loading job {id}... + + ); + } + + if (state.name === 'error') { + return ; + } + + const { record } = state; + + if (record.type === 'ab-test') { + return ( + + + + ); + } + if (record.type === 'batch-evaluation') { + return ( + + + + ); + } + if (record.type === 'recommendation') { + return ( + + + + ); + } + return ( + + ); +} diff --git a/src/cli/commands/view/command.tsx b/src/cli/commands/view/command.tsx new file mode 100644 index 000000000..e656e8b3c --- /dev/null +++ b/src/cli/commands/view/command.tsx @@ -0,0 +1,129 @@ +import { ConfigIO, JobNotFoundError, serializeResult } from '../../../lib'; +import { createJobEngine } from '../../operations/jobs'; +import type { ABTestJobRecord, JobType } from '../../operations/jobs'; +import { getInvocationUrl, printABTestDetail, printABTestHistory } from '../../operations/jobs/ab-test/format'; +import { printBatchEvaluationDetail, printBatchEvaluationHistory } from '../../operations/jobs/batch-evaluation/format'; +import { printInsightsDetail, 
printInsightsHistory } from '../../operations/jobs/insights/format'; +import { printRecommendationDetail, printRecommendationHistory } from '../../operations/jobs/recommendation/format'; +import { runCliCommand } from '../../telemetry/cli-command-run'; +import { requireProject } from '../../tui/guards'; +import type { Command } from '@commander-js/extra-typings'; + +const TYPE_META: Record< + JobType, + { + label: string; + jsonKey: string; + printHistory: (records: unknown[]) => void; + printDetail: (record: unknown) => void; + } +> = { + recommendation: { + label: 'recommendation', + jsonKey: 'recommendations', + printHistory: printRecommendationHistory as (r: unknown[]) => void, + printDetail: printRecommendationDetail as (r: unknown) => void, + }, + 'batch-evaluation': { + label: 'batch evaluation', + jsonKey: 'batchEvaluations', + printHistory: printBatchEvaluationHistory as (r: unknown[]) => void, + printDetail: printBatchEvaluationDetail as (r: unknown) => void, + }, + 'ab-test': { + label: 'A/B test', + jsonKey: 'abTests', + printHistory: printABTestHistory as (r: unknown[]) => void, + printDetail: printABTestDetail as (r: unknown) => void, + }, + insights: { + label: 'insights', + jsonKey: 'insights', + printHistory: printInsightsHistory as (r: unknown[]) => void, + printDetail: printInsightsDetail as (r: unknown) => void, + }, +}; + +function registerViewSubcommand(viewCmd: Command, type: JobType) { + const meta = TYPE_META[type]; + + viewCmd + .command(type) + .description(`View ${meta.label} jobs`) + .argument('[id]', `${meta.label} job ID`) + .option('--json', 'Output as JSON (non-interactive)') + .option('--region ', 'AWS region (auto-detected if omitted)') + .action((id: string | undefined, cliOptions: { json?: boolean; region?: string }) => { + requireProject(); + + if (id) { + // Detail for one job + if (cliOptions.json) { + return runCliCommand('job.get', true, async () => { + const engine = createJobEngine(new ConfigIO()); + const record = await 
engine.get(type, id); + if (!record) { + throw new JobNotFoundError(`${meta.label} "${id}" not found.`); + } + const extra = + type === 'ab-test' ? { invocationUrl: getInvocationUrl(record as unknown as ABTestJobRecord) } : {}; + console.log(JSON.stringify(serializeResult({ success: true, ...record, ...extra }))); + return { job_type: type }; + }); + } + // Interactive detail — launch TUI + return launchTuiDetail(type, id); + } + + // List all jobs of this type + if (cliOptions.json) { + return runCliCommand('job.history', true, async () => { + const engine = createJobEngine(new ConfigIO()); + const records = await engine.list({ type }); + console.log( + JSON.stringify({ + success: true, + [meta.jsonKey]: records, + }) + ); + return { job_type: type }; + }); + } + // Interactive list — launch TUI + return launchTuiList(type); + }); +} + +async function launchTuiList(type: JobType): Promise { + const [{ render }, { default: React }] = await Promise.all([import('ink'), import('react')]); + + if (type === 'ab-test') { + const { ABTestJobsHistoryScreen } = await import('../../tui/screens/run-ab-test'); + render(React.createElement(ABTestJobsHistoryScreen, { onExit: () => process.exit(0) })); + } else if (type === 'batch-evaluation') { + const { BatchEvalHistoryScreen } = await import('../../tui/screens/run-eval'); + render(React.createElement(BatchEvalHistoryScreen, { onExit: () => process.exit(0) })); + } else if (type === 'insights') { + const { InsightsJobsScreen } = await import('../../tui/screens/insights-jobs'); + render(React.createElement(InsightsJobsScreen, { onExit: () => process.exit(0) })); + } else { + const { RecommendationHistoryScreen } = await import('../../tui/screens/recommendation'); + render(React.createElement(RecommendationHistoryScreen, { onExit: () => process.exit(0) })); + } + return new Promise(() => undefined); +} + +async function launchTuiDetail(type: JobType, id: string): Promise { + const [{ render }, { default: React }] = await 
Promise.all([import('ink'), import('react')]); + const { JobDetailScreen } = await import('./JobDetailScreen'); + render(React.createElement(JobDetailScreen, { type, id, onExit: () => process.exit(0) })); + return new Promise(() => undefined); +} + +export const registerView = (program: Command) => { + const viewCmd = program.command('view').description('View job history and details'); + registerViewSubcommand(viewCmd, 'recommendation'); + registerViewSubcommand(viewCmd, 'batch-evaluation'); + registerViewSubcommand(viewCmd, 'ab-test'); + registerViewSubcommand(viewCmd, 'insights'); +}; diff --git a/src/cli/commands/view/index.ts b/src/cli/commands/view/index.ts new file mode 100644 index 000000000..79c9cba1a --- /dev/null +++ b/src/cli/commands/view/index.ts @@ -0,0 +1 @@ +export { registerView } from './command'; diff --git a/src/cli/constants.ts b/src/cli/constants.ts index 3f7da3fdb..deda321d7 100644 --- a/src/cli/constants.ts +++ b/src/cli/constants.ts @@ -28,17 +28,18 @@ export const COMMAND_DESCRIPTIONS = { fetch: 'Fetch access info for deployed resources.', pause: 'Pause a deployed resource (online eval config, A/B test).', resume: 'Resume a paused resource (online eval config, A/B test).', - recommend: '[preview] Run optimization recommendations for system prompts and tool descriptions.', - recommendations: '[preview] View recommendation history from past runs.', + recommend: 'Run optimization recommendations for system prompts and tool descriptions.', + recommendations: 'View recommendation history from past runs.', run: 'Run evaluations, batch evaluations, or optimization recommendations.', stop: 'Stop a running batch evaluation or A/B test.', import: 'Import a runtime, memory, or starter toolkit into this project. 
[experimental]', telemetry: 'Manage anonymous usage analytics preferences.', update: 'Check for and install CLI updates', validate: 'Validate agentcore/ config files.', - 'config-bundle': '[preview] Manage configuration bundle versions and diffs.', - archive: '[preview] Archive (delete) a batch evaluation or recommendation on the service and clear local history.', + 'config-bundle': 'Manage configuration bundle versions and diffs.', + archive: 'Archive (delete) a batch evaluation or recommendation on the service and clear local history.', config: 'Adjust global configuration settings such as telemetry opt-out status', + export: 'Export a harness to a Strands runtime agent.', } as const; /** diff --git a/src/cli/external-requirements/__tests__/checks-extended.test.ts b/src/cli/external-requirements/__tests__/checks-extended.test.ts index 130e9c43c..7b8339502 100644 --- a/src/cli/external-requirements/__tests__/checks-extended.test.ts +++ b/src/cli/external-requirements/__tests__/checks-extended.test.ts @@ -48,6 +48,7 @@ describe('requiresUv', () => { }, ], memories: [], + knowledgeBases: [], credentials: [], evaluators: [], onlineEvalConfigs: [], @@ -55,7 +56,6 @@ describe('requiresUv', () => { policyEngines: [], configBundles: [], abTests: [], - httpGateways: [], harnesses: [], datasets: [], payments: [], @@ -79,6 +79,7 @@ describe('requiresUv', () => { }, ], memories: [], + knowledgeBases: [], credentials: [], evaluators: [], onlineEvalConfigs: [], @@ -86,7 +87,6 @@ describe('requiresUv', () => { policyEngines: [], configBundles: [], abTests: [], - httpGateways: [], harnesses: [], datasets: [], payments: [], @@ -101,6 +101,7 @@ describe('requiresUv', () => { managedBy: 'CDK' as const, runtimes: [], memories: [], + knowledgeBases: [], credentials: [], evaluators: [], onlineEvalConfigs: [], @@ -108,7 +109,6 @@ describe('requiresUv', () => { policyEngines: [], configBundles: [], abTests: [], - httpGateways: [], harnesses: [], datasets: [], payments: [], @@ -134,6 
+134,7 @@ describe('requiresContainerRuntime', () => { }, ], memories: [], + knowledgeBases: [], credentials: [], evaluators: [], onlineEvalConfigs: [], @@ -141,7 +142,6 @@ describe('requiresContainerRuntime', () => { policyEngines: [], configBundles: [], abTests: [], - httpGateways: [], harnesses: [], datasets: [], payments: [], @@ -165,6 +165,7 @@ describe('requiresContainerRuntime', () => { }, ], memories: [], + knowledgeBases: [], credentials: [], evaluators: [], onlineEvalConfigs: [], @@ -172,7 +173,6 @@ describe('requiresContainerRuntime', () => { policyEngines: [], configBundles: [], abTests: [], - httpGateways: [], harnesses: [], datasets: [], payments: [], @@ -187,6 +187,7 @@ describe('requiresContainerRuntime', () => { managedBy: 'CDK' as const, runtimes: [], memories: [], + knowledgeBases: [], credentials: [], evaluators: [], onlineEvalConfigs: [], @@ -194,7 +195,6 @@ describe('requiresContainerRuntime', () => { policyEngines: [], configBundles: [], abTests: [], - httpGateways: [], harnesses: [], datasets: [], payments: [], @@ -226,6 +226,7 @@ describe('requiresContainerRuntime', () => { }, ], memories: [], + knowledgeBases: [], credentials: [], evaluators: [], onlineEvalConfigs: [], @@ -233,7 +234,6 @@ describe('requiresContainerRuntime', () => { policyEngines: [], configBundles: [], abTests: [], - httpGateways: [], harnesses: [], datasets: [], payments: [], @@ -299,6 +299,7 @@ describe('checkDependencyVersions', () => { managedBy: 'CDK' as const, runtimes: [], memories: [], + knowledgeBases: [], credentials: [], evaluators: [], onlineEvalConfigs: [], @@ -306,7 +307,6 @@ describe('checkDependencyVersions', () => { policyEngines: [], configBundles: [], abTests: [], - httpGateways: [], harnesses: [], datasets: [], payments: [], @@ -325,6 +325,7 @@ describe('checkDependencyVersions', () => { managedBy: 'CDK' as const, runtimes: [], memories: [], + knowledgeBases: [], credentials: [], evaluators: [], onlineEvalConfigs: [], @@ -332,7 +333,6 @@ 
describe('checkDependencyVersions', () => { policyEngines: [], configBundles: [], abTests: [], - httpGateways: [], harnesses: [], datasets: [], payments: [], @@ -359,6 +359,7 @@ describe('checkDependencyVersions', () => { }, ], memories: [], + knowledgeBases: [], credentials: [], evaluators: [], onlineEvalConfigs: [], @@ -366,7 +367,6 @@ describe('checkDependencyVersions', () => { policyEngines: [], configBundles: [], abTests: [], - httpGateways: [], harnesses: [], datasets: [], payments: [], diff --git a/src/cli/feature-flags.ts b/src/cli/feature-flags.ts index f6dce4f86..03d08854d 100644 --- a/src/cli/feature-flags.ts +++ b/src/cli/feature-flags.ts @@ -1,3 +1,5 @@ declare const __PREVIEW__: boolean; export const isPreviewEnabled = (): boolean => __PREVIEW__; + +export const isGatedFeaturesEnabled = (): boolean => process.env.ENABLE_GATED_FEATURES === '1'; diff --git a/src/cli/logging/remove-logger.ts b/src/cli/logging/remove-logger.ts index b58e86593..30cc5642b 100644 --- a/src/cli/logging/remove-logger.ts +++ b/src/cli/logging/remove-logger.ts @@ -17,11 +17,12 @@ export interface RemoveLoggerOptions { | 'runtime-endpoint' | 'evaluator' | 'online-eval' + | 'online-insights' | 'policy-engine' | 'policy' | 'config-bundle' - | 'ab-test' | 'dataset' + | 'knowledge-base' | 'payment-manager' | 'payment-connector'; /** Name of the resource being removed */ diff --git a/src/cli/operations/ab-test/__tests__/promote.test.ts b/src/cli/operations/ab-test/__tests__/promote.test.ts deleted file mode 100644 index 2abf8583c..000000000 --- a/src/cli/operations/ab-test/__tests__/promote.test.ts +++ /dev/null @@ -1,270 +0,0 @@ -import { promoteABTestConfig } from '../promote'; -import { beforeEach, describe, expect, it, vi } from 'vitest'; - -// Mock ConfigIO — vi.hoisted ensures these are available before the hoisted vi.mock runs -const { mockReadProjectSpec, mockWriteProjectSpec, mockReadDeployedState } = vi.hoisted(() => ({ - mockReadProjectSpec: vi.fn(), - mockWriteProjectSpec: 
vi.fn(), - mockReadDeployedState: vi.fn(), -})); - -vi.mock('../../../../lib', () => { - class MockConfigIO { - readProjectSpec = mockReadProjectSpec; - writeProjectSpec = mockWriteProjectSpec; - readDeployedState = mockReadDeployedState; - } - return { ConfigIO: MockConfigIO }; -}); - -// --------------------------------------------------------------------------- -// Helpers -// --------------------------------------------------------------------------- - -function makeConfigBundleProject(testName = 'myTest') { - return { - name: 'TestProject', - runtimes: [], - httpGateways: [], - onlineEvalConfigs: [], - abTests: [ - { - name: testName, - mode: 'config-bundle' as const, - gatewayRef: '{{gateway:my-gw}}', - variants: [ - { - name: 'C' as const, - weight: 50, - variantConfiguration: { - configurationBundle: { bundleArn: 'arn:aws:bundle:control', bundleVersion: 'v1' }, - }, - }, - { - name: 'T1' as const, - weight: 50, - variantConfiguration: { - configurationBundle: { bundleArn: 'arn:aws:bundle:treatment', bundleVersion: 'v2' }, - }, - }, - ], - evaluationConfig: { onlineEvaluationConfigArn: 'arn:aws:eval:config' }, - }, - ], - }; -} - -function makeTargetBasedProject(testName = 'targetTest') { - return { - name: 'TestProject', - runtimes: [ - { - name: 'my-runtime', - endpoints: { - control: { version: '1.0' }, - treatment: { version: '2.0' }, - }, - }, - ], - httpGateways: [ - { - name: 'my-gw', - targets: [ - { name: 'ctrl-target', runtimeRef: 'my-runtime', qualifier: 'control' }, - { name: 'treat-target', runtimeRef: 'my-runtime', qualifier: 'treatment' }, - ], - }, - ], - onlineEvalConfigs: [], - abTests: [ - { - name: testName, - mode: 'target-based' as const, - gatewayRef: '{{gateway:my-gw}}', - variants: [ - { - name: 'C' as const, - weight: 50, - variantConfiguration: { target: { targetName: 'ctrl-target' } }, - }, - { - name: 'T1' as const, - weight: 50, - variantConfiguration: { target: { targetName: 'treat-target' } }, - }, - ], - evaluationConfig: { - 
perVariantOnlineEvaluationConfig: [ - { treatmentName: 'C' as const, onlineEvaluationConfigArn: 'eval-c' }, - { treatmentName: 'T1' as const, onlineEvaluationConfigArn: 'eval-t1' }, - ], - }, - }, - ], - }; -} - -function makeDeployedState(specName: string, abTestId: string) { - return { - targets: { - default: { - resources: { - abTests: { - [specName]: { abTestId, abTestArn: `arn:aws:ab-test:${abTestId}` }, - }, - }, - }, - }, - }; -} - -// --------------------------------------------------------------------------- -// Tests -// --------------------------------------------------------------------------- - -describe('promoteABTestConfig', () => { - beforeEach(() => { - vi.clearAllMocks(); - mockWriteProjectSpec.mockResolvedValue(undefined); - }); - - describe('target-based promote', () => { - it('updates control endpoint version to treatment version', async () => { - const project = makeTargetBasedProject(); - mockReadProjectSpec.mockResolvedValue(project); - mockReadDeployedState.mockResolvedValue(makeDeployedState('targetTest', 'ab-123')); - - const result = await promoteABTestConfig('ab-123'); - - expect(result.promoted).toBe(true); - expect(result.mode).toBe('target-based'); - expect(result.promotionDetail).toContain('control'); - expect(result.promotionDetail).toContain('2.0'); - - // Verify the project was written with updated control version - expect(mockWriteProjectSpec).toHaveBeenCalledOnce(); - const writtenProject = mockWriteProjectSpec.mock.calls[0]![0]; - expect(writtenProject.runtimes[0].endpoints.control.version).toBe('2.0'); - }); - }); - - describe('config-bundle promote', () => { - it('copies treatment bundle ref to control', async () => { - const project = makeConfigBundleProject(); - mockReadProjectSpec.mockResolvedValue(project); - mockReadDeployedState.mockResolvedValue(makeDeployedState('myTest', 'ab-456')); - - const result = await promoteABTestConfig('ab-456'); - - expect(result.promoted).toBe(true); - 
expect(result.mode).toBe('config-bundle'); - expect(result.promotionDetail).toContain('arn:aws:bundle:treatment'); - expect(result.promotionDetail).toContain('v2'); - - // Verify the control bundle was updated - expect(mockWriteProjectSpec).toHaveBeenCalledOnce(); - const writtenProject = mockWriteProjectSpec.mock.calls[0]![0]; - const controlVariant = writtenProject.abTests[0].variants.find((v: { name: string }) => v.name === 'C'); - expect(controlVariant.variantConfiguration.configurationBundle.bundleArn).toBe('arn:aws:bundle:treatment'); - expect(controlVariant.variantConfiguration.configurationBundle.bundleVersion).toBe('v2'); - }); - }); - - describe('not found', () => { - it('returns promoted=false with message when AB test not found', async () => { - const project = makeConfigBundleProject(); - mockReadProjectSpec.mockResolvedValue(project); - mockReadDeployedState.mockResolvedValue({ targets: { default: { resources: { abTests: {} } } } }); - - const result = await promoteABTestConfig('nonexistent-id'); - - expect(result.promoted).toBe(false); - expect(result.promotionDetail).toContain('not found'); - expect(mockWriteProjectSpec).not.toHaveBeenCalled(); - }); - }); - - describe('ID-based lookup from deployed state', () => { - it('resolves spec name from deployed state using abTestId', async () => { - const project = makeConfigBundleProject('mySpecTest'); - mockReadProjectSpec.mockResolvedValue(project); - mockReadDeployedState.mockResolvedValue(makeDeployedState('mySpecTest', 'ab-789')); - - const result = await promoteABTestConfig('ab-789'); - - expect(result.promoted).toBe(true); - expect(result.mode).toBe('config-bundle'); - // Should have resolved without needing testNameFallback - expect(mockWriteProjectSpec).toHaveBeenCalledOnce(); - }); - - it('searches across multiple targets in deployed state', async () => { - const project = makeConfigBundleProject('crossTarget'); - mockReadProjectSpec.mockResolvedValue(project); - 
mockReadDeployedState.mockResolvedValue({ - targets: { - 'us-east-1': { resources: { abTests: {} } }, - 'us-west-2': { - resources: { - abTests: { - crossTarget: { abTestId: 'ab-cross', abTestArn: 'arn:aws:ab-test:ab-cross' }, - }, - }, - }, - }, - }); - - const result = await promoteABTestConfig('ab-cross'); - - expect(result.promoted).toBe(true); - }); - }); - - describe('name fallback when deployed state missing', () => { - it('falls back to name-based lookup when deployed state throws', async () => { - const project = makeConfigBundleProject('fallbackTest'); - mockReadProjectSpec.mockResolvedValue(project); - mockReadDeployedState.mockRejectedValue(new Error('No deployed state')); - - const warnSpy = vi.spyOn(console, 'warn').mockImplementation(vi.fn()); - - const result = await promoteABTestConfig('unknown-id', 'fallbackTest'); - - expect(result.promoted).toBe(true); - expect(result.mode).toBe('config-bundle'); - expect(warnSpy).toHaveBeenCalledWith(expect.stringContaining('falling back to name')); - - warnSpy.mockRestore(); - }); - - it('falls back to prefixed name match', async () => { - const project = makeConfigBundleProject('myTest'); - mockReadProjectSpec.mockResolvedValue(project); - mockReadDeployedState.mockRejectedValue(new Error('No deployed state')); - - const warnSpy = vi.spyOn(console, 'warn').mockImplementation(vi.fn()); - - // testNameFallback uses the prefixed format {projectName}_{testName} - const result = await promoteABTestConfig('unknown-id', 'TestProject_myTest'); - - expect(result.promoted).toBe(true); - - warnSpy.mockRestore(); - }); - - it('returns not found when neither deployed state nor name matches', async () => { - const project = makeConfigBundleProject('myTest'); - mockReadProjectSpec.mockResolvedValue(project); - mockReadDeployedState.mockRejectedValue(new Error('No deployed state')); - - const warnSpy = vi.spyOn(console, 'warn').mockImplementation(vi.fn()); - - const result = await promoteABTestConfig('unknown-id', 
'nonexistent'); - - expect(result.promoted).toBe(false); - expect(result.promotionDetail).toContain('not found'); - - warnSpy.mockRestore(); - }); - }); -}); diff --git a/src/cli/operations/ab-test/promote.ts b/src/cli/operations/ab-test/promote.ts deleted file mode 100644 index 5f98e52f6..000000000 --- a/src/cli/operations/ab-test/promote.ts +++ /dev/null @@ -1,124 +0,0 @@ -import { ConfigIO } from '../../../lib'; - -export interface PromoteABTestResult { - promoted: boolean; - mode?: string; - promotionDetail: string; -} - -/** - * Resolve the spec-level AB test name from a deployed abTestId. - * Looks up which entry in deployed state has that abTestId and returns - * the spec name (the key in the abTests record). - */ -function resolveSpecNameFromDeployedState( - configIO: ConfigIO, - deployedState: { targets: Record } }> }, - abTestId: string -): string | undefined { - for (const target of Object.values(deployedState.targets)) { - const abTests = target.resources?.abTests; - if (!abTests) continue; - for (const [specName, entry] of Object.entries(abTests)) { - if (entry.abTestId === abTestId) { - return specName; - } - } - } - return undefined; -} - -/** - * Apply AB test promotion to agentcore.json. - * Updates the control variant's config to match the treatment variant. - * Does NOT stop the AB test — caller is responsible for that. 
- * - * @param abTestId - The deployed AB test ID - * @param testNameFallback - Optional name fallback when deployed state is unavailable - */ -export async function promoteABTestConfig(abTestId: string, testNameFallback?: string): Promise { - const configIO = new ConfigIO(); - const project = await configIO.readProjectSpec(); - - // Try to resolve spec name from deployed state - let specName: string | undefined; - try { - const deployedState = await configIO.readDeployedState(); - specName = resolveSpecNameFromDeployedState(configIO, deployedState, abTestId); - } catch { - // Deployed state unavailable - } - - // Fall back to name-based lookup if deployed state didn't resolve - if (!specName && testNameFallback) { - console.warn( - `[promote] Could not resolve AB test ID "${abTestId}" from deployed state; falling back to name "${testNameFallback}".` - ); - const lowerName = testNameFallback.toLowerCase(); - const match = (project.abTests ?? []).find( - t => t.name.toLowerCase() === lowerName || `${project.name}_${t.name}`.toLowerCase() === lowerName - ); - specName = match?.name; - } - - const abTest = specName ? (project.abTests ?? []).find(t => t.name === specName) : undefined; - - if (!abTest) { - return { promoted: false, promotionDetail: `AB test with ID "${abTestId}" not found in project config.` }; - } - - const mode = abTest.mode ?? 'config-bundle'; - - if (abTest.mode === 'target-based') { - const treatmentVariant = abTest.variants.find(v => v.name === 'T1'); - const controlVariant = abTest.variants.find(v => v.name === 'C'); - const controlTargetName = controlVariant?.variantConfiguration.target?.targetName; - const treatmentTargetName = treatmentVariant?.variantConfiguration.target?.targetName; - - const gwMatch = /^\{\{gateway:(.+)\}\}$/.exec(abTest.gatewayRef); - const gwName = gwMatch?.[1]; - if (gwName) { - const gw = (project.httpGateways ?? 
[]).find(g => g.name === gwName); - if (gw?.targets) { - const controlTarget = gw.targets.find(t => t.name === controlTargetName); - const treatmentTarget = gw.targets.find(t => t.name === treatmentTargetName); - - if (controlTarget && treatmentTarget) { - const runtime = project.runtimes.find(r => r.name === controlTarget.runtimeRef); - const controlEp = runtime?.endpoints?.[controlTarget.qualifier]; - const treatmentEp = runtime?.endpoints?.[treatmentTarget.qualifier]; - if (controlEp && treatmentEp) { - controlEp.version = treatmentEp.version; - await configIO.writeProjectSpec(project); - return { - promoted: true, - mode, - promotionDetail: `Control endpoint "${controlTarget.qualifier}" updated to version ${treatmentEp.version} (from treatment "${treatmentTarget.qualifier}").`, - }; - } - } - } - } - return { promoted: false, mode, promotionDetail: 'Could not resolve target endpoints for promotion.' }; - } - - // Config-bundle mode - const controlVariant = abTest.variants.find(v => v.name === 'C'); - const treatmentVariant = abTest.variants.find(v => v.name === 'T1'); - if ( - controlVariant?.variantConfiguration.configurationBundle && - treatmentVariant?.variantConfiguration.configurationBundle - ) { - controlVariant.variantConfiguration.configurationBundle = { - ...treatmentVariant.variantConfiguration.configurationBundle, - }; - await configIO.writeProjectSpec(project); - return { - promoted: true, - mode, - promotionDetail: `Control bundle updated to "${treatmentVariant.variantConfiguration.configurationBundle.bundleArn}" version "${treatmentVariant.variantConfiguration.configurationBundle.bundleVersion}".`, - }; - } - - return { promoted: false, mode, promotionDetail: 'Could not resolve config bundles for promotion.' 
}; -} diff --git a/src/cli/operations/agent/generate/write-agent-to-project.ts b/src/cli/operations/agent/generate/write-agent-to-project.ts index 55056c4b2..8163494f6 100644 --- a/src/cli/operations/agent/generate/write-agent-to-project.ts +++ b/src/cli/operations/agent/generate/write-agent-to-project.ts @@ -66,6 +66,7 @@ export async function writeAgentToProject(config: GenerateConfig, options?: Writ managedBy: 'CDK' as const, runtimes: [agent], memories, + knowledgeBases: [], credentials, evaluators: [], onlineEvalConfigs: [], @@ -73,7 +74,6 @@ export async function writeAgentToProject(config: GenerateConfig, options?: Writ policyEngines: [], configBundles: [], abTests: [], - httpGateways: [], harnesses: [], datasets: [], payments: [], diff --git a/src/cli/operations/archive/__tests__/archive-storage.test.ts b/src/cli/operations/archive/__tests__/archive-storage.test.ts deleted file mode 100644 index 9ebb41fd5..000000000 --- a/src/cli/operations/archive/__tests__/archive-storage.test.ts +++ /dev/null @@ -1,130 +0,0 @@ -import { deleteLocalBatchEvalRun, deleteLocalRecommendationRun } from '../archive-storage.js'; -import { existsSync, mkdirSync, rmSync, writeFileSync } from 'fs'; -import { tmpdir } from 'os'; -import { join } from 'path'; -import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; - -const mockFindConfigRoot = vi.fn(); - -vi.mock('../../../../lib', () => ({ - findConfigRoot: () => mockFindConfigRoot(), -})); - -function makeTmpDir(): string { - const dir = join(tmpdir(), `archive-storage-test-${Date.now()}-${Math.random().toString(36).slice(2)}`); - mkdirSync(dir, { recursive: true }); - return dir; -} - -function writeJsonFile(path: string, data: unknown): void { - mkdirSync(join(path, '..'), { recursive: true }); - writeFileSync(path, JSON.stringify(data)); -} - -describe('archive-storage', () => { - let tmpDir: string; - - beforeEach(() => { - tmpDir = makeTmpDir(); - mockFindConfigRoot.mockReturnValue(tmpDir); - }); - - 
afterEach(() => { - if (existsSync(tmpDir)) { - rmSync(tmpDir, { recursive: true, force: true }); - } - vi.clearAllMocks(); - }); - - describe('deleteLocalBatchEvalRun', () => { - it('deletes the file and returns true when file exists', () => { - const filePath = join(tmpDir, '.cli', 'batch-eval-results', 'eval-123.json'); - writeJsonFile(filePath, { batchEvaluationId: 'eval-123' }); - - const result = deleteLocalBatchEvalRun('eval-123'); - - expect(result).toBe(true); - expect(existsSync(filePath)).toBe(false); - }); - - it('returns false when file does not exist', () => { - const result = deleteLocalBatchEvalRun('nonexistent-id'); - expect(result).toBe(false); - }); - - it('does not throw when the batch-eval-results directory does not exist', () => { - expect(() => deleteLocalBatchEvalRun('any-id')).not.toThrow(); - }); - - it('throws when findConfigRoot returns null', () => { - mockFindConfigRoot.mockReturnValue(null); - expect(() => deleteLocalBatchEvalRun('eval-123')).toThrow('No agentcore project found'); - }); - - it('throws when id contains a forward slash', () => { - expect(() => deleteLocalBatchEvalRun('../evil')).toThrow('Invalid batch evaluation ID'); - }); - - it('throws when id contains a backslash', () => { - expect(() => deleteLocalBatchEvalRun('evil\\path')).toThrow('Invalid batch evaluation ID'); - }); - - it('leaves other files in the directory untouched', () => { - const keep = join(tmpDir, '.cli', 'batch-eval-results', 'keep-me.json'); - const del = join(tmpDir, '.cli', 'batch-eval-results', 'delete-me.json'); - writeJsonFile(keep, { batchEvaluationId: 'keep-me' }); - writeJsonFile(del, { batchEvaluationId: 'delete-me' }); - - deleteLocalBatchEvalRun('delete-me'); - - expect(existsSync(keep)).toBe(true); - expect(existsSync(del)).toBe(false); - }); - }); - - describe('deleteLocalRecommendationRun', () => { - it('deletes the file and returns true when file exists', () => { - const filePath = join(tmpDir, '.cli', 'recommendations', 
'rec-456.json'); - writeJsonFile(filePath, { recommendationId: 'rec-456' }); - - const result = deleteLocalRecommendationRun('rec-456'); - - expect(result).toBe(true); - expect(existsSync(filePath)).toBe(false); - }); - - it('returns false when file does not exist', () => { - const result = deleteLocalRecommendationRun('nonexistent-id'); - expect(result).toBe(false); - }); - - it('does not throw when the recommendations directory does not exist', () => { - expect(() => deleteLocalRecommendationRun('any-id')).not.toThrow(); - }); - - it('throws when findConfigRoot returns null', () => { - mockFindConfigRoot.mockReturnValue(null); - expect(() => deleteLocalRecommendationRun('rec-456')).toThrow('No agentcore project found'); - }); - - it('throws when id contains a forward slash', () => { - expect(() => deleteLocalRecommendationRun('../evil')).toThrow('Invalid recommendation ID'); - }); - - it('throws when id contains a backslash', () => { - expect(() => deleteLocalRecommendationRun('evil\\path')).toThrow('Invalid recommendation ID'); - }); - - it('leaves other files in the directory untouched', () => { - const keep = join(tmpDir, '.cli', 'recommendations', 'keep-me.json'); - const del = join(tmpDir, '.cli', 'recommendations', 'delete-me.json'); - writeJsonFile(keep, { recommendationId: 'keep-me' }); - writeJsonFile(del, { recommendationId: 'delete-me' }); - - deleteLocalRecommendationRun('delete-me'); - - expect(existsSync(keep)).toBe(true); - expect(existsSync(del)).toBe(false); - }); - }); -}); diff --git a/src/cli/operations/archive/archive-storage.ts b/src/cli/operations/archive/archive-storage.ts deleted file mode 100644 index 5b4481fda..000000000 --- a/src/cli/operations/archive/archive-storage.ts +++ /dev/null @@ -1,43 +0,0 @@ -import { findConfigRoot } from '../../../lib'; -import { BATCH_EVAL_RESULTS_DIR } from '../eval/batch-eval-storage'; -import { RECOMMENDATIONS_DIR } from '../recommendation/recommendation-storage'; -import { existsSync, rmSync } from 
'fs'; -import { join } from 'path'; - -function getCliDir(): string { - const configRoot = findConfigRoot(); - if (!configRoot) { - throw new Error('No agentcore project found. Run `agentcore create` first.'); - } - return join(configRoot, '.cli'); -} - -function assertSafeId(id: string, label: string): void { - if (/[/\\]/.test(id)) { - throw new Error(`Invalid ${label}: must not contain path separators`); - } -} - -/** - * Delete the local batch eval run record for the given ID. - * Returns true if the file existed and was deleted, false if it was not found. - */ -export function deleteLocalBatchEvalRun(batchEvaluationId: string): boolean { - assertSafeId(batchEvaluationId, 'batch evaluation ID'); - const filePath = join(getCliDir(), BATCH_EVAL_RESULTS_DIR, `${batchEvaluationId}.json`); - if (!existsSync(filePath)) return false; - rmSync(filePath); - return true; -} - -/** - * Delete the local recommendation run record for the given ID. - * Returns true if the file existed and was deleted, false if it was not found. 
- */ -export function deleteLocalRecommendationRun(recommendationId: string): boolean { - assertSafeId(recommendationId, 'recommendation ID'); - const filePath = join(getCliDir(), RECOMMENDATIONS_DIR, `${recommendationId}.json`); - if (!existsSync(filePath)) return false; - rmSync(filePath); - return true; -} diff --git a/src/cli/operations/archive/index.ts b/src/cli/operations/archive/index.ts deleted file mode 100644 index 0f5d523ba..000000000 --- a/src/cli/operations/archive/index.ts +++ /dev/null @@ -1 +0,0 @@ -export { deleteLocalBatchEvalRun, deleteLocalRecommendationRun } from './archive-storage'; diff --git a/src/cli/operations/deploy/__tests__/managed-memory-notice.test.ts b/src/cli/operations/deploy/__tests__/managed-memory-notice.test.ts new file mode 100644 index 000000000..9d22e4cd4 --- /dev/null +++ b/src/cli/operations/deploy/__tests__/managed-memory-notice.test.ts @@ -0,0 +1,78 @@ +import type { ConfigIO } from '../../../../lib'; +import { + MANAGED_MEMORY_ADD_NOTICE, + MANAGED_MEMORY_DEPLOY_NOTICE, + hasManagedMemoryHarness, +} from '../managed-memory-notice'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +/** + * Builds a stub ConfigIO whose readHarnessSpec returns the given mode per harness name. + * Unknown names reject (mirrors a missing/unreadable harness.json). + */ +function stubConfigIO(modes: Record): ConfigIO { + return { + readHarnessSpec: vi.fn((name: string) => { + if (!(name in modes)) { + return Promise.reject(new Error(`no spec for ${name}`)); + } + const mode = modes[name]; + return Promise.resolve({ memory: mode ? 
{ mode } : undefined } as never); + }), + } as unknown as ConfigIO; +} + +describe('hasManagedMemoryHarness', () => { + const originalGate = process.env.ENABLE_GATED_FEATURES; + + beforeEach(() => { + process.env.ENABLE_GATED_FEATURES = '1'; + }); + + afterEach(() => { + if (originalGate === undefined) { + delete process.env.ENABLE_GATED_FEATURES; + } else { + process.env.ENABLE_GATED_FEATURES = originalGate; + } + vi.clearAllMocks(); + }); + + it('returns false when the gate is off, even with a managed harness', async () => { + delete process.env.ENABLE_GATED_FEATURES; + const configIO = stubConfigIO({ h1: 'managed' }); + expect(await hasManagedMemoryHarness(configIO, [{ name: 'h1' }])).toBe(false); + }); + + it('returns false when there are no harnesses', async () => { + expect(await hasManagedMemoryHarness(stubConfigIO({}), [])).toBe(false); + expect(await hasManagedMemoryHarness(stubConfigIO({}), undefined)).toBe(false); + }); + + it('returns true when any harness uses managed memory', async () => { + const configIO = stubConfigIO({ h1: 'existing', h2: 'managed' }); + expect(await hasManagedMemoryHarness(configIO, [{ name: 'h1' }, { name: 'h2' }])).toBe(true); + }); + + it('returns false when all harnesses are existing or disabled', async () => { + const configIO = stubConfigIO({ h1: 'existing', h2: 'disabled' }); + expect(await hasManagedMemoryHarness(configIO, [{ name: 'h1' }, { name: 'h2' }])).toBe(false); + }); + + it('treats an unreadable harness spec as non-managed (does not throw)', async () => { + const configIO = stubConfigIO({ h1: 'managed' }); + expect(await hasManagedMemoryHarness(configIO, [{ name: 'missing' }, { name: 'h1' }])).toBe(true); + }); +}); + +describe('managed-memory notice text', () => { + it('deploy notice tells the user how to skip via redeploy', () => { + expect(MANAGED_MEMORY_DEPLOY_NOTICE).toContain('3-5 minutes'); + expect(MANAGED_MEMORY_DEPLOY_NOTICE).toContain('redeploy with --memory-mode disabled'); + }); + + it('add notice is 
future-tense and points at the next deploy', () => { + expect(MANAGED_MEMORY_ADD_NOTICE).toContain('will automatically provision'); + expect(MANAGED_MEMORY_ADD_NOTICE).toContain('on deploy'); + }); +}); diff --git a/src/cli/operations/deploy/__tests__/post-deploy-ab-tests.test.ts b/src/cli/operations/deploy/__tests__/post-deploy-ab-tests.test.ts deleted file mode 100644 index a206fc23b..000000000 --- a/src/cli/operations/deploy/__tests__/post-deploy-ab-tests.test.ts +++ /dev/null @@ -1,660 +0,0 @@ -import type { AgentCoreProjectSpec, DeployedResourceState } from '../../../../schema'; -import { deleteOrphanedABTests, setupABTests } from '../post-deploy-ab-tests.js'; -import { beforeEach, describe, expect, it, vi } from 'vitest'; - -// ── Hoisted mocks ────────────────────────────────────────────────────────── - -const { - mockCreateABTest, - mockDeleteABTest, - mockGetABTest, - mockUpdateABTest, - mockListABTests, - mockGetCredentialProvider, - mockIAMSend, -} = vi.hoisted(() => ({ - mockCreateABTest: vi.fn(), - mockDeleteABTest: vi.fn(), - mockGetABTest: vi.fn(), - mockUpdateABTest: vi.fn(), - mockListABTests: vi.fn(), - mockGetCredentialProvider: vi.fn().mockReturnValue(undefined), - mockIAMSend: vi.fn(), -})); - -vi.mock('../../../aws/agentcore-ab-tests', () => ({ - createABTest: mockCreateABTest, - deleteABTest: mockDeleteABTest, - getABTest: mockGetABTest, - updateABTest: mockUpdateABTest, - listABTests: mockListABTests, -})); - -vi.mock('../../../aws/account', () => ({ - getCredentialProvider: mockGetCredentialProvider, -})); - -vi.mock('@aws-sdk/client-iam', () => ({ - IAMClient: class { - send = mockIAMSend; - }, - CreateRoleCommand: class { - constructor(public input: unknown) {} - }, - PutRolePolicyCommand: class { - constructor(public input: unknown) {} - }, - DeleteRolePolicyCommand: class { - constructor(public input: unknown) {} - }, - DeleteRoleCommand: class { - constructor(public input: unknown) {} - }, -})); - -// ── Helpers 
──────────────────────────────────────────────────────────────── - -function makeProjectSpec(abTests: AgentCoreProjectSpec['abTests'] = []): AgentCoreProjectSpec { - return { - name: 'TestProject', - version: 1, - managedBy: 'CDK' as const, - runtimes: [], - memories: [], - credentials: [], - evaluators: [], - onlineEvalConfigs: [], - agentCoreGateways: [], - policyEngines: [], - configBundles: [], - httpGateways: [], - datasets: [], - abTests, - harnesses: [], - payments: [], - }; -} - -const sampleABTest = { - name: 'TestOne', - mode: 'config-bundle' as const, - gatewayRef: 'arn:aws:bedrock-agentcore:us-east-1:123456789012:gateway/gw-123', - variants: [ - { - name: 'C' as const, - weight: 80, - variantConfiguration: { configurationBundle: { bundleArn: 'arn:bundle:control', bundleVersion: 'v1' } }, - }, - { - name: 'T1' as const, - weight: 20, - variantConfiguration: { configurationBundle: { bundleArn: 'arn:bundle:treatment', bundleVersion: 'v1' } }, - }, - ], - evaluationConfig: { onlineEvaluationConfigArn: 'arn:eval:config' }, - roleArn: 'arn:aws:iam::123456789012:role/ExistingRole', -}; - -// ── Tests ────────────────────────────────────────────────────────────────── - -describe('setupABTests', () => { - beforeEach(() => { - vi.clearAllMocks(); - mockListABTests.mockResolvedValue({ abTests: [] }); - mockUpdateABTest.mockResolvedValue({}); - mockGetABTest.mockResolvedValue({ status: 'ACTIVE', executionStatus: 'STOPPED' }); - }); - - describe('creation', () => { - it('creates new AB test when not in deployed state', async () => { - mockCreateABTest.mockResolvedValue({ abTestId: 'abt-001', abTestArn: 'arn:abt:001' }); - - const result = await setupABTests({ - region: 'us-east-1', - projectSpec: makeProjectSpec([sampleABTest]), - }); - - expect(result.hasErrors).toBe(false); - expect(result.results).toHaveLength(1); - expect(result.results[0]!.status).toBe('created'); - expect(result.results[0]!.abTestId).toBe('abt-001'); - expect(result.abTests.TestOne).toEqual( - 
expect.objectContaining({ abTestId: 'abt-001', abTestArn: 'arn:abt:001' }) - ); - }); - - it('updates already-deployed test', async () => { - mockUpdateABTest.mockResolvedValue({ abTestId: 'abt-existing', abTestArn: 'arn:abt:existing' }); - - const result = await setupABTests({ - region: 'us-east-1', - projectSpec: makeProjectSpec([sampleABTest]), - existingABTests: { - TestOne: { abTestId: 'abt-existing', abTestArn: 'arn:abt:existing' }, - }, - }); - - expect(result.results[0]!.status).toBe('updated'); - expect(mockCreateABTest).not.toHaveBeenCalled(); - expect(mockUpdateABTest).toHaveBeenCalled(); - }); - - it('updates test found via API list (state loss recovery)', async () => { - mockListABTests.mockResolvedValue({ - abTests: [{ name: 'TestOne', abTestId: 'abt-api', abTestArn: 'arn:abt:api' }], - }); - mockUpdateABTest.mockResolvedValue({ abTestId: 'abt-api', abTestArn: 'arn:abt:api' }); - - const result = await setupABTests({ - region: 'us-east-1', - projectSpec: makeProjectSpec([sampleABTest]), - }); - - expect(result.results[0]!.status).toBe('updated'); - expect(result.abTests.TestOne!.abTestId).toBe('abt-api'); - expect(mockCreateABTest).not.toHaveBeenCalled(); - expect(mockUpdateABTest).toHaveBeenCalled(); - }); - - it('auto-creates IAM role when roleArn not provided', async () => { - const testWithoutRole = { ...sampleABTest, roleArn: undefined }; - mockCreateABTest.mockResolvedValue({ abTestId: 'abt-002', abTestArn: 'arn:abt:002' }); - mockIAMSend.mockResolvedValue({ Role: { Arn: 'arn:aws:iam::123:role/AutoRole' } }); - - const result = await setupABTests({ - region: 'us-east-1', - projectSpec: makeProjectSpec([testWithoutRole]), - }); - - expect(result.results[0]!.status).toBe('created'); - expect(result.abTests.TestOne!.roleCreatedByCli).toBe(true); - expect(mockIAMSend).toHaveBeenCalled(); - }); - - it('uses provided roleArn without creating IAM role', async () => { - mockCreateABTest.mockResolvedValue({ abTestId: 'abt-003', abTestArn: 'arn:abt:003' 
}); - - const result = await setupABTests({ - region: 'us-east-1', - projectSpec: makeProjectSpec([sampleABTest]), - }); - - expect(result.results[0]!.status).toBe('created'); - expect(result.abTests.TestOne!.roleCreatedByCli).toBe(false); - expect(mockIAMSend).not.toHaveBeenCalled(); - }); - - it('reports error when createABTest fails', async () => { - mockCreateABTest.mockRejectedValue(new Error('API failure')); - - const result = await setupABTests({ - region: 'us-east-1', - projectSpec: makeProjectSpec([sampleABTest]), - }); - - expect(result.hasErrors).toBe(true); - expect(result.results[0]!.status).toBe('error'); - expect(result.results[0]!.error).toBe('API failure'); - }); - }); - - describe('ARN resolution', () => { - it('resolves bundle name to ARN from deployed state', async () => { - const testWithNames = { - ...sampleABTest, - variants: [ - { - name: 'C' as const, - weight: 80, - variantConfiguration: { configurationBundle: { bundleArn: 'my-bundle', bundleVersion: 'LATEST' } }, - }, - { - name: 'T1' as const, - weight: 20, - variantConfiguration: { configurationBundle: { bundleArn: 'my-bundle', bundleVersion: 'v2' } }, - }, - ], - }; - mockCreateABTest.mockResolvedValue({ abTestId: 'abt-004', abTestArn: 'arn:abt:004' }); - - await setupABTests({ - region: 'us-east-1', - projectSpec: makeProjectSpec([testWithNames]), - deployedResources: { - configBundles: { - 'my-bundle': { bundleArn: 'arn:bundle:resolved', versionId: 'ver-latest' }, - }, - } as unknown as DeployedResourceState, - }); - - const callArgs = mockCreateABTest.mock.calls[0]![0]; - expect(callArgs.variants[0].variantConfiguration.configurationBundle.bundleArn).toBe('arn:bundle:resolved'); - expect(callArgs.variants[0].variantConfiguration.configurationBundle.bundleVersion).toBe('ver-latest'); - expect(callArgs.variants[1].variantConfiguration.configurationBundle.bundleVersion).toBe('v2'); - }); - - it('resolves gateway placeholder to ARN', async () => { - const testWithPlaceholder = { - 
...sampleABTest, - gatewayRef: '{{gateway:my-gw}}', - }; - mockCreateABTest.mockResolvedValue({ abTestId: 'abt-005', abTestArn: 'arn:abt:005' }); - - await setupABTests({ - region: 'us-east-1', - projectSpec: makeProjectSpec([testWithPlaceholder]), - deployedResources: { - mcp: { - gateways: { - 'my-gw': { gatewayArn: 'arn:aws:bedrock-agentcore:us-east-1:123:gateway/resolved-gw' }, - }, - }, - } as unknown as DeployedResourceState, - }); - - expect(mockCreateABTest.mock.calls[0]![0].gatewayArn).toBe( - 'arn:aws:bedrock-agentcore:us-east-1:123:gateway/resolved-gw' - ); - }); - - it('resolves gateway placeholder to ARN from HTTP gateways', async () => { - const testWithPlaceholder = { - ...sampleABTest, - gatewayRef: '{{gateway:my-http-gw}}', - }; - mockCreateABTest.mockResolvedValue({ abTestId: 'abt-007', abTestArn: 'arn:abt:007' }); - - await setupABTests({ - region: 'us-east-1', - projectSpec: makeProjectSpec([testWithPlaceholder]), - deployedResources: { - httpGateways: { - 'my-http-gw': { - gatewayId: 'httpgw-001', - gatewayArn: 'arn:aws:bedrock-agentcore:us-east-1:123:httpgateway/httpgw-001', - }, - }, - } as unknown as DeployedResourceState, - }); - - expect(mockCreateABTest.mock.calls[0]![0].gatewayArn).toBe( - 'arn:aws:bedrock-agentcore:us-east-1:123:httpgateway/httpgw-001' - ); - }); - - it('resolves online eval config name to ARN', async () => { - const testWithEvalName = { - ...sampleABTest, - evaluationConfig: { onlineEvaluationConfigArn: 'my-eval-config' }, - }; - mockCreateABTest.mockResolvedValue({ abTestId: 'abt-006', abTestArn: 'arn:abt:006' }); - - await setupABTests({ - region: 'us-east-1', - projectSpec: makeProjectSpec([testWithEvalName]), - deployedResources: { - onlineEvalConfigs: { - 'my-eval-config': { onlineEvaluationConfigArn: 'arn:eval:resolved' }, - }, - } as unknown as DeployedResourceState, - }); - - expect(mockCreateABTest.mock.calls[0]![0].evaluationConfig.onlineEvaluationConfigArn).toBe('arn:eval:resolved'); - }); - - it('resolves 
target-based variant with project prefix (runtime === projectName)', async () => { - const targetBasedTest = { - ...sampleABTest, - mode: 'target-based' as const, - variants: [ - { - name: 'C' as const, - weight: 90, - variantConfiguration: { target: { targetName: 'MyAgent-prod' } }, - }, - { - name: 'T1' as const, - weight: 10, - variantConfiguration: { target: { targetName: 'MyAgent-staging' } }, - }, - ], - }; - mockCreateABTest.mockResolvedValue({ abTestId: 'abt-tgt-1', abTestArn: 'arn:abt:tgt1' }); - - await setupABTests({ - region: 'us-east-1', - projectSpec: makeProjectSpec([targetBasedTest]), - }); - - const callArgs = mockCreateABTest.mock.calls[0]![0]; - expect(callArgs.variants[0].variantConfiguration.target.name).toBe('TestProject-MyAgent-prod'); - expect(callArgs.variants[1].variantConfiguration.target.name).toBe('TestProject-MyAgent-staging'); - }); - - it('resolves target-based variant with project prefix (different project name)', async () => { - const targetBasedTest = { - ...sampleABTest, - mode: 'target-based' as const, - variants: [ - { - name: 'C' as const, - weight: 90, - variantConfiguration: { target: { targetName: 'Bar-prod' } }, - }, - { - name: 'T1' as const, - weight: 10, - variantConfiguration: { target: { targetName: 'Bar-staging' } }, - }, - ], - }; - mockCreateABTest.mockResolvedValue({ abTestId: 'abt-tgt-2', abTestArn: 'arn:abt:tgt2' }); - - const spec = makeProjectSpec([targetBasedTest]); - spec.name = 'Foo'; - - await setupABTests({ - region: 'us-east-1', - projectSpec: spec, - }); - - const callArgs = mockCreateABTest.mock.calls[0]![0]; - expect(callArgs.variants[0].variantConfiguration.target.name).toBe('Foo-Bar-prod'); - expect(callArgs.variants[1].variantConfiguration.target.name).toBe('Foo-Bar-staging'); - }); - }); - - describe('deletion (reconciliation)', () => { - it('stops, polls until executionStatus is STOPPED, then deletes orphaned AB test', async () => { - const callOrder: string[] = []; - 
mockUpdateABTest.mockImplementation(() => { - callOrder.push('stop'); - return Promise.resolve({}); - }); - let getCallCount = 0; - mockGetABTest.mockImplementation(() => { - getCallCount++; - callOrder.push(`poll(${getCallCount})`); - // First poll: executionStatus not yet STOPPED (still transitioning) - if (getCallCount === 1) return Promise.resolve({ status: 'ACTIVE', executionStatus: 'RUNNING' }); - // Second poll: executionStatus is STOPPED — done - return Promise.resolve({ status: 'ACTIVE', executionStatus: 'STOPPED' }); - }); - mockDeleteABTest.mockImplementation(() => { - callOrder.push('delete'); - return Promise.resolve({ success: true }); - }); - - const result = await deleteOrphanedABTests({ - region: 'us-east-1', - projectSpec: makeProjectSpec([]), - existingABTests: { - RemovedTest: { abTestId: 'abt-old', abTestArn: 'arn:abt:old' }, - }, - }); - - // Verify: stop → poll (RUNNING) → poll (STOPPED) → delete - expect(callOrder).toEqual(['stop', 'poll(1)', 'poll(2)', 'delete']); - expect(mockUpdateABTest).toHaveBeenCalledWith({ - region: 'us-east-1', - abTestId: 'abt-old', - executionStatus: 'STOPPED', - }); - expect(result.results[0]!.status).toBe('deleted'); - }); - - it('proceeds with delete when stop fails (already stopped)', async () => { - mockUpdateABTest.mockRejectedValue(new Error('Cannot update in current state')); - mockDeleteABTest.mockResolvedValue({ success: true }); - - const result = await deleteOrphanedABTests({ - region: 'us-east-1', - projectSpec: makeProjectSpec([]), - existingABTests: { - RemovedTest: { abTestId: 'abt-stopped', abTestArn: 'arn:abt:stopped' }, - }, - }); - - expect(mockUpdateABTest).toHaveBeenCalled(); - expect(mockDeleteABTest).toHaveBeenCalled(); - expect(result.results[0]!.status).toBe('deleted'); - }); - - it('cleans up auto-created IAM role on deletion', async () => { - mockDeleteABTest.mockResolvedValue({ success: true }); - mockIAMSend.mockResolvedValue({}); - - await deleteOrphanedABTests({ - region: 
'us-east-1', - projectSpec: makeProjectSpec([]), - existingABTests: { - RemovedTest: { - abTestId: 'abt-old', - abTestArn: 'arn:abt:old', - roleArn: 'arn:aws:iam::123:role/AutoCreatedRole', - roleCreatedByCli: true, - }, - }, - }); - - // Should have called delete policy + delete role - expect(mockIAMSend).toHaveBeenCalledTimes(2); - - // Verify first call is DeleteRolePolicyCommand - const firstCall = mockIAMSend.mock.calls[0]![0]; - expect(firstCall.input).toEqual( - expect.objectContaining({ RoleName: 'AutoCreatedRole', PolicyName: expect.any(String) }) - ); - - // Verify second call is DeleteRoleCommand - const secondCall = mockIAMSend.mock.calls[1]![0]; - expect(secondCall.input).toEqual(expect.objectContaining({ RoleName: 'AutoCreatedRole' })); - }); - - it('does not delete role when roleCreatedByCli is false', async () => { - mockDeleteABTest.mockResolvedValue({ success: true }); - - await deleteOrphanedABTests({ - region: 'us-east-1', - projectSpec: makeProjectSpec([]), - existingABTests: { - RemovedTest: { - abTestId: 'abt-old', - abTestArn: 'arn:abt:old', - roleArn: 'arn:aws:iam::123:role/UserRole', - roleCreatedByCli: false, - }, - }, - }); - - expect(mockIAMSend).not.toHaveBeenCalled(); - }); - - it('reports error when deletion fails', async () => { - mockDeleteABTest.mockRejectedValue(new Error('delete failed')); - - const result = await deleteOrphanedABTests({ - region: 'us-east-1', - projectSpec: makeProjectSpec([]), - existingABTests: { - FailTest: { abTestId: 'abt-fail', abTestArn: 'arn:abt:fail' }, - }, - }); - - expect(result.hasErrors).toBe(true); - expect(result.results[0]!.status).toBe('error'); - expect(result.results[0]!.error).toBe('delete failed'); - }); - - it('sets warning when AB test was stopped before deletion', async () => { - mockUpdateABTest.mockResolvedValue({}); - mockGetABTest.mockResolvedValue({ status: 'ACTIVE', executionStatus: 'STOPPED' }); - mockDeleteABTest.mockResolvedValue({ success: true }); - - const result = await 
deleteOrphanedABTests({ - region: 'us-east-1', - projectSpec: makeProjectSpec([]), - existingABTests: { - StoppedTest: { abTestId: 'abt-warn', abTestArn: 'arn:abt:warn' }, - }, - }); - - expect(result.results[0]!.status).toBe('deleted'); - expect(result.results[0]!.warning).toBe('AB test "StoppedTest" was stopped before deletion'); - }); - - it('does not set warning when stop fails (already stopped)', async () => { - mockUpdateABTest.mockRejectedValue(new Error('Cannot update')); - mockDeleteABTest.mockResolvedValue({ success: true }); - - const result = await deleteOrphanedABTests({ - region: 'us-east-1', - projectSpec: makeProjectSpec([]), - existingABTests: { - AlreadyStopped: { abTestId: 'abt-no-warn', abTestArn: 'arn:abt:no-warn' }, - }, - }); - - expect(result.results[0]!.status).toBe('deleted'); - expect(result.results[0]!.warning).toBeUndefined(); - }); - - it('proceeds with delete even when poll never reaches STOPPED (timeout)', async () => { - mockUpdateABTest.mockResolvedValue({}); - // executionStatus never becomes STOPPED — always RUNNING - mockGetABTest.mockResolvedValue({ status: 'ACTIVE', executionStatus: 'RUNNING' }); - mockDeleteABTest.mockResolvedValue({ success: true }); - - const result = await deleteOrphanedABTests({ - region: 'us-east-1', - projectSpec: makeProjectSpec([]), - existingABTests: { - StuckTest: { abTestId: 'abt-stuck', abTestArn: 'arn:abt:stuck' }, - }, - }); - - // Should still attempt delete after exhausting poll loop - expect(mockDeleteABTest).toHaveBeenCalledWith({ region: 'us-east-1', abTestId: 'abt-stuck' }); - expect(result.results[0]!.status).toBe('deleted'); - // Poll was called 20 times (the loop limit) - expect(mockGetABTest).toHaveBeenCalledTimes(20); - // Should warn that polling timed out - expect(result.results[0]!.warning).toBe( - 'AB test "StuckTest" did not reach STOPPED status within the polling window — proceeding with delete' - ); - }, 120_000); - - it('sets warning even when deleteABTest returns success: 
false', async () => { - mockUpdateABTest.mockResolvedValue({}); - mockGetABTest.mockResolvedValue({ status: 'ACTIVE', executionStatus: 'STOPPED' }); - mockDeleteABTest.mockResolvedValue({ success: false, error: 'still running' }); - - const result = await deleteOrphanedABTests({ - region: 'us-east-1', - projectSpec: makeProjectSpec([]), - existingABTests: { - FailAfterStop: { abTestId: 'abt-fail-stop', abTestArn: 'arn:abt:fail-stop' }, - }, - }); - - expect(result.results[0]!.status).toBe('error'); - expect(result.results[0]!.error).toBe('still running'); - // Warning should still be set because stop succeeded - expect(result.results[0]!.warning).toBe('AB test "FailAfterStop" was stopped before deletion'); - }); - }); - - describe('IAM role creation', () => { - it('creates role with correct trust policy and inline policy', async () => { - const testWithoutRole = { ...sampleABTest, roleArn: undefined }; - mockCreateABTest.mockResolvedValue({ abTestId: 'abt-iam', abTestArn: 'arn:abt:iam' }); - mockIAMSend.mockResolvedValue({ Role: { Arn: 'arn:aws:iam::123:role/AutoRole' } }); - - await setupABTests({ - region: 'us-east-1', - projectSpec: makeProjectSpec([testWithoutRole]), - }); - - // First call: CreateRoleCommand with trust policy - const createRoleCall = mockIAMSend.mock.calls[0]![0]; - const trustPolicy = JSON.parse(createRoleCall.input.AssumeRolePolicyDocument); - expect(trustPolicy.Statement).toHaveLength(1); - expect(trustPolicy.Statement[0].Principal.Service).toBe('bedrock-agentcore.amazonaws.com'); - expect(trustPolicy.Statement[0].Condition.StringEquals['aws:SourceAccount']).toBeDefined(); - expect(trustPolicy.Statement[0].Condition.ArnLike['aws:SourceArn']).toContain('ab-test/*'); - - // Second call: PutRolePolicyCommand with inline policy - const putPolicyCall = mockIAMSend.mock.calls[1]![0]; - const policy = JSON.parse(putPolicyCall.input.PolicyDocument); - const sids = policy.Statement.map((s: { Sid: string }) => s.Sid); - 
expect(sids).toContain('AgentCoreResources'); - expect(sids).toContain('CloudWatchLogs'); - - // AgentCoreResources must include all required actions - const agentCoreStmt = policy.Statement.find((s: { Sid: string }) => s.Sid === 'AgentCoreResources'); - expect(agentCoreStmt.Action).toContain('bedrock-agentcore:GetEvaluator'); - expect(agentCoreStmt.Action).toContain('bedrock-agentcore:GetGateway'); - expect(agentCoreStmt.Action).toContain('bedrock-agentcore:GetOnlineEvaluationConfig'); - expect(agentCoreStmt.Condition.StringEquals['aws:ResourceAccount']).toBeDefined(); - }); - }); - - describe('edge cases', () => { - it('proceeds with creation when listABTests fails', async () => { - mockListABTests.mockRejectedValue(new Error('API unavailable')); - mockCreateABTest.mockResolvedValue({ abTestId: 'abt-new', abTestArn: 'arn:abt:new' }); - - const result = await setupABTests({ - region: 'us-east-1', - projectSpec: makeProjectSpec([sampleABTest]), - }); - - expect(result.results[0]!.status).toBe('created'); - expect(mockCreateABTest).toHaveBeenCalled(); - }); - - it('swallows errors during IAM role deletion', async () => { - mockDeleteABTest.mockResolvedValue({ success: true }); - mockIAMSend.mockRejectedValue(new Error('IAM permission denied')); - - const result = await deleteOrphanedABTests({ - region: 'us-east-1', - projectSpec: makeProjectSpec([]), - existingABTests: { - OldTest: { - abTestId: 'abt-old', - abTestArn: 'arn:abt:old', - roleArn: 'arn:aws:iam::123:role/SomeRole', - roleCreatedByCli: true, - }, - }, - }); - - // Deletion should still succeed even though IAM cleanup failed - expect(result.results[0]!.status).toBe('deleted'); - }); - }); - - describe('mixed operations', () => { - it('creates new and updates existing', async () => { - const newTest = { ...sampleABTest, name: 'NewTest' }; - const keptTest = { ...sampleABTest, name: 'KeptTest' }; - - mockCreateABTest.mockResolvedValue({ abTestId: 'abt-new', abTestArn: 'arn:abt:new' }); - 
mockUpdateABTest.mockResolvedValue({ abTestId: 'abt-kept', abTestArn: 'arn:abt:kept' }); - - const result = await setupABTests({ - region: 'us-east-1', - projectSpec: makeProjectSpec([newTest, keptTest]), - existingABTests: { - KeptTest: { abTestId: 'abt-kept', abTestArn: 'arn:abt:kept' }, - }, - }); - - expect(result.results).toHaveLength(2); - const statuses = result.results.map(r => `${r.testName}:${r.status}`); - expect(statuses).toContain('NewTest:created'); - expect(statuses).toContain('KeptTest:updated'); - }); - }); -}); diff --git a/src/cli/operations/deploy/__tests__/post-deploy-config-bundles.test.ts b/src/cli/operations/deploy/__tests__/post-deploy-config-bundles.test.ts deleted file mode 100644 index cd2dc82a8..000000000 --- a/src/cli/operations/deploy/__tests__/post-deploy-config-bundles.test.ts +++ /dev/null @@ -1,654 +0,0 @@ -import type { AgentCoreProjectSpec, DeployedState } from '../../../../schema'; -import { resolveConfigBundleComponentKeys, setupConfigBundles } from '../post-deploy-config-bundles.js'; -import { beforeEach, describe, expect, it, vi } from 'vitest'; - -const { - mockCreateConfigurationBundle, - mockDeleteConfigurationBundle, - mockGetConfigurationBundleVersion, - mockListConfigurationBundleVersions, - mockListConfigurationBundles, - mockUpdateConfigurationBundle, -} = vi.hoisted(() => ({ - mockCreateConfigurationBundle: vi.fn(), - mockDeleteConfigurationBundle: vi.fn(), - mockGetConfigurationBundleVersion: vi.fn(), - mockListConfigurationBundleVersions: vi.fn(), - mockListConfigurationBundles: vi.fn(), - mockUpdateConfigurationBundle: vi.fn(), -})); - -vi.mock('../../../aws/agentcore-config-bundles', () => ({ - createConfigurationBundle: mockCreateConfigurationBundle, - deleteConfigurationBundle: mockDeleteConfigurationBundle, - getConfigurationBundleVersion: mockGetConfigurationBundleVersion, - listConfigurationBundleVersions: mockListConfigurationBundleVersions, - listConfigurationBundles: mockListConfigurationBundles, - 
updateConfigurationBundle: mockUpdateConfigurationBundle, -})); - -const REGION = 'us-west-2'; - -function makeProjectSpec(configBundles: Record[]) { - return { name: 'TestProject', configBundles } as any; -} - -describe('setupConfigBundles', () => { - beforeEach(() => { - vi.clearAllMocks(); - }); - - describe('create new bundle', () => { - it('should create a new bundle when not in existingBundles and not found by name', async () => { - mockListConfigurationBundles.mockResolvedValue({ bundles: [] }); - mockCreateConfigurationBundle.mockResolvedValue({ - bundleId: 'b-new', - bundleArn: 'arn:aws:agentcore:us-west-2:123:bundle/b-new', - versionId: 'v-1', - }); - - const result = await setupConfigBundles({ - region: REGION, - projectSpec: makeProjectSpec([ - { name: 'MyBundle', type: 'ConfigurationBundle', components: { foo: { type: 'inline', value: 'bar' } } }, - ]), - }); - - expect(mockCreateConfigurationBundle).toHaveBeenCalledWith( - expect.objectContaining({ - region: REGION, - bundleName: 'TestProjectMyBundle', - components: { foo: { type: 'inline', value: 'bar' } }, - commitMessage: 'Create MyBundle', - }) - ); - expect(result.hasErrors).toBe(false); - expect(result.results).toHaveLength(1); - expect(result.results[0]).toMatchObject({ bundleName: 'MyBundle', status: 'created', bundleId: 'b-new' }); - expect(result.configBundles.MyBundle).toEqual({ - bundleId: 'b-new', - bundleArn: 'arn:aws:agentcore:us-west-2:123:bundle/b-new', - versionId: 'v-1', - }); - }); - }); - - describe('update existing bundle', () => { - it('should update an existing bundle when components have changed', async () => { - const existingBundles = { - MyBundle: { - bundleId: 'b-123', - bundleArn: 'arn:aws:agentcore:us-west-2:123:bundle/b-123', - versionId: 'v-1', - }, - }; - - mockGetConfigurationBundleVersion.mockResolvedValue({ - bundleId: 'b-123', - bundleArn: 'arn:aws:agentcore:us-west-2:123:bundle/b-123', - versionId: 'v-1', - components: { foo: { type: 'inline', value: 'old' } }, - 
description: undefined, - lineageMetadata: { branchName: 'main' }, - }); - - mockUpdateConfigurationBundle.mockResolvedValue({ - bundleId: 'b-123', - bundleArn: 'arn:aws:agentcore:us-west-2:123:bundle/b-123', - versionId: 'v-2', - }); - - const result = await setupConfigBundles({ - region: REGION, - projectSpec: makeProjectSpec([ - { name: 'MyBundle', type: 'ConfigurationBundle', components: { foo: { type: 'inline', value: 'new' } } }, - ]), - existingBundles, - }); - - expect(mockUpdateConfigurationBundle).toHaveBeenCalledWith( - expect.objectContaining({ - region: REGION, - bundleId: 'b-123', - components: { foo: { type: 'inline', value: 'new' } }, - parentVersionIds: ['v-1'], - branchName: 'main', - commitMessage: 'Update MyBundle', - }) - ); - expect(result.results[0]).toMatchObject({ status: 'updated', versionId: 'v-2' }); - expect(result.hasErrors).toBe(false); - }); - }); - - describe('skip unchanged bundle', () => { - it('should skip update when components and description are unchanged', async () => { - const components = { foo: { type: 'inline', value: 'same' } }; - const existingBundles = { - MyBundle: { - bundleId: 'b-123', - bundleArn: 'arn:aws:agentcore:us-west-2:123:bundle/b-123', - versionId: 'v-1', - }, - }; - - mockGetConfigurationBundleVersion.mockResolvedValue({ - bundleId: 'b-123', - bundleArn: 'arn:aws:agentcore:us-west-2:123:bundle/b-123', - versionId: 'v-1', - components, - description: 'My desc', - lineageMetadata: { branchName: 'main' }, - }); - - const result = await setupConfigBundles({ - region: REGION, - projectSpec: makeProjectSpec([ - { name: 'MyBundle', type: 'ConfigurationBundle', components, description: 'My desc' }, - ]), - existingBundles, - }); - - expect(mockUpdateConfigurationBundle).not.toHaveBeenCalled(); - expect(mockCreateConfigurationBundle).not.toHaveBeenCalled(); - expect(result.results[0]).toMatchObject({ bundleName: 'MyBundle', status: 'skipped', versionId: 'v-1' }); - 
expect(result.configBundles.MyBundle).toEqual(existingBundles.MyBundle); - }); - }); - - describe('deep equal is key-order-independent', () => { - it('should skip update when components differ only in key order', async () => { - const existingBundles = { - MyBundle: { - bundleId: 'b-123', - bundleArn: 'arn:aws:agentcore:us-west-2:123:bundle/b-123', - versionId: 'v-1', - }, - }; - - // API returns keys in one order - mockGetConfigurationBundleVersion.mockResolvedValue({ - bundleId: 'b-123', - bundleArn: 'arn:aws:agentcore:us-west-2:123:bundle/b-123', - versionId: 'v-1', - components: { a: { type: 'inline', value: '1' }, b: { type: 'inline', value: '2' } }, - description: undefined, - lineageMetadata: { branchName: 'main' }, - }); - - // Spec has same keys in different order - const result = await setupConfigBundles({ - region: REGION, - projectSpec: makeProjectSpec([ - { - name: 'MyBundle', - components: { b: { type: 'inline', value: '2' }, a: { type: 'inline', value: '1' } }, - }, - ]), - existingBundles, - }); - - expect(mockUpdateConfigurationBundle).not.toHaveBeenCalled(); - expect(result.results[0]).toMatchObject({ status: 'skipped' }); - }); - }); - - describe('delete orphaned bundles', () => { - it('should delete bundles in existingBundles but not in projectSpec', async () => { - const existingBundles = { - OrphanBundle: { - bundleId: 'b-orphan', - bundleArn: 'arn:aws:agentcore:us-west-2:123:bundle/b-orphan', - versionId: 'v-1', - }, - }; - - mockDeleteConfigurationBundle.mockResolvedValue(undefined); - - const result = await setupConfigBundles({ - region: REGION, - projectSpec: makeProjectSpec([]), - existingBundles, - }); - - expect(mockDeleteConfigurationBundle).toHaveBeenCalledWith({ - region: REGION, - bundleId: 'b-orphan', - }); - expect(result.results[0]).toMatchObject({ bundleName: 'OrphanBundle', status: 'deleted' }); - expect(result.hasErrors).toBe(false); - }); - - it('should report error status when delete throws', async () => { - const 
existingBundles = { - OrphanBundle: { - bundleId: 'b-orphan', - bundleArn: 'arn:aws:agentcore:us-west-2:123:bundle/b-orphan', - versionId: 'v-1', - }, - }; - - mockDeleteConfigurationBundle.mockRejectedValue(new Error('Access denied')); - - const result = await setupConfigBundles({ - region: REGION, - projectSpec: makeProjectSpec([]), - existingBundles, - }); - - expect(result.results[0]).toMatchObject({ bundleName: 'OrphanBundle', status: 'error', error: 'Access denied' }); - expect(result.hasErrors).toBe(true); - }); - }); - - describe('uses branch from API when bundleSpec has no branchName', () => { - it('should use branchName from getConfigurationBundleVersion lineageMetadata', async () => { - const existingBundles = { - MyBundle: { - bundleId: 'b-123', - bundleArn: 'arn:aws:agentcore:us-west-2:123:bundle/b-123', - versionId: 'v-1', - }, - }; - - mockGetConfigurationBundleVersion.mockResolvedValue({ - bundleId: 'b-123', - bundleArn: 'arn:aws:agentcore:us-west-2:123:bundle/b-123', - versionId: 'v-1', - components: { old: { type: 'inline', value: 'data' } }, - description: undefined, - lineageMetadata: { branchName: 'feature-branch' }, - }); - - mockUpdateConfigurationBundle.mockResolvedValue({ - bundleId: 'b-123', - bundleArn: 'arn:aws:agentcore:us-west-2:123:bundle/b-123', - versionId: 'v-2', - }); - - await setupConfigBundles({ - region: REGION, - projectSpec: makeProjectSpec([ - { - name: 'MyBundle', - components: { new: { type: 'inline', value: 'data' } }, - // no branchName specified - }, - ]), - existingBundles, - }); - - expect(mockUpdateConfigurationBundle).toHaveBeenCalledWith( - expect.objectContaining({ - branchName: 'feature-branch', - }) - ); - }); - - it('should prefer bundleSpec branchName over API branchName', async () => { - const existingBundles = { - MyBundle: { - bundleId: 'b-123', - bundleArn: 'arn:aws:agentcore:us-west-2:123:bundle/b-123', - versionId: 'v-1', - }, - }; - - mockGetConfigurationBundleVersion.mockResolvedValue({ - bundleId: 
'b-123', - bundleArn: 'arn:aws:agentcore:us-west-2:123:bundle/b-123', - versionId: 'v-1', - components: { old: { type: 'inline', value: 'data' } }, - description: undefined, - lineageMetadata: { branchName: 'api-branch' }, - }); - - mockUpdateConfigurationBundle.mockResolvedValue({ - bundleId: 'b-123', - bundleArn: 'arn:aws:agentcore:us-west-2:123:bundle/b-123', - versionId: 'v-2', - }); - - await setupConfigBundles({ - region: REGION, - projectSpec: makeProjectSpec([ - { - name: 'MyBundle', - components: { new: { type: 'inline', value: 'data' } }, - branchName: 'spec-branch', - }, - ]), - existingBundles, - }); - - expect(mockUpdateConfigurationBundle).toHaveBeenCalledWith( - expect.objectContaining({ - branchName: 'spec-branch', - }) - ); - }); - }); - - describe('fallback path via findBundleByName', () => { - it('should fall through to findBundleByName when getConfigurationBundleVersion throws 404', async () => { - const existingBundles = { - MyBundle: { - bundleId: 'b-old', - bundleArn: 'arn:aws:agentcore:us-west-2:123:bundle/b-old', - versionId: 'v-old', - }, - }; - - // First call (existing bundle path) throws 404 - mockGetConfigurationBundleVersion.mockRejectedValueOnce(new Error('404 not found')).mockResolvedValueOnce({ - bundleId: 'b-found', - bundleArn: 'arn:aws:agentcore:us-west-2:123:bundle/b-found', - versionId: 'v-latest', - components: { old: { type: 'inline', value: 'data' } }, - description: undefined, - lineageMetadata: { branchName: 'main' }, - }); - - mockListConfigurationBundles.mockResolvedValue({ - bundles: [{ bundleId: 'b-found', bundleName: 'TestProjectMyBundle' }], - }); - - mockListConfigurationBundleVersions.mockResolvedValue({ - versions: [{ versionId: 'v-latest', versionCreatedAt: 1234567890 }], - }); - - mockUpdateConfigurationBundle.mockResolvedValue({ - bundleId: 'b-found', - bundleArn: 'arn:aws:agentcore:us-west-2:123:bundle/b-found', - versionId: 'v-new', - }); - - const result = await setupConfigBundles({ - region: REGION, - 
projectSpec: makeProjectSpec([ - { - name: 'MyBundle', - components: { new: { type: 'inline', value: 'data' } }, - }, - ]), - existingBundles, - }); - - expect(mockListConfigurationBundles).toHaveBeenCalledWith({ region: REGION, maxResults: 100 }); - expect(mockListConfigurationBundleVersions).toHaveBeenCalledWith({ - region: REGION, - bundleId: 'b-found', - }); - expect(result.results[0]).toMatchObject({ status: 'updated', bundleId: 'b-found', versionId: 'v-new' }); - expect(result.hasErrors).toBe(false); - }); - - it('should create a new bundle when findBundleByName returns nothing after 404', async () => { - const existingBundles = { - MyBundle: { - bundleId: 'b-old', - bundleArn: 'arn:aws:agentcore:us-west-2:123:bundle/b-old', - versionId: 'v-old', - }, - }; - - mockGetConfigurationBundleVersion.mockRejectedValueOnce(new Error('404 not found')); - mockListConfigurationBundles.mockResolvedValue({ bundles: [] }); - mockCreateConfigurationBundle.mockResolvedValue({ - bundleId: 'b-new', - bundleArn: 'arn:aws:agentcore:us-west-2:123:bundle/b-new', - versionId: 'v-1', - }); - - const result = await setupConfigBundles({ - region: REGION, - projectSpec: makeProjectSpec([ - { name: 'MyBundle', type: 'ConfigurationBundle', components: { x: { type: 'inline', value: '1' } } }, - ]), - existingBundles, - }); - - expect(mockCreateConfigurationBundle).toHaveBeenCalled(); - expect(result.results[0]).toMatchObject({ status: 'created', bundleId: 'b-new' }); - }); - }); - - describe('error handling', () => { - it('should report error status when create fails', async () => { - mockListConfigurationBundles.mockResolvedValue({ bundles: [] }); - mockCreateConfigurationBundle.mockRejectedValue(new Error('Service unavailable')); - - const result = await setupConfigBundles({ - region: REGION, - projectSpec: makeProjectSpec([ - { name: 'MyBundle', type: 'ConfigurationBundle', components: { x: { type: 'inline', value: '1' } } }, - ]), - }); - - expect(result.results[0]).toMatchObject({ - 
bundleName: 'MyBundle', - status: 'error', - error: 'Service unavailable', - }); - expect(result.hasErrors).toBe(true); - }); - - it('should report error status when update fails with non-404 error', async () => { - const existingBundles = { - MyBundle: { - bundleId: 'b-123', - bundleArn: 'arn:aws:agentcore:us-west-2:123:bundle/b-123', - versionId: 'v-1', - }, - }; - - mockGetConfigurationBundleVersion.mockRejectedValue(new Error('Throttling exception')); - - const result = await setupConfigBundles({ - region: REGION, - projectSpec: makeProjectSpec([ - { name: 'MyBundle', type: 'ConfigurationBundle', components: { x: { type: 'inline', value: '1' } } }, - ]), - existingBundles, - }); - - expect(result.results[0]).toMatchObject({ - bundleName: 'MyBundle', - status: 'error', - error: 'Throttling exception', - }); - expect(result.hasErrors).toBe(true); - // Should NOT fall through to findBundleByName - expect(mockListConfigurationBundles).not.toHaveBeenCalled(); - }); - - it('should report error when delete throws an exception', async () => { - const existingBundles = { - OrphanBundle: { - bundleId: 'b-orphan', - bundleArn: 'arn:aws:agentcore:us-west-2:123:bundle/b-orphan', - versionId: 'v-1', - }, - }; - - mockDeleteConfigurationBundle.mockRejectedValue(new Error('Network error')); - - const result = await setupConfigBundles({ - region: REGION, - projectSpec: makeProjectSpec([]), - existingBundles, - }); - - expect(result.results[0]).toMatchObject({ - bundleName: 'OrphanBundle', - status: 'error', - error: 'Network error', - }); - expect(result.hasErrors).toBe(true); - }); - }); -}); - -// ── resolveConfigBundleComponentKeys ─────────────────────────────────────── - -describe('resolveConfigBundleComponentKeys', () => { - function makeFullProjectSpec(configBundles: AgentCoreProjectSpec['configBundles'] = []): AgentCoreProjectSpec { - return { - name: 'TestProject', - version: 1, - managedBy: 'CDK' as const, - runtimes: [], - memories: [], - credentials: [], - 
evaluators: [], - onlineEvalConfigs: [], - agentCoreGateways: [], - policyEngines: [], - configBundles, - httpGateways: [], - datasets: [], - abTests: [], - harnesses: [], - payments: [], - }; - } - - function makeDeployedState(targetName: string, resources: Record): DeployedState { - return { - targets: { - [targetName]: { resources }, - }, - } as unknown as DeployedState; - } - - it('returns projectSpec unchanged when target has no resources', () => { - const spec = makeFullProjectSpec([ - { name: 'b1', components: { '{{runtime:my-rt}}': { configuration: { k: 'v' } } } } as any, - ]); - const deployedState = { targets: {} } as unknown as DeployedState; - - const result = resolveConfigBundleComponentKeys(spec, deployedState, 'missing-target'); - expect(result).toBe(spec); // same reference — no transformation - }); - - it('resolves {{runtime:name}} placeholder to runtime ARN', () => { - const spec = makeFullProjectSpec([ - { name: 'b1', components: { '{{runtime:my-agent}}': { configuration: { k: 'v' } } } } as any, - ]); - const deployedState = makeDeployedState('target1', { - runtimes: { 'my-agent': { runtimeArn: 'arn:aws:bedrock-agentcore:us-east-1:123:runtime/rt-1' } }, - }); - - const result = resolveConfigBundleComponentKeys(spec, deployedState, 'target1'); - const keys = Object.keys(result.configBundles[0]!.components); - expect(keys).toEqual(['arn:aws:bedrock-agentcore:us-east-1:123:runtime/rt-1']); - }); - - it('resolves {{gateway:name}} placeholder to HTTP gateway ARN', () => { - const spec = makeFullProjectSpec([ - { name: 'b1', components: { '{{gateway:my-gw}}': { configuration: { k: 'v' } } } } as any, - ]); - const deployedState = makeDeployedState('target1', { - httpGateways: { 'my-gw': { gatewayArn: 'arn:aws:bedrock-agentcore:us-east-1:123:gateway/gw-1' } }, - }); - - const result = resolveConfigBundleComponentKeys(spec, deployedState, 'target1'); - const keys = Object.keys(result.configBundles[0]!.components); - 
expect(keys).toEqual(['arn:aws:bedrock-agentcore:us-east-1:123:gateway/gw-1']); - }); - - it('resolves {{gateway:name}} placeholder to MCP gateway ARN', () => { - const spec = makeFullProjectSpec([ - { name: 'b1', components: { '{{gateway:my-mcp-gw}}': { configuration: { k: 'v' } } } } as any, - ]); - const deployedState = makeDeployedState('target1', { - mcp: { gateways: { 'my-mcp-gw': { gatewayArn: 'arn:mcp:gw:resolved' } } }, - }); - - const result = resolveConfigBundleComponentKeys(spec, deployedState, 'target1'); - const keys = Object.keys(result.configBundles[0]!.components); - expect(keys).toEqual(['arn:mcp:gw:resolved']); - }); - - it('passes through keys that are already ARNs', () => { - const spec = makeFullProjectSpec([ - { name: 'b1', components: { 'arn:existing:key': { configuration: { k: 'v' } } } } as any, - ]); - const deployedState = makeDeployedState('target1', { runtimes: {} }); - - const result = resolveConfigBundleComponentKeys(spec, deployedState, 'target1'); - const keys = Object.keys(result.configBundles[0]!.components); - expect(keys).toEqual(['arn:existing:key']); - }); - - it('passes through plain string keys that are not placeholders or ARNs', () => { - const spec = makeFullProjectSpec([ - { name: 'b1', components: { 'some-plain-key': { configuration: { k: 'v' } } } } as any, - ]); - const deployedState = makeDeployedState('target1', { runtimes: {} }); - - const result = resolveConfigBundleComponentKeys(spec, deployedState, 'target1'); - const keys = Object.keys(result.configBundles[0]!.components); - expect(keys).toEqual(['some-plain-key']); - }); - - it('throws when gateway placeholder references non-existent gateway', () => { - const spec = makeFullProjectSpec([ - { name: 'b1', components: { '{{gateway:missing}}': { configuration: {} } } } as any, - ]); - const deployedState = makeDeployedState('target1', { httpGateways: {}, mcp: { gateways: {} } }); - - expect(() => resolveConfigBundleComponentKeys(spec, deployedState, 
'target1')).toThrow( - 'Config bundle references gateway "missing" but it was not found in deployed resources' - ); - }); - - it('throws when runtime placeholder references non-existent runtime', () => { - const spec = makeFullProjectSpec([ - { name: 'b1', components: { '{{runtime:missing}}': { configuration: {} } } } as any, - ]); - const deployedState = makeDeployedState('target1', { runtimes: {} }); - - expect(() => resolveConfigBundleComponentKeys(spec, deployedState, 'target1')).toThrow( - 'Config bundle references runtime "missing" but it was not found in deployed resources' - ); - }); - - it('handles projectSpec with no configBundles', () => { - const spec = makeFullProjectSpec([]); - const deployedState = makeDeployedState('target1', { runtimes: {} }); - - const result = resolveConfigBundleComponentKeys(spec, deployedState, 'target1'); - expect(result.configBundles).toEqual([]); - }); - - it('does not mutate the original projectSpec', () => { - const spec = makeFullProjectSpec([ - { name: 'b1', components: { '{{runtime:my-rt}}': { configuration: { k: 'v' } } } } as any, - ]); - const deployedState = makeDeployedState('target1', { - runtimes: { 'my-rt': { runtimeArn: 'arn:resolved' } }, - }); - - const result = resolveConfigBundleComponentKeys(spec, deployedState, 'target1'); - // Original should still have the placeholder - expect(Object.keys(spec.configBundles[0]!.components)).toEqual(['{{runtime:my-rt}}']); - // Result should have the resolved key - expect(Object.keys(result.configBundles[0]!.components)).toEqual(['arn:resolved']); - }); - - it('prefers HTTP gateway over MCP gateway when both exist with same name', () => { - const spec = makeFullProjectSpec([ - { name: 'b1', components: { '{{gateway:dupe-gw}}': { configuration: {} } } } as any, - ]); - const deployedState = makeDeployedState('target1', { - httpGateways: { 'dupe-gw': { gatewayArn: 'arn:http:gw' } }, - mcp: { gateways: { 'dupe-gw': { gatewayArn: 'arn:mcp:gw' } } }, - }); - - const result = 
resolveConfigBundleComponentKeys(spec, deployedState, 'target1'); - const keys = Object.keys(result.configBundles[0]!.components); - // HTTP gateway should take precedence (checked first in code) - expect(keys).toEqual(['arn:http:gw']); - }); -}); diff --git a/src/cli/operations/deploy/__tests__/post-deploy-http-gateways.test.ts b/src/cli/operations/deploy/__tests__/post-deploy-http-gateways.test.ts deleted file mode 100644 index 17321f72d..000000000 --- a/src/cli/operations/deploy/__tests__/post-deploy-http-gateways.test.ts +++ /dev/null @@ -1,471 +0,0 @@ -import type { AgentCoreProjectSpec, DeployedResourceState, HttpGatewayDeployedState } from '../../../../schema'; -import { deleteOrphanedHttpGateways, setupHttpGateways } from '../post-deploy-http-gateways.js'; -import { beforeEach, describe, expect, it, vi } from 'vitest'; - -// ── Hoisted mocks ────────────────────────────────────────────────────────── - -const { - mockCreateHttpGateway, - mockCreateHttpGatewayTarget, - mockDeleteHttpGateway, - mockDeleteHttpGatewayTarget, - mockListAllHttpGateways, - mockListHttpGatewayTargets, - mockWaitForGatewayReady, - mockWaitForTargetReady, - mockGetCredentialProvider, - mockIAMSend, -} = vi.hoisted(() => ({ - mockCreateHttpGateway: vi.fn(), - mockCreateHttpGatewayTarget: vi.fn(), - mockDeleteHttpGateway: vi.fn(), - mockDeleteHttpGatewayTarget: vi.fn(), - mockListAllHttpGateways: vi.fn(), - mockListHttpGatewayTargets: vi.fn(), - mockWaitForGatewayReady: vi.fn(), - mockWaitForTargetReady: vi.fn(), - mockGetCredentialProvider: vi.fn().mockReturnValue(undefined), - mockIAMSend: vi.fn(), -})); - -vi.mock('../../../aws/agentcore-http-gateways', () => ({ - createHttpGateway: mockCreateHttpGateway, - createHttpGatewayTarget: mockCreateHttpGatewayTarget, - deleteHttpGateway: mockDeleteHttpGateway, - deleteHttpGatewayTarget: mockDeleteHttpGatewayTarget, - listAllHttpGateways: mockListAllHttpGateways, - listHttpGatewayTargets: mockListHttpGatewayTargets, - waitForGatewayReady: 
mockWaitForGatewayReady, - waitForTargetReady: mockWaitForTargetReady, -})); - -vi.mock('../../../aws/account', () => ({ - getCredentialProvider: mockGetCredentialProvider, -})); - -vi.mock('@aws-sdk/client-iam', () => ({ - IAMClient: class { - send = mockIAMSend; - }, - CreateRoleCommand: class { - constructor(public input: unknown) {} - }, - GetRoleCommand: class { - constructor(public input: unknown) {} - }, - PutRolePolicyCommand: class { - constructor(public input: unknown) {} - }, - DeleteRolePolicyCommand: class { - constructor(public input: unknown) {} - }, - DeleteRoleCommand: class { - constructor(public input: unknown) {} - }, -})); - -// ── Helpers ──────────────────────────────────────────────────────────────── - -function makeProjectSpec(httpGateways: AgentCoreProjectSpec['httpGateways'] = []): AgentCoreProjectSpec { - return { - name: 'TestProject', - version: 1, - managedBy: 'CDK' as const, - runtimes: [], - memories: [], - credentials: [], - evaluators: [], - onlineEvalConfigs: [], - agentCoreGateways: [], - policyEngines: [], - configBundles: [], - abTests: [], - httpGateways, - harnesses: [], - datasets: [], - payments: [], - }; -} - -const sampleHttpGateway = { - name: 'MyHttpGw', - runtimeRef: 'my-agent', - roleArn: 'arn:aws:iam::123456789012:role/ExistingRole', -}; - -const sampleDeployedResources = { - runtimes: { - 'my-agent': { - runtimeArn: 'arn:aws:bedrock-agentcore:us-east-1:123456789012:runtime/rt-123', - runtimeId: 'rt-123', - }, - }, -} as unknown as DeployedResourceState; - -// ── Tests ────────────────────────────────────────────────────────────────── - -describe('setupHttpGateways', () => { - beforeEach(() => { - vi.clearAllMocks(); - mockListAllHttpGateways.mockResolvedValue([]); - mockListHttpGatewayTargets.mockResolvedValue({ targets: [] }); - mockWaitForGatewayReady.mockResolvedValue({ gatewayId: 'gw-001', status: 'READY' }); - mockWaitForTargetReady.mockResolvedValue({}); - }); - - describe('creation', () => { - it('creates 
gateway + target for new spec entry', async () => { - mockCreateHttpGateway.mockResolvedValue({ - gatewayId: 'gw-001', - gatewayArn: 'arn:aws:bedrock-agentcore:us-east-1:123:httpgateway/gw-001', - }); - mockCreateHttpGatewayTarget.mockResolvedValue({ targetId: 'tgt-001' }); - - const result = await setupHttpGateways({ - region: 'us-east-1', - projectName: 'TestProject', - projectSpec: makeProjectSpec([sampleHttpGateway]), - deployedResources: sampleDeployedResources, - }); - - expect(result.hasErrors).toBe(false); - expect(result.results).toHaveLength(1); - expect(result.results[0]!.status).toBe('created'); - expect(result.results[0]!.gatewayId).toBe('gw-001'); - expect(result.httpGateways.MyHttpGw).toEqual( - expect.objectContaining({ - gatewayId: 'gw-001', - gatewayArn: 'arn:aws:bedrock-agentcore:us-east-1:123:httpgateway/gw-001', - targetId: 'tgt-001', - }) - ); - - expect(mockCreateHttpGateway).toHaveBeenCalledWith({ - region: 'us-east-1', - name: 'TestProject-MyHttpGw', - roleArn: 'arn:aws:iam::123456789012:role/ExistingRole', - }); - expect(mockCreateHttpGatewayTarget).toHaveBeenCalledWith({ - region: 'us-east-1', - gatewayId: 'gw-001', - targetName: 'TestProject-my-agent', - runtimeArn: 'arn:aws:bedrock-agentcore:us-east-1:123456789012:runtime/rt-123', - }); - }); - - it('skips existing gateway', async () => { - const existingHttpGateways: Record = { - MyHttpGw: { - gatewayId: 'gw-existing', - gatewayArn: 'arn:httpgw:existing', - targetId: 'tgt-existing', - }, - }; - - const result = await setupHttpGateways({ - region: 'us-east-1', - projectName: 'TestProject', - projectSpec: makeProjectSpec([sampleHttpGateway]), - existingHttpGateways, - deployedResources: sampleDeployedResources, - }); - - expect(result.results[0]!.status).toBe('skipped'); - expect(result.results[0]!.gatewayId).toBe('gw-existing'); - expect(mockCreateHttpGateway).not.toHaveBeenCalled(); - expect(mockCreateHttpGatewayTarget).not.toHaveBeenCalled(); - }); - - it('finds gateway by name via 
list (state loss recovery)', async () => { - mockListAllHttpGateways.mockResolvedValue([ - { name: 'TestProject-MyHttpGw', gatewayId: 'gw-api', gatewayArn: 'arn:httpgw:api' }, - ]); - - const result = await setupHttpGateways({ - region: 'us-east-1', - projectName: 'TestProject', - projectSpec: makeProjectSpec([sampleHttpGateway]), - deployedResources: sampleDeployedResources, - }); - - expect(result.results[0]!.status).toBe('skipped'); - expect(result.httpGateways.MyHttpGw!.gatewayId).toBe('gw-api'); - expect(mockCreateHttpGateway).not.toHaveBeenCalled(); - }); - - it('recovers state using legacy (pre-migration) gateway name when prefixed name not found', async () => { - // First call: prefixed name "TestProject-MyHttpGw" → not found - // Second call: unprefixed legacy name "MyHttpGw" → found - mockListAllHttpGateways - .mockResolvedValueOnce([]) - .mockResolvedValueOnce([{ name: 'MyHttpGw', gatewayId: 'gw-legacy', gatewayArn: 'arn:httpgw:legacy' }]); - - const warnSpy = vi.spyOn(console, 'warn').mockReturnValue(undefined); - - const result = await setupHttpGateways({ - region: 'us-east-1', - projectName: 'TestProject', - projectSpec: makeProjectSpec([sampleHttpGateway]), - deployedResources: sampleDeployedResources, - }); - - // findHttpGatewayByName was called twice: once for prefixed, once for unprefixed name - expect(mockListAllHttpGateways).toHaveBeenCalledTimes(2); - - // Gateway result is skipped (not created) - expect(result.results[0]!.status).toBe('skipped'); - expect(result.results[0]!.gatewayId).toBe('gw-legacy'); - expect(result.httpGateways.MyHttpGw!.gatewayId).toBe('gw-legacy'); - - // createHttpGateway was NOT called - expect(mockCreateHttpGateway).not.toHaveBeenCalled(); - - // console.warn was called with the pre-migration warning text - expect(warnSpy).toHaveBeenCalledWith(expect.stringContaining('pre-migration name')); - - warnSpy.mockRestore(); - }); - - it('reports error on missing runtime ref', async () => { - const emptyDeployedResources = 
{} as unknown as DeployedResourceState; - - const result = await setupHttpGateways({ - region: 'us-east-1', - projectName: 'TestProject', - projectSpec: makeProjectSpec([sampleHttpGateway]), - deployedResources: emptyDeployedResources, - }); - - expect(result.hasErrors).toBe(true); - expect(result.results[0]!.status).toBe('error'); - expect(result.results[0]!.error).toContain('Runtime "my-agent" not found'); - expect(mockCreateHttpGateway).not.toHaveBeenCalled(); - }); - - it('auto-creates IAM role when roleArn not provided', async () => { - const gwWithoutRole = { ...sampleHttpGateway, roleArn: undefined }; - mockCreateHttpGateway.mockResolvedValue({ - gatewayId: 'gw-002', - gatewayArn: 'arn:httpgw:002', - }); - mockCreateHttpGatewayTarget.mockResolvedValue({ targetId: 'tgt-002' }); - mockIAMSend.mockResolvedValue({ Role: { Arn: 'arn:aws:iam::123:role/AutoRole' } }); - - const result = await setupHttpGateways({ - region: 'us-east-1', - projectName: 'TestProject', - projectSpec: makeProjectSpec([gwWithoutRole]), - deployedResources: sampleDeployedResources, - }); - - expect(result.results[0]!.status).toBe('created'); - expect(result.httpGateways.MyHttpGw!.roleCreatedByCli).toBe(true); - expect(mockIAMSend).toHaveBeenCalled(); - - // Verify CreateRoleCommand was sent with correct trust policy - const createRoleCall = mockIAMSend.mock.calls[0]![0]; - const trustPolicy = JSON.parse(createRoleCall.input.AssumeRolePolicyDocument); - expect(trustPolicy.Statement[0].Principal.Service).toBe('bedrock-agentcore.amazonaws.com'); - - // Verify PutRolePolicyCommand was sent with correct inline policy actions - const putPolicyCall = mockIAMSend.mock.calls[1]![0]; - const inlinePolicy = JSON.parse(putPolicyCall.input.PolicyDocument); - const actions = inlinePolicy.Statement[0].Action; - expect(actions).toContain('bedrock-agentcore:InvokeRuntime'); - expect(actions).toContain('bedrock-agentcore:InvokeAgent'); - expect(actions).toContain('bedrock-agentcore:InvokeAgentRuntime'); - 
expect(inlinePolicy.Statement[0].Resource).toBe('*'); - }); - - it('rollback on target creation failure', async () => { - mockCreateHttpGateway.mockResolvedValue({ - gatewayId: 'gw-rollback', - gatewayArn: 'arn:httpgw:rollback', - }); - mockCreateHttpGatewayTarget.mockRejectedValue(new Error('Target creation failed')); - mockDeleteHttpGateway.mockResolvedValue({ success: true }); - - const result = await setupHttpGateways({ - region: 'us-east-1', - projectName: 'TestProject', - projectSpec: makeProjectSpec([sampleHttpGateway]), - deployedResources: sampleDeployedResources, - }); - - expect(result.hasErrors).toBe(true); - expect(result.results[0]!.status).toBe('error'); - expect(result.results[0]!.error).toContain('Target creation failed'); - expect(result.results[0]!.error).toContain('gateway rolled back'); - - // Verify rollback: deleteHttpGateway was called - expect(mockDeleteHttpGateway).toHaveBeenCalledWith({ - region: 'us-east-1', - gatewayId: 'gw-rollback', - }); - }); - }); - - describe('deletion (reconciliation)', () => { - it('deletes orphaned gateway not in project spec', async () => { - mockDeleteHttpGateway.mockResolvedValue({ success: true }); - mockDeleteHttpGatewayTarget.mockResolvedValue({ success: true }); - - const result = await deleteOrphanedHttpGateways({ - region: 'us-east-1', - projectSpec: makeProjectSpec([]), - existingHttpGateways: { - RemovedGw: { - gatewayId: 'gw-old', - gatewayArn: 'arn:httpgw:old', - targetId: 'tgt-old', - }, - }, - }); - - expect(mockDeleteHttpGatewayTarget).toHaveBeenCalledWith({ - region: 'us-east-1', - gatewayId: 'gw-old', - targetId: 'tgt-old', - }); - expect(mockDeleteHttpGateway).toHaveBeenCalledWith({ - region: 'us-east-1', - gatewayId: 'gw-old', - }); - expect(result.results[0]!.status).toBe('deleted'); - }); - - it('cleans up auto-created IAM role on deletion', async () => { - mockDeleteHttpGateway.mockResolvedValue({ success: true }); - mockIAMSend.mockResolvedValue({}); - - await 
deleteOrphanedHttpGateways({ - region: 'us-east-1', - projectSpec: makeProjectSpec([]), - existingHttpGateways: { - RemovedGw: { - gatewayId: 'gw-old', - gatewayArn: 'arn:httpgw:old', - roleArn: 'arn:aws:iam::123:role/AutoCreatedRole', - roleCreatedByCli: true, - }, - }, - }); - - // Should have called delete policy + delete role - expect(mockIAMSend).toHaveBeenCalledTimes(2); - - // Verify first call is DeleteRolePolicyCommand - const firstCall = mockIAMSend.mock.calls[0]![0]; - expect(firstCall.input).toEqual( - expect.objectContaining({ RoleName: 'AutoCreatedRole', PolicyName: expect.any(String) }) - ); - - // Verify second call is DeleteRoleCommand - const secondCall = mockIAMSend.mock.calls[1]![0]; - expect(secondCall.input).toEqual(expect.objectContaining({ RoleName: 'AutoCreatedRole' })); - }); - - it('reports error when deletion fails', async () => { - mockDeleteHttpGateway.mockRejectedValue(new Error('delete failed')); - - const result = await deleteOrphanedHttpGateways({ - region: 'us-east-1', - projectSpec: makeProjectSpec([]), - existingHttpGateways: { - FailGw: { gatewayId: 'gw-fail', gatewayArn: 'arn:httpgw:fail' }, - }, - }); - - expect(result.hasErrors).toBe(true); - expect(result.results[0]!.status).toBe('error'); - expect(result.results[0]!.error).toBe('delete failed'); - }); - }); - - describe('edge cases', () => { - it('proceeds with creation when listHttpGateways fails', async () => { - mockListAllHttpGateways.mockRejectedValue(new Error('API unavailable')); - mockCreateHttpGateway.mockResolvedValue({ - gatewayId: 'gw-new', - gatewayArn: 'arn:httpgw:new', - }); - mockCreateHttpGatewayTarget.mockResolvedValue({ targetId: 'tgt-new' }); - - const result = await setupHttpGateways({ - region: 'us-east-1', - projectName: 'TestProject', - projectSpec: makeProjectSpec([sampleHttpGateway]), - deployedResources: sampleDeployedResources, - }); - - expect(result.results[0]!.status).toBe('created'); - expect(mockCreateHttpGateway).toHaveBeenCalled(); - }); 
- - it('uses provided roleArn without creating IAM role', async () => { - mockCreateHttpGateway.mockResolvedValue({ - gatewayId: 'gw-003', - gatewayArn: 'arn:httpgw:003', - }); - mockCreateHttpGatewayTarget.mockResolvedValue({ targetId: 'tgt-003' }); - - const result = await setupHttpGateways({ - region: 'us-east-1', - projectName: 'TestProject', - projectSpec: makeProjectSpec([sampleHttpGateway]), - deployedResources: sampleDeployedResources, - }); - - expect(result.results[0]!.status).toBe('created'); - expect(result.httpGateways.MyHttpGw!.roleCreatedByCli).toBe(false); - expect(mockIAMSend).not.toHaveBeenCalled(); - }); - }); - - describe('mixed operations', () => { - it('creates new and skips existing (orphan deletion is a separate pass)', async () => { - const newGw = { ...sampleHttpGateway, name: 'NewGw' }; - const keptGw = { ...sampleHttpGateway, name: 'KeptGw' }; - - mockCreateHttpGateway.mockResolvedValue({ - gatewayId: 'gw-new', - gatewayArn: 'arn:httpgw:new', - }); - mockCreateHttpGatewayTarget.mockResolvedValue({ targetId: 'tgt-new' }); - mockDeleteHttpGateway.mockResolvedValue({ success: true }); - - const result = await setupHttpGateways({ - region: 'us-east-1', - projectName: 'TestProject', - projectSpec: makeProjectSpec([newGw, keptGw]), - existingHttpGateways: { - KeptGw: { gatewayId: 'gw-kept', gatewayArn: 'arn:httpgw:kept' }, - OrphanGw: { gatewayId: 'gw-orphan', gatewayArn: 'arn:httpgw:orphan' }, - }, - deployedResources: sampleDeployedResources, - }); - - expect(result.results).toHaveLength(2); - const statuses = result.results.map(r => `${r.gatewayName}:${r.status}`); - expect(statuses).toContain('NewGw:created'); - expect(statuses).toContain('KeptGw:skipped'); - }); - - it('deleteOrphanedHttpGateways removes orphans separately', async () => { - mockDeleteHttpGateway.mockResolvedValue({ success: true }); - - const result = await deleteOrphanedHttpGateways({ - region: 'us-east-1', - projectSpec: makeProjectSpec([{ ...sampleHttpGateway, name: 
'KeptGw' }]), - existingHttpGateways: { - KeptGw: { gatewayId: 'gw-kept', gatewayArn: 'arn:httpgw:kept' }, - OrphanGw: { gatewayId: 'gw-orphan', gatewayArn: 'arn:httpgw:orphan' }, - }, - }); - - expect(result.results).toHaveLength(1); - expect(result.results[0]!.gatewayName).toBe('OrphanGw'); - expect(result.results[0]!.status).toBe('deleted'); - }); - }); -}); diff --git a/src/cli/operations/deploy/__tests__/post-deploy-knowledge-bases.test.ts b/src/cli/operations/deploy/__tests__/post-deploy-knowledge-bases.test.ts new file mode 100644 index 000000000..934ff6a58 --- /dev/null +++ b/src/cli/operations/deploy/__tests__/post-deploy-knowledge-bases.test.ts @@ -0,0 +1,165 @@ +import * as ingest from '../../ingest'; +import { autoIngestKnowledgeBases, computeSourcesHash } from '../post-deploy-knowledge-bases'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +vi.mock('../../ingest'); + +const kbWithSources = (name: string, uris: string[]) => + ({ + type: 'AgentCoreKnowledgeBase', + name, + dataSources: uris.map(uri => ({ type: 'S3' as const, uri })), + }) as never; + +const deployedKb = (id: string, dsIds: string[], sourcesHash?: string) => ({ + knowledgeBaseId: id, + knowledgeBaseArn: `arn:aws:bedrock:us-west-2:0:knowledge-base/${id}`, + dataSources: dsIds.map((dsId, idx) => ({ + dataSourceId: dsId, + uri: `s3://b/ds${idx}/`, + })), + ...(sourcesHash && { sourcesHash }), +}); + +const stubDeployedState = () => ({ targets: { default: { resources: { knowledgeBases: {} } } } }) as never; + +describe('computeSourcesHash', () => { + it('produces a stable hash for identical URI lists', () => { + const kb1 = kbWithSources('a', ['s3://b/x/', 's3://b/y/']); + const kb2 = kbWithSources('a', ['s3://b/x/', 's3://b/y/']); + expect(computeSourcesHash(kb1)).toBe(computeSourcesHash(kb2)); + }); + + it('produces different hashes when a URI changes', () => { + const kb1 = kbWithSources('a', ['s3://b/x/']); + const kb2 = kbWithSources('a', ['s3://b/y/']); + 
expect(computeSourcesHash(kb1)).not.toBe(computeSourcesHash(kb2)); + }); + + it('produces different hashes when URI order changes', () => { + const kb1 = kbWithSources('a', ['s3://b/x/', 's3://b/y/']); + const kb2 = kbWithSources('a', ['s3://b/y/', 's3://b/x/']); + expect(computeSourcesHash(kb1)).not.toBe(computeSourcesHash(kb2)); + }); +}); + +describe('autoIngestKnowledgeBases', () => { + beforeEach(() => vi.mocked(ingest.runKbIngestionByName).mockReset()); + afterEach(() => vi.restoreAllMocks()); + + it('starts ingestion for a KB with no prior hash (first deploy)', async () => { + vi.mocked(ingest.runKbIngestionByName).mockResolvedValueOnce({ + success: true, + startedJobs: [{ dataSourceId: 'DS1', uri: 's3://b/ds0/', ingestionJobId: 'IJ-1' }], + } as never); + + const result = await autoIngestKnowledgeBases({ + region: 'us-west-2', + knowledgeBases: [kbWithSources('docs', ['s3://b/ds0/'])], + deployedKnowledgeBases: { docs: deployedKb('KB1', ['DS1']) }, + previousKnowledgeBases: undefined, + targetName: 'default', + deployedState: stubDeployedState(), + }); + + expect(result.hasErrors).toBe(false); + expect(result.results).toHaveLength(1); + const entry = result.results[0]!; + expect(entry.status).toBe('started'); + expect(entry.startedJobCount).toBe(1); + expect(entry.newSourcesHash).toBeTruthy(); + }); + + it('skips ingestion when sourcesHash matches the prior deploy', async () => { + const kb = kbWithSources('docs', ['s3://b/ds0/']); + const priorHash = computeSourcesHash(kb); + + const result = await autoIngestKnowledgeBases({ + region: 'us-west-2', + knowledgeBases: [kb], + deployedKnowledgeBases: { docs: deployedKb('KB1', ['DS1']) }, + previousKnowledgeBases: { docs: deployedKb('KB1', ['DS1'], priorHash) }, + targetName: 'default', + deployedState: stubDeployedState(), + }); + + expect(result.hasErrors).toBe(false); + expect(result.results[0]?.status).toBe('skipped'); + expect(result.results[0]?.reason).toMatch(/no changes/i); + 
expect(ingest.runKbIngestionByName).not.toHaveBeenCalled(); + }); + + it('starts ingestion when sourcesHash differs from prior', async () => { + vi.mocked(ingest.runKbIngestionByName).mockResolvedValueOnce({ + success: true, + startedJobs: [{ dataSourceId: 'DS1', uri: 's3://b/ds0/', ingestionJobId: 'IJ-1' }], + } as never); + + const result = await autoIngestKnowledgeBases({ + region: 'us-west-2', + knowledgeBases: [kbWithSources('docs', ['s3://b/ds0/'])], + deployedKnowledgeBases: { docs: deployedKb('KB1', ['DS1']) }, + previousKnowledgeBases: { docs: deployedKb('KB1', ['DS1'], 'old-hash') }, + targetName: 'default', + deployedState: stubDeployedState(), + }); + + expect(result.results[0]?.status).toBe('started'); + expect(ingest.runKbIngestionByName).toHaveBeenCalledTimes(1); + }); + + it('records errors but does not abort other KBs', async () => { + vi.mocked(ingest.runKbIngestionByName) + .mockResolvedValueOnce({ success: false, error: new Error('Throttled') } as never) + .mockResolvedValueOnce({ + success: true, + startedJobs: [{ dataSourceId: 'DS2', uri: 's3://b/ds0/', ingestionJobId: 'IJ-2' }], + } as never); + + const result = await autoIngestKnowledgeBases({ + region: 'us-west-2', + knowledgeBases: [kbWithSources('a', ['s3://b/ds0/']), kbWithSources('b', ['s3://b/ds0/'])], + deployedKnowledgeBases: { + a: deployedKb('KB1', ['DS1']), + b: deployedKb('KB2', ['DS2']), + }, + previousKnowledgeBases: undefined, + targetName: 'default', + deployedState: stubDeployedState(), + }); + + expect(result.hasErrors).toBe(true); + expect(result.results[0]?.status).toBe('error'); + expect(result.results[0]?.error).toMatch(/Throttled/); + expect(result.results[1]?.status).toBe('started'); + }); + + it('skips a KB that has no data sources recorded yet', async () => { + const result = await autoIngestKnowledgeBases({ + region: 'us-west-2', + knowledgeBases: [kbWithSources('docs', ['s3://b/ds0/'])], + deployedKnowledgeBases: { docs: deployedKb('KB1', []) }, + 
previousKnowledgeBases: undefined, + targetName: 'default', + deployedState: stubDeployedState(), + }); + + expect(result.results[0]?.status).toBe('skipped'); + expect(result.results[0]?.reason).toMatch(/no data sources/i); + expect(ingest.runKbIngestionByName).not.toHaveBeenCalled(); + }); + + it('skips a KB that is missing from deployed state (CFN outputs missing)', async () => { + const result = await autoIngestKnowledgeBases({ + region: 'us-west-2', + knowledgeBases: [kbWithSources('docs', ['s3://b/ds0/'])], + deployedKnowledgeBases: {}, + previousKnowledgeBases: undefined, + targetName: 'default', + deployedState: stubDeployedState(), + }); + + expect(result.results[0]?.status).toBe('skipped'); + expect(result.results[0]?.reason).toMatch(/not present in deployed state/i); + }); +}); diff --git a/src/cli/operations/deploy/__tests__/preflight.test.ts b/src/cli/operations/deploy/__tests__/preflight.test.ts index 58cfc0f12..dbe675c0b 100644 --- a/src/cli/operations/deploy/__tests__/preflight.test.ts +++ b/src/cli/operations/deploy/__tests__/preflight.test.ts @@ -88,10 +88,35 @@ describe('validateProject', () => { mockReadDeployedState.mockRejectedValue(new Error('No deployed state')); await expect(validateProject()).rejects.toThrow( - 'No resources defined in project. Add at least one resource (agent, memory, evaluator, or gateway) before deploying.' + 'No resources defined in project. Add at least one resource (agent, memory, knowledge base, evaluator, or gateway) before deploying.' 
); }); + it('allows deploy when only a knowledge base is defined (no agents or gateways)', async () => { + mockRequireConfigRoot.mockReturnValue('/project/agentcore'); + mockValidate.mockReturnValue(undefined); + mockReadProjectSpec.mockResolvedValue({ + name: 'test-project', + runtimes: [], + memories: [], + knowledgeBases: [ + { + type: 'AgentCoreKnowledgeBase', + name: 'docs', + dataSources: [{ type: 'S3', uri: 's3://my-bucket/' }], + }, + ], + agentCoreGateways: [], + }); + mockReadAWSDeploymentTargets.mockResolvedValue([]); + mockValidateAwsCredentials.mockResolvedValue(undefined); + + const result = await validateProject(); + + expect(result.projectSpec.name).toBe('test-project'); + expect(result.isTeardownDeploy).toBe(false); + }); + it('allows deploy when memories exist but no agents or gateways', async () => { mockRequireConfigRoot.mockReturnValue('/project/agentcore'); mockValidate.mockReturnValue(undefined); @@ -117,6 +142,7 @@ describe('validateProject', () => { name: 'test-project', runtimes: [], memories: [], + knowledgeBases: [], datasets: [ { name: 'test-dataset', @@ -152,29 +178,6 @@ describe('validateProject', () => { expect(result.isTeardownDeploy).toBe(false); }); - it('rejects gateway target name that exceeds 48 chars when prefixed with project name', async () => { - mockRequireConfigRoot.mockReturnValue('/project/agentcore'); - mockValidate.mockReturnValue(undefined); - // projectName "myproject" (9) + "-" (1) + targetName (39) = 49 > 48 - mockReadProjectSpec.mockResolvedValue({ - name: 'myproject', - runtimes: [], - httpGateways: [ - { - name: 'gw', - targets: [{ name: 'a'.repeat(39), runtimeRef: 'rt', qualifier: 'DEFAULT' }], - }, - ], - agentCoreGateways: [{ name: 'gw' }], - }); - mockReadAWSDeploymentTargets.mockResolvedValue([]); - mockValidateAwsCredentials.mockResolvedValue(undefined); - - await expect(validateProject()).rejects.toThrow( - 'HTTP gateway target "' + 'a'.repeat(39) + '" in gateway "gw" would exceed the 48-character AWS 
limit' - ); - }); - it('accepts gateway target name within 48 chars when prefixed with project name', async () => { mockRequireConfigRoot.mockReturnValue('/project/agentcore'); mockValidate.mockReturnValue(undefined); @@ -182,12 +185,6 @@ describe('validateProject', () => { mockReadProjectSpec.mockResolvedValue({ name: 'myproject', runtimes: [], - httpGateways: [ - { - name: 'gw', - targets: [{ name: 'a'.repeat(38), runtimeRef: 'rt', qualifier: 'DEFAULT' }], - }, - ], agentCoreGateways: [{ name: 'gw' }], }); mockReadAWSDeploymentTargets.mockResolvedValue([]); diff --git a/src/cli/operations/deploy/imperative/deployers/__tests__/harness-deployer.test.ts b/src/cli/operations/deploy/imperative/deployers/__tests__/harness-deployer.test.ts deleted file mode 100644 index 4254d4581..000000000 --- a/src/cli/operations/deploy/imperative/deployers/__tests__/harness-deployer.test.ts +++ /dev/null @@ -1,466 +0,0 @@ -import { createHarness, deleteHarness, getHarness, updateHarness } from '../../../../../aws/agentcore-harness'; -import { AgentCoreApiError } from '../../../../../aws/api-client'; -import type { ImperativeDeployContext } from '../../types'; -import { HarnessDeployer } from '../harness-deployer'; -import { beforeEach, describe, expect, it, vi } from 'vitest'; - -vi.mock('fs/promises', () => ({ - readFile: vi.fn().mockImplementation((path: string) => { - if (path.includes('harness.json')) { - return Promise.resolve( - JSON.stringify({ - name: 'my_harness', - model: { provider: 'bedrock', modelId: 'anthropic.claude-3-5-sonnet' }, - tools: [], - skills: [], - }) - ); - } - if (path.includes('system-prompt.md')) return Promise.resolve('You are helpful.'); - return Promise.reject(Object.assign(new Error('ENOENT'), { code: 'ENOENT' })); - }), -})); - -vi.mock('../harness-mapper', () => ({ - mapHarnessSpecToCreateOptions: vi.fn().mockResolvedValue({ - harnessName: 'proj_my-harness', - region: 'us-east-1', - executionRoleArn: 'arn:aws:iam::111:role/HarnessRole', - model: { 
bedrockModelConfig: { modelId: 'anthropic.claude-3-5-sonnet' } }, - systemPrompt: [{ text: 'You are helpful.' }], - }), -})); - -vi.mock('../../../../../aws/agentcore-harness', () => ({ - createHarness: vi.fn().mockResolvedValue({ - harness: { - harnessId: 'h-123', - arn: 'arn:aws:bedrock:us-east-1:111:harness/h-123', - status: 'READY', - environment: { agentCoreRuntimeEnvironment: { agentRuntimeArn: 'arn:runtime' } }, - }, - }), - updateHarness: vi.fn().mockResolvedValue({ - harness: { - harnessId: 'h-existing', - arn: 'arn:aws:bedrock:us-east-1:111:harness/h-existing', - status: 'READY', - environment: { agentCoreRuntimeEnvironment: { agentRuntimeArn: 'arn:runtime' } }, - }, - }), - deleteHarness: vi.fn().mockResolvedValue({}), - getHarness: vi.fn().mockResolvedValue({ - harness: { - harnessId: 'h-123', - arn: 'arn:aws:bedrock:us-east-1:111:harness/h-123', - status: 'READY', - environment: { agentCoreRuntimeEnvironment: { agentRuntimeArn: 'arn:runtime' } }, - }, - }), -})); - -function makeContext(overrides: Partial = {}): ImperativeDeployContext { - return { - projectSpec: { - name: 'proj', - harnesses: [{ name: 'my_harness', path: 'harnesses/my_harness' }], - } as any, - target: { name: 'dev', region: 'us-east-1' } as any, - configIO: { getConfigRoot: () => '/project/agentcore' } as any, - deployedState: { targets: {} } as any, - cdkOutputs: { ApplicationHarnessMyHarnessRoleArnOutput123: 'arn:aws:iam::111:role/HarnessRole' }, - ...overrides, - }; -} - -describe('HarnessDeployer', () => { - let deployer: HarnessDeployer; - - beforeEach(() => { - deployer = new HarnessDeployer(); - vi.clearAllMocks(); - }); - - describe('shouldRun', () => { - it('returns true when project has harnesses', () => { - expect(deployer.shouldRun(makeContext())).toBe(true); - }); - - it('returns true when deployed state has harnesses', () => { - const ctx = makeContext({ - projectSpec: { name: 'proj', harnesses: [] } as any, - deployedState: { - targets: { dev: { resources: { harnesses: 
{ old: { harnessId: 'h-old' } } } } }, - } as any, - }); - expect(deployer.shouldRun(ctx)).toBe(true); - }); - - it('returns false when no harnesses anywhere', () => { - const ctx = makeContext({ - projectSpec: { name: 'proj' } as any, - deployedState: { targets: {} } as any, - }); - expect(deployer.shouldRun(ctx)).toBe(false); - }); - }); - - describe('deploy - create path', () => { - it('calls createHarness and returns state on success', async () => { - const result = await deployer.deploy(makeContext()); - expect(result.success).toBe(true); - expect(createHarness).toHaveBeenCalled(); - expect(result.state!.my_harness).toMatchObject({ - harnessId: 'h-123', - status: 'READY', - }); - }); - - it('throws when harness enters FAILED state after create', async () => { - vi.mocked(createHarness).mockResolvedValueOnce({ - harness: { harnessId: 'h-fail', arn: 'arn:fail', status: 'CREATING' }, - } as any); - vi.mocked(getHarness).mockResolvedValueOnce({ - harness: { harnessId: 'h-fail', arn: 'arn:fail', status: 'FAILED' }, - } as any); - - const result = await deployer.deploy(makeContext()); - expect(result.success).toBe(false); - expect(result.error).toContain('FAILED state'); - }); - }); - - describe('deploy - update path', () => { - it('calls updateHarness when existing harness has different configHash', async () => { - const ctx = makeContext({ - deployedState: { - targets: { - dev: { - resources: { - harnesses: { - my_harness: { - harnessId: 'h-existing', - configHash: 'old-hash', - harnessArn: 'arn:old', - roleArn: 'arn:role', - status: 'READY', - }, - }, - }, - }, - }, - } as any, - }); - - const result = await deployer.deploy(ctx); - expect(result.success).toBe(true); - expect(updateHarness).toHaveBeenCalled(); - expect(createHarness).not.toHaveBeenCalled(); - }); - }); - - describe('deploy - skip path', () => { - it('skips when configHash matches', async () => { - // We need to compute the actual hash. 
Instead, mock readFile to produce deterministic content - // and set the deployed hash to match. Easiest: just set configHash to what will be computed. - // Since we can't easily predict the hash, test the logic by verifying no API calls. - const ctx = makeContext({ - deployedState: { - targets: { - dev: { - resources: { - harnesses: { - my_harness: { - harnessId: 'h-existing', - configHash: 'WILL_NOT_MATCH', - harnessArn: 'arn:x', - roleArn: 'arn:role', - status: 'READY', - }, - }, - }, - }, - }, - } as any, - }); - - // To truly test skip, we'd need to know the hash. Let's just verify that when - // configHash matches, it skips. We'll run once to get the hash, then use it. - const firstResult = await deployer.deploy(ctx); - // It will have updated because hash doesn't match - expect(updateHarness).toHaveBeenCalledTimes(1); - - // Now use the actual computed hash - vi.clearAllMocks(); - const computedHash = firstResult.state!.my_harness!.configHash; - const ctx2 = makeContext({ - deployedState: { - targets: { - dev: { - resources: { - harnesses: { - my_harness: { - harnessId: 'h-existing', - configHash: computedHash, - harnessArn: 'arn:x', - roleArn: 'arn:role', - status: 'READY', - }, - }, - }, - }, - }, - } as any, - }); - - const result = await deployer.deploy(ctx2); - expect(result.success).toBe(true); - expect(createHarness).not.toHaveBeenCalled(); - expect(updateHarness).not.toHaveBeenCalled(); - expect(result.notes).toContain('Harness "my_harness" unchanged, skipped'); - }); - }); - - describe('deploy - delete orphaned harnesses', () => { - it('deletes harnesses not in project spec', async () => { - const ctx = makeContext({ - deployedState: { - targets: { - dev: { - resources: { - harnesses: { - 'removed-harness': { - harnessId: 'h-removed', - configHash: 'x', - harnessArn: 'arn:r', - roleArn: 'arn:role', - status: 'READY', - }, - }, - }, - }, - }, - } as any, - }); - - const result = await deployer.deploy(ctx); - expect(result.success).toBe(true); - 
expect(deleteHarness).toHaveBeenCalledWith({ region: 'us-east-1', harnessId: 'h-removed' }); - expect(result.state!['removed-harness']).toBeUndefined(); - }); - }); - - describe('deploy - role resolution', () => { - it('fails when CDK outputs missing role ARN', async () => { - const ctx = makeContext({ cdkOutputs: {} }); - const result = await deployer.deploy(ctx); - expect(result.success).toBe(false); - expect(result.error).toContain('Could not find role ARN'); - }); - - it('resolves role from RoleRoleArn output key pattern', async () => { - const ctx = makeContext({ - cdkOutputs: { ApplicationHarnessMyHarnessRoleArnSomeSuffix: 'arn:aws:iam::111:role/NewRole' }, - }); - const result = await deployer.deploy(ctx); - expect(result.success).toBe(true); - }); - }); - - describe('deploy - retry logic', () => { - it('retries on role validation error then succeeds', async () => { - const roleError = new AgentCoreApiError(400, 'Role validation failed for the given role'); - vi.mocked(createHarness) - .mockRejectedValueOnce(roleError) - .mockResolvedValueOnce({ - harness: { harnessId: 'h-retry', arn: 'arn:retry', status: 'READY', environment: {} }, - } as any); - - const result = await deployer.deploy(makeContext()); - expect(result.success).toBe(true); - expect(createHarness).toHaveBeenCalledTimes(2); - }, 30_000); - - it('throws non-role-validation errors immediately', async () => { - vi.mocked(createHarness).mockRejectedValueOnce(new Error('Network failure')); - - const result = await deployer.deploy(makeContext()); - expect(result.success).toBe(false); - expect(result.error).toContain('Network failure'); - expect(createHarness).toHaveBeenCalledTimes(1); - }); - }); - - describe('deploy - polling (waitForReady)', () => { - it('polls getHarness until READY', async () => { - vi.mocked(createHarness).mockResolvedValueOnce({ - harness: { harnessId: 'h-poll', arn: 'arn:poll', status: 'CREATING' }, - } as any); - vi.mocked(getHarness) - .mockResolvedValueOnce({ harness: { 
harnessId: 'h-poll', arn: 'arn:poll', status: 'CREATING' } } as any) - .mockResolvedValueOnce({ - harness: { - harnessId: 'h-poll', - arn: 'arn:poll', - status: 'READY', - environment: { agentCoreRuntimeEnvironment: { agentRuntimeArn: 'arn:rt' } }, - }, - } as any); - - const result = await deployer.deploy(makeContext()); - expect(result.success).toBe(true); - expect(getHarness).toHaveBeenCalledTimes(2); - }); - }); - - describe('memorySpec resolution', () => { - const ROLE_ARN = 'arn:aws:iam::123456789012:role/HarnessRole'; - const MEMORY_ARN = 'arn:aws:bedrock-agentcore:us-east-1:123456789012:memory/mem-123'; - const CDK_OUTPUTS = { ApplicationHarnessMyHarnessRoleArnOutput123: ROLE_ARN }; - const READY_HARNESS = { - harnessId: 'h-new', - harnessName: 'my_harness', - arn: 'arn:aws:bedrock-agentcore:us-east-1:123456789012:harness/h-new', - status: 'READY' as const, - executionRoleArn: ROLE_ARN, - createdAt: '2024-01-01T00:00:00Z', - updatedAt: '2024-01-01T00:00:00Z', - }; - - const HARNESS_SPEC_WITH_MEMORY_ARN_JSON = JSON.stringify({ - name: 'my_harness', - model: { provider: 'bedrock', modelId: 'anthropic.claude-3-sonnet-20240229-v1:0' }, - tools: [], - skills: [], - memory: { arn: MEMORY_ARN }, - }); - - it('resolves memorySpec by deployed ARN when memory.name is absent', async () => { - const { readFile: mockedReadFile } = await import('fs/promises'); - const { mapHarnessSpecToCreateOptions: mockedMapHarness } = await import('../harness-mapper'); - - vi.mocked(mockedReadFile) - .mockResolvedValueOnce(HARNESS_SPEC_WITH_MEMORY_ARN_JSON as any) - .mockRejectedValueOnce(new Error('ENOENT')); - vi.mocked(mockedMapHarness).mockResolvedValueOnce({ - region: 'us-east-1', - harnessName: 'my_harness', - executionRoleArn: ROLE_ARN, - } as any); - vi.mocked(createHarness).mockResolvedValueOnce({ - harness: READY_HARNESS, - } as any); - - const ctx = makeContext({ - projectSpec: { - name: 'proj', - harnesses: [{ name: 'my_harness', path: 'harnesses/my_harness' }], - 
memories: [ - { - name: 'my_memory', - eventExpiryDuration: 30, - strategies: [{ type: 'SEMANTIC', namespaces: ['/users/{actorId}/facts'] }], - }, - ], - } as any, - deployedState: { - targets: { - dev: { - resources: { - memories: { my_memory: { memoryId: 'mem-123', memoryArn: MEMORY_ARN } }, - }, - }, - }, - } as any, - cdkOutputs: CDK_OUTPUTS, - }); - - await deployer.deploy(ctx); - - expect(mockedMapHarness).toHaveBeenCalledWith( - expect.objectContaining({ - memorySpec: { - name: 'my_memory', - eventExpiryDuration: 30, - strategies: [{ type: 'SEMANTIC', namespaces: ['/users/{actorId}/facts'] }], - }, - }) - ); - }); - - it('returns undefined memorySpec for a fully external ARN not in deployedResources', async () => { - const { readFile: mockedReadFile } = await import('fs/promises'); - const { mapHarnessSpecToCreateOptions: mockedMapHarness } = await import('../harness-mapper'); - - vi.mocked(mockedReadFile) - .mockResolvedValueOnce(HARNESS_SPEC_WITH_MEMORY_ARN_JSON as any) - .mockRejectedValueOnce(new Error('ENOENT')); - vi.mocked(mockedMapHarness).mockResolvedValueOnce({ - region: 'us-east-1', - harnessName: 'my_harness', - executionRoleArn: ROLE_ARN, - } as any); - vi.mocked(createHarness).mockResolvedValueOnce({ - harness: READY_HARNESS, - } as any); - - const ctx = makeContext({ - projectSpec: { - name: 'proj', - harnesses: [{ name: 'my_harness', path: 'harnesses/my_harness' }], - memories: [], - } as any, - cdkOutputs: CDK_OUTPUTS, - }); - - await deployer.deploy(ctx); - - expect(mockedMapHarness).toHaveBeenCalledWith(expect.objectContaining({ memorySpec: undefined })); - }); - }); - - describe('teardown', () => { - it('deletes all deployed harnesses', async () => { - const ctx = makeContext({ - deployedState: { - targets: { - dev: { - resources: { - harnesses: { - h1: { harnessId: 'id-1', configHash: 'x', harnessArn: 'arn:1', roleArn: 'arn:r', status: 'READY' }, - h2: { harnessId: 'id-2', configHash: 'y', harnessArn: 'arn:2', roleArn: 'arn:r', status: 
'READY' }, - }, - }, - }, - }, - } as any, - }); - - const result = await deployer.teardown(ctx); - expect(result.success).toBe(true); - expect(deleteHarness).toHaveBeenCalledTimes(2); - expect(result.state).toEqual({}); - }); - - it('returns error if delete fails', async () => { - vi.mocked(deleteHarness).mockRejectedValueOnce(new Error('Access denied')); - const ctx = makeContext({ - deployedState: { - targets: { - dev: { - resources: { - harnesses: { - h1: { harnessId: 'id-1', configHash: 'x', harnessArn: 'arn:1', roleArn: 'arn:r', status: 'READY' }, - }, - }, - }, - }, - } as any, - }); - - const result = await deployer.teardown(ctx); - expect(result.success).toBe(false); - expect(result.error).toContain('Access denied'); - }); - }); -}); diff --git a/src/cli/operations/deploy/imperative/deployers/__tests__/harness-mapper.test.ts b/src/cli/operations/deploy/imperative/deployers/__tests__/harness-mapper.test.ts deleted file mode 100644 index bd9ec876f..000000000 --- a/src/cli/operations/deploy/imperative/deployers/__tests__/harness-mapper.test.ts +++ /dev/null @@ -1,753 +0,0 @@ -import type { DeployedResourceState, Memory } from '../../../../../../schema'; -import type { MapHarnessOptions } from '../harness-mapper'; -import { mapHarnessSpecToCreateOptions } from '../harness-mapper'; -import { describe, expect, it, vi } from 'vitest'; - -vi.mock('fs/promises', () => ({ - readFile: vi.fn().mockImplementation((path: string) => { - if (path.includes('system-prompt.md')) return Promise.resolve('You are helpful.'); - if (path.includes('custom-prompt.md')) return Promise.resolve('Custom prompt content.'); - return Promise.reject(Object.assign(new Error('ENOENT'), { code: 'ENOENT' })); - }), - stat: vi.fn().mockImplementation((path: string) => { - if (path.includes('too-large.md')) return Promise.resolve({ size: 2 * 1024 * 1024 }); - return Promise.resolve({ size: 100 }); - }), -})); - -function baseOptions(overrides: Partial = {}): MapHarnessOptions { - return { - 
harnessSpec: { - name: 'test-harness', - model: { provider: 'bedrock', modelId: 'anthropic.claude-3-5-sonnet' }, - tools: [], - skills: [], - } as any, - harnessDir: '/project/harnesses/test-harness', - executionRoleArn: 'arn:aws:iam::111:role/HarnessRole', - region: 'us-east-1', - projectName: 'my-project', - ...overrides, - }; -} - -describe('mapHarnessSpecToCreateOptions', () => { - describe('basic mapping', () => { - it('sets harnessName as projectName_specName', async () => { - const result = await mapHarnessSpecToCreateOptions(baseOptions()); - expect(result.harnessName).toBe('my-project_test-harness'); - }); - - it('passes region and executionRoleArn', async () => { - const result = await mapHarnessSpecToCreateOptions(baseOptions()); - expect(result.region).toBe('us-east-1'); - expect(result.executionRoleArn).toBe('arn:aws:iam::111:role/HarnessRole'); - }); - }); - - describe('model mapping', () => { - it('maps bedrock provider', async () => { - const result = await mapHarnessSpecToCreateOptions(baseOptions()); - expect(result.model).toEqual({ - bedrockModelConfig: { modelId: 'anthropic.claude-3-5-sonnet' }, - }); - }); - - it('maps open_ai provider with apiKeyArn', async () => { - const opts = baseOptions({ - harnessSpec: { - name: 'oai', - model: { - provider: 'open_ai', - modelId: 'gpt-4o', - apiKeyArn: 'arn:aws:secretsmanager:us-east-1:111:secret:key', - }, - tools: [], - skills: [], - } as any, - }); - const result = await mapHarnessSpecToCreateOptions(opts); - expect(result.model).toEqual({ - openAiModelConfig: { modelId: 'gpt-4o', apiKeyArn: 'arn:aws:secretsmanager:us-east-1:111:secret:key' }, - }); - }); - - it('maps gemini provider with topK', async () => { - const opts = baseOptions({ - harnessSpec: { - name: 'gem', - model: { provider: 'gemini', modelId: 'gemini-2.0-flash', topK: 0.5 }, - tools: [], - skills: [], - } as any, - }); - const result = await mapHarnessSpecToCreateOptions(opts); - expect(result.model).toEqual({ - geminiModelConfig: { 
modelId: 'gemini-2.0-flash', topK: 0.5 }, - }); - }); - - it('maps bedrock with apiFormat responses', async () => { - const opts = baseOptions({ - harnessSpec: { - name: 'h', - model: { provider: 'bedrock', modelId: 'openai.gpt-oss-120b', apiFormat: 'responses' }, - tools: [], - skills: [], - } as any, - }); - const result = await mapHarnessSpecToCreateOptions(opts); - expect(result.model).toEqual({ - bedrockModelConfig: { modelId: 'openai.gpt-oss-120b', apiFormat: 'responses' }, - }); - }); - - it('maps bedrock with apiFormat chat_completions', async () => { - const opts = baseOptions({ - harnessSpec: { - name: 'h', - model: { provider: 'bedrock', modelId: 'openai.gpt-oss-120b', apiFormat: 'chat_completions' }, - tools: [], - skills: [], - } as any, - }); - const result = await mapHarnessSpecToCreateOptions(opts); - expect(result.model).toEqual({ - bedrockModelConfig: { modelId: 'openai.gpt-oss-120b', apiFormat: 'chat_completions' }, - }); - }); - - it('omits apiFormat when converse_stream (default)', async () => { - const opts = baseOptions({ - harnessSpec: { - name: 'h', - model: { provider: 'bedrock', modelId: 'claude', apiFormat: 'converse_stream' }, - tools: [], - skills: [], - } as any, - }); - const result = await mapHarnessSpecToCreateOptions(opts); - expect(result.model).toEqual({ - bedrockModelConfig: { modelId: 'claude' }, - }); - }); - - it('maps open_ai with apiFormat chat_completions', async () => { - const opts = baseOptions({ - harnessSpec: { - name: 'h', - model: { - provider: 'open_ai', - modelId: 'gpt-5', - apiKeyArn: 'arn:aws:secretsmanager:us-east-1:123:secret:key', - apiFormat: 'chat_completions', - }, - tools: [], - skills: [], - } as any, - }); - const result = await mapHarnessSpecToCreateOptions(opts); - expect(result.model).toEqual({ - openAiModelConfig: { - modelId: 'gpt-5', - apiKeyArn: 'arn:aws:secretsmanager:us-east-1:123:secret:key', - apiFormat: 'chat_completions', - }, - }); - }); - - it('omits apiFormat for open_ai when responses 
(default)', async () => { - const opts = baseOptions({ - harnessSpec: { - name: 'h', - model: { - provider: 'open_ai', - modelId: 'gpt-5', - apiKeyArn: 'arn:aws:secretsmanager:us-east-1:123:secret:key', - apiFormat: 'responses', - }, - tools: [], - skills: [], - } as any, - }); - const result = await mapHarnessSpecToCreateOptions(opts); - expect(result.model).toEqual({ - openAiModelConfig: { modelId: 'gpt-5', apiKeyArn: 'arn:aws:secretsmanager:us-east-1:123:secret:key' }, - }); - }); - - it('includes optional model params when set', async () => { - const opts = baseOptions({ - harnessSpec: { - name: 'h', - model: { provider: 'bedrock', modelId: 'claude', temperature: 0.7, topP: 0.9, maxTokens: 2048 }, - tools: [], - skills: [], - } as any, - }); - const result = await mapHarnessSpecToCreateOptions(opts); - expect(result.model).toEqual({ - bedrockModelConfig: { modelId: 'claude', temperature: 0.7, topP: 0.9, maxTokens: 2048 }, - }); - }); - }); - - describe('system prompt', () => { - it('auto-discovers system-prompt.md when no systemPrompt in spec', async () => { - const result = await mapHarnessSpecToCreateOptions(baseOptions()); - expect(result.systemPrompt).toEqual([{ text: 'You are helpful.' }]); - }); - - it('loads from file path when systemPrompt is a relative path', async () => { - const opts = baseOptions({ - harnessSpec: { - name: 'h', - model: { provider: 'bedrock', modelId: 'claude' }, - tools: [], - skills: [], - systemPrompt: './custom-prompt.md', - } as any, - }); - const result = await mapHarnessSpecToCreateOptions(opts); - expect(result.systemPrompt).toEqual([{ text: 'Custom prompt content.' 
}]); - }); - - it('uses inline text when systemPrompt is not a file path', async () => { - const opts = baseOptions({ - harnessSpec: { - name: 'h', - model: { provider: 'bedrock', modelId: 'claude' }, - tools: [], - skills: [], - systemPrompt: 'Inline prompt text here', - } as any, - }); - const result = await mapHarnessSpecToCreateOptions(opts); - expect(result.systemPrompt).toEqual([{ text: 'Inline prompt text here' }]); - }); - - it('throws when prompt file exceeds max size', async () => { - const opts = baseOptions({ - harnessSpec: { - name: 'h', - model: { provider: 'bedrock', modelId: 'claude' }, - tools: [], - skills: [], - systemPrompt: './too-large.md', - } as any, - }); - await expect(mapHarnessSpecToCreateOptions(opts)).rejects.toThrow('too large'); - }); - }); - - describe('tools mapping', () => { - it('maps tools with type, name, and config', async () => { - const opts = baseOptions({ - harnessSpec: { - name: 'h', - model: { provider: 'bedrock', modelId: 'claude' }, - tools: [ - { type: 'remote_mcp', name: 'my-mcp', config: { remoteMcp: { url: 'https://example.com' } } }, - { type: 'agentcore_code_interpreter', name: 'code-interp' }, - ], - skills: [], - } as any, - }); - const result = await mapHarnessSpecToCreateOptions(opts); - expect(result.tools).toEqual([ - { type: 'remote_mcp', name: 'my-mcp', config: { remoteMcp: { url: 'https://example.com' } } }, - { type: 'agentcore_code_interpreter', name: 'code-interp' }, - ]); - }); - - it('omits tools when empty array', async () => { - const result = await mapHarnessSpecToCreateOptions(baseOptions()); - expect(result.tools).toBeUndefined(); - }); - }); - - describe('skills mapping', () => { - it('maps skills as path objects', async () => { - const opts = baseOptions({ - harnessSpec: { - name: 'h', - model: { provider: 'bedrock', modelId: 'claude' }, - tools: [], - skills: ['path/to/skill1', 'path/to/skill2'], - } as any, - }); - const result = await mapHarnessSpecToCreateOptions(opts); - 
expect(result.skills).toEqual([{ path: 'path/to/skill1' }, { path: 'path/to/skill2' }]); - }); - }); - - describe('memory mapping', () => { - it('maps memory with direct ARN', async () => { - const opts = baseOptions({ - harnessSpec: { - name: 'h', - model: { provider: 'bedrock', modelId: 'claude' }, - tools: [], - skills: [], - memory: { arn: 'arn:aws:bedrock:us-east-1:111:memory/mem-123' }, - } as any, - }); - const result = await mapHarnessSpecToCreateOptions(opts); - expect(result.memory).toEqual({ - agentCoreMemoryConfiguration: { arn: 'arn:aws:bedrock:us-east-1:111:memory/mem-123' }, - }); - }); - - it('resolves memory by name from deployed state', async () => { - const opts = baseOptions({ - harnessSpec: { - name: 'h', - model: { provider: 'bedrock', modelId: 'claude' }, - tools: [], - skills: [], - memory: { name: 'my-memory' }, - } as any, - deployedResources: { - memories: { 'my-memory': { memoryArn: 'arn:aws:bedrock:us-east-1:111:memory/mem-resolved' } }, - } as any, - }); - const result = await mapHarnessSpecToCreateOptions(opts); - expect(result.memory).toEqual({ - agentCoreMemoryConfiguration: { arn: 'arn:aws:bedrock:us-east-1:111:memory/mem-resolved' }, - }); - }); - - it('throws when memory name cannot be resolved', async () => { - const opts = baseOptions({ - harnessSpec: { - name: 'h', - model: { provider: 'bedrock', modelId: 'claude' }, - tools: [], - skills: [], - memory: { name: 'missing-memory' }, - } as any, - }); - await expect(mapHarnessSpecToCreateOptions(opts)).rejects.toThrow('not in deployed state'); - }); - - it('includes retrievalConfig derived from memory strategy namespaces', async () => { - const deployedResources: DeployedResourceState = { - memories: { - my_memory: { - memoryId: 'mem-123', - memoryArn: 'arn:aws:bedrock-agentcore:us-east-1:123456789012:memory/mem-123', - }, - }, - }; - const memorySpec: Memory = { - name: 'my_memory', - eventExpiryDuration: 30, - strategies: [ - { type: 'SEMANTIC', namespaces: 
['/users/{actorId}/facts'] }, - { type: 'USER_PREFERENCE', namespaces: ['/users/{actorId}/preferences'] }, - { type: 'SUMMARIZATION', namespaces: ['/summaries/{actorId}/{sessionId}'] }, - { - type: 'EPISODIC', - namespaces: ['/episodes/{actorId}/{sessionId}'], - reflectionNamespaces: ['/episodes/{actorId}'], - }, - ], - }; - - const result = await mapHarnessSpecToCreateOptions( - baseOptions({ - harnessSpec: { - name: 'h', - model: { provider: 'bedrock', modelId: 'claude' }, - tools: [], - skills: [], - memory: { name: 'my_memory' }, - } as any, - deployedResources, - memorySpec, - }) - ); - - expect(result.memory).toEqual({ - agentCoreMemoryConfiguration: { - arn: 'arn:aws:bedrock-agentcore:us-east-1:123456789012:memory/mem-123', - retrievalConfig: { - '/users/{actorId}/facts': {}, - '/users/{actorId}/preferences': {}, - '/summaries/{actorId}/{sessionId}': {}, - '/episodes/{actorId}/{sessionId}': {}, - '/episodes/{actorId}': {}, - }, - }, - }); - }); - - it('includes EPISODIC reflectionNamespaces in retrievalConfig even without namespaces', async () => { - const deployedResources: DeployedResourceState = { - memories: { - my_memory: { - memoryId: 'mem-123', - memoryArn: 'arn:aws:bedrock-agentcore:us-east-1:123456789012:memory/mem-123', - }, - }, - }; - const memorySpec: Memory = { - name: 'my_memory', - eventExpiryDuration: 30, - strategies: [ - { type: 'SEMANTIC' }, - { - type: 'EPISODIC', - reflectionNamespaces: ['/episodes/{actorId}'], - }, - ], - }; - - const result = await mapHarnessSpecToCreateOptions( - baseOptions({ - harnessSpec: { - name: 'h', - model: { provider: 'bedrock', modelId: 'claude' }, - tools: [], - skills: [], - memory: { name: 'my_memory' }, - } as any, - deployedResources, - memorySpec, - }) - ); - - expect(result.memory?.agentCoreMemoryConfiguration.retrievalConfig).toEqual({ - '/episodes/{actorId}': {}, - }); - }); - - it('omits retrievalConfig when strategies have no namespaces or reflectionNamespaces', async () => { - const 
deployedResources: DeployedResourceState = { - memories: { - my_memory: { - memoryId: 'mem-123', - memoryArn: 'arn:aws:bedrock-agentcore:us-east-1:123456789012:memory/mem-123', - }, - }, - }; - const memorySpec: Memory = { - name: 'my_memory', - eventExpiryDuration: 30, - strategies: [{ type: 'SEMANTIC' }, { type: 'SUMMARIZATION' }], - }; - - const result = await mapHarnessSpecToCreateOptions( - baseOptions({ - harnessSpec: { - name: 'h', - model: { provider: 'bedrock', modelId: 'claude' }, - tools: [], - skills: [], - memory: { name: 'my_memory' }, - } as any, - deployedResources, - memorySpec, - }) - ); - - expect(result.memory?.agentCoreMemoryConfiguration.retrievalConfig).toBeUndefined(); - }); - - it('omits retrievalConfig when memorySpec not provided', async () => { - const deployedResources: DeployedResourceState = { - memories: { - my_memory: { - memoryId: 'mem-123', - memoryArn: 'arn:aws:bedrock-agentcore:us-east-1:123456789012:memory/mem-123', - }, - }, - }; - - const result = await mapHarnessSpecToCreateOptions( - baseOptions({ - harnessSpec: { - name: 'h', - model: { provider: 'bedrock', modelId: 'claude' }, - tools: [], - skills: [], - memory: { name: 'my_memory' }, - } as any, - deployedResources, - }) - ); - - expect(result.memory?.agentCoreMemoryConfiguration.retrievalConfig).toBeUndefined(); - }); - - it('includes both actorId and retrievalConfig when both are set', async () => { - const deployedResources: DeployedResourceState = { - memories: { - my_memory: { - memoryId: 'mem-123', - memoryArn: 'arn:aws:bedrock-agentcore:us-east-1:123456789012:memory/mem-123', - }, - }, - }; - const memorySpec: Memory = { - name: 'my_memory', - eventExpiryDuration: 30, - strategies: [{ type: 'SEMANTIC', namespaces: ['/users/{actorId}/facts'] }], - }; - - const result = await mapHarnessSpecToCreateOptions( - baseOptions({ - harnessSpec: { - name: 'h', - model: { provider: 'bedrock', modelId: 'claude' }, - tools: [], - skills: [], - memory: { name: 'my_memory', 
actorId: 'alice' }, - } as any, - deployedResources, - memorySpec, - }) - ); - - expect(result.memory).toEqual({ - agentCoreMemoryConfiguration: { - arn: 'arn:aws:bedrock-agentcore:us-east-1:123456789012:memory/mem-123', - actorId: 'alice', - retrievalConfig: { - '/users/{actorId}/facts': {}, - }, - }, - }); - }); - }); - - describe('execution limits', () => { - it('passes through maxIterations, maxTokens, timeoutSeconds', async () => { - const opts = baseOptions({ - harnessSpec: { - name: 'h', - model: { provider: 'bedrock', modelId: 'claude' }, - tools: [], - skills: [], - maxIterations: 10, - maxTokens: 4096, - timeoutSeconds: 120, - } as any, - }); - const result = await mapHarnessSpecToCreateOptions(opts); - expect(result.maxIterations).toBe(10); - expect(result.maxTokens).toBe(4096); - expect(result.timeoutSeconds).toBe(120); - }); - }); - - describe('container artifact', () => { - it('maps direct containerUri', async () => { - const opts = baseOptions({ - harnessSpec: { - name: 'h', - model: { provider: 'bedrock', modelId: 'claude' }, - tools: [], - skills: [], - containerUri: '111.dkr.ecr.us-east-1.amazonaws.com/repo:tag', - } as any, - }); - const result = await mapHarnessSpecToCreateOptions(opts); - expect(result.environmentArtifact).toEqual({ - containerConfiguration: { containerUri: '111.dkr.ecr.us-east-1.amazonaws.com/repo:tag' }, - }); - }); - - it('resolves container URI from CDK outputs for dockerfile', async () => { - const opts = baseOptions({ - harnessSpec: { - name: 'my-env', - model: { provider: 'bedrock', modelId: 'claude' }, - tools: [], - skills: [], - dockerfile: 'Dockerfile', - } as any, - cdkOutputs: { ApplicationHarnessMyEnvImageUriOutput123: '111.dkr.ecr.us-east-1.amazonaws.com/built:latest' }, - }); - const result = await mapHarnessSpecToCreateOptions(opts); - expect(result.environmentArtifact).toEqual({ - containerConfiguration: { containerUri: '111.dkr.ecr.us-east-1.amazonaws.com/built:latest' }, - }); - }); - - it('throws when 
dockerfile specified but no CDK output found', async () => { - const opts = baseOptions({ - harnessSpec: { - name: 'h', - model: { provider: 'bedrock', modelId: 'claude' }, - tools: [], - skills: [], - dockerfile: 'Dockerfile', - } as any, - cdkOutputs: {}, - }); - await expect(mapHarnessSpecToCreateOptions(opts)).rejects.toThrow('no container URI was found'); - }); - }); - - describe('environment provider', () => { - it('maps network config', async () => { - const opts = baseOptions({ - harnessSpec: { - name: 'h', - model: { provider: 'bedrock', modelId: 'claude' }, - tools: [], - skills: [], - networkConfig: { subnets: ['subnet-1'], securityGroups: ['sg-1'] }, - } as any, - }); - const result = await mapHarnessSpecToCreateOptions(opts); - expect(result.environment).toEqual({ - agentCoreRuntimeEnvironment: { - networkConfiguration: { - networkMode: 'VPC', - networkModeConfig: { subnets: ['subnet-1'], securityGroups: ['sg-1'] }, - }, - }, - }); - }); - - it('maps sessionStoragePath', async () => { - const opts = baseOptions({ - harnessSpec: { - name: 'h', - model: { provider: 'bedrock', modelId: 'claude' }, - tools: [], - skills: [], - sessionStoragePath: '/mnt/storage', - } as any, - }); - const result = await mapHarnessSpecToCreateOptions(opts); - expect(result.environment).toEqual({ - agentCoreRuntimeEnvironment: { - filesystemConfigurations: [{ sessionStorage: { mountPath: '/mnt/storage' } }], - }, - }); - }); - - it('maps efsAccessPoints to filesystemConfigurations', async () => { - const opts = baseOptions({ - harnessSpec: { - name: 'h', - model: { provider: 'bedrock', modelId: 'claude' }, - tools: [], - skills: [], - networkMode: 'VPC', - networkConfig: { subnets: ['subnet-abc'], securityGroups: ['sg-abc'] }, - efsAccessPoints: [ - { - accessPointArn: 'arn:aws:elasticfilesystem:us-east-1:123456789012:access-point/fsap-0123456789abcdef0', - mountPath: '/mnt/efs', - }, - ], - } as any, - }); - const result = await mapHarnessSpecToCreateOptions(opts); - 
expect(result.environment?.agentCoreRuntimeEnvironment?.filesystemConfigurations).toContainEqual({ - efsAccessPoint: { - accessPointArn: 'arn:aws:elasticfilesystem:us-east-1:123456789012:access-point/fsap-0123456789abcdef0', - mountPath: '/mnt/efs', - }, - }); - }); - - it('maps s3AccessPoints to filesystemConfigurations', async () => { - const opts = baseOptions({ - harnessSpec: { - name: 'h', - model: { provider: 'bedrock', modelId: 'claude' }, - tools: [], - skills: [], - networkMode: 'VPC', - networkConfig: { subnets: ['subnet-abc'], securityGroups: ['sg-abc'] }, - s3AccessPoints: [ - { - accessPointArn: - 'arn:aws:s3files:us-east-1:123456789012:file-system/fs-12345678901234567/access-point/fsap-12345678901234567', - mountPath: '/mnt/s3', - }, - ], - } as any, - }); - const result = await mapHarnessSpecToCreateOptions(opts); - expect(result.environment?.agentCoreRuntimeEnvironment?.filesystemConfigurations).toContainEqual({ - s3FilesAccessPoint: { - accessPointArn: - 'arn:aws:s3files:us-east-1:123456789012:file-system/fs-12345678901234567/access-point/fsap-12345678901234567', - mountPath: '/mnt/s3', - }, - }); - }); - - it('maps all three filesystem types together', async () => { - const opts = baseOptions({ - harnessSpec: { - name: 'h', - model: { provider: 'bedrock', modelId: 'claude' }, - tools: [], - skills: [], - networkMode: 'VPC', - networkConfig: { subnets: ['subnet-abc'], securityGroups: ['sg-abc'] }, - sessionStoragePath: '/mnt/session', - efsAccessPoints: [ - { - accessPointArn: 'arn:aws:elasticfilesystem:us-east-1:123456789012:access-point/fsap-0123456789abcdef0', - mountPath: '/mnt/efs', - }, - ], - s3AccessPoints: [ - { - accessPointArn: - 'arn:aws:s3files:us-east-1:123456789012:file-system/fs-12345678901234567/access-point/fsap-12345678901234567', - mountPath: '/mnt/s3', - }, - ], - } as any, - }); - const result = await mapHarnessSpecToCreateOptions(opts); - const fcs = result.environment?.agentCoreRuntimeEnvironment?.filesystemConfigurations as 
unknown[]; - expect(fcs).toHaveLength(3); - expect(fcs[0]).toEqual({ sessionStorage: { mountPath: '/mnt/session' } }); - expect(fcs[1]).toMatchObject({ efsAccessPoint: { mountPath: '/mnt/efs' } }); - expect(fcs[2]).toMatchObject({ s3FilesAccessPoint: { mountPath: '/mnt/s3' } }); - }); - - it('returns no environment when no network/lifecycle/storage', async () => { - const result = await mapHarnessSpecToCreateOptions(baseOptions()); - expect(result.environment).toBeUndefined(); - }); - }); - - describe('authorizer configuration', () => { - it('maps custom JWT authorizer', async () => { - const opts = baseOptions({ - harnessSpec: { - name: 'h', - model: { provider: 'bedrock', modelId: 'claude' }, - tools: [], - skills: [], - authorizerConfiguration: { - customJwtAuthorizer: { - discoveryUrl: 'https://example.com/.well-known/openid-configuration', - allowedAudience: ['aud1'], - allowedClients: ['client1'], - }, - }, - } as any, - }); - const result = await mapHarnessSpecToCreateOptions(opts); - expect(result.authorizerConfiguration).toEqual({ - customJWTAuthorizer: { - discoveryUrl: 'https://example.com/.well-known/openid-configuration', - allowedAudience: ['aud1'], - allowedClients: ['client1'], - }, - }); - }); - }); -}); diff --git a/src/cli/operations/deploy/imperative/deployers/harness-deployer.ts b/src/cli/operations/deploy/imperative/deployers/harness-deployer.ts deleted file mode 100644 index 8776b15e7..000000000 --- a/src/cli/operations/deploy/imperative/deployers/harness-deployer.ts +++ /dev/null @@ -1,391 +0,0 @@ -/** - * HarnessDeployer - Post-CDK imperative deployer for Harness resources. - * - * Runs after CDK deploy to create, update, or delete harness resources - * via the SigV4 API client. Harness role ARNs are resolved from CDK - * stack outputs, and harness specs are read from disk (harness.json). 
- */ -import type { HarnessDeployedState, HarnessMemoryRef, HarnessSpec, Memory } from '../../../../../schema'; -import { HarnessSpecSchema } from '../../../../../schema'; -import type { DeployedResourceState } from '../../../../../schema/schemas/deployed-state'; -import type { - CreateHarnessResult, - Harness, - UpdateHarnessOptions, - UpdateHarnessResult, -} from '../../../../aws/agentcore-harness'; -import { createHarness, deleteHarness, getHarness, updateHarness } from '../../../../aws/agentcore-harness'; -import { AgentCoreApiError } from '../../../../aws/api-client'; -import { toPascalId } from '../../../../cloudformation/logical-ids'; -import type { DeployPhase, ImperativeDeployContext, ImperativeDeployResult, ImperativeDeployer } from '../types'; -import { mapHarnessSpecToCreateOptions } from './harness-mapper'; -import { readFile } from 'fs/promises'; -import { createHash } from 'node:crypto'; -import { dirname, join } from 'path'; - -const ROLE_VALIDATION_RETRY_DELAYS_MS = [5_000, 10_000, 15_000, 20_000, 30_000]; -const READY_POLL_INTERVAL_MS = 3_000; -const READY_POLL_MAX_ATTEMPTS = 40; // 2 minutes max - -// ============================================================================ -// Types -// ============================================================================ - -type HarnessDeployedStateMap = Record; - -async function computeHarnessHash( - harnessDir: string, - harnessSpec: HarnessSpec, - roleArn: string, - memorySpec?: Memory -): Promise { - const hash = createHash('sha256'); - hash.update(JSON.stringify(harnessSpec)); - hash.update(roleArn); - if (memorySpec) { - hash.update(JSON.stringify(memorySpec)); - } - try { - const promptContent = await readFile(join(harnessDir, 'system-prompt.md'), 'utf-8'); - hash.update(promptContent); - } catch { - // no system-prompt.md - } - if (harnessSpec.dockerfile) { - try { - const dockerfileContent = await readFile(join(harnessDir, harnessSpec.dockerfile), 'utf-8'); - hash.update(dockerfileContent); - 
} catch { - // Dockerfile missing — preflight already validates existence before deploy - } - } - return hash.digest('hex').slice(0, 16); -} - -function resolveMemorySpec( - memories: Memory[] | undefined, - memoryRef: HarnessMemoryRef | undefined, - deployedResources: DeployedResourceState | undefined -): Memory | undefined { - if (!memoryRef) return undefined; - if (memoryRef.name) return memories?.find(m => m.name === memoryRef.name); - if (memoryRef.arn && deployedResources?.memories) { - const entry = Object.entries(deployedResources.memories).find(([, v]) => v.memoryArn === memoryRef.arn); - if (entry) return memories?.find(m => m.name === entry[0]); - } - return undefined; -} - -// ============================================================================ -// Deployer -// ============================================================================ - -export class HarnessDeployer implements ImperativeDeployer { - readonly name = 'harness'; - readonly label = 'Harnesses'; - readonly phase: DeployPhase = 'post-cdk'; - - shouldRun(context: ImperativeDeployContext): boolean { - const projectHarnesses = context.projectSpec.harnesses; - const hasProjectHarnesses = !!projectHarnesses && projectHarnesses.length > 0; - - const targetName = context.target.name; - const deployedHarnesses = context.deployedState.targets?.[targetName]?.resources?.harnesses; - const hasDeployedHarnesses = !!deployedHarnesses && Object.keys(deployedHarnesses).length > 0; - - return hasProjectHarnesses || hasDeployedHarnesses; - } - - async deploy(context: ImperativeDeployContext): Promise> { - const { projectSpec, target, configIO, deployedState, cdkOutputs } = context; - const region = target.region; - const targetName = target.name; - const projectName = projectSpec.name; - const configRoot = configIO.getConfigRoot(); - const projectRoot = dirname(configRoot); - - const projectHarnesses = projectSpec.harnesses ?? 
[]; - const deployedHarnesses = deployedState.targets?.[targetName]?.resources?.harnesses ?? {}; - const resultState: HarnessDeployedStateMap = { ...deployedHarnesses }; - const notes: string[] = []; - - // Build set of harness names in current project spec - const projectHarnessNames = new Set(projectHarnesses.map(h => h.name)); - - // Create or update each harness in the project spec - for (const entry of projectHarnesses) { - // Harness path is relative to project root (like agent codeLocation) - const harnessDir = join(projectRoot, entry.path); - - // Read harness.json from disk and validate - let harnessSpec: HarnessSpec; - try { - const raw = await readFile(join(harnessDir, 'harness.json'), 'utf-8'); - const parsed: unknown = JSON.parse(raw); - const validated = HarnessSpecSchema.safeParse(parsed); - if (!validated.success) { - return { - success: false, - error: `Invalid harness.json for "${entry.name}": ${validated.error.message}`, - state: resultState, - }; - } - harnessSpec = validated.data; - } catch (err) { - const message = err instanceof Error ? err.message : String(err); - return { - success: false, - error: `Failed to read harness.json for "${entry.name}": ${message}`, - state: resultState, - }; - } - - // Resolve role ARN from CDK outputs - const roleArn = resolveRoleArn(entry.name, cdkOutputs); - if (!roleArn) { - return { - success: false, - error: `Could not find role ARN in CDK outputs for harness "${entry.name}". Expected output key starting with "ApplicationHarness${toPascalId(entry.name)}RoleArn" or "ApplicationHarness${toPascalId(entry.name)}RoleRoleArn".`, - state: resultState, - }; - } - - // Use executionRoleArn from harness spec if provided, otherwise use CDK output - const executionRoleArn = harnessSpec.executionRoleArn ?? 
roleArn; - - const deployedResources = deployedState.targets?.[targetName]?.resources; - const existingHarness = deployedHarnesses[entry.name]; - const memorySpec = resolveMemorySpec(projectSpec.memories, harnessSpec.memory, deployedResources); - - const configHash = await computeHarnessHash(harnessDir, harnessSpec, executionRoleArn, memorySpec); - - if (existingHarness?.configHash === configHash) { - resultState[entry.name] = existingHarness; - notes.push(`Harness "${entry.name}" unchanged, skipped`); - context.onProgress?.(`Harness "${entry.name}": no changes`, 'done'); - continue; - } - - try { - if (existingHarness) { - // Update existing harness - const createOptions = await mapHarnessSpecToCreateOptions({ - harnessSpec, - harnessDir, - executionRoleArn, - region, - projectName, - deployedResources, - cdkOutputs, - memorySpec, - }); - - // Memory uses { optionalValue: null } to explicitly clear it when removed from config, - // since the API treats an absent field as "no change" but null as "remove". - // environmentArtifact uses undefined (omit) because container config is immutable - // after creation — it cannot be cleared via update, only set on create. - const updateOptions: UpdateHarnessOptions = { - region, - harnessId: existingHarness.harnessId, - executionRoleArn: createOptions.executionRoleArn, - model: createOptions.model, - systemPrompt: createOptions.systemPrompt, - tools: createOptions.tools, - skills: createOptions.skills, - allowedTools: createOptions.allowedTools, - memory: createOptions.memory ? { optionalValue: createOptions.memory } : { optionalValue: null }, - truncation: createOptions.truncation, - maxIterations: createOptions.maxIterations, - maxTokens: createOptions.maxTokens, - timeoutSeconds: createOptions.timeoutSeconds, - environment: createOptions.environment, - environmentArtifact: createOptions.environmentArtifact - ? 
{ optionalValue: createOptions.environmentArtifact } - : undefined, - environmentVariables: createOptions.environmentVariables, - tags: createOptions.tags, - authorizerConfiguration: createOptions.authorizerConfiguration - ? { optionalValue: createOptions.authorizerConfiguration } - : { optionalValue: null }, - }; - - const updateResult: UpdateHarnessResult = await updateHarness(updateOptions); - const finalHarness = await waitForReady(region, updateResult.harness); - if (finalHarness.status === 'FAILED') { - throw new Error(`Harness "${entry.name}" entered FAILED state`); - } - resultState[entry.name] = { - harnessId: finalHarness.harnessId, - harnessArn: finalHarness.arn, - roleArn: executionRoleArn, - status: finalHarness.status, - agentRuntimeArn: extractRuntimeArn(finalHarness), - memoryArn: createOptions.memory?.agentCoreMemoryConfiguration?.arn, - configHash, - }; - notes.push(`Updated harness "${entry.name}"`); - } else { - // Create new harness (with retry for IAM role propagation delay) - const createOptions = await mapHarnessSpecToCreateOptions({ - harnessSpec, - harnessDir, - executionRoleArn, - region, - projectName, - deployedResources, - cdkOutputs, - memorySpec, - }); - - const createResult: CreateHarnessResult = await createWithRetry(createOptions); - const finalHarness = await waitForReady(region, createResult.harness); - if (finalHarness.status === 'FAILED') { - throw new Error(`Harness "${entry.name}" entered FAILED state`); - } - resultState[entry.name] = { - harnessId: finalHarness.harnessId, - harnessArn: finalHarness.arn, - roleArn: executionRoleArn, - status: finalHarness.status, - agentRuntimeArn: extractRuntimeArn(finalHarness), - memoryArn: createOptions.memory?.agentCoreMemoryConfiguration?.arn, - configHash, - }; - notes.push(`Created harness "${entry.name}"`); - } - } catch (err) { - const message = err instanceof Error ? err.message : String(err); - const hint = getDeployErrorHint(err, region); - const errorMsg = hint - ? 
`Failed to deploy harness "${entry.name}": ${message}\n${hint}` - : `Failed to deploy harness "${entry.name}": ${message}`; - return { success: false, error: errorMsg, state: resultState }; - } - } - - // Delete harnesses that exist in deployed state but not in project spec - for (const [name, state] of Object.entries(deployedHarnesses)) { - if (!projectHarnessNames.has(name)) { - try { - await deleteHarness({ region, harnessId: state.harnessId }); - delete resultState[name]; - notes.push(`Deleted harness "${name}"`); - } catch (err) { - const message = err instanceof Error ? err.message : String(err); - return { success: false, error: `Failed to delete harness "${name}": ${message}`, state: resultState }; - } - } - } - - return { success: true, state: resultState, notes }; - } - - async teardown(context: ImperativeDeployContext): Promise> { - const { target, deployedState } = context; - const region = target.region; - const targetName = target.name; - - const deployedHarnesses = deployedState.targets?.[targetName]?.resources?.harnesses ?? {}; - const notes: string[] = []; - - for (const [name, state] of Object.entries(deployedHarnesses)) { - try { - await deleteHarness({ region, harnessId: state.harnessId }); - notes.push(`Deleted harness "${name}"`); - } catch (err) { - const message = err instanceof Error ? err.message : String(err); - return { success: false, error: `Failed to delete harness "${name}": ${message}` }; - } - } - - return { success: true, state: {}, notes }; - } -} - -// ============================================================================ -// Helpers -// ============================================================================ - -/** - * Resolve the IAM role ARN for a harness from CDK stack outputs. - * - * Supports two construct tree layouts: - * Old (AgentCoreHarnessRole directly under Application): - * ApplicationHarness{PascalName}RoleArnOutput... 
- * New (AgentCoreHarnessEnvironment wrapping AgentCoreHarnessRole): - * ApplicationHarness{PascalName}RoleRoleArnOutput... - */ -function resolveRoleArn(harnessName: string, cdkOutputs?: Record): string | undefined { - if (!cdkOutputs) return undefined; - - const pascalName = toPascalId(harnessName); - // Longer prefix first — RoleArn is a substring of RoleRoleArn, so checking it first would match both. - const prefixes = [`ApplicationHarness${pascalName}RoleRoleArn`, `ApplicationHarness${pascalName}RoleArn`]; - - for (const [key, value] of Object.entries(cdkOutputs)) { - if (prefixes.some(p => key.startsWith(p))) { - return value; - } - } - - return undefined; -} - -function isRoleValidationError(err: unknown): boolean { - return err instanceof AgentCoreApiError && err.statusCode === 400 && err.errorBody.includes('Role validation failed'); -} - -async function createWithRetry(options: Parameters[0]): Promise { - let lastError: unknown; - for (let attempt = 0; attempt <= ROLE_VALIDATION_RETRY_DELAYS_MS.length; attempt++) { - try { - return await createHarness(options); - } catch (err) { - if (!isRoleValidationError(err) || attempt === ROLE_VALIDATION_RETRY_DELAYS_MS.length) { - throw err; - } - lastError = err; - await sleep(ROLE_VALIDATION_RETRY_DELAYS_MS[attempt]!); - } - } - throw lastError; -} - -async function waitForReady(region: string, harness: Harness): Promise { - if (harness.status === 'READY' || harness.status === 'FAILED') return harness; - - for (let i = 0; i < READY_POLL_MAX_ATTEMPTS; i++) { - await sleep(READY_POLL_INTERVAL_MS); - const result = await getHarness({ region, harnessId: harness.harnessId }); - if (result.harness.status === 'READY' || result.harness.status === 'FAILED') return result.harness; - } - - return harness; -} - -function extractRuntimeArn(harness: Harness): string | undefined { - return harness.environment?.agentCoreRuntimeEnvironment?.agentRuntimeArn; -} - -function sleep(ms: number): Promise { - return new Promise(resolve => 
setTimeout(resolve, ms)); -} - -function getDeployErrorHint(err: unknown, region: string): string | undefined { - if (!(err instanceof AgentCoreApiError)) return undefined; - const body = err.errorBody.toLowerCase(); - - if (err.statusCode === 403) { - return 'Check that your AWS credentials have permission to call the AgentCore Harness API.'; - } - if (body.includes('not available') || body.includes('not supported') || body.includes('endpoint')) { - return `Harness may not be available in ${region}. Try a different region (e.g., us-east-1, us-west-2).`; - } - if (err.statusCode === 429) { - return 'Too many requests. Wait a moment and try again.'; - } - if (err.statusCode >= 500) { - return 'This looks like a service-side issue. Wait a moment and redeploy.'; - } - return undefined; -} diff --git a/src/cli/operations/deploy/imperative/deployers/harness-mapper.ts b/src/cli/operations/deploy/imperative/deployers/harness-mapper.ts deleted file mode 100644 index 165cc5d19..000000000 --- a/src/cli/operations/deploy/imperative/deployers/harness-mapper.ts +++ /dev/null @@ -1,433 +0,0 @@ -/** - * Maps user-facing HarnessSpec (harness.json) to the CreateHarness API wire format. - * - * Each transformation is a pure function that converts a section of the spec - * into the corresponding API field. The top-level mapHarnessSpecToCreateOptions - * orchestrates them and returns a complete CreateHarnessOptions object. 
- */ -import type { DeployedResourceState, HarnessSpec, Memory } from '../../../../../schema'; -import type { - CreateHarnessOptions, - HarnessEnvironmentArtifact, - HarnessEnvironmentProvider, - HarnessMemoryConfiguration, - HarnessModelConfiguration, - HarnessSkill, - HarnessSystemPrompt, - HarnessTool, - HarnessTruncationConfiguration, -} from '../../../../aws/agentcore-harness'; -import { toPascalId } from '../../../../cloudformation/logical-ids'; -import { buildFilesystemConfigurations } from '../../../../commands/shared/filesystem-utils'; -import { readFile, stat } from 'fs/promises'; -import { join } from 'path'; - -const MAX_PROMPT_FILE_SIZE = 1024 * 1024; // 1 MB - -// ============================================================================ -// Public Interface -// ============================================================================ - -export interface MapHarnessOptions { - harnessSpec: HarnessSpec; - harnessDir: string; - executionRoleArn: string; - region: string; - projectName: string; - deployedResources?: DeployedResourceState; - cdkOutputs?: Record; - /** The memory spec for the memory this harness references, used to derive retrievalConfig namespaces. */ - memorySpec?: Memory; -} - -/** - * Transform a HarnessSpec into CreateHarnessOptions for the control plane API. 
- */ -export async function mapHarnessSpecToCreateOptions(options: MapHarnessOptions): Promise { - const { harnessSpec, harnessDir, executionRoleArn, region, projectName, deployedResources, cdkOutputs, memorySpec } = - options; - - const result: CreateHarnessOptions = { - region, - harnessName: `${projectName}_${harnessSpec.name}`, - executionRoleArn, - }; - - // Model - result.model = mapModel(harnessSpec.model); - - // System prompt (may read from disk or auto-discover system-prompt.md) - if (harnessSpec.systemPrompt !== undefined) { - result.systemPrompt = await mapSystemPrompt(harnessSpec.systemPrompt, harnessDir); - } else { - // Auto-discover system-prompt.md if it exists - result.systemPrompt = await tryLoadSystemPromptFile(harnessDir); - } - - // Tools - if (harnessSpec.tools.length > 0) { - result.tools = mapTools(harnessSpec.tools); - } - - // Skills - if (harnessSpec.skills.length > 0) { - result.skills = mapSkills(harnessSpec.skills); - } - - // Allowed tools - if (harnessSpec.allowedTools) { - result.allowedTools = harnessSpec.allowedTools; - } - - // Memory - if (harnessSpec.memory) { - result.memory = mapMemory(harnessSpec.memory, deployedResources, cdkOutputs, memorySpec); - } - - // Truncation - if (harnessSpec.truncation) { - result.truncation = mapTruncation(harnessSpec.truncation); - } - - // Execution limits - if (harnessSpec.maxIterations !== undefined) { - result.maxIterations = harnessSpec.maxIterations; - } - if (harnessSpec.maxTokens !== undefined) { - result.maxTokens = harnessSpec.maxTokens; - } - if (harnessSpec.timeoutSeconds !== undefined) { - result.timeoutSeconds = harnessSpec.timeoutSeconds; - } - - // Container artifact - if (harnessSpec.containerUri) { - result.environmentArtifact = mapEnvironmentArtifact(harnessSpec.containerUri); - } else if (harnessSpec.dockerfile) { - const builtUri = resolveContainerUriFromOutputs(harnessSpec.name, cdkOutputs); - if (!builtUri) { - throw new Error( - `Harness "${harnessSpec.name}" specifies 
"dockerfile" but no container URI was found in CDK outputs. ` + - `Expected a CDK output key starting with "ApplicationHarness${toPascalId(harnessSpec.name)}ImageUri" or "Harness${toPascalId(harnessSpec.name)}ContainerUri".` - ); - } - result.environmentArtifact = mapEnvironmentArtifact(builtUri); - } - - // Environment provider (network + lifecycle) - const environmentProvider = mapEnvironmentProvider(harnessSpec); - if (environmentProvider) { - result.environment = environmentProvider; - } - - // Environment variables - if (harnessSpec.environmentVariables) { - result.environmentVariables = harnessSpec.environmentVariables; - } - - // Tags - if (harnessSpec.tags) { - result.tags = harnessSpec.tags; - } - - // Authorizer configuration — authorizerType is inferred by the API from the - // presence of authorizerConfiguration, so only the configuration is forwarded. - if (harnessSpec.authorizerConfiguration?.customJwtAuthorizer) { - const jwt = harnessSpec.authorizerConfiguration.customJwtAuthorizer; - result.authorizerConfiguration = { - customJWTAuthorizer: { - discoveryUrl: jwt.discoveryUrl, - ...(jwt.allowedAudience && { allowedAudience: jwt.allowedAudience }), - ...(jwt.allowedClients && { allowedClients: jwt.allowedClients }), - ...(jwt.allowedScopes && { allowedScopes: jwt.allowedScopes }), - ...(jwt.customClaims && { customClaims: jwt.customClaims }), - }, - }; - } - - return result; -} - -// ============================================================================ -// Model Mapping -// ============================================================================ - -function mapModel(model: HarnessSpec['model']): HarnessModelConfiguration { - const { provider, modelId, apiKeyArn, apiFormat, temperature, topP, topK, maxTokens } = model; - - switch (provider) { - case 'bedrock': - return { - bedrockModelConfig: { - modelId, - ...(apiFormat && apiFormat !== 'converse_stream' && { apiFormat }), - ...(temperature !== undefined && { temperature }), - ...(topP !== 
undefined && { topP }), - ...(maxTokens !== undefined && { maxTokens }), - }, - }; - case 'open_ai': - return { - openAiModelConfig: { - modelId, - ...(apiKeyArn && { apiKeyArn }), - ...(apiFormat && apiFormat !== 'responses' && { apiFormat: apiFormat as 'responses' | 'chat_completions' }), - ...(temperature !== undefined && { temperature }), - ...(topP !== undefined && { topP }), - ...(maxTokens !== undefined && { maxTokens }), - }, - }; - case 'gemini': - return { - geminiModelConfig: { - modelId, - ...(apiKeyArn && { apiKeyArn }), - ...(temperature !== undefined && { temperature }), - ...(topP !== undefined && { topP }), - ...(topK !== undefined && { topK }), - ...(maxTokens !== undefined && { maxTokens }), - }, - }; - } -} - -// ============================================================================ -// System Prompt Mapping -// ============================================================================ - -const FILE_PATH_PATTERN = /^\.\.?\//; -const FILE_EXTENSION_PATTERN = /\.(md|txt)$/; - -function isFilePath(value: string): boolean { - return FILE_PATH_PATTERN.test(value) || FILE_EXTENSION_PATTERN.test(value); -} - -async function mapSystemPrompt(prompt: string, harnessDir: string): Promise { - let text: string; - - if (isFilePath(prompt)) { - const filePath = join(harnessDir, prompt); - const fileStats = await stat(filePath); - if (fileStats.size > MAX_PROMPT_FILE_SIZE) { - throw new Error( - `System prompt file "${prompt}" is too large (${fileStats.size} bytes). Maximum size is ${MAX_PROMPT_FILE_SIZE} bytes.` - ); - } - text = await readFile(filePath, 'utf-8'); - } else { - text = prompt; - } - - return [{ text }]; -} - -/** - * Try to load system-prompt.md from harness directory. - * Returns undefined if file doesn't exist (harness will have no system prompt). 
- */ -async function tryLoadSystemPromptFile(harnessDir: string): Promise { - const promptPath = join(harnessDir, 'system-prompt.md'); - - try { - const fileStats = await stat(promptPath); - if (fileStats.size > MAX_PROMPT_FILE_SIZE) { - throw new Error( - `System prompt file "system-prompt.md" is too large (${fileStats.size} bytes). Maximum size is ${MAX_PROMPT_FILE_SIZE} bytes.` - ); - } - const text = await readFile(promptPath, 'utf-8'); - return [{ text }]; - } catch (err) { - // File doesn't exist - return undefined (no system prompt) - if ((err as NodeJS.ErrnoException).code === 'ENOENT') { - return undefined; - } - // Other errors (permissions, etc.) should be thrown - throw err; - } -} - -// ============================================================================ -// Tools Mapping -// ============================================================================ - -function mapTools(tools: HarnessSpec['tools']): HarnessTool[] { - return tools.map(tool => ({ - type: tool.type, - name: tool.name, - ...(tool.config && { config: tool.config }), - })); -} - -// ============================================================================ -// Skills Mapping -// ============================================================================ - -function mapSkills(skills: string[]): HarnessSkill[] { - return skills.map(path => ({ path })); -} - -// ============================================================================ -// Memory Mapping -// ============================================================================ - -function mapMemory( - memory: NonNullable, - deployedResources?: DeployedResourceState, - cdkOutputs?: Record, - memorySpec?: Memory -): HarnessMemoryConfiguration | undefined { - let arn: string | undefined; - - // Direct ARN takes precedence - if (memory.arn) { - arn = memory.arn; - } else if (memory.name) { - // Resolve by name from deployed state or CDK outputs - const deployedMemory = deployedResources?.memories?.[memory.name]; - if 
(deployedMemory) { - arn = deployedMemory.memoryArn; - } else if (cdkOutputs) { - arn = resolveMemoryArnFromOutputs(memory.name, cdkOutputs); - } - - if (!arn) { - throw new Error( - `Memory "${memory.name}" referenced by harness is not in deployed state. Ensure the memory is defined in agentcore.json and has been deployed.` - ); - } - } - - if (!arn) { - return undefined; - } - - // Build retrievalConfig from the memory's strategy namespaces so the harness - // runtime knows which namespaces to search at inference time. - const retrievalConfig = buildRetrievalConfig(memorySpec); - - return { - agentCoreMemoryConfiguration: { - arn, - ...(memory.actorId && { actorId: memory.actorId }), - ...(retrievalConfig && { retrievalConfig }), - }, - }; -} - -function buildRetrievalConfig( - memorySpec: Memory | undefined -): Record | undefined { - if (!memorySpec?.strategies?.length) return undefined; - - const namespaces = memorySpec.strategies.flatMap(s => [ - ...(s.namespaces ?? []), - ...(s.type === 'EPISODIC' ? (s.reflectionNamespaces ?? []) : []), - ]); - - return namespaces.length > 0 ? Object.fromEntries(namespaces.map(ns => [ns, {}])) : undefined; -} - -/** - * Resolve memory ARN from CDK stack outputs. - * The CDK construct exports memory ARNs with keys matching: - * ApplicationMemory{PascalName}ArnOutput... 
- */ -function resolveMemoryArnFromOutputs(memoryName: string, cdkOutputs: Record): string | undefined { - const pascalName = toPascalId(memoryName); - const prefix = `ApplicationMemory${pascalName}ArnOutput`; - - for (const [key, value] of Object.entries(cdkOutputs)) { - if (key.startsWith(prefix)) { - return value; - } - } - - return undefined; -} - -// ============================================================================ -// Truncation Mapping -// ============================================================================ - -function mapTruncation(truncation: NonNullable): HarnessTruncationConfiguration { - return { - strategy: truncation.strategy, - config: truncation.config as HarnessTruncationConfiguration['config'], - }; -} - -// ============================================================================ -// Container URI Resolution (from CDK outputs for dockerfile-based harnesses) -// ============================================================================ - -/** - * Supports two construct tree layouts: - * Old (CfnOutput on stack root): - * Harness{PascalName}ContainerUri... - * New (CfnOutput inside AgentCoreHarnessEnvironment): - * ApplicationHarness{PascalName}ImageUriOutput... 
- */ -function resolveContainerUriFromOutputs(harnessName: string, cdkOutputs?: Record): string | undefined { - if (!cdkOutputs) return undefined; - - const pascalName = toPascalId(harnessName); - const prefixes = [`ApplicationHarness${pascalName}ImageUri`, `Harness${pascalName}ContainerUri`]; - - for (const [key, value] of Object.entries(cdkOutputs)) { - if (prefixes.some(p => key.startsWith(p))) { - return value; - } - } - - return undefined; -} - -// ============================================================================ -// Container / Environment Artifact Mapping -// ============================================================================ - -function mapEnvironmentArtifact(containerUri: string): HarnessEnvironmentArtifact { - return { - containerConfiguration: { containerUri }, - }; -} - -// ============================================================================ -// Environment Provider (Network + Lifecycle) Mapping -// ============================================================================ - -function mapEnvironmentProvider(spec: HarnessSpec): HarnessEnvironmentProvider | undefined { - const hasNetwork = !!spec.networkConfig; - const hasLifecycle = !!spec.lifecycleConfig; - const hasFilesystem = !!spec.sessionStoragePath || !!spec.efsAccessPoints?.length || !!spec.s3AccessPoints?.length; - - if (!hasNetwork && !hasLifecycle && !hasFilesystem) { - return undefined; - } - - const agentCoreRuntimeEnvironment: Record = {}; - - if (spec.networkConfig) { - agentCoreRuntimeEnvironment.networkConfiguration = { - networkMode: 'VPC', - networkModeConfig: { - subnets: spec.networkConfig.subnets, - securityGroups: spec.networkConfig.securityGroups, - }, - }; - } - - if (spec.lifecycleConfig) { - agentCoreRuntimeEnvironment.lifecycleConfiguration = spec.lifecycleConfig; - } - - const fsConfig = buildFilesystemConfigurations(spec.sessionStoragePath, spec.efsAccessPoints, spec.s3AccessPoints); - if ('filesystemConfigurations' in fsConfig) { - 
agentCoreRuntimeEnvironment.filesystemConfigurations = fsConfig.filesystemConfigurations; - } - - return { - agentCoreRuntimeEnvironment, - }; -} diff --git a/src/cli/operations/deploy/imperative/deployers/index.ts b/src/cli/operations/deploy/imperative/deployers/index.ts deleted file mode 100644 index 655785b10..000000000 --- a/src/cli/operations/deploy/imperative/deployers/index.ts +++ /dev/null @@ -1,2 +0,0 @@ -export { HarnessDeployer } from './harness-deployer'; -export { mapHarnessSpecToCreateOptions, type MapHarnessOptions } from './harness-mapper'; diff --git a/src/cli/operations/deploy/imperative/index.ts b/src/cli/operations/deploy/imperative/index.ts deleted file mode 100644 index 930dfe094..000000000 --- a/src/cli/operations/deploy/imperative/index.ts +++ /dev/null @@ -1,18 +0,0 @@ -import { HarnessDeployer } from './deployers'; -import { ImperativeDeploymentManager } from './manager'; - -export type { - DeployPhase, - DeployProgress, - ImperativeDeployContext, - ImperativeDeployResult, - ImperativeDeployer, -} from './types'; - -export { ImperativeDeploymentManager, type ImperativePhaseResult } from './manager'; - -export { HarnessDeployer, mapHarnessSpecToCreateOptions, type MapHarnessOptions } from './deployers'; - -export function createDeploymentManager(): ImperativeDeploymentManager { - return new ImperativeDeploymentManager().register(new HarnessDeployer()); -} diff --git a/src/cli/operations/deploy/imperative/manager.ts b/src/cli/operations/deploy/imperative/manager.ts deleted file mode 100644 index b7e22ecda..000000000 --- a/src/cli/operations/deploy/imperative/manager.ts +++ /dev/null @@ -1,110 +0,0 @@ -import type { DeployPhase, ImperativeDeployContext, ImperativeDeployResult, ImperativeDeployer } from './types'; - -export interface ImperativePhaseResult { - success: boolean; - results: Map; - error?: string; - notes: string[]; -} - -export class ImperativeDeploymentManager { - private readonly deployers: ImperativeDeployer[] = []; - - 
register(deployer: ImperativeDeployer): this { - this.deployers.push(deployer); - return this; - } - - async runPhase(phase: DeployPhase, context: ImperativeDeployContext): Promise { - const results = new Map(); - const notes: string[] = []; - - const applicable = this.deployers.filter(d => d.phase === phase && d.shouldRun(context)); - - for (const deployer of applicable) { - context.onProgress?.(deployer.label, 'start'); - - try { - const result = await deployer.deploy(context); - results.set(deployer.name, result); - - if (result.notes) { - notes.push(...result.notes); - } - - if (!result.success) { - context.onProgress?.(deployer.label, 'error'); - return { - success: false, - results, - error: result.error ?? `Deployer '${deployer.name}' failed`, - notes, - }; - } - - context.onProgress?.(deployer.label, 'done'); - } catch (err) { - const errorMessage = err instanceof Error ? err.message : String(err); - results.set(deployer.name, { success: false, error: errorMessage }); - context.onProgress?.(deployer.label, 'error'); - return { - success: false, - results, - error: errorMessage, - notes, - }; - } - } - - return { success: true, results, notes }; - } - - async teardownAll(context: ImperativeDeployContext): Promise { - const results = new Map(); - const notes: string[] = []; - const errors: string[] = []; - - const applicable = this.deployers.filter(d => d.shouldRun(context)).reverse(); - - for (const deployer of applicable) { - context.onProgress?.(deployer.label, 'start'); - - try { - const result = await deployer.teardown(context); - results.set(deployer.name, result); - - if (result.notes) { - notes.push(...result.notes); - } - - if (!result.success) { - context.onProgress?.(deployer.label, 'error'); - errors.push(result.error ?? `Teardown of '${deployer.name}' failed`); - continue; - } - - context.onProgress?.(deployer.label, 'done'); - } catch (err) { - const errorMessage = err instanceof Error ? 
err.message : String(err); - results.set(deployer.name, { success: false, error: errorMessage }); - context.onProgress?.(deployer.label, 'error'); - errors.push(errorMessage); - } - } - - if (errors.length > 0) { - return { - success: false, - results, - error: errors.join('; '), - notes, - }; - } - - return { success: true, results, notes }; - } - - hasDeployersForPhase(phase: DeployPhase, context: ImperativeDeployContext): boolean { - return this.deployers.some(d => d.phase === phase && d.shouldRun(context)); - } -} diff --git a/src/cli/operations/deploy/imperative/types.ts b/src/cli/operations/deploy/imperative/types.ts deleted file mode 100644 index 7efa13e7a..000000000 --- a/src/cli/operations/deploy/imperative/types.ts +++ /dev/null @@ -1,32 +0,0 @@ -import type { ConfigIO } from '../../../../lib'; -import type { AgentCoreProjectSpec, AwsDeploymentTarget, DeployedState } from '../../../../schema'; - -export type DeployPhase = 'pre-cdk' | 'post-cdk' | 'standalone'; - -export type DeployProgress = (step: string, status: 'start' | 'done' | 'error') => void; - -export interface ImperativeDeployContext { - projectSpec: AgentCoreProjectSpec; - target: AwsDeploymentTarget; - configIO: ConfigIO; - deployedState: DeployedState; - onProgress?: DeployProgress; - cdkOutputs?: Record; - autoConfirm?: boolean; -} - -export interface ImperativeDeployResult> { - success: boolean; - state?: TState; - notes?: string[]; - error?: string; -} - -export interface ImperativeDeployer> { - readonly name: string; - readonly label: string; - readonly phase: DeployPhase; - shouldRun(context: ImperativeDeployContext): boolean; - deploy(context: ImperativeDeployContext): Promise>; - teardown(context: ImperativeDeployContext): Promise>; -} diff --git a/src/cli/operations/deploy/index.ts b/src/cli/operations/deploy/index.ts index ed1c6fc1d..29f0fd7e7 100644 --- a/src/cli/operations/deploy/index.ts +++ b/src/cli/operations/deploy/index.ts @@ -55,14 +55,6 @@ export { // Post-deploy 
observability setup export { setupTransactionSearch } from './post-deploy-observability'; -// Post-deploy HTTP gateways -export { - setupHttpGateways, - type SetupHttpGatewaysOptions, - type SetupHttpGatewaysResult, - type HttpGatewaySetupResult, -} from './post-deploy-http-gateways'; - // Post-deploy online eval enablement export { enableOnlineEvalConfigs, @@ -73,13 +65,12 @@ export { export { ensureDefaultDeploymentTarget } from './ensure-target'; -// Post-deploy config bundles +// Managed-memory heads-up (shared by the CLI command + TUI deploy flow + add harness) export { - setupConfigBundles, - type SetupConfigBundlesOptions, - type SetupConfigBundlesResult, - type ConfigBundleSetupResult, -} from './post-deploy-config-bundles'; + MANAGED_MEMORY_DEPLOY_NOTICE, + MANAGED_MEMORY_ADD_NOTICE, + hasManagedMemoryHarness, +} from './managed-memory-notice'; // Re-export external requirements for convenience export { diff --git a/src/cli/operations/deploy/managed-memory-notice.ts b/src/cli/operations/deploy/managed-memory-notice.ts new file mode 100644 index 000000000..51699a2c3 --- /dev/null +++ b/src/cli/operations/deploy/managed-memory-notice.ts @@ -0,0 +1,47 @@ +import type { ConfigIO } from '../../../lib'; +import { isGatedFeaturesEnabled } from '../../feature-flags'; + +/** + * One-shot heads-up shown before the CFN apply when a harness uses managed memory. + * Managed-memory harnesses provision a dedicated AgentCore Memory resource during deploy, + * which is the slow part — surface this while it happens so the wait is explained. + * + * Single source of truth for both deploy entry points (CLI command + TUI flow) so the + * wording can't drift between them. + */ +export const MANAGED_MEMORY_DEPLOY_NOTICE = + 'Managed memory: this harness automatically provisions a dedicated AgentCore Memory resource ' + + '(the default unless you set --memory-mode existing or disabled).\n\n' + + 'Memory provisioning can take 3-5 minutes. 
We know this is slow, and we will be reducing this ' + + 'provisioning time. To skip it, redeploy with --memory-mode disabled.'; + +/** + * Same heads-up worded for `add harness`, where the provisioning hasn't happened yet — it explains + * what the next deploy will do and how to opt out before deploying. + */ +export const MANAGED_MEMORY_ADD_NOTICE = + 'Managed memory: this harness will automatically provision a dedicated AgentCore Memory resource ' + + 'on deploy (the default unless you set --memory-mode existing or disabled).\n\n' + + 'Memory provisioning can take 3-5 minutes. We know this is slow, and we will be reducing this ' + + 'provisioning time. To skip it, recreate the harness with --memory-mode disabled.'; + +/** + * Returns true when the gate is on and at least one harness in the project uses managed memory. + * The memory mode lives in each harness's harness.json (not the agentcore.json pointer list), so + * the per-harness specs are read to detect it. + */ +export async function hasManagedMemoryHarness( + configIO: ConfigIO, + harnesses: { name: string }[] | undefined +): Promise { + if (!isGatedFeaturesEnabled()) { + return false; + } + for (const h of harnesses ?? 
[]) { + const harnessSpec = await configIO.readHarnessSpec(h.name).catch(() => undefined); + if (harnessSpec?.memory?.mode === 'managed') { + return true; + } + } + return false; +} diff --git a/src/cli/operations/deploy/post-deploy-ab-tests.ts b/src/cli/operations/deploy/post-deploy-ab-tests.ts deleted file mode 100644 index aab6aa6ce..000000000 --- a/src/cli/operations/deploy/post-deploy-ab-tests.ts +++ /dev/null @@ -1,721 +0,0 @@ -import type { ABTestDeployedState, AgentCoreProjectSpec, DeployedResourceState } from '../../../schema'; -import { getCredentialProvider } from '../../aws/account'; -import { createABTest, deleteABTest, getABTest, listABTests, updateABTest } from '../../aws/agentcore-ab-tests'; -import type { ABTestEvaluationConfig, ABTestVariant, TrafficAllocationConfig } from '../../aws/agentcore-ab-tests'; -import { arnPrefix } from '../../aws/partition'; -import { - CreateRoleCommand, - DeleteRoleCommand, - DeleteRolePolicyCommand, - GetRoleCommand, - IAMClient, - PutRolePolicyCommand, -} from '@aws-sdk/client-iam'; -import { createHash } from 'node:crypto'; - -// ============================================================================ -// Types -// ============================================================================ - -export interface SetupABTestsOptions { - region: string; - projectSpec: AgentCoreProjectSpec; - existingABTests?: Record; - /** Full deployed resource state for resolving ARN references. 
*/ - deployedResources?: DeployedResourceState; -} - -export interface ABTestSetupResult { - testName: string; - status: 'created' | 'updated' | 'deleted' | 'skipped' | 'error'; - abTestId?: string; - abTestArn?: string; - error?: string; - warning?: string; -} - -export interface SetupABTestsResult { - results: ABTestSetupResult[]; - abTests: Record; - hasErrors: boolean; -} - -// ============================================================================ -// Constants -// ============================================================================ - -const AB_TEST_ROLE_POLICY_NAME = 'ABTestExecutionPolicy'; - -// ============================================================================ -// Config Hash -// ============================================================================ - -/** - * Compute a deterministic SHA-256 hash of the key AB test configuration fields. - * Used to detect whether a redeployment actually changed the test config. - */ -function computeConfigHash(testSpec: { - variants: unknown; - evaluationConfig: unknown; - gatewayRef: string; - gatewayFilter?: unknown; - trafficAllocationConfig?: unknown; -}): string { - const payload = JSON.stringify({ - variants: testSpec.variants, - evaluationConfig: testSpec.evaluationConfig, - gatewayRef: testSpec.gatewayRef, - gatewayFilter: testSpec.gatewayFilter, - trafficAllocationConfig: testSpec.trafficAllocationConfig, - }); - return createHash('sha256').update(payload).digest('hex'); -} - -// ============================================================================ -// Shared Update Helper -// ============================================================================ - -interface ApplyABTestUpdateOptions { - region: string; - abTestId: string; - resolvedVariants: ABTestVariant[]; - resolvedEvalConfig: ABTestEvaluationConfig; - trafficAllocationConfig?: TrafficAllocationConfig; - resolvedRoleArn?: string; - testName: string; - roleCreatedByCli: boolean; - currentHash: string; -} - -async 
function applyABTestUpdate( - options: ApplyABTestUpdateOptions -): Promise<{ state: ABTestDeployedState; result: ABTestSetupResult }> { - const updateResult = await updateABTest({ - region: options.region, - abTestId: options.abTestId, - variants: options.resolvedVariants, - evaluationConfig: options.resolvedEvalConfig, - trafficAllocationConfig: options.trafficAllocationConfig, - roleArn: options.resolvedRoleArn, - }); - - return { - state: { - abTestId: updateResult.abTestId, - abTestArn: updateResult.abTestArn, - roleArn: options.resolvedRoleArn, - roleCreatedByCli: options.roleCreatedByCli, - configHash: options.currentHash, - }, - result: { - testName: options.testName, - status: 'updated', - abTestId: updateResult.abTestId, - abTestArn: updateResult.abTestArn, - }, - }; -} - -// ============================================================================ -// Implementation -// ============================================================================ - -/** - * Create, update, or delete AB tests post-deploy. - * - * Pattern: - * 1. For each AB test in project spec → resolve ARN references, create or skip - * 2. For each AB test in deployed-state but NOT in project spec → delete (reconciliation) - * 3. Return updated deployed state entries - */ -export async function setupABTests(options: SetupABTestsOptions): Promise { - const { region, projectSpec, existingABTests, deployedResources } = options; - const results: ABTestSetupResult[] = []; - const abTests: Record = {}; - - // Create or skip tests from the spec - for (const testSpec of projectSpec.abTests ?? 
[]) { - let resolvedRoleArn: string | undefined; - let roleCreatedByCli = false; - try { - const currentHash = computeConfigHash(testSpec); - const existingTest = existingABTests?.[testSpec.name]; - - // Resolve ARN references from deployed state - const resolvedVariants = resolveVariants(testSpec.variants, projectSpec.name, deployedResources); - const resolvedGatewayArn = resolveGatewayArn(testSpec.gatewayRef, deployedResources); - if (!resolvedGatewayArn.startsWith('arn:') || resolvedGatewayArn.split(':').length < 6) { - results.push({ - testName: testSpec.name, - status: 'error', - error: `Gateway ARN could not be resolved for AB test "${testSpec.name}". Reference "${testSpec.gatewayRef}" did not match any deployed gateway. Ensure the HTTP gateway was deployed successfully.`, - }); - continue; - } - const resolvedEvalConfig = resolveEvalConfig(testSpec.evaluationConfig, deployedResources); - if (testSpec.roleArn) { - resolvedRoleArn = testSpec.roleArn; - } else { - resolvedRoleArn = await getOrCreateABTestRole({ - region, - projectName: projectSpec.name, - testName: testSpec.name, - gatewayArn: resolvedGatewayArn, - }); - roleCreatedByCli = true; - } - - if (existingTest) { - // Config unchanged — skip to preserve running state - if (existingTest.configHash === currentHash) { - abTests[testSpec.name] = existingTest; - results.push({ - testName: testSpec.name, - status: 'skipped', - abTestId: existingTest.abTestId, - abTestArn: existingTest.abTestArn, - }); - continue; - } - - // Config changed — update in-place instead of delete+recreate - const applied = await applyABTestUpdate({ - region, - abTestId: existingTest.abTestId, - resolvedVariants, - resolvedEvalConfig, - trafficAllocationConfig: testSpec.trafficAllocationConfig as TrafficAllocationConfig | undefined, - resolvedRoleArn, - testName: testSpec.name, - roleCreatedByCli: existingTest.roleCreatedByCli ?? 
roleCreatedByCli, - currentHash, - }); - abTests[testSpec.name] = applied.state; - results.push(applied.result); - continue; - } - - // Try to find by name via list (handles re-creation after state loss) - const existingByName = await findABTestByName(region, projectSpec.name, testSpec.name); - if (existingByName) { - // Found by name — update in-place with fresh config - const applied = await applyABTestUpdate({ - region, - abTestId: existingByName.abTestId, - resolvedVariants, - resolvedEvalConfig, - trafficAllocationConfig: testSpec.trafficAllocationConfig as TrafficAllocationConfig | undefined, - resolvedRoleArn, - testName: testSpec.name, - roleCreatedByCli, - currentHash, - }); - abTests[testSpec.name] = applied.state; - results.push(applied.result); - continue; - } - - const createOptions = { - region, - name: `${projectSpec.name}_${testSpec.name}`, - description: testSpec.description, - gatewayArn: resolvedGatewayArn, - roleArn: resolvedRoleArn, - variants: resolvedVariants, - evaluationConfig: resolvedEvalConfig, - gatewayFilter: testSpec.gatewayFilter, - trafficAllocationConfig: testSpec.trafficAllocationConfig as TrafficAllocationConfig | undefined, - maxDurationDays: testSpec.maxDurationDays, - enableOnCreate: testSpec.enableOnCreate, - }; - - // Retry on gateway/eval access denied — IAM policy propagation can take time - let result; - const MAX_RETRIES = 5; - const BASE_DELAY_MS = 5_000; - for (let attempt = 0; attempt < MAX_RETRIES; attempt++) { - try { - result = await createABTest(createOptions); - break; - } catch (err: unknown) { - const errCode = (err as { name?: string }).name; - const errStatus = (err as { $metadata?: { httpStatusCode?: number } }).$metadata?.httpStatusCode; - const msg = err instanceof Error ? 
err.message : String(err); - - const isRetryable = - errCode === 'AccessDeniedException' || - errStatus === 403 || - msg.includes('Access denied') || - msg.includes('Gateway validation error'); - - if (isRetryable && attempt < MAX_RETRIES - 1) { - const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 1000; - await new Promise(resolve => setTimeout(resolve, delay)); - continue; - } - throw err; - } - } - if (!result) throw new Error('AB test creation failed after retries'); - - abTests[testSpec.name] = { - abTestId: result.abTestId, - abTestArn: result.abTestArn, - roleArn: resolvedRoleArn, - roleCreatedByCli, - configHash: currentHash, - }; - - results.push({ - testName: testSpec.name, - status: 'created', - abTestId: result.abTestId, - abTestArn: result.abTestArn, - }); - } catch (err) { - // Clean up auto-created role on AB test creation failure to avoid orphaned roles - if (roleCreatedByCli && resolvedRoleArn) { - try { - await deleteABTestRole(region, resolvedRoleArn); - } catch { - // Best-effort role cleanup - } - } - results.push({ - testName: testSpec.name, - status: 'error', - error: err instanceof Error ? err.message : String(err), - }); - } - } - - // Orphaned AB tests are deleted by deleteOrphanedABTests() which runs - // as a separate pre-pass before HTTP gateway setup. No deletion loop here. - - return { - results, - abTests, - hasErrors: results.some(r => r.status === 'error'), - }; -} - -/** - * Delete orphaned AB tests (in deployed-state but removed from spec). - * - * AB tests create rules on HTTP gateways, so they must be deleted before - * the gateway can be deleted. Call this before setupHttpGateways. - * - * The main setupABTests deletion loop becomes a no-op for any tests - * already cleaned up here. 
- */ -export async function deleteOrphanedABTests(options: { - region: string; - projectSpec: AgentCoreProjectSpec; - existingABTests?: Record; -}): Promise<{ results: ABTestSetupResult[]; hasErrors: boolean }> { - const { region, projectSpec, existingABTests } = options; - if (!existingABTests) return { results: [], hasErrors: false }; - - const specTestNames = new Set((projectSpec.abTests ?? []).map(t => t.name)); - const results: ABTestSetupResult[] = []; - - for (const [testName, testState] of Object.entries(existingABTests)) { - if (!specTestNames.has(testName)) { - try { - // Stop the AB test first — running tests cannot be deleted - let wasStopped = false; - let stopTimedOut = false; - try { - await updateABTest({ region, abTestId: testState.abTestId, executionStatus: 'STOPPED' }); - wasStopped = true; - - // Poll until executionStatus is STOPPED (stop is async) - let stopped = false; - for (let i = 0; i < 20; i++) { - const test = await getABTest({ region, abTestId: testState.abTestId }); - if (test.executionStatus === 'STOPPED') { - stopped = true; - break; - } - await new Promise(resolve => setTimeout(resolve, 3_000)); - } - if (!stopped) { - stopTimedOut = true; - } - } catch { - // May already be stopped or in a state that doesn't need stopping — proceed with delete - } - - const deleteResult = await deleteABTest({ - region, - abTestId: testState.abTestId, - }); - - if (deleteResult.success && testState.roleCreatedByCli && testState.roleArn) { - await deleteABTestRole(region, testState.roleArn); - } - - results.push({ - testName, - status: deleteResult.success ? 'deleted' : 'error', - error: deleteResult.error, - warning: stopTimedOut - ? `AB test "${testName}" did not reach STOPPED status within the polling window — proceeding with delete` - : wasStopped - ? `AB test "${testName}" was stopped before deletion` - : undefined, - }); - } catch (err) { - results.push({ - testName, - status: 'error', - error: err instanceof Error ? 
err.message : String(err), - }); - } - } - } - - return { - results, - hasErrors: results.some(r => r.status === 'error'), - }; -} - -// ============================================================================ -// ARN Resolution Helpers -// ============================================================================ - -async function findABTestByName( - region: string, - projectName: string, - testName: string -): Promise<{ abTestId: string; abTestArn: string } | undefined> { - try { - const prefixedName = `${projectName}_${testName}`; - const result = await listABTests({ region, maxResults: 100 }); - return result.abTests.find( - t => t.name.toLowerCase() === prefixedName.toLowerCase() || t.name.toLowerCase() === testName.toLowerCase() - ); - } catch { - return undefined; - } -} - -/** - * Resolve variant config bundle references. - * If bundleArn is a name (not an ARN), look it up in deployed config bundles. - * Target-based variants have their target name prefixed with projectName to match - * what post-deploy-http-gateways.ts creates on AWS (e.g. `${projectName}-${tgt.name}`). 
- */ -function resolveVariants( - variants: { - name: 'C' | 'T1'; - weight: number; - variantConfiguration: { - configurationBundle?: { bundleArn: string; bundleVersion: string }; - target?: { targetName: string }; - }; - }[], - projectName: string, - deployedResources?: DeployedResourceState -): ABTestVariant[] { - return variants.map(v => { - const bundle = v.variantConfiguration.configurationBundle; - if (bundle) { - return { - name: v.name, - weight: v.weight, - variantConfiguration: { - configurationBundle: { - bundleArn: resolveConfigBundleArn(bundle.bundleArn, deployedResources), - bundleVersion: resolveConfigBundleVersion(bundle.bundleArn, bundle.bundleVersion, deployedResources), - }, - }, - }; - } - // Target-based variant — prepend projectName to match the AWS-side name created by - // post-deploy-http-gateways.ts: `${projectName}-${tgt.name}` - return { - name: v.name, - weight: v.weight, - variantConfiguration: { - ...(v.variantConfiguration.target && { - target: { name: resolveTargetName(v.variantConfiguration.target.targetName, projectName) }, - }), - }, - }; - }); -} - -function resolveConfigBundleArn(ref: string, deployedResources?: DeployedResourceState): string { - if (ref.startsWith('arn:')) return ref; - - const bundles = deployedResources?.configBundles; - if (bundles?.[ref]) { - return bundles[ref].bundleArn; - } - - return ref; -} - -function resolveConfigBundleVersion( - bundleRef: string, - versionRef: string, - deployedResources?: DeployedResourceState -): string { - if (versionRef !== 'LATEST') return versionRef; - - // Resolve LATEST to the deployed versionId - const bundles = deployedResources?.configBundles; - const name = bundleRef.startsWith('arn:') ? undefined : bundleRef; - if (name && bundles?.[name]) { - return bundles[name].versionId; - } - - return versionRef; -} - -/** - * Resolve a variant target name by applying the project prefix unconditionally. 
- * post-deploy-http-gateways.ts always creates targets as `${projectName}-${tgt.name}`, - * so the AB test must reference the same prefixed name. - */ -function resolveTargetName(targetName: string, projectName: string): string { - return `${projectName}-${targetName}`; -} - -function resolveGatewayArn(ref: string, deployedResources?: DeployedResourceState): string { - if (ref.startsWith('arn:')) return ref; - - // Check for placeholder pattern {{gateway:}} - const placeholderMatch = /^\{\{gateway:(.+)\}\}$/.exec(ref); - const gwName = placeholderMatch ? placeholderMatch[1] : ref; - - const gateways = deployedResources?.mcp?.gateways; - if (gateways && gwName && gateways[gwName]) { - return gateways[gwName].gatewayArn; - } - - // Check HTTP gateways (imperatively created for A/B testing) - const httpGateways = deployedResources?.httpGateways; - if (httpGateways && gwName && httpGateways[gwName]) { - return httpGateways[gwName].gatewayArn; - } - - return ref; -} - -function resolveEvalConfig( - config: - | { onlineEvaluationConfigArn: string } - | { perVariantOnlineEvaluationConfig: { treatmentName: 'C' | 'T1'; onlineEvaluationConfigArn: string }[] }, - deployedResources?: DeployedResourceState -): ABTestEvaluationConfig { - if ('perVariantOnlineEvaluationConfig' in config) { - // Per-variant eval config — resolve each ARN - return { - perVariantOnlineEvaluationConfig: config.perVariantOnlineEvaluationConfig.map(pv => ({ - name: pv.treatmentName, - onlineEvaluationConfigArn: resolveOnlineEvalArn(pv.onlineEvaluationConfigArn, deployedResources), - })), - }; - } - - const ref = config.onlineEvaluationConfigArn; - return { onlineEvaluationConfigArn: resolveOnlineEvalArn(ref, deployedResources) }; -} - -function resolveOnlineEvalArn(ref: string, deployedResources?: DeployedResourceState): string { - if (ref.startsWith('arn:')) return ref; - - const configs = deployedResources?.onlineEvalConfigs; - if (configs?.[ref]) { - return configs[ref].onlineEvaluationConfigArn; - 
} - - return ref; -} - -// ============================================================================ -// IAM Role Management -// ============================================================================ - -/** - * Generate a project-scoped role name following the CDK pattern: - * AgentCore-{ProjectName}-ABTest{TestName}-{Hash} - */ -function generateRoleName(projectName: string, testName: string): string { - // Deterministic hash so retries produce the same role name (avoids orphaned roles) - const hash = createHash('sha256').update(`${projectName}:${testName}`).digest('hex').slice(0, 8); - const base = `AgentCore-${projectName}-ABTest${testName}`; - // IAM role names max 64 chars - return `${base.slice(0, 55)}-${hash}`; -} - -/** - * Extract role name from ARN: arn:aws:iam::123456789012:role/RoleName → RoleName - */ -function roleNameFromArn(roleArn: string): string { - const parts = roleArn.split('/'); - return parts[parts.length - 1] ?? roleArn; -} - -interface CreateABTestRoleOptions { - region: string; - projectName: string; - testName: string; - gatewayArn: string; -} - -async function getOrCreateABTestRole(options: CreateABTestRoleOptions): Promise { - const { region, projectName, testName, gatewayArn } = options; - const credentials = getCredentialProvider(); - const iamClient = new IAMClient({ region, credentials }); - - // Extract account ID from gateway ARN (arn:aws:bedrock-agentcore:REGION:ACCOUNT:gateway/ID) - const accountId = gatewayArn.split(':')[4] ?? 
'*'; - - const roleName = generateRoleName(projectName, testName); - - const trustPolicy = JSON.stringify({ - Version: '2012-10-17', - Statement: [ - { - Effect: 'Allow', - Principal: { Service: 'bedrock-agentcore.amazonaws.com' }, - Action: 'sts:AssumeRole', - Condition: { - StringEquals: { 'aws:SourceAccount': accountId }, - ArnLike: { 'aws:SourceArn': `${arnPrefix(region)}:bedrock-agentcore:*:${accountId}:ab-test/*` }, - }, - }, - ], - }); - - let roleArn: string; - let _needsPropagationWait = false; - - try { - const createResult = await iamClient.send( - new CreateRoleCommand({ - RoleName: roleName, - AssumeRolePolicyDocument: trustPolicy, - Description: `Auto-created execution role for AgentCore AB test: ${testName}`, - Tags: [ - { Key: 'agentcore:created-by', Value: 'agentcore-cli' }, - { Key: 'agentcore:project-name', Value: projectName }, - { Key: 'agentcore:ab-test-name', Value: testName }, - ], - }) - ); - - roleArn = createResult.Role?.Arn ?? ''; - if (!roleArn) { - throw new Error(`IAM CreateRole succeeded but returned no role ARN for "${roleName}"`); - } - _needsPropagationWait = true; - } catch (err: unknown) { - // Handle retry after a previous failed deploy left the role behind - const errName = (err as { name?: string }).name; - if (errName === 'EntityAlreadyExistsException') { - // IAM role already exists — reuse it - const existing = await iamClient.send(new GetRoleCommand({ RoleName: roleName })); - roleArn = existing.Role?.Arn ?? 
''; - if (!roleArn) { - throw new Error(`Role "${roleName}" already exists but ARN could not be retrieved`); - } - } else { - throw err; - } - } - - const policy = JSON.stringify({ - Version: '2012-10-17', - Statement: [ - { - Sid: 'AgentCoreResources', - Effect: 'Allow', - Action: [ - 'bedrock-agentcore:GetGateway', - 'bedrock-agentcore:GetGatewayTarget', - 'bedrock-agentcore:ListGatewayTargets', - 'bedrock-agentcore:CreateGatewayRule', - 'bedrock-agentcore:UpdateGatewayRule', - 'bedrock-agentcore:GetGatewayRule', - 'bedrock-agentcore:DeleteGatewayRule', - 'bedrock-agentcore:ListGatewayRules', - 'bedrock-agentcore:GetOnlineEvaluationConfig', - 'bedrock-agentcore:GetEvaluator', - 'bedrock-agentcore:GetConfigurationBundle', - 'bedrock-agentcore:GetConfigurationBundleVersion', - 'bedrock-agentcore:ListConfigurationBundleVersions', - ], - Resource: `${arnPrefix(region)}:bedrock-agentcore:*:${accountId}:*`, - Condition: { StringEquals: { 'aws:ResourceAccount': accountId } }, - }, - { - Sid: 'CloudWatchLogsDescribe', - Effect: 'Allow', - Action: ['logs:DescribeLogGroups'], - Resource: '*', - }, - { - Sid: 'CloudWatchLogs', - Effect: 'Allow', - Action: [ - 'logs:DescribeIndexPolicies', - 'logs:PutIndexPolicy', - 'logs:StartQuery', - 'logs:GetQueryResults', - 'logs:StopQuery', - 'logs:FilterLogEvents', - 'logs:GetLogEvents', - ], - Resource: [ - `${arnPrefix(region)}:logs:*:${accountId}:log-group:/aws/bedrock-agentcore/evaluations/*`, - `${arnPrefix(region)}:logs:*:${accountId}:log-group:/aws/bedrock-agentcore/runtimes/*`, - `${arnPrefix(region)}:logs:*:${accountId}:log-group:aws/spans`, - `${arnPrefix(region)}:logs:*:${accountId}:log-group:aws/spans:*`, - ], - }, - ], - }); - - // Re-apply the inline policy (idempotent — covers both new and recovered roles) - await iamClient.send( - new PutRolePolicyCommand({ - RoleName: roleName, - PolicyName: AB_TEST_ROLE_POLICY_NAME, - PolicyDocument: policy, - }) - ); - - // Always wait for IAM policy propagation — both new roles and 
policy updates on existing roles - await new Promise(resolve => setTimeout(resolve, 15_000)); - - return roleArn; -} - -async function deleteABTestRole(region: string, roleArn: string): Promise { - const credentials = getCredentialProvider(); - const iamClient = new IAMClient({ region, credentials }); - const roleName = roleNameFromArn(roleArn); - - try { - // Must delete inline policies before deleting the role - await iamClient.send( - new DeleteRolePolicyCommand({ - RoleName: roleName, - PolicyName: AB_TEST_ROLE_POLICY_NAME, - }) - ); - } catch { - // Policy may not exist - } - - try { - await iamClient.send(new DeleteRoleCommand({ RoleName: roleName })); - } catch { - // Role may already be deleted or in use — best effort - } -} diff --git a/src/cli/operations/deploy/post-deploy-config-bundles.ts b/src/cli/operations/deploy/post-deploy-config-bundles.ts deleted file mode 100644 index 5318c54b1..000000000 --- a/src/cli/operations/deploy/post-deploy-config-bundles.ts +++ /dev/null @@ -1,348 +0,0 @@ -import type { AgentCoreProjectSpec, ConfigBundleDeployedState, DeployedState } from '../../../schema'; -import { - createConfigurationBundle, - deleteConfigurationBundle, - getConfigurationBundleVersion, - listConfigurationBundleVersions, - listConfigurationBundles, - updateConfigurationBundle, -} from '../../aws/agentcore-config-bundles'; -import type { ComponentConfigurationMap } from '../../aws/agentcore-config-bundles'; - -// ============================================================================ -// Types -// ============================================================================ - -export interface SetupConfigBundlesOptions { - region: string; - projectSpec: AgentCoreProjectSpec; - /** Existing config bundle deployed state (from deployed-state.json) */ - existingBundles?: Record; -} - -export interface ConfigBundleSetupResult { - bundleName: string; - status: 'created' | 'updated' | 'deleted' | 'skipped' | 'error'; - bundleId?: string; - bundleArn?: 
string; - versionId?: string; - error?: string; -} - -export interface SetupConfigBundlesResult { - results: ConfigBundleSetupResult[]; - /** Deployed state entries for config bundles (to merge into deployed-state.json) */ - configBundles: Record; - hasErrors: boolean; -} - -// ============================================================================ -// Implementation -// ============================================================================ - -/** - * Create, update, or delete configuration bundles post-deploy. - * - * Pattern: - * 1. For each configBundle in project spec → create or update - * 2. For each bundle in deployed-state but NOT in project spec → delete (reconciliation) - * 3. Return updated deployed state entries - */ -export async function setupConfigBundles(options: SetupConfigBundlesOptions): Promise { - const { region, projectSpec, existingBundles } = options; - const results: ConfigBundleSetupResult[] = []; - const configBundles: Record = {}; - - const specBundleNames = new Set((projectSpec.configBundles ?? []).map(b => b.name)); - const projectName = projectSpec.name; - - // Create or update bundles from the spec - for (const bundleSpec of projectSpec.configBundles ?? []) { - // Prepend project name to the API-side bundle name (no separator for config bundles) - const apiBundleName = `${projectName}${bundleSpec.name}`; - - try { - // Try to update if we have an existing bundle ID - const existingBundle = existingBundles?.[bundleSpec.name]; - let updated = false; - - if (existingBundle) { - try { - // Fetch the exact version we know about — avoids branch-not-found errors - const current = await getConfigurationBundleVersion({ - region, - bundleId: existingBundle.bundleId, - versionId: existingBundle.versionId, - }); - const componentsChanged = !deepEqual(current.components, bundleSpec.components); - const descriptionChanged = (bundleSpec.description ?? undefined) !== (current.description ?? 
undefined); - - if (!componentsChanged && !descriptionChanged) { - // Nothing changed — skip the update, preserve existing state - configBundles[bundleSpec.name] = { - bundleId: existingBundle.bundleId, - bundleArn: existingBundle.bundleArn, - versionId: existingBundle.versionId, - }; - results.push({ - bundleName: bundleSpec.name, - status: 'skipped', - bundleId: existingBundle.bundleId, - bundleArn: existingBundle.bundleArn, - versionId: existingBundle.versionId, - }); - updated = true; - } else { - // Use the branch from the spec, or fall back to whatever branch the API has - const effectiveBranch = bundleSpec.branchName ?? current.lineageMetadata?.branchName ?? 'mainline'; - const result = await updateConfigurationBundle({ - region, - bundleId: existingBundle.bundleId, - description: bundleSpec.description, - components: bundleSpec.components as ComponentConfigurationMap, - parentVersionIds: [current.versionId], - branchName: effectiveBranch, - commitMessage: bundleSpec.commitMessage ?? `Update ${bundleSpec.name}`, - }); - - configBundles[bundleSpec.name] = { - bundleId: result.bundleId, - bundleArn: result.bundleArn, - versionId: result.versionId, - }; - - results.push({ - bundleName: bundleSpec.name, - status: 'updated', - bundleId: result.bundleId, - bundleArn: result.bundleArn, - versionId: result.versionId, - }); - updated = true; - } - } catch (updateErr) { - // If bundle or branch not found, fall through to find-by-name or create - const msg = updateErr instanceof Error ? 
updateErr.message : String(updateErr); - if (!msg.includes('404') && !msg.includes('not found')) throw updateErr; - } - } - - if (!updated) { - // Try to find by name via list (handles re-creation after state loss) - const existingByName = await findBundleByName(region, apiBundleName); - - if (existingByName) { - // Fetch versions and pick the newest — avoids branch-not-found errors from getConfigurationBundle - const versions = await listConfigurationBundleVersions({ - region, - bundleId: existingByName.bundleId, - }); - const sorted = [...versions.versions].sort((a, b) => Number(b.versionCreatedAt) - Number(a.versionCreatedAt)); - const latestVersionId = sorted[0]?.versionId; - if (!latestVersionId) throw new Error(`No versions found for bundle ${bundleSpec.name}`); - const current = await getConfigurationBundleVersion({ - region, - bundleId: existingByName.bundleId, - versionId: latestVersionId, - }); - const componentsChanged = !deepEqual(current.components, bundleSpec.components); - const descriptionChanged = (bundleSpec.description ?? undefined) !== (current.description ?? undefined); - - if (!componentsChanged && !descriptionChanged) { - configBundles[bundleSpec.name] = { - bundleId: existingByName.bundleId, - bundleArn: current.bundleArn, - versionId: current.versionId, - }; - results.push({ - bundleName: bundleSpec.name, - status: 'skipped', - bundleId: existingByName.bundleId, - bundleArn: current.bundleArn, - versionId: current.versionId, - }); - } else { - const effectiveBranch = bundleSpec.branchName ?? current.lineageMetadata?.branchName ?? 'mainline'; - const result = await updateConfigurationBundle({ - region, - bundleId: existingByName.bundleId, - description: bundleSpec.description, - components: bundleSpec.components as ComponentConfigurationMap, - parentVersionIds: [current.versionId], - branchName: effectiveBranch, - commitMessage: bundleSpec.commitMessage ?? 
`Update ${bundleSpec.name}`, - }); - - configBundles[bundleSpec.name] = { - bundleId: result.bundleId, - bundleArn: result.bundleArn, - versionId: result.versionId, - }; - - results.push({ - bundleName: bundleSpec.name, - status: 'updated', - bundleId: result.bundleId, - bundleArn: result.bundleArn, - versionId: result.versionId, - }); - } - } else { - // Create new — omit branchName if not in spec so the API uses its default - const result = await createConfigurationBundle({ - region, - bundleName: apiBundleName, - description: bundleSpec.description, - components: bundleSpec.components as ComponentConfigurationMap, - branchName: bundleSpec.branchName, - commitMessage: bundleSpec.commitMessage ?? `Create ${bundleSpec.name}`, - }); - - configBundles[bundleSpec.name] = { - bundleId: result.bundleId, - bundleArn: result.bundleArn, - versionId: result.versionId, - }; - - results.push({ - bundleName: bundleSpec.name, - status: 'created', - bundleId: result.bundleId, - bundleArn: result.bundleArn, - versionId: result.versionId, - }); - } - } - } catch (err) { - results.push({ - bundleName: bundleSpec.name, - status: 'error', - error: err instanceof Error ? err.message : String(err), - }); - } - } - - // Delete orphaned bundles (in deployed-state but removed from spec) - if (existingBundles) { - for (const [bundleName, bundleState] of Object.entries(existingBundles)) { - if (!specBundleNames.has(bundleName)) { - try { - await deleteConfigurationBundle({ - region, - bundleId: bundleState.bundleId, - }); - - results.push({ - bundleName, - status: 'deleted', - }); - } catch (err) { - results.push({ - bundleName, - status: 'error', - error: err instanceof Error ? 
err.message : String(err), - }); - } - } - } - } - - return { - results, - configBundles, - hasErrors: results.some(r => r.status === 'error'), - }; -} - -// ============================================================================ -// Helpers -// ============================================================================ - -async function findBundleByName(region: string, bundleName: string): Promise<{ bundleId: string } | undefined> { - try { - const result = await listConfigurationBundles({ region, maxResults: 100 }); - return result.bundles.find(b => b.bundleName === bundleName); - } catch { - return undefined; - } -} - -/** Key-order-independent deep-equal for JSON-serializable objects. */ -function deepEqual(a: unknown, b: unknown): boolean { - if (a === b) return true; - if (a === null || b === null || typeof a !== typeof b) return false; - if (typeof a !== 'object') return false; - - if (Array.isArray(a)) { - if (!Array.isArray(b) || a.length !== b.length) return false; - return a.every((item, i) => deepEqual(item, b[i])); - } - - const aObj = a as Record; - const bObj = b as Record; - const aKeys = Object.keys(aObj); - const bKeys = Object.keys(bObj); - if (aKeys.length !== bKeys.length) return false; - return aKeys.every(key => key in bObj && deepEqual(aObj[key], bObj[key])); -} - -// ============================================================================ -// Component Key Resolution -// ============================================================================ - -/** - * Resolve placeholder component keys (e.g., {{runtime:name}}, {{gateway:name}}) - * to actual ARNs from deployed state. - */ -export function resolveConfigBundleComponentKeys( - projectSpec: AgentCoreProjectSpec, - deployedState: DeployedState, - targetName: string -): AgentCoreProjectSpec { - const resources = deployedState.targets?.[targetName]?.resources; - if (!resources) return projectSpec; - - const resolvedBundles = (projectSpec.configBundles ?? 
[]).map(bundle => { - const resolvedComponents: Record }> = {}; - - for (const [key, value] of Object.entries(bundle.components ?? {})) { - const resolvedKey = resolveComponentKey(key, resources); - resolvedComponents[resolvedKey] = value; - } - - return { ...bundle, components: resolvedComponents }; - }); - - return { ...projectSpec, configBundles: resolvedBundles }; -} - -function resolveComponentKey( - key: string, - resources: NonNullable -): string { - if (key.startsWith('arn:')) return key; - - const gwMatch = /^\{\{gateway:(.+)\}\}$/.exec(key); - if (gwMatch) { - const gwName = gwMatch[1]!; - const httpGw = resources.httpGateways?.[gwName]; - if (httpGw) return httpGw.gatewayArn; - const mcpGw = resources.mcp?.gateways?.[gwName]; - if (mcpGw) return mcpGw.gatewayArn; - throw new Error( - `Config bundle references gateway "${gwName}" but it was not found in deployed resources. Ensure the gateway is defined in agentcore.json and deploys successfully.` - ); - } - - const rtMatch = /^\{\{runtime:(.+)\}\}$/.exec(key); - if (rtMatch) { - const rtName = rtMatch[1]!; - const rt = resources.runtimes?.[rtName]; - if (rt) return rt.runtimeArn; - throw new Error( - `Config bundle references runtime "${rtName}" but it was not found in deployed resources. 
Ensure the runtime is defined in agentcore.json and deploys successfully.` - ); - } - - return key; -} diff --git a/src/cli/operations/deploy/post-deploy-http-gateways.ts b/src/cli/operations/deploy/post-deploy-http-gateways.ts deleted file mode 100644 index d59a62bdf..000000000 --- a/src/cli/operations/deploy/post-deploy-http-gateways.ts +++ /dev/null @@ -1,652 +0,0 @@ -import type { AgentCoreProjectSpec, DeployedResourceState, HttpGatewayDeployedState } from '../../../schema'; -import { getCredentialProvider } from '../../aws/account'; -import { - createHttpGateway, - createHttpGatewayTarget, - deleteHttpGateway, - deleteHttpGatewayTarget, - getHttpGatewayTarget, - listAllHttpGateways, - listHttpGatewayTargets, - waitForGatewayReady, - waitForTargetReady, -} from '../../aws/agentcore-http-gateways'; -import { - CreateRoleCommand, - DeleteRoleCommand, - DeleteRolePolicyCommand, - GetRoleCommand, - IAMClient, - PutRolePolicyCommand, -} from '@aws-sdk/client-iam'; -import { createHash } from 'node:crypto'; - -// ============================================================================ -// Types -// ============================================================================ - -export interface SetupHttpGatewaysOptions { - region: string; - projectName: string; - projectSpec: AgentCoreProjectSpec; - existingHttpGateways?: Record; - deployedResources?: DeployedResourceState; -} - -export interface HttpGatewaySetupResult { - gatewayName: string; - status: 'created' | 'skipped' | 'deleted' | 'error'; - gatewayId?: string; - gatewayArn?: string; - error?: string; -} - -export interface SetupHttpGatewaysResult { - results: HttpGatewaySetupResult[]; - httpGateways: Record; - hasErrors: boolean; -} - -// ============================================================================ -// Constants -// ============================================================================ - -const HTTP_GATEWAY_ROLE_POLICY_NAME = 'HttpGatewayExecutionPolicy'; - -// 
============================================================================ -// Implementation -// ============================================================================ - -/** - * Create or delete HTTP gateways post-deploy. - * - * Pattern: - * 1. For each httpGateway in project spec -> resolve runtime ARN, create or skip - * 2. For each httpGateway in deployed-state but NOT in project spec -> delete (reconciliation) - * 3. Return updated deployed state entries - */ -export async function setupHttpGateways(options: SetupHttpGatewaysOptions): Promise { - const { region, projectName, projectSpec, existingHttpGateways, deployedResources } = options; - const results: HttpGatewaySetupResult[] = []; - const httpGateways: Record = {}; - - // Defensive: Zod .default([]) only fires on undefined, not null. - // If someone has "httpGateways": null in their JSON, it passes through as null. - const httpGatewaySpecs = projectSpec.httpGateways ?? []; - - // Create or skip gateways from the spec - for (const gwSpec of httpGatewaySpecs) { - let resolvedRoleArn: string | undefined; - let roleCreatedByCli = false; - try { - const existingGateway = existingHttpGateways?.[gwSpec.name]; - - if (existingGateway) { - // Already deployed - - // Create or update targets from httpGateways[].targets (for target-based AB testing) - if (gwSpec.targets && gwSpec.targets.length > 0) { - // List existing targets to avoid unnecessary create calls - const existingTargetsByName = new Map(); - try { - const existingTargets = await listHttpGatewayTargets({ - region, - gatewayId: existingGateway.gatewayId, - }); - for (const t of existingTargets.targets) { - existingTargetsByName.set(t.name, { targetId: t.targetId }); - } - } catch { - // If list fails, fall through and let create handle 409s - } - - for (const tgt of gwSpec.targets) { - const existingTarget = existingTargetsByName.get(`${projectName}-${tgt.name}`); - if (existingTarget) { - // Target exists by name — check if qualifier matches 
- try { - const targetDetails = await getHttpGatewayTarget({ - region, - gatewayId: existingGateway.gatewayId, - targetId: existingTarget.targetId, - }); - const httpConfig = ( - targetDetails.targetConfiguration as - | { - http?: { - agentcoreRuntime?: { qualifier?: string }; - runtimeTargetConfiguration?: { qualifier?: string }; - }; - } - | undefined - )?.http; - const existingQualifier = - httpConfig?.agentcoreRuntime?.qualifier ?? httpConfig?.runtimeTargetConfiguration?.qualifier; - const specQualifier = tgt.qualifier ?? 'DEFAULT'; - if (existingQualifier === specQualifier) { - // Qualifier matches — skip - continue; - } - // Qualifier differs — delete old target and recreate - await deleteHttpGatewayTarget({ - region, - gatewayId: existingGateway.gatewayId, - targetId: existingTarget.targetId, - }); - } catch { - // If get/delete fails, fall through to create which will handle conflicts - } - } - try { - const tgtRuntime = deployedResources?.runtimes?.[tgt.runtimeRef]; - if (!tgtRuntime) continue; - const tgtResult = await createHttpGatewayTarget({ - region, - gatewayId: existingGateway.gatewayId, - targetName: `${projectName}-${tgt.name}`, - runtimeArn: tgtRuntime.runtimeArn, - qualifier: tgt.qualifier, - }); - await waitForTargetReady({ - region, - gatewayId: existingGateway.gatewayId, - targetId: tgtResult.targetId, - }); - } catch (tgtErr) { - if (tgtErr instanceof Error && tgtErr.message.includes('409')) continue; - // Non-fatal - } - } - } - - httpGateways[gwSpec.name] = existingGateway; - results.push({ - gatewayName: gwSpec.name, - status: 'skipped', - gatewayId: existingGateway.gatewayId, - gatewayArn: existingGateway.gatewayArn, - }); - continue; - } - - // Try to find by name via list (handles re-creation after state loss) - const prefixedGatewayName = `${projectName}-${gwSpec.name}`; - const existingByName = await findHttpGatewayByName(region, prefixedGatewayName); - if (existingByName) { - console.warn( - `Warning: HTTP gateway "${gwSpec.name}" 
found by name but local state was lost. Target and role state may be incomplete — consider re-deploying.` - ); - httpGateways[gwSpec.name] = { - gatewayId: existingByName.gatewayId, - gatewayArn: existingByName.gatewayArn, - // targetId, roleArn, roleCreatedByCli unknown after state-loss recovery - }; - results.push({ - gatewayName: gwSpec.name, - status: 'skipped', - gatewayId: existingByName.gatewayId, - gatewayArn: existingByName.gatewayArn, - }); - continue; - } - - // Migration fallback: try unprefixed name for pre-PR gateways (Comment 3 fix) - const existingByLegacyName = await findHttpGatewayByName(region, gwSpec.name); - if (existingByLegacyName) { - console.warn( - `Warning: HTTP gateway "${gwSpec.name}" was found using its pre-migration name. ` + - `This CLI version uses the naming convention "${prefixedGatewayName}". ` + - `The gateway has been recovered from state loss. ` + - `You may want to rename "${gwSpec.name}" to "${prefixedGatewayName}" on AWS to match the new convention.` - ); - httpGateways[gwSpec.name] = { - gatewayId: existingByLegacyName.gatewayId, - gatewayArn: existingByLegacyName.gatewayArn, - // targetId, roleArn, roleCreatedByCli unknown after state-loss recovery - }; - results.push({ - gatewayName: gwSpec.name, - status: 'skipped', - gatewayId: existingByLegacyName.gatewayId, - gatewayArn: existingByLegacyName.gatewayArn, - }); - continue; - } - - // Resolve runtime ARN from deployed state - const runtimeState = deployedResources?.runtimes?.[gwSpec.runtimeRef]; - if (!runtimeState) { - results.push({ - gatewayName: gwSpec.name, - status: 'error', - error: `Runtime "${gwSpec.runtimeRef}" not found in deployed resources. 
Deploy the runtime before creating an HTTP gateway.`, - }); - continue; - } - const runtimeArn = runtimeState.runtimeArn; - if (gwSpec.roleArn) { - resolvedRoleArn = gwSpec.roleArn; - } else { - resolvedRoleArn = await getOrCreateHttpGatewayRole({ - region, - projectName, - gatewayName: gwSpec.name, - runtimeArn, - }); - roleCreatedByCli = true; - } - - // Create gateway and wait for it to become READY before adding targets - // Creating HTTP gateway for runtime - const createResult = await createHttpGateway({ - region, - name: `${projectName}-${gwSpec.name}`, - roleArn: resolvedRoleArn, - }); - - const readyGateway = await waitForGatewayReady({ - region, - gatewayId: createResult.gatewayId, - }); - - // Create target pointing to the runtime - let targetId: string | undefined; - try { - const targetResult = await createHttpGatewayTarget({ - region, - gatewayId: createResult.gatewayId, - targetName: `${projectName}-${gwSpec.runtimeRef}`, - runtimeArn, - }); - - targetId = targetResult.targetId; - - // Wait for target to become ready - // Waiting for gateway target to become ready - await waitForTargetReady({ - region, - gatewayId: createResult.gatewayId, - targetId: targetResult.targetId, - }); - } catch (targetErr) { - // Rollback: delete target (if created), wait for deletion, then delete gateway - try { - if (targetId) { - await deleteHttpGatewayTarget({ region, gatewayId: createResult.gatewayId, targetId }); - } - } catch { - // Best-effort target cleanup - } - try { - await deleteHttpGateway({ region, gatewayId: createResult.gatewayId }); - } catch { - // Best-effort gateway rollback - } - - // Always clean up auto-created role on target failure, regardless of gateway rollback result - if (roleCreatedByCli && resolvedRoleArn) { - try { - await deleteHttpGatewayRole(region, resolvedRoleArn); - } catch { - // Best-effort role cleanup - } - } - - results.push({ - gatewayName: gwSpec.name, - status: 'error', - error: `Target creation failed, gateway rolled back: 
${targetErr instanceof Error ? targetErr.message : String(targetErr)}`, - }); - continue; - } - - // Create additional targets from httpGateways[].targets (for target-based AB testing) - if (gwSpec.targets && gwSpec.targets.length > 0) { - for (const tgt of gwSpec.targets) { - try { - const tgtRuntime = deployedResources?.runtimes?.[tgt.runtimeRef]; - if (!tgtRuntime) { - // Runtime not deployed, skip this target - continue; - } - const tgtResult = await createHttpGatewayTarget({ - region, - gatewayId: createResult.gatewayId, - targetName: `${projectName}-${tgt.name}`, - runtimeArn: tgtRuntime.runtimeArn, - qualifier: tgt.qualifier, - }); - await waitForTargetReady({ - region, - gatewayId: createResult.gatewayId, - targetId: tgtResult.targetId, - }); - } catch (tgtErr) { - // 409 = already exists, skip - if (tgtErr instanceof Error && tgtErr.message.includes('409')) continue; - // Non-fatal: log but continue - } - } - } - - httpGateways[gwSpec.name] = { - gatewayId: createResult.gatewayId, - gatewayArn: createResult.gatewayArn, - gatewayUrl: readyGateway.gatewayUrl, - targetId, - roleArn: resolvedRoleArn, - roleCreatedByCli, - }; - - results.push({ - gatewayName: gwSpec.name, - status: 'created', - gatewayId: createResult.gatewayId, - gatewayArn: createResult.gatewayArn, - }); - } catch (err) { - // If we auto-created a role, clean it up on failure - if (roleCreatedByCli && resolvedRoleArn) { - try { - await deleteHttpGatewayRole(region, resolvedRoleArn); - } catch { - // Best-effort role cleanup - } - } - results.push({ - gatewayName: gwSpec.name, - status: 'error', - error: err instanceof Error ? err.message : String(err), - }); - } - } - - // Orphaned gateways are deleted by deleteOrphanedHttpGateways() which runs - // as a separate pre-pass. No deletion loop here. 
- - return { - results, - httpGateways, - hasErrors: results.some(r => r.status === 'error'), - }; -} - -// ============================================================================ -// Shared Gateway Deletion -// ============================================================================ - -/** - * Delete an HTTP gateway and all its targets. Best-effort — target failures - * are warned but don't prevent gateway deletion attempt. - * - * Order: targets → gateway → role - */ -export async function deleteHttpGatewayWithTargets(options: { - region: string; - gatewayId: string; - gatewayName: string; - knownTargetId?: string; - roleArn?: string; - roleCreatedByCli?: boolean; -}): Promise<{ success: boolean; error?: string }> { - const { region, gatewayId, gatewayName, knownTargetId, roleArn, roleCreatedByCli } = options; - - const targetIds: string[] = []; - if (knownTargetId) { - targetIds.push(knownTargetId); - } - try { - const targets = await listHttpGatewayTargets({ region, gatewayId, maxResults: 100 }); - for (const t of targets.targets) { - if (!targetIds.includes(t.targetId)) { - targetIds.push(t.targetId); - } - } - } catch { - // Best-effort — proceed with whatever IDs we have - } - - for (const targetId of targetIds) { - try { - await deleteHttpGatewayTarget({ region, gatewayId, targetId }); - } catch (err) { - console.warn( - `Warning: Failed to delete target ${targetId} on gateway "${gatewayName}": ${err instanceof Error ? err.message : String(err)}` - ); - } - } - - const deleteResult = await deleteHttpGateway({ region, gatewayId }); - if (!deleteResult.success) { - return { success: false, error: deleteResult.error }; - } - - if (roleCreatedByCli && roleArn) { - try { - await deleteHttpGatewayRole(region, roleArn); - } catch { - // Best-effort role cleanup - } - } - - return { success: true }; -} - -/** - * Delete orphaned HTTP gateways (in deployed-state but removed from spec). - * Call before setupHttpGateways. 
- */ -export async function deleteOrphanedHttpGateways(options: { - region: string; - projectSpec: AgentCoreProjectSpec; - existingHttpGateways?: Record; -}): Promise<{ results: HttpGatewaySetupResult[]; hasErrors: boolean }> { - const { region, projectSpec, existingHttpGateways } = options; - if (!existingHttpGateways) return { results: [], hasErrors: false }; - - const specGatewayNames = new Set((projectSpec.httpGateways ?? []).map(g => g.name)); - const results: HttpGatewaySetupResult[] = []; - - for (const [gwName, gwState] of Object.entries(existingHttpGateways)) { - if (!specGatewayNames.has(gwName)) { - try { - const result = await deleteHttpGatewayWithTargets({ - region, - gatewayId: gwState.gatewayId, - gatewayName: gwName, - knownTargetId: gwState.targetId, - roleArn: gwState.roleArn, - roleCreatedByCli: gwState.roleCreatedByCli, - }); - - results.push({ - gatewayName: gwName, - status: result.success ? 'deleted' : 'error', - error: result.error, - }); - } catch (err) { - results.push({ - gatewayName: gwName, - status: 'error', - error: err instanceof Error ? err.message : String(err), - }); - } - } - } - - return { - results, - hasErrors: results.some(r => r.status === 'error'), - }; -} - -// ============================================================================ -// Gateway Trace Delivery -// ============================================================================ - -// ============================================================================ -// Helpers -// ============================================================================ - -async function findHttpGatewayByName( - region: string, - name: string -): Promise<{ gatewayId: string; gatewayArn: string } | undefined> { - try { - const gateways = await listAllHttpGateways({ region }); - return gateways.find(gw => gw.name === name); - } catch (err) { - console.warn( - `Warning: Could not list HTTP gateways to check for existing "${name}": ${err instanceof Error ? 
err.message : String(err)}` - ); - return undefined; - } -} - -// ============================================================================ -// IAM Role Management -// ============================================================================ - -/** - * Generate a project-scoped role name following the CDK pattern: - * AgentCore-{ProjectName}-HttpGw{GatewayName}-{Hash} - */ -function generateRoleName(projectName: string, gatewayName: string): string { - const base = `AgentCore-${projectName}-HttpGw${gatewayName}`; - // Use deterministic hash so retries produce the same role name - const hash = createHash('sha256').update(`${projectName}:${gatewayName}`).digest('hex').slice(0, 8); - // IAM role names max 64 chars - return `${base.slice(0, 55)}-${hash}`; -} - -/** - * Extract role name from ARN: arn:aws:iam::123456789012:role/RoleName -> RoleName - */ -function roleNameFromArn(roleArn: string): string { - const parts = roleArn.split('/'); - return parts[parts.length - 1] ?? roleArn; -} - -interface CreateHttpGatewayRoleOptions { - region: string; - projectName: string; - gatewayName: string; - runtimeArn: string; -} - -async function getOrCreateHttpGatewayRole(options: CreateHttpGatewayRoleOptions): Promise { - const { region, projectName, gatewayName } = options; - const credentials = getCredentialProvider(); - const iamClient = new IAMClient({ region, credentials }); - - const roleName = generateRoleName(projectName, gatewayName); - - const trustPolicy = JSON.stringify({ - Version: '2012-10-17', - Statement: [ - { - Effect: 'Allow', - Principal: { Service: 'bedrock-agentcore.amazonaws.com' }, - Action: 'sts:AssumeRole', - }, - ], - }); - - const policy = JSON.stringify({ - Version: '2012-10-17', - Statement: [ - { - Sid: 'InvokeRuntimeStatement', - Effect: 'Allow', - Action: [ - 'bedrock-agentcore:InvokeRuntime', - 'bedrock-agentcore:InvokeAgent', - 'bedrock-agentcore:InvokeAgentRuntime', - ], - // Resource must be '*' because the gateway service invokes 
runtimes using - // a resource identifier that doesn't match the deployed runtime ARN format. - // This matches the A/B testing guide's gateway role policy. - Resource: '*', - }, - ], - }); - - let roleArn: string; - let needsPropagationWait = false; - - try { - const createResult = await iamClient.send( - new CreateRoleCommand({ - RoleName: roleName, - AssumeRolePolicyDocument: trustPolicy, - Description: `Auto-created execution role for AgentCore HTTP gateway: ${gatewayName}`, - Tags: [ - { Key: 'agentcore:created-by', Value: 'agentcore-cli' }, - { Key: 'agentcore:project-name', Value: projectName }, - { Key: 'agentcore:http-gateway-name', Value: gatewayName }, - ], - }) - ); - - roleArn = createResult.Role?.Arn ?? ''; - if (!roleArn) { - throw new Error(`IAM CreateRole succeeded but returned no role ARN for "${roleName}"`); - } - needsPropagationWait = true; - } catch (err: unknown) { - // Handle retry after a previous failed deploy left the role behind - const errName = (err as { name?: string }).name; - if (errName === 'EntityAlreadyExistsException') { - // IAM role already exists — reusing - const existing = await iamClient.send(new GetRoleCommand({ RoleName: roleName })); - roleArn = existing.Role?.Arn ?? ''; - if (!roleArn) { - throw new Error(`Role "${roleName}" already exists but ARN could not be retrieved`); - } - } else { - throw new Error( - `Failed to create IAM role "${roleName}" for HTTP gateway "${gatewayName}": ${err instanceof Error ? 
err.message : String(err)}` - ); - } - } - - // Re-apply the inline policy (idempotent — covers both new and recovered roles) - await iamClient.send( - new PutRolePolicyCommand({ - RoleName: roleName, - PolicyName: HTTP_GATEWAY_ROLE_POLICY_NAME, - PolicyDocument: policy, - }) - ); - - if (needsPropagationWait) { - // Waiting for IAM role propagation (~15s) - await new Promise(resolve => setTimeout(resolve, 15_000)); - } - - return roleArn; -} - -export async function deleteHttpGatewayRole(region: string, roleArn: string): Promise { - const credentials = getCredentialProvider(); - const iamClient = new IAMClient({ region, credentials }); - const roleName = roleNameFromArn(roleArn); - - try { - // Must delete inline policies before deleting the role - await iamClient.send( - new DeleteRolePolicyCommand({ - RoleName: roleName, - PolicyName: HTTP_GATEWAY_ROLE_POLICY_NAME, - }) - ); - } catch { - // Policy may not exist - } - - try { - await iamClient.send(new DeleteRoleCommand({ RoleName: roleName })); - } catch { - // Role may already be deleted or in use -- best effort - } -} diff --git a/src/cli/operations/deploy/post-deploy-knowledge-bases.ts b/src/cli/operations/deploy/post-deploy-knowledge-bases.ts new file mode 100644 index 000000000..1f2cce667 --- /dev/null +++ b/src/cli/operations/deploy/post-deploy-knowledge-bases.ts @@ -0,0 +1,134 @@ +import type { DeployedState, KnowledgeBase, KnowledgeBaseDeployedState } from '../../../schema'; +import { runKbIngestionByName } from '../ingest'; +import { createHash } from 'node:crypto'; + +export interface AutoIngestKnowledgeBasesOptions { + region: string; + knowledgeBases: KnowledgeBase[]; + /** Current deployed-state record (KB id/arn populated, dataSources hydrated, sourcesHash possibly stale). */ + deployedKnowledgeBases: Record; + /** Prior deployed-state record for sourcesHash comparison. */ + previousKnowledgeBases?: Record; + /** Deployment target name (passed through to runKbIngestionByName). 
*/ + targetName: string; + /** Full deployed-state (passed through to runKbIngestionByName). */ + deployedState: DeployedState; + /** + * Optional progress callback. When the retry loop sleeps because Bedrock + * is busy with a sibling job, this is called with a short status line so + * the deploy logger can echo it (otherwise the deploy looks frozen). + */ + onProgress?: (message: string) => void; + /** Optional abort signal forwarded to the retry sleep. */ + signal?: AbortSignal; +} + +export interface AutoIngestEntry { + knowledgeBaseName: string; + status: 'started' | 'skipped' | 'error'; + /** Number of data sources for which an ingestion job was started. */ + startedJobCount?: number; + /** Reason for skipping (e.g. 'no changes to data sources'). */ + reason?: string; + /** Error message when status is 'error'. */ + error?: string; + /** New sourcesHash to persist when status is 'started'. */ + newSourcesHash?: string; +} + +export interface AutoIngestKnowledgeBasesResult { + hasErrors: boolean; + results: AutoIngestEntry[]; +} + +/** + * Compute the SHA-256 over the data-source URIs of a KB spec, joined with + * newlines. The post-deploy hook compares this to the previously-stored + * sourcesHash to decide whether to re-trigger ingestion. + */ +export function computeSourcesHash(kb: KnowledgeBase): string { + return createHash('sha256') + .update(kb.dataSources.map(ds => (ds.type === 'S3' ? ds.uri : ds.connectorConfigFile)).join('\n')) + .digest('hex'); +} + +/** + * For each KB in the project, fire StartIngestionJob if the current + * sourcesHash differs from the one stored in the prior deployed-state. + * + * Skipped KBs (no change) are reported in `results` with status 'skipped'. + * Errors are reported with status 'error' and surfaced in `hasErrors`; they + * do NOT abort the post-deploy flow because ingestion is async and retryable + * via `agentcore run ingest`. 
+ * + * Caller is responsible for persisting `newSourcesHash` onto the deployed + * state record after this returns. + */ +export async function autoIngestKnowledgeBases( + opts: AutoIngestKnowledgeBasesOptions +): Promise { + const results: AutoIngestEntry[] = []; + + for (const kb of opts.knowledgeBases) { + const deployed = opts.deployedKnowledgeBases[kb.name]; + if (!deployed) { + // KB wasn't deployed (CFN output missing) — nothing to ingest into yet. + results.push({ + knowledgeBaseName: kb.name, + status: 'skipped', + reason: 'KB not present in deployed state (CFN outputs missing)', + }); + continue; + } + if (deployed.dataSources.length === 0) { + results.push({ + knowledgeBaseName: kb.name, + status: 'skipped', + reason: 'no data sources recorded', + }); + continue; + } + + const newHash = computeSourcesHash(kb); + const previousHash = opts.previousKnowledgeBases?.[kb.name]?.sourcesHash; + + if (previousHash && previousHash === newHash) { + results.push({ + knowledgeBaseName: kb.name, + status: 'skipped', + reason: 'no changes to data sources', + }); + continue; + } + + const ingestResult = await runKbIngestionByName({ + knowledgeBaseName: kb.name, + deployedState: opts.deployedState, + targetName: opts.targetName, + region: opts.region, + onProgress: opts.onProgress, + signal: opts.signal, + }); + + if (!ingestResult.success) { + results.push({ + knowledgeBaseName: kb.name, + status: 'error', + error: ingestResult.error.message, + }); + continue; + } + + results.push({ + knowledgeBaseName: kb.name, + status: 'started', + startedJobCount: ingestResult.startedJobs.length, + newSourcesHash: newHash, + }); + } + + return { + hasErrors: results.some(r => r.status === 'error'), + results, + }; +} diff --git a/src/cli/operations/deploy/preflight.ts b/src/cli/operations/deploy/preflight.ts index 044fa1e56..407384075 100644 --- a/src/cli/operations/deploy/preflight.ts +++ b/src/cli/operations/deploy/preflight.ts @@ -16,6 +16,8 @@ export interface PreflightContext 
{ cdkProject: LocalCdkProject; /** True when agents array is empty but a deployed stack exists — deploy will tear down resources */ isTeardownDeploy: boolean; + /** True when deployed-state.json has no targets — stack has never been deployed */ + isFirstDeploy: boolean; } export interface SynthResult { @@ -60,7 +62,6 @@ export function formatError(err: unknown): string { * Returns the project context needed for subsequent steps. */ const MAX_RUNTIME_NAME_LENGTH = 48; -const MAX_GATEWAY_COMBINED_NAME_LENGTH = 48; export async function validateProject(): Promise { // Find the agentcore config directory, walking up from cwd if needed @@ -78,13 +79,22 @@ export async function validateProject(): Promise { // Validate that at least one agent or gateway is defined, unless this is a teardown deploy. // - // Teardown detection: when agents is empty but deployed-state.json records existing - // targets, the user has run `remove all` and wants to tear down AWS resources via deploy. // deployed-state.json is written by the CLI after every successful deploy, so it is a // reliable indicator of whether a CloudFormation stack exists for this project. + let hasExistingStack = false; + try { + const deployedState = await configIO.readDeployedState(); + hasExistingStack = Object.keys(deployedState.targets).length > 0; + } catch { + // No deployed state file — no existing stack + } + + // Teardown detection: when agents is empty but deployed-state.json records existing + // targets, the user has run `remove all` and wants to tear down AWS resources via deploy. 
let isTeardownDeploy = false; const hasAgents = projectSpec.runtimes && projectSpec.runtimes.length > 0; const hasMemories = projectSpec.memories && projectSpec.memories.length > 0; + const hasKnowledgeBases = projectSpec.knowledgeBases && projectSpec.knowledgeBases.length > 0; const hasEvaluators = projectSpec.evaluators && projectSpec.evaluators.length > 0; const hasPolicyEngines = projectSpec.policyEngines && projectSpec.policyEngines.length > 0; const hasHarnesses = projectSpec.harnesses && projectSpec.harnesses.length > 0; @@ -98,22 +108,16 @@ export async function validateProject(): Promise { !hasAgents && !hasGateways && !hasMemories && + !hasKnowledgeBases && !hasEvaluators && !hasPolicyEngines && !hasHarnesses && !hasDatasets && !hasPayments ) { - let hasExistingStack = false; - try { - const deployedState = await configIO.readDeployedState(); - hasExistingStack = Object.keys(deployedState.targets).length > 0; - } catch { - // No deployed state file — no existing stack - } if (!hasExistingStack) { throw new ValidationError( - 'No resources defined in project. Add at least one resource (agent, memory, evaluator, or gateway) before deploying.' + 'No resources defined in project. Add at least one resource (agent, memory, knowledge base, evaluator, or gateway) before deploying.' ); } isTeardownDeploy = true; @@ -122,19 +126,24 @@ export async function validateProject(): Promise { // Validate runtime names don't exceed AWS limits validateRuntimeNames(projectSpec); - // Validate HTTP gateway names don't exceed AWS limits when combined with project name - validateHttpGatewayNames(projectSpec); - // Validate Container agents have Dockerfiles validateContainerAgents(projectSpec, configRoot); + // Validate per-harness harness.json up front so a schema error shows its precise message + // here instead of as an opaque "CDK synth failed" during synth. 
Skipped on a teardown deploy: + // tearing down a project with a hand-broken harness.json must not be blocked by validating the + // very files the user is discarding (mirrors the credential-skip rationale below). + if (!isTeardownDeploy) { + await validateHarnessSpecs(projectSpec, configRoot); + } + // Validate AWS credentials before proceeding with build/synth. // Skip for teardown deploys — callers validate after teardown confirmation. if (!isTeardownDeploy) { await validateAwsCredentials(); } - return { projectSpec, awsTargets, cdkProject, isTeardownDeploy }; + return { projectSpec, awsTargets, cdkProject, isTeardownDeploy, isFirstDeploy: !hasExistingStack }; } /** @@ -157,36 +166,6 @@ function validateRuntimeNames(projectSpec: AgentCoreProjectSpec): void { } } -/** - * Validates that combined HTTP gateway names (projectName-gatewayName) don't exceed AWS limits. - */ -function validateHttpGatewayNames(projectSpec: AgentCoreProjectSpec): void { - const projectName = projectSpec.name; - for (const gateway of projectSpec.httpGateways ?? []) { - const gwName = gateway.name; - if (gwName) { - const combinedName = `${projectName}-${gwName}`; - if (combinedName.length > MAX_GATEWAY_COMBINED_NAME_LENGTH) { - throw new Error( - `HTTP gateway name too long: "${combinedName}" (${combinedName.length} chars). ` + - `AWS limits gateway names to ${MAX_GATEWAY_COMBINED_NAME_LENGTH} characters. ` + - `Shorten the project name or gateway name in agentcore.json.` - ); - } - } - for (const target of gateway.targets ?? []) { - const combined = `${projectName}-${target.name}`; - if (combined.length > MAX_GATEWAY_COMBINED_NAME_LENGTH) { - const maxTargetLen = MAX_GATEWAY_COMBINED_NAME_LENGTH - projectName.length - 1; - throw new Error( - `HTTP gateway target "${target.name}" in gateway "${gwName}" would exceed the ${MAX_GATEWAY_COMBINED_NAME_LENGTH}-character AWS limit when prefixed with project name "${projectName}-" (total: ${combined.length} chars). 
` + - `Shorten the target name to ${maxTargetLen} characters or fewer.` - ); - } - } - } -} - /** * Validates that Container agents have required Dockerfiles. */ @@ -211,6 +190,30 @@ export function validateContainerAgents(projectSpec: AgentCoreProjectSpec, confi } } +/** + * Validate every per-harness `harness.json` against HarnessSpecSchema so a bad harness spec + * fails preflight with the precise Zod message (e.g. "sessionStoragePath ... pattern") instead + * of surfacing only "CDK synth failed: Subprocess exited with error 1" mid-deploy. The vended + * CDK app re-parses these at synth, so this just moves the same error earlier and makes it readable. + */ +export async function validateHarnessSpecs(projectSpec: AgentCoreProjectSpec, configRoot: string): Promise { + const harnesses = projectSpec.harnesses ?? []; + if (harnesses.length === 0) return; + + const configIO = new ConfigIO({ baseDir: configRoot }); + const errors: string[] = []; + for (const harness of harnesses) { + try { + await configIO.readHarnessSpec(harness.name); + } catch (err) { + errors.push(`Harness "${harness.name}": ${formatError(err)}`); + } + } + if (errors.length > 0) { + throw new ValidationError(`Invalid harness configuration:\n${errors.join('\n')}`); + } +} + const DEPRECATED_BASE_IMAGES: Record = { 'slim-bookworm': 'Affected by CVE-2026-42010 (GnuTLS authentication bypass). 
Update the FROM line to use a Trixie-based variant.', diff --git a/src/cli/operations/deploy/teardown.ts b/src/cli/operations/deploy/teardown.ts index 73b4777a6..d8e5541b7 100644 --- a/src/cli/operations/deploy/teardown.ts +++ b/src/cli/operations/deploy/teardown.ts @@ -3,10 +3,10 @@ import type { Result } from '../../../lib/result'; import type { AwsDeploymentTarget } from '../../../schema'; import { withTargetRegion } from '../../aws'; import { deleteConfigurationBundle } from '../../aws/agentcore-config-bundles'; +import { deleteHarness, isHarnessNotFoundError } from '../../aws/agentcore-harness'; import { CdkToolkitWrapper, silentIoHost } from '../../cdk/toolkit-lib'; import { type DiscoveredStack, findStack } from '../../cloudformation/stack-discovery'; -import { deleteOrphanedABTests } from './post-deploy-ab-tests'; -import { deleteOrphanedHttpGateways } from './post-deploy-http-gateways'; +import { findOrphanHarnesses } from '../harness/orphan'; import { StackSelectionStrategy } from '@aws-cdk/toolkit-lib'; import { existsSync } from 'fs'; import { join } from 'path'; @@ -120,7 +120,29 @@ export async function performStackTeardown(targetName: string): Promise const deployedState = await configIO.readDeployedState(); const resources = deployedState.targets?.[targetName]?.resources; - if (resources?.httpGateways || resources?.configBundles || resources?.abTests) { + // Delete imperative-build orphan harnesses. CloudFormation never created them so the stack + // destroy below won't touch them, and teardown removes the deployed-state they're recorded in — + // so a post-teardown `remove harness --discard` could no longer find them, leaving them running + // and billing forever. Teardown means "remove everything", so delete them here (using the + // recorded id+region, never re-resolving by name). 404 = already gone (success); other errors + // warn but don't abort the teardown. 
+ for (const orphan of findOrphanHarnesses(deployedState, undefined).filter(o => o.targetName === targetName)) { + try { + await deleteHarness({ region: orphan.region, harnessId: orphan.harnessId }); + console.log(`Deleted preview-build harness "${orphan.name}"`); + } catch (err) { + if (isHarnessNotFoundError(err)) { + // Already gone — nothing to do. + } else { + console.warn( + `Warning: Could not delete preview-build harness "${orphan.name}" (${orphan.harnessId}) in ` + + `${orphan.region}: ${err instanceof Error ? err.message : String(err)}. Delete it manually to stop incurring cost.` + ); + } + } + } + + if (resources?.configBundles) { let region = deployedTarget?.target.region; if (!region) { try { @@ -135,39 +157,6 @@ export async function performStackTeardown(targetName: string): Promise console.warn('Warning: Could not determine region for resource cleanup — resources may need manual deletion'); } if (region) { - const projectSpec = await configIO.readProjectSpec(); - const emptySpec = { ...projectSpec, abTests: [], httpGateways: [] }; - - if (resources.abTests) { - const abResult = await deleteOrphanedABTests({ - region, - projectSpec: emptySpec, - existingABTests: resources.abTests, - }); - for (const r of abResult.results) { - if (r.status === 'deleted') { - console.log(`Deleted AB test "${r.testName}"`); - } else if (r.error) { - console.warn(`Warning: Failed to delete AB test "${r.testName}": ${r.error}`); - } - } - } - - if (resources.httpGateways) { - const gwResult = await deleteOrphanedHttpGateways({ - region, - projectSpec: emptySpec, - existingHttpGateways: resources.httpGateways, - }); - for (const r of gwResult.results) { - if (r.status === 'deleted') { - console.log(`Deleted HTTP gateway "${r.gatewayName}"`); - } else if (r.error) { - console.warn(`Warning: Failed to delete HTTP gateway "${r.gatewayName}": ${r.error}`); - } - } - } - for (const [bundleName, bundleState] of Object.entries(resources.configBundles ?? 
{})) { try { await deleteConfigurationBundle({ region, bundleId: bundleState.bundleId }); diff --git a/src/cli/operations/dev/__tests__/config.test.ts b/src/cli/operations/dev/__tests__/config.test.ts index a0a86b665..a8e2289b3 100644 --- a/src/cli/operations/dev/__tests__/config.test.ts +++ b/src/cli/operations/dev/__tests__/config.test.ts @@ -16,6 +16,7 @@ describe('getDevConfig', () => { managedBy: 'CDK' as const, runtimes: [], memories: [], + knowledgeBases: [], credentials: [], evaluators: [], onlineEvalConfigs: [], @@ -23,7 +24,6 @@ describe('getDevConfig', () => { policyEngines: [], configBundles: [], abTests: [], - httpGateways: [], harnesses: [], datasets: [], payments: [], @@ -50,6 +50,7 @@ describe('getDevConfig', () => { }, ], memories: [], + knowledgeBases: [], credentials: [], evaluators: [], onlineEvalConfigs: [], @@ -57,7 +58,6 @@ describe('getDevConfig', () => { policyEngines: [], configBundles: [], abTests: [], - httpGateways: [], harnesses: [], datasets: [], payments: [], @@ -83,6 +83,7 @@ describe('getDevConfig', () => { }, ], memories: [], + knowledgeBases: [], credentials: [], evaluators: [], onlineEvalConfigs: [], @@ -90,7 +91,6 @@ describe('getDevConfig', () => { policyEngines: [], configBundles: [], abTests: [], - httpGateways: [], harnesses: [], datasets: [], payments: [], @@ -122,6 +122,7 @@ describe('getDevConfig', () => { }, ], memories: [], + knowledgeBases: [], credentials: [], evaluators: [], onlineEvalConfigs: [], @@ -129,7 +130,6 @@ describe('getDevConfig', () => { policyEngines: [], configBundles: [], abTests: [], - httpGateways: [], harnesses: [], datasets: [], payments: [], @@ -156,6 +156,7 @@ describe('getDevConfig', () => { }, ], memories: [], + knowledgeBases: [], credentials: [], evaluators: [], onlineEvalConfigs: [], @@ -163,7 +164,6 @@ describe('getDevConfig', () => { policyEngines: [], configBundles: [], abTests: [], - httpGateways: [], harnesses: [], datasets: [], payments: [], @@ -191,6 +191,7 @@ 
describe('getDevConfig', () => { }, ], memories: [], + knowledgeBases: [], credentials: [], evaluators: [], onlineEvalConfigs: [], @@ -198,7 +199,6 @@ describe('getDevConfig', () => { policyEngines: [], configBundles: [], abTests: [], - httpGateways: [], harnesses: [], datasets: [], payments: [], @@ -226,6 +226,7 @@ describe('getDevConfig', () => { }, ], memories: [], + knowledgeBases: [], credentials: [], evaluators: [], onlineEvalConfigs: [], @@ -233,7 +234,6 @@ describe('getDevConfig', () => { policyEngines: [], configBundles: [], abTests: [], - httpGateways: [], harnesses: [], datasets: [], payments: [], @@ -261,6 +261,7 @@ describe('getDevConfig', () => { }, ], memories: [], + knowledgeBases: [], credentials: [], evaluators: [], onlineEvalConfigs: [], @@ -268,7 +269,6 @@ describe('getDevConfig', () => { policyEngines: [], configBundles: [], abTests: [], - httpGateways: [], harnesses: [], datasets: [], payments: [], @@ -296,6 +296,7 @@ describe('getDevConfig', () => { }, ], memories: [], + knowledgeBases: [], credentials: [], evaluators: [], onlineEvalConfigs: [], @@ -303,7 +304,6 @@ describe('getDevConfig', () => { policyEngines: [], configBundles: [], abTests: [], - httpGateways: [], harnesses: [], datasets: [], payments: [], @@ -330,6 +330,7 @@ describe('getDevConfig', () => { }, ], memories: [], + knowledgeBases: [], credentials: [], evaluators: [], onlineEvalConfigs: [], @@ -337,7 +338,6 @@ describe('getDevConfig', () => { policyEngines: [], configBundles: [], abTests: [], - httpGateways: [], harnesses: [], datasets: [], payments: [], @@ -364,6 +364,7 @@ describe('getDevConfig', () => { }, ], memories: [], + knowledgeBases: [], credentials: [], evaluators: [], onlineEvalConfigs: [], @@ -371,7 +372,6 @@ describe('getDevConfig', () => { policyEngines: [], configBundles: [], abTests: [], - httpGateways: [], harnesses: [], datasets: [], payments: [], @@ -398,6 +398,7 @@ describe('getDevConfig', () => { }, ], memories: [], + knowledgeBases: [], credentials: [], 
evaluators: [], onlineEvalConfigs: [], @@ -405,7 +406,6 @@ describe('getDevConfig', () => { policyEngines: [], configBundles: [], abTests: [], - httpGateways: [], harnesses: [], datasets: [], payments: [], @@ -432,6 +432,7 @@ describe('getDevConfig', () => { }, ], memories: [], + knowledgeBases: [], credentials: [], evaluators: [], onlineEvalConfigs: [], @@ -439,7 +440,6 @@ describe('getDevConfig', () => { policyEngines: [], configBundles: [], abTests: [], - httpGateways: [], harnesses: [], datasets: [], payments: [], @@ -467,6 +467,7 @@ describe('getDevConfig', () => { }, ], memories: [], + knowledgeBases: [], credentials: [], evaluators: [], onlineEvalConfigs: [], @@ -474,7 +475,6 @@ describe('getDevConfig', () => { policyEngines: [], configBundles: [], abTests: [], - httpGateways: [], harnesses: [], datasets: [], payments: [], @@ -515,6 +515,7 @@ describe('getAgentPort', () => { }, ], memories: [], + knowledgeBases: [], credentials: [], evaluators: [], onlineEvalConfigs: [], @@ -522,7 +523,6 @@ describe('getAgentPort', () => { policyEngines: [], configBundles: [], abTests: [], - httpGateways: [], harnesses: [], datasets: [], payments: [], @@ -539,6 +539,7 @@ describe('getAgentPort', () => { managedBy: 'CDK' as const, runtimes: [], memories: [], + knowledgeBases: [], credentials: [], evaluators: [], onlineEvalConfigs: [], @@ -546,7 +547,6 @@ describe('getAgentPort', () => { policyEngines: [], configBundles: [], abTests: [], - httpGateways: [], harnesses: [], datasets: [], payments: [], @@ -568,6 +568,7 @@ describe('getDevSupportedAgents', () => { managedBy: 'CDK' as const, runtimes: [], memories: [], + knowledgeBases: [], credentials: [], evaluators: [], onlineEvalConfigs: [], @@ -575,7 +576,6 @@ describe('getDevSupportedAgents', () => { policyEngines: [], configBundles: [], abTests: [], - httpGateways: [], harnesses: [], datasets: [], payments: [], @@ -600,6 +600,7 @@ describe('getDevSupportedAgents', () => { }, ], memories: [], + knowledgeBases: [], 
credentials: [],
       evaluators: [],
       onlineEvalConfigs: [],
@@ -607,7 +608,6 @@ describe('getDevSupportedAgents', () => {
       policyEngines: [],
       configBundles: [],
       abTests: [],
-      httpGateways: [],
       harnesses: [],
       datasets: [],
       payments: [],
@@ -642,6 +642,7 @@ describe('getDevSupportedAgents', () => {
         },
       ],
       memories: [],
+      knowledgeBases: [],
       credentials: [],
       evaluators: [],
       onlineEvalConfigs: [],
@@ -649,7 +650,6 @@ describe('getDevSupportedAgents', () => {
       policyEngines: [],
       configBundles: [],
       abTests: [],
-      httpGateways: [],
       harnesses: [],
       payments: [],
     };
@@ -674,6 +674,7 @@ describe('getDevSupportedAgents', () => {
         },
       ],
       memories: [],
+      knowledgeBases: [],
       credentials: [],
       evaluators: [],
       onlineEvalConfigs: [],
@@ -681,7 +682,6 @@ describe('getDevSupportedAgents', () => {
       policyEngines: [],
       configBundles: [],
       abTests: [],
-      httpGateways: [],
       harnesses: [],
       datasets: [],
       payments: [],
@@ -716,6 +716,7 @@ describe('getDevSupportedAgents', () => {
         },
       ],
       memories: [],
+      knowledgeBases: [],
       credentials: [],
       evaluators: [],
       onlineEvalConfigs: [],
@@ -723,7 +724,6 @@ describe('getDevSupportedAgents', () => {
       policyEngines: [],
       configBundles: [],
       abTests: [],
-      httpGateways: [],
       harnesses: [],
       datasets: [],
       payments: [],
diff --git a/src/cli/operations/dev/__tests__/sse-transform.test.ts b/src/cli/operations/dev/__tests__/sse-transform.test.ts
new file mode 100644
index 000000000..7dda61820
--- /dev/null
+++ b/src/cli/operations/dev/__tests__/sse-transform.test.ts
@@ -0,0 +1,135 @@
+import { pipeSSETransformed } from '../sse-transform';
+import { type IncomingMessage, type ServerResponse } from 'node:http';
+import { PassThrough } from 'node:stream';
+import { describe, expect, it } from 'vitest';
+
+type MockInput = PassThrough & { headers: Record<string, string> };
+
+interface MockOutput {
+  chunks: string[];
+  ended: boolean;
+  write: (data: string) => boolean;
+  end: () => void;
+}
+
+/** Stand-in for the agent response: a PassThrough stream carrying an empty `headers` map. */
+function createMockInput(): MockInput {
+  const stream = new PassThrough() as MockInput;
+  stream.headers = {};
+  return stream;
+}
+
+/** Stand-in for the client response that records each write and whether `end` was called. */
+function createMockOutput(): MockOutput {
+  const mock: MockOutput = {
+    chunks: [],
+    ended: false,
+    write: (data: string) => {
+      mock.chunks.push(data);
+      return true;
+    },
+    end: () => {
+      mock.ended = true;
+    },
+  };
+  return mock;
+}
+
+/** Run pipeSSETransformed over a fresh mock input/output pair. */
+function startPipe(): { input: MockInput; output: MockOutput; done: Promise<void> } {
+  const input = createMockInput();
+  const output = createMockOutput();
+  const done = pipeSSETransformed(input as unknown as IncomingMessage, output as unknown as ServerResponse);
+  return { input, output, done };
+}
+
+describe('pipeSSETransformed', () => {
+  it('transforms ConverseStream events to plain text SSE', async () => {
+    const { input, output, done } = startPipe();
+
+    input.write('data: {"event":{"contentBlockDelta":{"delta":{"text":"Hello"}}}}\n\n');
+    input.write('data: {"event":{"contentBlockDelta":{"delta":{"text":" world"}}}}\n\n');
+    input.end();
+
+    await done;
+
+    expect(output.chunks).toEqual(['data: "Hello"\n\n', 'data: " world"\n\n']);
+    expect(output.ended).toBe(true);
+  });
+
+  it('passes through already-parsed string events', async () => {
+    const { input, output, done } = startPipe();
+
+    input.write('data: "simple text"\n\n');
+    input.end();
+
+    await done;
+
+    expect(output.chunks).toEqual(['data: "simple text"\n\n']);
+  });
+
+  it('forwards errors as JSON objects', async () => {
+    const { input, output, done } = startPipe();
+
+    input.write('data: {"error":"something broke"}\n\n');
+    input.end();
+
+    await done;
+
+    expect(output.chunks).toEqual(['data: {"error":"something broke"}\n\n']);
+  });
+
+  it('handles chunked data split across boundaries', async () => {
+    const { input, output, done } = startPipe();
+
+    input.write('data: {"event":{"contentBlock');
+    input.write('Delta":{"delta":{"text":"split"}}}}\n\n');
+    input.end();
+
+    await done;
+
+    expect(output.chunks).toEqual(['data: "split"\n\n']);
+  });
+
+  it('handles {"text": "..."} format from bedrock runtime', async () => {
+    const { input, output, done } = startPipe();
+
+    input.write('data: {"text":"bedrock response"}\n\n');
+    input.end();
+
+    await done;
+
+    expect(output.chunks).toEqual(['data: "bedrock response"\n\n']);
+  });
+
+  it('flushes a final event that is not newline-terminated', async () => {
+    const { input, output, done } = startPipe();
+
+    input.write('data: "tail"');
+    input.end();
+
+    await done;
+
+    expect(output.chunks).toEqual(['data: "tail"\n\n']);
+    expect(output.ended).toBe(true);
+  });
+
+  it('drops lines that are not SSE data lines', async () => {
+    const { input, output, done } = startPipe();
+
+    input.write('event: message\ndata: "kept"\n\n');
+    input.end();
+
+    await done;
+
+    expect(output.chunks).toEqual(['data: "kept"\n\n']);
+  });
+
+  it('rejects on input error', async () => {
+    const { input, done } = startPipe();
+
+    input.destroy(new Error('connection reset'));
+
+    await expect(done).rejects.toThrow('connection reset');
+  });
+});
diff --git a/src/cli/operations/dev/invoke.ts b/src/cli/operations/dev/invoke.ts
index 9a385e574..a91378ab3 100644
--- a/src/cli/operations/dev/invoke.ts
+++ b/src/cli/operations/dev/invoke.ts
@@ -9,13 +9,13 @@ export { type InvokeStreamingOptions, type SSELogger } from './invoke-types';
 /**
  * Parse a single SSE data line and extract the content.
*/
-function parseSSELine(line: string): { content: string | null; error: string | null } {
+export function parseSSELine(line: string): { content: string | null; error: string | null } {
   if (!line.startsWith('data: ')) {
     return { content: null, error: null };
   }
-  const content = line.slice(6);
+  const raw = line.slice(6);
   try {
-    const parsed: unknown = JSON.parse(content);
+    const parsed: unknown = JSON.parse(raw);
     if (typeof parsed === 'string') {
       return { content: parsed, error: null };
     } else if (parsed && typeof parsed === 'object') {
@@ -27,8 +27,14 @@ function parseSSELine(line: string): { content: string | null; error: string | n
         return { content: String((parsed as { text: unknown }).text), error: null };
       }
     }
+    // ConverseStream-shaped event: extract text delta
+    const event = (parsed as { event?: { contentBlockDelta?: { delta?: { text?: string } } } })?.event;
+    const text = event?.contentBlockDelta?.delta?.text;
+    if (typeof text === 'string') {
+      return { content: text, error: null };
+    }
   } catch {
-    return { content, error: null };
+    return { content: raw, error: null };
   }
   return { content: null, error: null };
 }
diff --git a/src/cli/operations/dev/sse-transform.ts b/src/cli/operations/dev/sse-transform.ts
new file mode 100644
index 000000000..74bcb80be
--- /dev/null
+++ b/src/cli/operations/dev/sse-transform.ts
@@ -0,0 +1,59 @@
+import { parseSSELine } from './invoke';
+import { type IncomingMessage, type ServerResponse } from 'node:http';
+import { StringDecoder } from 'node:string_decoder';
+
+/**
+ * Pipe an SSE stream from an agent response to a client response,
+ * transforming each SSE event through parseSSELine so formats like
+ * ConverseStream are normalized to plain text before reaching the browser.
+ *
+ * Non-text content (errors) is forwarded as `data: {"error": "..."}\n\n`.
+ * Parsed text is forwarded as `data: "text"\n\n`.
+ * Lines for which parseSSELine yields neither are dropped.
+ *
+ * @param input - Upstream agent response carrying the raw SSE stream.
+ * @param output - Client response that receives the normalized events.
+ * @returns Resolves after `input` ends and `output.end()` has been called;
+ *   rejects with the emitted error if `input` emits `error`.
+ */
+export function pipeSSETransformed(input: IncomingMessage, output: ServerResponse): Promise<void> {
+  return new Promise((resolve, reject) => {
+    // Decode incrementally: a multi-byte UTF-8 character can be split across
+    // two chunks, and decoding each chunk on its own would corrupt it.
+    const decoder = new StringDecoder('utf8');
+    let buffer = '';
+
+    // Normalize one SSE line and forward the result, if any, to the client.
+    const forwardLine = (line: string): void => {
+      const { content, error } = parseSSELine(line);
+      if (error) {
+        output.write(`data: ${JSON.stringify({ error })}\n\n`);
+      } else if (content) {
+        output.write(`data: ${JSON.stringify(content)}\n\n`);
+      }
+    };
+
+    input.on('data', (chunk: Buffer | string) => {
+      buffer += typeof chunk === 'string' ? chunk : decoder.write(chunk);
+      // Accept LF and CRLF line endings. The last element is an incomplete
+      // line (or '') and stays buffered until more data arrives.
+      const lines = buffer.split(/\r?\n/);
+      buffer = lines.pop() ?? '';
+      for (const line of lines) {
+        forwardLine(line);
+      }
+    });
+
+    input.on('end', () => {
+      // Flush bytes still held by the decoder plus any unterminated last line.
+      const tail = (buffer + decoder.end()).replace(/\r$/, '');
+      if (tail) {
+        forwardLine(tail);
+      }
+      output.end();
+      resolve();
+    });
+
+    input.on('error', reject);
+  });
+}
diff --git a/src/cli/operations/dev/web-ui/__tests__/resolve-ui-dist-dir.test.ts b/src/cli/operations/dev/web-ui/__tests__/resolve-ui-dist-dir.test.ts
index 6308a1f24..200f59f78 100644
--- a/src/cli/operations/dev/web-ui/__tests__/resolve-ui-dist-dir.test.ts
+++ b/src/cli/operations/dev/web-ui/__tests__/resolve-ui-dist-dir.test.ts
@@ -1,6 +1,5 @@
 import { resolveUIDistDir } from '../web-server.js';
-import fs from 'node:fs';
-import { mkdtempSync, rmSync, writeFileSync } from 'node:fs';
+import fs, { mkdtempSync, rmSync, writeFileSync } from 'node:fs';
 import { tmpdir } from 'node:os';
 import { join } from 'node:path';
 import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
diff --git a/src/cli/operations/dev/web-ui/api-types.ts b/src/cli/operations/dev/web-ui/api-types.ts
index 17090244e..b5ea2be12 100644
--- a/src/cli/operations/dev/web-ui/api-types.ts
+++ b/src/cli/operations/dev/web-ui/api-types.ts
@@ -9,7 +9,7 @@
  * from a single source of truth instead of manually duplicating.
*/ import type { HarnessModel } from '../../../../schema'; -import type { HarnessModelConfiguration, HarnessTool } from '../../../aws/agentcore-harness'; +import type { HarnessModelConfiguration, HarnessSkill, HarnessTool } from '../../../aws/agentcore-harness'; import type { CloudWatchSpanRecord, CloudWatchTraceRecord } from '../../traces/types'; // --------------------------------------------------------------------------- @@ -207,10 +207,13 @@ export interface ResourceEvaluator { /** Online eval config details in the resources response */ export interface ResourceOnlineEvalConfig { name: string; - agent: string; - evaluators: string[]; + agent?: string; + evaluators?: string[]; + insights?: string[]; samplingRate: number; description?: string; + logGroupNames?: string[]; + serviceNames?: string[]; deploymentStatus?: ResourceDeploymentStatus; deployed?: DeployedOnlineEvalState; } @@ -442,7 +445,7 @@ export interface A2AAgentCardResponse { export interface HarnessInvocationOverrides { model?: HarnessModelConfiguration; systemPrompt?: string; - skills?: { path: string }[]; + skills?: HarnessSkill[]; actorId?: string; maxIterations?: number; maxTokens?: number; @@ -462,12 +465,21 @@ export interface StatusHarness { name: string; } +export type ResourceSkillSource = + | { path: string } + | { s3: { uri: string } } + | { + git: { url: string; path?: string; auth?: { credentialName: string; credentialArn?: string; username?: string } }; + } + | { awsSkills: { paths?: string[] } }; + export interface ResourceHarness { name: string; /** @deprecated Use modelConfig instead. 
*/ model: string; modelConfig?: HarnessModel; tools: string[]; + skills?: ResourceSkillSource[]; deploymentStatus?: ResourceDeploymentStatus; deployed?: DeployedHarnessState; } diff --git a/src/cli/operations/dev/web-ui/handlers/invocations.ts b/src/cli/operations/dev/web-ui/handlers/invocations.ts index 45148874b..4b8fe09b5 100644 --- a/src/cli/operations/dev/web-ui/handlers/invocations.ts +++ b/src/cli/operations/dev/web-ui/handlers/invocations.ts @@ -3,6 +3,7 @@ import { invokeA2ARuntime, invokeAgentRuntimeStreaming, invokeAguiRuntime } from import { buildAguiRunInput } from '../../../../aws/agui-types'; import { resolveInvokeTarget } from '../../../../commands/invoke/resolve'; import { extractSSEEventText, extractTaskText, isStatusUpdateEvent } from '../../invoke-a2a'; +import { pipeSSETransformed } from '../../sse-transform'; import { handleHarnessInvocation } from './harness-invocation'; import { type RouteContext, parseRequestUrl } from './route-context'; import { randomUUID } from 'node:crypto'; @@ -112,9 +113,14 @@ export async function handleInvocations( responseHeaders['x-session-id'] = sessionId; } res.writeHead(agentRes.statusCode ?? 
200, responseHeaders); - agentRes.pipe(res); - agentRes.on('end', resolve); - agentRes.on('error', reject); + + if (contentType.includes('text/event-stream')) { + pipeSSETransformed(agentRes, res).then(resolve, reject); + } else { + agentRes.pipe(res); + agentRes.on('end', resolve); + agentRes.on('error', reject); + } } ); diff --git a/src/cli/operations/dev/web-ui/handlers/resources.ts b/src/cli/operations/dev/web-ui/handlers/resources.ts index 25771cc11..8f48a9691 100644 --- a/src/cli/operations/dev/web-ui/handlers/resources.ts +++ b/src/cli/operations/dev/web-ui/handlers/resources.ts @@ -13,6 +13,7 @@ import type { ResourceMemory, ResourceOnlineEvalConfig, ResourcePolicyEngine, + ResourceSkillSource, } from '../api-types'; import type { RouteContext } from './route-context'; import type { ServerResponse } from 'node:http'; @@ -121,11 +122,36 @@ export async function handleResources(ctx: RouteContext, res: ServerResponse, or let model = ''; let modelConfig: ResourceHarness['modelConfig']; let tools: string[] = []; + let skills: ResourceSkillSource[] | undefined; try { const spec = await configIO.readHarnessSpec(h.name); model = `${spec.model.provider}/${spec.model.modelId}`; modelConfig = spec.model; tools = spec.tools.map(t => t.name); + if (spec.skills.length > 0) { + skills = spec.skills.map(s => { + if ('s3Uri' in s) return { s3: { uri: s.s3Uri } }; + if ('gitUrl' in s) { + return { + git: { + url: s.gitUrl, + ...(s.path && { path: s.path }), + ...(s.auth && { + auth: { + credentialName: s.auth.credentialName, + credentialArn: targetResources?.credentials?.[s.auth.credentialName]?.credentialProviderArn, + username: s.auth.username, + }, + }), + }, + }; + } + if ('awsSkills' in s) { + return { awsSkills: { ...(s.awsSkills.paths && { paths: s.awsSkills.paths }) } }; + } + return { path: s.path }; + }); + } } catch { // harness spec may be unreadable — show what we can } @@ -135,6 +161,7 @@ export async function handleResources(ctx: RouteContext, res: 
ServerResponse, or model, modelConfig, tools, + skills, deploymentStatus: statusByTypeAndName.get(`harness:${h.name}`), deployed: deployed ? { harnessId: deployed.harnessId, harnessArn: deployed.harnessArn } : undefined, }); diff --git a/src/cli/operations/eval/batch-eval-storage.ts b/src/cli/operations/eval/batch-eval-storage.ts deleted file mode 100644 index 2c47141d1..000000000 --- a/src/cli/operations/eval/batch-eval-storage.ts +++ /dev/null @@ -1,91 +0,0 @@ -import { findConfigRoot } from '../../../lib'; -import type { EvaluationResults } from '../../aws/agentcore-batch-evaluation'; -import type { BatchEvaluationResult, RunBatchEvaluationCommandResult } from './run-batch-evaluation'; -import { existsSync, mkdirSync, readFileSync, readdirSync, writeFileSync } from 'fs'; -import { join } from 'path'; - -export const BATCH_EVAL_RESULTS_DIR = 'batch-eval-results'; - -export interface BatchEvalRunRecord { - name: string; - batchEvaluationId: string; - status: string; - startedAt?: string; - completedAt?: string; - evaluators: string[]; - results: BatchEvaluationResult[]; - evaluationResults?: EvaluationResults; - source?: string; - dataset?: { id: string; version: string }; -} - -function getResultsDir(): string { - const configRoot = findConfigRoot(); - if (!configRoot) { - throw new Error('No agentcore project found. Run `agentcore create` first.'); - } - return join(configRoot, '.cli', BATCH_EVAL_RESULTS_DIR); -} - -export interface SaveBatchEvalRunOptions { - result: RunBatchEvaluationCommandResult; - source?: string; - dataset?: { id: string; version: string }; -} - -export function saveBatchEvalRun(resultOrOptions: RunBatchEvaluationCommandResult | SaveBatchEvalRunOptions): string { - const dir = getResultsDir(); - mkdirSync(dir, { recursive: true }); - - // Support both the legacy signature and the new options object - const isOptionsObj = 'result' in resultOrOptions; - const result = isOptionsObj ? 
resultOrOptions.result : resultOrOptions; - const source = isOptionsObj ? resultOrOptions.source : undefined; - const dataset = isOptionsObj ? resultOrOptions.dataset : undefined; - - const id = result.batchEvaluationId ?? 'unknown'; - const filePath = join(dir, `${id}.json`); - - const record: BatchEvalRunRecord = { - name: result.name ?? 'unknown', - batchEvaluationId: id, - status: result.status ?? 'unknown', - startedAt: result.startedAt, - completedAt: result.completedAt, - evaluators: result.results.map(r => r.evaluatorId), - results: result.results, - evaluationResults: result.evaluationResults, - ...(source ? { source } : {}), - ...(dataset ? { dataset } : {}), - }; - - writeFileSync(filePath, JSON.stringify(record, null, 2)); - return filePath; -} - -export function loadBatchEvalRun(batchEvaluationId: string): BatchEvalRunRecord { - const dir = getResultsDir(); - const jsonName = batchEvaluationId.endsWith('.json') ? batchEvaluationId : `${batchEvaluationId}.json`; - const filePath = join(dir, jsonName); - - if (!existsSync(filePath)) { - throw new Error(`Batch evaluation run "${batchEvaluationId}" not found at ${filePath}`); - } - - return JSON.parse(readFileSync(filePath, 'utf-8')) as BatchEvalRunRecord; -} - -export function listBatchEvalRuns(): BatchEvalRunRecord[] { - const dir = getResultsDir(); - - if (!existsSync(dir)) { - return []; - } - - const files = readdirSync(dir) - .filter(f => f.endsWith('.json')) - .sort() - .reverse(); - - return files.map(f => JSON.parse(readFileSync(join(dir, f), 'utf-8')) as BatchEvalRunRecord); -} diff --git a/src/cli/operations/eval/run-batch-evaluation.ts b/src/cli/operations/eval/run-batch-evaluation.ts deleted file mode 100644 index 4b68456ed..000000000 --- a/src/cli/operations/eval/run-batch-evaluation.ts +++ /dev/null @@ -1,428 +0,0 @@ -/** - * Orchestrates running a BatchEvaluation: - * 1. Resolve agent from deployed state (for serviceNames / logGroupNames) - * 2. Build evaluators + dataSourceConfig - * 3. 
Call StartBatchEvaluation - * 4. Poll GetBatchEvaluation until terminal status - * 5. Return results - */ -import { ConfigIO, ResourceNotFoundError, ValidationError, toError } from '../../../lib'; -import type { Result } from '../../../lib/result'; -import type { DeployedState } from '../../../schema'; -import { generateClientToken, getBatchEvaluation, startBatchEvaluation } from '../../aws/agentcore-batch-evaluation'; -import type { - CloudWatchFilterConfig, - EvaluationResults, - GetBatchEvaluationResult, - SessionMetadataEntry, -} from '../../aws/agentcore-batch-evaluation'; -import { resolveEndpointName, runtimeLogGroup } from '../../aws/cloudwatch'; -import { getRegion } from '../../commands/shared/region-utils'; -import { ExecLogger } from '../../logging/exec-logger'; -import { resolveAgentContext } from '../invoke/resolve-agent-context'; -import { runDatasetScenarios } from './shared/dataset-session-provider'; -import { CloudWatchLogsClient, GetLogEventsCommand } from '@aws-sdk/client-cloudwatch-logs'; - -// ============================================================================ -// Types -// ============================================================================ - -export interface RunBatchEvaluationOptions { - /** Agent name (from project config) */ - agent: string; - /** Evaluator IDs (Builtin.* or custom) */ - evaluators: string[]; - /** Optional name for the batch evaluation */ - name?: string; - /** Region override */ - region?: string; - /** Specific session IDs to evaluate (optional — filters CloudWatch source) */ - sessionIds?: string[]; - /** Lookback window in days (optional — filters CloudWatch source by time range) */ - lookbackDays?: number; - /** Session metadata with ground truth (assertions, expected trajectory, turns) */ - sessionMetadata?: SessionMetadataEntry[]; - /** Poll interval in ms */ - pollIntervalMs?: number; - /** Progress callback */ - onProgress?: (status: string, message: string) => void; - /** Called once the batch 
evaluation has been created, with ID and region for cancellation */ - onStarted?: (info: { batchEvaluationId: string; region: string }) => void; - /** Dataset name — invoke agent with dataset scenarios before batch evaluation */ - dataset?: string; - /** Dataset version (omit for local file, or N/DRAFT) */ - datasetVersion?: string; - /** Runtime endpoint name (e.g. PROMPT_V1). Defaults to DEFAULT. */ - endpoint?: string; -} - -export interface BatchEvaluationResult { - evaluatorId: string; - score?: number; - label?: string; - explanation?: string; - error?: string; -} - -export type RunBatchEvaluationCommandResult = Result & { - batchEvaluationId?: string; - name?: string; - status?: string; - results: BatchEvaluationResult[]; - evaluationResults?: EvaluationResults; - startedAt?: string; - completedAt?: string; - logFilePath?: string; -}; - -// ============================================================================ -// Constants -// ============================================================================ - -const DEFAULT_POLL_INTERVAL_MS = 10_000; - -/** Delay before submitting batch eval to allow CloudWatch span ingestion. Matches SDK default. */ -const BATCH_INGESTION_DELAY_MS = 180_000; -const TERMINAL_STATUSES = new Set(['COMPLETED', 'COMPLETED_WITH_ERRORS', 'FAILED', 'STOPPED', 'CANCELLED']); - -// ============================================================================ -// Implementation -// ============================================================================ - -export async function runBatchEvaluationCommand( - options: RunBatchEvaluationOptions -): Promise { - const { agent, evaluators, pollIntervalMs = DEFAULT_POLL_INTERVAL_MS, onProgress } = options; - - let logger: ExecLogger | undefined; - try { - logger = new ExecLogger({ command: 'batch-evaluate' }); - } catch { - // Non-fatal - } - - try { - // 1. 
Read project config and deployed state - logger?.startStep('Load project config'); - const configIO = new ConfigIO(); - const [projectSpec, deployedState, awsTargets] = await Promise.all([ - configIO.readProjectSpec(), - configIO.readDeployedState(), - configIO.resolveAWSDeploymentTargets(), - ]); - - const region = await getRegion(options.region); - const stage = process.env.AGENTCORE_STAGE?.toLowerCase() ?? 'prod'; - logger?.log(`Region: ${region}, Stage: ${stage}`); - logger?.endStep('success'); - - // 2. Resolve agent from deployed state - logger?.startStep('Resolve agent'); - const agentState = resolveAgentState(deployedState, agent); - if (!agentState) { - const error = `Agent "${agent}" not deployed. Run \`agentcore deploy\` first.`; - logger?.log(error, 'error'); - logger?.endStep('error', error); - logger?.finalize(false); - return { success: false, error: new ResourceNotFoundError(error), results: [], logFilePath: logger?.logFilePath }; - } - - const runtimeId = agentState.runtimeId; - // Service name in CW logs uses project_agent format without the CDK hash suffix - const endpointName = resolveEndpointName(options.endpoint); - const serviceName = `${projectSpec.name}_${agent}.${endpointName}`; - const runtimeLogGroupName = runtimeLogGroup(runtimeId, options.endpoint); - - logger?.log(`Agent: ${agent} (runtime: ${runtimeId})`); - logger?.log(`Service name: ${serviceName}`); - logger?.log(`Log group: ${runtimeLogGroupName}`); - logger?.endStep('success'); - - // 2b. Resolve evaluator names to deployed IDs - // Handles: "Builtin.Correctness", "arn:aws:...:evaluator/Builtin.Correctness", or custom evaluator names - const targetResources = Object.values(deployedState.targets).find(t => t.resources?.runtimes?.[agent])?.resources; - const resolvedEvaluators = evaluators.map(name => { - // Extract short name from ARN if passed (e.g. 
"arn:aws:bedrock-agentcore:::evaluator/Builtin.Correctness" → "Builtin.Correctness") - const shortName = name.includes('evaluator/') ? name.split('evaluator/').pop()! : name; - if (shortName.startsWith('Builtin.')) return shortName; - const deployed = targetResources?.evaluators?.[shortName]; - if (deployed?.evaluatorId) { - logger?.log(`Resolved evaluator "${shortName}" → ${deployed.evaluatorId}`); - return deployed.evaluatorId; - } - logger?.log(`Evaluator "${shortName}" not found in deployed state, passing as-is`, 'warn'); - return shortName; - }); - - // 3. Start the batch evaluation - logger?.startStep('Start batch evaluation'); - let evalName: string; - if (options.name) { - if (!/^[a-zA-Z][a-zA-Z0-9_]{0,47}$/.test(options.name)) { - return { - success: false, - error: new ValidationError( - `Batch evaluation name must start with a letter and contain only letters, digits, and underscores (max 48 chars). Got: "${options.name}"` - ), - results: [], - logFilePath: logger?.logFilePath, - }; - } - evalName = options.name; - } else { - evalName = `${projectSpec.name}_${agent}_${Date.now()}`.replace(/[^a-zA-Z0-9_]/g, '_').slice(0, 48); - } - - onProgress?.('starting', `Starting batch evaluation "${evalName}"...`); - - // Dataset mode: invoke agent with scenarios first, then use those sessionIds - let datasetSessionIds: string[] = []; - let datasetMetadata: SessionMetadataEntry[] = []; - if (options.dataset) { - const agentContext = await resolveAgentContext({ - project: projectSpec, - deployedState, - awsTargets, - agentName: agent, - endpoint: options.endpoint, - }); - - onProgress?.('invoking', `Invoking agent with dataset "${options.dataset}"...`); - - const datasetResult = await runDatasetScenarios({ - agentContext, - datasetName: options.dataset, - version: options.datasetVersion, - configBaseDir: configIO.getConfigRoot(), - onProgress: (phase, msg) => onProgress?.(phase, msg), - }); - - const successfulResults = datasetResult.scenarioResults.filter(r => 
r.status === 'success'); - if (successfulResults.length === 0) { - return { - success: false, - error: new Error('All scenarios failed during invocation. No sessions to evaluate.'), - results: [], - logFilePath: logger?.logFilePath, - }; - } - - datasetSessionIds = successfulResults.map(r => r.sessionId); - - // Build sessionMetadata with ground truth from dataset - datasetMetadata = successfulResults.map(r => { - const scenario = datasetResult.scenarios.find(s => s.scenario_id === r.scenarioId); - return { - sessionId: r.sessionId, - testScenarioId: r.scenarioId, - groundTruth: scenario - ? { - inline: { - ...(scenario.assertions ? { assertions: scenario.assertions.map(a => ({ text: a })) } : {}), - ...(scenario.expected_trajectory - ? { expectedTrajectory: { toolNames: scenario.expected_trajectory } } - : {}), - ...(scenario.turns.some(t => t.expectedResponse) - ? { - turns: scenario.turns.map(t => ({ - input: { prompt: t.input }, - ...(t.expectedResponse ? { expectedResponse: { text: t.expectedResponse } } : {}), - })), - } - : {}), - }, - } - : undefined, - }; - }) as SessionMetadataEntry[]; - - onProgress?.('invoking', `✓ ${successfulResults.length} sessions ready for batch evaluation`); - - // Wait for CloudWatch span ingestion before submitting — the batch service - // queries CloudWatch server-side, so we can't poll. Match SDK default (180s). - onProgress?.('ingesting', 'Waiting 180s for CloudWatch span ingestion...'); - await sleep(BATCH_INGESTION_DELAY_MS); - } - - // Build optional filter config for CloudWatch filtering - // API requires either sessionIds OR timeRange, not both — sessionIds takes precedence - // Merge explicit sessionIds with any sessionIds from sessionMetadata (deduplicated) - const metadataSessionIds = options.sessionMetadata?.map(m => m.sessionId).filter(Boolean) ?? []; - const explicitSessionIds = [...(options.sessionIds ?? 
[]), ...datasetSessionIds]; - const effectiveSessionIds = [...new Set([...explicitSessionIds, ...metadataSessionIds])]; - const hasSessionIds = effectiveSessionIds.length > 0; - - const filterConfig: CloudWatchFilterConfig | undefined = (() => { - if (hasSessionIds) { - return { sessionIds: effectiveSessionIds }; - } - if (options.lookbackDays) { - const endTime = new Date().toISOString(); - const startTime = new Date(Date.now() - options.lookbackDays * 24 * 60 * 60 * 1000).toISOString(); - return { timeRange: { startTime, endTime } }; - } - return undefined; - })(); - - // Merge dataset metadata with any explicit sessionMetadata - const allSessionMetadata = [...(options.sessionMetadata ?? []), ...datasetMetadata]; - - const startPayload = { - region, - name: evalName, - evaluators: resolvedEvaluators.map(id => ({ evaluatorId: id })), - dataSourceConfig: { - cloudWatchLogs: { - serviceNames: [serviceName], - logGroupNames: [runtimeLogGroupName], - ...(filterConfig ? { filterConfig } : {}), - }, - }, - ...(allSessionMetadata.length > 0 ? { evaluationMetadata: { sessionMetadata: allSessionMetadata } } : {}), - clientToken: generateClientToken(), - }; - - logger?.log(`Request payload:\n${JSON.stringify(startPayload, null, 2)}`); - - const startResult = await startBatchEvaluation(startPayload); - - logger?.log(`Response: ${JSON.stringify(startResult, null, 2)}`); - logger?.endStep('success'); - - onProgress?.('running', `Batch evaluation started (ID: ${startResult.batchEvaluationId})`); - onProgress?.('running', 'This may take a few minutes...'); - options.onStarted?.({ batchEvaluationId: startResult.batchEvaluationId, region }); - - // 4. 
Poll for completion - logger?.startStep('Poll for completion'); - let current: GetBatchEvaluationResult = { - batchEvaluationId: startResult.batchEvaluationId, - batchEvaluationArn: startResult.batchEvaluationArn, - name: startResult.name, - status: startResult.status, - }; - - while (!TERMINAL_STATUSES.has(current.status)) { - await sleep(pollIntervalMs); - - current = await getBatchEvaluation({ - region, - batchEvaluationId: startResult.batchEvaluationId, - }); - - onProgress?.('polling', `Status: ${current.status}`); - logger?.log(`Poll status: ${current.status}`); - } - - if (current.status !== 'COMPLETED' && current.status !== 'COMPLETED_WITH_ERRORS') { - const reasons = current.errorDetails?.join('; ') ?? ''; - const error = `Batch evaluation finished with status: ${current.status}${reasons ? ` — ${reasons}` : ''}`; - logger?.log(error, 'error'); - logger?.log(`Full poll response:\n${JSON.stringify(current, null, 2)}`, 'error'); - logger?.endStep('error', error); - logger?.finalize(false); - return { - success: false, - error: new Error(error), - batchEvaluationId: startResult.batchEvaluationId, - name: evalName, - status: current.status, - results: [], - logFilePath: logger?.logFilePath, - }; - } - - logger?.endStep('success'); - - // 5. Fetch results from CloudWatch output logs - logger?.startStep('Fetch results'); - let results: BatchEvaluationResult[] = []; - - const cwDest = current.outputConfig?.cloudWatchConfig; - if (cwDest) { - try { - results = await fetchResultsFromCloudWatch(region, cwDest.logGroupName, cwDest.logStreamName); - logger?.log(`Fetched ${results.length} result(s) from CloudWatch`); - } catch (cwErr: unknown) { - logger?.log(`Failed to fetch CW results: ${cwErr instanceof Error ? 
cwErr.message : String(cwErr)}`, 'error'); - } - } - - logger?.endStep('success'); - - logger?.log(`Results: ${JSON.stringify(results, null, 2)}`); - logger?.finalize(true); - - return { - success: true, - batchEvaluationId: startResult.batchEvaluationId, - name: evalName, - status: current.status, - results, - evaluationResults: current.evaluationResults, - startedAt: current.createdAt, - completedAt: current.updatedAt ?? new Date().toISOString(), - logFilePath: logger?.logFilePath, - }; - } catch (err) { - const error = err instanceof Error ? err.message : String(err); - logger?.log(error, 'error'); - logger?.finalize(false); - return { success: false, error: toError(err), results: [], logFilePath: logger?.logFilePath }; - } -} - -// ============================================================================ -// Helpers -// ============================================================================ - -function resolveAgentState( - deployedState: DeployedState, - agentName: string -): { runtimeId: string; runtimeArn: string; roleArn?: string } | undefined { - for (const target of Object.values(deployedState.targets)) { - const agent = target.resources?.runtimes?.[agentName]; - if (agent) return agent; - } - return undefined; -} - -async function fetchResultsFromCloudWatch( - region: string, - logGroupName: string, - logStreamName: string -): Promise { - const client = new CloudWatchLogsClient({ region }); - const response = await client.send( - new GetLogEventsCommand({ - logGroupName, - logStreamName, - startFromHead: true, - }) - ); - - const results: BatchEvaluationResult[] = []; - for (const event of response.events ?? []) { - if (!event.message) continue; - try { - const parsed = JSON.parse(event.message) as Record; - const attrs = (parsed.attributes ?? 
{}) as Record; - const evaluatorId = attrs['gen_ai.evaluation.name'] as string | undefined; - if (!evaluatorId) continue; - - results.push({ - evaluatorId, - score: attrs['gen_ai.evaluation.score.value'] as number | undefined, - label: attrs['gen_ai.evaluation.score.label'] as string | undefined, - explanation: attrs['gen_ai.evaluation.explanation'] as string | undefined, - }); - } catch { - // Skip non-JSON or malformed entries - } - } - return results; -} - -function sleep(ms: number): Promise { - return new Promise(resolve => setTimeout(resolve, ms)); -} diff --git a/src/cli/operations/fetch-access/__tests__/fetch-gateway-token.test.ts b/src/cli/operations/fetch-access/__tests__/fetch-gateway-token.test.ts index ac556ab24..43833ef7e 100644 --- a/src/cli/operations/fetch-access/__tests__/fetch-gateway-token.test.ts +++ b/src/cli/operations/fetch-access/__tests__/fetch-gateway-token.test.ts @@ -44,6 +44,7 @@ const baseProjectSpec = { ], runtimes: [], memories: [], + knowledgeBases: [], evaluators: [], onlineEvalConfigs: [], }; diff --git a/src/cli/operations/fetch-access/fetch-gateway-token.ts b/src/cli/operations/fetch-access/fetch-gateway-token.ts index d02b4ba78..6c5b6c385 100644 --- a/src/cli/operations/fetch-access/fetch-gateway-token.ts +++ b/src/cli/operations/fetch-access/fetch-gateway-token.ts @@ -30,8 +30,7 @@ export async function fetchGatewayToken( ); } - const deployedGateways = target.resources?.mcp?.gateways ?? {}; - const deployedGateway = deployedGateways[gatewayName]; + const deployedGateway = target.resources?.gateways?.[gatewayName] ?? target.resources?.mcp?.gateways?.[gatewayName]; if (!deployedGateway?.gatewayUrl) { throw new Error( `Gateway '${gatewayName}' does not have a deployed URL. 
Run \`agentcore deploy\` to deploy the gateway.` diff --git a/src/cli/operations/fetch-access/list-gateways.ts b/src/cli/operations/fetch-access/list-gateways.ts index 0e70559ce..e98aeba6b 100644 --- a/src/cli/operations/fetch-access/list-gateways.ts +++ b/src/cli/operations/fetch-access/list-gateways.ts @@ -30,15 +30,15 @@ export async function listGateways( }); } - // Include HTTP gateways (auto-created for A/B testing) - const deployedHttpGateways = target.resources?.httpGateways ?? {}; - for (const httpGateway of projectSpec.httpGateways ?? []) { - const deployed = deployedHttpGateways[httpGateway.name]; + // Include HTTP gateways (deployed via CFN under resources.gateways) + const deployedHttpGateways = target.resources?.gateways ?? {}; + for (const gateway of projectSpec.agentCoreGateways.filter(g => g.protocolType === 'None')) { + const deployed = deployedHttpGateways[gateway.name]; if (!deployed?.gatewayArn) continue; gateways.push({ - name: httpGateway.name, - authType: 'AWS_IAM', + name: gateway.name, + authType: gateway.authorizerType ?? 
'NONE', }); } diff --git a/src/cli/operations/harness/__tests__/orphan.test.ts b/src/cli/operations/harness/__tests__/orphan.test.ts new file mode 100644 index 000000000..a9bcf1739 --- /dev/null +++ b/src/cli/operations/harness/__tests__/orphan.test.ts @@ -0,0 +1,92 @@ +import type { DeployedState, HarnessDeployedState } from '../../../../schema'; +import { findOrphanHarnesses, isOrphanHarnessRecord, regionFromHarnessArn } from '../orphan'; +import { describe, expect, it } from 'vitest'; + +const cfnRecord: HarnessDeployedState = { + harnessId: 'h-cfn', + harnessArn: 'arn:aws:bedrock-agentcore:us-west-2:111122223333:harness/h-cfn', + roleArn: 'arn:aws:iam::111122223333:role/cfn', + status: 'READY', + provisioner: 'cloudformation', +}; + +const orphanRecord: HarnessDeployedState = { + harnessId: 'h-orphan', + harnessArn: 'arn:aws:bedrock-agentcore:us-west-2:111122223333:harness/h-orphan', + roleArn: 'arn:aws:iam::111122223333:role/orphan', + status: 'READY', +}; + +function stateWith(harnesses: Record, targetName = 'default'): DeployedState { + return { targets: { [targetName]: { resources: { stackName: 'S', harnesses } } } }; +} + +describe('isOrphanHarnessRecord', () => { + it('treats a record without the cloudformation marker as an orphan', () => { + expect(isOrphanHarnessRecord(orphanRecord)).toBe(true); + }); + + it('treats a cloudformation-marked record as not an orphan', () => { + expect(isOrphanHarnessRecord(cfnRecord)).toBe(false); + }); + + it('treats an absent record as not an orphan', () => { + expect(isOrphanHarnessRecord(undefined)).toBe(false); + }); +}); + +describe('regionFromHarnessArn', () => { + it('parses the region segment', () => { + expect(regionFromHarnessArn('arn:aws:bedrock-agentcore:us-west-2:111122223333:harness/h')).toBe('us-west-2'); + }); + + it('returns undefined for a malformed ARN with no region', () => { + expect(regionFromHarnessArn('arn:aws:bedrock-agentcore::111122223333:harness/h')).toBeUndefined(); + 
expect(regionFromHarnessArn('not-an-arn')).toBeUndefined(); + }); +}); + +describe('findOrphanHarnesses', () => { + it('returns only unmarked records, with id/arn/region populated', () => { + const state = stateWith({ keep: cfnRecord, old: orphanRecord }); + const orphans = findOrphanHarnesses(state); + expect(orphans).toHaveLength(1); + expect(orphans[0]).toMatchObject({ + name: 'old', + targetName: 'default', + harnessId: 'h-orphan', + region: 'us-west-2', + }); + }); + + it('filters to a specific harness name when provided', () => { + const state = stateWith({ old: orphanRecord, alsoOld: { ...orphanRecord, harnessId: 'h2' } }); + expect(findOrphanHarnesses(state, 'old')).toHaveLength(1); + expect(findOrphanHarnesses(state, 'old')[0]!.name).toBe('old'); + }); + + it('returns nothing when every record carries the cloudformation marker', () => { + expect(findOrphanHarnesses(stateWith({ keep: cfnRecord }))).toEqual([]); + }); + + it('skips orphan records whose ARN has no parseable region (cannot be safely deleted)', () => { + const bad: HarnessDeployedState = { ...orphanRecord, harnessArn: 'not-an-arn' }; + expect(findOrphanHarnesses(stateWith({ bad }))).toEqual([]); + }); + + it('finds orphans across multiple targets', () => { + const state: DeployedState = { + targets: { + default: { resources: { stackName: 'S1', harnesses: { a: orphanRecord } } }, + prod: { resources: { stackName: 'S2', harnesses: { b: { ...orphanRecord, harnessId: 'h-b' } } } }, + }, + }; + const orphans = findOrphanHarnesses(state); + expect(orphans.map(o => o.targetName).sort()).toEqual(['default', 'prod']); + }); + + it('handles missing/empty deployed state', () => { + expect(findOrphanHarnesses(undefined)).toEqual([]); + expect(findOrphanHarnesses({ targets: {} })).toEqual([]); + }); +}); diff --git a/src/cli/operations/harness/orphan.ts b/src/cli/operations/harness/orphan.ts new file mode 100644 index 000000000..980a27443 --- /dev/null +++ b/src/cli/operations/harness/orphan.ts @@ -0,0 
+1,76 @@ +import type { DeployedState, HarnessDeployedState } from '../../../schema'; + +/** + * Harness orphan cleanup (transitional, preview→GA). + * + * An "orphan" is a harness recorded in deployed-state.json that was created by the old + * imperative preview build: a real AWS::BedrockAgentCore::Harness that is NOT part of any + * CloudFormation stack. The GA path provisions every harness via CloudFormation and stamps + * its deployed-state record with `provisioner: 'cloudformation'`. A record WITHOUT that + * marker can only have come from the imperative build, so the marker is a purely local + * discriminator — orphan detection makes no AWS calls. + * + * CloudFormation cannot delete a resource it never created, so an orphan keeps billing and + * blocks a same-named CFN deploy (the create would 409/rollback). The CLI never auto-deletes + * it; detection only decides whether to warn the user and route deletion through + * `agentcore remove harness `. + * + * This whole module is self-terminating: once orphans are cleaned up and the project is + * redeployed, every record carries the marker and `findOrphanHarnesses` returns nothing. It + * is built to be deleted after the deprecation window. + */ + +/** A located orphan harness, carrying the recorded identifiers needed to delete it. */ +export interface OrphanHarness { + /** Harness name (key under resources.harnesses). */ + name: string; + /** Deployment target the record lives under. */ + targetName: string; + /** Control-plane harness id, used for the DeleteHarness call. */ + harnessId: string; + /** Recorded harness ARN; its region segment is authoritative for the delete. */ + harnessArn: string; + /** Region parsed from the recorded ARN (never re-resolved by name). */ + region: string; +} + +/** + * A deployed-state harness record is an orphan when it exists but lacks the + * `provisioner: 'cloudformation'` marker stamped by the CDK deploy path. 
+ */ +export function isOrphanHarnessRecord(record: HarnessDeployedState | undefined): boolean { + return !!record && record.provisioner !== 'cloudformation'; +} + +/** + * Parse the region segment from a harness ARN + * (arn:aws:bedrock-agentcore:::harness/). Returns undefined when the + * ARN is malformed so callers can skip rather than issue a control-plane call to the wrong + * (or empty) region. + */ +export function regionFromHarnessArn(harnessArn: string): string | undefined { + const region = harnessArn.split(':')[3]; + return region && region.length > 0 ? region : undefined; +} + +/** + * Find orphan harnesses across all deployment targets in deployed-state. Reads local state + * only — no AWS calls. When `harnessName` is given, restricts the search to that name. + * + * Records whose ARN has no parseable region are skipped (they can't be safely deleted), so + * every returned orphan has the identifiers a deletion needs. + */ +export function findOrphanHarnesses(deployedState: DeployedState | undefined, harnessName?: string): OrphanHarness[] { + const orphans: OrphanHarness[] = []; + for (const [targetName, target] of Object.entries(deployedState?.targets ?? {})) { + const harnesses = target.resources?.harnesses ?? 
{}; + for (const [name, record] of Object.entries(harnesses)) { + if (harnessName && name !== harnessName) continue; + if (!isOrphanHarnessRecord(record)) continue; + const region = regionFromHarnessArn(record.harnessArn); + if (!region) continue; + orphans.push({ name, targetName, harnessId: record.harnessId, harnessArn: record.harnessArn, region }); + } + } + return orphans; +} diff --git a/src/cli/operations/harness/skill-utils.ts b/src/cli/operations/harness/skill-utils.ts new file mode 100644 index 000000000..f9a2313ec --- /dev/null +++ b/src/cli/operations/harness/skill-utils.ts @@ -0,0 +1,37 @@ +import type { AgentCoreProjectSpec, HarnessSpec } from '../../../schema'; +import { ValidationError } from '@/lib/errors/types'; +import type { Result } from '@/lib/result'; + +const KEY_SEPARATOR = '::'; + +export function getSkillKey(skill: HarnessSpec['skills'][number]): string { + if ('s3Uri' in skill) return `s3:${skill.s3Uri}`; + if ('gitUrl' in skill) return `git:${skill.gitUrl}${skill.path ? `${KEY_SEPARATOR}${skill.path}` : ''}`; + if ('awsSkills' in skill) return `awsSkills:${skill.awsSkills.paths?.slice().sort().join(',') ?? '*'}`; + return `path:${skill.path}`; +} + +export function buildGitSkillKey(gitUrl: string, gitPath?: string): string { + return `git:${gitUrl}${gitPath ? `${KEY_SEPARATOR}${gitPath}` : ''}`; +} + +export function validateGitSkillCredential(project: AgentCoreProjectSpec, credentialName: string): Result { + const credential = project.credentials.find(c => c.name === credentialName); + if (!credential) { + return { + success: false, + error: new ValidationError( + `Credential '${credentialName}' not found in project. Run 'agentcore add credential' first.` + ), + }; + } + if (credential.authorizerType !== 'ApiKeyCredentialProvider') { + return { + success: false, + error: new ValidationError( + `Credential '${credentialName}' is type '${credential.authorizerType}'. 
Git skill auth requires an ApiKeyCredentialProvider credential.` + ), + }; + } + return { success: true }; +} diff --git a/src/cli/operations/ingest/__tests__/index.test.ts b/src/cli/operations/ingest/__tests__/index.test.ts new file mode 100644 index 000000000..15821a100 --- /dev/null +++ b/src/cli/operations/ingest/__tests__/index.test.ts @@ -0,0 +1,296 @@ +import * as bedrockAgent from '../../../aws/bedrock-agent'; +import { runKbIngestionByName } from '../index'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +vi.mock('../../../aws/bedrock-agent'); + +function deployedState(kbId: string, dataSourceIds: string[]) { + return { + targets: { + default: { + resources: { + knowledgeBases: { + docs: { + knowledgeBaseId: kbId, + knowledgeBaseArn: `arn:aws:bedrock:us-west-2:0:knowledge-base/${kbId}`, + dataSources: dataSourceIds.map(dsId => ({ + dataSourceId: dsId, + uri: `s3://b/${dsId}/`, + })), + }, + }, + }, + }, + }, + } as never; +} + +describe('runKbIngestionByName', () => { + beforeEach(() => vi.mocked(bedrockAgent.startIngestionJob).mockReset()); + afterEach(() => vi.restoreAllMocks()); + + it('starts an ingestion job per data source and returns their IDs in order', async () => { + vi.mocked(bedrockAgent.startIngestionJob) + .mockResolvedValueOnce({ ingestionJobId: 'IJ-1', status: 'STARTING' } as never) + .mockResolvedValueOnce({ ingestionJobId: 'IJ-2', status: 'STARTING' } as never); + + const result = await runKbIngestionByName({ + knowledgeBaseName: 'docs', + deployedState: deployedState('KB1', ['DS1', 'DS2']), + targetName: 'default', + region: 'us-west-2', + }); + + expect(result.success).toBe(true); + if (!result.success) return; + expect(result.startedJobs).toEqual([ + { dataSourceId: 'DS1', uri: 's3://b/DS1/', ingestionJobId: 'IJ-1' }, + { dataSourceId: 'DS2', uri: 's3://b/DS2/', ingestionJobId: 'IJ-2' }, + ]); + expect(bedrockAgent.startIngestionJob).toHaveBeenCalledTimes(2); + }); + + function deployedStateWithUris(kbId: 
string, dataSources: { dataSourceId: string; uri: string }[]) { + return { + targets: { + default: { + resources: { + knowledgeBases: { + docs: { + knowledgeBaseId: kbId, + knowledgeBaseArn: `arn:aws:bedrock:us-west-2:0:knowledge-base/${kbId}`, + dataSources, + }, + }, + }, + }, + }, + } as never; + } + + it('ingests only the named data source when dataSourceUri is set', async () => { + vi.mocked(bedrockAgent.startIngestionJob).mockResolvedValueOnce({ + ingestionJobId: 'IJ-2', + status: 'STARTING', + } as never); + + const result = await runKbIngestionByName({ + knowledgeBaseName: 'docs', + deployedState: deployedStateWithUris('KB1', [ + { dataSourceId: 'DS-1', uri: 's3://a/' }, + { dataSourceId: 'DS-2', uri: 's3://b/' }, + ]), + targetName: 'default', + region: 'us-west-2', + dataSourceUri: 's3://b/', + concurrentRetryPolicy: { maxAttempts: 1, delayMs: 0 }, + }); + + expect(result.success).toBe(true); + if (!result.success) return; + expect(result.startedJobs).toHaveLength(1); + expect(result.startedJobs[0]?.uri).toBe('s3://b/'); + expect(bedrockAgent.startIngestionJob).toHaveBeenCalledTimes(1); + }); + + it('errors when the named data source is not found', async () => { + const result = await runKbIngestionByName({ + knowledgeBaseName: 'docs', + deployedState: deployedStateWithUris('KB1', [ + { dataSourceId: 'DS-1', uri: 's3://a/' }, + { dataSourceId: 'DS-2', uri: 's3://b/' }, + ]), + targetName: 'default', + region: 'us-west-2', + dataSourceUri: 's3://nope/', + concurrentRetryPolicy: { maxAttempts: 1, delayMs: 0 }, + }); + + expect(result.success).toBe(false); + if (result.success) return; + expect(result.error.message).toMatch(/s3:\/\/nope\//); + expect(bedrockAgent.startIngestionJob).not.toHaveBeenCalled(); + }); + + it("errors when the KB hasn't been deployed yet", async () => { + const empty = { targets: { default: { resources: { knowledgeBases: {} } } } } as never; + const result = await runKbIngestionByName({ + knowledgeBaseName: 'docs', + deployedState: 
empty, + targetName: 'default', + region: 'us-west-2', + }); + expect(result.success).toBe(false); + if (result.success) return; + expect(result.error.message).toMatch(/has not been deployed/i); + }); + + it('errors when no data sources are recorded', async () => { + const noDs = deployedState('KB1', []); + const result = await runKbIngestionByName({ + knowledgeBaseName: 'docs', + deployedState: noDs, + targetName: 'default', + region: 'us-west-2', + }); + expect(result.success).toBe(false); + if (result.success) return; + expect(result.error.message).toMatch(/no data sources/i); + }); + + it('reports a partial-failure result if one DS fails (other still started)', async () => { + vi.mocked(bedrockAgent.startIngestionJob) + .mockResolvedValueOnce({ ingestionJobId: 'IJ-1', status: 'STARTING' } as never) + .mockRejectedValueOnce(new Error('Throttled')); + + const result = await runKbIngestionByName({ + knowledgeBaseName: 'docs', + deployedState: deployedState('KB1', ['DS1', 'DS2']), + targetName: 'default', + region: 'us-west-2', + }); + expect(result.success).toBe(false); + if (result.success) return; + expect(result.error.message).toMatch(/1 of 2 data sources/i); + expect(result.error.message).toMatch(/Throttled/); + }); + + it('errors when the target name does not exist in deployed-state', async () => { + const state = deployedState('KB1', ['DS1']); + const result = await runKbIngestionByName({ + knowledgeBaseName: 'docs', + deployedState: state, + targetName: 'nonexistent', + region: 'us-west-2', + }); + expect(result.success).toBe(false); + if (result.success) return; + expect(result.error.message).toMatch(/has not been deployed/i); + }); + + it('retries the concurrent-ingestion-limit error and eventually succeeds', async () => { + const conflictErr = Object.assign(new Error('You have reached the maximum number of concurrent ingestion jobs'), { + name: 'ConflictException', + }); + vi.mocked(bedrockAgent.startIngestionJob) + .mockResolvedValueOnce({ 
ingestionJobId: 'IJ-1', status: 'STARTING' } as never) + .mockRejectedValueOnce(conflictErr) + .mockRejectedValueOnce(conflictErr) + .mockResolvedValueOnce({ ingestionJobId: 'IJ-2', status: 'STARTING' } as never); + + const result = await runKbIngestionByName({ + knowledgeBaseName: 'docs', + deployedState: deployedState('KB1', ['DS1', 'DS2']), + targetName: 'default', + region: 'us-west-2', + concurrentRetryPolicy: { maxAttempts: 5, delayMs: 0 }, + }); + + expect(result.success).toBe(true); + if (!result.success) return; + expect(result.startedJobs).toEqual([ + { dataSourceId: 'DS1', uri: 's3://b/DS1/', ingestionJobId: 'IJ-1' }, + { dataSourceId: 'DS2', uri: 's3://b/DS2/', ingestionJobId: 'IJ-2' }, + ]); + expect(bedrockAgent.startIngestionJob).toHaveBeenCalledTimes(4); + }); + + it('gives up after maxAttempts of concurrent-limit errors and reports the failure', async () => { + const conflictErr = Object.assign(new Error('You have reached the maximum number of concurrent ingestion jobs'), { + name: 'ConflictException', + }); + vi.mocked(bedrockAgent.startIngestionJob) + .mockResolvedValueOnce({ ingestionJobId: 'IJ-1', status: 'STARTING' } as never) + .mockRejectedValueOnce(conflictErr) + .mockRejectedValueOnce(conflictErr) + .mockRejectedValueOnce(conflictErr); + + const result = await runKbIngestionByName({ + knowledgeBaseName: 'docs', + deployedState: deployedState('KB1', ['DS1', 'DS2']), + targetName: 'default', + region: 'us-west-2', + concurrentRetryPolicy: { maxAttempts: 3, delayMs: 0 }, + }); + + expect(result.success).toBe(false); + if (result.success) return; + expect(result.error.message).toMatch(/1 of 2 data sources/i); + expect(result.error.message).toMatch(/concurrent ingestion jobs/i); + // 1 success + 3 retries on DS2 + expect(bedrockAgent.startIngestionJob).toHaveBeenCalledTimes(4); + }); + + it('does not retry non-concurrent errors', async () => { + vi.mocked(bedrockAgent.startIngestionJob) + .mockResolvedValueOnce({ ingestionJobId: 'IJ-1', status: 
'STARTING' } as never) + .mockRejectedValueOnce(new Error('Throttled')); + + const result = await runKbIngestionByName({ + knowledgeBaseName: 'docs', + deployedState: deployedState('KB1', ['DS1', 'DS2']), + targetName: 'default', + region: 'us-west-2', + concurrentRetryPolicy: { maxAttempts: 5, delayMs: 0 }, + }); + + expect(result.success).toBe(false); + expect(bedrockAgent.startIngestionJob).toHaveBeenCalledTimes(2); + }); + + it('emits progress messages on each retry sleep', async () => { + const conflictErr = Object.assign(new Error('You have reached the maximum number of concurrent ingestion jobs'), { + name: 'ConflictException', + }); + vi.mocked(bedrockAgent.startIngestionJob) + .mockResolvedValueOnce({ ingestionJobId: 'IJ-1', status: 'STARTING' } as never) + .mockRejectedValueOnce(conflictErr) + .mockResolvedValueOnce({ ingestionJobId: 'IJ-2', status: 'STARTING' } as never); + + const messages: string[] = []; + const result = await runKbIngestionByName({ + knowledgeBaseName: 'docs', + deployedState: deployedState('KB1', ['DS1', 'DS2']), + targetName: 'default', + region: 'us-west-2', + concurrentRetryPolicy: { maxAttempts: 5, delayMs: 0 }, + onProgress: msg => messages.push(msg), + }); + + expect(result.success).toBe(true); + expect(messages).toHaveLength(1); + expect(messages[0]).toMatch(/DS2.*another ingestion job is running/); + expect(messages[0]).toMatch(/retry 1\/4/); + }); + + it('honours an abort signal mid-sleep and reports the in-flight DS as failed', async () => { + const conflictErr = Object.assign(new Error('You have reached the maximum number of concurrent ingestion jobs'), { + name: 'ConflictException', + }); + // DS1 succeeds, DS2's first attempt rejects with conflict; the loop will + // sleep before retrying — we abort mid-sleep so the second attempt never + // fires. Only queue what gets consumed. 
+ vi.mocked(bedrockAgent.startIngestionJob) + .mockResolvedValueOnce({ ingestionJobId: 'IJ-1', status: 'STARTING' } as never) + .mockRejectedValueOnce(conflictErr); + + const controller = new AbortController(); + const promise = runKbIngestionByName({ + knowledgeBaseName: 'docs', + deployedState: deployedState('KB1', ['DS1', 'DS2']), + targetName: 'default', + region: 'us-west-2', + concurrentRetryPolicy: { maxAttempts: 5, delayMs: 50 }, + signal: controller.signal, + }); + // Abort while DS2 is sleeping between retries. + setTimeout(() => controller.abort(), 5); + const result = await promise; + + expect(result.success).toBe(false); + if (result.success) return; + expect(result.error.message).toMatch(/Aborted/); + // 1 success + 1 failed attempt; second retry never fires due to abort. + expect(bedrockAgent.startIngestionJob).toHaveBeenCalledTimes(2); + }); +}); diff --git a/src/cli/operations/ingest/index.ts b/src/cli/operations/ingest/index.ts new file mode 100644 index 000000000..37d8db50e --- /dev/null +++ b/src/cli/operations/ingest/index.ts @@ -0,0 +1,215 @@ +import { IngestionError } from '../../../lib'; +import type { Result } from '../../../lib/result'; +import type { DeployedState } from '../../../schema'; +import { startIngestionJob } from '../../aws/bedrock-agent'; + +export interface RunKbIngestionInput { + knowledgeBaseName: string; + deployedState: DeployedState; + targetName: string; + region: string; + /** + * When set, ingest only the data source whose recorded URI matches this + * value, instead of every data source on the KB. Returns a user error if no + * data source has this URI. + */ + dataSourceUri?: string; + /** + * Override the concurrent-limit retry policy. Defaults to {@link DEFAULT_CONCURRENT_RETRY_POLICY}. + * Tests pass a tight policy to keep runs fast. + */ + concurrentRetryPolicy?: ConcurrentRetryPolicy; + /** + * Optional callback for progress updates during the retry loop. 
Called when + * an attempt is rejected with a concurrent-limit error and we sleep before + * the next attempt. Surfaced to the user via deploy logger / stdout so a + * quiet 5-minute wait isn't mistaken for a hang. + */ + onProgress?: (message: string) => void; + /** + * Optional abort signal. When triggered, cancels any pending sleep between + * retries and returns failure for the in-flight DS. Already-started jobs + * are not rolled back. + */ + signal?: AbortSignal; +} + +export interface ConcurrentRetryPolicy { + /** Max attempts per data source (1 = no retry). */ + maxAttempts: number; + /** Delay before each retry, in milliseconds. */ + delayMs: number; +} + +/** + * Bedrock allows only one in-flight ingestion job per knowledge base. When we + * fire StartIngestionJob for multi-DS KBs the second+ call usually races and + * gets rejected with ConflictException. Retry up to 4 times (~5min total at + * 75s intervals) which comfortably covers the average ingestion-job duration + * for small S3 sources. + */ +export const DEFAULT_CONCURRENT_RETRY_POLICY: ConcurrentRetryPolicy = { + maxAttempts: 5, + delayMs: 75_000, +}; + +function isConcurrentLimitError(err: unknown): boolean { + if (!err || typeof err !== 'object') return false; + const e = err as { name?: string; message?: string }; + if (e.name === 'ConflictException') return true; + return typeof e.message === 'string' && /concurrent ingestion jobs/i.test(e.message); +} + +/** + * Sleep that resolves early if the abort signal fires. Resolves cleanly in + * either case so the caller can decide whether to honour the abort by checking + * `signal.aborted` afterwards. 
+ */ +function sleep(ms: number, signal?: AbortSignal): Promise { + if (signal?.aborted) return Promise.resolve(); + return new Promise(resolve => { + const timer = setTimeout(() => { + signal?.removeEventListener('abort', onAbort); + resolve(); + }, ms); + const onAbort = () => { + clearTimeout(timer); + resolve(); + }; + signal?.addEventListener('abort', onAbort, { once: true }); + }); +} + +export interface StartedIngestion { + dataSourceId: string; + uri: string; + ingestionJobId: string; +} + +export type RunKbIngestionResult = Result<{ startedJobs: StartedIngestion[] }, IngestionError>; + +/** + * Start ingestion for every data source on a deployed KB, returning the new + * ingestion-job IDs. + * + * Used by: + * - `agentcore run ingest --name X` for manual retry + * - The post-deploy auto-ingestion hook (when sourcesHash changed since last + * deploy) + * + * Pre-conditions: + * - The KB has been deployed (the deployed-state has a record for it). + * - At least one data source has been recorded in the deployed-state. + * + * Failure model: + * - Pre-flight failures (KB not deployed, no DSes) return success: false with + * errorSource overridden to 'user'. + * - Service failures bubble up the bedrock-agent error message; if any DS + * start fails, the function returns success: false and lists the failed + * DS(es) by ID. Successful starts are NOT rolled back. + */ +export async function runKbIngestionByName(input: RunKbIngestionInput): Promise { + const { knowledgeBaseName, deployedState, targetName, region, onProgress, signal } = input; + const retryPolicy = input.concurrentRetryPolicy ?? DEFAULT_CONCURRENT_RETRY_POLICY; + + const target = deployedState.targets[targetName]; + const deployed = target?.resources?.knowledgeBases?.[knowledgeBaseName]; + if (!deployed) { + return { + success: false, + error: new IngestionError( + `Knowledge base '${knowledgeBaseName}' has not been deployed to target '${targetName}'. 
Run 'agentcore deploy' first.`, + { errorSource: 'user' } + ), + }; + } + + if (deployed.dataSources.length === 0) { + return { + success: false, + error: new IngestionError(`Knowledge base '${knowledgeBaseName}' has no data sources to ingest.`, { + errorSource: 'user', + }), + }; + } + + let toIngest = deployed.dataSources; + if (input.dataSourceUri) { + toIngest = deployed.dataSources.filter(ds => ds.uri === input.dataSourceUri); + if (toIngest.length === 0) { + return { + success: false, + error: new IngestionError( + `Data source '${input.dataSourceUri}' not found on knowledge base '${knowledgeBaseName}'.`, + { errorSource: 'user' } + ), + }; + } + } + + const startedJobs: StartedIngestion[] = []; + const failures: { dataSourceId: string; uri: string; error: unknown }[] = []; + // Sequential to play nice with Bedrock's 1-concurrent-job-per-KB cap. When + // a start fails because a sibling job is still running, sleep and retry. + for (const ds of toIngest) { + if (signal?.aborted) { + failures.push({ + dataSourceId: ds.dataSourceId, + uri: ds.uri, + error: new Error('Aborted before start'), + }); + continue; + } + let lastError: unknown; + let started = false; + for (let attempt = 1; attempt <= retryPolicy.maxAttempts; attempt++) { + try { + const job = await startIngestionJob({ + region, + knowledgeBaseId: deployed.knowledgeBaseId, + dataSourceId: ds.dataSourceId, + }); + if (!job.ingestionJobId) { + lastError = new Error('No ingestionJobId in StartIngestionJob response'); + break; + } + startedJobs.push({ + dataSourceId: ds.dataSourceId, + uri: ds.uri, + ingestionJobId: job.ingestionJobId, + }); + started = true; + break; + } catch (err) { + lastError = err; + if (!isConcurrentLimitError(err) || attempt === retryPolicy.maxAttempts) break; + const delaySeconds = Math.round(retryPolicy.delayMs / 1000); + onProgress?.( + `${ds.dataSourceId} (${ds.uri}): another ingestion job is running on this KB, retry ${attempt}/${retryPolicy.maxAttempts - 1} in 
${delaySeconds}s…` + ); + await sleep(retryPolicy.delayMs, signal); + if (signal?.aborted) { + lastError = new Error('Aborted while waiting for concurrent ingestion job to drain'); + break; + } + } + } + if (!started) { + failures.push({ dataSourceId: ds.dataSourceId, uri: ds.uri, error: lastError }); + } + } + + if (failures.length > 0) { + const detail = failures + .map(f => ` ${f.dataSourceId} (${f.uri}): ${f.error instanceof Error ? f.error.message : String(f.error)}`) + .join('\n'); + return { + success: false, + error: new IngestionError( + `Failed to start ingestion for ${failures.length} of ${toIngest.length} data sources:\n${detail}` + ), + }; + } + + return { success: true, startedJobs }; +} diff --git a/src/cli/operations/insights/index.ts b/src/cli/operations/insights/index.ts new file mode 100644 index 000000000..fabe6f3bc --- /dev/null +++ b/src/cli/operations/insights/index.ts @@ -0,0 +1,10 @@ +export { + saveInsightsRun, + loadInsightsRun, + listInsightsRuns, + deleteLocalInsightsRun, + updateInsightsRun, + INSIGHTS_DIR, +} from './insights-storage'; +export { runInsightsCommand } from './run-insights'; +export type { RunInsightsOptions, InsightsRunRecord, RunInsightsResult } from './types'; diff --git a/src/cli/operations/insights/insights-storage.ts b/src/cli/operations/insights/insights-storage.ts new file mode 100644 index 000000000..fb612e0a4 --- /dev/null +++ b/src/cli/operations/insights/insights-storage.ts @@ -0,0 +1,58 @@ +import { findConfigRoot } from '../../../lib'; +import type { InsightsRunRecord } from './types'; +import { existsSync, mkdirSync, readFileSync, readdirSync, rmSync, writeFileSync } from 'fs'; +import { join } from 'path'; + +export const INSIGHTS_DIR = 'insights'; + +function getInsightsDir(): string { + const configRoot = findConfigRoot(); + if (!configRoot) { + throw new Error('No agentcore project found. 
Run `agentcore create` first.'); + } + return join(configRoot, '.cli', INSIGHTS_DIR); +} + +export function saveInsightsRun(record: InsightsRunRecord): string { + const dir = getInsightsDir(); + mkdirSync(dir, { recursive: true }); + const id = record.batchEvaluationId; + const filePath = join(dir, `${id}.json`); + writeFileSync(filePath, JSON.stringify(record, null, 2)); + return filePath; +} + +export function loadInsightsRun(batchEvaluationId: string): InsightsRunRecord { + const dir = getInsightsDir(); + const jsonName = batchEvaluationId.endsWith('.json') ? batchEvaluationId : `${batchEvaluationId}.json`; + const filePath = join(dir, jsonName); + if (!existsSync(filePath)) { + throw new Error(`Insights run "${batchEvaluationId}" not found at ${filePath}`); + } + return JSON.parse(readFileSync(filePath, 'utf-8')) as InsightsRunRecord; +} + +export function listInsightsRuns(): InsightsRunRecord[] { + const dir = getInsightsDir(); + if (!existsSync(dir)) return []; + const files = readdirSync(dir) + .filter(f => f.endsWith('.json')) + .sort() + .reverse(); + return files.map(f => JSON.parse(readFileSync(join(dir, f), 'utf-8')) as InsightsRunRecord); +} + +export function deleteLocalInsightsRun(batchEvaluationId: string): boolean { + const dir = getInsightsDir(); + const filePath = join(dir, `${batchEvaluationId}.json`); + if (!existsSync(filePath)) return false; + rmSync(filePath); + return true; +} + +export function updateInsightsRun(batchEvaluationId: string, updates: Partial): void { + const record = loadInsightsRun(batchEvaluationId); + const updated: InsightsRunRecord = { ...record, ...updates }; + const dir = getInsightsDir(); + writeFileSync(join(dir, `${batchEvaluationId}.json`), JSON.stringify(updated, null, 2)); +} diff --git a/src/cli/operations/insights/run-insights.ts b/src/cli/operations/insights/run-insights.ts new file mode 100644 index 000000000..4b443e0d2 --- /dev/null +++ b/src/cli/operations/insights/run-insights.ts @@ -0,0 +1,231 @@ +/** + * 
Orchestrates running an Insights job:
+ *   1. Load project config + deployed state and resolve the region
+ *   2. Build dataSourceConfig (online-eval config source, or CloudWatch logs of a deployed agent)
+ *   3. Build the insights array
+ *   4. Generate a job name when none is provided
+ *   5. Call StartBatchEvaluation
+ *   6. Save the initial run record locally
+ *   7. Optionally poll GetBatchEvaluation until terminal status
+ *   8. Return results
+ */
+import { ConfigIO, ResourceNotFoundError, toError } from '../../../lib';
+import type { DeployedState } from '../../../schema';
+import type { CloudWatchFilterConfig, InsightConfig } from '../../aws/agentcore-batch-evaluation';
+import { generateClientToken, getBatchEvaluation, startBatchEvaluation } from '../../aws/agentcore-batch-evaluation';
+import { resolveEndpointName, runtimeLogGroup } from '../../aws/cloudwatch';
+import { getRegion } from '../../commands/shared/region-utils';
+import { ExecLogger } from '../../logging/exec-logger';
+import { saveInsightsRun, updateInsightsRun } from './insights-storage';
+import type { InsightsRunRecord, RunInsightsOptions, RunInsightsResult } from './types';
+
+// ============================================================================
+// Constants
+// ============================================================================
+
+const DEFAULT_POLL_INTERVAL_MS = 5_000;
+const DEFAULT_LOOKBACK_DAYS = 7;
+const TERMINAL_STATUSES = new Set(['COMPLETED', 'FAILED', 'COMPLETED_WITH_ERRORS', 'STOPPED', 'CANCELLED']);
+
+// ============================================================================
+// Implementation
+// ============================================================================
+
+/**
+ * Start an Insights batch evaluation and, when `options.wait` is set, poll it until it
+ * reaches one of TERMINAL_STATUSES.
+ *
+ * Failures are reported through the returned result (`success: false`) rather than thrown:
+ * anything raised inside the main `try` is logged and converted with `toError`.
+ *
+ * @param options - Data source selection (`onlineEvalConfigArn`, or `agent` plus optional
+ *   time/session filters), the insight ids to run, and optional progress callbacks.
+ * @returns On success, the job identifiers, last observed status and session counters;
+ *   on failure, the error. Both shapes carry `logFilePath` when a logger was created.
+ */
+export async function runInsightsCommand(options: RunInsightsOptions): Promise<RunInsightsResult> {
+  let logger: ExecLogger | undefined;
+  try {
+    logger = new ExecLogger({ command: 'insights' });
+  } catch {
+    // Non-fatal
+  }
+
+  try {
+    // 1. Load project config + deployed state
+    logger?.startStep('Load project config');
+    const configIO = new ConfigIO();
+    const [projectSpec, deployedState] = await Promise.all([configIO.readProjectSpec(), configIO.readDeployedState()]);
+
+    const region = await getRegion(options.region);
+    logger?.log(`Region: ${region}`);
+    logger?.endStep('success');
+
+    // 2. Build dataSourceConfig
+    logger?.startStep('Build data source config');
+    let dataSourceConfig: {
+      cloudWatchLogs?: {
+        serviceNames: string[];
+        logGroupNames: string[];
+        filterConfig?: CloudWatchFilterConfig;
+      };
+      onlineEvaluationConfigSource?: { onlineEvaluationConfigArn: string };
+    };
+
+    if (options.onlineEvalConfigArn) {
+      // Online evaluation config source mode
+      dataSourceConfig = {
+        onlineEvaluationConfigSource: { onlineEvaluationConfigArn: options.onlineEvalConfigArn },
+      };
+      logger?.log(`Using onlineEvaluationConfigSource: ${options.onlineEvalConfigArn}`);
+    } else {
+      // CloudWatch logs mode — requires agent
+      if (!options.agent) {
+        const error = 'Agent name is required when not using --online-eval-config-arn';
+        logger?.log(error, 'error');
+        logger?.endStep('error', error);
+        logger?.finalize(false);
+        return { success: false, error: new ResourceNotFoundError(error), logFilePath: logger?.logFilePath };
+      }
+
+      const agentState = resolveAgentState(deployedState, options.agent);
+      if (!agentState) {
+        const error = `Agent "${options.agent}" not deployed. Run \`agentcore deploy\` first.`;
+        logger?.log(error, 'error');
+        logger?.endStep('error', error);
+        logger?.finalize(false);
+        return { success: false, error: new ResourceNotFoundError(error), logFilePath: logger?.logFilePath };
+      }
+
+      const runtimeId = agentState.runtimeId;
+      const endpointName = resolveEndpointName(options.endpoint);
+      const serviceName = `${projectSpec.name}_${options.agent}.${endpointName}`;
+      const logGroupName = runtimeLogGroup(runtimeId, options.endpoint);
+
+      logger?.log(`Agent: ${options.agent} (runtime: ${runtimeId})`);
+      logger?.log(`Service name: ${serviceName}`);
+      logger?.log(`Log group: ${logGroupName}`);
+
+      // Build filterConfig from lookbackDays/startTime/endTime/sessionIds.
+      // An invalid lookbackDays throws here and is reported by the outer catch.
+      const filterConfig = buildFilterConfig(options);
+
+      dataSourceConfig = {
+        cloudWatchLogs: {
+          serviceNames: [serviceName],
+          logGroupNames: [logGroupName],
+          ...(filterConfig ? { filterConfig } : {}),
+        },
+      };
+    }
+    logger?.endStep('success');
+
+    // 3. Build insights array
+    const insights: InsightConfig[] = options.insights.map(id => ({ insightId: id }));
+
+    // 4. Generate name if not provided
+    const dateStr = new Date().toISOString().slice(0, 10).replace(/-/g, '');
+    const rand = Math.random().toString(36).slice(2, 8);
+    const name = options.name ?? `${options.agent ?? 'insights'}_insights_${dateStr}_${rand}`;
+
+    // 5. Call startBatchEvaluation
+    logger?.startStep('Start insights job');
+    options.onProgress?.('starting', `Starting insights job "${name}"...`);
+
+    const evaluators = options.evaluators?.map(id => ({ evaluatorId: id }));
+
+    const startPayload = {
+      region,
+      name,
+      insights,
+      // Only send `evaluators` when at least one was requested.
+      ...(evaluators?.length && { evaluators }),
+      dataSourceConfig,
+      clientToken: generateClientToken(),
+    };
+
+    logger?.log(`Request payload:\n${JSON.stringify(startPayload, null, 2)}`);
+    const startResult = await startBatchEvaluation(startPayload);
+    logger?.log(`Response: ${JSON.stringify(startResult, null, 2)}`);
+    logger?.endStep('success');
+
+    options.onProgress?.('running', `Insights job started (ID: ${startResult.batchEvaluationId})`);
+    options.onStarted?.({ batchEvaluationId: startResult.batchEvaluationId, region });
+
+    // 6. Save initial record
+    const record: InsightsRunRecord = {
+      batchEvaluationId: startResult.batchEvaluationId,
+      batchEvaluationArn: startResult.batchEvaluationArn,
+      name: startResult.name,
+      status: startResult.status,
+      region,
+      createdAt: startResult.createdAt,
+      insights: options.insights,
+      agent: options.agent,
+    };
+    saveInsightsRun(record);
+
+    // 7. If wait mode — poll until terminal
+    if (options.wait) {
+      logger?.startStep('Poll for completion');
+      const pollInterval = options.pollIntervalMs ?? DEFAULT_POLL_INTERVAL_MS;
+
+      // NOTE(review): the loop has no upper bound; it ends only when the service reports a
+      // terminal status or getBatchEvaluation throws.
+      while (!TERMINAL_STATUSES.has(record.status)) {
+        await sleep(pollInterval);
+
+        const current = await getBatchEvaluation({
+          region,
+          batchEvaluationId: startResult.batchEvaluationId,
+        });
+
+        record.status = current.status;
+        if (current.evaluationResults) {
+          record.sessionCount = current.evaluationResults.totalNumberOfSessions;
+          record.sessionsCompleted = current.evaluationResults.numberOfSessionsCompleted;
+          record.sessionsFailed = current.evaluationResults.numberOfSessionsFailed;
+        }
+        if (TERMINAL_STATUSES.has(current.status)) {
+          record.completedAt = current.updatedAt;
+        }
+        // Persist every poll so the local record tracks the last status that was observed.
+        updateInsightsRun(startResult.batchEvaluationId, record);
+        options.onProgress?.(current.status, `Status: ${current.status}`);
+        logger?.log(`Poll status: ${current.status}`);
+      }
+      logger?.endStep('success');
+    }
+
+    logger?.finalize(true);
+
+    // 8. Return result
+    return {
+      success: true,
+      batchEvaluationId: record.batchEvaluationId,
+      batchEvaluationArn: record.batchEvaluationArn,
+      name: record.name,
+      status: record.status,
+      region,
+      sessionCount: record.sessionCount,
+      sessionsCompleted: record.sessionsCompleted,
+      sessionsFailed: record.sessionsFailed,
+      logFilePath: logger?.logFilePath,
+    };
+  } catch (err) {
+    const error = err instanceof Error ? err.message : String(err);
+    logger?.log(error, 'error');
+    logger?.finalize(false);
+    return { success: false, error: toError(err), logFilePath: logger?.logFilePath };
+  }
+}
+
+// ============================================================================
+// Helpers
+// ============================================================================
+
+/**
+ * Find the deployed runtime entry for `agentName`.
+ *
+ * Targets are scanned in `Object.values` order and the first one that has a runtime with
+ * this name wins.
+ *
+ * @returns The runtime entry, or `undefined` when no target has deployed the agent.
+ */
+function resolveAgentState(
+  deployedState: DeployedState,
+  agentName: string
+): { runtimeId: string; runtimeArn: string; roleArn?: string } | undefined {
+  for (const target of Object.values(deployedState.targets)) {
+    const agent = target.resources?.runtimes?.[agentName];
+    if (agent) return agent;
+  }
+  return undefined;
+}
+
+/**
+ * Build the CloudWatch filter for the job.
+ *
+ * A non-empty `sessionIds` list takes precedence and yields a session filter. Otherwise a
+ * time range is produced: `endTime` defaults to now and `startTime` defaults to
+ * `lookbackDays` (default DEFAULT_LOOKBACK_DAYS) before now.
+ *
+ * @throws Error when `startTime` has to be derived and `lookbackDays` is not a positive,
+ *   finite number. Without this check a NaN value surfaced as "Invalid time value" and a
+ *   zero or negative value produced an empty or inverted window.
+ */
+function buildFilterConfig(options: RunInsightsOptions): CloudWatchFilterConfig | undefined {
+  if (options.sessionIds && options.sessionIds.length > 0) {
+    return { sessionIds: options.sessionIds };
+  }
+  // Use explicit startTime/endTime if provided, otherwise fall back to lookbackDays
+  const endTime = options.endTime ?? new Date().toISOString();
+  let startTime = options.startTime;
+  if (startTime === undefined) {
+    const lookbackDays = options.lookbackDays ?? DEFAULT_LOOKBACK_DAYS;
+    if (!Number.isFinite(lookbackDays) || lookbackDays <= 0) {
+      throw new Error(`lookbackDays must be a positive number, got ${lookbackDays}`);
+    }
+    // NOTE(review): the window is anchored at "now" even when an explicit endTime is given,
+    // so an endTime older than the lookback window yields startTime > endTime — confirm
+    // whether it should be anchored at endTime instead.
+    startTime = new Date(Date.now() - lookbackDays * 24 * 60 * 60 * 1000).toISOString();
+  }
+  return { timeRange: { startTime, endTime } };
+}
+
+/** Resolve after `ms` milliseconds. */
+function sleep(ms: number): Promise<void> {
+  return new Promise(resolve => setTimeout(resolve, ms));
+}
diff --git a/src/cli/operations/insights/types.ts b/src/cli/operations/insights/types.ts
new file mode 100644
index 000000000..18267437e
--- /dev/null
+++ b/src/cli/operations/insights/types.ts
@@ -0,0 +1,46 @@
+import type { Result } from '../../../lib/result';
+
+export interface RunInsightsOptions {
+  agent?: string;
+  insights: string[];
+  /** Optional evaluator — required if chaining into `run recommendation --from-insights` */
+  evaluators?: string[];
+  onlineEvalConfigArn?: string;
+  lookbackDays?: number;
+  startTime?: string;
+  endTime?: string;
+  sessionIds?: string[];
+  name?: string;
+  region?: string;
+  endpoint?: string;
+  wait?: boolean;
+  pollIntervalMs?: number;
+  onProgress?: (status: string, message: string) => void;
+  onStarted?: (info: { batchEvaluationId: string; region: string }) => void;
+}
+
+export interface InsightsRunRecord {
+  batchEvaluationId: string;
+  batchEvaluationArn: string;
+  name: string;
+  status: string;
+  region: string;
+  createdAt?: string;
+  completedAt?: string;
+  insights: string[];
+  agent?: string;
+  sessionCount?: number;
+  sessionsCompleted?: number;
+  sessionsFailed?: number;
+}
+
+export type RunInsightsResult = Result<{
+  batchEvaluationId: string;
+  batchEvaluationArn: string;
+  name: string;
+  status: string;
+  region: string;
+  sessionCount?: number;
+  sessionsCompleted?: number;
+  sessionsFailed?: number;
+}> & { logFilePath?: string };
diff --git a/src/cli/operations/invoke/__tests__/resolve-agent-context.test.ts b/src/cli/operations/invoke/__tests__/resolve-agent-context.test.ts
index 67f360ee1..df6c83fa2 100644
--- a/src/cli/operations/invoke/__tests__/resolve-agent-context.test.ts
+++ b/src/cli/operations/invoke/__tests__/resolve-agent-context.test.ts
@@ -12,6 +12,7 @@ const 
mockProject = { managedBy: 'CDK' as const, runtimes: [{ name: 'MyAgent', build: 'CodeZip' as const, entrypoint: 'main.py', codeLocation: 'app/MyAgent/' }], memories: [], + knowledgeBases: [], credentials: [], evaluators: [], onlineEvalConfigs: [], diff --git a/src/cli/operations/jobs/ab-test/__tests__/build-options.test.ts b/src/cli/operations/jobs/ab-test/__tests__/build-options.test.ts new file mode 100644 index 000000000..eee9897cc --- /dev/null +++ b/src/cli/operations/jobs/ab-test/__tests__/build-options.test.ts @@ -0,0 +1,190 @@ +import type { DeployedResourceState } from '../../../../../schema'; +import type { StartABTestJobOptions } from '../../shared/types'; +import { buildABTestRequest } from '../build-options'; +import { describe, expect, it } from 'vitest'; + +const deployed: DeployedResourceState = { + configBundles: { + promptA: { bundleId: 'b-a', bundleArn: 'arn:aws:bedrock-agentcore:us-east-1:1:config-bundle/a', versionId: 'v7' }, + promptB: { bundleId: 'b-b', bundleArn: 'arn:aws:bedrock-agentcore:us-east-1:1:config-bundle/b', versionId: 'v3' }, + }, + onlineEvalConfigs: { + quality: { + onlineEvaluationConfigId: 'oe-1', + onlineEvaluationConfigArn: 'arn:aws:bedrock-agentcore:us-east-1:1:online-evaluation-config/q', + }, + }, +}; + +function baseOpts(overrides: Partial): StartABTestJobOptions { + return { + name: 't', + mode: 'config-bundle', + gateway: 'gw', + controlWeight: 50, + treatmentWeight: 50, + ...overrides, + }; +} + +describe('buildABTestRequest', () => { + describe('config-bundle mode', () => { + it('resolves bundle names to ARNs and LATEST to the deployed versionId', () => { + const built = buildABTestRequest( + baseOpts({ + controlBundle: 'promptA', + controlVersion: 'LATEST', + treatmentBundle: 'promptB', + treatmentVersion: 'v9', + onlineEval: 'quality', + }), + deployed + ); + + expect(built.variants).toHaveLength(2); + expect(built.variants[0]).toMatchObject({ + name: 'C', + weight: 50, + variantConfiguration: { + 
configurationBundle: { bundleArn: deployed.configBundles!.promptA!.bundleArn, bundleVersion: 'v7' }, + }, + }); + expect(built.variants[1]!.variantConfiguration.configurationBundle).toEqual({ + bundleArn: deployed.configBundles!.promptB!.bundleArn, + bundleVersion: 'v9', // explicit version is not expanded + }); + expect(built.evaluationConfig).toEqual({ + onlineEvaluationConfigArn: deployed.onlineEvalConfigs!.quality!.onlineEvaluationConfigArn, + }); + expect(built.variantSummaries[0]).toMatchObject({ name: 'C', bundleVersion: 'v7' }); + }); + + it('throws when a required bundle field is missing', () => { + expect(() => + buildABTestRequest(baseOpts({ controlBundle: 'promptA', onlineEval: 'quality' }), deployed) + ).toThrow(); + }); + + it('throws when the online-eval config is missing', () => { + expect(() => + buildABTestRequest( + baseOpts({ + controlBundle: 'promptA', + controlVersion: 'a1b2c3d4-e5f6-7890-abcd-ef1234567890', + treatmentBundle: 'promptB', + treatmentVersion: 'a1b2c3d4-e5f6-7890-abcd-ef1234567891', + }), + deployed + ) + ).toThrow(); + }); + }); + + describe('target-based mode', () => { + it('uses target names as-is and builds per-variant eval config', () => { + const built = buildABTestRequest( + baseOpts({ + mode: 'target-based', + controlTarget: 'ctrl', + treatmentTarget: 'treat', + controlOnlineEval: 'quality', + treatmentOnlineEval: 'quality', + }), + deployed + ); + expect(built.variants[0]!.variantConfiguration.target).toEqual({ name: 'ctrl' }); + expect(built.variants[1]!.variantConfiguration.target).toEqual({ name: 'treat' }); + }); + + it('builds per-variant eval config from control + treatment evals', () => { + const built = buildABTestRequest( + baseOpts({ + mode: 'target-based', + controlTarget: 'ctrl', + treatmentTarget: 'treat', + controlOnlineEval: 'quality', + treatmentOnlineEval: 'quality', + }), + deployed + ); + expect(built.evaluationConfig).toHaveProperty('perVariantOnlineEvaluationConfig'); + const perVariant = 
(built.evaluationConfig as { perVariantOnlineEvaluationConfig: unknown[] }) + .perVariantOnlineEvaluationConfig; + expect(perVariant).toHaveLength(2); + }); + + it('throws when control online eval is missing', () => { + expect(() => + buildABTestRequest( + baseOpts({ + mode: 'target-based', + controlTarget: 'ctrl', + treatmentTarget: 'treat', + treatmentOnlineEval: 'quality', + }), + deployed + ) + ).toThrow(/control-online-eval/); + }); + + it('throws when both evals are missing', () => { + expect(() => + buildABTestRequest( + baseOpts({ mode: 'target-based', controlTarget: 'ctrl', treatmentTarget: 'treat' }), + deployed + ) + ).toThrow(/control-online-eval/); + }); + }); + + describe('gateway filter', () => { + const configBundleOpts = (gatewayFilter?: string) => + baseOpts({ + controlBundle: 'promptA', + controlVersion: 'a1b2c3d4-e5f6-7890-abcd-ef1234567890', + treatmentBundle: 'promptB', + treatmentVersion: 'a1b2c3d4-e5f6-7890-abcd-ef1234567891', + onlineEval: 'quality', + gatewayFilter, + }); + + const targetBasedOpts = (gatewayFilter?: string) => + baseOpts({ + mode: 'target-based', + controlTarget: 'ctrl', + treatmentTarget: 'treat', + controlOnlineEval: 'quality', + treatmentOnlineEval: 'quality', + gatewayFilter, + }); + + it('sets a single target path in config-bundle mode', () => { + const built = buildABTestRequest(configBundleOpts('/orders/*'), deployed); + expect(built.gatewayFilter).toEqual({ targetPaths: ['/orders/*'] }); + }); + + it('sets a single target path in target-based mode', () => { + const built = buildABTestRequest(targetBasedOpts('/orders/*'), deployed); + expect(built.gatewayFilter).toEqual({ targetPaths: ['/orders/*'] }); + }); + + it('trims whitespace around the single path', () => { + const built = buildABTestRequest(targetBasedOpts(' /orders/* '), deployed); + expect(built.gatewayFilter).toEqual({ targetPaths: ['/orders/*'] }); + }); + + it('throws naming the exact paths when more than one is given', () => { + expect(() => 
buildABTestRequest(targetBasedOpts('/a,/b'), deployed)).toThrow( + /exactly one target path.*Got 2: "\/a", "\/b"/ + ); + expect(() => buildABTestRequest(configBundleOpts('/orders/*, /refunds/*'), deployed)).toThrow( + /Got 2: "\/orders\/\*", "\/refunds\/\*"/ + ); + }); + + it('omits gatewayFilter when not provided in either mode', () => { + expect(buildABTestRequest(configBundleOpts(), deployed).gatewayFilter).toBeUndefined(); + expect(buildABTestRequest(targetBasedOpts(), deployed).gatewayFilter).toBeUndefined(); + }); + }); +}); diff --git a/src/cli/operations/jobs/ab-test/__tests__/format.test.ts b/src/cli/operations/jobs/ab-test/__tests__/format.test.ts new file mode 100644 index 000000000..62b3fc49f --- /dev/null +++ b/src/cli/operations/jobs/ab-test/__tests__/format.test.ts @@ -0,0 +1,47 @@ +import type { ABTestJobRecord } from '../../shared/types'; +import { printABTestDetail } from '../format'; +import { afterEach, describe, expect, it, vi } from 'vitest'; + +function baseRecord(overrides: Partial = {}): ABTestJobRecord { + return { + type: 'ab-test', + id: 'abt-123', + arn: 'arn:aws:bedrock-agentcore:us-east-1:1:ab-test/abt-123', + status: 'ACTIVE', + lifecycleStatus: 'RUNNING', + createdAt: '2026-01-01T00:00:00.000Z', + agent: 'MyAgent', + name: 'MyTest', + mode: 'target-based', + gatewayArn: 'arn:aws:bedrock-agentcore:us-east-1:1:gateway/gw-abc', + variants: [ + { name: 'C', weight: 50, targetName: 'ctrl' }, + { name: 'T1', weight: 50, targetName: 'treat' }, + ], + evaluationConfig: { onlineEvaluationConfigArn: 'arn:aws:bedrock-agentcore:us-east-1:1:online-evaluation-config/q' }, + ...overrides, + }; +} + +describe('printABTestDetail — gateway filter', () => { + afterEach(() => { + vi.restoreAllMocks(); + }); + + function capture(record: ABTestJobRecord): string { + const spy = vi.spyOn(console, 'log').mockImplementation(vi.fn()); + printABTestDetail(record); + const output = spy.mock.calls.map(c => c.join(' ')).join('\n'); + return output; + } + + 
it('renders the filter path when a gatewayFilter is present', () => { + const output = capture(baseRecord({ gatewayFilter: { targetPaths: ['/orders/*'] } })); + expect(output).toContain('Gateway filter: /orders/*'); + }); + + it('renders "none" when no gatewayFilter is present', () => { + const output = capture(baseRecord()); + expect(output).toContain('Gateway filter: none'); + }); +}); diff --git a/src/cli/operations/jobs/ab-test/__tests__/promote.test.ts b/src/cli/operations/jobs/ab-test/__tests__/promote.test.ts new file mode 100644 index 000000000..83b88c193 --- /dev/null +++ b/src/cli/operations/jobs/ab-test/__tests__/promote.test.ts @@ -0,0 +1,434 @@ +import type { ABTestJobRecord } from '../../shared/types'; +import { promoteABTestConfig } from '../promote'; +import { beforeEach, describe, expect, it, vi } from 'vitest'; + +// Mock ConfigIO — vi.hoisted ensures these are available before the hoisted vi.mock runs +const { mockReadProjectSpec, mockWriteProjectSpec, mockReadDeployedState, mockGetConfigurationBundleVersion } = + vi.hoisted(() => ({ + mockReadProjectSpec: vi.fn(), + mockWriteProjectSpec: vi.fn(), + mockReadDeployedState: vi.fn(), + mockGetConfigurationBundleVersion: vi.fn(), + })); + +vi.mock('../../../../../lib', () => { + class MockConfigIO { + readProjectSpec = mockReadProjectSpec; + writeProjectSpec = mockWriteProjectSpec; + readDeployedState = mockReadDeployedState; + } + return { ConfigIO: MockConfigIO }; +}); + +vi.mock('../../../../aws/agentcore-config-bundles', () => ({ + getConfigurationBundleVersion: mockGetConfigurationBundleVersion, +})); + +// --------------------------------------------------------------------------- +// Helpers — promote is now RECORD-DRIVEN: it reads the job record's variants, +// not project.abTests[] (which the jobs model never populates). 
+// --------------------------------------------------------------------------- + +function baseRecord(overrides: Partial): ABTestJobRecord { + return { + type: 'ab-test', + id: 'ab-123', + arn: 'arn:aws:bedrock-agentcore:us-east-1:1:ab-test/ab-123', + status: 'STOPPED', + lifecycleStatus: 'STOPPED', + createdAt: '2026-01-01T00:00:00Z', + agent: 'my-agent', + name: 'myTest', + mode: 'config-bundle', + gatewayArn: 'arn:aws:bedrock-agentcore:us-east-1:1:gateway/my-gw', + variants: [], + evaluationConfig: { onlineEvaluationConfigArn: 'arn:aws:eval:config' }, + ...overrides, + }; +} + +function makeTargetBasedProject() { + return { + name: 'TestProject', + runtimes: [ + { + name: 'my-runtime', + endpoints: { + control: { version: 1 }, + treatment: { version: 2 }, + }, + }, + ], + agentCoreGateways: [ + { + name: 'my-gw', + targets: [ + { + name: 'ctrl-target', + targetType: 'httpRuntime', + httpRuntime: { runtime: 'my-runtime', runtimeEndpoint: 'control' }, + }, + { + name: 'treat-target', + targetType: 'httpRuntime', + httpRuntime: { runtime: 'my-runtime', runtimeEndpoint: 'treatment' }, + }, + ], + }, + ], + onlineEvalConfigs: [], + configBundles: [], + abTests: [], + }; +} + +// A config-bundle A/B test promotes between two VERSIONS of ONE bundle, so both variants share the +// same bundleArn; only bundleVersion differs. 
+const BUNDLE_ARN = 'arn:aws:bedrock-agentcore:us-east-1:1:configuration-bundle/promptBundle-abc123'; + +function makeConfigBundleProject() { + return { + name: 'TestProject', + runtimes: [], + agentCoreGateways: [], + onlineEvalConfigs: [], + configBundles: [ + { + name: 'promptBundle', + type: 'ConfigurationBundle', + components: { '{{runtime:r}}': { configuration: { systemPrompt: 'OLD' } } }, + }, + ], + abTests: [], + }; +} + +function makeBundleDeployedState() { + return { + targets: { + default: { + resources: { + configBundles: { + promptBundle: { bundleId: 'promptBundle-abc123', bundleArn: BUNDLE_ARN, versionId: 'v1' }, + }, + }, + }, + }, + }; +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +describe('promoteABTestConfig (record-driven)', () => { + beforeEach(() => { + vi.clearAllMocks(); + mockWriteProjectSpec.mockResolvedValue(undefined); + }); + + describe('target-based promote', () => { + it('bumps control endpoint version to treatment version', async () => { + mockReadProjectSpec.mockResolvedValue(makeTargetBasedProject()); + + const record = baseRecord({ + mode: 'target-based', + gatewayName: 'my-gw', + variants: [ + { name: 'C', weight: 50, targetName: 'ctrl-target' }, + { name: 'T1', weight: 50, targetName: 'treat-target' }, + ], + }); + + const result = await promoteABTestConfig(record); + + expect(result.promoted).toBe(true); + expect(result.mode).toBe('target-based'); + expect(result.promotionDetail).toContain('control'); + const written = mockWriteProjectSpec.mock.calls[0]![0]; + expect(written.runtimes[0].endpoints.control.version).toBe(2); + }); + + it('repoints control to the treatment runtime when variants target different runtimes', async () => { + // Control → runtime-a (endpoint prod), Treatment → runtime-b (endpoint prod). 
No shared + // runtime to version-bump, so promote clones treatment's httpRuntime onto the control target. + const project = { + name: 'TestProject', + runtimes: [ + { name: 'runtime-a', endpoints: { prod: { version: 1 } } }, + { name: 'runtime-b', endpoints: { prod: { version: 5 } } }, + ], + agentCoreGateways: [ + { + name: 'my-gw', + targets: [ + { + name: 'ctrl-target', + targetType: 'httpRuntime', + httpRuntime: { runtime: 'runtime-a', runtimeEndpoint: 'prod' }, + }, + { + name: 'treat-target', + targetType: 'httpRuntime', + httpRuntime: { runtime: 'runtime-b', runtimeEndpoint: 'prod' }, + }, + ], + }, + ], + onlineEvalConfigs: [], + configBundles: [], + abTests: [], + }; + mockReadProjectSpec.mockResolvedValue(project); + + const record = baseRecord({ + mode: 'target-based', + gatewayName: 'my-gw', + variants: [ + { name: 'C', weight: 50, targetName: 'ctrl-target' }, + { name: 'T1', weight: 50, targetName: 'treat-target' }, + ], + }); + + const result = await promoteABTestConfig(record); + + expect(result.promoted).toBe(true); + const written = mockWriteProjectSpec.mock.calls[0]![0]; + const ctrl = written.agentCoreGateways[0].targets.find((t: { name: string }) => t.name === 'ctrl-target'); + expect(ctrl.httpRuntime.runtime).toBe('runtime-b'); + expect(ctrl.httpRuntime.runtimeEndpoint).toBe('prod'); + }); + + it('repoints control when variants use the default (unnamed) endpoint', async () => { + // Neither target names a runtimeEndpoint → no endpoints[name].version to bump → repoint path. 
+ const project = { + name: 'TestProject', + runtimes: [ + { name: 'runtime-a', endpoints: {} }, + { name: 'runtime-b', endpoints: {} }, + ], + agentCoreGateways: [ + { + name: 'my-gw', + targets: [ + { name: 'ctrl-target', targetType: 'httpRuntime', httpRuntime: { runtime: 'runtime-a' } }, + { name: 'treat-target', targetType: 'httpRuntime', httpRuntime: { runtime: 'runtime-b' } }, + ], + }, + ], + onlineEvalConfigs: [], + configBundles: [], + abTests: [], + }; + mockReadProjectSpec.mockResolvedValue(project); + + const record = baseRecord({ + mode: 'target-based', + gatewayName: 'my-gw', + variants: [ + { name: 'C', weight: 50, targetName: 'ctrl-target' }, + { name: 'T1', weight: 50, targetName: 'treat-target' }, + ], + }); + + const result = await promoteABTestConfig(record); + + expect(result.promoted).toBe(true); + const written = mockWriteProjectSpec.mock.calls[0]![0]; + const ctrl = written.agentCoreGateways[0].targets.find((t: { name: string }) => t.name === 'ctrl-target'); + expect(ctrl.httpRuntime.runtime).toBe('runtime-b'); + }); + + it('returns promoted=false when the gateway name is missing from the record', async () => { + mockReadProjectSpec.mockResolvedValue(makeTargetBasedProject()); + const record = baseRecord({ + mode: 'target-based', + gatewayName: undefined, + variants: [ + { name: 'C', weight: 50, targetName: 'ctrl-target' }, + { name: 'T1', weight: 50, targetName: 'treat-target' }, + ], + }); + + const result = await promoteABTestConfig(record); + expect(result.promoted).toBe(false); + expect(mockWriteProjectSpec).not.toHaveBeenCalled(); + }); + }); + + describe('config-bundle promote', () => { + it('adopts the winning (treatment) version components into the bundle (same bundle, diff version)', async () => { + mockReadProjectSpec.mockResolvedValue(makeConfigBundleProject()); + mockReadDeployedState.mockResolvedValue(makeBundleDeployedState()); + // The service returns the treatment version's components. 
+ mockGetConfigurationBundleVersion.mockResolvedValue({ + components: { '{{runtime:r}}': { configuration: { systemPrompt: 'NEW' } } }, + }); + + const record = baseRecord({ + mode: 'config-bundle', + variants: [ + { name: 'C', weight: 50, bundleArn: BUNDLE_ARN, bundleVersion: 'v1' }, + { name: 'T1', weight: 50, bundleArn: BUNDLE_ARN, bundleVersion: 'v2' }, + ], + }); + + const result = await promoteABTestConfig(record); + + expect(result.promoted).toBe(true); + expect(result.mode).toBe('config-bundle'); + // Fetched the WINNING (treatment) version v2 from the bundle id parsed from the ARN. + expect(mockGetConfigurationBundleVersion).toHaveBeenCalledWith( + expect.objectContaining({ bundleId: 'promptBundle-abc123', versionId: 'v2' }) + ); + const written = mockWriteProjectSpec.mock.calls[0]![0]; + const bundle = written.configBundles.find((b: { name: string }) => b.name === 'promptBundle'); + expect(bundle.components['{{runtime:r}}'].configuration.systemPrompt).toBe('NEW'); + }); + + it('returns promoted=false (error) when control and treatment are DIFFERENT bundles', async () => { + mockReadProjectSpec.mockResolvedValue(makeConfigBundleProject()); + mockReadDeployedState.mockResolvedValue(makeBundleDeployedState()); + + const record = baseRecord({ + mode: 'config-bundle', + variants: [ + { + name: 'C', + weight: 50, + bundleArn: 'arn:aws:bedrock-agentcore:us-east-1:1:configuration-bundle/bundleA', + bundleVersion: 'v1', + }, + { + name: 'T1', + weight: 50, + bundleArn: 'arn:aws:bedrock-agentcore:us-east-1:1:configuration-bundle/bundleB', + bundleVersion: 'v1', + }, + ], + }); + + const result = await promoteABTestConfig(record); + expect(result.promoted).toBe(false); + expect(result.promotionDetail).toContain('different config bundles'); + expect(mockGetConfigurationBundleVersion).not.toHaveBeenCalled(); + expect(mockWriteProjectSpec).not.toHaveBeenCalled(); + }); + + it('returns promoted=false when the bundle cannot be resolved from deployed state', async () => { 
+ mockReadProjectSpec.mockResolvedValue(makeConfigBundleProject()); + mockReadDeployedState.mockResolvedValue({ targets: { default: { resources: { configBundles: {} } } } }); + + const record = baseRecord({ + mode: 'config-bundle', + variants: [ + { name: 'C', weight: 50, bundleArn: BUNDLE_ARN, bundleVersion: 'v1' }, + { name: 'T1', weight: 50, bundleArn: BUNDLE_ARN, bundleVersion: 'v2' }, + ], + }); + + const result = await promoteABTestConfig(record); + expect(result.promoted).toBe(false); + expect(mockWriteProjectSpec).not.toHaveBeenCalled(); + }); + }); + + describe('malformed record', () => { + it('returns promoted=false when control/treatment variants are missing', async () => { + mockReadProjectSpec.mockResolvedValue(makeConfigBundleProject()); + const record = baseRecord({ mode: 'config-bundle', variants: [] }); + + const result = await promoteABTestConfig(record); + expect(result.promoted).toBe(false); + expect(mockWriteProjectSpec).not.toHaveBeenCalled(); + }); + }); + + // BUG-4: promote must validate the winner is applicable BEFORE stopping the test. The dry-run + // path returns the same promoted/detail as a real run but never writes agentcore.json. + describe('dry run (pre-stop preflight)', () => { + it('returns promoted=true without writing for a valid target-based promote', async () => { + mockReadProjectSpec.mockResolvedValue(makeTargetBasedProject()); + const record = baseRecord({ + mode: 'target-based', + gatewayName: 'my-gw', + variants: [ + { name: 'C', weight: 50, targetName: 'ctrl-target' }, + { name: 'T1', weight: 50, targetName: 'treat-target' }, + ], + }); + + const result = await promoteABTestConfig(record, true); + expect(result.promoted).toBe(true); + expect(mockWriteProjectSpec).not.toHaveBeenCalled(); + }); + + it('returns promoted=false without writing when a target is missing its httpRuntime entirely', async () => { + // No httpRuntime.runtime on either target → nothing to copy from/to → not promotable. 
+ const project = makeTargetBasedProject(); + for (const gw of project.agentCoreGateways) { + for (const t of gw.targets) { + delete (t as { httpRuntime?: unknown }).httpRuntime; + } + } + mockReadProjectSpec.mockResolvedValue(project); + const record = baseRecord({ + mode: 'target-based', + gatewayName: 'my-gw', + variants: [ + { name: 'C', weight: 50, targetName: 'ctrl-target' }, + { name: 'T1', weight: 50, targetName: 'treat-target' }, + ], + }); + + const result = await promoteABTestConfig(record, true); + expect(result.promoted).toBe(false); + expect(mockWriteProjectSpec).not.toHaveBeenCalled(); + }); + + it('returns promoted=true without writing (or fetching) for a valid config-bundle promote', async () => { + mockReadProjectSpec.mockResolvedValue(makeConfigBundleProject()); + mockReadDeployedState.mockResolvedValue(makeBundleDeployedState()); + const record = baseRecord({ + mode: 'config-bundle', + variants: [ + { name: 'C', weight: 50, bundleArn: BUNDLE_ARN, bundleVersion: 'v1' }, + { name: 'T1', weight: 50, bundleArn: BUNDLE_ARN, bundleVersion: 'v2' }, + ], + }); + + const result = await promoteABTestConfig(record, true); + expect(result.promoted).toBe(true); + // dry-run must not touch the service or write the spec + expect(mockGetConfigurationBundleVersion).not.toHaveBeenCalled(); + expect(mockWriteProjectSpec).not.toHaveBeenCalled(); + }); + + it('returns promoted=false without writing when control/treatment are different bundles (dry-run)', async () => { + mockReadProjectSpec.mockResolvedValue(makeConfigBundleProject()); + mockReadDeployedState.mockResolvedValue(makeBundleDeployedState()); + const record = baseRecord({ + mode: 'config-bundle', + variants: [ + { + name: 'C', + weight: 50, + bundleArn: 'arn:aws:bedrock-agentcore:us-east-1:1:configuration-bundle/bundleA', + bundleVersion: 'v1', + }, + { + name: 'T1', + weight: 50, + bundleArn: 'arn:aws:bedrock-agentcore:us-east-1:1:configuration-bundle/bundleB', + bundleVersion: 'v1', + }, + ], + }); + + 
const result = await promoteABTestConfig(record, true); + expect(result.promoted).toBe(false); + expect(result.promotionDetail).toContain('different config bundles'); + expect(mockWriteProjectSpec).not.toHaveBeenCalled(); + }); + }); +}); diff --git a/src/cli/operations/jobs/ab-test/build-options.ts b/src/cli/operations/jobs/ab-test/build-options.ts new file mode 100644 index 000000000..651f68d65 --- /dev/null +++ b/src/cli/operations/jobs/ab-test/build-options.ts @@ -0,0 +1,158 @@ +/** + * Build the CreateABTest request (variants + eval config + filters) and the persisted + * variant summaries from the engine-facing StartABTestJobOptions. ARN/name resolution runs + * against deployed state so a user can pass bundle/target/eval NAMES on the command line. + */ +import { ResourceNotFoundError, ValidationError } from '../../../../lib'; +import type { DeployedResourceState } from '../../../../schema'; +import type { ABTestEvaluationConfig, ABTestVariant, GatewayFilter } from '../../../aws/agentcore-ab-tests'; +import type { ABTestVariantSummary, StartABTestJobOptions } from '../shared/types'; +import { resolveConfigBundleArn, resolveConfigBundleVersion, resolveOnlineEvalArn } from './resolve'; + +export interface BuiltABTestRequest { + variants: ABTestVariant[]; + evaluationConfig: ABTestEvaluationConfig; + gatewayFilter?: GatewayFilter; + /** Resolved summaries persisted on the record for display. */ + variantSummaries: ABTestVariantSummary[]; +} + +/** Resolve a gateway-target name. The L3 CDK construct deploys targets by their spec name as-is. */ +function resolveTargetName(targetName: string): string { + return targetName; +} + +/** + * Assemble the AB-test create request from start options. Throws ValidationError when a + * mode's required inputs are missing (caught by the handler → `{ success: false }`). 
+ */ +export function buildABTestRequest( + opts: StartABTestJobOptions, + deployedResources?: DeployedResourceState +): BuiltABTestRequest { + const variants: ABTestVariant[] = []; + const variantSummaries: ABTestVariantSummary[] = []; + let evaluationConfig: ABTestEvaluationConfig; + let gatewayFilter: GatewayFilter | undefined; + + if (opts.mode === 'config-bundle') { + if (!opts.controlBundle || !opts.controlVersion || !opts.treatmentBundle || !opts.treatmentVersion) { + throw new ValidationError('config-bundle A/B test requires control and treatment bundle names and versions.'); + } + if (!opts.onlineEval) { + throw new ValidationError('config-bundle A/B test requires an online-eval config.'); + } + + const controlArn = resolveConfigBundleArn(opts.controlBundle, deployedResources); + const controlVer = resolveConfigBundleVersion(opts.controlBundle, opts.controlVersion, deployedResources); + const treatmentArn = resolveConfigBundleArn(opts.treatmentBundle, deployedResources); + const treatmentVer = resolveConfigBundleVersion(opts.treatmentBundle, opts.treatmentVersion, deployedResources); + + if (!controlArn) { + throw new ResourceNotFoundError( + `Config bundle "${opts.controlBundle}" is not deployed. Run \`agentcore add config-bundle\` and \`agentcore deploy\` first.` + ); + } + if (!controlVer) { + throw new ResourceNotFoundError( + `Could not resolve version "${opts.controlVersion}" for config bundle "${opts.controlBundle}". Use LATEST or a version UUID (e.g. a1b2c3d4-e5f6-7890-abcd-ef1234567890).` + ); + } + if (!treatmentArn) { + throw new ResourceNotFoundError( + `Config bundle "${opts.treatmentBundle}" is not deployed. Run \`agentcore add config-bundle\` and \`agentcore deploy\` first.` + ); + } + if (!treatmentVer) { + throw new ResourceNotFoundError( + `Could not resolve version "${opts.treatmentVersion}" for config bundle "${opts.treatmentBundle}". Use LATEST or a version UUID (e.g. 
a1b2c3d4-e5f6-7890-abcd-ef1234567890).` + ); + } + + variants.push( + { + name: 'C', + weight: opts.controlWeight, + variantConfiguration: { configurationBundle: { bundleArn: controlArn, bundleVersion: controlVer } }, + }, + { + name: 'T1', + weight: opts.treatmentWeight, + variantConfiguration: { configurationBundle: { bundleArn: treatmentArn, bundleVersion: treatmentVer } }, + } + ); + variantSummaries.push( + { name: 'C', weight: opts.controlWeight, bundleArn: controlArn, bundleVersion: controlVer }, + { name: 'T1', weight: opts.treatmentWeight, bundleArn: treatmentArn, bundleVersion: treatmentVer } + ); + + const onlineEvalArn = resolveOnlineEvalArn(opts.onlineEval, deployedResources); + if (!onlineEvalArn) { + throw new ResourceNotFoundError( + `Online-eval config "${opts.onlineEval}" is not deployed. Run \`agentcore add online-eval\` and \`agentcore deploy\` first.` + ); + } + evaluationConfig = { onlineEvaluationConfigArn: onlineEvalArn }; + } else { + // target-based + if (!opts.controlTarget || !opts.treatmentTarget) { + throw new ValidationError('target-based A/B test requires control and treatment target names.'); + } + + const controlName = resolveTargetName(opts.controlTarget); + const treatmentName = resolveTargetName(opts.treatmentTarget); + + variants.push( + { name: 'C', weight: opts.controlWeight, variantConfiguration: { target: { name: controlName } } }, + { name: 'T1', weight: opts.treatmentWeight, variantConfiguration: { target: { name: treatmentName } } } + ); + variantSummaries.push( + { name: 'C', weight: opts.controlWeight, targetName: controlName }, + { name: 'T1', weight: opts.treatmentWeight, targetName: treatmentName } + ); + + // Target-based mode always requires per-variant eval configs (each scoped to its endpoint). + if (!opts.controlOnlineEval || !opts.treatmentOnlineEval) { + throw new ValidationError( + 'target-based A/B test requires --control-online-eval and --treatment-online-eval (one per endpoint).' 
+ ); + } + const controlEvalArn = resolveOnlineEvalArn(opts.controlOnlineEval, deployedResources); + if (!controlEvalArn) { + throw new ResourceNotFoundError( + `Online-eval config "${opts.controlOnlineEval}" (--control-online-eval) is not deployed. Run \`agentcore add online-eval\` and \`agentcore deploy\` first.` + ); + } + const treatmentEvalArn = resolveOnlineEvalArn(opts.treatmentOnlineEval, deployedResources); + if (!treatmentEvalArn) { + throw new ResourceNotFoundError( + `Online-eval config "${opts.treatmentOnlineEval}" (--treatment-online-eval) is not deployed. Run \`agentcore add online-eval\` and \`agentcore deploy\` first.` + ); + } + evaluationConfig = { + perVariantOnlineEvaluationConfig: [ + { name: 'C', onlineEvaluationConfigArn: controlEvalArn }, + { name: 'T1', onlineEvaluationConfigArn: treatmentEvalArn }, + ], + }; + } + + // Gateway filter applies to BOTH modes. The service allows exactly one target path; reject more. + if (opts.gatewayFilter) { + const targetPaths = opts.gatewayFilter + .split(',') + .map(s => s.trim()) + .filter(Boolean); + if (targetPaths.length > 1) { + throw new ValidationError( + `--gateway-filter accepts exactly one target path pattern (the service allows a single path). ` + + `Got ${targetPaths.length}: ${targetPaths.map(p => `"${p}"`).join(', ')}.` + ); + } + if (targetPaths.length === 1) { + gatewayFilter = { targetPaths: [targetPaths[0]!] }; + } + } + + return { variants, evaluationConfig, gatewayFilter, variantSummaries }; +} diff --git a/src/cli/operations/jobs/ab-test/format.ts b/src/cli/operations/jobs/ab-test/format.ts new file mode 100644 index 000000000..d11f66370 --- /dev/null +++ b/src/cli/operations/jobs/ab-test/format.ts @@ -0,0 +1,85 @@ +/** Presentation helpers for A/B-test job CLI output (history table + detail view). 
*/ +import { dnsSuffix } from '../../../aws/partition'; +import { formatJobDate } from '../shared/format'; +import type { ABTestJobRecord } from '../shared/types'; + +/** + * Derive the gateway invocation URL from the stored gateway ARN. + * Target-based: `https://{gateway}/{control-target-name}/invocations`. + * Config-bundle: `https://{gateway}/{agent-name}/invocations`. + */ +export function getInvocationUrl(record: ABTestJobRecord): string | undefined { + const parts = record.gatewayArn.split(':'); + const region = parts[3]; + const gatewayId = parts[5]?.split('/')[1]; + if (!region || !gatewayId) return undefined; + const baseUrl = `https://${gatewayId}.gateway.bedrock-agentcore.${region}.${dnsSuffix(region)}`; + if (record.mode === 'target-based') { + const targetName = record.variants[0]?.targetName; + return targetName ? `${baseUrl}/${targetName}/invocations` : undefined; + } + return record.agent ? `${baseUrl}/${record.agent}/invocations` : undefined; +} + +export function printABTestHistory(records: ABTestJobRecord[]): void { + if (records.length === 0) { + console.log('No A/B test jobs found. Run `agentcore run ab-test` to create one.'); + return; + } + console.log( + `\n${'Date'.padEnd(22)} ${'Execution'.padEnd(12)} ${'Lifecycle'.padEnd(12)} ${'Name'.padEnd(24)} ${'ID'}` + ); + console.log('─'.repeat(100)); + for (const r of records) { + console.log( + `${formatJobDate(r.createdAt).padEnd(22)} ${r.status.padEnd(12)} ${r.lifecycleStatus.padEnd(12)} ${r.name.padEnd(24)} ${r.id}` + ); + } + console.log(''); +} + +export function printABTestDetail(record: ABTestJobRecord): void { + console.log(`\nA/B test: ${record.id}`); + console.log(`Name: ${record.name}`); + console.log(`Mode: ${record.mode}`); + console.log(`Execution status: ${record.status}`); + console.log(`Lifecycle status: ${record.lifecycleStatus}`); + console.log(`Gateway: ${record.gatewayArn}`); + console.log(`Gateway filter: ${record.gatewayFilter?.targetPaths?.[0] ?? 
'none'}`); + const invocationUrl = getInvocationUrl(record); + if (invocationUrl) console.log(`Invocation URL: ${invocationUrl}`); + console.log(`Started: ${formatJobDate(record.createdAt)}`); + if (record.completedAt) console.log(`Stopped: ${formatJobDate(record.completedAt)}`); + if (record.maxDurationExpiresAt) console.log(`Max duration expires: ${formatJobDate(record.maxDurationExpiresAt)}`); + + console.log('\nVariants:'); + for (const v of record.variants) { + const detail = v.bundleArn + ? `bundle ${v.bundleArn} @ ${v.bundleVersion}` + : v.targetName + ? `target ${v.targetName}` + : '(unspecified)'; + console.log(` ${v.name} (weight ${v.weight}): ${detail}`); + } + + const metrics = record.results?.evaluatorMetrics; + if (metrics?.length) { + console.log('\nResults:'); + for (const m of metrics) { + console.log(` ${m.evaluatorArn}`); + console.log(` C (n=${m.controlStats.sampleSize}): mean ${m.controlStats.mean.toFixed(3)}`); + for (const vr of m.variantResults) { + const change = + vr.percentChange != null ? ` (${vr.percentChange > 0 ? '+' : ''}${vr.percentChange.toFixed(1)}%)` : ''; + const sig = vr.isSignificant ? ' *significant*' : ''; + console.log(` ${vr.treatmentName} (n=${vr.sampleSize}): mean ${vr.mean.toFixed(3)}${change}${sig}`); + } + } + } else if (record.failureReason) { + console.log(`\nFailure: ${record.failureReason}`); + } else { + console.log('\nResults not yet available.'); + } + if (record.logFilePath) console.log(`\nLog: ${record.logFilePath}`); + console.log(''); +} diff --git a/src/cli/operations/jobs/ab-test/handler.ts b/src/cli/operations/jobs/ab-test/handler.ts new file mode 100644 index 000000000..7c97e5ce4 --- /dev/null +++ b/src/cli/operations/jobs/ab-test/handler.ts @@ -0,0 +1,466 @@ +/** + * AB-test job handler — composes Startable, Refreshable, Stoppable, Pausable, Promotable, Archivable. 
+ * + * - create(): resolve region + gateway ARN (gateway must already be deployed), build variants + + * eval config, create (or reuse) the execution role, make ONE CreateABTest call + * (with AccessDenied retry while IAM propagates), persist the record. The role is + * cleaned up if the create call ultimately fails. + * - refresh(): GET latest state; map 404 → NOT_FOUND. Store the service `status` in `status` and + * `executionStatus` in `lifecycleStatus`; carry results / failureReason / expiry. + * - stop/pause/resume(): UpdateABTest executionStatus = STOPPED / PAUSED / RUNNING. + * - promote(): wait until RUNNING, stop, then apply the winning variant to agentcore.json. + * - archive(): stop → poll STOPPED → DeleteABTest → delete the role if the CLI created it. + */ +import { ConfigIO, JobNotFoundError, ResourceNotFoundError, toError } from '../../../../lib'; +import type { Result } from '../../../../lib/result'; +import type { DeployedResourceState, DeployedState } from '../../../../schema'; +import { getCredentialProvider } from '../../../aws/account'; +import { createABTest, deleteABTest, getABTest, listABTests, updateABTest } from '../../../aws/agentcore-ab-tests'; +import { getGatewayDetail, getOnlineEvaluationConfig } from '../../../aws/agentcore-control'; +import { detectRegion } from '../../../aws/region'; +import { getErrorMessage } from '../../../errors'; +import { ExecLogger } from '../../../logging/exec-logger'; +import { NOT_FOUND_STATUS } from '../shared/constants'; +import { regionFromArn, resolveJobRegion } from '../shared/region'; +import type { ABTestHandler, ABTestJobRecord, DebugCheckResult, StartABTestJobOptions } from '../shared/types'; +import { buildABTestRequest } from './build-options'; +import { promoteABTestConfig } from './promote'; +import { deleteABTestRole, getOrCreateABTestRole, resolveGatewayArn } from './resolve'; +import { CloudWatchLogsClient, FilterLogEventsCommand } from '@aws-sdk/client-cloudwatch-logs'; + +/** AB-test
create retries while the freshly-created IAM role propagates (gateway/eval AccessDenied). */ +const MAX_CREATE_RETRIES = 5; +const BASE_RETRY_DELAY_MS = 5_000; + +/** Merge per-target deployed resources into one view (AB tests resolve names across all targets). */ +function mergeDeployedResources(deployedState: DeployedState): DeployedResourceState { + const merged: DeployedResourceState = {}; + for (const target of Object.values(deployedState.targets)) { + const r = target.resources; + if (!r) continue; + Object.assign(merged, { + mcp: { ...merged.mcp, ...r.mcp }, + gateways: { ...merged.gateways, ...r.gateways }, + configBundles: { ...merged.configBundles, ...r.configBundles }, + onlineEvalConfigs: { ...merged.onlineEvalConfigs, ...r.onlineEvalConfigs }, + }); + } + return merged; +} + +/** Poll executionStatus until STOPPED (best-effort, bounded). */ +async function pollUntilStopped(region: string, abTestId: string, attempts = 20, delayMs = 3_000): Promise { + for (let i = 0; i < attempts; i++) { + try { + const test = await getABTest({ region, abTestId }); + if (test.executionStatus === 'STOPPED') return true; + } catch (err) { + if (err instanceof JobNotFoundError) return true; // already gone + // transient — keep polling + } + await new Promise(resolve => setTimeout(resolve, delayMs)); + } + return false; +} + +/** + * Wait until the test reaches RUNNING (a just-created test may still be enabling), then stop it. + * Throws if it never reaches RUNNING — promotion of a never-started test is not meaningful. 
+ */ +async function waitForRunningThenStop( + region: string, + abTestId: string, + attempts = 12, + delayMs = 10_000 +): Promise { + let status: string | undefined; + for (let i = 0; i < attempts; i++) { + const current = await getABTest({ region, abTestId }); + status = current.executionStatus; + if (status === 'RUNNING') break; + if (status === 'STOPPED') return; // already stopped — nothing more to do + await new Promise(resolve => setTimeout(resolve, delayMs)); + } + if (status !== 'RUNNING') { + throw new Error(`A/B test "${abTestId}" did not reach RUNNING (current: ${status}); cannot promote.`); + } + await updateABTest({ region, abTestId, executionStatus: 'STOPPED' }); +} + +export const abTestHandler: ABTestHandler = { + async create(opts: StartABTestJobOptions, configIO: ConfigIO): Promise> { + let logger: ExecLogger | undefined; + try { + logger = new ExecLogger({ command: 'ab-test' }); + } catch { + // non-fatal + } + + let region = ''; + let roleArn: string | undefined; + let roleCreatedByCli = false; + try { + logger?.startStep('Load project config'); + const [projectSpec, deployedState, awsTargets] = await Promise.all([ + configIO.readProjectSpec(), + configIO.readDeployedState(), + configIO.resolveAWSDeploymentTargets(), + ]); + region = await resolveJobRegion(opts.region, awsTargets); + const deployedResources = mergeDeployedResources(deployedState); + logger?.endStep('success'); + + // Gateway must already be deployed — we never auto-create it. + logger?.startStep('Resolve gateway'); + const gatewayArn = resolveGatewayArn(opts.gateway, deployedResources); + if (!gatewayArn || !gatewayArn.startsWith('arn:') || gatewayArn.split(':').length < 6) { + const err = new ResourceNotFoundError( + `Gateway "${opts.gateway}" is not deployed. 
Run \`agentcore add gateway\` and \`agentcore deploy\` first.` + ); + logger?.endStep('error', err.message); + logger?.finalize(false); + return { success: false, error: err }; + } + logger?.log(`Gateway ARN: ${gatewayArn}`); + logger?.endStep('success'); + + // Build variants + eval config (throws ValidationError on missing mode inputs). + const built = buildABTestRequest(opts, deployedResources); + + // Resolve (or create) the execution role. + logger?.startStep('Resolve execution role'); + if (opts.roleArn) { + roleArn = opts.roleArn; + } else { + opts.onProgress?.('role', 'Creating execution role (waiting for IAM propagation)...'); + roleArn = await getOrCreateABTestRole({ + region, + projectName: projectSpec.name, + testName: opts.name, + gatewayArn, + }); + roleCreatedByCli = true; + } + logger?.log(`Role ARN: ${roleArn}`); + logger?.endStep('success'); + + // ONE create call, with AccessDenied retry while IAM propagates. + logger?.startStep('Create A/B test'); + opts.onProgress?.('starting', `Creating A/B test "${opts.name}"...`); + const createOptions = { + region, + name: `${projectSpec.name}_${opts.name}`, + description: opts.description, + gatewayArn, + roleArn, + variants: built.variants, + evaluationConfig: built.evaluationConfig, + gatewayFilter: built.gatewayFilter, + enableOnCreate: opts.enableOnCreate, + }; + + let createResult; + for (let attempt = 0; attempt < MAX_CREATE_RETRIES; attempt++) { + try { + createResult = await createABTest(createOptions); + break; + } catch (err: unknown) { + const errCode = (err as { name?: string }).name; + const errStatus = (err as { $metadata?: { httpStatusCode?: number } }).$metadata?.httpStatusCode; + const msg = err instanceof Error ? 
err.message : String(err); + const isRetryable = + errCode === 'AccessDeniedException' || + errStatus === 403 || + msg.includes('Access denied') || + msg.includes('Gateway validation error'); + if (isRetryable && attempt < MAX_CREATE_RETRIES - 1) { + const delay = BASE_RETRY_DELAY_MS * Math.pow(2, attempt); + opts.onProgress?.('retry', `Access not yet propagated; retrying (attempt ${attempt + 2})...`); + await new Promise(resolve => setTimeout(resolve, delay)); + continue; + } + throw err; + } + } + if (!createResult) { + throw new Error('A/B test creation failed after retries.'); + } + logger?.log(`Response: ${JSON.stringify(createResult, null, 2)}`); + logger?.endStep('success'); + opts.onProgress?.('started', `A/B test created: ${createResult.abTestId} (${createResult.executionStatus})`); + logger?.finalize(true); + + const record: ABTestJobRecord = { + type: 'ab-test', + id: createResult.abTestId, + arn: createResult.abTestArn, + status: createResult.status, + lifecycleStatus: createResult.executionStatus, + createdAt: createResult.createdAt ?? new Date().toISOString(), + agent: opts.agent ?? opts.runtime ?? opts.name, + logFilePath: logger?.logFilePath, + name: opts.name, + mode: opts.mode, + gatewayArn, + gatewayName: opts.gateway, + roleArn, + roleCreatedByCli, + variants: built.variantSummaries, + evaluationConfig: built.evaluationConfig, + gatewayFilter: built.gatewayFilter, + }; + return { success: true, record }; + } catch (err) { + // Clean up an auto-created role so a failed create doesn't orphan IAM resources. + if (roleCreatedByCli && roleArn && region) { + try { + await deleteABTestRole(region, roleArn); + } catch { + // best-effort + } + } + logger?.finalize(false); + return { success: false, error: toError(err) }; + } + }, + + async refresh(record: ABTestJobRecord): Promise> { + const region = regionFromArn(record.arn) ?? 
(await detectRegion()).region; + let response; + try { + response = await getABTest({ region, abTestId: record.id }); + } catch (err) { + if (err instanceof JobNotFoundError) { + return { success: true, record: { ...record, status: NOT_FOUND_STATUS, lifecycleStatus: NOT_FOUND_STATUS } }; + } + return { success: false, error: toError(err) }; + } + + const failureReason = response.failureReason ?? response.errorDetails?.join('; ') ?? record.failureReason; + + return { + success: true, + record: { + ...record, + status: response.status, + lifecycleStatus: response.executionStatus, + completedAt: response.stoppedAt ?? record.completedAt, + maxDurationExpiresAt: response.maxDurationExpiresAt ?? record.maxDurationExpiresAt, + results: response.results ?? record.results, + failureReason, + }, + }; + }, + + async stop(record: ABTestJobRecord): Promise { + const region = regionFromArn(record.arn) ?? (await detectRegion()).region; + try { + await updateABTest({ region, abTestId: record.id, executionStatus: 'STOPPED' }); + return { success: true }; + } catch (err) { + return { success: false, error: toError(err) }; + } + }, + + async pause(record: ABTestJobRecord): Promise> { + const region = regionFromArn(record.arn) ?? (await detectRegion()).region; + try { + await updateABTest({ region, abTestId: record.id, executionStatus: 'PAUSED' }); + return { success: true, record: { ...record, lifecycleStatus: 'PAUSED' } }; + } catch (err) { + return { success: false, error: toError(err) }; + } + }, + + async resume(record: ABTestJobRecord): Promise> { + const region = regionFromArn(record.arn) ?? 
(await detectRegion()).region; + try { + await updateABTest({ region, abTestId: record.id, executionStatus: 'RUNNING' }); + return { success: true, record: { ...record, lifecycleStatus: 'RUNNING' } }; + } catch (err) { + return { success: false, error: toError(err) }; + } + }, + + async promote(record: ABTestJobRecord, _configIO: ConfigIO): Promise> { + const region = regionFromArn(record.arn) ?? (await detectRegion()).region; + try { + // Validate the winning variant is applicable to agentcore.json BEFORE stopping the test + // (dry run, no writes). This avoids the trap where promote stops a running test and only + // then discovers it can't apply the winner — e.g. target-based with no named runtime + // endpoint — leaving the test stopped but config unchanged. + const preflight = await promoteABTestConfig(record, true); + if (!preflight.promoted) { + return { + success: false, + error: new Error( + `Cannot promote A/B test "${record.id}": ${preflight.promotionDetail} ` + + `The test was left running (not stopped).` + ), + }; + } + + // Promotion stops the test first (running tests apply continuously), then mutates config. + await waitForRunningThenStop(region, record.id); + const promotion = await promoteABTestConfig(record); + if (!promotion.promoted) { + // The test was stopped, but applying the winning variant to agentcore.json failed. + // Surface the failure so the user knows config wasn't updated (test is already STOPPED). + return { + success: false, + error: new Error( + `A/B test "${record.id}" was stopped, but the winning variant could not be applied to agentcore.json: ${promotion.promotionDetail}` + ), + }; + } + return { success: true, record: { ...record, lifecycleStatus: 'STOPPED' } }; + } catch (err) { + return { success: false, error: toError(err) }; + } + }, + + async archive(record: ABTestJobRecord): Promise { + const region = regionFromArn(record.arn) ?? 
(await detectRegion()).region; + try { + // Running tests can't be deleted — stop and wait for STOPPED first (best-effort). + try { + await updateABTest({ region, abTestId: record.id, executionStatus: 'STOPPED' }); + await pollUntilStopped(region, record.id); + } catch (err) { + if (!(err instanceof JobNotFoundError)) { + // already-stopped / transient — proceed to delete + } + } + const deleteResult = await deleteABTest({ region, abTestId: record.id }); + if (!deleteResult.success && !deleteResult.error?.includes('404')) { + return { success: false, error: new Error(deleteResult.error ?? 'Failed to delete A/B test.') }; + } + if (record.roleCreatedByCli && record.roleArn) { + try { + const allTests = await listABTests({ region, maxResults: 100 }); + const activeTests = allTests.abTests.filter( + t => t.abTestId !== record.id && !['STOPPED', 'CREATE_FAILED', 'DELETE_FAILED'].includes(t.status) + ); + let roleInUse = false; + for (const test of activeTests) { + const detail = await getABTest({ region, abTestId: test.abTestId }); + if (detail.roleArn === record.roleArn) { + roleInUse = true; + break; + } + } + if (!roleInUse) { + await deleteABTestRole(region, record.roleArn); + } + } catch { + // Best-effort: if we can't verify, skip deletion (safe side — don't orphan other tests) + } + } + return { success: true }; + } catch (err) { + if (err instanceof JobNotFoundError) { + return { success: true }; + } + return { success: false, error: toError(err) }; + } + }, + + async debug(record: ABTestJobRecord): Promise> { + const region = regionFromArn(record.arn) ?? (await detectRegion()).region; + const results: DebugCheckResult[] = []; + + // 1. Fetch fresh state from the API + let test; + try { + test = await getABTest({ region, abTestId: record.id }); + results.push({ + label: 'AB Test Status', + status: test.status === 'ACTIVE' && test.executionStatus === 'RUNNING' ? 
'pass' : 'warn', + detail: `${test.status} / ${test.executionStatus}`, + }); + } catch (err) { + results.push({ label: 'AB Test Status', status: 'fail', detail: getErrorMessage(err) }); + return { success: true, checks: results }; + } + + // 2. Role + results.push({ + label: 'AB Test Role', + status: test.roleArn ? 'pass' : 'warn', + detail: test.roleArn ?? 'No role ARN', + }); + + // 3. Online Eval Config(s) + const evalConfigArns: { name: string; arn: string }[] = + 'perVariantOnlineEvaluationConfig' in test.evaluationConfig + ? test.evaluationConfig.perVariantOnlineEvaluationConfig.map(v => ({ + name: v.name, + arn: v.onlineEvaluationConfigArn, + })) + : [{ name: '', arn: test.evaluationConfig.onlineEvaluationConfigArn }]; + + for (const { name: variantName, arn: evalArn } of evalConfigArns) { + const evalConfigId = evalArn.split('/').pop() ?? evalArn; + const labelSuffix = variantName ? ` (${variantName})` : ''; + try { + const evalConfig = await getOnlineEvaluationConfig({ region, configId: evalConfigId }); + results.push({ + label: `Online Eval Config${labelSuffix}`, + status: evalConfig.executionStatus === 'ENABLED' ? 'pass' : 'fail', + detail: `${evalConfig.configName} — ${evalConfig.executionStatus}`, + }); + } catch (err) { + results.push({ label: `Online Eval Config${labelSuffix}`, status: 'fail', detail: getErrorMessage(err) }); + } + } + + // 4. Gateway role + const gatewayId = test.gatewayArn.split('/').pop() ?? ''; + try { + const gateway = await getGatewayDetail({ region, gatewayId }); + results.push({ + label: 'Gateway Role', + status: gateway.roleArn ? 'pass' : 'warn', + detail: gateway.roleArn ?? 'No role ARN', + }); + } catch (err) { + results.push({ label: 'Gateway Role', status: 'fail', detail: getErrorMessage(err) }); + } + + // 5. 
Runtime experiment spans (last 2h) + const twoHoursAgo = Date.now() - 2 * 60 * 60 * 1000; + const logsClient = new CloudWatchLogsClient({ region, credentials: getCredentialProvider() }); + const variantNames = test.variants.map(v => v.name); + + try { + // Check for spans tagged with the AB test ARN per variant + for (const name of variantNames) { + try { + const response = await logsClient.send( + new FilterLogEventsCommand({ + logGroupName: 'aws/spans', + startTime: twoHoursAgo, + filterPattern: `"${test.abTestArn}" "${name}"`, + limit: 5, + }) + ); + const count = response.events?.length ?? 0; + results.push({ + label: `Experiment Spans — ${name} (2h)`, + status: count > 0 ? 'pass' : 'warn', + detail: + count > 0 + ? `${count}+ spans with experiment metadata` + : 'No spans found — traffic may not be reaching this variant', + }); + } catch (err) { + results.push({ label: `Experiment Spans — ${name}`, status: 'warn', detail: getErrorMessage(err) }); + } + } + } catch (err) { + results.push({ label: 'Experiment Spans', status: 'warn', detail: getErrorMessage(err) }); + } + + return { success: true, checks: results }; + }, +}; diff --git a/src/cli/operations/jobs/ab-test/promote.ts b/src/cli/operations/jobs/ab-test/promote.ts new file mode 100644 index 000000000..a70aea72b --- /dev/null +++ b/src/cli/operations/jobs/ab-test/promote.ts @@ -0,0 +1,207 @@ +import { ConfigIO } from '../../../../lib'; +import { getConfigurationBundleVersion } from '../../../aws/agentcore-config-bundles'; +import { regionFromArn } from '../shared/region'; +import type { ABTestJobRecord, ABTestVariantSummary } from '../shared/types'; + +/** Extract the bundle id (the ARN's resource suffix) from a configuration-bundle ARN. */ +function bundleIdFromArn(arn: string): string | undefined { + const id = arn.split('/').pop(); + return id && id.length > 0 ? 
id : undefined; +} + +export interface PromoteABTestResult { + promoted: boolean; + mode?: string; + promotionDetail: string; +} + +/** Reverse-resolve a deployed config-bundle ARN to its spec name (the key in configBundles[]). */ +function bundleNameFromArn( + deployedState: { targets: Record } }> }, + bundleArn: string +): string | undefined { + for (const target of Object.values(deployedState.targets)) { + const bundles = target.resources?.configBundles; + if (!bundles) continue; + for (const [name, entry] of Object.entries(bundles)) { + if (entry.bundleArn === bundleArn) return name; + } + } + return undefined; +} + +/** + * Apply A/B test promotion to agentcore.json, sourcing the winning (treatment / T1) variant + * from the job record's persisted `variants` — NOT from project.abTests[] (which the fire-and-forget + * jobs model never populates). Does NOT stop the test — the handler does that first. + * + * - config-bundle mode: control and treatment must be two VERSIONS of the SAME bundle (different + * bundleArn → rejected). Adopts the treatment version's components into the bundle (fetched from + * the service); a later deploy version-bumps it, with lineage handled server-side. + * - target-based mode: if both variants are named endpoints of the same runtime, bump the control + * endpoint's version to the treatment endpoint's version (control keeps its identity). Otherwise + * (different runtimes, or the default unnamed endpoint) repoint the control target at whatever the + * treatment target serves by cloning its httpRuntime. Either way control ends up serving treatment. + * + * @param dryRun When true, performs the exact same resolution/validation but does NOT write + * agentcore.json. Lets the caller verify the winner is applicable BEFORE stopping the test, so a + * non-promotable test (e.g. target-based with a missing control/treatment target) fails fast + * without first stopping the running test. 
The `promoted` flag + `promotionDetail` are identical + * to a real run. + */ +export async function promoteABTestConfig(record: ABTestJobRecord, dryRun = false): Promise { + const configIO = new ConfigIO(); + const project = await configIO.readProjectSpec(); + const mode = record.mode; + + const control = record.variants.find((v: ABTestVariantSummary) => v.name === 'C'); + const treatment = record.variants.find((v: ABTestVariantSummary) => v.name === 'T1'); + if (!control || !treatment) { + return { + promoted: false, + mode, + promotionDetail: 'A/B test record is missing control (C) or treatment (T1) variant.', + }; + } + + if (mode === 'target-based') { + if (!record.gatewayName) { + return { + promoted: false, + mode, + promotionDetail: 'A/B test record is missing the gateway name; cannot locate targets.', + }; + } + const gateway = (project.agentCoreGateways ?? []).find(g => g.name === record.gatewayName); + if (!gateway?.targets) { + return { promoted: false, mode, promotionDetail: `Gateway "${record.gatewayName}" not found in agentcore.json.` }; + } + const controlTarget = gateway.targets.find(t => t.name === control.targetName); + const treatmentTarget = gateway.targets.find(t => t.name === treatment.targetName); + // Control must exist (we write to it); treatment must have a runtime to copy from. These are the + // only genuinely unpromotable cases — a missing target means there is nothing to apply. + if (!controlTarget?.httpRuntime?.runtime || !treatmentTarget?.httpRuntime?.runtime) { + return { + promoted: false, + mode, + promotionDetail: 'Could not resolve control/treatment runtime targets for promotion.', + }; + } + + // Fast path: both variants are named endpoints of the SAME runtime, differing only by version. + // Promote by bumping control's endpoint version to treatment's — control keeps its identity. 
+ const sameRuntime = controlTarget.httpRuntime.runtime === treatmentTarget.httpRuntime.runtime; + const controlEpName = controlTarget.httpRuntime.runtimeEndpoint; + const treatmentEpName = treatmentTarget.httpRuntime.runtimeEndpoint; + if (sameRuntime && controlEpName && treatmentEpName) { + const runtime = project.runtimes.find(r => r.name === controlTarget.httpRuntime!.runtime); + const controlEp = runtime?.endpoints?.[controlEpName]; + const treatmentEp = runtime?.endpoints?.[treatmentEpName]; + if (controlEp && treatmentEp) { + if (!dryRun) { + controlEp.version = treatmentEp.version; + await configIO.writeProjectSpec(project); + } + return { + promoted: true, + mode, + promotionDetail: `Control endpoint "${controlEpName}" updated to version ${treatmentEp.version} (from treatment "${treatmentEpName}").`, + }; + } + } + + // General path: control and treatment point at different runtimes, or use the default + // (unnamed) endpoint, so there is no single version field to bump. Repoint the control target + // at exactly what treatment serves by cloning its httpRuntime block. + if (!dryRun) { + controlTarget.httpRuntime = structuredClone(treatmentTarget.httpRuntime); + await configIO.writeProjectSpec(project); + } + const treatmentRef = treatmentEpName + ? `${treatmentTarget.httpRuntime.runtime} (endpoint "${treatmentEpName}")` + : treatmentTarget.httpRuntime.runtime; + return { + promoted: true, + mode, + promotionDetail: `Control target "${controlTarget.name}" repointed to treatment runtime ${treatmentRef}.`, + }; + } + + // config-bundle mode: the control bundle adopts the WINNING (treatment) version's components. + if (!control.bundleArn || !treatment.bundleArn) { + return { promoted: false, mode, promotionDetail: 'A/B test record is missing control/treatment bundle ARNs.' }; + } + + // Promote is only coherent when control and treatment are two VERSIONS of the SAME bundle. 
+ // A ConfigurationBundle version bump is parented to the same bundle's prior version (the service + // tracks lineage per bundle), so "promote treatment into control" means adopting the treatment + // VERSION's components into that one bundle. Two different bundles have independent lineages and + // cannot be promoted into one another — reject that up front. + if (control.bundleArn !== treatment.bundleArn) { + return { + promoted: false, + mode, + promotionDetail: + 'Cannot promote: control and treatment reference different config bundles. ' + + 'A config-bundle A/B test can only promote between two versions of the SAME bundle.', + }; + } + + if (!treatment.bundleVersion) { + return { + promoted: false, + mode, + promotionDetail: 'A/B test record is missing the treatment bundle version; cannot promote.', + }; + } + + let controlName: string | undefined; + try { + const deployedState = await configIO.readDeployedState(); + controlName = bundleNameFromArn(deployedState, control.bundleArn); + } catch { + // deployed state unavailable + } + if (!controlName) { + return { + promoted: false, + mode, + promotionDetail: 'Could not resolve the config bundle from deployed state (deploy the bundle first).', + }; + } + + const controlBundle = (project.configBundles ?? []).find(b => b.name === controlName); + if (!controlBundle) { + return { + promoted: false, + mode, + promotionDetail: `Could not find config bundle "${controlName}" in agentcore.json.`, + }; + } + + const bundleId = bundleIdFromArn(treatment.bundleArn); + if (!bundleId) { + return { promoted: false, mode, promotionDetail: `Could not parse bundle id from ARN "${treatment.bundleArn}".` }; + } + + // Fetch the winning (treatment) version's components from the service and adopt them locally. + // A subsequent `agentcore deploy` version-bumps the bundle (lineage handled server-side). + if (!dryRun) { + const region = regionFromArn(treatment.bundleArn) ?? 
regionFromArn(record.arn); + if (!region) { + return { promoted: false, mode, promotionDetail: 'Could not determine region for the config bundle.' }; + } + const winning = await getConfigurationBundleVersion({ + region, + bundleId, + versionId: treatment.bundleVersion, + }); + controlBundle.components = winning.components as typeof controlBundle.components; + await configIO.writeProjectSpec(project); + } + return { + promoted: true, + mode, + promotionDetail: `Config bundle "${controlName}" updated to the winning version ${treatment.bundleVersion}.`, + }; +} diff --git a/src/cli/operations/jobs/ab-test/resolve.ts b/src/cli/operations/jobs/ab-test/resolve.ts new file mode 100644 index 000000000..2aafdc6b0 --- /dev/null +++ b/src/cli/operations/jobs/ab-test/resolve.ts @@ -0,0 +1,246 @@ +/** + * Shared AB-test resolution helpers: IAM role create/reuse/delete, and ARN resolution for + * gateway / config-bundle / online-eval references against deployed state. + * + * Extracted from the legacy post-deploy-ab-tests.ts so the AB-test job handler's create() + * can own role + ARN resolution at start time (the config-as-code deploy path is removed). + */ +import type { DeployedResourceState } from '../../../../schema'; +import { getCredentialProvider } from '../../../aws/account'; +import type { ABTestEvaluationConfig, ABTestVariant } from '../../../aws/agentcore-ab-tests'; +import { arnPrefix } from '../../../aws/partition'; +import { + CreateRoleCommand, + DeleteRoleCommand, + DeleteRolePolicyCommand, + GetRoleCommand, + IAMClient, + PutRolePolicyCommand, +} from '@aws-sdk/client-iam'; +import { createHash } from 'node:crypto'; + +const AB_TEST_ROLE_POLICY_NAME = 'ABTestExecutionPolicy'; + +/** IAM policy propagation wait after creating/updating the role (ms). 
*/ +export const IAM_PROPAGATION_DELAY_MS = 15_000; + +// ============================================================================ +// IAM role management +// ============================================================================ + +/** Generate a project-scoped role name: AgentCore-{ProjectName}-ABTest{TestName}-{Hash} (max 64 chars). */ +export function generateRoleName(projectName: string, testName: string): string { + // Deterministic hash so retries produce the same role name (avoids orphaned roles). + const hash = createHash('sha256').update(`${projectName}:${testName}`).digest('hex').slice(0, 8); + const base = `AgentCore-${projectName}-ABTest${testName}`; + return `${base.slice(0, 55)}-${hash}`; +} + +/** Extract role name from ARN: arn:aws:iam::123456789012:role/RoleName → RoleName. */ +export function roleNameFromArn(roleArn: string): string { + const parts = roleArn.split('/'); + return parts[parts.length - 1] ?? roleArn; +} + +export interface CreateABTestRoleOptions { + region: string; + projectName: string; + testName: string; + gatewayArn: string; + /** Injectable propagation delay (tests). */ + propagationDelayMs?: number; +} + +/** Create (or reuse) the AB-test execution role + inline policy, then wait for IAM propagation. */ +export async function getOrCreateABTestRole(options: CreateABTestRoleOptions): Promise { + const { region, projectName, testName, gatewayArn } = options; + const credentials = getCredentialProvider(); + const iamClient = new IAMClient({ region, credentials }); + + // Account id from gateway ARN: arn:aws:bedrock-agentcore:REGION:ACCOUNT:gateway/ID + const accountId = gatewayArn.split(':')[4] ?? 
'*'; + const roleName = generateRoleName(projectName, testName); + + const trustPolicy = JSON.stringify({ + Version: '2012-10-17', + Statement: [ + { + Effect: 'Allow', + Principal: { Service: 'bedrock-agentcore.amazonaws.com' }, + Action: 'sts:AssumeRole', + Condition: { + StringEquals: { 'aws:SourceAccount': accountId }, + ArnLike: { 'aws:SourceArn': `${arnPrefix(region)}:bedrock-agentcore:*:${accountId}:ab-test/*` }, + }, + }, + ], + }); + + let roleArn: string; + try { + const createResult = await iamClient.send( + new CreateRoleCommand({ + RoleName: roleName, + AssumeRolePolicyDocument: trustPolicy, + Description: `Auto-created execution role for AgentCore AB test: ${testName}`, + Tags: [ + { Key: 'agentcore:created-by', Value: 'agentcore-cli' }, + { Key: 'agentcore:project-name', Value: projectName }, + { Key: 'agentcore:ab-test-name', Value: testName }, + ], + }) + ); + roleArn = createResult.Role?.Arn ?? ''; + if (!roleArn) { + throw new Error(`IAM CreateRole succeeded but returned no role ARN for "${roleName}"`); + } + } catch (err: unknown) { + // Retry after a previous failed run left the role behind — reuse it. + if ((err as { name?: string }).name === 'EntityAlreadyExistsException') { + const existing = await iamClient.send(new GetRoleCommand({ RoleName: roleName })); + roleArn = existing.Role?.Arn ?? 
''; + if (!roleArn) { + throw new Error(`Role "${roleName}" already exists but ARN could not be retrieved`); + } + } else { + throw err; + } + } + + const policy = JSON.stringify({ + Version: '2012-10-17', + Statement: [ + { + Sid: 'AgentCoreResources', + Effect: 'Allow', + Action: [ + 'bedrock-agentcore:GetGateway', + 'bedrock-agentcore:GetGatewayTarget', + 'bedrock-agentcore:ListGatewayTargets', + 'bedrock-agentcore:CreateGatewayRule', + 'bedrock-agentcore:UpdateGatewayRule', + 'bedrock-agentcore:GetGatewayRule', + 'bedrock-agentcore:DeleteGatewayRule', + 'bedrock-agentcore:ListGatewayRules', + 'bedrock-agentcore:GetOnlineEvaluationConfig', + 'bedrock-agentcore:GetEvaluator', + 'bedrock-agentcore:GetConfigurationBundle', + 'bedrock-agentcore:GetConfigurationBundleVersion', + 'bedrock-agentcore:ListConfigurationBundleVersions', + ], + Resource: `${arnPrefix(region)}:bedrock-agentcore:*:${accountId}:*`, + Condition: { StringEquals: { 'aws:ResourceAccount': accountId } }, + }, + { + Sid: 'CloudWatchLogsDescribe', + Effect: 'Allow', + Action: ['logs:DescribeLogGroups'], + Resource: '*', + }, + { + Sid: 'CloudWatchLogs', + Effect: 'Allow', + Action: [ + 'logs:DescribeIndexPolicies', + 'logs:PutIndexPolicy', + 'logs:StartQuery', + 'logs:GetQueryResults', + 'logs:StopQuery', + 'logs:FilterLogEvents', + 'logs:GetLogEvents', + ], + Resource: [ + `${arnPrefix(region)}:logs:*:${accountId}:log-group:/aws/bedrock-agentcore/evaluations/*`, + `${arnPrefix(region)}:logs:*:${accountId}:log-group:/aws/bedrock-agentcore/runtimes/*`, + `${arnPrefix(region)}:logs:*:${accountId}:log-group:aws/spans`, + `${arnPrefix(region)}:logs:*:${accountId}:log-group:aws/spans:*`, + ], + }, + ], + }); + + // Re-apply the inline policy (idempotent — covers both new and recovered roles). 
+ await iamClient.send( + new PutRolePolicyCommand({ RoleName: roleName, PolicyName: AB_TEST_ROLE_POLICY_NAME, PolicyDocument: policy }) + ); + + // Wait for IAM propagation — both new roles and policy updates on existing roles. + await new Promise(resolve => setTimeout(resolve, options.propagationDelayMs ?? IAM_PROPAGATION_DELAY_MS)); + + return roleArn; +} + +/** Best-effort role cleanup: delete the inline policy then the role. */ +export async function deleteABTestRole(region: string, roleArn: string): Promise { + const credentials = getCredentialProvider(); + const iamClient = new IAMClient({ region, credentials }); + const roleName = roleNameFromArn(roleArn); + + try { + await iamClient.send(new DeleteRolePolicyCommand({ RoleName: roleName, PolicyName: AB_TEST_ROLE_POLICY_NAME })); + } catch { + // policy may not exist + } + try { + await iamClient.send(new DeleteRoleCommand({ RoleName: roleName })); + } catch { + // role may already be deleted or in use — best effort + } +} + +// ============================================================================ +// ARN resolution against deployed state +// ============================================================================ + +/** Resolve a gateway NAME (or {{gateway:name}} placeholder, or ARN) to a gateway ARN. Returns undefined if not deployed. */ +export function resolveGatewayArn(ref: string, deployedResources?: DeployedResourceState): string | undefined { + if (ref.startsWith('arn:')) return ref; + const placeholderMatch = /^\{\{gateway:(.+)\}\}$/.exec(ref); + const gwName = placeholderMatch ? placeholderMatch[1] : ref; + + const mcpGw = gwName ? deployedResources?.mcp?.gateways?.[gwName] : undefined; + if (mcpGw) return mcpGw.gatewayArn; + const httpGw = gwName ? deployedResources?.gateways?.[gwName] : undefined; + if (httpGw) return httpGw.gatewayArn; + + return undefined; +} + +/** + * Resolve a config-bundle name (or ARN) to a bundle ARN. 
+ * Returns undefined when a NAME is given but not found in deployed state (i.e. not deployed), + * so callers can surface a friendly "not deployed" error instead of sending a raw name to the API. + */ +export function resolveConfigBundleArn(ref: string, deployedResources?: DeployedResourceState): string | undefined { + if (ref.startsWith('arn:')) return ref; + const bundle = deployedResources?.configBundles?.[ref]; + return bundle ? bundle.bundleArn : undefined; +} + +/** + * Resolve a config-bundle version, expanding 'LATEST' to the deployed versionId. + * Returns the explicit version verbatim; returns undefined when 'LATEST' cannot be resolved + * (bundle not deployed) so the caller can error rather than send 'LATEST' to the API. + */ +export function resolveConfigBundleVersion( + bundleRef: string, + versionRef: string, + deployedResources?: DeployedResourceState +): string | undefined { + if (versionRef !== 'LATEST') return versionRef; + const name = bundleRef.startsWith('arn:') ? undefined : bundleRef; + const bundle = name ? deployedResources?.configBundles?.[name] : undefined; + return bundle ? bundle.versionId : undefined; +} + +/** + * Resolve an online-eval config name (or ARN) to its ARN. + * Returns undefined when a NAME is given but not found in deployed state (i.e. not deployed). + */ +export function resolveOnlineEvalArn(ref: string, deployedResources?: DeployedResourceState): string | undefined { + if (ref.startsWith('arn:')) return ref; + const config = deployedResources?.onlineEvalConfigs?.[ref]; + return config ? 
config.onlineEvaluationConfigArn : undefined; +} + +export type { ABTestEvaluationConfig, ABTestVariant }; diff --git a/src/cli/operations/jobs/batch-evaluation/build-source.ts b/src/cli/operations/jobs/batch-evaluation/build-source.ts new file mode 100644 index 000000000..3fb844977 --- /dev/null +++ b/src/cli/operations/jobs/batch-evaluation/build-source.ts @@ -0,0 +1,73 @@ +/** + * Batch-evaluation start-time helpers, extracted from the legacy run-batch-evaluation.ts: + * serviceName / logGroupName construction, evaluator name→short-id resolution (distinct from the + * recommendation ARN resolver), name validation/auto-generation, and the CloudWatch filter builder. + */ +import { ValidationError } from '../../../../lib'; +import type { DeployedState } from '../../../../schema'; +import type { CloudWatchFilterConfig } from '../../../aws/agentcore-batch-evaluation'; +import { resolveEndpointName, runtimeLogGroup } from '../../../aws/cloudwatch'; +import { BATCH_EVAL_NAME_REGEX } from '../shared/constants'; + +/** + * Resolve evaluator references to the SHORT ids the batch API expects. + * Handles "Builtin.Correctness", "arn:...:evaluator/Builtin.Correctness", or custom names + * looked up in deployed state. (Opposite of the recommendation path, which resolves to full ARNs.) + */ +export function resolveBatchEvaluatorIds(deployedState: DeployedState, agent: string, evaluators: string[]): string[] { + const targetResources = Object.values(deployedState.targets).find(t => t.resources?.runtimes?.[agent])?.resources; + return evaluators.map(name => { + const shortName = name.includes('evaluator/') ? name.split('evaluator/').pop()! : name; + if (shortName.startsWith('Builtin.')) return shortName; + const deployed = targetResources?.evaluators?.[shortName]; + if (deployed?.evaluatorId) return deployed.evaluatorId; + return shortName; // pass-through; the service will reject an unknown id + }); +} + +/** CloudWatch service name + log group for the agent's runtime traces. 
*/ +export function buildCloudWatchSource( + projectName: string, + agent: string, + runtimeId: string, + endpoint: string | undefined +): { serviceName: string; logGroupName: string } { + const endpointName = resolveEndpointName(endpoint); + // Service name in CW logs uses project_agent format without the CDK hash suffix. + const serviceName = `${projectName}_${agent}.${endpointName}`; + const logGroupName = runtimeLogGroup(runtimeId, endpoint); + return { serviceName, logGroupName }; +} + +/** Validate an explicit name or auto-generate one. Throws ValidationError on a bad explicit name. */ +export function resolveBatchEvalName(name: string | undefined, projectName: string, agent: string): string { + if (name) { + if (!BATCH_EVAL_NAME_REGEX.test(name)) { + throw new ValidationError( + `Batch evaluation name must start with a letter and contain only letters, digits, and underscores (max 48 chars). Got: "${name}"` + ); + } + return name; + } + return `${projectName}_${agent}_${Date.now()}`.replace(/[^a-zA-Z0-9_]/g, '_').slice(0, 48); +} + +/** + * Build the optional CloudWatch filter. The API takes EITHER sessionIds OR timeRange (never both); + * sessionIds take precedence. Returns undefined when neither is provided (evaluate all in the log group). + */ +export function buildCloudWatchFilterConfig( + sessionIds: string[] | undefined, + lookbackDays: number | undefined +): CloudWatchFilterConfig | undefined { + const effective = [...new Set(sessionIds ?? 
[])]; + if (effective.length > 0) { + return { sessionIds: effective }; + } + if (lookbackDays) { + const endTime = new Date().toISOString(); + const startTime = new Date(Date.now() - lookbackDays * 24 * 60 * 60 * 1000).toISOString(); + return { timeRange: { startTime, endTime } }; + } + return undefined; +} diff --git a/src/cli/operations/jobs/batch-evaluation/dataset-phase1.ts b/src/cli/operations/jobs/batch-evaluation/dataset-phase1.ts new file mode 100644 index 000000000..9f8f27a27 --- /dev/null +++ b/src/cli/operations/jobs/batch-evaluation/dataset-phase1.ts @@ -0,0 +1,109 @@ +/** + * Dataset Phase-1 for batch evaluation (caller-side, blocking). + * + * The engine's create() is a single API call, but dataset-mode batch evaluation first needs to + * invoke the agent against every dataset scenario and wait for CloudWatch ingestion. That work is + * the CALLER's responsibility (CLI/TUI) — this helper performs it and returns the sessionIds + + * ground-truth sessionMetadata to hand to engine.start('batch-evaluation', ...). + */ +import { ConfigIO } from '../../../../lib'; +import type { Result } from '../../../../lib/result'; +import type { SessionMetadataEntry } from '../../../aws/agentcore-batch-evaluation'; +import { runDatasetScenarios } from '../../eval/shared/dataset-session-provider'; +import { resolveAgentContext } from '../../invoke/resolve-agent-context'; + +/** Delay before submitting batch eval to allow CloudWatch span ingestion. Matches the SDK default. */ +export const BATCH_INGESTION_DELAY_MS = 180_000; + +export interface DatasetPhase1Options { + agent: string; + datasetName: string; + datasetVersion?: string; + endpoint?: string; + configIO?: ConfigIO; + onProgress?: (phase: string, message: string) => void; + /** Override the ingestion wait (tests). */ + ingestionDelayMs?: number; + /** Injectable sleep (tests). 
*/ + sleep?: (ms: number) => Promise; +} + +export type DatasetPhase1Result = Result<{ + sessionIds: string[]; + sessionMetadata: SessionMetadataEntry[]; +}>; + +function defaultSleep(ms: number): Promise { + return new Promise(resolve => setTimeout(resolve, ms)); +} + +/** Run dataset scenarios, wait for ingestion, and build sessionIds + ground-truth metadata. */ +export async function runDatasetPhase1(options: DatasetPhase1Options): Promise { + const configIO = options.configIO ?? new ConfigIO(); + const sleep = options.sleep ?? defaultSleep; + + try { + const [projectSpec, deployedState, awsTargets] = await Promise.all([ + configIO.readProjectSpec(), + configIO.readDeployedState(), + configIO.resolveAWSDeploymentTargets(), + ]); + + const agentContext = await resolveAgentContext({ + project: projectSpec, + deployedState, + awsTargets, + agentName: options.agent, + endpoint: options.endpoint, + }); + + options.onProgress?.('invoking', `Invoking agent with dataset "${options.datasetName}"...`); + const datasetResult = await runDatasetScenarios({ + agentContext, + datasetName: options.datasetName, + version: options.datasetVersion, + configBaseDir: configIO.getConfigRoot(), + onProgress: (phase, msg) => options.onProgress?.(phase, msg), + }); + + const successful = datasetResult.scenarioResults.filter(r => r.status === 'success'); + if (successful.length === 0) { + return { success: false, error: new Error('All scenarios failed during invocation. No sessions to evaluate.') }; + } + + const sessionIds = successful.map(r => r.sessionId); + const sessionMetadata = successful.map(r => { + const scenario = datasetResult.scenarios.find(s => s.scenario_id === r.scenarioId); + return { + sessionId: r.sessionId, + testScenarioId: r.scenarioId, + groundTruth: scenario + ? { + inline: { + ...(scenario.assertions ? { assertions: scenario.assertions.map(a => ({ text: a })) } : {}), + ...(scenario.expected_trajectory + ? 
{ expectedTrajectory: { toolNames: scenario.expected_trajectory } } + : {}), + ...(scenario.turns.some(t => t.expectedResponse) + ? { + turns: scenario.turns.map(t => ({ + input: { prompt: t.input }, + ...(t.expectedResponse ? { expectedResponse: { text: t.expectedResponse } } : {}), + })), + } + : {}), + }, + } + : undefined, + }; + }) as SessionMetadataEntry[]; + + options.onProgress?.('invoking', `✓ ${successful.length} sessions ready for batch evaluation`); + options.onProgress?.('ingesting', 'Waiting 180s for CloudWatch span ingestion...'); + await sleep(options.ingestionDelayMs ?? BATCH_INGESTION_DELAY_MS); + + return { success: true, sessionIds, sessionMetadata }; + } catch (err) { + return { success: false, error: err instanceof Error ? err : new Error(String(err)) }; + } +} diff --git a/src/cli/operations/jobs/batch-evaluation/format.ts b/src/cli/operations/jobs/batch-evaluation/format.ts new file mode 100644 index 000000000..b3e8bdee3 --- /dev/null +++ b/src/cli/operations/jobs/batch-evaluation/format.ts @@ -0,0 +1,49 @@ +/** Presentation helpers for batch-evaluation job CLI output (history table + detail view). */ +import { formatJobDate } from '../shared/format'; +import type { BatchEvaluationJobRecord } from '../shared/types'; + +export function printBatchEvaluationHistory(records: BatchEvaluationJobRecord[]): void { + if (records.length === 0) { + console.log('No batch evaluation jobs found. 
Run `agentcore run batch-evaluation` to create one.'); + return; + } + console.log(`\n${'Date'.padEnd(22)} ${'Status'.padEnd(22)} ${'Evaluators'.padEnd(28)} ${'ID'}`); + console.log('─'.repeat(100)); + for (const r of records) { + console.log( + `${formatJobDate(r.createdAt).padEnd(22)} ${r.status.padEnd(22)} ${r.evaluators.join(', ').padEnd(28)} ${r.id}` + ); + } + console.log(''); +} + +export function printBatchEvaluationDetail(record: BatchEvaluationJobRecord): void { + console.log(`\nBatch evaluation: ${record.id}`); + console.log(`Name: ${record.name}`); + console.log(`Status: ${record.status}`); + console.log(`Agent: ${record.agent}`); + console.log(`Evaluators: ${record.evaluators.join(', ')}`); + console.log(`Started: ${formatJobDate(record.createdAt)}`); + if (record.completedAt) console.log(`Completed: ${formatJobDate(record.completedAt)}`); + if (record.source) console.log(`Source: ${record.source}`); + + const summaries = record.evaluationResults?.evaluatorSummaries; + if (summaries?.length) { + console.log('\nResults:'); + for (const s of summaries) { + const avg = s.statistics?.averageScore; + console.log( + ` ${s.evaluatorId}: ${avg != null ? avg.toFixed(2) : 'N/A'}${s.totalFailed ? ` (${s.totalFailed} failed)` : ''}` + ); + } + } else if (record.results?.length) { + console.log('\nResults:'); + for (const r of record.results) { + console.log(` ${r.evaluatorId}: ${r.score != null ? r.score.toFixed(2) : (r.label ?? 'N/A')}`); + } + } else { + console.log('\nResults not yet available.'); + } + if (record.logFilePath) console.log(`\nLog: ${record.logFilePath}`); + console.log(''); +} diff --git a/src/cli/operations/jobs/batch-evaluation/handler.ts b/src/cli/operations/jobs/batch-evaluation/handler.ts new file mode 100644 index 000000000..754ef2f80 --- /dev/null +++ b/src/cli/operations/jobs/batch-evaluation/handler.ts @@ -0,0 +1,229 @@ +/** + * Batch-evaluation job handler — composes Startable, Refreshable, Stoppable, Archivable. 
+ * + * - create(): resolve agent + evaluators (short ids), build dataSourceConfig (serviceName/logGroup + + * sessionIds|timeRange filter), attach ground-truth metadata, make ONE StartBatchEvaluation + * call, persist the record. Dataset Phase-1 (invoke scenarios + ingestion wait) is the + * caller's responsibility — it supplies sessionIds/sessionMetadata. + * - refresh(): GET latest status; map 404 → NOT_FOUND. On terminal status, fetch per-session scores + * from the CloudWatch output log once (resultsFetched guards + enables retry). + * - stop(): StopBatchEvaluation. + * - archive(): DeleteBatchEvaluation. + */ +import { ConfigIO, JobNotFoundError, ResourceNotFoundError, toError } from '../../../../lib'; +import type { Result } from '../../../../lib/result'; +import { + deleteBatchEvaluation, + generateClientToken, + getBatchEvaluation, + startBatchEvaluation, + stopBatchEvaluation, +} from '../../../aws/agentcore-batch-evaluation'; +import type { BatchEvaluationResultEntry } from '../../../aws/agentcore-batch-evaluation'; +import { detectRegion } from '../../../aws/region'; +import { ExecLogger } from '../../../logging/exec-logger'; +import { NOT_FOUND_STATUS } from '../shared/constants'; +import { regionFromArn, resolveJobRegion } from '../shared/region'; +import { resolveAgentState } from '../shared/resolve-agent-state'; +import type { BatchEvaluationHandler, BatchEvaluationJobRecord, StartBatchEvaluationJobOptions } from '../shared/types'; +import { + buildCloudWatchFilterConfig, + buildCloudWatchSource, + resolveBatchEvalName, + resolveBatchEvaluatorIds, +} from './build-source'; +import { CloudWatchLogsClient, GetLogEventsCommand } from '@aws-sdk/client-cloudwatch-logs'; + +/** Read per-session evaluation scores from the batch's CloudWatch output log stream. 
*/ +async function fetchResultsFromCloudWatch( + region: string, + logGroupName: string, + logStreamName: string +): Promise { + const client = new CloudWatchLogsClient({ region }); + const response = await client.send(new GetLogEventsCommand({ logGroupName, logStreamName, startFromHead: true })); + + const results: BatchEvaluationResultEntry[] = []; + for (const event of response.events ?? []) { + if (!event.message) continue; + try { + const parsed = JSON.parse(event.message) as Record; + const attrs = (parsed.attributes ?? {}) as Record; + const evaluatorId = attrs['gen_ai.evaluation.name'] as string | undefined; + if (!evaluatorId) continue; + results.push({ + evaluatorId, + score: attrs['gen_ai.evaluation.score.value'] as number | undefined, + label: attrs['gen_ai.evaluation.score.label'] as string | undefined, + explanation: attrs['gen_ai.evaluation.explanation'] as string | undefined, + }); + } catch { + // skip non-JSON / malformed entries + } + } + return results; +} + +export const batchEvaluationHandler: BatchEvaluationHandler = { + async create( + opts: StartBatchEvaluationJobOptions, + configIO: ConfigIO + ): Promise> { + let logger: ExecLogger | undefined; + try { + logger = new ExecLogger({ command: 'batch-evaluate' }); + } catch { + // non-fatal + } + + try { + logger?.startStep('Load project config'); + const [projectSpec, deployedState, awsTargets] = await Promise.all([ + configIO.readProjectSpec(), + configIO.readDeployedState(), + configIO.resolveAWSDeploymentTargets(), + ]); + const region = await resolveJobRegion(opts.region, awsTargets); + logger?.endStep('success'); + + logger?.startStep('Resolve agent'); + const agentState = resolveAgentState(deployedState, opts.agent); + if (!agentState) { + const err = new ResourceNotFoundError(`Agent "${opts.agent}" not deployed. 
Run \`agentcore deploy\` first.`); + logger?.endStep('error', err.message); + logger?.finalize(false); + return { success: false, error: err }; + } + const { serviceName, logGroupName } = buildCloudWatchSource( + projectSpec.name, + opts.agent, + agentState.runtimeId, + opts.endpoint + ); + logger?.log(`Service name: ${serviceName}`); + logger?.log(`Log group: ${logGroupName}`); + logger?.endStep('success'); + + // Resolve name + evaluators (ValidationError on a bad explicit name) + const evalName = resolveBatchEvalName(opts.name, projectSpec.name, opts.agent); + const resolvedEvaluators = resolveBatchEvaluatorIds(deployedState, opts.agent, opts.evaluators); + + // CloudWatch filter — merge explicit sessionIds with any from sessionMetadata, dedup + const metadataSessionIds = opts.sessionMetadata?.map(m => m.sessionId).filter(Boolean) ?? []; + const effectiveSessionIds = [...new Set([...(opts.sessionIds ?? []), ...metadataSessionIds])]; + const filterConfig = buildCloudWatchFilterConfig(effectiveSessionIds, opts.lookbackDays); + + logger?.startStep('Start batch evaluation'); + opts.onProgress?.('starting', `Starting batch evaluation "${evalName}"...`); + const startResult = await startBatchEvaluation({ + region, + name: evalName, + evaluators: resolvedEvaluators.map(id => ({ evaluatorId: id })), + dataSourceConfig: { + cloudWatchLogs: { + serviceNames: [serviceName], + logGroupNames: [logGroupName], + ...(filterConfig ? { filterConfig } : {}), + }, + }, + ...(opts.sessionMetadata && opts.sessionMetadata.length > 0 + ? { evaluationMetadata: { sessionMetadata: opts.sessionMetadata } } + : {}), + ...(opts.kmsKeyArn ? 
{ kmsKeyArn: opts.kmsKeyArn } : {}), + clientToken: generateClientToken(), + }); + logger?.log(`Response: ${JSON.stringify(startResult, null, 2)}`); + logger?.endStep('success'); + opts.onProgress?.( + 'started', + `Batch evaluation created: ${startResult.batchEvaluationId} (${startResult.status})` + ); + logger?.finalize(true); + + const record: BatchEvaluationJobRecord = { + type: 'batch-evaluation', + id: startResult.batchEvaluationId, + arn: startResult.batchEvaluationArn, + status: startResult.status, + createdAt: startResult.createdAt ?? new Date().toISOString(), + agent: opts.agent, + logFilePath: logger?.logFilePath, + name: evalName, + evaluators: resolvedEvaluators, + source: opts.source, + dataset: opts.dataset, + ...(opts.kmsKeyArn ? { kmsKeyArn: opts.kmsKeyArn } : {}), + }; + return { success: true, record }; + } catch (err) { + logger?.finalize(false); + return { success: false, error: toError(err) }; + } + }, + + async refresh(record: BatchEvaluationJobRecord): Promise> { + const region = regionFromArn(record.arn) ?? (await detectRegion()).region; + let response; + try { + response = await getBatchEvaluation({ region, batchEvaluationId: record.id }); + } catch (err) { + if (err instanceof JobNotFoundError) { + return { success: true, record: { ...record, status: NOT_FOUND_STATUS, resultsFetched: true } }; + } + return { success: false, error: toError(err) }; + } + + const updated: BatchEvaluationJobRecord = { + ...record, + status: response.status, + completedAt: response.updatedAt ?? record.completedAt, + evaluationResults: response.evaluationResults ?? record.evaluationResults, + kmsKeyArn: response.kmsKeyArn ?? record.kmsKeyArn, + }; + + // Fetch per-session scores from the CloudWatch output log once the job is terminal. 
+ const isTerminalStatus = ['COMPLETED', 'COMPLETED_WITH_ERRORS', 'FAILED', 'STOPPED', 'CANCELLED'].includes( + response.status + ); + const cw = response.outputConfig?.cloudWatchConfig; + if (isTerminalStatus && !record.resultsFetched && cw) { + try { + const results = await fetchResultsFromCloudWatch(region, cw.logGroupName, cw.logStreamName); + // Never clobber populated results with an empty re-read. + if (results.length > 0 || !record.results?.length) { + updated.results = results; + } + updated.resultsFetched = true; + } catch { + // leave resultsFetched false so the next get()/list() retries + } + } else if (isTerminalStatus && !cw) { + // Terminal with no output log destination — nothing to fetch; mark settled. + updated.resultsFetched = true; + } + return { success: true, record: updated }; + }, + + async stop(record: BatchEvaluationJobRecord): Promise { + const region = regionFromArn(record.arn) ?? (await detectRegion()).region; + try { + await stopBatchEvaluation({ region, batchEvaluationId: record.id }); + return { success: true }; + } catch (err) { + return { success: false, error: toError(err) }; + } + }, + + async archive(record: BatchEvaluationJobRecord): Promise { + const region = regionFromArn(record.arn) ?? (await detectRegion()).region; + try { + await deleteBatchEvaluation({ region, batchEvaluationId: record.id }); + return { success: true }; + } catch (err) { + if (err instanceof JobNotFoundError) { + return { success: true }; + } + return { success: false, error: toError(err) }; + } + }, +}; diff --git a/src/cli/operations/jobs/index.ts b/src/cli/operations/jobs/index.ts new file mode 100644 index 000000000..74b9ce021 --- /dev/null +++ b/src/cli/operations/jobs/index.ts @@ -0,0 +1,46 @@ +/** + * Job Engine public API. 
+ * + * Usage: + * const engine = createJobEngine(new ConfigIO()); + * const r = await engine.start('recommendation', opts); + * const jobs = await engine.list({ type: 'recommendation' }); + */ +export { createJobEngine } from './shared/engine'; +export { isTerminal, JOB_CAPABILITIES, STORAGE_DIRS, TERMINAL_STATUSES, NOT_FOUND_STATUS } from './shared/constants'; +export { regionFromArn } from './shared/region'; +export { waitForTerminal } from './shared/wait'; +export type { WaitForTerminalOptions } from './shared/wait'; +export { runDatasetPhase1, BATCH_INGESTION_DELAY_MS } from './batch-evaluation/dataset-phase1'; +export type { DatasetPhase1Result } from './batch-evaluation/dataset-phase1'; + +export type { + JobEngine, + JobType, + JobRecord, + JobRecordBase, + RecommendationJobRecord, + BatchEvaluationJobRecord, + ABTestJobRecord, + ABTestVariantSummary, + ABTestMode, + InsightsJobRecord, + JobCapabilities, + ListOptions, + StartRecommendationJobOptions, + StartBatchEvaluationJobOptions, + StartABTestJobOptions, + StartInsightsJobOptions, + RecommendationInputSource, + RecommendationTraceSource, + BatchEvaluationSource, + ToolDescJsonPath, + RecommendationType, + PausableJobType, + PromotableJobType, + StoppableJobType, + DebuggableJobType, + DebugCheckResult, + FailureAnalysisResult, + InsightFailureCategory, +} from './shared/types'; diff --git a/src/cli/operations/jobs/insights/__tests__/handler.test.ts b/src/cli/operations/jobs/insights/__tests__/handler.test.ts new file mode 100644 index 000000000..e69a543af --- /dev/null +++ b/src/cli/operations/jobs/insights/__tests__/handler.test.ts @@ -0,0 +1,24 @@ +import { validateLookbackDays } from '../handler.js'; +import { describe, expect, it } from 'vitest'; + +describe('validateLookbackDays', () => { + it('accepts positive integers', () => { + expect(() => validateLookbackDays(1)).not.toThrow(); + expect(() => validateLookbackDays(7)).not.toThrow(); + expect(() => validateLookbackDays(30)).not.toThrow(); + 
/** Lays out one line of the history table: three padded columns followed by the job id. */
function historyRow(date: string, status: string, insights: string, id: string): string {
  return `${date.padEnd(22)} ${status.padEnd(22)} ${insights.padEnd(28)} ${id}`;
}

/**
 * Prints the insights job history as a fixed-width table (date, status, insights, id),
 * or a hint on how to create a job when there are no records.
 */
export function printInsightsHistory(records: InsightsJobRecord[]): void {
  if (!records.length) {
    console.log('No insights jobs found. Run `agentcore run insights` to create one.');
    return;
  }

  console.log(`\n${historyRow('Date', 'Status', 'Insights', 'ID')}`);
  console.log('─'.repeat(100));
  records.forEach(job => {
    console.log(historyRow(formatJobDate(job.createdAt), job.status, job.insights.join(', '), job.id));
  });
  console.log('');
}

/**
 * Prints the detail view for a single insights job: header fields, then the failure analysis
 * when the record carries failure categories, otherwise the evaluator summaries, otherwise a
 * "not yet available" notice. The log file path is printed last when present.
 */
export function printInsightsDetail(record: InsightsJobRecord): void {
  const { evaluators, completedAt, logFilePath } = record;

  console.log(`\nInsights job: ${record.id}`);
  console.log(`Name: ${record.name}`);
  console.log(`Status: ${record.status}`);
  console.log(`Agent: ${record.agent}`);
  console.log(`Insights: ${record.insights.join(', ')}`);
  if (evaluators?.length) {
    console.log(`Evaluators: ${evaluators.join(', ')}`);
  }
  console.log(`Started: ${formatJobDate(record.createdAt)}`);
  if (completedAt) {
    console.log(`Completed: ${formatJobDate(completedAt)}`);
  }

  const categories = record.failureAnalysisResult?.failureCategories ?? [];
  const summaries = record.evaluationResults?.evaluatorSummaries ?? [];

  if (categories.length > 0) {
    console.log('\nFailure Analysis:');
    for (const category of categories) {
      console.log(`\n Category: ${category.failureCategoryName ?? 'Unknown'}`);
      if (category.failureCategoryDescription) {
        console.log(` Description: ${category.failureCategoryDescription}`);
      }
      if (category.categoryGroupName) {
        console.log(` Group: ${category.categoryGroupName}`);
      }
      for (const cause of category.rootCauses ?? []) {
        console.log(` Root cause: ${cause.rootCauseCategory ?? 'Unknown'}`);
        if (cause.rootCauseDescription) {
          console.log(` ${cause.rootCauseDescription}`);
        }
        if (cause.recommendation) {
          console.log(` Recommendation: ${cause.recommendation}`);
        }
        if (cause.relatedSessions?.length) {
          console.log(` Sessions: ${cause.relatedSessions.map(s => s.sessionId).join(', ')}`);
        }
      }
    }
  } else if (summaries.length > 0) {
    console.log('\nEvaluation Results:');
    for (const summary of summaries) {
      const average = summary.statistics?.averageScore;
      const score = average != null ? average.toFixed(2) : 'N/A';
      const failedNote = summary.totalFailed ? ` (${summary.totalFailed} failed)` : '';
      console.log(` ${summary.evaluatorId}: ${score}${failedNote}`);
    }
  } else {
    console.log('\nResults not yet available.');
  }

  if (logFilePath) {
    console.log(`\nLog: ${logFilePath}`);
  }
  console.log('');
}
/** Maximum job-name length enforced by NAME_PATTERN (one leading letter plus up to 47 more characters). */
const MAX_NAME_LENGTH = 48;

const NAME_PATTERN = /^[a-zA-Z][a-zA-Z0-9_]{0,47}$/;

/**
 * Statuses after which an insights job no longer changes. Mirrors the terminal check used by the
 * batch-evaluation handler's refresh(), which polls the same getBatchEvaluation API.
 */
const INSIGHTS_TERMINAL_STATUSES = ['COMPLETED', 'COMPLETED_WITH_ERRORS', 'FAILED', 'STOPPED', 'CANCELLED'];

/**
 * Returns the job name to send to the service.
 *
 * A user-provided name is validated against NAME_PATTERN and returned unchanged. Otherwise a name
 * of the form `<project>_<agent>_insights_<timestamp>` is generated such that it always satisfies
 * NAME_PATTERN:
 *  - forbidden characters are replaced with `_`;
 *  - a prefix that would not start with a letter gets a leading `a`;
 *  - only the project/agent prefix is truncated, so the full timestamp survives as the suffix.
 *    (Truncating the whole string dropped the timestamp for long project/agent names, giving
 *    every run the same name.)
 *
 * @param name        user-provided name, if any
 * @param projectName project name used for the auto-generated prefix
 * @param agent       agent name used for the auto-generated prefix
 * @param now         millisecond timestamp for the suffix; injectable for deterministic tests
 * @throws ValidationError when a user-provided name does not match NAME_PATTERN
 */
export function resolveInsightsName(
  name: string | undefined,
  projectName: string,
  agent: string,
  now: number = Date.now()
): string {
  if (name) {
    if (!NAME_PATTERN.test(name)) {
      throw new ValidationError(
        `Job name "${name}" is invalid. Must begin with a letter and contain only alphanumeric characters and underscores (max 48 chars).`
      );
    }
    return name;
  }

  const suffix = `_insights_${now}`;
  let prefix = `${projectName}_${agent}`.replace(/[^a-zA-Z0-9_]/g, '_');
  if (!/^[a-zA-Z]/.test(prefix)) {
    prefix = `a${prefix}`;
  }
  // Keep at least the leading letter even if the suffix were unexpectedly long.
  const prefixBudget = Math.max(1, MAX_NAME_LENGTH - suffix.length);
  return `${prefix.slice(0, prefixBudget)}${suffix}`;
}

/**
 * Build the CloudWatch filter config for session/time filtering.
 *
 * Precedence: explicit session ids (de-duplicated) win over an explicit start/end time range,
 * which wins over a lookback window. Returns undefined when no filter was requested.
 *
 * @throws ValidationError when the lookback window is used and lookbackDays is not a positive integer
 */
function buildFilterConfig(
  sessionIds: string[] | undefined,
  lookbackDays: number | undefined,
  startTime: string | undefined,
  endTime: string | undefined
): CloudWatchFilterConfig | undefined {
  const effective = [...new Set(sessionIds ?? [])];
  if (effective.length > 0) {
    return { sessionIds: effective };
  }
  if (startTime || endTime) {
    return { timeRange: { startTime, endTime } };
  }
  if (lookbackDays !== undefined) {
    validateLookbackDays(lookbackDays);
    // Read the clock once so both ends of the window derive from the same instant.
    const nowMs = Date.now();
    const end = new Date(nowMs).toISOString();
    const start = new Date(nowMs - lookbackDays * 24 * 60 * 60 * 1000).toISOString();
    return { timeRange: { startTime: start, endTime: end } };
  }
  return undefined;
}

// NOTE(review): the generic arguments of the Promise return types below were lost in the extracted
// source; `Result<{ record: InsightsJobRecord }>` / `Result` are reconstructed from the returned
// object shapes — confirm against the InsightsHandler interface.
export const insightsHandler: InsightsHandler = {
  /**
   * Starts an insights job and returns the record to persist.
   *
   * The session source is either an existing online evaluation config (opts.onlineEvalConfigArn)
   * or the deployed agent's CloudWatch logs, optionally filtered by session ids or a time range.
   * The `insights` and `evaluators` request fields are only sent for the CloudWatch source.
   * Any thrown error is converted into `{ success: false, error }`.
   */
  async create(opts: StartInsightsJobOptions, configIO: ConfigIO): Promise<Result<{ record: InsightsJobRecord }>> {
    let logger: ExecLogger | undefined;
    try {
      logger = new ExecLogger({ command: 'insights' });
    } catch {
      // non-fatal
    }

    try {
      logger?.startStep('Load project config');
      const [projectSpec, deployedState, awsTargets] = await Promise.all([
        configIO.readProjectSpec(),
        configIO.readDeployedState(),
        configIO.resolveAWSDeploymentTargets(),
      ]);
      const region = await resolveJobRegion(opts.region, awsTargets);
      logger?.endStep('success');

      // Determine agent name — required unless using onlineEvalConfigArn
      const agentName = opts.agent ?? projectSpec.runtimes?.[0]?.name ?? '';

      let dataSourceConfig: DataSourceConfig;

      if (opts.onlineEvalConfigArn) {
        // Use existing online evaluation config as the session source
        logger?.startStep('Use online eval config source');
        dataSourceConfig = {
          onlineEvaluationConfigSource: { onlineEvaluationConfigArn: opts.onlineEvalConfigArn },
        };
        logger?.endStep('success');
      } else {
        // Build CloudWatch logs source
        logger?.startStep('Resolve agent');
        const agentState = resolveAgentState(deployedState, agentName);
        if (!agentState) {
          const err = new ResourceNotFoundError(`Agent "${agentName}" not deployed. Run \`agentcore deploy\` first.`);
          logger?.endStep('error', err.message);
          logger?.finalize(false);
          return { success: false, error: err };
        }

        const endpointName = resolveEndpointName(opts.endpoint);
        const serviceName = `${projectSpec.name}_${agentName}.${endpointName}`;
        const logGroupName = runtimeLogGroup(agentState.runtimeId, opts.endpoint);
        logger?.log(`Service name: ${serviceName}`);
        logger?.log(`Log group: ${logGroupName}`);
        logger?.endStep('success');

        const filterConfig = buildFilterConfig(opts.sessionIds, opts.lookbackDays, opts.startTime, opts.endTime);
        dataSourceConfig = {
          cloudWatchLogs: {
            serviceNames: [serviceName],
            logGroupNames: [logGroupName],
            ...(filterConfig ? { filterConfig } : {}),
          },
        };
      }

      const evalName = resolveInsightsName(opts.name, projectSpec.name, agentName);

      // Resolve evaluators if provided (for recommendation chaining)
      const resolvedEvaluators = opts.evaluators?.length
        ? resolveBatchEvaluatorIds(deployedState, agentName, opts.evaluators)
        : undefined;

      logger?.startStep('Start insights job');
      opts.onProgress?.('starting', `Starting insights job "${evalName}"...`);
      const startResult = await startBatchEvaluation({
        region,
        name: evalName,
        ...(!opts.onlineEvalConfigArn && { insights: opts.insights.map(id => ({ insightId: id })) }),
        ...(!opts.onlineEvalConfigArn && resolvedEvaluators && resolvedEvaluators.length > 0
          ? { evaluators: resolvedEvaluators.map(id => ({ evaluatorId: id })) }
          : {}),
        dataSourceConfig,
        clientToken: generateClientToken(),
      });
      logger?.log(`Response: ${JSON.stringify(startResult, null, 2)}`);
      logger?.endStep('success');
      opts.onProgress?.('started', `Insights job created: ${startResult.batchEvaluationId} (${startResult.status})`);
      logger?.finalize(true);

      const record: InsightsJobRecord = {
        type: 'insights',
        id: startResult.batchEvaluationId,
        arn: startResult.batchEvaluationArn,
        status: startResult.status,
        createdAt: startResult.createdAt ?? new Date().toISOString(),
        agent: agentName,
        logFilePath: logger?.logFilePath,
        name: evalName,
        insights: opts.insights,
        evaluators: resolvedEvaluators,
      };
      return { success: true, record };
    } catch (err) {
      logger?.finalize(false);
      return { success: false, error: toError(err) };
    }
  },

  /**
   * Re-reads the job from the service and merges status, evaluation results and failure analysis
   * into the record. A JobNotFoundError maps to NOT_FOUND_STATUS rather than a failure.
   *
   * `completedAt` is only populated once the job is terminal: `updatedAt` advances on every poll,
   * so copying it unconditionally made a running job print a "Completed:" line in the detail view.
   * A stale value left by an earlier refresh is cleared while the job is still running.
   */
  async refresh(record: InsightsJobRecord): Promise<Result<{ record: InsightsJobRecord }>> {
    const region = regionFromArn(record.arn) ?? (await detectRegion()).region;
    let response;
    try {
      response = await getBatchEvaluation({ region, batchEvaluationId: record.id });
    } catch (err) {
      if (err instanceof JobNotFoundError) {
        return { success: true, record: { ...record, status: NOT_FOUND_STATUS } };
      }
      return { success: false, error: toError(err) };
    }

    const isTerminalStatus = INSIGHTS_TERMINAL_STATUSES.includes(response.status);

    const updated: InsightsJobRecord = {
      ...record,
      status: response.status,
      completedAt: isTerminalStatus ? (response.updatedAt ?? record.completedAt) : undefined,
      evaluationResults: response.evaluationResults ?? record.evaluationResults,
      failureAnalysisResult: response.failureAnalysisResult ?? record.failureAnalysisResult,
    };

    return { success: true, record: updated };
  },

  /** Deletes the job on the service. A job that no longer exists counts as archived. */
  async archive(record: InsightsJobRecord): Promise<Result> {
    const region = regionFromArn(record.arn) ?? (await detectRegion()).region;
    try {
      await deleteBatchEvaluation({ region, batchEvaluationId: record.id });
      return { success: true };
    } catch (err) {
      if (err instanceof JobNotFoundError) {
        return { success: true };
      }
      return { success: false, error: toError(err) };
    }
  },
};

/**
 * Validates that lookbackDays is a positive integer.
 *
 * @throws ValidationError for zero, negative or non-integer values
 */
export function validateLookbackDays(lookbackDays: number): void {
  if (!Number.isInteger(lookbackDays) || lookbackDays < 1) {
    throw new ValidationError('--lookback-days must be a positive integer (at least 1).');
  }
}
const FIXED_NOW = 1781546192034; // 13-digit ms timestamp

/** Asserts that a generated name is accepted by the shared recommendation-name regex. */
const expectValidName = (candidate: string): void => {
  expect(RECOMMENDATION_NAME_REGEX.test(candidate)).toBe(true);
};

describe('autoRecommendationName', () => {
  it('caps long project/agent names to the 48-char service limit (regression: AfricanTripPlanner)', () => {
    const generated = autoRecommendationName('AfricanTripPlanner', 'AfricanTripPlanner', FIXED_NOW);

    expect(generated.length).toBeLessThanOrEqual(48);
    expectValidName(generated);
    // The full timestamp is preserved as the suffix for uniqueness.
    expect(generated.endsWith(`_${FIXED_NOW}`)).toBe(true);
  });

  it('leaves short names intact', () => {
    const generated = autoRecommendationName('Trip', 'Planner', FIXED_NOW);

    expect(generated).toBe(`Trip_Planner_${FIXED_NOW}`);
    expectValidName(generated);
  });

  it('produces a valid name even when the prefix would start with a non-letter', () => {
    const generated = autoRecommendationName('123proj', 'agent', FIXED_NOW);

    expectValidName(generated);
    expect(/^[a-zA-Z]/.test(generated)).toBe(true);
  });

  it('sanitizes characters the service regex forbids', () => {
    const generated = autoRecommendationName('my proj.name', 'my/agent', FIXED_NOW);

    expectValidName(generated);
    expect(generated).not.toMatch(/[ ./]/);
  });

  it('stays within 48 chars for very long inputs', () => {
    const generated = autoRecommendationName(
      'VeryLongProjectNameThatExceedsLimits',
      'AndAVeryLongAgentNameToo',
      FIXED_NOW
    );

    expect(generated.length).toBeLessThanOrEqual(48);
    expectValidName(generated);
  });
});
describe('RECOMMENDATION_NAME_REGEX', () => {
  it('accepts valid names', () => {
    expect(RECOMMENDATION_NAME_REGEX.test('myRec')).toBe(true);
    expect(RECOMMENDATION_NAME_REGEX.test('A123_test-run')).toBe(true);
    expect(RECOMMENDATION_NAME_REGEX.test('a')).toBe(true);
    expect(RECOMMENDATION_NAME_REGEX.test('a'.repeat(48))).toBe(true);
  });

  it('rejects names starting with a number', () => {
    expect(RECOMMENDATION_NAME_REGEX.test('1badName')).toBe(false);
  });

  it('rejects names with spaces', () => {
    expect(RECOMMENDATION_NAME_REGEX.test('my rec')).toBe(false);
  });

  it('rejects names with special characters', () => {
    expect(RECOMMENDATION_NAME_REGEX.test('my.rec')).toBe(false);
    expect(RECOMMENDATION_NAME_REGEX.test('rec@name')).toBe(false);
  });

  it('rejects names exceeding 48 characters', () => {
    expect(RECOMMENDATION_NAME_REGEX.test('a'.repeat(49))).toBe(false);
  });

  it('rejects empty string', () => {
    expect(RECOMMENDATION_NAME_REGEX.test('')).toBe(false);
  });
});

describe('TOOL_NAME_REGEX', () => {
  it('accepts valid tool names', () => {
    expect(TOOL_NAME_REGEX.test('search')).toBe(true);
    expect(TOOL_NAME_REGEX.test('my_tool')).toBe(true);
    expect(TOOL_NAME_REGEX.test('my-tool')).toBe(true);
    expect(TOOL_NAME_REGEX.test('my.tool.v2')).toBe(true);
    expect(TOOL_NAME_REGEX.test('Tool123')).toBe(true);
  });

  it('rejects tool names with spaces', () => {
    expect(TOOL_NAME_REGEX.test('my tool')).toBe(false);
  });

  it('rejects tool names with special characters', () => {
    expect(TOOL_NAME_REGEX.test('tool@name')).toBe(false);
    expect(TOOL_NAME_REGEX.test('tool/name')).toBe(false);
    expect(TOOL_NAME_REGEX.test('tool:name')).toBe(false);
  });

  it('rejects empty string', () => {
    expect(TOOL_NAME_REGEX.test('')).toBe(false);
  });
});

describe('buildRecommendationConfig — tool name validation', () => {
  const baseOpts = {
    type: 'TOOL_DESCRIPTION_RECOMMENDATION' as const,
    inputSource: 'inline',
    traceSource: 'batch-evaluation',
    batchEvaluationArn: 'arn:aws:bedrock-agentcore:us-east-1:123456789012:batch-evaluation/test-ABCDE12345',
    runtimeId: 'proj_agent-abc123',
    accountId: '123456789012',
    region: 'us-east-1',
    evaluatorIds: ['arn:aws:bedrock-agentcore:::evaluator/Builtin.Correctness'],
  };

  it('rejects tool names with spaces', async () => {
    await expect(buildRecommendationConfig({ ...baseOpts, tools: ['my tool:does stuff'] })).rejects.toThrow(
      ValidationError
    );
  });

  it('rejects tool names exceeding max length', async () => {
    const longName = 'a'.repeat(MAX_TOOL_NAME_LENGTH + 1);
    await expect(buildRecommendationConfig({ ...baseOpts, tools: [`${longName}:description`] })).rejects.toThrow(
      ValidationError
    );
  });

  it('accepts valid tool names', async () => {
    const result = await buildRecommendationConfig({ ...baseOpts, tools: ['my_tool-v2.0:Does things'] });
    expect(result.toolDescriptionRecommendationConfig).toBeDefined();
  });
});

describe('buildRecommendationConfig — spans limit validation', () => {
  /**
   * Writes a spans.json fixture containing `spanCount` spans into a fresh temp directory, runs
   * `run` with its path, and always removes the directory afterwards.
   *
   * The directory is created under os.tmpdir() rather than a hard-coded '/tmp' so the suite also
   * works on Windows and honours TMPDIR; cleanup uses `force` so a failed cleanup cannot mask the
   * test's own assertion error.
   */
  async function withSpansFile<T>(spanCount: number, run: (spansFile: string) => Promise<T>): Promise<T> {
    const { writeFileSync, mkdtempSync, rmSync } = await import('fs');
    const { tmpdir } = await import('os');
    const { join } = await import('path');
    const tmpDir = mkdtempSync(join(tmpdir(), 'spans-test-'));
    try {
      const spansFile = join(tmpDir, 'spans.json');
      const spans = Array.from({ length: spanCount }, (_, i) => ({
        traceId: `trace-${i}`,
        spanId: `span-${i}`,
      }));
      writeFileSync(spansFile, JSON.stringify(spans));
      return await run(spansFile);
    } finally {
      rmSync(tmpDir, { recursive: true, force: true });
    }
  }

  /** Options for a system-prompt recommendation that reads its traces from `spansFile`. */
  const spansFileOpts = (spansFile: string) => ({
    type: 'SYSTEM_PROMPT_RECOMMENDATION' as const,
    inlineContent: 'You are helpful',
    inputSource: 'inline',
    traceSource: 'spans-file',
    spansFile,
    runtimeId: 'proj_agent-abc123',
    accountId: '123456789012',
    region: 'us-east-1',
    evaluatorIds: ['arn:aws:bedrock-agentcore:::evaluator/Builtin.Correctness'],
  });

  it('rejects spans file exceeding max count', async () => {
    await withSpansFile(MAX_INLINE_SPANS + 1, async spansFile => {
      await expect(buildRecommendationConfig(spansFileOpts(spansFile))).rejects.toThrow(ValidationError);
    });
  });

  it('accepts spans file within limit', async () => {
    await withSpansFile(5, async spansFile => {
      const result = await buildRecommendationConfig(spansFileOpts(spansFile));
      expect(result.systemPromptRecommendationConfig).toBeDefined();
    });
  });
});
// NOTE(review): the type arguments of `Partial` were lost in the extracted source; they are
// reconstructed from each function's return type.

/** Builds a job record whose ARN carries a region, with optional field overrides. */
function baseRecord(overrides: Partial<RecommendationJobRecord> = {}): RecommendationJobRecord {
  const defaults: RecommendationJobRecord = {
    type: 'recommendation',
    id: 'rec-123',
    arn: 'arn:aws:bedrock-agentcore:us-west-2:111122223333:recommendation/rec-123',
    status: 'PENDING',
    createdAt: '2026-06-16T01:00:00.000Z',
    agent: 'MyAgent',
    recommendationType: 'SYSTEM_PROMPT_RECOMMENDATION',
    evaluators: ['Builtin.Correctness'],
    inputSource: 'inline',
  };
  return { ...defaults, ...overrides };
}

/** Builds the mocked getRecommendation response, with optional field overrides. */
function getResult(overrides: Partial<GetRecommendationResult>): GetRecommendationResult {
  const defaults: GetRecommendationResult = {
    recommendationId: 'rec-123',
    recommendationArn: 'arn:aws:bedrock-agentcore:us-west-2:111122223333:recommendation/rec-123',
    name: 'rec',
    type: 'SYSTEM_PROMPT_RECOMMENDATION',
    status: 'PENDING',
  };
  return { ...defaults, ...overrides };
}

describe('recommendationHandler.refresh — completedAt only on terminal status', () => {
  /**
   * Stubs the service response, refreshes a record built from `local`, asserts the call
   * succeeded and hands back the refreshed record.
   */
  async function refreshedRecord(
    service: Partial<GetRecommendationResult>,
    local: Partial<RecommendationJobRecord>
  ): Promise<RecommendationJobRecord> {
    mockGet.mockResolvedValue(getResult(service));
    const outcome = await recommendationHandler.refresh(baseRecord(local));
    expect(outcome.success).toBe(true);
    if (!outcome.success) throw new Error('refresh did not succeed');
    return outcome.record;
  }

  beforeEach(() => {
    vi.clearAllMocks();
  });

  it('does NOT set completedAt while IN_PROGRESS even though updatedAt advances', async () => {
    const refreshed = await refreshedRecord(
      { status: 'IN_PROGRESS', updatedAt: '2026-06-16T01:00:05.000Z' },
      { status: 'PENDING' }
    );

    expect(refreshed.status).toBe('IN_PROGRESS');
    // Regression: a running job must not look completed.
    expect(refreshed.completedAt).toBeUndefined();
  });

  it('clears a stale completedAt left by a prior buggy refresh when still IN_PROGRESS', async () => {
    const refreshed = await refreshedRecord(
      { status: 'IN_PROGRESS', updatedAt: '2026-06-16T01:00:05.000Z' },
      { status: 'IN_PROGRESS', completedAt: '2026-06-16T01:00:01.000Z' }
    );

    expect(refreshed.completedAt).toBeUndefined();
  });

  it('sets completedAt from the service completedAt once COMPLETED', async () => {
    const refreshed = await refreshedRecord(
      {
        status: 'COMPLETED',
        completedAt: '2026-06-16T01:05:00.000Z',
        updatedAt: '2026-06-16T01:05:00.000Z',
      },
      { status: 'IN_PROGRESS' }
    );

    expect(refreshed.status).toBe('COMPLETED');
    expect(refreshed.completedAt).toBe('2026-06-16T01:05:00.000Z');
  });

  it('falls back to updatedAt for a terminal status with no completedAt (e.g. FAILED)', async () => {
    const refreshed = await refreshedRecord(
      { status: 'FAILED', updatedAt: '2026-06-16T01:02:00.000Z', statusReasons: ['boom'] },
      { status: 'IN_PROGRESS' }
    );

    expect(refreshed.status).toBe('FAILED');
    expect(refreshed.completedAt).toBe('2026-06-16T01:02:00.000Z');
  });
});
*/ -import { ConfigIO, ResourceNotFoundError } from '../../../lib'; -import type { Result } from '../../../lib/result'; -import { getConfigurationBundleVersion } from '../../aws/agentcore-config-bundles'; -import type { RecommendationResult } from '../../aws/agentcore-recommendation'; +import { ConfigIO, ResourceNotFoundError } from '../../../../lib'; +import type { Result } from '../../../../lib/result'; +import { getConfigurationBundleVersion } from '../../../aws/agentcore-config-bundles'; +import type { RecommendationResult } from '../../../aws/agentcore-recommendation'; export interface ApplyRecommendationOptions { /** Config bundle name in agentcore.json (used by CLI) */ diff --git a/src/cli/operations/jobs/recommendation/build-config.ts b/src/cli/operations/jobs/recommendation/build-config.ts new file mode 100644 index 000000000..21dd00e71 --- /dev/null +++ b/src/cli/operations/jobs/recommendation/build-config.ts @@ -0,0 +1,310 @@ +/** + * Recommendation start-time pipeline, extracted from the legacy run-recommendation.ts so the + * job handler's create() can reuse it. Owns: evaluator name→ARN resolution, account-id extraction, + * config-bundle JSONPath component resolution, structured failure extraction, and the + * recommendationConfig builder (which includes the slow sessions/spans-file span fetch). 
/**
 * Turns an evaluator reference into a full ARN.
 *
 * - A value already starting with `arn:` is returned unchanged.
 * - A `Builtin.*` shorthand is expanded into an evaluator ARN using the region's ARN prefix.
 * - Anything else is looked up by name among the deployed evaluators of every target.
 *
 * @returns the ARN, or undefined when a custom evaluator name is not found in deployed state
 */
export function resolveEvaluatorId(
  deployedState: DeployedState,
  evaluator: string,
  region: string
): string | undefined {
  if (evaluator.startsWith('arn:')) {
    return evaluator;
  }

  if (evaluator.startsWith('Builtin.')) {
    return `${arnPrefix(region)}:bedrock-agentcore:::evaluator/${evaluator}`;
  }

  const deployed = Object.values(deployedState.targets)
    .map(target => target.resources?.evaluators?.[evaluator])
    .find(candidate => Boolean(candidate));
  return deployed ? deployed.evaluatorArn : undefined;
}

/** Returns the account-id segment of an ARN when it is exactly 12 digits, otherwise '*'. */
export function extractAccountIdFromArn(arn: string): string {
  const accountSegment = arn.split(':')[4];
  if (accountSegment && /^\d{12}$/.test(accountSegment)) {
    return accountSegment;
  }
  return '*';
}

/**
 * Resolves a config-bundle component key to the ARN used in JSONPath expressions.
 *
 * `{{runtime:NAME}}` and `{{gateway:NAME}}` placeholders are looked up in deployed state
 * (gateways are checked under both `gateways` and `mcp.gateways` of each target). ARNs, unknown
 * placeholders and any other key are returned unchanged.
 */
export function resolveComponentKeyForJsonPath(key: string, deployedState: DeployedState): string {
  if (key.startsWith('arn:')) return key;

  const targets = Object.values(deployedState.targets);

  const runtimeName = /^\{\{runtime:(.+)\}\}$/.exec(key)?.[1];
  if (runtimeName !== undefined) {
    for (const target of targets) {
      const runtime = target.resources?.runtimes?.[runtimeName];
      if (runtime) return runtime.runtimeArn;
    }
  }

  const gatewayName = /^\{\{gateway:(.+)\}\}$/.exec(key)?.[1];
  if (gatewayName !== undefined) {
    for (const target of targets) {
      const httpGateway = target.resources?.gateways?.[gatewayName];
      if (httpGateway) return httpGateway.gatewayArn;
      const mcpGateway = target.resources?.mcp?.gateways?.[gatewayName];
      if (mcpGateway) return mcpGateway.gatewayArn;
    }
  }

  return key;
}

/**
 * Collapses the failure information of a poll result into one display string: the status reasons
 * joined with '; ', then the bracketed error code and the error message of the system-prompt
 * result (or, when that is absent, the tool-description result).
 *
 * @returns the space-joined string, or undefined when there is nothing to report
 */
export function extractFailureDetails(pollResult: {
  statusReasons?: string[];
  recommendationResult?: RecommendationResult;
}): string | undefined {
  const { statusReasons, recommendationResult } = pollResult;
  const fragments: string[] = [];

  if (statusReasons?.length) {
    fragments.push(statusReasons.join('; '));
  }

  const failure =
    recommendationResult?.systemPromptRecommendationResult ??
    recommendationResult?.toolDescriptionRecommendationResult;
  if (failure?.errorCode) fragments.push(`[${failure.errorCode}]`);
  if (failure?.errorMessage) fragments.push(failure.errorMessage);

  return fragments.length === 0 ? undefined : fragments.join(' ');
}

/** Inputs consumed by buildRecommendationConfig. */
export interface BuildConfigOptions {
  type: RecommendationType;
  inlineContent?: string;
  bundleArn?: string;
  bundleVersion?: string;
  systemPromptJsonPath?: string;
  toolDescJsonPaths?: { toolName: string; toolDescriptionJsonPath: string }[];
  inputSource: string;
  tools?: string[];
  /** Where agent traces come from, e.g. 'batch-evaluation', 'spans-file' or 'sessions'. */
  traceSource: string;
  lookbackDays?: number;
  sessionIds?: string[];
  /** Path to a JSON file of spans, read when traceSource is 'spans-file'. */
  spansFile?: string;
  /** Id of a stored insights run whose ARN is used as the batch-evaluation trace source. */
  fromInsights?: string;
  batchEvaluationArn?: string;
  runtimeId: string;
  accountId: string;
  region: string;
  evaluatorIds: string[];
  onProgress?: (status: string, message: string) => void;
  logger?: ExecLogger;
}
+ ); + } + agentTraces = { batchEvaluation: { batchEvaluationArn: batchEvalArn } }; + } else if (opts.traceSource === 'spans-file' && opts.spansFile) { + // Explicit spans file — read and use as inline sessionSpans + const spansContent = readFileSync(opts.spansFile, 'utf-8'); + const sessionSpans = JSON.parse(spansContent) as SessionSpan | SessionSpan[]; + const spansList = Array.isArray(sessionSpans) ? sessionSpans : [sessionSpans]; + if (spansList.length > MAX_INLINE_SPANS) { + throw new ValidationError( + `Spans file contains ${spansList.length} spans, which exceeds the maximum of ${MAX_INLINE_SPANS}. Reduce the number of spans or use CloudWatch-based trace collection instead.` + ); + } + agentTraces = { sessionSpans: spansList }; + } else if (opts.traceSource === 'sessions' && opts.sessionIds && opts.sessionIds.length > 0) { + // Session IDs selected — auto-fetch from both log groups and use inline sessionSpans. + // The CloudWatch trace config does not support filtering by multiple session IDs, + // so we fetch spans client-side and send them inline. + opts.onProgress?.('fetching-spans', 'Fetching session spans from CloudWatch...'); + opts.logger?.log( + 'Auto-fetching spans for selected sessions (CloudWatch config does not support session ID filtering)' + ); + + const allSpans = []; + for (const sessionId of opts.sessionIds) { + const result = await fetchSessionSpans({ + region: opts.region, + runtimeId: opts.runtimeId, + sessionId, + lookbackDays: opts.lookbackDays ?? 7, + onProgress: msg => { + opts.logger?.log(msg); + opts.onProgress?.('fetching-spans', msg); + }, + }); + allSpans.push(...result.spans); + } + + if (allSpans.length === 0) { + throw new Error( + 'No spans found for the specified session(s). Ensure the agent has been invoked and traces have propagated to CloudWatch (may take 5-10 minutes).' 
+ ); + } + if (allSpans.length > MAX_INLINE_SPANS) { + throw new ValidationError( + `Fetched ${allSpans.length} spans across the specified sessions, which exceeds the maximum of ${MAX_INLINE_SPANS}. Reduce the number of sessions or use CloudWatch-based trace collection instead.` + ); + } + + opts.logger?.log(`Total spans fetched: ${allSpans.length}`); + opts.onProgress?.('fetching-spans', `Fetched ${allSpans.length} spans`); + agentTraces = { sessionSpans: allSpans }; + } else { + // Lookback-based path — use cloudwatchLogs with time range + const runtimeLogGroupArn = `${arnPrefix(opts.region)}:logs:${opts.region}:${opts.accountId}:log-group:${runtimeLogGroup(opts.runtimeId)}`; + const spansLogGroupArn = `${arnPrefix(opts.region)}:logs:${opts.region}:${opts.accountId}:log-group:aws/spans`; + + // Derive service name: strip the random hash suffix from runtimeId + // runtimeId format: {project}_{agent}-{hash} → serviceName: {project}_{agent}.DEFAULT + const serviceName = opts.runtimeId.replace(/-[^-]+$/, '.DEFAULT'); + + const lookbackDays = opts.lookbackDays ?? 7; + agentTraces = { + cloudwatchLogs: { + logGroupArns: [runtimeLogGroupArn, spansLogGroupArn], + serviceNames: [serviceName], + startTime: new Date(Date.now() - lookbackDays * 24 * 60 * 60 * 1000).toISOString(), + endTime: new Date().toISOString(), + }, + }; + } + + const evaluationConfig: RecommendationEvaluationConfig = { + evaluators: [{ evaluatorArn: opts.evaluatorIds[0]! }], + }; + + // Validate required fields for config-bundle source (API requires all three) + if (opts.inputSource === 'config-bundle' && opts.bundleArn && !opts.bundleVersion) { + throw new Error('Config bundle version is required. 
Provide --bundle-version or deploy the bundle first.'); + } + + if (opts.inputSource === 'config-bundle' && opts.bundleArn) { + if (opts.type === 'SYSTEM_PROMPT_RECOMMENDATION' && !opts.systemPromptJsonPath) { + throw new Error( + 'Config bundle requires --system-prompt-json-path to locate the system prompt field.\n' + + "Use the field name (e.g. --system-prompt-json-path 'systemPrompt') and it will be resolved from agentcore.json.\n" + + "Or provide the full JSONPath (e.g. '$.ARN.configuration.systemPrompt')." + ); + } + if (opts.type === 'TOOL_DESCRIPTION_RECOMMENDATION' && !opts.toolDescJsonPaths?.length) { + throw new Error( + 'Config bundle requires --tool-desc-json-path to locate tool description fields.\n' + + "Example: --tool-desc-json-path 'toolName:$.ARN.configuration.toolDescription'" + ); + } + } + + if (opts.type === 'SYSTEM_PROMPT_RECOMMENDATION') { + return { + systemPromptRecommendationConfig: { + systemPrompt: + opts.inputSource === 'config-bundle' && opts.bundleArn + ? { + configurationBundle: { + bundleArn: opts.bundleArn, + versionId: opts.bundleVersion!, + systemPromptJsonPath: opts.systemPromptJsonPath, + }, + } + : { text: opts.inlineContent ?? '' }, + agentTraces, + evaluationConfig, + }, + }; + } + + // TOOL_DESCRIPTION_RECOMMENDATION + if (opts.inputSource === 'config-bundle' && opts.bundleArn && opts.toolDescJsonPaths?.length) { + // Config bundle source — pass bundle reference with JSON paths for server-side resolution + return { + toolDescriptionRecommendationConfig: { + toolDescription: { + configurationBundle: { + bundleArn: opts.bundleArn, + versionId: opts.bundleVersion!, + tools: opts.toolDescJsonPaths, + }, + }, + agentTraces, + }, + }; + } + + // Inline/file source — parse "toolName:description" pairs from tools array + const toolEntries = (opts.tools ?? []).map(t => { + const colonIdx = t.indexOf(':'); + const toolName = colonIdx > 0 ? 
t.slice(0, colonIdx) : t; + if (!TOOL_NAME_REGEX.test(toolName) || toolName.length > MAX_TOOL_NAME_LENGTH) { + throw new ValidationError( + `Tool name "${toolName}" is invalid. Must contain only alphanumeric characters, underscores, hyphens, or dots (max ${MAX_TOOL_NAME_LENGTH} chars).` + ); + } + if (colonIdx > 0) { + return { toolName, toolDescription: { text: t.slice(colonIdx + 1) } }; + } + return { toolName, toolDescription: { text: opts.inlineContent ?? '' } }; + }); + + return { + toolDescriptionRecommendationConfig: { + toolDescription: { + toolDescriptionText: { + tools: toolEntries, + }, + }, + agentTraces, + }, + }; +} diff --git a/src/cli/operations/recommendation/fetch-session-spans.ts b/src/cli/operations/jobs/recommendation/fetch-session-spans.ts similarity index 97% rename from src/cli/operations/recommendation/fetch-session-spans.ts rename to src/cli/operations/jobs/recommendation/fetch-session-spans.ts index a97dcc9c7..65c1050df 100644 --- a/src/cli/operations/recommendation/fetch-session-spans.ts +++ b/src/cli/operations/jobs/recommendation/fetch-session-spans.ts @@ -11,8 +11,8 @@ * * Without log records the mapper produces "zero trajectories". */ -import type { SessionSpan } from '../../aws/agentcore-recommendation'; -import { runtimeLogGroup, searchLogs } from '../../aws/cloudwatch'; +import type { SessionSpan } from '../../../aws/agentcore-recommendation'; +import { runtimeLogGroup, searchLogs } from '../../../aws/cloudwatch'; export interface FetchSessionSpansOptions { /** AWS region */ diff --git a/src/cli/operations/jobs/recommendation/format.ts b/src/cli/operations/jobs/recommendation/format.ts new file mode 100644 index 000000000..6c6afc2fd --- /dev/null +++ b/src/cli/operations/jobs/recommendation/format.ts @@ -0,0 +1,63 @@ +/** Presentation helpers for recommendation job CLI output (history table + detail view). 
*/ +import { formatJobDate } from '../shared/format'; +import type { RecommendationJobRecord } from '../shared/types'; + +function shortType(type: string): string { + if (type === 'SYSTEM_PROMPT_RECOMMENDATION') return 'System Prompt'; + if (type === 'TOOL_DESCRIPTION_RECOMMENDATION') return 'Tool Description'; + return type; +} + +export function printRecommendationHistory(records: RecommendationJobRecord[]): void { + if (records.length === 0) { + console.log('No recommendation jobs found. Run `agentcore run recommendation` to create one.'); + return; + } + console.log(`\n${'Date'.padEnd(22)} ${'Status'.padEnd(14)} ${'Type'.padEnd(18)} ${'Agent'.padEnd(18)} ${'ID'}`); + console.log('─'.repeat(100)); + for (const r of records) { + console.log( + `${formatJobDate(r.createdAt).padEnd(22)} ${r.status.padEnd(14)} ${shortType(r.recommendationType).padEnd(18)} ${(r.agent ?? 'unknown').padEnd(18)} ${r.id}` + ); + } + console.log(''); +} + +export function printRecommendationDetail(record: RecommendationJobRecord): void { + console.log(`\nRecommendation: ${record.id}`); + console.log(`Status: ${record.status}`); + console.log(`Agent: ${record.agent}`); + console.log(`Type: ${shortType(record.recommendationType)}`); + console.log(`Evaluators: ${record.evaluators.join(', ') || '(none)'}`); + console.log(`Started: ${formatJobDate(record.createdAt)}`); + if (record.completedAt) console.log(`Completed: ${formatJobDate(record.completedAt)}`); + if (record.kmsKeyArn) console.log(`KMS Key: ${record.kmsKeyArn}`); + + const sys = record.result?.systemPromptRecommendationResult; + const tool = record.result?.toolDescriptionRecommendationResult; + if (sys?.recommendedSystemPrompt) { + console.log('\n+++ Recommended System Prompt +++'); + console.log(sys.recommendedSystemPrompt); + if (sys.explanation) { + console.log('\n--- Explanation ---'); + console.log(sys.explanation); + } + } else if (tool?.tools?.length) { + for (const t of tool.tools) { + console.log(`\nTool: ${t.toolName}`); 
+ console.log(`Recommended: ${t.recommendedToolDescription}`); + if (t.explanation) { + console.log(`Explanation: ${t.explanation}`); + } + } + } else if (record.status === 'FAILED') { + console.log(`\nError: ${record.failureDetail ?? record.statusReasons?.join('; ') ?? 'unknown'}`); + } else { + console.log('\nResult not yet available.'); + } + if (record.syncedVersionId) { + console.log(`\nNew config bundle version ${record.syncedVersionId} applied to agentcore.json.`); + } + if (record.logFilePath) console.log(`\nLog: ${record.logFilePath}`); + console.log(''); +} diff --git a/src/cli/operations/jobs/recommendation/handler.ts b/src/cli/operations/jobs/recommendation/handler.ts new file mode 100644 index 000000000..8889b86df --- /dev/null +++ b/src/cli/operations/jobs/recommendation/handler.ts @@ -0,0 +1,336 @@ +/** + * Recommendation job handler — composes Startable, Refreshable, Settles, Archivable. + * + * - create(): resolve agent + evaluator(s), build the recommendationConfig (incl. the slow + * sessions/spans-file fetch), make ONE StartRecommendation call, persist the record. + * - refresh(): GET latest status; map 404 → NOT_FOUND; copy result / failure detail. Pure (no config writes). + * - settle(): once COMPLETED for a config-bundle input, sync the new bundle version into agentcore.json + * exactly once (idempotent via syncedVersionId). Runs sequentially in the engine. + * - archive(): DeleteRecommendation (recommendation has no Stop — archive is the cancel). 
+ */ +import { ConfigIO, JobNotFoundError, ResourceNotFoundError, ValidationError, toError } from '../../../../lib'; +import type { Result } from '../../../../lib/result'; +import { deleteRecommendation, getRecommendation, startRecommendation } from '../../../aws/agentcore-recommendation'; +import { detectRegion } from '../../../aws/region'; +import { ExecLogger } from '../../../logging/exec-logger'; +import { + MAX_INLINE_TEXT_LENGTH, + NOT_FOUND_STATUS, + RECOMMENDATION_NAME_REGEX, + TERMINAL_STATUSES, +} from '../shared/constants'; +import { regionFromArn, resolveJobRegion } from '../shared/region'; +import { resolveAgentState } from '../shared/resolve-agent-state'; +import type { RecommendationHandler, RecommendationJobRecord, StartRecommendationJobOptions } from '../shared/types'; +import { applyRecommendationToBundle } from './apply-to-bundle'; +import { + buildRecommendationConfig, + extractAccountIdFromArn, + extractFailureDetails, + resolveComponentKeyForJsonPath, + resolveEvaluatorId, +} from './build-config'; +import { readFileSync } from 'fs'; + +/** Max length the service allows for a recommendation name (RECOMMENDATION_NAME_REGEX: 48 chars). */ +const MAX_RECOMMENDATION_NAME_LENGTH = 48; + +/** + * Build an auto-generated recommendation name that satisfies the service constraint + * (RECOMMENDATION_NAME_REGEX = /^[a-zA-Z][a-zA-Z0-9_-]{0,47}$/). The `_` prefix is + * sanitized (invalid chars → `_`) and truncated so that `_` stays within 48 + * chars; the full timestamp is kept as the suffix to preserve uniqueness across runs. + */ +export function autoRecommendationName( + projectName: string, + agent: string | undefined, + now: number = Date.now() +): string { + const suffix = String(now); + const maxPrefix = MAX_RECOMMENDATION_NAME_LENGTH - suffix.length - 1; // 1 for the joining "_" + const rawPrefix = agent ? 
`${projectName}_${agent}` : projectName; + let prefix = rawPrefix.replace(/[^a-zA-Z0-9_-]/g, '_').slice(0, Math.max(1, maxPrefix)); + // Must begin with a letter; if truncation/sanitization left a non-letter lead, prefix with 'r'. + if (!/^[a-zA-Z]/.test(prefix)) prefix = `r${prefix}`.slice(0, Math.max(1, maxPrefix)); + return `${prefix}_${suffix}`; +} + +export const recommendationHandler: RecommendationHandler = { + async create( + opts: StartRecommendationJobOptions, + configIO: ConfigIO + ): Promise> { + let logger: ExecLogger | undefined; + try { + logger = new ExecLogger({ command: 'recommend' }); + } catch { + // Logger creation can fail in tests or with no project root — non-fatal. + } + + try { + logger?.startStep('Load project config'); + const [projectSpec, deployedState, awsTargets] = await Promise.all([ + configIO.readProjectSpec(), + configIO.readDeployedState(), + configIO.resolveAWSDeploymentTargets(), + ]); + const region = await resolveJobRegion(opts.region, awsTargets); + logger?.log(`Region: ${region}`); + logger?.endStep('success'); + + // Resolve agent (needed for runtimeId + account id from its ARN) — skip for batch-evaluation source + logger?.startStep('Resolve agent and evaluators'); + const agentState = opts.agent ? resolveAgentState(deployedState, opts.agent) : undefined; + if (!agentState && opts.traceSource !== 'batch-evaluation') { + const err = new ResourceNotFoundError(`Agent "${opts.agent}" not deployed. Run \`agentcore deploy\` first.`); + logger?.endStep('error', err.message); + logger?.finalize(false); + return { success: false, error: err }; + } + + // Resolve evaluators (arity enforced here, not at the command layer) + const evaluatorIds: string[] = []; + for (const evaluator of opts.evaluators) { + const id = resolveEvaluatorId(deployedState, evaluator, region); + if (!id) { + const err = new ResourceNotFoundError( + `Evaluator "${evaluator}" not found. 
Use a Builtin.* name, a full ARN, or deploy a custom evaluator first.` + ); + logger?.endStep('error', err.message); + logger?.finalize(false); + return { success: false, error: err }; + } + evaluatorIds.push(id); + } + if (opts.type === 'SYSTEM_PROMPT_RECOMMENDATION' && evaluatorIds.length !== 1) { + const err = new ValidationError('System prompt recommendations require exactly one evaluator.'); + logger?.endStep('error', err.message); + logger?.finalize(false); + return { success: false, error: err }; + } + logger?.log(`Evaluators: ${evaluatorIds.join(', ') || '(none)'}`); + logger?.endStep('success'); + + // Read inline/file content + validate non-empty system-prompt before any API call + let inlineContent: string | undefined; + if (opts.inputSource === 'file' && opts.promptFile) { + inlineContent = readFileSync(opts.promptFile, 'utf-8'); + } else if (opts.inputSource === 'inline') { + inlineContent = opts.inlineContent; + } + if ( + opts.type === 'SYSTEM_PROMPT_RECOMMENDATION' && + opts.inputSource !== 'config-bundle' && + !inlineContent?.trim() + ) { + const err = new ValidationError( + 'System prompt content is required. Provide via --inline, --prompt-file, or --bundle-name.' + ); + logger?.finalize(false); + return { success: false, error: err }; + } + if (inlineContent && inlineContent.length > MAX_INLINE_TEXT_LENGTH) { + const err = new ValidationError( + `Inline text exceeds the maximum allowed length (${inlineContent.length} characters, limit is ${MAX_INLINE_TEXT_LENGTH}).` + ); + logger?.finalize(false); + return { success: false, error: err }; + } + + const accountId = agentState ? 
extractAccountIdFromArn(agentState.runtimeArn) : ''; + + // Resolve config-bundle ARN + short JSONPath (from deployed state / agentcore.json) + let bundleArn: string | undefined; + let resolvedSystemPromptJsonPath = opts.systemPromptJsonPath; + if (opts.inputSource === 'config-bundle' && opts.bundleName) { + if (opts.bundleName.startsWith('arn:')) { + bundleArn = opts.bundleName; + } else { + for (const target of Object.values(deployedState.targets ?? {})) { + const bundle = target?.resources?.configBundles?.[opts.bundleName]; + if (bundle?.bundleArn) { + bundleArn = bundle.bundleArn; + break; + } + } + if (!bundleArn) { + const err = new ResourceNotFoundError( + `Config bundle "${opts.bundleName}" not found in deployed state. Run \`agentcore deploy\` first.` + ); + logger?.finalize(false); + return { success: false, error: err }; + } + } + + if (resolvedSystemPromptJsonPath && !resolvedSystemPromptJsonPath.startsWith('$')) { + const bundleName = opts.bundleName.startsWith('arn:') + ? Object.values(deployedState.targets) + .flatMap(t => Object.entries(t.resources?.configBundles ?? 
{})) + .find(([, b]) => b.bundleArn === opts.bundleName)?.[0] + : opts.bundleName; + if (bundleName) { + const projBundle = projectSpec.configBundles?.find(b => b.name === bundleName); + if (projBundle?.components) { + const firstComponentKey = Object.keys(projBundle.components)[0]; + if (firstComponentKey) { + const resolvedKey = resolveComponentKeyForJsonPath(firstComponentKey, deployedState); + resolvedSystemPromptJsonPath = `$.${resolvedKey}.configuration.${resolvedSystemPromptJsonPath}`; + } + } + } + } + } + + // Build the request body (this performs the sessions/spans-file fetch when applicable) + const recommendationConfig = await buildRecommendationConfig({ + type: opts.type, + inlineContent, + bundleArn, + bundleVersion: opts.bundleVersion, + systemPromptJsonPath: resolvedSystemPromptJsonPath, + toolDescJsonPaths: opts.toolDescJsonPaths, + inputSource: opts.inputSource, + tools: opts.tools, + traceSource: opts.traceSource, + lookbackDays: opts.lookbackDays, + sessionIds: opts.sessionIds, + spansFile: opts.spansFile, + fromInsights: opts.fromInsights, + batchEvaluationArn: opts.batchEvaluationArn, + runtimeId: agentState?.runtimeId ?? '', + accountId, + region, + evaluatorIds, + onProgress: opts.onProgress, + logger, + }); + + // ONE API call + logger?.startStep('Start recommendation'); + const name = opts.recommendationName ?? autoRecommendationName(projectSpec.name, opts.agent); + if (opts.recommendationName && !RECOMMENDATION_NAME_REGEX.test(opts.recommendationName)) { + const err = new ValidationError( + `Recommendation name "${opts.recommendationName}" is invalid. 
Must begin with a letter and contain only alphanumeric characters, underscores, or hyphens (max 48 chars).` + ); + logger?.endStep('error', err.message); + logger?.finalize(false); + return { success: false, error: err }; + } + opts.onProgress?.('starting', `Starting recommendation "${name}"...`); + const startResult = await startRecommendation({ + region, + name, + type: opts.type, + recommendationConfig, + ...(opts.kmsKeyArn ? { kmsKeyArn: opts.kmsKeyArn } : {}), + }); + logger?.log(`Response: ${JSON.stringify(startResult, null, 2)}`); + logger?.endStep('success'); + opts.onProgress?.('started', `Recommendation created: ${startResult.recommendationId} (${startResult.status})`); + logger?.finalize(true); + + const record: RecommendationJobRecord = { + type: 'recommendation', + id: startResult.recommendationId, + arn: startResult.recommendationArn, + status: startResult.status, + createdAt: startResult.createdAt ?? new Date().toISOString(), + agent: opts.agent ?? '', + logFilePath: logger?.logFilePath, + recommendationType: opts.type, + evaluators: opts.evaluators, + inputSource: opts.inputSource, + bundleName: opts.bundleName, + bundleArn, + bundleVersion: opts.bundleVersion, + systemPromptJsonPath: resolvedSystemPromptJsonPath, + toolDescJsonPaths: opts.toolDescJsonPaths, + ...(opts.kmsKeyArn ? { kmsKeyArn: opts.kmsKeyArn } : {}), + }; + return { success: true, record }; + } catch (err) { + logger?.finalize(false); + return { success: false, error: toError(err) }; + } + }, + + async refresh(record: RecommendationJobRecord): Promise> { + const region = regionFromArn(record.arn) ?? (await detectRegion()).region; + let response; + try { + response = await getRecommendation({ region, recommendationId: record.id }); + } catch (err) { + if (err instanceof JobNotFoundError) { + return { success: true, record: { ...record, status: NOT_FOUND_STATUS } }; + } + return { success: false, error: toError(err) }; + } + + const failureDetail = + response.status === 'FAILED' + ? 
extractFailureDetails({ + statusReasons: response.statusReasons, + recommendationResult: response.recommendationResult, + }) + : undefined; + + // `completedAt` must only be set once the job actually reaches a terminal state — the UI renders a + // "Completed:" line whenever it is present. The service's `updatedAt` advances on every state + // transition (incl. PENDING → IN_PROGRESS), so it is NOT a completion signal. For a non-terminal + // status we leave `completedAt` unset (clearing any value a prior buggy refresh may have stored), + // so a still-running job never renders as completed. + const isTerminalStatus = TERMINAL_STATUSES.recommendation.has(response.status); + const completedAt = isTerminalStatus + ? (response.completedAt ?? response.updatedAt ?? record.completedAt) + : undefined; + + return { + success: true, + record: { + ...record, + status: response.status, + completedAt, + result: response.recommendationResult ?? record.result, + statusReasons: response.statusReasons ?? record.statusReasons, + failureDetail: failureDetail ?? record.failureDetail, + }, + }; + }, + + async settle(record: RecommendationJobRecord, configIO: ConfigIO): Promise { + // Only config-bundle recommendations that completed and produced a new bundle version, once. + if (record.inputSource !== 'config-bundle' || record.status !== 'COMPLETED' || !record.result) { + return record; + } + const resultBundle = + record.result.systemPromptRecommendationResult?.configurationBundle ?? + record.result.toolDescriptionRecommendationResult?.configurationBundle; + if (!resultBundle || record.syncedVersionId === resultBundle.versionId) { + return record; + } + + const region = regionFromArn(record.arn) ?? 
(await detectRegion()).region; + const applyResult = await applyRecommendationToBundle( + { bundleName: record.bundleName, bundleArn: record.bundleArn, result: record.result, region }, + configIO + ); + if (applyResult.success) { + return { ...record, syncedVersionId: resultBundle.versionId }; + } + return record; // leave unsynced so a later get()/list() retries + }, + + async archive(record: RecommendationJobRecord): Promise { + const region = regionFromArn(record.arn) ?? (await detectRegion()).region; + try { + await deleteRecommendation({ region, recommendationId: record.id }); + return { success: true }; + } catch (err) { + // Already gone on the service — local cleanup can still proceed. + if (err instanceof JobNotFoundError) { + return { success: true }; + } + return { success: false, error: toError(err) }; + } + }, +}; diff --git a/src/cli/operations/jobs/shared/__tests__/constants.test.ts b/src/cli/operations/jobs/shared/__tests__/constants.test.ts new file mode 100644 index 000000000..0148b8f10 --- /dev/null +++ b/src/cli/operations/jobs/shared/__tests__/constants.test.ts @@ -0,0 +1,93 @@ +import { NOT_FOUND_STATUS, isTerminal } from '../constants'; +import type { ABTestJobRecord, BatchEvaluationJobRecord, RecommendationJobRecord } from '../types'; +import { describe, expect, it } from 'vitest'; + +function rec(status: string): RecommendationJobRecord { + return { + type: 'recommendation', + id: 'rec-1', + arn: 'arn', + status, + createdAt: '2026-06-01T00:00:00Z', + agent: 'a', + recommendationType: 'SYSTEM_PROMPT_RECOMMENDATION', + evaluators: [], + inputSource: 'inline', + }; +} + +function batch(status: string, resultsFetched?: boolean): BatchEvaluationJobRecord { + return { + type: 'batch-evaluation', + id: 'be-1', + arn: 'arn', + status, + createdAt: '2026-06-01T00:00:00Z', + agent: 'a', + name: 'n', + evaluators: [], + resultsFetched, + }; +} + +/** status = lifecycle (ACTIVE/FAILED/CREATE_FAILED), lifecycleStatus = executionStatus 
(RUNNING/PAUSED/STOPPED) */ +function abTest(status: string, lifecycleStatus = 'RUNNING'): ABTestJobRecord { + return { + type: 'ab-test', + id: 'abt-1', + arn: 'arn', + status, + lifecycleStatus, + createdAt: '2026-06-01T00:00:00Z', + agent: 'a', + name: 'n', + mode: 'config-bundle', + gatewayArn: 'arn:aws:bedrock-agentcore:us-east-1:1:gateway/g', + variants: [], + evaluationConfig: { onlineEvaluationConfigArn: 'arn' }, + }; +} + +describe('isTerminal', () => { + it('treats recommendation COMPLETED/FAILED/NOT_FOUND as terminal', () => { + expect(isTerminal(rec('COMPLETED'))).toBe(true); + expect(isTerminal(rec('FAILED'))).toBe(true); + expect(isTerminal(rec(NOT_FOUND_STATUS))).toBe(true); + }); + + it('treats recommendation PENDING/IN_PROGRESS as non-terminal', () => { + expect(isTerminal(rec('PENDING'))).toBe(false); + expect(isTerminal(rec('IN_PROGRESS'))).toBe(false); + }); + + it('keeps a terminal-status batch eval refreshable until results are fetched', () => { + expect(isTerminal(batch('COMPLETED', false))).toBe(false); // results not fetched yet → still refreshable + expect(isTerminal(batch('COMPLETED', true))).toBe(true); + }); + + it('treats batch NOT_FOUND as terminal regardless of resultsFetched', () => { + expect(isTerminal(batch(NOT_FOUND_STATUS, false))).toBe(true); + }); + + it('treats unknown statuses as non-terminal', () => { + expect(isTerminal(rec('SOMETHING_NEW'))).toBe(false); + expect(isTerminal(batch('STOPPING', true))).toBe(false); + }); + + it('treats ab-test as terminal when failed + failureReason captured, or NOT_FOUND', () => { + expect(isTerminal({ ...abTest('FAILED'), failureReason: 'infra error' })).toBe(true); + expect(isTerminal({ ...abTest('CREATE_FAILED'), failureReason: 'setup error' })).toBe(true); + expect(isTerminal({ ...abTest('UPDATE_FAILED'), failureReason: 'update error' })).toBe(true); + expect(isTerminal(abTest(NOT_FOUND_STATUS))).toBe(true); + }); + + it('keeps ab-test refreshable when failed but failureReason not yet 
captured', () => { + expect(isTerminal(abTest('FAILED'))).toBe(false); + expect(isTerminal(abTest('CREATE_FAILED'))).toBe(false); + }); + + it('treats ab-test ACTIVE/CREATING as non-terminal', () => { + expect(isTerminal(abTest('ACTIVE'))).toBe(false); + expect(isTerminal(abTest('CREATING'))).toBe(false); + }); +}); diff --git a/src/cli/operations/jobs/shared/__tests__/engine.test.ts b/src/cli/operations/jobs/shared/__tests__/engine.test.ts new file mode 100644 index 000000000..42a87b4cf --- /dev/null +++ b/src/cli/operations/jobs/shared/__tests__/engine.test.ts @@ -0,0 +1,248 @@ +import { createJobEngine } from '../engine'; +import type { BatchEvaluationJobRecord, RecommendationJobRecord } from '../types'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +// ── Mocks (hoisted so the vi.mock factories below can reference them) ───────── +const { store, recHandler, batchHandler, mockValidateCreds } = vi.hoisted(() => ({ + store: { saveRecord: vi.fn(), loadRecord: vi.fn(), listRecords: vi.fn(), deleteRecord: vi.fn() }, + recHandler: { create: vi.fn(), refresh: vi.fn(), settle: vi.fn(), archive: vi.fn() }, + batchHandler: { create: vi.fn(), refresh: vi.fn(), stop: vi.fn(), archive: vi.fn() }, + mockValidateCreds: vi.fn(), +})); + +vi.mock('../storage', () => ({ + saveRecord: (...a: unknown[]) => store.saveRecord(...a), + loadRecord: (...a: unknown[]) => store.loadRecord(...a), + listRecords: (...a: unknown[]) => store.listRecords(...a), + deleteRecord: (...a: unknown[]) => store.deleteRecord(...a), +})); +vi.mock('../../recommendation/handler', () => ({ recommendationHandler: recHandler })); +vi.mock('../../batch-evaluation/handler', () => ({ batchEvaluationHandler: batchHandler })); +vi.mock('../../../../aws/account', () => ({ validateAwsCredentials: (...a: unknown[]) => mockValidateCreds(...a) })); +vi.mock('../../../../../lib', () => ({ + ConfigIO: function () { + return {}; + }, + JobNotFoundError: class JobNotFoundError extends Error 
{}, +})); + +function recRecord(over: Partial = {}): RecommendationJobRecord { + return { + type: 'recommendation', + id: 'rec-1', + arn: 'arn:aws:bedrock-agentcore:us-west-2:111122223333:recommendation/rec-1', + status: 'PENDING', + createdAt: '2026-06-01T00:00:00Z', + agent: 'myagent', + recommendationType: 'SYSTEM_PROMPT_RECOMMENDATION', + evaluators: ['Builtin.Correctness'], + inputSource: 'inline', + ...over, + }; +} + +function batchRecord(over: Partial = {}): BatchEvaluationJobRecord { + return { + type: 'batch-evaluation', + id: 'be-1', + arn: 'arn:aws:bedrock-agentcore:us-west-2:111122223333:batch-evaluation/be-1', + status: 'IN_PROGRESS', + createdAt: '2026-06-01T00:00:00Z', + agent: 'myagent', + name: 'run1', + evaluators: ['Builtin.Correctness'], + ...over, + }; +} + +describe('createJobEngine', () => { + beforeEach(() => { + mockValidateCreds.mockResolvedValue(undefined); + recHandler.settle.mockImplementation((r: RecommendationJobRecord) => Promise.resolve(r)); + }); + afterEach(() => vi.clearAllMocks()); + + describe('start', () => { + it('validates credentials, calls handler.create, and saves the record', async () => { + const record = recRecord(); + recHandler.create.mockResolvedValue({ success: true, record }); + const engine = createJobEngine(); + + const result = await engine.start('recommendation', { + type: 'SYSTEM_PROMPT_RECOMMENDATION', + agent: 'myagent', + evaluators: ['Builtin.Correctness'], + inputSource: 'inline', + traceSource: 'cloudwatch', + }); + + expect(mockValidateCreds).toHaveBeenCalled(); + expect(recHandler.create).toHaveBeenCalled(); + expect(store.saveRecord).toHaveBeenCalledWith(record); + expect(result.success).toBe(true); + }); + + it('does not save when credentials are invalid', async () => { + mockValidateCreds.mockRejectedValue(new Error('expired token')); + const engine = createJobEngine(); + const result = await engine.start('recommendation', { + type: 'SYSTEM_PROMPT_RECOMMENDATION', + agent: 'a', + evaluators: [], + 
inputSource: 'inline', + traceSource: 'cloudwatch', + }); + expect(result.success).toBe(false); + expect(recHandler.create).not.toHaveBeenCalled(); + expect(store.saveRecord).not.toHaveBeenCalled(); + }); + + it('does not save when create fails', async () => { + recHandler.create.mockResolvedValue({ success: false, error: new Error('bad input') }); + const engine = createJobEngine(); + const result = await engine.start('recommendation', { + type: 'SYSTEM_PROMPT_RECOMMENDATION', + agent: 'a', + evaluators: ['Builtin.Correctness'], + inputSource: 'inline', + traceSource: 'cloudwatch', + }); + expect(result.success).toBe(false); + expect(store.saveRecord).not.toHaveBeenCalled(); + }); + }); + + describe('get', () => { + it('returns a terminal record without refreshing', async () => { + store.loadRecord.mockReturnValue(recRecord({ status: 'COMPLETED' })); + const engine = createJobEngine(); + const record = await engine.get('recommendation', 'rec-1'); + expect(record?.status).toBe('COMPLETED'); + expect(recHandler.refresh).not.toHaveBeenCalled(); + }); + + it('refreshes a non-terminal record and saves the update', async () => { + store.loadRecord.mockReturnValue(recRecord({ status: 'IN_PROGRESS' })); + recHandler.refresh.mockResolvedValue({ success: true, record: recRecord({ status: 'COMPLETED' }) }); + const engine = createJobEngine(); + const record = await engine.get('recommendation', 'rec-1'); + expect(recHandler.refresh).toHaveBeenCalled(); + expect(store.saveRecord).toHaveBeenCalled(); + expect(record?.status).toBe('COMPLETED'); + }); + + it('persists error after refresh retries exhausted', async () => { + store.loadRecord.mockReturnValue(recRecord({ status: 'IN_PROGRESS' })); + recHandler.refresh.mockResolvedValue({ success: false, error: new Error('transient network error') }); + const engine = createJobEngine(); + const record = await engine.get('recommendation', 'rec-1'); + expect(record?.error).toBe('transient network error'); + }); + + it('returns undefined 
for a missing record', async () => { + store.loadRecord.mockReturnValue(undefined); + const engine = createJobEngine(); + expect(await engine.get('recommendation', 'nope')).toBeUndefined(); + }); + + it('runs settle() after refresh for recommendations', async () => { + store.loadRecord.mockReturnValue(recRecord({ status: 'IN_PROGRESS' })); + const completed = recRecord({ status: 'COMPLETED' }); + recHandler.refresh.mockResolvedValue({ success: true, record: completed }); + recHandler.settle.mockResolvedValue(recRecord({ status: 'COMPLETED', syncedVersionId: 'v2' })); + const engine = createJobEngine(); + const record = await engine.get('recommendation', 'rec-1'); + expect(recHandler.settle).toHaveBeenCalled(); + expect(record!.syncedVersionId).toBe('v2'); + }); + }); + + describe('list', () => { + it('refreshes non-terminal records and sorts by createdAt desc', async () => { + store.listRecords.mockReturnValue([ + recRecord({ id: 'old', status: 'COMPLETED', createdAt: '2026-06-01T00:00:00Z' }), + recRecord({ id: 'new', status: 'COMPLETED', createdAt: '2026-06-03T00:00:00Z' }), + ]); + const engine = createJobEngine(); + const records = await engine.list({ type: 'recommendation' }); + expect(records.map(r => r.id)).toEqual(['new', 'old']); + }); + + it('persists error when refresh fails for a list item', async () => { + store.listRecords.mockReturnValue([ + recRecord({ id: 'a', status: 'IN_PROGRESS' }), + recRecord({ id: 'b', status: 'COMPLETED' }), + ]); + recHandler.refresh.mockResolvedValue({ success: false, error: new Error('boom') }); + const engine = createJobEngine(); + const records = await engine.list({ type: 'recommendation' }); + expect(records).toHaveLength(2); + expect(records.find(r => r.id === 'a')?.error).toBe('boom'); + }); + + it('applies limit', async () => { + store.listRecords.mockReturnValue([ + recRecord({ id: 'a', status: 'COMPLETED', createdAt: '2026-06-03T00:00:00Z' }), + recRecord({ id: 'b', status: 'COMPLETED', createdAt: 
'2026-06-02T00:00:00Z' }), + recRecord({ id: 'c', status: 'COMPLETED', createdAt: '2026-06-01T00:00:00Z' }), + ]); + const engine = createJobEngine(); + const records = await engine.list({ type: 'recommendation', limit: 2 }); + expect(records.map(r => r.id)).toEqual(['a', 'b']); + }); + }); + + describe('stop', () => { + it('calls the handler stop and records STOPPING on success', async () => { + store.loadRecord.mockReturnValue(batchRecord()); + batchHandler.stop.mockResolvedValue({ success: true }); + const engine = createJobEngine(); + const result = await engine.stop('batch-evaluation', 'be-1'); + expect(batchHandler.stop).toHaveBeenCalled(); + expect(result.success).toBe(true); + expect(store.saveRecord).toHaveBeenCalledWith(expect.objectContaining({ status: 'STOPPING' })); + }); + + it('returns not-found for a missing record', async () => { + store.loadRecord.mockReturnValue(undefined); + const engine = createJobEngine(); + const result = await engine.stop('batch-evaluation', 'nope'); + expect(result.success).toBe(false); + }); + }); + + describe('archive', () => { + it('deletes the local record after a successful service delete', async () => { + store.loadRecord.mockReturnValue(recRecord()); + recHandler.archive.mockResolvedValue({ success: true }); + const engine = createJobEngine(); + const result = await engine.archive('recommendation', 'rec-1'); + expect(recHandler.archive).toHaveBeenCalled(); + expect(store.deleteRecord).toHaveBeenCalledWith('recommendation', 'rec-1'); + expect(result.success).toBe(true); + }); + + it('does not delete locally when the service delete fails', async () => { + store.loadRecord.mockReturnValue(recRecord()); + recHandler.archive.mockResolvedValue({ success: false, error: new Error('nope') }); + const engine = createJobEngine(); + const result = await engine.archive('recommendation', 'rec-1'); + expect(store.deleteRecord).not.toHaveBeenCalled(); + expect(result.success).toBe(false); + }); + }); + + describe('capabilities', () 
=> { + it('reports batch-evaluation as stoppable and recommendation as not', () => { + const engine = createJobEngine(); + expect(engine.capabilities('batch-evaluation').canStop).toBe(true); + expect(engine.capabilities('recommendation').canStop).toBe(false); + }); + + it('reports ab-test as stoppable, pausable, promotable, and debuggable', () => { + const engine = createJobEngine(); + const caps = engine.capabilities('ab-test'); + expect(caps).toEqual({ canStop: true, canPause: true, canPromote: true, canDebug: true }); + }); + }); +}); diff --git a/src/cli/operations/jobs/shared/__tests__/region.test.ts b/src/cli/operations/jobs/shared/__tests__/region.test.ts new file mode 100644 index 000000000..fad27d4db --- /dev/null +++ b/src/cli/operations/jobs/shared/__tests__/region.test.ts @@ -0,0 +1,41 @@ +import { regionFromArn, resolveJobRegion } from '../region'; +import { afterEach, describe, expect, it, vi } from 'vitest'; + +const { mockDetectRegion } = vi.hoisted(() => ({ mockDetectRegion: vi.fn() })); +vi.mock('../../../../aws/region', () => ({ detectRegion: () => mockDetectRegion() })); + +describe('regionFromArn', () => { + it('parses the region (field index 3) from a well-formed ARN', () => { + expect(regionFromArn('arn:aws:bedrock-agentcore:us-west-2:111122223333:recommendation/rec-1')).toBe('us-west-2'); + }); + + it('parses GovCloud / China partitions', () => { + expect(regionFromArn('arn:aws-us-gov:bedrock-agentcore:us-gov-west-1:111:recommendation/r')).toBe('us-gov-west-1'); + expect(regionFromArn('arn:aws-cn:bedrock-agentcore:cn-north-1:111:recommendation/r')).toBe('cn-north-1'); + }); + + it('returns undefined for a region-less / malformed ARN', () => { + expect(regionFromArn('not-an-arn')).toBeUndefined(); + expect(regionFromArn('arn:aws:svc::111:res')).toBeUndefined(); + }); +}); + +describe('resolveJobRegion', () => { + afterEach(() => vi.clearAllMocks()); + + it('prefers the explicit option', async () => { + expect(await 
resolveJobRegion('eu-west-1', [{ region: 'us-east-1' }])).toBe('eu-west-1'); + expect(mockDetectRegion).not.toHaveBeenCalled(); + }); + + it('falls back to the first deployment target', async () => { + expect(await resolveJobRegion(undefined, [{ region: 'ap-southeast-2' }])).toBe('ap-southeast-2'); + expect(mockDetectRegion).not.toHaveBeenCalled(); + }); + + it('falls back to detectRegion when no option or target', async () => { + mockDetectRegion.mockResolvedValue({ region: 'us-west-2' }); + expect(await resolveJobRegion(undefined, [])).toBe('us-west-2'); + expect(mockDetectRegion).toHaveBeenCalled(); + }); +}); diff --git a/src/cli/operations/jobs/shared/__tests__/storage.test.ts b/src/cli/operations/jobs/shared/__tests__/storage.test.ts new file mode 100644 index 000000000..9f66f7e6f --- /dev/null +++ b/src/cli/operations/jobs/shared/__tests__/storage.test.ts @@ -0,0 +1,102 @@ +import { assertSafeId, deleteRecord, listRecords, loadRecord, saveRecord } from '../storage'; +import type { BatchEvaluationJobRecord, RecommendationJobRecord } from '../types'; +import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from 'fs'; +import { tmpdir } from 'os'; +import { join } from 'path'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +// Hoisted holder so the vi.mock factory reads the current temp dir at call time. 
+const ctx = vi.hoisted(() => ({ root: '' })); + +vi.mock('../../../../../lib', () => ({ + CLI_SYSTEM_DIR: '.cli', + findConfigRoot: () => ctx.root, + NoProjectError: class NoProjectError extends Error {}, +})); + +function rec(over: Partial = {}): RecommendationJobRecord { + return { + type: 'recommendation', + id: 'rec-1', + arn: 'arn', + status: 'COMPLETED', + createdAt: '2026-06-01T00:00:00Z', + agent: 'a', + recommendationType: 'SYSTEM_PROMPT_RECOMMENDATION', + evaluators: [], + inputSource: 'inline', + ...over, + }; +} + +describe('jobs storage', () => { + beforeEach(() => { + ctx.root = mkdtempSync(join(tmpdir(), 'jobs-storage-')); + }); + afterEach(() => { + rmSync(ctx.root, { recursive: true, force: true }); + vi.clearAllMocks(); + }); + + it('round-trips a record through save → load', () => { + const record = rec({ id: 'rec-abc' }); + saveRecord(record); + expect(loadRecord('recommendation', 'rec-abc')).toEqual(record); + }); + + it('returns undefined for a missing record', () => { + expect(loadRecord('recommendation', 'missing')).toBeUndefined(); + }); + + it('lists records of a type and skips corrupt files without throwing', () => { + saveRecord(rec({ id: 'good-1' })); + saveRecord(rec({ id: 'good-2' })); + // Drop a corrupt file into the same dir + writeFileSync(join(ctx.root, '.cli', 'jobs', 'recommendations', 'broken.json'), '{ not valid json'); + const records = listRecords('recommendation'); + expect(records.map(r => r.id).sort()).toEqual(['good-1', 'good-2']); + }); + + it('ignores legacy-shape files (missing/wrong type discriminator)', () => { + mkdirSync(join(ctx.root, '.cli', 'jobs', 'recommendations'), { recursive: true }); + // Legacy recommendation record: keyed on recommendationId, `type` holds a RecommendationType + writeFileSync( + join(ctx.root, '.cli', 'jobs', 'recommendations', 'rec-legacy.json'), + JSON.stringify({ recommendationId: 'rec-legacy', type: 'SYSTEM_PROMPT_RECOMMENDATION', status: 'COMPLETED' }) + ); + 
expect(loadRecord('recommendation', 'rec-legacy')).toBeUndefined(); + expect(listRecords('recommendation')).toEqual([]); + }); + + it('deletes a record and reports whether it existed', () => { + saveRecord(rec({ id: 'rec-del' })); + expect(deleteRecord('recommendation', 'rec-del')).toBe(true); + expect(deleteRecord('recommendation', 'rec-del')).toBe(false); + expect(loadRecord('recommendation', 'rec-del')).toBeUndefined(); + }); + + it('keeps recommendation and batch-evaluation records in separate directories', () => { + saveRecord(rec({ id: 'rec-1' })); + const be: BatchEvaluationJobRecord = { + type: 'batch-evaluation', + id: 'be-1', + arn: 'arn', + status: 'COMPLETED', + createdAt: '2026-06-01T00:00:00Z', + agent: 'a', + name: 'n', + evaluators: [], + resultsFetched: true, + }; + saveRecord(be); + expect(listRecords('recommendation').map(r => r.id)).toEqual(['rec-1']); + expect(listRecords('batch-evaluation').map(r => r.id)).toEqual(['be-1']); + }); + + it('rejects ids containing path separators', () => { + expect(() => assertSafeId('../escape')).toThrow(); + expect(() => assertSafeId('a/b')).toThrow(); + expect(() => assertSafeId('')).toThrow(); + expect(() => assertSafeId('safe-id_123')).not.toThrow(); + }); +}); diff --git a/src/cli/operations/jobs/shared/constants.ts b/src/cli/operations/jobs/shared/constants.ts new file mode 100644 index 000000000..b014bc7f9 --- /dev/null +++ b/src/cli/operations/jobs/shared/constants.ts @@ -0,0 +1,99 @@ +/** + * Job Engine constants: storage directory names, per-type terminal-status sets, + * capability flags, and shared validation patterns. + */ +import type { JobCapabilities, JobRecord, JobType } from './types'; + +/** + * Local storage directory per job type, under `/.cli/`. + * Reuses the existing directory names so the layout is unchanged. 
+ */ +export const STORAGE_DIRS: Record = { + recommendation: 'recommendations', + 'batch-evaluation': 'batch-eval-results', + 'ab-test': 'ab-tests', + insights: 'insights', +}; + +/** Human-readable label per job type, for user-facing messages (e.g. "not found" errors). */ +export const JOB_TYPE_LABELS: Record = { + recommendation: 'Recommendation', + 'batch-evaluation': 'Batch evaluation', + 'ab-test': 'A/B test', + insights: 'Insights job', +}; + +/** Sentinel status set when a refresh GET 404s (job deleted on the service). Terminal for every job type. */ +export const NOT_FOUND_STATUS = 'NOT_FOUND'; + +/** + * Terminal statuses per job type. The services emit different vocabularies, so terminality + * is per-type — a single shared set would invent statuses a given service never emits. + * `SUCCEEDED`/`DELETING` are kept defensively for recommendations (`COMPLETED`/`FAILED` are authoritative). + */ +export const TERMINAL_STATUSES: Record> = { + recommendation: new Set(['COMPLETED', 'FAILED', 'SUCCEEDED', 'DELETING', NOT_FOUND_STATUS]), + 'batch-evaluation': new Set([ + 'COMPLETED', + 'COMPLETED_WITH_ERRORS', + 'FAILED', + 'STOPPED', + 'CANCELLED', + NOT_FOUND_STATUS, + ]), + // AB test: `record.status` holds lifecycle status (ACTIVE/FAILED/CREATE_FAILED). + // `record.lifecycleStatus` holds executionStatus (RUNNING/PAUSED/STOPPED) for keybindings. + 'ab-test': new Set(['FAILED', 'CREATE_FAILED', 'UPDATE_FAILED', 'DELETE_FAILED', NOT_FOUND_STATUS]), + insights: new Set(['COMPLETED', 'COMPLETED_WITH_ERRORS', 'FAILED', 'STOPPED', NOT_FOUND_STATUS]), +}; + +/** Runtime capability flags (TUI display only; engine legality is enforced by types).
*/ +export const JOB_CAPABILITIES: Record = { + recommendation: { canStop: false, canPause: false, canPromote: false, canDebug: false }, + 'batch-evaluation': { canStop: true, canPause: false, canPromote: false, canDebug: false }, + 'ab-test': { canStop: true, canPause: true, canPromote: true, canDebug: true }, + insights: { canStop: false, canPause: false, canPromote: false, canDebug: false }, +}; + +/** Maximum character length for inline text input (system prompt or tool descriptions). */ +export const MAX_INLINE_TEXT_LENGTH = 20_000; + +/** Recommendation name rule: start with a letter, alphanumeric + underscore/hyphen, max 48 chars. */ +export const RECOMMENDATION_NAME_REGEX = /^[a-zA-Z][a-zA-Z0-9_-]{0,47}$/; + +/** Maximum number of inline spans the API accepts. */ +export const MAX_INLINE_SPANS = 1000; + +/** Recommendation tool name rule: alphanumeric + underscore/hyphen/dot, max 256 chars. */ +export const TOOL_NAME_REGEX = /^[a-zA-Z0-9_\-.]+$/; +export const MAX_TOOL_NAME_LENGTH = 256; + +/** Batch-evaluation name rule: start with a letter, then letters/digits/underscores, max 48 chars. */ +export const BATCH_EVAL_NAME_REGEX = /^[a-zA-Z][a-zA-Z0-9_]{0,47}$/; + +/** + * Whether a record is in a terminal state AND fully settled (no further work on read). + * + * Batch-evaluation has a special case: a terminal record whose per-session results have not yet + * been fetched from CloudWatch is treated as NOT-yet-settled, so the next get()/list() retries + * the fetch (the output log can lag the status flip). 
+ */ +export function isTerminal(record: JobRecord): boolean { + if (record.error) { + return true; // refresh retries exhausted — settled with an error + } + if (!TERMINAL_STATUSES[record.type].has(record.status)) { + return false; + } + if (record.type === 'batch-evaluation' && record.status !== NOT_FOUND_STATUS) { + const batch = record; + if (!batch.resultsFetched) { + return false; // terminal status, but results still need fetching — keep refreshable + } + } + // AB test: if terminal due to failure but failureReason not yet captured, keep refreshable. + if (record.type === 'ab-test' && record.status !== NOT_FOUND_STATUS && !record.failureReason) { + return false; + } + return true; +} diff --git a/src/cli/operations/jobs/shared/engine.ts b/src/cli/operations/jobs/shared/engine.ts new file mode 100644 index 000000000..fa4d791a8 --- /dev/null +++ b/src/cli/operations/jobs/shared/engine.ts @@ -0,0 +1,231 @@ +/** + * Job Engine factory. NOT a singleton — `createJobEngine(configIO)` per call site; configIO is + * injected and passed only to the handler create()/settle()/promote() steps. The engine owns + * persistence (start → save) and the refresh-on-read lifecycle; commands/TUI stay thin.
+ */ +import { ConfigIO, JobNotFoundError } from '../../../../lib'; +import type { Result } from '../../../../lib/result'; +import { validateAwsCredentials } from '../../../aws/account'; +import { abTestHandler } from '../ab-test/handler'; +import { batchEvaluationHandler } from '../batch-evaluation/handler'; +import { insightsHandler } from '../insights/handler'; +import { recommendationHandler } from '../recommendation/handler'; +import { JOB_CAPABILITIES, JOB_TYPE_LABELS, isTerminal } from './constants'; +import { deleteRecord, listRecords, loadRecord, saveRecord } from './storage'; +import type { + DebugCheckResult, + DebuggableJobType, + HandlerByType, + JobCapabilities, + JobEngine, + JobRecord, + JobType, + ListOptions, + PausableJobType, + PromotableJobType, + RecordByType, + StartOptionsByType, + StoppableJobType, +} from './types'; + +/** Static registry; `satisfies` makes a missing trait on any handler a compile-time error. */ +const handlers = { + recommendation: recommendationHandler, + 'batch-evaluation': batchEvaluationHandler, + 'ab-test': abTestHandler, + insights: insightsHandler, +} as const satisfies HandlerByType; + +/** Does this handler compose the optional Settles trait? */ +function hasSettle( + handler: HandlerByType[T] +): handler is HandlerByType[T] & { settle: (r: JobRecord, c: ConfigIO) => Promise } { + return typeof (handler as { settle?: unknown }).settle === 'function'; +} + +export function createJobEngine(configIO: ConfigIO = new ConfigIO()): JobEngine { + const REFRESH_MAX_RETRIES = 3; + + /** + * Refresh one record from the service and persist it. Retries up to REFRESH_MAX_RETRIES on failure. + * On exhausted retries, persists the error on the record (makes it terminal via isTerminal()). + * Does NOT run settle() (that's sequential). 
+ */ + async function refreshOne(record: Extract): Promise { + if (isTerminal(record)) { + return record; + } + const handler = handlers[record.type]; + type RefreshFn = (r: typeof record) => Promise>; + let lastErr: Error | undefined; + + for (let attempt = 0; attempt < REFRESH_MAX_RETRIES; attempt++) { + const result = await (handler.refresh as RefreshFn)(record); + if (result.success) { + const updated = result.record; + if (updated.error) { + delete (updated as { error?: string }).error; + } + saveRecord(updated); + return updated; + } + lastErr = result.error; + } + + // All retries exhausted — persist the error so isTerminal() settles and the user sees it. + const failed: JobRecord = { ...record, error: lastErr!.message }; + saveRecord(failed); + return failed; + } + + /** Run a handler's optional settle() step (sequential; may mutate project config). */ + async function settleOne(record: JobRecord): Promise { + const handler = handlers[record.type]; + if (!hasSettle(handler)) { + return record; + } + try { + const settled = await handler.settle(record, configIO); + if (settled !== record) { + saveRecord(settled); + } + return settled; + } catch { + return record; + } + } + + return { + async start(type: T, opts: StartOptionsByType[T]): Promise> { + const creds = await validateCredentials(); + if (!creds.success) { + return creds; + } + const handler = handlers[type]; + // create is typed per-handler; the registry guarantees opts/record line up with `type`. 
+ const result = await ( + handler.create as (o: StartOptionsByType[T], c: ConfigIO) => Promise> + )(opts, configIO); + if (result.success) { + saveRecord(result.record); + } + return result; + }, + + async get(type: T, id: string): Promise { + const record = loadRecord(type, id); + if (!record) { + return undefined; + } + const refreshed = await refreshOne(record); + const settled = await settleOne(refreshed); + return settled as RecordByType[T]; + }, + + async list(opts?: ListOptions): Promise { + const records = listRecords(opts?.type); + // Refresh statuses in parallel... + const refreshed = await Promise.all(records.map(r => refreshOne(r))); + // ...then run any config-mutating settle steps SEQUENTIALLY (avoids concurrent agentcore.json writes). + const settled: JobRecord[] = []; + for (const r of refreshed) { + settled.push(await settleOne(r)); + } + let out = settled; + if (opts?.agent) { + out = out.filter(r => r.agent === opts.agent); + } + out.sort((a, b) => (b.createdAt > a.createdAt ? 1 : b.createdAt < a.createdAt ? -1 : a.id < b.id ? -1 : 1)); + if (opts?.limit != null) { + out = out.slice(0, opts.limit); + } + return out; + }, + + async stop(type: StoppableJobType, id: string): Promise { + const record = loadRecord(type, id); + if (!record) { + return { success: false, error: new JobNotFoundError(`${JOB_TYPE_LABELS[type]} "${id}" not found.`) }; + } + // Only stoppable handlers are reachable here (type-narrowed); the registry guarantees the match. 
+ const result = await (handlers[type].stop as (r: JobRecord) => Promise)(record); + if (result.success) { + saveRecord({ ...record, status: 'STOPPING' }); + } + return result; + }, + + async pause(type: PausableJobType, id: string): Promise { + const record = loadRecord(type, id); + if (!record) { + return { success: false, error: new JobNotFoundError(`${JOB_TYPE_LABELS[type]} "${id}" not found.`) }; + } + const result = await (handlers[type].pause as (r: JobRecord) => Promise>)(record); + if (result.success) { + saveRecord(result.record); + } + return result; + }, + + async resume(type: PausableJobType, id: string): Promise { + const record = loadRecord(type, id); + if (!record) { + return { success: false, error: new JobNotFoundError(`${JOB_TYPE_LABELS[type]} "${id}" not found.`) }; + } + const result = await (handlers[type].resume as (r: JobRecord) => Promise>)(record); + if (result.success) { + saveRecord(result.record); + } + return result; + }, + + async promote(type: PromotableJobType, id: string): Promise { + const record = loadRecord(type, id); + if (!record) { + return { success: false, error: new JobNotFoundError(`${JOB_TYPE_LABELS[type]} "${id}" not found.`) }; + } + const result = await ( + handlers[type].promote as (r: JobRecord, c: ConfigIO) => Promise> + )(record, configIO); + if (result.success) { + saveRecord(result.record); + } + return result; + }, + + async archive(type: JobType, id: string): Promise { + const record = loadRecord(type, id); + if (!record) { + return { success: false, error: new JobNotFoundError(`${JOB_TYPE_LABELS[type]} "${id}" not found.`) }; + } + const handler = handlers[type]; + const result = await (handler.archive as (r: JobRecord) => Promise)(record); + if (result.success) { + deleteRecord(type, id); + } + return result; + }, + + async debug(type: DebuggableJobType, id: string): Promise> { + const record = loadRecord(type, id); + if (!record) { + return { success: false, error: new 
JobNotFoundError(`${JOB_TYPE_LABELS[type]} "${id}" not found.`) }; + } + return (handlers[type].debug as (r: JobRecord) => Promise>)(record); + }, + + capabilities(type: JobType): JobCapabilities { + return JOB_CAPABILITIES[type]; + }, + }; +} + +/** Wrap validateAwsCredentials (which throws) into a Result so start() can return it cleanly. */ +async function validateCredentials(): Promise { + try { + await validateAwsCredentials(); + return { success: true }; + } catch (err) { + return { success: false, error: err instanceof Error ? err : new Error(String(err)) }; + } +} diff --git a/src/cli/operations/jobs/shared/format.ts b/src/cli/operations/jobs/shared/format.ts new file mode 100644 index 000000000..563de5985 --- /dev/null +++ b/src/cli/operations/jobs/shared/format.ts @@ -0,0 +1,13 @@ +/** Shared presentation helpers for job CLI output. */ + +/** Format an ISO timestamp for CLI tables/detail output (shared by all job types). */ +export function formatJobDate(iso: string | undefined): string { + if (!iso) return 'unknown'; + return new Date(iso).toLocaleString([], { + year: 'numeric', + month: 'short', + day: 'numeric', + hour: '2-digit', + minute: '2-digit', + }); +} diff --git a/src/cli/operations/jobs/shared/region.ts b/src/cli/operations/jobs/shared/region.ts new file mode 100644 index 000000000..739b9d64f --- /dev/null +++ b/src/cli/operations/jobs/shared/region.ts @@ -0,0 +1,36 @@ +/** + * Region resolution for jobs. Region is resolved ONCE in create() (superset precedence, + * no regression to either legacy path) and baked into the stored ARN; refresh/stop/archive + * parse it back out of the ARN rather than storing a separate field. + */ +import { detectRegion } from '../../../aws/region'; + +/** AWS targets carry a per-target region; we only need that field here. */ +interface RegionTarget { + region: string; +} + +/** + * Resolve the region for a new job, once, at create() time. 
+ * Precedence (superset of both legacy paths): explicit option → first deployment target → detected region. + */ +export async function resolveJobRegion(optsRegion: string | undefined, awsTargets: RegionTarget[]): Promise { + if (optsRegion) { + return optsRegion; + } + if (awsTargets.length > 0 && awsTargets[0]!.region) { + return awsTargets[0]!.region; + } + const { region } = await detectRegion(); + return region; +} + +/** + * Parse the region out of a service ARN. + * ARN format: arn:{partition}:{service}:{region}:{account}:{resource} → field index 3 is the region. + * Engine-created ARNs are always well-formed; returns undefined for a malformed/region-less ARN. + */ +export function regionFromArn(arn: string): string | undefined { + const region = arn.split(':')[3]; + return region && region.length > 0 ? region : undefined; +} diff --git a/src/cli/operations/jobs/shared/resolve-agent-state.ts b/src/cli/operations/jobs/shared/resolve-agent-state.ts new file mode 100644 index 000000000..8335664b6 --- /dev/null +++ b/src/cli/operations/jobs/shared/resolve-agent-state.ts @@ -0,0 +1,21 @@ +/** + * Resolve a deployed agent runtime from deployed state by name. + * Hoisted here to dedupe the copies previously inlined in run-recommendation.ts and + * run-batch-evaluation.ts. + */ +import type { DeployedState } from '../../../../schema'; + +export interface ResolvedAgentState { + runtimeId: string; + runtimeArn: string; + roleArn?: string; +} + +/** Find the agent runtime across all deployment targets; undefined if not deployed. 
*/ +export function resolveAgentState(deployedState: DeployedState, agentName: string): ResolvedAgentState | undefined { + for (const target of Object.values(deployedState.targets)) { + const agent = target.resources?.runtimes?.[agentName]; + if (agent) return agent; + } + return undefined; +} diff --git a/src/cli/operations/jobs/shared/storage.ts b/src/cli/operations/jobs/shared/storage.ts new file mode 100644 index 000000000..845a43195 --- /dev/null +++ b/src/cli/operations/jobs/shared/storage.ts @@ -0,0 +1,114 @@ +/** + * Pure file I/O for job records: `/.cli/jobs/{STORAGE_DIRS[type]}/{id}.json`. + * + * Hardened over the legacy per-type storage modules: + * - atomic writes (temp file + rename) so a killed process can't leave half-written JSON, + * - per-file parse guard in listRecords (one corrupt file is skipped, never crashes the list), + * - assertSafeId path-traversal guard, and content-`id` is authoritative over the filename. + * + * Legacy (pre-engine) records in the old shape are intentionally ignored (fresh start): any file + * that doesn't parse to a current JobRecord (missing/mismatched `type`) is skipped by listRecords + * and treated as not-found by loadRecord. + */ +import { CLI_SYSTEM_DIR, NoProjectError, findConfigRoot } from '../../../../lib'; +import { STORAGE_DIRS } from './constants'; +import type { JobRecord, JobType } from './types'; +import { existsSync, mkdirSync, readFileSync, readdirSync, renameSync, rmSync, writeFileSync } from 'fs'; +import { join } from 'path'; + +function cliDir(): string { + const configRoot = findConfigRoot(); + if (!configRoot) { + throw new NoProjectError(); + } + return join(configRoot, CLI_SYSTEM_DIR); +} + +function dirFor(type: JobType): string { + return join(cliDir(), 'jobs', STORAGE_DIRS[type]); +} + +function fileFor(type: JobType, id: string): string { + return join(dirFor(type), `${id}.json`); +} + +/** Reject ids that could escape the storage directory. 
*/ +export function assertSafeId(id: string): void { + if (!id || /[/\\]/.test(id)) { + throw new Error(`Invalid job id: must be non-empty and contain no path separators`); + } +} + +/** Is this parsed object a current-shape JobRecord for the expected type? */ +function isJobRecordOfType(value: unknown, type: JobType): value is JobRecord { + if (value === null || typeof value !== 'object') return false; + const rec = value as Partial; + return rec.type === type && typeof rec.id === 'string' && rec.id.length > 0; +} + +/** Persist a record atomically (temp file + rename). */ +export function saveRecord(record: JobRecord): string { + assertSafeId(record.id); + const dir = dirFor(record.type); + mkdirSync(dir, { recursive: true }); + + const filePath = fileFor(record.type, record.id); + const tmpPath = `${filePath}.tmp`; + writeFileSync(tmpPath, JSON.stringify(record, null, 2)); + renameSync(tmpPath, filePath); + return filePath; +} + +/** Load one record. Returns undefined if missing, unparseable, or a legacy/foreign shape. */ +export function loadRecord(type: T, id: string): Extract | undefined { + assertSafeId(id); + const filePath = fileFor(type, id); + if (!existsSync(filePath)) { + return undefined; + } + try { + const parsed = JSON.parse(readFileSync(filePath, 'utf-8')) as unknown; + if (!isJobRecordOfType(parsed, type)) { + return undefined; // legacy/foreign shape — ignored (fresh start) + } + return parsed as Extract; + } catch { + return undefined; // corrupt file — treat as not found + } +} + +/** List records for one type (or all types). Skips corrupt/legacy files; never throws on a bad file. */ +export function listRecords(type: T): Extract[]; +export function listRecords(type?: JobType): JobRecord[]; +export function listRecords(type?: JobType): JobRecord[] { + const types: JobType[] = type ? 
[type] : (Object.keys(STORAGE_DIRS) as JobType[]); + const records: JobRecord[] = []; + + for (const t of types) { + const dir = dirFor(t); + if (!existsSync(dir)) continue; + for (const file of readdirSync(dir)) { + if (!file.endsWith('.json')) continue; + try { + const parsed = JSON.parse(readFileSync(join(dir, file), 'utf-8')) as unknown; + if (isJobRecordOfType(parsed, t)) { + records.push(parsed); // trust content `id`, not the filename + } + } catch { + // skip corrupt/half-written file + } + } + } + return records; +} + +/** Delete the local record file. Returns true if it existed and was removed. */ +export function deleteRecord(type: JobType, id: string): boolean { + assertSafeId(id); + const filePath = fileFor(type, id); + if (!existsSync(filePath)) { + return false; + } + rmSync(filePath); + return true; +} diff --git a/src/cli/operations/jobs/shared/types.ts b/src/cli/operations/jobs/shared/types.ts new file mode 100644 index 000000000..7f501c522 --- /dev/null +++ b/src/cli/operations/jobs/shared/types.ts @@ -0,0 +1,456 @@ +/** + * Job Engine type system (Design 2 — composed traits + type-narrowed signatures). + * + * A "job" is an async, fire-and-forget operation (recommendation, batch evaluation, A/B test, or insights): + * `start` makes one API call + saves a local record; `get`/`list` refresh non-terminal + * records on read. Handlers are composed from small traits; the engine's public surface + * is type-narrowed so illegal operations (e.g. stopping a recommendation) are compile errors.
+ */ +import type { ConfigIO } from '../../../../lib'; +import type { Result } from '../../../../lib/result'; +import type { ABTestEvaluationConfig, ABTestResults, GatewayFilter } from '../../../aws/agentcore-ab-tests'; +import type { + BatchEvaluationResultEntry, + EvaluationResults, + SessionMetadataEntry, +} from '../../../aws/agentcore-batch-evaluation'; +import type { RecommendationResult, RecommendationType } from '../../../aws/agentcore-recommendation'; + +export type { RecommendationType } from '../../../aws/agentcore-recommendation'; + +// ============================================================================ +// Job types & statuses +// ============================================================================ + +/** The job types this engine manages. */ +export type JobType = 'recommendation' | 'batch-evaluation' | 'ab-test' | 'insights'; + +/** AB test creation mode — bundle-versioned variants vs gateway-target variants. */ +export type ABTestMode = 'config-bundle' | 'target-based'; + +/** CLI-facing input source for a recommendation. */ +export type RecommendationInputSource = 'config-bundle' | 'inline' | 'file'; + +/** CLI-facing trace source for a recommendation. */ +export type RecommendationTraceSource = 'cloudwatch' | 'sessions' | 'spans-file' | 'batch-evaluation'; + +/** Where the batch evaluation sessions came from. */ +export type BatchEvaluationSource = 'traces' | 'dataset'; + +/** Tool name → JSONPath pairs for config-bundle tool descriptions. */ +export interface ToolDescJsonPath { + toolName: string; + toolDescriptionJsonPath: string; +} + +// ============================================================================ +// Records (discriminated union on `type`) +// ============================================================================ + +/** Fields every job record carries, regardless of type. Note: region is NOT stored — parse from `arn`. */ +export interface JobRecordBase { + /** Job type discriminator. 
*/ + type: JobType; + /** Service-assigned job id (also the storage filename). */ + id: string; + /** Service ARN — region is parsed back out of this for refresh/stop/archive. */ + arn: string; + /** Latest known status (raw service string; may be a value not in the JobStatus union). */ + status: string; + /** ISO timestamp the job was created (API value, else local clock at create time). */ + createdAt: string; + /** ISO timestamp the job reached a terminal state. */ + completedAt?: string; + /** Agent the job ran against. */ + agent: string; + /** Path to the local ExecLogger trace for the start call. */ + logFilePath?: string; + /** Persistent error from the last failed refresh (after retries exhausted). Settles the record. */ + error?: string; +} + +export interface RecommendationJobRecord extends JobRecordBase { + type: 'recommendation'; + recommendationType: RecommendationType; + /** Raw user-supplied evaluator display name(s) (resolved to ARNs only transiently for the API). */ + evaluators: string[]; + inputSource: RecommendationInputSource; + /** Source config-bundle identity (needed by the apply-to-bundle settle step). */ + bundleName?: string; + bundleArn?: string; + bundleVersion?: string; + systemPromptJsonPath?: string; + toolDescJsonPaths?: ToolDescJsonPath[]; + /** Optimized artifact, populated by refresh() once COMPLETED. */ + result?: RecommendationResult; + /** Top-level failure reasons from the API (FAILED only). */ + statusReasons?: string[]; + /** Flattened failure detail (errorCode/errorMessage) for display (FAILED only). */ + failureDetail?: string; + /** New config-bundle version already synced to agentcore.json (idempotency guard for settle). */ + syncedVersionId?: string; + /** Customer-managed KMS key ARN used to encrypt results (echoes the --kms-key value sent at create). 
*/ + kmsKeyArn?: string; +} + +export interface BatchEvaluationJobRecord extends JobRecordBase { + type: 'batch-evaluation'; + name: string; + /** Resolved evaluator ids sent to the API (short ids / Builtin.*). */ + evaluators: string[]; + source?: BatchEvaluationSource; + dataset?: { id: string; version: string }; + /** Server-computed evaluator summaries (from GetBatchEvaluation). */ + evaluationResults?: EvaluationResults; + /** Per-session scores fetched from CloudWatch output logs on terminal status. */ + results?: BatchEvaluationResultEntry[]; + /** True once per-session results have been fetched (gates the on-terminal retry). */ + resultsFetched?: boolean; + /** Customer-managed KMS key ARN used to encrypt results (echoes --kms-key; refreshed from GetBatchEvaluation). */ + kmsKeyArn?: string; +} + +/** Variant summary stored on the AB-test record (the resolved ARNs/targets sent to the API). */ +export interface ABTestVariantSummary { + name: 'C' | 'T1'; + weight: number; + bundleArn?: string; + bundleVersion?: string; + targetName?: string; +} + +export interface ABTestJobRecord extends JobRecordBase { + type: 'ab-test'; + /** + * AB test has two API axes: `status` (ACTIVE/FAILED/CREATE_FAILED) and `executionStatus` + * (RUNNING/PAUSED/STOPPED). We store lifecycle `status` in JobRecordBase.status (so + * isTerminal() works like other job types), and keep `executionStatus` here for + * keybinding gating (P/R/S). + */ + lifecycleStatus: string; + name: string; + mode: ABTestMode; + gatewayArn: string; + /** Gateway NAME (spec key) — needed by promote() to locate gateway targets in agentcore.json. */ + gatewayName?: string; + roleArn?: string; + /** True when the CLI auto-created the role in create() (so archive() cleans it up). */ + roleCreatedByCli?: boolean; + variants: ABTestVariantSummary[]; + evaluationConfig: ABTestEvaluationConfig; + /** Gateway filter applied to the test (single target path), persisted for display in `view ab-test`. 
*/ + gatewayFilter?: GatewayFilter; + maxDurationExpiresAt?: string; + /** Per-evaluator comparison metrics, populated by refresh(). */ + results?: ABTestResults; + failureReason?: string; +} + +// ============================================================================ +// Insights (failure analysis) types +// ============================================================================ + +export interface InsightRelatedSession { + sessionId?: string; + recommendationType?: string; +} + +export interface InsightRootCause { + rootCauseCategory?: string; + rootCauseDescription?: string; + recommendation?: string; + relatedSessions?: InsightRelatedSession[]; +} + +export interface InsightFailureCategory { + failureCategoryName?: string; + failureCategoryDescription?: string; + categoryGroupName?: string; + rootCauses?: InsightRootCause[]; +} + +export interface FailureAnalysisResult { + failureCategories?: InsightFailureCategory[]; +} + +export interface InsightsJobRecord extends JobRecordBase { + type: 'insights'; + name: string; + /** Insight types requested. */ + insights: string[]; + /** Optional evaluators (needed for recommendation chaining). */ + evaluators?: string[]; + /** Server-computed evaluation results. */ + evaluationResults?: EvaluationResults; + /** Structured failure analysis results from GetBatchEvaluation. */ + failureAnalysisResult?: FailureAnalysisResult; +} + +export type JobRecord = RecommendationJobRecord | BatchEvaluationJobRecord | ABTestJobRecord | InsightsJobRecord; + +// ============================================================================ +// Start options (engine-facing; non-colliding with the AWS-layer Start* types) +// ============================================================================ + +export interface StartRecommendationJobOptions { + type: RecommendationType; + agent?: string; + /** Evaluator name(s), Builtin.* ids, or ARNs (exactly one for system-prompt; none for tool-description). 
*/ + evaluators: string[]; + inputSource: RecommendationInputSource; + bundleName?: string; + bundleVersion?: string; + systemPromptJsonPath?: string; + toolDescJsonPaths?: ToolDescJsonPath[]; + inlineContent?: string; + promptFile?: string; + tools?: string[]; + traceSource: RecommendationTraceSource; + lookbackDays?: number; + sessionIds?: string[]; + spansFile?: string; + /** Use a local insights run as trace source (resolves batchEvaluationArn from .cli/jobs/insights/) */ + fromInsights?: string; + /** Use a batch evaluation ARN directly as trace source */ + batchEvaluationArn?: string; + region?: string; + /** Optional recommendation name. */ + recommendationName?: string; + /** KMS key ARN for encrypting recommendation results. */ + kmsKeyArn?: string; + /** Progress for the slow pre-start span fetch (sessions/spans-file). */ + onProgress?: (status: string, message: string) => void; +} + +export interface StartBatchEvaluationJobOptions { + agent: string; + /** Evaluator name(s) / Builtin.* ids (resolved to short ids in create()). */ + evaluators: string[]; + name?: string; + region?: string; + /** Sessions to evaluate (caller resolves these from a dataset Phase-1 run when applicable). */ + sessionIds?: string[]; + /** Lookback window (used only when no sessionIds are given). */ + lookbackDays?: number; + /** Ground-truth metadata (explicit or dataset-derived; caller supplies). */ + sessionMetadata?: SessionMetadataEntry[]; + /** Runtime endpoint name (e.g. PROMPT_V1). */ + endpoint?: string; + /** Recorded on the job for display; the engine does NOT run dataset Phase-1 (caller does). */ + source?: BatchEvaluationSource; + dataset?: { id: string; version: string }; + /** KMS key ARN for encrypting batch evaluation results. 
*/ + kmsKeyArn?: string; + onProgress?: (status: string, message: string) => void; +} + +export interface StartABTestJobOptions { + name: string; + mode: ABTestMode; + description?: string; + /** Gateway NAME — resolved to an ARN in create() against deployed state (must pre-exist). */ + gateway: string; + /** Config-bundle mode: the runtime being tested (also used as the record's `agent`). */ + agent?: string; + // ── config-bundle mode ── + controlBundle?: string; + controlVersion?: string; + treatmentBundle?: string; + treatmentVersion?: string; + /** Single online-eval config name/ARN (applies to both variants). */ + onlineEval?: string; + // ── target-based mode ── + runtime?: string; + controlTarget?: string; + treatmentTarget?: string; + controlOnlineEval?: string; + treatmentOnlineEval?: string; + gatewayFilter?: string; + // ── shared ── + controlWeight: number; + treatmentWeight: number; + enableOnCreate?: boolean; + region?: string; + /** Optional role ARN override; auto-created in create() when omitted. */ + roleArn?: string; + onProgress?: (status: string, message: string) => void; +} + +export interface StartInsightsJobOptions { + agent?: string; + insights: string[]; + evaluators?: string[]; + onlineEvalConfigArn?: string; + lookbackDays?: number; + startTime?: string; + endTime?: string; + sessionIds?: string[]; + name?: string; + region?: string; + endpoint?: string; + onProgress?: (status: string, message: string) => void; +} + +export interface ListOptions { + type?: JobType; + limit?: number; + agent?: string; +} + +// ============================================================================ +// Traits — small focused capabilities composed per job type +// ============================================================================ + +/** Create the job on the service and return the initial record. configIO is injected ONLY here. 
*/ +export interface Startable { + create(opts: O, configIO: ConfigIO): Promise>; +} + +/** Fetch latest state from the service. Returns Result so the engine handles retries/error-persist. */ +export interface Refreshable { + refresh(record: J): Promise>; +} + +/** Stop a running job. */ +export interface Stoppable { + stop(record: J): Promise; +} + +/** Delete the job from the service. */ +export interface Archivable { + archive(record: J): Promise; +} + +/** + * Optional per-type "settle" step the engine runs SEQUENTIALLY after a record first + * reaches a terminal status — separate from the parallel refresh() because it may mutate + * project config (and therefore needs configIO and must not race). Only recommendation + * composes this (apply-to-bundle sync). + */ +export interface Settles { + settle(record: J, configIO: ConfigIO): Promise; +} + +/** Pause/resume a running job (AB test only). */ +export interface Pausable { + pause(record: J): Promise>; + resume(record: J): Promise>; +} + +/** Promote a job's result into project config (AB test only — stop + apply winning variant). */ +export interface Promotable { + promote(record: J, configIO: ConfigIO): Promise>; +} + +/** Structured result from a live health/debug check. */ +export interface DebugCheckResult { + label: string; + status: 'pass' | 'fail' | 'warn'; + detail: string; +} + +/** Run live diagnostic checks against the service (AB test: eval config, gateway, spans). 
*/ +export interface Debuggable { + debug(record: J): Promise>; +} + +// ============================================================================ +// Composed handlers + registries +// ============================================================================ + +export type RecommendationHandler = Startable & + Refreshable & + Settles & + Archivable; + +export type BatchEvaluationHandler = Startable & + Refreshable & + Stoppable & + Archivable; + +export type ABTestHandler = Startable & + Refreshable & + Stoppable & + Pausable & + Promotable & + Debuggable & + Archivable; + +export type InsightsHandler = Startable & + Refreshable & + Archivable; + +export interface RecordByType { + recommendation: RecommendationJobRecord; + 'batch-evaluation': BatchEvaluationJobRecord; + 'ab-test': ABTestJobRecord; + insights: InsightsJobRecord; +} + +export interface StartOptionsByType { + recommendation: StartRecommendationJobOptions; + 'batch-evaluation': StartBatchEvaluationJobOptions; + 'ab-test': StartABTestJobOptions; + insights: StartInsightsJobOptions; +} + +export interface HandlerByType { + recommendation: RecommendationHandler; + 'batch-evaluation': BatchEvaluationHandler; + 'ab-test': ABTestHandler; + insights: InsightsHandler; +} + +/** Job types whose handler composes a given trait — derived so they track trait composition automatically. */ +export type StoppableJobType = { + [K in JobType]: HandlerByType[K] extends Stoppable ? K : never; +}[JobType]; + +export type PausableJobType = { + [K in JobType]: HandlerByType[K] extends Pausable ? K : never; +}[JobType]; + +export type PromotableJobType = { + [K in JobType]: HandlerByType[K] extends Promotable ? K : never; +}[JobType]; + +export type DebuggableJobType = { + [K in JobType]: HandlerByType[K] extends Debuggable ? 
K : never; +}[JobType]; + +// ============================================================================ +// Engine +// ============================================================================ + +/** Runtime capability flags for TUI affordances (display only — legality is enforced by types). */ +export interface JobCapabilities { + canStop: boolean; + canPause: boolean; + canPromote: boolean; + canDebug: boolean; +} + +export interface JobEngine { + /** Resolve + ONE API call + save. configIO is read only here. */ + start(type: T, opts: StartOptionsByType[T]): Promise>; + /** Read one record; refresh (+ settle) if non-terminal. */ + get(type: T, id: string): Promise; + /** List records of one type; refresh non-terminal in parallel, settle sequentially. */ + list(opts: ListOptions & { type: T }): Promise; + /** List across all types (union return). */ + list(opts?: ListOptions): Promise; + /** Stop a running job — only stoppable types accepted (compile-time narrowed). */ + stop(type: StoppableJobType, id: string): Promise; + /** Pause a running job — only pausable types accepted (compile-time narrowed). */ + pause(type: PausableJobType, id: string): Promise; + /** Resume a paused job — only pausable types accepted (compile-time narrowed). */ + resume(type: PausableJobType, id: string): Promise; + /** Promote a job's result into project config — only promotable types accepted (compile-time narrowed). */ + promote(type: PromotableJobType, id: string): Promise; + /** Delete from the service + remove the local file. */ + archive(type: JobType, id: string): Promise; + /** Run live diagnostic checks — only debuggable types accepted (compile-time narrowed). */ + debug(type: DebuggableJobType, id: string): Promise>; + /** Display-only capability flags for the TUI. 
*/ + capabilities(type: JobType): JobCapabilities; +} diff --git a/src/cli/operations/jobs/shared/wait.ts b/src/cli/operations/jobs/shared/wait.ts new file mode 100644 index 000000000..c74d2d5a4 --- /dev/null +++ b/src/cli/operations/jobs/shared/wait.ts @@ -0,0 +1,38 @@ +/** + * Block until a job reaches a terminal state by polling engine.get(). + * + * Used by the CLI `--wait` flag. Lives in jobs/ (not the command layer) because it's pure engine + * lifecycle logic — poll get() until isTerminal — reusable by any caller that wants synchronous + * completion (CLI today, potentially a TUI "wait" affordance later). + */ +import { isTerminal } from './constants'; +import type { JobEngine, JobType, RecordByType } from './types'; + +const DEFAULT_POLL_INTERVAL_MS = 5000; + +export interface WaitForTerminalOptions { + /** Poll interval in ms (default 5000). */ + pollIntervalMs?: number; + /** Called with the latest status string on each poll (e.g. to print progress). */ + onTick?: (status: string) => void; +} + +/** + * Poll `engine.get(type, id)` until the job is terminal (or vanishes from storage). + * Returns the final record, or undefined if the job is no longer found locally. + */ +export async function waitForTerminal( + engine: JobEngine, + type: T, + id: string, + options: WaitForTerminalOptions = {} +): Promise { + const pollIntervalMs = options.pollIntervalMs ?? 
DEFAULT_POLL_INTERVAL_MS; + for (;;) { + const record = await engine.get(type, id); + if (!record) return undefined; + options.onTick?.(record.status); + if (isTerminal(record)) return record; + await new Promise(resolve => setTimeout(resolve, pollIntervalMs)); + } +} diff --git a/src/cli/operations/knowledge-base/__tests__/agentic-retrieve-upsert.test.ts b/src/cli/operations/knowledge-base/__tests__/agentic-retrieve-upsert.test.ts new file mode 100644 index 000000000..8d2ab8248 --- /dev/null +++ b/src/cli/operations/knowledge-base/__tests__/agentic-retrieve-upsert.test.ts @@ -0,0 +1,59 @@ +import type { AgentCoreProjectSpec } from '../../../../schema'; +import { upsertAgenticRetrieveTarget } from '../agentic-retrieve-upsert'; +import { describe, expect, it } from 'vitest'; + +type Gateway = AgentCoreProjectSpec['agentCoreGateways'][number]; + +function makeGateway(name = 'main-gw'): Gateway { + return { + name, + targets: [], + authorizerType: 'NONE', + enableSemanticSearch: true, + exceptionLevel: 'NONE', + } as unknown as Gateway; +} + +describe('upsertAgenticRetrieveTarget', () => { + it('creates a new agentic-retrieve target on first call', () => { + const gw = makeGateway(); + upsertAgenticRetrieveTarget(gw, 'kb-1'); + expect(gw.targets).toHaveLength(1); + const target = gw.targets[0]; + expect(target?.name).toBe('main-gw-agentic'); + expect(target?.targetType).toBe('connector'); + expect(target?.connectorId).toBe('bedrock-agentic-retrieve'); + expect(target?.knowledgeBaseIds).toEqual(['kb-1']); + }); + + it('appends to an existing agentic-retrieve target', () => { + const gw = makeGateway(); + upsertAgenticRetrieveTarget(gw, 'kb-1'); + upsertAgenticRetrieveTarget(gw, 'kb-2'); + expect(gw.targets).toHaveLength(1); + expect(gw.targets[0]?.knowledgeBaseIds).toEqual(['kb-1', 'kb-2']); + }); + + it('is idempotent — re-adding the same kb is a no-op', () => { + const gw = makeGateway(); + upsertAgenticRetrieveTarget(gw, 'kb-1'); + upsertAgenticRetrieveTarget(gw, 
'kb-1'); + expect(gw.targets).toHaveLength(1); + expect(gw.targets[0]?.knowledgeBaseIds).toEqual(['kb-1']); + }); + + it('respects a hand-renamed agentic target and only appends', () => { + const gw = makeGateway(); + gw.targets.push({ + name: 'custom-name', + targetType: 'connector', + connectorId: 'bedrock-agentic-retrieve', + knowledgeBaseIds: ['existing-kb'], + } as unknown as Gateway['targets'][number]); + + upsertAgenticRetrieveTarget(gw, 'new-kb'); + expect(gw.targets).toHaveLength(1); + expect(gw.targets[0]?.name).toBe('custom-name'); + expect(gw.targets[0]?.knowledgeBaseIds).toEqual(['existing-kb', 'new-kb']); + }); +}); diff --git a/src/cli/operations/knowledge-base/__tests__/connector-config.test.ts b/src/cli/operations/knowledge-base/__tests__/connector-config.test.ts new file mode 100644 index 000000000..6c4a67550 --- /dev/null +++ b/src/cli/operations/knowledge-base/__tests__/connector-config.test.ts @@ -0,0 +1,119 @@ +import { + CONNECTOR_TYPE_BY_FLAG, + FLAG_BY_CONNECTOR_TYPE, + extractSecretArn, + flagToWireType, + isConnectorConfigType, + readConnectorConfig, +} from '../connector-config'; +import { mkdtempSync, rmSync, writeFileSync } from 'fs'; +import { tmpdir } from 'os'; +import { join } from 'path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; + +describe('connector-config flag↔wire mapping', () => { + it('maps every flag to its wire type', () => { + expect(flagToWireType('s3')).toBe('S3'); + expect(flagToWireType('web-crawler')).toBe('WEB'); + expect(flagToWireType('confluence')).toBe('CONFLUENCE'); + expect(flagToWireType('sharepoint')).toBe('SHAREPOINT'); + expect(flagToWireType('onedrive')).toBe('ONEDRIVE'); + expect(flagToWireType('google-drive')).toBe('GOOGLEDRIVE'); + }); + + it('throws on an unknown flag', () => { + expect(() => flagToWireType('dropbox')).toThrow(/unknown data source type/i); + }); + + it('round-trips flag → wire → flag', () => { + for (const flag of Object.keys(CONNECTOR_TYPE_BY_FLAG)) { + 
const wire = flagToWireType(flag); + expect(FLAG_BY_CONNECTOR_TYPE[wire]).toBe(flag); + } + }); + + it('identifies non-S3 connector wire types', () => { + expect(isConnectorConfigType('WEB')).toBe(true); + expect(isConnectorConfigType('S3')).toBe(false); + }); +}); + +describe('readConnectorConfig', () => { + let dir: string; + beforeEach(() => { + dir = mkdtempSync(join(tmpdir(), 'cc-')); + }); + afterEach(() => rmSync(dir, { recursive: true, force: true })); + + it('reads and validates a WEB config whose type matches', () => { + const p = join(dir, 'web.json'); + writeFileSync( + p, + JSON.stringify({ + type: 'WEB', + version: '1', + connectionConfiguration: { authType: 'NO_AUTH', seedUrls: ['https://x/'] }, + crawlConfiguration: {}, + }) + ); + const r = readConnectorConfig(p, 'WEB'); + expect(r.parsed.type).toBe('WEB'); + expect(r.warnings).toHaveLength(0); + }); + + it('errors when the file is missing', () => { + expect(() => readConnectorConfig(join(dir, 'nope.json'), 'WEB')).toThrow(/not found/i); + }); + + it('errors on invalid JSON', () => { + const p = join(dir, 'bad.json'); + writeFileSync(p, '{ not json'); + expect(() => readConnectorConfig(p, 'WEB')).toThrow(/not valid JSON/i); + }); + + it('errors when the parsed value is not an object', () => { + const p = join(dir, 'arr.json'); + writeFileSync(p, '[]'); + expect(() => readConnectorConfig(p, 'WEB')).toThrow(/must be a JSON object/i); + }); + + it('errors when the config type disagrees with the declared type', () => { + const p = join(dir, 'mismatch.json'); + writeFileSync(p, JSON.stringify({ type: 'CONFLUENCE', connectionConfiguration: {} })); + expect(() => readConnectorConfig(p, 'WEB')).toThrow(/does not match/i); + }); + + it('warns (does not throw) when an auth connector has no secretArn', () => { + const p = join(dir, 'conf.json'); + writeFileSync( + p, + JSON.stringify({ type: 'CONFLUENCE', connectionConfiguration: { hostUrl: 'https://x', authType: 'OAUTH2' } }) + ); + const r = 
readConnectorConfig(p, 'CONFLUENCE'); + expect(r.warnings.some(w => /secretArn/i.test(w))).toBe(true); + }); + + it('does not warn for a WEB config with NO_AUTH and no secretArn', () => { + const p = join(dir, 'web2.json'); + writeFileSync(p, JSON.stringify({ type: 'WEB', connectionConfiguration: { authType: 'NO_AUTH' } })); + const r = readConnectorConfig(p, 'WEB'); + expect(r.warnings).toHaveLength(0); + }); +}); + +describe('extractSecretArn', () => { + it('returns the secretArn from connectionConfiguration', () => { + expect( + extractSecretArn({ + type: 'CONFLUENCE', + connectionConfiguration: { secretArn: 'arn:aws:secretsmanager:us-west-2:1:secret:x' }, + }) + ).toBe('arn:aws:secretsmanager:us-west-2:1:secret:x'); + }); + it('returns undefined when absent', () => { + expect(extractSecretArn({ type: 'WEB', connectionConfiguration: { authType: 'NO_AUTH' } })).toBeUndefined(); + }); + it('returns undefined when connectionConfiguration is missing', () => { + expect(extractSecretArn({ type: 'WEB' })).toBeUndefined(); + }); +}); diff --git a/src/cli/operations/knowledge-base/__tests__/hydrate-data-sources.test.ts b/src/cli/operations/knowledge-base/__tests__/hydrate-data-sources.test.ts new file mode 100644 index 000000000..2748d64cd --- /dev/null +++ b/src/cli/operations/knowledge-base/__tests__/hydrate-data-sources.test.ts @@ -0,0 +1,110 @@ +import * as bedrockAgent from '../../../aws/bedrock-agent'; +import { hydrateKnowledgeBaseDataSources } from '../hydrate-data-sources'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +vi.mock('../../../aws/bedrock-agent'); + +describe('hydrateKnowledgeBaseDataSources', () => { + beforeEach(() => vi.mocked(bedrockAgent.listDataSources).mockReset()); + afterEach(() => vi.restoreAllMocks()); + + it('falls back to listDataSources, mapping deployed DSes by URI hash suffix', async () => { + // Stack pre-dates the per-DS CFN outputs from L3 #234, so dataSources is + // empty when we get here. 
The L3 names each DS as + // `${kbPhysicalName}_ds_${first8charsOfSha256(uri)}` + // Hashes computed from the URIs below: + // s3://b/a/ → 28ebaa59 + // s3://b/b/ → 87791e1d + // ListDataSources order is not guaranteed; we recover the URI by hash. + vi.mocked(bedrockAgent.listDataSources).mockResolvedValueOnce([ + { dataSourceId: 'DS-second', name: 'TestProj_docs_ds_87791e1d', status: 'AVAILABLE' }, + { dataSourceId: 'DS-first', name: 'TestProj_docs_ds_28ebaa59', status: 'AVAILABLE' }, + ] as never); + + const knowledgeBases = { + docs: { + knowledgeBaseId: 'KB1', + knowledgeBaseArn: 'arn:aws:bedrock:us-west-2:0:knowledge-base/KB1', + dataSources: [], + }, + }; + + await hydrateKnowledgeBaseDataSources({ + knowledgeBases, + knowledgeBaseSpecs: [ + { + type: 'AgentCoreKnowledgeBase', + name: 'docs', + dataSources: [ + { type: 'S3', uri: 's3://b/a/' }, + { type: 'S3', uri: 's3://b/b/' }, + ], + } as never, + ], + region: 'us-west-2', + }); + + expect(knowledgeBases.docs.dataSources).toEqual([ + { dataSourceId: 'DS-first', uri: 's3://b/a/' }, + { dataSourceId: 'DS-second', uri: 's3://b/b/' }, + ]); + }); + + it('is a no-op when CFN outputs already populated dataSources[]', async () => { + const listSpy = vi.mocked(bedrockAgent.listDataSources).mockResolvedValue([] as never); + const knowledgeBases = { + docs: { + knowledgeBaseId: 'KB1', + knowledgeBaseArn: 'arn:x', + dataSources: [{ dataSourceId: 'DS-from-cfn', uri: 's3://b/a/' }], + }, + }; + + await hydrateKnowledgeBaseDataSources({ + knowledgeBases, + knowledgeBaseSpecs: [ + { + type: 'AgentCoreKnowledgeBase', + name: 'docs', + dataSources: [{ type: 'S3', uri: 's3://b/a/' }], + } as never, + ], + region: 'us-west-2', + }); + + expect(listSpy).not.toHaveBeenCalled(); + expect(knowledgeBases.docs.dataSources).toEqual([{ dataSourceId: 'DS-from-cfn', uri: 's3://b/a/' }]); + }); + + it('leaves dataSources empty if listDataSources returns []', async () => { + 
vi.mocked(bedrockAgent.listDataSources).mockResolvedValueOnce([]); + const knowledgeBases = { + docs: { knowledgeBaseId: 'KB1', knowledgeBaseArn: 'arn:x', dataSources: [] as never[] }, + }; + await hydrateKnowledgeBaseDataSources({ + knowledgeBases, + knowledgeBaseSpecs: [ + { + type: 'AgentCoreKnowledgeBase', + name: 'docs', + dataSources: [{ type: 'S3', uri: 's3://b/d/' }], + } as never, + ], + region: 'us-west-2', + }); + expect(knowledgeBases.docs.dataSources).toEqual([]); + }); + + it('skips KBs without a matching local spec', async () => { + const listSpy = vi.mocked(bedrockAgent.listDataSources).mockResolvedValue([] as never); + const knowledgeBases = { + orphan: { knowledgeBaseId: 'KB1', knowledgeBaseArn: 'arn:x', dataSources: [] as never[] }, + }; + await hydrateKnowledgeBaseDataSources({ + knowledgeBases, + knowledgeBaseSpecs: [], // no specs + region: 'us-west-2', + }); + expect(listSpy).not.toHaveBeenCalled(); + }); +}); diff --git a/src/cli/operations/knowledge-base/__tests__/templates.test.ts b/src/cli/operations/knowledge-base/__tests__/templates.test.ts new file mode 100644 index 000000000..59cab6df9 --- /dev/null +++ b/src/cli/operations/knowledge-base/__tests__/templates.test.ts @@ -0,0 +1,18 @@ +import { readConnectorConfig } from '../connector-config'; +import { join } from 'path'; +import { describe, expect, it } from 'vitest'; + +const ROOT = join(__dirname, '../../../../../docs/connector-config-templates'); + +describe('connector-config templates parse', () => { + it.each([ + ['web-crawler.json', 'WEB'], + ['confluence.json', 'CONFLUENCE'], + ['sharepoint.json', 'SHAREPOINT'], + ['onedrive.json', 'ONEDRIVE'], + ['google-drive.json', 'GOOGLEDRIVE'], + ] as const)('%s validates as %s', (file, type) => { + const r = readConnectorConfig(join(ROOT, file), type); + expect(r.parsed.type).toBe(type); + }); +}); diff --git a/src/cli/operations/knowledge-base/agentic-retrieve-upsert.ts b/src/cli/operations/knowledge-base/agentic-retrieve-upsert.ts new 
file mode 100644 index 000000000..10702b31d --- /dev/null +++ b/src/cli/operations/knowledge-base/agentic-retrieve-upsert.ts @@ -0,0 +1,42 @@ +import type { AgentCoreGatewayTarget, AgentCoreProjectSpec } from '../../../schema'; +import { CONNECTOR_ID } from '../../../schema'; + +/** + * Ensure exactly one bedrock-agentic-retrieve target exists on this gateway, + * with kbReference present in its knowledgeBaseIds[]. Idempotent. + * + * - Creates the target on first call (named `${gateway.name}-agentic`). + * - Appends to it on subsequent calls if kbReference is missing. + * - No-op if kbReference is already in the agentic target's knowledgeBaseIds[]. + * + * Mutates `gateway.targets` in place. Used by both KnowledgeBasePrimitive + * (project-owned KBs via `add knowledge-base --gateway`) and + * GatewayTargetPrimitive (external KBs via `add gateway-target --type + * connector --connector bedrock-knowledge-bases`) so wiring is consistent + * across paths. + * + * If the user has hand-renamed the agentic target, we respect it and only + * append; we don't rename it back. + */ +export function upsertAgenticRetrieveTarget( + gateway: AgentCoreProjectSpec['agentCoreGateways'][number], + kbReference: string +): void { + const existing = gateway.targets.find( + t => t.targetType === 'connector' && t.connectorId === CONNECTOR_ID.BEDROCK_AGENTIC_RETRIEVE + ); + if (existing) { + const ids = existing.knowledgeBaseIds ?? 
[]; + if (!ids.includes(kbReference)) { + existing.knowledgeBaseIds = [...ids, kbReference]; + } + return; + } + const agenticTarget: AgentCoreGatewayTarget = { + name: `${gateway.name}-agentic`, + targetType: 'connector', + connectorId: CONNECTOR_ID.BEDROCK_AGENTIC_RETRIEVE, + knowledgeBaseIds: [kbReference], + } as AgentCoreGatewayTarget; + gateway.targets.push(agenticTarget); +} diff --git a/src/cli/operations/knowledge-base/connector-config.ts b/src/cli/operations/knowledge-base/connector-config.ts new file mode 100644 index 000000000..1849f1240 --- /dev/null +++ b/src/cli/operations/knowledge-base/connector-config.ts @@ -0,0 +1,116 @@ +import type { ConnectorDataSourceType } from '../../../schema'; +import { existsSync, readFileSync } from 'fs'; +import { resolve } from 'path'; + +/** + * User-facing `--data-source-type` flag values, including S3. `s3` is the + * default and maps to the inline-uri S3 data source; the rest map to + * connector-file data sources. + */ +export const DATA_SOURCE_TYPE_FLAGS = [ + 's3', + 'web-crawler', + 'confluence', + 'sharepoint', + 'onedrive', + 'google-drive', +] as const; +export type DataSourceTypeFlag = (typeof DATA_SOURCE_TYPE_FLAGS)[number]; + +/** All wire types (including S3). */ +export type DataSourceWireType = 'S3' | ConnectorDataSourceType; + +/** The single translation table: user flag → wire `type`. */ +export const CONNECTOR_TYPE_BY_FLAG: Record = { + s3: 'S3', + 'web-crawler': 'WEB', + confluence: 'CONFLUENCE', + sharepoint: 'SHAREPOINT', + onedrive: 'ONEDRIVE', + 'google-drive': 'GOOGLEDRIVE', +}; + +/** Inverse table, for rendering a wire type back as a user-facing flag. 
*/ +export const FLAG_BY_CONNECTOR_TYPE: Record<DataSourceWireType, DataSourceTypeFlag> = Object.fromEntries( + Object.entries(CONNECTOR_TYPE_BY_FLAG).map(([flag, wire]) => [wire, flag]) +) as Record<DataSourceWireType, DataSourceTypeFlag>; + +/** Translate a user-facing `--data-source-type` flag into its wire type. Only own keys of CONNECTOR_TYPE_BY_FLAG are accepted, so inherited names such as `constructor` or `toString` are rejected. @throws Error when the flag is not a key of CONNECTOR_TYPE_BY_FLAG. */ +export function flagToWireType(flag: string): DataSourceWireType { + if (!Object.prototype.hasOwnProperty.call(CONNECTOR_TYPE_BY_FLAG, flag)) { + throw new Error(`Unknown data source type "${flag}". Expected one of: ${DATA_SOURCE_TYPE_FLAGS.join(', ')}.`); + } + return CONNECTOR_TYPE_BY_FLAG[flag as DataSourceTypeFlag]; +} + +/** True for every wire type that uses a connectorConfigFile (i.e. not S3). Checks own keys of FLAG_BY_CONNECTOR_TYPE only, so inherited names such as `constructor` are not accepted. */ +export function isConnectorConfigType(wire: string): wire is ConnectorDataSourceType { + return wire !== 'S3' && Object.prototype.hasOwnProperty.call(FLAG_BY_CONNECTOR_TYPE, wire); +} + +/** Connector wire types that require a secretArn unless explicitly NO_AUTH. NOTE(review): readConnectorConfig warns for these whenever secretArn is missing and does not inspect authType — confirm which is intended. */ +const SECRET_BEARING: ReadonlySet<string> = new Set(['CONFLUENCE', 'SHAREPOINT', 'ONEDRIVE', 'GOOGLEDRIVE']); + +export interface ConnectorConfigReadResult { + /** The parsed connectorParameters object, passed through to the L3 verbatim. */ + parsed: Record<string, unknown> & { type: string }; + /** Non-fatal advisories surfaced to the user (e.g. missing secretArn). */ + warnings: string[]; +} + +/** + * Read a `--connector-config` JSON file and validate it lightly. The CLI does + * NOT deeply validate connector-specific structure — that lives only in the + * file and is passed through to the DataSource verbatim (the DevEx "JSON file + * passthrough" decision). We only check: file exists, parses, carries a `type` + * field matching the declared connector type, and (for auth connectors) warn + * if no secretArn is present.
+ */ +export function readConnectorConfig(path: string, declaredType: ConnectorDataSourceType): ConnectorConfigReadResult { + const resolved = resolve(path); + if (!existsSync(resolved)) { + throw new Error(`Connector config file not found: ${path}`); + } + let parsed: unknown; + try { + parsed = JSON.parse(readFileSync(resolved, 'utf-8')); + } catch { + throw new Error(`Connector config file is not valid JSON: ${path}`); + } + if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) { + throw new Error(`Connector config file must be a JSON object: ${path}`); + } + const obj = parsed as Record<string, unknown>; + if (typeof obj.type !== 'string') { + throw new Error(`Connector config file is missing a "type" field: ${path}`); + } + if (obj.type !== declaredType) { + throw new Error( + `Connector config "type" (${obj.type}) does not match the declared data source type (${declaredType}) in ${path}.` + ); + } + + const warnings: string[] = []; + const cc = obj.connectionConfiguration as Record<string, unknown> | undefined; + const hasSecret = !!cc && typeof cc.secretArn === 'string' && !!cc.secretArn; + if (SECRET_BEARING.has(declaredType) && !hasSecret) { + warnings.push( + `Connector config ${path} has no connectionConfiguration.secretArn; ${declaredType} ingestion will fail at deploy until credentials are provided.` + ); + } + if (declaredType === 'WEB') { + const authType = typeof cc?.authType === 'string' ? cc.authType : undefined; + if (authType && authType !== 'NO_AUTH' && !hasSecret) { + warnings.push(`Connector config ${path} uses authType ${authType} but has no secretArn.`); + } + } + + return { parsed: obj as ConnectorConfigReadResult['parsed'], warnings }; +} + +/** Pull connectionConfiguration.secretArn from a parsed connector config, if present. */ +export function extractSecretArn(parsed: Record<string, unknown>): string | undefined { + const cc = parsed.connectionConfiguration as Record<string, unknown> | undefined; + const arn = cc?.secretArn; + return typeof arn === 'string' && arn ?
arn : undefined; +} diff --git a/src/cli/operations/knowledge-base/hydrate-data-sources.ts b/src/cli/operations/knowledge-base/hydrate-data-sources.ts new file mode 100644 index 000000000..d7af5ff92 --- /dev/null +++ b/src/cli/operations/knowledge-base/hydrate-data-sources.ts @@ -0,0 +1,79 @@ +import type { KnowledgeBase, KnowledgeBaseDeployedState } from '../../../schema'; +import { listDataSources } from '../../aws/bedrock-agent'; +import { createHash } from 'node:crypto'; + +export interface HydrateInput { + /** KB deployed-state records as parsed from CFN outputs (id + arn populated, dataSources empty). */ + knowledgeBases: Record<string, KnowledgeBaseDeployedState>; + /** Local KB specs from agentcore.json — used to recover URIs for the deployed DS IDs. */ + knowledgeBaseSpecs: KnowledgeBase[]; + /** AWS region (passed through to bedrock-agent SDK calls). */ + region: string; +} + +/** + * The L3's `AgentCoreKnowledgeBase` names each DataSource as + * `${knowledgeBasePhysicalName}_ds_${uriHashPrefix}` + * where `uriHashPrefix` is the first 8 hex chars of SHA-256(uri). This must + * stay byte-equivalent to the L3's + * createHash('sha256').update(ds.uri).digest('hex').slice(0, 8) + * or the hash-based fallback below loses every DS. + */ +function uriHashPrefix(uri: string): string { + return createHash('sha256').update(uri).digest('hex').slice(0, 8); +} + +/** + * Hydrate the `dataSources[]` array on each KB deployed-state record. + * + * Preferred path: `parseKnowledgeBaseOutputs` already populates + * `dataSources[]` from per-DS CFN outputs (L3 #234 onward). This function is + * a no-op for KBs whose outputs were present. + * + * Fallback path: when CFN outputs are absent (stack was deployed against an + * older L3, or a partial deploy), call bedrock-agent:ListDataSources and + * pair each deployed DS with its local spec by URI-hash suffix.
The L3 names + * each DS deterministically using the first 8 chars of SHA-256(uri); we + * compute the same hash for every local URI and look it up against the + * deployed DS names. This is robust to ListDataSources ordering changes and + * to data sources being added or removed between deploys. + * + * Leaves `dataSources` as an empty array if both paths fail — the caller + * decides how to surface partial hydration. + */ +export async function hydrateKnowledgeBaseDataSources(input: HydrateInput): Promise<void> { + const specsByName = new Map(input.knowledgeBaseSpecs.map(s => [s.name, s])); + + for (const [name, deployed] of Object.entries(input.knowledgeBases)) { + if (deployed.dataSources.length > 0) continue; + + const spec = specsByName.get(name); + if (!spec) continue; + + const summaries = await listDataSources({ + region: input.region, + knowledgeBaseId: deployed.knowledgeBaseId, + }); + + // Build a hash-suffix → DS-id index from the deployed DSes so we can look + // up by URI hash without depending on ListDataSources ordering. + const idByHash = new Map<string, string>(); + for (const summary of summaries) { + if (!summary.dataSourceId || !summary.name) continue; + const match = /_ds_([0-9a-f]+)$/.exec(summary.name); + if (!match) continue; + idByHash.set(match[1]!, summary.dataSourceId); + } + + // For each local DS spec, recover the deployed DS id by URI hash.
+ const hydrated: { dataSourceId: string; uri: string }[] = []; + for (const localDs of spec.dataSources) { + if (localDs.type !== 'S3') continue; + const dataSourceId = idByHash.get(uriHashPrefix(localDs.uri)); + if (!dataSourceId) continue; + hydrated.push({ dataSourceId, uri: localDs.uri }); + } + + deployed.dataSources = hydrated; + } +} diff --git a/src/cli/operations/mcp/__tests__/create-mcp-utils.test.ts b/src/cli/operations/mcp/__tests__/create-mcp-utils.test.ts index cade05ea5..d1b5c3fc3 100644 --- a/src/cli/operations/mcp/__tests__/create-mcp-utils.test.ts +++ b/src/cli/operations/mcp/__tests__/create-mcp-utils.test.ts @@ -140,6 +140,7 @@ describe('GatewayPrimitive.add (createGateway)', () => { managedBy: 'CDK' as const, runtimes: [], memories: [], + knowledgeBases: [], credentials: [], evaluators: [], onlineEvalConfigs: [], @@ -174,6 +175,7 @@ describe('GatewayPrimitive.add (createGateway)', () => { managedBy: 'CDK' as const, runtimes: [], memories: [], + knowledgeBases: [], credentials: [], evaluators: [], onlineEvalConfigs: [], @@ -198,6 +200,7 @@ describe('GatewayPrimitive.add (createGateway)', () => { managedBy: 'CDK' as const, runtimes: [], memories: [], + knowledgeBases: [], credentials: [], evaluators: [], onlineEvalConfigs: [], @@ -225,6 +228,7 @@ describe('GatewayPrimitive.add (createGateway)', () => { managedBy: 'CDK' as const, runtimes: [], memories: [], + knowledgeBases: [], credentials: [], evaluators: [], onlineEvalConfigs: [], diff --git a/src/cli/operations/recommendation/__tests__/recommendation-storage.test.ts b/src/cli/operations/recommendation/__tests__/recommendation-storage.test.ts deleted file mode 100644 index 55c5b6eae..000000000 --- a/src/cli/operations/recommendation/__tests__/recommendation-storage.test.ts +++ /dev/null @@ -1,136 +0,0 @@ -import { listAllRecommendations, loadRecommendationRun, saveRecommendationRun } from '../recommendation-storage'; -import type { RunRecommendationCommandResult } from '../types'; -import { 
existsSync, mkdirSync, rmSync } from 'fs'; -import { tmpdir } from 'os'; -import { join } from 'path'; -import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; - -const mockFindConfigRoot = vi.fn(); - -vi.mock('../../../../lib', () => ({ - findConfigRoot: () => mockFindConfigRoot(), -})); - -function makeTmpDir(): string { - const dir = join(tmpdir(), `recommendation-storage-test-${Date.now()}-${Math.random().toString(36).slice(2)}`); - mkdirSync(dir, { recursive: true }); - return dir; -} - -function makeResult( - overrides: Partial> = {} -): RunRecommendationCommandResult { - return { - success: true, - recommendationId: 'rec-123', - status: 'COMPLETED', - startedAt: '2026-03-24T10:00:00.000Z', - completedAt: '2026-03-24T10:05:00.000Z', - result: { - systemPromptRecommendationResult: { - recommendedSystemPrompt: 'You are an expert booking assistant.', - }, - }, - ...overrides, - }; -} - -describe('recommendation-storage', () => { - let tmpDir: string; - - beforeEach(() => { - tmpDir = makeTmpDir(); - mockFindConfigRoot.mockReturnValue(tmpDir); - }); - - afterEach(() => { - if (existsSync(tmpDir)) { - rmSync(tmpDir, { recursive: true, force: true }); - } - vi.clearAllMocks(); - }); - - describe('saveRecommendationRun', () => { - it('creates directory and writes JSON file', () => { - const result = makeResult(); - const filePath = saveRecommendationRun('rec-123', result, 'SYSTEM_PROMPT_RECOMMENDATION', 'booking-agent', [ - 'Builtin.Helpfulness', - ]); - - expect(filePath).toContain('recommendations'); - expect(filePath).toContain('rec-123.json'); - expect(existsSync(filePath)).toBe(true); - }); - - it('writes valid JSON that can be read back', () => { - const result = makeResult(); - saveRecommendationRun('rec-123', result, 'SYSTEM_PROMPT_RECOMMENDATION', 'booking-agent', [ - 'Builtin.Helpfulness', - ]); - - const loaded = loadRecommendationRun('rec-123'); - expect(loaded.recommendationId).toBe('rec-123'); - 
expect(loaded.type).toBe('SYSTEM_PROMPT_RECOMMENDATION'); - expect(loaded.agent).toBe('booking-agent'); - expect(loaded.evaluators).toEqual(['Builtin.Helpfulness']); - expect(loaded.result?.systemPromptRecommendationResult?.recommendedSystemPrompt).toBe( - 'You are an expert booking assistant.' - ); - }); - }); - - describe('loadRecommendationRun', () => { - it('loads a previously saved recommendation', () => { - saveRecommendationRun('rec-123', makeResult(), 'SYSTEM_PROMPT_RECOMMENDATION', 'agent', ['eval']); - const loaded = loadRecommendationRun('rec-123'); - expect(loaded.status).toBe('COMPLETED'); - }); - - it('accepts filename with .json extension', () => { - saveRecommendationRun('rec-123', makeResult(), 'SYSTEM_PROMPT_RECOMMENDATION', 'agent', ['eval']); - const loaded = loadRecommendationRun('rec-123.json'); - expect(loaded.recommendationId).toBe('rec-123'); - }); - - it('throws for a non-existent recommendation', () => { - expect(() => loadRecommendationRun('nonexistent')).toThrow('not found'); - }); - }); - - describe('listAllRecommendations', () => { - it('returns empty array when no recommendations exist', () => { - expect(listAllRecommendations()).toEqual([]); - }); - - it('returns saved recommendations in reverse order', () => { - saveRecommendationRun( - 'rec-aaa', - makeResult({ recommendationId: 'rec-aaa' }), - 'SYSTEM_PROMPT_RECOMMENDATION', - 'agent', - ['eval'] - ); - saveRecommendationRun( - 'rec-zzz', - makeResult({ recommendationId: 'rec-zzz' }), - 'TOOL_DESCRIPTION_RECOMMENDATION', - 'agent', - ['eval'] - ); - - const all = listAllRecommendations(); - expect(all).toHaveLength(2); - expect(all[0]!.recommendationId).toBe('rec-zzz'); - expect(all[1]!.recommendationId).toBe('rec-aaa'); - }); - }); - - describe('error when no config root', () => { - it('throws when findConfigRoot returns null', () => { - mockFindConfigRoot.mockReturnValue(null); - expect(() => - saveRecommendationRun('rec-123', makeResult(), 'SYSTEM_PROMPT_RECOMMENDATION', 
'agent', ['eval']) - ).toThrow('No agentcore project found'); - }); - }); -}); diff --git a/src/cli/operations/recommendation/__tests__/run-recommendation.test.ts b/src/cli/operations/recommendation/__tests__/run-recommendation.test.ts deleted file mode 100644 index 765ee6692..000000000 --- a/src/cli/operations/recommendation/__tests__/run-recommendation.test.ts +++ /dev/null @@ -1,720 +0,0 @@ -import { runRecommendationCommand } from '../run-recommendation'; -import assert from 'node:assert'; -import { beforeEach, describe, expect, it, vi } from 'vitest'; - -// Mock dependencies — paths are relative to the file under test (run-recommendation.ts) -const mockReadProjectSpec = vi.fn().mockResolvedValue({ name: 'test-project' }); -const mockReadDeployedState = vi.fn().mockResolvedValue({ - targets: { - default: { - resources: { - runtimes: { - MyAgent: { - runtimeId: 'rt-abc123', - runtimeArn: 'arn:aws:bedrock:us-east-1:998846730471:agent-runtime/rt-abc123', - }, - }, - evaluators: { - MyEvaluator: { - evaluatorArn: 'arn:aws:bedrock-agentcore:us-east-1:998846730471:evaluator/my-eval-abc1234567', - }, - }, - }, - }, - }, -}); - -vi.mock('../../../../lib', () => ({ - ConfigIO: class { - readProjectSpec = mockReadProjectSpec; - readDeployedState = mockReadDeployedState; - resolveAWSDeploymentTargets = vi.fn().mockResolvedValue([{ region: 'us-east-1' }]); - }, - toError: (err: unknown) => (err instanceof Error ? 
err : new Error(String(err))), - ResourceNotFoundError: class extends Error { - constructor(m: string) { - super(m); - this.name = 'ResourceNotFoundError'; - } - }, - ValidationError: class extends Error { - constructor(m: string) { - super(m); - this.name = 'ValidationError'; - } - }, - TimeoutError: class extends Error { - constructor(m: string) { - super(m); - this.name = 'TimeoutError'; - } - }, -})); - -vi.mock('../../../aws/region', () => ({ - detectRegion: vi.fn().mockResolvedValue({ region: 'us-east-1' }), -})); - -const mockStartRecommendation = vi.fn(); -const mockGetRecommendation = vi.fn(); - -vi.mock('../../../aws/agentcore-recommendation', () => ({ - startRecommendation: (...args: unknown[]) => mockStartRecommendation(...args), - getRecommendation: (...args: unknown[]) => mockGetRecommendation(...args), -})); - -const mockFetchSessionSpans = vi.fn(); -vi.mock('../fetch-session-spans', () => ({ - fetchSessionSpans: (...args: unknown[]) => mockFetchSessionSpans(...args), -})); - -const mockReadFileSync = vi.fn(); -vi.mock('fs', async () => { - const actual = await vi.importActual('fs'); - return { ...actual, readFileSync: (...args: unknown[]) => mockReadFileSync(...args) }; -}); - -describe('runRecommendationCommand', () => { - beforeEach(() => { - vi.clearAllMocks(); - }); - - it('returns error when agent is not deployed', async () => { - mockReadDeployedState.mockResolvedValueOnce({ targets: {} }); - - const result = await runRecommendationCommand({ - type: 'SYSTEM_PROMPT_RECOMMENDATION', - agent: 'NonExistentAgent', - evaluators: ['Builtin.Toxicity'], - inputSource: 'inline', - inlineContent: 'You are helpful.', - traceSource: 'cloudwatch', - }); - - assert(!result.success); - expect(result.error.message).toContain('NonExistentAgent'); - expect(result.error.message).toContain('not deployed'); - }); - - it('returns error when evaluator cannot be resolved', async () => { - const result = await runRecommendationCommand({ - type: 
'SYSTEM_PROMPT_RECOMMENDATION', - agent: 'MyAgent', - evaluators: ['UnknownEvaluator'], - inputSource: 'inline', - inlineContent: 'You are helpful.', - traceSource: 'cloudwatch', - }); - - assert(!result.success); - expect(result.error.message).toContain('UnknownEvaluator'); - expect(result.error.message).toContain('not found'); - }); - - it('returns result on COMPLETED status', async () => { - mockStartRecommendation.mockResolvedValue({ - recommendationId: 'rec-001', - recommendationArn: 'arn:rec-001', - name: 'test-rec', - type: 'SYSTEM_PROMPT_RECOMMENDATION', - status: 'PENDING', - }); - - mockGetRecommendation.mockResolvedValue({ - recommendationId: 'rec-001', - status: 'COMPLETED', - createdAt: '2026-03-30T00:00:00Z', - completedAt: '2026-03-30T00:01:00Z', - recommendationResult: { - systemPromptRecommendationResult: { - recommendedSystemPrompt: 'Optimized prompt', - explanation: 'Made clearer', - }, - }, - }); - - const result = await runRecommendationCommand({ - type: 'SYSTEM_PROMPT_RECOMMENDATION', - agent: 'MyAgent', - evaluators: ['Builtin.Toxicity'], - inputSource: 'inline', - inlineContent: 'You are helpful.', - traceSource: 'cloudwatch', - pollIntervalMs: 0, - }); - - assert(result.success); - expect(result.recommendationId).toBe('rec-001'); - expect(result.status).toBe('COMPLETED'); - expect(result.result?.systemPromptRecommendationResult?.recommendedSystemPrompt).toBe('Optimized prompt'); - }); - - it('returns error on FAILED status', async () => { - mockStartRecommendation.mockResolvedValue({ - recommendationId: 'rec-002', - recommendationArn: 'arn:rec-002', - name: 'test-rec', - type: 'SYSTEM_PROMPT_RECOMMENDATION', - status: 'PENDING', - }); - - mockGetRecommendation.mockResolvedValue({ - recommendationId: 'rec-002', - status: 'FAILED', - }); - - const result = await runRecommendationCommand({ - type: 'SYSTEM_PROMPT_RECOMMENDATION', - agent: 'MyAgent', - evaluators: ['Builtin.Toxicity'], - inputSource: 'inline', - inlineContent: 'You are 
helpful.', - traceSource: 'cloudwatch', - pollIntervalMs: 0, - }); - - assert(!result.success); - expect(result.error.message).toContain('FAILED'); - expect(result.recommendationId).toBe('rec-002'); - }); - - it('expands Builtin.* evaluator to full ARN in startRecommendation call', async () => { - mockStartRecommendation.mockResolvedValue({ - recommendationId: 'rec-003', - status: 'COMPLETED', - }); - - mockGetRecommendation.mockResolvedValue({ - recommendationId: 'rec-003', - status: 'COMPLETED', - recommendationResult: {}, - }); - - await runRecommendationCommand({ - type: 'SYSTEM_PROMPT_RECOMMENDATION', - agent: 'MyAgent', - evaluators: ['Builtin.Toxicity'], - inputSource: 'inline', - inlineContent: 'test', - traceSource: 'cloudwatch', - pollIntervalMs: 0, - }); - - const callArgs = mockStartRecommendation.mock.calls[0]![0]; - const evaluators = callArgs.recommendationConfig.systemPromptRecommendationConfig.evaluationConfig.evaluators; - expect(evaluators[0].evaluatorArn).toBe('arn:aws:bedrock-agentcore:::evaluator/Builtin.Toxicity'); - }); - - it('uses account ID from runtime ARN in log group ARN', async () => { - mockStartRecommendation.mockResolvedValue({ - recommendationId: 'rec-004', - status: 'COMPLETED', - }); - - mockGetRecommendation.mockResolvedValue({ - recommendationId: 'rec-004', - status: 'COMPLETED', - recommendationResult: {}, - }); - - await runRecommendationCommand({ - type: 'SYSTEM_PROMPT_RECOMMENDATION', - agent: 'MyAgent', - evaluators: ['Builtin.Toxicity'], - inputSource: 'inline', - inlineContent: 'test', - traceSource: 'cloudwatch', - pollIntervalMs: 0, - }); - - const callArgs = mockStartRecommendation.mock.calls[0]![0]; - const logGroupArn = - callArgs.recommendationConfig.systemPromptRecommendationConfig.agentTraces.cloudwatchLogs.logGroupArns[0]; - expect(logGroupArn).toContain(':998846730471:'); - expect(logGroupArn).not.toContain(':*:'); - }); - - it('resolves custom evaluator from deployed state', async () => { - 
mockStartRecommendation.mockResolvedValue({ - recommendationId: 'rec-005', - status: 'COMPLETED', - }); - - mockGetRecommendation.mockResolvedValue({ - recommendationId: 'rec-005', - status: 'COMPLETED', - recommendationResult: {}, - }); - - await runRecommendationCommand({ - type: 'SYSTEM_PROMPT_RECOMMENDATION', - agent: 'MyAgent', - evaluators: ['MyEvaluator'], - inputSource: 'inline', - inlineContent: 'test', - traceSource: 'cloudwatch', - pollIntervalMs: 0, - }); - - const callArgs = mockStartRecommendation.mock.calls[0]![0]; - const evaluators = callArgs.recommendationConfig.systemPromptRecommendationConfig.evaluationConfig.evaluators; - expect(evaluators[0].evaluatorArn).toBe( - 'arn:aws:bedrock-agentcore:us-east-1:998846730471:evaluator/my-eval-abc1234567' - ); - }); - - it('builds TOOL_DESCRIPTION_RECOMMENDATION config with toolName:description pairs', async () => { - mockStartRecommendation.mockResolvedValue({ - recommendationId: 'rec-006', - status: 'COMPLETED', - }); - - mockGetRecommendation.mockResolvedValue({ - recommendationId: 'rec-006', - status: 'COMPLETED', - recommendationResult: {}, - }); - - await runRecommendationCommand({ - type: 'TOOL_DESCRIPTION_RECOMMENDATION', - agent: 'MyAgent', - evaluators: ['Builtin.Toxicity'], - inputSource: 'inline', - tools: ['search:Search the web for info', 'calculate:Perform math calculations'], - traceSource: 'cloudwatch', - pollIntervalMs: 0, - }); - - const callArgs = mockStartRecommendation.mock.calls[0]![0]; - const tools = - callArgs.recommendationConfig.toolDescriptionRecommendationConfig.toolDescription.toolDescriptionText.tools; - expect(tools).toHaveLength(2); - expect(tools[0].toolName).toBe('search'); - expect(tools[0].toolDescription.text).toBe('Search the web for info'); - expect(tools[1].toolName).toBe('calculate'); - expect(tools[1].toolDescription.text).toBe('Perform math calculations'); - }); - - it('catches and returns errors from startRecommendation', async () => { - 
mockStartRecommendation.mockRejectedValue(new Error('API timeout')); - - const result = await runRecommendationCommand({ - type: 'SYSTEM_PROMPT_RECOMMENDATION', - agent: 'MyAgent', - evaluators: ['Builtin.Toxicity'], - inputSource: 'inline', - inlineContent: 'test', - traceSource: 'cloudwatch', - }); - - assert(!result.success); - expect(result.error.message).toContain('API timeout'); - }); - - it('retries transient poll failures and succeeds', async () => { - mockStartRecommendation.mockResolvedValue({ - recommendationId: 'rec-retry-ok', - recommendationArn: 'arn:rec-retry-ok', - name: 'test-rec', - type: 'SYSTEM_PROMPT_RECOMMENDATION', - status: 'PENDING', - }); - - // First poll fails, second succeeds - mockGetRecommendation.mockRejectedValueOnce(new Error('fetch failed')).mockResolvedValueOnce({ - recommendationId: 'rec-retry-ok', - status: 'COMPLETED', - recommendationResult: { - systemPromptRecommendationResult: { recommendedSystemPrompt: 'Better prompt' }, - }, - }); - - const result = await runRecommendationCommand({ - type: 'SYSTEM_PROMPT_RECOMMENDATION', - agent: 'MyAgent', - evaluators: ['Builtin.Toxicity'], - inputSource: 'inline', - inlineContent: 'test', - traceSource: 'cloudwatch', - pollIntervalMs: 0, - }); - - expect(result.success).toBe(true); - expect(result.recommendationId).toBe('rec-retry-ok'); - expect(mockGetRecommendation).toHaveBeenCalledTimes(2); - }); - - it('fails after max consecutive poll retries', async () => { - mockStartRecommendation.mockResolvedValue({ - recommendationId: 'rec-retry-fail', - recommendationArn: 'arn:rec-retry-fail', - name: 'test-rec', - type: 'SYSTEM_PROMPT_RECOMMENDATION', - status: 'PENDING', - }); - - mockGetRecommendation.mockRejectedValue(new Error('fetch failed')); - - const result = await runRecommendationCommand({ - type: 'SYSTEM_PROMPT_RECOMMENDATION', - agent: 'MyAgent', - evaluators: ['Builtin.Toxicity'], - inputSource: 'inline', - inlineContent: 'test', - traceSource: 'cloudwatch', - pollIntervalMs: 
0, - }); - - assert(!result.success); - expect(result.error.message).toContain('consecutive errors'); - expect(result.error.message).toContain('fetch failed'); - expect(result.error.message).toContain('rec-retry-fail'); - expect(mockGetRecommendation).toHaveBeenCalledTimes(3); - }); - - it('times out after max poll duration', async () => { - mockStartRecommendation.mockResolvedValue({ - recommendationId: 'rec-timeout', - recommendationArn: 'arn:rec-timeout', - name: 'test-rec', - type: 'SYSTEM_PROMPT_RECOMMENDATION', - status: 'PENDING', - }); - - mockGetRecommendation.mockResolvedValue({ - recommendationId: 'rec-timeout', - status: 'IN_PROGRESS', - }); - - const result = await runRecommendationCommand({ - type: 'SYSTEM_PROMPT_RECOMMENDATION', - agent: 'MyAgent', - evaluators: ['Builtin.Toxicity'], - inputSource: 'inline', - inlineContent: 'test', - traceSource: 'cloudwatch', - pollIntervalMs: 0, - maxPollDurationMs: 0, // Immediately timeout - }); - - assert(!result.success); - expect(result.error.message).toContain('Polling timed out'); - expect(result.error.message).toContain('rec-timeout'); - }); - - it('reads system prompt from file when inputSource is file', async () => { - mockReadFileSync.mockReturnValue('You are a healthcare assistant.'); - - mockStartRecommendation.mockResolvedValue({ - recommendationId: 'rec-file', - status: 'COMPLETED', - }); - mockGetRecommendation.mockResolvedValue({ - recommendationId: 'rec-file', - status: 'COMPLETED', - recommendationResult: {}, - }); - - await runRecommendationCommand({ - type: 'SYSTEM_PROMPT_RECOMMENDATION', - agent: 'MyAgent', - evaluators: ['Builtin.Helpfulness'], - inputSource: 'file', - promptFile: '/tmp/prompt.txt', - traceSource: 'cloudwatch', - pollIntervalMs: 0, - }); - - expect(mockReadFileSync).toHaveBeenCalledWith('/tmp/prompt.txt', 'utf-8'); - const callArgs = mockStartRecommendation.mock.calls[0]![0]; - const systemPrompt = callArgs.recommendationConfig.systemPromptRecommendationConfig.systemPrompt; 
- expect(systemPrompt.text).toBe('You are a healthcare assistant.'); - }); - - it('uses inline sessionSpans from spans-file trace source', async () => { - const fakeSpans = [ - { traceId: 't1', spanId: 's1', body: {} }, - { traceId: 't1', spanId: 's2', body: {} }, - ]; - mockReadFileSync.mockReturnValue(JSON.stringify(fakeSpans)); - - mockStartRecommendation.mockResolvedValue({ - recommendationId: 'rec-spans', - status: 'COMPLETED', - }); - mockGetRecommendation.mockResolvedValue({ - recommendationId: 'rec-spans', - status: 'COMPLETED', - recommendationResult: {}, - }); - - await runRecommendationCommand({ - type: 'SYSTEM_PROMPT_RECOMMENDATION', - agent: 'MyAgent', - evaluators: ['Builtin.Toxicity'], - inputSource: 'inline', - inlineContent: 'test', - traceSource: 'spans-file', - spansFile: '/tmp/spans.json', - pollIntervalMs: 0, - }); - - const callArgs = mockStartRecommendation.mock.calls[0]![0]; - const traces = callArgs.recommendationConfig.systemPromptRecommendationConfig.agentTraces; - expect(traces.sessionSpans).toHaveLength(2); - expect(traces.cloudwatchLogs).toBeUndefined(); - }); - - it('wraps single span object in array for spans-file', async () => { - const singleSpan = { traceId: 't1', spanId: 's1', body: {} }; - mockReadFileSync.mockReturnValue(JSON.stringify(singleSpan)); - - mockStartRecommendation.mockResolvedValue({ - recommendationId: 'rec-single', - status: 'COMPLETED', - }); - mockGetRecommendation.mockResolvedValue({ - recommendationId: 'rec-single', - status: 'COMPLETED', - recommendationResult: {}, - }); - - await runRecommendationCommand({ - type: 'SYSTEM_PROMPT_RECOMMENDATION', - agent: 'MyAgent', - evaluators: ['Builtin.Toxicity'], - inputSource: 'inline', - inlineContent: 'test', - traceSource: 'spans-file', - spansFile: '/tmp/single.json', - pollIntervalMs: 0, - }); - - const callArgs = mockStartRecommendation.mock.calls[0]![0]; - const traces = callArgs.recommendationConfig.systemPromptRecommendationConfig.agentTraces; - 
expect(traces.sessionSpans).toHaveLength(1); - }); - - it('auto-fetches spans for tool-desc with sessions trace source', async () => { - mockFetchSessionSpans.mockResolvedValue({ - spans: [ - { traceId: 't1', spanId: 's1', body: {} }, - { traceId: 't1', spanId: 's2', body: {} }, - ], - spanRecordCount: 1, - logRecordCount: 1, - }); - - mockStartRecommendation.mockResolvedValue({ - recommendationId: 'rec-autofetch', - status: 'COMPLETED', - }); - mockGetRecommendation.mockResolvedValue({ - recommendationId: 'rec-autofetch', - status: 'COMPLETED', - recommendationResult: {}, - }); - - await runRecommendationCommand({ - type: 'TOOL_DESCRIPTION_RECOMMENDATION', - agent: 'MyAgent', - evaluators: ['Builtin.Toxicity'], - inputSource: 'inline', - tools: ['add_numbers:Add two numbers together'], - traceSource: 'sessions', - sessionIds: ['session-abc'], - pollIntervalMs: 0, - }); - - expect(mockFetchSessionSpans).toHaveBeenCalledWith( - expect.objectContaining({ - region: 'us-east-1', - runtimeId: 'rt-abc123', - sessionId: 'session-abc', - }) - ); - - const callArgs = mockStartRecommendation.mock.calls[0]![0]; - const traces = callArgs.recommendationConfig.toolDescriptionRecommendationConfig.agentTraces; - expect(traces.sessionSpans).toHaveLength(2); - expect(traces.cloudwatchLogs).toBeUndefined(); - }); - - it('throws when auto-fetch returns zero spans', async () => { - mockFetchSessionSpans.mockResolvedValue({ - spans: [], - spanRecordCount: 0, - logRecordCount: 0, - }); - - const result = await runRecommendationCommand({ - type: 'TOOL_DESCRIPTION_RECOMMENDATION', - agent: 'MyAgent', - evaluators: ['Builtin.Toxicity'], - inputSource: 'inline', - tools: ['add_numbers:Add numbers'], - traceSource: 'sessions', - sessionIds: ['session-empty'], - pollIntervalMs: 0, - }); - - assert(!result.success); - expect(result.error.message).toContain('No spans found'); - }); - - it('derives service name from runtimeId by stripping hash suffix', async () => { - 
mockStartRecommendation.mockResolvedValue({ - recommendationId: 'rec-svc', - status: 'COMPLETED', - }); - mockGetRecommendation.mockResolvedValue({ - recommendationId: 'rec-svc', - status: 'COMPLETED', - recommendationResult: {}, - }); - - await runRecommendationCommand({ - type: 'SYSTEM_PROMPT_RECOMMENDATION', - agent: 'MyAgent', - evaluators: ['Builtin.Toxicity'], - inputSource: 'inline', - inlineContent: 'test', - traceSource: 'cloudwatch', - pollIntervalMs: 0, - }); - - const callArgs = mockStartRecommendation.mock.calls[0]![0]; - const serviceNames = - callArgs.recommendationConfig.systemPromptRecommendationConfig.agentTraces.cloudwatchLogs.serviceNames; - // runtimeId 'rt-abc123' → service name 'rt.DEFAULT' (strips '-abc123' suffix) - expect(serviceNames[0]).toBe('rt.DEFAULT'); - }); - - it('auto-fetches spans for system-prompt with sessions trace source', async () => { - mockFetchSessionSpans.mockResolvedValue({ spans: [{ sessionId: 'sess-1', spans: [] }] }); - mockStartRecommendation.mockResolvedValue({ - recommendationId: 'rec-sid', - status: 'COMPLETED', - }); - mockGetRecommendation.mockResolvedValue({ - recommendationId: 'rec-sid', - status: 'COMPLETED', - recommendationResult: {}, - }); - - await runRecommendationCommand({ - type: 'SYSTEM_PROMPT_RECOMMENDATION', - agent: 'MyAgent', - evaluators: ['Builtin.Toxicity'], - inputSource: 'inline', - inlineContent: 'test', - traceSource: 'sessions', - sessionIds: ['sess-1'], - pollIntervalMs: 0, - }); - - expect(mockFetchSessionSpans).toHaveBeenCalledWith(expect.objectContaining({ sessionId: 'sess-1' })); - const callArgs = mockStartRecommendation.mock.calls[0]![0]; - const traces = callArgs.recommendationConfig.systemPromptRecommendationConfig.agentTraces; - expect(traces.sessionSpans).toBeDefined(); - expect(traces.cloudwatchLogs).toBeUndefined(); - }); - - it('builds cloudwatch config with two log group ARNs', async () => { - mockStartRecommendation.mockResolvedValue({ - recommendationId: 'rec-cw', - 
status: 'COMPLETED', - }); - mockGetRecommendation.mockResolvedValue({ - recommendationId: 'rec-cw', - status: 'COMPLETED', - recommendationResult: {}, - }); - - await runRecommendationCommand({ - type: 'SYSTEM_PROMPT_RECOMMENDATION', - agent: 'MyAgent', - evaluators: ['Builtin.Toxicity'], - inputSource: 'inline', - inlineContent: 'test', - traceSource: 'cloudwatch', - lookbackDays: 3, - pollIntervalMs: 0, - }); - - const callArgs = mockStartRecommendation.mock.calls[0]![0]; - const cwConfig = callArgs.recommendationConfig.systemPromptRecommendationConfig.agentTraces.cloudwatchLogs; - expect(cwConfig.logGroupArns).toHaveLength(2); - expect(cwConfig.logGroupArns[0]).toContain('/aws/bedrock-agentcore/runtimes/rt-abc123-DEFAULT'); - expect(cwConfig.logGroupArns[1]).toContain('aws/spans'); - expect(cwConfig.startTime).toBeDefined(); - expect(cwConfig.endTime).toBeDefined(); - }); - - it('extracts failure details from statusReasons and result error fields', async () => { - mockStartRecommendation.mockResolvedValue({ - recommendationId: 'rec-fail-detail', - recommendationArn: 'arn:rec-fail-detail', - name: 'test', - type: 'SYSTEM_PROMPT_RECOMMENDATION', - status: 'PENDING', - requestId: 'start-req-id', - }); - - mockGetRecommendation.mockResolvedValue({ - recommendationId: 'rec-fail-detail', - status: 'FAILED', - requestId: 'poll-req-id', - statusReasons: ['Insufficient trace data'], - recommendationResult: { - systemPromptRecommendationResult: { - errorCode: 'INSUFFICIENT_DATA', - errorMessage: 'Not enough traces to generate recommendation', - }, - }, - }); - - const result = await runRecommendationCommand({ - type: 'SYSTEM_PROMPT_RECOMMENDATION', - agent: 'MyAgent', - evaluators: ['Builtin.Toxicity'], - inputSource: 'inline', - inlineContent: 'test', - traceSource: 'cloudwatch', - pollIntervalMs: 0, - }); - - assert(!result.success); - expect(result.error.message).toContain('Insufficient trace data'); - expect(result.error.message).toContain('INSUFFICIENT_DATA'); - 
expect(result.error.message).toContain('Not enough traces'); - // Request IDs are logged to file only, not included in the error message - }); - - it('passes full ARN evaluator as-is', async () => { - mockStartRecommendation.mockResolvedValue({ - recommendationId: 'rec-arn', - status: 'COMPLETED', - }); - mockGetRecommendation.mockResolvedValue({ - recommendationId: 'rec-arn', - status: 'COMPLETED', - recommendationResult: {}, - }); - - const fullArn = 'arn:aws:bedrock-agentcore:us-east-1:123456789012:evaluator/custom-eval'; - await runRecommendationCommand({ - type: 'SYSTEM_PROMPT_RECOMMENDATION', - agent: 'MyAgent', - evaluators: [fullArn], - inputSource: 'inline', - inlineContent: 'test', - traceSource: 'cloudwatch', - pollIntervalMs: 0, - }); - - const callArgs = mockStartRecommendation.mock.calls[0]![0]; - const evaluators = callArgs.recommendationConfig.systemPromptRecommendationConfig.evaluationConfig.evaluators; - expect(evaluators[0].evaluatorArn).toBe(fullArn); - }); -}); diff --git a/src/cli/operations/recommendation/constants.ts b/src/cli/operations/recommendation/constants.ts deleted file mode 100644 index c79647c44..000000000 --- a/src/cli/operations/recommendation/constants.ts +++ /dev/null @@ -1,11 +0,0 @@ -/** Polling interval in ms for checking recommendation status. */ -export const DEFAULT_POLL_INTERVAL_MS = 5000; - -/** Statuses that indicate a recommendation has reached a terminal state. */ -export const TERMINAL_STATUSES = new Set(['COMPLETED', 'SUCCEEDED', 'FAILED', 'DELETING']); - -/** Max retries for transient poll failures (network errors, 5xx). */ -export const MAX_POLL_RETRIES = 3; - -/** Max total polling duration in ms (30 minutes). 
*/ -export const MAX_POLL_DURATION_MS = 30 * 60 * 1000; diff --git a/src/cli/operations/recommendation/index.ts b/src/cli/operations/recommendation/index.ts deleted file mode 100644 index f60a1d798..000000000 --- a/src/cli/operations/recommendation/index.ts +++ /dev/null @@ -1,18 +0,0 @@ -export { applyRecommendationToBundle } from './apply-to-bundle'; -export type { ApplyRecommendationOptions, ApplyRecommendationResult } from './apply-to-bundle'; -export { fetchSessionSpans } from './fetch-session-spans'; -export type { FetchSessionSpansOptions, FetchSessionSpansResult } from './fetch-session-spans'; -export { runRecommendationCommand } from './run-recommendation'; -export type { - RunRecommendationCommandOptions, - RunRecommendationCommandResult, - RecommendationType, - RecommendationInputSourceKind, - TraceSourceKind, -} from './types'; -export { - saveRecommendationRun, - loadRecommendationRun, - listAllRecommendations, - type RecommendationRunRecord, -} from './recommendation-storage'; diff --git a/src/cli/operations/recommendation/recommendation-storage.ts b/src/cli/operations/recommendation/recommendation-storage.ts deleted file mode 100644 index 2049535e3..000000000 --- a/src/cli/operations/recommendation/recommendation-storage.ts +++ /dev/null @@ -1,84 +0,0 @@ -import { findConfigRoot } from '../../../lib'; -import type { RecommendationResult, RecommendationType } from '../../aws/agentcore-recommendation'; -import type { RunRecommendationCommandResult } from './types'; -import { existsSync, mkdirSync, readFileSync, readdirSync, writeFileSync } from 'fs'; -import { join } from 'path'; - -export const RECOMMENDATIONS_DIR = 'recommendations'; - -export interface RecommendationRunRecord { - recommendationId: string; - type: RecommendationType; - agent: string; - evaluators: string[]; - status: string; - startedAt?: string; - completedAt?: string; - result?: RecommendationResult; -} - -function getRecommendationResultsDir(): string { - const configRoot = 
findConfigRoot(); - if (!configRoot) { - throw new Error('No agentcore project found. Run `agentcore create` first.'); - } - return join(configRoot, '.cli', RECOMMENDATIONS_DIR); -} - -export function saveRecommendationRun( - recommendationId: string, - result: RunRecommendationCommandResult, - type: RecommendationType, - agent: string, - evaluators: string[] -): string { - const dir = getRecommendationResultsDir(); - mkdirSync(dir, { recursive: true }); - - const filePath = join(dir, `${recommendationId}.json`); - - const record: RecommendationRunRecord = { - recommendationId, - type, - agent, - evaluators, - status: result.status ?? 'unknown', - startedAt: result.success ? result.startedAt : undefined, - completedAt: result.success ? result.completedAt : undefined, - result: result.success ? result.result : undefined, - }; - - writeFileSync(filePath, JSON.stringify(record, null, 2)); - return filePath; -} - -export function loadRecommendationRun(recommendationId: string): RecommendationRunRecord { - const dir = getRecommendationResultsDir(); - const jsonName = recommendationId.endsWith('.json') ? recommendationId : `${recommendationId}.json`; - const filePath = join(dir, jsonName); - - if (!existsSync(filePath)) { - throw new Error(`Recommendation "${recommendationId}" not found at ${filePath}`); - } - - return JSON.parse(readFileSync(filePath, 'utf-8')) as RecommendationRunRecord; -} - -export function listAllRecommendations(): RecommendationRunRecord[] { - const configRoot = findConfigRoot(); - if (!configRoot) { - throw new Error('No agentcore project found. 
Run `agentcore create` first.'); - } - - const dir = join(configRoot, '.cli', RECOMMENDATIONS_DIR); - if (!existsSync(dir)) { - return []; - } - - const files = readdirSync(dir) - .filter(f => f.endsWith('.json')) - .sort() - .reverse(); - - return files.map(f => JSON.parse(readFileSync(join(dir, f), 'utf-8')) as RecommendationRunRecord); -} diff --git a/src/cli/operations/recommendation/run-recommendation.ts b/src/cli/operations/recommendation/run-recommendation.ts deleted file mode 100644 index 42ff863cc..000000000 --- a/src/cli/operations/recommendation/run-recommendation.ts +++ /dev/null @@ -1,623 +0,0 @@ -/** - * Orchestrates running a Recommendation: - * 1. Resolve agent and evaluator from project - * 2. Build recommendationConfig from CLI inputs - * 3. Call StartRecommendation (creates resource, returns 202) - * 4. Poll GetRecommendation until terminal status - * 5. Return result with optimized artifact - */ -import { ConfigIO, ResourceNotFoundError, TimeoutError, ValidationError, toError } from '../../../lib'; -import type { DeployedState } from '../../../schema'; -import type { - RecommendationConfig, - RecommendationResult, - RecommendationType, - SessionSpan, -} from '../../aws/agentcore-recommendation'; -import { getRecommendation, startRecommendation } from '../../aws/agentcore-recommendation'; -import { runtimeLogGroup } from '../../aws/cloudwatch'; -import { arnPrefix } from '../../aws/partition'; -import { detectRegion } from '../../aws/region'; -import { ExecLogger } from '../../logging/exec-logger'; -import { DEFAULT_POLL_INTERVAL_MS, MAX_POLL_DURATION_MS, MAX_POLL_RETRIES, TERMINAL_STATUSES } from './constants'; -import { fetchSessionSpans } from './fetch-session-spans'; -import type { RunRecommendationCommandOptions, RunRecommendationCommandResult } from './types'; -import { readFileSync } from 'fs'; - -export async function runRecommendationCommand( - options: RunRecommendationCommandOptions -): Promise { - const { pollIntervalMs = 
DEFAULT_POLL_INTERVAL_MS, onProgress } = options; - let logger: ExecLogger | undefined; - try { - logger = new ExecLogger({ command: 'recommend' }); - } catch { - // Logger creation can fail in tests or when no project root exists — non-fatal - } - - try { - logger?.startStep('Load project config'); - // 1. Read project config and deployed state - const configIO = new ConfigIO(); - const [projectSpec, deployedState, awsTargets] = await Promise.all([ - configIO.readProjectSpec(), - configIO.readDeployedState(), - configIO.resolveAWSDeploymentTargets(), - ]); - - const targetRegion = awsTargets.length > 0 ? awsTargets[0]!.region : undefined; - const { region: detectedRegion } = await detectRegion(); - const region = options.region ?? targetRegion ?? detectedRegion; - const stage = process.env.AGENTCORE_STAGE?.toLowerCase() ?? 'prod'; - logger?.log(`Region: ${region}, Stage: ${stage}`); - logger?.endStep('success'); - - // 2. Resolve agent from deployed state (needed for log group ARNs) - logger?.startStep('Resolve agent and evaluators'); - const agentState = resolveAgentState(deployedState, options.agent); - if (!agentState) { - logger?.log(`Agent "${options.agent}" not found in deployed state`, 'error'); - logger?.endStep('error', `Agent "${options.agent}" not deployed`); - logger?.finalize(false); - return { - success: false, - error: new Error(`Agent "${options.agent}" not deployed. Run \`agentcore deploy\` first.`), - logFilePath: logger?.logFilePath, - }; - } - logger?.log(`Agent: ${options.agent} (runtime: ${agentState.runtimeId})`); - - // 3. Resolve evaluator ID/ARN (API accepts exactly one for system-prompt, none for tool-desc) - const evaluatorIds: string[] = []; - for (const evaluator of options.evaluators) { - const evaluatorId = resolveEvaluatorId(deployedState, evaluator, region); - if (!evaluatorId) { - return { - success: false, - error: new Error( - `Evaluator "${evaluator}" not found in deployed state. 
Use a Builtin.* name, a full ARN, or deploy a custom evaluator first.` - ), - logFilePath: logger?.logFilePath, - }; - } - evaluatorIds.push(evaluatorId); - } - if (options.type === 'SYSTEM_PROMPT_RECOMMENDATION' && evaluatorIds.length !== 1) { - return { - success: false, - error: new ValidationError('System prompt recommendations require exactly one evaluator.'), - logFilePath: logger?.logFilePath, - }; - } - logger?.log(`Evaluators: ${evaluatorIds.join(', ') || '(none)'}`); - logger?.endStep('success'); - - // 4. Read input content (if from file) - let inlineContent: string | undefined; - if (options.inputSource === 'file' && options.promptFile) { - inlineContent = readFileSync(options.promptFile, 'utf-8'); - } else if (options.inputSource === 'inline') { - inlineContent = options.inlineContent; - } - - // Validate that system prompt content is non-empty (API rejects empty text) - if ( - options.type === 'SYSTEM_PROMPT_RECOMMENDATION' && - options.inputSource !== 'config-bundle' && - !inlineContent?.trim() - ) { - return { - success: false, - error: new ValidationError( - 'System prompt content is required. Provide via --inline, --prompt-file, or --bundle-name.' - ), - logFilePath: logger?.logFilePath, - }; - } - - // 5. Extract account ID from agent runtime ARN - const accountId = extractAccountIdFromArn(agentState.runtimeArn); - - // 5b. Resolve config bundle ARN from deployed state (if using config bundle) - let bundleArn: string | undefined; - if (options.inputSource === 'config-bundle' && options.bundleName) { - if (options.bundleName.startsWith('arn:')) { - // Already an ARN (e.g. from TUI which stores the ARN directly) - bundleArn = options.bundleName; - } else { - // Human-readable name (e.g. from CLI --bundle-name flag) — resolve from deployed state - for (const targetName of Object.keys(deployedState.targets ?? 
{})) { - const target = deployedState.targets?.[targetName]; - const bundle = target?.resources?.configBundles?.[options.bundleName]; - if (bundle?.bundleArn) { - bundleArn = bundle.bundleArn; - break; - } - } - if (!bundleArn) { - return { - success: false, - error: new ResourceNotFoundError( - `Config bundle "${options.bundleName}" not found in deployed state. Run \`agentcore deploy\` first.` - ), - logFilePath: logger?.logFilePath, - }; - } - } - logger?.log(`Resolved bundle ARN: ${bundleArn}`); - } - - // 5c. Resolve short-form systemPromptJsonPath (e.g. "systemPrompt") to full JSONPath - let resolvedSystemPromptJsonPath = options.systemPromptJsonPath; - if ( - options.inputSource === 'config-bundle' && - options.bundleName && - resolvedSystemPromptJsonPath && - !resolvedSystemPromptJsonPath.startsWith('$') - ) { - // User provided a short field name like "systemPrompt" — resolve from agentcore.json - const bundleName = options.bundleName.startsWith('arn:') - ? // Find bundle name from ARN by matching deployed state - Object.values(deployedState.targets) - .flatMap(t => Object.entries(t.resources?.configBundles ?? {})) - .find(([, b]) => b.bundleArn === options.bundleName)?.[0] - : options.bundleName; - - if (bundleName) { - const projBundle = projectSpec.configBundles?.find(b => b.name === bundleName); - if (projBundle?.components) { - const subPath = resolvedSystemPromptJsonPath; - // Use the first component key, resolved to a real ARN - const firstComponentKey = Object.keys(projBundle.components)[0]; - if (firstComponentKey) { - const resolvedKey = resolveComponentKeyForJsonPath(firstComponentKey, deployedState); - resolvedSystemPromptJsonPath = `$.${resolvedKey}.configuration.${subPath}`; - logger?.log(`Resolved short JSONPath "${subPath}" → "${resolvedSystemPromptJsonPath}"`); - } - } - } - } - - // 6. 
Build recommendationConfig based on type - const recommendationConfig = await buildRecommendationConfig({ - type: options.type, - inlineContent, - bundleArn, - bundleVersion: options.bundleVersion, - systemPromptJsonPath: resolvedSystemPromptJsonPath, - toolDescJsonPaths: options.toolDescJsonPaths, - inputSource: options.inputSource, - tools: options.tools, - traceSource: options.traceSource, - lookbackDays: options.lookbackDays, - sessionIds: options.sessionIds, - spansFile: options.spansFile, - runtimeId: agentState.runtimeId, - accountId, - region, - evaluatorIds, - onProgress, - logger, - }); - - // 7. Start the recommendation - logger?.startStep('Start recommendation'); - const recommendationName = options.recommendationName ?? `${projectSpec.name}_${options.agent}_${Date.now()}`; - onProgress?.('starting', `Starting recommendation "${recommendationName}"...`); - - const startPayload = { - region, - name: recommendationName, - type: options.type, - recommendationConfig, - }; - logger?.log(`Request payload:\n${JSON.stringify(startPayload, null, 2)}`); - - const startResult = await startRecommendation(startPayload); - - logger?.log(`Response: ${JSON.stringify(startResult, null, 2)}`); - logger?.endStep('success'); - onProgress?.('started', `Recommendation created: ${startResult.recommendationId} (status: ${startResult.status})`); - options.onStarted?.({ recommendationId: startResult.recommendationId, region }); - - // 8. Poll GetRecommendation until terminal status - logger?.startStep('Poll for completion'); - const maxDurationMs = options.maxPollDurationMs ?? 
MAX_POLL_DURATION_MS; - const pollStartTime = Date.now(); - let currentStatus = startResult.status; - let consecutiveFailures = 0; - - while (!TERMINAL_STATUSES.has(currentStatus)) { - await sleep(pollIntervalMs); - - // Check max poll duration - if (Date.now() - pollStartTime > maxDurationMs) { - logger?.log(`Max poll duration (${maxDurationMs}ms) exceeded`, 'error'); - logger?.endStep('error', 'Poll timeout'); - logger?.finalize(false); - return { - success: false, - error: new TimeoutError( - `Polling timed out after ${Math.round(maxDurationMs / 60000)} minutes. The recommendation may still be running server-side.\nRecommendation ID: ${startResult.recommendationId}` - ), - recommendationId: startResult.recommendationId, - status: currentStatus, - logFilePath: logger?.logFilePath, - }; - } - - // Poll with retry for transient failures - let pollResult; - try { - pollResult = await getRecommendation({ - region, - recommendationId: startResult.recommendationId, - }); - consecutiveFailures = 0; - } catch (pollErr) { - consecutiveFailures++; - const pollErrMsg = pollErr instanceof Error ? 
pollErr.message : String(pollErr); - logger?.log(`Poll attempt failed (${consecutiveFailures}/${MAX_POLL_RETRIES}): ${pollErrMsg}`, 'error'); - - if (consecutiveFailures >= MAX_POLL_RETRIES) { - logger?.endStep('error', `${MAX_POLL_RETRIES} consecutive poll failures`); - logger?.finalize(false); - return { - success: false, - error: new TimeoutError( - `Polling failed after ${MAX_POLL_RETRIES} consecutive errors: ${pollErrMsg}\nThe recommendation may still be running server-side.\nRecommendation ID: ${startResult.recommendationId}` - ), - recommendationId: startResult.recommendationId, - status: currentStatus, - logFilePath: logger?.logFilePath, - }; - } - onProgress?.('polling', `Poll error, retrying (${consecutiveFailures}/${MAX_POLL_RETRIES})...`); - continue; - } - - currentStatus = pollResult.status; - onProgress?.('polling', `Status: ${currentStatus}`); - - if (TERMINAL_STATUSES.has(currentStatus)) { - if (currentStatus === 'COMPLETED' || currentStatus === 'SUCCEEDED') { - logger?.log(`Completed. Result:\n${JSON.stringify(pollResult.recommendationResult, null, 2)}`); - logger?.endStep('success'); - logger?.finalize(true); - return { - success: true, - recommendationId: startResult.recommendationId, - status: currentStatus, - result: pollResult.recommendationResult, - region, - startedAt: pollResult.createdAt, - completedAt: pollResult.completedAt, - logFilePath: logger?.logFilePath, - }; - } - - // Extract error details from the FAILED response - const failureDetails = extractFailureDetails(pollResult); - logger?.log(`Terminal status: ${currentStatus}`, 'error'); - logger?.log(`Full poll response:\n${JSON.stringify(pollResult, null, 2)}`, 'error'); - if (failureDetails) logger?.log(`Failure details: ${failureDetails}`, 'error'); - logger?.endStep('error', `Status: ${currentStatus}`); - logger?.finalize(false); - // Log request IDs for debugging (only in log file, not shown in TUI) - const requestIds = [ - startResult.requestId ? 
`Start: ${startResult.requestId}` : '', - pollResult.requestId ? `Poll: ${pollResult.requestId}` : '', - ] - .filter(Boolean) - .join(', '); - if (requestIds) logger?.log(`Request IDs: ${requestIds}`, 'error'); - - return { - success: false, - error: new Error( - failureDetails - ? `Recommendation failed: ${failureDetails}` - : `Recommendation finished with status: ${currentStatus}` - ), - recommendationId: startResult.recommendationId, - status: currentStatus, - logFilePath: logger?.logFilePath, - }; - } - } - - // Should not reach here, but handle gracefully - logger?.log(`Unexpected terminal status: ${currentStatus}`, 'error'); - logger?.endStep('error', `Unexpected status: ${currentStatus}`); - logger?.finalize(false); - return { - success: false, - error: new Error(`Recommendation ended with unexpected status: ${currentStatus}`), - recommendationId: startResult.recommendationId, - status: currentStatus, - logFilePath: logger?.logFilePath, - }; - } catch (err) { - const errorMsg = err instanceof Error ? err.message : String(err); - logger?.log(`Error: ${errorMsg}`, 'error'); - logger?.endStep('error', errorMsg); - logger?.finalize(false); - return { - success: false, - error: toError(err), - logFilePath: logger?.logFilePath, - }; - } -} - -// ============================================================================ -// Helpers -// ============================================================================ - -function resolveAgentState( - deployedState: DeployedState, - agentName: string -): { runtimeId: string; runtimeArn: string } | undefined { - for (const target of Object.values(deployedState.targets)) { - const agent = target.resources?.runtimes?.[agentName]; - if (agent) return agent; - } - return undefined; -} - -/** - * Resolve an evaluator name to a full ARN. - * Returns undefined if the evaluator cannot be resolved. 
- */ -function resolveEvaluatorId(deployedState: DeployedState, evaluator: string, region: string): string | undefined { - // Already a full ARN — use as-is - if (evaluator.startsWith('arn:')) { - return evaluator; - } - // Builtin shorthand → expand to full ARN - if (evaluator.startsWith('Builtin.')) { - return `${arnPrefix(region)}:bedrock-agentcore:::evaluator/${evaluator}`; - } - // Look up custom evaluator from deployed state - for (const target of Object.values(deployedState.targets)) { - const evalState = target.resources?.evaluators?.[evaluator]; - if (evalState) return evalState.evaluatorArn; - } - return undefined; -} - -/** - * Extract the 12-digit AWS account ID from an ARN. - * Falls back to '*' if the ARN format is unexpected. - */ -function extractAccountIdFromArn(arn: string): string { - const parts = arn.split(':'); - return parts[4] && /^\d{12}$/.test(parts[4]) ? parts[4] : '*'; -} - -interface BuildConfigOptions { - type: RecommendationType; - inlineContent?: string; - bundleArn?: string; - bundleVersion?: string; - systemPromptJsonPath?: string; - toolDescJsonPaths?: { toolName: string; toolDescriptionJsonPath: string }[]; - inputSource: string; - tools?: string[]; - traceSource: string; - lookbackDays?: number; - sessionIds?: string[]; - spansFile?: string; - runtimeId: string; - accountId: string; - region: string; - evaluatorIds: string[]; - onProgress?: (status: string, message: string) => void; - logger?: ExecLogger; -} - -async function buildRecommendationConfig(opts: BuildConfigOptions): Promise { - // Build agent traces — either from a spans file (inline session spans) or CloudWatch - let agentTraces; - - if (opts.traceSource === 'spans-file' && opts.spansFile) { - // Explicit spans file — read and use as inline sessionSpans - const spansContent = readFileSync(opts.spansFile, 'utf-8'); - const sessionSpans = JSON.parse(spansContent) as SessionSpan | SessionSpan[]; - agentTraces = { - sessionSpans: Array.isArray(sessionSpans) ? 
sessionSpans : [sessionSpans], - }; - } else if (opts.traceSource === 'sessions' && opts.sessionIds && opts.sessionIds.length > 0) { - // Session IDs selected — auto-fetch from both log groups and use inline sessionSpans. - // The CloudWatch trace config does not support filtering by multiple session IDs, - // so we fetch spans client-side and send them inline. - opts.onProgress?.('fetching-spans', 'Fetching session spans from CloudWatch...'); - opts.logger?.log( - 'Auto-fetching spans for selected sessions (CloudWatch config does not support session ID filtering)' - ); - - const allSpans = []; - for (const sessionId of opts.sessionIds) { - const result = await fetchSessionSpans({ - region: opts.region, - runtimeId: opts.runtimeId, - sessionId, - lookbackDays: opts.lookbackDays ?? 7, - onProgress: msg => { - opts.logger?.log(msg); - opts.onProgress?.('fetching-spans', msg); - }, - }); - allSpans.push(...result.spans); - } - - if (allSpans.length === 0) { - throw new Error( - 'No spans found for the specified session(s). Ensure the agent has been invoked and traces have propagated to CloudWatch (may take 5-10 minutes).' - ); - } - - opts.logger?.log(`Total spans fetched: ${allSpans.length}`); - opts.onProgress?.('fetching-spans', `Fetched ${allSpans.length} spans`); - agentTraces = { sessionSpans: allSpans }; - } else { - // Lookback-based path — use cloudwatchLogs with time range - const runtimeLogGroupArn = `${arnPrefix(opts.region)}:logs:${opts.region}:${opts.accountId}:log-group:${runtimeLogGroup(opts.runtimeId)}`; - const spansLogGroupArn = `${arnPrefix(opts.region)}:logs:${opts.region}:${opts.accountId}:log-group:aws/spans`; - - // Derive service name: strip the random hash suffix from runtimeId - // runtimeId format: {project}_{agent}-{hash} → serviceName: {project}_{agent}.DEFAULT - const serviceName = opts.runtimeId.replace(/-[^-]+$/, '.DEFAULT'); - - const lookbackDays = opts.lookbackDays ?? 
7; - agentTraces = { - cloudwatchLogs: { - logGroupArns: [runtimeLogGroupArn, spansLogGroupArn], - serviceNames: [serviceName], - startTime: new Date(Date.now() - lookbackDays * 24 * 60 * 60 * 1000).toISOString(), - endTime: new Date().toISOString(), - }, - }; - } - - const evaluationConfig: import('../../aws/agentcore-recommendation').RecommendationEvaluationConfig = { - evaluators: [{ evaluatorArn: opts.evaluatorIds[0]! }], - }; - - // Validate required fields for config-bundle source (API requires all three) - if (opts.inputSource === 'config-bundle' && opts.bundleArn && !opts.bundleVersion) { - throw new Error('Config bundle version is required. Provide --bundle-version or deploy the bundle first.'); - } - - if (opts.inputSource === 'config-bundle' && opts.bundleArn) { - if (opts.type === 'SYSTEM_PROMPT_RECOMMENDATION' && !opts.systemPromptJsonPath) { - throw new Error( - 'Config bundle requires --system-prompt-json-path to locate the system prompt field.\n' + - "Use the field name (e.g. --system-prompt-json-path 'systemPrompt') and it will be resolved from agentcore.json.\n" + - "Or provide the full JSONPath (e.g. '$.ARN.configuration.systemPrompt')." - ); - } - if (opts.type === 'TOOL_DESCRIPTION_RECOMMENDATION' && !opts.toolDescJsonPaths?.length) { - throw new Error( - 'Config bundle requires --tool-desc-json-path to locate tool description fields.\n' + - "Example: --tool-desc-json-path 'toolName:$.ARN.configuration.toolDescription'" - ); - } - } - - if (opts.type === 'SYSTEM_PROMPT_RECOMMENDATION') { - return { - systemPromptRecommendationConfig: { - systemPrompt: - opts.inputSource === 'config-bundle' && opts.bundleArn - ? { - configurationBundle: { - bundleArn: opts.bundleArn, - versionId: opts.bundleVersion!, - systemPromptJsonPath: opts.systemPromptJsonPath, - }, - } - : { text: opts.inlineContent ?? 
'' }, - agentTraces, - evaluationConfig, - }, - }; - } - - // TOOL_DESCRIPTION_RECOMMENDATION - if (opts.inputSource === 'config-bundle' && opts.bundleArn && opts.toolDescJsonPaths?.length) { - // Config bundle source — pass bundle reference with JSON paths for server-side resolution - return { - toolDescriptionRecommendationConfig: { - toolDescription: { - configurationBundle: { - bundleArn: opts.bundleArn, - versionId: opts.bundleVersion!, - tools: opts.toolDescJsonPaths, - }, - }, - agentTraces, - }, - }; - } - - // Inline/file source — parse "toolName:description" pairs from tools array - const toolEntries = (opts.tools ?? []).map(t => { - const colonIdx = t.indexOf(':'); - if (colonIdx > 0) { - return { toolName: t.slice(0, colonIdx), toolDescription: { text: t.slice(colonIdx + 1) } }; - } - return { toolName: t, toolDescription: { text: opts.inlineContent ?? '' } }; - }); - - return { - toolDescriptionRecommendationConfig: { - toolDescription: { - toolDescriptionText: { - tools: toolEntries, - }, - }, - agentTraces, - }, - }; -} - -/** - * Extract error details from a FAILED recommendation response. - * The API populates errorCode/errorMessage in the result, and statusReasons at top level. - */ -function extractFailureDetails(pollResult: { - statusReasons?: string[]; - recommendationResult?: RecommendationResult; -}): string | undefined { - const parts: string[] = []; - - if (pollResult.statusReasons?.length) { - parts.push(pollResult.statusReasons.join('; ')); - } - - const result = pollResult.recommendationResult; - if (result) { - const errorSource = result.systemPromptRecommendationResult ?? result.toolDescriptionRecommendationResult; - if (errorSource) { - if (errorSource.errorCode) parts.push(`[${errorSource.errorCode}]`); - if (errorSource.errorMessage) parts.push(errorSource.errorMessage); - } - } - - return parts.length > 0 ? 
parts.join(' ') : undefined; -} - -/** - * Resolve a component key (which may be a placeholder like {{runtime:name}}) - * to its real ARN from deployed state. Returns the key unchanged if not a placeholder. - */ -function resolveComponentKeyForJsonPath(key: string, deployedState: DeployedState): string { - if (key.startsWith('arn:')) return key; - - const rtMatch = /^\{\{runtime:(.+)\}\}$/.exec(key); - if (rtMatch) { - const rtName = rtMatch[1]!; - for (const target of Object.values(deployedState.targets)) { - const rt = target.resources?.runtimes?.[rtName]; - if (rt) return rt.runtimeArn; - } - } - - const gwMatch = /^\{\{gateway:(.+)\}\}$/.exec(key); - if (gwMatch) { - const gwName = gwMatch[1]!; - for (const target of Object.values(deployedState.targets)) { - const httpGw = target.resources?.httpGateways?.[gwName]; - if (httpGw) return httpGw.gatewayArn; - const mcpGw = target.resources?.mcp?.gateways?.[gwName]; - if (mcpGw) return mcpGw.gatewayArn; - } - } - - return key; -} - -function sleep(ms: number): Promise { - return new Promise(resolve => setTimeout(resolve, ms)); -} diff --git a/src/cli/operations/recommendation/types.ts b/src/cli/operations/recommendation/types.ts deleted file mode 100644 index 487681a2e..000000000 --- a/src/cli/operations/recommendation/types.ts +++ /dev/null @@ -1,65 +0,0 @@ -/** - * Shared types for the recommendation feature. - */ -import type { Result } from '../../../lib/result'; -import type { RecommendationResult, RecommendationType } from '../../aws/agentcore-recommendation'; - -export type { RecommendationType } from '../../aws/agentcore-recommendation'; - -/** CLI-facing input source kind (maps to API config shape). */ -export type RecommendationInputSourceKind = 'config-bundle' | 'inline' | 'file'; - -/** CLI-facing trace source kind (maps to API agentTraces shape). 
*/ -export type TraceSourceKind = 'cloudwatch' | 'sessions' | 'spans-file'; - -export interface RunRecommendationCommandOptions { - /** What to optimize */ - type: RecommendationType; - /** Agent name (from project) */ - agent: string; - /** Evaluator name, Builtin.* ID, or ARN (API accepts exactly one for system-prompt) */ - evaluators: string[]; - /** Input source kind */ - inputSource: RecommendationInputSourceKind; - /** Config bundle name (when inputSource is 'config-bundle') */ - bundleName?: string; - /** Config bundle version (when inputSource is 'config-bundle') */ - bundleVersion?: string; - /** JSONPath to the system prompt field within the config bundle (when inputSource is 'config-bundle') */ - systemPromptJsonPath?: string; - /** Tool name → JSONPath pairs for tool descriptions within the config bundle (when inputSource is 'config-bundle') */ - toolDescJsonPaths?: { toolName: string; toolDescriptionJsonPath: string }[]; - /** Inline content (when inputSource is 'inline') */ - inlineContent?: string; - /** File path (when inputSource is 'file') */ - promptFile?: string; - /** Specific tool names and descriptions (for TOOL_DESCRIPTION_RECOMMENDATION) */ - tools?: string[]; - /** Trace source kind */ - traceSource: TraceSourceKind; - /** Lookback days (when traceSource is 'cloudwatch') */ - lookbackDays?: number; - /** Session IDs (when traceSource is 'sessions') — used to filter CloudWatch traces */ - sessionIds?: string[]; - /** Path to JSON file containing session spans (when traceSource is 'spans-file') */ - spansFile?: string; - /** Region override */ - region?: string; - /** Optional recommendation name */ - recommendationName?: string; - /** Poll interval in ms */ - pollIntervalMs?: number; - /** Max polling duration in ms before timing out */ - maxPollDurationMs?: number; - /** Progress callback */ - onProgress?: (status: string, message: string) => void; - /** Called once the recommendation has been created, with ID and region for cancellation 
*/ - onStarted?: (info: { recommendationId: string; region: string }) => void; -} - -export type RunRecommendationCommandResult = Result<{ - result?: RecommendationResult; - region?: string; - startedAt?: string; - completedAt?: string; -}> & { recommendationId?: string; status?: string; logFilePath?: string }; diff --git a/src/cli/operations/remove/__tests__/remove-agent-ops.test.ts b/src/cli/operations/remove/__tests__/remove-agent-ops.test.ts index ece57dcb9..0d1ef6042 100644 --- a/src/cli/operations/remove/__tests__/remove-agent-ops.test.ts +++ b/src/cli/operations/remove/__tests__/remove-agent-ops.test.ts @@ -47,6 +47,7 @@ const makeProject = (agentNames: string[]) => ({ managedBy: 'CDK' as const, runtimes: agentNames.map(name => ({ name })), memories: [], + knowledgeBases: [], credentials: [], }); diff --git a/src/cli/operations/remove/__tests__/remove-identity-ops.test.ts b/src/cli/operations/remove/__tests__/remove-identity-ops.test.ts index f34cee7f3..570b88cdf 100644 --- a/src/cli/operations/remove/__tests__/remove-identity-ops.test.ts +++ b/src/cli/operations/remove/__tests__/remove-identity-ops.test.ts @@ -45,6 +45,7 @@ const makeProject = ( managedBy: 'CDK' as const, runtimes: [], memories: [], + knowledgeBases: [], credentials: credNames.map(name => ({ name, authorizerType: 'ApiKeyCredentialProvider' })), agentCoreGateways, }); diff --git a/src/cli/primitives/ABTestPrimitive.ts b/src/cli/primitives/ABTestPrimitive.ts deleted file mode 100644 index 7dc2a7fd4..000000000 --- a/src/cli/primitives/ABTestPrimitive.ts +++ /dev/null @@ -1,732 +0,0 @@ -import { ResourceNotFoundError, findConfigRoot, serializeResult, toError } from '../../lib'; -import type { Result } from '../../lib/result'; -import type { ABTest } from '../../schema/schemas/primitives/ab-test'; -import { ABTestSchema } from '../../schema/schemas/primitives/ab-test'; -import { getErrorMessage } from '../errors'; -import type { RemovalPreview, SchemaChange } from '../operations/remove/types'; 
-import { withCommandRunTelemetry } from '../telemetry/cli-command-run.js'; -import { requireTTY } from '../tui/guards/tty'; -import { BasePrimitive } from './BasePrimitive'; -import type { AddResult, AddScreenComponent, RemovableResource } from './types'; -import type { Command } from '@commander-js/extra-typings'; - -export type GatewayChoice = { type: 'create-new' } | { type: 'existing-http'; name: string }; - -export interface AddABTestOptions { - name: string; - description?: string; - agent: string; - gatewayChoice?: GatewayChoice; - roleArn?: string; - controlBundle: string; - controlVersion: string; - treatmentBundle: string; - treatmentVersion: string; - controlWeight: number; - treatmentWeight: number; - onlineEval: string; - trafficHeaderName?: string; - maxDurationDays?: number; - enableOnCreate?: boolean; -} - -export interface AddTargetBasedABTestOptions { - name: string; - description?: string; - gateway: string; - runtime: string; - roleArn?: string; - controlEndpoint: string; - treatmentEndpoint: string; - controlWeight: number; - treatmentWeight: number; - controlOnlineEval: string; - treatmentOnlineEval: string; - gatewayFilter?: string; - enableOnCreate?: boolean; -} - -export type RemovableABTest = RemovableResource; - -/** - * ABTestPrimitive handles all A/B test add/remove operations. - * - * A/B tests split traffic between two config bundle versions (control vs - * treatment) through a gateway, with online evaluation tracking performance. - * They are created via direct API calls (not CloudFormation) and stored in - * agentcore.json for lifecycle management. 
- */ -export class ABTestPrimitive extends BasePrimitive { - readonly kind = 'ab-test' as const; - readonly label = 'AB Test'; - override readonly article = 'an'; - readonly primitiveSchema = ABTestSchema; - - async add(options: AddABTestOptions): Promise> { - try { - const abTest = await this.createABTest(options); - return { success: true, abTestName: abTest.name }; - } catch (err) { - return { success: false, error: toError(err) }; - } - } - - async remove(testName: string, options?: { deleteGateway?: boolean }): Promise { - try { - const project = await this.readProjectSpec(); - - const index = (project.abTests ?? []).findIndex(t => t.name === testName); - if (index === -1) { - return { success: false, error: new ResourceNotFoundError(`AB test "${testName}" not found.`) }; - } - - const removedTest = project.abTests[index]!; - project.abTests.splice(index, 1); - - // Cascade: remove auto-created online eval configs for target-based tests - // Only remove eval configs that were auto-created (matching the {testName}_eval_ prefix pattern) - if (removedTest.mode === 'target-based' && 'perVariantOnlineEvaluationConfig' in removedTest.evaluationConfig) { - const autoCreatedPrefix = `${testName}_eval_`; - const evalNames = removedTest.evaluationConfig.perVariantOnlineEvaluationConfig - .map(pv => pv.onlineEvaluationConfigArn) - .filter(name => name.startsWith(autoCreatedPrefix)); - project.onlineEvalConfigs = project.onlineEvalConfigs.filter(c => !evalNames.includes(c.name)); - } - - // --delete-gateway: cascade remove gateway targets and orphaned gateways - if (options?.deleteGateway && removedTest.gatewayRef) { - const gwMatch = /^\{\{gateway:(.+)\}\}$/.exec(removedTest.gatewayRef); - if (gwMatch) { - const gwName = gwMatch[1]!; - - // Remove gateway targets that were created for this AB test's variants - if (removedTest.mode === 'target-based') { - const targetNames = removedTest.variants - .map(v => v.variantConfiguration.target?.targetName) - .filter((n): n is 
string => !!n); - const gw = (project.httpGateways ?? []).find(g => g.name === gwName); - if (gw?.targets) { - gw.targets = gw.targets.filter(t => !targetNames.includes(t.name)); - } - } - - // Remove gateway if no other AB tests reference it - const stillReferenced = (project.abTests ?? []).some(t => { - const m = /^\{\{gateway:(.+)\}\}$/.exec(t.gatewayRef); - return m?.[1] === gwName; - }); - if (!stillReferenced) { - project.httpGateways = (project.httpGateways ?? []).filter(gw => gw.name !== gwName); - } - } - } - - await this.writeProjectSpec(project); - - return { success: true }; - } catch (err) { - return { success: false, error: toError(err) }; - } - } - - async previewRemove(testName: string): Promise { - const project = await this.readProjectSpec(); - - const abTest = (project.abTests ?? []).find(t => t.name === testName); - if (!abTest) { - throw new Error(`AB test "${testName}" not found.`); - } - - const summary: string[] = [`Removing AB test: ${testName}`]; - const schemaChanges: SchemaChange[] = []; - - const testIndex = (project.abTests ?? []).findIndex(t => t.name === testName); - const afterSpec = { - ...project, - abTests: (project.abTests ?? []).filter(t => t.name !== testName), - httpGateways: [...(project.httpGateways ?? [])], - }; - - // Check if the gateway would be orphaned - const test = (project.abTests ?? [])[testIndex]; - if (test?.gatewayRef) { - const gwMatch = /^\{\{gateway:(.+)\}\}$/.exec(test.gatewayRef); - if (gwMatch) { - const gwName = gwMatch[1]; - const otherTests = (project.abTests ?? []).filter((_, i) => i !== testIndex); - const stillReferenced = otherTests.some(t => { - const m = /^\{\{gateway:(.+)\}\}$/.exec(t.gatewayRef); - return m && m[1] === gwName; - }); - if (!stillReferenced) { - summary.push(`Also removing HTTP gateway: ${gwName} (no other AB tests reference it)`); - afterSpec.httpGateways = (project.httpGateways ?? 
[]).filter(gw => gw.name !== gwName); - } - } - } - - schemaChanges.push({ - file: 'agentcore/agentcore.json', - before: project, - after: afterSpec, - }); - - return { summary, directoriesToDelete: [], schemaChanges }; - } - - async getRemovable(): Promise { - try { - const project = await this.readProjectSpec(); - return (project.abTests ?? []).map(t => ({ name: t.name })); - } catch { - return []; - } - } - - async getAllNames(): Promise { - try { - const project = await this.readProjectSpec(); - return (project.abTests ?? []).map(t => t.name); - } catch { - return []; - } - } - - registerCommands(addCmd: Command, removeCmd: Command): void { - const abTestCmd = addCmd - .command('ab-test') - .description('[preview] Add an A/B test to the project') - .option('--mode ', 'config-bundle (default) or target-based') - .option('--name ', 'AB test name') - .option('--description ', 'AB test description') - .option('--runtime ', 'Runtime agent to A/B test') - .option('--role-arn ', 'IAM role ARN (auto-created if not provided)') - .option('--control-bundle ', 'Control config bundle name or ARN') - .option('--control-version ', 'Control config bundle version') - .option('--treatment-bundle ', 'Treatment config bundle name or ARN') - .option('--treatment-version ', 'Treatment config bundle version') - .option('--control-endpoint ', 'Endpoint qualifier for control') - .option('--treatment-endpoint ', 'Endpoint qualifier for treatment') - .option('--control-weight ', 'Traffic weight for control (1-100)', parseInt) - .option('--treatment-weight ', 'Traffic weight for treatment (1-100)', parseInt) - .option('--gateway ', 'HTTP gateway name') - .option('--online-eval ', 'Online evaluation config name or ARN') - .option('--control-online-eval ', 'Eval config name or ARN for control') - .option('--treatment-online-eval ', 'Eval config name or ARN for treatment') - .option('--gateway-filter ', 'Path pattern for routing') - .option('--traffic-header ', 'Header name for traffic 
routing') - // Hidden deprecated aliases for backwards compatibility - .option('--control-qualifier ', '') - .option('--treatment-qualifier ', '') - // TODO(post-preview): Re-enable --max-duration once configurable duration is launched. - // .option('--max-duration ', 'Maximum duration in days (1-90)', parseInt) - .option('--enable', 'Enable the AB test on creation') - .option('--json', 'Output as JSON'); - - // Hide mode-specific and deprecated flags from the default options list. - // They are shown in the grouped help text below instead. - const hiddenFromDefaultHelp = new Set([ - '--runtime', - '--control-bundle', - '--control-version', - '--treatment-bundle', - '--treatment-version', - '--online-eval', - '--traffic-header', - '--control-endpoint', - '--treatment-endpoint', - '--control-online-eval', - '--treatment-online-eval', - '--gateway-filter', - '--control-qualifier', - '--treatment-qualifier', - ]); - for (const opt of abTestCmd.options) { - if (hiddenFromDefaultHelp.has(opt.long ?? '')) { - opt.hidden = true; - } - } - - // Add grouped help text after the default options section - abTestCmd.addHelpText( - 'after', - ` -Config-Bundle Mode (--mode config-bundle) -- default - Split traffic between two config bundle versions. - --runtime Runtime agent to A/B test - --control-bundle Control config bundle name or ARN - --control-version Control config bundle version - --treatment-bundle Treatment config bundle name or ARN - --treatment-version Treatment config bundle version - --online-eval Online evaluation config name or ARN - --traffic-header Header name for traffic routing - -Target-Based Mode (--mode target-based) - Route traffic to different runtime endpoints. 
- --control-endpoint Endpoint for control target - --treatment-endpoint Endpoint for treatment target - --control-online-eval Eval config name or ARN for control - --treatment-online-eval Eval config name or ARN for treatment - --gateway-filter Path pattern for routing -` - ); - - abTestCmd.action( - async (cliOptions: { - mode?: string; - name?: string; - description?: string; - runtime?: string; - gateway?: string; - roleArn?: string; - controlBundle?: string; - controlVersion?: string; - treatmentBundle?: string; - treatmentVersion?: string; - controlEndpoint?: string; - controlQualifier?: string; // deprecated alias for --control-endpoint - treatmentEndpoint?: string; - treatmentQualifier?: string; // deprecated alias for --treatment-endpoint - controlWeight?: number; - treatmentWeight?: number; - onlineEval?: string; - controlOnlineEval?: string; - treatmentOnlineEval?: string; - gatewayFilter?: string; - trafficHeader?: string; - maxDuration?: number; - enable?: boolean; - json?: boolean; - }) => { - try { - if (!findConfigRoot()) { - console.error('No agentcore project found. Run `agentcore create` first.'); - process.exit(1); - } - - // Resolve deprecated aliases (--control-qualifier -> --control-endpoint, etc.) - const resolvedControlEndpoint = cliOptions.controlEndpoint ?? cliOptions.controlQualifier; - const resolvedTreatmentEndpoint = cliOptions.treatmentEndpoint ?? cliOptions.treatmentQualifier; - - if (cliOptions.name || cliOptions.json) { - const fail = (error: string) => { - if (cliOptions.json) { - console.log(JSON.stringify({ success: false, error })); - } else { - console.error(error); - } - process.exit(1); - }; - - const mode = cliOptions.mode ?? 'config-bundle'; - if (mode !== 'config-bundle' && mode !== 'target-based') { - fail(`Invalid --mode "${mode}". 
Must be one of: config-bundle, target-based`); - } - - if (!cliOptions.name) fail('--name is required'); - - // Target-based mode - if (mode === 'target-based') { - // Cross-validation: reject config-bundle flags - if (cliOptions.controlBundle) fail('--control-bundle cannot be used with --mode target-based'); - if (cliOptions.treatmentBundle) fail('--treatment-bundle cannot be used with --mode target-based'); - if (cliOptions.controlVersion) fail('--control-version cannot be used with --mode target-based'); - if (cliOptions.treatmentVersion) fail('--treatment-version cannot be used with --mode target-based'); - if (cliOptions.onlineEval) fail('--online-eval cannot be used with --mode target-based'); - - // Required flags - if (!cliOptions.gateway) fail('--gateway is required for target-based mode'); - if (!cliOptions.runtime) fail('--runtime is required for target-based mode'); - if (!resolvedControlEndpoint) fail('--control-endpoint is required for target-based mode'); - if (!resolvedTreatmentEndpoint) fail('--treatment-endpoint is required for target-based mode'); - if (cliOptions.controlWeight === undefined) fail('--control-weight is required'); - if (cliOptions.treatmentWeight === undefined) fail('--treatment-weight is required'); - - // Eval: require both online eval config names - if (!cliOptions.controlOnlineEval || !cliOptions.treatmentOnlineEval) { - fail( - '--control-online-eval and --treatment-online-eval are required. 
Create eval configs first with: agentcore add online-eval --endpoint ' - ); - } - - const result = await this.addTargetBased({ - name: cliOptions.name!, - description: cliOptions.description, - gateway: cliOptions.gateway!, - runtime: cliOptions.runtime!, - roleArn: cliOptions.roleArn, - controlEndpoint: resolvedControlEndpoint!, - treatmentEndpoint: resolvedTreatmentEndpoint!, - controlWeight: cliOptions.controlWeight!, - treatmentWeight: cliOptions.treatmentWeight!, - controlOnlineEval: cliOptions.controlOnlineEval!, - treatmentOnlineEval: cliOptions.treatmentOnlineEval!, - gatewayFilter: cliOptions.gatewayFilter, - enableOnCreate: cliOptions.enable, - }); - - if (cliOptions.json) { - console.log(JSON.stringify(serializeResult(result))); - } else if (result.success) { - console.log(`Added target-based AB test '${result.abTestName}'`); - } else { - console.error(result.error.message); - } - process.exit(result.success ? 0 : 1); - return; - } - - // Config-bundle mode (default) - // Cross-validation: reject target-based flags - if (cliOptions.gatewayFilter) fail('--gateway-filter requires --mode target-based'); - if (cliOptions.controlOnlineEval) fail('--control-online-eval requires --mode target-based'); - if (cliOptions.treatmentOnlineEval) fail('--treatment-online-eval requires --mode target-based'); - - if (!cliOptions.gateway && !cliOptions.runtime) - fail('--runtime is required (unless --gateway is provided)'); - if (!cliOptions.controlBundle) fail('--control-bundle is required'); - if (!cliOptions.controlVersion) fail('--control-version is required'); - if (!cliOptions.treatmentBundle) fail('--treatment-bundle is required'); - if (!cliOptions.treatmentVersion) fail('--treatment-version is required'); - if (cliOptions.controlWeight === undefined) fail('--control-weight is required'); - if (cliOptions.treatmentWeight === undefined) fail('--treatment-weight is required'); - if (!cliOptions.onlineEval) fail('--online-eval is required'); - - const result = await 
this.add({ - name: cliOptions.name!, - description: cliOptions.description, - agent: cliOptions.runtime ?? '', - gatewayChoice: cliOptions.gateway - ? { type: 'existing-http', name: cliOptions.gateway } - : { type: 'create-new' }, - roleArn: cliOptions.roleArn!, - controlBundle: cliOptions.controlBundle!, - controlVersion: cliOptions.controlVersion!, - treatmentBundle: cliOptions.treatmentBundle!, - treatmentVersion: cliOptions.treatmentVersion!, - controlWeight: cliOptions.controlWeight!, - treatmentWeight: cliOptions.treatmentWeight!, - onlineEval: cliOptions.onlineEval!, - trafficHeaderName: cliOptions.trafficHeader, - maxDurationDays: cliOptions.maxDuration, - enableOnCreate: cliOptions.enable, - }); - - if (cliOptions.json) { - console.log(JSON.stringify(serializeResult(result))); - } else if (result.success) { - console.log(`Added AB test '${result.abTestName}'`); - } else { - console.error(result.error.message); - } - process.exit(result.success ? 0 : 1); - } else { - // TUI fallback - const [{ render }, { default: React }, { AddFlow }] = await Promise.all([ - import('ink'), - import('react'), - import('../tui/screens/add/AddFlow'), - ]); - const { clear, unmount } = render( - React.createElement(AddFlow, { - isInteractive: false, - initialResource: 'ab-test', - onExit: () => { - clear(); - unmount(); - process.exit(0); - }, - }) - ); - } - } catch (error) { - if (cliOptions.json) { - console.log(JSON.stringify({ success: false, error: getErrorMessage(error) })); - } else { - console.error(getErrorMessage(error)); - } - process.exit(1); - } - } - ); - - removeCmd - .command(this.kind) - .description(`Remove ${this.article} ${this.label.toLowerCase()} from the project`) - .option('--name ', 'Name of resource to remove [non-interactive]') - .option('-y, --yes', 'Skip confirmation prompt [non-interactive]') - .option('--json', 'Output as JSON [non-interactive]') - .option('--delete-gateway', 'Also remove gateway targets and orphaned gateways (default: false)') 
- .action(async (cliOptions: { name?: string; yes?: boolean; json?: boolean; deleteGateway?: boolean }) => { - try { - if (!findConfigRoot()) { - console.error('No agentcore project found. Run `agentcore create` first.'); - process.exit(1); - } - - if (cliOptions.name || cliOptions.yes || cliOptions.json) { - if (!cliOptions.name) { - console.log(JSON.stringify({ success: false, error: '--name is required' })); - process.exit(1); - } - - const result = await withCommandRunTelemetry('remove.ab-test', {}, () => - this.remove(cliOptions.name!, { deleteGateway: cliOptions.deleteGateway }) - ); - console.log( - JSON.stringify({ - success: result.success, - resourceType: this.kind, - resourceName: cliOptions.name, - message: result.success ? `Removed ${this.label.toLowerCase()} '${cliOptions.name}'` : undefined, - error: !result.success ? result.error.message : undefined, - }) - ); - process.exit(result.success ? 0 : 1); - } else { - // TUI fallback - requireTTY(); - const [{ render }, { default: React }, { RemoveFlow }] = await Promise.all([ - import('ink'), - import('react'), - import('../tui/screens/remove'), - ]); - const { clear, unmount } = render( - React.createElement(RemoveFlow, { - isInteractive: false, - force: cliOptions.yes, - initialResourceType: this.kind, - initialResourceName: cliOptions.name, - onExit: () => { - clear(); - unmount(); - process.exit(0); - }, - }) - ); - } - } catch (error) { - if (cliOptions.json) { - console.log(JSON.stringify({ success: false, error: getErrorMessage(error) })); - } else { - console.error(`Error: ${getErrorMessage(error)}`); - } - process.exit(1); - } - }); - } - - addScreen(): AddScreenComponent { - return null; - } - - private async createABTest(options: AddABTestOptions): Promise { - const project = await this.readProjectSpec(); - - this.checkDuplicate(project.abTests ?? [], options.name); - - // Resolve gateway reference based on the user's choice - let gatewayRef: string; - const choice = options.gatewayChoice ?? 
{ type: 'create-new' }; - - if (choice.type === 'existing-http') { - // Reuse an existing HTTP gateway from the project spec - const existing = (project.httpGateways ?? []).find(gw => gw.name === choice.name); - if (!existing) { - throw new Error(`HTTP gateway "${choice.name}" not found in project.`); - } - gatewayRef = `{{gateway:${choice.name}}}`; - } else { - // Create new HTTP gateway — truncate name to fit 48-char limit - const httpGwName = `${options.name.replace(/_/g, '-').slice(0, 44)}-gw`; - const existingGw = (project.httpGateways ?? []).find(gw => gw.name === httpGwName); - if (existingGw) { - if (existingGw.runtimeRef !== options.agent) { - throw new Error( - `HTTP gateway "${httpGwName}" already exists with a different runtime (${existingGw.runtimeRef}). ` + - `Choose a different AB test name to avoid a gateway name collision.` - ); - } - } else { - project.httpGateways ??= []; - project.httpGateways.push({ - name: httpGwName, - runtimeRef: options.agent, - }); - } - gatewayRef = `{{gateway:${httpGwName}}}`; - } - - const abTest: ABTest = { - name: options.name, - mode: 'config-bundle', - ...(options.description && { description: options.description }), - gatewayRef, - ...(options.roleArn && { roleArn: options.roleArn }), - variants: [ - { - name: 'C', - weight: options.controlWeight, - variantConfiguration: { - configurationBundle: { - bundleArn: options.controlBundle, - bundleVersion: options.controlVersion, - }, - }, - }, - { - name: 'T1', - weight: options.treatmentWeight, - variantConfiguration: { - configurationBundle: { - bundleArn: options.treatmentBundle, - bundleVersion: options.treatmentVersion, - }, - }, - }, - ], - evaluationConfig: { - onlineEvaluationConfigArn: options.onlineEval, - }, - ...(options.trafficHeaderName && { - trafficAllocationConfig: { routeOnHeader: { headerName: options.trafficHeaderName } }, - }), - ...(options.maxDurationDays !== undefined && { maxDurationDays: options.maxDurationDays }), - ...(options.enableOnCreate 
!== undefined && { enableOnCreate: options.enableOnCreate }), - }; - - project.abTests ??= []; - project.abTests.push(abTest); - await this.writeProjectSpec(project); - - return abTest; - } - - async addTargetBased(options: AddTargetBasedABTestOptions): Promise> { - try { - const abTest = await this.createTargetBasedABTest(options); - return { success: true, abTestName: abTest.name }; - } catch (err) { - return { success: false, error: toError(err) }; - } - } - - private async createTargetBasedABTest(options: AddTargetBasedABTestOptions): Promise { - const project = await this.readProjectSpec(); - - this.checkDuplicate(project.abTests ?? [], options.name); - - // Validate runtime exists - const runtime = project.runtimes.find(r => r.name === options.runtime); - if (!runtime) { - throw new Error(`Runtime "${options.runtime}" not found in project.`); - } - - // Validate endpoints exist on the runtime - if (!runtime.endpoints?.[options.controlEndpoint]) { - throw new Error( - `Endpoint "${options.controlEndpoint}" not found on runtime "${options.runtime}". Add it with: agentcore add runtime-endpoint` - ); - } - if (!runtime.endpoints?.[options.treatmentEndpoint]) { - throw new Error( - `Endpoint "${options.treatmentEndpoint}" not found on runtime "${options.runtime}". Add it with: agentcore add runtime-endpoint` - ); - } - - // Auto-generate target names from runtime + qualifier - const controlTarget = `${options.runtime}-${options.controlEndpoint}`; - const treatmentTarget = `${options.runtime}-${options.treatmentEndpoint}`; - - // Auto-create HTTP gateway if it doesn't exist - let existing = (project.httpGateways ?? 
[]).find(gw => gw.name === options.gateway); - if (!existing) { - existing = { - name: options.gateway, - description: `HTTP gateway for AB test ${options.name}`, - runtimeRef: options.runtime, - targets: [ - { name: controlTarget, runtimeRef: options.runtime, qualifier: options.controlEndpoint }, - { name: treatmentTarget, runtimeRef: options.runtime, qualifier: options.treatmentEndpoint }, - ], - }; - project.httpGateways ??= []; - project.httpGateways.push(existing); - } else { - // Gateway exists — ensure targets exist - existing.targets ??= []; - if (!existing.targets.find(t => t.name === controlTarget)) { - existing.targets.push({ - name: controlTarget, - runtimeRef: options.runtime, - qualifier: options.controlEndpoint, - }); - } - if (!existing.targets.find(t => t.name === treatmentTarget)) { - existing.targets.push({ - name: treatmentTarget, - runtimeRef: options.runtime, - qualifier: options.treatmentEndpoint, - }); - } - } - const gatewayRef = `{{gateway:${options.gateway}}}`; - - // Look up online eval configs by name - const controlEvalConfig = project.onlineEvalConfigs.find(c => c.name === options.controlOnlineEval); - if (!controlEvalConfig) { - throw new Error( - `Online eval config '${options.controlOnlineEval}' not found. Create it first with: agentcore add online-eval` - ); - } - const treatmentEvalConfig = project.onlineEvalConfigs.find(c => c.name === options.treatmentOnlineEval); - if (!treatmentEvalConfig) { - throw new Error( - `Online eval config '${options.treatmentOnlineEval}' not found. 
Create it first with: agentcore add online-eval` - ); - } - - // Store eval names — post-deploy resolveOnlineEvalArn will resolve names to ARNs - const evaluationConfig: ABTest['evaluationConfig'] = { - perVariantOnlineEvaluationConfig: [ - { treatmentName: 'C' as const, onlineEvaluationConfigArn: options.controlOnlineEval }, - { treatmentName: 'T1' as const, onlineEvaluationConfigArn: options.treatmentOnlineEval }, - ], - }; - - const abTest: ABTest = { - name: options.name, - mode: 'target-based', - ...(options.description && { description: options.description }), - gatewayRef, - ...(options.roleArn && { roleArn: options.roleArn }), - variants: [ - { - name: 'C' as const, - weight: options.controlWeight, - variantConfiguration: { - target: { targetName: controlTarget }, - }, - }, - { - name: 'T1' as const, - weight: options.treatmentWeight, - variantConfiguration: { - target: { targetName: treatmentTarget }, - }, - }, - ], - evaluationConfig, - ...(options.gatewayFilter && { - gatewayFilter: { targetPaths: [options.gatewayFilter] }, - }), - ...(options.enableOnCreate !== undefined && { enableOnCreate: options.enableOnCreate }), - }; - - project.abTests ??= []; - project.abTests.push(abTest); - await this.writeProjectSpec(project); - - return abTest; - } -} diff --git a/src/cli/primitives/AgentPrimitive.tsx b/src/cli/primitives/AgentPrimitive.tsx index cc0fea66a..80ffa0fbf 100644 --- a/src/cli/primitives/AgentPrimitive.tsx +++ b/src/cli/primitives/AgentPrimitive.tsx @@ -322,10 +322,7 @@ export class AgentPrimitive extends BasePrimitive [...prev, val], [] as string[] ) - .option( - '--with-config-bundle', - 'Create a config bundle wired into the agent template [preview] [non-interactive]' - ) + .option('--with-config-bundle', 'Create a config bundle wired into the agent template [non-interactive]') .option('--json', 'Output as JSON [non-interactive]') .action(async options => { if (!findConfigRoot()) { diff --git a/src/cli/primitives/ConfigBundlePrimitive.ts 
b/src/cli/primitives/ConfigBundlePrimitive.ts index 92c798d8f..64e889447 100644 --- a/src/cli/primitives/ConfigBundlePrimitive.ts +++ b/src/cli/primitives/ConfigBundlePrimitive.ts @@ -3,9 +3,11 @@ import type { Result } from '../../lib/result'; import type { ConfigBundle } from '../../schema'; import { ConfigBundleSchema } from '../../schema'; import { getErrorMessage } from '../errors'; +import { isGatedFeaturesEnabled } from '../feature-flags'; import type { RemovalPreview, SchemaChange } from '../operations/remove/types'; import { BasePrimitive } from './BasePrimitive'; import type { AddResult, AddScreenComponent, RemovableResource } from './types'; +import { Option } from '@commander-js/extra-typings'; import type { Command } from '@commander-js/extra-typings'; import { readFileSync } from 'fs'; @@ -106,7 +108,7 @@ export class ConfigBundlePrimitive extends BasePrimitive', 'Bundle name') .option('--description ', 'Bundle description') .option( @@ -114,7 +116,12 @@ export class ConfigBundlePrimitive extends BasePrimitive}}, {{gateway:}}. Placeholders resolve to real ARNs at deploy time.' ) .option('--components-file ', 'Path to components JSON file (same format as --components)') - .option('--branch ', 'Branch name for versioning') + // Gated: custom branches blocked by upstream CFN read-back bug. Remove gate when service fixes GetConfigurationBundle. + .addOption( + isGatedFeaturesEnabled() + ? 
new Option('--branch ', 'Branch name for versioning') + : new Option('--branch ', 'Branch name for versioning').hideHelp() + ) .option('--commit-message ', 'Commit message for this version') .option('--json', 'Output as JSON') .action( @@ -166,7 +173,7 @@ export class ConfigBundlePrimitive extends BasePrimitive c.evaluators.includes(evaluatorName)); + const referencingConfigs = project.onlineEvalConfigs.filter(c => c.evaluators?.includes(evaluatorName)); if (referencingConfigs.length > 0) { const configNames = referencingConfigs.map(c => c.name).join(', '); return { @@ -116,7 +116,7 @@ export class EvaluatorPrimitive extends BasePrimitive c.evaluators.includes(evaluatorName)); + const referencingConfigs = project.onlineEvalConfigs.filter(c => c.evaluators?.includes(evaluatorName)); if (referencingConfigs.length > 0) { summary.push( `Blocked: Referenced by online eval config(s): ${referencingConfigs.map(c => c.name).join(', ')}. Remove those references first.` diff --git a/src/cli/primitives/GatewayPrimitive.ts b/src/cli/primitives/GatewayPrimitive.ts index d2b93c11c..a17911890 100644 --- a/src/cli/primitives/GatewayPrimitive.ts +++ b/src/cli/primitives/GatewayPrimitive.ts @@ -29,6 +29,7 @@ import type { Command } from '@commander-js/extra-typings'; export interface AddGatewayOptions { name: string; description?: string; + protocolType?: 'MCP' | 'None'; authorizerType: GatewayAuthorizerType; discoveryUrl?: string; allowedAudience?: string; @@ -138,6 +139,52 @@ export class GatewayPrimitive extends BasePrimitive { + try { + const project = await this.readProjectSpec(); + return project.agentCoreGateways.filter(g => g.protocolType !== 'None').map(g => g.name); + } catch { + return []; + } + } + + /** + * Get runtime names from the project spec. + * All runtimes in spec.runtimes are CDK-managed (deployed by the generated CDK stack), + * so no filtering is needed here. 
+ */ + async getRuntimeNames(): Promise { + try { + const project = await this.readProjectSpec(); + return project.runtimes.map(r => r.name); + } catch { + return []; + } + } + + /** + * Get endpoints for a specific runtime from the project spec. + * Returns an array of { name, version } entries from the runtime's endpoints dictionary. + */ + async getRuntimeEndpoints(runtimeName: string): Promise<{ name: string; version: number }[]> { + try { + const project = await this.readProjectSpec(); + const runtime = project.runtimes.find(r => r.name === runtimeName); + if (!runtime?.endpoints) { + return []; + } + return Object.entries(runtime.endpoints).map(([name, ep]) => ({ + name, + version: ep.version, + })); + } catch { + return []; + } + } + /** * Get list of unassigned targets from agentcore.json. */ @@ -164,6 +211,7 @@ export class GatewayPrimitive extends BasePrimitive', 'Gateway name [non-interactive]') .option('--description ', 'Gateway description [non-interactive]') + .option('--protocol-type ', 'Protocol type: MCP or None (default: None) [non-interactive]') .option('--runtimes ', 'Comma-separated runtime names to expose through this gateway [non-interactive]') .option('--authorizer-type ', 'Authorizer type: NONE, AWS_IAM, or CUSTOM_JWT [non-interactive]') .option('--discovery-url ', 'OIDC discovery URL (for CUSTOM_JWT) [non-interactive]') @@ -202,6 +250,7 @@ export class GatewayPrimitive extends BasePrimitive s.trim()) - .filter(Boolean).length - : 0; return { authorizer_type: standardize(AuthorizerType, cliOptions.authorizerType ?? 'NONE'), has_policy_engine: !!cliOptions.policyEngine, policy_engine_mode: standardize(PolicyEngineMode, cliOptions.policyEngineMode ?? 'log_only'), semantic_search: cliOptions.semanticSearch !== false, - runtime_count: runtimeCount, + runtime_count: cliOptions.runtimes + ? 
cliOptions.runtimes + .split(',') + .map(s => s.trim()) + .filter(Boolean).length + : 0, }; }); }); @@ -317,6 +365,7 @@ export class GatewayPrimitive extends BasePrimitive(option: T): T => (isGatedFeaturesEnabled() ? option : option.hideHelp()); + + const typeDescription = isGatedFeaturesEnabled() + ? 'Target type (required): mcp-server, api-gateway, open-api-schema, smithy-model, lambda-function-arn, http-runtime, connector, passthrough, web-search [non-interactive]' + : 'Target type (required): mcp-server, api-gateway, open-api-schema, smithy-model, lambda-function-arn, http-runtime, connector [non-interactive]'; + + // Reject repeated use of --exclude-domains. Domains must be passed as a + // single comma-separated value. + const excludeDomainsCoercer = (val: string, prev?: string) => { + if (prev !== undefined) { + throw new ValidationError( + '--exclude-domains may only be specified once. Pass all domains as a single comma-separated value.' + ); + } + return val; + }; + addCmd .command('gateway-target') - .description('Add a target (API, MCP server, Lambda) to a gateway for tool routing') + .description('Add a target to a gateway for routing requests to backends') .option('--name ', 'Target name [non-interactive]') .option('--description ', 'Target description [non-interactive]') .option('--gateway ', 'Gateway to attach this target to [non-interactive]') + .option('--type ', typeDescription) .option( - '--type ', - 'Target type (required): mcp-server, api-gateway, open-api-schema, smithy-model, lambda-function-arn [non-interactive]' + '--connector ', + 'Connector id (for connector type): bedrock-knowledge-bases or bedrock-agentic-retrieve [non-interactive]' ) - .option('--endpoint ', 'Server endpoint URL (for mcp-server type) [non-interactive]') + .option( + '--knowledge-base-id ', + 'KB reference for connector type — either a project KB name (entry in knowledgeBases[]) or a 10-char Bedrock KB id for an external KB. 
Repeatable for --connector bedrock-agentic-retrieve to fan out across multiple KBs. [non-interactive]', + (val: string, acc: string[]) => [...acc, val], + [] as string[] + ) + .addOption( + gate( + new Option( + '--exclude-domains ', + 'Comma-separated domains to exclude from results (for --type web-search only) [non-interactive]' + ).argParser(excludeDomainsCoercer) + ) + ) + .option('--endpoint ', 'Server endpoint URL (for mcp-server type) [non-interactive]') .option('--language ', 'Language of target code: Python, TypeScript, Other [non-interactive]') .option('--host ', 'Where to run the target: Lambda or AgentCoreRuntime [non-interactive]') .option('--outbound-auth ', 'Outbound auth type: oauth, api-key, or none [non-interactive]') @@ -308,8 +361,97 @@ export class GatewayTargetPrimitive extends BasePrimitive', 'Tool schema JSON file path (for lambda-function-arn type) [non-interactive]' ) + .option('--runtime ', 'Runtime from your project (for http-runtime type) [non-interactive]') + .option('--runtime-endpoint ', 'Runtime endpoint / version alias (for http-runtime type) [non-interactive]') + // Passthrough-only flags are gated behind ENABLE_GATED_FEATURES — hidden from help when off. 
+ .addOption( + gatePassthroughOption( + new Option('--passthrough-endpoint ', 'HTTPS endpoint URL for passthrough targets [non-interactive]') + ) + ) + .addOption( + gatePassthroughOption( + new Option( + '--passthrough-protocol ', + 'Passthrough protocol: MCP | A2A | INFERENCE | CUSTOM (default: CUSTOM) [non-interactive]' + ) + ) + ) + .addOption( + gatePassthroughOption( + new Option( + '--stickiness-identifier ', + 'Session routing expression for passthrough targets [non-interactive]' + ) + ) + ) + .addOption( + gatePassthroughOption( + new Option('--stickiness-timeout ', 'Sticky session timeout in seconds (1-86400) [non-interactive]') + ) + ) + .addOption( + gatePassthroughOption( + new Option( + '--signing-service ', + 'SigV4 signing service name for passthrough GATEWAY_IAM_ROLE auth [non-interactive]' + ) + ) + ) + .addOption( + gatePassthroughOption( + new Option( + '--signing-region ', + 'SigV4 signing region for passthrough (defaults to project region) [non-interactive]' + ) + ) + ) .option('--json', 'Output as JSON [non-interactive]') - .action(async (rawOptions: Record) => { + .addHelpText( + 'after', + ` +Target types and their options: + + http-runtime — Route to an AgentCore runtime + --runtime Runtime from your project + --runtime-endpoint Endpoint / version alias (optional) + + mcp-server — Connect to an MCP-compatible server + --endpoint Server endpoint URL + --host Lambda or AgentCoreRuntime + --language Python, TypeScript, or Other + + api-gateway — Connect to an Amazon API Gateway REST API + --rest-api-id REST API ID + --stage Deployment stage + + open-api-schema / smithy-model — Auto-derive tools from a schema + --schema Schema file path or S3 URI + --schema-s3-account S3 bucket owner account ID + + lambda-function-arn — Connect to an AWS Lambda function + --lambda-arn Lambda function ARN + --tool-schema-file Tool schema JSON file + + connector — Wire a managed AWS connector (Bedrock KB, agentic-retrieve) + --connector bedrock-knowledge-bases 
or bedrock-agentic-retrieve + --knowledge-base-id Project KB name or 10-char external KB id (repeatable for agentic-retrieve) +${ + isGatedFeaturesEnabled() + ? ` + passthrough — Route to an external HTTPS endpoint + --passthrough-endpoint HTTPS endpoint URL + --stickiness-identifier Session routing expression (optional) + --stickiness-timeout Sticky session timeout in seconds (optional) +` + : '' +} + Auth (mcp-server, open-api-schema, smithy-model, lambda-function-arn${isGatedFeaturesEnabled() ? ', passthrough' : ''}): + --outbound-auth oauth, api-key, or none + --credential-name Existing credential name +` + ) + .action(async (rawOptions: Record) => { // Commander camelCases --outbound-auth to outboundAuth, but our types use outboundAuthType if (rawOptions.outboundAuth && !rawOptions.outboundAuthType) { rawOptions.outboundAuthType = rawOptions.outboundAuth; @@ -328,12 +470,17 @@ export class GatewayTargetPrimitive extends BasePrimitive = { - oauth: 'OAUTH', - 'api-key': 'API_KEY', - api_key: 'API_KEY', - none: 'NONE', - }; + const outboundAuthMap: Record = + { + oauth: 'OAUTH', + 'api-key': 'API_KEY', + api_key: 'API_KEY', + none: 'NONE', + gateway_iam_role: 'GATEWAY_IAM_ROLE', + 'gateway-iam-role': 'GATEWAY_IAM_ROLE', + jwt_passthrough: 'JWT_PASSTHROUGH', + 'jwt-passthrough': 'JWT_PASSTHROUGH', + }; const cliType = cliOptions.type ?? ''; const telemetryTargetType = GATEWAY_TARGET_TYPE_MAP[cliType] ?? 
('unknown' as const); @@ -407,7 +554,10 @@ export class GatewayTargetPrimitive extends BasePrimitive s.trim()), + } + : undefined, + }); + const output = { success: true, toolName: result.toolName }; + if (cliOptions.json) { + console.log(JSON.stringify(output)); + } else { + console.log(`Added gateway target '${result.toolName}'`); + } + return telemetryAttrs; + } + + // Handle Amazon Web Search targets (managed-service backed via gateway IAM role) + if (cliOptions.type === 'webSearch') { + if (!isGatedFeaturesEnabled()) { + throw new ValidationError('Web search target type is not yet available.'); + } + const excludeDomains = + typeof cliOptions.excludeDomains === 'string' + ? cliOptions.excludeDomains + .split(',') + .map((d: string) => d.trim()) + .filter((d: string) => d.length > 0) + : undefined; + const config: WebSearchTargetConfig = { + targetType: 'webSearch', + name: cliOptions.name!, + gateway: cliOptions.gateway!, + ...(excludeDomains && excludeDomains.length > 0 ? { excludeDomains } : {}), + }; + const result = await this.createWebSearchGatewayTarget(config); + if (cliOptions.json) { + console.log(JSON.stringify({ success: true, toolName: result.toolName })); + } else { + const suffix = config.excludeDomains ? ` (excludeDomains=${config.excludeDomains.join(',')})` : ''; + console.log(`Added web-search gateway target '${result.toolName}' on '${config.gateway}'${suffix}`); + } + return telemetryAttrs; + } + + // Handle connector targets (managed-service backed: KB single-retrieve, agentic-retrieve fan-out) + if (cliOptions.type === 'connector') { + const validConnectors = CONNECTOR_ID_VALUES.join(', '); + if (!cliOptions.connector) { + throw new ValidationError(`--connector is required for connector targets (${validConnectors}).`); + } + if (!(CONNECTOR_ID_VALUES as readonly string[]).includes(cliOptions.connector)) { + throw new ValidationError( + `Unknown --connector value '${cliOptions.connector}'. 
Valid: ${validConnectors}.` + ); + } + const connectorId = cliOptions.connector as ConnectorId; + const kbRefs = cliOptions.knowledgeBaseId ?? []; + if (kbRefs.length === 0) { + throw new ValidationError(`--knowledge-base-id is required for --connector ${connectorId}.`); + } + + let config: ConnectorTargetConfig; + if (connectorId === 'bedrock-knowledge-bases') { + if (kbRefs.length > 1) { + throw new ValidationError( + '--knowledge-base-id may only be specified once for --connector bedrock-knowledge-bases. ' + + 'Use --connector bedrock-agentic-retrieve for fan-out across multiple KBs.' + ); + } + config = { + targetType: 'connector', + name: cliOptions.name!, + gateway: cliOptions.gateway!, + connectorId, + knowledgeBaseId: kbRefs[0]!, + ...(cliOptions.description && { description: cliOptions.description }), + }; + } else { + // bedrock-agentic-retrieve: fan-out via knowledgeBaseIds[]. + config = { + targetType: 'connector', + name: cliOptions.name!, + gateway: cliOptions.gateway!, + connectorId, + knowledgeBaseIds: kbRefs, + ...(cliOptions.description && { description: cliOptions.description }), + }; + } + const result = await this.createConnectorGatewayTarget(config); + const output = { success: true, toolName: result.toolName }; + if (cliOptions.json) { + console.log(JSON.stringify(output)); + } else if (config.connectorId === 'bedrock-agentic-retrieve') { + console.log( + `Added connector gateway target '${result.toolName}' on '${config.gateway}' → ${config.connectorId} (KBs ${kbRefs.join(', ')})` + ); + } else { + console.log( + `Added connector gateway target '${result.toolName}' on '${config.gateway}' → ${config.connectorId} (KB ${kbRefs[0]})` + ); + console.log( + `Also wired KB '${kbRefs[0]}' into gateway '${config.gateway}'-agentic (bedrock-agentic-retrieve fan-out)` + ); + } + return telemetryAttrs; + } + + // Handle passthrough targets (no code generation) + if (cliOptions.type === 'passthrough') { + const passthroughEndpoint = (cliOptions as 
Record).passthroughEndpoint; + if (!passthroughEndpoint) { + throw new ValidationError('--passthrough-endpoint is required for passthrough type'); + } + const stickinessIdentifier = (cliOptions as Record).stickinessIdentifier; + const stickinessTimeoutRaw = (cliOptions as Record).stickinessTimeout; + const stickinessTimeout = stickinessTimeoutRaw ? parseInt(stickinessTimeoutRaw, 10) : undefined; + const signingService = (rawOptions as Record).signingService; + const signingRegion = (rawOptions as Record).signingRegion; + const protocolTypeRaw = (rawOptions as Record).passthroughProtocol; + const protocolType = protocolTypeRaw?.toUpperCase() ?? 'CUSTOM'; + if (!PASSTHROUGH_PROTOCOL_TYPES.includes(protocolType as PassthroughProtocolType)) { + throw new ValidationError( + `Invalid --passthrough-protocol "${protocolTypeRaw}". Must be one of: ${PASSTHROUGH_PROTOCOL_TYPES.join(', ')}` + ); + } + + // Build outboundAuth based on the auth type + let passthroughOutboundAuth: + | { type: string; credentialName?: string; scopes?: string[]; service?: string; region?: string } + | undefined; + if (cliOptions.outboundAuthType) { + const mappedAuthType = outboundAuthMap[cliOptions.outboundAuthType.toLowerCase()] ?? 
'NONE'; + if (mappedAuthType === 'GATEWAY_IAM_ROLE') { + if (!signingService) { + throw new ValidationError( + '--signing-service is required when --outbound-auth is GATEWAY_IAM_ROLE for passthrough targets' + ); + } + passthroughOutboundAuth = { + type: 'GATEWAY_IAM_ROLE', + service: signingService, + ...(signingRegion && { region: signingRegion }), + }; + } else if (mappedAuthType === 'JWT_PASSTHROUGH') { + passthroughOutboundAuth = { type: 'JWT_PASSTHROUGH' }; + } else if (mappedAuthType === 'OAUTH') { + passthroughOutboundAuth = { + type: 'OAUTH', + credentialName: cliOptions.credentialName, + scopes: cliOptions.oauthScopes?.split(',').map(s => s.trim()), + }; + } else if (mappedAuthType !== 'NONE') { + passthroughOutboundAuth = { + type: mappedAuthType, + credentialName: cliOptions.credentialName, + scopes: cliOptions.oauthScopes?.split(',').map(s => s.trim()), + }; + } + } + + const result = await this.createPassthroughTarget({ + name: cliOptions.name!, + gateway: cliOptions.gateway!, + passthroughEndpoint, + protocolType: protocolType as PassthroughProtocolType, + stickinessIdentifier, + stickinessTimeout, + outboundAuth: passthroughOutboundAuth, + }); + const output = { success: true, toolName: result.toolName }; + if (cliOptions.json) { + console.log(JSON.stringify(output)); + } else { + console.log(`Added gateway target '${result.toolName}'`); + } + return telemetryAttrs; + } + // Handle MCP server targets (existing endpoint, no code generation) if (cliOptions.type === 'mcpServer' && cliOptions.endpoint) { const config: McpServerTargetConfig = { @@ -458,7 +795,10 @@ export class GatewayTargetPrimitive extends BasePrimitive', 'Target name (default: web-search) [non-interactive]') + .option('--gateway ', 'Gateway to attach this target to [non-interactive]') + .option('--exclude-domains ', 'Comma-separated domains to exclude from results [non-interactive]') + .option('--json', 'Output as JSON [non-interactive]') + .action(async (cliOptions: { name?: string; 
gateway?: string; excludeDomains?: string; json?: boolean }) => { + if (!isGatedFeaturesEnabled()) { + console.error('Error: Web search target type is not yet available.'); + process.exit(1); + } + if (!findConfigRoot()) { + console.error('No agentcore project found. Run `agentcore create` first.'); + process.exit(1); + } + + const userPassedAnyFlag = + !!cliOptions.name || !!cliOptions.gateway || !!cliOptions.excludeDomains || !!cliOptions.json; + if (!userPassedAnyFlag) { + try { + requireTTY(); + const [{ render }, { default: React }, { AddWebSearchFlow }] = await Promise.all([ + import('ink'), + import('react'), + import('../tui/screens/web-search'), + ]); + const { clear, unmount } = render( + React.createElement(AddWebSearchFlow, { + isInteractive: false, + onBack: () => { + clear(); + unmount(); + process.exit(0); + }, + onExit: () => { + clear(); + unmount(); + process.exit(0); + }, + }) + ); + return; + } catch (error) { + console.error(getErrorMessage(error)); + process.exit(1); + } + } + + await runCliCommand('add.web-search', !!cliOptions.json, async () => { + // Default name `web-search` is convenient for the first invocation + // but produces a duplicate-target error on the second. Require an + // explicit --name when the default is already taken. + let resolvedName = cliOptions.name; + if (!resolvedName) { + const project = await this.readProjectSpec(); + const nameTaken = project.agentCoreGateways.some(g => (g.targets ?? []).some(t => t.name === 'web-search')); + if (nameTaken) { + throw new ValidationError( + 'A gateway target named "web-search" already exists. Pass --name to add another.' 
+ ); + } + resolvedName = 'web-search'; + } + const forwardedOptions: CLIAddGatewayTargetOptions = { + name: resolvedName, + type: 'web-search', + gateway: cliOptions.gateway, + ...(cliOptions.excludeDomains && { excludeDomains: cliOptions.excludeDomains }), + }; + const validation = await validateAddGatewayTargetOptions(forwardedOptions); + if (!validation.valid) { + throw new ValidationError(validation.error!); + } + const excludeDomains = + typeof forwardedOptions.excludeDomains === 'string' + ? forwardedOptions.excludeDomains + .split(',') + .map((d: string) => d.trim()) + .filter((d: string) => d.length > 0) + : undefined; + const config: WebSearchTargetConfig = { + targetType: 'webSearch', + name: forwardedOptions.name!, + gateway: forwardedOptions.gateway!, + ...(excludeDomains && excludeDomains.length > 0 ? { excludeDomains } : {}), + }; + const result = await this.createWebSearchGatewayTarget(config); + if (cliOptions.json) { + console.log(JSON.stringify({ success: true, toolName: result.toolName })); + } else { + const suffix = config.excludeDomains ? ` (excludeDomains=${config.excludeDomains.join(',')})` : ''; + console.log(`Added web-search gateway target '${result.toolName}' on '${config.gateway}'${suffix}`); + } + return {}; + }); + }); + + removeCmd + .command('web-search', { hidden: !isGatedFeaturesEnabled() }) + .description('Remove an Amazon Web Search gateway target from the project') + .option('--name ', 'Name of the web-search target to remove [non-interactive]') + .option('-y, --yes', 'Skip confirmation prompt [non-interactive]') + .option('--json', 'Output as JSON [non-interactive]') + .action(async (cliOptions: { name?: string; yes?: boolean; json?: boolean }) => { + try { + if (!isGatedFeaturesEnabled()) { + console.error('Web search target type is not yet available.'); + process.exit(1); + } + if (!findConfigRoot()) { + console.error('No agentcore project found. 
Run `agentcore create` first.'); + process.exit(1); + } + + if (!cliOptions.name) { + throw new ValidationError('A --name is required for `agentcore remove web-search`.'); + } + const project = await this.readProjectSpec(); + const match = project.agentCoreGateways + .flatMap(g => (g.targets ?? []).map(t => ({ gateway: g.name, target: t }))) + .find(({ target }) => target.name === cliOptions.name); + if (!match) { + throw new ValidationError(`Gateway target "${cliOptions.name}" not found.`); + } + if (match.target.targetType !== 'webSearch') { + throw new ValidationError( + `Gateway target "${cliOptions.name}" is type "${match.target.targetType}", not webSearch. Use 'agentcore remove gateway-target --name ${cliOptions.name}' instead.` + ); + } + const result = await withCommandRunTelemetry('remove.web-search', {}, () => this.remove(cliOptions.name!)); + if (cliOptions.json) { + console.log( + JSON.stringify({ + success: result.success, + resourceType: this.kind, + resourceName: cliOptions.name, + message: result.success ? `Removed web-search gateway target '${cliOptions.name}'` : undefined, + error: !result.success ? result.error.message : undefined, + }) + ); + } else if (result.success) { + console.log(`Removed web-search gateway target '${cliOptions.name}'`); + } else { + throw result.error; + } + process.exit(result.success ? 
0 : 1); + } catch (error) { + if (cliOptions.json) { + console.log(JSON.stringify({ success: false, error: getErrorMessage(error) })); + } else { + console.error(`Error: ${getErrorMessage(error)}`); + } + process.exit(1); + } + }); } addScreen(): AddScreenComponent { @@ -713,6 +1213,231 @@ export class GatewayTargetPrimitive extends BasePrimitive { + const project = await this.readProjectSpec(); + + const gateway = project.agentCoreGateways.find(g => g.name === config.gateway); + if (!gateway) { + throw new Error(`Gateway "${config.gateway}" not found.`); + } + + if (!gateway.targets) { + gateway.targets = []; + } + + if (gateway.targets.some(t => t.name === config.name)) { + throw new Error(`Target "${config.name}" already exists in gateway "${gateway.name}".`); + } + + const target: AgentCoreGatewayTarget = { + name: config.name, + targetType: 'httpRuntime', + httpRuntime: { + runtime: config.runtime, + ...(config.endpoint && { runtimeEndpoint: config.endpoint }), + }, + ...(config.outboundAuth && + config.outboundAuth.type !== 'NONE' && { + outboundAuth: { + type: config.outboundAuth.type as 'OAUTH' | 'API_KEY', + credentialName: config.outboundAuth.credentialName!, + ...(config.outboundAuth.scopes && { scopes: config.outboundAuth.scopes }), + }, + }), + }; + + gateway.targets.push(target); + await this.writeProjectSpec(project); + + return { toolName: config.name }; + } + + /** + * Create a connector-typed gateway target backed by a managed AWS service + * (currently bedrock-knowledge-bases or bedrock-agentic-retrieve). + * + * Project-owned KB: config.knowledgeBaseId is a knowledgeBases[] entry name; + * the L3 resolves it at synth time via application.knowledgeBases. + * External KB: config.knowledgeBaseId is a 10-character literal KB ID; the + * L3 passes it through verbatim. 
+ */ + async createConnectorGatewayTarget(config: ConnectorTargetConfig): Promise<{ toolName: string }> { + const project = await this.readProjectSpec(); + + const gateway = project.agentCoreGateways.find(g => g.name === config.gateway); + if (!gateway) { + throw new Error(`Gateway "${config.gateway}" not found.`); + } + + if (!gateway.targets) { + gateway.targets = []; + } + + if (gateway.targets.some(t => t.name === config.name)) { + throw new Error(`Target "${config.name}" already exists in gateway "${gateway.name}".`); + } + + // For agentic-retrieve, refuse to silently shadow an existing one on the + // same gateway — the KB primitive would have created `${gateway}-agentic` + // already, and a user-driven low-level add should be an explicit choice. + if (config.connectorId === 'bedrock-agentic-retrieve') { + const existingAgentic = gateway.targets.find( + t => t.targetType === 'connector' && t.connectorId === 'bedrock-agentic-retrieve' + ); + if (existingAgentic) { + throw new Error( + `Gateway "${gateway.name}" already has a bedrock-agentic-retrieve target ("${existingAgentic.name}"). ` + + `Edit agentcore/agentcore.json directly to extend its knowledgeBaseIds[].` + ); + } + } + + let target: AgentCoreGatewayTarget; + if (config.connectorId === 'bedrock-agentic-retrieve') { + target = { + name: config.name, + targetType: 'connector', + connectorId: config.connectorId, + knowledgeBaseIds: config.knowledgeBaseIds, + } as AgentCoreGatewayTarget; + } else { + target = { + name: config.name, + targetType: 'connector', + connectorId: config.connectorId, + knowledgeBaseId: config.knowledgeBaseId, + } as AgentCoreGatewayTarget; + } + + gateway.targets.push(target); + + // Auto-upsert the shared agentic-retrieve target when wiring a single-KB + // Retrieve via this path, mirroring KnowledgeBasePrimitive.add({...gateway}). 
+ // Without this, KBs added via `add gateway-target --type connector + // --connector bedrock-knowledge-bases` would be missing from the gateway's + // agentic-retrieve fan-out. + if (config.connectorId === 'bedrock-knowledge-bases') { + upsertAgenticRetrieveTarget(gateway, config.knowledgeBaseId); + } + + await this.writeProjectSpec(project); + + return { toolName: config.name }; + } + + /** + * Create a passthrough target that routes HTTP traffic to an external HTTPS endpoint. + * No code generation — this registers an endpoint for HTTP passthrough. + */ + async createPassthroughTarget(config: { + name: string; + gateway: string; + passthroughEndpoint: string; + protocolType?: PassthroughProtocolType; + stickinessIdentifier?: string; + stickinessTimeout?: number; + outboundAuth?: { type: string; credentialName?: string; scopes?: string[]; service?: string; region?: string }; + }): Promise<{ toolName: string }> { + const project = await this.readProjectSpec(); + + const gateway = project.agentCoreGateways.find(g => g.name === config.gateway); + if (!gateway) { + throw new Error(`Gateway "${config.gateway}" not found.`); + } + + if (!gateway.targets) { + gateway.targets = []; + } + + if (gateway.targets.some(t => t.name === config.name)) { + throw new Error(`Target "${config.name}" already exists in gateway "${gateway.name}".`); + } + + // Build outboundAuth object based on auth type + let outboundAuth: AgentCoreGatewayTarget['outboundAuth']; + if (config.outboundAuth && config.outboundAuth.type !== 'NONE') { + if (config.outboundAuth.type === 'GATEWAY_IAM_ROLE') { + outboundAuth = { + type: 'GATEWAY_IAM_ROLE', + service: config.outboundAuth.service, + ...(config.outboundAuth.region && { region: config.outboundAuth.region }), + }; + } else if (config.outboundAuth.type === 'JWT_PASSTHROUGH') { + outboundAuth = { type: 'JWT_PASSTHROUGH' }; + } else { + outboundAuth = { + type: config.outboundAuth.type as 'OAUTH' | 'API_KEY', + credentialName: 
config.outboundAuth.credentialName!, + ...(config.outboundAuth.scopes && { scopes: config.outboundAuth.scopes }), + }; + } + } + + const target: AgentCoreGatewayTarget = { + name: config.name, + targetType: 'passthrough', + passthrough: { + endpoint: config.passthroughEndpoint, + protocolType: config.protocolType ?? 'CUSTOM', + ...(config.stickinessIdentifier && { + stickinessConfiguration: { + identifier: config.stickinessIdentifier, + ...(config.stickinessTimeout && { timeout: config.stickinessTimeout }), + }, + }), + }, + ...(outboundAuth && { outboundAuth }), + }; + + gateway.targets.push(target); + await this.writeProjectSpec(project); + + return { toolName: config.name }; + } + + /** + * Create an Amazon Web Search gateway target. The target is invoked via the + * gateway's IAM role; the only admin-configurable parameter is an optional + * list of domains to exclude from results. + */ + async createWebSearchGatewayTarget(config: WebSearchTargetConfig): Promise<{ toolName: string }> { + const project = await this.readProjectSpec(); + + const gateway = project.agentCoreGateways.find(g => g.name === config.gateway); + if (!gateway) { + throw new Error(`Gateway "${config.gateway}" not found.`); + } + + if (!gateway.targets) { + gateway.targets = []; + } + + if (gateway.targets.some(t => t.name === config.name)) { + throw new Error(`Target "${config.name}" already exists in gateway "${gateway.name}".`); + } + + const target: AgentCoreGatewayTarget = { + name: config.name, + targetType: 'webSearch', + ...(config.excludeDomains && config.excludeDomains.length > 0 ? 
{ excludeDomains: config.excludeDomains } : {}), + } as AgentCoreGatewayTarget; + + gateway.targets.push(target); + await this.writeProjectSpec(project); + + return { toolName: config.name }; + } + // ═══════════════════════════════════════════════════════════════════ // Private helpers // ═══════════════════════════════════════════════════════════════════ diff --git a/src/cli/primitives/HarnessPrimitive.ts b/src/cli/primitives/HarnessPrimitive.ts index cf5acd083..8bbc2dceb 100644 --- a/src/cli/primitives/HarnessPrimitive.ts +++ b/src/cli/primitives/HarnessPrimitive.ts @@ -1,43 +1,154 @@ import { APP_DIR, ConfigIO, type Result, findConfigRoot } from '../../lib'; import type { + AgentCoreProjectSpec, + EndpointIpAddressType, HarnessApiFormat, HarnessGatewayOutboundAuth, + HarnessMemoryRef, HarnessModelProvider, HarnessSpec, + ManagedMemoryStrategy, MemoryStrategy, MemoryStrategyType, NetworkMode, + PrivateEndpoint, RuntimeAuthorizerType, } from '../../schema'; import { DEFAULT_EPISODIC_REFLECTION_NAMESPACES, DEFAULT_STRATEGY_NAMESPACES, HarnessSpecSchema } from '../../schema'; -import { deleteHarness } from '../aws/agentcore-harness'; +import { deleteHarness, isHarnessNotFoundError } from '../aws/agentcore-harness'; import { getErrorMessage } from '../errors'; +import { isGatedFeaturesEnabled } from '../feature-flags'; +import { MANAGED_MEMORY_ADD_NOTICE } from '../operations/deploy'; +import { findOrphanHarnesses } from '../operations/harness/orphan'; +import type { OrphanHarness } from '../operations/harness/orphan'; import type { RemovalPreview, SchemaChange } from '../operations/remove/types'; +import { withCommandRunTelemetry } from '../telemetry/cli-command-run.js'; +import type { SubCommand } from '../telemetry/schemas/command-run.js'; import { getTemplatePath } from '../templates/templateRoot'; +import { requireTTY } from '../tui/guards/tty'; import { DEFAULT_MEMORY_EXPIRY_DAYS } from '../tui/screens/generate/defaults'; import { BasePrimitive } from 
'./BasePrimitive'; import { buildAuthorizerConfigFromJwtConfig, createManagedOAuthCredential } from './auth-utils'; import type { JwtConfigOptions } from './auth-utils'; +import { ADDITIONAL_PARAMS_JSON_ERROR, SOURCE_CODE_NOTE } from './constants'; import type { AddScreenComponent, RemovableResource } from './types'; -import { ResourceNotFoundError, toError } from '@/lib/errors/types'; +import { validateGitSkillCredential } from '@/cli/operations/harness/skill-utils'; +import { ResourceNotFoundError, ValidationError, toError } from '@/lib/errors/types'; +import { InvalidArgumentError, Option } from '@commander-js/extra-typings'; import type { Command } from '@commander-js/extra-typings'; import { access, copyFile, mkdir, readFile, rm, writeFile } from 'fs/promises'; import { basename, dirname, isAbsolute, join, resolve } from 'path'; +/** Commander accumulator for repeatable `--efs/--s3-access-point ` flags. */ +function collectAccessPoint(value: string, previous: string[]): string[] { + return [...previous, value]; +} + +/** + * Strict integer flag parser. Rejects non-canonical input (`5abc`, `abc`, `1e9`, ` 5 `) with + * Commander's InvalidArgumentError instead of silently truncating (`parseInt('5abc')` → 5) or + * producing NaN, both of which would slip a wrong/garbage value into the spec. + */ +function strictInt(label: string): (value: string) => number { + return (value: string) => { + const trimmed = value.trim(); + const n = Number(trimmed); + // Require canonical integer form: rejects "", "5abc", "5.5", "1e9", "007", " 5 " producing a + // surprising value. `String(n) === trimmed` only holds for the plain decimal the user typed. + if (!Number.isInteger(n) || String(n) !== trimmed) { + throw new InvalidArgumentError(`${label} must be an integer, got "${value}"`); + } + return n; + }; +} + +/** Strict finite-number flag parser. Rejects NaN/Infinity/garbage (`abc`, `1e10000`, `5abc`). 
*/ +function strictFloat(label: string): (value: string) => number { + return (value: string) => { + const n = Number(value); + if (!Number.isFinite(n) || value.trim() === '') { + throw new InvalidArgumentError(`${label} must be a number, got "${value}"`); + } + return n; + }; +} + +/** + * Hide a gated option from `--help` when ENABLE_GATED_FEATURES is off. The option still PARSES + * (so explicit use is caught by validation with a clean "not yet available" message) but does not + * advertise itself. Mirrors the AWS Skills gating pattern (skill-command.ts). + */ +function gatedOption(option: T): T { + return isGatedFeaturesEnabled() ? option : option.hideHelp(); +} + +/** Commander accumulator for repeatable `--env`/`--tag` KEY=VALUE flags. Last write wins per key. */ +function collectKeyValue(value: string, previous: Record): Record { + const eq = value.indexOf('='); + if (eq <= 0) { + throw new Error(`Invalid KEY=VALUE pair: "${value}" (expected KEY=VALUE with a non-empty key)`); + } + return { ...previous, [value.slice(0, eq)]: value.slice(eq + 1) }; +} + +/** + * Parse `--efs/--s3-access-point` values of the form `:` into the + * `{accessPointArn, mountPath}` shape. The ARN itself contains colons, and the mount path is a + * `/mnt/...` absolute path, so split on the LAST colon to separate the two unambiguously. 
+ */ +function parseAccessPoints(raw: string[] | undefined): { accessPointArn: string; mountPath: string }[] | undefined { + if (!raw || raw.length === 0) return undefined; + return raw.map(entry => { + const sep = entry.lastIndexOf(':'); + if (sep <= 0 || sep === entry.length - 1) { + throw new Error(`Invalid access point "${entry}" (expected :)`); + } + return { accessPointArn: entry.slice(0, sep), mountPath: entry.slice(sep + 1) }; + }); +} + export interface AddHarnessOptions { name: string; modelProvider: HarnessModelProvider; modelId: string; apiFormat?: HarnessApiFormat; apiKeyArn?: string; + /** LiteLLM only: base URL for the third-party model provider's API endpoint. */ + apiBase?: string; + /** LiteLLM only: provider-specific parameters passed through to the model provider unchanged. */ + additionalParams?: Record; + /** Model-config sampling params (CFN ModelConfig.{Temperature,TopP,TopK,MaxTokens}). */ + temperature?: number; + topP?: number; + topK?: number; + /** Model-config sampling MaxTokens (distinct from the top-level execution maxTokens). */ + modelMaxTokens?: number; systemPrompt?: string; skipMemory?: boolean; + /** Memory mode (gated). managed = harness owns its memory; existing = BYO; disabled = none. */ + memoryMode?: 'managed' | 'existing' | 'disabled'; + /** Managed-memory strategies (gated). Subset of SEMANTIC/SUMMARIZATION/USER_PREFERENCE/EPISODIC. */ + memoryStrategies?: string[]; + /** Managed-memory event retention in days, 3-365 (gated). */ + memoryEventExpiryDays?: number; + /** Managed-memory KMS CMK ARN, create-only (gated). */ + memoryEncryptionKeyArn?: string; + /** Reference an existing memory by name or ARN instead of auto-creating one. */ + memoryName?: string; + memoryArn?: string; + /** Deploy-time ActorId for the referenced memory (CFN Memory.ActorId). */ + memoryActorId?: string; + /** Recent-message window loaded into context (CFN Memory.MessagesCount). 
*/ + messagesCount?: number; + /** Retrieval tuning fanned across the memory's namespaces (CFN RetrievalConfig.{TopK,RelevanceScore}). */ + memoryTopK?: number; + memoryRelevanceScore?: number; containerUri?: string; dockerfilePath?: string; maxIterations?: number; maxTokens?: number; timeoutSeconds?: number; - truncationStrategy?: 'sliding_window' | 'summarization'; + truncationStrategy?: 'sliding_window' | 'summarization' | 'none'; networkMode?: NetworkMode; subnets?: string[]; securityGroups?: string[]; @@ -46,27 +157,60 @@ export interface AddHarnessOptions { sessionStoragePath?: string; efsAccessPoints?: { accessPointArn: string; mountPath: string }[]; s3AccessPoints?: { accessPointArn: string; mountPath: string }[]; + /** Allow-list of tools the agent may use (CFN AllowedTools). */ + allowedTools?: string[]; + /** Harness runtime environment variables (CFN EnvironmentVariables). */ + environmentVariables?: Record; + /** Harness-level tags, merged with project tags (CFN Tags). */ + tags?: Record; withInvokeScript?: boolean; selectedTools?: string[]; mcpName?: string; mcpUrl?: string; + /** remote_mcp request headers (CFN RemoteMcp.Headers). */ + mcpHeaders?: Record; gatewayArn?: string; gatewayOutboundAuth?: 'awsIam' | 'none' | 'oauth'; gatewayProviderArn?: string; gatewayScopes?: string[]; + /** Gateway OAuth grant type + custom parameters (CFN Oauth.{GrantType,CustomParameters}). */ + gatewayGrantType?: 'CLIENT_CREDENTIALS' | 'USER_FEDERATION'; + gatewayCustomParameters?: Record; authorizerType?: RuntimeAuthorizerType; jwtConfig?: JwtConfigOptions; + skills?: { + path?: string; + s3Uri?: string; + gitUrl?: string; + gitPath?: string; + credentialName?: string; + username?: string; + awsSkills?: string[]; + }[]; configBaseDir?: string; } export type RemovableHarness = RemovableResource; +/** + * Intent for removing an imperative-build orphan harness (one not managed by CloudFormation). 
+ * - `keep`: delete the AWS resource but keep the agentcore.json entry (it moves to GA — the + * next deploy recreates it under CloudFormation). + * - `discard`: delete the AWS resource and remove the agentcore.json entry (no longer wanted). + */ +export type OrphanAction = 'keep' | 'discard'; + +export interface RemoveHarnessOptions { + /** Explicit intent when the named harness is an orphan. Required to delete one (never auto-deletes). */ + orphanAction?: OrphanAction; +} + export class HarnessPrimitive extends BasePrimitive { readonly kind = 'harness' as const; readonly label = 'Harness'; readonly primitiveSchema = HarnessSpecSchema; - async add(options: AddHarnessOptions): Promise> { + async add(options: AddHarnessOptions): Promise> { try { const configBaseDir = options.configBaseDir ?? findConfigRoot(); if (!configBaseDir) { @@ -82,7 +226,17 @@ export class HarnessPrimitive extends BasePrimitive 0 && { headers: options.mcpHeaders }), + }, + }, }); } else if (toolType === 'agentcore_gateway' && options.gatewayArn) { let outboundAuth: HarnessGatewayOutboundAuth | undefined; @@ -131,6 +291,11 @@ export class HarnessPrimitive extends BasePrimitive 0 && { + customParameters: options.gatewayCustomParameters, + }), }, }; } @@ -148,6 +313,29 @@ export class HarnessPrimitive extends BasePrimitive 0 && { paths: s.awsSkills }) } }); + } else { + skills.push({ path: s.path! }); + } + } + const harnessSpec: HarnessSpec = { name: options.name, model: { @@ -155,11 +343,18 @@ export class HarnessPrimitive extends BasePrimitive 0 && { + environmentVariables: options.environmentVariables, + }), + ...(options.tags && Object.keys(options.tags).length > 0 && { tags: options.tags }), ...(options.authorizerType && { authorizerType: options.authorizerType }), ...(options.authorizerType === 'CUSTOM_JWT' && options.jwtConfig ? 
{ authorizerConfiguration: buildAuthorizerConfigFromJwtConfig(options.jwtConfig) } @@ -190,7 +390,10 @@ export class HarnessPrimitive extends BasePrimitive ({ type, @@ -211,7 +414,7 @@ export class HarnessPrimitive extends BasePrimitive { + async remove(harnessName: string, opts?: RemoveHarnessOptions): Promise { try { const configRoot = findConfigRoot(); if (!configRoot) { @@ -251,53 +454,132 @@ export class HarnessPrimitive extends BasePrimitive undefined); - const harnesses = project.harnesses ?? []; - const harnessIndex = harnesses.findIndex(h => h.name === harnessName); + // An orphan is an imperative-build harness recorded in deployed-state but not managed by + // CloudFormation (no `provisioner: 'cloudformation'` marker). CFN can't delete it, so it + // keeps billing and would 409 a same-named CFN deploy. It must be deleted directly from + // AWS — but only with the user's explicit intent (never auto-delete). + const orphans = findOrphanHarnesses(deployedState, harnessName); + if (orphans.length > 0) { + return this.removeOrphan(harnessName, orphans, opts?.orphanAction, configIO, project); + } - if (harnessIndex === -1) { + const inSpec = (project.harnesses ?? []).some(h => h.name === harnessName); + if (!inSpec) { return { success: false, error: new ResourceNotFoundError(`Harness "${harnessName}" not found.`) }; } - // Delete harness from AWS if it's deployed + // --keep/--discard express intent for deleting an imperative-build ORPHAN directly from AWS. + // They have no meaning for a CDK-managed harness (removed by the next deploy via the stack), + // so reject them rather than silently ignoring — the user expected an AWS-side delete. + if (opts?.orphanAction) { + return { + success: false, + error: new ValidationError( + `--keep/--discard only apply to a preview-build (orphan) harness. 
"${harnessName}" is managed by ` + + `CloudFormation; remove it without those flags and run \`agentcore deploy\` to delete it from AWS.` + ), + }; + } + + // CDK-managed harness: drop it from the project spec. The harness is part of the + // CloudFormation stack, so the next deploy removes the AWS::BedrockAgentCore::Harness. + await this.removeFromSpec(harnessName, configIO, project); + return { success: true }; + } catch (err) { + return { success: false, error: toError(err) }; + } + } + + /** + * Delete an imperative-build orphan harness directly from AWS, then reconcile local state + * per the user's chosen intent. Never auto-deletes: an unspecified action returns an + * actionable error rather than guessing. + */ + private async removeOrphan( + harnessName: string, + orphans: OrphanHarness[], + action: OrphanAction | undefined, + configIO: ConfigIO, + project: AgentCoreProjectSpec + ): Promise { + if (!action) { + return { + success: false, + error: new ValidationError( + `No changes were made — "${harnessName}" was not deleted. It was created by the preview ` + + `build and is not managed by CloudFormation, so CloudFormation cannot delete it. Removing ` + + `it deletes the resource directly from your AWS account. Re-run with an explicit choice:\n` + + ` --keep delete it from AWS but keep it in agentcore.json (it moves to GA; the ` + + `next \`agentcore deploy\` recreates it under CloudFormation)\n` + + ` --discard delete it from AWS and remove it from agentcore.json (you no longer want it)` + ), + }; + } + + // Delete each recorded orphan resource using its recorded id + ARN-derived region — never + // re-resolve by name. A 404/NotFound means it's already gone, which is success for our + // purposes; any other error aborts so local state still points at the live resource for a + // retry. 
+ for (const orphan of orphans) { try { - const deployedState = await configIO.readDeployedState(); - for (const target of Object.values(deployedState.targets)) { - const deployedHarness = target.resources?.harnesses?.[harnessName]; - if (deployedHarness) { - const targets = await configIO.resolveAWSDeploymentTargets(); - const region = targets[0]?.region; - if (region) { - await deleteHarness({ region, harnessId: deployedHarness.harnessId }); - } - delete target.resources!.harnesses![harnessName]; - await configIO.writeDeployedState(deployedState); - break; - } + await deleteHarness({ region: orphan.region, harnessId: orphan.harnessId }); + } catch (err) { + // 404 = already gone (success). Any other error aborts so local state still points at the + // live resource for a retry. Uses the typed status code, not a message substring. + if (!isHarnessNotFoundError(err)) { + const msg = err instanceof Error ? err.message : String(err); + return { + success: false, + error: toError( + `Failed to delete orphan harness "${harnessName}" (${orphan.harnessId}) in ${orphan.region}: ${msg}. ` + + `Local state was left unchanged — resolve the error and retry.` + ), + }; } - } catch { - // AWS deletion is best-effort; next deploy will clean up } + } - harnesses.splice(harnessIndex, 1); - project.harnesses = harnesses; - - // Remove the associated memory (convention: Memory) - const associatedMemoryName = `${harnessName}Memory`; - if (project.memories) { - project.memories = project.memories.filter(m => m.name !== associatedMemoryName); + // Drop the orphan records from deployed-state so the harness is no longer flagged. 
+ const deployedState = await configIO.readDeployedState().catch(() => undefined); + if (deployedState) { + for (const orphan of orphans) { + const harnesses = deployedState.targets?.[orphan.targetName]?.resources?.harnesses; + if (harnesses) delete harnesses[orphan.name]; } + await configIO.writeDeployedState(deployedState); + } - await this.writeProjectSpec(project, configIO); + // delete-and-discard also removes the spec entry, its memory, and its directory. + // delete-and-keep leaves the spec entry so the next deploy recreates it under CloudFormation. + if (action === 'discard' && (project.harnesses ?? []).some(h => h.name === harnessName)) { + await this.removeFromSpec(harnessName, configIO, project); + } - const pathResolver = configIO.getPathResolver(); - const harnessDir = pathResolver.getHarnessDir(harnessName); - await rm(harnessDir, { recursive: true, force: true }); + return { success: true }; + } - return { success: true }; - } catch (err) { - return { success: false, error: toError(err) }; + /** + * Remove a harness from the project spec: drop its entry, its convention-named memory sibling + * (`Memory`) IF one actually exists, persist agentcore.json, and delete its on-disk directory. + * Managed-memory harnesses own their memory internally (no sibling), so the filter is gated on the + * sibling's actual presence — matching previewRemove and never touching a memory the harness doesn't own. + */ + private async removeFromSpec(harnessName: string, configIO: ConfigIO, project: AgentCoreProjectSpec): Promise { + project.harnesses = (project.harnesses ?? []).filter(h => h.name !== harnessName); + + const associatedMemoryName = `${harnessName}Memory`; + const hasAssociatedMemory = (project.memories ?? []).some(m => m.name === associatedMemoryName); + if (hasAssociatedMemory) { + project.memories = (project.memories ?? 
[]).filter(m => m.name !== associatedMemoryName); } + + await this.writeProjectSpec(project, configIO); + + const pathResolver = configIO.getPathResolver(); + const harnessDir = pathResolver.getHarnessDir(harnessName); + await rm(harnessDir, { recursive: true, force: true }); } async previewRemove(harnessName: string): Promise { @@ -345,30 +627,113 @@ export class HarnessPrimitive extends BasePrimitive { + try { + const configRoot = findConfigRoot(); + if (!configRoot) return false; + const configIO = new ConfigIO({ baseDir: configRoot }); + const deployedState = await configIO.readDeployedState().catch(() => undefined); + return findOrphanHarnesses(deployedState, harnessName).length > 0; + } catch { + return false; + } + } + registerCommands(addCmd: Command, removeCmd: Command): void { addCmd .command('harness') .description('Add a harness to the project') .option('--name ', 'Harness name (start with letter, alphanumeric + underscores, max 48 chars)') - .option('--model-provider ', 'Model provider: bedrock, open_ai, gemini') + .option('--model-provider ', 'Model provider: bedrock, open_ai, gemini, lite_llm') .option('--model-id ', 'Model ID (e.g., anthropic.claude-3-5-sonnet-20240620-v1:0)') .option( '--api-format ', 'API format: converse_stream, responses, chat_completions (bedrock); responses, chat_completions (open_ai)' ) - .option('--api-key-arn ', 'API key ARN for non-Bedrock providers') + .option('--api-key-arn ', 'API key ARN for non-Bedrock providers (optional for lite_llm)') + .option('--api-base ', 'Base URL for the model provider API endpoint (lite_llm only)') + .option( + '--additional-params ', + 'Provider-specific params passed through unchanged, as a JSON object (lite_llm only)' + ) + .option('--temperature ', 'Model sampling temperature (0-2)', strictFloat('--temperature')) + .option('--top-p ', 'Model nucleus-sampling top-p (0-1)', strictFloat('--top-p')) + .option('--top-k ', 'Model top-k sampling (gemini only)', strictInt('--top-k')) + .option( 
+ '--model-max-tokens ', + 'Model-config max output tokens per turn (distinct from --max-tokens)', + strictInt('--model-max-tokens') + ) .option('--container ', 'Container image URI or path to a Dockerfile') .option('--no-memory', 'Skip auto-creating memory') - .option('--max-iterations ', 'Max iterations', parseInt) - .option('--max-tokens ', 'Max tokens', parseInt) - .option('--timeout ', 'Timeout in seconds', parseInt) - .option('--truncation-strategy ', 'Truncation strategy: sliding_window or summarization') + .option('--memory-name ', 'Reference an existing memory by name instead of auto-creating one') + .option('--memory-arn ', 'Reference an existing memory by ARN instead of auto-creating one') + .option('--memory-actor-id ', 'Deploy-time ActorId scoping memory access for the harness') + .option( + '--memory-messages-count ', + 'Number of recent memory messages to load into context', + strictInt('--memory-messages-count') + ) + .option('--memory-top-k ', 'Memory retrieval: items to retrieve per namespace', strictInt('--memory-top-k')) + .option( + '--memory-relevance-score ', + 'Memory retrieval: minimum relevance score (0-1)', + strictFloat('--memory-relevance-score') + ) + // Managed-memory flags — gated behind ENABLE_GATED_FEATURES. When off they still PARSE + // (so explicit use returns a clean "not yet available" error in validation) but are hidden + // from --help, mirroring the AWS Skills gating pattern. 
+ .addOption( + gatedOption(new Option('--memory-mode ', 'Memory mode: managed (default), existing, or disabled')) + ) + .addOption( + gatedOption( + new Option( + '--memory-strategies ', + 'Managed memory strategies (comma-separated): SEMANTIC,SUMMARIZATION,USER_PREFERENCE,EPISODIC' + ) + ) + ) + .addOption( + gatedOption( + new Option('--memory-event-expiry-days ', 'Managed memory event retention in days (3-365)').argParser( + strictInt('--memory-event-expiry-days') + ) + ) + ) + .addOption( + gatedOption(new Option('--memory-encryption-key-arn ', 'Managed memory KMS CMK ARN (create-only)')) + ) + .option('--max-iterations ', 'Max iterations', strictInt('--max-iterations')) + .option('--max-tokens ', 'Max execution tokens per invocation (harness loop cap)', strictInt('--max-tokens')) + .option('--timeout ', 'Timeout in seconds', strictInt('--timeout')) + .option('--truncation-strategy ', 'Truncation strategy: sliding_window, summarization, or none') .option('--network-mode ', 'Network mode: PUBLIC or VPC') .option('--subnets ', 'Comma-separated subnet IDs (for VPC mode)') .option('--security-groups ', 'Comma-separated security group IDs (for VPC mode)') - .option('--idle-timeout ', 'Idle timeout in seconds', parseInt) - .option('--max-lifetime ', 'Max lifetime in seconds', parseInt) + .option('--idle-timeout ', 'Idle timeout in seconds', strictInt('--idle-timeout')) + .option('--max-lifetime ', 'Max lifetime in seconds', strictInt('--max-lifetime')) .option('--session-storage ', 'Mount path for persistent session storage (e.g., /mnt/data/)') + .option( + '--efs-access-point ', + 'EFS access point mount as : (repeatable, VPC mode; max 2)', + collectAccessPoint, + [] + ) + .option( + '--s3-access-point ', + 'S3 Files access point mount as : (repeatable, VPC mode; max 2)', + collectAccessPoint, + [] + ) + .option('--allowed-tools ', 'Comma-separated allow-list of tools the agent may use (e.g. 
"*" or names)') + .option('--env ', 'Harness environment variable as KEY=VALUE (repeatable)', collectKeyValue, {}) + .option('--tag ', 'Harness-level tag as KEY=VALUE (repeatable)', collectKeyValue, {}) .option('--with-invoke-script', 'Generate a standalone Python invoke script') .option( '--system-prompt ', @@ -380,6 +745,7 @@ export class HarnessPrimitive extends BasePrimitive', 'Remote MCP tool name (required when --tools includes remote_mcp)') .option('--mcp-url ', 'Remote MCP endpoint URL (required when --tools includes remote_mcp)') + .option('--mcp-headers ', 'Remote MCP request headers as a JSON object (with --tools remote_mcp)') .option('--gateway-arn ', 'Gateway ARN (required when --tools includes agentcore_gateway)') .option( '--gateway-outbound-auth ', @@ -387,6 +753,8 @@ export class HarnessPrimitive extends BasePrimitive', 'OAuth provider ARN for gateway outbound auth') .option('--gateway-scopes ', 'Comma-separated OAuth scopes for gateway outbound auth') + .option('--gateway-grant-type ', 'Gateway OAuth grant type: CLIENT_CREDENTIALS or USER_FEDERATION') + .option('--gateway-custom-parameters ', 'Gateway OAuth custom parameters as a JSON object') .option('--authorizer-type ', 'Authorizer type: AWS_IAM or CUSTOM_JWT') .option('--discovery-url ', 'OIDC discovery URL (for CUSTOM_JWT)') .option('--allowed-audience ', 'Comma-separated allowed audiences (for CUSTOM_JWT)') @@ -395,6 +763,35 @@ export class HarnessPrimitive extends BasePrimitive', 'Custom claims JSON array (for CUSTOM_JWT)') .option('--client-id ', 'OAuth client ID (for CUSTOM_JWT)') .option('--client-secret ', 'OAuth client secret (for CUSTOM_JWT)') + .option( + '--private-endpoint-lattice-arn ', + 'PrivateLink: VPC Lattice resource-config id/ARN to reach the OIDC discovery endpoint (for CUSTOM_JWT)' + ) + .option( + '--private-endpoint-vpc-id ', + 'PrivateLink: VPC id for a service-managed endpoint to the OIDC discovery endpoint (for CUSTOM_JWT)' + ) + .option( + 
'--private-endpoint-subnets ', + 'PrivateLink: comma-separated subnet IDs (with --private-endpoint-vpc-id)' + ) + .option( + '--private-endpoint-ip-type ', + 'PrivateLink: endpoint IP address type: IPV4 or IPV6 (with --private-endpoint-vpc-id)' + ) + .option( + '--private-endpoint-security-groups ', + 'PrivateLink: comma-separated security group IDs, max 5 (with --private-endpoint-vpc-id)' + ) + .option( + '--private-endpoint-routing-domain ', + 'PrivateLink: routing domain (with --private-endpoint-vpc-id)' + ) + .option('--private-endpoint-tags ', 'PrivateLink: tags JSON object (with --private-endpoint-vpc-id)') + .option( + '--private-endpoint-overrides ', + 'PrivateLink: JSON array (max 5) of {domain, privateEndpoint} per-domain overrides (for CUSTOM_JWT)' + ) .option('--json', 'Output as JSON') .action( async (cliOptions: { @@ -403,8 +800,24 @@ export class HarnessPrimitive extends BasePrimitive; + tag?: Record; withInvokeScript?: boolean; systemPrompt?: string; tools?: string; mcpName?: string; mcpUrl?: string; + mcpHeaders?: string; gatewayArn?: string; gatewayOutboundAuth?: string; gatewayProviderArn?: string; gatewayScopes?: string; + gatewayGrantType?: string; + gatewayCustomParameters?: string; authorizerType?: string; discoveryUrl?: string; allowedAudience?: string; @@ -432,6 +853,14 @@ export class HarnessPrimitive extends BasePrimitive { try { @@ -477,34 +906,90 @@ export class HarnessPrimitive extends BasePrimitive | undefined; + if (cliOptions.additionalParams) { + try { + additionalParams = JSON.parse(cliOptions.additionalParams) as Record; + } catch { + if (cliOptions.json) { + console.log(JSON.stringify({ success: false, error: ADDITIONAL_PARAMS_JSON_ERROR })); + } else { + console.error(ADDITIONAL_PARAMS_JSON_ERROR); + } + process.exit(1); + } + } + + const mcpHeaders = this.parseJsonRecordFlag(cliOptions.mcpHeaders, '--mcp-headers', cliOptions.json); + const gatewayCustomParameters = this.parseJsonRecordFlag( + 
cliOptions.gatewayCustomParameters, + '--gateway-custom-parameters', + cliOptions.json + ); + const result = await this.add({ name: cliOptions.name, modelProvider: provider, modelId, apiFormat: cliOptions.apiFormat as HarnessApiFormat | undefined, apiKeyArn: cliOptions.apiKeyArn, + apiBase: cliOptions.apiBase, + additionalParams, + temperature: cliOptions.temperature, + topP: cliOptions.topP, + topK: cliOptions.topK, + modelMaxTokens: cliOptions.modelMaxTokens, containerUri: containerOption.containerUri, dockerfilePath: containerOption.dockerfilePath, skipMemory: cliOptions.memory === false, + memoryMode: cliOptions.memoryMode as AddHarnessOptions['memoryMode'], + memoryStrategies: cliOptions.memoryStrategies + ?.split(',') + .map(s => s.trim()) + .filter(Boolean), + memoryEventExpiryDays: cliOptions.memoryEventExpiryDays, + memoryEncryptionKeyArn: cliOptions.memoryEncryptionKeyArn, + memoryName: cliOptions.memoryName, + memoryArn: cliOptions.memoryArn, + memoryActorId: cliOptions.memoryActorId, + messagesCount: cliOptions.memoryMessagesCount, + memoryTopK: cliOptions.memoryTopK, + memoryRelevanceScore: cliOptions.memoryRelevanceScore, maxIterations: cliOptions.maxIterations, maxTokens: cliOptions.maxTokens, timeoutSeconds: cliOptions.timeout, - truncationStrategy: cliOptions.truncationStrategy as 'sliding_window' | 'summarization' | undefined, + truncationStrategy: cliOptions.truncationStrategy as + | 'sliding_window' + | 'summarization' + | 'none' + | undefined, networkMode: cliOptions.networkMode as NetworkMode | undefined, subnets: cliOptions.subnets?.split(',').map(s => s.trim()), securityGroups: cliOptions.securityGroups?.split(',').map(s => s.trim()), idleTimeout: cliOptions.idleTimeout, maxLifetime: cliOptions.maxLifetime, sessionStoragePath: cliOptions.sessionStorage, + efsAccessPoints: parseAccessPoints(cliOptions.efsAccessPoint), + s3AccessPoints: parseAccessPoints(cliOptions.s3AccessPoint), + allowedTools: cliOptions.allowedTools + ?.split(',') + .map(s 
=> s.trim()) + .filter(Boolean), + environmentVariables: + cliOptions.env && Object.keys(cliOptions.env).length > 0 ? cliOptions.env : undefined, + tags: cliOptions.tag && Object.keys(cliOptions.tag).length > 0 ? cliOptions.tag : undefined, withInvokeScript: cliOptions.withInvokeScript, systemPrompt: cliOptions.systemPrompt, selectedTools: cliOptions.tools?.split(',').map(s => s.trim()), mcpName: cliOptions.mcpName, mcpUrl: cliOptions.mcpUrl, + mcpHeaders, gatewayArn: cliOptions.gatewayArn, gatewayOutboundAuth: cliOptions.gatewayOutboundAuth as 'awsIam' | 'none' | 'oauth' | undefined, gatewayProviderArn: cliOptions.gatewayProviderArn, gatewayScopes: cliOptions.gatewayScopes?.split(',').map(s => s.trim()), + gatewayGrantType: cliOptions.gatewayGrantType as 'CLIENT_CREDENTIALS' | 'USER_FEDERATION' | undefined, + gatewayCustomParameters, authorizerType: cliOptions.authorizerType as RuntimeAuthorizerType | undefined, jwtConfig: cliOptions.authorizerType === 'CUSTOM_JWT' && cliOptions.discoveryUrl @@ -518,6 +1003,12 @@ export class HarnessPrimitive extends BasePrimitive', 'Name of resource to remove [non-interactive]') + .option('-y, --yes', 'Skip confirmation prompt [non-interactive]') + .option('--json', 'Output as JSON [non-interactive]') + .option( + '--keep', + 'For a preview-build orphan: delete it from AWS but keep it in agentcore.json (it moves to GA; the next deploy recreates it under CloudFormation)' + ) + .option('--discard', 'For a preview-build orphan: delete it from AWS and remove it from agentcore.json') + .action( + async (cliOptions: { name?: string; yes?: boolean; json?: boolean; keep?: boolean; discard?: boolean }) => { + try { + if (!findConfigRoot()) { + console.error('No agentcore project found. 
Run `agentcore create` first.'); + process.exit(1); + } + + if (cliOptions.keep && cliOptions.discard) { + const error = '--keep and --discard are mutually exclusive'; + console.log(JSON.stringify({ success: false, error })); + process.exit(1); + } + const orphanAction: OrphanAction | undefined = cliOptions.keep + ? 'keep' + : cliOptions.discard + ? 'discard' + : undefined; + + // Any flag triggers non-interactive CLI mode + if (cliOptions.name || cliOptions.yes || cliOptions.json || orphanAction) { + if (!cliOptions.name) { + console.log(JSON.stringify({ success: false, error: '--name is required' })); + process.exit(1); + } + + const result = await withCommandRunTelemetry, Result>( + `remove.${this.kind}`, + {}, + () => this.remove(cliOptions.name!, { orphanAction }) + ); + // The orphan no-flag refusal made no changes — surface it as a clean human error on + // stderr (with a non-zero exit) rather than a JSON blob, so a user who expected a + // deletion plainly sees that nothing happened and what to do. Scoped to that exact + // case (orphan + no --keep/--discard, non-JSON); every other path keeps the + // machine-readable JSON convention untouched. + if (!result.success && !orphanAction && !cliOptions.json && (await this.isOrphan(cliOptions.name))) { + console.error(`Error: ${result.error.message}`); + process.exit(1); + } + console.log( + JSON.stringify({ + success: result.success, + resourceType: this.kind, + resourceName: cliOptions.name, + message: result.success ? `Removed ${this.label.toLowerCase()} '${cliOptions.name}'` : undefined, + note: result.success ? SOURCE_CODE_NOTE : undefined, + error: !result.success ? result.error.message : undefined, + }) + ); + process.exit(result.success ? 
0 : 1); + } else { + // TUI fallback — dynamic imports to avoid pulling ink (async) into registry + requireTTY(); + const [{ render }, { default: React }, { RemoveFlow }] = await Promise.all([ + import('ink'), + import('react'), + import('../tui/screens/remove'), + ]); + const { clear, unmount } = render( + React.createElement(RemoveFlow, { + isInteractive: false, + force: cliOptions.yes, + initialResourceType: this.kind, + initialResourceName: cliOptions.name, + onExit: () => { + clear(); + unmount(); + process.exit(0); + }, + }) + ); + } + } catch (error) { + if (cliOptions.json) { + console.log(JSON.stringify({ success: false, error: getErrorMessage(error) })); + } else { + console.error(`Error: ${getErrorMessage(error)}`); + } + process.exit(1); + } + } + ); } addScreen(): AddScreenComponent { @@ -597,4 +1194,143 @@ export class HarnessPrimitive extends BasePrimitive | undefined { + if (value === undefined) return undefined; + const fail = (msg: string): never => { + if (json) console.log(JSON.stringify({ success: false, error: msg })); + else console.error(msg); + process.exit(1); + }; + let parsed: unknown; + try { + parsed = JSON.parse(value); + } catch { + return fail(`Invalid ${flag}: not valid JSON`); + } + if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) { + return fail(`Invalid ${flag}: expected a JSON object of string values`); + } + // CFN headers / custom-parameters are string→string maps. Reject non-string values rather than + // String()-coercing them (which silently turns {"X":{...}} into "[object Object]", {"X":[1,2]} + // into "1,2", etc.) — the user almost certainly wants an error, not a garbage header value. 
+ const entries = Object.entries(parsed as Record); + for (const [k, v] of entries) { + if (typeof v !== 'string') { + return fail(`Invalid ${flag}: value for "${k}" must be a string`); + } + } + return Object.fromEntries(entries) as Record; + } + + /** + * Build the PrivateLink `privateEndpoint` (PrivateLink inbound) from CLI flags. Returns the + * self-managed-lattice arm when --private-endpoint-lattice-arn is set, the managed-vpc arm when + * --private-endpoint-vpc-id is set, or undefined when neither. The schema enforces exactly-one-of + * downstream; this just shapes whichever the user provided. + */ + private buildPrivateEndpointFromFlags(options: { + privateEndpointLatticeArn?: string; + privateEndpointVpcId?: string; + privateEndpointSubnets?: string; + privateEndpointIpType?: string; + privateEndpointSecurityGroups?: string; + privateEndpointRoutingDomain?: string; + privateEndpointTags?: string; + }): PrivateEndpoint | undefined { + if (options.privateEndpointLatticeArn) { + return { selfManagedLatticeResource: { resourceConfigurationIdentifier: options.privateEndpointLatticeArn } }; + } + if (options.privateEndpointVpcId) { + return { + managedVpcResource: { + vpcIdentifier: options.privateEndpointVpcId, + subnetIds: options.privateEndpointSubnets?.split(',').map(s => s.trim()) ?? 
[], + endpointIpAddressType: options.privateEndpointIpType as EndpointIpAddressType, + ...(options.privateEndpointSecurityGroups && { + securityGroupIds: options.privateEndpointSecurityGroups.split(',').map(s => s.trim()), + }), + ...(options.privateEndpointRoutingDomain && { routingDomain: options.privateEndpointRoutingDomain }), + ...(options.privateEndpointTags && { + tags: JSON.parse(options.privateEndpointTags) as Record, + }), + }, + }; + } + return undefined; + } } diff --git a/src/cli/primitives/KnowledgeBasePrimitive.ts b/src/cli/primitives/KnowledgeBasePrimitive.ts new file mode 100644 index 000000000..e73cb8df4 --- /dev/null +++ b/src/cli/primitives/KnowledgeBasePrimitive.ts @@ -0,0 +1,688 @@ +import { APP_DIR, ValidationError, findConfigRoot, serializeResult, toError } from '../../lib'; +import type { Result } from '../../lib/result'; +import type { + AgentCoreGatewayTarget, + AgentCoreProjectSpec, + ConnectorFileDataSource, + DataSource, + KnowledgeBase, +} from '../../schema'; +import { CONNECTOR_ID, KnowledgeBaseSchema } from '../../schema'; +import { getErrorMessage } from '../errors'; +import { isGatedFeaturesEnabled } from '../feature-flags'; +import { upsertAgenticRetrieveTarget } from '../operations/knowledge-base/agentic-retrieve-upsert'; +import { + type DataSourceTypeFlag, + flagToWireType, + isConnectorConfigType, + readConnectorConfig, +} from '../operations/knowledge-base/connector-config'; +import type { RemovalPreview } from '../operations/remove/types'; +import { runCliCommand } from '../telemetry/cli-command-run.js'; +import { requireTTY } from '../tui/guards/tty'; +import { BasePrimitive } from './BasePrimitive'; +import type { AddResult, AddScreenComponent, RemovableResource } from './types'; +import type { Command } from '@commander-js/extra-typings'; +import { copyFileSync, existsSync, mkdirSync } from 'fs'; +import { basename, dirname, join, relative, resolve } from 'path'; + +/** + * Options for adding a knowledge base resource. 
+ * + * `agentcore add knowledge-base` creates the KB and its data sources. The + * repeatable `--source` flag maps to entries in the KB's `dataSources` array. + * Re-invoking `add` with an existing `--name` appends new data sources to the + * existing entry (idempotent append). + * + * When `--gateway` is set, a connector-typed gateway target referencing this + * KB by name is appended to `agentCoreGateways[name=X].targets[]`. + * + * Binding a pre-existing KB (one this project did not create) is done via the + * gateway-target primitive: `agentcore add gateway-target --type connector + * --connector bedrock-knowledge-bases --knowledge-base-id `. That path + * doesn't touch `knowledgeBases[]` at all. + */ +export interface AddKnowledgeBaseOptions { + name: string; + description?: string; + /** Repeatable `--source` flag values (S3 URIs). Required for S3 data sources. */ + source?: string[]; + /** Repeatable `--connector-config` flag values. Required for non-S3 connectors. */ + connectorConfig?: string[]; + /** `--data-source-type` flag (s3 default, or web-crawler/confluence/...). */ + dataSourceType?: DataSourceTypeFlag; + /** Gateway to wire the KB into via a connector target. Optional. */ + gateway?: string; + json?: boolean; +} + +export interface AddKnowledgeBaseSuccess extends Record { + knowledgeBaseName: string; + /** True if this invocation appended data sources to an existing KB; false on first creation. */ + appended: boolean; + /** New data source URIs added by this invocation (matches the order of --source flags). */ + newDataSources: string[]; + /** Gateway the KB was wired to via a connector target, if any. */ + gatewayWired?: string; +} + +export type RemovableKnowledgeBase = RemovableResource; + +/** + * Cheap shape check for early errors at the CLI boundary. 
The Zod schema
+ * (`S3DataSourceSchema`) is the canonical validator and runs on every
+ * `KnowledgeBaseSchema.parse(...)` and on every `writeProjectSpec` —
+ * keep these regexes in sync if either is edited.
+ */
+const S3_URI_PATTERN = /^s3:\/\/[^/]+(\/.*)?$/;
+
+/** Returns true when `uri` matches the `s3://bucket[/prefix]` shape of S3_URI_PATTERN. */
+function isS3Uri(uri: string): boolean {
+  return S3_URI_PATTERN.test(uri);
+}
+
+/**
+ * Stable identity key for a data source across the discriminated union: S3
+ * sources are keyed by their URI, non-S3 connector sources by their config
+ * file path. Used for dedup and human-readable summaries.
+ */
+function dataSourceKey(ds: DataSource): string {
+  return ds.type === 'S3' ? ds.uri : ds.connectorConfigFile;
+}
+
+/**
+ * KB primitive. Owns the `agentcore.json` `knowledgeBases[]` lifecycle for
+ * CLI-managed FMKB knowledge bases. Data sources are either S3 (inline
+ * `--source` URIs) or non-S3 connectors (`--data-source-type` plus a
+ * `--connector-config` file, e.g. Web Crawler / Confluence / SharePoint /
+ * OneDrive / Google Drive). Connector configs are materialized under the
+ * KB's folder inside the app directory and referenced by project-relative
+ * path.
+ *
+ * Existing-KB references — i.e. binding a pre-existing KB that this project
+ * didn't create — are managed by the gateway-target primitive (`add
+ * gateway-target --type connector`), since the only artifact written for that
+ * case is a connector gateway target.
+ */
+export class KnowledgeBasePrimitive extends BasePrimitive {
+  readonly kind = 'knowledge-base';
+  readonly label = 'Knowledge Base';
+  readonly primitiveSchema = KnowledgeBaseSchema;
+
+  /**
+   * Create a knowledge base, or append data sources to the one already named
+   * `options.name`.
+   *
+   * Never throws: every failure is caught and returned as
+   * `{ success: false, error }`.
+   *
+   * Ordering contract: every check that can reject the request with the
+   * information at hand runs BEFORE connector configs are copied into the
+   * project. That includes the append-conflict checks (description change,
+   * gateway change, data source already on the KB), which are pre-flighted
+   * against the planned identity keys.
+   */
+  async add(options: AddKnowledgeBaseOptions): Promise> {
+    try {
+      const sources = options.source ?? [];
+      const connectorConfigs = options.connectorConfig ?? [];
+      const wireType = flagToWireType(options.dataSourceType ?? 's3');
+      const warnings: string[] = [];
+
+      // Phase 1 — pure validation, no side effects (no file copy). The file
+      // copy itself is deferred to phase 3 via `buildDataSources`.
+      let buildDataSources: () => DataSource[];
+      // Identity keys the new data sources will carry once built, computed
+      // without touching the filesystem so they can be checked up front.
+      let plannedKeys: string[];
+
+      if (isConnectorConfigType(wireType)) {
+        if (sources.length > 0) {
+          throw new Error(`--source is only valid for S3. For ${wireType}, use --connector-config.`);
+        }
+        if (connectorConfigs.length === 0) {
+          throw new Error(`--connector-config is required for --data-source-type ${options.dataSourceType}.`);
+        }
+        // Validate every config file up front before any copy happens. Also
+        // detect destination basename collisions: two distinct configs in one
+        // invocation that share a basename would land on the same stored
+        // path, so the second is rejected before anything is written.
+        // Exact-source-path duplicates are left to the batch-dedup loop
+        // below, which has its own message.
+        const seenSources = new Set();
+        const basenameToSource = new Map();
+        for (const cfgPath of connectorConfigs) {
+          const { warnings: w } = readConnectorConfig(cfgPath, wireType);
+          warnings.push(...w);
+
+          const resolvedSrc = resolve(cfgPath);
+          if (seenSources.has(resolvedSrc)) {
+            // Same source twice — let the batch-dedup loop emit its message.
+            continue;
+          }
+          seenSources.add(resolvedSrc);
+          const base = basename(resolvedSrc);
+          const prior = basenameToSource.get(base);
+          if (prior) {
+            throw new Error(
+              `Connector config files '${prior}' and '${cfgPath}' would both be stored as 'app/${options.name}/${base}'. Rename one so their filenames differ.`
+            );
+          }
+          basenameToSource.set(base, cfgPath);
+        }
+        // Same derivation materializeConnectorConfig uses for its return
+        // value, minus the copy: project-relative, forward-slash path.
+        const projectRoot = dirname(this.configIO.getConfigRoot());
+        plannedKeys = connectorConfigs.map(cfgPath =>
+          relative(projectRoot, join(projectRoot, APP_DIR, options.name, basename(resolve(cfgPath))))
+            .split('\\')
+            .join('/')
+        );
+        buildDataSources = () =>
+          connectorConfigs.map(cfgPath => {
+            const stored = this.materializeConnectorConfig(options.name, cfgPath);
+            return { type: wireType, connectorConfigFile: stored } as ConnectorFileDataSource;
+          });
+      } else {
+        if (connectorConfigs.length > 0) {
+          throw new Error('--connector-config is only valid for non-S3 data source types.');
+        }
+        if (sources.length === 0) {
+          throw new Error('At least one --source is required for S3 data sources.');
+        }
+        // Cheap shape check up front so we error before reading agentcore.json.
+        // The full bucket-name validation lives in S3DataSourceSchema.
+        for (const uri of sources) {
+          if (!isS3Uri(uri)) {
+            throw new Error(`Invalid S3 URI: ${uri}. Expected s3://bucket[/prefix].`);
+          }
+        }
+        plannedKeys = sources;
+        buildDataSources = () => sources.map(uri => ({ type: 'S3', uri }));
+      }
+
+      // Reject duplicates inside this batch up front (S3 by uri, connector by
+      // source file path), naming the offender.
+      const batchKeys = isConnectorConfigType(wireType)
+        ? connectorConfigs.map(p => relative(dirname(this.configIO.getConfigRoot()), resolve(p)).split('\\').join('/'))
+        : sources;
+      const seenInBatch = new Set();
+      for (const key of batchKeys) {
+        if (seenInBatch.has(key)) {
+          throw new Error(`Duplicate data source in this invocation: ${key}`);
+        }
+        seenInBatch.add(key);
+      }
+
+      const project = await this.readProjectSpec();
+
+      // Phase 2 — checks that need the project spec, still BEFORE any copy.
+      // Validate gateway exists (no auto-create).
+      if (options.gateway) {
+        const gw = project.agentCoreGateways.find(g => g.name === options.gateway);
+        if (!gw) {
+          throw new Error(
+            `Gateway "${options.gateway}" not found in agentcore.json. Add it first with 'agentcore add gateway --name ${options.gateway}'.`
+          );
+        }
+      }
+
+      // Pre-flight the append conflicts against the planned keys. Without
+      // this, a rejected append (changed description or gateway, or a data
+      // source already on the KB) was only detected after the connector
+      // configs had been copied in, leaving a stray file behind.
+      const existing = project.knowledgeBases.find(kb => kb.name === options.name);
+      if (existing) {
+        this.assertAppendAllowed(existing, options, plannedKeys);
+      }
+
+      // Phase 3 — materialize (copy connector configs into the KB's app
+      // folder) and build the data sources.
+      const newDataSources: DataSource[] = buildDataSources();
+
+      if (existing) {
+        return await this.appendToExisting(existing, project, newDataSources, options, warnings);
+      }
+
+      // NOTE(review): this parse still runs after the copy. If it can reject a
+      // new KB (e.g. an invalid name), the copied config stays on disk —
+      // consider parsing against the planned keys first once it is confirmed
+      // the schema performs no filesystem checks.
+      const kb: KnowledgeBase = KnowledgeBaseSchema.parse({
+        name: options.name,
+        ...(options.description && { description: options.description }),
+        dataSources: newDataSources,
+        ...(options.gateway && { gateway: options.gateway }),
+      });
+
+      project.knowledgeBases.push(kb);
+
+      // --gateway: append the connector targets — one Retrieve per KB plus the
+      // shared gateway-scoped agentic-retrieve target (this KB gets appended
+      // to its knowledgeBaseIds[]).
+      if (options.gateway) {
+        this.appendConnectorTargets(project, options.gateway, kb.name, kb.name);
+      }
+
+      await this.writeProjectSpec(project);
+
+      if (!options.json) for (const w of warnings) console.warn(w);
+
+      return {
+        success: true,
+        knowledgeBaseName: kb.name,
+        appended: false,
+        newDataSources: newDataSources.map(dataSourceKey),
+        ...(options.gateway && { gatewayWired: options.gateway }),
+      };
+    } catch (err) {
+      return { success: false, error: toError(err) };
+    }
+  }
+
+  /**
+   * Side-effect-free pre-flight of the conflicts `appendToExisting` rejects,
+   * evaluated against the keys the new data sources WILL have. Throws with
+   * the same messages so the user-facing errors are unchanged.
+   */
+  private assertAppendAllowed(existing: KnowledgeBase, options: AddKnowledgeBaseOptions, plannedKeys: string[]): void {
+    // '' and undefined both mean "no description provided".
+    const descChanged =
+      options.description !== undefined && options.description !== '' && options.description !== existing.description;
+    if (descChanged) {
+      throw new Error(
+        `Knowledge base "${options.name}" already exists. Update operations are not supported in Wave 1; edit agentcore.json directly to change the description.`
+      );
+    }
+    if (options.gateway !== undefined && options.gateway !== existing.gateway) {
+      throw new Error(
+        `Knowledge base "${options.name}" already exists with a different gateway setting. Update operations are not supported in Wave 1.`
+      );
+    }
+    const existingKeys = new Set(existing.dataSources.map(dataSourceKey));
+    for (const key of plannedKeys) {
+      if (existingKeys.has(key)) {
+        throw new Error(`Data source "${key}" already exists on knowledge-base "${options.name}".`);
+      }
+    }
+  }
+
+  /**
+   * Wires a KB into a gateway by emitting BOTH connector targets:
+   *   1. A bedrock-knowledge-bases target (single-KB Retrieve), and
+   *   2. The gateway-scoped bedrock-agentic-retrieve target (orchestrated
+   *      fan-out across every KB on the gateway), creating it on first call
+   *      and appending kbReference to its knowledgeBaseIds[] on subsequent
+   *      calls. There's exactly one agentic target per gateway.
+   *
+   * `--description` is intentionally not propagated to either target entry.
+ * `AgentCoreGatewayTargetSchema` doesn't model a per-target description + * (only the parent gateway has one). + */ + private appendConnectorTargets( + project: Awaited>, + gatewayName: string, + retrieveTargetName: string, + kbReference: string + ): void { + const gateway = project.agentCoreGateways.find(g => g.name === gatewayName); + if (!gateway) { + throw new Error(`Gateway "${gatewayName}" not found in agentcore.json.`); + } + /* Per-KB Retrieve target first, then the gateway's agentic-retrieve target. This method does not call writeProjectSpec itself; the caller persists the spec. */ this.upsertRetrieveTarget(gateway, retrieveTargetName, kbReference); + upsertAgenticRetrieveTarget(gateway, kbReference); + } + + /** + * Append a single-KB Retrieve target to `gateway.targets`. A no-op when a + * target named `targetName` already exists as a `connector` target with the + * bedrock-knowledge-bases connector id and the same `knowledgeBaseId`; + * throws if any other target already uses that name. + */ + private upsertRetrieveTarget( + gateway: AgentCoreProjectSpec['agentCoreGateways'][number], + targetName: string, + knowledgeBaseId: string + ): void { + const existingTarget = gateway.targets.find(t => t.name === targetName); + if (existingTarget) { + const sameKb = existingTarget.knowledgeBaseId === knowledgeBaseId; + const sameType = existingTarget.targetType === 'connector'; + const sameConnector = existingTarget.connectorId === CONNECTOR_ID.BEDROCK_KNOWLEDGE_BASES; + if (sameType && sameConnector && sameKb) { + return; + } + throw new Error(`Gateway "${gateway.name}" already has a target named "${targetName}". Pick a different --name.`); + } + const target: AgentCoreGatewayTarget = { + name: targetName, + targetType: 'connector', + connectorId: CONNECTOR_ID.BEDROCK_KNOWLEDGE_BASES, + knowledgeBaseId, + } as AgentCoreGatewayTarget; + gateway.targets.push(target); + } + + /** + * Append data sources to an existing KB entry. Errors loudly on conflicting + * intent (e.g. trying to update description, or duplicate URI).
+ */ + private async appendToExisting( + existing: KnowledgeBase, + project: Awaited>, + newDataSources: DataSource[], + options: AddKnowledgeBaseOptions, + warnings: string[] = [] + ): Promise> { + // Treat '' and undefined as "no description provided", so a user appending + // a data source without re-passing --description doesn't trip the + // update-not-supported guard. + const descChanged = + options.description !== undefined && options.description !== '' && options.description !== existing.description; + if (descChanged) { + throw new Error( + `Knowledge base "${options.name}" already exists. Update operations are not supported in Wave 1; edit agentcore.json directly to change the description.` + ); + } + /* An explicitly passed --gateway must equal the stored one; omitting it keeps the existing wiring. */ if (options.gateway !== undefined && options.gateway !== existing.gateway) { + throw new Error( + `Knowledge base "${options.name}" already exists with a different gateway setting. Update operations are not supported in Wave 1.` + ); + } + + /* Reject any new data source whose identity key (S3 uri or connector-config path) is already on the KB. */ const existingKeys = new Set(existing.dataSources.map(dataSourceKey)); + for (const ds of newDataSources) { + const key = dataSourceKey(ds); + if (existingKeys.has(key)) { + throw new Error(`Data source "${key}" already exists on knowledge-base "${options.name}".`); + } + } + + /* All checks passed: mutate the in-memory entry; writeProjectSpec below is the only persistence call in this method. */ existing.dataSources.push(...newDataSources); + + // If the KB already has a gateway set and the connector target hasn't been + // appended yet (e.g. it was added before Wave 2 went live), append it now. + if (existing.gateway) { + this.appendConnectorTargets(project, existing.gateway, existing.name, existing.name); + } + + await this.writeProjectSpec(project); + + /* Warnings are printed only after a successful write, and never in --json mode. */ if (!options.json) for (const w of warnings) console.warn(w); + + return { + success: true, + knowledgeBaseName: existing.name, + appended: true, + newDataSources: newDataSources.map(dataSourceKey), + ...(existing.gateway && { gatewayWired: existing.gateway }), + }; + } + + /** + * Ensure the connector-config file lives under `app//` and return + * its project-root-relative path.
If the user's path already points inside + * that folder, return it as-is; otherwise copy it in (announced to the user). + */ + private materializeConnectorConfig(kbName: string, cfgPath: string): string { + const projectRoot = dirname(this.configIO.getConfigRoot()); + const src = resolve(cfgPath); + if (!existsSync(src)) { + throw new Error(`Connector config file not found: ${cfgPath}`); + } + const destDir = join(projectRoot, APP_DIR, kbName); + const dest = join(destDir, basename(src)); + /* Forward-slash, project-root-relative form; this is the value the method returns. */ const relToProject = (p: string) => relative(projectRoot, p).split('\\').join('/'); + + /* In-place case: the file already sits exactly at the destination path, so nothing is copied. */ if (resolve(src) === resolve(dest)) { + return relToProject(dest); + } + // Defense-in-depth: never silently overwrite a different file already at + // the destination (e.g. a prior data source on this KB whose config shares + // this basename). The in-place case above already returned, so reaching + // here with an existing dest means src !== dest. + if (existsSync(dest)) { + throw new Error( + `Connector config '${cfgPath}' would overwrite the existing file at '${relToProject(dest)}'. Rename it so its filename differs.` + ); + } + mkdirSync(destDir, { recursive: true }); + copyFileSync(src, dest); + /* The announcement goes to stderr via console.error — presumably so stdout stays clean for --json output; confirm. */ console.error(`Copied connector config to ${relToProject(dest)}`); + return relToProject(dest); + } + + async remove(name: string): Promise { + try { + const project = await this.readProjectSpec(); + + // Find the KB entry. Cascade-remove the per-KB Retrieve target on the + // linked gateway, if any. + const idx = project.knowledgeBases.findIndex(kb => kb.name === name); + if (idx === -1) { + throw new Error(`Knowledge base "${name}" not found.`); + } + const kb = project.knowledgeBases[idx]!; + project.knowledgeBases.splice(idx, 1); + if (kb.gateway) { + this.removeConnectorTarget(project, kb.gateway, kb.name); + } + + // Cascade-prune the removed KB out of every gateway's agentic-retrieve + // target.
Without this, the spec would be unwriteable: the cross-spec + // validator rejects an agentic target with a knowledgeBaseIds[] entry + // that doesn't match a knowledgeBases[] name and isn't a literal KB id. + // We keep the no-update rule for renames; remove is the one shape where + // doing nothing leaves the spec in a state the schema won't write. + this.pruneAgenticRetrieveReferences(project, name); + + await this.writeProjectSpec(project); + return { success: true }; + } catch (err) { + return { success: false, error: toError(err) }; + } + } + + async previewRemove(name: string): Promise { + const project = await this.readProjectSpec(); + + const kb = project.knowledgeBases.find(k => k.name === name); + if (!kb) { + throw new Error(`Knowledge base "${name}" not found.`); + } + const summary: string[] = [ + `Removing knowledge base: ${name}`, + ` Data sources (${kb.dataSources.length}):`, + ...kb.dataSources.map(ds => ` - ${dataSourceKey(ds)}`), + ]; + if (kb.gateway) { + summary.push(` Gateway target: '${name}' on '${kb.gateway}' will be removed`); + } + + /* Deep-clone via a JSON round-trip so the preview mutations below never touch `project`, which is reported as the `before` snapshot. */ const afterSpec = JSON.parse(JSON.stringify(project)) as typeof project; + afterSpec.knowledgeBases = afterSpec.knowledgeBases.filter(k => k.name !== name); + if (kb.gateway) { + const gw = afterSpec.agentCoreGateways.find(g => g.name === kb.gateway); + if (gw) gw.targets = gw.targets.filter(t => t.name !== name); + } + /* Runs the same prune remove() uses, but against the clone, and turns each reported action into a summary line. */ const pruneActions = this.pruneAgenticRetrieveReferences(afterSpec, name); + for (const action of pruneActions) { + if (action.removedTarget) { + summary.push( + ` Gateway "${action.gatewayName}" agentic-retrieve target '${action.targetName}' will be removed (was the last KB)` + ); + } else { + summary.push( + ` Gateway "${action.gatewayName}" agentic-retrieve target '${action.targetName}' will lose KB '${name}'` + ); + } + } + + /* NOTE(review): directoriesToDelete is always empty and remove() only edits the spec, so connector-config files previously copied under the app directory stay on disk — confirm this is intended. */ return { + summary, + directoriesToDelete: [], + schemaChanges: [{ file: 'agentcore/agentcore.json', before: project, after: afterSpec }], + }; + } + + /** + * Walk
every gateway's agentic-retrieve target and drop kbReference from + * its knowledgeBaseIds[]. If the array empties out, remove the agentic + * target itself — schema requires non-empty knowledgeBaseIds[]. Returns + * a list of actions for callers that want to surface what changed. + */ + private pruneAgenticRetrieveReferences( + project: AgentCoreProjectSpec, + kbReference: string + ): { gatewayName: string; targetName: string; removedTarget: boolean }[] { + /* Mutates `project` in place; callers that only want a preview pass a clone. */ const actions: { gatewayName: string; targetName: string; removedTarget: boolean }[] = []; + for (const gw of project.agentCoreGateways) { + /* findIndex: only the first agentic-retrieve connector target on each gateway is examined. */ const agenticIdx = gw.targets.findIndex( + t => t.targetType === 'connector' && t.connectorId === CONNECTOR_ID.BEDROCK_AGENTIC_RETRIEVE + ); + if (agenticIdx === -1) continue; + const agentic = gw.targets[agenticIdx]!; + const ids = agentic.knowledgeBaseIds ?? []; + /* Gateways whose agentic target never referenced this KB are left untouched and produce no action. */ if (!ids.includes(kbReference)) continue; + const remaining = ids.filter(id => id !== kbReference); + if (remaining.length === 0) { + gw.targets.splice(agenticIdx, 1); + actions.push({ gatewayName: gw.name, targetName: agentic.name, removedTarget: true }); + } else { + agentic.knowledgeBaseIds = remaining; + actions.push({ gatewayName: gw.name, targetName: agentic.name, removedTarget: false }); + } + } + return actions; + } + + /** Lists every KB in the project spec by name. Any failure to read the spec is swallowed and reported as an empty list. */ async getRemovable(): Promise { + try { + const project = await this.readProjectSpec(); + return project.knowledgeBases.map(kb => ({ name: kb.name })); + } catch { + return []; + } + } + + /** + * Remove a connector-typed gateway target by name. No-op if the target or + * gateway is missing — that's fine because we may be cascading from a KB + * whose gateway link was unwired manually.
+ */ + private removeConnectorTarget( + project: Awaited>, + gatewayName: string, + targetName: string + ): void { + const gateway = project.agentCoreGateways.find(g => g.name === gatewayName); + if (!gateway) return; + gateway.targets = gateway.targets.filter(t => t.name !== targetName); + } + + registerCommands(addCmd: Command, removeCmd: Command): void { + addCmd + .command(this.kind, { hidden: !isGatedFeaturesEnabled() }) + .description('Add a knowledge base (FMKB) to the project, optionally wiring it to a gateway.') + .option('--name ', 'Knowledge base name (1-48 chars, starts with letter)') + .option('--description ', 'Optional description (used for tool discovery)') + .option( + '--source ', + 'S3 URI for a data source (s3://bucket[/prefix]). Repeatable for multiple data sources.', + (val: string, acc: string[]) => [...acc, val], + [] as string[] + ) + .option( + '--data-source-type ', + 'Data source type: s3 (default), web-crawler, confluence, sharepoint, onedrive, google-drive', + 's3' + ) + .option( + '--connector-config ', + 'Path to a JSON connector-config file (required for non-S3 types). Repeatable.', + (val: string, acc: string[]) => [...acc, val], + [] as string[] + ) + .option('--gateway ', 'Gateway to attach the KB to as a connector target.') + .option('--json', 'Output as JSON [non-interactive]') + .action( + async (cliOptions: { + name?: string; + description?: string; + source?: string[]; + dataSourceType?: string; + connectorConfig?: string[]; + gateway?: string; + json?: boolean; + }) => { + if (!isGatedFeaturesEnabled()) { + console.error('Knowledge bases are not yet available.'); + process.exit(1); + } + if (!findConfigRoot()) { + console.error('No agentcore project found. Run `agentcore create` first.'); + process.exit(1); + } + + // No-args (or only --json) → drop into the Add Knowledge Base TUI + // wizard so the surface matches `agentcore add agent` / + // `add memory`. 
--data-source-type defaults to 's3' from + // Commander, so it's always populated; check the user-supplied + // flags only. + const userPassedAnyFlag = + !!cliOptions.name || + !!cliOptions.description || + (cliOptions.source?.length ?? 0) > 0 || + (cliOptions.connectorConfig?.length ?? 0) > 0 || + !!cliOptions.gateway || + !!cliOptions.json; + if (!userPassedAnyFlag) { + try { + requireTTY(); + const [{ render }, { default: React }, { AddFlow }] = await Promise.all([ + import('ink'), + import('react'), + import('../tui/screens/add/AddFlow'), + ]); + const { clear, unmount } = render( + React.createElement(AddFlow, { + isInteractive: false, + initialResource: 'knowledge-base', + onExit: () => { + clear(); + unmount(); + process.exit(0); + }, + }) + ); + return; + } catch (error) { + console.error(getErrorMessage(error)); + process.exit(1); + } + } + + await runCliCommand('add.knowledge-base', !!cliOptions.json, async () => { + if (!cliOptions.name) { + throw new ValidationError('A --name is required for `agentcore add knowledge-base`.'); + } + + const result = await this.add({ + name: cliOptions.name, + description: cliOptions.description, + source: cliOptions.source, + dataSourceType: cliOptions.dataSourceType as DataSourceTypeFlag | undefined, + connectorConfig: cliOptions.connectorConfig, + gateway: cliOptions.gateway, + json: cliOptions.json, + }); + + if (!result.success) { + throw result.error; + } + + if (cliOptions.json) { + console.log(JSON.stringify(serializeResult(result))); + } else if (result.appended) { + for (const uri of result.newDataSources) { + console.log(`Added data source '${uri}' to knowledge-base '${result.knowledgeBaseName}'`); + } + if (result.gatewayWired) { + console.log(` (gateway '${result.gatewayWired}' connector target ensured)`); + } + } else { + console.log(`Added knowledge base '${result.knowledgeBaseName}'`); + for (const uri of result.newDataSources) { + console.log(` with data source '${uri}'`); + } + if (result.gatewayWired) { 
+ console.log(` wired to gateway '${result.gatewayWired}' as connector target`); + } + } + + return { + data_source_count: result.newDataSources.length, + data_source_type: cliOptions.dataSourceType ?? 's3', + has_description: !!cliOptions.description, + has_gateway: !!cliOptions.gateway, + is_append: result.appended, + }; + }); + } + ); + + removeCmd + .command(this.kind, { hidden: !isGatedFeaturesEnabled() }) + .description('Remove a knowledge base from the project') + .option('--name ', 'Knowledge base name') + .option('--json', 'Output as JSON [non-interactive]') + .action(async (cliOptions: { name?: string; json?: boolean }) => { + if (!isGatedFeaturesEnabled()) { + console.error('Knowledge bases are not yet available.'); + process.exit(1); + } + if (!findConfigRoot()) { + console.error('No agentcore project found. Run `agentcore create` first.'); + process.exit(1); + } + await runCliCommand('remove.knowledge-base', !!cliOptions.json, async () => { + if (!cliOptions.name) { + throw new ValidationError('A --name is required for `agentcore remove knowledge-base`.'); + } + const result = await this.remove(cliOptions.name); + if (!result.success) { + throw result.error; + } + if (cliOptions.json) { + console.log(JSON.stringify(serializeResult(result))); + } else { + console.log(`Removed knowledge base '${cliOptions.name}'`); + } + return {}; + }); + }); + } + + addScreen(): AddScreenComponent { + // Wave 1: CLI-only. TUI lands in Plan C. 
+ return null; + } +} diff --git a/src/cli/primitives/OnlineEvalConfigPrimitive.ts b/src/cli/primitives/OnlineEvalConfigPrimitive.ts index 1624ab790..4886ce46a 100644 --- a/src/cli/primitives/OnlineEvalConfigPrimitive.ts +++ b/src/cli/primitives/OnlineEvalConfigPrimitive.ts @@ -17,6 +17,8 @@ export interface AddOnlineEvalConfigOptions { samplingRate: number; enableOnCreate?: boolean; endpoint?: string; + logGroupNames?: string[]; + serviceNames?: string[]; } export type RemovableOnlineEvalConfig = RemovableResource; @@ -65,10 +67,13 @@ export class OnlineEvalConfigPrimitive extends BasePrimitive 0) { + summary.push(`Uses evaluators: ${config.evaluators.join(', ')}`); + } + if (config.insights && config.insights.length > 0) { + summary.push(`Uses insights: ${config.insights.join(', ')}`); + } const schemaChanges: SchemaChange[] = []; const afterSpec = { @@ -113,6 +118,11 @@ export class OnlineEvalConfigPrimitive extends BasePrimitive', 'Evaluator ARN(s) [non-interactive]') .option('--sampling-rate ', 'Sampling percentage (0.01-100) [non-interactive]') .option('--endpoint ', 'Runtime endpoint name to scope monitoring [non-interactive]') + .option( + '--log-group-name ', + 'CloudWatch log group name(s) for custom data sources (1-5) [non-interactive]' + ) + .option('--service-name ', 'Service name(s) to filter traces for custom data sources [non-interactive]') .option('--enable-on-create', 'Enable evaluation immediately after deploy [non-interactive]') .option('--json', 'Output as JSON [non-interactive]') .action( @@ -123,6 +133,8 @@ export class OnlineEvalConfigPrimitive extends BasePrimitive { @@ -136,12 +148,33 @@ export class OnlineEvalConfigPrimitive extends BasePrimitive { - if (!cliOptions.name || !cliOptions.runtime || allEvaluators.length === 0 || !cliOptions.samplingRate) { + // Mutual exclusivity: --runtime and --log-group-name cannot be used together + if (cliOptions.runtime && cliOptions.logGroupName) { throw new Error( - '--name, --runtime, --evaluator 
(and/or --evaluator-arn), and --sampling-rate are all required in non-interactive mode' + 'Error: --runtime and --log-group-name are mutually exclusive. Use --runtime (+ optional --endpoint) for AgentCore agents, or --log-group-name/--service-name for custom data sources.' ); } + // Validate required fields based on source mode + if (cliOptions.logGroupName) { + // Custom data source mode + if (!cliOptions.name || allEvaluators.length === 0 || !cliOptions.samplingRate) { + throw new Error( + '--name, --log-group-name, --evaluator (and/or --evaluator-arn), and --sampling-rate are all required in non-interactive mode' + ); + } + if (cliOptions.logGroupName.length > 5) { + throw new Error('--log-group-name accepts at most 5 log group names'); + } + } else { + // AgentCore runtime mode + if (!cliOptions.name || !cliOptions.runtime || allEvaluators.length === 0 || !cliOptions.samplingRate) { + throw new Error( + '--name, --runtime, --evaluator (and/or --evaluator-arn), and --sampling-rate are all required in non-interactive mode' + ); + } + } + // Sampling rate as a percentage of requests to evaluate (0.01% to 100%) const samplingRate = parseFloat(cliOptions.samplingRate); if (isNaN(samplingRate) || samplingRate < 0.01 || samplingRate > 100) { @@ -152,11 +185,13 @@ export class OnlineEvalConfigPrimitive extends BasePrimitive r.name === options.agent); if (!runtime) { throw new Error(`Runtime "${options.agent}" not found in project.`); @@ -231,11 +266,13 @@ export class OnlineEvalConfigPrimitive extends BasePrimitive { + readonly kind = 'online-insights' as const; + readonly label = 'Online Insights Config'; + override readonly article = 'an'; + readonly primitiveSchema = OnlineEvalConfigSchema; + + async add(options: AddOnlineInsightsOptions): Promise> { + try { + const config = await this.createOnlineInsightsConfig(options); + return { success: true, configName: config.name }; + } catch (err) { + return { success: false, error: toError(err) }; + } + } + + async 
remove(configName: string): Promise { + try { + const project = await this.readProjectSpec(); + + const index = project.onlineEvalConfigs.findIndex( + c => c.name === configName && c.insights && c.insights.length > 0 + ); + if (index === -1) { + return { + success: false, + error: new ResourceNotFoundError(`Online insights config "${configName}" not found.`), + }; + } + + project.onlineEvalConfigs.splice(index, 1); + await this.writeProjectSpec(project); + + return { success: true }; + } catch (err) { + return { success: false, error: toError(err) }; + } + } + + async previewRemove(configName: string): Promise { + const project = await this.readProjectSpec(); + + const config = project.onlineEvalConfigs.find(c => c.name === configName && c.insights && c.insights.length > 0); + if (!config) { + throw new Error(`Online insights config "${configName}" not found.`); + } + + const summary: string[] = [ + `Removing online insights config: ${configName}`, + `Uses insights: ${(config.insights ?? 
[]).join(', ')}`, + ]; + const schemaChanges: SchemaChange[] = []; + + const afterSpec = { + ...project, + onlineEvalConfigs: project.onlineEvalConfigs.filter(c => c.name !== configName), + }; + + schemaChanges.push({ + file: 'agentcore/agentcore.json', + before: project, + after: afterSpec, + }); + + return { summary, directoriesToDelete: [], schemaChanges }; + } + + async getRemovable(): Promise { + try { + const project = await this.readProjectSpec(); + return project.onlineEvalConfigs.filter(c => c.insights && c.insights.length > 0).map(c => ({ name: c.name })); + } catch { + return []; + } + } + + async getAllNames(): Promise { + try { + const project = await this.readProjectSpec(); + return project.onlineEvalConfigs.filter(c => c.insights && c.insights.length > 0).map(c => c.name); + } catch { + return []; + } + } + + registerCommands(addCmd: Command, removeCmd: Command): void { + addCmd + .command('online-insights') + .description('Add an online insights config to the project') + .option('--name ', 'Config name [non-interactive]') + .option('-r, --runtime ', 'Runtime to monitor [non-interactive]') + .option('--insights ', 'Insight IDs (e.g. Builtin.Insight.FailureAnalysis) [non-interactive]') + .option('--sampling-rate ', 'Sampling percentage (0.01-100) [non-interactive]') + .option( + '--clustering-frequency ', + 'Clustering frequencies: DAILY, WEEKLY, MONTHLY [non-interactive]' + ) + .option('--endpoint ', 'Runtime endpoint name to scope monitoring [non-interactive]') + .option('--enable-on-create', 'Enable insights immediately after deploy [non-interactive]') + .option('--json', 'Output as JSON [non-interactive]') + .action( + async (cliOptions: { + name?: string; + runtime?: string; + insights?: string[]; + samplingRate?: string; + clusteringFrequency?: string[]; + endpoint?: string; + enableOnCreate?: boolean; + json?: boolean; + }) => { + if (!findConfigRoot()) { + console.error('No agentcore project found. 
Run `agentcore create` first.'); + process.exit(1); + } + + if (cliOptions.name || cliOptions.json) { + await runCliCommand('add.online-insights', !!cliOptions.json, async () => { + if (!cliOptions.name || !cliOptions.runtime || !cliOptions.insights?.length || !cliOptions.samplingRate) { + throw new Error( + '--name, --runtime, --insights, and --sampling-rate are all required in non-interactive mode' + ); + } + + const samplingRate = parseFloat(cliOptions.samplingRate); + if (isNaN(samplingRate) || samplingRate < 0.01 || samplingRate > 100) { + throw new Error( + `Invalid --sampling-rate "${cliOptions.samplingRate}". Must be a percentage between 0.01 and 100` + ); + } + + const result = await this.add({ + name: cliOptions.name, + agent: cliOptions.runtime, + insights: cliOptions.insights, + samplingRate, + clusteringFrequencies: cliOptions.clusteringFrequency, + enableOnCreate: cliOptions.enableOnCreate, + endpoint: cliOptions.endpoint, + }); + + if (!result.success) { + throw result.error; + } + + if (cliOptions.json) { + console.log(JSON.stringify(serializeResult(result))); + } else { + console.log(`Added online insights config '${result.configName}'`); + } + + return { + insights_count: cliOptions.insights.length, + enable_on_create: cliOptions.enableOnCreate ?? 
false, + }; + }); + } else { + try { + requireTTY(); + const [{ render }, { default: React }, { AddFlow }] = await Promise.all([ + import('ink'), + import('react'), + import('../tui/screens/add/AddFlow'), + ]); + const { clear, unmount } = render( + React.createElement(AddFlow, { + isInteractive: false, + initialResource: 'online-insights', + onExit: () => { + clear(); + unmount(); + process.exit(0); + }, + }) + ); + } catch (error) { + console.error(getErrorMessage(error)); + process.exit(1); + } + } + } + ); + + this.registerRemoveSubcommand(removeCmd); + } + + addScreen(): AddScreenComponent { + return null; + } + + private async createOnlineInsightsConfig(options: AddOnlineInsightsOptions): Promise { + const project = await this.readProjectSpec(); + + this.checkDuplicate(project.onlineEvalConfigs, options.name, 'Online insights config'); + + validateInsightIds(options.insights); + + // Validate that the endpoint exists on the specified runtime if provided + if (options.endpoint) { + const runtime = project.runtimes.find(r => r.name === options.agent); + if (!runtime) { + throw new Error(`Runtime "${options.agent}" not found in project.`); + } + if (!runtime.endpoints?.[options.endpoint]) { + throw new Error( + `Endpoint "${options.endpoint}" not found on runtime "${options.agent}". Available endpoints: ${ + runtime.endpoints ? 
Object.keys(runtime.endpoints).join(', ') : '(none)' + }` + ); + } + } + + const config: OnlineEvalConfig = { + name: options.name, + agent: options.agent, + insights: options.insights, + samplingRate: options.samplingRate, + ...(options.clusteringFrequencies?.length && { + clusteringConfig: { + frequencies: options.clusteringFrequencies as ('DAILY' | 'WEEKLY' | 'MONTHLY')[], + }, + }), + ...(options.enableOnCreate !== undefined && { enableOnCreate: options.enableOnCreate }), + ...(options.endpoint && { endpoint: options.endpoint }), + }; + + project.onlineEvalConfigs.push(config); + await this.writeProjectSpec(project); + + return config; + } +} + +/** Validates that each insight ID starts with `Builtin.Insight.` or is a full ARN. */ +export function validateInsightIds(insights: string[]): void { + for (const insight of insights) { + if (!insight.startsWith('Builtin.Insight.') && !insight.startsWith('arn:')) { + throw new ValidationError( + `Invalid insight "${insight}". Must be a Builtin.Insight.* identifier (e.g. 
Builtin.Insight.FailureAnalysis) or a full ARN.` + ); + } + } +} diff --git a/src/cli/primitives/PaymentConnectorPrimitive.ts b/src/cli/primitives/PaymentConnectorPrimitive.ts index 9254b0c32..4fa90a68a 100644 --- a/src/cli/primitives/PaymentConnectorPrimitive.ts +++ b/src/cli/primitives/PaymentConnectorPrimitive.ts @@ -314,7 +314,7 @@ export class PaymentConnectorPrimitive extends BasePrimitive', 'Payment manager name [non-interactive]') .option('--name ', 'Payment connector name [non-interactive]') .option('--provider ', 'Payment provider: CoinbaseCDP, StripePrivy [non-interactive]') @@ -522,7 +522,7 @@ export class PaymentConnectorPrimitive extends BasePrimitive', 'Name of connector to remove [non-interactive]') .option('--manager ', 'Payment manager name [non-interactive]') .option('-y, --yes', 'Skip confirmation prompt [non-interactive]') diff --git a/src/cli/primitives/PaymentManagerPrimitive.ts b/src/cli/primitives/PaymentManagerPrimitive.ts index c3bfe6bbc..90fe3ef9e 100644 --- a/src/cli/primitives/PaymentManagerPrimitive.ts +++ b/src/cli/primitives/PaymentManagerPrimitive.ts @@ -322,7 +322,7 @@ export class PaymentManagerPrimitive extends BasePrimitive', 'Payment manager name [non-interactive]') .option('--authorizer-type ', 'Authorizer type: AWS_IAM or CUSTOM_JWT (default: AWS_IAM) [non-interactive]') .option('--discovery-url ', 'OIDC discovery URL (required for CUSTOM_JWT) [non-interactive]') @@ -494,7 +494,7 @@ export class PaymentManagerPrimitive extends BasePrimitive', 'Name of resource to remove [non-interactive]') .option('-y, --yes', 'Skip confirmation prompt [non-interactive]') .option('--json', 'Output as JSON [non-interactive]') diff --git a/src/cli/primitives/PolicyEnginePrimitive.ts b/src/cli/primitives/PolicyEnginePrimitive.ts index 44b4b5a3f..bb4cdf306 100644 --- a/src/cli/primitives/PolicyEnginePrimitive.ts +++ b/src/cli/primitives/PolicyEnginePrimitive.ts @@ -135,11 +135,14 @@ export class PolicyEnginePrimitive extends BasePrimitive { try 
{ const project = await this.readProjectSpec(); - return project.agentCoreGateways.filter(gw => !gw.policyEngineConfiguration).map(gw => gw.name); + return project.agentCoreGateways + .filter(gw => gw.protocolType === 'None' && !gw.policyEngineConfiguration) + .map(gw => gw.name); } catch { return []; } @@ -181,15 +184,22 @@ export class PolicyEnginePrimitive extends BasePrimitive> { try { - const deployedState = await this.configIO.readDeployedState(); + const [deployedState, project] = await Promise.all([this.configIO.readDeployedState(), this.readProjectSpec()]); + const mcpGatewayNames = new Set( + project.agentCoreGateways.filter(gw => gw.protocolType !== 'None').map(gw => gw.name) + ); const result: Record = {}; for (const target of Object.values(deployedState.targets)) { - const gateways = target.resources?.mcp?.gateways; + const gateways = target.resources?.mcp?.gateways ?? target.resources?.gateways; if (gateways) { for (const [name, gw] of Object.entries(gateways)) { - if (gw?.gatewayArn) { + if (gw?.gatewayArn && !mcpGatewayNames.has(name)) { result[name] = gw.gatewayArn; } } @@ -201,6 +211,26 @@ export class PolicyEnginePrimitive extends BasePrimitive { + try { + const project = await this.readProjectSpec(); + return project.agentCoreGateways + .filter(gw => gw.protocolType === 'None') + .map(gw => ({ + name: gw.name, + httpTargets: (gw.targets || []) + .filter((t: { targetType?: string }) => t.targetType === 'httpRuntime') + .map((t: { name: string }) => t.name), + })); + } catch { + return []; + } + } + registerCommands(addCmd: Command, removeCmd: Command): void { addCmd .command('policy-engine') diff --git a/src/cli/primitives/PolicyPrimitive.ts b/src/cli/primitives/PolicyPrimitive.ts index 4cce4df34..f28bf2d7c 100644 --- a/src/cli/primitives/PolicyPrimitive.ts +++ b/src/cli/primitives/PolicyPrimitive.ts @@ -1,17 +1,20 @@ import { ResourceNotFoundError, ValidationError, findConfigRoot, serializeResult, toError } from '../../lib'; import type { Result 
} from '../../lib/result'; import type { Policy } from '../../schema'; -import { PolicySchema, ValidationModeSchema } from '../../schema'; +import { EnforcementModeSchema, PolicySchema, ValidationModeSchema } from '../../schema'; import { detectRegion } from '../aws'; import { getPolicyGeneration, startPolicyGeneration } from '../aws/policy-generation'; import { getErrorMessage } from '../errors'; +import { isGatedFeaturesEnabled } from '../feature-flags'; import type { RemovalPreview, SchemaChange } from '../operations/remove/types'; import { runCliCommand, withCommandRunTelemetry } from '../telemetry/cli-command-run.js'; import { PolicyValidationMode, standardize } from '../telemetry/schemas/common-shapes.js'; import { requireTTY } from '../tui/guards/tty'; +import { type PolicyEffect, authorizationPhaseForEffect, defaultDataPathForEffect } from '../tui/screens/policy/types'; import { BasePrimitive } from './BasePrimitive'; import { SOURCE_CODE_NOTE } from './constants'; import type { AddResult, AddScreenComponent, RemovableResource } from './types'; +import { Option } from '@commander-js/extra-typings'; import type { Command } from '@commander-js/extra-typings'; import { existsSync, readFileSync } from 'fs'; @@ -24,6 +27,8 @@ export interface AddPolicyOptions { generate?: string; gateway?: string; validationMode?: 'FAIL_ON_ANY_FINDINGS' | 'IGNORE_ALL_FINDINGS'; + enforcementMode?: 'ACTIVE' | 'LOG_ONLY'; + authorizationPhase?: 'INITIATE' | 'RETURN_OUTPUT'; } export interface RemovablePolicyResource extends RemovableResource { @@ -142,6 +147,8 @@ export class PolicyPrimitive extends BasePrimitive(option: T): T => (isGatedFeaturesEnabled() ? 
option : option.hideHelp()); + addCmd .command('policy') .description('Add a policy to a policy engine') .option('--name ', 'Policy name [non-interactive]') .option('--engine ', 'Policy engine name [non-interactive]') .option('--description ', 'Policy description [non-interactive]') - .option('--source ', 'Path to a Cedar policy file [non-interactive]') - .option('--statement ', 'Cedar policy statement [non-interactive]') - .option('-g, --generate ', 'Generate Cedar policy from natural language description [non-interactive]') + .option('--source ', 'Path to a policy file [non-interactive]') + .option('--statement ', 'Policy statement [non-interactive]') + .option('-g, --generate ', 'Generate policy from natural language description [non-interactive]') .option('--gateway ', 'Deployed gateway name for policy generation [non-interactive]') + // Guardrail form flags are gated behind ENABLE_GATED_FEATURES: hidden from help when off. + .addOption(gate(new Option('--target ', 'Gateway target name for Cedar action scope [non-interactive]'))) + .addOption( + gate( + new Option( + '--form-category ', + 'Guardrail category: contentFilter, promptAttack, or sensitiveInformation [non-interactive]' + ) + ) + ) + .addOption( + gate(new Option('--form-filters ', 'Comma-separated filters for the chosen category [non-interactive]')) + ) + .addOption( + gate( + new Option( + '--form-effect ', + 'Policy effect: forbid, permit, or suppressOutput (default: forbid) [non-interactive]' + ) + ) + ) + .addOption( + gate( + new Option( + '--form-data-path ', + 'Data path to evaluate, e.g. 
context.input.prompt (default: context.input.prompt) [non-interactive]' + ) + ) + ) .option( '--validation-mode ', 'Validation mode: FAIL_ON_ANY_FINDINGS or IGNORE_ALL_FINDINGS [non-interactive]' ) + .option( + '--enforcement-mode ', + 'Enforcement mode: ACTIVE (enforce decisions) or LOG_ONLY (shadow mode) (default: ACTIVE) [non-interactive]' + ) .option('--json', 'Output as JSON [non-interactive]') .action( async (cliOptions: { @@ -304,7 +347,13 @@ export class PolicyPrimitive extends BasePrimitive { if (!findConfigRoot()) { @@ -318,6 +367,7 @@ export class PolicyPrimitive extends BasePrimitive { @@ -328,7 +378,28 @@ export class PolicyPrimitive extends BasePrimitive 1) { + throw new Error('Only one of --statement, --source, --generate, or --form-* can be provided.'); + } + + if (cliOptions.enforcementMode && !EnforcementModeSchema.safeParse(cliOptions.enforcementMode).success) { + throw new Error('Invalid --enforcement-mode. Use ACTIVE or LOG_ONLY.'); + } + + // Handle form mode: synthesize Cedar from category/filters/thresholds + let effectiveOptions: AddPolicyOptions = { name: cliOptions.name, engine: cliOptions.engine, description: cliOptions.description, @@ -339,7 +410,83 @@ export class PolicyPrimitive extends BasePrimitive s.trim()); + + let resolvedGatewayArn: string | undefined; + let resolvedTargetName: string | undefined = cliOptions.target; + if (cliOptions.gateway) { + try { + const deployedState = await this.configIO.readDeployedState(); + for (const target of Object.values(deployedState.targets)) { + const gateways = target.resources?.mcp?.gateways ?? 
target.resources?.gateways; + if (gateways) { + const gw = gateways[cliOptions.gateway]; + if (gw?.gatewayArn) { + resolvedGatewayArn = gw.gatewayArn; + if (!resolvedTargetName) { + const gwTargets = gw.targets; + if (gwTargets) { + const targetNames = Object.keys(gwTargets); + if (targetNames.length === 1) { + resolvedTargetName = targetNames[0]; + } else if (targetNames.length > 1) { + throw new ValidationError( + `Multiple targets found on gateway "${cliOptions.gateway}": ${targetNames.join(', ')}. Use --target to specify one.` + ); + } + } + } + break; + } + } + } + } catch (e) { + if (e instanceof ValidationError) throw e; // intentional validation — re-raise + // deployed state not available — ARN will be omitted + } + } + + const { synthesizeCedar } = await import('../tui/screens/policy/synthesize-cedar'); + + const statement = synthesizeCedar( + { + category: cliOptions.formCategory as 'contentFilter' | 'promptAttack' | 'sensitiveInformation', + filters, + effect: policyEffect, + dataPath: cliOptions.formDataPath ?? defaultDataPathForEffect(policyEffect), + }, + { targetName: resolvedTargetName, gatewayArn: resolvedGatewayArn } + ); + // Output-phase effects (suppressOutput) must register on RETURN_OUTPUT. + effectiveOptions = { + ...effectiveOptions, + statement, + authorizationPhase: authorizationPhaseForEffect(policyEffect), + }; + } + + const result = await this.add(effectiveOptions); if (!result.success) { throw result.error; @@ -351,11 +498,13 @@ export class PolicyPrimitive extends BasePrimitive ({ - ConfigIO: class { - readProjectSpec = mockReadProjectSpec; - writeProjectSpec = mockWriteProjectSpec; - }, - findConfigRoot: () => '/fake/root', - toError: (err: unknown) => (err instanceof Error ? 
err : new Error(String(err))), - serializeResult: (r: unknown) => r, - ResourceNotFoundError: class extends Error { - constructor(m: string) { - super(m); - this.name = 'ResourceNotFoundError'; - } - }, -})); - -function makeProject(abTests: { name: string; gatewayRef?: string }[] = []) { - return { - name: 'TestProject', - version: 1, - managedBy: 'CDK' as const, - runtimes: [], - memories: [], - credentials: [], - evaluators: [], - onlineEvalConfigs: [], - agentCoreGateways: [], - policyEngines: [], - configBundles: [], - abTests, - httpGateways: [] as { name: string; runtimeRef: string }[], - }; -} - -const validOptions: AddABTestOptions = { - name: 'MyTest', - agent: 'my-agent', - controlBundle: 'arn:bundle:control', - controlVersion: 'v1', - treatmentBundle: 'arn:bundle:treatment', - treatmentVersion: 'v1', - controlWeight: 80, - treatmentWeight: 20, - onlineEval: 'arn:eval:config', -}; - -let primitive: ABTestPrimitive; - -describe('ABTestPrimitive', () => { - beforeEach(() => { - vi.clearAllMocks(); - primitive = new ABTestPrimitive(); - }); - - it('has correct kind, label, and article', () => { - expect(primitive.kind).toBe('ab-test'); - expect(primitive.label).toBe('AB Test'); - // eslint-disable-next-line @typescript-eslint/dot-notation - expect(primitive['article']).toBe('an'); - }); - - describe('add', () => { - it('adds AB test to project spec and returns success', async () => { - mockReadProjectSpec.mockResolvedValue(makeProject()); - mockWriteProjectSpec.mockResolvedValue(undefined); - - const result = await primitive.add(validOptions); - - expect(result.success).toBe(true); - expect(result).toHaveProperty('abTestName', 'MyTest'); - - const writtenSpec = mockWriteProjectSpec.mock.calls[0]![0]; - expect(writtenSpec.abTests).toHaveLength(1); - expect(writtenSpec.abTests[0].name).toBe('MyTest'); - expect(writtenSpec.abTests[0].variants).toHaveLength(2); - expect(writtenSpec.abTests[0].variants[0].name).toBe('C'); - 
expect(writtenSpec.abTests[0].variants[0].weight).toBe(80); - expect(writtenSpec.abTests[0].variants[1].name).toBe('T1'); - expect(writtenSpec.abTests[0].variants[1].weight).toBe(20); - }); - - it('includes optional fields when provided', async () => { - mockReadProjectSpec.mockResolvedValue(makeProject()); - mockWriteProjectSpec.mockResolvedValue(undefined); - - await primitive.add({ - ...validOptions, - description: 'Test description', - roleArn: 'arn:aws:iam::123:role/MyRole', - trafficHeaderName: 'X-AB-Route', - maxDurationDays: 30, - enableOnCreate: true, - }); - - const writtenSpec = mockWriteProjectSpec.mock.calls[0]![0]; - const test = writtenSpec.abTests[0]; - expect(test.description).toBe('Test description'); - expect(test.roleArn).toBe('arn:aws:iam::123:role/MyRole'); - expect(test.trafficAllocationConfig).toEqual({ routeOnHeader: { headerName: 'X-AB-Route' } }); - expect(test.maxDurationDays).toBe(30); - expect(test.enableOnCreate).toBe(true); - }); - - it('omits optional fields when not provided', async () => { - mockReadProjectSpec.mockResolvedValue(makeProject()); - mockWriteProjectSpec.mockResolvedValue(undefined); - - await primitive.add(validOptions); - - const writtenSpec = mockWriteProjectSpec.mock.calls[0]![0]; - const test = writtenSpec.abTests[0]; - expect(test.description).toBeUndefined(); - expect(test.roleArn).toBeUndefined(); - expect(test.trafficAllocationConfig).toBeUndefined(); - expect(test.maxDurationDays).toBeUndefined(); - expect(test.enableOnCreate).toBeUndefined(); - }); - - it('returns error when AB test name already exists', async () => { - mockReadProjectSpec.mockResolvedValue(makeProject([{ name: 'MyTest' }])); - - const result = await primitive.add(validOptions); - - expect(result).toEqual( - expect.objectContaining({ - success: false, - error: expect.objectContaining({ message: expect.stringContaining('already exists') }), - }) - ); - }); - - it('returns error when readProjectSpec fails', async () => { - 
mockReadProjectSpec.mockRejectedValue(new Error('disk read error')); - - const result = await primitive.add(validOptions); - - expect(result).toEqual(expect.objectContaining({ success: false, error: new Error('disk read error') })); - }); - - it('returns error when writeProjectSpec fails', async () => { - mockReadProjectSpec.mockResolvedValue(makeProject()); - mockWriteProjectSpec.mockRejectedValue(new Error('disk write error')); - - const result = await primitive.add(validOptions); - - expect(result).toEqual(expect.objectContaining({ success: false, error: new Error('disk write error') })); - }); - - it('returns error when variant weights do not sum to 100', async () => { - mockReadProjectSpec.mockResolvedValue(makeProject()); - - const result = await primitive.add({ - ...validOptions, - controlWeight: 80, - treatmentWeight: 80, - }); - - expect(result.success).toBe(false); - }); - }); - - describe('remove', () => { - it('removes AB test from project spec', async () => { - mockReadProjectSpec.mockResolvedValue(makeProject([{ name: 'TestA' }, { name: 'TestB' }])); - mockWriteProjectSpec.mockResolvedValue(undefined); - - const result = await primitive.remove('TestA'); - - expect(result.success).toBe(true); - const writtenSpec = mockWriteProjectSpec.mock.calls[0]![0]; - expect(writtenSpec.abTests).toHaveLength(1); - expect(writtenSpec.abTests[0].name).toBe('TestB'); - }); - - it('returns error when AB test not found', async () => { - mockReadProjectSpec.mockResolvedValue(makeProject()); - - const result = await primitive.remove('NonExistent'); - - expect(result.success).toBe(false); - if (!result.success) { - expect(result.error.message).toContain('NonExistent'); - expect(result.error.message).toContain('not found'); - } - }); - - it('returns error when readProjectSpec fails', async () => { - mockReadProjectSpec.mockRejectedValue(new Error('io error')); - - const result = await primitive.remove('Whatever'); - - expect(result.success).toBe(false); - if 
(!result.success) { - expect(result.error.message).toBe('io error'); - } - }); - - it('cascade-deletes orphaned HTTP gateway when last referencing AB test is removed', async () => { - const project = makeProject([{ name: 'TestA', gatewayRef: '{{gateway:TestA-gw}}' }]); - project.httpGateways = [{ name: 'TestA-gw', runtimeRef: 'my-agent' }]; - mockReadProjectSpec.mockResolvedValue(project); - mockWriteProjectSpec.mockResolvedValue(undefined); - - const result = await primitive.remove('TestA'); - - expect(result.success).toBe(true); - const writtenSpec = mockWriteProjectSpec.mock.calls[0]![0]; - expect(writtenSpec.abTests).toHaveLength(0); - // Gateway is retained by default — cascade-delete only happens with deleteGateway: true - expect(writtenSpec.httpGateways).toHaveLength(1); - }); - - it('retains HTTP gateway when another AB test still references it', async () => { - const project = makeProject([ - { name: 'TestA', gatewayRef: '{{gateway:shared-gw}}' }, - { name: 'TestB', gatewayRef: '{{gateway:shared-gw}}' }, - ]); - project.httpGateways = [{ name: 'shared-gw', runtimeRef: 'my-agent' }]; - mockReadProjectSpec.mockResolvedValue(project); - mockWriteProjectSpec.mockResolvedValue(undefined); - - const result = await primitive.remove('TestA'); - - expect(result.success).toBe(true); - const writtenSpec = mockWriteProjectSpec.mock.calls[0]![0]; - expect(writtenSpec.abTests).toHaveLength(1); - expect(writtenSpec.httpGateways).toHaveLength(1); - expect(writtenSpec.httpGateways[0].name).toBe('shared-gw'); - }); - }); - - describe('previewRemove', () => { - it('returns preview with schema changes', async () => { - mockReadProjectSpec.mockResolvedValue(makeProject([{ name: 'TestA' }])); - - const preview = await primitive.previewRemove('TestA'); - - expect(preview.summary[0]).toContain('Removing AB test: TestA'); - expect(preview.schemaChanges).toHaveLength(1); - expect(preview.schemaChanges[0]!.file).toBe('agentcore/agentcore.json'); - 
expect((preview.schemaChanges[0]!.after as { abTests: unknown[] }).abTests).toHaveLength(0); - }); - - it('throws when AB test not found', async () => { - mockReadProjectSpec.mockResolvedValue(makeProject()); - - await expect(primitive.previewRemove('Missing')).rejects.toThrow('not found'); - }); - }); - - describe('getRemovable', () => { - it('returns AB test names', async () => { - mockReadProjectSpec.mockResolvedValue(makeProject([{ name: 'A' }, { name: 'B' }])); - - const result = await primitive.getRemovable(); - - expect(result).toEqual([{ name: 'A' }, { name: 'B' }]); - }); - - it('returns empty array on error', async () => { - mockReadProjectSpec.mockRejectedValue(new Error('fail')); - - expect(await primitive.getRemovable()).toEqual([]); - }); - }); - - describe('getAllNames', () => { - it('returns AB test names as strings', async () => { - mockReadProjectSpec.mockResolvedValue(makeProject([{ name: 'X' }, { name: 'Y' }])); - - const result = await primitive.getAllNames(); - - expect(result).toEqual(['X', 'Y']); - }); - - it('returns empty array on error', async () => { - mockReadProjectSpec.mockRejectedValue(new Error('fail')); - - expect(await primitive.getAllNames()).toEqual([]); - }); - }); -}); diff --git a/src/cli/primitives/__tests__/GatewayPrimitive.test.ts b/src/cli/primitives/__tests__/GatewayPrimitive.test.ts index 863bc780c..193a1c6b5 100644 --- a/src/cli/primitives/__tests__/GatewayPrimitive.test.ts +++ b/src/cli/primitives/__tests__/GatewayPrimitive.test.ts @@ -8,6 +8,7 @@ const defaultProject: AgentCoreProjectSpec = { managedBy: 'CDK' as const, runtimes: [], memories: [], + knowledgeBases: [], credentials: [], evaluators: [], onlineEvalConfigs: [], @@ -15,7 +16,6 @@ const defaultProject: AgentCoreProjectSpec = { policyEngines: [], configBundles: [], abTests: [], - httpGateways: [], harnesses: [], datasets: [], payments: [], diff --git a/src/cli/primitives/__tests__/GatewayTargetPrimitive.test.ts 
b/src/cli/primitives/__tests__/GatewayTargetPrimitive.test.ts new file mode 100644 index 000000000..c7108dff3 --- /dev/null +++ b/src/cli/primitives/__tests__/GatewayTargetPrimitive.test.ts @@ -0,0 +1,412 @@ +import type { AgentCoreProjectSpec } from '../../../schema'; +import { GatewayTargetPrimitive } from '../GatewayTargetPrimitive'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +const defaultProject: AgentCoreProjectSpec = { + name: 'test', + version: 1, + managedBy: 'CDK' as const, + runtimes: [], + memories: [], + knowledgeBases: [], + credentials: [], + evaluators: [], + onlineEvalConfigs: [], + agentCoreGateways: [ + { + name: 'my-gateway', + targets: [], + authorizerType: 'NONE', + enableSemanticSearch: true, + exceptionLevel: 'NONE', + }, + ], + policyEngines: [], + configBundles: [], + abTests: [], + harnesses: [], + datasets: [], +}; + +const { mockConfigExists, mockReadProjectSpec, mockWriteProjectSpec } = vi.hoisted(() => ({ + mockConfigExists: vi.fn().mockReturnValue(true), + mockReadProjectSpec: vi.fn(), + mockWriteProjectSpec: vi.fn().mockResolvedValue(undefined), +})); + +vi.mock('../../../lib', () => { + const MockConfigIO = vi.fn(function (this: Record) { + this.configExists = mockConfigExists; + this.readProjectSpec = mockReadProjectSpec; + this.writeProjectSpec = mockWriteProjectSpec; + }); + return { + ConfigIO: MockConfigIO, + findConfigRoot: vi.fn().mockReturnValue('/fake/root'), + requireConfigRoot: vi.fn().mockReturnValue('/fake/root'), + setEnvVar: vi.fn().mockResolvedValue(undefined), + toError: (err: unknown) => (err instanceof Error ? 
err : new Error(String(err))), + serializeResult: (r: unknown) => r, + APP_DIR: 'app', + MCP_APP_SUBDIR: 'mcp', + ResourceNotFoundError: class extends Error { + constructor(m: string) { + super(m); + this.name = 'ResourceNotFoundError'; + } + }, + ValidationError: class extends Error { + constructor(m: string) { + super(m); + this.name = 'ValidationError'; + } + }, + }; +}); + +/** Extract the written project spec targets for the gateway. */ +function getWrittenGatewayTargets() { + expect(mockWriteProjectSpec).toHaveBeenCalledTimes(1); + const spec = mockWriteProjectSpec.mock.calls[0]![0] as AgentCoreProjectSpec; + const gw = spec.agentCoreGateways[0]; + expect(gw).toBeDefined(); + return gw!.targets; +} + +describe('GatewayTargetPrimitive', () => { + let primitive: GatewayTargetPrimitive; + + beforeEach(() => { + vi.clearAllMocks(); + mockReadProjectSpec.mockImplementation(() => Promise.resolve(JSON.parse(JSON.stringify(defaultProject)))); + primitive = new GatewayTargetPrimitive(); + }); + + describe('createHttpRuntimeTarget', () => { + it('writes correct nested httpRuntime structure to agentcore.json', async () => { + await primitive.createHttpRuntimeTarget({ + name: 'my-http-target', + gateway: 'my-gateway', + runtime: 'my-agent', + }); + + const targets = getWrittenGatewayTargets(); + expect(targets).toHaveLength(1); + expect(targets[0]).toEqual({ + name: 'my-http-target', + targetType: 'httpRuntime', + httpRuntime: { runtime: 'my-agent' }, + }); + }); + + it('includes runtimeEndpoint when endpoint is specified', async () => { + await primitive.createHttpRuntimeTarget({ + name: 'my-http-target', + gateway: 'my-gateway', + runtime: 'my-agent', + endpoint: 'LIVE', + }); + + const targets = getWrittenGatewayTargets(); + expect(targets[0]).toEqual({ + name: 'my-http-target', + targetType: 'httpRuntime', + httpRuntime: { runtime: 'my-agent', runtimeEndpoint: 'LIVE' }, + }); + }); + + it('includes outboundAuth when OAUTH is specified', async () => { + await 
primitive.createHttpRuntimeTarget({ + name: 'my-http-target', + gateway: 'my-gateway', + runtime: 'my-agent', + outboundAuth: { type: 'OAUTH', credentialName: 'my-cred', scopes: ['read', 'write'] }, + }); + + const targets = getWrittenGatewayTargets(); + expect(targets[0]).toEqual({ + name: 'my-http-target', + targetType: 'httpRuntime', + httpRuntime: { runtime: 'my-agent' }, + outboundAuth: { type: 'OAUTH', credentialName: 'my-cred', scopes: ['read', 'write'] }, + }); + }); + + it('omits outboundAuth when type is NONE', async () => { + await primitive.createHttpRuntimeTarget({ + name: 'my-http-target', + gateway: 'my-gateway', + runtime: 'my-agent', + outboundAuth: { type: 'NONE' }, + }); + + const targets = getWrittenGatewayTargets(); + expect(targets[0]!.outboundAuth).toBeUndefined(); + }); + + it('throws error for duplicate target name', async () => { + mockReadProjectSpec.mockImplementation(() => + Promise.resolve({ + ...JSON.parse(JSON.stringify(defaultProject)), + agentCoreGateways: [ + { + name: 'my-gateway', + targets: [{ name: 'existing-target', targetType: 'httpRuntime', httpRuntime: { runtime: 'x' } }], + authorizerType: 'NONE', + enableSemanticSearch: true, + exceptionLevel: 'NONE', + }, + ], + }) + ); + + await expect( + primitive.createHttpRuntimeTarget({ + name: 'existing-target', + gateway: 'my-gateway', + runtime: 'my-agent', + }) + ).rejects.toThrow(/already exists/); + }); + + it('throws error for missing gateway', async () => { + await expect( + primitive.createHttpRuntimeTarget({ + name: 'my-http-target', + gateway: 'non-existent-gateway', + runtime: 'my-agent', + }) + ).rejects.toThrow(/not found/); + }); + }); +}); + +// ============================================================================ +// Connector gateway-target tests — use spy-based mocks (different style from +// the hoisted vi.mock above). Both styles compose cleanly because the spies +// only attach to instances created inside makePrimitive(). 
+// ============================================================================ + +function emptyProject(): AgentCoreProjectSpec { + return { + version: '1.0', + name: 'TestProj', + runtimes: [], + memories: [], + credentials: [], + evaluators: [], + onlineEvalConfigs: [], + policyEngines: [], + datasets: [], + agentCoreGateways: [ + { + name: 'main-gw', + targets: [], + authorizerType: 'NONE', + enableSemanticSearch: true, + exceptionLevel: 'NONE', + }, + ], + knowledgeBases: [], + } as unknown as AgentCoreProjectSpec; +} + +function makePrimitive(initial: AgentCoreProjectSpec) { + const primitive = new GatewayTargetPrimitive(); + let project = initial; + vi.spyOn( + primitive as unknown as { readProjectSpec: () => Promise }, + 'readProjectSpec' + ).mockImplementation(() => Promise.resolve(project)); + vi.spyOn( + primitive as unknown as { writeProjectSpec: (p: AgentCoreProjectSpec) => Promise }, + 'writeProjectSpec' + ).mockImplementation((p: AgentCoreProjectSpec) => { + project = p; + return Promise.resolve(); + }); + return { primitive, getProject: () => project }; +} + +describe('GatewayTargetPrimitive — createConnectorGatewayTarget', () => { + afterEach(() => vi.restoreAllMocks()); + + it('writes a single-KB Retrieve target for bedrock-knowledge-bases', async () => { + const { primitive, getProject } = makePrimitive(emptyProject()); + const result = await primitive.createConnectorGatewayTarget({ + targetType: 'connector', + name: 'product-docs', + gateway: 'main-gw', + connectorId: 'bedrock-knowledge-bases', + knowledgeBaseId: 'ABCDEFGHIJ', + }); + expect(result.toolName).toBe('product-docs'); + const targets = getProject().agentCoreGateways[0]?.targets ?? 
[]; + const retrieve = targets.find(t => t.connectorId === 'bedrock-knowledge-bases'); + expect(retrieve?.connectorId).toBe('bedrock-knowledge-bases'); + expect(retrieve?.knowledgeBaseId).toBe('ABCDEFGHIJ'); + expect(retrieve?.knowledgeBaseIds).toBeUndefined(); + }); + + it('bedrock-knowledge-bases create also upserts a shared agentic-retrieve target', async () => { + const { primitive, getProject } = makePrimitive(emptyProject()); + await primitive.createConnectorGatewayTarget({ + targetType: 'connector', + name: 'product-docs', + gateway: 'main-gw', + connectorId: 'bedrock-knowledge-bases', + knowledgeBaseId: 'ABCDEFGHIJ', + }); + const targets = getProject().agentCoreGateways[0]?.targets ?? []; + expect(targets).toHaveLength(2); + const retrieve = targets.find(t => t.connectorId === 'bedrock-knowledge-bases'); + expect(retrieve?.name).toBe('product-docs'); + expect(retrieve?.knowledgeBaseId).toBe('ABCDEFGHIJ'); + const agentic = targets.find(t => t.connectorId === 'bedrock-agentic-retrieve'); + expect(agentic?.name).toBe('main-gw-agentic'); + expect(agentic?.knowledgeBaseIds).toEqual(['ABCDEFGHIJ']); + }); + + it('two bedrock-knowledge-bases creates share a single agentic target with both KBs', async () => { + const { primitive, getProject } = makePrimitive(emptyProject()); + await primitive.createConnectorGatewayTarget({ + targetType: 'connector', + name: 'docs-a', + gateway: 'main-gw', + connectorId: 'bedrock-knowledge-bases', + knowledgeBaseId: 'ABCDEFGHIJ', + }); + await primitive.createConnectorGatewayTarget({ + targetType: 'connector', + name: 'docs-b', + gateway: 'main-gw', + connectorId: 'bedrock-knowledge-bases', + knowledgeBaseId: 'KLMNOPQRST', + }); + const targets = getProject().agentCoreGateways[0]?.targets ?? []; + // Two Retrieve targets + one shared agentic target. 
+ expect(targets).toHaveLength(3); + const agentics = targets.filter(t => t.connectorId === 'bedrock-agentic-retrieve'); + expect(agentics).toHaveLength(1); + expect(agentics[0]?.knowledgeBaseIds).toEqual(['ABCDEFGHIJ', 'KLMNOPQRST']); + }); + + it('appends to an existing agentic target created earlier (e.g. via the KB primitive)', async () => { + const initial = emptyProject(); + initial.agentCoreGateways[0]!.targets = [ + { + name: 'main-gw-agentic', + targetType: 'connector', + connectorId: 'bedrock-agentic-retrieve', + knowledgeBaseIds: ['existing-kb'], + } as unknown as AgentCoreProjectSpec['agentCoreGateways'][0]['targets'][0], + ]; + const { primitive, getProject } = makePrimitive(initial); + await primitive.createConnectorGatewayTarget({ + targetType: 'connector', + name: 'product-docs', + gateway: 'main-gw', + connectorId: 'bedrock-knowledge-bases', + knowledgeBaseId: 'ABCDEFGHIJ', + }); + const targets = getProject().agentCoreGateways[0]?.targets ?? []; + const agentics = targets.filter(t => t.connectorId === 'bedrock-agentic-retrieve'); + expect(agentics).toHaveLength(1); + expect(agentics[0]?.knowledgeBaseIds).toEqual(['existing-kb', 'ABCDEFGHIJ']); + expect(targets.find(t => t.connectorId === 'bedrock-knowledge-bases')?.name).toBe('product-docs'); + }); + + it('writes a fan-out agentic-retrieve target with knowledgeBaseIds[]', async () => { + const { primitive, getProject } = makePrimitive(emptyProject()); + await primitive.createConnectorGatewayTarget({ + targetType: 'connector', + name: 'agentic', + gateway: 'main-gw', + connectorId: 'bedrock-agentic-retrieve', + knowledgeBaseIds: ['ABCDEFGHIJ', 'KLMNOPQRST'], + }); + const target = getProject().agentCoreGateways[0]?.targets[0]; + expect(target?.connectorId).toBe('bedrock-agentic-retrieve'); + expect(target?.knowledgeBaseIds).toEqual(['ABCDEFGHIJ', 'KLMNOPQRST']); + expect(target?.knowledgeBaseId).toBeUndefined(); + }); + + it('rejects a second agentic-retrieve target on the same gateway', async () 
=> { + const initial = emptyProject(); + initial.agentCoreGateways[0]!.targets = [ + { + name: 'main-gw-agentic', + targetType: 'connector', + connectorId: 'bedrock-agentic-retrieve', + knowledgeBaseIds: ['existing'], + } as unknown as AgentCoreProjectSpec['agentCoreGateways'][0]['targets'][0], + ]; + const { primitive } = makePrimitive(initial); + await expect( + primitive.createConnectorGatewayTarget({ + targetType: 'connector', + name: 'another-agentic', + gateway: 'main-gw', + connectorId: 'bedrock-agentic-retrieve', + knowledgeBaseIds: ['ABCDEFGHIJ'], + }) + ).rejects.toThrow(/already has a bedrock-agentic-retrieve target/); + }); +}); + +describe('GatewayTargetPrimitive — createWebSearchGatewayTarget', () => { + afterEach(() => vi.restoreAllMocks()); + + it('writes a webSearch target with no excludeDomains when omitted', async () => { + const { primitive, getProject } = makePrimitive(emptyProject()); + const result = await primitive.createWebSearchGatewayTarget({ + targetType: 'webSearch', + name: 'web-search', + gateway: 'main-gw', + }); + expect(result.toolName).toBe('web-search'); + const target = getProject().agentCoreGateways[0]?.targets[0]; + expect(target?.targetType).toBe('webSearch'); + expect(target?.name).toBe('web-search'); + expect(target?.excludeDomains).toBeUndefined(); + }); + + it('persists excludeDomains when provided', async () => { + const { primitive, getProject } = makePrimitive(emptyProject()); + await primitive.createWebSearchGatewayTarget({ + targetType: 'webSearch', + name: 'ws', + gateway: 'main-gw', + excludeDomains: ['internal.example.com', 'staging.example.com'], + }); + const target = getProject().agentCoreGateways[0]?.targets[0]; + expect(target?.excludeDomains).toEqual(['internal.example.com', 'staging.example.com']); + }); + + it('rejects a duplicate target name on the same gateway', async () => { + const { primitive } = makePrimitive(emptyProject()); + await primitive.createWebSearchGatewayTarget({ + targetType: 'webSearch', 
+ name: 'ws', + gateway: 'main-gw', + }); + await expect( + primitive.createWebSearchGatewayTarget({ + targetType: 'webSearch', + name: 'ws', + gateway: 'main-gw', + }) + ).rejects.toThrow(/already exists/); + }); + + it('rejects a target attached to an unknown gateway', async () => { + const { primitive } = makePrimitive(emptyProject()); + await expect( + primitive.createWebSearchGatewayTarget({ + targetType: 'webSearch', + name: 'ws', + gateway: 'does-not-exist', + }) + ).rejects.toThrow(/not found/); + }); +}); diff --git a/src/cli/primitives/__tests__/HarnessPrimitive.remove.test.ts b/src/cli/primitives/__tests__/HarnessPrimitive.remove.test.ts new file mode 100644 index 000000000..a839b09c6 --- /dev/null +++ b/src/cli/primitives/__tests__/HarnessPrimitive.remove.test.ts @@ -0,0 +1,271 @@ +import { AgentCoreApiError } from '../../aws/api-client'; +import { HarnessPrimitive } from '../HarnessPrimitive'; +import { afterEach, describe, expect, it, vi } from 'vitest'; + +const mockReadProjectSpec = vi.fn(); +const mockWriteProjectSpec = vi.fn(); +const mockReadDeployedState = vi.fn(); +const mockWriteDeployedState = vi.fn(); +const mockDeleteHarness = vi.fn(); +const mockRm = vi.fn(); + +vi.mock('../../../lib', () => ({ + APP_DIR: 'app', + ConfigIO: class { + readProjectSpec = mockReadProjectSpec; + writeProjectSpec = mockWriteProjectSpec; + readDeployedState = mockReadDeployedState; + writeDeployedState = mockWriteDeployedState; + getPathResolver = () => ({ getHarnessDir: (name: string) => `/fake/root/app/${name}` }); + }, + findConfigRoot: () => '/fake/root', +})); + +vi.mock('../../aws/agentcore-harness', async importOriginal => { + // Keep the real isHarnessNotFoundError + AgentCoreApiError (the typed-error contract under test); + // only deleteHarness is stubbed. 
+ const actual = await importOriginal<typeof import('../../aws/agentcore-harness')>(); /* typed so the spread below is a real module object, not `unknown` */ + return { ...actual, deleteHarness: (...args: unknown[]) => mockDeleteHarness(...args) }; +}); + +vi.mock('fs/promises', () => ({ + rm: (...args: unknown[]) => mockRm(...args), + access: vi.fn(), + copyFile: vi.fn(), + mkdir: vi.fn(), + readFile: vi.fn(), + writeFile: vi.fn(), +})); + +const ARN = 'arn:aws:bedrock-agentcore:us-west-2:111122223333:harness/h-legacy'; + +function project(harnessNames: string[] = [], memoryNames: string[] = []) { + return { + name: 'TestProject', + version: 1, + managedBy: 'CDK' as const, + runtimes: [], + memories: memoryNames.map(name => ({ name })), + harnesses: harnessNames.map(name => ({ name, path: `app/${name}` })), + }; +} + +function orphanState(name = 'legacy', target = 'default') { + return { + targets: { + [target]: { + resources: { + stackName: 'S', + harnesses: { + [name]: { harnessId: 'h-legacy', harnessArn: ARN, roleArn: 'arn:r', status: 'READY' }, + }, + }, + }, + }, + }; +} + +const primitive = new HarnessPrimitive(); + +describe('HarnessPrimitive.remove — orphan handling', () => { + afterEach(() => vi.clearAllMocks()); + + it('refuses to delete an orphan without an explicit choice (never auto-deletes)', async () => { + mockReadProjectSpec.mockResolvedValue(project(['legacy'], ['legacyMemory'])); + mockReadDeployedState.mockResolvedValue(orphanState()); + + const result = await primitive.remove('legacy'); + + expect(result.success).toBe(false); + if (!result.success) { + // Explicitly states nothing happened, then offers the two explicit choices.
+ expect(result.error.message).toContain('No changes were made'); + expect(result.error.message).toContain('was not deleted'); + expect(result.error.message).toContain('--keep'); + expect(result.error.message).toContain('--discard'); + } + expect(mockDeleteHarness).not.toHaveBeenCalled(); + expect(mockWriteProjectSpec).not.toHaveBeenCalled(); + expect(mockWriteDeployedState).not.toHaveBeenCalled(); + }); + + it('delete-and-keep: deletes from AWS, clears the orphan record, KEEPS the agentcore.json entry', async () => { + mockReadProjectSpec.mockResolvedValue(project(['legacy'], ['legacyMemory'])); + mockReadDeployedState.mockResolvedValue(orphanState()); + mockDeleteHarness.mockResolvedValue({}); + + const result = await primitive.remove('legacy', { orphanAction: 'keep' }); + + expect(result.success).toBe(true); + expect(mockDeleteHarness).toHaveBeenCalledWith({ region: 'us-west-2', harnessId: 'h-legacy' }); + // Orphan record dropped from deployed-state... + const written = mockWriteDeployedState.mock.calls[0]![0]; + expect(written.targets.default.resources.harnesses.legacy).toBeUndefined(); + // ...but the spec entry is kept (no spec write) so the next deploy recreates it under CFN. 
+ expect(mockWriteProjectSpec).not.toHaveBeenCalled(); + expect(mockRm).not.toHaveBeenCalled(); + }); + + it('delete-and-discard: deletes from AWS, clears the record, and removes the spec entry + memory + dir', async () => { + mockReadProjectSpec.mockResolvedValue(project(['legacy'], ['legacyMemory'])); + mockReadDeployedState.mockResolvedValue(orphanState()); + mockDeleteHarness.mockResolvedValue({}); + + const result = await primitive.remove('legacy', { orphanAction: 'discard' }); + + expect(result.success).toBe(true); + expect(mockDeleteHarness).toHaveBeenCalledWith({ region: 'us-west-2', harnessId: 'h-legacy' }); + const writtenState = mockWriteDeployedState.mock.calls[0]![0]; + expect(writtenState.targets.default.resources.harnesses.legacy).toBeUndefined(); + const writtenSpec = mockWriteProjectSpec.mock.calls[0]![0]; + expect(writtenSpec.harnesses.find((h: { name: string }) => h.name === 'legacy')).toBeUndefined(); + expect(writtenSpec.memories.find((m: { name: string }) => m.name === 'legacyMemory')).toBeUndefined(); + expect(mockRm).toHaveBeenCalledWith('/fake/root/app/legacy', expect.objectContaining({ recursive: true })); + }); + + it('treats a 404 from deleteHarness as already-deleted (success), still reconciling state', async () => { + mockReadProjectSpec.mockResolvedValue(project(['legacy'], ['legacyMemory'])); + mockReadDeployedState.mockResolvedValue(orphanState()); + // The control plane signals "already gone" with a typed 404 AgentCoreApiError — not a message + // substring — which isHarnessNotFoundError keys on. 
+ mockDeleteHarness.mockRejectedValue(new AgentCoreApiError(404, 'harness not found')); + + const result = await primitive.remove('legacy', { orphanAction: 'keep' }); + + expect(result.success).toBe(true); + expect(mockWriteDeployedState).toHaveBeenCalled(); + }); + + it('aborts when deleteHarness fails with a non-404 "does not exist" message (no substring false-match)', async () => { + mockReadProjectSpec.mockResolvedValue(project(['legacy'], ['legacyMemory'])); + mockReadDeployedState.mockResolvedValue(orphanState()); + // A dependent-resource error that mentions "does not exist" must NOT be misread as harness-gone. + mockDeleteHarness.mockRejectedValue(new Error('IAM role for harness does not exist')); + + const result = await primitive.remove('legacy', { orphanAction: 'discard' }); + + expect(result.success).toBe(false); + expect(mockWriteDeployedState).not.toHaveBeenCalled(); + }); + + it('aborts and leaves local state unchanged when deleteHarness fails for a non-404 reason', async () => { + mockReadProjectSpec.mockResolvedValue(project(['legacy'], ['legacyMemory'])); + mockReadDeployedState.mockResolvedValue(orphanState()); + mockDeleteHarness.mockRejectedValue(new Error('AccessDeniedException: not authorized')); + + const result = await primitive.remove('legacy', { orphanAction: 'discard' }); + + expect(result.success).toBe(false); + if (!result.success) expect(result.error.message).toContain('left unchanged'); + expect(mockWriteDeployedState).not.toHaveBeenCalled(); + expect(mockWriteProjectSpec).not.toHaveBeenCalled(); + }); +}); + +describe('HarnessPrimitive.remove — non-orphan (CDK-managed) handling', () => { + afterEach(() => vi.clearAllMocks()); + + it('is a pure spec edit: removes entry + memory + dir, no AWS delete', async () => { + mockReadProjectSpec.mockResolvedValue(project(['h1'], ['h1Memory'])); + mockReadDeployedState.mockResolvedValue({ + targets: { + default: { + resources: { + stackName: 'S', + harnesses: { + h1: { + harnessId: 'h-h1', + 
harnessArn: 'arn:aws:bedrock-agentcore:us-west-2:1:harness/h-h1', + roleArn: 'arn:r', + status: 'READY', + provisioner: 'cloudformation', + }, + }, + }, + }, + }, + }); + + const result = await primitive.remove('h1'); + + expect(result.success).toBe(true); + expect(mockDeleteHarness).not.toHaveBeenCalled(); + const writtenSpec = mockWriteProjectSpec.mock.calls[0]![0]; + expect(writtenSpec.harnesses.find((h: { name: string }) => h.name === 'h1')).toBeUndefined(); + expect(mockRm).toHaveBeenCalled(); + expect(mockWriteDeployedState).not.toHaveBeenCalled(); + }); + + it('managed harness (no ${name}Memory sibling): removes cleanly, leaves unrelated memories untouched', async () => { + // A managed-memory harness owns its memory internally — there is no `h1Memory` in project.memories, + // but an unrelated memory `otherMem` exists and must NOT be deleted. + mockReadProjectSpec.mockResolvedValue(project(['h1'], ['otherMem'])); + mockReadDeployedState.mockResolvedValue({ + targets: { + default: { + resources: { + stackName: 'S', + harnesses: { + h1: { + harnessId: 'h-h1', + harnessArn: 'arn:aws:bedrock-agentcore:us-west-2:1:harness/h-h1', + roleArn: 'arn:r', + status: 'READY', + provisioner: 'cloudformation', + }, + }, + }, + }, + }, + }); + + const result = await primitive.remove('h1'); + + expect(result.success).toBe(true); + const writtenSpec = mockWriteProjectSpec.mock.calls[0]![0]; + expect(writtenSpec.harnesses.find((h: { name: string }) => h.name === 'h1')).toBeUndefined(); + // The unrelated memory survives — removal never deletes a memory the harness doesn't own. 
+ expect(writtenSpec.memories.find((m: { name: string }) => m.name === 'otherMem')).toBeDefined(); + }); + + it('errors (does not silently no-op) when --keep/--discard is given for a CDK-managed harness', async () => { + mockReadProjectSpec.mockResolvedValue(project(['h1'], [])); + mockReadDeployedState.mockResolvedValue({ + targets: { + default: { + resources: { + stackName: 'S', + harnesses: { + h1: { + harnessId: 'h-h1', + harnessArn: 'arn:aws:bedrock-agentcore:us-west-2:1:harness/h-h1', + roleArn: 'arn:r', + status: 'READY', + provisioner: 'cloudformation', + }, + }, + }, + }, + }, + }); + + const result = await primitive.remove('h1', { orphanAction: 'discard' }); + + // B27a: the orphan-only flags must not silently no-op on a CDK-managed harness — they error, + // the harness is NOT removed from the spec, and no AWS delete is issued. + expect(result.success).toBe(false); + if (!result.success) expect(result.error.message).toContain('only apply to a preview-build'); + expect(mockDeleteHarness).not.toHaveBeenCalled(); + expect(mockWriteProjectSpec).not.toHaveBeenCalled(); + }); + + it('returns not-found when the harness is neither in spec nor an orphan', async () => { + mockReadProjectSpec.mockResolvedValue(project([], [])); + mockReadDeployedState.mockResolvedValue({ targets: {} }); + + const result = await primitive.remove('ghost'); + + expect(result.success).toBe(false); + if (!result.success) expect(result.error.message).toContain('not found'); + }); +}); diff --git a/src/cli/primitives/__tests__/KnowledgeBasePrimitive.test.ts b/src/cli/primitives/__tests__/KnowledgeBasePrimitive.test.ts new file mode 100644 index 000000000..d984087a7 --- /dev/null +++ b/src/cli/primitives/__tests__/KnowledgeBasePrimitive.test.ts @@ -0,0 +1,743 @@ +import type { AgentCoreProjectSpec } from '../../../schema'; +import { KnowledgeBasePrimitive } from '../KnowledgeBasePrimitive'; +import { existsSync, mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from 'fs'; 
+import { tmpdir } from 'os'; +import { join } from 'path'; +import { afterEach, describe, expect, it, vi } from 'vitest'; + +function emptyProject(): AgentCoreProjectSpec { + return { + version: '1.0', + name: 'TestProj', + runtimes: [], + memories: [], + credentials: [], + evaluators: [], + onlineEvalConfigs: [], + policyEngines: [], + datasets: [], + agentCoreGateways: [], + knowledgeBases: [], + } as unknown as AgentCoreProjectSpec; +} + +function makePrimitive(initial: AgentCoreProjectSpec) { /* Builds a KnowledgeBasePrimitive whose readProjectSpec/writeProjectSpec are spied to use an in-memory spec; getProject() returns the most recently written spec. */ + const primitive = new KnowledgeBasePrimitive(); + let project = initial; + vi.spyOn( + primitive as unknown as { readProjectSpec: () => Promise<AgentCoreProjectSpec> }, + 'readProjectSpec' + ).mockImplementation(() => Promise.resolve(project)); + vi.spyOn( + primitive as unknown as { writeProjectSpec: (p: AgentCoreProjectSpec) => Promise<void> }, + 'writeProjectSpec' + ).mockImplementation((p: AgentCoreProjectSpec) => { + project = p; + return Promise.resolve(); + }); + return { primitive, getProject: () => project }; +} + +/** + * Like makePrimitive, but also points configIO.getConfigRoot() at a real + * temp `<projectRoot>/agentcore` dir so materializeConnectorConfig can copy + * connector-config files into `<projectRoot>/app/<name>/`. + */ +function makePrimitiveWithProjectDir(initial: AgentCoreProjectSpec) { + const projectRoot = mkdtempSync(join(tmpdir(), 'kb-prim-')); + const configRoot = join(projectRoot, 'agentcore'); + const base = makePrimitive(initial); + // configIO is a protected readonly field; spy on its getConfigRoot.
+ vi.spyOn( + (base.primitive as unknown as { configIO: { getConfigRoot: () => string } }).configIO, + 'getConfigRoot' + ).mockReturnValue(configRoot); + return { ...base, projectRoot, configRoot }; +} + +describe('add knowledge-base — non-S3 connectors', () => { + const tmpDirs: string[] = []; + afterEach(() => { + vi.restoreAllMocks(); + for (const d of tmpDirs.splice(0)) rmSync(d, { recursive: true, force: true }); + }); + + function writeConfig(dir: string, name: string, body: Record<string, unknown>): string { /* Serializes `body` as JSON to `dir/name` and returns the written file's path. */ + const p = join(dir, name); + writeFileSync(p, JSON.stringify(body)); + return p; + } + + it('adds a WEB data source from a connector-config file and copies it under app//', async () => { + const { primitive, getProject, projectRoot } = makePrimitiveWithProjectDir(emptyProject()); + tmpDirs.push(projectRoot); + const webCfg = writeConfig(projectRoot, 'web.json', { + type: 'WEB', + connectionConfiguration: { authType: 'NO_AUTH' }, + }); + + const result = await primitive.add({ + name: 'web-docs', + dataSourceType: 'web-crawler', + connectorConfig: [webCfg], + }); + + expect(result.success).toBe(true); + if (!result.success) return; + + const kb = getProject().knowledgeBases[0]; + expect(kb?.dataSources).toEqual([{ type: 'WEB', connectorConfigFile: 'app/web-docs/web.json' }]); + expect(existsSync(join(projectRoot, 'app', 'web-docs', 'web.json'))).toBe(true); + }); + + it('warns when an auth connector config has no secretArn but still succeeds', async () => { + const { primitive, getProject, projectRoot } = makePrimitiveWithProjectDir(emptyProject()); + tmpDirs.push(projectRoot); + const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => undefined); + const confCfg = writeConfig(projectRoot, 'confluence.json', { + type: 'CONFLUENCE', + connectionConfiguration: {}, + }); + + const result = await primitive.add({ + name: 'conf-docs', + dataSourceType: 'confluence', + connectorConfig: [confCfg], + }); + + expect(result.success).toBe(true); + 
expect(getProject().knowledgeBases).toHaveLength(1); + expect(warnSpy).toHaveBeenCalled(); + expect(warnSpy.mock.calls.flat().join(' ')).toMatch(/secretArn/i); + }); + + it('rejects --connector-config when data-source-type is s3', async () => { + const { primitive } = makePrimitive(emptyProject()); + const r = await primitive.add({ name: 'kb', dataSourceType: 's3', connectorConfig: ['/tmp/x.json'] }); + expect(r.success).toBe(false); + }); + + it('rejects --source when data-source-type is non-S3', async () => { + const { primitive } = makePrimitive(emptyProject()); + const r = await primitive.add({ name: 'kb', dataSourceType: 'web-crawler', source: ['s3://b/'] }); + expect(r.success).toBe(false); + }); + + it('errors when the connector config type disagrees with --data-source-type', async () => { + const { primitive, projectRoot } = makePrimitiveWithProjectDir(emptyProject()); + tmpDirs.push(projectRoot); + const cfg = writeConfig(projectRoot, 'mismatch.json', { + type: 'CONFLUENCE', + connectionConfiguration: {}, + }); + const r = await primitive.add({ + name: 'kb', + dataSourceType: 'web-crawler', + connectorConfig: [cfg], + }); + expect(r.success).toBe(false); + // No file should have been copied since validation failed. 
+ expect(existsSync(join(projectRoot, 'app'))).toBe(false); + }); + + it('does not copy any file when the gateway is missing', async () => { + const { primitive, projectRoot } = makePrimitiveWithProjectDir(emptyProject()); + tmpDirs.push(projectRoot); + const cfg = writeConfig(projectRoot, 'web.json', { + type: 'WEB', + connectionConfiguration: { authType: 'NO_AUTH' }, + }); + const r = await primitive.add({ + name: 'kb', + dataSourceType: 'web-crawler', + connectorConfig: [cfg], + gateway: 'missing-gw', + }); + expect(r.success).toBe(false); + expect(existsSync(join(projectRoot, 'app'))).toBe(false); + }); + + it('rejects two connector configs with the same basename and copies nothing', async () => { + const { primitive, getProject, projectRoot } = makePrimitiveWithProjectDir(emptyProject()); + tmpDirs.push(projectRoot); + const dirA = join(projectRoot, 'a'); + const dirB = join(projectRoot, 'b'); + mkdirSync(dirA, { recursive: true }); + mkdirSync(dirB, { recursive: true }); + const aPath = writeConfig(dirA, 'web.json', { + type: 'WEB', + connectionConfiguration: { authType: 'NO_AUTH' }, + }); + const bPath = writeConfig(dirB, 'web.json', { + type: 'WEB', + connectionConfiguration: { authType: 'NO_AUTH' }, + }); + + const result = await primitive.add({ + name: 'kb', + dataSourceType: 'web-crawler', + connectorConfig: [aPath, bPath], + }); + + expect(result.success).toBe(false); + if (result.success) return; + expect(result.error.message).toMatch(/web\.json|collision|would both be stored/i); + // No copy happened, and the spec was not mutated. 
+ expect(existsSync(join(projectRoot, 'app', 'kb', 'web.json'))).toBe(false); + expect(existsSync(join(projectRoot, 'app', 'kb'))).toBe(false); + expect(getProject().knowledgeBases).toHaveLength(0); + }); + + it('appends a different connector config to an existing connector KB and copies it', async () => { + const initial = emptyProject(); + initial.knowledgeBases = [ + { + type: 'AgentCoreKnowledgeBase', + name: 'conf-docs', + dataSources: [{ type: 'CONFLUENCE', connectorConfigFile: 'app/conf-docs/confluence.json' }], + }, + ] as unknown as AgentCoreProjectSpec['knowledgeBases']; + const { primitive, getProject, projectRoot } = makePrimitiveWithProjectDir(initial); + tmpDirs.push(projectRoot); + const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => undefined); + const cfg2 = writeConfig(projectRoot, 'confluence2.json', { + type: 'CONFLUENCE', + connectionConfiguration: {}, + }); + + const result = await primitive.add({ + name: 'conf-docs', + dataSourceType: 'confluence', + connectorConfig: [cfg2], + }); + + expect(result.success).toBe(true); + if (!result.success) return; + expect(result.appended).toBe(true); + expect(existsSync(join(projectRoot, 'app', 'conf-docs', 'confluence2.json'))).toBe(true); + expect(getProject().knowledgeBases[0]?.dataSources).toHaveLength(2); + // CONFLUENCE without secretArn warns. 
+ expect(warnSpy.mock.calls.flat().join(' ')).toMatch(/secretArn/i); + }); + + it('suppresses the secretArn warning under --json', async () => { + const { primitive, projectRoot } = makePrimitiveWithProjectDir(emptyProject()); + tmpDirs.push(projectRoot); + const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => undefined); + const cfg = writeConfig(projectRoot, 'confluence.json', { + type: 'CONFLUENCE', + connectionConfiguration: {}, + }); + + const result = await primitive.add({ + name: 'conf-docs', + dataSourceType: 'confluence', + connectorConfig: [cfg], + json: true, + }); + + expect(result.success).toBe(true); + expect(warnSpy.mock.calls.flat().join(' ')).not.toMatch(/secretArn/i); + }); + + it('keeps a connector config already inside app// in place', async () => { + const { primitive, getProject, projectRoot } = makePrimitiveWithProjectDir(emptyProject()); + tmpDirs.push(projectRoot); + const destDir = join(projectRoot, 'app', 'web-docs'); + rmSync(destDir, { recursive: true, force: true }); + // Place the config directly where materialize would copy it. 
+ mkdirSync(destDir, { recursive: true }); + const inPlace = writeConfig(destDir, 'web.json', { + type: 'WEB', + connectionConfiguration: { authType: 'NO_AUTH' }, + }); + + const result = await primitive.add({ + name: 'web-docs', + dataSourceType: 'web-crawler', + connectorConfig: [inPlace], + }); + + expect(result.success).toBe(true); + const kb = getProject().knowledgeBases[0]; + expect(kb?.dataSources).toEqual([{ type: 'WEB', connectorConfigFile: 'app/web-docs/web.json' }]); + expect(readFileSync(inPlace, 'utf-8')).toContain('NO_AUTH'); + }); +}); + +describe('KnowledgeBasePrimitive — add (new KB)', () => { + afterEach(() => vi.restoreAllMocks()); + + it('creates a new KB entry when the name does not exist', async () => { + const { primitive, getProject } = makePrimitive(emptyProject()); + + const result = await primitive.add({ + name: 'product-docs', + source: ['s3://my-bucket/docs/'], + }); + + expect(result.success).toBe(true); + if (!result.success) return; + expect(result.knowledgeBaseName).toBe('product-docs'); + expect(result.appended).toBe(false); + + const kbs = getProject().knowledgeBases; + expect(kbs).toHaveLength(1); + expect(kbs[0]?.name).toBe('product-docs'); + expect(kbs[0]?.dataSources).toEqual([{ type: 'S3', uri: 's3://my-bucket/docs/' }]); + }); + + it('accepts multiple --source flags on first invocation', async () => { + const { primitive, getProject } = makePrimitive(emptyProject()); + const result = await primitive.add({ + name: 'docs', + source: ['s3://my-bucket/a/', 's3://my-bucket/b/'], + }); + expect(result.success).toBe(true); + expect(getProject().knowledgeBases[0]?.dataSources).toHaveLength(2); + }); + + it('rejects when neither --source nor --connector-config is provided', async () => { + const { primitive } = makePrimitive(emptyProject()); + const result = await primitive.add({ name: 'empty' }); + expect(result.success).toBe(false); + if (result.success) return; + expect(result.error.message).toMatch(/at least one --source is 
required/i); + }); + + it('rejects --connector-config when the data source type defaults to s3', async () => { + const { primitive } = makePrimitive(emptyProject()); + const result = await primitive.add({ + name: 'kb', + connectorConfig: ['./confluence.json'], + }); + expect(result.success).toBe(false); + if (result.success) return; + expect(result.error.message).toMatch(/only valid for non-S3/i); + }); + + it('rejects an invalid S3 URI', async () => { + const { primitive } = makePrimitive(emptyProject()); + const result = await primitive.add({ + name: 'kb', + source: ['https://example.com/docs'], + }); + expect(result.success).toBe(false); + if (result.success) return; + expect(result.error.message).toMatch(/Invalid S3 URI/i); + }); + + it('errors when --gateway references a gateway not in agentCoreGateways[]', async () => { + const { primitive } = makePrimitive(emptyProject()); + const result = await primitive.add({ + name: 'docs', + source: ['s3://my-bucket/a/'], + gateway: 'missing-gw', + }); + expect(result.success).toBe(false); + if (result.success) return; + expect(result.error.message).toMatch(/Gateway "missing-gw" not found/i); + }); + + it('with --gateway: emits a Retrieve target AND a gateway-scoped agentic-retrieve target', async () => { + const initial = emptyProject(); + initial.agentCoreGateways.push({ + name: 'main-gw', + targets: [], + authorizerType: 'NONE', + enableSemanticSearch: true, + exceptionLevel: 'NONE', + } as unknown as AgentCoreProjectSpec['agentCoreGateways'][0]); + const { primitive, getProject } = makePrimitive(initial); + + const result = await primitive.add({ + name: 'docs', + source: ['s3://my-bucket/a/'], + gateway: 'main-gw', + }); + + expect(result.success).toBe(true); + if (!result.success) return; + expect(result.gatewayWired).toBe('main-gw'); + + const project = getProject(); + expect(project.knowledgeBases[0]?.gateway).toBe('main-gw'); + const targets = project.agentCoreGateways[0]?.targets ?? 
[]; + expect(targets).toHaveLength(2); + + const retrieve = targets.find(t => t.name === 'docs'); + expect(retrieve?.targetType).toBe('connector'); + expect(retrieve?.connectorId).toBe('bedrock-knowledge-bases'); + // The connector target stores the KB *name*; the L3 looks it up at synth. + expect(retrieve?.knowledgeBaseId).toBe('docs'); + + const agentic = targets.find(t => t.connectorId === 'bedrock-agentic-retrieve'); + expect(agentic?.name).toBe('main-gw-agentic'); + expect(agentic?.knowledgeBaseIds).toEqual(['docs']); + }); + + it('second KB on the same gateway appends to the existing agentic-retrieve target', async () => { + const initial = emptyProject(); + initial.agentCoreGateways.push({ + name: 'main-gw', + targets: [], + authorizerType: 'NONE', + enableSemanticSearch: true, + exceptionLevel: 'NONE', + } as unknown as AgentCoreProjectSpec['agentCoreGateways'][0]); + const { primitive, getProject } = makePrimitive(initial); + + await primitive.add({ name: 'docs', source: ['s3://my-bucket/a/'], gateway: 'main-gw' }); + await primitive.add({ name: 'hr', source: ['s3://my-bucket/b/'], gateway: 'main-gw' }); + + const targets = getProject().agentCoreGateways[0]?.targets ?? []; + // Two Retrieve targets + one agentic target. 
+ expect(targets).toHaveLength(3); + expect(targets.filter(t => t.connectorId === 'bedrock-knowledge-bases').map(t => t.name)).toEqual(['docs', 'hr']); + const agentic = targets.find(t => t.connectorId === 'bedrock-agentic-retrieve'); + expect(agentic?.name).toBe('main-gw-agentic'); + expect(agentic?.knowledgeBaseIds).toEqual(['docs', 'hr']); + }); + + it('idempotent re-add: same KB twice does not duplicate it in the agentic-retrieve target', async () => { + const initial = emptyProject(); + initial.agentCoreGateways.push({ + name: 'main-gw', + targets: [], + authorizerType: 'NONE', + enableSemanticSearch: true, + exceptionLevel: 'NONE', + } as unknown as AgentCoreProjectSpec['agentCoreGateways'][0]); + const { primitive, getProject } = makePrimitive(initial); + + await primitive.add({ name: 'docs', source: ['s3://my-bucket/a/'], gateway: 'main-gw' }); + // Append a new data source on the same KB; --gateway is the same. + await primitive.add({ name: 'docs', source: ['s3://my-bucket/c/'], gateway: 'main-gw' }); + + const agentic = getProject().agentCoreGateways[0]?.targets.find(t => t.connectorId === 'bedrock-agentic-retrieve'); + expect(agentic?.knowledgeBaseIds).toEqual(['docs']); + }); + + it('rejects duplicate --source URIs within the same invocation', async () => { + const { primitive } = makePrimitive(emptyProject()); + const result = await primitive.add({ + name: 'docs', + source: ['s3://my-bucket/a/', 's3://my-bucket/a/'], + }); + expect(result.success).toBe(false); + if (result.success) return; + expect(result.error.message).toMatch(/Duplicate data source in this invocation/i); + }); +}); + +describe('KnowledgeBasePrimitive — add (idempotent append)', () => { + afterEach(() => vi.restoreAllMocks()); + + function withExisting() { + const initial = emptyProject(); + initial.knowledgeBases = [ + { + type: 'AgentCoreKnowledgeBase', + name: 'docs', + dataSources: [{ type: 'S3', uri: 's3://my-bucket/a/' }], + }, + ]; + return makePrimitive(initial); + } + + 
it('appends a new data source to an existing KB', async () => { + const { primitive, getProject } = withExisting(); + const result = await primitive.add({ + name: 'docs', + source: ['s3://my-bucket/c/'], + }); + expect(result.success).toBe(true); + if (!result.success) return; + expect(result.appended).toBe(true); + expect(result.newDataSources).toEqual(['s3://my-bucket/c/']); + + const kb = getProject().knowledgeBases[0]; + expect(kb?.dataSources).toHaveLength(2); + expect(kb?.dataSources.map(ds => (ds.type === 'S3' ? ds.uri : ds.connectorConfigFile))).toEqual([ + 's3://my-bucket/a/', + 's3://my-bucket/c/', + ]); + }); + + it('errors on duplicate URI', async () => { + const { primitive } = withExisting(); + const result = await primitive.add({ + name: 'docs', + source: ['s3://my-bucket/a/'], + }); + expect(result.success).toBe(false); + if (result.success) return; + expect(result.error.message).toMatch(/already exists on knowledge-base/i); + }); + + it('errors when neither --source nor --connector-config given on re-invocation', async () => { + const { primitive } = withExisting(); + const result = await primitive.add({ name: 'docs' }); + expect(result.success).toBe(false); + if (result.success) return; + expect(result.error.message).toMatch(/at least one --source/i); + }); + + it('errors when description differs', async () => { + const initial = emptyProject(); + initial.knowledgeBases = [ + { + type: 'AgentCoreKnowledgeBase', + name: 'docs', + description: 'Original description', + dataSources: [{ type: 'S3', uri: 's3://my-bucket/a/' }], + }, + ]; + const { primitive } = makePrimitive(initial); + const result = await primitive.add({ + name: 'docs', + source: ['s3://my-bucket/c/'], + description: 'Different description', + }); + expect(result.success).toBe(false); + if (result.success) return; + expect(result.error.message).toMatch(/update operations are not supported/i); + }); + + it('preserves existing description if not provided', async () => { + const initial 
= emptyProject(); + initial.knowledgeBases = [ + { + type: 'AgentCoreKnowledgeBase', + name: 'docs', + description: 'Original', + dataSources: [{ type: 'S3', uri: 's3://my-bucket/a/' }], + }, + ]; + const { primitive, getProject } = makePrimitive(initial); + const result = await primitive.add({ name: 'docs', source: ['s3://my-bucket/c/'] }); + expect(result.success).toBe(true); + expect(getProject().knowledgeBases[0]?.description).toBe('Original'); + }); + + it('treats empty-string description on append as a no-op', async () => { + const { primitive, getProject } = withExisting(); + const result = await primitive.add({ + name: 'docs', + source: ['s3://my-bucket/c/'], + description: '', + }); + expect(result.success).toBe(true); + expect(getProject().knowledgeBases[0]?.description).toBeUndefined(); + }); +}); + +describe('KnowledgeBasePrimitive — remove', () => { + afterEach(() => vi.restoreAllMocks()); + + function withTwoKbs() { + const initial = emptyProject(); + initial.knowledgeBases = [ + { + type: 'AgentCoreKnowledgeBase', + name: 'docs', + dataSources: [{ type: 'S3', uri: 's3://my-bucket/a/' }], + }, + { + type: 'AgentCoreKnowledgeBase', + name: 'compliance', + dataSources: [{ type: 'S3', uri: 's3://my-bucket/c/' }], + }, + ]; + return makePrimitive(initial); + } + + it('removes the named KB and leaves others intact', async () => { + const { primitive, getProject } = withTwoKbs(); + const result = await primitive.remove('docs'); + expect(result.success).toBe(true); + expect(getProject().knowledgeBases.map(kb => kb.name)).toEqual(['compliance']); + }); + + it('returns failure when KB not found', async () => { + const { primitive } = withTwoKbs(); + const result = await primitive.remove('nonexistent'); + expect(result.success).toBe(false); + if (result.success) return; + expect(result.error.message).toMatch(/not found/i); + }); + + it('previewRemove returns a schema diff', async () => { + const { primitive } = withTwoKbs(); + const preview = await 
primitive.previewRemove('docs'); + expect(preview.summary[0]).toMatch(/Removing knowledge base: docs/); + expect(preview.schemaChanges).toHaveLength(1); + expect(preview.schemaChanges[0]?.file).toBe('agentcore/agentcore.json'); + }); + + it('previewRemove throws when KB not found', async () => { + const { primitive } = withTwoKbs(); + await expect(primitive.previewRemove('nonexistent')).rejects.toThrow(/not found/i); + }); + + it('getRemovable lists all KBs', async () => { + const { primitive } = withTwoKbs(); + const removables = await primitive.getRemovable(); + expect(removables.map(r => r.name)).toEqual(['docs', 'compliance']); + }); + + it('addScreen returns null (no TUI in Wave 1)', () => { + const primitive = new KnowledgeBasePrimitive(); + expect(primitive.addScreen()).toBeNull(); + }); + + it('cascade-prunes the removed KB out of the gateway agentic-retrieve target', async () => { + const initial = emptyProject(); + initial.knowledgeBases = [ + { + type: 'AgentCoreKnowledgeBase', + name: 'docs', + dataSources: [{ type: 'S3', uri: 's3://my-bucket/a/' }], + gateway: 'main-gw', + }, + { + type: 'AgentCoreKnowledgeBase', + name: 'hr', + dataSources: [{ type: 'S3', uri: 's3://my-bucket/b/' }], + gateway: 'main-gw', + }, + ]; + initial.agentCoreGateways.push({ + name: 'main-gw', + targets: [ + { name: 'docs', targetType: 'connector', connectorId: 'bedrock-knowledge-bases', knowledgeBaseId: 'docs' }, + { name: 'hr', targetType: 'connector', connectorId: 'bedrock-knowledge-bases', knowledgeBaseId: 'hr' }, + { + name: 'main-gw-agentic', + targetType: 'connector', + connectorId: 'bedrock-agentic-retrieve', + knowledgeBaseIds: ['docs', 'hr'], + }, + ], + authorizerType: 'NONE', + enableSemanticSearch: true, + exceptionLevel: 'NONE', + } as unknown as AgentCoreProjectSpec['agentCoreGateways'][0]); + const { primitive, getProject } = makePrimitive(initial); + + const result = await primitive.remove('docs'); + expect(result.success).toBe(true); + + const targets = 
getProject().agentCoreGateways[0]?.targets ?? []; + // Per-KB Retrieve target gone; agentic target stays with hr only. + expect(targets.find(t => t.name === 'docs')).toBeUndefined(); + const agentic = targets.find(t => t.connectorId === 'bedrock-agentic-retrieve'); + expect(agentic).toBeDefined(); + expect(agentic?.knowledgeBaseIds).toEqual(['hr']); + }); + + it('removes the agentic-retrieve target entirely when the removed KB was its only entry', async () => { + const initial = emptyProject(); + initial.knowledgeBases = [ + { + type: 'AgentCoreKnowledgeBase', + name: 'docs', + dataSources: [{ type: 'S3', uri: 's3://my-bucket/a/' }], + gateway: 'main-gw', + }, + ]; + initial.agentCoreGateways.push({ + name: 'main-gw', + targets: [ + { name: 'docs', targetType: 'connector', connectorId: 'bedrock-knowledge-bases', knowledgeBaseId: 'docs' }, + { + name: 'main-gw-agentic', + targetType: 'connector', + connectorId: 'bedrock-agentic-retrieve', + knowledgeBaseIds: ['docs'], + }, + ], + authorizerType: 'NONE', + enableSemanticSearch: true, + exceptionLevel: 'NONE', + } as unknown as AgentCoreProjectSpec['agentCoreGateways'][0]); + const { primitive, getProject } = makePrimitive(initial); + + const result = await primitive.remove('docs'); + expect(result.success).toBe(true); + + const targets = getProject().agentCoreGateways[0]?.targets ?? 
[]; + expect(targets).toHaveLength(0); + }); + + it('previewRemove summarizes the agentic-retrieve prune', async () => { + const initial = emptyProject(); + initial.knowledgeBases = [ + { + type: 'AgentCoreKnowledgeBase', + name: 'docs', + dataSources: [{ type: 'S3', uri: 's3://my-bucket/a/' }], + gateway: 'main-gw', + }, + { + type: 'AgentCoreKnowledgeBase', + name: 'hr', + dataSources: [{ type: 'S3', uri: 's3://my-bucket/b/' }], + gateway: 'main-gw', + }, + ]; + initial.agentCoreGateways.push({ + name: 'main-gw', + targets: [ + { name: 'docs', targetType: 'connector', connectorId: 'bedrock-knowledge-bases', knowledgeBaseId: 'docs' }, + { name: 'hr', targetType: 'connector', connectorId: 'bedrock-knowledge-bases', knowledgeBaseId: 'hr' }, + { + name: 'main-gw-agentic', + targetType: 'connector', + connectorId: 'bedrock-agentic-retrieve', + knowledgeBaseIds: ['docs', 'hr'], + }, + ], + authorizerType: 'NONE', + enableSemanticSearch: true, + exceptionLevel: 'NONE', + } as unknown as AgentCoreProjectSpec['agentCoreGateways'][0]); + const { primitive } = makePrimitive(initial); + + const preview = await primitive.previewRemove('docs'); + const lines = preview.summary.join('\n'); + expect(lines).toMatch(/main-gw.*agentic-retrieve target 'main-gw-agentic' will lose KB 'docs'/); + }); + + it('previewRemove notes when the agentic-retrieve target itself will be removed', async () => { + const initial = emptyProject(); + initial.knowledgeBases = [ + { + type: 'AgentCoreKnowledgeBase', + name: 'docs', + dataSources: [{ type: 'S3', uri: 's3://my-bucket/a/' }], + gateway: 'main-gw', + }, + ]; + initial.agentCoreGateways.push({ + name: 'main-gw', + targets: [ + { name: 'docs', targetType: 'connector', connectorId: 'bedrock-knowledge-bases', knowledgeBaseId: 'docs' }, + { + name: 'main-gw-agentic', + targetType: 'connector', + connectorId: 'bedrock-agentic-retrieve', + knowledgeBaseIds: ['docs'], + }, + ], + authorizerType: 'NONE', + enableSemanticSearch: true, + exceptionLevel: 
'NONE', + } as unknown as AgentCoreProjectSpec['agentCoreGateways'][0]); + const { primitive } = makePrimitive(initial); + + const preview = await primitive.previewRemove('docs'); + const lines = preview.summary.join('\n'); + expect(lines).toMatch(/main-gw.*agentic-retrieve target 'main-gw-agentic' will be removed \(was the last KB\)/); + }); +}); diff --git a/src/cli/primitives/__tests__/OnlineInsightsPrimitive.test.ts b/src/cli/primitives/__tests__/OnlineInsightsPrimitive.test.ts new file mode 100644 index 000000000..0b0068f4d --- /dev/null +++ b/src/cli/primitives/__tests__/OnlineInsightsPrimitive.test.ts @@ -0,0 +1,36 @@ +import { validateInsightIds } from '../OnlineInsightsPrimitive.js'; +import { describe, expect, it } from 'vitest'; + +describe('validateInsightIds', () => { + it('accepts Builtin.Insight.* identifiers', () => { + expect(() => validateInsightIds(['Builtin.Insight.FailureAnalysis'])).not.toThrow(); + }); + + it('accepts multiple valid identifiers', () => { + expect(() => validateInsightIds(['Builtin.Insight.FailureAnalysis', 'Builtin.Insight.UserIntent'])).not.toThrow(); + }); + + it('accepts full ARN identifiers', () => { + expect(() => + validateInsightIds(['arn:aws:bedrock-agentcore:us-east-1::evaluator/Builtin.Insight.FailureAnalysis']) + ).not.toThrow(); + }); + + it('rejects identifiers without Builtin.Insight.* prefix or ARN', () => { + expect(() => validateInsightIds(['InvalidString'])).toThrow('Must be a Builtin.Insight.* identifier'); + }); + + it('rejects Builtin.Helpfulness (evaluator prefix, not insight)', () => { + expect(() => validateInsightIds(['Builtin.Helpfulness'])).toThrow('Must be a Builtin.Insight.* identifier'); + }); + + it('rejects empty string', () => { + expect(() => validateInsightIds([''])).toThrow('Must be a Builtin.Insight.* identifier'); + }); + + it('rejects when any item in the array is invalid', () => { + expect(() => validateInsightIds(['Builtin.Insight.FailureAnalysis', 'bad'])).toThrow( + 'Must be a 
Builtin.Insight.* identifier' + ); + }); +}); diff --git a/src/cli/primitives/__tests__/PaymentConnectorPrimitive.test.ts b/src/cli/primitives/__tests__/PaymentConnectorPrimitive.test.ts index 3fc1136d3..48189aa6f 100644 --- a/src/cli/primitives/__tests__/PaymentConnectorPrimitive.test.ts +++ b/src/cli/primitives/__tests__/PaymentConnectorPrimitive.test.ts @@ -41,6 +41,7 @@ function makeProject(overrides: Partial = {}): AgentCorePr managedBy: 'CDK' as const, runtimes: [], memories: [], + knowledgeBases: [], credentials: [], evaluators: [], onlineEvalConfigs: [], @@ -50,6 +51,7 @@ function makeProject(overrides: Partial = {}): AgentCorePr abTests: [], httpGateways: [], harnesses: [], + datasets: [], payments: [], ...overrides, }; diff --git a/src/cli/primitives/__tests__/PaymentManagerPrimitive.test.ts b/src/cli/primitives/__tests__/PaymentManagerPrimitive.test.ts index 05f701b60..7bc1d47c5 100644 --- a/src/cli/primitives/__tests__/PaymentManagerPrimitive.test.ts +++ b/src/cli/primitives/__tests__/PaymentManagerPrimitive.test.ts @@ -27,6 +27,7 @@ function makeProject(overrides: Partial = {}): AgentCorePr managedBy: 'CDK' as const, runtimes: [], memories: [], + knowledgeBases: [], credentials: [], evaluators: [], onlineEvalConfigs: [], @@ -36,6 +37,7 @@ function makeProject(overrides: Partial = {}): AgentCorePr abTests: [], httpGateways: [], harnesses: [], + datasets: [], payments: [], ...overrides, }; diff --git a/src/cli/primitives/__tests__/PolicyPrimitive.test.ts b/src/cli/primitives/__tests__/PolicyPrimitive.test.ts new file mode 100644 index 000000000..2d95c7533 --- /dev/null +++ b/src/cli/primitives/__tests__/PolicyPrimitive.test.ts @@ -0,0 +1,111 @@ +import type { AgentCoreProjectSpec, Policy, PolicyEngine } from '../../../schema'; +import { PolicyPrimitive } from '../PolicyPrimitive'; +import { beforeEach, describe, expect, it, vi } from 'vitest'; + +const engine: PolicyEngine = { name: 'eng', policies: [] }; + +const defaultProject: AgentCoreProjectSpec = { 
+ name: 'test', + version: 1, + managedBy: 'CDK' as const, + runtimes: [], + memories: [], + knowledgeBases: [], + credentials: [], + evaluators: [], + onlineEvalConfigs: [], + agentCoreGateways: [], + policyEngines: [engine], + configBundles: [], + abTests: [], + harnesses: [], + datasets: [], + payments: [], +}; + +const { mockConfigExists, mockReadProjectSpec, mockWriteProjectSpec } = vi.hoisted(() => ({ + mockConfigExists: vi.fn().mockReturnValue(true), + mockReadProjectSpec: vi.fn(), + mockWriteProjectSpec: vi.fn().mockResolvedValue(undefined), +})); + +vi.mock('../../../lib', () => { + const MockConfigIO = vi.fn(function (this: Record) { + this.configExists = mockConfigExists; + this.readProjectSpec = mockReadProjectSpec; + this.writeProjectSpec = mockWriteProjectSpec; + }); + return { + ConfigIO: MockConfigIO, + findConfigRoot: vi.fn().mockReturnValue('/fake/root'), + setEnvVar: vi.fn().mockResolvedValue(undefined), + toError: (err: unknown) => (err instanceof Error ? err : new Error(String(err))), + serializeResult: (r: unknown) => r, + ValidationError: class extends Error { + constructor(m: string) { + super(m); + this.name = 'ValidationError'; + } + }, + ResourceNotFoundError: class extends Error { + constructor(m: string) { + super(m); + this.name = 'ResourceNotFoundError'; + } + }, + }; +}); + +/** Extract the first policy written to the engine on writeProjectSpec. 
*/ +function getWrittenPolicy(): Policy { + expect(mockWriteProjectSpec).toHaveBeenCalledTimes(1); + const spec = mockWriteProjectSpec.mock.calls[0]![0] as AgentCoreProjectSpec; + const policy = spec.policyEngines[0]?.policies[0]; + expect(policy).toBeDefined(); + return policy!; +} + +describe('PolicyPrimitive — enforcementMode', () => { + let primitive: PolicyPrimitive; + + beforeEach(() => { + vi.clearAllMocks(); + // Fresh engine each run so policies don't accumulate across tests + mockReadProjectSpec.mockImplementation(() => + Promise.resolve({ ...defaultProject, policyEngines: [{ name: 'eng', policies: [] }] }) + ); + primitive = new PolicyPrimitive(); + }); + + it('persists enforcementMode LOG_ONLY when provided', async () => { + const result = await primitive.add({ + name: 'shadow', + engine: 'eng', + statement: 'forbid(principal, action, resource is AgentCore::Gateway);', + enforcementMode: 'LOG_ONLY', + }); + expect(result.success).toBe(true); + expect(getWrittenPolicy().enforcementMode).toBe('LOG_ONLY'); + }); + + it('persists enforcementMode ACTIVE when provided', async () => { + const result = await primitive.add({ + name: 'active', + engine: 'eng', + statement: 'forbid(principal, action, resource is AgentCore::Gateway);', + enforcementMode: 'ACTIVE', + }); + expect(result.success).toBe(true); + expect(getWrittenPolicy().enforcementMode).toBe('ACTIVE'); + }); + + it('defaults enforcementMode to ACTIVE when omitted', async () => { + const result = await primitive.add({ + name: 'defaulted', + engine: 'eng', + statement: 'forbid(principal, action, resource is AgentCore::Gateway);', + }); + expect(result.success).toBe(true); + expect(getWrittenPolicy().enforcementMode).toBe('ACTIVE'); + }); +}); diff --git a/src/cli/primitives/__tests__/auth-utils.test.ts b/src/cli/primitives/__tests__/auth-utils.test.ts index 3eac9b61f..10c700961 100644 --- a/src/cli/primitives/__tests__/auth-utils.test.ts +++ b/src/cli/primitives/__tests__/auth-utils.test.ts @@ -88,6 
+88,7 @@ describe('createManagedOAuthCredential', () => { managedBy: 'CDK' as const, runtimes: [], memories: [], + knowledgeBases: [], credentials: [], evaluators: [], onlineEvalConfigs: [], @@ -95,7 +96,6 @@ describe('createManagedOAuthCredential', () => { policyEngines: [], configBundles: [], abTests: [], - httpGateways: [], harnesses: [], datasets: [], payments: [], diff --git a/src/cli/primitives/__tests__/wirePaymentCapability.test.ts b/src/cli/primitives/__tests__/wirePaymentCapability.test.ts index 2aae99017..d00fbf2fa 100644 --- a/src/cli/primitives/__tests__/wirePaymentCapability.test.ts +++ b/src/cli/primitives/__tests__/wirePaymentCapability.test.ts @@ -83,6 +83,7 @@ function makeProject(codeLocation: string, runtimeOverrides: Record[] = [ credentialPrimitive, evaluatorPrimitive, onlineEvalConfigPrimitive, + onlineInsightsPrimitive, gatewayPrimitive, gatewayTargetPrimitive, + knowledgeBasePrimitive, policyEnginePrimitive, policyPrimitive, configBundlePrimitive, - abTestPrimitive, runtimeEndpointPrimitive, paymentManagerPrimitive, paymentConnectorPrimitive, diff --git a/src/cli/project.ts b/src/cli/project.ts index fa1654b43..3399042ab 100644 --- a/src/cli/project.ts +++ b/src/cli/project.ts @@ -13,6 +13,7 @@ export function createDefaultProjectSpec(projectName: string): AgentCoreProjectS managedBy: 'CDK' as const, runtimes: [], memories: [], + knowledgeBases: [], credentials: [], evaluators: [], onlineEvalConfigs: [], @@ -21,7 +22,6 @@ export function createDefaultProjectSpec(projectName: string): AgentCoreProjectS harnesses: [], configBundles: [], abTests: [], - httpGateways: [], datasets: [], payments: [], tags: { diff --git a/src/cli/telemetry/schemas/command-run.ts b/src/cli/telemetry/schemas/command-run.ts index 5ddea55fe..564709f2d 100644 --- a/src/cli/telemetry/schemas/command-run.ts +++ b/src/cli/telemetry/schemas/command-run.ts @@ -18,6 +18,7 @@ import { FilterType, GatewayTargetHost, GatewayTargetType, + JobType, MemoryType, Mode, 
ModelProvider, @@ -28,6 +29,7 @@ import { PolicyValidationMode, RefType, ResourceType, + SkillSourceType, UiMode, safeSchema, } from './common-shapes.js'; @@ -92,11 +94,20 @@ const AddGatewayTargetAttrs = safeSchema({ const AddPolicyEngineAttrs = safeSchema({ attach_gateway_count: Count, attach_mode: AttachMode }); +const AddKnowledgeBaseAttrs = safeSchema({ + data_source_count: Count, + has_description: z.boolean(), + has_gateway: z.boolean(), + is_append: z.boolean(), +}); + const AddPolicyAttrs = safeSchema({ policy_attr_source_type: PolicyAttrSourceType, policy_validation_mode: PolicyValidationMode, }); +const AddSkillAttrs = safeSchema({ skill_source_type: SkillSourceType }); + const DeployAttrs = safeSchema({ runtime_count: Count, harness_count: Count, @@ -155,8 +166,25 @@ const RunEvalAttrs = safeSchema({ has_expected_response: z.boolean(), }); +const RunIngestAttrs = safeSchema({ + data_source_count: Count, +}); + const FetchAccessAttrs = safeSchema({ resource_type: ResourceType }); +/** + * Async job commands (recommendation + batch-evaluation), keyed by verb with job_type to disambiguate. + * safeSchema only permits required enum/boolean/number/literal fields, so per-type detail enums + * (recommendation_kind, batch_eval_source, …) are recorded via the shared ATTRIBUTES set on the + * recorder when present rather than as required fields here. 
+ */ +const RunJobAttrs = safeSchema({ + job_type: JobType, + has_wait: z.boolean(), +}); + +const JobTypeOnlyAttrs = safeSchema({ job_type: JobType }); + const UpdateAttrs = safeSchema({ is_dry_run: z.boolean() }); const FeedbackAttrs = safeSchema({ @@ -166,6 +194,18 @@ const FeedbackAttrs = safeSchema({ const PauseResumeOnlineEvalAttrs = safeSchema({ ref_type: RefType }); +const AddOnlineInsightsAttrs = safeSchema({ insights_count: Count, enable_on_create: z.boolean() }); + +const ExportHarnessAttrs = safeSchema({ + build_type: BuildType, + model_provider: ModelProvider, + has_memory: z.boolean(), + has_gateway: z.boolean(), + has_container: z.boolean(), + has_execution_limits: z.boolean(), + notes_count: Count, +}); + const NoAttrs = safeSchema({}); /* @@ -180,13 +220,17 @@ export const COMMAND_SCHEMAS = { 'add.credential': AddCredentialAttrs, 'add.evaluator': AddEvaluatorAttrs, 'add.online-eval': AddOnlineEvalAttrs, + 'add.online-insights': AddOnlineInsightsAttrs, 'add.gateway': AddGatewayAttrs, 'add.gateway-target': AddGatewayTargetAttrs, + 'add.web-search': NoAttrs, 'add.policy-engine': AddPolicyEngineAttrs, 'add.policy': AddPolicyAttrs, 'add.runtime-endpoint': NoAttrs, + 'add.knowledge-base': AddKnowledgeBaseAttrs, 'add.payment-manager': NoAttrs, 'add.payment-connector': NoAttrs, + 'add.skill': AddSkillAttrs, deploy: DeployAttrs, // dev / invoke / exec @@ -199,11 +243,22 @@ export const COMMAND_SCHEMAS = { logs: LogsAttrs, 'logs.evals': LogsEvalsAttrs, 'run.eval': RunEvalAttrs, + 'run.job': RunJobAttrs, + 'job.history': JobTypeOnlyAttrs, + 'job.get': JobTypeOnlyAttrs, + 'archive.job': JobTypeOnlyAttrs, + 'stop.job': JobTypeOnlyAttrs, + 'run.ingest': RunIngestAttrs, + 'pause.job': JobTypeOnlyAttrs, + 'resume.job': JobTypeOnlyAttrs, + 'promote.job': JobTypeOnlyAttrs, 'fetch.access': FetchAccessAttrs, feedback: FeedbackAttrs, update: UpdateAttrs, 'pause.online-eval': PauseResumeOnlineEvalAttrs, 'resume.online-eval': PauseResumeOnlineEvalAttrs, + 
'pause.online-insights': PauseResumeOnlineEvalAttrs, + 'resume.online-insights': PauseResumeOnlineEvalAttrs, 'traces.list': NoAttrs, 'traces.get': NoAttrs, 'evals.history': NoAttrs, @@ -225,21 +280,25 @@ export const COMMAND_SCHEMAS = { 'remove.credential': NoAttrs, 'remove.evaluator': NoAttrs, 'remove.online-eval': NoAttrs, + 'remove.online-insights': NoAttrs, 'remove.gateway': NoAttrs, 'remove.gateway-target': NoAttrs, + 'remove.web-search': NoAttrs, 'remove.policy-engine': NoAttrs, 'remove.policy': NoAttrs, 'remove.runtime-endpoint': NoAttrs, 'remove.config-bundle': NoAttrs, - 'remove.ab-test': NoAttrs, + 'remove.knowledge-base': NoAttrs, 'dataset.download': NoAttrs, 'dataset.publish-version': NoAttrs, 'dataset.remove-version': NoAttrs, 'remove.payment-manager': NoAttrs, 'remove.payment-connector': NoAttrs, + 'remove.skill': NoAttrs, 'telemetry.disable': NoAttrs, 'telemetry.enable': NoAttrs, 'telemetry.status': NoAttrs, + 'export.harness': ExportHarnessAttrs, } as const satisfies Record>; // --------------------------------------------------------------------------- diff --git a/src/cli/telemetry/schemas/common-shapes.ts b/src/cli/telemetry/schemas/common-shapes.ts index a2389252b..5ffa63580 100644 --- a/src/cli/telemetry/schemas/common-shapes.ts +++ b/src/cli/telemetry/schemas/common-shapes.ts @@ -35,6 +35,7 @@ export const AuthType = z.enum(['sigv4', 'bearer_token']); export const AuthorizerType = z.enum(['aws_iam', 'custom_jwt', 'none']); export const BuildType = z.enum(['codezip', 'container']); export const CredentialType = z.enum(['api-key', 'oauth']); +export const SkillSourceType = z.enum(['path', 's3', 'git', 'aws_skills']); export const EvaluatorType = z.enum(['llm-as-a-judge', 'code-based']); export const ExitReason = z.enum(['success', 'failure']); export const FilterState = z.enum(['deployed', 'local-only', 'pending-removal', 'none']); @@ -49,7 +50,6 @@ export const FilterType = z.enum([ 'policy-engine', 'policy', 'config-bundle', - 'ab-test', 
'dataset', 'harness', 'none', @@ -63,6 +63,9 @@ export const GatewayTargetType = z.enum([ 'open-api-schema', 'smithy-model', 'lambda-function-arn', + 'http-runtime', + 'passthrough', + 'web-search', 'unknown', ]); @@ -73,23 +76,32 @@ export const GATEWAY_TARGET_TYPE_MAP: Record { expect(mockCopyAndRenderDir).toHaveBeenCalledWith( join(tmpDir, 'python', 'http', 'strands', 'base'), '/output/app/MyAgent', - expect.objectContaining({ projectName: 'MyAgent', Name: 'MyAgent', hasMcp: false }) + expect.objectContaining({ projectName: 'MyAgent', Name: 'MyAgent' }) ); }); diff --git a/src/cli/templates/render.ts b/src/cli/templates/render.ts index 6f6aaff3d..3e4910c08 100644 --- a/src/cli/templates/render.ts +++ b/src/cli/templates/render.ts @@ -19,6 +19,34 @@ Handlebars.registerHelper('pathSlug', (str: string) => { .replace(/_+/g, '_') .toLowerCase(); }); +// Emits a value as JSON-safe Python literal (string or dict). +// Wraps result in Handlebars.SafeString to prevent double-escaping. +Handlebars.registerHelper('safeJson', (value: unknown) => { + return new Handlebars.SafeString(JSON.stringify(value)); +}); +// Emits a value as a Python string literal containing its JSON text, for use with json.loads(). +// Unlike safeJson, this is safe for arbitrary objects: JSON booleans/null inside the value stay +// inside the string (true/false/null) and are parsed by json.loads at runtime, rather than being +// inlined as bare Python tokens (which would be NameErrors). Double-encoding guarantees a valid +// Python string literal because JSON's escape set is a subset of Python's. +Handlebars.registerHelper('pyJsonStr', (value: unknown) => { + return new Handlebars.SafeString(JSON.stringify(JSON.stringify(value))); +}); +// Escapes triple-double-quotes so the value is safe to embed in a Python """...""" string. +Handlebars.registerHelper('escapePyStr', (value: unknown) => { + const s = typeof value === 'string' ? 
value : ''; + return new Handlebars.SafeString(s.replace(/\\/g, '\\\\').replace(/"""/g, '\\"\\"\\"')); +}); +Handlebars.registerHelper('some', (array: unknown[], key: string) => { + if (!Array.isArray(array)) return false; + return array.some( + item => item !== null && typeof item === 'object' && key in item && !!(item as Record)[key] + ); +}); +Handlebars.registerHelper('or', (...args: unknown[]) => { + // Last arg is the Handlebars options object — exclude it + return args.slice(0, -1).some(Boolean); +}); /** * Renames template files to their actual names. diff --git a/src/cli/templates/types.ts b/src/cli/templates/types.ts index 1c596226a..ec89bc103 100644 --- a/src/cli/templates/types.ts +++ b/src/cli/templates/types.ts @@ -38,6 +38,8 @@ export interface GatewayProviderRenderConfig { discoveryUrl?: string; /** Space-separated scopes for token request (CUSTOM_JWT only) */ scopes?: string; + /** Hardcoded URL for external gateways not found in deployed-state.json */ + hardcodedUrl?: string; } /** @@ -81,4 +83,59 @@ export interface AgentRenderConfig { enableOtel?: boolean; /** Whether a config bundle is wired into the agent template */ hasConfigBundle?: boolean; + + // ── Export-only fields (set by harness-mapper, consumed by export templates) ── + + /** Execution limits from harness — triggers execution-limits capability */ + maxIterations?: number; + maxTokens?: number; + timeoutSeconds?: number; + + /** Truncation strategy from harness. 'none' renders no truncation block (truncation disabled). 
*/ + truncationStrategy?: 'sliding_window' | 'summarization' | 'none'; + truncationConfig?: Record; + + /** Inline function tool definitions — one PythonAgentTool generated per entry */ + inlineFunctionTools?: { name: string; description: string; inputSchema: Record }[]; + + /** Remote MCP tool definitions (non-gateway) */ + remoteMcpTools?: { + name: string; + url: string; + headerCredentials?: { headerKey: string; credentialName: string; envVarName: string }[]; + }[]; + + /** Skill paths for AgentSkills plugin */ + pathSkills?: string[]; + s3Skills?: string[]; + gitSkills?: { url: string; path?: string; credentialArn?: string; username?: string }[]; + /** True when any skills exist (path, s3, or git) — enables AgentSkills plugin */ + hasSkillsFetcher?: boolean; + /** True when s3 or git skills exist — enables fetcher imports */ + hasFetchedSkills?: boolean; + + /** True when agentcore_browser tool is present and allowed */ + hasBrowser?: boolean; + /** Custom browser identifier (resource ID extracted from browserArn) */ + browserIdentifier?: string; + /** True when agentcore_code_interpreter tool is present and allowed */ + hasCodeInterpreter?: boolean; + /** Custom code interpreter identifier (resource ID extracted from codeInterpreterArn) */ + codeInterpreterIdentifier?: string; + /** True when the builtin shell tool is enabled (export harness only) */ + hasShell?: boolean; + /** True when the builtin file_operations tool is enabled (export harness only) */ + hasFileOperations?: boolean; + /** True when any execution limit is configured */ + hasExecutionLimits?: boolean; + + /** Model ID to use in load.py (export path only — overrides the provider-specific template default) */ + modelId?: string; + + /** True when generating from a harness export (suppresses placeholder tools) */ + isExportHarness?: boolean; + /** System prompt text written verbatim into main.py (export path) */ + systemPromptText?: string; + /** Default actor ID for memory session manager */ + 
actorId?: string; } diff --git a/src/cli/tui/App.tsx b/src/cli/tui/App.tsx index f7c578aa6..81795d16d 100644 --- a/src/cli/tui/App.tsx +++ b/src/cli/tui/App.tsx @@ -4,7 +4,6 @@ import { LayoutProvider } from './context'; import { CLI_ONLY_EXAMPLES } from './copy'; import { setExitAction } from './exit-action'; import { MissingProjectMessage, WrongDirectoryMessage, getProjectRootMismatch, projectExists } from './guards'; -import { ABTestPickerScreen } from './screens/ab-test'; import { AddFlow } from './screens/add/AddFlow'; import { CliOnlyScreen } from './screens/cli-only'; import { ConfigBundleFlow } from './screens/config-bundle-hub'; @@ -12,19 +11,24 @@ import { CreateScreen } from './screens/create'; import { DatasetFlow } from './screens/dataset-hub'; import { DeployScreen } from './screens/deploy/DeployScreen'; import { EvalHubScreen, EvalScreen } from './screens/eval'; +import { ExportHarnessFlow } from './screens/export'; import { FetchAccessScreen } from './screens/fetch-access'; import { HelpScreen, HomeScreen } from './screens/home'; import { ImportFlow } from './screens/import'; +import { InsightsJobsScreen } from './screens/insights-jobs'; import { InvokeScreen } from './screens/invoke'; import { LogsScreen } from './screens/logs'; import { OnlineEvalDashboard } from './screens/online-eval'; import { PackageScreen } from './screens/package'; import { RecommendationFlow, RecommendationHistoryScreen, RecommendationsHubScreen } from './screens/recommendation'; import { RemoveFlow } from './screens/remove'; -import { BatchEvalHistoryScreen, RunBatchEvalFlow, RunEvalFlow, RunScreen } from './screens/run-eval'; +import { ABTestJobsHistoryScreen, RunABTestFlow } from './screens/run-ab-test'; +import { BatchEvalHistoryScreen, RunBatchEvalFlow, RunEvalFlow, RunIngestFlow, RunScreen } from './screens/run-eval'; +import { RunInsightsFlow } from './screens/run-insights'; import { StatusScreen } from './screens/status/StatusScreen'; import { UpdateScreen } from 
'./screens/update'; import { ValidateScreen } from './screens/validate'; +import { ViewTypePickerScreen } from './screens/view'; import { getCommandsForUI } from './utils/commands'; import { useApp } from 'ink'; import React, { useState } from 'react'; @@ -55,10 +59,16 @@ type Route = | { name: 'run' } | { name: 'run-eval'; from?: 'run' | 'evals' } | { name: 'run-batch-eval'; from?: 'run' | 'evals' } + | { name: 'run-ingest'; from?: 'run' } | { name: 'batch-eval-history' } + | { name: 'run-insights'; from?: 'run' | 'evals' } + | { name: 'insights-jobs' } | { name: 'recommendations-hub' } | { name: 'recommend'; from?: 'recommendations-hub' | 'run' } | { name: 'recommendation-history' } + | { name: 'run-ab-test'; from?: 'run' } + | { name: 'ab-test-jobs' } + | { name: 'view' } | { name: 'evals' } | { name: 'eval-runs' } | { name: 'online-evals' } @@ -69,7 +79,7 @@ type Route = | { name: 'config-bundle' } | { name: 'dataset' } | { name: 'import' } - | { name: 'ab-test' } + | { name: 'export-harness' } | { name: 'cli-only'; commandId: string }; // Commands that don't require being at the project root @@ -163,8 +173,8 @@ function AppContent({ setRoute({ name: 'evals' }); } else if (id === 'fetch') { setRoute({ name: 'fetch-access' }); - } else if (id === 'recommendations') { - setRoute({ name: 'recommendations-hub' }); + } else if (id === 'view') { + setRoute({ name: 'view' }); } else if (id === 'validate') { setRoute({ name: 'validate' }); } else if (id === 'package') { @@ -177,12 +187,18 @@ function AppContent({ setRoute({ name: 'import' }); } else if (id === 'update') { setRoute({ name: 'update' }); + } else if (id === 'batch-evaluations') { + setRoute({ name: 'batch-eval-history' }); } else if (id === 'config-bundle') { setRoute({ name: 'config-bundle' }); } else if (id === 'dataset') { setRoute({ name: 'dataset' }); - } else if (id === 'ab-test') { - setRoute({ name: 'ab-test' }); + } else if (id === 'export') { + if (!projectExists() && route.name === 'help') { + 
setHelpNotice(); + return; + } + setRoute({ name: 'export-harness' }); } }; @@ -303,8 +319,21 @@ function AppContent({ setRoute({ name: 'run-eval', from: 'run' })} onRunBatchEval={() => setRoute({ name: 'run-batch-eval', from: 'run' })} + onRunInsights={() => setRoute({ name: 'run-insights', from: 'run' })} onRunRecommendation={() => setRoute({ name: 'recommend', from: 'run' })} + onRunIngest={() => setRoute({ name: 'run-ingest', from: 'run' })} + onRunABTest={() => setRoute({ name: 'run-ab-test', from: 'run' })} + onExit={handleBack} + /> + ); + } + + if (route.name === 'run-insights') { + return ( + setRoute({ name: route.from ?? 'run' } as Route)} + onViewJobs={() => setRoute({ name: 'insights-jobs' })} /> ); } @@ -317,6 +346,8 @@ function AppContent({ if (view === 'runs') setRoute({ name: 'eval-runs' }); if (view === 'run-batch-eval') setRoute({ name: 'run-batch-eval', from: 'evals' }); if (view === 'batch-eval-history') setRoute({ name: 'batch-eval-history' }); + if (view === 'run-insights') setRoute({ name: 'run-insights', from: 'evals' }); + if (view === 'insights-jobs') setRoute({ name: 'insights-jobs' }); if (view === 'online-dashboard') setRoute({ name: 'online-evals' }); }} onExit={handleBack} @@ -336,11 +367,38 @@ function AppContent({ if (route.name === 'run-batch-eval') { const backRoute = route.from ?? 'run'; - return setRoute({ name: backRoute } as Route)} />; + return ( + setRoute({ name: backRoute } as Route)} + onViewJobs={() => setRoute({ name: 'batch-eval-history' })} + /> + ); + } + + if (route.name === 'run-ingest') { + const backRoute = route.from ?? 
'run'; + return setRoute({ name: backRoute } as Route)} />; + } + + if (route.name === 'view') { + return ( + { + if (view === 'recommendation') setRoute({ name: 'recommendation-history' }); + if (view === 'batch-evaluation') setRoute({ name: 'batch-eval-history' }); + if (view === 'ab-test') setRoute({ name: 'ab-test-jobs' }); + }} + onExit={handleBack} + /> + ); } if (route.name === 'batch-eval-history') { - return setRoute({ name: 'evals' })} />; + return setRoute({ name: 'view' })} />; + } + + if (route.name === 'insights-jobs') { + return setRoute({ name: 'evals' })} />; } if (route.name === 'recommendations-hub') { @@ -356,12 +414,31 @@ function AppContent({ } if (route.name === 'recommend') { - const backRoute = route.from ?? 'recommendations-hub'; - return setRoute({ name: backRoute } as Route)} />; + const backRoute = route.from ?? 'run'; + return ( + setRoute({ name: backRoute } as Route)} + onViewJobs={() => setRoute({ name: 'recommendation-history' })} + /> + ); } if (route.name === 'recommendation-history') { - return setRoute({ name: 'recommendations-hub' })} />; + return setRoute({ name: 'view' })} />; + } + + if (route.name === 'run-ab-test') { + const backRoute = route.from ?? 
'run'; + return ( + setRoute({ name: backRoute } as Route)} + onViewJobs={() => setRoute({ name: 'ab-test-jobs' })} + /> + ); + } + + if (route.name === 'ab-test-jobs') { + return setRoute({ name: 'view' })} />; } if (route.name === 'eval-runs') { @@ -405,8 +482,15 @@ function AppContent({ return setRoute({ name: 'help' })} />; } - if (route.name === 'ab-test') { - return ; + if (route.name === 'export-harness') { + return ( + setRoute({ name: 'deploy' })} + /> + ); } if (route.name === 'cli-only') { diff --git a/src/cli/tui/__tests__/app-command-coverage.test.ts b/src/cli/tui/__tests__/app-command-coverage.test.ts new file mode 100644 index 000000000..b19975d83 --- /dev/null +++ b/src/cli/tui/__tests__/app-command-coverage.test.ts @@ -0,0 +1,54 @@ +/** + * Regression test: every command shown on the TUI home screen must have a handler + * in onSelectCommand (either an explicit route, or an entry in CLI_ONLY_EXAMPLES). + * + * If this test fails, you added a command to the program but forgot to handle it + * in App.tsx's onSelectCommand function or add it to CLI_ONLY_EXAMPLES in copy.ts. 
+ */ +import { createProgram } from '../../cli'; +import { CLI_ONLY_EXAMPLES } from '../copy'; +import { getCommandsForUI } from '../utils/commands'; +import { describe, expect, it } from 'vitest'; + +// These command IDs have explicit route handlers in App.tsx onSelectCommand +const ROUTED_COMMANDS = new Set([ + 'dev', + 'exec', + 'deploy', + 'invoke', + 'logs', + 'status', + 'create', + 'add', + 'remove', + 'run', + 'evals', + 'fetch', + 'view', + 'validate', + 'package', + 'import', + 'update', + 'config-bundle', + 'dataset', + 'batch-evaluations', +]); + +describe('TUI home screen command coverage', () => { + it('every visible command has either a route handler or CLI_ONLY_EXAMPLES entry', () => { + const program = createProgram(); + const commands = getCommandsForUI(program, { inProject: true }); + + const unhandled: string[] = []; + for (const cmd of commands) { + if (cmd.id === 'help') continue; // help is special-cased + const hasRoute = ROUTED_COMMANDS.has(cmd.id); + const hasCliOnly = cmd.id in CLI_ONLY_EXAMPLES; + if (!hasRoute && !hasCliOnly) { + unhandled.push(cmd.id); + } + } + + expect(unhandled).toEqual([]); + }); +}); diff --git a/src/cli/tui/components/DeployStatus.tsx b/src/cli/tui/components/DeployStatus.tsx index c6e78b3e4..837109a46 100644 --- a/src/cli/tui/components/DeployStatus.tsx +++ b/src/cli/tui/components/DeployStatus.tsx @@ -35,8 +35,12 @@ function extractProgress(messages: DeployMessage[]): { current: number; total: n * Progress bar component. */ function ProgressBar({ current, total }: { current: number; total: number }) { - const percent = total > 0 ? current / total : 0; - const filled = Math.round(percent * PROGRESS_BAR_WIDTH); + // CDK toolkit can briefly emit completed > total during graph expansion. + // Clamp here so the bar never asks String.repeat for a negative count. + const safeTotal = total > 0 ? total : 0; + const safeCurrent = Math.max(0, Math.min(current, safeTotal)); + const percent = safeTotal > 0 ? 
safeCurrent / safeTotal : 0; + const filled = Math.min(PROGRESS_BAR_WIDTH, Math.max(0, Math.round(percent * PROGRESS_BAR_WIDTH))); const empty = PROGRESS_BAR_WIDTH - filled; return ( @@ -47,7 +51,7 @@ function ProgressBar({ current, total }: { current: number; total: number }) { ] {' '} - {current}/{total} + {safeCurrent}/{safeTotal} ); diff --git a/src/cli/tui/components/ResourceGraph.tsx b/src/cli/tui/components/ResourceGraph.tsx index 6e24915b4..f7f748035 100644 --- a/src/cli/tui/components/ResourceGraph.tsx +++ b/src/cli/tui/components/ResourceGraph.tsx @@ -21,10 +21,10 @@ const ICONS = { 'policy-engine': '▣', policy: '▢', 'config-bundle': '⬡', - 'ab-test': '⚗', dataset: '▤', harness: '⬢', 'runtime-endpoint': '◉', + 'knowledge-base': '✚', payment: '₿', } as const; @@ -127,6 +127,7 @@ export function ResourceGraph({ project, mcp, agentName, resourceStatuses }: Res const allAgents = project.runtimes ?? []; const agents = agentName ? allAgents.filter(a => a.name === agentName) : allAgents; const memories = project.memories ?? []; + const knowledgeBases = project.knowledgeBases ?? []; const credentials = project.credentials ?? []; const evaluators = project.evaluators ?? []; const onlineEvalConfigs = project.onlineEvalConfigs ?? []; @@ -136,8 +137,8 @@ export function ResourceGraph({ project, mcp, agentName, resourceStatuses }: Res const policyEngines = project.policyEngines ?? []; const configBundles = project.configBundles ?? []; const datasets = project.datasets ?? []; - const abTests = project.abTests ?? []; const payments = project.payments ?? []; + const harnesses = project.harnesses ?? 
[]; // Build lookup map and collect pending-removal resources in a single pass const { statusMap, pendingRemovals } = useMemo(() => { @@ -163,6 +164,7 @@ export function ResourceGraph({ project, mcp, agentName, resourceStatuses }: Res const hasContent = agents.length > 0 || memories.length > 0 || + knowledgeBases.length > 0 || credentials.length > 0 || evaluators.length > 0 || onlineEvalConfigs.length > 0 || @@ -171,6 +173,7 @@ export function ResourceGraph({ project, mcp, agentName, resourceStatuses }: Res mcpRuntimeTools.length > 0 || unassignedTargets.length > 0 || payments.length > 0 || + harnesses.length > 0 || pendingRemovals.length > 0; return ( @@ -246,6 +249,31 @@ export function ResourceGraph({ project, mcp, agentName, resourceStatuses }: Res )} + {/* Knowledge Bases */} + {knowledgeBases.length > 0 && ( + + Knowledge Bases + {knowledgeBases.map(kb => { + const rsEntry = statusMap.get(`knowledge-base:${kb.name}`); + const dsCount = kb.dataSources.length; + const fallbackDetail = `${dsCount} data source${dsCount === 1 ? '' : 's'}`; + return ( + + ); + })} + + )} + {/* Credentials */} {credentials.length > 0 && ( @@ -298,7 +326,7 @@ export function ResourceGraph({ project, mcp, agentName, resourceStatuses }: Res Online Eval Configs {onlineEvalConfigs.map(config => { const rsEntry = statusMap.get(`online-eval:${config.name}`); - const defaultDetail = `${config.evaluators.length} evaluator${config.evaluators.length !== 1 ? 's' : ''} — ${config.samplingRate}% sampling`; + const defaultDetail = `${(config.evaluators ?? []).length} evaluator${(config.evaluators ?? []).length !== 1 ? 
's' : ''} — ${config.samplingRate}% sampling`; return ( )} - {/* AB Tests */} - {abTests.length > 0 && ( + {/* Harnesses */} + {harnesses.length > 0 && ( - AB Tests - {abTests.map(test => { - const rsEntry = statusMap.get(`ab-test:${test.name}`); + Harnesses + {harnesses.map(harness => { + const rsEntry = statusMap.get(`harness:${harness.name}`); return ( ); })} @@ -536,13 +563,14 @@ export function ResourceGraph({ project, mcp, agentName, resourceStatuses }: Res {ICONS.agent} agent{' '} {ICONS.memory} memory{' '} + {ICONS['knowledge-base']} knowledge base{' '} {ICONS.credential} credential{' '} {ICONS.evaluator} evaluator{' '} {ICONS['online-eval']} online-eval{' '} {ICONS.gateway} gateway{' '} {ICONS['policy-engine']} policy engine{' '} {ICONS['config-bundle']} config bundle{' '} - {ICONS['ab-test']} ab test + {ICONS.harness} harness diff --git a/src/cli/tui/components/TextInput.tsx b/src/cli/tui/components/TextInput.tsx index 02ffe2bf0..abf353ded 100644 --- a/src/cli/tui/components/TextInput.tsx +++ b/src/cli/tui/components/TextInput.tsx @@ -108,9 +108,11 @@ export function TextInput({ // Get display value (masked or plain) const displayValue = mask ? mask.repeat(value.length) : value; - // Simple split for cursor positioning (used by both modes) + // Simple split for cursor positioning (used by both modes). + // When the input is empty, the cursor sits over the placeholder's first character so it + // stays visible (the dim placeholder remainder below renders placeholder.slice(1)). const beforeCursorFull = displayValue.slice(0, cursor); - const charAtCursorFull = displayValue[cursor] ?? ' '; + const charAtCursorFull = displayValue[cursor] ?? (!value && placeholder ? (placeholder[0] ?? 
' ') : ' '); const afterCursorFull = displayValue.slice(cursor + 1); if (expandable) { diff --git a/src/cli/tui/components/__tests__/DeployStatus.test.tsx b/src/cli/tui/components/__tests__/DeployStatus.test.tsx index b661ec13c..63aaa26e2 100644 --- a/src/cli/tui/components/__tests__/DeployStatus.test.tsx +++ b/src/cli/tui/components/__tests__/DeployStatus.test.tsx @@ -154,6 +154,21 @@ describe('DeployStatus', () => { // Should show the latest progress expect(lastFrame()).toContain('7/10'); }); + + it('clamps when CDK reports completed greater than total without throwing', () => { + // CDK toolkit can briefly report completed > total during graph expansion. + // Before the clamp, this asked String.repeat for a negative count and crashed + // the deploy TUI with "Invalid count value: -10". + const messages = [makeMsg('overflow', 'CDK_TOOLKIT_I5502', { completed: 50, total: 30 })]; + + expect(() => render()).not.toThrow(); + }); + + it('clamps when CDK reports a negative completed count', () => { + const messages = [makeMsg('underflow', 'CDK_TOOLKIT_I5502', { completed: -5, total: 10 })]; + + expect(() => render()).not.toThrow(); + }); }); describe('warning state (post-deploy errors)', () => { diff --git a/src/cli/tui/components/__tests__/ResourceGraph.test.tsx b/src/cli/tui/components/__tests__/ResourceGraph.test.tsx index 875cb42f0..04efa7b22 100644 --- a/src/cli/tui/components/__tests__/ResourceGraph.test.tsx +++ b/src/cli/tui/components/__tests__/ResourceGraph.test.tsx @@ -9,6 +9,7 @@ const baseProject: AgentCoreProjectSpec = { name: 'test-project', runtimes: [], memories: [], + knowledgeBases: [], credentials: [], } as unknown as AgentCoreProjectSpec; diff --git a/src/cli/tui/components/__tests__/TextInput.test.tsx b/src/cli/tui/components/__tests__/TextInput.test.tsx index 79865a607..746f6ba47 100644 --- a/src/cli/tui/components/__tests__/TextInput.test.tsx +++ b/src/cli/tui/components/__tests__/TextInput.test.tsx @@ -20,13 +20,15 @@ describe('TextInput', () => 
{ expect(lastFrame()).toContain('Enter name:'); }); - it('renders placeholder when value is empty', () => { + it('renders the full placeholder (including its first char) when value is empty', () => { const { lastFrame } = render( ); - // Placeholder shows all chars after cursor position (slice(1)) - expect(lastFrame()).toContain('y-agent'); + // The first placeholder char sits under the cursor, the rest renders dim — together + // the full placeholder must be visible (regression guard for the slice(1) truncation bug + // that rendered "my-agent" as "y-agent"). + expect(lastFrame()).toContain('my-agent'); }); it('renders initial value', () => { diff --git a/src/cli/tui/components/jwt-config/DomainOverridesManager.tsx b/src/cli/tui/components/jwt-config/DomainOverridesManager.tsx new file mode 100644 index 000000000..0326a6673 --- /dev/null +++ b/src/cli/tui/components/jwt-config/DomainOverridesManager.tsx @@ -0,0 +1,219 @@ +import { useListNavigation } from '../../hooks'; +import type { SelectableItem } from '../index'; +import { TextInput } from '../index'; +import type { DomainOverrideEntry, DomainOverridesManagerMode } from './types'; +import { formatOverrideSummary } from './types'; +import { Box, Text } from 'ink'; +import React, { useCallback, useMemo, useState } from 'react'; + +const MAX_OVERRIDES = 5; +const RCFG_PATTERN = + /^((rcfg-[0-9a-z]{17})|(arn:[a-z0-9-]+:vpc-lattice:[a-zA-Z0-9-]+:\d{12}:resourceconfiguration\/rcfg-[0-9a-z]{17}))$/; + +export interface DomainOverridesManagerProps { + initialOverrides: DomainOverrideEntry[]; + onDone: (overrides: DomainOverrideEntry[]) => void; + onCancel: () => void; + onModeChange?: (mode: DomainOverridesManagerMode) => void; +} + +/** + * Repeatable list of per-domain private-endpoint overrides (Lattice-only). Mirrors + * CustomClaimsManager's list/add/edit/delete mode machine; each entry is a {domain, rcfg-id} pair. 
+ * Only offered under the self-managed (VPC Lattice) arm, so every override is a lattice resource — + * which is exactly what the service and the AWS Console expose. + */ +export function DomainOverridesManager({ + initialOverrides, + onDone, + onCancel, + onModeChange, +}: DomainOverridesManagerProps) { + const [overrides, setOverrides] = useState(initialOverrides); + const [mode, setMode] = useState(initialOverrides.length > 0 ? 'list' : 'add'); + const [editIndex, setEditIndex] = useState(-1); + // Two-field form: capture the domain, then the rcfg id. + const [formField, setFormField] = useState<'domain' | 'rcfg'>('domain'); + const [draftDomain, setDraftDomain] = useState(''); + + React.useEffect(() => { + onModeChange?.(mode); + }, [mode, onModeChange]); + + const atLimit = overrides.length >= MAX_OVERRIDES; + + const actionItems = useMemo(() => { + const items: SelectableItem[] = []; + if (!atLimit) items.push({ id: 'add', title: 'Add domain override' }); + if (overrides.length > 0) { + items.push({ id: 'edit', title: 'Edit existing override' }); + items.push({ id: 'delete', title: 'Delete override' }); + } + items.push({ id: 'done', title: 'Done' }); + return items; + }, [overrides.length, atLimit]); + + const startAdd = useCallback(() => { + setEditIndex(-1); + setDraftDomain(''); + setFormField('domain'); + setMode('add'); + }, []); + + const actionNav = useListNavigation({ + items: actionItems, + onSelect: item => { + if (item.id === 'add') startAdd(); + else if (item.id === 'edit') setMode('edit-pick'); + else if (item.id === 'delete') setMode('delete-pick'); + else if (item.id === 'done') onDone(overrides); + }, + onExit: onCancel, + isActive: mode === 'list', + }); + + const pickerItems = useMemo( + () => overrides.map((o, i) => ({ id: String(i), title: formatOverrideSummary(o) })), + [overrides] + ); + + const editPickerNav = useListNavigation({ + items: pickerItems, + onSelect: (_, index) => { + setEditIndex(index); + 
setDraftDomain(overrides[index]?.domain ?? ''); + setFormField('domain'); + setMode('edit'); + }, + onExit: () => setMode('list'), + isActive: mode === 'edit-pick', + }); + + const deletePickerNav = useListNavigation({ + items: pickerItems, + onSelect: (_, index) => { + setOverrides(prev => { + const next = prev.filter((_, i) => i !== index); + setMode(next.length === 0 ? 'add' : 'list'); + return next; + }); + }, + onExit: () => setMode('list'), + isActive: mode === 'delete-pick', + }); + + const isFormMode = mode === 'add' || mode === 'edit'; + + const cancelForm = useCallback(() => { + if (overrides.length > 0) setMode('list'); + else onCancel(); + }, [overrides.length, onCancel]); + + const submitOverride = useCallback( + (rcfg: string) => { + const entry: DomainOverrideEntry = { domain: draftDomain.trim(), resourceConfigurationId: rcfg.trim() }; + if (mode === 'edit' && editIndex >= 0) { + setOverrides(prev => prev.map((o, i) => (i === editIndex ? entry : o))); + } else { + setOverrides(prev => [...prev, entry]); + } + setEditIndex(-1); + setMode('list'); + }, + [draftDomain, mode, editIndex] + ); + + return ( + + Per-domain private-endpoint overrides (optional) + + Map an additional IdP domain (e.g. a private jwks_uri) to its own VPC Lattice resource — up to {MAX_OVERRIDES}. + + + {mode === 'list' && ( + + {overrides.length > 0 && ( + + {overrides.map((o, i) => ( + + {i + 1}. {formatOverrideSummary(o)} + + ))} + + )} + + {actionItems.map((item, idx) => { + const isCursor = idx === actionNav.selectedIndex; + return ( + + {isCursor ? '❯' : ' '} {item.title} + + ); + })} + + + )} + + {mode === 'edit-pick' && ( + + Select an override to edit: + {pickerItems.map((item, idx) => { + const isCursor = idx === editPickerNav.selectedIndex; + return ( + + {isCursor ? 
'❯' : ' '} {item.title} + + ); + })} + + )} + + {mode === 'delete-pick' && ( + + Select an override to delete: + {pickerItems.map((item, idx) => { + const isCursor = idx === deletePickerNav.selectedIndex; + return ( + + {isCursor ? '❯' : ' '} {item.title} + + ); + })} + + )} + + {isFormMode && formField === 'domain' && ( + + { + setDraftDomain(value.trim()); + setFormField('rcfg'); + }} + onCancel={cancelForm} + customValidation={value => + (value.trim().length >= 1 && value.trim().length <= 253) || 'Domain must be 1-253 characters' + } + /> + + )} + + {isFormMode && formField === 'rcfg' && ( + + = 0 ? overrides[editIndex]?.resourceConfigurationId : ''} + onSubmit={submitOverride} + onCancel={() => setFormField('domain')} + customValidation={value => + RCFG_PATTERN.test(value.trim()) || 'Must be a VPC Lattice resource-config id (rcfg-...) or its ARN' + } + /> + + )} + + ); +} diff --git a/src/cli/tui/components/jwt-config/JwtConfigInput.tsx b/src/cli/tui/components/jwt-config/JwtConfigInput.tsx index 871af8951..193df6dd7 100644 --- a/src/cli/tui/components/jwt-config/JwtConfigInput.tsx +++ b/src/cli/tui/components/jwt-config/JwtConfigInput.tsx @@ -1,11 +1,42 @@ -import { useMultiSelectNavigation } from '../../hooks'; -import { SecretInput, TextInput, WizardMultiSelect } from '../index'; +import { + LATTICE_RESOURCE_CONFIG_PATTERN, + SECURITY_GROUP_ID_PATTERN, + SUBNET_ID_PATTERN, +} from '../../../../schema/schemas/auth'; +import { useListNavigation, useMultiSelectNavigation } from '../../hooks'; +import { SecretInput, TextInput, WizardMultiSelect, WizardSelect } from '../index'; import { CustomClaimsManager } from './CustomClaimsManager'; -import type { ClaimsManagerMode, ConstraintType, CustomClaimEntry, JwtSubStep } from './types'; -import { CONSTRAINT_ITEMS, OIDC_WELL_KNOWN_SUFFIX, validateCommaSeparated } from './types'; +import { DomainOverridesManager } from './DomainOverridesManager'; +import type { + ClaimsManagerMode, + ConstraintType, + CustomClaimEntry, 
+ DomainOverrideEntry, + DomainOverridesManagerMode, + JwtSubStep, +} from './types'; +import { + CONSTRAINT_ITEMS, + ENDPOINT_IP_TYPE_ITEMS, + OIDC_WELL_KNOWN_SUFFIX, + PRIVATE_ENDPOINT_TYPE_ITEMS, + validateCommaSeparated, +} from './types'; import { Box, Text } from 'ink'; import React from 'react'; +/** Validate a comma-separated list of ids against a schema regex (matches the CLI flag path strictness). */ +function validateIdList(value: string, pattern: RegExp, label: string, max?: number): true | string { + const ids = value + .split(',') + .map(s => s.trim()) + .filter(Boolean); + if (ids.length === 0) return 'At least one value is required'; + if (max && ids.length > max) return `At most ${max} allowed`; + const bad = ids.find(id => !pattern.test(id)); + return bad ? `Invalid ${label}: "${bad}"` : true; +} + export interface JwtConfigInputProps { subStep: JwtSubStep; steps: JwtSubStep[]; @@ -15,12 +46,28 @@ export interface JwtConfigInputProps { audience: string; clients: string; scopes: string; + // PrivateLink inbound (harness-only; optional so Gateway/Agent/Generate callers are unaffected). 
+ latticeResourceId?: string; + vpcId?: string; + vpcSubnets?: string; + vpcSecurityGroups?: string; + vpcRoutingDomain?: string; + domainOverrides?: DomainOverrideEntry[]; onDiscoveryUrl: (url: string) => void; onConstraintsPicked: (selectedIds: string[]) => void; onAudience: (audience: string) => void; onClients: (clients: string) => void; onScopes: (scopes: string) => void; onCustomClaimsDone: (claims: CustomClaimEntry[]) => void; + onPrivateEndpointType?: (type: string) => void; + onLatticeResourceId?: (value: string) => void; + onVpcId?: (value: string) => void; + onVpcSubnets?: (value: string) => void; + onVpcIpType?: (value: string) => void; + onVpcSecurityGroups?: (value: string) => void; + onVpcRoutingDomain?: (value: string) => void; + onDomainOverridesDone?: (overrides: DomainOverrideEntry[]) => void; + onOverridesManagerModeChange?: (mode: DomainOverridesManagerMode) => void; onClientId: (clientId: string) => void; onClientIdSkip: () => void; onClientSecret: (clientSecret: string) => void; @@ -37,12 +84,27 @@ export function JwtConfigInput({ audience, clients, scopes, + latticeResourceId = '', + vpcId = '', + vpcSubnets = '', + vpcSecurityGroups = '', + vpcRoutingDomain = '', + domainOverrides = [], onDiscoveryUrl, onConstraintsPicked, onAudience, onClients, onScopes, onCustomClaimsDone, + onPrivateEndpointType, + onLatticeResourceId, + onVpcId, + onVpcSubnets, + onVpcIpType, + onVpcSecurityGroups, + onVpcRoutingDomain, + onDomainOverridesDone, + onOverridesManagerModeChange, onClientId, onClientIdSkip, onClientSecret, @@ -65,6 +127,20 @@ export function JwtConfigInput({ requireSelection: true, }); + const privateEndpointTypeNav = useListNavigation({ + items: PRIVATE_ENDPOINT_TYPE_ITEMS, + onSelect: item => onPrivateEndpointType?.(item.id), + onExit: () => onBack(), + isActive: subStep === 'privateEndpointType', + }); + + const ipTypeNav = useListNavigation({ + items: ENDPOINT_IP_TYPE_ITEMS, + onSelect: item => onVpcIpType?.(item.id), + onExit: () => 
onBack(), + isActive: subStep === 'vpcIpType', + }); + return ( Configure Custom JWT Authorizer @@ -146,6 +222,89 @@ export function JwtConfigInput({ onModeChange={onClaimsManagerModeChange} /> )} + {subStep === 'privateEndpointType' && ( + + )} + {subStep === 'latticeResourceId' && ( + onLatticeResourceId?.(v)} + onCancel={onBack} + customValidation={value => + LATTICE_RESOURCE_CONFIG_PATTERN.test(value.trim()) || + 'Must be a VPC Lattice resource-config id (rcfg-...) or ARN' + } + /> + )} + {subStep === 'domainOverrides' && ( + onDomainOverridesDone?.(overrides)} + onCancel={onBack} + onModeChange={onOverridesManagerModeChange} + /> + )} + {subStep === 'vpcId' && ( + onVpcId?.(v)} + onCancel={onBack} + customValidation={value => + /^vpc-(([0-9a-z]{8})|([0-9a-z]{17}))$/.test(value.trim()) || 'Must be a VPC id (vpc-...)' + } + /> + )} + {subStep === 'vpcSubnets' && ( + onVpcSubnets?.(v)} + onCancel={onBack} + customValidation={value => validateIdList(value, SUBNET_ID_PATTERN, 'subnet id (subnet-...)')} + /> + )} + {subStep === 'vpcIpType' && ( + + )} + {subStep === 'vpcSecurityGroups' && ( + onVpcSecurityGroups?.(v)} + onCancel={onBack} + allowEmpty + customValidation={value => + value.trim() === '' + ? 
true + : validateIdList(value, SECURITY_GROUP_ID_PATTERN, 'security group id (sg-...)', 5) + } + /> + )} + {subStep === 'vpcRoutingDomain' && ( + onVpcRoutingDomain?.(v)} + onCancel={onBack} + allowEmpty + /> + )} {subStep === 'clientId' && ( Optional: Provide OAuth credentials for bearer token fetching diff --git a/src/cli/tui/components/jwt-config/index.ts b/src/cli/tui/components/jwt-config/index.ts index 4b15714ce..c2e90c9c7 100644 --- a/src/cli/tui/components/jwt-config/index.ts +++ b/src/cli/tui/components/jwt-config/index.ts @@ -4,4 +4,5 @@ export { useJwtConfigFlow } from './useJwtConfigFlow'; export type { JwtConfig } from './useJwtConfigFlow'; export { CustomClaimsManager } from './CustomClaimsManager'; export { CustomClaimForm } from './CustomClaimForm'; +export { DomainOverridesManager } from './DomainOverridesManager'; export * from './types'; diff --git a/src/cli/tui/components/jwt-config/types.ts b/src/cli/tui/components/jwt-config/types.ts index fdb477772..4814aa974 100644 --- a/src/cli/tui/components/jwt-config/types.ts +++ b/src/cli/tui/components/jwt-config/types.ts @@ -9,9 +9,39 @@ export type JwtSubStep = | 'clients' | 'scopes' | 'customClaims' + | 'privateEndpointType' + | 'latticeResourceId' + | 'domainOverrides' + | 'vpcId' + | 'vpcSubnets' + | 'vpcIpType' + | 'vpcSecurityGroups' + | 'vpcRoutingDomain' | 'clientId' | 'clientSecret'; +/** Which PrivateLink endpoint arm the user is configuring (or none). 
*/ +export type PrivateEndpointType = 'none' | 'lattice' | 'vpc'; + +export const PRIVATE_ENDPOINT_TYPE_ITEMS: SelectableItem[] = [ + { id: 'none', title: 'None', description: 'The IdP discovery endpoint is publicly reachable' }, + { + id: 'lattice', + title: 'VPC Lattice resource', + description: 'Reach the discovery endpoint via a self-managed VPC Lattice resource configuration', + }, + { + id: 'vpc', + title: 'Managed VPC endpoint', + description: 'Reach the discovery endpoint via a service-managed VPC interface endpoint', + }, +]; + +export const ENDPOINT_IP_TYPE_ITEMS: SelectableItem[] = [ + { id: 'IPV4', title: 'IPV4', description: 'IPv4 addressing' }, + { id: 'IPV6', title: 'IPV6', description: 'IPv6 addressing' }, +]; + export type ClaimValueType = 'STRING' | 'STRING_ARRAY'; export type ClaimOperator = 'EQUALS' | 'CONTAINS' | 'CONTAINS_ANY'; @@ -24,6 +54,22 @@ export interface CustomClaimEntry { export type ClaimsManagerMode = 'list' | 'add' | 'edit-pick' | 'edit' | 'delete-pick'; +/** + * A per-domain private-endpoint override. Lattice-only: a domain mapped to its own VPC Lattice + * resource-config (mirrors the AWS Console, which surfaces overrides only under the self-managed arm + * and only as a {domain, resourceConfigurationId} pair). 
+ */ +export interface DomainOverrideEntry { + domain: string; + resourceConfigurationId: string; +} + +export type DomainOverridesManagerMode = 'list' | 'add' | 'edit-pick' | 'edit' | 'delete-pick'; + +export function formatOverrideSummary(o: DomainOverrideEntry): string { + return `${o.domain} → ${o.resourceConfigurationId}`; +} + export const CONSTRAINT_ITEMS: SelectableItem[] = [ { id: 'audience', title: 'Allowed Audiences', description: 'Validate token audience claims' }, { id: 'clients', title: 'Allowed Clients', description: 'Validate client identifiers in the token' }, diff --git a/src/cli/tui/components/jwt-config/useJwtConfigFlow.ts b/src/cli/tui/components/jwt-config/useJwtConfigFlow.ts index 73fcff14d..8547232ac 100644 --- a/src/cli/tui/components/jwt-config/useJwtConfigFlow.ts +++ b/src/cli/tui/components/jwt-config/useJwtConfigFlow.ts @@ -1,5 +1,18 @@ -import type { CustomClaimValidation } from '../../../../schema'; -import type { ClaimsManagerMode, ConstraintType, CustomClaimEntry, JwtSubStep } from './types'; +import type { + CustomClaimValidation, + EndpointIpAddressType, + PrivateEndpoint, + PrivateEndpointOverride, +} from '../../../../schema'; +import type { + ClaimsManagerMode, + ConstraintType, + CustomClaimEntry, + DomainOverrideEntry, + DomainOverridesManagerMode, + JwtSubStep, + PrivateEndpointType, +} from './types'; import { useCallback, useMemo, useState } from 'react'; export interface JwtConfig { @@ -10,14 +23,20 @@ export interface JwtConfig { customClaims?: CustomClaimValidation[]; clientId?: string; clientSecret?: string; + /** PrivateLink inbound endpoint for reaching the OIDC discovery URL (singular arm). */ + privateEndpoint?: PrivateEndpoint; + /** Per-domain private-endpoint overrides (Lattice-only; ≤5). */ + privateEndpointOverrides?: PrivateEndpointOverride[]; } interface UseJwtConfigFlowOptions { onComplete: (jwtConfig: JwtConfig) => void; onBack: () => void; + /** Enable the PrivateLink-inbound sub-steps (harness only). 
Defaults to false. */ + enablePrivateEndpoint?: boolean; } -export function useJwtConfigFlow({ onComplete, onBack }: UseJwtConfigFlowOptions) { +export function useJwtConfigFlow({ onComplete, onBack, enablePrivateEndpoint = false }: UseJwtConfigFlowOptions) { const [subStep, setSubStep] = useState('discoveryUrl'); const [discoveryUrl, setDiscoveryUrl] = useState(''); const [selectedConstraints, setSelectedConstraints] = useState>(new Set()); @@ -27,17 +46,36 @@ export function useJwtConfigFlow({ onComplete, onBack }: UseJwtConfigFlowOptions const [customClaims, setCustomClaims] = useState([]); const [clientId, setClientId] = useState(''); const [claimsManagerMode, setClaimsManagerMode] = useState('add'); + // PrivateLink inbound state + const [privateEndpointType, setPrivateEndpointType] = useState('none'); + const [latticeResourceId, setLatticeResourceId] = useState(''); + const [vpcId, setVpcId] = useState(''); + const [vpcSubnets, setVpcSubnets] = useState(''); + const [vpcIpType, setVpcIpType] = useState('IPV4'); + const [vpcSecurityGroups, setVpcSecurityGroups] = useState(''); + const [vpcRoutingDomain, setVpcRoutingDomain] = useState(''); + const [domainOverrides, setDomainOverrides] = useState([]); + const [overridesManagerMode, setOverridesManagerMode] = useState('list'); - // Compute the ordered list of JWT sub-steps based on selected constraints + // Compute the ordered list of JWT sub-steps based on selected constraints + private-endpoint arm const steps = useMemo(() => { const result: JwtSubStep[] = ['discoveryUrl', 'constraintPicker']; if (selectedConstraints.has('audience')) result.push('audience'); if (selectedConstraints.has('clients')) result.push('clients'); if (selectedConstraints.has('scopes')) result.push('scopes'); if (selectedConstraints.has('customClaims')) result.push('customClaims'); + if (enablePrivateEndpoint) { + result.push('privateEndpointType'); + if (privateEndpointType === 'lattice') { + // Per-domain overrides are Lattice-only 
(matches the service + AWS Console). + result.push('latticeResourceId', 'domainOverrides'); + } else if (privateEndpointType === 'vpc') { + result.push('vpcId', 'vpcSubnets', 'vpcIpType', 'vpcSecurityGroups', 'vpcRoutingDomain'); + } + } result.push('clientId', 'clientSecret'); return result; - }, [selectedConstraints]); + }, [selectedConstraints, privateEndpointType, enablePrivateEndpoint]); const stepIndex = steps.indexOf(subStep); @@ -61,14 +99,47 @@ export function useJwtConfigFlow({ onComplete, onBack }: UseJwtConfigFlowOptions .map(v => v.trim()) .filter(Boolean); + const buildPrivateEndpoint = useCallback((): PrivateEndpoint | undefined => { + if (privateEndpointType === 'lattice' && latticeResourceId.trim()) { + return { selfManagedLatticeResource: { resourceConfigurationIdentifier: latticeResourceId.trim() } }; + } + if (privateEndpointType === 'vpc' && vpcId.trim()) { + const sgs = parseList(vpcSecurityGroups); + return { + managedVpcResource: { + vpcIdentifier: vpcId.trim(), + subnetIds: parseList(vpcSubnets), + endpointIpAddressType: vpcIpType, + ...(sgs.length > 0 ? { securityGroupIds: sgs } : {}), + ...(vpcRoutingDomain.trim() ? { routingDomain: vpcRoutingDomain.trim() } : {}), + }, + }; + } + return undefined; + }, [privateEndpointType, latticeResourceId, vpcId, vpcSubnets, vpcIpType, vpcSecurityGroups, vpcRoutingDomain]); + const finishConfig = useCallback( (clientSecret: string) => { const audienceList = selectedConstraints.has('audience') ? parseList(audience) : undefined; const clientsList = selectedConstraints.has('clients') ? parseList(clients) : undefined; const scopesList = selectedConstraints.has('scopes') ? parseList(scopes) : undefined; + const privateEndpoint = buildPrivateEndpoint(); + // Overrides are Lattice-only and only collected under the lattice arm, so each maps to a + // selfManagedLatticeResource — keeping every endpoint the same arm (the service's rule). 
+ const overrides: PrivateEndpointOverride[] | undefined = + privateEndpointType === 'lattice' && domainOverrides.length > 0 + ? domainOverrides.map(o => ({ + domain: o.domain, + privateEndpoint: { + selfManagedLatticeResource: { resourceConfigurationIdentifier: o.resourceConfigurationId }, + }, + })) + : undefined; const config: JwtConfig = { discoveryUrl, + ...(privateEndpoint ? { privateEndpoint } : {}), + ...(overrides ? { privateEndpointOverrides: overrides } : {}), ...(audienceList && audienceList.length > 0 ? { allowedAudience: audienceList } : {}), ...(clientsList && clientsList.length > 0 ? { allowedClients: clientsList } : {}), ...(scopesList && scopesList.length > 0 ? { allowedScopes: scopesList } : {}), @@ -98,7 +169,19 @@ export function useJwtConfigFlow({ onComplete, onBack }: UseJwtConfigFlowOptions onComplete(config); setSubStep('discoveryUrl'); }, - [selectedConstraints, audience, clients, scopes, discoveryUrl, customClaims, clientId, onComplete] + [ + selectedConstraints, + audience, + clients, + scopes, + discoveryUrl, + customClaims, + clientId, + buildPrivateEndpoint, + privateEndpointType, + domainOverrides, + onComplete, + ] ); const handlers = { @@ -106,17 +189,53 @@ export function useJwtConfigFlow({ onComplete, onBack }: UseJwtConfigFlowOptions setDiscoveryUrl(url); setSubStep('constraintPicker'); }, - handleConstraintsPicked: useCallback((selectedIds: string[]) => { - const constraints = new Set(selectedIds as ConstraintType[]); - setSelectedConstraints(constraints); - const order: ConstraintType[] = ['audience', 'clients', 'scopes', 'customClaims']; - const first = order.find(c => constraints.has(c)); - if (first) { - setSubStep(first); - } else { - setSubStep('clientId'); - } + handleConstraintsPicked: useCallback( + (selectedIds: string[]) => { + const constraints = new Set(selectedIds as ConstraintType[]); + setSelectedConstraints(constraints); + const order: ConstraintType[] = ['audience', 'clients', 'scopes', 'customClaims']; + 
const first = order.find(c => constraints.has(c)); + // Private-endpoint type follows the constraints block when enabled; else jump to clientId. + setSubStep(first ?? (enablePrivateEndpoint ? 'privateEndpointType' : 'clientId')); + }, + [enablePrivateEndpoint] + ), + handlePrivateEndpointType: (type: string) => { + setPrivateEndpointType(type as PrivateEndpointType); + // Step list recomputes from privateEndpointType; advance to the first step after it. + if (type === 'lattice') setSubStep('latticeResourceId'); + else if (type === 'vpc') setSubStep('vpcId'); + else setSubStep('clientId'); + }, + handleLatticeResourceId: (value: string) => { + setLatticeResourceId(value); + setSubStep('domainOverrides'); + }, + handleDomainOverridesDone: useCallback((entries: DomainOverrideEntry[]) => { + setDomainOverrides(entries); + setSubStep('clientId'); }, []), + handleOverridesManagerModeChange: setOverridesManagerMode, + handleVpcId: (value: string) => { + setVpcId(value); + setSubStep('vpcSubnets'); + }, + handleVpcSubnets: (value: string) => { + setVpcSubnets(value); + setSubStep('vpcIpType'); + }, + handleVpcIpType: (value: string) => { + setVpcIpType(value as EndpointIpAddressType); + setSubStep('vpcSecurityGroups'); + }, + handleVpcSecurityGroups: (value: string) => { + setVpcSecurityGroups(value); + setSubStep('vpcRoutingDomain'); + }, + handleVpcRoutingDomain: (value: string) => { + setVpcRoutingDomain(value); + setSubStep('clientId'); + }, handleAudience: (value: string) => { setAudience(value); goNext(); @@ -160,6 +279,15 @@ export function useJwtConfigFlow({ onComplete, onBack }: UseJwtConfigFlowOptions clients, scopes, claimsManagerMode, + privateEndpointType, + latticeResourceId, + vpcId, + vpcSubnets, + vpcIpType, + vpcSecurityGroups, + vpcRoutingDomain, + domainOverrides, + overridesManagerMode, goBack, handlers, }; diff --git a/src/cli/tui/copy.ts b/src/cli/tui/copy.ts index bf1509f4a..794dc000b 100644 --- a/src/cli/tui/copy.ts +++ b/src/cli/tui/copy.ts @@ 
-24,6 +24,43 @@ export const QUICK_START = { tip: 'Coding agents can implement project and config changes', } as const; +/** + * Command descriptions used in CLI help and TUI. + */ +export const COMMAND_DESCRIPTIONS = { + /** Main program description */ + program: 'Build and deploy Agentic AI applications on AgentCore', + /** Command descriptions */ + add: 'Add resources to project config.', + create: 'Create a new AgentCore project', + deploy: 'Deploy project infrastructure to AWS via CDK.', + dev: 'Launch local dev server, or invoke an agent locally.', + invoke: 'Invoke a deployed agent endpoint.', + logs: 'Stream or search agent runtime logs.', + package: 'Package agent artifacts without deploying.', + remove: 'Remove resources from project config.', + status: 'Show deployed resource details and status.', + traces: 'View and download agent traces.', + evals: 'View saved eval and batch eval results from past runs.', + feedback: 'Send feedback about the AgentCore CLI to the team.', + fetch: 'Fetch access info for deployed resources.', + pause: 'Pause a deployed resource (online eval config, A/B test).', + resume: 'Resume a paused resource (online eval config, A/B test).', + recommend: 'Run optimization recommendations for system prompts and tool descriptions.', + recommendations: 'View recommendation jobs and their results.', + batchEvaluations: 'View batch evaluation jobs and their results.', + abTests: 'View A/B test jobs and their results.', + insights: '[preview] Manage insights analysis jobs.', + run: 'Run evaluations, batch evaluations, insights [preview], or optimization recommendations.', + stop: 'Stop a running batch evaluation or A/B test.', + import: 'Import a runtime, memory, or starter toolkit into this project. 
[experimental]', + telemetry: 'Manage anonymous usage analytics preferences.', + update: 'Check for and install CLI updates', + validate: 'Validate agentcore/ config files.', + 'config-bundle': 'Manage configuration bundle versions and diffs.', + archive: 'Archive (delete) a batch evaluation or recommendation on the service and clear local history.', +} as const; + /** * CLI-only command examples and usage information. * These commands must run in the terminal, not in the TUI. @@ -59,7 +96,7 @@ export const CLI_ONLY_EXAMPLES: Record --json', ], }, + 'run-insights': { + description: '[preview] Run failure analysis on agent sessions. This command runs in the terminal.', + examples: [ + 'agentcore run insights -r MyAgent -i FailureAnalysis', + 'agentcore run insights -r MyAgent -i FailureAnalysis --lookback 7', + 'agentcore run insights -r MyAgent -i FailureAnalysis --wait', + ], + }, + feedback: { + description: 'Send feedback about the AgentCore CLI to the team.', + examples: ['agentcore feedback', 'agentcore feedback --screenshot'], + }, + config: { + description: 'Adjust global configuration settings such as telemetry opt-out status.', + examples: ['agentcore config'], + }, + insights: { + description: '[preview] View insights analysis jobs and results.', + examples: ['agentcore insights history', 'agentcore insights results --id '], + }, }; diff --git a/src/cli/tui/hooks/__tests__/useDevDeploy.test.tsx b/src/cli/tui/hooks/__tests__/useDevDeploy.test.tsx index de56d0e29..690055c55 100644 --- a/src/cli/tui/hooks/__tests__/useDevDeploy.test.tsx +++ b/src/cli/tui/hooks/__tests__/useDevDeploy.test.tsx @@ -1,14 +1,40 @@ import { useDevDeploy } from '../useDevDeploy.js'; import { Text } from 'ink'; import { render } from 'ink-testing-library'; -import { afterEach, describe, expect, it, vi } from 'vitest'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; -const mockHandleDeploy = vi.fn(); +const { mockHandleDeploy, mockReadProjectSpec, 
mockEnsureDefaultDeploymentTarget, mockCanSkipDeploy } = vi.hoisted( + () => ({ + mockHandleDeploy: vi.fn(), + mockReadProjectSpec: vi.fn(), + mockEnsureDefaultDeploymentTarget: vi.fn(), + mockCanSkipDeploy: vi.fn(), + }) +); vi.mock('../../../commands/deploy/actions.js', () => ({ handleDeploy: (...args: unknown[]) => mockHandleDeploy(...args), })); +// The mount effect now reads the project spec, ensures a deploy target, and checks +// for changes before deploying. Mock those so the effect reaches handleDeploy instead +// of hanging/erroring on the real ConfigIO (no project on disk in tests). Keep the rest +// of `lib` intact (getErrorMessage et al. are resolved through it) and override only ConfigIO. +vi.mock('../../../../lib', async importActual => ({ + ...(await importActual()), + ConfigIO: vi.fn(function (this: Record) { + this.readProjectSpec = mockReadProjectSpec; + }), +})); + +vi.mock('../../../operations/deploy', () => ({ + ensureDefaultDeploymentTarget: (...args: unknown[]) => mockEnsureDefaultDeploymentTarget(...args), +})); + +vi.mock('../../../operations/deploy/change-detection', () => ({ + canSkipDeploy: (...args: unknown[]) => mockCanSkipDeploy(...args), +})); + function Harness({ skip }: { skip?: boolean }) { const { steps, isComplete, error } = useDevDeploy({ skip }); return ( @@ -19,6 +45,14 @@ function Harness({ skip }: { skip?: boolean }) { } describe('useDevDeploy', () => { + beforeEach(() => { + // Default: a deployable project (has a harness) with changes to deploy, so the + // effect proceeds to handleDeploy. Individual tests override handleDeploy's result. 
+ mockReadProjectSpec.mockResolvedValue({ harnesses: [{ name: 'test-harness' }] }); + mockEnsureDefaultDeploymentTarget.mockResolvedValue(undefined); + mockCanSkipDeploy.mockResolvedValue(false); + }); + afterEach(() => { vi.clearAllMocks(); }); diff --git a/src/cli/tui/hooks/__tests__/usePanelNavigation.test.tsx b/src/cli/tui/hooks/__tests__/usePanelNavigation.test.tsx index 89182b2e5..5df2685fa 100644 --- a/src/cli/tui/hooks/__tests__/usePanelNavigation.test.tsx +++ b/src/cli/tui/hooks/__tests__/usePanelNavigation.test.tsx @@ -212,7 +212,7 @@ describe('usePanelNavigation', () => { if (nav.position.layer === 'active') { nav.deactivate(); } - }, [nav.position.layer, nav.position.column, nav.position.field, nav.deactivate]); + }, [nav]); return ( diff --git a/src/cli/tui/hooks/__tests__/useRemove.test.tsx b/src/cli/tui/hooks/__tests__/useRemove.test.tsx index e0ed7f9ac..aefe2cd2c 100644 --- a/src/cli/tui/hooks/__tests__/useRemove.test.tsx +++ b/src/cli/tui/hooks/__tests__/useRemove.test.tsx @@ -81,6 +81,15 @@ vi.mock('../../../logging', () => ({ })), })); +// Mock the telemetry wrapper so it just runs the inner fn. Without this, the real +// withCommandRunTelemetry awaits getTelemetryClient()/flush(), which never settles +// in tests — leaving useRemoveResource stuck at isLoading:true with a null result. 
+vi.mock('../../../telemetry/cli-command-run.js', () => ({ + withCommandRunTelemetry: vi.fn((_command: string, _attrs: unknown, fn: (recorder: unknown) => unknown) => + fn({ set: vi.fn(), get: vi.fn(() => ({})) }) + ), +})); + function delay(ms = 100) { return new Promise(resolve => setTimeout(resolve, ms)); } diff --git a/src/cli/tui/hooks/index.ts b/src/cli/tui/hooks/index.ts index ef420ccc6..8223f9823 100644 --- a/src/cli/tui/hooks/index.ts +++ b/src/cli/tui/hooks/index.ts @@ -6,7 +6,13 @@ export { useExitHandler } from './useExitHandler'; export { useListNavigation } from './useListNavigation'; export { useMultiSelectNavigation } from './useMultiSelectNavigation'; export { useResponsive } from './useResponsive'; -export { useAvailableAgents, useCreateGateway, useExistingGateways } from './useCreateMcp'; +export { + useAvailableAgents, + useCreateGateway, + useExistingGateways, + useExistingKnowledgeBases, + useMcpGatewayNames, +} from './useCreateMcp'; export { useDevServer } from './useDevServer'; export { useLogsStream } from './useLogsStream'; export { useProject } from './useProject'; diff --git a/src/cli/tui/hooks/useCreateABTest.ts b/src/cli/tui/hooks/useCreateABTest.ts deleted file mode 100644 index 89c36715e..000000000 --- a/src/cli/tui/hooks/useCreateABTest.ts +++ /dev/null @@ -1,93 +0,0 @@ -import type { AddTargetBasedABTestOptions } from '../../primitives/ABTestPrimitive'; -import { abTestPrimitive } from '../../primitives/registry'; -import type { GatewayChoice } from '../screens/ab-test/types'; -import { useCallback, useEffect, useState } from 'react'; - -interface CreateABTestConfig { - name: string; - description?: string; - agent: string; - gatewayChoice?: GatewayChoice; - controlBundle: string; - controlVersion: string; - treatmentBundle: string; - treatmentVersion: string; - controlWeight: number; - treatmentWeight: number; - onlineEval: string; - maxDuration?: number; - enableOnCreate?: boolean; -} - -export function useCreateABTest() { - 
const [status, setStatus] = useState<{ state: 'idle' | 'loading' | 'success' | 'error'; error?: string }>({ - state: 'idle', - }); - - const create = useCallback(async (config: CreateABTestConfig) => { - setStatus({ state: 'loading' }); - try { - const addResult = await abTestPrimitive.add({ - name: config.name, - description: config.description, - agent: config.agent, - gatewayChoice: config.gatewayChoice, - controlBundle: config.controlBundle, - controlVersion: config.controlVersion, - treatmentBundle: config.treatmentBundle, - treatmentVersion: config.treatmentVersion, - controlWeight: config.controlWeight, - treatmentWeight: config.treatmentWeight, - onlineEval: config.onlineEval, - maxDurationDays: config.maxDuration, - enableOnCreate: config.enableOnCreate, - }); - if (!addResult.success) { - throw new Error(addResult.error?.message ?? 'Failed to create AB test'); - } - setStatus({ state: 'success' }); - return { ok: true as const, testName: config.name }; - } catch (err) { - const message = err instanceof Error ? err.message : 'Failed to create AB test.'; - setStatus({ state: 'error', error: message }); - return { ok: false as const, error: message }; - } - }, []); - - const createTargetBased = useCallback(async (config: Omit) => { - setStatus({ state: 'loading' }); - try { - const addResult = await abTestPrimitive.addTargetBased(config); - if (!addResult.success) { - throw new Error(addResult.error?.message ?? 'Failed to create target-based AB test'); - } - setStatus({ state: 'success' }); - return { ok: true as const, testName: config.name }; - } catch (err) { - const message = err instanceof Error ? 
err.message : 'Failed to create target-based AB test.';
-      setStatus({ state: 'error', error: message });
-      return { ok: false as const, error: message };
-    }
-  }, []);
-
-  const reset = useCallback(() => {
-    setStatus({ state: 'idle' });
-  }, []);
-
-  return { status, createABTest: create, createTargetBasedABTest: createTargetBased, reset };
-}
-
-export function useExistingABTestNames() {
-  const [names, setNames] = useState([]);
-
-  useEffect(() => {
-    void abTestPrimitive.getAllNames().then(setNames);
-  }, []);
-
-  const refresh = useCallback(async () => {
-    const result = await abTestPrimitive.getAllNames();
-    setNames(result);
-  }, []);
-
-  return { names, refresh };
-}
diff --git a/src/cli/tui/hooks/useCreateMcp.ts b/src/cli/tui/hooks/useCreateMcp.ts
index db541c76e..fd9a4adce 100644
--- a/src/cli/tui/hooks/useCreateMcp.ts
+++ b/src/cli/tui/hooks/useCreateMcp.ts
@@ -2,6 +2,7 @@ import {
   agentPrimitive,
   gatewayPrimitive,
   gatewayTargetPrimitive,
+  knowledgeBasePrimitive,
   policyEnginePrimitive,
 } from '../../primitives/registry';
 import { withCommandRunTelemetry } from '../../telemetry/cli-command-run.js';
@@ -91,6 +92,78 @@ export function useExistingGateways() {
   return { gateways, refresh };
 }
 
+export function useMcpGatewayNames() {
+  const [mcpGateways, setMcpGateways] = useState([]);
+
+  useEffect(() => {
+    async function load() {
+      const result = await gatewayPrimitive.getMcpGatewayNames();
+      setMcpGateways(result);
+    }
+    void load();
+  }, []);
+
+  return { mcpGateways };
+}
+
+export function useExistingRuntimeNames() {
+  const [runtimeNames, setRuntimeNames] = useState([]);
+
+  useEffect(() => {
+    async function load() {
+      const result = await gatewayPrimitive.getRuntimeNames();
+      setRuntimeNames(result);
+    }
+    void load();
+  }, []);
+
+  return { runtimeNames };
+}
+
+/**
+ * Loads the endpoints of the given runtime whenever `runtimeName` changes.
+ *
+ * `loaded` is false while a lookup is in flight or when no runtime is selected. A failed
+ * lookup is reported as an empty, loaded list so callers are not left waiting forever.
+ */
+export function useRuntimeEndpoints(runtimeName: string | undefined) {
+  const [endpoints, setEndpoints] = useState<{ name: string; version: number }[]>([]);
+  const [loaded, setLoaded] = useState(false);
+
+  useEffect(() => {
+    // Set by the cleanup so a lookup started for a previous runtimeName (or one that
+    // resolves after unmount) cannot overwrite the state of the current one.
+    let cancelled = false;
+
+    async function load() {
+      if (!runtimeName) {
+        setEndpoints([]);
+        setLoaded(false);
+        return;
+      }
+      setLoaded(false);
+      try {
+        const result = await gatewayPrimitive.getRuntimeEndpoints(runtimeName);
+        if (cancelled) return;
+        setEndpoints(result);
+      } catch {
+        // Best-effort, like useExistingKnowledgeBases: fall back to an empty list rather
+        // than leaving an unhandled rejection and `loaded` stuck at false.
+        if (cancelled) return;
+        setEndpoints([]);
+      }
+      setLoaded(true);
+    }
+    void load();
+
+    return () => {
+      cancelled = true;
+    };
+  }, [runtimeName]);
+
+  return { endpoints, loaded };
+}
+
 export function useExistingPolicyEngines() {
   const [engines, setEngines] = useState([]);
 
@@ -156,6 +229,33 @@ export function useExistingToolNames() {
   return { toolNames, refresh };
 }
 
+export function useExistingKnowledgeBases() {
+  const [knowledgeBases, setKnowledgeBases] = useState([]);
+
+  useEffect(() => {
+    async function load() {
+      try {
+        const removable = await knowledgeBasePrimitive.getRemovable();
+        setKnowledgeBases(removable.map(kb => kb.name));
+      } catch {
+        setKnowledgeBases([]);
+      }
+    }
+    void load();
+  }, []);
+
+  const refresh = useCallback(async () => {
+    try {
+      const removable = await knowledgeBasePrimitive.getRemovable();
+      setKnowledgeBases(removable.map(kb => kb.name));
+    } catch {
+      setKnowledgeBases([]);
+    }
+  }, []);
+
+  return { knowledgeBases, refresh };
+}
+
 export function useUnassignedTargets() {
   const [targets, setTargets] = useState([]);
 
diff --git a/src/cli/tui/hooks/useCreateOnlineEval.ts b/src/cli/tui/hooks/useCreateOnlineEval.ts
index 0fa53f56b..c9204fbf7 100644
--- a/src/cli/tui/hooks/useCreateOnlineEval.ts
+++ b/src/cli/tui/hooks/useCreateOnlineEval.ts
@@ -6,6 +6,8 @@ interface CreateOnlineEvalConfig {
   name: string;
   agent: string;
   endpoint?: string;
+  logGroupNames?: string[];
+  serviceNames?: string[];
   evaluators: string[];
   samplingRate: number;
   sessionTimeoutMinutes?: number;
@@ -31,6 +33,8 @@ export function useCreateOnlineEval() {
         name: config.name,
         agent: config.agent,
         ...(config.endpoint ? { endpoint: config.endpoint } : {}),
+        ...(config.logGroupNames ? { logGroupNames: config.logGroupNames } : {}),
+        ...(config.serviceNames ?
{ serviceNames: config.serviceNames } : {}), evaluators: config.evaluators, samplingRate: config.samplingRate, ...(config.sessionTimeoutMinutes !== undefined && { sessionTimeoutMinutes: config.sessionTimeoutMinutes }), diff --git a/src/cli/tui/hooks/useDevServer.ts b/src/cli/tui/hooks/useDevServer.ts index 80114cceb..17d0b969d 100644 --- a/src/cli/tui/hooks/useDevServer.ts +++ b/src/cli/tui/hooks/useDevServer.ts @@ -124,7 +124,7 @@ export function useDevServer(options: { setConfigLoaded(true); }; void load(); - }, [options.workingDir]); + }, [options.workingDir, options.agentName]); const config: DevConfig | null = useMemo(() => { if (!project || !options.agentName) { diff --git a/src/cli/tui/hooks/useRemove.ts b/src/cli/tui/hooks/useRemove.ts index 3825c00c0..b8177c632 100644 --- a/src/cli/tui/hooks/useRemove.ts +++ b/src/cli/tui/hooks/useRemove.ts @@ -3,11 +3,11 @@ import type { ResourceType } from '../../commands/remove/types'; import { RemoveLogger } from '../../logging'; import type { RemovableGatewayTarget, RemovalPreview } from '../../operations/remove'; import type { RemovableCredential } from '../../primitives/CredentialPrimitive'; +import type { RemovableKnowledgeBase } from '../../primitives/KnowledgeBasePrimitive'; import type { RemovableMemory } from '../../primitives/MemoryPrimitive'; import type { RemovablePolicyResource } from '../../primitives/PolicyPrimitive'; import type { RemovableRuntimeEndpoint } from '../../primitives/RuntimeEndpointPrimitive'; import { - abTestPrimitive, agentPrimitive, configBundlePrimitive, credentialPrimitive, @@ -16,6 +16,7 @@ import { gatewayPrimitive, gatewayTargetPrimitive, harnessPrimitive, + knowledgeBasePrimitive, memoryPrimitive, onlineEvalConfigPrimitive, paymentConnectorPrimitive, @@ -33,6 +34,7 @@ export type { RemovableMemory, RemovableCredential as RemovableIdentity, RemovableGatewayTarget, + RemovableKnowledgeBase, RemovablePolicyResource, RemovableRuntimeEndpoint, }; @@ -160,6 +162,11 @@ export function 
useRemovableDatasets() { return { datasets, ...rest }; } +export function useRemovableKnowledgeBases() { + const { items: knowledgeBases, ...rest } = useRemovableResources(() => knowledgeBasePrimitive.getRemovable()); + return { knowledgeBases, ...rest }; +} + export function useRemovableOnlineEvalConfigs() { const { items: onlineEvalConfigs, ...rest } = useRemovableResources(() => onlineEvalConfigPrimitive.getRemovable()); return { onlineEvalConfigs, ...rest }; @@ -180,19 +187,6 @@ export function useRemovableConfigBundles() { return { configBundles, ...rest }; } -export function useRemovableABTests() { - const { items: abTests, ...rest } = useRemovableResources(() => abTestPrimitive.getRemovable()); - return { abTests, ...rest }; -} - -export function useRemoveABTest() { - return useRemoveResource( - (name: string) => abTestPrimitive.remove(name), - 'ab-test', - name => name - ); -} - export function useRemovableRuntimeEndpoints() { const { items: endpoints, ...rest } = useRemovableResources(() => runtimeEndpointPrimitive.getRemovable() @@ -277,6 +271,10 @@ export function useRemovalPreview() { (name: string) => loadPreview(n => datasetPrimitive.previewRemove(n), name), [loadPreview] ); + const loadKnowledgeBasePreview = useCallback( + (name: string) => loadPreview(n => knowledgeBasePrimitive.previewRemove(n), name), + [loadPreview] + ); const loadOnlineEvalPreview = useCallback( (name: string) => loadPreview(n => onlineEvalConfigPrimitive.previewRemove(n), name), [loadPreview] @@ -294,11 +292,6 @@ export function useRemovalPreview() { [loadPreview] ); - const loadABTestPreview = useCallback( - (name: string) => loadPreview(n => abTestPrimitive.previewRemove(n), name), - [loadPreview] - ); - const loadRuntimeEndpointPreview = useCallback( (name: string) => loadPreview(n => runtimeEndpointPrimitive.previewRemove(n), name), [loadPreview] @@ -318,11 +311,11 @@ export function useRemovalPreview() { loadIdentityPreview, loadEvaluatorPreview, loadDatasetPreview, + 
loadKnowledgeBasePreview, loadOnlineEvalPreview, loadPolicyEnginePreview, loadPolicyPreview, loadConfigBundlePreview, - loadABTestPreview, loadRuntimeEndpointPreview, reset, }; @@ -403,6 +396,14 @@ export function useRemoveDataset() { ); } +export function useRemoveKnowledgeBase() { + return useRemoveResource( + (name: string) => knowledgeBasePrimitive.remove(name), + 'knowledge-base', + name => name + ); +} + export function useRemovePolicyEngine() { return useRemoveResource( (name: string) => policyEnginePrimitive.remove(name), diff --git a/src/cli/tui/render.ts b/src/cli/tui/render.ts index 89142c7bb..8a2f91434 100644 --- a/src/cli/tui/render.ts +++ b/src/cli/tui/render.ts @@ -118,10 +118,13 @@ export async function renderTUI(options: RenderTUIOptions = {}) { */ export function setupAltScreenCleanup() { const cleanup = () => { - if (!process.stdout.isTTY) return; - if (inAltScreen) { - process.stdout.write(EXIT_ALT_SCREEN); + // Only emit terminal control sequences if we actually entered the alt screen (i.e. a TUI ran). + // Plain CLI/JSON commands never hid the cursor, so writing SHOW_CURSOR here would leak the + // `\x1b[?25h` escape into stdout and corrupt piped/redirected output (e.g. `... --json | jq`). 
+ if (!inAltScreen) { + return; } + process.stdout.write(EXIT_ALT_SCREEN); process.stdout.write(SHOW_CURSOR); }; diff --git a/src/cli/tui/screens/ab-test/ABTestDetailScreen.tsx b/src/cli/tui/screens/ab-test/ABTestDetailScreen.tsx deleted file mode 100644 index ddf2cbb72..000000000 --- a/src/cli/tui/screens/ab-test/ABTestDetailScreen.tsx +++ /dev/null @@ -1,637 +0,0 @@ -import { ConfigIO } from '../../../../lib'; -import { getCredentialProvider } from '../../../aws/account'; -import { getABTest, updateABTest } from '../../../aws/agentcore-ab-tests'; -import type { GetABTestResult } from '../../../aws/agentcore-ab-tests'; -import { getOnlineEvaluationConfig } from '../../../aws/agentcore-control'; -import { getHttpGateway, listHttpGatewayTargets } from '../../../aws/agentcore-http-gateways'; -import { dnsSuffix } from '../../../aws/partition'; -import { getErrorMessage } from '../../../errors'; -import { GradientText, Screen } from '../../components'; -import { CloudWatchLogsClient, FilterLogEventsCommand } from '@aws-sdk/client-cloudwatch-logs'; -import type { FilterLogEventsCommandInput, FilteredLogEvent } from '@aws-sdk/client-cloudwatch-logs'; -import { Box, Text, useInput } from 'ink'; -import React, { useCallback, useEffect, useRef, useState } from 'react'; - -interface ABTestDetailScreenProps { - abTestId: string; - region: string; - onExit: () => void; -} - -/** Derive the gateway URL from a gateway ARN. */ -function gatewayUrlFromArn(arn: string): string { - const parts = arn.split(':'); - const region = parts[3]; - const gatewayId = parts[5]?.split('/')[1]; - if (region && gatewayId) { - return `https://${gatewayId}.gateway.bedrock-agentcore.${region}.${dnsSuffix(region)}`; - } - return arn; -} - -/** Extract the resource ID from an ARN (last segment after / or :). 
*/ -function extractId(arn: string): string { - const slashIdx = arn.lastIndexOf('/'); - if (slashIdx !== -1) return arn.slice(slashIdx + 1); - const colonIdx = arn.lastIndexOf(':'); - if (colonIdx !== -1) return arn.slice(colonIdx + 1); - return arn; -} - -/** Truncate a version ID to 8 characters. */ -function shortVersion(version: string): string { - return version.slice(0, 8); -} - -/** Format a Unix epoch timestamp (seconds) to a UTC date string. */ -function formatTimestamp(ts: string | number): string { - const ms = typeof ts === 'string' ? parseFloat(ts) * 1000 : ts * 1000; - const d = new Date(ms); - return d - .toISOString() - .replace('T', ' ') - .replace(/\.\d+Z$/, ' UTC'); -} - -/** Build a horizontal rule with optional left label and right label. */ -function rule(left?: string, right?: string, width = 48): string { - if (!left && !right) return '─'.repeat(width); - const leftPart = left ? `── ${left} ` : '──'; - const rightPart = right ? ` ${right} ──` : ''; - const fillLen = width - leftPart.length - rightPart.length; - const fill = fillLen > 0 ? '─'.repeat(fillLen) : ''; - return `${leftPart}${fill}${rightPart}`; -} - -interface DebugCheckResult { - label: string; - status: 'pass' | 'fail' | 'warn'; - detail: string; -} - -async function runDebugChecks(test: GetABTestResult, region: string): Promise { - const results: DebugCheckResult[] = []; - const logsClient = new CloudWatchLogsClient({ region, credentials: getCredentialProvider() }); - - // 1. AB Test Status - results.push({ - label: 'AB Test Status', - status: test.status === 'ACTIVE' && test.executionStatus === 'RUNNING' ? 'pass' : 'warn', - detail: `${test.status} / ${test.executionStatus}`, - }); - - // 1b. AB Test Role - results.push({ - label: 'AB Test Role', - status: test.roleArn ? 'pass' : 'warn', - detail: test.roleArn ?? 'No role ARN', - }); - - // 2. 
Online Eval Config(s) - const evalConfigArns: { name: string; arn: string }[] = - 'perVariantOnlineEvaluationConfig' in test.evaluationConfig - ? test.evaluationConfig.perVariantOnlineEvaluationConfig.map(v => ({ - name: v.name, - arn: v.onlineEvaluationConfigArn, - })) - : [{ name: '', arn: test.evaluationConfig.onlineEvaluationConfigArn }]; - - for (const { name: variantName, arn: evalArn } of evalConfigArns) { - const evalConfigId = extractId(evalArn); - const labelSuffix = variantName ? ` (${variantName})` : ''; - try { - const evalConfig = await getOnlineEvaluationConfig({ region, configId: evalConfigId }); - results.push({ - label: `Online Eval Config${labelSuffix}`, - status: evalConfig.executionStatus === 'ENABLED' ? 'pass' : 'fail', - detail: `${evalConfig.configName} — ${evalConfig.executionStatus}`, - }); - } catch (err) { - results.push({ label: `Online Eval Config${labelSuffix}`, status: 'fail', detail: getErrorMessage(err) }); - } - } - - // 2b. Gateway Role - const gatewayId = extractId(test.gatewayArn); - try { - const gateway = await getHttpGateway({ region, gatewayId }); - results.push({ - label: 'Gateway Role', - status: gateway.roleArn ? 'pass' : 'warn', - detail: gateway.roleArn ?? 'No role ARN', - }); - } catch (err) { - results.push({ label: 'Gateway Role', status: 'fail', detail: getErrorMessage(err) }); - } - - // 5. Runtime spans — check for experiment metadata per variant in aws/spans and runtime log group - // service.name in spans follows the pattern: {projectName}_{agentName}.{endpoint} - // We derive the service name prefix from the deployed state runtimeId (strip random suffix). - const twoHoursAgo = Date.now() - 2 * 60 * 60 * 1000; - const variantNames = test.variants.map(v => v.name); - let serviceNamePrefix: string | undefined; - let runtimeId: string | undefined; - try { - const configIO = new ConfigIO(); - const deployedState = await configIO.readDeployedState(); - for (const [, target] of Object.entries(deployedState.targets ?? 
{})) { - const runtimes = target.resources?.runtimes ?? {}; - const firstRuntime = Object.values(runtimes)[0]; - if (firstRuntime?.runtimeId) { - runtimeId = firstRuntime.runtimeId; - serviceNamePrefix = runtimeId.replace(/-[^-]+$/, ''); - break; - } - } - } catch { - // Fall back to abTestArn-only filtering if deployed state isn't readable - } - - const runtimeLogGroupName = runtimeId ? `/aws/bedrock-agentcore/runtimes/${runtimeId}-DEFAULT` : undefined; - const logGroupsToQuery = ['aws/spans']; - if (runtimeLogGroupName) { - logGroupsToQuery.push(runtimeLogGroupName); - } - - const queryAllGroups = (params: Omit) => - Promise.all( - logGroupsToQuery.map(lg => - logsClient - .send(new FilterLogEventsCommand({ ...params, logGroupName: lg })) - .catch(() => ({ events: [] as FilteredLogEvent[] })) - ) - ); - - try { - const baseFilter = serviceNamePrefix ? `"${serviceNamePrefix}"` : '"gen_ai_agent"'; - - const runtimeSpanResults = await queryAllGroups({ - startTime: twoHoursAgo, - filterPattern: baseFilter, - limit: 1, - }); - const hasRuntimeSpans = runtimeSpanResults.some(r => (r.events?.length ?? 0) > 0); - - const variantSpanCounts = await Promise.all( - variantNames.map(async name => { - const results = await queryAllGroups({ - startTime: twoHoursAgo, - filterPattern: `"${test.abTestArn}" "${name}"`, - limit: 50, - }); - return results.reduce((sum, r) => sum + (r.events?.length ?? 0), 0); - }) - ); - - const totalExperimentSpans = variantSpanCounts.reduce((sum, count) => sum + count, 0); - - for (let i = 0; i < variantNames.length; i++) { - const name = variantNames[i]; - const count = variantSpanCounts[i] ?? 0; - const label = `Runtime Experiment Spans — ${name} (2h)`; - - if (count > 0) { - results.push({ label, status: 'pass', detail: `${count} spans with experiment metadata` }); - } else if (hasRuntimeSpans) { - results.push({ - label, - status: 'warn', - detail: - totalExperimentSpans > 0 - ? 
`No spans for ${name} — traffic may not be reaching this variant` - : 'Runtime spans found but no experiment metadata — update bedrock-agentcore SDK to the latest version', - }); - } else { - results.push({ label, status: 'warn', detail: 'No runtime spans found — send traffic to the gateway first' }); - } - } - } catch (err) { - results.push({ label: 'Runtime Experiment Spans', status: 'fail', detail: getErrorMessage(err) }); - } - - // 6. Eval Results — check each eval config's log group - const thirtyMinAgo = Date.now() - 30 * 60 * 1000; - for (const { name: variantName, arn: evalArn } of evalConfigArns) { - const configId = extractId(evalArn); - const labelSuffix = variantName ? ` (${variantName})` : ''; - try { - const evalLogGroup = `/aws/bedrock-agentcore/evaluations/results/${configId}`; - - const [allEvents, taggedEvents] = await Promise.all([ - logsClient.send(new FilterLogEventsCommand({ logGroupName: evalLogGroup, startTime: thirtyMinAgo, limit: 1 })), - logsClient.send( - new FilterLogEventsCommand({ - logGroupName: evalLogGroup, - startTime: thirtyMinAgo, - filterPattern: `"${test.abTestArn}"`, - limit: 100, - }) - ), - ]); - - const hasResults = (allEvents.events?.length ?? 0) > 0; - const taggedCount = taggedEvents.events?.length ?? 0; - - if (!hasResults) { - results.push({ - label: `Eval Results${labelSuffix}`, - status: 'warn', - detail: 'No eval results yet — wait ~5m after session timeout for evaluator to process', - }); - } else { - results.push({ - label: `Eval Results${labelSuffix}`, - status: taggedCount > 0 ? 'pass' : 'warn', - detail: - taggedCount > 0 - ? `${taggedCount} results tagged with AB test` - : 'Results exist but none tagged with variant — check gateway trace delivery', - }); - } - } catch (err) { - const msg = getErrorMessage(err); - results.push({ - label: `Eval Results${labelSuffix}`, - status: msg.includes('ResourceNotFoundException') ? 'warn' : 'fail', - detail: msg.includes('ResourceNotFoundException') ? 
'Log group not found — evaluator has not run yet' : msg, - }); - } - } - - // 6. Aggregation Results - const metrics = test.results?.evaluatorMetrics ?? []; - const reporting = metrics.filter(m => m.controlStats?.sampleSize > 0); - results.push({ - label: 'Aggregation Results', - status: reporting.length > 0 ? 'pass' : 'warn', - detail: - reporting.length > 0 - ? `${reporting.length} evaluator(s) reporting` - : 'No aggregation data yet — wait ~12-15m after traffic', - }); - - return results; -} - -export function ABTestDetailScreen({ abTestId, region, onExit }: ABTestDetailScreenProps) { - const [test, setTest] = useState(null); - const [error, setError] = useState(null); - const [actionMessage, setActionMessage] = useState(null); - const [confirmingStop, setConfirmingStop] = useState(false); - const [confirmingPromote, setConfirmingPromote] = useState(false); - const [debugResults, setDebugResults] = useState(null); - const [debugLoading, setDebugLoading] = useState(false); - const [targetName, setTargetName] = useState(''); - - const hasFetched = useRef(false); - useEffect(() => { - if (hasFetched.current) return; - hasFetched.current = true; - const load = async () => { - try { - const result = await getABTest({ region, abTestId }); - setTest(result); - - // Fetch gateway target name for invocation URL - const gwId = extractId(result.gatewayArn); - try { - const targets = await listHttpGatewayTargets({ region, gatewayId: gwId, maxResults: 1 }); - const firstTarget = targets.targets[0]; - if (firstTarget) setTargetName(firstTarget.name); - } catch { - // Best-effort — URL will show without target path - } - } catch (err) { - setError(getErrorMessage(err)); - } - }; - void load(); - }, [region, abTestId]); - - const performAction = useCallback( - async (targetStatus: 'PAUSED' | 'RUNNING' | 'STOPPED', label: string) => { - setActionMessage(`${label}...`); - try { - await updateABTest({ region, abTestId, executionStatus: targetStatus }); - // Poll until status 
updates or max attempts reached - for (let i = 0; i < 5; i++) { - await new Promise(resolve => setTimeout(resolve, 1000)); - const result = await getABTest({ region, abTestId }); - setTest(result); - if (result.executionStatus === targetStatus) { - setActionMessage(label.replace('...', 'd').replace('ing', 'ed')); - return; - } - } - // Final fetch even if status didn't converge - setActionMessage(label.replace('ing', 'ed')); - } catch (err: unknown) { - setActionMessage(`Error: ${getErrorMessage(err)}`); - } - }, - [region, abTestId] - ); - - useInput((input, _key) => { - if (!test) return; - - if (confirmingStop) { - if (input === 'y' || input === 'Y') { - setConfirmingStop(false); - void performAction('STOPPED', 'Stopping'); - } else { - setConfirmingStop(false); - } - return; - } - - if (confirmingPromote) { - if (input === 'y' || input === 'Y') { - setConfirmingPromote(false); - setActionMessage('Promoting...'); - void (async () => { - try { - // Stop the AB test - await updateABTest({ region, abTestId, executionStatus: 'STOPPED' }); - for (let i = 0; i < 5; i++) { - await new Promise(resolve => setTimeout(resolve, 1000)); - const result = await getABTest({ region, abTestId }); - setTest(result); - if (result.executionStatus === 'STOPPED') break; - } - - // Apply promotion to agentcore.json - let promotionDetail = ''; - try { - const { promoteABTestConfig } = await import('../../../operations/ab-test/promote'); - const promoResult = await promoteABTestConfig(abTestId, test.name); - promotionDetail = promoResult.promoted - ? `${promoResult.promotionDetail} Run \`agentcore deploy\` to apply.` - : promoResult.promotionDetail; - } catch { - // Config update failed — still report the stop - } - - setActionMessage(promotionDetail || 'AB test stopped. 
Run `agentcore deploy` to apply.'); - } catch (err) { - setActionMessage(`Error: ${getErrorMessage(err)}`); - } - })(); - } else { - setConfirmingPromote(false); - } - return; - } - - if (input === 'p' || input === 'P') { - void performAction('PAUSED', 'Pausing'); - } - - if (input === 'r' || input === 'R') { - void performAction('RUNNING', 'Resuming'); - } - - if (input === 's' || input === 'S') { - setConfirmingStop(true); - setActionMessage(null); - } - - if (input === 'w' || input === 'W') { - setConfirmingPromote(true); - setActionMessage(null); - } - - if (input === 'd' || input === 'D') { - setDebugLoading(true); - setDebugResults(null); - void runDebugChecks(test, region) - .then(results => { - setDebugResults(results); - setDebugLoading(false); - }) - .catch(() => { - setDebugResults([{ label: 'Debug', status: 'fail' as const, detail: 'Diagnostics failed to run' }]); - setDebugLoading(false); - }); - } - }); - - if (error) { - return ( - - {`Error: ${error}`} - - ); - } - - if (!test) { - return ( - - Loading... - - ); - } - - const controlVariant = test.variants.find(v => v.name === 'C'); - const treatmentVariant = test.variants.find(v => v.name === 'T1'); - - const executionColor = - test.executionStatus === 'RUNNING' ? 'green' : test.executionStatus === 'PAUSED' ? 'yellow' : 'red'; - - const helpParts: string[] = []; - if (test.executionStatus === 'RUNNING') { - helpParts.push('P pause', 'S stop', 'W promote'); - } else if (test.executionStatus === 'PAUSED') { - helpParts.push('R resume', 'S stop', 'W promote'); - } - helpParts.push('D debug', 'Esc exit'); - const helpKeys = helpParts.join(' · '); - - // Build status text: only show provisioning status if not ACTIVE - const statusPrefix = test.status !== 'ACTIVE' ? `${test.status} ` : ''; - - // TODO(post-preview): Re-enable duration display once configurable duration is launched. 
- const durationText = ''; - - // Column width for side-by-side variants - const colW = 28; - - return ( - - - {/* ── Header: Line 1 — status ─────────────────────────── */} - - - {statusPrefix && {statusPrefix}} - {`● ${test.executionStatus}`} - - {durationText && {durationText}} - - - {/* ── Header: Line 2 — invocation URL ────────────────────── */} - {targetName ? ( - - {`Invocation URL: ${gatewayUrlFromArn(test.gatewayArn)}/${targetName}/invocations`} - - ) : ( - - Invocation URL: loading... - - )} - - {/* ── Header: Line 3 — online eval (only for single-config mode) ── */} - {'onlineEvaluationConfigArn' in test.evaluationConfig && ( - - {`Online Eval: ${extractId(test.evaluationConfig.onlineEvaluationConfigArn)}`} - - )} - - {/* ── Description (if present) ────────────────────────── */} - {test.description && ( - - {`Description: ${test.description}`} - - )} - - {/* ── Variants: side-by-side ──────────────────────────── */} - - - {'CONTROL (C)'} - {`${String(controlVariant?.weight ?? 'N/A')}% traffic`} - - {controlVariant?.variantConfiguration.target - ? `target: ${controlVariant.variantConfiguration.target.name}` - : `${extractId(controlVariant?.variantConfiguration.configurationBundle?.bundleArn ?? '')} @ ${shortVersion(controlVariant?.variantConfiguration.configurationBundle?.bundleVersion ?? '')}`} - - - - {'TREATMENT (T1)'} - {`${String(treatmentVariant?.weight ?? 'N/A')}% traffic`} - - {treatmentVariant?.variantConfiguration.target - ? `target: ${treatmentVariant.variantConfiguration.target.name}` - : `${extractId(treatmentVariant?.variantConfiguration.configurationBundle?.bundleArn ?? '')} @ ${shortVersion(treatmentVariant?.variantConfiguration.configurationBundle?.bundleVersion ?? '')}`} - - - - - {/* ── Evaluation Results ───────────────────────────────── */} - - {test.results ? ( - <> - - {rule( - 'Results', - test.results.analysisTimestamp ? 
formatTimestamp(test.results.analysisTimestamp) : undefined - )} - - - - {''} - - - {'Control'} - - - {'Treatment'} - - {'Δ'} - - {test.results.evaluatorMetrics.map((metric, i) => ( - 0 ? 1 : 0}> - - - {extractId(metric.evaluatorArn)} - - - {metric.controlStats.mean.toFixed(4)} - - - {metric.variantResults[0]?.mean.toFixed(4) ?? ''} - - {metric.variantResults[0]?.isSignificant ? ( - {`+${(metric.variantResults[0]?.percentChange ?? 0).toFixed(2)}% ✓`} - ) : ( - {`${(metric.variantResults[0]?.percentChange ?? 0).toFixed(2)}% ✗`} - )} - - - - {''} - - - {`n=${metric.controlStats.sampleSize}`} - - - {`n=${metric.variantResults[0]?.sampleSize ?? ''}`} - - {`p=${metric.variantResults[0]?.pValue?.toFixed(3) ?? 'N/A'}`} - - - ))} - - ) : ( - <> - {rule('Results')} - - No evaluation results yet. - - - )} - - - {/* ── Debug Panel ─────────────────────────────────────── */} - {debugLoading && ( - - - - )} - {debugResults && ( - - {rule('Pipeline Debug')} - {debugResults.map((check, i) => { - const icon = check.status === 'pass' ? '✓' : check.status === 'fail' ? '✗' : '⚠'; - const color = check.status === 'pass' ? 'green' : check.status === 'fail' ? 'red' : 'yellow'; - return ( - - {` ${icon} `} - {check.label} - {` ${check.detail}`} - - ); - })} - - )} - - {/* ── Stop confirmation ────────────────────────────────── */} - {confirmingStop && ( - - - { - 'Stop this AB test permanently? All traffic will shift to the control variant. This cannot be undone. (Y/n)' - } - - - )} - - {/* ── Promote confirmation ─────────────────────────────── */} - {confirmingPromote && ( - - - { - 'Promote treatment as winner? This will stop the AB test and update the control endpoint to the treatment version. Run `agentcore deploy` after to apply. 
(Y/n)' - } - - - )} - - {/* ── Action feedback ──────────────────────────────────── */} - {actionMessage && !confirmingStop && ( - - {actionMessage} - - )} - - - ); -} diff --git a/src/cli/tui/screens/ab-test/ABTestPickerScreen.tsx b/src/cli/tui/screens/ab-test/ABTestPickerScreen.tsx deleted file mode 100644 index 9d47e4441..000000000 --- a/src/cli/tui/screens/ab-test/ABTestPickerScreen.tsx +++ /dev/null @@ -1,90 +0,0 @@ -import { ConfigIO } from '../../../../lib'; -import type { SelectableItem } from '../../components'; -import { Screen, SelectScreen } from '../../components'; -import { HELP_TEXT } from '../../constants'; -import { ABTestDetailScreen } from './ABTestDetailScreen'; -import { Text } from 'ink'; -import React, { useEffect, useRef, useState } from 'react'; - -interface ABTestPickerScreenProps { - onExit: () => void; -} - -interface DeployedABTest { - name: string; - abTestId: string; -} - -export function ABTestPickerScreen({ onExit }: ABTestPickerScreenProps) { - const [tests, setTests] = useState(null); - const [selectedTest, setSelectedTest] = useState(null); - const [region, setRegion] = useState('us-east-1'); - - const hasFetched = useRef(false); - useEffect(() => { - if (hasFetched.current) return; - hasFetched.current = true; - const load = async () => { - try { - const configIO = new ConfigIO(); - const [deployedState, targets] = await Promise.all([ - configIO.readDeployedState(), - configIO.resolveAWSDeploymentTargets(), - ]); - const found: DeployedABTest[] = []; - for (const target of Object.values(deployedState.targets ?? 
{})) { - const abTests = target.resources?.abTests; - if (abTests) { - for (const [name, state] of Object.entries(abTests)) { - found.push({ name, abTestId: state.abTestId }); - } - } - } - setTests(found); - if (targets.length > 0) setRegion(targets[0]!.region); - } catch { - setTests([]); - } - }; - void load(); - }, []); - - if (selectedTest) { - return setSelectedTest(null)} />; - } - - if (tests === null) { - return ( - - Loading AB tests... - - ); - } - - if (tests.length === 0) { - return ( - - No deployed AB tests found. - Add one with `agentcore add ab-test` and deploy. - - ); - } - - const items: SelectableItem[] = tests.map(t => ({ - id: t.name, - title: t.name, - description: `ID: ${t.abTestId}`, - })); - - return ( - { - const test = tests.find(t => t.name === item.id); - if (test) setSelectedTest(test); - }} - onExit={onExit} - /> - ); -} diff --git a/src/cli/tui/screens/ab-test/AddABTestFlow.tsx b/src/cli/tui/screens/ab-test/AddABTestFlow.tsx deleted file mode 100644 index b8313075d..000000000 --- a/src/cli/tui/screens/ab-test/AddABTestFlow.tsx +++ /dev/null @@ -1,281 +0,0 @@ -import { ConfigIO } from '../../../../lib'; -import { listConfigurationBundleVersions } from '../../../aws/agentcore-config-bundles'; -import { ErrorPrompt } from '../../components'; -import { useCreateABTest, useExistingABTestNames } from '../../hooks/useCreateABTest'; -import { AddSuccessScreen } from '../add/AddSuccessScreen'; -import { AddConfigBundleFlow } from '../config-bundle/AddConfigBundleFlow'; -import { AddABTestScreen } from './AddABTestScreen'; -import type { HttpGatewayInfo, OnlineEvalConfigInfo, RuntimeInfo } from './AddABTestScreen'; -import { TargetBasedABTestScreen } from './TargetBasedABTestScreen'; -import type { AddABTestConfig } from './types'; -import React, { useCallback, useEffect, useState } from 'react'; - -type FlowState = - | { name: 'create-wizard' } - | { name: 'target-wizard' } - | { name: 'create-bundle' } - | { name: 'create-success'; 
testName: string } - | { name: 'error'; message: string }; - -interface AddABTestFlowProps { - isInteractive?: boolean; - onExit: () => void; - onBack: () => void; - onDev?: () => void; - onDeploy?: () => void; -} - -export function AddABTestFlow({ isInteractive = true, onExit, onBack, onDev, onDeploy }: AddABTestFlowProps) { - const { createABTest, createTargetBasedABTest, reset: resetCreate } = useCreateABTest(); - const { names: existingNames } = useExistingABTestNames(); - const [flow, setFlow] = useState({ name: 'create-wizard' }); - - // Load deployed state for bundle lists - const [agents, setAgents] = useState<{ name: string }[]>([]); - const [existingHttpGateways, setExistingHttpGateways] = useState([]); - const [deployedBundles, setDeployedBundles] = useState<{ name: string; bundleId: string }[]>([]); - const [onlineEvalConfigs, setOnlineEvalConfigs] = useState([]); - const [runtimesInfo, setRuntimesInfo] = useState([]); - const [httpGatewayDetails, setHttpGatewayDetails] = useState([]); - const [onlineEvalConfigDetails, setOnlineEvalConfigDetails] = useState([]); - const [region, setRegion] = useState('us-east-1'); - - const [loadEpoch, setLoadEpoch] = useState(0); - - useEffect(() => { - void (async () => { - try { - const configIO = new ConfigIO(); - const deployedState = await configIO.readDeployedState(); - const projectSpec = await configIO.readProjectSpec(); - - // Get region from first target - for (const [, target] of Object.entries(deployedState.targets ?? {})) { - const resources = target.resources; - - // Deployed config bundles - const bundles = resources?.configBundles; - if (bundles) { - setDeployedBundles( - Object.entries(bundles).map(([name, state]) => ({ - name, - bundleId: state.bundleId, - })) - ); - } - break; - } - - // Agents from project spec runtimes - const runtimes = projectSpec.runtimes ?? 
[]; - setAgents(runtimes.map(r => ({ name: r.name }))); - - // Runtimes with endpoints for target-based mode - setRuntimesInfo( - runtimes.map(r => ({ - name: r.name, - endpoints: Object.entries(r.endpoints ?? {}).map(([epName, ep]) => ({ - name: epName, - version: ep.version, - })), - })) - ); - - // Existing HTTP gateways from project spec - const httpGws = projectSpec.httpGateways ?? []; - setExistingHttpGateways(httpGws.map(gw => gw.name)); - - // HTTP gateway details with targets for target-based mode - setHttpGatewayDetails( - httpGws.map(gw => ({ - name: gw.name, - runtimeRef: gw.runtimeRef, - targets: (gw.targets ?? []).map(t => ({ - name: t.name, - runtimeRef: t.runtimeRef, - qualifier: t.qualifier, - })), - })) - ); - - // Online eval configs from project spec - const evalConfigs = projectSpec.onlineEvalConfigs ?? []; - setOnlineEvalConfigs(evalConfigs.map(c => c.name)); - setOnlineEvalConfigDetails( - evalConfigs.map(c => ({ - name: c.name, - agent: c.agent, - endpoint: c.endpoint, - })) - ); - - // Region from aws-targets, falling back to env - const targets = await configIO.resolveAWSDeploymentTargets(); - if (targets.length > 0) { - setRegion(targets[0]!.region); - } else { - setRegion(process.env.AWS_DEFAULT_REGION ?? process.env.AWS_REGION ?? 
'us-east-1'); - } - } catch { - // No deployed state — lists will be empty - } - })(); - }, [loadEpoch]); - - const fetchBundleVersions = useCallback( - async (bundleId: string) => { - try { - const result = await listConfigurationBundleVersions({ region, bundleId }); - return result.versions.map(v => ({ - versionId: v.versionId, - createdAt: v.versionCreatedAt, - })); - } catch { - return []; - } - }, - [region] - ); - - useEffect(() => { - if (!isInteractive && flow.name === 'create-success') { - onExit(); - } - }, [isInteractive, flow.name, onExit]); - - const handleCreateComplete = useCallback( - (config: AddABTestConfig) => { - if (config.mode === 'target-based') { - const gatewayName = - config.gatewayChoice.type === 'existing-http' - ? config.gatewayChoice.name - : config.gatewayChoice.type === 'create-new' - ? `${config.name.replace(/_/g, '-').slice(0, 44)}-gw` - : ''; - void createTargetBasedABTest({ - name: config.name, - description: config.description || undefined, - gateway: gatewayName, - runtime: config.runtime, - controlEndpoint: config.controlEndpoint, - treatmentEndpoint: config.treatmentEndpoint, - controlWeight: config.controlWeight, - treatmentWeight: config.treatmentWeight, - controlOnlineEval: config.controlOnlineEval, - treatmentOnlineEval: config.treatmentOnlineEval, - enableOnCreate: config.enableOnCreate, - }).then(result => { - if (result.ok) { - setFlow({ name: 'create-success', testName: result.testName }); - return; - } - setFlow({ name: 'error', message: result.error }); - }); - return; - } - - const controlWeight = 100 - config.treatmentWeight; - void createABTest({ - name: config.name, - description: config.description || undefined, - agent: config.agent, - gatewayChoice: config.gatewayChoice, - controlBundle: config.controlBundle, - controlVersion: config.controlVersion, - treatmentBundle: config.treatmentBundle, - treatmentVersion: config.treatmentVersion, - controlWeight, - treatmentWeight: config.treatmentWeight, - onlineEval: 
config.onlineEval, - maxDuration: config.maxDuration, - enableOnCreate: config.enableOnCreate, - }).then(result => { - if (result.ok) { - setFlow({ name: 'create-success', testName: result.testName }); - return; - } - setFlow({ name: 'error', message: result.error }); - }); - }, - [createABTest, createTargetBasedABTest] - ); - - const handleSwitchToTargetBased = useCallback(() => { - setFlow({ name: 'target-wizard' }); - }, []); - - const handleCreateBundle = useCallback(() => { - setFlow({ name: 'create-bundle' }); - }, []); - - const handleBundleFlowDone = useCallback(() => { - setLoadEpoch(e => e + 1); - setFlow({ name: 'create-wizard' }); - }, []); - - if (flow.name === 'create-bundle') { - return ( - - ); - } - - if (flow.name === 'target-wizard') { - return ( - - ); - } - - if (flow.name === 'create-wizard') { - return ( - - ); - } - - if (flow.name === 'create-success') { - return ( - - ); - } - - return ( - { - resetCreate(); - setFlow({ name: 'create-wizard' }); - }} - onExit={onExit} - /> - ); -} diff --git a/src/cli/tui/screens/ab-test/AddABTestScreen.tsx b/src/cli/tui/screens/ab-test/AddABTestScreen.tsx deleted file mode 100644 index 3306ce86c..000000000 --- a/src/cli/tui/screens/ab-test/AddABTestScreen.tsx +++ /dev/null @@ -1,914 +0,0 @@ -import { ABTestNameSchema } from '../../../../schema/schemas/primitives/ab-test'; -import type { SelectableItem } from '../../components'; -import { ConfirmReview, Panel, Screen, StepIndicator, TextInput, WizardSelect } from '../../components'; -import { HELP_TEXT } from '../../constants'; -import { useListNavigation } from '../../hooks'; -import type { VersionLoadState } from './VariantConfigForm'; -import { VariantConfigForm } from './VariantConfigForm'; -import type { AddABTestConfig, TargetInfo } from './types'; -import { AB_TEST_STEP_LABELS } from './types'; -import { useAddABTestWizard } from './useAddABTestWizard'; -import { Box, Text } from 'ink'; -import React, { useCallback, useEffect, useMemo, useState } 
from 'react'; - -function formatVersionDate(value: string): string { - const n = Number(value); - if (!isNaN(n) && n > 0) { - // Epoch seconds (< 1e12) vs milliseconds (>= 1e12) - const ms = n < 1e12 ? n * 1000 : n; - return new Date(ms).toLocaleString(); - } - return new Date(value).toLocaleString(); -} - -/** Runtime endpoint info passed from the parent flow. */ -export interface RuntimeEndpointInfo { - name: string; - version: number; -} - -/** Runtime info with endpoints, passed from the parent flow. */ -export interface RuntimeInfo { - name: string; - endpoints: RuntimeEndpointInfo[]; -} - -/** Gateway target info passed from the parent flow. */ -export interface GatewayTargetInfo { - name: string; - runtimeRef: string; - qualifier: string; -} - -/** HTTP gateway info with targets, passed from the parent flow. */ -export interface HttpGatewayInfo { - name: string; - runtimeRef: string; - targets: GatewayTargetInfo[]; -} - -/** Online eval config info with agent and endpoint for filtering. */ -export interface OnlineEvalConfigInfo { - name: string; - agent: string; - endpoint?: string; -} - -interface AddABTestScreenProps { - onComplete: (config: AddABTestConfig) => void; - onExit: () => void; - existingTestNames: string[]; - agents: { name: string }[]; - existingHttpGateways: string[]; - deployedBundles: { name: string; bundleId: string }[]; - onlineEvalConfigs: string[]; - fetchBundleVersions: (bundleId: string) => Promise<{ versionId: string; createdAt: string }[]>; - onCreateBundle?: () => void; - /** Full runtime info including endpoints (for target-based mode). */ - runtimes: RuntimeInfo[]; - /** Full HTTP gateway info including targets (for target-based mode). */ - httpGatewayDetails: HttpGatewayInfo[]; - /** Full online eval config objects for target-based eval filtering. */ - onlineEvalConfigDetails?: OnlineEvalConfigInfo[]; - /** Callback to switch to the dedicated target-based wizard screen. 
*/ - onSwitchToTargetBased?: () => void; -} - -export function AddABTestScreen({ - onComplete, - onExit, - existingTestNames, - agents, - existingHttpGateways, - deployedBundles, - onlineEvalConfigs, - fetchBundleVersions, - onCreateBundle, - runtimes, - httpGatewayDetails, - onlineEvalConfigDetails = [], - onSwitchToTargetBased, -}: AddABTestScreenProps) { - const wizard = useAddABTestWizard(); - - // Build select items - const agentItems: SelectableItem[] = useMemo( - () => agents.map(a => ({ id: a.name, title: a.name, description: 'Agent' })), - [agents] - ); - - const bundleItems: SelectableItem[] = useMemo( - () => deployedBundles.map(b => ({ id: b.name, title: b.name, description: `ID: ${b.bundleId}` })), - [deployedBundles] - ); - - const onlineEvalItems: SelectableItem[] = useMemo( - () => onlineEvalConfigs.map(name => ({ id: name, title: name, description: 'Online Eval Config' })), - [onlineEvalConfigs] - ); - - const gatewayItems: SelectableItem[] = useMemo(() => { - const items: SelectableItem[] = []; - for (const gwName of existingHttpGateways) { - items.push({ id: gwName, title: gwName, description: 'Existing HTTP gateway' }); - } - items.push({ - id: '__create__', - title: '+ Create new gateway', - description: 'Auto-create for this AB test', - spaceBefore: items.length > 0, - }); - return items; - }, [existingHttpGateways]); - - const enableItems: SelectableItem[] = useMemo( - () => [ - { id: 'yes', title: 'Yes', description: 'Start the AB test immediately after deploy' }, - { id: 'no', title: 'No', description: 'Create paused — start manually later' }, - ], - [] - ); - - // Version items — fetched dynamically per bundle - const [controlVersionItems, setControlVersionItems] = React.useState([]); - const [treatmentVersionItems, setTreatmentVersionItems] = React.useState([]); - const [controlVersionLoadState, setControlVersionLoadState] = React.useState('idle'); - const [treatmentVersionLoadState, setTreatmentVersionLoadState] = React.useState('idle'); 
- - const handleFetchVersions = React.useCallback( - (bundleName: string) => { - const bundle = deployedBundles.find(b => b.name === bundleName); - if (!bundle) return; - - setControlVersionLoadState('loading'); - setTreatmentVersionLoadState('loading'); - - void fetchBundleVersions(bundle.bundleId) - .then(versions => { - const items = versions.map(v => ({ - id: v.versionId, - title: v.versionId.slice(0, 8), - description: `Created: ${formatVersionDate(v.createdAt)}`, - })); - setControlVersionItems(items); - setTreatmentVersionItems(items); - setControlVersionLoadState('loaded'); - setTreatmentVersionLoadState('loaded'); - }) - .catch(() => { - setControlVersionLoadState('error'); - setTreatmentVersionLoadState('error'); - }); - }, - [deployedBundles, fetchBundleVersions] - ); - - // ── Gateway sub-flow state (target-based: "create new" text input) ──────── - const [gatewayCreateMode, setGatewayCreateMode] = useState(false); - - // ── Target picker sub-flow state ────────────────────────────────────────── - // Sub-flow phases: 'pick' -> 'selectRuntime' -> 'selectQualifier' - type TargetSubFlowPhase = 'pick' | 'selectRuntime' | 'selectQualifier'; - const [controlSubFlow, setControlSubFlow] = useState('pick'); - const [controlNewRuntime, setControlNewRuntime] = useState(''); - - const [treatmentSubFlow, setTreatmentSubFlow] = useState('pick'); - const [treatmentNewRuntime, setTreatmentNewRuntime] = useState(''); - - /* eslint-disable react-hooks/set-state-in-effect -- intentional reset on step change */ - useEffect(() => { - if (wizard.step === 'controlTarget') { - setControlSubFlow('pick'); - setControlNewRuntime(''); - } - }, [wizard.step]); - - useEffect(() => { - if (wizard.step === 'treatmentTarget') { - setTreatmentSubFlow('pick'); - setTreatmentNewRuntime(''); - } - }, [wizard.step]); - /* eslint-enable react-hooks/set-state-in-effect */ - - // Step flags - const isModeStep = wizard.step === 'mode'; - const isNameStep = wizard.step === 'name'; - const 
isDescriptionStep = wizard.step === 'description'; - const isAgentStep = wizard.step === 'agent'; - const isGatewayStep = wizard.step === 'gateway'; - const isVariantsStep = wizard.step === 'variants'; - const isOnlineEvalStep = wizard.step === 'onlineEval'; - const isControlTargetStep = wizard.step === 'controlTarget'; - const isTreatmentTargetStep = wizard.step === 'treatmentTarget'; - const isWeightsStep = wizard.step === 'weights'; - const isEvalPathStep = wizard.step === 'evalPath'; - const isEvalSelectStep = wizard.step === 'evalSelect'; - const isEnableStep = wizard.step === 'enableOnCreate'; - const isConfirmStep = wizard.step === 'confirm'; - - const isTargetBased = wizard.config.mode === 'target-based'; - - // Tell the wizard which steps to skip (both forward and backward navigation). - const gatewayChoiceTypeRef = React.useRef(wizard.config.gatewayChoice.type); - - const shouldSkipStep = useCallback( - (s: string) => { - // Agent selection is only needed in config-bundle mode when auto-creating a gateway. 
- if (s === 'agent' && (isTargetBased || gatewayChoiceTypeRef.current !== 'create-new')) return true; - // Config-bundle steps skipped in target-based mode - if (s === 'variants' && isTargetBased) return true; - if (s === 'onlineEval' && isTargetBased) return true; - // Target-based steps skipped in config-bundle mode - if (s === 'controlTarget' && !isTargetBased) return true; - if (s === 'treatmentTarget' && !isTargetBased) return true; - if (s === 'weights' && !isTargetBased) return true; - if (s === 'evalPath' && !isTargetBased) return true; - if (s === 'evalSelect' && !isTargetBased) return true; - if (s === 'evalCreate' && !isTargetBased) return true; - if (s === 'evalSamplingRate' && !isTargetBased) return true; - if (s === 'maxDuration') return true; - return false; - }, - [isTargetBased] - ); - - useEffect(() => { - wizard.setSkipCheck(shouldSkipStep); - }, [shouldSkipStep]); // wizard.setSkipCheck is stable (useCallback with no deps) - - // Mode selection items - const modeItems: SelectableItem[] = useMemo( - () => [ - { - id: 'config-bundle', - title: 'Config Bundle', - description: 'Split traffic between config bundle versions (same target, different config)', - }, - { - id: 'target-based', - title: 'Target-Based', - description: 'Split traffic between gateway targets (different targets, each self-contained)', - }, - ], - [] - ); - - // ── Target picker items builder ────────────────────────────────────────── - // Builds the three-section grouped picker items for target selection. - const buildTargetItems = useCallback( - (excludeTarget: TargetInfo | null): SelectableItem[] => { - const items: SelectableItem[] = []; - - // Section 1: Existing targets on the selected gateway - const selectedGw = httpGatewayDetails.find(g => g.name === wizard.config.gateway); - const existingTargets = selectedGw?.targets ?? 
[]; - if (existingTargets.length > 0) { - items.push({ - id: '__section_existing__', - title: '── Existing Targets ──', - description: '', - disabled: true, - }); - for (const t of existingTargets) { - if (excludeTarget?.name === t.name) continue; - items.push({ - id: `existing:${t.name}`, - title: t.name, - description: `endpoint=${t.qualifier} runtime=${t.runtimeRef}`, - }); - } - } - - // Section 2: Endpoints from project runtimes (quick-create targets) - const endpointItems: SelectableItem[] = []; - for (const rt of runtimes) { - for (const ep of rt.endpoints) { - const targetName = ep.name; - if (excludeTarget?.name === targetName) continue; - endpointItems.push({ - id: `endpoint:${rt.name}/${ep.name}`, - title: `${rt.name}/${ep.name}`, - description: `v${ep.version}`, - }); - } - } - if (endpointItems.length > 0) { - items.push({ - id: '__section_endpoints__', - title: '── Endpoints ──', - description: 'Select to auto-create target', - disabled: true, - spaceBefore: items.length > 0, - }); - items.push(...endpointItems); - } - - // Section 3: Create new target - items.push({ - id: '__create_target__', - title: '+ Create new target', - description: 'Configure runtime, name, and endpoint', - spaceBefore: true, - }); - - return items; - }, - [httpGatewayDetails, runtimes, wizard.config.gateway] - ); - - const controlTargetItems = useMemo(() => buildTargetItems(null), [buildTargetItems]); - const treatmentTargetItems = useMemo( - () => buildTargetItems(wizard.config.controlTargetInfo), - [buildTargetItems, wizard.config.controlTargetInfo] - ); - - // Runtime items for the "create new target" sub-flow - const runtimeItems: SelectableItem[] = useMemo( - () => runtimes.map(r => ({ id: r.name, title: r.name, description: `${r.endpoints.length} endpoint(s)` })), - [runtimes] - ); - - // Qualifier items for a given runtime (DEFAULT + all endpoints) - const buildQualifierItems = useCallback( - (runtimeName: string): SelectableItem[] => { - const rt = runtimes.find(r => 
r.name === runtimeName); - const items: SelectableItem[] = [{ id: 'DEFAULT', title: 'DEFAULT', description: 'Default endpoint' }]; - if (rt) { - for (const ep of rt.endpoints) { - items.push({ id: ep.name, title: ep.name, description: `v${ep.version}` }); - } - } - return items; - }, - [runtimes] - ); - - const controlEndpointItems = useMemo( - () => buildQualifierItems(controlNewRuntime), - [buildQualifierItems, controlNewRuntime] - ); - const treatmentEndpointItems = useMemo( - () => buildQualifierItems(treatmentNewRuntime), - [buildQualifierItems, treatmentNewRuntime] - ); - - // Navigation hooks for select steps - const modeNav = useListNavigation({ - items: modeItems, - onSelect: item => { - if (item.id === 'target-based' && onSwitchToTargetBased) { - onSwitchToTargetBased(); - return; - } - wizard.setMode(item.id as 'config-bundle' | 'target-based'); - }, - onExit: () => wizard.goBack(), - isActive: isModeStep, - }); - - const agentNav = useListNavigation({ - items: agentItems, - onSelect: item => wizard.setAgent(item.id), - onExit: () => wizard.goBack(), - isActive: isAgentStep, - }); - - const gatewayNav = useListNavigation({ - items: gatewayItems, - onSelect: item => { - if (item.id === '__create__') { - setGatewayCreateMode(true); - return; - } - const choice = { type: 'existing-http', name: item.id } as const; - gatewayChoiceTypeRef.current = choice.type; - wizard.setGatewayWithName(item.id, false); - }, - onExit: () => wizard.goBack(), - isActive: isGatewayStep && !gatewayCreateMode, - isDisabled: item => item.disabled === true, - }); - - const onlineEvalNav = useListNavigation({ - items: onlineEvalItems, - onSelect: item => wizard.setOnlineEval(item.id), - onExit: () => wizard.goBack(), - isActive: isOnlineEvalStep, - }); - - // ── Control target picker navigation ───────────────────────────────────── - const controlTargetNav = useListNavigation({ - items: controlTargetItems, - onSelect: item => { - if (item.id === '__create_target__') { - 
setControlSubFlow('selectRuntime'); - return; - } - if (item.id.startsWith('existing:')) { - const targetName = item.id.replace('existing:', ''); - const selectedGw = httpGatewayDetails.find(g => g.name === wizard.config.gateway); - const target = selectedGw?.targets.find(t => t.name === targetName); - if (target) { - wizard.setControlTarget( - { name: target.name, runtimeRef: target.runtimeRef, qualifier: target.qualifier }, - false - ); - } - return; - } - if (item.id.startsWith('endpoint:')) { - const path = item.id.replace('endpoint:', ''); - const [runtimeName, endpointName] = path.split('/'); - if (runtimeName && endpointName) { - const autoName = `${runtimeName}-${endpointName}`; - wizard.setControlTarget({ name: autoName, runtimeRef: runtimeName, qualifier: endpointName }, true); - } - } - }, - onExit: () => wizard.goBack(), - isActive: isControlTargetStep && controlSubFlow === 'pick', - isDisabled: item => item.disabled === true, - }); - - // Control sub-flow: select runtime - const controlRuntimeNav = useListNavigation({ - items: runtimeItems, - onSelect: item => { - setControlNewRuntime(item.id); - setControlSubFlow('selectQualifier'); - }, - onExit: () => setControlSubFlow('pick'), - isActive: isControlTargetStep && controlSubFlow === 'selectRuntime', - }); - - // Control sub-flow: select qualifier (auto-generates target name) - const controlEndpointNav = useListNavigation({ - items: controlEndpointItems, - onSelect: item => { - const autoName = `${controlNewRuntime}-${item.id}`; - wizard.setControlTarget({ name: autoName, runtimeRef: controlNewRuntime, qualifier: item.id }, true); - }, - onExit: () => setControlSubFlow('selectRuntime'), - isActive: isControlTargetStep && controlSubFlow === 'selectQualifier', - }); - - // ── Treatment target picker navigation ─────────────────────────────────── - const treatmentTargetNav = useListNavigation({ - items: treatmentTargetItems, - onSelect: item => { - if (item.id === '__create_target__') { - 
setTreatmentSubFlow('selectRuntime'); - return; - } - if (item.id.startsWith('existing:')) { - const targetName = item.id.replace('existing:', ''); - const selectedGw = httpGatewayDetails.find(g => g.name === wizard.config.gateway); - const target = selectedGw?.targets.find(t => t.name === targetName); - if (target) { - wizard.setTreatmentTarget( - { name: target.name, runtimeRef: target.runtimeRef, qualifier: target.qualifier }, - false - ); - } - return; - } - if (item.id.startsWith('endpoint:')) { - const path = item.id.replace('endpoint:', ''); - const [runtimeName, endpointName] = path.split('/'); - if (runtimeName && endpointName) { - const autoName = `${runtimeName}-${endpointName}`; - wizard.setTreatmentTarget({ name: autoName, runtimeRef: runtimeName, qualifier: endpointName }, true); - } - } - }, - onExit: () => wizard.goBack(), - isActive: isTreatmentTargetStep && treatmentSubFlow === 'pick', - isDisabled: item => item.disabled === true, - }); - - // Treatment sub-flow: select runtime - const treatmentRuntimeNav = useListNavigation({ - items: runtimeItems, - onSelect: item => { - setTreatmentNewRuntime(item.id); - setTreatmentSubFlow('selectQualifier'); - }, - onExit: () => setTreatmentSubFlow('pick'), - isActive: isTreatmentTargetStep && treatmentSubFlow === 'selectRuntime', - }); - - // Treatment sub-flow: select qualifier (auto-generates target name) - const treatmentEndpointNav = useListNavigation({ - items: treatmentEndpointItems, - onSelect: item => { - const autoName = `${treatmentNewRuntime}-${item.id}`; - wizard.setTreatmentTarget({ name: autoName, runtimeRef: treatmentNewRuntime, qualifier: item.id }, true); - }, - onExit: () => setTreatmentSubFlow('selectRuntime'), - isActive: isTreatmentTargetStep && treatmentSubFlow === 'selectQualifier', - }); - - const evalPathItems: SelectableItem[] = useMemo( - () => [ - { - id: 'select', - title: 'Select existing online eval configs', - description: 'Use configs already in your project', - }, - { id: 
'create', title: 'Create new', description: 'Pick evaluators + sampling rate, auto-create configs' }, - ], - [] - ); - - const evalPathNav = useListNavigation({ - items: evalPathItems, - onSelect: item => wizard.setEvalPath(item.id as 'select' | 'create'), - onExit: () => wizard.goBack(), - isActive: isEvalPathStep, - }); - - // ── Eval select sub-flow: pick control eval, then treatment eval ──────── - type EvalSelectPhase = 'controlEval' | 'treatmentEval'; - const [evalSelectPhase, setEvalSelectPhase] = useState('controlEval'); - const [selectedControlEval, setSelectedControlEval] = useState(''); - - // Reset eval select sub-flow when entering the step - /* eslint-disable react-hooks/set-state-in-effect -- intentional reset on step change */ - useEffect(() => { - if (wizard.step === 'evalSelect') { - setEvalSelectPhase('controlEval'); - setSelectedControlEval(''); - } - }, [wizard.step]); - /* eslint-enable react-hooks/set-state-in-effect */ - - // Filter online eval configs by runtime + endpoint (qualifier) - const controlRuntime = wizard.config.controlTargetInfo?.runtimeRef ?? ''; - const controlEndpoint = wizard.config.controlTargetInfo?.qualifier ?? ''; - const treatmentRuntime = wizard.config.treatmentTargetInfo?.runtimeRef ?? ''; - const treatmentEndpoint = wizard.config.treatmentTargetInfo?.qualifier ?? ''; - - const controlEvalItems: SelectableItem[] = useMemo(() => { - return onlineEvalConfigDetails - .filter(c => c.agent === controlRuntime && (c.endpoint ?? 'DEFAULT') === controlEndpoint) - .map(c => ({ id: c.name, title: c.name, description: `${c.agent}/${c.endpoint ?? 'DEFAULT'}` })); - }, [onlineEvalConfigDetails, controlRuntime, controlEndpoint]); - - const treatmentEvalItems: SelectableItem[] = useMemo(() => { - return onlineEvalConfigDetails - .filter(c => c.agent === treatmentRuntime && (c.endpoint ?? 'DEFAULT') === treatmentEndpoint) - .map(c => ({ id: c.name, title: c.name, description: `${c.agent}/${c.endpoint ?? 
'DEFAULT'}` })); - }, [onlineEvalConfigDetails, treatmentRuntime, treatmentEndpoint]); - - const controlEvalNoMatch = isEvalSelectStep && evalSelectPhase === 'controlEval' && controlEvalItems.length === 0; - const treatmentEvalNoMatch = - isEvalSelectStep && evalSelectPhase === 'treatmentEval' && treatmentEvalItems.length === 0; - - const controlEvalNav = useListNavigation({ - items: controlEvalItems, - onSelect: item => { - setSelectedControlEval(item.id); - setEvalSelectPhase('treatmentEval'); - }, - onExit: () => wizard.goBack(), - isActive: isEvalSelectStep && evalSelectPhase === 'controlEval' && !controlEvalNoMatch, - }); - - const treatmentEvalNav = useListNavigation({ - items: treatmentEvalItems, - onSelect: item => { - wizard.setEvalSelect(selectedControlEval, item.id); - }, - onExit: () => setEvalSelectPhase('controlEval'), - isActive: isEvalSelectStep && evalSelectPhase === 'treatmentEval' && !treatmentEvalNoMatch, - }); - - const enableNav = useListNavigation({ - items: enableItems, - onSelect: item => wizard.setEnableOnCreate(item.id === 'yes'), - onExit: () => wizard.goBack(), - isActive: isEnableStep, - }); - - useListNavigation({ - items: [{ id: 'confirm', title: 'Confirm' }], - onSelect: () => onComplete(wizard.config), - onExit: () => wizard.goBack(), - isActive: isConfirmStep, - }); - - // Help text - const isSelectStep = - isModeStep || - isAgentStep || - (isGatewayStep && !gatewayCreateMode) || - isOnlineEvalStep || - isEnableStep || - isControlTargetStep || - isTreatmentTargetStep || - isEvalPathStep || - isEvalSelectStep; - const helpText = isSelectStep - ? HELP_TEXT.NAVIGATE_SELECT - : isConfirmStep - ? HELP_TEXT.CONFIRM_CANCEL - : isVariantsStep - ? 
HELP_TEXT.VARIANTS_FORM - : HELP_TEXT.TEXT_INPUT; - - const headerContent = ; - - const controlWeight = 100 - wizard.config.treatmentWeight; - - // Format target display for confirm review - const formatTargetDisplay = (info: TargetInfo | null, isNew: boolean): string => { - if (!info) return '(not set)'; - const newLabel = isNew ? ' (new)' : ''; - return `${info.name} endpoint=${info.qualifier} runtime=${info.runtimeRef}${newLabel}`; - }; - - return ( - - - {isModeStep && ( - - )} - - {isNameStep && ( - (existingTestNames.includes(value) ? `AB test "${value}" already exists` : true)} - /> - )} - - {isDescriptionStep && ( - wizard.goBack()} - /> - )} - - {isAgentStep && } - - {/* ── Step 4: Gateway selection ──────────────────────────── */} - {isGatewayStep && !gatewayCreateMode && ( - - )} - {isGatewayStep && gatewayCreateMode && ( - { - gatewayChoiceTypeRef.current = 'create-new'; - wizard.setGatewayWithName(name, true); - setGatewayCreateMode(false); - }} - onCancel={() => setGatewayCreateMode(false)} - /> - )} - - {isVariantsStep && ( - wizard.goBack()} - onCreateBundle={onCreateBundle} - /> - )} - - {/* ── Step 5: Control target selection ─────────────────── */} - {isControlTargetStep && controlSubFlow === 'pick' && ( - - )} - {isControlTargetStep && controlSubFlow === 'selectRuntime' && ( - - )} - {isControlTargetStep && controlSubFlow === 'selectQualifier' && ( - - )} - - {/* ── Step 6: Treatment target selection ───────────────── */} - {isTreatmentTargetStep && treatmentSubFlow === 'pick' && ( - - {wizard.config.controlTargetInfo && ( - - - {'\u2713'} Control: {wizard.config.controlTargetInfo.name} endpoint= - {wizard.config.controlTargetInfo.qualifier} - - - )} - - - )} - {isTreatmentTargetStep && treatmentSubFlow === 'selectRuntime' && ( - - )} - {isTreatmentTargetStep && treatmentSubFlow === 'selectQualifier' && ( - - )} - - {/* ── Target-based: Traffic weights ───────────────────── */} - {isWeightsStep && ( - { - const w = parseInt(value, 10); - if 
(!isNaN(w) && w >= 1 && w <= 99) { - wizard.setWeights(w, 100 - w); - } - }} - onCancel={() => wizard.goBack()} - customValidation={value => { - const w = parseInt(value, 10); - if (isNaN(w)) return 'Must be a number'; - if (w < 1 || w > 99) return 'Must be between 1 and 99'; - return true; - }} - /> - )} - - {/* ── Target-based: Eval path selection ───────────────── */} - {isEvalPathStep && ( - - )} - - {/* ── Target-based: Eval select (control) ───────────── */} - {isEvalSelectStep && evalSelectPhase === 'controlEval' && !controlEvalNoMatch && ( - - )} - {isEvalSelectStep && evalSelectPhase === 'controlEval' && controlEvalNoMatch && ( - - No online eval config found for {controlRuntime}/{controlEndpoint}. Create one first: agentcore add - online-eval --runtime {controlRuntime} --endpoint {controlEndpoint} - - )} - - {/* ── Target-based: Eval select (treatment) ─────────── */} - {isEvalSelectStep && evalSelectPhase === 'treatmentEval' && !treatmentEvalNoMatch && ( - - - - {'\u2713'} Control eval: {selectedControlEval} - - - - - )} - {isEvalSelectStep && evalSelectPhase === 'treatmentEval' && treatmentEvalNoMatch && ( - - No online eval config found for {treatmentRuntime}/{treatmentEndpoint}. Create one first: agentcore add - online-eval --runtime {treatmentRuntime} --endpoint {treatmentEndpoint} - - )} - - {/* ── Config-bundle: Online eval selection ────────────── */} - {isOnlineEvalStep && - (onlineEvalItems.length > 0 ? ( - - ) : ( - - No online eval configs found. An online eval is required for AB tests. Add one with `agentcore add - online-eval`, then retry. Press Esc to go back. - - ))} - - {/* TODO(post-preview): Re-enable maxDuration TextInput once configurable duration is launched. 
*/} - - {isEnableStep && ( - - )} - - {isConfirmStep && ( - - )} - - - ); -} diff --git a/src/cli/tui/screens/ab-test/RemoveABTestScreen.tsx b/src/cli/tui/screens/ab-test/RemoveABTestScreen.tsx deleted file mode 100644 index 48adc621f..000000000 --- a/src/cli/tui/screens/ab-test/RemoveABTestScreen.tsx +++ /dev/null @@ -1,26 +0,0 @@ -import type { RemovableResource } from '../../../primitives/types'; -import type { SelectableItem } from '../../components'; -import { SelectScreen } from '../../components'; -import React, { useMemo } from 'react'; - -interface RemoveABTestScreenProps { - abTests: RemovableResource[]; - onSelect: (testName: string) => void; - onExit: () => void; -} - -export function RemoveABTestScreen({ abTests, onSelect, onExit }: RemoveABTestScreenProps) { - const items: SelectableItem[] = useMemo( - () => - abTests.map(t => ({ - id: t.name, - title: t.name, - description: 'AB Test', - })), - [abTests] - ); - - return ( - onSelect(item.id)} onExit={onExit} /> - ); -} diff --git a/src/cli/tui/screens/ab-test/TargetBasedABTestScreen.tsx b/src/cli/tui/screens/ab-test/TargetBasedABTestScreen.tsx deleted file mode 100644 index 60b92dd45..000000000 --- a/src/cli/tui/screens/ab-test/TargetBasedABTestScreen.tsx +++ /dev/null @@ -1,712 +0,0 @@ -import type { SelectableItem } from '../../components'; -import { - ConfirmReview, - Cursor, - Panel, - Screen, - StepIndicator, - TextInput, - TwoColumn, - WizardSelect, -} from '../../components'; -import { HELP_TEXT } from '../../constants'; -import { useListNavigation } from '../../hooks'; -import { usePanelNavigation } from '../../hooks/usePanelNavigation'; -import type { HttpGatewayInfo, OnlineEvalConfigInfo, RuntimeInfo } from './AddABTestScreen'; -import type { AddABTestConfig, TargetInfo } from './types'; -import { TARGET_BASED_STEP_LABELS, useTargetBasedWizard } from './useTargetBasedWizard'; -import { Box, Text, useInput } from 'ink'; -import React, { useCallback, useEffect, useMemo, useState } from 
'react'; - -// ───────────────────────────────────────────────────────────────────────────── -// Props -// ───────────────────────────────────────────────────────────────────────────── - -interface TargetBasedABTestScreenProps { - onComplete: (config: AddABTestConfig) => void; - onExit: () => void; - existingTestNames: string[]; - runtimes: RuntimeInfo[]; - httpGatewayDetails: HttpGatewayInfo[]; - existingHttpGateways: string[]; - onlineEvalConfigDetails: OnlineEvalConfigInfo[]; -} - -// ───────────────────────────────────────────────────────────────────────────── -// Builder field indices -// ───────────────────────────────────────────────────────────────────────────── - -const FIELD_TARGET = 0; -const FIELD_WEIGHT = 1; -const FIELD_EVAL = 2; -const FIELD_COUNT = 3; - -// ───────────────────────────────────────────────────────────────────────────── -// VariantColumn sub-component -// ───────────────────────────────────────────────────────────────────────────── - -interface VariantColumnProps { - label: string; - color: string; - isActive: boolean; - focusedField: number | null; - activeField: number | null; - targetInfo: TargetInfo | null; - weight: number; - evalConfigName: string; - targetItems: SelectableItem[]; - targetNavIndex: number; - evalItems: SelectableItem[]; - evalNavIndex: number; - onWeightSubmit: (value: string) => void; - onWeightCancel: () => void; -} - -function VariantColumn({ - label, - color, - isActive, - focusedField, - activeField, - targetInfo, - weight, - evalConfigName, - targetItems, - targetNavIndex, - evalItems, - evalNavIndex, - onWeightSubmit, - onWeightCancel, -}: VariantColumnProps) { - const borderColor = isActive ? color : 'gray'; - - const fieldLabel = (idx: number, text: string, value: string) => { - const isFocused = focusedField === idx; - const isFieldActive = activeField === idx; - const prefix = isFocused || isFieldActive ? '>' : ' '; - const checkmark = value && value !== '(not set)' ? 
'\u2713 ' : ''; - - return ( - - - {prefix} {text}:{' '} - - - {checkmark} - {value} - - - ); - }; - - return ( - - - {label} - - - {/* Target field */} - {activeField === FIELD_TARGET ? ( - - ) : ( - fieldLabel( - FIELD_TARGET, - 'Target', - targetInfo ? `${targetInfo.name} (${targetInfo.runtimeRef}/${targetInfo.qualifier})` : '(not set)' - ) - )} - - {/* Weight field */} - {activeField === FIELD_WEIGHT ? ( - { - const w = parseInt(value, 10); - if (isNaN(w)) return 'Must be a number'; - if (w < 1 || w > 99) return 'Must be between 1 and 99'; - return true; - }} - /> - ) : ( - fieldLabel(FIELD_WEIGHT, 'Weight', `${weight}%`) - )} - - {/* Eval config field */} - {activeField === FIELD_EVAL ? ( - evalItems.length > 0 ? ( - - ) : ( - - No eval config found for this target. - Press Esc to go back. Create one with: agentcore add online-eval - - ) - ) : ( - fieldLabel(FIELD_EVAL, 'Eval', evalConfigName || '(optional)') - )} - - - ); -} - -// ───────────────────────────────────────────────────────────────────────────── -// Main Screen -// ───────────────────────────────────────────────────────────────────────────── - -export function TargetBasedABTestScreen({ - onComplete, - onExit, - existingTestNames, - runtimes, - httpGatewayDetails, - existingHttpGateways, - onlineEvalConfigDetails, -}: TargetBasedABTestScreenProps) { - const wizard = useTargetBasedWizard(); - - // ── Name/Description multi-field form ─────────────────────────────────── - type NameField = 'name' | 'description'; - const NAME_FIELDS: NameField[] = ['name', 'description']; - const [activeNameField, setActiveNameField] = useState('name'); - const [nameValue, setNameValue] = useState(''); - const [descriptionValue, setDescriptionValue] = useState(''); - const [nameError, setNameError] = useState(null); - const [gatewayCreateMode, setGatewayCreateMode] = useState(false); - - // Step flags - const isNameStep = wizard.step === 'nameDescription'; - const isGatewayStep = wizard.step === 'gateway'; - const 
isBuilderStep = wizard.step === 'builder'; - const isEnableStep = wizard.step === 'enableOnCreate'; - const isConfirmStep = wizard.step === 'confirm'; - - // ── Name/Description input handler ───────────────────────────────────── - useInput( - (input, key) => { - if (!isNameStep) return; - - if (key.escape) { - if (activeNameField === 'description') { - setActiveNameField('name'); - } else { - onExit(); - } - return; - } - - if (key.tab || key.upArrow || key.downArrow) { - const idx = NAME_FIELDS.indexOf(activeNameField); - if (key.shift || key.upArrow) { - setActiveNameField(NAME_FIELDS[(idx - 1 + NAME_FIELDS.length) % NAME_FIELDS.length]!); - } else { - setActiveNameField(NAME_FIELDS[(idx + 1) % NAME_FIELDS.length]!); - } - setNameError(null); - return; - } - - if (key.return) { - if (activeNameField === 'name') { - if (!nameValue.trim()) { - setNameError('Name is required'); - return; - } - if (!/^[a-zA-Z][a-zA-Z0-9_]{0,47}$/.test(nameValue.trim())) { - setNameError('Must begin with a letter, alphanumeric + underscores only (max 48 chars)'); - return; - } - if (existingTestNames.includes(nameValue.trim())) { - setNameError(`AB test "${nameValue.trim()}" already exists`); - return; - } - setActiveNameField('description'); - setNameError(null); - return; - } - // On description, submit both - if (!nameValue.trim()) { - setNameError('Name is required'); - setActiveNameField('name'); - return; - } - wizard.setName(nameValue.trim()); - wizard.setDescription(descriptionValue.trim()); - wizard.advanceFromNameDescription(); - return; - } - - // Text input - if (key.backspace || key.delete) { - if (activeNameField === 'name') setNameValue(v => v.slice(0, -1)); - else setDescriptionValue(v => v.slice(0, -1)); - setNameError(null); - return; - } - if (input && !key.ctrl && !key.meta) { - if (activeNameField === 'name') setNameValue(v => v + input); - else setDescriptionValue(v => v + input); - setNameError(null); - } - }, - { isActive: isNameStep } - ); - - // ── Gateway 
items ─────────────────────────────────────────────────────── - const gatewayItems: SelectableItem[] = useMemo(() => { - const items: SelectableItem[] = []; - for (const gwName of existingHttpGateways) { - items.push({ id: gwName, title: gwName, description: 'Existing HTTP gateway' }); - } - items.push({ - id: '__create__', - title: 'Create new gateway', - description: 'Auto-create for this AB test', - }); - return items; - }, [existingHttpGateways]); - - // ── Target items builder ──────────────────────────────────────────────── - const buildTargetItems = useCallback( - (excludeTarget: TargetInfo | null): SelectableItem[] => { - const items: SelectableItem[] = []; - - // Section 1: Existing targets on the selected gateway - const selectedGw = httpGatewayDetails.find(g => g.name === wizard.config.gateway); - const existingTargets = selectedGw?.targets ?? []; - if (existingTargets.length > 0) { - items.push({ - id: '__section_existing__', - title: '── Gateway Targets ──', - description: '', - disabled: true, - }); - for (const t of existingTargets) { - if (t.name === excludeTarget?.name) continue; - items.push({ - id: `existing:${t.name}`, - title: t.name, - description: `${t.runtimeRef}/${t.qualifier}`, - }); - } - } - - // Section 2: Runtime endpoints (auto-create targets) - const endpointItems: SelectableItem[] = []; - for (const rt of runtimes) { - for (const ep of rt.endpoints) { - const targetName = `${rt.name}-${ep.name}`; - if (targetName === excludeTarget?.name) continue; - endpointItems.push({ - id: `endpoint:${rt.name}/${ep.name}`, - title: `${rt.name}/${ep.name}`, - description: `v${ep.version}`, - }); - } - } - if (endpointItems.length > 0) { - items.push({ - id: '__section_endpoints__', - title: '── Runtime Endpoints ──\n Select to auto-create target', - description: '', - disabled: true, - spaceBefore: items.length > 0, - }); - items.push(...endpointItems); - } - - return items; - }, - [httpGatewayDetails, runtimes, wizard.config.gateway] - ); - - 
const controlTargetItems = useMemo(() => buildTargetItems(null), [buildTargetItems]); - const treatmentTargetItems = useMemo( - () => buildTargetItems(wizard.config.controlTargetInfo), - [buildTargetItems, wizard.config.controlTargetInfo] - ); - - // ── Eval items (auto-matched by runtime + endpoint) ───────────────────── - const buildEvalItems = useCallback( - (targetInfo: TargetInfo | null): SelectableItem[] => { - if (!targetInfo) return []; - return onlineEvalConfigDetails - .filter(c => c.agent === targetInfo.runtimeRef && (c.endpoint ?? 'DEFAULT') === targetInfo.qualifier) - .map(c => ({ id: c.name, title: c.name, description: `${c.agent}/${c.endpoint ?? 'DEFAULT'}` })); - }, - [onlineEvalConfigDetails] - ); - - const controlEvalItems = useMemo( - () => buildEvalItems(wizard.config.controlTargetInfo), - [buildEvalItems, wizard.config.controlTargetInfo] - ); - const treatmentEvalItems = useMemo( - () => buildEvalItems(wizard.config.treatmentTargetInfo), - [buildEvalItems, wizard.config.treatmentTargetInfo] - ); - - // Auto-match eval when target is selected and exactly one match exists - useEffect(() => { - if (wizard.config.controlTargetInfo && controlEvalItems.length === 1 && !wizard.config.controlOnlineEval) { - wizard.setControlEval(controlEvalItems[0]!.id); - } - }, [wizard.config.controlTargetInfo, controlEvalItems, wizard.config.controlOnlineEval, wizard.setControlEval]); - - useEffect(() => { - if (wizard.config.treatmentTargetInfo && treatmentEvalItems.length === 1 && !wizard.config.treatmentOnlineEval) { - wizard.setTreatmentEval(treatmentEvalItems[0]!.id); - } - }, [ - wizard.config.treatmentTargetInfo, - treatmentEvalItems, - wizard.config.treatmentOnlineEval, - wizard.setTreatmentEval, - ]); - - // ── Enable items ──────────────────────────────────────────────────────── - const enableItems: SelectableItem[] = useMemo( - () => [ - { id: 'yes', title: 'Yes', description: 'Start the AB test immediately after deploy' }, - { id: 'no', title: 'No', 
description: 'Create paused — start manually later' }, - ], - [] - ); - - // ── Panel navigation for the builder step ─────────────────────────────── - const panel = usePanelNavigation({ - isActive: isBuilderStep, - fieldCount: FIELD_COUNT, - onExit: () => wizard.goBack(), - onComplete: () => wizard.advance(), - }); - - // ── Target selection handler ──────────────────────────────────────────── - const handleTargetSelect = useCallback( - (column: number, item: SelectableItem) => { - const setter = column === 0 ? wizard.setControlTarget : wizard.setTreatmentTarget; - - if (item.id.startsWith('existing:')) { - const targetName = item.id.replace('existing:', ''); - const selectedGw = httpGatewayDetails.find(g => g.name === wizard.config.gateway); - const target = selectedGw?.targets.find(t => t.name === targetName); - if (target) { - setter({ name: target.name, runtimeRef: target.runtimeRef, qualifier: target.qualifier }, false); - } - } else if (item.id.startsWith('endpoint:')) { - const path = item.id.replace('endpoint:', ''); - const [runtimeName, endpointName] = path.split('/'); - if (runtimeName && endpointName) { - const autoName = `${runtimeName}-${endpointName}`; - setter({ name: autoName, runtimeRef: runtimeName, qualifier: endpointName }, true); - } - } - panel.deactivate(); - }, - [httpGatewayDetails, wizard.config.gateway, wizard.setControlTarget, wizard.setTreatmentTarget, panel] - ); - - // ── List navigations for builder pickers ──────────────────────────────── - - // Control target picker - const controlTargetNav = useListNavigation({ - items: controlTargetItems, - onSelect: item => handleTargetSelect(0, item), - onExit: () => panel.deactivate(), - isActive: panel.isFieldActive(0, FIELD_TARGET), - isDisabled: item => item.disabled === true, - }); - - // Treatment target picker - const treatmentTargetNav = useListNavigation({ - items: treatmentTargetItems, - onSelect: item => handleTargetSelect(1, item), - onExit: () => panel.deactivate(), - isActive: 
panel.isFieldActive(1, FIELD_TARGET), - isDisabled: item => item.disabled === true, - }); - - // Control eval picker - const controlEvalNav = useListNavigation({ - items: controlEvalItems, - onSelect: item => { - wizard.setControlEval(item.id); - panel.deactivate(); - }, - onExit: () => panel.deactivate(), - isActive: panel.isFieldActive(0, FIELD_EVAL), - }); - - // Treatment eval picker - const treatmentEvalNav = useListNavigation({ - items: treatmentEvalItems, - onSelect: item => { - wizard.setTreatmentEval(item.id); - panel.deactivate(); - }, - onExit: () => panel.deactivate(), - isActive: panel.isFieldActive(1, FIELD_EVAL), - }); - - // ── Non-builder navigation hooks ──────────────────────────────────────── - - const gatewayNav = useListNavigation({ - items: gatewayItems, - onSelect: item => { - if (item.id === '__create__') { - setGatewayCreateMode(true); - return; - } - wizard.setGateway(item.id, false); - }, - onExit: () => wizard.goBack(), - isActive: isGatewayStep && !gatewayCreateMode, - isDisabled: item => item.disabled === true, - }); - - const enableNav = useListNavigation({ - items: enableItems, - onSelect: item => wizard.setEnableOnCreate(item.id === 'yes'), - onExit: () => wizard.goBack(), - isActive: isEnableStep, - }); - - // Builder "Continue" navigation — when all fields filled, Enter on confirm row advances - const builderContinueItems: SelectableItem[] = useMemo( - () => (wizard.isBuilderComplete ? 
[{ id: 'continue', title: 'Continue' }] : []), - [wizard.isBuilderComplete] - ); - - const _builderContinueNav = useListNavigation({ - items: builderContinueItems, - onSelect: () => wizard.advance(), - onExit: () => wizard.goBack(), - isActive: false, // Controlled programmatically below - }); - - useListNavigation({ - items: [{ id: 'confirm', title: 'Confirm' }], - onSelect: () => onComplete(wizard.toAddABTestConfig()), - onExit: () => wizard.goBack(), - isActive: isConfirmStep, - }); - - // ── Help text ─────────────────────────────────────────────────────────── - const isSelectStep = (isGatewayStep && !gatewayCreateMode) || isEnableStep; - const helpText = isSelectStep - ? HELP_TEXT.NAVIGATE_SELECT - : isConfirmStep - ? HELP_TEXT.CONFIRM_CANCEL - : isBuilderStep - ? 'Tab switch column \u00B7 \u2191\u2193 navigate \u00B7 Enter select \u00B7 Esc back' - : HELP_TEXT.TEXT_INPUT; - - const headerContent = ( - - ); - - // ── Format display helpers ────────────────────────────────────────────── - const formatTargetDisplay = (info: TargetInfo | null, isNew: boolean): string => { - if (!info) return '(not set)'; - const newLabel = isNew ? 
' (new)' : ''; - return `${info.name} endpoint=${info.qualifier} runtime=${info.runtimeRef}${newLabel}`; - }; - - // ── Weight submit handlers ────────────────────────────────────────────── - const handleControlWeightSubmit = useCallback( - (value: string) => { - const w = parseInt(value, 10); - if (!isNaN(w) && w >= 1 && w <= 99) { - wizard.setControlWeight(w); - } - panel.deactivate(); - }, - [wizard, panel] - ); - - const handleTreatmentWeightSubmit = useCallback( - (value: string) => { - const w = parseInt(value, 10); - if (!isNaN(w) && w >= 1 && w <= 99) { - // Treatment weight setter: set control to 100 - treatment - wizard.setControlWeight(100 - w); - } - panel.deactivate(); - }, - [wizard, panel] - ); - - const handleWeightCancel = useCallback(() => { - panel.deactivate(); - }, [panel]); - - return ( - - - {/* ── Step 1: Name + Description ─────────────────────── */} - {isNameStep && ( - - - {'Name: '} - {activeNameField === 'name' && !nameValue && } - - {nameValue || {'e.g., my-ab-test'}} - - {activeNameField === 'name' && nameValue ? : null} - - - {'Description: '} - {activeNameField === 'description' && !descriptionValue && } - - {descriptionValue || {'(optional)'}} - - {activeNameField === 'description' && descriptionValue ? : null} - - {nameError && ( - - {nameError} - - )} - - )} - - {/* ── Step 2: Gateway ────────────────────────────────── */} - {isGatewayStep && !gatewayCreateMode && ( - - )} - {isGatewayStep && gatewayCreateMode && ( - { - wizard.setGateway(name, true); - setGatewayCreateMode(false); - }} - onCancel={() => setGatewayCreateMode(false)} - /> - )} - - {/* ── Step 3: Side-by-Side Builder ───────────────────── */} - {isBuilderStep && ( - - - } - right={ - - } - /> - {wizard.isBuilderComplete && ( - - - {'\u2713'} All fields configured. Press Enter to continue, or adjust values above. - - - )} - {!wizard.isBuilderComplete && ( - - Configure both columns, then press Enter to continue. 
- - )} - - )} - - {/* ── Step 4: Enable on Create ───────────────────────── */} - {isEnableStep && ( - - )} - - {/* ── Step 5: Confirm ────────────────────────────────── */} - {isConfirmStep && ( - - )} - - - ); -} diff --git a/src/cli/tui/screens/ab-test/VariantConfigForm.tsx b/src/cli/tui/screens/ab-test/VariantConfigForm.tsx deleted file mode 100644 index 61f465323..000000000 --- a/src/cli/tui/screens/ab-test/VariantConfigForm.tsx +++ /dev/null @@ -1,268 +0,0 @@ -import type { SelectableItem } from '../../components'; -import { TextInput, WizardSelect } from '../../components'; -import { useListNavigation } from '../../hooks'; -import { Box, Text } from 'ink'; -import React, { useCallback, useMemo, useState } from 'react'; - -type VariantSubField = 'controlBundle' | 'controlVersion' | 'treatmentBundle' | 'treatmentVersion' | 'treatmentWeight'; - -const SUB_FIELDS: VariantSubField[] = [ - 'controlBundle', - 'controlVersion', - 'treatmentBundle', - 'treatmentVersion', - 'treatmentWeight', -]; - -export interface VariantConfig { - controlBundle: string; - controlVersion: string; - treatmentBundle: string; - treatmentVersion: string; - treatmentWeight: number; -} - -export type VersionLoadState = 'idle' | 'loading' | 'loaded' | 'error'; - -interface VariantConfigFormProps { - bundleItems: SelectableItem[]; - fetchVersionItems: (bundleName: string) => void; - controlVersionItems: SelectableItem[]; - treatmentVersionItems: SelectableItem[]; - controlVersionLoadState: VersionLoadState; - treatmentVersionLoadState: VersionLoadState; - onComplete: (config: VariantConfig) => void; - onCancel: () => void; - onCreateBundle?: () => void; -} - -export function VariantConfigForm({ - bundleItems, - fetchVersionItems, - controlVersionItems, - treatmentVersionItems, - controlVersionLoadState, - treatmentVersionLoadState, - onComplete, - onCancel, - onCreateBundle, -}: VariantConfigFormProps) { - const [activeField, setActiveField] = useState('controlBundle'); - const 
[controlBundle, setControlBundle] = useState(''); - const [controlVersion, setControlVersion] = useState(''); - const [treatmentBundle, setTreatmentBundle] = useState(''); - const [treatmentVersion, setTreatmentVersion] = useState(''); - const [treatmentWeight, setTreatmentWeight] = useState('20'); - - const augmentedBundleItems: SelectableItem[] = useMemo(() => { - const items: SelectableItem[] = []; - if (onCreateBundle) { - items.push({ id: '__create_bundle__', title: 'Create new config bundle', description: 'Add a new bundle first' }); - } - items.push(...bundleItems); - return items; - }, [bundleItems, onCreateBundle]); - - const advanceField = useCallback(() => { - const idx = SUB_FIELDS.indexOf(activeField); - const next = SUB_FIELDS[idx + 1]; - if (next) setActiveField(next); - }, [activeField]); - - // Navigation for each select sub-field - const controlBundleNav = useListNavigation({ - items: augmentedBundleItems, - onSelect: item => { - if (item.id === '__create_bundle__') { - onCreateBundle?.(); - return; - } - setControlBundle(item.id); - fetchVersionItems(item.id); - advanceField(); - }, - onExit: onCancel, - isActive: activeField === 'controlBundle', - }); - - const controlVersionNav = useListNavigation({ - items: controlVersionItems, - onSelect: item => { - setControlVersion(item.id); - advanceField(); - }, - onExit: () => setActiveField('controlBundle'), - isActive: activeField === 'controlVersion' && controlVersionLoadState === 'loaded', - }); - - const treatmentBundleNav = useListNavigation({ - items: augmentedBundleItems, - onSelect: item => { - if (item.id === '__create_bundle__') { - onCreateBundle?.(); - return; - } - setTreatmentBundle(item.id); - fetchVersionItems(item.id); - advanceField(); - }, - onExit: () => setActiveField('controlVersion'), - isActive: activeField === 'treatmentBundle', - }); - - const treatmentVersionNav = useListNavigation({ - items: treatmentVersionItems, - onSelect: item => { - setTreatmentVersion(item.id); - 
advanceField(); - }, - onExit: () => setActiveField('treatmentBundle'), - isActive: activeField === 'treatmentVersion' && treatmentVersionLoadState === 'loaded', - }); - - const controlWeight = 100 - parseInt(treatmentWeight || '0', 10); - - const completedValue = (value: string, label: string) => ( - - {label}: - {value || '(pending)'} - {value && } - - ); - - const pendingValue = (label: string) => ( - - {label}: - (pending) - - ); - - const renderVersionField = ( - isActive: boolean, - loadState: VersionLoadState, - items: SelectableItem[], - nav: { selectedIndex: number }, - title: string, - completedVersion: string, - label: string - ) => { - if (!isActive) { - return completedVersion ? completedValue(completedVersion.slice(0, 8), label) : pendingValue(label); - } - - switch (loadState) { - case 'loading': - return {label}: Loading versions...; - case 'error': - return {label}: Failed to load versions. Press Esc to go back and retry.; - case 'loaded': - if (items.length === 0) { - return {label}: No versions found. Deploy the config bundle first.; - } - return ; - default: - return {label}: Waiting...; - } - }; - - return ( - - Configure Variants - - {/* Control section */} - - - Control (C): - - - {activeField === 'controlBundle' ? ( - augmentedBundleItems.length > 0 ? ( - - ) : ( - No deployed config bundles found. - ) - ) : ( - completedValue(controlBundle, ' Bundle') - )} - - {renderVersionField( - activeField === 'controlVersion', - controlVersionLoadState, - controlVersionItems, - controlVersionNav, - ' Select control version', - controlVersion, - ' Version' - )} - - - {/* Treatment section */} - - - Treatment (T1): - - - {activeField === 'treatmentBundle' ? ( - - ) : treatmentBundle ? 
( - completedValue(treatmentBundle, ' Bundle') - ) : ( - pendingValue(' Bundle') - )} - - {renderVersionField( - activeField === 'treatmentVersion', - treatmentVersionLoadState, - treatmentVersionItems, - treatmentVersionNav, - ' Select treatment version', - treatmentVersion, - ' Version' - )} - - {activeField === 'treatmentWeight' ? ( - - setTreatmentWeight(value)} - onSubmit={value => { - const n = parseInt(value, 10); - if (!isNaN(n) && n >= 1 && n <= 99) { - setTreatmentWeight(value); - onComplete({ - controlBundle, - controlVersion, - treatmentBundle, - treatmentVersion, - treatmentWeight: n, - }); - } - }} - onCancel={() => setActiveField('treatmentVersion')} - customValidation={(value: string) => { - const n = parseInt(value, 10); - if (isNaN(n)) return 'Must be a number'; - if (n < 1 || n > 99) return 'Must be between 1 and 99'; - return true; - }} - /> - - ) : treatmentWeight && treatmentVersion ? ( - completedValue(`${treatmentWeight}% (control: ${controlWeight}%)`, ' Weight') - ) : ( - pendingValue(' Weight') - )} - - - ); -} diff --git a/src/cli/tui/screens/ab-test/__tests__/useAddABTestWizard.test.tsx b/src/cli/tui/screens/ab-test/__tests__/useAddABTestWizard.test.tsx deleted file mode 100644 index 082d7662b..000000000 --- a/src/cli/tui/screens/ab-test/__tests__/useAddABTestWizard.test.tsx +++ /dev/null @@ -1,286 +0,0 @@ -import type { VariantConfig } from '../VariantConfigForm'; -import type { GatewayChoice } from '../types'; -import type { StepSkipCheck } from '../useAddABTestWizard'; -import { useAddABTestWizard } from '../useAddABTestWizard'; -import { Text } from 'ink'; -import { render } from 'ink-testing-library'; -import React, { act, useImperativeHandle } from 'react'; -import { describe, expect, it } from 'vitest'; - -// ── Simple harness ───────────────────────────────────────────────────────── - -function Harness() { - const wizard = useAddABTestWizard(); - return ( - - step:{wizard.step} - name:{wizard.config.name} - 
treatmentWeight:{wizard.config.treatmentWeight} - enableOnCreate:{String(wizard.config.enableOnCreate)} - steps:{wizard.steps.join(',')} - - ); -} - -// ── Imperative harness ───────────────────────────────────────────────────── - -interface HarnessHandle { - setName: (name: string) => void; - setDescription: (desc: string) => void; - setAgent: (agent: string) => void; - setGateway: (choice: GatewayChoice) => void; - setVariants: (vc: VariantConfig) => void; - setOnlineEval: (eval_: string) => void; - setMaxDuration: (days: number | undefined) => void; - setEnableOnCreate: (enable: boolean) => void; - setSkipCheck: (check: StepSkipCheck) => void; - goBack: () => void; - reset: () => void; -} - -const ImperativeHarness = React.forwardRef((_, ref) => { - const wizard = useAddABTestWizard(); - useImperativeHandle(ref, () => ({ - setName: wizard.setName, - setDescription: wizard.setDescription, - setAgent: wizard.setAgent, - setGateway: wizard.setGateway, - setVariants: wizard.setVariants, - setOnlineEval: wizard.setOnlineEval, - setMaxDuration: wizard.setMaxDuration, - setEnableOnCreate: wizard.setEnableOnCreate, - setSkipCheck: wizard.setSkipCheck, - goBack: wizard.goBack, - reset: wizard.reset, - })); - return ( - - step:{wizard.step} - name:{wizard.config.name} - description:{wizard.config.description} - agent:{wizard.config.agent} - controlBundle:{wizard.config.controlBundle} - treatmentWeight:{wizard.config.treatmentWeight} - onlineEval:{wizard.config.onlineEval} - maxDuration:{String(wizard.config.maxDuration ?? 
'undefined')} - enableOnCreate:{String(wizard.config.enableOnCreate)} - - ); -}); -ImperativeHarness.displayName = 'ImperativeHarness'; - -// ── Tests ────────────────────────────────────────────────────────────────── - -describe('useAddABTestWizard', () => { - describe('defaults', () => { - it('default step is mode', () => { - const { lastFrame } = render(); - expect(lastFrame()).toContain('step:mode'); - }); - - it('default treatment weight is 20', () => { - const { lastFrame } = render(); - expect(lastFrame()).toContain('treatmentWeight:20'); - }); - - it('default enableOnCreate is true', () => { - const { lastFrame } = render(); - expect(lastFrame()).toContain('enableOnCreate:true'); - }); - - it('has all 10 steps', () => { - const { lastFrame } = render(); - const frame = lastFrame()!.replace(/\n/g, ''); - expect(frame).toContain( - 'steps:mode,name,description,gateway,agent,variants,onlineEval,maxDuration,enableOnCreate,confirm' - ); - }); - }); - - describe('step navigation', () => { - it('setName advances to description', () => { - const ref = React.createRef(); - const { lastFrame } = render(); - - act(() => ref.current!.setName('Test1')); - - expect(lastFrame()).toContain('step:description'); - expect(lastFrame()).toContain('name:Test1'); - }); - - it('setDescription advances to gateway', () => { - const ref = React.createRef(); - const { lastFrame } = render(); - - act(() => ref.current!.setName('Test1')); - act(() => ref.current!.setDescription('desc')); - - expect(lastFrame()).toContain('step:gateway'); - expect(lastFrame()).toContain('description:desc'); - }); - - it('setGateway advances to agent', () => { - const ref = React.createRef(); - const { lastFrame } = render(); - - act(() => ref.current!.setName('T')); - act(() => ref.current!.setDescription('')); - act(() => ref.current!.setGateway({ type: 'create-new' })); - - expect(lastFrame()).toContain('step:agent'); - }); - - it('setAgent advances to variants', () => { - const ref = 
React.createRef(); - const { lastFrame } = render(); - - act(() => ref.current!.setName('T')); - act(() => ref.current!.setDescription('')); - act(() => ref.current!.setGateway({ type: 'create-new' })); - act(() => ref.current!.setAgent('my-agent')); - - expect(lastFrame()).toContain('step:variants'); - expect(lastFrame()).toContain('agent:my-agent'); - }); - - it('setVariants advances to onlineEval', () => { - const ref = React.createRef(); - const { lastFrame } = render(); - - act(() => ref.current!.setName('T')); - act(() => ref.current!.setDescription('')); - act(() => ref.current!.setGateway({ type: 'create-new' })); - act(() => ref.current!.setAgent('my-agent')); - act(() => - ref.current!.setVariants({ - controlBundle: 'cb', - controlVersion: 'v1', - treatmentBundle: 'tb', - treatmentVersion: 'v2', - treatmentWeight: 30, - }) - ); - - expect(lastFrame()).toContain('step:onlineEval'); - expect(lastFrame()).toContain('controlBundle:cb'); - expect(lastFrame()).toContain('treatmentWeight:30'); - }); - - it('full wizard reaches confirm step', () => { - const ref = React.createRef(); - const { lastFrame } = render(); - - act(() => ref.current!.setName('T')); - act(() => ref.current!.setDescription('')); - act(() => ref.current!.setGateway({ type: 'create-new' })); - act(() => ref.current!.setAgent('my-agent')); - act(() => - ref.current!.setVariants({ - controlBundle: 'cb', - controlVersion: 'v1', - treatmentBundle: 'tb', - treatmentVersion: 'v2', - treatmentWeight: 25, - }) - ); - act(() => ref.current!.setOnlineEval('eval-arn')); - act(() => ref.current!.setMaxDuration(30)); - act(() => ref.current!.setEnableOnCreate(false)); - - const frame = lastFrame()!.replace(/\n/g, ''); - expect(frame).toContain('step:confirm'); - expect(frame).toContain('enableOnCreate:false'); - expect(frame).toContain('maxDuration:30'); - }); - }); - - describe('goBack', () => { - it('goes back from description to name', () => { - const ref = React.createRef(); - const { lastFrame } = 
render(); - - act(() => ref.current!.setName('T')); - expect(lastFrame()).toContain('step:description'); - - act(() => ref.current!.goBack()); - expect(lastFrame()).toContain('step:name'); - }); - - it('does not go back from first step', () => { - const ref = React.createRef(); - const { lastFrame } = render(); - - act(() => ref.current!.goBack()); - expect(lastFrame()).toContain('step:mode'); - }); - }); - - describe('reset', () => { - it('resets to initial state', () => { - const ref = React.createRef(); - const { lastFrame } = render(); - - act(() => ref.current!.setName('Test1')); - act(() => ref.current!.setDescription('desc')); - expect(lastFrame()).toContain('step:gateway'); - - act(() => ref.current!.reset()); - - expect(lastFrame()).toContain('step:mode'); - expect(lastFrame()).toContain('name:'); - expect(lastFrame()).toContain('treatmentWeight:20'); - }); - }); - - describe('skip check', () => { - it('advance skips over steps marked as skippable', () => { - const ref = React.createRef(); - const { lastFrame } = render(); - - act(() => ref.current!.setSkipCheck(s => s === 'gateway')); - act(() => ref.current!.setName('T')); - act(() => ref.current!.setDescription('')); - act(() => ref.current!.setAgent('my-agent')); - - expect(lastFrame()).toContain('step:variants'); - }); - - it('goBack skips over steps marked as skippable', () => { - const ref = React.createRef(); - const { lastFrame } = render(); - - act(() => ref.current!.setName('T')); - act(() => ref.current!.setDescription('')); - act(() => ref.current!.setGateway({ type: 'create-new' })); - act(() => ref.current!.setAgent('my-agent')); - expect(lastFrame()).toContain('step:variants'); - - act(() => ref.current!.setSkipCheck(s => s === 'agent')); - act(() => ref.current!.goBack()); - - expect(lastFrame()).toContain('step:gateway'); - }); - - it('advance skips multiple consecutive skippable steps', () => { - const ref = React.createRef(); - const { lastFrame } = render(); - - act(() => 
ref.current!.setSkipCheck(s => s === 'agent' || s === 'variants')); - act(() => ref.current!.setName('T')); - act(() => ref.current!.setDescription('')); - act(() => ref.current!.setGateway({ type: 'create-new' })); - - expect(lastFrame()).toContain('step:onlineEval'); - }); - - it('skip check does not affect non-skippable steps', () => { - const ref = React.createRef(); - const { lastFrame } = render(); - - act(() => ref.current!.setSkipCheck(() => false)); - act(() => ref.current!.setName('T')); - - expect(lastFrame()).toContain('step:description'); - }); - }); -}); diff --git a/src/cli/tui/screens/ab-test/__tests__/useTargetBasedWizard.test.tsx b/src/cli/tui/screens/ab-test/__tests__/useTargetBasedWizard.test.tsx deleted file mode 100644 index 4ea0a40d5..000000000 --- a/src/cli/tui/screens/ab-test/__tests__/useTargetBasedWizard.test.tsx +++ /dev/null @@ -1,319 +0,0 @@ -import type { TargetInfo } from '../types'; -import { useTargetBasedWizard } from '../useTargetBasedWizard'; -import { Text } from 'ink'; -import { render } from 'ink-testing-library'; -import React, { act, useImperativeHandle } from 'react'; -import { describe, expect, it } from 'vitest'; - -// ── Simple harness ───────────────────────────────────────────────────────── - -function Harness() { - const wizard = useTargetBasedWizard(); - return ( - - step:{wizard.step} - name:{wizard.config.name} - description:{wizard.config.description} - gateway:{wizard.config.gateway} - controlWeight:{wizard.config.controlWeight} - treatmentWeight:{wizard.config.treatmentWeight} - enableOnCreate:{String(wizard.config.enableOnCreate)} - - ); -} - -// ── Imperative harness ───────────────────────────────────────────────────── - -interface HarnessHandle { - setName: (name: string) => void; - setDescription: (desc: string) => void; - advanceFromNameDescription: () => void; - setGateway: (name: string, isNew: boolean) => void; - advance: () => void; - goBack: () => void; - setControlTarget: (target: TargetInfo, isNew: 
boolean) => void; - setTreatmentTarget: (target: TargetInfo, isNew: boolean) => void; - setControlWeight: (w: number) => void; - setControlEval: (name: string) => void; - setTreatmentEval: (name: string) => void; - setEnableOnCreate: (enable: boolean) => void; - toAddABTestConfig: ReturnType['toAddABTestConfig']; -} - -const ImperativeHarness = React.forwardRef((_, ref) => { - const wizard = useTargetBasedWizard(); - useImperativeHandle(ref, () => ({ - setName: wizard.setName, - setDescription: wizard.setDescription, - advanceFromNameDescription: wizard.advanceFromNameDescription, - setGateway: wizard.setGateway, - advance: wizard.advance, - goBack: wizard.goBack, - setControlTarget: wizard.setControlTarget, - setTreatmentTarget: wizard.setTreatmentTarget, - setControlWeight: wizard.setControlWeight, - setControlEval: wizard.setControlEval, - setTreatmentEval: wizard.setTreatmentEval, - setEnableOnCreate: wizard.setEnableOnCreate, - toAddABTestConfig: wizard.toAddABTestConfig, - })); - const ctrlName = wizard.config.controlTargetInfo ? wizard.config.controlTargetInfo.name : 'null'; - const treatName = wizard.config.treatmentTargetInfo ? 
wizard.config.treatmentTargetInfo.name : 'null'; - return ( - - {[ - `step:${wizard.step}`, - `name:${wizard.config.name}`, - `description:${wizard.config.description}`, - `gateway:${wizard.config.gateway}`, - `gatewayIsNew:${String(wizard.config.gatewayIsNew)}`, - `controlWeight:${wizard.config.controlWeight}`, - `treatmentWeight:${wizard.config.treatmentWeight}`, - `controlOnlineEval:${wizard.config.controlOnlineEval}`, - `treatmentOnlineEval:${wizard.config.treatmentOnlineEval}`, - `enableOnCreate:${String(wizard.config.enableOnCreate)}`, - `controlTargetInfo:${ctrlName}`, - `treatmentTargetInfo:${treatName}`, - ].join('|')} - - ); -}); -ImperativeHarness.displayName = 'ImperativeHarness'; - -// ── Tests ────────────────────────────────────────────────────────────────── - -describe('useTargetBasedWizard', () => { - describe('defaults', () => { - it('initial step is nameDescription', () => { - const { lastFrame } = render(); - expect(lastFrame()).toContain('step:nameDescription'); - }); - - it('default weights are 90/10', () => { - const { lastFrame } = render(); - expect(lastFrame()).toContain('controlWeight:90'); - expect(lastFrame()).toContain('treatmentWeight:10'); - }); - - it('default enableOnCreate is true', () => { - const { lastFrame } = render(); - expect(lastFrame()).toContain('enableOnCreate:true'); - }); - }); - - describe('step navigation', () => { - it('advanceFromNameDescription moves to gateway step', () => { - const ref = React.createRef(); - const { lastFrame } = render(); - - act(() => ref.current!.advanceFromNameDescription()); - - expect(lastFrame()).toContain('step:gateway'); - }); - - it('advance from gateway moves to builder', () => { - const ref = React.createRef(); - const { lastFrame } = render(); - - act(() => ref.current!.advanceFromNameDescription()); - // setGateway auto-advances to builder - act(() => ref.current!.setGateway('my-gw', false)); - - expect(lastFrame()).toContain('step:builder'); - }); - - it('advance from builder 
moves to enableOnCreate', () => { - const ref = React.createRef(); - const { lastFrame } = render(); - - act(() => ref.current!.advanceFromNameDescription()); - act(() => ref.current!.setGateway('my-gw', false)); - // Now at builder, advance to enableOnCreate - act(() => ref.current!.advance()); - - expect(lastFrame()).toContain('step:enableOnCreate'); - }); - - it('advance from enableOnCreate moves to confirm', () => { - const ref = React.createRef(); - const { lastFrame } = render(); - - act(() => ref.current!.advanceFromNameDescription()); - act(() => ref.current!.setGateway('my-gw', false)); - act(() => ref.current!.advance()); // builder → enableOnCreate - act(() => ref.current!.setEnableOnCreate(true)); // enableOnCreate → confirm - - expect(lastFrame()).toContain('step:confirm'); - }); - }); - - describe('goBack', () => { - it('goBack from gateway goes to nameDescription', () => { - const ref = React.createRef(); - const { lastFrame } = render(); - - act(() => ref.current!.advanceFromNameDescription()); - expect(lastFrame()).toContain('step:gateway'); - - act(() => ref.current!.goBack()); - expect(lastFrame()).toContain('step:nameDescription'); - }); - - it('goBack from builder goes to gateway', () => { - const ref = React.createRef(); - const { lastFrame } = render(); - - act(() => ref.current!.advanceFromNameDescription()); - act(() => ref.current!.setGateway('my-gw', false)); - expect(lastFrame()).toContain('step:builder'); - - act(() => ref.current!.goBack()); - expect(lastFrame()).toContain('step:gateway'); - }); - }); - - describe('config updates', () => { - it('setName updates config', () => { - const ref = React.createRef(); - const { lastFrame } = render(); - - act(() => ref.current!.setName('MyTest')); - - expect(lastFrame()).toContain('name:MyTest'); - }); - - it('setDescription updates config', () => { - const ref = React.createRef(); - const { lastFrame } = render(); - - act(() => ref.current!.setDescription('desc1')); - - 
expect(lastFrame()).toContain('description:desc1'); - }); - - it('setGateway updates config', () => { - const ref = React.createRef(); - const { lastFrame } = render(); - - act(() => ref.current!.advanceFromNameDescription()); - act(() => ref.current!.setGateway('gw-123', true)); - - expect(lastFrame()).toContain('gateway:gw-123'); - expect(lastFrame()).toContain('gatewayIsNew:true'); - }); - - it('setControlTarget updates config with targetInfo', () => { - const ref = React.createRef(); - const { lastFrame } = render(); - - const target: TargetInfo = { name: 'ctrl-target', runtimeRef: 'arn:runtime:1', qualifier: 'DEFAULT' }; - act(() => ref.current!.setControlTarget(target, false)); - - expect(lastFrame()).toContain('controlTargetInfo:ctrl-target'); - }); - - it('setTreatmentTarget updates config with targetInfo', () => { - const ref = React.createRef(); - const { lastFrame } = render(); - - const target: TargetInfo = { name: 'tt1', runtimeRef: 'arn:runtime:2', qualifier: 'v2' }; - act(() => ref.current!.setTreatmentTarget(target, true)); - - const frame = lastFrame()!.replace(/\n/g, ''); - expect(frame).toContain('treatmentTargetInfo:tt1'); - }); - - it('setControlWeight updates config (sum to 100)', () => { - const ref = React.createRef(); - const { lastFrame } = render(); - - act(() => ref.current!.setControlWeight(70)); - - const frame = lastFrame()!.replace(/\n/g, ''); - expect(frame).toContain('controlWeight:70'); - expect(frame).toContain('treatmentWeight:30'); - }); - - it('setControlEval updates config', () => { - const ref = React.createRef(); - const { lastFrame } = render(); - - act(() => ref.current!.setControlEval('eval-arn-1')); - - expect(lastFrame()).toContain('controlOnlineEval:eval-arn-1'); - }); - - it('setTreatmentEval updates config', () => { - const ref = React.createRef(); - const { lastFrame } = render(); - - act(() => ref.current!.setTreatmentEval('eval-arn-2')); - - expect(lastFrame()).toContain('treatmentOnlineEval:eval-arn-2'); - }); - 
}); - - describe('toAddABTestConfig', () => { - it('returns correct AddABTestConfig shape', () => { - const ref = React.createRef(); - render(); - - const controlTarget: TargetInfo = { name: 'ctrl', runtimeRef: 'arn:runtime:1', qualifier: 'DEFAULT' }; - const treatmentTarget: TargetInfo = { name: 'treat', runtimeRef: 'arn:runtime:2', qualifier: 'v2' }; - - act(() => ref.current!.setName('TestAB')); - act(() => ref.current!.setDescription('A/B test')); - act(() => ref.current!.advanceFromNameDescription()); - act(() => ref.current!.setGateway('my-gateway', false)); - act(() => ref.current!.setControlTarget(controlTarget, false)); - act(() => ref.current!.setTreatmentTarget(treatmentTarget, true)); - act(() => ref.current!.setControlWeight(80)); - act(() => ref.current!.setControlEval('eval-1')); - act(() => ref.current!.setTreatmentEval('eval-2')); - - let config: ReturnType | undefined; - act(() => { - config = ref.current!.toAddABTestConfig(); - }); - - expect(config).toBeDefined(); - expect(config!.mode).toBe('target-based'); - expect(config!.name).toBe('TestAB'); - expect(config!.description).toBe('A/B test'); - expect(config!.gateway).toBe('my-gateway'); - expect(config!.gatewayIsNew).toBe(false); - expect(config!.gatewayChoice).toEqual({ type: 'existing-http', name: 'my-gateway' }); - expect(config!.controlTargetInfo).toEqual(controlTarget); - expect(config!.controlTargetIsNew).toBe(false); - expect(config!.treatmentTargetInfo).toEqual(treatmentTarget); - expect(config!.treatmentTargetIsNew).toBe(true); - expect(config!.controlWeight).toBe(80); - expect(config!.treatmentWeight).toBe(20); - expect(config!.controlOnlineEval).toBe('eval-1'); - expect(config!.treatmentOnlineEval).toBe('eval-2'); - expect(config!.runtime).toBe('arn:runtime:1'); - expect(config!.controlTarget).toBe('ctrl'); - expect(config!.controlEndpoint).toBe('DEFAULT'); - expect(config!.treatmentTarget).toBe('treat'); - expect(config!.treatmentEndpoint).toBe('v2'); - 
expect(config!.enableOnCreate).toBe(true); - expect(config!.evaluators).toEqual([]); - expect(config!.samplingRate).toBe(10); - }); - - it('returns create-new gatewayChoice when gatewayIsNew is true', () => { - const ref = React.createRef(); - render(); - - act(() => ref.current!.advanceFromNameDescription()); - act(() => ref.current!.setGateway('new-gw', true)); - - let config: ReturnType | undefined; - act(() => { - config = ref.current!.toAddABTestConfig(); - }); - - expect(config!.gatewayChoice).toEqual({ type: 'create-new' }); - }); - }); -}); diff --git a/src/cli/tui/screens/ab-test/index.ts b/src/cli/tui/screens/ab-test/index.ts deleted file mode 100644 index 162b24eb9..000000000 --- a/src/cli/tui/screens/ab-test/index.ts +++ /dev/null @@ -1,4 +0,0 @@ -export { AddABTestFlow } from './AddABTestFlow'; -export { ABTestDetailScreen } from './ABTestDetailScreen'; -export { ABTestPickerScreen } from './ABTestPickerScreen'; -export { RemoveABTestScreen } from './RemoveABTestScreen'; diff --git a/src/cli/tui/screens/ab-test/types.ts b/src/cli/tui/screens/ab-test/types.ts deleted file mode 100644 index 977a2ca07..000000000 --- a/src/cli/tui/screens/ab-test/types.ts +++ /dev/null @@ -1,89 +0,0 @@ -// ───────────────────────────────────────────────────────────────────────────── -// AB Test Wizard Types -// ───────────────────────────────────────────────────────────────────────────── - -export type ABTestMode = 'config-bundle' | 'target-based'; - -export type AddABTestStep = - | 'mode' - | 'name' - | 'description' - | 'agent' - | 'gateway' - | 'variants' - | 'controlTarget' - | 'treatmentTarget' - | 'weights' - | 'evalPath' - | 'evalSelect' - | 'evalCreate' - | 'evalSamplingRate' - | 'onlineEval' - | 'maxDuration' - | 'enableOnCreate' - | 'confirm'; - -export type GatewayChoice = { type: 'create-new' } | { type: 'existing-http'; name: string }; - -/** Rich target info for target-based AB testing. 
*/ -export interface TargetInfo { - name: string; - runtimeRef: string; - qualifier: string; -} - -export interface AddABTestConfig { - mode: ABTestMode; - name: string; - description: string; - agent: string; - gatewayChoice: GatewayChoice; - // Config-bundle mode - controlBundle: string; - controlVersion: string; - treatmentBundle: string; - treatmentVersion: string; - treatmentWeight: number; - onlineEval: string; - // Target-based mode fields - gateway: string; - gatewayIsNew: boolean; - controlTargetInfo: TargetInfo | null; - controlTargetIsNew: boolean; - treatmentTargetInfo: TargetInfo | null; - treatmentTargetIsNew: boolean; - // Legacy target-based fields (populated from TargetInfo for downstream compatibility) - runtime: string; - controlTarget: string; - controlEndpoint: string; - treatmentTarget: string; - treatmentEndpoint: string; - controlWeight: number; - controlOnlineEval: string; - treatmentOnlineEval: string; - evaluators: string[]; - samplingRate: number; - // Shared - maxDuration: number | undefined; - enableOnCreate: boolean; -} - -export const AB_TEST_STEP_LABELS: Record = { - mode: 'Mode', - name: 'Name', - description: 'Description', - agent: 'Agent', - gateway: 'Gateway', - variants: 'Variants', - controlTarget: 'Control', - treatmentTarget: 'Treatment', - weights: 'Weights', - evalPath: 'Eval', - evalSelect: 'Eval', - evalCreate: 'Eval', - evalSamplingRate: 'Eval', - onlineEval: 'Eval', - maxDuration: 'Duration', - enableOnCreate: 'Enable', - confirm: 'Confirm', -}; diff --git a/src/cli/tui/screens/ab-test/useAddABTestWizard.ts b/src/cli/tui/screens/ab-test/useAddABTestWizard.ts deleted file mode 100644 index bb4fef0ad..000000000 --- a/src/cli/tui/screens/ab-test/useAddABTestWizard.ts +++ /dev/null @@ -1,324 +0,0 @@ -import type { VariantConfig } from './VariantConfigForm'; -import type { ABTestMode, AddABTestConfig, AddABTestStep, GatewayChoice, TargetInfo } from './types'; -import { useCallback, useRef, useState } from 'react'; - -const 
CONFIG_BUNDLE_STEPS: AddABTestStep[] = [ - 'mode', - 'name', - 'description', - 'gateway', - 'agent', - 'variants', - 'onlineEval', - 'maxDuration', - 'enableOnCreate', - 'confirm', -]; - -const TARGET_BASED_STEPS: AddABTestStep[] = [ - 'mode', - 'name', - 'description', - 'gateway', - 'controlTarget', - 'treatmentTarget', - 'weights', - 'evalSelect', - 'enableOnCreate', - 'confirm', -]; - -function getDefaultConfig(): AddABTestConfig { - return { - mode: 'config-bundle', - name: '', - description: '', - agent: '', - gatewayChoice: { type: 'create-new' }, - controlBundle: '', - controlVersion: '', - treatmentBundle: '', - treatmentVersion: '', - treatmentWeight: 20, - onlineEval: '', - // Target-based mode fields - gateway: '', - gatewayIsNew: false, - controlTargetInfo: null, - controlTargetIsNew: false, - treatmentTargetInfo: null, - treatmentTargetIsNew: false, - // Legacy target-based fields - runtime: '', - controlTarget: '', - controlEndpoint: '', - treatmentTarget: '', - treatmentEndpoint: '', - controlWeight: 90, - controlOnlineEval: '', - treatmentOnlineEval: '', - evaluators: [], - samplingRate: 10, - maxDuration: undefined, - enableOnCreate: true, - }; -} - -export type StepSkipCheck = (step: AddABTestStep) => boolean; - -export function useAddABTestWizard() { - const [config, setConfig] = useState(getDefaultConfig); - const [step, setStep] = useState('mode'); - const skipCheckRef = useRef(() => false); - - const getSteps = useCallback((): AddABTestStep[] => { - return config.mode === 'target-based' ? 
TARGET_BASED_STEPS : CONFIG_BUNDLE_STEPS; - }, [config.mode]); - - const currentIndex = getSteps().indexOf(step); - - const setSkipCheck = useCallback((check: StepSkipCheck) => { - skipCheckRef.current = check; - }, []); - - const goBack = useCallback(() => { - const steps = getSteps(); - for (let i = currentIndex - 1; i >= 0; i--) { - if (!skipCheckRef.current(steps[i]!)) { - setStep(steps[i]!); - return; - } - } - }, [currentIndex, getSteps]); - - const nextStep = useCallback( - (currentStepName: AddABTestStep): AddABTestStep | undefined => { - const steps = getSteps(); - const idx = steps.indexOf(currentStepName); - for (let i = idx + 1; i < steps.length; i++) { - if (!skipCheckRef.current(steps[i]!)) { - return steps[i]!; - } - } - return undefined; - }, - [getSteps] - ); - - const advance = useCallback( - (from: AddABTestStep) => { - const next = nextStep(from); - if (next) setStep(next); - }, - [nextStep] - ); - - const setMode = useCallback( - (mode: ABTestMode) => { - setConfig(c => ({ ...c, mode })); - advance('mode'); - }, - [advance] - ); - - const setName = useCallback( - (name: string) => { - setConfig(c => ({ ...c, name })); - advance('name'); - }, - [advance] - ); - - const setDescription = useCallback( - (description: string) => { - setConfig(c => ({ ...c, description })); - advance('description'); - }, - [advance] - ); - - const setAgent = useCallback( - (agent: string) => { - setConfig(c => ({ ...c, agent })); - advance('agent'); - }, - [advance] - ); - - const setGateway = useCallback( - (gatewayChoice: GatewayChoice) => { - setConfig(c => ({ - ...c, - gatewayChoice, - gateway: gatewayChoice.type === 'existing-http' ? gatewayChoice.name : '', - gatewayIsNew: gatewayChoice.type === 'create-new', - })); - advance('gateway'); - }, - [advance] - ); - - const setGatewayWithName = useCallback( - (gatewayName: string, isNew: boolean) => { - const gatewayChoice: GatewayChoice = isNew - ? 
{ type: 'create-new' } - : { type: 'existing-http', name: gatewayName }; - setConfig(c => ({ - ...c, - gatewayChoice, - gateway: gatewayName, - gatewayIsNew: isNew, - })); - advance('gateway'); - }, - [advance] - ); - - const setVariants = useCallback( - (variantConfig: VariantConfig) => { - setConfig(c => ({ - ...c, - controlBundle: variantConfig.controlBundle, - controlVersion: variantConfig.controlVersion, - treatmentBundle: variantConfig.treatmentBundle, - treatmentVersion: variantConfig.treatmentVersion, - treatmentWeight: variantConfig.treatmentWeight, - })); - advance('variants'); - }, - [advance] - ); - - const setOnlineEval = useCallback( - (onlineEval: string) => { - setConfig(c => ({ ...c, onlineEval })); - advance('onlineEval'); - }, - [advance] - ); - - // Target-based mode setters - - const setControlTarget = useCallback( - (target: TargetInfo, isNew: boolean) => { - setConfig(c => ({ - ...c, - controlTargetInfo: target, - controlTargetIsNew: isNew, - controlTarget: target.name, - controlEndpoint: target.qualifier, - runtime: target.runtimeRef, - })); - advance('controlTarget'); - }, - [advance] - ); - - const setTreatmentTarget = useCallback( - (target: TargetInfo, isNew: boolean) => { - setConfig(c => ({ - ...c, - treatmentTargetInfo: target, - treatmentTargetIsNew: isNew, - treatmentTarget: target.name, - treatmentEndpoint: target.qualifier, - // Keep runtime from control if already set, otherwise use treatment's - runtime: c.runtime || target.runtimeRef, - })); - advance('treatmentTarget'); - }, - [advance] - ); - - const setWeights = useCallback( - (controlWeight: number, treatmentWeight: number) => { - setConfig(c => ({ ...c, controlWeight, treatmentWeight })); - advance('weights'); - }, - [advance] - ); - - const setEvalPath = useCallback( - (path: 'select' | 'create') => { - if (path === 'select') { - advance('evalPath'); - } else { - // Skip evalSelect, go to evalCreate - setStep('evalCreate'); - } - }, - [advance] - ); - - const 
setEvalSelect = useCallback( - (controlEval: string, treatmentEval: string) => { - setConfig(c => ({ ...c, controlOnlineEval: controlEval, treatmentOnlineEval: treatmentEval })); - advance('evalSelect'); - }, - [advance] - ); - - const setEvaluators = useCallback( - (evaluators: string[]) => { - setConfig(c => ({ ...c, evaluators })); - advance('evalCreate'); - }, - [advance] - ); - - const setSamplingRate = useCallback( - (samplingRate: number) => { - setConfig(c => ({ ...c, samplingRate })); - advance('evalSamplingRate'); - }, - [advance] - ); - - const setMaxDuration = useCallback( - (maxDuration: number | undefined) => { - setConfig(c => ({ ...c, maxDuration })); - advance('maxDuration'); - }, - [advance] - ); - - const setEnableOnCreate = useCallback( - (enableOnCreate: boolean) => { - setConfig(c => ({ ...c, enableOnCreate })); - advance('enableOnCreate'); - }, - [advance] - ); - - const reset = useCallback(() => { - setConfig(getDefaultConfig()); - setStep('mode'); - }, []); - - return { - config, - step, - steps: getSteps(), - currentIndex, - goBack, - setSkipCheck, - setMode, - setName, - setDescription, - setAgent, - setGateway, - setGatewayWithName, - setVariants, - setOnlineEval, - setControlTarget, - setTreatmentTarget, - setWeights, - setEvalPath, - setEvalSelect, - setEvaluators, - setSamplingRate, - setMaxDuration, - setEnableOnCreate, - reset, - }; -} diff --git a/src/cli/tui/screens/ab-test/useTargetBasedWizard.ts b/src/cli/tui/screens/ab-test/useTargetBasedWizard.ts deleted file mode 100644 index 7c26474d8..000000000 --- a/src/cli/tui/screens/ab-test/useTargetBasedWizard.ts +++ /dev/null @@ -1,188 +0,0 @@ -import type { AddABTestConfig, GatewayChoice, TargetInfo } from './types'; -import { useCallback, useState } from 'react'; - -export type TargetBasedStep = 'nameDescription' | 'gateway' | 'builder' | 'enableOnCreate' | 'confirm'; - -export const TARGET_BASED_STEP_LABELS: Record = { - nameDescription: 'Name', - gateway: 'Gateway', - builder: 
'Configure', - enableOnCreate: 'Enable', - confirm: 'Confirm', -}; - -const STEPS: TargetBasedStep[] = ['nameDescription', 'gateway', 'builder', 'enableOnCreate', 'confirm']; - -interface TargetBasedConfig { - name: string; - description: string; - gateway: string; - gatewayIsNew: boolean; - controlTargetInfo: TargetInfo | null; - controlTargetIsNew: boolean; - controlWeight: number; - controlOnlineEval: string; - treatmentTargetInfo: TargetInfo | null; - treatmentTargetIsNew: boolean; - treatmentWeight: number; - treatmentOnlineEval: string; - enableOnCreate: boolean; -} - -function getDefaultConfig(): TargetBasedConfig { - return { - name: '', - description: '', - gateway: '', - gatewayIsNew: false, - controlTargetInfo: null, - controlTargetIsNew: false, - controlWeight: 90, - controlOnlineEval: '', - treatmentTargetInfo: null, - treatmentTargetIsNew: false, - treatmentWeight: 10, - treatmentOnlineEval: '', - enableOnCreate: true, - }; -} - -export function useTargetBasedWizard() { - const [config, setConfig] = useState(getDefaultConfig); - const [step, setStep] = useState('nameDescription'); - - const currentIndex = STEPS.indexOf(step); - - const goBack = useCallback(() => { - const idx = STEPS.indexOf(step); - if (idx > 0) { - setStep(STEPS[idx - 1]!); - } - }, [step]); - - const advance = useCallback(() => { - const idx = STEPS.indexOf(step); - if (idx < STEPS.length - 1) { - setStep(STEPS[idx + 1]!); - } - }, [step]); - - const setName = useCallback((name: string) => { - setConfig(c => ({ ...c, name })); - }, []); - - const setDescription = useCallback((description: string) => { - setConfig(c => ({ ...c, description })); - }, []); - - const advanceFromNameDescription = useCallback(() => { - setStep('gateway'); - }, []); - - const setGateway = useCallback((name: string, isNew: boolean) => { - setConfig(c => ({ ...c, gateway: name, gatewayIsNew: isNew })); - // Auto-advance to builder - setStep('builder'); - }, []); - - const setControlTarget = 
useCallback((target: TargetInfo, isNew: boolean) => { - setConfig(c => ({ - ...c, - controlTargetInfo: target, - controlTargetIsNew: isNew, - })); - }, []); - - const setTreatmentTarget = useCallback((target: TargetInfo, isNew: boolean) => { - setConfig(c => ({ - ...c, - treatmentTargetInfo: target, - treatmentTargetIsNew: isNew, - })); - }, []); - - const setControlWeight = useCallback((w: number) => { - setConfig(c => ({ ...c, controlWeight: w, treatmentWeight: 100 - w })); - }, []); - - const setControlEval = useCallback((name: string) => { - setConfig(c => ({ ...c, controlOnlineEval: name })); - }, []); - - const setTreatmentEval = useCallback((name: string) => { - setConfig(c => ({ ...c, treatmentOnlineEval: name })); - }, []); - - const setEnableOnCreate = useCallback((enableOnCreate: boolean) => { - setConfig(c => ({ ...c, enableOnCreate })); - setStep('confirm'); - }, []); - - const isBuilderComplete = - config.controlTargetInfo !== null && - config.treatmentTargetInfo !== null && - config.controlWeight > 0 && - config.treatmentWeight > 0; - - const toAddABTestConfig = useCallback((): AddABTestConfig => { - const gatewayChoice: GatewayChoice = config.gatewayIsNew - ? { type: 'create-new' } - : { type: 'existing-http', name: config.gateway }; - - return { - mode: 'target-based', - name: config.name, - description: config.description, - agent: '', - gatewayChoice, - // Config-bundle fields (safe defaults) - controlBundle: '', - controlVersion: '', - treatmentBundle: '', - treatmentVersion: '', - treatmentWeight: config.treatmentWeight, - onlineEval: '', - // Target-based fields - gateway: config.gateway, - gatewayIsNew: config.gatewayIsNew, - controlTargetInfo: config.controlTargetInfo, - controlTargetIsNew: config.controlTargetIsNew, - treatmentTargetInfo: config.treatmentTargetInfo, - treatmentTargetIsNew: config.treatmentTargetIsNew, - // Legacy target-based fields - runtime: config.controlTargetInfo?.runtimeRef ?? 
'', - controlTarget: config.controlTargetInfo?.name ?? '', - controlEndpoint: config.controlTargetInfo?.qualifier ?? '', - treatmentTarget: config.treatmentTargetInfo?.name ?? '', - treatmentEndpoint: config.treatmentTargetInfo?.qualifier ?? '', - controlWeight: config.controlWeight, - controlOnlineEval: config.controlOnlineEval, - treatmentOnlineEval: config.treatmentOnlineEval, - evaluators: [], - samplingRate: 10, - maxDuration: undefined, - enableOnCreate: config.enableOnCreate, - }; - }, [config]); - - return { - config, - step, - steps: STEPS, - currentIndex, - goBack, - advance, - setName, - setDescription, - advanceFromNameDescription, - setGateway, - setControlTarget, - setTreatmentTarget, - setControlWeight, - setControlEval, - setTreatmentEval, - setEnableOnCreate, - isBuilderComplete, - toAddABTestConfig, - }; -} diff --git a/src/cli/tui/screens/add/AddFlow.tsx b/src/cli/tui/screens/add/AddFlow.tsx index 02995be50..8f25be054 100644 --- a/src/cli/tui/screens/add/AddFlow.tsx +++ b/src/cli/tui/screens/add/AddFlow.tsx @@ -3,7 +3,6 @@ import { VPC_ENDPOINT_WARNING } from '../../../commands/shared/vpc-utils'; import { computeDefaultCredentialEnvVarName } from '../../../primitives/credential-utils'; import { ErrorPrompt } from '../../components'; import { useAvailableAgents } from '../../hooks/useCreateMcp'; -import { AddABTestFlow } from '../ab-test'; import { AddAgentFlow } from '../agent/AddAgentFlow'; import type { AddAgentConfig } from '../agent/types'; import { FRAMEWORK_OPTIONS } from '../agent/types'; @@ -13,12 +12,15 @@ import { AddDatasetFlow } from '../dataset'; import { AddEvaluatorFlow } from '../evaluator'; import { AddHarnessFlow } from '../harness/AddHarnessFlow'; import { AddIdentityFlow } from '../identity'; +import { AddKnowledgeBaseFlow } from '../knowledge-base'; import { AddGatewayFlow, AddGatewayTargetFlow } from '../mcp'; import { AddMemoryFlow } from '../memory/AddMemoryFlow'; import { AddOnlineEvalFlow } from '../online-eval'; +import 
{ AddOnlineInsightsFlow } from '../online-insights'; import { AddPaymentFlow } from '../payment'; import { AddPolicyFlow } from '../policy'; import { AddRuntimeEndpointFlow } from '../runtime-endpoint'; +import { AddWebSearchFlow } from '../web-search'; import type { AddResourceType } from './AddScreen'; import { AddScreen } from './AddScreen'; import { AddSuccessScreen } from './AddSuccessScreen'; @@ -33,13 +35,15 @@ type FlowState = | { name: 'gateway-wizard' } | { name: 'tool-wizard' } | { name: 'memory-wizard' } + | { name: 'knowledge-base-wizard' } + | { name: 'web-search-wizard' } | { name: 'identity-wizard' } | { name: 'evaluator-wizard' } | { name: 'online-eval-wizard' } + | { name: 'online-insights-wizard' } | { name: 'policy-wizard' } | { name: 'dataset-wizard' } | { name: 'config-bundle-wizard' } - | { name: 'ab-test-wizard' } | { name: 'runtime-endpoint-wizard' } | { name: 'payment-manager-wizard' } | { name: 'payment-connector-wizard' } @@ -186,12 +190,18 @@ function getInitialFlowState(resource?: AddResourceType): FlowState { return { name: 'tool-wizard' }; case 'memory': return { name: 'memory-wizard' }; + case 'knowledge-base': + return { name: 'knowledge-base-wizard' }; + case 'web-search': + return { name: 'web-search-wizard' }; case 'credential': return { name: 'identity-wizard' }; case 'evaluator': return { name: 'evaluator-wizard' }; case 'online-eval': return { name: 'online-eval-wizard' }; + case 'online-insights': + return { name: 'online-insights-wizard' }; case 'policy': return { name: 'policy-wizard' }; case 'runtime-endpoint': @@ -200,8 +210,6 @@ function getInitialFlowState(resource?: AddResourceType): FlowState { return { name: 'dataset-wizard' }; case 'config-bundle': return { name: 'config-bundle-wizard' }; - case 'ab-test': - return { name: 'ab-test-wizard' }; case 'payment-manager': return { name: 'payment-manager-wizard' }; case 'payment-connector': @@ -244,6 +252,12 @@ export function AddFlow(props: AddFlowProps) { case 'memory': 
setFlow({ name: 'memory-wizard' }); break; + case 'knowledge-base': + setFlow({ name: 'knowledge-base-wizard' }); + break; + case 'web-search': + setFlow({ name: 'web-search-wizard' }); + break; case 'credential': setFlow({ name: 'identity-wizard' }); break; @@ -253,6 +267,9 @@ export function AddFlow(props: AddFlowProps) { case 'online-eval': setFlow({ name: 'online-eval-wizard' }); break; + case 'online-insights': + setFlow({ name: 'online-insights-wizard' }); + break; case 'policy': setFlow({ name: 'policy-wizard' }); break; @@ -262,9 +279,6 @@ export function AddFlow(props: AddFlowProps) { case 'config-bundle': setFlow({ name: 'config-bundle-wizard' }); break; - case 'ab-test': - setFlow({ name: 'ab-test-wizard' }); - break; case 'runtime-endpoint': setFlow({ name: 'runtime-endpoint-wizard' }); break; @@ -474,6 +488,32 @@ export function AddFlow(props: AddFlowProps) { ); } + // Knowledge base wizard + if (flow.name === 'knowledge-base-wizard') { + return ( + setFlow({ name: 'select' })} + onExit={props.onExit} + onDev={props.onDev} + onDeploy={props.onDeploy} + /> + ); + } + + // Web search wizard + if (flow.name === 'web-search-wizard') { + return ( + setFlow({ name: 'select' })} + onExit={props.onExit} + onDev={props.onDev} + onDeploy={props.onDeploy} + /> + ); + } + // Identity wizard - now uses AddIdentityFlow with mode selection if (flow.name === 'identity-wizard') { return ( @@ -513,10 +553,10 @@ export function AddFlow(props: AddFlowProps) { ); } - // Policy wizard - picker for policy engine vs policy, then wizard - if (flow.name === 'policy-wizard') { + // Online insights wizard + if (flow.name === 'online-insights-wizard') { return ( - setFlow({ name: 'select' })} @@ -526,10 +566,10 @@ export function AddFlow(props: AddFlowProps) { ); } - // Dataset wizard - if (flow.name === 'dataset-wizard') { + // Policy wizard - picker for policy engine vs policy, then wizard + if (flow.name === 'policy-wizard') { return ( - setFlow({ name: 'select' })} @@ -539,10 
+579,10 @@ export function AddFlow(props: AddFlowProps) { ); } - // Configuration bundle wizard - if (flow.name === 'config-bundle-wizard') { + // Dataset wizard + if (flow.name === 'dataset-wizard') { return ( - setFlow({ name: 'select' })} @@ -552,10 +592,10 @@ export function AddFlow(props: AddFlowProps) { ); } - // AB test wizard - if (flow.name === 'ab-test-wizard') { + // Configuration bundle wizard + if (flow.name === 'config-bundle-wizard') { return ( - setFlow({ name: 'select' })} diff --git a/src/cli/tui/screens/add/AddScreen.tsx b/src/cli/tui/screens/add/AddScreen.tsx index a05db1ee1..aca3e59a8 100644 --- a/src/cli/tui/screens/add/AddScreen.tsx +++ b/src/cli/tui/screens/add/AddScreen.tsx @@ -1,4 +1,4 @@ -import { isPreviewEnabled } from '../../../feature-flags'; +import { isGatedFeaturesEnabled, isPreviewEnabled } from '../../../feature-flags'; import type { SelectableItem } from '../../components'; import { SelectScreen } from '../../components'; @@ -6,15 +6,17 @@ export type AddResourceType = | 'harness' | 'agent' | 'memory' + | 'knowledge-base' + | 'web-search' | 'credential' | 'evaluator' | 'online-eval' + | 'online-insights' | 'gateway' | 'gateway-target' | 'runtime-endpoint' | 'policy' | 'config-bundle' - | 'ab-test' | 'dataset' | 'payment-manager' | 'payment-connector'; @@ -22,20 +24,22 @@ export type AddResourceType = const BASE_ADD_RESOURCES: { id: AddResourceType; title: string; description: string }[] = [ { id: 'agent', title: 'Agent', description: 'Deploy an HTTP, MCP, A2A, or AG-UI agent' }, { id: 'memory', title: 'Memory', description: 'Persistent context storage' }, + { id: 'knowledge-base', title: 'Knowledge Base', description: 'Create a managed knowledge base for retrieval' }, + { id: 'web-search', title: 'Web Search', description: 'Wire the Amazon Web Search managed connector to a gateway' }, { id: 'credential', title: 'Credential', description: 'API key credential providers' }, { id: 'evaluator', title: 'Evaluator', description: 
'Custom LLM-as-a-Judge evaluator' }, { id: 'online-eval', title: 'Online Eval Config', description: 'Continuous evaluation pipeline' }, + { id: 'online-insights', title: 'Online Insights [preview]', description: 'Continuous failure analysis pipeline' }, { id: 'gateway', title: 'Gateway', description: 'Route and manage gateway targets' }, { id: 'gateway-target', title: 'Gateway Target', description: 'Extend agent capabilities' }, { id: 'runtime-endpoint', title: 'Runtime Endpoint', description: 'Named endpoint for a runtime' }, { id: 'policy', title: 'Policy', description: 'Cedar policies for gateway tools' }, { id: 'dataset', title: 'Dataset', description: 'Evaluation dataset for testing agents' }, - { id: 'config-bundle', title: 'Configuration Bundle [preview]', description: 'Versioned component configurations' }, - { id: 'ab-test', title: 'AB Test [preview]', description: 'Compare agent configurations with traffic splitting' }, - { id: 'payment-manager', title: 'Payment Manager', description: 'x402 crypto microtransactions config' }, + { id: 'config-bundle', title: 'Configuration Bundle', description: 'Versioned component configurations' }, + { id: 'payment-manager', title: 'Payment Manager [preview]', description: 'x402 crypto microtransactions config' }, { id: 'payment-connector', - title: 'Payment Connector', + title: 'Payment Connector [preview]', description: 'Link payment provider credentials to a manager', }, ]; @@ -47,11 +51,14 @@ const ADD_RESOURCES: { id: AddResourceType; title: string; description: string } ...BASE_ADD_RESOURCES, ]; -const ADD_RESOURCE_ITEMS: SelectableItem[] = ADD_RESOURCES.map(r => ({ - ...r, - disabled: false, - description: r.description, -})); +const ADD_RESOURCE_ITEMS: SelectableItem[] = ADD_RESOURCES.map(r => { + const gated = (r.id === 'knowledge-base' || r.id === 'web-search') && !isGatedFeaturesEnabled(); + return { + ...r, + disabled: gated, + description: gated ? 
'Coming soon' : r.description, + }; +}); interface AddScreenProps { onSelect: (resourceType: AddResourceType) => void; @@ -65,7 +72,12 @@ export function AddScreen({ onSelect, onExit }: AddScreenProps) { onSelect(item.id as AddResourceType)} + onSelect={item => { + // Safe: ADD_RESOURCE_ITEMS is built from ADD_RESOURCES whose ids are + // typed as AddResourceType. + const resource = ADD_RESOURCES.find(r => r.id === item.id); + if (resource) onSelect(resource.id); + }} onExit={onExit} isDisabled={isDisabled} /> diff --git a/src/cli/tui/screens/add/__tests__/AddScreen.test.tsx b/src/cli/tui/screens/add/__tests__/AddScreen.test.tsx index d1592e14f..7ca2a3cb7 100644 --- a/src/cli/tui/screens/add/__tests__/AddScreen.test.tsx +++ b/src/cli/tui/screens/add/__tests__/AddScreen.test.tsx @@ -1,9 +1,18 @@ import { AddScreen } from '../AddScreen.js'; import { render } from 'ink-testing-library'; import React from 'react'; -import { describe, expect, it, vi } from 'vitest'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; describe('AddScreen', () => { + const originalGate = process.env.ENABLE_GATED_FEATURES; + beforeEach(() => { + process.env.ENABLE_GATED_FEATURES = '1'; + }); + afterEach(() => { + if (originalGate === undefined) delete process.env.ENABLE_GATED_FEATURES; + else process.env.ENABLE_GATED_FEATURES = originalGate; + }); + it('gateway and gateway-target options are present and not disabled', () => { const onSelect = vi.fn(); const onExit = vi.fn(); @@ -23,4 +32,15 @@ describe('AddScreen', () => { expect(lastFrame()).toContain('Payment Manager'); expect(lastFrame()).toContain('Payment Connector'); }); + + it('Web Search option shows Coming soon when ENABLE_GATED_FEATURES is unset', () => { + delete process.env.ENABLE_GATED_FEATURES; + const onSelect = vi.fn(); + const onExit = vi.fn(); + + const { lastFrame } = render(); + + expect(lastFrame()).toContain('Web Search'); + expect(lastFrame()).toContain('Coming soon'); + }); }); diff --git 
a/src/cli/tui/screens/agent/AddAgentScreen.tsx b/src/cli/tui/screens/agent/AddAgentScreen.tsx index 757af7e9c..ad52b60f1 100644 --- a/src/cli/tui/screens/agent/AddAgentScreen.tsx +++ b/src/cli/tui/screens/agent/AddAgentScreen.tsx @@ -599,7 +599,10 @@ export function AddAgentScreen({ existingAgentNames, onComplete, onExit }: AddAg resetByoFilesystemState(); } // Config bundle has no sub-steps — set flag immediately - setByoConfig(c => ({ ...c, withConfigBundle: selected.has('configBundle') || undefined })); + setByoConfig(c => ({ + ...c, + withConfigBundle: selected.has('configBundle') || undefined, + })); // Navigate to first advanced sub-step (steps memo hasn't updated yet) setTimeout(() => { if (selected.has('dockerfile') && byoConfig.buildType === 'Container') { diff --git a/src/cli/tui/screens/config-bundle-hub/ConfigBundleHubScreen.tsx b/src/cli/tui/screens/config-bundle-hub/ConfigBundleHubScreen.tsx index 476336dd0..88428161e 100644 --- a/src/cli/tui/screens/config-bundle-hub/ConfigBundleHubScreen.tsx +++ b/src/cli/tui/screens/config-bundle-hub/ConfigBundleHubScreen.tsx @@ -46,7 +46,7 @@ export function ConfigBundleHubScreen({ onSelectBundle, onExit }: ConfigBundleHu if (isLoading) { return ( - + Loading configuration bundles... ); @@ -54,7 +54,7 @@ export function ConfigBundleHubScreen({ onSelectBundle, onExit }: ConfigBundleHu if (error) { return ( - + Error: {error} ); @@ -62,7 +62,7 @@ export function ConfigBundleHubScreen({ onSelectBundle, onExit }: ConfigBundleHu if (bundles.length === 0) { return ( - + No configuration bundles found. Use `agentcore add config-bundle` to create one, then deploy. 
@@ -81,7 +81,7 @@ export function ConfigBundleHubScreen({ onSelectBundle, onExit }: ConfigBundleHu return ( Description: {bundle.description} )} + {bundle.createdAt && ( + + {' '} + Created: {formatRelativeTime(String(bundle.createdAt))} + + )} {bundle.lastUpdated && ( {' '} diff --git a/src/cli/tui/screens/config-bundle-hub/useConfigBundleHub.ts b/src/cli/tui/screens/config-bundle-hub/useConfigBundleHub.ts index c0276ae53..5cf7ca89b 100644 --- a/src/cli/tui/screens/config-bundle-hub/useConfigBundleHub.ts +++ b/src/cli/tui/screens/config-bundle-hub/useConfigBundleHub.ts @@ -18,6 +18,7 @@ export interface BundleWithMeta { description?: string; versionCount: number; branches: string[]; + createdAt?: number; lastUpdated?: string; } @@ -73,10 +74,20 @@ export function useConfigBundleHub(): ConfigBundleHubState { const deployedBundles = Object.values(deployedState.targets).find(t => t.resources?.configBundles)?.resources?.configBundles ?? {}; + // Fetch all bundles from the list API once to get createdAt for each + let allBundlesList: Awaited> | undefined; + try { + allBundlesList = await listConfigurationBundles({ region: resolvedRegion, maxResults: 100 }); + } catch { + // Non-critical — continue without createdAt + } + // Build bundle list from project config, enriching with deployed version info const enriched = await Promise.all( projectBundles.map(async (bundleSpec): Promise => { const deployed = deployedBundles[bundleSpec.name]; + const nameVariants = getBundleNameVariants(bundleSpec.name, projectSpec.name); + const listMatch = allBundlesList?.bundles.find(b => nameVariants.includes(b.bundleName)); if (!deployed) { // Not yet deployed — show from project config only return { @@ -86,6 +97,7 @@ export function useConfigBundleHub(): ConfigBundleHubState { description: bundleSpec.description, versionCount: 0, branches: bundleSpec.branchName ? 
[bundleSpec.branchName] : [], + createdAt: listMatch?.createdAt, }; } @@ -113,12 +125,14 @@ export function useConfigBundleHub(): ConfigBundleHubState { description: bundleSpec.description, versionCount: versions.versions.length, branches: [...branchSet], + createdAt: listMatch?.createdAt, lastUpdated: latestTs || undefined, }; } catch { // Stale deployed-state ID — try to resolve via list API try { - const allBundles = await listConfigurationBundles({ region: resolvedRegion, maxResults: 100 }); + const allBundles = + allBundlesList ?? (await listConfigurationBundles({ region: resolvedRegion, maxResults: 100 })); const nameVariants = getBundleNameVariants(bundleSpec.name, projectSpec.name); const match = allBundles.bundles.find(b => nameVariants.includes(b.bundleName)); if (match) { @@ -142,6 +156,7 @@ export function useConfigBundleHub(): ConfigBundleHubState { description: bundleSpec.description, versionCount: versions.versions.length, branches: [...branchSet], + createdAt: match.createdAt, lastUpdated: latestTs || undefined, }; } diff --git a/src/cli/tui/screens/config-bundle/AddConfigBundleFlow.tsx b/src/cli/tui/screens/config-bundle/AddConfigBundleFlow.tsx index 2ccc9fab2..511265ece 100644 --- a/src/cli/tui/screens/config-bundle/AddConfigBundleFlow.tsx +++ b/src/cli/tui/screens/config-bundle/AddConfigBundleFlow.tsx @@ -50,7 +50,7 @@ export function AddConfigBundleFlow({ deployedArns.add(name); } } - const httpGateways = target.resources?.httpGateways; + const httpGateways = target.resources?.gateways; if (httpGateways) { for (const [name, state] of Object.entries(httpGateways)) { components.push({ name, arn: state.gatewayArn, type: 'gateway' }); @@ -75,7 +75,7 @@ export function AddConfigBundleFlow({ }); } } - for (const gw of projectSpec.httpGateways ?? []) { + for (const gw of (projectSpec.agentCoreGateways ?? 
[]).filter(g => g.protocolType === 'None')) { if (!deployedArns.has(gw.name)) { components.push({ name: gw.name, diff --git a/src/cli/tui/screens/config-bundle/AddConfigBundleScreen.tsx b/src/cli/tui/screens/config-bundle/AddConfigBundleScreen.tsx index 47d33ddf5..aab3b555a 100644 --- a/src/cli/tui/screens/config-bundle/AddConfigBundleScreen.tsx +++ b/src/cli/tui/screens/config-bundle/AddConfigBundleScreen.tsx @@ -4,6 +4,7 @@ import { ConfirmReview, Panel, Screen, StepIndicator, TextInput, WizardSelect } import { HELP_TEXT } from '../../constants'; import { useListNavigation } from '../../hooks'; import { generateUniqueName } from '../../utils'; +import { COMPONENT_KEY_ERROR, COMPONENT_KEY_PATTERN } from './constants'; import type { AddConfigBundleConfig, ComponentType, DeployedComponent } from './types'; import { COMPONENT_TYPE_OPTIONS, CONFIG_BUNDLE_STEP_LABELS } from './types'; import { useAddConfigBundleWizard } from './useAddConfigBundleWizard'; @@ -17,6 +18,10 @@ interface AddConfigBundleScreenProps { deployedComponents: DeployedComponent[]; } +function validateComponentArn(value: string): string | true { + return COMPONENT_KEY_PATTERN.test(value) || COMPONENT_KEY_ERROR; +} + function validateConfigJson(value: string): string | true { try { const parsed: unknown = JSON.parse(value); @@ -71,6 +76,7 @@ export function AddConfigBundleScreen({ const isDescriptionStep = wizard.step === 'description'; const isComponentTypeStep = wizard.step === 'componentType'; const isComponentSelectStep = wizard.step === 'componentSelect'; + const isComponentArnEntryStep = wizard.step === 'componentArnEntry'; const isConfigurationStep = wizard.step === 'configuration'; const isAddAnotherStep = wizard.step === 'addAnother'; const isBranchNameStep = wizard.step === 'branchName'; @@ -185,6 +191,24 @@ export function AddConfigBundleScreen({ )} + {isComponentArnEntryStep && ( + <> + + Enter the component ARN + The resource this configuration applies to (e.g. a gateway target). 
+ + wizard.goBack()} + customValidation={validateComponentArn} + /> + + )} + {isConfigurationStep && ( <> diff --git a/src/cli/tui/screens/config-bundle/__tests__/useAddConfigBundleWizard.test.tsx b/src/cli/tui/screens/config-bundle/__tests__/useAddConfigBundleWizard.test.tsx new file mode 100644 index 000000000..198c22f5c --- /dev/null +++ b/src/cli/tui/screens/config-bundle/__tests__/useAddConfigBundleWizard.test.tsx @@ -0,0 +1,167 @@ +import { COMPONENT_KEY_PATTERN } from '../constants'; +import { useAddConfigBundleWizard } from '../useAddConfigBundleWizard'; +import { Text } from 'ink'; +import { render } from 'ink-testing-library'; +import React, { act, useImperativeHandle } from 'react'; +import { describe, expect, it } from 'vitest'; + +type WizardReturn = ReturnType; + +interface HarnessHandle { + wizard: WizardReturn; +} + +const Harness = React.forwardRef((_props, ref) => { + const wizard = useAddConfigBundleWizard(); + useImperativeHandle(ref, () => ({ wizard })); + return step:{wizard.step}; +}); +Harness.displayName = 'Harness'; + +function setup() { + const ref = React.createRef(); + const result = render(); + return { ref, ...result }; +} + +/** Drive the wizard forward to the addAnother step with one component configured. 
*/ +function advanceToAddAnother(ref: React.RefObject) { + act(() => ref.current!.wizard.setName('myBundle')); + act(() => ref.current!.wizard.setDescription('desc')); + act(() => ref.current!.wizard.setComponentType('runtime')); + act(() => ref.current!.wizard.setSelectedComponent('arn:aws:runtime/r1')); + act(() => ref.current!.wizard.setConfiguration({ systemPrompt: 'hi' })); +} + +describe('useAddConfigBundleWizard — add-another back-navigation (BUG TUI-B)', () => { + it('reaches addAnother after configuring one component', () => { + const { ref } = setup(); + advanceToAddAnother(ref); + expect(ref.current!.wizard.step).toBe('addAnother'); + }); + + it('back from a re-entered componentType returns to addAnother, not description', () => { + const { ref } = setup(); + advanceToAddAnother(ref); + + // User chooses "add another component" → re-enters componentType. + act(() => ref.current!.wizard.addAnotherComponent()); + expect(ref.current!.wizard.step).toBe('componentType'); + + // Backing out must return to the addAnother decision point (where "Continue" lives), + // NOT fall through the linear order to `description` (which would strip the Continue path). + act(() => ref.current!.wizard.goBack()); + expect(ref.current!.wizard.step).toBe('addAnother'); + + // And the already-configured component is preserved. 
+ expect(Object.keys(ref.current!.wizard.config.components)).toHaveLength(1); + }); + + it('back from re-entered componentSelect returns to componentType, then to addAnother', () => { + const { ref } = setup(); + advanceToAddAnother(ref); + act(() => ref.current!.wizard.addAnotherComponent()); + act(() => ref.current!.wizard.setComponentType('runtime')); + expect(ref.current!.wizard.step).toBe('componentSelect'); + + act(() => ref.current!.wizard.goBack()); + expect(ref.current!.wizard.step).toBe('componentType'); + act(() => ref.current!.wizard.goBack()); + expect(ref.current!.wizard.step).toBe('addAnother'); + }); + + it('doneAddingComponents advances past components and clears the loop flag', () => { + const { ref } = setup(); + advanceToAddAnother(ref); + act(() => ref.current!.wizard.doneAddingComponents()); + // When ENABLE_GATED_FEATURES is off, branchName is skipped (defaults to mainline) + const expectedStep = process.env.ENABLE_GATED_FEATURES === '1' ? 'branchName' : 'commitMessage'; + expect(ref.current!.wizard.step).toBe(expectedStep); + + // Back from the current step follows the linear order, not the loop guard. + act(() => ref.current!.wizard.goBack()); + const expectedBackStep = process.env.ENABLE_GATED_FEATURES === '1' ? 'addAnother' : 'branchName'; + expect(ref.current!.wizard.step).toBe(expectedBackStep); + }); + + it('first-pass back-navigation is unaffected (componentType → description)', () => { + const { ref } = setup(); + act(() => ref.current!.wizard.setName('myBundle')); + act(() => ref.current!.wizard.setDescription('desc')); + expect(ref.current!.wizard.step).toBe('componentType'); + act(() => ref.current!.wizard.goBack()); + expect(ref.current!.wizard.step).toBe('description'); + }); +}); + +describe('useAddConfigBundleWizard — custom ARN component (Part 1)', () => { + /** Drive the wizard to the componentType step. 
*/ + function advanceToComponentType(ref: React.RefObject) { + act(() => ref.current!.wizard.setName('myBundle')); + act(() => ref.current!.wizard.setDescription('desc')); + } + + const CUSTOM_ARN = 'arn:aws:bedrock-agentcore:us-west-2:123456789012:gateway-target/orders-Tg9xK2'; + + it('selecting custom routes componentType → componentArnEntry (not componentSelect)', () => { + const { ref } = setup(); + advanceToComponentType(ref); + act(() => ref.current!.wizard.setComponentType('custom')); + expect(ref.current!.wizard.step).toBe('componentArnEntry'); + expect(ref.current!.wizard.config.currentComponentType).toBe('custom'); + }); + + it('a pattern-valid ARN advances to configuration and is stored verbatim as currentComponentArn', () => { + const { ref } = setup(); + advanceToComponentType(ref); + act(() => ref.current!.wizard.setComponentType('custom')); + act(() => ref.current!.wizard.setCustomArn(CUSTOM_ARN)); + expect(ref.current!.wizard.step).toBe('configuration'); + expect(ref.current!.wizard.config.currentComponentArn).toBe(CUSTOM_ARN); + }); + + it('the custom component lands in config.components under the literal ARN key', () => { + const { ref } = setup(); + advanceToComponentType(ref); + act(() => ref.current!.wizard.setComponentType('custom')); + act(() => ref.current!.wizard.setCustomArn(CUSTOM_ARN)); + act(() => ref.current!.wizard.setConfiguration({ systemPrompt: 'hi' })); + expect(ref.current!.wizard.step).toBe('addAnother'); + expect(Object.keys(ref.current!.wizard.config.components)).toContain(CUSTOM_ARN); + expect(ref.current!.wizard.config.components[CUSTOM_ARN]).toEqual({ + configuration: { systemPrompt: 'hi' }, + }); + }); + + it('goBack from componentArnEntry returns to componentType', () => { + const { ref } = setup(); + advanceToComponentType(ref); + act(() => ref.current!.wizard.setComponentType('custom')); + expect(ref.current!.wizard.step).toBe('componentArnEntry'); + act(() => ref.current!.wizard.goBack()); + 
expect(ref.current!.wizard.step).toBe('componentType'); + }); + + it('runtime/gateway types still route to componentSelect (regression)', () => { + const { ref } = setup(); + advanceToComponentType(ref); + act(() => ref.current!.wizard.setComponentType('runtime')); + expect(ref.current!.wizard.step).toBe('componentSelect'); + + act(() => ref.current!.wizard.goBack()); + expect(ref.current!.wizard.step).toBe('componentType'); + act(() => ref.current!.wizard.setComponentType('gateway')); + expect(ref.current!.wizard.step).toBe('componentSelect'); + }); + + it('COMPONENT_KEY_PATTERN accepts ARNs and non-ARN identifiers, rejects placeholders/spaces/over-length', () => { + // Accept any pattern-valid string — an arn: prefix is NOT required (DECIDED). + expect(COMPONENT_KEY_PATTERN.test(CUSTOM_ARN)).toBe(true); + expect(COMPONENT_KEY_PATTERN.test('myComponentKey')).toBe(true); + // Reject placeholders, spaces, and over-length (>2048 chars). + expect(COMPONENT_KEY_PATTERN.test('{{runtime:MyAgent}}')).toBe(false); + expect(COMPONENT_KEY_PATTERN.test('not a valid arn!!')).toBe(false); + expect(COMPONENT_KEY_PATTERN.test('a'.repeat(2049))).toBe(false); + expect(COMPONENT_KEY_PATTERN.test('a'.repeat(2048))).toBe(true); + }); +}); diff --git a/src/cli/tui/screens/config-bundle/constants.ts b/src/cli/tui/screens/config-bundle/constants.ts new file mode 100644 index 000000000..0356545c8 --- /dev/null +++ b/src/cli/tui/screens/config-bundle/constants.ts @@ -0,0 +1,15 @@ +// ───────────────────────────────────────────────────────────────────────────── +// Config Bundle Wizard Constants +// ───────────────────────────────────────────────────────────────────────────── + +/** + * Service/CFN key pattern for a config-bundle component identifier. Mirrors + * `aws-bedrockagentcore-configurationbundle.json` `Components.patternProperties`. + * Any ARN qualifies, but an `arn:` prefix is NOT required — the service accepts + * any pattern-valid string (max 2048 chars). 
Rejects `{{...}}` placeholders, + * spaces, and over-length input — exactly what CloudFormation rejects. + */ +export const COMPONENT_KEY_PATTERN = /^[a-zA-Z][a-zA-Z0-9_:/.-]{0,2047}$/; + +/** Inline error shown when a custom component identifier fails {@link COMPONENT_KEY_PATTERN}. */ +export const COMPONENT_KEY_ERROR = 'Must be a valid component identifier (an ARN, max 2048 chars).'; diff --git a/src/cli/tui/screens/config-bundle/types.ts b/src/cli/tui/screens/config-bundle/types.ts index dba1ba4e7..aed0c56b9 100644 --- a/src/cli/tui/screens/config-bundle/types.ts +++ b/src/cli/tui/screens/config-bundle/types.ts @@ -9,13 +9,14 @@ export type AddConfigBundleStep = | 'description' | 'componentType' | 'componentSelect' + | 'componentArnEntry' | 'configuration' | 'addAnother' | 'branchName' | 'commitMessage' | 'confirm'; -export type ComponentType = 'runtime' | 'gateway'; +export type ComponentType = 'runtime' | 'gateway' | 'custom'; export interface DeployedComponent { name: string; @@ -44,6 +45,7 @@ export const CONFIG_BUNDLE_STEP_LABELS: Record = { description: 'Description', componentType: 'Type', componentSelect: 'Component', + componentArnEntry: 'ARN', configuration: 'Config', addAnother: 'More?', branchName: 'Branch', @@ -54,4 +56,5 @@ export const CONFIG_BUNDLE_STEP_LABELS: Record = { export const COMPONENT_TYPE_OPTIONS = [ { id: 'runtime', title: 'Agent Runtime', description: 'Configure an agent runtime' }, { id: 'gateway', title: 'HTTP Gateway', description: 'Configure an HTTP gateway' }, + { id: 'custom', title: 'Other (ARN)', description: 'Enter any component ARN (gateway target, etc.)' }, ] as const; diff --git a/src/cli/tui/screens/config-bundle/useAddConfigBundleWizard.ts b/src/cli/tui/screens/config-bundle/useAddConfigBundleWizard.ts index daa79173b..96411db2f 100644 --- a/src/cli/tui/screens/config-bundle/useAddConfigBundleWizard.ts +++ b/src/cli/tui/screens/config-bundle/useAddConfigBundleWizard.ts @@ -1,4 +1,5 @@ import type { 
ComponentConfigurationMap } from '../../../../schema'; +import { isGatedFeaturesEnabled } from '../../../feature-flags'; import type { AddConfigBundleConfig, AddConfigBundleStep, ComponentType } from './types'; import { useCallback, useState } from 'react'; @@ -7,6 +8,7 @@ const ALL_STEPS: AddConfigBundleStep[] = [ 'description', 'componentType', 'componentSelect', + 'componentArnEntry', 'configuration', 'addAnother', 'branchName', @@ -28,13 +30,31 @@ function getDefaultConfig(): AddConfigBundleConfig { export function useAddConfigBundleWizard() { const [config, setConfig] = useState(getDefaultConfig); const [step, setStep] = useState('name'); + // True when the component-type/select steps were re-entered from the "add another?" loop, + // so back-navigation returns to the addAnother decision point (which holds "Continue" → + // branchName) instead of falling through the linear order back to `description`. Without this + // the user gets trapped: backing out of a second component drops all progress. + const [inAddAnotherLoop, setInAddAnotherLoop] = useState(false); const currentIndex = ALL_STEPS.indexOf(step); const goBack = useCallback(() => { + // The component picker (componentSelect) and the custom-ARN entry (componentArnEntry) are + // mutually exclusive branches off componentType — both return to componentType. + if (step === 'componentSelect' || step === 'componentArnEntry') { + setStep('componentType'); + return; + } + // If we're mid "add another component" loop, componentType must return to the addAnother step + // (where Continue lives), not to the linear previous step. 
+ if (inAddAnotherLoop && step === 'componentType') { + setInAddAnotherLoop(false); + setStep('addAnother'); + return; + } const prevStep = ALL_STEPS[currentIndex - 1]; if (prevStep) setStep(prevStep); - }, [currentIndex]); + }, [currentIndex, inAddAnotherLoop, step]); const setName = useCallback((name: string) => { setConfig(c => ({ ...c, name })); @@ -48,7 +68,8 @@ export function useAddConfigBundleWizard() { const setComponentType = useCallback((componentType: ComponentType) => { setConfig(c => ({ ...c, currentComponentType: componentType, currentComponentArn: undefined })); - setStep('componentSelect'); + // Custom components are keyed by a free-text ARN; runtime/gateway pick from deployed resources. + setStep(componentType === 'custom' ? 'componentArnEntry' : 'componentSelect'); }, []); const setSelectedComponent = useCallback((arn: string) => { @@ -56,6 +77,11 @@ export function useAddConfigBundleWizard() { setStep('configuration'); }, []); + const setCustomArn = useCallback((arn: string) => { + setConfig(c => ({ ...c, currentComponentArn: arn })); + setStep('configuration'); + }, []); + const setConfiguration = useCallback((configuration: Record) => { setConfig(c => { const arn = c.currentComponentArn; @@ -71,11 +97,17 @@ export function useAddConfigBundleWizard() { const addAnotherComponent = useCallback(() => { setConfig(c => ({ ...c, currentComponentType: undefined, currentComponentArn: undefined })); + setInAddAnotherLoop(true); setStep('componentType'); }, []); const doneAddingComponents = useCallback(() => { - setStep('branchName'); + setInAddAnotherLoop(false); + if (isGatedFeaturesEnabled()) { + setStep('branchName'); + } else { + setStep('commitMessage'); + } }, []); const setBranchName = useCallback((branchName: string) => { @@ -90,6 +122,7 @@ export function useAddConfigBundleWizard() { const reset = useCallback(() => { setConfig(getDefaultConfig()); + setInAddAnotherLoop(false); setStep('name'); }, []); @@ -103,6 +136,7 @@ export function 
useAddConfigBundleWizard() { setDescription, setComponentType, setSelectedComponent, + setCustomArn, setConfiguration, addAnotherComponent, doneAddingComponents, diff --git a/src/cli/tui/screens/create/CreateScreen.tsx b/src/cli/tui/screens/create/CreateScreen.tsx index ac3ea6db6..d0fe55294 100644 --- a/src/cli/tui/screens/create/CreateScreen.tsx +++ b/src/cli/tui/screens/create/CreateScreen.tsx @@ -259,7 +259,7 @@ export function CreateScreen({ cwd, isInteractive, onExit, onNavigate }: CreateS const preview = isPreviewEnabled(); // Completion state for next steps - const allSuccess = !flow.hasError && flow.isComplete; + const allSuccess = !flow.hasError && flow.isComplete && flow.phase === 'complete'; // Handle exit - if successful, exit app completely and print completion screen const handleExit = useCallback(() => { diff --git a/src/cli/tui/screens/create/useCreateFlow.ts b/src/cli/tui/screens/create/useCreateFlow.ts index c76fab8ec..146498b12 100644 --- a/src/cli/tui/screens/create/useCreateFlow.ts +++ b/src/cli/tui/screens/create/useCreateFlow.ts @@ -671,6 +671,7 @@ export function useCreateFlow(cwd: string): CreateFlowState { mcpName: addHarnessConfig.mcpName, mcpUrl: addHarnessConfig.mcpUrl, gatewayArn: addHarnessConfig.gatewayArn, + skills: addHarnessConfig.skills, authorizerType: addHarnessConfig.authorizerType, jwtConfig: addHarnessConfig.jwtConfig ? { diff --git a/src/cli/tui/screens/deploy/DeployScreen.tsx b/src/cli/tui/screens/deploy/DeployScreen.tsx index 828aec38b..1c7a9c65e 100644 --- a/src/cli/tui/screens/deploy/DeployScreen.tsx +++ b/src/cli/tui/screens/deploy/DeployScreen.tsx @@ -76,6 +76,7 @@ export function DeployScreen({ diffSummaries, numStacksWithChanges, deployNotes, + managedMemoryNotice, postDeployWarnings, postDeployHasError, isDiffLoading, @@ -334,6 +335,14 @@ export function DeployScreen({ <> + {/* Managed-memory heads-up: shown while the slow CFN apply runs (not gated on success). 
+ Styled as a plain dim "Note:" to match the transaction-search note convention below. */} + {managedMemoryNotice && !isComplete && ( + + Note: {managedMemoryNotice} + + )} + {/* Toggleable ResourceGraph view */} {showResourceGraph && context && ( diff --git a/src/cli/tui/screens/deploy/useDeployFlow.ts b/src/cli/tui/screens/deploy/useDeployFlow.ts index 187f50ba7..a5a1ebc97 100644 --- a/src/cli/tui/screens/deploy/useDeployFlow.ts +++ b/src/cli/tui/screens/deploy/useDeployFlow.ts @@ -1,39 +1,39 @@ import { ConfigIO } from '../../../../lib'; -import type { DeployedState, HarnessDeployedState } from '../../../../schema'; import type { CdkToolkitWrapper, DeployMessage, SwitchableIoHost } from '../../../cdk/toolkit-lib'; import { buildDeployedState, getStackOutputs, parseAgentOutputs, + parseConfigBundleOutputs, parseDatasetOutputs, parseEvaluatorOutputs, parseGatewayOutputs, + parseHarnessOutputs, + parseKnowledgeBaseOutputs, parseMemoryOutputs, parseOnlineEvalOutputs, parsePaymentOutputs, parsePolicyEngineOutputs, parsePolicyOutputs, + parseRuntimeEndpointOutputs, } from '../../../cloudformation'; import { DEFAULT_DEPLOY_ATTRS, computeDeployAttrs } from '../../../commands/deploy/utils.js'; import { getErrorMessage, isChangesetInProgressError, isExpiredTokenError } from '../../../errors'; import { isPreviewEnabled } from '../../../feature-flags'; import { ExecLogger } from '../../../logging'; import { + MANAGED_MEMORY_DEPLOY_NOTICE, cleanupPaymentCredentialProviders, + hasManagedMemoryHarness, performStackTeardown, setupTransactionSearch, } from '../../../operations/deploy'; import { computeProjectDeployHash } from '../../../operations/deploy/change-detection'; import { getGatewayTargetStatuses } from '../../../operations/deploy/gateway-status'; -import { createDeploymentManager } from '../../../operations/deploy/imperative'; -import { deleteOrphanedABTests, setupABTests } from '../../../operations/deploy/post-deploy-ab-tests'; -import { - 
resolveConfigBundleComponentKeys, - setupConfigBundles, -} from '../../../operations/deploy/post-deploy-config-bundles'; import { syncDatasets } from '../../../operations/deploy/post-deploy-datasets'; -import { setupHttpGateways } from '../../../operations/deploy/post-deploy-http-gateways'; +import { autoIngestKnowledgeBases } from '../../../operations/deploy/post-deploy-knowledge-bases'; import { enableOnlineEvalConfigs } from '../../../operations/deploy/post-deploy-online-evals'; +import { hydrateKnowledgeBaseDataSources } from '../../../operations/knowledge-base/hydrate-data-sources'; import { withCommandRunTelemetry } from '../../../telemetry/cli-command-run.js'; import { type StackDiffSummary, @@ -103,6 +103,8 @@ interface DeployFlowState { numStacksWithChanges?: number; /** Notes to display after successful deploy (e.g., transaction search info) */ deployNotes: string[]; + /** Managed-memory heads-up, shown while the CFN apply runs (null when not applicable) */ + managedMemoryNotice: string | null; /** Warnings from post-deploy steps (config bundles, AB tests) */ postDeployWarnings: string[]; /** True if any post-deploy sub-resource operation had errors */ @@ -152,6 +154,28 @@ export function useDeployFlow(options: DeployFlowOptions = {}): DeployFlowState }); const [publishAssetsStep, setPublishAssetsStep] = useState({ label: 'Publish assets', status: 'pending' }); const [deployStep, setDeployStep] = useState({ label: 'Deploy to AWS', status: 'pending' }); + const [persistStateStep, setPersistStateStep] = useState({ + label: 'Persist deployment state', + status: 'pending', + }); + // Whether the hydrate-KB step needs to run for this deploy. False (the + // common case) when every KB had its `dataSources[]` already populated by + // the per-DS CFN outputs the L3 emits since #234 — the persist step did + // the work and hydrate would be a pure no-op. We hide the step from the + // visible list in that case so the user doesn't see a phantom phase. 
Set + // by the deploy-time code right before the hydrate call (after the parse + // step exposes which KBs came back with empty dataSources[]). + const [needsKbHydration, setNeedsKbHydration] = useState(false); + const [hydrateKbStep, setHydrateKbStep] = useState({ + label: 'Hydrate knowledge base data sources', + status: 'pending', + }); + const [autoIngestStep, setAutoIngestStep] = useState({ + label: 'Auto-ingest knowledge bases', + status: 'pending', + }); + const [datasetSyncStep, setDatasetSyncStep] = useState({ label: 'Sync datasets', status: 'pending' }); + const [onlineEvalStep, setOnlineEvalStep] = useState({ label: 'Enable online evaluation', status: 'pending' }); const [diffStep, setDiffStep] = useState({ label: 'Run CDK diff', status: 'pending' }); const [diffSummaries, setDiffSummaries] = useState([]); const [numStacksWithChanges, setNumStacksWithChanges] = useState(); @@ -162,6 +186,9 @@ export function useDeployFlow(options: DeployFlowOptions = {}): DeployFlowState const isDiffRunningRef = useRef(false); const [deployOutput, setDeployOutput] = useState(null); const [deployMessages, setDeployMessages] = useState([]); + // Managed-memory heads-up: shown WHILE the slow CFN apply runs (not gated on success like + // deployNotes), because explaining the 3-5 min memory provisioning is the whole point. 
+ const [managedMemoryNotice, setManagedMemoryNotice] = useState(null); const [stackOutputs, setStackOutputs] = useState>({}); const [targetStatuses, setTargetStatuses] = useState<{ name: string; status: string }[]>([]); const [shouldStartDeploy, setShouldStartDeploy] = useState(false); @@ -177,6 +204,14 @@ export function useDeployFlow(options: DeployFlowOptions = {}): DeployFlowState setPreDeployDiffStep({ label: 'Computing diff changes...', status: 'pending' }); setPublishAssetsStep({ label: 'Publish assets', status: 'pending' }); setDeployStep({ label: 'Deploy to AWS', status: 'pending' }); + setPersistStateStep({ label: 'Persist deployment state', status: 'pending' }); + setHydrateKbStep({ label: 'Hydrate knowledge base data sources', status: 'pending' }); + setNeedsKbHydration(false); + setAutoIngestStep({ label: 'Auto-ingest knowledge bases', status: 'pending' }); + setDatasetSyncStep({ label: 'Sync datasets', status: 'pending' }); + setOnlineEvalStep({ label: 'Enable online evaluation', status: 'pending' }); + setPostDeployHasError(false); + setPostDeployWarnings([]); setDeployOutput(null); setHasTokenExpiredError(false); // Reset token expired state when retrying setHasStartedCfn(false); @@ -233,6 +268,9 @@ export function useDeployFlow(options: DeployFlowOptions = {}): DeployFlowState if (!ctx || !currentStackName || !target) return; + setPersistStateStep(prev => ({ ...prev, status: 'running' })); + logger.startStep('Persist deployment state'); + const configIO = new ConfigIO(); const agentNames = ctx.projectSpec.runtimes?.map((a: { name: string }) => a.name) || []; @@ -324,6 +362,63 @@ export function useDeployFlow(options: DeployFlowOptions = {}): DeployFlowState const datasetNames = (ctx.projectSpec.datasets ?? []).map((d: { name: string }) => d.name); const datasets = parseDatasetOutputs(outputs, datasetNames); + // Parse config bundle outputs + const configBundleNames = (ctx.projectSpec.configBundles ?? 
[]).map((b: { name: string }) => b.name); + const configBundles = parseConfigBundleOutputs(outputs, configBundleNames); + + // Parse runtime endpoint outputs + const endpointSpecs: { agentName: string; endpointName: string }[] = []; + for (const runtime of ctx.projectSpec.runtimes ?? []) { + if (runtime.endpoints) { + for (const endpointName of Object.keys(runtime.endpoints)) { + endpointSpecs.push({ agentName: runtime.name, endpointName }); + } + } + } + const runtimeEndpoints = parseRuntimeEndpointOutputs(outputs, endpointSpecs); + + // Parse knowledge base outputs (CFN emits id+arn; per-DS outputs hydrate dataSources via getAtt('DataSourceId')). + const knowledgeBaseSpecs = ctx.projectSpec.knowledgeBases ?? []; + const knowledgeBaseNames = knowledgeBaseSpecs.map(kb => kb.name); + const knowledgeBases = parseKnowledgeBaseOutputs(outputs, knowledgeBaseNames); + + if (knowledgeBaseNames.length > 0 && Object.keys(knowledgeBases).length !== knowledgeBaseNames.length) { + logger.log( + `Deployed-state missing outputs for ${ + knowledgeBaseNames.length - Object.keys(knowledgeBases).length + } knowledge base(s).`, + 'warn' + ); + } + + // Hydrate dataSources[] for any KB whose CFN per-DS outputs were absent + // (older L3, before #234). With the current L3 the persist step has + // already filled `dataSources[]` from per-DS outputs — the hydrate + // function would short-circuit on every KB and the step would render as a + // pointless "running → success" flash. Skip it (and hide it from the + // visible step list) when nothing actually needs hydrating. 
+ const kbsNeedingHydration = Object.values(knowledgeBases).filter(kb => kb.dataSources.length === 0); + if (kbsNeedingHydration.length > 0) { + setNeedsKbHydration(true); + setHydrateKbStep(prev => ({ ...prev, status: 'running' })); + logger.startStep('Hydrate knowledge base data sources'); + try { + await hydrateKnowledgeBaseDataSources({ + knowledgeBases, + knowledgeBaseSpecs, + region: target.region, + }); + logger.endStep('success'); + setHydrateKbStep(prev => ({ ...prev, status: 'success' })); + } catch (err) { + const msg = getErrorMessage(err); + logger.log(`Failed to hydrate knowledge base data sources: ${msg}`, 'warn'); + // Hydration failure is non-fatal — KBs are still deployed. + logger.endStep('success'); + setHydrateKbStep(prev => ({ ...prev, status: 'warn', warn: msg })); + } + } + // Expose outputs to UI setStackOutputs(outputs); @@ -353,36 +448,13 @@ export function useDeployFlow(options: DeployFlowOptions = {}): DeployFlowState const existingState = await configIO.readDeployedState().catch(() => undefined); - // Post-CDK: deploy imperative resources (harness) — preview mode only - let deployedHarnesses: Record | undefined; - if (isPreviewEnabled()) { - const imperativeManager = createDeploymentManager(); - const imperativeDeployedState: DeployedState = existingState ?? 
{ targets: {} }; - const imperativeContext = { - projectSpec: ctx.projectSpec, - target, - configIO, - deployedState: imperativeDeployedState, - cdkOutputs: outputs, - onProgress: (step: string, status: 'start' | 'done' | 'error') => { - logger.log(`${step}: ${status}`); - }, - }; - - if (imperativeManager.hasDeployersForPhase('post-cdk', imperativeContext)) { - logger.startStep('Deploy harnesses'); - const postCdkResult = await imperativeManager.runPhase('post-cdk', imperativeContext); - const harnessResult = postCdkResult.results.get('harness'); - if (harnessResult?.state) { - deployedHarnesses = harnessResult.state as Record; - } - if (!postCdkResult.success) { - logger.endStep('error', postCdkResult.error); - throw new Error(`Harness deployment failed: ${postCdkResult.error}`); - } - logger.endStep('success'); - } - } + // Parse harness outputs (harnesses are now part of the CloudFormation stack). + // Preview-gated to match the synth path: with preview off, bin/cdk.ts emits no harness + // resource/outputs, so skip parsing entirely (see toolkit-lib/wrapper.ts + bin/cdk.ts). + const harnessNames = isPreviewEnabled() + ? (ctx.projectSpec.harnesses ?? []).map((h: { name: string }) => h.name) + : []; + const deployedHarnesses = parseHarnessOutputs(outputs, harnessNames); let deployedState = buildDeployedState({ targetName: target.name, @@ -398,8 +470,12 @@ export function useDeployFlow(options: DeployFlowOptions = {}): DeployFlowState policyEngines, policies, datasets, + configBundles, + runtimeEndpoints, + knowledgeBases, harnesses: deployedHarnesses, payments, + abTestNames: (ctx.projectSpec.abTests ?? 
[]).map((t: { name: string }) => t.name), }); try { @@ -414,10 +490,81 @@ export function useDeployFlow(options: DeployFlowOptions = {}): DeployFlowState await configIO.writeDeployedState(deployedState); + logger.endStep('success'); + setPersistStateStep(prev => ({ ...prev, status: 'success' })); + + // Post-deploy: auto-trigger ingestion for any KB whose data-source URIs + // changed since the last deploy (or has never been ingested before). + const knowledgeBaseSpecsForIngest = ctx.projectSpec.knowledgeBases ?? []; + if (knowledgeBaseSpecsForIngest.length > 0) { + setAutoIngestStep(prev => ({ ...prev, status: 'running' })); + logger.startStep('Auto-ingest knowledge bases'); + try { + const previousKnowledgeBases = existingState?.targets?.[target.name]?.resources?.knowledgeBases; + const ingestResult = await autoIngestKnowledgeBases({ + region: target.region, + knowledgeBases: knowledgeBaseSpecsForIngest, + deployedKnowledgeBases: deployedState.targets?.[target.name]?.resources?.knowledgeBases ?? {}, + previousKnowledgeBases, + targetName: target.name, + deployedState, + onProgress: msg => logger.log(msg), + }); + + // Persist new sourcesHash values for KBs whose ingestion fired. + const targetResources = deployedState.targets[target.name]?.resources; + if (targetResources?.knowledgeBases) { + for (const r of ingestResult.results) { + if (r.status === 'started' && r.newSourcesHash) { + const record = targetResources.knowledgeBases[r.knowledgeBaseName]; + if (record) record.sourcesHash = r.newSourcesHash; + } + } + await configIO.writeDeployedState(deployedState); + } + + // Log per-KB result so the user sees what happened. 
+ for (const r of ingestResult.results) { + if (r.status === 'started') { + logger.log( + `Knowledge base "${r.knowledgeBaseName}": ingestion started for ${r.startedJobCount} data source(s)` + ); + } else if (r.status === 'skipped') { + logger.log(`Knowledge base "${r.knowledgeBaseName}": skipped (${r.reason})`); + } else { + logger.log(`Knowledge base "${r.knowledgeBaseName}": ${r.error}`, 'warn'); + setPostDeployWarnings(prev => [...prev, `Knowledge base "${r.knowledgeBaseName}": ${r.error}`]); + } + } + + logger.endStep(ingestResult.hasErrors ? 'error' : 'success'); + if (ingestResult.hasErrors) { + // Don't fail the deploy — KBs and DSes are valid CFN resources even if + // ingestion failed. The user retries via 'agentcore run ingest --name X'. + setPostDeployHasError(true); + setAutoIngestStep(prev => ({ + ...prev, + status: 'error', + error: 'One or more knowledge bases failed to ingest', + })); + } else { + setAutoIngestStep(prev => ({ ...prev, status: 'success' })); + } + } catch (err) { + const errMsg = getErrorMessage(err); + logger.endStep('error', errMsg); + setPostDeployHasError(true); + setPostDeployWarnings(prev => [...prev, `Knowledge base auto-ingest failed: ${errMsg}`]); + setAutoIngestStep(prev => ({ ...prev, status: 'error', error: errMsg })); + } + } + // Post-deploy: Sync dataset examples from local JSONL to service DRAFT. const datasetSpecs = ctx.projectSpec.datasets ?? []; const deployedDatasetsRecord = deployedState.targets?.[target.name]?.resources?.datasets ?? 
{}; if (datasetSpecs.length > 0 && Object.keys(deployedDatasetsRecord).length > 0) { + setDatasetSyncStep(prev => ({ ...prev, status: 'running' })); + logger.startStep('Sync datasets'); try { const datasetSyncResult = await syncDatasets({ region: target.region, @@ -443,6 +590,15 @@ export function useDeployFlow(options: DeployFlowOptions = {}): DeployFlowState } setPostDeployHasError(true); setPostDeployWarnings(prev => [...prev, ...errors.map(err => `Dataset "${err.datasetName}": ${err.error}`)]); + logger.endStep('error', 'One or more datasets failed to sync'); + setDatasetSyncStep(prev => ({ + ...prev, + status: 'error', + error: 'One or more datasets failed to sync', + })); + } else { + logger.endStep('success'); + setDatasetSyncStep(prev => ({ ...prev, status: 'success' })); } for (const r of datasetSyncResult.results) { @@ -455,6 +611,8 @@ export function useDeployFlow(options: DeployFlowOptions = {}): DeployFlowState logger.log(`Dataset sync failed: ${message}`, 'warn'); setPostDeployHasError(true); setPostDeployWarnings(prev => [...prev, `Dataset sync failed: ${message}`]); + logger.endStep('error', message); + setDatasetSyncStep(prev => ({ ...prev, status: 'error', error: message })); } } @@ -466,6 +624,8 @@ export function useDeployFlow(options: DeployFlowOptions = {}): DeployFlowState const previouslyDeployedOnlineEvals = existingState?.targets?.[target.name]?.resources?.onlineEvalConfigs ?? 
{}; const newOnlineEvalFullSpecs = onlineEvalFullSpecs.filter(c => !previouslyDeployedOnlineEvals[c.name]); if (newOnlineEvalFullSpecs.length > 0 && Object.keys(deployedOnlineEvalConfigs).length > 0) { + setOnlineEvalStep(prev => ({ ...prev, status: 'running' })); + logger.startStep('Enable online evaluation'); try { const enableResult = await enableOnlineEvalConfigs({ region: target.region, @@ -483,184 +643,29 @@ export function useDeployFlow(options: DeployFlowOptions = {}): DeployFlowState ...prev, ...errors.map(err => `Online eval "${err.configName}": ${err.error}`), ]); + logger.endStep('error', 'One or more online eval configs failed to enable'); + setOnlineEvalStep(prev => ({ + ...prev, + status: 'error', + error: 'One or more online eval configs failed to enable', + })); + } else { + logger.endStep('success'); + setOnlineEvalStep(prev => ({ ...prev, status: 'success' })); } } catch (err: unknown) { const message = err instanceof Error ? err.message : String(err); logger.log(`Online eval enable failed: ${message}`, 'warn'); setPostDeployHasError(true); setPostDeployWarnings(prev => [...prev, `Online eval enable failed: ${message}`]); + logger.endStep('error', message); + setOnlineEvalStep(prev => ({ ...prev, status: 'error', error: message })); } } - // Post-deploy: Create/update configuration bundles - const configBundleSpecs = ctx.projectSpec.configBundles ?? 
[]; - if (configBundleSpecs.length > 0) { - try { - // Resolve component key placeholders (e.g., {{runtime:name}} → real ARN) - const resolvedProjectSpec = resolveConfigBundleComponentKeys(ctx.projectSpec, deployedState, target.name); - const existingConfigBundles = deployedState.targets?.[target.name]?.resources?.configBundles; - const configBundleResult = await setupConfigBundles({ - region: target.region, - projectSpec: resolvedProjectSpec, - existingBundles: existingConfigBundles, - }); - - // Merge config bundle state into deployed state - if (Object.keys(configBundleResult.configBundles).length > 0) { - const updatedState = await configIO.readDeployedState().catch(() => deployedState); - const targetResources = updatedState.targets[target.name]?.resources; - if (targetResources) { - targetResources.configBundles = configBundleResult.configBundles; - await configIO.writeDeployedState(updatedState); - } - } - - if (configBundleResult.hasErrors) { - const errors = configBundleResult.results.filter(r => r.status === 'error'); - for (const err of errors) { - logger.log(`Config bundle "${err.bundleName}" setup error: ${err.error}`, 'warn'); - } - setPostDeployHasError(true); - setPostDeployWarnings(prev => [ - ...prev, - ...errors.map(err => `Config bundle "${err.bundleName}": ${err.error}`), - ]); - } - } catch (err: unknown) { - const message = err instanceof Error ? err.message : String(err); - logger.log(`Config bundle setup failed: ${message}`, 'warn'); - setPostDeployHasError(true); - setPostDeployWarnings(prev => [...prev, `Config bundle setup failed: ${message}`]); - } - } - - // Pre-gateway: Delete orphaned AB tests so their gateway rules are cleaned up - // before we attempt to delete orphaned HTTP gateways. 
- const existingABTests = deployedState.targets?.[target.name]?.resources?.abTests; - if (existingABTests && Object.keys(existingABTests).length > 0) { - try { - const deleteResult = await deleteOrphanedABTests({ - region: target.region, - projectSpec: ctx.projectSpec, - existingABTests, - }); - - if (deleteResult.hasErrors) { - const errors = deleteResult.results.filter(r => r.status === 'error'); - for (const err of errors) { - logger.log(`AB test delete "${err.testName}" error: ${err.error}`, 'warn'); - } - setPostDeployHasError(true); - setPostDeployWarnings(prev => [...prev, ...errors.map(err => `AB test "${err.testName}": ${err.error}`)]); - } - - // Surface warnings (e.g., "AB test was stopped before deletion") - for (const r of deleteResult.results) { - if (r.warning) { - logger.log(r.warning, 'warn'); - setPostDeployWarnings(prev => [...prev, r.warning!]); - } - } - - // Update deployed state to remove deleted AB tests - if (deleteResult.results.some(r => r.status === 'deleted')) { - const updatedState = await configIO.readDeployedState().catch(() => deployedState); - const targetResources = updatedState.targets[target.name]?.resources; - if (targetResources?.abTests) { - for (const r of deleteResult.results) { - if (r.status === 'deleted') delete targetResources.abTests[r.testName]; - } - await configIO.writeDeployedState(updatedState); - deployedState = updatedState; - } - } - } catch (err: unknown) { - const message = err instanceof Error ? err.message : String(err); - logger.log(`AB test orphan cleanup failed: ${message}`, 'warn'); - setPostDeployHasError(true); - setPostDeployWarnings(prev => [...prev, `AB test orphan cleanup failed: ${message}`]); - } - } - - // Post-deploy: Create/update HTTP gateways - const httpGatewaySpecs = ctx.projectSpec.httpGateways ?? []; - const existingHttpGateways = deployedState.targets?.[target.name]?.resources?.httpGateways; - if (httpGatewaySpecs.length > 0 || Object.keys(existingHttpGateways ?? 
{}).length > 0) { - try { - const deployedResources = deployedState.targets?.[target.name]?.resources; - const httpGatewayResult = await setupHttpGateways({ - region: target.region, - projectName: ctx.projectSpec.name, - projectSpec: ctx.projectSpec, - existingHttpGateways, - deployedResources, - }); - - // Always merge HTTP gateway state (even if empty, to clear deleted gateways) - const updatedState = await configIO.readDeployedState().catch(() => deployedState); - const targetResources = updatedState.targets[target.name]?.resources; - if (targetResources) { - targetResources.httpGateways = httpGatewayResult.httpGateways; - await configIO.writeDeployedState(updatedState); - deployedState = updatedState; - } - - if (httpGatewayResult.hasErrors) { - const errors = httpGatewayResult.results.filter(r => r.status === 'error'); - for (const err of errors) { - logger.log(`HTTP gateway "${err.gatewayName}" setup error: ${err.error}`, 'warn'); - } - setPostDeployHasError(true); - setPostDeployWarnings(prev => [ - ...prev, - ...errors.map(err => `HTTP gateway "${err.gatewayName}": ${err.error}`), - ]); - } - } catch (err: unknown) { - const message = err instanceof Error ? err.message : String(err); - logger.log(`HTTP gateway setup failed: ${message}`, 'warn'); - setPostDeployHasError(true); - setPostDeployWarnings(prev => [...prev, `HTTP gateway setup failed: ${message}`]); - } - } - - // Post-deploy: Create/update AB tests - const abTestSpecs = ctx.projectSpec.abTests ?? 
[]; - if (abTestSpecs.length > 0) { - try { - const existingABTests = deployedState.targets?.[target.name]?.resources?.abTests; - const deployedResources = deployedState.targets?.[target.name]?.resources; - const abTestResult = await setupABTests({ - region: target.region, - projectSpec: ctx.projectSpec, - existingABTests, - deployedResources, - }); - - if (Object.keys(abTestResult.abTests).length > 0) { - const updatedState = await configIO.readDeployedState().catch(() => deployedState); - const targetResources = updatedState.targets[target.name]?.resources; - if (targetResources) { - targetResources.abTests = abTestResult.abTests; - await configIO.writeDeployedState(updatedState); - } - } - - if (abTestResult.hasErrors) { - const errors = abTestResult.results.filter(r => r.status === 'error'); - for (const err of errors) { - logger.log(`AB test "${err.testName}" setup error: ${err.error}`, 'warn'); - } - setPostDeployHasError(true); - setPostDeployWarnings(prev => [...prev, ...errors.map(err => `AB test "${err.testName}": ${err.error}`)]); - } - } catch (err: unknown) { - const message = err instanceof Error ? err.message : String(err); - logger.log(`AB test setup failed: ${message}`, 'warn'); - setPostDeployHasError(true); - setPostDeployWarnings(prev => [...prev, `AB test setup failed: ${message}`]); - } - } + // Config bundles are now managed via CloudFormation; their state is parsed + // from stack outputs above (no post-deploy API step). AB tests are managed + // as fire-and-forget jobs (agentcore run ab-test), not via the deploy path. // Query gateway target sync statuses (non-blocking) const allStatuses: { name: string; status: string }[] = []; @@ -696,8 +701,11 @@ export function useDeployFlow(options: DeployFlowOptions = {}): DeployFlowState const attrs = context ? 
computeDeployAttrs(context.projectSpec, 'deploy') : { ...DEFAULT_DEPLOY_ATTRS }; const run = async (): Promise<{ success: true } | { success: false; error: Error }> => { - // Run diff before deploy to capture pre-deploy differences - if (!isDiffRunningRef.current) { + // Run diff before deploy to capture pre-deploy differences. + // Skip for brand new stacks: CDK changeset-based diff creates a temporary stack + // in REVIEW_IN_PROGRESS then deletes it without waiting, racing with the deploy + // that immediately follows. + if (!context?.isFirstDeploy && !isDiffRunningRef.current) { isDiffRunningRef.current = true; setIsDiffLoading(true); setPreDeployDiffStep(prev => ({ ...prev, status: 'running' })); @@ -723,6 +731,19 @@ export function useDeployFlow(options: DeployFlowOptions = {}): DeployFlowState logger.endStep('success'); setPreDeployDiffStep(prev => ({ ...prev, status: 'success' })); } + } else if (context?.isFirstDeploy) { + setPreDeployDiffStep(prev => ({ ...prev, status: 'success', label: 'Skip diff (new stack)' })); + } + + // Managed-memory heads-up: surface BEFORE the slow CFN apply so the 3-5 min memory + // provisioning wait is explained while it happens. Mirrors the CLI command path; both + // read the same shared detection + notice text so the wording can't drift. 
+ if (!context?.isTeardownDeploy) { + const noticeConfigIO = new ConfigIO(); + if (await hasManagedMemoryHarness(noticeConfigIO, context?.projectSpec.harnesses)) { + logger.log(MANAGED_MEMORY_DEPLOY_NOTICE); + setManagedMemoryNotice(MANAGED_MEMORY_DEPLOY_NOTICE); + } } setPublishAssetsStep(prev => ({ ...prev, status: 'running' })); @@ -762,39 +783,24 @@ export function useDeployFlow(options: DeployFlowOptions = {}): DeployFlowState // Output goes to stdout via the switchable ioHost await cdkToolkitWrapper.deploy(); - if (context?.isTeardownDeploy) { - // Teardown imperative resources (harnesses) before destroying the stack - if (isPreviewEnabled()) { - const teardownTarget = context.awsTargets[0]; - if (teardownTarget) { - const imperativeManager = createDeploymentManager(); - const teardownConfigIO = new ConfigIO(); - const existingTeardownState = await teardownConfigIO - .readDeployedState() - .catch(() => ({ targets: {} }) as DeployedState); - const teardownContext = { - projectSpec: context.projectSpec, - target: teardownTarget, - configIO: teardownConfigIO, - deployedState: existingTeardownState, - onProgress: (step: string, status: 'start' | 'done' | 'error') => { - logger.log(`${step}: ${status}`); - }, - }; - - if (imperativeManager.hasDeployersForPhase('post-cdk', teardownContext)) { - logger.startStep('Tear down imperative resources'); - const teardownResult = await imperativeManager.teardownAll(teardownContext); - if (!teardownResult.success) { - logger.endStep('error', teardownResult.error); - throw new Error(`Imperative teardown failed: ${teardownResult.error}`); - } - logger.endStep('success'); - } - } - } + // CDK deploy itself is done. Mark "Deploy to AWS" success and let post-deploy + // phases (persist state, hydrate KBs, auto-ingest, dataset sync, online evals) + // advance their own visible steps. 
+ // + // No-change deploys never receive a progress-bearing CloudFormation event, so + // the message handler above never flips Publish assets out of 'running'. Catch + // both 'pending' and 'running' here so the step never gets stranded — without + // this the UI shows "stuck on Publish assets" during a 2m+ post-deploy ingest + // even though the underlying deploy had completed seconds in. + logger.endStep('success'); + setPublishAssetsStep(prev => + prev.status === 'success' || prev.status === 'error' ? prev : { ...prev, status: 'success' } + ); + setDeployStep(prev => ({ ...prev, status: 'success' })); + if (context?.isTeardownDeploy) { // After deploying the empty spec, destroy the stack entirely. + // Harnesses are part of the CloudFormation stack, so stack destroy handles them. // Clean up imperative payment credential providers before stack teardown. const targetName = context.awsTargets[0]?.name; if (targetName) { @@ -822,6 +828,15 @@ export function useDeployFlow(options: DeployFlowOptions = {}): DeployFlowState } catch (error) { const message = error instanceof Error ? error.message : 'Unknown error'; logger.log(`Failed to persist deployed state: ${message}`, 'warn'); + // Mark whichever post-deploy step was running as errored so the visible + // step list resolves (areStepsComplete requires every step terminal). + // Only the persist step is reachable here without local handling. + setPersistStateStep(prev => + prev.status === 'running' ? { ...prev, status: 'error', error: message } : prev + ); + setHydrateKbStep(prev => (prev.status === 'running' ? 
{ ...prev, status: 'error', error: message } : prev)); + setPostDeployHasError(true); + setPostDeployWarnings(p => [...p, `Persist deployed state failed: ${message}`]); } // Post-deploy: Enable CloudWatch Transaction Search (non-blocking, silent) @@ -857,12 +872,11 @@ export function useDeployFlow(options: DeployFlowOptions = {}): DeployFlowState } } + // Close any still-open logger step (defensive — post-deploy phases manage + // their own start/end pairs, so this usually no-ops). logger.endStep('success'); logger.finalize(true); setDeployOutput(`Deployed ${stackNames.length} stack(s): ${stackNames.join(', ')}`); - // Mark both steps as success (in case CFn events were never received) - setPublishAssetsStep(prev => ({ ...prev, status: 'success' })); - setDeployStep(prev => ({ ...prev, status: 'success' })); return { success: true } as const; } catch (err) { const errorMsg = getErrorMessage(err); @@ -1015,60 +1029,110 @@ export function useDeployFlow(options: DeployFlowOptions = {}): DeployFlowState } }, [preflight.phase, preflight.cdkToolkitWrapper, logger, skipPreflight]); + // Project-content-driven inclusion: only show post-deploy steps that will actually run. + const projectSpec = context?.projectSpec; + const hasKnowledgeBases = (projectSpec?.knowledgeBases?.length ?? 0) > 0; + const hasDatasets = (projectSpec?.datasets?.length ?? 0) > 0; + const hasOnlineEvalConfigs = (projectSpec?.onlineEvalConfigs?.length ?? 0) > 0; + const steps = useMemo(() => { if (diffMode) { return skipPreflight ? [diffStep] : [...preflight.steps, diffStep]; } - return skipPreflight - ? [preDeployDiffStep, publishAssetsStep, deployStep] - : [...preflight.steps, preDeployDiffStep, publishAssetsStep, deployStep]; - }, [preflight.steps, preDeployDiffStep, publishAssetsStep, deployStep, diffStep, skipPreflight, diffMode]); + const preflightSteps = skipPreflight ? [] : preflight.steps; + const isTeardown = projectSpec ? 
!!context?.isTeardownDeploy : false; + + const postDeploySteps: Step[] = isTeardown + ? [] + : [ + persistStateStep, + ...(hasKnowledgeBases && needsKbHydration ? [hydrateKbStep] : []), + ...(hasKnowledgeBases ? [autoIngestStep] : []), + ...(hasDatasets ? [datasetSyncStep] : []), + ...(hasOnlineEvalConfigs ? [onlineEvalStep] : []), + ]; + + return [...preflightSteps, preDeployDiffStep, publishAssetsStep, deployStep, ...postDeploySteps]; + }, [ + preflight.steps, + preDeployDiffStep, + publishAssetsStep, + deployStep, + persistStateStep, + hydrateKbStep, + autoIngestStep, + datasetSyncStep, + onlineEvalStep, + diffStep, + skipPreflight, + diffMode, + hasKnowledgeBases, + needsKbHydration, + hasDatasets, + hasOnlineEvalConfigs, + context?.isTeardownDeploy, + projectSpec, + ]); + + const hasError = hasStepError(steps); + const isComplete = areStepsComplete(steps); const phase: DeployPhase = useMemo(() => { - const activeStep = diffMode ? diffStep : deployStep; + if (diffMode) { + const activeStep = diffStep; + if (skipPreflight) { + if (!shouldStartDeploy && activeStep.status === 'pending') { + return 'idle'; + } + if (activeStep.status === 'error') { + return 'error'; + } + if (activeStep.status === 'success') { + return 'complete'; + } + return 'deploying'; + } + + if (preflight.phase === 'idle') return 'idle'; + if (preflight.phase === 'error') return 'error'; + if (preflight.phase === 'teardown-confirm') return 'teardown-confirm'; + if (preflight.phase === 'credentials-prompt') return 'credentials-prompt'; + if (preflight.phase === 'bootstrap-confirm') return 'bootstrap-confirm'; + if ( + preflight.phase === 'running' || + preflight.phase === 'bootstrapping' || + preflight.phase === 'identity-setup' + ) { + return 'running'; + } + if (activeStep.status === 'error') return 'error'; + if (activeStep.status === 'success') return 'complete'; + return 'deploying'; + } + // Deploy mode: derive from the full visible step list so post-CDK phases can + // hold the flow in 
'deploying' until they all settle. if (skipPreflight) { - if (!shouldStartDeploy && activeStep.status === 'pending') { + if (!shouldStartDeploy && deployStep.status === 'pending') { return 'idle'; } - if (activeStep.status === 'error') { - return 'error'; - } - if (activeStep.status === 'success') { - return 'complete'; - } + if (hasError) return 'error'; + if (isComplete) return 'complete'; return 'deploying'; } - if (preflight.phase === 'idle') { - return 'idle'; - } - if (preflight.phase === 'error') { - return 'error'; - } - if (preflight.phase === 'teardown-confirm') { - return 'teardown-confirm'; - } - if (preflight.phase === 'credentials-prompt') { - return 'credentials-prompt'; - } - if (preflight.phase === 'bootstrap-confirm') { - return 'bootstrap-confirm'; - } + if (preflight.phase === 'idle') return 'idle'; + if (preflight.phase === 'error') return 'error'; + if (preflight.phase === 'teardown-confirm') return 'teardown-confirm'; + if (preflight.phase === 'credentials-prompt') return 'credentials-prompt'; + if (preflight.phase === 'bootstrap-confirm') return 'bootstrap-confirm'; if (preflight.phase === 'running' || preflight.phase === 'bootstrapping' || preflight.phase === 'identity-setup') { return 'running'; } - if (activeStep.status === 'error') { - return 'error'; - } - if (activeStep.status === 'success') { - return 'complete'; - } + if (hasError) return 'error'; + if (isComplete) return 'complete'; return 'deploying'; - }, [preflight.phase, deployStep, diffStep, skipPreflight, shouldStartDeploy, diffMode]); - - const hasError = hasStepError(steps); - const isComplete = areStepsComplete(steps); + }, [preflight.phase, deployStep, diffStep, skipPreflight, shouldStartDeploy, diffMode, hasError, isComplete]); // Combine token expired errors from both preflight and deploy phases const combinedTokenExpiredError = hasTokenExpiredError || preflight.hasTokenExpiredError; @@ -1091,6 +1155,7 @@ export function useDeployFlow(options: DeployFlowOptions = {}): 
DeployFlowState diffSummaries, numStacksWithChanges, deployNotes, + managedMemoryNotice, postDeployWarnings, postDeployHasError, isDiffLoading, diff --git a/src/cli/tui/screens/eval/EvalHubScreen.tsx b/src/cli/tui/screens/eval/EvalHubScreen.tsx index 27cb2e66f..f1a41b519 100644 --- a/src/cli/tui/screens/eval/EvalHubScreen.tsx +++ b/src/cli/tui/screens/eval/EvalHubScreen.tsx @@ -4,7 +4,14 @@ import { HELP_TEXT } from '../../constants'; import { useListNavigation } from '../../hooks'; import React, { useMemo } from 'react'; -type EvalHubView = 'run-eval' | 'runs' | 'run-batch-eval' | 'batch-eval-history' | 'online-dashboard'; +type EvalHubView = + | 'run-eval' + | 'runs' + | 'run-batch-eval' + | 'batch-eval-history' + | 'run-insights' + | 'insights-jobs' + | 'online-dashboard'; interface EvalHubScreenProps { onSelect: (view: EvalHubView) => void; @@ -27,8 +34,18 @@ export function EvalHubScreen({ onSelect, onExit }: EvalHubScreenProps) { }, { id: 'batch-eval-history', - title: 'Batch Eval History', - description: 'View past batch evaluation results (local)', + title: 'Batch Eval Jobs', + description: 'View batch evaluation jobs and their results', + }, + { + id: 'run-insights', + title: 'Run Insights [preview]', + description: 'Run failure analysis on agent sessions', + }, + { + id: 'insights-jobs', + title: 'Insights Jobs [preview]', + description: 'View past insights analysis jobs', }, { id: 'online-dashboard', diff --git a/src/cli/tui/screens/export/ExportHarnessFlow.tsx b/src/cli/tui/screens/export/ExportHarnessFlow.tsx new file mode 100644 index 000000000..75d29ab53 --- /dev/null +++ b/src/cli/tui/screens/export/ExportHarnessFlow.tsx @@ -0,0 +1,196 @@ +import { ErrorPrompt, GradientText, NextSteps, Screen, StepProgress } from '../../components'; +import type { NextStep, Step } from '../../components'; +import { ExportHarnessScreen } from './ExportHarnessScreen'; +import type { ExportHarnessConfig } from './types'; +import { Box, Text } from 'ink'; +import React, 
{ useCallback, useEffect, useState } from 'react'; + +type FlowState = + | { name: 'loading' } + | { name: 'wizard'; harnessNames: string[]; existingAgentNames: string[]; containerOnlyHarnesses: Set } + | { name: 'no-harnesses' } + | { name: 'exporting'; steps: Step[] } + | { name: 'success'; agentName: string; notesPath: string } + | { name: 'error'; message: string }; + +interface ExportHarnessFlowProps { + isInteractive?: boolean; + onExit: () => void; + onBack: () => void; + onDeploy?: () => void; +} + +const EXPORT_SUCCESS_STEPS: NextStep[] = [{ command: 'deploy', label: 'Deploy to AWS' }]; + +export function ExportHarnessFlow({ isInteractive = true, onExit, onBack, onDeploy }: ExportHarnessFlowProps) { + const [flow, setFlow] = useState({ name: 'loading' }); + + useEffect(() => { + void (async () => { + try { + const { ConfigIO } = await import('../../../../lib'); + const configIO = new ConfigIO(); + if (!configIO.hasProject()) { + setFlow({ name: 'no-harnesses' }); + return; + } + const project = await configIO.readProjectSpec(); + const harnessNames = (project.harnesses ?? 
[]).map((h: { name: string }) => h.name); + if (harnessNames.length === 0) { + setFlow({ name: 'no-harnesses' }); + return; + } + const existingAgentNames = project.runtimes.map((r: { name: string }) => r.name); + const containerOnlyHarnesses = new Set(); + await Promise.all( + harnessNames.map(async (name: string) => { + try { + const spec = await configIO.readHarnessSpec(name); + if (spec.containerUri || spec.dockerfile) containerOnlyHarnesses.add(name); + } catch { + // unreadable spec — leave unrestricted, mapper will error on export + } + }) + ); + setFlow({ name: 'wizard', harnessNames, existingAgentNames, containerOnlyHarnesses }); + } catch (err) { + const { getErrorMessage } = await import('../../../errors'); + setFlow({ name: 'error', message: getErrorMessage(err) }); + } + })(); + }, []); + + useEffect(() => { + if (!isInteractive && flow.name === 'success') { + onExit(); + } + }, [isInteractive, flow, onExit]); + + const handleComplete = useCallback(async (config: ExportHarnessConfig) => { + const progressSteps: Step[] = [ + { label: 'Reading harness configuration', status: 'running' }, + { label: 'Mapping to Strands template config', status: 'pending' }, + { label: 'Rendering agent code', status: 'pending' }, + ...(config.build === 'Container' + ? 
[{ label: 'Generating uv.lock for container build', status: 'pending' as const }] + : []), + { label: 'Updating agentcore.json', status: 'pending' }, + { label: 'Writing EXPORT_NOTES.md', status: 'pending' }, + ]; + setFlow({ name: 'exporting', steps: progressSteps }); + + let stepIdx = 0; + const advanceStep = (_message: string) => { + const currentStep = progressSteps[stepIdx]; + if (currentStep) { + progressSteps[stepIdx] = { ...currentStep, status: 'success' }; + } + stepIdx++; + const nextStep = progressSteps[stepIdx]; + if (nextStep) { + progressSteps[stepIdx] = { ...nextStep, status: 'running' }; + } + setFlow({ name: 'exporting', steps: [...progressSteps] }); + }; + + try { + const { handleExportHarness } = await import('../../../commands/export/harness-action'); + const result = await handleExportHarness( + { name: config.harness, targetAgentName: config.targetAgentName, build: config.build }, + { onProgress: advanceStep } + ); + + // Mark last running step as success + const lastStep = progressSteps[stepIdx]; + if (lastStep) { + progressSteps[stepIdx] = { ...lastStep, label: lastStep.label, status: 'success' }; + setFlow({ name: 'exporting', steps: [...progressSteps] }); + } + + if (!result.success) { + setFlow({ name: 'error', message: result.error.message }); + return; + } + + setFlow({ name: 'success', agentName: result.agentName, notesPath: result.notesPath }); + } catch (err) { + const { getErrorMessage } = await import('../../../errors'); + setFlow({ name: 'error', message: getErrorMessage(err) }); + } + }, []); + + if (flow.name === 'loading') { + return ( + + + + ); + } + + if (flow.name === 'no-harnesses') { + return ( + + ); + } + + if (flow.name === 'wizard') { + return ( + void handleComplete(config)} + onExit={onBack} + /> + ); + } + + if (flow.name === 'exporting') { + return ( + { + /* noop while exporting */ + }} + > + + + ); + } + + if (flow.name === 'success') { + const handleSelect = (step: NextStep) => { + if (step.command === 
'deploy') { + onDeploy?.(); + } else { + onExit(); + } + }; + + return ( + + + + ✓ Exported harness → runtime agent {flow.agentName} + Generated: app/{flow.agentName}/ · agentcore/agentcore.json updated + Review export notes: {flow.notesPath} + + {isInteractive && ( + + )} + + + ); + } + + if (flow.name === 'error') { + return ; + } + + return null; +} diff --git a/src/cli/tui/screens/export/ExportHarnessScreen.tsx b/src/cli/tui/screens/export/ExportHarnessScreen.tsx new file mode 100644 index 000000000..c1c94efc4 --- /dev/null +++ b/src/cli/tui/screens/export/ExportHarnessScreen.tsx @@ -0,0 +1,137 @@ +import { AgentNameSchema } from '../../../../schema'; +import { ConfirmReview, Screen, StepIndicator, TextInput, WizardSelect } from '../../components'; +import type { SelectableItem } from '../../components'; +import { useListNavigation } from '../../hooks'; +import type { ExportHarnessConfig } from './types'; +import { EXPORT_HARNESS_STEP_LABELS } from './types'; +import { useExportHarnessWizard } from './useExportHarnessWizard'; +import React from 'react'; + +interface ExportHarnessScreenProps { + harnessNames: string[]; + existingAgentNames: string[]; + containerOnlyHarnesses: Set; + onComplete: (config: ExportHarnessConfig) => void; + onExit: () => void; +} + +const BUILD_ITEMS: SelectableItem[] = [ + { id: 'CodeZip', title: 'CodeZip', description: 'Package Python source as a zip artifact (default)' }, + { id: 'Container', title: 'Container', description: 'Build a Docker container image via ECR and CodeBuild' }, +]; + +const CONFIRM_ITEM = [{ id: 'confirm', title: 'Confirm' }]; + +export function ExportHarnessScreen({ + harnessNames, + existingAgentNames, + containerOnlyHarnesses, + onComplete, + onExit, +}: ExportHarnessScreenProps) { + const wizard = useExportHarnessWizard(harnessNames, onExit); + const { config, step, steps, goBack, setHarness, setTargetAgentName, setBuild } = wizard; + + const availableBuildItems = containerOnlyHarnesses.has(config.harness) 
+ ? BUILD_ITEMS.filter(b => b.id === 'Container') + : BUILD_ITEMS; + + const harnessItems: SelectableItem[] = harnessNames.map(n => ({ id: n, title: n })); + + const harnessNav = useListNavigation({ + items: harnessItems, + onSelect: item => setHarness(item.id), + onExit: onExit, + isActive: step === 'select-harness', + }); + + const buildNav = useListNavigation({ + items: availableBuildItems, + onSelect: item => setBuild(item.id as 'CodeZip' | 'Container'), + onExit: goBack, + isActive: step === 'build-type', + }); + + useListNavigation({ + items: CONFIRM_ITEM, + onSelect: () => onComplete(config), + onExit: goBack, + isActive: step === 'confirm', + }); + + if (step === 'select-harness') { + return ( + + + + + ); + } + + if (step === 'target-name') { + return ( + + + { + setTargetAgentName(value.trim()); + }} + onCancel={goBack} + customValidation={value => { + const trimmed = value.trim(); + if (!trimmed) return 'Name is required'; + const parsed = AgentNameSchema.safeParse(trimmed); + if (!parsed.success) return parsed.error.issues[0]?.message ?? 
'Invalid name'; + if (existingAgentNames.includes(trimmed)) return `Agent "${trimmed}" already exists`; + return true; + }} + /> + + ); + } + + if (step === 'build-type') { + return ( + + + + + ); + } + + if (step === 'confirm') { + return ( + + + + + ); + } + + return null; +} diff --git a/src/cli/tui/screens/export/index.ts b/src/cli/tui/screens/export/index.ts new file mode 100644 index 000000000..569002e34 --- /dev/null +++ b/src/cli/tui/screens/export/index.ts @@ -0,0 +1 @@ +export { ExportHarnessFlow } from './ExportHarnessFlow'; diff --git a/src/cli/tui/screens/export/types.ts b/src/cli/tui/screens/export/types.ts new file mode 100644 index 000000000..a1dbfa4c6 --- /dev/null +++ b/src/cli/tui/screens/export/types.ts @@ -0,0 +1,14 @@ +export type ExportHarnessStep = 'select-harness' | 'target-name' | 'build-type' | 'confirm'; + +export interface ExportHarnessConfig { + harness: string; + targetAgentName: string; + build: 'CodeZip' | 'Container'; +} + +export const EXPORT_HARNESS_STEP_LABELS: Record = { + 'select-harness': 'Select harness', + 'target-name': 'Agent name', + 'build-type': 'Build type', + confirm: 'Confirm', +}; diff --git a/src/cli/tui/screens/export/useExportHarnessWizard.ts b/src/cli/tui/screens/export/useExportHarnessWizard.ts new file mode 100644 index 000000000..0049cff14 --- /dev/null +++ b/src/cli/tui/screens/export/useExportHarnessWizard.ts @@ -0,0 +1,63 @@ +import type { ExportHarnessConfig, ExportHarnessStep } from './types'; +import { useCallback, useState } from 'react'; + +function defaultTargetName(harness: string): string { + return `${harness}Agent`; +} + +export function useExportHarnessWizard(harnessNames: string[], onExit: () => void) { + const initialHarness = harnessNames[0] ?? ''; + const [step, setStep] = useState(harnessNames.length <= 1 ? 
'target-name' : 'select-harness'); + const [config, setConfig] = useState({ + harness: initialHarness, + targetAgentName: defaultTargetName(initialHarness), + build: 'CodeZip', + }); + + const steps: ExportHarnessStep[] = + harnessNames.length <= 1 + ? ['target-name', 'build-type', 'confirm'] + : ['select-harness', 'target-name', 'build-type', 'confirm']; + + const currentIndex = steps.indexOf(step); + + const goBack = useCallback(() => { + const idx = steps.indexOf(step); + if (idx === 0) { + onExit(); + return; + } + const prev = steps[idx - 1]; + if (prev) setStep(prev); + }, [step, steps, onExit]); + + const setHarness = useCallback((harness: string) => { + setConfig(c => ({ + ...c, + harness, + targetAgentName: defaultTargetName(harness), + })); + setStep('target-name'); + }, []); + + const setTargetAgentName = useCallback((targetAgentName: string) => { + setConfig(c => ({ ...c, targetAgentName })); + setStep('build-type'); + }, []); + + const setBuild = useCallback((build: 'CodeZip' | 'Container') => { + setConfig(c => ({ ...c, build })); + setStep('confirm'); + }, []); + + return { + config, + step, + steps, + currentIndex, + goBack, + setHarness, + setTargetAgentName, + setBuild, + }; +} diff --git a/src/cli/tui/screens/generate/__tests__/types.test.ts b/src/cli/tui/screens/generate/__tests__/types.test.ts new file mode 100644 index 000000000..42ba919b3 --- /dev/null +++ b/src/cli/tui/screens/generate/__tests__/types.test.ts @@ -0,0 +1,45 @@ +import { getProtocolOptionsForLanguage, getSDKOptionsForProtocol } from '../types.js'; +import { describe, expect, it } from 'vitest'; + +describe('getSDKOptionsForProtocol', () => { + it('excludes Vercel AI for Python HTTP agents (Vercel is TypeScript-only)', () => { + const ids = getSDKOptionsForProtocol('HTTP', 'Python').map(o => o.id); + expect(ids).toContain('Strands'); + expect(ids).toContain('LangChain_LangGraph'); + expect(ids).not.toContain('VercelAI'); + }); + + it('includes Vercel AI for TypeScript HTTP 
agents', () => { + const ids = getSDKOptionsForProtocol('HTTP', 'TypeScript').map(o => o.id); + expect(ids).toContain('Strands'); + expect(ids).toContain('VercelAI'); + }); + + it('restricts TypeScript to Strands and Vercel AI only', () => { + const ids = getSDKOptionsForProtocol('HTTP', 'TypeScript').map(o => o.id); + expect(ids).not.toContain('LangChain_LangGraph'); + expect(ids).not.toContain('GoogleADK'); + expect(ids).not.toContain('OpenAIAgents'); + }); + + it('intersects protocol and language support (A2A + Python excludes OpenAIAgents and Vercel)', () => { + const ids = getSDKOptionsForProtocol('A2A', 'Python').map(o => o.id); + expect(ids).toContain('Strands'); + expect(ids).not.toContain('OpenAIAgents'); + expect(ids).not.toContain('VercelAI'); + }); +}); + +describe('getProtocolOptionsForLanguage', () => { + it('restricts TypeScript to HTTP only', () => { + const ids = getProtocolOptionsForLanguage('TypeScript').map(o => o.id); + expect(ids).toEqual(['HTTP']); + }); + + it('offers all protocols for Python', () => { + const ids = getProtocolOptionsForLanguage('Python').map(o => o.id); + expect(ids).toContain('HTTP'); + expect(ids).toContain('MCP'); + expect(ids).toContain('A2A'); + }); +}); diff --git a/src/cli/tui/screens/generate/types.ts b/src/cli/tui/screens/generate/types.ts index 41a833f30..30bbaf066 100644 --- a/src/cli/tui/screens/generate/types.ts +++ b/src/cli/tui/screens/generate/types.ts @@ -9,7 +9,12 @@ import type { SDKFramework, TargetLanguage, } from '../../../../schema'; -import { DEFAULT_MODEL_IDS, PROTOCOL_FRAMEWORK_MATRIX, getSupportedModelProviders } from '../../../../schema'; +import { + DEFAULT_MODEL_IDS, + PROTOCOL_FRAMEWORK_MATRIX, + getFrameworksForLanguage, + getSupportedModelProviders, +} from '../../../../schema'; import type { JwtConfigOptions } from '../../../primitives/auth-utils'; export type GenerateStep = @@ -160,13 +165,15 @@ export const SDK_OPTIONS = [ /** * Get SDK options filtered by protocol compatibility and 
target language. - * TypeScript currently only supports Strands. + * Frameworks must ship a template for the chosen language — e.g. Vercel AI is + * TypeScript-only, so it never appears for Python agents. */ export function getSDKOptionsForProtocol(protocol: ProtocolMode, language?: TargetLanguage) { const supportedFrameworks = PROTOCOL_FRAMEWORK_MATRIX[protocol]; const byProtocol = SDK_OPTIONS.filter(option => supportedFrameworks.includes(option.id)); - if (language === 'TypeScript') { - return byProtocol.filter(option => option.id === 'Strands' || option.id === 'VercelAI'); + if (language === 'Python' || language === 'TypeScript') { + const byLanguage = getFrameworksForLanguage(language); + return byProtocol.filter(option => byLanguage.includes(option.id)); } return byProtocol; } @@ -213,7 +220,7 @@ export const ADVANCED_SETTING_OPTIONS = [ { id: 'filesystem', title: 'Filesystem mounts', description: 'Session storage, EFS, and S3 Files mounts' }, { id: 'configBundle', - title: 'Config bundle [preview]', + title: 'Config bundle', description: 'Manage system prompt and tool config without redeploying', }, ] as const; diff --git a/src/cli/tui/screens/generate/useGenerateWizard.ts b/src/cli/tui/screens/generate/useGenerateWizard.ts index 36fe7ef7d..e4267fc7c 100644 --- a/src/cli/tui/screens/generate/useGenerateWizard.ts +++ b/src/cli/tui/screens/generate/useGenerateWizard.ts @@ -285,7 +285,10 @@ export function useGenerateWizard(options?: UseGenerateWizardOptions) { resetFilesystemState(); } // Config bundle has no sub-steps — set flag immediately - setConfig(c => ({ ...c, withConfigBundle: selected.has('configBundle') || undefined })); + setConfig(c => ({ + ...c, + withConfigBundle: selected.has('configBundle') || undefined, + })); // Navigate to first advanced sub-step — determined by the steps memo on next render. // Use setTimeout so the steps memo recalculates with the new advancedSettings first. 
setTimeout(() => { diff --git a/src/cli/tui/screens/harness/AddHarnessFlow.tsx b/src/cli/tui/screens/harness/AddHarnessFlow.tsx index 0e24b6466..bede73f7a 100644 --- a/src/cli/tui/screens/harness/AddHarnessFlow.tsx +++ b/src/cli/tui/screens/harness/AddHarnessFlow.tsx @@ -1,12 +1,15 @@ +import { MANAGED_MEMORY_ADD_NOTICE } from '../../../operations/deploy'; import { ErrorPrompt } from '../../components'; import { AddSuccessScreen } from '../add/AddSuccessScreen'; +import { useExistingCredentials } from '../identity/useCreateIdentity'; import { AddHarnessScreen } from './AddHarnessScreen'; import type { AddHarnessConfig } from './types'; -import React, { useCallback, useEffect, useState } from 'react'; +import { Box, Text } from 'ink'; +import React, { useCallback, useEffect, useMemo, useState } from 'react'; type FlowState = | { name: 'create-wizard' } - | { name: 'create-success'; harnessName: string; loading?: boolean; loadingMessage?: string } + | { name: 'create-success'; harnessName: string; managedMemory?: boolean; loading?: boolean; loadingMessage?: string } | { name: 'error'; message: string }; interface AddHarnessFlowProps { @@ -20,6 +23,12 @@ interface AddHarnessFlowProps { export function AddHarnessFlow({ isInteractive = true, onExit, onBack, onDev, onDeploy }: AddHarnessFlowProps) { const [flow, setFlow] = useState({ name: 'create-wizard' }); const [existingNames, setExistingNames] = useState([]); + const { credentials } = useExistingCredentials(); + + const apiKeyCredentialNames = useMemo( + () => credentials.filter(c => c.authorizerType === 'ApiKeyCredentialProvider').map(c => c.name), + [credentials] + ); useEffect(() => { void (async () => { @@ -52,12 +61,46 @@ export function AddHarnessFlow({ isInteractive = true, onExit, onBack, onDev, on modelId: config.modelId, apiFormat: config.apiFormat, apiKeyArn: config.apiKeyArn, - skipMemory: config.skipMemory, + apiBase: config.apiBase, + additionalParams: config.additionalParams, + // Memory: when the 
mode-tagged union is present (gated ON), translate it to the primitive's + // memory-mode options; otherwise fall back to the legacy skipMemory + flat tuning fields. + ...(config.memory + ? config.memory.mode === 'managed' + ? { + memoryMode: 'managed' as const, + memoryStrategies: config.memory.strategies, + memoryEventExpiryDays: config.memory.eventExpiryDuration, + memoryEncryptionKeyArn: config.memory.encryptionKeyArn, + } + : config.memory.mode === 'existing' + ? { + memoryMode: 'existing' as const, + memoryName: config.memory.name, + memoryArn: config.memory.arn, + memoryActorId: config.memory.actorId, + messagesCount: config.memory.messagesCount, + memoryTopK: config.memory.topK, + memoryRelevanceScore: config.memory.relevanceScore, + } + : { memoryMode: 'disabled' as const, skipMemory: true } + : { + skipMemory: config.skipMemory, + messagesCount: config.messagesCount, + memoryTopK: config.memoryTopK, + memoryRelevanceScore: config.memoryRelevanceScore, + }), containerUri: config.containerUri, dockerfilePath: config.dockerfilePath, maxIterations: config.maxIterations, maxTokens: config.maxTokens, timeoutSeconds: config.timeoutSeconds, + temperature: config.temperature, + topP: config.topP, + topK: config.topK, + modelMaxTokens: config.modelMaxTokens, + allowedTools: config.allowedTools, + mcpHeaders: config.mcpHeaders, truncationStrategy: config.truncationStrategy, networkMode: config.networkMode, subnets: config.subnets, @@ -79,6 +122,7 @@ export function AddHarnessFlow({ isInteractive = true, onExit, onBack, onDev, on .map(s => s.trim()) .filter(Boolean) : undefined, + skills: config.skills, authorizerType: config.authorizerType, jwtConfig: config.jwtConfig ? 
{ @@ -89,6 +133,8 @@ export function AddHarnessFlow({ isInteractive = true, onExit, onBack, onDev, on customClaims: config.jwtConfig.customClaims, clientId: config.jwtConfig.clientId, clientSecret: config.jwtConfig.clientSecret, + privateEndpoint: config.jwtConfig.privateEndpoint, + privateEndpointOverrides: config.jwtConfig.privateEndpointOverrides, } : undefined, }); @@ -97,7 +143,7 @@ export function AddHarnessFlow({ isInteractive = true, onExit, onBack, onDev, on return; } - setFlow({ name: 'create-success', harnessName: config.name }); + setFlow({ name: 'create-success', harnessName: config.name, managedMemory: result.memoryMode === 'managed' }); } catch (err) { const { getErrorMessage } = await import('../../../errors'); setFlow({ name: 'error', message: getErrorMessage(err) }); @@ -108,6 +154,7 @@ export function AddHarnessFlow({ isInteractive = true, onExit, onBack, onDev, on return ( void handleCreateComplete(config)} onExit={onBack} /> @@ -120,6 +167,13 @@ export function AddHarnessFlow({ isInteractive = true, onExit, onBack, onDev, on isInteractive={isInteractive} message={`Added harness: ${flow.harnessName}`} detail="Harness config written to app/. Deploy with `agentcore deploy`." + summary={ + flow.managedMemory ? 
( + + Note: {MANAGED_MEMORY_ADD_NOTICE} + + ) : undefined + } loading={flow.loading} loadingMessage={flow.loadingMessage} onAddAnother={onBack} diff --git a/src/cli/tui/screens/harness/AddHarnessScreen.tsx b/src/cli/tui/screens/harness/AddHarnessScreen.tsx index c2ba67d72..3d94b85cb 100644 --- a/src/cli/tui/screens/harness/AddHarnessScreen.tsx +++ b/src/cli/tui/screens/harness/AddHarnessScreen.tsx @@ -1,5 +1,12 @@ import type { HarnessModelProvider, RuntimeAuthorizerType } from '../../../../schema'; -import { HarnessApiFormatSchema, MAX_EFS_MOUNTS, MAX_S3_MOUNTS, NetworkModeSchema } from '../../../../schema'; +import { + HarnessApiFormatSchema, + MAX_EFS_MOUNTS, + MAX_S3_MOUNTS, + NetworkModeSchema, + SECURITY_GROUP_ID_PATTERN, + SUBNET_ID_PATTERN, +} from '../../../../schema'; import { HarnessNameSchema, HarnessTruncationStrategySchema } from '../../../../schema/schemas/primitives/harness'; import { ARN_VALIDATION_MESSAGE, isValidArn } from '../../../commands/shared/arn-utils'; import { @@ -32,29 +39,52 @@ import { CONTAINER_MODE_OPTIONS, GATEWAY_OUTBOUND_AUTH_OPTIONS, HARNESS_STEP_LABELS, + MANAGED_STRATEGY_OPTIONS, + MEMORY_MODE_OPTIONS, MEMORY_OPTIONS, MODEL_PROVIDER_OPTIONS, NETWORK_MODE_OPTIONS, OPENAI_API_FORMAT_OPTIONS, + SKILL_SOURCE_TYPE_OPTIONS, TOOL_SELECT_OPTIONS, TRUNCATION_STRATEGY_OPTIONS, } from './types'; import { useAddHarnessWizard } from './useAddHarnessWizard'; +import { isGatedFeaturesEnabled } from '@/cli/feature-flags'; import { Text } from 'ink'; -import React, { useMemo } from 'react'; +import React, { useEffect, useMemo } from 'react'; + +/** Inline-validate a comma-separated VPC id list against `pattern` so malformed ids are rejected + * at the step (not deferred to a late write/deploy error with a misleading green checkmark). 
*/ +function validateIdList(value: string, pattern: RegExp, label: string, example: string): true | string { + const ids = value + .split(',') + .map(s => s.trim()) + .filter(Boolean); + if (ids.length === 0) return `At least one ${label} is required for VPC mode`; + const invalid = ids.find(id => !pattern.test(id)); + return invalid ? `Invalid ${label} "${invalid}" (expected e.g. ${example})` : true; +} interface AddHarnessScreenProps { existingHarnessNames: string[]; + existingApiKeyCredentialNames?: string[]; onComplete: (config: AddHarnessConfig) => void; onExit: () => void; } -export function AddHarnessScreen({ existingHarnessNames, onComplete, onExit }: AddHarnessScreenProps) { +export function AddHarnessScreen({ + existingHarnessNames, + existingApiKeyCredentialNames = [], + onComplete, + onExit, +}: AddHarnessScreenProps) { const wizard = useAddHarnessWizard(); const jwtFlow = useJwtConfigFlow({ onComplete: jwtConfig => wizard.setJwtConfig(jwtConfig), onBack: () => wizard.goBack(), + enablePrivateEndpoint: true, }); const modelProviderItems: SelectableItem[] = useMemo( @@ -78,8 +108,21 @@ export function AddHarnessScreen({ existingHarnessNames, onComplete, onExit }: A ); const advancedSettingItems: SelectableItem[] = useMemo( - () => ADVANCED_SETTING_OPTIONS.map(opt => ({ id: opt.id, title: opt.title, description: opt.description })), - [] + () => + ADVANCED_SETTING_OPTIONS + // Memory-tuning options are mode-scoped: each appears only for the memory mode the user chose, + // and managed/existing have disjoint knob sets (per the harness API). Disabled shows none. 
+ // - memory-managed-tuning → only when mode === 'managed' (strategies/event-expiry/KMS) + // - memory-existing-tuning → only when mode === 'existing' (actorId/messagesCount/topK/relevance) + // - memory-tuning (legacy) → only in the gated-off model, when memory isn't skipped + .filter(opt => { + if (opt.id === 'memory-managed-tuning') return wizard.config.memory?.mode === 'managed'; + if (opt.id === 'memory-existing-tuning') return wizard.config.memory?.mode === 'existing'; + if (opt.id === 'memory-tuning') return !wizard.config.memory && wizard.config.skipMemory !== true; + return true; + }) + .map(opt => ({ id: opt.id, title: opt.title, description: opt.description })), + [wizard.config.skipMemory, wizard.config.memory] ); const toolSelectItems: SelectableItem[] = useMemo( @@ -92,6 +135,16 @@ export function AddHarnessScreen({ existingHarnessNames, onComplete, onExit }: A [] ); + const memoryModeItems: SelectableItem[] = useMemo( + () => MEMORY_MODE_OPTIONS.map(opt => ({ id: opt.id, title: opt.title, description: opt.description })), + [] + ); + + const managedStrategyItems: SelectableItem[] = useMemo( + () => MANAGED_STRATEGY_OPTIONS.map(opt => ({ id: opt.id, title: opt.title, description: opt.description })), + [] + ); + const networkModeItems: SelectableItem[] = useMemo( () => NETWORK_MODE_OPTIONS.map(opt => ({ id: opt.id, title: opt.title, description: opt.description })), [] @@ -116,6 +169,8 @@ export function AddHarnessScreen({ existingHarnessNames, onComplete, onExit }: A const isModelProviderStep = wizard.step === 'model-provider'; const isApiFormatStep = wizard.step === 'api-format'; const isApiKeyArnStep = wizard.step === 'api-key-arn'; + const isApiBaseStep = wizard.step === 'api-base'; + const isAdditionalParamsStep = wizard.step === 'additional-params'; const isContainerStep = wizard.step === 'container'; const isContainerUriStep = wizard.step === 'container-uri'; const isContainerDockerfileStep = wizard.step === 'container-dockerfile'; @@ -128,6 
+183,11 @@ export function AddHarnessScreen({ existingHarnessNames, onComplete, onExit }: A const isGatewayProviderArnStep = wizard.step === 'gateway-provider-arn'; const isGatewayScopesStep = wizard.step === 'gateway-scopes'; const isMemoryStep = wizard.step === 'memory'; + const isMemoryModeStep = wizard.step === 'memory-mode'; + const isMemoryStrategiesStep = wizard.step === 'memory-strategies'; + const isMemoryEventExpiryStep = wizard.step === 'memory-event-expiry'; + const isMemoryKmsStep = wizard.step === 'memory-kms'; + const isMemoryExistingRefStep = wizard.step === 'memory-existing-ref'; const isAuthorizerTypeStep = wizard.step === 'authorizerType'; const isJwtConfigStep = wizard.step === 'jwtConfig'; const isNetworkModeStep = wizard.step === 'network-mode'; @@ -138,6 +198,15 @@ export function AddHarnessScreen({ existingHarnessNames, onComplete, onExit }: A const isMaxIterationsStep = wizard.step === 'max-iterations'; const isMaxTokensStep = wizard.step === 'max-tokens'; const isTimeoutStep = wizard.step === 'timeout'; + const isTemperatureStep = wizard.step === 'temperature'; + const isTopPStep = wizard.step === 'top-p'; + const isTopKStep = wizard.step === 'top-k'; + const isModelMaxTokensStep = wizard.step === 'model-max-tokens'; + const isMessagesCountStep = wizard.step === 'memory-messages-count'; + const isMemoryRetrievalTopKStep = wizard.step === 'memory-retrieval-top-k'; + const isMemoryRelevanceScoreStep = wizard.step === 'memory-relevance-score'; + const isMcpHeadersStep = wizard.step === 'mcp-headers'; + const isAllowedToolsStep = wizard.step === 'allowed-tools'; const isTruncationStrategyStep = wizard.step === 'truncation-strategy'; const isSessionStoragePathStep = wizard.step === 'session-storage-path'; const isEfsArnStep = wizard.step === 'efs-arn'; @@ -146,6 +215,14 @@ export function AddHarnessScreen({ existingHarnessNames, onComplete, onExit }: A const isS3ArnStep = wizard.step === 's3-arn'; const isS3MountPathStep = wizard.step === 
's3-mount-path'; const isS3AddAnotherStep = wizard.step === 's3-add-another'; + const isSkillsSourceTypeStep = wizard.step === 'skills-source-type'; + const isSkillPathStep = wizard.step === 'skill-path'; + const isSkillS3UriStep = wizard.step === 'skill-s3-uri'; + const isSkillGitUrlStep = wizard.step === 'skill-git-url'; + const isSkillGitPathStep = wizard.step === 'skill-git-path'; + const isSkillGitCredentialStep = wizard.step === 'skill-git-credential'; + const isSkillGitUsernameStep = wizard.step === 'skill-git-username'; + const isSkillAddAnotherStep = wizard.step === 'skill-add-another'; const isConfirmStep = wizard.step === 'confirm'; const modelProviderNav = useListNavigation({ @@ -194,6 +271,30 @@ export function AddHarnessScreen({ existingHarnessNames, onComplete, onExit }: A isActive: isMemoryStep, }); + const memoryModeNav = useListNavigation({ + items: memoryModeItems, + onSelect: item => wizard.setMemoryMode(item.id as 'managed' | 'existing' | 'disabled'), + onExit: () => wizard.goBack(), + isActive: isMemoryModeStep, + }); + + const initialStrategyIds = useMemo( + () => (wizard.config.memory?.mode === 'managed' ? (wizard.config.memory.strategies ?? []) : []), + // Seed once from the current config; per-keystroke selection is owned by the nav hook thereafter. + // eslint-disable-next-line react-hooks/exhaustive-deps + [] + ); + const managedStrategyNav = useMultiSelectNavigation({ + items: managedStrategyItems, + getId: item => item.id, + initialSelectedIds: initialStrategyIds, + onConfirm: ids => wizard.setMemoryStrategies(ids), + onExit: () => wizard.goBack(), + isActive: isMemoryStrategiesStep, + // Optional: confirming with nothing selected leaves strategies absent → service default. 
+ requireSelection: false, + }); + const authorizerTypeNav = useListNavigation({ items: authorizerTypeItems, onSelect: item => wizard.setAuthorizerType(item.id as RuntimeAuthorizerType), @@ -222,6 +323,67 @@ export function AddHarnessScreen({ existingHarnessNames, onComplete, onExit }: A isActive: isTruncationStrategyStep, }); + const skillSourceTypeItems: SelectableItem[] = useMemo( + () => + SKILL_SOURCE_TYPE_OPTIONS.map(opt => ({ + id: opt.id, + title: opt.title, + description: opt.id === 'aws_skills' && !isGatedFeaturesEnabled() ? 'Coming soon' : opt.description, + disabled: opt.id === 'aws_skills' && !isGatedFeaturesEnabled(), + })), + [] + ); + + const skillSourceTypeNav = useListNavigation({ + items: skillSourceTypeItems, + onSelect: item => wizard.setSkillSourceType(item.id as 'path' | 's3' | 'git' | 'aws_skills'), + onExit: () => wizard.goBack(), + isActive: isSkillsSourceTypeStep, + isDisabled: item => item.disabled === true, + }); + + const skillGitCredentialItems: SelectableItem[] = useMemo( + () => [ + ...existingApiKeyCredentialNames.map(name => ({ + id: name, + title: name, + description: 'Use existing API key credential', + })), + { id: 'skip', title: 'Skip (no auth needed)', description: 'Repository is publicly accessible' }, + ], + [existingApiKeyCredentialNames] + ); + + const skillGitCredentialNav = useListNavigation({ + items: skillGitCredentialItems, + onSelect: item => { + wizard.submitSkillGitCredential(item.id); + }, + onExit: () => wizard.goBack(), + isActive: isSkillGitCredentialStep, + }); + + useEffect(() => { + if (isSkillGitCredentialStep && existingApiKeyCredentialNames.length === 0) { + wizard.submitSkillGitCredential('skip'); + } + }, [isSkillGitCredentialStep, existingApiKeyCredentialNames.length]); + + const skillAddAnotherItems: SelectableItem[] = useMemo( + () => [ + { id: 'add', title: 'Add another skill', description: 'Add one more skill source' }, + { id: 'done', title: 'Done', description: `${(wizard.config.skills ?? 
[]).length} skill(s) configured` }, + ], + [wizard.config.skills] + ); + + const skillAddAnotherNav = useListNavigation({ + items: skillAddAnotherItems, + onSelect: item => wizard.submitSkillAddAnother(item.id), + onExit: () => wizard.goBack(), + isActive: isSkillAddAnotherStep, + }); + useListNavigation({ items: [{ id: 'confirm', title: 'Confirm' }], onSelect: () => onComplete(wizard.config), @@ -232,21 +394,31 @@ export function AddHarnessScreen({ existingHarnessNames, onComplete, onExit }: A const helpText = isJwtConfigStep ? jwtFlow.subStep === 'constraintPicker' ? HELP_TEXT.MULTI_SELECT - : jwtFlow.subStep === 'customClaims' - ? jwtFlow.claimsManagerMode === 'add' || jwtFlow.claimsManagerMode === 'edit' - ? '↑/↓ field · ←/→ cycle · Enter next/save · Esc cancel' - : 'Navigate · Enter select · Esc back' - : HELP_TEXT.TEXT_INPUT - : isAdvancedStep || isToolsSelectStep + : jwtFlow.subStep === 'privateEndpointType' || jwtFlow.subStep === 'vpcIpType' + ? HELP_TEXT.NAVIGATE_SELECT + : jwtFlow.subStep === 'customClaims' + ? jwtFlow.claimsManagerMode === 'add' || jwtFlow.claimsManagerMode === 'edit' + ? '↑/↓ field · ←/→ cycle · Enter next/save · Esc cancel' + : 'Navigate · Enter select · Esc back' + : jwtFlow.subStep === 'domainOverrides' + ? jwtFlow.overridesManagerMode === 'add' || jwtFlow.overridesManagerMode === 'edit' + ? HELP_TEXT.TEXT_INPUT + : 'Navigate · Enter select · Esc back' + : HELP_TEXT.TEXT_INPUT + : isAdvancedStep || isToolsSelectStep || isMemoryStrategiesStep ? 'Space toggle · Enter confirm · Esc back' : isModelProviderStep || isApiFormatStep || isMemoryStep || + isMemoryModeStep || isContainerStep || isNetworkModeStep || isTruncationStrategyStep || isAuthorizerTypeStep || - isGatewayOutboundAuthStep + isGatewayOutboundAuthStep || + isSkillsSourceTypeStep || + isSkillGitCredentialStep || + isSkillAddAnotherStep ? HELP_TEXT.NAVIGATE_SELECT : isConfirmStep ? 
HELP_TEXT.CONFIRM_CANCEL @@ -269,10 +441,52 @@ export function AddHarnessScreen({ existingHarnessNames, onComplete, onExit }: A fields.push({ label: 'API Key ARN', value: wizard.config.apiKeyArn }); } - if (wizard.config.skipMemory !== undefined) { + if (wizard.config.apiBase) { + fields.push({ label: 'API Base URL', value: wizard.config.apiBase }); + } + + if (wizard.config.additionalParams) { + fields.push({ label: 'Additional Params', value: JSON.stringify(wizard.config.additionalParams) }); + } + + const mem = wizard.config.memory; + if (mem) { + // Mode-tagged memory (gated ON). + if (mem.mode === 'managed') { + const titled = mem.strategies?.length + ? mem.strategies.map(s => s.charAt(0) + s.slice(1).toLowerCase().replace('_', ' ')).join(', ') + : 'default strategies'; + fields.push({ label: 'Memory', value: `Managed (${titled})` }); + if (mem.eventExpiryDuration !== undefined) { + fields.push({ label: 'Memory Event Expiry', value: `${mem.eventExpiryDuration} days` }); + } + if (mem.encryptionKeyArn) { + fields.push({ label: 'Memory KMS Key', value: mem.encryptionKeyArn }); + } + } else if (mem.mode === 'existing') { + fields.push({ label: 'Memory', value: `Existing (${mem.arn ?? mem.name ?? '—'})` }); + } else { + fields.push({ label: 'Memory', value: 'Disabled' }); + } + } else if (wizard.config.skipMemory !== undefined) { + // Legacy enabled/disabled (gated OFF). fields.push({ label: 'Memory', value: wizard.config.skipMemory ? 
'Disabled' : 'Enabled' }); } + if (wizard.config.messagesCount !== undefined) { + fields.push({ label: 'Memory Messages Count', value: String(wizard.config.messagesCount) }); + } + if (wizard.config.memoryTopK !== undefined) { + fields.push({ label: 'Memory Retrieval Top K', value: String(wizard.config.memoryTopK) }); + } + if (wizard.config.memoryRelevanceScore !== undefined) { + fields.push({ label: 'Memory Relevance Score', value: String(wizard.config.memoryRelevanceScore) }); + } + + if (wizard.config.allowedTools?.length) { + fields.push({ label: 'Allowed Tools', value: wizard.config.allowedTools.join(', ') }); + } + if (wizard.config.authorizerType) { fields.push({ label: 'Auth Type', @@ -298,6 +512,25 @@ export function AddHarnessScreen({ existingHarnessNames, onComplete, onExit }: A value: `${wizard.config.jwtConfig.customClaims.length} claim(s) configured`, }); } + const pe = wizard.config.jwtConfig.privateEndpoint; + if (pe?.selfManagedLatticeResource) { + fields.push({ + label: 'Private Endpoint', + value: `VPC Lattice (${pe.selfManagedLatticeResource.resourceConfigurationIdentifier})`, + }); + } else if (pe?.managedVpcResource) { + const v = pe.managedVpcResource; + fields.push({ + label: 'Private Endpoint', + value: `Managed VPC ${v.vpcIdentifier} · ${v.subnetIds.length} subnet(s) · ${v.endpointIpAddressType}`, + }); + } + if (wizard.config.jwtConfig.privateEndpointOverrides?.length) { + fields.push({ + label: 'Domain Overrides', + value: `${wizard.config.jwtConfig.privateEndpointOverrides.length} per-domain override(s)`, + }); + } if (wizard.config.jwtConfig.clientId) { fields.push({ label: 'Harness Credential', value: computeManagedOAuthCredentialName(wizard.config.name) }); } @@ -309,6 +542,9 @@ export function AddHarnessScreen({ existingHarnessNames, onComplete, onExit }: A if (wizard.config.mcpName) { fields.push({ label: 'MCP Server', value: `${wizard.config.mcpName} (${wizard.config.mcpUrl})` }); } + if (wizard.config.mcpHeaders && 
Object.keys(wizard.config.mcpHeaders).length > 0) { + fields.push({ label: 'MCP Headers', value: JSON.stringify(wizard.config.mcpHeaders) }); + } if (wizard.config.gatewayArn) { fields.push({ label: 'Gateway ARN', value: wizard.config.gatewayArn }); } @@ -330,6 +566,13 @@ export function AddHarnessScreen({ existingHarnessNames, onComplete, onExit }: A } } + if (wizard.config.skills?.length) { + for (const [i, skill] of wizard.config.skills.entries()) { + const label = skill.s3Uri ?? skill.gitUrl ?? skill.path ?? 'unknown'; + fields.push({ label: `Skill ${i + 1}`, value: label }); + } + } + if (wizard.config.containerUri) { fields.push({ label: 'Container URI', value: wizard.config.containerUri }); } @@ -370,6 +613,19 @@ export function AddHarnessScreen({ existingHarnessNames, onComplete, onExit }: A fields.push({ label: 'Timeout', value: `${wizard.config.timeoutSeconds}s` }); } + if (wizard.config.temperature !== undefined) { + fields.push({ label: 'Temperature', value: String(wizard.config.temperature) }); + } + if (wizard.config.topP !== undefined) { + fields.push({ label: 'Top P', value: String(wizard.config.topP) }); + } + if (wizard.config.topK !== undefined) { + fields.push({ label: 'Top K', value: String(wizard.config.topK) }); + } + if (wizard.config.modelMaxTokens !== undefined) { + fields.push({ label: 'Model Max Tokens', value: String(wizard.config.modelMaxTokens) }); + } + if (wizard.config.truncationStrategy) { fields.push({ label: 'Truncation Strategy', value: wizard.config.truncationStrategy }); } @@ -446,7 +702,11 @@ export function AddHarnessScreen({ existingHarnessNames, onComplete, onExit }: A {isApiFormatStep && ( @@ -455,11 +715,64 @@ export function AddHarnessScreen({ existingHarnessNames, onComplete, onExit }: A {isApiKeyArnStep && ( wizard.goBack()} - customValidation={value => isValidArn(value) || ARN_VALIDATION_MESSAGE} + customValidation={value => + // LiteLLM's key is optional — allow an empty value to skip it. 
+ (wizard.config.modelProvider === 'lite_llm' && value.trim().length === 0) || + isValidArn(value) || + ARN_VALIDATION_MESSAGE + } + /> + )} + + {isApiBaseStep && ( + wizard.goBack()} + /> + )} + + {isAdditionalParamsStep && ( + { + const trimmed = value.trim(); + if (trimmed.length === 0) { + wizard.setAdditionalParams(undefined); + return; + } + wizard.setAdditionalParams(JSON.parse(trimmed) as Record); + }} + onCancel={() => wizard.goBack()} + customValidation={value => { + const trimmed = value.trim(); + if (trimmed.length === 0) return true; + try { + const parsed = JSON.parse(trimmed) as unknown; + if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) { + return 'Additional params must be a JSON object'; + } + return true; + } catch { + return 'Additional params must be valid JSON'; + } + }} /> )} @@ -539,6 +852,41 @@ export function AddHarnessScreen({ existingHarnessNames, onComplete, onExit }: A /> )} + {isMcpHeadersStep && ( + { + const trimmed = value.trim(); + if (trimmed.length === 0) { + wizard.setMcpHeaders(undefined); + return; + } + wizard.setMcpHeaders(JSON.parse(trimmed) as Record); + }} + onCancel={() => wizard.goBack()} + customValidation={value => { + const trimmed = value.trim(); + if (trimmed.length === 0) return true; + try { + const parsed = JSON.parse(trimmed) as unknown; + if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) { + return 'Headers must be a JSON object'; + } + for (const [k, v] of Object.entries(parsed)) { + if (typeof v !== 'string') return `Header "${k}" value must be a string`; + } + return true; + } catch { + return 'Headers must be valid JSON'; + } + }} + /> + )} + {isGatewayArnStep && ( )} + {isSkillsSourceTypeStep && ( + + )} + + {isSkillPathStep && ( + wizard.goBack()} + customValidation={value => (value.trim().length > 0 ? true : 'Path is required')} + /> + )} + + {isSkillS3UriStep && ( + wizard.goBack()} + customValidation={value => (value.startsWith('s3://') ? 
true : 'Must start with s3://')} + /> + )} + + {isSkillGitUrlStep && ( + wizard.goBack()} + customValidation={value => (value.startsWith('https://') ? true : 'Must be an HTTPS URL')} + /> + )} + + {isSkillGitPathStep && ( + wizard.goBack()} + /> + )} + + {isSkillGitCredentialStep && ( + + )} + + {isSkillGitUsernameStep && ( + wizard.goBack()} + /> + )} + + {wizard.step === 'skill-aws-skills-paths' && ( + wizard.goBack()} + /> + )} + + {isSkillAddAnotherStep && ( + + )} + {isMemoryStep && ( )} + {isMemoryModeStep && ( + + )} + + {isMemoryStrategiesStep && ( + + )} + + {isMemoryEventExpiryStep && ( + wizard.goBack()} + customValidation={value => { + if (value.trim() === '') return true; + const num = parseInt(value, 10); + return !isNaN(num) && num >= 3 && num <= 365 ? true : 'Must be an integer between 3 and 365'; + }} + /> + )} + + {isMemoryKmsStep && ( + wizard.goBack()} + customValidation={value => + value.trim() === '' || isValidArn(value.trim()) ? true : ARN_VALIDATION_MESSAGE + } + /> + )} + + {isMemoryExistingRefStep && ( + wizard.goBack()} + customValidation={value => { + const v = value.trim(); + if (v === '') return 'A memory name or ARN is required'; + if (v.startsWith('arn:') && !isValidArn(v)) return ARN_VALIDATION_MESSAGE; + return true; + }} + /> + )} + {isAuthorizerTypeStep && ( wizard.goBack()} - customValidation={value => - value.trim().length > 0 ? true : 'At least one subnet is required for VPC mode' - } + customValidation={value => validateIdList(value, SUBNET_ID_PATTERN, 'subnet', 'subnet-0abc123def456')} /> )} @@ -657,7 +1179,7 @@ export function AddHarnessScreen({ existingHarnessNames, onComplete, onExit }: A onSubmit={wizard.setSecurityGroups} onCancel={() => wizard.goBack()} customValidation={value => - value.trim().length > 0 ? 
true : 'At least one security group is required for VPC mode' + validateIdList(value, SECURITY_GROUP_ID_PATTERN, 'security group', 'sg-0abc123def456') } /> )} @@ -687,7 +1209,13 @@ export function AddHarnessScreen({ existingHarnessNames, onComplete, onExit }: A onCancel={() => wizard.goBack()} customValidation={value => { const num = parseInt(value, 10); - return !isNaN(num) && num >= 60 && num <= 28800 ? true : 'Must be between 60 and 28800'; + if (isNaN(num) || num < 60 || num > 28800) return 'Must be between 60 and 28800'; + // Enforce idle <= maxLifetime inline (idle-timeout is collected first) rather than + // deferring this cross-field rule to schema-write where it surfaces as a late error. + if (wizard.config.idleTimeout !== undefined && num < wizard.config.idleTimeout) { + return `Max lifetime must be >= idle timeout (${wizard.config.idleTimeout}s)`; + } + return true; }} /> )} @@ -737,6 +1265,147 @@ export function AddHarnessScreen({ existingHarnessNames, onComplete, onExit }: A /> )} + {isTemperatureStep && ( + wizard.goBack()} + customValidation={value => { + if (value.trim() === '') return true; + const num = parseFloat(value); + return !isNaN(num) && num >= 0 && num <= 2 ? true : 'Must be between 0.0 and 2.0'; + }} + /> + )} + + {isTopPStep && ( + wizard.goBack()} + customValidation={value => { + if (value.trim() === '') return true; + const num = parseFloat(value); + return !isNaN(num) && num >= 0 && num <= 1 ? true : 'Must be between 0.0 and 1.0'; + }} + /> + )} + + {isTopKStep && ( + wizard.goBack()} + customValidation={value => { + if (value.trim() === '') return true; + const num = parseInt(value, 10); + return !isNaN(num) && num >= 0 && num <= 500 ? true : 'Must be an integer between 0 and 500'; + }} + /> + )} + + {isModelMaxTokensStep && ( + wizard.goBack()} + customValidation={value => { + if (value.trim() === '') return true; + const num = parseInt(value, 10); + return !isNaN(num) && num > 0 ? 
true : 'Must be a positive integer'; + }} + /> + )} + + {isMessagesCountStep && ( + wizard.goBack()} + customValidation={value => { + if (value.trim() === '') return true; + const num = parseInt(value, 10); + return !isNaN(num) && num >= 1 ? true : 'Must be a positive integer'; + }} + /> + )} + + {isMemoryRetrievalTopKStep && ( + wizard.goBack()} + customValidation={value => { + if (value.trim() === '') return true; + const num = parseInt(value, 10); + return !isNaN(num) && num >= 1 ? true : 'Must be a positive integer'; + }} + /> + )} + + {isMemoryRelevanceScoreStep && ( + wizard.goBack()} + customValidation={value => { + if (value.trim() === '') return true; + const num = parseFloat(value); + return !isNaN(num) && num >= 0 && num <= 1 ? true : 'Must be between 0.0 and 1.0'; + }} + /> + )} + + {isAllowedToolsStep && ( + wizard.goBack()} + customValidation={value => { + if (value.trim() === '') return true; + const items = value + .split(',') + .map(s => s.trim()) + .filter(Boolean); + if (items.length === 0) return true; + const bad = items.find(t => t.length > 64 || !/^(\*|@?[^/]+(\/[^/]+)?)$/.test(t)); + return bad ? `Invalid pattern "${bad}" (use "*" or a tool name, max 64 chars)` : true; + }} + /> + )} + {isTruncationStrategyStep && ( ; containerMode?: ContainerMode; containerUri?: string; dockerfilePath?: string; maxIterations?: number; maxTokens?: number; timeoutSeconds?: number; - truncationStrategy?: 'sliding_window' | 'summarization'; + temperature?: number; + topP?: number; + topK?: number; + modelMaxTokens?: number; + /** Legacy enabled/disabled memory toggle — used only when gated features are OFF. */ + skipMemory?: boolean; + /** + * Mode-tagged memory ref — used when gated features are ON. Mirrors the schema union. + * `managed` owns memory internally; `existing` references a memory by name/arn; `disabled` opts out. 
+ */ + memory?: + | { mode: 'managed'; strategies?: string[]; eventExpiryDuration?: number; encryptionKeyArn?: string } + | { + mode: 'existing'; + name?: string; + arn?: string; + actorId?: string; + messagesCount?: number; + topK?: number; + relevanceScore?: number; + } + | { mode: 'disabled' }; + messagesCount?: number; + memoryTopK?: number; + memoryRelevanceScore?: number; + mcpHeaders?: Record; + allowedTools?: string[]; + truncationStrategy?: 'sliding_window' | 'summarization' | 'none'; networkMode?: NetworkMode; subnets?: string[]; securityGroups?: string[]; @@ -71,6 +124,19 @@ export interface AddHarnessConfig { gatewayOutboundAuth?: 'awsIam' | 'none' | 'oauth'; gatewayProviderArn?: string; gatewayScopes?: string; + skills?: { + path?: string; + s3Uri?: string; + gitUrl?: string; + gitPath?: string; + credentialName?: string; + username?: string; + awsSkills?: string[]; + }[]; + pendingSkillSourceType?: 'path' | 's3' | 'git' | 'aws_skills'; + pendingSkillGitUrl?: string; + pendingSkillGitPath?: string; + pendingSkillCredentialName?: string; } export const HARNESS_STEP_LABELS: Record = { @@ -78,6 +144,8 @@ export const HARNESS_STEP_LABELS: Record = { 'model-provider': 'Model provider', 'api-format': 'API format', 'api-key-arn': 'API key ARN', + 'api-base': 'API base URL', + 'additional-params': 'Additional params', container: 'Custom environment', 'container-uri': 'Container URI', 'container-dockerfile': 'Dockerfile path', @@ -89,7 +157,21 @@ export const HARNESS_STEP_LABELS: Record = { 'gateway-outbound-auth': 'Gateway auth', 'gateway-provider-arn': 'Provider ARN', 'gateway-scopes': 'OAuth scopes', + 'skills-source-type': 'Skill source', + 'skill-path': 'Skill path', + 'skill-s3-uri': 'S3 URI', + 'skill-git-url': 'Git URL', + 'skill-git-path': 'Git sub-path', + 'skill-git-credential': 'Git credential', + 'skill-git-username': 'Username', + 'skill-aws-skills-paths': 'AWS Skills paths', + 'skill-add-another': 'Add skill', memory: 'Memory', + 'memory-mode': 
'Memory mode', + 'memory-strategies': 'Memory strategies', + 'memory-event-expiry': 'Memory event expiry (days)', + 'memory-kms': 'Memory KMS key ARN', + 'memory-existing-ref': 'Existing memory reference', authorizerType: 'Auth type', jwtConfig: 'JWT config', 'network-mode': 'Network mode', @@ -100,6 +182,15 @@ export const HARNESS_STEP_LABELS: Record = { 'max-iterations': 'Max iterations', 'max-tokens': 'Max tokens', timeout: 'Timeout', + temperature: 'Temperature', + 'top-p': 'Top P', + 'top-k': 'Top K', + 'model-max-tokens': 'Model max tokens', + 'memory-messages-count': 'Memory messages count', + 'memory-retrieval-top-k': 'Memory retrieval top K', + 'memory-relevance-score': 'Memory relevance score', + 'mcp-headers': 'MCP headers', + 'allowed-tools': 'Allowed tools', 'truncation-strategy': 'Truncation', 'session-storage-path': 'Session storage path', 'efs-arn': 'EFS ARN', @@ -115,6 +206,7 @@ export const DEFAULT_MODEL_IDS: Record = { bedrock: 'global.anthropic.claude-sonnet-4-6', open_ai: 'gpt-5', gemini: 'gemini-2.5-flash', + lite_llm: 'anthropic/claude-sonnet-4-5', }; export const DEFAULT_BEDROCK_MANTLE_MODEL_ID = 'openai.gpt-oss-120b'; @@ -131,6 +223,11 @@ export const MODEL_PROVIDER_OPTIONS = [ title: 'Google Gemini', description: `Default: ${DEFAULT_MODEL_IDS.gemini} (requires API key ARN)`, }, + { + id: 'lite_llm' as const, + title: 'LiteLLM', + description: `Default: ${DEFAULT_MODEL_IDS.lite_llm} (API key ARN optional)`, + }, ] as const; export const BEDROCK_API_FORMAT_OPTIONS = [ @@ -169,14 +266,39 @@ export const API_FORMAT_OPTIONS = BEDROCK_API_FORMAT_OPTIONS; export const TRUNCATION_STRATEGY_OPTIONS = [ { id: 'sliding_window' as const, title: 'Sliding window', description: 'Keep most recent messages' }, { id: 'summarization' as const, title: 'Summarization', description: 'Compress older context' }, + { id: 'none' as const, title: 'None', description: 'Disable truncation' }, ] as const; export const ADVANCED_SETTING_OPTIONS = [ { id: 'tools', title: 
'Tools', description: 'Add browser, code interpreter, MCP, or gateway tools' }, + { id: 'skills', title: 'Skills', description: 'Add agent skills' }, + // Two mode-scoped memory-tuning options: only the one matching the chosen memory mode is shown in + // the advanced list (see AddHarnessScreen's filter). Managed and existing have disjoint knob sets + // per the harness API, so they never both appear. Legacy (gated-off) uses 'memory-tuning'. + { + id: 'memory-tuning', + title: 'Memory tuning', + description: 'Tune messages count and retrieval (topK, relevance score)', + }, + { + id: 'memory-managed-tuning', + title: 'Memory tuning', + description: 'Managed memory: strategies, event retention, encryption key', + }, + { + id: 'memory-existing-tuning', + title: 'Memory tuning', + description: 'Existing memory: actor ID, messages count, retrieval (topK, relevance)', + }, + { id: 'allowed-tools', title: 'Allowed tools', description: 'Restrict which tools the agent may invoke' }, { id: 'auth', title: 'Authentication', description: 'Inbound auth: AWS_IAM or Custom JWT' }, { id: 'network', title: 'Network', description: 'Deploy inside a VPC with custom subnets and security groups' }, { id: 'lifecycle', title: 'Lifecycle', description: 'Set idle timeout and max session lifetime' }, - { id: 'execution', title: 'Execution limits', description: 'Cap iterations, tokens, and per-turn timeout' }, + { + id: 'execution', + title: 'Execution & sampling', + description: 'Cap iterations, tokens, timeout; tune temperature, topP, topK', + }, { id: 'truncation', title: 'Truncation', description: 'Choose how context is managed when it exceeds limits' }, { id: 'session-storage', @@ -196,6 +318,27 @@ export const MEMORY_OPTIONS = [ { id: 'enabled' as const, title: 'Enabled', description: 'Create persistent memory for this harness' }, ] as const; +/** Mode-first memory options (gated features ON). Mirrors the schema's 3-mode union. 
*/ +export const MEMORY_MODE_OPTIONS = [ + { + id: 'managed' as const, + title: 'Managed', + description: 'AgentCore creates and manages memory for this harness (default)', + }, + { id: 'existing' as const, title: 'Existing', description: 'Reference an existing memory by name or ARN' }, + { id: 'disabled' as const, title: 'Disabled', description: 'No memory' }, +] as const; + +/** Managed-memory strategy choices (the four CFN ManagedMemoryConfiguration.Strategies values). */ +export const MANAGED_STRATEGY_OPTIONS = [ + { id: 'SEMANTIC' as const, title: 'Semantic', description: 'Extract and retrieve semantic facts' }, + { id: 'SUMMARIZATION' as const, title: 'Summarization', description: 'Summarize conversation history' }, + { id: 'USER_PREFERENCE' as const, title: 'User preference', description: 'Track user preferences' }, + { id: 'EPISODIC' as const, title: 'Episodic', description: 'Recall past episodes/sessions' }, +] as const; + +/** Keep/customize options for the managed retention + encryption tuning sub-flow. 
*/ + export const CONTAINER_MODE_OPTIONS = [ { id: 'none' as const, title: 'Default Environment', description: 'Includes Python, Bash, File tools' }, { id: 'uri' as const, title: 'Container URI', description: 'Use a pre-built container image (ECR URI)' }, @@ -228,3 +371,14 @@ export const GATEWAY_OUTBOUND_AUTH_OPTIONS = [ { id: 'none', title: 'None', description: 'No authentication headers' }, { id: 'oauth', title: 'OAuth', description: 'Bearer token via AgentCore Identity credential provider' }, ]; + +export const SKILL_SOURCE_TYPE_OPTIONS = [ + { id: 'path' as const, title: 'Path', description: 'Path to an installed skill in the environment' }, + { id: 's3' as const, title: 'S3', description: 'S3 URI (s3://bucket/path)' }, + { id: 'git' as const, title: 'Git', description: 'HTTPS git repository URL' }, + { + id: 'aws_skills' as const, + title: 'AWS Skills', + description: 'Built-in AWS skills (github.com/aws/agent-toolkit-for-aws/tree/main/skills)', + }, +] as const; diff --git a/src/cli/tui/screens/harness/useAddHarnessWizard.ts b/src/cli/tui/screens/harness/useAddHarnessWizard.ts index f181ec8b6..ccf057293 100644 --- a/src/cli/tui/screens/harness/useAddHarnessWizard.ts +++ b/src/cli/tui/screens/harness/useAddHarnessWizard.ts @@ -1,5 +1,5 @@ import type { HarnessApiFormat, HarnessModelProvider, NetworkMode, RuntimeAuthorizerType } from '../../../../schema'; -import { isPreviewEnabled } from '../../../feature-flags'; +import { isGatedFeaturesEnabled, isPreviewEnabled } from '../../../feature-flags'; import type { JwtConfig } from '../../components/jwt-config'; import { HARNESS_FILESYSTEM_STEP_NAMES, useFilesystemMountState } from '../../hooks/useFilesystemMountState'; import type { AddHarnessConfig, AddHarnessStep, AdvancedSetting, ContainerMode } from './types'; @@ -8,6 +8,11 @@ import { useCallback, useMemo, useState } from 'react'; const ADVANCED_SETTING_ORDER: AdvancedSetting[] = [ 'tools', + 'skills', + 'memory-tuning', + 'memory-managed-tuning', + 
'memory-existing-tuning', + 'allowed-tools', 'auth', 'network', 'lifecycle', @@ -18,6 +23,11 @@ const ADVANCED_SETTING_ORDER: AdvancedSetting[] = [ const SETTING_TO_FIRST_STEP: Record = { tools: 'tools-select', + skills: 'skills-source-type', + 'memory-tuning': 'memory-messages-count', + 'memory-managed-tuning': 'memory-strategies', + 'memory-existing-tuning': 'memory-messages-count', + 'allowed-tools': 'allowed-tools', auth: 'authorizerType', network: 'network-mode', lifecycle: 'idle-timeout', @@ -47,6 +57,12 @@ function getDefaultConfig(): AddHarnessConfig { name: '', modelProvider: 'bedrock', modelId: DEFAULT_MODEL_IDS.bedrock, + // Managed memory is the default for new harnesses when the gated feature is on (strategies left + // absent → service default; tunable under Advanced). When off, the legacy enabled/disabled + // `memory` step drives skipMemory instead and this stays undefined. + ...(isGatedFeaturesEnabled() && { + memory: { mode: 'managed' as const }, + }), }; } @@ -66,6 +82,10 @@ export function useAddHarnessWizard() { steps.push('api-key-arn'); } + if (config.modelProvider === 'lite_llm') { + steps.push('api-base', 'additional-params'); + } + steps.push('container'); if (config.containerMode === 'uri') { steps.push('container-uri'); @@ -73,14 +93,25 @@ export function useAddHarnessWizard() { steps.push('container-dockerfile'); } - steps.push('memory'); + if (isGatedFeaturesEnabled()) { + // Main path is just the mode pick. Managed defaults to the service's own strategy set (nothing + // more to ask); existing REQUIRES a name/ARN so it's collected here; disabled needs nothing. + // All other knobs (managed strategies/expiry/KMS, existing tuning) live under Advanced → Memory tuning. + steps.push('memory-mode'); + if (config.memory?.mode === 'existing') { + steps.push('memory-existing-ref'); + } + } else { + // Legacy enabled/disabled memory step. 
+ steps.push('memory'); + } steps.push('advanced'); if (advancedSettings.includes('tools')) { steps.push('tools-select'); if (config.selectedTools?.includes('remote_mcp')) { - steps.push('mcp-name', 'mcp-url'); + steps.push('mcp-name', 'mcp-url', 'mcp-headers'); } if (config.selectedTools?.includes('agentcore_gateway')) { steps.push('gateway-arn'); @@ -91,6 +122,20 @@ export function useAddHarnessWizard() { } } + if (advancedSettings.includes('skills')) { + steps.push('skills-source-type'); + if (config.pendingSkillSourceType === 'path') { + steps.push('skill-path'); + } else if (config.pendingSkillSourceType === 's3') { + steps.push('skill-s3-uri'); + } else if (config.pendingSkillSourceType === 'git') { + steps.push('skill-git-url', 'skill-git-path', 'skill-git-credential', 'skill-git-username'); + } else if (config.pendingSkillSourceType === 'aws_skills') { + steps.push('skill-aws-skills-paths'); + } + steps.push('skill-add-another'); + } + if (advancedSettings.includes('auth')) { steps.push('authorizerType'); if (config.authorizerType === 'CUSTOM_JWT') { @@ -105,12 +150,34 @@ export function useAddHarnessWizard() { } } + // Mode-scoped memory tuning (gated on). Only the advanced option matching the chosen memory mode is + // offered (see AddHarnessScreen's filter), so these are mutually exclusive: managed and existing have + // disjoint knob sets per the harness API. + if (advancedSettings.includes('memory-managed-tuning') && config.memory?.mode === 'managed') { + steps.push('memory-strategies', 'memory-event-expiry', 'memory-kms'); + } + if (advancedSettings.includes('memory-existing-tuning') && config.memory?.mode === 'existing') { + steps.push('memory-messages-count', 'memory-retrieval-top-k', 'memory-relevance-score'); + } + // Legacy tuning (gated off): the old flat topK/relevance/messages knobs. 
+ if (advancedSettings.includes('memory-tuning') && !isGatedFeaturesEnabled()) { + steps.push('memory-messages-count', 'memory-retrieval-top-k', 'memory-relevance-score'); + } + + if (advancedSettings.includes('allowed-tools')) { + steps.push('allowed-tools'); + } + if (advancedSettings.includes('lifecycle')) { steps.push('idle-timeout', 'max-lifetime'); } if (advancedSettings.includes('execution')) { - steps.push('max-iterations', 'max-tokens', 'timeout'); + steps.push('max-iterations', 'max-tokens', 'timeout', 'temperature', 'top-p'); + if (config.modelProvider === 'gemini') { + steps.push('top-k'); + } + steps.push('model-max-tokens'); } if (advancedSettings.includes('truncation')) { @@ -133,6 +200,9 @@ export function useAddHarnessWizard() { config.networkMode, config.selectedTools, config.gatewayOutboundAuth, + config.pendingSkillSourceType, + config.skills, + config.memory?.mode, advancedSettings, ]); @@ -223,6 +293,22 @@ export function useAddHarnessWizard() { } return; } + if (step === 'skills-source-type') { + if ((config.skills?.length ?? 
0) > 0) { + setStep('skill-add-another'); + } else { + const idx = allSteps.indexOf('skills-source-type'); + const prev = allSteps[idx - 1]; + if (prev) setStep(prev); + } + return; + } + if (step === 'skill-add-another') { + const idx = allSteps.indexOf('skills-source-type'); + const prev = allSteps[idx - 1]; + if (prev) setStep(prev); + return; + } const idx = allSteps.indexOf(step); const prevStep = allSteps[idx - 1]; if (prevStep) setStep(prevStep); @@ -233,6 +319,7 @@ export function useAddHarnessWizard() { editingS3Index, config.efsAccessPoints, config.s3AccessPoints, + config.skills, resetFilesystemState, ]); @@ -254,8 +341,18 @@ export function useAddHarnessWizard() { ); const setModelProvider = useCallback((modelProvider: HarnessModelProvider) => { - setConfig(c => ({ ...c, modelProvider, modelId: DEFAULT_MODEL_IDS[modelProvider], apiFormat: undefined })); - if (modelProvider === 'bedrock' && isPreviewEnabled()) { + setConfig(c => ({ + ...c, + modelProvider, + modelId: DEFAULT_MODEL_IDS[modelProvider], + apiFormat: undefined, + // apiBase / additionalParams only apply to lite_llm — clear them when switching away. + ...(modelProvider !== 'lite_llm' && { apiBase: undefined, additionalParams: undefined }), + })); + // bedrock and open_ai both have a preview-gated api-format step that sits before api-key-arn + // in allSteps — route through it for BOTH (open_ai previously jumped straight to api-key-arn, + // making api-format forward-unreachable and leaving a false ✓ on the skipped step). + if ((modelProvider === 'bedrock' || modelProvider === 'open_ai') && isPreviewEnabled()) { setStep('api-format'); } else if (modelProvider !== 'bedrock') { setStep('api-key-arn'); @@ -264,20 +361,30 @@ export function useAddHarnessWizard() { } }, []); - const setApiFormat = useCallback((apiFormat: HarnessApiFormat) => { - setConfig(c => { - if (c.modelProvider === 'bedrock') { - const isMantle = apiFormat !== 'converse_stream'; - return { - ...c, - apiFormat: isMantle ? 
apiFormat : undefined, - modelId: isMantle ? DEFAULT_BEDROCK_MANTLE_MODEL_ID : DEFAULT_MODEL_IDS.bedrock, - }; - } - return { ...c, apiFormat }; - }); - setStep('container'); - }, []); + const setApiFormat = useCallback( + (apiFormat: HarnessApiFormat) => { + let provider: HarnessModelProvider = 'bedrock'; + setConfig(c => { + provider = c.modelProvider; + if (c.modelProvider === 'bedrock') { + const isMantle = apiFormat !== 'converse_stream'; + return { + ...c, + apiFormat: isMantle ? apiFormat : undefined, + modelId: isMantle ? DEFAULT_BEDROCK_MANTLE_MODEL_ID : DEFAULT_MODEL_IDS.bedrock, + }; + } + return { ...c, apiFormat }; + }); + // Advance to the natural next step instead of hard-coding 'container'. For open_ai the next + // step is the REQUIRED api-key-arn — hard-coding 'container' skipped it, so a Back→api-format + // →select path reached Confirm with apiKeyArn undefined and failed hard at write time. + const next = nextStep('api-format'); + if (next) setStep(next); + else setStep(provider === 'bedrock' ? 
'container' : 'api-key-arn'); + }, + [nextStep] + ); const setApiKeyArn = useCallback( (apiKeyArn: string) => { @@ -288,6 +395,24 @@ export function useAddHarnessWizard() { [nextStep] ); + const setApiBase = useCallback( + (apiBase: string) => { + setConfig(c => ({ ...c, apiBase: apiBase || undefined })); + const next = nextStep('api-base'); + if (next) setStep(next); + }, + [nextStep] + ); + + const setAdditionalParams = useCallback( + (additionalParams: Record | undefined) => { + setConfig(c => ({ ...c, additionalParams })); + const next = nextStep('additional-params'); + if (next) setStep(next); + }, + [nextStep] + ); + const setContainerMode = useCallback((containerMode: ContainerMode) => { setConfig(c => ({ ...c, containerMode, containerUri: undefined, dockerfilePath: undefined })); if (containerMode === 'uri') { @@ -295,7 +420,8 @@ export function useAddHarnessWizard() { } else if (containerMode === 'dockerfile') { setStep('container-dockerfile'); } else { - setStep('memory'); + // Route to the first memory step: the mode picker (gated on) or the legacy toggle (gated off). + setStep(isGatedFeaturesEnabled() ? 
'memory-mode' : 'memory'); } }, []); @@ -359,9 +485,14 @@ export function useAddHarnessWizard() { [nextStep] ); - const setMcpUrl = useCallback( - (mcpUrl: string) => { - setConfig(c => ({ ...c, mcpUrl })); + const setMcpUrl = useCallback((mcpUrl: string) => { + setConfig(c => ({ ...c, mcpUrl })); + setStep('mcp-headers'); + }, []); + + const setMcpHeaders = useCallback( + (headers: Record | undefined) => { + setConfig(c => ({ ...c, mcpHeaders: headers })); if (config.selectedTools?.includes('agentcore_gateway')) { setStep('gateway-arn'); } else { @@ -409,6 +540,65 @@ export function useAddHarnessWizard() { setStep('advanced'); }, []); + // --- Mode-first memory sub-flow setters (gated features ON) --- + + const setMemoryMode = useCallback((mode: 'managed' | 'existing' | 'disabled') => { + // Managed seeds nothing beyond the mode — strategies/expiry/KMS are opt-in under Advanced, and an + // absent strategy set means "use the service default". Existing collects its required ref next. + setConfig(c => ({ ...c, memory: { mode } })); + if (mode === 'existing') { + setStep('memory-existing-ref'); + } else { + // Managed / disabled have nothing more on the main path → continue to Advanced. + setStep('advanced'); + } + }, []); + + const setMemoryStrategies = useCallback( + (strategies: string[]) => { + setConfig(c => + c.memory?.mode === 'managed' + ? { ...c, memory: { ...c.memory, strategies: strategies.length > 0 ? strategies : undefined } } + : c + ); + const next = nextStep('memory-strategies'); + if (next) setStep(next); + }, + [nextStep] + ); + + const setMemoryEventExpiry = useCallback( + (raw: string) => { + const days = raw.trim() === '' ? undefined : parseInt(raw, 10); + setConfig(c => (c.memory?.mode === 'managed' ? 
{ ...c, memory: { ...c.memory, eventExpiryDuration: days } } : c)); + const next = nextStep('memory-event-expiry'); + if (next) setStep(next); + }, + [nextStep] + ); + + const setMemoryKms = useCallback( + (raw: string) => { + const encryptionKeyArn = raw.trim() === '' ? undefined : raw.trim(); + setConfig(c => (c.memory?.mode === 'managed' ? { ...c, memory: { ...c.memory, encryptionKeyArn } } : c)); + const next = nextStep('memory-kms'); + if (next) setStep(next); + }, + [nextStep] + ); + + const setMemoryExistingRef = useCallback((raw: string) => { + const value = raw.trim(); + // An ARN goes to `arn`, anything else is treated as a project memory name. + const isArn = value.startsWith('arn:'); + setConfig(c => ({ + ...c, + memory: { mode: 'existing', ...(isArn ? { arn: value } : { name: value }) }, + })); + // Existing-ref is the last main-path memory step → continue to Advanced. + setStep('advanced'); + }, []); + const setAuthorizerType = useCallback( (authorizerType: RuntimeAuthorizerType) => { setConfig(c => ({ ...c, authorizerType, jwtConfig: undefined })); @@ -520,8 +710,95 @@ export function useAddHarnessWizard() { [nextStep] ); + const setTemperature = useCallback( + (raw: string) => { + const temperature = raw.trim() === '' ? undefined : parseFloat(raw); + setConfig(c => ({ ...c, temperature })); + const next = nextStep('temperature'); + if (next) setStep(next); + }, + [nextStep] + ); + + const setTopP = useCallback( + (raw: string) => { + const topP = raw.trim() === '' ? undefined : parseFloat(raw); + setConfig(c => ({ ...c, topP })); + const next = nextStep('top-p'); + if (next) setStep(next); + }, + [nextStep] + ); + + const setTopK = useCallback( + (raw: string) => { + const topK = raw.trim() === '' ? 
undefined : parseInt(raw, 10); + setConfig(c => ({ ...c, topK })); + const next = nextStep('top-k'); + if (next) setStep(next); + }, + [nextStep] + ); + + const setModelMaxTokens = useCallback( + (raw: string) => { + const modelMaxTokens = raw.trim() === '' ? undefined : parseInt(raw, 10); + setConfig(c => ({ ...c, modelMaxTokens })); + const next = nextStep('model-max-tokens'); + if (next) setStep(next); + }, + [nextStep] + ); + + const setMessagesCount = useCallback( + (raw: string) => { + const messagesCount = raw.trim() === '' ? undefined : parseInt(raw, 10); + setConfig(c => ({ ...c, messagesCount })); + const next = nextStep('memory-messages-count'); + if (next) setStep(next); + }, + [nextStep] + ); + + const setMemoryTopK = useCallback( + (raw: string) => { + const memoryTopK = raw.trim() === '' ? undefined : parseInt(raw, 10); + setConfig(c => ({ ...c, memoryTopK })); + const next = nextStep('memory-retrieval-top-k'); + if (next) setStep(next); + }, + [nextStep] + ); + + const setMemoryRelevanceScore = useCallback( + (raw: string) => { + const memoryRelevanceScore = raw.trim() === '' ? undefined : parseFloat(raw); + setConfig(c => ({ ...c, memoryRelevanceScore })); + const next = nextStep('memory-relevance-score'); + if (next) setStep(next); + }, + [nextStep] + ); + + const setAllowedTools = useCallback( + (raw: string) => { + const trimmed = raw.trim(); + const allowedTools = + trimmed === '' + ? 
undefined + : trimmed + .split(',') + .map(s => s.trim()) + .filter(Boolean); + setConfig(c => ({ ...c, allowedTools })); + const next = nextStep('allowed-tools'); + if (next) setStep(next); + }, + [nextStep] + ); + const setTruncationStrategy = useCallback( - (truncationStrategy: 'sliding_window' | 'summarization') => { + (truncationStrategy: 'sliding_window' | 'summarization' | 'none') => { setConfig(c => ({ ...c, truncationStrategy })); const next = nextStep('truncation-strategy'); if (next) setStep(next); @@ -538,6 +815,100 @@ export function useAddHarnessWizard() { [nextStep] ); + const setSkillSourceType = useCallback((sourceType: 'path' | 's3' | 'git' | 'aws_skills') => { + setConfig(c => ({ ...c, pendingSkillSourceType: sourceType })); + if (sourceType === 'path') setStep('skill-path'); + else if (sourceType === 's3') setStep('skill-s3-uri'); + else if (sourceType === 'aws_skills') setStep('skill-aws-skills-paths'); + else setStep('skill-git-url'); + }, []); + + const submitSkillPath = useCallback((path: string) => { + setConfig(c => ({ + ...c, + skills: [...(c.skills ?? []), { path }], + pendingSkillSourceType: undefined, + })); + setStep('skill-add-another'); + }, []); + + const submitSkillS3 = useCallback((s3Uri: string) => { + setConfig(c => ({ + ...c, + skills: [...(c.skills ?? 
[]), { s3Uri }], + pendingSkillSourceType: undefined, + })); + setStep('skill-add-another'); + }, []); + + const submitSkillGitUrl = useCallback((gitUrl: string) => { + setConfig(c => ({ ...c, pendingSkillGitUrl: gitUrl })); + setStep('skill-git-path'); + }, []); + + const submitSkillGitPath = useCallback((gitPath: string) => { + setConfig(c => ({ ...c, pendingSkillGitPath: gitPath || undefined })); + setStep('skill-git-credential'); + }, []); + + const submitSkillGitCredential = useCallback((selection: string) => { + if (selection === 'skip') { + setConfig(c => ({ ...c, pendingSkillCredentialName: undefined })); + setStep('skill-git-username'); + } else { + // selection is a credential name (existing or newly created) + setConfig(c => ({ ...c, pendingSkillCredentialName: selection })); + setStep('skill-git-username'); + } + }, []); + + const submitSkillGitUsername = useCallback((username: string) => { + setConfig(c => { + const skill: NonNullable[number] = { + gitUrl: c.pendingSkillGitUrl, + ...(c.pendingSkillGitPath && { gitPath: c.pendingSkillGitPath }), + ...(c.pendingSkillCredentialName && { + credentialName: c.pendingSkillCredentialName, + ...(username && { username }), + }), + }; + return { + ...c, + skills: [...(c.skills ?? []), skill], + pendingSkillSourceType: undefined, + pendingSkillGitUrl: undefined, + pendingSkillGitPath: undefined, + pendingSkillCredentialName: undefined, + }; + }); + setStep('skill-add-another'); + }, []); + + const submitSkillAwsSkillsPaths = useCallback((pathsStr: string) => { + const paths = pathsStr + .split(',') + .map(s => s.trim()) + .filter(Boolean); + setConfig(c => ({ + ...c, + skills: [...(c.skills ?? 
[]), { awsSkills: paths }], + pendingSkillSourceType: undefined, + })); + setStep('skill-add-another'); + }, []); + + const submitSkillAddAnother = useCallback( + (choice: string) => { + if (choice === 'add') { + setStep('skills-source-type'); + } else { + const next = getNextAdvancedStep(advancedSettings, 'skills'); + setStep(next ?? 'confirm'); + } + }, + [advancedSettings] + ); + const reset = useCallback(() => { setConfig(getDefaultConfig()); setStep('name'); @@ -556,6 +927,8 @@ export function useAddHarnessWizard() { setModelProvider, setApiFormat, setApiKeyArn, + setApiBase, + setAdditionalParams, setContainerMode, setContainerUri, setDockerfilePath, @@ -568,6 +941,11 @@ export function useAddHarnessWizard() { setGatewayProviderArn, setGatewayScopes, setMemoryEnabled, + setMemoryMode, + setMemoryStrategies, + setMemoryEventExpiry, + setMemoryKms, + setMemoryExistingRef, setAuthorizerType, setJwtConfig, setNetworkMode, @@ -578,6 +956,15 @@ export function useAddHarnessWizard() { setMaxIterations, setMaxTokens, setTimeoutSeconds, + setTemperature, + setTopP, + setTopK, + setModelMaxTokens, + setMessagesCount, + setMemoryTopK, + setMemoryRelevanceScore, + setAllowedTools, + setMcpHeaders, setTruncationStrategy, setSessionStoragePath, pendingEfsArn, @@ -590,6 +977,15 @@ export function useAddHarnessWizard() { submitS3Arn, submitS3MountPath, submitS3AddAnother, + setSkillSourceType, + submitSkillPath, + submitSkillS3, + submitSkillGitUrl, + submitSkillGitPath, + submitSkillGitCredential, + submitSkillGitUsername, + submitSkillAwsSkillsPaths, + submitSkillAddAnother, reset, }; } diff --git a/src/cli/tui/screens/import/ArnInputScreen.tsx b/src/cli/tui/screens/import/ArnInputScreen.tsx index 9381ca7b8..5dc9237bc 100644 --- a/src/cli/tui/screens/import/ArnInputScreen.tsx +++ b/src/cli/tui/screens/import/ArnInputScreen.tsx @@ -4,7 +4,8 @@ import { Screen } from '../../components/Screen'; import { TextInput } from '../../components/TextInput'; import { HELP_TEXT } from 
'../../constants'; -const ARN_PATTERN = /^arn:[^:]+:bedrock-agentcore:[^:]+:[^:]+:(runtime|memory|evaluator|online-evaluation-config)\/.+$/; +const ARN_PATTERN = + /^arn:[^:]+:bedrock-agentcore:[^:]+:[^:]+:(runtime|memory|evaluator|gateway|online-evaluation-config)\/.+$/; function validateArn(value: string): true | string { if (!ARN_PATTERN.test(value)) { diff --git a/src/cli/tui/screens/insights-jobs/InsightsJobsScreen.tsx b/src/cli/tui/screens/insights-jobs/InsightsJobsScreen.tsx new file mode 100644 index 000000000..edaba804c --- /dev/null +++ b/src/cli/tui/screens/insights-jobs/InsightsJobsScreen.tsx @@ -0,0 +1,383 @@ +import type { FailureAnalysisResult, GetBatchEvaluationResult } from '../../../aws/agentcore-batch-evaluation'; +import { getBatchEvaluation } from '../../../aws/agentcore-batch-evaluation'; +import type { InsightsRunRecord } from '../../../operations/insights'; +import { listInsightsRuns } from '../../../operations/insights'; +import { Panel, Screen } from '../../components'; +import { HELP_TEXT } from '../../constants'; +import { useListNavigation } from '../../hooks'; +import { Box, Text, useInput, useStdout } from 'ink'; +import React, { useEffect, useMemo, useState } from 'react'; + +// ───────────────────────────────────────────────────────────────────────────── +// Helpers +// ───────────────────────────────────────────────────────────────────────────── + +const MONTHS = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']; + +function formatShortDate(timestamp: string): string { + const d = new Date(timestamp); + const mon = MONTHS[d.getMonth()]; + const day = d.getDate(); + const h = d.getHours(); + const m = d.getMinutes().toString().padStart(2, '0'); + const ampm = h >= 12 ? 
'PM' : 'AM'; + const h12 = h % 12 || 12; + return `${mon} ${day} ${h12}:${m} ${ampm}`; +} + +function statusColor(status: string): string { + if (status === 'COMPLETED' || status === 'SUCCEEDED') return 'green'; + if (status === 'FAILED') return 'red'; + if (status === 'IN_PROGRESS' || status === 'PENDING') return 'yellow'; + return 'gray'; +} + +const CHROME_LINES = 9; + +// ───────────────────────────────────────────────────────────────────────────── +// List view +// ───────────────────────────────────────────────────────────────────────────── + +function InsightsJobsListView({ + records, + onSelect, + onExit, + availableHeight, +}: { + records: InsightsRunRecord[]; + onSelect: (record: InsightsRunRecord) => void; + onExit: () => void; + availableHeight: number; +}) { + const nav = useListNavigation({ + items: records, + onSelect: item => onSelect(item), + onExit, + isActive: true, + }); + + const maxVisible = Math.max(1, availableHeight - 3); + const visible = useMemo(() => { + let start = 0; + if (nav.selectedIndex >= maxVisible) { + start = nav.selectedIndex - maxVisible + 1; + } + return { items: records.slice(start, start + maxVisible), startIdx: start }; + }, [records, nav.selectedIndex, maxVisible]); + + return ( + + + Insights Jobs + + {records.length} insights run{records.length !== 1 ? 's' : ''} + + + {visible.items.map((rec, vIdx) => { + const idx = visible.startIdx + vIdx; + const selected = idx === nav.selectedIndex; + const date = rec.createdAt ? formatShortDate(rec.createdAt) : 'unknown'; + + return ( + + {selected ? 
'>' : ' '} + {date.padEnd(16)} + {rec.status.padEnd(12)} + {rec.name || rec.batchEvaluationId} + + ); + })} + {visible.startIdx + maxVisible < records.length && ( + {records.length - visible.startIdx - maxVisible} more + )} + + + + ); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Results view +// ───────────────────────────────────────────────────────────────────────────── + +function InsightsResultsView({ record, onBack }: { record: InsightsRunRecord; onBack: () => void }) { + const [loading, setLoading] = useState(true); + const [error, setError] = useState(null); + const [failureAnalysis, setFailureAnalysis] = useState(undefined); + const [totalSessions, setTotalSessions] = useState(0); + + useEffect(() => { + let cancelled = false; + void (async () => { + try { + const result: GetBatchEvaluationResult = await getBatchEvaluation({ + region: record.region, + batchEvaluationId: record.batchEvaluationId, + }); + if (cancelled) return; + if (result.status !== 'COMPLETED' && result.status !== 'COMPLETEDWITHERRORS') { + setError(`Job has status ${result.status}. Results are only available for completed jobs.`); + } else { + setFailureAnalysis(result.failureAnalysisResult); + setTotalSessions(result.evaluationResults?.totalNumberOfSessions ?? 0); + } + } catch (err) { + if (!cancelled) { + setError(err instanceof Error ? err.message : String(err)); + } + } finally { + if (!cancelled) setLoading(false); + } + })(); + return () => { + cancelled = true; + }; + }, [record.batchEvaluationId, record.region]); + + useInput((input, key) => { + if (key.escape || input === 'b') { + onBack(); + } + }); + + if (loading) { + return ( + + Loading results... + + ); + } + + if (error) { + return ( + + + {error} + + Press Esc or B to go back + + + + ); + } + + const categories = failureAnalysis?.failureCategories ?? []; + + if (categories.length === 0) { + return ( + + + No failure categories found in this insights run. 
+ + Press Esc or B to go back + + + + ); + } + + return ( + + + Insights Results: {record.name || record.batchEvaluationId} + + Sessions: {totalSessions} | Clusters: {categories.length} + + + {categories.map((cat, i) => { + const failureCount = cat.rootCauses?.length ?? 0; + const pct = totalSessions > 0 ? Math.round((failureCount / totalSessions) * 100) : 0; + const impact = pct >= 20 ? 'HIGH IMPACT' : pct >= 10 ? 'MEDIUM' : ''; + + return ( + + + + #{i + 1} ({pct}% of sessions) + + {impact ? = 20 ? 'red' : 'yellow'}> {impact} : null} + + + {' '}Category: {cat.failureCategoryName ?? 'Unknown'} + + {cat.failureCategoryDescription && ( + + {' '} + {cat.failureCategoryDescription} + + )} + {(cat.rootCauses ?? []).map((rc, rcIdx) => ( + + Root cause: {rc.rootCauseDescription ?? rc.rootCauseCategory ?? 'Unknown'} + {rc.recommendation && Fix: {rc.recommendation}} + {rc.relatedSessions?.[0]?.recommendationType && ( + Fix type: {rc.relatedSessions[0].recommendationType} + )} + + ))} + + ); + })} + + + Press Esc or B to go back + + + + ); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Detail view +// ───────────────────────────────────────────────────────────────────────────── + +function InsightsJobDetailView({ + record, + onBack, + onViewResults, +}: { + record: InsightsRunRecord; + onBack: () => void; + onViewResults: () => void; +}) { + const isCompleted = record.status === 'COMPLETED' || record.status === 'COMPLETEDWITHERRORS'; + + useInput((input, key) => { + if (key.escape || input === 'b') { + onBack(); + } + if ((input === 'v' || input === 'V') && isCompleted) { + onViewResults(); + } + }); + + return ( + + + + ID: {record.batchEvaluationId} + + + Status: {record.status} + + + Insights type(s): {record.insights.join(', ')} + + {record.agent && ( + + Agent: {record.agent} + + )} + {record.createdAt && ( + + Started: {new Date(record.createdAt).toLocaleString()} + + )} + {record.completedAt && ( + + Completed: {new 
Date(record.completedAt).toLocaleString()} + + )} + + + Sessions: + + {' '}total: {record.sessionCount ?? 'N/A'} + {record.sessionsCompleted != null && , completed: {record.sessionsCompleted}} + {record.sessionsFailed != null && record.sessionsFailed > 0 && ( + , failed: {record.sessionsFailed} + )} + + + + + + To generate a recommendation: agentcore run recommendation --from-insights {record.batchEvaluationId} + + + + + {isCompleted ? 'V view results - ' : ''}G generate recommendation - Esc back + + + + ); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Main screen +// ───────────────────────────────────────────────────────────────────────────── + +interface InsightsJobsScreenProps { + onExit: () => void; +} + +export function InsightsJobsScreen({ onExit }: InsightsJobsScreenProps) { + const { stdout } = useStdout(); + const terminalHeight = stdout?.rows ?? 24; + const availableHeight = Math.max(6, terminalHeight - CHROME_LINES); + + const [selectedRecord, setSelectedRecord] = useState(null); + const [viewingResults, setViewingResults] = useState(false); + + const [records, loaded, error] = useMemo(() => { + try { + return [listInsightsRuns(), true, null] as const; + } catch (err) { + return [[] as InsightsRunRecord[], true, err instanceof Error ? err.message : String(err)] as const; + } + }, []); + + if (!loaded) { + return ( + + Loading... + + ); + } + + if (error) { + return ( + + {error} + + ); + } + + if (records.length === 0) { + return ( + + + No insights runs found. Run `agentcore run insights` to get started. + + + ); + } + + const helpText = viewingResults + ? 'Esc/B back to detail' + : selectedRecord + ? 'V view results - Esc/B back to list' + : HELP_TEXT.NAVIGATE_SELECT; + + return ( + + {viewingResults && selectedRecord ? ( + setViewingResults(false)} /> + ) : selectedRecord ? 
( + setSelectedRecord(null)} + onViewResults={() => setViewingResults(true)} + /> + ) : ( + + )} + + ); +} diff --git a/src/cli/tui/screens/insights-jobs/index.ts b/src/cli/tui/screens/insights-jobs/index.ts new file mode 100644 index 000000000..a2d535983 --- /dev/null +++ b/src/cli/tui/screens/insights-jobs/index.ts @@ -0,0 +1 @@ +export { InsightsJobsScreen } from './InsightsJobsScreen'; diff --git a/src/cli/tui/screens/job-detail/ABTestDetailView.tsx b/src/cli/tui/screens/job-detail/ABTestDetailView.tsx new file mode 100644 index 000000000..7b742bd22 --- /dev/null +++ b/src/cli/tui/screens/job-detail/ABTestDetailView.tsx @@ -0,0 +1,235 @@ +import { getErrorMessage } from '../../../errors'; +import { isTerminal } from '../../../operations/jobs'; +import type { ABTestJobRecord, DebugCheckResult, JobEngine } from '../../../operations/jobs'; +import { getInvocationUrl } from '../../../operations/jobs/ab-test/format'; +import { Panel } from '../../components'; +import { lifecycleColor, statusColor } from './helpers'; +import { Box, Text, useInput } from 'ink'; +import React, { useCallback, useState } from 'react'; + +type ActionState = 'idle' | 'working' | 'error'; + +/** + * Shared presentational detail view for an A/B-test job. Renders the panel body + * only (the caller supplies the surrounding `Screen`). It owns the lifecycle + * action keybindings (stop/pause/resume/promote) and the debug check action. + * + * Back/exit semantics are caller-controlled: `backKey` is the letter that, like + * Escape, invokes `onBack`. Flow A (`agentcore view ab-test `) uses `'q'` + * ("exit"); the TUI history flow uses `'b'` ("back to list"). 
+ */ +export function ABTestDetailView({ + record, + engine, + onBack, + onUpdate, + backKey = 'b', + backLabel = 'back', +}: { + record: ABTestJobRecord; + engine: JobEngine; + onBack: () => void; + onUpdate: (record: ABTestJobRecord) => void; + backKey?: string; + backLabel?: string; +}) { + const [actionState, setActionState] = useState('idle'); + const [actionError, setActionError] = useState(null); + const [debugResults, setDebugResults] = useState(null); + const [debugLoading, setDebugLoading] = useState(false); + + const caps = engine.capabilities('ab-test'); + const terminal = isTerminal(record); + const canStop = caps.canStop && !terminal; + const canPause = caps.canPause && record.lifecycleStatus === 'RUNNING'; + const canResume = caps.canPause && record.lifecycleStatus === 'PAUSED'; + const canPromote = caps.canPromote && !terminal; + + const runAction = useCallback( + async (fn: () => Promise<{ success: boolean; error?: { message: string } }>) => { + setActionState('working'); + setActionError(null); + try { + const result = await fn(); + if (!result.success) { + setActionState('error'); + setActionError(result.error?.message ?? 
'Action failed'); + return; + } + const refreshed = await engine.get('ab-test', record.id); + setActionState('idle'); + if (refreshed) onUpdate(refreshed); + } catch (err) { + setActionState('error'); + setActionError(getErrorMessage(err)); + } + }, + [engine, record.id, onUpdate] + ); + + const handleDebug = useCallback(async () => { + setDebugLoading(true); + setDebugResults(null); + try { + const result = await engine.debug('ab-test', record.id); + if (result.success) { + setDebugResults(result.checks); + } else { + setDebugResults([{ label: 'Debug', status: 'fail', detail: result.error.message }]); + } + } catch { + setDebugResults([{ label: 'Debug', status: 'fail', detail: 'Failed to run debug checks' }]); + } + setDebugLoading(false); + }, [engine, record.id]); + + useInput((input, key) => { + if (actionState === 'working' || debugLoading) return; + if (key.escape || input === backKey) { + onBack(); + return; + } + const ch = input.toLowerCase(); + if (ch === 's' && canStop) void runAction(() => engine.stop('ab-test', record.id)); + else if (ch === 'p' && canPause) void runAction(() => engine.pause('ab-test', record.id)); + else if (ch === 'r' && canResume) void runAction(() => engine.resume('ab-test', record.id)); + else if (ch === 'w' && canPromote) void runAction(() => engine.promote('ab-test', record.id)); + else if (ch === 'd') void handleDebug(); + }); + + const invocationUrl = getInvocationUrl(record); + const metrics = record.results?.evaluatorMetrics; + + const keyHints = [ + `Esc/${backKey.toUpperCase()} ${backLabel}`, + canStop ? 'S stop' : null, + canPause ? 'P pause' : null, + canResume ? 'R resume' : null, + canPromote ? 
'W promote' : null, + 'D debug', + ].filter(Boolean); + + return ( + + + + ID: {record.id} + + + Name: {record.name} + {' '} + Mode: {record.mode} + + + Execution: {record.status} + {' '} + Lifecycle:{' '} + {record.lifecycleStatus} + + + Gateway: {record.gatewayArn} + + {invocationUrl && ( + + Invocation URL: {invocationUrl} + + )} + {record.createdAt && ( + + Started: {new Date(record.createdAt).toLocaleString()} + + )} + {record.completedAt && ( + + Stopped: {new Date(record.completedAt).toLocaleString()} + + )} + + + Variants: + {record.variants.map(v => { + const detail = v.bundleArn + ? `bundle ${v.bundleArn} @ ${v.bundleVersion}` + : v.targetName + ? `target ${v.targetName}` + : '(unspecified)'; + return ( + + {' '} + {v.name} (weight {v.weight}): {detail} + + ); + })} + + + {metrics && metrics.length > 0 ? ( + + Results: + {metrics.map(m => ( + + {m.evaluatorArn} + + {' '}C (n={m.controlStats.sampleSize}): {m.controlStats.mean.toFixed(3)} + + {m.variantResults.map(vr => ( + + {' '} + {vr.treatmentName} (n={vr.sampleSize}): {vr.mean.toFixed(3)} + {vr.percentChange != null + ? ` (${vr.percentChange > 0 ? '+' : ''}${vr.percentChange.toFixed(1)}%)` + : ''} + {vr.isSignificant ? *significant* : null} + + ))} + + ))} + + ) : record.failureReason ? ( + + Failure: {record.failureReason} + + ) : ( + + No results available yet. + + )} + + {actionState === 'working' && ( + + Working... + + )} + {actionState === 'error' && actionError && ( + + Action failed: {actionError} + + )} + + {debugLoading && ( + + Running debug checks... + + )} + {debugResults && ( + + Debug Checks: + {debugResults.map((check, i) => { + const icon = check.status === 'pass' ? '✓' : check.status === 'warn' ? '⚠' : '✗'; + const color = check.status === 'pass' ? 'green' : check.status === 'warn' ? 
'yellow' : 'red'; + return ( + + {' '} + {icon} {check.label}: {check.detail} + + ); + })} + + )} + + + {keyHints.join(' · ')} + + + + ); +} diff --git a/src/cli/tui/screens/job-detail/BatchEvalDetailView.tsx b/src/cli/tui/screens/job-detail/BatchEvalDetailView.tsx new file mode 100644 index 000000000..71c381609 --- /dev/null +++ b/src/cli/tui/screens/job-detail/BatchEvalDetailView.tsx @@ -0,0 +1,157 @@ +import { getErrorMessage } from '../../../errors'; +import { isTerminal } from '../../../operations/jobs'; +import type { BatchEvaluationJobRecord, JobEngine } from '../../../operations/jobs'; +import { Panel } from '../../components'; +import { scoreColor, statusColor } from './helpers'; +import { Box, Text, useInput } from 'ink'; +import React, { useCallback, useState } from 'react'; + +type StopState = 'idle' | 'stopping' | 'error'; + +/** + * Shared presentational detail view for a batch-evaluation job. Renders the + * panel body only (the caller supplies the surrounding `Screen`). It owns the + * stop keybinding. + * + * Back/exit semantics are caller-controlled: `backKey` is the letter that, like + * Escape, invokes `onBack`. Flow A (`agentcore view batch-evaluation `) + * uses `'q'` ("exit"); the TUI history flow uses `'b'` ("back to list"). 
+ */ +export function BatchEvalDetailView({ + record, + engine, + onBack, + onUpdate, + backKey = 'b', + backLabel = 'back', +}: { + record: BatchEvaluationJobRecord; + engine: JobEngine; + onBack: () => void; + onUpdate: (record: BatchEvaluationJobRecord) => void; + backKey?: string; + backLabel?: string; +}) { + const [stopState, setStopState] = useState('idle'); + const [stopError, setStopError] = useState(null); + + const canStop = engine.capabilities('batch-evaluation').canStop && !isTerminal(record); + + const handleStop = useCallback(async () => { + setStopState('stopping'); + setStopError(null); + try { + const result = await engine.stop('batch-evaluation', record.id); + if (!result.success) { + setStopState('error'); + setStopError(result.error.message); + return; + } + const refreshed = await engine.get('batch-evaluation', record.id); + setStopState('idle'); + if (refreshed) onUpdate(refreshed); + } catch (err) { + setStopState('error'); + setStopError(getErrorMessage(err)); + } + }, [engine, record.id, onUpdate]); + + useInput((input, key) => { + if (key.escape || input === backKey) { + onBack(); + return; + } + if ((input === 's' || input === 'S') && canStop && stopState !== 'stopping') { + void handleStop(); + } + }); + + const evalRes = record.evaluationResults; + const summaries = evalRes?.evaluatorSummaries; + + return ( + + + + ID: {record.id} + + + Name: {record.name} + {' '} + Status: {record.status} + + + Agent: {record.agent} + {' '} + Evaluators: {record.evaluators.join(', ')} + + {record.source === 'dataset' && record.dataset && ( + + Dataset: {record.dataset.id} (version: {record.dataset.version}) + + )} + {record.createdAt && ( + + Created: {new Date(record.createdAt).toLocaleString()} + + )} + {record.completedAt && ( + + Completed: {new Date(record.completedAt).toLocaleString()} + + )} + + {evalRes?.totalNumberOfSessions != null && ( + + Sessions: {evalRes.totalNumberOfSessions} total + {evalRes.numberOfSessionsCompleted != null && , 
{evalRes.numberOfSessionsCompleted} completed} + {evalRes.numberOfSessionsFailed ? , {evalRes.numberOfSessionsFailed} failed : null} + + )} + + {summaries && summaries.length > 0 ? ( + + Scores (0 worst — 1 best): + {summaries.map(s => { + const avg = s.statistics?.averageScore; + const avgStr = avg != null ? avg.toFixed(2) : 'N/A'; + const color = avg != null ? scoreColor(avg) : undefined; + return ( + + {' '} + {s.evaluatorId} + {' '} + {avgStr} + {s.totalFailed ? ({s.totalFailed} failed) : null} + {s.totalEvaluated != null && [{s.totalEvaluated} evaluated]} + + ); + })} + + ) : ( + + No evaluation results available yet. + + )} + + {stopState === 'stopping' && ( + + Stopping... + + )} + {stopState === 'error' && stopError && ( + + Could not stop: {stopError} + + )} + + + + Esc/{backKey.toUpperCase()} {backLabel} + {canStop ? ' · S stop' : ''} + + + + + ); +} diff --git a/src/cli/tui/screens/job-detail/RecommendationDetailView.tsx b/src/cli/tui/screens/job-detail/RecommendationDetailView.tsx new file mode 100644 index 000000000..b59e46f47 --- /dev/null +++ b/src/cli/tui/screens/job-detail/RecommendationDetailView.tsx @@ -0,0 +1,132 @@ +import type { RecommendationJobRecord } from '../../../operations/jobs'; +import { Panel } from '../../components'; +import { shortTypeName, statusColor } from './helpers'; +import { Box, Text, useInput } from 'ink'; +import React from 'react'; + +/** + * Shared presentational detail view for a recommendation job. Renders the panel + * body only (the caller supplies the surrounding `Screen`). Recommendation jobs + * have no lifecycle actions, so this view is read-only. + * + * Back/exit semantics are caller-controlled: `backKey` is the letter that, like + * Escape, invokes `onBack`. Flow A (`agentcore view recommendation `) uses + * `'q'` ("exit"); the TUI history flow uses `'b'` ("back to list"). 
+ */ +export function RecommendationDetailView({ + record, + onBack, + backKey = 'b', + backLabel = 'back', +}: { + record: RecommendationJobRecord; + onBack: () => void; + backKey?: string; + backLabel?: string; +}) { + useInput((input, key) => { + if (key.escape || input === backKey) { + onBack(); + } + }); + + const sysResult = record.result?.systemPromptRecommendationResult; + const toolResult = record.result?.toolDescriptionRecommendationResult; + const isFailed = record.status === 'FAILED'; + const failureText = record.failureDetail ?? record.statusReasons?.join('; '); + + return ( + + + + ID: {record.id} + + + Type: {shortTypeName(record.recommendationType)} + {' '} + Agent: {record.agent} + {' '} + Status: {record.status} + + + Evaluators: {record.evaluators.join(', ') || '(none)'} + + {record.createdAt && ( + + Created: {new Date(record.createdAt).toLocaleString()} + + )} + {record.completedAt && ( + + Completed: {new Date(record.completedAt).toLocaleString()} + + )} + + {isFailed && failureText && ( + + + Failure: + + + {failureText} + + + )} + + {sysResult?.explanation && ( + + + Explanation: + + + {sysResult.explanation} + + + )} + + {sysResult?.recommendedSystemPrompt && ( + + + Recommended System Prompt: + + + {sysResult.recommendedSystemPrompt} + + + )} + + {toolResult?.tools && toolResult.tools.length > 0 && ( + + + Recommended Tool Descriptions: + + {toolResult.tools.map(tool => ( + + {tool.toolName} + {tool.explanation && ( + + Explanation: + {tool.explanation} + + )} + {tool.recommendedToolDescription} + + ))} + + )} + + {!isFailed && !sysResult?.recommendedSystemPrompt && !(toolResult?.tools && toolResult.tools.length > 0) && ( + + No recommendation results available yet. 
+ + )} + + + + Esc/{backKey.toUpperCase()} {backLabel} + + + + + ); +} diff --git a/src/cli/tui/screens/job-detail/helpers.ts b/src/cli/tui/screens/job-detail/helpers.ts new file mode 100644 index 000000000..b9fa2d419 --- /dev/null +++ b/src/cli/tui/screens/job-detail/helpers.ts @@ -0,0 +1,32 @@ +/** Color and label helpers shared by the job-detail views: both the + * interactive `agentcore view` flow and the TUI history screens use them. */ + +/** Maps an execution/job status string to a display color (gray when unrecognized). */ +export function statusColor(status: string): string { + const isOneOf = (...candidates: string[]): boolean => candidates.includes(status); + if (isOneOf('COMPLETED', 'SUCCEEDED', 'RUNNING')) return 'green'; + if (isOneOf('PAUSED', 'IN_PROGRESS', 'PENDING', 'COMPLETED_WITH_ERRORS')) return 'yellow'; + if (isOneOf('FAILED', 'STOPPED', 'CANCELLED', 'NOT_FOUND')) return 'red'; + return 'gray'; +} + +/** Maps an A/B-test lifecycleStatus to a display color (gray when unrecognized). */ +export function lifecycleColor(status: string): string { + const isActive = status === 'ACTIVE'; + const isFailed = status === 'FAILED'; + return isActive ? 'green' : isFailed ? 'red' : 'gray'; +} + +/** Maps an evaluation average score (0 worst — 1 best) to a display color. */ +export function scoreColor(score: number): string { + const greenFloor = 0.8; + const yellowFloor = 0.5; + return score >= greenFloor ? 'green' : score >= yellowFloor ? 'yellow' : 'red'; +} + +/** Short display name for a recommendation type; other values are returned unchanged. 
*/ +export function shortTypeName(type: string): string { + const isSystemPrompt = type === 'SYSTEM_PROMPT_RECOMMENDATION'; + const isToolDescription = type === 'TOOL_DESCRIPTION_RECOMMENDATION'; + return isSystemPrompt ? 'System Prompt' : isToolDescription ? 'Tool Description' : type; +} diff --git a/src/cli/tui/screens/job-detail/index.ts b/src/cli/tui/screens/job-detail/index.ts new file mode 100644 index 000000000..106bf749e --- /dev/null +++ b/src/cli/tui/screens/job-detail/index.ts @@ -0,0 +1,4 @@ +export { ABTestDetailView } from './ABTestDetailView'; +export { BatchEvalDetailView } from './BatchEvalDetailView'; +export { RecommendationDetailView } from './RecommendationDetailView'; +export { lifecycleColor, scoreColor, shortTypeName, statusColor } from './helpers'; diff --git a/src/cli/tui/screens/knowledge-base/AddKnowledgeBaseFlow.tsx b/src/cli/tui/screens/knowledge-base/AddKnowledgeBaseFlow.tsx new file mode 100644 index 000000000..8a462a087 --- /dev/null +++ b/src/cli/tui/screens/knowledge-base/AddKnowledgeBaseFlow.tsx @@ -0,0 +1,196 @@ +import { gatewayPrimitive, knowledgeBasePrimitive } from '../../../primitives/registry'; +import { ErrorPrompt } from '../../components'; +import { useExistingGateways } from '../../hooks/useCreateMcp'; +import { AddSuccessScreen } from '../add/AddSuccessScreen'; +import { AddKnowledgeBaseScreen } from './AddKnowledgeBaseScreen'; +import { groupDataSources } from './groupDataSources'; +import { isInlineJsonValue, materializeInlineConnectorConfig, stripInlineJsonPrefix } from './inline-connector-config'; +import type { AddKnowledgeBaseConfig, CapturedDataSource } from './types'; +import React, { useCallback, useEffect, useState } from 'react'; + +type FlowState = + | { name: 'create-wizard' } + | { name: 'create-success'; knowledgeBaseName: string; sources: string[]; gatewayWired?: string } + | { name: 'error'; message: string }; + +interface AddKnowledgeBaseFlowProps { + isInteractive?: boolean; + onExit: () => void; + 
onBack: () => void; + onDev?: () => void; + onDeploy?: () => void; 
+} + +export function AddKnowledgeBaseFlow({ + isInteractive = true, + onExit, + onBack, + onDev, + onDeploy, +}: AddKnowledgeBaseFlowProps) { + const [flow, setFlow] = useState({ name: 'create-wizard' }); + const [existingNames, setExistingNames] = useState([]); + const { gateways: existingGateways } = useExistingGateways(); + + // Load existing KB names for duplicate detection. + useEffect(() => { + void knowledgeBasePrimitive.getRemovable().then(removables => { + setExistingNames(removables.map(r => r.name)); + }); + }, []); + + // In non-interactive mode, exit after success. + useEffect(() => { + if (!isInteractive && flow.name === 'create-success') { + onExit(); + } + }, [isInteractive, flow.name, onExit]); + + const handleComplete = useCallback((config: AddKnowledgeBaseConfig) => { + void (async () => { + // Materialize any inline-JSON connector configs to disk first. The + // wizard tags those entries with INLINE_JSON_PREFIX; we strip the + // prefix, write the JSON to app//.json, and replace + // the captured value with the resulting path so the primitive sees a + // normal connector-config path. Failures here surface to the user as a + // wizard error before any primitive call. + let materializedSources: CapturedDataSource[]; + try { + materializedSources = await Promise.all( + config.dataSources.map(async ds => { + if (!isInlineJsonValue(ds.value)) return ds; + const json = stripInlineJsonPrefix(ds.value); + const path = await materializeInlineConnectorConfig({ + kbName: config.name, + dataSourceType: ds.dataSourceType, + jsonContents: json, + }); + return { dataSourceType: ds.dataSourceType, value: path }; + }) + ); + } catch (err) { + setFlow({ + name: 'error', + message: `Failed to save inline connector config: ${err instanceof Error ? 
err.message : String(err)}`, + }); + return; + } + + // Group captured sources by data-source-type, then dispatch one + // primitive.add() per group sequentially: the first call creates the + // KB, subsequent calls hit appendToExisting() and add their sources to + // the same KB. The primitive's gateway-equality guard accepts the same + // gateway value on every append; description is sent only on the first + // call so the no-update guard can't trip. + const groups = groupDataSources(materializedSources); + if (groups.length === 0) { + setFlow({ name: 'error', message: 'No data sources captured.' }); + return; + } + + // If the user chose "Create a new gateway and attach", create the + // gateway BEFORE the KB add. Use sensible defaults — authorizer NONE, + // semantic search on — so the inline-create stays a single step. The + // user can edit the gateway later via `agentcore add gateway` flags or + // the schema directly. Mutually exclusive with `config.gateway`. + // + // Track whether we created the gateway in *this* flow so we can roll it + // back if a downstream KB add fails. Without this, a failure mid-flow + // (duplicate source, gateway-equality mismatch, etc.) leaves the new + // gateway persisted in agentcore.json with no KB attached — the user + // sees an error toast but their config has drifted. 
+ let gatewayToWire: string | undefined = config.gateway; + let createdGatewayInThisFlow: string | undefined; + if (config.newGatewayName) { + const gwResult = await gatewayPrimitive.add({ + name: config.newGatewayName, + authorizerType: 'NONE', + enableSemanticSearch: true, + }); + if (!gwResult.success) { + setFlow({ + name: 'error', + message: `Failed to create gateway "${config.newGatewayName}": ${gwResult.error.message}`, + }); + return; + } + gatewayToWire = gwResult.gatewayName; + createdGatewayInThisFlow = gwResult.gatewayName; + } + + const rollbackGatewayIfCreated = async (reason: string): Promise => { + if (!createdGatewayInThisFlow) return reason; + const removeResult = await gatewayPrimitive.remove(createdGatewayInThisFlow); + if (removeResult.success) { + return `${reason} (rolled back the gateway "${createdGatewayInThisFlow}" that was just created.)`; + } + return `${reason} (note: gateway "${createdGatewayInThisFlow}" was created but rollback failed: ${removeResult.error?.message ?? 'unknown error'}. Run \`agentcore remove gateway --name ${createdGatewayInThisFlow}\` to clean up.)`; + }; + + const totalSources: string[] = []; + let gatewayWired: string | undefined; + + for (let i = 0; i < groups.length; i++) { + const group = groups[i]!; + const isS3 = group.dataSourceType === 's3'; + const isFirst = i === 0; + const result = await knowledgeBasePrimitive.add({ + name: config.name, + ...(isFirst && config.description ? { description: config.description } : {}), + dataSourceType: group.dataSourceType, + ...(isS3 ? 
{ source: group.values } : { connectorConfig: group.values }), + gateway: gatewayToWire, + }); + + if (!result.success) { + const message = await rollbackGatewayIfCreated( + `Failed on ${group.dataSourceType} group: ${result.error.message}` + ); + setFlow({ name: 'error', message }); + return; + } + + totalSources.push(...result.newDataSources); + if (result.gatewayWired) { + gatewayWired = result.gatewayWired; + } + } + + setFlow({ + name: 'create-success', + knowledgeBaseName: config.name, + sources: totalSources, + gatewayWired, + }); + })(); + }, []); + + if (flow.name === 'create-wizard') { + return ( + + ); + } + if (flow.name === 'create-success') { + const wiredSuffix = flow.gatewayWired ? ` Wired to gateway "${flow.gatewayWired}" as a connector target.` : ''; + return ( + + ); + } + if (flow.name === 'error') { + return ; + } + return null; +} diff --git a/src/cli/tui/screens/knowledge-base/AddKnowledgeBaseScreen.tsx b/src/cli/tui/screens/knowledge-base/AddKnowledgeBaseScreen.tsx new file mode 100644 index 000000000..14596c8f1 --- /dev/null +++ b/src/cli/tui/screens/knowledge-base/AddKnowledgeBaseScreen.tsx @@ -0,0 +1,506 @@ +import { GatewayNameSchema, KnowledgeBaseNameSchema, S3DataSourceSchema } from '../../../../schema'; +import { type DataSourceTypeFlag, flagToWireType } from '../../../operations/knowledge-base/connector-config'; +import { ConfirmReview, Panel, Screen, StepIndicator, TextInput, WizardSelect } from '../../components'; +import type { SelectableItem } from '../../components'; +import { HELP_TEXT } from '../../constants'; +import { useListNavigation } from '../../hooks'; +import { generateUniqueName } from '../../utils'; +import { INLINE_JSON_PREFIX } from './inline-connector-config'; +import type { AddKnowledgeBaseConfig, CapturedDataSource } from './types'; +import React, { useMemo, useState } from 'react'; +import { z } from 'zod'; + +// Canonical step list. 
The 'new-gateway-name' state is intentionally a +// sub-step of 'gateway' (mirrors the kb-id sub-step pattern in +// useAddGatewayTargetWizard) and is not in this list — it maps onto 'gateway' +// for the StepIndicator/index lookup. +type Step = 'name' | 'description' | 'data-source-type' | 'sources' | 'add-another' | 'gateway' | 'confirm'; +// 'remove-source' is a sub-step of 'add-another' — it shows a picker of +// captured sources so the user can drop one before continuing. Mapped onto +// 'add-another' for the StepIndicator. Same pattern as 'new-gateway-name'. +type WizardState = Step | 'new-gateway-name' | 'remove-source'; + +const STEP_LABELS: Record = { + name: 'Name', + description: 'Description', + 'data-source-type': 'Source Type', + sources: 'Sources', + 'add-another': 'Add another?', + gateway: 'Gateway', + confirm: 'Confirm', +}; + +const STEPS: Step[] = ['name', 'description', 'data-source-type', 'sources', 'add-another', 'gateway', 'confirm']; + +// Each source carries its own type, so a single wizard run can mix S3 with one +// or more connector types. The Flow groups by `dataSourceType` and dispatches +// one primitive.add() call per group: the first creates the KB, subsequent +// groups append to it. +const DATA_SOURCE_TYPE_OPTIONS: SelectableItem[] = [ + { id: 's3', title: 'Amazon S3 — documents in an S3 bucket' }, + { id: 'web-crawler', title: 'Web Crawler — crawl and index web pages' }, + { id: 'confluence', title: 'Confluence — Atlassian Confluence wiki' }, + { id: 'sharepoint', title: 'SharePoint — Microsoft SharePoint documents' }, + { id: 'onedrive', title: 'OneDrive — Microsoft OneDrive files' }, + { id: 'google-drive', title: 'Google Drive — Google Drive files' }, +]; + +// Friendly label for each data-source-type id, used in the confirm view. 
+const DATA_SOURCE_TYPE_LABELS: Record = { + s3: 'S3', + 'web-crawler': 'Web Crawler', + confluence: 'Confluence', + sharepoint: 'SharePoint', + onedrive: 'OneDrive', + 'google-drive': 'Google Drive', +}; + +const ADD_ANOTHER_OPTIONS: SelectableItem[] = [ + { id: 'add-another', title: 'Add another data source' }, + { id: 'done', title: 'Done — review and submit' }, +]; + +// Same shape, augmented with a "Remove a captured source" option that we +// surface only when the user already has a source they could drop. +const ADD_ANOTHER_OPTIONS_WITH_REMOVE: SelectableItem[] = [ + { id: 'add-another', title: 'Add another data source' }, + { id: 'remove-source', title: 'Remove a captured data source' }, + { id: 'done', title: 'Done — review and submit' }, +]; + +// Connector-config inputs accept EITHER a file path OR the JSON contents +// pasted in directly. Most terminals collapse a pasted multi-line JSON into a +// single line of text — that's fine, JSON.parse doesn't care about newlines. +// +// We classify by the first non-whitespace character: `{` or `[` is treated as +// inline JSON, anything else as a file path. Inline JSON is parsed right away so +// the user gets immediate feedback if it's malformed, isn't a JSON object, or +// its `type` field doesn't match the connector kind they picked at the +// data-source-type step. The Flow materializes accepted inline JSON to a file +// under app/ before dispatching to the primitive. +// +// Path inputs only get a non-empty check here; the file's actual contents are +// validated in the primitive's add() (file exists, JSON parses, type matches). +function makeConnectorConfigSchema(pendingType: string) { + const declaredWireType = flagToWireType(pendingType); + return z + .string() + .min(1, 'Enter a connector config file path or paste the JSON contents') + .superRefine((s, ctx) => { + const trimmed = s.trimStart(); + if (!trimmed.startsWith('{') && !trimmed.startsWith('[')) { + // Treat as a file path; primitive validates the rest. 
+ return; + } + let parsed: unknown; + try { + parsed = JSON.parse(trimmed); + } catch { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: 'Looks like JSON but failed to parse. Check brackets and quoting.', + }); + return; + } + if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: 'Connector config must be a JSON object (e.g. { "type": "WEB", ... }).', + }); + return; + } + const obj = parsed as Record; + if (typeof obj.type !== 'string') { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: 'Connector config is missing a string "type" field.', + }); + return; + } + if (obj.type !== declaredWireType) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: `Connector config "type" is "${obj.type}" but you picked ${pendingType} (expects "${declaredWireType}").`, + }); + } + }); +} + +const SKIP_GATEWAY_ID = '__skip__'; +const CREATE_NEW_GATEWAY_ID = '__create_new__'; + +// Extract just the URI piece of S3DataSourceSchema for inline validation in +// the TextInput component. +const S3UriSchema = z + .string() + .min(1) + .refine(uri => S3DataSourceSchema.safeParse({ type: 'S3', uri }).success, { + message: 'Must be a valid s3://bucket[/prefix] URI', + }); + +interface AddKnowledgeBaseScreenProps { + onComplete: (config: AddKnowledgeBaseConfig) => void; + onExit: () => void; + existingKnowledgeBaseNames: string[]; + existingGatewayNames: string[]; +} + +export function AddKnowledgeBaseScreen({ + onComplete, + onExit, + existingKnowledgeBaseNames, + existingGatewayNames, +}: AddKnowledgeBaseScreenProps) { + const [step, setStep] = useState('name'); + const [name, setName] = useState(''); + const [description, setDescription] = useState(''); + // The type currently being entered at the 'sources' step. 
Updated every + // time the user passes through 'data-source-type' (including the loop from + // 'add-another -> yes'), so each captured source is tagged with the type + // active at the moment it was entered. + const [pendingType, setPendingType] = useState('s3'); + const [dataSources, setDataSources] = useState([]); + const [gateway, setGateway] = useState(undefined); + // When the user chose "Create a new gateway and attach", this holds the + // typed name. The KB Flow consumes this and creates the gateway first + // before adding the KB. Mutually exclusive with `gateway`. + const [newGatewayName, setNewGatewayName] = useState(undefined); + + const isPendingS3 = pendingType === 's3'; + + const isNameStep = step === 'name'; + const isDescriptionStep = step === 'description'; + const isDataSourceTypeStep = step === 'data-source-type'; + const isSourcesStep = step === 'sources'; + const isAddAnotherStep = step === 'add-another'; + const isRemoveSourceStep = step === 'remove-source'; + const isGatewayStep = step === 'gateway'; + const isNewGatewayNameStep = step === 'new-gateway-name'; + const isConfirmStep = step === 'confirm'; + + const hasGateways = existingGatewayNames.length > 0; + + // Number of sources already captured for the *current* pendingType run, used + // to label inputs ("S3 URI #2") and decide where Esc returns to. + const sourcesForPendingType = useMemo( + () => dataSources.filter(ds => ds.dataSourceType === pendingType).length, + [dataSources, pendingType] + ); + + // Gateway-step picker contents adapt to whether any gateways exist: + // - Zero gateways: ["Create a new gateway and attach", "Skip — KB will be standalone"]. + // - One or more gateways: existing names + "Skip" sentinel + "Create a new gateway and attach" + // appended at the end. 
+ const gatewayItems: SelectableItem[] = useMemo(() => { + if (!hasGateways) { + return [ + { id: CREATE_NEW_GATEWAY_ID, title: 'Create a new gateway and attach' }, + { id: SKIP_GATEWAY_ID, title: 'Skip — KB will be standalone (you can attach later)' }, + ]; + } + return [ + ...existingGatewayNames.map(g => ({ id: g, title: g })), + { id: SKIP_GATEWAY_ID, title: 'Skip — don’t wire to a gateway' }, + { id: CREATE_NEW_GATEWAY_ID, title: 'Create a new gateway and attach' }, + ]; + }, [existingGatewayNames, hasGateways]); + + const dataSourceTypeNav = useListNavigation({ + items: DATA_SOURCE_TYPE_OPTIONS, + isActive: isDataSourceTypeStep, + onSelect: (item: SelectableItem) => { + setPendingType(item.id as DataSourceTypeFlag); + setStep('sources'); + }, + // Esc from the type picker: if we already have at least one captured + // source, the only sensible return is the add-another decision (we can't + // un-capture earlier sources). Otherwise go back to description. + onExit: () => setStep(dataSources.length === 0 ? 'description' : 'add-another'), + }); + + // Surface the "Remove a captured source" option only when there's something + // to remove. Avoids showing a dead-end action when the user has just one + // source and would have to cancel the wizard if they picked it (you can't + // submit a KB with zero sources). + const addAnotherItems = dataSources.length > 1 ? ADD_ANOTHER_OPTIONS_WITH_REMOVE : ADD_ANOTHER_OPTIONS; + const addAnotherNav = useListNavigation({ + items: addAnotherItems, + isActive: isAddAnotherStep, + onSelect: (item: SelectableItem) => { + if (item.id === 'add-another') { + // FIX: route back through the data-source-type picker so the user can + // pick a different type (or the same one) for the next source. 
+ setStep('data-source-type'); + } else if (item.id === 'remove-source') { + setStep('remove-source'); + } else { + setStep('gateway'); + } + }, + onExit: () => setStep('data-source-type'), + }); + + // Captured-source picker (sub-step of 'add-another'). Each item shows the + // type and the value (or an inline-JSON marker), keyed by capture index so + // duplicates pick different rows. + const removableSourceItems = useMemo( + () => + dataSources.map((ds, idx) => { + const label = DATA_SOURCE_TYPE_LABELS[ds.dataSourceType] ?? ds.dataSourceType; + const display = ds.value.startsWith(INLINE_JSON_PREFIX) ? '' : ds.value; + return { id: String(idx), title: `${label}: ${display}` }; + }), + [dataSources] + ); + + const removeSourceNav = useListNavigation({ + items: removableSourceItems, + isActive: isRemoveSourceStep, + onSelect: (item: SelectableItem) => { + const idx = Number(item.id); + const next = dataSources.filter((_, i) => i !== idx); + setDataSources(next); + // Stay on add-another if there's still anything left, otherwise drop + // straight back to data-source-type so the user can capture again. + setStep(next.length > 0 ? 'add-another' : 'data-source-type'); + }, + onExit: () => setStep('add-another'), + }); + + const gatewayNav = useListNavigation({ + items: gatewayItems, + isActive: isGatewayStep, + onSelect: (item: SelectableItem) => { + if (item.id === CREATE_NEW_GATEWAY_ID) { + // Sub-step: prompt for a new gateway name. Don't create it yet — the + // Flow does the create + KB add as a single submit so the user only + // sees the gateway materialise after confirming. + setGateway(undefined); + setStep('new-gateway-name'); + return; + } + setNewGatewayName(undefined); + setGateway(item.id === SKIP_GATEWAY_ID ? 
undefined : item.id); + setStep('confirm'); + }, + onExit: () => setStep('add-another'), + }); + + useListNavigation({ + items: [{ id: 'confirm', title: 'Confirm' }], + onSelect: () => + onComplete({ + name, + dataSources, + description: description || undefined, + gateway, + newGatewayName, + }), + onExit: () => setStep('gateway'), + isActive: isConfirmStep, + }); + + const helpText = + isDataSourceTypeStep || isAddAnotherStep || isGatewayStep || isRemoveSourceStep + ? HELP_TEXT.NAVIGATE_SELECT + : isConfirmStep + ? HELP_TEXT.CONFIRM_CANCEL + : HELP_TEXT.TEXT_INPUT; + + // The new-gateway-name and remove-source sub-steps map onto their parents + // for the StepIndicator, mirroring the kb-id sub-step pattern in + // useAddGatewayTargetWizard. + const indicatorStep: Step = isNewGatewayNameStep ? 'gateway' : isRemoveSourceStep ? 'add-another' : step; + const headerContent = ; + + // Confirm view: render every captured source on its own line, prefixed by + // its type label (e.g. "S3: s3://bucket/docs/"). Lines stay in + // capture order so the user sees exactly what they entered. + const dataSourcesSummary = useMemo(() => { + if (dataSources.length === 0) return '(none)'; + const labelWidth = + Math.max(...dataSources.map(ds => (DATA_SOURCE_TYPE_LABELS[ds.dataSourceType] ?? ds.dataSourceType).length)) + 1; + return dataSources + .map(ds => { + const label = `${DATA_SOURCE_TYPE_LABELS[ds.dataSourceType] ?? ds.dataSourceType}:`.padEnd(labelWidth + 1); + // Inline-JSON entries carry the full payload as their `value`; render + // a short summary instead of dumping the JSON into the confirm card. + const display = ds.value.startsWith(INLINE_JSON_PREFIX) + ? 
`'}/>` + : ds.value; + return `${label} ${display}`; + }) + .join('\n'); + }, [dataSources, name]); + + const gatewayConfirmValue = useMemo(() => { + if (newGatewayName) return `${newGatewayName} (will be created)`; + if (gateway) return `${gateway} (existing)`; + return 'none — KB will be standalone'; + }, [gateway, newGatewayName]); + + const confirmFields = useMemo( + () => [ + { label: 'Name', value: name }, + ...(description ? [{ label: 'Description', value: description }] : []), + { label: `Data Sources (${dataSources.length})`, value: dataSourcesSummary }, + { label: 'Gateway', value: gatewayConfirmValue }, + ], + [name, description, dataSources.length, dataSourcesSummary, gatewayConfirmValue] + ); + + return ( + + + {isNameStep && ( + { + setName(value); + setStep('description'); + }} + onCancel={onExit} + schema={KnowledgeBaseNameSchema} + customValidation={value => + !existingKnowledgeBaseNames.includes(value) || 'Knowledge base name already exists' + } + /> + )} + + {isDescriptionStep && ( + { + setDescription(value); + setStep('data-source-type'); + }} + onCancel={() => setStep('name')} + allowEmpty + /> + )} + + {isDataSourceTypeStep && ( + + )} + + {isSourcesStep && isPendingS3 && ( + { + setDataSources([...dataSources, { dataSourceType: pendingType, value }]); + setStep('add-another'); + }} + onCancel={() => setStep(dataSources.length === 0 ? 'data-source-type' : 'add-another')} + schema={S3UriSchema} + /> + )} + + {isSourcesStep && !isPendingS3 && ( + { + const trimmed = value.trimStart(); + const isInlineJson = trimmed.startsWith('{') || trimmed.startsWith('['); + setDataSources([ + ...dataSources, + { + dataSourceType: pendingType, + // Tag inline JSON with a sentinel prefix; the Flow writes it + // to disk before dispatching to the primitive. Plain paths + // pass through unchanged (the primitive does its own copy). + value: isInlineJson ? 
`${INLINE_JSON_PREFIX}${trimmed}` : value, + }, + ]); + setStep('add-another'); + }} + onCancel={() => setStep(dataSources.length === 0 ? 'data-source-type' : 'add-another')} + schema={makeConnectorConfigSchema(pendingType)} + /> + )} + + {isAddAnotherStep && ( + ds.dataSourceType)).size + } type(s)`} + items={addAnotherItems} + selectedIndex={addAnotherNav.selectedIndex} + /> + )} + + {isRemoveSourceStep && ( + + )} + + {isGatewayStep && ( + + )} + + {isNewGatewayNameStep && ( + { + setNewGatewayName(value); + setGateway(undefined); + setStep('confirm'); + }} + onCancel={() => setStep('gateway')} + schema={GatewayNameSchema} + customValidation={value => + !existingGatewayNames.includes(value) || 'Gateway name already exists in this project' + } + /> + )} + + {isConfirmStep && } + + + ); +} diff --git a/src/cli/tui/screens/knowledge-base/__tests__/AddKnowledgeBaseFlow.test.tsx b/src/cli/tui/screens/knowledge-base/__tests__/AddKnowledgeBaseFlow.test.tsx new file mode 100644 index 000000000..12c9c01e9 --- /dev/null +++ b/src/cli/tui/screens/knowledge-base/__tests__/AddKnowledgeBaseFlow.test.tsx @@ -0,0 +1,134 @@ +import { AddKnowledgeBaseFlow } from '../AddKnowledgeBaseFlow'; +import { render } from 'ink-testing-library'; +import React from 'react'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +// ─── mocks ─────────────────────────────────────────────────────────────────── +const mockKbAdd = vi.fn(); +const mockKbGetRemovable = vi.fn(); +const mockGatewayAdd = vi.fn(); + +vi.mock('../../../../primitives/registry', () => ({ + knowledgeBasePrimitive: { + add: (...args: unknown[]) => mockKbAdd(...args), + getRemovable: (...args: unknown[]) => mockKbGetRemovable(...args), + }, + gatewayPrimitive: { + add: (...args: unknown[]) => mockGatewayAdd(...args), + }, +})); + +vi.mock('../../../hooks/useCreateMcp', () => ({ + useExistingGateways: () => ({ gateways: [] }), +})); + +// Replace the screen with a stub that immediately invokes 
onComplete with a +// fixed config — keeps Flow tests focused on the post-screen logic. +vi.mock('../AddKnowledgeBaseScreen', () => { + return { + AddKnowledgeBaseScreen: ({ onComplete }: { onComplete: (cfg: unknown) => void }) => { + // Immediately submit on first render. Tests below customise the payload + // by setting a global before the render. + const cfg = (globalThis as { __KB_FLOW_TEST_CFG?: unknown }).__KB_FLOW_TEST_CFG; + React.useEffect(() => { + if (cfg) onComplete(cfg); + }, [onComplete, cfg]); + return null; + }, + }; +}); + +// ─── helpers ───────────────────────────────────────────────────────────────── +const delay = (ms = 50) => new Promise(resolve => setTimeout(resolve, ms)); + +beforeEach(() => { + mockKbAdd.mockReset(); + mockKbGetRemovable.mockReset(); + mockGatewayAdd.mockReset(); + mockKbGetRemovable.mockResolvedValue([]); + mockKbAdd.mockResolvedValue({ + success: true, + knowledgeBaseName: 'kb1', + newDataSources: ['s3-1'], + gatewayWired: undefined, + }); + mockGatewayAdd.mockResolvedValue({ success: true, gatewayName: 'tui-kb-gw' }); +}); + +afterEach(() => { + vi.restoreAllMocks(); + delete (globalThis as { __KB_FLOW_TEST_CFG?: unknown }).__KB_FLOW_TEST_CFG; +}); + +// ─── tests ─────────────────────────────────────────────────────────────────── +describe('AddKnowledgeBaseFlow — newGatewayName path', () => { + it('creates the gateway first, then adds the KB with that gateway name', async () => { + (globalThis as { __KB_FLOW_TEST_CFG?: unknown }).__KB_FLOW_TEST_CFG = { + name: 'tui-kb', + dataSources: [{ dataSourceType: 's3', value: 's3://b/' }], + newGatewayName: 'tui-kb-gw', + }; + mockKbAdd.mockResolvedValueOnce({ success: true, newDataSources: ['s3-1'], gatewayWired: 'tui-kb-gw' }); + + render(); + await delay(80); + + expect(mockGatewayAdd).toHaveBeenCalledTimes(1); + expect(mockGatewayAdd.mock.calls[0]![0]).toMatchObject({ name: 'tui-kb-gw', authorizerType: 'NONE' }); + expect(mockKbAdd).toHaveBeenCalledTimes(1); + 
expect(mockKbAdd.mock.calls[0]![0]).toMatchObject({ name: 'tui-kb', gateway: 'tui-kb-gw' }); + }); + + it('aborts (no KB add) if the gateway create fails', async () => { + (globalThis as { __KB_FLOW_TEST_CFG?: unknown }).__KB_FLOW_TEST_CFG = { + name: 'tui-kb', + dataSources: [{ dataSourceType: 's3', value: 's3://b/' }], + newGatewayName: 'tui-kb-gw', + }; + mockGatewayAdd.mockResolvedValueOnce({ success: false, error: new Error('boom') }); + + const { lastFrame } = render(); + await delay(80); + + expect(mockGatewayAdd).toHaveBeenCalledTimes(1); + expect(mockKbAdd).not.toHaveBeenCalled(); + expect(lastFrame() ?? '').toContain('Failed'); + }); +}); + +describe('AddKnowledgeBaseFlow — Skip / standalone path (zero gateways case)', () => { + it('does not call gatewayPrimitive.add and adds the KB with no gateway', async () => { + (globalThis as { __KB_FLOW_TEST_CFG?: unknown }).__KB_FLOW_TEST_CFG = { + name: 'standalone-kb', + dataSources: [{ dataSourceType: 's3', value: 's3://b/' }], + // No gateway, no newGatewayName + }; + mockKbAdd.mockResolvedValueOnce({ success: true, newDataSources: ['s3-1'], gatewayWired: undefined }); + + render(); + await delay(80); + + expect(mockGatewayAdd).not.toHaveBeenCalled(); + expect(mockKbAdd).toHaveBeenCalledTimes(1); + expect(mockKbAdd.mock.calls[0]![0]).toMatchObject({ name: 'standalone-kb' }); + expect(mockKbAdd.mock.calls[0]![0].gateway).toBeUndefined(); + }); +}); + +describe('AddKnowledgeBaseFlow — existing gateway path', () => { + it('passes the existing gateway through to KB add and skips gateway create', async () => { + (globalThis as { __KB_FLOW_TEST_CFG?: unknown }).__KB_FLOW_TEST_CFG = { + name: 'kb-existing', + dataSources: [{ dataSourceType: 's3', value: 's3://b/' }], + gateway: 'g1', + }; + mockKbAdd.mockResolvedValueOnce({ success: true, newDataSources: ['s3-1'], gatewayWired: 'g1' }); + + render(); + await delay(80); + + expect(mockGatewayAdd).not.toHaveBeenCalled(); + expect(mockKbAdd).toHaveBeenCalledTimes(1); + 
expect(mockKbAdd.mock.calls[0]![0]).toMatchObject({ name: 'kb-existing', gateway: 'g1' }); + }); +}); diff --git a/src/cli/tui/screens/knowledge-base/__tests__/AddKnowledgeBaseScreen.test.tsx b/src/cli/tui/screens/knowledge-base/__tests__/AddKnowledgeBaseScreen.test.tsx new file mode 100644 index 000000000..787f9623f --- /dev/null +++ b/src/cli/tui/screens/knowledge-base/__tests__/AddKnowledgeBaseScreen.test.tsx @@ -0,0 +1,238 @@ +import { AddKnowledgeBaseScreen } from '../AddKnowledgeBaseScreen'; +import type { AddKnowledgeBaseConfig } from '../types'; +import { render } from 'ink-testing-library'; +import React from 'react'; +import { afterEach, describe, expect, it, vi } from 'vitest'; + +const DOWN_ARROW = '\x1B[B'; +const UP_ARROW = '\x1B[A'; +const ENTER = '\r'; +const ESCAPE = '\x1B'; +const BACKSPACE = '\x7f'; +const delay = (ms = 50) => new Promise(resolve => setTimeout(resolve, ms)); + +const BASE_PROPS = { + onComplete: vi.fn<(config: AddKnowledgeBaseConfig) => void>(), + onExit: vi.fn(), + existingKnowledgeBaseNames: [], + existingGatewayNames: [], +}; + +afterEach(() => vi.restoreAllMocks()); + +// Helper: walk through name → description → s3 → one URI → done. +// Stops on the gateway-step picker. +async function walkToGatewayStep(stdin: ReturnType['stdin'], kbName = 'tui-kb') { + // Name step: clear default and type custom name. + for (let i = 0; i < 30; i++) stdin.write(BACKSPACE); + for (const ch of kbName) stdin.write(ch); + await delay(); + stdin.write(ENTER); + await delay(); + + // Description: skip + stdin.write(ENTER); + await delay(); + + // Data-source-type: S3 is index 0, accept + stdin.write(ENTER); + await delay(); + + // Sources: type a URI + for (const ch of 's3://my-bucket/docs/') stdin.write(ch); + await delay(); + stdin.write(ENTER); + await delay(); + + // Add another? 
Move down to "Done — review and submit" + stdin.write(DOWN_ARROW); + await delay(); + stdin.write(ENTER); + await delay(); +} + +describe('AddKnowledgeBaseScreen — gateway step always shown', () => { + it('zero gateways: gateway step shows Create-new + Skip (no other items)', async () => { + const { lastFrame, stdin } = render(); + await walkToGatewayStep(stdin); + + const frame = lastFrame() ?? ''; + expect(frame).toContain('Wire this knowledge base to a gateway?'); + expect(frame).toContain('Create a new gateway and attach'); + expect(frame).toContain('Skip — KB will be standalone'); + expect(frame).toContain('No gateways exist in this project yet'); + }); + + it('zero gateways: picking Skip goes to confirm with "Gateway: none — KB will be standalone"', async () => { + const { lastFrame, stdin } = render(); + await walkToGatewayStep(stdin); + + // Move from "Create new" (index 0) down to "Skip" (index 1) + stdin.write(DOWN_ARROW); + await delay(); + stdin.write(ENTER); + await delay(); + + const frame = lastFrame() ?? ''; + expect(frame).toContain('Gateway:'); + expect(frame).toContain('none — KB will be standalone'); + }); + + it('zero gateways: picking Create-new advances to a name input defaulted to "${kbName}-gw"', async () => { + const { lastFrame, stdin } = render(); + await walkToGatewayStep(stdin, 'mykb'); + + // "Create a new gateway and attach" is index 0 in the zero-gateway picker + stdin.write(ENTER); + await delay(); + + const frame = lastFrame() ?? 
''; + expect(frame).toContain('New gateway name'); + expect(frame).toContain('mykb-gw'); + }); + + it('zero gateways: full flow Create-new → submit emits newGatewayName, no gateway', async () => { + const onComplete = vi.fn<(config: AddKnowledgeBaseConfig) => void>(); + const { stdin } = render(); + await walkToGatewayStep(stdin, 'tui-kb'); + + // Pick Create-new (index 0) + stdin.write(ENTER); + await delay(); + // Accept default name "tui-kb-gw" + stdin.write(ENTER); + await delay(); + // Confirm + stdin.write(ENTER); + await delay(); + + expect(onComplete).toHaveBeenCalledTimes(1); + const cfg = onComplete.mock.calls[0]![0]; + expect(cfg.newGatewayName).toBe('tui-kb-gw'); + expect(cfg.gateway).toBeUndefined(); + expect(cfg.name).toBe('tui-kb'); + }); + + it('zero gateways: confirm view shows "Gateway: tui-kb-gw (will be created)"', async () => { + const { lastFrame, stdin } = render(); + await walkToGatewayStep(stdin, 'tui-kb'); + + stdin.write(ENTER); // Create-new + await delay(); + stdin.write(ENTER); // accept default + await delay(); + + const frame = lastFrame() ?? ''; + expect(frame).toContain('tui-kb-gw (will be created)'); + }); + + it('rejects invalid gateway names with the schema error', async () => { + const { lastFrame, stdin } = render(); + await walkToGatewayStep(stdin, 'tui-kb'); + + stdin.write(ENTER); // Create-new + await delay(); + // Clear default and type invalid name + for (let i = 0; i < 30; i++) stdin.write(BACKSPACE); + for (const ch of 'bad name!') stdin.write(ch); + await delay(); + stdin.write(ENTER); + await delay(); + + const frame = lastFrame() ?? 
''; + expect(frame).toMatch(/alphanumeric with optional hyphens|invalid|error/i); + }); +}); + +describe('AddKnowledgeBaseScreen — at-least-one-gateway path', () => { + const PROPS_WITH_GW = { ...BASE_PROPS, existingGatewayNames: ['g1', 'g2'] }; + + it('shows existing names + Skip + Create-new in that order', async () => { + const { lastFrame, stdin } = render(); + await walkToGatewayStep(stdin); + const frame = lastFrame() ?? ''; + expect(frame).toContain('g1'); + expect(frame).toContain('g2'); + expect(frame).toContain('Skip'); + expect(frame).toContain('Create a new gateway and attach'); + }); + + it('selecting an existing gateway emits gateway=g1, no newGatewayName', async () => { + const onComplete = vi.fn<(config: AddKnowledgeBaseConfig) => void>(); + const { stdin } = render(); + await walkToGatewayStep(stdin); + + // g1 is index 0 + stdin.write(ENTER); + await delay(); + // Confirm + stdin.write(ENTER); + await delay(); + + expect(onComplete).toHaveBeenCalledTimes(1); + const cfg = onComplete.mock.calls[0]![0]; + expect(cfg.gateway).toBe('g1'); + expect(cfg.newGatewayName).toBeUndefined(); + }); + + it('Create-new appended at end is reachable; choosing it advances to the name input', async () => { + const { lastFrame, stdin } = render(); + await walkToGatewayStep(stdin, 'kb'); + // Items: g1(0), g2(1), Skip(2), Create-new(3). Down 3 times. + stdin.write(DOWN_ARROW); + await delay(); + stdin.write(DOWN_ARROW); + await delay(); + stdin.write(DOWN_ARROW); + await delay(); + stdin.write(ENTER); + await delay(); + + const frame = lastFrame() ?? ''; + expect(frame).toContain('New gateway name'); + expect(frame).toContain('kb-gw'); + }); + + it('confirm shows "Gateway: g2 (existing)" when an existing gateway picked', async () => { + const { lastFrame, stdin } = render(); + await walkToGatewayStep(stdin); + + // Move to g2 + stdin.write(DOWN_ARROW); + await delay(); + stdin.write(ENTER); + await delay(); + + const frame = lastFrame() ?? 
''; + expect(frame).toContain('g2 (existing)'); + }); + + it('Esc from new-gateway-name returns to the gateway picker', async () => { + const { lastFrame, stdin } = render(); + await walkToGatewayStep(stdin); + + // Pick Create-new (index 3) + for (let i = 0; i < 3; i++) { + stdin.write(DOWN_ARROW); + await delay(); + } + stdin.write(ENTER); + await delay(); + // Should be on new-gateway-name. Esc back. + stdin.write(ESCAPE); + await delay(); + + const frame = lastFrame() ?? ''; + expect(frame).toContain('Wire this knowledge base to a gateway?'); + }); +}); + +describe('AddKnowledgeBaseScreen — step indicator', () => { + it('always shows the Gateway label in the step list, even with zero gateways', async () => { + const { lastFrame } = render(); + await delay(); + expect(lastFrame() ?? '').toContain('Gateway'); + }); +}); +// Suppress unused imports from helper — keep references silent +void UP_ARROW; diff --git a/src/cli/tui/screens/knowledge-base/__tests__/groupDataSources.test.ts b/src/cli/tui/screens/knowledge-base/__tests__/groupDataSources.test.ts new file mode 100644 index 000000000..a8a9e01f5 --- /dev/null +++ b/src/cli/tui/screens/knowledge-base/__tests__/groupDataSources.test.ts @@ -0,0 +1,49 @@ +import { groupDataSources } from '../groupDataSources'; +import { describe, expect, it } from 'vitest'; + +describe('groupDataSources', () => { + it('returns empty array for empty input', () => { + expect(groupDataSources([])).toEqual([]); + }); + + it('groups a single-type list into one group', () => { + const result = groupDataSources([ + { dataSourceType: 's3', value: 's3://a/' }, + { dataSourceType: 's3', value: 's3://b/' }, + ]); + expect(result).toEqual([{ dataSourceType: 's3', values: ['s3://a/', 's3://b/'] }]); + }); + + it('groups by type and preserves first-seen-type ordering across groups', () => { + const result = groupDataSources([ + { dataSourceType: 's3', value: 's3://a/' }, + { dataSourceType: 'web-crawler', value: 'app/k/web.json' }, + { 
dataSourceType: 's3', value: 's3://b/' }, + { dataSourceType: 'confluence', value: 'app/k/conf.json' }, + { dataSourceType: 'web-crawler', value: 'app/k/web2.json' }, + ]); + expect(result).toEqual([ + { dataSourceType: 's3', values: ['s3://a/', 's3://b/'] }, + { dataSourceType: 'web-crawler', values: ['app/k/web.json', 'app/k/web2.json'] }, + { dataSourceType: 'confluence', values: ['app/k/conf.json'] }, + ]); + }); + + it('preserves insertion order within a group', () => { + const result = groupDataSources([ + { dataSourceType: 'web-crawler', value: 'first.json' }, + { dataSourceType: 'web-crawler', value: 'second.json' }, + { dataSourceType: 'web-crawler', value: 'third.json' }, + ]); + expect(result[0]!.values).toEqual(['first.json', 'second.json', 'third.json']); + }); + + it('handles a single source per type with no inter-group reordering', () => { + const result = groupDataSources([ + { dataSourceType: 'confluence', value: 'a' }, + { dataSourceType: 's3', value: 's3://b/' }, + { dataSourceType: 'web-crawler', value: 'c' }, + ]); + expect(result.map(g => g.dataSourceType)).toEqual(['confluence', 's3', 'web-crawler']); + }); +}); diff --git a/src/cli/tui/screens/knowledge-base/__tests__/inline-connector-config.test.ts b/src/cli/tui/screens/knowledge-base/__tests__/inline-connector-config.test.ts new file mode 100644 index 000000000..df9001d38 --- /dev/null +++ b/src/cli/tui/screens/knowledge-base/__tests__/inline-connector-config.test.ts @@ -0,0 +1,97 @@ +import { ConfigIO } from '../../../../../lib'; +import { + INLINE_JSON_PREFIX, + isInlineJsonValue, + materializeInlineConnectorConfig, + stripInlineJsonPrefix, +} from '../inline-connector-config'; +import { existsSync, mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from 'fs'; +import { tmpdir } from 'os'; +import { join } from 'path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; + +describe('inline-connector-config — sentinel helpers', () => { + it('isInlineJsonValue 
/ stripInlineJsonPrefix round-trip', () => { + const json = '{"type":"WEB"}'; + const tagged = `${INLINE_JSON_PREFIX}${json}`; + expect(isInlineJsonValue(tagged)).toBe(true); + expect(isInlineJsonValue('app/kb/web.json')).toBe(false); + expect(stripInlineJsonPrefix(tagged)).toBe(json); + expect(stripInlineJsonPrefix('app/kb/web.json')).toBe('app/kb/web.json'); + }); +}); + +describe('materializeInlineConnectorConfig', () => { + let projectRoot: string; + let configIO: ConfigIO; + + beforeEach(() => { + // Build a minimal project tree so ConfigIO discovers the agentcore/ dir. + projectRoot = mkdtempSync(join(tmpdir(), 'fmkb-inline-')); + mkdirSync(join(projectRoot, 'agentcore'), { recursive: true }); + writeFileSync( + join(projectRoot, 'agentcore', 'agentcore.json'), + JSON.stringify({ name: 'p', version: 1, managedBy: 'CDK', runtimes: [], memories: [], credentials: [] }) + ); + configIO = new ConfigIO({ baseDir: join(projectRoot, 'agentcore') }); + }); + + afterEach(() => { + rmSync(projectRoot, { recursive: true, force: true }); + }); + + it('writes the JSON under app// and returns the resulting path', async () => { + const json = JSON.stringify({ + type: 'WEB', + version: 1, + connectionConfiguration: { authType: 'NO_AUTH', seedUrls: ['https://x/'] }, + crawlConfiguration: {}, + }); + const dest = await materializeInlineConnectorConfig({ + kbName: 'mykb', + dataSourceType: 'web-crawler', + jsonContents: json, + configIO, + }); + expect(dest).toBe(join(projectRoot, 'app', 'mykb', 'web-crawler-1.json')); + expect(existsSync(dest)).toBe(true); + // Pretty-printed and round-trips to the original object. + const parsed = JSON.parse(readFileSync(dest, 'utf8')); + expect(parsed.type).toBe('WEB'); + expect(parsed.connectionConfiguration.seedUrls).toEqual(['https://x/']); + // Pretty-print: at least one newline + two-space indent line present. 
+ expect(readFileSync(dest, 'utf8')).toMatch(/\n {2}"type": "WEB"/); + }); + + it('avoids filename collisions by appending an incrementing suffix', async () => { + const json = JSON.stringify({ type: 'WEB' }); + const a = await materializeInlineConnectorConfig({ + kbName: 'kb', + dataSourceType: 'web-crawler', + jsonContents: json, + configIO, + }); + const b = await materializeInlineConnectorConfig({ + kbName: 'kb', + dataSourceType: 'web-crawler', + jsonContents: json, + configIO, + }); + expect(a).toMatch(/web-crawler-1\.json$/); + expect(b).toMatch(/web-crawler-2\.json$/); + expect(existsSync(a)).toBe(true); + expect(existsSync(b)).toBe(true); + }); + + it('rejects malformed JSON before writing anything', async () => { + await expect( + materializeInlineConnectorConfig({ + kbName: 'kb', + dataSourceType: 'web-crawler', + jsonContents: '{ not-json', + configIO, + }) + ).rejects.toThrow(); + expect(existsSync(join(projectRoot, 'app', 'kb'))).toBe(false); + }); +}); diff --git a/src/cli/tui/screens/knowledge-base/groupDataSources.ts b/src/cli/tui/screens/knowledge-base/groupDataSources.ts new file mode 100644 index 000000000..70fa372d5 --- /dev/null +++ b/src/cli/tui/screens/knowledge-base/groupDataSources.ts @@ -0,0 +1,49 @@ +import type { DataSourceTypeFlag } from '../../../operations/knowledge-base/connector-config'; +import type { CapturedDataSource } from './types'; + +/** + * One group passed to a single primitive.add() call: all values share a + * `dataSourceType`. Insertion order across groups follows the order in which + * each type was first seen in the original list (first-seen-type wins). + */ +export interface DataSourceGroup { + dataSourceType: DataSourceTypeFlag; + values: string[]; +} + +/** + * Group captured wizard data sources by `dataSourceType`, preserving: + * - first-seen-type ordering across groups, and + * - original insertion ordering within each group. 
+ * + * Example: + * [ + * { dataSourceType: 's3', value: 's3://a/' }, + * { dataSourceType: 'web-crawler', value: 'app/k/web.json' }, + * { dataSourceType: 's3', value: 's3://b/' }, + * ] + * + * yields: + * [ + * { dataSourceType: 's3', values: ['s3://a/', 's3://b/'] }, + * { dataSourceType: 'web-crawler', values: ['app/k/web.json'] }, + * ] + * + * The Flow dispatches these groups sequentially: the first becomes a + * `primitive.add()` create call; later groups become `appendToExisting` + * appends on the same KB. + */ +export function groupDataSources(dataSources: CapturedDataSource[]): DataSourceGroup[] { + const groups: DataSourceGroup[] = []; + const indexByType = new Map(); + for (const ds of dataSources) { + const idx = indexByType.get(ds.dataSourceType); + if (idx === undefined) { + indexByType.set(ds.dataSourceType, groups.length); + groups.push({ dataSourceType: ds.dataSourceType, values: [ds.value] }); + } else { + groups[idx]!.values.push(ds.value); + } + } + return groups; +} diff --git a/src/cli/tui/screens/knowledge-base/index.ts b/src/cli/tui/screens/knowledge-base/index.ts new file mode 100644 index 000000000..657d74ca1 --- /dev/null +++ b/src/cli/tui/screens/knowledge-base/index.ts @@ -0,0 +1,2 @@ +export { AddKnowledgeBaseFlow } from './AddKnowledgeBaseFlow'; +export type { AddKnowledgeBaseConfig } from './types'; diff --git a/src/cli/tui/screens/knowledge-base/inline-connector-config.ts b/src/cli/tui/screens/knowledge-base/inline-connector-config.ts new file mode 100644 index 0000000000000000000000000000000000000000..a6bd90c106da50999c3f36b351953939c69d1f6f GIT binary patch literal 3191 zcmb7HO>Y}F5bfE&Vh#Z$CDID%sqiNfCkB)hwqd(X4+XN^T}sP{A{Qjr^2QL*AJJdf zU(z?ER*~&q3glo(x{~AbRO-%L6X+DTD$ zBp;Ly8GY$<3a(=aVJeKWh$SYS$E>1lb7NQz04NU-W}dWzCX}Lu8-}$-ootUla2_qd zqa=>S$4MY(h(T2}l-VMS=hw2BluoQQ5=|!SfmU=rn5-~pv(;L8NNX~8G6rJ{<*Ynf z5^CFq_!Kg5Ec2b?DYGO|@XsprFyBPIff=yk!du;fq|xf0Qh-ER$_D0L*ih3yfBlUR 
zp6kf>SaII734dTXV8E1ov&+~yY1_Qn)!E_h6#M{ocb$gzaWbLPSi^Km*TNy-#q8$xj>RFMH;n{V zAZ{Z%Z3QGY0-fRLssYt!|WN7x2!J# zain|L0zP8`YFRDhd&uUt1j$Sm{KJTPtwhek4bpTTQvpgjIcZP#h9sp0|e9orEJmwi$phjgHL}P|w{I$Gxq)-WU)QX8%biK#@C~_U01akyx zEhNT##hjL_CUmsj2u`^zfCd{L^)^iQ)F25m!2>H09`k{AQ-0E z_9|Q&Z=6v6>XFR=Q;Ua-ns>t#bxfw!dQ3l$Hh-EN6SBXF`)Lb2ffFozw)=c`Mn9U5^>@HSo&z@+19)<~>ka#x*+Pc%7?lqUM|&69l0PViOS~5j_u`Cc0DuNC8BQXp zDP9z70nd*6=Wu&{jO7*JwP^G`JUO@)`zQ`0KLC3XQIBg;G^vW&(3?|6AMh}iFJ9=_ zrxI^Hy}O>#-b$E4)xyVBEitH%fxWC<2{t|@O$8PW_v~`=Hg;=FS(foGx$VzmvUY#V z{<1;+sjReL4+nEU*h7g21s`D~p1KIPK+8Y|KfmoiFe>{tQrW0xI1qm5q256Uf^huJ z ({ loadDeployedProjectConfig: mockLoadDeployedProjectConfig, })); +// useLogsFlow wraps its load in withCommandRunTelemetry, whose real implementation +// awaits getTelemetryClient()/flush() and never settles in tests — leaving the screen +// stuck in the 'loading' phase. Pass through to the inner fn so the load resolves. +vi.mock('../../../../telemetry/cli-command-run.js', () => ({ + withCommandRunTelemetry: vi.fn((_command: string, _attrs: unknown, fn: (recorder: unknown) => unknown) => + fn({ set: vi.fn(), get: vi.fn(() => ({})) }) + ), +})); + vi.mock('../../../../aws/cloudwatch.js', () => ({ // eslint-disable-next-line require-yield async *streamLogs() { diff --git a/src/cli/tui/screens/mcp/AddGatewayTargetFlow.tsx b/src/cli/tui/screens/mcp/AddGatewayTargetFlow.tsx index 8fcd707aa..9800aca6e 100644 --- a/src/cli/tui/screens/mcp/AddGatewayTargetFlow.tsx +++ b/src/cli/tui/screens/mcp/AddGatewayTargetFlow.tsx @@ -1,6 +1,12 @@ import { gatewayTargetPrimitive } from '../../../primitives/registry'; import { ErrorPrompt } from '../../components'; -import { useExistingGateways, useExistingToolNames } from '../../hooks/useCreateMcp'; +import { + useExistingGateways, + useExistingKnowledgeBases, + useExistingRuntimeNames, + useExistingToolNames, + useMcpGatewayNames, +} from '../../hooks/useCreateMcp'; import { AddSuccessScreen } from '../add/AddSuccessScreen'; import { 
AddIdentityScreen } from '../identity/AddIdentityScreen'; import type { AddIdentityConfig } from '../identity/types'; @@ -12,7 +18,14 @@ import React, { useCallback, useEffect, useMemo, useState } from 'react'; type FlowState = | { name: 'create-wizard'; resumeConfig?: GatewayTargetWizardState; resumeStep?: AddGatewayTargetStep } | { name: 'creating-credential'; pendingConfig: GatewayTargetWizardState } - | { name: 'create-success'; toolName: string; projectPath: string; loading?: boolean; loadingMessage?: string } + | { + name: 'create-success'; + toolName: string; + projectPath: string; + detail?: string; + loading?: boolean; + loadingMessage?: string; + } | { name: 'error'; message: string }; interface AddGatewayTargetFlowProps { @@ -34,7 +47,10 @@ export function AddGatewayTargetFlow({ onDeploy, }: AddGatewayTargetFlowProps) { const { gateways: existingGateways } = useExistingGateways(); + const { mcpGateways: mcpGatewayNames } = useMcpGatewayNames(); + const { runtimeNames: existingRuntimeNames } = useExistingRuntimeNames(); const { toolNames: existingToolNames } = useExistingToolNames(); + const { knowledgeBases: existingKnowledgeBases } = useExistingKnowledgeBases(); const { credentials } = useExistingCredentials(); const { names: existingIdentityNames } = useExistingIdentityNames(); const { createIdentity } = useCreateIdentity(); @@ -102,6 +118,60 @@ export function AddGatewayTargetFlow({ .catch((err: unknown) => { setFlow({ name: 'error', message: err instanceof Error ? 
err.message : 'Unknown error' }); }); + } else if (config.targetType === 'httpRuntime') { + void gatewayTargetPrimitive + .createHttpRuntimeTarget( + config as { + name: string; + gateway: string; + runtime: string; + endpoint?: string; + outboundAuth?: { type: string; credentialName?: string; scopes?: string[] }; + } + ) + .then((result: { toolName: string }) => { + setFlow({ name: 'create-success', toolName: result.toolName, projectPath: '' }); + }) + .catch((err: unknown) => { + setFlow({ name: 'error', message: err instanceof Error ? err.message : 'Unknown error' }); + }); + } else if (config.targetType === 'connector') { + void gatewayTargetPrimitive + .createConnectorGatewayTarget(config) + .then((result: { toolName: string }) => { + // For single-KB Retrieve adds, the primitive also upserts the + // gateway's shared agentic-retrieve target. Surface that to the user. + const detail = + config.connectorId === 'bedrock-knowledge-bases' + ? `Also wired KB '${config.knowledgeBaseId}' into '${config.gateway}-agentic' (bedrock-agentic-retrieve fan-out)` + : undefined; + setFlow({ name: 'create-success', toolName: result.toolName, projectPath: '', detail }); + }) + .catch((err: unknown) => { + setFlow({ name: 'error', message: err instanceof Error ? err.message : 'Unknown error' }); + }); + } else if (config.targetType === 'passthrough') { + void gatewayTargetPrimitive + .createPassthroughTarget(config) + .then((result: { toolName: string }) => { + setFlow({ name: 'create-success', toolName: result.toolName, projectPath: '' }); + }) + .catch((err: unknown) => { + setFlow({ name: 'error', message: err instanceof Error ? err.message : 'Unknown error' }); + }); + } else if (config.targetType === 'webSearch') { + void gatewayTargetPrimitive + .createWebSearchGatewayTarget(config) + .then((result: { toolName: string }) => { + const detail = + config.excludeDomains && config.excludeDomains.length > 0 + ? 
`Excluded domains: ${config.excludeDomains.join(', ')}` + : undefined; + setFlow({ name: 'create-success', toolName: result.toolName, projectPath: '', detail }); + }) + .catch((err: unknown) => { + setFlow({ name: 'error', message: err instanceof Error ? err.message : 'Unknown error' }); + }); } else { setFlow({ name: 'error', message: `Unsupported target type: ${(config as { targetType: string }).targetType}` }); } @@ -158,9 +228,12 @@ export function AddGatewayTargetFlow({ return ( void; onCreateCredential: (pendingConfig: GatewayTargetWizardState) => void; onExit: () => void; @@ -51,9 +67,12 @@ interface AddGatewayTargetScreenProps { export function AddGatewayTargetScreen({ existingGateways, + mcpGatewayNames = [], + existingRuntimeNames = [], existingToolNames, existingOAuthCredentialNames, existingApiKeyCredentialNames, + existingKnowledgeBases, onComplete, onCreateCredential, onExit, @@ -62,10 +81,14 @@ export function AddGatewayTargetScreen({ }: AddGatewayTargetScreenProps) { const wizard = useAddGatewayTargetWizard(existingGateways, initialConfig, initialStep); + // Load endpoints for the selected runtime (used by runtime-endpoint step) + const { endpoints: runtimeEndpoints, loaded: runtimeEndpointsLoaded } = useRuntimeEndpoints(wizard.config.runtime); + // Tracks which credential type sub-step is active within either auth step. // null = showing the auth type picker; 'OAUTH'/'API_KEY' = showing credential list. 
const [pendingCredType, setPendingCredType] = useState<'OAUTH' | 'API_KEY' | null>(null); const [filterPath, setFilterPathLocal] = useState(null); + const [customSigningService, setCustomSigningService] = useState(false); // ── Step flags ── const isGatewayStep = wizard.step === 'gateway'; @@ -79,17 +102,67 @@ export function AddGatewayTargetScreen({ const isSchemaSourceStep = wizard.step === 'schema-source'; const isLambdaArnStep = wizard.step === 'lambda-arn'; const isToolSchemaStep = wizard.step === 'tool-schema'; + const isRuntimeStep = wizard.step === 'runtime'; + const isRuntimeEndpointStep = wizard.step === 'runtime-endpoint'; + const isKbSelectStep = wizard.step === 'kb-select'; + const isKbIdStep = wizard.step === 'kb-id'; + const isPassthroughEndpointStep = wizard.step === 'passthrough-endpoint'; + const isPassthroughProtocolStep = wizard.step === 'passthrough-protocol'; + const isPassthroughStickinessStep = wizard.step === 'passthrough-stickiness'; + const isExcludeDomainsStep = wizard.step === 'exclude-domains'; const isConfirmStep = wizard.step === 'confirm'; const isAuthStep = isOutboundAuthStep || isApiGatewayAuthStep; const noGatewaysAvailable = isGatewayStep && existingGateways.length === 0; + // Auto-select the gateway for webSearch targets when there's exactly one option. 
+ useEffect(() => { + if ( + isGatewayStep && + wizard.config.targetType === 'webSearch' && + existingGateways.length === 1 && + !wizard.config.gateway + ) { + wizard.setGateway(existingGateways[0]!); + } + }, [isGatewayStep, wizard.config.targetType, existingGateways, wizard.config.gateway, wizard.setGateway]); + // ── Selectable item lists ── + const isNonMcpTarget = wizard.config.targetType === 'httpRuntime' || wizard.config.targetType === 'passthrough'; + const isHttpRuntimeTarget = isNonMcpTarget; + const runtimeItems: SelectableItem[] = useMemo( + () => existingRuntimeNames.map(r => ({ id: r, title: r })), + [existingRuntimeNames] + ); + const runtimeEndpointItems: SelectableItem[] = useMemo( + () => [ + { id: 'DEFAULT', title: 'DEFAULT (latest version)' }, + ...runtimeEndpoints.map(ep => ({ id: ep.name, title: `${ep.name} (v${ep.version})` })), + ], + [runtimeEndpoints] + ); const gatewayItems: SelectableItem[] = useMemo( - () => existingGateways.map(g => ({ id: g, title: g })), - [existingGateways] + () => + existingGateways.map(g => { + const isMcpOnly = isHttpRuntimeTarget && mcpGatewayNames.includes(g); + return { + id: g, + title: isMcpOnly ? `${g} (MCP — not compatible with HTTP Runtime)` : g, + disabled: isMcpOnly, + }; + }), + [existingGateways, mcpGatewayNames, isHttpRuntimeTarget] ); const targetTypeItems: SelectableItem[] = useMemo( - () => TARGET_TYPE_OPTIONS.map(o => ({ id: o.id, title: o.title, description: o.description })), + () => + TARGET_TYPE_OPTIONS.map(o => { + const gated = (o.id === 'passthrough' || o.id === 'webSearch') && !isGatedFeaturesEnabled(); + return { + id: o.id, + title: o.title, + description: gated ? 
'Coming soon' : o.description, + disabled: gated, + }; + }), [] ); const outboundAuthItems: SelectableItem[] = useMemo( @@ -101,6 +174,10 @@ export function AddGatewayTargetScreen({ })), [wizard.config.targetType] ); + const passthroughProtocolItems: SelectableItem[] = useMemo( + () => PASSTHROUGH_PROTOCOL_OPTIONS.map(o => ({ id: o.id, title: o.title, description: o.description })), + [] + ); const apiGatewayAuthItems: SelectableItem[] = useMemo( () => API_GATEWAY_AUTH_OPTIONS.map(o => ({ id: o.id, title: o.title, description: o.description })), [] @@ -113,6 +190,21 @@ export function AddGatewayTargetScreen({ () => buildCredentialItems(existingApiKeyCredentialNames, 'API key credential'), [existingApiKeyCredentialNames] ); + const knowledgeBaseItems: SelectableItem[] = useMemo( + () => [ + ...existingKnowledgeBases.map(name => ({ + id: name, + title: name, + description: 'Project Knowledge Base (resolved at synth)', + })), + { + id: ENTER_KB_ID_MANUALLY, + title: 'Enter an existing KB ID manually...', + description: 'Provide a 10-character Bedrock Knowledge Base ID', + }, + ], + [existingKnowledgeBases] + ); // ── Auth completion callbacks ── // Shared handler that routes to the correct wizard setter based on the active step. 
@@ -143,9 +235,13 @@ export function AddGatewayTargetScreen({ // ── Navigation hooks ── const targetTypeNav = useListNavigation({ items: targetTypeItems, - onSelect: item => wizard.setTargetType(item.id as GatewayTargetType), + onSelect: item => { + if ((item as SelectableItem & { disabled?: boolean }).disabled) return; + wizard.setTargetType(item.id as GatewayTargetType); + }, onExit: () => wizard.goBack(), isActive: isTargetTypeStep, + isDisabled: item => item.disabled === true, }); const gatewayNav = useListNavigation({ @@ -155,13 +251,60 @@ export function AddGatewayTargetScreen({ isActive: isGatewayStep && !noGatewaysAvailable, }); - // Outbound auth type selection (for mcpServer, openApiSchema) + const runtimeNav = useListNavigation({ + items: runtimeItems, + onSelect: item => wizard.setRuntime(item.id), + onExit: () => wizard.goBack(), + isActive: isRuntimeStep && runtimeItems.length > 0, + }); + + // Auto-skip runtime-endpoint step when the runtime has no endpoints defined. + // Must wait until endpoints have been loaded to avoid a race condition where the + // step is skipped before the async fetch completes. + const hasRuntimeEndpoints = runtimeEndpoints.length > 0; + React.useEffect(() => { + if (isRuntimeEndpointStep && runtimeEndpointsLoaded && !hasRuntimeEndpoints) { + // No endpoints defined — skip to gateway step (select DEFAULT implicitly) + wizard.setRuntimeEndpoint(undefined); + } + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [isRuntimeEndpointStep, runtimeEndpointsLoaded, hasRuntimeEndpoints]); + + const runtimeEndpointNav = useListNavigation({ + items: runtimeEndpointItems, + onSelect: item => { + wizard.setRuntimeEndpoint(item.id === 'DEFAULT' ? undefined : item.id); + }, + onExit: () => wizard.goBack(), + isActive: isRuntimeEndpointStep && hasRuntimeEndpoints, + }); + + // Knowledge Base selection (connector branch). Selecting a project KB stores + // its name on the wizard (resolved at synth from application.knowledgeBases). 
+ // Selecting "Enter an existing KB ID manually..." advances to the kb-id + // text-input sub-step for an external KB. + const knowledgeBaseNav = useListNavigation({ + items: knowledgeBaseItems, + onSelect: item => { + if (item.id === ENTER_KB_ID_MANUALLY) { + wizard.beginManualKbId(); + } else { + wizard.setKnowledgeBaseId(item.id); + } + }, + onExit: () => wizard.goBack(), + isActive: isKbSelectStep, + }); + + // Outbound auth type selection (for mcpServer, openApiSchema, passthrough) const outboundAuthNav = useListNavigation({ items: outboundAuthItems, onSelect: item => { - const authType = item.id as 'OAUTH' | 'API_KEY' | 'NONE'; + const authType = item.id as 'OAUTH' | 'API_KEY' | 'NONE' | 'GATEWAY_IAM_ROLE' | 'JWT_PASSTHROUGH'; if (authType === 'NONE') { completeAuth({ type: 'NONE' }); + } else if (authType === 'GATEWAY_IAM_ROLE' || authType === 'JWT_PASSTHROUGH') { + wizard.setOutboundAuth({ type: authType }); } else { selectAuthType(authType); } @@ -215,6 +358,33 @@ export function AddGatewayTargetScreen({ isActive: isAuthStep && pendingCredType === 'API_KEY', }); + // Passthrough protocol selection + const passthroughProtocolNav = useListNavigation({ + items: passthroughProtocolItems, + onSelect: item => wizard.setPassthroughProtocol(item.id as PassthroughProtocolType), + onExit: () => wizard.goBack(), + isActive: isPassthroughProtocolStep, + }); + + // Signing service selection for passthrough GATEWAY_IAM_ROLE + const signingServiceNav = useListNavigation({ + items: [ + { id: 'execute-api', title: 'execute-api' }, + { id: 'lambda', title: 'lambda' }, + { id: 'bedrock-agentcore', title: 'bedrock-agentcore' }, + { id: 'custom', title: 'Custom...' 
}, + ], + onSelect: item => { + if (item.id === 'custom') { + setCustomSigningService(true); + } else { + wizard.setSigningService(item.id); + } + }, + onExit: () => wizard.goBack(), + isActive: wizard.step === 'signing-service' && !customSigningService, + }); + // Confirm step useListNavigation({ items: [{ id: 'confirm', title: 'Confirm' }], @@ -246,6 +416,44 @@ export function AddGatewayTargetScreen({ lambdaArn: c.lambdaArn!, toolSchemaFile: c.toolSchemaFile!, }); + } else if (c.targetType === 'httpRuntime') { + onComplete({ + targetType: 'httpRuntime', + name: c.name, + gateway: c.gateway!, + runtime: c.runtime!, + endpoint: c.endpoint, + outboundAuth: c.outboundAuth, + }); + } else if (c.targetType === 'webSearch') { + onComplete({ + targetType: 'webSearch', + name: c.name, + gateway: c.gateway!, + ...(c.excludeDomains && c.excludeDomains.length > 0 ? { excludeDomains: c.excludeDomains } : {}), + }); + } else if (c.targetType === 'connector') { + // KB connector path. `bedrock-agentic-retrieve` is gateway-managed by + // the Add Knowledge Base flow, so the TUI only emits + // `bedrock-knowledge-bases` here. + onComplete({ + targetType: 'connector', + connectorId: 'bedrock-knowledge-bases', + name: c.name, + gateway: c.gateway!, + knowledgeBaseId: c.knowledgeBaseId!, + }); + } else if (c.targetType === 'passthrough') { + onComplete({ + targetType: 'passthrough', + name: c.name, + gateway: c.gateway!, + passthroughEndpoint: c.passthroughEndpoint!, + protocolType: c.passthroughProtocol ?? 
'CUSTOM', + stickinessIdentifier: c.stickinessIdentifier, + stickinessTimeout: c.stickinessTimeout, + outboundAuth: c.outboundAuth, + } as PassthroughTargetConfig); } else { onComplete({ targetType: 'mcpServer', @@ -254,7 +462,7 @@ export function AddGatewayTargetScreen({ endpoint: c.endpoint!, gateway: c.gateway!, toolDefinition: c.toolDefinition!, - outboundAuth: c.outboundAuth, + outboundAuth: c.outboundAuth as McpServerTargetConfig['outboundAuth'], }); } }, @@ -266,6 +474,7 @@ export function AddGatewayTargetScreen({ }); // ── Render ── + const isSigningRegionStep = wizard.step === 'signing-region'; const helpText = isConfirmStep ? HELP_TEXT.CONFIRM_CANCEL : isTextStep || @@ -274,7 +483,13 @@ export function AddGatewayTargetScreen({ isToolFiltersStep || isSchemaSourceStep || isLambdaArnStep || - isToolSchemaStep + isToolSchemaStep || + (isRuntimeStep && runtimeItems.length === 0) || // runtime step is free-text only when no project runtimes exist; otherwise it is a list picker + isKbIdStep || + isPassthroughEndpointStep || + isPassthroughStickinessStep || + isSigningRegionStep || + isExcludeDomainsStep ? HELP_TEXT.TEXT_INPUT : HELP_TEXT.NAVIGATE_SELECT; @@ -480,6 +695,146 @@ export function AddGatewayTargetScreen({ /> )} + {isRuntimeStep && runtimeItems.length > 0 && ( + + )} + + {isRuntimeStep && runtimeItems.length === 0 && ( + wizard.setRuntime(value)} + onCancel={() => wizard.goBack()} + customValidation={(value: string) => { + if (!value.trim()) return 'Runtime is required'; + return true; + }} + /> + )} + + {isKbSelectStep && ( + + )} + + {isKbIdStep && ( + wizard.goBack()} + customValidation={(value: string) => { + if (!value.trim()) return 'KB ID is required'; + if (!REAL_KB_ID_PATTERN.test(value)) { + return 'Must be a 10-character uppercase alphanumeric Bedrock KB ID (e.g. 
ABCDE12345)'; + } + return true; + }} + /> + )} + + {isRuntimeEndpointStep && !runtimeEndpointsLoaded && Loading runtime endpoints...} + + {isRuntimeEndpointStep && runtimeEndpointsLoaded && hasRuntimeEndpoints && ( + + )} + + {isPassthroughEndpointStep && ( + wizard.goBack()} + customValidation={(value: string) => { + if (!value.startsWith('https://')) return 'Must start with https://'; + if (!/^https:\/\/[a-zA-Z0-9\-.]+(:[0-9]{1,5})?(\/.*)?$/.test(value)) return 'Must be a valid HTTPS URL'; + return true; + }} + /> + )} + + {isPassthroughProtocolStep && ( + + )} + + {isPassthroughStickinessStep && ( + { + if (!value.trim()) { + wizard.setStickinessConfig(undefined, undefined); + } else { + // For simplicity in TUI, use default timeout (skip timeout input) + wizard.setStickinessConfig(value.trim(), undefined); + } + }} + onCancel={() => wizard.goBack()} + /> + )} + + {wizard.step === 'signing-service' && ( + + )} + + {wizard.step === 'signing-region' && ( + { + wizard.setSigningRegion(value.trim() || undefined); + }} + onCancel={() => wizard.goBack()} + /> + )} + + {isExcludeDomainsStep && ( + { + const domains = value + .split(',') + .map(d => d.trim()) + .filter(d => d.length > 0); + wizard.setExcludeDomains(domains.length > 0 ? domains : undefined); + }} + onCancel={() => wizard.goBack()} + /> + )} + {isConfirmStep && ( { expect(config.targetType).toBe('lambdaFunctionArn'); }); + it('narrows to BedrockKnowledgeBasesConnectorTargetConfig when targetType is connector', () => { + const config: AddGatewayTargetConfig = { + targetType: 'connector', + connectorId: 'bedrock-knowledge-bases', + name: 'kb-target', + gateway: 'my-gateway', + knowledgeBaseId: 'my-project-kb', + }; + + if (config.targetType === 'connector') { + // TypeScript narrows on the connectorId discriminator inside the union. 
+ if (config.connectorId === 'bedrock-knowledge-bases') { + expect(config.knowledgeBaseId).toBe('my-project-kb'); + expect(config.gateway).toBe('my-gateway'); + } + } + }); + + it('BedrockKnowledgeBasesConnectorTargetConfig accepts a literal 10-char external KB ID', () => { + const config: BedrockKnowledgeBasesConnectorTargetConfig = { + targetType: 'connector', + connectorId: 'bedrock-knowledge-bases', + name: 'kb-target', + gateway: 'gw', + knowledgeBaseId: 'ABCDE12345', + }; + expect(config.connectorId).toBe('bedrock-knowledge-bases'); + expect(config.knowledgeBaseId).toBe('ABCDE12345'); + }); + it('three-way dispatch handles all target types', () => { const configs: AddGatewayTargetConfig[] = [ { diff --git a/src/cli/tui/screens/mcp/__tests__/types.test.ts b/src/cli/tui/screens/mcp/__tests__/types.test.ts index cac8e71f6..a3306544b 100644 --- a/src/cli/tui/screens/mcp/__tests__/types.test.ts +++ b/src/cli/tui/screens/mcp/__tests__/types.test.ts @@ -1,4 +1,4 @@ -import { AUTHORIZER_TYPE_OPTIONS, SKIP_FOR_NOW, TARGET_TYPE_OPTIONS } from '../types.js'; +import { AUTHORIZER_TYPE_OPTIONS, ENTER_KB_ID_MANUALLY, SKIP_FOR_NOW, TARGET_TYPE_OPTIONS } from '../types.js'; import { describe, expect, it } from 'vitest'; describe('MCP types constants', () => { @@ -14,4 +14,16 @@ describe('MCP types constants', () => { const mcpServer = TARGET_TYPE_OPTIONS.find((opt: { id: string }) => opt.id === 'mcpServer'); expect(mcpServer).toBeDefined(); }); + + it('TARGET_TYPE_OPTIONS exposes a connector (Knowledge Base) entry', () => { + const connector = TARGET_TYPE_OPTIONS.find((opt: { id: string }) => opt.id === 'connector'); + expect(connector).toBeDefined(); + expect(connector?.title).toBe('Knowledge Base'); + }); + + it('ENTER_KB_ID_MANUALLY is a stable sentinel id', () => { + // Sentinel for the "Enter an existing KB ID manually..." picker entry — + // the screen branches on this exact id when the user picks the manual path. 
+ expect(ENTER_KB_ID_MANUALLY).toBe('__enter_kb_id__'); + }); }); diff --git a/src/cli/tui/screens/mcp/types.ts b/src/cli/tui/screens/mcp/types.ts index 59be5abe7..482b47ff7 100644 --- a/src/cli/tui/screens/mcp/types.ts +++ b/src/cli/tui/screens/mcp/types.ts @@ -6,6 +6,7 @@ import type { GatewayPolicyEngineConfiguration, GatewayTargetType, NodeRuntime, + PassthroughProtocolType, SchemaSource, ToolDefinition, } from '../../../../schema'; @@ -27,6 +28,8 @@ export type AddGatewayStep = export interface AddGatewayConfig { name: string; description: string; + /** Protocol type for the gateway. Omit for MCP (default). */ + protocolType?: 'MCP' | 'None'; /** Authorization type for the gateway */ authorizerType: GatewayAuthorizerType; /** JWT authorizer configuration (when authorizerType is 'CUSTOM_JWT') */ @@ -94,6 +97,16 @@ export type AddGatewayTargetStep = | 'schema-source' | 'lambda-arn' | 'tool-schema' + | 'runtime' + | 'runtime-endpoint' + | 'kb-select' + | 'kb-id' + | 'passthrough-endpoint' + | 'passthrough-protocol' + | 'passthrough-stickiness' + | 'signing-service' + | 'signing-region' + | 'exclude-domains' | 'confirm'; export type TargetLanguage = 'Python' | 'TypeScript' | 'Other'; @@ -113,9 +126,11 @@ export interface GatewayTargetWizardState { host?: ComputeHost; toolDefinition?: ToolDefinition; outboundAuth?: { - type: 'OAUTH' | 'API_KEY' | 'NONE'; + type: 'OAUTH' | 'API_KEY' | 'NONE' | 'GATEWAY_IAM_ROLE' | 'JWT_PASSTHROUGH'; credentialName?: string; scopes?: string[]; + service?: string; + region?: string; }; restApiId?: string; stage?: string; @@ -124,6 +139,30 @@ export interface GatewayTargetWizardState { schemaSource?: SchemaSource; lambdaArn?: string; toolSchemaFile?: string; + /** Runtime name reference for httpRuntime targets */ + runtime?: string; + /** Knowledge Base reference for connector targets — either a project KB name or a literal 10-char KB ID. */ + knowledgeBaseId?: string; + /** + * Connector identifier when targetType is 'connector'. 
Only + * `bedrock-knowledge-bases` is exposed in the TUI; `bedrock-agentic-retrieve` + * is gateway-managed by the Add Knowledge Base flow. + */ + connectorId?: 'bedrock-knowledge-bases' | 'bedrock-agentic-retrieve'; + /** Passthrough endpoint URL for passthrough targets */ + passthroughEndpoint?: string; + /** Passthrough protocol type for passthrough targets */ + passthroughProtocol?: PassthroughProtocolType; + /** Stickiness routing identifier for passthrough targets */ + stickinessIdentifier?: string; + /** Stickiness timeout in seconds for passthrough targets */ + stickinessTimeout?: number; + /** SigV4 signing service for passthrough GATEWAY_IAM_ROLE auth */ + signingService?: string; + /** SigV4 signing region for passthrough GATEWAY_IAM_ROLE auth */ + signingRegion?: string; + /** Optional list of domains to exclude (webSearch target type only). */ + excludeDomains?: string[]; } // ───────────────────────────────────────────────────────────────────────────── @@ -178,11 +217,75 @@ export interface LambdaFunctionArnTargetConfig { toolSchemaFile: string; } +export interface HttpRuntimeTargetConfig { + targetType: 'httpRuntime'; + name: string; + gateway: string; + runtime: string; + endpoint?: string; + outboundAuth?: { type: string; credentialName?: string; scopes?: string[] }; +} + +interface ConnectorTargetConfigBase { + targetType: 'connector'; + name: string; + gateway: string; + description?: string; +} + +export interface BedrockKnowledgeBasesConnectorTargetConfig extends ConnectorTargetConfigBase { + connectorId: 'bedrock-knowledge-bases'; + /** + * Either a project KB name (a knowledgeBases[] entry, resolved at synth + * via application.knowledgeBases) or a literal 10-char external KB ID. + */ + knowledgeBaseId: string; +} + +export interface BedrockAgenticRetrieveConnectorTargetConfig extends ConnectorTargetConfigBase { + connectorId: 'bedrock-agentic-retrieve'; + /** Fan-out: project KB names and/or literal 10-char external KB IDs. 
*/ + knowledgeBaseIds: string[]; +} + +export type ConnectorTargetConfig = + | BedrockKnowledgeBasesConnectorTargetConfig + | BedrockAgenticRetrieveConnectorTargetConfig; + +export interface PassthroughTargetConfig { + targetType: 'passthrough'; + name: string; + gateway: string; + passthroughEndpoint: string; + protocolType?: PassthroughProtocolType; + stickinessIdentifier?: string; + stickinessTimeout?: number; + outboundAuth?: { + type: 'OAUTH' | 'API_KEY' | 'NONE' | 'GATEWAY_IAM_ROLE' | 'JWT_PASSTHROUGH'; + credentialName?: string; + scopes?: string[]; + service?: string; + region?: string; + }; +} + +export interface WebSearchTargetConfig { + targetType: 'webSearch'; + name: string; + gateway: string; + /** Optional list of domains to exclude from web search results. */ + excludeDomains?: string[]; +} + export type AddGatewayTargetConfig = | McpServerTargetConfig | ApiGatewayTargetConfig | SchemaBasedTargetConfig - | LambdaFunctionArnTargetConfig; + | LambdaFunctionArnTargetConfig + | HttpRuntimeTargetConfig + | ConnectorTargetConfig + | PassthroughTargetConfig + | WebSearchTargetConfig; export const MCP_TOOL_STEP_LABELS: Record = { name: 'Name', @@ -199,6 +302,16 @@ export const MCP_TOOL_STEP_LABELS: Record = { 'schema-source': 'Schema Source', 'lambda-arn': 'Lambda ARN', 'tool-schema': 'Tool Schema File', + runtime: 'Runtime', + 'runtime-endpoint': 'Endpoint', + 'kb-select': 'Knowledge Base', + 'kb-id': 'KB ID', + 'passthrough-endpoint': 'Endpoint', + 'passthrough-protocol': 'Protocol', + 'passthrough-stickiness': 'Stickiness', + 'signing-service': 'Signing Service', + 'signing-region': 'Signing Region', + 'exclude-domains': 'Exclude Domains', confirm: 'Confirm', }; @@ -218,6 +331,7 @@ export const SKIP_FOR_NOW = 'skip-for-now' as const; export const NONE_SELECTION = '__none__' as const; export const TARGET_TYPE_OPTIONS = [ + // MCP targets { id: 'mcpServer', title: 'MCP Server endpoint', description: 'Connect to an existing MCP-compatible server' }, { id: 
'apiGateway', @@ -231,6 +345,38 @@ export const TARGET_TYPE_OPTIONS = [ title: 'Lambda function', description: 'Connect to an existing AWS Lambda function', }, + // HTTP targets + { + id: 'httpRuntime', + title: 'HTTP Runtime', + description: 'Route HTTP traffic to an AgentCore runtime', + }, + { + id: 'connector', + title: 'Knowledge Base', + description: 'Wire an existing Knowledge Base to this gateway as a connector target', + }, + { + id: 'passthrough', + title: 'Passthrough', + description: 'Route to external HTTPS endpoint', + }, + { + id: 'webSearch', + title: 'Amazon Web Search', + description: 'Wire the Amazon Web Search managed connector to this gateway', + }, +] as const; + +/** Sentinel ID for the "Enter an existing KB ID manually..." option in the KB-select step. */ +export const ENTER_KB_ID_MANUALLY = '__enter_kb_id__' as const; + +/** Passthrough protocol options. CUSTOM is the default (first). */ +export const PASSTHROUGH_PROTOCOL_OPTIONS = [ + { id: 'CUSTOM', title: 'CUSTOM', description: 'Generic HTTP/REST endpoint (default)' }, + { id: 'MCP', title: 'MCP', description: 'Model Context Protocol server' }, + { id: 'A2A', title: 'A2A', description: 'Agent-to-Agent protocol' }, + { id: 'INFERENCE', title: 'INFERENCE', description: 'Model inference endpoint' }, ] as const; export const TARGET_LANGUAGE_OPTIONS = [ @@ -249,6 +395,8 @@ const AUTH_OPTION_LABELS = { NONE: { title: 'No authorization', description: 'No outbound authentication' }, OAUTH: { title: 'OAuth 2LO', description: 'OAuth 2.0 client credentials' }, API_KEY: { title: 'API Key', description: 'API key credential' }, + GATEWAY_IAM_ROLE: { title: 'Gateway IAM Role', description: 'Gateway signs with SigV4' }, + JWT_PASSTHROUGH: { title: 'JWT Passthrough', description: 'Forward caller JWT token' }, } as const; /** Derive the outbound auth UI options for a given target type from the centralized config. 
*/ diff --git a/src/cli/tui/screens/mcp/useAddGatewayTargetWizard.ts b/src/cli/tui/screens/mcp/useAddGatewayTargetWizard.ts index bfef7d4ac..525ffe456 100644 --- a/src/cli/tui/screens/mcp/useAddGatewayTargetWizard.ts +++ b/src/cli/tui/screens/mcp/useAddGatewayTargetWizard.ts @@ -1,5 +1,11 @@ import { APP_DIR, MCP_APP_SUBDIR } from '../../../../lib'; -import type { ApiGatewayHttpMethod, GatewayTargetType, SchemaSource, ToolDefinition } from '../../../../schema'; +import type { + ApiGatewayHttpMethod, + GatewayTargetType, + PassthroughProtocolType, + SchemaSource, + ToolDefinition, +} from '../../../../schema'; import type { AddGatewayTargetStep, GatewayTargetWizardState } from './types'; import { useCallback, useMemo, useState } from 'react'; @@ -47,6 +53,31 @@ export function useAddGatewayTargetWizard( case 'lambdaFunctionArn': baseSteps.push('lambda-arn', 'tool-schema', 'gateway'); break; + case 'httpRuntime': + baseSteps.push('runtime', 'runtime-endpoint', 'gateway', 'outbound-auth'); + break; + case 'connector': + // Connector (Knowledge Base) flow: select a KB (project name or + // literal 10-char ID), then attach to a gateway. No outbound auth — + // connector targets are managed by the gateway IAM role. + baseSteps.push('kb-select', 'gateway'); + break; + case 'passthrough': + baseSteps.push( + 'passthrough-endpoint', + 'passthrough-protocol', + 'passthrough-stickiness', + 'gateway', + 'outbound-auth', + 'signing-service', + 'signing-region' + ); + break; + case 'webSearch': + // Amazon Web Search flow: pick a gateway, optionally specify domains + // to exclude. No outbound auth — managed by the gateway IAM role. 
+ baseSteps.push('gateway', 'exclude-domains'); + break; case 'mcpServer': default: baseSteps.push('endpoint', 'gateway', 'outbound-auth'); @@ -57,10 +88,15 @@ export function useAddGatewayTargetWizard( return baseSteps; }, [config.targetType]); - const currentIndex = steps.indexOf(step); + // The 'kb-id' step is a sub-step of 'kb-select' for manual literal-KB-ID entry. + // It is not part of the canonical step list, so map it onto kb-select for + // navigation/index purposes. + const stepForIndex: AddGatewayTargetStep = step === 'kb-id' ? 'kb-select' : step; + const currentIndex = steps.indexOf(stepForIndex); const goToNextStep = useCallback(() => { - const idx = steps.indexOf(step); + const lookup = step === 'kb-id' ? 'kb-select' : step; + const idx = steps.indexOf(lookup); const next = steps[idx + 1]; if (idx >= 0 && next) { setStep(next); @@ -68,9 +104,14 @@ export function useAddGatewayTargetWizard( }, [steps, step]); const goBack = useCallback(() => { + // From the manual KB-ID entry, fall back to the KB selection picker. + if (step === 'kb-id') { + setStep('kb-select'); + return; + } const prevStep = steps[currentIndex - 1]; if (prevStep) setStep(prevStep); - }, [currentIndex, steps]); + }, [currentIndex, steps, step]); const setName = useCallback( (name: string) => { @@ -87,7 +128,16 @@ export function useAddGatewayTargetWizard( ); const setTargetType = useCallback((targetType: GatewayTargetType) => { - setConfig(c => ({ ...c, targetType })); + // KB connector targets default to 'bedrock-knowledge-bases' for the TUI; + // 'bedrock-agentic-retrieve' is gateway-managed by the Add Knowledge Base + // flow and not directly exposed here. webSearch targets carry no connectorId. + const connectorIdDefault = targetType === 'connector' ? ('bedrock-knowledge-bases' as const) : undefined; + setConfig(c => ({ + ...c, + targetType, + ...(connectorIdDefault ? { connectorId: connectorIdDefault } : { connectorId: undefined }), + ...(targetType !== 'webSearch' ? 
{ excludeDomains: undefined } : {}), + })); // Cannot use goToNextStep() here — config.targetType is changing, which triggers // useMemo to recompute steps, but goToNextStep captures the OLD steps via closure. // Must explicitly set the first type-specific step. @@ -102,6 +152,18 @@ export function useAddGatewayTargetWizard( case 'lambdaFunctionArn': setStep('lambda-arn'); break; + case 'httpRuntime': + setStep('runtime'); + break; + case 'connector': + setStep('kb-select'); + break; + case 'passthrough': + setStep('passthrough-endpoint'); + break; + case 'webSearch': + setStep('gateway'); + break; case 'mcpServer': default: setStep('endpoint'); @@ -137,14 +199,24 @@ export function useAddGatewayTargetWizard( ); const setOutboundAuth = useCallback( - (outboundAuth: { type: 'OAUTH' | 'API_KEY' | 'NONE'; credentialName?: string }) => { + (outboundAuth: { + type: 'OAUTH' | 'API_KEY' | 'NONE' | 'GATEWAY_IAM_ROLE' | 'JWT_PASSTHROUGH'; + credentialName?: string; + }) => { setConfig(c => ({ ...c, outboundAuth, })); - goToNextStep(); + // GATEWAY_IAM_ROLE continues to signing-service only when the active flow defines that step (passthrough). + // Otherwise (JWT_PASSTHROUGH, other auth types, or flows without signing steps) go straight to confirm. + if (outboundAuth.type === 'GATEWAY_IAM_ROLE' && steps.includes('signing-service')) { + setStep('signing-service'); + } else { + // Skip signing-service and signing-region, go to confirm + setStep('confirm'); + } }, - [goToNextStep] + [steps] ); const reset = useCallback(() => { @@ -200,6 +272,111 @@ export function useAddGatewayTargetWizard( [goToNextStep] ); + const setRuntime = useCallback( + (runtime: string) => { + setConfig(c => ({ ...c, runtime })); + goToNextStep(); + }, + [goToNextStep] + ); + + const setRuntimeEndpoint = useCallback( + (endpoint: string | undefined) => { + setConfig(c => ({ ...c, endpoint })); + goToNextStep(); + }, + [goToNextStep] + ); + + /** + * Set the Knowledge Base reference (a project KB name or a literal 10-char + * external KB ID) and advance to the gateway step. 
The wizard's `name` + * field defaults to the KB reference if the user hasn't typed one yet. + */ + const setKnowledgeBaseId = useCallback( + (knowledgeBaseId: string) => { + setConfig(c => ({ + ...c, + knowledgeBaseId, + name: c.name || knowledgeBaseId, + })); + goToNextStep(); + }, + [goToNextStep] + ); + + const setPassthroughEndpoint = useCallback( + (passthroughEndpoint: string) => { + setConfig(c => ({ ...c, passthroughEndpoint })); + goToNextStep(); + }, + [goToNextStep] + ); + + const setPassthroughProtocol = useCallback( + (passthroughProtocol: PassthroughProtocolType) => { + setConfig(c => ({ ...c, passthroughProtocol })); + goToNextStep(); + }, + [goToNextStep] + ); + + const setStickinessConfig = useCallback( + (identifier?: string, timeout?: number) => { + setConfig(c => ({ + ...c, + stickinessIdentifier: identifier, + stickinessTimeout: timeout, + })); + goToNextStep(); + }, + [goToNextStep] + ); + + /** Switch from the kb-select picker to the manual literal-ID entry step. */ + const beginManualKbId = useCallback(() => { + setStep('kb-id'); + }, []); + + const setSigningService = useCallback( + (signingService: string) => { + setConfig(c => ({ + ...c, + signingService, + outboundAuth: { ...c.outboundAuth!, service: signingService }, + })); + goToNextStep(); + }, + [goToNextStep] + ); + + const setSigningRegion = useCallback( + (signingRegion?: string) => { + setConfig(c => ({ + ...c, + signingRegion, + outboundAuth: { ...c.outboundAuth!, region: signingRegion }, + })); + goToNextStep(); + }, + [goToNextStep] + ); + + /** + * Set the optional list of domains to exclude (web-search connector only) + * and advance to confirm. An empty submission clears the field. + */ + const setExcludeDomains = useCallback( + (excludeDomains: string[] | undefined) => { + setConfig(c => ({ + ...c, + excludeDomains: excludeDomains && excludeDomains.length > 0 ? 
excludeDomains : undefined, + })); + goToNextStep(); + }, + [goToNextStep] + ); + return { config, step, @@ -219,6 +396,16 @@ export function useAddGatewayTargetWizard( setApiGatewayAuth, setLambdaArn, setToolSchemaFile, + setRuntime, + setRuntimeEndpoint, + setKnowledgeBaseId, + beginManualKbId, + setPassthroughEndpoint, + setPassthroughProtocol, + setStickinessConfig, + setSigningService, + setSigningRegion, + setExcludeDomains, reset, }; } diff --git a/src/cli/tui/screens/online-eval/AddOnlineEvalFlow.tsx b/src/cli/tui/screens/online-eval/AddOnlineEvalFlow.tsx index 243eba4e7..309c2c71e 100644 --- a/src/cli/tui/screens/online-eval/AddOnlineEvalFlow.tsx +++ b/src/cli/tui/screens/online-eval/AddOnlineEvalFlow.tsx @@ -59,14 +59,6 @@ export function AddOnlineEvalFlow({ isInteractive = true, onExit, onBack, onDev, const runtimesList = projectSpec.runtimes ?? []; const agentNames = runtimesList.map(a => a.name); - if (agentNames.length === 0) { - setFlow({ - name: 'error', - message: 'No agents found in project. 
Add an agent first with `agentcore add agent`.', - }); - return; - } - // Build runtime info with endpoints for the endpoint picker const runtimesInfo: RuntimeInfoForEval[] = runtimesList.map(r => ({ name: r.name, diff --git a/src/cli/tui/screens/online-eval/AddOnlineEvalScreen.tsx b/src/cli/tui/screens/online-eval/AddOnlineEvalScreen.tsx index fd5fafcf6..3846711cf 100644 --- a/src/cli/tui/screens/online-eval/AddOnlineEvalScreen.tsx +++ b/src/cli/tui/screens/online-eval/AddOnlineEvalScreen.tsx @@ -12,11 +12,11 @@ import { import { HELP_TEXT } from '../../constants'; import { useListNavigation, useMultiSelectNavigation } from '../../hooks'; import { generateUniqueName } from '../../utils'; -import type { AddOnlineEvalConfig, EvaluatorItem, RuntimeEndpointEntry } from './types'; +import type { AddOnlineEvalConfig, EvaluatorItem, OnlineEvalSource, RuntimeEndpointEntry } from './types'; import { DEFAULT_SAMPLING_RATE, ONLINE_EVAL_STEP_LABELS } from './types'; import { useAddOnlineEvalWizard } from './useAddOnlineEvalWizard'; import { Box, Text } from 'ink'; -import React, { useCallback, useEffect, useMemo } from 'react'; +import React, { useCallback, useEffect, useMemo, useState } from 'react'; /** Runtime info with endpoints, passed from the parent flow. */ export interface RuntimeInfoForEval { @@ -44,13 +44,16 @@ export function AddOnlineEvalScreen({ }: AddOnlineEvalScreenProps) { const wizard = useAddOnlineEvalWizard(agentNames.length); - // Auto-set agent when there's only one + // State for the repeating log group input + const [logGroupEntries, setLogGroupEntries] = useState([]); + + // Auto-set agent when there's only one and source is agentcore-runtime const effectiveConfig = useMemo(() => { - if (agentNames.length === 1 && !wizard.config.agent) { + if (wizard.source === 'agentcore-runtime' && agentNames.length === 1 && !wizard.config.agent) { return { ...wizard.config, agent: agentNames[0]! 
}; } return wizard.config; - }, [wizard.config, agentNames]); + }, [wizard.config, wizard.source, agentNames]); // Determine endpoints for the currently selected agent const agentEndpoints = useMemo(() => { @@ -60,18 +63,34 @@ export function AddOnlineEvalScreen({ return rt?.endpoints ?? []; }, [effectiveConfig.agent, runtimes]); - // Skip endpoint step when the selected agent has no endpoints + // Skip steps based on source selection const shouldSkipStep = useCallback( (s: string) => { - if (s === 'endpoint' && agentEndpoints.length === 0) return true; + if (s === 'endpoint' && (wizard.source === 'cloudwatch-logs' || agentEndpoints.length === 0)) return true; + if (s === 'agent' && wizard.source === 'cloudwatch-logs') return true; + if (s === 'logGroupNames' && wizard.source === 'agentcore-runtime') return true; + if (s === 'serviceName' && wizard.source === 'agentcore-runtime') return true; return false; }, - [agentEndpoints.length] + [wizard.source, agentEndpoints.length] ); useEffect(() => { wizard.setSkipCheck(shouldSkipStep); - }, [shouldSkipStep]); // wizard.setSkipCheck is stable (useCallback with no deps) + }, [shouldSkipStep, wizard]); // wizard.setSkipCheck is stable (useCallback with no deps) + + // Source selection items + const sourceItems: SelectableItem[] = useMemo( + () => [ + { id: 'agentcore-runtime', title: 'AgentCore Runtime', description: 'Monitor a managed AgentCore agent' }, + { + id: 'cloudwatch-logs', + title: 'CloudWatch Logs', + description: 'Provide custom log groups for 3rd-party agents', + }, + ], + [] + ); // Build endpoint picker items: DEFAULT (plain) + each endpoint const endpointItems: SelectableItem[] = useMemo(() => { @@ -95,8 +114,11 @@ export function AddOnlineEvalScreen({ }, [agentNames]); const isNameStep = wizard.step === 'name'; + const isSourceStep = wizard.step === 'source'; const isAgentStep = wizard.step === 'agent'; const isEndpointStep = wizard.step === 'endpoint'; + const isLogGroupNamesStep = wizard.step === 
'logGroupNames'; + const isServiceNameStep = wizard.step === 'serviceName'; const isEvaluatorsStep = wizard.step === 'evaluators'; const isSamplingRateStep = wizard.step === 'samplingRate'; const isEnableOnCreateStep = wizard.step === 'enableOnCreate'; @@ -110,6 +132,13 @@ export function AddOnlineEvalScreen({ [] ); + const sourceNav = useListNavigation({ + items: sourceItems, + onSelect: item => wizard.setSource(item.id as OnlineEvalSource), + onExit: () => wizard.goBack(), + isActive: isSourceStep, + }); + const agentNav = useListNavigation({ items: agentItems, onSelect: item => wizard.setAgent(item.id), @@ -152,7 +181,7 @@ export function AddOnlineEvalScreen({ const helpText = isEvaluatorsStep ? 'Space toggle · Enter confirm · Esc back' - : isAgentStep || isEndpointStep || isEnableOnCreateStep + : isSourceStep || isAgentStep || isEndpointStep || isEnableOnCreateStep ? HELP_TEXT.NAVIGATE_SELECT : isConfirmStep ? HELP_TEXT.CONFIRM_CANCEL @@ -162,6 +191,26 @@ export function AddOnlineEvalScreen({ ); + // Build confirm fields based on source + const confirmFields = useMemo(() => { + const fields = [{ label: 'Name', value: effectiveConfig.name }]; + if (wizard.source === 'agentcore-runtime') { + fields.push({ label: 'Agent', value: effectiveConfig.agent }); + if (effectiveConfig.endpoint) { + fields.push({ label: 'Endpoint', value: effectiveConfig.endpoint }); + } + } else { + fields.push({ label: 'Log Groups', value: (effectiveConfig.logGroupNames ?? []).join(', ') }); + if (effectiveConfig.serviceNames && effectiveConfig.serviceNames.length > 0) { + fields.push({ label: 'Service Names', value: effectiveConfig.serviceNames.join(', ') }); + } + } + fields.push({ label: 'Evaluators', value: effectiveConfig.evaluators.join(', ') }); + fields.push({ label: 'Sampling Rate', value: `${effectiveConfig.samplingRate}%` }); + fields.push({ label: 'Enable on Deploy', value: effectiveConfig.enableOnCreate ? 
'Yes' : 'No' }); + return fields; + }, [effectiveConfig, wizard.source]); + return ( @@ -177,6 +226,15 @@ export function AddOnlineEvalScreen({ /> )} + {isSourceStep && ( + + )} + {isAgentStep && ( )} + {isLogGroupNamesStep && ( + + + Enter CloudWatch log group names (1-5). Press Enter to add each name. Submit an empty value when done. + + {logGroupEntries.length > 0 && ( + + {logGroupEntries.map((entry, i) => ( + + {' '} + {i + 1}. {entry} + + ))} + + )} + { + if (value === '' && logGroupEntries.length > 0) { + // Empty submission finalizes the list + wizard.setLogGroupNames(logGroupEntries); + setLogGroupEntries([]); + } else if (value !== '') { + if (logGroupEntries.length >= 5) return; + setLogGroupEntries(prev => [...prev, value]); + } + }} + onCancel={() => wizard.goBack()} + customValidation={value => { + if (value === '' && logGroupEntries.length === 0) return 'At least one log group name is required'; + if (value === '' && logGroupEntries.length > 0) return true; // allow empty to finish + if (logGroupEntries.length >= 5) return 'Maximum 5 log group names allowed'; + return true; + }} + /> + + )} + + {isServiceNameStep && ( + + Enter service names separated by spaces (optional). Leave empty to skip. + { + const names = value.trim() ? value.trim().split(/\s+/) : []; + wizard.setServiceNames(names); + }} + onCancel={() => wizard.goBack()} + /> + + )} + {isEvaluatorsStep && ( { const rate = parseFloat(value); @@ -239,18 +353,7 @@ export function AddOnlineEvalScreen({ /> )} - {isConfirmStep && ( - - )} + {isConfirmStep && } ); diff --git a/src/cli/tui/screens/online-eval/OnlineEvalDashboard.tsx b/src/cli/tui/screens/online-eval/OnlineEvalDashboard.tsx index bfc0e7ed1..8dd5fb969 100644 --- a/src/cli/tui/screens/online-eval/OnlineEvalDashboard.tsx +++ b/src/cli/tui/screens/online-eval/OnlineEvalDashboard.tsx @@ -81,7 +81,7 @@ async function fetchDashboardConfigs(): Promise { name: local.name, configId: deployed?.onlineEvaluationConfigId ?? 
'', region, - evaluators: local.evaluators, + evaluators: local.evaluators ?? [], samplingRate: local.samplingRate, executionStatus: deployed?.executionStatus, }); diff --git a/src/cli/tui/screens/online-eval/types.ts b/src/cli/tui/screens/online-eval/types.ts index 1a1e5940c..0073c29d7 100644 --- a/src/cli/tui/screens/online-eval/types.ts +++ b/src/cli/tui/screens/online-eval/types.ts @@ -4,17 +4,24 @@ export type AddOnlineEvalStep = | 'name' + | 'source' | 'agent' | 'endpoint' + | 'logGroupNames' + | 'serviceName' | 'evaluators' | 'samplingRate' | 'enableOnCreate' | 'confirm'; +export type OnlineEvalSource = 'agentcore-runtime' | 'cloudwatch-logs'; + export interface AddOnlineEvalConfig { name: string; agent: string; endpoint?: string; + logGroupNames?: string[]; + serviceNames?: string[]; evaluators: string[]; samplingRate: number; enableOnCreate: boolean; @@ -29,8 +36,11 @@ export interface RuntimeEndpointEntry { export const ONLINE_EVAL_STEP_LABELS: Record = { name: 'Name', + source: 'Source', agent: 'Agent', endpoint: 'Endpoint', + logGroupNames: 'Log Groups', + serviceName: 'Services', evaluators: 'Evaluators', samplingRate: 'Rate', enableOnCreate: 'Enable', diff --git a/src/cli/tui/screens/online-eval/useAddOnlineEvalWizard.ts b/src/cli/tui/screens/online-eval/useAddOnlineEvalWizard.ts index 239a95edc..440629bea 100644 --- a/src/cli/tui/screens/online-eval/useAddOnlineEvalWizard.ts +++ b/src/cli/tui/screens/online-eval/useAddOnlineEvalWizard.ts @@ -1,13 +1,35 @@ -import type { AddOnlineEvalConfig, AddOnlineEvalStep } from './types'; +import type { AddOnlineEvalConfig, AddOnlineEvalStep, OnlineEvalSource } from './types'; import { DEFAULT_SAMPLING_RATE } from './types'; import { useCallback, useRef, useState } from 'react'; function getAllSteps(agentCount: number): AddOnlineEvalStep[] { if (agentCount <= 1) { // endpoint step is included but will be skipped dynamically when no endpoints exist - return ['name', 'endpoint', 'evaluators', 'samplingRate', 
'enableOnCreate', 'confirm']; + // source step routes to either agent/endpoint OR logGroupNames/serviceName + return [ + 'name', + 'source', + 'endpoint', + 'logGroupNames', + 'serviceName', + 'evaluators', + 'samplingRate', + 'enableOnCreate', + 'confirm', + ]; } - return ['name', 'agent', 'endpoint', 'evaluators', 'samplingRate', 'enableOnCreate', 'confirm']; + return [ + 'name', + 'source', + 'agent', + 'endpoint', + 'logGroupNames', + 'serviceName', + 'evaluators', + 'samplingRate', + 'enableOnCreate', + 'confirm', + ]; } function getDefaultConfig(): AddOnlineEvalConfig { @@ -15,6 +37,8 @@ function getDefaultConfig(): AddOnlineEvalConfig { name: '', agent: '', endpoint: undefined, + logGroupNames: undefined, + serviceNames: undefined, evaluators: [], samplingRate: DEFAULT_SAMPLING_RATE, enableOnCreate: true, @@ -27,6 +51,7 @@ export function useAddOnlineEvalWizard(agentCount: number) { const allSteps = getAllSteps(agentCount); const [config, setConfig] = useState(getDefaultConfig); const [step, setStep] = useState(allSteps[0]!); + const [source, setSourceState] = useState('agentcore-runtime'); const skipCheckRef = useRef(() => false); const currentIndex = allSteps.indexOf(step); @@ -66,6 +91,21 @@ export function useAddOnlineEvalWizard(agentCount: number) { [nextStep, setConfig, setStep] ); + const setSource = useCallback( + (selectedSource: OnlineEvalSource) => { + setSourceState(selectedSource); + // Reset fields based on source selection + if (selectedSource === 'cloudwatch-logs') { + setConfig(c => ({ ...c, agent: '', endpoint: undefined, logGroupNames: undefined, serviceNames: undefined })); + } else { + setConfig(c => ({ ...c, logGroupNames: undefined, serviceNames: undefined })); + } + const next = nextStep('source'); + if (next) setStep(next); + }, + [nextStep, setSourceState, setConfig, setStep] + ); + const setAgent = useCallback( (agent: string) => { setConfig(c => ({ ...c, agent, endpoint: undefined })); @@ -84,6 +124,24 @@ export function 
useAddOnlineEvalWizard(agentCount: number) { [nextStep, setConfig, setStep] ); + const setLogGroupNames = useCallback( + (logGroupNames: string[]) => { + setConfig(c => ({ ...c, logGroupNames })); + const next = nextStep('logGroupNames'); + if (next) setStep(next); + }, + [nextStep, setConfig, setStep] + ); + + const setServiceNames = useCallback( + (serviceNames: string[]) => { + setConfig(c => ({ ...c, serviceNames: serviceNames.length > 0 ? serviceNames : undefined })); + const next = nextStep('serviceName'); + if (next) setStep(next); + }, + [nextStep, setConfig, setStep] + ); + const setEvaluators = useCallback( (evaluators: string[]) => { setConfig(c => ({ ...c, evaluators })); @@ -113,19 +171,24 @@ export function useAddOnlineEvalWizard(agentCount: number) { const reset = useCallback(() => { setConfig(getDefaultConfig()); + setSourceState('agentcore-runtime'); setStep(allSteps[0]!); - }, [allSteps, setConfig, setStep]); + }, [allSteps, setSourceState, setConfig, setStep]); return { config, step, steps: allSteps, currentIndex, + source, goBack, setSkipCheck, setName, + setSource, setAgent, setEndpoint, + setLogGroupNames, + setServiceNames, setEvaluators, setSamplingRate, setEnableOnCreate, diff --git a/src/cli/tui/screens/online-insights/AddOnlineInsightsFlow.tsx b/src/cli/tui/screens/online-insights/AddOnlineInsightsFlow.tsx new file mode 100644 index 000000000..ca31cb1d1 --- /dev/null +++ b/src/cli/tui/screens/online-insights/AddOnlineInsightsFlow.tsx @@ -0,0 +1,145 @@ +import { ConfigIO } from '../../../../lib'; +import { getErrorMessage } from '../../../errors'; +import { onlineInsightsPrimitive } from '../../../primitives/registry'; +import { withCommandRunTelemetry } from '../../../telemetry/cli-command-run.js'; +import { ErrorPrompt, GradientText } from '../../components'; +import { AddSuccessScreen } from '../add/AddSuccessScreen'; +import { AddOnlineInsightsScreen } from './AddOnlineInsightsScreen'; +import type { AddOnlineInsightsConfig } from 
'./types'; +import React, { useCallback, useEffect, useState } from 'react'; + +type FlowState = + | { name: 'loading' } + | { name: 'create-wizard'; agentNames: string[] } + | { name: 'create-success'; configName: string } + | { name: 'error'; message: string }; + +interface AddOnlineInsightsFlowProps { + isInteractive?: boolean; + onExit: () => void; + onBack: () => void; + onDev?: () => void; + onDeploy?: () => void; +} + +export function AddOnlineInsightsFlow({ + isInteractive = true, + onExit, + onBack, + onDev, + onDeploy, +}: AddOnlineInsightsFlowProps) { + const [flow, setFlow] = useState({ name: 'loading' }); + const [existingConfigNames, setExistingConfigNames] = useState([]); + + // Load project data + useEffect(() => { + if (flow.name !== 'loading') return; + let cancelled = false; + + void (async () => { + try { + const projectSpec = await new ConfigIO().readProjectSpec(); + if (cancelled) return; + + const runtimesList = projectSpec.runtimes ?? []; + const agentNames = runtimesList.map(a => a.name); + + if (agentNames.length === 0) { + setFlow({ + name: 'error', + message: 'No agents found in project. 
Add an agent first with `agentcore add agent`.', + }); + return; + } + + const names = await onlineInsightsPrimitive.getAllNames(); + if (cancelled) return; + setExistingConfigNames(names); + + setFlow({ name: 'create-wizard', agentNames }); + } catch (err) { + if (!cancelled) setFlow({ name: 'error', message: getErrorMessage(err) }); + } + })(); + + return () => { + cancelled = true; + }; + }, [flow.name]); + + useEffect(() => { + if (!isInteractive && flow.name === 'create-success') { + onExit(); + } + }, [isInteractive, flow.name, onExit]); + + const handleCreateComplete = useCallback((config: AddOnlineInsightsConfig) => { + void (async () => { + try { + const addResult = await withCommandRunTelemetry( + 'add.online-insights', + { + insights_count: config.insights.length, + enable_on_create: config.enableOnCreate, + }, + () => + onlineInsightsPrimitive.add({ + name: config.name, + agent: config.agent, + insights: config.insights, + samplingRate: config.samplingRate, + clusteringFrequencies: config.clusteringFrequencies.length > 0 ? config.clusteringFrequencies : undefined, + enableOnCreate: config.enableOnCreate, + }) + ); + if (!addResult.success) { + throw new Error(addResult.error?.message ?? 
'Failed to create online insights config'); + } + setFlow({ name: 'create-success', configName: config.name }); + } catch (err) { + setFlow({ name: 'error', message: getErrorMessage(err) }); + } + })(); + }, []); + + if (flow.name === 'loading') { + return ; + } + + if (flow.name === 'create-wizard') { + return ( + + ); + } + + if (flow.name === 'create-success') { + return ( + + ); + } + + return ( + { + setFlow({ name: 'loading' }); + }} + onExit={onExit} + /> + ); +} diff --git a/src/cli/tui/screens/online-insights/AddOnlineInsightsScreen.tsx b/src/cli/tui/screens/online-insights/AddOnlineInsightsScreen.tsx new file mode 100644 index 000000000..622fa2a52 --- /dev/null +++ b/src/cli/tui/screens/online-insights/AddOnlineInsightsScreen.tsx @@ -0,0 +1,216 @@ +import { OnlineEvalConfigNameSchema } from '../../../../schema'; +import type { SelectableItem } from '../../components'; +import { + ConfirmReview, + Panel, + Screen, + StepIndicator, + TextInput, + WizardMultiSelect, + WizardSelect, +} from '../../components'; +import { HELP_TEXT } from '../../constants'; +import { useListNavigation, useMultiSelectNavigation } from '../../hooks'; +import { generateUniqueName } from '../../utils'; +import type { AddOnlineInsightsConfig } from './types'; +import { + AVAILABLE_INSIGHTS, + CLUSTERING_FREQUENCIES, + DEFAULT_INSIGHTS_SAMPLING_RATE, + ONLINE_INSIGHTS_STEP_LABELS, +} from './types'; +import { useAddOnlineInsightsWizard } from './useAddOnlineInsightsWizard'; +import { Box, Text } from 'ink'; +import React, { useMemo } from 'react'; + +interface AddOnlineInsightsScreenProps { + onComplete: (config: AddOnlineInsightsConfig) => void; + onExit: () => void; + existingConfigNames: string[]; + agentNames: string[]; +} + +export function AddOnlineInsightsScreen({ + onComplete, + onExit, + existingConfigNames, + agentNames, +}: AddOnlineInsightsScreenProps) { + const wizard = useAddOnlineInsightsWizard(); + + // Auto-set agent when there's only one + const effectiveConfig = 
useMemo(() => { + if (agentNames.length === 1 && !wizard.config.agent) { + return { ...wizard.config, agent: agentNames[0]! }; + } + return wizard.config; + }, [wizard.config, agentNames]); + + const isAgentStep = wizard.step === 'agent'; + const isInsightsStep = wizard.step === 'insights'; + const isSamplingRateStep = wizard.step === 'samplingRate'; + const isClusteringStep = wizard.step === 'clustering'; + const isNameStep = wizard.step === 'name'; + const isConfirmStep = wizard.step === 'confirm'; + + const agentItems: SelectableItem[] = useMemo(() => { + return agentNames.map(name => ({ id: name, title: name })); + }, [agentNames]); + + const insightItems: SelectableItem[] = useMemo(() => { + return AVAILABLE_INSIGHTS.map(i => ({ + id: i.id, + title: i.title, + description: i.description, + })); + }, []); + + const clusteringItems: SelectableItem[] = useMemo(() => { + return CLUSTERING_FREQUENCIES.map(f => ({ + id: f.id, + title: f.title, + description: `Cluster insights ${f.title.toLowerCase()}`, + })); + }, []); + + const agentNav = useListNavigation({ + items: agentItems, + onSelect: item => wizard.setAgent(item.id), + onExit: () => onExit(), + isActive: isAgentStep, + }); + + const insightsNav = useMultiSelectNavigation({ + items: insightItems, + getId: item => item.id, + onConfirm: ids => wizard.setInsights(ids), + onExit: () => wizard.goBack(), + isActive: isInsightsStep, + requireSelection: true, + }); + + const clusteringNav = useMultiSelectNavigation({ + items: clusteringItems, + getId: item => item.id, + onConfirm: ids => wizard.setClusteringFrequencies(ids), + onExit: () => wizard.goBack(), + isActive: isClusteringStep, + }); + + useListNavigation({ + items: [{ id: 'confirm', title: 'Confirm' }], + onSelect: () => onComplete(effectiveConfig), + onExit: () => wizard.goBack(), + isActive: isConfirmStep, + }); + + const helpText = + isInsightsStep || isClusteringStep + ? 'Space toggle · Enter confirm · Esc back' + : isAgentStep + ? 
HELP_TEXT.NAVIGATE_SELECT + : isConfirmStep + ? HELP_TEXT.CONFIRM_CANCEL + : HELP_TEXT.TEXT_INPUT; + + const headerContent = ( + + ); + + return ( + + + {isAgentStep && ( + + )} + + {isInsightsStep && ( + + )} + + {isSamplingRateStep && ( + + + Percentage of agent sessions to analyze. Higher rates give better coverage but increase costs. + + { + const filtered = value.replace(/[^0-9.]/g, ''); + if (filtered !== value) setValue(filtered); + }} + onSubmit={value => { + const rate = parseFloat(value); + if (isNaN(rate) || rate < 0.01 || rate > 100) return; + wizard.setSamplingRate(rate); + }} + onCancel={() => wizard.goBack()} + customValidation={value => { + const rate = parseFloat(value); + if (isNaN(rate)) return 'Must be a number'; + if (rate < 0.01 || rate > 100) return 'Must be between 0.01 and 100'; + return true; + }} + /> + + )} + + {isClusteringStep && ( + + )} + + {isNameStep && ( + wizard.goBack()} + schema={OnlineEvalConfigNameSchema} + customValidation={value => !existingConfigNames.includes(value) || 'Config name already exists'} + /> + )} + + {isConfirmStep && ( + i.split('.').pop()!).join(', ') }, + { label: 'Sampling Rate', value: `${effectiveConfig.samplingRate}%` }, + ...(effectiveConfig.clusteringFrequencies.length > 0 + ? 
[{ label: 'Clustering', value: effectiveConfig.clusteringFrequencies.join(', ') }] + : []), + { label: 'Enable on Deploy', value: 'Yes' }, + ]} + /> + )} + + + ); +} diff --git a/src/cli/tui/screens/online-insights/index.ts b/src/cli/tui/screens/online-insights/index.ts new file mode 100644 index 000000000..9c49ec844 --- /dev/null +++ b/src/cli/tui/screens/online-insights/index.ts @@ -0,0 +1,2 @@ +export { AddOnlineInsightsFlow } from './AddOnlineInsightsFlow'; +export { AddOnlineInsightsScreen } from './AddOnlineInsightsScreen'; diff --git a/src/cli/tui/screens/online-insights/types.ts b/src/cli/tui/screens/online-insights/types.ts new file mode 100644 index 000000000..87217987d --- /dev/null +++ b/src/cli/tui/screens/online-insights/types.ts @@ -0,0 +1,49 @@ +// ───────────────────────────────────────────────────────────────────────────── +// Online Insights Config Flow Types +// ───────────────────────────────────────────────────────────────────────────── + +export type AddOnlineInsightsStep = 'agent' | 'insights' | 'samplingRate' | 'clustering' | 'name' | 'confirm'; + +export interface AddOnlineInsightsConfig { + name: string; + agent: string; + insights: string[]; + samplingRate: number; + clusteringFrequencies: string[]; + enableOnCreate: boolean; +} + +export const ONLINE_INSIGHTS_STEP_LABELS: Record = { + agent: 'Agent', + insights: 'Insights', + samplingRate: 'Rate', + clustering: 'Clustering', + name: 'Name', + confirm: 'Confirm', +}; + +export const DEFAULT_INSIGHTS_SAMPLING_RATE = 100; + +export const AVAILABLE_INSIGHTS = [ + { + id: 'Builtin.Insight.FailureAnalysis', + title: 'Failure Analysis', + description: 'Analyze failure patterns and root causes across sessions', + }, + { + id: 'Builtin.Insight.UserIntent', + title: 'User Intent', + description: 'Classify and cluster user intents from session transcripts', + }, + { + id: 'Builtin.Insight.ExecutionSummary', + title: 'Execution Summary', + description: 'Summarize execution patterns and tool usage 
across sessions', + }, +]; + +export const CLUSTERING_FREQUENCIES = [ + { id: 'DAILY', title: 'Daily' }, + { id: 'WEEKLY', title: 'Weekly' }, + { id: 'MONTHLY', title: 'Monthly' }, +]; diff --git a/src/cli/tui/screens/online-insights/useAddOnlineInsightsWizard.ts b/src/cli/tui/screens/online-insights/useAddOnlineInsightsWizard.ts new file mode 100644 index 000000000..6255663f7 --- /dev/null +++ b/src/cli/tui/screens/online-insights/useAddOnlineInsightsWizard.ts @@ -0,0 +1,106 @@ +import type { AddOnlineInsightsConfig, AddOnlineInsightsStep } from './types'; +import { DEFAULT_INSIGHTS_SAMPLING_RATE } from './types'; +import { useCallback, useState } from 'react'; + +const ALL_STEPS: AddOnlineInsightsStep[] = ['agent', 'insights', 'samplingRate', 'clustering', 'name', 'confirm']; + +function getDefaultConfig(): AddOnlineInsightsConfig { + return { + name: '', + agent: '', + insights: [], + samplingRate: DEFAULT_INSIGHTS_SAMPLING_RATE, + clusteringFrequencies: [], + enableOnCreate: true, + }; +} + +export function useAddOnlineInsightsWizard() { + const allSteps = ALL_STEPS; + const [config, setConfig] = useState(getDefaultConfig); + const [step, setStep] = useState(allSteps[0]!); + + const currentIndex = allSteps.indexOf(step); + + const nextStep = useCallback( + (currentStep: AddOnlineInsightsStep): AddOnlineInsightsStep | undefined => { + const idx = allSteps.indexOf(currentStep); + if (idx + 1 < allSteps.length) { + return allSteps[idx + 1]!; + } + return undefined; + }, + [allSteps] + ); + + const goBack = useCallback(() => { + for (let i = currentIndex - 1; i >= 0; i--) { + setStep(allSteps[i]!); + return; + } + }, [allSteps, currentIndex, setStep]); + + const setAgent = useCallback( + (agent: string) => { + setConfig(c => ({ ...c, agent })); + const next = nextStep('agent'); + if (next) setStep(next); + }, + [nextStep, setConfig, setStep] + ); + + const setInsights = useCallback( + (insights: string[]) => { + setConfig(c => ({ ...c, insights })); + const next = 
nextStep('insights'); + if (next) setStep(next); + }, + [nextStep, setConfig, setStep] + ); + + const setSamplingRate = useCallback( + (samplingRate: number) => { + setConfig(c => ({ ...c, samplingRate })); + const next = nextStep('samplingRate'); + if (next) setStep(next); + }, + [nextStep, setConfig, setStep] + ); + + const setClusteringFrequencies = useCallback( + (clusteringFrequencies: string[]) => { + setConfig(c => ({ ...c, clusteringFrequencies })); + const next = nextStep('clustering'); + if (next) setStep(next); + }, + [nextStep, setConfig, setStep] + ); + + const setName = useCallback( + (name: string) => { + setConfig(c => ({ ...c, name })); + const next = nextStep('name'); + if (next) setStep(next); + }, + [nextStep, setConfig, setStep] + ); + + const reset = useCallback(() => { + setConfig(getDefaultConfig()); + setStep(allSteps[0]!); + }, [allSteps, setConfig, setStep]); + + return { + config, + step, + steps: allSteps, + currentIndex, + goBack, + setAgent, + setInsights, + setSamplingRate, + setClusteringFrequencies, + setName, + reset, + }; +} diff --git a/src/cli/tui/screens/policy/AddPolicyFlow.tsx b/src/cli/tui/screens/policy/AddPolicyFlow.tsx index 0f6758e97..dcbaeff2e 100644 --- a/src/cli/tui/screens/policy/AddPolicyFlow.tsx +++ b/src/cli/tui/screens/policy/AddPolicyFlow.tsx @@ -17,7 +17,7 @@ import { AddSuccessScreen } from '../add/AddSuccessScreen'; import { POLICY_ENGINE_MODE_OPTIONS } from '../mcp/types'; import { AddPolicyEngineScreen } from './AddPolicyEngineScreen'; import { AddPolicyScreen } from './AddPolicyScreen'; -import type { AddPolicyConfig, AddPolicyEngineConfig } from './types'; +import { type AddPolicyConfig, type AddPolicyEngineConfig, authorizationPhaseForEffect } from './types'; import { Box, Text } from 'ink'; import React, { useCallback, useEffect, useMemo, useState } from 'react'; @@ -40,6 +40,7 @@ type FlowState = preSelectedEngine: string; isEngineDeployed: boolean; deployedGateways: Record; + projectGateways: { name: 
string; httpTargets: string[] }[]; } | { name: 'engine-success'; engineName: string } | { name: 'policy-success'; policyName: string; engineName: string } @@ -58,6 +59,7 @@ export function AddPolicyFlow({ isInteractive = true, onExit, onBack, onDev, onD const [engineNames, setEngineNames] = useState([]); const [policyNames, setPolicyNames] = useState([]); const [hasUnprotectedGateways, setHasUnprotectedGateways] = useState(false); + const [policyAddInFlight, setPolicyAddInFlight] = useState(false); const [pendingEngineName, setPendingEngineName] = useState(); const engineSteps = useMemo(() => { @@ -88,11 +90,11 @@ export function AddPolicyFlow({ isInteractive = true, onExit, onBack, onDev, onD }; }, []); - // In non-interactive mode, exit after success + // In non-interactive mode, show success screen and let user dismiss with Esc/Ctrl+C useEffect(() => { if (!isInteractive) { if (flow.name === 'engine-success' || flow.name === 'policy-success') { - onExit(); + // Success screen renders with exit instructions — user presses Esc/Ctrl+C } } }, [isInteractive, flow.name, onExit]); @@ -116,15 +118,17 @@ export function AddPolicyFlow({ isInteractive = true, onExit, onBack, onDev, onD setFlow({ name: 'engine-wizard' }); } else { setFlow({ name: 'loading' }); - const [deployedId, deployedGateways] = await Promise.all([ + const [deployedId, deployedGateways, projectGateways] = await Promise.all([ policyEnginePrimitive.getDeployedEngineId(item.id), policyEnginePrimitive.getDeployedGateways(), + policyEnginePrimitive.getProjectGateways(), ]); setFlow({ name: 'policy-wizard', preSelectedEngine: item.id, isEngineDeployed: deployedId !== null && Object.keys(deployedGateways).length > 0, deployedGateways, + projectGateways, }); } }, []); @@ -163,46 +167,59 @@ export function AddPolicyFlow({ isInteractive = true, onExit, onBack, onDev, onD [commitEngine] ); - const handlePolicyComplete = useCallback(async (config: AddPolicyConfig) => { - const result = await 
withCommandRunTelemetry( - 'add.policy', - { - policy_attr_source_type: config.sourceFile - ? 'file' - : config.sourceMethod === 'generate' - ? 'generate' - : 'statement', - policy_validation_mode: standardize(PolicyValidationMode, config.validationMode ?? 'FAIL_ON_ANY_FINDINGS'), - }, - () => - policyPrimitive.add({ - name: config.name, - engine: config.engine, - statement: config.statement, - source: config.sourceFile || undefined, - validationMode: config.validationMode, - }) - ); + const handlePolicyComplete = useCallback( + async (config: AddPolicyConfig) => { + if (policyAddInFlight) return; + setPolicyAddInFlight(true); + const result = await withCommandRunTelemetry( + 'add.policy', + { + policy_attr_source_type: config.sourceFile + ? 'file' + : config.sourceMethod === 'generate' + ? 'generate' + : 'statement', + policy_validation_mode: standardize(PolicyValidationMode, config.validationMode ?? 'FAIL_ON_ANY_FINDINGS'), + }, + () => + policyPrimitive.add({ + name: config.name, + engine: config.engine, + statement: config.statement, + source: config.sourceFile || undefined, + validationMode: config.validationMode, + enforcementMode: config.enforcementMode, + // Output-phase effects (suppressOutput) must register on RETURN_OUTPUT. The + // effect is only known for the form source; other sources stay on INITIATE. + authorizationPhase: + config.sourceMethod === 'form' ? 
authorizationPhaseForEffect(config.guardrailForm.effect) : 'INITIATE', + }) + ); - if (result.success) { - setPolicyNames(prev => [...prev, config.name]); - setFlow({ name: 'policy-success', policyName: config.name, engineName: config.engine }); - } else { - setFlow({ name: 'error', message: result.error.message }); - } - }, []); + if (result.success) { + setPolicyNames(prev => [...prev, config.name]); + setFlow({ name: 'policy-success', policyName: config.name, engineName: config.engine }); + } else { + setPolicyAddInFlight(false); + setFlow({ name: 'error', message: result.error.message }); + } + }, + [policyAddInFlight] + ); const handleAddPolicyToNewEngine = useCallback(async (engineName: string) => { setFlow({ name: 'loading' }); - const [deployedId, deployedGateways] = await Promise.all([ + const [deployedId, deployedGateways, projectGateways] = await Promise.all([ policyEnginePrimitive.getDeployedEngineId(engineName), policyEnginePrimitive.getDeployedGateways(), + policyEnginePrimitive.getProjectGateways(), ]); setFlow({ name: 'policy-wizard', preSelectedEngine: engineName, isEngineDeployed: deployedId !== null && Object.keys(deployedGateways).length > 0, deployedGateways, + projectGateways, }); }, []); @@ -255,6 +272,7 @@ export function AddPolicyFlow({ isInteractive = true, onExit, onBack, onDev, onD preSelectedEngine={flow.preSelectedEngine} isEngineDeployed={flow.isEngineDeployed} deployedGateways={flow.deployedGateways} + projectGateways={flow.projectGateways} onComplete={(config: AddPolicyConfig) => void handlePolicyComplete(config)} onExit={() => setFlow({ name: 'select' })} /> @@ -361,7 +379,7 @@ export function AddPolicyFlow({ isInteractive = true, onExit, onBack, onDev, onD agentcore/agentcore.json{' '} - Cedar policy added to engine {flow.engineName} + Policy added to engine {flow.engineName} diff --git a/src/cli/tui/screens/policy/AddPolicyScreen.tsx b/src/cli/tui/screens/policy/AddPolicyScreen.tsx index 38005228d..13ee435f6 100644 --- 
a/src/cli/tui/screens/policy/AddPolicyScreen.tsx +++ b/src/cli/tui/screens/policy/AddPolicyScreen.tsx @@ -1,14 +1,33 @@ import { PolicyNameSchema } from '../../../../schema'; import { detectRegion } from '../../../aws'; import { getPolicyGeneration, startPolicyGeneration } from '../../../aws/policy-generation'; +import { isGatedFeaturesEnabled } from '../../../feature-flags'; import { policyEnginePrimitive } from '../../../primitives/registry'; -import { ConfirmReview, Panel, PathInput, Screen, StepIndicator, TextInput, WizardSelect } from '../../components'; +import { + ConfirmReview, + Panel, + PathInput, + Screen, + StepIndicator, + TextInput, + WizardMultiSelect, + WizardSelect, +} from '../../components'; import type { SelectableItem } from '../../components'; import { HELP_TEXT } from '../../constants'; -import { useListNavigation } from '../../hooks'; +import { useListNavigation, useMultiSelectNavigation } from '../../hooks'; import { generateUniqueName } from '../../utils'; -import type { AddPolicyConfig, PolicySourceMethod } from './types'; -import { POLICY_SOURCE_METHOD_OPTIONS, POLICY_STEP_LABELS, VALIDATION_MODE_OPTIONS } from './types'; +import { synthesizeCedar } from './synthesize-cedar'; +import type { AddPolicyConfig, GuardrailCategoryType, PolicyEffect, PolicySourceMethod } from './types'; +import { + ENFORCEMENT_MODE_OPTIONS, + GUARDRAIL_CATEGORY_OPTIONS, + POLICY_EFFECT_OPTIONS, + POLICY_SOURCE_METHOD_OPTIONS, + POLICY_STEP_LABELS, + VALIDATION_MODE_OPTIONS, + defaultDataPathForEffect, +} from './types'; import { useAddPolicyWizard } from './useAddPolicyWizard'; import { Box, Text } from 'ink'; import Spinner from 'ink-spinner'; @@ -22,6 +41,8 @@ interface AddPolicyScreenProps { preSelectedEngine?: string; isEngineDeployed?: boolean; deployedGateways?: Record; + /** Gateways from agentcore.json with their mcpServer target names */ + projectGateways?: { name: string; httpTargets: string[] }[]; } export function AddPolicyScreen({ @@ -32,8 +53,9 @@ 
export function AddPolicyScreen({ preSelectedEngine, isEngineDeployed = false, deployedGateways = {}, + projectGateways = [], }: AddPolicyScreenProps) { - const wizard = useAddPolicyWizard(preSelectedEngine); + const wizard = useAddPolicyWizard(preSelectedEngine, Object.keys(deployedGateways).length > 0); // Generation state const [generatedPolicy, setGeneratedPolicy] = useState(null); @@ -54,11 +76,13 @@ export function AddPolicyScreen({ () => POLICY_SOURCE_METHOD_OPTIONS.map(opt => { const isGenerate = opt.id === 'generate'; - const disabled = isGenerate && !isEngineDeployed; + // Guardrail form is gated behind ENABLE_GATED_FEATURES. + const gated = opt.id === 'form' && !isGatedFeaturesEnabled(); + const disabled = gated || (isGenerate && !isEngineDeployed); return { id: opt.id, title: opt.title, - description: disabled ? 'Deploy engine first' : opt.description, + description: gated ? 'Coming soon' : disabled ? 'Deploy engine first' : opt.description, disabled, }; }), @@ -83,6 +107,7 @@ export function AddPolicyScreen({ const isFirstStep = wizard.currentIndex === 0; const goBackOrExit = isFirstStep ? 
onExit : () => wizard.goBack(); + const isGatewaySelectStep = wizard.step === 'gateway'; const isEngineStep = wizard.step === 'engine'; const isNameStep = wizard.step === 'name'; const isSourceMethodStep = wizard.step === 'source-method'; @@ -92,9 +117,52 @@ export function AddPolicyScreen({ const isGenerateDescriptionStep = wizard.step === 'source-generate-description'; const isGenerateLoadingStep = wizard.step === 'source-generate-loading'; const isGenerateReviewStep = wizard.step === 'source-generate-review'; + const isFormCategoryStep = wizard.step === 'source-form-category'; + const isFormFiltersStep = wizard.step === 'source-form-filters'; + const isFormDataPathStep = wizard.step === 'source-form-data-path'; + const isFormEffectStep = wizard.step === 'source-form-effect'; + const isFormReviewStep = wizard.step === 'source-form-review'; const isValidationStep = wizard.step === 'validation-mode'; + const isEnforcementStep = wizard.step === 'enforcement-mode'; const isConfirmStep = wizard.step === 'confirm'; + // ─── Standard navigation hooks ──────────────────────────────────────────────── + + const hasGateways = Object.keys(deployedGateways).length > 0; + + const deployedGatewayItems: SelectableItem[] = useMemo( + () => + Object.entries(deployedGateways).map(([name, arn]) => ({ + id: name, + title: name, + description: arn.split(':').slice(-1)[0], + })), + [deployedGateways] + ); + + const gatewaySelectNav = useListNavigation({ + items: deployedGatewayItems, + onSelect: item => wizard.setGatewayForPolicy(item.id), + onExit: goBackOrExit, + isActive: isGatewaySelectStep && hasGateways, + }); + + // Target items based on selected gateway + const isTargetStep = wizard.step === 'target'; + + const targetItems: SelectableItem[] = useMemo(() => { + const gw = projectGateways.find(g => g.name === wizard.config.gatewayName); + if (!gw) return []; + return gw.httpTargets.map(t => ({ id: t, title: t, description: 'HTTP runtime target' })); + }, [projectGateways, 
wizard.config.gatewayName]); + + const targetNav = useListNavigation({ + items: targetItems, + onSelect: item => wizard.setTargetForPolicy(item.id), + onExit: goBackOrExit, + isActive: isTargetStep, + }); + const engineNav = useListNavigation({ items: engineItems, onSelect: item => wizard.setEngine(item.id), @@ -126,6 +194,18 @@ export function AddPolicyScreen({ isActive: isValidationStep, }); + const enforcementModeItems = useMemo( + () => ENFORCEMENT_MODE_OPTIONS.map(opt => ({ id: opt.id, title: opt.title, description: opt.description })), + [] + ); + + const enforcementNav = useListNavigation({ + items: enforcementModeItems, + onSelect: item => wizard.setEnforcementMode(item.id), + onExit: goBackOrExit, + isActive: isEnforcementStep, + }); + useListNavigation({ items: [{ id: 'confirm', title: 'Confirm' }], onSelect: () => onComplete(wizard.config), @@ -133,7 +213,101 @@ export function AddPolicyScreen({ isActive: isConfirmStep, }); - // Handle generation review: accept or go back + // ─── Form mode: Category select ────────────────────────────────────────────── + + const categoryItems: SelectableItem[] = useMemo( + () => + GUARDRAIL_CATEGORY_OPTIONS.map(opt => ({ + id: opt.id, + title: opt.title, + description: opt.description, + })), + [] + ); + + const categoryNav = useListNavigation({ + items: categoryItems, + onSelect: item => wizard.setFormCategory(item.id as GuardrailCategoryType), + onExit: goBackOrExit, + isActive: isFormCategoryStep, + }); + + // ─── Form mode: Effect select (permit/forbid) ────────────────────────────── + + const effectItems: SelectableItem[] = useMemo( + () => POLICY_EFFECT_OPTIONS.map(opt => ({ id: opt.id, title: opt.title, description: opt.description })), + [] + ); + + const effectNav = useListNavigation({ + items: effectItems, + onSelect: item => wizard.setFormEffect(item.id as PolicyEffect), + onExit: goBackOrExit, + isActive: isFormEffectStep, + }); + + // ─── Form mode: Filter multi-select 
─────────────────────────────────────────── + + const filterItems: SelectableItem[] = useMemo(() => { + const cat = wizard.config.guardrailForm.category; + if (!cat) return []; + const opt = GUARDRAIL_CATEGORY_OPTIONS.find(o => o.id === cat); + if (!opt) return []; + return opt.filters.map(f => ({ id: f, title: f })); + }, [wizard.config.guardrailForm.category]); + + const filterNav = useMultiSelectNavigation({ + items: filterItems, + getId: item => item.id, + onConfirm: ids => { + if (ids.length > 0) { + wizard.setFormFilters(ids); + } + }, + onExit: goBackOrExit, + isActive: isFormFiltersStep, + requireSelection: true, + }); + + // ─── Form mode: Review ──────────────────────────────────────────────────────── + + const formCedar = useMemo(() => { + if (!isFormReviewStep) return ''; + return synthesizeCedar(wizard.config.guardrailForm, { + targetName: wizard.config.targetName ?? undefined, + gatewayArn: deployedGateways[wizard.config.gatewayName] ?? undefined, + }); + }, [ + isFormReviewStep, + wizard.config.guardrailForm, + wizard.config.targetName, + wizard.config.gatewayName, + deployedGateways, + ]); + + const formReviewItems: SelectableItem[] = useMemo( + () => [ + { id: 'accept', title: 'Accept policy', description: 'Use this generated policy' }, + { id: 'edit', title: 'Edit selections', description: 'Go back and change filters/thresholds' }, + ], + [] + ); + + const formReviewNav = useListNavigation({ + items: formReviewItems, + onSelect: item => { + if (item.id === 'accept') { + wizard.acceptFormReview(formCedar); + } else { + wizard.goBack(); + } + }, + onExit: goBackOrExit, + isActive: isFormReviewStep, + }); + + // ─── Generate mode: Review ──────────────────────────────────────────────────── + const reviewItems: SelectableItem[] = useMemo( () => [ { id: 'accept', title: 'Accept generated policy', description: 'Use this policy' }, @@ -168,14 +342,12 @@ export function AddPolicyScreen({ isActive: isGenerateReviewStep && !generationError, }); - // Real 
policy generation when entering the loading step + // ─── Generate mode: Loading effect ──────────────────────────────────────────── + useEffect(() => { if (!isGenerateLoadingStep) return undefined; if (skipGeneration.current) { skipGeneration.current = false; - // Navigate back past the loading step to the description step. - // This runs after React re-rendered with the loading step active, - // so goBack() correctly sees 'source-generate-loading' as current step. wizard.goBack(); return undefined; } @@ -186,8 +358,6 @@ export function AddPolicyScreen({ try { const regionResult = await detectRegion(); const region = regionResult.region; - - // policyEngineId is needed; get it from deployed state const policyEngineId = await policyEnginePrimitive.getDeployedEngineId(wizard.config.engine); if (!policyEngineId) { @@ -227,7 +397,6 @@ export function AddPolicyScreen({ } void generate(); - return () => { cancelled = true; }; @@ -239,32 +408,75 @@ export function AddPolicyScreen({ wizard, ]); - // Determine help text + // ─── Help text ──────────────────────────────────────────────────────────────── + const helpText: string = - isEngineStep || isSourceMethodStep || isValidationStep || isGenerateReviewStep || isGatewayStep + isEngineStep || + isSourceMethodStep || + isValidationStep || + isGenerateReviewStep || + isGatewayStep || + isFormCategoryStep || + isFormReviewStep || + isFormEffectStep || + isGatewaySelectStep || + isTargetStep || + isEnforcementStep ? HELP_TEXT.NAVIGATE_SELECT - : isConfirmStep - ? HELP_TEXT.CONFIRM_CANCEL - : isGenerateLoadingStep - ? HELP_TEXT.BACK - : HELP_TEXT.TEXT_INPUT; + : isFormFiltersStep + ? 'Space toggle · Enter confirm · Esc back' + : isConfirmStep + ? HELP_TEXT.CONFIRM_CANCEL + : isGenerateLoadingStep + ? HELP_TEXT.BACK + : HELP_TEXT.TEXT_INPUT; const headerContent = ; const validationModeLabel = wizard.config.validationMode === 'FAIL_ON_ANY_FINDINGS' ? 
'Fail on any findings' : 'Ignore all findings'; - // Determine the cedar source display for confirm screen const cedarSourceDisplay = wizard.config.sourceMethod === 'file' ? wizard.config.sourceFile : wizard.config.sourceMethod === 'generate' ? `Generated from: "${wizard.config.naturalLanguageDescription}"` - : '(inline statement)'; + : wizard.config.sourceMethod === 'form' + ? `Form: ${wizard.config.guardrailForm.category} (${wizard.config.guardrailForm.filters.length} filters)` + : '(inline statement)'; + + // ─── Render ─────────────────────────────────────────────────────────────────── return ( + {isGatewaySelectStep && hasGateways && ( + + )} + + {isGatewaySelectStep && !hasGateways && ( + + No deployed gateways found. + + Run `agentcore deploy` to deploy a gateway first. + + + )} + + {isTargetStep && ( + + )} + {isEngineStep && ( @@ -306,7 +518,7 @@ export function AddPolicyScreen({ {isSourceInlineStep && ( - Generating Cedar policy from description... + Generating policy from description... 
“{wizard.config.naturalLanguageDescription}” @@ -356,7 +568,7 @@ export function AddPolicyScreen({ {isGenerateReviewStep && generatedPolicy && !generationError && ( - Generated Cedar policy: + Generated policy: {generatedPolicy.split('\n').map((line, i) => ( @@ -373,10 +585,81 @@ export function AddPolicyScreen({ )} + {isFormEffectStep && ( + + )} + + {isFormCategoryStep && ( + + )} + + {isFormFiltersStep && ( + + )} + + {isFormDataPathStep && ( + + )} + + {isFormReviewStep && ( + + Generated policy from guardrail form: + + {formCedar.split('\n').map((line, i) => ( + + {line} + + ))} + + + Authorization phase: {wizard.config.guardrailForm.effect === 'suppressOutput' ? 'RETURN_OUTPUT' : 'INITIATE'} + + + + )} + + {isEnforcementStep && ( + + )} + {isValidationStep && ( @@ -387,7 +670,8 @@ export function AddPolicyScreen({ fields={[ { label: 'Engine', value: wizard.config.engine }, { label: 'Name', value: wizard.config.name }, - { label: 'Cedar source', value: cedarSourceDisplay }, + { label: 'Policy source', value: cedarSourceDisplay }, + { label: 'Enforcement', value: wizard.config.enforcementMode === 'ACTIVE' ? 
'Active' : 'Log only' }, { label: 'Validation', value: validationModeLabel }, ]} /> diff --git a/src/cli/tui/screens/policy/__tests__/synthesize-cedar.test.ts b/src/cli/tui/screens/policy/__tests__/synthesize-cedar.test.ts new file mode 100644 index 000000000..3f4324f2c --- /dev/null +++ b/src/cli/tui/screens/policy/__tests__/synthesize-cedar.test.ts @@ -0,0 +1,128 @@ +import { synthesizeCedar } from '../synthesize-cedar.js'; +import type { GuardrailFormConfig } from '../types.js'; +import { describe, expect, it } from 'vitest'; + +describe('synthesizeCedar', () => { + const baseForm: GuardrailFormConfig = { + category: 'contentFilter', + filters: ['VIOLENCE'], + effect: 'forbid', + dataPath: 'context.input.prompt', + }; + + it('returns comment when no category is set', () => { + const form: GuardrailFormConfig = { category: null, filters: [], effect: 'forbid', dataPath: '' }; + expect(synthesizeCedar(form)).toBe('// No guardrail rules configured'); + }); + + it('returns comment when filters are empty', () => { + const form: GuardrailFormConfig = { category: 'contentFilter', filters: [], effect: 'forbid', dataPath: '' }; + expect(synthesizeCedar(form)).toBe('// No guardrail rules configured'); + }); + + it('generates forbid policy with single content filter', () => { + const result = synthesizeCedar(baseForm); + expect(result).toContain('forbid (principal, action, resource is AgentCore::Gateway)'); + expect(result).toContain('BedrockGuardrails::ContentFilter'); + expect(result).toContain('["VIOLENCE"]'); + expect(result).toContain('["VIOLENCE"].confidenceScore'); + expect(result).toContain('.greaterThan(decimal("0.2"))'); + expect(result).toContain('[context.input.prompt]'); + }); + + it('generates permit policy with single filter using lessThanOrEqual', () => { + const form: GuardrailFormConfig = { ...baseForm, effect: 'permit' }; + const result = synthesizeCedar(form); + expect(result).toContain('permit (principal, action, resource is AgentCore::Gateway)'); + 
expect(result).toContain('.lessThanOrEqual(decimal("0.2"))'); + }); + + it('uses maxConfidenceScore for multiple filters', () => { + const form: GuardrailFormConfig = { ...baseForm, filters: ['VIOLENCE', 'HATE'] }; + const result = synthesizeCedar(form); + expect(result).toContain('["VIOLENCE", "HATE"]'); + expect(result).toContain('.maxConfidenceScore().greaterThan(decimal("0.2"))'); + expect(result).not.toContain('confidenceScore'); + }); + + it('uses promptAttack function and threshold', () => { + const form: GuardrailFormConfig = { + category: 'promptAttack', + filters: ['JAILBREAK'], + effect: 'forbid', + dataPath: 'context.input.prompt', + }; + const result = synthesizeCedar(form); + expect(result).toContain('BedrockGuardrails::PromptAttack'); + expect(result).toContain('.greaterThan(decimal("0.4"))'); + }); + + it('uses sensitiveInformation function and threshold', () => { + const form: GuardrailFormConfig = { + category: 'sensitiveInformation', + filters: ['EMAIL', 'PHONE'], + effect: 'forbid', + dataPath: 'context.input.prompt', + }; + const result = synthesizeCedar(form); + expect(result).toContain('BedrockGuardrails::SensitiveInformation'); + expect(result).toContain('["EMAIL", "PHONE"]'); + expect(result).toContain('.maxConfidenceScore().greaterThan(decimal("0.2"))'); + }); + + it('includes targetName in action reference when provided', () => { + const result = synthesizeCedar(baseForm, { targetName: 'my-target' }); + expect(result).toContain('action == AgentCore::Action::"my-target___POST:/invocations"'); + }); + + it('includes gatewayArn in resource reference when provided', () => { + const result = synthesizeCedar(baseForm, { gatewayArn: 'arn:aws:agentcore:us-east-1:123456:gateway/gw-123' }); + expect(result).toContain('resource == AgentCore::Gateway::"arn:aws:agentcore:us-east-1:123456:gateway/gw-123"'); + }); + + it('includes both targetName and gatewayArn when provided', () => { + const result = synthesizeCedar(baseForm, { + targetName: 'prod', + 
gatewayArn: 'arn:aws:agentcore:us-east-1:123456:gateway/gw-abc', + }); + expect(result).toContain('action == AgentCore::Action::"prod___POST:/invocations"'); + expect(result).toContain('resource == AgentCore::Gateway::"arn:aws:agentcore:us-east-1:123456:gateway/gw-abc"'); + }); + + it('uses custom dataPath', () => { + const form: GuardrailFormConfig = { ...baseForm, dataPath: 'context.output.response' }; + const result = synthesizeCedar(form); + expect(result).toContain('[context.output.response]'); + }); + + it('generates suppressOutput policy using greaterThan and an output data path by default', () => { + const form: GuardrailFormConfig = { ...baseForm, effect: 'suppressOutput', dataPath: '' }; + const result = synthesizeCedar(form); + expect(result).toContain('suppressOutput (principal, action, resource is AgentCore::Gateway)'); + expect(result).toContain('.greaterThan(decimal("0.2"))'); + // suppressOutput evaluates the model response, so it defaults to context.output.* + expect(result).toContain('[context.output.prompt]'); + expect(result).not.toContain('[context.input.prompt]'); + }); + + it('respects an explicit dataPath for suppressOutput', () => { + const form: GuardrailFormConfig = { + ...baseForm, + effect: 'suppressOutput', + dataPath: 'context.output.response', + }; + const result = synthesizeCedar(form); + expect(result).toContain('[context.output.response]'); + }); + + it('uses maxConfidenceScore + greaterThan for multi-filter suppressOutput', () => { + const form: GuardrailFormConfig = { + ...baseForm, + effect: 'suppressOutput', + filters: ['VIOLENCE', 'HATE'], + dataPath: '', + }; + const result = synthesizeCedar(form); + expect(result).toContain('.maxConfidenceScore().greaterThan(decimal("0.2"))'); + }); +}); diff --git a/src/cli/tui/screens/policy/__tests__/useAddPolicyWizard.render.test.tsx b/src/cli/tui/screens/policy/__tests__/useAddPolicyWizard.render.test.tsx new file mode 100644 index 000000000..456e42c51 --- /dev/null +++ 
b/src/cli/tui/screens/policy/__tests__/useAddPolicyWizard.render.test.tsx @@ -0,0 +1,76 @@ +// Render-level tests for the add-policy wizard hook. These mount the real hook +// and assert its live step state — specifically that with no deployed gateways +// the wizard opens on a usable step (never the "No deployed gateways" dead-end) +// and can advance through the policy steps without ever hitting gateway/target. +import type { AddPolicyStep } from '../types'; +import { useAddPolicyWizard } from '../useAddPolicyWizard'; +import { Text } from 'ink'; +import { render } from 'ink-testing-library'; +import React, { act, useImperativeHandle } from 'react'; +import { describe, expect, it } from 'vitest'; + +type WizardReturn = ReturnType; + +interface HarnessHandle { + wizard: WizardReturn; +} + +interface HarnessProps { + preSelectedEngine?: string; + hasDeployedGateways: boolean; +} + +const Harness = React.forwardRef(({ preSelectedEngine, hasDeployedGateways }, ref) => { + const wizard = useAddPolicyWizard(preSelectedEngine, hasDeployedGateways); + useImperativeHandle(ref, () => ({ wizard }), [wizard]); + return ( + + step:{wizard.step} steps:{wizard.steps.join(',')} + + ); +}); +Harness.displayName = 'Harness'; + +function mount(props: HarnessProps) { + const ref = React.createRef(); + const { lastFrame } = render(); + return { ref, lastFrame }; +} + +describe('useAddPolicyWizard — gateway/target skipping', () => { + it('opens on a non-gateway step when no gateway is deployed', () => { + const { ref, lastFrame } = mount({ hasDeployedGateways: false }); + // Must NOT open on the gateway step (the dead-end when nothing is deployed). 
+ expect(ref.current!.wizard.step).not.toBe('gateway'); + expect(ref.current!.wizard.step).toBe('engine'); + expect(ref.current!.wizard.steps).not.toContain('gateway'); + expect(ref.current!.wizard.steps).not.toContain('target'); + expect(lastFrame()).toContain('step:engine'); + }); + + it('opens on the gateway step when a gateway is deployed', () => { + const { ref } = mount({ hasDeployedGateways: true }); + expect(ref.current!.wizard.step).toBe('gateway'); + expect(ref.current!.wizard.steps).toContain('gateway'); + expect(ref.current!.wizard.steps).toContain('target'); + }); + + it('advances engine -> name -> source-method without touching gateway/target when none deployed', () => { + const { ref } = mount({ hasDeployedGateways: false }); + expect(ref.current!.wizard.step).toBe('engine'); + + act(() => ref.current!.wizard.setEngine('eng')); + expect(ref.current!.wizard.step).toBe('name'); + + act(() => ref.current!.wizard.setName('p1')); + expect(ref.current!.wizard.step).toBe('source-method'); + + // Picking the inline source advances into its step — no gateway prompt in between. 
+ act(() => ref.current!.wizard.setSourceMethod('inline')); + expect(ref.current!.wizard.step).toBe('source-inline'); + + const seen: AddPolicyStep[] = ref.current!.wizard.steps; + expect(seen).not.toContain('gateway'); + expect(seen).not.toContain('target'); + }); +}); diff --git a/src/cli/tui/screens/policy/synthesize-cedar.ts b/src/cli/tui/screens/policy/synthesize-cedar.ts new file mode 100644 index 000000000..c2c5da381 --- /dev/null +++ b/src/cli/tui/screens/policy/synthesize-cedar.ts @@ -0,0 +1,58 @@ +import { type GuardrailCategoryType, type GuardrailFormConfig, defaultDataPathForEffect } from './types'; + +const GUARDRAIL_FUNCTION_MAP: Record<GuardrailCategoryType, string> = { + contentFilter: 'BedrockGuardrails::ContentFilter', + promptAttack: 'BedrockGuardrails::PromptAttack', + sensitiveInformation: 'BedrockGuardrails::SensitiveInformation', +}; + +// Default thresholds per category type +const DEFAULT_THRESHOLDS: Record<GuardrailCategoryType, number> = { + contentFilter: 0.2, + promptAttack: 0.4, + sensitiveInformation: 0.2, +}; + +export interface SynthesizeCedarOptions { + targetName?: string; + gatewayArn?: string; +} + +/** + * Synthesize a Cedar policy from a guardrail form config. + * + * Single filter: ...["FILTER"].confidenceScore.COMPARATOR(decimal("0.4")) + * Multiple filters: ...maxConfidenceScore().COMPARATOR(decimal("0.4")) + * COMPARATOR is greaterThan for forbid/suppressOutput and lessThanOrEqual for permit. + * + * `suppressOutput` is an output-phase forbid: it evaluates `context.output.*` and + * blocks the model response when the score exceeds the threshold (greaterThan), + * so it shares forbid's comparator but defaults to an output data path.
+ */ +export function synthesizeCedar(form: GuardrailFormConfig, options: SynthesizeCedarOptions = {}): string { + if (!form.category || form.filters.length === 0) { + return '// No guardrail rules configured'; + } + + const { targetName, gatewayArn } = options; + const fn = GUARDRAIL_FUNCTION_MAP[form.category]; + const gwRef = gatewayArn ? `resource == AgentCore::Gateway::"${gatewayArn}"` : 'resource is AgentCore::Gateway'; + const actionRef = targetName ? `action == AgentCore::Action::"${targetName}___POST:/invocations"` : 'action'; + const dataPath = form.dataPath || defaultDataPathForEffect(form.effect); + const threshold = DEFAULT_THRESHOLDS[form.category]; + // permit allows at or below threshold; forbid and suppressOutput block above it. + const comparator = form.effect === 'permit' ? 'lessThanOrEqual' : 'greaterThan'; + const filterList = `[${form.filters.map(f => `"${f}"`).join(', ')}]`; + + let scoreExpr: string; + if (form.filters.length === 1) { + scoreExpr = `["${form.filters[0]}"].confidenceScore`; + } else { + scoreExpr = '.maxConfidenceScore()'; + } + + return ( + `${form.effect} (principal, ${actionRef}, ${gwRef})\n` + + `when guardrails { ${fn}(${filterList}, [${dataPath}])${scoreExpr}.${comparator}(decimal("${threshold.toFixed(1)}")) };` + ); +} diff --git a/src/cli/tui/screens/policy/types.ts b/src/cli/tui/screens/policy/types.ts index 8d7066e7b..b1bceaf5b 100644 --- a/src/cli/tui/screens/policy/types.ts +++ b/src/cli/tui/screens/policy/types.ts @@ -4,9 +4,129 @@ export type PolicyResourceType = 'policy-engine' | 'policy'; -export type PolicySourceMethod = 'file' | 'inline' | 'generate'; +export type PolicySourceMethod = 'file' | 'inline' | 'generate' | 'form'; + +// ───────────────────────────────────────────────────────────────────────────── +// Guardrail Model (matches KobaPolicyEvaluator Smithy model) +// 
───────────────────────────────────────────────────────────────────────────── + +export type GuardrailCategoryType = 'contentFilter' | 'promptAttack' | 'sensitiveInformation'; + +export const CONTENT_FILTER_FILTERS = ['VIOLENCE', 'HATE', 'SEXUAL', 'MISCONDUCT', 'INSULT'] as const; +export type ContentFilterCategory = (typeof CONTENT_FILTER_FILTERS)[number]; + +export const PROMPT_ATTACK_FILTERS = ['JAILBREAK', 'PROMPT_INJECTION', 'PROMPT_LEAKAGE'] as const; +export type PromptAttackCategory = (typeof PROMPT_ATTACK_FILTERS)[number]; + +export const SENSITIVE_INFO_FILTERS = [ + 'ADDRESS', + 'AGE', + 'AWS_ACCESS_KEY', + 'AWS_SECRET_KEY', + 'CA_HEALTH_NUMBER', + 'CA_SOCIAL_INSURANCE_NUMBER', + 'CREDIT_DEBIT_CARD_CVV', + 'CREDIT_DEBIT_CARD_EXPIRY', + 'CREDIT_DEBIT_CARD_NUMBER', + 'DRIVER_ID', + 'EMAIL', + 'INTERNATIONAL_BANK_ACCOUNT_NUMBER', + 'IP_ADDRESS', + 'LICENSE_PLATE', + 'MAC_ADDRESS', + 'NAME', + 'PASSWORD', + 'PHONE', + 'PIN', + 'SWIFT_CODE', + 'UK_NATIONAL_HEALTH_SERVICE_NUMBER', + 'UK_NATIONAL_INSURANCE_NUMBER', + 'UK_UNIQUE_TAXPAYER_REFERENCE_NUMBER', + 'URL', + 'USERNAME', + 'US_BANK_ACCOUNT_NUMBER', + 'US_BANK_ROUTING_NUMBER', + 'US_INDIVIDUAL_TAX_IDENTIFICATION_NUMBER', + 'US_PASSPORT_NUMBER', + 'US_SOCIAL_SECURITY_NUMBER', + 'VEHICLE_IDENTIFICATION_NUMBER', +] as const; +export type SensitiveInformationEntityType = (typeof SENSITIVE_INFO_FILTERS)[number]; + +export type GuardrailFilter = ContentFilterCategory | PromptAttackCategory | SensitiveInformationEntityType; + +export const GUARDRAIL_CATEGORY_OPTIONS: { + id: GuardrailCategoryType; + title: string; + description: string; + filters: readonly string[]; +}[] = [ + { + id: 'contentFilter', + title: 'Content Filter', + description: 'Violence, hate, sexual, misconduct, insults', + filters: CONTENT_FILTER_FILTERS, + }, + { + id: 'promptAttack', + title: 'Prompt Attack', + description: 'Jailbreak, injection, leakage', + filters: PROMPT_ATTACK_FILTERS, + }, + { + id: 'sensitiveInformation', + title: 
'Sensitive Information', + description: 'PII & credentials detection', + filters: SENSITIVE_INFO_FILTERS, + }, +]; + +export type PolicyEffect = 'permit' | 'forbid' | 'suppressOutput'; + +/** + * `permit`/`forbid` evaluate at request time (INITIATE phase) against input data. + * `suppressOutput` is an output-phase effect: it evaluates the model's response + * (RETURN_OUTPUT phase) against `context.output.*` and blocks the response when a + * guardrail trips. The Koba registry infers the phase from the effect keyword and + * rejects input data paths for `suppressOutput`. + */ +export const POLICY_EFFECT_OPTIONS: { id: PolicyEffect; title: string; description: string }[] = [ + { id: 'forbid', title: 'Forbid', description: 'Block requests that exceed threshold (greaterThan)' }, + { id: 'permit', title: 'Permit', description: 'Allow requests at or below threshold (lessThanOrEqual)' }, + { + id: 'suppressOutput', + title: 'Suppress Output', + description: "Block the model's response when output exceeds threshold (greaterThan)", + }, +]; + +/** Effects that evaluate the model output (RETURN_OUTPUT phase) rather than the request. */ +export const OUTPUT_PHASE_EFFECTS: readonly PolicyEffect[] = ['suppressOutput']; + +export const DEFAULT_INPUT_DATA_PATH = 'context.input.prompt'; +export const DEFAULT_OUTPUT_DATA_PATH = 'context.output.prompt'; + +/** The authorization phase a given effect must be registered under. */ +export function authorizationPhaseForEffect(effect: PolicyEffect): 'INITIATE' | 'RETURN_OUTPUT' { + return OUTPUT_PHASE_EFFECTS.includes(effect) ? 'RETURN_OUTPUT' : 'INITIATE'; +} + +/** The default data path to suggest for a given effect. */ +export function defaultDataPathForEffect(effect: PolicyEffect): string { + return OUTPUT_PHASE_EFFECTS.includes(effect) ?
DEFAULT_OUTPUT_DATA_PATH : DEFAULT_INPUT_DATA_PATH; +} + +/** Form config: selected category, chosen filters, effect, and data path */ +export interface GuardrailFormConfig { + category: GuardrailCategoryType | null; + filters: string[]; + effect: PolicyEffect; + dataPath: string; +} export type AddPolicyStep = + | 'gateway' + | 'target' | 'engine' | 'name' | 'source-method' @@ -16,6 +136,12 @@ export type AddPolicyStep = | 'source-generate-description' | 'source-generate-loading' | 'source-generate-review' + | 'source-form-effect' + | 'source-form-category' + | 'source-form-filters' + | 'source-form-data-path' + | 'source-form-review' + | 'enforcement-mode' | 'validation-mode' | 'confirm'; @@ -29,9 +155,13 @@ export interface AddPolicyConfig { sourceMethod: PolicySourceMethod; statement: string; sourceFile: string; + gatewayName: string; + targetName: string; gatewayArn: string; naturalLanguageDescription: string; validationMode: 'FAIL_ON_ANY_FINDINGS' | 'IGNORE_ALL_FINDINGS'; + enforcementMode: 'ACTIVE' | 'LOG_ONLY'; + guardrailForm: GuardrailFormConfig; } // ───────────────────────────────────────────────────────────────────────────── @@ -39,15 +169,23 @@ export interface AddPolicyConfig { // ───────────────────────────────────────────────────────────────────────────── export const POLICY_STEP_LABELS: Record = { + gateway: 'Gateway', + target: 'Target', engine: 'Engine', name: 'Name', 'source-method': 'Source', 'source-file': 'File', - 'source-inline': 'Cedar', + 'source-inline': 'Policy', 'source-generate-gateway': 'Gateway', 'source-generate-description': 'Describe', 'source-generate-loading': 'Generating', 'source-generate-review': 'Review', + 'source-form-effect': 'Effect', + 'source-form-category': 'Category', + 'source-form-filters': 'Filters', + 'source-form-data-path': 'Data Path', + 'source-form-review': 'Review', + 'enforcement-mode': 'Enforcement', 'validation-mode': 'Validation', confirm: 'Confirm', }; @@ -56,29 +194,47 @@ export const 
VALIDATION_MODE_OPTIONS = [ { id: 'FAIL_ON_ANY_FINDINGS', title: 'Fail on any findings', - description: 'Block policies that fail Cedar analyzer validation', + description: 'Block policies that fail analyzer validation', }, { id: 'IGNORE_ALL_FINDINGS', title: 'Ignore all findings', - description: 'Skip Cedar analyzer validation checks', + description: 'Skip analyzer validation checks', + }, +] as const; + +export const ENFORCEMENT_MODE_OPTIONS = [ + { + id: 'ACTIVE', + title: 'Active', + description: 'Policy decisions are enforced on requests', + }, + { + id: 'LOG_ONLY', + title: 'Log only', + description: 'Policy is evaluated but decisions are observed only (shadow mode)', }, ] as const; export const POLICY_SOURCE_METHOD_OPTIONS = [ + { + id: 'form' as const, + title: 'Use a form', + description: 'Guardrail categories, filters & thresholds', + }, { id: 'file' as const, - title: 'Select a Cedar policy file', + title: 'Select a policy file', description: 'From your project', }, { id: 'inline' as const, - title: 'Write a Cedar policy', - description: 'Type Cedar directly', + title: 'Write a policy', + description: 'Type policy directly', }, { id: 'generate' as const, - title: 'Generate a Cedar policy', + title: 'Generate a policy', description: 'From natural language', }, ] as const; @@ -92,6 +248,6 @@ export const POLICY_RESOURCE_OPTIONS = [ { id: 'policy' as const, title: 'Policy', - description: 'Cedar policy within an engine', + description: 'Policy within an engine', }, ] as const; diff --git a/src/cli/tui/screens/policy/useAddPolicyWizard.ts b/src/cli/tui/screens/policy/useAddPolicyWizard.ts index 69664083f..4973383e5 100644 --- a/src/cli/tui/screens/policy/useAddPolicyWizard.ts +++ b/src/cli/tui/screens/policy/useAddPolicyWizard.ts @@ -1,11 +1,18 @@ -import type { AddPolicyConfig, AddPolicyStep, PolicySourceMethod } from './types'; +import type { AddPolicyConfig, AddPolicyStep, GuardrailCategoryType, PolicyEffect, PolicySourceMethod } from './types'; import { 
useCallback, useState } from 'react'; // Steps vary based on source method, but the wizard tracks the current step directly -const COMMON_PREFIX: AddPolicyStep[] = ['engine', 'name', 'source-method']; -const COMMON_SUFFIX: AddPolicyStep[] = ['validation-mode', 'confirm']; +const COMMON_PREFIX: AddPolicyStep[] = ['gateway', 'target', 'engine', 'name', 'source-method']; +const COMMON_SUFFIX: AddPolicyStep[] = ['enforcement-mode', 'validation-mode', 'confirm']; const SOURCE_STEPS: Record = { + form: [ + 'source-form-effect', + 'source-form-category', + 'source-form-filters', + 'source-form-data-path', + 'source-form-review', + ], file: ['source-file'], inline: ['source-inline'], generate: [ @@ -16,8 +23,21 @@ const SOURCE_STEPS: Record = { ], }; -function getSteps(sourceMethod: PolicySourceMethod | null, skipEngine: boolean): AddPolicyStep[] { - const prefix = skipEngine ? COMMON_PREFIX.filter(s => s !== 'engine') : COMMON_PREFIX; +function getSteps( + sourceMethod: PolicySourceMethod | null, + skipEngine: boolean, + hasDeployedGateways: boolean +): AddPolicyStep[] { + // The gateway/target steps scope the policy to a deployed gateway. With no + // deployed gateways there is nothing to pick, so skip them (the policy is + // still valid — just not gateway-scoped) instead of dead-ending the wizard. + const skip = new Set(); + if (skipEngine) skip.add('engine'); + if (!hasDeployedGateways) { + skip.add('gateway'); + skip.add('target'); + } + const prefix = COMMON_PREFIX.filter(s => !skip.has(s)); const sourceSteps = sourceMethod ? 
SOURCE_STEPS[sourceMethod] : []; return [...prefix, ...sourceSteps, ...COMMON_SUFFIX]; } @@ -29,43 +49,64 @@ function getDefaultConfig(preSelectedEngine?: string): AddPolicyConfig { sourceMethod: 'file', statement: '', sourceFile: '', + gatewayName: '', + targetName: '', gatewayArn: '', naturalLanguageDescription: '', validationMode: 'FAIL_ON_ANY_FINDINGS', + enforcementMode: 'ACTIVE', + guardrailForm: { category: null, filters: [], effect: 'forbid', dataPath: '' }, }; } -export function useAddPolicyWizard(preSelectedEngine?: string) { +export function useAddPolicyWizard(preSelectedEngine?: string, hasDeployedGateways = true) { const skipEngine = !!preSelectedEngine; const [config, setConfig] = useState(() => getDefaultConfig(preSelectedEngine)); - const initialStep: AddPolicyStep = skipEngine ? 'name' : 'engine'; + // Start on the first step that survives skipping (gateway/target are dropped + // when nothing is deployed), so the wizard never opens on a dead-end screen. + const initialStep: AddPolicyStep = getSteps(null, skipEngine, hasDeployedGateways)[0]!; const [step, setStep] = useState(initialStep); const [sourceMethod, setSourceMethodState] = useState(null); - const steps = getSteps(sourceMethod, skipEngine); + const steps = getSteps(sourceMethod, skipEngine, hasDeployedGateways); const currentIndex = steps.indexOf(step); const goBack = useCallback(() => { - const allSteps = getSteps(sourceMethod, skipEngine); + const allSteps = getSteps(sourceMethod, skipEngine, hasDeployedGateways); const idx = allSteps.indexOf(step); if (idx > 0) { const prevStep = allSteps[idx - 1]!; - // If going back from a source sub-step to source-method, clear the source method if (prevStep === 'source-method') { setSourceMethodState(null); } setStep(prevStep); } - }, [sourceMethod, step, skipEngine]); + }, [sourceMethod, step, skipEngine, hasDeployedGateways]); const advance = useCallback( (fromStep: AddPolicyStep) => { - const allSteps = getSteps(sourceMethod, skipEngine); + const 
allSteps = getSteps(sourceMethod, skipEngine, hasDeployedGateways); const idx = allSteps.indexOf(fromStep); const next = allSteps[idx + 1]; if (next) setStep(next); }, - [sourceMethod, skipEngine] + [sourceMethod, skipEngine, hasDeployedGateways] + ); + + const setGatewayForPolicy = useCallback( + (gatewayName: string) => { + setConfig(c => ({ ...c, gatewayName })); + advance('gateway'); + }, + [advance] + ); + + const setTargetForPolicy = useCallback( + (targetName: string) => { + setConfig(c => ({ ...c, targetName })); + advance('target'); + }, + [advance] ); const setEngine = useCallback( @@ -88,13 +129,12 @@ export function useAddPolicyWizard(preSelectedEngine?: string) { (method: PolicySourceMethod) => { setSourceMethodState(method); setConfig(c => ({ ...c, sourceMethod: method })); - // Compute next step with the new source method - const allSteps = getSteps(method, skipEngine); + const allSteps = getSteps(method, skipEngine, hasDeployedGateways); const idx = allSteps.indexOf('source-method'); const next = allSteps[idx + 1]; if (next) setStep(next); }, - [skipEngine] + [skipEngine, hasDeployedGateways] ); const setSourceFile = useCallback( @@ -137,7 +177,6 @@ export function useAddPolicyWizard(preSelectedEngine?: string) { [advance] ); - // Called when generation completes to move past the loading step const onGenerationComplete = useCallback( (statement: string) => { setConfig(c => ({ ...c, statement, sourceFile: '' })); @@ -154,6 +193,60 @@ export function useAddPolicyWizard(preSelectedEngine?: string) { [advance] ); + // Enforcement mode: ACTIVE or LOG_ONLY + const setEnforcementMode = useCallback( + (enforcementMode: AddPolicyConfig['enforcementMode']) => { + setConfig(c => ({ ...c, enforcementMode })); + advance('enforcement-mode'); + }, + [advance] + ); + + // Form mode: set effect (permit/forbid) + const setFormEffect = useCallback( + (effect: PolicyEffect) => { + setConfig(c => ({ ...c, guardrailForm: { ...c.guardrailForm, effect } })); + 
advance('source-form-effect'); + }, + [advance] + ); + + // Form mode: set category + const setFormCategory = useCallback( + (category: GuardrailCategoryType) => { + setConfig(c => ({ ...c, guardrailForm: { ...c.guardrailForm, category, filters: [] } })); + advance('source-form-category'); + }, + [advance] + ); + + // Form mode: set filters (multi-select within the chosen category) + const setFormFilters = useCallback( + (filters: string[]) => { + setConfig(c => ({ ...c, guardrailForm: { ...c.guardrailForm, filters } })); + advance('source-form-filters'); + }, + [advance] + ); + + // Form mode: set data path + const setFormDataPath = useCallback( + (dataPath: string) => { + setConfig(c => ({ ...c, guardrailForm: { ...c.guardrailForm, dataPath } })); + advance('source-form-data-path'); + }, + [advance] + ); + + // Form mode: accept review (store synthesized Cedar) + const acceptFormReview = useCallback( + (statement: string) => { + setConfig(c => ({ ...c, statement, sourceFile: '' })); + advance('source-form-review'); + }, + [advance] + ); + const reset = useCallback(() => { setConfig(getDefaultConfig(preSelectedEngine)); setStep(initialStep); @@ -166,6 +259,8 @@ export function useAddPolicyWizard(preSelectedEngine?: string) { steps, currentIndex, goBack, + setGatewayForPolicy, + setTargetForPolicy, setEngine, setName, setSourceMethod, @@ -176,6 +271,12 @@ export function useAddPolicyWizard(preSelectedEngine?: string) { setGeneratedStatement, onGenerationComplete, setValidationMode, + setEnforcementMode, + setFormEffect, + setFormCategory, + setFormFilters, + setFormDataPath, + acceptFormReview, reset, }; } diff --git a/src/cli/tui/screens/recommendation/RecommendationFlow.tsx b/src/cli/tui/screens/recommendation/RecommendationFlow.tsx index b044d5464..a55d9ca67 100644 --- a/src/cli/tui/screens/recommendation/RecommendationFlow.tsx +++ b/src/cli/tui/screens/recommendation/RecommendationFlow.tsx @@ -4,11 +4,9 @@ import { validateAwsCredentials } from 
'../../../aws/account'; import { listEvaluators } from '../../../aws/agentcore-control'; import { detectRegion } from '../../../aws/region'; import { getErrorMessage } from '../../../errors'; -import { applyRecommendationToBundle, runRecommendationCommand } from '../../../operations/recommendation'; -import type { RunRecommendationCommandResult } from '../../../operations/recommendation'; -import { saveRecommendationRun } from '../../../operations/recommendation/recommendation-storage'; -import { ErrorPrompt, GradientText, Panel, Screen, StepProgress } from '../../components'; -import type { Step } from '../../components'; +import { createJobEngine } from '../../../operations/jobs'; +import type { RecommendationJobRecord } from '../../../operations/jobs'; +import { ErrorPrompt, GradientText, Panel, Screen } from '../../components'; import { HELP_TEXT } from '../../constants'; import { useListNavigation } from '../../hooks'; import { RecommendationScreen } from './RecommendationScreen'; @@ -20,33 +18,24 @@ import type { RecommendationWizardConfig, } from './types'; import { Box, Text } from 'ink'; -import React, { useCallback, useEffect, useState } from 'react'; +import React, { useCallback, useEffect, useMemo, useState } from 'react'; type FlowState = | { name: 'loading' } | { name: 'wizard'; agents: AgentItem[]; evaluators: EvaluatorItem[]; configBundles: ConfigBundleItem[] } - | { - name: 'running'; - config: RecommendationWizardConfig; - steps: Step[]; - elapsed: number; - recommendationId?: string; - region?: string; - } - | { - name: 'results'; - result: Extract; - config: RecommendationWizardConfig; - filePath?: string; - } + | { name: 'starting'; config: RecommendationWizardConfig } + | { name: 'started'; record: RecommendationJobRecord; config: RecommendationWizardConfig } | { name: 'creds-error'; message: string } | { name: 'error'; message: string; logFilePath?: string }; interface RecommendationFlowProps { onExit: () => void; + /** Navigate to the 
Recommendation Jobs screen (falls back to onExit when not provided). */ + onViewJobs?: () => void; } -export function RecommendationFlow({ onExit }: RecommendationFlowProps) { +export function RecommendationFlow({ onExit, onViewJobs }: RecommendationFlowProps) { + const engine = useMemo(() => createJobEngine(new ConfigIO()), []); const [flow, setFlow] = useState({ name: 'loading' }); // Load agents and evaluators @@ -101,44 +90,19 @@ export function RecommendationFlow({ onExit }: RecommendationFlowProps) { }, [flow.name]); const handleRunComplete = useCallback((config: RecommendationWizardConfig) => { - const willFetchSpans = config.traceSource === 'sessions'; - - const initialSteps: Step[] = [ - ...(willFetchSpans ? [{ label: 'Fetching session spans from CloudWatch...', status: 'pending' as const }] : []), - { label: 'Starting recommendation...', status: 'running' }, - { label: 'Polling for results', status: 'pending' }, - { label: 'Saving results', status: 'pending' }, - ]; - - // If auto-fetching, the first step is active - if (willFetchSpans) { - initialSteps[0] = { ...initialSteps[0]!, status: 'running' }; - initialSteps[1] = { ...initialSteps[1]!, status: 'pending' }; - } - - setFlow({ name: 'running', config, steps: initialSteps, elapsed: 0 }); + setFlow({ name: 'starting', config }); }, []); - // Execute the recommendation when entering 'running' state + // Fire-and-forget: start the recommendation job, then show the Started confirmation screen. 
useEffect(() => { - if (flow.name !== 'running') return; + if (flow.name !== 'starting') return; let cancelled = false; const { config } = flow; - const startTime = Date.now(); - - const timer = setInterval(() => { - if (!cancelled) { - setFlow(prev => { - if (prev.name !== 'running') return prev; - return { ...prev, elapsed: Math.floor((Date.now() - startTime) / 1000) }; - }); - } - }, 1000); void (async () => { try { - const result = await runRecommendationCommand({ + const result = await engine.start('recommendation', { type: config.type, agent: config.agent, evaluators: config.evaluators, @@ -164,104 +128,24 @@ export function RecommendationFlow({ onExit }: RecommendationFlowProps) { traceSource: config.traceSource, lookbackDays: config.days, sessionIds: config.sessionIds.length > 0 ? config.sessionIds : undefined, - onProgress: (status, _message) => { - if (cancelled) return; - const hasFetchStep = config.traceSource === 'sessions'; - const offset = hasFetchStep ? 1 : 0; - - setFlow(prev => { - if (prev.name !== 'running') return prev; - const steps = [...prev.steps]; - if (status === 'fetching-spans') { - steps[0] = { ...steps[0]!, status: 'running' }; - } else if (status === 'starting') { - if (hasFetchStep) steps[0] = { ...steps[0]!, status: 'success' }; - steps[offset] = { ...steps[offset]!, status: 'running' }; - } else if (status === 'started' || status === 'polling') { - steps[offset] = { ...steps[offset]!, status: 'success' }; - steps[offset + 1] = { ...steps[offset + 1]!, status: 'running' }; - } - return { ...prev, steps }; - }); - }, - onStarted: info => { - setFlow(prev => { - if (prev.name !== 'running') return prev; - return { ...prev, recommendationId: info.recommendationId, region: info.region }; - }); - }, + kmsKeyArn: config.kmsKeyArn || undefined, }); - clearInterval(timer); if (cancelled) return; if (!result.success) { - setFlow(prev => { - if (prev.name !== 'running') return prev; - const steps = prev.steps.map(s => - s.status === 
'running' ? { ...s, status: 'error' as const, error: result.error.message } : s - ); - return { ...prev, steps }; - }); - await new Promise(resolve => setTimeout(resolve, 2000)); - if (cancelled) return; - setFlow({ - name: 'error', - message: result.error?.message ?? 'Recommendation failed', - logFilePath: result.logFilePath, - }); + setFlow({ name: 'error', message: result.error.message }); return; } - // Mark polling success, saving running - const hasFetchStep = config.traceSource === 'sessions'; - const offset = hasFetchStep ? 1 : 0; - - setFlow(prev => { - if (prev.name !== 'running') return prev; - const steps = [...prev.steps]; - steps[offset + 1] = { ...steps[offset + 1]!, status: 'success' }; - steps[offset + 2] = { ...steps[offset + 2]!, status: 'running' }; - return { ...prev, steps }; - }); - - // Save results locally - let filePath: string | undefined; - try { - if (result.recommendationId) { - filePath = saveRecommendationRun( - result.recommendationId, - result, - config.type, - config.agent, - config.evaluators - ); - } - } catch { - // Non-fatal - } - - setFlow({ name: 'results', result, config, filePath }); + setFlow({ name: 'started', record: result.record, config }); } catch (err) { - clearInterval(timer); - if (!cancelled) { - const errorMsg = getErrorMessage(err); - setFlow(prev => { - if (prev.name !== 'running') return prev; - const steps = prev.steps.map(s => - s.status === 'running' ? 
{ ...s, status: 'error' as const, error: errorMsg } : s - ); - return { ...prev, steps }; - }); - await new Promise(resolve => setTimeout(resolve, 2000)); - setFlow({ name: 'error', message: errorMsg }); - } + if (!cancelled) setFlow({ name: 'error', message: getErrorMessage(err) }); } })(); return () => { cancelled = true; - clearInterval(timer); }; }, [flow.name]); // eslint-disable-line react-hooks/exhaustive-deps @@ -269,7 +153,7 @@ export function RecommendationFlow({ onExit }: RecommendationFlowProps) { if (flow.name === 'loading') { return ( - + ); @@ -291,37 +175,21 @@ export function RecommendationFlow({ onExit }: RecommendationFlowProps) { ); } - if (flow.name === 'running') { - const minutes = Math.floor(flow.elapsed / 60); - const seconds = flow.elapsed % 60; - const timeStr = minutes > 0 ? `${minutes}m ${seconds}s` : `${seconds}s`; - + if (flow.name === 'starting') { return ( - - - - - Agent: {flow.config.agent} - {' '} - Evaluator(s):{' '} - {flow.config.evaluators.map(e => (e.includes('/') ? e.split('/').pop()! 
: e)).join(', ')} - {' '} - ({timeStr}) - - - - + + ); } - if (flow.name === 'results') { + if (flow.name === 'started') { return ( - setFlow({ name: 'loading' })} + onViewJobs={onViewJobs} onExit={onExit} /> ); @@ -338,57 +206,28 @@ export function RecommendationFlow({ onExit }: RecommendationFlowProps) { } // ───────────────────────────────────────────────────────────────────────────── -// Results view +// Started confirmation view // ───────────────────────────────────────────────────────────────────────────── -interface ResultsViewProps { - result: Extract; +interface StartedViewProps { + record: RecommendationJobRecord; config: RecommendationWizardConfig; - filePath?: string; onRunAnother: () => void; + onViewJobs?: () => void; onExit: () => void; } -function ResultsView({ result, config, filePath, onRunAnother, onExit }: ResultsViewProps) { - const [applyStatus, setApplyStatus] = useState<{ applied: boolean; message: string } | null>(null); - - const isConfigBundle = config.inputSource === 'config-bundle' && config.bundleName; - const hasNewVersion = - !!result.result?.systemPromptRecommendationResult?.configurationBundle || - !!result.result?.toolDescriptionRecommendationResult?.configurationBundle; - const canApply = isConfigBundle && hasNewVersion && result.region && !applyStatus; - +function StartedView({ record, config, onRunAnother, onViewJobs, onExit }: StartedViewProps) { const actions = [ - ...(canApply ? 
[{ id: 'apply', title: 'Sync new bundle version to local config' }] : []), - { id: 'another', title: 'Run another recommendation' }, + { id: 'jobs', title: 'View jobs' }, + { id: 'another', title: 'Run another' }, { id: 'back', title: 'Back' }, ]; - const handleApply = useCallback(async () => { - if (!result.result || !result.region) return; - try { - const applyResult = await applyRecommendationToBundle({ - bundleArn: config.bundleName, // TUI stores ARN in bundleName - result: result.result, - region: result.region, - }); - if (applyResult.success) { - setApplyStatus({ - applied: true, - message: `New bundle version (${applyResult.newVersionId}) created with recommended changes. Local config updated.`, - }); - } else { - setApplyStatus({ applied: false, message: applyResult.error?.message ?? 'Unknown error' }); - } - } catch (err) { - setApplyStatus({ applied: false, message: getErrorMessage(err) }); - } - }, [result, config]); - const nav = useListNavigation({ items: actions, onSelect: item => { - if (item.id === 'apply') void handleApply(); + if (item.id === 'jobs') (onViewJobs ?? 
onExit)(); else if (item.id === 'another') onRunAnother(); else onExit(); }, @@ -396,75 +235,26 @@ function ResultsView({ result, config, filePath, onRunAnother, onExit }: Results isActive: true, }); - const sysResult = result.result?.systemPromptRecommendationResult; - const toolResult = result.result?.toolDescriptionRecommendationResult; - return ( - + - ✓ Recommendation complete + ✓ Recommendation submitted - ID: {result.recommendationId} + ID: {record.id} {' '} + Status: {record.status} + + Agent: {config.agent} - {sysResult && ( - - {sysResult.recommendedSystemPrompt && ( - - - Recommended System Prompt: - - - {sysResult.recommendedSystemPrompt} - - - )} - - )} - - {toolResult?.tools && toolResult.tools.length > 0 && ( - - - Recommended Tool Descriptions: - - {toolResult.tools.map(tool => ( - - {tool.toolName} - {tool.recommendedToolDescription} - - ))} - - )} - - {!sysResult && !toolResult && ( - - No recommendation results returned. - - )} - - {filePath && ( - - Results saved to: {filePath} - - )} - - {applyStatus && ( - - {applyStatus.applied ? ( - ✓ {applyStatus.message} - ) : ( - Could not sync: {applyStatus.message} - )} - - )} + + + When it completes, view it in Recommendation Jobs — the new config bundle (if any) will be applied to + agentcore.json automatically. 
+ + {actions.map((action, idx) => { diff --git a/src/cli/tui/screens/recommendation/RecommendationHistoryScreen.tsx b/src/cli/tui/screens/recommendation/RecommendationHistoryScreen.tsx index e1ee7e9d9..b782202dd 100644 --- a/src/cli/tui/screens/recommendation/RecommendationHistoryScreen.tsx +++ b/src/cli/tui/screens/recommendation/RecommendationHistoryScreen.tsx @@ -1,10 +1,14 @@ -import type { RecommendationRunRecord } from '../../../operations/recommendation/recommendation-storage'; -import { listAllRecommendations } from '../../../operations/recommendation/recommendation-storage'; -import { Panel, Screen } from '../../components'; +import { ConfigIO } from '../../../../lib'; +import { validateAwsCredentials } from '../../../aws/account'; +import { getErrorMessage } from '../../../errors'; +import { createJobEngine } from '../../../operations/jobs'; +import type { RecommendationJobRecord } from '../../../operations/jobs'; +import { ErrorPrompt, Panel, Screen } from '../../components'; import { HELP_TEXT } from '../../constants'; import { useListNavigation } from '../../hooks'; -import { Box, Text, useInput, useStdout } from 'ink'; -import React, { useMemo, useState } from 'react'; +import { RecommendationDetailView, shortTypeName, statusColor } from '../job-detail'; +import { Box, Text, useStdout } from 'ink'; +import React, { useEffect, useMemo, useState } from 'react'; const MONTHS = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']; @@ -19,19 +23,6 @@ function formatShortDate(timestamp: string): string { return `${mon} ${day} ${h12}:${m} ${ampm}`; } -function shortTypeName(type: string): string { - if (type === 'SYSTEM_PROMPT_RECOMMENDATION') return 'System Prompt'; - if (type === 'TOOL_DESCRIPTION_RECOMMENDATION') return 'Tool Description'; - return type; -} - -function statusColor(status: string): string { - if (status === 'COMPLETED' || status === 'SUCCEEDED') return 'green'; - if (status === 'FAILED') return 'red'; - if 
(status === 'IN_PROGRESS' || status === 'PENDING') return 'yellow'; - return 'gray'; -} - const CHROME_LINES = 9; // ───────────────────────────────────────────────────────────────────────────── @@ -44,8 +35,8 @@ function RecommendationListView({ onExit, availableHeight, }: { - records: RecommendationRunRecord[]; - onSelect: (record: RecommendationRunRecord) => void; + records: RecommendationJobRecord[]; + onSelect: (record: RecommendationJobRecord) => void; onExit: () => void; availableHeight: number; }) { @@ -68,7 +59,7 @@ function RecommendationListView({ return ( - Recommendation History + Recommendation Jobs {records.length} recommendation{records.length !== 1 ? 's' : ''} @@ -76,14 +67,14 @@ function RecommendationListView({ {visible.items.map((rec, vIdx) => { const idx = visible.startIdx + vIdx; const selected = idx === nav.selectedIndex; - const date = rec.startedAt ? formatShortDate(rec.startedAt) : 'unknown'; + const date = rec.createdAt ? formatShortDate(rec.createdAt) : 'unknown'; return ( - + {selected ? 
'❯' : ' '} {date.padEnd(16)} {rec.status.padEnd(12)} - {shortTypeName(rec.type).padEnd(18)} + {shortTypeName(rec.recommendationType).padEnd(18)} {rec.agent} ); @@ -96,135 +87,78 @@ function RecommendationListView({ ); } - -// ───────────────────────────────────────────────────────────────────────────── -// Detail view -// ───────────────────────────────────────────────────────────────────────────── - -function RecommendationDetailView({ record, onBack }: { record: RecommendationRunRecord; onBack: () => void }) { - useInput((input, key) => { - if (key.escape || input === 'b') { - onBack(); - } - }); - - const sysResult = record.result?.systemPromptRecommendationResult; - const toolResult = record.result?.toolDescriptionRecommendationResult; - - return ( - - - - ID: {record.recommendationId} - - - Type: {shortTypeName(record.type)} - {' '} - Agent: {record.agent} - {' '} - Status: {record.status} - - - Evaluators: {record.evaluators.join(', ')} - - {record.startedAt && ( - - Started: {new Date(record.startedAt).toLocaleString()} - - )} - {record.completedAt && ( - - Completed: {new Date(record.completedAt).toLocaleString()} - - )} - - {sysResult && ( - - {sysResult.recommendedSystemPrompt && ( - - - Recommended System Prompt: - - - {sysResult.recommendedSystemPrompt} - - - )} - - )} - - {toolResult?.tools && toolResult.tools.length > 0 && ( - - - Recommended Tool Descriptions: - - {toolResult.tools.map(tool => ( - - {tool.toolName} - {tool.recommendedToolDescription} - - ))} - - )} - - {!sysResult && !toolResult && ( - - No recommendation results available. 
- - )} - - - Press Esc or B to go back - - - - ); -} - // ───────────────────────────────────────────────────────────────────────────── // Main screen // ───────────────────────────────────────────────────────────────────────────── +type FlowState = + | { name: 'loading' } + | { name: 'creds-error'; message: string } + | { name: 'error'; message: string } + | { name: 'loaded'; records: RecommendationJobRecord[] }; + interface RecommendationHistoryScreenProps { onExit: () => void; } export function RecommendationHistoryScreen({ onExit }: RecommendationHistoryScreenProps) { + const engine = useMemo(() => createJobEngine(new ConfigIO()), []); const { stdout } = useStdout(); const terminalHeight = stdout?.rows ?? 24; const availableHeight = Math.max(6, terminalHeight - CHROME_LINES); - const [selectedRecord, setSelectedRecord] = useState(null); - - const [records, loaded, error] = useMemo(() => { - try { - return [listAllRecommendations(), true, null] as const; - } catch (err) { - return [[] as RecommendationRunRecord[], true, err instanceof Error ? err.message : String(err)] as const; - } - }, []); - - if (!loaded) { + const [flow, setFlow] = useState({ name: 'loading' }); + const [selectedRecord, setSelectedRecord] = useState(null); + + useEffect(() => { + let cancelled = false; + + void (async () => { + try { + await validateAwsCredentials(); + } catch (err) { + if (!cancelled) setFlow({ name: 'creds-error', message: getErrorMessage(err) }); + return; + } + + try { + const records = await engine.list({ type: 'recommendation' }); + if (!cancelled) setFlow({ name: 'loaded', records }); + } catch (err) { + if (!cancelled) setFlow({ name: 'error', message: getErrorMessage(err) }); + } + })(); + + return () => { + cancelled = true; + }; + }, [engine]); + + if (flow.name === 'loading') { return ( - - Loading... + + Loading recommendation jobs... 
); } - if (error) { + if (flow.name === 'creds-error') { + return ; + } + + if (flow.name === 'error') { return ( - - {error} + + {flow.message} ); } - if (records.length === 0) { + if (flow.records.length === 0) { return ( - + - No recommendation runs found. + No recommendation jobs found. Run `agentcore run recommendation` to create one. @@ -234,12 +168,12 @@ export function RecommendationHistoryScreen({ onExit }: RecommendationHistoryScr const helpText = selectedRecord ? 'Esc/B back to list' : HELP_TEXT.NAVIGATE_SELECT; return ( - + {selectedRecord ? ( setSelectedRecord(null)} /> ) : ( )} + {isKmsKeyArnStep && ( + wizard.goBack()} + customValidation={value => { + if (!value) return true; + if (!isValidKmsKeyArn(value)) { + return 'Invalid KMS key ARN (e.g. arn:aws:kms:us-east-1:123456789012:key/12345678-1234-1234-1234-123456789012)'; + } + return true; + }} + /> + )} + {isConfirmStep && } diff --git a/src/cli/tui/screens/recommendation/RecommendationsHubScreen.tsx b/src/cli/tui/screens/recommendation/RecommendationsHubScreen.tsx index 2e53e0ebd..e510f4806 100644 --- a/src/cli/tui/screens/recommendation/RecommendationsHubScreen.tsx +++ b/src/cli/tui/screens/recommendation/RecommendationsHubScreen.tsx @@ -21,8 +21,8 @@ export function RecommendationsHubScreen({ onSelect, onExit }: RecommendationsHu }, { id: 'recommendation-history', - title: 'Recommendation History', - description: 'View past recommendation results (local)', + title: 'Recommendation Jobs', + description: 'View recommendation jobs and their results', }, ], [] @@ -36,7 +36,7 @@ export function RecommendationsHubScreen({ onSelect, onExit }: RecommendationsHu }); return ( - + ); diff --git a/src/cli/tui/screens/recommendation/types.ts b/src/cli/tui/screens/recommendation/types.ts index 587ea4a20..e70d0ba22 100644 --- a/src/cli/tui/screens/recommendation/types.ts +++ b/src/cli/tui/screens/recommendation/types.ts @@ -1,8 +1,8 @@ import type { - RecommendationInputSourceKind, + RecommendationInputSource 
as RecommendationInputSourceKind, RecommendationType, - TraceSourceKind, -} from '../../../operations/recommendation'; + RecommendationTraceSource as TraceSourceKind, +} from '../../../operations/jobs'; export type RecommendationStep = | 'type' @@ -16,6 +16,7 @@ export type RecommendationStep = | 'traceSource' | 'days' | 'sessions' + | 'kms-key-arn' | 'confirm'; export interface RecommendationWizardConfig { @@ -35,6 +36,8 @@ export interface RecommendationWizardConfig { systemPromptJsonPath: string; /** Tool name → JSONPath pairs for tool descriptions within the config bundle */ toolDescJsonPaths: { toolName: string; toolDescriptionJsonPath: string }[]; + /** KMS key ARN for encrypting recommendation results */ + kmsKeyArn: string; } export const RECOMMENDATION_STEP_LABELS: Record = { @@ -49,6 +52,7 @@ export const RECOMMENDATION_STEP_LABELS: Record = { traceSource: 'Traces', days: 'Lookback', sessions: 'Sessions', + 'kms-key-arn': 'KMS Key', confirm: 'Confirm', }; diff --git a/src/cli/tui/screens/recommendation/useRecommendationWizard.ts b/src/cli/tui/screens/recommendation/useRecommendationWizard.ts index 94c3c66d1..99a22bcda 100644 --- a/src/cli/tui/screens/recommendation/useRecommendationWizard.ts +++ b/src/cli/tui/screens/recommendation/useRecommendationWizard.ts @@ -1,8 +1,8 @@ import type { - RecommendationInputSourceKind, + RecommendationInputSource as RecommendationInputSourceKind, RecommendationType, - TraceSourceKind, -} from '../../../operations/recommendation'; + RecommendationTraceSource as TraceSourceKind, +} from '../../../operations/jobs'; import type { RecommendationStep, RecommendationWizardConfig } from './types'; import { DEFAULT_LOOKBACK_DAYS } from './types'; import { useCallback, useState } from 'react'; @@ -50,6 +50,7 @@ function getAllSteps( steps.push('days'); } + steps.push('kms-key-arn'); steps.push('confirm'); return steps; } @@ -70,6 +71,7 @@ function getDefaultConfig(): RecommendationWizardConfig { bundleFields: [], 
systemPromptJsonPath: '', toolDescJsonPaths: [], + kmsKeyArn: '', }; } @@ -205,6 +207,14 @@ export function useRecommendationWizard() { [advance] ); + const setKmsKeyArn = useCallback( + (kmsKeyArn: string) => { + setConfig(c => ({ ...c, kmsKeyArn })); + advance('kms-key-arn'); + }, + [advance] + ); + const reset = useCallback(() => { setConfig(getDefaultConfig()); setStep('type'); @@ -227,6 +237,7 @@ export function useRecommendationWizard() { setTraceSource, setDays, setSessions, + setKmsKeyArn, reset, }; } diff --git a/src/cli/tui/screens/remove/RemoveFlow.tsx b/src/cli/tui/screens/remove/RemoveFlow.tsx index dcf77ca1b..af7c4633e 100644 --- a/src/cli/tui/screens/remove/RemoveFlow.tsx +++ b/src/cli/tui/screens/remove/RemoveFlow.tsx @@ -1,8 +1,8 @@ import type { RemovableGatewayTarget, RemovalPreview } from '../../../operations/remove'; -import { paymentManagerPrimitive } from '../../../primitives/registry'; +import type { OrphanAction } from '../../../primitives/HarnessPrimitive'; +import { harnessPrimitive, paymentManagerPrimitive } from '../../../primitives/registry'; import { ErrorPrompt, Panel, Screen, SelectScreen } from '../../components'; import { - useRemovableABTests, useRemovableAgents, useRemovableConfigBundles, useRemovableDatasets, @@ -11,6 +11,7 @@ import { useRemovableGateways, useRemovableHarnesses, useRemovableIdentities, + useRemovableKnowledgeBases, useRemovableMemories, useRemovableOnlineEvalConfigs, useRemovablePaymentManagers, @@ -18,7 +19,6 @@ import { useRemovablePolicyEngines, useRemovableRuntimeEndpoints, useRemovalPreview, - useRemoveABTest, useRemoveAgent, useRemoveConfigBundle, useRemoveDataset, @@ -27,13 +27,13 @@ import { useRemoveGatewayTarget, useRemoveHarness, useRemoveIdentity, + useRemoveKnowledgeBase, useRemoveMemory, useRemoveOnlineEvalConfig, useRemovePolicy, useRemovePolicyEngine, useRemoveRuntimeEndpoint, } from '../../hooks/useRemove'; -import { RemoveABTestScreen } from '../ab-test/RemoveABTestScreen'; import { 
RemoveAgentScreen } from './RemoveAgentScreen'; import { RemoveAllScreen } from './RemoveAllScreen'; import { RemoveConfigBundleScreen } from './RemoveConfigBundleScreen'; @@ -43,6 +43,7 @@ import { RemoveEvaluatorScreen } from './RemoveEvaluatorScreen'; import { RemoveGatewayScreen } from './RemoveGatewayScreen'; import { RemoveGatewayTargetScreen } from './RemoveGatewayTargetScreen'; import { RemoveIdentityScreen } from './RemoveIdentityScreen'; +import { RemoveKnowledgeBaseScreen } from './RemoveKnowledgeBaseScreen'; import { RemoveMemoryScreen } from './RemoveMemoryScreen'; import { RemoveOnlineEvalScreen } from './RemoveOnlineEvalScreen'; import { RemovePolicyEngineScreen } from './RemovePolicyEngineScreen'; @@ -64,13 +65,14 @@ type FlowState = | { name: 'select-identity' } | { name: 'select-evaluator' } | { name: 'select-dataset' } + | { name: 'select-knowledge-base' } | { name: 'select-online-eval' } | { name: 'select-policy-engine' } | { name: 'select-policy' } | { name: 'select-harness' } | { name: 'confirm-harness'; harnessName: string; preview: RemovalPreview } + | { name: 'confirm-orphan-harness'; harnessName: string } | { name: 'select-config-bundle' } - | { name: 'select-ab-test' } | { name: 'select-runtime-endpoint' } | { name: 'select-payment' } | { name: 'confirm-agent'; agentName: string; preview: RemovalPreview } @@ -80,11 +82,11 @@ type FlowState = | { name: 'confirm-identity'; identityName: string; preview: RemovalPreview } | { name: 'confirm-evaluator'; evaluatorName: string; preview: RemovalPreview } | { name: 'confirm-dataset'; datasetName: string; preview: RemovalPreview } + | { name: 'confirm-knowledge-base'; knowledgeBaseName: string; preview: RemovalPreview } | { name: 'confirm-online-eval'; configName: string; preview: RemovalPreview } | { name: 'confirm-policy-engine'; engineName: string; preview: RemovalPreview } | { name: 'confirm-policy'; compositeKey: string; policyName: string; preview: RemovalPreview } | { name: 
'confirm-config-bundle'; bundleName: string; preview: RemovalPreview } - | { name: 'confirm-ab-test'; testName: string; preview: RemovalPreview } | { name: 'confirm-runtime-endpoint'; endpointName: string; preview: RemovalPreview } | { name: 'confirm-payment'; managerName: string; preview: RemovalPreview } | { name: 'loading'; message: string } @@ -96,11 +98,11 @@ type FlowState = | { name: 'identity-success'; identityName: string; logFilePath?: string } | { name: 'evaluator-success'; evaluatorName: string; logFilePath?: string } | { name: 'dataset-success'; datasetName: string; logFilePath?: string } + | { name: 'knowledge-base-success'; knowledgeBaseName: string; logFilePath?: string } | { name: 'online-eval-success'; configName: string; logFilePath?: string } | { name: 'policy-engine-success'; engineName: string; logFilePath?: string } | { name: 'policy-success'; policyName: string; logFilePath?: string } | { name: 'config-bundle-success'; bundleName: string; logFilePath?: string } - | { name: 'ab-test-success'; testName: string; logFilePath?: string } | { name: 'runtime-endpoint-success'; endpointName: string; logFilePath?: string } | { name: 'payment-success'; managerName: string } | { name: 'remove-all' } @@ -125,11 +127,12 @@ interface RemoveFlowProps { | 'credential' | 'evaluator' | 'online-eval' + | 'online-insights' | 'policy-engine' | 'policy' | 'config-bundle' - | 'ab-test' | 'dataset' + | 'knowledge-base' | 'payment' | 'payment-manager' | 'payment-connector' @@ -165,16 +168,18 @@ export function RemoveFlow({ return { name: 'select-evaluator' }; case 'dataset': return { name: 'select-dataset' }; + case 'knowledge-base': + return { name: 'select-knowledge-base' }; case 'online-eval': return { name: 'select-online-eval' }; + case 'online-insights': + return { name: 'select-online-eval' }; case 'policy-engine': return { name: 'select-policy-engine' }; case 'policy': return { name: 'select-policy' }; case 'config-bundle': return { name: 
'select-config-bundle' }; - case 'ab-test': - return { name: 'select-ab-test' }; case 'runtime-endpoint': return { name: 'select-runtime-endpoint' }; case 'payment': @@ -198,6 +203,11 @@ export function RemoveFlow({ const { identities, isLoading: isLoadingIdentities, refresh: refreshIdentities } = useRemovableIdentities(); const { evaluators, isLoading: isLoadingEvaluators, refresh: refreshEvaluators } = useRemovableEvaluators(); const { datasets, isLoading: isLoadingDatasets, refresh: refreshDatasets } = useRemovableDatasets(); + const { + knowledgeBases, + isLoading: isLoadingKnowledgeBases, + refresh: refreshKnowledgeBases, + } = useRemovableKnowledgeBases(); const { onlineEvalConfigs, isLoading: isLoadingOnlineEvals, @@ -214,7 +224,6 @@ export function RemoveFlow({ isLoading: isLoadingConfigBundles, refresh: refreshConfigBundles, } = useRemovableConfigBundles(); - const { abTests } = useRemovableABTests(); const { endpoints: runtimeEndpoints, isLoading: isLoadingRuntimeEndpoints, @@ -232,6 +241,7 @@ export function RemoveFlow({ isLoadingIdentities || isLoadingEvaluators || isLoadingDatasets || + isLoadingKnowledgeBases || isLoadingOnlineEvals || isLoadingPolicyEngines || isLoadingPolicies || @@ -249,11 +259,11 @@ export function RemoveFlow({ loadIdentityPreview, loadEvaluatorPreview, loadDatasetPreview, + loadKnowledgeBasePreview, loadOnlineEvalPreview, loadPolicyEnginePreview, loadPolicyPreview, loadConfigBundlePreview, - loadABTestPreview, loadRuntimeEndpointPreview, reset: resetPreview, } = useRemovalPreview(); @@ -267,11 +277,11 @@ export function RemoveFlow({ const { remove: removeIdentityOp, reset: resetRemoveIdentity } = useRemoveIdentity(); const { remove: removeEvaluatorOp, reset: resetRemoveEvaluator } = useRemoveEvaluator(); const { remove: removeDatasetOp, reset: resetRemoveDataset } = useRemoveDataset(); + const { remove: removeKnowledgeBaseOp, reset: resetRemoveKnowledgeBase } = useRemoveKnowledgeBase(); const { remove: removeOnlineEvalOp, reset: 
resetRemoveOnlineEval } = useRemoveOnlineEvalConfig(); const { remove: removePolicyEngineOp, reset: resetRemovePolicyEngine } = useRemovePolicyEngine(); const { remove: removePolicyOp, reset: resetRemovePolicy } = useRemovePolicy(); const { remove: removeConfigBundleOp, reset: resetRemoveConfigBundle } = useRemoveConfigBundle(); - const { remove: removeABTestOp, reset: resetRemoveABTest } = useRemoveABTest(); const { remove: removeRuntimeEndpointOp, reset: resetRemoveRuntimeEndpoint } = useRemoveRuntimeEndpoint(); // Track pending result state @@ -302,11 +312,11 @@ export function RemoveFlow({ 'identity-success', 'evaluator-success', 'dataset-success', + 'knowledge-base-success', 'online-eval-success', 'policy-engine-success', 'policy-success', 'config-bundle-success', - 'ab-test-success', 'runtime-endpoint-success', 'payment-success', ]; @@ -345,6 +355,9 @@ export function RemoveFlow({ case 'dataset': setFlow({ name: 'select-dataset' }); break; + case 'knowledge-base': + setFlow({ name: 'select-knowledge-base' }); + break; case 'online-eval': setFlow({ name: 'select-online-eval' }); break; @@ -357,9 +370,6 @@ export function RemoveFlow({ case 'config-bundle': setFlow({ name: 'select-config-bundle' }); break; - case 'ab-test': - setFlow({ name: 'select-ab-test' }); - break; case 'runtime-endpoint': setFlow({ name: 'select-runtime-endpoint' }); break; @@ -399,6 +409,13 @@ export function RemoveFlow({ const handleSelectHarness = useCallback( async (harnessName: string) => { + // Imperative-build orphans need an explicit delete-and-keep / delete-and-discard choice + // (deleting a real AWS resource), so they bypass the plain confirm and the force path — + // we never auto-delete an orphan, even with --yes. 
+ if (harnessPrimitive && (await harnessPrimitive.isOrphan(harnessName))) { + setFlow({ name: 'confirm-orphan-harness', harnessName }); + return; + } const result = await loadHarnessPreview(harnessName); if (result.ok) { if (force) { @@ -419,6 +436,16 @@ export function RemoveFlow({ [loadHarnessPreview, force, removeHarnessOp] ); + const handleConfirmOrphanHarness = useCallback(async (harnessName: string, orphanAction: OrphanAction) => { + setFlow({ name: 'loading', message: `Deleting orphan harness ${harnessName} from AWS...` }); + const result = await harnessPrimitive!.remove(harnessName, { orphanAction }); + if (result.success) { + setFlow({ name: 'harness-success', harnessName }); + } else { + setFlow({ name: 'error', message: result.error.message }); + } + }, []); + const handleSelectGateway = useCallback( async (gatewayName: string) => { const result = await loadGatewayPreview(gatewayName); @@ -551,6 +578,28 @@ export function RemoveFlow({ [loadDatasetPreview, force, removeDatasetOp] ); + const handleSelectKnowledgeBase = useCallback( + async (knowledgeBaseName: string) => { + const result = await loadKnowledgeBasePreview(knowledgeBaseName); + if (result.ok) { + if (force) { + setFlow({ name: 'loading', message: `Removing knowledge base ${knowledgeBaseName}...` }); + const removeResult = await removeKnowledgeBaseOp(knowledgeBaseName, result.preview); + if (removeResult.success) { + setFlow({ name: 'knowledge-base-success', knowledgeBaseName }); + } else { + setFlow({ name: 'error', message: removeResult.error.message }); + } + } else { + setFlow({ name: 'confirm-knowledge-base', knowledgeBaseName, preview: result.preview }); + } + } else { + setFlow({ name: 'error', message: result.error }); + } + }, + [loadKnowledgeBasePreview, force, removeKnowledgeBaseOp] + ); + const handleSelectOnlineEval = useCallback( async (configName: string) => { const result = await loadOnlineEvalPreview(configName); @@ -642,28 +691,6 @@ export function RemoveFlow({ 
[loadConfigBundlePreview, force, removeConfigBundleOp] ); - const handleSelectABTest = useCallback( - async (testName: string) => { - const result = await loadABTestPreview(testName); - if (result.ok) { - if (force) { - setFlow({ name: 'loading', message: `Removing AB test ${testName}...` }); - const removeResult = await removeABTestOp(testName, result.preview); - if (removeResult.success) { - setFlow({ name: 'ab-test-success', testName }); - } else { - setFlow({ name: 'error', message: removeResult.error.message }); - } - } else { - setFlow({ name: 'confirm-ab-test', testName, preview: result.preview }); - } - } else { - setFlow({ name: 'error', message: result.error }); - } - }, - [loadABTestPreview, force, removeABTestOp] - ); - const handleSelectRuntimeEndpoint = useCallback( async (endpointName: string) => { const result = await loadRuntimeEndpointPreview(endpointName); @@ -738,6 +765,9 @@ export function RemoveFlow({ case 'online-eval': void handleSelectOnlineEval(initialResourceName); break; + case 'online-insights': + void handleSelectOnlineEval(initialResourceName); + break; case 'policy-engine': void handleSelectPolicyEngine(initialResourceName); break; @@ -747,15 +777,15 @@ export function RemoveFlow({ case 'config-bundle': void handleSelectConfigBundle(initialResourceName); break; - case 'ab-test': - void handleSelectABTest(initialResourceName); - break; case 'runtime-endpoint': void handleSelectRuntimeEndpoint(initialResourceName); break; case 'dataset': void handleSelectDataset(initialResourceName); break; + case 'knowledge-base': + void handleSelectKnowledgeBase(initialResourceName); + break; case 'payment': case 'payment-manager': void handleSelectPaymentManager(initialResourceName); @@ -772,11 +802,11 @@ export function RemoveFlow({ handleSelectIdentity, handleSelectEvaluator, handleSelectDataset, + handleSelectKnowledgeBase, handleSelectOnlineEval, handleSelectPolicyEngine, handleSelectPolicy, handleSelectConfigBundle, - handleSelectABTest, 
handleSelectRuntimeEndpoint, handleSelectPaymentManager, ]); @@ -910,6 +940,26 @@ export function RemoveFlow({ [removeDatasetOp] ); + const handleConfirmKnowledgeBase = useCallback( + async (knowledgeBaseName: string, preview: RemovalPreview) => { + pendingResultRef.current = null; + setResultReady(false); + setFlow({ name: 'loading', message: `Removing knowledge base ${knowledgeBaseName}...` }); + const result = await removeKnowledgeBaseOp(knowledgeBaseName, preview); + if (result.success) { + pendingResultRef.current = { + name: 'knowledge-base-success', + knowledgeBaseName, + logFilePath: result.logFilePath, + }; + } else { + pendingResultRef.current = { name: 'error', message: result.error.message }; + } + setResultReady(true); + }, + [removeKnowledgeBaseOp] + ); + const handleConfirmOnlineEval = useCallback( async (configName: string, preview: RemovalPreview) => { pendingResultRef.current = null; @@ -974,22 +1024,6 @@ export function RemoveFlow({ [removeConfigBundleOp] ); - const handleConfirmABTest = useCallback( - async (testName: string, preview: RemovalPreview) => { - pendingResultRef.current = null; - setResultReady(false); - setFlow({ name: 'loading', message: `Removing AB test ${testName}...` }); - const result = await removeABTestOp(testName, preview); - if (result.success) { - pendingResultRef.current = { name: 'ab-test-success', testName, logFilePath: result.logFilePath }; - } else { - pendingResultRef.current = { name: 'error', message: result.error.message }; - } - setResultReady(true); - }, - [removeABTestOp] - ); - const handleConfirmRuntimeEndpoint = useCallback( async (endpointName: string, preview: RemovalPreview) => { pendingResultRef.current = null; @@ -1016,11 +1050,11 @@ export function RemoveFlow({ resetRemoveIdentity(); resetRemoveEvaluator(); resetRemoveDataset(); + resetRemoveKnowledgeBase(); resetRemoveOnlineEval(); resetRemovePolicyEngine(); resetRemovePolicy(); resetRemoveConfigBundle(); - resetRemoveABTest(); 
resetRemoveRuntimeEndpoint(); }, [ resetPreview, @@ -1032,11 +1066,11 @@ export function RemoveFlow({ resetRemoveIdentity, resetRemoveEvaluator, resetRemoveDataset, + resetRemoveKnowledgeBase, resetRemoveOnlineEval, resetRemovePolicyEngine, resetRemovePolicy, resetRemoveConfigBundle, - resetRemoveABTest, resetRemoveRuntimeEndpoint, ]); @@ -1050,6 +1084,7 @@ export function RemoveFlow({ refreshIdentities(), refreshEvaluators(), refreshDatasets(), + refreshKnowledgeBases(), refreshOnlineEvals(), refreshPolicyEngines(), refreshPolicies(), @@ -1066,6 +1101,7 @@ export function RemoveFlow({ refreshIdentities, refreshEvaluators, refreshDatasets, + refreshKnowledgeBases, refreshOnlineEvals, refreshPolicyEngines, refreshPolicies, @@ -1094,9 +1130,9 @@ export function RemoveFlow({ policyEngineCount={policyEngines.length} policyCount={policies.length} configBundleCount={configBundles.length} - abTestCount={abTests.length} runtimeEndpointCount={runtimeEndpoints.length} datasetCount={datasets.length} + knowledgeBaseCount={knowledgeBases.length} paymentCount={paymentManagers.length} /> ); @@ -1219,6 +1255,19 @@ export function RemoveFlow({ ); } + if (flow.name === 'select-knowledge-base') { + if (initialResourceName && isLoading) { + return null; + } + return ( + void handleSelectKnowledgeBase(name)} + onExit={() => setFlow({ name: 'select' })} + /> + ); + } + if (flow.name === 'select-online-eval') { if (initialResourceName && isLoading) { return null; @@ -1271,19 +1320,6 @@ export function RemoveFlow({ ); } - if (flow.name === 'select-ab-test') { - if (initialResourceName && isLoading) { - return null; - } - return ( - void handleSelectABTest(name)} - onExit={() => setFlow({ name: 'select' })} - /> - ); - } - if (flow.name === 'select-runtime-endpoint') { if (initialResourceName && isLoading) { return null; @@ -1342,6 +1378,42 @@ export function RemoveFlow({ ); } + if (flow.name === 'confirm-orphan-harness') { + const orphanName = flow.harnessName; + return ( + + 
title={`Remove Harness: ${orphanName}`} + color="yellow" + headerContent={ + + {`"${orphanName}" was created by the preview build and is not managed by CloudFormation, so CloudFormation cannot delete it. This will delete it directly from your AWS account.`} + + } + items={[ + { + id: 'keep', + title: 'Delete it and keep it in agentcore.json', + description: 'Moves to GA — the next `agentcore deploy` recreates it under CloudFormation.', + }, + { + id: 'discard', + title: 'Delete it and remove it from agentcore.json', + description: 'You no longer want this harness.', + }, + { id: 'cancel', title: 'Cancel', description: 'Leave the harness untouched.', spaceBefore: true }, + ]} + onSelect={item => { + if (item.id === 'cancel') { + setFlow({ name: 'select-harness' }); + } else { + void handleConfirmOrphanHarness(orphanName, item.id as OrphanAction); + } + }} + onExit={() => setFlow({ name: 'select-harness' })} + /> + ); + } + if (flow.name === 'confirm-gateway') { return ( void handleConfirmKnowledgeBase(flow.knowledgeBaseName, flow.preview)} + onCancel={() => setFlow({ name: 'select-knowledge-base' })} + /> + ); + } + if (flow.name === 'confirm-online-eval') { return ( void handleConfirmABTest(flow.testName, flow.preview)} - onCancel={() => setFlow({ name: 'select-ab-test' })} - /> - ); - } - if (flow.name === 'confirm-runtime-endpoint') { return ( { resetAll(); @@ -1640,12 +1712,12 @@ export function RemoveFlow({ ); } - if (flow.name === 'policy-engine-success') { + if (flow.name === 'online-eval-success') { return ( { resetAll(); @@ -1656,12 +1728,12 @@ export function RemoveFlow({ ); } - if (flow.name === 'policy-success') { + if (flow.name === 'policy-engine-success') { return ( { resetAll(); @@ -1672,12 +1744,12 @@ export function RemoveFlow({ ); } - if (flow.name === 'config-bundle-success') { + if (flow.name === 'policy-success') { return ( { resetAll(); @@ -1688,12 +1760,12 @@ export function RemoveFlow({ ); } - if (flow.name === 'ab-test-success') { + if 
(flow.name === 'config-bundle-success') { return ( { resetAll(); diff --git a/src/cli/tui/screens/remove/RemoveKnowledgeBaseScreen.tsx b/src/cli/tui/screens/remove/RemoveKnowledgeBaseScreen.tsx new file mode 100644 index 000000000..b6ac035a8 --- /dev/null +++ b/src/cli/tui/screens/remove/RemoveKnowledgeBaseScreen.tsx @@ -0,0 +1,26 @@ +import type { RemovableKnowledgeBase } from '../../../primitives/KnowledgeBasePrimitive'; +import { SelectScreen } from '../../components'; +import React from 'react'; + +interface RemoveKnowledgeBaseScreenProps { + knowledgeBases: RemovableKnowledgeBase[]; + onSelect: (knowledgeBaseName: string) => void; + onExit: () => void; +} + +export function RemoveKnowledgeBaseScreen({ knowledgeBases, onSelect, onExit }: RemoveKnowledgeBaseScreenProps) { + const items = knowledgeBases.map(kb => ({ + id: kb.name, + title: kb.name, + description: 'Knowledge Base', + })); + + return ( + onSelect(item.id)} + onExit={onExit} + /> + ); +} diff --git a/src/cli/tui/screens/remove/RemoveScreen.tsx b/src/cli/tui/screens/remove/RemoveScreen.tsx index 2f54c6010..16acf4a2d 100644 --- a/src/cli/tui/screens/remove/RemoveScreen.tsx +++ b/src/cli/tui/screens/remove/RemoveScreen.tsx @@ -1,4 +1,4 @@ -import { isPreviewEnabled } from '../../../feature-flags'; +import { isGatedFeaturesEnabled, isPreviewEnabled } from '../../../feature-flags'; import type { SelectableItem } from '../../components'; import { SelectScreen } from '../../components'; import { useMemo } from 'react'; @@ -10,12 +10,13 @@ export type RemoveResourceType = | 'credential' | 'evaluator' | 'online-eval' + | 'online-insights' | 'policy-engine' | 'policy' | 'gateway' | 'gateway-target' + | 'knowledge-base' | 'config-bundle' - | 'ab-test' | 'runtime-endpoint' | 'dataset' | 'payment' @@ -32,11 +33,15 @@ const REMOVE_RESOURCES: { id: RemoveResourceType; title: string; description: st { id: 'online-eval', title: 'Online Eval Config', description: 'Remove an online eval config' }, { id: 
'policy-engine', title: 'Policy Engine', description: 'Remove a policy engine' }, { id: 'policy', title: 'Policy', description: 'Remove a policy from a policy engine' }, - { id: 'payment', title: 'Payment', description: 'Remove a payment manager' }, + { id: 'payment', title: 'Payment [preview]', description: 'Remove a payment manager' }, { id: 'gateway', title: 'Gateway', description: 'Remove a gateway' }, { id: 'gateway-target', title: 'Gateway Target', description: 'Remove a gateway target' }, - { id: 'config-bundle', title: 'Configuration Bundle [preview]', description: 'Remove a configuration bundle' }, - { id: 'ab-test', title: 'AB Test [preview]', description: 'Remove an A/B test' }, + { + id: 'knowledge-base', + title: 'Knowledge Base', + description: 'Remove a knowledge base (cascade-prunes connector gateway targets)', + }, + { id: 'config-bundle', title: 'Configuration Bundle', description: 'Remove a configuration bundle' }, { id: 'runtime-endpoint', title: 'Runtime Endpoint', description: 'Remove a runtime endpoint' }, { id: 'dataset', title: 'Dataset', description: 'Remove a dataset' }, { id: 'all', title: 'All', description: 'Reset entire agentcore project' }, @@ -67,12 +72,12 @@ interface RemoveScreenProps { policyCount: number; /** Number of configuration bundles available for removal */ configBundleCount: number; - /** Number of AB tests available for removal */ - abTestCount: number; /** Number of runtime endpoints available for removal */ runtimeEndpointCount: number; /** Number of datasets available for removal */ datasetCount: number; + /** Number of knowledge bases available for removal */ + knowledgeBaseCount: number; /** Number of payment managers available for removal */ paymentCount: number; } @@ -91,9 +96,9 @@ export function RemoveScreen({ policyEngineCount, policyCount, configBundleCount, - abTestCount, runtimeEndpointCount, datasetCount, + knowledgeBaseCount, paymentCount, }: RemoveScreenProps) { const items: SelectableItem[] = 
useMemo(() => { @@ -168,12 +173,6 @@ export function RemoveScreen({ description = 'No configuration bundles to remove'; } break; - case 'ab-test': - if (abTestCount === 0) { - disabled = true; - description = 'No AB tests to remove'; - } - break; case 'runtime-endpoint': if (runtimeEndpointCount === 0) { disabled = true; @@ -186,6 +185,15 @@ export function RemoveScreen({ description = 'No datasets to remove'; } break; + case 'knowledge-base': + if (!isGatedFeaturesEnabled()) { + disabled = true; + description = 'Coming soon'; + } else if (knowledgeBaseCount === 0) { + disabled = true; + description = 'No knowledge bases to remove'; + } + break; case 'payment': if (paymentCount === 0) { disabled = true; @@ -211,9 +219,9 @@ export function RemoveScreen({ policyEngineCount, policyCount, configBundleCount, - abTestCount, runtimeEndpointCount, datasetCount, + knowledgeBaseCount, paymentCount, ]); diff --git a/src/cli/tui/screens/remove/__tests__/RemoveScreen.test.tsx b/src/cli/tui/screens/remove/__tests__/RemoveScreen.test.tsx index 345062af3..f1227bccd 100644 --- a/src/cli/tui/screens/remove/__tests__/RemoveScreen.test.tsx +++ b/src/cli/tui/screens/remove/__tests__/RemoveScreen.test.tsx @@ -1,9 +1,17 @@ import { RemoveScreen } from '../RemoveScreen.js'; import { render } from 'ink-testing-library'; import React from 'react'; -import { describe, expect, it, vi } from 'vitest'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; describe('RemoveScreen', () => { + const originalGate = process.env.ENABLE_GATED_FEATURES; + beforeEach(() => { + process.env.ENABLE_GATED_FEATURES = '1'; + }); + afterEach(() => { + if (originalGate === undefined) delete process.env.ENABLE_GATED_FEATURES; + else process.env.ENABLE_GATED_FEATURES = originalGate; + }); it('gateway and gateway-target options enabled when counts > 0', () => { const onSelect = vi.fn(); const onExit = vi.fn(); @@ -23,9 +31,9 @@ describe('RemoveScreen', () => { policyEngineCount={1} 
policyCount={1} configBundleCount={1} - abTestCount={0} runtimeEndpointCount={1} datasetCount={0} + knowledgeBaseCount={0} paymentCount={1} /> ); @@ -59,9 +67,9 @@ describe('RemoveScreen', () => { policyEngineCount={0} policyCount={0} configBundleCount={0} - abTestCount={0} runtimeEndpointCount={0} datasetCount={0} + knowledgeBaseCount={0} paymentCount={0} /> ); @@ -72,7 +80,37 @@ describe('RemoveScreen', () => { expect(lastFrame()).toContain('No policies to remove'); }); - it('AB test option enabled when abTestCount > 0', () => { + it('Knowledge Base option enabled when knowledgeBaseCount > 0', () => { + const onSelect = vi.fn(); + const onExit = vi.fn(); + + const { lastFrame } = render( + + ); + + expect(lastFrame()).toContain('Knowledge Base'); + expect(lastFrame()).not.toContain('No knowledge bases to remove'); + }); + + it('Knowledge Base option disabled when knowledgeBaseCount = 0', () => { const onSelect = vi.fn(); const onExit = vi.fn(); @@ -91,18 +129,18 @@ describe('RemoveScreen', () => { policyEngineCount={0} policyCount={0} configBundleCount={0} - abTestCount={2} runtimeEndpointCount={0} datasetCount={0} + knowledgeBaseCount={0} paymentCount={0} /> ); - expect(lastFrame()).toContain('AB Test'); - expect(lastFrame()).not.toContain('No AB tests to remove'); + expect(lastFrame()).toContain('No knowledge bases to remove'); }); - it('AB test option disabled when abTestCount = 0', () => { + it('Knowledge Base option shows Coming soon when ENABLE_GATED_FEATURES is unset', () => { + delete process.env.ENABLE_GATED_FEATURES; const onSelect = vi.fn(); const onExit = vi.fn(); @@ -121,13 +159,15 @@ describe('RemoveScreen', () => { policyEngineCount={0} policyCount={0} configBundleCount={0} - abTestCount={0} runtimeEndpointCount={0} datasetCount={0} + knowledgeBaseCount={3} paymentCount={0} /> ); - expect(lastFrame()).toContain('No AB tests to remove'); + expect(lastFrame()).toContain('Knowledge Base'); + expect(lastFrame()).toContain('Coming soon'); + 
expect(lastFrame()).not.toContain('No knowledge bases to remove'); }); }); diff --git a/src/cli/tui/screens/remove/index.ts b/src/cli/tui/screens/remove/index.ts index 8d77b9b10..7a2105002 100644 --- a/src/cli/tui/screens/remove/index.ts +++ b/src/cli/tui/screens/remove/index.ts @@ -6,6 +6,7 @@ export { RemoveFlow } from './RemoveFlow'; export { RemoveGatewayScreen } from './RemoveGatewayScreen'; export { RemoveIdentityScreen } from './RemoveIdentityScreen'; export { RemoveGatewayTargetScreen } from './RemoveGatewayTargetScreen'; +export { RemoveKnowledgeBaseScreen } from './RemoveKnowledgeBaseScreen'; export { RemoveMemoryScreen } from './RemoveMemoryScreen'; export { RemoveOnlineEvalScreen } from './RemoveOnlineEvalScreen'; export { RemovePolicyEngineScreen } from './RemovePolicyEngineScreen'; diff --git a/src/cli/tui/screens/run-ab-test/ABTestJobsHistoryScreen.tsx b/src/cli/tui/screens/run-ab-test/ABTestJobsHistoryScreen.tsx new file mode 100644 index 000000000..9b789d1cf --- /dev/null +++ b/src/cli/tui/screens/run-ab-test/ABTestJobsHistoryScreen.tsx @@ -0,0 +1,196 @@ +import { ConfigIO } from '../../../../lib'; +import { validateAwsCredentials } from '../../../aws/account'; +import { getErrorMessage } from '../../../errors'; +import { createJobEngine } from '../../../operations/jobs'; +import type { ABTestJobRecord } from '../../../operations/jobs'; +import { ErrorPrompt, Panel, Screen } from '../../components'; +import { HELP_TEXT } from '../../constants'; +import { useListNavigation } from '../../hooks'; +import { ABTestDetailView, lifecycleColor, statusColor } from '../job-detail'; +import { Box, Text, useStdout } from 'ink'; +import React, { useCallback, useEffect, useMemo, useState } from 'react'; + +const MONTHS = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']; + +function formatShortDate(timestamp: string): string { + const d = new Date(timestamp); + const mon = MONTHS[d.getMonth()]; + const day = d.getDate(); + 
const h = d.getHours(); + const m = d.getMinutes().toString().padStart(2, '0'); + const ampm = h >= 12 ? 'PM' : 'AM'; + const h12 = h % 12 || 12; + return `${mon} ${day} ${h12}:${m} ${ampm}`; +} + +const CHROME_LINES = 9; + +// ───────────────────────────────────────────────────────────────────────────── +// List view +// ───────────────────────────────────────────────────────────────────────────── + +function ABTestListView({ + records, + onSelect, + onExit, + availableHeight, +}: { + records: ABTestJobRecord[]; + onSelect: (record: ABTestJobRecord) => void; + onExit: () => void; + availableHeight: number; +}) { + const nav = useListNavigation({ + items: records, + onSelect: item => onSelect(item), + onExit, + isActive: true, + }); + + const maxVisible = Math.max(1, availableHeight - 3); + const visible = useMemo(() => { + let start = 0; + if (nav.selectedIndex >= maxVisible) { + start = nav.selectedIndex - maxVisible + 1; + } + return { items: records.slice(start, start + maxVisible), startIdx: start }; + }, [records, nav.selectedIndex, maxVisible]); + + return ( + + + A/B Test Jobs + + {records.length} A/B test{records.length !== 1 ? 's' : ''} + + + {visible.items.map((rec, vIdx) => { + const idx = visible.startIdx + vIdx; + const selected = idx === nav.selectedIndex; + const date = rec.createdAt ? formatShortDate(rec.createdAt) : 'unknown'; + return ( + + {selected ? 
'❯' : ' '} + {date.padEnd(16)} + {rec.status.padEnd(10)} + {rec.lifecycleStatus.padEnd(10)} + {rec.name} + + ); + })} + {visible.startIdx + maxVisible < records.length && ( + ↓ {records.length - visible.startIdx - maxVisible} more + )} + + + + ); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Main screen +// ───────────────────────────────────────────────────────────────────────────── + +type FlowState = + | { name: 'loading' } + | { name: 'creds-error'; message: string } + | { name: 'error'; message: string } + | { name: 'loaded'; records: ABTestJobRecord[] }; + +interface ABTestJobsHistoryScreenProps { + onExit: () => void; +} + +export function ABTestJobsHistoryScreen({ onExit }: ABTestJobsHistoryScreenProps) { + const engine = useMemo(() => createJobEngine(new ConfigIO()), []); + const { stdout } = useStdout(); + const terminalHeight = stdout?.rows ?? 24; + const availableHeight = Math.max(6, terminalHeight - CHROME_LINES); + + const [flow, setFlow] = useState({ name: 'loading' }); + const [selectedRecord, setSelectedRecord] = useState(null); + + useEffect(() => { + let cancelled = false; + + void (async () => { + try { + await validateAwsCredentials(); + } catch (err) { + if (!cancelled) setFlow({ name: 'creds-error', message: getErrorMessage(err) }); + return; + } + + try { + const records = await engine.list({ type: 'ab-test' }); + if (!cancelled) setFlow({ name: 'loaded', records }); + } catch (err) { + if (!cancelled) setFlow({ name: 'error', message: getErrorMessage(err) }); + } + })(); + + return () => { + cancelled = true; + }; + }, [engine]); + + const handleUpdate = useCallback((updated: ABTestJobRecord) => { + setSelectedRecord(updated); + setFlow(prev => + prev.name === 'loaded' ? { ...prev, records: prev.records.map(r => (r.id === updated.id ? updated : r)) } : prev + ); + }, []); + + if (flow.name === 'loading') { + return ( + + Loading A/B test jobs... 
+ + ); + } + + if (flow.name === 'creds-error') { + return ; + } + + if (flow.name === 'error') { + return ( + + {flow.message} + + ); + } + + if (flow.records.length === 0) { + return ( + + + No A/B test jobs found. + Run an A/B test from the TUI or CLI to see results here. + + + ); + } + + const helpText = selectedRecord ? 'Esc/B back to list' : HELP_TEXT.NAVIGATE_SELECT; + + return ( + + {selectedRecord ? ( + setSelectedRecord(null)} + onUpdate={handleUpdate} + /> + ) : ( + + )} + + ); +} diff --git a/src/cli/tui/screens/run-ab-test/RunABTestFlow.tsx b/src/cli/tui/screens/run-ab-test/RunABTestFlow.tsx new file mode 100644 index 000000000..5347f3de2 --- /dev/null +++ b/src/cli/tui/screens/run-ab-test/RunABTestFlow.tsx @@ -0,0 +1,805 @@ +import { ConfigIO } from '../../../../lib'; +import { validateAwsCredentials } from '../../../aws/account'; +import { getErrorMessage } from '../../../errors'; +import { createJobEngine } from '../../../operations/jobs'; +import type { ABTestJobRecord, ABTestMode, StartABTestJobOptions } from '../../../operations/jobs'; +import { + ConfirmReview, + ErrorPrompt, + GradientText, + Panel, + Screen, + StepIndicator, + TextInput, + WizardSelect, +} from '../../components'; +import type { SelectableItem } from '../../components'; +import { HELP_TEXT } from '../../constants'; +import { useListNavigation } from '../../hooks'; +import type { ABTestResources, RunABTestConfig, RunABTestStep } from './types'; +import { Box, Text } from 'ink'; +import React, { useCallback, useEffect, useMemo, useState } from 'react'; + +// ============================================================================ +// Resource loading +// ============================================================================ + +const CONFIG_BUNDLE_STEPS: RunABTestStep[] = [ + 'mode', + 'gateway', + 'control', + 'treatment', + 'onlineEval', + 'filter', + 'name', + 'confirm', +]; + +const TARGET_BASED_STEPS: RunABTestStep[] = ['mode', 'gateway', 'control', 'treatment', 
'filter', 'name', 'confirm']; + +const STEP_LABELS: Record = { + mode: 'Mode', + gateway: 'Gateway', + control: 'Control', + treatment: 'Treatment', + onlineEval: 'Online Eval', + filter: 'Filter', + name: 'Name', + confirm: 'Confirm', +}; + +async function loadResources(): Promise<{ resources: ABTestResources; region: string }> { + const configIO = new ConfigIO(); + const [projectSpec, deployedState, awsTargets] = await Promise.all([ + configIO.readProjectSpec(), + configIO.readDeployedState(), + configIO.resolveAWSDeploymentTargets(), + ]); + + const bundles: { name: string; bundleId: string }[] = []; + const gateways = new Set(); + const targets = new Set(); + const onlineEvalConfigs = new Set(); + + for (const target of Object.values(deployedState.targets ?? {})) { + const resources = target.resources; + if (!resources) continue; + for (const [name, state] of Object.entries(resources.configBundles ?? {})) { + bundles.push({ name, bundleId: state.bundleId }); + } + for (const name of Object.keys(resources.mcp?.gateways ?? {})) gateways.add(name); + for (const name of Object.keys(resources.gateways ?? {})) gateways.add(name); + for (const name of Object.keys(resources.onlineEvalConfigs ?? {})) onlineEvalConfigs.add(name); + } + + // Gateway-target names come from project spec (deployed as `${project}-${target}`). + for (const gw of projectSpec.agentCoreGateways ?? []) { + for (const t of gw.targets ?? []) { + if (t.targetType === 'httpRuntime') targets.add(t.name); + } + } + + const runtimes = (projectSpec.runtimes ?? []).map(r => r.name); + const region = awsTargets[0]?.region ?? process.env.AWS_DEFAULT_REGION ?? process.env.AWS_REGION ?? 
'us-east-1'; + + return { + resources: { + gateways: [...gateways], + bundles, + targets: [...targets], + runtimes, + onlineEvalConfigs: [...onlineEvalConfigs], + }, + region, + }; +} + +// ============================================================================ +// Flow Component +// ============================================================================ + +type FlowState = + | { name: 'loading' } + | { name: 'wizard'; resources: ABTestResources; region: string } + | { name: 'starting'; config: RunABTestConfig } + | { name: 'started'; record: ABTestJobRecord; config: RunABTestConfig } + | { name: 'creds-error'; message: string } + | { name: 'error'; message: string }; + +interface RunABTestFlowProps { + onExit: () => void; + /** Navigate to the A/B Test Jobs screen (falls back to onExit when not provided). */ + onViewJobs?: () => void; +} + +export function RunABTestFlow({ onExit, onViewJobs }: RunABTestFlowProps) { + const engine = useMemo(() => createJobEngine(new ConfigIO()), []); + const [flow, setFlow] = useState({ name: 'loading' }); + + useEffect(() => { + if (flow.name !== 'loading') return; + let cancelled = false; + + void (async () => { + try { + await validateAwsCredentials(); + } catch (err) { + if (!cancelled) setFlow({ name: 'creds-error', message: getErrorMessage(err) }); + return; + } + + try { + const { resources, region } = await loadResources(); + if (cancelled) return; + if (resources.gateways.length === 0) { + setFlow({ + name: 'error', + message: 'No deployed gateway found. Run `agentcore add gateway` and `agentcore deploy` first.', + }); + return; + } + setFlow({ name: 'wizard', resources, region }); + } catch (err) { + if (!cancelled) setFlow({ name: 'error', message: getErrorMessage(err) }); + } + })(); + + return () => { + cancelled = true; + }; + }, [flow.name]); + + // Fire-and-forget: start the A/B test job, then show the Started confirmation screen. 
+ useEffect(() => { + if (flow.name !== 'starting') return; + let cancelled = false; + const { config } = flow; + + void (async () => { + try { + const opts: StartABTestJobOptions = { + name: config.name, + mode: config.mode, + gateway: config.gateway, + agent: config.runtime || undefined, + runtime: config.runtime || undefined, + controlBundle: config.mode === 'config-bundle' ? config.controlBundle : undefined, + controlVersion: config.mode === 'config-bundle' ? config.controlVersion : undefined, + treatmentBundle: config.mode === 'config-bundle' ? config.treatmentBundle : undefined, + treatmentVersion: config.mode === 'config-bundle' ? config.treatmentVersion : undefined, + controlTarget: config.mode === 'target-based' ? config.controlTarget : undefined, + treatmentTarget: config.mode === 'target-based' ? config.treatmentTarget : undefined, + onlineEval: config.mode === 'config-bundle' ? config.onlineEval : undefined, + controlOnlineEval: config.mode === 'target-based' ? config.controlOnlineEval : undefined, + treatmentOnlineEval: config.mode === 'target-based' ? 
config.treatmentOnlineEval : undefined, + gatewayFilter: config.gatewayFilter.trim() || undefined, + controlWeight: config.controlWeight, + treatmentWeight: config.treatmentWeight, + enableOnCreate: true, + }; + const result = await engine.start('ab-test', opts); + if (cancelled) return; + if (!result.success) { + setFlow({ name: 'error', message: result.error.message }); + return; + } + setFlow({ name: 'started', record: result.record, config }); + } catch (err) { + if (!cancelled) setFlow({ name: 'error', message: getErrorMessage(err) }); + } + })(); + + return () => { + cancelled = true; + }; + }, [flow.name]); // eslint-disable-line react-hooks/exhaustive-deps + + if (flow.name === 'loading') { + return ( + + + + ); + } + + if (flow.name === 'creds-error') { + return ; + } + + if (flow.name === 'wizard') { + return ( + setFlow({ name: 'starting', config })} + onExit={onExit} + /> + ); + } + + if (flow.name === 'starting') { + return ( + + + + ); + } + + if (flow.name === 'started') { + return ( + setFlow({ name: 'loading' })} + onViewJobs={onViewJobs} + onExit={onExit} + /> + ); + } + + return ( + setFlow({ name: 'loading' })} + onExit={onExit} + /> + ); +} + +// ============================================================================ +// Started confirmation view +// ============================================================================ + +interface StartedViewProps { + record: ABTestJobRecord; + config: RunABTestConfig; + onRunAnother: () => void; + onViewJobs?: () => void; + onExit: () => void; +} + +function StartedView({ record, config, onRunAnother, onViewJobs, onExit }: StartedViewProps) { + const actions = [ + { id: 'jobs', title: 'View jobs' }, + { id: 'another', title: 'Run another' }, + { id: 'back', title: 'Back' }, + ]; + + const nav = useListNavigation({ + items: actions, + onSelect: item => { + if (item.id === 'jobs') (onViewJobs ?? 
onExit)(); + else if (item.id === 'another') onRunAnother(); + else onExit(); + }, + onExit, + isActive: true, + }); + + return ( + + + + + ✓ {record.id} ({record.status}) + + + Name: {config.name} + {' '} + Mode: {config.mode} + {' '} + Gateway: {config.gateway} + + + + Track its progress and results in A/B Test Jobs. + + + + {actions.map((action, idx) => { + const selected = idx === nav.selectedIndex; + return ( + + {selected ? '❯' : ' '} + + {action.title} + + + ); + })} + + + + + ); +} + +// ============================================================================ +// Wizard Component +// ============================================================================ + +interface RunABTestWizardProps { + resources: ABTestResources; + region: string; + onComplete: (config: RunABTestConfig) => void; + onExit: () => void; +} + +function RunABTestWizard({ resources, region, onComplete, onExit }: RunABTestWizardProps) { + const [config, setConfig] = useState({ + mode: 'config-bundle', + name: '', + gateway: resources.gateways[0] ?? '', + controlBundle: '', + controlVersion: 'LATEST', + treatmentBundle: '', + treatmentVersion: 'LATEST', + controlTarget: '', + treatmentTarget: '', + runtime: resources.runtimes[0] ?? '', + controlWeight: 50, + treatmentWeight: 50, + onlineEval: '', + controlOnlineEval: '', + treatmentOnlineEval: '', + gatewayFilter: '', + }); + + const steps = config.mode === 'target-based' ? TARGET_BASED_STEPS : CONFIG_BUNDLE_STEPS; + const [step, setStep] = useState('mode'); + const currentIndex = steps.indexOf(step); + + // Live draft of the optional filter input, so the footer hint can flip submit/skip. 
+ const [filterDraft, setFilterDraft] = useState(config.gatewayFilter); + + const goBack = useCallback(() => { + const prev = steps[currentIndex - 1]; + if (prev) setStep(prev); + else onExit(); + }, [steps, currentIndex, onExit]); + + const goNext = useCallback(() => { + const next = steps[currentIndex + 1]; + if (next) setStep(next); + }, [steps, currentIndex]); + + // ── step item lists ── + const modeItems: SelectableItem[] = useMemo( + () => [ + { id: 'config-bundle', title: 'Config bundle', description: 'Compare two configuration bundle versions' }, + { id: 'target-based', title: 'Target based', description: 'Compare two gateway-target runtime endpoints' }, + ], + [] + ); + const gatewayItems: SelectableItem[] = useMemo( + () => resources.gateways.map(g => ({ id: g, title: g })), + [resources.gateways] + ); + const onlineEvalItems: SelectableItem[] = useMemo( + () => resources.onlineEvalConfigs.map(c => ({ id: c, title: c })), + [resources.onlineEvalConfigs] + ); + + const isStep = (s: RunABTestStep) => step === s; + + const modeNav = useListNavigation({ + items: modeItems, + onSelect: item => { + setConfig(c => ({ ...c, mode: item.id as ABTestMode })); + setStep('gateway'); + }, + onExit, + isActive: isStep('mode'), + }); + + const gatewayNav = useListNavigation({ + items: gatewayItems, + onSelect: item => { + setConfig(c => ({ ...c, gateway: item.id })); + goNext(); + }, + onExit: goBack, + isActive: isStep('gateway'), + }); + + const onlineEvalNav = useListNavigation({ + items: onlineEvalItems, + onSelect: item => { + setConfig(c => ({ ...c, onlineEval: item.id })); + goNext(); + }, + onExit: goBack, + isActive: isStep('onlineEval'), + }); + + useListNavigation({ + items: [{ id: 'confirm', title: 'Confirm' }], + onSelect: () => onComplete(config), + onExit: goBack, + isActive: isStep('confirm'), + }); + + const helpText = isStep('filter') + ? filterDraft.trim() + ? 
'Enter submit · Esc back' + : 'Enter to skip · Esc back' + : isStep('control') || isStep('treatment') || isStep('name') + ? HELP_TEXT.TEXT_INPUT + : HELP_TEXT.NAVIGATE_SELECT; + const headerContent = ; + + return ( + + + {isStep('mode') && ( + + )} + + {isStep('gateway') && ( + + )} + + {isStep('control') && ( + ({ id: b.name, title: b.name }))} + targetItems={resources.targets.map(t => ({ id: t, title: t }))} + evalItems={onlineEvalItems} + initialBundle={config.controlBundle} + initialVersion={config.controlVersion} + initialTarget={config.controlTarget} + initialEval={config.controlOnlineEval} + initialWeight={config.controlWeight} + onPartialUpdate={(bundle, version, target, evalCfg) => { + setConfig(c => ({ + ...c, + controlBundle: bundle, + controlVersion: version, + controlTarget: target, + controlOnlineEval: evalCfg, + })); + }} + onComplete={(bundle, version, target, evalCfg, weight) => { + setConfig(c => ({ + ...c, + controlBundle: bundle, + controlVersion: version, + controlTarget: target, + controlOnlineEval: evalCfg, + controlWeight: weight, + treatmentWeight: 100 - weight, + })); + goNext(); + }} + onCancel={goBack} + /> + )} + + {isStep('treatment') && ( + ({ id: b.name, title: b.name }))} + targetItems={resources.targets.map(t => ({ id: t, title: t }))} + evalItems={onlineEvalItems} + initialBundle={config.treatmentBundle} + initialVersion={config.treatmentVersion} + initialTarget={config.treatmentTarget} + initialEval={config.treatmentOnlineEval} + initialWeight={config.treatmentWeight} + onPartialUpdate={(bundle, version, target, evalCfg) => { + setConfig(c => ({ + ...c, + treatmentBundle: bundle, + treatmentVersion: version, + treatmentTarget: target, + treatmentOnlineEval: evalCfg, + })); + }} + onComplete={(bundle, version, target, evalCfg, weight) => { + setConfig(c => ({ + ...c, + treatmentBundle: bundle, + treatmentVersion: version, + treatmentTarget: target, + treatmentOnlineEval: evalCfg, + treatmentWeight: weight, + controlWeight: 100 - 
weight, + })); + goNext(); + }} + onCancel={goBack} + /> + )} + + {isStep('onlineEval') && ( + + )} + + {isStep('filter') && ( + + Gateway filter (optional) + Restrict the test to one gateway target path pattern (e.g. "/orders/*"). + Leave blank to have no gateway filter. + + setFilterDraft(value)} + onSubmit={value => { + setConfig(c => ({ ...c, gatewayFilter: value.trim() })); + goNext(); + }} + onCancel={goBack} + /> + + + )} + + {isStep('name') && ( + + A short name for this A/B test. + { + if (value.trim()) { + setConfig(c => ({ ...c, name: value.trim() })); + goNext(); + } + }} + onCancel={goBack} + /> + + )} + + {isStep('confirm') && ( + + )} + + + ); +} + +// ============================================================================ +// Variant Form — one screen per variant with all its fields +// ============================================================================ + +type VariantSubField = 'picker' | 'version' | 'eval' | 'weight'; + +interface VariantFormProps { + variant: 'Control' | 'Treatment'; + mode: ABTestMode; + bundleItems: SelectableItem[]; + targetItems: SelectableItem[]; + evalItems: SelectableItem[]; + initialBundle: string; + initialVersion: string; + initialTarget: string; + initialEval: string; + initialWeight: number; + onPartialUpdate?: (bundle: string, version: string, target: string, evalCfg: string) => void; + onComplete: (bundle: string, version: string, target: string, evalCfg: string, weight: number) => void; + onCancel: () => void; +} + +function VariantForm({ + variant, + mode, + bundleItems, + targetItems, + evalItems, + initialBundle, + initialVersion, + initialTarget, + initialEval, + initialWeight, + onPartialUpdate, + onComplete, + onCancel, +}: VariantFormProps) { + const isConfigBundle = mode === 'config-bundle'; + const fields: VariantSubField[] = useMemo( + () => (isConfigBundle ? 
['picker', 'version', 'weight'] : ['picker', 'eval', 'weight']), + [isConfigBundle] + ); + + const [activeField, setActiveField] = useState('picker'); + const [selectedPicker, setSelectedPicker] = useState(isConfigBundle ? initialBundle : initialTarget); + const [version, setVersion] = useState(initialVersion); + const [evalCfg, setEvalCfg] = useState(initialEval); + const [weight, setWeight] = useState(String(initialWeight)); + + const advanceField = useCallback(() => { + const idx = fields.indexOf(activeField); + const next = fields[idx + 1]; + if (next) { + // Save partial state to parent so going back preserves selections + onPartialUpdate?.(isConfigBundle ? selectedPicker : '', version, isConfigBundle ? '' : selectedPicker, evalCfg); + setActiveField(next); + } else { + const w = parseInt(weight, 10); + onComplete( + isConfigBundle ? selectedPicker : '', + version, + isConfigBundle ? '' : selectedPicker, + evalCfg, + isNaN(w) ? initialWeight : w + ); + } + }, [ + activeField, + fields, + weight, + selectedPicker, + version, + evalCfg, + isConfigBundle, + initialWeight, + onComplete, + onPartialUpdate, + ]); + + const goBackField = useCallback(() => { + const idx = fields.indexOf(activeField); + if (idx > 0) setActiveField(fields[idx - 1]!); + else onCancel(); + }, [activeField, fields, onCancel]); + + const pickerItems = isConfigBundle ? bundleItems : targetItems; + + const pickerNav = useListNavigation({ + items: pickerItems, + onSelect: item => { + setSelectedPicker(item.id); + advanceField(); + }, + onExit: goBackField, + isActive: activeField === 'picker', + }); + + const evalNav = useListNavigation({ + items: evalItems, + onSelect: item => { + setEvalCfg(item.id); + advanceField(); + }, + onExit: goBackField, + isActive: activeField === 'eval', + }); + + return ( + + {variant} Variant + {'─'.repeat(30)} + + {/* Summary of completed fields (shown above the active input) */} + {selectedPicker && activeField !== 'picker' && ( + + {isConfigBundle ? 
'Bundle:' : 'Target:'} {selectedPicker} + + )} + {version && activeField === 'weight' && isConfigBundle && ( + + Version: {version} + + )} + {evalCfg && activeField === 'weight' && !isConfigBundle && ( + + Online Eval: {evalCfg} + + )} + + {/* Active field */} + + {activeField === 'picker' && ( + + )} + + {activeField === 'version' && ( + + Bundle version (or LATEST): + { + setVersion(value || 'LATEST'); + advanceField(); + }} + onCancel={goBackField} + /> + + )} + + {activeField === 'eval' && ( + + )} + + {activeField === 'weight' && ( + + Traffic weight (0-100): + { + const w = parseInt(value, 10); + if (!isNaN(w) && w >= 0 && w <= 100) { + setWeight(value); + advanceField(); + } + }} + onCancel={goBackField} + customValidation={value => { + const w = parseInt(value, 10); + if (isNaN(w)) return 'Must be a number'; + if (w < 0 || w > 100) return 'Must be between 0 and 100'; + return true; + }} + /> + + )} + + + ); +} diff --git a/src/cli/tui/screens/run-ab-test/index.ts b/src/cli/tui/screens/run-ab-test/index.ts new file mode 100644 index 000000000..0f655333b --- /dev/null +++ b/src/cli/tui/screens/run-ab-test/index.ts @@ -0,0 +1,2 @@ +export { RunABTestFlow } from './RunABTestFlow'; +export { ABTestJobsHistoryScreen } from './ABTestJobsHistoryScreen'; diff --git a/src/cli/tui/screens/run-ab-test/types.ts b/src/cli/tui/screens/run-ab-test/types.ts new file mode 100644 index 000000000..215ebd11d --- /dev/null +++ b/src/cli/tui/screens/run-ab-test/types.ts @@ -0,0 +1,37 @@ +import type { ABTestMode } from '../../../operations/jobs'; + +/** Wizard step ids for the run-as-job A/B test flow. 
*/ +export type RunABTestStep = 'mode' | 'gateway' | 'control' | 'treatment' | 'onlineEval' | 'filter' | 'name' | 'confirm'; + +export interface RunABTestConfig { + mode: ABTestMode; + name: string; + gateway: string; + // config-bundle mode + controlBundle: string; + controlVersion: string; + treatmentBundle: string; + treatmentVersion: string; + // target-based mode + controlTarget: string; + treatmentTarget: string; + runtime: string; + // eval configs + onlineEval: string; + controlOnlineEval: string; + treatmentOnlineEval: string; + // shared + controlWeight: number; + treatmentWeight: number; + /** Single gateway target path pattern; blank means no gateway filter. Applies to both modes. */ + gatewayFilter: string; +} + +/** Deployed resource lists loaded once for the wizard's pickers. */ +export interface ABTestResources { + gateways: string[]; + bundles: { name: string; bundleId: string }[]; + targets: string[]; + runtimes: string[]; + onlineEvalConfigs: string[]; +} diff --git a/src/cli/tui/screens/run-eval/BatchEvalHistoryScreen.tsx b/src/cli/tui/screens/run-eval/BatchEvalHistoryScreen.tsx index 642759154..96ee676bd 100644 --- a/src/cli/tui/screens/run-eval/BatchEvalHistoryScreen.tsx +++ b/src/cli/tui/screens/run-eval/BatchEvalHistoryScreen.tsx @@ -1,10 +1,14 @@ -import type { BatchEvalRunRecord } from '../../../operations/eval/batch-eval-storage'; -import { listBatchEvalRuns } from '../../../operations/eval/batch-eval-storage'; -import { Panel, Screen } from '../../components'; +import { ConfigIO } from '../../../../lib'; +import { validateAwsCredentials } from '../../../aws/account'; +import { getErrorMessage } from '../../../errors'; +import { createJobEngine } from '../../../operations/jobs'; +import type { BatchEvaluationJobRecord } from '../../../operations/jobs'; +import { ErrorPrompt, Panel, Screen } from '../../components'; import { HELP_TEXT } from '../../constants'; import { useListNavigation } from '../../hooks'; -import { Box, Text, useInput, 
useStdout } from 'ink'; -import React, { useMemo, useState } from 'react'; +import { BatchEvalDetailView, scoreColor, statusColor } from '../job-detail'; +import { Box, Text, useStdout } from 'ink'; +import React, { useCallback, useEffect, useMemo, useState } from 'react'; const MONTHS = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']; @@ -19,19 +23,6 @@ function formatShortDate(timestamp: string): string { return `${mon} ${day} ${h12}:${m} ${ampm}`; } -function statusColor(status: string): string { - if (status === 'COMPLETED' || status === 'SUCCEEDED') return 'green'; - if (status === 'FAILED') return 'red'; - if (status === 'IN_PROGRESS' || status === 'PENDING') return 'yellow'; - return 'gray'; -} - -function scoreColor(score: number): string { - if (score >= 0.8) return 'green'; - if (score >= 0.5) return 'yellow'; - return 'red'; -} - const CHROME_LINES = 9; // ───────────────────────────────────────────────────────────────────────────── @@ -44,8 +35,8 @@ function BatchEvalListView({ onExit, availableHeight, }: { - records: BatchEvalRunRecord[]; - onSelect: (record: BatchEvalRunRecord) => void; + records: BatchEvaluationJobRecord[]; + onSelect: (record: BatchEvaluationJobRecord) => void; onExit: () => void; availableHeight: number; }) { @@ -68,7 +59,7 @@ function BatchEvalListView({ return ( - Batch Evaluation History + Batch Evaluation Jobs {records.length} batch evaluation{records.length !== 1 ? 's' : ''} @@ -76,48 +67,39 @@ function BatchEvalListView({ {visible.items.map((rec, vIdx) => { const idx = visible.startIdx + vIdx; const selected = idx === nav.selectedIndex; - const date = rec.startedAt ? formatShortDate(rec.startedAt) : 'unknown'; + const date = rec.createdAt ? 
formatShortDate(rec.createdAt) : 'unknown'; - // Build a short score summary from evaluationResults or results - const summaries = rec.evaluationResults?.evaluatorSummaries; - let scoreText = ''; - if (summaries && summaries.length > 0) { - scoreText = summaries - .map(s => { - const avg = s.statistics?.averageScore; - return avg != null ? avg.toFixed(2) : 'N/A'; - }) - .join(', '); - } else if (rec.results.length > 0) { - const byEval = new Map(); - for (const r of rec.results) { - if (r.score != null) { - const scores = byEval.get(r.evaluatorId) ?? []; - scores.push(r.score); - byEval.set(r.evaluatorId, scores); - } - } - scoreText = [...byEval.entries()] - .map(([, scores]) => (scores.reduce((a, b) => a + b, 0) / scores.length).toFixed(2)) - .join(', '); - } + // Average score per evaluator, read straight from the API summaries in the record. + const avgScores = (rec.evaluationResults?.evaluatorSummaries ?? []) + .map(s => s.statistics?.averageScore) + .filter((v): v is number => v != null); const datasetLabel = rec.source === 'dataset' && rec.dataset ? ` [${rec.dataset.id}@${rec.dataset.version}]` : ''; return ( - - {selected ? '>' : ' '} + + {selected ? '❯' : ' '} {date.padEnd(16)} {rec.status.padEnd(12)} - {scoreText && {scoreText.padEnd(10)}} + avg + {avgScores.length > 0 ? ( + avgScores.map((avg, i) => ( + + {avg.toFixed(2)} + {i < avgScores.length - 1 ? 
, : ' '} + + )) + ) : ( + {'—'.padEnd(7)} + )} {rec.name} {datasetLabel && {datasetLabel}} ); })} {visible.startIdx + maxVisible < records.length && ( - {records.length - visible.startIdx - maxVisible} more + ↓ {records.length - visible.startIdx - maxVisible} more )} @@ -125,167 +107,86 @@ function BatchEvalListView({ ); } -// ───────────────────────────────────────────────────────────────────────────── -// Detail view -// ───────────────────────────────────────────────────────────────────────────── - -function BatchEvalDetailView({ record, onBack }: { record: BatchEvalRunRecord; onBack: () => void }) { - useInput((input, key) => { - if (key.escape || input === 'b') { - onBack(); - } - }); - - const evalRes = record.evaluationResults; - const summaries = evalRes?.evaluatorSummaries; - - // Fall back to local grouping when API summaries aren't available - const byEvaluator = useMemo(() => { - if (summaries && summaries.length > 0) return null; - const map = new Map(); - for (const r of record.results) { - const entry = map.get(r.evaluatorId) ?? { scores: [], errors: 0 }; - if (r.error) { - entry.errors++; - } else if (r.score != null) { - entry.scores.push(r.score); - } - map.set(r.evaluatorId, entry); - } - return map; - }, [record.results, summaries]); - - return ( - - - - ID: {record.batchEvaluationId} - - - Name: {record.name} - {' '} - Status: {record.status} - - - Evaluators: {record.evaluators.join(', ')} - - {record.source === 'dataset' && record.dataset && ( - - Dataset: {record.dataset.id} (version: {record.dataset.version}) - - )} - {record.startedAt && ( - - Started: {new Date(record.startedAt).toLocaleString()} - - )} - {record.completedAt && ( - - Completed: {new Date(record.completedAt).toLocaleString()} - - )} - - {evalRes?.totalNumberOfSessions != null && ( - - Sessions: {evalRes.totalNumberOfSessions} total - {evalRes.numberOfSessionsCompleted != null && , {evalRes.numberOfSessionsCompleted} completed} - {evalRes.numberOfSessionsFailed ? 
, {evalRes.numberOfSessionsFailed} failed : null} - - )} - - {summaries && summaries.length > 0 ? ( - - Scores (0 worst — 1 best): - {summaries.map(s => { - const avg = s.statistics?.averageScore; - const avgStr = avg != null ? avg.toFixed(2) : 'N/A'; - const color = avg != null ? scoreColor(avg) : undefined; - return ( - - {' '} - {s.evaluatorId} - {' '} - {avgStr} - {s.totalFailed ? ({s.totalFailed} failed) : null} - {s.totalEvaluated != null && [{s.totalEvaluated} evaluated]} - - ); - })} - - ) : byEvaluator && byEvaluator.size > 0 ? ( - - Scores (0 worst — 1 best): - {[...byEvaluator.entries()].map(([evalId, { scores, errors }]) => { - const avg = scores.length > 0 ? scores.reduce((a, b) => a + b, 0) / scores.length : 0; - return ( - - {' '} - {evalId} - {' '} - {avg.toFixed(2)} - {errors > 0 && ({errors} errors)} - - ); - })} - - ) : ( - - No evaluation results available. - - )} - - - Press Esc or B to go back - - - - ); -} - // ───────────────────────────────────────────────────────────────────────────── // Main screen // ───────────────────────────────────────────────────────────────────────────── +type FlowState = + | { name: 'loading' } + | { name: 'creds-error'; message: string } + | { name: 'error'; message: string } + | { name: 'loaded'; records: BatchEvaluationJobRecord[] }; + interface BatchEvalHistoryScreenProps { onExit: () => void; } export function BatchEvalHistoryScreen({ onExit }: BatchEvalHistoryScreenProps) { + const engine = useMemo(() => createJobEngine(new ConfigIO()), []); const { stdout } = useStdout(); const terminalHeight = stdout?.rows ?? 
24; const availableHeight = Math.max(6, terminalHeight - CHROME_LINES); - const [selectedRecord, setSelectedRecord] = useState(null); + const [flow, setFlow] = useState({ name: 'loading' }); + const [selectedRecord, setSelectedRecord] = useState(null); - const [records, loaded, error] = useMemo(() => { - try { - return [listBatchEvalRuns(), true, null] as const; - } catch (err) { - return [[] as BatchEvalRunRecord[], true, err instanceof Error ? err.message : String(err)] as const; - } + useEffect(() => { + let cancelled = false; + + void (async () => { + try { + await validateAwsCredentials(); + } catch (err) { + if (!cancelled) setFlow({ name: 'creds-error', message: getErrorMessage(err) }); + return; + } + + try { + const records = await engine.list({ type: 'batch-evaluation' }); + if (!cancelled) setFlow({ name: 'loaded', records }); + } catch (err) { + if (!cancelled) setFlow({ name: 'error', message: getErrorMessage(err) }); + } + })(); + + return () => { + cancelled = true; + }; + }, [engine]); + + // Apply an updated record (e.g. after a stop) into both the selection and the list. + const handleUpdate = useCallback((updated: BatchEvaluationJobRecord) => { + setSelectedRecord(updated); + setFlow(prev => + prev.name === 'loaded' ? { ...prev, records: prev.records.map(r => (r.id === updated.id ? updated : r)) } : prev + ); }, []); - if (!loaded) { + if (flow.name === 'loading') { return ( - - Loading... + + Loading batch evaluation jobs... ); } - if (error) { + if (flow.name === 'creds-error') { + return ; + } + + if (flow.name === 'error') { return ( - - {error} + + {flow.message} ); } - if (records.length === 0) { + if (flow.records.length === 0) { return ( - + - No batch evaluation runs found. + No batch evaluation jobs found. Run a batch evaluation from the TUI or CLI to see results here. @@ -295,17 +196,17 @@ export function BatchEvalHistoryScreen({ onExit }: BatchEvalHistoryScreenProps) const helpText = selectedRecord ? 
'Esc/B back to list' : HELP_TEXT.NAVIGATE_SELECT; return ( - + {selectedRecord ? ( - setSelectedRecord(null)} /> + setSelectedRecord(null)} + onUpdate={handleUpdate} + /> ) : ( = { + source: 'Source', agent: 'Agent', evaluators: 'Evaluators', days: 'Lookback', sessions: 'Sessions', 'ground-truth': 'Ground Truth', + 'kms-key-arn': 'KMS Key', name: 'Name', confirm: 'Confirm', }; @@ -82,17 +97,12 @@ type FlowState = dataset?: string; datasetVersion?: string; } - | { - name: 'running'; - config: BatchEvalConfig; - steps: Step[]; - elapsed: number; - batchEvaluationId?: string; - region?: string; - } - | { name: 'results'; result: RunBatchEvaluationCommandResult; savedFilePath?: string } + // Dataset mode only: blocking Phase-1 invocation of dataset scenarios before engine.start. + | { name: 'phase1'; config: BatchEvalConfig; message: string } + | { name: 'starting'; config: BatchEvalConfig } + | { name: 'started'; record: BatchEvaluationJobRecord; config: BatchEvalConfig } | { name: 'creds-error'; message: string } - | { name: 'error'; message: string; logFilePath?: string }; + | { name: 'error'; message: string }; // ============================================================================ // Flow Component @@ -100,29 +110,13 @@ type FlowState = interface RunBatchEvalFlowProps { onExit: () => void; + /** Navigate to the Batch Eval Jobs screen (falls back to onExit when not provided). 
*/ + onViewJobs?: () => void; } -export function RunBatchEvalFlow({ onExit }: RunBatchEvalFlowProps) { +export function RunBatchEvalFlow({ onExit, onViewJobs }: RunBatchEvalFlowProps) { + const engine = useMemo(() => createJobEngine(new ConfigIO()), []); const [flow, setFlow] = useState({ name: 'loading' }); - const stoppingRef = useRef(false); - - // Handle Esc to stop a running batch evaluation - useInput((_input, key) => { - if (flow.name !== 'running' || !flow.batchEvaluationId || !flow.region || stoppingRef.current) return; - if (key.escape) { - stoppingRef.current = true; - void stopBatchEvaluation({ region: flow.region, batchEvaluationId: flow.batchEvaluationId }).catch(() => { - // Best-effort — the poll loop will pick up the final status - }); - setFlow(prev => { - if (prev.name !== 'running') return prev; - const steps = prev.steps.map(s => - s.status === 'running' ? { ...s, status: 'error' as const, error: 'Stopping...' } : s - ); - return { ...prev, steps }; - }); - } - }); // Load agents and evaluators useEffect(() => { @@ -201,148 +195,160 @@ export function RunBatchEvalFlow({ onExit }: RunBatchEvalFlowProps) { const handleWizardComplete = useCallback( (config: BatchEvalConfig) => { - // Inject dataset info from source-picker selection - if (flow.name === 'wizard' && flow.source === 'dataset') { - config = { ...config, dataset: flow.dataset, datasetVersion: flow.datasetVersion }; - } - stoppingRef.current = false; + // Dataset mode needs a blocking pre-start phase ('phase1': invoke scenarios + ~180s ingestion + // wait) to produce the sessionIds before starting. Historical-traces mode already has its + // sessions (collected in the wizard), so it skips straight to 'starting'. That asymmetry is + // intentional — only dataset mode has pre-start work. const isDataset = flow.name === 'wizard' && flow.source === 'dataset'; - const initialSteps: Step[] = isDataset - ? 
[ - { label: 'Running dataset scenarios...', status: 'running' }, - { label: 'Starting batch evaluation', status: 'pending' }, - { label: 'Polling for results', status: 'pending' }, - { label: 'Fetching scores', status: 'pending' }, - ] - : [ - { label: 'Starting batch evaluation...', status: 'running' }, - { label: 'Polling for results', status: 'pending' }, - { label: 'Fetching scores', status: 'pending' }, - ]; - setFlow({ name: 'running', config, steps: initialSteps, elapsed: 0 }); + if (isDataset && flow.name === 'wizard') { + // Inject dataset info from source-picker selection + const datasetConfig = { ...config, dataset: flow.dataset, datasetVersion: flow.datasetVersion }; + setFlow({ + name: 'phase1', + config: datasetConfig, + message: `Loading dataset "${flow.dataset ?? 'default'}"...`, + }); + } else { + setFlow({ name: 'starting', config }); + } }, [flow] ); - // Execute batch evaluation + // Phase 1 (dataset mode only): invoke dataset scenarios, build ground-truth metadata, then start. 
useEffect(() => { - if (flow.name !== 'running') return; + if (flow.name !== 'phase1') return; let cancelled = false; const { config } = flow; - const startTime = Date.now(); - const timer = setInterval(() => { - if (!cancelled) { - setFlow(prev => { - if (prev.name !== 'running') return prev; - return { ...prev, elapsed: Math.floor((Date.now() - startTime) / 1000) }; + void (async () => { + try { + const configIO = new ConfigIO(); + const [projectSpec, deployedState, awsTargets] = await Promise.all([ + configIO.readProjectSpec(), + configIO.readDeployedState(), + configIO.resolveAWSDeploymentTargets(), + ]); + + const agentContext = await resolveAgentContext({ + project: projectSpec, + deployedState, + awsTargets, + agentName: config.agent, }); + + if (cancelled) return; + + const datasetResult = await runDatasetScenarios({ + agentContext, + datasetName: config.dataset!, + version: config.datasetVersion, + configBaseDir: configIO.getConfigRoot(), + onProgress: (_phase, msg) => { + if (!cancelled) setFlow(prev => (prev.name === 'phase1' ? { ...prev, message: msg } : prev)); + }, + }); + + if (cancelled) return; + + const successfulResults = datasetResult.scenarioResults.filter(r => r.status === 'success'); + if (successfulResults.length === 0) { + setFlow({ name: 'error', message: 'All scenarios failed during invocation. No sessions to evaluate.' }); + return; + } + + const sessionIds = successfulResults.map(r => r.sessionId); + + // Build sessionMetadata with ground truth from dataset scenarios + const sessionMetadata: SessionMetadataEntry[] = successfulResults.map(r => { + const scenario = datasetResult.scenarios.find(s => s.scenario_id === r.scenarioId); + return { + sessionId: r.sessionId, + testScenarioId: r.scenarioId, + groundTruth: scenario + ? { + inline: { + ...(scenario.assertions ? { assertions: scenario.assertions.map(a => ({ text: a })) } : {}), + ...(scenario.expected_trajectory + ? 
{ expectedTrajectory: { toolNames: scenario.expected_trajectory } } + : {}), + ...(scenario.turns.some(t => t.expectedResponse) + ? { + turns: scenario.turns.map(t => ({ + input: { prompt: t.input }, + ...(t.expectedResponse ? { expectedResponse: { text: t.expectedResponse } } : {}), + })), + } + : {}), + }, + } + : undefined, + }; + }) as SessionMetadataEntry[]; + + setFlow(prev => + prev.name === 'phase1' ? { ...prev, message: 'Waiting 180s for CloudWatch span ingestion...' } : prev + ); + + // Wait for CloudWatch span ingestion before submitting — the batch service + // queries CloudWatch server-side, so we can't poll. Match SDK default (180s). + await sleep(BATCH_INGESTION_DELAY_MS); + if (cancelled) return; + + setFlow({ name: 'starting', config: { ...config, sessionIds, sessionMetadata } }); + } catch (err) { + if (!cancelled) setFlow({ name: 'error', message: getErrorMessage(err) }); } - }, 1000); + })(); + + return () => { + cancelled = true; + }; + }, [flow.name]); // eslint-disable-line react-hooks/exhaustive-deps + + // Fire-and-forget: start the batch evaluation job, then show the Started confirmation screen. + useEffect(() => { + if (flow.name !== 'starting') return; + let cancelled = false; + + const { config } = flow; void (async () => { try { - const result = await runBatchEvaluationCommand({ + const result = await engine.start('batch-evaluation', { agent: config.agent, evaluators: config.evaluators, name: config.name || undefined, sessionIds: config.sessionIds.length > 0 ? 
config.sessionIds : undefined, lookbackDays: config.days, sessionMetadata: config.sessionMetadata, - dataset: config.dataset, - datasetVersion: config.datasetVersion, - onProgress: (status, _message) => { - if (cancelled) return; - setFlow(prev => { - if (prev.name !== 'running') return prev; - const steps = [...prev.steps]; - if (status === 'running') { - steps[0] = { ...steps[0]!, status: 'success' }; - steps[1] = { ...steps[1]!, status: 'running' }; - } - return { ...prev, steps }; - }); - }, - onStarted: info => { - setFlow(prev => { - if (prev.name !== 'running') return prev; - return { ...prev, batchEvaluationId: info.batchEvaluationId, region: info.region }; - }); - }, + source: config.dataset ? 'dataset' : 'traces', + dataset: config.dataset ? { id: config.dataset, version: config.datasetVersion ?? 'LOCAL' } : undefined, + kmsKeyArn: config.kmsKeyArn || undefined, }); - clearInterval(timer); if (cancelled) return; - // Save results locally - let savedFilePath: string | undefined; - if (result.success) { - try { - const datasetInfo = config.dataset - ? { - source: 'dataset' as const, - dataset: { id: config.dataset, version: config.datasetVersion ?? 'LOCAL' }, - } - : {}; - savedFilePath = saveBatchEvalRun({ result, ...datasetInfo }); - } catch { - // Non-fatal - } - } - if (!result.success) { - setFlow(prev => { - if (prev.name !== 'running') return prev; - const steps = prev.steps.map(s => - s.status === 'running' ? { ...s, status: 'error' as const, error: result.error.message } : s - ); - return { ...prev, steps }; - }); - await new Promise(resolve => setTimeout(resolve, 2000)); - if (cancelled) return; - setFlow({ - name: 'error', - message: result.error?.message ?? 
'Batch evaluation failed', - logFilePath: result.logFilePath, - }); + setFlow({ name: 'error', message: result.error.message }); return; } - // Mark all steps success - setFlow(prev => { - if (prev.name !== 'running') return prev; - const steps = prev.steps.map(s => ({ ...s, status: 'success' as const })); - return { ...prev, steps }; - }); - - setFlow({ name: 'results', result, savedFilePath }); + setFlow({ name: 'started', record: result.record, config }); } catch (err) { - clearInterval(timer); - if (!cancelled) { - const errorMsg = getErrorMessage(err); - setFlow(prev => { - if (prev.name !== 'running') return prev; - const steps = prev.steps.map(s => - s.status === 'running' ? { ...s, status: 'error' as const, error: errorMsg } : s - ); - return { ...prev, steps }; - }); - await new Promise(resolve => setTimeout(resolve, 2000)); - setFlow({ name: 'error', message: errorMsg }); - } + if (!cancelled) setFlow({ name: 'error', message: getErrorMessage(err) }); } })(); return () => { cancelled = true; - clearInterval(timer); }; }, [flow.name]); // eslint-disable-line react-hooks/exhaustive-deps if (flow.name === 'loading') { return ( - + ); @@ -389,37 +395,34 @@ export function RunBatchEvalFlow({ onExit }: RunBatchEvalFlowProps) { ); } - if (flow.name === 'running') { - const minutes = Math.floor(flow.elapsed / 60); - const seconds = flow.elapsed % 60; - const timeStr = minutes > 0 ? `${minutes}m ${seconds}s` : `${seconds}s`; - + if (flow.name === 'phase1') { return ( - + - - - Agent: {flow.config.agent} - {' '} - Evaluators: {flow.config.evaluatorNames.join(', ')} - {' '} - ({timeStr}) - - - This may take a few minutes... - {flow.batchEvaluationId && Press Esc to stop the evaluation} + + Phase 1: invoking dataset scenarios... 
+ ); } - if (flow.name === 'results') { + if (flow.name === 'starting') { return ( - + + + ); + } + + if (flow.name === 'started') { + return ( + setFlow({ name: 'loading' })} + onViewJobs={onViewJobs} onExit={onExit} /> ); @@ -428,13 +431,79 @@ export function RunBatchEvalFlow({ onExit }: RunBatchEvalFlowProps) { return ( setFlow({ name: 'loading' })} onExit={onExit} /> ); } +// ============================================================================ +// Started confirmation view +// ============================================================================ + +interface StartedViewProps { + record: BatchEvaluationJobRecord; + config: BatchEvalConfig; + onRunAnother: () => void; + onViewJobs?: () => void; + onExit: () => void; +} + +function StartedView({ record, config, onRunAnother, onViewJobs, onExit }: StartedViewProps) { + const actions = [ + { id: 'jobs', title: 'View jobs' }, + { id: 'another', title: 'Run another' }, + { id: 'back', title: 'Back' }, + ]; + + const nav = useListNavigation({ + items: actions, + onSelect: item => { + if (item.id === 'jobs') (onViewJobs ?? onExit)(); + else if (item.id === 'another') onRunAnother(); + else onExit(); + }, + onExit, + isActive: true, + }); + + return ( + + + + + ✓ {record.id} ({record.status}) + + + Agent: {config.agent} + {' '} + Evaluators: {config.evaluatorNames.join(', ')} + + + + When it completes, view it in Batch Eval Jobs. + + + + {actions.map((action, idx) => { + const selected = idx === nav.selectedIndex; + return ( + + {selected ? '❯' : ' '} + + {action.title} + + + ); + })} + + + + + ); +} + // ============================================================================ // Wizard Component // ============================================================================ @@ -460,11 +529,13 @@ function BatchEvalWizard({ const isDatasetMode = source === 'dataset'; const allSteps = useMemo(() => { if (isDatasetMode) { - return skipAgent ? 
['evaluators', 'name', 'confirm'] : ['agent', 'evaluators', 'name', 'confirm']; + return skipAgent + ? ['evaluators', 'kms-key-arn', 'name', 'confirm'] + : ['agent', 'evaluators', 'kms-key-arn', 'name', 'confirm']; } return skipAgent - ? ['evaluators', 'days', 'sessions', 'ground-truth', 'name', 'confirm'] - : ['agent', 'evaluators', 'days', 'sessions', 'ground-truth', 'name', 'confirm']; + ? ['evaluators', 'days', 'sessions', 'ground-truth', 'kms-key-arn', 'name', 'confirm'] + : ['agent', 'evaluators', 'days', 'sessions', 'ground-truth', 'kms-key-arn', 'name', 'confirm']; }, [skipAgent, isDatasetMode]); const [step, setStep] = useState(allSteps[0]!); @@ -476,6 +547,7 @@ function BatchEvalWizard({ sessionIds: [], groundTruthFile: '', sessionMetadata: undefined, + kmsKeyArn: '', name: '', }); @@ -523,6 +595,7 @@ function BatchEvalWizard({ const isDaysStep = step === 'days'; const isSessionsStep = step === 'sessions'; const isGroundTruthStep = step === 'ground-truth'; + const isKmsKeyArnStep = step === 'kms-key-arn'; const isNameStep = step === 'name'; const isConfirmStep = step === 'confirm'; @@ -704,10 +777,13 @@ function BatchEvalWizard({ ? HELP_TEXT.TEXT_INPUT : HELP_TEXT.CONFIRM_CANCEL; - const headerContent = ; + // Prepend the breadcrumb-only 'source' step so the wizard header matches the source-picker's + // (it renders as a completed step here). 'source' is intentionally absent from navigable allSteps. + const displaySteps = useMemo(() => ['source', ...allSteps], [allSteps]); + const headerContent = ; return ( - + {isAgentStep && ( )} + {isKmsKeyArnStep && ( + { + setConfig(c => ({ ...c, kmsKeyArn: value })); + goNext(); + }} + onCancel={() => goBack()} + customValidation={value => { + if (!value) return true; + if (!isValidKmsKeyArn(value)) { + return 'Invalid KMS key ARN (e.g. arn:aws:kms:us-east-1:123456789012:key/12345678-1234-1234-1234-123456789012)'; + } + return true; + }} + /> + )} + {isNameStep && ( Optional — leave blank for auto-generated name. 
@@ -885,6 +982,7 @@ function BatchEvalWizard({ ] : []), ]), + ...(config.kmsKeyArn ? [{ label: 'KMS Key ARN', value: config.kmsKeyArn }] : []), ...(config.name ? [{ label: 'Name', value: config.name }] : []), ]} /> @@ -921,7 +1019,6 @@ function BatchEvalSourcePicker({ useEffect(() => { void (async () => { try { - const { ConfigIO } = await import('../../../../lib'); const configIO = new ConfigIO(); const spec = await configIO.readProjectSpec(); setDatasets( @@ -1043,218 +1140,85 @@ function BatchEvalSourcePicker({ isActive: step === 'version' && !loadingVersions, }); + // Breadcrumb-only header so the picker shares the wizard's chrome (border + step indicator). + // 'source' is the active step; the remaining steps are a representative preview (the actual + // step list is finalized once a mode is chosen and the wizard takes over). + const pickerSteps: BatchEvalStep[] = ['source', 'evaluators', 'name', 'confirm']; + const pickerHeader = ; + if (step === 'version') { return ( (datasets.length > 1 ? setStep('dataset') : setStep('source'))} + headerContent={pickerHeader} > - - Select version for {selectedDataset}: - {loadingVersions ? ( - - ) : ( - <> - {versionItems.map((item, i) => ( - - {i === versionNav.selectedIndex ? : ' '} - {item.title} - — {item.description} - - ))} - {'\n'}↑↓ Enter select · Esc back - - )} - + + + Select version for {selectedDataset}: + {loadingVersions ? ( + + ) : ( + <> + {versionItems.map((item, i) => ( + + {i === versionNav.selectedIndex ? : ' '} + {item.title} + — {item.description} + + ))} + {'\n'}↑↓ Enter select · Esc back + + )} + + ); } if (step === 'dataset') { return ( - setStep('source')}> + setStep('source')} headerContent={pickerHeader}> + + + Select dataset: + {datasetItems.map((item, i) => ( + + {i === datasetNav.selectedIndex ? 
: ' '} + {item.title} + {item.description && — {item.description}} + + ))} + {'\n'}↑↓ Enter select · Esc back + + + + ); + } + + return ( + + - Select dataset: - {datasetItems.map((item, i) => ( + Evaluation source: + {sourceItems.map((item, i) => ( - {i === datasetNav.selectedIndex ? : ' '} - {item.title} - {item.description && — {item.description}} + {i === sourceNav.selectedIndex ? : ' '} + {item.title} + — {item.description} ))} {'\n'}↑↓ Enter select · Esc back - - ); - } - - return ( - - - Evaluation source: - {sourceItems.map((item, i) => ( - - {i === sourceNav.selectedIndex ? : ' '} - {item.title} - — {item.description} - - ))} - {'\n'}↑↓ Enter select · Esc back - + ); } // ============================================================================ -// Results View +// Helpers // ============================================================================ -function scoreColor(score: number): string { - if (score >= 0.8) return 'green'; - if (score >= 0.5) return 'yellow'; - return 'red'; -} - -interface ResultsViewProps { - result: RunBatchEvaluationCommandResult; - savedFilePath?: string; - onRunAnother: () => void; - onExit: () => void; -} - -function ResultsView({ result, savedFilePath, onRunAnother, onExit }: ResultsViewProps) { - const actions = [ - { id: 'another', title: 'Run another batch evaluation' }, - { id: 'back', title: 'Back' }, - ]; - - const nav = useListNavigation({ - items: actions, - onSelect: item => { - if (item.id === 'another') onRunAnother(); - else onExit(); - }, - onExit, - isActive: true, - }); - - const evalRes = result.evaluationResults; - const summaries = evalRes?.evaluatorSummaries; - - // Fall back to local grouping when API summaries aren't available - const byEvaluator = useMemo(() => { - if (summaries && summaries.length > 0) return null; - const map = new Map(); - for (const r of result.results) { - const group = map.get(r.evaluatorId) ?? 
[]; - group.push(r); - map.set(r.evaluatorId, group); - } - return map; - }, [result.results, summaries]); - - return ( - - - - ✓ Batch evaluation complete - - ID: {result.batchEvaluationId} - {' '} - Status: {result.status} - - {result.name && ( - - Name: {result.name} - - )} - - {evalRes?.totalNumberOfSessions != null && ( - - Sessions: {evalRes.totalNumberOfSessions} total - {evalRes.numberOfSessionsCompleted != null && ( - , {evalRes.numberOfSessionsCompleted} completed - )} - {evalRes.numberOfSessionsFailed ? ( - , {evalRes.numberOfSessionsFailed} failed - ) : null} - - )} - - {summaries && summaries.length > 0 ? ( - - Scores range from 0 (worst) to 1 (best). - {summaries.map(s => { - const avg = s.statistics?.averageScore; - const avgStr = avg != null ? avg.toFixed(2) : 'N/A'; - const color = avg != null ? scoreColor(avg) : undefined; - return ( - - {' '} - {s.evaluatorId} - {' '} - {avgStr} - {s.totalFailed ? ({s.totalFailed} failed) : null} - {s.totalEvaluated != null && [{s.totalEvaluated} evaluated]} - - ); - })} - - ) : byEvaluator && byEvaluator.size > 0 ? ( - - Scores range from 0 (worst) to 1 (best). - {[...byEvaluator.entries()].map(([evalId, evalResults]) => { - const scores = evalResults.filter(r => !r.error).map(r => r.score!); - const avg = scores.length > 0 ? scores.reduce((a, b) => a + b, 0) / scores.length : 0; - const errors = evalResults.filter(r => r.error).length; - return ( - - {' '} - {evalId} - {' '} - {avg.toFixed(2)} - {errors > 0 && ({errors} errors)} - - ); - })} - - ) : ( - - No evaluation results returned. - - )} - - {savedFilePath && ( - - Results saved to: {savedFilePath} - - )} - {result.logFilePath && ( - - Log: {result.logFilePath} - - )} - - - {actions.map((action, idx) => { - const selected = idx === nav.selectedIndex; - return ( - - {selected ? 
'❯' : ' '} - - {action.title} - - - ); - })} - - - - - ); +function sleep(ms: number): Promise { + return new Promise(resolve => setTimeout(resolve, ms)); } diff --git a/src/cli/tui/screens/run-eval/RunIngestFlow.tsx b/src/cli/tui/screens/run-eval/RunIngestFlow.tsx new file mode 100644 index 000000000..4bc072bf1 --- /dev/null +++ b/src/cli/tui/screens/run-eval/RunIngestFlow.tsx @@ -0,0 +1,669 @@ +import { ConfigIO } from '../../../../lib'; +import { getErrorMessage } from '../../../errors'; +import { runKbIngestionByName } from '../../../operations/ingest'; +import type { StartedIngestion } from '../../../operations/ingest'; +import { ConfirmReview, ErrorPrompt, GradientText, Panel, Screen, WizardSelect } from '../../components'; +import type { SelectableItem } from '../../components'; +import { HELP_TEXT } from '../../constants'; +import { useListNavigation } from '../../hooks'; +import { Box, Text } from 'ink'; +import React, { useCallback, useEffect, useMemo, useState } from 'react'; + +const SCREEN_TITLE = 'Ingest Knowledge Base'; + +interface KnowledgeBaseInfo { + name: string; +} + +interface DeployedDataSource { + dataSourceId: string; + uri: string; +} + +interface DeployedKb { + knowledgeBaseId: string; + dataSources: DeployedDataSource[]; +} + +interface FlowContext { + /** All knowledge bases declared in agentcore.json */ + knowledgeBases: KnowledgeBaseInfo[]; + /** AWS deployment target names */ + targetNames: string[]; + /** Region per target name */ + regionByTarget: Record; + /** Deployed-state lookup: targetName -> kbName -> deployed kb */ + deployedKbsByTarget: Record>; + /** Raw deployed-state — passed straight back into runKbIngestionByName */ + deployedState: Parameters[0]['deployedState']; +} + +type FlowState = + | { name: 'loading' } + | { name: 'select-kb'; ctx: FlowContext } + | { name: 'select-target'; ctx: FlowContext; kbName: string } + | { name: 'select-scope'; ctx: FlowContext; kbName: string; targetName: string; deployed: DeployedKb } 
+ | { + name: 'select-data-source'; + ctx: FlowContext; + kbName: string; + targetName: string; + deployed: DeployedKb; + } + | { + name: 'confirm'; + ctx: FlowContext; + kbName: string; + targetName: string; + deployed: DeployedKb; + dataSourceUri?: string; + } + | { + name: 'running'; + ctx: FlowContext; + kbName: string; + targetName: string; + deployed: DeployedKb; + dataSourceUri?: string; + progress: string[]; + } + | { name: 'success'; kbName: string; startedJobs: StartedIngestion[] } + | { name: 'error'; message: string; ctx?: FlowContext }; + +interface RunIngestFlowProps { + onExit: () => void; +} + +export function RunIngestFlow({ onExit }: RunIngestFlowProps) { + const [flow, setFlow] = useState({ name: 'loading' }); + + // ── Initial load ───────────────────────────────────────────────────────── + useEffect(() => { + if (flow.name !== 'loading') return; + let cancelled = false; + + void (async () => { + try { + const configIO = new ConfigIO(); + const [project, awsTargets, deployedState] = await Promise.all([ + configIO.readProjectSpec(), + configIO.readAWSDeploymentTargets(), + configIO.readDeployedState().catch(() => ({ targets: {} })), + ]); + + if (cancelled) return; + + const knowledgeBases: KnowledgeBaseInfo[] = (project.knowledgeBases ?? []).map(kb => ({ name: kb.name })); + + if (knowledgeBases.length === 0) { + setFlow({ + name: 'error', + message: 'No knowledge bases found in agentcore.json. Run `agentcore add knowledge-base` first.', + }); + return; + } + + const targetNames = awsTargets.map(t => t.name); + const regionByTarget: Record = {}; + for (const t of awsTargets) regionByTarget[t.name] = t.region; + + if (targetNames.length === 0) { + setFlow({ + name: 'error', + message: 'No AWS deployment targets found in aws-targets.json.', + }); + return; + } + + const deployedKbsByTarget: Record> = {}; + for (const [tname, target] of Object.entries(deployedState.targets ?? {})) { + const kbs = target?.resources?.knowledgeBases ?? 
{}; + const map: Record = {}; + for (const [kbName, kb] of Object.entries(kbs)) { + map[kbName] = { + knowledgeBaseId: kb.knowledgeBaseId, + dataSources: (kb.dataSources ?? []).map(ds => ({ dataSourceId: ds.dataSourceId, uri: ds.uri })), + }; + } + deployedKbsByTarget[tname] = map; + } + + setFlow({ + name: 'select-kb', + ctx: { + knowledgeBases, + targetNames, + regionByTarget, + deployedKbsByTarget, + deployedState, + }, + }); + } catch (err) { + if (!cancelled) setFlow({ name: 'error', message: getErrorMessage(err) }); + } + })(); + + return () => { + cancelled = true; + }; + }, [flow.name]); + + // ── Run ingestion when entering 'running' ──────────────────────────────── + useEffect(() => { + if (flow.name !== 'running') return; + let cancelled = false; + + const { ctx, kbName, targetName, dataSourceUri } = flow; + const region = ctx.regionByTarget[targetName]; + + void (async () => { + if (!region) { + if (cancelled) return; + setFlow({ name: 'error', message: `Region for target '${targetName}' could not be resolved.`, ctx }); + return; + } + try { + const result = await runKbIngestionByName({ + knowledgeBaseName: kbName, + deployedState: ctx.deployedState, + targetName, + region, + dataSourceUri, + onProgress: msg => { + if (cancelled) return; + setFlow(prev => (prev.name === 'running' ? 
{ ...prev, progress: [...prev.progress, msg] } : prev)); + }, + }); + + if (cancelled) return; + + if (!result.success) { + setFlow({ name: 'error', message: result.error.message, ctx }); + return; + } + setFlow({ name: 'success', kbName, startedJobs: result.startedJobs }); + } catch (err) { + if (!cancelled) setFlow({ name: 'error', message: getErrorMessage(err), ctx }); + } + })(); + + return () => { + cancelled = true; + }; + }, [flow.name]); // eslint-disable-line react-hooks/exhaustive-deps + + // ── Renders ────────────────────────────────────────────────────────────── + if (flow.name === 'loading') { + return ( + + + + ); + } + + if (flow.name === 'error') { + return ( + (flow.ctx ? setFlow({ name: 'select-kb', ctx: flow.ctx }) : onExit())} + onExit={onExit} + /> + ); + } + + if (flow.name === 'select-kb') { + return ( + { + const ctx = flow.ctx; + // Auto-skip target picker when only one target is configured + if (ctx.targetNames.length === 1) { + const targetName = ctx.targetNames[0]!; + return advanceAfterTarget(setFlow, ctx, kbName, targetName); + } + setFlow({ name: 'select-target', ctx, kbName }); + }} + onExit={onExit} + /> + ); + } + + if (flow.name === 'select-target') { + return ( + advanceAfterTarget(setFlow, flow.ctx, flow.kbName, targetName)} + onBack={() => setFlow({ name: 'select-kb', ctx: flow.ctx })} + /> + ); + } + + if (flow.name === 'select-scope') { + return ( + + setFlow({ + name: 'confirm', + ctx: flow.ctx, + kbName: flow.kbName, + targetName: flow.targetName, + deployed: flow.deployed, + }) + } + onChooseOne={() => + setFlow({ + name: 'select-data-source', + ctx: flow.ctx, + kbName: flow.kbName, + targetName: flow.targetName, + deployed: flow.deployed, + }) + } + onBack={() => { + if (flow.ctx.targetNames.length > 1) { + setFlow({ name: 'select-target', ctx: flow.ctx, kbName: flow.kbName }); + } else { + setFlow({ name: 'select-kb', ctx: flow.ctx }); + } + }} + /> + ); + } + + if (flow.name === 'select-data-source') { + return ( + + 
setFlow({ + name: 'confirm', + ctx: flow.ctx, + kbName: flow.kbName, + targetName: flow.targetName, + deployed: flow.deployed, + dataSourceUri: uri, + }) + } + onBack={() => + setFlow({ + name: 'select-scope', + ctx: flow.ctx, + kbName: flow.kbName, + targetName: flow.targetName, + deployed: flow.deployed, + }) + } + /> + ); + } + + if (flow.name === 'confirm') { + return ( + + setFlow({ + name: 'running', + ctx: flow.ctx, + kbName: flow.kbName, + targetName: flow.targetName, + deployed: flow.deployed, + dataSourceUri: flow.dataSourceUri, + progress: [], + }) + } + onBack={() => { + if (flow.dataSourceUri !== undefined) { + setFlow({ + name: 'select-data-source', + ctx: flow.ctx, + kbName: flow.kbName, + targetName: flow.targetName, + deployed: flow.deployed, + }); + } else { + setFlow({ + name: 'select-scope', + ctx: flow.ctx, + kbName: flow.kbName, + targetName: flow.targetName, + deployed: flow.deployed, + }); + } + }} + /> + ); + } + + if (flow.name === 'running') { + return ( + + + + + {flow.progress.length > 0 && ( + + {flow.progress.map((line, i) => ( + + {line} + + ))} + + )} + Bedrock allows one ingestion job per KB at a time. Sit tight while jobs start. + + + + ); + } + + // success + return ; +} + +// ───────────────────────────────────────────────────────────────────────────── +// Helpers +// ───────────────────────────────────────────────────────────────────────────── + +function advanceAfterTarget( + setFlow: React.Dispatch>, + ctx: FlowContext, + kbName: string, + targetName: string +) { + const deployed = ctx.deployedKbsByTarget[targetName]?.[kbName]; + if (!deployed) { + setFlow({ + name: 'error', + ctx, + message: `Knowledge base '${kbName}' has not been deployed to target '${targetName}'. Run \`agentcore deploy\` first.`, + }); + return; + } + if (deployed.dataSources.length === 0) { + setFlow({ + name: 'error', + ctx, + message: `Knowledge base '${kbName}' has no recorded data sources. 
Run \`agentcore deploy\` first.`, + }); + return; + } + setFlow({ name: 'select-scope', ctx, kbName, targetName, deployed }); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Step components +// ───────────────────────────────────────────────────────────────────────────── + +interface SelectKbStepProps { + ctx: FlowContext; + onSelect: (kbName: string) => void; + onExit: () => void; +} + +function SelectKbStep({ ctx, onSelect, onExit }: SelectKbStepProps) { + const items: SelectableItem[] = useMemo( + () => + ctx.knowledgeBases.map(kb => { + // Show whether KB is deployed to *any* target as a hint + const anyDeployed = Object.values(ctx.deployedKbsByTarget).some(map => kb.name in map); + return { + id: kb.name, + title: kb.name, + description: anyDeployed ? 'deployed' : 'not yet deployed', + }; + }), + [ctx] + ); + + const nav = useListNavigation({ + items, + onSelect: item => onSelect(item.id), + onExit, + isActive: true, + }); + + return ( + + + + + + ); +} + +interface SelectTargetStepProps { + ctx: FlowContext; + kbName: string; + onSelect: (targetName: string) => void; + onBack: () => void; +} + +function SelectTargetStep({ ctx, kbName, onSelect, onBack }: SelectTargetStepProps) { + const items: SelectableItem[] = useMemo( + () => + ctx.targetNames.map(name => ({ + id: name, + title: name, + description: ctx.regionByTarget[name] ?? 
'', + })), + [ctx] + ); + + const nav = useListNavigation({ + items, + onSelect: item => onSelect(item.id), + onExit: onBack, + isActive: true, + }); + + return ( + + + + + + ); +} + +interface SelectScopeStepProps { + ctx: FlowContext; + kbName: string; + targetName: string; + deployed: DeployedKb; + onAll: () => void; + onChooseOne: () => void; + onBack: () => void; +} + +function SelectScopeStep({ kbName, deployed, onAll, onChooseOne, onBack }: SelectScopeStepProps) { + const items: SelectableItem[] = useMemo( + () => [ + { + id: 'all', + title: 'All data sources', + description: `Start ingestion for all ${deployed.dataSources.length} data source(s) on this KB.`, + }, + { + id: 'one', + title: 'Choose one data source', + description: 'Pick a single data source to ingest.', + }, + ], + [deployed.dataSources.length] + ); + + const nav = useListNavigation({ + items, + onSelect: item => { + if (item.id === 'all') onAll(); + else onChooseOne(); + }, + onExit: onBack, + isActive: true, + }); + + return ( + + + + + + ); +} + +interface SelectDataSourceStepProps { + deployed: DeployedKb; + kbName: string; + onSelect: (uri: string) => void; + onBack: () => void; +} + +function SelectDataSourceStep({ deployed, kbName, onSelect, onBack }: SelectDataSourceStepProps) { + const items: SelectableItem[] = useMemo( + () => + deployed.dataSources.map(ds => ({ + id: ds.uri, + title: ds.uri, + description: ds.dataSourceId, + })), + [deployed] + ); + + const nav = useListNavigation({ + items, + onSelect: item => onSelect(item.id), + onExit: onBack, + isActive: true, + }); + + return ( + + + + + + ); +} + +interface ConfirmStepProps { + kbName: string; + targetName: string; + deployed: DeployedKb; + dataSourceUri?: string; + onConfirm: () => void; + onBack: () => void; +} + +function ConfirmStep({ kbName, targetName, deployed, dataSourceUri, onConfirm, onBack }: ConfirmStepProps) { + // Single-button confirm — Enter to start, Esc back + useListNavigation({ + items: [{ id: 'confirm', 
title: 'Confirm' }], + onSelect: onConfirm, + onExit: onBack, + isActive: true, + }); + + const scope = dataSourceUri + ? `Single data source — ${dataSourceUri}` + : `All data sources (${deployed.dataSources.length})`; + + return ( + + + + + + ); +} + +interface SuccessViewProps { + kbName: string; + startedJobs: StartedIngestion[]; + onExit: () => void; +} + +function SuccessView({ kbName, startedJobs, onExit }: SuccessViewProps) { + const actions = useMemo(() => [{ id: 'back', title: 'Back to Run menu' }], []); + const nav = useListNavigation({ + items: actions, + onSelect: useCallback(() => onExit(), [onExit]), + onExit, + isActive: true, + }); + + return ( + + + + + ✓ Started ingestion for '{kbName}' ({startedJobs.length} job(s)) + + + {startedJobs.map(job => ( + + {' '} + {job.uri} + + {job.ingestionJobId} + + ))} + + Run `agentcore status --type knowledge-base --name {kbName}` to track progress. + + {actions.map((action, idx) => { + const selected = idx === nav.selectedIndex; + return ( + + {selected ? 
'❯' : ' '} + + {action.title} + + + ); + })} + + + + + ); +} diff --git a/src/cli/tui/screens/run-eval/RunScreen.tsx b/src/cli/tui/screens/run-eval/RunScreen.tsx index a9a797a37..fc3b69f80 100644 --- a/src/cli/tui/screens/run-eval/RunScreen.tsx +++ b/src/cli/tui/screens/run-eval/RunScreen.tsx @@ -7,11 +7,22 @@ import React, { useMemo } from 'react'; interface RunScreenProps { onRunEval: () => void; onRunBatchEval: () => void; + onRunInsights: () => void; onRunRecommendation: () => void; + onRunIngest: () => void; + onRunABTest: () => void; onExit: () => void; } -export function RunScreen({ onRunEval, onRunBatchEval, onRunRecommendation, onExit }: RunScreenProps) { +export function RunScreen({ + onRunEval, + onRunBatchEval, + onRunInsights, + onRunRecommendation, + onRunIngest, + onRunABTest, + onExit, +}: RunScreenProps) { const items: SelectableItem[] = useMemo( () => [ { @@ -24,11 +35,26 @@ export function RunScreen({ onRunEval, onRunBatchEval, onRunRecommendation, onEx title: 'Batch Evaluation', description: 'Run a batch evaluation against agent sessions via CloudWatch.', }, + { + id: 'run-insights', + title: 'Insights [preview]', + description: 'Run failure analysis across agent sessions to detect patterns and root causes.', + }, { id: 'run-recommendation', title: 'Recommendation', description: 'Optimize system prompts or tool descriptions using agent traces.', }, + { + id: 'run-ingest', + title: 'Ingest knowledge base', + description: 'Start an ingestion job for a deployed knowledge base.', + }, + { + id: 'run-ab-test', + title: 'A/B Test', + description: 'Compare two config-bundle or gateway-target variants live through a gateway.', + }, ], [] ); @@ -38,7 +64,10 @@ export function RunScreen({ onRunEval, onRunBatchEval, onRunRecommendation, onEx onSelect: item => { if (item.id === 'run-eval') onRunEval(); else if (item.id === 'run-batch-eval') onRunBatchEval(); + else if (item.id === 'run-insights') onRunInsights(); else if (item.id === 'run-recommendation') 
onRunRecommendation(); + else if (item.id === 'run-ingest') onRunIngest(); + else if (item.id === 'run-ab-test') onRunABTest(); }, onExit, isActive: true, diff --git a/src/cli/tui/screens/run-eval/index.ts b/src/cli/tui/screens/run-eval/index.ts index 7c56bd639..fc804947f 100644 --- a/src/cli/tui/screens/run-eval/index.ts +++ b/src/cli/tui/screens/run-eval/index.ts @@ -2,4 +2,5 @@ export { BatchEvalHistoryScreen } from './BatchEvalHistoryScreen'; export { RunBatchEvalFlow } from './RunBatchEvalFlow'; export { RunEvalFlow } from './RunEvalFlow'; export { RunEvalScreen } from './RunEvalScreen'; +export { RunIngestFlow } from './RunIngestFlow'; export { RunScreen } from './RunScreen'; diff --git a/src/cli/tui/screens/run-insights/RunInsightsFlow.tsx b/src/cli/tui/screens/run-insights/RunInsightsFlow.tsx new file mode 100644 index 000000000..a043f3552 --- /dev/null +++ b/src/cli/tui/screens/run-insights/RunInsightsFlow.tsx @@ -0,0 +1,148 @@ +import { ConfigIO } from '../../../../lib'; +import type { DeployedState } from '../../../../schema'; +import { getErrorMessage } from '../../../errors'; +import { createJobEngine } from '../../../operations/jobs'; +import type { InsightsJobRecord } from '../../../operations/jobs/shared/types'; +import { withCommandRunTelemetry } from '../../../telemetry/cli-command-run.js'; +import { ErrorPrompt, GradientText, SuccessPrompt } from '../../components'; +import { RunInsightsScreen } from './RunInsightsScreen'; +import type { RunInsightsConfig } from './types'; +import React, { useCallback, useEffect, useMemo, useState } from 'react'; + +type FlowState = + | { name: 'loading' } + | { name: 'wizard'; agentNames: string[]; onlineEvalConfigArns: string[] } + | { name: 'submitting' } + | { name: 'success'; record: InsightsJobRecord } + | { name: 'error'; message: string }; + +interface RunInsightsFlowProps { + isInteractive?: boolean; + onExit: () => void; + onBack: () => void; + onViewJobs?: () => void; +} + +export function 
RunInsightsFlow({ isInteractive = true, onExit, onBack, onViewJobs }: RunInsightsFlowProps) { + const [flow, setFlow] = useState({ name: 'loading' }); + const engine = useMemo(() => createJobEngine(new ConfigIO()), []); + + useEffect(() => { + if (flow.name !== 'loading') return; + let cancelled = false; + + void (async () => { + try { + const configIO = new ConfigIO(); + const [projectSpec, deployedState] = await Promise.all([ + configIO.readProjectSpec(), + configIO.readDeployedState(), + ]); + if (cancelled) return; + + const agentNames = (projectSpec.runtimes ?? []).map(a => a.name); + if (agentNames.length === 0) { + setFlow({ + name: 'error', + message: 'No agents found in project. Add an agent first with `agentcore add agent`.', + }); + return; + } + + const onlineEvalConfigArns = extractOnlineEvalConfigArns(deployedState); + + setFlow({ name: 'wizard', agentNames, onlineEvalConfigArns }); + } catch (err) { + if (!cancelled) setFlow({ name: 'error', message: getErrorMessage(err) }); + } + })(); + + return () => { + cancelled = true; + }; + }, [flow.name]); + + useEffect(() => { + if (!isInteractive && flow.name === 'success') { + onExit(); + } + }, [isInteractive, flow.name, onExit]); + + const handleComplete = useCallback( + (config: RunInsightsConfig) => { + setFlow({ name: 'submitting' }); + + void (async () => { + try { + const startResult = await withCommandRunTelemetry('run.job', { job_type: 'insights', has_wait: false }, () => + engine.start('insights', { + agent: config.agent || undefined, + insights: config.insights.length > 0 ? config.insights : ['Builtin.Insight.FailureAnalysis'], + onlineEvalConfigArn: config.source === 'online-eval-config' ? config.onlineEvalConfigArn : undefined, + lookbackDays: config.source === 'agent' ? config.lookbackDays : undefined, + sessionIds: config.sessionIds.length > 0 ? config.sessionIds : undefined, + name: config.name || undefined, + }) + ); + + if (!startResult.success) { + throw startResult.error ?? 
new Error('Failed to start insights job'); + } + setFlow({ name: 'success', record: startResult.record }); + } catch (err) { + setFlow({ name: 'error', message: getErrorMessage(err) }); + } + })(); + }, + [engine] + ); + + if (flow.name === 'loading' || flow.name === 'submitting') { + return ; + } + + if (flow.name === 'wizard') { + return ( + + ); + } + + if (flow.name === 'success') { + return ( + + ); + } + + return ( + setFlow({ name: 'loading' })} + onExit={onExit} + /> + ); +} + +function extractOnlineEvalConfigArns(deployedState: DeployedState): string[] { + const arns: string[] = []; + for (const target of Object.values(deployedState.targets ?? {})) { + const configs = target?.resources?.onlineEvalConfigs ?? {}; + for (const config of Object.values(configs)) { + if (config.onlineEvaluationConfigArn) { + arns.push(config.onlineEvaluationConfigArn); + } + } + } + return arns; +} diff --git a/src/cli/tui/screens/run-insights/RunInsightsScreen.tsx b/src/cli/tui/screens/run-insights/RunInsightsScreen.tsx new file mode 100644 index 000000000..9384d83f9 --- /dev/null +++ b/src/cli/tui/screens/run-insights/RunInsightsScreen.tsx @@ -0,0 +1,221 @@ +import { + ConfirmReview, + Panel, + Screen, + StepIndicator, + TextInput, + WizardMultiSelect, + WizardSelect, +} from '../../components'; +import type { SelectableItem } from '../../components'; +import { HELP_TEXT } from '../../constants'; +import { useListNavigation, useMultiSelectNavigation } from '../../hooks'; +import { AVAILABLE_INSIGHTS, RUN_INSIGHTS_STEP_LABELS, SESSION_MODE_OPTIONS, SOURCE_OPTIONS } from './types'; +import type { RunInsightsConfig } from './types'; +import { useRunInsightsWizard } from './useRunInsightsWizard'; +import React, { useMemo } from 'react'; + +interface RunInsightsScreenProps { + agentNames: string[]; + onlineEvalConfigArns: string[]; + onComplete: (config: RunInsightsConfig) => void; + onExit: () => void; +} + +export function RunInsightsScreen({ agentNames, onlineEvalConfigArns, 
onComplete, onExit }: RunInsightsScreenProps) { + const wizard = useRunInsightsWizard(agentNames.length); + + const isSourceStep = wizard.step === 'source'; + const isAgentStep = wizard.step === 'agent'; + const isInsightsStep = wizard.step === 'insights'; + const isSessionsStep = wizard.step === 'sessions'; + const isLookbackStep = wizard.step === 'lookbackDays'; + const isConfigArnStep = wizard.step === 'configArn'; + const isNameStep = wizard.step === 'name'; + const isConfirmStep = wizard.step === 'confirm'; + + const sourceItems: SelectableItem[] = useMemo( + () => SOURCE_OPTIONS.map(o => ({ id: o.id, title: o.title, description: o.description })), + [] + ); + + const agentItems: SelectableItem[] = useMemo(() => agentNames.map(name => ({ id: name, title: name })), [agentNames]); + + const insightItems = useMemo( + () => AVAILABLE_INSIGHTS.map(i => ({ id: i.id, title: i.title, description: i.description })), + [] + ); + + const sessionModeItems: SelectableItem[] = useMemo( + () => SESSION_MODE_OPTIONS.map(o => ({ id: o.id, title: o.title, description: o.description })), + [] + ); + + const configArnItems: SelectableItem[] = useMemo( + () => onlineEvalConfigArns.map(arn => ({ id: arn, title: arn.split('/').pop() ?? 
arn })), + [onlineEvalConfigArns] + ); + + const sourceNav = useListNavigation({ + items: sourceItems, + onSelect: item => wizard.setSource(item.id as 'agent' | 'online-eval-config'), + onExit, + isActive: isSourceStep, + }); + + const agentNav = useListNavigation({ + items: agentItems, + onSelect: item => wizard.setAgent(item.id), + onExit: () => wizard.goBack(), + isActive: isAgentStep, + }); + + const insightsNav = useMultiSelectNavigation({ + items: insightItems, + getId: item => item.id, + onConfirm: ids => wizard.setInsights(ids), + onExit: () => wizard.goBack(), + isActive: isInsightsStep, + requireSelection: true, + }); + + const sessionModeNav = useListNavigation({ + items: sessionModeItems, + onSelect: item => wizard.setSessionMode(item.id as 'lookback' | 'specific'), + onExit: () => wizard.goBack(), + isActive: isSessionsStep, + }); + + const configArnNav = useListNavigation({ + items: configArnItems, + onSelect: item => wizard.setOnlineEvalConfigArn(item.id), + onExit: () => wizard.goBack(), + isActive: isConfigArnStep, + }); + + useListNavigation({ + items: [{ id: 'submit', title: 'Start insights job' }], + onSelect: () => onComplete(wizard.config), + onExit: () => wizard.goBack(), + isActive: isConfirmStep, + }); + + const helpText = isInsightsStep + ? 'Space toggle · Enter confirm · Esc back' + : isNameStep || isLookbackStep + ? 'Enter confirm · Esc back' + : HELP_TEXT.NAVIGATE_SELECT; + + return ( + } + > + + {isSourceStep && ( + + )} + + {isAgentStep && ( + + )} + + {isInsightsStep && ( + + )} + + {isSessionsStep && ( + + )} + + {isLookbackStep && ( + { + const days = parseInt(value, 10); + wizard.setLookbackDays(isNaN(days) || days <= 0 ? 
7 : days); + }} + onCancel={() => wizard.goBack()} + /> + )} + + {isConfigArnStep && configArnItems.length > 0 && ( + + )} + + {isConfigArnStep && configArnItems.length === 0 && ( + wizard.setOnlineEvalConfigArn(value)} + onCancel={() => wizard.goBack()} + /> + )} + + {isNameStep && ( + wizard.setName(value)} + onCancel={() => wizard.goBack()} + /> + )} + + {isConfirmStep && ( + AVAILABLE_INSIGHTS.find(i => i.id === id)?.title ?? id) + .join(', '), + }, + { label: 'Sessions', value: `Last ${wizard.config.lookbackDays} days` }, + { label: 'Name', value: wizard.config.name || '(auto-generated)' }, + ] + : [ + { label: 'Source', value: 'Online eval config' }, + { label: 'Config', value: wizard.config.onlineEvalConfigArn.split('/').pop() ?? '' }, + { label: 'Name', value: wizard.config.name || '(auto-generated)' }, + ] + } + /> + )} + + + ); +} diff --git a/src/cli/tui/screens/run-insights/index.ts b/src/cli/tui/screens/run-insights/index.ts new file mode 100644 index 000000000..0a569c50e --- /dev/null +++ b/src/cli/tui/screens/run-insights/index.ts @@ -0,0 +1,2 @@ +export { RunInsightsFlow } from './RunInsightsFlow'; +export { RunInsightsScreen } from './RunInsightsScreen'; diff --git a/src/cli/tui/screens/run-insights/types.ts b/src/cli/tui/screens/run-insights/types.ts new file mode 100644 index 000000000..01dc0fd1b --- /dev/null +++ b/src/cli/tui/screens/run-insights/types.ts @@ -0,0 +1,81 @@ +export type RunInsightsSource = 'agent' | 'online-eval-config'; + +export type RunInsightsSessionMode = 'lookback' | 'specific'; + +export type RunInsightsStep = + | 'source' + | 'agent' + | 'insights' + | 'sessions' + | 'lookbackDays' + | 'configArn' + | 'name' + | 'confirm'; + +export interface RunInsightsConfig { + source: RunInsightsSource; + agent: string; + insights: string[]; + sessionMode: RunInsightsSessionMode; + lookbackDays: number; + sessionIds: string[]; + onlineEvalConfigArn: string; + name: string; +} + +export const RUN_INSIGHTS_STEP_LABELS: Record = { + 
source: 'Source', + agent: 'Agent', + insights: 'Insights', + sessions: 'Sessions', + lookbackDays: 'Lookback', + configArn: 'Config', + name: 'Name', + confirm: 'Confirm', +}; + +export const DEFAULT_LOOKBACK_DAYS = 7; + +export const AVAILABLE_INSIGHTS = [ + { + id: 'Builtin.Insight.FailureAnalysis', + title: 'Failure Analysis', + description: 'Detect failure patterns and generate root causes', + }, + { + id: 'Builtin.Insight.UserIntent', + title: 'User Intent', + description: 'Classify and cluster user intents from session transcripts', + }, + { + id: 'Builtin.Insight.ExecutionSummary', + title: 'Execution Summary', + description: 'Summarize execution patterns and tool usage across sessions', + }, +]; + +export const SOURCE_OPTIONS = [ + { + id: 'agent' as const, + title: 'Agent (CloudWatch)', + description: "Pull sessions from a deployed agent's log group", + }, + { + id: 'online-eval-config' as const, + title: 'Online eval config', + description: 'Use sessions from an existing online eval config', + }, +]; + +export const SESSION_MODE_OPTIONS = [ + { + id: 'lookback' as const, + title: 'Lookback window', + description: 'Use all sessions within N days', + }, + { + id: 'specific' as const, + title: 'Specific sessions', + description: 'Pick individual session IDs', + }, +]; diff --git a/src/cli/tui/screens/run-insights/useRunInsightsWizard.ts b/src/cli/tui/screens/run-insights/useRunInsightsWizard.ts new file mode 100644 index 000000000..df2a16bc4 --- /dev/null +++ b/src/cli/tui/screens/run-insights/useRunInsightsWizard.ts @@ -0,0 +1,151 @@ +import type { RunInsightsConfig, RunInsightsSessionMode, RunInsightsSource, RunInsightsStep } from './types'; +import { DEFAULT_LOOKBACK_DAYS } from './types'; +import { useCallback, useMemo, useState } from 'react'; + +function getStepsForSource(source: RunInsightsSource, agentCount: number): RunInsightsStep[] { + if (source === 'online-eval-config') { + return ['source', 'configArn', 'name', 'confirm']; + } + if (agentCount 
<= 1) { + return ['source', 'insights', 'sessions', 'lookbackDays', 'name', 'confirm']; + } + return ['source', 'agent', 'insights', 'sessions', 'lookbackDays', 'name', 'confirm']; +} + +function getDefaultConfig(): RunInsightsConfig { + return { + source: 'agent', + agent: '', + insights: [], + sessionMode: 'lookback', + lookbackDays: DEFAULT_LOOKBACK_DAYS, + sessionIds: [], + onlineEvalConfigArn: '', + name: '', + }; +} + +export function useRunInsightsWizard(agentCount: number) { + const [config, setConfig] = useState(getDefaultConfig); + const [step, setStep] = useState('source'); + + const allSteps = useMemo(() => getStepsForSource(config.source, agentCount), [config.source, agentCount]); + const currentIndex = allSteps.indexOf(step); + + const nextStep = useCallback( + (currentStep: RunInsightsStep): RunInsightsStep | undefined => { + const steps = allSteps; + const idx = steps.indexOf(currentStep); + if (idx + 1 < steps.length) { + return steps[idx + 1]!; + } + return undefined; + }, + [allSteps] + ); + + const goBack = useCallback(() => { + if (currentIndex > 0) { + setStep(allSteps[currentIndex - 1]!); + } + }, [allSteps, currentIndex]); + + const setSource = useCallback( + (source: RunInsightsSource) => { + setConfig(c => ({ ...c, source })); + const steps = getStepsForSource(source, agentCount); + setStep(steps[1]!); + }, + [agentCount] + ); + + const setAgent = useCallback( + (agent: string) => { + setConfig(c => ({ ...c, agent })); + const next = nextStep('agent'); + if (next) setStep(next); + }, + [nextStep] + ); + + const setInsights = useCallback( + (insights: string[]) => { + setConfig(c => ({ ...c, insights })); + const next = nextStep('insights'); + if (next) setStep(next); + }, + [nextStep] + ); + + const setSessionMode = useCallback( + (sessionMode: RunInsightsSessionMode) => { + setConfig(c => ({ ...c, sessionMode })); + if (sessionMode === 'lookback') { + const next = nextStep('sessions'); + if (next) setStep(next); + } else { + const next = 
nextStep('sessions'); + if (next) setStep(next); + } + }, + [nextStep] + ); + + const setLookbackDays = useCallback( + (lookbackDays: number) => { + setConfig(c => ({ ...c, lookbackDays })); + const next = nextStep('lookbackDays'); + if (next) setStep(next); + }, + [nextStep] + ); + + const setSessionIds = useCallback( + (sessionIds: string[]) => { + setConfig(c => ({ ...c, sessionIds })); + const next = nextStep('lookbackDays'); + if (next) setStep(next); + }, + [nextStep] + ); + + const setOnlineEvalConfigArn = useCallback( + (onlineEvalConfigArn: string) => { + setConfig(c => ({ ...c, onlineEvalConfigArn })); + const next = nextStep('configArn'); + if (next) setStep(next); + }, + [nextStep] + ); + + const setName = useCallback( + (name: string) => { + setConfig(c => ({ ...c, name })); + const next = nextStep('name'); + if (next) setStep(next); + }, + [nextStep] + ); + + const reset = useCallback(() => { + setConfig(getDefaultConfig()); + setStep('source'); + }, []); + + return { + config, + step, + steps: allSteps, + currentIndex, + goBack, + setSource, + setAgent, + setInsights, + setSessionMode, + setLookbackDays, + setSessionIds, + setOnlineEvalConfigArn, + setName, + reset, + }; +} diff --git a/src/cli/tui/screens/view/ViewTypePickerScreen.tsx b/src/cli/tui/screens/view/ViewTypePickerScreen.tsx new file mode 100644 index 000000000..8733640c4 --- /dev/null +++ b/src/cli/tui/screens/view/ViewTypePickerScreen.tsx @@ -0,0 +1,40 @@ +import { Screen, WizardSelect } from '../../components'; +import type { SelectableItem } from '../../components'; +import { HELP_TEXT } from '../../constants'; +import { useListNavigation } from '../../hooks'; +import React, { useMemo } from 'react'; + +type ViewType = 'recommendation' | 'batch-evaluation' | 'ab-test'; + +interface ViewTypePickerScreenProps { + onSelect: (type: ViewType) => void; + onExit: () => void; +} + +export function ViewTypePickerScreen({ onSelect, onExit }: ViewTypePickerScreenProps) { + const items: 
SelectableItem[] = useMemo( + () => [ + { id: 'recommendation', title: 'Recommendations', description: 'View recommendation job history and results' }, + { + id: 'batch-evaluation', + title: 'Batch Evaluations', + description: 'View batch evaluation job history and results', + }, + { id: 'ab-test', title: 'A/B Tests', description: 'View A/B test job history and results' }, + ], + [] + ); + + const nav = useListNavigation({ + items, + onSelect: item => onSelect(item.id as ViewType), + onExit, + isActive: true, + }); + + return ( + + + + ); +} diff --git a/src/cli/tui/screens/view/index.ts b/src/cli/tui/screens/view/index.ts new file mode 100644 index 000000000..640feac03 --- /dev/null +++ b/src/cli/tui/screens/view/index.ts @@ -0,0 +1 @@ +export { ViewTypePickerScreen } from './ViewTypePickerScreen'; diff --git a/src/cli/tui/screens/web-search/AddWebSearchFlow.tsx b/src/cli/tui/screens/web-search/AddWebSearchFlow.tsx new file mode 100644 index 000000000..09b27c3b7 --- /dev/null +++ b/src/cli/tui/screens/web-search/AddWebSearchFlow.tsx @@ -0,0 +1,88 @@ +import { gatewayTargetPrimitive } from '../../../primitives/registry'; +import { ErrorPrompt } from '../../components'; +import { useExistingGateways, useExistingToolNames } from '../../hooks/useCreateMcp'; +import { AddSuccessScreen } from '../add/AddSuccessScreen'; +import { AddWebSearchScreen } from './AddWebSearchScreen'; +import type { AddWebSearchConfig } from './types'; +import React, { useCallback, useEffect, useState } from 'react'; + +type FlowState = + | { name: 'create-wizard' } + | { name: 'create-success'; toolName: string; gateway: string; excludeDomains?: string[] } + | { name: 'error'; message: string }; + +interface AddWebSearchFlowProps { + isInteractive?: boolean; + onExit: () => void; + onBack: () => void; + onDev?: () => void; + onDeploy?: () => void; +} + +export function AddWebSearchFlow({ isInteractive = true, onExit, onBack, onDev, onDeploy }: AddWebSearchFlowProps) { + const [flow, setFlow] 
= useState({ name: 'create-wizard' }); + const { gateways: existingGateways } = useExistingGateways(); + const { toolNames: existingToolNames } = useExistingToolNames(); + + // In non-interactive mode, exit after success. + useEffect(() => { + if (!isInteractive && flow.name === 'create-success') { + onExit(); + } + }, [isInteractive, flow.name, onExit]); + + const handleComplete = useCallback((config: AddWebSearchConfig) => { + void gatewayTargetPrimitive + .createWebSearchGatewayTarget({ + targetType: 'webSearch', + name: config.name, + gateway: config.gateway, + ...(config.excludeDomains && config.excludeDomains.length > 0 ? { excludeDomains: config.excludeDomains } : {}), + }) + .then((result: { toolName: string }) => { + setFlow({ + name: 'create-success', + toolName: result.toolName, + gateway: config.gateway, + excludeDomains: config.excludeDomains, + }); + }) + .catch((err: unknown) => { + setFlow({ name: 'error', message: err instanceof Error ? err.message : 'Unknown error' }); + }); + }, []); + + if (flow.name === 'create-wizard') { + return ( + + ); + } + if (flow.name === 'create-success') { + const excludeSuffix = + flow.excludeDomains && flow.excludeDomains.length > 0 + ? 
` Excluded domains: ${flow.excludeDomains.join(', ')}.` + : ''; + return ( + + ); + } + if (flow.name === 'error') { + return ( + + ); + } + return null; +} diff --git a/src/cli/tui/screens/web-search/AddWebSearchScreen.tsx b/src/cli/tui/screens/web-search/AddWebSearchScreen.tsx new file mode 100644 index 000000000..1dcd2f2e2 --- /dev/null +++ b/src/cli/tui/screens/web-search/AddWebSearchScreen.tsx @@ -0,0 +1,158 @@ +import { ToolNameSchema } from '../../../../schema'; +import { ConfirmReview, Panel, Screen, StepIndicator, TextInput, WizardSelect } from '../../components'; +import type { SelectableItem } from '../../components'; +import { HELP_TEXT } from '../../constants'; +import { useListNavigation } from '../../hooks'; +import { generateUniqueName } from '../../utils'; +import type { AddWebSearchConfig } from './types'; +import { Box, Text } from 'ink'; +import React, { useMemo, useState } from 'react'; + +type Step = 'name' | 'gateway' | 'exclude-domains' | 'confirm'; + +const STEP_LABELS: Record = { + name: 'Name', + gateway: 'Gateway', + 'exclude-domains': 'Exclude domains', + confirm: 'Confirm', +}; + +const STEPS: Step[] = ['name', 'gateway', 'exclude-domains', 'confirm']; + +interface AddWebSearchScreenProps { + onComplete: (config: AddWebSearchConfig) => void; + onExit: () => void; + existingGatewayNames: string[]; + existingToolNames: string[]; +} + +export function AddWebSearchScreen({ + onComplete, + onExit, + existingGatewayNames, + existingToolNames, +}: AddWebSearchScreenProps) { + const [step, setStep] = useState('name'); + const [name, setName] = useState(''); + const [gateway, setGateway] = useState(undefined); + const [excludeDomains, setExcludeDomains] = useState(undefined); + + const isNameStep = step === 'name'; + const isGatewayStep = step === 'gateway'; + const isExcludeDomainsStep = step === 'exclude-domains'; + const isConfirmStep = step === 'confirm'; + + const noGatewaysAvailable = isGatewayStep && existingGatewayNames.length === 0; + 
+ const gatewayItems: SelectableItem[] = useMemo( + () => existingGatewayNames.map(g => ({ id: g, title: g })), + [existingGatewayNames] + ); + + const gatewayNav = useListNavigation({ + items: gatewayItems, + isActive: isGatewayStep && !noGatewaysAvailable, + onSelect: (item: SelectableItem) => { + setGateway(item.id); + setStep('exclude-domains'); + }, + onExit: () => setStep('name'), + }); + + useListNavigation({ + items: [{ id: 'confirm', title: 'Confirm' }], + onSelect: () => onComplete({ name, gateway: gateway!, excludeDomains }), + onExit: () => setStep('exclude-domains'), + isActive: isConfirmStep, + }); + + const helpText = isGatewayStep + ? HELP_TEXT.NAVIGATE_SELECT + : isConfirmStep + ? HELP_TEXT.CONFIRM_CANCEL + : HELP_TEXT.TEXT_INPUT; + + const headerContent = ; + + const confirmFields = useMemo( + () => [ + { label: 'Name', value: name }, + { label: 'Gateway', value: gateway ?? '' }, + { + label: 'Exclude domains', + value: excludeDomains && excludeDomains.length > 0 ? excludeDomains.join(', ') : '(none)', + }, + ], + [name, gateway, excludeDomains] + ); + + return ( + + + {isNameStep && ( + { + setName(value); + setStep('gateway'); + }} + onCancel={onExit} + schema={ToolNameSchema} + customValidation={value => !existingToolNames.includes(value) || 'Target name already exists'} + /> + )} + + {isGatewayStep && noGatewaysAvailable && } + + {isGatewayStep && !noGatewaysAvailable && ( + + )} + + {isExcludeDomainsStep && ( + { + const domains = value + .split(',') + .map(d => d.trim()) + .filter(d => d.length > 0); + setExcludeDomains(domains.length > 0 ? domains : undefined); + setStep('confirm'); + }} + onCancel={() => setStep('gateway')} + /> + )} + + {isConfirmStep && } + + + ); +} + +function NoGatewaysMessage() { + return ( + + No gateways found + Run `agentcore add gateway` first, then re-run this command. 
+ + Esc back + + + ); +} diff --git a/src/cli/tui/screens/web-search/index.ts b/src/cli/tui/screens/web-search/index.ts new file mode 100644 index 000000000..94c25c5e4 --- /dev/null +++ b/src/cli/tui/screens/web-search/index.ts @@ -0,0 +1,2 @@ +export { AddWebSearchFlow } from './AddWebSearchFlow'; +export type { AddWebSearchConfig } from './types'; diff --git a/src/cli/tui/screens/web-search/types.ts b/src/cli/tui/screens/web-search/types.ts new file mode 100644 index 000000000..a2dadcb4b --- /dev/null +++ b/src/cli/tui/screens/web-search/types.ts @@ -0,0 +1,10 @@ +/** + * Captured by the AddWebSearchScreen wizard and passed to the Flow, which + * dispatches to gatewayTargetPrimitive.createWebSearchGatewayTarget(). + */ +export interface AddWebSearchConfig { + name: string; + gateway: string; + /** Optional list of domains to exclude from search results. */ + excludeDomains?: string[]; +} diff --git a/src/lib/errors/types.ts b/src/lib/errors/types.ts index db9d4f5e9..4e488ffca 100644 --- a/src/lib/errors/types.ts +++ b/src/lib/errors/types.ts @@ -79,6 +79,15 @@ export class ResourceNotFoundError extends BaseError { } } +/** + * Error indicating a job (recommendation / batch evaluation) was not found locally or on the service. + */ +export class JobNotFoundError extends BaseError { + constructor(message: string, options?: BaseErrorOptions) { + super(message, { defaultSource: 'user', ...options }); + } +} + /** * Error indicating invalid input or configuration values. */ @@ -291,6 +300,18 @@ export class PollExhaustedError extends BaseError { } } +/** + * Thrown when starting or driving a Bedrock Knowledge Base ingestion job + * fails. Default source is 'service' because most ingestion failures are + * AWS-side (validation, throttling, KB not ready). Pre-flight failures from + * the CLI side (KB not deployed, no data sources recorded) override to 'user'. 
+ */ +export class IngestionError extends BaseError { + constructor(message: string, options?: BaseErrorOptions) { + super(message, { defaultSource: 'service', ...options }); + } +} + export class ShellKickedError extends BaseError { constructor(options?: BaseErrorOptions) { super('Shell session was taken over by another client (close code 4000)', { @@ -308,3 +329,9 @@ export class UserCancellationError extends BaseError { super(`User cancelled`, { defaultSource: 'user', ...options }); } } + +export class ExportHarnessError extends BaseError { + constructor(message: string, options?: BaseErrorOptions) { + super(message, { defaultSource: 'client', ...options }); + } +} diff --git a/src/lib/schemas/io/config-io.ts b/src/lib/schemas/io/config-io.ts index 5338b86ba..9fd6bf8c6 100644 --- a/src/lib/schemas/io/config-io.ts +++ b/src/lib/schemas/io/config-io.ts @@ -122,7 +122,6 @@ export class ConfigIO { const cleaned = { ...data }; if (cleaned.configBundles?.length === 0) delete (cleaned as Record).configBundles; if (cleaned.abTests?.length === 0) delete (cleaned as Record).abTests; - if (cleaned.httpGateways?.length === 0) delete (cleaned as Record).httpGateways; await this.validateAndWrite(filePath, 'AgentCore Project Config', AgentCoreProjectSpecSchema, cleaned); } diff --git a/src/schema/__tests__/constants.test.ts b/src/schema/__tests__/constants.test.ts index 2d8bd3bc9..0c52b8f2c 100644 --- a/src/schema/__tests__/constants.test.ts +++ b/src/schema/__tests__/constants.test.ts @@ -1,4 +1,5 @@ import { + LANGUAGE_FRAMEWORK_MATRIX, ModelProviderSchema, NetworkModeSchema, NodeRuntimeSchema, @@ -8,8 +9,10 @@ import { RuntimeVersionSchema, SDKFrameworkSchema, TargetLanguageSchema, + getFrameworksForLanguage, getSupportedFrameworksForProtocol, getSupportedModelProviders, + isFrameworkSupportedForLanguage, isFrameworkSupportedForProtocol, isModelProviderSupported, isReservedProjectName, @@ -175,6 +178,54 @@ describe('getSupportedFrameworksForProtocol', () => { }); }); 
+describe('LANGUAGE_FRAMEWORK_MATRIX', () => { + it('defines Python and TypeScript', () => { + expect(Object.keys(LANGUAGE_FRAMEWORK_MATRIX)).toEqual(expect.arrayContaining(['Python', 'TypeScript'])); + }); + + it('Python supports the open-source frameworks but not Vercel AI (TypeScript-only)', () => { + expect(LANGUAGE_FRAMEWORK_MATRIX.Python).toEqual( + expect.arrayContaining(['Strands', 'LangChain_LangGraph', 'GoogleADK', 'OpenAIAgents']) + ); + expect(LANGUAGE_FRAMEWORK_MATRIX.Python).not.toContain('VercelAI'); + }); + + it('TypeScript supports only Strands and Vercel AI', () => { + expect([...LANGUAGE_FRAMEWORK_MATRIX.TypeScript].sort()).toEqual(['Strands', 'VercelAI']); + }); +}); + +describe('getFrameworksForLanguage', () => { + it('returns Python frameworks without Vercel AI', () => { + const frameworks = getFrameworksForLanguage('Python'); + expect(frameworks).toContain('Strands'); + expect(frameworks).not.toContain('VercelAI'); + }); + + it('returns TypeScript frameworks including Vercel AI', () => { + const frameworks = getFrameworksForLanguage('TypeScript'); + expect(frameworks).toContain('Strands'); + expect(frameworks).toContain('VercelAI'); + }); +}); + +describe('isFrameworkSupportedForLanguage', () => { + it('returns true for supported combinations', () => { + expect(isFrameworkSupportedForLanguage('Python', 'Strands')).toBe(true); + expect(isFrameworkSupportedForLanguage('TypeScript', 'VercelAI')).toBe(true); + expect(isFrameworkSupportedForLanguage('TypeScript', 'Strands')).toBe(true); + }); + + it('returns false for Python + Vercel AI (the bug being fixed)', () => { + expect(isFrameworkSupportedForLanguage('Python', 'VercelAI')).toBe(false); + }); + + it('returns false for TypeScript + a Python-only framework', () => { + expect(isFrameworkSupportedForLanguage('TypeScript', 'LangChain_LangGraph')).toBe(false); + expect(isFrameworkSupportedForLanguage('TypeScript', 'GoogleADK')).toBe(false); + }); +}); + describe('isFrameworkSupportedForProtocol', 
() => { it('returns true for Strands + HTTP', () => { expect(isFrameworkSupportedForProtocol('HTTP', 'Strands')).toBe(true); diff --git a/src/schema/constants.ts b/src/schema/constants.ts index ca8732b45..35d09fc61 100644 --- a/src/schema/constants.ts +++ b/src/schema/constants.ts @@ -208,3 +208,31 @@ export function getSupportedFrameworksForProtocol(protocol: ProtocolMode): reado export function isFrameworkSupportedForProtocol(protocol: ProtocolMode, framework: SDKFramework): boolean { return PROTOCOL_FRAMEWORK_MATRIX[protocol].includes(framework); } + +/** + * Matrix defining which SDK frameworks ship templates for each target language. + * Vercel AI is TypeScript-only; the remaining frameworks are Python-only today. + * Used to keep framework pickers and validation in sync with the templates that + * actually exist under `assets//...`. + */ +export const LANGUAGE_FRAMEWORK_MATRIX = { + Python: ['Strands', 'LangChain_LangGraph', 'GoogleADK', 'OpenAIAgents'], + TypeScript: ['Strands', 'VercelAI'], +} as const satisfies Record; + +/** Languages that scaffold from templates (excludes 'Other', which is BYO-only). */ +export type TemplateLanguage = keyof typeof LANGUAGE_FRAMEWORK_MATRIX; + +/** + * Returns the SDK frameworks that have templates for a given target language. + */ +export function getFrameworksForLanguage(language: TemplateLanguage): readonly SDKFramework[] { + return LANGUAGE_FRAMEWORK_MATRIX[language]; +} + +/** + * Checks if a framework has a template for a given target language. 
+ */ +export function isFrameworkSupportedForLanguage(language: TemplateLanguage, framework: SDKFramework): boolean { + return getFrameworksForLanguage(language).includes(framework); +} diff --git a/src/schema/llm-compacted/agentcore.ts b/src/schema/llm-compacted/agentcore.ts index c42cac1a6..d224bc86a 100644 --- a/src/schema/llm-compacted/agentcore.ts +++ b/src/schema/llm-compacted/agentcore.ts @@ -26,8 +26,6 @@ interface AgentCoreProjectSpec { policyEngines: PolicyEngine[]; // Unique by name — Cedar policy engines configBundles: ConfigBundle[]; // Unique by name — configuration bundles for versioned config abTests: ABTest[]; // Unique by name — A/B test experiments - /** @internal Auto-managed by AB test creation. Do not configure directly. */ - httpGateways: HttpGateway[]; // Unique by name — HTTP gateways bound to a runtime datasets: DatasetSpec[]; // Unique by name — datasets for Dataset Management } @@ -57,7 +55,14 @@ interface NetworkConfig { type MemoryStrategyType = 'SEMANTIC' | 'SUMMARIZATION' | 'USER_PREFERENCE' | 'EPISODIC'; type ModelProvider = 'Bedrock' | 'Gemini' | 'OpenAI' | 'Anthropic'; type EvaluationLevel = 'SESSION' | 'TRACE' | 'TOOL_CALL'; -type GatewayTargetType = 'lambda' | 'mcpServer' | 'openApiSchema' | 'smithyModel' | 'apiGateway' | 'lambdaFunctionArn'; +type GatewayTargetType = + | 'lambda' + | 'mcpServer' + | 'openApiSchema' + | 'smithyModel' + | 'apiGateway' + | 'lambdaFunctionArn' + | 'httpRuntime'; type OutboundAuthType = 'OAUTH' | 'API_KEY' | 'NONE'; type GatewayAuthorizerType = 'NONE' | 'AWS_IAM' | 'CUSTOM_JWT'; type GatewayExceptionLevel = 'NONE' | 'DEBUG'; @@ -221,6 +226,7 @@ interface OnlineEvalConfig { interface AgentCoreGateway { name: string; // @regex ^[0-9a-zA-Z](?:[0-9a-zA-Z-]*[0-9a-zA-Z])?$ @max 100 + protocolType?: 'MCP' | 'None'; description?: string; targets: AgentCoreGatewayTarget[]; // Gateway targets authorizerType?: GatewayAuthorizerType; // default 'NONE' @@ -262,16 +268,22 @@ interface 
GatewayPolicyEngineConfiguration { // GATEWAY TARGET // ───────────────────────────────────────────────────────────────────────────── +interface HttpRuntimeConfig { + runtime: string; // Reference to a runtime name in spec.runtimes + runtimeEndpoint?: string; // Version alias / qualifier +} + interface AgentCoreGatewayTarget { name: string; targetType: GatewayTargetType; toolDefinitions?: ToolDefinition[]; // Required for 'lambda' targets compute?: ToolComputeConfig; // Required for 'lambda' and scaffold targets - endpoint?: string; // URL — required for external 'mcpServer' targets + endpoint?: string; // URL for 'mcpServer' targets outboundAuth?: OutboundAuth; apiGateway?: ApiGatewayConfig; // Required for 'apiGateway' target type schemaSource?: SchemaSource; // Required for 'openApiSchema' / 'smithyModel' targets lambdaFunctionArn?: LambdaFunctionArnConfig; // Required for 'lambdaFunctionArn' target type + httpRuntime?: HttpRuntimeConfig; // Required for 'httpRuntime' targets } interface OutboundAuth { @@ -407,10 +419,6 @@ interface ABTest { evaluationConfig: { onlineEvaluationConfigArn: string; }; - trafficAllocationConfig?: { - routeOnHeader: { headerName: string }; - }; - maxDurationDays?: number; // @min 1 @max 90 enableOnCreate?: boolean; } @@ -424,15 +432,3 @@ interface ABTestVariant { }; }; } - -// ───────────────────────────────────────────────────────────────────────────── -// HTTP GATEWAY -// ───────────────────────────────────────────────────────────────────────────── - -/** @internal HTTP gateway auto-created when setting up an AB test. 
*/ -interface HttpGateway { - name: string; // @regex ^[a-zA-Z][a-zA-Z0-9-]{0,47}$ @max 48 - description?: string; // @max 200 - runtimeRef: string; // Reference to a runtime name from spec.runtimes - roleArn?: string; // IAM role ARN — auto-created if omitted -} diff --git a/src/schema/llm-compacted/mcp.ts b/src/schema/llm-compacted/mcp.ts index 9e1e8d8a2..cfdd94e6b 100644 --- a/src/schema/llm-compacted/mcp.ts +++ b/src/schema/llm-compacted/mcp.ts @@ -41,6 +41,24 @@ interface AgentCoreGatewayTarget { /** Schema source for openApiSchema / smithyModel targets. */ schemaSource?: { inline: { path: string } } | { s3: { uri: string; bucketOwnerAccountId?: string } }; lambdaFunctionArn?: LambdaFunctionArnConfig; + /** Required for `connector` target type. */ + connectorId?: ConnectorId; + /** + * For bedrock-knowledge-bases connector targets — a project KB name or a + * literal 10-char external KB ID. Mutually exclusive with `knowledgeBaseIds`. + */ + knowledgeBaseId?: string; + /** + * For bedrock-agentic-retrieve connector targets — fan-out list of project + * KB names or literal 10-char external KB IDs. Mutually exclusive with + * `knowledgeBaseId`. + */ + knowledgeBaseIds?: string[]; + /** + * For `webSearch` target type only — domains to exclude from search results. + * Maps to the connector's `domainFilter.exclude` parameterValue at synth. 
+ */ + excludeDomains?: string[]; } interface OutboundAuth { @@ -176,7 +194,18 @@ interface IamPolicyDocument { // ENUMS // ───────────────────────────────────────────────────────────────────────────── -type GatewayTargetType = 'lambda' | 'mcpServer' | 'openApiSchema' | 'smithyModel' | 'apiGateway' | 'lambdaFunctionArn'; +type GatewayTargetType = + | 'lambda' + | 'mcpServer' + | 'openApiSchema' + | 'smithyModel' + | 'apiGateway' + | 'lambdaFunctionArn' + | 'httpRuntime' + | 'connector' + | 'passthrough' + | 'webSearch'; +type ConnectorId = 'bedrock-knowledge-bases' | 'bedrock-agentic-retrieve'; type PythonRuntime = 'PYTHON_3_10' | 'PYTHON_3_11' | 'PYTHON_3_12' | 'PYTHON_3_13' | 'PYTHON_3_14'; type NodeRuntime = 'NODE_18' | 'NODE_20' | 'NODE_22'; type NetworkMode = 'PUBLIC' | 'VPC'; diff --git a/src/schema/schemas/__tests__/agentcore-project.test.ts b/src/schema/schemas/__tests__/agentcore-project.test.ts index 5423d16ab..3796f85cf 100644 --- a/src/schema/schemas/__tests__/agentcore-project.test.ts +++ b/src/schema/schemas/__tests__/agentcore-project.test.ts @@ -1,3 +1,4 @@ +import type { DirectoryPath, FilePath } from '../../types/index.js'; import { AgentCoreProjectSpecSchema, CredentialNameSchema, @@ -616,4 +617,225 @@ describe('AgentCoreProjectSpecSchema', () => { }); expect(result.success).toBe(false); }); + + it('httpGateways empty array passes silently', () => { + const result = AgentCoreProjectSpecSchema.safeParse({ + ...minimalProject, + httpGateways: [], + }); + expect(result.success).toBe(true); + }); + + it('httpGateways with entries produces migration error', () => { + const result = AgentCoreProjectSpecSchema.safeParse({ + ...minimalProject, + httpGateways: [{ name: 'old-gw' }], + }); + expect(result.success).toBe(false); + if (!result.success) { + expect(result.error.issues.some(i => i.message.includes('deprecated'))).toBe(true); + } + }); + + it('rejects httpRuntime target on MCP gateway (no protocolType None)', () => { + const result = 
AgentCoreProjectSpecSchema.safeParse({ + ...minimalProject, + agentCoreGateways: [ + { + name: 'mcp-gw', + targets: [ + { + name: 'http-target', + targetType: 'httpRuntime', + httpRuntime: { runtime: 'MyAgent' }, + }, + ], + }, + ], + runtimes: [ + { + name: 'MyAgent', + build: 'CodeZip', + entrypoint: 'main.py' as FilePath, + codeLocation: './src' as DirectoryPath, + runtimeVersion: 'PYTHON_3_12', + protocol: 'HTTP', + }, + ], + }); + expect(result.success).toBe(false); + if (!result.success) { + expect(result.error.issues.some(i => i.message.includes('protocolType'))).toBe(true); + } + }); + + it('accepts httpRuntime target on gateway with protocolType None', () => { + const result = AgentCoreProjectSpecSchema.safeParse({ + ...minimalProject, + agentCoreGateways: [ + { + name: 'http-gw', + protocolType: 'None', + targets: [ + { + name: 'http-target', + targetType: 'httpRuntime', + httpRuntime: { runtime: 'MyAgent' }, + }, + ], + }, + ], + runtimes: [ + { + name: 'MyAgent', + build: 'CodeZip', + entrypoint: 'main.py' as FilePath, + codeLocation: './src' as DirectoryPath, + runtimeVersion: 'PYTHON_3_12', + protocol: 'HTTP', + }, + ], + }); + expect(result.success).toBe(true); + }); + + it('accepts connector target on gateway with protocolType None (HTTP is a superset of MCP)', () => { + const result = AgentCoreProjectSpecSchema.safeParse({ + ...minimalProject, + agentCoreGateways: [ + { + name: 'http-gw', + protocolType: 'None', + targets: [ + { + name: 'kb-target', + targetType: 'connector', + connectorId: 'bedrock-knowledge-bases', + knowledgeBaseId: 'ABCDEFGHIJ', + }, + ], + }, + ], + }); + expect(result.success).toBe(true); + }); + + it('accepts mcpServer target on gateway with protocolType None (HTTP is a superset of MCP)', () => { + const result = AgentCoreProjectSpecSchema.safeParse({ + ...minimalProject, + agentCoreGateways: [ + { + name: 'http-gw', + protocolType: 'None', + targets: [ + { + name: 'mytool', + targetType: 'mcpServer', + endpoint: 
'https://example.com/mcp', + }, + ], + }, + ], + }); + expect(result.success).toBe(true); + }); + + it('rejects httpRuntime target referencing non-existent runtime', () => { + const result = AgentCoreProjectSpecSchema.safeParse({ + ...minimalProject, + agentCoreGateways: [ + { + name: 'http-gw', + protocolType: 'None', + targets: [ + { + name: 'http-target', + targetType: 'httpRuntime', + httpRuntime: { runtime: 'NonExistentAgent' }, + }, + ], + }, + ], + runtimes: [], + }); + expect(result.success).toBe(false); + if (!result.success) { + expect(result.error.issues.some(i => i.message.includes('unknown runtime'))).toBe(true); + } + }); + + it('rejects httpRuntime target referencing non-existent runtimeEndpoint', () => { + const result = AgentCoreProjectSpecSchema.safeParse({ + ...minimalProject, + agentCoreGateways: [ + { + name: 'http-gw', + protocolType: 'None', + targets: [ + { + name: 'http-target', + targetType: 'httpRuntime', + httpRuntime: { runtime: 'MyAgent', runtimeEndpoint: 'NONEXISTENT' }, + }, + ], + }, + ], + runtimes: [ + { + name: 'MyAgent', + build: 'CodeZip', + entrypoint: 'main.py' as FilePath, + codeLocation: './src' as DirectoryPath, + runtimeVersion: 'PYTHON_3_12', + protocol: 'HTTP', + endpoints: { + LIVE: { version: 1, description: 'Live endpoint' }, + }, + }, + ], + }); + expect(result.success).toBe(false); + if (!result.success) { + expect(result.error.issues.some(i => i.message.includes('does not exist on runtime'))).toBe(true); + } + }); +}); + +describe('AgentCoreProjectSpec — knowledgeBases', () => { + it('defaults knowledgeBases to []', () => { + const result = AgentCoreProjectSpecSchema.parse({ + name: 'TestProj', + version: 1, + }); + expect(result.knowledgeBases).toEqual([]); + }); + + it('accepts a populated knowledgeBases array', () => { + const result = AgentCoreProjectSpecSchema.parse({ + name: 'TestProj', + version: 1, + knowledgeBases: [ + { + name: 'product-docs', + dataSources: [{ type: 'S3', uri: 's3://my-bucket/docs/' }], 
+ }, + ], + }); + expect(result.knowledgeBases).toHaveLength(1); + expect(result.knowledgeBases[0]?.name).toBe('product-docs'); + expect(result.knowledgeBases[0]?.type).toBe('AgentCoreKnowledgeBase'); + }); + + it('rejects duplicate knowledge base names', () => { + expect(() => + AgentCoreProjectSpecSchema.parse({ + name: 'TestProj', + version: 1, + knowledgeBases: [ + { name: 'docs', dataSources: [{ type: 'S3', uri: 's3://my-bucket/a/' }] }, + { name: 'docs', dataSources: [{ type: 'S3', uri: 's3://my-bucket/b/' }] }, + ], + }) + ).toThrow(/Duplicate knowledge base name/); + }); }); diff --git a/src/schema/schemas/__tests__/deployed-state.test.ts b/src/schema/schemas/__tests__/deployed-state.test.ts index 4387dc63e..97da30a4a 100644 --- a/src/schema/schemas/__tests__/deployed-state.test.ts +++ b/src/schema/schemas/__tests__/deployed-state.test.ts @@ -6,6 +6,7 @@ import { DeployedStateSchema, GatewayDeployedStateSchema, HarnessDeployedStateSchema, + KnowledgeBaseDeployedStateSchema, McpDeployedStateSchema, McpLambdaDeployedStateSchema, McpRuntimeDeployedStateSchema, @@ -109,6 +110,40 @@ describe('MemoryDeployedStateSchema', () => { }); }); +describe('KnowledgeBaseDeployedStateSchema', () => { + it('accepts valid KB state with no sourcesHash', () => { + expect( + KnowledgeBaseDeployedStateSchema.safeParse({ + knowledgeBaseId: 'KB1', + knowledgeBaseArn: 'arn:aws:bedrock:us-east-1:123:knowledge-base/KB1', + dataSources: [], + }).success + ).toBe(true); + }); + + it('accepts valid KB state with sourcesHash', () => { + expect( + KnowledgeBaseDeployedStateSchema.safeParse({ + knowledgeBaseId: 'KB1', + knowledgeBaseArn: 'arn:aws:bedrock:us-east-1:123:knowledge-base/KB1', + dataSources: [{ dataSourceId: 'DS1', uri: 's3://b/d/' }], + sourcesHash: 'a'.repeat(64), + }).success + ).toBe(true); + }); + + it('rejects empty sourcesHash', () => { + expect( + KnowledgeBaseDeployedStateSchema.safeParse({ + knowledgeBaseId: 'KB1', + knowledgeBaseArn: 
'arn:aws:bedrock:us-east-1:123:knowledge-base/KB1', + dataSources: [], + sourcesHash: '', + }).success + ).toBe(false); + }); +}); + describe('GatewayDeployedStateSchema', () => { it('accepts valid gateway state', () => { expect( @@ -334,6 +369,40 @@ describe('HarnessDeployedStateSchema', () => { }); expect(result.success).toBe(true); }); + + it('accepts a harness deployed-state with harnessVersion', () => { + const result = HarnessDeployedStateSchema.safeParse({ + harnessId: 'abc123', + harnessArn: 'arn:aws:bedrock-agentcore:us-west-2:123:harness/abc123', + roleArn: 'arn:aws:iam::123456789012:role/HarnessRole', + status: 'READY', + harnessVersion: 3, + }); + expect(result.success).toBe(true); + if (result.success) expect(result.data.harnessVersion).toBe(3); + }); + + it('accepts a harness deployed-state without harnessVersion (backwards compatible)', () => { + const result = HarnessDeployedStateSchema.safeParse({ + harnessId: 'abc123', + harnessArn: 'arn:aws:bedrock-agentcore:us-west-2:123:harness/abc123', + roleArn: 'arn:aws:iam::123456789012:role/HarnessRole', + status: 'READY', + }); + expect(result.success).toBe(true); + if (result.success) expect(result.data.harnessVersion).toBeUndefined(); + }); + + it('rejects a harnessVersion of 0 (must be >= 1)', () => { + const result = HarnessDeployedStateSchema.safeParse({ + harnessId: 'abc123', + harnessArn: 'arn:aws:bedrock-agentcore:us-west-2:123:harness/abc123', + roleArn: 'arn:aws:iam::123456789012:role/HarnessRole', + status: 'READY', + harnessVersion: 0, + }); + expect(result.success).toBe(false); + }); }); describe('createValidatedDeployedStateSchema', () => { diff --git a/src/schema/schemas/__tests__/mcp-defs.test.ts b/src/schema/schemas/__tests__/mcp-defs.test.ts index f411ca99a..8c04de27d 100644 --- a/src/schema/schemas/__tests__/mcp-defs.test.ts +++ b/src/schema/schemas/__tests__/mcp-defs.test.ts @@ -9,7 +9,6 @@ import { describe, expect, it } from 'vitest'; describe('ToolNameSchema', () => { it('accepts 
valid names', () => { expect(ToolNameSchema.safeParse('myTool').success).toBe(true); - expect(ToolNameSchema.safeParse('get_user').success).toBe(true); expect(ToolNameSchema.safeParse('search-results').success).toBe(true); }); @@ -20,6 +19,10 @@ describe('ToolNameSchema', () => { expect(ToolNameSchema.safeParse('my.tool').success).toBe(false); }); + it('rejects underscores', () => { + expect(ToolNameSchema.safeParse('get_user').success).toBe(false); + }); + it('enforces 128-char boundary', () => { expect(ToolNameSchema.safeParse('a'.repeat(128)).success).toBe(true); expect(ToolNameSchema.safeParse('a'.repeat(129)).success).toBe(false); diff --git a/src/schema/schemas/__tests__/mcp.test.ts b/src/schema/schemas/__tests__/mcp.test.ts index 0ab33c2a4..392e3bdca 100644 --- a/src/schema/schemas/__tests__/mcp.test.ts +++ b/src/schema/schemas/__tests__/mcp.test.ts @@ -11,6 +11,7 @@ import { ApiGatewayConfigSchema, GatewayExceptionLevelSchema, GatewayTargetTypeSchema, + HttpRuntimeConfigSchema, LambdaFunctionArnConfigSchema, McpImplLanguageSchema, RuntimeConfigSchema, @@ -29,6 +30,10 @@ describe('GatewayTargetTypeSchema', () => { 'smithyModel', 'apiGateway', 'lambdaFunctionArn', + 'httpRuntime', + 'connector', + 'passthrough', + 'webSearch', ]); }); @@ -1055,3 +1060,303 @@ describe('CustomClaimValidationSchema', () => { expect(result.success).toBe(false); }); }); + +describe('HttpRuntimeConfigSchema', () => { + it('accepts config with runtime only', () => { + const result = HttpRuntimeConfigSchema.safeParse({ runtime: 'my-agent' }); + expect(result.success).toBe(true); + }); + + it('accepts config with runtime and runtimeEndpoint', () => { + const result = HttpRuntimeConfigSchema.safeParse({ runtime: 'my-agent', runtimeEndpoint: 'LIVE' }); + expect(result.success).toBe(true); + }); + + it('rejects when runtime is missing', () => { + const result = HttpRuntimeConfigSchema.safeParse({}); + expect(result.success).toBe(false); + }); + + it('rejects extra fields (strict)', () => 
{ + const result = HttpRuntimeConfigSchema.safeParse({ runtime: 'my-agent', extra: 'not-allowed' }); + expect(result.success).toBe(false); + }); +}); + +describe('AgentCoreGatewayTargetSchema with httpRuntime', () => { + it('accepts valid httpRuntime target with httpRuntime object', () => { + const result = AgentCoreGatewayTargetSchema.safeParse({ + name: 'my-http-target', + targetType: 'httpRuntime', + httpRuntime: { runtime: 'my-agent' }, + }); + expect(result.success).toBe(true); + }); + + it('accepts httpRuntime target with runtimeEndpoint', () => { + const result = AgentCoreGatewayTargetSchema.safeParse({ + name: 'my-http-target', + targetType: 'httpRuntime', + httpRuntime: { runtime: 'my-agent', runtimeEndpoint: 'LIVE' }, + }); + expect(result.success).toBe(true); + }); + + it('rejects httpRuntime target without httpRuntime object', () => { + const result = AgentCoreGatewayTargetSchema.safeParse({ + name: 'my-http-target', + targetType: 'httpRuntime', + }); + expect(result.success).toBe(false); + if (!result.success) { + expect(result.error.issues.some(i => i.message.includes('httpRuntime'))).toBe(true); + } + }); + + it('rejects httpRuntime target with endpoint set (should use httpRuntime.runtimeEndpoint)', () => { + const result = AgentCoreGatewayTargetSchema.safeParse({ + name: 'my-http-target', + targetType: 'httpRuntime', + httpRuntime: { runtime: 'my-agent' }, + endpoint: 'https://example.com/runtime', + }); + expect(result.success).toBe(false); + if (!result.success) { + expect(result.error.issues.some(i => i.message.includes('httpRuntime.runtimeEndpoint'))).toBe(true); + } + }); + + it('rejects httpRuntime target with compute', () => { + const result = AgentCoreGatewayTargetSchema.safeParse({ + name: 'my-http-target', + targetType: 'httpRuntime', + httpRuntime: { runtime: 'my-agent' }, + compute: { + host: 'Lambda', + implementation: { language: 'Python', path: 'tools', handler: 'h' }, + pythonVersion: 'PYTHON_3_12', + }, + }); + 
expect(result.success).toBe(false); + if (!result.success) { + expect(result.error.issues.some(i => i.message.includes('compute'))).toBe(true); + } + }); + + it('rejects httpRuntime target with apiGateway config', () => { + const result = AgentCoreGatewayTargetSchema.safeParse({ + name: 'my-http-target', + targetType: 'httpRuntime', + httpRuntime: { runtime: 'my-agent' }, + apiGateway: { + restApiId: 'abc123', + stage: 'prod', + apiGatewayToolConfiguration: { toolFilters: [{ filterPath: '/*', methods: ['GET'] }] }, + }, + }); + expect(result.success).toBe(false); + if (!result.success) { + expect(result.error.issues.some(i => i.message.includes('apiGateway'))).toBe(true); + } + }); + + it('rejects httpRuntime target with lambdaFunctionArn config', () => { + const result = AgentCoreGatewayTargetSchema.safeParse({ + name: 'my-http-target', + targetType: 'httpRuntime', + httpRuntime: { runtime: 'my-agent' }, + lambdaFunctionArn: { + lambdaArn: 'arn:aws:lambda:us-east-1:123456789012:function:my-func', + toolSchemaFile: './tools.json', + }, + }); + expect(result.success).toBe(false); + if (!result.success) { + expect(result.error.issues.some(i => i.message.includes('lambdaFunctionArn'))).toBe(true); + } + }); + + it('rejects httpRuntime target with toolDefinitions', () => { + const result = AgentCoreGatewayTargetSchema.safeParse({ + name: 'my-http-target', + targetType: 'httpRuntime', + httpRuntime: { runtime: 'my-agent' }, + toolDefinitions: [{ name: 'myTool', description: 'A tool', inputSchema: { type: 'object' as const } }], + }); + expect(result.success).toBe(false); + if (!result.success) { + expect(result.error.issues.some(i => i.message.includes('toolDefinitions'))).toBe(true); + } + }); + + it('accepts httpRuntime target with OAUTH outbound auth', () => { + const result = AgentCoreGatewayTargetSchema.safeParse({ + name: 'my-http-target', + targetType: 'httpRuntime', + httpRuntime: { runtime: 'my-agent' }, + outboundAuth: { type: 'OAUTH', credentialName: 
'my-cred' }, + }); + expect(result.success).toBe(true); + }); + + it('rejects httpRuntime target with API_KEY outbound auth (not supported)', () => { + const result = AgentCoreGatewayTargetSchema.safeParse({ + name: 'my-http-target', + targetType: 'httpRuntime', + httpRuntime: { runtime: 'my-agent' }, + outboundAuth: { type: 'API_KEY', credentialName: 'my-cred' }, + }); + expect(result.success).toBe(false); + if (!result.success) { + expect(result.error.issues.some(i => i.message.includes('API_KEY'))).toBe(true); + } + }); +}); + +describe('AgentCoreGatewayTargetSchema with webSearch', () => { + it('accepts a minimal webSearch target', () => { + const result = AgentCoreGatewayTargetSchema.safeParse({ + name: 'web-search', + targetType: 'webSearch', + }); + expect(result.success).toBe(true); + }); + + it('accepts a webSearch target with excludeDomains', () => { + const result = AgentCoreGatewayTargetSchema.safeParse({ + name: 'web-search', + targetType: 'webSearch', + excludeDomains: ['internal.example.com', 'staging.example.com'], + }); + expect(result.success).toBe(true); + }); + + it('rejects an empty excludeDomains array', () => { + const result = AgentCoreGatewayTargetSchema.safeParse({ + name: 'web-search', + targetType: 'webSearch', + excludeDomains: [], + }); + expect(result.success).toBe(false); + }); + + it('rejects a webSearch target with compute', () => { + const result = AgentCoreGatewayTargetSchema.safeParse({ + name: 'web-search', + targetType: 'webSearch', + compute: { + host: 'Lambda', + implementation: { language: 'Python', path: 'tools', handler: 'h' }, + pythonVersion: 'PYTHON_3_12', + }, + }); + expect(result.success).toBe(false); + if (!result.success) { + expect(result.error.issues.some(i => i.message.includes('compute'))).toBe(true); + } + }); + + it('rejects a webSearch target with endpoint', () => { + const result = AgentCoreGatewayTargetSchema.safeParse({ + name: 'web-search', + targetType: 'webSearch', + endpoint: 
'https://example.com/mcp', + }); + expect(result.success).toBe(false); + if (!result.success) { + expect(result.error.issues.some(i => i.message.includes('endpoint'))).toBe(true); + } + }); + + it('rejects a webSearch target with apiGateway config', () => { + const result = AgentCoreGatewayTargetSchema.safeParse({ + name: 'web-search', + targetType: 'webSearch', + apiGateway: { + restApiId: 'abc123', + stage: 'prod', + apiGatewayToolConfiguration: { toolFilters: [{ filterPath: '/*', methods: ['GET'] }] }, + }, + }); + expect(result.success).toBe(false); + if (!result.success) { + expect(result.error.issues.some(i => i.message.includes('apiGateway'))).toBe(true); + } + }); + + it('rejects a webSearch target with lambdaFunctionArn config', () => { + const result = AgentCoreGatewayTargetSchema.safeParse({ + name: 'web-search', + targetType: 'webSearch', + lambdaFunctionArn: { + lambdaArn: 'arn:aws:lambda:us-east-1:123456789012:function:my-func', + toolSchemaFile: './tools.json', + }, + }); + expect(result.success).toBe(false); + if (!result.success) { + expect(result.error.issues.some(i => i.message.includes('lambdaFunctionArn'))).toBe(true); + } + }); + + it('rejects a webSearch target with schemaSource', () => { + const result = AgentCoreGatewayTargetSchema.safeParse({ + name: 'web-search', + targetType: 'webSearch', + schemaSource: { inline: { path: './schema.json' } }, + }); + expect(result.success).toBe(false); + if (!result.success) { + expect(result.error.issues.some(i => i.message.includes('schemaSource'))).toBe(true); + } + }); + + it('rejects a webSearch target with connectorId', () => { + const result = AgentCoreGatewayTargetSchema.safeParse({ + name: 'web-search', + targetType: 'webSearch', + connectorId: 'bedrock-knowledge-bases', + }); + expect(result.success).toBe(false); + if (!result.success) { + expect(result.error.issues.some(i => i.path.includes('connectorId'))).toBe(true); + } + }); + + it('rejects a webSearch target with httpRuntime', () => { + 
const result = AgentCoreGatewayTargetSchema.safeParse({ + name: 'web-search', + targetType: 'webSearch', + httpRuntime: { runtime: 'my-agent' }, + }); + expect(result.success).toBe(false); + if (!result.success) { + expect(result.error.issues.some(i => i.path.includes('httpRuntime'))).toBe(true); + } + }); + + it('rejects a webSearch target with outboundAuth', () => { + const result = AgentCoreGatewayTargetSchema.safeParse({ + name: 'web-search', + targetType: 'webSearch', + outboundAuth: { type: 'OAUTH', credentialName: 'my-cred' }, + }); + expect(result.success).toBe(false); + if (!result.success) { + expect(result.error.issues.some(i => i.path.includes('outboundAuth'))).toBe(true); + } + }); + + it('rejects excludeDomains on a non-webSearch target', () => { + const result = AgentCoreGatewayTargetSchema.safeParse({ + name: 'mcp', + targetType: 'mcpServer', + endpoint: 'https://example.com/mcp', + excludeDomains: ['internal.example.com'], + }); + expect(result.success).toBe(false); + if (!result.success) { + expect(result.error.issues.some(i => i.path.includes('excludeDomains'))).toBe(true); + } + }); +}); diff --git a/src/schema/schemas/__tests__/online-eval-config.test.ts b/src/schema/schemas/__tests__/online-eval-config.test.ts new file mode 100644 index 000000000..015c8c4df --- /dev/null +++ b/src/schema/schemas/__tests__/online-eval-config.test.ts @@ -0,0 +1,69 @@ +import { OnlineEvalConfigSchema } from '../primitives/online-eval-config'; +import { describe, expect, it } from 'vitest'; + +describe('OnlineEvalConfigSchema - evaluators and insights', () => { + const baseConfig = { + name: 'TestConfig', + agent: 'MyAgent', + samplingRate: 10, + }; + + it('accepts config with evaluators only', () => { + const config = { ...baseConfig, evaluators: ['Builtin.GoalSuccessRate'] }; + expect(OnlineEvalConfigSchema.safeParse(config).success).toBe(true); + }); + + it('accepts config with insights only', () => { + const config = { ...baseConfig, insights: 
['FailureAnalyzer'] }; + expect(OnlineEvalConfigSchema.safeParse(config).success).toBe(true); + }); + + it('rejects config with neither evaluators nor insights', () => { + const result = OnlineEvalConfigSchema.safeParse(baseConfig); + expect(result.success).toBe(false); + if (!result.success) { + expect(result.error.issues.some(i => i.message.includes('At least one of evaluators or insights'))).toBe(true); + } + }); + + it('rejects config with both evaluators and insights (preview)', () => { + const config = { ...baseConfig, evaluators: ['Builtin.GoalSuccessRate'], insights: ['FailureAnalyzer'] }; + const result = OnlineEvalConfigSchema.safeParse(config); + expect(result.success).toBe(false); + if (!result.success) { + expect(result.error.issues.some(i => i.message.includes('Cannot have both evaluators and insights'))).toBe(true); + } + }); + + it('accepts clusteringConfig with valid frequencies', () => { + const config = { + ...baseConfig, + insights: ['FailureAnalyzer'], + clusteringConfig: { frequencies: ['DAILY', 'WEEKLY'] }, + }; + expect(OnlineEvalConfigSchema.safeParse(config).success).toBe(true); + }); + + it('rejects clusteringConfig with more than 3 frequencies', () => { + const config = { + ...baseConfig, + insights: ['FailureAnalyzer'], + clusteringConfig: { frequencies: ['DAILY', 'WEEKLY', 'MONTHLY', 'DAILY'] }, + }; + const result = OnlineEvalConfigSchema.safeParse(config); + expect(result.success).toBe(false); + }); + + it('rejects clusteringConfig without insights', () => { + const config = { + ...baseConfig, + evaluators: ['Builtin.GoalSuccessRate'], + clusteringConfig: { frequencies: ['DAILY'] }, + }; + const result = OnlineEvalConfigSchema.safeParse(config); + expect(result.success).toBe(false); + if (!result.success) { + expect(result.error.issues.some(i => i.message.includes('clusteringConfig requires insights'))).toBe(true); + } + }); +}); diff --git a/src/schema/schemas/__tests__/private-endpoint.test.ts 
b/src/schema/schemas/__tests__/private-endpoint.test.ts new file mode 100644 index 000000000..51f81c9fc --- /dev/null +++ b/src/schema/schemas/__tests__/private-endpoint.test.ts @@ -0,0 +1,232 @@ +import { + CustomJwtAuthorizerConfigSchema, + ManagedVpcResourceSchema, + PrivateEndpointOverrideSchema, + PrivateEndpointSchema, + SelfManagedLatticeResourceSchema, +} from '../auth'; +import { describe, expect, it } from 'vitest'; + +const RCFG = 'rcfg-0123456789abcdefg'; +const RCFG_ARN = 'arn:aws:vpc-lattice:us-west-2:123456789012:resourceconfiguration/rcfg-0123456789abcdefg'; + +describe('SelfManagedLatticeResourceSchema', () => { + it('accepts an rcfg id', () => { + expect(SelfManagedLatticeResourceSchema.safeParse({ resourceConfigurationIdentifier: RCFG }).success).toBe(true); + }); + it('accepts a full VPC Lattice ARN', () => { + expect(SelfManagedLatticeResourceSchema.safeParse({ resourceConfigurationIdentifier: RCFG_ARN }).success).toBe( + true + ); + }); + it('rejects a malformed identifier', () => { + expect(SelfManagedLatticeResourceSchema.safeParse({ resourceConfigurationIdentifier: 'nope' }).success).toBe(false); + }); +}); + +describe('ManagedVpcResourceSchema', () => { + const valid = { + vpcIdentifier: 'vpc-0123456789abcdef0', + subnetIds: ['subnet-0123456789abcdef0'], + endpointIpAddressType: 'IPV4' as const, + }; + + it('accepts required-only', () => { + expect(ManagedVpcResourceSchema.safeParse(valid).success).toBe(true); + }); + it('accepts optional securityGroupIds/tags/routingDomain', () => { + expect( + ManagedVpcResourceSchema.safeParse({ + ...valid, + securityGroupIds: ['sg-0123456789abcdef0', 'sg-0fedcba9876543210'], + tags: { team: 'agentcore' }, + routingDomain: 'example.internal', + }).success + ).toBe(true); + }); + it('rejects missing vpcIdentifier', () => { + const { vpcIdentifier, ...rest } = valid; + void vpcIdentifier; + expect(ManagedVpcResourceSchema.safeParse(rest).success).toBe(false); + }); + it('rejects an empty subnetIds array', 
() => { + expect(ManagedVpcResourceSchema.safeParse({ ...valid, subnetIds: [] }).success).toBe(false); + }); + it('rejects a bad subnet id', () => { + expect(ManagedVpcResourceSchema.safeParse({ ...valid, subnetIds: ['nope'] }).success).toBe(false); + }); + it('rejects an invalid endpointIpAddressType', () => { + expect(ManagedVpcResourceSchema.safeParse({ ...valid, endpointIpAddressType: 'ipv4' }).success).toBe(false); + }); + it('rejects more than 5 securityGroupIds', () => { + const sgs = Array.from({ length: 6 }, (_, i) => `sg-0123456789abcde${i}0`); + expect(ManagedVpcResourceSchema.safeParse({ ...valid, securityGroupIds: sgs }).success).toBe(false); + }); + it('rejects a bad security group id', () => { + expect(ManagedVpcResourceSchema.safeParse({ ...valid, securityGroupIds: ['nope'] }).success).toBe(false); + }); +}); + +describe('PrivateEndpointSchema (exactly-one-of)', () => { + it('accepts the lattice arm alone', () => { + expect( + PrivateEndpointSchema.safeParse({ selfManagedLatticeResource: { resourceConfigurationIdentifier: RCFG } }).success + ).toBe(true); + }); + it('accepts the managed-vpc arm alone', () => { + expect( + PrivateEndpointSchema.safeParse({ + managedVpcResource: { + vpcIdentifier: 'vpc-0123456789abcdef0', + subnetIds: ['subnet-0123456789abcdef0'], + endpointIpAddressType: 'IPV4', + }, + }).success + ).toBe(true); + }); + it('rejects BOTH arms present', () => { + const result = PrivateEndpointSchema.safeParse({ + selfManagedLatticeResource: { resourceConfigurationIdentifier: RCFG }, + managedVpcResource: { + vpcIdentifier: 'vpc-0123456789abcdef0', + subnetIds: ['subnet-0123456789abcdef0'], + endpointIpAddressType: 'IPV4', + }, + }); + expect(result.success).toBe(false); + if (!result.success) { + expect(result.error.issues.some(i => i.message.includes('exactly one'))).toBe(true); + } + }); + it('rejects NEITHER arm present (empty object)', () => { + expect(PrivateEndpointSchema.safeParse({}).success).toBe(false); + }); +}); + 
+describe('PrivateEndpointOverrideSchema', () => { + it('accepts a domain + nested private endpoint', () => { + expect( + PrivateEndpointOverrideSchema.safeParse({ + domain: 'api.example.com', + privateEndpoint: { selfManagedLatticeResource: { resourceConfigurationIdentifier: RCFG } }, + }).success + ).toBe(true); + }); + it('rejects a missing domain', () => { + expect( + PrivateEndpointOverrideSchema.safeParse({ + privateEndpoint: { selfManagedLatticeResource: { resourceConfigurationIdentifier: RCFG } }, + }).success + ).toBe(false); + }); +}); + +describe('CustomJwtAuthorizerConfigSchema with PrivateLink fields', () => { + const base = { + discoveryUrl: 'https://idp.example.com/.well-known/openid-configuration', + allowedAudience: ['aud-1'], + }; + + it('accepts a privateEndpoint', () => { + expect( + CustomJwtAuthorizerConfigSchema.safeParse({ + ...base, + privateEndpoint: { selfManagedLatticeResource: { resourceConfigurationIdentifier: RCFG } }, + }).success + ).toBe(true); + }); + const latticeEndpoint = { selfManagedLatticeResource: { resourceConfigurationIdentifier: RCFG } }; + const vpcEndpoint = { + managedVpcResource: { + vpcIdentifier: 'vpc-0123456789abcdef0', + subnetIds: ['subnet-0123456789abcdef0'], + endpointIpAddressType: 'IPV4' as const, + }, + }; + + it('accepts up to 5 privateEndpointOverrides (with a base privateEndpoint)', () => { + const overrides = Array.from({ length: 5 }, (_, i) => ({ + domain: `d${i}.example.com`, + privateEndpoint: latticeEndpoint, + })); + expect( + CustomJwtAuthorizerConfigSchema.safeParse({ + ...base, + privateEndpoint: latticeEndpoint, + privateEndpointOverrides: overrides, + }).success + ).toBe(true); + }); + it('rejects more than 5 privateEndpointOverrides', () => { + const overrides = Array.from({ length: 6 }, (_, i) => ({ + domain: `d${i}.example.com`, + privateEndpoint: latticeEndpoint, + })); + expect( + CustomJwtAuthorizerConfigSchema.safeParse({ + ...base, + privateEndpoint: latticeEndpoint, + 
privateEndpointOverrides: overrides, + }).success + ).toBe(false); + }); + it('still accepts a config with no PrivateLink fields (backwards compat)', () => { + expect(CustomJwtAuthorizerConfigSchema.safeParse(base).success).toBe(true); + }); + + // ── PrivateEndpointOverrides coupling rules (mirror the AgentCore Identity service) ── + it('rejects privateEndpointOverrides without a base privateEndpoint', () => { + const result = CustomJwtAuthorizerConfigSchema.safeParse({ + ...base, + privateEndpointOverrides: [{ domain: 'd.example.com', privateEndpoint: latticeEndpoint }], + }); + expect(result.success).toBe(false); + if (!result.success) { + expect(result.error.issues.some(i => i.message.includes('only be used when privateEndpoint is also set'))).toBe( + true + ); + } + }); + it('rejects an override arm that mismatches the base arm (lattice base, vpc override)', () => { + const result = CustomJwtAuthorizerConfigSchema.safeParse({ + ...base, + privateEndpoint: latticeEndpoint, + privateEndpointOverrides: [{ domain: 'd.example.com', privateEndpoint: vpcEndpoint }], + }); + expect(result.success).toBe(false); + if (!result.success) { + expect(result.error.issues.some(i => i.message.includes('same kind'))).toBe(true); + } + }); + it('rejects an override arm that mismatches the base arm (vpc base, lattice override)', () => { + const result = CustomJwtAuthorizerConfigSchema.safeParse({ + ...base, + privateEndpoint: vpcEndpoint, + privateEndpointOverrides: [{ domain: 'd.example.com', privateEndpoint: latticeEndpoint }], + }); + expect(result.success).toBe(false); + }); + it('accepts all-managed-vpc base + overrides', () => { + expect( + CustomJwtAuthorizerConfigSchema.safeParse({ + ...base, + privateEndpoint: vpcEndpoint, + privateEndpointOverrides: [{ domain: 'd.example.com', privateEndpoint: vpcEndpoint }], + }).success + ).toBe(true); + }); + it('rejects duplicate override domains', () => { + const result = CustomJwtAuthorizerConfigSchema.safeParse({ + ...base, + 
privateEndpoint: latticeEndpoint, + privateEndpointOverrides: [ + { domain: 'dup.example.com', privateEndpoint: latticeEndpoint }, + { domain: 'dup.example.com', privateEndpoint: latticeEndpoint }, + ], + }); + expect(result.success).toBe(false); + if (!result.success) { + expect(result.error.issues.some(i => i.message.includes('Duplicate privateEndpointOverride domain'))).toBe(true); + } + }); +}); diff --git a/src/schema/schemas/agentcore-project.ts b/src/schema/schemas/agentcore-project.ts index c4f904e84..b9762179c 100644 --- a/src/schema/schemas/agentcore-project.ts +++ b/src/schema/schemas/agentcore-project.ts @@ -19,7 +19,7 @@ import { KmsKeyArnSchema, } from './primitives/evaluator'; import { HarnessNameSchema } from './primitives/harness'; -import { HttpGatewaySchema } from './primitives/http-gateway'; +import { KnowledgeBaseSchema } from './primitives/knowledge-base'; import { DEFAULT_EPISODIC_REFLECTION_NAMESPACES, DEFAULT_EPISODIC_REFLECTION_NAMESPACE_TEMPLATES, @@ -77,8 +77,15 @@ export { ConfigBundleSchema }; export type { ComponentConfiguration, ComponentConfigurationMap, ConfigBundle } from './primitives/config-bundle'; export { ConfigBundleNameSchema, ComponentConfigurationMapSchema } from './primitives/config-bundle'; export { PolicyEngineSchema }; -export type { Policy, PolicyEngine, ValidationMode } from './primitives/policy'; -export { PolicyEngineNameSchema, PolicyNameSchema, PolicySchema, ValidationModeSchema } from './primitives/policy'; +export type { AuthorizationPhase, EnforcementMode, Policy, PolicyEngine, ValidationMode } from './primitives/policy'; +export { + AuthorizationPhaseSchema, + EnforcementModeSchema, + PolicyEngineNameSchema, + PolicyNameSchema, + PolicySchema, + ValidationModeSchema, +} from './primitives/policy'; export { TagsSchema }; export type { Tags } from './primitives/tags'; export { DatasetSchema }; @@ -86,8 +93,6 @@ export { DatasetNameSchema, DatasetSchemaTypeSchema } from './primitives/dataset export type { 
Dataset, DatasetSchemaType } from './primitives/dataset'; export type { ABTestMode, TargetRef, GatewayFilter, PerVariantOnlineEvaluationConfig } from './primitives/ab-test'; export { ABTestModeSchema, TargetRefSchema, GatewayFilterSchema } from './primitives/ab-test'; -export type { HttpGatewayTarget } from './primitives/http-gateway'; -export { HttpGatewayTargetSchema } from './primitives/http-gateway'; export type { BedrockApiFormat, HarnessApiFormat, @@ -96,6 +101,7 @@ export type { HarnessModel, HarnessModelProvider, HarnessSpec, + ManagedMemoryStrategy, OpenAiApiFormat, } from './primitives/harness'; export { @@ -108,8 +114,24 @@ export { HarnessNameSchema, HarnessSpecSchema, HarnessToolTypeSchema, + ManagedMemoryStrategySchema, validateApiFormat, } from './primitives/harness'; +export type { + KnowledgeBase, + DataSource, + S3DataSource, + ConnectorDataSourceType, + ConnectorFileDataSource, +} from './primitives/knowledge-base'; +export { + KnowledgeBaseNameSchema, + KnowledgeBaseSchema, + S3DataSourceSchema, + DataSourceSchema, + ConnectorDataSourceTypeSchema, + ConnectorFileDataSourceSchema, +} from './primitives/knowledge-base'; export { DEFAULT_AUTO_PAYMENT, DEFAULT_SPEND_LIMIT, @@ -393,6 +415,16 @@ export const AgentCoreProjectSpecSchema = z ) ), + knowledgeBases: z + .array(KnowledgeBaseSchema) + .default([]) + .superRefine( + uniqueBy( + kb => kb.name, + name => `Duplicate knowledge base name: ${name}` + ) + ), + credentials: z .array(CredentialSchema) .default([]) @@ -486,16 +518,6 @@ export const AgentCoreProjectSpecSchema = z ) ), - httpGateways: z - .array(HttpGatewaySchema) - .default([]) - .superRefine( - uniqueBy( - gw => gw.name, - name => `Duplicate HTTP gateway name: ${name}` - ) - ), - harnesses: z .array(HarnessRegistryEntrySchema) .default([]) @@ -520,6 +542,14 @@ export const AgentCoreProjectSpecSchema = z } }), + httpGateways: z + .array(z.unknown()) + .max( + 0, + '"httpGateways" is deprecated. 
Migrate to agentCoreGateways with protocolType: "None", or use "agentcore import gateway".' + ) + .optional(), + payments: z .array(PaymentManagerSchema) .optional() @@ -537,8 +567,8 @@ export const AgentCoreProjectSpecSchema = z const evaluatorNames = new Set(spec.evaluators.map(e => e.name)); for (const config of spec.onlineEvalConfigs) { - // Validate agent reference - if (!agentNames.has(config.agent)) { + // Validate agent reference (only when agent is specified — custom log groups don't need one) + if (config.agent && !agentNames.has(config.agent)) { ctx.addIssue({ code: z.ZodIssueCode.custom, message: `Online eval config "${config.name}" references unknown agent "${config.agent}"`, @@ -546,7 +576,7 @@ export const AgentCoreProjectSpecSchema = z } // Validate evaluator references - for (const evalName of config.evaluators) { + for (const evalName of config.evaluators ?? []) { // Skip built-in evaluators and ARN references (externally managed) if (evalName.startsWith(BUILTIN_EVALUATOR_PREFIX) || evalName.startsWith(ARN_PREFIX)) continue; if (!evaluatorNames.has(evalName)) { @@ -558,14 +588,28 @@ export const AgentCoreProjectSpecSchema = z } } - // Validate HTTP gateway runtimeRef references - for (const gw of spec.httpGateways ?? []) { - const runtimeExists = spec.runtimes.some(r => r.name === gw.runtimeRef); - if (!runtimeExists) { - ctx.addIssue({ - code: z.ZodIssueCode.custom, - message: `HTTP gateway "${gw.name}" references unknown runtime "${gw.runtimeRef}"`, - }); + // Validate httpRuntime target runtime references + for (const gw of spec.agentCoreGateways ?? []) { + for (const target of gw.targets) { + if (target.targetType === 'httpRuntime') { + if (target.httpRuntime?.runtime) { + const runtimeExists = spec.runtimes.some(r => r.name === target.httpRuntime!.runtime); + if (!runtimeExists) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: `Gateway "${gw.name}" target "${target.name}" references unknown runtime "${target.httpRuntime.runtime}". 
Check spec.runtimes.`, + }); + } else if (target.httpRuntime.runtimeEndpoint && target.httpRuntime.runtimeEndpoint !== 'DEFAULT') { + const runtime = spec.runtimes.find(r => r.name === target.httpRuntime!.runtime); + if (runtime && !runtime.endpoints?.[target.httpRuntime.runtimeEndpoint]) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: `Gateway "${gw.name}" target "${target.name}" references endpoint "${target.httpRuntime.runtimeEndpoint}" which does not exist on runtime "${target.httpRuntime.runtime}".`, + }); + } + } + } + } } } @@ -576,17 +620,17 @@ export const AgentCoreProjectSpecSchema = z const match = /^\{\{gateway:(.+)\}\}$/.exec(gwField); if (match) { const gwName = match[1]; - const gwExists = (spec.httpGateways ?? []).some(gw => gw.name === gwName); + const gwExists = (spec.agentCoreGateways ?? []).some(gw => gw.name === gwName); if (!gwExists) { ctx.addIssue({ code: z.ZodIssueCode.custom, - message: `AB test "${test.name}" references gateway "${gwName}" which does not exist in httpGateways`, + message: `AB test "${test.name}" references gateway "${gwName}" which does not exist in agentCoreGateways`, }); } // For target-based AB tests, validate target names exist in the gateway's targets array if (test.mode === 'target-based') { - const gw = (spec.httpGateways ?? []).find(g => g.name === gwName); + const gw = (spec.agentCoreGateways ?? []).find(g => g.name === gwName); if (gw) { const gwTargetNames = new Set((gw.targets ?? []).map(t => t.name)); for (const variant of test.variants) { @@ -604,22 +648,45 @@ export const AgentCoreProjectSpecSchema = z } } - // Validate HTTP gateway target runtimeRef and qualifier references - for (const gw of spec.httpGateways ?? []) { - for (const target of gw.targets ?? 
[]) { - const runtime = spec.runtimes.find(r => r.name === target.runtimeRef); - if (!runtime) { + // Connector gateway target KB reference: a project KB name (entry in + // knowledgeBases[]) or a literal 10-char KB ID (an external KB this + // project does not own). Real KB IDs match ^[A-Z0-9]{10}$; KB names + // start with a letter and may include dashes/underscores. A value + // matching both formats is rejected below as ambiguous. + const knowledgeBaseNames = new Set((spec.knowledgeBases ?? []).map(kb => kb.name)); + const REAL_KB_ID_PATTERN = /^[A-Z0-9]{10}$/; + const validateKbReference = (target: { name: string }, value: string, fieldLabel: string): void => { + const looksLikeRealId = REAL_KB_ID_PATTERN.test(value); + if (looksLikeRealId) { + if (knowledgeBaseNames.has(value)) { ctx.addIssue({ code: z.ZodIssueCode.custom, - message: `HTTP gateway "${gw.name}" target "${target.name}" references unknown runtime "${target.runtimeRef}"`, + message: `Connector target "${target.name}" ${fieldLabel} "${value}" looks like a literal KB ID but also matches a knowledgeBases[] entry. Rename the knowledge base or reference it by its project name instead.`, }); - } else if (target.qualifier && target.qualifier !== 'DEFAULT' && !runtime.endpoints?.[target.qualifier]) { + } + } else { + if (!knowledgeBaseNames.has(value)) { ctx.addIssue({ code: z.ZodIssueCode.custom, - message: `HTTP gateway "${gw.name}" target "${target.name}" references qualifier "${target.qualifier}" which is not an endpoint on runtime "${target.runtimeRef}"`, + message: `Connector target "${target.name}" ${fieldLabel} "${value}" does not match any knowledgeBases[] entry. To wire an external KB that this project does not own, use its 10-character KB ID.`, }); } } + }; + + for (const gateway of spec.agentCoreGateways ?? []) { + for (const target of gateway.targets ?? 
[]) { + if (target.targetType !== 'connector') continue; + if (target.connectorId !== 'bedrock-knowledge-bases' && target.connectorId !== 'bedrock-agentic-retrieve') { + continue; + } + if (target.knowledgeBaseId) { + validateKbReference(target, target.knowledgeBaseId, 'knowledgeBaseId'); + } + for (const value of target.knowledgeBaseIds ?? []) { + validateKbReference(target, value, 'knowledgeBaseIds[]'); + } + } } // Validate payment connector credential references diff --git a/src/schema/schemas/auth.ts b/src/schema/schemas/auth.ts index 9b9df72ad..8310a5231 100644 --- a/src/schema/schemas/auth.ts +++ b/src/schema/schemas/auth.ts @@ -1,3 +1,4 @@ +import { TagsSchema } from './primitives/tags'; import { z } from 'zod'; // ============================================================================ @@ -32,6 +33,9 @@ const OidcDiscoveryUrlSchema = z // API-documented patterns (from ClaimMatchValueType and CustomClaimValidationType) const MATCH_VALUE_PATTERN = /^[A-Za-z0-9_.-]+$/; +// OAuth scope token: printable ASCII excluding space (0x20), double-quote (0x22), and backslash (0x5C). +// Mirrors CFN CustomJWTAuthorizerConfiguration.AllowedScopes.items pattern byte-for-byte. +const ALLOWED_SCOPE_PATTERN = /^[\x21\x23-\x5B\x5D-\x7E]+$/; const CLAIM_NAME_PATTERN = /^[A-Za-z0-9_.:-]+$/; // Server-side reserved claim names (not regex-documented; API rejects these at deploy time) const RESERVED_CLAIM_NAMES = ['client_id']; @@ -83,6 +87,94 @@ export const CustomClaimValidationSchema = z .strict(); export type CustomClaimValidation = z.infer<typeof CustomClaimValidationSchema>; +// ── PrivateLink Inbound — private endpoint for reaching the OIDC discovery URL ── +// +// Nested inside CustomJWTAuthorizerConfiguration ("PrivateLink inbound" GA-parity feature): +// when a JWT IdP's discovery/JWKS endpoint is privately hosted, this tells the harness how to +// reach it over a private network. Two mutually-exclusive arms. 
Patterns match the +// AWS::BedrockAgentCore::Harness CFN spec byte-for-byte. 
+ +// VPC Lattice resource-config id, or its full ARN. Exported so the TUI validators reuse the +// schema regex (single source of truth) instead of hand-rolled checks that drift from it. +export const LATTICE_RESOURCE_CONFIG_PATTERN = + /^((rcfg-[0-9a-z]{17})|(arn:[a-z0-9-]+:vpc-lattice:[a-zA-Z0-9-]+:\d{12}:resourceconfiguration\/rcfg-[0-9a-z]{17}))$/; +export const VPC_ID_PATTERN = /^vpc-(([0-9a-z]{8})|([0-9a-z]{17}))$/; +export const SUBNET_ID_PATTERN = /^subnet-[0-9a-zA-Z]{8,17}$/; +export const SECURITY_GROUP_ID_PATTERN = /^sg-(([0-9a-z]{8})|([0-9a-z]{17}))$/; + +export const EndpointIpAddressTypeSchema = z.enum(['IPV4', 'IPV6']); +export type EndpointIpAddressType = z.infer<typeof EndpointIpAddressTypeSchema>; + +/** Reach the discovery endpoint via a self-managed VPC Lattice resource configuration. */ +export const SelfManagedLatticeResourceSchema = z + .object({ + resourceConfigurationIdentifier: z + .string() + .min(20) + .max(2048) + .regex(LATTICE_RESOURCE_CONFIG_PATTERN, 'Must be a VPC Lattice resource-config id (rcfg-...) or its ARN'), + }) + .strict(); +export type SelfManagedLatticeResource = z.infer<typeof SelfManagedLatticeResourceSchema>; + +/** Reach the discovery endpoint via a service-managed VPC interface endpoint. 
*/ +export const ManagedVpcResourceSchema = z + .object({ + vpcIdentifier: z.string().regex(VPC_ID_PATTERN, 'Must be a VPC id (vpc-...)'), + subnetIds: z.array(z.string().regex(SUBNET_ID_PATTERN, 'Must be a subnet id (subnet-...)')).min(1), + endpointIpAddressType: EndpointIpAddressTypeSchema, + securityGroupIds: z + .array(z.string().regex(SECURITY_GROUP_ID_PATTERN, 'Must be a security group id (sg-...)')) + .max(5) + .optional(), + tags: TagsSchema.optional(), + routingDomain: z.string().min(3).max(255).optional(), + }) + .strict(); +export type ManagedVpcResource = z.infer<typeof ManagedVpcResourceSchema>; + +/** + * A private endpoint: exactly one of selfManagedLatticeResource or managedVpcResource. 
+ * The CFN spec dropped `oneOf` (contract-test antipattern) and enforces exactly-one structurally; + * we mirror that with a superRefine rather than a discriminated union. + */ +export const PrivateEndpointSchema = z + .object({ + selfManagedLatticeResource: SelfManagedLatticeResourceSchema.optional(), + managedVpcResource: ManagedVpcResourceSchema.optional(), + }) + .strict() + .superRefine((data, ctx) => { + const count = [data.selfManagedLatticeResource, data.managedVpcResource].filter(v => v !== undefined).length; + if (count !== 1) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: 'A private endpoint must set exactly one of selfManagedLatticeResource or managedVpcResource', + }); + } + }); +export type PrivateEndpoint = z.infer<typeof PrivateEndpointSchema>; + +/** Maps a specific domain to its own private endpoint (overrides the discovery-URL endpoint for that domain). */ +export const PrivateEndpointOverrideSchema = z + .object({ + domain: z.string().min(1).max(253), + privateEndpoint: PrivateEndpointSchema, + }) + .strict(); +export type PrivateEndpointOverride = z.infer<typeof PrivateEndpointOverrideSchema>; + +/** + * Which arm a PrivateEndpoint uses. The nested exactly-one-of refine guarantees a single arm, + * so this is read at the authorizer level to enforce the service's "all endpoints same kind" rule. 
+ */ +type PrivateEndpointArm = 'selfManagedLatticeResource' | 'managedVpcResource' | undefined; +function privateEndpointArm(pe: PrivateEndpoint): PrivateEndpointArm { + if (pe.selfManagedLatticeResource) return 'selfManagedLatticeResource'; + if (pe.managedVpcResource) return 'managedVpcResource'; + return undefined; +} + // ── Custom JWT Authorizer Configuration ── /** @@ -100,10 +192,22 @@ export const CustomJwtAuthorizerConfigSchema = z allowedAudience: z.array(z.string().min(1)).optional(), /** List of allowed client IDs */ allowedClients: z.array(z.string().min(1)).optional(), - /** List of allowed scopes */ - allowedScopes: z.array(z.string().min(1)).optional(), + /** List of allowed scopes (printable ASCII, no space/quote; ≤255 chars each — CFN parity) */ + allowedScopes: z + .array( + z + .string() + .min(1) + .max(255) + .regex(ALLOWED_SCOPE_PATTERN, 'Scope must be printable ASCII with no spaces or quotes') + ) + .optional(), /** Custom claim validations */ customClaims: z.array(CustomClaimValidationSchema).min(1).optional(), + /** PrivateLink inbound: how to reach the OIDC discovery endpoint over a private network. */ + privateEndpoint: PrivateEndpointSchema.optional(), + /** Per-domain private-endpoint overrides (≤5). */ + privateEndpointOverrides: z.array(PrivateEndpointOverrideSchema).max(5).optional(), }) .strict() .superRefine((data, ctx) => { @@ -118,6 +222,46 @@ export const CustomJwtAuthorizerConfigSchema = z message: 'At least one of allowedAudience, allowedClients, allowedScopes, or customClaims must be provided', }); } + + // PrivateEndpointOverrides coupling rules — mirror the AgentCore Identity service's deploy-time + // validation so the user fails fast here instead of mid-deploy. + const overrides = data.privateEndpointOverrides ?? []; + if (overrides.length > 0) { + // 1. Overrides require a base privateEndpoint. 
+ if (!data.privateEndpoint) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: 'privateEndpointOverrides can only be used when privateEndpoint is also set', + path: ['privateEndpointOverrides'], + }); + } else { + // 2. The base endpoint and every override must use the same arm (all self-managed or all service-managed). + const baseArm = privateEndpointArm(data.privateEndpoint); + overrides.forEach((o, i) => { + if (privateEndpointArm(o.privateEndpoint) !== baseArm) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: + 'privateEndpoint and privateEndpointOverrides must all be the same kind — either all selfManagedLatticeResource or all managedVpcResource', + path: ['privateEndpointOverrides', i, 'privateEndpoint'], + }); + } + }); + } + + // 3. Override domains must be unique. + const seen = new Set<string>(); + overrides.forEach((o, i) => { + if (seen.has(o.domain)) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: `Duplicate privateEndpointOverride domain: ${o.domain}`, + path: ['privateEndpointOverrides', i, 'domain'], + }); + } + seen.add(o.domain); + }); + } }); export type CustomJwtAuthorizerConfig = z.infer<typeof CustomJwtAuthorizerConfigSchema>; diff --git a/src/schema/schemas/deployed-state.ts b/src/schema/schemas/deployed-state.ts index 495dd444f..49d369da2 100644 --- a/src/schema/schemas/deployed-state.ts +++ b/src/schema/schemas/deployed-state.ts @@ -35,10 +35,17 @@ export type MemoryDeployedState = z.infer<typeof MemoryDeployedStateSchema>; // MCP Gateway Deployed State // ============================================================================ +export const GatewayTargetDeployedStateSchema = z.object({ + targetId: z.string().min(1), +}); + +export type GatewayTargetDeployedState = z.infer<typeof GatewayTargetDeployedStateSchema>; + export const GatewayDeployedStateSchema = z.object({ gatewayId: z.string().min(1), gatewayArn: z.string().min(1), gatewayUrl: z.string().optional(), + targets: z.record(z.string(), GatewayTargetDeployedStateSchema).optional(), }); export type GatewayDeployedState = z.infer<typeof GatewayDeployedStateSchema>; @@ -144,9 +151,12 @@ export const 
HarnessDeployedStateSchema = z.object({ harnessArn: z.string().min(1), roleArn: z.string().min(1), status: z.string().min(1), + /** Read-only harness config version from the CFN `Version` output; service-incremented per update. */ + harnessVersion: z.number().int().min(1).optional(), agentRuntimeArn: z.string().optional(), memoryArn: z.string().optional(), - configHash: z.string().optional(), + /** Which subsystem provisioned this harness. Stamped 'cloudformation' by the CDK deploy path. */ + provisioner: z.enum(['cloudformation']).optional(), }); export type HarnessDeployedState = z.infer<typeof HarnessDeployedStateSchema>; @@ -202,6 +212,41 @@ export const DatasetDeployedStateSchema = z.object({ export type DatasetDeployedState = z.infer<typeof DatasetDeployedStateSchema>; +// ============================================================================ +// Knowledge Base Deployed State +// ============================================================================ + +export const KnowledgeBaseDataSourceDeployedStateSchema = z.object({ + dataSourceId: z.string().min(1), + uri: z.string().min(1), +}); + +export type KnowledgeBaseDataSourceDeployedState = z.infer<typeof KnowledgeBaseDataSourceDeployedStateSchema>; + +/** + * Per-target deployed state for a knowledge base. Captures the IDs the + * status command needs to call bedrock-agent for live KB and ingestion state. + * + * `dataSources` is an array (not a record) because the deploy step writes + * them in the same order as the local `dataSources[]` array; the index + * lets us correlate local sources with deployed DSs without extra IDs. + * + * `sourcesHash` is a SHA-256 of the data-source URIs (joined with newlines) + * captured at the time auto-ingestion last fired. The post-deploy ingestion + * hook computes a fresh hash from the current spec and compares — if equal, + * skip ingestion (no changes to ingest). 
Optional so projects deployed + * before the hook shipped don't fail validation; treated as "ingest needed" + * when absent. 
+ */ +export const KnowledgeBaseDeployedStateSchema = z.object({ + knowledgeBaseId: z.string().min(1), + knowledgeBaseArn: z.string().min(1), + dataSources: z.array(KnowledgeBaseDataSourceDeployedStateSchema).default([]), + sourcesHash: z.string().min(1).optional(), +}); + +export type KnowledgeBaseDeployedState = z.infer<typeof KnowledgeBaseDeployedStateSchema>; + // ============================================================================ // Configuration Bundle Deployed State // ============================================================================ @@ -228,21 +273,6 @@ export const ABTestDeployedStateSchema = z.object({ export type ABTestDeployedState = z.infer<typeof ABTestDeployedStateSchema>; -// ============================================================================ -// HTTP Gateway Deployed State -// ============================================================================ - -export const HttpGatewayDeployedStateSchema = z.object({ - gatewayId: z.string().min(1), - gatewayArn: z.string().min(1), - gatewayUrl: z.string().optional(), - targetId: z.string().min(1).optional(), - roleArn: z.string().min(1).optional(), - roleCreatedByCli: z.boolean().optional(), -}); - -export type HttpGatewayDeployedState = z.infer<typeof HttpGatewayDeployedStateSchema>; - // ============================================================================ // Runtime Endpoint Deployed State // ============================================================================ @@ -292,14 +322,15 @@ export const DeployedResourceStateSchema = z.object({ runtimes: z.record(z.string(), AgentCoreDeployedStateSchema).optional(), memories: z.record(z.string(), MemoryDeployedStateSchema).optional(), mcp: McpDeployedStateSchema.optional(), + gateways: z.record(z.string(), GatewayDeployedStateSchema).optional(), externallyManaged: ExternallyManagedStateSchema.optional(), credentials: z.record(z.string(), CredentialDeployedStateSchema).optional(), evaluators: z.record(z.string(), EvaluatorDeployedStateSchema).optional(), onlineEvalConfigs: z.record(z.string(), 
OnlineEvalDeployedStateSchema).optional(), datasets: z.record(z.string(), DatasetDeployedStateSchema).optional(), + knowledgeBases: z.record(z.string(), KnowledgeBaseDeployedStateSchema).optional(), configBundles: z.record(z.string(), ConfigBundleDeployedStateSchema).optional(), abTests: z.record(z.string(), ABTestDeployedStateSchema).optional(), - httpGateways: z.record(z.string(), HttpGatewayDeployedStateSchema).optional(), policyEngines: z.record(z.string(), PolicyEngineDeployedStateSchema).optional(), policies: z.record(z.string(), PolicyDeployedStateSchema).optional(), harnesses: z.record(z.string(), HarnessDeployedStateSchema).optional(), diff --git a/src/schema/schemas/mcp-defs.ts b/src/schema/schemas/mcp-defs.ts index ef0b5f482..72c449bcb 100644 --- a/src/schema/schemas/mcp-defs.ts +++ b/src/schema/schemas/mcp-defs.ts @@ -37,7 +37,8 @@ export const SchemaDefinitionSchema: z.ZodType = z.object({ /** * Tool name validation for CLI input. - * Allows alphanumeric characters, hyphens, and underscores. + * Allows alphanumeric characters and hyphens. Underscores are not permitted + * for gateway targets. * This is a general-purpose schema for tool names that works for both * MCP runtime tools (direct) and gateway target tools. 
*/ @@ -46,8 +47,8 @@ export const ToolNameSchema = z .min(1, 'Tool name is required') .max(128, 'Tool name must be at most 128 characters') .regex( - /^[a-zA-Z][a-zA-Z0-9_-]*$/, - 'Tool name must start with a letter and contain only alphanumeric characters, hyphens, or underscores' + /^[a-zA-Z][a-zA-Z0-9-]*$/, + 'Tool name must start with a letter and contain only alphanumeric characters or hyphens' ); /** diff --git a/src/schema/schemas/mcp.ts b/src/schema/schemas/mcp.ts index 42cd89810..00b86698a 100644 --- a/src/schema/schemas/mcp.ts +++ b/src/schema/schemas/mcp.ts @@ -17,16 +17,69 @@ export const GatewayTargetTypeSchema = z.enum([ 'smithyModel', 'apiGateway', 'lambdaFunctionArn', + 'httpRuntime', + 'connector', + 'passthrough', + 'webSearch', ]); export type GatewayTargetType = z.infer<typeof GatewayTargetTypeSchema>; +/** + * Target types that use the non-MCP (HTTP) protocol. + * These targets require a gateway with protocolType: "None". + */ +export const NON_MCP_TARGET_TYPES: readonly GatewayTargetType[] = ['httpRuntime', 'passthrough'] as const; + +/** + * Target types that use the MCP protocol. + */ +export const MCP_TARGET_TYPES: readonly GatewayTargetType[] = [ + 'lambda', + 'mcpServer', + 'openApiSchema', + 'smithyModel', + 'apiGateway', + 'lambdaFunctionArn', +] as const; + +// ============================================================================ +// Connector (managed-service gateway target) +// ============================================================================ + +/** + * Managed-service connector identifiers. The L3 maps each one to an operation + * name + Enabled list via CONNECTOR_DEFAULTS. + * + * Spec note: the original DevEx doc (cli-knowledge-bases-devex.md) uses + * `agentic-retrieve`, but the service accepts `bedrock-agentic-retrieve`. The + * latter is canonical. 
+ */ +export const CONNECTOR_ID = { + BEDROCK_KNOWLEDGE_BASES: 'bedrock-knowledge-bases', + BEDROCK_AGENTIC_RETRIEVE: 'bedrock-agentic-retrieve', +} as const; +export const CONNECTOR_ID_VALUES = [ + CONNECTOR_ID.BEDROCK_KNOWLEDGE_BASES, + CONNECTOR_ID.BEDROCK_AGENTIC_RETRIEVE, +] as const; +export const ConnectorIdSchema = z.enum(CONNECTOR_ID_VALUES); +export type ConnectorId = z.infer<typeof ConnectorIdSchema>; + +/** + * Real Bedrock Knowledge Base IDs are 10 uppercase alphanumeric chars. + * KB names follow the standard primitive-name shape (1-48 chars, starts with a letter). + * The formats can overlap if a KB name is itself 10 uppercase alphanumerics; the project + * spec validation rejects a value that matches both, so references stay unambiguous. + */ +export const REAL_KB_ID_PATTERN = /^[A-Z0-9]{10}$/; + // ============================================================================ // Gateway Authorization Schemas // ============================================================================ // Auth schemas (GatewayAuthorizerTypeSchema, CustomJwtAuthorizerConfigSchema, etc.) // are defined in ./auth.ts and exported via the barrel (index.ts). 
-export const OutboundAuthTypeSchema = z.enum(['OAUTH', 'API_KEY', 'NONE']); +export const OutboundAuthTypeSchema = z.enum(['OAUTH', 'API_KEY', 'NONE', 'GATEWAY_IAM_ROLE', 'JWT_PASSTHROUGH']); export type OutboundAuthType = z.infer<typeof OutboundAuthTypeSchema>; export const OutboundAuthSchema = z @@ -34,6 +87,8 @@ export const OutboundAuthSchema = z type: OutboundAuthTypeSchema.default('NONE'), credentialName: z.string().min(1).optional(), scopes: z.array(z.string()).optional(), + service: z.string().min(1).max(64).optional(), + region: z.string().min(1).max(32).optional(), }) .strict(); @@ -60,6 +115,17 @@ export const TARGET_TYPE_AUTH_CONFIG: Record< mcpServer: { authRequired: false, validAuthTypes: ['OAUTH', 'NONE'], iamRoleFallback: false }, lambda: { authRequired: false, validAuthTypes: ['OAUTH', 'NONE'], iamRoleFallback: true }, lambdaFunctionArn: { authRequired: false, validAuthTypes: ['OAUTH', 'NONE'], iamRoleFallback: true }, + httpRuntime: { authRequired: false, validAuthTypes: ['OAUTH', 'NONE'], iamRoleFallback: true }, + // Connector targets call the underlying managed service (Bedrock KB, etc.) + // via the gateway's IAM role. No outbound auth applies. + connector: { authRequired: false, validAuthTypes: [], iamRoleFallback: true }, + passthrough: { + authRequired: true, + validAuthTypes: ['GATEWAY_IAM_ROLE', 'OAUTH', 'JWT_PASSTHROUGH'], + iamRoleFallback: false, + }, + // Amazon Web Search is invoked via the gateway's IAM role. No outbound auth. 
+ webSearch: { authRequired: false, validAuthTypes: [], iamRoleFallback: true }, }; // ============================================================================ @@ -324,6 +390,50 @@ export const SchemaSourceSchema = z.union([ ]); export type SchemaSource = z.infer<typeof SchemaSourceSchema>; +// ============================================================================ +// HTTP Runtime Configuration +// ============================================================================ + +export const HttpRuntimeConfigSchema = z + .object({ + runtime: z.string().min(1), + runtimeEndpoint: z.string().min(1).optional(), + }) + .strict(); + +export type HttpRuntimeConfig = z.infer<typeof HttpRuntimeConfigSchema>; + +// ============================================================================ +// Passthrough Target Configuration +// ============================================================================ + +export const StickinessConfigSchema = z + .object({ + identifier: z.string().min(1).max(256), + timeout: z.number().int().min(1).max(86400).optional(), + }) + .strict(); +export type StickinessConfig = z.infer<typeof StickinessConfigSchema>; + +/** + * Passthrough protocol type. HTTP is NOT valid — use CUSTOM for plain HTTP/REST backends. + */ +export const PassthroughProtocolTypeSchema = z.enum(['MCP', 'A2A', 'INFERENCE', 'CUSTOM']); +export type PassthroughProtocolType = z.infer<typeof PassthroughProtocolTypeSchema>; + +export const PassthroughConfigSchema = z + .object({ + endpoint: z + .string() + .min(1) + .regex(/^https:\/\/[a-zA-Z0-9\-.]+(:[0-9]{1,5})?(\/.*)?$/, 'Must be a valid HTTPS URL'), + /** Protocol type for the passthrough backend. Defaults to CUSTOM (generic HTTP/REST). 
*/ + protocolType: PassthroughProtocolTypeSchema.default('CUSTOM'), + stickinessConfiguration: StickinessConfigSchema.optional(), + }) + .strict(); +export type PassthroughConfig = z.infer<typeof PassthroughConfigSchema>; + // ============================================================================ // Gateway Target // ============================================================================ @@ -344,7 +454,7 @@ export const AgentCoreGatewayTargetSchema = z toolDefinitions: z.array(ToolDefinitionSchema).optional(), /** Compute configuration. Required for Lambda/Runtime scaffold targets. */ compute: ToolComputeConfigSchema.optional(), - /** MCP Server endpoint URL. Required for external MCP Server targets. */ + /** Endpoint URL for mcpServer targets. */ endpoint: z.string().url().optional(), /** Outbound auth configuration for the target. */ outboundAuth: OutboundAuthSchema.optional(), @@ -354,6 +464,49 @@ export const AgentCoreGatewayTargetSchema = z schemaSource: SchemaSourceSchema.optional(), /** Lambda Function ARN configuration. Required for lambdaFunctionArn target type. */ lambdaFunctionArn: LambdaFunctionArnConfigSchema.optional(), + /** HTTP Runtime configuration. Required for httpRuntime target type. */ + httpRuntime: HttpRuntimeConfigSchema.optional(), + /** + * Managed-service connector identifier. Required for `connector` target type. + */ + connectorId: ConnectorIdSchema.optional(), + /** + * For `bedrock-knowledge-bases` connector targets: either a project KB + * name (references an entry in `knowledgeBases[]` on the project spec) + * or a literal 10-character KB ID (refers to an external KB this project + * does not own). The L3 disambiguates by regex match. Mutually exclusive + * with `knowledgeBaseIds`. + */ + knowledgeBaseId: z + .string() + .min(1) + .max(48) + .regex(/^[a-zA-Z0-9_-]+$/, 'Must be a KB name (1-48 chars, letters/digits/dash/underscore) or a 10-char KB ID') + .optional(), + /** + * For `bedrock-agentic-retrieve` connector targets only. 
List of project + * KB names or literal 10-char external KB IDs that this orchestrated + * retriever should fan out across. Each entry is disambiguated the same + * way `knowledgeBaseId` is. Mutually exclusive with `knowledgeBaseId`. + */ + knowledgeBaseIds: z + .array( + z + .string() + .min(1) + .max(48) + .regex(/^[a-zA-Z0-9_-]+$/, 'Each entry must be a KB name (1-48 chars) or a 10-char KB ID') + ) + .min(1) + .optional(), + /** Passthrough configuration. Required for passthrough target type. */ + passthrough: PassthroughConfigSchema.optional(), + /** + * For `webSearch` target type only. Domains to exclude from web search + * results. Maps to the connector's `domainFilter.exclude` parameterValue + * at synth time. + */ + excludeDomains: z.array(z.string().min(1)).min(1).optional(), }) .strict() .superRefine((data, ctx) => { @@ -498,6 +651,101 @@ export const AgentCoreGatewayTargetSchema = z }); } } + if (data.targetType === 'httpRuntime') { + if (!data.httpRuntime) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: 'httpRuntime targets require an httpRuntime configuration (with a runtime reference).', + path: ['httpRuntime'], + }); + } + if (data.endpoint) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: 'httpRuntime targets should use httpRuntime.runtimeEndpoint instead of endpoint.', + path: ['endpoint'], + }); + } + if (data.compute) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: 'compute is not applicable for httpRuntime target type', + path: ['compute'], + }); + } + if (data.apiGateway) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: 'apiGateway is not applicable for httpRuntime target type', + path: ['apiGateway'], + }); + } + if (data.lambdaFunctionArn) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: 'lambdaFunctionArn is not applicable for httpRuntime target type', + path: ['lambdaFunctionArn'], + }); + } + if (data.toolDefinitions && data.toolDefinitions.length > 0) { + ctx.addIssue({ + 
code: z.ZodIssueCode.custom, + message: 'toolDefinitions is not applicable for httpRuntime target type', + path: ['toolDefinitions'], + }); + } + } + if (data.targetType === 'passthrough') { + if (!data.passthrough) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: 'passthrough targets require a passthrough configuration (with an endpoint).', + path: ['passthrough'], + }); + } + if (data.endpoint) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: 'passthrough targets should use passthrough.endpoint instead of endpoint.', + path: ['endpoint'], + }); + } + if (data.compute) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: 'compute is not applicable for passthrough target type', + path: ['compute'], + }); + } + if (data.apiGateway) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: 'apiGateway is not applicable for passthrough target type', + path: ['apiGateway'], + }); + } + if (data.lambdaFunctionArn) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: 'lambdaFunctionArn is not applicable for passthrough target type', + path: ['lambdaFunctionArn'], + }); + } + if (data.httpRuntime) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: 'httpRuntime is not applicable for passthrough target type', + path: ['httpRuntime'], + }); + } + if (data.toolDefinitions && data.toolDefinitions.length > 0) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: 'toolDefinitions is not applicable for passthrough target type', + path: ['toolDefinitions'], + }); + } + } if (data.targetType === 'lambda' && !data.compute) { ctx.addIssue({ code: z.ZodIssueCode.custom, @@ -512,6 +760,214 @@ export const AgentCoreGatewayTargetSchema = z path: ['toolDefinitions'], }); } + if (data.targetType === 'connector') { + if (!data.connectorId) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: 'connectorId is required for connector target type', + path: ['connectorId'], + }); + } + if (data.connectorId === 
CONNECTOR_ID.BEDROCK_KNOWLEDGE_BASES) { + if (!data.knowledgeBaseId) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: `knowledgeBaseId is required for connectorId '${data.connectorId}'`, + path: ['knowledgeBaseId'], + }); + } + if (data.knowledgeBaseIds) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: `knowledgeBaseIds is not applicable for connectorId '${data.connectorId}' (use knowledgeBaseId)`, + path: ['knowledgeBaseIds'], + }); + } + } + if (data.connectorId === CONNECTOR_ID.BEDROCK_AGENTIC_RETRIEVE) { + if (!data.knowledgeBaseIds || data.knowledgeBaseIds.length === 0) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: `knowledgeBaseIds (non-empty) is required for connectorId '${data.connectorId}'`, + path: ['knowledgeBaseIds'], + }); + } + if (data.knowledgeBaseId) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: `knowledgeBaseId is not applicable for connectorId '${data.connectorId}' (use knowledgeBaseIds)`, + path: ['knowledgeBaseId'], + }); + } + } + if (data.excludeDomains) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: `excludeDomains only applies to webSearch target type`, + path: ['excludeDomains'], + }); + } + if (data.compute) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: 'compute is not applicable for connector target type', + path: ['compute'], + }); + } + if (data.endpoint) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: 'endpoint is not applicable for connector target type', + path: ['endpoint'], + }); + } + if (data.toolDefinitions && data.toolDefinitions.length > 0) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: 'toolDefinitions is not applicable for connector target type', + path: ['toolDefinitions'], + }); + } + if (data.apiGateway) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: 'apiGateway is not applicable for connector target type', + path: ['apiGateway'], + }); + } + if (data.lambdaFunctionArn) { + ctx.addIssue({ + code: 
z.ZodIssueCode.custom, + message: 'lambdaFunctionArn is not applicable for connector target type', + path: ['lambdaFunctionArn'], + }); + } + if (data.schemaSource) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: 'schemaSource is not applicable for connector target type', + path: ['schemaSource'], + }); + } + } + if (data.targetType === 'webSearch') { + // Web search is invoked via the gateway's IAM role and takes only an + // optional excludeDomains list. Reject anything else. + if (data.compute) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: 'compute is not applicable for webSearch target type', + path: ['compute'], + }); + } + if (data.endpoint) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: 'endpoint is not applicable for webSearch target type', + path: ['endpoint'], + }); + } + if (data.toolDefinitions && data.toolDefinitions.length > 0) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: 'toolDefinitions is not applicable for webSearch target type', + path: ['toolDefinitions'], + }); + } + if (data.apiGateway) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: 'apiGateway is not applicable for webSearch target type', + path: ['apiGateway'], + }); + } + if (data.lambdaFunctionArn) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: 'lambdaFunctionArn is not applicable for webSearch target type', + path: ['lambdaFunctionArn'], + }); + } + if (data.schemaSource) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: 'schemaSource is not applicable for webSearch target type', + path: ['schemaSource'], + }); + } + if (data.httpRuntime) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: 'httpRuntime is not applicable for webSearch target type', + path: ['httpRuntime'], + }); + } + if (data.passthrough) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: 'passthrough is not applicable for webSearch target type', + path: ['passthrough'], + }); + } + if (data.outboundAuth && 
data.outboundAuth.type !== 'NONE') { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: 'outboundAuth is not applicable for webSearch target type (uses gateway IAM role)', + path: ['outboundAuth'], + }); + } + if (data.connectorId) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: 'connectorId is not applicable for webSearch target type', + path: ['connectorId'], + }); + } + if (data.knowledgeBaseId) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: 'knowledgeBaseId is not applicable for webSearch target type', + path: ['knowledgeBaseId'], + }); + } + if (data.knowledgeBaseIds) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: 'knowledgeBaseIds is not applicable for webSearch target type', + path: ['knowledgeBaseIds'], + }); + } + } + if (data.targetType !== 'connector' && data.targetType !== 'webSearch') { + if (data.connectorId) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: `connectorId only applies to connector target type`, + path: ['connectorId'], + }); + } + if (data.knowledgeBaseId) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: `knowledgeBaseId only applies to connector target type`, + path: ['knowledgeBaseId'], + }); + } + if (data.knowledgeBaseIds) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: `knowledgeBaseIds only applies to connector target type`, + path: ['knowledgeBaseIds'], + }); + } + if (data.excludeDomains) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: `excludeDomains only applies to webSearch target type`, + path: ['excludeDomains'], + }); + } + } // Centralized outbound auth validation (driven by TARGET_TYPE_AUTH_CONFIG) const authConfig = TARGET_TYPE_AUTH_CONFIG[data.targetType]; const authType = data.outboundAuth?.type ?? 
'NONE'; @@ -536,13 +992,39 @@ export const AgentCoreGatewayTargetSchema = z path: ['outboundAuth'], }); } - if (data.outboundAuth && data.outboundAuth.type !== 'NONE' && !data.outboundAuth.credentialName) { + if ( + data.outboundAuth && + data.outboundAuth.type !== 'NONE' && + data.outboundAuth.type !== 'GATEWAY_IAM_ROLE' && + data.outboundAuth.type !== 'JWT_PASSTHROUGH' && + !data.outboundAuth.credentialName + ) { ctx.addIssue({ code: z.ZodIssueCode.custom, message: `${data.outboundAuth.type} outbound auth requires a credentialName.`, path: ['outboundAuth', 'credentialName'], }); } + // GATEWAY_IAM_ROLE on passthrough requires service + if ( + data.targetType === 'passthrough' && + data.outboundAuth?.type === 'GATEWAY_IAM_ROLE' && + !data.outboundAuth.service + ) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: 'GATEWAY_IAM_ROLE outbound auth on passthrough targets requires a service name.', + path: ['outboundAuth', 'service'], + }); + } + // JWT_PASSTHROUGH is only valid for passthrough targets + if (data.outboundAuth?.type === 'JWT_PASSTHROUGH' && data.targetType !== 'passthrough') { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: 'JWT_PASSTHROUGH outbound auth is only valid for passthrough targets.', + path: ['outboundAuth', 'type'], + }); + } }); export type AgentCoreGatewayTarget = z.infer; @@ -577,11 +1059,16 @@ export type GatewayPolicyEngineConfiguration = z.infer; + export const AgentCoreGatewaySchema = z .object({ name: GatewayNameSchema, /** Actual AWS resource name for imported gateways. When set, CDK uses this instead of generating projectName-name. */ resourceName: GatewayNameSchema.optional(), + /** Protocol type for this gateway. */ + protocolType: GatewayProtocolTypeSchema.optional(), description: z.string().optional(), targets: z.array(AgentCoreGatewayTargetSchema), /** Authorization type for the gateway. Defaults to 'NONE'. 
*/ @@ -615,7 +1102,20 @@ export const AgentCoreGatewaySchema = z message: 'customJwtAuthorizer configuration is required when authorizerType is CUSTOM_JWT', path: ['authorizerConfiguration'], } - ); + ) + .superRefine((gw, ctx) => { + // A protocolType: "None" (HTTP) gateway is a superset: it can host any target + // type, including MCP targets (mcpServer, connector, KB, etc.). Only an MCP + // gateway is restrictive — it cannot host the HTTP-only target types. + for (const target of gw.targets) { + if (gw.protocolType !== 'None' && NON_MCP_TARGET_TYPES.includes(target.targetType)) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: `Target "${target.name}" is ${target.targetType} but gateway does not have protocolType: "None". Add --protocol-type None when creating the gateway.`, + }); + } + } + }); export type AgentCoreGateway = z.infer; diff --git a/src/schema/schemas/primitives/__tests__/ab-test.test.ts b/src/schema/schemas/primitives/__tests__/ab-test.test.ts index 874cd7d13..3adc1132d 100644 --- a/src/schema/schemas/primitives/__tests__/ab-test.test.ts +++ b/src/schema/schemas/primitives/__tests__/ab-test.test.ts @@ -119,9 +119,7 @@ describe('ABTestSchema', () => { ...validABTest, description: 'A test', roleArn: 'arn:aws:iam::123:role/MyRole', - maxDurationDays: 30, enableOnCreate: true, - trafficAllocationConfig: { routeOnHeader: { headerName: 'X-AB-Route' } }, }); expect(result.success).toBe(true); }); @@ -142,11 +140,6 @@ describe('ABTestSchema', () => { expect(result.success).toBe(false); }); - it('rejects maxDurationDays outside 1-90', () => { - expect(ABTestSchema.safeParse({ ...validABTest, maxDurationDays: 0 }).success).toBe(false); - expect(ABTestSchema.safeParse({ ...validABTest, maxDurationDays: 91 }).success).toBe(false); - }); - describe('variant weight sum validation', () => { it('accepts weights summing to 100 (50/50)', () => { const test = { diff --git a/src/schema/schemas/primitives/__tests__/evaluator.test.ts 
b/src/schema/schemas/primitives/__tests__/evaluator.test.ts index f378e526b..379176244 100644 --- a/src/schema/schemas/primitives/__tests__/evaluator.test.ts +++ b/src/schema/schemas/primitives/__tests__/evaluator.test.ts @@ -5,6 +5,7 @@ import { EvaluatorNameSchema, NumericalRatingSchema, RatingScaleSchema, + isValidKmsKeyArn, } from '../evaluator'; import { describe, expect, it } from 'vitest'; @@ -155,3 +156,45 @@ describe('EvaluatorConfigSchema', () => { expect(EvaluatorConfigSchema.safeParse({}).success).toBe(false); }); }); + +describe('isValidKmsKeyArn', () => { + it('accepts valid commercial KMS key ARN', () => { + expect(isValidKmsKeyArn('arn:aws:kms:us-east-1:123456789012:key/12345678-1234-1234-1234-123456789012')).toBe(true); + }); + + it('accepts valid GovCloud KMS key ARN', () => { + expect( + isValidKmsKeyArn('arn:aws-us-gov:kms:us-gov-west-1:123456789012:key/12345678-1234-1234-1234-123456789012') + ).toBe(true); + }); + + it('accepts valid China partition KMS key ARN', () => { + expect(isValidKmsKeyArn('arn:aws-cn:kms:cn-north-1:123456789012:key/12345678-1234-1234-1234-123456789012')).toBe( + true + ); + }); + + it('rejects ARN with wrong service', () => { + expect(isValidKmsKeyArn('arn:aws:s3:us-east-1:123456789012:key/12345678-1234-1234-1234-123456789012')).toBe(false); + }); + + it('rejects ARN with alias instead of key', () => { + expect(isValidKmsKeyArn('arn:aws:kms:us-east-1:123456789012:alias/my-key')).toBe(false); + }); + + it('rejects ARN with invalid account ID length', () => { + expect(isValidKmsKeyArn('arn:aws:kms:us-east-1:12345:key/12345678-1234-1234-1234-123456789012')).toBe(false); + }); + + it('rejects ARN with invalid key UUID format', () => { + expect(isValidKmsKeyArn('arn:aws:kms:us-east-1:123456789012:key/not-a-valid-uuid')).toBe(false); + }); + + it('rejects empty string', () => { + expect(isValidKmsKeyArn('')).toBe(false); + }); + + it('rejects random string', () => { + expect(isValidKmsKeyArn('not-an-arn-at-all')).toBe(false); 
+ }); +}); diff --git a/src/schema/schemas/primitives/__tests__/harness.test.ts b/src/schema/schemas/primitives/__tests__/harness.test.ts index 4b63fd774..c4c89a390 100644 --- a/src/schema/schemas/primitives/__tests__/harness.test.ts +++ b/src/schema/schemas/primitives/__tests__/harness.test.ts @@ -2,6 +2,7 @@ import { HarnessModelProviderSchema, HarnessModelSchema, HarnessNameSchema, + HarnessSkillSchema, HarnessSpecSchema, HarnessToolSchema, HarnessToolTypeSchema, @@ -13,15 +14,15 @@ describe('HarnessNameSchema', () => { expect(HarnessNameSchema.safeParse(name).success).toBe(true); }); - it('accepts 48-character name (max)', () => { - const name = 'A' + 'b'.repeat(47); - expect(name).toHaveLength(48); + it('accepts 40-character name (max)', () => { + const name = 'A' + 'b'.repeat(39); + expect(name).toHaveLength(40); expect(HarnessNameSchema.safeParse(name).success).toBe(true); }); - it('rejects 49-character name', () => { - const name = 'A' + 'b'.repeat(48); - expect(name).toHaveLength(49); + it('rejects 41-character name', () => { + const name = 'A' + 'b'.repeat(40); + expect(name).toHaveLength(41); expect(HarnessNameSchema.safeParse(name).success).toBe(false); }); @@ -56,7 +57,7 @@ describe('HarnessToolTypeSchema', () => { }); describe('HarnessModelProviderSchema', () => { - it.each(['bedrock', 'open_ai', 'gemini'])('accepts "%s"', provider => { + it.each(['bedrock', 'open_ai', 'gemini', 'lite_llm'])('accepts "%s"', provider => { expect(HarnessModelProviderSchema.safeParse(provider).success).toBe(true); }); @@ -343,15 +344,68 @@ describe('HarnessModelSchema', () => { }); it('accepts gemini model with topK', () => { + const result = HarnessModelSchema.safeParse({ + provider: 'gemini', + modelId: 'gemini-2.5-pro', + apiKeyArn: 'arn:aws:bedrock-agentcore:us-west-2:123:apikey/abc', + topK: 40, + }); + expect(result.success).toBe(true); + }); + + it('rejects non-integer topK', () => { const result = HarnessModelSchema.safeParse({ provider: 'gemini', modelId: 
'gemini-2.5-pro', apiKeyArn: 'arn:aws:bedrock-agentcore:us-west-2:123:apikey/abc', topK: 0.5, }); + expect(result.success).toBe(false); + }); + + it('rejects topK above 500', () => { + const result = HarnessModelSchema.safeParse({ + provider: 'gemini', + modelId: 'gemini-2.5-pro', + apiKeyArn: 'arn:aws:bedrock-agentcore:us-west-2:123:apikey/abc', + topK: 501, + }); + expect(result.success).toBe(false); + }); + + it('requires apiKeyArn for open_ai and gemini providers', () => { + expect(HarnessModelSchema.safeParse({ provider: 'open_ai', modelId: 'gpt-4o' }).success).toBe(false); + expect(HarnessModelSchema.safeParse({ provider: 'gemini', modelId: 'gemini-2.5-pro' }).success).toBe(false); + expect(HarnessModelSchema.safeParse({ provider: 'bedrock', modelId: 'claude' }).success).toBe(true); + }); + + it('accepts lite_llm model without apiKeyArn (key is optional)', () => { + const result = HarnessModelSchema.safeParse({ provider: 'lite_llm', modelId: 'anthropic/claude-sonnet-4-5' }); expect(result.success).toBe(true); }); + it('accepts lite_llm model with apiBase and additionalParams', () => { + const result = HarnessModelSchema.safeParse({ + provider: 'lite_llm', + modelId: 'anthropic/claude-sonnet-4-5', + apiBase: 'https://proxy.example.com/v1', + additionalParams: { reasoning_effort: 'high' }, + }); + expect(result.success).toBe(true); + }); + + it('rejects apiBase for non-lite_llm providers', () => { + expect(HarnessModelSchema.safeParse({ provider: 'bedrock', modelId: 'm', apiBase: 'https://x' }).success).toBe( + false + ); + }); + + it('rejects additionalParams for non-lite_llm providers', () => { + expect( + HarnessModelSchema.safeParse({ provider: 'bedrock', modelId: 'm', additionalParams: { foo: 'bar' } }).success + ).toBe(false); + }); + it('rejects temperature above 2.0', () => { const result = HarnessModelSchema.safeParse({ provider: 'bedrock', @@ -397,7 +451,7 @@ describe('HarnessModelSchema', () => { const result = HarnessModelSchema.safeParse({ 
provider: 'bedrock', modelId: 'us.anthropic.claude-sonnet-4-5-20250514-v1:0', - topK: 0.5, + topK: 40, }); expect(result.success).toBe(false); if (!result.success) { @@ -436,10 +490,25 @@ describe('HarnessSpecSchema', () => { } }); - it('accepts harness with system prompt file path', () => { + it('accepts harness with a literal system prompt', () => { const result = HarnessSpecSchema.safeParse({ ...minimalHarness, - systemPrompt: './system-prompt.md', + systemPrompt: 'You are a helpful research assistant. Cite your sources.', + }); + expect(result.success).toBe(true); + }); + + it('rejects a file-path-shaped system prompt (migration fail-fast; use system-prompt.md)', () => { + for (const bad of ['./system-prompt.md', '../prompts/system.md', 'prompts/system.txt']) { + const result = HarnessSpecSchema.safeParse({ ...minimalHarness, systemPrompt: bad }); + expect(result.success).toBe(false); + } + }); + + it('does not misfire on prose that merely mentions a filename', () => { + const result = HarnessSpecSchema.safeParse({ + ...minimalHarness, + systemPrompt: 'Refer to docs.md when the user asks about setup.', }); expect(result.success).toBe(true); }); @@ -474,14 +543,47 @@ describe('HarnessSpecSchema', () => { } }); - it('accepts harness with skills as string paths', () => { + it('accepts harness with path skills', () => { const result = HarnessSpecSchema.safeParse({ ...minimalHarness, - skills: ['./skills/research', '.agents/skills/xlsx'], + skills: [{ path: './skills/research' }, { path: '.agents/skills/xlsx' }], }); expect(result.success).toBe(true); }); + it('accepts harness with s3 skills', () => { + const result = HarnessSpecSchema.safeParse({ + ...minimalHarness, + skills: [{ s3Uri: 's3://my-bucket/skills/calc' }], + }); + expect(result.success).toBe(true); + }); + + it('accepts harness with git skills (public and private)', () => { + const result = HarnessSpecSchema.safeParse({ + ...minimalHarness, + skills: [ + { gitUrl: 'https://github.com/owner/repo', path: 
'skills/greet' }, + { + gitUrl: 'https://github.com/owner/private', + auth: { + credentialName: + 'arn:aws:bedrock-agentcore:us-east-1:123456789012:token-vault/default/apikeycredentialprovider/my-pat', + }, + }, + ], + }); + expect(result.success).toBe(true); + }); + + it('rejects git skill with non-https URL', () => { + const result = HarnessSpecSchema.safeParse({ + ...minimalHarness, + skills: [{ gitUrl: 'git@github.com:owner/repo' }], + }); + expect(result.success).toBe(false); + }); + it('accepts harness with allowedTools', () => { const result = HarnessSpecSchema.safeParse({ ...minimalHarness, @@ -554,6 +656,252 @@ describe('HarnessSpecSchema', () => { expect(result.success).toBe(false); }); + // B5 — truncation strategy "none" + it('accepts truncation strategy "none"', () => { + const result = HarnessSpecSchema.safeParse({ ...minimalHarness, truncation: { strategy: 'none' } }); + expect(result.success).toBe(true); + }); + + // B27e — truncation config must match the strategy + it('rejects a summarization config under a sliding_window strategy', () => { + const result = HarnessSpecSchema.safeParse({ + ...minimalHarness, + truncation: { strategy: 'sliding_window', config: { summarization: { summaryRatio: 0.5 } } }, + }); + expect(result.success).toBe(false); + }); + + it('rejects a config under the "none" strategy', () => { + const result = HarnessSpecSchema.safeParse({ + ...minimalHarness, + truncation: { strategy: 'none', config: { slidingWindow: { messagesCount: 5 } } }, + }); + expect(result.success).toBe(false); + }); + + // B6 / B7 — memory messagesCount + retrievalConfig + it('accepts memory messagesCount and retrievalConfig', () => { + const result = HarnessSpecSchema.safeParse({ + ...minimalHarness, + memory: { name: 'mem', messagesCount: 20, retrievalConfig: { topK: 5, relevanceScore: 0.7 } }, + }); + expect(result.success).toBe(true); + }); + + it('rejects an unknown key in memory.retrievalConfig', () => { + const result = 
HarnessSpecSchema.safeParse({ + ...minimalHarness, + memory: { name: 'mem', retrievalConfig: { topK: 5, strategyId: 'x' } }, + }); + expect(result.success).toBe(false); + }); + + // Review fix — retrievalConfig must carry at least one knob (an empty {} fans out to per-namespace + // {} objects, the pre-v6 crash shape). + it('rejects an empty memory.retrievalConfig', () => { + const result = HarnessSpecSchema.safeParse({ + ...minimalHarness, + memory: { name: 'mem', retrievalConfig: {} }, + }); + expect(result.success).toBe(false); + }); + + // Review fix — HarnessMemoryRefSchema is .strict(): a typo'd key (e.g. messageCount) is a parse + // error, not a silently-dropped field. + it('rejects an unknown key on the memory ref (typo guard)', () => { + const result = HarnessSpecSchema.safeParse({ + ...minimalHarness, + memory: { name: 'mem', messageCount: 20 }, + }); + expect(result.success).toBe(false); + }); + + // Review fix — retrievalConfig is dropped at synth for a by-arn ref (no resolvable strategies), so + // reject it at parse time. Gated on `arn` alone: the { arn, name, retrievalConfig } combo must also + // fail, because arn takes precedence in resolveHarnessMemory. 
+ it('rejects retrievalConfig on a by-arn memory ref', () => { + const byArn = HarnessSpecSchema.safeParse({ + ...minimalHarness, + memory: { arn: 'arn:aws:bedrock-agentcore:us-west-2:123:memory/abc', retrievalConfig: { topK: 5 } }, + }); + expect(byArn.success).toBe(false); + + const byArnAndName = HarnessSpecSchema.safeParse({ + ...minimalHarness, + memory: { arn: 'arn:aws:bedrock-agentcore:us-west-2:123:memory/abc', name: 'mem', retrievalConfig: { topK: 5 } }, + }); + expect(byArnAndName.success).toBe(false); + }); + + it('still accepts messagesCount/actorId on a by-arn memory ref', () => { + const result = HarnessSpecSchema.safeParse({ + ...minimalHarness, + memory: { arn: 'arn:aws:bedrock-agentcore:us-west-2:123:memory/abc', actorId: 'user-1', messagesCount: 20 }, + }); + expect(result.success).toBe(true); + }); + + // Managed memory + 3-mode discriminated union (NY-Summit). + describe('HarnessMemoryRefSchema — 3-mode union', () => { + it('accepts managed mode with default-shaped strategies', () => { + const r = HarnessSpecSchema.safeParse({ + ...minimalHarness, + memory: { mode: 'managed', strategies: ['SEMANTIC', 'SUMMARIZATION'] }, + }); + expect(r.success).toBe(true); + }); + + it('leaves managed strategies absent when omitted (service applies its own default)', () => { + const r = HarnessSpecSchema.safeParse({ ...minimalHarness, memory: { mode: 'managed' } }); + expect(r.success).toBe(true); + if (r.success && r.data.memory?.mode === 'managed') { + expect(r.data.memory.strategies).toBeUndefined(); + } + }); + + it('rejects CUSTOM in managed strategies (not valid for managed memory)', () => { + const r = HarnessSpecSchema.safeParse({ ...minimalHarness, memory: { mode: 'managed', strategies: ['CUSTOM'] } }); + expect(r.success).toBe(false); + }); + + it('rejects an out-of-range managed eventExpiryDuration', () => { + const r = HarnessSpecSchema.safeParse({ + ...minimalHarness, + memory: { mode: 'managed', strategies: ['SEMANTIC'], eventExpiryDuration: 2 }, 
+ }); + expect(r.success).toBe(false); + }); + + it('accepts managed eventExpiryDuration within 3-365', () => { + const r = HarnessSpecSchema.safeParse({ + ...minimalHarness, + memory: { mode: 'managed', strategies: ['SEMANTIC'], eventExpiryDuration: 30 }, + }); + expect(r.success).toBe(true); + }); + + it('rejects an unknown key on the managed arm (strict)', () => { + const r = HarnessSpecSchema.safeParse({ + ...minimalHarness, + memory: { mode: 'managed', strategies: ['SEMANTIC'], bogus: true }, + }); + expect(r.success).toBe(false); + }); + + it('accepts existing mode by name', () => { + expect( + HarnessSpecSchema.safeParse({ ...minimalHarness, memory: { mode: 'existing', name: 'mem' } }).success + ).toBe(true); + }); + + it('rejects existing mode with neither arn nor name', () => { + expect(HarnessSpecSchema.safeParse({ ...minimalHarness, memory: { mode: 'existing' } }).success).toBe(false); + }); + + it('preserves the by-arn retrievalConfig rejection on the existing arm', () => { + const r = HarnessSpecSchema.safeParse({ + ...minimalHarness, + memory: { + mode: 'existing', + arn: 'arn:aws:bedrock-agentcore:us-west-2:1:memory/m-aBcD012345', + retrievalConfig: { topK: 5 }, + }, + }); + expect(r.success).toBe(false); + }); + + it('accepts disabled mode', () => { + expect(HarnessSpecSchema.safeParse({ ...minimalHarness, memory: { mode: 'disabled' } }).success).toBe(true); + }); + + it('rejects an unknown key on the disabled arm (strict)', () => { + expect( + HarnessSpecSchema.safeParse({ ...minimalHarness, memory: { mode: 'disabled', bogus: true } }).success + ).toBe(false); + }); + + it('rejects an unknown mode', () => { + expect(HarnessSpecSchema.safeParse({ ...minimalHarness, memory: { mode: 'bogus' } }).success).toBe(false); + }); + + describe('legacy normalization', () => { + it('maps a legacy by-name ref to existing', () => { + const r = HarnessSpecSchema.safeParse({ ...minimalHarness, memory: { name: 'mem' } }); + expect(r.success).toBe(true); + if 
(r.success) expect(r.data.memory).toEqual({ mode: 'existing', name: 'mem' }); + }); + + it('maps a legacy by-arn ref to existing', () => { + const r = HarnessSpecSchema.safeParse({ + ...minimalHarness, + memory: { arn: 'arn:aws:bedrock-agentcore:us-west-2:1:memory/m-aBcD012345' }, + }); + expect(r.success).toBe(true); + if (r.success && r.data.memory) expect(r.data.memory.mode).toBe('existing'); + }); + + it('leaves absent memory absent (never invents managed)', () => { + const r = HarnessSpecSchema.safeParse({ ...minimalHarness }); + expect(r.success).toBe(true); + if (r.success) expect(r.data.memory).toBeUndefined(); + }); + + it('passes an already-tagged managed ref through unchanged', () => { + const r = HarnessSpecSchema.safeParse({ + ...minimalHarness, + memory: { mode: 'managed', strategies: ['SEMANTIC', 'SUMMARIZATION'] }, + }); + expect(r.success).toBe(true); + if (r.success && r.data.memory) expect(r.data.memory.mode).toBe('managed'); + }); + }); + }); + + // Review fix — both truncation arms present must fail (the outer .strict() rejects the second + // arm's key rather than silently dropping it). + it('rejects a truncation config carrying both arms', () => { + const result = HarnessSpecSchema.safeParse({ + ...minimalHarness, + truncation: { + strategy: 'sliding_window', + config: { slidingWindow: { messagesCount: 5 }, summarization: { summaryRatio: 0.5 } }, + }, + }); + expect(result.success).toBe(false); + }); + + // B8 — sessionStoragePath CFN MountPath parity ('/mnt/' is 5 chars + no subdir; spaces and + // multi-level paths fail the pattern; a valid single-level path is accepted). 
+ it('enforces the CFN MountPath constraint on sessionStoragePath', () => { + for (const p of ['/mnt/', '/mnt/bad path', '/mnt/x/y/z']) { + expect(HarnessSpecSchema.safeParse({ ...minimalHarness, sessionStoragePath: p }).success).toBe(false); + } + expect(HarnessSpecSchema.safeParse({ ...minimalHarness, sessionStoragePath: '/mnt/data' }).success).toBe(true); + }); + + // B16 — empty / whitespace system prompt + it('rejects an empty or whitespace-only system prompt', () => { + expect(HarnessSpecSchema.safeParse({ ...minimalHarness, systemPrompt: '' }).success).toBe(false); + expect(HarnessSpecSchema.safeParse({ ...minimalHarness, systemPrompt: ' ' }).success).toBe(false); + }); + + // B21 — env-var value length + map size + it('rejects an env-var value over 5000 chars or more than 50 entries', () => { + expect( + HarnessSpecSchema.safeParse({ ...minimalHarness, environmentVariables: { K: 'x'.repeat(5001) } }).success + ).toBe(false); + const many: Record = {}; + for (let i = 0; i < 51; i++) many[`K${i}`] = 'v'; + expect(HarnessSpecSchema.safeParse({ ...minimalHarness, environmentVariables: many }).success).toBe(false); + }); + + // B25 — containerUri ECR pattern + it('rejects a non-ECR containerUri', () => { + expect( + HarnessSpecSchema.safeParse({ ...minimalHarness, containerUri: 'docker.io/library/nginx:latest' }).success + ).toBe(false); + }); + it('accepts harness with container config', () => { const result = HarnessSpecSchema.safeParse({ ...minimalHarness, @@ -657,7 +1005,7 @@ describe('HarnessSpecSchema', () => { temperature: 0.7, maxTokens: 4096, }, - systemPrompt: './system-prompt.md', + systemPrompt: 'You are a research agent. 
Use tools when appropriate and cite sources.', tools: [ { type: 'agentcore_browser', name: 'browser' }, { type: 'agentcore_code_interpreter', name: 'code_interpreter' }, @@ -678,7 +1026,7 @@ describe('HarnessSpecSchema', () => { }, }, ], - skills: ['./skills/research'], + skills: [{ path: './skills/research' }], allowedTools: ['*'], memory: { name: 'research_memory' }, maxIterations: 75, @@ -782,3 +1130,129 @@ describe('HarnessSpecSchema', () => { expect(result.success).toBe(false); }); }); + +describe('HarnessSkillSchema', () => { + it('accepts a bare path string and normalizes to object', () => { + const result = HarnessSkillSchema.safeParse('./my-skill'); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data).toEqual({ path: './my-skill' }); + } + }); + + it('accepts a path object', () => { + const result = HarnessSkillSchema.safeParse({ path: './skills/research' }); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data).toEqual({ path: './skills/research' }); + } + }); + + it('accepts an S3 source', () => { + const result = HarnessSkillSchema.safeParse({ s3Uri: 's3://my-bucket/skills/research' }); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data).toEqual({ s3Uri: 's3://my-bucket/skills/research' }); + } + }); + + it('rejects S3 source without s3:// prefix', () => { + const result = HarnessSkillSchema.safeParse({ s3Uri: 'my-bucket/skills/research' }); + expect(result.success).toBe(false); + }); + + it('accepts a git source with URL only', () => { + const result = HarnessSkillSchema.safeParse({ gitUrl: 'https://github.com/org/repo' }); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data).toEqual({ gitUrl: 'https://github.com/org/repo' }); + } + }); + + it('accepts a git source with path and auth', () => { + const input = { + gitUrl: 'https://github.com/org/repo', + path: 'skills/research', + auth: { + credentialName: 'my-cred', + username: 
'bot-user', + }, + }; + const result = HarnessSkillSchema.safeParse(input); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data).toEqual(input); + } + }); + + it('rejects git source without https:// prefix', () => { + const result = HarnessSkillSchema.safeParse({ gitUrl: 'git@github.com:org/repo.git' }); + expect(result.success).toBe(false); + }); + + it('rejects empty string', () => { + const result = HarnessSkillSchema.safeParse(''); + expect(result.success).toBe(false); + }); + + it('rejects empty path object', () => { + const result = HarnessSkillSchema.safeParse({ path: '' }); + expect(result.success).toBe(false); + }); + + it('accepts an AWS skills source without paths', () => { + const result = HarnessSkillSchema.safeParse({ awsSkills: {} }); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data).toEqual({ awsSkills: {} }); + } + }); + + it('accepts an AWS skills source with paths', () => { + const result = HarnessSkillSchema.safeParse({ awsSkills: { paths: ['core-skills/*'] } }); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data).toEqual({ awsSkills: { paths: ['core-skills/*'] } }); + } + }); + + it('rejects an AWS skills source with empty path string', () => { + const result = HarnessSkillSchema.safeParse({ awsSkills: { paths: [''] } }); + expect(result.success).toBe(false); + }); +}); + +describe('HarnessSpecSchema skills field', () => { + const minimalHarness = { + name: 'TestHarness', + model: { provider: 'bedrock', modelId: 'anthropic.claude-3-5-sonnet-20240620-v1:0' }, + }; + + it('accepts mixed skill sources including AWS skills', () => { + const result = HarnessSpecSchema.safeParse({ + ...minimalHarness, + skills: [ + './local-skill', + { s3Uri: 's3://bucket/skill' }, + { gitUrl: 'https://github.com/org/repo', path: 'skills/foo' }, + { awsSkills: { paths: ['core-skills/*'] } }, + ], + }); + expect(result.success).toBe(true); + if (result.success) { + 
expect(result.data.skills).toHaveLength(4); + expect(result.data.skills[0]).toEqual({ path: './local-skill' }); + expect(result.data.skills[1]).toEqual({ s3Uri: 's3://bucket/skill' }); + expect(result.data.skills[2]).toEqual({ gitUrl: 'https://github.com/org/repo', path: 'skills/foo' }); + expect(result.data.skills[3]).toEqual({ awsSkills: { paths: ['core-skills/*'] } }); + } + }); + + it('defaults skills to empty array', () => { + const result = HarnessSpecSchema.safeParse(minimalHarness); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data.skills).toEqual([]); + } + }); +}); diff --git a/src/schema/schemas/primitives/__tests__/http-gateway.test.ts b/src/schema/schemas/primitives/__tests__/http-gateway.test.ts deleted file mode 100644 index 4fd885df1..000000000 --- a/src/schema/schemas/primitives/__tests__/http-gateway.test.ts +++ /dev/null @@ -1,82 +0,0 @@ -import { HttpGatewayNameSchema, HttpGatewaySchema } from '../http-gateway'; -import { describe, expect, it } from 'vitest'; - -describe('HttpGatewayNameSchema', () => { - it('accepts valid name starting with letter', () => { - expect(HttpGatewayNameSchema.safeParse('MyGateway1').success).toBe(true); - }); - - it('accepts name with hyphens', () => { - expect(HttpGatewayNameSchema.safeParse('my-gateway').success).toBe(true); - }); - - it('rejects empty string', () => { - expect(HttpGatewayNameSchema.safeParse('').success).toBe(false); - }); - - it('rejects name starting with number', () => { - expect(HttpGatewayNameSchema.safeParse('1gateway').success).toBe(false); - }); - - it('rejects name with underscores', () => { - expect(HttpGatewayNameSchema.safeParse('my_gateway').success).toBe(false); - }); - - it('accepts name longer than 24 chars', () => { - expect(HttpGatewayNameSchema.safeParse('a'.repeat(25)).success).toBe(true); - }); - - it('accepts name at 47 chars (room for 1-char project name + hyphen)', () => { - expect(HttpGatewayNameSchema.safeParse('a' + 
'b'.repeat(46)).success).toBe(true); - }); -}); - -describe('HttpGatewaySchema', () => { - const validHttpGateway = { - name: 'MyGateway', - runtimeRef: 'my-runtime', - }; - - it('accepts valid HTTP gateway with required fields', () => { - expect(HttpGatewaySchema.safeParse(validHttpGateway).success).toBe(true); - }); - - it('accepts valid HTTP gateway with all optional fields', () => { - const result = HttpGatewaySchema.safeParse({ - ...validHttpGateway, - description: 'A test gateway', - roleArn: 'arn:aws:iam::123456789012:role/MyRole', - }); - expect(result.success).toBe(true); - }); - - it('rejects missing name', () => { - const { name: _, ...withoutName } = validHttpGateway; - expect(HttpGatewaySchema.safeParse(withoutName).success).toBe(false); - }); - - it('rejects missing runtimeRef', () => { - const { runtimeRef: _, ...withoutRuntimeRef } = validHttpGateway; - expect(HttpGatewaySchema.safeParse(withoutRuntimeRef).success).toBe(false); - }); - - it('accepts name longer than 24 chars (no standalone max cap)', () => { - expect(HttpGatewaySchema.safeParse({ ...validHttpGateway, name: 'a' + 'b'.repeat(30) }).success).toBe(true); - }); - - it('rejects name starting with number', () => { - expect(HttpGatewaySchema.safeParse({ ...validHttpGateway, name: '1Gateway' }).success).toBe(false); - }); - - it('rejects name with invalid characters (underscores)', () => { - expect(HttpGatewaySchema.safeParse({ ...validHttpGateway, name: 'my_gateway' }).success).toBe(false); - }); - - it('rejects extra unknown fields (.strict())', () => { - const result = HttpGatewaySchema.safeParse({ - ...validHttpGateway, - unknownField: 'should fail', - }); - expect(result.success).toBe(false); - }); -}); diff --git a/src/schema/schemas/primitives/__tests__/knowledge-base.test.ts b/src/schema/schemas/primitives/__tests__/knowledge-base.test.ts new file mode 100644 index 000000000..d752a2398 --- /dev/null +++ b/src/schema/schemas/primitives/__tests__/knowledge-base.test.ts @@ -0,0 +1,203 
@@ +import { DataSourceSchema, KnowledgeBaseNameSchema, KnowledgeBaseSchema, S3DataSourceSchema } from '../knowledge-base'; +import { describe, expect, it } from 'vitest'; + +describe('KnowledgeBaseNameSchema', () => { + it('accepts a valid name', () => { + expect(() => KnowledgeBaseNameSchema.parse('product-docs')).not.toThrow(); + }); + + it('rejects names longer than 48 chars', () => { + expect(() => KnowledgeBaseNameSchema.parse('a'.repeat(49))).toThrow(); + }); + + it('rejects names that do not start with a letter', () => { + expect(() => KnowledgeBaseNameSchema.parse('1bad')).toThrow(); + }); +}); + +describe('S3DataSourceSchema', () => { + it('accepts a valid S3 URI with prefix', () => { + expect(() => S3DataSourceSchema.parse({ type: 'S3', uri: 's3://my-bucket/docs/' })).not.toThrow(); + }); + + it('accepts a valid S3 URI without trailing slash', () => { + expect(() => S3DataSourceSchema.parse({ type: 'S3', uri: 's3://my-bucket' })).not.toThrow(); + }); + + it('rejects a non-s3 URI', () => { + expect(() => S3DataSourceSchema.parse({ type: 'S3', uri: 'https://example.com' })).toThrow(); + }); + + it('rejects type other than S3', () => { + expect(() => S3DataSourceSchema.parse({ type: 'CONFLUENCE', uri: 's3://my-bucket/y/' })).toThrow(); + }); + + it('rejects unknown keys', () => { + expect(() => S3DataSourceSchema.parse({ type: 'S3', uri: 's3://my-bucket/', extra: 1 })).toThrow(); + }); + + it('rejects bucket with uppercase letter', () => { + expect(() => S3DataSourceSchema.parse({ type: 'S3', uri: 's3://My-Bucket/x' })).toThrow(); + }); + + it('rejects bucket with consecutive dots', () => { + expect(() => S3DataSourceSchema.parse({ type: 'S3', uri: 's3://my..bucket/x' })).toThrow(); + }); + + it('rejects bucket with trailing dot', () => { + expect(() => S3DataSourceSchema.parse({ type: 'S3', uri: 's3://my-bucket./x' })).toThrow(); + }); + + it('rejects xn-- reserved bucket prefix', () => { + expect(() => S3DataSourceSchema.parse({ type: 'S3', uri: 
's3://xn--my-bucket/x' })).toThrow(); + }); + + it('rejects sthree- reserved bucket prefix', () => { + expect(() => S3DataSourceSchema.parse({ type: 'S3', uri: 's3://sthree-my-bucket/x' })).toThrow(); + }); + + it('rejects -s3alias reserved suffix', () => { + expect(() => S3DataSourceSchema.parse({ type: 'S3', uri: 's3://my-bucket-s3alias/x' })).toThrow(); + }); +}); + +describe('KnowledgeBaseSchema', () => { + it('accepts a minimal project-owned KB entry', () => { + expect(() => + KnowledgeBaseSchema.parse({ + name: 'product-docs', + dataSources: [{ type: 'S3', uri: 's3://my-bucket/docs/' }], + }) + ).not.toThrow(); + }); + + it('accepts multiple data sources', () => { + expect(() => + KnowledgeBaseSchema.parse({ + name: 'company-docs', + dataSources: [ + { type: 'S3', uri: 's3://bucket/a/' }, + { type: 'S3', uri: 's3://bucket/b/' }, + ], + }) + ).not.toThrow(); + }); + + it('rejects entries with no data sources', () => { + expect(() => KnowledgeBaseSchema.parse({ name: 'empty', dataSources: [] })).toThrow(); + }); + + it('rejects duplicate data source URIs', () => { + expect(() => + KnowledgeBaseSchema.parse({ + name: 'dup', + dataSources: [ + { type: 'S3', uri: 's3://my-bucket/a/' }, + { type: 'S3', uri: 's3://my-bucket/a/' }, + ], + }) + ).toThrow(); + }); + + it('accepts optional description and gateway', () => { + expect(() => + KnowledgeBaseSchema.parse({ + name: 'docs', + description: 'Customer docs', + dataSources: [{ type: 'S3', uri: 's3://my-bucket/d/' }], + gateway: 'main-gw', + }) + ).not.toThrow(); + }); + + it('rejects unknown top-level keys', () => { + expect(() => + KnowledgeBaseSchema.parse({ + name: 'docs', + dataSources: [{ type: 'S3', uri: 's3://my-bucket/' }], + foo: 'bar', + }) + ).toThrow(); + }); + + it('rejects description longer than 2048 chars', () => { + expect(() => + KnowledgeBaseSchema.parse({ + name: 'docs', + description: 'x'.repeat(2049), + dataSources: [{ type: 'S3', uri: 's3://my-bucket/' }], + }) + ).toThrow(); + }); + + 
it('defaults type to AgentCoreKnowledgeBase when omitted', () => { + const parsed = KnowledgeBaseSchema.parse({ + name: 'docs', + dataSources: [{ type: 'S3', uri: 's3://my-bucket/' }], + }); + expect(parsed.type).toBe('AgentCoreKnowledgeBase'); + }); + + it('rejects empty name', () => { + expect(() => + KnowledgeBaseSchema.parse({ + name: '', + dataSources: [{ type: 'S3', uri: 's3://my-bucket/' }], + }) + ).toThrow(); + }); + + it('rejects empty gateway string', () => { + expect(() => + KnowledgeBaseSchema.parse({ + name: 'docs', + dataSources: [{ type: 'S3', uri: 's3://my-bucket/' }], + gateway: '', + }) + ).toThrow(); + }); +}); + +describe('DataSourceSchema — connector variants', () => { + it('accepts an S3 data source', () => { + const r = DataSourceSchema.safeParse({ type: 'S3', uri: 's3://my-bucket/docs/' }); + expect(r.success).toBe(true); + }); + + it('accepts a WEB connector-file data source', () => { + const r = DataSourceSchema.safeParse({ type: 'WEB', connectorConfigFile: 'app/web-docs/web.json' }); + expect(r.success).toBe(true); + }); + + it.each(['CONFLUENCE', 'SHAREPOINT', 'ONEDRIVE', 'GOOGLEDRIVE'])('accepts a %s connector-file data source', type => { + const r = DataSourceSchema.safeParse({ type, connectorConfigFile: `app/kb/${type}.json` }); + expect(r.success).toBe(true); + }); + + it('rejects a connector-file source missing connectorConfigFile', () => { + const r = DataSourceSchema.safeParse({ type: 'WEB' }); + expect(r.success).toBe(false); + }); + + it('rejects an unknown connector type', () => { + const r = DataSourceSchema.safeParse({ type: 'WEBCRAWLER', connectorConfigFile: 'x.json' }); + expect(r.success).toBe(false); + }); + + it('rejects a connector-file source that also carries a uri (strict)', () => { + const r = DataSourceSchema.safeParse({ type: 'WEB', connectorConfigFile: 'x.json', uri: 's3://b/' }); + expect(r.success).toBe(false); + }); + + it('dedups a mixed dataSources[] by uri AND connectorConfigFile', () => { + const r = 
KnowledgeBaseSchema.safeParse({ + name: 'kb', + dataSources: [ + { type: 'S3', uri: 's3://b/a/' }, + { type: 'WEB', connectorConfigFile: 'app/kb/web.json' }, + { type: 'WEB', connectorConfigFile: 'app/kb/web.json' }, + ], + }); + expect(r.success).toBe(false); + }); +}); diff --git a/src/schema/schemas/primitives/ab-test.ts b/src/schema/schemas/primitives/ab-test.ts index ec04ab4f7..431bdf7b9 100644 --- a/src/schema/schemas/primitives/ab-test.ts +++ b/src/schema/schemas/primitives/ab-test.ts @@ -115,8 +115,6 @@ export const ABTestSchema = z variants: z.array(ABTestVariantSchema).length(2), evaluationConfig: ABTestEvaluationConfigSchema, gatewayFilter: GatewayFilterSchema.optional(), - trafficAllocationConfig: TrafficAllocationConfigSchema.optional(), - maxDurationDays: z.number().int().min(1).max(90).optional(), enableOnCreate: z.boolean().optional(), promoted: z.boolean().optional(), }) diff --git a/src/schema/schemas/primitives/harness.ts b/src/schema/schemas/primitives/harness.ts index 4531774c2..c165e933e 100644 --- a/src/schema/schemas/primitives/harness.ts +++ b/src/schema/schemas/primitives/harness.ts @@ -4,12 +4,29 @@ import { LifecycleConfigurationSchema, NetworkConfigSchema, S3FilesAccessPointConfigSchema, + SessionStorageSchema, } from '../agent-env'; import { AuthorizerConfigSchema, RuntimeAuthorizerTypeSchema } from '../auth'; import { uniqueBy } from '../zod-util'; import { TagsSchema } from './tags'; import { z } from 'zod'; +/** + * ECR container image URI pattern, mirroring the CFN ContainerConfiguration.ContainerUri + * constraint (private 12-digit ECR registry or public.ecr.aws, optional tag/digest). + * A non-ECR URI is rejected client-side instead of failing at CloudFormation CREATE. 
+ */ +export const CONTAINER_URI_PATTERN = + // eslint-disable-next-line security/detect-unsafe-regex -- mirrors the CFN ContainerUri pattern; input is length-bounded by .max(MAX_CONTAINER_URI_LENGTH) + /^(([0-9]{12})\.dkr\.ecr\.([a-z0-9-]+)\.amazonaws\.com(\.cn)?|public\.ecr\.aws)\/((?:[a-z0-9]+(?:[._-][a-z0-9]+)*\/)*[a-z0-9]+(?:[._-][a-z0-9]+)*)(?::([^:@]{1,300}))?(?:@(.+))?$/; +/** CFN ContainerConfiguration.ContainerUri maxLength. */ +export const MAX_CONTAINER_URI_LENGTH = 1024; +/** CFN EnvironmentVariables value maxLength and map maxProperties. */ +export const MAX_ENV_VAR_VALUE_LENGTH = 5000; +export const MAX_ENV_VARS = 50; +/** CFN/Smithy EnvironmentVariableKey length bounds (Smithy EnvironmentVariableKey: 1–100, no char pattern). */ +export const MAX_ENV_VAR_KEY_LENGTH = 100; + // ============================================================================ // Harness Name // ============================================================================ @@ -17,19 +34,22 @@ import { z } from 'zod'; export const HarnessNameSchema = z .string() .min(1, 'Harness name is required') - .max(48) + .max(40) .regex( - /^[a-zA-Z][a-zA-Z0-9_]{0,47}$/, - 'Must begin with a letter and contain only alphanumeric characters and underscores (max 48 chars)' + /^[a-zA-Z][a-zA-Z0-9_]{0,39}$/, + 'Must begin with a letter and contain only alphanumeric characters and underscores (max 40 chars)' ); // ============================================================================ // Model Configuration // ============================================================================ -export const HarnessModelProviderSchema = z.enum(['bedrock', 'open_ai', 'gemini']); +export const HarnessModelProviderSchema = z.enum(['bedrock', 'open_ai', 'gemini', 'lite_llm']); export type HarnessModelProvider = z.infer; +/** Max length of the LiteLLM apiBase URL (Smithy: HarnessLiteLlmApiBase, 1–16383). 
*/ +export const MAX_LITE_LLM_API_BASE_LENGTH = 16383; + export const BedrockApiFormatSchema = z.enum(['converse_stream', 'responses', 'chat_completions']); export type BedrockApiFormat = z.infer; @@ -47,8 +67,12 @@ export const HarnessModelSchema = z apiFormat: HarnessApiFormatSchema.optional(), temperature: z.number().min(0).max(2).optional(), topP: z.number().min(0).max(1).optional(), - topK: z.number().min(0).max(1).optional(), + topK: z.number().int().min(0).max(500).optional(), maxTokens: z.number().int().min(1).optional(), + /** LiteLLM only: base URL for the third-party model provider's API endpoint. */ + apiBase: z.string().min(1).max(MAX_LITE_LLM_API_BASE_LENGTH).optional(), + /** LiteLLM only: provider-specific parameters passed through to the model provider unchanged. */ + additionalParams: z.record(z.string(), z.unknown()).optional(), }) .superRefine((model, ctx) => { if (model.topK !== undefined && model.provider !== 'gemini') { @@ -73,6 +97,28 @@ export const HarnessModelSchema = z }); } } + // CFN requires ApiKeyArn for the open_ai and gemini model configs (bedrock and lite_llm do not). 
+ if (model.apiKeyArn === undefined && (model.provider === 'open_ai' || model.provider === 'gemini')) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: `apiKeyArn is required for the "${model.provider}" provider`, + path: ['apiKeyArn'], + }); + } + if (model.apiBase !== undefined && model.provider !== 'lite_llm') { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: 'apiBase is only supported for the "lite_llm" provider', + path: ['apiBase'], + }); + } + if (model.additionalParams !== undefined && model.provider !== 'lite_llm') { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: 'additionalParams is only supported for the "lite_llm" provider', + path: ['additionalParams'], + }); + } }); export type HarnessModel = z.infer; @@ -238,41 +284,239 @@ export type HarnessTool = z.infer; // Memory Reference // ============================================================================ -export const HarnessMemoryRefSchema = z.object({ - name: z.string().min(1).optional(), - arn: z.string().min(1).optional(), - actorId: z.string().optional(), -}); +/** + * Per-namespace retrieval tuning, applied as a flat default across every namespace + * of the referenced memory's strategies. Namespaces are only known at CDK synth + * (from the resolved Memory's strategies), so a per-namespace map cannot be authored + * here; the CDK fans this single config out to each namespace. StrategyId is omitted + * deliberately — the namespace already identifies the strategy and the id is + * service-assigned. Maps to CFN HarnessAgentCoreMemoryRetrievalConfig {TopK, RelevanceScore}. + */ +export const HarnessMemoryRetrievalConfigSchema = z + .object({ + topK: z.number().int().min(1).optional(), + relevanceScore: z.number().min(0).max(1).optional(), + }) + .strict() + // An empty `{}` parses against the optional fields, but fans out to per-namespace `{}` objects — + // the exact shape that crashed the pre-v6 service handler. Require at least one knob. 
+ .refine(v => v.topK !== undefined || v.relevanceScore !== undefined, { + message: 'retrievalConfig must specify at least one of topK or relevanceScore', + }); + +export type HarnessMemoryRetrievalConfig = z.infer<typeof HarnessMemoryRetrievalConfigSchema>; + +/** + * Managed-memory strategy set. NOT MemoryStrategyTypeSchema: managed harness memory excludes + * CUSTOM (the CFN ManagedMemoryConfiguration.Strategies enum is exactly these four). + */ +export const ManagedMemoryStrategySchema = z.enum(['SEMANTIC', 'SUMMARIZATION', 'USER_PREFERENCE', 'EPISODIC']); + +/** + * Managed: the harness creates and owns its memory internally. The memory `arn` is read-only + * (service-populated) and never authored here. `strategies` is OPTIONAL and left absent by default: + * when omitted, the harness/service applies its own default strategy set. It is only written when the + * user explicitly tunes it, so the CLI never pins a default the service might evolve. + */ +const ManagedMemoryRefSchema = z + .object({ + mode: z.literal('managed'), + strategies: z.array(ManagedMemoryStrategySchema).min(1).max(4).optional(), + eventExpiryDuration: z.number().int().min(3).max(365).optional(), + encryptionKeyArn: z.string().min(1).optional(), + }) + .strict(); + +/** + * Existing (bring-your-own): reference a memory by name (project sibling) or arn, carrying the + * optional deploy-time tuning. This is the pre-union flat shape, now tagged `existing`. + */ +const ExistingMemoryRefSchema = z + .object({ + mode: z.literal('existing'), + name: z.string().min(1).optional(), + arn: z.string().min(1).optional(), + actorId: z.string().optional(), + /** Limits how many recent memory messages are loaded into context (CFN MessagesCount). */ + messagesCount: z.number().int().min(1).optional(), + /** Retrieval tuning applied to every namespace of the referenced memory (CFN RetrievalConfig). */ + retrievalConfig: HarnessMemoryRetrievalConfigSchema.optional(), + }) + // .strict() so a typo (e.g.
`messageCount` for `messagesCount`) is a parse error rather than a + // silently-dropped field that defeats the B6 MessagesCount fix. + .strict() + .refine(m => m.arn != null || m.name != null, { + message: 'existing memory requires `arn` or `name`', + path: ['name'], + }) + .superRefine((ref, ctx) => { + // retrievalConfig is applied per-namespace, and namespaces are only resolvable from a by-name + // memory's strategies. At synth `arn` takes precedence over `name` (resolveHarnessMemory), so + // ANY ref carrying `arn` resolves no strategies and buildRetrievalConfig drops the tuning — + // including the `{ arn, name, retrievalConfig }` combo. Gate on `arn` alone, not `arn && !name`, + // so that combo can't slip through. (messagesCount and actorId are emitted directly from the + // ref, so they stay valid for a by-arn reference.) + if (ref.arn && ref.retrievalConfig !== undefined) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: + 'retrievalConfig is not supported when memory is referenced by `arn` (per-namespace tuning is only resolvable for a by-name reference). Reference the memory by `name` only, or drop retrievalConfig.', + path: ['retrievalConfig'], + }); + } + }); + +/** Disabled: explicit opt-out of memory. */ +const DisabledMemoryRefSchema = z.object({ mode: z.literal('disabled') }).strict(); + +/** + * Memory reference for a harness — exactly one of managed | existing | disabled. + * A legacy-aware preprocess maps the pre-union flat shape ({ name | arn | ... }) to the `existing` + * arm. Absent memory STAYS absent — managed is the default only for NEW harnesses (seeded at create + * time by the CLI/TUI), never invented here, so already-deployed harnesses are not auto-upgraded. 
+ */ +export const HarnessMemoryRefSchema = z.preprocess( + val => { + if (val == null || typeof val !== 'object' || Array.isArray(val)) return val; + const obj = val as Record<string, unknown>; + if ('mode' in obj) return obj; // already union-tagged + // Legacy flat shape → existing (it always referenced a sibling/external memory by name or arn). + return { mode: 'existing', ...obj }; + }, + z.discriminatedUnion('mode', [ManagedMemoryRefSchema, ExistingMemoryRefSchema, DisabledMemoryRefSchema]) +); export type HarnessMemoryRef = z.infer<typeof HarnessMemoryRefSchema>; +export type ManagedMemoryStrategy = z.infer<typeof ManagedMemoryStrategySchema>; // ============================================================================ // Truncation Configuration // ============================================================================ -export const HarnessTruncationStrategySchema = z.enum(['sliding_window', 'summarization']); +export const HarnessTruncationStrategySchema = z.enum(['sliding_window', 'summarization', 'none']); -export const SlidingWindowConfigSchema = z.object({ - slidingWindow: z.object({ - messagesCount: z.number().int().min(1).optional(), - }), -}); +// .strict() on the outer object so a payload carrying BOTH arms +// (`{ slidingWindow: {...}, summarization: {...} }`) fails the union — without it the union +// matches the first arm and silently drops the second arm's config.
+export const SlidingWindowConfigSchema = z + .object({ + slidingWindow: z.object({ + messagesCount: z.number().int().min(1).optional(), + }), + }) + .strict(); -export const SummarizationConfigSchema = z.object({ - summarization: z.object({ - summaryRatio: z.number().min(0).max(1).optional(), - preserveRecentMessages: z.number().int().min(0).optional(), - summarizationSystemPrompt: z.string().optional(), - }), -}); +export const SummarizationConfigSchema = z + .object({ + summarization: z.object({ + summaryRatio: z.number().min(0).max(1).optional(), + preserveRecentMessages: z.number().int().min(0).optional(), + summarizationSystemPrompt: z.string().optional(), + }), + }) + .strict(); -export const HarnessTruncationConfigSchema = z.object({ - strategy: HarnessTruncationStrategySchema, - config: z.union([SlidingWindowConfigSchema, SummarizationConfigSchema]).optional(), -}); +export const HarnessTruncationConfigSchema = z + .object({ + strategy: HarnessTruncationStrategySchema, + config: z.union([SlidingWindowConfigSchema, SummarizationConfigSchema]).optional(), + }) + .superRefine((data, ctx) => { + // Bind the config arm to the chosen strategy: sliding_window must carry a slidingWindow + // config (or none), summarization a summarization config (or none), and none takes no config. + if (!data.config) return; + const configKey = 'slidingWindow' in data.config ? 
'slidingWindow' : 'summarization'; + const expected: Record<string, string | undefined> = { + sliding_window: 'slidingWindow', + summarization: 'summarization', + none: undefined, + }; + if (expected[data.strategy] === undefined) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: `Truncation strategy "${data.strategy}" does not take a config`, + path: ['config'], + }); + } else if (expected[data.strategy] !== configKey) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: `Truncation strategy "${data.strategy}" requires a "${expected[data.strategy]}" config, got "${configKey}"`, + path: ['config'], + }); + } + }); export type HarnessTruncationConfig = z.infer<typeof HarnessTruncationConfigSchema>; +// ============================================================================ +// Skill Configuration +// ============================================================================ + +export const HarnessSkillGitAuthSchema = z.object({ + credentialName: z.string().min(1), + username: z.string().optional(), +}); + +export type HarnessSkillGitAuth = z.infer<typeof HarnessSkillGitAuthSchema>; + +export const HarnessSkillS3SourceSchema = z + .object({ + s3Uri: z + .string() + .min(5) + .regex(/^s3:\/\//, 'Must be an S3 URI starting with s3://'), + }) + .strict(); + +export type HarnessSkillS3Source = z.infer<typeof HarnessSkillS3SourceSchema>; + +export const HarnessSkillGitSourceSchema = z + .object({ + gitUrl: z + .string() + .min(8) + .regex(/^https:\/\//, 'Must be an HTTPS git URL'), + path: z.string().min(1).optional(), + auth: HarnessSkillGitAuthSchema.optional(), + }) + .strict(); + +export type HarnessSkillGitSource = z.infer<typeof HarnessSkillGitSourceSchema>; + +export const HarnessSkillPathSourceSchema = z + .object({ + path: z.string().min(1), + }) + .strict(); + +export type HarnessSkillPathSource = z.infer<typeof HarnessSkillPathSourceSchema>; + +export const HarnessSkillAwsSkillsSourceSchema = z + .object({ + awsSkills: z + .object({ + paths: z.array(z.string().min(1).max(4096)).optional(), + }) + .strict(), + }) + .strict(); + +export type HarnessSkillAwsSkillsSource = z.infer<typeof HarnessSkillAwsSkillsSourceSchema>; + +export const HarnessSkillSchema = z.union([ + z + .string()
+ .min(1) + .transform(path => ({ path })), + HarnessSkillS3SourceSchema, + HarnessSkillGitSourceSchema, + HarnessSkillPathSourceSchema, + HarnessSkillAwsSkillsSourceSchema, +]); + +export type HarnessSkillInput = z.input<typeof HarnessSkillSchema>; +export type HarnessSkill = z.output<typeof HarnessSkillSchema>; + // ============================================================================ // Allowed Tools // ============================================================================ @@ -284,6 +528,18 @@ export const AllowedToolSchema = z // eslint-disable-next-line security/detect-unsafe-regex -- safe: input is bounded to 64 chars by .max(64) .regex(/^(\*|@?[^/]+(\/[^/]+)?)$/, 'Must be "*" or a tool name pattern (max 64 chars)'); +/** + * Detects the legacy file-path-shaped `systemPrompt` (the pre-migration `./prompt.md` style that + * the old L3 read from disk). A real system prompt is instructional prose containing whitespace; + * the legacy shape was always a single bare token that is a relative path or ends in .md/.txt. + * Used to fail fast on upgrade rather than silently shipping the path string as the prompt text. + */ +export function looksLikeLegacyPromptPath(value: string): boolean { + const v = value.trim(); + if (!/^\S+$/.test(v)) return false; // prose contains whitespace — never a path + return /^\.\.?\//.test(v) || /\.(md|txt)$/i.test(v); +} + // ============================================================================ // HarnessSpec — per-harness config file schema (harness.json) // ============================================================================ @@ -292,7 +548,21 @@ export const HarnessSpecSchema = z .object({ name: HarnessNameSchema, model: HarnessModelSchema, - systemPrompt: z.string().optional(), + // Always literal text. CFN HarnessSystemContentBlock.Text is minLength:1, so a blank or + // whitespace-only prompt would deploy to CREATE_FAILED — reject it client-side instead. + // (File-backed prompts are supplied via system-prompt.md auto-discovery, not this field.)
+ systemPrompt: z + .string() + .refine(val => val.trim().length > 0, { message: 'systemPrompt must not be empty or whitespace-only' }) + // Migration fail-fast: the previous L3 treated a `./prompt.md`-style value as a file path and + // loaded its contents; this field is now ALWAYS literal text, so such a value would silently + // ship as the prompt itself. Reject the legacy file-path shape so the divergence fails loudly + // at parse/validate time (before synth) instead of in production. + .refine(val => !looksLikeLegacyPromptPath(val), { + message: + 'systemPrompt looks like a file path. It is now always literal text — put file-backed prompts in a `system-prompt.md` in the harness directory (auto-discovered), or inline the prompt text here.', + }) + .optional(), tools: z .array(HarnessToolSchema) .default([]) @@ -302,27 +572,35 @@ export const HarnessSpecSchema = z name => `Duplicate tool name: ${name}` ) ), - skills: z.array(z.string().min(1)).default([]), + skills: z.array(HarnessSkillSchema).default([]), allowedTools: z.array(AllowedToolSchema).optional(), memory: HarnessMemoryRefSchema.optional(), maxIterations: z.number().int().min(1).optional(), maxTokens: z.number().int().min(1).optional(), timeoutSeconds: z.number().int().min(1).optional(), truncation: HarnessTruncationConfigSchema.optional(), - containerUri: z.string().min(1).optional(), + containerUri: z + .string() + .min(1) + .max(MAX_CONTAINER_URI_LENGTH) + .regex(CONTAINER_URI_PATTERN, 'containerUri must be an ECR image URI (12-digit private ECR or public.ecr.aws)') + .optional(), dockerfile: z.string().min(1).optional(), executionRoleArn: z.string().optional(), networkMode: NetworkModeSchema.optional(), networkConfig: NetworkConfigSchema.optional(), lifecycleConfig: LifecycleConfigurationSchema.optional(), - sessionStoragePath: z - .string() - .min(1) - .refine(val => val.startsWith('/mnt/'), { message: 'sessionStoragePath must be an absolute path under /mnt/' }) - .optional(), + 
sessionStoragePath: SessionStorageSchema.shape.mountPath.optional(), efsAccessPoints: z.array(EfsAccessPointConfigSchema).max(2).optional(), s3AccessPoints: z.array(S3FilesAccessPointConfigSchema).max(2).optional(), - environmentVariables: z.record(z.string(), z.string()).optional(), + environmentVariables: z + // Key bound (Smithy EnvironmentVariableKey: length 1–100, no character pattern) — an empty or + // >100-char key passes a bare z.string() here but fails at CFN CREATE. + .record(z.string().min(1).max(MAX_ENV_VAR_KEY_LENGTH), z.string().max(MAX_ENV_VAR_VALUE_LENGTH)) + .refine(rec => Object.keys(rec).length <= MAX_ENV_VARS, { + message: `A maximum of ${MAX_ENV_VARS} environment variables is allowed`, + }) + .optional(), /** Authorizer type for inbound requests. Defaults to AWS_IAM. */ authorizerType: RuntimeAuthorizerTypeSchema.optional(), /** Authorizer configuration. Required when authorizerType is CUSTOM_JWT. */ diff --git a/src/schema/schemas/primitives/http-gateway.ts b/src/schema/schemas/primitives/http-gateway.ts deleted file mode 100644 index 4773b32e2..000000000 --- a/src/schema/schemas/primitives/http-gateway.ts +++ /dev/null @@ -1,41 +0,0 @@ -import { z } from 'zod'; - -// ============================================================================ -// HTTP Gateway Types -// ============================================================================ - -export const HttpGatewayNameSchema = z - .string() - .min(1, 'Name is required') - .regex( - /^[a-zA-Z][a-zA-Z0-9-]*$/, - 'Gateway name must start with a letter and contain only alphanumeric characters or hyphens (combined with project name must fit 48-char AWS limit)' - ); - -export const HttpGatewayTargetSchema = z.object({ - /** Gateway target name (referenced by AB test variants) */ - name: z.string().min(1).max(100), - /** Reference to a runtime name from spec.runtimes */ - runtimeRef: z.string().min(1), - /** Endpoint qualifier on the runtime (e.g., 'prod', 'staging'). Defaults to 'DEFAULT'. 
*/ - qualifier: z.string().min(1).default('DEFAULT'), -}); - -export type HttpGatewayTarget = z.infer; - -export const HttpGatewaySchema = z - .object({ - /** Unique name for the HTTP gateway */ - name: HttpGatewayNameSchema, - /** Optional description */ - description: z.string().min(1).max(200).optional(), - /** Reference to a runtime name from spec.runtimes. One target is created per gateway pointing to this runtime. */ - runtimeRef: z.string().min(1), - /** IAM role ARN for gateway execution. Auto-created if omitted. */ - roleArn: z.string().min(1).optional(), - /** Additional targets for the gateway (for target-based AB testing). */ - targets: z.array(HttpGatewayTargetSchema).optional(), - }) - .strict(); - -export type HttpGateway = z.infer; diff --git a/src/schema/schemas/primitives/index.ts b/src/schema/schemas/primitives/index.ts index f9ae0e856..d988ee49f 100644 --- a/src/schema/schemas/primitives/index.ts +++ b/src/schema/schemas/primitives/index.ts @@ -59,11 +59,13 @@ export { RatingScaleSchema, } from './evaluator'; -export type { OnlineEvalConfig } from './online-eval-config'; -export { OnlineEvalConfigSchema, OnlineEvalConfigNameSchema } from './online-eval-config'; +export type { OnlineEvalConfig, ClusteringConfig } from './online-eval-config'; +export { OnlineEvalConfigSchema, OnlineEvalConfigNameSchema, ClusteringConfigSchema } from './online-eval-config'; -export type { Policy, PolicyEngine, ValidationMode } from './policy'; +export type { AuthorizationPhase, EnforcementMode, Policy, PolicyEngine, ValidationMode } from './policy'; export { + AuthorizationPhaseSchema, + EnforcementModeSchema, PolicyEngineNameSchema, PolicyEngineSchema, PolicyNameSchema, @@ -82,6 +84,7 @@ export type { HarnessTool, HarnessToolType, HarnessTruncationConfig, + ManagedMemoryStrategy, OpenAiApiFormat, } from './harness'; export { @@ -102,11 +105,9 @@ export { HarnessToolTypeSchema, HarnessTruncationConfigSchema, HarnessTruncationStrategySchema, + 
ManagedMemoryStrategySchema, } from './harness'; -export type { HttpGateway } from './http-gateway'; -export { HttpGatewayNameSchema, HttpGatewaySchema } from './http-gateway'; - export type { PaymentManager, PaymentConnector, PaymentProvider, PaymentAuthorizerType } from './payment'; export { DEFAULT_AUTO_PAYMENT, diff --git a/src/schema/schemas/primitives/knowledge-base.ts b/src/schema/schemas/primitives/knowledge-base.ts new file mode 100644 index 000000000..5940d620a --- /dev/null +++ b/src/schema/schemas/primitives/knowledge-base.ts @@ -0,0 +1,122 @@ +import { uniqueBy } from '../zod-util'; +import { z } from 'zod'; + +/** + * Knowledge Base name validation. + * 1-48 chars, starts with a letter, alphanumeric + dash + underscore. + * Mirrors the naming convention used by Memory/Evaluator/Dataset primitives; + * stricter than the Bedrock CreateKnowledgeBase API allows (which permits up + * to 100 chars), but consistent with the rest of the agentcore-cli schemas. + */ +export const KnowledgeBaseNameSchema = z + .string() + .min(1, 'Name is required') + .max(48) + .regex( + /^[a-zA-Z][a-zA-Z0-9_-]{0,47}$/, + 'Must begin with a letter and contain only alphanumeric characters, dashes, and underscores (max 48 chars)' + ); + +/** + * S3 data source for FMKB. Wave 1 supports S3 only. + * + * `uri` must be an `s3://bucket[/prefix]` URI. Non-S3 connectors (Confluence, + * SharePoint, OneDrive, Google Drive, Web Crawler) are deferred to a later wave. + * + * Bucket validation enforces the AWS S3 bucket naming rules at parse time: + * 3-63 chars, lowercase + digits + dot + hyphen, must start and end with + * letter/digit, no consecutive dots, and none of the AWS-reserved prefixes + * (`xn--`, `sthree-`) or suffixes (`-s3alias`). AWS will ultimately reject + * non-conformant buckets server-side; failing fast at config-load time + * gives a better error surface. + */ +const S3_BUCKET_NAME = /^(?!xn--)(?!sthree-)[a-z0-9](?!.*\.\.)[a-z0-9.-]{1,61}[a-z0-9](? 
{ + const m = /^s3:\/\/([^/]+)(?:\/.*)?$/.exec(s); + return !!m && S3_BUCKET_NAME.test(m[1]!); + }, + { message: 'Must be a valid s3:// URI with an AWS-compliant bucket name' } + ), + }) + .strict(); + +export type S3DataSource = z.infer; + +/** + * Wire-verbatim connector type values for non-S3 FMKB data sources. These are + * the exact `connectorParameters.type` literals the Bedrock managed-connector + * API uses (confirmed against the FMKB console module's read and write paths). + * Note `WEB` (not `WEBCRAWLER`) and the single-word `GOOGLEDRIVE`. + */ +export const ConnectorDataSourceTypeSchema = z.enum(['WEB', 'CONFLUENCE', 'SHAREPOINT', 'ONEDRIVE', 'GOOGLEDRIVE']); +export type ConnectorDataSourceType = z.infer; + +/** + * Non-S3 data source. The connector-specific structure lives in a JSON file + * (`connectorConfigFile`, a project-relative path) and is passed through to + * the DataSource's connectorParameters verbatim at deploy time. This honors + * the DevEx "JSON file passthrough" decision: new connector params don't + * require CLI/schema changes. + */ +export const ConnectorFileDataSourceSchema = z + .object({ + type: ConnectorDataSourceTypeSchema, + connectorConfigFile: z.string().min(1, 'connectorConfigFile path is required'), + }) + .strict(); + +export type ConnectorFileDataSource = z.infer; + +/** + * Knowledge Base data source: S3 (inline `uri`) or a non-S3 connector + * (file-path reference). Discriminated union on `type`. + */ +export const DataSourceSchema = z.discriminatedUnion('type', [S3DataSourceSchema, ConnectorFileDataSourceSchema]); +export type DataSource = z.infer; + +/** + * Type literal for KnowledgeBase entries in `agentcore.json`. + * Mirrors how Memory/Evaluator/etc. tag themselves for forward-compat. + */ +export const KnowledgeBaseTypeSchema = z.literal('AgentCoreKnowledgeBase'); +export type KnowledgeBaseType = z.infer; + +/** + * Knowledge Base entry. 
The CLI creates and owns the KB, its data sources, + * its IAM role, and (when `gateway` is set in Wave 2) its connector gateway + * target. + * + * To wire an EXTERNAL KB (one this project does not own) as a gateway target, + * skip this schema and use a connector gateway target with the external KB's + * literal 10-char ID set on `knowledgeBaseId`. See `agentcore add gateway- + * target --type connector --connector bedrock-knowledge-bases`. + * + * `gateway` is optional in Wave 1 — when set, it's stored but not yet wired + * to a gateway target. Wave 2 lights up the connector gateway target. + */ +export const KnowledgeBaseSchema = z + .object({ + type: KnowledgeBaseTypeSchema.default('AgentCoreKnowledgeBase'), + name: KnowledgeBaseNameSchema, + description: z.string().max(2048).optional(), + dataSources: z + .array(DataSourceSchema) + .min(1, 'At least one data source is required') + .superRefine( + uniqueBy( + ds => (ds.type === 'S3' ? ds.uri : ds.connectorConfigFile), + key => `Duplicate data source: ${key}` + ) + ), + gateway: z.string().min(1).optional(), + }) + .strict(); + +export type KnowledgeBase = z.infer; diff --git a/src/schema/schemas/primitives/online-eval-config.ts b/src/schema/schemas/primitives/online-eval-config.ts index 5b6f13cb6..d95bde8d9 100644 --- a/src/schema/schemas/primitives/online-eval-config.ts +++ b/src/schema/schemas/primitives/online-eval-config.ts @@ -14,21 +14,65 @@ export const OnlineEvalConfigNameSchema = z 'Must begin with a letter and contain only alphanumeric characters and underscores (max 48 chars)' ); -export const OnlineEvalConfigSchema = z.object({ - name: OnlineEvalConfigNameSchema, - /** Agent name to monitor (must match a project agent) */ - agent: z.string().min(1, 'Agent name is required'), - /** Optional runtime endpoint name to scope monitoring to a specific endpoint */ - endpoint: z.string().min(1).optional(), - /** Evaluator names (custom), Builtin.* IDs, or evaluator ARNs */ - evaluators: 
z.array(z.string().min(1)).min(1, 'At least one evaluator is required'), - /** Sampling rate as a percentage (0.01 to 100) */ - samplingRate: z.number().min(0.01).max(100), - /** Optional description for the online eval config */ - description: z.string().max(200).optional(), - /** Whether to enable execution on create (default: true) */ - enableOnCreate: z.boolean().optional(), - tags: TagsSchema.optional(), +export const ClusteringConfigSchema = z.object({ + frequencies: z + .array(z.enum(['DAILY', 'WEEKLY', 'MONTHLY'])) + .min(1) + .max(3), }); +export type ClusteringConfig = z.infer; + +export const OnlineEvalConfigSchema = z + .object({ + name: OnlineEvalConfigNameSchema, + /** Agent name to monitor (must match a project agent). Required when using managed AgentCore agents. */ + agent: z.string().min(1, 'Agent name is required').optional(), + /** Optional runtime endpoint name to scope monitoring to a specific endpoint */ + endpoint: z.string().min(1).optional(), + /** CloudWatch log group names for custom/3rd-party agents (1-5 entries) */ + logGroupNames: z.array(z.string().min(1)).min(1).max(5).optional(), + /** Service names to filter traces for custom/3rd-party agents */ + serviceNames: z.array(z.string().min(1)).min(1).optional(), + /** Evaluator names (custom), Builtin.* IDs, or evaluator ARNs */ + evaluators: z.array(z.string().min(1)).optional(), + /** Insight IDs for continuous analysis */ + insights: z.array(z.string().min(1)).optional(), + /** Clustering configuration (requires insights) */ + clusteringConfig: ClusteringConfigSchema.optional(), + /** Sampling rate as a percentage (0.01 to 100) */ + samplingRate: z.number().min(0.01).max(100), + /** Optional description for the online eval config */ + description: z.string().max(200).optional(), + /** Whether to enable execution on create (default: true) */ + enableOnCreate: z.boolean().optional(), + tags: TagsSchema.optional(), + }) + .refine(data => data.agent ?? 
data.logGroupNames, { + message: 'Either "agent" or "logGroupNames" must be provided', + }) + .refine(data => !(data.agent && data.logGroupNames), { + message: '"agent" and "logGroupNames" are mutually exclusive', + }) + .refine(data => !data.endpoint || data.agent, { + message: '"endpoint" requires "agent"', + }) + .refine(data => !data.serviceNames || data.logGroupNames, { + message: '"serviceNames" requires "logGroupNames"', + }) + .refine( + data => { + const hasEvaluators = data.evaluators != null && data.evaluators.length > 0; + const hasInsights = data.insights != null && data.insights.length > 0; + return hasEvaluators || hasInsights; + }, + { message: 'At least one of evaluators or insights must be provided' } + ) + .refine(data => !(data.evaluators && data.evaluators.length > 0 && data.insights && data.insights.length > 0), { + message: 'Cannot have both evaluators and insights (preview constraint)', + }) + .refine(data => !data.clusteringConfig || (data.insights && data.insights.length > 0), { + message: 'clusteringConfig requires insights to be set', + }); + export type OnlineEvalConfig = z.infer; diff --git a/src/schema/schemas/primitives/policy.ts b/src/schema/schemas/primitives/policy.ts index e80385f94..4c47d86a1 100644 --- a/src/schema/schemas/primitives/policy.ts +++ b/src/schema/schemas/primitives/policy.ts @@ -47,6 +47,12 @@ export const PolicyNameSchema = z export const ValidationModeSchema = z.enum(['FAIL_ON_ANY_FINDINGS', 'IGNORE_ALL_FINDINGS']); export type ValidationMode = z.infer; +export const AuthorizationPhaseSchema = z.enum(['INITIATE', 'RETURN_OUTPUT']).default('INITIATE'); +export type AuthorizationPhase = z.infer; + +export const EnforcementModeSchema = z.enum(['ACTIVE', 'LOG_ONLY']).default('ACTIVE'); +export type EnforcementMode = z.infer; + // ============================================================================ // Policy Schema // ============================================================================ @@ -57,6 +63,8 
@@ export const PolicySchema = z.object({ statement: z.string().min(1, 'Cedar policy statement is required'), sourceFile: z.string().optional(), validationMode: ValidationModeSchema.default('FAIL_ON_ANY_FINDINGS'), + enforcementMode: EnforcementModeSchema.default('ACTIVE'), + authorizationPhase: AuthorizationPhaseSchema.optional(), }); export type Policy = z.infer; From e50afbc3279da80e9fe643b754d44cd1f893bc0e Mon Sep 17 00:00:00 2001 From: Hweinstock Date: Wed, 17 Jun 2026 16:45:27 +0000 Subject: [PATCH 02/11] fix(ci): avoid caching on non-existent package-lock --- .github/workflows/lint.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 53c7c2ca6..1c9f84ea5 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -29,7 +29,7 @@ jobs: uses: actions/cache/save@v5 with: path: node_modules - key: node-modules-${{ hashFiles('package-lock.json') }} + key: node-modules-${{ hashFiles('npm-shrinkwrap.json') }} format: needs: setup @@ -42,7 +42,7 @@ jobs: - uses: actions/cache/restore@v5 with: path: node_modules - key: node-modules-${{ hashFiles('package-lock.json') }} + key: node-modules-${{ hashFiles('npm-shrinkwrap.json') }} - run: npm run format:check lint: @@ -56,7 +56,7 @@ jobs: - uses: actions/cache/restore@v5 with: path: node_modules - key: node-modules-${{ hashFiles('package-lock.json') }} + key: node-modules-${{ hashFiles('npm-shrinkwrap.json') }} - run: npm run lint security: @@ -70,7 +70,7 @@ jobs: - uses: actions/cache/restore@v5 with: path: node_modules - key: node-modules-${{ hashFiles('package-lock.json') }} + key: node-modules-${{ hashFiles('npm-shrinkwrap.json') }} - run: npm run security:audit secrets: @@ -84,7 +84,7 @@ jobs: - uses: actions/cache/restore@v5 with: path: node_modules - key: node-modules-${{ hashFiles('package-lock.json') }} + key: node-modules-${{ hashFiles('npm-shrinkwrap.json') }} - run: npm run secrets:check typecheck: @@ 
-98,7 +98,7 @@ jobs: - uses: actions/cache/restore@v5 with: path: node_modules - key: node-modules-${{ hashFiles('package-lock.json') }} + key: node-modules-${{ hashFiles('npm-shrinkwrap.json') }} - run: npm run typecheck schema-check: From 76869b698ef54ae6fd2dee81722fe5763fd1095d Mon Sep 17 00:00:00 2001 From: Hweinstock Date: Wed, 17 Jun 2026 16:49:10 +0000 Subject: [PATCH 03/11] fix(ci): avoid changes to e2e test script --- .github/workflows/e2e-tests.yml | 104 +++++++++++++++++++++++++------- 1 file changed, 83 insertions(+), 21 deletions(-) diff --git a/.github/workflows/e2e-tests.yml b/.github/workflows/e2e-tests.yml index 2d13211d0..4e8edb4aa 100644 --- a/.github/workflows/e2e-tests.yml +++ b/.github/workflows/e2e-tests.yml @@ -5,6 +5,9 @@ on: aws_region: description: 'AWS region for deployment' default: 'us-east-1' + cdk_branch: + description: 'CDK repo branch to build from (default: main)' + default: 'main' pull_request_target: branches: [main, feat/**] @@ -47,6 +50,8 @@ jobs: runs-on: ubuntu-latest environment: e2e-testing timeout-minutes: 30 + env: + AGENTCORE_TELEMETRY_DISABLED: '1' steps: - uses: actions/checkout@v6 with: @@ -70,7 +75,7 @@ jobs: id: aws run: echo "account_id=$(aws sts get-caller-identity --query Account --output text)" >> "$GITHUB_OUTPUT" - name: Get API keys from Secrets Manager - uses: aws-actions/aws-secretsmanager-get-secrets@v3 + uses: aws-actions/aws-secretsmanager-get-secrets@v2 with: secret-ids: | E2E,${{ secrets.E2E_SECRET_ARN }} @@ -78,45 +83,102 @@ jobs: - name: Generate GitHub App Token id: app-token - uses: actions/create-github-app-token@v3 + uses: actions/create-github-app-token@v1 with: app-id: ${{ vars.APP_ID }} private-key: ${{ secrets.APP_PRIVATE_KEY }} owner: aws - - name: Build CDK package from main + # Clone CDK repo for bundle script (requires App token for private repo access) + - name: Clone CDK repo run: | - git clone --depth 1 "https://x-access-token:${CDK_REPO_TOKEN}@github.com/${CDK_REPO}.git" /tmp/cdk-repo 
- cd /tmp/cdk-repo - npm ci - npm run build - TARBALL=$(npm pack --pack-destination "$RUNNER_TEMP" | tail -1) - echo "CDK_TARBALL=$RUNNER_TEMP/$TARBALL" >> "$GITHUB_ENV" + CDK_BRANCH="${{ inputs.cdk_branch || 'main' }}" + echo "Cloning CDK from branch: $CDK_BRANCH" + git clone --depth 1 --branch "$CDK_BRANCH" "https://x-access-token:${CDK_REPO_TOKEN}@github.com/${CDK_REPO}.git" /tmp/cdk-repo env: CDK_REPO_TOKEN: ${{ steps.app-token.outputs.token }} CDK_REPO: ${{ secrets.CDK_REPO_NAME }} - run: npm ci - - run: npm run build - - name: Install CLI globally - run: npm install -g "$(npm pack | tail -1)" + + - name: Bundle GA and preview tarballs + run: | + npm run bundle + GA_TARBALL=$(ls aws-agentcore-*.tgz | grep -v preview | head -1) + PREVIEW_TARBALL=$(ls aws-agentcore-*-preview-*.tgz | head -1) + echo "GA_TARBALL=$PWD/$GA_TARBALL" >> "$GITHUB_ENV" + echo "PREVIEW_TARBALL=$PWD/$PREVIEW_TARBALL" >> "$GITHUB_ENV" + env: + AGENTCORE_CDK_PATH: /tmp/cdk-repo + + - name: Install GA CLI globally + run: npm install -g "$GA_TARBALL" - name: Detect changed e2e test files id: changed run: | BASE_SHA=${{ github.event.pull_request.base.sha || 'HEAD~1' }} - CHANGED=$(git diff --name-only "$BASE_SHA"..HEAD -- 'e2e-tests/*.test.ts' \ - | grep -v '^e2e-tests/strands-bedrock\.test\.ts$' \ - | tr '\n' ' ') - echo "extra_tests=$CHANGED" >> "$GITHUB_OUTPUT" - echo "Changed e2e tests: ${CHANGED:-none}" + # If any helper file changed, run all e2e tests + HELPERS_CHANGED=$(git diff --name-only "$BASE_SHA"..HEAD -- 'e2e-tests/*.ts' \ + | grep -v '\.test\.ts$' | head -1) + if [ -n "$HELPERS_CHANGED" ]; then + GA_EXTRA=$(find e2e-tests -name '*.test.ts' \ + | grep -v '^e2e-tests/strands-bedrock\.test\.ts$' \ + | grep -v '^e2e-tests/payment-strands-bedrock\.test\.ts$' \ + | grep -v '^e2e-tests/harness-' \ + | tr '\n' ' ') + HARNESS_EXTRA=$(find e2e-tests -name 'harness-*.test.ts' \ + | grep -v '^e2e-tests/harness-bedrock\.test\.ts$' \ + | tr '\n' ' ') + else + GA_EXTRA=$(git diff --name-only 
"$BASE_SHA"..HEAD -- 'e2e-tests/*.test.ts' \ + | grep -v '^e2e-tests/strands-bedrock\.test\.ts$' \ + | grep -v '^e2e-tests/payment-strands-bedrock\.test\.ts$' \ + | grep -v '^e2e-tests/harness-' \ + | tr '\n' ' ') + HARNESS_EXTRA=$(git diff --name-only "$BASE_SHA"..HEAD -- 'e2e-tests/harness-*.test.ts' \ + | grep -v '^e2e-tests/harness-bedrock\.test\.ts$' \ + | tr '\n' ' ') + fi + echo "ga_extra=$GA_EXTRA" >> "$GITHUB_OUTPUT" + echo "harness_extra=$HARNESS_EXTRA" >> "$GITHUB_OUTPUT" + echo "GA extra tests: ${GA_EXTRA:-none}" + echo "Harness extra tests: ${HARNESS_EXTRA:-none}" + + - name: Run E2E tests (GA) + env: + AWS_ACCOUNT_ID: ${{ steps.aws.outputs.account_id }} + AWS_REGION: ${{ inputs.aws_region || 'us-east-1' }} + ANTHROPIC_API_KEY: ${{ env.E2E_ANTHROPIC_API_KEY }} + OPENAI_API_KEY: ${{ env.E2E_OPENAI_API_KEY }} + GEMINI_API_KEY: ${{ env.E2E_GEMINI_API_KEY }} + E2E_EFS_ACCESS_POINT_ARN: ${{ env.E2E_EFS_ACCESS_POINT_ARN }} + E2E_S3_ACCESS_POINT_ARN: ${{ env.E2E_S3_ACCESS_POINT_ARN }} + E2E_FILESYSTEM_SUBNET_ID: ${{ env.E2E_FILESYSTEM_SUBNET_ID }} + E2E_FILESYSTEM_SECURITY_GROUP_ID: ${{ env.E2E_FILESYSTEM_SECURITY_GROUP_ID }} + # CoinbaseCDP testnet creds for payment-strands-bedrock.test.ts. Sourced from + # the same E2E secret (keys CDP_API_KEY_ID / CDP_API_KEY_SECRET / CDP_WALLET_SECRET), + # which parse-json-secrets surfaces as E2E_CDP_*; remapped here to the unprefixed + # names the test reads. Absent on forks -> test self-skips via its hasCdpCreds gate. 
+ CDP_API_KEY_ID: ${{ env.E2E_CDP_API_KEY_ID }} + CDP_API_KEY_SECRET: ${{ env.E2E_CDP_API_KEY_SECRET }} + CDP_WALLET_SECRET: ${{ env.E2E_CDP_WALLET_SECRET }} + run: + npx vitest run --project e2e e2e-tests/strands-bedrock.test.ts e2e-tests/payment-strands-bedrock.test.ts ${{ + steps.changed.outputs.ga_extra }} + + - name: Install preview CLI globally + run: npm install -g "$PREVIEW_TARBALL" - - name: Run E2E tests + - name: Run E2E tests (preview/harness) env: AWS_ACCOUNT_ID: ${{ steps.aws.outputs.account_id }} AWS_REGION: ${{ inputs.aws_region || 'us-east-1' }} ANTHROPIC_API_KEY: ${{ env.E2E_ANTHROPIC_API_KEY }} OPENAI_API_KEY: ${{ env.E2E_OPENAI_API_KEY }} GEMINI_API_KEY: ${{ env.E2E_GEMINI_API_KEY }} - CDK_TARBALL: ${{ env.CDK_TARBALL }} - # Always run strands-bedrock as baseline, plus any e2e test files changed in the PR - run: npx vitest run --project e2e e2e-tests/strands-bedrock.test.ts ${{ steps.changed.outputs.extra_tests }} + E2E_EFS_ACCESS_POINT_ARN: ${{ env.E2E_EFS_ACCESS_POINT_ARN }} + E2E_S3_ACCESS_POINT_ARN: ${{ env.E2E_S3_ACCESS_POINT_ARN }} + E2E_FILESYSTEM_SUBNET_ID: ${{ env.E2E_FILESYSTEM_SUBNET_ID }} + E2E_FILESYSTEM_SECURITY_GROUP_ID: ${{ env.E2E_FILESYSTEM_SECURITY_GROUP_ID }} + BUILD_PREVIEW: '1' + run: npx vitest run --project e2e e2e-tests/harness-bedrock.test.ts ${{ steps.changed.outputs.harness_extra }} From b8c693781405f8b9fa8a71dfe9198a4693ced61a Mon Sep 17 00:00:00 2001 From: Hweinstock Date: Wed, 17 Jun 2026 16:52:13 +0000 Subject: [PATCH 04/11] fix(ci): avoid changes to pr-tarball --- .github/workflows/pr-tarball.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pr-tarball.yml b/.github/workflows/pr-tarball.yml index 5da53aa4a..8357370e9 100644 --- a/.github/workflows/pr-tarball.yml +++ b/.github/workflows/pr-tarball.yml @@ -90,5 +90,6 @@ jobs: ### How to install ```bash - npm install ${{ steps.release.outputs.url }} + gh release download pr-${{ github.event.pull_request.number }}-tarball 
--repo ${{ github.repository }} --pattern "*.tgz" --dir /tmp/pr-tarball + npm install -g /tmp/pr-tarball/${{ steps.tarball.outputs.name }} ``` From 2e361be97862e9d9224df3a15c7e084e3d4b541e Mon Sep 17 00:00:00 2001 From: Hweinstock Date: Wed, 17 Jun 2026 16:52:43 +0000 Subject: [PATCH 05/11] fix(ci): avoid changes to release workflow --- .../workflows/release-main-and-preview.yml | 403 +++++++----------- .github/workflows/release.yml | 79 ++-- 2 files changed, 207 insertions(+), 275 deletions(-) diff --git a/.github/workflows/release-main-and-preview.yml b/.github/workflows/release-main-and-preview.yml index 751a5167f..6a99f94f0 100644 --- a/.github/workflows/release-main-and-preview.yml +++ b/.github/workflows/release-main-and-preview.yml @@ -1,10 +1,18 @@ -name: Release Both (Main + Preview) +name: Release Main and Preview on: workflow_dispatch: inputs: + release_target: + description: 'What to release' + required: true + type: choice + options: + - both + - main-only + - preview-only main_bump_type: - description: 'Main branch version bump' + description: 'Main version bump (ignored for preview-only)' required: true type: choice options: @@ -12,11 +20,13 @@ on: - minor - major preview_bump_type: - description: 'Preview branch version bump (prerelease with preview tag)' + description: 'Preview version bump (ignored for main-only)' required: true type: choice options: - prerelease + - minor + - major main_changelog: description: 'Main changelog entry (optional)' required: false @@ -26,7 +36,7 @@ on: required: false type: string dry_run: - description: 'Dry run — create PRs but skip npm publish' + description: 'Dry run — create PR but skip npm publish' required: false type: boolean default: false @@ -35,64 +45,27 @@ permissions: contents: write pull-requests: write -jobs: - # ═══════════════════════════════════════════════════════════════════ - # Preflight — verify preview contains all of main - # ═══════════════════════════════════════════════════════════════════ - 
preflight: - name: Preflight Checks - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v6 - with: - fetch-depth: 0 - - - name: Verify running from main - run: | - if [[ "${{ github.ref }}" != "refs/heads/main" ]]; then - echo "❌ This workflow must be run from the main branch." - exit 1 - fi - - - name: Verify preview contains all of main - run: | - git fetch origin preview - MAIN_SHA=$(git rev-parse HEAD) - MERGE_BASE=$(git merge-base HEAD origin/preview) - - if [[ "$MAIN_SHA" != "$MERGE_BASE" ]]; then - echo "❌ preview branch does not contain all of main." - echo "" - echo "Main HEAD: $MAIN_SHA" - echo "Merge base: $MERGE_BASE" - echo "" - echo "The sync-preview workflow should have merged automatically." - echo "If it failed due to conflicts, resolve manually:" - echo " git checkout preview && git merge main && git push origin preview" - echo "" - echo "Then re-run this workflow." - exit 1 - fi - - echo "✅ preview contains all of main" +env: + AGENTCORE_TELEMETRY_DISABLED: '1' +jobs: # ═══════════════════════════════════════════════════════════════════ - # Step 1 — Prepare main release (bump, PR) + # Step 1 — Prepare release (bump both versions, single PR) # ═══════════════════════════════════════════════════════════════════ - prepare-main: - name: Prepare Main Release - needs: preflight + prepare-release: + name: Prepare Release runs-on: ubuntu-latest outputs: - version: ${{ steps.bump.outputs.version }} - branch: ${{ steps.bump.outputs.branch }} + main_version: ${{ steps.bump-main.outputs.version || steps.current-main.outputs.version }} + preview_version: ${{ steps.bump-preview.outputs.version }} + branch: ${{ steps.create-pr.outputs.branch }} + release_target: ${{ github.event.inputs.release_target }} steps: - name: Checkout main uses: actions/checkout@v6 with: - ref: main + ref: ${{ github.ref_name }} fetch-depth: 0 - uses: actions/setup-node@v6 @@ -109,8 +82,9 @@ jobs: - run: npm ci - - name: Bump version - id: bump + - name: Bump 
main version + id: bump-main + if: inputs.release_target != 'preview-only' env: BUMP_TYPE: ${{ github.event.inputs.main_bump_type }} CHANGELOG_INPUT: ${{ github.event.inputs.main_changelog }} @@ -123,98 +97,42 @@ jobs: NEW_VERSION=$(node -p "require('./package.json').version") echo "version=$NEW_VERSION" >> $GITHUB_OUTPUT - echo "branch=release/v$NEW_VERSION" >> $GITHUB_OUTPUT echo "📦 Main version: $NEW_VERSION" - - name: Regenerate JSON schema - run: | - npm run build - node scripts/generate-schema.mjs - npx prettier --write schemas/ - - - name: Update snapshots - run: npm run test:update-snapshots - - - name: Generate GitHub App Token - id: app-token - uses: actions/create-github-app-token@v3 - with: - app-id: ${{ vars.APP_ID }} - private-key: ${{ secrets.APP_PRIVATE_KEY }} - - - name: Create release branch and PR - env: - GH_TOKEN: ${{ steps.app-token.outputs.token }} - NEW_VERSION: ${{ steps.bump.outputs.version }} - run: | - BRANCH_NAME="release/v$NEW_VERSION" - git ls-remote --exit-code --heads origin $BRANCH_NAME && git push origin --delete $BRANCH_NAME || true - git show-ref --verify --quiet refs/heads/$BRANCH_NAME && git branch -D $BRANCH_NAME || true - - git checkout -b $BRANCH_NAME - git add -A - git commit -m "chore: bump version to $NEW_VERSION" - git push origin $BRANCH_NAME - - gh pr create \ - --base main \ - --head "$BRANCH_NAME" \ - --title "Release v$NEW_VERSION" \ - --body "## Release v$NEW_VERSION (main) - - Part of a coordinated main + preview release. 
- - ### Checklist - - [ ] Review CHANGELOG.md - - [ ] All CI checks passing - - [ ] Merge this PR before approving the publish step" - - # ═══════════════════════════════════════════════════════════════════ - # Step 2 — Prepare preview release (bump, PR) - # ═══════════════════════════════════════════════════════════════════ - prepare-preview: - name: Prepare Preview Release - needs: preflight - runs-on: ubuntu-latest - outputs: - version: ${{ steps.bump.outputs.version }} - branch: ${{ steps.bump.outputs.branch }} - - steps: - - name: Checkout preview - uses: actions/checkout@v6 - with: - ref: preview - fetch-depth: 0 - - - uses: actions/setup-node@v6 - with: - node-version: 20.x - - - name: Install uv - uses: astral-sh/setup-uv@v7 - - - name: Configure git + - name: Output current main version (preview-only) + id: current-main + if: inputs.release_target == 'preview-only' run: | - git config --global user.name "github-actions[bot]" - git config --global user.email "github-actions[bot]@users.noreply.github.com" - - - run: npm ci + echo "version=$(node -p "require('./package.json').version")" >> $GITHUB_OUTPUT - - name: Bump version - id: bump + - name: Bump preview version + id: bump-preview + if: inputs.release_target != 'main-only' env: - CHANGELOG_INPUT: ${{ github.event.inputs.preview_changelog }} + BUMP_TYPE: ${{ github.event.inputs.preview_bump_type }} run: | - BUMP_CMD="npx tsx scripts/bump-version.ts prerelease --prerelease-tag preview" - if [ -n "$CHANGELOG_INPUT" ]; then - BUMP_CMD="$BUMP_CMD --changelog \"$CHANGELOG_INPUT\"" - fi - eval $BUMP_CMD + CURRENT_VERSION=$(node -p "require('./preview-version.json').version") + echo "Current preview version: $CURRENT_VERSION" + + NEW_VERSION=$(node -e " + const current = require('./preview-version.json').version; + const bumpType = process.env.BUMP_TYPE; + const parts = current.match(/^(\d+)\.(\d+)\.(\d+)(?:-preview\.(\d+))?$/); + if (!parts) { console.error('Cannot parse version:', current); process.exit(1); } 
+ let [, major, minor, patch, pre] = parts.map((v, i) => i > 0 && i < 5 ? parseInt(v || '0') : v); + if (bumpType === 'major') { major++; minor = 0; patch = 0; pre = 1; } + else if (bumpType === 'minor') { minor++; patch = 0; pre = 1; } + else { pre = (pre || 0) + 1; } + console.log(major + '.' + minor + '.' + patch + '-preview.' + pre); + ") + + node -e " + const fs = require('fs'); + const data = { version: '$NEW_VERSION' }; + fs.writeFileSync('preview-version.json', JSON.stringify(data, null, 2) + '\n'); + " - NEW_VERSION=$(node -p "require('./package.json').version") echo "version=$NEW_VERSION" >> $GITHUB_OUTPUT - echo "branch=release/v$NEW_VERSION" >> $GITHUB_OUTPUT echo "📦 Preview version: $NEW_VERSION" - name: Regenerate JSON schema @@ -228,158 +146,144 @@ jobs: - name: Generate GitHub App Token id: app-token - uses: actions/create-github-app-token@v3 + uses: actions/create-github-app-token@v1 with: app-id: ${{ vars.APP_ID }} private-key: ${{ secrets.APP_PRIVATE_KEY }} - name: Create release branch and PR + id: create-pr env: GH_TOKEN: ${{ steps.app-token.outputs.token }} - NEW_VERSION: ${{ steps.bump.outputs.version }} + MAIN_VERSION: ${{ steps.bump-main.outputs.version || steps.current-main.outputs.version }} + PREVIEW_VERSION: ${{ steps.bump-preview.outputs.version }} + RELEASE_TARGET: ${{ github.event.inputs.release_target }} run: | - BRANCH_NAME="release/v$NEW_VERSION" - git ls-remote --exit-code --heads origin $BRANCH_NAME && git push origin --delete $BRANCH_NAME || true - git show-ref --verify --quiet refs/heads/$BRANCH_NAME && git branch -D $BRANCH_NAME || true + # Build branch name based on what we're releasing + if [ "$RELEASE_TARGET" = "main-only" ]; then + BRANCH_NAME="release/v${MAIN_VERSION}" + TITLE="Release v$MAIN_VERSION" + COMMIT_MSG="chore: bump main to $MAIN_VERSION" + elif [ "$RELEASE_TARGET" = "preview-only" ]; then + BRANCH_NAME="release/preview-v${PREVIEW_VERSION}" + TITLE="Release preview v$PREVIEW_VERSION" + COMMIT_MSG="chore: bump 
preview to $PREVIEW_VERSION" + else + BRANCH_NAME="release/v${MAIN_VERSION}+preview.${PREVIEW_VERSION}" + TITLE="Release v$MAIN_VERSION + preview v$PREVIEW_VERSION" + COMMIT_MSG="chore: bump main to $MAIN_VERSION, preview to $PREVIEW_VERSION" + fi + echo "branch=$BRANCH_NAME" >> $GITHUB_OUTPUT + + git ls-remote --exit-code --heads origin "$BRANCH_NAME" && git push origin --delete "$BRANCH_NAME" || true + git show-ref --verify --quiet "refs/heads/$BRANCH_NAME" && git branch -D "$BRANCH_NAME" || true - git checkout -b $BRANCH_NAME + git checkout -b "$BRANCH_NAME" git add -A - git commit -m "chore: bump version to $NEW_VERSION" - git push origin $BRANCH_NAME + git commit -m "$COMMIT_MSG" + git push origin "$BRANCH_NAME" - gh pr create \ - --base preview \ - --head "$BRANCH_NAME" \ - --title "Release v$NEW_VERSION (preview)" \ - --body "## Release v$NEW_VERSION (preview) + # Build PR body + BODY="## $TITLE - Part of a coordinated main + preview release. + | Package | Version | npm Tag | + |---------|---------|---------|" + if [ "$RELEASE_TARGET" != "preview-only" ]; then + BODY="$BODY + | @aws/agentcore | $MAIN_VERSION | latest |" + fi + if [ "$RELEASE_TARGET" != "main-only" ]; then + BODY="$BODY + | @aws/agentcore | $PREVIEW_VERSION | preview |" + fi + BODY="$BODY ### Checklist - [ ] Review CHANGELOG.md - [ ] All CI checks passing - [ ] Merge this PR before approving the publish step" - # ═══════════════════════════════════════════════════════════════════ - # Step 3 — Build and test both - # ═══════════════════════════════════════════════════════════════════ - test-main: - name: Test Main - needs: prepare-main - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v6 - with: - ref: release/v${{ needs.prepare-main.outputs.version }} - - uses: actions/setup-node@v6 - with: - node-version: 20.x - - name: Configure git - run: | - git config --global user.name "github-actions[bot]" - git config --global user.email "github-actions[bot]@users.noreply.github.com" - - 
run: curl -LsSf https://astral.sh/uv/install.sh | sh - - run: npm ci - - run: npm run lint - - run: npm run typecheck - - run: npm run build - - run: npm run test:unit - - test-preview: - name: Test Preview - needs: prepare-preview - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v6 - with: - ref: release/v${{ needs.prepare-preview.outputs.version }} - - uses: actions/setup-node@v6 - with: - node-version: 20.x - - name: Configure git - run: | - git config --global user.name "github-actions[bot]" - git config --global user.email "github-actions[bot]@users.noreply.github.com" - - run: curl -LsSf https://astral.sh/uv/install.sh | sh - - run: npm ci - - run: npm run lint - - run: npm run typecheck - - run: npm run build - - run: npm run test:unit + gh pr create \ + --base "${{ github.ref_name }}" \ + --head "$BRANCH_NAME" \ + --label release \ + --title "$TITLE" \ + --body "$BODY" # ═══════════════════════════════════════════════════════════════════ - # Step 4 — Manual approval gate + # Step 2 — Manual approval gate # ═══════════════════════════════════════════════════════════════════ release-approval: - name: Release Approval (Both) - needs: [test-main, test-preview, prepare-main, prepare-preview] + name: Release Approval + needs: [prepare-release] runs-on: ubuntu-latest environment: name: npm-publish-approval steps: - name: Approval checkpoint env: - MAIN_VERSION: ${{ needs.prepare-main.outputs.version }} - PREVIEW_VERSION: ${{ needs.prepare-preview.outputs.version }} + MAIN_VERSION: ${{ needs.prepare-release.outputs.main_version }} + PREVIEW_VERSION: ${{ needs.prepare-release.outputs.preview_version }} run: | - echo "✅ Both builds and tests passed" - echo "" echo "📦 Main version: $MAIN_VERSION (npm tag: latest)" echo "📦 Preview version: $PREVIEW_VERSION (npm tag: preview)" echo "" echo "⚠️ MANUAL APPROVAL REQUIRED" echo "" echo "Before approving:" - echo "1. Merge the main release PR (release/v$MAIN_VERSION → main)" - echo "2. 
Merge the preview release PR (release/v$PREVIEW_VERSION → preview)" - echo "3. Verify both PRs are merged" + echo "1. Verify PR CI checks are passing" + echo "2. Merge the release PR to main" + echo "3. Verify the PR is merged" # ═══════════════════════════════════════════════════════════════════ - # Step 5 — Verify both PRs merged before any publish + # Step 3 — Verify PR merged # ═══════════════════════════════════════════════════════════════════ - verify-merges: - name: Verify Both PRs Merged - needs: [prepare-main, prepare-preview, release-approval] + verify-merge: + name: Verify PR Merged + needs: [prepare-release, release-approval] if: ${{ !inputs.dry_run }} runs-on: ubuntu-latest steps: - name: Checkout uses: actions/checkout@v6 with: + ref: ${{ github.ref_name }} fetch-depth: 0 - name: Verify main version + if: needs.prepare-release.outputs.release_target != 'preview-only' env: - EXPECTED: ${{ needs.prepare-main.outputs.version }} + EXPECTED: ${{ needs.prepare-release.outputs.main_version }} run: | - git fetch origin main - ACTUAL=$(git show origin/main:package.json | node -p "JSON.parse(require('fs').readFileSync('/dev/stdin','utf8')).version") + git fetch origin ${{ github.ref_name }} + ACTUAL=$(git show origin/${{ github.ref_name }}:package.json | node -p "JSON.parse(require('fs').readFileSync('/dev/stdin','utf8')).version") if [ "$ACTUAL" != "$EXPECTED" ]; then - echo "❌ Main release PR not merged yet!" - echo "Expected: $EXPECTED, Got: $ACTUAL" + echo "❌ Release PR not merged yet!" 
+ echo "Expected main version: $EXPECTED, Got: $ACTUAL" exit 1 fi echo "✅ Main version verified: $ACTUAL" - name: Verify preview version + if: needs.prepare-release.outputs.release_target != 'main-only' env: - EXPECTED: ${{ needs.prepare-preview.outputs.version }} + EXPECTED: ${{ needs.prepare-release.outputs.preview_version }} run: | - git fetch origin preview - ACTUAL=$(git show origin/preview:package.json | node -p "JSON.parse(require('fs').readFileSync('/dev/stdin','utf8')).version") + ACTUAL=$(git show origin/${{ github.ref_name }}:preview-version.json | node -p "JSON.parse(require('fs').readFileSync('/dev/stdin','utf8')).version") if [ "$ACTUAL" != "$EXPECTED" ]; then - echo "❌ Preview release PR not merged yet!" - echo "Expected: $EXPECTED, Got: $ACTUAL" + echo "❌ Release PR not merged yet!" + echo "Expected preview version: $EXPECTED, Got: $ACTUAL" exit 1 fi echo "✅ Preview version verified: $ACTUAL" # ═══════════════════════════════════════════════════════════════════ - # Step 6a — Publish main to npm (tag: latest) + # Step 4a — Publish main to npm (tag: latest) # ═══════════════════════════════════════════════════════════════════ publish-main: name: Publish Main (@latest) - needs: [prepare-main, verify-merges] + needs: [prepare-release, verify-merge] + if: inputs.release_target != 'preview-only' runs-on: ubuntu-latest environment: name: npm-publish @@ -392,7 +296,7 @@ jobs: - name: Checkout main uses: actions/checkout@v6 with: - ref: main + ref: ${{ github.ref_name }} fetch-depth: 0 - uses: actions/setup-node@v6 @@ -409,7 +313,7 @@ jobs: - name: Tag and release env: - VERSION: ${{ needs.prepare-main.outputs.version }} + VERSION: ${{ needs.prepare-release.outputs.main_version }} run: | git config --global user.name "github-actions[bot]" git config --global user.email "github-actions[bot]@users.noreply.github.com" @@ -419,23 +323,24 @@ jobs: - name: Create GitHub Release uses: softprops/action-gh-release@v3 with: - tag_name: v${{ 
needs.prepare-main.outputs.version }} - name: AgentCore CLI v${{ needs.prepare-main.outputs.version }} + tag_name: v${{ needs.prepare-release.outputs.main_version }} + name: AgentCore CLI v${{ needs.prepare-release.outputs.main_version }} generate_release_notes: true prerelease: false body: | ## Installation ```bash - npm install -g @aws/agentcore@${{ needs.prepare-main.outputs.version }} + npm install -g @aws/agentcore@${{ needs.prepare-release.outputs.main_version }} ``` # ═══════════════════════════════════════════════════════════════════ - # Step 6b — Publish preview to npm (tag: preview) + # Step 4b — Publish preview to npm (tag: preview) # ═══════════════════════════════════════════════════════════════════ publish-preview: name: Publish Preview (@preview) - needs: [prepare-preview, verify-merges] + needs: [prepare-release, verify-merge] + if: inputs.release_target != 'main-only' runs-on: ubuntu-latest environment: name: npm-publish @@ -445,10 +350,10 @@ jobs: contents: write steps: - - name: Checkout preview + - name: Checkout main uses: actions/checkout@v6 with: - ref: preview + ref: ${{ github.ref_name }} fetch-depth: 0 - uses: actions/setup-node@v6 @@ -458,14 +363,30 @@ jobs: - run: npm install -g npm@11.5.1 - run: npm ci - - run: npm run build + + - name: Set preview version in package.json + env: + VERSION: ${{ needs.prepare-release.outputs.preview_version }} + run: | + node -e " + const fs = require('fs'); + const pkg = JSON.parse(fs.readFileSync('package.json', 'utf8')); + pkg.version = process.env.VERSION; + fs.writeFileSync('package.json', JSON.stringify(pkg, null, 2) + '\n'); + " + echo "Set package.json version to $VERSION for preview publish" + + - name: Build package + env: + BUILD_PREVIEW: '1' + run: npm run build - name: Publish to npm run: npm publish --access public --provenance --tag preview - name: Tag and release env: - VERSION: ${{ needs.prepare-preview.outputs.version }} + VERSION: ${{ needs.prepare-release.outputs.preview_version }} 
run: | git config --global user.name "github-actions[bot]" git config --global user.email "github-actions[bot]@users.noreply.github.com" @@ -475,8 +396,8 @@ jobs: - name: Create GitHub Release uses: softprops/action-gh-release@v3 with: - tag_name: v${{ needs.prepare-preview.outputs.version }} - name: AgentCore CLI v${{ needs.prepare-preview.outputs.version }} (Preview) + tag_name: v${{ needs.prepare-release.outputs.preview_version }} + name: AgentCore CLI v${{ needs.prepare-release.outputs.preview_version }} (Preview) generate_release_notes: true prerelease: true body: | @@ -491,14 +412,14 @@ jobs: # ═══════════════════════════════════════════════════════════════════ summary: name: Release Summary - needs: [prepare-main, prepare-preview, publish-main, publish-preview] - if: always() + needs: [prepare-release, publish-main, publish-preview] + if: always() && !cancelled() runs-on: ubuntu-latest steps: - name: Summary env: - MAIN_VERSION: ${{ needs.prepare-main.outputs.version }} - PREVIEW_VERSION: ${{ needs.prepare-preview.outputs.version }} + MAIN_VERSION: ${{ needs.prepare-release.outputs.main_version }} + PREVIEW_VERSION: ${{ needs.prepare-release.outputs.preview_version }} MAIN_STATUS: ${{ needs.publish-main.result }} PREVIEW_STATUS: ${{ needs.publish-preview.result }} run: | diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index a7bfb6416..9984ad891 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -8,8 +8,6 @@ on: required: true type: choice options: - - preview - - preview-major - patch - minor - major @@ -35,13 +33,28 @@ jobs: outputs: version: ${{ steps.bump.outputs.version }} branch: ${{ steps.bump.outputs.branch }} + dist_tag: ${{ steps.release-meta.outputs.dist_tag }} + base_branch: ${{ steps.release-meta.outputs.base_branch }} steps: - - name: Validate running from main + - name: Determine release metadata + id: release-meta run: | - if [[ "${{ github.ref }}" != "refs/heads/main" ]]; then - echo 
"⚠️ WARNING: Running from ${{ github.ref }}" - echo "⚠️ Production releases should only run from main branch" + BRANCH_NAME="${{ github.ref_name }}" + VERSION_BUMP="${{ github.event.inputs.bump_type }}" + + if [[ "$BRANCH_NAME" == "main" ]]; then + echo "dist_tag=latest" >> $GITHUB_OUTPUT + echo "base_branch=main" >> $GITHUB_OUTPUT + else + if [[ "$VERSION_BUMP" != "prerelease" ]]; then + echo "❌ ERROR: Only the prerelease bump type is allowed from non-main branches." + echo "Current branch: $BRANCH_NAME, bump type: $VERSION_BUMP" + exit 1 + fi + echo "dist_tag=preview" >> $GITHUB_OUTPUT + echo "base_branch=$BRANCH_NAME" >> $GITHUB_OUTPUT + echo "ℹ️ Publishing preview release from branch: $BRANCH_NAME" fi - name: Checkout code @@ -149,7 +162,7 @@ jobs: - name: Generate GitHub App Token id: app-token - uses: actions/create-github-app-token@v3 + uses: actions/create-github-app-token@v1 with: app-id: ${{ vars.APP_ID }} private-key: ${{ secrets.APP_PRIVATE_KEY }} @@ -158,46 +171,40 @@ jobs: env: GH_TOKEN: ${{ steps.app-token.outputs.token }} NEW_VERSION: ${{ steps.bump.outputs.version }} - GITHUB_REF: ${{ github.ref }} + BASE_BRANCH: ${{ steps.release-meta.outputs.base_branch }} + DIST_TAG: ${{ steps.release-meta.outputs.dist_tag }} GITHUB_ACTOR: ${{ github.actor }} run: | BRANCH_NAME="release/v$NEW_VERSION" - WARNING_TEXT="" - if [ "$GITHUB_REF" != "refs/heads/main" ]; then - WARNING_TEXT="**WARNING**: Not running from main branch!" - else - WARNING_TEXT="✅ Running from main branch" + RELEASE_TYPE="Production" + if [ "$DIST_TAG" != "latest" ]; then + RELEASE_TYPE="Preview (npm tag: $DIST_TAG)" fi gh pr create \ - --base main \ + --base "$BASE_BRANCH" \ --head "$BRANCH_NAME" \ + --label release \ --title "Release v$NEW_VERSION" \ - --body "## 🚀 Release v$NEW_VERSION + --body "## Release v$NEW_VERSION This PR was automatically created by the release workflow. 
- ### ⚠️ Pre-merge Checklist + **Release type:** $RELEASE_TYPE + **Base branch:** $BASE_BRANCH + + ### Pre-merge Checklist - [ ] Review CHANGELOG.md - ensure it has meaningful release notes - [ ] Verify version numbers are correct in all files - [ ] All CI checks are passing - ### 📝 How to improve changelog - If the auto-generated changelog isn't good enough: - 1. Edit CHANGELOG.md in this PR - 2. Commit the changes - 3. Then approve and merge - - ### 🔄 Release Process + ### Release Process After merging this PR: 1. Package will be built and tested 2. **Manual approval required** before publishing to npm 3. GitHub release and tag created after publication - ### 🚨 Running from: $GITHUB_REF - $WARNING_TEXT - --- *Triggered by @$GITHUB_ACTOR*" @@ -292,7 +299,6 @@ jobs: name: Publish to npm needs: [prepare-release, release-approval] runs-on: ubuntu-latest - if: github.ref == 'refs/heads/main' environment: name: npm-publish url: https://www.npmjs.com/package/@aws/agentcore @@ -301,14 +307,15 @@ jobs: contents: write # Required to push git tags steps: - - name: Checkout latest main (AFTER PR merge) + - name: Checkout base branch (AFTER PR merge) uses: actions/checkout@v6 with: - ref: main + ref: ${{ needs.prepare-release.outputs.base_branch }} fetch-depth: 0 - name: Verify we have the merged code run: | + echo "Branch: ${{ needs.prepare-release.outputs.base_branch }}" echo "Current version in package.json:" cat package.json | grep '"version"' echo "" @@ -343,8 +350,9 @@ jobs: env: VERSION: ${{ steps.version.outputs.version }} EXPECTED_VERSION: ${{ needs.prepare-release.outputs.version }} + BASE_BRANCH: ${{ needs.prepare-release.outputs.base_branch }} run: | - echo "Version in main branch: $VERSION" + echo "Version in $BASE_BRANCH: $VERSION" echo "Expected version from PR: $EXPECTED_VERSION" if [ "$VERSION" != "$EXPECTED_VERSION" ]; then @@ -352,15 +360,15 @@ jobs: echo "❌ ERROR: Version mismatch!" echo "" echo "The release PR has NOT been merged yet." 
- echo "Main branch has: $VERSION" + echo "$BASE_BRANCH has: $VERSION" echo "Release PR has: $EXPECTED_VERSION" echo "" - echo "👉 Please MERGE the release PR first, then approve this deployment." + echo "Please MERGE the release PR first, then approve this deployment." echo "" exit 1 fi - echo "✅ Version matches - PR was merged correctly" + echo "Version matches - PR was merged correctly" - name: Install dependencies run: npm ci @@ -369,10 +377,13 @@ jobs: run: npm run build - name: Publish to npm (using OIDC trusted publishing) + env: + DIST_TAG: ${{ needs.prepare-release.outputs.dist_tag }} run: | echo "Publishing with OIDC trusted publishing..." echo "No NPM_TOKEN needed - using GitHub OIDC" - npm publish --access public --provenance --tag latest + echo "Dist tag: $DIST_TAG" + npm publish --access public --provenance --tag "$DIST_TAG" - name: Create and push tag env: From eadca11222b78c96b87f06eede5fa781820c16bf Mon Sep 17 00:00:00 2001 From: Hweinstock Date: Wed, 17 Jun 2026 16:53:36 +0000 Subject: [PATCH 06/11] fix(ci): remove duplicated slack issue notification pr --- .../workflows/slack-issue-notification.yml | 53 ------------------- 1 file changed, 53 deletions(-) delete mode 100644 .github/workflows/slack-issue-notification.yml diff --git a/.github/workflows/slack-issue-notification.yml b/.github/workflows/slack-issue-notification.yml deleted file mode 100644 index eca83b08b..000000000 --- a/.github/workflows/slack-issue-notification.yml +++ /dev/null @@ -1,53 +0,0 @@ -name: Slack Issue Notification - -on: - issues: - types: [opened] - -permissions: {} - -jobs: - notify-slack: - runs-on: ubuntu-latest - steps: - - name: Send issue details to Slack - # Attacker-controlled fields are passed through env: rather than - # interpolated into the YAML payload, to prevent workflow injection. - # Schema is uniform across event types: every workflow sends the - # same 20 keys so Slack-side branching on event_type is reliable. 
- # For issue_opened, the issue_* fields carry the data and the - # pr_*/comment_* fields are empty. - env: - REPOSITORY: ${{ github.repository }} - CREATED_AT: ${{ github.event.issue.created_at }} - ISSUE_NUMBER: ${{ github.event.issue.number }} - ISSUE_TITLE: ${{ github.event.issue.title }} - ISSUE_URL: ${{ github.event.issue.html_url }} - ISSUE_AUTHOR: ${{ github.event.issue.user.login }} - ISSUE_BODY: ${{ github.event.issue.body }} - LABELS: ${{ join(github.event.issue.labels.*.name, ', ') }} - uses: slackapi/slack-github-action@v3.0.3 - with: - webhook: ${{ secrets.SLACK_WEBHOOK_URL }} - webhook-type: webhook-trigger - payload: | - event_type: "issue_opened" - repository: "${{ env.REPOSITORY }}" - created_at: "${{ env.CREATED_AT }}" - issue_number: "${{ env.ISSUE_NUMBER }}" - issue_title: ${{ toJSON(env.ISSUE_TITLE) }} - issue_url: "${{ env.ISSUE_URL }}" - issue_author: "${{ env.ISSUE_AUTHOR }}" - issue_body: ${{ toJSON(env.ISSUE_BODY) }} - labels: ${{ toJSON(env.LABELS) }} - pr_number: "" - pr_title: "" - pr_url: "" - pr_author: "" - pr_state: "" - pr_closed_at: "" - pr_merged_at: "" - comment_id: "" - comment_url: "" - comment_author: "" - comment_body: "" From 7ce96688d121af601a2d84c16ed88976a88dfa48 Mon Sep 17 00:00:00 2001 From: Hweinstock Date: Wed, 17 Jun 2026 16:55:05 +0000 Subject: [PATCH 07/11] fix(ci): delete sync-preview workflow changes --- .github/workflows/sync-preview.yml | 191 ----------------------------- 1 file changed, 191 deletions(-) delete mode 100644 .github/workflows/sync-preview.yml diff --git a/.github/workflows/sync-preview.yml b/.github/workflows/sync-preview.yml deleted file mode 100644 index 14f0add13..000000000 --- a/.github/workflows/sync-preview.yml +++ /dev/null @@ -1,191 +0,0 @@ -name: Sync Preview with Main - -on: - push: - branches: [main] - -concurrency: - group: sync-preview - cancel-in-progress: false - -permissions: - contents: write - pull-requests: write - -jobs: - sync: - name: Merge main into preview - runs-on: 
ubuntu-latest - steps: - - name: Generate GitHub App Token - id: app-token - uses: actions/create-github-app-token@v3 - with: - app-id: ${{ vars.APP_ID }} - private-key: ${{ secrets.APP_PRIVATE_KEY }} - - - name: Checkout preview - uses: actions/checkout@v6 - with: - ref: preview - fetch-depth: 0 - token: ${{ steps.app-token.outputs.token }} - - - name: Configure git - run: | - git config --global user.name "github-actions[bot]" - git config --global user.email "github-actions[bot]@users.noreply.github.com" - - - name: Check if sync needed - id: check - run: | - git fetch origin main - MAIN_SHA=$(git rev-parse origin/main) - MERGE_BASE=$(git merge-base HEAD origin/main) - - if [[ "$MAIN_SHA" == "$MERGE_BASE" ]]; then - echo "✅ preview already contains all of main" - echo "needed=false" >> $GITHUB_OUTPUT - else - echo "needed=true" >> $GITHUB_OUTPUT - fi - - - name: Skip if already synced - if: steps.check.outputs.needed == 'false' - run: echo "Nothing to sync." - - - name: Merge main into preview - if: steps.check.outputs.needed == 'true' - id: merge - run: | - # Save preview's version before merge so we can restore it after - PREVIEW_VERSION=$(node -p "require('./package.json').version") - echo "preview_version=$PREVIEW_VERSION" >> $GITHUB_OUTPUT - - if git merge origin/main --no-edit -m "chore: merge main into preview"; then - echo "status=clean" >> $GITHUB_OUTPUT - else - # preview carries a higher version string than main (e.g. 1.0.0-preview.X vs 0.13.X). - # This means package.json/package-lock.json almost always conflict on the version field. - # Accept main's content here; the version is restored in the next step. 
- for f in package.json package-lock.json; do - if git diff --name-only --diff-filter=U | grep -qx "$f"; then - git checkout --theirs "$f" - git add "$f" - echo " ↳ resolved $f conflict (accepted main, will restore version)" - fi - done - - # Check if all conflicts are now resolved - if [[ -z "$(git diff --name-only --diff-filter=U)" ]]; then - git commit --no-edit -m "chore: merge main into preview" - echo "status=clean" >> $GITHUB_OUTPUT - else - echo "status=conflict" >> $GITHUB_OUTPUT - fi - fi - - - name: Restore preview-owned files - if: steps.merge.outputs.status == 'clean' - run: | - # These files are auto-generated during preview releases and must not - # be overwritten by main's versions (schema-check CI will reject changes - # to schemas/, and CHANGELOG.md tracks preview releases separately). - PREVIEW_HEAD=$(git rev-parse HEAD^1) - for f in schemas/agentcore.schema.v1.json CHANGELOG.md; do - if git show "$PREVIEW_HEAD:$f" > /dev/null 2>&1; then - git show "$PREVIEW_HEAD:$f" > "$f" - git add "$f" - echo " ↳ restored preview's $f" - fi - done - if ! 
git diff --cached --quiet; then - git commit -m "chore: restore preview-owned files (schema, changelog)" - fi - - - name: Restore preview version and push - if: steps.merge.outputs.status == 'clean' - run: | - PREVIEW_VERSION="${{ steps.merge.outputs.preview_version }}" - CURRENT_VERSION=$(node -p "require('./package.json').version") - - if [[ "$CURRENT_VERSION" != "$PREVIEW_VERSION" ]]; then - PREVIEW_VERSION="$PREVIEW_VERSION" node -e " - const fs = require('fs'); - const pkg = JSON.parse(fs.readFileSync('package.json', 'utf8')); - pkg.version = process.env.PREVIEW_VERSION; - fs.writeFileSync('package.json', JSON.stringify(pkg, null, 2) + '\n'); - " - if [[ -f package-lock.json ]]; then - PREVIEW_VERSION="$PREVIEW_VERSION" node -e " - const fs = require('fs'); - const lock = JSON.parse(fs.readFileSync('package-lock.json', 'utf8')); - lock.version = process.env.PREVIEW_VERSION; - if (lock.packages && lock.packages['']) { - lock.packages[''].version = process.env.PREVIEW_VERSION; - } - fs.writeFileSync('package-lock.json', JSON.stringify(lock, null, 2) + '\n'); - " - fi - git add package.json - [[ -f package-lock.json ]] && git add package-lock.json - git commit -m "chore: restore preview version ($PREVIEW_VERSION)" - fi - - git push origin HEAD:preview - echo "✅ main merged into preview and pushed" - - - name: Create PR for conflict resolution - if: steps.merge.outputs.status == 'conflict' - env: - GH_TOKEN: ${{ steps.app-token.outputs.token }} - run: | - # Check if there's already an open sync PR (match by branch prefix, not title search) - COUNT=$(gh pr list --base preview --state open --json headRefName \ - --jq '[.[] | select(.headRefName | startswith("sync-preview/"))] | length') - if [[ "$COUNT" != "0" ]]; then - echo "ℹ️ Sync PR already open — skipping duplicate." 
- exit 0 - fi - - # Abort the failed merge and redo on a branch for the PR - git merge --abort - - BRANCH="sync-preview/merge-main-$(date +%Y%m%d-%H%M%S)" - git checkout -b "$BRANCH" - git merge origin/main --no-edit -m "chore: merge main into preview (conflicts need resolution)" || true - git add -A - git commit --no-edit -m "chore: merge main into preview (conflicts need resolution)" || true - git push origin "$BRANCH" - - GH_USER=$(gh api "/repos/${{ github.repository }}/commits/$(git rev-parse origin/main)" --jq '.author.login // empty' 2>/dev/null || echo "") - MENTION="" - if [[ -n "$GH_USER" ]]; then - MENTION="cc @${GH_USER}" - fi - - gh pr create \ - --base preview \ - --head "$BRANCH" \ - --title "sync-preview: merge main into preview (conflicts)" \ - --body "$(cat < - \`\`\` - 2. Search for conflict markers and resolve them: - \`\`\`bash - grep -rn '<<<<<<< HEAD' . - \`\`\` - 3. Keep preview-specific values (package version, preview tests, etc.) — accept main's changes for everything else. - 4. Commit and push, then merge this PR. 
- - ${MENTION} - - _Opened automatically by the sync-preview workflow._ - BODY - )" From bdceacadc9d0598bec9e78568fb407d8204f4a90 Mon Sep 17 00:00:00 2001 From: Hweinstock Date: Wed, 17 Jun 2026 16:56:17 +0000 Subject: [PATCH 08/11] fix(ci): add back pre-release tarball generation workflow --- .github/workflows/prerelease-tarball.yml | 84 ++++++++++++++++++++++++ 1 file changed, 84 insertions(+) create mode 100644 .github/workflows/prerelease-tarball.yml diff --git a/.github/workflows/prerelease-tarball.yml b/.github/workflows/prerelease-tarball.yml new file mode 100644 index 000000000..daddcd7c8 --- /dev/null +++ b/.github/workflows/prerelease-tarball.yml @@ -0,0 +1,84 @@ +name: Prerelease Tarball + +on: + push: + branches: [main] + # Manually trigger to pull in the latest CDK constructs changes + workflow_dispatch: + +permissions: + contents: write + +concurrency: + group: prerelease-tarball + cancel-in-progress: true + +jobs: + prerelease-tarball: + runs-on: ubuntu-latest + timeout-minutes: 15 + env: + TARBALL_BASE: agentcore-cli-prerelease + steps: + - uses: actions/checkout@v6 + - uses: actions/setup-node@v6 + with: + node-version: '20.x' + cache: 'npm' + - uses: astral-sh/setup-uv@v7 + - name: Generate GitHub App Token + id: app-token + uses: actions/create-github-app-token@v1 + with: + app-id: ${{ vars.APP_ID }} + private-key: ${{ secrets.APP_PRIVATE_KEY }} + owner: aws + - name: Clone CDK repo + run: | + git clone --depth 1 "https://x-access-token:${CDK_REPO_TOKEN}@github.com/${CDK_REPO}.git" /tmp/cdk-repo + env: + CDK_REPO_TOKEN: ${{ steps.app-token.outputs.token }} + CDK_REPO: ${{ secrets.CDK_REPO_NAME }} + - name: Compute version suffix + id: version + run: | + CLI_SHA=$(git rev-parse --short=5 HEAD) + CDK_SHA=$(git -C /tmp/cdk-repo rev-parse --short=5 HEAD) + SUFFIX="${CLI_SHA}-${CDK_SHA}" + echo "suffix=$SUFFIX" >> $GITHUB_OUTPUT + echo "Version suffix: $SUFFIX" + - run: npm run bundle + env: + AGENTCORE_CDK_PATH: /tmp/cdk-repo + 
AGENTCORE_TARBALL_OUTPUT: ${{ env.TARBALL_BASE }} + AGENTCORE_TARBALL_VERSION_SUFFIX: ${{ steps.version.outputs.suffix }} + - name: Create or update prerelease + env: + GH_TOKEN: ${{ steps.app-token.outputs.token }} + VERSION_SUFFIX: ${{ steps.version.outputs.suffix }} + run: | + TAG="prerelease" + + # Delete existing release if it exists (to update the tarballs) + gh release delete "$TAG" --yes --cleanup-tag 2>/dev/null || true + + # Create a new pre-release with both tarballs + gh release create "$TAG" \ + "${TARBALL_BASE}.tgz" \ + "${TARBALL_BASE}-preview.tgz" \ + --title "Prerelease" \ + --notes "Auto-generated tarballs from the latest commit on main. + + Version: \`${VERSION_SUFFIX}\` (cli-cdk) + + **GA build** (no harness features): + \`\`\` + npm install -g https://github.com/aws/agentcore-cli/releases/download/prerelease/${TARBALL_BASE}.tgz + \`\`\` + + **Preview build** (harness features enabled): + \`\`\` + npm install -g https://github.com/aws/agentcore-cli/releases/download/prerelease/${TARBALL_BASE}-preview.tgz + \`\`\`" \ + --prerelease \ + --target "${{ github.sha }}" From acf34c40fbb0a610c56e780cb1ae88bd95aa627a Mon Sep 17 00:00:00 2001 From: notgitika Date: Wed, 17 Jun 2026 12:57:08 -0400 Subject: [PATCH 09/11] fix(release): revert schemas/agentcore.schema.v1.json to main MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The build-and-test schema-check gate blocks any direct edit to schemas/agentcore.schema.vN.json — the JSON schema is regenerated by the release workflow (release.yml step 'Regenerate JSON schema' runs node scripts/generate-schema.mjs) and committed during release, never by hand on a release PR. Revert this branch's hand-edited schema diff back to main. The release workflow will regenerate it from the source-of-truth Zod schemas in src/schema/ when this PR runs through the actual release flow. 
--- schemas/agentcore.schema.v1.json | 201 +++++++++++++++++++++++++++++-- 1 file changed, 191 insertions(+), 10 deletions(-) diff --git a/schemas/agentcore.schema.v1.json b/schemas/agentcore.schema.v1.json index 8780a3ded..36e417528 100644 --- a/schemas/agentcore.schema.v1.json +++ b/schemas/agentcore.schema.v1.json @@ -284,22 +284,76 @@ "filesystemConfigurations": { "type": "array", "items": { - "type": "object", - "properties": { - "sessionStorage": { + "anyOf": [ + { "type": "object", "properties": { - "mountPath": { - "type": "string", - "pattern": "^\\/mnt\\/[^/]+$" + "sessionStorage": { + "type": "object", + "properties": { + "mountPath": { + "type": "string", + "minLength": 6, + "maxLength": 200, + "pattern": "^\\/mnt\\/[a-zA-Z0-9._-]+\\/?$" + } + }, + "required": ["mountPath"], + "additionalProperties": false + } + }, + "required": ["sessionStorage"], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "efsAccessPoint": { + "type": "object", + "properties": { + "accessPointArn": { + "type": "string", + "pattern": "^arn:aws[-a-z]*:elasticfilesystem:[a-z][a-z0-9-]*:[0-9]{12}:access-point\\/fsap-[0-9a-f]{8,40}$" + }, + "mountPath": { + "type": "string", + "minLength": 6, + "maxLength": 200, + "pattern": "^\\/mnt\\/[a-zA-Z0-9._-]+\\/?$" + } + }, + "required": ["accessPointArn", "mountPath"], + "additionalProperties": false + } + }, + "required": ["efsAccessPoint"], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "s3FilesAccessPoint": { + "type": "object", + "properties": { + "accessPointArn": { + "type": "string", + "pattern": "^arn:aws[-a-z]*:s3files:[a-z][a-z0-9-]*:[0-9]{12}:file-system\\/fs-[0-9a-f]{17,40}\\/access-point\\/fsap-[0-9a-f]{17,40}$" + }, + "mountPath": { + "type": "string", + "minLength": 6, + "maxLength": 200, + "pattern": "^\\/mnt\\/[a-zA-Z0-9._-]+\\/?$" + } + }, + "required": ["accessPointArn", "mountPath"], + "additionalProperties": false } }, - "required": ["mountPath"], + 
"required": ["s3FilesAccessPoint"], "additionalProperties": false } - }, - "required": ["sessionStorage"], - "additionalProperties": false + ] } }, "endpoints": { @@ -550,6 +604,27 @@ }, "required": ["authorizerType", "name"], "additionalProperties": false + }, + { + "type": "object", + "properties": { + "authorizerType": { + "type": "string", + "const": "PaymentCredentialProvider" + }, + "name": { + "type": "string", + "minLength": 1, + "maxLength": 128, + "pattern": "^[a-zA-Z0-9\\-_]+$" + }, + "provider": { + "type": "string", + "enum": ["CoinbaseCDP", "StripePrivy"] + } + }, + "required": ["authorizerType", "name", "provider"], + "additionalProperties": false } ] } @@ -2250,6 +2325,112 @@ "required": ["name", "schemaType", "config"], "additionalProperties": false } + }, + "payments": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string", + "minLength": 1, + "maxLength": 48, + "pattern": "^[a-zA-Z][a-zA-Z0-9]{0,47}$" + }, + "authorizerType": { + "default": "AWS_IAM", + "type": "string", + "enum": ["AWS_IAM", "CUSTOM_JWT"] + }, + "authorizerConfiguration": { + "type": "object", + "properties": { + "customJWTAuthorizer": { + "type": "object", + "properties": { + "discoveryUrl": { + "type": "string", + "format": "uri" + }, + "allowedClients": { + "type": "array", + "items": { + "type": "string" + } + }, + "allowedAudience": { + "type": "array", + "items": { + "type": "string" + } + }, + "allowedScopes": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": ["discoveryUrl"], + "additionalProperties": false + } + }, + "required": ["customJWTAuthorizer"], + "additionalProperties": false + }, + "connectors": { + "default": [], + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string", + "minLength": 1, + "maxLength": 48, + "pattern": "^[a-zA-Z][a-zA-Z0-9_]{0,47}$" + }, + "provider": { + "default": "CoinbaseCDP", + "type": "string", + "enum": 
["CoinbaseCDP", "StripePrivy"] + }, + "credentialName": { + "type": "string", + "minLength": 1 + } + }, + "required": ["name", "credentialName"], + "additionalProperties": false + } + }, + "description": { + "type": "string" + }, + "autoPayment": { + "default": true, + "type": "boolean" + }, + "defaultSpendLimit": { + "default": "10.00", + "type": "string" + }, + "paymentToolAllowlist": { + "type": "array", + "items": { + "type": "string" + } + }, + "networkPreferences": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": ["name"], + "additionalProperties": false + } } }, "required": ["name", "version"], From 94c74149aebf2498f6646def8b53e6dd369e0b2a Mon Sep 17 00:00:00 2001 From: Hweinstock Date: Wed, 17 Jun 2026 17:02:49 +0000 Subject: [PATCH 10/11] fix(ci): remove legacy support for _ to - --- .github/workflows/e2e-tests-full.yml | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/.github/workflows/e2e-tests-full.yml b/.github/workflows/e2e-tests-full.yml index e20a6b037..ce7878d95 100644 --- a/.github/workflows/e2e-tests-full.yml +++ b/.github/workflows/e2e-tests-full.yml @@ -78,16 +78,9 @@ jobs: elif [ "${{ github.ref_name }}" != "main" ]; then CDK_BRANCH="main" REPO_URL="https://x-access-token:${CDK_REPO_TOKEN}@github.com/${CDK_REPO}.git" - # Check if a branch exists on the CDK repo with the same + # Check if a branch exists on the CDK repo with the same name if git ls-remote --exit-code --heads "$REPO_URL" "${{ github.ref_name }}" > /dev/null 2>&1; then CDK_BRANCH="${{ github.ref_name }}" - else - # Check if a branch exists with _ subbed for -. 
(legacy support for summit branch) - ALT="${{ github.ref_name }}" - ALT="${ALT//_/-}" - if git ls-remote --exit-code --heads "$REPO_URL" "$ALT" > /dev/null 2>&1; then - CDK_BRANCH="$ALT" - fi fi else CDK_BRANCH="main" From 88b014ae05553984e528f3bfec5d08d53b2820e4 Mon Sep 17 00:00:00 2001 From: Hweinstock Date: Wed, 17 Jun 2026 17:10:26 +0000 Subject: [PATCH 11/11] fix(ci): add back logic to avoid schema check on release PRs --- .github/workflows/lint.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 1c9f84ea5..dc5e35ba8 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -102,6 +102,7 @@ jobs: - run: npm run typecheck schema-check: + if: ${{ !contains(github.event.pull_request.labels.*.name, 'release') }} runs-on: ubuntu-latest steps: - uses: actions/checkout@v6