From a95917a2f0cf26e2a601b529215f9392e0247722 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radek=20Je=C5=BEek?= Date: Wed, 11 Mar 2026 12:51:57 +0100 Subject: [PATCH] feat: kagenti integration PoC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Replace Docker/registry-based providers with network-only providers - Add kagenti agent sync cron and provider health check refresh - Expose otel-collector via HTTPRoute for local agent telemetry - Upgrade Phoenix image to 12.31.2 for GraphQL API compatibility - Fix server traces reaching otel-collector (port 4318→8335) - Set default OTEL endpoint in Python SDK for local deployments - Simplify provider model: ProviderState (online/offline) replaces ProviderType/ProviderStatus/ProviderUnmanagedStatus - Remove Docker image labels, GitHub version resolving, provider builds - Fix checkbox selection UX in agent remove command Signed-off-by: Radek Ježek --- .../src/agentstack_cli/__init__.py | 16 +- apps/agentstack-cli/src/agentstack_cli/api.py | 4 +- .../src/agentstack_cli/auth_manager.py | 45 ++ .../src/agentstack_cli/commands/agent.py | 522 +++++++------ .../src/agentstack_cli/commands/build.py | 264 ++----- .../src/agentstack_cli/commands/platform.py | 612 +++++++++++++-- .../src/agentstack_cli/commands/self.py | 2 +- .../src/agentstack_cli/commands/server.py | 9 + .../src/agentstack_cli/configuration.py | 26 +- .../data/k8s/keycloak-postgres-pv.yaml | 17 + .../data/k8s/patch_kagenti_otel.py | 143 ++++ .../data/k8s/phoenix-data-pv.yaml | 17 + .../src/agentstack_cli/kagenti_client.py | 77 ++ .../src/agentstack_cli/utils.py | 23 - .../a2a/extensions/services/platform.py | 2 +- .../src/agentstack_sdk/platform/__init__.py | 2 - .../src/agentstack_sdk/platform/client.py | 2 +- .../src/agentstack_sdk/platform/common.py | 23 - .../src/agentstack_sdk/platform/provider.py | 69 +- .../agentstack_sdk/platform/provider_build.py | 191 ----- .../platform/provider_discovery.py | 58 -- 
.../src/agentstack_sdk/server/server.py | 62 +- .../src/agentstack_sdk/server/telemetry.py | 8 + .../src/client/api/common/schemas.ts | 32 - .../src/client/api/common/types.ts | 26 +- .../src/client/api/core/client.ts | 3 - .../src/client/api/provider-builds/api.ts | 80 -- .../src/client/api/provider-builds/schemas.ts | 98 --- .../src/client/api/provider-builds/types.ts | 61 -- .../src/client/api/providers/schemas.ts | 43 +- .../src/client/api/providers/types.ts | 17 +- .../src/client/api/schemas.ts | 1 - .../agentstack-sdk-ts/src/client/api/types.ts | 1 - .../server/core/config/schemas.ts | 2 +- .../src/agentstack_server/api/auth/auth.py | 9 +- .../src/agentstack_server/api/dependencies.py | 6 +- .../api/routes/provider_builds.py | 92 --- .../api/routes/provider_discovery.py | 35 - .../agentstack_server/api/routes/providers.py | 67 +- .../agentstack_server/api/schema/contexts.py | 5 +- .../agentstack_server/api/schema/provider.py | 46 +- .../api/schema/provider_build.py | 21 - .../src/agentstack_server/application.py | 8 +- .../src/agentstack_server/bootstrap.py | 22 - .../src/agentstack_server/configuration.py | 49 +- .../src/agentstack_server/domain/constants.py | 5 - .../domain/models/permissions.py | 4 +- .../domain/models/provider.py | 135 +--- .../domain/models/provider_build.py | 97 --- .../domain/models/provider_discovery.py | 29 - .../domain/models/registry.py | 98 +-- .../domain/repositories/provider.py | 6 +- .../domain/repositories/provider_build.py | 35 - .../domain/repositories/provider_discovery.py | 24 - .../src/agentstack_server/exceptions.py | 19 +- .../infrastructure/kagenti/__init__.py | 2 + .../infrastructure/kagenti/client.py | 81 ++ .../default_templates/build-provider-job.yaml | 191 ----- .../build-provider-secret.yaml | 8 - .../default_templates/deployment.yaml | 44 -- .../kubernetes/default_templates/secret.yaml | 8 - .../kubernetes/default_templates/service.yaml | 16 - .../kubernetes/provider_build_manager.py | 227 ------ 
.../kubernetes/provider_deployment_manager.py | 314 -------- .../alembic/versions/c0095389475b_.py | 141 ++++ .../persistence/repositories/provider.py | 36 +- .../repositories/provider_build.py | 151 ---- .../repositories/provider_discovery.py | 108 --- .../persistence/unit_of_work.py | 8 - .../agentstack_server/jobs/crons/cleanup.py | 10 - .../agentstack_server/jobs/crons/provider.py | 200 +++-- .../agentstack_server/jobs/procrastinate.py | 4 - .../src/agentstack_server/jobs/queues.py | 2 - .../jobs/tasks/provider_build.py | 57 -- .../jobs/tasks/provider_discovery.py | 20 - .../src/agentstack_server/run_workers.py | 2 - .../service_layer/build_manager.py | 27 - .../service_layer/deployment_manager.py | 38 - .../service_layer/services/a2a.py | 62 +- .../service_layer/services/provider_build.py | 285 ------- .../services/provider_discovery.py | 146 ---- .../service_layer/services/providers.py | 264 +------ .../service_layer/unit_of_work.py | 4 - .../src/agentstack_server/utils/docker.py | 282 +------ .../src/agentstack_server/utils/github.py | 279 ------- apps/agentstack-server/tasks.toml | 88 +-- apps/agentstack-server/tests/conftest.py | 4 +- .../tests/e2e/agents/test_agent_builds.py | 62 -- .../e2e/agents/test_platform_extensions.py | 30 +- .../tests/e2e/routes/test_a2a_proxy.py | 2 +- .../e2e/routes/test_provider_variables.py | 85 --- .../tests/e2e/routes/test_providers.py | 13 +- .../persistence/repositories/test_env.py | 30 +- .../persistence/repositories/test_provider.py | 56 +- .../tests/integration/utils/test_docker.py | 41 - .../tests/integration/utils/test_github.py | 52 -- .../unit/domain/models/test_permissions.py | 8 +- .../tests/unit/utils/test_docker.py | 52 -- .../tests/unit/utils/test_github.py | 75 -- .../agents/api/queries/useListAgents.ts | 9 +- .../components/import/ImportAgentsModal.tsx | 121 +-- .../modules/agents/hooks/useImportAgent.ts | 161 +--- .../modules/agents/hooks/useProviderStatus.ts | 10 +- 
.../agentstack-ui/src/modules/agents/types.ts | 3 +- .../src/modules/provider-builds/api/index.ts | 37 - .../src/modules/provider-builds/api/keys.ts | 16 - .../api/mutations/useCreateProviderBuild.ts | 22 - .../api/mutations/usePreviewProviderBuild.ts | 22 - .../api/queries/useProviderBuild.ts | 33 - .../api/queries/useProviderBuildLogs.ts | 33 - .../src/modules/provider-builds/api/utils.ts | 26 - .../src/modules/runs/components/RunInput.tsx | 4 +- apps/agentstack-ui/src/utils/constants.ts | 2 +- apps/agentstack-ui/src/utils/feature-flags.ts | 2 - apps/agentstack-ui/tasks.toml | 8 +- apps/agentstack-ui/template.env | 6 +- .../200-microshift-local.conf | 4 +- .../system/kubectl-port-forward@.service | 2 +- .../agent-integration/observability.mdx | 13 +- docs/development/reference/cli-reference.mdx | 38 +- docs/poc-kagenti-integration.md | 701 ++++++++++++++++++ .../agent-integration/observability.mdx | 13 +- docs/stable/reference/cli-reference.mdx | 37 +- helm/.claude/settings.json | 5 + helm/Chart.lock | 9 +- helm/Chart.yaml | 7 +- helm/templates/NOTES.txt | 10 - helm/templates/_helpers.tpl | 104 +-- helm/templates/agent/deployment.yaml | 86 --- helm/templates/agent/secret.yaml | 12 - helm/templates/agent/service.yaml | 21 - helm/templates/collector/config.yaml | 77 -- helm/templates/collector/deployment.yaml | 71 -- helm/templates/collector/service.yaml | 21 - helm/templates/config/provider_templates.yaml | 362 --------- helm/templates/config/providers.yaml | 12 - helm/templates/config/secret.yaml | 3 - helm/templates/deployment.yaml | 77 +- helm/templates/keycloak/provision-job.yaml | 19 +- helm/templates/keycloak/secret.yaml | 4 +- helm/templates/registry/deployment.yaml | 35 - helm/templates/registry/secret.yaml | 16 - helm/templates/registry/service.yaml | 20 - helm/templates/ui/deployment.yaml | 1 - helm/values.yaml | 150 +--- mise.lock | 73 +- mise.toml | 2 +- tasks.toml | 225 +++--- 148 files changed, 2722 insertions(+), 7008 deletions(-) create mode 100644 
apps/agentstack-cli/src/agentstack_cli/data/k8s/keycloak-postgres-pv.yaml create mode 100644 apps/agentstack-cli/src/agentstack_cli/data/k8s/patch_kagenti_otel.py create mode 100644 apps/agentstack-cli/src/agentstack_cli/data/k8s/phoenix-data-pv.yaml create mode 100644 apps/agentstack-cli/src/agentstack_cli/kagenti_client.py delete mode 100644 apps/agentstack-sdk-py/src/agentstack_sdk/platform/provider_build.py delete mode 100644 apps/agentstack-sdk-py/src/agentstack_sdk/platform/provider_discovery.py delete mode 100644 apps/agentstack-sdk-ts/src/client/api/provider-builds/api.ts delete mode 100644 apps/agentstack-sdk-ts/src/client/api/provider-builds/schemas.ts delete mode 100644 apps/agentstack-sdk-ts/src/client/api/provider-builds/types.ts delete mode 100644 apps/agentstack-server/src/agentstack_server/api/routes/provider_builds.py delete mode 100644 apps/agentstack-server/src/agentstack_server/api/routes/provider_discovery.py delete mode 100644 apps/agentstack-server/src/agentstack_server/api/schema/provider_build.py delete mode 100644 apps/agentstack-server/src/agentstack_server/domain/models/provider_build.py delete mode 100644 apps/agentstack-server/src/agentstack_server/domain/models/provider_discovery.py delete mode 100644 apps/agentstack-server/src/agentstack_server/domain/repositories/provider_build.py delete mode 100644 apps/agentstack-server/src/agentstack_server/domain/repositories/provider_discovery.py create mode 100644 apps/agentstack-server/src/agentstack_server/infrastructure/kagenti/__init__.py create mode 100644 apps/agentstack-server/src/agentstack_server/infrastructure/kagenti/client.py delete mode 100644 apps/agentstack-server/src/agentstack_server/infrastructure/kubernetes/default_templates/build-provider-job.yaml delete mode 100644 apps/agentstack-server/src/agentstack_server/infrastructure/kubernetes/default_templates/build-provider-secret.yaml delete mode 100644 
apps/agentstack-server/src/agentstack_server/infrastructure/kubernetes/default_templates/deployment.yaml delete mode 100644 apps/agentstack-server/src/agentstack_server/infrastructure/kubernetes/default_templates/secret.yaml delete mode 100644 apps/agentstack-server/src/agentstack_server/infrastructure/kubernetes/default_templates/service.yaml delete mode 100644 apps/agentstack-server/src/agentstack_server/infrastructure/kubernetes/provider_build_manager.py delete mode 100644 apps/agentstack-server/src/agentstack_server/infrastructure/kubernetes/provider_deployment_manager.py create mode 100644 apps/agentstack-server/src/agentstack_server/infrastructure/persistence/migrations/alembic/versions/c0095389475b_.py delete mode 100644 apps/agentstack-server/src/agentstack_server/infrastructure/persistence/repositories/provider_build.py delete mode 100644 apps/agentstack-server/src/agentstack_server/infrastructure/persistence/repositories/provider_discovery.py delete mode 100644 apps/agentstack-server/src/agentstack_server/jobs/tasks/provider_build.py delete mode 100644 apps/agentstack-server/src/agentstack_server/jobs/tasks/provider_discovery.py delete mode 100644 apps/agentstack-server/src/agentstack_server/service_layer/build_manager.py delete mode 100644 apps/agentstack-server/src/agentstack_server/service_layer/deployment_manager.py delete mode 100644 apps/agentstack-server/src/agentstack_server/service_layer/services/provider_build.py delete mode 100644 apps/agentstack-server/src/agentstack_server/service_layer/services/provider_discovery.py delete mode 100644 apps/agentstack-server/src/agentstack_server/utils/github.py delete mode 100644 apps/agentstack-server/tests/e2e/agents/test_agent_builds.py delete mode 100644 apps/agentstack-server/tests/e2e/routes/test_provider_variables.py delete mode 100644 apps/agentstack-server/tests/integration/utils/test_docker.py delete mode 100644 apps/agentstack-server/tests/integration/utils/test_github.py delete mode 100644 
apps/agentstack-server/tests/unit/utils/test_docker.py delete mode 100644 apps/agentstack-server/tests/unit/utils/test_github.py delete mode 100644 apps/agentstack-ui/src/modules/provider-builds/api/index.ts delete mode 100644 apps/agentstack-ui/src/modules/provider-builds/api/keys.ts delete mode 100644 apps/agentstack-ui/src/modules/provider-builds/api/mutations/useCreateProviderBuild.ts delete mode 100644 apps/agentstack-ui/src/modules/provider-builds/api/mutations/usePreviewProviderBuild.ts delete mode 100644 apps/agentstack-ui/src/modules/provider-builds/api/queries/useProviderBuild.ts delete mode 100644 apps/agentstack-ui/src/modules/provider-builds/api/queries/useProviderBuildLogs.ts delete mode 100644 apps/agentstack-ui/src/modules/provider-builds/api/utils.ts create mode 100644 docs/poc-kagenti-integration.md create mode 100644 helm/.claude/settings.json delete mode 100644 helm/templates/agent/deployment.yaml delete mode 100644 helm/templates/agent/secret.yaml delete mode 100644 helm/templates/agent/service.yaml delete mode 100644 helm/templates/collector/config.yaml delete mode 100644 helm/templates/collector/deployment.yaml delete mode 100644 helm/templates/collector/service.yaml delete mode 100644 helm/templates/config/provider_templates.yaml delete mode 100644 helm/templates/registry/deployment.yaml delete mode 100644 helm/templates/registry/secret.yaml delete mode 100644 helm/templates/registry/service.yaml diff --git a/apps/agentstack-cli/src/agentstack_cli/__init__.py b/apps/agentstack-cli/src/agentstack_cli/__init__.py index 83c762857e..31b559f6aa 100644 --- a/apps/agentstack-cli/src/agentstack_cli/__init__.py +++ b/apps/agentstack-cli/src/agentstack_cli/__init__.py @@ -39,13 +39,10 @@ ╰────────────────────────────────────────────────────────────────────────────╯ ╭─ Agent Management [Admin only] ────────────────────────────────────────────╮ -│ add Install an agent (Docker, GitHub) │ +│ add Install an agent │ │ remove Uninstall an agent │ │ update 
Update an agent │ -│ logs Stream agent execution logs │ -│ env Manage agent environment variables │ -│ build Build an agent remotely │ -│ client-side-build Build an agent container image locally │ +│ build Build an agent image locally │ ╰────────────────────────────────────────────────────────────────────────────╯ ╭─ Platform & Configuration ─────────────────────────────────────────────────╮ @@ -105,7 +102,6 @@ def main( no_args_is_help=True, help="Manage Agent Stack platform. [Local only]", ) -app.add_typer(agentstack_cli.commands.build.app, name="", no_args_is_help=True, help="Build agent images.") app.add_typer( agentstack_cli.commands.server.app, name="server", @@ -128,6 +124,8 @@ def main( # ) +app.add_typer(agentstack_cli.commands.build.app, name="", no_args_is_help=True, help="Build agent images.") + agent_alias = deepcopy(agentstack_cli.commands.agent.app) for cmd in agent_alias.registered_commands: cmd.rich_help_panel = "Agent commands" @@ -158,12 +156,14 @@ async def ui(): active_server = config.auth_manager.active_server if active_server: - if re.search(r"(localhost|127\.0\.0\.1):8333", active_server): + if "agentstack-api.localtest.me" in active_server: + ui_url = active_server.replace("agentstack-api.localtest.me", "agentstack.localtest.me") + elif re.search(r"(localhost|127\.0\.0\.1):8333", active_server): ui_url = re.sub(r":8333", ":8334", active_server) else: ui_url = active_server else: - ui_url = "http://localhost:8334" + ui_url = "http://agentstack.localtest.me:8080" webbrowser.open(ui_url) diff --git a/apps/agentstack-cli/src/agentstack_cli/api.py b/apps/agentstack-cli/src/agentstack_cli/api.py index 1adbcdf15e..afa71cda51 100644 --- a/apps/agentstack-cli/src/agentstack_cli/api.py +++ b/apps/agentstack-cli/src/agentstack_cli/api.py @@ -155,8 +155,10 @@ async def a2a_client(agent_card: AgentCard, context_token: ContextToken) -> Asyn @asynccontextmanager async def openai_client() -> AsyncIterator[openai.AsyncOpenAI]: async with 
Configuration().use_platform_client() as platform_client: + headers = platform_client.headers.copy() + headers.pop("Authorization", None) yield openai.AsyncOpenAI( api_key=platform_client.headers.get("Authorization", "").removeprefix("Bearer ") or "dummy", base_url=urllib.parse.urljoin(str(platform_client.base_url), urllib.parse.urljoin(API_BASE_URL, "openai")), - default_headers=platform_client.headers, + default_headers=headers, ) diff --git a/apps/agentstack-cli/src/agentstack_cli/auth_manager.py b/apps/agentstack-cli/src/agentstack_cli/auth_manager.py index efa8fc8307..88ee55114d 100644 --- a/apps/agentstack-cli/src/agentstack_cli/auth_manager.py +++ b/apps/agentstack-cli/src/agentstack_cli/auth_manager.py @@ -155,6 +155,51 @@ def save_auth_info( self._auth.servers[server] # touch self._save() + async def login_with_password( + self, + server: str, + username: str, + password: str, + client_id: str = "agentstack-cli", + ) -> AuthToken: + """Authenticate using resource owner password grant (direct access).""" + oauth_metadata = await self.fetch_oauth_protected_resource_metadata(server) + auth_servers = oauth_metadata.get("authorization_servers", []) + if not auth_servers: + raise RuntimeError(f"No authorization servers found for {server}") + + auth_server_url = auth_servers[0] + oidc = await self.get_oidc_metadata(auth_server_url) + token_endpoint = oidc["token_endpoint"] + + async with httpx.AsyncClient() as client: + resp = await client.post( + token_endpoint, + data={ + "grant_type": "password", + "client_id": client_id, + "username": username, + "password": password, + "scope": " ".join(oauth_metadata.get("scopes_supported", ["openid", "email", "profile"])), + }, + ) + resp.raise_for_status() + token_data = resp.json() + + auth_token = AuthToken(**token_data) + + self.save_auth_info( + server=server, + auth_server=auth_server_url, + client_id=client_id, + token=token_data, + ) + self._auth.active_server = server + self._auth.active_auth_server = 
auth_server_url + self._save() + + return auth_token + async def _exchange_refresh_token(self, auth_server: str, token: AuthToken) -> AuthToken: if not self._auth.active_server: raise ValueError("No active server configured") diff --git a/apps/agentstack-cli/src/agentstack_cli/commands/agent.py b/apps/agentstack-cli/src/agentstack_cli/commands/agent.py index bf4937f35a..48e9e77781 100644 --- a/apps/agentstack-cli/src/agentstack_cli/commands/agent.py +++ b/apps/agentstack-cli/src/agentstack_cli/commands/agent.py @@ -92,7 +92,7 @@ from agentstack_sdk.a2a.extensions.ui.settings import ( SingleSelectFieldValue as SettingsSingleSelectFieldValue, ) -from agentstack_sdk.platform import BuildState, File, ModelProvider, Provider, UserFeedback +from agentstack_sdk.platform import File, ModelProvider, Provider, UserFeedback from agentstack_sdk.platform.context import Context, ContextPermissions, ContextToken, Permissions from agentstack_sdk.platform.model_provider import ModelCapability from InquirerPy import inquirer @@ -104,7 +104,6 @@ from rich.panel import Panel from rich.text import Text -from agentstack_cli.commands.build import _server_side_build from agentstack_cli.commands.model import ensure_llm_provider from agentstack_cli.configuration import Configuration @@ -132,11 +131,6 @@ from agentstack_cli.server_utils import announce_server_action, confirm_server_action from agentstack_cli.utils import ( generate_schema_example, - get_github_repo_tags, - github_url_verbose_pattern, - is_github_url, - parse_env_var, - print_log, prompt_user, remove_nullable, status, @@ -199,132 +193,209 @@ def short_location(provider: Provider) -> str: configuration = Configuration() -DISCOVERY_TIMEOUT_SEC = 180 -DISCOVERY_POLL_INTERVAL_SEC = 2 +async def _discover_agent_card(location: str) -> AgentCard: + """Fetch agent card from a network URL's well-known endpoint.""" + from a2a.utils import AGENT_CARD_WELL_KNOWN_PATH -async def _discover_agent_card(docker_image: str) -> AgentCard: - from 
agentstack_sdk.platform.provider_discovery import DiscoveryState, ProviderDiscovery - - console.info("Image missing agent card label, starting discovery...") - - async with configuration.use_platform_client(): - with status("Creating discovery task"): - discovery = await ProviderDiscovery.create(docker_image=docker_image) - - start = asyncio.get_event_loop().time() - with status("Discovering agent card (this may take a while)"): - while discovery.status in (DiscoveryState.PENDING, DiscoveryState.IN_PROGRESS): - if asyncio.get_event_loop().time() - start > DISCOVERY_TIMEOUT_SEC: - raise RuntimeError("Discovery timed out after 3 minutes") - await asyncio.sleep(DISCOVERY_POLL_INTERVAL_SEC) - await discovery.get() - - if discovery.status == DiscoveryState.FAILED: - raise RuntimeError(f"Discovery failed: {discovery.error_message}") - - card = discovery.agent_card - if not card: - raise RuntimeError("Discovery completed but no agent card was returned") - - return card + url = location.rstrip("/") + AGENT_CARD_WELL_KNOWN_PATH + console.info(f"Fetching agent card from {url}...") + async with httpx.AsyncClient() as client: + resp = await client.get(url, timeout=30) + resp.raise_for_status() + return AgentCard.model_validate(resp.json()) @app.command("add") async def add_agent( location: typing.Annotated[ - str | None, typer.Argument(help="Agent location (public docker image or github url)") + str | None, typer.Argument(help="Agent image or network URL") ] = None, - dockerfile: typing.Annotated[str | None, typer.Option(help="Use custom dockerfile path")] = None, - verbose: typing.Annotated[bool, typer.Option("-v", "--verbose", help="Show verbose output")] = False, + name: typing.Annotated[str | None, typer.Option("--name", "-n", help="Agent name (default: derived from image)")] = None, + namespace: typing.Annotated[str, typer.Option(help="Target Kubernetes namespace")] = "team1", + port: typing.Annotated[int, typer.Option(help="Agent service port")] = 8080, + env: 
typing.Annotated[list[str] | None, typer.Option("--env", "-e", help="Environment variable in KEY=VALUE format (repeatable)")] = None, + env_file: typing.Annotated[str | None, typer.Option("--env-file", help="Path to env file (KEY=VALUE per line)")] = None, yes: typing.Annotated[bool, typer.Option("--yes", "-y", help="Skip confirmation prompts.")] = False, ) -> None: - """Add a docker image or GitHub repository. [Admin only] - - This command supports a variety of GitHub URL formats for deploying agents: - - - **Basic URL**: `https://github.com/myorg/myrepo` - - **Git Protocol URL**: `git+https://github.com/myorg/myrepo` - - **URL with .git suffix**: `https://github.com/myorg/myrepo.git` - - **URL with Version Tag**: `https://github.com/myorg/myrepo@v1.0.0` - - **URL with Branch Name**: `https://github.com/myorg/myrepo@my-branch` - - **URL with Subfolder Path**: `https://github.com/myorg/myrepo#path=/path/to/agent` - - **Combined Formats**: `https://github.com/myorg/myrepo.git@v1.0.0#path=/path/to/agent` - - **Enterprise GitHub**: `https://github.mycompany.com/myorg/myrepo` - - **With a custom Dockerfile location**: `agentstack add --dockerfile /my-agent/path/to/Dockerfile "https://github.com/my-org/my-awesome-agents@main#path=/my-agent"` - """ - repo_input = location + """Add an agent by container image or network URL. [Admin only]""" if location is None: - repo_input = ( + location = ( await inquirer.text( - message="Enter GitHub repository (owner/repo or full URL):", + message="Enter agent image or URL:", ).execute_async() or "" ) - if not repo_input: + if not location: console.error("No location provided. 
Exiting.") sys.exit(1) - if match := re.match(github_url_verbose_pattern, repo_input, re.VERBOSE): - owner, repo, version, path = ( - match.group("org"), - match.group("repo").removesuffix(".git"), - match.group("version"), - match.group("path"), - ) + url = announce_server_action(f"Installing agent '{location}' for") + await confirm_server_action("Proceed with installing this agent on", url=url, yes=yes) - if version is None and path is None: - host = match.group("host") - tags = await get_github_repo_tags(host, owner, repo) + # Detect if location is a container image (contains registry address or no protocol) + is_image = not location.startswith("http://") and not location.startswith("https://") - if tags: - selected_tag = await inquirer.fuzzy( - message="Select a tag to use:", - choices=tags, - ).execute_async() + if is_image: + await _add_agent_via_kagenti(location, name=name, namespace=namespace, port=port, env=env, env_file=env_file) + else: + # Legacy path: register network URL directly with agentstack + try: + with status("Registering agent to platform"): + async with configuration.use_platform_client(): + await Provider.create(location=location) + except httpx.HTTPStatusError as e: + if e.response.status_code == 422: + agent_card = await _discover_agent_card(location) + with status("Registering agent with discovered card"): + async with configuration.use_platform_client(): + await Provider.create(location=location, agent_card=agent_card) else: - selected_tag = ( - await inquirer.text( - message="Enter tag to use:", - ).execute_async() - or "main" + raise + console.success(f"Agent [bold]{location}[/bold] added to platform") + await list_agents() + + +async def _add_agent_via_kagenti( + image: str, + *, + name: str | None, + namespace: str, + port: int, + env: list[str] | None = None, + env_file: str | None = None, +) -> None: + """Deploy a pre-built image via kagenti and wait for it to become healthy.""" + import asyncio + import contextlib + import re + + 
from agentstack_cli.kagenti_client import KagentiClient + + # Derive name from image if not provided + if not name: + # Extract last path component, strip tag/digest + raw = image.rsplit("/", 1)[-1].split(":")[0].split("@")[0] + name = re.sub(r"[^a-z0-9-]", "-", raw.lower()).strip("-")[:63] or "agent" + + # Get auth token + auth_token = None + try: + auth_token = await configuration.auth_manager.load_auth_token() + except Exception: + if configuration.auth_manager.active_server and "agentstack-api.localtest.me" in configuration.auth_manager.active_server: + with contextlib.suppress(Exception): + auth_token = await configuration.auth_manager.login_with_password( + configuration.auth_manager.active_server, username="admin", password="admin" ) - location = f"https://github.com/{owner}/{repo}@{selected_tag}" - else: - location = repo_input + if not auth_token: + console.error("Not authenticated. Run [green]agentstack server login[/green] first.") + sys.exit(1) + + client = KagentiClient(configuration.kagenti_url, auth_token.access_token) + + # Build env vars: start with defaults, then merge user-provided ones + env_vars: dict[str, str] = { + "PORT": "8000", + "HOST": "0.0.0.0", + "PLATFORM_URL": "http://agentstack-server-svc.default:8333", + "PLATFORM_AUTH__SKIP_AUDIENCE_VALIDATION": "true", + "OTEL_EXPORTER_OTLP_ENDPOINT": "http://otel-collector.localtest.me:8080", + } + + # Parse --env-file (KEY=VALUE per line, ignoring comments and blank lines) + if env_file: + try: + with open(env_file) as f: + for line in f: + line = line.strip() + if not line or line.startswith("#"): + continue + key, _, value = line.partition("=") + if key: + env_vars[key.strip()] = value.strip() + except FileNotFoundError: + console.error(f"Env file not found: {env_file}") + sys.exit(1) + + # Parse --env KEY=VALUE flags (override env-file and defaults) + if env: + for entry in env: + key, _, value = entry.partition("=") + if not key or not _: + console.error(f"Invalid env format '{entry}', 
expected KEY=VALUE") + sys.exit(1) + env_vars[key] = value + + # Create agent in kagenti + request = { + "name": name, + "namespace": namespace, + "deploymentMethod": "image", + "containerImage": image, + "workloadType": "deployment", + "servicePorts": [{"port": port, "protocol": "TCP"}], + "envVars": [{"name": k, "value": v} for k, v in env_vars.items()], + } + + try: + with status(f"Deploying agent '{name}' via kagenti"): + await client.create_agent(request) + except httpx.ConnectError: + console.error(f"Cannot connect to kagenti at [cyan]{configuration.kagenti_url}[/cyan]") + sys.exit(1) + except httpx.HTTPStatusError as e: + detail = "" + with contextlib.suppress(Exception): + detail = e.response.json().get("detail", "") + console.error(f"Failed to create agent: {e.response.status_code} {detail or e.response.text}") + sys.exit(1) + + console.success(f"Agent [bold]{name}[/bold] submitted to kagenti") + + # Poll kagenti for agent health (60s timeout) + console.info("Waiting for agent to become healthy in kagenti...") + healthy = False + for _ in range(60): + await asyncio.sleep(1) + try: + agent = await client.get_agent(namespace, name) + ready_status = agent.get("readyStatus", "") + if ready_status.lower() in ("running", "ready", "healthy"): + healthy = True + break + except Exception: + pass + + if not healthy: + console.warning( + f"Agent [bold]{name}[/bold] did not become healthy within 60s. " + f"Check kagenti UI or [green]kubectl get pods -n {namespace}[/green] for details." 
+ ) else: - location = repo_input + console.success(f"Agent [bold]{name}[/bold] is healthy in kagenti") - url = announce_server_action(f"Installing agent '{location}' for") - await confirm_server_action("Proceed with installing this agent on", url=url, yes=yes) - with verbosity(verbose): - if is_github_url(location): - console.info(f"Assuming GitHub repository, attempting to build agent from [bold]{location}[/bold]") - with status("Building agent"): - build = await _server_side_build(location, dockerfile, add=True, verbose=verbose) - if build.status != BuildState.COMPLETED: - error = build.error_message or "see logs above for details" - raise RuntimeError(f"Agent build failed: {error}") - else: - if dockerfile: - raise ValueError("Dockerfile can be specified only if location is a GitHub url") - console.info(f"Assuming public docker image or network address, attempting to add {location}") - try: - with status("Registering agent to platform"): - async with configuration.use_platform_client(): - await Provider.create(location=location) - except httpx.HTTPStatusError as e: - if e.response.status_code == 422: - agent_card = await _discover_agent_card(location) - with status("Registering agent with discovered card"): - async with configuration.use_platform_client(): - await Provider.create(location=location, agent_card=agent_card) - else: - raise - console.success(f"Agent [bold]{location}[/bold] added to platform") - await list_agents() + # Wait for agent to appear in agentstack (30s timeout) + console.info("Waiting for agent to appear in agentstack...") + appeared = False + for _ in range(30): + await asyncio.sleep(1) + try: + async with configuration.use_platform_client(): + providers = await Provider.list() + if any(name in (p.agent_card.name or "") or name in (p.origin or "") for p in providers): + appeared = True + break + except Exception: + pass + + if not appeared: + console.warning( + f"Agent [bold]{name}[/bold] has not appeared in agentstack within 30s. 
" + "It may take longer for kagenti to sync the agent." + ) @app.command("update") @@ -333,89 +404,53 @@ async def update_agent( str | None, typer.Argument(help="Short ID, agent name or part of the provider location of agent to replace") ] = None, location: typing.Annotated[ - str | None, typer.Argument(help="Agent location (public docker image or github url)") + str | None, typer.Argument(help="New agent location (network URL)") ] = None, - dockerfile: typing.Annotated[str | None, typer.Option(help="Use custom dockerfile path")] = None, - verbose: typing.Annotated[bool, typer.Option("-v", "--verbose", help="Show verbose output")] = False, yes: typing.Annotated[bool, typer.Option("--yes", "-y", help="Skip confirmation prompts.")] = False, ) -> None: - """Upgrade agent to a newer docker image or build from GitHub repository. [Admin only]""" - with verbosity(verbose): - async with configuration.use_platform_client(): - providers = await Provider.list() - - if search_path is None: - if not providers: - console.error("No agents found. Add an agent first using 'agentstack agent add'.") - sys.exit(1) - - provider_choices = [ - Choice(value=p, name=f"{p.agent_card.name} ({ProviderUtils.short_location(p)})") for p in providers - ] - provider = await inquirer.fuzzy( - message="Select an agent to update:", - choices=provider_choices, - ).execute_async() - if not provider: - console.error("No agent selected. Exiting.") - sys.exit(1) - else: - provider = select_provider(search_path, providers=providers) + """Update an agent's location. [Admin only]""" + async with configuration.use_platform_client(): + providers = await Provider.list() - if location is None and is_github_url(provider.origin): - match = re.match(github_url_verbose_pattern, provider.origin, re.VERBOSE) + if search_path is None: + if not providers: + console.error("No agents found. 
Add an agent first using 'agentstack agent add'.") + sys.exit(1) - if match: - host, owner, repo = ( - match.group("host"), - match.group("owner"), - match.group("repo").removesuffix(".git"), - ) + provider_choices = [ + Choice(value=p, name=f"{p.agent_card.name} ({ProviderUtils.short_location(p)})") for p in providers + ] + provider = await inquirer.fuzzy( + message="Select an agent to update:", + choices=provider_choices, + ).execute_async() + if not provider: + console.error("No agent selected. Exiting.") + sys.exit(1) + else: + provider = select_provider(search_path, providers=providers) - tags = await get_github_repo_tags(host, owner, repo) - - if tags: - selected_tag = await inquirer.fuzzy( - message="Select a new tag to use:", - choices=tags, - ).execute_async() - if selected_tag: - location = f"https://github.com/{owner}/{repo}@{selected_tag}" - - if location is None: - location = ( - await inquirer.text( - message="Enter new agent location (public docker image or github url):", - default=provider.origin.lstrip("git+"), - ).execute_async() - or "" - ) + if location is None: + location = ( + await inquirer.text( + message="Enter new agent location (URL):", + default=provider.origin, + ).execute_async() + or "" + ) - if not location: - console.error("No location provided. Exiting.") - sys.exit(1) + if not location: + console.error("No location provided. 
Exiting.") + sys.exit(1) - url = announce_server_action(f"Upgrading agent from '{provider.origin}' to {location}") - await confirm_server_action("Proceed with upgrading agent on", url=url, yes=yes) + url = announce_server_action(f"Upgrading agent from '{provider.origin}' to {location}") + await confirm_server_action("Proceed with upgrading agent on", url=url, yes=yes) - if is_github_url(location): - console.info(f"Assuming GitHub repository, attempting to build agent from [bold]{location}[/bold]") - with status("Building agent"): - build = await _server_side_build( - github_url=location, dockerfile=dockerfile, replace=provider.id, verbose=verbose - ) - if build.status != BuildState.COMPLETED: - error = build.error_message or "see logs above for details" - raise RuntimeError(f"Agent build failed: {error}") - else: - if dockerfile: - raise ValueError("Dockerfile can be specified only if location is a GitHub url") - console.info(f"Assuming public docker image or network address, attempting to add {location}") - with status("Upgrading agent in the platform"): - async with configuration.use_platform_client(): - await provider.patch(location=location) - console.success(f"Agent [bold]{location}[/bold] added to platform") - await list_agents() + with status("Upgrading agent in the platform"): + async with configuration.use_platform_client(): + await provider.patch(location=location) + console.success(f"Agent [bold]{location}[/bold] updated on platform") + await list_agents() def search_path_match_providers(search_path: str, providers: list[Provider]) -> dict[str, Provider]: @@ -455,7 +490,10 @@ async def select_providers_multi(search_path: str, providers: list[Provider]) -> choices = [Choice(value=p.id, name=f"{p.agent_card.name} - {p.id}") for p in provider_candidates.values()] selected_ids = await inquirer.checkbox( - message="Select agents to remove (use ↑/↓ to navigate, Space to select):", choices=choices + message="Select agents to remove (use ↑/↓ to navigate, Space 
to select):", + choices=choices, + validate=lambda result: len(result) > 0, + invalid_message="Please select at least one agent using Space before pressing Enter.", ).execute_async() return [provider_candidates[pid] for pid in (selected_ids or [])] @@ -511,21 +549,41 @@ async def uninstall_agent( err_console.print(f"Failed to delete {provider.agent_card.name}: {result}") # else: deletion succeeded - await list_agents() + # Also delete kagenti-sourced agents from kagenti + kagenti_providers = [p for p in selected_providers if p.source_type == "kagenti" and p.origin] + if kagenti_providers: + import contextlib + from urllib.parse import urlparse + from agentstack_cli.kagenti_client import KagentiClient -@app.command("logs") -async def stream_logs( - search_path: typing.Annotated[ - str, typer.Argument(..., help="Short ID, agent name or part of the provider location") - ], -): - """Stream agent provider logs. [Admin only]""" - async with configuration.use_platform_client(): - provider = select_provider(search_path, await Provider.list()) - announce_server_action(f"Streaming logs for '{provider.agent_card.name}' from") - async for message in Provider.stream_logs(provider.id): - print_log(message, ansi_mode=True) + auth_token = None + try: + auth_token = await configuration.auth_manager.load_auth_token() + except Exception: + if configuration.auth_manager.active_server and "agentstack-api.localtest.me" in configuration.auth_manager.active_server: + with contextlib.suppress(Exception): + auth_token = await configuration.auth_manager.login_with_password( + configuration.auth_manager.active_server, username="admin", password="admin" + ) + + if auth_token: + client = KagentiClient(configuration.kagenti_url, auth_token.access_token) + for provider in kagenti_providers: + # Parse name and namespace from origin URL: http://{name}.{namespace}.svc.cluster.local:8080 + parsed = urlparse(provider.origin) + host_parts = (parsed.hostname or "").split(".") + if len(host_parts) >= 2: 
+ agent_name, namespace = host_parts[0], host_parts[1] + try: + await client.delete_agent(namespace, agent_name) + console.success(f"Deleted [bold]{agent_name}[/bold] from kagenti") + except Exception as ex: + err_console.print(f"Failed to delete {agent_name} from kagenti: {ex}") + else: + err_console.print("Could not authenticate to kagenti — agents may reappear via sync.") + + await list_agents() async def _ask_form_questions(form_render: FormRender) -> FormResponse: @@ -1311,12 +1369,7 @@ async def list_agents(): max_provider_len = max(len(ProviderUtils.short_location(p)) for p in providers) if providers else 0 def _sort_fn(provider: Provider): - state = {"missing": "1"} - return ( - str(state.get(provider.state, 0)) + f"_{provider.agent_card.name}" - if provider.registry - else provider.agent_card.name - ) + return provider.agent_card.name with create_table( Column("Short ID", style="yellow"), @@ -1343,9 +1396,7 @@ def _sort_fn(provider: Provider): ( f"Error: {error}" if provider.state == "error" and (error := ProviderUtils.last_error(provider)) - else f"Missing ENV: {{{', '.join(missing_env)}}}" - if (missing_env := [var.name for var in provider.missing_configuration]) - else "" + else "" ), ) console.print(table) @@ -1416,73 +1467,12 @@ async def agent_detail( console.print(table) with create_table(Column("Key", ratio=1), Column("Value", ratio=5), title="Provider") as table: - for key, value in provider.model_dump(exclude={"image_id", "manifest", "source", "registry"}).items(): + for key, value in provider.model_dump(exclude={"source"}).items(): table.add_row(key, str(value)) console.print() console.print(table) -env_app = AsyncTyper() -app.add_typer(env_app, name="env") - - -async def _list_env(provider: Provider): - async with configuration.use_platform_client(): - variables = await provider.list_variables() - with create_table(Column("name", style="yellow"), Column("value", ratio=1)) as table: - for name, value in sorted(variables.items()): - 
table.add_row(name, value) - console.print(table) - - -@env_app.command("add") -async def add_env( - search_path: typing.Annotated[ - str, typer.Argument(..., help="Short ID, agent name or part of the provider location") - ], - env: typing.Annotated[list[str], typer.Argument(help="Environment variables to pass to agent")], - yes: typing.Annotated[bool, typer.Option("--yes", "-y", help="Skip confirmation prompts.")] = False, -) -> None: - """Store environment variables. [Admin only]""" - url = announce_server_action(f"Adding environment variables for '{search_path}' on") - await confirm_server_action("Apply these environment variable changes on", url=url, yes=yes) - env_vars = dict(parse_env_var(var) for var in env) - async with configuration.use_platform_client(): - provider = select_provider(search_path, await Provider.list()) - await provider.update_variables(variables=env_vars) - await _list_env(provider) - - -@env_app.command("list") -async def list_env( - search_path: typing.Annotated[ - str, typer.Argument(..., help="Short ID, agent name or part of the provider location") - ], -): - """List stored environment variables. [Admin only]""" - announce_server_action(f"Listing environment variables for '{search_path}' on") - async with configuration.use_platform_client(): - provider = select_provider(search_path, await Provider.list()) - await _list_env(provider) - - -@env_app.command("remove") -async def remove_env( - search_path: typing.Annotated[ - str, typer.Argument(..., help="Short ID, agent name or part of the provider location") - ], - env: typing.Annotated[list[str], typer.Argument(help="Environment variable(s) to remove")], - yes: typing.Annotated[bool, typer.Option("--yes", "-y", help="Skip confirmation prompts.")] = False, -): - """Remove environment variable(s). 
[Admin only]""" - url = announce_server_action(f"Removing environment variables from '{search_path}' on") - await confirm_server_action("Remove the selected environment variables on", url=url, yes=yes) - async with configuration.use_platform_client(): - provider = select_provider(search_path, await Provider.list()) - await provider.update_variables(variables=dict.fromkeys(env)) - await _list_env(provider) - - feedback_app = AsyncTyper() app.add_typer(feedback_app, name="feedback", help="Manage user feedback for your agents", no_args_is_help=True) diff --git a/apps/agentstack-cli/src/agentstack_cli/commands/build.py b/apps/agentstack-cli/src/agentstack_cli/commands/build.py index 6bf66a80aa..da44eb66d0 100644 --- a/apps/agentstack-cli/src/agentstack_cli/commands/build.py +++ b/apps/agentstack-cli/src/agentstack_cli/commands/build.py @@ -3,233 +3,89 @@ from __future__ import annotations -import base64 import hashlib -import json import re import typing -import uuid -from asyncio import CancelledError -from contextlib import suppress -from datetime import timedelta -from pathlib import Path import anyio -import anyio.abc import typer -from a2a.utils import AGENT_CARD_WELL_KNOWN_PATH -from agentstack_sdk.platform import AddProvider, BuildConfiguration, Provider, UpdateProvider -from agentstack_sdk.platform.provider_build import ProviderBuild -from anyio import open_process -from httpx import AsyncClient, HTTPError -from tenacity import AsyncRetrying, retry_if_exception_type, stop_after_delay, wait_fixed from agentstack_cli.async_typer import AsyncTyper -from agentstack_cli.console import console, err_console -from agentstack_cli.server_utils import announce_server_action, confirm_server_action -from agentstack_cli.utils import ( - capture_output, - extract_messages, - print_log, - run_command, - status, - verbosity, -) - - -async def find_free_port(): - """Get a random free port assigned by the OS.""" - listener = await anyio.create_tcp_listener() - port = 
listener.extra(anyio.abc.SocketAttribute.local_address)[1] - await listener.aclose() - return port - +from agentstack_cli.console import console +from agentstack_cli.utils import run_command, verbosity app = AsyncTyper() +# The in-cluster registry DNS name (used in pod image references) +REGISTRY_INTERNAL = "registry.cr-system.svc.cluster.local:5000" +# NodePort exposed by the post-renderer (matches registries.conf mirror) +REGISTRY_NODEPORT = 30500 -@app.command("client-side-build") -async def client_side_build( - context: typing.Annotated[str, typer.Argument(help="Docker context for the agent")] = ".", - dockerfile: typing.Annotated[str | None, typer.Option(help="Use custom dockerfile path")] = None, - tag: typing.Annotated[str | None, typer.Option(help="Docker tag for the agent")] = None, - multi_platform: bool | None = False, - push: typing.Annotated[bool, typer.Option(help="Push the image to the target registry.")] = False, - import_image: typing.Annotated[ - bool, typer.Option("--import/--no-import", is_flag=True, help="Import the image into Agent Stack platform") - ] = True, - extract_agent_card: typing.Annotated[ - bool, - typer.Option( - "--extract-agent-card/--no-extract-agent-card", - is_flag=True, - help="Extract agent card from running container", - ), - ] = True, + +@app.command("build") +async def build_agent( + context: typing.Annotated[str, typer.Argument(help="Docker build context (path or URL)")] = ".", + dockerfile: typing.Annotated[str | None, typer.Option("-f", "--dockerfile", help="Dockerfile path")] = None, + tag: typing.Annotated[str | None, typer.Option("-t", "--tag", help="Image tag (default: auto-generated)")] = None, vm_name: typing.Annotated[str, typer.Option(hidden=True)] = "agentstack", verbose: typing.Annotated[bool, typer.Option("-v", "--verbose", help="Show verbose output")] = False, -): - """Build agent locally using Docker. [Local only]""" +) -> None: + """Build an agent image locally and push it to the platform registry. 
[Local only]""" with verbosity(verbose): - image_id = "agentstack-agent-build-tmp:latest" - port = await find_free_port() dockerfile_args = ("-f", dockerfile) if dockerfile else () + # Derive a short image name if tag not provided + if not tag: + context_hash = hashlib.sha256((context + (dockerfile or "")).encode()).hexdigest()[:6] + context_shorter = re.sub(r"https?://", "", context).replace(r".git", "") + context_shorter = re.sub(r"[^a-zA-Z0-9_-]+", "-", context_shorter)[:32].lstrip("-") or "agent" + image_name = f"{context_shorter}-{context_hash}:latest" + else: + # Strip any registry prefix — we always push to the platform registry + image_name = tag.split("/")[-1] + if ":" not in image_name: + image_name += ":latest" + image_name = image_name.lower() + + # Full image ref for cluster-internal use (pod specs) + cluster_ref = f"{REGISTRY_INTERNAL}/{image_name}" + # Local docker build tag + build_tag = f"localhost/agentstack/{image_name}" + + # Build the image await run_command( - ["docker", "build", context, *dockerfile_args, "-t", image_id], + ["docker", "build", context, *dockerfile_args, "-t", build_tag, "--load"], "Building agent image", ) + console.success(f"Built image: [bold]{build_tag}[/bold]") - agent_card = None - - if extract_agent_card: - container_id = str(uuid.uuid4()) + # Push to the in-cluster registry via NodePort (localhost:30500) + from agentstack_cli.commands.platform import detect_export_import_paths, detect_vm_status, run_in_vm - try: - with status("Extracting agent metadata"): - async with ( - await open_process( - [ - "docker", - "run", - "--name", - container_id, - "-p", - f"{port}:8000", - "-e", - "HOST=0.0.0.0", - "-e", - "PORT=8000", - image_id, - ], - ) as process, - ): - async with capture_output(process) as task_group: - try: - async for attempt in AsyncRetrying( - stop=stop_after_delay(timedelta(seconds=30)), - wait=wait_fixed(timedelta(seconds=0.5)), - retry=retry_if_exception_type(HTTPError), - reraise=True, - ): - with attempt: 
- async with AsyncClient() as client: - resp = await client.get( - f"http://localhost:{port}{AGENT_CARD_WELL_KNOWN_PATH}", timeout=1 - ) - resp.raise_for_status() - agent_card = resp.json() - except BaseException as ex: - raise RuntimeError(f"Failed to build agent: {extract_messages(ex)}") from ex - finally: - task_group.cancel_scope.cancel() - # Kill the docker run process so open_process.__aexit__ can proceed - process.kill() - finally: - with suppress(BaseException): - await run_command(["docker", "rm", "-f", container_id], "Removing container", check=False) + if (await detect_vm_status(vm_name)) != "running": + console.error("Agent Stack platform is not running.") + raise typer.Exit(1) - context_hash = hashlib.sha256((context + (dockerfile or "")).encode()).hexdigest()[:6] - context_shorter = re.sub(r"https?://", "", context).replace(r".git", "") - context_shorter = re.sub(r"[^a-zA-Z0-9_-]+", "-", context_shorter)[:32].lstrip("-") or "provider" - tag = (tag or f"agentstack-registry-svc.default:5001/{context_shorter}-{context_hash}:latest").lower() - await run_command( - command=[ - *( - ["docker", "buildx", "build", "--platform=linux/amd64,linux/arm64"] - if multi_platform - else ["docker", "build"] - ), - "--push" if push else "--load", - context, - *dockerfile_args, - "-t", - tag, - f"--label=beeai.dev.agent.json={base64.b64encode(json.dumps(agent_card).encode()).decode()}", - ], - message="Adding agent labels to container", - check=True, - ) - console.success(f"Successfully built agent: {tag}") - if import_image: - from agentstack_cli.commands.platform import ImageImportMode, import_cmd - - if "agentstack-registry-svc.default" not in tag: - source_tag = tag - tag = re.sub("^[^/]*/", "agentstack-registry-svc.default:5001/", tag) - await run_command(["docker", "tag", source_tag, tag], "Tagging image") - - await import_cmd(tag, vm_name, mode=ImageImportMode.registry) - console.success( - "Agent was imported to the agent stack internal registry.\n" - + f"You can 
add it using [blue]agentstack add {tag}[/blue]"
+ host_path, guest_path = detect_export_import_paths()
+ try:
+ await run_command(
+ ["docker", "image", "save", "-o", host_path, build_tag],
+ f"Exporting image {build_tag} from Docker",
)
-
- return tag, agent_card
-
-
-async def _server_side_build(
- github_url: str,
- dockerfile: str | None = None,
- replace: str | None = None,
- add: bool = False,
- verbose: bool = False,
-) -> ProviderBuild:
- build = None
- from agentstack_cli.commands.agent import select_provider
- from agentstack_cli.configuration import Configuration
-
- try:
- if replace and add:
- raise ValueError("Cannot specify both replace and add options.")
-
- build_configuration = None
- if dockerfile:
- build_configuration = BuildConfiguration(dockerfile_path=Path(dockerfile))
-
- async with Configuration().use_platform_client():
- on_complete = None
- if replace:
- provider = select_provider(replace, await Provider.list())
- on_complete = UpdateProvider(provider_id=uuid.UUID(provider.id))
- elif add:
- on_complete = AddProvider()
-
- build = await ProviderBuild.create(
- location=github_url,
- on_complete=on_complete,
- build_configuration=build_configuration,
+ await run_in_vm(
+ vm_name,
+ [
+ "skopeo", "copy",
+ f"docker-archive:{guest_path}",
+ f"docker://localhost:{REGISTRY_NODEPORT}/{image_name}",
+ "--dest-tls-verify=false",
+ ],
+ "Pushing image to platform registry",
)
- with verbosity(verbose):
- async for message in build.stream_logs():
- print_log(message, ansi_mode=True, out_console=err_console)
- return await build.get()
- except (KeyboardInterrupt, CancelledError):
- async with Configuration().use_platform_client():
- if build:
- await build.delete()
- console.error("Build aborted.")
- raise
-
+ finally:
+ await anyio.Path(host_path).unlink(missing_ok=True)
-@app.command("build")
-async def server_side_build(
- github_url: typing.Annotated[
- str, typer.Argument(..., help="Github repository URL (public or private if supported by the 
platform instance)") - ], - dockerfile: typing.Annotated[ - str | None, typer.Option(help="Use custom dockerfile path, relative to github url sub-path") - ] = None, - verbose: typing.Annotated[bool, typer.Option("-v", "--verbose", help="Show verbose output")] = False, - yes: typing.Annotated[bool, typer.Option("--yes", "-y", help="Skip confirmation prompts.")] = False, -): - """Build agent from a GitHub repository in the platform. [Admin only]""" - - url = announce_server_action(f"Starting build for '{github_url}' on") - await confirm_server_action("Proceed with building this agent on", url=url, yes=yes) - - build = await _server_side_build(github_url=github_url, dockerfile=dockerfile, verbose=verbose) - - console.success( - f"Agent built successfully, add it to the platform using: [green]agentstack add {build.destination}[/green]" - ) + console.success( + f"Image pushed to platform registry.\n" + f"Add it using: [green]agentstack add {cluster_ref}[/green]" + ) diff --git a/apps/agentstack-cli/src/agentstack_cli/commands/platform.py b/apps/agentstack-cli/src/agentstack_cli/commands/platform.py index 3c49bc98d3..be62df43b0 100644 --- a/apps/agentstack-cli/src/agentstack_cli/commands/platform.py +++ b/apps/agentstack-cli/src/agentstack_cli/commands/platform.py @@ -218,10 +218,31 @@ class ImagePullMode(StrEnum): skip = "skip" +CHART_PREFIXES = ("kagenti-deps:", "kagenti:", "agentstack:") + + +def parse_scoped_set_values(set_values_list: list[str]) -> dict[str, list[str]]: + """Split --set values by chart prefix. Unprefixed defaults to 'agentstack'.""" + result: dict[str, list[str]] = {"agentstack": [], "kagenti": [], "kagenti-deps": []} + for value in set_values_list: + for prefix in CHART_PREFIXES: + if value.startswith(prefix): + result[prefix.rstrip(":")].append(value[len(prefix) :]) + break + else: + result["agentstack"].append(value) + return result + + @app.command("start", help="Start Agent Stack platform. 
[Local only]") async def start_cmd( set_values_list: typing.Annotated[ - list[str], typer.Option("--set", help="Set Helm chart values using = syntax", default_factory=list) + list[str], + typer.Option( + "--set", + help="Set Helm chart values. Prefix with chart name: --set kagenti:key=val, --set kagenti-deps:key=val. Unprefixed applies to agentstack.", + default_factory=list, + ), ], image_pull_mode: typing.Annotated[ ImagePullMode, @@ -238,7 +259,11 @@ async def start_cmd( ), ] = ImagePullMode.guest, values_file: typing.Annotated[ - pathlib.Path | None, typer.Option("-f", help="Set Helm chart values using yaml values file") + pathlib.Path | None, + typer.Option( + "-f", + help="YAML values file with chart-scoped sections: agentstack:, kagenti:, kagenti-deps:", + ), ] = None, lima_image: typing.Annotated[ str | None, typer.Option("--lima-image", help="Local path or URL to Lima image (.qcow2)") @@ -256,6 +281,13 @@ async def start_cmd( if values_file and not await anyio.Path(values_file).is_file(): raise FileNotFoundError(f"Values file {values_file} not found.") + # Parse chart-scoped values from -f file and --set flags + user_values = yaml.safe_load(pathlib.Path(values_file).read_text()) if values_file else {} # noqa: ASYNC240 + user_agentstack_values = user_values.get("agentstack", {}) if isinstance(user_values, dict) else {} + user_kagenti_values = user_values.get("kagenti", {}) if isinstance(user_values, dict) else {} + user_kagenti_deps_values = user_values.get("kagenti-deps", {}) if isinstance(user_values, dict) else {} + scoped_sets = parse_scoped_set_values(set_values_list) + with verbosity(verbose): version = importlib.metadata.version("agentstack-cli").replace("rc", "-rc") arch = "x86_64" if platform_module.machine().lower() in ["x86_64", "amd64"] else "aarch64" @@ -428,34 +460,113 @@ async def start_cmd( "Setting up internal networking", ) - platform = typing.cast( - typing.Literal["k3s", "microshift"], - ( + detected_platform: typing.Literal["k3s", 
"microshift"] | None = None + try: + detected_platform = typing.cast( + typing.Literal["k3s", "microshift"], ( - await run_in_vm( - vm_name, - ["bash", "-c", "command -v k3s || command -v microshift"], - "Detecting Kubernetes platform", + ( + await run_in_vm( + vm_name, + ["bash", "-c", "command -v k3s || command -v microshift"], + "Detecting Kubernetes platform", + ) ) - ) - .stdout.decode() - .strip() - .splitlines()[0] - .split("/")[-1] - ), - ) - - if platform == "k3s": - await run_in_vm( - vm_name, - [ - "bash", - "-c", - "ln -sf /etc/rancher/k3s/k3s.yaml /kubeconfig && chmod 644 /etc/rancher/k3s/k3s.yaml /kubeconfig", - ], - "Setting up kubeconfig symlink", + .stdout.decode() + .strip() + .splitlines()[0] + .split("/")[-1] + ), ) + except Exception: + pass + match detected_platform: + case None: + await run_in_vm( + vm_name, + [ + "bash", + "-c", + textwrap.dedent("""\ + sysctl -w net.ipv4.ip_forward=1 + mkdir -p /tmp/microshift-install + curl -fsSL "https://github.com/microshift-io/microshift/releases/download/4.21.0_g29f429c21_4.21.0_okd_scos.ec.15/microshift-debs-$(uname -m).tgz" | tar -xz -C /tmp/microshift-install & + eatmydata apt-get update -y -q + eatmydata apt-get install -y -q --no-install-recommends skopeo cri-o cri-tools containernetworking-plugins kubectl + mkdir -p -m 777 /postgresql-data /seaweedfs-data /registry-data /redis-data + systemctl enable --now crio + wait + eatmydata dpkg -i /tmp/microshift-install/microshift_*.deb /tmp/microshift-install/microshift-kindnet_*.deb + rm -rf /tmp/microshift-install + systemctl enable --now microshift + """), + ], + "Installing MicroShift", + ) + case "k3s": + await run_in_vm( + vm_name, + [ + "bash", + "-c", + "apt-get install -y -q skopeo; systemctl is-active --quiet k3s || systemctl enable --now k3s", + ], + "Refreshing existing k3s VM", + ) + case "microshift": + await run_in_vm( + vm_name, + [ + "bash", + "-c", + "systemctl is-active --quiet crio && systemctl is-active --quiet microshift || 
systemctl enable --now crio && systemctl enable --now microshift", + ], + "Refreshing existing MicroShift VM", + ) + + platform: typing.Literal["k3s", "microshift"] = detected_platform or "microshift" + await run_in_vm( + vm_name, + [ + "bash", + "-c", + f"ln -sf {'/etc/rancher/k3s/k3s.yaml' if platform == 'k3s' else '/var/lib/microshift/resources/kubeadmin/kubeconfig'} /kubeconfig && chmod 644 /kubeconfig", + ], + "Setting up kubeconfig symlink", + ) + kubeconfig_local = anyio.Path(Configuration().lima_home) / vm_name / "copied-from-guest" / "kubeconfig.yaml" + await kubeconfig_local.parent.mkdir(parents=True, exist_ok=True) + await kubeconfig_local.write_text( + ( + await run_in_vm( + vm_name, + [ + "timeout", + "5m", + "bash", + "-c", + 'until grep -q "current-context:" /kubeconfig 2>/dev/null; do sleep 5; done && cat /kubeconfig', + ], + "Copying kubeconfig from Agent Stack platform", + ) + ).stdout.decode() + ) + await run_in_vm( + vm_name, + [ + "bash", + "-c", + textwrap.dedent("""\ + command -v helm && exit 0 + case $(uname -m) in x86_64) ARCH="amd64" ;; aarch64) ARCH="arm64" ;; esac + curl -fsSL "https://get.helm.sh/helm-v4.1.1-linux-${ARCH}.tar.gz" | tar -xzf - --strip-components=1 -C /usr/local/bin "linux-${ARCH}/helm" + chmod +x /usr/local/bin/helm + """), + ], + "Installing Helm", + ) + # --- Prepare agentstack chart and import images before any deployments --- await run_in_vm( vm_name, ["bash", "-c", "cat >/tmp/agentstack-chart.tgz"], @@ -469,29 +580,142 @@ async def start_cmd( input=yaml.dump( merge( { - "externalRegistries": {"public_github": str(Configuration().agent_registry)}, "encryptionKey": "Ovx8qImylfooq4-HNwOzKKDcXLZCB3c_m0JlB9eJBxc=", "trustProxyHeaders": True, "localStorage": platform == "microshift", # k3s uses local path provisioner instead "keycloak": { + "enabled": False, + "auth": { + "adminUser": "admin", + "adminPassword": "admin", + "seedAgentstackUsers": [ + { + "username": "admin", + "password": "admin", + "firstName": "Admin", + 
"lastName": "User", + "email": "admin@beeai.dev", + "roles": ["agentstack-admin", "kagenti-admin"], + "enabled": True, + } + ], + }, + }, + "externalOidcProvider": { + "issuerUrl": "http://keycloak-service.keycloak:8080/realms/agentstack", + "publicIssuerUrl": "http://keycloak.localtest.me:8080/realms/agentstack", + "name": "Keycloak", + "id": "keycloak", + "rolesPath": "realm_access.roles", + "uiClientId": "agentstack-ui", "uiClientSecret": "agentstack-ui-secret", + "serverClientId": "agentstack-server", "serverClientSecret": "agentstack-server-secret", - "auth": {"adminPassword": "admin"}, + }, + "auth": { + "enabled": True, + "provisionKeycloak": True, + "validateAudience": False, + "nextauthUrl": "http://agentstack.localtest.me:8080", + "apiUrl": "http://agentstack-api.localtest.me:8080", }, "features": {"uiLocalSetup": True}, "providerBuilds": {"enabled": True}, - "localDockerRegistry": {"enabled": True}, - "auth": {"enabled": False}, + "disableProviderDownscaling": True, "cors": { "enabled": True, - "allowOriginRegex": r"https?://(localhost|127\.0\.0\.1)(:\d+)?", + "allowOriginRegex": r"https?://(localhost|127\.0\.0\.1|[a-z0-9.-]*\.?localtest\.me)(:\d+)?", "allowCredentials": True, }, }, - yaml.safe_load(await anyio.Path(values_file).read_text()) if values_file else {}, + user_agentstack_values, ) ).encode("utf-8"), ) + # --- Prepare kagenti chart values and version before image listing --- + kagenti_chart_version = "0.5.1" + kagenti_deps_values = yaml.dump( + merge( + { + "openshift": False, + "components": { + "keycloak": {"enabled": True}, + "istio": {"enabled": False}, + "kiali": {"enabled": False}, + "mcpInspector": {"enabled": False}, + "otel": {"enabled": False}, + "mlflow": {"enabled": False}, + "containerRegistry": {"enabled": True}, + "spire": {"enabled": False}, + "tekton": {"enabled": False}, + "shipwright": {"enabled": False}, + "certManager": {"enabled": False}, + "gatewayApi": {"enabled": False}, + "metricsServer": {"enabled": False}, + 
"ingressGateway": {"enabled": True}, + }, + "keycloak": { + "namespace": "keycloak", + "auth": {"adminUser": "admin", "adminPassword": "admin"}, + "url": "http://keycloak-service.keycloak:8080", + "publicUrl": "http://keycloak.localtest.me:8080", + }, + }, + user_kagenti_deps_values, + ) + ) + kagenti_values = yaml.dump( + merge( + { + "openshift": False, + "components": { + "agentOperator": {"enabled": False}, + "platformWebhook": {"enabled": False}, + "agentNamespaces": {"enabled": True}, + "ui": {"enabled": True}, + "mcpGateway": {"enabled": False}, + "istio": {"enabled": False}, + }, + "agentNamespaces": ["team1"], + "keycloak": { + "enabled": True, + "namespace": "keycloak", + "url": "http://keycloak-service.keycloak:8080", + "publicUrl": "http://keycloak.localtest.me:8080", + "realm": "agentstack", + "autoBootstrapRealm": False, + "adminSecretName": "keycloak-initial-admin", + "adminUsernameKey": "username", + "adminPasswordKey": "password", + }, + "ui": { + "auth": {"enabled": True}, + "namespace": "kagenti-system", + "domainName": "localtest.me", + "hostname": "kagenti-ui.localtest.me", + "url": "http://kagenti-ui.localtest.me:8080", + "api": {"hostname": "kagenti-api.localtest.me"}, + }, + "apiOAuthSecret": {"enabled": True}, + "spire": {"enabled": False}, + }, + user_kagenti_values, + ) + ) + await run_in_vm( + vm_name, + ["bash", "-c", "cat >/tmp/kagenti-deps-values.yaml"], + "Preparing kagenti-deps values", + input=kagenti_deps_values.encode("utf-8"), + ) + await run_in_vm( + vm_name, + ["bash", "-c", "cat >/tmp/kagenti-values.yaml"], + "Preparing kagenti values", + input=kagenti_values.encode("utf-8"), + ) + # List images from all charts (agentstack + kagenti + kagenti-deps) + image_sed = r"sed -n '/^\s*image:/{ /{{/!{ s/.*image:\s*//p } }'" loaded_images = { canonify_image_tag(typing.cast(str, yaml.safe_load(line))) for line in ( @@ -500,9 +724,16 @@ async def start_cmd( [ "/bin/bash", "-c", - "helm template agentstack /tmp/agentstack-chart.tgz 
--values=/tmp/agentstack-values.yaml " - + " ".join(shlex.quote(f"--set={value}") for value in set_values_list) - + " | sed -n '/^\\s*image:/{ /{{/!{ s/.*image:\\s*//p } }'", + "{ " + + "helm template agentstack /tmp/agentstack-chart.tgz --values=/tmp/agentstack-values.yaml " + + " ".join(shlex.quote(f"--set={v}") for v in scoped_sets["agentstack"]) + + "; " + + f"helm template kagenti-deps oci://ghcr.io/kagenti/kagenti/kagenti-deps --version={kagenti_chart_version} --values=/tmp/kagenti-deps-values.yaml " + + " ".join(shlex.quote(f"--set={v}") for v in scoped_sets["kagenti-deps"]) + + "; " + + f"helm template kagenti oci://ghcr.io/kagenti/kagenti/kagenti --version={kagenti_chart_version} --values=/tmp/kagenti-values.yaml " + + " ".join(shlex.quote(f"--set={v}") for v in scoped_sets["kagenti"]) + + "; } | " + image_sed, ], "Listing necessary images", ) @@ -510,6 +741,27 @@ async def start_cmd( .stdout.decode() .splitlines() } + # The post-renderer strips the x86-only Fedora postgres image and its reference + # won't appear in helm template output, so no extra image additions needed. + # Add the keycloak-themed image: the agentstack chart has keycloak disabled + # (kagenti-deps deploys it), but we patch kagenti's keycloak to use our themed image. 
+ keycloak_image = ( + await run_in_vm( + vm_name, + [ + "bash", "-c", + "helm template agentstack /tmp/agentstack-chart.tgz" + " --values=/tmp/agentstack-values.yaml" + " --set keycloak.enabled=true --set auth.enabled=true" + " --show-only templates/keycloak/statefulset.yaml " + + " ".join(shlex.quote(f"--set={v}") for v in scoped_sets.get("agentstack", [])) + + " | grep -m1 'image:.*keycloak' | sed 's/.*image: *//;s/\"//g' | tr -d ' '", + ], + "Resolving keycloak image from agentstack chart", + ) + ).stdout.decode().strip() + if keycloak_image: + loaded_images.add(keycloak_image) images_to_import_from_host, shas_guest_before = set[str](), {} if image_pull_mode in {ImagePullMode.host, ImagePullMode.hybrid}: if platform == "microshift": @@ -559,30 +811,233 @@ async def start_cmd( else [ "skopeo", "copy", - *(["--src-username", "x-access-token", "--src-password", github_token] if github_token else []), + *(["--src-username", "x-access-token", "--src-password", github_token] if github_token and image.startswith("ghcr.io/") else []), f"docker://{image}", f"containers-storage:{image}", ], f"Pulling image {image}", - env={"GITHUB_TOKEN": github_token} if github_token else None, + env={"GITHUB_TOKEN": github_token} if github_token and image.startswith("ghcr.io/") else None, ) - kubeconfig_local = anyio.Path(Configuration().lima_home) / vm_name / "copied-from-guest" / "kubeconfig.yaml" - await kubeconfig_local.parent.mkdir(parents=True, exist_ok=True) - await kubeconfig_local.write_text( - ( + + # --- Kagenti platform installation --- + await run_in_vm( + vm_name, + [ + "bash", + "-c", + textwrap.dedent("""\ + kubectl --kubeconfig=/kubeconfig apply -f https://github.com/kubernetes-sigs/gateway-api/releases/download/v1.4.0/standard-install.yaml + """), + ], + "Installing kagenti prerequisites (Gateway API CRDs)", + ) + # Install Istio as Gateway API controller (same charts/version as kagenti ansible installer) + await run_in_vm( + vm_name, + [ + "bash", + "-c", + 
textwrap.dedent("""\ + ISTIO_VERSION=1.28.0 + ISTIO_REPO=https://istio-release.storage.googleapis.com/charts/ + helm repo add istio "$ISTIO_REPO" 2>/dev/null || true + helm repo update istio + kubectl --kubeconfig=/kubeconfig create namespace istio-system --dry-run=client -o yaml | kubectl --kubeconfig=/kubeconfig apply -f - + helm upgrade --install istio-base istio/base --version=$ISTIO_VERSION --namespace=istio-system --kubeconfig=/kubeconfig --wait --force-conflicts + helm upgrade --install istiod istio/istiod --version=$ISTIO_VERSION --namespace=istio-system --kubeconfig=/kubeconfig --wait --force-conflicts \ + --set pilot.resources.requests.cpu=50m \ + --set pilot.resources.requests.memory=256Mi + """), + ], + "Installing Istio (Gateway API controller)", + ) + # Create hostPath PVs (MicroShift has no dynamic storage provisioner) + k8s_data = importlib.resources.files("agentstack_cli") / "data" / "k8s" + if platform == "microshift": + keycloak_pv_yaml = (k8s_data / "keycloak-postgres-pv.yaml").read_text() + await run_in_vm( + vm_name, + ["bash", "-c", "mkdir -p /kagenti-keycloak-postgres-data && chmod 777 /kagenti-keycloak-postgres-data && kubectl --kubeconfig=/kubeconfig apply -f -"], + "Creating PV for kagenti Keycloak Postgres", + input=keycloak_pv_yaml.encode("utf-8"), + ) + if any("components.otel.enabled=true" in v.lower() for v in scoped_sets.get("kagenti-deps", [])): + phoenix_pv_yaml = (k8s_data / "phoenix-data-pv.yaml").read_text() await run_in_vm( vm_name, - [ - "timeout", - "5m", - "bash", - "-c", - 'until grep -q "current-context:" /kubeconfig 2>/dev/null; do sleep 5; done && cat /kubeconfig', - ], - "Copying kubeconfig from Agent Stack platform", + ["bash", "-c", "mkdir -p /phoenix-data && chmod 777 /phoenix-data && kubectl --kubeconfig=/kubeconfig apply -f -"], + "Creating PV for Phoenix data", + input=phoenix_pv_yaml.encode("utf-8"), ) - ).stdout.decode() + # Install a Helm 4 post-renderer plugin that strips the postgres-otel + # StatefulSet 
from kagenti-deps (x86-only Fedora image) and patches Phoenix + # to use SQLite instead of PostgreSQL for local dev. + otel_enabled = any("components.otel.enabled=true" in v.lower() for v in scoped_sets.get("kagenti-deps", [])) + kagenti_deps_post_renderer: list[str] = [] + if otel_enabled: + patch_script = (importlib.resources.files("agentstack_cli") / "data" / "k8s" / "patch_kagenti_otel.py").read_text() + await run_in_vm( + vm_name, + [ + "bash", "-c", + textwrap.dedent("""\ + PLUGIN_DIR=/tmp/helm-plugin-patch-postgres + mkdir -p "$PLUGIN_DIR" + cat > "$PLUGIN_DIR/plugin.yaml" << 'YAML' + apiVersion: v1 + type: postrenderer/v1 + name: patch-postgres + version: 0.1.0 + runtime: subprocess + runtimeConfig: + platformCommand: + - command: ${HELM_PLUGIN_DIR}/run.sh + YAML + cat > "$PLUGIN_DIR/patch.py" + """), + ], + "Preparing post-renderer patch script", + input=patch_script.encode("utf-8"), + ) + # Write run.sh separately to avoid shebang escaping issues with bash -c + await run_in_vm( + vm_name, + [ + "python3", "-c", + "import os; " + "p='/tmp/helm-plugin-patch-postgres/run.sh'; " + f"open(p,'wb').write(b'\\x23\\x21/bin/bash\\nset -e\\nexport AGENTSTACK_KEYCLOAK_IMAGE={shlex.quote(keycloak_image)}\\nexec python3 /tmp/helm-plugin-patch-postgres/patch.py\\n'); " + "os.chmod(p, 0o755)", + ], + "Writing post-renderer entrypoint", + ) + await run_in_vm( + vm_name, + ["helm", "plugin", "install", "/tmp/helm-plugin-patch-postgres"], + "Installing post-renderer plugin", + check=False, # already installed on subsequent runs + ) + kagenti_deps_post_renderer = ["--post-renderer=patch-postgres"] + await run_in_vm( + vm_name, + [ + "helm", + "upgrade", + "--install", + "kagenti-deps", + "oci://ghcr.io/kagenti/kagenti/kagenti-deps", + f"--version={kagenti_chart_version}", + "--namespace=kagenti-system", + "--create-namespace", + "--values=/tmp/kagenti-deps-values.yaml", + "--timeout=10m", + "--kubeconfig=/kubeconfig", + "--force-conflicts", + *kagenti_deps_post_renderer, + 
*(f"--set={v}" for v in scoped_sets["kagenti-deps"]), + ], + "Installing kagenti dependencies (Keycloak)", ) + # Enable dynamic backchannel hostname so Keycloak returns the internal service URL + # (not KC_HOSTNAME) for backend-to-backend OIDC discovery requests + await run_in_vm( + vm_name, + [ + "bash", + "-c", + textwrap.dedent("""\ + kubectl --kubeconfig=/kubeconfig -n keycloak set env statefulset/keycloak KC_HOSTNAME_BACKCHANNEL_DYNAMIC=true + kubectl --kubeconfig=/kubeconfig -n keycloak rollout status statefulset/keycloak --timeout=300s + """), + ], + "Enabling Keycloak backchannel dynamic hostname", + ) + await run_in_vm( + vm_name, + [ + "helm", + "upgrade", + "--install", + "kagenti", + "oci://ghcr.io/kagenti/kagenti/kagenti", + f"--version={kagenti_chart_version}", + "--namespace=kagenti-system", + "--create-namespace", + "--values=/tmp/kagenti-values.yaml", + "--timeout=10m", + "--kubeconfig=/kubeconfig", + *(f"--set={v}" for v in scoped_sets["kagenti"]), + ], + "Installing kagenti platform (operator + backend)", + ) + # Label namespaces for shared gateway access and create agentstack HTTPRoutes + await run_in_vm( + vm_name, + [ + "bash", + "-c", + textwrap.dedent("""\ + KC=/kubeconfig + # Label namespaces so the Gateway allows HTTPRoutes from them + for ns in default keycloak kagenti-system istio-system; do + kubectl --kubeconfig=$KC label namespace $ns shared-gateway-access=true --overwrite + done + # Create HTTPRoutes for agentstack services through the shared gateway + cat <<'EOF' | kubectl --kubeconfig=$KC apply -f - + apiVersion: gateway.networking.k8s.io/v1 + kind: HTTPRoute + metadata: + name: agentstack-ui + namespace: default + spec: + parentRefs: + - name: http + namespace: kagenti-system + hostnames: + - "agentstack.localtest.me" + rules: + - backendRefs: + - name: agentstack-ui-svc + port: 8334 + --- + apiVersion: gateway.networking.k8s.io/v1 + kind: HTTPRoute + metadata: + name: agentstack-api + namespace: default + spec: + parentRefs: + - 
name: http + namespace: kagenti-system + hostnames: + - "agentstack-api.localtest.me" + rules: + - backendRefs: + - name: agentstack-server-svc + port: 8333 + --- + apiVersion: gateway.networking.k8s.io/v1 + kind: HTTPRoute + metadata: + name: otel-collector + namespace: kagenti-system + spec: + parentRefs: + - name: http + namespace: kagenti-system + hostnames: + - "otel-collector.localtest.me" + rules: + - backendRefs: + - name: otel-collector + port: 8335 + EOF + """), + ], + "Configuring gateway routes for agentstack services", + ) + + # --- Agentstack helm install --- await run_in_vm( vm_name, [ @@ -607,7 +1062,7 @@ async def start_cmd( "--timeout=20m", "--wait", "--kubeconfig=/kubeconfig", - *(f"--set={value}" for value in set_values_list), + *(f"--set={v}" for v in scoped_sets["agentstack"]), ], "Deploying Agent Stack platform with Helm", ) @@ -668,13 +1123,11 @@ async def start_cmd( input=textwrap.dedent("""\ set -euxo pipefail systemctl daemon-reload - kubectl --kubeconfig=/kubeconfig get svc -n default -o 'jsonpath={range .items[*]}{.metadata.name}{":"}{.spec.ports[*].port}{"\\n"}{end}' | while IFS=: read svc ports; do - for port in $ports; do - if [[ ( "$port" -ge 8333 && "$port" -le 8399 ) || "$port" -eq 4318 ]]; then - systemctl start "kubectl-port-forward@${svc}:${port}" - fi - done - done + # Forward the Istio gateway on port 8080 — all services route through it via hostnames + # (keycloak.localtest.me, agentstack.localtest.me, kagenti-ui.localtest.me, etc.) 
+ systemctl start "kubectl-port-forward@kagenti-system:http-istio:8080:80" & + # Forward OTel collector directly (kagenti-system namespace, not routed through gateway) + systemctl start "kubectl-port-forward@kagenti-system:otel-collector:4318" & """) .strip() .encode(), @@ -691,29 +1144,38 @@ async def start_cmd( reraise=True, ): with attempt: - (await client.get("http://localhost:8333/healthcheck")).raise_for_status() + (await client.get("http://agentstack-api.localtest.me:8080/healthcheck")).raise_for_status() except Exception as ex: raise ConnectionError( "Server did not start in 20 minutes. Please check your internet connection." ) from ex + await run_in_vm( + vm_name, + [ + "bash", + "-c", + "kubectl --kubeconfig=/kubeconfig wait --for=condition=Complete job/keycloak-provision -n default --timeout=300s", + ], + "Waiting for Keycloak provisioning to complete", + ) console.success("Agent Stack platform started successfully!") - if any("phoenix.enabled=true" in value.lower() for value in set_values_list): + phoenix_enabled = any("components.otel.enabled=true" in v.lower() for v in scoped_sets.get("kagenti-deps", [])) + if phoenix_enabled: console.print( textwrap.dedent("""\ License Notice: - When you enable Phoenix, be aware that Arize Phoenix is licensed under the Elastic License v2 (ELv2), - which has specific terms regarding commercial use and distribution. By enabling Phoenix, you acknowledge - that you are responsible for ensuring compliance with the ELv2 license terms for your specific use case. - Please review the Phoenix license (https://github.com/Arize-ai/phoenix/blob/main/LICENSE) before enabling - this feature in production environments. + Phoenix (provided by kagenti) is licensed under the Elastic License v2 (ELv2), + which has specific terms regarding commercial use and distribution. By using this platform, + you acknowledge compliance with the ELv2 license terms. 
+ See: https://github.com/Arize-ai/phoenix/blob/main/LICENSE """), style="dim", ) if not skip_login: - await agentstack_cli.commands.server.server_login("http://localhost:8333") + await agentstack_cli.commands.server.server_login("http://agentstack-api.localtest.me:8080") # ###### ######## ####### ######## @@ -783,17 +1245,11 @@ async def delete_cmd( # #### ## ## ## ####### ## ## ## -class ImageImportMode(StrEnum): - daemon = "daemon" - registry = "registry" - - @app.command("import", help="Import a local docker image into the Agent Stack platform. [Local only]") async def import_cmd( tag: typing.Annotated[str, typer.Argument(help="Docker image tag to import")], vm_name: typing.Annotated[str, typer.Option(hidden=True)] = "agentstack", verbose: typing.Annotated[bool, typer.Option("-v", "--verbose", help="Show verbose output")] = False, - mode: typing.Annotated[ImageImportMode, typer.Option("--mode")] = ImageImportMode.daemon, ): with verbosity(verbose): if (await detect_vm_status(vm_name)) != "running": @@ -819,18 +1275,10 @@ async def import_cmd( await run_command(["docker", "image", "save", "-o", host_path, tag], f"Exporting image {tag} from Docker") await run_in_vm( vm_name, - [ - "skopeo", - "copy", - f"docker-archive:{guest_path}", - f"docker://localhost:30501/{tag.split('/')[-1]}", - "--dest-tls-verify=false", - ] - if mode == ImageImportMode.registry - else ["k3s", "ctr", "images", "import", guest_path] + ["k3s", "ctr", "images", "import", guest_path] if platform == "k3s" else ["skopeo", "copy", f"docker-archive:{guest_path}:{tag}", f"containers-storage:{tag}"], - f"Importing image {tag} into Agent Stack platform {mode}", + f"Importing image {tag} into Agent Stack platform", ) finally: await anyio.Path(host_path).unlink(missing_ok=True) diff --git a/apps/agentstack-cli/src/agentstack_cli/commands/self.py b/apps/agentstack-cli/src/agentstack_cli/commands/self.py index 5de326709d..ee104402d0 100644 --- a/apps/agentstack-cli/src/agentstack_cli/commands/self.py 
+++ b/apps/agentstack-cli/src/agentstack_cli/commands/self.py @@ -160,7 +160,7 @@ async def install( ): import webbrowser - webbrowser.open("http://localhost:8334") + webbrowser.open("http://agentstack.localtest.me:8080") console.print() console.success("Installation complete!") diff --git a/apps/agentstack-cli/src/agentstack_cli/commands/server.py b/apps/agentstack-cli/src/agentstack_cli/commands/server.py index 9a25dfdcd0..819dcf355f 100644 --- a/apps/agentstack-cli/src/agentstack_cli/commands/server.py +++ b/apps/agentstack-cli/src/agentstack_cli/commands/server.py @@ -117,6 +117,15 @@ async def server_login( server = server.rstrip("/") + # Local dev shortcut: use resource owner password grant with admin:admin + if "agentstack-api.localtest.me" in server: + try: + await config.auth_manager.login_with_password(server, username="admin", password="admin") + console.success(f"Logged in to [cyan]{server}[/cyan] (local dev).") + return + except Exception as e: + console.warning(f"Auto-login failed: {e!s}. Falling back to interactive login.") + check_token = True log_in_message = "No authentication tokens found for this server. Proceeding to log in." 
diff --git a/apps/agentstack-cli/src/agentstack_cli/configuration.py b/apps/agentstack-cli/src/agentstack_cli/configuration.py index 2de1a1765a..22a2c208be 100644 --- a/apps/agentstack-cli/src/agentstack_cli/configuration.py +++ b/apps/agentstack-cli/src/agentstack_cli/configuration.py @@ -3,6 +3,7 @@ from __future__ import annotations +import contextlib import functools import importlib.metadata import pathlib @@ -15,7 +16,7 @@ import pydantic import pydantic_settings from agentstack_sdk.platform import PlatformClient, use_platform_client -from pydantic import HttpUrl, SecretStr +from pydantic import SecretStr from agentstack_cli.auth_manager import AuthManager from agentstack_cli.console import console @@ -34,13 +35,12 @@ class Configuration(pydantic_settings.BaseSettings): ) debug: bool = False home: pathlib.Path = pydantic.Field(default_factory=lambda: pathlib.Path.home() / ".agentstack") - agent_registry: pydantic.AnyUrl = HttpUrl( - f"https://github.com/i-am-bee/agentstack@v{version()}#path=agent-registry.yaml" - ) username: str = "admin" password: SecretStr | None = None server_metadata_ttl: int = 86400 + kagenti_url: str = "http://kagenti-api.localtest.me:8080" + oidc_enabled: bool = False client_id: str | None = None client_secret: str | None = None @@ -67,14 +67,22 @@ async def use_platform_client(self) -> AsyncIterator[PlatformClient]: ) sys.exit(1) + auth_token = None try: auth_token = await self.auth_manager.load_auth_token() except Exception as e: - console.error(f"Failed to load authentication: {e}") - console.hint( - f"Run [green]agentstack server login {self.auth_manager.active_server}[/green] to re-authenticate." 
- ) - sys.exit(1) + # Auto-recover for local dev by re-authenticating with admin:admin + if self.auth_manager.active_server and "agentstack-api.localtest.me" in self.auth_manager.active_server: + with contextlib.suppress(Exception): + auth_token = await self.auth_manager.login_with_password( + self.auth_manager.active_server, username="admin", password="admin" + ) + if not auth_token: + console.error(f"Failed to load authentication: {e}") + console.hint( + f"Run [green]agentstack server login {self.auth_manager.active_server}[/green] to re-authenticate." + ) + sys.exit(1) async with use_platform_client( auth=(self.username, self.password.get_secret_value()) if self.password else None, diff --git a/apps/agentstack-cli/src/agentstack_cli/data/k8s/keycloak-postgres-pv.yaml b/apps/agentstack-cli/src/agentstack_cli/data/k8s/keycloak-postgres-pv.yaml new file mode 100644 index 0000000000..0c248e62e2 --- /dev/null +++ b/apps/agentstack-cli/src/agentstack_cli/data/k8s/keycloak-postgres-pv.yaml @@ -0,0 +1,17 @@ +apiVersion: v1 +kind: PersistentVolume +metadata: + name: kagenti-keycloak-postgres-pv +spec: + capacity: + storage: 1Gi + accessModes: + - ReadWriteOnce + persistentVolumeReclaimPolicy: Retain + storageClassName: "" + hostPath: + path: /kagenti-keycloak-postgres-data + type: Directory + claimRef: + namespace: keycloak + name: postgres-data-postgres-0 diff --git a/apps/agentstack-cli/src/agentstack_cli/data/k8s/patch_kagenti_otel.py b/apps/agentstack-cli/src/agentstack_cli/data/k8s/patch_kagenti_otel.py new file mode 100644 index 0000000000..41f2e1a2e7 --- /dev/null +++ b/apps/agentstack-cli/src/agentstack_cli/data/k8s/patch_kagenti_otel.py @@ -0,0 +1,143 @@ +# Copyright 2026 © BeeAI a Series of LF Projects, LLC +# SPDX-License-Identifier: Apache-2.0 + +"""Helm post-renderer: patch kagenti-deps for local dev. + +1. Removes the postgres-otel StatefulSet and its Service/ConfigMap + (x86-only Fedora image, SCC-incompatible on MicroShift ARM). +2. 
Patches the Phoenix StatefulSet to use SQLite instead of PostgreSQL, + removing the need for an external database entirely. +3. Upgrades the Phoenix image from kagenti's outdated 8.x to 12.31.2 + (matching the agentstack main branch) for GraphQL API compatibility. +4. Patches the container registry Service to NodePort 30500 so CRI-O on + the host can pull images via the localhost mirror in registries.conf. +5. Replaces the upstream Keycloak image with the agentstack-themed build + so the login UI matches the agentstack branding. +6. Patches the otel-collector filter/phoenix from a blocklist (a2a.* only) + to an allowlist (openinference instrumentation scopes only), matching + the agentstack helm chart's collector config. +""" + +import os +import re +import sys + +content = sys.stdin.read() + +# The openinference allowlist filter rule (single OTTL condition). +# Only keeps spans from openinference instrumentation packages: +# Python: openinference.instrumentation.* +# JavaScript: @arizeai/openinference-instrumentation-* +# CrewAI: crewai.telemetry +OPENINFERENCE_FILTER_RULE = ( + "not(" + 'IsMatch(instrumentation_scope.name, "^openinference\\\\.instrumentation\\\\..*")' + " or " + 'IsMatch(instrumentation_scope.name, "^@arizeai/openinference-instrumentation-.*")' + " or " + 'instrumentation_scope.name == "crewai.telemetry"' + ")" +) + +PHOENIX_SQLITE_ENV = """\ + - name: PHOENIX_SQL_DATABASE_URL + value: "sqlite:////mnt/data/phoenix.db" + - name: PHOENIX_WORKING_DIR + value: /mnt/data + - name: PHOENIX_PORT + value: "6006" + - name: PHOENIX_GRPC_PORT + value: "4317" + - name: PHOENIX_ENABLE_AUTH + value: "false" +""" + +PHOENIX_ENV_REMOVE_PREFIXES = ( + "- name: PHOENIX_POSTGRES_", + "- name: PHOENIX_SQL_DATABASE_POOL", + "- name: PHOENIX_WORKING_DIR", + "- name: PHOENIX_PORT", + "- name: PHOENIX_GRPC_PORT", +) + +docs = content.split("\n---\n") +result = [] + +for doc in docs: + # Strip the postgres-otel StatefulSet + if "kind: StatefulSet" in doc and "name: 
postgres-otel" in doc: + continue + # Strip the postgres-otel Service + if "kind: Service" in doc and "app: postgres-otel" in doc: + continue + # Strip the postgres-otel ConfigMap (init scripts) + if "kind: ConfigMap" in doc and "name: postgres-otel-init-script" in doc: + continue + + # Patch Phoenix StatefulSet: replace postgres env vars with SQLite config + if "kind: StatefulSet" in doc and "name: phoenix\n" in doc: + lines = doc.split("\n") + filtered = [] + skip_value_block = False + for line in lines: + stripped = line.strip() + if any(stripped.startswith(prefix) for prefix in PHOENIX_ENV_REMOVE_PREFIXES): + skip_value_block = True + continue + if skip_value_block: + if ( + stripped.startswith("value") + or stripped.startswith("secretKeyRef") + or stripped.startswith("name:") + or stripped.startswith("key:") + ): + continue + skip_value_block = False + filtered.append(line) + doc = "\n".join(filtered) + doc = re.sub(r"( env:\n)", r"\1" + PHOENIX_SQLITE_ENV, doc, count=1) + # Upgrade Phoenix image to 12.31.2 (kagenti ships 8.32.1 which lacks + # the getTraceByOtelId GraphQL query needed by the feedback service). + doc = re.sub( + r"image: arizephoenix/phoenix:version-[\d.]+", + "image: arizephoenix/phoenix:version-12.31.2", + doc, + ) + + # Patch Keycloak: replace upstream image with agentstack-themed build. + # The target image is passed via AGENTSTACK_KEYCLOAK_IMAGE env var from run.sh. + keycloak_image = os.environ.get("AGENTSTACK_KEYCLOAK_IMAGE") + if keycloak_image and "namespace: keycloak" in doc and ("kind: StatefulSet" in doc or "kind: Job" in doc): + doc = re.sub( + r"image: [^\s]*keycloak[^\s]*", + f"image: {keycloak_image}", + doc, + ) + + # Patch container registry Service: ClusterIP → NodePort 30500 + # so CRI-O on the host can pull via the localhost:30500 mirror. 
+ if "kind: Service" in doc and "namespace: cr-system" in doc and "app: registry" in doc: + doc = re.sub( + r"(spec:\n)", + r"\1 type: NodePort\n", + doc, + count=1, + ) + doc = re.sub( + r"(- port: 5000\n\s+targetPort: 5000)", + r"\1\n nodePort: 30500", + doc, + count=1, + ) + + # Patch otel-collector-config: replace filter/phoenix span rules with openinference allowlist. + if "kind: ConfigMap" in doc and "name: otel-collector-config" in doc: + doc = re.sub( + r"(filter/phoenix:\s*\n\s+traces:\s*\n(\s+)span:\s*\n)(?:\s+(?:-\s+|#).*\n?)+", + lambda m: m.group(1) + m.group(2) + " - '" + OPENINFERENCE_FILTER_RULE + "'\n", + doc, + ) + + result.append(doc) + +sys.stdout.write("\n---\n".join(result)) diff --git a/apps/agentstack-cli/src/agentstack_cli/data/k8s/phoenix-data-pv.yaml b/apps/agentstack-cli/src/agentstack_cli/data/k8s/phoenix-data-pv.yaml new file mode 100644 index 0000000000..123b285efc --- /dev/null +++ b/apps/agentstack-cli/src/agentstack_cli/data/k8s/phoenix-data-pv.yaml @@ -0,0 +1,17 @@ +apiVersion: v1 +kind: PersistentVolume +metadata: + name: phoenix-data-pv +spec: + capacity: + storage: 8Gi + accessModes: + - ReadWriteOnce + persistentVolumeReclaimPolicy: Retain + storageClassName: "" + hostPath: + path: /phoenix-data + type: Directory + claimRef: + namespace: kagenti-system + name: phoenix-phoenix-0 diff --git a/apps/agentstack-cli/src/agentstack_cli/kagenti_client.py b/apps/agentstack-cli/src/agentstack_cli/kagenti_client.py new file mode 100644 index 0000000000..e16fdd5f29 --- /dev/null +++ b/apps/agentstack-cli/src/agentstack_cli/kagenti_client.py @@ -0,0 +1,77 @@ +# Copyright 2025 © BeeAI a Series of LF Projects, LLC +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +from typing import Any + +import httpx + + +class KagentiClient: + """Lightweight client for kagenti backend API.""" + + def __init__(self, base_url: str, access_token: str): + self._base_url = base_url.rstrip("/") + self._headers = { + "Authorization": 
f"Bearer {access_token}", + "Content-Type": "application/json", + } + + async def create_agent(self, request: dict[str, Any]) -> dict[str, Any]: + """Create an agent in kagenti. + + POST /api/v1/agents/ + """ + async with httpx.AsyncClient(follow_redirects=True) as client: + resp = await client.post( + f"{self._base_url}/api/v1/agents/", + json=request, + headers=self._headers, + timeout=30, + ) + resp.raise_for_status() + return resp.json() + + async def get_agent(self, namespace: str, name: str) -> dict[str, Any]: + """Get agent details. + + GET /api/v1/agents/{namespace}/{name} + """ + async with httpx.AsyncClient(follow_redirects=True) as client: + resp = await client.get( + f"{self._base_url}/api/v1/agents/{namespace}/{name}", + headers=self._headers, + timeout=30, + ) + resp.raise_for_status() + return resp.json() + + async def delete_agent(self, namespace: str, name: str) -> None: + """Delete an agent from kagenti. + + DELETE /api/v1/agents/{namespace}/{name} + """ + async with httpx.AsyncClient(follow_redirects=True) as client: + resp = await client.delete( + f"{self._base_url}/api/v1/agents/{namespace}/{name}", + headers=self._headers, + timeout=30, + ) + resp.raise_for_status() + + async def list_agents(self, namespace: str | None = None) -> list[dict[str, Any]]: + """List agents, optionally filtered by namespace. 
+ + GET /api/v1/agents/ + """ + params = {"namespace": namespace} if namespace else {} + async with httpx.AsyncClient(follow_redirects=True) as client: + resp = await client.get( + f"{self._base_url}/api/v1/agents/", + headers=self._headers, + params=params, + timeout=30, + ) + resp.raise_for_status() + return resp.json() diff --git a/apps/agentstack-cli/src/agentstack_cli/utils.py b/apps/agentstack-cli/src/agentstack_cli/utils.py index 12b7e4a8fb..71388d7a02 100644 --- a/apps/agentstack-cli/src/agentstack_cli/utils.py +++ b/apps/agentstack-cli/src/agentstack_cli/utils.py @@ -53,7 +53,6 @@ "merge", "parse_env_var", "print_httpx_response_error_details", - "print_log", "prompt_user", "remove_nullable", "run_command", @@ -303,28 +302,6 @@ def verbosity(verbose: bool, show_success_status: bool = True): SHOW_SUCCESS_STATUS.reset(token_command_status) -def print_log(line, ansi_mode=False, out_console: Console | None = None): - if "error" in line: - - class CustomError(Exception): ... - - CustomError.__name__ = line["error"]["type"] - - raise CustomError(line["error"]["detail"]) - - def decode(text: str): - return Text.from_ansi(text) if ansi_mode else text - - match line: - case {"stream": "stderr"}: - (out_console or err_console).print(decode(line["message"])) - case {"stream": "stdout"}: - (out_console or console).print(decode(line["message"])) - case {"event": "[DONE]"}: - return - case _: - (out_console or console).print(line) - # ! 
This pattern is taken from agentstack_server.utils.github.GithubUrl, make sure to keep it in sync github_url_verbose_pattern = r""" diff --git a/apps/agentstack-sdk-py/src/agentstack_sdk/a2a/extensions/services/platform.py b/apps/agentstack-sdk-py/src/agentstack_sdk/a2a/extensions/services/platform.py index 6f717705ae..92a23b90db 100644 --- a/apps/agentstack-sdk-py/src/agentstack_sdk/a2a/extensions/services/platform.py +++ b/apps/agentstack-sdk-py/src/agentstack_sdk/a2a/extensions/services/platform.py @@ -97,7 +97,7 @@ def handle_incoming_message(self, message: A2AMessage, run_context: RunContext, self._metadata_from_client = self._metadata_from_client or PlatformApiExtensionMetadata() data = self._metadata_from_client - data.base_url = data.base_url or HttpUrl(os.getenv("PLATFORM_URL", "http://127.0.0.1:8333")) + data.base_url = data.base_url or HttpUrl(os.getenv("PLATFORM_URL", "http://agentstack-api.localtest.me:8080")) auth_token = data.auth_token or self._get_header_token(request_context) data.auth_token = pydantic.SecretStr(auth_token.get_secret_value()) if auth_token else None diff --git a/apps/agentstack-sdk-py/src/agentstack_sdk/platform/__init__.py b/apps/agentstack-sdk-py/src/agentstack_sdk/platform/__init__.py index bfad54342e..7ffc3216ec 100644 --- a/apps/agentstack-sdk-py/src/agentstack_sdk/platform/__init__.py +++ b/apps/agentstack-sdk-py/src/agentstack_sdk/platform/__init__.py @@ -9,8 +9,6 @@ from .file import * from .model_provider import * from .provider import * -from .provider_build import * -from .provider_discovery import * from .user import * from .user_feedback import * from .vector_store import * diff --git a/apps/agentstack-sdk-py/src/agentstack_sdk/platform/client.py b/apps/agentstack-sdk-py/src/agentstack_sdk/platform/client.py index 168e75ac54..027cdff869 100644 --- a/apps/agentstack-sdk-py/src/agentstack_sdk/platform/client.py +++ b/apps/agentstack-sdk-py/src/agentstack_sdk/platform/client.py @@ -52,7 +52,7 @@ def __init__( 
default_encoding: str | typing.Callable[[bytes], str] = "utf-8", ) -> None: if not base_url: - base_url = os.environ.get("PLATFORM_URL", "http://127.0.0.1:8333") + base_url = os.environ.get("PLATFORM_URL", "http://agentstack-api.localtest.me:8080") super().__init__( auth=auth, params=params, diff --git a/apps/agentstack-sdk-py/src/agentstack_sdk/platform/common.py b/apps/agentstack-sdk-py/src/agentstack_sdk/platform/common.py index 55541e04b8..d95b0a1a50 100644 --- a/apps/agentstack-sdk-py/src/agentstack_sdk/platform/common.py +++ b/apps/agentstack-sdk-py/src/agentstack_sdk/platform/common.py @@ -2,7 +2,6 @@ # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations -from enum import StrEnum from typing import Generic, TypeVar from pydantic import BaseModel @@ -15,25 +14,3 @@ class PaginatedResult(BaseModel, Generic[T]): total_count: int has_more: bool = False next_page_token: str | None = None - - -class GithubVersionType(StrEnum): - HEAD = "head" - TAG = "tag" - - -class ResolvedGithubUrl(BaseModel): - host: str = "github.com" - org: str - repo: str - version: str - version_type: GithubVersionType - commit_hash: str - path: str | None = None - - -class ResolvedDockerImageID(BaseModel): - registry: str - repository: str - tag: str - digest: str diff --git a/apps/agentstack-sdk-py/src/agentstack_sdk/platform/provider.py b/apps/agentstack-sdk-py/src/agentstack_sdk/platform/provider.py index 1d12bc3b02..0bb87c91da 100644 --- a/apps/agentstack-sdk-py/src/agentstack_sdk/platform/provider.py +++ b/apps/agentstack-sdk-py/src/agentstack_sdk/platform/provider.py @@ -8,7 +8,6 @@ import typing import urllib.parse from contextlib import asynccontextmanager -from datetime import timedelta from uuid import UUID import pydantic @@ -16,41 +15,25 @@ from a2a.types import AgentCard from agentstack_sdk.platform.client import PlatformClient, get_platform_client -from agentstack_sdk.platform.common import ResolvedDockerImageID, ResolvedGithubUrl -from 
agentstack_sdk.util.utils import filter_dict, parse_stream +from agentstack_sdk.util.utils import filter_dict class ProviderErrorMessage(pydantic.BaseModel): message: str -class EnvVar(pydantic.BaseModel): - name: str - description: str | None = None - required: bool = False - - -class VersionInfo(pydantic.BaseModel): - docker: ResolvedDockerImageID | None = None - github: ResolvedGithubUrl | None = None - - class Provider(pydantic.BaseModel): id: str - auto_stop_timeout: timedelta source: str origin: str - version_info: VersionInfo = pydantic.Field(default_factory=VersionInfo) - registry: str | None = None + source_type: typing.Literal["kagenti", "api"] = "api" created_at: pydantic.AwareDatetime updated_at: pydantic.AwareDatetime last_active_at: pydantic.AwareDatetime agent_card: AgentCard - state: typing.Literal["missing", "starting", "ready", "running", "error", "online", "offline"] = "missing" - managed: bool + state: typing.Literal["online", "offline"] = "online" last_error: ProviderErrorMessage | None = None created_by: UUID - missing_configuration: builtins.list[EnvVar] = pydantic.Field(default_factory=list) @staticmethod async def create( @@ -58,12 +41,8 @@ async def create( location: str, agent_card: AgentCard | None = None, origin: str | None = None, - auto_stop_timeout: timedelta | None = None, - variables: dict[str, str] | None = None, client: PlatformClient | None = None, ) -> "Provider": - auto_stop_timeout_sec = auto_stop_timeout.total_seconds() if auto_stop_timeout is not None else None - async with client or get_platform_client() as client: return pydantic.TypeAdapter(Provider).validate_python( ( @@ -74,8 +53,6 @@ async def create( "location": location, "agent_card": agent_card.model_dump(mode="json") if agent_card else None, "origin": origin, - "variables": variables, - "auto_stop_timeout_sec": auto_stop_timeout_sec, } ), ) @@ -90,8 +67,6 @@ async def patch( location: str | None = None, agent_card: AgentCard | None = None, origin: str | None = 
None, - auto_stop_timeout: timedelta | None = None, - variables: dict[str, str] | None = None, client: PlatformClient | None = None, ) -> "Provider": # `self` has a weird type so that you can call both `instance.patch()` to update an instance, or `Provider.patch("123", ...)` to update a provider @@ -101,8 +76,6 @@ async def patch( { "location": location, "agent_card": agent_card.model_dump(mode="json") if agent_card else None, - "variables": variables, - "auto_stop_timeout_sec": None if auto_stop_timeout is None else auto_stop_timeout.total_seconds(), "origin": origin, } ) @@ -168,42 +141,6 @@ async def delete(self: "Provider" | str, *, client: PlatformClient | None = None async with client or get_platform_client() as client: _ = (await client.delete(f"/api/v1/providers/{provider_id}")).raise_for_status() - async def update_variables( - self: "Provider" | str, - *, - variables: dict[str, str | None] | dict[str, str], - client: PlatformClient | None = None, - ) -> None: - # `self` has a weird type so that you can call both `instance.delete()` or `Provider.delete("123")` - provider_id = self if isinstance(self, str) else self.id - async with client or get_platform_client() as client: - _ = ( - await client.put(f"/api/v1/providers/{provider_id}/variables", json={"variables": variables}) - ).raise_for_status() - - async def stream_logs( - self: "Provider" | str, *, client: PlatformClient | None = None - ) -> typing.AsyncIterator[dict[str, typing.Any]]: - # `self` has a weird type so that you can call both `instance.stream_logs()` or `ProviderBuild.stream_logs("123")` - provider_id = self if isinstance(self, str) else self.id - async with ( - client or get_platform_client() as client, - client.stream( - "GET", - url=f"/api/v1/providers/{provider_id}/logs", - timeout=timedelta(hours=1).total_seconds(), - ) as response, - ): - async for line in parse_stream(response): - yield line - - async def list_variables(self: "Provider" | str, *, client: PlatformClient | None = 
None) -> dict[str, str]: - # `self` has a weird type so that you can call both `instance.delete()` or `Provider.delete("123")` - provider_id = self if isinstance(self, str) else self.id - async with client or get_platform_client() as client: - result = await client.get(f"/api/v1/providers/{provider_id}/variables") - return result.raise_for_status().json()["variables"] - @staticmethod async def list( *, origin: str | None = None, user_owned: bool | None = None, client: PlatformClient | None = None diff --git a/apps/agentstack-sdk-py/src/agentstack_sdk/platform/provider_build.py b/apps/agentstack-sdk-py/src/agentstack_sdk/platform/provider_build.py deleted file mode 100644 index d6916410e2..0000000000 --- a/apps/agentstack-sdk-py/src/agentstack_sdk/platform/provider_build.py +++ /dev/null @@ -1,191 +0,0 @@ -# Copyright 2025 © BeeAI a Series of LF Projects, LLC -# SPDX-License-Identifier: Apache-2.0 - -from __future__ import annotations - -from collections.abc import AsyncIterator -from datetime import timedelta -from enum import StrEnum -from pathlib import Path -from typing import Any, Literal, TypeAlias -from uuid import UUID - -import pydantic -from pydantic import Field - -from agentstack_sdk.platform.client import PlatformClient, get_platform_client -from agentstack_sdk.platform.common import PaginatedResult, ResolvedGithubUrl -from agentstack_sdk.util.utils import filter_dict, parse_stream - - -class BuildState(StrEnum): - MISSING = "missing" - IN_PROGRESS = "in_progress" - BUILD_COMPLETED = "build_completed" - COMPLETED = "completed" - FAILED = "failed" - - -class AddProvider(pydantic.BaseModel): - """ - Will add a new provider or update an existing one with the same base docker image ID - (docker registry + repository, excluding tag) - """ - - type: Literal["add_provider"] = "add_provider" - auto_stop_timeout_sec: int | None = pydantic.Field( - default=None, - gt=0, - le=600, - description=( - "Timeout after which the agent provider will be automatically 
downscaled if unused." - "Contact administrator if you need to increase this value." - ), - ) - variables: dict[str, str] | None = None - - -class UpdateProvider(pydantic.BaseModel): - """Will update provider specified by ID""" - - type: Literal["update_provider"] = "update_provider" - provider_id: UUID - - -class NoAction(pydantic.BaseModel): - type: Literal["no_action"] = "no_action" - - -class BuildConfiguration(pydantic.BaseModel): - dockerfile_path: Path | None = Field( - default=None, - description=( - "Path to Dockerfile relative to the repository path " - "(provider_build.source.path or repository root if not defined)" - ), - ) - - -OnCompleteAction: TypeAlias = AddProvider | UpdateProvider | NoAction - - -class ProviderBuild(pydantic.BaseModel): - id: str - created_at: pydantic.AwareDatetime - status: BuildState - source: ResolvedGithubUrl - destination: str - provider_id: str | None = None - build_configuration: BuildConfiguration | None = None - created_by: str - error_message: str | None = None - provider_origin: str - - @staticmethod - async def create( - *, - location: str, - client: PlatformClient | None = None, - on_complete: OnCompleteAction | None = None, - build_configuration: BuildConfiguration | None = None, - ) -> "ProviderBuild": - on_complete = on_complete or NoAction() - async with client or get_platform_client() as client: - return pydantic.TypeAdapter(ProviderBuild).validate_python( - ( - await client.post( - url="/api/v1/provider_builds", - json={ - "location": location, - "on_complete": on_complete.model_dump(exclude_none=True, mode="json"), - "build_configuration": build_configuration.model_dump(exclude_none=True, mode="json") - if build_configuration - else None, - }, - ) - ) - .raise_for_status() - .json() - ) - - @staticmethod - async def preview( - *, location: str, client: PlatformClient | None = None, on_complete: OnCompleteAction | None = None - ) -> "ProviderBuild": - on_complete = on_complete or NoAction() - async with client 
or get_platform_client() as client: - return pydantic.TypeAdapter(ProviderBuild).validate_python( - ( - await client.post( - url="/api/v1/provider_builds/preview", - json={"location": location, "on_complete": on_complete.model_dump(exclude_none=True)}, - ) - ) - .raise_for_status() - .json() - ) - - async def stream_logs( - self: "ProviderBuild" | str, *, client: PlatformClient | None = None - ) -> AsyncIterator[dict[str, Any]]: - # `self` has a weird type so that you can call both `instance.stream_logs()` or `ProviderBuild.stream_logs("123")` - provider_build_id = self if isinstance(self, str) else self.id - async with ( - client or get_platform_client() as client, - client.stream( - "GET", - url=f"/api/v1/provider_builds/{provider_build_id}/logs", - timeout=timedelta(hours=1).total_seconds(), - ) as response, - ): - async for line in parse_stream(response): - yield line - - async def get(self: "ProviderBuild" | str, *, client: PlatformClient | None = None) -> "ProviderBuild": - # `self` has a weird type so that you can call both `instance.get()` to update an instance, or `ProviderBuild.get("123")` to obtain a new instance - provider_build_id = self if isinstance(self, str) else self.id - async with client or get_platform_client() as client: - result = pydantic.TypeAdapter(ProviderBuild).validate_json( - (await client.get(url=f"/api/v1/provider_builds/{provider_build_id}")).raise_for_status().content - ) - if isinstance(self, ProviderBuild): - self.__dict__.update(result.__dict__) - return self - return result - - async def delete(self: "ProviderBuild" | str, *, client: PlatformClient | None = None) -> None: - # `self` has a weird type so that you can call both `instance.delete()` or `ProviderBuild.delete("123")` - provider_build_id = self if isinstance(self, str) else self.id - async with client or get_platform_client() as client: - _ = (await client.delete(f"/api/v1/provider_builds/{provider_build_id}")).raise_for_status() - - @staticmethod - async def list( - 
*, - page_token: str | None = None, - limit: int | None = None, - order: Literal["asc"] | Literal["desc"] | None = "asc", - order_by: Literal["created_at"] | Literal["updated_at"] | None = None, - user_owned: bool | None = None, - client: PlatformClient | None = None, - ) -> PaginatedResult["ProviderBuild"]: - # `self` has a weird type so that you can call both `instance.list_history()` or `ProviderBuild.list_history("123")` - async with client or get_platform_client() as platform_client: - return pydantic.TypeAdapter(PaginatedResult[ProviderBuild]).validate_python( - ( - await platform_client.get( - url="/api/v1/provider_builds", - params=filter_dict( - { - "page_token": page_token, - "limit": limit, - "order": order, - "order_by": order_by, - "user_owned": user_owned, - } - ), - ) - ) - .raise_for_status() - .json() - ) diff --git a/apps/agentstack-sdk-py/src/agentstack_sdk/platform/provider_discovery.py b/apps/agentstack-sdk-py/src/agentstack_sdk/platform/provider_discovery.py deleted file mode 100644 index bda939deeb..0000000000 --- a/apps/agentstack-sdk-py/src/agentstack_sdk/platform/provider_discovery.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright 2025 © BeeAI a Series of LF Projects, LLC -# SPDX-License-Identifier: Apache-2.0 - -from __future__ import annotations - -from enum import StrEnum -from uuid import UUID - -import pydantic -from a2a.types import AgentCard - -from agentstack_sdk.platform.client import PlatformClient, get_platform_client - - -class DiscoveryState(StrEnum): - PENDING = "pending" - IN_PROGRESS = "in_progress" - COMPLETED = "completed" - FAILED = "failed" - - -class ProviderDiscovery(pydantic.BaseModel): - id: UUID - created_at: pydantic.AwareDatetime - status: DiscoveryState - docker_image: str - created_by: UUID - agent_card: AgentCard | None = None - error_message: str | None = None - - @staticmethod - async def create( - *, - docker_image: str, - client: PlatformClient | None = None, - ) -> "ProviderDiscovery": - async with client or 
get_platform_client() as client: - return pydantic.TypeAdapter(ProviderDiscovery).validate_python( - ( - await client.post( - url="/api/v1/providers/discovery", - json={"docker_image": docker_image}, - ) - ) - .raise_for_status() - .json() - ) - - async def get(self: "ProviderDiscovery" | str, *, client: PlatformClient | None = None) -> "ProviderDiscovery": - discovery_id = self if isinstance(self, str) else str(self.id) - async with client or get_platform_client() as client: - result = pydantic.TypeAdapter(ProviderDiscovery).validate_json( - (await client.get(url=f"/api/v1/providers/discovery/{discovery_id}")).raise_for_status().content - ) - if isinstance(self, ProviderDiscovery): - self.__dict__.update(result.__dict__) - return self - return result diff --git a/apps/agentstack-sdk-py/src/agentstack_sdk/server/server.py b/apps/agentstack-sdk-py/src/agentstack_sdk/server/server.py index 736553e974..848244593a 100644 --- a/apps/agentstack-sdk-py/src/agentstack_sdk/server/server.py +++ b/apps/agentstack-sdk-py/src/agentstack_sdk/server/server.py @@ -20,7 +20,6 @@ from a2a.server.agent_execution import RequestContextBuilder from a2a.server.events import QueueManager from a2a.server.tasks import PushNotificationConfigStore, PushNotificationSender, TaskStore -from a2a.types import AgentExtension from fastapi import FastAPI from fastapi.applications import AppType from fastapi.responses import PlainTextResponse @@ -55,7 +54,6 @@ def __init__(self) -> None: self._self_registration_client: PlatformClient | None = None self._self_registration_id: str | None = None self._provider_id: str | None = None - self._all_configured_variables: set[str] = set() @functools.wraps(agent_decorator) def agent(self, *args, **kwargs) -> Callable: @@ -145,9 +143,19 @@ async def serve( card_url = url and url.strip() self._agent.card.url = card_url.rstrip("/") if card_url else f"http://{host}:{port}" - self._self_registration_client = ( - self_registration_client_factory() if 
self_registration_client_factory else None - ) + if self_registration_client_factory: + self._self_registration_client = self_registration_client_factory() + elif not self._production_mode: + # Use basic auth (admin:admin) for local dev self-registration + self._self_registration_client = PlatformClient( + base_url=self._platform_url, + auth=( + os.getenv("PLATFORM_USERNAME", "admin"), + os.getenv("PLATFORM_PASSWORD", "admin"), + ), + ) + else: + self._self_registration_client = None self._self_registration_id = urllib.parse.quote(self_registration_id or self._agent.card.name) if headers is None: @@ -165,7 +173,6 @@ async def serve( async def _lifespan_fn(app: FastAPI) -> AsyncGenerator[None, None]: async with self._self_registration_client or nullcontext(): register_task = asyncio.create_task(self._register_agent()) if self_registration else None - reload_task = asyncio.create_task(self._reload_variables_periodically()) if self_registration else None try: # pyrefly: ignore [bad-argument-type] -- probably bug in Pyrefly @@ -175,9 +182,6 @@ async def _lifespan_fn(app: FastAPI) -> AsyncGenerator[None, None]: if register_task: with suppress(Exception): await cancel_task(register_task) - if reload_task: - with suppress(Exception): - await cancel_task(reload_task) card_url = AnyUrl(self._agent.card.url) if card_url.host == "invalid": @@ -293,48 +297,12 @@ def should_exit(self, value: bool) -> None: @property def _platform_url(self) -> str: - return os.getenv("PLATFORM_URL", "http://127.0.0.1:8333") + return os.getenv("PLATFORM_URL", "http://agentstack-api.localtest.me:8080") @property def _production_mode(self) -> bool: return os.getenv("PRODUCTION_MODE", "").lower() in ["true", "1"] - async def _reload_variables_periodically(self): - while True: - await asyncio.sleep(5) - await self._load_variables() - - async def _load_variables(self, first_run: bool = False) -> None: - from agentstack_sdk.a2a.extensions import AgentDetail, AgentDetailExtensionSpec - - assert 
self.server and self._agent - if not self._provider_id: - return - - variables = await Provider.list_variables(self._provider_id, client=self._self_registration_client) - old_variables = self._all_configured_variables.copy() - - for variable in list(self._all_configured_variables - variables.keys()): # reset removed variables - os.environ.pop(variable, None) - self._all_configured_variables.remove(variable) - - os.environ.update(variables) - self._all_configured_variables.update(variables.keys()) - - if dirty := old_variables != self._all_configured_variables: - logger.info(f"Environment variables reloaded dynamically: {self._all_configured_variables}") - - if first_run or dirty: - for extension in self._agent.card.capabilities.extensions or []: - match extension: - case AgentExtension(uri=AgentDetailExtensionSpec.URI, params=params): - variables = AgentDetail.model_validate(params).variables or [] - if missing_keys := [env for env in variables if env.required and os.getenv(env.name) is None]: - logger.warning( - f"Missing required env variables: {missing_keys}, " - f"add them using `agentstack env add key=value`" - ) - async def _register_agent(self) -> None: """If not in PRODUCTION mode, register agent to the agentstack platform and provide missing env variables""" assert self.server and self._agent @@ -369,8 +337,6 @@ async def _register_agent(self) -> None: ) self._provider_id = provider.id logger.debug("Agent registered to the agentstack server.") - await self._load_variables() - logger.debug("Environment variables loaded dynamically.") logger.info("Agent registered successfully") except HTTPStatusError as e: with suppress(Exception): diff --git a/apps/agentstack-sdk-py/src/agentstack_sdk/server/telemetry.py b/apps/agentstack-sdk-py/src/agentstack_sdk/server/telemetry.py index 9e286925f4..8cfe429d33 100644 --- a/apps/agentstack-sdk-py/src/agentstack_sdk/server/telemetry.py +++ b/apps/agentstack-sdk-py/src/agentstack_sdk/server/telemetry.py @@ -4,6 +4,7 @@ from 
__future__ import annotations import logging +import os from fastapi import FastAPI from opentelemetry import metrics, trace @@ -29,9 +30,16 @@ root_logger = logging.getLogger() +_DEFAULT_OTEL_ENDPOINT = "http://otel-collector.localtest.me:8080" + + def configure_telemetry(app: FastAPI) -> None: """Utility that configures opentelemetry with OTLP exporter and FastAPI instrumentation""" + # Set a sensible default OTLP endpoint for local agentstack deployments + if not os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT"): + os.environ["OTEL_EXPORTER_OTLP_ENDPOINT"] = _DEFAULT_OTEL_ENDPOINT + FastAPIInstrumentor.instrument_app(app) httpxclient_instrumentor = HTTPXClientInstrumentor() diff --git a/apps/agentstack-sdk-ts/src/client/api/common/schemas.ts b/apps/agentstack-sdk-ts/src/client/api/common/schemas.ts index 6a7e109244..e01e1ed1b1 100644 --- a/apps/agentstack-sdk-ts/src/client/api/common/schemas.ts +++ b/apps/agentstack-sdk-ts/src/client/api/common/schemas.ts @@ -5,40 +5,8 @@ import z from 'zod'; -import { GitHubVersionType } from './types'; - -export const gitHubVersionTypeSchema = z.enum(GitHubVersionType); - -export const gitHubRegistryLocationSchema = z.string(); - -export const networkRegistryLocationSchema = z.string(); - -export const fileSystemRegistryLocationSchema = z.string(); - -export const dockerImageIdSchema = z.string(); - -export const dockerImageProviderLocationSchema = dockerImageIdSchema; - export const networkProviderLocationSchema = z.string(); -export const resolvedGitHubUrlSchema = z.object({ - host: z.string(), - org: z.string(), - repo: z.string(), - version: z.string(), - version_type: gitHubVersionTypeSchema, - commit_hash: z.string(), - path: z.string().nullish(), -}); - -export const resolvedDockerImageIdSchema = z.object({ - registry: z.string(), - repository: z.string(), - tag: z.string(), - digest: z.string(), - image_id: dockerImageIdSchema, -}); - export const readableStreamSchema = z.custom>>( (value) => value instanceof 
ReadableStream, { error: 'Expected ReadableStream' }, diff --git a/apps/agentstack-sdk-ts/src/client/api/common/types.ts b/apps/agentstack-sdk-ts/src/client/api/common/types.ts index 25634b66dc..63f8da894b 100644 --- a/apps/agentstack-sdk-ts/src/client/api/common/types.ts +++ b/apps/agentstack-sdk-ts/src/client/api/common/types.ts @@ -5,30 +5,6 @@ import type z from 'zod'; -import type { - dockerImageIdSchema, - dockerImageProviderLocationSchema, - fileSystemRegistryLocationSchema, - gitHubRegistryLocationSchema, - networkProviderLocationSchema, - networkRegistryLocationSchema, - resolvedDockerImageIdSchema, - resolvedGitHubUrlSchema, -} from './schemas'; +import type { networkProviderLocationSchema } from './schemas'; -export enum GitHubVersionType { - Head = 'head', - Tag = 'tag', -} - -export type GitHubRegistryLocation = z.infer; -export type NetworkRegistryLocation = z.infer; -export type FileSystemRegistryLocation = z.infer; - -export type DockerImageId = z.infer; - -export type DockerImageProviderLocation = z.infer; export type NetworkProviderLocation = z.infer; - -export type ResolvedGitHubUrl = z.infer; -export type ResolvedDockerImageId = z.infer; diff --git a/apps/agentstack-sdk-ts/src/client/api/core/client.ts b/apps/agentstack-sdk-ts/src/client/api/core/client.ts index 42ea637b4b..31e38537f9 100644 --- a/apps/agentstack-sdk-ts/src/client/api/core/client.ts +++ b/apps/agentstack-sdk-ts/src/client/api/core/client.ts @@ -10,7 +10,6 @@ import { createConnectorsApi } from '../connectors/api'; import { createContextsApi } from '../contexts/api'; import { createFilesApi } from '../files/api'; import { createModelProvidersApi } from '../model-providers/api'; -import { createProviderBuildsApi } from '../provider-builds/api'; import { createProvidersApi } from '../providers/api'; import { createUserFeedbackApi } from '../user-feedback/api'; import { createUsersApi } from '../users/api'; @@ -122,7 +121,6 @@ export const buildApiClient = ( const contextsApi = 
createContextsApi(callApi); const filesApi = createFilesApi(callApi); const modelProvidersApi = createModelProvidersApi(callApi); - const providerBuildsApi = createProviderBuildsApi(callApi); const providersApi = createProvidersApi(callApi); const userFeedbackApi = createUserFeedbackApi(callApi); const usersApi = createUsersApi(callApi); @@ -134,7 +132,6 @@ export const buildApiClient = ( ...contextsApi, ...filesApi, ...modelProvidersApi, - ...providerBuildsApi, ...providersApi, ...userFeedbackApi, ...usersApi, diff --git a/apps/agentstack-sdk-ts/src/client/api/provider-builds/api.ts b/apps/agentstack-sdk-ts/src/client/api/provider-builds/api.ts deleted file mode 100644 index 7a4cd5db53..0000000000 --- a/apps/agentstack-sdk-ts/src/client/api/provider-builds/api.ts +++ /dev/null @@ -1,80 +0,0 @@ -/** - * Copyright 2025 © BeeAI a Series of LF Projects, LLC - * SPDX-License-Identifier: Apache-2.0 - */ - -import type { CallApi } from '../core/types'; -import { ApiMethod } from '../core/types'; -import { - createProviderBuildResponseSchema, - deleteProviderBuildResponseSchema, - listProviderBuildsResponseSchema, - previewProviderBuildResponseSchema, - readProviderBuildLogsResponseSchema, - readProviderBuildResponseSchema, -} from './schemas'; -import type { - CreateProviderBuildRequest, - DeleteProviderBuildRequest, - ListProviderBuildsRequest, - PreviewProviderBuildRequest, - ReadProviderBuildLogsRequest, - ReadProviderBuildRequest, -} from './types'; - -export function createProviderBuildsApi(callApi: CallApi) { - const listProviderBuilds = ({ query }: ListProviderBuildsRequest) => - callApi({ - method: ApiMethod.Get, - path: '/api/v1/provider_builds', - schema: listProviderBuildsResponseSchema, - query, - }); - - const createProviderBuild = ({ ...body }: CreateProviderBuildRequest) => - callApi({ - method: ApiMethod.Post, - path: '/api/v1/provider_builds', - schema: createProviderBuildResponseSchema, - body, - }); - - const readProviderBuild = ({ id }: 
ReadProviderBuildRequest) => - callApi({ - method: ApiMethod.Get, - path: `/api/v1/provider_builds/${id}`, - schema: readProviderBuildResponseSchema, - }); - - const deleteProviderBuild = ({ id }: DeleteProviderBuildRequest) => - callApi({ - method: ApiMethod.Delete, - path: `/api/v1/provider_builds/${id}`, - schema: deleteProviderBuildResponseSchema, - }); - - const readProviderBuildLogs = ({ id }: ReadProviderBuildLogsRequest) => - callApi({ - method: ApiMethod.Get, - path: `/api/v1/provider_builds/${id}/logs`, - schema: readProviderBuildLogsResponseSchema, - parseAsStream: true, - }); - - const previewProviderBuild = ({ ...body }: PreviewProviderBuildRequest) => - callApi({ - method: ApiMethod.Post, - path: '/api/v1/provider_builds/preview', - schema: previewProviderBuildResponseSchema, - body, - }); - - return { - listProviderBuilds, - createProviderBuild, - readProviderBuild, - deleteProviderBuild, - readProviderBuildLogs, - previewProviderBuild, - }; -} diff --git a/apps/agentstack-sdk-ts/src/client/api/provider-builds/schemas.ts b/apps/agentstack-sdk-ts/src/client/api/provider-builds/schemas.ts deleted file mode 100644 index 3564748af2..0000000000 --- a/apps/agentstack-sdk-ts/src/client/api/provider-builds/schemas.ts +++ /dev/null @@ -1,98 +0,0 @@ -/** - * Copyright 2025 © BeeAI a Series of LF Projects, LLC - * SPDX-License-Identifier: Apache-2.0 - */ - -import z from 'zod'; - -import { dockerImageIdSchema, readableStreamSchema, resolvedGitHubUrlSchema } from '../../api/common/schemas'; -import { paginatedResponseSchema, paginationQuerySchema } from '../core/schemas'; -import { ProviderBuildState } from './types'; - -export const providerBuildStateSchema = z.enum(ProviderBuildState); - -export const providerBuildAddActionSchema = z.object({ - type: z.literal('add_provider'), - auto_stop_timeout_sec: z.number().nullish(), - variables: z.record(z.string(), z.string()).nullish(), -}); - -export const providerBuildUpdateActionSchema = z.object({ - type: 
z.literal('update_provider'), - provider_id: z.string(), -}); - -export const providerBuildNoActionSchema = z.object({ - type: z.literal('no_action'), -}); - -export const providerBuildOnCompleteActionSchema = z.union([ - providerBuildAddActionSchema, - providerBuildUpdateActionSchema, - providerBuildNoActionSchema, -]); - -export const providerBuildConfigurationSchema = z.object({ - dockerfile_path: z.string().nullish(), -}); - -export const providerBuildSchema = z.object({ - id: z.string(), - created_at: z.string(), - created_by: z.string(), - provider_origin: z.string(), - status: providerBuildStateSchema, - source: resolvedGitHubUrlSchema, - destination: dockerImageIdSchema, - on_complete: providerBuildOnCompleteActionSchema, - build_configuration: providerBuildConfigurationSchema.nullish(), - provider_id: z.string().nullish(), - error_message: z.string().nullish(), -}); - -export const listProviderBuildsRequestSchema = z.object({ - query: paginationQuerySchema - .extend({ - status: providerBuildStateSchema.nullish(), - user_owned: z.boolean().nullish(), - }) - .optional(), -}); - -export const listProviderBuildsResponseSchema = paginatedResponseSchema.extend({ - items: z.array(providerBuildSchema), -}); - -export const createProviderBuildRequestSchema = z.object({ - location: z.string(), - build_configuration: providerBuildConfigurationSchema.nullish(), - on_complete: providerBuildOnCompleteActionSchema.optional(), -}); - -export const createProviderBuildResponseSchema = providerBuildSchema; - -export const readProviderBuildRequestSchema = z.object({ - id: z.string(), -}); - -export const readProviderBuildResponseSchema = providerBuildSchema; - -export const deleteProviderBuildRequestSchema = z.object({ - id: z.string(), -}); - -export const deleteProviderBuildResponseSchema = z.null(); - -export const readProviderBuildLogsRequestSchema = z.object({ - id: z.string(), -}); - -export const readProviderBuildLogsResponseSchema = readableStreamSchema; - -export 
const previewProviderBuildRequestSchema = z.object({ - location: z.string(), - build_configuration: providerBuildConfigurationSchema.nullish(), - on_complete: providerBuildOnCompleteActionSchema.optional(), -}); - -export const previewProviderBuildResponseSchema = providerBuildSchema; diff --git a/apps/agentstack-sdk-ts/src/client/api/provider-builds/types.ts b/apps/agentstack-sdk-ts/src/client/api/provider-builds/types.ts deleted file mode 100644 index 2f5bc68439..0000000000 --- a/apps/agentstack-sdk-ts/src/client/api/provider-builds/types.ts +++ /dev/null @@ -1,61 +0,0 @@ -/** - * Copyright 2025 © BeeAI a Series of LF Projects, LLC - * SPDX-License-Identifier: Apache-2.0 - */ - -import type z from 'zod'; - -import type { - createProviderBuildRequestSchema, - createProviderBuildResponseSchema, - deleteProviderBuildRequestSchema, - deleteProviderBuildResponseSchema, - listProviderBuildsRequestSchema, - listProviderBuildsResponseSchema, - previewProviderBuildRequestSchema, - previewProviderBuildResponseSchema, - providerBuildAddActionSchema, - providerBuildConfigurationSchema, - providerBuildNoActionSchema, - providerBuildOnCompleteActionSchema, - providerBuildSchema, - providerBuildUpdateActionSchema, - readProviderBuildLogsRequestSchema, - readProviderBuildLogsResponseSchema, - readProviderBuildRequestSchema, - readProviderBuildResponseSchema, -} from './schemas'; - -export enum ProviderBuildState { - Missing = 'missing', - InProgress = 'in_progress', - BuildCompleted = 'build_completed', - Completed = 'completed', - Failed = 'failed', -} - -export type ProviderBuildAddAction = z.infer; -export type ProviderBuildUpdateAction = z.infer; -export type ProviderBuildNoAction = z.infer; -export type ProviderBuildOnCompleteAction = z.infer; -export type ProviderBuildConfiguration = z.infer; - -export type ProviderBuild = z.infer; - -export type ListProviderBuildsRequest = z.infer; -export type ListProviderBuildsResponse = z.infer; - -export type 
CreateProviderBuildRequest = z.infer; -export type CreateProviderBuildResponse = z.infer; - -export type ReadProviderBuildRequest = z.infer; -export type ReadProviderBuildResponse = z.infer; - -export type DeleteProviderBuildRequest = z.infer; -export type DeleteProviderBuildResponse = z.infer; - -export type ReadProviderBuildLogsRequest = z.infer; -export type ReadProviderBuildLogsResponse = z.infer; - -export type PreviewProviderBuildRequest = z.infer; -export type PreviewProviderBuildResponse = z.infer; diff --git a/apps/agentstack-sdk-ts/src/client/api/providers/schemas.ts b/apps/agentstack-sdk-ts/src/client/api/providers/schemas.ts index 0174eda735..e59d119901 100644 --- a/apps/agentstack-sdk-ts/src/client/api/providers/schemas.ts +++ b/apps/agentstack-sdk-ts/src/client/api/providers/schemas.ts @@ -6,24 +6,11 @@ import z from 'zod'; import { agentCardSchema } from '../../a2a/protocol/schemas'; -import { - dockerImageProviderLocationSchema, - fileSystemRegistryLocationSchema, - gitHubRegistryLocationSchema, - networkProviderLocationSchema, - networkRegistryLocationSchema, - readableStreamSchema, - resolvedDockerImageIdSchema, - resolvedGitHubUrlSchema, -} from '../common/schemas'; +import { networkProviderLocationSchema, readableStreamSchema } from '../common/schemas'; import { paginatedResponseSchema } from '../core/schemas'; -import { ProviderStatus, ProviderType, ProviderUnmanagedStatus } from './types'; +import { ProviderState } from './types'; -export const providerTypeSchema = z.enum(ProviderType); - -export const providerStatusSchema = z.enum(ProviderStatus); - -export const providerUnmanagedStatusSchema = z.enum(ProviderUnmanagedStatus); +export const providerStateSchema = z.enum(ProviderState); export const providerErrorSchema = z.object({ message: z.string(), @@ -35,31 +22,19 @@ export const providerEnvVarSchema = z.object({ description: z.string().nullish(), }); -export const providerVersionInfoSchema = z.object({ - docker: 
resolvedDockerImageIdSchema.nullish(), - github: resolvedGitHubUrlSchema.nullish(), -}); - export const providerSchema = z.object({ id: z.string(), - source: z.union([dockerImageProviderLocationSchema, networkProviderLocationSchema]), + source: networkProviderLocationSchema, + source_type: z.string().optional(), agent_card: agentCardSchema, - state: z.union([providerStatusSchema, providerUnmanagedStatusSchema]), + state: providerStateSchema, origin: z.string(), created_at: z.string(), created_by: z.string(), updated_at: z.string(), last_active_at: z.string(), - auto_stop_timeout: z.string(), - managed: z.boolean(), - type: providerTypeSchema, - env: z.array(providerEnvVarSchema), - registry: z - .union([gitHubRegistryLocationSchema, networkRegistryLocationSchema, fileSystemRegistryLocationSchema]) - .nullish(), last_error: providerErrorSchema.nullish(), missing_configuration: z.array(providerEnvVarSchema).optional(), - version_info: providerVersionInfoSchema.optional(), }); export const listProvidersRequestSchema = z.object({ @@ -76,9 +51,8 @@ export const listProvidersResponseSchema = paginatedResponseSchema.extend({ }); export const createProviderRequestSchema = z.object({ - location: z.union([dockerImageProviderLocationSchema, networkProviderLocationSchema]), + location: networkProviderLocationSchema, agent_card: agentCardSchema.nullish(), - auto_stop_timeout_sec: z.number().nullish(), origin: z.string().nullish(), variables: z.record(z.string(), z.string()).nullish(), }); @@ -99,9 +73,8 @@ export const deleteProviderResponseSchema = z.null(); export const patchProviderRequestSchema = z.object({ id: z.string(), - location: z.union([dockerImageProviderLocationSchema, networkProviderLocationSchema]).nullish(), + location: networkProviderLocationSchema.nullish(), agent_card: agentCardSchema.nullish(), - auto_stop_timeout_sec: z.number().nullish(), origin: z.string().nullish(), variables: z.record(z.string(), z.string()).nullish(), }); diff --git 
a/apps/agentstack-sdk-ts/src/client/api/providers/types.ts b/apps/agentstack-sdk-ts/src/client/api/providers/types.ts index 7ed136819e..51f0769501 100644 --- a/apps/agentstack-sdk-ts/src/client/api/providers/types.ts +++ b/apps/agentstack-sdk-ts/src/client/api/providers/types.ts @@ -21,7 +21,6 @@ import type { providerEnvVarSchema, providerErrorSchema, providerSchema, - providerVersionInfoSchema, readProviderByLocationRequestSchema, readProviderByLocationResponseSchema, readProviderLogsRequestSchema, @@ -32,27 +31,13 @@ import type { updateProviderVariablesResponseSchema, } from './schemas'; -export enum ProviderType { - Managed = 'managed', - Unmanaged = 'unmanaged', -} - -export enum ProviderStatus { - Missing = 'missing', - Starting = 'starting', - Ready = 'ready', - Running = 'running', - Error = 'error', -} - -export enum ProviderUnmanagedStatus { +export enum ProviderState { Online = 'online', Offline = 'offline', } export type ProviderError = z.infer; export type ProviderEnvVar = z.infer; -export type ProviderVersionInfo = z.infer; export type Provider = z.infer; diff --git a/apps/agentstack-sdk-ts/src/client/api/schemas.ts b/apps/agentstack-sdk-ts/src/client/api/schemas.ts index deb5f2b089..c65c42be76 100644 --- a/apps/agentstack-sdk-ts/src/client/api/schemas.ts +++ b/apps/agentstack-sdk-ts/src/client/api/schemas.ts @@ -9,7 +9,6 @@ export * from './connectors/schemas'; export * from './contexts/schemas'; export * from './files/schemas'; export * from './model-providers/schemas'; -export * from './provider-builds/schemas'; export * from './providers/schemas'; export * from './user-feedback/schemas'; export * from './users/schemas'; diff --git a/apps/agentstack-sdk-ts/src/client/api/types.ts b/apps/agentstack-sdk-ts/src/client/api/types.ts index 03bded9995..953a9be728 100644 --- a/apps/agentstack-sdk-ts/src/client/api/types.ts +++ b/apps/agentstack-sdk-ts/src/client/api/types.ts @@ -9,7 +9,6 @@ export * from './connectors/types'; export * from 
'./contexts/types'; export * from './files/types'; export * from './model-providers/types'; -export * from './provider-builds/types'; export * from './providers/types'; export * from './user-feedback/types'; export * from './users/types'; diff --git a/apps/agentstack-sdk-ts/src/experimental/server/core/config/schemas.ts b/apps/agentstack-sdk-ts/src/experimental/server/core/config/schemas.ts index d26461233d..825e20c212 100644 --- a/apps/agentstack-sdk-ts/src/experimental/server/core/config/schemas.ts +++ b/apps/agentstack-sdk-ts/src/experimental/server/core/config/schemas.ts @@ -6,7 +6,7 @@ import z from 'zod'; export const configSchema = z.object({ - platformUrl: z.string().default('http://127.0.0.1:8333'), + platformUrl: z.string().default('http://agentstack-api.localtest.me:8080'), productionMode: z .string() .optional() diff --git a/apps/agentstack-server/src/agentstack_server/api/auth/auth.py b/apps/agentstack-server/src/agentstack_server/api/auth/auth.py index 0a8cbfbe2e..79f91da8bb 100644 --- a/apps/agentstack-server/src/agentstack_server/api/auth/auth.py +++ b/apps/agentstack-server/src/agentstack_server/api/auth/auth.py @@ -57,8 +57,6 @@ ROLE_PERMISSIONS[UserRole.DEVELOPER] = ROLE_PERMISSIONS[UserRole.USER] | Permissions( providers={"read", "write"}, feedback={"read", "write"}, - provider_builds={"read", "write"}, - provider_variables={"read", "write"}, ) """ @@ -79,10 +77,6 @@ - any user list and show detail about any provider - developers can create/delete and manage only their own providers - admins can create/delete and manage any provider - - provider_builds - - any user list and show detail about any build - - developers can create/delete and manage only their own builds - - admins can create/delete and manage any build """ @@ -237,6 +231,7 @@ def extract_oauth_token( @alru_cache(ttl=timedelta(seconds=5).seconds) async def validate_jwt(token: str, *, provider: OidcProvider, aud: Iterable[str]) -> JWTClaims | Exception: keyset = await 
discover_jwks(provider) + claims = None try: claims = jwt.decode( token, @@ -251,6 +246,8 @@ async def validate_jwt(token: str, *, provider: OidcProvider, aud: Iterable[str] claims.validate() return claims except Exception as e: + token_aud = claims.get("aud") if claims is not None else "" + logger.warning(f"JWT validation failed: {e} | expected_aud={set(aud)} | token_aud={token_aud}") return e # Cache exception response diff --git a/apps/agentstack-server/src/agentstack_server/api/dependencies.py b/apps/agentstack-server/src/agentstack_server/api/dependencies.py index 12f36519f9..278b39a64a 100644 --- a/apps/agentstack-server/src/agentstack_server/api/dependencies.py +++ b/apps/agentstack-server/src/agentstack_server/api/dependencies.py @@ -35,8 +35,6 @@ from agentstack_server.service_layer.services.external_mcp_service import ExternalMcpService from agentstack_server.service_layer.services.files import FileService from agentstack_server.service_layer.services.model_providers import ModelProviderService -from agentstack_server.service_layer.services.provider_build import ProviderBuildService -from agentstack_server.service_layer.services.provider_discovery import ProviderDiscoveryService from agentstack_server.service_layer.services.providers import ProviderService from agentstack_server.service_layer.services.user_feedback import UserFeedbackService from agentstack_server.service_layer.services.users import UserService @@ -44,8 +42,6 @@ ConfigurationDependency = Annotated[Configuration, Depends(lambda: di[Configuration])] ProviderServiceDependency = Annotated[ProviderService, Depends(lambda: di[ProviderService])] -ProviderBuildServiceDependency = Annotated[ProviderBuildService, Depends(lambda: di[ProviderBuildService])] -ProviderDiscoveryServiceDependency = Annotated[ProviderDiscoveryService, Depends(lambda: di[ProviderDiscoveryService])] A2AProxyServiceDependency = Annotated[A2AProxyService, Depends(lambda: di[A2AProxyService])] ContextServiceDependency = 
Annotated[ContextService, Depends(lambda: di[ContextService])] ConfigurationServiceDependency = Annotated[ConfigurationService, Depends(lambda: di[ConfigurationService])] @@ -92,7 +88,7 @@ async def authenticate_oauth_user( try: claims, provider = await validate_oauth_access_token(token=token, aud=expected_aud, configuration=configuration) except Exception as e: - logger.warning(f"Token validation failed: {e}") + logger.warning(f"Token validation failed: {e} | expected_aud={expected_aud}") raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Token validation failed") from e realm_roles = get_claims_by_path(claims or {}, configuration.auth.oidc.roles_path) diff --git a/apps/agentstack-server/src/agentstack_server/api/routes/provider_builds.py b/apps/agentstack-server/src/agentstack_server/api/routes/provider_builds.py deleted file mode 100644 index 41a68e8d4f..0000000000 --- a/apps/agentstack-server/src/agentstack_server/api/routes/provider_builds.py +++ /dev/null @@ -1,92 +0,0 @@ -# Copyright 2025 © BeeAI a Series of LF Projects, LLC -# SPDX-License-Identifier: Apache-2.0 - -from __future__ import annotations - -from typing import Annotated -from uuid import UUID - -import fastapi -from fastapi import Depends, Query -from starlette.responses import StreamingResponse - -from agentstack_server.api.dependencies import ( - ProviderBuildServiceDependency, - RequiresPermissions, -) -from agentstack_server.api.schema.provider_build import CreateProviderBuildRequest, ProviderBuildListQuery -from agentstack_server.configuration import get_configuration -from agentstack_server.domain.models.common import PaginatedResult -from agentstack_server.domain.models.permissions import AuthorizedUser -from agentstack_server.domain.models.provider_build import ProviderBuild -from agentstack_server.utils.fastapi import streaming_response - -router = fastapi.APIRouter() - -if get_configuration().provider_build.enabled: - - @router.post("") - async def 
create_provider_build( - user: Annotated[AuthorizedUser, Depends(RequiresPermissions(provider_builds={"write"}))], - request: CreateProviderBuildRequest, - provider_build_service: ProviderBuildServiceDependency, - ) -> ProviderBuild: - return await provider_build_service.create_build( - location=request.location, - user=user.user, - on_complete=request.on_complete, - build_configuration=request.build_configuration, - ) - - @router.post("/preview") - async def preview_provider_build( - user: Annotated[AuthorizedUser, Depends(RequiresPermissions(provider_builds={"write"}))], - request: CreateProviderBuildRequest, - provider_build_service: ProviderBuildServiceDependency, - ) -> ProviderBuild: - return await provider_build_service.preview_build( - location=request.location, - user=user.user, - on_complete=request.on_complete, - build_configuration=request.build_configuration, - ) - - @router.get("/{id}") - async def get_provider_build( - _: Annotated[AuthorizedUser, Depends(RequiresPermissions(provider_builds={"read"}))], - id: UUID, - provider_build_service: ProviderBuildServiceDependency, - ) -> ProviderBuild: - return await provider_build_service.get_build(provider_build_id=id) - - @router.get("") - async def list_provider_builds( - user: Annotated[AuthorizedUser, Depends(RequiresPermissions(provider_builds={"read"}))], - provider_build_service: ProviderBuildServiceDependency, - query: Annotated[ProviderBuildListQuery, Query()], - ) -> PaginatedResult[ProviderBuild]: - return await provider_build_service.list_builds( - pagination=query, - status=query.status, - user=user.user, - user_owned=query.user_owned, - ) - - @router.get("/{id}/logs") - async def stream_logs( - user: Annotated[AuthorizedUser, Depends(RequiresPermissions(provider_builds={"write"}))], - id: UUID, - provider_build_service: ProviderBuildServiceDependency, - ) -> StreamingResponse: - # admin can see logs from all builds, other users only logs of their build - logs_iterator = await 
provider_build_service.stream_logs(provider_build_id=id, user=user.user) - return streaming_response(logs_iterator()) - - @router.delete("/{id}", status_code=fastapi.status.HTTP_204_NO_CONTENT) - async def delete( - user: Annotated[AuthorizedUser, Depends(RequiresPermissions(provider_builds={"write"}))], - id: UUID, - provider_build_service: ProviderBuildServiceDependency, - ) -> None: - # admin can delete all builds, other users only their build - await provider_build_service.delete_build(provider_build_id=id, user=user.user) diff --git a/apps/agentstack-server/src/agentstack_server/api/routes/provider_discovery.py b/apps/agentstack-server/src/agentstack_server/api/routes/provider_discovery.py deleted file mode 100644 index 7e77f1a180..0000000000 --- a/apps/agentstack-server/src/agentstack_server/api/routes/provider_discovery.py +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright 2025 © BeeAI a Series of LF Projects, LLC -# SPDX-License-Identifier: Apache-2.0 - -from __future__ import annotations - -from typing import Annotated -from uuid import UUID - -import fastapi -from fastapi import Depends - -from agentstack_server.api.dependencies import ProviderDiscoveryServiceDependency, RequiresPermissions -from agentstack_server.api.schema.provider_discovery import CreateDiscoveryRequest -from agentstack_server.domain.models.permissions import AuthorizedUser -from agentstack_server.domain.models.provider_discovery import ProviderDiscovery - -router = fastapi.APIRouter() - - -@router.post("") -async def create_provider_discovery( - user: Annotated[AuthorizedUser, Depends(RequiresPermissions(providers={"write"}))], - request: CreateDiscoveryRequest, - service: ProviderDiscoveryServiceDependency, -) -> ProviderDiscovery: - return await service.create_discovery(docker_image=request.docker_image, user=user.user) - - -@router.get("/{id}") -async def get_provider_discovery( - user: Annotated[AuthorizedUser, Depends(RequiresPermissions(providers={"read"}))], - id: UUID, - service: 
ProviderDiscoveryServiceDependency, -) -> ProviderDiscovery: - return await service.get_discovery(discovery_id=id, user=user.user) diff --git a/apps/agentstack-server/src/agentstack_server/api/routes/providers.py b/apps/agentstack-server/src/agentstack_server/api/routes/providers.py index c6756b0b57..dabdfcde6f 100644 --- a/apps/agentstack-server/src/agentstack_server/api/routes/providers.py +++ b/apps/agentstack-server/src/agentstack_server/api/routes/providers.py @@ -10,7 +10,6 @@ from fastapi.params import Depends, Query from fastapi.requests import Request from pydantic import TypeAdapter -from starlette.responses import StreamingResponse from agentstack_server.api.dependencies import ( ConfigurationDependency, @@ -19,12 +18,10 @@ ) from agentstack_server.api.routes.a2a import create_proxy_agent_card from agentstack_server.api.schema.common import EntityModel -from agentstack_server.api.schema.env import ListVariablesSchema, UpdateVariablesRequest -from agentstack_server.api.schema.provider import CreateProviderRequest, PatchProviderRequest +from agentstack_server.api.schema.provider import CreateProviderRequest from agentstack_server.domain.models.common import PaginatedResult from agentstack_server.domain.models.permissions import AuthorizedUser -from agentstack_server.domain.models.provider import ProviderLocation, ProviderWithState -from agentstack_server.utils.fastapi import streaming_response +from agentstack_server.domain.models.provider import Provider, ProviderLocation router = fastapi.APIRouter() @@ -35,32 +32,12 @@ async def create_provider( request: CreateProviderRequest, provider_service: ProviderServiceDependency, configuration: ConfigurationDependency, -) -> ProviderWithState: +) -> Provider: return await provider_service.create_provider( user=user.user, - auto_stop_timeout=request.auto_stop_timeout, location=request.location, origin=request.origin, agent_card=request.agent_card, - variables=request.variables, - ) - - -@router.patch("/{id}") 
-async def patch_provider( - id: UUID, - user: Annotated[AuthorizedUser, Depends(RequiresPermissions(providers={"write"}))], - request: PatchProviderRequest, - provider_service: ProviderServiceDependency, -) -> ProviderWithState: - return await provider_service.patch_provider( - provider_id=id, - user=user.user, - auto_stop_timeout=request.auto_stop_timeout, - location=request.location, - origin=request.origin, - agent_card=request.agent_card, - variables=request.variables, ) @@ -69,7 +46,7 @@ async def preview_provider( request: CreateProviderRequest, provider_service: ProviderServiceDependency, _: Annotated[AuthorizedUser, Depends(RequiresPermissions(providers={"write"}))], -) -> ProviderWithState: +) -> Provider: return await provider_service.preview_provider(location=request.location, agent_card=request.agent_card) @@ -81,7 +58,7 @@ async def list_providers( user: Annotated[AuthorizedUser, Depends(RequiresPermissions(providers={"read"}), use_cache=False)], user_owned: Annotated[bool | None, Query()] = None, origin: Annotated[str | None, Query()] = None, -) -> PaginatedResult[EntityModel[ProviderWithState]]: +) -> PaginatedResult[EntityModel[Provider]]: providers = [] for provider in await provider_service.list_providers(user=user.user, user_owned=user_owned, origin=origin): new_provider = provider.model_copy( @@ -106,7 +83,7 @@ async def get_provider( configuration: ConfigurationDependency, request: Request, _: Annotated[AuthorizedUser, Depends(RequiresPermissions(providers={"read"}))], -) -> EntityModel[ProviderWithState]: +) -> EntityModel[Provider]: provider = await provider_service.get_provider(provider_id=id) return EntityModel( # pyrefly: ignore [bad-return] -- TODO: fix the EntityModel hack so that both Pyrefly and FastAPI understand it provider.model_copy( @@ -126,7 +103,7 @@ async def get_provider_by_location( configuration: ConfigurationDependency, request: Request, _: Annotated[AuthorizedUser, Depends(RequiresPermissions(providers={"read"}))], -) -> 
EntityModel[ProviderWithState]: +) -> EntityModel[Provider]: try: parsed_location: ProviderLocation = TypeAdapter(ProviderLocation).validate_python(location) except ValueError as e: @@ -153,33 +130,3 @@ async def delete_provider( await provider_service.delete_provider(provider_id=id, user=user.user) -@router.get("/{id}/logs") -async def stream_logs( - user: Annotated[AuthorizedUser, Depends(RequiresPermissions(providers={"write"}))], - id: UUID, - provider_service: ProviderServiceDependency, -) -> StreamingResponse: - # admin can see logs from all providers, other users only logs of their provider - logs_iterator = await provider_service.stream_logs(provider_id=id, user=user.user) - return streaming_response(logs_iterator()) - - -@router.put("/{id}/variables", status_code=fastapi.status.HTTP_201_CREATED) -async def update_provider_variables( - id: UUID, - request: UpdateVariablesRequest, - provider_service: ProviderServiceDependency, - user: Annotated[AuthorizedUser, Depends(RequiresPermissions(provider_variables={"write"}))], -) -> None: - # admin can update all variables, other users only variables of their provider - await provider_service.update_provider_env(provider_id=id, env=request.variables, user=user.user) - - -@router.get("/{id}/variables") -async def list_provider_variables( - id: UUID, - provider_service: ProviderServiceDependency, - user: Annotated[AuthorizedUser, Depends(RequiresPermissions(provider_variables={"read"}))], -) -> ListVariablesSchema: - # admin can see all variables, other users only variables of their provider - return ListVariablesSchema(variables=await provider_service.list_provider_env(provider_id=id, user=user.user)) diff --git a/apps/agentstack-server/src/agentstack_server/api/schema/contexts.py b/apps/agentstack-server/src/agentstack_server/api/schema/contexts.py index 84e47ad9ec..e80a96ef40 100644 --- a/apps/agentstack-server/src/agentstack_server/api/schema/contexts.py +++ 
b/apps/agentstack-server/src/agentstack_server/api/schema/contexts.py @@ -55,10 +55,7 @@ class GlobalPermissionGrant(BaseModel): a2a_proxy: list[Literal["*"]] | list[UUID] = Field(default_factory=list) # agent providers - providers: list[Literal["read", "write", "*"]] = Field( - default_factory=list - ) # write includes "show logs" permission - provider_variables: list[Literal["read", "write", "*"]] = Field(default_factory=list) + providers: list[Literal["read", "write", "*"]] = Field(default_factory=list) contexts: list[Literal["read", "write", "*"]] = Field(default_factory=list) context_data: list[Literal["read", "write", "*"]] = Field(default_factory=list) diff --git a/apps/agentstack-server/src/agentstack_server/api/schema/provider.py b/apps/agentstack-server/src/agentstack_server/api/schema/provider.py index ca3f10078f..64dc9d2d12 100644 --- a/apps/agentstack-server/src/agentstack_server/api/schema/provider.py +++ b/apps/agentstack-server/src/agentstack_server/api/schema/provider.py @@ -4,63 +4,19 @@ from __future__ import annotations -from datetime import timedelta - from a2a.types import AgentCard from pydantic import BaseModel, Field -from agentstack_server.domain.constants import DEFAULT_AUTO_STOP_TIMEOUT from agentstack_server.domain.models.provider import ProviderLocation class CreateProviderRequest(BaseModel): location: ProviderLocation agent_card: AgentCard | None = None - variables: dict[str, str] | None = None origin: str | None = Field( default=None, description=( - "A unique origin of the provider: most often a docker or github repository url (without tag). " + "A unique origin of the provider: most often a url. " "This is used to determine multiple versions of the same agent." ), ) - auto_stop_timeout_sec: int | None = Field( - default=None, - gt=0, - le=1800, - description=( - "Timeout after which the agent provider will be automatically downscaled if unused." - "Contact administrator if you need to increase this value." 
- ), - ) - - @property - def auto_stop_timeout(self) -> timedelta: - return timedelta(seconds=self.auto_stop_timeout_sec or int(DEFAULT_AUTO_STOP_TIMEOUT.total_seconds())) - - -class PatchProviderRequest(BaseModel): - location: ProviderLocation | None = None - agent_card: AgentCard | None = None - variables: dict[str, str] | None = None - origin: str | None = Field( - default=None, - description=( - "A unique origin of the provider: most often a docker or github repository url (without tag). " - "This is used to determine multiple versions of the same agent. " - "None means that origin will be recomputed from location. To preserve original value, set it explicitly." - ), - ) - auto_stop_timeout_sec: int | None = Field( - default=None, - gt=0, - le=1800, - description=( - "Timeout after which the agent provider will be automatically downscaled if unused." - "Contact administrator if you need to increase this value." - ), - ) - - @property - def auto_stop_timeout(self) -> timedelta | None: - return timedelta(seconds=self.auto_stop_timeout_sec) if self.auto_stop_timeout_sec else None diff --git a/apps/agentstack-server/src/agentstack_server/api/schema/provider_build.py b/apps/agentstack-server/src/agentstack_server/api/schema/provider_build.py deleted file mode 100644 index dd1ebefebe..0000000000 --- a/apps/agentstack-server/src/agentstack_server/api/schema/provider_build.py +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright 2025 © BeeAI a Series of LF Projects, LLC -# SPDX-License-Identifier: Apache-2.0 - -from __future__ import annotations - -from pydantic import BaseModel, Field - -from agentstack_server.api.schema.common import PaginationQuery -from agentstack_server.domain.models.provider_build import BuildConfiguration, BuildState, NoAction, OnCompleteAction -from agentstack_server.utils.github import GithubUrl - - -class CreateProviderBuildRequest(BaseModel): - location: GithubUrl - on_complete: OnCompleteAction = Field(default_factory=NoAction) - build_configuration: 
BuildConfiguration | None = None - - -class ProviderBuildListQuery(PaginationQuery): - status: BuildState | None = None - user_owned: bool | None = None diff --git a/apps/agentstack-server/src/agentstack_server/application.py b/apps/agentstack-server/src/agentstack_server/application.py index 294664eef0..3c6a7268d2 100644 --- a/apps/agentstack-server/src/agentstack_server/application.py +++ b/apps/agentstack-server/src/agentstack_server/application.py @@ -32,8 +32,6 @@ from agentstack_server.api.routes.files import router as files_router from agentstack_server.api.routes.model_providers import router as model_providers_router from agentstack_server.api.routes.openai import router as openai_router -from agentstack_server.api.routes.provider_builds import router as provider_builds_router -from agentstack_server.api.routes.provider_discovery import router as provider_discovery_router from agentstack_server.api.routes.providers import router as provider_router from agentstack_server.api.routes.user import router as user_router from agentstack_server.api.routes.user_feedback import router as user_feedback_router @@ -48,7 +46,7 @@ RateLimitExceededError, ) from agentstack_server.jobs.crons.model_provider import check_model_provider_registry, update_model_state_and_cache -from agentstack_server.jobs.crons.provider import check_registry +from agentstack_server.jobs.crons.provider import sync_kagenti_agents from agentstack_server.run_workers import run_workers from agentstack_server.service_layer.services.user_feedback import UserFeedbackService from agentstack_server.telemetry import INSTRUMENTATION_NAME, shutdown_telemetry @@ -126,8 +124,6 @@ def mount_routes(app: FastAPI): server_router.include_router(users_router, prefix="/users", tags=["users"]) server_router.include_router(a2a_router, prefix="/a2a", tags=["a2a"]) server_router.include_router(provider_router, prefix="/providers", tags=["providers"]) - server_router.include_router(provider_discovery_router, 
prefix="/providers/discovery", tags=["provider_discovery"]) - server_router.include_router(provider_builds_router, prefix="/provider_builds", tags=["provider_builds"]) server_router.include_router(model_providers_router, prefix="/model_providers", tags=["model_providers"]) server_router.include_router(configuration_router, prefix="/configurations", tags=["configurations"]) server_router.include_router(files_router, prefix="/files", tags=["files"]) @@ -217,7 +213,7 @@ async def lifespan(_: FastAPI): ): # Force initial synchronization job with suppress(AlreadyEnqueued): - await check_registry.defer_async(timestamp=int(time.time())) + await sync_kagenti_agents.defer_async(timestamp=int(time.time())) with suppress(AlreadyEnqueued): await check_model_provider_registry.defer_async(timestamp=int(time.time())) with suppress(AlreadyEnqueued): diff --git a/apps/agentstack-server/src/agentstack_server/bootstrap.py b/apps/agentstack-server/src/agentstack_server/bootstrap.py index 02808a3e75..d6ca91acea 100644 --- a/apps/agentstack-server/src/agentstack_server/bootstrap.py +++ b/apps/agentstack-server/src/agentstack_server/bootstrap.py @@ -21,16 +21,12 @@ from agentstack_server.domain.repositories.openai_proxy import IOpenAIProxy from agentstack_server.infrastructure.cache.memory_cache import MemoryCacheFactory from agentstack_server.infrastructure.cache.redis_cache import RedisCacheFactory -from agentstack_server.infrastructure.kubernetes.provider_build_manager import KubernetesProviderBuildManager -from agentstack_server.infrastructure.kubernetes.provider_deployment_manager import KubernetesProviderDeploymentManager from agentstack_server.infrastructure.object_storage.repository import S3ObjectStorageRepository from agentstack_server.infrastructure.openai_proxy.openai_proxy import CustomOpenAIProxy from agentstack_server.infrastructure.persistence.unit_of_work import SqlAlchemyUnitOfWorkFactory from agentstack_server.infrastructure.text_extraction.docling import 
DoclingTextExtractionBackend from agentstack_server.jobs.procrastinate import create_app -from agentstack_server.service_layer.build_manager import IProviderBuildManager from agentstack_server.service_layer.cache import ICacheFactory -from agentstack_server.service_layer.deployment_manager import IProviderDeploymentManager from agentstack_server.service_layer.services.managed_mcp_service import ManagedMcpService from agentstack_server.service_layer.unit_of_work import IUnitOfWorkFactory from agentstack_server.utils.utils import async_to_sync_isolated @@ -94,13 +90,6 @@ def _set_di[T](service: type[T], instance: T | None = None, create_instance: Cal di[Configuration].k8s_namespace, di[Configuration].k8s_kubeconfig, ) - _set_di( - IProviderDeploymentManager, - KubernetesProviderDeploymentManager( - api_factory=kr8s_api_factory, - manifest_template_dir=di[Configuration].provider.manifest_template_dir, - ), - ) _set_di( ManagedMcpService, ManagedMcpService( @@ -108,17 +97,6 @@ def _set_di[T](service: type[T], instance: T | None = None, create_instance: Cal api_factory=kr8s_api_factory, ), ) - _set_di( - IProviderBuildManager, - KubernetesProviderBuildManager( - configuration=di[Configuration], - api_factory=await setup_kubernetes_client( - di[Configuration].provider_build.k8s_namespace, - di[Configuration].provider_build.k8s_kubeconfig, - ), - manifest_template_dir=di[Configuration].provider.manifest_template_dir, - ), - ) _set_di( IUnitOfWorkFactory, SqlAlchemyUnitOfWorkFactory(setup_database_engine(di[Configuration]), di[Configuration]), diff --git a/apps/agentstack-server/src/agentstack_server/configuration.py b/apps/agentstack-server/src/agentstack_server/configuration.py index 832bfcc035..993b62ae2b 100644 --- a/apps/agentstack-server/src/agentstack_server/configuration.py +++ b/apps/agentstack-server/src/agentstack_server/configuration.py @@ -21,7 +21,7 @@ from pydantic_settings import BaseSettings, SettingsConfigDict from sqlalchemy.ext.asyncio import 
AsyncEngine, create_async_engine -from agentstack_server.domain.models.registry import ModelProviderRegistryLocation, RegistryLocation +from agentstack_server.domain.models.registry import ModelProviderRegistryLocation logger = logging.getLogger(__name__) @@ -84,9 +84,19 @@ class ModelProviderConfiguration(BaseModel): default_embedding_model: str | None = None -class AgentRegistryConfiguration(BaseModel): - locations: dict[str, RegistryLocation] = Field(default_factory=dict) - sync_period_cron: str = Field(default="*/5 * * * *") # every 10 minutes +class KagentiConfiguration(BaseModel): + enabled: bool = True + api_url: str = "http://kagenti-api.localtest.me:8080" + sync_period_cron: str = "* * * * * */5" # every 5 seconds + # Kubernetes namespaces to scan for kagenti agents. + # The kagenti API requires an explicit namespace per request (no wildcard). + namespaces: list[str] = ["team1"] + # OAuth2 client credentials for authenticating to kagenti API. + # Defaults to reusing the agentstack-server OIDC client (same realm). + # The agentstack-server service account needs kagenti-viewer role assigned in Keycloak. 
+ auth_token_url: str = "http://keycloak-service.keycloak:8080/realms/agentstack/protocol/openid-connect/token" + client_id: str = "agentstack-server" + client_secret: Secret[str] = Secret("agentstack-server-secret") class OidcProvider(BaseModel): @@ -108,8 +118,8 @@ class OidcConfiguration(BaseModel): # Flattened configuration allows setting a single provider via environment variables # e.g., AGENTSTACK__AUTH__OIDC__NAME="Keycloak" name: str = "Keycloak" - issuer: AnyUrl = HttpUrl("http://keycloak:8336/realms/agentstack") - external_issuer: AnyUrl = HttpUrl("http://localhost:8336/realms/agentstack") + issuer: AnyUrl = HttpUrl("http://keycloak-service.keycloak:8080/realms/agentstack") + external_issuer: AnyUrl = HttpUrl("http://keycloak.localtest.me:8080/realms/agentstack") client_id: str = "agentstack-server" client_secret: Secret[str] = Secret("agentstack-server-secret") insecure_transport: bool = False @@ -240,7 +250,7 @@ class VectorStoresConfiguration(BaseModel): class TelemetryConfiguration(BaseModel): - collector_url: AnyUrl = AnyUrl("http://otel-collector-svc:4318") + collector_url: AnyUrl = AnyUrl("http://otel-collector.kagenti-system:4318") phoenix_url: AnyUrl | None = None phoenix_api_key: Secret[str] | None = None @@ -274,8 +284,6 @@ class DockerConfigJson(BaseModel): class ManagedProviderConfiguration(BaseModel): - disable_downscaling: bool = False - manifest_template_dir: Path | None = None self_registration_use_local_network: bool = Field( default=False, description="Which network to use for self-registered providers - should be False when running in cluster", @@ -328,16 +336,6 @@ class A2AProxyConfiguration(BaseModel): requests_expire_after_days: int = 14 -class ProviderBuildConfiguration(BaseModel): - enabled: bool = True - oci_build_registry_prefix: str | None = None - image_format: str = "{registry_prefix}/{org}/{repo}/{path}{dockerfile_path}:{commit_hash}" - job_timeout_sec: int = int(timedelta(minutes=20).total_seconds()) - 
manifest_template_dir: Path | None = None - k8s_namespace: str | None = None - k8s_kubeconfig: Path | None = None - - class GenerateConversationTitleConfiguration(BaseModel): enabled: bool = True model: str | Literal["default"] = "default" @@ -470,8 +468,7 @@ class Configuration(BaseSettings): generate_conversation_title: GenerateConversationTitleConfiguration = Field( default_factory=GenerateConversationTitleConfiguration ) - provider_build: ProviderBuildConfiguration = Field(default_factory=ProviderBuildConfiguration) - agent_registry: AgentRegistryConfiguration = Field(default_factory=AgentRegistryConfiguration) + kagenti: KagentiConfiguration = Field(default_factory=KagentiConfiguration) model_provider_registry: ModelProviderRegistryConfiguration = Field( default_factory=ModelProviderRegistryConfiguration ) @@ -527,8 +524,6 @@ def _oci_registry_defaultdict(self): self.oci_registry[alias].insecure = conf.insecure except ValueError as e: logger.error(f"Failed to parse .dockerconfigjson: {e}. Some agent images might not work correctly.") - if not self.provider_build.oci_build_registry_prefix and len(self.oci_registry): - self.provider_build.oci_build_registry_prefix = next(iter(self.oci_registry.keys())) return self @model_validator(mode="after") @@ -540,14 +535,6 @@ def _github_registry_config(self): logger.error(f"Failed to parse .githubconfigjson: {e}. 
GitHub access might not work correctly.") return self - @model_validator(mode="after") - def _set_default_provider_build_values(self): - self.provider_build.k8s_namespace = self.provider_build.k8s_namespace or self.k8s_namespace - self.provider_build.k8s_kubeconfig = self.provider_build.k8s_kubeconfig or self.k8s_kubeconfig - self.provider_build.manifest_template_dir = ( - self.provider_build.manifest_template_dir or self.provider.manifest_template_dir - ) - return self @cache diff --git a/apps/agentstack-server/src/agentstack_server/domain/constants.py b/apps/agentstack-server/src/agentstack_server/domain/constants.py index 5d2e62d571..53564b7f0e 100644 --- a/apps/agentstack-server/src/agentstack_server/domain/constants.py +++ b/apps/agentstack-server/src/agentstack_server/domain/constants.py @@ -3,12 +3,9 @@ from __future__ import annotations -from datetime import timedelta from enum import Enum from typing import Final, Literal, TypeAlias -DOCKER_MANIFEST_LABEL_NAME: Final[str] = "beeai.dev.agent.json" - class _Undefined(Enum): undefined = "undefined" @@ -17,8 +14,6 @@ class _Undefined(Enum): undefined = _Undefined.undefined Undefined: TypeAlias = Literal[_Undefined.undefined] # noqa: UP040 -DEFAULT_AUTO_STOP_TIMEOUT: Final[timedelta] = timedelta(minutes=20) - # A2A platform constants AGENT_DETAIL_EXTENSION_URI: Final[str] = "https://a2a-extensions.agentstack.beeai.dev/ui/agent-detail/v1" SELF_REGISTRATION_EXTENSION_URI: Final[str] = ( diff --git a/apps/agentstack-server/src/agentstack_server/domain/models/permissions.py b/apps/agentstack-server/src/agentstack_server/domain/models/permissions.py index d792c2670a..5f0200183e 100644 --- a/apps/agentstack-server/src/agentstack_server/domain/models/permissions.py +++ b/apps/agentstack-server/src/agentstack_server/domain/models/permissions.py @@ -35,9 +35,7 @@ class Permissions(BaseModel): a2a_proxy: SerializeAsAny[set[Literal["*"] | UUID]] = set() # agent providers - providers: SerializeAsAny[set[Literal["read", 
"write", "*"]]] = set() # write includes "show logs" permission - provider_variables: SerializeAsAny[set[Literal["read", "write", "*"]]] = set() - provider_builds: SerializeAsAny[set[Literal["read", "write", "*"]]] = set() # write includes "show logs" permission + providers: SerializeAsAny[set[Literal["read", "write", "*"]]] = set() contexts: SerializeAsAny[set[Literal["read", "write", "*"]]] = set() context_data: SerializeAsAny[set[Literal["read", "write", "*"]]] = set() # covers history (TODO: variables) diff --git a/apps/agentstack-server/src/agentstack_server/domain/models/provider.py b/apps/agentstack-server/src/agentstack_server/domain/models/provider.py index 4cb9ae7972..55cd37f1e8 100644 --- a/apps/agentstack-server/src/agentstack_server/domain/models/provider.py +++ b/apps/agentstack-server/src/agentstack_server/domain/models/provider.py @@ -3,11 +3,8 @@ from __future__ import annotations -import base64 import hashlib -import json import logging -from datetime import timedelta from enum import StrEnum from typing import Any from urllib.parse import quote, urljoin @@ -24,70 +21,28 @@ HttpUrl, ModelWrapValidatorHandler, RootModel, - computed_field, model_validator, ) from agentstack_server.configuration import Configuration from agentstack_server.domain.constants import ( - AGENT_DETAIL_EXTENSION_URI, - DEFAULT_AUTO_STOP_TIMEOUT, - DOCKER_MANIFEST_LABEL_NAME, SELF_REGISTRATION_EXTENSION_URI, ) -from agentstack_server.domain.models.registry import RegistryLocation from agentstack_server.domain.utils import bridge_k8s_to_localhost, bridge_localhost_to_k8s -from agentstack_server.exceptions import MissingAgentCardLabelError, MissingConfigurationError, VersionResolveError from agentstack_server.utils.a2a import get_extension -from agentstack_server.utils.docker import DockerImageID, ResolvedDockerImageID -from agentstack_server.utils.github import ResolvedGithubUrl from agentstack_server.utils.utils import utc_now logger = logging.getLogger(__name__) -class 
VersionInfo(BaseModel): - docker: ResolvedDockerImageID | None = None - github: ResolvedGithubUrl | None = None - - -class DockerImageProviderLocation(RootModel): - root: DockerImageID - - _resolved_version: ResolvedDockerImageID | None = None - - @property - def provider_id(self) -> UUID: - location_digest = hashlib.sha256(str(self.root).encode()).digest() - return UUID(bytes=location_digest[:16]) - - @property - def is_on_host(self) -> bool: - return False - - @property - def origin(self) -> str: - return self.root.base - - async def get_resolved_version(self) -> ResolvedDockerImageID: - if not self._resolved_version: - try: - self._resolved_version = await self.root.resolve_version() - except Exception as ex: - raise VersionResolveError(str(self.root), str(ex)) from ex - return self._resolved_version - - async def get_version_info(self) -> VersionInfo: - return VersionInfo(docker=await self.get_resolved_version()) +class ProviderState(StrEnum): + ONLINE = "online" + OFFLINE = "offline" - async def load_agent_card(self) -> AgentCard: - from a2a.types import AgentCard - resolved_version = await self.get_resolved_version() - labels = await resolved_version.get_labels() - if DOCKER_MANIFEST_LABEL_NAME not in labels: - raise MissingAgentCardLabelError(str(self.root)) - return AgentCard.model_validate(json.loads(base64.b64decode(labels[DOCKER_MANIFEST_LABEL_NAME]))) +class SourceType(StrEnum): + KAGENTI = "kagenti" + API = "api" class NetworkProviderLocation(RootModel): @@ -108,9 +63,6 @@ def a2a_url(self): path=self.root.path.lstrip("/") if self.root.path else None, ) - async def get_version_info(self) -> VersionInfo: - return VersionInfo() - @model_validator(mode="wrap") @classmethod def _replace_localhost_url(cls, data: Any, handler: ModelWrapValidatorHandler): @@ -155,88 +107,21 @@ async def load_agent_card(self) -> AgentCard: raise ValueError(f"Unable to load agents from location: {self.root}: {ex}") from ex -class EnvVar(BaseModel, extra="allow"): - name: str - 
description: str | None = None - required: bool = False - - -class UnmanagedState(StrEnum): - ONLINE = "online" - OFFLINE = "offline" - - -class ProviderType(StrEnum): - MANAGED = "managed" - UNMANAGED = "unmanaged" - - -ProviderLocation = DockerImageProviderLocation | NetworkProviderLocation +ProviderLocation = NetworkProviderLocation class Provider(BaseModel): source: ProviderLocation id: UUID = Field(default_factory=lambda data: data["source"].provider_id) - auto_stop_timeout: timedelta = Field(default=DEFAULT_AUTO_STOP_TIMEOUT) - origin: str # docker or github respository - version_info: VersionInfo = Field(default_factory=VersionInfo) - registry: RegistryLocation | None = None + source_type: SourceType = SourceType.API + origin: str created_at: AwareDatetime = Field(default_factory=utc_now) updated_at: AwareDatetime = Field(default_factory=utc_now) created_by: UUID last_active_at: AwareDatetime = Field(default_factory=utc_now) agent_card: AgentCard - unmanaged_state: UnmanagedState | None = Field(default=None, exclude=True) - - @computed_field - @property - def type(self) -> ProviderType: - return ProviderType.MANAGED if isinstance(self.source, DockerImageProviderLocation) else ProviderType.UNMANAGED - - @model_validator(mode="after") - def unmanaged_fields_discrimination(self): - if self.unmanaged_state and self.type == ProviderType.MANAGED: - raise ValueError("unmanaged_state can only be set for unmanaged providers") - return self - - @computed_field - @property - def managed(self) -> bool: - return self.type == ProviderType.MANAGED - - @computed_field - @property - def env(self) -> list[EnvVar]: - if agent_detail := get_extension(self.agent_card, AGENT_DETAIL_EXTENSION_URI): - variables = agent_detail.model_dump()["params"].get("variables") or [] - return [EnvVar.model_validate(v) for v in variables] - return [] - - def check_env(self, env: dict[str, str] | None = None, raise_error: bool = True) -> list[EnvVar]: - env = env or {} - provider_env = self.env - 
required_env = {var.name for var in provider_env if var.required} - all_env = {var.name for var in provider_env} - missing_env = [var for var in provider_env if var.name in all_env - env.keys()] - missing_required_env = [var for var in provider_env if var.name in required_env - env.keys()] - if missing_required_env and raise_error: - raise MissingConfigurationError(missing_env=missing_env) - return missing_env - - -class ProviderDeploymentState(StrEnum): - MISSING = "missing" - STARTING = "starting" - READY = "ready" - RUNNING = "running" - ERROR = "error" + state: ProviderState = ProviderState.ONLINE class ProviderErrorMessage(BaseModel): message: str - - -class ProviderWithState(Provider, extra="allow"): - state: ProviderDeploymentState | UnmanagedState - last_error: ProviderErrorMessage | None = None - missing_configuration: list[EnvVar] = Field(default_factory=list) diff --git a/apps/agentstack-server/src/agentstack_server/domain/models/provider_build.py b/apps/agentstack-server/src/agentstack_server/domain/models/provider_build.py deleted file mode 100644 index ce9566c68f..0000000000 --- a/apps/agentstack-server/src/agentstack_server/domain/models/provider_build.py +++ /dev/null @@ -1,97 +0,0 @@ -# Copyright 2025 © BeeAI a Series of LF Projects, LLC -# SPDX-License-Identifier: Apache-2.0 -from __future__ import annotations - -import logging -from datetime import timedelta -from enum import StrEnum -from pathlib import Path -from typing import Literal -from uuid import UUID, uuid4 - -from pydantic import ( - AwareDatetime, - BaseModel, - Field, - computed_field, -) - -from agentstack_server.domain.constants import DEFAULT_AUTO_STOP_TIMEOUT -from agentstack_server.utils.docker import DockerImageID -from agentstack_server.utils.github import ResolvedGithubUrl -from agentstack_server.utils.utils import utc_now - -logger = logging.getLogger(__name__) - - -class BuildState(StrEnum): - MISSING = "missing" - IN_PROGRESS = "in_progress" - BUILD_COMPLETED = 
"build_completed" - COMPLETED = "completed" - FAILED = "failed" - - -class AddProvider(BaseModel): - """ - Will add a new provider or update an existing one with the same base docker image ID - (docker registry + repository, excluding tag) - """ - - type: Literal["add_provider"] = "add_provider" - auto_stop_timeout_sec: int | None = Field( - default=None, - gt=0, - le=600, - description=( - "Timeout after which the agent provider will be automatically downscaled if unused." - "Contact administrator if you need to increase this value." - ), - ) - variables: dict[str, str] | None = None - - @property - def auto_stop_timeout(self) -> timedelta: - return timedelta(seconds=self.auto_stop_timeout_sec or int(DEFAULT_AUTO_STOP_TIMEOUT.total_seconds())) - - -class UpdateProvider(BaseModel): - """Will update provider specified by ID""" - - type: Literal["update_provider"] = "update_provider" - provider_id: UUID - - -class NoAction(BaseModel): - type: Literal["no_action"] = "no_action" - - -type OnCompleteAction = AddProvider | UpdateProvider | NoAction - - -class BuildConfiguration(BaseModel): - dockerfile_path: Path | None = Field( - default=None, - description=( - "Path to Dockerfile relative to the repository path " - "(provider_build.source.path or repository root if not defined)" - ), - ) - - -class ProviderBuild(BaseModel): - id: UUID = Field(default_factory=uuid4) - created_at: AwareDatetime = Field(default_factory=utc_now) - status: BuildState - source: ResolvedGithubUrl - destination: DockerImageID - build_configuration: BuildConfiguration | None = None - provider_id: UUID | None = Field(default=None, description="ID of the provider added or modified by this build") - created_by: UUID - on_complete: OnCompleteAction = NoAction() - error_message: str | None = None - - @computed_field - @property - def provider_origin(self) -> str: - return self.source.base diff --git a/apps/agentstack-server/src/agentstack_server/domain/models/provider_discovery.py 
b/apps/agentstack-server/src/agentstack_server/domain/models/provider_discovery.py deleted file mode 100644 index f2af76f3bc..0000000000 --- a/apps/agentstack-server/src/agentstack_server/domain/models/provider_discovery.py +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright 2025 © BeeAI a Series of LF Projects, LLC -# SPDX-License-Identifier: Apache-2.0 - -from __future__ import annotations - -from enum import StrEnum -from uuid import UUID, uuid4 - -from a2a.types import AgentCard -from pydantic import AwareDatetime, BaseModel, Field - -from agentstack_server.utils.utils import utc_now - - -class DiscoveryState(StrEnum): - PENDING = "pending" - IN_PROGRESS = "in_progress" - COMPLETED = "completed" - FAILED = "failed" - - -class ProviderDiscovery(BaseModel): - id: UUID = Field(default_factory=uuid4) - created_at: AwareDatetime = Field(default_factory=utc_now) - status: DiscoveryState - docker_image: str - created_by: UUID - agent_card: AgentCard | None = None - error_message: str | None = None diff --git a/apps/agentstack-server/src/agentstack_server/domain/models/registry.py b/apps/agentstack-server/src/agentstack_server/domain/models/registry.py index 7e0a1c8d25..f18e254b8b 100644 --- a/apps/agentstack-server/src/agentstack_server/domain/models/registry.py +++ b/apps/agentstack-server/src/agentstack_server/domain/models/registry.py @@ -2,120 +2,24 @@ # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations -from collections import Counter -from datetime import timedelta from typing import TYPE_CHECKING, Any -import httpx import yaml from anyio import Path -from pydantic import BaseModel, Field, FileUrl, HttpUrl, RootModel, field_validator, model_validator - -from agentstack_server.domain.constants import DEFAULT_AUTO_STOP_TIMEOUT -from agentstack_server.exceptions import VersionResolveError -from agentstack_server.utils.github import GithubUrl +from pydantic import BaseModel, FileUrl, HttpUrl, RootModel __all__ = [ 
"FileSystemModelProviderRegistryLocation", - "FileSystemRegistryLocation", - "GithubRegistryLocation", "ModelProviderRegistryLocation", "ModelProviderRegistryManifest", "ModelProviderRegistryRecord", - "NetworkRegistryLocation", - "ProviderRegistryRecord", - "RegistryLocation", - "RegistryManifest", "parse_model_providers_manifest", - "parse_providers_manifest", ] if TYPE_CHECKING: # Workaround to prevent cyclic imports # Models from this file are used in config which is used everywhere throughout the codebase from agentstack_server.domain.models.model_provider import ModelProviderType - from agentstack_server.domain.models.provider import ProviderLocation - - -class ProviderRegistryRecord(BaseModel, extra="allow"): - location: ProviderLocation - origin: str = Field(default_factory=lambda data: data["location"].origin) - auto_stop_timeout_sec: int = Field( - default=int(DEFAULT_AUTO_STOP_TIMEOUT.total_seconds()), - ge=0, - description="Downscale after this many seconds of inactivity. Set to 0 to disable downscaling.", - ) - variables: dict[str, str] = {} - - @property - def auto_stop_timeout(self) -> timedelta: - return timedelta(seconds=self.auto_stop_timeout_sec) - - @field_validator("variables", mode="before") - @classmethod - def convert_variables_to_str(cls, v: Any | None): - if v is None: - return {} - if not isinstance(v, dict): - raise ValueError("env must be a dictionary") - return {str(k): str(v) for k, v in v.items()} - - -class RegistryManifest(BaseModel): - providers: list[ProviderRegistryRecord] - - @model_validator(mode="after") - def unique_origin(self): - origin_counts = Counter(p.origin for p in self.providers if p.origin is not None) - assert all(count == 1 for count in origin_counts.values()), ( - f"Registry origins must be unique: {origin_counts.most_common()}" - ) - return self - - -def parse_providers_manifest(content: dict[str, Any]) -> list[ProviderRegistryRecord]: - from agentstack_server.domain.models.provider import ProviderLocation - - 
_ = ProviderLocation # make sure this is imported - - return RegistryManifest.model_validate(content).providers - - -class NetworkRegistryLocation(RootModel[HttpUrl]): - root: HttpUrl - - async def load(self) -> list[ProviderRegistryRecord]: - async with httpx.AsyncClient( - headers={"Cache-Control": "no-cache, no-store, must-revalidate", "Pragma": "no-cache", "Expires": "0"} - ) as client: - resp = await client.get(str(self.root)) - return parse_providers_manifest(yaml.safe_load(resp.content)) - - -class GithubRegistryLocation(RootModel[GithubUrl]): - root: GithubUrl - - async def load(self) -> list[ProviderRegistryRecord]: - try: - resolved_url = await self.root.resolve_version() - except Exception as ex: - raise VersionResolveError(str(self.root), str(ex)) from ex - url = await resolved_url.get_raw_url() - network_location = NetworkRegistryLocation(root=HttpUrl(url)) - return await network_location.load() - - -class FileSystemRegistryLocation(RootModel[FileUrl]): - root: FileUrl - - async def load(self) -> list[ProviderRegistryRecord]: - if self.root.path is None: - return [] - content = await Path(self.root.path).read_text() - return parse_providers_manifest(yaml.safe_load(content)) - - -RegistryLocation = GithubRegistryLocation | NetworkRegistryLocation | FileSystemRegistryLocation class ModelProviderRegistryRecord(BaseModel, extra="allow"): diff --git a/apps/agentstack-server/src/agentstack_server/domain/repositories/provider.py b/apps/agentstack-server/src/agentstack_server/domain/repositories/provider.py index e795614481..0ac1459715 100644 --- a/apps/agentstack-server/src/agentstack_server/domain/repositories/provider.py +++ b/apps/agentstack-server/src/agentstack_server/domain/repositories/provider.py @@ -7,7 +7,7 @@ from typing import Protocol, runtime_checkable from uuid import UUID -from agentstack_server.domain.models.provider import Provider, ProviderType, UnmanagedState +from agentstack_server.domain.models.provider import Provider, ProviderState 
@runtime_checkable @@ -15,7 +15,7 @@ class IProviderRepository(Protocol): def list( self, *, - type: ProviderType | None = None, + source_type: str | None = None, user_id: UUID | None = None, exclude_user_id: UUID | None = None, origin: str | None = None, @@ -26,5 +26,5 @@ async def update(self, *, provider: Provider) -> None: ... async def get(self, *, provider_id: UUID, user_id: UUID | None = None) -> Provider: ... async def delete(self, *, provider_id: UUID, user_id: UUID | None = None) -> int: ... - async def update_unmanaged_state(self, provider_id: UUID, state: UnmanagedState) -> None: ... + async def update_state(self, provider_id: UUID, state: ProviderState) -> None: ... async def update_last_accessed(self, *, provider_id: UUID) -> None: ... diff --git a/apps/agentstack-server/src/agentstack_server/domain/repositories/provider_build.py b/apps/agentstack-server/src/agentstack_server/domain/repositories/provider_build.py deleted file mode 100644 index 3a1842441a..0000000000 --- a/apps/agentstack-server/src/agentstack_server/domain/repositories/provider_build.py +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright 2025 © BeeAI a Series of LF Projects, LLC -# SPDX-License-Identifier: Apache-2.0 - -from __future__ import annotations - -from collections.abc import AsyncIterator -from typing import Protocol, runtime_checkable -from uuid import UUID - -from agentstack_server.domain.models.common import PaginatedResult -from agentstack_server.domain.models.provider_build import BuildState, ProviderBuild - - -@runtime_checkable -class IProviderBuildRepository(Protocol): - def list( - self, *, status: BuildState | None = None, user_id: UUID | None = None - ) -> AsyncIterator[ProviderBuild]: ... 
- - async def list_paginated( - self, - *, - limit: int = 20, - page_token: UUID | None = None, - order: str = "desc", - order_by: str = "created_at", - status: BuildState | None = None, - user_id: UUID | None = None, - exclude_user_id: UUID | None = None, - ) -> PaginatedResult[ProviderBuild]: ... - - async def create(self, *, provider_build: ProviderBuild) -> None: ... - async def update(self, *, provider_build: ProviderBuild) -> None: ... - async def get(self, *, provider_build_id: UUID, user_id: UUID | None = None) -> ProviderBuild: ... - async def delete(self, *, provider_build_id: UUID, user_id: UUID | None = None) -> int: ... diff --git a/apps/agentstack-server/src/agentstack_server/domain/repositories/provider_discovery.py b/apps/agentstack-server/src/agentstack_server/domain/repositories/provider_discovery.py deleted file mode 100644 index 38be677c7f..0000000000 --- a/apps/agentstack-server/src/agentstack_server/domain/repositories/provider_discovery.py +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright 2025 © BeeAI a Series of LF Projects, LLC -# SPDX-License-Identifier: Apache-2.0 - -from __future__ import annotations - -from collections.abc import AsyncIterator -from datetime import datetime -from typing import Protocol, runtime_checkable -from uuid import UUID - -from agentstack_server.domain.models.provider_discovery import DiscoveryState, ProviderDiscovery - - -@runtime_checkable -class IProviderDiscoveryRepository(Protocol): - async def create(self, *, discovery: ProviderDiscovery) -> None: ... - async def get(self, *, discovery_id: UUID, user_id: UUID | None = None) -> ProviderDiscovery: ... - async def update(self, *, discovery: ProviderDiscovery) -> None: ... - async def delete(self, *, discovery_id: UUID, user_id: UUID | None = None) -> int: ... - async def delete_older_than(self, *, older_than: datetime) -> int: ... - - def list( - self, *, user_id: UUID | None = None, status: DiscoveryState | None = None - ) -> AsyncIterator[ProviderDiscovery]: ... 
diff --git a/apps/agentstack-server/src/agentstack_server/exceptions.py b/apps/agentstack-server/src/agentstack_server/exceptions.py index aa64d0f325..3c4656c44f 100644 --- a/apps/agentstack-server/src/agentstack_server/exceptions.py +++ b/apps/agentstack-server/src/agentstack_server/exceptions.py @@ -11,7 +11,6 @@ from tenacity import retry_base, retry_if_exception __all__ = [ - "BuildAlreadyFinishedError", "DuplicateEntityError", "EntityNotFoundError", "ForbiddenUpdateError", @@ -21,7 +20,6 @@ "InvalidVectorDimensionError", "ManifestLoadError", "MissingAgentCardLabelError", - "MissingConfigurationError", "ModelLoadFailedError", "PlatformError", "RateLimitExceededError", @@ -33,8 +31,7 @@ if TYPE_CHECKING: from agentstack_server.domain.models.model_provider import ModelProvider - from agentstack_server.domain.models.provider import EnvVar, ProviderLocation - from agentstack_server.domain.models.provider_build import BuildState + from agentstack_server.domain.models.provider import ProviderLocation class PlatformError(Exception): @@ -126,12 +123,6 @@ def __init__( ) -class MissingConfigurationError(Exception): - def __init__(self, missing_env: list[EnvVar], status_code: int = status.HTTP_400_BAD_REQUEST): - self.missing_env = missing_env - self.status_code = status_code - - class RateLimitExceededError(PlatformError): def __init__( self, @@ -176,14 +167,6 @@ def __init__( super().__init__(message, status_code) -class BuildAlreadyFinishedError(PlatformError): - def __init__(self, platform_build_id: UUID, state: BuildState, status_code: int = status.HTTP_409_CONFLICT): - super().__init__( - message=f"Build with ID {platform_build_id} already finished in state: {state}", - status_code=status_code, - ) - - def retry_if_exception_grp_type(*exception_types: type[BaseException]) -> retry_base: """Handle also exception groups""" diff --git a/apps/agentstack-server/src/agentstack_server/infrastructure/kagenti/__init__.py 
b/apps/agentstack-server/src/agentstack_server/infrastructure/kagenti/__init__.py new file mode 100644 index 0000000000..ef775222db --- /dev/null +++ b/apps/agentstack-server/src/agentstack_server/infrastructure/kagenti/__init__.py @@ -0,0 +1,2 @@ +# Copyright 2025 © BeeAI a Series of LF Projects, LLC +# SPDX-License-Identifier: Apache-2.0 diff --git a/apps/agentstack-server/src/agentstack_server/infrastructure/kagenti/client.py b/apps/agentstack-server/src/agentstack_server/infrastructure/kagenti/client.py new file mode 100644 index 0000000000..6a0910ee67 --- /dev/null +++ b/apps/agentstack-server/src/agentstack_server/infrastructure/kagenti/client.py @@ -0,0 +1,81 @@ +# Copyright 2025 © BeeAI a Series of LF Projects, LLC +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +import logging +from typing import Any + +import httpx + +from agentstack_server.configuration import KagentiConfiguration + +logger = logging.getLogger(__name__) + + +class KagentiClient: + def __init__(self, configuration: KagentiConfiguration): + self._config = configuration + self._token: str | None = None + + async def _get_token(self) -> str | None: + """Get OAuth2 token via client credentials grant.""" + if not self._config.auth_token_url or not self._config.client_id or not self._config.client_secret: + return None + + async with httpx.AsyncClient() as client: + response = await client.post( + self._config.auth_token_url, + data={ + "grant_type": "client_credentials", + "client_id": self._config.client_id, + "client_secret": self._config.client_secret.get_secret_value(), + }, + ) + response.raise_for_status() + return response.json()["access_token"] + + async def list_agents(self) -> list[dict[str, Any]]: + """Fetch all agents from kagenti backend API across configured namespaces. + + Returns a list of agent dicts, each with at least 'name', 'namespace', and 'url' keys. 
+ """ + headers: dict[str, str] = {} + token = await self._get_token() + if token: + headers["Authorization"] = f"Bearer {token}" + + result = [] + async with httpx.AsyncClient(timeout=30) as client: + for namespace in self._config.namespaces: + try: + response = await client.get( + f"{self._config.api_url}/api/v1/agents", + params={"namespace": namespace}, + headers=headers, + ) + response.raise_for_status() + data = response.json() + except httpx.HTTPError as e: + logger.warning("Failed to list agents in namespace %s: %s", namespace, e) + continue + + agents = data.get("items", []) + + for agent in agents: + name = agent.get("name", "") + agent_namespace = agent.get("namespace", namespace) + + # Construct service URL from k8s naming convention + url = f"http://{name}.{agent_namespace}.svc.cluster.local:8080" + + result.append( + { + "name": name, + "namespace": agent_namespace, + "url": url, + "status": agent.get("status", "unknown"), + } + ) + + return result diff --git a/apps/agentstack-server/src/agentstack_server/infrastructure/kubernetes/default_templates/build-provider-job.yaml b/apps/agentstack-server/src/agentstack_server/infrastructure/kubernetes/default_templates/build-provider-job.yaml deleted file mode 100644 index cd125b29da..0000000000 --- a/apps/agentstack-server/src/agentstack_server/infrastructure/kubernetes/default_templates/build-provider-job.yaml +++ /dev/null @@ -1,191 +0,0 @@ -apiVersion: batch/v1 -kind: Job -metadata: - name: "{{ provider_build_name }}" - labels: - app: "{{ provider_build_label }}" - managedBy: agentstack -spec: - backoffLimit: 0 - activeDeadlineSeconds: {{ job_timeout_seconds }} - ttlSecondsAfterFinished: 60 - template: - spec: - terminationGracePeriodSeconds: 3 - imagePullSecrets: - - name: agentstack-registry-secret - securityContext: - fsGroup: 1000 - restartPolicy: Never - initContainers: - - name: git-clone - image: ghcr.io/i-am-bee/alpine/git:v2.49.1 - command: [ "/bin/sh" ] - env: - - name: GIT_TOKEN - valueFrom: - 
secretKeyRef: - name: "{{ git_token_secret_name }}" - key: GIT_TOKEN - optional: true - args: - - -c - - | - set -eo pipefail - echo "Cloning repository..." - # Check if GitHub token is available for this host - if [ -n "$GIT_TOKEN" ]; then - echo "Using authenticated clone for {{ git_host }}" - else - echo "Using unauthenticated clone for {{ git_host }}" - fi - git clone --depth 1 \ - --revision {{ git_ref }} \ - "https://$GIT_TOKEN@{{ git_host }}/{{ git_org }}/{{ git_repo }}.git" \ - /tmp/repo - mv "/tmp/repo/{{ git_path }}"/* /workspace/ 2>/dev/null || true - mv "/tmp/repo/{{ git_path }}"/.[^.]* /workspace/ 2>/dev/null || true - echo "Repository cloned successfully" - ls -la /workspace - volumeMounts: - - name: workspace - mountPath: /workspace - securityContext: - capabilities: - drop: - - ALL - runAsUser: 1000 - runAsGroup: 1000 - # Build image with Kaniko (no securityContext required) - - name: kaniko-build - image: ghcr.io/kaniko-build/dist/chainguard-dev-kaniko/executor:v1.25.2-slim - args: - - --context=/workspace - - "--dockerfile=/workspace/{{ dockerfile_path }}" - - --no-push - - --tar-path=/tmp/image.tar - volumeMounts: - - name: workspace - mountPath: /workspace - - name: image-tar - mountPath: /tmp - securityContext: - runAsUser: 0 - # Main container: Step 3 Push the intermediary image - - name: crane-push - image: ghcr.io/i-am-bee/alpine/crane:0.20.6 - env: - - name: DOCKER_CONFIG - value: /tmp/.docker - args: - - push - - /tmp/image.tar - - "{{ destination }}" - - --insecure - volumeMounts: - - name: image-tar - mountPath: /tmp - - name: docker-config - mountPath: /tmp/.docker/config.json - subPath: .dockerconfigjson - readOnly: true - securityContext: - capabilities: - drop: - - ALL - runAsUser: 1000 - runAsGroup: 1000 - resources: - requests: - memory: "256Mi" - cpu: "200m" - limits: - memory: "512Mi" - cpu: "500m" - - name: run-agent - image: "{{ destination }}" - restartPolicy: Always # This makes it a daemon sidecar container - env: - - name: 
PORT - value: "8000" - - name: HOST - value: "0.0.0.0" - securityContext: - capabilities: - drop: - - ALL - runAsUser: 1000 - runAsGroup: 1000 - resources: - requests: - memory: "512Mi" - cpu: "500m" - limits: - memory: "1Gi" - cpu: "1" - containers: - # Main container: Step 3 - Mutate and push final image - - name: mutate - image: ghcr.io/i-am-bee/alpine/crane:0.20.6 - env: - - name: DOCKER_CONFIG - value: /tmp/.docker - command: - - /bin/sh - - -c - - | - set -eo pipefail - # Extract agent manifest - for i in $(seq 1 30); do - if nc -z 127.0.0.1 8000; then - break - fi - echo "Waiting for server to start... (attempt $i/10)" - sleep 1 - done - - AGENT_CARD_CONTENT=$(wget -O - http://127.0.0.1:8000/.well-known/agent-card.json) - if [ $? -eq 0 ] && [ -n "$AGENT_CARD_CONTENT" ]; then - echo "Successfully extracted agent-card.json" - echo "Content preview:" - echo "$AGENT_CARD_CONTENT" | head -n 10 - else - echo "Failed to extract agent-card.json" - exit 1 - fi - - echo "Starting image mutation..." - echo "Extraction complete!" 
- - LABEL_CONTENT=$(echo -n "$AGENT_CARD_CONTENT" | base64 -w 0) - echo "Label content size: $(echo -n $LABEL_CONTENT | wc -c) bytes" - crane mutate --insecure {{ destination }} \ - --label "beeai.dev.agent.json=${LABEL_CONTENT}" - - volumeMounts: - - name: docker-config - mountPath: /tmp/.docker/config.json - subPath: .dockerconfigjson - readOnly: true - securityContext: - capabilities: - drop: - - ALL - runAsUser: 1000 - runAsGroup: 1000 - resources: - requests: - memory: "256Mi" - cpu: "200m" - limits: - memory: "512Mi" - cpu: "500m" - - volumes: - - name: workspace - emptyDir: { } - - name: image-tar - emptyDir: { } - - name: docker-config - secret: - secretName: agentstack-registry-secret diff --git a/apps/agentstack-server/src/agentstack_server/infrastructure/kubernetes/default_templates/build-provider-secret.yaml b/apps/agentstack-server/src/agentstack_server/infrastructure/kubernetes/default_templates/build-provider-secret.yaml deleted file mode 100644 index 1720b178d9..0000000000 --- a/apps/agentstack-server/src/agentstack_server/infrastructure/kubernetes/default_templates/build-provider-secret.yaml +++ /dev/null @@ -1,8 +0,0 @@ -apiVersion: v1 -kind: Secret -metadata: - name: "{{ git_token_secret_name }}" - labels: - app: "{{ provider_build_label }}" -type: Opaque -data: {{ secret_data }} diff --git a/apps/agentstack-server/src/agentstack_server/infrastructure/kubernetes/default_templates/deployment.yaml b/apps/agentstack-server/src/agentstack_server/infrastructure/kubernetes/default_templates/deployment.yaml deleted file mode 100644 index 9c8d6e2ae3..0000000000 --- a/apps/agentstack-server/src/agentstack_server/infrastructure/kubernetes/default_templates/deployment.yaml +++ /dev/null @@ -1,44 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: "{{ provider_deployment_name }}" - labels: - app: "{{ provider_app_label }}" - managedBy: agentstack -spec: - replicas: 1 - selector: - matchLabels: - app: "{{ provider_app_label }}" - template: - 
metadata: - labels: - app: "{{ provider_app_label }}" - spec: - containers: - - name: agent-container - image: "{{ image }}" - imagePullPolicy: IfNotPresent - ports: - - containerPort: 8000 - env: - - name: HOME - value: /tmp - envFrom: - - secretRef: - name: "{{ provider_secret_name }}" - startupProbe: - httpGet: - path: /.well-known/agent-card.json - port: 8000 - initialDelaySeconds: 1 - periodSeconds: 3 - timeoutSeconds: 2 - failureThreshold: 20 - livenessProbe: - httpGet: - path: /.well-known/agent-card.json - port: 8000 - periodSeconds: 10 - timeoutSeconds: 2 - failureThreshold: 2 diff --git a/apps/agentstack-server/src/agentstack_server/infrastructure/kubernetes/default_templates/secret.yaml b/apps/agentstack-server/src/agentstack_server/infrastructure/kubernetes/default_templates/secret.yaml deleted file mode 100644 index 8ad50a0eef..0000000000 --- a/apps/agentstack-server/src/agentstack_server/infrastructure/kubernetes/default_templates/secret.yaml +++ /dev/null @@ -1,8 +0,0 @@ -apiVersion: v1 -kind: Secret -metadata: - name: "{{ provider_secret_name }}" - labels: - app: "{{ provider_app_label }}" -type: Opaque -data: {{ secret_data }} diff --git a/apps/agentstack-server/src/agentstack_server/infrastructure/kubernetes/default_templates/service.yaml b/apps/agentstack-server/src/agentstack_server/infrastructure/kubernetes/default_templates/service.yaml deleted file mode 100644 index 9f22e14d9b..0000000000 --- a/apps/agentstack-server/src/agentstack_server/infrastructure/kubernetes/default_templates/service.yaml +++ /dev/null @@ -1,16 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: "{{ provider_service_name }}" - labels: - app: "{{ provider_app_label }}" -spec: - type: ClusterIP - selector: - app: "{{ provider_app_label }}" - ports: - - port: 8000 - targetPort: 8000 - protocol: TCP - name: http - diff --git a/apps/agentstack-server/src/agentstack_server/infrastructure/kubernetes/provider_build_manager.py 
b/apps/agentstack-server/src/agentstack_server/infrastructure/kubernetes/provider_build_manager.py deleted file mode 100644 index d96c7eb097..0000000000 --- a/apps/agentstack-server/src/agentstack_server/infrastructure/kubernetes/provider_build_manager.py +++ /dev/null @@ -1,227 +0,0 @@ -# Copyright 2025 © BeeAI a Series of LF Projects, LLC -# SPDX-License-Identifier: Apache-2.0 - -from __future__ import annotations - -import asyncio -import base64 -import logging -import re -from asyncio import TaskGroup -from collections.abc import AsyncIterator, Awaitable, Callable -from contextlib import asynccontextmanager, suppress -from datetime import timedelta -from pathlib import Path -from typing import Any, Final, cast -from uuid import UUID - -import anyio -import kr8s -import yaml -from jinja2 import Template -from kr8s.asyncio.objects import Job, Pod, Secret -from tenacity import AsyncRetrying, retry_if_exception_type, stop_after_delay, wait_fixed - -from agentstack_server.configuration import Configuration -from agentstack_server.domain.models.provider_build import BuildState, ProviderBuild -from agentstack_server.exceptions import EntityNotFoundError -from agentstack_server.service_layer.build_manager import IProviderBuildManager -from agentstack_server.utils.logs_container import LogsContainer, ProcessLogMessage, ProcessLogType -from agentstack_server.utils.utils import extract_messages - -logger = logging.getLogger(__name__) - - -BUILD_AGENT_JOB_FILE_NAME: Final = "build-provider-job.yaml" -BUILD_AGENT_GITHUB_SECRET_NAME: Final = "build-provider-secret.yaml" -DEFAULT_TEMPLATE_DIR: Final = Path(__file__).parent / "default_templates" - - -class KubernetesProviderBuildManager(IProviderBuildManager): - def __init__( - self, - configuration: Configuration, - api_factory: Callable[[], Awaitable[kr8s.asyncio.Api]], - manifest_template_dir: Path | None = None, - ): - self._api_factory = api_factory - self._create_lock = asyncio.Lock() - self._template_dir = 
anyio.Path(manifest_template_dir or DEFAULT_TEMPLATE_DIR) - self._template = None - self._secret_template = None - self._configuration = configuration - - @asynccontextmanager - async def api(self) -> AsyncIterator[kr8s.asyncio.Api]: - client = await self._api_factory() - yield client - - async def _render_template(self, **variables) -> dict[str, Any]: - if self._template is None: - self._template = await (self._template_dir / BUILD_AGENT_JOB_FILE_NAME).read_text() - return yaml.safe_load(Template(self._template).render(**variables)) - - async def _render_secret_template(self, **variables) -> dict[str, Any]: - if self._secret_template is None: - self._secret_template = await (self._template_dir / BUILD_AGENT_GITHUB_SECRET_NAME).read_text() - return yaml.safe_load(Template(self._secret_template).render(**variables)) - - def _get_k8s_name(self, provider_build_id: UUID): - return f"agentstack-build-{provider_build_id}" - - def _get_build_id_from_name(self, name: str) -> UUID: - pattern = r"agentstack-build-([0-9a-f-]+)$" - if match := re.match(pattern, name): - [provider_build_id] = match.groups() - return UUID(provider_build_id) - raise ValueError(f"Invalid provider name format: {name}") - - def _get_build_status(self, job: Job | None) -> BuildState: - if not job: - return BuildState.MISSING - conditions: list = cast(list, job.status.get("conditions", [])) - for condition in conditions: - if condition.get("type") == "Complete" and condition.get("status") == "True": - return BuildState.BUILD_COMPLETED - elif condition.get("type") == "Failed" and condition.get("status") == "True": - return BuildState.FAILED - return BuildState.IN_PROGRESS - - async def create_job( - self, *, provider_build: ProviderBuild, job_timeout: timedelta = timedelta(minutes=20) - ) -> BuildState: - async with self.api() as api: - name = self._get_k8s_name(provider_build.id) - secret = None - if github_token := await provider_build.source.get_github_token(): - secret = Secret( - await 
self._render_secret_template( - git_token_secret_name=f"{name}-secret", - provider_build_label=name, - secret_data={"GIT_TOKEN": base64.b64encode(f"x-access-token:{github_token}".encode()).decode()}, - ), - api=api, - ) - - dockerfile_path = Path("Dockerfile") - if provider_build.build_configuration and provider_build.build_configuration.dockerfile_path: - dockerfile_path = provider_build.build_configuration.dockerfile_path - - job = Job( - await self._render_template( - job_timeout_seconds=int(job_timeout.total_seconds()), - provider_build_name=name, - provider_build_label=name, - git_host=provider_build.source.host, - git_host_upper=provider_build.source.host.upper(), - git_org=provider_build.source.org, - git_repo=provider_build.source.repo, - git_path=provider_build.source.path or ".", - git_ref=provider_build.source.commit_hash, - destination=str(provider_build.destination), - git_token_secret_name=f"{name}-secret", - dockerfile_path=str(dockerfile_path).lstrip("/"), - ), - api=api, - ) - try: - if secret: - await secret.create() - await job.create() - if secret: - await job.adopt(secret) - except Exception as ex: - logger.error("Failed to create build job", exc_info=ex) - if secret: - with suppress(Exception): - await secret.delete() - with suppress(Exception): - await job.delete() - raise - - return BuildState.IN_PROGRESS - - async def wait_for_completion(self, *, provider_build_id: UUID) -> BuildState: - async with self.api() as api: - try: - job = await Job.get(name=self._get_k8s_name(provider_build_id), api=api) - await job.wait(["condition=Complete", "condition=Failed"]) - return self._get_build_status(job) - except kr8s.NotFoundError as e: - raise EntityNotFoundError("build_provider_job", provider_build_id) from e - - async def cancel_job(self, *, provider_build_id: UUID, grace_period: timedelta = timedelta(seconds=20)) -> None: - async with self.api() as api: - try: - job = await Job.get(name=self._get_k8s_name(provider_build_id), api=api) - await 
job.delete(grace_period=int(grace_period.total_seconds()), propagation_policy="Background") - except kr8s.NotFoundError as e: - raise EntityNotFoundError("build_provider_job", provider_build_id) from e - - async def state(self, *, provider_build_ids: list[UUID]) -> dict[UUID, BuildState]: - async with self.api() as api: - jobs = { - self._get_build_id_from_name(cast(Job, job).metadata.name): cast(Job, job) - async for job in kr8s.asyncio.get(kind="job", label_selector={"managedBy": "agentstack"}, api=api) - } - return {build_id: self._get_build_status(jobs.get(build_id)) for build_id in provider_build_ids} - - async def stream_logs( - self, *, provider_build_id: UUID, logs_container: LogsContainer, wait_timeout: timedelta = timedelta(minutes=10) - ) -> None: - try: - async with self.api() as api: - missing_logged = False - while True: - try: - # Get pods for this job - pods = [ - cast(Pod, pod) - async for pod in kr8s.asyncio.get( - kind="pod", - label_selector={"job-name": self._get_k8s_name(provider_build_id)}, - api=api, - ) - ] - if pods: - break - except kr8s.NotFoundError: - ... 
- if not missing_logged: - logs_container.add_stdout("Build job is not running...") - missing_logged = True - await asyncio.sleep(1) - - pod = pods[0] - - async def stream_container_logs(container_name: str): - async for attempt in AsyncRetrying( - stop=stop_after_delay(wait_timeout), - wait=wait_fixed(timedelta(seconds=1)), - retry=retry_if_exception_type(kr8s.ServerError), - reraise=True, - ): - with attempt: - # Test if we can get logs (even just 1 line) - _ = [log async for log in pod.logs(container=container_name, tail_lines=1)] - - logs_container.add_stdout(f"[{container_name}]: Starting log stream...") - try: - async for line in pod.logs(container=container_name, follow=True): - logs_container.add_stdout(f"[{container_name}]: {line}") - except kr8s.ServerError as e: - logs_container.add_stdout(f"[{container_name}]: Log streaming ended: {e}") - except Exception as e: - logs_container.add_stdout(f"[{container_name}]: Unexpected error during streaming: {e}") - - # Get container names from pod spec (init containers + regular containers) - containers = cast(list, pod.spec.get("initContainers", [])) + cast(list, pod.spec.get("containers", [])) - async with TaskGroup() as tg: - for container in containers: - tg.create_task(stream_container_logs(container["name"])) - - except Exception as ex: - messages = ", ".join([f"{exc_type}: {msg}" for exc_type, msg in extract_messages(ex)]) - logs_container.add(ProcessLogMessage(stream=ProcessLogType.STDERR, message=messages)) - logger.error(f"Error while streaming logs: {messages}") - raise diff --git a/apps/agentstack-server/src/agentstack_server/infrastructure/kubernetes/provider_deployment_manager.py b/apps/agentstack-server/src/agentstack_server/infrastructure/kubernetes/provider_deployment_manager.py deleted file mode 100644 index b429b653ee..0000000000 --- a/apps/agentstack-server/src/agentstack_server/infrastructure/kubernetes/provider_deployment_manager.py +++ /dev/null @@ -1,314 +0,0 @@ -# Copyright 2025 © BeeAI a 
Series of LF Projects, LLC -# SPDX-License-Identifier: Apache-2.0 - -from __future__ import annotations - -import asyncio -import base64 -import hashlib -import json -import logging -import re -from asyncio import TaskGroup -from collections.abc import AsyncIterator, Awaitable, Callable -from contextlib import asynccontextmanager, suppress -from datetime import timedelta -from enum import StrEnum -from pathlib import Path -from typing import Any, Final, cast -from uuid import UUID - -import anyio -import kr8s -import yaml -from a2a.utils import AGENT_CARD_WELL_KNOWN_PATH -from httpx import AsyncClient, HTTPError -from jinja2 import Template -from kr8s.asyncio.objects import APIObject, Deployment, Pod, Secret, Service -from pydantic import HttpUrl -from tenacity import AsyncRetrying, retry_if_exception_type, stop_after_delay, wait_fixed - -from agentstack_server.domain.models.provider import Provider, ProviderDeploymentState -from agentstack_server.service_layer.deployment_manager import IProviderDeploymentManager, global_provider_variables -from agentstack_server.utils.logs_container import LogsContainer, ProcessLogMessage, ProcessLogType -from agentstack_server.utils.utils import extract_messages - -logger = logging.getLogger(__name__) - - -class TemplateKind(StrEnum): - DEPLOY = "deploy" - SVC = "svc" - SECRET = "secret" - - -TEMPLATE_KIND_TO_FILE_NAME: Final = { - TemplateKind.DEPLOY: "deployment.yaml", - TemplateKind.SVC: "service.yaml", - TemplateKind.SECRET: "secret.yaml", -} - -DEFAULT_TEMPLATE_DIR: Final = Path(__file__).parent / "default_templates" - - -class KubernetesProviderDeploymentManager(IProviderDeploymentManager): - def __init__( - self, - api_factory: Callable[[], Awaitable[kr8s.asyncio.Api]], - manifest_template_dir: Path | None = None, - ): - self._api_factory = api_factory - self._create_lock = asyncio.Lock() - self._template_dir = anyio.Path(manifest_template_dir or DEFAULT_TEMPLATE_DIR) - self._templates: dict[TemplateKind, str] = {} - - 
@asynccontextmanager - async def api(self) -> AsyncIterator[kr8s.asyncio.Api]: - client = await self._api_factory() - yield client - - async def _render_template(self, kind: TemplateKind, **variables) -> dict[str, Any]: - if kind not in self._templates: - self._templates[kind] = await (self._template_dir / TEMPLATE_KIND_TO_FILE_NAME[kind]).read_text() - template = self._templates[kind] - return yaml.safe_load(Template(template).render(**variables)) - - def _get_k8s_name(self, provider_id: UUID, kind: TemplateKind | None = None): - return f"agentstack-provider-{provider_id}" + (f"-{kind}" if kind else "") - - def _get_provider_id_from_name(self, name: str, kind: TemplateKind | None = None) -> UUID: - pattern = rf"agentstack-provider-([0-9a-f-]+)-{kind}$" if kind else r"agentstack-provider-([0-9a-f-]+)$" - if match := re.match(pattern, name): - [provider_id] = match.groups() - return UUID(provider_id) - raise ValueError(f"Invalid provider name format: {name}") - - async def create_or_replace(self, *, provider: Provider, env: dict[str, str] | None = None) -> bool: - if not provider.managed: - raise ValueError("Attempted to update provider not managed by Kubernetes") - - async with self.api() as api: - label = self._get_k8s_name(provider.id) - - service = Service( - await self._render_template( - TemplateKind.SVC, - provider_service_name=self._get_k8s_name(provider.id, kind=TemplateKind.SVC), - provider_app_label=label, - ), - api=api, - ) - env = (env or {}) | global_provider_variables( - provider_url=await self.get_provider_url(provider_id=provider.id) - ) - secret = Secret( - await self._render_template( - TemplateKind.SECRET, - provider_secret_name=self._get_k8s_name(provider.id, TemplateKind.SECRET), - provider_app_label=label, - secret_data={key: base64.b64encode(value.encode()).decode() for key, value in env.items()}, - ), - api=api, - ) - - deployment_manifest = await self._render_template( - TemplateKind.DEPLOY, - 
provider_deployment_name=self._get_k8s_name(provider.id, TemplateKind.DEPLOY), - provider_app_label=label, - image=str(provider.source.root), - provider_secret_name=self._get_k8s_name(provider.id, TemplateKind.SECRET), - ) - combined_manifest = json.dumps( - {"service": service.raw, "secret": secret.raw, "deployment": deployment_manifest} - ) - deployment_hash = hashlib.sha256(combined_manifest.encode()).hexdigest()[:63] - deployment_manifest["metadata"]["labels"]["deployment-hash"] = deployment_hash - - deployment = Deployment(deployment_manifest, api=api) - async with self._create_lock: - try: - existing_deployment = await Deployment.get(deployment.metadata.name, api=api) - if existing_deployment.metadata.labels["deployment-hash"] == deployment_hash: - if existing_deployment.replicas == 0: - await deployment.scale(1) - return True - return False # Deployment was not modified - logger.info(f"Recreating deployment {deployment.metadata.name} due to configuration change") - await self.delete(provider_id=provider.id) - except kr8s.NotFoundError: - logger.info(f"Creating new deployment {deployment.metadata.name}") - try: - await secret.create() - await service.create() - await deployment.create() - await deployment.adopt(service) - await deployment.adopt(secret) - except Exception as ex: - logger.error("Failed to create provider", exc_info=ex) - # Try to revert changes already made - with suppress(Exception): - await secret.delete() - with suppress(Exception): - await service.delete() - with suppress(Exception): - await deployment.delete() - raise - return True - - async def delete(self, *, provider_id: UUID) -> None: - with suppress(kr8s.NotFoundError): - async with self.api() as api: - deploy = await Deployment.get(name=self._get_k8s_name(provider_id, TemplateKind.DEPLOY), api=api) - await deploy.delete(propagation_policy="Foreground", force=True) - await deploy.wait(["delete"]) - - async def remove_orphaned_providers(self, existing_providers: list[UUID]) -> None: - 
errors = [] - - async def _delete(deploy: APIObject): - try: - with suppress(kr8s.NotFoundError): - await deploy.delete(propagation_policy="Foreground", force=True) - await deploy.wait(["delete"]) - logger.info(f"Deleted orphaned provider {deploy.metadata.name}") - except Exception as ex: - errors.append(ex) - - async with self.api() as api, TaskGroup() as tg: - async for deployment in kr8s.asyncio.get( - kind="deployment", - label_selector={"managedBy": "agentstack"}, - api=api, - ): - provider_id = self._get_provider_id_from_name( - cast(kr8s.APIObject, deployment).metadata.name, - TemplateKind.DEPLOY, - ) - if provider_id not in existing_providers: - tg.create_task(_delete(cast(kr8s.APIObject, deployment))) - if errors: - raise ExceptionGroup("Exceptions occurred when removing orphaned providers", errors) - - async def scale_down(self, *, provider_id: UUID) -> None: - async with self.api() as api: - deploy = await Deployment.get(name=self._get_k8s_name(provider_id, TemplateKind.DEPLOY), api=api) - await deploy.scale(0) - - async def scale_up(self, *, provider_id: UUID) -> None: - async with self.api() as api: - deploy = await Deployment.get(name=self._get_k8s_name(provider_id, TemplateKind.DEPLOY), api=api) - await deploy.scale(1) - - async def wait_for_startup(self, *, provider_id: UUID, timeout: timedelta) -> None: # noqa: ASYNC109 (the timeout actually corresponds to kubernetes timeout) - async with self.api() as api: - deployment = await Deployment.get(name=self._get_k8s_name(provider_id, kind=TemplateKind.DEPLOY), api=api) - await deployment.wait("condition=Available", timeout=int(timeout.total_seconds())) - # For some reason the first request sometimes doesn't come through - # (the service does not route immediately after deploy is available?) 
- async for attempt in AsyncRetrying( - stop=stop_after_delay(timedelta(seconds=10)), - wait=wait_fixed(timedelta(seconds=0.5)), - retry=retry_if_exception_type(HTTPError), - reraise=True, - ): - with attempt: - async with AsyncClient( - base_url=str(await self.get_provider_url(provider_id=provider_id)) - ) as client: - resp = await client.get(AGENT_CARD_WELL_KNOWN_PATH, timeout=2) - resp.raise_for_status() - - async def state(self, *, provider_ids: list[UUID]) -> list[ProviderDeploymentState]: - async with self.api() as api: - deployments = { - self._get_provider_id_from_name( - cast(kr8s.APIObject, deployment).metadata.name, - TemplateKind.DEPLOY, - ): cast(kr8s.APIObject, deployment) - async for deployment in kr8s.asyncio.get( - kind="deployment", - label_selector={"managedBy": "agentstack"}, - api=api, - ) - } - provider_ids_set = set(provider_ids) - deployments = {provider_id: d for provider_id, d in deployments.items() if provider_id in provider_ids_set} - states = [] - for provider_id in provider_ids: - deployment = cast(kr8s.APIObject, deployments.get(provider_id)) - if not deployment: - state = ProviderDeploymentState.MISSING - elif deployment.status.get("availableReplicas", 0) > 0: - state = ProviderDeploymentState.RUNNING - elif deployment.status.get("replicas", 0) == 0: - state = ProviderDeploymentState.READY - else: - state = ProviderDeploymentState.STARTING - states.append(state) - return states - - async def get_provider_url(self, *, provider_id: UUID) -> HttpUrl: - return HttpUrl(f"http://{self._get_k8s_name(provider_id, TemplateKind.SVC)}:8000") - - async def stream_logs(self, *, provider_id: UUID, logs_container: LogsContainer): - try: - async with self.api() as api: - missing_logged = False - while True: - try: - deploy = await Deployment.get( - name=self._get_k8s_name(provider_id, kind=TemplateKind.DEPLOY), - api=api, - ) - if pods := await deploy.pods(): - break - except kr8s.NotFoundError: - ... 
- if not missing_logged: - logs_container.add_stdout("Agent is starting up...") - missing_logged = True - await asyncio.sleep(1) - - if deploy.status.get("availableReplicas", 0) == 0: - async for _event_stream_type, event in api.watch( - kind="event", - # TODO: we select for only one pod, for multi-pod agents this might hold up the logs for a while - field_selector=f"involvedObject.name=={pods[0].name},involvedObject.kind==Pod", - ): - message = event.raw.get("message", "") - logs_container.add_stdout(f"{event.raw.reason}: {message}") - if event.raw.reason == "Started": - break - - for _ in range(10): - try: - _ = [log async for log in pods[0].logs(tail_lines=1)] - break - except kr8s.ServerError: - await asyncio.sleep(1) - else: - logs_container.add_stdout("Container crashed or not starting up, attempting to get previous logs:") - with suppress(kr8s.ServerError): - previous_logs = [log async for log in pods[0].logs(previous=True)] - if previous_logs: - logs_container.add_stdout("Previous container logs:") - for log in previous_logs: - logs_container.add_stdout(f"Previous: {log}") - return - - # Stream logs from pods - async def stream_logs(pod: Pod): - async for line in pod.logs(follow=True): - logs_container.add_stdout( - f"{pod.name.replace(self._get_k8s_name(provider_id, TemplateKind.DEPLOY), '')}: {line}" - ) - - async with TaskGroup() as tg: - for pod in await deploy.pods(): - tg.create_task(stream_logs(pod)) - - except Exception as ex: - messages = ", ".join([f"{exc_type}: {msg}" for exc_type, msg in extract_messages(ex)]) - logs_container.add(ProcessLogMessage(stream=ProcessLogType.STDERR, message=messages)) - logger.error(f"Error while streaming logs: {messages}") - raise diff --git a/apps/agentstack-server/src/agentstack_server/infrastructure/persistence/migrations/alembic/versions/c0095389475b_.py b/apps/agentstack-server/src/agentstack_server/infrastructure/persistence/migrations/alembic/versions/c0095389475b_.py new file mode 100644 index 
0000000000..2aea85a4a2 --- /dev/null +++ b/apps/agentstack-server/src/agentstack_server/infrastructure/persistence/migrations/alembic/versions/c0095389475b_.py @@ -0,0 +1,141 @@ +# Copyright 2025 © BeeAI a Series of LF Projects, LLC +# SPDX-License-Identifier: Apache-2.0 + +"""Remove managed provider features, add kagenti sync support + +Revision ID: c0095389475b +Revises: 764ca0fd6a5b +Create Date: 2026-03-09 13:33:33.334880 + +""" + +from collections.abc import Sequence + +import sqlalchemy as sa +from alembic import op +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision: str = "c0095389475b" +down_revision: str | None = "764ca0fd6a5b" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def upgrade() -> None: + """Upgrade schema.""" + # Drop removed tables + op.drop_table("provider_builds") + op.drop_table("provider_discoveries") + + # Create new providerstate enum (same values as unmanagedstate) + providerstate = postgresql.ENUM("online", "offline", name="providerstate", create_type=False) + providerstate.create(op.get_bind(), checkfirst=True) + + # Add new columns + op.add_column("providers", sa.Column("source_type", sa.String(length=32), server_default="api", nullable=False)) + op.add_column("providers", sa.Column("state", providerstate, nullable=True)) + + # Migrate unmanaged_state data to new state column, default to 'online' + op.execute("UPDATE providers SET state = COALESCE(unmanaged_state::text::providerstate, 'online')") + + # Make state non-nullable now that all rows have a value + op.alter_column("providers", "state", nullable=False, server_default="online") + + # Drop old columns + op.drop_column("providers", "auto_stop_timeout_sec") + op.drop_column("providers", "version_info") + op.drop_column("providers", "unmanaged_state") + op.drop_column("providers", "registry") + op.drop_column("providers", "type") + + # Drop old enum types + 
sa.Enum(name="unmanagedstate").drop(op.get_bind(), checkfirst=True) + sa.Enum(name="providertype").drop(op.get_bind(), checkfirst=True) + sa.Enum(name="buildstate").drop(op.get_bind(), checkfirst=True) + sa.Enum(name="discoverystate").drop(op.get_bind(), checkfirst=True) + + +def downgrade() -> None: + """Downgrade schema.""" + # ### commands auto generated by Alembic - please adjust! ### + op.add_column( + "providers", + sa.Column( + "type", postgresql.ENUM("managed", "unmanaged", name="providertype"), autoincrement=False, nullable=False + ), + ) + op.add_column("providers", sa.Column("registry", sa.VARCHAR(length=2048), autoincrement=False, nullable=True)) + op.add_column( + "providers", + sa.Column( + "unmanaged_state", + postgresql.ENUM("online", "offline", name="unmanagedstate"), + autoincrement=False, + nullable=True, + ), + ) + op.add_column( + "providers", + sa.Column( + "version_info", + postgresql.JSON(astext_type=sa.Text()), + server_default=sa.text('\'{"docker": null, "github": null}\'::json'), + autoincrement=False, + nullable=False, + ), + ) + op.add_column("providers", sa.Column("auto_stop_timeout_sec", sa.INTEGER(), autoincrement=False, nullable=False)) + op.drop_column("providers", "state") + op.drop_column("providers", "source_type") + op.create_table( + "provider_discoveries", + sa.Column("id", sa.UUID(), autoincrement=False, nullable=False), + sa.Column("created_at", postgresql.TIMESTAMP(timezone=True), autoincrement=False, nullable=False), + sa.Column("docker_image", sa.VARCHAR(length=2048), autoincrement=False, nullable=False), + sa.Column("created_by", sa.UUID(), autoincrement=False, nullable=False), + sa.Column("agent_card", postgresql.JSONB(astext_type=sa.Text()), autoincrement=False, nullable=True), + sa.Column("error_message", sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column( + "status", + postgresql.ENUM("pending", "in_progress", "completed", "failed", name="discoverystate"), + autoincrement=False, + nullable=False, + ), + 
sa.ForeignKeyConstraint( + ["created_by"], ["users.id"], name=op.f("provider_discoveries_created_by_fkey"), ondelete="CASCADE" + ), + sa.PrimaryKeyConstraint("id", name=op.f("provider_discoveries_pkey")), + ) + op.create_table( + "provider_builds", + sa.Column("id", sa.UUID(), autoincrement=False, nullable=False), + sa.Column("source", postgresql.JSON(astext_type=sa.Text()), autoincrement=False, nullable=False), + sa.Column("created_at", postgresql.TIMESTAMP(timezone=True), autoincrement=False, nullable=False), + sa.Column("created_by", sa.UUID(), autoincrement=False, nullable=False), + sa.Column( + "status", + postgresql.ENUM("missing", "in_progress", "completed", "failed", "build_completed", name="buildstate"), + autoincrement=False, + nullable=False, + ), + sa.Column("destination", sa.VARCHAR(length=512), autoincrement=False, nullable=False), + sa.Column( + "on_complete", + postgresql.JSON(astext_type=sa.Text()), + server_default=sa.text('\'{"type": "no_action"}\'::json'), + autoincrement=False, + nullable=False, + ), + sa.Column("error_message", sa.TEXT(), autoincrement=False, nullable=True), + sa.Column("build_configuration", postgresql.JSON(astext_type=sa.Text()), autoincrement=False, nullable=True), + sa.Column("provider_id", sa.UUID(), autoincrement=False, nullable=True), + sa.ForeignKeyConstraint( + ["created_by"], ["users.id"], name=op.f("provider_builds_created_by_fkey"), ondelete="CASCADE" + ), + sa.ForeignKeyConstraint( + ["provider_id"], ["providers.id"], name=op.f("provider_builds_provider_id_fkey"), ondelete="SET NULL" + ), + sa.PrimaryKeyConstraint("id", name=op.f("provider_builds_pkey")), + ) + # ### end Alembic commands ### diff --git a/apps/agentstack-server/src/agentstack_server/infrastructure/persistence/repositories/provider.py b/apps/agentstack-server/src/agentstack_server/infrastructure/persistence/repositories/provider.py index 36b7caf313..c32dc1ea1b 100644 --- 
a/apps/agentstack-server/src/agentstack_server/infrastructure/persistence/repositories/provider.py +++ b/apps/agentstack-server/src/agentstack_server/infrastructure/persistence/repositories/provider.py @@ -4,17 +4,16 @@ from __future__ import annotations from collections.abc import AsyncIterator -from datetime import timedelta from typing import Any from uuid import UUID -from sqlalchemy import JSON, Column, DateTime, ForeignKey, Integer, Row, String, Table +from sqlalchemy import JSON, Column, DateTime, ForeignKey, Row, String, Table from sqlalchemy import UUID as SQL_UUID from sqlalchemy.exc import IntegrityError from sqlalchemy.ext.asyncio import AsyncConnection from sqlalchemy.sql import delete, select -from agentstack_server.domain.models.provider import Provider, ProviderType, UnmanagedState +from agentstack_server.domain.models.provider import Provider, ProviderState from agentstack_server.domain.repositories.provider import IProviderRepository from agentstack_server.exceptions import DuplicateEntityError, EntityNotFoundError from agentstack_server.infrastructure.persistence.repositories.db_metadata import metadata @@ -25,18 +24,15 @@ "providers", metadata, Column("id", SQL_UUID, primary_key=True), - Column("type", sql_enum(ProviderType), nullable=False), Column("source", String(2048), nullable=False, unique=True), + Column("source_type", String(32), nullable=False, server_default="api"), Column("origin", String(2048), nullable=False), - Column("version_info", JSON, nullable=False), - Column("registry", String(2048), nullable=True), - Column("auto_stop_timeout_sec", Integer, nullable=False), Column("created_at", DateTime(timezone=True), nullable=False), Column("updated_at", DateTime(timezone=True), nullable=False), Column("created_by", ForeignKey("users.id", ondelete="CASCADE"), nullable=False), Column("last_active_at", DateTime(timezone=True), nullable=False), Column("agent_card", JSON, nullable=False), - Column("unmanaged_state", sql_enum(UnmanagedState), 
nullable=True), + Column("state", sql_enum(ProviderState), nullable=False, server_default=ProviderState.ONLINE.value), ) @@ -52,8 +48,8 @@ async def create(self, *, provider: Provider) -> None: except IntegrityError as e: raise DuplicateEntityError(entity="provider", field="source", value=str(provider.source.root)) from e - async def update_unmanaged_state(self, provider_id: UUID, state: UnmanagedState) -> None: - query = providers_table.update().where(providers_table.c.id == provider_id).values(unmanaged_state=state) + async def update_state(self, provider_id: UUID, state: ProviderState) -> None: + query = providers_table.update().where(providers_table.c.id == provider_id).values(state=state) await self.connection.execute(query) async def update(self, *, provider: Provider) -> None: @@ -63,18 +59,15 @@ async def update(self, *, provider: Provider) -> None: def _to_row(self, provider: Provider) -> dict[str, Any]: return { "id": provider.id, - "auto_stop_timeout_sec": provider.auto_stop_timeout.total_seconds(), - "type": provider.type, "source": str(provider.source.root), + "source_type": provider.source_type, "origin": provider.origin, - "version_info": provider.version_info.model_dump(mode="json"), - "registry": provider.registry and str(provider.registry.root), "agent_card": provider.agent_card.model_dump(mode="json"), "created_at": provider.created_at, "updated_at": provider.updated_at, "created_by": provider.created_by, "last_active_at": provider.last_active_at, - "unmanaged_state": provider.unmanaged_state, + "state": provider.state, } def _to_provider(self, row: Row) -> Provider: @@ -82,17 +75,14 @@ def _to_provider(self, row: Row) -> Provider: { "id": row.id, "source": row.source, + "source_type": row.source_type, "origin": row.origin, - # "type": row.type, # type is determined dynamically from source (but we store it for filtering) - "version_info": row.version_info, - "registry": row.registry, - "auto_stop_timeout": 
timedelta(seconds=row.auto_stop_timeout_sec), "last_active_at": row.last_active_at, "created_at": row.created_at, "updated_at": row.updated_at, "created_by": row.created_by, "agent_card": row.agent_card, - "unmanaged_state": row.unmanaged_state, + "state": row.state, } ) @@ -122,7 +112,7 @@ async def delete(self, *, provider_id: UUID, user_id: UUID | None = None) -> int async def list( self, *, - type: ProviderType | None = None, + source_type: str | None = None, user_id: UUID | None = None, exclude_user_id: UUID | None = None, origin: str | None = None, @@ -134,7 +124,7 @@ async def list( query = query.where(providers_table.c.created_by != exclude_user_id) if origin is not None: query = query.where(providers_table.c.origin == origin) - if type is not None: - query = query.where(providers_table.c.type == type) + if source_type is not None: + query = query.where(providers_table.c.source_type == source_type) async for row in await self.connection.stream(query): yield self._to_provider(row) diff --git a/apps/agentstack-server/src/agentstack_server/infrastructure/persistence/repositories/provider_build.py b/apps/agentstack-server/src/agentstack_server/infrastructure/persistence/repositories/provider_build.py deleted file mode 100644 index d79711835a..0000000000 --- a/apps/agentstack-server/src/agentstack_server/infrastructure/persistence/repositories/provider_build.py +++ /dev/null @@ -1,151 +0,0 @@ -# Copyright 2025 © BeeAI a Series of LF Projects, LLC -# SPDX-License-Identifier: Apache-2.0 - -from __future__ import annotations - -from collections.abc import AsyncIterator -from typing import Any -from uuid import UUID - -from sqlalchemy import JSON, Column, DateTime, ForeignKey, Row, String, Table, Text -from sqlalchemy import UUID as SQL_UUID -from sqlalchemy.ext.asyncio import AsyncConnection -from sqlalchemy.sql import select - -from agentstack_server.domain.models.common import PaginatedResult -from agentstack_server.domain.models.provider_build import BuildState, 
ProviderBuild -from agentstack_server.domain.repositories.provider_build import IProviderBuildRepository -from agentstack_server.exceptions import EntityNotFoundError -from agentstack_server.infrastructure.persistence.repositories.db_metadata import metadata -from agentstack_server.infrastructure.persistence.repositories.utils import cursor_paginate, sql_enum - -provider_builds_table = Table( - "provider_builds", - metadata, - Column("id", SQL_UUID, primary_key=True), - Column("source", JSON, nullable=False), - Column("created_at", DateTime(timezone=True), nullable=False), - # The CASCADE might leave some k8s jobs orphaned without cancellation, but jobs have timeout and self-deletion - Column("created_by", ForeignKey("users.id", ondelete="CASCADE"), nullable=False), - Column("status", sql_enum(BuildState), nullable=False), - Column("build_configuration", JSON, nullable=True), - Column("provider_id", SQL_UUID, ForeignKey("providers.id", ondelete="SET NULL"), nullable=True), - Column("on_complete", JSON, nullable=False), - Column("error_message", Text, nullable=True), - Column("destination", String(512), nullable=False), -) - - -class SqlAlchemyProviderBuildRepository(IProviderBuildRepository): - def __init__(self, connection: AsyncConnection): - self._connection = connection - - async def create(self, *, provider_build: ProviderBuild) -> None: - query = provider_builds_table.insert().values(self._to_row(provider_build)) - await self._connection.execute(query) - - async def update(self, *, provider_build: ProviderBuild) -> None: - query = ( - provider_builds_table.update() - .where(provider_builds_table.c.id == provider_build.id) - .values(self._to_row(provider_build)) - ) - await self._connection.execute(query) - - def _to_row(self, provider_build: ProviderBuild) -> dict[str, Any]: - return { - "id": provider_build.id, - "source": provider_build.source.model_dump(mode="json"), - "created_at": provider_build.created_at, - "status": provider_build.status, - 
"created_by": provider_build.created_by, - "on_complete": provider_build.on_complete.model_dump(mode="json"), - "build_configuration": ( - provider_build.build_configuration.model_dump(mode="json") - if provider_build.build_configuration - else None - ), - "provider_id": provider_build.provider_id, - "destination": str(provider_build.destination), - "error_message": provider_build.error_message, - } - - def _to_provider_build(self, row: Row) -> ProviderBuild: - return ProviderBuild.model_validate( - { - "id": row.id, - "source": row.source, - "destination": row.destination, - "created_at": row.created_at, - "created_by": row.created_by, - "on_complete": row.on_complete, - "provider_id": row.provider_id, - "build_configuration": row.build_configuration, - "status": row.status, - "error_message": row.error_message, - } - ) - - async def get(self, *, provider_build_id: UUID, user_id: UUID | None = None) -> ProviderBuild: - query = select(provider_builds_table).where(provider_builds_table.c.id == provider_build_id) - if user_id: - query = query.where(provider_builds_table.c.created_by == user_id) - result = await self._connection.execute(query) - if not (row := result.fetchone()): - raise EntityNotFoundError(entity="provider_build", id=provider_build_id) - return self._to_provider_build(row) - - async def delete(self, *, provider_build_id: UUID, user_id: UUID | None = None) -> int: - query = provider_builds_table.delete().where(provider_builds_table.c.id == provider_build_id) - if user_id: - query = query.where(provider_builds_table.c.created_by == user_id) - result = await self._connection.execute(query) - if not result.rowcount: - raise EntityNotFoundError("provider_build", provider_build_id) - return result.rowcount - - async def list( - self, *, status: BuildState | None = None, user_id: UUID | None = None - ) -> AsyncIterator[ProviderBuild]: - query = provider_builds_table.select() - if user_id: - query = query.where(provider_builds_table.c.created_by == user_id) 
- if status is not None: - query = query.where(provider_builds_table.c.status == status) - async for row in await self._connection.stream(query): - yield self._to_provider_build(row) - - async def list_paginated( - self, - *, - limit: int = 20, - page_token: UUID | None = None, - order: str = "desc", - order_by: str = "created_at", - status: BuildState | None = None, - user_id: UUID | None = None, - exclude_user_id: UUID | None = None, - ) -> PaginatedResult[ProviderBuild]: - query = provider_builds_table.select() - if user_id: - query = query.where(provider_builds_table.c.created_by == user_id) - if exclude_user_id: - query = query.where(provider_builds_table.c.created_by != exclude_user_id) - if status is not None: - query = query.where(provider_builds_table.c.status == status) - - result = await cursor_paginate( - connection=self._connection, - query=query, - id_column=provider_builds_table.c.id, - limit=limit, - after_cursor=page_token, - order=order, - order_column=getattr(provider_builds_table.c, order_by), - ) - - return PaginatedResult( - items=[self._to_provider_build(row) for row in result.items], - total_count=result.total_count, - has_more=result.has_more, - ) diff --git a/apps/agentstack-server/src/agentstack_server/infrastructure/persistence/repositories/provider_discovery.py b/apps/agentstack-server/src/agentstack_server/infrastructure/persistence/repositories/provider_discovery.py deleted file mode 100644 index c027625e16..0000000000 --- a/apps/agentstack-server/src/agentstack_server/infrastructure/persistence/repositories/provider_discovery.py +++ /dev/null @@ -1,108 +0,0 @@ -# Copyright 2025 © BeeAI a Series of LF Projects, LLC -# SPDX-License-Identifier: Apache-2.0 - -from __future__ import annotations - -from collections.abc import AsyncIterator -from datetime import datetime -from typing import Any -from uuid import UUID - -from sqlalchemy import UUID as SQL_UUID -from sqlalchemy import Column, DateTime, ForeignKey, Row, String, Table -from 
sqlalchemy.dialects.postgresql import JSONB -from sqlalchemy.ext.asyncio import AsyncConnection -from sqlalchemy.sql import delete, select - -from agentstack_server.domain.models.provider_discovery import DiscoveryState, ProviderDiscovery -from agentstack_server.domain.repositories.provider_discovery import IProviderDiscoveryRepository -from agentstack_server.exceptions import EntityNotFoundError -from agentstack_server.infrastructure.persistence.repositories.db_metadata import metadata -from agentstack_server.infrastructure.persistence.repositories.utils import sql_enum - -provider_discoveries_table = Table( - "provider_discoveries", - metadata, - Column("id", SQL_UUID, primary_key=True), - Column("created_at", DateTime(timezone=True), nullable=False), - Column("status", sql_enum(DiscoveryState), nullable=False), - Column("docker_image", String(2048), nullable=False), - Column("created_by", ForeignKey("users.id", ondelete="CASCADE"), nullable=False), - Column("agent_card", JSONB, nullable=True), - Column("error_message", String, nullable=True), -) - - -class SqlAlchemyProviderDiscoveryRepository(IProviderDiscoveryRepository): - def __init__(self, connection: AsyncConnection): - self.connection = connection - - async def create(self, *, discovery: ProviderDiscovery) -> None: - query = provider_discoveries_table.insert().values(self._to_row(discovery)) - await self.connection.execute(query) - - async def get(self, *, discovery_id: UUID, user_id: UUID | None = None) -> ProviderDiscovery: - query = select(provider_discoveries_table).where(provider_discoveries_table.c.id == discovery_id) - if user_id is not None: - query = query.where(provider_discoveries_table.c.created_by == user_id) - result = await self.connection.execute(query) - if not (row := result.fetchone()): - raise EntityNotFoundError(entity="provider_discovery", id=discovery_id) - return self._to_discovery(row) - - async def update(self, *, discovery: ProviderDiscovery) -> None: - query = ( - 
provider_discoveries_table.update() - .where(provider_discoveries_table.c.id == discovery.id) - .values(self._to_row(discovery)) - ) - await self.connection.execute(query) - - async def delete(self, *, discovery_id: UUID, user_id: UUID | None = None) -> int: - query = delete(provider_discoveries_table).where(provider_discoveries_table.c.id == discovery_id) - if user_id is not None: - query = query.where(provider_discoveries_table.c.created_by == user_id) - result = await self.connection.execute(query) - if not result.rowcount: - raise EntityNotFoundError(entity="provider_discovery", id=discovery_id) - return result.rowcount - - async def delete_older_than(self, *, older_than: datetime) -> int: - query = delete(provider_discoveries_table).where(provider_discoveries_table.c.created_at < older_than) - result = await self.connection.execute(query) - return result.rowcount - - async def list( - self, *, user_id: UUID | None = None, status: DiscoveryState | None = None - ) -> AsyncIterator[ProviderDiscovery]: - query = select(provider_discoveries_table) - if user_id is not None: - query = query.where(provider_discoveries_table.c.created_by == user_id) - if status is not None: - query = query.where(provider_discoveries_table.c.status == status) - async for row in await self.connection.stream(query): - yield self._to_discovery(row) - - def _to_row(self, discovery: ProviderDiscovery) -> dict[str, Any]: - return { - "id": discovery.id, - "created_at": discovery.created_at, - "status": discovery.status, - "docker_image": discovery.docker_image, - "created_by": discovery.created_by, - "agent_card": discovery.agent_card.model_dump(mode="json") if discovery.agent_card else None, - "error_message": discovery.error_message, - } - - def _to_discovery(self, row: Row) -> ProviderDiscovery: - return ProviderDiscovery.model_validate( - { - "id": row.id, - "created_at": row.created_at, - "status": row.status, - "docker_image": row.docker_image, - "created_by": row.created_by, - 
"agent_card": row.agent_card, - "error_message": row.error_message, - } - ) diff --git a/apps/agentstack-server/src/agentstack_server/infrastructure/persistence/unit_of_work.py b/apps/agentstack-server/src/agentstack_server/infrastructure/persistence/unit_of_work.py index 3cbfc1c7e6..4f2d45380e 100644 --- a/apps/agentstack-server/src/agentstack_server/infrastructure/persistence/unit_of_work.py +++ b/apps/agentstack-server/src/agentstack_server/infrastructure/persistence/unit_of_work.py @@ -17,7 +17,6 @@ from agentstack_server.domain.repositories.file import IFileRepository from agentstack_server.domain.repositories.model_provider import IModelProviderRepository from agentstack_server.domain.repositories.provider import IProviderRepository -from agentstack_server.domain.repositories.provider_discovery import IProviderDiscoveryRepository from agentstack_server.domain.repositories.user import IUserRepository from agentstack_server.domain.repositories.user_feedback import IUserFeedbackRepository from agentstack_server.domain.repositories.vector_store import IVectorDatabaseRepository, IVectorStoreRepository @@ -28,10 +27,6 @@ from agentstack_server.infrastructure.persistence.repositories.file import SqlAlchemyFileRepository from agentstack_server.infrastructure.persistence.repositories.model_provider import SqlAlchemyModelProviderRepository from agentstack_server.infrastructure.persistence.repositories.provider import SqlAlchemyProviderRepository -from agentstack_server.infrastructure.persistence.repositories.provider_build import SqlAlchemyProviderBuildRepository -from agentstack_server.infrastructure.persistence.repositories.provider_discovery import ( - SqlAlchemyProviderDiscoveryRepository, -) from agentstack_server.infrastructure.persistence.repositories.requests import SqlAlchemyA2ARequestRepository from agentstack_server.infrastructure.persistence.repositories.user import SqlAlchemyUserRepository from 
agentstack_server.infrastructure.persistence.repositories.user_feedback import SqlAlchemyUserFeedbackRepository @@ -58,7 +53,6 @@ class SQLAlchemyUnitOfWork(IUnitOfWork): vector_database: IVectorDatabaseRepository user_feedback: IUserFeedbackRepository connectors: IConnectorRepository - provider_discoveries: IProviderDiscoveryRepository def __init__(self, engine: AsyncEngine, config: Configuration) -> None: self._engine: AsyncEngine = engine @@ -77,7 +71,6 @@ async def __aenter__(self) -> Self: self.a2a_requests = SqlAlchemyA2ARequestRepository(self._connection) self.providers = SqlAlchemyProviderRepository(self._connection) self.model_providers = SqlAlchemyModelProviderRepository(self._connection) - self.provider_builds = SqlAlchemyProviderBuildRepository(self._connection) self.contexts = SqlAlchemyContextRepository(self._connection) self.env = SqlAlchemyEnvVariableRepository(self._connection, configuration=self._config) self.files = SqlAlchemyFileRepository(self._connection) @@ -89,7 +82,6 @@ async def __aenter__(self) -> Self: ) self.user_feedback = SqlAlchemyUserFeedbackRepository(self._connection) self.connectors = SqlAlchemyConnectorRepository(self._connection) - self.provider_discoveries = SqlAlchemyProviderDiscoveryRepository(self._connection) except Exception as e: await self._exit_stack.aclose() diff --git a/apps/agentstack-server/src/agentstack_server/jobs/crons/cleanup.py b/apps/agentstack-server/src/agentstack_server/jobs/crons/cleanup.py index 77ef3de91b..88e2fc0282 100644 --- a/apps/agentstack-server/src/agentstack_server/jobs/crons/cleanup.py +++ b/apps/agentstack-server/src/agentstack_server/jobs/crons/cleanup.py @@ -10,7 +10,6 @@ from agentstack_server.jobs.queues import Queues from agentstack_server.service_layer.services.a2a import A2AProxyService from agentstack_server.service_layer.services.contexts import ContextService -from agentstack_server.service_layer.services.provider_discovery import ProviderDiscoveryService blueprint = Blueprint() @@ 
-35,15 +34,6 @@ async def cleanup_expired_a2a_tasks(timestamp: int, a2a_proxy: A2AProxyService) logger.info(f"Deleted: {deleted_stats}") -@blueprint.periodic(cron="15 * * * *") # pyrefly: ignore [bad-argument-type] -- bad typing in blueprint library -@blueprint.task(queueing_lock="cleanup_expired_provider_discoveries", queue=str(Queues.CRON_CLEANUP)) -@inject -async def cleanup_expired_provider_discoveries(timestamp: int, service: ProviderDiscoveryService) -> None: - """Delete provider discovery records older than 1 day.""" - deleted_count = await service.cleanup_expired_discoveries() - logger.info(f"Deleted {deleted_count} expired provider discoveries") - - @blueprint.periodic(cron="*/10 * * * *") # pyrefly: ignore [bad-argument-type] -- bad typing in blueprint library @blueprint.task(queueing_lock="remove_old_jobs", queue=str(Queues.CRON_CLEANUP), pass_context=True) async def remove_old_jobs(context: JobContext, timestamp: int): diff --git a/apps/agentstack-server/src/agentstack_server/jobs/crons/provider.py b/apps/agentstack-server/src/agentstack_server/jobs/crons/provider.py index 20b2a00b9b..ebb824bd0b 100644 --- a/apps/agentstack-server/src/agentstack_server/jobs/crons/provider.py +++ b/apps/agentstack-server/src/agentstack_server/jobs/crons/provider.py @@ -16,12 +16,15 @@ from agentstack_server import get_configuration from agentstack_server.configuration import Configuration from agentstack_server.domain.constants import SELF_REGISTRATION_EXTENSION_URI -from agentstack_server.domain.models.provider import NetworkProviderLocation, Provider, ProviderType, UnmanagedState -from agentstack_server.domain.models.registry import ProviderRegistryRecord, RegistryLocation +from agentstack_server.domain.models.provider import ( + NetworkProviderLocation, + Provider, + ProviderState, + SourceType, +) from agentstack_server.jobs.queues import Queues from agentstack_server.service_layer.services.providers import ProviderService from 
agentstack_server.service_layer.services.users import UserService -from agentstack_server.service_layer.unit_of_work import IUnitOfWorkFactory from agentstack_server.utils.a2a import get_extension from agentstack_server.utils.utils import extract_messages @@ -30,115 +33,110 @@ blueprint = Blueprint() -@blueprint.periodic(cron="*/1 * * * *") # pyrefly: ignore [bad-argument-type] -- bad typing in blueprint library -@blueprint.task(queueing_lock="scale_down_providers", queue=str(Queues.CRON_PROVIDER)) -@inject -async def scale_down_providers(timestamp: int, service: ProviderService, configuration: Configuration): - if configuration.provider.disable_downscaling: - return - await service.scale_down_providers() - - # TODO: Can't use DI here because it's not initialized yet # pyrefly: ignore [bad-argument-type] -- bad typing in blueprint library -@blueprint.periodic(cron=get_configuration().agent_registry.sync_period_cron) -@blueprint.task(queueing_lock="check_registry", queue=str(Queues.CRON_PROVIDER)) +@blueprint.periodic(cron=get_configuration().kagenti.sync_period_cron) +@blueprint.task(queueing_lock="sync_kagenti_agents", queue=str(Queues.CRON_PROVIDER)) @inject -async def check_registry( +async def sync_kagenti_agents( timestamp: int, configuration: Configuration, provider_service: ProviderService, user_service: UserService, ): - if not configuration.agent_registry.locations: + if not configuration.kagenti.enabled: return - user = await user_service.get_user_by_email("admin@beeai.dev") + from agentstack_server.infrastructure.kagenti.client import KagentiClient - registry_by_provider_origin: dict[str, RegistryLocation] = {} - desired_providers: dict[str, ProviderRegistryRecord] = {} - errors = [] + user = await user_service.get_user_by_email("admin@beeai.dev") + client = KagentiClient(configuration.kagenti) try: - await provider_service.remove_orphaned_providers() + kagenti_agents = await client.list_agents() except Exception as ex: - errors.extend(ex.exceptions if 
isinstance(ex, ExceptionGroup) else [ex]) + logger.error(f"Failed to fetch agents from kagenti: {ex}") + return - for registry in configuration.agent_registry.locations.values(): - for provider_record in await registry.load(): - try: - desired_providers[provider_record.origin] = provider_record - registry_by_provider_origin[provider_record.origin] = registry - except ValueError as e: - errors.append(e) - - # TODO: two providers with the same origin managed under registry are not supported - managed_providers = { - provider.origin: provider for provider in await provider_service.list_providers() if provider.registry - } - - new_providers = desired_providers.keys() - managed_providers.keys() - old_providers = managed_providers.keys() - desired_providers.keys() - existing_providers = managed_providers.keys() & desired_providers.keys() - - # Remove old providers - to prevent agent name collisions - for provider_origin in old_providers: - provider = managed_providers[provider_origin] - try: - await provider_service.delete_provider(provider_id=provider.id, user=user) - logger.info(f"Removed provider {provider.source}") - except Exception as ex: - errors.append(RuntimeError(f"[{provider.source}]: Failed to remove provider: {ex}")) + # Build desired state from kagenti agents (keyed by origin = agent URL) + desired: dict[str, dict] = {} + for agent in kagenti_agents: + if not agent.get("url"): + continue + url = agent["url"] + desired[url] = agent - for provider_origin in new_providers: - provider_record = desired_providers[provider_origin] - try: - await provider_service.create_provider( - user=user, - location=provider_record.location, - origin=provider_record.origin, - registry=registry_by_provider_origin[provider_origin], - auto_stop_timeout=provider_record.auto_stop_timeout, - variables=provider_record.variables, - ) - logger.info(f"Added provider {provider_record}") - except Exception as ex: - errors.append(RuntimeError(f"[{provider_record}]: Failed to add provider: 
{ex}")) + # Get existing kagenti-sourced providers + existing_providers = await provider_service.list_providers() + existing_kagenti = {p.origin: p for p in existing_providers if p.source_type == SourceType.KAGENTI} - for provider_origin in existing_providers: - provider_record = desired_providers[provider_origin] - provider = managed_providers[provider_origin] - try: - result = await provider_service.patch_provider( - provider_id=provider.id, - user=user, - location=provider_record.location, - origin=provider_record.origin, - auto_stop_timeout=provider_record.auto_stop_timeout, - variables=provider_record.variables, - allow_registry_update=True, - ) - if managed_providers[provider_origin].source.root != result.source.root: - logger.info(f"Updated provider {provider_record}") - except Exception as ex: - errors.append(RuntimeError(f"[{provider_record}]: Failed to add provider: {ex}")) + errors = [] + + # Remove providers for agents no longer in kagenti + for origin, provider in existing_kagenti.items(): + if origin not in desired: + try: + await provider_service.delete_provider(provider_id=provider.id, user=user) + logger.info(f"Removed kagenti provider {provider.id} ({origin})") + except Exception as ex: + errors.append(ex) + + # Create new providers for new kagenti agents + for url, agent in desired.items(): + if url not in existing_kagenti: + try: + from agentstack_server.domain.models.provider import NetworkProviderLocation + + location = NetworkProviderLocation(root=url) + await provider_service.create_provider( + user=user, + location=location, + origin=url, + source_type=SourceType.KAGENTI, + ) + logger.info(f"Added kagenti provider from {url}") + except Exception as ex: + errors.append(RuntimeError(f"Failed to add kagenti provider {url}: {ex}")) + + # Update existing providers (refresh agent card) + for url, agent in desired.items(): + if url in existing_kagenti: + provider = existing_kagenti[url] + try: + from agentstack_server.domain.models.provider import 
NetworkProviderLocation + + location = NetworkProviderLocation(root=url) + agent_card = await location.load_agent_card() + if agent_card != provider.agent_card: + await provider_service.patch_provider( + provider_id=provider.id, + user=user, + agent_card=agent_card, + ) + logger.info(f"Updated kagenti provider {provider.id} agent card") + except Exception as ex: + # Agent might not be ready yet, skip + logger.debug(f"Failed to update kagenti provider {url}: {ex}") if errors: - raise ExceptionGroup("Exceptions occurred when reloading providers", errors) + raise ExceptionGroup("Exceptions occurred when syncing kagenti agents", errors) @blueprint.periodic(cron="* * * * * */5") # pyrefly: ignore [bad-argument-type] -- bad typing in blueprint library -@blueprint.task(queueing_lock="check_unmanaged_providers", queue=str(Queues.CRON_PROVIDER)) +@blueprint.task(queueing_lock="refresh_provider_state", queue=str(Queues.CRON_PROVIDER)) @inject -async def refresh_unmanaged_provider_state( - timestamp: int, uow_f: IUnitOfWorkFactory, provider_service: ProviderService, user_service: UserService +async def refresh_provider_state( + timestamp: int, + provider_service: ProviderService, + user_service: UserService, ): + """Periodically check all providers' health by fetching their agent card endpoint.""" timeout_sec = timedelta(seconds=20).total_seconds() async def _check_provider(provider: Provider): - state = UnmanagedState.OFFLINE + state = ProviderState.OFFLINE resp_card = None - should_update_provider = True + should_update = True user = await user_service.get_user_by_email("admin@beeai.dev") @@ -149,47 +147,43 @@ async def _check_provider(provider: Provider): (await client.get(AGENT_CARD_WELL_KNOWN_PATH)).raise_for_status().json() ) - # For self-registered provider we need to check their self-registration ID, because their URL - # might overlap (more agents on the same URL, only one can be online) + # For self-registered providers, verify their self-registration ID matches 
provider_self_reg_ext = get_extension(provider.agent_card, SELF_REGISTRATION_EXTENSION_URI) resp_self_reg_ext = get_extension(resp_card, SELF_REGISTRATION_EXTENSION_URI) if provider_self_reg_ext is not None and resp_self_reg_ext is not None: if provider_self_reg_ext.params == resp_self_reg_ext.params: - state = UnmanagedState.ONLINE + state = ProviderState.ONLINE else: - # Different agent responding at the same URL, don't update this provider - should_update_provider = False + should_update = False else: - state = UnmanagedState.ONLINE + state = ProviderState.ONLINE except HTTPError as ex: logger.warning( - f"Provider {provider.id} failed to respond to ping in {int(timeout_sec)} seconds: {extract_messages(ex)}" + f"Provider {provider.id} failed to respond to ping in {int(timeout_sec)} seconds: " + f"{extract_messages(ex)}" ) - state = UnmanagedState.OFFLINE + except Exception as ex: + logger.debug(f"Provider {provider.id} health check failed: {ex}") finally: - # Unified update: patch both agent card (if changed) and state (if changed) in a single call - # Only detect card changes if we got a response (resp_card is not None) - card_changes = resp_card is not None and provider.agent_card != resp_card - state_changed = state != provider.unmanaged_state + card_changed = resp_card is not None and provider.agent_card != resp_card + state_changed = state != provider.state - if should_update_provider and (card_changes or state_changed): + if should_update and (card_changed or state_changed): try: await provider_service.patch_provider( provider_id=provider.id, user=user, - agent_card=resp_card if card_changes else None, - unmanaged_state=state if state_changed else None, - allow_registry_update=True, + agent_card=resp_card if card_changed else None, + state=state if state_changed else None, ) except Exception as ex: if isinstance(ex, asyncio.CancelledError): raise logger.error(f"Failed to update provider {provider.id}: {extract_messages(ex)}") - async with uow_f() as uow: - 
unmanaged_providers = [provider async for provider in uow.providers.list(type=ProviderType.UNMANAGED)] + providers = await provider_service.list_providers() async with asyncio.TaskGroup() as tg: - for provider in unmanaged_providers: + for provider in providers: tg.create_task(_check_provider(provider)) diff --git a/apps/agentstack-server/src/agentstack_server/jobs/procrastinate.py b/apps/agentstack-server/src/agentstack_server/jobs/procrastinate.py index d648166274..8d64ad177b 100644 --- a/apps/agentstack-server/src/agentstack_server/jobs/procrastinate.py +++ b/apps/agentstack-server/src/agentstack_server/jobs/procrastinate.py @@ -15,8 +15,6 @@ from agentstack_server.jobs.crons.provider import blueprint as provider_crons from agentstack_server.jobs.tasks.context import blueprint as context_tasks from agentstack_server.jobs.tasks.file import blueprint as file_tasks -from agentstack_server.jobs.tasks.provider_build import blueprint as provider_build_tasks -from agentstack_server.jobs.tasks.provider_discovery import blueprint as provider_discovery_tasks logger = logging.getLogger(__name__) @@ -57,8 +55,6 @@ def exit_app_on_db_error(*_args, **_kwargs): ) app.add_tasks_from(blueprint=file_tasks, namespace="text_extraction") app.add_tasks_from(blueprint=context_tasks, namespace="context_tasks") - app.add_tasks_from(blueprint=provider_build_tasks, namespace="provider_build_tasks") - app.add_tasks_from(blueprint=provider_discovery_tasks, namespace="provider_discovery_tasks") app.add_tasks_from(blueprint=provider_crons, namespace="cron_provider") app.add_tasks_from(blueprint=model_provider_crons, namespace="cron_model_provider") app.add_tasks_from(blueprint=cleanup_crons, namespace="cron_cleanup") diff --git a/apps/agentstack-server/src/agentstack_server/jobs/queues.py b/apps/agentstack-server/src/agentstack_server/jobs/queues.py index 971b2cbba5..832f852884 100644 --- a/apps/agentstack-server/src/agentstack_server/jobs/queues.py +++ 
b/apps/agentstack-server/src/agentstack_server/jobs/queues.py @@ -16,8 +16,6 @@ class Queues(StrEnum): GENERATE_CONVERSATION_TITLE = "generate_conversation_title" TEXT_EXTRACTION = "text_extraction" TOOLKIT_DELETION = "toolkit_deletion" - BUILD_PROVIDER = "build_provider" - PROVIDER_DISCOVERY = "provider_discovery" @staticmethod def all() -> set[str]: diff --git a/apps/agentstack-server/src/agentstack_server/jobs/tasks/provider_build.py b/apps/agentstack-server/src/agentstack_server/jobs/tasks/provider_build.py deleted file mode 100644 index 2124540cfa..0000000000 --- a/apps/agentstack-server/src/agentstack_server/jobs/tasks/provider_build.py +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright 2025 © BeeAI a Series of LF Projects, LLC -# SPDX-License-Identifier: Apache-2.0 - -from __future__ import annotations - -from uuid import UUID - -from kink import inject -from procrastinate import Blueprint - -from agentstack_server.domain.models.provider import DockerImageProviderLocation -from agentstack_server.domain.models.provider_build import AddProvider, BuildState, UpdateProvider -from agentstack_server.jobs.queues import Queues -from agentstack_server.service_layer.services.provider_build import ProviderBuildService -from agentstack_server.service_layer.services.providers import ProviderService -from agentstack_server.service_layer.services.users import UserService - -blueprint = Blueprint() - - -@blueprint.task(queue=str(Queues.BUILD_PROVIDER)) -@inject -async def build_provider( - provider_build_id: str, - provider_build_service: ProviderBuildService, - provider_service: ProviderService, - user_service: UserService, -): - build = await provider_build_service.build_provider(provider_build_id=UUID(provider_build_id)) - try: - if build.status == BuildState.BUILD_COMPLETED: - user = await user_service.get_user(user_id=build.created_by) - match build.on_complete: - case UpdateProvider(provider_id=provider_id): - await provider_service.patch_provider( - provider_id=provider_id, 
- user=user, - location=DockerImageProviderLocation(root=build.destination), - origin=build.source, - ) - build.provider_id = provider_id - case AddProvider() as add_provider: - provider = await provider_service.create_provider( - user=user, - location=DockerImageProviderLocation(root=build.destination), - origin=build.source, - auto_stop_timeout=add_provider.auto_stop_timeout, - variables=add_provider.variables, - ) - build.provider_id = provider.id - build.status = BuildState.COMPLETED - except Exception as ex: - build.status = BuildState.FAILED - build.error_message = f"Failed to process {build.on_complete.type} action: {ex}" - raise - finally: - await provider_build_service.update_build(provider_build=build) diff --git a/apps/agentstack-server/src/agentstack_server/jobs/tasks/provider_discovery.py b/apps/agentstack-server/src/agentstack_server/jobs/tasks/provider_discovery.py deleted file mode 100644 index acff1bef42..0000000000 --- a/apps/agentstack-server/src/agentstack_server/jobs/tasks/provider_discovery.py +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright 2025 © BeeAI a Series of LF Projects, LLC -# SPDX-License-Identifier: Apache-2.0 - -from __future__ import annotations - -from uuid import UUID - -from kink import inject -from procrastinate import Blueprint - -from agentstack_server.jobs.queues import Queues -from agentstack_server.service_layer.services.provider_discovery import ProviderDiscoveryService - -blueprint = Blueprint() - - -@blueprint.task(queue=str(Queues.PROVIDER_DISCOVERY)) -@inject -async def discover_provider(provider_discovery_id: str, service: ProviderDiscoveryService): - await service.run_discovery(discovery_id=UUID(provider_discovery_id)) diff --git a/apps/agentstack-server/src/agentstack_server/run_workers.py b/apps/agentstack-server/src/agentstack_server/run_workers.py index ae26fac291..ec93c2a075 100644 --- a/apps/agentstack-server/src/agentstack_server/run_workers.py +++ b/apps/agentstack-server/src/agentstack_server/run_workers.py @@ 
-26,7 +26,6 @@ async def run_workers(app: procrastinate.App): str(Queues.CRON_CONNECTOR), str(Queues.CRON_CLEANUP), str(Queues.TOOLKIT_DELETION), - str(Queues.PROVIDER_DISCOVERY), ], concurrency=10, ), @@ -36,7 +35,6 @@ async def run_workers(app: procrastinate.App): concurrency=10, ), WorkerOptions(name="text_extraction_worker", queues=[str(Queues.TEXT_EXTRACTION)], concurrency=5), - WorkerOptions(name="build_provider_worker", queues=[str(Queues.BUILD_PROVIDER)], concurrency=5), ] worker_tasks = [] diff --git a/apps/agentstack-server/src/agentstack_server/service_layer/build_manager.py b/apps/agentstack-server/src/agentstack_server/service_layer/build_manager.py deleted file mode 100644 index 69cdc1033b..0000000000 --- a/apps/agentstack-server/src/agentstack_server/service_layer/build_manager.py +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright 2025 © BeeAI a Series of LF Projects, LLC -# SPDX-License-Identifier: Apache-2.0 - -from __future__ import annotations - -from datetime import timedelta -from typing import Protocol -from uuid import UUID - -from agentstack_server.domain.models.provider_build import BuildState, ProviderBuild -from agentstack_server.utils.logs_container import LogsContainer - - -class IProviderBuildManager(Protocol): - async def create_job( - self, *, provider_build: ProviderBuild, job_timeout: timedelta = timedelta(minutes=10) - ) -> BuildState: ... - - async def cancel_job(self, *, provider_build_id: UUID) -> None: ... - async def wait_for_completion(self, *, provider_build_id: UUID) -> BuildState: ... - async def stream_logs( - self, - *, - provider_build_id: UUID, - logs_container: LogsContainer, - wait_timeout: timedelta = timedelta(minutes=10), - ) -> None: ... 
diff --git a/apps/agentstack-server/src/agentstack_server/service_layer/deployment_manager.py b/apps/agentstack-server/src/agentstack_server/service_layer/deployment_manager.py deleted file mode 100644 index 562ec966d5..0000000000 --- a/apps/agentstack-server/src/agentstack_server/service_layer/deployment_manager.py +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright 2025 © BeeAI a Series of LF Projects, LLC -# SPDX-License-Identifier: Apache-2.0 - -from __future__ import annotations - -from datetime import timedelta -from typing import Protocol -from uuid import UUID - -from kink import di -from pydantic import HttpUrl - -from agentstack_server.configuration import Configuration -from agentstack_server.domain.models.provider import Provider, ProviderDeploymentState -from agentstack_server.utils.logs_container import LogsContainer - - -def global_provider_variables(provider_url: HttpUrl): - configuration = di[Configuration] - return { - "PORT": "8000", - "HOST": "0.0.0.0", - "OTEL_EXPORTER_OTLP_ENDPOINT": str(configuration.telemetry.collector_url), - "PLATFORM_URL": f"http://{configuration.platform_service_url}", - "PLATFORM_AUTH__PUBLIC_URL": str(provider_url), - } - - -class IProviderDeploymentManager(Protocol): - async def create_or_replace(self, *, provider: Provider, env: dict[str, str] | None = None) -> bool: ... - async def delete(self, *, provider_id: UUID) -> None: ... - async def remove_orphaned_providers(self, existing_providers: list[UUID]) -> None: ... - async def state(self, *, provider_ids: list[UUID]) -> list[ProviderDeploymentState]: ... - async def scale_down(self, *, provider_id: UUID) -> None: ... - async def scale_up(self, *, provider_id: UUID) -> None: ... - async def wait_for_startup(self, *, provider_id: UUID, timeout: timedelta) -> None: ... # noqa: ASYNC109 (the timeout actually corresponds to kubernetes timeout) - async def get_provider_url(self, *, provider_id: UUID) -> HttpUrl: ... 
- async def stream_logs(self, *, provider_id: UUID, logs_container: LogsContainer) -> None: ... diff --git a/apps/agentstack-server/src/agentstack_server/service_layer/services/a2a.py b/apps/agentstack-server/src/agentstack_server/service_layer/services/a2a.py index b17e769869..1c03e44d69 100644 --- a/apps/agentstack-server/src/agentstack_server/service_layer/services/a2a.py +++ b/apps/agentstack-server/src/agentstack_server/service_layer/services/a2a.py @@ -53,14 +53,10 @@ from agentstack_server.domain.models.provider import ( NetworkProviderLocation, Provider, - ProviderDeploymentState, - UnmanagedState, + ProviderState, ) from agentstack_server.domain.models.user import User from agentstack_server.exceptions import EntityNotFoundError, ForbiddenUpdateError, InvalidProviderCallError -from agentstack_server.service_layer.deployment_manager import ( - IProviderDeploymentManager, -) from agentstack_server.service_layer.services.users import UserService from agentstack_server.service_layer.unit_of_work import IUnitOfWorkFactory from agentstack_server.telemetry import INSTRUMENTATION_NAME @@ -262,7 +258,7 @@ async def on_message_send( ) -> Task | Message: # we set task_id and context_id if not configured with trace.get_tracer(INSTRUMENTATION_NAME).start_as_current_span("on_message_send") as span: - trace_id = hex(span.get_span_context().trace_id)[2:] + trace_id = f"{span.get_span_context().trace_id:032x}" params.message.context_id = params.message.context_id or str(uuid.uuid4()) await self._check_and_record_request(params.message.task_id, params.message.context_id, trace_id=trace_id) @@ -282,7 +278,7 @@ async def on_message_send_stream( ) -> AsyncGenerator[Event]: with trace.get_tracer(INSTRUMENTATION_NAME).start_as_current_span("on_message_send_stream") as span: # we set task_id and context_id if not configured - trace_id = hex(span.get_span_context().trace_id)[2:] + trace_id = f"{span.get_span_context().trace_id:032x}" params.message.context_id = 
params.message.context_id or str(uuid.uuid4()) await self._check_and_record_request(params.message.task_id, params.message.context_id, trace_id=trace_id) @@ -359,16 +355,12 @@ async def on_delete_task_push_notification_config( @inject class A2AProxyService: - STARTUP_TIMEOUT = timedelta(minutes=5) - def __init__( self, - provider_deployment_manager: IProviderDeploymentManager, uow: IUnitOfWorkFactory, user_service: UserService, configuration: Configuration, ): - self._deploy_manager = provider_deployment_manager self._uow = uow self._user_service = user_service self._config = configuration @@ -406,46 +398,12 @@ async def ensure_agent(self, *, provider_id: UUID) -> HttpUrl: await uow.providers.update_last_accessed(provider_id=provider_id) await uow.commit() - if not provider.managed: - if provider.unmanaged_state is UnmanagedState.OFFLINE: - raise InvalidProviderCallError( - f"Cannot send message to provider {provider_id}: provider is offline" - ) - - assert isinstance(provider.source, NetworkProviderLocation) - return provider.source.a2a_url - - provider_url = await self._deploy_manager.get_provider_url(provider_id=provider.id) - [state] = await self._deploy_manager.state(provider_ids=[provider.id]) - should_wait = False - match state: - case ProviderDeploymentState.ERROR: - raise InvalidProviderCallError( - f"Cannot send message to provider {provider_id}: provider is in an error state" - ) - case ( - ProviderDeploymentState.MISSING - | ProviderDeploymentState.RUNNING - | ProviderDeploymentState.STARTING - | ProviderDeploymentState.READY - ): - async with self._uow() as uow: - from agentstack_server.domain.repositories.env import ( - EnvStoreEntity, - ) - - env = await uow.env.get_all( - parent_entity=EnvStoreEntity.PROVIDER, - parent_entity_ids=[provider.id], - ) - modified = await self._deploy_manager.create_or_replace(provider=provider, env=env[provider.id]) - should_wait = modified or state != ProviderDeploymentState.RUNNING - case _: - raise 
ValueError(f"Unknown provider state: {state}") - if should_wait: - logger.info("Waiting for provider to start up...") - await self._deploy_manager.wait_for_startup(provider_id=provider.id, timeout=self.STARTUP_TIMEOUT) - logger.info("Provider is ready...") - return provider_url + if provider.state is ProviderState.OFFLINE: + raise InvalidProviderCallError( + f"Cannot send message to provider {provider_id}: provider is offline" + ) + + assert isinstance(provider.source, NetworkProviderLocation) + return provider.source.a2a_url finally: unbind_contextvars("provider") diff --git a/apps/agentstack-server/src/agentstack_server/service_layer/services/provider_build.py b/apps/agentstack-server/src/agentstack_server/service_layer/services/provider_build.py deleted file mode 100644 index ef88d5484c..0000000000 --- a/apps/agentstack-server/src/agentstack_server/service_layer/services/provider_build.py +++ /dev/null @@ -1,285 +0,0 @@ -# Copyright 2025 © BeeAI a Series of LF Projects, LLC -# SPDX-License-Identifier: Apache-2.0 - - -from __future__ import annotations - -import asyncio -import json -import logging -from collections.abc import AsyncIterator, Callable -from contextlib import suppress -from datetime import timedelta -from uuid import UUID - -from kink import inject -from tenacity import AsyncRetrying, retry_if_exception_type, stop_after_delay, wait_fixed - -from agentstack_server.api.schema.common import PaginationQuery -from agentstack_server.configuration import Configuration -from agentstack_server.domain.models.common import PaginatedResult -from agentstack_server.domain.models.provider_build import ( - BuildConfiguration, - BuildState, - NoAction, - OnCompleteAction, - ProviderBuild, - UpdateProvider, -) -from agentstack_server.domain.models.user import User, UserRole -from agentstack_server.exceptions import ( - BuildAlreadyFinishedError, - EntityNotFoundError, - VersionResolveError, -) -from agentstack_server.service_layer.build_manager import 
IProviderBuildManager -from agentstack_server.service_layer.unit_of_work import IUnitOfWorkFactory -from agentstack_server.utils.docker import DockerImageID -from agentstack_server.utils.github import GithubUrl, ResolvedGithubUrl -from agentstack_server.utils.logs_container import LogsContainer, ProcessLogMessage -from agentstack_server.utils.utils import cancel_task - -logger = logging.getLogger(__name__) - - -@inject -class ProviderBuildService: - def __init__(self, build_manager: IProviderBuildManager, configuration: Configuration, uow: IUnitOfWorkFactory): - self._uow = uow - self._build_manager = build_manager - self._config = configuration - - async def _resolve_version( - self, location: GithubUrl, build_configuration: BuildConfiguration | None = None - ) -> tuple[ResolvedGithubUrl, DockerImageID]: - try: - version = await location.resolve_version() - except Exception as e: - raise VersionResolveError(str(location), str(e)) from e - if not self._config.provider_build.oci_build_registry_prefix: - raise RuntimeError("OCI build registry is not configured") - - def sanitize_path(path: str) -> str: - return path.replace(" ", "-").replace("/", "-").lower() - - dockerfile_path = "" - if build_configuration and build_configuration.dockerfile_path: - dockerfile_path = f"_{sanitize_path(str(build_configuration.dockerfile_path))}" - - destination = DockerImageID( - root=self._config.provider_build.image_format.format( - registry_prefix=self._config.provider_build.oci_build_registry_prefix.lower(), - org=version.org.lower(), - repo=version.repo.lower(), - path=sanitize_path(version.path or "agent"), - dockerfile_path=dockerfile_path, - commit_hash=version.commit_hash.lower(), - ) - ) - return version, destination - - async def preview_build( - self, - location: GithubUrl, - user: User, - on_complete: OnCompleteAction | None = None, - build_configuration: BuildConfiguration | None = None, - ) -> ProviderBuild: - version, destination = await 
self._resolve_version(location, build_configuration) - return ProviderBuild( - status=BuildState.MISSING, - source=version, - destination=destination, - created_by=user.id, - on_complete=on_complete or NoAction(), - ) - - async def create_build( - self, - location: GithubUrl, - user: User, - on_complete: OnCompleteAction | None = None, - build_configuration: BuildConfiguration | None = None, - ) -> ProviderBuild: - from agentstack_server.jobs.tasks.provider_build import build_provider as task - - version, destination = await self._resolve_version(location, build_configuration) - - build = ProviderBuild( - status=BuildState.MISSING, - source=version, - destination=destination, - created_by=user.id, - on_complete=on_complete or NoAction(), - build_configuration=build_configuration, - ) - async with self._uow() as uow: - match on_complete: - case UpdateProvider(provider_id=provider_id): - # check permissions to update - _ = await uow.providers.get(provider_id=provider_id, user_id=user.id) - - await uow.provider_builds.create(provider_build=build) - await task.configure(queueing_lock=str(build.id)).defer_async(provider_build_id=str(build.id)) - await uow.commit() - return build - - async def update_build(self, *, provider_build: ProviderBuild): - async with self._uow() as uow: - await uow.provider_builds.update(provider_build=provider_build) - await uow.commit() - - async def get_build(self, provider_build_id: UUID) -> ProviderBuild: - async with self._uow() as uow: - return await uow.provider_builds.get(provider_build_id=provider_build_id) - - async def list_builds( - self, - pagination: PaginationQuery, - status: BuildState | None = None, - user: User | None = None, - user_owned: bool | None = None, - ) -> PaginatedResult[ProviderBuild]: - # user_owned: True -> show user owned entities - # user_owned: False -> show all but user owned entities - # user_owned: None -> show all entities - - if user_owned is not None and user is None: - raise ValueError("user_owned 
cannot be specified without a user") - - async with self._uow() as uow: - return await uow.provider_builds.list_paginated( - user_id=user.id if user_owned is True and user else None, - exclude_user_id=user.id if user_owned is False and user else None, - limit=pagination.limit, - page_token=pagination.page_token, - order=pagination.order, - order_by=pagination.order_by, - status=status, - ) - - async def build_provider(self, provider_build_id: UUID) -> ProviderBuild: - async with self._uow() as uow: - build = await uow.provider_builds.get(provider_build_id=provider_build_id) - if build.status != BuildState.MISSING: - raise RuntimeError("Build already started or completed") - try: - build.status = await self._build_manager.create_job( - provider_build=build, - job_timeout=timedelta(seconds=self._config.provider_build.job_timeout_sec), - ) - except Exception as e: - logger.warning(f"Failed to build provider: {e}") - build.status = BuildState.FAILED - raise - finally: - await uow.provider_builds.update(provider_build=build) - await uow.commit() - - try: - # This can take very long, opening transaction after - build.status = await self._build_manager.wait_for_completion(provider_build_id=build.id) - if build.status == BuildState.FAILED: - build.error_message = "Build Job failed, please retry and watch log stream for more details" - async with self._uow() as uow: - await uow.provider_builds.update(provider_build=build) - await uow.commit() - return build - except Exception as e: - logger.warning(f"Failed to build provider: {e}") - build.status = BuildState.FAILED - build.error_message = str(e) - async with self._uow() as uow: - await uow.provider_builds.update(provider_build=build) - await uow.commit() - raise - - async def delete_build(self, provider_build_id: UUID, user: User): - user_id = user.id if user.role != UserRole.ADMIN else None - async with self._uow() as uow: - build = await uow.provider_builds.get(provider_build_id=provider_build_id, user_id=user_id) - if 
build.status not in {BuildState.FAILED, BuildState.COMPLETED}: - with suppress(EntityNotFoundError): - await self._build_manager.cancel_job(provider_build_id=provider_build_id) - await uow.provider_builds.delete(provider_build_id=provider_build_id, user_id=user_id) - await uow.commit() - - async def stream_logs( - self, - provider_build_id: UUID, - user: User, - wait_for_start_timeout: timedelta = timedelta(minutes=5), - ) -> Callable[..., AsyncIterator[str]]: - logs_container = LogsContainer() - user_id = user.id if user.role != UserRole.ADMIN else None - async with self._uow() as uow: - build = await uow.provider_builds.get(provider_build_id=provider_build_id, user_id=user_id) - if build.status in {BuildState.FAILED, BuildState.COMPLETED}: - raise BuildAlreadyFinishedError(platform_build_id=build.id, state=build.status) - - logs_task = asyncio.create_task( - self._build_manager.stream_logs(provider_build_id=provider_build_id, logs_container=logs_container) - ) - - async def watch_for_completion(): - logs_container.add_stdout("Waiting for build job to be scheduled...") - state = BuildState.FAILED - on_complete = NoAction() - try: - async for attempt in AsyncRetrying( - stop=stop_after_delay(wait_for_start_timeout), - wait=wait_fixed(timedelta(seconds=2)), - retry=retry_if_exception_type(EntityNotFoundError), - reraise=True, - ): - with attempt: - async with self._uow() as uow: - # If the build or worker fails to deploy the job, the wait would get stuck retrying - # waiting for a k8s job that will never be created. 
Hence, we check database state: - build = await uow.provider_builds.get(provider_build_id=provider_build_id) - on_complete = build.on_complete - if build.status in {BuildState.FAILED, BuildState.COMPLETED}: - state = build.status - break - state = await self._build_manager.wait_for_completion(provider_build_id=provider_build_id) - except EntityNotFoundError: - message = ( - "Wait timeout for job to be scheduled exceeded, the job queue might be busy at the moment." - "The job will continue to run in the background when the queue is available." - ) - logs_container.add(ProcessLogMessage(message=message, error=True)) - return - - # Wait for post-build action to complete - if state == BuildState.BUILD_COMPLETED: - logs_container.add_stdout(f"Processing post-build action: {on_complete.type}") - await asyncio.sleep(0.5) - try: - async with asyncio.timeout(timedelta(seconds=10).total_seconds()): - while True: - async with self._uow() as uow: - build = await uow.provider_builds.get(provider_build_id=provider_build_id) - if build.status in {BuildState.FAILED, BuildState.COMPLETED}: - break - await asyncio.sleep(1) - except TimeoutError: - logs_container.add(ProcessLogMessage(message="Waiting for action timed out.", error=True)) - logs_container.add(ProcessLogMessage(message=f"Job {state}.", finished=True)) - - watch_for_completion_task = asyncio.create_task(watch_for_completion()) - - async def logs_iterator() -> AsyncIterator[str]: - try: - async with logs_container.stream() as stream: - async for message in stream: - if message.model_dump().get("error"): - raise RuntimeError(f"Error capturing logs: {message.message}") - yield json.dumps(message.model_dump(mode="json")) - message_dict = message.model_dump() - if message_dict.get("finished") or message_dict.get("error"): - return - finally: - await cancel_task(logs_task) - await cancel_task(watch_for_completion_task) - - return logs_iterator diff --git 
a/apps/agentstack-server/src/agentstack_server/service_layer/services/provider_discovery.py b/apps/agentstack-server/src/agentstack_server/service_layer/services/provider_discovery.py deleted file mode 100644 index 74479375a3..0000000000 --- a/apps/agentstack-server/src/agentstack_server/service_layer/services/provider_discovery.py +++ /dev/null @@ -1,146 +0,0 @@ -# Copyright 2025 © BeeAI a Series of LF Projects, LLC -# SPDX-License-Identifier: Apache-2.0 - - -from __future__ import annotations - -import logging -import uuid -from datetime import timedelta -from uuid import UUID - -from a2a.types import AgentCapabilities, AgentCard, AgentExtension -from a2a.utils import AGENT_CARD_WELL_KNOWN_PATH -from httpx import AsyncClient -from kink import inject - -from agentstack_server.configuration import Configuration -from agentstack_server.domain.constants import AGENT_DETAIL_EXTENSION_URI -from agentstack_server.domain.models.provider import DockerImageProviderLocation, Provider -from agentstack_server.domain.models.provider_discovery import DiscoveryState, ProviderDiscovery -from agentstack_server.domain.models.user import User -from agentstack_server.service_layer.deployment_manager import IProviderDeploymentManager -from agentstack_server.service_layer.unit_of_work import IUnitOfWorkFactory -from agentstack_server.utils.a2a import get_extension -from agentstack_server.utils.docker import DockerImageID -from agentstack_server.utils.utils import utc_now - -logger = logging.getLogger(__name__) - - -@inject -class ProviderDiscoveryService: - def __init__( - self, deployment_manager: IProviderDeploymentManager, uow: IUnitOfWorkFactory, configuration: Configuration - ): - self._uow = uow - self._deployment_manager = deployment_manager - self._config = configuration - - async def create_discovery(self, *, docker_image: str, user: User) -> ProviderDiscovery: - from agentstack_server.jobs.tasks.provider_discovery import discover_provider as task - - discovery = 
ProviderDiscovery( - status=DiscoveryState.PENDING, - docker_image=docker_image, - created_by=user.id, - ) - async with self._uow() as uow: - await uow.provider_discoveries.create(discovery=discovery) - await task.configure(queueing_lock=str(discovery.id)).defer_async(provider_discovery_id=str(discovery.id)) - await uow.commit() - return discovery - - async def get_discovery(self, *, discovery_id: UUID, user: User | None = None) -> ProviderDiscovery: - async with self._uow() as uow: - return await uow.provider_discoveries.get(discovery_id=discovery_id, user_id=user.id if user else None) - - async def run_discovery(self, *, discovery_id: UUID) -> ProviderDiscovery: - async with self._uow() as uow: - discovery = await uow.provider_discoveries.get(discovery_id=discovery_id) - if discovery.status != DiscoveryState.PENDING: - logger.warning(f"Discovery {discovery_id} is not pending, skipping") - return discovery - - discovery.status = DiscoveryState.IN_PROGRESS - await uow.provider_discoveries.update(discovery=discovery) - await uow.commit() - - try: - location = DockerImageProviderLocation(DockerImageID(discovery.docker_image)) - agent_card = await self._fetch_agent_card_from_container(location) - - async with self._uow() as uow: - discovery.agent_card = agent_card - discovery.status = DiscoveryState.COMPLETED - await uow.provider_discoveries.update(discovery=discovery) - await uow.commit() - - except Exception as e: - logger.exception(f"Discovery {discovery_id} failed") - async with self._uow() as uow: - discovery.status = DiscoveryState.FAILED - discovery.error_message = str(e) - await uow.provider_discoveries.update(discovery=discovery) - await uow.commit() - - return discovery - - async def cleanup_expired_discoveries(self, *, max_age: timedelta | None = None) -> int: - max_age = max_age or timedelta(days=1) - cutoff = utc_now() - max_age - async with self._uow() as uow: - count = await uow.provider_discoveries.delete_older_than(older_than=cutoff) - await 
uow.commit() - return count - - async def _fetch_agent_card_from_container(self, location: DockerImageProviderLocation) -> AgentCard: - placeholder_card = AgentCard( - name="discovery", - description="", - url="", - version="", - capabilities=AgentCapabilities(), - default_input_modes=["text"], - default_output_modes=["text"], - skills=[], - ) - temp_provider = Provider( - source=location, - origin=str(location.root), - created_by=uuid.UUID("00000000-0000-0000-0000-000000000000"), - agent_card=placeholder_card, - ) - try: - await self._deployment_manager.create_or_replace(provider=temp_provider) - await self._deployment_manager.wait_for_startup(provider_id=temp_provider.id, timeout=timedelta(minutes=1)) - url = await self._deployment_manager.get_provider_url(provider_id=temp_provider.id) - async with AsyncClient(base_url=str(url)) as client: - response = await client.get(AGENT_CARD_WELL_KNOWN_PATH, timeout=10) - response.raise_for_status() - agent_card = AgentCard.model_validate(response.json()) - return self._inject_default_agent_detail_extension(agent_card, location) - finally: - try: - await self._deployment_manager.delete(provider_id=temp_provider.id) - except Exception: - logger.exception(f"Failed to delete temporary deployment for provider {temp_provider.id}") - - def _inject_default_agent_detail_extension( - self, agent_card: AgentCard, location: DockerImageProviderLocation - ) -> AgentCard: - if get_extension(agent_card, AGENT_DETAIL_EXTENSION_URI): - return agent_card - - default_extension = AgentExtension( - uri=AGENT_DETAIL_EXTENSION_URI, - params={ - "interaction_mode": "multi-turn", - "container_image_url": str(location.root), - }, - ) - - extensions = list(agent_card.capabilities.extensions or []) - extensions.append(default_extension) - agent_card.capabilities.extensions = extensions - return agent_card diff --git a/apps/agentstack-server/src/agentstack_server/service_layer/services/providers.py 
b/apps/agentstack-server/src/agentstack_server/service_layer/services/providers.py index 0fecedde2b..e12524233b 100644 --- a/apps/agentstack-server/src/agentstack_server/service_layer/services/providers.py +++ b/apps/agentstack-server/src/agentstack_server/service_layer/services/providers.py @@ -4,101 +4,69 @@ from __future__ import annotations -import asyncio -import json import logging import uuid -from collections.abc import AsyncIterator, Callable -from datetime import timedelta from uuid import UUID from a2a.types import AgentCard, AgentExtension from fastapi import HTTPException from kink import inject -from starlette.status import HTTP_400_BAD_REQUEST, HTTP_404_NOT_FOUND, HTTP_422_UNPROCESSABLE_CONTENT +from starlette.status import HTTP_400_BAD_REQUEST, HTTP_404_NOT_FOUND from agentstack_server.domain.constants import AGENT_DETAIL_EXTENSION_URI, SELF_REGISTRATION_EXTENSION_URI from agentstack_server.domain.models.provider import ( - DockerImageProviderLocation, Provider, - ProviderDeploymentState, ProviderLocation, - ProviderWithState, - UnmanagedState, + ProviderState, + SourceType, ) -from agentstack_server.domain.models.registry import RegistryLocation from agentstack_server.domain.models.user import User, UserRole -from agentstack_server.domain.repositories.env import EnvStoreEntity -from agentstack_server.exceptions import InvalidProviderUpgradeError, ManifestLoadError, MissingAgentCardLabelError -from agentstack_server.service_layer.deployment_manager import ( - IProviderDeploymentManager, -) +from agentstack_server.exceptions import ManifestLoadError from agentstack_server.service_layer.unit_of_work import IUnitOfWorkFactory from agentstack_server.utils.a2a import get_extension -from agentstack_server.utils.github import ResolvedGithubUrl -from agentstack_server.utils.logs_container import LogsContainer -from agentstack_server.utils.utils import cancel_task, utc_now +from agentstack_server.utils.utils import utc_now logger = 
logging.getLogger(__name__) @inject class ProviderService: - def __init__(self, deployment_manager: IProviderDeploymentManager, uow: IUnitOfWorkFactory): + def __init__(self, uow: IUnitOfWorkFactory): self._uow = uow - self._deployment_manager = deployment_manager async def create_provider( self, *, user: User, location: ProviderLocation, - origin: str | ResolvedGithubUrl | None = None, - auto_stop_timeout: timedelta, - registry: RegistryLocation | None = None, + origin: str | None = None, agent_card: AgentCard | None = None, - variables: dict[str, str] | None = None, - ) -> ProviderWithState: + source_type: SourceType = SourceType.API, + ) -> Provider: try: if not agent_card: agent_card = await location.load_agent_card() agent_card = self._inject_default_agent_detail_extension(agent_card) - version_info = await location.get_version_info() - - if isinstance(origin, ResolvedGithubUrl): - version_info.github = origin - origin = origin.base provider = Provider( source=location, origin=origin or location.origin, - registry=registry, - version_info=version_info, agent_card=agent_card, created_by=user.id, - auto_stop_timeout=auto_stop_timeout, + source_type=source_type, ) - if not provider.managed and get_extension(agent_card, SELF_REGISTRATION_EXTENSION_URI): - provider.unmanaged_state = UnmanagedState.ONLINE + if get_extension(agent_card, SELF_REGISTRATION_EXTENSION_URI): + provider.state = ProviderState.ONLINE except ValueError as ex: raise ManifestLoadError(location=location, message=str(ex), status_code=HTTP_400_BAD_REQUEST) from ex - except MissingAgentCardLabelError as ex: - raise ManifestLoadError( - location=location, message=str(ex), status_code=HTTP_422_UNPROCESSABLE_CONTENT - ) from ex except Exception as ex: raise ManifestLoadError(location=location, message=str(ex)) from ex async with self._uow() as uow: await uow.providers.create(provider=provider) - if variables: - await uow.env.update( - parent_entity=EnvStoreEntity.PROVIDER, 
parent_entity_id=provider.id, variables=variables - ) await uow.commit() - [provider_response] = await self._get_providers_with_state(providers=[provider]) - return provider_response + return provider def _inject_default_agent_detail_extension(self, agent_card: AgentCard) -> AgentCard: if get_extension(agent_card, AGENT_DETAIL_EXTENSION_URI): @@ -120,33 +88,14 @@ async def patch_provider( provider_id: UUID, user: User, location: ProviderLocation | None = None, - auto_stop_timeout: timedelta | None = None, - origin: str | ResolvedGithubUrl | None = None, + origin: str | None = None, agent_card: AgentCard | None = None, - variables: dict[str, str] | None = None, - allow_registry_update: bool = False, - force: bool = False, - unmanaged_state: UnmanagedState | None = None, - ) -> ProviderWithState: + state: ProviderState | None = None, + ) -> Provider: user_id = user.id if user.role != UserRole.ADMIN else None - github_version_info: ResolvedGithubUrl | None = None - if isinstance(origin, ResolvedGithubUrl): - github_version_info = origin - origin = origin.base - async with self._uow() as uow: provider = await uow.providers.get(provider_id=provider_id, user_id=user_id) - if provider.registry and not allow_registry_update: - raise InvalidProviderUpgradeError("Cannot update provider added from registry") - old_variables = ( - await uow.env.get_all( - parent_entity=EnvStoreEntity.PROVIDER, - parent_entity_ids=[provider.id], - ) - )[provider.id] - - variables = old_variables if variables is None else variables updated_provider = provider.model_copy() updated_provider.source = location or updated_provider.source @@ -154,27 +103,12 @@ async def patch_provider( updated_provider.agent_card = self._inject_default_agent_detail_extension(agent_card) updated_provider.origin = origin or updated_provider.source.origin - if auto_stop_timeout is not None: - updated_provider.auto_stop_timeout = auto_stop_timeout - - # Allow explicit state override, otherwise use heuristic for 
self-registered agents - if unmanaged_state is not None: - updated_provider.unmanaged_state = unmanaged_state + if state is not None: + updated_provider.state = state elif agent_card and get_extension(agent_card, SELF_REGISTRATION_EXTENSION_URI): - # this is a bit heuristic, self-registered agents send a card in this format, but technically somebody else - # can send it without the agent actually being online - updated_provider.unmanaged_state = UnmanagedState.ONLINE - - # Some migrated docker providers may not have a docker version_info field, update during the patch - if ( - isinstance(updated_provider.source, DockerImageProviderLocation) - and updated_provider.version_info.docker is None - ): - updated_provider.version_info = await provider.source.get_version_info() + updated_provider.state = ProviderState.ONLINE if location is not None and location != provider.source: - updated_provider.version_info = await location.get_version_info() - if not agent_card: try: loaded_card = await location.load_agent_card() @@ -186,33 +120,20 @@ async def patch_provider( except Exception as ex: raise ManifestLoadError(location=location, message=str(ex)) from ex - if github_version_info: - updated_provider.version_info.github = github_version_info - - should_update = provider != updated_provider or variables != old_variables or force - if not should_update: - return (await self._get_providers_with_state(providers=[provider]))[0] + if provider == updated_provider: + return provider updated_provider.updated_at = utc_now() async with self._uow() as uow: await uow.providers.update(provider=updated_provider) - - if old_variables != variables: - await uow.env.delete(parent_entity=EnvStoreEntity.PROVIDER, parent_entity_id=provider_id) - await uow.env.update( - parent_entity=EnvStoreEntity.PROVIDER, parent_entity_id=provider_id, variables=variables - ) await uow.commit() - await self._rotate_provider(provider=updated_provider, env=variables) - - [provider_response] = await 
self._get_providers_with_state(providers=[updated_provider]) - return provider_response + return updated_provider async def preview_provider( self, location: ProviderLocation, agent_card: AgentCard | None = None - ) -> ProviderWithState: + ) -> Provider: try: if not agent_card: agent_card = await location.load_agent_card() @@ -220,80 +141,24 @@ async def preview_provider( provider = Provider( source=location, origin=location.origin, - version_info=await location.get_version_info(), agent_card=agent_card, created_by=uuid.uuid4(), ) - [provider_response] = await self._get_providers_with_state(providers=[provider]) - return provider_response + return provider except ValueError as ex: raise ManifestLoadError(location=location, message=str(ex), status_code=HTTP_400_BAD_REQUEST) from ex except Exception as ex: raise ManifestLoadError(location=location, message=str(ex)) from ex - async def _get_providers_with_state(self, providers: list[Provider]) -> list[ProviderWithState]: - result_providers = [] - provider_states = await self._deployment_manager.state(provider_ids=[provider.id for provider in providers]) - - async with self._uow() as uow: - provider_ids = [provider.id for provider in providers] - providers_env = await uow.env.get_all(parent_entity=EnvStoreEntity.PROVIDER, parent_entity_ids=provider_ids) - - for provider, state in zip(providers, provider_states, strict=False): - final_state = state - if not provider.managed: - final_state = provider.unmanaged_state if provider.unmanaged_state else UnmanagedState.OFFLINE - result_providers.append( - ProviderWithState( - **provider.model_dump(), - state=final_state, - missing_configuration=[ - var - for var in provider.check_env(providers_env[provider.id], raise_error=False) - if var.required - ], - ) - ) - return result_providers - async def delete_provider(self, *, provider_id: UUID, user: User) -> None: user_id = user.id if user.role != UserRole.ADMIN else None async with self._uow() as uow: - provider = await 
uow.providers.get(provider_id=provider_id, user_id=user_id) await uow.providers.delete(provider_id=provider_id, user_id=user_id) - if provider.managed: - await self._deployment_manager.delete(provider_id=provider_id) await uow.commit() - async def scale_down_providers(self): - active_providers = [ - provider - for provider in await self.list_providers() - if provider.managed and provider.state == ProviderDeploymentState.RUNNING - ] - errors = [] - for provider in active_providers: - try: - if provider.auto_stop_timeout and (provider.last_active_at + provider.auto_stop_timeout) < utc_now(): - logger.info(f"Scaling down provider: {provider.id}") - await self._deployment_manager.scale_down(provider_id=provider.id) - except Exception as ex: - errors.append(ex) - if errors: - raise ExceptionGroup("Exceptions occurred when scaling down providers", errors) - - async def remove_orphaned_providers(self): - async with self._uow() as uow: - existing_providers = [p.id async for p in uow.providers.list()] - await self._deployment_manager.remove_orphaned_providers(existing_providers=existing_providers) - async def list_providers( self, user: User | None = None, user_owned: bool | None = None, origin: str | None = None - ) -> list[ProviderWithState]: - # user_owned: True -> show user owned entities - # user_owned: False -> show all but user owned entities - # user_owned: None -> show all entities - + ) -> list[Provider]: if user_owned is not None and user is None: raise ValueError("user_owned cannot be specified without a user") @@ -306,11 +171,11 @@ async def list_providers( origin=origin, ) ] - return await self._get_providers_with_state(providers=providers) + return providers async def get_provider( self, provider_id: UUID | None = None, location: ProviderLocation | None = None - ) -> ProviderWithState: + ) -> Provider: if not (bool(provider_id) ^ bool(location)): raise ValueError("Either provider_id or location must be provided") providers = [ @@ -322,78 +187,3 @@ async def 
get_provider( raise HTTPException(status_code=HTTP_404_NOT_FOUND, detail=f"Provider with ID: {provider_id!s} not found") return providers[0] - async def stream_logs(self, provider_id: UUID, user: User) -> Callable[..., AsyncIterator[str]]: - user_id = user.id if user.role != UserRole.ADMIN else None - async with self._uow() as uow: - # check provider exists and user ownership - await uow.providers.get(provider_id=provider_id, user_id=user_id) - - logs_container = LogsContainer() - - logs_task = asyncio.create_task( - self._deployment_manager.stream_logs(provider_id=provider_id, logs_container=logs_container) - ) - - async def logs_iterator() -> AsyncIterator[str]: - try: - async with logs_container.stream() as stream: - async for message in stream: - if message.model_dump().get("error"): - raise RuntimeError(f"Error capturing logs: {message.message}") - yield json.dumps(message.model_dump(mode="json")) - finally: - await cancel_task(logs_task) - - return logs_iterator - - async def _rotate_provider(self, provider: Provider, env: dict[str, str]): - [state] = await self._deployment_manager.state(provider_ids=[provider.id]) - if ( - provider.managed - # provider is not idle (if idle, it will be updated next time it's scaled up) - and state in {ProviderDeploymentState.RUNNING, ProviderDeploymentState.STARTING} - ): - await self._deployment_manager.create_or_replace(provider=provider, env=env) - - async def update_provider_env( - self, - *, - provider_id: UUID, - env: dict[str, str | None] | dict[str, str], - user: User, - allow_registry_update: bool = False, - ) -> None: - user_id = user.id if user.role != UserRole.ADMIN else None - provider = None - try: - async with self._uow() as uow: - provider = await uow.providers.get(provider_id=provider_id, user_id=user_id) - if provider.registry and not allow_registry_update: - raise ValueError("Cannot update variables for a provider added from registry") - await uow.env.update(parent_entity=EnvStoreEntity.PROVIDER, 
parent_entity_id=provider_id, variables=env) - new_env = await uow.env.get_all(parent_entity=EnvStoreEntity.PROVIDER, parent_entity_ids=[provider_id]) - new_env = new_env[provider_id] - await uow.commit() - await self._rotate_provider(provider=provider, env=new_env) - except Exception as ex: - if not provider: - return - logger.error(f"Exception occurred while updating env, rolling back to previous state: {ex}") - async with self._uow() as uow: - orig_env = await uow.env.get_all(parent_entity=EnvStoreEntity.PROVIDER, parent_entity_ids=[provider_id]) - orig_env = orig_env[provider_id] - try: - logger.exception( - f"Failed to update env, attempting to rollback provider: {provider.id} to previous state" - ) - await self._deployment_manager.create_or_replace(provider=provider, env=orig_env) - except Exception: - logger.error(f"Failed to rollback provider: {provider.id}") - raise - - async def list_provider_env(self, *, provider_id: UUID, user: User) -> dict[str, str]: - user_id = user.id if user.role != UserRole.ADMIN else None - async with self._uow() as uow: - await uow.providers.get(provider_id=provider_id, user_id=user_id) - env = await uow.env.get_all(parent_entity=EnvStoreEntity.PROVIDER, parent_entity_ids=[provider_id]) - return env[provider_id] diff --git a/apps/agentstack-server/src/agentstack_server/service_layer/unit_of_work.py b/apps/agentstack-server/src/agentstack_server/service_layer/unit_of_work.py index 489f28c878..4de69ea91d 100644 --- a/apps/agentstack-server/src/agentstack_server/service_layer/unit_of_work.py +++ b/apps/agentstack-server/src/agentstack_server/service_layer/unit_of_work.py @@ -13,8 +13,6 @@ from agentstack_server.domain.repositories.file import IFileRepository from agentstack_server.domain.repositories.model_provider import IModelProviderRepository from agentstack_server.domain.repositories.provider import IProviderRepository -from agentstack_server.domain.repositories.provider_build import IProviderBuildRepository -from 
agentstack_server.domain.repositories.provider_discovery import IProviderDiscoveryRepository from agentstack_server.domain.repositories.user import IUserRepository from agentstack_server.domain.repositories.user_feedback import IUserFeedbackRepository from agentstack_server.domain.repositories.vector_store import IVectorDatabaseRepository, IVectorStoreRepository @@ -22,8 +20,6 @@ class IUnitOfWork(Protocol): providers: IProviderRepository - provider_builds: IProviderBuildRepository - provider_discoveries: IProviderDiscoveryRepository a2a_requests: IA2ARequestRepository contexts: IContextRepository files: IFileRepository diff --git a/apps/agentstack-server/src/agentstack_server/utils/docker.py b/apps/agentstack-server/src/agentstack_server/utils/docker.py index 8063652ccf..076b88337b 100644 --- a/apps/agentstack-server/src/agentstack_server/utils/docker.py +++ b/apps/agentstack-server/src/agentstack_server/utils/docker.py @@ -1,274 +1,40 @@ # Copyright 2025 © BeeAI a Series of LF Projects, LLC # SPDX-License-Identifier: Apache-2.0 +"""Minimal stub retained for historical migration compatibility. + +The full Docker registry client was removed as part of the kagenti integration +(providers are now network-based, not Docker-image-based). Only ``DockerImageID`` +is kept because migration ``d39dd1ff796f`` imports it to recompute provider IDs. 
+""" from __future__ import annotations import re -from datetime import timedelta -from enum import StrEnum -from functools import cached_property -from typing import Any, NamedTuple - -import httpx -from async_lru import alru_cache -from kink import di -from pydantic import ( - BaseModel, - ConfigDict, - ModelWrapValidatorHandler, - PrivateAttr, - RootModel, - computed_field, - model_validator, -) - -from agentstack_server.configuration import Configuration, OCIRegistryConfiguration - - -class RegistryPermissions(StrEnum): - PULL = "pull" - PUSH = "push" - -AUTH_URL_PER_REGISTRY = { - "ghcr.io": "https://ghcr.io/token?service=ghcr.io&scope=repository:{repository}:{permissions}", - "icr.io": "https://icr.io/oauth/token?service=registry&scope=repository:{repository}:{permissions}", - "us.icr.io": "https://us.icr.io/oauth/token?service=registry&scope=repository:{repository}:{permissions}", - "docker.io": "https://auth.docker.io/token?service=registry.docker.io&scope=repository:{repository}:{permissions}", - "registry-1.docker.io": "https://auth.docker.io/token?service=registry.docker.io&scope=repository:{repository}:{permissions}", -} +from pydantic import RootModel, model_validator -base_headers = { - "Accept": ( - "application/vnd.oci.image.index.v1+json," - "application/vnd.oci.image.manifest.v1+json," - "application/vnd.docker.distribution.manifest.list.v2+json," - "application/vnd.docker.distribution.manifest.v2+json" - ) -} - - -class ManifestResponse(NamedTuple): - manifest: dict[str, Any] - digest: str - - -class DockerImageID(RootModel[str]): - model_config = ConfigDict(frozen=True) - - _registry: str | None = PrivateAttr(None) - _repository: str = PrivateAttr() - _tag: str | None = PrivateAttr(None) - _digest: str | None = PrivateAttr(None) - _manifest: dict[str, Any] | None = PrivateAttr(None) - - @property - def registry_config(self) -> OCIRegistryConfiguration: - return di[Configuration].oci_registry[self.registry] - - @cached_property - def 
registry_base_url(self) -> str: - registry = self.registry +_DOCKER_IMAGE_PATTERN = re.compile( + r"^(?:(?P[^/]+\.[^/]+)/)?(?P[^:@]+)(?::(?P[^@]+))?(?:@(?P.+))?$" +) - if registry.endswith("docker.io"): - registry = "registry-1.docker.io" - return f"{self.registry_config.protocol}://{registry}" - @cached_property - def manifest_base_url(self) -> str: - return f"{self.registry_base_url}/v2/{self.repository}/manifests" +class DockerImageID(RootModel): + """Parses a Docker image reference into registry, repository, tag and digest.""" - @cached_property - def get_manifest_url(self) -> str: - version = self.digest or self.tag - return f"{self.manifest_base_url}/{version}" + root: str - @property - def registry(self) -> str: - return self._registry or "docker.io" - - @property - def repository(self) -> str: - if self.registry.endswith("docker.io") and "/" not in self._repository: - return f"library/{self._repository}" - return self._repository + @model_validator(mode="after") + def _parse(self) -> "DockerImageID": + m = _DOCKER_IMAGE_PATTERN.match(self.root) + if not m: + raise ValueError(f"Invalid docker image reference: {self.root}") + self._registry = m.group("registry") or "docker.io" + self._repository = m.group("repository") + self._tag = m.group("tag") or "latest" + self._digest = m.group("digest") + return self @property def base(self) -> str: - return f"{self.registry}/{self.repository}" - - @property - def tag(self) -> str: - return self._tag or "latest" - - @property - def digest(self) -> str | None: - return self._digest or None - - @model_validator(mode="wrap") - @classmethod - def _parse(cls, data: Any, handler: ModelWrapValidatorHandler): - if isinstance(data, DockerImageID): - return data - if not isinstance(data, str): - raise ValueError(f"Invalid Docker image: {data}") - - pattern = r""" - # Forbid starting with http:// or https:// - ^(?!https?://) - - # Registry (optional) - ends with slash and contains at least one dot - # For local registries, these 
must use the svc.namespace url pattern, otherwise we - # cannot really distinguish the registry hostname from image name on docker hub - ((?P[^/]+\.[^/]+)/)? - - # Repository (required) - final component before any tag - (?P[^:]+) - - # Tag (optional) - everything after the colon before @ - (?::(?P[^@]+))? - - # Digest (optional) - everything after @ - (?:@(?P.+))? - """ - match = re.match(pattern, data, re.VERBOSE) - if not match: - raise ValueError(f"Invalid Docker image: {data}") - - image_id = handler(data) - for name, value in match.groupdict().items(): - setattr(image_id, f"_{name}", value) - - # we need to construct a new object, because this is a frozen instance - return image_id.model_copy(update={"root": str(image_id)}) - - def __str__(self): - digest = f"@{self.digest}" if self.digest else "" - return f"{self.base}:{self.tag}{digest}" - - async def get_registry_auth_endpoint(self) -> str | None: - if self.registry not in AUTH_URL_PER_REGISTRY: - async with httpx.AsyncClient() as client: - registry_resp = await client.get(self.get_manifest_url, follow_redirects=True) - header = registry_resp.headers.get("www-authenticate") - if not header: - return - if not (match := re.match(r"(\w+)\s+(.*)", header)): - raise ValueError(f"Invalid www authenticate header: {header}") - _auth_scheme, params_str = match.groups() - params = {} - for param in re.finditer(r'(\w+)="([^"]*)"', params_str): - key, value = param.groups() - params[key] = value - auth_url = f"{params['realm']}?service={params['service']}&scope=repository:{{repository}}:{{permissions}}" - AUTH_URL_PER_REGISTRY[self.registry] = auth_url - - return AUTH_URL_PER_REGISTRY[self.registry] - - async def get_manifest(self) -> ManifestResponse: - headers = base_headers.copy() - - if token := await get_registry_token(docker_image_id=self, permissions=(RegistryPermissions.PULL,)): - headers["Authorization"] = f"Bearer {token}" - - async with httpx.AsyncClient() as client: - manifest_resp = await 
client.get(self.get_manifest_url, headers=headers, follow_redirects=True) - - if manifest_resp.status_code != 200: - raise Exception(f"Failed to get manifest: {manifest_resp.status_code}, {manifest_resp.text}") - - return ManifestResponse( - manifest=manifest_resp.raise_for_status().json(), - digest=manifest_resp.headers["Docker-Content-Digest"], - ) - - async def resolve_version(self) -> ResolvedDockerImageID: - manifest = await self.get_manifest() - digest = manifest.digest - result = ResolvedDockerImageID( - registry=self.registry, - repository=self.repository, - tag=self.tag, - digest=digest, - ) - result._manifest = manifest.manifest - return result - - -class ResolvedDockerImageID(BaseModel): - registry: str - repository: str - tag: str - digest: str - _manifest: dict[str, Any] | None = PrivateAttr(None) - - @computed_field - @cached_property - def image_id(self) -> DockerImageID: - return DockerImageID(root=f"{self.registry}/{self.repository}:{self.tag}@{self.digest}") - - async def get_manifest(self): - if not self._manifest: - self._manifest, _ = await self.image_id.get_manifest() - return self._manifest - - async def get_labels(self) -> dict[str, str]: - manifest = await self.get_manifest() - - headers = base_headers.copy() - headers["Authorization"] = f"Bearer {await get_registry_token(docker_image_id=self.image_id)}" - - async with httpx.AsyncClient() as client: - if "manifests" in manifest: - manifest_resp = await client.get( - f"{self.image_id.manifest_base_url}/{manifest['manifests'][0]['digest']}", - headers=headers, - follow_redirects=True, - ) - manifest = manifest_resp.raise_for_status().json() - - config_digest = manifest["config"]["digest"] - config_url = f"{self.image_id.registry_base_url}/v2/{self.repository}/blobs/{config_digest}" - config_resp = await client.get(config_url, headers=headers, follow_redirects=True) - - if config_resp.status_code != 200: - raise Exception(f"Failed to get config: {config_resp.status_code}, {config_resp.text}") 
- - config = config_resp.json() - return config.get("config", {}).get("Labels", {}) - - def __str__(self) -> str: - return str(self.image_id) - - -@alru_cache(ttl=timedelta(minutes=5).total_seconds()) -async def get_registry_token( - *, - docker_image_id: DockerImageID, - permissions: tuple[RegistryPermissions] = (RegistryPermissions.PULL,), -) -> str | None: - try: - token_endpoint = await docker_image_id.get_registry_auth_endpoint() - except Exception as ex: - raise Exception( - f"Image registry does not exist or is not accessible: {docker_image_id.get_manifest_url}" - ) from ex - - if token_endpoint: - async with httpx.AsyncClient() as client: - if token_endpoint: - token_endpoint = token_endpoint.format( - repository=docker_image_id.repository, permissions=",".join(str(p) for p in permissions) - ) - auth_resp = await client.get( - token_endpoint, - follow_redirects=True, - headers={"Authorization": f"Basic {docker_image_id.registry_config.basic_auth_str}"} - if docker_image_id.registry_config.basic_auth_str - else {}, - ) - if auth_resp.status_code != 200: - raise Exception(f"Failed to authenticate: {auth_resp.status_code}, {auth_resp.text}") - return auth_resp.json()["token"] - return None + return f"{self._registry}/{self._repository}" diff --git a/apps/agentstack-server/src/agentstack_server/utils/github.py b/apps/agentstack-server/src/agentstack_server/utils/github.py deleted file mode 100644 index 2050dcdeeb..0000000000 --- a/apps/agentstack-server/src/agentstack_server/utils/github.py +++ /dev/null @@ -1,279 +0,0 @@ -# Copyright 2025 © BeeAI a Series of LF Projects, LLC -# SPDX-License-Identifier: Apache-2.0 - -from __future__ import annotations - -import logging -import re -import time -from datetime import timedelta -from enum import StrEnum -from typing import TYPE_CHECKING, Any - -import httpx -from async_lru import alru_cache -from authlib.jose import jwt -from kink import di -from pydantic import AnyUrl, BaseModel, ModelWrapValidatorHandler, 
RootModel, model_validator - -__all__ = [ - "GithubUrl", - "GithubVersionType", - "ResolvedGithubUrl", - "get_github_token", -] - -if TYPE_CHECKING: - from agentstack_server.configuration import GithubAppConfiguration, GithubPATConfiguration - -logger = logging.getLogger(__name__) - - -class GithubVersionType(StrEnum): - HEAD = "head" - TAG = "tag" - - -@alru_cache(ttl=timedelta(minutes=15).seconds) -async def get_github_token(host: str) -> str | None: - from agentstack_server.configuration import Configuration - - if not (conf := di[Configuration].github_registry.get(host)): - return None - if isinstance(conf, GithubPATConfiguration): - return conf.token.get_secret_value() - elif isinstance(conf, GithubAppConfiguration): - now = time.time() - payload = {"iat": int(now), "exp": int(now) + 600, "iss": conf.app_id} - encoded_jwt = jwt.encode({"alg": "RS256"}, payload, conf.private_key.get_secret_value()).decode("utf-8") - async with httpx.AsyncClient() as client: - resp = await client.post( - f"https://{host}/api/v3/app/installations/{conf.installation_id}/access_tokens", - headers={ - "Authorization": f"Bearer {encoded_jwt}", - "Accept": "application/vnd.github+json", - "X-GitHub-Api-Version": "2022-11-28", - }, - ) - return resp.raise_for_status().json()["token"] - - -class ResolvedGithubUrl(BaseModel): - host: str = "github.com" - org: str - repo: str - version: str - version_type: GithubVersionType - commit_hash: str - path: str | None = None - - @property - def _github_config(self) -> GithubPATConfiguration | GithubAppConfiguration | None: - from agentstack_server.configuration import Configuration - - configuration = di[Configuration] # not using inject due to a circular import - return configuration.github_registry.get(self.host) - - async def get_github_token(self) -> str | None: - return await get_github_token(self.host) - - @property - def base(self) -> str: - path = f"#path={self.path}" if self.path else "" - return 
f"git+https://{self.host}/{self.org}/{self.repo}{path}" - - async def get_raw_url(self, path: str | None = None) -> AnyUrl: - if not path and "." not in (self.path or ""): - raise ValueError("Path is not specified or it is not a file") - path = path or self.path - if not path: - raise ValueError("Path cannot be empty") - # For github.com, use raw.githubusercontent.com, for enterprise use API - if not self._github_config: - if self.host == "github.com": - return AnyUrl.build( - scheme="https", - host="raw.githubusercontent.com", - path=f"{self.org}/{self.repo}/{self.commit_hash}/{path.strip('/')}", - ) - raise ValueError(f"GitHub token not configured for host: {self.host}") - # For enterprise, we need to fetch the download_url from the API response - token = await get_github_token(self.host) - api_host = f"{self.host}/api/v3" - async with httpx.AsyncClient() as client: - headers = {"Authorization": f"token {token}", "Accept": "application/vnd.github.v3+json"} - resp = await client.get( - (f"https://{api_host}/repos/{self.org}/{self.repo}/contents/{path.strip('/')}?ref={self.commit_hash}"), - headers=headers, - ) - resp.raise_for_status() - content_data = resp.json() - if "download_url" not in content_data: - raise ValueError(f"File {path} not found or is not a file") - return AnyUrl(content_data["download_url"]) - - def __str__(self): - path = f"#path={self.path}" if self.path else "" - return f"git+https://{self.host}/{self.org}/{self.repo}@{self.commit_hash}{path}" - - -class GithubUrl(RootModel): - root: str - - _org: str - _repo: str - _host: str = "github.com" - _version: str | None = None - _path: str | None = None - - @property - def host(self) -> str: - return self._host - - @property - def org(self) -> str: - return self._org - - @property - def repo(self) -> str: - return self._repo - - @property - def version(self) -> str | None: - return self._version - - @property - def path(self) -> str | None: - return self._path - - @path.setter - def path(self, 
value: str): - self._path = value - self.root = str(self) - - @model_validator(mode="wrap") - @classmethod - def _parse(cls, data: Any, handler: ModelWrapValidatorHandler): - url: GithubUrl = handler(data) - - # IMPORTANT: if you change this, make sure to update the CLI - # TODO - extract this class to SDK? - pattern = r""" - ^ - (?:git\+)? # Optional git+ prefix - https?://(?Pgithub(?:\.[^/]+)+)/ # GitHub host (github.com or github.enterprise.com) - (?P[^/]+)/ # Organization - (?P - (?: # Non-capturing group for repo name - (?!\.git(?:$|[@#])) # Negative lookahead for .git at end or followed by @# - [^/@#] # Any char except /@# - )+ # One or more of these chars - ) - (?:\.git)? # Optional .git suffix - (?:@(?P[^#]+))? # Optional version after @ - (?:\#path=(?P.+))? # Optional path after #path= - $ - """ - match = re.match(pattern, url.root, re.VERBOSE) - if not match: - raise ValueError(f"Invalid GitHub URL: {data}") - for name, value in match.groupdict().items(): - if value and not re.match(r"^[/a-zA-Z0-9._-]+$", value): - raise ValueError(f"Invalid {name}: {value}") - setattr(url, f"_{name}", value) - url._path = url.path.strip("/") if url.path else None - url.root = str(url) # normalize url - return url - - async def _resolve_version_public(self) -> ResolvedGithubUrl: - version = self._version or "HEAD" - try: - async with httpx.AsyncClient() as client: - if not (version := self._version): - manifest_url = f"https://github.com/{self.org}/{self.repo}/blob/-/dummy" - resp = await client.head(manifest_url) - if not resp.headers.get("location", None): - raise ValueError(f"{self.path} not found in github repository.") - if match := re.search("/blob/([^/]*)", resp.headers["location"]): - version = match.group(1) - - assert version - - resp = await client.get( - f"https://github.com/{self._org}/{self._repo}.git/info/refs?service=git-upload-pack" - ) - resp = resp.text.split("\n") - [version_line] = [line for line in resp if line.endswith(f"/{version}")] - 
[commit_hash, _ref_name] = version_line[4:].split() - version_type = GithubVersionType.HEAD if "/refs/heads" in _ref_name else GithubVersionType.TAG - return ResolvedGithubUrl( - host=self._host, - org=self._org, - repo=self._repo, - version=version, - commit_hash=commit_hash, - path=self._path, - version_type=version_type, - ) - except Exception as exc: - raise ValueError( - f"Failed to resolve github version, does the tag or branch {version} exist?: {exc!r}" - ) from exc - - async def _resolve_version_api(self, token: str) -> ResolvedGithubUrl: - version = self._version - api_host = f"{self._host}/api/v3" - - try: - async with httpx.AsyncClient() as client: - headers = {"Authorization": f"token {token}", "Accept": "application/vnd.github.v3+json"} - - if not version: - # Get default branch - resp = await client.get(f"https://{api_host}/repos/{self._org}/{self._repo}", headers=headers) - resp.raise_for_status() - version = resp.json()["default_branch"] - - # Get commit hash for version - resp = await client.get( - f"https://{api_host}/repos/{self._org}/{self._repo}/commits/{version}", headers=headers - ) - resp.raise_for_status() - commit_data = resp.json() - commit_hash = commit_data["sha"] - - # Determine if it's a branch or tag - version_type = GithubVersionType.HEAD - try: - # Check if it's a tag - resp = await client.get( - f"https://{api_host}/repos/{self._org}/{self._repo}/git/refs/tags/{version}", headers=headers - ) - if resp.status_code == 200: - version_type = GithubVersionType.TAG - except Exception: - pass - - return ResolvedGithubUrl( - host=self._host, - org=self._org, - repo=self._repo, - version=version, - commit_hash=commit_hash, - path=self._path, - version_type=version_type, - ) - except Exception as exc: - raise ValueError(f"Failed to resolve github version for private repository: {exc!r}") from exc - - async def resolve_version(self) -> ResolvedGithubUrl: - if not (token := await get_github_token(self._host)): - if self._host == "github.com": 
- return await self._resolve_version_public() - raise ValueError(f"GitHub token not configured for host {self._host}") - return await self._resolve_version_api(token=token) - - def __str__(self): - version = f"@{self._version}" if self._version else "" - path = f"#path={self.path}" if self.path else "" - return f"git+https://{self._host}/{self.org}/{self.repo}{version}{path}" diff --git a/apps/agentstack-server/tasks.toml b/apps/agentstack-server/tasks.toml index f5fa9faaaa..47ce4c49ac 100644 --- a/apps/agentstack-server/tasks.toml +++ b/apps/agentstack-server/tasks.toml @@ -195,7 +195,6 @@ eval "cli_args=(${usage_cli_args})" export SKIP_PULL {{ mise_bin }} agentstack:start --vm-name="$VM_NAME" \ --no-wait-for-platform \ - --set externalRegistries=null \ "${cli_args[@]}" {{ mise_bin }} run agentstack-server:dev:connect --vm-name="$VM_NAME" """ @@ -317,45 +316,34 @@ NO_CLEAN="${usage_no_clean:-false}" if [ "$NO_CLEAN" != "true" ]; then {{ mise_bin }} run agentstack:stop-all {{ mise_bin }} run agentstack:delete --vm-name=${VM_NAME} || true - curl http://localhost:8333 >/dev/null 2>&1 && echo "Another instance at localhost:8333 is already running" && exit 2 + curl http://agentstack-api.localtest.me:8080 >/dev/null 2>&1 && echo "Another instance at agentstack-api.localtest.me:8080 is already running" && exit 2 fi CONFIG_FILE="/tmp/config_e2e_test_$(date +%s).yaml" echo ' -externalRegistries: null -ui: - enabled: false -auth: - enabled: true -docling: - enabled: true -connector: - presets: - - url: mcp+stdio://test - stdio: - image: mcp/aws-documentation - metadata: - name: Test MCP Server -keycloak: - auth: - seedAgentstackUsers: - - username: admin - password: admin - firstName: Admin - lastName: User - email: admin@beeai.dev - roles: ["agentstack-admin"] +agentstack: + ui: + enabled: false + docling: + enabled: true + connector: + presets: + - url: mcp+stdio://test + stdio: + image: mcp/aws-documentation + metadata: + name: Test MCP Server ' > "$CONFIG_FILE" -{{ 
mise_bin }} run agentstack:start -v --vm-name=${VM_NAME} --skip-login -f "$CONFIG_FILE" --set ui.enabled=false +{{ mise_bin }} run agentstack:start -v --vm-name=${VM_NAME} --skip-login -f "$CONFIG_FILE" --set agentstack:ui.enabled=false eval "$( {{ mise_bin }} run agentstack:shell --vm-name="$VM_NAME" )" if [ -z "${TEST_AGENT_IMAGE:-}" ]; then echo "Building test agent..." - TEST_AGENT_IMAGE=agentstack-registry-svc.default:5001/chat-test:latest - {{ mise_bin }} run agentstack-cli:run -- client-side-build -v "${PWD}/../.." --vm-name=${VM_NAME} --dockerfile "${PWD}/../../agents/chat/Dockerfile" --tag ${TEST_AGENT_IMAGE} + TEST_AGENT_IMAGE=registry.cr-system.svc.cluster.local:5000/chat-test:latest + {{ mise_bin }} run agentstack-cli:run -- build -v "${PWD}/../.." --dockerfile "${PWD}/../../agents/chat/Dockerfile" --tag ${TEST_AGENT_IMAGE} --vm-name=${VM_NAME} fi export TEST_AGENT_IMAGE @@ -408,38 +396,27 @@ NO_CLEAN="${usage_no_clean:-false}" if [ "$NO_CLEAN" != "true" ]; then {{ mise_bin }} run agentstack:stop-all {{ mise_bin }} run agentstack:delete --vm-name=${VM_NAME} - curl http://localhost:8333 >/dev/null 2>&1 && echo "Another instance at localhost:8333 is already running" && exit 2 + curl http://agentstack-api.localtest.me:8080 >/dev/null 2>&1 && echo "Another instance at agentstack-api.localtest.me:8080 is already running" && exit 2 fi CONFIG_FILE="/tmp/config_e2e_test_$(date +%s).yaml" echo ' -externalRegistries: null -ui: - enabled: false -auth: - enabled: true -docling: - enabled: true -connector: - presets: - - url: mcp+stdio://test - stdio: - image: mcp/aws-documentation - metadata: - name: Test MCP Server -keycloak: - auth: - seedAgentstackUsers: - - username: admin - password: admin - firstName: Admin - lastName: User - email: admin@beeai.dev - roles: ["agentstack-admin"] +agentstack: + ui: + enabled: false + docling: + enabled: true + connector: + presets: + - url: mcp+stdio://test + stdio: + image: mcp/aws-documentation + metadata: + name: Test MCP 
Server ' > "$CONFIG_FILE" -{{ mise_bin }} run agentstack:start --vm-name=${VM_NAME} --skip-login -f "$CONFIG_FILE" --set ui.enabled=false +{{ mise_bin }} run agentstack:start --vm-name=${VM_NAME} --skip-login -f "$CONFIG_FILE" --set agentstack:ui.enabled=false eval "$( {{ mise_bin }} run agentstack:shell --vm-name="$VM_NAME" )" @@ -489,9 +466,8 @@ VM_NAME=integration-test-run {{ mise_bin }} run agentstack:start \ --vm-name="$VM_NAME" \ - --set externalRegistries=null \ - --set redis.enabled=true \ - --set ui.enabled=false + --set agentstack:redis.enabled=true \ + --set agentstack:ui.enabled=false eval "$( {{ mise_bin }} run agentstack:shell --vm-name="$VM_NAME" )" diff --git a/apps/agentstack-server/tests/conftest.py b/apps/agentstack-server/tests/conftest.py index 21b861c91c..6128c86750 100644 --- a/apps/agentstack-server/tests/conftest.py +++ b/apps/agentstack-server/tests/conftest.py @@ -33,9 +33,9 @@ class Configuration(BaseSettings): llm_model: str = "ollama:gpt-oss:20b" embedding_model: str = "ollama:nomic-embed-text:latest" llm_api_key: Secret[str] = Secret("dummy") - test_agent_image: str = "agentstack-registry-svc.default:5001/chat-test:latest" + test_agent_image: str = "registry.cr-system.svc.cluster.local:5000/chat-test:latest" test_agent_build_repo: str = "https://github.com/i-am-bee/agentstack-starter" - server_url: str = "http://localhost:8333" + server_url: str = "http://agentstack-api.localtest.me:8080" db_url: str = "postgresql+asyncpg://agentstack-user:password@postgresql:5432/agentstack" keycloak_url: str = "http://localhost:8336" diff --git a/apps/agentstack-server/tests/e2e/agents/test_agent_builds.py b/apps/agentstack-server/tests/e2e/agents/test_agent_builds.py deleted file mode 100644 index 3034def427..0000000000 --- a/apps/agentstack-server/tests/e2e/agents/test_agent_builds.py +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright 2025 © BeeAI a Series of LF Projects, LLC -# SPDX-License-Identifier: Apache-2.0 -from __future__ import annotations - 
-import asyncio -import json -import logging - -import pytest -from a2a.client.helpers import create_text_message_object -from a2a.types import ( - TaskState, -) -from agentstack_sdk.platform import AddProvider, BuildState, Provider, ProviderBuild -from agentstack_sdk.platform.context import Context - -pytestmark = pytest.mark.e2e - -logger = logging.getLogger(__name__) - - -@pytest.mark.usefixtures("clean_up", "setup_platform_client") -async def test_remote_agent_build_and_start( - subtests, - a2a_client_factory, - get_final_task_from_stream, - test_configuration, -): - with subtests.test("preview build"): - build = await ProviderBuild.preview(location=test_configuration.test_agent_build_repo) - assert build.destination - with subtests.test("build example agent"): - build = await ProviderBuild.create(location=test_configuration.test_agent_build_repo, on_complete=AddProvider()) - async for message in build.stream_logs(): - logger.debug(json.dumps(message)) - - for _ in range(10): - build = await build.get() - if build.status != BuildState.IN_PROGRESS: - break - await asyncio.sleep(0.5) - - assert build.status == BuildState.COMPLETED, f"Fail: {build.error_message}" - with subtests.test("run example agent"): - providers = await Provider.list() - assert len(providers) == 1 - provider = providers[0] - assert provider.source == build.destination - assert provider.id == build.provider_id - assert provider.agent_card - assert test_configuration.test_agent_build_repo in provider.origin - - context = await Context.create() - context_token = await context.generate_token(providers={provider.id}) - - async with a2a_client_factory(provider.agent_card, context_token) as a2a_client: - message = create_text_message_object(content="test of sirens") - task = await get_final_task_from_stream(a2a_client.send_message(message)) - - # Verify response - assert task.status.state == TaskState.completed, f"Fail: {task.status.message.parts[0].root.text}" - assert "test of sirens" in 
task.history[-1].parts[0].root.text diff --git a/apps/agentstack-server/tests/e2e/agents/test_platform_extensions.py b/apps/agentstack-server/tests/e2e/agents/test_platform_extensions.py index fbe2ec6098..d3c71e6cb3 100644 --- a/apps/agentstack-server/tests/e2e/agents/test_platform_extensions.py +++ b/apps/agentstack-server/tests/e2e/agents/test_platform_extensions.py @@ -2,14 +2,13 @@ # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations -import os from collections.abc import AsyncGenerator, AsyncIterator, Awaitable, Callable from contextlib import asynccontextmanager from typing import Annotated from uuid import uuid4 import pytest -from a2a.client import Client, create_text_message_object +from a2a.client import Client from a2a.types import FilePart, Message, Role, TaskState from agentstack_sdk.a2a.extensions.services.platform import ( PlatformApiExtensionClient, @@ -104,13 +103,10 @@ async def test_platform_api_extension(file_reader_writer_factory, permissions, s assert file.text == context.id -SELF_REGISTRATION_TEST_VAR_NAME = "_SELF_REGISTRATION_TEST_VAR" - - @pytest.fixture async def self_registration_agent(create_server_with_agent) -> AsyncGenerator[tuple[Server, Client]]: async def self_registration_agent() -> AsyncIterator[RunYield]: - yield os.environ.get(SELF_REGISTRATION_TEST_VAR_NAME, "empty") + yield "hello" context = await Context.create() token = await context.generate_token(grant_global_permissions=Permissions(a2a_proxy={"*"})) @@ -119,13 +115,8 @@ async def self_registration_agent() -> AsyncIterator[RunYield]: @pytest.mark.usefixtures("clean_up", "setup_platform_client") -async def test_self_registration_with_variables( - self_registration_agent, get_final_task_from_stream, subtests, test_configuration -): - os.environ.pop(SELF_REGISTRATION_TEST_VAR_NAME, None) +async def test_self_registration(self_registration_agent, subtests): _, client = self_registration_agent - task = await 
get_final_task_from_stream(client.send_message(create_text_message_object(content="hi"))) - assert task.history[-1].parts[0].root.text == "empty" with subtests.test("register provider"): async for attempt in AsyncRetrying(stop=stop_after_delay(6), wait=wait_fixed(0.5), reraise=True): @@ -137,18 +128,3 @@ async def test_self_registration_with_variables( assert provider.state == "online" assert "self_registration_agent" in provider.source - with subtests.test("update provider variables"): - await provider.update_variables(variables={SELF_REGISTRATION_TEST_VAR_NAME: "test"}) - - async for attempt in AsyncRetrying(stop=stop_after_delay(6), wait=wait_fixed(0.5), reraise=True): - with attempt: - task = await get_final_task_from_stream(client.send_message(create_text_message_object(content="hi"))) - assert task.history[-1].parts[0].root.text == "test" - - with subtests.test("remove provider variable"): - await provider.update_variables(variables={SELF_REGISTRATION_TEST_VAR_NAME: None}) - - async for attempt in AsyncRetrying(stop=stop_after_delay(6), wait=wait_fixed(0.5), reraise=True): - with attempt: - task = await get_final_task_from_stream(client.send_message(create_text_message_object(content="hi"))) - assert task.history[-1].parts[0].root.text == "empty" diff --git a/apps/agentstack-server/tests/e2e/routes/test_a2a_proxy.py b/apps/agentstack-server/tests/e2e/routes/test_a2a_proxy.py index 4cbcc57457..5c1f75263b 100644 --- a/apps/agentstack-server/tests/e2e/routes/test_a2a_proxy.py +++ b/apps/agentstack-server/tests/e2e/routes/test_a2a_proxy.py @@ -1235,7 +1235,7 @@ async def test_invalid_request_raises_a2a_error(client: Client, handler: mock.As # set provider as offline provider_id = str(client.base_url).rstrip("/").split("/")[-1] await db_transaction.execute( - text("UPDATE providers SET unmanaged_state = 'offline' WHERE id = :provider_id"), + text("UPDATE providers SET state = 'offline' WHERE id = :provider_id"), {"provider_id": provider_id}, ) await 
db_transaction.commit() diff --git a/apps/agentstack-server/tests/e2e/routes/test_provider_variables.py b/apps/agentstack-server/tests/e2e/routes/test_provider_variables.py deleted file mode 100644 index a1fea923d4..0000000000 --- a/apps/agentstack-server/tests/e2e/routes/test_provider_variables.py +++ /dev/null @@ -1,85 +0,0 @@ -# Copyright 2025 © BeeAI a Series of LF Projects, LLC -# SPDX-License-Identifier: Apache-2.0 - -from __future__ import annotations - -import pytest -from a2a.types import AgentCapabilities, AgentCard -from agentstack_sdk.platform import Provider - -from tests.conftest import Configuration - -pytestmark = pytest.mark.e2e - - -@pytest.mark.usefixtures("clean_up", "setup_platform_client") -async def test_provider_variables(subtests, test_configuration: Configuration): - """Test provider environment variables operations.""" - - # First create a real test provider - provider = await Provider.create( - location=test_configuration.test_agent_image, - agent_card=AgentCard( - capabilities=AgentCapabilities(), - default_input_modes=[], - default_output_modes=[], - name="test_agent", - skills=[], - description="test agent", - url="http://localhost:8000", - version="0.0.1", - ), - ) - provider_id = provider.id - - test_variables = {"TEST_VAR_1": "value1", "TEST_VAR_2": "value2", "API_KEY": "secret-key-123"} - - with subtests.test("update provider variables"): - await provider.update_variables(variables=test_variables) - - with subtests.test("list provider variables"): - variables = await provider.list_variables() - - assert len(variables) == len(test_variables) - for key, value in test_variables.items(): - assert variables[key] == value - - with subtests.test("update specific provider variables"): - partial_update = {"TEST_VAR_1": "updated_value1", "NEW_VAR": "new_value"} - - await provider.update_variables(variables=partial_update) - - # Check the updated variables - variables = await provider.list_variables() - - # Should have the updated and new 
values - assert variables["TEST_VAR_1"] == "updated_value1" - assert variables["NEW_VAR"] == "new_value" - # Previous variables should still be there - assert variables["TEST_VAR_2"] == "value2" - assert variables["API_KEY"] == "secret-key-123" - assert len(variables) == 4 - - with subtests.test("remove provider variables"): - # Remove a variable by setting it to None - remove_update = {"TEST_VAR_2": None, "API_KEY": None} - - await provider.update_variables(variables=remove_update) - - variables = await provider.list_variables() - - assert len(variables) == 2 - assert "TEST_VAR_1" in variables - assert "NEW_VAR" in variables - assert "TEST_VAR_2" not in variables - assert "API_KEY" not in variables - - with subtests.test("empty variables list"): - # Remove all remaining variables - clear_update = {"TEST_VAR_1": None, "NEW_VAR": None} - - await provider.update_variables(variables=clear_update) - - # Should have no variables - variables = await Provider.list_variables(provider_id) - assert len(variables) == 0 diff --git a/apps/agentstack-server/tests/e2e/routes/test_providers.py b/apps/agentstack-server/tests/e2e/routes/test_providers.py index 282acd1ebb..87ec6a6a87 100644 --- a/apps/agentstack-server/tests/e2e/routes/test_providers.py +++ b/apps/agentstack-server/tests/e2e/routes/test_providers.py @@ -17,9 +17,7 @@ @pytest.mark.usefixtures("clean_up", "setup_platform_client") async def test_provider_crud(subtests, test_configuration): with subtests.test("add provider"): - variables = {"test": "var"} - provider = await Provider.create(location=test_configuration.test_agent_image, variables=variables) - assert await provider.list_variables() == variables + provider = await Provider.create(location=test_configuration.test_agent_image) with subtests.test("patch provider"): new_source = test_configuration.test_agent_image @@ -36,15 +34,6 @@ async def test_provider_crud(subtests, test_configuration): provider = await provider.patch(location=new_source, 
agent_card=new_agent_card) assert provider.agent_card.name == new_agent_card.name assert provider.source == new_source - assert await provider.list_variables() == variables # variables haven't changed - - with subtests.test("change provider variables"): - new_variables = {"other": "var"} - provider = await provider.patch(variables=new_variables) - assert await provider.list_variables() == new_variables - - provider = await provider.patch(variables={}) - assert await provider.list_variables() == {} with subtests.test("test user_owned filtering"): # Test user_owned=True (should see exactly 1 provider - admin's) diff --git a/apps/agentstack-server/tests/integration/persistence/repositories/test_env.py b/apps/agentstack-server/tests/integration/persistence/repositories/test_env.py index 59c575f9c4..aff4826059 100644 --- a/apps/agentstack-server/tests/integration/persistence/repositories/test_env.py +++ b/apps/agentstack-server/tests/integration/persistence/repositories/test_env.py @@ -36,23 +36,21 @@ async def provider_id(db_transaction: AsyncConnection, normal_user: UUID) -> UUI await db_transaction.execute( text( """ - INSERT INTO providers (id, type, source, origin, version_info, auto_stop_timeout_sec, created_at, updated_at, last_active_at, agent_card, created_by, unmanaged_state) - VALUES (:id, :type, :source, :origin, :version_info, :timeout, :created_at, :updated_at, :last_active_at, :agent_card, :created_by, :unmanaged_state) + INSERT INTO providers (id, source, source_type, origin, created_at, updated_at, last_active_at, agent_card, created_by, state) + VALUES (:id, :source, :source_type, :origin, :created_at, :updated_at, :last_active_at, :agent_card, :created_by, :state) """ ), { "id": provider_id, - "type": "unmanaged", "source": "test://provider", + "source_type": "api", "origin": "test", - "version_info": "{}", - "timeout": 3600, "created_at": utc_now(), "updated_at": utc_now(), "last_active_at": utc_now(), "agent_card": "{}", "created_by": normal_user, - 
"unmanaged_state": None, + "state": None, }, ) return provider_id @@ -331,23 +329,21 @@ async def test_get_all_multiple_entities( await db_transaction.execute( text( """ - INSERT INTO providers (id, type, source, origin, version_info, auto_stop_timeout_sec, created_at, updated_at, last_active_at, agent_card, created_by, unmanaged_state) - VALUES (:id, :type, :source, :origin, :version_info, :timeout, :created_at, :updated_at, :last_active_at, :agent_card, :created_by, :unmanaged_state) + INSERT INTO providers (id, source, source_type, origin, created_at, updated_at, last_active_at, agent_card, created_by, state) + VALUES (:id, :source, :source_type, :origin, :created_at, :updated_at, :last_active_at, :agent_card, :created_by, :state) """ ), { "id": provider_id, - "type": "unmanaged", "source": f"test://provider{i}", + "source_type": "api", "origin": "test", - "version_info": "{}", - "timeout": 3600, "created_at": utc_now(), "updated_at": utc_now(), "last_active_at": utc_now(), "agent_card": "{}", "created_by": normal_user, - "unmanaged_state": None, + "state": None, }, ) @@ -436,23 +432,21 @@ async def test_variable_isolation_between_entity_types( await db_transaction.execute( text( """ - INSERT INTO providers (id, type, source, origin, version_info, auto_stop_timeout_sec, created_at, updated_at, last_active_at, agent_card, created_by, unmanaged_state) - VALUES (:id, :type, :source, :origin, :version_info, :timeout, :created_at, :updated_at, :last_active_at, :agent_card, :created_by, :unmanaged_state) + INSERT INTO providers (id, source, source_type, origin, created_at, updated_at, last_active_at, agent_card, created_by, state) + VALUES (:id, :source, :source_type, :origin, :created_at, :updated_at, :last_active_at, :agent_card, :created_by, :state) """ ), { "id": entity_id, - "type": "unmanaged", "source": "test://provider", + "source_type": "api", "origin": "test", - "version_info": "{}", - "timeout": 3600, "created_at": utc_now(), "updated_at": utc_now(), 
"last_active_at": utc_now(), "agent_card": "{}", "created_by": normal_user, - "unmanaged_state": None, + "state": None, }, ) diff --git a/apps/agentstack-server/tests/integration/persistence/repositories/test_provider.py b/apps/agentstack-server/tests/integration/persistence/repositories/test_provider.py index 11ebdf417c..1411f0bd75 100644 --- a/apps/agentstack-server/tests/integration/persistence/repositories/test_provider.py +++ b/apps/agentstack-server/tests/integration/persistence/repositories/test_provider.py @@ -5,7 +5,6 @@ import json import uuid -from datetime import timedelta import pytest from a2a.types import AgentCapabilities, AgentCard @@ -35,7 +34,6 @@ async def test_provider(set_di_configuration, normal_user: UUID) -> Provider: return Provider( source=source, origin=source.origin, - registry=None, agent_card=AgentCard( name="Hello World Agent", description="Just a hello world agent", @@ -46,7 +44,6 @@ async def test_provider(set_di_configuration, normal_user: UUID) -> Provider: capabilities=AgentCapabilities(), skills=[], ), - auto_stop_timeout=timedelta(minutes=5), created_by=normal_user, ) @@ -64,9 +61,7 @@ async def test_create_provider(db_transaction: AsyncConnection, test_provider: P assert row is not None assert str(row.id) == str(test_provider.id) assert row.source == str(test_provider.source.root) - assert row.registry == (str(test_provider.registry.root) if test_provider.registry else None) - assert row.auto_stop_timeout_sec == int(test_provider.auto_stop_timeout.total_seconds()) - assert row.type == test_provider.type + assert row.source_type == test_provider.source_type @pytest.mark.usefixtures("set_di_configuration") @@ -78,7 +73,7 @@ async def test_get_provider(db_transaction: AsyncConnection, test_provider, norm provider_data = { "id": source.provider_id, "source": str(source.root), - "registry": None, + "source_type": "api", "created_at": utc_now(), "last_active_at": utc_now(), "agent_card": { @@ -92,24 +87,20 @@ async def 
test_get_provider(db_transaction: AsyncConnection, test_provider, norm "url": "http://localhost:8000/", "version": "1.0.0", }, - "auto_stop_timeout_sec": 300, # 5 minutes - "type": "unmanaged", - "version_info": {"docker": None, "github": None}, - "unmanaged_state": None, + "state": None, "created_by": normal_user, } await db_transaction.execute( text( - "INSERT INTO providers (id, type, source, origin, version_info, registry, auto_stop_timeout_sec, agent_card, created_at, updated_at, last_active_at, created_by, unmanaged_state) " - "VALUES (:id, :type, :source, :origin, :version_info, :registry, :auto_stop_timeout_sec, :agent_card, :created_at, :updated_at, :last_active_at, :created_by, :unmanaged_state)" + "INSERT INTO providers (id, source, source_type, origin, agent_card, created_at, updated_at, last_active_at, created_by, state) " + "VALUES (:id, :source, :source_type, :origin, :agent_card, :created_at, :updated_at, :last_active_at, :created_by, :state)" ), { **provider_data, "origin": source.origin, "updated_at": utc_now(), "agent_card": json.dumps(provider_data["agent_card"]), - "version_info": json.dumps(provider_data["version_info"]), }, ) # Get provider @@ -118,9 +109,8 @@ async def test_get_provider(db_transaction: AsyncConnection, test_provider, norm # Verify provider assert provider.id == provider_data["id"] assert str(provider.source.root) == provider_data["source"] - assert provider.registry is None - assert provider.auto_stop_timeout == timedelta(seconds=provider_data["auto_stop_timeout_sec"]) - assert provider.type == provider_data["type"] + assert provider.source_type == provider_data["source_type"] + assert provider.state is None async def test_get_provider_not_found(db_transaction: AsyncConnection): @@ -162,7 +152,7 @@ async def test_list_providers(db_transaction: AsyncConnection, normal_user: UUID first_provider = { "id": source.provider_id, "source": str(source.root), - "registry": None, + "source_type": "api", "created_at": utc_now(), 
"last_active_at": utc_now(), "agent_card": { @@ -176,16 +166,13 @@ async def test_list_providers(db_transaction: AsyncConnection, normal_user: UUID "url": "http://localhost:8001/", "version": "1.0.0", }, - "auto_stop_timeout_sec": 300, - "type": "unmanaged", - "version_info": {"docker": None, "github": None}, - "unmanaged_state": None, + "state": None, "created_by": normal_user, } second_provider = { "id": source2.provider_id, "source": str(source2.root), - "registry": None, + "source_type": "api", "created_at": utc_now(), "last_active_at": utc_now(), "agent_card": { @@ -199,17 +186,14 @@ async def test_list_providers(db_transaction: AsyncConnection, normal_user: UUID "url": "http://localhost:8002/", "version": "1.0.0", }, - "auto_stop_timeout_sec": 600, - "type": "unmanaged", - "version_info": {"docker": None, "github": None}, - "unmanaged_state": None, + "state": None, "created_by": normal_user, } await db_transaction.execute( text( - "INSERT INTO providers (id, type, source, origin, version_info, registry, agent_card, created_at, updated_at, last_active_at, auto_stop_timeout_sec, created_by, unmanaged_state) " - "VALUES (:id, :type, :source, :origin, :version_info, :registry, :agent_card, :created_at, :updated_at, :last_active_at, :auto_stop_timeout_sec, :created_by, :unmanaged_state)" + "INSERT INTO providers (id, source, source_type, origin, agent_card, created_at, updated_at, last_active_at, created_by, state) " + "VALUES (:id, :source, :source_type, :origin, :agent_card, :created_at, :updated_at, :last_active_at, :created_by, :state)" ), [ { @@ -217,14 +201,12 @@ async def test_list_providers(db_transaction: AsyncConnection, normal_user: UUID "origin": source.origin, "updated_at": utc_now(), "agent_card": json.dumps(first_provider["agent_card"]), - "version_info": json.dumps(first_provider["version_info"]), }, { **second_provider, "origin": source2.origin, "updated_at": utc_now(), "agent_card": json.dumps(second_provider["agent_card"]), - "version_info": 
json.dumps(second_provider["version_info"]), }, ], ) @@ -235,16 +217,10 @@ async def test_list_providers(db_transaction: AsyncConnection, normal_user: UUID # Verify providers assert len(providers) == 2 assert str(providers[first_provider["id"]].source.root) == first_provider["source"] - assert providers[first_provider["id"]].auto_stop_timeout == timedelta( - seconds=first_provider["auto_stop_timeout_sec"] - ) - assert providers[first_provider["id"]].type == first_provider["type"] + assert providers[first_provider["id"]].source_type == first_provider["source_type"] assert str(providers[second_provider["id"]].source.root) == second_provider["source"] - assert providers[second_provider["id"]].auto_stop_timeout == timedelta( - seconds=second_provider["auto_stop_timeout_sec"] - ) - assert providers[second_provider["id"]].type == second_provider["type"] + assert providers[second_provider["id"]].source_type == second_provider["source_type"] async def test_create_duplicate_provider(db_transaction: AsyncConnection, test_provider: Provider, normal_user: UUID): @@ -259,9 +235,7 @@ async def test_create_duplicate_provider(db_transaction: AsyncConnection, test_p duplicate_provider = Provider( source=duplicate_source, origin=duplicate_source.origin, - registry=None, agent_card=test_provider.agent_card.model_copy(update={"name": "NEW_AGENT"}), - auto_stop_timeout=timedelta(minutes=10), # Different timeout created_by=normal_user, ) diff --git a/apps/agentstack-server/tests/integration/utils/test_docker.py b/apps/agentstack-server/tests/integration/utils/test_docker.py deleted file mode 100644 index e6f7709c25..0000000000 --- a/apps/agentstack-server/tests/integration/utils/test_docker.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright 2025 © BeeAI a Series of LF Projects, LLC -# SPDX-License-Identifier: Apache-2.0 - -from __future__ import annotations - -import pytest -from kink import di -from kink.errors import ServiceError - -from agentstack_server.configuration import 
Configuration -from agentstack_server.utils.docker import DockerImageID - -pytestmark = pytest.mark.integration - - -@pytest.fixture -def configuration(): - from contextlib import suppress - - orig_conf = None - with suppress(ServiceError): - orig_conf = di[Configuration] - di[Configuration] = Configuration() - yield - if orig_conf: - di[Configuration] = orig_conf - - -@pytest.mark.parametrize( - "image", - [ - DockerImageID(root="ghcr.io/i-am-bee/agentstack/agents/chat:0.4.0-rc1"), - DockerImageID(root="redis:latest"), - DockerImageID(root="icr.io/ibm-messaging/mq:latest"), - DockerImageID(root="registry.goharbor.io/nightly/goharbor/harbor-log:v1.10.0"), - ], -) -async def test_get_image_labels(image, configuration): - resolved_image = await image.resolve_version() - assert resolved_image.digest - await resolved_image.get_labels() diff --git a/apps/agentstack-server/tests/integration/utils/test_github.py b/apps/agentstack-server/tests/integration/utils/test_github.py deleted file mode 100644 index 3633706a12..0000000000 --- a/apps/agentstack-server/tests/integration/utils/test_github.py +++ /dev/null @@ -1,52 +0,0 @@ -# Copyright 2025 © BeeAI a Series of LF Projects, LLC -# SPDX-License-Identifier: Apache-2.0 - -from __future__ import annotations - -import os - -import httpx -import pytest -from kink import di -from kink.errors import ServiceError - -from agentstack_server.configuration import Configuration -from agentstack_server.utils.github import GithubUrl - -pytestmark = pytest.mark.integration - - -@pytest.fixture -def configuration(): - from contextlib import suppress - - orig_conf = None - with suppress(ServiceError): - orig_conf = di[Configuration] - di[Configuration] = Configuration() - yield di[Configuration] - if orig_conf: - di[Configuration] = orig_conf - - -@pytest.mark.skipif( - condition=os.getenv("GITHUB_REGISTRY__GITHUB.IBM.COM__TOKEN", None) is None, - reason="Skip if PAT token not provided", -) -async def 
test_github_private_resolve(configuration: Configuration): - assert configuration.github_registry.get("github.ibm.com") - resolved_url = await GithubUrl("https://github.ibm.com/Incubation/bee-api").resolve_version() - assert resolved_url.version == "main" - assert resolved_url.commit_hash - async with httpx.AsyncClient() as client: - response = await client.get(str(await resolved_url.get_raw_url("README.md"))) - assert response.text - - -async def test_github_public_resolve(configuration: Configuration): - resolved_url = await GithubUrl("https://github.com/i-am-bee/agentstack").resolve_version() - assert resolved_url.version == "main" - assert resolved_url.commit_hash - async with httpx.AsyncClient() as client: - response = await client.get(str(await resolved_url.get_raw_url("README.md"))) - assert response.text diff --git a/apps/agentstack-server/tests/unit/domain/models/test_permissions.py b/apps/agentstack-server/tests/unit/domain/models/test_permissions.py index 03c5ce3617..f589fd7f32 100644 --- a/apps/agentstack-server/tests/unit/domain/models/test_permissions.py +++ b/apps/agentstack-server/tests/unit/domain/models/test_permissions.py @@ -61,7 +61,6 @@ embeddings={"*"}, a2a_proxy={"*"}, providers={"read", "write"}, - provider_variables={"read", "write"}, contexts={"read", "write"}, ), Permissions( @@ -69,7 +68,6 @@ feedback={"write"}, vector_stores={"write"}, providers={"read"}, - provider_variables={"write"}, contexts={"read"}, ), True, @@ -107,7 +105,6 @@ def test_admin_all_permissions_check(): vector_stores={"read", "write"}, feedback={"write"}, providers={"read", "write"}, - provider_variables={"read", "write"}, contexts={"read", "write"}, a2a_proxy={"*"}, ) @@ -139,12 +136,11 @@ def test_admin_all_permissions_empty_required(): (Permissions(llm={"*"}), Permissions(llm={"*"}, embeddings={"*"}), {"llm": {"*"}, "embeddings": {"*"}}), # Multiple permission types ( - Permissions(files={"read"}, provider_variables={"read"}, contexts={"write"}), - 
Permissions(files={"write"}, provider_variables={"write"}, providers={"read"}), + Permissions(files={"read"}, contexts={"write"}), + Permissions(files={"write"}, providers={"read"}), { "files": {"read", "write"}, "providers": {"read"}, - "provider_variables": {"read", "write"}, "contexts": {"write"}, }, ), diff --git a/apps/agentstack-server/tests/unit/utils/test_docker.py b/apps/agentstack-server/tests/unit/utils/test_docker.py deleted file mode 100644 index 09ee41bc7b..0000000000 --- a/apps/agentstack-server/tests/unit/utils/test_docker.py +++ /dev/null @@ -1,52 +0,0 @@ -# Copyright 2025 © BeeAI a Series of LF Projects, LLC -# SPDX-License-Identifier: Apache-2.0 - -from __future__ import annotations - -import pytest - -from agentstack_server.utils.docker import DockerImageID -from agentstack_server.utils.utils import filter_dict - -pytestmark = pytest.mark.unit - - -@pytest.mark.parametrize( - "image_id,expected", - [ - ("ubuntu:20.04", {"repository": "library/ubuntu", "tag": "20.04"}), - ("library/ubuntu:latest", {"repository": "library/ubuntu"}), - ("docker.io/library/ubuntu:latest", {"repository": "library/ubuntu"}), - ( - "registry.example.com/image:1.0", - {"registry": "registry.example.com", "repository": "image", "tag": "1.0"}, - ), - ( - "registry.example.com/project/image:1.0", - {"registry": "registry.example.com", "repository": "project/image", "tag": "1.0"}, - ), - ( - "registry.example.com/project/image", - {"registry": "registry.example.com", "repository": "project/image"}, - ), - ("user/repo:tag", {"repository": "user/repo", "tag": "tag"}), - ( - "agentstack-registry-svc.default:5001/i-am-bee/agentstack-starter/agent:036bb38e871dbf0d92d049b0237ae1bcac14e136", - { - "registry": "agentstack-registry-svc.default:5001", - "repository": "i-am-bee/agentstack-starter/agent", - "tag": "036bb38e871dbf0d92d049b0237ae1bcac14e136", - }, - ), - ( - "custom.registry/team/product/component:v1.2.3", - {"registry": "custom.registry", "repository": 
"team/product/component", "tag": "v1.2.3"}, - ), - ], -) -def test_parses_docker_url(image_id, expected): - image_id = DockerImageID(image_id) - expected = {"registry": "docker.io", "tag": "latest", **expected} - assert ( - filter_dict({"registry": image_id.registry, "repository": image_id.repository, "tag": image_id.tag}) == expected - ) diff --git a/apps/agentstack-server/tests/unit/utils/test_github.py b/apps/agentstack-server/tests/unit/utils/test_github.py deleted file mode 100644 index 1cf5ac507c..0000000000 --- a/apps/agentstack-server/tests/unit/utils/test_github.py +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright 2025 © BeeAI a Series of LF Projects, LLC -# SPDX-License-Identifier: Apache-2.0 - -from __future__ import annotations - -import pytest -from kink import di -from kink.errors import ServiceError - -from agentstack_server.configuration import Configuration -from agentstack_server.utils.github import GithubUrl -from agentstack_server.utils.utils import filter_dict - -pytestmark = pytest.mark.unit - - -@pytest.fixture -def configuration(): - from contextlib import suppress - - orig_conf = None - with suppress(ServiceError): - orig_conf = di[Configuration] - di[Configuration] = Configuration() - yield - if orig_conf: - di[Configuration] = orig_conf - - -@pytest.mark.parametrize( - "url,expected", - [ - ("http://github.com/myorg/myrepo", {"org": "myorg", "repo": "myrepo"}), - ("git+https://github.com/myorg/myrepo", {"org": "myorg", "repo": "myrepo"}), - ("git+https://github.com/myorg/myrepo.git", {"org": "myorg", "repo": "myrepo"}), - ("https://github.com/myorg/myrepo.git", {"org": "myorg", "repo": "myrepo"}), - ("https://github.com/myorg/myrepo", {"org": "myorg", "repo": "myrepo"}), - ("https://github.com/myorg/myrepo#path=/a/b.txt", {"org": "myorg", "repo": "myrepo", "path": "a/b.txt"}), - ("https://github.ibm.com/myorg/myrepo#path=/a/b.txt", {"org": "myorg", "repo": "myrepo", "path": "a/b.txt"}), - ("https://github.com/myorg/myrepo@1.0.0", {"org": 
"myorg", "repo": "myrepo", "version": "1.0.0"}), - ("https://github.com/myorg/myrepo.git@1.0.0", {"org": "myorg", "repo": "myrepo", "version": "1.0.0"}), - ( - "https://github.com/myorg/myrepo@feature/branch-name", - {"org": "myorg", "repo": "myrepo", "version": "feature/branch-name"}, - ), - ( - "https://github.com/myorg/myrepo.git@1.0.0#path=/a/b.txt", - {"org": "myorg", "repo": "myrepo", "version": "1.0.0", "path": "a/b.txt"}, - ), - ("https://github.com/org.dot/repo.dot.git", {"org": "org.dot", "repo": "repo.dot"}), - ], -) -def test_parses_github_url(url, expected, configuration): - url = GithubUrl(url) - assert filter_dict({"org": url.org, "repo": url.repo, "version": url.version, "path": url.path}) == expected - - -@pytest.mark.parametrize( - "url", - [ - "", # Empty string - "http://github.com", # Missing org and repo - "git+invalid://github.com/org/repo", # Invalid protocol - "https://github.com/org", # Missing repo - "https://gitlab.com/org/repo", # Different domain - "https://github.com /org/repo", # extra space - "https://github.com/org /repo", # extra space - "https://github.com/org/repo#path=;DROP TABLE", # extra path - "git@github.com:org/repo.git", # SSH format (not supported) - ], -) -def test_invalid_urls(url): - """Test that invalid URLs raise ValueError.""" - with pytest.raises(ValueError): - GithubUrl(url) diff --git a/apps/agentstack-ui/src/modules/agents/api/queries/useListAgents.ts b/apps/agentstack-ui/src/modules/agents/api/queries/useListAgents.ts index 3c0798c3b7..4f10c84f02 100644 --- a/apps/agentstack-ui/src/modules/agents/api/queries/useListAgents.ts +++ b/apps/agentstack-ui/src/modules/agents/api/queries/useListAgents.ts @@ -4,12 +4,7 @@ */ import { useQuery } from '@tanstack/react-query'; -import { - type ListProvidersRequest, - type ListProvidersResponse, - ProviderStatus, - ProviderUnmanagedStatus, -} from 'agentstack-sdk'; +import { type ListProvidersRequest, type ListProvidersResponse, ProviderState } from 'agentstack-sdk'; 
import { buildAgent, isAgentUiSupported, sortAgentsByName, sortProvidersBy } from '#modules/agents/utils.ts'; import { listProviders } from '#modules/providers/api/index.ts'; @@ -37,7 +32,7 @@ export function useListAgents({ includeUnsupportedUi, includeOffline, orderBy, i if (!includeOffline) { items = items.filter( - ({ state }) => state !== ProviderUnmanagedStatus.Offline && state !== ProviderStatus.Error, + ({ state }) => state !== ProviderState.Offline, ); } diff --git a/apps/agentstack-ui/src/modules/agents/components/import/ImportAgentsModal.tsx b/apps/agentstack-ui/src/modules/agents/components/import/ImportAgentsModal.tsx index ecb5b7f464..93b1bc2737 100644 --- a/apps/agentstack-ui/src/modules/agents/components/import/ImportAgentsModal.tsx +++ b/apps/agentstack-ui/src/modules/agents/components/import/ImportAgentsModal.tsx @@ -3,31 +3,17 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { - Button, - InlineLoading, - InlineNotification, - ModalBody, - ModalFooter, - ModalHeader, - RadioButton, - RadioButtonGroup, - Select, - SelectItem, - TextInput, -} from '@carbon/react'; +import { Button, InlineLoading, InlineNotification, ModalBody, ModalFooter, ModalHeader, TextInput } from '@carbon/react'; import clsx from 'clsx'; -import { useEffect, useId } from 'react'; -import { useController, useForm } from 'react-hook-form'; +import { useId } from 'react'; +import { useForm } from 'react-hook-form'; -import { CodeSnippet } from '#components/CodeSnippet/CodeSnippet.tsx'; import { Modal } from '#components/Modal/Modal.tsx'; import { useApp } from '#contexts/App/index.ts'; import type { ModalProps } from '#contexts/Modal/modal-context.ts'; import { useImportAgent } from '#modules/agents/hooks/useImportAgent.ts'; import type { ImportAgentFormValues } from '#modules/agents/types.ts'; import { ProviderSource } from '#modules/providers/types.ts'; -import { isValidUrl } from '#utils/url.ts'; import classes from './ImportAgentsModal.module.scss'; @@ -35,40 +21,26 @@ 
export function ImportAgentsModal({ onRequestClose, ...modalProps }: ModalProps) const id = useId(); const { - config: { appName, featureFlags }, + config: { appName }, } = useApp(); - const { agent, logs, actionRequired, providersToUpdate, isPending, error, importAgent } = useImportAgent(); + const { agent, isPending, error, importAgent } = useImportAgent(); const { register, handleSubmit, - resetField, formState: { isValid, errors }, - control, } = useForm({ mode: 'onTouched', defaultValues: { - source: featureFlags.ProviderBuilds ? ProviderSource.GitHub : ProviderSource.Docker, + source: ProviderSource.Docker, }, }); - const { field: sourceField } = useController({ name: 'source', control }); - const { field: actionField } = useController({ name: 'action', control }); - - const showLogs = isPending && logs.length > 0; - const onSubmit = async (values: ImportAgentFormValues) => { await importAgent(values); - - resetField('action'); - resetField('providerId'); }; - useEffect(() => { - resetField('location'); - }, [sourceField.value, resetField]); - return ( onRequestClose()}> @@ -76,93 +48,29 @@ export function ImportAgentsModal({ onRequestClose, ...modalProps }: ModalProps) -
+ {agent ? (

{agent.name} agent added successfully.

) : isPending ? ( - <> - - - {showLogs && ( - - {logs.join('\n')} - - )} - - ) : actionRequired ? ( -
-

Existing agents detected. What would you like to do?

- - - - - - - {actionField.value === 'update_provider' && providersToUpdate && ( - - )} -
+ ) : (

Once your agent is published, it will be visible to everyone with access to {appName}.

- - {featureFlags.ProviderBuilds && ( - - )} - - - - value.trim(), - validate: (value: string) => { - if (sourceField.value === ProviderSource.GitHub) { - return isValidUrl(value) || 'Enter a valid GitHub repository URL.'; - } - - return true; - }, })} />
@@ -176,12 +84,7 @@ export function ImportAgentsModal({ onRequestClose, ...modalProps }: ModalProps) {!agent && ( - diff --git a/apps/agentstack-ui/src/modules/agents/hooks/useImportAgent.ts b/apps/agentstack-ui/src/modules/agents/hooks/useImportAgent.ts index 6cee584b64..d15a6d570d 100644 --- a/apps/agentstack-ui/src/modules/agents/hooks/useImportAgent.ts +++ b/apps/agentstack-ui/src/modules/agents/hooks/useImportAgent.ts @@ -3,57 +3,16 @@ * SPDX-License-Identifier: Apache-2.0 */ -import type { Provider, ProviderBuildOnCompleteAction } from 'agentstack-sdk'; import { useEffect, useMemo, useState } from 'react'; -import { flushSync } from 'react-dom'; -import { useCreateProviderBuild } from '#modules/provider-builds/api/mutations/useCreateProviderBuild.ts'; -import { usePreviewProviderBuild } from '#modules/provider-builds/api/mutations/usePreviewProviderBuild.ts'; -import { useProviderBuild } from '#modules/provider-builds/api/queries/useProviderBuild.ts'; -import { useProviderBuildLogs } from '#modules/provider-builds/api/queries/useProviderBuildLogs.ts'; import { useImportProvider } from '#modules/providers/api/mutations/useImportProvider.ts'; -import { useListProviders } from '#modules/providers/api/queries/useListProviders.ts'; import { ProviderSourcePrefixes } from '#modules/providers/constants.ts'; -import { ProviderSource } from '#modules/providers/types.ts'; -import { maybeParseJson } from '#modules/runs/utils.ts'; -import { isNotNull } from '#utils/helpers.ts'; import { useAgent } from '../api/queries/useAgent'; import type { ImportAgentFormValues } from '../types'; export function useImportAgent() { const [errorMessage, setErrorMessage] = useState(null); - const [providerOrigin, setProviderOrigin] = useState(null); - const [buildId, setBuildId] = useState(); - - const [actionRequired, setActionRequired] = useState(false); - const [providersToUpdate, setProvidersToUpdate] = useState(); - - const { - isFetching: isProvidersFetching, - error: 
providersError, - refetch: fetchProviders, - } = useListProviders({ - query: { origin: providerOrigin ? encodeURI(providerOrigin) : null }, - enabled: false, - }); - const { data: build } = useProviderBuild({ id: buildId }); - const { data: buildLogs } = useProviderBuildLogs({ id: buildId }); - - const buildStatus = build?.status; - const buildErrorMessage = build?.error_message; - - const { - mutateAsync: previewProviderBuild, - isPending: isPreviewPending, - error: previewError, - } = usePreviewProviderBuild(); - - const { - mutateAsync: createProviderBuild, - isPending: isCreateBuildPending, - error: buildError, - } = useCreateProviderBuild(); const { data: importedProvider, @@ -62,114 +21,18 @@ export function useImportAgent() { error: importError, } = useImportProvider(); - const providerId = importedProvider?.id ?? build?.provider_id; + const providerId = importedProvider?.id; const { data: agent } = useAgent({ providerId }); - const logs = - buildLogs - ?.map(({ data }) => { - const parsed = maybeParseJson(data); - - if (!parsed) { - return null; - } - - const { type, value } = parsed; - - if (type === 'json') { - const json = JSON.parse(value); - const message = json.message; - - if (message && typeof message === 'string') { - return message; - } - } - - return value; - }) - .filter(isNotNull) ?? 
[]; - - const isBuildPending = isCreateBuildPending || (buildId && buildStatus !== 'completed' && buildStatus !== 'failed'); - const isPending = - isPreviewPending || isProvidersFetching || isBuildPending || isImportPending || Boolean(providerId && !agent); + const isPending = isImportPending || Boolean(providerId && !agent); const resetState = () => { setErrorMessage(null); - setProviderOrigin(null); - setBuildId(undefined); - setActionRequired(false); - setProvidersToUpdate(undefined); }; - const createBuild = async ({ - location, - action = 'add_provider', - providerId = '', - }: Pick) => { - let onCompleteAction: ProviderBuildOnCompleteAction = { type: 'no_action' }; - - switch (action) { - case 'update_provider': - onCompleteAction = { type: 'update_provider', provider_id: providerId }; - - break; - case 'add_provider': - onCompleteAction = { type: 'add_provider' }; - - break; - } - - const createdBuild = await createProviderBuild({ location, on_complete: onCompleteAction }); - - setBuildId(createdBuild?.id); - }; - - const importAgent = async ({ source, location, action, providerId }: ImportAgentFormValues) => { + const importAgent = async ({ source, location }: ImportAgentFormValues) => { resetState(); - - if (source === ProviderSource.GitHub) { - if (action) { - createBuild({ location, action, providerId }); - - return; - } - - const buildPreview = await previewProviderBuild({ location }); - - if (!buildPreview) { - return; - } - - const { provider_origin: providerOrigin, destination } = buildPreview; - - flushSync(() => setProviderOrigin(providerOrigin)); - - const { data: providers } = await fetchProviders(); - - if (!providers) { - return; - } - - const { total_count: providersCount, items } = providers; - const provider = items.find((provider) => provider.source === destination); - - if (provider) { - setErrorMessage(`Duplicate provider found: source='${destination}' already exists`); - - return; - } - - if (providersCount !== 0) { - 
setActionRequired(true); - setProvidersToUpdate(items); - - return; - } - - createBuild({ location, action }); - } else if (source === ProviderSource.Docker) { - await importProvider({ location: `${ProviderSourcePrefixes[source]}${location}` }); - } + await importProvider({ location: `${ProviderSourcePrefixes[source]}${location}` }); }; const error = useMemo(() => { @@ -184,20 +47,16 @@ export function useImportAgent() { }, [errorMessage]); useEffect(() => { - const normalizedBuildError = buildErrorMessage ? new Error(buildErrorMessage) : buildError; - - const error = previewError ?? providersError ?? normalizedBuildError ?? importError; - - if (error) { - setErrorMessage(error.message); + if (importError) { + setErrorMessage(importError.message); } - }, [buildErrorMessage, buildError, importError, providersError, previewError]); + }, [importError]); return { agent, - logs, - actionRequired, - providersToUpdate, + logs: [] as string[], + actionRequired: false, + providersToUpdate: undefined, isPending, error, importAgent, diff --git a/apps/agentstack-ui/src/modules/agents/hooks/useProviderStatus.ts b/apps/agentstack-ui/src/modules/agents/hooks/useProviderStatus.ts index 319e00953c..35f1689c4e 100644 --- a/apps/agentstack-ui/src/modules/agents/hooks/useProviderStatus.ts +++ b/apps/agentstack-ui/src/modules/agents/hooks/useProviderStatus.ts @@ -4,7 +4,7 @@ */ import type { Provider } from 'agentstack-sdk'; -import { ProviderStatus } from 'agentstack-sdk'; +import { ProviderState } from 'agentstack-sdk'; import { useProvider } from '#modules/providers/api/queries/useProvider.ts'; @@ -14,10 +14,10 @@ interface Props { function getStatusHelpers(data?: Provider) { const status = data?.state; - const isNotInstalled = status === ProviderStatus.Missing; - const isStarting = status === ProviderStatus.Starting; - const isError = status === ProviderStatus.Error; - const isReady = status === ProviderStatus.Ready || status === ProviderStatus.Running; + const isNotInstalled = 
false; + const isStarting = false; + const isError = status === ProviderState.Offline; + const isReady = status === ProviderState.Online; return { status, diff --git a/apps/agentstack-ui/src/modules/agents/types.ts b/apps/agentstack-ui/src/modules/agents/types.ts index 87443106d0..1f28922367 100644 --- a/apps/agentstack-ui/src/modules/agents/types.ts +++ b/apps/agentstack-ui/src/modules/agents/types.ts @@ -3,12 +3,11 @@ * SPDX-License-Identifier: Apache-2.0 */ -import type { CreateProviderRequest, ProviderBuildOnCompleteAction } from 'agentstack-sdk'; +import type { CreateProviderRequest } from 'agentstack-sdk'; import type { ProviderSource } from '#modules/providers/types.ts'; export type ImportAgentFormValues = CreateProviderRequest & { source: ProviderSource; - action?: ProviderBuildOnCompleteAction['type']; providerId?: string; }; diff --git a/apps/agentstack-ui/src/modules/provider-builds/api/index.ts b/apps/agentstack-ui/src/modules/provider-builds/api/index.ts deleted file mode 100644 index 6ec6d68efb..0000000000 --- a/apps/agentstack-ui/src/modules/provider-builds/api/index.ts +++ /dev/null @@ -1,37 +0,0 @@ -/** - * Copyright 2025 © BeeAI a Series of LF Projects, LLC - * SPDX-License-Identifier: Apache-2.0 - */ - -import type { CreateProviderBuildRequest, PreviewProviderBuildRequest, ReadProviderBuildRequest } from 'agentstack-sdk'; -import { unwrapResult } from 'agentstack-sdk'; - -import { agentStackClient } from '#api/agentstack-client.ts'; - -export async function createProviderBuild(request: CreateProviderBuildRequest) { - const response = await agentStackClient.createProviderBuild(request); - const result = unwrapResult(response); - - return result; -} - -export async function readProviderBuild(request: ReadProviderBuildRequest) { - const response = await agentStackClient.readProviderBuild(request); - const result = unwrapResult(response); - - return result; -} - -export async function readProviderBuildLogs(request: ReadProviderBuildRequest) { - const 
response = await agentStackClient.readProviderBuildLogs(request); - const result = unwrapResult(response); - - return result; -} - -export async function previewProviderBuild(request: PreviewProviderBuildRequest) { - const response = await agentStackClient.previewProviderBuild(request); - const result = unwrapResult(response); - - return result; -} diff --git a/apps/agentstack-ui/src/modules/provider-builds/api/keys.ts b/apps/agentstack-ui/src/modules/provider-builds/api/keys.ts deleted file mode 100644 index e7501cbd3c..0000000000 --- a/apps/agentstack-ui/src/modules/provider-builds/api/keys.ts +++ /dev/null @@ -1,16 +0,0 @@ -/** - * Copyright 2025 © BeeAI a Series of LF Projects, LLC - * SPDX-License-Identifier: Apache-2.0 - */ - -import type { ReadProviderBuildLogsRequest, ReadProviderBuildRequest } from 'agentstack-sdk'; - -export const providerBuildKeys = { - all: () => ['provider-builds'] as const, - lists: () => [...providerBuildKeys.all(), 'list'] as const, - list: () => [...providerBuildKeys.lists()] as const, - details: () => [...providerBuildKeys.all(), 'detail'] as const, - detail: ({ id }: ReadProviderBuildRequest) => [...providerBuildKeys.details(), id] as const, - logs: () => [...providerBuildKeys.all(), 'logs'] as const, - log: ({ id }: ReadProviderBuildLogsRequest) => [...providerBuildKeys.logs(), id] as const, -}; diff --git a/apps/agentstack-ui/src/modules/provider-builds/api/mutations/useCreateProviderBuild.ts b/apps/agentstack-ui/src/modules/provider-builds/api/mutations/useCreateProviderBuild.ts deleted file mode 100644 index 68ccdc0a6e..0000000000 --- a/apps/agentstack-ui/src/modules/provider-builds/api/mutations/useCreateProviderBuild.ts +++ /dev/null @@ -1,22 +0,0 @@ -/** - * Copyright 2025 © BeeAI a Series of LF Projects, LLC - * SPDX-License-Identifier: Apache-2.0 - */ - -import { useMutation } from '@tanstack/react-query'; - -import { createProviderBuild } from '..'; - -export function useCreateProviderBuild() { - const mutation = 
useMutation({ - mutationFn: createProviderBuild, - meta: { - errorToast: { - title: 'Failed to create provider build.', - includeErrorMessage: true, - }, - }, - }); - - return mutation; -} diff --git a/apps/agentstack-ui/src/modules/provider-builds/api/mutations/usePreviewProviderBuild.ts b/apps/agentstack-ui/src/modules/provider-builds/api/mutations/usePreviewProviderBuild.ts deleted file mode 100644 index 3423493dde..0000000000 --- a/apps/agentstack-ui/src/modules/provider-builds/api/mutations/usePreviewProviderBuild.ts +++ /dev/null @@ -1,22 +0,0 @@ -/** - * Copyright 2025 © BeeAI a Series of LF Projects, LLC - * SPDX-License-Identifier: Apache-2.0 - */ - -import { useMutation } from '@tanstack/react-query'; - -import { previewProviderBuild } from '..'; - -export function usePreviewProviderBuild() { - const mutation = useMutation({ - mutationFn: previewProviderBuild, - meta: { - errorToast: { - title: 'Failed to preview provider build.', - includeErrorMessage: true, - }, - }, - }); - - return mutation; -} diff --git a/apps/agentstack-ui/src/modules/provider-builds/api/queries/useProviderBuild.ts b/apps/agentstack-ui/src/modules/provider-builds/api/queries/useProviderBuild.ts deleted file mode 100644 index 2c12f97131..0000000000 --- a/apps/agentstack-ui/src/modules/provider-builds/api/queries/useProviderBuild.ts +++ /dev/null @@ -1,33 +0,0 @@ -/** - * Copyright 2025 © BeeAI a Series of LF Projects, LLC - * SPDX-License-Identifier: Apache-2.0 - */ - -import { useQuery } from '@tanstack/react-query'; -import { ProviderBuildState } from 'agentstack-sdk'; - -import { readProviderBuild } from '..'; -import { providerBuildKeys } from '../keys'; - -interface Props { - id: string | undefined; -} - -export function useProviderBuild({ id = '' }: Props) { - const query = useQuery({ - queryKey: providerBuildKeys.detail({ id }), - queryFn: () => readProviderBuild({ id }), - enabled: Boolean(id), - refetchInterval: (query) => { - const status = query.state.data?.status; - - if 
(status === ProviderBuildState.Completed) { - return false; - } - - return 5_000; - }, - }); - - return query; -} diff --git a/apps/agentstack-ui/src/modules/provider-builds/api/queries/useProviderBuildLogs.ts b/apps/agentstack-ui/src/modules/provider-builds/api/queries/useProviderBuildLogs.ts deleted file mode 100644 index 5759da088a..0000000000 --- a/apps/agentstack-ui/src/modules/provider-builds/api/queries/useProviderBuildLogs.ts +++ /dev/null @@ -1,33 +0,0 @@ -/** - * Copyright 2025 © BeeAI a Series of LF Projects, LLC - * SPDX-License-Identifier: Apache-2.0 - */ - -import { experimental_streamedQuery as streamedQuery, useQuery } from '@tanstack/react-query'; -import { EventSourceParserStream } from 'eventsource-parser/stream'; - -import { readProviderBuildLogs } from '..'; -import { providerBuildKeys } from '../keys'; -import { readableToAsyncIterable } from '../utils'; - -interface Props { - id: string | undefined; -} - -export function useProviderBuildLogs({ id = '' }: Props) { - const query = useQuery({ - queryKey: providerBuildKeys.log({ id }), - queryFn: streamedQuery({ - streamFn: async () => { - const data = await readProviderBuildLogs({ id }); - - const stream = data.pipeThrough(new TextDecoderStream()).pipeThrough(new EventSourceParserStream()); - - return readableToAsyncIterable(stream); - }, - }), - enabled: Boolean(id), - }); - - return query; -} diff --git a/apps/agentstack-ui/src/modules/provider-builds/api/utils.ts b/apps/agentstack-ui/src/modules/provider-builds/api/utils.ts deleted file mode 100644 index 3170ea4337..0000000000 --- a/apps/agentstack-ui/src/modules/provider-builds/api/utils.ts +++ /dev/null @@ -1,26 +0,0 @@ -/** - * Copyright 2025 © BeeAI a Series of LF Projects, LLC - * SPDX-License-Identifier: Apache-2.0 - */ - -export async function* readableToAsyncIterable(stream?: ReadableStream): AsyncIterable { - if (!stream) { - return; - } - - const reader = stream.getReader(); - - try { - while (true) { - const { done, value } = await 
reader.read(); - - if (done) { - return; - } - - yield value; - } - } finally { - reader.releaseLock(); - } -} diff --git a/apps/agentstack-ui/src/modules/runs/components/RunInput.tsx b/apps/agentstack-ui/src/modules/runs/components/RunInput.tsx index 7f9a9d598f..0efad842a9 100644 --- a/apps/agentstack-ui/src/modules/runs/components/RunInput.tsx +++ b/apps/agentstack-ui/src/modules/runs/components/RunInput.tsx @@ -5,7 +5,7 @@ import { InlineLoading } from '@carbon/react'; import { useMergeRefs } from '@floating-ui/react'; -import { InteractionMode, ProviderUnmanagedStatus } from 'agentstack-sdk'; +import { InteractionMode, ProviderState } from 'agentstack-sdk'; import clsx from 'clsx'; import { useRef, useState } from 'react'; import { FormProvider, useForm } from 'react-hook-form'; @@ -70,7 +70,7 @@ export function RunInput({ promptExamples, onMessageSent }: Props) { const inputProps = register('input', { required: true }); const inputValue = watch('input'); - const isProviderOffline = provider.state === ProviderUnmanagedStatus.Offline; + const isProviderOffline = provider.state === ProviderState.Offline; const isLoadingModelProviders = llmProviders.isLoading || embeddingProviders.isLoading; const isSubmitDisabled = !isReady || isFileUploadPending || !inputValue || isLoadingModelProviders || isProviderOffline; diff --git a/apps/agentstack-ui/src/utils/constants.ts b/apps/agentstack-ui/src/utils/constants.ts index acc858ba81..089717073d 100644 --- a/apps/agentstack-ui/src/utils/constants.ts +++ b/apps/agentstack-ui/src/utils/constants.ts @@ -11,7 +11,7 @@ export const APP_FAVICON_SVG = process.env.NEXT_PUBLIC_APP_FAVICON_SVG ?? '/favi export const APP_FAVICON_SVG_DARK = process.env.NEXT_PUBLIC_APP_FAVICON_SVG_DARK ?? '/favicon-dark.svg'; -export const API_URL = process.env.API_URL ?? 'http://127.0.0.1:8333'; +export const API_URL = process.env.API_URL ?? 
'http://agentstack-api.localtest.me:8080'; export const PROD_MODE = process.env.NODE_ENV === 'production'; diff --git a/apps/agentstack-ui/src/utils/feature-flags.ts b/apps/agentstack-ui/src/utils/feature-flags.ts index df76756602..b46512f0dc 100644 --- a/apps/agentstack-ui/src/utils/feature-flags.ts +++ b/apps/agentstack-ui/src/utils/feature-flags.ts @@ -8,7 +8,6 @@ import z from 'zod'; export const featureFlagsSchema = z.strictObject({ Connectors: z.boolean().optional(), LocalSetup: z.boolean().optional(), - ProviderBuilds: z.boolean().optional(), Providers: z.boolean().optional(), Variables: z.boolean().optional(), }); @@ -19,7 +18,6 @@ export type FeatureName = keyof FeatureFlags; export const featureFlagsDefaults: Required = { Connectors: false, LocalSetup: false, - ProviderBuilds: false, Providers: false, Variables: false, }; diff --git a/apps/agentstack-ui/tasks.toml b/apps/agentstack-ui/tasks.toml index 050cbfb868..ecc2231ae4 100644 --- a/apps/agentstack-ui/tasks.toml +++ b/apps/agentstack-ui/tasks.toml @@ -75,7 +75,11 @@ run = "docker build -t ghcr.io/i-am-bee/agentstack/agentstack-ui:local -f ./apps depends = ["common:setup:pnpm", "agentstack-sdk-ts:build"] dir = "{{config_root}}/apps/agentstack-ui" env.NODE_OPTIONS = "--no-experimental-global-navigator" -run = "pnpm next build --webpack" +run = """ +pnpm next build --webpack +# Remove broken symlinks in standalone output — they cause mise's task_source_checker to panic on stat() +find .next/standalone -type l ! 
-exec test -e {} \\; -delete 2>/dev/null || true +""" sources = [ "package.json", "next.config.ts", @@ -89,4 +93,4 @@ outputs = [".next/standalone/**/*"] ["agentstack-ui:schema:generate"] dir = "{{config_root}}/apps/agentstack-ui" -run = "./scripts/generate-schema.sh http://localhost:8333/api/v1/openapi.json ./src/api/schema.d.ts" +run = "./scripts/generate-schema.sh http://agentstack-api.localtest.me:8080/api/v1/openapi.json ./src/api/schema.d.ts" diff --git a/apps/agentstack-ui/template.env b/apps/agentstack-ui/template.env index 7bb09b2083..11abee0ddb 100644 --- a/apps/agentstack-ui/template.env +++ b/apps/agentstack-ui/template.env @@ -1,9 +1,9 @@ ############# OPTIONAL ############# -# Default: 'http://127.0.0.1:8333' +# Default: 'http://agentstack-api.localtest.me:8080' API_URL= # All feature flags default to false -FEATURE_FLAGS='{"Connectors":false,"LocalSetup":false,"MCP":false,"ProviderBuilds":false,"Providers":false,"Variables":false}' +FEATURE_FLAGS='{"Connectors":false,"LocalSetup":false,"Providers":false,"Variables":false}' # Override the default context token generation permissions (JSON string) CONTEXT_TOKEN_PERMISSIONS='{"grant_global_permissions":{"llm":["*"],"embeddings":["*"]},"grant_context_permissions":{"files":["*"],"vector_stores":["*"],"context_data":["*"]}}' @@ -32,4 +32,4 @@ OIDC_PROVIDER_CLIENT_SECRET=agentstack-ui-secret OIDC_PROVIDER_ISSUER=http://localhost:8336/realms/agentstack # next-auth random string NEXTAUTH_SECRET= -NEXTAUTH_URL="http://localhost:8334" +NEXTAUTH_URL="http://agentstack.localtest.me:8080" diff --git a/apps/microshift-vm/rootfs/etc/containers/registries.conf.d/200-microshift-local.conf b/apps/microshift-vm/rootfs/etc/containers/registries.conf.d/200-microshift-local.conf index 94b79b327a..893df3f70f 100644 --- a/apps/microshift-vm/rootfs/etc/containers/registries.conf.d/200-microshift-local.conf +++ b/apps/microshift-vm/rootfs/etc/containers/registries.conf.d/200-microshift-local.conf @@ -1,6 +1,6 @@ [[registry]] 
-location = "agentstack-registry-svc.default:5001" +location = "registry.cr-system.svc.cluster.local:5000" insecure = true [[registry.mirror]] -location = "localhost:30501" +location = "localhost:30500" insecure = true diff --git a/apps/microshift-vm/rootfs/etc/systemd/system/kubectl-port-forward@.service b/apps/microshift-vm/rootfs/etc/systemd/system/kubectl-port-forward@.service index f6d9a16f46..503a1a243c 100644 --- a/apps/microshift-vm/rootfs/etc/systemd/system/kubectl-port-forward@.service +++ b/apps/microshift-vm/rootfs/etc/systemd/system/kubectl-port-forward@.service @@ -1,4 +1,4 @@ [Service] -ExecStart=/bin/bash -c 'IFS=":" read svc port <<< "%i" && kubectl wait --kubeconfig=/kubeconfig --for=jsonpath={.subsets[*].addresses[0].ip} ep/$svc --timeout=300s && exec kubectl port-forward --kubeconfig=/kubeconfig --address=127.0.0.1 svc/$svc $port:$port' +ExecStart=/bin/bash -c 'IFS=":" read -ra p <<< "%i"; if [ $${#p[@]} -ge 3 ]; then ns=$${p[0]} svc=$${p[1]} hp=$${p[2]} tp=$${p[3]:-$${p[2]}}; else ns=default svc=$${p[0]} hp=$${p[1]} tp=$${p[1]}; fi; kubectl wait --kubeconfig=/kubeconfig -n $$ns --for=jsonpath={.subsets[*].addresses[0].ip} ep/$$svc --timeout=300s && exec kubectl port-forward --kubeconfig=/kubeconfig -n $$ns --address=127.0.0.1 svc/$$svc $$hp:$$tp' Restart=on-failure User=root diff --git a/docs/development/agent-integration/observability.mdx b/docs/development/agent-integration/observability.mdx index c1761d9d3b..eb7aed8265 100644 --- a/docs/development/agent-integration/observability.mdx +++ b/docs/development/agent-integration/observability.mdx @@ -118,17 +118,20 @@ Telemetry details include: -Install and start Phoenix using the `agentstack platform start` command: +Phoenix is included by default as part of the kagenti observability stack (OTel collector + Phoenix). 
Simply start the platform: ```sh -agentstack platform start --set phoenix.enabled=true +agentstack platform start ``` -You can run this even if your platform is already running; it will update the configuration without losing existing data. +To disable Phoenix and the OTel collector, pass: +```sh +agentstack platform start --set kagenti-deps:components.otel.enabled=false +``` - + -Spinning up the Phoenix container can take a moment, even after the CLI reports success. Go to [http://localhost:6006](http://localhost:6006) and check if it's running. If not, please wait a few moments. +Spinning up the Phoenix container can take a moment, even after the CLI reports success. Go to [http://phoenix.localtest.me:8080](http://phoenix.localtest.me:8080) and check if it's running. If not, please wait a few moments. diff --git a/docs/development/reference/cli-reference.mdx b/docs/development/reference/cli-reference.mdx index 3793d69a17..e4a9860892 100644 --- a/docs/development/reference/cli-reference.mdx +++ b/docs/development/reference/cli-reference.mdx @@ -34,8 +34,7 @@ $ agentstack [OPTIONS] COMMAND [ARGS]... * `agent`: Manage agents. * `connector`: Manage connectors to external services. * `platform`: Manage Agent Stack platform. -* `client-side-build`: Build agent locally using Docker. -* `build`: Build agent from a GitHub repository in... +* `build`: Build an agent image locally and push to platform registry. * `server`: Manage Agent Stack servers and... * `self`: Manage Agent Stack installation. * `add`: Add a docker image or GitHub repository. @@ -846,50 +845,25 @@ $ agentstack platform exec [OPTIONS] [COMMAND]... * `-v, --verbose`: Show verbose output * `--help`: Show this message and exit. -## `agentstack client-side-build` - -Build agent locally using Docker. [Local only] - -**Usage**: - -```console -$ agentstack client-side-build [OPTIONS] [CONTEXT] -``` - -**Arguments**: - -* `[CONTEXT]`: Docker context for the agent [default: .] 
- -**Options**: - -* `--dockerfile TEXT`: Use custom dockerfile path -* `--tag TEXT`: Docker tag for the agent -* `--multi-platform / --no-multi-platform`: [default: no-multi-platform] -* `--push / --no-push`: Push the image to the target registry. [default: no-push] -* `--import / --no-import`: Import the image into Agent Stack platform [default: import] -* `--extract-agent-card / --no-extract-agent-card`: Extract agent card from running container [default: extract-agent-card] -* `-v, --verbose`: Show verbose output -* `--help`: Show this message and exit. - ## `agentstack build` -Build agent from a GitHub repository in the platform. [Admin only] +Build an agent image locally and push it to the platform registry. [Local only] **Usage**: ```console -$ agentstack build [OPTIONS] GITHUB_URL +$ agentstack build [OPTIONS] [CONTEXT] ``` **Arguments**: -* `GITHUB_URL`: Github repository URL (public or private if supported by the platform instance) [required] +* `[CONTEXT]`: Docker build context (path or URL) [default: .] **Options**: -* `--dockerfile TEXT`: Use custom dockerfile path, relative to github url sub-path +* `-f, --dockerfile TEXT`: Dockerfile path +* `-t, --tag TEXT`: Image tag (default: auto-generated) * `-v, --verbose`: Show verbose output -* `-y, --yes`: Skip confirmation prompts. * `--help`: Show this message and exit. ## `agentstack server` diff --git a/docs/poc-kagenti-integration.md b/docs/poc-kagenti-integration.md new file mode 100644 index 0000000000..e9b8470a62 --- /dev/null +++ b/docs/poc-kagenti-integration.md @@ -0,0 +1,701 @@ +# PoC: Kagenti Integration into Agentstack + +## Status: Draft / Brainstorming + +--- + +## 1. Executive Summary + +This document outlines the plan to refactor the agentstack platform so that agent scaling, deployment, and discovery are handled by **kagenti** instead of our custom Kubernetes provider management. 
The goal is a lightweight local developer experience with optional enterprise features (Istio, SPIRE, Shipwright). + +### What We Gain +- Standard A2A agent lifecycle management (deploy, discover, scale) +- Realtime agent card discovery (no more storing cards in DB or Docker labels) +- Zero-trust identity via SPIRE/SPIFFE (optional) +- Service mesh observability via Istio Ambient (optional) +- Shipwright-based builds replacing our Kaniko pipeline (optional) +- Team namespace isolation + +### What We Drop +- `KubernetesProviderDeploymentManager` (custom kr8s-based deployment logic) +- `KubernetesProviderBuildManager` (Kaniko build jobs with Docker label baking) +- Agent card storage in database / Docker image labels +- Scale-to-zero logic (kagenti handles agent lifecycle) +- The concept of "managed" vs "unmanaged" providers (all agents become kagenti-managed) + +--- + +## 2. Architecture Comparison + +### Current Agentstack +``` +Lima VM (MicroShift) +└── agentstack namespace + ├── agentstack-server (FastAPI) + │ ├── KubernetesProviderDeploymentManager (kr8s) + │ ├── KubernetesProviderBuildManager (Kaniko) + │ ├── A2A Proxy Service + │ └── Provider Registry sync + ├── Keycloak (StatefulSet, port 8336) + ├── PostgreSQL + ├── Redis + ├── SeaweedFS + ├── Phoenix (observability) + └── agentstack-provider-{id}-svc (per-agent deployments) +``` + +### Target: Agentstack + Kagenti +``` +Lima VM (MicroShift) +├── agentstack namespace +│ ├── agentstack-server (FastAPI, slimmed down) +│ │ ├── A2A Proxy → delegates to kagenti agent services +│ │ └── Provider Registry → reads from kagenti API / agent cards +│ ├── PostgreSQL +│ ├── Redis +│ └── SeaweedFS +├── keycloak namespace (shared) +│ └── Keycloak (StatefulSet) +├── kagenti-system namespace +│ ├── kagenti-operator +│ ├── kagenti-webhook +│ ├── kagenti-ui (backend + frontend) [optional] +│ └── MCP Gateway +├── team1, team2, ... 
(agent namespaces) +│ └── agent Deployments + Services +├── istio-system (optional) +├── zero-trust-workload-identity-manager (optional) +└── cr-system (container registry, optional) +``` + +--- + +## 3. Key Integration Decisions + +### 3.1 Installing Kagenti into MicroShift + +**Problem:** Kagenti's installer uses Ansible + Kind. Our stack uses Lima + MicroShift. These are incompatible. + +**Options:** + +#### Option A: Helm-only installation (DECIDED) +Strip kagenti down to just its two Helm charts (`kagenti` and `kagenti-deps`) and install them directly into MicroShift via `helm install`. Skip the Ansible playbook entirely. + +**Pros:** +- Clean, declarative, reproducible +- Integrates with our existing Helm-based deployment +- No Ansible dependency +- Can selectively enable/disable components via values + +**Cons:** +- Some Ansible tasks do pre/post-processing (OAuth secret creation, DNS setup, image preloading) +- Need to replicate essential Ansible logic in our Lima provisioning or Helm hooks + +**What Ansible does that we'd need to replicate:** +1. Cluster creation → already handled by Lima/MicroShift +2. DNS setup → already handled by Lima networking +3. OAuth secret creation → can be Helm hooks or init containers +4. Image preloading → can be pre-pull in Lima config +5. Shipwright ClusterBuildStrategy → can be a Helm template or post-install hook + +#### Option B: Adapt Ansible to target MicroShift +Modify kagenti's Ansible playbook to target an existing MicroShift cluster instead of creating a Kind cluster. + +**Pros:** +- Reuses kagenti's tested installation flow +- Less risk of missing setup steps + +**Cons:** +- Ansible dependency for our stack +- Tight coupling to kagenti's playbook (maintenance burden) +- OpenShift-specific tasks may conflict with MicroShift + +#### Option C: Umbrella Helm chart +Create a single umbrella chart that includes agentstack + kagenti + kagenti-deps as subcharts. 
+ +**Pros:** +- Single `helm install` for everything +- Shared values file for cross-component config +- Clean dependency management + +**Cons:** +- Chart dependency version management +- Large chart, slower iteration +- Kagenti charts may need modifications to work as subcharts + +**Decision:** **Option A** for the PoC. Evolve toward **Option C** for production. + +### 3.2 Configurable Feature Toggles + +The local experience should be modular. Proposed feature flags in a values file: + +```yaml +kagenti: + enabled: true + + features: + istio: + enabled: false # Service mesh (Ambient mode) + spire: + enabled: false # Zero-trust workload identity + shipwright: + enabled: false # Container builds + builds: + enabled: false # Build UI + Tekton + observability: + phoenix: + enabled: true # LLM trace viewer + otel: + enabled: false # OpenTelemetry collector + kiali: + enabled: false # Service mesh dashboard + containerRegistry: + enabled: false # In-cluster registry + certManager: + enabled: false # Certificate management + mcpGateway: + enabled: false # MCP Gateway +``` + +**Minimal local setup** (fastest startup): Just kagenti operator + webhook + Keycloak. No Istio, no SPIRE, no builds. + +**Full-featured setup**: Everything enabled, closest to production. + +### 3.3 Keycloak Namespace + +**Problem:** Agentstack deploys Keycloak in the same namespace as the server. Kagenti deploys it in a dedicated `keycloak` namespace. We need to converge. + +**Options:** + +#### Option A: Separate `keycloak` namespace (Recommended) +Move Keycloak to its own namespace, matching kagenti's approach. 
+ +**Pros:** +- Clean separation of concerns +- Matches kagenti convention +- Keycloak can be shared across agentstack + kagenti components +- Independent scaling and RBAC + +**Cons:** +- Breaks single-chart deployment model +- Cross-namespace service discovery needed (trivial: `keycloak-service.keycloak.svc.cluster.local`) +- Need to coordinate Keycloak deployment between charts + +**Implementation:** +- Remove Keycloak from agentstack Helm chart +- Use kagenti-deps chart to deploy Keycloak (or a standalone Keycloak chart) +- Update agentstack-server config to point to `keycloak-service.keycloak:8080` +- Both agentstack and kagenti configure their OAuth clients in the same realm + +#### Option B: Keep in agentstack namespace, kagenti references it +Keep current setup, configure kagenti to use the existing Keycloak instance. + +**Pros:** +- Minimal changes to agentstack +- Single chart still works + +**Cons:** +- Non-standard for kagenti (may need chart modifications) +- Namespace coupling + +#### Option C: Let kagenti-deps own Keycloak, agentstack consumes it +Kagenti-deps deploys Keycloak in `keycloak` namespace. Agentstack Helm chart declares Keycloak as disabled and references the external instance. + +**Pros:** +- Clean ownership model +- Kagenti's Keycloak setup includes realm bootstrap, OAuth jobs, etc. + +**Cons:** +- Deployment order dependency (keycloak must be up before agentstack) +- Agentstack still needs its own realm provisioning (see below) + +**Decision:** **Option C** - Let kagenti-deps own Keycloak. Agentstack becomes a consumer. This is the cleanest separation. + +**Helm chart implications:** +- This works fine even with multiple charts. The deployment order is: + 1. `helm install kagenti-deps` (includes Keycloak, optionally Istio, SPIRE, etc.) + 2. `helm install agentstack` (references Keycloak via service DNS) + 3. `helm install kagenti` (references Keycloak via service DNS) +- Or with an umbrella chart using `weight` annotations for ordering. 
+ +### 3.4 Keycloak Realm Provisioning + +**Problem:** Both systems need Keycloak realm/client configuration. Moving Keycloak to kagenti-deps doesn't eliminate the need for agentstack's own realm bootstrapping. + +**Agentstack's provision job** (`helm/templates/keycloak/provision-job.yaml`) does: +- Creates `agentstack` realm with custom login theme +- Creates roles: `agentstack-admin`, `agentstack-developer` +- Creates OAuth clients: + - `agentstack-server` (confidential, service accounts + direct access grants) + - `agentstack-ui` (confidential, standard flow + direct access grants) + - `agentstack-cli` (public, standard flow + direct access grants, localhost redirect) +- Configures audience mappers per client (UI URL, API URL) +- Seeds users with passwords and role assignments + +**Kagenti's realm setup** does: +- Creates `kagenti` realm (via Keycloak `autoBootstrapRealm`) +- Creates OAuth clients via Jobs: `kagenti-keycloak-client` (agents), `kagenti-ui-client` (UI), `kagenti-api` (API) +- Roles: `kagenti-admin`, `kagenti-operator`, `kagenti-viewer` + +**Options for convergence:** + +#### Option 1: Separate realms (DECIDED) +Keep `agentstack` realm and `kagenti` realm side by side. Each system provisions its own realm independently. + +Kagenti uses a dedicated `kagenti` realm (configured via `keycloak_realm` in backend config, bootstrapped via `autoBootstrapRealm`). This is their own realm, not the master realm. + +Agentstack keeps its own `agentstack` realm. Both realms live in the same Keycloak instance but are fully independent. 
+ +**Pros:** +- Zero coupling between the two provisioning jobs +- Each system owns its auth config completely +- Agentstack's provision-job.yaml stays unchanged (just point at new Keycloak URL) +- Clean separation - no risk of role/client name collisions + +**Cons:** +- Users need accounts in both realms (or we configure identity brokering later) +- Two login flows if both UIs are deployed + +#### Option 2: Shared realm (Future - Best for UX) +Merge into a single realm. One provision job creates all clients and roles. + +#### Option 3: Separate realms + Identity Brokering (Future) +Each system has its own realm, but Keycloak brokers between them for SSO. + +**Decision:** **Option 1** (separate realms). The provision job just needs its Keycloak URL updated from the local StatefulSet to `keycloak-service.keycloak:8080`. Everything else stays the same. We can evolve to Option 2 when we want unified SSO. + +**What changes in the provision job:** +- Keep `provision-job.yaml` in the agentstack Helm chart +- Remove Keycloak StatefulSet, Service, and Secret templates (those move to kagenti-deps) +- Update the job to target the external Keycloak: `keycloak-service.keycloak:8080` +- Admin credentials need to be shared (either a shared secret or a dedicated admin client for provisioning) + +--- + +## 4. 
Component Mapping + +### What agentstack drops (delegates to kagenti) + +| Agentstack Component | Kagenti Replacement | +|---|---| +| `KubernetesProviderDeploymentManager` | Kagenti operator deploys agents as standard K8s Deployments in team namespaces | +| `KubernetesProviderBuildManager` (Kaniko) | Shipwright + Tekton builds (optional) | +| Agent card in Docker labels (`beeai.dev.agent.json`) | Realtime HTTP fetch from `/.well-known/agent-card.json` | +| Agent card stored in DB | Realtime discovery from running agents | +| Scale-to-zero / auto-stop logic | Kagenti manages agent lifecycle (or standard HPA) | +| Provider model (managed/unmanaged distinction) | All agents are kagenti-managed Deployments | +| `build-provider-job.yaml` (Kaniko + Crane) | Shipwright BuildRun with Buildah strategy | +| Keycloak deployment | kagenti-deps Keycloak deployment | + +### What agentstack keeps + +| Component | Reason | +|---|---| +| A2A Proxy Service | Core routing/auth logic, user task tracking | +| Provider Registry sync | Can evolve to sync with kagenti's agent namespaces | +| PostgreSQL | Agentstack's own data (users, tasks, conversations) | +| Redis | Caching, rate limiting | +| SeaweedFS | Object storage for artifacts | +| Phoenix | LLM observability (kagenti also supports this) | + +### What changes in agentstack-server + +| Area | Change | +|---|---| +| `bootstrap.py` | Remove `KubernetesProviderDeploymentManager` and `KubernetesProviderBuildManager` injection | +| `providers.py` service | Rewrite to discover agents via kagenti API or direct K8s namespace scanning | +| `a2a.py` service | Update URL resolution: `http://{agent}.{namespace}.svc.cluster.local:8080` | +| Provider model | Simplify - remove `auto_stop_timeout`, `unmanaged_state`, build fields | +| Provider cron jobs | Remove `auto_stop_providers`, `refresh_unmanaged_provider_state`; keep or adapt registry sync | +| Configuration | Add kagenti connection settings, remove build/scaling config | + +--- + +## 5. 
Multi-Tenancy, Agent Discovery, and Data Scoping + +This is a critical design area. Agentstack currently has user-scoped data (conversations, tasks, files) but no namespace/team concept. Kagenti has namespace-based isolation but no per-user data scoping. We need to bridge these. + +### 5.1 Current State + +**Agentstack multi-tenancy:** +- User-per-tenant model: each user has isolated data via `created_by` FK +- Data scoped per-user: contexts, files, vector_stores, a2a_request_tasks, a2a_request_contexts +- Providers are semi-public: all users can read, only owner/devs can modify +- Model providers are fully global (no scoping) +- No organization/workspace/namespace abstraction +- Roles: ADMIN, DEVELOPER, USER (controls CRUD permissions, not visibility) + +**Kagenti multi-tenancy:** +- Namespace-based: team namespaces (`team1`, `team2`) with `kagenti-enabled=true` label +- Role-based: `kagenti-admin` > `kagenti-operator` > `kagenti-viewer` (Keycloak realm roles) +- NO per-user namespace restrictions at the API level - all viewers see all enabled namespaces +- K8s RBAC on the backend service account controls what namespaces are actually accessible +- No user data storage (stateless - queries K8s API on every request) + +### 5.2 Agent Discovery: How Should Agentstack Find Kagenti Agents? + +#### Option A: Call Kagenti Backend API (DECIDED) + +Agentstack calls `GET http://kagenti-backend.kagenti-system:8080/api/v1/agents?namespace={namespace}` to discover agents. 
 + +``` +agentstack-server → HTTP → kagenti-backend → K8s API → Deployments with kagenti.io/type=agent +``` + +**Pros:** +- Clean separation - agentstack doesn't need K8s RBAC for agent namespaces +- Kagenti handles the label scanning, status aggregation, protocol detection +- Easier to evolve (kagenti can add caching, watching, CRDs without agentstack changes) +- Shared auth via Keycloak - agentstack can forward user tokens to kagenti API + +**Cons:** +- Runtime dependency on kagenti backend being available +- Extra network hop +- Polling-based (kagenti has no watch/event mechanism either) + +**Auth for kagenti API access:** +Agentstack-server will need a dedicated Keycloak client in the `kagenti` realm (e.g., `agentstack-api`) to authenticate against the kagenti backend API. This client would use client credentials grant (service-to-service). The kagenti provision job (or agentstack's provision job targeting the kagenti realm) needs to create this client with at least `kagenti-viewer` role. + +**Implementation:** +```python +# New: KagentiAgentDiscovery service +class KagentiAgentDiscovery: + def __init__(self, kagenti_url: str, token_provider: TokenProvider): + self._url = kagenti_url # http://kagenti-backend.kagenti-system:8080 + self._token_provider = token_provider # client_credentials against kagenti realm + + async def list_agents(self, namespace: str | None = None) -> list[AgentSummary]: + # GET /api/v1/agents?namespace={ns} + # Authorization: Bearer {token} + ... + + async def get_agent_card(self, namespace: str, name: str) -> AgentCard: + # GET /api/v1/chat/{ns}/{name}/agent-card + # (kagenti proxies to http://{name}.{ns}.svc:8080/.well-known/agent-card.json) + ... +``` + +#### Option B: Direct K8s Label Scanning (Rejected) + +Agentstack's service account scans Deployments with label `kagenti.io/type=agent` across namespaces. + +**Why rejected:** K8s RBAC is namespace-scoped by default. 
Agentstack's service account in the `agentstack` namespace cannot list Deployments in `team1` or `team2` unless we grant it a ClusterRole. This is a significant RBAC escalation. An alternative would be deploying a "service agent" sidecar into each team namespace, but that adds complexity. + +Note: this is a **K8s API RBAC** limitation, not a networking limitation. Network calls between namespaces always work (see section 5.6). But listing/watching resources across namespaces via the K8s API requires explicit RBAC grants. + +#### Option C: K8s Watch + Local Cache (Future - Best for Production) + +Same RBAC concern as Option B. Could be revisited if we add a ClusterRole for agentstack or use kagenti's operator to push events. + +**Decision:** **Option A** (kagenti API). We will create a dedicated Keycloak client for agentstack-server in the kagenti realm to authenticate API calls. + +### 5.3 Multi-Tenancy Model: Per-Namespace vs Global + +The core question: **When a user opens agentstack, which agents do they see?** + +#### Pattern 1: Global Agent Catalog + Per-User Data (Recommended) + +All agents across all kagenti namespaces are visible to all agentstack users. User data (conversations, tasks, files) remains per-user as today. 
+ +``` +Agents: GLOBAL (all users see all agents from all namespaces) +Conversations: PER-USER (user's own chat history with agents) +Tasks: PER-USER (A2A task ownership) +Files: PER-USER (uploaded documents) +``` + +**When it makes sense:** +- Small teams, local development, PoC +- All agents are shared resources (like shared microservices) +- Users don't need to hide agents from each other +- Simplest to implement - mirrors current agentstack behavior + +**Implementation:** +- Agentstack discovers all agents across all `kagenti-enabled=true` namespaces +- Provider list in agentstack UI shows agents grouped by namespace +- No changes to user model or data scoping + +#### Pattern 2: Namespace-Scoped Agent Visibility + +Users are assigned to namespaces (teams). They can only see/use agents in their namespaces. + +``` +Agents: PER-NAMESPACE (user sees only agents in their team namespaces) +Conversations: PER-USER (within allowed namespaces) +Tasks: PER-USER (within allowed namespaces) +Files: PER-USER +``` + +**When it makes sense:** +- Multi-team environments +- Compliance/isolation requirements +- Different teams run different agent sets + +**Implementation:** +- Add `user_namespaces` mapping (DB table or Keycloak groups → namespaces) +- Agentstack filters agent list by user's allowed namespaces +- Keycloak groups could map to kagenti namespaces (e.g., group `team1` → namespace `team1`) +- OR: Keycloak realm roles with namespace claims in JWT + +**Keycloak integration approach:** +``` +Keycloak realm: kagenti +├── Group: team1 → users who can access team1 namespace +├── Group: team2 → users who can access team2 namespace +└── Client: agentstack → includes group memberships in token claims +``` + +Agentstack reads group claims from JWT → maps to allowed namespaces → filters agent discovery. + +#### Pattern 3: Hybrid - Global Catalog + Namespace Permissions + +All agents are visible (catalog view), but users can only *interact* with agents in their namespaces. 
+ +``` +Agent catalog: GLOBAL (browse all agents) +Agent usage: PER-NAMESPACE (chat only with agents in your namespaces) +Conversations: PER-USER +``` + +**When it makes sense:** +- Users want to discover what's available but access is controlled +- Self-service model: "request access to namespace X" + +**Recommendation for PoC:** Start with **Pattern 1** (global catalog). It matches current agentstack behavior and is simplest. Add namespace scoping later when we have real multi-team requirements. + +### 5.4 DNS and URL Resolution + +**Note on `localtest.me`:** We can adopt kagenti's `localtest.me` convention for simplicity. This wildcard DNS resolves `*.localtest.me` to `127.0.0.1`, which is useful for Keycloak redirect URIs, agent URLs, and UI access without `/etc/hosts` hacking. + +Agent URL patterns: +- **In-cluster (service DNS):** `http://{agent-name}.{namespace}.svc.cluster.local:8080` +- **External (localtest.me):** `http://{agent-name}.{namespace}.localtest.me:8080` (requires ingress/port-forward) +- **Via kagenti API proxy:** `POST http://kagenti-backend.kagenti-system:8080/api/v1/chat/{ns}/{name}/send` +- **Via agentstack A2A proxy:** agentstack continues to proxy A2A requests, but resolves URLs to in-cluster service DNS + +### 5.5 Auth Token Flow + +Both systems use Keycloak. The question is whether agentstack forwards user tokens to agents or mints its own. + +``` +User → (Keycloak JWT) → agentstack-server → (???) → agent in team1 namespace +``` + +**Options:** +1. **Forward user token** - Agent receives the user's JWT. Agent can validate it against Keycloak. Simple but exposes user identity to agents. +2. **Token exchange** - Agentstack exchanges user token for a scoped service token (Keycloak token exchange). More secure, agents see a service identity. +3. **No auth to agents** (PoC) - Agents trust in-cluster traffic. Simplest for PoC, add auth later. + +Kagenti's current approach: forwards the user's Authorization header to agents (option 1). 
+ +**Recommendation for PoC:** Option 3 (no auth to agents). Adopt option 1 or 2 when adding Istio/SPIRE. + +### 5.6 Multi-Agent Communication + +**Scenario:** Agent A needs to call Agent B. Both are in `team1` namespace. Agentstack runs in `agentstack` namespace. How should this work? + +#### Networking: Cross-namespace is not a problem + +Istio Ambient mode operates at the pod level, not namespace boundaries. Any pod can reach any service across namespaces via standard K8s DNS. Cross-namespace traffic gets automatic mTLS from the ztunnel layer. + +``` +agent-a.team1 → agent-b.team1 # same namespace, trivial +agent-a.team1 → agent-b.team2.svc.cluster.local # cross namespace, also fine +agent-a.team1 → agentstack-server.agentstack.svc # cross namespace, fine +``` + +Access restrictions are opt-in via Istio policies: +- **AuthorizationPolicy**: "only SAs `agent-a` and `agent-b` in `team1` can communicate" +- **Waypoint proxies**: L7 policy enforcement (kagenti sets these up per agent namespace) +- **SPIRE identities**: each agent gets `spiffe://domain/ns/team1/sa/agent-a` for mutual authentication + +#### The real question: should agent-to-agent go through agentstack? + +**Option 1: Direct agent-to-agent (kagenti native)** +``` +User → agentstack → Agent A → Agent B (direct, same namespace) + ↘ Agent C (direct, cross namespace) +``` + +- Agents discover each other via K8s DNS or agent cards +- Lowest latency, no bottleneck +- Istio + SPIRE handle auth and mTLS +- **Problem:** agentstack loses visibility. No audit trail, no task tracking, no rate limiting for agent-to-agent calls. Agentstack only sees the user→Agent A leg. 
 + +**Option 2: All traffic through agentstack proxy** +``` +User → agentstack → Agent A + agentstack ← Agent A (Agent A calls back to agentstack to reach Agent B) + agentstack → Agent B + agentstack ← Agent B (response) + agentstack → Agent A (forwarded response) +``` + +- Full audit trail, task ownership tracking, token management +- Agentstack can enforce rate limits, quotas, access policies +- **Problem:** agentstack is a bottleneck and single point of failure for multi-agent workflows. Higher latency. Every agent-to-agent hop is 2 extra network hops. + +**Option 3: Hybrid - agentstack for orchestration, direct for execution (Recommended long-term)** +``` +User → agentstack → Agent A (orchestrator) + Agent A → Agent B (direct, fast) + Agent A → Agent C (direct, fast) + Agent A → agentstack (report task status/results) +``` + +- Agentstack handles the user-facing session: auth, task creation, context management +- Agent A (the orchestrator) talks to sub-agents directly within the cluster +- Sub-agent calls use Istio mTLS + SPIRE identities for auth (no Keycloak tokens needed) +- Agentstack gets task results when the orchestrator reports back +- **Best of both worlds:** fast agent-to-agent, user-level tracking at the edges + +**Istio policy example for namespace isolation:** +```yaml +apiVersion: security.istio.io/v1 +kind: AuthorizationPolicy +metadata: + name: allow-intra-team + namespace: team1 +spec: + action: ALLOW + rules: + - from: + - source: + namespaces: ["team1"] # same namespace agents + - source: + namespaces: ["agentstack"] # agentstack proxy + principals: ["cluster.local/ns/agentstack/sa/agentstack-server"] +``` + +This allows agents within `team1` to talk freely to each other, and allows agentstack to call into `team1` agents. Cross-namespace agent-to-agent (e.g., `team1` → `team2`) would require explicit policy. + +**Recommendation for PoC:** Option 2 (all through agentstack). 
Agentstack issues custom tokens and controls which agent can call which - this requires agentstack to remain in the request path. Direct agent-to-agent bypasses these controls. Option 3 can be explored later when we have Istio + SPIRE providing network-level identity and policy enforcement as an alternative to token-based control. + +#### Cross-namespace communication: no architectural blockers + +Regardless of which option we choose, **cross-namespace networking just works in K8s**. There is no restriction: + +``` +# Agent (team1) → agentstack API (agentstack namespace) +http://agentstack-server.agentstack.svc.cluster.local:8000/... ← works + +# Agentstack (agentstack namespace) → agent (team1) +http://weather-agent.team1.svc.cluster.local:8080/... ← works + +# Agent (team1) → agent (team2) +http://other-agent.team2.svc.cluster.local:8080/... ← works +``` + +Namespaces are a logical boundary for resource organization and K8s RBAC, **not** a network boundary. Cross-namespace service DNS is a core K8s guarantee. Nothing in Istio, MicroShift, or kagenti changes this. Istio Ambient adds mTLS **on top** of existing connectivity - it doesn't restrict it. Restrictions are only created by explicit `NetworkPolicy` (K8s) or `AuthorizationPolicy` (Istio), and neither system creates deny-all policies by default. + +**This means agentstack can safely live in its own namespace.** There is no architectural concern about agents calling back to agentstack or agentstack calling into agent namespaces. + +--- + +## 6. 
Agent Card Discovery - New Model + +(See also section 5.2 for discovery mechanism discussion) + +### Current (agentstack) +``` +Build time: agent-card.json → base64 → Docker label → stored in DB +Runtime: DB lookup → return cached card +Scale-to-zero: Card available even when agent is scaled down (from DB) +``` + +### New (kagenti-based) +``` +Runtime: HTTP GET http://{agent}.{namespace}.svc:8080/.well-known/agent-card.json +Always-on: Agents must be running to serve their card +Alternative: Kagenti operator could maintain a card cache (future) +``` + +### Implications +- No more "offline" agents (agents scaled to zero can't serve cards) +- Discovery is always fresh (no stale cards) +- Agent list = `kubectl get deployments -l kagenti.io/type=agent` across namespaces +- Card validation happens at request time, not build time + +### Transition Path +1. Keep DB-backed card cache as fallback during PoC +2. Add kagenti-based discovery as primary source +3. Remove DB cache once stable + +--- + +## 7. Lima / MicroShift Integration + +### Current Lima Setup +Agentstack uses Lima VMs with MicroShift for local development. The kagenti Helm charts need to install cleanly into this environment. + +### Required Lima Changes +1. **Resource increase**: Kagenti components (especially Istio) need more RAM. Bump VM from current allocation. +2. **Port mappings**: Kagenti uses port 8080. Need to avoid conflicts or configure differently. +3. **DNS**: Adopt kagenti's `localtest.me` convention (wildcard DNS → 127.0.0.1). Simplifies Keycloak redirect URIs and agent access. See section 5.4. +4. **Image pre-pull**: Optional - pre-pull kagenti component images to speed up first startup. + +### MicroShift Compatibility Notes +- MicroShift is a minimal OpenShift. Kagenti supports OpenShift via `global.openshift: true`. +- Some kagenti components use OLM (Operator Lifecycle Manager) for installation. MicroShift may not have OLM → need Helm-based alternatives. 
+- SPIRE's ZTWIM operator requires OCP 4.19+. MicroShift version needs checking. Alternative: `useSpireHelmChart: true`. +- Istio Ambient mode should work on MicroShift (it's standard K8s networking). + +--- + +## 8. PoC Implementation Plan + +### Phase 1: Minimal Integration (Week 1-2) +1. [ ] Install kagenti Helm charts into MicroShift (operator + webhook only) +2. [ ] Move Keycloak to separate namespace +3. [ ] Deploy a test agent via kagenti (manual kubectl) +4. [ ] Verify agent card discovery via HTTP +5. [ ] Update agentstack A2A proxy to route to kagenti-managed agents + +### Phase 2: Feature Parity (Week 3-4) +6. [ ] Remove `KubernetesProviderDeploymentManager` +7. [ ] Remove `KubernetesProviderBuildManager` +8. [ ] Implement kagenti-based agent discovery in provider service +9. [ ] Update Helm chart (remove Keycloak, add kagenti-deps dependency) +10. [ ] Test full agent lifecycle: deploy → discover → chat → delete + +### Phase 3: Optional Features (Week 5+) +11. [ ] Enable Istio Ambient mode +12. [ ] Enable SPIRE/SPIFFE identity +13. [ ] Enable Shipwright builds +14. [ ] Configure feature toggles in values.yaml +15. [ ] Performance testing with multiple agents across namespaces + +--- + +## 9. Open Questions + +1. **Kagenti operator on MicroShift**: Has this been tested? Any known issues? +2. **Agent namespaces**: Should agentstack create team namespaces, or delegate to kagenti? +3. **Auth flow**: Both systems use Keycloak OAuth. Do we need token exchange between agentstack and kagenti agents, or can we share the same realm/client? +4. **UI**: Do we keep agentstack UI only, or also deploy kagenti UI for agent management? +5. **MCP Gateway**: Agentstack has managed MCP service. Kagenti also has MCP Gateway. Which wins? +6. **Provider registry**: Current agentstack syncs from Git-based registries. Does kagenti have an equivalent, or do we keep this? +7. **Observability**: Both use Phoenix. Consolidate to one instance? +8. 
**Helm chart publishing**: Should we publish a combined chart, or keep them separate with documentation? + +--- + +## 10. File Reference + +### Agentstack - Key Files Modified/Removed +- `infrastructure/kubernetes/provider_deployment_manager.py` - **Removed** +- `infrastructure/kubernetes/provider_build_manager.py` - **Removed** +- `bootstrap.py` - **Modified** (removed build/deploy managers) +- `service_layer/services/providers.py` - **Simplified** (removed deployment manager, returns Provider directly) +- `service_layer/services/a2a.py` - **Simplified** (no deployment state checking) +- `domain/models/provider.py` - **Simplified** (removed managed/unmanaged, added source_type) +- `configuration.py` - **Modified** (added KagentiConfiguration) +- `infrastructure/kagenti/client.py` - **New** (kagenti API client) +- `jobs/crons/provider.py` - **Rewritten** (periodic kagenti agent sync) + +### Kagenti - Key Files to Reference +- `.kagenti-temp/charts/kagenti/values.yaml` - Platform config +- `.kagenti-temp/charts/kagenti-deps/values.yaml` - Dependencies config (feature toggles) +- `.kagenti-temp/deployments/ansible/default_values.yaml` - Full default values +- `.kagenti-temp/kagenti/backend/app/routers/chat.py` - A2A implementation +- `.kagenti-temp/charts/kagenti-deps/templates/keycloak-k8s.yaml` - Keycloak deployment diff --git a/docs/stable/agent-integration/observability.mdx b/docs/stable/agent-integration/observability.mdx index c1761d9d3b..eb7aed8265 100644 --- a/docs/stable/agent-integration/observability.mdx +++ b/docs/stable/agent-integration/observability.mdx @@ -118,17 +118,20 @@ Telemetry details include: -Install and start Phoenix using the `agentstack platform start` command: +Phoenix is included by default as part of the kagenti observability stack (OTel collector + Phoenix). 
Simply start the platform: ```sh -agentstack platform start --set phoenix.enabled=true +agentstack platform start ``` -You can run this even if your platform is already running; it will update the configuration without losing existing data. +To disable Phoenix and the OTel collector, pass: +```sh +agentstack platform start --set kagenti-deps:components.otel.enabled=false +``` - + -Spinning up the Phoenix container can take a moment, even after the CLI reports success. Go to [http://localhost:6006](http://localhost:6006) and check if it's running. If not, please wait a few moments. +Spinning up the Phoenix container can take a moment, even after the CLI reports success. Go to [http://phoenix.localtest.me:8080](http://phoenix.localtest.me:8080) and check if it's running. If not, please wait a few moments. diff --git a/docs/stable/reference/cli-reference.mdx b/docs/stable/reference/cli-reference.mdx index 41182ac00e..a0f4618d0d 100644 --- a/docs/stable/reference/cli-reference.mdx +++ b/docs/stable/reference/cli-reference.mdx @@ -34,8 +34,7 @@ $ agentstack [OPTIONS] COMMAND [ARGS]... * `agent`: Manage agents. * `connector`: Manage connectors to external services. * `platform`: Manage Agent Stack platform. -* `client-side-build`: Build agent locally using Docker. -* `build`: Build agent from a GitHub repository in... +* `build`: Build an agent image locally and push to platform registry. * `server`: Manage Agent Stack servers and... * `self`: Manage Agent Stack installation. * `user`: Manage users. @@ -801,49 +800,25 @@ $ agentstack platform exec [OPTIONS] [COMMAND]... * `-v, --verbose`: Show verbose output * `--help`: Show this message and exit. -## `agentstack client-side-build` - -Build agent locally using Docker. [Local only] - -**Usage**: - -```console -$ agentstack client-side-build [OPTIONS] [CONTEXT] -``` - -**Arguments**: - -* `[CONTEXT]`: Docker context for the agent [default: .] 
- -**Options**: - -* `--dockerfile TEXT`: Use custom dockerfile path -* `--tag TEXT`: Docker tag for the agent -* `--multi-platform / --no-multi-platform`: [default: no-multi-platform] -* `--push / --no-push`: Push the image to the target registry. [default: no-push] -* `--import / --no-import`: Import the image into Agent Stack platform [default: import] -* `-v, --verbose`: Show verbose output -* `--help`: Show this message and exit. - ## `agentstack build` -Build agent from a GitHub repository in the platform. [Admin only] +Build an agent image locally and push it to the platform registry. [Local only] **Usage**: ```console -$ agentstack build [OPTIONS] GITHUB_URL +$ agentstack build [OPTIONS] [CONTEXT] ``` **Arguments**: -* `GITHUB_URL`: Github repository URL (public or private if supported by the platform instance) [required] +* `[CONTEXT]`: Docker build context (path or URL) [default: .] **Options**: -* `--dockerfile TEXT`: Use custom dockerfile path, relative to github url sub-path +* `-f, --dockerfile TEXT`: Dockerfile path +* `-t, --tag TEXT`: Image tag (default: auto-generated) * `-v, --verbose`: Show verbose output -* `-y, --yes`: Skip confirmation prompts. * `--help`: Show this message and exit. 
## `agentstack server` diff --git a/helm/.claude/settings.json b/helm/.claude/settings.json new file mode 100644 index 0000000000..8b311a3fce --- /dev/null +++ b/helm/.claude/settings.json @@ -0,0 +1,5 @@ +{ + "permissions": { + "allow": [] + } +} diff --git a/helm/Chart.lock b/helm/Chart.lock index d1590e10c8..8fbee7bafd 100644 --- a/helm/Chart.lock +++ b/helm/Chart.lock @@ -1,18 +1,15 @@ dependencies: - name: common repository: oci://registry-1.docker.io/bitnamicharts - version: 2.31.4 + version: 2.36.0 - name: postgresql repository: oci://registry-1.docker.io/bitnamicharts version: 16.7.27 - name: seaweedfs repository: https://seaweedfs.github.io/seaweedfs/helm version: 4.0.407 -- name: phoenix-helm - repository: oci://registry-1.docker.io/arizephoenix - version: 4.0.32 - name: redis repository: oci://registry-1.docker.io/cloudpirates version: 0.16.3 -digest: sha256:b6bec821abce3733fcfdd919404aa1c62ffb4daee4e046dd6eece4a19bae725f -generated: "2026-01-26T14:26:19.122198+01:00" +digest: sha256:ae6e9c3a6fbf94f11b12b6dbd7d6e16c64e1c1c40cb65602ddced2f958cbea80 +generated: "2026-03-10T11:22:34.000809+01:00" diff --git a/helm/Chart.yaml b/helm/Chart.yaml index 198c953f14..222d42228f 100644 --- a/helm/Chart.yaml +++ b/helm/Chart.yaml @@ -18,12 +18,7 @@ dependencies: - condition: seaweedfs.enabled name: seaweedfs repository: https://seaweedfs.github.io/seaweedfs/helm - version: 4.x.x - - condition: phoenix.enabled - name: phoenix-helm - alias: phoenix - repository: oci://registry-1.docker.io/arizephoenix - version: 4.x.x + version: 4.0.407 - condition: redis.enabled name: redis repository: oci://registry-1.docker.io/cloudpirates diff --git a/helm/templates/NOTES.txt b/helm/templates/NOTES.txt index 5740857fb4..52ce3293de 100644 --- a/helm/templates/NOTES.txt +++ b/helm/templates/NOTES.txt @@ -108,13 +108,3 @@ Upgrade platform: helm upgrade {{ .Release.Name }} oci://ghcr.io/i-am-bee/agentstack/chart/agentstack: -{{- if .Values.phoenix.enabled }} - -Important License 
Notice: - - When you enable Phoenix, be aware that Arize Phoenix is licensed under the Elastic License v2 (ELv2), which has - specific terms regarding commercial use and distribution. By enabling Phoenix, you acknowledge that you are - responsible for ensuring compliance with the ELv2 license terms for your specific use case. Please review the - Phoenix license (https://github.com/Arize-ai/phoenix/blob/main/LICENSE) before enabling this feature in - production environments. -{{- end }} diff --git a/helm/templates/_helpers.tpl b/helm/templates/_helpers.tpl index 26a89dff7e..b993aee6a7 100644 --- a/helm/templates/_helpers.tpl +++ b/helm/templates/_helpers.tpl @@ -23,14 +23,6 @@ If release name contains chart name it will be used as a full name. {{- end }} {{- end }} -{{/* Return a safe agent name based on everything after the first "/" */}} -{{- define "agent.fullname" -}} -{{- $root := .root }} -{{- $image := .image }} - -{{- printf "agent-%s" ($image | sha256sum) | trunc 32 | trimSuffix "-" -}} -{{- end }} - {{/* Create chart name and version as used by the chart label. 
*/}} @@ -351,72 +343,20 @@ Return the S3 secret access key inside the secret -{{/* -PATCH phoenix validatePersistence helper because the chart/image is broken and does not work with sqlalchemy -We need to set database.url otherwise migrations fail -*/}} -{{- define "phoenix.validatePersistence" -}} - {{- $persistenceEnabled := .Values.persistence.enabled | toString | eq "true" }} - {{- $postgresqlEnabled := .Values.postgresql.enabled | toString | eq "true" }} - {{- $databaseUrlConfigured := and .Values.database.url (ne .Values.database.url "") }} - {{- $isMemoryDatabase := .Values.persistence.inMemory | toString | eq "true" }} - {{- if and $isMemoryDatabase $postgresqlEnabled }} - {{- fail "ERROR: In-memory database configuration conflict!\n\nWhen using SQLite In-memory (database.url=\"sqlite:///:memory:\"), PostgreSQL must be disabled.\n\nTo fix this:\n - Set database.url=\"sqlite:///:memory:\"\n - Set postgresql.enabled=false\n\nNote: In-memory mode is for demos/testing only. All data will be lost when the pod restarts." }} - {{- end }} - {{- if and $persistenceEnabled $postgresqlEnabled (not $isMemoryDatabase) }} - {{- fail "ERROR: Invalid persistence configuration detected!\n\nYou cannot enable both 'persistence.enabled=true' and 'postgresql.enabled=true' simultaneously.\n\nThese options are mutually exclusive. Please choose ONE of the following:\n\n 1. SQLite with persistent storage:\n - Set persistence.enabled=true\n - Set postgresql.enabled=false\n - Leave database.url empty\n\n 2. Built-in PostgreSQL:\n - Set persistence.enabled=false\n - Set postgresql.enabled=true\n - Leave database.url empty\n\n 3. 
External database:\n - Set persistence.enabled=false\n - Set postgresql.enabled=false\n - Configure database.url with your external database connection string\n\nFor more information, see the persistence configuration comments in values.yaml" }} - {{- end }} - {{- if and $persistenceEnabled $databaseUrlConfigured (not $isMemoryDatabase) }} - {{/* We need to disable this check:*/}} - {{/* {{- fail "ERROR: Invalid SQLite configuration detected!\n\nWhen using SQLite with persistent storage (persistence.enabled=true), the 'database.url' must be empty.\n\nSQLite will automatically use the persistent volume at the working directory.\n\nTo fix this:\n - Set persistence.enabled=true\n - Set postgresql.enabled=false\n - Set database.url to empty string\n\nIf you want to use an external database instead:\n - Set persistence.enabled=false\n - Set postgresql.enabled=false\n - Configure database.url with your external database connection string" }}*/}} - {{- end }} - {{- if and $databaseUrlConfigured $postgresqlEnabled (not $isMemoryDatabase) }} - {{- fail "ERROR: Conflicting database configuration detected!\n\nYou cannot specify both 'database.url' and enable the built-in PostgreSQL (postgresql.enabled=true).\n\nTo fix this, choose ONE option:\n\n 1. Use external database:\n - Set postgresql.enabled=false\n - Keep database.url configured with your external database\n\n 2. Use built-in PostgreSQL:\n - Set postgresql.enabled=true\n - Set database.url to empty string\n\nThe database.url setting overrides PostgreSQL settings, so having both enabled creates ambiguity." }} - {{- end }} -{{- end }} {{/* -Generate imagePullSecrets including optional internal registry secret +Generate imagePullSecrets */}} {{- define "agentstack.imagePullSecrets" -}} -{{- $secrets := list -}} -{{- range .Values.imagePullSecrets -}} - {{- $secrets = append $secrets . 
-}} -{{- end -}} -{{- if .Values.localDockerRegistry.enabled -}} - {{- $internalSecret := dict "name" "agentstack-registry-secret" -}} - {{- $secrets = append $secrets $internalSecret -}} -{{- end -}} -{{- if $secrets -}} +{{- if .Values.imagePullSecrets -}} imagePullSecrets: -{{- range $secrets }} +{{- range .Values.imagePullSecrets }} - name: {{ .name }} {{- end -}} {{- end -}} {{- end }} -{{/* -Generate environment variables for registry docker configs -*/}} -{{- define "agentstack.registryEnvVars" -}} -{{- $secrets := list -}} -{{- range .Values.imagePullSecrets -}} - {{- $secrets = append $secrets . -}} -{{- end -}} -{{- if .Values.localDockerRegistry.enabled -}} - {{- $internalSecret := dict "name" "agentstack-registry-secret" -}} - {{- $secrets = append $secrets $internalSecret -}} -{{- end -}} -{{- range $idx, $secret := $secrets }} -- name: OCI_REGISTRY_DOCKER_CONFIG_JSON__{{ $idx }} - valueFrom: - secretKeyRef: - name: {{ $secret.name }} - key: ".dockerconfigjson" -{{- end }} -{{- end }} - {{/* *** REDIS CONFIGURATION *** */}} @@ -505,38 +445,6 @@ false {{- end -}} {{- end -}} -{{- define "agentstack.phoenix.fullname" -}} -{{- include "common.names.dependency.fullname" (dict "chartName" "phoenix" "chartValues" .Values.phoenix "context" $) -}} -{{- end -}} - -{{/* -Return if Redis is enabled -*/}} -{{- define "agentstack.phoenix.enabled" -}} -{{- or .Values.phoenix.enabled .Values.externalPhoenix.url -}} -{{- end -}} - -{{/* -Return the Phoenix URL -*/}} -{{- define "agentstack.phoenix.url" -}} -{{- if .Values.phoenix.enabled }} - {{- printf "http://%s-svc:6006" (include "agentstack.phoenix.fullname" .) 
-}} -{{- else -}} - {{- .Values.externalPhoenix.url -}} -{{- end -}} -{{- end -}} - -{{/* -Return the Phoenix API KEY -*/}} -{{- define "agentstack.phoenix.apiKey" -}} -{{- if .Values.phoenix.enabled }} - {{- "" -}} -{{- else -}} - {{- print .Values.externalPhoenix.apiKey -}} -{{- end -}} -{{- end -}} {{/* *** OIDC CONFIGURATION *** @@ -557,7 +465,11 @@ Return the OIDC Issuer URL {{- if .Values.keycloak.enabled -}} {{- print .Values.keycloak.publicIssuerUrl -}} {{- else -}} - {{- print .Values.externalOidcProvider.issuerUrl -}} + {{- if .Values.externalOidcProvider.publicIssuerUrl -}} + {{- print .Values.externalOidcProvider.publicIssuerUrl -}} + {{- else -}} + {{- print .Values.externalOidcProvider.issuerUrl -}} + {{- end -}} {{- end -}} {{- end -}} diff --git a/helm/templates/agent/deployment.yaml b/helm/templates/agent/deployment.yaml deleted file mode 100644 index 23e5aea11c..0000000000 --- a/helm/templates/agent/deployment.yaml +++ /dev/null @@ -1,86 +0,0 @@ -{{- $root := . }} -{{- range $idx, $p := .Values.unmanagedProviders}} ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "agent.fullname" (dict "root" $root "image" $p.location) }} - labels: - app: {{ include "agent.fullname" (dict "root" $root "image" $p.location) }} - {{- include "agentstack.labels" $root | nindent 4 }} - {{- with $root.Values.agent.deploymentAnnotations }} - annotations: - {{- toYaml . | nindent 8 }} - {{- end }} -spec: - replicas: 1 - selector: - matchLabels: - app: {{ include "agent.fullname" (dict "root" $root "image" $p.location) }} - {{- include "agentstack.selectorLabels" $root | nindent 6 }} - template: - metadata: - annotations: - checksum/secret: {{ include (print $.Template.BasePath "/agent/secret.yaml") $root | sha256sum }} - {{- with $root.Values.agent.podAnnotations }} - {{- toYaml . 
| nindent 8 }} - {{- end }} - labels: - app: {{ include "agent.fullname" (dict "root" $root "image" $p.location) }} - {{- include "agentstack.labels" $root | nindent 8 }} - {{- with $root.Values.podLabels }} - {{- toYaml . | nindent 8 }} - {{- end }} - spec: - {{- include "agentstack.imagePullSecrets" $root | nindent 6 }} - {{- with $root.Values.agent.podSecurityContext }} - securityContext: - {{- toYaml . | nindent 8 }} - {{- end }} - containers: - - name: agent-container - {{- with $root.Values.agent.livenessProbe }} - livenessProbe: - {{- toYaml . | nindent 12 }} - {{- end }} - {{- with $root.Values.agent.startupProbe }} - startupProbe: - {{- toYaml . | nindent 12 }} - {{- end }} - {{- with $root.Values.agent.resources }} - resources: - {{- toYaml . | nindent 12 }} - {{- end }} - {{- with $root.Values.agent.volumeMounts }} - volumeMounts: - {{- toYaml . | nindent 12 }} - {{- end }} - image: {{ $p.location }} - {{- if .Values.agent.imagePullPolicy }} - imagePullPolicy: {{ .Values.agent.imagePullPolicy }} - {{- end }} - ports: - - containerPort: {{ $root.Values.agent.service.port }} - name: http - env: - - name: HOST - value: 0.0.0.0 - - name: HOME - value: '/tmp' - envFrom: - - secretRef: - name: agent-variables - - {{- with $root.Values.agent.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with $root.Values.agent.affinity }} - affinity: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with $root.Values.agent.tolerations }} - tolerations: - {{- toYaml . | nindent 8 }} - {{- end }} -{{- end }} diff --git a/helm/templates/agent/secret.yaml b/helm/templates/agent/secret.yaml deleted file mode 100644 index 4c784047e6..0000000000 --- a/helm/templates/agent/secret.yaml +++ /dev/null @@ -1,12 +0,0 @@ -{{- if and .Values.unmanagedProviders }} -apiVersion: v1 -kind: Secret -metadata: - name: agent-variables - labels: - {{- include "agentstack.labels" . 
| nindent 4 }} -data: - {{- range $key, $value := .Values.variables }} - {{ $key }}: {{ $value | b64enc }} - {{- end }} -{{- end }} diff --git a/helm/templates/agent/service.yaml b/helm/templates/agent/service.yaml deleted file mode 100644 index 028a1c387b..0000000000 --- a/helm/templates/agent/service.yaml +++ /dev/null @@ -1,21 +0,0 @@ -{{- $root := . }} -{{- range $idx, $p := .Values.unmanagedProviders}} ---- -apiVersion: v1 -kind: Service -metadata: - name: {{ include "agent.fullname" (dict "root" $root "image" $p.location) }} - labels: - app: {{ include "agent.fullname" (dict "root" $root "image" $p.location) }} - {{- include "agentstack.labels" $root | nindent 4 }} -spec: - type: {{ $root.Values.agent.service.type }} - selector: - app: {{ include "agent.fullname" (dict "root" $root "image" $p.location) }} - {{- include "agentstack.selectorLabels" $root | nindent 4 }} - ports: - - port: {{ $root.Values.agent.service.port }} - targetPort: http - protocol: TCP - name: http -{{- end }} \ No newline at end of file diff --git a/helm/templates/collector/config.yaml b/helm/templates/collector/config.yaml deleted file mode 100644 index eda4315ec5..0000000000 --- a/helm/templates/collector/config.yaml +++ /dev/null @@ -1,77 +0,0 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - name: otel-collector-cm -data: - base.yaml: | - receivers: - otlp: - protocols: - grpc: - endpoint: 0.0.0.0:4317 - max_recv_msg_size_mib: 64 - http: - endpoint: 0.0.0.0:4318 - max_request_body_size: 67108864 - - exporters: - nop: - {{- if include "agentstack.phoenix.enabled" . }} - otlphttp/phoenix: - endpoint: {{ include "agentstack.phoenix.url" . | quote }} - {{- $phoenixApiKey := include "agentstack.phoenix.apiKey" . 
}} - {{- if $phoenixApiKey }} - headers: - Authorization: Bearer {{ $phoenixApiKey }} - {{- end }} - {{- end }} - {{- if .Values.collector.exporters }} - {{ toYaml .Values.collector.exporters | nindent 6 }} - {{- end }} - - - processors: - memory_limiter: - check_interval: 1s - limit_mib: 1000 - batch: - send_batch_size: 1 - send_batch_max_size: 1 - filter/phoenix: - traces: - span: - ## Filter for openinference packages - #### Python format `openinference.instrumentation.${package_name}` - #### - crewAI exception `crewai.telemetry` - #### Javascript format `@arizeai/openinference-instrumentation-${packageName}` - - not(IsMatch(instrumentation_scope.name, "^openinference\\.instrumentation\\..*") or IsMatch(instrumentation_scope.name, "^@arizeai/openinference-instrumentation-.*") or instrumentation_scope.name == "crewai.telemetry") - {{- if .Values.collector.processors }} - {{ toYaml .Values.collector.processors | nindent 6 }} - {{- end }} - - extensions: - health_check: - - service: - extensions: [ health_check ] - pipelines: - {{- if include "agentstack.phoenix.enabled" . }} - traces/phoenix: - receivers: [ otlp ] - processors: [ memory_limiter, filter/phoenix, batch ] - exporters: [ otlphttp/phoenix ] - {{ else }} - traces/nop: - receivers: [ otlp ] - processors: [ ] - exporters: [ nop ] - {{- end }} - metrics/nop: - receivers: [ otlp ] - exporters: [ nop ] - logs/nop: - receivers: [ otlp ] - exporters: [ nop ] - {{- if .Values.collector.pipelines }} - {{ toYaml .Values.collector.pipelines | nindent 8 }} - {{- end }} diff --git a/helm/templates/collector/deployment.yaml b/helm/templates/collector/deployment.yaml deleted file mode 100644 index 17852b54d3..0000000000 --- a/helm/templates/collector/deployment.yaml +++ /dev/null @@ -1,71 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: otel-collector - labels: - app: otel-collector - {{- include "agentstack.labels" . 
| nindent 4 }} -spec: - replicas: 1 - selector: - matchLabels: - app: otel-collector - {{- include "agentstack.selectorLabels" . | nindent 6 }} - template: - metadata: - labels: - app: otel-collector - {{- include "agentstack.labels" . | nindent 8 }} - annotations: - checksum/config: {{ include (print $.Template.BasePath "/collector/config.yaml") . | sha256sum }} - spec: - {{- include "agentstack.imagePullSecrets" . | nindent 6 }} - {{- with .Values.agent.podSecurityContext }} - securityContext: - {{- toYaml . | nindent 8 }} - {{- end }} - containers: - - name: agent-container - {{- with .Values.collector.livenessProbe }} - livenessProbe: - {{- toYaml . | nindent 12 }} - {{- end }} - {{- with .Values.collector.readinessProbe }} - readinessProbe: - {{- toYaml . | nindent 12 }} - {{- end }} - {{- with .Values.collector.resources }} - resources: - {{- toYaml . | nindent 12 }} - {{- end }} - volumeMounts: - - name: otel-collector-config - mountPath: /config - readOnly: true - image: {{ .Values.collector.image }} - command: - - /otelcol-contrib - - --config - - /config/base.yaml - imagePullPolicy: IfNotPresent - ports: - - containerPort: 4317 - name: grpc - - containerPort: 4318 - name: http - volumes: - - name: otel-collector-config - configMap: - name: otel-collector-cm - {{- with .Values.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.affinity }} - affinity: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.agent.tolerations }} - tolerations: - {{- toYaml . | nindent 8 }} - {{- end }} diff --git a/helm/templates/collector/service.yaml b/helm/templates/collector/service.yaml deleted file mode 100644 index 7dcbef44dc..0000000000 --- a/helm/templates/collector/service.yaml +++ /dev/null @@ -1,21 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: otel-collector-svc - labels: - app: otel-collector - {{- include "agentstack.labels" . 
| nindent 4 }} -spec: - type: {{ .Values.collector.service.type }} - ports: - - port: 4317 - targetPort: grpc - protocol: TCP - name: grpc - - port: 4318 - targetPort: http - protocol: TCP - name: http - selector: - app: otel-collector - {{- include "agentstack.selectorLabels" . | nindent 4 }} diff --git a/helm/templates/config/provider_templates.yaml b/helm/templates/config/provider_templates.yaml deleted file mode 100644 index 4af0cdf459..0000000000 --- a/helm/templates/config/provider_templates.yaml +++ /dev/null @@ -1,362 +0,0 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - name: agentstack-manifest-templates-cm -data: - deployment.yaml: | - apiVersion: apps/v1 - kind: Deployment - metadata: - name: "{{`{{ provider_deployment_name }}`}}" - labels: - app: "{{`{{ provider_app_label }}`}}" - managedBy: agentstack - annotations: - {{- with .Values.agent.deploymentAnnotations }} - {{- toYaml . | nindent 8 }} - {{- end }} - spec: - strategy: - type: {{ .Values.agent.deploymentStrategy }} - replicas: 1 - selector: - matchLabels: - app: "{{`{{ provider_app_label }}`}}" - template: - metadata: - labels: - app: "{{`{{ provider_app_label }}`}}" - {{- with .Values.agent.podAnnotations }} - annotations: - {{- toYaml . | nindent 12 }} - {{- end }} - spec: - {{- include "agentstack.imagePullSecrets" . | nindent 10 }} - containers: - - name: agent-container - image: "{{`{{ image }}`}}" - {{- if .Values.agent.imagePullPolicy }} - imagePullPolicy: {{ .Values.agent.imagePullPolicy }} - {{- end }} - ports: - - containerPort: 8000 - env: - - name: HOME - value: /tmp - envFrom: - - secretRef: - name: "{{`{{ provider_secret_name }}`}}" - {{- with .Values.agent.livenessProbe }} - livenessProbe: - {{- toYaml . | nindent 16 }} - {{- end }} - {{- with .Values.agent.startupProbe }} - startupProbe: - {{- toYaml . 
| nindent 16 }} - {{- end }} - service.yaml: | - apiVersion: v1 - kind: Service - metadata: - name: "{{`{{ provider_service_name }}`}}" - labels: - app: "{{`{{ provider_app_label }}`}}" - spec: - type: {{ .Values.agent.service.type }} - selector: - app: "{{`{{ provider_app_label }}`}}" - ports: - - port: {{ .Values.agent.service.port }} - targetPort: 8000 - protocol: TCP - name: http - - secret.yaml: | - apiVersion: v1 - kind: Secret - metadata: - name: "{{`{{ provider_secret_name }}`}}" - labels: - app: "{{`{{ provider_app_label }}`}}" - type: Opaque - data: {{`{{ secret_data }}`}} - - {{- if .Values.providerBuilds.enabled }} - build-provider-secret.yaml: | - apiVersion: v1 - kind: Secret - metadata: - name: "{{`{{ git_token_secret_name }}`}}" - labels: - app: "{{`{{ provider_build_label }}`}}" - type: Opaque - data: {{`{{ secret_data }}`}} - build-provider-job.yaml: | - apiVersion: batch/v1 - kind: Job - metadata: - name: "{{`{{ provider_build_name }}`}}" - labels: - app: "{{`{{ provider_build_label }}`}}" - managedBy: agentstack - spec: - backoffLimit: 0 - activeDeadlineSeconds: {{`{{ job_timeout_seconds }}`}} - ttlSecondsAfterFinished: 60 - template: - spec: - {{- if .Values.providerBuilds.externalClusterExecutor.serviceAccountName }} - serviceAccountName: {{ .Values.providerBuilds.externalClusterExecutor.serviceAccountName }} - {{- end }} - terminationGracePeriodSeconds: 3 - imagePullSecrets: - - name: {{.Values.providerBuilds.buildRegistry.secretName | quote}} - securityContext: - fsGroup: 1000 - restartPolicy: Never - initContainers: - - name: git-clone - image: ghcr.io/i-am-bee/alpine/git:v2.49.1 - command: [ "/bin/sh" ] - env: - - name: GIT_TOKEN - valueFrom: - secretKeyRef: - name: {{`{{ git_token_secret_name }}`}} - key: GIT_TOKEN - optional: true - args: - - -c - - | - set -eo pipefail - echo "Cloning repository..." 
- # Check if GitHub token is available for this host - if [ -n "$GIT_TOKEN" ]; then - echo "Using authenticated clone for {{`{{ git_host }}`}}" - else - echo "Using unauthenticated clone for {{`{{ git_host }}`}}" - fi - git clone --depth 1 \ - --revision {{`{{ git_ref }}`}} \ - "https://$GIT_TOKEN@{{`{{ git_host }}`}}/{{`{{ git_org }}`}}/{{`{{ git_repo }}`}}.git" \ - /tmp/repo - mv "/tmp/repo/{{`{{ git_path }}`}}"/* /workspace/ 2>/dev/null || true - mv "/tmp/repo/{{`{{ git_path }}`}}"/.[^.]* /workspace/ 2>/dev/null || true - echo "Repository cloned successfully" - ls -la /workspace - volumeMounts: - - name: workspace - mountPath: /workspace - securityContext: - capabilities: - drop: - - ALL - runAsUser: 1000 - runAsGroup: 1000 - # Build image with BuildKit - {{- if eq .Values.providerBuilds.buildBackend "buildkit" }} - - name: buildkit - image: ghcr.io/i-am-bee/moby/buildkit:v0.24.0-rootless - env: - - name: DOCKER_CONFIG - value: /docker - - name: BUILDKITD_FLAGS - value: --oci-worker-no-process-sandbox - command: - - buildctl-daemonless.sh - args: - - build - - --frontend - - dockerfile.v0 - - --local - - "dockerfile=/workspace/{{`{{ dockerfile_path }}`}}" - - --local - - context=/workspace - - --opt - - platform=linux/amd64,linux/arm64 - - --output - - type=image,name={{`{{destination}}`}},push=true{{- if .Values.providerBuilds.buildRegistry.insecure }},registry.insecure=true{{- end }} - - --export-cache - - type=inline - - --import-cache - - type=registry,ref={{`{{destination}}`}}{{- if .Values.providerBuilds.buildRegistry.insecure }},registry.insecure=true{{- end }} - securityContext: - # Needs Kubernetes >= 1.19 - seccompProfile: - type: Unconfined - # Needs Kubernetes >= 1.30 - appArmorProfile: - type: Unconfined - # To change UID/GID, you need to rebuild the image - runAsUser: 1000 - runAsGroup: 1000 - volumeMounts: - - name: workspace - mountPath: /workspace - # Dockerfile has `VOLUME /home/user/.local/share/buildkit` by default too, - # but the default 
VOLUME does not work with rootless on Google's Container-Optimized OS - # as it is mounted with `nosuid,nodev`. - # https://github.com/moby/buildkit/issues/879#issuecomment-1240347038 - - mountPath: /home/user/.local/share/buildkit - name: buildkitd - - name: docker-config - mountPath: /docker/config.json - subPath: .dockerconfigjson - readOnly: true - {{- else if eq .Values.providerBuilds.buildBackend "kaniko" }} - # Build image with Kaniko (no securityContext required) - - name: kaniko-build - image: ghcr.io/kaniko-build/dist/chainguard-dev-kaniko/executor:v1.25.2-slim - args: - - --context=/workspace - - "--dockerfile=/workspace/{{`{{ dockerfile_path }}`}}" - - --no-push - - --tar-path=/tmp/image.tar - volumeMounts: - - name: workspace - mountPath: /workspace - - name: image-tar - mountPath: /tmp - securityContext: - runAsUser: 0 - {{- if .Values.providerBuilds.kaniko.useSecurityContextCapabilities }} - # Capabilities don't work as expected in k3s. Instead of complex security configuration, use a simple toggle - # this can be refined later - capabilities: - drop: - - ALL - add: - - CAP_CHOWN - - CAP_SETGID - - CAP_SETUID - - CAP_FOWNER - - CAP_DAC_OVERRIDE - {{- end }} - # Main container: Step 3 Push the intermediary image - - name: crane-push - image: ghcr.io/i-am-bee/alpine/crane:0.20.6 - env: - - name: DOCKER_CONFIG - value: /tmp/.docker - args: - - push - - /tmp/image.tar - - {{`{{ destination }}`}} - {{- if .Values.providerBuilds.buildRegistry.insecure }} - - --insecure - {{- end }} - volumeMounts: - - name: image-tar - mountPath: /tmp - - name: docker-config - mountPath: /tmp/.docker/config.json - subPath: .dockerconfigjson - readOnly: true - securityContext: - capabilities: - drop: - - ALL - runAsUser: 1000 - runAsGroup: 1000 - resources: - requests: - memory: "256Mi" - cpu: "200m" - limits: - memory: "512Mi" - cpu: "500m" - {{- end }} - - name: run-agent - image: "{{`{{ destination }}`}}" - restartPolicy: Always # This makes it a daemon sidecar container 
- env: - - name: PORT - value: "8000" - - name: HOST - value: "0.0.0.0" - securityContext: - capabilities: - drop: - - ALL - runAsUser: 1000 - runAsGroup: 1000 - resources: - requests: - memory: "512Mi" - cpu: "500m" - limits: - memory: "1Gi" - cpu: "1" - containers: - # Main container: Step 3 - Mutate and push final image - - name: mutate - image: ghcr.io/i-am-bee/alpine/crane:0.20.6 - env: - - name: DOCKER_CONFIG - value: /tmp/.docker - command: - - /bin/sh - - -c - - | - set -eo pipefail - # Extract agent manifest - for i in $(seq 1 30); do - if nc -z 127.0.0.1 8000; then - break - fi - echo "Waiting for server to start... (attempt $i/10)" - sleep 1 - done - - AGENT_CARD_CONTENT=$(wget -O - http://127.0.0.1:8000/.well-known/agent-card.json) - if [ $? -eq 0 ] && [ -n "$AGENT_CARD_CONTENT" ]; then - echo "Successfully extracted agent-card.json" - echo "Content preview:" - echo "$AGENT_CARD_CONTENT" | head -n 10 - else - echo "Failed to extract agent-card.json" - exit 1 - fi - - echo "Starting image mutation..." - echo "Extraction complete!" 
- - LABEL_CONTENT=$(echo -n "$AGENT_CARD_CONTENT" | base64 -w 0) - echo "Label content size: $(echo -n $LABEL_CONTENT | wc -c) bytes" - crane mutate {{- if .Values.providerBuilds.buildRegistry.insecure }} --insecure{{- end }} {{`{{ destination }}`}} \ - --label "beeai.dev.agent.json=${LABEL_CONTENT}" - - volumeMounts: - - name: docker-config - mountPath: /tmp/.docker/config.json - subPath: .dockerconfigjson - readOnly: true - securityContext: - capabilities: - drop: - - ALL - runAsUser: 1000 - runAsGroup: 1000 - resources: - requests: - memory: "256Mi" - cpu: "200m" - limits: - memory: "512Mi" - cpu: "500m" - - volumes: - - name: workspace - emptyDir: { } - {{- if eq .Values.providerBuilds.buildBackend "buildkit" }} - - name: buildkitd - emptyDir: { } - {{- else if eq .Values.providerBuilds.buildBackend "kaniko" }} - - name: image-tar - emptyDir: { } - {{- end }} - - name: docker-config - secret: - secretName: {{ .Values.providerBuilds.buildRegistry.secretName }} - {{- end }} - diff --git a/helm/templates/config/providers.yaml b/helm/templates/config/providers.yaml index b56e3950c7..880acca61f 100644 --- a/helm/templates/config/providers.yaml +++ b/helm/templates/config/providers.yaml @@ -5,18 +5,6 @@ metadata: name: agentstack-providers-secret type: Opaque stringData: - providers_registry.yaml: | - {{- if or .Values.providers .Values.unmanagedProviders }} - providers: - {{- range $idx, $p := $root.Values.unmanagedProviders }} - - location: "http://{{ include "agent.fullname" (dict "root" $root "image" $p.location) }}:{{ $root.Values.agent.service.port }}" - {{- end }} - {{- range $idx, $p := $root.Values.providers }} - - {{ $p | toYaml | nindent 8 | trim }} - {{- end }} - {{- else }} - providers: [ ] - {{- end }} model_providers_registry.yaml: | {{- if .Values.modelProviders }} providers: diff --git a/helm/templates/config/secret.yaml b/helm/templates/config/secret.yaml index ef255a2f29..b29b4a022e 100644 --- a/helm/templates/config/secret.yaml +++ 
b/helm/templates/config/secret.yaml @@ -52,9 +52,6 @@ data: {{- if .Values.github.auths }} .githubconfigjson: {{ .Values.github | toJson | b64enc | quote }} {{- end }} - {{- if .Values.providerBuilds.externalClusterExecutor.kubeconfig }} - providerBuildExternalClusterKubeconfig: {{ .Values.providerBuilds.externalClusterExecutor.kubeconfig | b64enc | quote }} - {{- end }} {{- if and (include "agentstack.databaseSslEnabled" .) .Values.externalDatabase.sslRootCert }} sslRootCert: {{ .Values.externalDatabase.sslRootCert | b64enc | quote }} {{- end }} diff --git a/helm/templates/deployment.yaml b/helm/templates/deployment.yaml index 1d5a1d2c66..6c1792327c 100644 --- a/helm/templates/deployment.yaml +++ b/helm/templates/deployment.yaml @@ -15,7 +15,6 @@ spec: metadata: annotations: checksum/providers: {{ include (print $.Template.BasePath "/config/providers.yaml") . | sha256sum }} - checksum/templates: {{ include (print $.Template.BasePath "/config/provider_templates.yaml") . | sha256sum }} checksum/secret: {{ include (print $.Template.BasePath "/config/secret.yaml") . | sha256sum }} {{- with .Values.podAnnotations }} {{- toYaml . | nindent 8 }} @@ -199,25 +198,6 @@ spec: value: {{ .Values.contextResourcesExpireAfterDays | quote }} - name: TEXT_EXTRACTION__ENABLED value: {{ .Values.docling.enabled | quote }} - - name: AGENT_REGISTRY__SYNC_PERIOD_CRON - value: {{ .Values.agentRegistrySyncPeriodCron | quote }} - {{- include "agentstack.registryEnvVars" . 
| nindent 12 }} - - name: FEATURES__PROVIDER_BUILDS - value: {{ .Values.providerBuilds.enabled | quote }} - {{- if .Values.providerBuilds.enabled }} - - name: PROVIDER_BUILD__OCI_BUILD_REGISTRY_PREFIX - value: {{ .Values.providerBuilds.buildRegistry.registryPrefix | quote }} - - name: PROVIDER_BUILD__IMAGE_FORMAT - value: {{ .Values.providerBuilds.buildRegistry.imageFormat | quote }} - {{- if .Values.providerBuilds.externalClusterExecutor.namespace }} - - name: PROVIDER_BUILD__K8S_NAMESPACE - value: {{ .Values.providerBuilds.externalClusterExecutor.namespace | quote }} - {{- end }} - {{- if .Values.providerBuilds.externalClusterExecutor.kubeconfig }} - - name: PROVIDER_BUILD__K8S_KUBECONFIG - value: /app/external_build_kubeconfig - {{- end }} - {{- end }} {{- if .Values.github.auths }} - name: GITHUB_REGISTRY_CONFIG_JSON valueFrom: @@ -225,8 +205,6 @@ spec: name: agentstack-secret key: .githubconfigjson {{- end }} - - name: AGENT_REGISTRY__LOCATIONS__FILE - value: file:///app/config/providers_registry.yaml - name: MODEL_PROVIDER_REGISTRY__LOCATIONS__FILE value: file:///app/config/model_providers_registry.yaml {{- if .Values.defaultLLMModel }} @@ -237,10 +215,6 @@ spec: - name: MODEL_PROVIDER__DEFAULT_EMBEDDING_MODEL value: {{ .Values.defaultEmbeddingModel | quote }} {{- end }} - {{- range $name, $location := .Values.externalRegistries }} - - name: AGENT_REGISTRY__LOCATIONS__{{ $name | upper }} - value: {{ $location }} - {{- end }} - name: PERSISTENCE__ENCRYPTION_KEY valueFrom: secretKeyRef: @@ -299,9 +273,26 @@ spec: - name: AUTH__DISABLE_AUTH value: "true" {{- end }} - {{- if .Values.disableProviderDownscaling }} - - name: PROVIDER__DISABLE_DOWNSCALING + # Kagenti Configuration + {{- if .Values.kagenti.enabled }} + - name: KAGENTI__ENABLED value: "true" + - name: KAGENTI__API_URL + value: {{ .Values.kagenti.apiUrl | quote }} + - name: KAGENTI__AUTH_TOKEN_URL + value: {{ include "agentstack.oidc.internalIssuerUrl" . 
| trimSuffix "/" }}/protocol/openid-connect/token + - name: KAGENTI__CLIENT_ID + value: {{ include "agentstack.oidc.serverClientId" . | quote }} + - name: KAGENTI__CLIENT_SECRET + valueFrom: + secretKeyRef: + key: {{ include "agentstack.oidc.serverClientSecretKey" . }} + name: {{ include "agentstack.oidc.serverClientSecretName" . }} + - name: KAGENTI__NAMESPACES + value: {{ .Values.kagenti.namespaces | toJson | quote }} + {{- else }} + - name: KAGENTI__ENABLED + value: "false" {{- end }} - name: GENERATE_CONVERSATION_TITLE__ENABLED value: {{ .Values.generateConversationTitle.enabled | quote }} @@ -309,8 +300,6 @@ spec: value: {{ .Values.generateConversationTitle.model | quote }} - name: GENERATE_CONVERSATION_TITLE__PROMPT value: {{ .Values.generateConversationTitle.prompt | quote }} - - name: PROVIDER__MANIFEST_TEMPLATE_DIR - value: /app/templates # Object Storage Configuration - name: OBJECT_STORAGE__ENDPOINT_URL value: {{ include "agentstack.s3.endpoint" . | quote }} @@ -367,23 +356,12 @@ spec: name: agentstack-connectors-secret - name: UVICORN_TIMEOUT_KEEP_ALIVE value: {{ .Values.uvicornTimeoutKeepAlive | quote }} - {{- if include "agentstack.phoenix.enabled" . }} + - name: TELEMETRY__COLLECTOR_URL + value: "http://otel-collector.kagenti-system:8335" - name: TELEMETRY__PHOENIX_URL - value: {{ include "agentstack.phoenix.url" . | quote }} - {{- $phoenixApiKey := include "agentstack.phoenix.apiKey" . }} - {{- if $phoenixApiKey }} - - name: TELEMETRY__PHOENIX_API_KEY - value: {{ $phoenixApiKey | quote }} - {{- end }} - {{- end }} + value: "http://phoenix.kagenti-system:6006" volumeMounts: - {{- if .Values.providerBuilds.externalClusterExecutor.kubeconfig }} - - name: provider-build-external-kubeconfig - mountPath: /app/external_build_kubeconfig - subPath: providerBuildExternalClusterKubeconfig - readOnly: true - {{- end }} {{- if and (include "agentstack.databaseSslEnabled" .) 
.Values.externalDatabase.sslRootCert }} - name: db-ssl-cert mountPath: /app/db.crt @@ -393,18 +371,10 @@ spec: - name: agentstack-providers mountPath: /app/config readOnly: true - - name: agentstack-manifest-templates - mountPath: /app/templates - readOnly: true {{- with .Values.volumeMounts }} {{- toYaml . | nindent 12 }} {{- end }} volumes: - {{- if .Values.providerBuilds.externalClusterExecutor.kubeconfig }} - - name: provider-build-external-kubeconfig - secret: - secretName: agentstack-secret - {{- end }} {{- if and (include "agentstack.databaseSslEnabled" .) .Values.externalDatabase.sslRootCert }} - name: db-ssl-cert secret: @@ -413,9 +383,6 @@ spec: - name: agentstack-providers secret: secretName: agentstack-providers-secret - - name: agentstack-manifest-templates - configMap: - name: agentstack-manifest-templates-cm {{- with .Values.volumes }} {{- toYaml . | nindent 8 }} {{- end }} diff --git a/helm/templates/keycloak/provision-job.yaml b/helm/templates/keycloak/provision-job.yaml index d398210293..687c39a6c9 100644 --- a/helm/templates/keycloak/provision-job.yaml +++ b/helm/templates/keycloak/provision-job.yaml @@ -1,4 +1,4 @@ -{{- if and .Values.auth.enabled .Values.keycloak.enabled }} +{{- if and .Values.auth.enabled .Values.auth.provisionKeycloak }} apiVersion: batch/v1 kind: Job metadata: @@ -29,7 +29,7 @@ spec: - | # Wait for Keycloak to be ready by attempting login echo "Waiting for Keycloak to be ready..." - until /opt/keycloak/bin/kcadm.sh config credentials --server {{ regexReplaceAll "/realms/.*" .Values.keycloak.internalIssuerUrl "" | quote }} --realm master --user $KC_ADMIN_USER --password $KC_ADMIN_PASSWORD {{ if not .Values.keycloak.httpEnabled }}--no-config --truststore /opt/keycloak/conf/server.crt.pem{{ end }}; do + until /opt/keycloak/bin/kcadm.sh config credentials --server {{ regexReplaceAll "/realms/.*" (include "agentstack.oidc.internalIssuerUrl" .) 
"" | quote }} --realm master --user $KC_ADMIN_USER --password $KC_ADMIN_PASSWORD {{ if and .Values.keycloak.enabled (not .Values.keycloak.httpEnabled) }}--no-config --truststore /opt/keycloak/conf/server.crt.pem{{ end }}; do echo "Keycloak not ready or login failed... retrying in 5s" sleep 5 done @@ -57,15 +57,28 @@ spec: /opt/keycloak/bin/kcadm.sh create roles -r agentstack -s name=agentstack-developer fi + # Create kagenti RBAC roles (idempotent — needed for single-realm setup) + echo "Creating kagenti RBAC roles..." + for KAGENTI_ROLE in kagenti-viewer kagenti-operator kagenti-admin; do + if ! /opt/keycloak/bin/kcadm.sh get roles/$KAGENTI_ROLE -r agentstack > /dev/null 2>&1; then + /opt/keycloak/bin/kcadm.sh create roles -r agentstack -s name=$KAGENTI_ROLE -s "description=Kagenti RBAC role: $KAGENTI_ROLE" + fi + done + # Create/Update Server Client echo "Configuring Server Client..." CID=$(/opt/keycloak/bin/kcadm.sh get clients -r agentstack -q clientId=agentstack-server --fields id --format csv --noquotes) if [ -z "$CID" ]; then /opt/keycloak/bin/kcadm.sh create clients -r agentstack -s clientId=agentstack-server -s enabled=true -s clientAuthenticatorType=client-secret -s secret=$SERVER_CLIENT_SECRET -s serviceAccountsEnabled=true -s directAccessGrantsEnabled=true -s standardFlowEnabled=false + CID=$(/opt/keycloak/bin/kcadm.sh get clients -r agentstack -q clientId=agentstack-server --fields id --format csv --noquotes) else /opt/keycloak/bin/kcadm.sh update clients/$CID -r agentstack -s secret=$SERVER_CLIENT_SECRET -s enabled=true fi + # Assign kagenti-viewer role to agentstack-server service account + echo "Assigning kagenti-viewer to agentstack-server service account..." + /opt/keycloak/bin/kcadm.sh add-roles -r agentstack --uusername service-account-agentstack-server --rolename kagenti-viewer + # Create/Update UI Client {{- if and .Values.ui.enabled }} echo "Configuring UI Client..." 
@@ -85,7 +98,7 @@ spec: if [ -z "$CID" ]; then /opt/keycloak/bin/kcadm.sh create clients -r agentstack -s clientId=agentstack-cli -s enabled=true -s publicClient=true -s standardFlowEnabled=true -s directAccessGrantsEnabled=true -s "redirectUris=[\"http://localhost:9001/callback\"]" -s "webOrigins=[\"+\"]" else - /opt/keycloak/bin/kcadm.sh update clients/$CID -r agentstack -s enabled=true -s publicClient=true -s "redirectUris=[\"http://localhost:9001/callback\"]" + /opt/keycloak/bin/kcadm.sh update clients/$CID -r agentstack -s enabled=true -s publicClient=true -s directAccessGrantsEnabled=true -s "redirectUris=[\"http://localhost:9001/callback\"]" fi declare -a AUDIENCES=( diff --git a/helm/templates/keycloak/secret.yaml b/helm/templates/keycloak/secret.yaml index 4dc3e748c9..3d7f8bdd49 100644 --- a/helm/templates/keycloak/secret.yaml +++ b/helm/templates/keycloak/secret.yaml @@ -1,4 +1,4 @@ -{{- if and .Values.auth.enabled .Values.keycloak.enabled }} +{{- if .Values.auth.enabled }} apiVersion: v1 kind: Secret metadata: @@ -20,6 +20,7 @@ stringData: {{- end }} {{- end }} admin-password: {{ $adminPassword | quote }} + {{- if .Values.keycloak.enabled }} {{- if .Values.keycloak.persistence.useDedicatedDatabase }} # dedicated database db-password: {{ .Values.keycloak.persistence.dedicatedDatabaseConfig.password | quote }} @@ -27,4 +28,5 @@ stringData: # shared database db-password: {{ include "agentstack.databasePassword" . | quote }} {{- end }} + {{- end }} {{- end }} diff --git a/helm/templates/registry/deployment.yaml b/helm/templates/registry/deployment.yaml deleted file mode 100644 index b0525e1e9c..0000000000 --- a/helm/templates/registry/deployment.yaml +++ /dev/null @@ -1,35 +0,0 @@ -{{- if .Values.localDockerRegistry.enabled }} -apiVersion: apps/v1 -kind: Deployment -metadata: - name: agentstack-registry - labels: - app: agentstack-registry - {{- include "agentstack.labels" . 
| nindent 4 }} -spec: - replicas: 1 - selector: - matchLabels: - app: agentstack-registry - {{- include "agentstack.selectorLabels" . | nindent 6 }} - template: - metadata: - labels: - app: agentstack-registry - {{- include "agentstack.labels" . | nindent 8 }} - spec: - containers: - - name: registry - image: ghcr.io/i-am-bee/library/registry:3 - ports: - - containerPort: 5000 - protocol: TCP - volumeMounts: - - name: registry-data - mountPath: /var/lib/registry - volumes: - - name: registry-data - hostPath: - path: /registry-data - type: Directory -{{- end }} \ No newline at end of file diff --git a/helm/templates/registry/secret.yaml b/helm/templates/registry/secret.yaml deleted file mode 100644 index e3a64d8a48..0000000000 --- a/helm/templates/registry/secret.yaml +++ /dev/null @@ -1,16 +0,0 @@ -{{- if .Values.localDockerRegistry.enabled }} -apiVersion: v1 -kind: Secret -metadata: - name: agentstack-registry-secret - labels: - app: agentstack-registry - {{- include "agentstack.labels" . | nindent 4 }} -type: kubernetes.io/dockerconfigjson -data: - .dockerconfigjson: {{- $registryConfig := dict "auths" (dict - (printf "agentstack-registry-svc:%d" (.Values.localDockerRegistry.service.port | int)) (dict "auth" "" "username" "" "password" "" "insecure" true) - (printf "agentstack-registry-svc.%s:%d" .Release.Namespace (.Values.localDockerRegistry.service.port | int)) (dict "auth" "" "username" "" "password" "" "insecure" true) - ) }} - {{ $registryConfig | toJson | b64enc }} -{{- end }} \ No newline at end of file diff --git a/helm/templates/registry/service.yaml b/helm/templates/registry/service.yaml deleted file mode 100644 index 02cb7853b8..0000000000 --- a/helm/templates/registry/service.yaml +++ /dev/null @@ -1,20 +0,0 @@ -{{- if .Values.localDockerRegistry.enabled }} -apiVersion: v1 -kind: Service -metadata: - name: agentstack-registry-svc - labels: - app: agentstack-registry - {{- include "agentstack.labels" . 
| nindent 4 }} -spec: - selector: - app: agentstack-registry - {{- include "agentstack.selectorLabels" . | nindent 4 }} - ports: - - port: {{ .Values.localDockerRegistry.service.port }} - targetPort: 5000 - {{- if (eq .Values.localDockerRegistry.service.type "NodePort") }} - nodePort: {{ .Values.localDockerRegistry.service.nodePort }} - {{- end }} - type: {{ .Values.localDockerRegistry.service.type }} -{{- end }} \ No newline at end of file diff --git a/helm/templates/ui/deployment.yaml b/helm/templates/ui/deployment.yaml index 3390f43f88..3d45dede00 100644 --- a/helm/templates/ui/deployment.yaml +++ b/helm/templates/ui/deployment.yaml @@ -65,7 +65,6 @@ spec: { "LocalSetup": {{ .Values.features.uiLocalSetup }}, "Connectors": {{ .Values.features.uiConnectors }}, - "ProviderBuilds": {{ .Values.providerBuilds.enabled }}, "Providers": {{ .Values.features.uiProviders }}, "Variables": {{ .Values.features.uiVariables }} } diff --git a/helm/values.yaml b/helm/values.yaml index 8c6f814954..6143af9da2 100644 --- a/helm/values.yaml +++ b/helm/values.yaml @@ -90,18 +90,6 @@ contextTokenPermissions: context_data: ["*"] -# -------- AGENTS ---------- -providers: [] -# Example: -# - location: ghcr.io/i-am-bee/agentstack/agent-1-example:0.1.0 -# auto_stop_timeout_sec: 0 # disable agent downscaling -# variables: -# # Variables should be strings (or they will be converted) -# MY_API_KEY: "sk-..." -# MY_CONFIG_VAR: "42" -# - location: ghcr.io/i-am-bee/agentstack/agent-2-example:0.1.0 -# auto_stop_timeout_sec: 600 # downscale after 10 minutes - defaultLLMModel: "" defaultEmbeddingModel: "" modelProviders: [] @@ -111,19 +99,12 @@ modelProviders: [] # base_url: "https://api.openai.com/v1" # api_key: "sk-..." 
-# DEPRECATED: Unmanaged providers -unmanagedProviders: [] # DEPRECATED: use providers instead -variables: {} # DEPRECATED: use server API to manage variables instead - -disableProviderDownscaling: false - -# External registries in the format: [name: githubURL] -# for example -# github: "https://github.com/i-am-bee/agentstack@v0.2.14#path=agent-registry.yaml" -externalRegistries: {} - -# Cron schedule for syncing external registries -agentRegistrySyncPeriodCron: "*/30 * * * * *" +# Kagenti integration - sync agents from kagenti API as providers +kagenti: + enabled: true + apiUrl: "http://kagenti-backend.kagenti-system.svc.cluster.local:8000" + namespaces: + - team1 # ------- SECURITY --------- @@ -142,9 +123,12 @@ auth: nextauthSecret: "" # If empty, a random string will be generated # These are important for audience validation, make sure to change them to your public URLs - nextauthUrl: "http://localhost:8334" + nextauthUrl: "http://agentstack.localtest.me:8080" nextauthDevUrl: "" - apiUrl: "http://localhost:8333" + apiUrl: "http://agentstack-api.localtest.me:8080" + + # Set to false to skip the Keycloak provisioning job (e.g. when using a non-Keycloak OIDC provider) + provisionKeycloak: true # In agentstack-server this is used to determine the request origin and dynamically create agent card. # For example https://agentstack.example.com/api/v1/providers/{provider_id} will return an agent cart with URL for @@ -247,6 +231,7 @@ keycloak: ## externalOidcProvider: issuerUrl: "" + publicIssuerUrl: "" # If empty, defaults to issuerUrl. Set when internal and public URLs differ (e.g., external Keycloak). 
name: "OIDC" id: "oidc" rolesPath: "realm_access.roles" # JSON path to roles in JWT claims (can be an array or a semicolon separated string) @@ -293,28 +278,6 @@ github: # This is for the secrets for pulling an image from a private repository more information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/ imagePullSecrets: [] -providerBuilds: - enabled: false - buildBackend: "kaniko" # Options: "buildkit" or "kaniko" - buildRegistry: - registryPrefix: "agentstack-registry-svc.default:5001" # This must include a dot - imageFormat: "{registry_prefix}/{org}/{repo}/{path}{dockerfile_path}:{commit_hash}" - secretName: "agentstack-registry-secret" - insecure: true - kaniko: - useSecurityContextCapabilities: false - externalClusterExecutor: - serviceAccountName: "" - namespace: "" # Kubernetes namespace for provider builds (defaults to current namespace if empty) - kubeconfig: "" # Kubeconfig YAML content for external cluster (optional) - # Example: - # kubeconfig: | - # apiVersion: v1 - # kind: Config - # clusters: - # - cluster: - # server: https://kubernetes.example.com - # ... 
# -------- SERVER ---------- # This sets the container image more information can be found here: https://kubernetes.io/docs/concepts/containers/images/ @@ -498,96 +461,9 @@ ui: livenessProbe: {} readinessProbe: {} -# --------- AGENT ---------- -agent: - podAnnotations: {} - deploymentStrategy: RollingUpdate - deploymentAnnotations: {} - imagePullPolicy: IfNotPresent - podLabels: {} - resources: {} - securityContext: {} - podSecurityContext: {} - service: - type: ClusterIP - port: 8000 - startupProbe: - httpGet: - path: /.well-known/agent-card.json - port: 8000 - initialDelaySeconds: 1 - periodSeconds: 3 - timeoutSeconds: 2 - failureThreshold: 20 - livenessProbe: - httpGet: - path: /.well-known/agent-card.json - port: 8000 - periodSeconds: 10 - timeoutSeconds: 2 - failureThreshold: 2 - volumes: [] - volumeMounts: [] - nodeSelector: {} - tolerations: [] - affinity: {} - # ----- INFRASTRUCTURE ------ -# INSECURE: Only use with local deployments -localDockerRegistry: - enabled: false - service: - type: NodePort - port: 5001 - nodePort: 30501 - -phoenix: - # **Important License Notice**: Phoenix is disabled by default in Agent Stack. When you enable Phoenix, - # be aware that Arize Phoenix is licensed under the Elastic License v2 (ELv2), which has specific terms regarding - # commercial use and distribution.By enabling Phoenix, you acknowledge that you are responsible for ensuring - # compliance with the ELv2 license terms for your specific use case. Please review the - # [Phoenix license](https://github.com/Arize-ai/phoenix/blob/main/LICENSE) before enabling this feature in - # production environments. 
- enabled: false - fullnameOverride: "phoenix" - ingress: - enabled: false - image: - registry: "ghcr.io" - repository: "i-am-bee/arizephoenix/phoenix" - securityContext: - container: - enabled: true - readOnlyRootFilesystem: true - server: - workingDir: /tmp/working_dir - grpcPort: 8335 - database: - url: "sqlite:////home/phoenix/phoenix.db" - defaultRetentionPolicyDays: 7 - persistence: - enabled: true - size: 8Gi - postgresql: - enabled: false - auth: - enableAuth: false -externalPhoenix: - url: "" - apiKey: "" - -collector: - image: ghcr.io/i-am-bee/otel/opentelemetry-collector-contrib:0.122.1@sha256:0076e6c250bef16968e29e8f94177b87b5a851c79ce14b270e657f5f655b9e04 - livenessProbe: {} - readinessProbe: {} - resources: {} - exporters: {} - processors: {} - pipelines: {} - service: - type: ClusterIP ## PostgreSQL chart configuration ## ref: https://github.com/bitnami/charts/blob/main/bitnami/postgresql/values.yaml @@ -619,7 +495,7 @@ postgresql: # TODO: bug - broken update password script: https://github.com/bitnami/charts/pull/34527 usePasswordFiles: false passwordUpdateJob: - enabled: true + enabled: false architecture: standalone primary: ## PostgreSQL Primary resource requests and limits diff --git a/mise.lock b/mise.lock index 3a8afb8931..036edb0580 100644 --- a/mise.lock +++ b/mise.lock @@ -71,45 +71,6 @@ checksum = "sha256:fe4f4a54daa4a750ffa7d1b0da471d077ac24f09d04d7c1f307e7b7950969 url = "https://github.com/google/addlicense/releases/download/v1.2.0/addlicense_v1.2.0_Windows_x86_64.zip" url_api = "https://api.github.com/repos/google/addlicense/releases/assets/282300418" -[[tools."github:telepresenceio/telepresence"]] -version = "2.25.2" -backend = "github:telepresenceio/telepresence" - -[tools."github:telepresenceio/telepresence"."platforms.linux-arm64"] -checksum = "sha256:d8f4b3cf7683d5794e9b02b4bfd30583e15c8d46f45d5c3309ab6b60f8e387b2" -url = "https://github.com/telepresenceio/telepresence/releases/download/v2.25.2/telepresence-linux-arm64" -url_api = 
"https://api.github.com/repos/telepresenceio/telepresence/releases/assets/333205819" - -[tools."github:telepresenceio/telepresence"."platforms.linux-arm64-musl"] -checksum = "sha256:d8f4b3cf7683d5794e9b02b4bfd30583e15c8d46f45d5c3309ab6b60f8e387b2" -url = "https://github.com/telepresenceio/telepresence/releases/download/v2.25.2/telepresence-linux-arm64" -url_api = "https://api.github.com/repos/telepresenceio/telepresence/releases/assets/333205819" - -[tools."github:telepresenceio/telepresence"."platforms.linux-x64"] -checksum = "sha256:1ce2f30c203beb5bb92035dd8e29212080101bfb8956f4856dd52745aa7c0130" -url = "https://github.com/telepresenceio/telepresence/releases/download/v2.25.2/telepresence-linux-amd64" -url_api = "https://api.github.com/repos/telepresenceio/telepresence/releases/assets/333205838" - -[tools."github:telepresenceio/telepresence"."platforms.linux-x64-musl"] -checksum = "sha256:1ce2f30c203beb5bb92035dd8e29212080101bfb8956f4856dd52745aa7c0130" -url = "https://github.com/telepresenceio/telepresence/releases/download/v2.25.2/telepresence-linux-amd64" -url_api = "https://api.github.com/repos/telepresenceio/telepresence/releases/assets/333205838" - -[tools."github:telepresenceio/telepresence"."platforms.macos-arm64"] -checksum = "sha256:79aa444eb9ef05ff64c0e6c04e97bef1460f3ae4800c191798996bdad71c8ec9" -url = "https://github.com/telepresenceio/telepresence/releases/download/v2.25.2/telepresence-darwin-arm64" -url_api = "https://api.github.com/repos/telepresenceio/telepresence/releases/assets/333205851" - -[tools."github:telepresenceio/telepresence"."platforms.macos-x64"] -checksum = "sha256:131d28a44b3c4fb2a2b04339ac7400314da530e5663e7f0e841d12e76f9ba616" -url = "https://github.com/telepresenceio/telepresence/releases/download/v2.25.2/telepresence-darwin-amd64" -url_api = "https://api.github.com/repos/telepresenceio/telepresence/releases/assets/333205861" - -[tools."github:telepresenceio/telepresence"."platforms.windows-x64"] -checksum = 
"sha256:66d9987fd310492d1fd5451ef945d8f080fe1ec25a049b5e1584f26e45db0d31" -url = "https://github.com/telepresenceio/telepresence/releases/download/v2.25.2/telepresence-windows-amd64.zip" -url_api = "https://api.github.com/repos/telepresenceio/telepresence/releases/assets/333205798" - [[tools.gum]] version = "0.17.0" backend = "aqua:charmbracelet/gum" @@ -277,6 +238,7 @@ url = "https://github.com/lima-vm/lima/releases/download/v2.0.3/lima-2.0.3-Linux [tools.lima."platforms.macos-arm64"] checksum = "sha256:22aee997df59e4fd448041b2d1214e48bd8eaf705d2d48a4307d65c1b179dc97" url = "https://github.com/lima-vm/lima/releases/download/v2.0.3/lima-2.0.3-Darwin-arm64.tar.gz" +provenance = "github-attestations" [tools.lima."platforms.macos-x64"] checksum = "sha256:0806bcb83a08411e9d878b43b2c4203f1556fe14f9f8ba1e5f0d5d9a3c2c0bd8" @@ -422,6 +384,38 @@ url = "https://github.com/pnpm/pnpm/releases/download/v10.15.1/pnpm-linux-x64" checksum = "sha256:b64bd9e75d92934251bd0b890d3d4d1073e85d71bf39227ff6a6408d27c3106d" url = "https://github.com/pnpm/pnpm/releases/download/v10.15.1/pnpm-macos-arm64" +[[tools.telepresence]] +version = "2.27.1" +backend = "aqua:telepresenceio/telepresence" + +[tools.telepresence."platforms.linux-arm64"] +checksum = "sha256:2d02fc876dcebc09128762169ac924ab5a48bcd9dd51da3148cab6b8dbe19fe3" +url = "https://github.com/telepresenceio/telepresence/releases/download/v2.27.1/telepresence-linux-arm64" + +[tools.telepresence."platforms.linux-arm64-musl"] +checksum = "sha256:2d02fc876dcebc09128762169ac924ab5a48bcd9dd51da3148cab6b8dbe19fe3" +url = "https://github.com/telepresenceio/telepresence/releases/download/v2.27.1/telepresence-linux-arm64" + +[tools.telepresence."platforms.linux-x64"] +checksum = "sha256:80dc881cd9618f6916407785cb3e61afc3d88901239cf4377d200d175db052e3" +url = "https://github.com/telepresenceio/telepresence/releases/download/v2.27.1/telepresence-linux-amd64" + +[tools.telepresence."platforms.linux-x64-musl"] +checksum = 
"sha256:80dc881cd9618f6916407785cb3e61afc3d88901239cf4377d200d175db052e3" +url = "https://github.com/telepresenceio/telepresence/releases/download/v2.27.1/telepresence-linux-amd64" + +[tools.telepresence."platforms.macos-arm64"] +checksum = "sha256:99299c7335df403204d6944f6ae4fa9066945aa4e7a3882c07c4545e8cc8e71d" +url = "https://github.com/telepresenceio/telepresence/releases/download/v2.27.1/telepresence-darwin-arm64" + +[tools.telepresence."platforms.macos-x64"] +checksum = "sha256:6f85060fa7979ed354495981251325b90fd6579cf93868dc25d8afb522fcd511" +url = "https://github.com/telepresenceio/telepresence/releases/download/v2.27.1/telepresence-darwin-amd64" + +[tools.telepresence."platforms.windows-x64"] +checksum = "sha256:aa56500985d3ac77e0e782a3fb427d03eb191590b9f361c89d9d6015321eaf70" +url = "https://github.com/telepresenceio/telepresence/releases/download/v2.27.1/telepresence-windows-amd64.zip" + [[tools.uv]] version = "0.10.4" backend = "aqua:astral-sh/uv" @@ -445,6 +439,7 @@ url = "https://github.com/astral-sh/uv/releases/download/0.10.4/uv-x86_64-unknow [tools.uv."platforms.macos-arm64"] checksum = "sha256:a6852e4dc565c8fedcf5adcdf09fca7caf5347739bed512bd95b15dada36db51" url = "https://github.com/astral-sh/uv/releases/download/0.10.4/uv-aarch64-apple-darwin.tar.gz" +provenance = "github-attestations" [tools.uv."platforms.macos-x64"] checksum = "sha256:df6dd1c3ebeab4369a098c516c15c233c62bf789a40a4864b30dad1d38d7604e" diff --git a/mise.toml b/mise.toml index 92efe50eda..b7293bbf4a 100644 --- a/mise.toml +++ b/mise.toml @@ -26,7 +26,7 @@ fd = "latest" gum = "latest" hadolint = "latest" lychee = "latest" -"github:telepresenceio/telepresence" = "latest" +telepresence = "latest" [settings] experimental = true # for task_templates and python.uv_venv_auto diff --git a/tasks.toml b/tasks.toml index 12bd3408d1..006db9157e 100644 --- a/tasks.toml +++ b/tasks.toml @@ -146,15 +146,9 @@ run = """ #!/bin/bash set -euxo pipefail -if [[ "$*" != *"--set externalRegistry="* ]]; 
then - latest_tag=$(git ls-remote --tags $(git remote get-url origin) 'v*' | grep -o 'refs/tags/v[0-9]*\\.[0-9]*\\.[0-9]*$' | sed 's|refs/tags/||' | sort -V | tail -n 1) - export AGENTSTACK__AGENT_REGISTRY="https://github.com/i-am-bee/agentstack@$latest_tag#path=agent-registry.yaml" - echo "Using agents from: $AGENTSTACK__AGENT_REGISTRY\n use --set externalRegistry= to override this." -fi - if [[ ! "$*" =~ 'ui.enabled=false' ]]; then {{ mise_bin }} run agentstack-ui:build - UI_TAG="--set ui.image.tag=local" + UI_TAG="--set agentstack:ui.image.tag=local" fi if [[ -n "${CI-}" ]]; then @@ -169,10 +163,10 @@ ARCH=$(uname -m | sed -e 's/arm64/aarch64/' -e 's/amd64/x86_64/') {{ mise_bin }} run agentstack-cli:run -- platform start -v \ --lima-image={{config_root}}/apps/microshift-vm/dist/$ARCH/microshift-vm-$ARCH.qcow2 \ --image-pull-mode="$PULL_MODE" \ - --set auth.nextauthDevUrl="http://localhost:3000" \ - --set image.tag=local \ - --set keycloak.image.tag=local \ - ${UI_TAG-} "$@" + --set agentstack:auth.nextauthDevUrl="http://localhost:3000" \ + --set agentstack:image.tag=local \ + --set agentstack:keycloak.image.tag=local \ + ${UI_TAG-} "$@" """ ["agentstack:delete"] @@ -493,6 +487,110 @@ echo -e "✅ Published \x1b[36mstable\x1b[0m docs for \x1b[36mv$publish_version\ echo "💡 Check the pipeline progress and result on: https://github.com/i-am-bee/agentstack/actions/workflows/release.yml" ''' +# misc tasks + +["sync-images-to-ghcr"] +dir = "{{config_root}}" +depends = ["helm:build:dependencies"] +usage = "flag --dryrun" +run = ''' +#!/bin/bash + +set -euo pipefail +DRY_RUN="${usage_dryrun:-false}" + +{% raw %} +echo "Extracting images from helm chart..." 
+ +# Extract images from helm chart +images=$(helm template \ + --set phoenix.enabled=true \ + --set encryptionKey=dummy \ + --set auth.enabled=false \ + --set providerBuilds.enabled=true \ + --set redis.enabled=true \ + ./helm 2>/dev/null | \ + sed -n '/^[[:space:]]*image:/{ /{{/d; s/.*image:[[:space:]]*//p; }' | \ + sed 's/"\([^"]*\)"/\1/' | grep -v 'agentstack') + +echo "Found images:" +echo "$images" + +# Process each image +echo "$images" | while IFS= read -r ghcr_image; do + if [[ -z "$ghcr_image" ]]; then + continue + fi + + echo "Processing image: $ghcr_image" + + # Check for tag+digest format (e.g. image:tag@digest) + ghcr_tag_image="" + if [[ "$ghcr_image" == *":"*"@"* ]]; then + # Extract the tag version (image:tag) + ghcr_tag_image=$(echo "$ghcr_image" | sed 's/@.*//') + + # Strip tag from main variable (image@digest) + ghcr_image=$(echo "$ghcr_image" | sed 's/:[^:/]*@/@/') + fi + + # Remove ghcr.io/i-am-bee/ prefix to get Docker Hub image + if [[ $ghcr_image =~ ^ghcr\.io/i-am-bee/(.+)$ ]]; then + dockerhub_image="${BASH_REMATCH[1]}" + + echo "GHCR image: $ghcr_image" + echo "Docker Hub image: $dockerhub_image" + + # Check if image exists in GHCR + if docker manifest inspect "$ghcr_image" >/dev/null 2>&1; then + echo "✅ Image $ghcr_image already exists in GHCR" + continue + fi + + echo "❌ Image $ghcr_image not found in GHCR" + + if [[ "$DRY_RUN" == "true" ]]; then + echo "🔍 DRY RUN: Would sync $dockerhub_image -> $ghcr_image" + continue + fi + + # Check if source image exists in Docker Hub + if ! 
docker manifest inspect "$dockerhub_image" >/dev/null 2>&1; then + echo "❌ Source image $dockerhub_image not found in Docker Hub" + continue + fi + + echo "🔄 Syncing $dockerhub_image -> $ghcr_image" + + # Copy image with all architectures using skopeo + skopeo copy --multi-arch all docker://"$dockerhub_image" docker://"$ghcr_image" + + echo "✅ Successfully synced $ghcr_image" + + # Sync the tag version if it exists + if [[ -n "$ghcr_tag_image" ]]; then + echo "🏷️ Also syncing tag: $ghcr_tag_image" + + if docker manifest inspect "$ghcr_tag_image" >/dev/null 2>&1; then + echo "✅ Tag $ghcr_tag_image already exists in GHCR" + else + if [[ "$DRY_RUN" == "true" ]]; then + echo "🔍 DRY RUN: Would sync $dockerhub_image -> $ghcr_tag_image" + else + skopeo copy --multi-arch all docker://"$dockerhub_image" docker://"$ghcr_tag_image" + echo "✅ Successfully synced tag $ghcr_tag_image" + fi + fi + fi + else + echo "⚠️ Skipping image with unexpected format: $ghcr_image" + fi +done + +echo "Image sync completed!" +{% endraw %} +''' + ["release:checkout"] description = "Checkout the latest release branch" dir = "{{config_root}}" @@ -624,111 +722,6 @@ git remote remove "$VM_NAME" 2>/dev/null || true limactl delete -f "$VM_NAME" """ -# misc tasks - -["sync-images-to-ghcr"] -dir = "{{config_root}}" -depends = ["helm:build:dependencies"] -usage = "flag --dryrun" -run = ''' -#!/bin/bash - -set -euo pipefail -DRY_RUN="${usage_dryrun:-false}" - -{% raw %} -echo "Extracting images from helm chart..." 
- -# Extract images from helm chart -images=$(helm template \ - --set phoenix.enabled=true \ - --set encryptionKey=dummy \ - --set auth.enabled=false \ - --set providerBuilds.enabled=true \ - --set localDockerRegistry.enabled=true \ - --set redis.enabled=true \ - ./helm 2>/dev/null | \ - sed -n '/^[[:space:]]*image:/{ /{{/d; s/.*image:[[:space:]]*//p; }' | \ - sed 's/"\([^"]*\)"/\1/' | grep -v 'agentstack') - -echo "Found images:" -echo "$images" - -# Process each image -echo "$images" | while IFS= read -r ghcr_image; do - if [[ -z "$ghcr_image" ]]; then - continue - fi - - echo "Processing image: $ghcr_image" - - # Check for tag+digest format (e.g. image:tag@digest) - ghcr_tag_image="" - if [[ "$ghcr_image" == *":"*"@"* ]]; then - # Extract the tag version (image:tag) - ghcr_tag_image=$(echo "$ghcr_image" | sed 's/@.*//') - - # Strip tag from main variable (image@digest) - ghcr_image=$(echo "$ghcr_image" | sed 's/:[^:/]*@/@/') - fi - - # Remove ghcr.io/i-am-bee/ prefix to get Docker Hub image - if [[ $ghcr_image =~ ^ghcr\.io/i-am-bee/(.+)$ ]]; then - dockerhub_image="${BASH_REMATCH[1]}" - - echo "GHCR image: $ghcr_image" - echo "Docker Hub image: $dockerhub_image" - - # Check if image exists in GHCR - if docker manifest inspect "$ghcr_image" >/dev/null 2>&1; then - echo "✅ Image $ghcr_image already exists in GHCR" - continue - fi - - echo "❌ Image $ghcr_image not found in GHCR" - - if [[ "$DRY_RUN" == "true" ]]; then - echo "🔍 DRY RUN: Would sync $dockerhub_image -> $ghcr_image" - continue - fi - - # Check if source image exists in Docker Hub - if ! 
docker manifest inspect "$dockerhub_image" >/dev/null 2>&1; then - echo "❌ Source image $dockerhub_image not found in Docker Hub" - continue - fi - - echo "🔄 Syncing $dockerhub_image -> $ghcr_image" - - # Copy image with all architectures using skopeo - skopeo copy --multi-arch all docker://"$dockerhub_image" docker://"$ghcr_image" - - echo "✅ Successfully synced $ghcr_image" - - # Sync the tag version if it exists - if [[ -n "$ghcr_tag_image" ]]; then - echo "🏷️ Also syncing tag: $ghcr_tag_image" - - if docker manifest inspect "$ghcr_tag_image" >/dev/null 2>&1; then - echo "✅ Tag $ghcr_tag_image already exists in GHCR" - else - if [[ "$DRY_RUN" == "true" ]]; then - echo "🔍 DRY RUN: Would sync $dockerhub_image -> $ghcr_tag_image" - else - skopeo copy --multi-arch all docker://"$dockerhub_image" docker://"$ghcr_tag_image" - echo "✅ Successfully synced tag $ghcr_tag_image" - fi - fi - fi - else - echo "⚠️ Skipping image with unexpected format: $ghcr_image" - fi -done - -echo "Image sync completed!" -{% endraw %} -''' - # security ["security:dependabot:alerts"]