diff --git a/backend/src/analytics_agent/api/settings.py b/backend/src/analytics_agent/api/settings.py index 64aced0..81a99b9 100644 --- a/backend/src/analytics_agent/api/settings.py +++ b/backend/src/analytics_agent/api/settings.py @@ -214,12 +214,36 @@ class UpdateDisplayRequest(BaseModel): {"name": "preview_table", "label": "Preview data"}, {"name": "execute_sql", "label": "Execute SQL"}, ], + "hive": [ + {"name": "list_tables", "label": "List tables"}, + {"name": "get_schema", "label": "Table schema"}, + {"name": "preview_table", "label": "Preview data"}, + {"name": "execute_sql", "label": "Execute SQL"}, + ], "mysql": [ {"name": "list_tables", "label": "List tables"}, {"name": "get_schema", "label": "Table schema"}, {"name": "preview_table", "label": "Preview data"}, {"name": "execute_sql", "label": "Execute SQL"}, ], + "postgresql": [ + {"name": "list_tables", "label": "List tables"}, + {"name": "get_schema", "label": "Table schema"}, + {"name": "preview_table", "label": "Preview data"}, + {"name": "execute_sql", "label": "Execute SQL"}, + ], + "sqlite": [ + {"name": "list_tables", "label": "List tables"}, + {"name": "get_schema", "label": "Table schema"}, + {"name": "preview_table", "label": "Preview data"}, + {"name": "execute_sql", "label": "Execute SQL"}, + ], + "duckdb": [ + {"name": "list_tables", "label": "List tables"}, + {"name": "get_schema", "label": "Table schema"}, + {"name": "preview_table", "label": "Preview data"}, + {"name": "execute_sql", "label": "Execute SQL"}, + ], "sqlalchemy": [ {"name": "list_tables", "label": "List tables"}, {"name": "get_schema", "label": "Table schema"}, @@ -249,6 +273,30 @@ def _build_tool_toggles( return result +def _compute_engine_status(engine_type: str, conn_cfg: dict, sso_connected: bool = False) -> str: + """Return 'connected' or 'unconfigured' for an engine connection.""" + from analytics_agent.engines.factory import _CONNECTOR_MAP + + spec = _CONNECTOR_MAP.get(engine_type) + if spec is not None: + return ( 
+ "connected" + if spec.is_configured(conn_cfg, sso_connected=sso_connected) + else "unconfigured" + ) + + if engine_type in ("mysql", "sqlalchemy", "postgresql", "sqlite", "duckdb"): + host = conn_cfg.get("host", "") + database = conn_cfg.get("database", conn_cfg.get("db", "")) + has_url = bool(conn_cfg.get("url")) + # File-based engines need only `database`; server engines need host too. + file_based = engine_type in ("sqlite", "duckdb") + if has_url or (file_based and bool(database)) or (host and database): + return "connected" + + return "unconfigured" + + # --- Connection helpers --- @@ -596,15 +644,9 @@ async def list_connections(session: AsyncSession = Depends(get_session)): is_sso_connected = cred is not None and cred.auth_type == "sso_externalbrowser" if intg.type == "snowflake": - from analytics_agent.engines.factory import _CONNECTOR_MAP as _CM - account = conn_cfg.get("account", "") user = conn_cfg.get("user", "") - status_str = ( - "connected" - if _CM["snowflake"].is_configured(conn_cfg, sso_connected=is_sso_connected) - else "unconfigured" - ) + status_str = _compute_engine_status(intg.type, conn_cfg, sso_connected=is_sso_connected) # Detect active auth method so the frontend can pre-select the right tab. 
if is_sso_connected: active_auth_method = "sso" @@ -667,7 +709,7 @@ async def list_connections(session: AsyncSession = Depends(get_session)): conn_cfg.get(k) or os.environ.get(_CM["bigquery"].env_map.get(k, ""), "") for k in _CM["bigquery"].credential_keys ) - status_str = "connected" if _CM["bigquery"].is_configured(conn_cfg) else "unconfigured" + status_str = _compute_engine_status(intg.type, conn_cfg) fields = [ ConnectionField( key="project", @@ -690,13 +732,13 @@ async def list_connections(session: AsyncSession = Depends(get_session)): placeholder='{"type":"service_account",...}', ), ] - elif intg.type in ("mysql", "sqlalchemy", "postgresql", "sqlite"): + elif intg.type in ("mysql", "sqlalchemy", "postgresql", "sqlite", "duckdb"): host = conn_cfg.get("host", "") database = conn_cfg.get("database", conn_cfg.get("db", "")) port = str(conn_cfg.get("port", "")) user = conn_cfg.get("user", conn_cfg.get("username", "")) has_url = bool(conn_cfg.get("url")) - status_str = "connected" if (has_url or (host and database)) else "unconfigured" + status_str = _compute_engine_status(intg.type, conn_cfg) if has_url: fields = [ ConnectionField( @@ -736,8 +778,8 @@ async def list_connections(session: AsyncSession = Depends(get_session)): from analytics_agent.engines.factory import _CONNECTOR_MAP as _CM spec = _CM.get(intg.type) + status_str = _compute_engine_status(intg.type, conn_cfg) if spec is not None and spec.display_fields: - status_str = "connected" if spec.is_configured(conn_cfg) else "unconfigured" fields = [] for df in spec.display_fields: raw = conn_cfg.get(df.key, "") or os.environ.get( @@ -755,7 +797,6 @@ async def list_connections(session: AsyncSession = Depends(get_session)): ) ) else: - status_str = "unconfigured" fields = [] oauth_status = ( diff --git a/backend/src/analytics_agent/engines/factory.py b/backend/src/analytics_agent/engines/factory.py index 3269fdd..97df4c7 100644 --- a/backend/src/analytics_agent/engines/factory.py +++ 
b/backend/src/analytics_agent/engines/factory.py @@ -187,6 +187,7 @@ def get_secret_env_vars(engine_type: str) -> dict[str, str]: "mysql": SQLAlchemyQueryEngine, "sqlite": SQLAlchemyQueryEngine, "postgresql": SQLAlchemyQueryEngine, + "duckdb": SQLAlchemyQueryEngine, "sqlalchemy": SQLAlchemyQueryEngine, }.get(engine_type) return getattr(cls, "secret_env_vars", {}) if cls else {} @@ -208,6 +209,7 @@ def _make_connector(connection_cfg: dict) -> MCPQueryEngine: "mysql": SQLAlchemyQueryEngine, "sqlite": SQLAlchemyQueryEngine, "postgresql": SQLAlchemyQueryEngine, + "duckdb": SQLAlchemyQueryEngine, "sqlalchemy": SQLAlchemyQueryEngine, "mcp": MCPQueryEngine, "mcp-stdio": MCPQueryEngine, diff --git a/frontend/src/components/Settings/connections/index.ts b/frontend/src/components/Settings/connections/index.ts index 9996ad5..5e20d5d 100644 --- a/frontend/src/components/Settings/connections/index.ts +++ b/frontend/src/components/Settings/connections/index.ts @@ -8,6 +8,7 @@ import { hivePlugin } from "./plugins/hive"; import { mysqlPlugin } from "./plugins/mysql"; import { postgresqlPlugin } from "./plugins/postgresql"; import { sqlitePlugin } from "./plugins/sqlite"; +import { duckdbPlugin } from "./plugins/duckdb"; import { datahubPlugin } from "./plugins/datahub"; import { datahubMcpPlugin } from "./plugins/datahub-mcp"; import { customMcpEnginePlugin, customMcpContextPlugin } from "./plugins/custom-mcp"; @@ -24,6 +25,7 @@ export const CONNECTION_PLUGINS: ConnectionPlugin[] = [ mysqlPlugin, postgresqlPlugin, sqlitePlugin, + duckdbPlugin, customMcpEnginePlugin, // Context platforms diff --git a/frontend/src/components/Settings/connections/plugins/duckdb.tsx b/frontend/src/components/Settings/connections/plugins/duckdb.tsx new file mode 100644 index 0000000..94042c4 --- /dev/null +++ b/frontend/src/components/Settings/connections/plugins/duckdb.tsx @@ -0,0 +1,25 @@ +import { SimpleFormShell } from "../SimpleFormShell"; +import type { ConnectionPlugin, NewConnectionPayload } 
from "../types"; + +const FIELDS = [ + { key: "database", label: "Database file path", type: "mono" as const, + placeholder: "/absolute/path/to/database.duckdb", required: true }, +]; + +export const duckdbPlugin: ConnectionPlugin = { + id: "duckdb", + serviceId: "duckdb", + label: "DuckDB", + category: "engine", + transport: "native", + description: "Connect to a local DuckDB database file", + Form: ({ onDone, onCancel }) => ( + + onDone({ ...payload, config: { dialect: "duckdb", ...payload.config } }) + } + /> + ), +}; diff --git a/tests/integration/test_duckdb_e2e.py b/tests/integration/test_duckdb_e2e.py new file mode 100644 index 0000000..74e101c --- /dev/null +++ b/tests/integration/test_duckdb_e2e.py @@ -0,0 +1,372 @@ +""" +Integration test: DuckDB query engine + DataHub metadata, end-to-end. + +Setup: + - Creates a temporary DuckDB file with three Olist-like tables + (olist_orders, olist_order_items, olist_products — ~160 rows total). + - Pushes table descriptions to the configured DataHub instance under + platform=duckdb, env=DEV so the agent can discover them via search. + +What it proves: + - SQLAlchemyQueryEngine with dialect=duckdb boots and can execute SQL. + - DataHub context tools find the freshly pushed metadata. + - The full agent pipeline (context lookup → SQL → text answer) works. 
+ +Prerequisites: + DataHub credentials: ~/.datahubenv or DATAHUB_GMS_URL + DATAHUB_GMS_TOKEN + LLM key: ANTHROPIC_API_KEY or OPENAI_API_KEY + +Run: + uv run pytest tests/integration/test_duckdb_e2e.py -v -s +""" + +from __future__ import annotations + +import json +import os +import pathlib +import urllib.request +import uuid + +import pytest + +# ── Skip guards ────────────────────────────────────────────────────────────── + +_has_datahub = bool( + (os.environ.get("DATAHUB_GMS_URL") and os.environ.get("DATAHUB_GMS_TOKEN")) + or pathlib.Path("~/.datahubenv").expanduser().exists() +) +_has_llm = bool(os.environ.get("ANTHROPIC_API_KEY") or os.environ.get("OPENAI_API_KEY")) + +_requires_datahub_and_llm = pytest.mark.skipif( + not (_has_datahub and _has_llm), + reason=( + "Needs DataHub credentials (datahub init or DATAHUB_GMS_URL+TOKEN) " + "and an LLM key (ANTHROPIC_API_KEY or OPENAI_API_KEY)" + ), +) + + +# ── DataHub helpers ─────────────────────────────────────────────────────────── + + +def _datahub_creds() -> tuple[str, str]: + """Return (gms_url, token) from env vars or ~/.datahubenv.""" + gms_url = os.environ.get("DATAHUB_GMS_URL", "") + token = os.environ.get("DATAHUB_GMS_TOKEN", "") + if not gms_url: + import yaml + + env_file = pathlib.Path("~/.datahubenv").expanduser() + if env_file.exists(): + cfg = yaml.safe_load(env_file.read_text()) or {} + gms = cfg.get("gms") or {} + gms_url = gms.get("server", "") + token = gms.get("token", "") + return gms_url, token + + +def _emit_table_description( + gms_url: str, token: str, urn: str, table: str, description: str +) -> None: + """Push a minimal dataset description MCE to DataHub.""" + from datahub.emitter.rest_emitter import DatahubRestEmitter + from datahub.metadata.schema_classes import ( + DatasetPropertiesClass, + DatasetSnapshotClass, + MetadataChangeEventClass, + ) + + emitter = DatahubRestEmitter(gms_server=gms_url, token=token or None) + emitter.emit_mce( + MetadataChangeEventClass( + 
proposedSnapshot=DatasetSnapshotClass( + urn=urn, + aspects=[DatasetPropertiesClass(description=description, name=table)], + ) + ) + ) + emitter.flush() + + +def _delete_entity(gms_url: str, token: str, urn: str) -> None: + """Hard-delete a DataHub entity by URN (best-effort — non-fatal).""" + try: + req = urllib.request.Request( + f"{gms_url}/entities?action=delete", + data=json.dumps({"urn": urn}).encode(), + headers={ + "Content-Type": "application/json", + **({"Authorization": f"Bearer {token}"} if token else {}), + }, + method="POST", + ) + urllib.request.urlopen(req, timeout=10) + except Exception as e: + print(f"[!] DataHub cleanup failed for {urn}: {e}") + + +# ── DataHub table metadata ──────────────────────────────────────────────────── + +_PLATFORM = "duckdb" +_ENV = "DEV" + +# Tables we create + their descriptions for DataHub. +_TABLES: dict[str, str] = { + "olist_orders": ( + "Order lifecycle records. Columns: order_id (PK), customer_id, " + "order_status ('delivered' or 'canceled'), order_purchase_timestamp." + ), + "olist_order_items": ( + "Line items inside each order. Columns: order_id (FK), product_id (FK), " + "price (item price in BRL), freight_value (shipping cost in BRL). " + "Revenue = SUM(price + freight_value) for delivered orders." + ), + "olist_products": ( + "Product catalog. Columns: product_id (PK), product_category_name " + "(e.g. 'electronics', 'furniture', 'clothing', 'books', 'toys')." 
+ ), +} + + +def _dataset_urn(table: str) -> str: + return f"urn:li:dataset:(urn:li:dataPlatform:{_PLATFORM},{table},{_ENV})" + + +# ── Fixtures ────────────────────────────────────────────────────────────────── + + +@pytest.fixture(scope="module") +def duckdb_path(tmp_path_factory): + """Build a temp DuckDB file with three Olist-like tables.""" + import duckdb + + db_file = tmp_path_factory.mktemp("duckdb") / "test.duckdb" + con = duckdb.connect(str(db_file)) + + # olist_orders — 50 rows, 5 canceled (i % 10 == 0) + con.execute(""" + CREATE TABLE olist_orders ( + order_id VARCHAR PRIMARY KEY, + customer_id VARCHAR, + order_status VARCHAR, + order_purchase_timestamp TIMESTAMP + ) + """) + con.execute(""" + INSERT INTO olist_orders + SELECT + 'order_' || i::VARCHAR, + 'customer_' || (i % 20)::VARCHAR, + CASE WHEN i % 10 = 0 THEN 'canceled' ELSE 'delivered' END, + TIMESTAMP '2017-01-01' + INTERVAL (i) DAY + FROM range(1, 51) t(i) + """) + + # olist_order_items — 2 items per order (100 rows) + # product_id cycles through 0-9 so every item joins to a row in olist_products + con.execute(""" + CREATE TABLE olist_order_items ( + order_id VARCHAR, + product_id VARCHAR, + price DOUBLE, + freight_value DOUBLE + ) + """) + con.execute(""" + INSERT INTO olist_order_items + SELECT + 'order_' || (i % 50 + 1)::VARCHAR, + 'product_' || (i % 10)::VARCHAR, + (i % 5 + 1) * 10.0, + (i % 3 + 1) * 2.0 + FROM range(0, 100) t(i) + """) + + # olist_products — 10 products across 5 categories (2 products each) + con.execute(""" + CREATE TABLE olist_products ( + product_id VARCHAR PRIMARY KEY, + product_category_name VARCHAR + ) + """) + con.executemany( + "INSERT INTO olist_products VALUES (?, ?)", + [ + ("product_0", "electronics"), + ("product_1", "furniture"), + ("product_2", "clothing"), + ("product_3", "books"), + ("product_4", "toys"), + ("product_5", "electronics"), + ("product_6", "furniture"), + ("product_7", "clothing"), + ("product_8", "books"), + ("product_9", "toys"), + ], + ) + + 
con.close() + return str(db_file) + + +@pytest.fixture(scope="module") +def datahub_metadata(): + """Push table descriptions to DataHub; delete them on teardown.""" + gms_url, token = _datahub_creds() + urns = [] + for table, description in _TABLES.items(): + urn = _dataset_urn(table) + _emit_table_description(gms_url, token, urn, table, description) + urns.append(urn) + print(f"[✓] DataHub metadata pushed: {urn}") + + yield urns + + # Teardown + for urn in urns: + _delete_entity(gms_url, token, urn) + print(f"[✓] DataHub entity deleted: {urn}") + + +@pytest.fixture(scope="module") +def duckdb_engine(duckdb_path): + """SQLAlchemyQueryEngine backed by the temp DuckDB file.""" + import asyncio + + from analytics_agent.engines.sqlalchemy.engine import SQLAlchemyQueryEngine + + engine = SQLAlchemyQueryEngine({"dialect": "duckdb", "database": duckdb_path}) + yield engine + asyncio.run(engine.aclose()) + + +@pytest.fixture(scope="module") +def agent_graph(duckdb_engine, datahub_metadata): + """Full agent graph: DuckDB engine tools + DataHub context tools.""" + from analytics_agent.agent.graph import build_graph + from analytics_agent.context.datahub import build_datahub_tools + + context_tools = build_datahub_tools() + engine_tools = duckdb_engine.get_tools() + + assert engine_tools, "DuckDB engine returned no tools" + assert context_tools, "No DataHub context tools loaded — check credentials" + + return build_graph( + engine_name="test_duckdb", + context_tools=context_tools, + engine_tools=engine_tools, + disabled_tools={"create_chart"}, + ) + + +# ── Helpers ─────────────────────────────────────────────────────────────────── + + +async def _run(graph, question: str) -> list[dict]: + """Run one agent turn and collect all events.""" + from analytics_agent.agent.streaming import stream_graph_events + + events: list[dict] = [] + conv_id = f"e2e-duckdb-{uuid.uuid4().hex[:8]}" + async for event in stream_graph_events(graph, question, conv_id, "test_duckdb"): + 
events.append(event) + label = event["payload"].get("text") or event["payload"].get("tool_name") or "" + print(f" [{event['event']}] {str(label)[:80]}") + return events + + +# ── Tests ───────────────────────────────────────────────────────────────────── + + +@_requires_datahub_and_llm +@pytest.mark.asyncio +async def test_top_categories_by_revenue(agent_graph): + """Agent must run SQL and return top product categories by revenue.""" + events = await _run( + agent_graph, + "What are the top 3 product categories by total revenue (price + freight_value)?", + ) + + event_types = {e["event"] for e in events} + print("\nEvent types:", event_types) + + assert "COMPLETE" in event_types, f"No COMPLETE event — got: {event_types}" + assert "TEXT" in event_types, f"No TEXT event — got: {event_types}" + + # Agent must have issued at least one successful SQL query + sql_events = [e for e in events if e["event"] == "SQL"] + assert sql_events, ( + "No SQL event emitted — agent did not call execute_sql successfully. 
" + f"All event types: {event_types}" + ) + + # The SQL result should have rows + rows = sql_events[-1]["payload"].get("rows", []) + assert rows, "SQL result has no rows" + assert len(rows) <= 3, f"Expected ≤3 rows (top 3), got {len(rows)}" + + # The answer should mention at least one of the known categories + complete_text = next(e["payload"].get("text", "") for e in events if e["event"] == "COMPLETE") + known_categories = {"electronics", "furniture", "clothing", "books", "toys"} + assert any(cat in complete_text.lower() for cat in known_categories), ( + f"Response doesn't mention any known category.\nResponse: {complete_text[:400]}" + ) + + +@_requires_datahub_and_llm +@pytest.mark.asyncio +async def test_delivered_vs_canceled_order_count(agent_graph): + """Agent must count delivered vs canceled orders accurately.""" + events = await _run( + agent_graph, + "How many orders are delivered versus canceled?", + ) + + event_types = {e["event"] for e in events} + assert "COMPLETE" in event_types + assert "SQL" in event_types, "Agent should query olist_orders for status counts" + + complete_text = next(e["payload"].get("text", "") for e in events if e["event"] == "COMPLETE") + # Dataset has 45 delivered (i % 10 != 0) and 5 canceled (i % 10 == 0) + # Accept any reasonable mention of both statuses + text_lower = complete_text.lower() + assert "delivered" in text_lower and "canceled" in text_lower, ( + f"Response should mention both statuses.\nResponse: {complete_text[:400]}" + ) + + +@pytest.mark.asyncio +async def test_engine_list_tables(duckdb_engine): + """DuckDB engine's list_tables tool should return all three tables.""" + import orjson + + tools = {t.name: t for t in duckdb_engine.get_tools()} + assert "list_tables" in tools + + result = tools["list_tables"].invoke({"schema": ""}) + tables = orjson.loads(result) + table_names = {t["name"] for t in tables} + assert {"olist_orders", "olist_order_items", "olist_products"} == table_names, ( + f"Unexpected tables: 
{table_names}" + ) + + +@pytest.mark.asyncio +async def test_engine_execute_sql(duckdb_engine): + """DuckDB engine's execute_sql tool should return correct row counts.""" + import orjson + + tools = {t.name: t for t in duckdb_engine.get_tools()} + result = tools["execute_sql"].invoke( + { + "sql": "SELECT order_status, COUNT(*) AS cnt FROM olist_orders GROUP BY order_status ORDER BY cnt DESC" + } + ) + parsed = orjson.loads(result) + assert "error" not in parsed, f"SQL error: {parsed.get('error')}" + + rows = {row["order_status"]: row["cnt"] for row in parsed["rows"]} + assert rows.get("delivered") == 45, f"Expected 45 delivered, got {rows}" + assert rows.get("canceled") == 5, f"Expected 5 canceled, got {rows}" diff --git a/tests/unit/test_engine_contract.py b/tests/unit/test_engine_contract.py new file mode 100644 index 0000000..a1f243f --- /dev/null +++ b/tests/unit/test_engine_contract.py @@ -0,0 +1,115 @@ +""" +Contract tests for engine connectors. + +Every supported engine type must be wired through several touchpoints — the +engine factory, the secret-env-vars registry, the per-type tools registry, the +status renderer, and the frontend plugin index. Forgetting any one of them +produces silent UX failures (e.g. a connection that always shows as +"unconfigured", or an empty tool-toggles panel) that don't fail in CI. + +This test enumerates every known engine type and asserts each touchpoint +handles it. Add a new entry to MINIMAL_CONFIGS to introduce a connector — +each test will then enforce that the rest of the wiring is in place. +""" + +from __future__ import annotations + +from pathlib import Path + +import pytest + +# Minimal config that should yield a "connected" status for each engine type. +# When adding a new connector, add it here. 
+MINIMAL_CONFIGS: dict[str, dict[str, str]] = { + "snowflake": {"account": "x", "user": "y", "password": "z"}, + "hive": {"host": "x", "user": "y", "password": "z"}, + "bigquery": {"project": "x", "credentials_json": '{"x":"y"}'}, + "mysql": {"host": "x", "database": "y", "user": "z", "password": "p"}, + "postgresql": {"host": "x", "database": "y", "user": "z", "password": "p"}, + "sqlite": {"dialect": "sqlite", "database": "/tmp/x.db"}, + "duckdb": {"dialect": "duckdb", "database": "/tmp/x.duckdb"}, +} + +ENGINE_TYPES = sorted(MINIMAL_CONFIGS) + +# Every query engine exposes the same four tools — anything missing means the +# tool-toggles panel in Settings will be incomplete. +_REQUIRED_SQL_TOOLS = {"execute_sql", "list_tables", "get_schema", "preview_table"} + +# Plugins in the frontend index follow the convention `${type}Plugin`. +_FRONTEND_INDEX = ( + Path(__file__).resolve().parents[2] + / "frontend" + / "src" + / "components" + / "Settings" + / "connections" + / "index.ts" +) + + +@pytest.mark.parametrize("engine_type", ENGINE_TYPES) +def test_factory_returns_callable(engine_type): + """_engine_cls must return a factory for every known type.""" + from analytics_agent.engines.factory import _engine_cls + + fn = _engine_cls(engine_type) + assert fn is not None, ( + f"_engine_cls({engine_type!r}) returned None — add it to the dispatch dict in factory.py" + ) + assert callable(fn) + + +@pytest.mark.parametrize("engine_type", ENGINE_TYPES) +def test_secret_env_vars_returns_dict(engine_type): + """get_secret_env_vars must return a dict (possibly empty) for every type.""" + from analytics_agent.engines.factory import get_secret_env_vars + + result = get_secret_env_vars(engine_type) + assert isinstance(result, dict), ( + f"get_secret_env_vars({engine_type!r}) returned {type(result).__name__}, expected dict" + ) + + +@pytest.mark.parametrize("engine_type", ENGINE_TYPES) +def test_known_tools_has_standard_sql_tools(engine_type): + """_KNOWN_TOOLS must list the four 
standard SQL tools — otherwise the toggle UI is empty.""" + from analytics_agent.api.settings import _KNOWN_TOOLS + + assert engine_type in _KNOWN_TOOLS, ( + f"_KNOWN_TOOLS missing entry for {engine_type!r} — tool toggles panel will be empty" + ) + tool_names = {t["name"] for t in _KNOWN_TOOLS[engine_type]} + missing = _REQUIRED_SQL_TOOLS - tool_names + assert not missing, f"_KNOWN_TOOLS[{engine_type!r}] missing tools: {missing}" + + +@pytest.mark.parametrize("engine_type", ENGINE_TYPES) +def test_minimal_config_renders_as_connected(engine_type): + """A minimally-configured connection must show 'connected', not 'unconfigured'.""" + from analytics_agent.api.settings import _compute_engine_status + + status = _compute_engine_status(engine_type, MINIMAL_CONFIGS[engine_type]) + assert status == "connected", ( + f"{engine_type} with minimal config rendered as {status!r}; " + f"add it to _compute_engine_status (or to the engine's ConnectorSpec)" + ) + + +@pytest.mark.parametrize("engine_type", ENGINE_TYPES) +def test_empty_config_renders_as_unconfigured(engine_type): + """An empty config must show 'unconfigured' — otherwise the status check is meaningless.""" + from analytics_agent.api.settings import _compute_engine_status + + assert _compute_engine_status(engine_type, {}) == "unconfigured" + + +@pytest.mark.parametrize("engine_type", ENGINE_TYPES) +def test_frontend_plugin_registered(engine_type): + """frontend index.ts must reference `${type}Plugin` (substring check) — otherwise the type is missing from the picker.""" + content = _FRONTEND_INDEX.read_text() + expected = f"{engine_type}Plugin" + assert expected in content, ( + f"frontend index.ts missing `{expected}` — add the plugin import and " + f"register it in CONNECTION_PLUGINS" + )