tinyhumansai · justinhsu1477 · May 26, 2026 · coderabbitai · May 26, 2026
@@ -294,6 +294,50 @@ const CAPABILITIES: &[Capability] = &[
         status: CapabilityStatus::Beta,
         privacy: LOCAL_RAW,
     },
+    Capability {
+        id: "intelligence.embedding_provider_config",
+        name: "Configure Embedding Provider",
+        domain: "embeddings",
+        category: CapabilityCategory::Intelligence,
+        description:
+            "Pick which embedding provider drives semantic search across your memory: \
+             managed cloud (default, Voyage-backed via api.tinyhumans.ai), OpenAI, \
+             Cohere, local Ollama, or a custom OpenAI-compatible endpoint. API keys \
+             are stored encrypted via the local keyring under `embeddings:<slug>`; \
+             model name and embedding dimensions are tunable per provider. The \
+             legacy `inference_embed` RPC is aliased to `embeddings_embed` so \
+             existing callers continue to work.",
+        how_to: "Settings > AI > Embeddings",
+        status: CapabilityStatus::Beta,
+        // Privacy depends on the selected provider — see
+        // `intelligence.embedding_provider_test` for the per-provider data
+        // destinations. Configuring a provider only writes to the local
+        // keyring and config, so this entry carries the credential-side
+        // annotation; leaving it `None` (treat-as-unknown) would under-report.
+        // The network side is annotated on the test action instead.
+        privacy: LOCAL_CREDENTIALS,
+    },
+    Capability {
+        id: "intelligence.embedding_provider_test",
+        name: "Test Embedding Provider",
+        domain: "embeddings",
+        category: CapabilityCategory::Intelligence,
+        description:
+            "Verify a configured embedding provider before committing it to \
+             memory ingestion. Sends a small one-shot embed request and reports \
+             the model, dimensions, and any auth/error surface so a \
+             misconfigured key doesn't get discovered halfway through a 50k \
+             chunk backfill.",
+        how_to: "Settings > AI > Embeddings > Test Connection",
+        status: CapabilityStatus::Beta,
+        // The probe is a short fixed string, not raw user content, so the
+        // data kind is *derived*. It goes to whichever provider is selected:
+        // Voyage via the OpenHuman backend, OpenAI, Cohere, or a custom
+        // endpoint. `DERIVED_TO_BACKEND` labels the default (managed cloud)
+        // path. NOTE(review): confirm its destination list is acceptable for
+        // the third-party providers, or add a module-level constant for them.
+        privacy: DERIVED_TO_BACKEND,
+    },
     Capability {
         id: "intelligence.mcp_server",
         name: "MCP Server",

@@ -103,6 +103,8 @@ fn catalog_includes_additional_user_facing_surfaces() {
         "intelligence.mcp_server",
         "intelligence.searxng_search",
         "intelligence.tool_registry",
+        "intelligence.embedding_provider_config",
+        "intelligence.embedding_provider_test",
         "conversation.subagent_mascots",
     ] {
         assert!(
@@ -111,3 +113,68 @@ fn catalog_includes_additional_user_facing_surfaces() {
         );
     }
 }
+
+/// Guards the grouping of the two embedding-provider catalog entries.
+///
+/// Both entries must report the `embeddings` domain, must agree on their
+/// category, and must carry a `how_to` breadcrumb that names both "Settings"
+/// and "Embeddings". The intent is to catch an accidental recategorisation
+/// that would separate the two entries, or a breadcrumb that no longer points
+/// at the shared Settings panel.
+#[test]
+fn embedding_provider_capabilities_share_domain_and_category() {
+    let config_cap = lookup("intelligence.embedding_provider_config")
+        .expect("embedding_provider_config registered");
+    let test_cap = lookup("intelligence.embedding_provider_test")
+        .expect("embedding_provider_test registered");
+
+    // Domain is checked per entry: the config entry first, then the probe.
+    for cap in [&config_cap, &test_cap] {
+        assert_eq!(cap.domain, "embeddings");
+    }
+    assert_eq!(
+        config_cap.category, test_cap.category,
+        "both embedding capabilities must land in the same UI category"
+    );
+
+    // Each breadcrumb has to name both the Settings root and the Embeddings
+    // panel; anything else is treated as an out-of-date `how_to`.
+    let breadcrumb_words = ["Settings", "Embeddings"];
+
+    let config_names_panel = breadcrumb_words
+        .iter()
+        .all(|word| config_cap.how_to.contains(*word));
+    assert!(
+        config_names_panel,
+        "config how_to must mention Settings > … > Embeddings, got: {}",
+        config_cap.how_to
+    );
+
+    let test_names_panel = breadcrumb_words
+        .iter()
+        .all(|word| test_cap.how_to.contains(*word));
+    assert!(
+        test_names_panel,
+        "test how_to must mention Settings > … > Embeddings, got: {}",
+        test_cap.how_to
+    );
+}
+
+/// Checks that the two embedding-provider entries carry distinct privacy
+/// annotations.
+///
+/// The configuration entry must have an annotation whose `leaves_device` is
+/// `false` (it only touches the local keyring), while the test entry must
+/// have one whose `leaves_device` is `true` (it sends a probe to the
+/// configured provider). A missing annotation on either entry fails the test,
+/// since a `None` would leave the panel reported as "unknown".
+#[test]
+fn embedding_provider_capabilities_split_privacy_correctly() {
+    let config_cap = lookup("intelligence.embedding_provider_config")
+        .expect("embedding_provider_config registered");
+    let test_cap = lookup("intelligence.embedding_provider_test")
+        .expect("embedding_provider_test registered");
+
+    // Configuration side: annotated, and nothing leaves the device.
+    let config_privacy = config_cap
+        .privacy
+        .expect("config capability has privacy annotation");
+    let config_stays_local = !config_privacy.leaves_device;
+    assert!(
+        config_stays_local,
+        "configuration writes only to local keyring; nothing should leave the device"
+    );
+
+    // Test side: annotated, and the probe does leave the device.
+    let probe_privacy = test_cap
+        .privacy
+        .expect("test capability has privacy annotation");
+    let probe_leaves_device = probe_privacy.leaves_device;
+    assert!(
+        probe_leaves_device,
+        "test fires a probe at the configured provider — must report as leaves_device"
+    );
+}