From b96a9d33abafbe4043334cef9f9aa8d63fb66da6 Mon Sep 17 00:00:00 2001 From: Jacob Magar Date: Sun, 24 May 2026 02:38:50 -0400 Subject: [PATCH 1/3] feat: add gateway code mode contract --- crates/lab/src/dispatch/gateway.rs | 1 + crates/lab/src/dispatch/gateway/code_mode.rs | 262 ++++++ crates/lab/src/mcp/catalog.rs | 2 + crates/lab/src/mcp/server.rs | 459 +++++++++- docs/dev/ERRORS.md | 1 + docs/services/GATEWAY.md | 40 +- .../plans/2026-05-24-code-mode-contract.md | 854 ++++++++++++++++++ 7 files changed, 1613 insertions(+), 6 deletions(-) create mode 100644 crates/lab/src/dispatch/gateway/code_mode.rs create mode 100644 docs/superpowers/plans/2026-05-24-code-mode-contract.md diff --git a/crates/lab/src/dispatch/gateway.rs b/crates/lab/src/dispatch/gateway.rs index 552be16d7..3bf6a752c 100644 --- a/crates/lab/src/dispatch/gateway.rs +++ b/crates/lab/src/dispatch/gateway.rs @@ -1,5 +1,6 @@ mod catalog; mod client; +pub mod code_mode; pub(crate) mod config; mod config_mutation; pub mod discovery; diff --git a/crates/lab/src/dispatch/gateway/code_mode.rs b/crates/lab/src/dispatch/gateway/code_mode.rs new file mode 100644 index 000000000..ba1a1a443 --- /dev/null +++ b/crates/lab/src/dispatch/gateway/code_mode.rs @@ -0,0 +1,262 @@ +use serde::Serialize; +use serde_json::Value; + +use crate::dispatch::error::ToolError; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct CodeModeToolId { + pub raw: String, + pub reference: CodeModeToolRef, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum CodeModeToolRef { + LabAction { service: String, action: String }, + UpstreamTool { upstream: String, tool: String }, +} + +impl CodeModeToolId { + pub fn parse(raw: &str) -> Result { + let raw = raw.trim(); + if raw.is_empty() { + return Err(invalid_code_mode_id("Code Mode tool id must not be empty")); + } + + if let Some(rest) = raw.strip_prefix("lab::") { + let (service, action) = rest.split_once('.').ok_or_else(|| { + invalid_code_mode_id("lab Code Mode ids must use lab::.") 
+ })?; + if service.trim().is_empty() || action.trim().is_empty() { + return Err(invalid_code_mode_id( + "lab Code Mode ids must include service and action", + )); + } + return Ok(Self { + raw: raw.to_string(), + reference: CodeModeToolRef::LabAction { + service: service.trim().to_string(), + action: action.trim().to_string(), + }, + }); + } + + if let Some(rest) = raw.strip_prefix("upstream::") { + let (upstream, tool) = rest.split_once("::").ok_or_else(|| { + invalid_code_mode_id("upstream Code Mode ids must use upstream::::") + })?; + if upstream.trim().is_empty() || tool.trim().is_empty() { + return Err(invalid_code_mode_id( + "upstream Code Mode ids must include upstream and tool", + )); + } + return Ok(Self { + raw: raw.to_string(), + reference: CodeModeToolRef::UpstreamTool { + upstream: upstream.trim().to_string(), + tool: tool.trim().to_string(), + }, + }); + } + + Err(invalid_code_mode_id( + "Code Mode ids must start with lab:: or upstream::", + )) + } +} + +#[must_use] +pub fn lab_action_id(service: &str, action: &str) -> String { + format!("lab::{service}.{action}") +} + +#[must_use] +pub fn upstream_tool_id(upstream: &str, tool: &str) -> String { + format!("upstream::{upstream}::{tool}") +} + +#[derive(Debug, Clone, PartialEq, Serialize)] +pub struct CodeModeSearchCandidate { + pub id: String, + pub name: String, + pub upstream: String, + pub description: String, + pub score: f32, + pub schema_available: bool, +} + +impl CodeModeSearchCandidate { + #[must_use] + pub fn lab_action(service: &str, action: &str, description: &str, score: f32) -> Self { + Self { + id: lab_action_id(service, action), + name: action.to_string(), + upstream: "lab".to_string(), + description: description.to_string(), + score, + schema_available: true, + } + } + + #[must_use] + pub fn upstream_tool( + upstream: &str, + tool: &str, + description: &str, + score: f32, + schema: Option, + ) -> Self { + Self { + id: upstream_tool_id(upstream, tool), + name: tool.to_string(), + 
upstream: upstream.to_string(), + description: description.to_string(), + score, + schema_available: schema.is_some(), + } + } +} + +#[derive(Debug, Clone, PartialEq, Serialize)] +pub struct CodeModeSchemaResponse { + pub id: String, + pub kind: &'static str, + pub name: String, + pub upstream: String, + pub schema: Value, + pub schema_format: &'static str, +} + +impl CodeModeSchemaResponse { + #[must_use] + pub fn lab_action(id: &str, action: &str, schema: Value) -> Self { + Self { + id: id.to_string(), + kind: "lab_action", + name: action.to_string(), + upstream: "lab".to_string(), + schema, + schema_format: "lab_action_spec", + } + } + + #[must_use] + pub fn upstream_tool(id: &str, upstream: &str, tool: &str, schema: Value) -> Self { + Self { + id: id.to_string(), + kind: "upstream_tool", + name: tool.to_string(), + upstream: upstream.to_string(), + schema, + schema_format: "json_schema", + } + } +} + +pub fn invalid_code_mode_id(message: impl Into) -> ToolError { + ToolError::Sdk { + sdk_kind: "invalid_code_mode_id".to_string(), + message: message.into(), + } +} + +#[cfg(test)] +mod tests { + use serde_json::json; + + use super::{CodeModeSchemaResponse, CodeModeSearchCandidate, CodeModeToolId, CodeModeToolRef}; + + #[test] + fn parses_lab_action_id() { + let parsed = CodeModeToolId::parse("lab::gateway.gateway.schema").unwrap(); + assert_eq!( + parsed, + CodeModeToolId { + raw: "lab::gateway.gateway.schema".to_string(), + reference: CodeModeToolRef::LabAction { + service: "gateway".to_string(), + action: "gateway.schema".to_string(), + }, + } + ); + } + + #[test] + fn parses_upstream_tool_id() { + let parsed = CodeModeToolId::parse("upstream::github::search_issues").unwrap(); + assert_eq!( + parsed, + CodeModeToolId { + raw: "upstream::github::search_issues".to_string(), + reference: CodeModeToolRef::UpstreamTool { + upstream: "github".to_string(), + tool: "search_issues".to_string(), + }, + } + ); + } + + #[test] + fn rejects_invalid_ids() { + for id in [ + 
"", + "gateway.gateway.schema", + "lab::gateway", + "upstream::github", + "upstream::::tool", + ] { + assert!(CodeModeToolId::parse(id).is_err(), "{id} should be invalid"); + } + } + + #[test] + fn builds_search_candidate_for_lab_action() { + let candidate = CodeModeSearchCandidate::lab_action( + "gateway", + "gateway.schema", + "Return gateway schema", + 10.0, + ); + assert_eq!(candidate.id, "lab::gateway.gateway.schema"); + assert_eq!(candidate.upstream, "lab"); + assert_eq!(candidate.name, "gateway.schema"); + assert!(candidate.schema_available); + } + + #[test] + fn builds_search_candidate_for_upstream_tool() { + let candidate = CodeModeSearchCandidate::upstream_tool( + "github", + "search_issues", + "Search issues", + 8.5, + Some(json!({"type": "object"})), + ); + assert_eq!(candidate.id, "upstream::github::search_issues"); + assert_eq!(candidate.upstream, "github"); + assert_eq!(candidate.name, "search_issues"); + assert!(candidate.schema_available); + } + + #[test] + fn builds_lab_schema_response() { + let response = CodeModeSchemaResponse::lab_action( + "lab::gateway.gateway.schema", + "gateway.schema", + json!({"action": "gateway.schema"}), + ); + assert_eq!(response.kind, "lab_action"); + assert_eq!(response.schema_format, "lab_action_spec"); + } + + #[test] + fn builds_upstream_schema_response() { + let response = CodeModeSchemaResponse::upstream_tool( + "upstream::github::search_issues", + "github", + "search_issues", + json!({"type": "object"}), + ); + assert_eq!(response.kind, "upstream_tool"); + assert_eq!(response.schema_format, "json_schema"); + } +} diff --git a/crates/lab/src/mcp/catalog.rs b/crates/lab/src/mcp/catalog.rs index 7a382fc63..e0c7fc11f 100644 --- a/crates/lab/src/mcp/catalog.rs +++ b/crates/lab/src/mcp/catalog.rs @@ -9,6 +9,8 @@ use crate::mcp::prompts::list_all as list_builtin_prompts; pub(crate) const TOOL_SEARCH_TOOL_NAME: &str = "scout"; pub(crate) const TOOL_EXECUTE_TOOL_NAME: &str = "invoke"; +pub(crate) const 
CODE_SEARCH_TOOL_NAME: &str = "code_search"; +pub(crate) const CODE_SCHEMA_TOOL_NAME: &str = "code_schema"; pub(crate) const LEGACY_TOOL_INVOKE_TOOL_NAME: &str = "tool_invoke"; pub(crate) const LEGACY_TOOL_SEARCH_TOOL_NAME: &str = "tool_search"; pub(crate) const LEGACY_TOOL_EXECUTE_TOOL_NAME: &str = "tool_execute"; diff --git a/crates/lab/src/mcp/server.rs b/crates/lab/src/mcp/server.rs index 56a22ee26..0f070e40f 100644 --- a/crates/lab/src/mcp/server.rs +++ b/crates/lab/src/mcp/server.rs @@ -23,10 +23,15 @@ use serde_json::Value; use tokio::sync::RwLock; use crate::config::NodeRole; +use crate::dispatch::error::ToolError as DispatchToolError; +use crate::dispatch::gateway::code_mode::{ + CodeModeSchemaResponse, CodeModeSearchCandidate, CodeModeToolId, CodeModeToolRef, +}; use crate::dispatch::gateway::manager::{GatewayManager, GatewayToolSearchResult}; use crate::mcp::catalog::{ - LEGACY_TOOL_EXECUTE_TOOL_NAME, LEGACY_TOOL_INVOKE_TOOL_NAME, LEGACY_TOOL_SEARCH_TOOL_NAME, - TOOL_EXECUTE_TOOL_NAME, TOOL_SEARCH_TOOL_NAME, + CODE_SCHEMA_TOOL_NAME, CODE_SEARCH_TOOL_NAME, LEGACY_TOOL_EXECUTE_TOOL_NAME, + LEGACY_TOOL_INVOKE_TOOL_NAME, LEGACY_TOOL_SEARCH_TOOL_NAME, TOOL_EXECUTE_TOOL_NAME, + TOOL_SEARCH_TOOL_NAME, }; use crate::mcp::elicitation::{ElicitResult, elicit_confirm}; use crate::mcp::envelope::{build_error, build_error_extra, build_success}; @@ -1064,7 +1069,7 @@ impl ServerHandler for LabMcpServer { } } if manager_tool_search_enabled { - // scout and invoke are gateway meta-tools with no shared dispatch layer equivalent. + // Gateway meta-tools have no shared dispatch layer equivalent. // See mcp/CLAUDE.md for the exception rationale and dispatch/gateway/dispatch.rs guard. 
let tool_search_schema = match serde_json::json!({ "type": "object", @@ -1092,6 +1097,52 @@ impl ServerHandler for LabMcpServer { tool_search_schema, )); gateway_tool_count += 1; + let code_search_schema = match serde_json::json!({ + "type": "object", + "properties": { + "query": { "type": "string", "maxLength": 500 }, + "top_k": { "type": "integer", "minimum": 1, "maximum": 50 }, + "detail": { + "type": "string", + "enum": ["brief", "detailed", "full"], + "default": "brief" + } + }, + "required": ["query"] + }) { + Value::Object(map) => Arc::new(map), + _ => unreachable!("code_search schema must be an object"), + }; + tools.push(Tool::new( + CODE_SEARCH_TOOL_NAME, + "Schema-first Code Mode discovery for Lab and proxied upstream tools. \ + Returns stable tool ids, short descriptions, scores, and whether an \ + exact schema is available. Use code_schema with the returned id before \ + generating tool-call code.", + code_search_schema, + )); + gateway_tool_count += 1; + let code_schema_schema = match serde_json::json!({ + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "Stable id returned by code_search, e.g. lab::gateway.gateway.schema or upstream::github::search_issues" + } + }, + "required": ["id"] + }) { + Value::Object(map) => Arc::new(map), + _ => unreachable!("code_schema schema must be an object"), + }; + tools.push(Tool::new( + CODE_SCHEMA_TOOL_NAME, + "Return the exact input contract for one Code Mode tool id. 
\ + Lab ids return the ActionSpec-derived action contract; upstream ids \ + return the upstream JSON Schema exposed by the gateway.", + code_schema_schema, + )); + gateway_tool_count += 1; let tool_execute_schema = match serde_json::json!({ "type": "object", "properties": { @@ -1214,6 +1265,228 @@ impl ServerHandler for LabMcpServer { let param_key_count = params.as_object().map_or(0, serde_json::Map::len); let svc = self.registry.services().iter().find(|s| s.name == service); + if service == CODE_SEARCH_TOOL_NAME { + let started = Instant::now(); + let subject = self.request_subject_log_tag(&context); + let auth = auth_context_from_extensions(&context.extensions); + if !tool_search_scope_allowed(auth) { + tracing::warn!( + surface = "mcp", + service = %service, + action = "call_tool", + subject, + elapsed_ms = started.elapsed().as_millis(), + kind = "forbidden", + "gateway code search denied by scope" + ); + let env = build_error_extra( + &service, + "call_tool", + "forbidden", + "code_search requires one of scopes: lab:read, lab, lab:admin", + &serde_json::json!({ "required_scopes": ["lab:read", "lab", "lab:admin"] }), + ); + return Ok(CallToolResult::error(vec![Content::text(env.to_string())])); + } + let query = args + .get("query") + .and_then(Value::as_str) + .unwrap_or_default() + .to_string(); + let query_hash = hash_arguments(&Value::String(query.clone())); + let requested_top_k = args + .get("top_k") + .and_then(Value::as_u64) + .map(|value| value as usize); + let Some(manager) = &self.gateway_manager else { + let envelope = build_error( + &service, + "call_tool", + "unknown_tool", + "code search is not enabled", + ); + return Ok(CallToolResult::error(vec![Content::text( + envelope.to_string(), + )])); + }; + let top_k = match requested_top_k { + Some(value) => value, + None => manager.tool_search_config().await.top_k_default, + } + .max(1) + .min(50); + let score_floor_fraction = manager.tool_search_config().await.score_floor_fraction; + tracing::info!( 
+ surface = "mcp", + service = "code_search", + action = "call_tool", + subject, + query_hash = %query_hash, + query_len = query.len(), + top_k, + "gateway code search start" + ); + let mut candidates = self + .search_builtin_code_mode_candidates(&query, top_k, score_floor_fraction) + .await; + return match manager.search_tools(&query, top_k, true).await { + Ok(upstream_results) => { + candidates.extend(upstream_results.into_iter().map(|result| { + CodeModeSearchCandidate::upstream_tool( + &result.upstream, + &result.name, + &result.description, + result.score, + result.input_schema, + ) + })); + candidates.sort_by(compare_code_mode_search_candidates); + candidates.truncate(top_k); + tracing::info!( + surface = "mcp", + service = "code_search", + action = "call_tool", + subject, + query_hash = %query_hash, + query_len = query.len(), + top_k, + result_count = candidates.len(), + elapsed_ms = started.elapsed().as_millis(), + "gateway code search ok" + ); + Ok(CallToolResult::success(vec![Content::text( + serde_json::to_string(&candidates).unwrap_or_else(|_| "[]".to_string()), + )])) + } + Err(err) => { + let kind = err.kind(); + if kind == "index_warming" && !candidates.is_empty() { + candidates.sort_by(compare_code_mode_search_candidates); + candidates.truncate(top_k); + tracing::info!( + surface = "mcp", + service = "code_search", + action = "call_tool", + subject, + query_hash = %query_hash, + query_len = query.len(), + top_k, + result_count = candidates.len(), + elapsed_ms = started.elapsed().as_millis(), + upstream_kind = kind, + "gateway code search ok" + ); + return Ok(CallToolResult::success(vec![Content::text( + serde_json::to_string(&candidates).unwrap_or_else(|_| "[]".to_string()), + )])); + } + tracing::warn!( + surface = "mcp", + service = "code_search", + action = "call_tool", + subject, + query_hash = %query_hash, + query_len = query.len(), + top_k, + elapsed_ms = started.elapsed().as_millis(), + kind, + error = %err, + "gateway code search failed" + 
); + let mut extra = serde_json::Map::new(); + if kind == "index_warming" { + extra.insert("retry_after_ms".to_string(), serde_json::json!(2000)); + } + if kind == "invalid_param" { + extra.insert("param".to_string(), serde_json::json!("query")); + } + let env = build_error_extra( + &service, + "call_tool", + kind, + &err.to_string(), + &Value::Object(extra), + ); + Ok(CallToolResult::error(vec![Content::text(env.to_string())])) + } + }; + } + if service == CODE_SCHEMA_TOOL_NAME { + let started = Instant::now(); + let subject = self.request_subject_log_tag(&context); + let auth = auth_context_from_extensions(&context.extensions); + if !tool_search_include_schema_allowed(auth, true) { + tracing::warn!( + surface = "mcp", + service = %service, + action = "call_tool", + subject, + elapsed_ms = started.elapsed().as_millis(), + kind = "forbidden", + "gateway code schema denied by scope" + ); + let env = build_error_extra( + &service, + "call_tool", + "forbidden", + "code_schema requires one of scopes: lab, lab:admin", + &serde_json::json!({ "required_scopes": ["lab", "lab:admin"] }), + ); + return Ok(CallToolResult::error(vec![Content::text(env.to_string())])); + } + if self.gateway_manager.is_none() { + let envelope = build_error( + &service, + "call_tool", + "unknown_tool", + "code schema is not enabled", + ); + return Ok(CallToolResult::error(vec![Content::text( + envelope.to_string(), + )])); + } + let id = args + .get("id") + .and_then(Value::as_str) + .unwrap_or_default() + .to_string(); + let parsed = match CodeModeToolId::parse(&id) { + Ok(parsed) => parsed, + Err(err) => { + let env = tool_error_envelope(&service, "call_tool", &err); + return Ok(CallToolResult::error(vec![Content::text(env.to_string())])); + } + }; + let id_hash = hash_arguments(&Value::String(parsed.raw.clone())); + tracing::info!( + surface = "mcp", + service = "code_schema", + action = "call_tool", + subject, + id_hash = %id_hash, + "gateway code schema start" + ); + return match 
self.code_mode_schema_response(parsed).await { + Ok(response) => { + tracing::info!( + surface = "mcp", + service = "code_schema", + action = "call_tool", + subject, + id_hash = %id_hash, + elapsed_ms = started.elapsed().as_millis(), + "gateway code schema ok" + ); + Ok(CallToolResult::success(vec![Content::text( + serde_json::to_string(&response).unwrap_or_else(|_| "{}".to_string()), + )])) + } + Err(err) => { + let env = tool_error_envelope(&service, "call_tool", &err); + Ok(CallToolResult::error(vec![Content::text(env.to_string())])) + } + }; + } if service == TOOL_SEARCH_TOOL_NAME || service == LEGACY_TOOL_SEARCH_TOOL_NAME { let started = Instant::now(); let subject = self.request_subject_log_tag(&context); @@ -2474,6 +2747,138 @@ impl LabMcpServer { results } + async fn search_builtin_code_mode_candidates( + &self, + query: &str, + top_k: usize, + score_floor_fraction: f32, + ) -> Vec { + let needle = query.trim().to_ascii_lowercase(); + if needle.is_empty() || needle.len() > 500 { + return Vec::new(); + } + + let mut candidates = Vec::new(); + + for service in self.registry.services() { + if !self.service_visible_on_mcp(service.name).await { + continue; + } + for action in self.searchable_builtin_actions(service).await { + let haystack = format!( + "{}\n{}\n{}\n{}", + service.name, service.description, action.name, action.description + ) + .to_ascii_lowercase(); + let score = crate::dispatch::gateway::score_name_haystack( + &needle, + &action.name.to_ascii_lowercase(), + &haystack, + ); + if score > 0.0 { + candidates.push(CodeModeSearchCandidate::lab_action( + service.name, + action.name, + action.description, + score, + )); + } + } + } + + candidates.sort_by(compare_code_mode_search_candidates); + + if score_floor_fraction > 0.0 { + if let Some(top) = candidates.first() { + let floor = top.score * score_floor_fraction; + candidates.retain(|candidate| candidate.score >= floor); + } + } + + candidates.truncate(top_k.max(1).min(50)); + candidates + } + + async 
fn code_mode_schema_for_lab_action( + &self, + id: &str, + service_name: &str, + action_name: &str, + ) -> Result { + let Some(entry) = self + .registry + .services() + .iter() + .find(|entry| entry.name == service_name) + else { + return Err(DispatchToolError::Sdk { + sdk_kind: "not_found".to_string(), + message: format!("Lab service `{service_name}` was not found"), + }); + }; + if !self.service_visible_on_mcp(entry.name).await + || !self.action_allowed_on_mcp(entry.name, action_name).await + { + return Err(DispatchToolError::Sdk { + sdk_kind: "not_found".to_string(), + message: format!( + "Lab action `{service_name}.{action_name}` is not exposed on the mcp surface" + ), + }); + } + crate::dispatch::helpers::action_schema(entry.actions, action_name) + .map(|schema| CodeModeSchemaResponse::lab_action(id, action_name, schema)) + } + + async fn code_mode_schema_for_upstream_tool( + &self, + id: &str, + upstream: &str, + tool: &str, + ) -> Result { + let Some(pool) = self.current_upstream_pool().await else { + return Err(DispatchToolError::Sdk { + sdk_kind: "upstream_error".to_string(), + message: "gateway upstream pool is unavailable".to_string(), + }); + }; + let upstream_tools = pool.healthy_tools_for_upstream(upstream).await; + let Some(candidate) = upstream_tools + .into_iter() + .find(|candidate| candidate.tool.name.as_ref() == tool) + else { + return Err(DispatchToolError::Sdk { + sdk_kind: "not_found".to_string(), + message: format!("upstream tool `{upstream}::{tool}` was not found"), + }); + }; + let schema = candidate.input_schema.unwrap_or_else(|| { + serde_json::json!({ + "type": "object", + "properties": {} + }) + }); + Ok(CodeModeSchemaResponse::upstream_tool( + id, upstream, tool, schema, + )) + } + + async fn code_mode_schema_response( + &self, + parsed: CodeModeToolId, + ) -> Result { + match parsed.reference { + CodeModeToolRef::LabAction { service, action } => { + self.code_mode_schema_for_lab_action(&parsed.raw, &service, &action) + .await + } + 
CodeModeToolRef::UpstreamTool { upstream, tool } => { + self.code_mode_schema_for_upstream_tool(&parsed.raw, &upstream, &tool) + .await + } + } + } + async fn searchable_builtin_actions<'a>( &self, service: &'a crate::registry::RegisteredService, @@ -2488,6 +2893,25 @@ impl LabMcpServer { } } +fn compare_code_mode_search_candidates( + a: &CodeModeSearchCandidate, + b: &CodeModeSearchCandidate, +) -> CmpOrdering { + b.score + .partial_cmp(&a.score) + .unwrap_or(CmpOrdering::Equal) + .then_with(|| a.id.cmp(&b.id)) +} + +fn tool_error_envelope(service: &str, action: &str, err: &DispatchToolError) -> Value { + match err { + DispatchToolError::Sdk { sdk_kind, message } => { + build_error(service, action, sdk_kind, message) + } + other => build_error(service, action, other.kind(), &other.to_string()), + } +} + fn merge_tool_search_results( mut left: Vec, right: Vec, @@ -3156,6 +3580,35 @@ mod tests { ); } + #[tokio::test] + async fn code_search_expands_builtin_matches_to_action_candidates() { + let server = super::LabMcpServer { + registry: std::sync::Arc::new(completion_test_registry()), + gateway_manager: None, + node_role: None, + peers: std::sync::Arc::new(tokio::sync::RwLock::new(Vec::new())), + logging_level: std::sync::Arc::new(std::sync::atomic::AtomicU8::new( + logging_level_rank(rmcp::model::LoggingLevel::Info), + )), + }; + + let results = server + .search_builtin_code_mode_candidates("movie.search", 10, 0.0) + .await; + + assert!( + results + .iter() + .any(|result| result.id == "lab::radarr.movie.search") + ); + assert_eq!( + results.first().map(|result| result.id.as_str()), + Some("lab::radarr.movie.search") + ); + assert!(results.iter().all(|result| result.upstream == "lab")); + assert!(results.iter().all(|result| result.schema_available)); + } + #[tokio::test] async fn snapshot_catalog_hides_builtin_tools_when_tool_search_is_enabled() { let runtime = crate::dispatch::gateway::manager::GatewayRuntimeHandle::default(); diff --git a/docs/dev/ERRORS.md 
b/docs/dev/ERRORS.md index 6d259382a..47e34ebd1 100644 --- a/docs/dev/ERRORS.md +++ b/docs/dev/ERRORS.md @@ -56,6 +56,7 @@ Dispatch layers may add the following kinds on top of SDK errors: - `unknown_instance` - `conflict` — resource already exists with the given identifier; HTTP 409 - `ambiguous_tool` — unqualified tool name resolved to multiple upstream gateway candidates; envelope carries `valid: Vec` of fully-qualified `{upstream}::{tool}` names the caller must choose from. HTTP 409. +- `invalid_code_mode_id` — Code Mode tool id parsing failed. Valid ids are `lab::<service>.<action>` and `upstream::<upstream>::<tool>`. HTTP 422. - `queue_saturated` — bounded runtime queue is full; caller should retry after the current work drains. HTTP 429. ### Fleet-WS install hardening kinds (lab-zxx5.18) diff --git a/docs/services/GATEWAY.md b/docs/services/GATEWAY.md index 9c0b65191..c494e4d39 100644 --- a/docs/services/GATEWAY.md +++ b/docs/services/GATEWAY.md @@ -87,12 +87,14 @@ Typical patch payloads: Gateway tool-search mode is a single gateway-wide switch. It is not configured per upstream server. -When enabled, Lab hides raw proxied upstream tools from MCP `list_tools()` and exposes two synthetic tools instead: +When enabled, Lab hides raw proxied upstream tools from MCP `list_tools()` and exposes synthetic gateway tools instead: | Tool | Purpose | |------|---------| -| `tool_search` | Search healthy discovered upstream tools across the gateway. | -| `tool_execute` | Invoke one tool returned by `tool_search`. | +| `scout` | Search healthy discovered Lab and upstream tools across the gateway. Legacy alias: `tool_search`. | +| `invoke` | Invoke one tool returned by `scout`. Legacy aliases: `tool_execute`, `tool_invoke`. | +| `code_search` | Return Code Mode candidates with stable ids and schema availability. | +| `code_schema` | Return the exact schema/contract for one `code_search` id. | This keeps the MCP catalog small while still allowing clients to reach every exposed upstream tool.
Per-upstream `expose_tools` filters still apply before tools enter the searchable catalog. @@ -136,12 +138,44 @@ Invoke call shape on the MCP surface: { "name": "search_issues", "arguments": { "query": "repo:jmagar/lab tool_search" } } ``` +Code Mode is schema-first discovery, not execution. `code_search` returns stable ids +for Lab actions and upstream tools: + +```json +{ "query": "github issues", "top_k": 10, "detail": "brief" } +``` + +Example candidate ids: + +```json +[ + { "id": "lab::gateway.gateway.schema", "name": "gateway.schema", "upstream": "lab", "schema_available": true }, + { "id": "upstream::github::search_issues", "name": "search_issues", "upstream": "github", "schema_available": true } +] +``` + +`code_schema` then resolves one candidate id to the precise contract: + +```json +{ "id": "lab::gateway.gateway.schema" } +``` + +Lab ids use `lab::<service>.<action>` and return an `ActionSpec`-derived +contract (`schema_format: "lab_action_spec"`). Upstream ids use +`upstream::<upstream>::<tool>` and return the upstream JSON Schema +cached by the gateway (`schema_format: "json_schema"`). `code_schema` requires +the same schema visibility scope as `scout include_schema=true`: `lab` or +`lab:admin`.
+ Rules: - `top_k_default` is validated in the range `1..=50` - `max_tools` is validated in the range `1..=10000` - `query` must be non-empty and no longer than 500 characters - `include_schema` defaults to `false`; schemas are sanitized before return when requested +- `code_search` is read-only discovery and accepts `lab:read`, `lab`, or `lab:admin` +- `code_schema` exposes full schemas and requires `lab` or `lab:admin` +- invalid Code Mode ids return `invalid_code_mode_id` - old `[[upstream]].tool_search` blocks are accepted only as migration input and are dropped on the next gateway config write - `gateway.update` rejects `patch.tool_search`; use `gateway.tool_search.set` instead diff --git a/docs/superpowers/plans/2026-05-24-code-mode-contract.md b/docs/superpowers/plans/2026-05-24-code-mode-contract.md new file mode 100644 index 000000000..e21e392a7 --- /dev/null +++ b/docs/superpowers/plans/2026-05-24-code-mode-contract.md @@ -0,0 +1,854 @@ +# Code Mode Contract Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Add the first Code Mode contract slice for the Lab gateway: canonical Code Mode tool IDs plus `code_search` and `code_schema` discovery tools that are distinct from `scout`/`invoke`. + +**Architecture:** Keep Code Mode as a gateway-owned MCP meta-tool surface, parallel to but separate from `scout`/`invoke`. Add a small `dispatch/gateway/code_mode.rs` module for canonical IDs and schema summaries, then wire `code_search` and `code_schema` in `mcp/server.rs` using existing search and schema sources. + +**Tech Stack:** Rust 2024, `rmcp`, `serde_json`, existing `GatewayManager`, existing `ToolRegistry`, `ActionSpec` / upstream `input_schema`, Cargo unit tests. + +**Implementation status:** Completed in branch `feat/code-mode-contract`. 
+ +- Added canonical Code Mode id parsing and response structs. +- Added `code_search` and `code_schema` MCP meta-tools behind gateway tool-search mode. +- Added action-level built-in Code Mode search, upstream schema resolution, docs, and tests. +- Verified with focused unit tests, `cargo fmt --all -- --check`, and `cargo check --manifest-path crates/lab/Cargo.toml --all-features`. + +--- + +## File Structure + +- Create: `crates/lab/src/dispatch/gateway/code_mode.rs` + - Owns canonical Code Mode IDs, parsing, generated-schema summary envelopes, and small conversion helpers. +- Modify: `crates/lab/src/dispatch/gateway.rs` + - Exposes the new `code_mode` module. +- Modify: `crates/lab/src/mcp/catalog.rs` + - Adds canonical MCP meta-tool names: `code_search` and `code_schema`. +- Modify: `crates/lab/src/mcp/server.rs` + - Lists the two Code Mode tools when gateway tool-search mode is enabled. + - Handles `code_search` by adapting existing `scout` search results into Code Mode candidates. + - Handles `code_schema` by resolving a canonical ID to a Lab built-in action schema or an upstream tool schema. +- Modify: `docs/services/GATEWAY.md` + - Documents the difference between `scout`/`invoke` and Code Mode discovery/schema flow. +- Modify: `docs/dev/ERRORS.md` + - Documents `invalid_code_mode_id` if introduced by the parser. + +## Contract Shape + +The first implementation slice must expose two MCP tools: + +```json +code_search({ + "query": "github issues", + "top_k": 10, + "detail": "brief" +}) +``` + +```json +code_schema({ + "id": "lab::gateway.gateway.schema" +}) +``` + +`code_search` returns JSON text containing an array: + +```json +[ + { + "id": "lab::gateway.gateway.schema", + "name": "gateway", + "upstream": "lab", + "description": "Gateway management and discovery. 
Actions: gateway.schema, ...", + "score": 42.0, + "schema_available": true + } +] +``` + +`code_schema` returns JSON text: + +```json +{ + "id": "lab::gateway.gateway.schema", + "kind": "lab_action", + "name": "gateway.schema", + "upstream": "lab", + "schema": { + "action": "gateway.schema", + "description": "Return the discovered schema for one upstream gateway server", + "destructive": false, + "returns": "Value", + "params": [] + }, + "schema_format": "lab_action_spec" +} +``` + +For upstream tools: + +```json +{ + "id": "upstream::github::search_issues", + "kind": "upstream_tool", + "name": "search_issues", + "upstream": "github", + "schema": { "type": "object", "properties": {} }, + "schema_format": "json_schema" +} +``` + +## Canonical ID Rules + +- Lab built-in action ID: `lab::<service>.<action>` + - Example: `lab::gateway.gateway.schema` + - Parser split: prefix `lab::`, then service before first `.`, action after first `.`. +- Upstream tool ID: `upstream::<upstream>::<tool>` + - Example: `upstream::github::search_issues` + - Parser split: prefix `upstream::`, then upstream before next `::`, tool after next `::`. +- Invalid or incomplete IDs return `invalid_code_mode_id`.
+ +## Task 1: Add Canonical ID Parser and Tests + +**Files:** +- Create: `crates/lab/src/dispatch/gateway/code_mode.rs` +- Modify: `crates/lab/src/dispatch/gateway.rs` + +- [ ] **Step 1: Write failing parser tests** + +Add this test module to the new file: + +```rust +#[cfg(test)] +mod tests { + use super::{CodeModeToolId, CodeModeToolRef}; + + #[test] + fn parses_lab_action_id() { + let parsed = CodeModeToolId::parse("lab::gateway.gateway.schema").unwrap(); + assert_eq!( + parsed, + CodeModeToolId { + raw: "lab::gateway.gateway.schema".to_string(), + reference: CodeModeToolRef::LabAction { + service: "gateway".to_string(), + action: "gateway.schema".to_string(), + }, + } + ); + } + + #[test] + fn parses_upstream_tool_id() { + let parsed = CodeModeToolId::parse("upstream::github::search_issues").unwrap(); + assert_eq!( + parsed, + CodeModeToolId { + raw: "upstream::github::search_issues".to_string(), + reference: CodeModeToolRef::UpstreamTool { + upstream: "github".to_string(), + tool: "search_issues".to_string(), + }, + } + ); + } + + #[test] + fn rejects_invalid_ids() { + for id in ["", "gateway.gateway.schema", "lab::gateway", "upstream::github", "upstream::::tool"] { + assert!(CodeModeToolId::parse(id).is_err(), "{id} should be invalid"); + } + } +} +``` + +- [ ] **Step 2: Run the parser tests to verify they fail** + +Run: + +```bash +cargo test -p labby --lib dispatch::gateway::code_mode::tests --all-features +``` + +Expected: FAIL because `code_mode` module and types do not exist. 
+ +- [ ] **Step 3: Implement the parser** + +Create `crates/lab/src/dispatch/gateway/code_mode.rs` with: + +```rust +use crate::dispatch::error::ToolError; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct CodeModeToolId { + pub raw: String, + pub reference: CodeModeToolRef, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum CodeModeToolRef { + LabAction { service: String, action: String }, + UpstreamTool { upstream: String, tool: String }, +} + +impl CodeModeToolId { + pub fn parse(raw: &str) -> Result { + let raw = raw.trim(); + if raw.is_empty() { + return Err(invalid_code_mode_id("Code Mode tool id must not be empty")); + } + + if let Some(rest) = raw.strip_prefix("lab::") { + let (service, action) = rest + .split_once('.') + .ok_or_else(|| invalid_code_mode_id("lab Code Mode ids must use lab::."))?; + if service.trim().is_empty() || action.trim().is_empty() { + return Err(invalid_code_mode_id("lab Code Mode ids must include service and action")); + } + return Ok(Self { + raw: raw.to_string(), + reference: CodeModeToolRef::LabAction { + service: service.trim().to_string(), + action: action.trim().to_string(), + }, + }); + } + + if let Some(rest) = raw.strip_prefix("upstream::") { + let (upstream, tool) = rest + .split_once("::") + .ok_or_else(|| invalid_code_mode_id("upstream Code Mode ids must use upstream::::"))?; + if upstream.trim().is_empty() || tool.trim().is_empty() { + return Err(invalid_code_mode_id("upstream Code Mode ids must include upstream and tool")); + } + return Ok(Self { + raw: raw.to_string(), + reference: CodeModeToolRef::UpstreamTool { + upstream: upstream.trim().to_string(), + tool: tool.trim().to_string(), + }, + }); + } + + Err(invalid_code_mode_id("Code Mode ids must start with lab:: or upstream::")) + } +} + +pub fn lab_action_id(service: &str, action: &str) -> String { + format!("lab::{service}.{action}") +} + +pub fn upstream_tool_id(upstream: &str, tool: &str) -> String { + format!("upstream::{upstream}::{tool}") +} + +pub 
fn invalid_code_mode_id(message: impl Into) -> ToolError { + ToolError::Sdk { + sdk_kind: "invalid_code_mode_id".to_string(), + message: message.into(), + } +} +``` + +Expose it in `crates/lab/src/dispatch/gateway.rs`: + +```rust +pub mod code_mode; +``` + +- [ ] **Step 4: Run the parser tests to verify they pass** + +Run: + +```bash +cargo test -p labby --lib dispatch::gateway::code_mode::tests --all-features +``` + +Expected: PASS. + +- [ ] **Step 5: Commit** + +```bash +git add crates/lab/src/dispatch/gateway.rs crates/lab/src/dispatch/gateway/code_mode.rs +git commit -m "feat(code-mode): add canonical tool ids" +``` + +## Task 2: Add Code Mode Search Contract + +**Files:** +- Modify: `crates/lab/src/dispatch/gateway/code_mode.rs` +- Modify: `crates/lab/src/mcp/catalog.rs` +- Modify: `crates/lab/src/mcp/server.rs` + +- [ ] **Step 1: Write failing unit tests for Code Mode search results** + +Append to `code_mode.rs` tests: + +```rust +use serde_json::json; + +#[test] +fn builds_search_candidate_for_lab_action() { + let candidate = super::CodeModeSearchCandidate::lab_action( + "gateway", + "gateway.schema", + "Return gateway schema", + 10.0, + ); + assert_eq!(candidate.id, "lab::gateway.gateway.schema"); + assert_eq!(candidate.upstream, "lab"); + assert_eq!(candidate.name, "gateway.schema"); + assert!(candidate.schema_available); +} + +#[test] +fn builds_search_candidate_for_upstream_tool() { + let candidate = super::CodeModeSearchCandidate::upstream_tool( + "github", + "search_issues", + "Search issues", + 8.5, + Some(json!({"type": "object"})), + ); + assert_eq!(candidate.id, "upstream::github::search_issues"); + assert_eq!(candidate.upstream, "github"); + assert_eq!(candidate.name, "search_issues"); + assert!(candidate.schema_available); +} +``` + +- [ ] **Step 2: Run tests to verify they fail** + +Run: + +```bash +cargo test -p labby --lib dispatch::gateway::code_mode::tests --all-features +``` + +Expected: FAIL because `CodeModeSearchCandidate` does not 
exist. + +- [ ] **Step 3: Implement search candidate type** + +Add to `code_mode.rs`: + +```rust +use serde::Serialize; +use serde_json::Value; + +#[derive(Debug, Clone, PartialEq, Serialize)] +pub struct CodeModeSearchCandidate { + pub id: String, + pub name: String, + pub upstream: String, + pub description: String, + pub score: f32, + pub schema_available: bool, +} + +impl CodeModeSearchCandidate { + pub fn lab_action(service: &str, action: &str, description: &str, score: f32) -> Self { + Self { + id: lab_action_id(service, action), + name: action.to_string(), + upstream: "lab".to_string(), + description: description.to_string(), + score, + schema_available: true, + } + } + + pub fn upstream_tool( + upstream: &str, + tool: &str, + description: &str, + score: f32, + schema: Option, + ) -> Self { + Self { + id: upstream_tool_id(upstream, tool), + name: tool.to_string(), + upstream: upstream.to_string(), + description: description.to_string(), + score, + schema_available: schema.is_some(), + } + } +} +``` + +- [ ] **Step 4: Add MCP catalog names** + +In `crates/lab/src/mcp/catalog.rs`, add: + +```rust +pub const CODE_SEARCH_TOOL_NAME: &str = "code_search"; +pub const CODE_SCHEMA_TOOL_NAME: &str = "code_schema"; +``` + +Do not remove or rename `TOOL_SEARCH_TOOL_NAME` or `TOOL_EXECUTE_TOOL_NAME`. 
+ +- [ ] **Step 5: Wire `code_search` into `list_tools` and `call_tool`** + +In `server.rs`, import the names: + +```rust +CODE_SCHEMA_TOOL_NAME, CODE_SEARCH_TOOL_NAME, +``` + +Add two schemas near the existing `scout`/`invoke` tool definitions: + +```rust +let code_search_schema = match serde_json::json!({ + "type": "object", + "properties": { + "query": { "type": "string", "maxLength": 500 }, + "top_k": { "type": "integer", "minimum": 1, "maximum": 50 }, + "detail": { + "type": "string", + "enum": ["brief", "detailed", "full"], + "default": "brief" + } + }, + "required": ["query"] +}) { + Value::Object(map) => Arc::new(map), + _ => unreachable!("code_search schema must be an object"), +}; +tools.push(Tool::new( + CODE_SEARCH_TOOL_NAME, + "Search Lab Code Mode candidates. Returns canonical ids for use with code_schema. This is schema-first discovery, not execution.", + code_search_schema, +)); +gateway_tool_count += 1; +``` + +Add a `call_tool` branch before normal service dispatch: + +```rust +if service == CODE_SEARCH_TOOL_NAME { + let started = Instant::now(); + let subject = self.request_subject_log_tag(&context); + let auth = auth_context_from_extensions(&context.extensions); + if !tool_search_scope_allowed(auth) { + let env = build_error_extra( + &service, + "call_tool", + "forbidden", + "code_search requires one of scopes: lab:read, lab, lab:admin", + &serde_json::json!({ "required_scopes": ["lab:read", "lab", "lab:admin"] }), + ); + return Ok(CallToolResult::error(vec![Content::text(env.to_string())])); + } + let query = args.get("query").and_then(Value::as_str).unwrap_or_default().to_string(); + let top_k = args.get("top_k").and_then(Value::as_u64).map_or(10, |value| value as usize); + let score_floor_fraction = self + .gateway_manager + .as_ref() + .map(|manager| async move { manager.tool_search_config().await.score_floor_fraction }); + let score_floor_fraction = match score_floor_fraction { + Some(future) => future.await, + None => 0.0, + }; + let 
builtin = self + .search_builtin_tools(&query, top_k, false, score_floor_fraction) + .await + .into_iter() + .map(|result| crate::dispatch::gateway::code_mode::CodeModeSearchCandidate::lab_action( + &result.name, + "", + &result.description, + result.score, + )) + .collect::>(); + let mut candidates = builtin; + if let Some(manager) = &self.gateway_manager + && let Ok(upstream_results) = manager.search_tools(&query, top_k, true).await + { + candidates.extend(upstream_results.into_iter().map(|result| { + crate::dispatch::gateway::code_mode::CodeModeSearchCandidate::upstream_tool( + &result.upstream, + &result.name, + &result.description, + result.score, + result.input_schema, + ) + })); + } + candidates.truncate(top_k.max(1).min(50)); + tracing::info!( + surface = "mcp", + service = "code_mode", + action = "code_search", + subject, + result_count = candidates.len(), + elapsed_ms = started.elapsed().as_millis(), + "code mode search ok" + ); + return Ok(CallToolResult::success(vec![Content::text( + serde_json::to_string(&candidates).unwrap_or_else(|_| "[]".to_string()), + )])); +} +``` + +After adding this, fix the built-in mapping so it uses real action names. 
Replace the temporary `lab_action(&result.name, "", ...)` mapping by adding a helper in `server.rs`: + +```rust +async fn search_builtin_code_mode_candidates( + &self, + query: &str, + top_k: usize, + score_floor_fraction: f32, +) -> Vec { + let results = self + .search_builtin_tools(query, top_k, false, score_floor_fraction) + .await; + let mut candidates = Vec::new(); + for result in results { + let Some(service) = self.registry.services().iter().find(|service| service.name == result.name) else { + continue; + }; + for action in self.searchable_builtin_actions(service).await { + candidates.push(crate::dispatch::gateway::code_mode::CodeModeSearchCandidate::lab_action( + service.name, + action.name, + action.description, + result.score, + )); + } + } + candidates.truncate(top_k.max(1).min(50)); + candidates +} +``` + +Use that helper in the `code_search` branch. + +- [ ] **Step 6: Run focused tests** + +Run: + +```bash +cargo test -p labby --lib dispatch::gateway::code_mode::tests --all-features +cargo test -p labby --lib tool_search_indexes_builtin_lab_services --all-features +``` + +Expected: PASS. 
+ +- [ ] **Step 7: Commit** + +```bash +git add crates/lab/src/dispatch/gateway/code_mode.rs crates/lab/src/mcp/catalog.rs crates/lab/src/mcp/server.rs +git commit -m "feat(code-mode): add schema-first search surface" +``` + +## Task 3: Add Code Mode Schema Contract + +**Files:** +- Modify: `crates/lab/src/dispatch/gateway/code_mode.rs` +- Modify: `crates/lab/src/mcp/server.rs` +- Modify: `docs/dev/ERRORS.md` + +- [ ] **Step 1: Write failing schema envelope tests** + +Append to `code_mode.rs` tests: + +```rust +#[test] +fn builds_lab_schema_response() { + let response = super::CodeModeSchemaResponse::lab_action( + "lab::gateway.gateway.schema", + "gateway.schema", + json!({"action": "gateway.schema"}), + ); + assert_eq!(response.kind, "lab_action"); + assert_eq!(response.schema_format, "lab_action_spec"); +} + +#[test] +fn builds_upstream_schema_response() { + let response = super::CodeModeSchemaResponse::upstream_tool( + "upstream::github::search_issues", + "github", + "search_issues", + json!({"type": "object"}), + ); + assert_eq!(response.kind, "upstream_tool"); + assert_eq!(response.schema_format, "json_schema"); +} +``` + +- [ ] **Step 2: Run tests to verify they fail** + +Run: + +```bash +cargo test -p labby --lib dispatch::gateway::code_mode::tests --all-features +``` + +Expected: FAIL because `CodeModeSchemaResponse` does not exist. 
+ +- [ ] **Step 3: Implement schema response type** + +Add to `code_mode.rs`: + +```rust +#[derive(Debug, Clone, PartialEq, Serialize)] +pub struct CodeModeSchemaResponse { + pub id: String, + pub kind: &'static str, + pub name: String, + pub upstream: String, + pub schema: Value, + pub schema_format: &'static str, +} + +impl CodeModeSchemaResponse { + pub fn lab_action(id: &str, action: &str, schema: Value) -> Self { + Self { + id: id.to_string(), + kind: "lab_action", + name: action.to_string(), + upstream: "lab".to_string(), + schema, + schema_format: "lab_action_spec", + } + } + + pub fn upstream_tool(id: &str, upstream: &str, tool: &str, schema: Value) -> Self { + Self { + id: id.to_string(), + kind: "upstream_tool", + name: tool.to_string(), + upstream: upstream.to_string(), + schema, + schema_format: "json_schema", + } + } +} +``` + +- [ ] **Step 4: Add `code_schema` MCP tool listing** + +In `list_tools`, add: + +```rust +let code_schema_schema = match serde_json::json!({ + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "Canonical Code Mode id returned by code_search, e.g. lab::gateway.gateway.schema or upstream::github::search_issues." + } + }, + "required": ["id"] +}) { + Value::Object(map) => Arc::new(map), + _ => unreachable!("code_schema schema must be an object"), +}; +tools.push(Tool::new( + CODE_SCHEMA_TOOL_NAME, + "Fetch the schema envelope for one canonical Code Mode id returned by code_search. 
Use this before generating code.", + code_schema_schema, +)); +gateway_tool_count += 1; +``` + +- [ ] **Step 5: Add `code_schema` handler** + +In `call_tool`, before normal service dispatch: + +```rust +if service == CODE_SCHEMA_TOOL_NAME { + let id = args.get("id").and_then(Value::as_str).unwrap_or_default(); + let parsed = match crate::dispatch::gateway::code_mode::CodeModeToolId::parse(id) { + Ok(parsed) => parsed, + Err(err) => { + let env = build_error(&service, "call_tool", err.kind(), &err.to_string()); + return Ok(CallToolResult::error(vec![Content::text(env.to_string())])); + } + }; + match parsed.reference { + crate::dispatch::gateway::code_mode::CodeModeToolRef::LabAction { service: ref target_service, action: ref target_action } => { + let Some(entry) = self.registry.services().iter().find(|entry| entry.name == target_service) else { + let env = build_error(&service, "call_tool", "unknown_tool", &format!("unknown Lab service `{target_service}`")); + return Ok(CallToolResult::error(vec![Content::text(env.to_string())])); + }; + let schema = match crate::dispatch::helpers::action_schema(entry.actions, target_action) { + Ok(schema) => schema, + Err(err) => { + let env = build_error(&service, "call_tool", err.kind(), &err.to_string()); + return Ok(CallToolResult::error(vec![Content::text(env.to_string())])); + } + }; + let response = crate::dispatch::gateway::code_mode::CodeModeSchemaResponse::lab_action( + &parsed.raw, + target_action, + schema, + ); + return Ok(CallToolResult::success(vec![Content::text( + serde_json::to_string(&response).unwrap_or_else(|_| "{}".to_string()), + )])); + } + crate::dispatch::gateway::code_mode::CodeModeToolRef::UpstreamTool { ref upstream, ref tool } => { + let Some(pool) = self.current_upstream_pool().await else { + let env = build_error(&service, "call_tool", "upstream_error", "upstream pool is not available"); + return Ok(CallToolResult::error(vec![Content::text(env.to_string())])); + }; + let matches = pool + 
.healthy_tools_for_upstream(upstream) + .await + .into_iter() + .filter(|candidate| candidate.tool.name.as_ref() == tool) + .collect::>(); + let Some(candidate) = matches.into_iter().next() else { + let env = build_error(&service, "call_tool", "unknown_tool", &format!("unknown upstream tool `{}`", parsed.raw)); + return Ok(CallToolResult::error(vec![Content::text(env.to_string())])); + }; + let schema = candidate + .input_schema + .unwrap_or_else(|| serde_json::json!({"type": "object", "properties": {}})); + let response = crate::dispatch::gateway::code_mode::CodeModeSchemaResponse::upstream_tool( + &parsed.raw, + upstream, + tool, + schema, + ); + return Ok(CallToolResult::success(vec![Content::text( + serde_json::to_string(&response).unwrap_or_else(|_| "{}".to_string()), + )])); + } + } +} +``` + +- [ ] **Step 6: Document error kind** + +In `docs/dev/ERRORS.md`, under dispatcher-level kinds add: + +```markdown +- `invalid_code_mode_id` — Code Mode schema lookup received an id that does not match `lab::<service>.<action>` or `upstream::<upstream>::<tool>`. HTTP 400. +``` + +- [ ] **Step 7: Run focused tests** + +Run: + +```bash +cargo test -p labby --lib dispatch::gateway::code_mode::tests --all-features +cargo test -p labby --lib invoke_ambiguous_tool_error_envelope_guides_retry --all-features +``` + +Expected: PASS. + +- [ ] **Step 8: Commit** + +```bash +git add crates/lab/src/dispatch/gateway/code_mode.rs crates/lab/src/mcp/server.rs docs/dev/ERRORS.md +git commit -m "feat(code-mode): add schema lookup surface" +``` + +## Task 4: Document Gateway Code Mode Discovery + +**Files:** +- Modify: `docs/services/GATEWAY.md` + +- [ ] **Step 1: Add documentation section** + +Add this section near the existing gateway tool-search section: + +```markdown +### Code Mode Discovery + +Code Mode is a schema-first companion to `scout` and `invoke`. `scout` finds tools and `invoke` executes one selected tool call. 
Code Mode adds a safer programmatic path for agents that need to generate code from schemas: + +1. `code_search` returns canonical ids for candidate Lab actions and upstream tools. +2. `code_schema` returns the schema envelope for one canonical id. +3. Future Code Mode execution must use those ids and re-check the same gateway auth, exposure, and destructive-action policies as normal invocation. + +Canonical ids are stable strings: + +- Lab built-in action: `lab::<service>.<action>`, for example `lab::gateway.gateway.schema`. +- Upstream tool: `upstream::<upstream>::<tool>`, for example `upstream::github::search_issues`. + +Code Mode does not replace `scout` or `invoke`; it is a separate opt-in contract for schema-first generated-code workflows. +``` + +- [ ] **Step 2: Verify docs mention all new tool names** + +Run: + +```bash +rg -n "code_search|code_schema|Code Mode" docs/services/GATEWAY.md +``` + +Expected: Shows all three terms in the new section. + +- [ ] **Step 3: Commit** + +```bash +git add docs/services/GATEWAY.md +git commit -m "docs: explain gateway code mode discovery" +``` + +## Task 5: Final Verification + +**Files:** +- No new files. Verify all changed files. + +- [ ] **Step 1: Run formatting** + +Run: + +```bash +cargo fmt --check +``` + +Expected: PASS. + +- [ ] **Step 2: Run focused tests** + +Run: + +```bash +cargo test -p labby --lib dispatch::gateway::code_mode::tests --all-features +cargo test -p labby --lib tool_search_indexes_builtin_lab_services --all-features +cargo test -p labby --lib invoke_ambiguous_tool_error_envelope_guides_retry --all-features +``` + +Expected: PASS. + +- [ ] **Step 3: Run cargo check** + +Run: + +```bash +cargo check +``` + +Expected: PASS. Existing warnings may appear; do not expand scope unless a new warning is introduced by Code Mode files. + +- [ ] **Step 4: Validate bead remains ready for next wave** + +Run: + +```bash +bd swarm validate lab-le0w0 +``` + +Expected: PASS. 
`lab-le0w0.1` may remain open until PR merge, but the DAG should still be valid. + +- [ ] **Step 5: Commit final verification note if needed** + +If verification changes no files, do not commit. If docs or generated artifacts changed, commit: + +```bash +git add <changed-files> +git commit -m "chore(code-mode): refresh verification artifacts" +``` + +## Self-Review + +- Spec coverage: The plan implements only `lab-le0w0.1`, the first ready Code Mode slice. It intentionally does not implement generated TS bindings, sandbox execution, broker policy, or rollout config; those remain in `lab-le0w0.2` through `lab-le0w0.5`. +- Placeholder scan: No task uses TBD/TODO/fill-in instructions. Every code step contains exact code or exact command. +- Type consistency: `CodeModeToolId`, `CodeModeToolRef`, `CodeModeSearchCandidate`, and `CodeModeSchemaResponse` names are consistent across tasks. From 74e83908d15f817437c026093961a52347725180 Mon Sep 17 00:00:00 2001 From: Jacob Magar Date: Sun, 24 May 2026 10:11:58 -0400 Subject: [PATCH 2/3] feat: complete gateway code mode --- crates/lab/src/config.rs | 107 ++++ crates/lab/src/dispatch/gateway/code_mode.rs | 628 ++++++++++++++++++- crates/lab/src/dispatch/gateway/manager.rs | 4 + crates/lab/src/mcp/catalog.rs | 11 + crates/lab/src/mcp/server.rs | 368 ++++++++++- docs/dev/ERRORS.md | 3 + docs/services/GATEWAY.md | 28 +- 7 files changed, 1122 insertions(+), 27 deletions(-) diff --git a/crates/lab/src/config.rs b/crates/lab/src/config.rs index 8be9aded1..e36c6ecb8 100644 --- a/crates/lab/src/config.rs +++ b/crates/lab/src/config.rs @@ -119,6 +119,9 @@ pub struct LabConfig { /// Gateway-wide tool-search mode for all exposed upstream tools. #[serde(default)] pub tool_search: ToolSearchConfig, + /// Gateway-wide Code Mode execution settings. + #[serde(default)] + pub code_mode: CodeModeConfig, /// Upstream MCP servers to proxy through the gateway. 
#[serde(default)] pub upstream: Vec, @@ -362,6 +365,7 @@ impl LabConfig { pub fn validate(&self) -> Result<(), ConfigError> { self.tool_search.validate()?; + self.code_mode.validate()?; for upstream in &self.upstream { upstream.validate()?; } @@ -484,6 +488,54 @@ impl Default for ToolSearchConfig { } } +fn default_code_mode_timeout_ms() -> u64 { + 5_000 +} + +fn default_code_mode_max_tool_calls() -> usize { + 8 +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct CodeModeConfig { + /// Enable the constrained Code Mode executor. Discovery and schema lookup + /// can be enabled through `[tool_search]` without enabling execution. + #[serde(default)] + pub enabled: bool, + /// Maximum wall-clock time for one Code Mode execution. + #[serde(default = "default_code_mode_timeout_ms")] + pub timeout_ms: u64, + /// Maximum host-brokered tool calls allowed in one Code Mode execution. + #[serde(default = "default_code_mode_max_tool_calls")] + pub max_tool_calls: usize, +} + +impl Default for CodeModeConfig { + fn default() -> Self { + Self { + enabled: false, + timeout_ms: default_code_mode_timeout_ms(), + max_tool_calls: default_code_mode_max_tool_calls(), + } + } +} + +impl CodeModeConfig { + pub fn validate(&self) -> Result<(), ConfigError> { + if !(1..=60_000).contains(&self.timeout_ms) { + return Err(ConfigError::InvalidCodeModeTimeout { + value: self.timeout_ms, + }); + } + if !(1..=50).contains(&self.max_tool_calls) { + return Err(ConfigError::InvalidCodeModeMaxToolCalls { + value: self.max_tool_calls, + }); + } + Ok(()) + } +} + impl ToolSearchConfig { /// Resolve Qdrant URL: config field → `QDRANT_URL` env var → None. 
pub fn resolved_qdrant_url(&self) -> Option { @@ -879,6 +931,10 @@ pub enum ConfigError { InvalidToolSearchMaxTools { value: usize }, #[error("gateway tool_search.score_floor_fraction={value} is invalid — expected 0.0..=1.0")] InvalidToolSearchScoreFloor { value: f32 }, + #[error("gateway code_mode.timeout_ms={value} is invalid — expected 1..=60000")] + InvalidCodeModeTimeout { value: u64 }, + #[error("gateway code_mode.max_tool_calls={value} is invalid — expected 1..=50")] + InvalidCodeModeMaxToolCalls { value: usize }, #[error("protected MCP route '{name}' has invalid {field}: {value}")] InvalidProtectedRoute { name: String, @@ -2681,6 +2737,57 @@ url = "https://acme.example.com/mcp" cfg.validate().expect("root tool_search validates"); } + #[test] + fn code_mode_is_root_level_config_and_disabled_by_default() { + let default_cfg = LabConfig::default(); + assert!(!default_cfg.code_mode.enabled); + assert_eq!(default_cfg.code_mode.timeout_ms, 5000); + assert_eq!(default_cfg.code_mode.max_tool_calls, 8); + + let cfg = toml::from_str::( + r#" +[code_mode] +enabled = true +timeout_ms = 2500 +max_tool_calls = 3 +"#, + ) + .expect("root code_mode parses"); + + assert!(cfg.code_mode.enabled); + assert_eq!(cfg.code_mode.timeout_ms, 2500); + assert_eq!(cfg.code_mode.max_tool_calls, 3); + } + + #[test] + fn code_mode_validation_rejects_unbounded_execution_settings() { + let cfg = toml::from_str::( + r#" +[code_mode] +timeout_ms = 0 +max_tool_calls = 8 +"#, + ) + .expect("code_mode parses"); + assert!(matches!( + cfg.validate(), + Err(ConfigError::InvalidCodeModeTimeout { value: 0 }) + )); + + let cfg = toml::from_str::( + r#" +[code_mode] +timeout_ms = 5000 +max_tool_calls = 0 +"#, + ) + .expect("code_mode parses"); + assert!(matches!( + cfg.validate(), + Err(ConfigError::InvalidCodeModeMaxToolCalls { value: 0 }) + )); + } + #[test] fn protected_route_legacy_backend_path_folds_into_backend_url() { let mut cfg = toml::from_str::( diff --git 
a/crates/lab/src/dispatch/gateway/code_mode.rs b/crates/lab/src/dispatch/gateway/code_mode.rs index ba1a1a443..0ac324b36 100644 --- a/crates/lab/src/dispatch/gateway/code_mode.rs +++ b/crates/lab/src/dispatch/gateway/code_mode.rs @@ -1,5 +1,6 @@ +use lab_apis::core::action::{ActionSpec, ParamSpec}; use serde::Serialize; -use serde_json::Value; +use serde_json::{Map, Value, json}; use crate::dispatch::error::ToolError; @@ -74,6 +75,11 @@ pub fn upstream_tool_id(upstream: &str, tool: &str) -> String { format!("upstream::{upstream}::{tool}") } +#[must_use] +pub fn sanitize_code_mode_schema(schema: Option) -> Option { + super::projection::sanitize_schema(schema) +} + #[derive(Debug, Clone, PartialEq, Serialize)] pub struct CodeModeSearchCandidate { pub id: String, @@ -124,11 +130,46 @@ pub struct CodeModeSchemaResponse { pub upstream: String, pub schema: Value, pub schema_format: &'static str, + pub input_schema: Value, + pub bindings: CodeModeBindings, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize)] +pub struct CodeModeBindings { + pub typescript: String, +} + +#[derive(Debug, Clone, PartialEq, Serialize)] +pub struct CodeModeInvocation { + pub id: String, + pub params: Value, +} + +#[derive(Debug, Clone, PartialEq, Serialize)] +pub struct CodeModeExecutionResponse { + pub calls: Vec, +} + +#[derive(Debug, Clone, PartialEq, Serialize)] +pub struct CodeModeExecutedCall { + pub id: String, + pub result: Value, } impl CodeModeSchemaResponse { + #[cfg(test)] #[must_use] pub fn lab_action(id: &str, action: &str, schema: Value) -> Self { + Self::lab_action_with_input_schema(id, action, schema.clone(), schema) + } + + #[must_use] + pub fn lab_action_with_input_schema( + id: &str, + action: &str, + schema: Value, + input_schema: Value, + ) -> Self { Self { id: id.to_string(), kind: "lab_action", @@ -136,6 +177,10 @@ impl CodeModeSchemaResponse { upstream: "lab".to_string(), schema, schema_format: "lab_action_spec", + bindings: CodeModeBindings { + typescript: 
typescript_binding(id, "ToolArgs", &input_schema), + }, + input_schema, } } @@ -146,6 +191,10 @@ impl CodeModeSchemaResponse { kind: "upstream_tool", name: tool.to_string(), upstream: upstream.to_string(), + bindings: CodeModeBindings { + typescript: typescript_binding(id, "ToolArgs", &schema), + }, + input_schema: schema.clone(), schema, schema_format: "json_schema", } @@ -159,11 +208,458 @@ pub fn invalid_code_mode_id(message: impl Into) -> ToolError { } } +pub fn extract_code_mode_invocations( + code: &str, + max_tool_calls: usize, +) -> Result, ToolError> { + reject_unsupported_code_mode_constructs(code)?; + + let mut rest = code; + let mut calls = Vec::new(); + + while let Some(offset) = next_call_tool_offset(rest) { + rest = &rest[offset + "callTool".len()..]; + let trimmed = rest.trim_start(); + if !trimmed.starts_with('(') { + continue; + } + let (inside, after) = balanced_parenthesized(trimmed)?; + rest = after; + let (id, params) = parse_call_tool_arguments(inside)?; + calls.push(CodeModeInvocation { id, params }); + if calls.len() > max_tool_calls { + return Err(ToolError::Sdk { + sdk_kind: "tool_call_limit_exceeded".to_string(), + message: format!("Code Mode execution exceeded max_tool_calls={max_tool_calls}"), + }); + } + } + + if calls.is_empty() { + return Err(ToolError::Sdk { + sdk_kind: "invalid_param".to_string(), + message: "Code Mode snippet must call callTool(id, params) at least once".to_string(), + }); + } + Ok(calls) +} + +fn reject_unsupported_code_mode_constructs(input: &str) -> Result<(), ToolError> { + if let Some(keyword) = first_unsupported_keyword(input) { + return Err(ToolError::Sdk { + sdk_kind: "invalid_param".to_string(), + message: format!( + "Code Mode MVP only supports a static sequence of callTool(id, params) calls; unsupported construct `{keyword}`" + ), + }); + } + Ok(()) +} + +fn first_unsupported_keyword(input: &str) -> Option<&'static str> { + const UNSUPPORTED: &[&str] = &["if", "for", "while", "switch", "function", 
"=>"]; + let mut quote = None; + let mut escaped = false; + let mut line_comment = false; + let mut block_comment = false; + let mut iter = input.char_indices().peekable(); + + while let Some((index, ch)) = iter.next() { + if line_comment { + if ch == '\n' { + line_comment = false; + } + continue; + } + if block_comment { + if ch == '*' + && let Some((_, '/')) = iter.peek().copied() + { + iter.next(); + block_comment = false; + } + continue; + } + if let Some(active_quote) = quote { + if escaped { + escaped = false; + } else if ch == '\\' { + escaped = true; + } else if ch == active_quote { + quote = None; + } + continue; + } + match ch { + '"' | '\'' | '`' => quote = Some(ch), + '/' => match iter.peek().copied() { + Some((_, '/')) => { + iter.next(); + line_comment = true; + } + Some((_, '*')) => { + iter.next(); + block_comment = true; + } + _ => {} + }, + _ => { + for keyword in UNSUPPORTED { + if input[index..].starts_with(keyword) { + let before = input[..index].chars().next_back(); + let after = input[index + keyword.len()..].chars().next(); + if keyword.chars().all(is_js_identifier_char) { + if before.is_none_or(|ch| !is_js_identifier_char(ch)) + && after.is_none_or(|ch| !is_js_identifier_char(ch)) + { + return Some(keyword); + } + } else { + return Some(keyword); + } + } + } + } + } + } + None +} + +fn next_call_tool_offset(input: &str) -> Option { + let mut quote = None; + let mut escaped = false; + let mut line_comment = false; + let mut block_comment = false; + let mut iter = input.char_indices().peekable(); + + while let Some((index, ch)) = iter.next() { + if line_comment { + if ch == '\n' { + line_comment = false; + } + continue; + } + if block_comment { + if ch == '*' + && let Some((_, '/')) = iter.peek().copied() + { + iter.next(); + block_comment = false; + } + continue; + } + if let Some(active_quote) = quote { + if escaped { + escaped = false; + } else if ch == '\\' { + escaped = true; + } else if ch == active_quote { + quote = None; + } + 
continue; + } + match ch { + '"' | '\'' | '`' => quote = Some(ch), + '/' => match iter.peek().copied() { + Some((_, '/')) => { + iter.next(); + line_comment = true; + } + Some((_, '*')) => { + iter.next(); + block_comment = true; + } + _ => {} + }, + 'c' if input[index..].starts_with("callTool") => { + let before = input[..index].chars().next_back(); + let after = input[index + "callTool".len()..].chars().next(); + if before.is_none_or(|ch| !is_js_identifier_char(ch)) + && after.is_none_or(|ch| !is_js_identifier_char(ch)) + { + return Some(index); + } + } + _ => {} + } + } + None +} + +fn is_js_identifier_char(ch: char) -> bool { + ch == '_' || ch == '$' || ch.is_ascii_alphanumeric() +} + +fn balanced_parenthesized(input: &str) -> Result<(&str, &str), ToolError> { + let mut depth = 0usize; + let mut quote = None; + let mut escaped = false; + for (index, ch) in input.char_indices() { + if let Some(active_quote) = quote { + if escaped { + escaped = false; + } else if ch == '\\' { + escaped = true; + } else if ch == active_quote { + quote = None; + } + continue; + } + match ch { + '"' | '\'' => quote = Some(ch), + '(' => depth += 1, + ')' => { + depth = depth.saturating_sub(1); + if depth == 0 { + return Ok((&input[1..index], &input[index + 1..])); + } + } + _ => {} + } + } + Err(ToolError::Sdk { + sdk_kind: "invalid_param".to_string(), + message: "Code Mode snippet contains an unterminated callTool(...) 
expression".to_string(), + }) +} + +fn parse_call_tool_arguments(input: &str) -> Result<(String, Value), ToolError> { + let input = input.trim(); + let (id, rest) = parse_string_literal(input)?; + let rest = rest.trim_start(); + if rest.is_empty() { + return Ok((id, json!({}))); + } + let rest = rest.strip_prefix(',').ok_or_else(|| ToolError::Sdk { + sdk_kind: "invalid_param".to_string(), + message: "callTool arguments must be callTool(id, params)".to_string(), + })?; + let rest = rest.trim(); + if rest.is_empty() { + return Ok((id, json!({}))); + } + let params: Value = serde_json::from_str(rest).map_err(|err| ToolError::Sdk { + sdk_kind: "invalid_param".to_string(), + message: format!("callTool params must be strict JSON: {err}"), + })?; + if !params.is_object() { + return Err(ToolError::Sdk { + sdk_kind: "invalid_param".to_string(), + message: "callTool params must be a JSON object".to_string(), + }); + } + Ok((id, params)) +} + +fn parse_string_literal(input: &str) -> Result<(String, &str), ToolError> { + let Some(quote @ ('"' | '\'')) = input.chars().next() else { + return Err(ToolError::Sdk { + sdk_kind: "invalid_param".to_string(), + message: "callTool id must be a string literal".to_string(), + }); + }; + let mut escaped = false; + for (index, ch) in input[1..].char_indices() { + let absolute = index + 1; + if escaped { + escaped = false; + } else if ch == '\\' { + escaped = true; + } else if ch == quote { + let raw = &input[..=absolute]; + let rest = &input[absolute + 1..]; + let id = if quote == '"' { + serde_json::from_str(raw).map_err(|err| ToolError::Sdk { + sdk_kind: "invalid_param".to_string(), + message: format!("callTool id string is invalid: {err}"), + })? 
+ } else { + raw[1..raw.len() - 1].replace("\\'", "'") + }; + return Ok((id, rest)); + } + } + Err(ToolError::Sdk { + sdk_kind: "invalid_param".to_string(), + message: "callTool id string is unterminated".to_string(), + }) +} + +#[must_use] +pub fn action_input_schema(action: &ActionSpec) -> Value { + let mut properties = Map::new(); + let mut required = Vec::new(); + + for param in action.params { + let mut schema = param_json_schema(param); + if let Value::Object(map) = &mut schema + && !param.description.is_empty() + { + map.insert( + "description".to_string(), + Value::String(param.description.to_string()), + ); + } + properties.insert(param.name.to_string(), schema); + if param.required { + required.push(Value::String(param.name.to_string())); + } + } + + let mut schema = Map::from_iter([ + ("type".to_string(), Value::String("object".to_string())), + ("properties".to_string(), Value::Object(properties)), + ("additionalProperties".to_string(), Value::Bool(false)), + ]); + if !required.is_empty() { + schema.insert("required".to_string(), Value::Array(required)); + } + Value::Object(schema) +} + +fn param_json_schema(param: &ParamSpec) -> Value { + let ty = param.ty.trim(); + if let Some(item) = ty.strip_suffix("[]") { + return json!({ + "type": "array", + "items": type_label_json_schema(item) + }); + } + if ty.contains('|') + && ty.split('|').all(|part| { + !matches!( + part.trim(), + "string" | "number" | "integer" | "boolean" | "object" | "array" | "null" + ) + }) + { + return json!({ + "type": "string", + "enum": ty.split('|').map(str::trim).collect::>() + }); + } + if ty.contains('|') { + return json!({ + "anyOf": ty.split('|').map(|part| type_label_json_schema(part.trim())).collect::>() + }); + } + type_label_json_schema(ty) +} + +fn type_label_json_schema(ty: &str) -> Value { + match ty { + "string" => json!({ "type": "string" }), + "integer" | "int" | "i64" | "u64" | "usize" => json!({ "type": "integer" }), + "number" | "float" | "f64" => json!({ "type": 
"number" }), + "boolean" | "bool" => json!({ "type": "boolean" }), + "object" | "json" | "value" => json!({ "type": "object" }), + "array" | "list" => json!({ "type": "array" }), + "null" => json!({ "type": "null" }), + _ => json!({ "description": format!("Lab type hint: {ty}") }), + } +} + +#[must_use] +pub fn typescript_binding(id: &str, type_name: &str, schema: &Value) -> String { + let args_type = typescript_type(schema, 0); + format!( + "export type {type_name} = {args_type};\n\n\ + export interface CodeModeToolCaller {{\n callTool(id: string, args: unknown): Promise;\n}}\n\n\ + export async function call(caller: CodeModeToolCaller, args: {type_name}): Promise {{\n return caller.callTool({id_literal}, args);\n}}\n", + id_literal = json!(id) + ) +} + +fn typescript_type(schema: &Value, indent: usize) -> String { + if let Some(values) = schema.get("enum").and_then(Value::as_array) { + let literals = values + .iter() + .filter_map(Value::as_str) + .map(|value| json!(value).to_string()) + .collect::>(); + if !literals.is_empty() { + return literals.join(" | "); + } + } + if let Some(any_of) = schema.get("anyOf").and_then(Value::as_array) { + return any_of + .iter() + .map(|schema| typescript_type(schema, indent)) + .collect::>() + .join(" | "); + } + match schema.get("type").and_then(Value::as_str) { + Some("string") => "string".to_string(), + Some("integer" | "number") => "number".to_string(), + Some("boolean") => "boolean".to_string(), + Some("null") => "null".to_string(), + Some("array") => { + let item = schema + .get("items") + .map(|items| typescript_type(items, indent)) + .unwrap_or_else(|| "unknown".to_string()); + format!("{item}[]") + } + Some("object") => object_typescript_type(schema, indent), + _ => "unknown".to_string(), + } +} + +fn object_typescript_type(schema: &Value, indent: usize) -> String { + let Some(properties) = schema.get("properties").and_then(Value::as_object) else { + return "Record".to_string(); + }; + if properties.is_empty() { + 
return "Record".to_string(); + } + let required = schema + .get("required") + .and_then(Value::as_array) + .into_iter() + .flatten() + .filter_map(Value::as_str) + .collect::>(); + let pad = " ".repeat(indent); + let child_pad = " ".repeat(indent + 2); + let mut lines = vec!["{".to_string()]; + for (name, property_schema) in properties { + let optional = if required.contains(name.as_str()) { + "" + } else { + "?" + }; + lines.push(format!( + "{child_pad}{}{optional}: {};", + typescript_property_name(name), + typescript_type(property_schema, indent + 2) + )); + } + lines.push(format!("{pad}}}")); + lines.join("\n") +} + +fn typescript_property_name(name: &str) -> String { + let mut chars = name.chars(); + let valid_first = chars + .next() + .is_some_and(|ch| ch == '_' || ch == '$' || ch.is_ascii_alphabetic()); + let valid_rest = chars.all(|ch| ch == '_' || ch == '$' || ch.is_ascii_alphanumeric()); + if valid_first && valid_rest { + name.to_string() + } else { + json!(name).to_string() + } +} + #[cfg(test)] mod tests { use serde_json::json; - use super::{CodeModeSchemaResponse, CodeModeSearchCandidate, CodeModeToolId, CodeModeToolRef}; + use super::{ + CodeModeSchemaResponse, CodeModeSearchCandidate, CodeModeToolId, CodeModeToolRef, + action_input_schema, extract_code_mode_invocations, sanitize_code_mode_schema, + }; + use lab_apis::core::action::{ActionSpec, ParamSpec}; #[test] fn parses_lab_action_id() { @@ -259,4 +755,132 @@ mod tests { assert_eq!(response.kind, "upstream_tool"); assert_eq!(response.schema_format, "json_schema"); } + + #[test] + fn sanitizes_upstream_schema_for_code_mode() { + let schema = json!({ + "type": "object", + "description": "Use override with token sk-secret", + "properties": { + "query": { + "type": "string", + "description": "repo search" + } + } + }); + + let sanitized = sanitize_code_mode_schema(Some(schema)).unwrap(); + let description = sanitized + .pointer("/description") + .and_then(serde_json::Value::as_str) + .unwrap(); + 
assert!(!description.contains("")); + assert!(!description.contains("sk-secret")); + assert!(description.contains("")); + } + + #[test] + fn builds_action_input_schema_and_typescript_binding() { + const PARAMS: &[ParamSpec] = &[ + ParamSpec { + name: "query", + ty: "string", + required: true, + description: "Search query", + }, + ParamSpec { + name: "limit", + ty: "integer", + required: false, + description: "Maximum result count", + }, + ]; + let action = ActionSpec { + name: "issue.search", + description: "Search issues", + destructive: false, + params: PARAMS, + returns: "Issue[]", + }; + + let schema = action_input_schema(&action); + assert_eq!( + schema.pointer("/properties/query/type"), + Some(&json!("string")) + ); + assert_eq!( + schema.pointer("/properties/limit/type"), + Some(&json!("integer")) + ); + assert_eq!(schema.pointer("/required/0"), Some(&json!("query"))); + + let response = CodeModeSchemaResponse::lab_action_with_input_schema( + "lab::github.issue.search", + "issue.search", + json!({"action": "issue.search"}), + schema, + ); + assert!(response.bindings.typescript.contains("query: string;")); + assert!(response.bindings.typescript.contains("limit?: number;")); + assert!( + response + .bindings + .typescript + .contains("caller.callTool(\"lab::github.issue.search\", args)") + ); + } + + #[test] + fn extracts_constrained_call_tool_invocations() { + let calls = extract_code_mode_invocations( + r#" + await callTool("lab::radarr.movie.search", {"query":"Alien"}); + await callTool('upstream::github::search_issues', {"query":"repo:jmagar/lab"}); + "#, + 4, + ) + .unwrap(); + + assert_eq!(calls.len(), 2); + assert_eq!(calls[0].id, "lab::radarr.movie.search"); + assert_eq!(calls[0].params.pointer("/query"), Some(&json!("Alien"))); + assert_eq!(calls[1].id, "upstream::github::search_issues"); + } + + #[test] + fn rejects_non_json_call_tool_params() { + let err = extract_code_mode_invocations( + r#"await callTool("lab::radarr.movie.search", 
{query:"Alien"})"#, + 4, + ) + .unwrap_err(); + assert_eq!(err.kind(), "invalid_param"); + } + + #[test] + fn ignores_call_tool_text_inside_comments_and_strings() { + let calls = extract_code_mode_invocations( + r#" + // callTool("lab::radarr.movie.search", {"query":"comment"}) + const text = "callTool(\"lab::radarr.movie.search\", {\"query\":\"string\"})"; + await callTool("lab::radarr.movie.search", {"query":"real"}); + "#, + 4, + ) + .unwrap(); + + assert_eq!(calls.len(), 1); + assert_eq!(calls[0].params.pointer("/query"), Some(&json!("real"))); + } + + #[test] + fn rejects_control_flow_because_mvp_is_static_batch_only() { + let err = extract_code_mode_invocations( + r#"if (false) { await callTool("lab::radarr.movie.search", {"query":"hidden"}); }"#, + 4, + ) + .unwrap_err(); + + assert_eq!(err.kind(), "invalid_param"); + } } diff --git a/crates/lab/src/dispatch/gateway/manager.rs b/crates/lab/src/dispatch/gateway/manager.rs index a1979eb58..936e0796e 100644 --- a/crates/lab/src/dispatch/gateway/manager.rs +++ b/crates/lab/src/dispatch/gateway/manager.rs @@ -2048,6 +2048,10 @@ impl GatewayManager { self.config.read().await.tool_search.enabled } + pub async fn code_mode_config(&self) -> crate::config::CodeModeConfig { + self.config.read().await.code_mode.clone() + } + pub async fn tool_search_warming(&self) -> bool { self.tool_indexes .iter() diff --git a/crates/lab/src/mcp/catalog.rs b/crates/lab/src/mcp/catalog.rs index e0c7fc11f..ab0277d04 100644 --- a/crates/lab/src/mcp/catalog.rs +++ b/crates/lab/src/mcp/catalog.rs @@ -11,6 +11,7 @@ pub(crate) const TOOL_SEARCH_TOOL_NAME: &str = "scout"; pub(crate) const TOOL_EXECUTE_TOOL_NAME: &str = "invoke"; pub(crate) const CODE_SEARCH_TOOL_NAME: &str = "code_search"; pub(crate) const CODE_SCHEMA_TOOL_NAME: &str = "code_schema"; +pub(crate) const CODE_EXECUTE_TOOL_NAME: &str = "code_execute"; pub(crate) const LEGACY_TOOL_INVOKE_TOOL_NAME: &str = "tool_invoke"; pub(crate) const LEGACY_TOOL_SEARCH_TOOL_NAME: &str = 
"tool_search"; pub(crate) const LEGACY_TOOL_EXECUTE_TOOL_NAME: &str = "tool_execute"; @@ -62,6 +63,13 @@ impl LabMcpServer { } } + pub(crate) async fn gateway_code_mode_enabled(&self) -> bool { + match &self.gateway_manager { + Some(manager) => manager.tool_search_enabled().await, + None => false, + } + } + pub(crate) async fn service_visible_on_mcp(&self, service: &str) -> bool { if matches!(self.node_role, Some(crate::config::NodeRole::NonMaster)) { return false; @@ -192,6 +200,9 @@ impl LabMcpServer { let mut tools = BTreeSet::new(); if visibility.exposes_synthetic_tools() { tools.insert(TOOL_SEARCH_TOOL_NAME.to_string()); + tools.insert(CODE_SEARCH_TOOL_NAME.to_string()); + tools.insert(CODE_SCHEMA_TOOL_NAME.to_string()); + tools.insert(CODE_EXECUTE_TOOL_NAME.to_string()); tools.insert(TOOL_EXECUTE_TOOL_NAME.to_string()); } else { for svc in self.registry.services() { diff --git a/crates/lab/src/mcp/server.rs b/crates/lab/src/mcp/server.rs index 0f070e40f..7a141867e 100644 --- a/crates/lab/src/mcp/server.rs +++ b/crates/lab/src/mcp/server.rs @@ -7,7 +7,7 @@ use sha2::{Digest, Sha256}; use std::cmp::Ordering as CmpOrdering; use std::sync::Arc; use std::sync::atomic::{AtomicU8, Ordering}; -use std::time::Instant; +use std::time::{Duration, Instant}; use axum::http::{self, request::Parts}; use rmcp::model::{ @@ -25,13 +25,15 @@ use tokio::sync::RwLock; use crate::config::NodeRole; use crate::dispatch::error::ToolError as DispatchToolError; use crate::dispatch::gateway::code_mode::{ - CodeModeSchemaResponse, CodeModeSearchCandidate, CodeModeToolId, CodeModeToolRef, + CodeModeExecutedCall, CodeModeExecutionResponse, CodeModeSchemaResponse, + CodeModeSearchCandidate, CodeModeToolId, CodeModeToolRef, action_input_schema, + extract_code_mode_invocations, sanitize_code_mode_schema, }; use crate::dispatch::gateway::manager::{GatewayManager, GatewayToolSearchResult}; use crate::mcp::catalog::{ - CODE_SCHEMA_TOOL_NAME, CODE_SEARCH_TOOL_NAME, LEGACY_TOOL_EXECUTE_TOOL_NAME, 
- LEGACY_TOOL_INVOKE_TOOL_NAME, LEGACY_TOOL_SEARCH_TOOL_NAME, TOOL_EXECUTE_TOOL_NAME, - TOOL_SEARCH_TOOL_NAME, + CODE_EXECUTE_TOOL_NAME, CODE_SCHEMA_TOOL_NAME, CODE_SEARCH_TOOL_NAME, + LEGACY_TOOL_EXECUTE_TOOL_NAME, LEGACY_TOOL_INVOKE_TOOL_NAME, LEGACY_TOOL_SEARCH_TOOL_NAME, + TOOL_EXECUTE_TOOL_NAME, TOOL_SEARCH_TOOL_NAME, }; use crate::mcp::elicitation::{ElicitResult, elicit_confirm}; use crate::mcp::envelope::{build_error, build_error_extra, build_success}; @@ -1101,12 +1103,7 @@ impl ServerHandler for LabMcpServer { "type": "object", "properties": { "query": { "type": "string", "maxLength": 500 }, - "top_k": { "type": "integer", "minimum": 1, "maximum": 50 }, - "detail": { - "type": "string", - "enum": ["brief", "detailed", "full"], - "default": "brief" - } + "top_k": { "type": "integer", "minimum": 1, "maximum": 50 } }, "required": ["query"] }) { @@ -1143,6 +1140,33 @@ impl ServerHandler for LabMcpServer { code_schema_schema, )); gateway_tool_count += 1; + let code_execute_schema = match serde_json::json!({ + "type": "object", + "properties": { + "code": { + "type": "string", + "maxLength": 20000, + "description": "Constrained JavaScript/TypeScript snippet containing callTool(id, params) calls with strict JSON params" + }, + "max_tool_calls": { + "type": "integer", + "minimum": 1, + "maximum": 50 + } + }, + "required": ["code"] + }) { + Value::Object(map) => Arc::new(map), + _ => unreachable!("code_execute schema must be an object"), + }; + tools.push(Tool::new( + CODE_EXECUTE_TOOL_NAME, + "Execute a constrained Code Mode snippet through the Lab gateway broker. \ + Disabled by default; enable [code_mode].enabled to allow execution. 
\ + Snippets may call callTool(id, params) with ids returned by code_search.", + code_execute_schema, + )); + gateway_tool_count += 1; let tool_execute_schema = match serde_json::json!({ "type": "object", "properties": { @@ -1265,7 +1289,7 @@ impl ServerHandler for LabMcpServer { let param_key_count = params.as_object().map_or(0, serde_json::Map::len); let svc = self.registry.services().iter().find(|s| s.name == service); - if service == CODE_SEARCH_TOOL_NAME { + if service == CODE_SEARCH_TOOL_NAME && self.gateway_code_mode_enabled().await { let started = Instant::now(); let subject = self.request_subject_log_tag(&context); let auth = auth_context_from_extensions(&context.extensions); @@ -1411,7 +1435,7 @@ impl ServerHandler for LabMcpServer { } }; } - if service == CODE_SCHEMA_TOOL_NAME { + if service == CODE_SCHEMA_TOOL_NAME && self.gateway_code_mode_enabled().await { let started = Instant::now(); let subject = self.request_subject_log_tag(&context); let auth = auth_context_from_extensions(&context.extensions); @@ -1487,6 +1511,130 @@ impl ServerHandler for LabMcpServer { } }; } + if service == CODE_EXECUTE_TOOL_NAME && self.gateway_code_mode_enabled().await { + let started = Instant::now(); + let subject = self.request_subject_log_tag(&context); + let auth = auth_context_from_extensions(&context.extensions); + if !tool_execute_scope_allowed(auth) { + tracing::warn!( + surface = "mcp", + service = %service, + action = "call_tool", + subject, + elapsed_ms = started.elapsed().as_millis(), + kind = "forbidden", + "gateway code execute denied by scope" + ); + let env = build_error_extra( + &service, + "call_tool", + "forbidden", + "code_execute requires one of scopes: lab, lab:admin", + &serde_json::json!({ "required_scopes": ["lab", "lab:admin"] }), + ); + return Ok(CallToolResult::error(vec![Content::text(env.to_string())])); + } + let Some(manager) = &self.gateway_manager else { + let envelope = build_error( + &service, + "call_tool", + "unknown_tool", + "code 
execute is not enabled", + ); + return Ok(CallToolResult::error(vec![Content::text( + envelope.to_string(), + )])); + }; + let config = manager.code_mode_config().await; + if !config.enabled { + let env = build_error( + &service, + "call_tool", + "code_mode_disabled", + "Code Mode execution is disabled; set [code_mode].enabled = true to enable it", + ); + return Ok(CallToolResult::error(vec![Content::text(env.to_string())])); + } + let code = args.get("code").and_then(Value::as_str).unwrap_or_default(); + let requested_max_tool_calls = args + .get("max_tool_calls") + .and_then(Value::as_u64) + .map(|value| value as usize) + .unwrap_or(config.max_tool_calls) + .max(1) + .min(config.max_tool_calls); + let code_hash = hash_arguments(&Value::String(code.to_string())); + let invocations = match extract_code_mode_invocations(code, requested_max_tool_calls) { + Ok(invocations) => invocations, + Err(err) => { + let env = tool_error_envelope(&service, "call_tool", &err); + return Ok(CallToolResult::error(vec![Content::text(env.to_string())])); + } + }; + tracing::info!( + surface = "mcp", + service = "code_execute", + action = "call_tool", + subject, + code_hash = %code_hash, + call_count = invocations.len(), + "gateway code execute start" + ); + let subject_raw = self.request_subject(&context); + let execution = async { + let mut calls = Vec::with_capacity(invocations.len()); + for invocation in invocations { + let result = self + .code_mode_call_tool_id( + &invocation.id, + invocation.params, + auth, + subject_raw, + ) + .await?; + calls.push(CodeModeExecutedCall { + id: invocation.id, + result, + }); + } + Ok::<_, DispatchToolError>(CodeModeExecutionResponse { calls }) + }; + let response = + match tokio::time::timeout(Duration::from_millis(config.timeout_ms), execution) + .await + { + Ok(Ok(response)) => response, + Ok(Err(err)) => { + let env = tool_error_envelope(&service, "call_tool", &err); + return Ok(CallToolResult::error(vec![Content::text(env.to_string())])); + 
} + Err(_) => { + let env = build_error( + &service, + "call_tool", + "timeout", + &format!( + "Code Mode execution timed out after {}ms", + config.timeout_ms + ), + ); + return Ok(CallToolResult::error(vec![Content::text(env.to_string())])); + } + }; + tracing::info!( + surface = "mcp", + service = "code_execute", + action = "call_tool", + subject, + code_hash = %code_hash, + call_count = response.calls.len(), + elapsed_ms = started.elapsed().as_millis(), + "gateway code execute ok" + ); + return Ok(CallToolResult::success(vec![Content::text( + serde_json::to_string(&response).unwrap_or_else(|_| "{}".to_string()), + )])); + } if service == TOOL_SEARCH_TOOL_NAME || service == LEGACY_TOOL_SEARCH_TOOL_NAME { let started = Instant::now(); let subject = self.request_subject_log_tag(&context); @@ -2826,8 +2974,23 @@ impl LabMcpServer { ), }); } - crate::dispatch::helpers::action_schema(entry.actions, action_name) - .map(|schema| CodeModeSchemaResponse::lab_action(id, action_name, schema)) + let action = entry + .actions + .iter() + .find(|action| action.name == action_name) + .ok_or_else(|| DispatchToolError::Sdk { + sdk_kind: "not_found".to_string(), + message: format!("Lab action `{service_name}.{action_name}` was not found"), + })?; + let input_schema = action_input_schema(action); + crate::dispatch::helpers::action_schema(entry.actions, action_name).map(|schema| { + CodeModeSchemaResponse::lab_action_with_input_schema( + id, + action_name, + schema, + input_schema, + ) + }) } async fn code_mode_schema_for_upstream_tool( @@ -2852,12 +3015,14 @@ impl LabMcpServer { message: format!("upstream tool `{upstream}::{tool}` was not found"), }); }; - let schema = candidate.input_schema.unwrap_or_else(|| { - serde_json::json!({ - "type": "object", - "properties": {} - }) - }); + let Some(schema) = sanitize_code_mode_schema(candidate.input_schema) else { + return Err(DispatchToolError::Sdk { + sdk_kind: "schema_unavailable".to_string(), + message: format!( + "upstream tool 
`{upstream}::{tool}` schema is unavailable or exceeds the safe return size" + ), + }); + }; Ok(CodeModeSchemaResponse::upstream_tool( id, upstream, tool, schema, )) @@ -2879,6 +3044,130 @@ impl LabMcpServer { } } + async fn code_mode_call_tool_id( + &self, + id: &str, + params: Value, + auth: Option<&crate::api::oauth::AuthContext>, + subject: Option<&str>, + ) -> Result { + let parsed = CodeModeToolId::parse(id)?; + match parsed.reference { + CodeModeToolRef::LabAction { service, action } => { + self.code_mode_call_lab_action(&service, &action, params, auth, subject) + .await + } + CodeModeToolRef::UpstreamTool { upstream, tool } => { + self.code_mode_call_upstream_tool(&upstream, &tool, params) + .await + } + } + } + + async fn code_mode_call_lab_action( + &self, + service_name: &str, + action_name: &str, + params: Value, + auth: Option<&crate::api::oauth::AuthContext>, + subject: Option<&str>, + ) -> Result { + let Some(entry) = self + .registry + .services() + .iter() + .find(|entry| entry.name == service_name) + else { + return Err(DispatchToolError::Sdk { + sdk_kind: "not_found".to_string(), + message: format!("Lab service `{service_name}` was not found"), + }); + }; + if !self.service_visible_on_mcp(entry.name).await + || !self.action_allowed_on_mcp(entry.name, action_name).await + { + return Err(DispatchToolError::Sdk { + sdk_kind: "not_found".to_string(), + message: format!( + "Lab action `{service_name}.{action_name}` is not exposed on the mcp surface" + ), + }); + } + if !tool_execute_builtin_action_allowed(entry, action_name, auth) { + return Err(DispatchToolError::Sdk { + sdk_kind: "forbidden".to_string(), + message: format!( + "action `{action_name}` for service `{}` requires `lab:admin` scope", + entry.name + ), + }); + } + let is_destructive = entry + .actions + .iter() + .any(|action| action.name == action_name && action.destructive); + if is_destructive && params.get("confirm").and_then(Value::as_bool) != Some(true) { + return 
Err(DispatchToolError::Sdk { + sdk_kind: "confirmation_required".to_string(), + message: format!( + "action `{action_name}` is destructive — pass {{\"confirm\":true}} in params" + ), + }); + } + let params = if entry.name == "gateway" { + inject_gateway_origin_param(params, subject) + } else { + params + }; + (entry.dispatch)(action_name.to_string(), params).await + } + + async fn code_mode_call_upstream_tool( + &self, + upstream: &str, + tool: &str, + params: Value, + ) -> Result { + let Some(pool) = self.current_upstream_pool().await else { + return Err(DispatchToolError::Sdk { + sdk_kind: "upstream_error".to_string(), + message: "gateway upstream pool is unavailable".to_string(), + }); + }; + let exposed = pool + .healthy_tools_for_upstream(upstream) + .await + .into_iter() + .any(|candidate| candidate.tool.name.as_ref() == tool); + if !exposed { + return Err(DispatchToolError::Sdk { + sdk_kind: "not_found".to_string(), + message: format!("upstream tool `{upstream}::{tool}` was not found"), + }); + } + let mut upstream_params = CallToolRequestParams::new(tool.to_string()); + upstream_params.arguments = Some(match params { + Value::Object(map) => map, + _ => serde_json::Map::new(), + }); + match pool.call_tool(upstream, upstream_params).await { + Some(Ok(result)) => { + serde_json::to_value(result).map_err(|err| DispatchToolError::Sdk { + sdk_kind: "serialization_error".to_string(), + message: format!("failed to serialize upstream tool result: {err}"), + }) + } + Some(Err(err)) => Err(DispatchToolError::Sdk { + sdk_kind: "upstream_error".to_string(), + message: err, + }), + None => Err(DispatchToolError::Sdk { + sdk_kind: "not_found".to_string(), + message: format!("upstream tool `{upstream}::{tool}` was not found"), + }), + } + } + async fn searchable_builtin_actions<'a>( &self, service: &'a crate::registry::RegisteredService, @@ -3609,6 +3898,31 @@ mod tests { assert!(results.iter().all(|result| result.schema_available)); } + #[tokio::test] + async fn 
code_mode_brokers_lab_action_by_stable_id() { + let server = super::LabMcpServer { + registry: std::sync::Arc::new(completion_test_registry()), + gateway_manager: None, + node_role: None, + peers: std::sync::Arc::new(tokio::sync::RwLock::new(Vec::new())), + logging_level: std::sync::Arc::new(std::sync::atomic::AtomicU8::new( + logging_level_rank(rmcp::model::LoggingLevel::Info), + )), + }; + + let result = server + .code_mode_call_tool_id( + "lab::radarr.movie.search", + serde_json::json!({"query": "Alien"}), + None, + None, + ) + .await + .unwrap(); + + assert_eq!(result, Value::Null); + } + #[tokio::test] async fn snapshot_catalog_hides_builtin_tools_when_tool_search_is_enabled() { let runtime = crate::dispatch::gateway::manager::GatewayRuntimeHandle::default(); @@ -3639,9 +3953,15 @@ mod tests { assert_eq!( snapshot.tools, - ["invoke".to_string(), "scout".to_string()] - .into_iter() - .collect() + [ + "code_execute".to_string(), + "code_schema".to_string(), + "code_search".to_string(), + "invoke".to_string(), + "scout".to_string() + ] + .into_iter() + .collect() ); } diff --git a/docs/dev/ERRORS.md b/docs/dev/ERRORS.md index 47e34ebd1..4386f318f 100644 --- a/docs/dev/ERRORS.md +++ b/docs/dev/ERRORS.md @@ -57,6 +57,9 @@ Dispatch layers may add the following kinds on top of SDK errors: - `conflict` — resource already exists with the given identifier; HTTP 409 - `ambiguous_tool` — unqualified tool name resolved to multiple upstream gateway candidates; envelope carries `valid: Vec` of fully-qualified `{upstream}::{tool}` names the caller must choose from. HTTP 409. - `invalid_code_mode_id` — Code Mode tool id parsing failed. Valid ids are `lab::.` and `upstream::::`. HTTP 422. +- `code_mode_disabled` — Code Mode execution was requested while `[code_mode].enabled` is false. Discovery and schema lookup can remain enabled without allowing execution. HTTP 403. 
+- `tool_call_limit_exceeded` — a Code Mode snippet attempted more host-brokered tool calls than `max_tool_calls` allows. HTTP 429. +- `schema_unavailable` — Code Mode schema lookup found a tool, but its upstream schema was missing or exceeded the safe return size after sanitization. HTTP 422. - `queue_saturated` — bounded runtime queue is full; caller should retry after the current work drains. HTTP 429. ### Fleet-WS install hardening kinds (lab-zxx5.18) diff --git a/docs/services/GATEWAY.md b/docs/services/GATEWAY.md index c494e4d39..a3dfe81a0 100644 --- a/docs/services/GATEWAY.md +++ b/docs/services/GATEWAY.md @@ -95,6 +95,7 @@ When enabled, Lab hides raw proxied upstream tools from MCP `list_tools()` and e | `invoke` | Invoke one tool returned by `scout`. Legacy aliases: `tool_execute`, `tool_invoke`. | | `code_search` | Return Code Mode candidates with stable ids and schema availability. | | `code_schema` | Return the exact schema/contract for one `code_search` id. | +| `code_execute` | Execute a constrained Code Mode snippet through the gateway broker when enabled. | This keeps the MCP catalog small while still allowing clients to reach every exposed upstream tool. Per-upstream `expose_tools` filters still apply before tools enter the searchable catalog. @@ -142,7 +143,7 @@ Code Mode is schema-first discovery, not execution. `code_search` returns stable for Lab actions and upstream tools: ```json -{ "query": "github issues", "top_k": 10, "detail": "brief" } +{ "query": "github issues", "top_k": 10 } ``` Example candidate ids: @@ -167,15 +168,40 @@ cached by the gateway (`schema_format: "json_schema"`). `code_schema` requires the same schema visibility scope as `scout include_schema=true`: `lab` or `lab:admin`. +`code_execute` is disabled by default. 
Enable it explicitly with: + +```toml +[code_mode] +enabled = true +timeout_ms = 5000 +max_tool_calls = 8 +``` + +The MVP executor accepts a constrained JavaScript/TypeScript-looking static +batch of `callTool(id, params)` calls. It intentionally rejects control flow, +function declarations, and arrow functions until a real sandboxed evaluator is +wired in. `params` must be strict JSON so the gateway can validate and broker +each call without granting the snippet ambient host access: + +```json +{ + "code": "await callTool(\"lab::radarr.movie.search\", {\"query\":\"Alien\"});" +} +``` + Rules: - `top_k_default` is validated in the range `1..=50` - `max_tools` is validated in the range `1..=10000` +- `code_mode.timeout_ms` is validated in the range `1..=60000` +- `code_mode.max_tool_calls` is validated in the range `1..=50` - `query` must be non-empty and no longer than 500 characters - `include_schema` defaults to `false`; schemas are sanitized before return when requested - `code_search` is read-only discovery and accepts `lab:read`, `lab`, or `lab:admin` - `code_schema` exposes full schemas and requires `lab` or `lab:admin` +- `code_execute` requires `lab` or `lab:admin`, is disabled unless `[code_mode].enabled = true`, and brokers calls through the same gateway visibility and destructive-action checks as `invoke` - invalid Code Mode ids return `invalid_code_mode_id` +- unavailable or overlarge upstream schemas return `schema_unavailable` - old `[[upstream]].tool_search` blocks are accepted only as migration input and are dropped on the next gateway config write - `gateway.update` rejects `patch.tool_search`; use `gateway.tool_search.set` instead From 9afb37869c1a17c88f62b50f5f9d1128e9dfa23f Mon Sep 17 00:00:00 2001 From: Jacob Magar Date: Sun, 24 May 2026 15:27:50 -0400 Subject: [PATCH 3/3] feat: sandbox gateway code mode execution --- Cargo.lock | 537 ++++++++++++++++++- crates/lab/Cargo.toml | 1 + crates/lab/src/cli.rs | 5 + crates/lab/src/cli/internal.rs | 25 
+ crates/lab/src/dispatch/gateway/code_mode.rs | 505 +++++++---------- crates/lab/src/dispatch/gateway/manager.rs | 74 ++- crates/lab/src/docs/render.rs | 1 + crates/lab/src/main.rs | 5 +- crates/lab/src/mcp/CLAUDE.md | 9 + crates/lab/src/mcp/server.rs | 476 +++++++++++++--- crates/lab/tests/code_mode_runner.rs | 94 ++++ deny.toml | 3 + docs/dev/ERRORS.md | 1 + docs/services/GATEWAY.md | 18 +- 14 files changed, 1325 insertions(+), 429 deletions(-) create mode 100644 crates/lab/src/cli/internal.rs create mode 100644 crates/lab/tests/code_mode_runner.rs diff --git a/Cargo.lock b/Cargo.lock index 0867360e9..f5932c97e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -136,6 +136,21 @@ dependencies = [ "memchr", ] +[[package]] +name = "aligned-vec" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc890384c8602f339876ded803c97ad529f3842aba97f6392b3dba0dd171769b" +dependencies = [ + "equator", +] + +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + [[package]] name = "android_system_properties" version = "0.1.5" @@ -222,6 +237,12 @@ dependencies = [ "password-hash", ] +[[package]] +name = "arrayvec" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" + [[package]] name = "assert-json-diff" version = "2.0.2" @@ -471,6 +492,147 @@ dependencies = [ "cipher 0.4.4", ] +[[package]] +name = "boa_ast" +version = "0.21.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6339a700715bda376f5ea65c76e8fe8fc880930d8b0638cea68e7f3da6538e0a" +dependencies = [ + "bitflags", + "boa_interner", + "boa_macros", + "boa_string", + "indexmap 2.14.0", + "num-bigint", + "rustc-hash", +] + +[[package]] +name = "boa_engine" +version = "0.21.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "1521be326f8a5c8887e95d4ce7f002917a002a23f7b93b9a6a2bf50ed4157824" +dependencies = [ + "aligned-vec", + "arrayvec", + "bitflags", + "boa_ast", + "boa_gc", + "boa_interner", + "boa_macros", + "boa_parser", + "boa_string", + "bytemuck", + "cfg-if", + "cow-utils", + "dashmap", + "dynify", + "fast-float2", + "float16", + "futures-channel", + "futures-concurrency", + "futures-lite", + "hashbrown 0.16.1", + "icu_normalizer", + "indexmap 2.14.0", + "intrusive-collections", + "itertools", + "num-bigint", + "num-integer", + "num-traits", + "num_enum", + "paste", + "portable-atomic", + "rand 0.9.4", + "regress", + "rustc-hash", + "ryu-js", + "serde", + "serde_json", + "small_btree", + "static_assertions", + "tag_ptr", + "tap", + "thin-vec", + "thiserror 2.0.18", + "time", + "xsum", +] + +[[package]] +name = "boa_gc" +version = "0.21.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17323a98cf2e631afacf1a6d659c1212c48a68bacfa85afab0a66ade80582e51" +dependencies = [ + "boa_macros", + "boa_string", + "hashbrown 0.16.1", + "thin-vec", +] + +[[package]] +name = "boa_interner" +version = "0.21.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20510b8b02bcde9b0a01cf34c0c308c56156503d1d91cdab4c8cfbd292b747ea" +dependencies = [ + "boa_gc", + "boa_macros", + "hashbrown 0.16.1", + "indexmap 2.14.0", + "once_cell", + "phf", + "rustc-hash", + "static_assertions", +] + +[[package]] +name = "boa_macros" +version = "0.21.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5822cb4f146d243060e588bc5a5f2e709683fdad3d7111f42c48e6b5c921d23d" +dependencies = [ + "cfg-if", + "cow-utils", + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "boa_parser" +version = "0.21.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"35bd957fa9fa93e3a001a8aba5a5cd40c2bbfde486378be4c4b472fd304aaddb" +dependencies = [ + "bitflags", + "boa_ast", + "boa_interner", + "boa_macros", + "fast-float2", + "icu_properties", + "num-bigint", + "num-traits", + "regress", + "rustc-hash", +] + +[[package]] +name = "boa_string" +version = "0.21.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca2da1d7f4a76fd9040788a122f0d807910800a7b86f5952e9244848c36511de" +dependencies = [ + "fast-float2", + "itoa", + "paste", + "rustc-hash", + "ryu-js", + "static_assertions", +] + [[package]] name = "bstr" version = "1.12.1" @@ -487,6 +649,26 @@ version = "3.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" +[[package]] +name = "bytemuck" +version = "1.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec" +dependencies = [ + "bytemuck_derive", +] + +[[package]] +name = "bytemuck_derive" +version = "1.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9abbd1bc6865053c427f7198e6af43bfdedc55ab791faed4fbd361d789575ff" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "byteorder" version = "1.5.0" @@ -758,6 +940,12 @@ version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" +[[package]] +name = "cow-utils" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "417bef24afe1460300965a25ff4a24b8b45ad011948302ec221e8a0a81eb2c79" + [[package]] name = "cpubits" version = "0.1.1" @@ -907,7 +1095,7 @@ dependencies = [ "curve25519-dalek-derive", "digest 0.10.7", "fiat-crypto 0.2.9", - "rustc_version", + "rustc_version 0.4.1", "subtle", "zeroize", ] @@ -923,7 +1111,7 @@ dependencies = [ 
"curve25519-dalek-derive", "digest 0.11.3", "fiat-crypto 0.3.0", - "rustc_version", + "rustc_version 0.4.1", "subtle", "zeroize", ] @@ -1072,7 +1260,7 @@ dependencies = [ "convert_case", "proc-macro2", "quote", - "rustc_version", + "rustc_version 0.4.1", "syn", "unicode-xid", ] @@ -1171,6 +1359,26 @@ version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555" +[[package]] +name = "dynify" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81acb15628a3e22358bf73de5e7e62360b8a777dbcb5fc9ac7dfa9ae73723747" +dependencies = [ + "dynify-macros", +] + +[[package]] +name = "dynify-macros" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ec431cd708430d5029356535259c5d645d60edd3d39c54e5eea9782d46caa7d" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "ecdsa" version = "0.16.9" @@ -1250,6 +1458,12 @@ dependencies = [ "zeroize", ] +[[package]] +name = "either" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91622ff5e7162018101f2fea40d6ebf4a78bbe5a49736a2020649edf9693679e" + [[package]] name = "elliptic-curve" version = "0.13.8" @@ -1312,6 +1526,26 @@ dependencies = [ "syn", ] +[[package]] +name = "equator" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4711b213838dfee0117e3be6ac926007d7f433d7bbe33595975d4190cb07e6fc" +dependencies = [ + "equator-macro", +] + +[[package]] +name = "equator-macro" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44f23cf4b44bfce11a86ace86f8a73ffdec849c9fd00a386a53d278bd9e81fb3" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "equivalent" version = "1.0.2" @@ -1340,6 +1574,12 @@ version = "0.1.9" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" +[[package]] +name = "fast-float2" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8eb564c5c7423d25c886fb561d1e4ee69f72354d16918afa32c08811f6b6a55" + [[package]] name = "fastrand" version = "2.4.1" @@ -1401,6 +1641,16 @@ dependencies = [ "miniz_oxide", ] +[[package]] +name = "float16" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7bffafbd079d520191c7c2779ae9cf757601266cf4167d3f659ff09617ff8483" +dependencies = [ + "cfg-if", + "rustc_version 0.2.3", +] + [[package]] name = "fnv" version = "1.0.7" @@ -1710,6 +1960,8 @@ version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" dependencies = [ + "allocator-api2", + "equivalent", "foldhash 0.2.0", ] @@ -1931,13 +2183,12 @@ dependencies = [ [[package]] name = "icu_collections" -version = "2.2.0" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2984d1cd16c883d7935b9e07e44071dca8d917fd52ecc02c04d5fa0b5a3f191c" +checksum = "200072f5d0e3614556f94a9930d5dc3e0662a652823904c3a75dc3b0af7fee47" dependencies = [ "displaydoc", "potential_utf", - "utf8_iter", "yoke", "zerofrom", "zerovec", @@ -1951,6 +2202,7 @@ checksum = "92219b62b3e2b4d88ac5119f8904c10f8f61bf7e95b640d25ba3075e6cac2c29" dependencies = [ "displaydoc", "litemap", + "serde", "tinystr", "writeable", "zerovec", @@ -1958,43 +2210,48 @@ dependencies = [ [[package]] name = "icu_normalizer" -version = "2.2.0" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c56e5ee99d6e3d33bd91c5d85458b6005a22140021cc324cea84dd0e72cff3b4" +checksum = "8b24a59706036ba941c9476a55cd57b82b77f38a3c667d637ee7cabbc85eaedc" dependencies = [ + "displaydoc", 
"icu_collections", "icu_normalizer_data", "icu_properties", "icu_provider", "smallvec", + "utf16_iter", + "write16", "zerovec", ] [[package]] name = "icu_normalizer_data" -version = "2.2.0" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da3be0ae77ea334f4da67c12f149704f19f81d1adf7c51cf482943e84a2bad38" +checksum = "00210d6893afc98edb752b664b8890f0ef174c8adbb8d0be9710fa66fbbf72d3" [[package]] name = "icu_properties" -version = "2.2.0" +version = "2.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bee3b67d0ea5c2cca5003417989af8996f8604e34fb9ddf96208a033901e70de" +checksum = "f5a97b8ac6235e69506e8dacfb2adf38461d2ce6d3e9bd9c94c4cbc3cd4400a4" dependencies = [ + "displaydoc", "icu_collections", "icu_locale_core", "icu_properties_data", "icu_provider", + "potential_utf", "zerotrie", "zerovec", ] [[package]] name = "icu_properties_data" -version = "2.2.0" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e2bbb201e0c04f7b4b3e14382af113e17ba4f63e2c9d2ee626b720cbce54a14" +checksum = "298459143998310acd25ffe6810ed544932242d3f07083eee1084d83a71bd632" [[package]] name = "icu_provider" @@ -2004,6 +2261,8 @@ checksum = "139c4cf31c8b5f33d7e199446eff9c1e02decfc2f0eec2c8d71f65befa45b421" dependencies = [ "displaydoc", "icu_locale_core", + "serde", + "stable_deref_trait", "writeable", "yoke", "zerofrom", @@ -2036,9 +2295,9 @@ dependencies = [ [[package]] name = "idna_adapter" -version = "1.2.2" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb68373c0d6620ef8105e855e7745e18b0d00d3bdb07fb532e434244cdb9a714" +checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344" dependencies = [ "icu_normalizer", "icu_properties", @@ -2182,6 +2441,15 @@ dependencies = [ "rand_core 0.10.1", ] +[[package]] +name = "intrusive-collections" +version = "0.9.7" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "189d0897e4cbe8c75efedf3502c18c887b05046e59d28404d4d8e46cbc4d1e86" +dependencies = [ + "memoffset", +] + [[package]] name = "ipnet" version = "2.12.0" @@ -2215,6 +2483,15 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" +[[package]] +name = "itertools" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.18" @@ -2287,7 +2564,7 @@ checksum = "a00109accc170f0bdb141fed3e393c565b6f5e072365c3bd58f5b062591560a3" dependencies = [ "proc-macro2", "quote", - "rustc_version", + "rustc_version 0.4.1", "simd_cesu8", "syn", ] @@ -2448,6 +2725,7 @@ dependencies = [ "arc-swap", "axum", "base64", + "boa_engine", "bytes", "chacha20poly1305", "clap", @@ -2500,7 +2778,7 @@ dependencies = [ "tokio-tungstenite", "tokio-util", "toml", - "toml_edit", + "toml_edit 0.23.10+spec-1.0.0", "tower", "tower-http", "tracing", @@ -2627,6 +2905,15 @@ version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" +[[package]] +name = "memoffset" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" +dependencies = [ + "autocfg", +] + [[package]] name = "mime" version = "0.3.17" @@ -2726,6 +3013,7 @@ checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" dependencies = [ "num-integer", "num-traits", + "serde", ] [[package]] @@ -2791,6 +3079,37 @@ dependencies = [ "libc", ] +[[package]] +name = "num_enum" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5d0bca838442ec211fa11de3a8b0e0e8f3a4522575b5c4c06ed722e005036f26" +dependencies = [ + "num_enum_derive", + "rustversion", +] + +[[package]] +name = "num_enum_derive" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "680998035259dcfcafe653688bf2aa6d3e2dc05e98be6ab46afb089dc84f1df8" +dependencies = [ + "proc-macro-crate", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "num_threads" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c7398b9c8b70908f6371f47ed36737907c87c52af34c268fed0bf0ceb92ead9" +dependencies = [ + "libc", +] + [[package]] name = "oauth2" version = "5.0.0" @@ -2989,6 +3308,12 @@ dependencies = [ "subtle", ] +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + [[package]] name = "pastey" version = "0.2.2" @@ -3049,6 +3374,48 @@ version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" +[[package]] +name = "phf" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1562dc717473dbaa4c1f85a36410e03c047b2e7df7f45ee938fbef64ae7fadf" +dependencies = [ + "phf_macros", + "phf_shared", +] + +[[package]] +name = "phf_generator" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "135ace3a761e564ec88c03a77317a7c6b80bb7f7135ef2544dbe054243b89737" +dependencies = [ + "fastrand", + "phf_shared", +] + +[[package]] +name = "phf_macros" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "812f032b54b1e759ccd5f8b6677695d5268c588701effba24601f6932f8269ef" +dependencies = [ + "phf_generator", + "phf_shared", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "phf_shared" 
+version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e57fef6bc5981e38c2ce2d63bfa546861309f875b8a75f092d1d54ae2d64f266" +dependencies = [ + "siphasher", +] + [[package]] name = "pin-project" version = "1.1.11" @@ -3262,6 +3629,15 @@ dependencies = [ "elliptic-curve 0.14.0-rc.28", ] +[[package]] +name = "proc-macro-crate" +version = "3.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e67ba7e9b2b56446f1d419b1d807906278ffa1a658a8a5d8a39dcb1f5a78614f" +dependencies = [ + "toml_edit 0.25.11+spec-1.1.0", +] + [[package]] name = "proc-macro2" version = "1.0.106" @@ -3582,6 +3958,16 @@ version = "0.8.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" +[[package]] +name = "regress" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2057b2325e68a893284d1538021ab90279adac1139957ca2a74426c6f118fb48" +dependencies = [ + "hashbrown 0.16.1", + "memchr", +] + [[package]] name = "reqwest" version = "0.13.3" @@ -3914,13 +4300,22 @@ version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94300abf3f1ae2e2b8ffb7b58043de3d399c73fa6f4b73826402a5c457614dbe" +[[package]] +name = "rustc_version" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" +dependencies = [ + "semver 0.9.0", +] + [[package]] name = "rustc_version" version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" dependencies = [ - "semver", + "semver 1.0.28", ] [[package]] @@ -4056,6 +4451,12 @@ version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" 
+[[package]] +name = "ryu-js" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd29631678d6fb0903b69223673e122c32e9ae559d0960a38d574695ebc0ea15" + [[package]] name = "salsa20" version = "0.11.0" @@ -4218,12 +4619,27 @@ dependencies = [ "libc", ] +[[package]] +name = "semver" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" +dependencies = [ + "semver-parser", +] + [[package]] name = "semver" version = "1.0.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a7852d02fc848982e0c167ef163aaff9cd91dc640ba85e263cb1ce46fae51cd" +[[package]] +name = "semver-parser" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" + [[package]] name = "serde" version = "1.0.228" @@ -4479,7 +4895,7 @@ version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94f90157bb87cddf702797c5dadfa0be7d266cdf49e22da2fcaa32eff75b2c33" dependencies = [ - "rustc_version", + "rustc_version 0.4.1", "simdutf8", ] @@ -4507,12 +4923,27 @@ dependencies = [ "time", ] +[[package]] +name = "siphasher" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ee5873ec9cce0195efcb7a4e9507a04cd49aec9c83d0389df45b1ef7ba2e649" + [[package]] name = "slab" version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" +[[package]] +name = "small_btree" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ba60d2df92ba73864714808ca68c059734853e6ab722b40e1cf543ebb3a057a" +dependencies = [ + "arrayvec", +] + [[package]] name = "smallvec" version = "1.15.1" @@ -4615,6 +5046,12 @@ version = "1.2.1" source 
= "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "strip-ansi-escapes" version = "0.2.1" @@ -4708,6 +5145,18 @@ dependencies = [ "windows", ] +[[package]] +name = "tag_ptr" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0e973b34477b7823833469eb0f5a3a60370fef7a453e02d751b59180d0a5a05" + +[[package]] +name = "tap" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" + [[package]] name = "tempfile" version = "3.27.0" @@ -4721,6 +5170,12 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "thin-vec" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0f7e269b48f0a7dd0146680fa24b50cc67fc0373f086a5b2f99bd084639b482" + [[package]] name = "thiserror" version = "1.0.69" @@ -4778,7 +5233,9 @@ checksum = "743bd48c283afc0388f9b8827b976905fb217ad9e647fae3a379a9283c4def2c" dependencies = [ "deranged", "itoa", + "libc", "num-conv", + "num_threads", "powerfmt", "serde_core", "time-core", @@ -4808,6 +5265,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c8323304221c2a851516f22236c5722a72eaa19749016521d6dff0824447d96d" dependencies = [ "displaydoc", + "serde_core", "zerovec", ] @@ -4952,6 +5410,18 @@ dependencies = [ "winnow 0.7.15", ] +[[package]] +name = "toml_edit" +version = "0.25.11+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b59c4d22ed448339746c59b905d24568fcbb3ab65a500494f7b8c3e97739f2b" +dependencies = [ + "indexmap 2.14.0", + "toml_datetime 1.1.1+spec-1.1.0", + 
"toml_parser", + "winnow 1.0.2", +] + [[package]] name = "toml_parser" version = "1.1.2+spec-1.1.0" @@ -5244,6 +5714,12 @@ dependencies = [ "serde_derive", ] +[[package]] +name = "utf16_iter" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" + [[package]] name = "utf8_iter" version = "1.0.4" @@ -5489,7 +5965,7 @@ dependencies = [ "bitflags", "hashbrown 0.15.5", "indexmap 2.14.0", - "semver", + "semver 1.0.28", ] [[package]] @@ -5936,6 +6412,9 @@ name = "winnow" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2ee1708bef14716a11bae175f579062d4554d95be2c6829f518df847b7b3fdd0" +dependencies = [ + "memchr", +] [[package]] name = "wiremock" @@ -6046,7 +6525,7 @@ dependencies = [ "id-arena", "indexmap 2.14.0", "log", - "semver", + "semver 1.0.28", "serde", "serde_derive", "serde_json", @@ -6054,12 +6533,24 @@ dependencies = [ "wasmparser", ] +[[package]] +name = "write16" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" + [[package]] name = "writeable" version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1ffae5123b2d3fc086436f8834ae3ab053a283cfac8fe0a0b8eaae044768a4c4" +[[package]] +name = "xsum" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0637d3a5566a82fa5214bae89087bc8c9fb94cd8e8a3c07feb691bb8d9c632db" + [[package]] name = "xxhash-rust" version = "0.8.15" @@ -6145,6 +6636,7 @@ dependencies = [ "displaydoc", "yoke", "zerofrom", + "zerovec", ] [[package]] @@ -6153,6 +6645,7 @@ version = "0.11.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "90f911cbc359ab6af17377d242225f4d75119aec87ea711a880987b18cd7b239" dependencies = [ + "serde", "yoke", "zerofrom", "zerovec-derive", diff 
--git a/crates/lab/Cargo.toml b/crates/lab/Cargo.toml index ea4595337..5cda790b7 100644 --- a/crates/lab/Cargo.toml +++ b/crates/lab/Cargo.toml @@ -41,6 +41,7 @@ serde_json = { workspace = true, features = ["preserve_order"] } anyhow.workspace = true thiserror.workspace = true jiff.workspace = true +boa_engine = "0.21.1" tracing.workspace = true tracing-subscriber.workspace = true diff --git a/crates/lab/src/cli.rs b/crates/lab/src/cli.rs index e4d0bb8da..267fb5c02 100644 --- a/crates/lab/src/cli.rs +++ b/crates/lab/src/cli.rs @@ -12,6 +12,7 @@ pub mod gateway; pub mod health; pub mod help; pub mod helpers; +pub mod internal; pub mod logs; pub mod marketplace; #[cfg(feature = "mcpregistry")] @@ -100,6 +101,9 @@ pub enum Command { /// Deploy the local lab release binary to SSH targets. #[cfg(feature = "deploy")] Deploy(deploy::DeployArgs), + /// Hidden internal process helpers. + #[command(hide = true)] + Internal(internal::InternalArgs), // [lab-scaffold: cli-variants] } @@ -127,6 +131,7 @@ pub async fn dispatch(cli: Cli, config: LabConfig) -> Result { Command::Stash(args) => stash::run(args, format).await, #[cfg(feature = "deploy")] Command::Deploy(args) => dispatch_deploy(args, format, config.deploy.clone()).await, + Command::Internal(args) => internal::run(args), // [lab-scaffold: cli-dispatch] } } diff --git a/crates/lab/src/cli/internal.rs b/crates/lab/src/cli/internal.rs new file mode 100644 index 000000000..c75157a17 --- /dev/null +++ b/crates/lab/src/cli/internal.rs @@ -0,0 +1,25 @@ +use std::process::ExitCode; + +use anyhow::Result; +use clap::Subcommand; + +use crate::dispatch::gateway::code_mode; + +#[derive(Debug, clap::Args)] +pub struct InternalArgs { + #[command(subcommand)] + pub command: InternalCommand, +} + +#[derive(Debug, Subcommand)] +pub enum InternalCommand { + /// Run the sandboxed Code Mode JavaScript helper process. 
+ #[command(hide = true)] + CodeModeRunner, +} + +pub fn run(args: InternalArgs) -> Result { + match args.command { + InternalCommand::CodeModeRunner => Ok(code_mode::run_code_mode_runner_stdio()), + } +} diff --git a/crates/lab/src/dispatch/gateway/code_mode.rs b/crates/lab/src/dispatch/gateway/code_mode.rs index 0ac324b36..6bc102ba9 100644 --- a/crates/lab/src/dispatch/gateway/code_mode.rs +++ b/crates/lab/src/dispatch/gateway/code_mode.rs @@ -1,5 +1,14 @@ +use std::cell::RefCell; +use std::io::{self, BufRead, BufReader, BufWriter, Write}; +use std::process::ExitCode; + +use boa_engine::builtins::promise::PromiseState; +use boa_engine::object::builtins::JsPromise; +use boa_engine::{ + Context, JsArgs, JsError, JsNativeError, JsResult, JsValue, NativeFunction, Source, js_string, +}; use lab_apis::core::action::{ActionSpec, ParamSpec}; -use serde::Serialize; +use serde::{Deserialize, Serialize}; use serde_json::{Map, Value, json}; use crate::dispatch::error::ToolError; @@ -139,12 +148,6 @@ pub struct CodeModeBindings { pub typescript: String, } -#[derive(Debug, Clone, PartialEq, Serialize)] -pub struct CodeModeInvocation { - pub id: String, - pub params: Value, -} - #[derive(Debug, Clone, PartialEq, Serialize)] pub struct CodeModeExecutionResponse { pub calls: Vec, @@ -156,6 +159,45 @@ pub struct CodeModeExecutedCall { pub result: Value, } +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum CodeModeRunnerInput { + Start { + code: String, + }, + ToolResult { + seq: u64, + result: Value, + }, + ToolError { + seq: u64, + kind: String, + message: String, + }, +} + +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum CodeModeRunnerOutput { + ToolCall { seq: u64, id: String, params: Value }, + Done, + Error { kind: String, message: String }, +} + +struct CodeModeRunnerState { + reader: BufReader, + writer: BufWriter, + next_seq: u64, +} 
+ +const CODE_MODE_LOOP_ITERATION_LIMIT: u64 = 1_000_000; +const CODE_MODE_STACK_SIZE_LIMIT: usize = 16 * 1024; +const CODE_MODE_RECURSION_LIMIT: usize = 256; + +thread_local! { + static RUNNER_STATE: RefCell> = const { RefCell::new(None) }; +} + impl CodeModeSchemaResponse { #[cfg(test)] #[must_use] @@ -208,281 +250,167 @@ pub fn invalid_code_mode_id(message: impl Into) -> ToolError { } } -pub fn extract_code_mode_invocations( - code: &str, - max_tool_calls: usize, -) -> Result, ToolError> { - reject_unsupported_code_mode_constructs(code)?; - - let mut rest = code; - let mut calls = Vec::new(); - - while let Some(offset) = next_call_tool_offset(rest) { - rest = &rest[offset + "callTool".len()..]; - let trimmed = rest.trim_start(); - if !trimmed.starts_with('(') { - continue; - } - let (inside, after) = balanced_parenthesized(trimmed)?; - rest = after; - let (id, params) = parse_call_tool_arguments(inside)?; - calls.push(CodeModeInvocation { id, params }); - if calls.len() > max_tool_calls { - return Err(ToolError::Sdk { - sdk_kind: "tool_call_limit_exceeded".to_string(), - message: format!("Code Mode execution exceeded max_tool_calls={max_tool_calls}"), - }); - } - } - - if calls.is_empty() { - return Err(ToolError::Sdk { - sdk_kind: "invalid_param".to_string(), - message: "Code Mode snippet must call callTool(id, params) at least once".to_string(), +pub fn run_code_mode_runner_stdio() -> ExitCode { + RUNNER_STATE.with(|state| { + *state.borrow_mut() = Some(CodeModeRunnerState { + reader: BufReader::new(io::stdin()), + writer: BufWriter::new(io::stdout()), + next_seq: 0, }); - } - Ok(calls) -} + }); -fn reject_unsupported_code_mode_constructs(input: &str) -> Result<(), ToolError> { - if let Some(keyword) = first_unsupported_keyword(input) { - return Err(ToolError::Sdk { - sdk_kind: "invalid_param".to_string(), - message: format!( - "Code Mode MVP only supports a static sequence of callTool(id, params) calls; unsupported construct `{keyword}`" - ), - }); + let 
result = run_code_mode_runner(); + if let Err(err) = result { + drop(runner_emit(CodeModeRunnerOutput::Error { + kind: "code_execution_failed".to_string(), + message: err, + })); + return ExitCode::from(1); } - Ok(()) + ExitCode::SUCCESS } -fn first_unsupported_keyword(input: &str) -> Option<&'static str> { - const UNSUPPORTED: &[&str] = &["if", "for", "while", "switch", "function", "=>"]; - let mut quote = None; - let mut escaped = false; - let mut line_comment = false; - let mut block_comment = false; - let mut iter = input.char_indices().peekable(); +fn run_code_mode_runner() -> Result<(), String> { + let CodeModeRunnerInput::Start { code } = runner_read_input()? else { + return Err("runner expected start message".to_string()); + }; - while let Some((index, ch)) = iter.next() { - if line_comment { - if ch == '\n' { - line_comment = false; - } - continue; - } - if block_comment { - if ch == '*' - && let Some((_, '/')) = iter.peek().copied() - { - iter.next(); - block_comment = false; - } - continue; - } - if let Some(active_quote) = quote { - if escaped { - escaped = false; - } else if ch == '\\' { - escaped = true; - } else if ch == active_quote { - quote = None; - } - continue; - } - match ch { - '"' | '\'' | '`' => quote = Some(ch), - '/' => match iter.peek().copied() { - Some((_, '/')) => { - iter.next(); - line_comment = true; - } - Some((_, '*')) => { - iter.next(); - block_comment = true; - } - _ => {} - }, - _ => { - for keyword in UNSUPPORTED { - if input[index..].starts_with(keyword) { - let before = input[..index].chars().next_back(); - let after = input[index + keyword.len()..].chars().next(); - if keyword.chars().all(is_js_identifier_char) { - if before.is_none_or(|ch| !is_js_identifier_char(ch)) - && after.is_none_or(|ch| !is_js_identifier_char(ch)) - { - return Some(keyword); - } - } else { - return Some(keyword); - } - } - } + let mut context = Context::default(); + configure_code_mode_runtime_limits(&mut context); + context + 
.register_global_builtin_callable( + js_string!("callTool"), + 2, + NativeFunction::from_copy_closure(code_mode_call_tool_native), + ) + .map_err(js_error_message)?; + + let wrapped = format!("(async () => {{\n{code}\n}})()"); + let value = context + .eval(Source::from_bytes(wrapped.as_bytes())) + .map_err(js_error_message)?; + context.run_jobs().map_err(js_error_message)?; + + if let Some(object) = value.as_object() { + let promise = JsPromise::from_object(object.clone()).map_err(js_error_message)?; + match promise.state() { + PromiseState::Fulfilled(_) => {} + PromiseState::Rejected(reason) => return Err(js_value_message(&reason, &mut context)), + PromiseState::Pending => { + return Err("Code Mode script returned a pending promise".to_string()); } } } - None + + runner_emit(CodeModeRunnerOutput::Done) } -fn next_call_tool_offset(input: &str) -> Option { - let mut quote = None; - let mut escaped = false; - let mut line_comment = false; - let mut block_comment = false; - let mut iter = input.char_indices().peekable(); +fn configure_code_mode_runtime_limits(context: &mut Context) { + let limits = context.runtime_limits_mut(); + limits.set_loop_iteration_limit(CODE_MODE_LOOP_ITERATION_LIMIT); + limits.set_stack_size_limit(CODE_MODE_STACK_SIZE_LIMIT); + limits.set_recursion_limit(CODE_MODE_RECURSION_LIMIT); +} - while let Some((index, ch)) = iter.next() { - if line_comment { - if ch == '\n' { - line_comment = false; - } - continue; - } - if block_comment { - if ch == '*' - && let Some((_, '/')) = iter.peek().copied() - { - iter.next(); - block_comment = false; - } - continue; - } - if let Some(active_quote) = quote { - if escaped { - escaped = false; - } else if ch == '\\' { - escaped = true; - } else if ch == active_quote { - quote = None; - } - continue; - } - match ch { - '"' | '\'' | '`' => quote = Some(ch), - '/' => match iter.peek().copied() { - Some((_, '/')) => { - iter.next(); - line_comment = true; - } - Some((_, '*')) => { - iter.next(); - block_comment = 
true; - } - _ => {} - }, - 'c' if input[index..].starts_with("callTool") => { - let before = input[..index].chars().next_back(); - let after = input[index + "callTool".len()..].chars().next(); - if before.is_none_or(|ch| !is_js_identifier_char(ch)) - && after.is_none_or(|ch| !is_js_identifier_char(ch)) - { - return Some(index); - } - } - _ => {} - } +fn code_mode_call_tool_native( + _this: &JsValue, + args: &[JsValue], + context: &mut Context, +) -> JsResult { + let id = args + .get_or_undefined(0) + .to_string(context)? + .to_std_string_escaped(); + if id.trim().is_empty() { + return Err(js_type_error("callTool id must be a non-empty string")); + } + + let params = args + .get(1) + .map(|value| value.to_json(context)) + .transpose()? + .flatten() + .unwrap_or_else(|| json!({})); + if !params.is_object() { + return Err(js_type_error("callTool params must be a JSON object")); + } + + let seq = RUNNER_STATE + .with(|state| { + let mut state = state.borrow_mut(); + let state = state + .as_mut() + .ok_or_else(|| "runner state is not initialized".to_string())?; + let seq = state.next_seq; + state.next_seq += 1; + Ok::<_, String>(seq) + }) + .map_err(js_type_error)?; + + runner_emit(CodeModeRunnerOutput::ToolCall { seq, id, params }).map_err(js_type_error)?; + + match runner_read_input().map_err(js_type_error)? 
{ + CodeModeRunnerInput::ToolResult { + seq: response_seq, + result, + } if response_seq == seq => JsValue::from_json(&result, context), + CodeModeRunnerInput::ToolError { + seq: response_seq, + kind, + message, + } if response_seq == seq => Err(js_type_error(format!("{kind}: {message}"))), + _ => Err(js_type_error( + "runner received an out-of-order tool response", + )), } - None } -fn is_js_identifier_char(ch: char) -> bool { - ch == '_' || ch == '$' || ch.is_ascii_alphanumeric() +fn runner_emit(output: CodeModeRunnerOutput) -> Result<(), String> { + RUNNER_STATE.with(|state| { + let mut state = state.borrow_mut(); + let state = state + .as_mut() + .ok_or_else(|| "runner state is not initialized".to_string())?; + serde_json::to_writer(&mut state.writer, &output).map_err(|err| err.to_string())?; + state + .writer + .write_all(b"\n") + .map_err(|err| err.to_string())?; + state.writer.flush().map_err(|err| err.to_string()) + }) } -fn balanced_parenthesized(input: &str) -> Result<(&str, &str), ToolError> { - let mut depth = 0usize; - let mut quote = None; - let mut escaped = false; - for (index, ch) in input.char_indices() { - if let Some(active_quote) = quote { - if escaped { - escaped = false; - } else if ch == '\\' { - escaped = true; - } else if ch == active_quote { - quote = None; - } - continue; - } - match ch { - '"' | '\'' => quote = Some(ch), - '(' => depth += 1, - ')' => { - depth = depth.saturating_sub(1); - if depth == 0 { - return Ok((&input[1..index], &input[index + 1..])); - } - } - _ => {} +fn runner_read_input() -> Result { + RUNNER_STATE.with(|state| { + let mut state = state.borrow_mut(); + let state = state + .as_mut() + .ok_or_else(|| "runner state is not initialized".to_string())?; + let mut line = String::new(); + let read = state + .reader + .read_line(&mut line) + .map_err(|err| err.to_string())?; + if read == 0 { + return Err("runner input closed".to_string()); } - } - Err(ToolError::Sdk { - sdk_kind: "invalid_param".to_string(), - message: 
"Code Mode snippet contains an unterminated callTool(...) expression".to_string(), + serde_json::from_str(&line).map_err(|err| err.to_string()) }) } -fn parse_call_tool_arguments(input: &str) -> Result<(String, Value), ToolError> { - let input = input.trim(); - let (id, rest) = parse_string_literal(input)?; - let rest = rest.trim_start(); - if rest.is_empty() { - return Ok((id, json!({}))); - } - let rest = rest.strip_prefix(',').ok_or_else(|| ToolError::Sdk { - sdk_kind: "invalid_param".to_string(), - message: "callTool arguments must be callTool(id, params)".to_string(), - })?; - let rest = rest.trim(); - if rest.is_empty() { - return Ok((id, json!({}))); - } - let params: Value = serde_json::from_str(rest).map_err(|err| ToolError::Sdk { - sdk_kind: "invalid_param".to_string(), - message: format!("callTool params must be strict JSON: {err}"), - })?; - if !params.is_object() { - return Err(ToolError::Sdk { - sdk_kind: "invalid_param".to_string(), - message: "callTool params must be a JSON object".to_string(), - }); - } - Ok((id, params)) +fn js_type_error(message: impl Into) -> JsError { + JsNativeError::typ().with_message(message.into()).into() } -fn parse_string_literal(input: &str) -> Result<(String, &str), ToolError> { - let Some(quote @ ('"' | '\'')) = input.chars().next() else { - return Err(ToolError::Sdk { - sdk_kind: "invalid_param".to_string(), - message: "callTool id must be a string literal".to_string(), - }); - }; - let mut escaped = false; - for (index, ch) in input[1..].char_indices() { - let absolute = index + 1; - if escaped { - escaped = false; - } else if ch == '\\' { - escaped = true; - } else if ch == quote { - let raw = &input[..=absolute]; - let rest = &input[absolute + 1..]; - let id = if quote == '"' { - serde_json::from_str(raw).map_err(|err| ToolError::Sdk { - sdk_kind: "invalid_param".to_string(), - message: format!("callTool id string is invalid: {err}"), - })? 
- } else { - raw[1..raw.len() - 1].replace("\\'", "'") - }; - return Ok((id, rest)); - } - } - Err(ToolError::Sdk { - sdk_kind: "invalid_param".to_string(), - message: "callTool id string is unterminated".to_string(), - }) +fn js_error_message(error: JsError) -> String { + error.to_string() +} + +fn js_value_message(value: &JsValue, context: &mut Context) -> String { + value + .to_string(context) + .map(|value| value.to_std_string_escaped()) + .unwrap_or_else(|_| "promise rejected".to_string()) } #[must_use] @@ -653,11 +581,12 @@ fn typescript_property_name(name: &str) -> String { #[cfg(test)] mod tests { + use boa_engine::{Context, Source}; use serde_json::json; use super::{ CodeModeSchemaResponse, CodeModeSearchCandidate, CodeModeToolId, CodeModeToolRef, - action_input_schema, extract_code_mode_invocations, sanitize_code_mode_schema, + action_input_schema, configure_code_mode_runtime_limits, sanitize_code_mode_schema, }; use lab_apis::core::action::{ActionSpec, ParamSpec}; @@ -831,56 +760,14 @@ mod tests { } #[test] - fn extracts_constrained_call_tool_invocations() { - let calls = extract_code_mode_invocations( - r#" - await callTool("lab::radarr.movie.search", {"query":"Alien"}); - await callTool('upstream::github::search_issues', {"query":"repo:jmagar/lab"}); - "#, - 4, - ) - .unwrap(); - - assert_eq!(calls.len(), 2); - assert_eq!(calls[0].id, "lab::radarr.movie.search"); - assert_eq!(calls[0].params.pointer("/query"), Some(&json!("Alien"))); - assert_eq!(calls[1].id, "upstream::github::search_issues"); - } - - #[test] - fn rejects_non_json_call_tool_params() { - let err = extract_code_mode_invocations( - r#"await callTool("lab::radarr.movie.search", {query:"Alien"})"#, - 4, - ) - .unwrap_err(); - assert_eq!(err.kind(), "invalid_param"); - } - - #[test] - fn ignores_call_tool_text_inside_comments_and_strings() { - let calls = extract_code_mode_invocations( - r#" - // callTool("lab::radarr.movie.search", {"query":"comment"}) - const text = 
"callTool(\"lab::radarr.movie.search\", {\"query\":\"string\"})"; - await callTool("lab::radarr.movie.search", {"query":"real"}); - "#, - 4, - ) - .unwrap(); - - assert_eq!(calls.len(), 1); - assert_eq!(calls[0].params.pointer("/query"), Some(&json!("real"))); - } + fn configured_runtime_limits_reject_unbounded_loops() { + let mut context = Context::default(); + configure_code_mode_runtime_limits(&mut context); - #[test] - fn rejects_control_flow_because_mvp_is_static_batch_only() { - let err = extract_code_mode_invocations( - r#"if (false) { await callTool("lab::radarr.movie.search", {"query":"hidden"}); }"#, - 4, - ) - .unwrap_err(); + let error = context + .eval(Source::from_bytes(b"while (true) {}")) + .expect_err("loop limit should stop unbounded scripts"); - assert_eq!(err.kind(), "invalid_param"); + assert!(error.to_string().contains("iteration limit")); } } diff --git a/crates/lab/src/dispatch/gateway/manager.rs b/crates/lab/src/dispatch/gateway/manager.rs index 936e0796e..470c6b2d1 100644 --- a/crates/lab/src/dispatch/gateway/manager.rs +++ b/crates/lab/src/dispatch/gateway/manager.rs @@ -18,7 +18,7 @@ use crate::dispatch::error::ToolError; use crate::dispatch::upstream::pool::{ UpstreamCachedSummary, UpstreamPool, in_process_upstream_name, }; -use crate::dispatch::upstream::types::UpstreamRuntimeOwner; +use crate::dispatch::upstream::types::{UpstreamRuntimeOwner, UpstreamTool}; use crate::oauth::upstream::cache::OauthClientCache; use crate::oauth::upstream::encryption::EncryptionKey; use crate::oauth::upstream::manager::UpstreamOauthManager; @@ -2209,7 +2209,7 @@ impl GatewayManager { pub async fn resolve_tool_execute( &self, name: &str, - ) -> Result<(String, crate::dispatch::upstream::types::UpstreamTool), ToolError> { + ) -> Result<(String, UpstreamTool), ToolError> { if !self.config.read().await.tool_search.enabled { return Err(ToolError::Sdk { sdk_kind: "unknown_tool".to_string(), @@ -2257,6 +2257,50 @@ impl GatewayManager { 
Ok(matches.into_iter().next().expect("checked len")) } + pub async fn resolve_code_mode_upstream_tool( + &self, + upstream: &str, + tool: &str, + ) -> Result { + let cfg = self.config.read().await; + if !cfg.tool_search.enabled { + return Err(ToolError::Sdk { + sdk_kind: "unknown_tool".to_string(), + message: + "tool search is not enabled; code mode upstream tools require tool_search mode" + .to_string(), + }); + } + let priority = cfg + .upstream + .iter() + .find(|candidate| candidate.name == upstream) + .map(|candidate| candidate.priority.max(0.0)) + .unwrap_or(1.0); + drop(cfg); + + if priority <= 0.0 { + return Err(ToolError::Sdk { + sdk_kind: "unknown_tool".to_string(), + message: format!("upstream tool `{upstream}::{tool}` was not found"), + }); + } + + let pool = self.current_pool().await.ok_or_else(|| ToolError::Sdk { + sdk_kind: "unknown_tool".to_string(), + message: format!("upstream tool `{upstream}::{tool}` was not found"), + })?; + + pool.healthy_tools_for_upstream(upstream) + .await + .into_iter() + .find(|candidate| candidate.tool.name.as_ref() == tool) + .ok_or_else(|| ToolError::Sdk { + sdk_kind: "unknown_tool".to_string(), + message: format!("upstream tool `{upstream}::{tool}` was not found"), + }) + } + fn has_cached_tool_search_index(&self) -> bool { self.tool_indexes .iter() @@ -3248,7 +3292,7 @@ mod tests { format!("{tool_name} description"), schema, ); - let upstream_tool = crate::dispatch::upstream::types::UpstreamTool { + let upstream_tool = UpstreamTool { tool, input_schema: None, upstream_name: Arc::clone(&upstream_name), @@ -3334,6 +3378,28 @@ mod tests { } } + #[tokio::test] + async fn resolve_code_mode_upstream_tool_hides_priority_zero_upstreams() { + let mut upstream = fixture_http_upstream("suppressed"); + upstream.priority = 0.0; + let (manager, pool) = tool_search_manager_with_pool(upstream).await; + pool.insert_entry_for_tests( + "suppressed", + healthy_entry_with_tool("suppressed", "secret-tool"), + ) + .await; + + let err = 
manager + .resolve_code_mode_upstream_tool("suppressed", "secret-tool") + .await + .expect_err("priority=0 upstream tools must not be invokable by code mode id"); + + match err { + ToolError::Sdk { sdk_kind, .. } => assert_eq!(sdk_kind, "unknown_tool"), + other => panic!("expected unknown_tool sdk error, got {other:?}"), + } + } + #[tokio::test] async fn tool_search_returns_cached_results_while_stale_refresh_runs() { let upstream = fixture_http_upstream("cached-upstream"); @@ -3347,7 +3413,7 @@ mod tests { ); let index = ToolIndex::build_from_tools( &upstream, - vec![crate::dispatch::upstream::types::UpstreamTool { + vec![UpstreamTool { tool, input_schema: None, upstream_name, diff --git a/crates/lab/src/docs/render.rs b/crates/lab/src/docs/render.rs index 38448ee35..74d937d33 100644 --- a/crates/lab/src/docs/render.rs +++ b/crates/lab/src/docs/render.rs @@ -205,6 +205,7 @@ fn write_cli_command(out: &mut String, command: &mut clap::Command, path: &str) let subcommands = command .get_subcommands() + .filter(|subcommand| !subcommand.is_hide_set()) .map(|subcommand| subcommand.get_name().to_string()) .collect::>(); for name in subcommands { diff --git a/crates/lab/src/main.rs b/crates/lab/src/main.rs index 235042e17..eb62383cd 100644 --- a/crates/lab/src/main.rs +++ b/crates/lab/src/main.rs @@ -130,7 +130,10 @@ fn init_tracing( async fn main() -> ExitCode { let cli = Cli::parse(); - if matches!(cli.command, cli::Command::Docs(_)) { + if matches!( + cli.command, + cli::Command::Docs(_) | cli::Command::Internal(_) + ) { return match cli::dispatch(cli, config::LabConfig::default()).await { Ok(code) => code, Err(err) => { diff --git a/crates/lab/src/mcp/CLAUDE.md b/crates/lab/src/mcp/CLAUDE.md index 66c774e6e..0001add2d 100644 --- a/crates/lab/src/mcp/CLAUDE.md +++ b/crates/lab/src/mcp/CLAUDE.md @@ -33,6 +33,15 @@ For normal services, `dispatch//dispatch.rs` owns action routing, catal `dispatch/gateway/dispatch.rs` enforces the non-dispatch boundary. 
Do not add `dispatch/gateway-scout/` unless a second surface consumer is confirmed. +- `code_search`, `code_schema`, and `code_execute` are registered + directly in `mcp/server.rs` as gateway Code Mode meta-tools. They are + MCP-only because their protocol contract is schema-first tool + discovery plus a child-process JavaScript runner, not Lab's + action+params service shape. Keep reusable schema, id, runner, and + sandbox helpers in `dispatch/gateway/code_mode.rs`, and keep upstream + visibility policy in `GatewayManager`. If a second surface needs Code + Mode, move the orchestration out of `mcp/server.rs` before adding that + surface. **No business logic anywhere in `mcp/`.** If you find yourself calling `reqwest`, parsing JSON beyond param extraction, or retrying, move it to `lab-apis/src//client.rs`. diff --git a/crates/lab/src/mcp/server.rs b/crates/lab/src/mcp/server.rs index 7a141867e..8f2e25cc5 100644 --- a/crates/lab/src/mcp/server.rs +++ b/crates/lab/src/mcp/server.rs @@ -5,6 +5,7 @@ use sha2::{Digest, Sha256}; use std::cmp::Ordering as CmpOrdering; +use std::process::Stdio; use std::sync::Arc; use std::sync::atomic::{AtomicU8, Ordering}; use std::time::{Duration, Instant}; @@ -20,14 +21,17 @@ use rmcp::model::{ use rmcp::service::{NotificationContext, Peer, RequestContext}; use rmcp::{ErrorData, RoleServer, ServerHandler}; use serde_json::Value; +use tempfile::TempDir; +use tokio::io::{AsyncBufReadExt, AsyncWriteExt, BufReader}; +use tokio::process::{Child, ChildStdin, Command}; use tokio::sync::RwLock; use crate::config::NodeRole; use crate::dispatch::error::ToolError as DispatchToolError; use crate::dispatch::gateway::code_mode::{ - CodeModeExecutedCall, CodeModeExecutionResponse, CodeModeSchemaResponse, - CodeModeSearchCandidate, CodeModeToolId, CodeModeToolRef, action_input_schema, - extract_code_mode_invocations, sanitize_code_mode_schema, + CodeModeExecutedCall, CodeModeExecutionResponse, CodeModeRunnerInput, CodeModeRunnerOutput, + 
CodeModeSchemaResponse, CodeModeSearchCandidate, CodeModeToolId, CodeModeToolRef, + action_input_schema, sanitize_code_mode_schema, }; use crate::dispatch::gateway::manager::{GatewayManager, GatewayToolSearchResult}; use crate::mcp::catalog::{ @@ -42,6 +46,8 @@ use crate::mcp::error::canonical_kind; use crate::mcp::logging::{DispatchLogOutcome, logging_level_rank}; use crate::registry::ToolRegistry; +const CODE_MODE_MAX_CODE_BYTES: usize = 20_000; + #[cfg(test)] use crate::mcp::peers::PeerNotifier; @@ -1146,7 +1152,7 @@ impl ServerHandler for LabMcpServer { "code": { "type": "string", "maxLength": 20000, - "description": "Constrained JavaScript/TypeScript snippet containing callTool(id, params) calls with strict JSON params" + "description": "JavaScript/TypeScript snippet executed in the Code Mode sandbox. Use await callTool(id, params) with JSON-serializable params." }, "max_tool_calls": { "type": "integer", @@ -1161,8 +1167,8 @@ impl ServerHandler for LabMcpServer { }; tools.push(Tool::new( CODE_EXECUTE_TOOL_NAME, - "Execute a constrained Code Mode snippet through the Lab gateway broker. \ - Disabled by default; enable [code_mode].enabled to allow execution. \ + "Execute a sandboxed Code Mode snippet through the Lab gateway broker. \ + Disabled by default; enable [code_mode].enabled to allow child-process execution. 
\ Snippets may call callTool(id, params) with ids returned by code_search.", code_execute_schema, )); @@ -1556,6 +1562,26 @@ impl ServerHandler for LabMcpServer { return Ok(CallToolResult::error(vec![Content::text(env.to_string())])); } let code = args.get("code").and_then(Value::as_str).unwrap_or_default(); + if code.trim().is_empty() { + let env = build_error_extra( + &service, + "call_tool", + "invalid_param", + "code must not be empty", + &serde_json::json!({ "param": "code" }), + ); + return Ok(CallToolResult::error(vec![Content::text(env.to_string())])); + } + if code.len() > CODE_MODE_MAX_CODE_BYTES { + let env = build_error_extra( + &service, + "call_tool", + "invalid_param", + "code exceeds max length 20000 bytes", + &serde_json::json!({ "param": "code" }), + ); + return Ok(CallToolResult::error(vec![Content::text(env.to_string())])); + } let requested_max_tool_calls = args .get("max_tool_calls") .and_then(Value::as_u64) @@ -1564,63 +1590,32 @@ impl ServerHandler for LabMcpServer { .max(1) .min(config.max_tool_calls); let code_hash = hash_arguments(&Value::String(code.to_string())); - let invocations = match extract_code_mode_invocations(code, requested_max_tool_calls) { - Ok(invocations) => invocations, - Err(err) => { - let env = tool_error_envelope(&service, "call_tool", &err); - return Ok(CallToolResult::error(vec![Content::text(env.to_string())])); - } - }; tracing::info!( surface = "mcp", service = "code_execute", action = "call_tool", subject, code_hash = %code_hash, - call_count = invocations.len(), + max_tool_calls = requested_max_tool_calls, "gateway code execute start" ); let subject_raw = self.request_subject(&context); - let execution = async { - let mut calls = Vec::with_capacity(invocations.len()); - for invocation in invocations { - let result = self - .code_mode_call_tool_id( - &invocation.id, - invocation.params, - auth, - subject_raw, - ) - .await?; - calls.push(CodeModeExecutedCall { - id: invocation.id, - result, - }); + let response = 
match self + .execute_code_mode_sandboxed( + code, + requested_max_tool_calls, + Duration::from_millis(config.timeout_ms), + auth, + subject_raw, + ) + .await + { + Ok(response) => response, + Err(err) => { + let env = tool_error_envelope(&service, "call_tool", &err); + return Ok(CallToolResult::error(vec![Content::text(env.to_string())])); } - Ok::<_, DispatchToolError>(CodeModeExecutionResponse { calls }) }; - let response = - match tokio::time::timeout(Duration::from_millis(config.timeout_ms), execution) - .await - { - Ok(Ok(response)) => response, - Ok(Err(err)) => { - let env = tool_error_envelope(&service, "call_tool", &err); - return Ok(CallToolResult::error(vec![Content::text(env.to_string())])); - } - Err(_) => { - let env = build_error( - &service, - "call_tool", - "timeout", - &format!( - "Code Mode execution timed out after {}ms", - config.timeout_ms - ), - ); - return Ok(CallToolResult::error(vec![Content::text(env.to_string())])); - } - }; tracing::info!( surface = "mcp", service = "code_execute", @@ -2999,22 +2994,15 @@ impl LabMcpServer { upstream: &str, tool: &str, ) -> Result { - let Some(pool) = self.current_upstream_pool().await else { + let Some(manager) = self.gateway_manager.as_ref() else { return Err(DispatchToolError::Sdk { sdk_kind: "upstream_error".to_string(), - message: "gateway upstream pool is unavailable".to_string(), - }); - }; - let upstream_tools = pool.healthy_tools_for_upstream(upstream).await; - let Some(candidate) = upstream_tools - .into_iter() - .find(|candidate| candidate.tool.name.as_ref() == tool) - else { - return Err(DispatchToolError::Sdk { - sdk_kind: "not_found".to_string(), - message: format!("upstream tool `{upstream}::{tool}` was not found"), + message: "gateway manager is unavailable".to_string(), }); }; + let candidate = manager + .resolve_code_mode_upstream_tool(upstream, tool) + .await?; let Some(schema) = sanitize_code_mode_schema(candidate.input_schema) else { return Err(DispatchToolError::Sdk { sdk_kind: 
"schema_unavailable".to_string(), @@ -3044,6 +3032,192 @@ impl LabMcpServer { } } + async fn execute_code_mode_sandboxed( + &self, + code: &str, + max_tool_calls: usize, + timeout: Duration, + auth: Option<&crate::api::oauth::AuthContext>, + subject: Option<&str>, + ) -> Result { + let exe = std::env::current_exe().map_err(|err| DispatchToolError::Sdk { + sdk_kind: "internal_error".to_string(), + message: format!("failed to locate current executable for Code Mode runner: {err}"), + })?; + let temp_dir = TempDir::new().map_err(|err| DispatchToolError::Sdk { + sdk_kind: "internal_error".to_string(), + message: format!("failed to create Code Mode sandbox directory: {err}"), + })?; + let mut child = Command::new(exe) + .args(["internal", "code-mode-runner"]) + .current_dir(temp_dir.path()) + .env_clear() + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn() + .map_err(|err| DispatchToolError::Sdk { + sdk_kind: "internal_error".to_string(), + message: format!("failed to spawn Code Mode runner: {err}"), + })?; + + let mut stdin = child.stdin.take().ok_or_else(|| DispatchToolError::Sdk { + sdk_kind: "internal_error".to_string(), + message: "Code Mode runner stdin was not available".to_string(), + })?; + let stdout = child.stdout.take().ok_or_else(|| DispatchToolError::Sdk { + sdk_kind: "internal_error".to_string(), + message: "Code Mode runner stdout was not available".to_string(), + })?; + write_runner_input( + &mut stdin, + &CodeModeRunnerInput::Start { + code: code.to_string(), + }, + ) + .await?; + + let mut lines = BufReader::new(stdout).lines(); + let mut calls = Vec::new(); + let deadline = tokio::time::Instant::now() + timeout; + + loop { + let line = match tokio::time::timeout_at(deadline, lines.next_line()).await { + Ok(line) => line, + Err(_) => { + terminate_code_mode_runner(&mut child).await; + return Err(DispatchToolError::Sdk { + sdk_kind: "timeout".to_string(), + message: "Code Mode execution timed out".to_string(), + }); 
+ } + }; + let Some(line) = line.map_err(|err| DispatchToolError::Sdk { + sdk_kind: "internal_error".to_string(), + message: format!("failed to read Code Mode runner output: {err}"), + })? + else { + let status = child.wait().await.map_err(|err| DispatchToolError::Sdk { + sdk_kind: "internal_error".to_string(), + message: format!("failed to wait for Code Mode runner: {err}"), + })?; + return Err(DispatchToolError::Sdk { + sdk_kind: "code_execution_failed".to_string(), + message: format!( + "Code Mode runner exited before completion with status {status}" + ), + }); + }; + match serde_json::from_str::(&line).map_err(|err| { + DispatchToolError::Sdk { + sdk_kind: "internal_error".to_string(), + message: format!("Code Mode runner emitted invalid protocol JSON: {err}"), + } + })? { + CodeModeRunnerOutput::ToolCall { seq, id, params } => { + if calls.len() >= max_tool_calls { + terminate_code_mode_runner(&mut child).await; + return Err(DispatchToolError::Sdk { + sdk_kind: "tool_call_limit_exceeded".to_string(), + message: format!( + "Code Mode execution exceeded max_tool_calls={max_tool_calls}" + ), + }); + } + let result = match self + .code_mode_call_tool_id_before_deadline( + &id, params, deadline, auth, subject, + ) + .await + { + Ok(result) => result, + Err(err) => { + drop( + write_runner_input( + &mut stdin, + &CodeModeRunnerInput::ToolError { + seq, + kind: match &err { + DispatchToolError::Sdk { sdk_kind, .. 
} => { + sdk_kind.as_str() + } + other => other.kind(), + } + .to_string(), + message: err.to_string(), + }, + ) + .await, + ); + terminate_code_mode_runner(&mut child).await; + return Err(err); + } + }; + calls.push(CodeModeExecutedCall { + id, + result: result.clone(), + }); + write_runner_input( + &mut stdin, + &CodeModeRunnerInput::ToolResult { seq, result }, + ) + .await?; + } + CodeModeRunnerOutput::Done => { + if calls.is_empty() { + terminate_code_mode_runner(&mut child).await; + return Err(DispatchToolError::Sdk { + sdk_kind: "invalid_param".to_string(), + message: + "Code Mode snippet must call callTool(id, params) at least once" + .to_string(), + }); + } + let status = child.wait().await.map_err(|err| DispatchToolError::Sdk { + sdk_kind: "internal_error".to_string(), + message: format!("failed to wait for Code Mode runner: {err}"), + })?; + if !status.success() { + return Err(DispatchToolError::Sdk { + sdk_kind: "code_execution_failed".to_string(), + message: format!("Code Mode runner exited with status {status}"), + }); + } + return Ok(CodeModeExecutionResponse { calls }); + } + CodeModeRunnerOutput::Error { kind, message } => { + drop(child.wait().await); + return Err(DispatchToolError::Sdk { + sdk_kind: kind, + message, + }); + } + } + } + } + + async fn code_mode_call_tool_id_before_deadline( + &self, + id: &str, + params: Value, + deadline: tokio::time::Instant, + auth: Option<&crate::api::oauth::AuthContext>, + subject: Option<&str>, + ) -> Result { + match tokio::time::timeout_at( + deadline, + self.code_mode_call_tool_id(id, params, auth, subject), + ) + .await + { + Ok(result) => result, + Err(_) => Err(DispatchToolError::Sdk { + sdk_kind: "timeout".to_string(), + message: "Code Mode execution timed out".to_string(), + }), + } + } + async fn code_mode_call_tool_id( &self, id: &str, @@ -3128,23 +3302,22 @@ impl LabMcpServer { tool: &str, params: Value, ) -> Result { - let Some(pool) = self.current_upstream_pool().await else { + let Some(manager) 
= self.gateway_manager.as_ref() else { return Err(DispatchToolError::Sdk { sdk_kind: "upstream_error".to_string(), - message: "gateway upstream pool is unavailable".to_string(), + message: "gateway manager is unavailable".to_string(), }); }; - let exposed = pool - .healthy_tools_for_upstream(upstream) - .await - .into_iter() - .any(|candidate| candidate.tool.name.as_ref() == tool); - if !exposed { + manager + .resolve_code_mode_upstream_tool(upstream, tool) + .await?; + let Some(pool) = manager.current_pool().await else { return Err(DispatchToolError::Sdk { - sdk_kind: "not_found".to_string(), - message: format!("upstream tool `{upstream}::{tool}` was not found"), + sdk_kind: "upstream_error".to_string(), + message: "gateway upstream pool is unavailable".to_string(), }); - } + }; + let before = self.snapshot_catalog().await; let mut upstream_params = CallToolRequestParams::new(tool.to_string()); upstream_params.arguments = Some(match params { Value::Object(map) => map, @@ -3152,19 +3325,49 @@ impl LabMcpServer { }); match pool.call_tool(upstream, upstream_params).await { Some(Ok(result)) => { + let (result, kind, counts_as_failure) = + normalize_upstream_result(tool, "call_tool", result); + if counts_as_failure { + pool.record_failure( + upstream, + format!("upstream `{upstream}` returned `{kind}`"), + ) + .await; + } else { + pool.record_success(upstream).await; + } + let after = self.snapshot_catalog().await; + self.notify_catalog_changes(&before, &after).await; + if kind != "ok" { + return Err(DispatchToolError::Sdk { + sdk_kind: kind.to_string(), + message: call_tool_result_message(&result), + }); + } serde_json::to_value(result).map_err(|err| DispatchToolError::Sdk { - sdk_kind: "serialization_error".to_string(), + sdk_kind: "internal_error".to_string(), message: format!("failed to serialize upstream tool result: {err}"), }) } - Some(Err(err)) => Err(DispatchToolError::Sdk { - sdk_kind: "upstream_error".to_string(), - message: err, - }), - None => 
Err(DispatchToolError::Sdk { - sdk_kind: "not_found".to_string(), - message: format!("upstream tool `{upstream}::{tool}` was not found"), - }), + Some(Err(err)) => { + pool.record_failure(upstream, err.clone()).await; + let after = self.snapshot_catalog().await; + self.notify_catalog_changes(&before, &after).await; + Err(DispatchToolError::Sdk { + sdk_kind: "upstream_error".to_string(), + message: err, + }) + } + None => { + pool.record_failure(upstream, format!("upstream `{upstream}` is not connected")) + .await; + let after = self.snapshot_catalog().await; + self.notify_catalog_changes(&before, &after).await; + Err(DispatchToolError::Sdk { + sdk_kind: "not_found".to_string(), + message: format!("upstream tool `{upstream}::{tool}` was not found"), + }) + } } } @@ -3192,6 +3395,36 @@ fn compare_code_mode_search_candidates( .then_with(|| a.id.cmp(&b.id)) } +async fn write_runner_input( + stdin: &mut ChildStdin, + input: &CodeModeRunnerInput, +) -> Result<(), DispatchToolError> { + let mut line = serde_json::to_vec(input).map_err(|err| DispatchToolError::Sdk { + sdk_kind: "internal_error".to_string(), + message: format!("failed to encode Code Mode runner input: {err}"), + })?; + line.push(b'\n'); + stdin + .write_all(&line) + .await + .map_err(|err| DispatchToolError::Sdk { + sdk_kind: "internal_error".to_string(), + message: format!("failed to write Code Mode runner input: {err}"), + })?; + stdin.flush().await.map_err(|err| DispatchToolError::Sdk { + sdk_kind: "internal_error".to_string(), + message: format!("failed to flush Code Mode runner input: {err}"), + }) +} + +async fn terminate_code_mode_runner(child: &mut Child) { + if let Ok(Some(_)) = child.try_wait() { + return; + } + drop(child.kill().await); + drop(child.wait().await); +} + fn tool_error_envelope(service: &str, action: &str, err: &DispatchToolError) -> Value { match err { DispatchToolError::Sdk { sdk_kind, message } => { @@ -3504,6 +3737,15 @@ fn normalize_upstream_result( ) } +fn 
call_tool_result_message(result: &CallToolResult) -> String { + result + .content + .first() + .and_then(|content| content.as_text()) + .map(|content| content.text.to_string()) + .unwrap_or_else(|| "upstream tool returned an error".to_string()) +} + /// Recover a stable kind tag and message from an `anyhow::Error`. /// /// Priority: @@ -3563,6 +3805,8 @@ mod tests { use rmcp::model::{CallToolResult, Content}; use serde_json::Value; use std::future::Future; + use std::pin::Pin; + use std::time::Duration; #[tokio::test] async fn extract_error_info_preserves_unknown_action_from_real_dispatch_downcast() { @@ -3761,7 +4005,7 @@ mod tests { fn noop_dispatch( _action: String, _params: Value, - ) -> std::pin::Pin> + Send>> { + ) -> Pin> + Send>> { Box::pin(async { Ok(Value::Null) }) } @@ -3923,6 +4167,68 @@ mod tests { assert_eq!(result, Value::Null); } + const SLOW_ACTIONS: &[ActionSpec] = &[ActionSpec { + name: "wait", + description: "Wait long enough to test Code Mode timeout propagation", + destructive: false, + params: &[], + returns: "object", + }]; + + fn slow_dispatch( + _action: String, + _params: Value, + ) -> Pin> + Send>> { + Box::pin(async { + tokio::time::sleep(Duration::from_secs(5)).await; + Ok(serde_json::json!({"ok": true})) + }) + } + + #[tokio::test] + async fn code_mode_timeout_covers_brokered_lab_calls() { + let mut registry = ToolRegistry::new(); + registry.register(RegisteredService { + name: "slow", + description: "Slow test service", + category: "bootstrap", + kind: crate::registry::RegisteredServiceKind::BootstrapOperator, + status: "available", + actions: SLOW_ACTIONS, + dispatch: slow_dispatch, + }); + let server = super::LabMcpServer { + registry: std::sync::Arc::new(registry), + gateway_manager: None, + node_role: None, + peers: std::sync::Arc::new(tokio::sync::RwLock::new(Vec::new())), + logging_level: std::sync::Arc::new(std::sync::atomic::AtomicU8::new( + logging_level_rank(rmcp::model::LoggingLevel::Info), + )), + }; + + let started = 
std::time::Instant::now(); + let err = server + .code_mode_call_tool_id_before_deadline( + "lab::slow.wait", + serde_json::json!({}), + tokio::time::Instant::now() + Duration::from_millis(50), + None, + None, + ) + .await + .expect_err("brokered tool call should be bounded by Code Mode timeout"); + + assert!( + started.elapsed() < Duration::from_secs(2), + "timeout should not wait for the slow dispatch to finish" + ); + match err { + ToolError::Sdk { sdk_kind, .. } => assert_eq!(sdk_kind, "timeout"), + other => panic!("expected timeout sdk error, got {other:?}"), + } + } + #[tokio::test] async fn snapshot_catalog_hides_builtin_tools_when_tool_search_is_enabled() { let runtime = crate::dispatch::gateway::manager::GatewayRuntimeHandle::default(); diff --git a/crates/lab/tests/code_mode_runner.rs b/crates/lab/tests/code_mode_runner.rs new file mode 100644 index 000000000..e990137a2 --- /dev/null +++ b/crates/lab/tests/code_mode_runner.rs @@ -0,0 +1,94 @@ +use std::io::{BufRead, BufReader, Write}; +use std::process::{Command, Stdio}; + +use serde_json::{Value, json}; + +fn read_protocol_line(reader: &mut BufReader) -> Value { + let mut line = String::new(); + reader.read_line(&mut line).expect("read runner output"); + assert!(!line.is_empty(), "runner closed stdout"); + serde_json::from_str(&line).expect("runner output must be JSON") +} + +#[test] +fn code_mode_runner_evaluates_js_in_a_minimal_host_environment() { + let mut child = Command::new(env!("CARGO_BIN_EXE_labby")) + .args(["internal", "code-mode-runner"]) + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn() + .expect("spawn code mode runner"); + + let mut stdin = child.stdin.take().expect("runner stdin"); + let stdout = child.stdout.take().expect("runner stdout"); + let mut stdout = BufReader::new(stdout); + let code = r#" + if (typeof process !== "undefined" || typeof require !== "undefined" || + typeof fetch !== "undefined" || typeof Deno !== "undefined" || + typeof Bun !== 
"undefined") { + throw new Error("ambient host API exposed"); + } + const first = await callTool("lab::gateway.first", {"x": 1}); + if (first.ok) { + await callTool("lab::gateway.second", {"from": first.value}); + } + if (false) { + await callTool("lab::gateway.never", {}); + } + "#; + + writeln!( + stdin, + "{}", + json!({ + "type": "start", + "code": code + }) + ) + .expect("write start"); + + assert_eq!( + read_protocol_line(&mut stdout), + json!({ + "type": "tool_call", + "seq": 0, + "id": "lab::gateway.first", + "params": {"x": 1} + }) + ); + writeln!( + stdin, + "{}", + json!({ + "type": "tool_result", + "seq": 0, + "result": {"ok": true, "value": 42} + }) + ) + .expect("write first result"); + + assert_eq!( + read_protocol_line(&mut stdout), + json!({ + "type": "tool_call", + "seq": 1, + "id": "lab::gateway.second", + "params": {"from": 42} + }) + ); + writeln!( + stdin, + "{}", + json!({ + "type": "tool_result", + "seq": 1, + "result": {"ok": true} + }) + ) + .expect("write second result"); + + assert_eq!(read_protocol_line(&mut stdout), json!({"type": "done"})); + let status = child.wait().expect("wait for runner"); + assert!(status.success(), "runner exited with {status}"); +} diff --git a/deny.toml b/deny.toml index 141d1d488..99cb2ad3f 100644 --- a/deny.toml +++ b/deny.toml @@ -16,6 +16,9 @@ ignore = [ "RUSTSEC-2024-0384", # rustls-pemfile: unmaintained transitive dependency from neo4rs 0.9.0-rc.9 "RUSTSEC-2025-0134", + # paste: unmaintained transitive macro dependency from boa_engine 0.21.1. + # Code Mode runs Boa in a short-lived child process with no ambient host APIs. 
+ "RUSTSEC-2024-0436", ] [licenses] diff --git a/docs/dev/ERRORS.md b/docs/dev/ERRORS.md index 4386f318f..482a6a93d 100644 --- a/docs/dev/ERRORS.md +++ b/docs/dev/ERRORS.md @@ -58,6 +58,7 @@ Dispatch layers may add the following kinds on top of SDK errors: - `ambiguous_tool` — unqualified tool name resolved to multiple upstream gateway candidates; envelope carries `valid: Vec` of fully-qualified `{upstream}::{tool}` names the caller must choose from. HTTP 409. - `invalid_code_mode_id` — Code Mode tool id parsing failed. Valid ids are `lab::.` and `upstream::::`. HTTP 422. - `code_mode_disabled` — Code Mode execution was requested while `[code_mode].enabled` is false. Discovery and schema lookup can remain enabled without allowing execution. HTTP 403. +- `code_execution_failed` — Code Mode child-process JavaScript evaluation failed before completing the runner protocol. HTTP 422. - `tool_call_limit_exceeded` — a Code Mode snippet attempted more host-brokered tool calls than `max_tool_calls` allows. HTTP 429. - `schema_unavailable` — Code Mode schema lookup found a tool, but its upstream schema was missing or exceeded the safe return size after sanitization. HTTP 422. - `queue_saturated` — bounded runtime queue is full; caller should retry after the current work drains. HTTP 429. diff --git a/docs/services/GATEWAY.md b/docs/services/GATEWAY.md index a3dfe81a0..cc9bfb73e 100644 --- a/docs/services/GATEWAY.md +++ b/docs/services/GATEWAY.md @@ -139,8 +139,8 @@ Invoke call shape on the MCP surface: { "name": "search_issues", "arguments": { "query": "repo:jmagar/lab tool_search" } } ``` -Code Mode is schema-first discovery, not execution. `code_search` returns stable ids -for Lab actions and upstream tools: +Code Mode is schema-first discovery plus opt-in sandboxed execution. 
+`code_search` returns stable ids for Lab actions and upstream tools: ```json { "query": "github issues", "top_k": 10 } @@ -177,15 +177,16 @@ timeout_ms = 5000 max_tool_calls = 8 ``` -The MVP executor accepts a constrained JavaScript/TypeScript-looking static -batch of `callTool(id, params)` calls. It intentionally rejects control flow, -function declarations, and arrow functions until a real sandboxed evaluator is -wired in. `params` must be strict JSON so the gateway can validate and broker -each call without granting the snippet ambient host access: +Execution runs in a short-lived child process with an embedded JavaScript engine. +The child gets an empty environment, a temporary working directory, no Node/Deno +host APIs, and no direct access to the Lab runtime. The only host capability is +the injected `callTool(id, params)` function, which sends each requested call +back to the parent gateway for normal visibility, scope, destructive-action, and +upstream exposure checks. `params` must be JSON-serializable: ```json { - "code": "await callTool(\"lab::radarr.movie.search\", {\"query\":\"Alien\"});" + "code": "const result = await callTool(\"lab::radarr.movie.search\", {\"query\":\"Alien\"});\nif (result.total > 0) {\n await callTool(\"lab::radarr.queue.list\", {});\n}" } ``` @@ -200,6 +201,7 @@ Rules: - `code_search` is read-only discovery and accepts `lab:read`, `lab`, or `lab:admin` - `code_schema` exposes full schemas and requires `lab` or `lab:admin` - `code_execute` requires `lab` or `lab:admin`, is disabled unless `[code_mode].enabled = true`, and brokers calls through the same gateway visibility and destructive-action checks as `invoke` +- `code_execute` enforces `timeout_ms` by killing the child process and enforces `max_tool_calls` in the parent before brokering each call - invalid Code Mode ids return `invalid_code_mode_id` - unavailable or overlarge upstream schemas return `schema_unavailable` - old `[[upstream]].tool_search` blocks are accepted only 
as migration input and are dropped on the next gateway config write