From d20f9a99e48fe04583ef7fb713baa44c71904eba Mon Sep 17 00:00:00 2001 From: Eric Hayes Date: Wed, 10 Jun 2026 14:01:12 -0400 Subject: [PATCH] update tool typegen and schema transforms --- .../generated/schemas/index.ts | 2 - .../generated/schemas/toolSchema.ts | 18 - .../generated/schemas/toolSchemas.ts | 15 - .../service-cognition/openapi.json | 31 -- js/app/scripts/generate-dcs-tools.ts | 19 +- .../ai_tools/src/bin/gen_tool_schemas.rs | 4 +- rust/cloud-storage/ai_tools/src/lib.rs | 17 +- .../src/search/search_service/content.rs | 15 +- .../src/search/search_service/name.rs | 13 +- .../ai_toolset/src/schema/error.rs | 44 ++ .../ai_toolset/src/schema/frontend_typegen.rs | 233 +++++++++++ .../ai_toolset/src/schema/generate.rs | 384 ++++-------------- .../ai_toolset/src/schema/generate/test.rs | 264 ++++++++++++ .../ai_toolset/src/schema/mod.rs | 20 +- .../ai_toolset/src/schema/phantom_tool.rs | 22 +- .../schema/transform/additional_properties.rs | 40 ++ .../ai_toolset/src/schema/transform/mod.rs | 23 ++ .../src/schema/transform/nullify_optional.rs | 97 +++++ .../src/schema/transform/ref_siblings.rs | 64 +++ .../src/schema/transform/required.rs | 25 ++ .../src/schema/transform/rewrite_one_of.rs | 27 ++ .../src/schema/transform/strip_unsupported.rs | 107 +++++ .../ai_toolset/src/schema/validate/extract.rs | 29 ++ .../ai_toolset/src/schema/validate/mod.rs | 58 +++ .../ai_toolset/src/schema/validate/one_of.rs | 18 + .../src/schema/validate/recursive.rs | 50 +++ .../src/schema/validate/recursive/test.rs | 142 +++++++ .../ai_toolset/src/schema/validate/strict.rs | 69 ++++ .../ai_toolset/src/toolset/tool_object/mod.rs | 9 +- .../src/toolset/tool_object/object.rs | 27 -- .../src/toolset/tool_object/tool_async.rs | 17 +- .../src/toolset/tool_object/util.rs | 312 -------------- .../ai_toolset/src/toolset/types.rs | 3 +- .../call/src/inbound/toolset/test.rs | 26 +- .../channels/src/inbound/toolset/test.rs | 44 +- .../chat/src/inbound/toolset/test.rs | 13 +- 
.../src/api/swagger.rs | 4 - .../documents/src/inbound/toolset/test.rs | 33 +- .../email/src/inbound/toolset/test.rs | 33 +- .../notification/src/inbound/ai_tool/test.rs | 55 +-- .../properties/src/inbound/toolset/test.rs | 37 +- .../soup/src/inbound/toolset/test.rs | 26 +- .../teams/src/inbound/toolset/test.rs | 13 +- 43 files changed, 1556 insertions(+), 946 deletions(-) delete mode 100644 js/app/packages/service-clients/service-cognition/generated/schemas/toolSchema.ts delete mode 100644 js/app/packages/service-clients/service-cognition/generated/schemas/toolSchemas.ts create mode 100644 rust/cloud-storage/ai_toolset/src/schema/error.rs create mode 100644 rust/cloud-storage/ai_toolset/src/schema/frontend_typegen.rs create mode 100644 rust/cloud-storage/ai_toolset/src/schema/generate/test.rs create mode 100644 rust/cloud-storage/ai_toolset/src/schema/transform/additional_properties.rs create mode 100644 rust/cloud-storage/ai_toolset/src/schema/transform/mod.rs create mode 100644 rust/cloud-storage/ai_toolset/src/schema/transform/nullify_optional.rs create mode 100644 rust/cloud-storage/ai_toolset/src/schema/transform/ref_siblings.rs create mode 100644 rust/cloud-storage/ai_toolset/src/schema/transform/required.rs create mode 100644 rust/cloud-storage/ai_toolset/src/schema/transform/rewrite_one_of.rs create mode 100644 rust/cloud-storage/ai_toolset/src/schema/transform/strip_unsupported.rs create mode 100644 rust/cloud-storage/ai_toolset/src/schema/validate/extract.rs create mode 100644 rust/cloud-storage/ai_toolset/src/schema/validate/mod.rs create mode 100644 rust/cloud-storage/ai_toolset/src/schema/validate/one_of.rs create mode 100644 rust/cloud-storage/ai_toolset/src/schema/validate/recursive.rs create mode 100644 rust/cloud-storage/ai_toolset/src/schema/validate/recursive/test.rs create mode 100644 rust/cloud-storage/ai_toolset/src/schema/validate/strict.rs delete mode 100644 rust/cloud-storage/ai_toolset/src/toolset/tool_object/util.rs diff --git 
a/js/app/packages/service-clients/service-cognition/generated/schemas/index.ts b/js/app/packages/service-clients/service-cognition/generated/schemas/index.ts index d058d3e6c1..65e2d54195 100644 --- a/js/app/packages/service-clients/service-cognition/generated/schemas/index.ts +++ b/js/app/packages/service-clients/service-cognition/generated/schemas/index.ts @@ -153,8 +153,6 @@ export * from './structuredCompletionError'; export * from './structuredCompletionRequest'; export * from './structuredCompletionRequestAdditionalInstructions'; export * from './structuredCompletionResponse'; -export * from './toolSchema'; -export * from './toolSchemas'; export * from './toolSet'; export * from './toolSetOneOf'; export * from './toolSetOneOfThree'; diff --git a/js/app/packages/service-clients/service-cognition/generated/schemas/toolSchema.ts b/js/app/packages/service-clients/service-cognition/generated/schemas/toolSchema.ts deleted file mode 100644 index 85a300a8b1..0000000000 --- a/js/app/packages/service-clients/service-cognition/generated/schemas/toolSchema.ts +++ /dev/null @@ -1,18 +0,0 @@ -/** - * Generated by orval v7.21.0 🍺 - * Do not edit manually. - * Document Cognition Service - * OpenAPI spec version: 1.0.0 - */ - -/** - * Schema information for a single tool, serializable for API responses. - */ -export interface ToolSchema { - /** The JSON schema for the tool's input parameters. */ - inputSchema: unknown; - /** The name of the tool. */ - name: string; - /** The JSON schema for the tool's output. */ - outputSchema: unknown; -} diff --git a/js/app/packages/service-clients/service-cognition/generated/schemas/toolSchemas.ts b/js/app/packages/service-clients/service-cognition/generated/schemas/toolSchemas.ts deleted file mode 100644 index 9a27ffe091..0000000000 --- a/js/app/packages/service-clients/service-cognition/generated/schemas/toolSchemas.ts +++ /dev/null @@ -1,15 +0,0 @@ -/** - * Generated by orval v7.21.0 🍺 - * Do not edit manually. 
- * Document Cognition Service - * OpenAPI spec version: 1.0.0 - */ -import type { ToolSchema } from './toolSchema'; - -/** - * A collection of tool schemas, typically used for API responses. - */ -export interface ToolSchemas { - /** The list of tool schemas. */ - schemas: ToolSchema[]; -} diff --git a/js/app/packages/service-clients/service-cognition/openapi.json b/js/app/packages/service-clients/service-cognition/openapi.json index e514f792a8..b1cf282b32 100644 --- a/js/app/packages/service-clients/service-cognition/openapi.json +++ b/js/app/packages/service-clients/service-cognition/openapi.json @@ -3198,37 +3198,6 @@ "result": {} } }, - "ToolSchema": { - "type": "object", - "description": "Schema information for a single tool, serializable for API responses.", - "required": ["name", "inputSchema", "outputSchema"], - "properties": { - "inputSchema": { - "description": "The JSON schema for the tool's input parameters." - }, - "name": { - "type": "string", - "description": "The name of the tool." - }, - "outputSchema": { - "description": "The JSON schema for the tool's output." - } - } - }, - "ToolSchemas": { - "type": "object", - "description": "A collection of tool schemas, typically used for API responses.", - "required": ["schemas"], - "properties": { - "schemas": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ToolSchema" - }, - "description": "The list of tool schemas." 
- } - } - }, "ToolSet": { "oneOf": [ { diff --git a/js/app/scripts/generate-dcs-tools.ts b/js/app/scripts/generate-dcs-tools.ts index 0ba2e8064e..a545211844 100644 --- a/js/app/scripts/generate-dcs-tools.ts +++ b/js/app/scripts/generate-dcs-tools.ts @@ -45,8 +45,9 @@ const JsonObjectValidator: z.ZodType = z.record( z.unknown() ); -// The combined schema has shared $defs and a tools array -const CombinedSchemaValidator = z.object({ +// The frontend typegen schema (ai_toolset FrontendSchemas) has shared $defs +// and a tools array +const FrontendSchemasValidator = z.object({ $defs: z.record(z.string(), JsonObjectValidator), tools: z.array( z.object({ @@ -57,7 +58,7 @@ const CombinedSchemaValidator = z.object({ ), }); -type CombinedSchema = z.infer; +type FrontendSchemas = z.infer; async function buildAndRunSchemaGenerator(): Promise { console.log('Building gen_tool_schemas binary...'); @@ -77,17 +78,17 @@ async function buildAndRunSchemaGenerator(): Promise { console.log('Schema generation complete.'); } -async function loadCombinedSchema(): Promise { +async function loadFrontendSchemas(): Promise { await buildAndRunSchemaGenerator(); const data = await readFile(schemasJsonPath, 'utf-8'); - const parsed = CombinedSchemaValidator.parse(JSON.parse(data)); + const parsed = FrontendSchemasValidator.parse(JSON.parse(data)); console.log( `Loaded ${parsed.tools.length} tools, ${Object.keys(parsed.$defs).length} definitions` ); return parsed; } -async function generateSchemasFile(schema: CombinedSchema) { +async function generateSchemasFile(schema: FrontendSchemas) { const resolved = await $RefParser.dereference(structuredClone(schema)); const defs = (resolved as { $defs: Record }).$defs; const seen = new Set(); @@ -118,7 +119,7 @@ async function generateSchemasFile(schema: CombinedSchema) { ); } -async function generateToolTypesFile(schema: CombinedSchema) { +async function generateToolTypesFile(schema: FrontendSchemas) { const properties: Record = {}; for (const key of 
Object.keys(schema.$defs)) { properties[key] = { $ref: `#/$defs/${key}` }; @@ -148,7 +149,7 @@ async function generateToolTypesFile(schema: CombinedSchema) { await Bun.write(typesFile, `${warning}\n${cleaned}`); } -async function generateToolsFile(schema: CombinedSchema) { +async function generateToolsFile(schema: FrontendSchemas) { const entries = schema.tools .map((t) => ({ name: t.name, @@ -243,7 +244,7 @@ export function deserializeToolResponse( await Bun.write(toolFile, contents); } -const schema = await loadCombinedSchema(); +const schema = await loadFrontendSchemas(); await generateSchemasFile(schema); await generateToolsFile(schema); diff --git a/rust/cloud-storage/ai_tools/src/bin/gen_tool_schemas.rs b/rust/cloud-storage/ai_tools/src/bin/gen_tool_schemas.rs index 2fcab9cae7..b918017937 100644 --- a/rust/cloud-storage/ai_tools/src/bin/gen_tool_schemas.rs +++ b/rust/cloud-storage/ai_tools/src/bin/gen_tool_schemas.rs @@ -1,8 +1,8 @@ //! Binary to generate combined tool schemas JSON file. 
fn main() { - let combined = ai_tools::all_tool_combined_schema(); - let json = serde_json::to_string_pretty(&combined).expect("serialize schemas"); + let schemas = ai_tools::all_tool_frontend_schemas(); + let json = schemas.to_json_pretty().expect("serialize schemas"); std::fs::create_dir("schemas").expect("create schemas dir"); std::fs::write("schemas/tools.json", &json).expect("write tools.json"); println!("Generated ai_tools/schemas/tools.json"); diff --git a/rust/cloud-storage/ai_tools/src/lib.rs b/rust/cloud-storage/ai_tools/src/lib.rs index eb0a404280..5ddd6594fb 100644 --- a/rust/cloud-storage/ai_tools/src/lib.rs +++ b/rust/cloud-storage/ai_tools/src/lib.rs @@ -1,7 +1,7 @@ #![recursion_limit = "256"] use ai_toolset::AsyncToolCollection; -use ai_toolset::schema::{CombinedToolSchemas, ToolSchemaGenerator}; +use ai_toolset::schema::{FrontendSchemas, ToolSchemaGenerator, frontend_schemas_builder}; mod build_context; mod schemas; pub mod search; @@ -52,14 +52,10 @@ pub struct ToolSetWithPrompt { } impl ToolSchemaGenerator for ToolSetWithPrompt { - fn generate_schemas(&self) -> ai_toolset::schema::ToolSchemas { - self.toolset.generate_schemas() - } - fn register_schemas( &self, generator: &mut schemars::SchemaGenerator, - ) -> Vec { + ) -> Vec { self.toolset.register_schemas(generator) } } @@ -92,9 +88,12 @@ pub fn all_tools() -> ToolSetWithPrompt { } } -/// Combined schema with shared, deduplicated `$defs`. -pub fn all_tool_combined_schema() -> CombinedToolSchemas { - CombinedToolSchemas::builder() +/// Frontend typegen schemas with shared, deduplicated `$defs`. +/// +/// These feed `gen_tool_schemas` / `generate-dcs-tools.ts` and are never +/// sent to AI providers. 
+pub fn all_tool_frontend_schemas() -> FrontendSchemas { + frontend_schemas_builder() .merge(&all_tools()) .merge(&read::read_thread()) .build() diff --git a/rust/cloud-storage/ai_tools/src/search/search_service/content.rs b/rust/cloud-storage/ai_tools/src/search/search_service/content.rs index 31f6f471bd..b709611b6d 100644 --- a/rust/cloud-storage/ai_tools/src/search/search_service/content.rs +++ b/rust/cloud-storage/ai_tools/src/search/search_service/content.rs @@ -87,23 +87,22 @@ impl AsyncTool> for ContentSearch { #[cfg(test)] mod tests { use super::*; - use ai_toolset::generate_tool_input_schema; - use ai_toolset::tool_object::validate_tool_schema; + use ai_toolset::schema::generate_validated_input_schema; #[test] fn test_content_search_schema_validation() { - let schema = generate_tool_input_schema!(ContentSearch); - - let result = validate_tool_schema(&schema); + let result = generate_validated_input_schema::(); assert!(result.is_ok(), "{:?}", result); - let (name, description) = result.unwrap(); + let validated = result.unwrap(); assert_eq!( - name, "ContentSearch", + validated.name, "ContentSearch", "Tool name should match the schemars title" ); assert!( - description.contains("Search items by their content"), + validated + .description + .contains("Search items by their content"), "Description should contain expected text" ); } diff --git a/rust/cloud-storage/ai_tools/src/search/search_service/name.rs b/rust/cloud-storage/ai_tools/src/search/search_service/name.rs index e5476ee0b8..3a868c01db 100644 --- a/rust/cloud-storage/ai_tools/src/search/search_service/name.rs +++ b/rust/cloud-storage/ai_tools/src/search/search_service/name.rs @@ -87,23 +87,20 @@ impl AsyncTool> for NameSearch { #[cfg(test)] mod tests { use super::*; - use ai_toolset::generate_tool_input_schema; - use ai_toolset::tool_object::validate_tool_schema; + use ai_toolset::schema::generate_validated_input_schema; #[test] fn test_name_search_schema_validation() { - let schema = 
generate_tool_input_schema!(NameSearch); - - let result = validate_tool_schema(&schema); + let result = generate_validated_input_schema::(); assert!(result.is_ok(), "{:?}", result); - let (name, description) = result.unwrap(); + let validated = result.unwrap(); assert_eq!( - name, "NameSearch", + validated.name, "NameSearch", "Tool name should match the schemars title" ); assert!( - description.contains("Search items by their name"), + validated.description.contains("Search items by their name"), "Description should contain expected text" ); } diff --git a/rust/cloud-storage/ai_toolset/src/schema/error.rs b/rust/cloud-storage/ai_toolset/src/schema/error.rs new file mode 100644 index 0000000000..0503996016 --- /dev/null +++ b/rust/cloud-storage/ai_toolset/src/schema/error.rs @@ -0,0 +1,44 @@ +use serde_json::Error as JsonError; +use thiserror::Error; + +/// Errors that can occur when validating a tool's schema. +#[derive(Debug, Error)] +pub enum ValidationError { + /// The schema is missing required metadata (title or description). + #[error("missing metadata")] + MissingMetadata, + /// Failed to serialize the schema to JSON. + #[error("could not convert to json")] + JsonSerialization(JsonError), + /// The schema contains nested objects which are not supported. + #[error("schema exceeds depth one - nested objects with properties are not allowed")] + ExceedsDepthOne, + /// The schema title is empty. + #[error("title is empty")] + EmptyTitle, + /// The schema contains `oneOf` which is not supported for AI tools. + #[error("schema must not have oneOf set. Do not use descriptions or /// on enum types.")] + OneOf, + /// Schema must be a serde_json::Value::Object + #[error("schema must be a serde_json::Value::Object")] + ExpectedObject, + /// The schema contains a `$ref`, which means a recursive type survived + /// inlining. Strict tool use cannot express recursive schemas. 
+ #[error( + "schema contains $ref — recursive types cannot be inlined and are not supported by strict tool use" + )] + UnsupportedRef, + /// An object's `additionalProperties` is not `false` — map types with + /// arbitrary keys cannot be expressed in strict mode. + #[error( + "additionalProperties must be false — map types (e.g. HashMap) are not supported by strict tool use; use a Vec of key/value structs instead" + )] + AdditionalProperties, + /// An `enum` contains object or array values; strict mode only allows + /// primitive enum members. + #[error("enum values must be primitives (string, number, bool, or null)")] + ComplexEnum, + /// The root of a tool input schema must be an object. + #[error("tool input schema root must have type \"object\"")] + RootNotObject, +} diff --git a/rust/cloud-storage/ai_toolset/src/schema/frontend_typegen.rs b/rust/cloud-storage/ai_toolset/src/schema/frontend_typegen.rs new file mode 100644 index 0000000000..b3f547871a --- /dev/null +++ b/rust/cloud-storage/ai_toolset/src/schema/frontend_typegen.rs @@ -0,0 +1,233 @@ +//! Frontend typegen schema types: shared `$defs` accumulation, collision +//! mangling, and the registration trait. +//! +//! The entry point for this codepath is +//! [`frontend_schemas_builder`](super::generate::frontend_schemas_builder), +//! which composes the generator these types operate on. The output feeds the +//! `gen_tool_schemas` binary and the TypeScript codegen behind it — none of +//! it is ever sent to an AI provider. + +use crate::AsyncToolCollection; +use serde::Serialize; + +/// Entry in a [`FrontendSchemas`] mapping a tool name to its input/output +/// definition names. +#[derive(Serialize, Clone)] +pub struct FrontendToolEntry { + /// The tool name. + pub name: String, + /// The schema definition name for the tool's input. + pub input: String, + /// The schema definition name for the tool's output. 
+ pub output: String, +} + +/// Trait for types that can register tool schemas for frontend type +/// generation. +pub trait ToolSchemaGenerator { + /// Registers this generator's tool types with a shared + /// [`schemars::SchemaGenerator`] for frontend type generation, + /// returning an entry per tool. + /// + /// The generator accumulates shared definitions so that types used by + /// multiple tools (e.g. `CodeExecutionErrorCode`) appear only once. + fn register_schemas(&self, generator: &mut schemars::SchemaGenerator) + -> Vec; +} + +impl ToolSchemaGenerator for AsyncToolCollection { + fn register_schemas( + &self, + generator: &mut schemars::SchemaGenerator, + ) -> Vec { + self.tools + .iter() + .map(|(name, tool_object)| { + let (input, output) = (tool_object.schema_registrar)(generator); + FrontendToolEntry { + name: name.clone(), + input, + output, + } + }) + .collect() + } +} + +/// Combined tool schemas for frontend type generation, with shared `$defs` +/// and a tool→type mapping. +/// +/// All tool input/output types and their transitive dependencies live in a +/// single `$defs` map, deduplicated by schemars' [`schemars::SchemaGenerator`]. +/// +/// Deliberately does **not** implement `Serialize`: these schemas are shaped +/// for TypeScript codegen (refs preserved, no structured-output transforms) +/// and must never be embedded in an AI provider request. The codegen JSON is +/// produced explicitly via [`FrontendSchemas::to_json_pretty`]. +pub struct FrontendSchemas { + /// Shared JSON Schema definitions (keyed by type name). + defs: serde_json::Map, + /// Tool entries referencing definitions by name. + tools: Vec, +} + +impl FrontendSchemas { + /// Serializes the schemas to pretty-printed JSON for the codegen script: + /// `{ "$defs": { ... }, "tools": [ ... ] }`. 
+ pub fn to_json_pretty(&self) -> serde_json::Result { + serde_json::to_string_pretty(&serde_json::json!({ + "$defs": &self.defs, + "tools": &self.tools, + })) + } + + /// Replaces schemars' numeric collision suffixes (e.g. `ReadContent2`) + /// with tool-name-prefixed names (e.g. `ReadThreadReadContent`). + /// + /// When two different Rust types share a `schema_name()`, schemars + /// disambiguates with a numeric suffix. This method finds those, + /// determines which tool references them, and renames to + /// `{ToolName}{BaseName}` across `$defs`, `tools`, and all `$ref`s. + fn mangle_collisions(&mut self) { + let suffixed: Vec<(String, String)> = self + .defs + .keys() + .filter_map(|name| { + let base = name.trim_end_matches(|c: char| c.is_ascii_digit()); + if base.len() < name.len() && self.defs.contains_key(base) { + Some((name.clone(), base.to_owned())) + } else { + None + } + }) + .collect(); + + for (suffixed_name, base_name) in suffixed { + let tool_name = self.find_owning_tool(&suffixed_name); + let new_name = format!("{tool_name}{base_name}"); + let old_ref = format!("#/$defs/{suffixed_name}"); + let new_ref = format!("#/$defs/{new_name}"); + + if let Some(def) = self.defs.remove(&suffixed_name) { + self.defs.insert(new_name.clone(), def); + } + + for tool in &mut self.tools { + if tool.input == suffixed_name { + tool.input = new_name.clone(); + } + if tool.output == suffixed_name { + tool.output = new_name.clone(); + } + } + + Self::rename_refs(&mut self.defs, &old_ref, &new_ref); + } + } + + fn find_owning_tool(&self, def_name: &str) -> String { + let ref_str = format!("#/$defs/{def_name}"); + + // Check if a tool directly uses this def as input or output. + for tool in &self.tools { + if tool.input == def_name || tool.output == def_name { + return tool.name.clone(); + } + } + + // Walk each tool's input/output def tree looking for a transitive $ref. 
+ for tool in &self.tools { + for root in [&tool.input, &tool.output] { + if let Some(def) = self.defs.get(root.as_str()) + && Self::value_contains_ref(def, &ref_str) + { + return tool.name.clone(); + } + } + } + + def_name.to_owned() + } + + fn value_contains_ref(value: &serde_json::Value, target: &str) -> bool { + match value { + serde_json::Value::String(s) => s == target, + serde_json::Value::Array(arr) => { + arr.iter().any(|v| Self::value_contains_ref(v, target)) + } + serde_json::Value::Object(map) => { + map.values().any(|v| Self::value_contains_ref(v, target)) + } + _ => false, + } + } + + fn rename_refs( + defs: &mut serde_json::Map, + old_ref: &str, + new_ref: &str, + ) { + for value in defs.values_mut() { + Self::rename_refs_in_value(value, old_ref, new_ref); + } + } + + fn rename_refs_in_value(value: &mut serde_json::Value, old_ref: &str, new_ref: &str) { + match value { + serde_json::Value::String(s) if s == old_ref => { + *s = new_ref.to_owned(); + } + serde_json::Value::Array(arr) => { + for v in arr { + Self::rename_refs_in_value(v, old_ref, new_ref); + } + } + serde_json::Value::Object(map) => { + for v in map.values_mut() { + Self::rename_refs_in_value(v, old_ref, new_ref); + } + } + _ => {} + } + } +} + +/// Builder for [`FrontendSchemas`] that accumulates tools from multiple +/// [`ToolSchemaGenerator`]s while sharing a single [`schemars::SchemaGenerator`] +/// for deduplication. +/// +/// Construct via +/// [`frontend_schemas_builder`](super::generate::frontend_schemas_builder). +pub struct FrontendSchemasBuilder { + generator: schemars::SchemaGenerator, + tools: Vec, +} + +impl FrontendSchemasBuilder { + /// Creates a builder around a generator configured by the frontend + /// typegen codepath. + pub(crate) fn new(generator: schemars::SchemaGenerator) -> Self { + FrontendSchemasBuilder { + generator, + tools: Vec::new(), + } + } + + /// Registers all tools from the given generator. 
+ pub fn merge(mut self, schema_generator: &dyn ToolSchemaGenerator) -> Self { + self.tools + .extend(schema_generator.register_schemas(&mut self.generator)); + self + } + + /// Consumes the builder and returns the frontend schemas. + pub fn build(mut self) -> FrontendSchemas { + let defs = self.generator.take_definitions(true); + let mut schemas = FrontendSchemas { + defs, + tools: self.tools, + }; + schemas.mangle_collisions(); + schemas + } +} diff --git a/rust/cloud-storage/ai_toolset/src/schema/generate.rs b/rust/cloud-storage/ai_toolset/src/schema/generate.rs index 0a8d86cdce..ad8cacfb92 100644 --- a/rust/cloud-storage/ai_toolset/src/schema/generate.rs +++ b/rust/cloud-storage/ai_toolset/src/schema/generate.rs @@ -1,315 +1,91 @@ -use crate::AsyncToolCollection; +//! Entry points for tool schema generation — one function per codepath. +//! +//! Both pipelines start from the same Rust tool types but produce different +//! shapes for different consumers. The full list of transforms each codepath +//! applies is visible in its function body here; the building blocks live in +//! the private `transform`, `validate`, and `frontend_typegen` modules. + +#[cfg(test)] +mod test; + +use crate::schema::error::ValidationError; +use crate::schema::frontend_typegen::FrontendSchemasBuilder; +use crate::schema::transform::{ + AddRequired, AdditionalPropertiesFalse, NormaliseRefSiblings, NullifyOptional, OneOfToAnyOf, + StripUnsupported, +}; +use crate::schema::validate::validate_tool_schema; use schemars::Schema; -use schemars::transform::Transform; -use serde::{Deserialize, Serialize}; -use utoipa::ToSchema; +use schemars::generate::SchemaSettings; +use schemars::transform::RecursiveTransform; -/// Normalises `$ref` nodes that carry sibling keywords. -/// -/// Downstream TS tooling (`json-schema-to-typescript`) predates Draft 2020-12 -/// and doesn't merge `$ref` with sibling keywords. 
This [`schemars::Transform`] -/// rewrites those nodes so the output is consumable: -/// -/// * `$ref` + `description` only → drops the description (the referenced type -/// already carries its own). -/// * `$ref` + structural siblings (`properties`, `required`, …) → rewrites to -/// `allOf` so the intersection is explicit. +/// The transformed and valid schema representing a tool #[derive(Debug, Clone)] -pub struct NormaliseRefSiblings; - -impl Transform for NormaliseRefSiblings { - fn transform(&mut self, schema: &mut Schema) { - let Some(obj) = schema.as_object_mut() else { - return; - }; - if !obj.contains_key("$ref") { - return; - } - - let sibling_keys: Vec = obj - .keys() - .filter(|k| *k != "$ref" && *k != "$schema") - .cloned() - .collect(); - - if sibling_keys.is_empty() { - return; - } - - if sibling_keys == ["description"] { - obj.remove("description"); - return; - } - - let ref_val = obj.remove("$ref").unwrap(); - let mut ref_part = serde_json::Map::new(); - ref_part.insert("$ref".to_string(), ref_val); - - let mut sibling_part = serde_json::Map::new(); - for key in &sibling_keys { - if key == "description" { - continue; - } - if let Some(val) = obj.remove(key) { - sibling_part.insert(key.clone(), val); - } - } - obj.remove("description"); - - obj.insert( - "allOf".to_string(), - serde_json::json!([ - serde_json::Value::Object(ref_part), - serde_json::Value::Object(sibling_part), - ]), - ); - } -} - -/// Schema information for a single tool, serializable for API responses. -#[derive(Serialize, Deserialize, Clone, ToSchema)] -#[serde(rename_all = "camelCase")] -pub struct ToolSchema { - /// The name of the tool. +pub struct ValidatedSchema { + /// The tool name, extracted from the schema's `title`. pub name: String, - /// The JSON schema for the tool's input parameters. - pub input_schema: serde_json::Value, - /// The JSON schema for the tool's output. 
- pub output_schema: serde_json::Value, + /// The tool description, extracted from the schema's `description`. + pub description: String, + /// The transformed input schema. + pub schema: Schema, } -/// A collection of tool schemas, typically used for API responses. -#[derive(Serialize, Deserialize, Clone, ToSchema)] -#[serde(rename_all = "camelCase")] -pub struct ToolSchemas { - /// The list of tool schemas. - pub schemas: Vec, -} - -/// Entry in a combined schema mapping a tool name to its input/output definition names. -#[derive(Serialize, Deserialize, Clone)] -pub struct CombinedToolEntry { - /// The tool name. - pub name: String, - /// The schema definition name for the tool's input. - pub input: String, - /// The schema definition name for the tool's output. - pub output: String, -} - -/// Combined schema with shared `$defs` and a tool→type mapping. +/// **AI input codepath**: generates the input schema for `T` that an AI +/// provider receives, transforms it to satisfy strict mode on both OpenAI +/// and Anthropic, then validates what transforms cannot fix. /// -/// All tool input/output types and their transitive dependencies live in a -/// single `$defs` map, deduplicated by schemars' [`SchemaGenerator`]. -#[derive(Serialize, Deserialize, Clone)] -pub struct CombinedToolSchemas { - /// Shared JSON Schema definitions (keyed by type name). - #[serde(rename = "$defs")] - pub defs: serde_json::Map, - /// Tool entries referencing definitions by name. - pub tools: Vec, -} - -impl CombinedToolSchemas { - /// Returns a builder that uses a shared [`schemars::SchemaGenerator`] - /// (with [`NormaliseRefSiblings`]) so types referenced by multiple tools - /// appear exactly once in the final `$defs`. 
- pub fn builder() -> CombinedToolSchemasBuilder { - use schemars::transform::RecursiveTransform; - - let generator = schemars::generate::SchemaSettings::draft2020_12() - .with(|s| s.meta_schema = None) - .with_transform(RecursiveTransform(NormaliseRefSiblings)) - .into_generator(); - CombinedToolSchemasBuilder { - generator, - tools: Vec::new(), - } - } - - /// Replaces schemars' numeric collision suffixes (e.g. `ReadContent2`) - /// with tool-name-prefixed names (e.g. `ReadThreadReadContent`). - /// - /// When two different Rust types share a `schema_name()`, schemars - /// disambiguates with a numeric suffix. This method finds those, - /// determines which tool references them, and renames to - /// `{ToolName}{BaseName}` across `$defs`, `tools`, and all `$ref`s. - pub fn mangle_collisions(&mut self) { - let suffixed: Vec<(String, String)> = self - .defs - .keys() - .filter_map(|name| { - let base = name.trim_end_matches(|c: char| c.is_ascii_digit()); - if base.len() < name.len() && self.defs.contains_key(base) { - Some((name.clone(), base.to_owned())) - } else { - None - } - }) - .collect(); - - for (suffixed_name, base_name) in suffixed { - let tool_name = self.find_owning_tool(&suffixed_name); - let new_name = format!("{tool_name}{base_name}"); - let old_ref = format!("#/$defs/{suffixed_name}"); - let new_ref = format!("#/$defs/{new_name}"); - - if let Some(def) = self.defs.remove(&suffixed_name) { - self.defs.insert(new_name.clone(), def); - } - - for tool in &mut self.tools { - if tool.input == suffixed_name { - tool.input = new_name.clone(); - } - if tool.output == suffixed_name { - tool.output = new_name.clone(); - } - } - - Self::rename_refs(&mut self.defs, &old_ref, &new_ref); - } - } - - fn find_owning_tool(&self, def_name: &str) -> String { - let ref_str = format!("#/$defs/{def_name}"); - - // Check if a tool directly uses this def as input or output. 
- for tool in &self.tools { - if tool.input == def_name || tool.output == def_name { - return tool.name.clone(); - } - } - - // Walk each tool's input/output def tree looking for a transitive $ref. - for tool in &self.tools { - for root in [&tool.input, &tool.output] { - if let Some(def) = self.defs.get(root.as_str()) - && Self::value_contains_ref(def, &ref_str) - { - return tool.name.clone(); - } - } - } - - def_name.to_owned() - } - - fn value_contains_ref(value: &serde_json::Value, target: &str) -> bool { - match value { - serde_json::Value::String(s) => s == target, - serde_json::Value::Array(arr) => { - arr.iter().any(|v| Self::value_contains_ref(v, target)) - } - serde_json::Value::Object(map) => { - map.values().any(|v| Self::value_contains_ref(v, target)) - } - _ => false, - } - } - - fn rename_refs( - defs: &mut serde_json::Map, - old_ref: &str, - new_ref: &str, - ) { - for value in defs.values_mut() { - Self::rename_refs_in_value(value, old_ref, new_ref); - } - } - - fn rename_refs_in_value(value: &mut serde_json::Value, old_ref: &str, new_ref: &str) { - match value { - serde_json::Value::String(s) if s == old_ref => { - *s = new_ref.to_owned(); - } - serde_json::Value::Array(arr) => { - for v in arr { - Self::rename_refs_in_value(v, old_ref, new_ref); - } - } - serde_json::Value::Object(map) => { - for v in map.values_mut() { - Self::rename_refs_in_value(v, old_ref, new_ref); - } - } - _ => {} - } - } -} - -/// Trait for types that can generate tool schemas. +/// Transforms (in order): +/// 1. subschemas inlined — providers get one self-contained schema +/// 2. [`OneOfToAnyOf`] — `oneOf` is unsupported by both providers; `anyOf` +/// is supported by both +/// 3. [`StripUnsupported`] — numeric/string/array constraints and +/// non-whitelisted `format`s are removed and recorded in `description` +/// 4. 
[`NullifyOptional`] — properties not in `required` get a null union; +/// strict mode forbids optionality, so "may be omitted" becomes "may be +/// null" +/// 5. [`AddRequired`] — every property listed in `required`, as OpenAI +/// requires (must run after [`NullifyOptional`]) +/// 6. [`AdditionalPropertiesFalse`] — `additionalProperties: false` on +/// every object (both providers require it) /// -/// Implement this trait to provide schema information about available tools. -pub trait ToolSchemaGenerator { - /// Generates the schemas for all tools in this generator. - fn generate_schemas(&self) -> ToolSchemas; - - /// Registers this generator's tool types with a shared - /// [`schemars::SchemaGenerator`], returning an entry per tool. - /// - /// The generator accumulates shared definitions so that types used by - /// multiple tools (e.g. `CodeExecutionErrorCode`) appear only once. - fn register_schemas(&self, generator: &mut schemars::SchemaGenerator) - -> Vec; -} - -/// Builder for [`CombinedToolSchemas`] that accumulates tools from multiple -/// [`ToolSchemaGenerator`]s while sharing a single [`schemars::SchemaGenerator`] -/// for deduplication. -pub struct CombinedToolSchemasBuilder { - generator: schemars::SchemaGenerator, - tools: Vec, +/// Validation then rejects what cannot be transformed: missing +/// title/description, a non-object root, recursive types (surviving +/// `$ref`s), map types (`additionalProperties` schemas), non-primitive +/// enums, and residual `oneOf`. Never panics — failures are returned as +/// [`ValidationError`]s.
+pub fn generate_validated_input_schema() +-> Result { + let schema = SchemaSettings::draft2020_12() + .with(|s| { + s.meta_schema = None; + s.inline_subschemas = true; + }) + .with_transform(RecursiveTransform(OneOfToAnyOf)) + .with_transform(RecursiveTransform(StripUnsupported)) + .with_transform(RecursiveTransform(NullifyOptional)) + .with_transform(RecursiveTransform(AddRequired)) + .with_transform(RecursiveTransform(AdditionalPropertiesFalse)) + .into_generator() + .into_root_schema_for::(); + let (name, description) = validate_tool_schema(&schema)?; + Ok(ValidatedSchema { + name, + description, + schema, + }) } -impl CombinedToolSchemasBuilder { - /// Registers all tools from the given generator. - pub fn merge(mut self, schema_generator: &dyn ToolSchemaGenerator) -> Self { - self.tools - .extend(schema_generator.register_schemas(&mut self.generator)); - self - } - - /// Consumes the builder and returns the combined schema. - pub fn build(mut self) -> CombinedToolSchemas { - let defs = self.generator.take_definitions(true); - let mut combined = CombinedToolSchemas { - defs, - tools: self.tools, - }; - combined.mangle_collisions(); - combined - } -} - -impl ToolSchemaGenerator for AsyncToolCollection { - fn generate_schemas(&self) -> ToolSchemas { - let schemas = self - .tools - .iter() - .map(|(name, tool_object)| ToolSchema { - name: name.clone(), - input_schema: serde_json::Value::Object(tool_object.input_schema.clone()), - output_schema: tool_object.output_schema.clone(), - }) - .collect(); - ToolSchemas { schemas } - } - - fn register_schemas( - &self, - generator: &mut schemars::SchemaGenerator, - ) -> Vec { - self.tools - .iter() - .map(|(name, tool_object)| { - let (input, output) = (tool_object.schema_registrar)(generator); - CombinedToolEntry { - name: name.clone(), - input, - output, - } - }) - .collect() - } +/// **Frontend typegen codepath**: returns a builder that accumulates tool +/// types into shared, deduplicated `$defs` for TypeScript 
codegen. +/// +/// `$ref`s are preserved (types referenced by multiple tools appear exactly +/// once) and `$ref` nodes with sibling keywords are normalised for the +/// downstream TS tooling. None of the AI structured-output transforms apply +/// here, and the result is never sent to an AI provider. +pub fn frontend_schemas_builder() -> FrontendSchemasBuilder { + let generator = SchemaSettings::draft2020_12() + .with(|s| s.meta_schema = None) + .with_transform(RecursiveTransform(NormaliseRefSiblings)) + .into_generator(); + FrontendSchemasBuilder::new(generator) } diff --git a/rust/cloud-storage/ai_toolset/src/schema/generate/test.rs b/rust/cloud-storage/ai_toolset/src/schema/generate/test.rs new file mode 100644 index 0000000000..cba8fd81bc --- /dev/null +++ b/rust/cloud-storage/ai_toolset/src/schema/generate/test.rs @@ -0,0 +1,264 @@ +use super::*; +use schemars::JsonSchema; +use serde::Deserialize; +use serde_json::Value; +use std::collections::HashMap; + +// Test struct with valid schema (should pass) +#[derive(Debug, JsonSchema, Deserialize, Clone)] +#[schemars( + description = "Valid test schema with simple properties", + title = "ValidTestSchema" +)] +#[allow(dead_code)] +struct ValidTestSchema { + #[schemars(description = "A simple string field")] + pub simple_field: Option, + + #[schemars(description = "A vector of strings")] + pub list_field: Option>, + + #[schemars(description = "A boolean flag")] + pub flag_field: Option, + + #[schemars(description = "An integer value")] + pub number_field: Option, +} + +// Enum variant doc comments make schemars emit oneOf; the pipeline rewrites +// it to anyOf, which both providers accept in strict mode. 
+#[derive(Debug, JsonSchema, Deserialize, Clone)] +#[schemars( + description = "Enum with doc comments generates oneOf, rewritten to anyOf", + title = "DocumentedEnumSchema" +)] +#[allow(dead_code)] +struct DocumentedEnumSchema { + #[schemars(description = "An enum whose variants carry descriptions")] + pub enum_field: DocumentedEnum, +} + +#[derive(Debug, JsonSchema, Deserialize, Clone)] +#[allow(dead_code)] +enum DocumentedEnum { + /// This doc comment causes oneOf + Variant1, + /// This doc comment also causes oneOf + Variant2, +} + +#[derive(Debug, JsonSchema, Deserialize, Clone)] +#[schemars( + description = "Schema with provider-unsupported constraints", + title = "Constrained" +)] +#[allow(dead_code)] +struct Constrained { + #[schemars(description = "An unsigned counter")] + pub count: u32, + + #[schemars(description = "A bounded string", length(min = 1, max = 10))] + pub bounded: String, + + #[schemars(description = "A uuid", extend("format" = "uuid"))] + pub id: String, +} + +#[derive(Debug, JsonSchema, Deserialize, Clone)] +#[schemars(description = "Schema with a map field", title = "WithMap")] +#[allow(dead_code)] +struct WithMap { + pub map_field: HashMap, +} + +#[derive(Debug, JsonSchema, Deserialize, Clone, Default)] +#[allow(dead_code)] +enum PlainEnum { + #[default] + Red, + Blue, +} + +// Fields that are optional without being Option (serde defaults): strict +// mode requires them to become required + nullable. 
+#[derive(Debug, JsonSchema, Deserialize, Clone)] +#[schemars( + description = "Schema with serde-default fields", + title = "WithDefaults" +)] +#[allow(dead_code)] +struct WithDefaults { + pub always: String, + + #[serde(default)] + #[schemars(description = "An optional counter")] + pub count: u32, + + #[serde(default)] + pub color: PlainEnum, +} + +#[derive(Debug, JsonSchema, Deserialize, Clone)] +#[schemars(description = "A recursive schema", title = "Recursive")] +#[allow(dead_code)] +struct Recursive { + pub children: Vec, +} + +fn schema_json(schema: &Schema) -> Value { + serde_json::to_value(schema).expect("schema serializes") +} + +fn property<'a>(json: &'a Value, name: &str) -> &'a Value { + &json["properties"][name] +} + +#[test] +fn test_validate_tool_schema_passes() { + let result = generate_validated_input_schema::(); + assert!( + result.is_ok(), + "Valid schema should pass validation: {:?}", + result + ); + + let validated = result.unwrap(); + assert_eq!(validated.name, "ValidTestSchema"); + assert_eq!( + validated.description, + "Valid test schema with simple properties" + ); +} + +#[test] +fn test_optional_fields_are_required_and_nullable() { + let validated = generate_validated_input_schema::().unwrap(); + let json = schema_json(&validated.schema); + + // OpenAI strict mode: every property must be in `required`; optional + // fields are expressed as nullable unions instead. 
+ let required: Vec<&str> = json["required"] + .as_array() + .unwrap() + .iter() + .map(|v| v.as_str().unwrap()) + .collect(); + for field in ["simple_field", "list_field", "flag_field", "number_field"] { + assert!(required.contains(&field), "{field} must be required"); + } + let types = property(&json, "simple_field")["type"].as_array().unwrap(); + assert!(types.contains(&Value::String("null".into())), "{types:?}"); +} + +#[test] +fn test_additional_properties_only_on_objects() { + let validated = generate_validated_input_schema::().unwrap(); + let json = schema_json(&validated.schema); + + assert_eq!(json["additionalProperties"], Value::Bool(false)); + // Scalars and arrays must not carry the keyword. + for field in ["simple_field", "list_field", "flag_field", "number_field"] { + assert!( + property(&json, field).get("additionalProperties").is_none(), + "{field} should not have additionalProperties" + ); + } +} + +#[test] +fn test_enum_doc_comments_rewritten_to_any_of() { + let result = generate_validated_input_schema::(); + assert!(result.is_ok(), "{result:?}"); + + let json = schema_json(&result.unwrap().schema); + let enum_field = property(&json, "enum_field"); + assert!(enum_field.get("oneOf").is_none(), "oneOf must be rewritten"); + let variants = enum_field["anyOf"].as_array().expect("anyOf present"); + assert_eq!(variants.len(), 2); + assert_eq!(variants[0]["const"], "Variant1"); + assert!(variants[0]["description"].is_string()); +} + +#[test] +fn test_unsupported_constraints_stripped_into_description() { + let validated = generate_validated_input_schema::().unwrap(); + let json = schema_json(&validated.schema); + + // u32: schemars emits `minimum: 0` (Anthropic-rejected) and + // `format: "uint32"` (a type-width artifact, dropped silently). 
+ let count = property(&json, "count"); + assert!(count.get("minimum").is_none()); + assert!(count.get("format").is_none()); + assert!( + count["description"] + .as_str() + .unwrap() + .contains("minimum: 0"), + "stripped constraint should be noted in the description" + ); + + // minLength/maxLength are rejected by both providers. + let bounded = property(&json, "bounded"); + assert!(bounded.get("minLength").is_none()); + assert!(bounded.get("maxLength").is_none()); + let description = bounded["description"].as_str().unwrap(); + assert!(description.contains("A bounded string")); + assert!(description.contains("minLength: 1")); + assert!(description.contains("maxLength: 10")); + + // `uuid` is on both providers' format whitelist and survives. + assert_eq!(property(&json, "id")["format"], "uuid"); +} + +#[test] +fn test_serde_default_fields_become_required_and_nullable() { + let validated = generate_validated_input_schema::().unwrap(); + let json = schema_json(&validated.schema); + + let required: Vec<&str> = json["required"] + .as_array() + .unwrap() + .iter() + .map(|v| v.as_str().unwrap()) + .collect(); + for field in ["always", "count", "color"] { + assert!(required.contains(&field), "{field} must be required"); + } + + // An already-required field keeps its plain type. + assert_eq!(property(&json, "always")["type"], "string"); + + // A promoted scalar becomes a null union. + let count_types = property(&json, "count")["type"].as_array().unwrap(); + assert!(count_types.contains(&Value::String("integer".into()))); + assert!(count_types.contains(&Value::String("null".into()))); + + // A promoted enum admits null in both `type` and `enum`. 
+ let color = property(&json, "color"); + let color_types = color["type"].as_array().unwrap(); + assert!( + color_types.contains(&Value::String("null".into())), + "{color:?}" + ); + let values = color["enum"].as_array().unwrap(); + assert!(values.contains(&Value::Null), "{values:?}"); + assert!(values.contains(&Value::String("Red".into()))); +} + +#[test] +fn test_map_types_fail_validation() { + let result = generate_validated_input_schema::(); + assert!( + matches!(result, Err(ValidationError::AdditionalProperties)), + "map types cannot be expressed in strict mode: {result:?}" + ); +} + +#[test] +fn test_recursive_types_fail_validation() { + let result = generate_validated_input_schema::(); + assert!( + matches!(result, Err(ValidationError::UnsupportedRef)), + "recursive types cannot be inlined: {result:?}" + ); +} diff --git a/rust/cloud-storage/ai_toolset/src/schema/mod.rs b/rust/cloud-storage/ai_toolset/src/schema/mod.rs index 29ea983225..99bfca1964 100644 --- a/rust/cloud-storage/ai_toolset/src/schema/mod.rs +++ b/rust/cloud-storage/ai_toolset/src/schema/mod.rs @@ -1,13 +1,21 @@ -//! Schema generation utilities for AI tools. +//! Schema generation for AI tools. //! -//! This module provides types and traits for generating and managing -//! tool schemas that describe tool inputs and outputs. +//! The entry points live in [`generate`] — one function per codepath: +//! [`generate_validated_input_schema`] for the schema an AI provider +//! receives, and [`frontend_schemas_builder`] for TypeScript codegen. The +//! `transform`, `validate`, and `frontend_typegen` modules are internal +//! building blocks those entry points compose.
+mod error; +mod frontend_typegen; mod generate; mod phantom_tool; +mod transform; +mod validate; -pub use generate::{ - CombinedToolEntry, CombinedToolSchemas, CombinedToolSchemasBuilder, NormaliseRefSiblings, - ToolSchema, ToolSchemaGenerator, ToolSchemas, +pub use error::ValidationError; +pub use frontend_typegen::{ + FrontendSchemas, FrontendSchemasBuilder, FrontendToolEntry, ToolSchemaGenerator, }; +pub use generate::{ValidatedSchema, frontend_schemas_builder, generate_validated_input_schema}; pub use phantom_tool::PhantomTool; diff --git a/rust/cloud-storage/ai_toolset/src/schema/phantom_tool.rs b/rust/cloud-storage/ai_toolset/src/schema/phantom_tool.rs index f9e7218ff9..5b84f71d88 100644 --- a/rust/cloud-storage/ai_toolset/src/schema/phantom_tool.rs +++ b/rust/cloud-storage/ai_toolset/src/schema/phantom_tool.rs @@ -1,5 +1,5 @@ -use super::generate::{CombinedToolEntry, ToolSchema, ToolSchemaGenerator, ToolSchemas}; -use schemars::{JsonSchema, schema_for}; +use super::frontend_typegen::{FrontendToolEntry, ToolSchemaGenerator}; +use schemars::JsonSchema; use std::fmt::Debug; use std::marker::PhantomData; @@ -32,27 +32,13 @@ where I: JsonSchema + Clone + Debug, O: JsonSchema + Clone + Debug, { - fn generate_schemas(&self) -> ToolSchemas { - let input_schema = schema_for!(I); - let output_schema = schema_for!(O); - let input_schema_json = serde_json::to_value(&input_schema).expect("input schema"); - let output_schema_json = serde_json::to_value(&output_schema).expect("output schema"); - ToolSchemas { - schemas: vec![ToolSchema { - name: self.name.to_owned(), - input_schema: input_schema_json, - output_schema: output_schema_json, - }], - } - } - fn register_schemas( &self, generator: &mut schemars::SchemaGenerator, - ) -> Vec { + ) -> Vec { generator.subschema_for::(); generator.subschema_for::(); - vec![CombinedToolEntry { + vec![FrontendToolEntry { name: self.name.to_owned(), input: I::schema_name().into_owned(), output: O::schema_name().into_owned(), diff --git 
a/rust/cloud-storage/ai_toolset/src/schema/transform/additional_properties.rs b/rust/cloud-storage/ai_toolset/src/schema/transform/additional_properties.rs new file mode 100644 index 0000000000..a16083eb2e --- /dev/null +++ b/rust/cloud-storage/ai_toolset/src/schema/transform/additional_properties.rs @@ -0,0 +1,40 @@ +use schemars::Schema; +use schemars::transform::Transform; +use serde_json::Value; + +/// Sets `additionalProperties: false` on every object schema, as required by +/// both OpenAI and Anthropic strict mode. +/// +/// Only applies to object-typed schemas — strict validators reject (or are +/// confused by) the keyword on scalars and arrays. An existing +/// `additionalProperties` value is left untouched: schemars emits a value +/// *schema* there for map types (`HashMap`), which strict mode +/// cannot express — overwriting it would silently change the tool's +/// contract, so it is left for [`ValidateAdditionalProperties`] to reject. +/// +/// [`ValidateAdditionalProperties`]: crate::schema::validate::ValidateAdditionalProperties +#[derive(Debug, Clone)] +pub struct AdditionalPropertiesFalse; + +pub(crate) fn is_object_schema(obj: &serde_json::Map) -> bool { + let type_is_object = match obj.get("type") { + Some(Value::String(t)) => t == "object", + Some(Value::Array(types)) => types.iter().any(|t| t == "object"), + _ => false, + }; + type_is_object || obj.contains_key("properties") +} + +impl Transform for AdditionalPropertiesFalse { + fn transform(&mut self, schema: &mut Schema) { + let Some(obj) = schema.as_object_mut() else { + return; + }; + if !is_object_schema(obj) { + return; + } + if !obj.contains_key("additionalProperties") { + obj.insert("additionalProperties".to_string(), false.into()); + } + } +} diff --git a/rust/cloud-storage/ai_toolset/src/schema/transform/mod.rs b/rust/cloud-storage/ai_toolset/src/schema/transform/mod.rs new file mode 100644 index 0000000000..20720a7d30 --- /dev/null +++
b/rust/cloud-storage/ai_toolset/src/schema/transform/mod.rs @@ -0,0 +1,23 @@ +//! Schema transformers composed by the pipelines in +//! [`generate`](super::generate). +//! +//! Internal to the `schema` module — outside consumers go through the entry +//! points in `generate` instead. These implement schemars' +//! [`Transform`](schemars::transform::Transform) and are applied via +//! [`RecursiveTransform`](schemars::transform::RecursiveTransform); read-only +//! checks belong in the `validate` module instead. + +mod additional_properties; +mod nullify_optional; +mod ref_siblings; +mod required; +mod rewrite_one_of; +mod strip_unsupported; + +pub use additional_properties::AdditionalPropertiesFalse; +pub(crate) use additional_properties::is_object_schema; +pub use nullify_optional::NullifyOptional; +pub use ref_siblings::NormaliseRefSiblings; +pub use required::AddRequired; +pub use rewrite_one_of::OneOfToAnyOf; +pub use strip_unsupported::StripUnsupported; diff --git a/rust/cloud-storage/ai_toolset/src/schema/transform/nullify_optional.rs b/rust/cloud-storage/ai_toolset/src/schema/transform/nullify_optional.rs new file mode 100644 index 0000000000..977eada860 --- /dev/null +++ b/rust/cloud-storage/ai_toolset/src/schema/transform/nullify_optional.rs @@ -0,0 +1,97 @@ +use schemars::Schema; +use schemars::transform::Transform; +use serde_json::{Map, Value, json}; + +/// Rewrites optional properties (not listed in `required`) to accept `null`. +/// +/// OpenAI strict mode forbids optional properties — optionality must be +/// expressed as a union with `null`. This transform makes that rewrite; +/// [`AddRequired`](super::AddRequired) then marks everything required, and +/// must run **after** this transform since it overwrites the `required` +/// array this one reads. `Option` fields are already nullable and pass +/// through unchanged; this catches properties that are optional any other +/// way (e.g. `#[serde(default)]` fields).
+#[derive(Debug, Clone)] +pub struct NullifyOptional; + +impl Transform for NullifyOptional { + fn transform(&mut self, schema: &mut Schema) { + let Some(obj) = schema.as_object_mut() else { + return; + }; + + let required: Vec = obj + .get("required") + .and_then(Value::as_array) + .map(|names| { + names + .iter() + .filter_map(Value::as_str) + .map(str::to_owned) + .collect() + }) + .unwrap_or_default(); + + let Some(properties) = obj.get_mut("properties").and_then(Value::as_object_mut) else { + return; + }; + for (name, property) in properties.iter_mut() { + if !required.iter().any(|r| r == name) { + make_nullable(property); + } + } + } +} + +fn has_null_type(obj: &Map) -> bool { + match obj.get("type") { + Some(Value::String(t)) => t == "null", + Some(Value::Array(types)) => types.iter().any(|t| t == "null"), + _ => false, + } +} + +fn make_nullable(property: &mut Value) { + let Some(obj) = property.as_object_mut() else { + return; + }; + + // Union node: add a null variant unless one exists. + if let Some(Value::Array(variants)) = obj.get_mut("anyOf") { + let has_null = variants + .iter() + .any(|v| v.as_object().is_some_and(has_null_type)); + if !has_null { + variants.push(json!({"type": "null"})); + } + return; + } + + match obj.get("type").cloned() { + Some(Value::String(t)) => { + if t != "null" { + obj.insert("type".to_string(), json!([t, "null"])); + } + } + Some(Value::Array(mut types)) => { + if !types.iter().any(|t| t == "null") { + types.push(json!("null")); + obj.insert("type".to_string(), Value::Array(types)); + } + } + _ => { + // const / allOf / other shapes that can't carry a type union: + // wrap the whole node in a null union instead. + let original = Value::Object(std::mem::take(obj)); + *property = json!({"anyOf": [original, {"type": "null"}]}); + return; + } + } + + // An enum must also admit null now that the type does. 
+ if let Some(Value::Array(values)) = obj.get_mut("enum") + && !values.iter().any(Value::is_null) + { + values.push(Value::Null); + } +} diff --git a/rust/cloud-storage/ai_toolset/src/schema/transform/ref_siblings.rs b/rust/cloud-storage/ai_toolset/src/schema/transform/ref_siblings.rs new file mode 100644 index 0000000000..b35f1f2429 --- /dev/null +++ b/rust/cloud-storage/ai_toolset/src/schema/transform/ref_siblings.rs @@ -0,0 +1,64 @@ +use schemars::Schema; +use schemars::transform::Transform; + +/// Normalises `$ref` nodes that carry sibling keywords. +/// +/// Downstream TS tooling (`json-schema-to-typescript`) predates Draft 2020-12 +/// and doesn't merge `$ref` with sibling keywords. This [`schemars::Transform`] +/// rewrites those nodes so the output is consumable: +/// +/// * `$ref` + `description` only → drops the description (the referenced type +/// already carries its own). +/// * `$ref` + structural siblings (`properties`, `required`, …) → rewrites to +/// `allOf` so the intersection is explicit.
+#[derive(Debug, Clone)] +pub struct NormaliseRefSiblings; + +impl Transform for NormaliseRefSiblings { + fn transform(&mut self, schema: &mut Schema) { + let Some(obj) = schema.as_object_mut() else { + return; + }; + if !obj.contains_key("$ref") { + return; + } + + let sibling_keys: Vec = obj + .keys() + .filter(|k| *k != "$ref" && *k != "$schema") + .cloned() + .collect(); + + if sibling_keys.is_empty() { + return; + } + + if sibling_keys == ["description"] { + obj.remove("description"); + return; + } + + let ref_val = obj.remove("$ref").unwrap(); + let mut ref_part = serde_json::Map::new(); + ref_part.insert("$ref".to_string(), ref_val); + + let mut sibling_part = serde_json::Map::new(); + for key in &sibling_keys { + if key == "description" { + continue; + } + if let Some(val) = obj.remove(key) { + sibling_part.insert(key.clone(), val); + } + } + obj.remove("description"); + + obj.insert( + "allOf".to_string(), + serde_json::json!([ + serde_json::Value::Object(ref_part), + serde_json::Value::Object(sibling_part), + ]), + ); + } +} diff --git a/rust/cloud-storage/ai_toolset/src/schema/transform/required.rs b/rust/cloud-storage/ai_toolset/src/schema/transform/required.rs new file mode 100644 index 0000000000..2dda24ccc9 --- /dev/null +++ b/rust/cloud-storage/ai_toolset/src/schema/transform/required.rs @@ -0,0 +1,25 @@ +use schemars::Schema; +use schemars::transform::Transform; +use serde_json::Value; + +/// Adds all property names to the `required` array, as required by +/// OpenAI's strict mode. +/// +/// Must run **after** [`NullifyOptional`](super::NullifyOptional), which +/// reads the original `required` array to decide which properties need a +/// null union before this transform overwrites it. 
+#[derive(Debug, Clone)] +pub struct AddRequired; + +impl Transform for AddRequired { + fn transform(&mut self, schema: &mut Schema) { + let Some(obj) = schema.as_object_mut() else { + return; + }; + let Some(properties) = obj.get("properties").and_then(Value::as_object) else { + return; + }; + let property_names: Vec = properties.keys().cloned().map(Value::String).collect(); + obj.insert("required".to_string(), Value::Array(property_names)); + } +} diff --git a/rust/cloud-storage/ai_toolset/src/schema/transform/rewrite_one_of.rs b/rust/cloud-storage/ai_toolset/src/schema/transform/rewrite_one_of.rs new file mode 100644 index 0000000000..68ebc90e05 --- /dev/null +++ b/rust/cloud-storage/ai_toolset/src/schema/transform/rewrite_one_of.rs @@ -0,0 +1,27 @@ +use schemars::Schema; +use schemars::transform::Transform; + +/// Rewrites `oneOf` to `anyOf`. +/// +/// Neither OpenAI nor Anthropic strict mode supports `oneOf`; both support +/// `anyOf`. schemars emits `oneOf` for enums whose variants carry doc +/// comments — those variants are mutually exclusive `const` subschemas, so +/// the rewrite is semantically lossless for generation purposes. +#[derive(Debug, Clone)] +pub struct OneOfToAnyOf; + +impl Transform for OneOfToAnyOf { + fn transform(&mut self, schema: &mut Schema) { + let Some(obj) = schema.as_object_mut() else { + return; + }; + // If anyOf is already present, leave oneOf in place for the + // validator to reject rather than guessing how to merge them.
+ if obj.contains_key("anyOf") { + return; + } + if let Some(subschemas) = obj.remove("oneOf") { + obj.insert("anyOf".to_string(), subschemas); + } + } +} diff --git a/rust/cloud-storage/ai_toolset/src/schema/transform/strip_unsupported.rs b/rust/cloud-storage/ai_toolset/src/schema/transform/strip_unsupported.rs new file mode 100644 index 0000000000..126a5e7388 --- /dev/null +++ b/rust/cloud-storage/ai_toolset/src/schema/transform/strip_unsupported.rs @@ -0,0 +1,107 @@ +use schemars::Schema; +use schemars::transform::Transform; +use serde_json::Value; + +/// String formats accepted by both OpenAI and Anthropic strict mode. +/// +/// Anthropic additionally supports `uri`, but OpenAI's list omits it, so it +/// is stripped to stay valid on both providers. +const FORMAT_WHITELIST: &[&str] = &[ + "date-time", + "time", + "date", + "duration", + "email", + "hostname", + "ipv4", + "ipv6", + "uuid", +]; + +/// schemars type-width artifacts (`"format": "uint32"` on a `u32` field, +/// etc.). Stripped silently — they encode the Rust type, not a constraint +/// the model needs to know about. +const TYPE_WIDTH_FORMATS: &[&str] = &[ + "int8", "int16", "int32", "int64", "int128", "uint8", "uint16", "uint32", "uint64", "uint128", + "float", "double", +]; + +/// Keywords rejected by at least one provider's strict mode, stripped and +/// recorded in the property's `description` so the constraint still reaches +/// the model as guidance. +/// +/// Numeric bounds (`minimum`, …) are OpenAI-supported but Anthropic-rejected; +/// string lengths (`minLength`, `maxLength`) are rejected by both; array +/// constraints beyond `minItems: 0|1` are Anthropic-rejected; `default` is +/// not in OpenAI's supported keyword set.
+const STRIPPED_KEYWORDS: &[&str] = &[ + "minimum", + "maximum", + "exclusiveMinimum", + "exclusiveMaximum", + "multipleOf", + "minLength", + "maxLength", + "maxItems", + "uniqueItems", + "contains", + "minContains", + "maxContains", + "default", +]; + +/// Strips JSON Schema keywords that OpenAI or Anthropic strict mode rejects. +/// +/// Stripped constraints are appended to the node's `description` so the +/// model still sees them as soft guidance (mirroring what the official +/// Anthropic SDKs do); validation of the original constraints remains the +/// tool implementation's job. +#[derive(Debug, Clone)] +pub struct StripUnsupported; + +impl Transform for StripUnsupported { + fn transform(&mut self, schema: &mut Schema) { + let Some(obj) = schema.as_object_mut() else { + return; + }; + + let mut notes: Vec = Vec::new(); + + for key in STRIPPED_KEYWORDS { + if let Some(value) = obj.remove(*key) { + notes.push(format!("{key}: {value}")); + } + } + + // `minItems` is allowed by both providers only for 0 and 1. 
+ if let Some(n) = obj.get("minItems").and_then(Value::as_u64) + && n > 1 + { + obj.remove("minItems"); + notes.push(format!("minItems: {n}")); + } + + if let Some(format) = obj.get("format").and_then(Value::as_str).map(str::to_owned) + && !FORMAT_WHITELIST.contains(&format.as_str()) + { + obj.remove("format"); + if !TYPE_WIDTH_FORMATS.contains(&format.as_str()) { + notes.push(format!("format: {format}")); + } + } + + if notes.is_empty() { + return; + } + let note = format!("Constraints: {}", notes.join(", ")); + match obj.get_mut("description") { + Some(Value::String(description)) => { + description.push_str("\n\n"); + description.push_str(¬e); + } + _ => { + obj.insert("description".to_string(), Value::String(note)); + } + } + } +} diff --git a/rust/cloud-storage/ai_toolset/src/schema/validate/extract.rs b/rust/cloud-storage/ai_toolset/src/schema/validate/extract.rs new file mode 100644 index 0000000000..fdc7ea2654 --- /dev/null +++ b/rust/cloud-storage/ai_toolset/src/schema/validate/extract.rs @@ -0,0 +1,29 @@ +use super::ValidationError; +use schemars::Schema; +use serde_json::Value; + +pub struct Extract { + key: &'static str, +} + +impl Extract { + pub fn new(key: &'static str) -> Self { + Self { key } + } + + pub fn extract(&self, schema: &Schema) -> Result { + schema + .get(self.key) + .and_then(|v| match v { + Value::String(s) => { + if s.is_empty() { + None + } else { + Some(s.to_owned()) + } + } + _ => None, + }) + .ok_or(ValidationError::MissingMetadata) + } +} diff --git a/rust/cloud-storage/ai_toolset/src/schema/validate/mod.rs b/rust/cloud-storage/ai_toolset/src/schema/validate/mod.rs new file mode 100644 index 0000000000..0cacdfec43 --- /dev/null +++ b/rust/cloud-storage/ai_toolset/src/schema/validate/mod.rs @@ -0,0 +1,58 @@ +//! Validators applied to generated AI input schemas. +//! +//! Internal to the `schema` module β€” outside consumers go through +//! [`generate_validated_input_schema`](super::generate::generate_validated_input_schema) +//! 
instead. + +mod extract; +mod one_of; +mod recursive; +mod strict; + +use super::error::ValidationError; +use extract::Extract; +use schemars::Schema; + +pub use one_of::ValidateNoOneOf; +pub use recursive::RecursiveValidate; +pub use strict::{ValidateAdditionalProperties, ValidateEnumPrimitives, ValidateNoRefs}; + +/// A validating visitor similar to [`Transform`](schemars::transform::Transform) +/// +/// Validators only see one schema node at a time and never traverse; wrap one +/// in [`RecursiveValidate`] to apply it to a schema and all of its subschemas. +/// Schema rewrites belong in [`Transform`](schemars::transform::Transform)s +/// applied via +/// [`RecursiveTransform`](schemars::transform::RecursiveTransform), not here. +pub trait Validate { + fn validate(&self, schema: &Schema) -> Result<(), ValidationError>; +} + +/// Validates a tool's input schema against the strict-mode requirements of +/// both OpenAI and Anthropic. +/// +/// Returns the tool's name and description extracted from the schema +/// metadata. Checks what the transforms cannot fix: surviving `$ref`s +/// (recursive types), non-`false` `additionalProperties` (map types), +/// non-primitive enum members, residual `oneOf`, and a non-object root. 
+/// +/// See: +/// * +/// * +/// * +pub fn validate_tool_schema(schema: &Schema) -> Result<(String, String), ValidationError> { + let name = Extract::new("title").extract(schema)?; + + let description = Extract::new("description").extract(schema)?; + + if schema.get("type").and_then(serde_json::Value::as_str) != Some("object") { + return Err(ValidationError::RootNotObject); + } + + RecursiveValidate(ValidateNoOneOf).validate(schema)?; + RecursiveValidate(ValidateNoRefs).validate(schema)?; + RecursiveValidate(ValidateAdditionalProperties).validate(schema)?; + RecursiveValidate(ValidateEnumPrimitives).validate(schema)?; + + Ok((name, description)) +} diff --git a/rust/cloud-storage/ai_toolset/src/schema/validate/one_of.rs b/rust/cloud-storage/ai_toolset/src/schema/validate/one_of.rs new file mode 100644 index 0000000000..30a0a228cb --- /dev/null +++ b/rust/cloud-storage/ai_toolset/src/schema/validate/one_of.rs @@ -0,0 +1,18 @@ +use schemars::Schema; + +use super::Validate; +use crate::schema::error::ValidationError; + +/// Rejects schemas containing `oneOf`, which is not supported by OpenAI +/// structured outputs. +pub struct ValidateNoOneOf; + +impl Validate for ValidateNoOneOf { + fn validate(&self, schema: &Schema) -> Result<(), ValidationError> { + if schema.get("oneOf").is_some() { + return Err(ValidationError::OneOf); + } + + Ok(()) + } +} diff --git a/rust/cloud-storage/ai_toolset/src/schema/validate/recursive.rs b/rust/cloud-storage/ai_toolset/src/schema/validate/recursive.rs new file mode 100644 index 0000000000..189d8b7a5f --- /dev/null +++ b/rust/cloud-storage/ai_toolset/src/schema/validate/recursive.rs @@ -0,0 +1,50 @@ +use schemars::Schema; +use schemars::transform::{RecursiveTransform, Transform}; + +use super::Validate; +use crate::schema::error::ValidationError; + +#[cfg(test)] +mod test; + +/// Applies the contained [`Validate`] to a schema and all of its subschemas, +/// visiting each node before its children and returning the first error. 
+/// +/// The counterpart of schemars' [`RecursiveTransform`] for validators. +/// Traversal is delegated to [`RecursiveTransform`] itself; validation stays +/// read-only — the internal clone exists only because [`Transform`] requires +/// `&mut` access. +pub struct RecursiveValidate(pub V); + +impl Validate for RecursiveValidate { + fn validate(&self, schema: &Schema) -> Result<(), ValidationError> { + let mut walker = RecursiveTransform(Walker { + validator: &self.0, + error: None, + }); + walker.transform(&mut schema.clone()); + match walker.0.error { + Some(error) => Err(error), + None => Ok(()), + } + } +} + +// Adapts a read-only `Validate` to schemars' infallible `Transform` so +// `RecursiveTransform` can drive the traversal: the first error is stashed +// and every node after it is skipped. +struct Walker<'a, V: ?Sized> { + validator: &'a V, + error: Option, +} + +impl Transform for Walker<'_, V> { + fn transform(&mut self, schema: &mut Schema) { + if self.error.is_some() { + return; + } + if let Err(error) = self.validator.validate(schema) { + self.error = Some(error); + } + } +} diff --git a/rust/cloud-storage/ai_toolset/src/schema/validate/recursive/test.rs b/rust/cloud-storage/ai_toolset/src/schema/validate/recursive/test.rs new file mode 100644 index 0000000000..8a76170671 --- /dev/null +++ b/rust/cloud-storage/ai_toolset/src/schema/validate/recursive/test.rs @@ -0,0 +1,142 @@ +use std::cell::RefCell; + +use schemars::json_schema; + +use super::*; + +/// Records the `title` of every visited node, erroring on a title of "bad". 
+struct RecordTitles { + visited: RefCell>, +} + +impl RecordTitles { + fn new() -> Self { + Self { + visited: RefCell::new(Vec::new()), + } + } + + fn visited(&self) -> Vec { + self.visited.borrow().clone() + } +} + +impl Validate for RecordTitles { + fn validate(&self, schema: &Schema) -> Result<(), ValidationError> { + if let Some(title) = schema.get("title").and_then(|title| title.as_str()) { + if title == "bad" { + return Err(ValidationError::OneOf); + } + self.visited.borrow_mut().push(title.to_string()); + } + Ok(()) + } +} + +#[test] +fn visits_root_and_all_subschema_containers() { + let schema = json_schema!({ + "title": "root", + "properties": { + "a": { "title": "in_properties" } + }, + "items": { "title": "in_items" }, + "prefixItems": [ + { "title": "in_prefix_items" } + ], + "anyOf": [ + { "title": "in_any_of" } + ], + "$defs": { + "d": { "title": "in_defs" } + }, + "additionalProperties": { "title": "in_additional_properties" }, + "not": { "title": "in_not" } + }); + + let validator = RecursiveValidate(RecordTitles::new()); + validator.validate(&schema).unwrap(); + + let visited = validator.0.visited(); + for expected in [ + "root", + "in_properties", + "in_items", + "in_prefix_items", + "in_any_of", + "in_defs", + "in_additional_properties", + "in_not", + ] { + assert!( + visited.iter().any(|title| title == expected), + "expected to visit {expected:?}, visited: {visited:?}" + ); + } +} + +#[test] +fn visits_deeply_nested_nodes() { + let schema = json_schema!({ + "title": "root", + "properties": { + "outer": { + "title": "outer", + "items": { + "title": "inner", + "properties": { + "leaf": { "title": "leaf" } + } + } + } + } + }); + + let validator = RecursiveValidate(RecordTitles::new()); + validator.validate(&schema).unwrap(); + + assert_eq!( + validator.0.visited(), + vec!["root", "outer", "inner", "leaf"] + ); +} + +#[test] +fn visits_parent_before_child() { + let schema = json_schema!({ + "title": "parent", + "properties": { + "a": { "title": 
"child" } + } + }); + + let validator = RecursiveValidate(RecordTitles::new()); + validator.validate(&schema).unwrap(); + + assert_eq!(validator.0.visited(), vec!["parent", "child"]); +} + +#[test] +fn error_propagates_and_stops_validation() { + let schema = json_schema!({ + "title": "root", + "properties": { + "a": { + "title": "bad", + "properties": { + "unreachable": { "title": "below_bad" } + } + } + } + }); + + let validator = RecursiveValidate(RecordTitles::new()); + let result = validator.validate(&schema); + + assert!(matches!(result, Err(ValidationError::OneOf))); + assert_eq!( + validator.0.visited(), + vec!["root"], + "nodes after the failing node must not be validated" + ); +} diff --git a/rust/cloud-storage/ai_toolset/src/schema/validate/strict.rs b/rust/cloud-storage/ai_toolset/src/schema/validate/strict.rs new file mode 100644 index 0000000000..3f161a3626 --- /dev/null +++ b/rust/cloud-storage/ai_toolset/src/schema/validate/strict.rs @@ -0,0 +1,69 @@ +use super::Validate; +use crate::schema::error::ValidationError; +use crate::schema::transform::is_object_schema; +use schemars::Schema; +use serde_json::Value; + +/// Rejects schemas containing `$ref` / `$defs` / `definitions`. +/// +/// The AI input pipeline inlines all subschemas, so any surviving `$ref` +/// means the type is recursive (schemars falls back to `"$ref": "#"`) — +/// Anthropic strict mode rejects recursive schemas, and there is no +/// transform that can fix this. +pub struct ValidateNoRefs; + +impl Validate for ValidateNoRefs { + fn validate(&self, schema: &Schema) -> Result<(), ValidationError> { + if let Some(obj) = schema.as_object() + && (obj.contains_key("$ref") + || obj.contains_key("$defs") + || obj.contains_key("definitions")) + { + return Err(ValidationError::UnsupportedRef); + } + Ok(()) + } +} + +/// Rejects object schemas whose `additionalProperties` is anything but +/// `false`. 
+/// +/// schemars emits a value schema there for map types +/// (`HashMap`); strict mode on both providers requires +/// `additionalProperties: false`, so map-typed tool inputs cannot be +/// expressed — use a `Vec` of key/value structs instead. +pub struct ValidateAdditionalProperties; + +impl Validate for ValidateAdditionalProperties { + fn validate(&self, schema: &Schema) -> Result<(), ValidationError> { + let Some(obj) = schema.as_object() else { + return Ok(()); + }; + if !is_object_schema(obj) { + return Ok(()); + } + match obj.get("additionalProperties") { + Some(Value::Bool(false)) => Ok(()), + _ => Err(ValidationError::AdditionalProperties), + } + } +} + +/// Rejects `enum` arrays containing non-primitive values. +/// +/// Both providers restrict enum members to strings, numbers, booleans, and +/// nulls. +pub struct ValidateEnumPrimitives; + +impl Validate for ValidateEnumPrimitives { + fn validate(&self, schema: &Schema) -> Result<(), ValidationError> { + if let Some(values) = schema.get("enum").and_then(Value::as_array) + && values + .iter() + .any(|v| matches!(v, Value::Object(_) | Value::Array(_))) + { + return Err(ValidationError::ComplexEnum); + } + Ok(()) + } +} diff --git a/rust/cloud-storage/ai_toolset/src/toolset/tool_object/mod.rs b/rust/cloud-storage/ai_toolset/src/toolset/tool_object/mod.rs index 8fab3e94ed..dc6035ad2a 100644 --- a/rust/cloud-storage/ai_toolset/src/toolset/tool_object/mod.rs +++ b/rust/cloud-storage/ai_toolset/src/toolset/tool_object/mod.rs @@ -3,19 +3,12 @@ //! This module contains the compiled tool representations that enable //! runtime deserialization and invocation of tools. 
-#[macro_use] -mod util; - mod json_tool; mod object; mod tool_async; mod user_tool; pub use json_tool::JsonAsyncTool; -pub use object::{SchemaRegistrar, ToolObject, ValidationError}; +pub use object::{SchemaRegistrar, ToolObject}; pub use tool_async::{AsyncToolObject, ToolSetCallable}; pub use user_tool::{UserTool, UserToolResponse}; -pub use util::{ - MinimizedOutput, input_schema_generator, minimized_output_schema_generator, - output_schema_generator, validate_tool_schema, -}; diff --git a/rust/cloud-storage/ai_toolset/src/toolset/tool_object/object.rs b/rust/cloud-storage/ai_toolset/src/toolset/tool_object/object.rs index ca82854d37..383b5d6873 100644 --- a/rust/cloud-storage/ai_toolset/src/toolset/tool_object/object.rs +++ b/rust/cloud-storage/ai_toolset/src/toolset/tool_object/object.rs @@ -1,8 +1,6 @@ use schemars::SchemaGenerator; -use serde_json::Error as JsonError; use serde_json::Map; use serde_json::Value; -use thiserror::Error; /// Closure that registers a tool's input/output types with a shared /// [`SchemaGenerator`], returning `(input_name, output_name)`. @@ -15,8 +13,6 @@ pub type SchemaRegistrar = Box (String, String) pub struct ToolObject { /// The JSON schema describing the tool's input parameters. pub input_schema: Map, - /// The JSON schema describing the tool's output. - pub output_schema: Value, /// A human-readable description of what the tool does. pub description: String, /// The unique name of the tool. @@ -27,26 +23,3 @@ pub struct ToolObject { /// for combined schema generation. pub schema_registrar: SchemaRegistrar, } - -/// Errors that can occur when validating a tool's schema. -#[derive(Debug, Error)] -pub enum ValidationError { - /// The schema is missing required metadata (title or description). - #[error("missing metadata")] - MissingMetadata, - /// Failed to serialize the schema to JSON. - #[error("could not convert to json")] - JsonSerialization(JsonError), - /// The schema contains nested objects which are not supported. 
- #[error("schema exceeds depth one - nested objects with properties are not allowed")] - ExceedsDepthOne, - /// The schema title is empty. - #[error("title is empty")] - EmptyTitle, - /// The schema contains `oneOf` which is not supported for AI tools. - #[error("schema must not have oneOf set. Do not use descriptions or /// on enum types.")] - OneOf, - /// Schema must be a serde_json::Value::Object - #[error("schema must be a serde_json::Value::Object")] - ExpectedObject, -} diff --git a/rust/cloud-storage/ai_toolset/src/toolset/tool_object/tool_async.rs b/rust/cloud-storage/ai_toolset/src/toolset/tool_object/tool_async.rs index bb910dd884..dac2bbefd2 100644 --- a/rust/cloud-storage/ai_toolset/src/toolset/tool_object/tool_async.rs +++ b/rust/cloud-storage/ai_toolset/src/toolset/tool_object/tool_async.rs @@ -1,5 +1,5 @@ -use super::object::{SchemaRegistrar, ToolObject, ValidationError}; -use super::util::validate_tool_schema; +use super::object::{SchemaRegistrar, ToolObject}; +use crate::schema::{ValidatedSchema, ValidationError, generate_validated_input_schema}; use crate::{AsyncTool, RequestContext, ServiceContext, ToolCallError, ToolResult}; use async_trait::async_trait; use axum::extract::FromRef; @@ -153,8 +153,11 @@ where + Sync, O: Serialize + JsonSchema + 'static, { - let input_schema = generate_tool_input_schema!(&T); - let (name, description) = validate_tool_schema(&input_schema)?; + let ValidatedSchema { + name, + description, + schema: input_schema, + } = generate_validated_input_schema::()?; let input_schema_json = serde_json::to_value(input_schema).map_err(ValidationError::JsonSerialization)?; let serde_json::Value::Object(input_schema_json) = input_schema_json else { @@ -169,10 +172,6 @@ where }) }); - let output_schema = generate_tool_output_schema!(&O); - let output_schema_json = - serde_json::to_value(&output_schema).map_err(ValidationError::JsonSerialization)?; - let schema_registrar: SchemaRegistrar = Box::new(|generator: &mut 
schemars::SchemaGenerator| { generator.subschema_for::(); @@ -183,7 +182,6 @@ where Ok(Self { name, input_schema: input_schema_json, - output_schema: output_schema_json, description, deserializer, schema_registrar, @@ -265,7 +263,6 @@ where ToolObject { name: self.name, input_schema: self.input_schema, - output_schema: self.output_schema, description: self.description, deserializer: new_deserializer, schema_registrar: self.schema_registrar, diff --git a/rust/cloud-storage/ai_toolset/src/toolset/tool_object/util.rs b/rust/cloud-storage/ai_toolset/src/toolset/tool_object/util.rs deleted file mode 100644 index 68f7e1e1a8..0000000000 --- a/rust/cloud-storage/ai_toolset/src/toolset/tool_object/util.rs +++ /dev/null @@ -1,312 +0,0 @@ -use super::object::ValidationError; -use schemars::{ - Schema, SchemaGenerator, - generate::SchemaSettings, - transform::{RecursiveTransform, Transform}, -}; - -/// Validates a tool's input schema against OpenAI's structured output requirements. -/// -/// Returns the tool's name and description extracted from the schema metadata. -/// See: -pub fn validate_tool_schema(schema: &Schema) -> Result<(String, String), ValidationError> { - let name = schema - .get("title") - .and_then(|title| title.as_str()) - .ok_or(ValidationError::MissingMetadata)? - .to_string(); - validate_title(&name)?; - - let description = schema - .get("description") - .and_then(|description| description.as_str()) - .ok_or(ValidationError::MissingMetadata)? 
- .to_string(); - - ValidateNoOneOf.visit(schema); - - Ok((name, description)) -} - -// Visitor trait based on schemars Transform -// but does not take ownership of or mutate schema -trait Visit { - fn visit(&self, schema: &Schema); -} - -struct ValidateNoOneOf; - -impl Visit for ValidateNoOneOf { - fn visit(&self, schema: &Schema) { - if schema.get("oneOf").is_some() { - panic!("{}", ValidationError::OneOf); - } - - visit_subschemas(self, schema); - } -} - -// based on transform_subschemas in schemars -fn visit_subschemas(t: &T, schema: &Schema) { - for (key, value) in schema.as_object().into_iter().flatten() { - // This is intentionally written to work with multiple JSON Schema versions, so that - // users can add their own transforms on the end of e.g. `SchemaSettings::draft07()` and - // they will still apply to all subschemas "as expected". - // This is why this match statement contains both `additionalProperties` (which was - // dropped in draft 2020-12) and `prefixItems` (which was added in draft 2020-12). 
- match key.as_str() { - "not" - | "if" - | "then" - | "else" - | "contains" - | "additionalProperties" - | "propertyNames" - | "additionalItems" => { - if let Ok(subschema) = value.try_into() { - t.visit(subschema); - } - } - "allOf" | "anyOf" | "oneOf" | "prefixItems" => { - if let Some(array) = value.as_array() { - for value in array { - if let Ok(subschema) = value.try_into() { - t.visit(subschema); - } - } - } - } - // Support `items` array even though this is not allowed in draft 2020-12 (see above - // comment) - "items" => { - if let Some(array) = value.as_array() { - for value in array { - if let Ok(subschema) = value.try_into() { - t.visit(subschema); - } - } - } else if let Ok(subschema) = value.try_into() { - t.visit(subschema); - } - } - "properties" | "patternProperties" | "$defs" | "definitions" => { - if let Some(obj) = value.as_object() { - for value in obj.values() { - if let Ok(subschema) = value.try_into() { - t.visit(subschema); - } - } - } - } - _ => {} - } - } -} - -#[derive(Debug, Clone)] -struct AddRequired; - -// adds all property names to required array -impl Transform for AddRequired { - fn transform(&mut self, schema: &mut Schema) { - let properties = match schema.as_object() { - Some(obj) => match obj.get("properties") { - Some(properties) => properties, - None => { - return; - } - }, - None => { - return; - } - }; - let property_names = match properties.as_object() { - Some(properties) => properties - .keys() - .map(|key| key.to_string()) - .collect::>(), - None => { - return; - } - }; - schema.insert("required".to_string(), property_names.into()); - } -} - -#[derive(Debug, Clone)] -struct AdditionalPropertiesFalse; - -// adds additionalProperties: false for all objects -impl Transform for AdditionalPropertiesFalse { - fn transform(&mut self, schema: &mut Schema) { - if let Some(obj) = schema.as_object_mut() { - obj.insert("additionalProperties".to_string(), false.into()); - } - } -} - -/// Creates a schema generator configured for 
tool input schemas. -/// -/// The generated schemas include `required` arrays for all properties and -/// `additionalProperties: false` as required by OpenAI's structured outputs. -pub fn input_schema_generator() -> SchemaGenerator { - SchemaSettings::draft2020_12() - .with(|s| { - s.meta_schema = None; - s.inline_subschemas = true; - }) - .with_transform(RecursiveTransform(AddRequired)) - .with_transform(RecursiveTransform(AdditionalPropertiesFalse)) - .into_generator() -} - -/// Schema transform that simplifies output schemas for AI consumption. -/// -/// Removes unnecessary fields like `title`, `format`, `required`, `type`, etc., -/// keeping only property names and descriptions. -#[derive(Debug, Clone)] -pub struct MinimizedOutput; - -impl Transform for MinimizedOutput { - fn transform(&mut self, schema: &mut Schema) { - if let Some(obj) = schema.as_object_mut() { - obj.remove("title"); - obj.remove("format"); - obj.remove("required"); - obj.remove("additionalProperties"); - obj.remove("type"); - obj.remove("$ref"); - obj.remove("$defs"); - } - } -} - -/// Creates a schema generator that produces minimized output schemas. -/// -/// Uses [`MinimizedOutput`] transform to strip unnecessary schema fields. -pub fn minimized_output_schema_generator() -> SchemaGenerator { - SchemaSettings::draft2020_12() - .with(|s| { - s.meta_schema = None; - s.inline_subschemas = true; - }) - .with_transform(RecursiveTransform(MinimizedOutput)) - .into_generator() -} - -/// Creates a schema generator for tool output schemas. -/// -/// Uses draft 2020-12 settings with inlined subschemas and no meta schema. -pub fn output_schema_generator() -> SchemaGenerator { - SchemaSettings::draft2020_12() - .with(|s| { - s.meta_schema = None; - s.inline_subschemas = true; - }) - .into_generator() -} - -/// Generates a JSON schema for a tool's input parameters. -/// -/// Uses [`input_schema_generator`] to create a schema compliant with -/// OpenAI's structured output requirements. 
-#[macro_export] -macro_rules! generate_tool_input_schema { - ($tool:ty) => {{ - use $crate::tool_object::input_schema_generator; - input_schema_generator().into_root_schema_for::<$tool>() - }}; -} - -/// Generates a JSON schema for a tool's output type. -/// -/// Uses [`output_schema_generator`] to create a standard JSON schema. -#[macro_export] -macro_rules! generate_tool_output_schema { - ($tool:ty) => {{ - use $crate::tool_object::output_schema_generator; - output_schema_generator().into_root_schema_for::<$tool>() - }}; -} - -fn validate_title(title: &str) -> Result<(), ValidationError> { - if title.is_empty() { - return Err(ValidationError::EmptyTitle); - } - Ok(()) -} - -#[cfg(test)] -mod tests { - use super::*; - use schemars::JsonSchema; - use serde::Deserialize; - - // Test struct with valid schema (should pass) - #[derive(Debug, JsonSchema, Deserialize, Clone)] - #[schemars( - description = "Valid test schema with simple properties", - title = "ValidTestSchema" - )] - #[allow(dead_code)] - struct ValidTestSchema { - #[schemars(description = "A simple string field")] - pub simple_field: Option, - - #[schemars(description = "A vector of strings")] - pub list_field: Option>, - - #[schemars(description = "A boolean flag")] - pub flag_field: Option, - - #[schemars(description = "An integer value")] - pub number_field: Option, - } - - // Test enum with doc comments that should cause oneOf (should fail) - #[derive(Debug, JsonSchema, Deserialize, Clone)] - #[schemars( - description = "Invalid enum with doc comments that generates oneOf", - title = "InvalidEnumSchema" - )] - #[allow(dead_code)] - struct InvalidEnumSchema { - #[schemars(description = "An enum that will generate oneOf")] - pub enum_field: BadEnum, - } - - // Enum with doc comments that will cause oneOf generation - #[derive(Debug, JsonSchema, Deserialize, Clone)] - #[allow(dead_code)] - enum BadEnum { - /// This doc comment will cause oneOf - Variant1, - /// This doc comment will also cause oneOf - 
Variant2, - } - - #[test] - fn test_validate_tool_schema_passes() { - let schema = generate_tool_input_schema!(ValidTestSchema); - - let result = validate_tool_schema(&schema); - assert!( - result.is_ok(), - "Valid schema should pass validation: {:?}", - result - ); - - let (name, description) = result.unwrap(); - assert_eq!(name, "ValidTestSchema"); - assert_eq!(description, "Valid test schema with simple properties"); - } - - #[test] - #[should_panic(expected = "schema must not have oneOf set")] - fn test_validate_tool_schema_fails_on_one_of() { - let schema = generate_tool_input_schema!(InvalidEnumSchema); - - // This should panic when oneOf is detected - let _result = validate_tool_schema(&schema); - } -} diff --git a/rust/cloud-storage/ai_toolset/src/toolset/types.rs b/rust/cloud-storage/ai_toolset/src/toolset/types.rs index 10b4a6588c..98a5b2f8d6 100644 --- a/rust/cloud-storage/ai_toolset/src/toolset/types.rs +++ b/rust/cloud-storage/ai_toolset/src/toolset/types.rs @@ -1,6 +1,7 @@ //! 
types -use super::tool_object::{AsyncToolObject, UserTool, UserToolResponse, ValidationError}; +use super::tool_object::{AsyncToolObject, UserTool, UserToolResponse}; use crate::RequestContext; +use crate::schema::ValidationError; use crate::{AsyncTool, ToolResult}; use axum::extract::FromRef; use schemars::{JsonSchema, Schema}; diff --git a/rust/cloud-storage/call/src/inbound/toolset/test.rs b/rust/cloud-storage/call/src/inbound/toolset/test.rs index 7c5e6c24a9..df26791a98 100644 --- a/rust/cloud-storage/call/src/inbound/toolset/test.rs +++ b/rust/cloud-storage/call/src/inbound/toolset/test.rs @@ -1,29 +1,27 @@ use super::list_call_records::{ListCallRecords, build_filter}; use super::read_call_record::ReadCallRecord; -use ai_toolset::generate_tool_input_schema; -use ai_toolset::tool_object::validate_tool_schema; +use ai_toolset::schema::generate_validated_input_schema; use filter_ast::Expr; use item_filters::{CallStatus, ast::call::CallLiteral}; use serde_json::Value; #[test] fn test_list_call_records_schema_validation() { - let schema = generate_tool_input_schema!(ListCallRecords); - - let result = validate_tool_schema(&schema); + let result = generate_validated_input_schema::(); assert!(result.is_ok(), "{:?}", result); - let (name, description) = result.unwrap(); + let validated = result.unwrap(); assert_eq!( - name, "ListCallRecords", + validated.name, "ListCallRecords", "Tool name should match the schemars title" ); assert!( - description.contains("List"), + validated.description.contains("List"), "Description should contain expected text" ); - let schema_json = serde_json::to_value(&schema).expect("schema should serialize to JSON"); + let schema_json = + serde_json::to_value(&validated.schema).expect("schema should serialize to JSON"); let status_schema = schema_property(&schema_json, "status"); let status_values = find_enum_values(status_schema) .expect("status schema should include supported enum values"); @@ -71,18 +69,16 @@ fn 
test_list_call_records_attended_filter_still_supported() { #[test] fn test_read_call_record_schema_validation() { - let schema = generate_tool_input_schema!(ReadCallRecord); - - let result = validate_tool_schema(&schema); + let result = generate_validated_input_schema::(); assert!(result.is_ok(), "{:?}", result); - let (name, description) = result.unwrap(); + let validated = result.unwrap(); assert_eq!( - name, "ReadCallRecord", + validated.name, "ReadCallRecord", "Tool name should match the schemars title" ); assert!( - description.contains("transcript"), + validated.description.contains("transcript"), "Description should contain expected text" ); } diff --git a/rust/cloud-storage/channels/src/inbound/toolset/test.rs b/rust/cloud-storage/channels/src/inbound/toolset/test.rs index 1b3becfab4..45795a9206 100644 --- a/rust/cloud-storage/channels/src/inbound/toolset/test.rs +++ b/rust/cloud-storage/channels/src/inbound/toolset/test.rs @@ -1,39 +1,37 @@ use super::{ReadChannelMessageContext, ReadChannelMessages, ReadChannelThread}; -use ai_toolset::tool_object::validate_tool_schema; -use ai_toolset::{generate_tool_input_schema, generate_tool_output_schema}; +use ai_toolset::schema::generate_validated_input_schema; #[test] fn read_channel_messages_schema_is_valid() { - let schema = generate_tool_input_schema!(ReadChannelMessages); - let result = validate_tool_schema(&schema); + let result = generate_validated_input_schema::(); assert!(result.is_ok(), "{result:?}"); - let (name, description) = result.unwrap(); - assert_eq!(name, "ReadChannelMessages"); - assert!(description.contains("Read a small structured window")); - - let _ = generate_tool_output_schema!(super::ReadChannelMessagesResponse); + let validated = result.unwrap(); + assert_eq!(validated.name, "ReadChannelMessages"); + assert!( + validated + .description + .contains("Read a small structured window") + ); } #[test] fn read_channel_message_context_schema_is_valid() { - let schema = 
generate_tool_input_schema!(ReadChannelMessageContext); - let result = validate_tool_schema(&schema); + let result = generate_validated_input_schema::(); assert!(result.is_ok(), "{result:?}"); - let (name, description) = result.unwrap(); - assert_eq!(name, "ReadChannelMessageContext"); - assert!(description.contains("Read the local channel and thread context")); - - let _ = generate_tool_output_schema!(super::ReadChannelMessageContextResponse); + let validated = result.unwrap(); + assert_eq!(validated.name, "ReadChannelMessageContext"); + assert!( + validated + .description + .contains("Read the local channel and thread context") + ); } #[test] fn read_channel_thread_schema_is_valid() { - let schema = generate_tool_input_schema!(ReadChannelThread); - let result = validate_tool_schema(&schema); + let result = generate_validated_input_schema::(); assert!(result.is_ok(), "{result:?}"); - let (name, description) = result.unwrap(); - assert_eq!(name, "ReadChannelThread"); - assert!(description.contains("Read")); - - let _ = generate_tool_output_schema!(super::ReadChannelThreadResponse); + let validated = result.unwrap(); + assert_eq!(validated.name, "ReadChannelThread"); + assert!(validated.description.contains("Read")); } diff --git a/rust/cloud-storage/chat/src/inbound/toolset/test.rs b/rust/cloud-storage/chat/src/inbound/toolset/test.rs index 18944a8316..364e1e6fc8 100644 --- a/rust/cloud-storage/chat/src/inbound/toolset/test.rs +++ b/rust/cloud-storage/chat/src/inbound/toolset/test.rs @@ -1,21 +1,18 @@ use super::read_chat::ReadChat; -use ai_toolset::generate_tool_input_schema; -use ai_toolset::tool_object::validate_tool_schema; +use ai_toolset::schema::generate_validated_input_schema; #[test] fn test_read_chat_schema_validation() { - let schema = generate_tool_input_schema!(ReadChat); - - let result = validate_tool_schema(&schema); + let result = generate_validated_input_schema::(); assert!(result.is_ok(), "{:?}", result); - let (name, description) = 
result.unwrap(); + let validated = result.unwrap(); assert_eq!( - name, "ReadChat", + validated.name, "ReadChat", "Tool name should match the schemars title" ); assert!( - description.contains("chat"), + validated.description.contains("chat"), "Description should contain expected text" ); } diff --git a/rust/cloud-storage/document_cognition_service/src/api/swagger.rs b/rust/cloud-storage/document_cognition_service/src/api/swagger.rs index 2a16cd4cf1..b29b057726 100644 --- a/rust/cloud-storage/document_cognition_service/src/api/swagger.rs +++ b/rust/cloud-storage/document_cognition_service/src/api/swagger.rs @@ -174,10 +174,6 @@ use utoipa::OpenApi; StartAuthRequest, StartAuthResponse, model_error_response::ErrorResponse, - - // Tools - ai_toolset::schema::ToolSchema, - ai_toolset::schema::ToolSchemas, ), ), tags( diff --git a/rust/cloud-storage/documents/src/inbound/toolset/test.rs b/rust/cloud-storage/documents/src/inbound/toolset/test.rs index 8368902639..68fc412755 100644 --- a/rust/cloud-storage/documents/src/inbound/toolset/test.rs +++ b/rust/cloud-storage/documents/src/inbound/toolset/test.rs @@ -1,57 +1,50 @@ use super::*; -use ai_toolset::generate_tool_input_schema; -use ai_toolset::tool_object::validate_tool_schema; +use ai_toolset::schema::generate_validated_input_schema; #[test] fn test_read_metadata_schema_validation() { - let schema = generate_tool_input_schema!(ReadMetadata); - - let result = validate_tool_schema(&schema); + let result = generate_validated_input_schema::(); assert!(result.is_ok(), "{:?}", result); - let (name, description) = result.unwrap(); + let validated = result.unwrap(); assert_eq!( - name, "ReadMetadata", + validated.name, "ReadMetadata", "Tool name should match the schemars title" ); assert!( - description.contains("Retrieve"), + validated.description.contains("Retrieve"), "Description should contain expected text" ); } #[test] fn test_read_content_schema_validation() { - let schema = generate_tool_input_schema!(ReadContent); - 
- let result = validate_tool_schema(&schema); + let result = generate_validated_input_schema::(); assert!(result.is_ok(), "{:?}", result); - let (name, description) = result.unwrap(); + let validated = result.unwrap(); assert_eq!( - name, "ReadContent", + validated.name, "ReadContent", "Tool name should match the schemars title" ); assert!( - description.contains("Retrieve"), + validated.description.contains("Retrieve"), "Description should contain expected text" ); } #[test] fn test_create_document_schema_validation() { - let schema = generate_tool_input_schema!(CreateDocument); - - let result = validate_tool_schema(&schema); + let result = generate_validated_input_schema::(); assert!(result.is_ok(), "{:?}", result); - let (name, description) = result.unwrap(); + let validated = result.unwrap(); assert_eq!( - name, "CreateDocument", + validated.name, "CreateDocument", "Tool name should match the schemars title" ); assert!( - description.contains("Create"), + validated.description.contains("Create"), "Description should contain expected text" ); } diff --git a/rust/cloud-storage/email/src/inbound/toolset/test.rs b/rust/cloud-storage/email/src/inbound/toolset/test.rs index b089329caa..784a10c99f 100644 --- a/rust/cloud-storage/email/src/inbound/toolset/test.rs +++ b/rust/cloud-storage/email/src/inbound/toolset/test.rs @@ -1,7 +1,6 @@ use super::list_labels::build_summary; use super::*; -use ai_toolset::generate_tool_input_schema; -use ai_toolset::tool_object::validate_tool_schema; +use ai_toolset::schema::generate_validated_input_schema; #[test] fn test_build_summary_empty() { @@ -37,54 +36,48 @@ fn test_build_summary_with_labels() { #[test] fn test_update_thread_labels_schema_validation() { - let schema = generate_tool_input_schema!(UpdateThreadLabels); - - let result = validate_tool_schema(&schema); + let result = generate_validated_input_schema::(); assert!(result.is_ok(), "{:?}", result); - let (name, description) = result.unwrap(); + let validated = 
result.unwrap(); assert_eq!( - name, "UpdateThreadLabels", + validated.name, "UpdateThreadLabels", "Tool name should match the schemars title" ); assert!( - description.contains("label"), + validated.description.contains("label"), "Description should contain expected text" ); } #[test] fn test_send_email_schema_validation() { - let schema = generate_tool_input_schema!(SendEmail); - - let result = validate_tool_schema(&schema); + let result = generate_validated_input_schema::(); assert!(result.is_ok(), "{:?}", result); - let (name, description) = result.unwrap(); + let validated = result.unwrap(); assert_eq!( - name, "SendEmail", + validated.name, "SendEmail", "Tool name should match the schemars title" ); assert!( - description.contains("send"), + validated.description.contains("send"), "Description should contain expected text" ); } #[test] fn test_get_thread_schema_validation() { - let schema = generate_tool_input_schema!(GetThread); - - let result = validate_tool_schema(&schema); + let result = generate_validated_input_schema::(); assert!(result.is_ok(), "{:?}", result); - let (name, description) = result.unwrap(); + let validated = result.unwrap(); assert_eq!( - name, "GetThread", + validated.name, "GetThread", "Tool name should match the schemars title" ); assert!( - description.contains("thread"), + validated.description.contains("thread"), "Description should contain expected text" ); } diff --git a/rust/cloud-storage/notification/src/inbound/ai_tool/test.rs b/rust/cloud-storage/notification/src/inbound/ai_tool/test.rs index dc424d7c6d..88dd9aba6d 100644 --- a/rust/cloud-storage/notification/src/inbound/ai_tool/test.rs +++ b/rust/cloud-storage/notification/src/inbound/ai_tool/test.rs @@ -1,21 +1,18 @@ use super::*; -use ai_toolset::generate_tool_input_schema; -use ai_toolset::tool_object::validate_tool_schema; +use ai_toolset::schema::generate_validated_input_schema; #[test] fn test_list_notifications_schema_validation() { - let schema = 
generate_tool_input_schema!(ListNotifications); - - let result = validate_tool_schema(&schema); + let result = generate_validated_input_schema::(); assert!(result.is_ok(), "{:?}", result); - let (name, description) = result.unwrap(); + let validated = result.unwrap(); assert_eq!( - name, "ListNotifications", + validated.name, "ListNotifications", "Tool name should match the schemars title" ); assert!( - description.contains("List the current user"), + validated.description.contains("List the current user"), "Description should contain expected text" ); } @@ -64,7 +61,9 @@ fn test_list_notifications_deserialization() { #[test] #[ignore = "prints the input schema"] fn print_list_notifications_input_schema() { - let schema = generate_tool_input_schema!(ListNotifications); + let schema = generate_validated_input_schema::() + .unwrap() + .schema; println!("{}", serde_json::to_string_pretty(&schema).unwrap()); } @@ -72,43 +71,42 @@ fn print_list_notifications_input_schema() { #[test] #[ignore = "prints the output schema"] fn print_list_notifications_output_schema() { - let generator = ai_toolset::tool_object::minimized_output_schema_generator(); - let schema = generator.into_root_schema_for::(); + let schema = schemars::schema_for!(ListNotificationsResponse); println!("{}", serde_json::to_string_pretty(&schema).unwrap()); } #[test] fn test_mark_notifications_seen_schema_validation() { - let schema = generate_tool_input_schema!(MarkNotificationsSeen); - - let result = validate_tool_schema(&schema); + let result = generate_validated_input_schema::(); assert!(result.is_ok(), "{:?}", result); - let (name, description) = result.unwrap(); + let validated = result.unwrap(); assert_eq!( - name, "MarkNotificationsSeen", + validated.name, "MarkNotificationsSeen", "Tool name should match the schemars title" ); assert!( - description.contains("Mark one or more notifications as seen"), + validated + .description + .contains("Mark one or more notifications as seen"), "Description 
should contain expected text" ); } #[test] fn test_mark_notifications_done_schema_validation() { - let schema = generate_tool_input_schema!(MarkNotificationsDone); - - let result = validate_tool_schema(&schema); + let result = generate_validated_input_schema::(); assert!(result.is_ok(), "{:?}", result); - let (name, description) = result.unwrap(); + let validated = result.unwrap(); assert_eq!( - name, "MarkNotificationsDone", + validated.name, "MarkNotificationsDone", "Tool name should match the schemars title" ); assert!( - description.contains("Mark one or more notifications as done"), + validated + .description + .contains("Mark one or more notifications as done"), "Description should contain expected text" ); } @@ -124,7 +122,9 @@ fn test_mark_notifications_seen_deserialization() { #[test] #[ignore = "prints the input schema"] fn print_mark_seen_input_schema() { - let schema = generate_tool_input_schema!(MarkNotificationsSeen); + let schema = generate_validated_input_schema::() + .unwrap() + .schema; println!("{}", serde_json::to_string_pretty(&schema).unwrap()); } @@ -132,7 +132,9 @@ fn print_mark_seen_input_schema() { #[test] #[ignore = "prints the input schema"] fn print_mark_done_input_schema() { - let schema = generate_tool_input_schema!(MarkNotificationsDone); + let schema = generate_validated_input_schema::() + .unwrap() + .schema; println!("{}", serde_json::to_string_pretty(&schema).unwrap()); } @@ -140,7 +142,6 @@ fn print_mark_done_input_schema() { #[test] #[ignore = "prints the output schema"] fn print_output_schema() { - let generator = ai_toolset::tool_object::minimized_output_schema_generator(); - let schema = generator.into_root_schema_for::(); + let schema = schemars::schema_for!(MarkNotificationsResponse); println!("{}", serde_json::to_string_pretty(&schema).unwrap()); } diff --git a/rust/cloud-storage/properties/src/inbound/toolset/test.rs b/rust/cloud-storage/properties/src/inbound/toolset/test.rs index 000f5bd54f..8da8cc033b 100644 --- 
a/rust/cloud-storage/properties/src/inbound/toolset/test.rs +++ b/rust/cloud-storage/properties/src/inbound/toolset/test.rs @@ -1,34 +1,29 @@ #[allow(unused_imports)] use super::*; -use ai_toolset::generate_tool_input_schema; -use ai_toolset::tool_object::validate_tool_schema; +use ai_toolset::schema::generate_validated_input_schema; #[test] fn test_get_entity_properties_schema_validation() { - let schema = generate_tool_input_schema!(GetEntityProperties); - - let result = validate_tool_schema(&schema); + let result = generate_validated_input_schema::(); assert!(result.is_ok(), "{:?}", result); - let (name, description) = result.unwrap(); - assert_eq!(name, "GetEntityProperties"); + let validated = result.unwrap(); + assert_eq!(validated.name, "GetEntityProperties"); assert!( - description.contains("Get all properties"), + validated.description.contains("Get all properties"), "Description should contain expected text" ); } #[test] fn test_set_entity_property_schema_validation() { - let schema = generate_tool_input_schema!(SetEntityProperty); - - let result = validate_tool_schema(&schema); + let result = generate_validated_input_schema::(); assert!(result.is_ok(), "{:?}", result); - let (name, description) = result.unwrap(); - assert_eq!(name, "SetEntityProperty"); + let validated = result.unwrap(); + assert_eq!(validated.name, "SetEntityProperty"); assert!( - description.contains("Set or update a property"), + validated.description.contains("Set or update a property"), "Description should contain expected text" ); } @@ -37,7 +32,9 @@ fn test_set_entity_property_schema_validation() { #[test] #[ignore = "prints the input schema"] fn print_get_input_schema() { - let schema = generate_tool_input_schema!(GetEntityProperties); + let schema = generate_validated_input_schema::() + .unwrap() + .schema; println!("{}", serde_json::to_string_pretty(&schema).unwrap()); } @@ -45,7 +42,9 @@ fn print_get_input_schema() { #[test] #[ignore = "prints the input schema"] fn 
print_set_input_schema() { - let schema = generate_tool_input_schema!(SetEntityProperty); + let schema = generate_validated_input_schema::() + .unwrap() + .schema; println!("{}", serde_json::to_string_pretty(&schema).unwrap()); } @@ -53,8 +52,7 @@ fn print_set_input_schema() { #[test] #[ignore = "prints the output schema"] fn print_get_output_schema() { - let generator = ai_toolset::tool_object::minimized_output_schema_generator(); - let schema = generator.into_root_schema_for::(); + let schema = schemars::schema_for!(GetEntityPropertiesResponse); println!("{}", serde_json::to_string_pretty(&schema).unwrap()); } @@ -62,7 +60,6 @@ fn print_get_output_schema() { #[test] #[ignore = "prints the output schema"] fn print_set_output_schema() { - let generator = ai_toolset::tool_object::minimized_output_schema_generator(); - let schema = generator.into_root_schema_for::(); + let schema = schemars::schema_for!(SetEntityPropertyResponse); println!("{}", serde_json::to_string_pretty(&schema).unwrap()); } diff --git a/rust/cloud-storage/soup/src/inbound/toolset/test.rs b/rust/cloud-storage/soup/src/inbound/toolset/test.rs index 405288c688..2204c54548 100644 --- a/rust/cloud-storage/soup/src/inbound/toolset/test.rs +++ b/rust/cloud-storage/soup/src/inbound/toolset/test.rs @@ -1,8 +1,7 @@ use super::list_entities::build_summary; #[allow(unused_imports)] use super::*; -use ai_toolset::generate_tool_input_schema; -use ai_toolset::tool_object::validate_tool_schema; +use ai_toolset::schema::generate_validated_input_schema; use chrono::Utc; use models_soup::{foreign_entity::SoupForeignEntity, item::SoupItem}; use non_empty::IsEmpty; @@ -10,26 +9,26 @@ use uuid::Uuid; #[test] fn test_list_entities_schema_validation() { - let schema = generate_tool_input_schema!(ListEntities); - - let result = validate_tool_schema(&schema); + let result = generate_validated_input_schema::(); assert!(result.is_ok(), "{:?}", result); - let (name, description) = result.unwrap(); + let validated = 
result.unwrap(); assert_eq!( - name, "ListEntities", + validated.name, "ListEntities", "Tool name should match the schemars title" ); assert!( - description.contains("Browse the user's Macro workspace"), + validated + .description + .contains("Browse the user's Macro workspace"), "Description should contain expected text" ); } #[test] fn test_list_entities_schema_guides_macro_task_queries() { - let schema = generate_tool_input_schema!(ListEntities); - let schema_json = serde_json::to_string(&schema).unwrap(); + let validated = generate_validated_input_schema::().unwrap(); + let schema_json = serde_json::to_string(&validated.schema).unwrap(); assert!( schema_json.contains("prefer this tool over external task trackers such as Linear"), @@ -264,7 +263,9 @@ fn test_converts_foreign_entity_soup_item() { #[test] #[ignore = "prints the input schema"] fn print_input_schema() { - let schema = generate_tool_input_schema!(ListEntities); + let schema = generate_validated_input_schema::() + .unwrap() + .schema; println!("{}", serde_json::to_string_pretty(&schema).unwrap()); } @@ -272,7 +273,6 @@ fn print_input_schema() { #[test] #[ignore = "prints the output schema"] fn print_output_schema() { - let generator = ai_toolset::tool_object::minimized_output_schema_generator(); - let schema = generator.into_root_schema_for::(); + let schema = schemars::schema_for!(ListEntitiesResponse); println!("{}", serde_json::to_string_pretty(&schema).unwrap()); } diff --git a/rust/cloud-storage/teams/src/inbound/toolset/test.rs b/rust/cloud-storage/teams/src/inbound/toolset/test.rs index d930f54f53..ab30298528 100644 --- a/rust/cloud-storage/teams/src/inbound/toolset/test.rs +++ b/rust/cloud-storage/teams/src/inbound/toolset/test.rs @@ -1,21 +1,18 @@ use super::list_team_members::ListTeamMembers; -use ai_toolset::generate_tool_input_schema; -use ai_toolset::tool_object::validate_tool_schema; +use ai_toolset::schema::generate_validated_input_schema; #[test] fn 
test_list_team_members_schema_validation() { - let schema = generate_tool_input_schema!(ListTeamMembers); - - let result = validate_tool_schema(&schema); + let result = generate_validated_input_schema::(); assert!(result.is_ok(), "{:?}", result); - let (name, description) = result.unwrap(); + let validated = result.unwrap(); assert_eq!( - name, "ListTeamMembers", + validated.name, "ListTeamMembers", "Tool name should match the schemars title" ); assert!( - description.contains("List"), + validated.description.contains("List"), "Description should contain expected text" ); }