diff --git a/src/modelAccessors/openai_accessor.py b/src/modelAccessors/openai_accessor.py index adeb049..b75698e 100644 --- a/src/modelAccessors/openai_accessor.py +++ b/src/modelAccessors/openai_accessor.py @@ -87,25 +87,207 @@ def supports_tools(self, model: str) -> bool: def _prepare_schema_for_openai(self, schema: dict) -> dict: """ - Prepare schema for OpenAI's structured output requirements. + Prepare schema for OpenAI's structured output requirements with minimal changes. - OpenAI requires the root schema to have 'type': 'object' and does not support - oneOf/anyOf anywhere in the schema. Pydantic's discriminated unions generate - schemas with oneOf at the root level, and nullable fields use anyOf, so we - need to flatten and clean them. + Instead of aggressively flattening all unions, this applies targeted fixes: + 1. Only flatten problematic union structures + 2. Keep oneOf/anyOf that can work with proper branch structure + 3. Clean nullable fields (anyOf with null) + 4. Ensure all objects have additionalProperties: false + 5. Clean $ref objects with extra keywords + 6. Handle empty objects explicitly + + This preserves the original schema design while ensuring OpenAI compliance. """ - # Check if the schema already has a root type of "object" and no oneOf/anyOf - if schema.get("type") == "object" and not self._contains_oneof_anyof(schema): - return schema + import copy - # If it's a oneOf/anyOf schema (discriminated union), flatten it - if "oneOf" in schema or "anyOf" in schema: - flattened = self._flatten_discriminated_union(schema) + # Always start with a deep copy to avoid modifying the original + result_schema = copy.deepcopy(schema) + + # Clean only problematic anyOf/oneOf structures + # Keep discriminated unions if they're properly structured + if self._has_problematic_unions(result_schema): + result_schema = self._clean_problematic_unions(result_schema) else: - flattened = schema + # Even if we're not flattening, we need to ensure each branch is OpenAI compliant + self._make_union_branches_compliant(result_schema) + + # Clean nullable fields (anyOf with null) - these are always problematic for OpenAI + result_schema = self._clean_oneof_anyof_recursive(result_schema) + + # Apply targeted OpenAI compliance fixes without breaking conditional logic + self._fix_required_fields_recursive(result_schema) + + return result_schema + + def _has_problematic_unions(self, schema: dict) -> bool: + """ + Check if the schema has union structures that need flattening. + + Try to preserve oneOf/anyOf if possible. Only flatten if the branches + can't be made OpenAI-compliant as-is. + """ + if "oneOf" in schema: + # Check if all branches can be made OpenAI compliant + one_of = schema["oneOf"] + for branch in one_of: + if "$ref" in branch: + # $ref branches should be fine if the referenced schema is compliant + continue + elif isinstance(branch, dict): + # Check if this branch can be made compliant + if branch.get("type") == "object" or "properties" in branch: + # This should be fixable - don't flatten + continue + else: + # Complex branch that might need flattening + return True + + # All branches look fine, don't flatten + return False - # Clean any remaining oneOf/anyOf structures (like nullable fields) - return self._clean_oneof_anyof_recursive(flattened) + # Root-level anyOf might need flattening if it's not just nullable + if "anyOf" in schema: + any_of = schema["anyOf"] + # Check if it's just a nullable pattern + if len(any_of) == 2: + types = [] + for item in any_of: + if isinstance(item, dict) and "type" in item: + types.append(item["type"]) + if "null" in types: + # This is just a nullable field, don't flatten at root + return False + # Other anyOf patterns might need flattening + return True + + return False + + def _make_union_branches_compliant(self, schema: dict): + """ + Make each branch of a oneOf/anyOf union OpenAI compliant without flattening. + + This preserves union semantics while ensuring compliance. + """ + if "oneOf" in schema: + for branch in schema["oneOf"]: + if isinstance(branch, dict) and "$ref" not in branch: + self._make_object_compliant(branch) + + if "anyOf" in schema: + for branch in schema["anyOf"]: + if isinstance(branch, dict) and "$ref" not in branch: + self._make_object_compliant(branch) + + # Also recursively fix any nested $defs + for defs_key in ["$defs", "definitions"]: + if defs_key in schema and isinstance(schema[defs_key], dict): + for def_schema in schema[defs_key].values(): + self._fix_required_fields_recursive(def_schema) + + def _make_object_compliant(self, obj: dict): + """Make a single object OpenAI compliant.""" + if obj.get("type") == "object" or "properties" in obj: + obj["additionalProperties"] = False + # Ensure required array exists but don't force all properties + obj.setdefault("required", []) + + def _clean_problematic_unions(self, schema: dict) -> dict: + """ + Clean only the problematic union structures, preserving good ones. + """ + if "oneOf" in schema or "anyOf" in schema: + return self._flatten_discriminated_union(schema) + return schema + + def _fix_required_fields_recursive(self, schema): + """ + Recursively ensure all objects in the schema are OpenAI-compliant. + + Instead of forcing all properties to be required, this applies targeted fixes: + 1. Add additionalProperties: false to all objects + 2. Extend (don't overwrite) existing required arrays only when needed + 3. Handle empty objects explicitly + 4. Clean problematic $ref objects + + This preserves the original schema structure while ensuring OpenAI compliance. + """ + if not isinstance(schema, dict): + return + + # If this is an object type, ensure additionalProperties is false + if schema.get("type") == "object": + schema["additionalProperties"] = False + + # Handle objects with properties + if "properties" in schema and isinstance(schema["properties"], dict): + properties = schema["properties"] + + # Ensure additionalProperties is false for objects with properties + schema["additionalProperties"] = False + + # Handle empty objects explicitly (OpenAI requirement) + if not properties: + schema.setdefault("required", []) + else: + # Ensure required key exists (OpenAI requirement) + schema.setdefault("required", []) + + # Only extend required array if it's missing properties that should be required + # Don't force ALL properties to be required - preserve conditional logic + # For discriminated unions, only the discriminator should be universally required + # Other fields remain conditional based on the original schema design + pass # Let the original schema determine what should be required + + # Handle empty objects without properties (must be explicit) + elif schema.get("type") == "object" and "properties" not in schema: + schema["properties"] = {} + schema["required"] = [] + schema["additionalProperties"] = False + + # Recursively fix objects in $defs and definitions + for defs_key in ["$defs", "definitions"]: + if defs_key in schema and isinstance(schema[defs_key], dict): + for def_schema in schema[defs_key].values(): + self._fix_required_fields_recursive(def_schema) + + # Recursively fix nested objects in properties + if "properties" in schema and isinstance(schema["properties"], dict): + for prop_schema in schema["properties"].values(): + self._fix_required_fields_recursive(prop_schema) + + # Fix $ref objects that have additional keywords (OpenAI doesn't allow this) + self._clean_ref_objects_recursive(schema) + + # Recursively fix array item schemas + if "items" in schema and isinstance(schema["items"], dict): + self._fix_required_fields_recursive(schema["items"]) + + def _clean_ref_objects_recursive(self, schema): + """ + Recursively clean $ref objects that have additional keywords. + + OpenAI's strict validation doesn't allow $ref to be combined with other keywords + like 'default', 'title', etc. This removes such keywords from $ref objects. + """ + if not isinstance(schema, dict): + return + + # If this object has $ref, remove all other keywords except $ref + if "$ref" in schema: + ref_value = schema["$ref"] + schema.clear() + schema["$ref"] = ref_value + return # Don't recurse into a pure $ref object + + # Recursively clean nested structures + for key, value in list(schema.items()): + if isinstance(value, dict): + self._clean_ref_objects_recursive(value) + elif isinstance(value, list): + for item in value: + if isinstance(item, dict): + self._clean_ref_objects_recursive(item) def _contains_oneof_anyof(self, obj) -> bool: """Recursively check if an object contains oneOf or anyOf.""" @@ -140,18 +322,21 @@ def _clean_oneof_anyof_recursive(self, obj): # Create a new dict without anyOf cleaned = {k: v for k, v in obj.items() if k != "anyOf"} cleaned.update(other_schema) - # Make it nullable by not including it in required fields + + # If the field had a null default but is now non-nullable, remove the default + # This prevents invalid schemas where required string fields have null defaults + if cleaned.get("default") is None and cleaned.get("type") != "null": + # Remove null default for non-nullable required fields + cleaned.pop("default", None) + return self._clean_oneof_anyof_recursive(cleaned) - # Handle oneOf (shouldn't happen after flattening, but just in case) + # Handle oneOf - only remove if it's problematic if "oneOf" in obj: - # This is a complex case - for now, take the first option - # In practice, this shouldn't happen after proper flattening - one_of = obj["oneOf"] - if one_of: - cleaned = {k: v for k, v in obj.items() if k != "oneOf"} - cleaned.update(one_of[0]) - return self._clean_oneof_anyof_recursive(cleaned) + # Don't automatically remove oneOf at root level + # Only clean nested oneOf that might be problematic + # Skip root-level oneOf that we want to preserve + pass # Recursively clean all nested objects return {k: self._clean_oneof_anyof_recursive(v) for k, v in obj.items()} @@ -222,7 +407,7 @@ def _flatten_discriminated_union(self, schema: dict) -> dict: flattened = { "type": "object", "properties": all_properties, - "required": list(all_properties.keys()), # OpenAI requires all properties to be in required + "required": list(required_fields), # Only require discriminator and truly required fields "additionalProperties": False } diff --git a/tests/modelAccessors/test_openai_accessor.py b/tests/modelAccessors/test_openai_accessor.py index 2cd845a..719c703 100644 --- a/tests/modelAccessors/test_openai_accessor.py +++ b/tests/modelAccessors/test_openai_accessor.py @@ -124,8 +124,8 @@ def test_tool_support(): def test_prepare_schema_for_openai(): - """Test that discriminated union schemas are properly flattened for OpenAI compatibility.""" - # Test with discriminated union (should be flattened) + """Test that discriminated union schemas are made OpenAI-compliant while preserving structure.""" + # Test with discriminated union (should preserve oneOf structure, not flatten) adapter = cast(TypeAdapter[ModelResponse], TypeAdapter(ClarifierResponse)) union_schema = adapter.json_schema() @@ -133,37 +133,46 @@ def test_prepare_schema_for_openai(): with patch('src.modelAccessors.openai_accessor.OpenAI'): accessor = OpenAIAccessor() - # Test union schema flattening + # Test union schema - should preserve oneOf but make it compliant fixed_schema = accessor._prepare_schema_for_openai(union_schema) - assert fixed_schema["type"] == "object" - assert "properties" in fixed_schema - assert "oneOf" not in fixed_schema - assert "anyOf" not in fixed_schema - # OpenAI requires all properties to be in the required array - expected_props = ["type", "content", "artifacts", "follow_up_ask"] - assert set(fixed_schema["required"]) == set(expected_props) - assert fixed_schema["additionalProperties"] is False - - # Should have properties from both FollowUpResponse and ImplementedResponse - properties = fixed_schema["properties"] - assert "type" in properties - assert "content" in properties # Common field - assert "artifacts" in properties # Common field - assert "follow_up_ask" in properties # FollowUpResponse field - - # Type field should have correct enum values - type_prop = properties["type"] - assert type_prop["type"] == "string" - assert set(type_prop["enum"]) == {"follow_up_required", "implemented"} - # Test object schema (should not be changed) + # Should preserve oneOf structure (better than flattening) + assert "oneOf" in fixed_schema, "Should preserve oneOf structure" + + # Should have proper $defs with compliance + assert "$defs" in fixed_schema + + # Check that $defs objects are compliant + for def_name, def_obj in fixed_schema["$defs"].items(): + if "properties" in def_obj: + assert def_obj.get("additionalProperties") is False, f"{def_name} should have additionalProperties: false" + + # Should preserve original required array logic (not force all properties required) + follow_up_def = fixed_schema["$defs"]["FollowUpResponse"] + impl_def = fixed_schema["$defs"]["ImplementedResponse"] + + # FollowUpResponse should still only require follow_up_ask (preserve original logic) + assert follow_up_def.get("required") == ["follow_up_ask"], "Should preserve original required logic" + + # ImplementedResponse should have no required fields (preserve original logic) + assert impl_def.get("required") == [], "Should preserve original required logic" + + # Test with object schema (should add additionalProperties: false) object_schema = { "type": "object", "properties": {"test": {"type": "string"}}, "required": ["test"] } - unchanged_schema = accessor._prepare_schema_for_openai(object_schema) - assert unchanged_schema == object_schema + fixed_object = accessor._prepare_schema_for_openai(object_schema) + + # Should add additionalProperties: false + expected_schema = { + "type": "object", + "properties": {"test": {"type": "string"}}, + "required": ["test"], + "additionalProperties": False + } + assert fixed_object == expected_schema def test_extract_response_from_openai_format(): @@ -290,3 +299,250 @@ def test_full_modelresponse_compatibility(): assert has_defs defs_key = next(key for key in openai_schema.keys() if key.endswith("defs")) assert "Task" in openai_schema[defs_key] + + +def test_openai_required_array_compliance(): + """Test that all properties are included in required array for OpenAI compliance.""" + with patch('src.modelAccessors.openai_accessor.OpenAI'): + accessor = OpenAIAccessor() + + # Test case 1: Object schema missing some required properties + incomplete_schema = { + "type": "object", + "properties": { + "type": {"type": "string", "enum": ["test"]}, + "content": {"type": "string"}, + "optional_field": {"type": "string", "default": "default"} + }, + "required": ["type"], # Missing content and optional_field + "additionalProperties": False + } + + fixed_schema = accessor._prepare_schema_for_openai(incomplete_schema) + + # All properties should now be in required array + properties = set(fixed_schema["properties"].keys()) + required = set(fixed_schema["required"]) + assert properties == required, f"Properties {properties} != Required {required}" + assert "content" in fixed_schema["required"], "content should be in required array" + + # Test case 2: Empty required array + empty_required_schema = { + "type": "object", + "properties": { + "field1": {"type": "string"}, + "field2": {"type": "number"} + }, + "required": [], + "additionalProperties": False + } + + fixed_empty = accessor._prepare_schema_for_openai(empty_required_schema) + + # All properties should be added to required + assert len(fixed_empty["required"]) == len(fixed_empty["properties"]) + assert set(fixed_empty["required"]) == set(fixed_empty["properties"].keys()) + + # Test case 3: Schema that already has all properties in required (should be unchanged) + complete_schema = { + "type": "object", + "properties": { + "prop1": {"type": "string"}, + "prop2": {"type": "boolean"} + }, + "required": ["prop1", "prop2"], + "additionalProperties": False + } + + unchanged_schema = accessor._prepare_schema_for_openai(complete_schema) + + # Should be identical (except potentially reordered) + assert set(unchanged_schema["required"]) == {"prop1", "prop2"} + assert unchanged_schema["properties"] == complete_schema["properties"] + + +def test_ref_objects_cleaning(): + """Test that $ref objects with additional keywords are cleaned for OpenAI compliance.""" + with patch('src.modelAccessors.openai_accessor.OpenAI'): + accessor = OpenAIAccessor() + + # Create a schema with problematic $ref objects + schema_with_problematic_refs = { + "type": "object", + "properties": { + "field_with_ref": { + "$ref": "#/$defs/SomeType", + "default": "some_default", + "title": "Some Title" + }, + "normal_field": {"type": "string"} + }, + "required": ["field_with_ref", "normal_field"], + "$defs": { + "SomeType": { + "type": "string", + "enum": ["value1", "value2"] + }, + "AnotherType": { + "properties": { + "nested_ref": { + "$ref": "#/$defs/SomeType", + "description": "This should be cleaned" + } + } + } + } + } + + fixed_schema = accessor._prepare_schema_for_openai(schema_with_problematic_refs) + + # Function to find $ref objects with extra keywords + def find_problematic_refs(obj, path=""): + issues = [] + if isinstance(obj, dict): + if "$ref" in obj and len(obj) > 1: + other_keys = [k for k in obj.keys() if k != "$ref"] + issues.append(f"{path}: $ref with extra keys {other_keys}") + + for key, value in obj.items(): + if isinstance(value, dict): + issues.extend(find_problematic_refs(value, f"{path}.{key}")) + elif isinstance(value, list): + for i, item in enumerate(value): + if isinstance(item, dict): + issues.extend(find_problematic_refs(item, f"{path}.{key}[{i}]")) + return issues + + issues = find_problematic_refs(fixed_schema) + assert len(issues) == 0, f"Found problematic $ref objects: {issues}" + + # Verify that $ref objects are now clean + field_with_ref = fixed_schema["properties"]["field_with_ref"] + assert field_with_ref == {"$ref": "#/$defs/SomeType"}, f"Expected clean $ref, got {field_with_ref}" + + nested_ref = fixed_schema["$defs"]["AnotherType"]["properties"]["nested_ref"] + assert nested_ref == {"$ref": "#/$defs/SomeType"}, f"Expected clean nested $ref, got {nested_ref}" + + +def test_additional_properties_recursive_fix(): + """Test that all objects in schema get additionalProperties: false.""" + with patch('src.modelAccessors.openai_accessor.OpenAI'): + accessor = OpenAIAccessor() + + # Create a schema with nested objects missing additionalProperties + schema_missing_additional_props = { + "type": "object", + "properties": { + "main_field": {"type": "string"} + }, + "required": ["main_field"], + "$defs": { + "ObjectWithoutAdditionalProps": { + "type": "object", + "properties": { + "prop1": {"type": "string"}, + "nested_object": { + "type": "object", + "properties": { + "nested_prop": {"type": "number"} + } + } + }, + "required": ["prop1"] + }, + "AnotherObject": { + "properties": { + "field_a": {"type": "string"} + } + # No type specified, but has properties + } + } + } + + fixed_schema = accessor._prepare_schema_for_openai(schema_missing_additional_props) + + # Function to recursively check all objects have additionalProperties: false + def check_additional_properties(obj, path=""): + issues = [] + if isinstance(obj, dict): + # Check if this should have additionalProperties + if obj.get("type") == "object" or "properties" in obj: + if obj.get("additionalProperties") is not False: + issues.append(f"{path}: missing or incorrect additionalProperties") + + # Recurse into nested structures + for key, value in obj.items(): + if key in ["$defs", "definitions"] and isinstance(value, dict): + for sub_key, sub_value in value.items(): + issues.extend(check_additional_properties(sub_value, f"{path}.{key}.{sub_key}")) + elif key == "properties" and isinstance(value, dict): + for prop_key, prop_value in value.items(): + issues.extend(check_additional_properties(prop_value, f"{path}.{key}.{prop_key}")) + elif key == "items" and isinstance(value, dict): + issues.extend(check_additional_properties(value, f"{path}.{key}")) + + return issues + + issues = check_additional_properties(fixed_schema, "root") + assert len(issues) == 0, f"Found additionalProperties issues: {issues}" + + # Verify specific objects + assert fixed_schema["additionalProperties"] is False, "Root should have additionalProperties: false" + + obj1 = fixed_schema["$defs"]["ObjectWithoutAdditionalProps"] + assert obj1["additionalProperties"] is False, "ObjectWithoutAdditionalProps should have additionalProperties: false" + + nested_obj = obj1["properties"]["nested_object"] + assert nested_obj["additionalProperties"] is False, "Nested object should have additionalProperties: false" + + obj2 = fixed_schema["$defs"]["AnotherObject"] + assert obj2["additionalProperties"] is False, "AnotherObject should have additionalProperties: false" + + +def test_recursive_required_array_fix(): + """Test that nested objects in $defs get proper OpenAI compliance without breaking schema design.""" + with patch('src.modelAccessors.openai_accessor.OpenAI'): + accessor = OpenAIAccessor() + + # Create a schema with nested objects that need OpenAI compliance + schema_with_nested_issues = { + "type": "object", + "properties": { + "main_field": {"type": "string"} + }, + "required": ["main_field"], + "$defs": { + "IncompleteObject": { + "type": "object", + "properties": { + "prop1": {"type": "string"}, + "prop2": {"type": "number"}, + "prop3": {"type": "boolean"} + }, + "required": ["prop1"] # Should preserve original design, just add additionalProperties + }, + "EmptyRequiredObject": { + "type": "object", + "properties": { + "field_a": {"type": "string"}, + "field_b": {"type": "array"} + } + # No required array - should add empty one, not force all props required + } + } + } + + fixed_schema = accessor._prepare_schema_for_openai(schema_with_nested_issues) + + # Check that nested objects are now OpenAI compliant but preserve original design + incomplete_obj = fixed_schema["$defs"]["IncompleteObject"] + assert incomplete_obj.get("additionalProperties") is False, "Should have additionalProperties: false" + assert incomplete_obj["required"] == ["prop1"], "Should preserve original required array" + + empty_req_obj = fixed_schema["$defs"]["EmptyRequiredObject"] + assert empty_req_obj.get("additionalProperties") is False, "Should have additionalProperties: false" + assert "required" in empty_req_obj, "Should have required array" + assert empty_req_obj["required"] == [], "Should have empty required array, not force all props" + + # Verify top level is still correct + assert set(fixed_schema["required"]) == {"main_field"}