diff --git a/DEPENDENCIES b/DEPENDENCIES index 88b334d9..31c0aae0 100644 --- a/DEPENDENCIES +++ b/DEPENDENCIES @@ -1,5 +1,5 @@ vendorpull https://github.com/sourcemeta/vendorpull 1dcbac42809cf87cb5b045106b863e17ad84ba02 -core https://github.com/sourcemeta/core 9eb79e6d10f9a141b214b29867f602bd3ec6202a +core https://github.com/sourcemeta/core 6cdc6b362fef03e3efb5b674095287d983bafffa jsonbinpack https://github.com/sourcemeta/jsonbinpack 3898774a945ebf7392bad8a9015ead97a0518a19 blaze https://github.com/sourcemeta/blaze a7d43ba8a4de39918f44b97602950e260809cd0d hydra https://github.com/sourcemeta/hydra 6e0ad118846ce57275bc7d6434f8440baae2c27e diff --git a/src/resolver.h b/src/resolver.h index c5bbfc32..ded3287f 100644 --- a/src/resolver.h +++ b/src/resolver.h @@ -262,7 +262,7 @@ class CustomResolver { sourcemeta::core::reidentify(subschema, key.second, entry.base_dialect); const auto result{this->schemas.emplace(key.second, subschema)}; - if (!result.second && result.first->second != schema) { + if (!result.second && result.first->second != subschema) { throw sourcemeta::core::SchemaFrameError( key.second, "Cannot register the same identifier twice"); } diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 2a0e6179..320e5116 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -358,6 +358,7 @@ add_jsonschema_test_unix(bundle/fail_invalid_id_type) add_jsonschema_test_unix(bundle/fail_invalid_schema_uri) add_jsonschema_test_unix(bundle/pass_config_ignore) add_jsonschema_test_unix(bundle/pass_ref_in_bundled_resolves_against_id) +add_jsonschema_test_unix(bundle/pass_resolve_deduplicate_embedded) # Inspect add_jsonschema_test_unix(inspect/pass) diff --git a/test/bundle/pass_ref_in_bundled_resolves_against_id.sh b/test/bundle/pass_ref_in_bundled_resolves_against_id.sh index b492e6bd..56e79ca3 100755 --- a/test/bundle/pass_ref_in_bundled_resolves_against_id.sh +++ b/test/bundle/pass_ref_in_bundled_resolves_against_id.sh @@ -36,16 +36,14 @@ cat << EOF > "$TMP/expected.json" "title": "Entry", "\$ref": "https://example.com/schemas/parent", "\$defs": { + "https://example.com/schemas/child": { + "\$id": "https://example.com/schemas/child", + "type": "string" + }, "https://example.com/schemas/parent": { "\$schema": "https://json-schema.org/draft/2020-12/schema", "\$id": "https://example.com/schemas/parent", - "\$ref": "./child", - "\$defs": { - "https://example.com/schemas/child": { - "\$id": "https://example.com/schemas/child", - "type": "string" - } - } + "\$ref": "./child" } } } diff --git a/test/bundle/pass_resolve_deduplicate_embedded.sh b/test/bundle/pass_resolve_deduplicate_embedded.sh new file mode 100755 index 00000000..4612731a --- /dev/null +++ b/test/bundle/pass_resolve_deduplicate_embedded.sh @@ -0,0 +1,95 @@ +#!/bin/sh + +set -o errexit +set -o nounset + +TMP="$(mktemp -d)" +clean() { rm -rf "$TMP"; } +trap clean EXIT + +cat << 'EOF' > "$TMP/common.json" +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://example.com/common", + "type": "string" +} +EOF + +cat << 'EOF' > "$TMP/a.json" +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://example.com/a", + "$ref": "common", + "$defs": { + "https://example.com/common": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://example.com/common", + "type": "string" + } + } +} +EOF + +cat << 'EOF' > "$TMP/b.json" +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://example.com/b", + "$ref": "common", + "$defs": { + "https://example.com/common": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://example.com/common", + "type": "string" + } + } +} +EOF + +cat << 'EOF' > "$TMP/entry.json" +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://example.com/entry", + "allOf": [ + { "$ref": "a" }, + { "$ref": "b" } + ] +} +EOF + +"$1" bundle "$TMP/entry.json" \ + --resolve "$TMP/a.json" \ + --resolve "$TMP/b.json" > "$TMP/result.json" + +cat << 'EOF' > "$TMP/expected.json" +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://example.com/entry", + "allOf": [ + { + "$ref": "a" + }, + { + "$ref": "b" + } + ], + "$defs": { + "https://example.com/common": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://example.com/common", + "type": "string" + }, + "https://example.com/a": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://example.com/a", + "$ref": "common" + }, + "https://example.com/b": { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://example.com/b", + "$ref": "common" + } + } +} +EOF + +diff "$TMP/result.json" "$TMP/expected.json" diff --git a/vendor/core/src/core/jsonschema/bundle.cc b/vendor/core/src/core/jsonschema/bundle.cc index 46c697c5..66fa4d4b 100644 --- a/vendor/core/src/core/jsonschema/bundle.cc +++ b/vendor/core/src/core/jsonschema/bundle.cc @@ -1,5 +1,7 @@ #include +#include "helpers.h" + #include // assert #include // std::reference_wrapper #include // std::ostringstream @@ -131,6 +133,103 @@ auto embed_schema(sourcemeta::core::JSON &root, current->assign(key.str(), std::move(target)); } +auto elevate_embedded_resources( + sourcemeta::core::JSON &remote, sourcemeta::core::JSON &root, + const sourcemeta::core::Pointer &container, + const sourcemeta::core::SchemaBaseDialect remote_dialect, + const sourcemeta::core::SchemaResolver &resolver, + std::string_view default_dialect, + std::unordered_map &bundled) -> void { + const auto keyword{sourcemeta::core::definitions_keyword(remote_dialect)}; + const sourcemeta::core::JSON::String keyword_string{keyword}; + if (keyword.empty() || !remote.is_object() || + !remote.defines(keyword_string) || + !remote.at(keyword_string).is_object()) { + return; + } + + auto &defs{remote.at(keyword_string)}; + + // Navigate to the root container once, as it doesn't change per entry + const sourcemeta::core::JSON *root_container{&root}; + bool container_exists{true}; + for (const auto &token : container) { + if (!token.is_property() || !root_container->is_object() || + !root_container->defines(token.to_property())) { + container_exists = false; + break; + } + + root_container = &root_container->at(token.to_property()); + } + + std::vector to_extract; + std::vector to_remove; + for (const auto &entry : defs.as_object()) { + const auto &key{entry.first}; + const auto &value{entry.second}; + const auto entry_dialect{ + sourcemeta::core::base_dialect(value, resolver, default_dialect)}; + const auto effective_entry_dialect{ + entry_dialect.has_value() ? entry_dialect.value() : remote_dialect}; + const auto identifier{ + sourcemeta::core::identify(value, effective_entry_dialect)}; + if (identifier.empty() || identifier != key || + !sourcemeta::core::URI{identifier}.is_absolute()) { + continue; + } + + const sourcemeta::core::JSON::String identifier_string{identifier}; + if (bundled.contains(identifier_string)) { + if (container_exists && root_container->is_object()) { + for (const auto &root_entry : root_container->as_object()) { + if (!root_entry.first.starts_with(identifier_string)) { + continue; + } + + const auto stored_dialect{sourcemeta::core::base_dialect( + root_entry.second, resolver, default_dialect)}; + const auto effective_stored_dialect{stored_dialect.has_value() + ? stored_dialect.value() + : remote_dialect}; + const auto stored_id{sourcemeta::core::identify( + root_entry.second, effective_stored_dialect)}; + if (stored_id != identifier_string) { + continue; + } + + if (root_entry.second != value) { + throw sourcemeta::core::SchemaError( + "Conflicting embedded resources with the same identifier"); + } + + break; + } + } + + to_remove.emplace_back(key); + } else { + to_extract.emplace_back(key); + bundled.emplace(identifier_string, identifier_string); + } + } + + for (const auto &key : to_extract) { + auto value{std::move(defs.at(key))}; + defs.erase(key); + embed_schema(root, container, key, std::move(value)); + } + + for (const auto &key : to_remove) { + defs.erase(key); + } + + if (defs.empty()) { + remote.erase(sourcemeta::core::JSON::String{keyword}); + } +} + auto bundle_schema(sourcemeta::core::JSON &root, const sourcemeta::core::Pointer &container, sourcemeta::core::JSON &subschema, @@ -155,7 +254,8 @@ auto bundle_schema(sourcemeta::core::JSON &root, frame.analyse(subschema, walker, resolver, default_dialect, default_id); } - std::vector> + std::vector> deferred; std::vector< std::pair> @@ -267,7 +367,8 @@ auto bundle_schema(sourcemeta::core::JSON &root, bundled.emplace(identifier, effective_id); bundled.emplace(effective_id, effective_id); - deferred.emplace_back(std::move(remote).value(), std::move(effective_id)); + deferred.emplace_back(std::move(remote).value(), std::move(effective_id), + remote_base_dialect.value()); }); for (auto &[rewrite_pointer, rewrite_value] : ref_rewrites) { @@ -275,9 +376,11 @@ auto bundle_schema(sourcemeta::core::JSON &root, sourcemeta::core::JSON{rewrite_value}); } - for (auto &[remote, effective_id] : deferred) { + for (auto &[remote, effective_id, remote_dialect] : deferred) { bundle_schema(root, container, remote, walker, resolver, default_dialect, effective_id, paths, bundled, depth + 1); + elevate_embedded_resources(remote, root, container, remote_dialect, + resolver, default_dialect, bundled); embed_schema(root, container, effective_id, std::move(remote)); } } @@ -330,75 +433,43 @@ auto bundle(JSON &schema, const SchemaWalker &walker, reidentify(schema, default_id, resolver, default_dialect); } - const auto vocabularies{ - sourcemeta::core::vocabularies(schema, resolver, default_dialect)}; - if (vocabularies.contains( - sourcemeta::core::Vocabularies::Known::JSON_Schema_2020_12_Core) || - vocabularies.contains( - sourcemeta::core::Vocabularies::Known::JSON_Schema_2019_09_Core)) { - bundle_schema(schema, {"$defs"}, schema, walker, resolver, default_dialect, - default_id, paths, bundled); - return; - } else if ( - vocabularies.contains( - sourcemeta::core::Vocabularies::Known::JSON_Schema_Draft_7) || - vocabularies.contains( - sourcemeta::core::Vocabularies::Known::JSON_Schema_Draft_7_Hyper) || - vocabularies.contains( - sourcemeta::core::Vocabularies::Known::JSON_Schema_Draft_6) || - vocabularies.contains( - sourcemeta::core::Vocabularies::Known::JSON_Schema_Draft_6_Hyper) || - vocabularies.contains( - sourcemeta::core::Vocabularies::Known::JSON_Schema_Draft_4) || - vocabularies.contains( - sourcemeta::core::Vocabularies::Known::JSON_Schema_Draft_4_Hyper)) { - if (schema.is_object() && schema.defines("$ref")) { - // This is a very specific case in which we can "fix" this - if (schema.size() == 1) { - auto branches{JSON::make_array()}; - branches.push_back(schema); - schema.at("$ref").into(std::move(branches)); - // Note that `allOf` was introduced in Draft 4 - schema.rename("$ref", "allOf"); - } else { - throw sourcemeta::core::SchemaError( - "Cannot bundle a JSON Schema Draft 7 or older with a top-level " - "`$ref` (which overrides sibling keywords) without introducing " - "undefined behavior"); - } - } + const auto schema_base_dialect{ + base_dialect(schema, resolver, default_dialect)}; + if (!schema_base_dialect.has_value()) { + throw SchemaError( + "Could not determine how to perform bundling in this dialect"); + } - bundle_schema(schema, {"definitions"}, schema, walker, resolver, - default_dialect, default_id, paths, bundled); - return; - } else if ( - vocabularies.contains( - sourcemeta::core::Vocabularies::Known::JSON_Schema_Draft_3_Hyper) || - vocabularies.contains( - sourcemeta::core::Vocabularies::Known::JSON_Schema_Draft_3) || - vocabularies.contains( - sourcemeta::core::Vocabularies::Known::JSON_Schema_Draft_2_Hyper) || - vocabularies.contains( - sourcemeta::core::Vocabularies::Known::JSON_Schema_Draft_2) || - vocabularies.contains( - sourcemeta::core::Vocabularies::Known::JSON_Schema_Draft_1_Hyper) || - vocabularies.contains( - sourcemeta::core::Vocabularies::Known::JSON_Schema_Draft_1) || - vocabularies.contains( - sourcemeta::core::Vocabularies::Known::JSON_Schema_Draft_0_Hyper) || - vocabularies.contains( - sourcemeta::core::Vocabularies::Known::JSON_Schema_Draft_0)) { + const auto container_keyword{ + definitions_keyword(schema_base_dialect.value())}; + if (container_keyword.empty()) { SchemaFrame frame{SchemaFrame::Mode::References}; frame.analyse(schema, walker, resolver, default_dialect, default_id); if (frame.standalone()) { return; } + + throw SchemaError( + "Could not determine how to perform bundling in this dialect"); + } + + if (ref_overrides_adjacent_keywords(schema_base_dialect.value()) && + schema.is_object() && schema.defines("$ref")) { + if (schema.size() == 1) { + auto branches{JSON::make_array()}; + branches.push_back(schema); + schema.at("$ref").into(std::move(branches)); + schema.rename("$ref", "allOf"); + } else { + throw SchemaError( + "Cannot bundle a JSON Schema Draft 7 or older with a top-level " + "`$ref` (which overrides sibling keywords) without introducing " + "undefined behavior"); + } } - // We don't attempt to bundle on dialects where we - // don't know where to put the embedded schemas - throw SchemaError( - "Could not determine how to perform bundling in this dialect"); + bundle_schema(schema, {JSON::String{container_keyword}}, schema, walker, + resolver, default_dialect, default_id, paths, bundled); } auto bundle(const JSON &schema, const SchemaWalker &walker, diff --git a/vendor/core/src/core/jsonschema/helpers.h b/vendor/core/src/core/jsonschema/helpers.h index 8136371f..ad263555 100644 --- a/vendor/core/src/core/jsonschema/helpers.h +++ b/vendor/core/src/core/jsonschema/helpers.h @@ -34,6 +34,33 @@ inline auto id_keyword(const SchemaBaseDialect base_dialect) return "$id"; } +inline auto definitions_keyword(const SchemaBaseDialect base_dialect) + -> std::string_view { + switch (base_dialect) { + case SchemaBaseDialect::JSON_Schema_2020_12: + case SchemaBaseDialect::JSON_Schema_2020_12_Hyper: + case SchemaBaseDialect::JSON_Schema_2019_09: + case SchemaBaseDialect::JSON_Schema_2019_09_Hyper: + return "$defs"; + case SchemaBaseDialect::JSON_Schema_Draft_7: + case SchemaBaseDialect::JSON_Schema_Draft_7_Hyper: + case SchemaBaseDialect::JSON_Schema_Draft_6: + case SchemaBaseDialect::JSON_Schema_Draft_6_Hyper: + case SchemaBaseDialect::JSON_Schema_Draft_4: + case SchemaBaseDialect::JSON_Schema_Draft_4_Hyper: + return "definitions"; + case SchemaBaseDialect::JSON_Schema_Draft_3: + case SchemaBaseDialect::JSON_Schema_Draft_3_Hyper: + case SchemaBaseDialect::JSON_Schema_Draft_2_Hyper: + case SchemaBaseDialect::JSON_Schema_Draft_1_Hyper: + case SchemaBaseDialect::JSON_Schema_Draft_0_Hyper: + return ""; + } + + assert(false); + return "$defs"; +} + // In older drafts, the presence of `$ref` would override any sibling keywords // See // https://json-schema.org/draft-07/draft-handrews-json-schema-01#rfc.section.8.3