From 8e57401905217bf1661f717fd1e8f54dbf2e85f5 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Thu, 4 Jun 2026 06:09:18 +0100 Subject: [PATCH] ext/dom: resolve in-scope prefixed QName values during document validation. Fix #22219 Modern DOM keeps namespace declarations off the tree (node->nsDef is NULL), so libxml's native validators cannot resolve a prefixed QName appearing in element or attribute content. Temporarily materialize them as nsDef entries around schema, RelaxNG and DTD validation, reusing the C14N relink machinery, then restore the tree. --- ext/dom/document.c | 42 +++++++++++++++++++++++++++-- ext/dom/namespace_compat.h | 7 +++++ ext/dom/node.c | 4 +-- ext/dom/tests/gh22219.phpt | 54 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 103 insertions(+), 4 deletions(-) create mode 100644 ext/dom/tests/gh22219.phpt diff --git a/ext/dom/document.c b/ext/dom/document.c index 371c9f846273..e4d285c990fe 100644 --- a/ext/dom/document.c +++ b/ext/dom/document.c @@ -1757,6 +1757,35 @@ static int dom_perform_xinclude(xmlDocPtr docp, dom_object *intern, zend_long fl return err; } +/* For modern DOM, namespace declarations are stored as attributes (node->nsDef + * is NULL), so libxml's native validators can't resolve prefixed QNames found in + * content (e.g. an xs:QName attribute value). Temporarily relink them, mirroring + * what C14N does in dom_canonicalization(). 
*/ +typedef struct { /* State shared by dom_validate_ns_guard_begin() and dom_validate_ns_guard_end(). */ + HashTable links; /* Handed to dom_relink_ns_decls()/dom_unlink_ns_decls(); only initialized while active is true. */ + bool active; /* Result of php_dom_follow_spec_node() for the document; when false both guard calls do nothing further. */ +} dom_validate_ns_guard; + +static void dom_validate_ns_guard_begin(dom_validate_ns_guard *guard, xmlDocPtr docp) /* Call before validating docp; pair with dom_validate_ns_guard_end() on the same guard. */ +{ + guard->active = php_dom_follow_spec_node((const xmlNode *) docp); + if (guard->active) { + zend_hash_init(&guard->links, 0, NULL, NULL, false); + xmlNodePtr root_element = xmlDocGetRootElement(docp); + if (root_element) { /* No root element: nothing to relink, links stays empty. */ + dom_relink_ns_decls(&guard->links, root_element); + } + } +} + +static void dom_validate_ns_guard_end(dom_validate_ns_guard *guard) /* Undoes dom_validate_ns_guard_begin(): unlink first, then release the table. */ +{ + if (guard->active) { + dom_unlink_ns_decls(&guard->links); + zend_hash_destroy(&guard->links); + } +} + /* {{{ Substitutues xincludes in a DomDocument */ PHP_METHOD(DOMDocument, xinclude) { @@ -1832,8 +1861,11 @@ PHP_METHOD(DOMDocument, validate) cvp->userData = NULL; cvp->error = (xmlValidityErrorFunc) php_libxml_error_handler; cvp->warning = (xmlValidityErrorFunc) php_libxml_error_handler; - - if (xmlValidateDocument(cvp, docp)) { + dom_validate_ns_guard guard; + dom_validate_ns_guard_begin(&guard, docp); + int dtd_valid = xmlValidateDocument(cvp, docp); /* Keep the result so the guard can be ended before RETVAL is set. */ + dom_validate_ns_guard_end(&guard); + if (dtd_valid) { RETVAL_TRUE; } else { RETVAL_FALSE; @@ -1930,7 +1962,10 @@ static void dom_document_schema_validate(INTERNAL_FUNCTION_PARAMETERS, int type) PHP_LIBXML_SANITIZE_GLOBALS(validate); xmlSchemaSetValidOptions(vptr, valid_opts); xmlSchemaSetValidErrors(vptr, php_libxml_error_handler, php_libxml_error_handler, vptr); + dom_validate_ns_guard guard; + dom_validate_ns_guard_begin(&guard, docp); is_valid = xmlSchemaValidateDoc(vptr, docp); + dom_validate_ns_guard_end(&guard); xmlSchemaFree(sptr); xmlSchemaFreeValidCtxt(vptr); PHP_LIBXML_RESTORE_GLOBALS(validate); @@ -2028,7 +2063,10 @@ static void dom_document_relaxNG_validate(INTERNAL_FUNCTION_PARAMETERS, int type } xmlRelaxNGSetValidErrors(vptr, php_libxml_error_handler, php_libxml_error_handler, vptr); + dom_validate_ns_guard guard; + dom_validate_ns_guard_begin(&guard, docp); is_valid = 
xmlRelaxNGValidateDoc(vptr, docp); + dom_validate_ns_guard_end(&guard); xmlRelaxNGFree(sptr); xmlRelaxNGFreeValidCtxt(vptr); diff --git a/ext/dom/namespace_compat.h b/ext/dom/namespace_compat.h index 23c80acc7fd7..185673f8586e 100644 --- a/ext/dom/namespace_compat.h +++ b/ext/dom/namespace_compat.h @@ -69,4 +69,11 @@ PHP_DOM_EXPORT php_dom_in_scope_ns php_dom_get_in_scope_ns(php_dom_libxml_ns_map PHP_DOM_EXPORT php_dom_in_scope_ns php_dom_get_in_scope_ns_legacy(const xmlNode *node); PHP_DOM_EXPORT void php_dom_in_scope_ns_destroy(php_dom_in_scope_ns *in_scope_ns); +/* Temporarily materialize namespace declarations as nsDef entries on the tree so + * that libxml's native validators/canonicalizers can resolve prefixed QNames that + * appear in element/attribute *content*. Modern DOM keeps declarations off the + * tree (node->nsDef == NULL), which xmlSearchNs() cannot follow. */ +PHP_DOM_EXPORT void dom_relink_ns_decls(HashTable *links, xmlNodePtr root); +PHP_DOM_EXPORT void dom_unlink_ns_decls(HashTable *links); + #endif diff --git a/ext/dom/node.c b/ext/dom/node.c index 505f6ee452c3..a3ceaaa14af8 100644 --- a/ext/dom/node.c +++ b/ext/dom/node.c @@ -2201,7 +2201,7 @@ static void dom_relink_ns_decls_element(HashTable *links, xmlNodePtr node) } } -static void dom_relink_ns_decls(HashTable *links, xmlNodePtr root) +void dom_relink_ns_decls(HashTable *links, xmlNodePtr root) { dom_relink_ns_decls_element(links, root); @@ -2213,7 +2213,7 @@ static void dom_relink_ns_decls(HashTable *links, xmlNodePtr root) } } -static void dom_unlink_ns_decls(HashTable *links) +void dom_unlink_ns_decls(HashTable *links) { ZEND_HASH_MAP_FOREACH_NUM_KEY_VAL(links, zend_ulong h, zval *data) { if (h & 1) { diff --git a/ext/dom/tests/gh22219.phpt b/ext/dom/tests/gh22219.phpt new file mode 100644 index 000000000000..637a47039c35 --- /dev/null +++ b/ext/dom/tests/gh22219.phpt @@ -0,0 +1,54 @@ +--TEST-- +GH-22219 (Dom\XMLDocument::schemaValidate fails to resolve xs:QName value from an in-scope 
prefix) +--EXTENSIONS-- +dom +--SKIPIF-- + +--FILE-- + + + + +XML; + +// The 'ref' prefix is declared on but only used inside the xs:QName +// attribute value, never as an element or attribute namespace. +$xsd = << + + + + + + + + + + + + + +XSD; + +libxml_use_internal_errors(true); + +$modern = Dom\XMLDocument::createFromString($xml, LIBXML_NSCLEAN); +var_dump($modern->schemaValidateSource($xsd)); + +$legacy = new DOMDocument(); +$legacy->loadXML($xml, LIBXML_NSCLEAN); +var_dump($legacy->schemaValidateSource($xsd)); + +foreach (libxml_get_errors() as $error) { + echo trim($error->message), PHP_EOL; +} +?> +--EXPECT-- +bool(true) +bool(true)