From ee199a0d0df24dcda07c684f98d298115428eb2a Mon Sep 17 00:00:00 2001 From: tompng Date: Mon, 15 Jun 2026 19:49:37 +0900 Subject: [PATCH 1/5] Simplify element.attributes structure Change REXML::Attributes internal structure, Refactor ATTLIST handling in Attributes. Fix inconsistent beavior of handling ATTLIST. Stop fuzzy get_attribute/get_attribute_ns search and make it Document Object Model complient. --- lib/rexml/attribute.rb | 11 +- lib/rexml/doctype.rb | 28 +++-- lib/rexml/element.rb | 201 ++++++++++++++-------------------- test/test_attributes.rb | 10 ++ test/test_attributes_mixin.rb | 6 +- test/test_contrib.rb | 4 +- test/test_core.rb | 17 ++- 7 files changed, 136 insertions(+), 141 deletions(-) diff --git a/lib/rexml/attribute.rb b/lib/rexml/attribute.rb index c56732497..b31beb747 100644 --- a/lib/rexml/attribute.rb +++ b/lib/rexml/attribute.rb @@ -180,7 +180,7 @@ def element=( element ) # # This method is usually not called directly. def remove - @element.attributes.delete self.name unless @element.nil? + @element.attributes.delete self unless @element.nil? end # Writes this attribute (EG, puts 'key="value"' to the output) @@ -205,6 +205,15 @@ def xpath def document @element&.document end + + # Returns true if this attribute is a namespace declaration, false otherwise. + # "foo" => false + # "xmlns" => true + # "xmlns:foo" => true + # "foo:xmlns" => false + def namespace_declaration? + prefix == 'xmlns' || (prefix.empty? && name == 'xmlns') + end end end #vim:ts=2 sw=2 noexpandtab: diff --git a/lib/rexml/doctype.rb b/lib/rexml/doctype.rb index d4f0bbe41..f58a39297 100644 --- a/lib/rexml/doctype.rb +++ b/lib/rexml/doctype.rb @@ -113,23 +113,27 @@ def node_type end def attributes_of element - rv = [] - each do |child| - child.each do |key,val| - rv << Attribute.new(key,val) - end if child.kind_of? AttlistDecl and child.element_name == element + attribute_declarations_of(element).map do |key, value| + Attribute.new(key, value) end - rv end def attribute_of element, attribute - att_decl = find do |child| - child.kind_of? AttlistDecl and - child.element_name == element and - child.include? attribute + attribute_declarations_of(element)[attribute] + end + + def attribute_declarations_of(name) + raw_attributes = {} + decls = select do |child| + child.kind_of?(AttlistDecl) && child.element_name == name + end + decls.each do |child| + child.each do |key, val| + # First declaration wins + raw_attributes[key] = val unless raw_attributes.key? key + end end - return nil unless att_decl - att_decl[attribute] + raw_attributes end def clone diff --git a/lib/rexml/element.rb b/lib/rexml/element.rb index abdb28a72..861982b40 100644 --- a/lib/rexml/element.rb +++ b/lib/rexml/element.rb @@ -2240,15 +2240,10 @@ def length # [REXML::Attribute, bar:att='2'] # [REXML::Attribute, att='<'] # - def each_attribute # :yields: attribute - return to_enum(__method__) unless block_given? - each_value do |val| - if val.kind_of? Attribute - yield val - else - val.each_value { |atr| yield atr } - end - end + # This method doesn't iterate attributes in the DTD. This may be a bug. + # + def each_attribute(&block) # :yields: attribute + each_own_attribute(&block) end # :call-seq: @@ -2273,6 +2268,8 @@ def each_attribute # :yields: attribute # ["bar:att", "2"] # ["att", "<"] # + # This method doesn't iterate attributes in the DTD. This may be a bug. + # def each return to_enum(__method__) unless block_given? each_attribute do |attr| @@ -2300,36 +2297,7 @@ def each # attrs.get_attribute('nosuch') # => nil # def get_attribute( name ) - attr = fetch( name, nil ) - if attr.nil? - return nil if name.nil? - # Look for prefix - name =~ Namespace::NAMESPLIT - prefix, n = $1, $2 - if prefix - attr = fetch( n, nil ) - # check prefix - if attr == nil - elsif attr.kind_of? Attribute - return attr if prefix == attr.prefix - else - attr = attr[ prefix ] - return attr - end - end - doctype = @element.document&.doctype - if doctype - expn = @element.expanded_name - expn = doctype.name if expn.size == 0 - attr_val = doctype.attribute_of(expn, name) - return Attribute.new( name, attr_val ) if attr_val - end - return nil - end - if attr.kind_of? Hash - attr = attr[ @element.prefix ] - end - attr + fetch(name, nil) || attlist_attributes&.[](name) end # :call-seq: @@ -2357,12 +2325,15 @@ def get_attribute( name ) # def []=( name, value ) if value.nil? # Delete the named attribute - attr = get_attribute(name) - delete attr + delete name return end - unless value.kind_of? Attribute + if value.kind_of? Attribute + # Use attribute's expanded name to avoid inconsistency. + # TODO: Explicitly document that inconsistent names are allowed, or raise/warn about inconsistency. + name = value.expanded_name + else doctype = @element.document&.doctype if doctype value = Text::normalize( value, doctype ) @@ -2372,17 +2343,7 @@ def []=( name, value ) value = Attribute.new(name, value) end value.element = @element - old_attr = fetch(value.name, nil) - if old_attr.nil? - store(value.name, value) - elsif old_attr.kind_of? Hash - old_attr[value.prefix] = value - elsif old_attr.prefix != value.prefix - store value.name, {old_attr.prefix => old_attr, - value.prefix => value} - else - store value.name, value - end + store name, value @element end @@ -2398,20 +2359,7 @@ def []=( name, value ) # d.root.attributes.prefixes # => ["x", "y"] # def prefixes - ns = [] - each_attribute do |attribute| - ns << attribute.name if attribute.prefix == 'xmlns' - end - doctype = @element.document&.doctype - if doctype - expn = @element.expanded_name - expn = doctype.name if expn.size == 0 - doctype.attributes_of(expn).each { - |attribute| - ns << attribute.name if attribute.prefix == 'xmlns' - } - end - ns + namespaces.keys - ['xmlns'] end # :call-seq: @@ -2425,17 +2373,8 @@ def prefixes # def namespaces namespaces = {} - each_attribute do |attribute| - namespaces[attribute.name] = attribute.value if attribute.prefix == 'xmlns' or attribute.name == 'xmlns' - end - doctype = @element.document&.doctype - if doctype - expn = @element.expanded_name - expn = doctype.name if expn.size == 0 - doctype.attributes_of(expn).each { - |attribute| - namespaces[attribute.name] = attribute.value if attribute.prefix == 'xmlns' or attribute.name == 'xmlns' - } + each_effective_attribute do |attribute| + namespaces[attribute.name] = attribute.value if attribute.namespace_declaration? end namespaces end @@ -2468,28 +2407,11 @@ def namespaces # attrs.delete(attr) # => # => # attrs.delete(attr) # => # => # - def delete( attribute ) - name = nil - prefix = nil - if attribute.kind_of? Attribute - name = attribute.name - prefix = attribute.prefix - else - attribute =~ Namespace::NAMESPLIT - prefix, name = $1, $2 - prefix = '' unless prefix - end - old = fetch(name, nil) - if old.kind_of? Hash # the supplied attribute is one of many - old.delete(prefix) - if old.size == 1 - repl = nil - old.each_value{|v| repl = v} - store name, repl - end - elsif old # the supplied attribute is a top-level one - super(name) - end + def delete(attribute_or_name) + key = attribute_or_name + key = attribute_or_name.expanded_name if attribute_or_name.kind_of? Attribute + super(key) + @element end @@ -2514,7 +2436,7 @@ def delete( attribute ) # attrs.include?('baz') # => true # def add( attribute ) - self[attribute.name] = attribute + self[attribute.expanded_name] = attribute end alias :<< :add @@ -2527,21 +2449,26 @@ def add( attribute ) # # xml_string = <<-EOT # - # + # # # EOT # d = REXML::Document.new(xml_string) - # ele = d.root.elements['//ele'] # => + # ele = d.root.elements['//ele'] # => # attrs = ele.attributes - # attrs.delete_all('att') # => [att='<'] + # attrs.delete_all('att') # => [foo:att='1', att='<'] + # attrs.each_attribute.map(&:expanded_name) #=> ['foo:other', 'other'] # def delete_all( name ) - rv = [] - each_attribute { |attribute| - rv << attribute if attribute.expanded_name == name - } - rv.each{ |attr| attr.remove } - rv + attributes = each_attribute.select do |attribute| + # For + # delete_all('foo') should not delete xmlns:foo="url" + # because it is a namespace declaration, not a normal attribute. + (!attribute.namespace_declaration? && attribute.name == name) || attribute.expanded_name == name + end + attributes.each do |attribute| + delete attribute.expanded_name + end + attributes end # :call-seq: @@ -2562,17 +2489,51 @@ def delete_all( name ) # attrs.get_attribute_ns('http://foo', 'nosuch') # => nil # def get_attribute_ns(namespace, name) - result = nil - each_attribute() { |attribute| - if name == attribute.name && - namespace == attribute.namespace() && - ( !namespace.empty? || !attribute.fully_expanded_name.index(':') ) - # foo will match xmlns:foo, but only if foo isn't also an attribute - result = attribute if !result or !namespace.empty? or - !attribute.fully_expanded_name.index(':') + each_effective_attribute.find do |attribute| + if attribute.namespace_declaration? + # namespace declarations are not considered as attributes in this method. + # For example: + # Both elem1.get_attribute_ns('', 'foo') and elem2.get_attribute_ns('bar', 'foo') + # should not match. + false + elsif namespace.empty? && !attribute.prefix.empty? + # If prefix is present, namespace url shouldn't be empty, so it never matches. + # For example, , even if attribute.namespace returns '', + # it just means that namespace lookup failed, it shouldn't match. + false + else + name == attribute.name && namespace == attribute.namespace() end - } - result + end + end + + private + + # Iterates over the attributes of the element and those in the DTD. + # If the element has an attribute with the same name as one in the DTD, + # the element's attribute takes precedence. + def each_effective_attribute + return to_enum(__method__) unless block_given? + attr = attlist_attributes + attr = attr ? attr.merge(self) : self + attr.each_value do |attribute| + yield attribute + end + end + + alias each_own_attribute each_value + private :each_own_attribute + + def attlist_attributes + doctype = @element.document&.doctype + if doctype + expn = @element.is_a?(Document) ? doctype.name : @element.expanded_name + doctype.attribute_declarations_of(expn).map do |key, value| + attr = Attribute.new(key, value) + attr.element = @element + [key, attr] + end.to_h + end end end end diff --git a/test/test_attributes.rb b/test/test_attributes.rb index 09fde4422..af42f06ff 100644 --- a/test/test_attributes.rb +++ b/test/test_attributes.rb @@ -74,6 +74,16 @@ def test_delete assert_equal '4', doc.root.attributes['z:foo'] end + def test_delete_all + doc = Document.new("") + doc.root.attributes.delete_all 'x' + assert_equal ['xmlns:x', 'xmlns:y', 'y', 'x:y', 'x:z', 'y:y'], doc.root.attributes.each_attribute.map(&:expanded_name) + doc.root.attributes.delete_all 'x:y' + assert_equal ['xmlns:x', 'xmlns:y', 'y', 'x:z', 'y:y'], doc.root.attributes.each_attribute.map(&:expanded_name) + doc.root.attributes.delete_all 'xmlns:y' + assert_equal ['xmlns:x', 'y', 'x:z', 'y:y'], doc.root.attributes.each_attribute.map(&:expanded_name) + end + def test_prefixes doc = Document.new("") prefixes = doc.root.attributes.prefixes diff --git a/test/test_attributes_mixin.rb b/test/test_attributes_mixin.rb index 2b9108cbc..e4d5841cb 100644 --- a/test/test_attributes_mixin.rb +++ b/test/test_attributes_mixin.rb @@ -5,8 +5,10 @@ class TestAttributes < Test::Unit::TestCase def setup @ns_a = "urn:x-test:a" @ns_b = "urn:x-test:b" + @ns_default = "urn:x-test:default" element_string = <<-"XMLEND" - Dummy title' anElement = anElement = aDoc.elements[1] - elementAttrPrefix = anElement.attributes.get_attribute('content').prefix + elementAttrPrefix = anElement.attributes.get_attribute('tpl:content').prefix aClone = anElement.clone - cloneAttrPrefix = aClone.attributes.get_attribute('content').prefix + cloneAttrPrefix = aClone.attributes.get_attribute('tpl:content').prefix assert_equal( elementAttrPrefix , cloneAttrPrefix ) end diff --git a/test/test_core.rb b/test/test_core.rb index beaaeed76..d4962507d 100644 --- a/test/test_core.rb +++ b/test/test_core.rb @@ -889,16 +889,25 @@ def test_attlist_decl ]> EOL assert_equal '', doc.doctype.children[0].to_s.gsub(/\s+/, " ") assert_equal 'gobble', doc.root.attributes['bar'] + assert_equal 'gobble', doc.root.attributes.get_attribute('bar').value + assert_equal 'three', doc.root.attributes.get_attribute('one:baz').value + assert_equal 'three', doc.root.attributes.get_attribute_ns('two', 'baz').value assert_equal 'xxx', doc.root.elements[2].namespace assert_equal 'two', doc.root.elements[1].namespace assert_equal 'foo', doc.root.namespace + # Not used in REXML internal any more, keep for backward compatibility + assert_equal 'gobble', doc.doctype.attribute_of('a', 'bar') + expected_attributes = [Attribute.new('bar', 'gobble'), Attribute.new('xmlns:one', 'two'), Attribute.new('one:baz', 'three')] + assert_equal expected_attributes, doc.doctype.attributes_of('a') + doc = Document.new <<~EOL ' ) expected = { - "inkscape" => attribute("xmlns:inkscape", + "xmlns:inkscape" => attribute("xmlns:inkscape", "http://www.inkscape.org/namespaces/inkscape"), - "version" => { - "inkscape" => attribute("inkscape:version", "0.44"), - "" => attribute("version", "1.0"), - }, + "inkscape:version" => attribute("inkscape:version", "0.44"), + "version" => attribute("version", "1.0"), } assert_equal(expected, doc.root.attributes.to_h) From a7fbf41ba427a29ac961b71c9149fe8de4b51110 Mon Sep 17 00:00:00 2001 From: tompng Date: Wed, 17 Jun 2026 01:24:37 +0900 Subject: [PATCH 2/5] Change namespace cache strategy Namespace calculation for each node can't be cached to document because document lookup is slow for deeply nested nodes. Change namespace cache strategy, inject cached hash as an argument to retrieve namespace/namespaces from XPath match operation and also for bare namespace call. --- lib/rexml/doctype.rb | 17 ++++---- lib/rexml/element.rb | 92 ++++++++++++++++++++++----------------- lib/rexml/xpath_parser.rb | 64 ++++++++++++++++++--------- test/test_namespace.rb | 22 ++++++++++ test/xpath/test_base.rb | 10 ----- 5 files changed, 125 insertions(+), 80 deletions(-) diff --git a/lib/rexml/doctype.rb b/lib/rexml/doctype.rb index f58a39297..c8789d94a 100644 --- a/lib/rexml/doctype.rb +++ b/lib/rexml/doctype.rb @@ -113,27 +113,26 @@ def node_type end def attributes_of element - attribute_declarations_of(element).map do |key, value| + raw_attributes = _attlist_mappings[element] || {} + raw_attributes.map do |key, value| Attribute.new(key, value) end end def attribute_of element, attribute - attribute_declarations_of(element)[attribute] + _attlist_mappings[element]&.[](attribute) end - def attribute_declarations_of(name) - raw_attributes = {} - decls = select do |child| - child.kind_of?(AttlistDecl) && child.element_name == name - end - decls.each do |child| + def _attlist_mappings + mapping = {} + grep(AttlistDecl).each do |child| + raw_attributes = mapping[child.element_name] ||= {} child.each do |key, val| # First declaration wins raw_attributes[key] = val unless raw_attributes.key? key end end - raw_attributes + mapping end def clone diff --git a/lib/rexml/element.rb b/lib/rexml/element.rb index 861982b40..1c8d829ed 100644 --- a/lib/rexml/element.rb +++ b/lib/rexml/element.rb @@ -588,12 +588,7 @@ def prefixes # d.elements['//c'].namespaces # => {"x"=>"1", "y"=>"2", "z"=>"3"} # def namespaces - namespaces_cache = document&.__send__(:namespaces_cache) - if namespaces_cache - namespaces_cache[self] ||= calculate_namespaces - else - calculate_namespaces - end + _calculate_namespaces end # :call-seq: @@ -618,13 +613,10 @@ def namespaces # def namespace(prefix=nil) if prefix.nil? - prefix = prefix() + _namespace_internal + else + _namespace_lookup_internal(prefix) end - prefix = (prefix == '') ? 'xmlns' : prefix.delete_prefix("xmlns:") - ns = namespaces[prefix] - - ns = '' if ns.nil? and prefix == 'xmlns' - ns end # :call-seq: @@ -1507,15 +1499,32 @@ def write(output=$stdout, indent=-1, transitive=false, ie_hack=false) formatter.write( self, output ) end - private - def calculate_namespaces - if parent - parent.namespaces.merge(attributes.namespaces) - else - attributes.namespaces - end + # Returns namespace of the element + def _namespace_internal(namespaces = _calculate_namespaces) # :nodoc: + _namespace_lookup_internal(prefix, namespaces) + end + + # Lookup namespace for the given prefix in the context of the element + def _namespace_lookup_internal(prefix, namespaces = _calculate_namespaces) # :nodoc: + prefix = (prefix == '') ? 'xmlns' : prefix.delete_prefix("xmlns:") + ns = namespaces[prefix] + ns = '' if ns.nil? and prefix == 'xmlns' + ns + end + + def _calculate_namespaces(cache_hash = nil, attlist_mappings = nil) # :nodoc: + return cache_hash[self] if cache_hash && cache_hash.key?(self) + + attlist_mappings ||= document&.doctype&._attlist_mappings || {} + inherited_namespaces = parent ? parent._calculate_namespaces(cache_hash, attlist_mappings) : {} + attr_namespaces = attributes._calculate_namespaces(attlist_mappings) + namespaces = attr_namespaces.any? ? inherited_namespaces.merge(attr_namespaces) : inherited_namespaces + cache_hash[self] = namespaces if cache_hash + namespaces end + private + def __to_xpath_helper node rv = node.expanded_name.clone if node.parent @@ -2297,7 +2306,7 @@ def each # attrs.get_attribute('nosuch') # => nil # def get_attribute( name ) - fetch(name, nil) || attlist_attributes&.[](name) + fetch(name, nil) || attlist_attributes[name] end # :call-seq: @@ -2372,11 +2381,7 @@ def prefixes # d.root.attributes.namespaces # => {"xmlns"=>"foo", "x"=>"bar", "y"=>"twee"} # def namespaces - namespaces = {} - each_effective_attribute do |attribute| - namespaces[attribute.name] = attribute.value if attribute.namespace_declaration? - end - namespaces + _calculate_namespaces end # :call-seq: @@ -2507,16 +2512,23 @@ def get_attribute_ns(namespace, name) end end + def _calculate_namespaces(attlist_mappings = nil) # :nodoc: + namespaces = {} + each_effective_attribute(attlist_mappings) do |attribute| + namespaces[attribute.name] = attribute.value if attribute.namespace_declaration? + end + namespaces + end + private # Iterates over the attributes of the element and those in the DTD. # If the element has an attribute with the same name as one in the DTD, # the element's attribute takes precedence. - def each_effective_attribute - return to_enum(__method__) unless block_given? - attr = attlist_attributes - attr = attr ? attr.merge(self) : self - attr.each_value do |attribute| + def each_effective_attribute(attlist_mappings = nil) + return to_enum(__method__, attlist_mappings) unless block_given? + attributes = attlist_attributes(attlist_mappings).merge(self) + attributes.each_value do |attribute| yield attribute end end @@ -2524,16 +2536,16 @@ def each_effective_attribute alias each_own_attribute each_value private :each_own_attribute - def attlist_attributes - doctype = @element.document&.doctype - if doctype - expn = @element.is_a?(Document) ? doctype.name : @element.expanded_name - doctype.attribute_declarations_of(expn).map do |key, value| - attr = Attribute.new(key, value) - attr.element = @element - [key, attr] - end.to_h - end + def attlist_attributes(attlist_mappings = nil) + attlist_mappings ||= @element.document&.doctype&._attlist_mappings + return {} unless attlist_mappings + + expn = @element.is_a?(Document) ? @element.doctype&.name : @element.expanded_name + attlist_mappings[expn]&.map do |key, value| + attr = Attribute.new(key, value) + attr.element = @element + [key, attr] + end.to_h end end end diff --git a/lib/rexml/xpath_parser.rb b/lib/rexml/xpath_parser.rb index 9c856a65d..2bdb743c2 100644 --- a/lib/rexml/xpath_parser.rb +++ b/lib/rexml/xpath_parser.rb @@ -63,6 +63,9 @@ def initialize(strict: false) @namespaces = nil @variables = {} @functions = FunctionsClass.new + @attlist_mappings = nil + @document = nil + @element_namespaces_cache = {} @nest = 0 @strict = strict end @@ -85,14 +88,8 @@ def parse path, node node = node.first end - document = node.document - if document - document.__send__(:enable_cache) do - match( path_stack, node ) - end - else - match( path_stack, node ) - end + @document = node.document + match(path_stack, node) end def get_first path, node @@ -167,20 +164,45 @@ def strict? @strict end - # Returns a String namespace for a node, given a prefix + # Returns a String namespace for a prefix used in xpath # The rules are: # # 1. Use the supplied namespace mapping first. - # 2. If no mapping was supplied, use the context node to look up the namespace - def get_namespace( node, prefix ) + # 2. If no mapping was supplied, use the context node to look up the namespace as a fallback. + def get_xpath_namespace(node, prefix) if @namespaces @namespaces[prefix] || '' + elsif node.node_type == :element + element_namespace_lookup(node, prefix) else - return node.namespace( prefix ) if node.node_type == :element '' end end + # Returns attribute's namespace URI while caching the + # intermediate result to speed up retrieval of namespaces + def attribute_namespace(attribute) + attribute.prefix == '' ? '' : element_namespace_lookup(attribute.element, attribute.prefix) + end + + # Return element's namespace URI while caching the + # intermediate result to speed up retrieval of namespaces + def element_namespace(element) + element._namespace_internal(element_namespaces(element)) + end + + # Returns a hash of namespaces for the given element while caching the + # intermediate result to speed up retrieval of namespaces + def element_namespaces(element) + @attlist_mappings ||= @document&.doctype&._attlist_mappings || {} + element._calculate_namespaces(@element_namespaces_cache, @attlist_mappings) + end + + # Returns namespace of the prefix in the context of the element, + # while caching the intermediate result to speed up retrieval of namespaces + def element_namespace_lookup(element, prefix) + element._namespace_lookup_internal(prefix, element_namespaces(element)) + end # Expr takes a stack of path elements and a set of nodes (either a Parent # or an Array and returns an Array of matching nodes @@ -642,20 +664,20 @@ def node_test(path_stack, any_type: :element) node.name == name elsif prefix.empty? if strict? - node.name == name and node.namespace == "" + node.name == name and element_namespace(node) == "" else - node.name == name and node.namespace == get_namespace(node, prefix) + node.name == name and element_namespace(node) == get_xpath_namespace(node, prefix) end else - node.name == name and node.namespace == get_namespace(node, prefix) + node.name == name and element_namespace(node) == get_xpath_namespace(node, prefix) end when :attribute if prefix.nil? node.name == name elsif prefix.empty? - node.name == name and node.namespace == "" + node.name == name and attribute_namespace(node) == "" else - node.name == name and node.namespace == get_namespace(node.element, prefix) + node.name == name and attribute_namespace(node) == get_xpath_namespace(node.element, prefix) end else false @@ -666,11 +688,11 @@ def node_test(path_stack, any_type: :element) ->(node) do case node.node_type when :element - namespaces = @namespaces || node.namespaces - node.namespace == namespaces[prefix] + namespaces = @namespaces || element_namespaces(node) + element_namespace(node) == namespaces[prefix] when :attribute - namespaces = @namespaces || node.element.namespaces - node.namespace == namespaces[prefix] + namespaces = @namespaces || element_namespaces(node.element) + attribute_namespace(node) == namespaces[prefix] else false end diff --git a/test/test_namespace.rb b/test/test_namespace.rb index a41e5058e..6d405c9ad 100644 --- a/test/test_namespace.rb +++ b/test/test_namespace.rb @@ -1,7 +1,9 @@ # frozen_string_literal: false +require "core_assertions" module REXMLTests class TestNamespace < Test::Unit::TestCase + include Test::Unit::CoreAssertions include Helper::Fixture include REXML @@ -34,5 +36,25 @@ def test_xml_namespace assert_equal("http://www.w3.org/XML/1998/namespace", document.root.namespace("xml")) end + + def test_deep_element_namespace_linear + omit('Recursion too deep on JRuby') if RUBY_ENGINE == "jruby" + + max_depth = 3000 + xml = <<~XML + #{'' * max_depth + '' * max_depth} + XML + doc = Document.new(xml) + prepare_element = ->(depth) do + node = doc.root + depth.times { node = node.first } + node || raise + end + + assert_linear_performance([30, 100, 300, 1000, 3000], rehearsal: 10) do |depth| + elem = prepare_element.call(depth) + elem.namespace + end + end end end diff --git a/test/xpath/test_base.rb b/test/xpath/test_base.rb index 911fda120..0383a4179 100644 --- a/test/xpath/test_base.rb +++ b/test/xpath/test_base.rb @@ -1320,16 +1320,6 @@ def test_namespaces_0 assert_equal( 1, XPath.match( d, "//x:*" ).size ) end - def test_namespaces_cache - doc = Document.new("") - assert_equal("", XPath.first(doc, "//b[namespace-uri()='1']").to_s) - assert_nil(XPath.first(doc, "//b[namespace-uri()='']")) - - doc.root.delete_namespace - assert_nil(XPath.first(doc, "//b[namespace-uri()='1']")) - assert_equal("", XPath.first(doc, "//b[namespace-uri()='']").to_s) - end - def test_ticket_71 doc = Document.new(%Q{}) el = doc.root.elements[1] From 9b77e3f12f466462baf3b84744543ba74ee6f554 Mon Sep 17 00:00:00 2001 From: tomoya ishida Date: Sat, 4 Jul 2026 02:34:07 +0900 Subject: [PATCH 3/5] Apply suggestions from code review Co-authored-by: NAITOH Jun --- lib/rexml/doctype.rb | 2 +- lib/rexml/element.rb | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/rexml/doctype.rb b/lib/rexml/doctype.rb index c8789d94a..a52ac0577 100644 --- a/lib/rexml/doctype.rb +++ b/lib/rexml/doctype.rb @@ -123,7 +123,7 @@ def attribute_of element, attribute _attlist_mappings[element]&.[](attribute) end - def _attlist_mappings + def _attlist_mappings # :nodoc: mapping = {} grep(AttlistDecl).each do |child| raw_attributes = mapping[child.element_name] ||= {} diff --git a/lib/rexml/element.rb b/lib/rexml/element.rb index 1c8d829ed..45d6531d6 100644 --- a/lib/rexml/element.rb +++ b/lib/rexml/element.rb @@ -2527,7 +2527,8 @@ def _calculate_namespaces(attlist_mappings = nil) # :nodoc: # the element's attribute takes precedence. def each_effective_attribute(attlist_mappings = nil) return to_enum(__method__, attlist_mappings) unless block_given? - attributes = attlist_attributes(attlist_mappings).merge(self) + attlist = attlist_attributes(attlist_mappings) + attributes = attlist.empty? ? self : attlist.merge(self) attributes.each_value do |attribute| yield attribute end From 09ccba1b279cace78c350f526c7d33460302cffd Mon Sep 17 00:00:00 2001 From: tompng Date: Sat, 4 Jul 2026 02:12:42 +0900 Subject: [PATCH 4/5] Remove document's namespace cache API which is no longer used --- lib/rexml/document.rb | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/lib/rexml/document.rb b/lib/rexml/document.rb index d5db713da..bcf612fbe 100644 --- a/lib/rexml/document.rb +++ b/lib/rexml/document.rb @@ -449,19 +449,6 @@ def document private - attr_accessor :namespaces_cache - - # New document level cache is created and available in this block. - # This API is thread unsafe. Users can't change this document in this block. - def enable_cache - @namespaces_cache = {} - begin - yield - ensure - @namespaces_cache = nil - end - end - def build( source ) Parsers::TreeParser.new( source, self ).parse end From a83e31ad6d105cf39dc980fba7647bd91bff9e58 Mon Sep 17 00:00:00 2001 From: tompng Date: Sat, 4 Jul 2026 02:38:52 +0900 Subject: [PATCH 5/5] Update XPathParser#get_first and XPathParser#predicate to both setup document --- lib/rexml/xpath_parser.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/rexml/xpath_parser.rb b/lib/rexml/xpath_parser.rb index 2bdb743c2..00d066ec5 100644 --- a/lib/rexml/xpath_parser.rb +++ b/lib/rexml/xpath_parser.rb @@ -88,12 +88,12 @@ def parse path, node node = node.first end - @document = node.document match(path_stack, node) end def get_first path, node path_stack = @parser.parse( path ) + @document = node.document first( path_stack, node ) end @@ -149,6 +149,7 @@ def first( path_stack, node ) def match(path_stack, node) + @document = node.document nodeset = [node] result = expr(path_stack, nodeset) case result