From f7bc28d8244cd69b78382d193421abda1204aa73 Mon Sep 17 00:00:00 2001 From: Earlopain <14981592+Earlopain@users.noreply.github.com> Date: Sat, 24 Jan 2026 20:34:23 +0100 Subject: [PATCH 1/2] [ruby/prism] Further optimize ripper translator by not using `delegate` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using it seems pretty bad for performance: ```rb require "benchmark/ips" require "prism" require "ripper" codes = Dir["**/*.rb"].map { File.read(it) } Benchmark.ips do |x| x.report("prism") { codes.each { Prism::Translation::Ripper.lex(it) } } x.report("ripper") { codes.each { Ripper.lex(it) } } x.compare! end ``` ``` # Before ruby 4.0.0 (2025-12-25 revision https://github.com/ruby/prism/commit/553f1675f3) +PRISM [x86_64-linux] Warming up -------------------------------------- prism 1.000 i/100ms ripper 1.000 i/100ms Calculating ------------------------------------- prism 0.319 (± 0.0%) i/s (3.14 s/i) - 2.000 in 6.276154s ripper 0.647 (± 0.0%) i/s (1.54 s/i) - 4.000 in 6.182662s Comparison: ripper: 0.6 i/s prism: 0.3 i/s - 2.03x slower # After ruby 4.0.0 (2025-12-25 revision https://github.com/ruby/prism/commit/553f1675f3) +PRISM [x86_64-linux] Warming up -------------------------------------- prism 1.000 i/100ms ripper 1.000 i/100ms Calculating ------------------------------------- prism 0.482 (± 0.0%) i/s (2.08 s/i) - 3.000 in 6.225603s ripper 0.645 (± 0.0%) i/s (1.55 s/i) - 4.000 in 6.205636s Comparison: ripper: 0.6 i/s prism: 0.5 i/s - 1.34x slower ``` `vernier` tells me it does `method_missing` even for explicitly defined methods like `location`. https://github.com/ruby/prism/commit/2ea81398cc --- lib/prism/lex_compat.rb | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/lib/prism/lex_compat.rb b/lib/prism/lex_compat.rb index 597e63c73e73b7..c14a9f16030490 100644 --- a/lib/prism/lex_compat.rb +++ b/lib/prism/lex_compat.rb @@ -1,8 +1,6 @@ # frozen_string_literal: true # :markup: markdown -require "delegate" - module Prism # This class is responsible for lexing the source using prism and then # converting those tokens to be compatible with Ripper. In the vast majority @@ -201,27 +199,43 @@ def deconstruct_keys(keys) # When we produce tokens, we produce the same arrays that Ripper does. # However, we add a couple of convenience methods onto them to make them a # little easier to work with. We delegate all other methods to the array. - class Token < SimpleDelegator - # @dynamic initialize, each, [] + class Token < BasicObject + # Create a new token object with the given ripper-compatible array. + def initialize(array) + @array = array + end # The location of the token in the source. def location - self[0] + @array[0] end # The type of the token. def event - self[1] + @array[1] end # The slice of the source that this token represents. def value - self[2] + @array[2] end # The state of the lexer when this token was produced. def state - self[3] + @array[3] + end + + # We want to pretend that this is just an Array. + def ==(other) # :nodoc: + @array == other + end + + def respond_to_missing?(name, include_private = false) # :nodoc: + @array.respond_to?(name, include_private) + end + + def method_missing(name, ...) # :nodoc: + @array.send(name, ...) end end From 985b58a4ed03913509834c578a31105c0dd3204a Mon Sep 17 00:00:00 2001 From: Earlopain <14981592+Earlopain@users.noreply.github.com> Date: Sat, 24 Jan 2026 16:51:30 +0100 Subject: [PATCH 2/2] [ruby/prism] Remove unneeded lex_compat token types These are either fixed in prism or ruby/ripper itself. https://github.com/ruby/prism/commit/41c7c126b2 --- lib/prism/lex_compat.rb | 69 ----------------------------------------- 1 file changed, 69 deletions(-) diff --git a/lib/prism/lex_compat.rb b/lib/prism/lex_compat.rb index c14a9f16030490..63305b7057c1ef 100644 --- a/lib/prism/lex_compat.rb +++ b/lib/prism/lex_compat.rb @@ -247,50 +247,6 @@ def ==(other) # :nodoc: end end - # Ident tokens for the most part are exactly the same, except sometimes we - # know an ident is a local when ripper doesn't (when they are introduced - # through named captures in regular expressions). In that case we don't - # compare the state. - class IdentToken < Token - def ==(other) # :nodoc: - (self[0...-1] == other[0...-1]) && ( - (other[3] == Translation::Ripper::EXPR_LABEL | Translation::Ripper::EXPR_END) || - (other[3] & (Translation::Ripper::EXPR_ARG | Translation::Ripper::EXPR_CMDARG) != 0) - ) - end - end - - # Ignored newlines can occasionally have a LABEL state attached to them, so - # we compare the state differently here. - class IgnoredNewlineToken < Token - def ==(other) # :nodoc: - return false unless self[0...-1] == other[0...-1] - - if self[3] == Translation::Ripper::EXPR_ARG | Translation::Ripper::EXPR_LABELED - other[3] & Translation::Ripper::EXPR_ARG | Translation::Ripper::EXPR_LABELED != 0 - else - self[3] == other[3] - end - end - end - - # If we have an identifier that follows a method name like: - # - # def foo bar - # - # then Ripper will mark bar as END|LABEL if there is a local in a parent - # scope named bar because it hasn't pushed the local table yet. We do this - # more accurately, so we need to allow comparing against both END and - # END|LABEL. - class ParamToken < Token - def ==(other) # :nodoc: - (self[0...-1] == other[0...-1]) && ( - (other[3] == Translation::Ripper::EXPR_END) || - (other[3] == Translation::Ripper::EXPR_END | Translation::Ripper::EXPR_LABEL) - ) - end - end - # A heredoc in this case is a list of tokens that belong to the body of the # heredoc that should be appended onto the list of tokens when the heredoc # closes. @@ -699,33 +655,8 @@ def result # want to bother comparing the state on them. last_heredoc_end = token.location.end_offset IgnoreStateToken.new([[lineno, column], event, value, lex_state]) - when :on_ident - if lex_state == Translation::Ripper::EXPR_END - # If we have an identifier that follows a method name like: - # - # def foo bar - # - # then Ripper will mark bar as END|LABEL if there is a local in a - # parent scope named bar because it hasn't pushed the local table - # yet. We do this more accurately, so we need to allow comparing - # against both END and END|LABEL. - ParamToken.new([[lineno, column], event, value, lex_state]) - elsif lex_state == Translation::Ripper::EXPR_END | Translation::Ripper::EXPR_LABEL - # In the event that we're comparing identifiers, we're going to - # allow a little divergence. Ripper doesn't account for local - # variables introduced through named captures in regexes, and we - # do, which accounts for this difference. - IdentToken.new([[lineno, column], event, value, lex_state]) - else - Token.new([[lineno, column], event, value, lex_state]) - end when :on_embexpr_end IgnoreStateToken.new([[lineno, column], event, value, lex_state]) - when :on_ignored_nl - # Ignored newlines can occasionally have a LABEL state attached to - # them which doesn't actually impact anything. We don't mirror that - # state so we ignored it. - IgnoredNewlineToken.new([[lineno, column], event, value, lex_state]) when :on_regexp_end # On regex end, Ripper scans and then sets end state, so the ripper # lexed output is begin, when it should be end. prism sets lex state