From dab84235ef7d0fddc19757717e6452af354d2dc8 Mon Sep 17 00:00:00 2001
From: Keno Fischer <keno@juliahub.com>
Date: Sat, 7 Jun 2025 21:39:03 +0000
Subject: [PATCH 1/8] parser_stream: Produce green tree traversal rather than
 token ranges

## Background

I've written about 5 parsers that use the general red tree/green tree
pattern. Now that we're using JuliaSyntax in base, I'd like to replace
some of them by a version based on JuliaSyntax, so that I can avoid
having to maintain multiple copies of similar infrastructure. As a result,
I'm taking a close look at some of the internals of JuliaSyntax.

## Current Design

One thing that I really like about JuliaSyntax is that the parser basically
produces a flat output buffer (well two in the current design, after
https://github.com/JuliaLang/JuliaSyntax.jl/pull/19). In essence, the output
is a post-order depth-first traversal of the parse tree, each node annotated
with the range covered by that node.

From there, it is possible to recover the parse tree without re-parsing
by partitioning the token list according to the ranges of the non-terminal
tokens. One particular application of this is to re-build a pointer-y green
tree structure that stores relative byte ranges and serves the same incremental
parsing purpose as green tree representations in other systems.

The single-output-buffer design is a great innovation over the pointer-y
system. It's much easier to handle and it also enforces important invariants
by construction (or at least makes them easy to check). However, I think
the whole post-parse tree construction logic is reducing the value of it
significantly. In particular, green trees are supposed to be able to serve
as compact, persistent representations of the parse tree. However, here the
compact, persistent representation (the output memory buffer) is not usable
as a green tree. We do have the pointer-y `GreenNode` tree, but this has
all the same downsides that the single buffer system was supposed to avoid.
It uses explicit vectors in every node and even constructing it from the
parser output allocates a nontrivial amount of memory to recover the tree
structure.

## Proposed design

This PR proposes to change the parser output to be directly usable as a
green-tree in-situ by changing the post-order dfs traversal to instead
produce (byte, node) spans (note that this is the same data as in the
current `GreenNode`, except that the node span is implicit in the length
of the vector and that here the children are implicit by the position
in the output).

This does essentially mean semantically reverting #19,
but the representation proposed here is more compact than both main and
the pre-#19 representation. In particular, the output is now a sequence of:

```
struct RawGreenNode
    head::SyntaxHead                  # Kind,flags
    byte_span::UInt32                 # Number of bytes covered by this range
    # If NON_TERMINAL_FLAG is set, this is the total number of child nodes
    # Otherwise this is a terminal node (i.e. a token) and this is orig_kind
    node_span_or_orig_kind::UInt32
end
```

The structure is used for both terminals and non-terminals, with the interpretation
of the last field differing between them. This is marginally more compact than
the current token list representation on current `main`, because we do not store
the `next_byte` pointer (which would instead have to be recovered from the green
tree using the usual `O(log n)` algorithm).

However, because we store `node_span`, this data structure provides linear time
traversal (in reverse order) over the children of the current node. In particular,
this means that the tree structure is manifest and does not require the allocation
of temporary stacks to recover the tree structure. As a result, the output buffer
can now be used as an efficient, persistent, green tree representation.

I think the primary weird thing about this design is that the iteration over the
children must happen in reverse order. The current GreenNode design has constant
time access to all children. Of course, a lookup table for this can be computed
in linear time with smaller memory than the GreenNode design, but it's
important to point out this limitation. That said, for transformation use cases
(e.g. to Expr or Syntax node), constant time access to the children is not really
required (although the children are being produced backwards, which looks a little
funny). That said, to avoid any disruption to downstream users, the `GreenNode`
design itself is not changed to use this faster alternative. We can consider
doing so in a later PR.

## Benchmark

The motivation for this change is not performance, but rather representational cleanliness.
That said, it's of course imperative that this not degrade performance.
Fortunately, the benchmarks show that this is in fact marginally faster for `Expr`
construction, largely because we get to avoid the additional memory allocation traffic
from having the tree structure explicitly represented. Parse time itself is essentially
unchanged (which is unsurprising, since we're primarily changing what's being put into
the output - although the parser does a few lookback-style operations in a few places).
---
 Project.toml                         |   2 -
 docs/src/design.md                   |  38 +-
 src/JuliaSyntax.jl                   |   5 +-
 src/expr.jl                          | 466 +++++++++++----------
 src/{green_tree.jl => green_node.jl} |  63 +--
 src/kinds.jl                         |   6 +-
 src/parse_stream.jl                  | 596 ++++++++++++++-------------
 src/parser.jl                        |   2 +
 src/parser_api.jl                    |   8 +-
 src/syntax_tree.jl                   |  72 ++--
 src/tree_cursors.jl                  | 145 +++++++
 test/expr.jl                         |  14 +-
 test/green_node.jl                   |   2 +-
 test/parse_stream.jl                 |  52 ++-
 test/parser.jl                       |   4 +-
 test/syntax_tree.jl                  |   6 +-
 test/test_utils.jl                   |   3 +-
 17 files changed, 865 insertions(+), 619 deletions(-)
 rename src/{green_tree.jl => green_node.jl} (66%)
 create mode 100644 src/tree_cursors.jl

diff --git a/Project.toml b/Project.toml
index 231dab6f..1abbf2f7 100644
--- a/Project.toml
+++ b/Project.toml
@@ -7,8 +7,6 @@ version = "1.0.2"
 Serialization = "1.0"
 julia = "1.0"
 
-[deps]
-
 [extras]
 Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
 Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
diff --git a/docs/src/design.md b/docs/src/design.md
index fb2a06c2..968a0e11 100644
--- a/docs/src/design.md
+++ b/docs/src/design.md
@@ -56,7 +56,7 @@ We use a hand-written lexer (a heavily modified version of
 The main parser innovation is the `ParseStream` interface which provides a
 stream-like I/O interface for writing the parser. The parser does not
 depend on or produce any concrete tree data structure as part of the parsing
-phase but the output spans can be post-processed into various tree data
+phase but the output nodes can be post-processed into various tree data
 structures as required. This is like the design of rust-analyzer though with a
 simpler implementation.
 
@@ -64,35 +64,39 @@ Parsing proceeds by recursive descent;
 
 * The parser consumes a flat list of lexed tokens as *input* using `peek()` to
   examine tokens and `bump()` to consume them.
-* The parser produces a flat list of text spans as *output* using `bump()` to
-  transfer tokens to the output and `position()`/`emit()` for nonterminal ranges.
+* The parser produces a flat list of `RawGreenNode`s as *output* using `bump()` to
+  transfer tokens to the output and `position()`/`emit()` for nonterminal nodes.
 * Diagnostics are emitted as separate text spans
 * Whitespace and comments are automatically `bump()`ed and don't need to be
   handled explicitly. The exception is syntactically relevant newlines in space
   sensitive mode.
 * Parser modes are passed down the call tree using `ParseState`.
 
-The output spans track the byte range, a syntax "kind" stored as an integer
-tag, and some flags. The kind tag makes the spans a [sum
-type](https://blog.waleedkhan.name/union-vs-sum-types/) but where the type is
-tracked explicitly outside of Julia's type system.
+The output nodes track the byte range, a syntax "kind" stored as an integer
+tag, and some flags. Each node also stores either the number of child nodes
+(for non-terminals) or the original token kind (for terminals). The kind tag
+makes the nodes a [sum type](https://blog.waleedkhan.name/union-vs-sum-types/)
+but where the type is tracked explicitly outside of Julia's type system.
 
-For lossless parsing the output spans must cover the entire input text. Using
+For lossless parsing the output nodes must cover the entire input text. Using
 `bump()`, `position()` and `emit()` in a natural way also ensures that:
-* Spans are cleanly nested with children contained entirely within their parents
-* Siblings spans are emitted in source order
-* Parent spans are emitted after all their children.
+* Nodes are cleanly nested with children contained entirely within their parents
+* Sibling nodes are emitted in source order
+* Parent nodes are emitted after all their children.
 
-These properties make the output spans naturally isomorphic to a
+These properties make the output nodes a post-order traversal of a
 ["green tree"](#raw-syntax-tree--green-tree)
-in the terminology of C#'s Roslyn compiler.
+in the terminology of C#'s Roslyn compiler, with the tree structure
+implicit in the node spans.
 
 ### Tree construction
 
-The `build_tree` function performs a depth-first traversal of the `ParseStream`
-output spans allowing it to be assembled into a concrete tree data structure,
-for example using the `GreenNode` data type. We further build on top of this to
-define `build_tree` for the AST type `SyntaxNode` and for normal Julia `Expr`.
+The `build_tree` function uses the implicit tree structure in the `ParseStream`
+output to assemble concrete tree data structures. Since the output is already
+a post-order traversal of `RawGreenNode`s with node spans encoding parent-child
+relationships, tree construction is straightforward. We build on top of this to
+define `build_tree` for various tree types including `GreenNode`, the AST type
+`SyntaxNode`, and for normal Julia `Expr`.
 
 ### Error recovery
 
diff --git a/src/JuliaSyntax.jl b/src/JuliaSyntax.jl
index 9afff872..3c276984 100644
--- a/src/JuliaSyntax.jl
+++ b/src/JuliaSyntax.jl
@@ -73,7 +73,7 @@ export @K_str, kind
 
 export SyntaxNode
 
-@_public GreenNode,
+@_public GreenNode, RedTreeCursor, GreenTreeCursor,
     span
 
 # Helper utilities
@@ -95,7 +95,8 @@ include("parser_api.jl")
 include("literal_parsing.jl")
 
 # Tree data structures
-include("green_tree.jl")
+include("tree_cursors.jl")
+include("green_node.jl")
 include("syntax_tree.jl")
 include("expr.jl")
 
diff --git a/src/expr.jl b/src/expr.jl
index 0a6f16a7..dc802b4e 100644
--- a/src/expr.jl
+++ b/src/expr.jl
@@ -28,7 +28,7 @@ macro isexpr(ex, head, nargs)
       length($(esc(ex)).args) == $(esc(nargs)))
 end
 
-function _reorder_parameters!(args::Vector{Any}, params_pos)
+function _reorder_parameters!(args::Vector{Any}, params_pos::Int)
     p = 0
     for i = length(args):-1:1
         ai = args[i]
@@ -48,7 +48,7 @@ function _reorder_parameters!(args::Vector{Any}, params_pos)
     insert!(args, params_pos, pop!(args))
 end
 
-function _strip_parens(ex)
+function _strip_parens(ex::Expr)
     while true
         if @isexpr(ex, :parens)
             if length(ex.args) == 1
@@ -63,37 +63,9 @@ function _strip_parens(ex)
     end
 end
 
-# Get Julia value of leaf node as it would be represented in `Expr` form
-function _expr_leaf_val(node::SyntaxNode)
-    node.val
-end
 
-function _leaf_to_Expr(source, txtbuf, txtbuf_offset, head, srcrange, node)
-    k = kind(head)
-    if k == K"MacroName" && view(source, srcrange) == "."
-        return Symbol("@__dot__")
-    elseif is_error(k)
-        return k == K"error" ?
-            Expr(:error) :
-            Expr(:error, "$(_token_error_descriptions[k]): `$(source[srcrange])`")
-    else
-        val = isnothing(node) ?
-            parse_julia_literal(txtbuf, head, srcrange .+ txtbuf_offset) :
-            _expr_leaf_val(node)
-        if val isa Union{Int128,UInt128,BigInt}
-            # Ignore the values of large integers and convert them back to
-            # symbolic/textural form for compatibility with the Expr
-            # representation of these.
-            str = replace(source[srcrange], '_'=>"")
-            macname = val isa Int128  ? Symbol("@int128_str")  :
-                      val isa UInt128 ? Symbol("@uint128_str") :
-                      Symbol("@big_str")
-            return Expr(:macrocall, GlobalRef(Core, macname), nothing, str)
-        else
-            return val
-        end
-    end
-end
+reverse_nontrivia_children(cursor::RedTreeCursor) = Iterators.filter(should_include_node, Iterators.reverse(cursor))
+reverse_nontrivia_children(cursor::SyntaxNode) = Iterators.filter(should_include_node, Iterators.reverse(children(cursor)))
 
 # Julia string literals in a `K"string"` node may be split into several chunks
 # interspersed with trivia in two situations:
@@ -102,89 +74,110 @@ end
 #
 # This function concatenating adjacent string chunks together as done in the
 # reference parser.
-function _string_to_Expr(args)
+function _string_to_Expr(cursor::Union{RedTreeCursor, SyntaxNode}, source::SourceFile, txtbuf::Vector{UInt8}, txtbuf_offset::UInt32)
+    ret = Expr(:string)
     args2 = Any[]
     i = 1
-    while i <= length(args)
-        if args[i] isa String
-            if i < length(args) && args[i+1] isa String
+    it = reverse_nontrivia_children(cursor)
+    r = iterate(it)
+    while r !== nothing
+        (child, state) = r
+        ex = node_to_expr(child, source, txtbuf, txtbuf_offset)
+        if isa(ex, String)
+            # This branch combines consecutive string chunks together.
+            # It's unrolled once to avoid unnecessary allocations.
+            r = iterate(it, state)
+            if r === nothing
+                pushfirst!(ret.args, ex)
+                continue
+            end
+            (child, state) = r
+            ex2 = node_to_expr(child, source, txtbuf, txtbuf_offset)
+            if !isa(ex2, String)
+                pushfirst!(ret.args, ex)
+                ex = ex2
+                # Fall through to process `ex` (!::String)
+            else
+                strings = String[ex2, ex]  # Note: reversed order since we're iterating backwards
+                r = iterate(it, state)
+                while r !== nothing
+                    (child, state) = r
+                    ex = node_to_expr(child, source, txtbuf, txtbuf_offset)
+                    isa(ex, String) || break
+                    pushfirst!(strings, ex)
+                    r = iterate(it, state)
+                end
                 buf = IOBuffer()
-                while i <= length(args) && args[i] isa String
-                    write(buf, args[i]::String)
-                    i += 1
+                for s in strings
+                    write(buf, s)
                 end
-                push!(args2, String(take!(buf)))
-            else
-                push!(args2, args[i])
-                i += 1
+                pushfirst!(ret.args, String(take!(buf)))
+                r === nothing && break
+                # Fall through to process `ex` (!::String)
             end
-        else
-            ex = args[i]
-            if @isexpr(ex, :parens, 1)
-                ex = _strip_parens(ex)
-                if ex isa String
-                    # Wrap interpolated literal strings in (string) so we can
-                    # distinguish them from the surrounding text (issue #38501)
-                    # Ie, "$("str")"  vs  "str"
-                    # https://github.com/JuliaLang/julia/pull/38692
-                    ex = Expr(:string, ex)
-                end
+        end
+        # ex not a string
+        if @isexpr(ex, :parens, 1)
+            ex = _strip_parens(ex)
+            if ex isa String
+                # Wrap interpolated literal strings in (string) so we can
+                # distinguish them from the surrounding text (issue #38501)
+                # Ie, "$("str")"  vs  "str"
+                # https://github.com/JuliaLang/julia/pull/38692
+                ex = Expr(:string, ex)
             end
-            push!(args2, ex)
-            i += 1
         end
+        @assert ex !== nothing
+        pushfirst!(ret.args, ex)
+        r = iterate(it, state)
     end
-    if length(args2) == 1 && args2[1] isa String
+
+    if length(ret.args) == 1 && ret.args[1] isa String
         # If there's a single string remaining after joining, we unwrap
         # to give a string literal.
         #   """\n  a\n  b""" ==>  "a\nb"
-        return only(args2)
+        return only(ret.args)
     else
         # This only happens when the kind is K"string" or when an error has occurred.
-        return Expr(:string, args2...)
+        return ret
     end
 end
 
 # Shared fixups for Expr children in cases where the type of the parent node
 # affects the child layout.
-function _fixup_Expr_children!(head, loc, args)
+function fixup_Expr_child(head::SyntaxHead, @nospecialize(arg), first::Bool)
+    isa(arg, Expr) || return arg
     k = kind(head)
     eq_to_kw_in_call = ((k == K"call" || k == K"dotcall") &&
                         is_prefix_call(head)) || k == K"ref"
     eq_to_kw_in_params = k != K"vect"   && k != K"curly" &&
                          k != K"braces" && k != K"ref"
     coalesce_dot = k in KSet"call dotcall curly" ||
-                   (k == K"quote" && flags(head) == COLON_QUOTE)
-    for i in 1:length(args)
-        arg = args[i]
-        was_parens = @isexpr(arg, :parens)
-        arg = _strip_parens(arg)
-        if @isexpr(arg, :(=)) && eq_to_kw_in_call && i > 1
-            arg = Expr(:kw, arg.args...)
-        elseif k != K"parens" && @isexpr(arg, :., 1) && arg.args[1] isa Tuple
-            h, a = arg.args[1]::Tuple{SyntaxHead,Any}
-            arg = ((!was_parens && coalesce_dot && i == 1) ||
-                   (k == K"comparison" && iseven(i)) ||
-                   is_syntactic_operator(h)) ?
-                Symbol(".", a) : Expr(:., a)
-        elseif @isexpr(arg, :parameters) && eq_to_kw_in_params
-            pargs = arg.args
-            for j = 1:length(pargs)
-                pj = pargs[j]
-                if @isexpr(pj, :(=))
-                    pargs[j] = Expr(:kw, pj.args...)
-                end
+                   (k == K"quote" && has_flags(head, COLON_QUOTE))
+    was_parens = @isexpr(arg, :parens)
+    arg = _strip_parens(arg)
+    if @isexpr(arg, :(=)) && eq_to_kw_in_call && !first
+        arg = Expr(:kw, arg.args...)
+    elseif k != K"parens" && @isexpr(arg, :., 1) && arg.args[1] isa Tuple
+        # This undoes the "Hack" below
+        h, a = arg.args[1]::Tuple{SyntaxHead,Any}
+        arg = ((!was_parens && coalesce_dot && first) ||
+                is_syntactic_operator(h)) ?
+            Symbol(".", a) : Expr(:., a)
+    elseif @isexpr(arg, :parameters) && eq_to_kw_in_params
+        pargs = arg.args
+        for j = 1:length(pargs)
+            pj = pargs[j]
+            if @isexpr(pj, :(=))
+                pargs[j] = Expr(:kw, pj.args...)
             end
-        elseif k == K"let" && i == 1 && @isexpr(arg, :block)
-            filter!(a -> !(a isa LineNumberNode), arg.args)
         end
-        args[i] = arg
     end
-    return args
+    return arg
 end
 
 # Remove the `do` block from the final position in a function/macro call arg list
-function _extract_do_lambda!(args)
+function _extract_do_lambda!(args::Vector{Any})
     if length(args) > 1 && Meta.isexpr(args[end], :do_lambda)
         do_ex = pop!(args)::Expr
         return Expr(:->, do_ex.args...)
@@ -193,7 +186,7 @@ function _extract_do_lambda!(args)
     end
 end
 
-function _append_iterspec!(args, ex)
+function _append_iterspec!(args::Vector{Any}, @nospecialize(ex))
     if @isexpr(ex, :iteration)
         for iter in ex.args::Vector{Any}
             push!(args, Expr(:(=), iter.args...))
@@ -204,48 +197,131 @@ function _append_iterspec!(args, ex)
     return args
 end
 
+function parseargs!(retexpr::Expr, loc::LineNumberNode, cursor::Union{RedTreeCursor, SyntaxNode}, source::SourceFile, txtbuf::Vector{UInt8}, txtbuf_offset::UInt32)
+    args = retexpr.args
+    firstchildhead = head(cursor)
+    firstchildrange::UnitRange{UInt32} = byte_range(cursor)
+    itr = reverse_nontrivia_children(cursor)
+    r = iterate(itr)
+    while r !== nothing
+        (child, state) = r
+        r = iterate(itr, state)
+        expr = node_to_expr(child, source, txtbuf, txtbuf_offset)
+        @assert expr !== nothing
+        firstchildhead = head(child)
+        firstchildrange = byte_range(child)
+        pushfirst!(args, fixup_Expr_child(head(cursor), expr, r === nothing))
+    end
+    return (firstchildhead, firstchildrange)
+end
+
 # Convert internal node of the JuliaSyntax parse tree to an Expr
-function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, args)
-    k = kind(head)
-    if (k == K"var" || k == K"char") && length(args) == 1
-        # Ideally we'd like `@check length(args) == 1` as an invariant for all
-        # K"var" and K"char" nodes, but this discounts having embedded error
-        # nodes when ignore_errors=true is set.
-        return args[1]
-    elseif k == K"string"
-        return _string_to_Expr(args)
+function node_to_expr(cursor::Union{RedTreeCursor, SyntaxNode}, source::SourceFile, txtbuf::Vector{UInt8}, txtbuf_offset::UInt32=UInt32(0))
+    if !should_include_node(cursor)
+        return nothing
+    end
+
+    nodehead = head(cursor)
+    k = kind(cursor)
+    srcrange::UnitRange{UInt32} = byte_range(cursor)
+    if is_leaf(cursor)
+        if k == K"MacroName" && view(source, srcrange) == "."
+            return Symbol("@__dot__")
+        elseif is_error(k)
+            return k == K"error" ?
+                Expr(:error) :
+                Expr(:error, "$(_token_error_descriptions[k]): `$(source[srcrange])`")
+        else
+            val = parse_julia_literal(txtbuf, head(cursor), srcrange .+ txtbuf_offset)
+            if val isa Union{Int128,UInt128,BigInt}
+                # Ignore the values of large integers and convert them back to
+                # symbolic/textural form for compatibility with the Expr
+                # representation of these.
+                str = replace(source[srcrange], '_'=>"")
+                macname = val isa Int128  ? Symbol("@int128_str")  :
+                        val isa UInt128 ? Symbol("@uint128_str") :
+                        Symbol("@big_str")
+                return Expr(:macrocall, GlobalRef(Core, macname), nothing, str)
+            else
+                return val
+            end
+        end
+    end
+
+    if k == K"string"
+        return _string_to_Expr(cursor, source, txtbuf, txtbuf_offset)
     end
 
     loc = source_location(LineNumberNode, source, first(srcrange))
-    endloc = source_location(LineNumberNode, source, last(srcrange))
 
     if k == K"cmdstring"
-        return Expr(:macrocall, GlobalRef(Core, Symbol("@cmd")), loc, _string_to_Expr(args))
+        return Expr(:macrocall, GlobalRef(Core, Symbol("@cmd")), loc,
+            _string_to_Expr(cursor, source, txtbuf, txtbuf_offset))
     end
 
-    _fixup_Expr_children!(head, loc, args)
-
-    headstr = untokenize(head, include_flag_suff=false)
+    headstr = untokenize(nodehead, include_flag_suff=false)
     headsym = !isnothing(headstr) ?
               Symbol(headstr) :
               error("Can't untokenize head of kind $(k)")
+    retexpr = Expr(headsym)
 
-    if k == K"?"
-        headsym = :if
+    # Block gets special handling for extra line number nodes
+    if k == K"block" || (k == K"toplevel" && !has_flags(nodehead, TOPLEVEL_SEMICOLONS_FLAG))
+        args = retexpr.args
+        for child in reverse_nontrivia_children(cursor)
+            expr = node_to_expr(child, source, txtbuf, txtbuf_offset)
+            @assert expr !== nothing
+            # K"block" does not have special first-child handling, so we do not need to keep track of that here
+            pushfirst!(args, fixup_Expr_child(head(cursor), expr, false))
+            pushfirst!(args, source_location(LineNumberNode, source, first(byte_range(child))))
+        end
+        isempty(args) && push!(args, loc)
+        if k == K"block" && has_flags(nodehead, PARENS_FLAG)
+            popfirst!(args)
+        end
+        return retexpr
+    end
+
+    # Now recurse to parse all arguments
+    (firstchildhead, firstchildrange) = parseargs!(retexpr, loc, cursor, source, txtbuf, txtbuf_offset)
+
+    return _node_to_expr(retexpr, loc, srcrange,
+                         firstchildhead, firstchildrange,
+                         nodehead, source)
+end
+
+# Split out from the above for codesize reasons, to avoid specialization on multiple
+# tree types.
+@noinline function _node_to_expr(retexpr::Expr, loc::LineNumberNode,
+                                 srcrange::UnitRange{UInt32},
+                                 firstchildhead::SyntaxHead,
+                                 firstchildrange::UnitRange{UInt32},
+                                 nodehead::SyntaxHead,
+                                 source::SourceFile)
+    args = retexpr.args
+    k = kind(nodehead)
+    endloc = source_location(LineNumberNode, source, last(srcrange))
+    if (k == K"var" || k == K"char") && length(retexpr.args) == 1
+        # `var` and `char` nodes have a single argument which is the value.
+        # However, errors can add additional error tokens which we represent
+        # as e.g. `Expr(:var, ..., Expr(:error))`.
+        return retexpr.args[1]
+    elseif k == K"?"
+        retexpr.head = :if
     elseif k == K"op=" && length(args) == 3
         lhs = args[1]
         op = args[2]
         rhs = args[3]
         headstr = string(args[2], '=')
-        if is_dotted(head)
+        if is_dotted(nodehead)
             headstr = '.'*headstr
         end
-        headsym = Symbol(headstr)
-        args = Any[lhs, rhs]
+        retexpr.head = Symbol(headstr)
+        retexpr.args = Any[lhs, rhs]
     elseif k == K"macrocall"
         if length(args) >= 2
             a2 = args[2]
-            if @isexpr(a2, :macrocall) && kind(childheads[1]) == K"CmdMacroName"
+            if @isexpr(a2, :macrocall) && kind(firstchildhead) == K"CmdMacroName"
                 # Fix up for custom cmd macros like foo`x`
                 args[2] = a2.args[3]
             end
@@ -254,54 +330,41 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads,
         _reorder_parameters!(args, 2)
         insert!(args, 2, loc)
         if do_lambda isa Expr
-            return Expr(:do, Expr(headsym, args...), do_lambda)
-        end
-    elseif k == K"block" || (k == K"toplevel" && !has_flags(head, TOPLEVEL_SEMICOLONS_FLAG))
-        if isempty(args)
-            push!(args, loc)
-        else
-            resize!(args, 2*length(args))
-            for i = length(childranges):-1:1
-                args[2*i] = args[i]
-                args[2*i-1] = source_location(LineNumberNode, source, first(childranges[i]))
-            end
-        end
-        if k == K"block" && has_flags(head, PARENS_FLAG)
-            popfirst!(args)
+            return Expr(:do, retexpr, do_lambda)
         end
     elseif k == K"doc"
-        headsym = :macrocall
-        args = [GlobalRef(Core, Symbol("@doc")), loc, args...]
+        retexpr.head = :macrocall
+        retexpr.args = [GlobalRef(Core, Symbol("@doc")), loc, args...]
     elseif k == K"dotcall" || k == K"call"
         # Julia's standard `Expr` ASTs have children stored in a canonical
         # order which is often not always source order. We permute the children
         # here as necessary to get the canonical order.
-        if is_infix_op_call(head) || is_postfix_op_call(head)
+        if is_infix_op_call(nodehead) || is_postfix_op_call(nodehead)
             args[2], args[1] = args[1], args[2]
         end
         # Lower (call x ') to special ' head
-        if is_postfix_op_call(head) && args[1] == Symbol("'")
+        if is_postfix_op_call(nodehead) && args[1] == Symbol("'")
             popfirst!(args)
-            headsym = Symbol("'")
+            retexpr.head = Symbol("'")
         end
         do_lambda = _extract_do_lambda!(args)
         # Move parameters blocks to args[2]
         _reorder_parameters!(args, 2)
-        if headsym === :dotcall
+        if retexpr.head === :dotcall
             funcname = args[1]
-            if is_prefix_call(head)
-                headsym = :.
-                args = Any[funcname, Expr(:tuple, args[2:end]...)]
+            if is_prefix_call(nodehead)
+                retexpr.head = :.
+                retexpr.args = Any[funcname, Expr(:tuple, args[2:end]...)]
             else
                 # operator calls
-                headsym = :call
+                retexpr.head = :call
                 if funcname isa Symbol
                     args[1] = Symbol(:., funcname)
                 end # else funcname could be an Expr(:error), just propagate it
             end
         end
         if do_lambda isa Expr
-            return Expr(:do, Expr(headsym, args...), do_lambda)
+            return Expr(:do, retexpr, do_lambda)
         end
     elseif k == K"."
         if length(args) == 2
@@ -312,7 +375,7 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads,
         elseif length(args) == 1
             # Hack: Here we preserve the head of the operator to determine whether
             # we need to coalesce it with the dot into a single symbol later on.
-            args[1] = (childheads[1], args[1])
+            args[1] = (firstchildhead, args[1])
         end
     elseif k == K"ref" || k == K"curly"
         # Move parameters blocks to args[2]
@@ -335,11 +398,11 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads,
             if @isexpr(a2, :braces)
                 a2a = a2.args
                 _reorder_parameters!(a2a, 2)
-                args = Any[args[1], a2a...]
+                retexpr.args = Any[args[1], a2a...]
             end
         end
     elseif k == K"catch"
-        if kind(childheads[1]) == K"Placeholder"
+        if kind(firstchildhead) == K"Placeholder"
             args[1] = false
         end
     elseif k == K"try"
@@ -367,7 +430,8 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads,
                 @assert false "Illegal $a subclause in `try`"
             end
         end
-        args = Any[try_, catch_var, catch_]
+        empty!(args)
+        push!(args, try_, catch_var, catch_)
         if finally_ !== false || else_ !== false
             push!(args, finally_)
             if else_ !== false
@@ -389,13 +453,13 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads,
         return gen
     elseif k == K"filter"
         @assert length(args) == 2
-        args = _append_iterspec!(Any[args[2]], args[1])
+        retexpr.args = _append_iterspec!(Any[args[2]], args[1])
     elseif k == K"nrow" || k == K"ncat"
         # For lack of a better place, the dimension argument to nrow/ncat
         # is stored in the flags
-        pushfirst!(args, numeric_flags(flags(head)))
+        pushfirst!(args, numeric_flags(flags(nodehead)))
     elseif k == K"typed_ncat"
-        insert!(args, 2, numeric_flags(flags(head)))
+        insert!(args, 2, numeric_flags(flags(nodehead)))
     elseif k == K"elseif"
         # Block for conditional's source location
         args[1] = Expr(:block, loc, args[1])
@@ -406,8 +470,8 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads,
             # compatibility. We should consider deleting this special case in
             # the future as a minor change.
             if length(a1.args) == 1 &&
-                    (!has_flags(childheads[1], PARENS_FLAG) ||
-                     !has_flags(childheads[1], TRAILING_COMMA_FLAG)) &&
+                    (!has_flags(firstchildhead, PARENS_FLAG) ||
+                     !has_flags(firstchildhead, TRAILING_COMMA_FLAG)) &&
                     !Meta.isexpr(a1.args[1], :parameters)
                 # `(a) -> c` is parsed without tuple on lhs in Expr form
                 args[1] = a1.args[1]
@@ -419,7 +483,7 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads,
                 else
                     a111 = only(a11.args)
                     assgn = @isexpr(a111, :kw) ? Expr(:(=), a111.args...) : a111
-                    argloc = source_location(LineNumberNode, source, last(childranges[1]))
+                    argloc = source_location(LineNumberNode, source, last(firstchildrange))
                     args[1] = Expr(:block, a1.args[2], argloc, assgn)
                 end
             end
@@ -433,12 +497,12 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads,
         end
     elseif k == K"function"
         if length(args) > 1
-            if has_flags(head, SHORT_FORM_FUNCTION_FLAG)
+            if has_flags(nodehead, SHORT_FORM_FUNCTION_FLAG)
                 a2 = args[2]
                 if !@isexpr(a2, :block)
                     args[2] = Expr(:block, a2)
                 end
-                headsym = :(=)
+                retexpr.head = :(=)
             else
                 a1 = args[1]
                 if @isexpr(a1, :tuple)
@@ -451,31 +515,36 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads,
                     end
                 end
             end
-            pushfirst!((args[2]::Expr).args, loc)
+            arg2 = args[2]
+            # Only push if this is an Expr - could be an ErrorVal
+            isa(arg2, Expr) && pushfirst!(arg2.args, loc)
         end
     elseif k == K"macro"
         if length(args) > 1
             pushfirst!((args[2]::Expr).args, loc)
         end
     elseif k == K"module"
-        pushfirst!(args, !has_flags(head, BARE_MODULE_FLAG))
+        pushfirst!(args, !has_flags(nodehead, BARE_MODULE_FLAG))
         pushfirst!((args[3]::Expr).args, loc)
     elseif k == K"inert"
         return QuoteNode(only(args))
-    elseif k == K"quote" && length(args) == 1
-        a1 = only(args)
-        if !(a1 isa Expr || a1 isa QuoteNode || a1 isa Bool)
-            # Flisp parser does an optimization here: simple values are stored
-            # as inert QuoteNode rather than in `Expr(:quote)` quasiquote
-            return QuoteNode(a1)
+    elseif k == K"quote"
+        if length(args) == 1
+            a1 = only(args)
+            if !(a1 isa Expr || a1 isa QuoteNode || a1 isa Bool)
+                # Flisp parser does an optimization here: simple values are stored
+                # as inert QuoteNode rather than in `Expr(:quote)` quasiquote
+                return QuoteNode(a1)
+            end
         end
     elseif k == K"do"
         # Temporary head which is picked up by _extract_do_lambda
-        headsym = :do_lambda
+        retexpr.head = :do_lambda
     elseif k == K"let"
         a1 = args[1]
         if @isexpr(a1, :block)
             a1a = (args[1]::Expr).args
+            filter!(a -> !(a isa LineNumberNode), a1a)
             # Ugly logic to strip the Expr(:block) in certain cases for compatibility
             if length(a1a) == 1
                 a = a1a[1]
@@ -489,17 +558,17 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads,
             a1 = args[1]
             if @isexpr(a1, :const)
                 # Normalize `local const` to `const local`
-                args[1] = Expr(headsym, (a1::Expr).args...)
-                headsym = :const
+                args[1] = Expr(retexpr.head, (a1::Expr).args...)
+                retexpr.head = :const
             elseif @isexpr(a1, :tuple)
                 # Normalize `global (x, y)` to `global x, y`
-                args = a1.args
+                retexpr.args = a1.args
             end
         end
     elseif k == K"return" && isempty(args)
         push!(args, nothing)
     elseif k == K"juxtapose"
-        headsym = :call
+        retexpr.head = :call
         pushfirst!(args, :*)
     elseif k == K"struct"
         @assert args[2].head == :block
@@ -515,9 +584,9 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads,
             end
         end
         args[2] = fields
-        pushfirst!(args, has_flags(head, MUTABLE_FLAG))
+        pushfirst!(args, has_flags(nodehead, MUTABLE_FLAG))
     elseif k == K"importpath"
-        headsym = :.
+        retexpr.head = :.
         for i = 1:length(args)
             ai = args[i]
             if ai isa QuoteNode
@@ -529,72 +598,41 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads,
     elseif k == K"wrapper"
         # This should only happen for errors wrapped next to what should have
         # been single statements or atoms - represent these as blocks.
-        headsym = :block
+        retexpr.head = :block
+    elseif k == K"comparison"
+        for i = 2:2:length(args)
+            arg = args[i]
+            if @isexpr(arg, :., 1)
+                args[i] = Symbol(".", arg.args[1])
+            end
+        end
     end
 
-    return Expr(headsym, args...)
-end
-
-
-# Stack entry for build_tree Expr conversion.
-# We'd use `Tuple{UnitRange{Int},SyntaxHead,Any}` instead, but that's an
-# abstract type due to the `Any` and tuple covariance which destroys
-# performance.
-struct _BuildExprStackEntry
-    srcrange::UnitRange{Int}
-    head::SyntaxHead
-    ex::Any
+    return retexpr
 end
 
 function build_tree(::Type{Expr}, stream::ParseStream;
                     filename=nothing, first_line=1, kws...)
     source = SourceFile(stream, filename=filename, first_line=first_line)
     txtbuf = unsafe_textbuf(stream)
-    args = Any[]
-    childranges = UnitRange{Int}[]
-    childheads = SyntaxHead[]
-    entry = build_tree(_BuildExprStackEntry, stream; kws...) do head, srcrange, nodechildren
-        if is_trivia(head) && !is_error(head)
-            return nothing
-        end
-        k = kind(head)
-        if isnothing(nodechildren)
-            ex = _leaf_to_Expr(source, txtbuf, 0, head, srcrange, nothing)
-        else
-            resize!(childranges, length(nodechildren))
-            resize!(childheads, length(nodechildren))
-            resize!(args, length(nodechildren))
-            for (i,c) in enumerate(nodechildren)
-                childranges[i] = c.srcrange
-                childheads[i] = c.head
-                args[i] = c.ex
-            end
-            ex = _internal_node_to_Expr(source, srcrange, head, childranges, childheads, args)
-        end
-        return _BuildExprStackEntry(srcrange, head, ex)
-    end
-    loc = source_location(LineNumberNode, source, first(entry.srcrange))
-    only(_fixup_Expr_children!(SyntaxHead(K"None",EMPTY_FLAGS), loc, Any[entry.ex]))
-end
-
-function _to_expr(node)
-    file = sourcefile(node)
-    if is_leaf(node)
-        txtbuf_offset, txtbuf = _unsafe_wrap_substring(sourcetext(file))
-        return _leaf_to_Expr(file, txtbuf, txtbuf_offset, head(node), byte_range(node), node)
+    cursor = RedTreeCursor(stream)
+    wrapper_head = SyntaxHead(K"wrapper",EMPTY_FLAGS)
+    if has_toplevel_siblings(cursor)
+        entry = Expr(:block)
+        for child in
+                Iterators.filter(should_include_node, reverse_toplevel_siblings(cursor))
+            pushfirst!(entry.args, fixup_Expr_child(wrapper_head, node_to_expr(child, source, txtbuf), false))
+        end
+        length(entry.args) == 1 && (entry = only(entry.args))
+    else
+        entry = fixup_Expr_child(wrapper_head, node_to_expr(cursor, source, txtbuf), false)
     end
-    cs = children(node)
-    args = Any[_to_expr(c) for c in cs]
-    _internal_node_to_Expr(file, byte_range(node), head(node), byte_range.(cs), head.(cs), args)
-end
-
-function to_expr(node)
-    ex = _to_expr(node)
-    loc = source_location(LineNumberNode, node)
-    only(_fixup_Expr_children!(SyntaxHead(K"None",EMPTY_FLAGS), loc, Any[ex]))
+    return entry
 end
 
 function Base.Expr(node::SyntaxNode)
-    to_expr(node)
+    source = sourcefile(node)
+    txtbuf_offset, txtbuf = _unsafe_wrap_substring(sourcetext(source))
+    wrapper_head = SyntaxHead(K"wrapper",EMPTY_FLAGS)
+    return fixup_Expr_child(wrapper_head, node_to_expr(node, source, txtbuf, UInt32(txtbuf_offset)), false)
 end
-
diff --git a/src/green_tree.jl b/src/green_node.jl
similarity index 66%
rename from src/green_tree.jl
rename to src/green_node.jl
index 4164529a..e6a239fe 100644
--- a/src/green_tree.jl
+++ b/src/green_node.jl
@@ -1,24 +1,10 @@
 """
-    GreenNode(head, span)
-    GreenNode(head, children...)
+    struct GreenNode
 
-A "green tree" is a lossless syntax tree which overlays all the source text.
-The most basic properties of a green tree are that:
-
-* Nodes cover a contiguous span of bytes in the text
-* Sibling nodes are ordered in the same order as the text
-
-As implementation choices, we choose that:
-
-* Nodes are immutable and don't know their parents or absolute position, so can
-  be cached and reused
-* Nodes are homogeneously typed at the language level so they can be stored
-  concretely, with the `head` defining the node type. Normally this would
-  include a "syntax kind" enumeration, but it can also include flags and record
-  information the parser knew about the layout of the child nodes.
-* For simplicity and uniformity, leaf nodes cover a single token in the source.
-  This is like rust-analyzer, but different from Roslyn where leaves can
-  include syntax trivia.
+An explicit pointer-y representation of the green tree produced by the parser.
+See [`RawGreenNode`](@ref) for documentation on working with the implicit green
+tree directly. However, this representation is useful for introspection as it
+provides O(1) access to the children (as well as forward iteration).
 """
 struct GreenNode{Head}
     head::Head
@@ -132,10 +118,39 @@ function Base.show(io::IO, ::MIME"text/plain", node::GreenNode, str::AbstractStr
     _show_green_node(io, node, "", 1, str, show_trivia)
 end
 
-function build_tree(::Type{GreenNode}, stream::ParseStream; kws...)
-    build_tree(GreenNode{SyntaxHead}, stream; kws...) do h, srcrange, cs
-        span = length(srcrange)
-        isnothing(cs) ? GreenNode(h, span) :
-                        GreenNode(h, span, collect(GreenNode{SyntaxHead}, cs))
+function GreenNode(cursor::GreenTreeCursor)
+    if is_leaf(cursor)
+        children = nothing
+    else
+        children = GreenNode{typeof(head(cursor))}[]
+        for child in reverse(cursor)
+            pushfirst!(children, GreenNode(child))
+        end
+    end
+    return GreenNode{typeof(head(cursor))}(head(cursor), span(cursor), children)
+end
+
+function build_tree(T::Type{GreenNode}, stream::ParseStream; kws...)
+    cursor = GreenTreeCursor(stream)
+    if treesize(cursor)+1 != length(stream.output)-1 # First output is a tombstone
+        # There are multiple toplevel nodes, e.g. because we're using this
+        # to test a partial parse. Wrap everything in K"wrapper"
+        all_processed = 0
+        local cs
+        while true
+            c = GreenNode(cursor)
+            if !@isdefined(cs)
+                cs = [c]
+            else
+                pushfirst!(cs, c)
+            end
+            all_processed += treesize(cursor)+1
+            all_processed == length(stream.output)-1 && break
+            cursor = GreenTreeCursor(stream.output, length(stream.output) - all_processed)
+        end
+        @assert length(cs) != 1
+        return GreenNode(SyntaxHead(K"wrapper", NON_TERMINAL_FLAG), stream.next_byte-1, cs)
+    else
+        return GreenNode(cursor)
     end
 end
diff --git a/src/kinds.jl b/src/kinds.jl
index c7d27e35..9d8999c7 100644
--- a/src/kinds.jl
+++ b/src/kinds.jl
@@ -27,7 +27,7 @@ primitive type Kind 16 end
 const _kind_str_to_int = Dict{String,UInt16}()
 const _kind_int_to_str = Dict{UInt16,String}()
 const _kind_modules = Dict{Int,Union{Symbol,Module}}(
-    0=>:JuliaSyntax,
+    0=>nameof(@__MODULE__),
     1=>:JuliaLowering,
     2=>:JuliaSyntaxFormatter
 )
@@ -49,7 +49,7 @@ function Kind(s::AbstractString)
     Kind(i)
 end
 
-Base.string(x::Kind) = _kind_int_to_str[reinterpret(UInt16, x)]
+Base.string(x::Kind) = get(_kind_int_to_str, reinterpret(UInt16, x), "<error: unknown kind>")
 Base.print(io::IO, x::Kind) = print(io, string(x))
 
 Base.isless(x::Kind, y::Kind) = reinterpret(UInt16, x) < reinterpret(UInt16, y)
@@ -127,7 +127,7 @@ end
 """
     register_kinds!(mod, module_id, names)
 
-Register custom `Kind`s with the given `names`, belonging to a module `mod`. 
+Register custom `Kind`s with the given `names`, belonging to a module `mod`.
 `names` is an array of arbitrary strings.
 
 In order for kinds to be represented by a small number of bits, some nontrivial
diff --git a/src/parse_stream.jl b/src/parse_stream.jl
index 0c57c2a4..8ae51145 100644
--- a/src/parse_stream.jl
+++ b/src/parse_stream.jl
@@ -71,6 +71,11 @@ Set for K"module" when it's not bare (`module`, not `baremodule`)
 """
 const BARE_MODULE_FLAG = RawFlags(1<<5)
 
+"""
+Set for nodes that are non-terminals
+"""
+const NON_TERMINAL_FLAG = RawFlags(1<<7)
+
 # Flags holding the dimension of an nrow or other UInt8 not held in the source
 # TODO: Given this is only used for nrow/ncat, we could actually use all the flags?
 const NUMERIC_FLAGS = RawFlags(RawFlags(0xff)<<8)
@@ -282,25 +287,105 @@ preceding_whitespace(tok::SyntaxToken) = tok.preceding_whitespace
 #-------------------------------------------------------------------------------
 
 """
-Range in the source text which will become a node in the tree. Can be either a
-token (leaf node of the tree) or an interior node, depending on how the
-start_mark compares to previous nodes.
-"""
-struct TaggedRange
-    head::SyntaxHead # Kind,flags
-    # The following field is used for one of two things:
-    # - For leaf nodes it's an index in the tokens array
-    # - For non-leaf nodes it points to the index of the first child
-    first_token::UInt32
-    last_token::UInt32
+    RawGreenNode(head::SyntaxHead, byte_span::UInt32, orig_kind::Kind) # Terminal
+    RawGreenNode(head::SyntaxHead, byte_span::UInt32, nchildren::UInt32) # Non-terminal
+
+A "green tree" is a lossless syntax tree which overlays all the source text.
+The most basic properties of a green tree are that:
+
+* Nodes cover a contiguous span of bytes in the text
+* Sibling nodes are ordered in the same order as the text
+
+As implementation choices, we choose that:
+
+* Nodes are immutable and don't know their parents or absolute position, so can
+  be cached and reused
+* Nodes are homogeneously typed at the language level so they can be stored
+  concretely, with the `head` defining the node type. Normally this would
+  include a "syntax kind" enumeration, but it can also include flags and record
+  information the parser knew about the layout of the child nodes.
+* For simplicity and uniformity, leaf nodes cover a single token in the source.
+  This is like rust-analyzer, but different from Roslyn where leaves can
+  include syntax trivia.
+* The parser produces a single buffer of `RawGreenNode` which encodes the tree.
+  There are higher level accessors, which make working with this tree easier.
+"""
+struct RawGreenNode
+    head::SyntaxHead                  # Kind,flags
+    byte_span::UInt32                 # Number of bytes covered by this node
+    # If NON_TERMINAL_FLAG is set, this is the total number of child nodes
+    # Otherwise this is a terminal node (i.e. a token) and this is orig_kind
+    node_span_or_orig_kind::UInt32
+
+    # Constructor for terminal nodes (tokens)
+    function RawGreenNode(head::SyntaxHead, byte_span::Integer, orig_kind::Kind)
+        @assert (flags(head) & NON_TERMINAL_FLAG) == 0
+        new(head, UInt32(byte_span), UInt32(reinterpret(UInt16, orig_kind)))
+    end
+
+    # Constructor for non-terminal nodes - automatically sets NON_TERMINAL_FLAG
+    function RawGreenNode(head::SyntaxHead, byte_span::Integer, node_span::Integer)
+        h = SyntaxHead(kind(head), flags(head) | NON_TERMINAL_FLAG)
+        new(h, UInt32(byte_span), UInt32(node_span))
+    end
+
+    global reset_node
+    function reset_node(node::RawGreenNode, kind, flags)
+        new(_reset_node_head(node, kind, flags),
+            getfield(node, :byte_span),
+            getfield(node, :node_span_or_orig_kind))
+    end
 end
 
-head(range::TaggedRange) = range.head
+function _reset_node_head(node, k, f)
+    if !isnothing(f)
+        f = RawFlags(f)
+        @assert (f & NON_TERMINAL_FLAG) == 0
+        f |= flags(node) & NON_TERMINAL_FLAG
+    else
+        f = flags(node)
+    end
+    h = SyntaxHead(isnothing(k) ? kind(node) : k, f)
+end
+
+Base.summary(node::RawGreenNode) = summary(node.head)
+function Base.show(io::IO, node::RawGreenNode)
+    print(io, summary(node), " (", node.byte_span, " bytes,")
+    if is_terminal(node)
+        print(io, " orig_kind=", node.orig_kind, ")")
+    else
+        print(io, " ", node.node_span, " children)")
+    end
+end
+
+function Base.getproperty(rgn::RawGreenNode, name::Symbol)
+    if name === :node_span
+        has_flags(rgn.head, NON_TERMINAL_FLAG) || return UInt32(0) # Leaf nodes have no children
+        return getfield(rgn, :node_span_or_orig_kind)
+    elseif name === :orig_kind
+        has_flags(rgn.head, NON_TERMINAL_FLAG) && error("Cannot access orig_kind for non-terminal node")
+        return Kind(getfield(rgn, :node_span_or_orig_kind))
+    end
+    getfield(rgn, name)
+end
+
+head(range::RawGreenNode) = range.head
+
+# Helper functions for unified output
+is_terminal(node::RawGreenNode) = !has_flags(node.head, NON_TERMINAL_FLAG)
+is_non_terminal(node::RawGreenNode) = has_flags(node.head, NON_TERMINAL_FLAG)
 
 #-------------------------------------------------------------------------------
 struct ParseStreamPosition
-    token_index::UInt32  # Index of last token in output
-    range_index::UInt32
+    """
+    The current position in the byte stream, i.e. the byte at `byte_index` is
+    the first byte of the next token to be parsed.
+    """
+    byte_index::UInt32
+    """
+    The total number of nodes (terminal + non-terminal) in the output so far.
+    """
+    node_index::UInt32
 end
 
 const NO_POSITION = ParseStreamPosition(0, 0)
@@ -349,10 +434,9 @@ mutable struct ParseStream
     lookahead_index::Int
     # Pool of stream positions for use as working space in parsing
     position_pool::Vector{Vector{ParseStreamPosition}}
-    # Buffer of finalized tokens
-    tokens::Vector{SyntaxToken}
-    # Parser output as an ordered sequence of ranges, parent nodes after children.
-    ranges::Vector{TaggedRange}
+    output::Vector{RawGreenNode}
+    # Current byte position in the output (the next byte to be written)
+    next_byte::Int
     # Parsing diagnostics (errors/warnings etc)
     diagnostics::Vector{Diagnostic}
     # Counter for number of peek()s we've done without making progress via a bump()
@@ -372,17 +456,16 @@ mutable struct ParseStream
         # numbers. This means we're inexact for old dev versions but that seems
         # like an acceptable tradeoff.
         ver = (version.major, version.minor)
-        # Initial sentinel token containing the first byte of the first real token.
-        sentinel = SyntaxToken(SyntaxHead(K"TOMBSTONE", EMPTY_FLAGS),
-                               K"TOMBSTONE", false, next_byte)
+        # Initial sentinel node (covering all ignored bytes before the first token)
+        sentinel = RawGreenNode(SyntaxHead(K"TOMBSTONE", EMPTY_FLAGS), next_byte-1, K"TOMBSTONE")
         new(text_buf,
             text_root,
             lexer,
             Vector{SyntaxToken}(),
             1,
             Vector{Vector{ParseStreamPosition}}(),
-            SyntaxToken[sentinel],
-            Vector{TaggedRange}(),
+            RawGreenNode[sentinel],
+            next_byte,  # Initialize next_byte from the parameter
             Vector{Diagnostic}(),
             0,
             ver)
@@ -427,7 +510,7 @@ function ParseStream(io::IO; version=VERSION)
 end
 
 function Base.show(io::IO, mime::MIME"text/plain", stream::ParseStream)
-    println(io, "ParseStream at position $(_next_byte(stream))")
+    println(io, "ParseStream at position $(stream.next_byte)")
 end
 
 function show_diagnostics(io::IO, stream::ParseStream)
@@ -448,19 +531,11 @@ function release_positions(stream, positions)
 end
 
 #-------------------------------------------------------------------------------
-# Return true when a token was emitted last at stream position `pos`
+# Return true when a terminal (token) was emitted last at stream position `pos`
 function token_is_last(stream, pos)
-    return pos.range_index == 0 ||
-           pos.token_index > stream.ranges[pos.range_index].last_token
-end
-
-# Compute the first byte of a token at given index `i`
-function token_first_byte(stream, i)
-    stream.tokens[i-1].next_byte
-end
-
-function token_last_byte(stream::ParseStream, i)
-    stream.tokens[i].next_byte - 1
+    # In the unified structure, check if the node at pos is a terminal
+    return pos.node_index > 0 && pos.node_index <= length(stream.output) &&
+           is_terminal(stream.output[pos.node_index])
 end
 
 function lookahead_token_first_byte(stream, i)
@@ -507,7 +582,7 @@ end
 
 # Return the index of the next byte of the input
 function _next_byte(stream)
-    last(stream.tokens).next_byte
+    stream.next_byte
 end
 
 # Find the index of the next nontrivia token
@@ -571,7 +646,7 @@ end
 
 @noinline function _parser_stuck_error(stream)
     # Optimization: emit unlikely errors in a separate function
-    error("The parser seems stuck at byte $(_next_byte(stream))")
+    error("The parser seems stuck at byte $(stream.next_byte)")
 end
 
 """
@@ -644,18 +719,19 @@ Retroactively inspecting or modifying the parser's output can be confusing, so
 using this function should be avoided where possible.
 """
 function peek_behind(stream::ParseStream, pos::ParseStreamPosition)
-    if token_is_last(stream, pos) && pos.token_index > 0
-        t = stream.tokens[pos.token_index]
-        return (kind=kind(t),
-                flags=flags(t),
-                orig_kind=t.orig_kind,
-                is_leaf=true)
-    elseif !isempty(stream.ranges) && pos.range_index > 0
-        r = stream.ranges[pos.range_index]
-        return (kind=kind(r),
-                flags=flags(r),
-                orig_kind=K"None",
-                is_leaf=false)
+    if pos.node_index > 0 && pos.node_index <= length(stream.output)
+        node = stream.output[pos.node_index]
+        if is_terminal(node)
+            return (kind=kind(node),
+                    flags=flags(node),
+                    orig_kind=node.orig_kind,
+                    is_leaf=true)
+        else
+            return (kind=kind(node),
+                    flags=flags(node),
+                    orig_kind=K"None",
+                    is_leaf=false)
+        end
     else
         return (kind=K"None",
                 flags=EMPTY_FLAGS,
@@ -664,70 +740,57 @@ function peek_behind(stream::ParseStream, pos::ParseStreamPosition)
     end
 end
 
+"""
+    first_child_position(stream::ParseStream, pos::ParseStreamPosition)
+
+Find the first non-trivia child of this node (in the GreenTree/RedTree sense) and return
+its position.
+"""
 function first_child_position(stream::ParseStream, pos::ParseStreamPosition)
-    ranges = stream.ranges
-    @assert pos.range_index > 0
-    parent = ranges[pos.range_index]
-    # Find the first nontrivia range which is a child of this range but not a
-    # child of the child
-    c = 0
-    for i = pos.range_index-1:-1:1
-        if ranges[i].first_token < parent.first_token
-            break
-        end
-        if (c == 0 || ranges[i].first_token < ranges[c].first_token) && !is_trivia(ranges[i])
-            c = i
-        end
+    output = stream.output
+    @assert pos.node_index > 0
+    cursor = RedTreeCursor(GreenTreeCursor(output, pos.node_index), pos.byte_index-UInt32(1))
+    candidate = nothing
+    for child in reverse(cursor)
+        is_trivia(child) && continue
+        candidate = child
     end
 
-    # Find first nontrivia token
-    t = 0
-    for i = parent.first_token:parent.last_token
-        if !is_trivia(stream.tokens[i])
-            t = i
-            break
+    candidate !== nothing && return ParseStreamPosition(candidate.byte_end+UInt32(1), candidate.green.position)
+
+    # No children found - return the first non-trivia *token* (even if it
+    # is the child of a non-terminal trivia node (e.g. an error)).
+    byte_end = pos.byte_index
+    for i in pos.node_index-1:-1:(pos.node_index - treesize(cursor))
+        node = output[i]
+        if is_terminal(node)
+            if !is_trivia(node)
+                return ParseStreamPosition(byte_end, i)
+            end
+            byte_end -= node.byte_span
         end
     end
 
-    if c == 0 || (t != 0 && ranges[c].first_token > t)
-        # Return leaf node at `t`
-        return ParseStreamPosition(t, 0)
-    else
-        # Return interior node at `c`
-        return ParseStreamPosition(ranges[c].last_token, c)
-    end
+    # Still none found. Return a sentinel value
+    return ParseStreamPosition(0, 0)
 end
 
-function last_child_position(stream::ParseStream, pos::ParseStreamPosition)
-    ranges = stream.ranges
-    @assert pos.range_index > 0
-    parent = ranges[pos.range_index]
-    # Find the last nontrivia range which is a child of this range
-    c = 0
-    if pos.range_index > 1
-        i = pos.range_index-1
-        if ranges[i].first_token >= parent.first_token
-            # Valid child of current range
-            c = i
-        end
-    end
-
-    # Find last nontrivia token
-    t = 0
-    for i = parent.last_token:-1:parent.first_token
-        if !is_trivia(stream.tokens[i])
-            t = i
-            break
-        end
-    end
+"""
+        last_child_position(stream::ParseStream, pos::ParseStreamPosition)
 
-    if c == 0 || (t != 0 && ranges[c].last_token < t)
-        # Return leaf node at `t`
-        return ParseStreamPosition(t, 0)
-    else
-        # Return interior node at `c`
-        return ParseStreamPosition(ranges[c].last_token, c)
+    Find the last non-trivia child of this node (in the GreenTree/RedTree sense) and
+    return its position (i.e. the position as if that child had been the last thing parsed).
+"""
+function last_child_position(stream::ParseStream, pos::ParseStreamPosition)
+    output = stream.output
+    @assert pos.node_index > 0
+    cursor = RedTreeCursor(GreenTreeCursor(output, pos.node_index), pos.byte_index-1)
+    candidate = nothing
+    for child in reverse(cursor)
+        is_trivia(child) && continue
+        return ParseStreamPosition(child.byte_end+UInt32(1), child.green.position)
     end
+    return ParseStreamPosition(0, 0)
 end
 
 # Get last position in stream "of interest", skipping
@@ -736,24 +799,34 @@ end
 # * whitespace (if skip_trivia=true)
 function peek_behind_pos(stream::ParseStream; skip_trivia::Bool=true,
                          skip_parens::Bool=true)
-    token_index = lastindex(stream.tokens)
-    range_index = lastindex(stream.ranges)
+    # Work backwards through the output
+    node_idx = length(stream.output)
+    byte_idx = stream.next_byte
+
+    # Skip parens nodes if requested
     if skip_parens
-        while range_index >= firstindex(stream.ranges) &&
-                kind(stream.ranges[range_index]) == K"parens"
-            range_index -= 1
+        while node_idx > 0
+            node = stream.output[node_idx]
+            if is_non_terminal(node) && kind(node) == K"parens"
+                node_idx -= 1
+            else
+                break
+            end
         end
     end
-    last_token_in_nonterminal = range_index == 0 ? 0 :
-                                stream.ranges[range_index].last_token
-    while token_index > last_token_in_nonterminal
-        t = stream.tokens[token_index]
-        if kind(t) != K"TOMBSTONE" && (!skip_trivia || !is_trivia(t))
+
+    # Skip trivia if requested
+    while node_idx > 0
+        node = stream.output[node_idx]
+        if kind(node) == K"TOMBSTONE" || (skip_trivia && is_trivia(node))
+            node_idx -= 1
+            byte_idx -= node.byte_span
+        else
             break
         end
-        token_index -= 1
     end
-    return ParseStreamPosition(token_index, range_index)
+
+    return ParseStreamPosition(byte_idx, node_idx)
 end
 
 function peek_behind(stream::ParseStream; kws...)
@@ -767,7 +840,7 @@ end
 
 # Bump up until the `n`th token
 # flags and remap_kind are applied to any non-trivia tokens
-function _bump_until_n(stream::ParseStream, n::Integer, flags, remap_kind=K"None")
+function _bump_until_n(stream::ParseStream, n::Integer, new_flags, remap_kind=K"None")
     if n < stream.lookahead_index
         return
     end
@@ -777,13 +850,28 @@ function _bump_until_n(stream::ParseStream, n::Integer, flags, remap_kind=K"None
         if k == K"EndMarker"
             break
         end
-        f = flags | (@__MODULE__).flags(tok)
+        f = new_flags | flags(tok)
         is_trivia = is_whitespace(k)
         is_trivia && (f |= TRIVIA_FLAG)
         outk = (is_trivia || remap_kind == K"None") ? k : remap_kind
         h = SyntaxHead(outk, f)
-        push!(stream.tokens,
-              SyntaxToken(h, kind(tok), tok.preceding_whitespace, tok.next_byte))
+
+        # Calculate byte span for this token
+        if i == stream.lookahead_index
+            # First token in this batch - calculate span from current stream position
+            prev_byte = stream.next_byte
+        else
+            # Subsequent tokens - use previous token's next_byte
+            prev_byte = stream.lookahead[i-1].next_byte
+        end
+        byte_span = Int(tok.next_byte) - Int(prev_byte)
+
+        # Create terminal RawGreenNode
+        node = RawGreenNode(h, byte_span, kind(tok))
+        push!(stream.output, node)
+
+        # Update next_byte
+        stream.next_byte += byte_span
     end
     stream.lookahead_index = n + 1
     # Defuse the time bomb
@@ -838,9 +926,12 @@ example, `2x` means `2*x` via the juxtaposition rules.
 """
 function bump_invisible(stream::ParseStream, kind, flags=EMPTY_FLAGS;
                         error=nothing)
-    b = _next_byte(stream)
+    b = stream.next_byte
     h = SyntaxHead(kind, flags)
-    push!(stream.tokens, SyntaxToken(h, (@__MODULE__).kind(h), false, b))
+    # Zero-width token
+    node = RawGreenNode(h, 0, kind)
+    push!(stream.output, node)
+    # No need to update next_byte for zero-width token
     if !isnothing(error)
         emit_diagnostic(stream, b:b-1, error=error)
     end
@@ -858,8 +949,14 @@ whitespace if necessary with bump_trivia.
 function bump_glue(stream::ParseStream, kind, flags)
     i = stream.lookahead_index
     h = SyntaxHead(kind, flags)
-    push!(stream.tokens, SyntaxToken(h, kind, false,
-                                     stream.lookahead[i+1].next_byte))
+    # Calculate byte span for glued tokens
+    start_byte = stream.next_byte
+    end_byte = stream.lookahead[i+1].next_byte
+    byte_span = end_byte - start_byte
+
+    node = RawGreenNode(h, byte_span, kind)
+    push!(stream.output, node)
+    stream.next_byte += byte_span
     stream.lookahead_index += 2
     stream.peek_count = 0
     return position(stream)
@@ -887,24 +984,23 @@ simpler one which only splits preceding dots?
 function bump_split(stream::ParseStream, split_spec::Vararg{Any, N}) where {N}
     tok = stream.lookahead[stream.lookahead_index]
     stream.lookahead_index += 1
-    b = _next_byte(stream)
-    toklen = tok.next_byte - b
+    start_b = _next_byte(stream)
+    toklen = tok.next_byte - start_b
+    prev_b = start_b
     for (i, (nbyte, k, f)) in enumerate(split_spec)
         h = SyntaxHead(k, f)
-        b += nbyte < 0 ? (toklen + nbyte) : nbyte
+        actual_nbyte = nbyte < 0 ? (toklen + nbyte) : nbyte
         orig_k = k == K"." ? K"." : kind(tok)
-        push!(stream.tokens, SyntaxToken(h, orig_k, false, b))
+        node = RawGreenNode(h, actual_nbyte, orig_k)
+        push!(stream.output, node)
+        prev_b += actual_nbyte
+        stream.next_byte += actual_nbyte
     end
-    @assert tok.next_byte == b
+    @assert tok.next_byte == prev_b
     stream.peek_count = 0
     return position(stream)
 end
 
-function _reset_node_head(x, k, f)
-    h = SyntaxHead(isnothing(k) ? kind(x)  : k,
-                   isnothing(f) ? flags(x) : f)
-end
-
 """
 Reset kind or flags of an existing node in the output stream
 
@@ -915,17 +1011,8 @@ in those cases.
 """
 function reset_node!(stream::ParseStream, pos::ParseStreamPosition;
                      kind=nothing, flags=nothing)
-    if token_is_last(stream, pos)
-        t = stream.tokens[pos.token_index]
-        stream.tokens[pos.token_index] =
-            SyntaxToken(_reset_node_head(t, kind, flags),
-                        t.orig_kind, t.preceding_whitespace, t.next_byte)
-    else
-        r = stream.ranges[pos.range_index]
-        stream.ranges[pos.range_index] =
-            TaggedRange(_reset_node_head(r, kind, flags),
-                        r.first_token, r.last_token)
-    end
+    node = stream.output[pos.node_index]
+    stream.output[pos.node_index] = reset_node(node, kind, flags)
 end
 
 """
@@ -937,45 +1024,57 @@ Hack alert! This is used only for managing the complicated rules related to
 dedenting triple quoted strings.
 """
 function steal_token_bytes!(stream::ParseStream, pos::ParseStreamPosition, numbytes)
-    i = pos.token_index
-    t1 = stream.tokens[i]
-    t2 = stream.tokens[i+1]
+    i = pos.node_index
+    t1 = stream.output[i]
+    t2 = stream.output[i+1]
+    @assert is_terminal(t1) && is_terminal(t2)
 
-    t1_next_byte = t1.next_byte + numbytes
-    stream.tokens[i] = SyntaxToken(t1.head, t1.orig_kind,
-                                   t1.preceding_whitespace, t1_next_byte)
+    stream.output[i] = RawGreenNode(t1.head, t1.byte_span + numbytes,
+                                    t1.orig_kind)
 
-    t2_is_empty = t1_next_byte == t2.next_byte
+    t2_is_empty = t2.byte_span == numbytes
     head2 = t2_is_empty ? SyntaxHead(K"TOMBSTONE", EMPTY_FLAGS) : t2.head
-    stream.tokens[i+1] = SyntaxToken(head2, t2.orig_kind,
-                                     t2.preceding_whitespace, t2.next_byte)
+    stream.output[i+1] = RawGreenNode(head2, t2.byte_span - numbytes,
+                                      t2.orig_kind)
     return t2_is_empty
 end
 
 # Get position of last item emitted into the output stream
 function Base.position(stream::ParseStream)
-    ParseStreamPosition(lastindex(stream.tokens), lastindex(stream.ranges))
+    byte_idx = stream.next_byte
+    node_idx = length(stream.output)
+
+    ParseStreamPosition(byte_idx, node_idx)
 end
 
 """
     emit(stream, mark, kind, flags = EMPTY_FLAGS; error=nothing)
 
-Emit a new text span into the output which covers source bytes from `mark` to
+Emit a new non-terminal node into the output which covers source bytes from `mark` to
 the end of the most recent token which was `bump()`'ed. The starting `mark`
-should be a previous return value of `position()`.
+should be a previous return value of `position()`. The emitted node will have
+its `node_span` set to the number of nodes emitted since `mark`.
 """
 function emit(stream::ParseStream, mark::ParseStreamPosition, kind::Kind,
               flags::RawFlags = EMPTY_FLAGS; error=nothing)
-    first_token = mark.token_index + 1
-    range = TaggedRange(SyntaxHead(kind, flags), first_token, length(stream.tokens))
+    # Calculate byte span from mark position to current
+    mark_byte = mark.byte_index
+    current_byte = stream.next_byte
+    byte_span = current_byte - mark_byte
+
+    # Calculate node span (number of descendant nodes, exclusive of the node itself)
+    node_span = length(stream.output) - mark.node_index
+
+    # Create non-terminal RawGreenNode
+    node = RawGreenNode(SyntaxHead(kind, flags), byte_span, node_span)
+
     if !isnothing(error)
-        # The first child must be a leaf, otherwise ranges would be improperly
-        # nested.
-        fbyte = token_first_byte(stream, first_token)
-        lbyte = token_last_byte(stream, lastindex(stream.tokens))
-        emit_diagnostic(stream, fbyte:lbyte, error=error)
+        emit_diagnostic(stream, mark_byte:current_byte-1, error=error)
     end
-    push!(stream.ranges, range)
+
+    push!(stream.output, node)
+    # Note: emit() for non-terminals doesn't advance next_byte
+    # because it's a range over already-emitted tokens
     return position(stream)
 end
 
@@ -1008,25 +1107,21 @@ function emit_diagnostic(stream::ParseStream; whitespace=false, kws...)
 end
 
 function emit_diagnostic(stream::ParseStream, mark::ParseStreamPosition; trim_whitespace=true, kws...)
-    i = mark.token_index
-    j = lastindex(stream.tokens)
+    # Find the byte range from mark to current position
+    start_byte = mark.byte_index
+    end_byte = stream.next_byte - 1
+
     if trim_whitespace
-        while i < j && is_whitespace(stream.tokens[j])
-            j -= 1
-        end
-        while i+1 < j && is_whitespace(stream.tokens[i+1])
-            i += 1
-        end
+        # TODO: Implement whitespace trimming for unified output
+        # This would require scanning the output array
     end
-    byterange = stream.tokens[i].next_byte:stream.tokens[j].next_byte-1
-    emit_diagnostic(stream, byterange; kws...)
+
+    emit_diagnostic(stream, start_byte:end_byte; kws...)
 end
 
 function emit_diagnostic(stream::ParseStream, mark::ParseStreamPosition,
                          end_mark::ParseStreamPosition; kws...)
-    fbyte = stream.tokens[mark.token_index].next_byte
-    lbyte = stream.tokens[end_mark.token_index].next_byte-1
-    emit_diagnostic(stream, fbyte:lbyte; kws...)
+    emit_diagnostic(stream, mark.byte_index:end_mark.byte_index-1; kws...)
 end
 
 function emit_diagnostic(diagnostics::AbstractVector{Diagnostic},
@@ -1039,15 +1134,21 @@ end
 
 function validate_tokens(stream::ParseStream)
     txtbuf = unsafe_textbuf(stream)
-    toks = stream.tokens
     charbuf = IOBuffer()
-    for i = 2:length(toks)
-        t = toks[i]
-        k = kind(t)
-        fbyte = toks[i-1].next_byte
-        nbyte = t.next_byte
+
+    # Process terminal nodes in the output
+    fbyte = stream.output[1].byte_span+1  # Start after sentinel
+    for i = 2:length(stream.output)
+        node = stream.output[i]
+        if !is_terminal(node) || kind(node) == K"TOMBSTONE"
+            continue
+        end
+
+        k = kind(node)
+        nbyte = fbyte + node.byte_span
         tokrange = fbyte:nbyte-1
         error_kind = K"None"
+
         if k in KSet"Integer BinInt OctInt HexInt"
             # The following shouldn't be able to error...
             # parse_int_literal
@@ -1090,7 +1191,7 @@ function validate_tokens(stream::ParseStream)
                                     error="character literal contains multiple characters")
                 end
             end
-        elseif k == K"String" && !has_flags(t, RAW_STRING_FLAG)
+        elseif k == K"String" && !has_flags(node, RAW_STRING_FLAG)
             had_error = unescape_julia_string(devnull, txtbuf, fbyte,
                                               nbyte, stream.diagnostics)
             if had_error
@@ -1108,11 +1209,14 @@ function validate_tokens(stream::ParseStream)
             end
             emit_diagnostic(stream, tokrange, error=msg)
         end
+
         if error_kind != K"None"
-            toks[i] = SyntaxToken(SyntaxHead(error_kind, EMPTY_FLAGS),
-                                  t.orig_kind, t.preceding_whitespace,
-                                  t.next_byte)
+            # Update the node with new error kind
+            stream.output[i] = RawGreenNode(SyntaxHead(error_kind, EMPTY_FLAGS),
+                                          node.byte_span, node.orig_kind)
         end
+
+        fbyte = nbyte
     end
     sort!(stream.diagnostics, by=first_byte)
 end
@@ -1121,89 +1225,6 @@ end
 
 # API for extracting results from ParseStream
 
-"""
-    build_tree(make_node::Function, ::Type{StackEntry}, stream::ParseStream; kws...)
-
-Construct a tree from a ParseStream using depth-first traversal. `make_node`
-must have the signature
-
-    make_node(head::SyntaxHead, span::Integer, children)
-
-where `children` is either `nothing` for leaf nodes or an iterable of the
-children of type `StackEntry` for internal nodes. `StackEntry` may be a node
-type, but also may include other information required during building the tree.
-
-If the ParseStream has multiple nodes at the top level, `K"wrapper"` is used to
-wrap them in a single node.
-
-The tree here is constructed depth-first in postorder.
-"""
-function build_tree(make_node::Function, ::Type{NodeType}, stream::ParseStream;
-                    kws...) where NodeType
-    stack = Vector{NamedTuple{(:first_token,:node),Tuple{Int,NodeType}}}()
-
-    tokens = stream.tokens
-    ranges = stream.ranges
-    i = firstindex(tokens)
-    j = firstindex(ranges)
-    while true
-        last_token = j <= lastindex(ranges) ?
-                     ranges[j].last_token : lastindex(tokens)
-        # Process tokens to leaf nodes for all tokens used by the next internal node
-        while i <= last_token
-            t = tokens[i]
-            if kind(t) == K"TOMBSTONE"
-                i += 1
-                continue # Ignore removed tokens
-            end
-            srcrange = (stream.tokens[i-1].next_byte:
-                        stream.tokens[i].next_byte - 1)
-            h = head(t)
-            node = make_node(h, srcrange, nothing)
-            if !isnothing(node)
-                push!(stack, (first_token=i, node=node))
-            end
-            i += 1
-        end
-        if j > lastindex(ranges)
-            break
-        end
-        # Process internal nodes which end at the current position
-        while j <= lastindex(ranges)
-            r = ranges[j]
-            if r.last_token != last_token
-                break
-            end
-            if kind(r) == K"TOMBSTONE"
-                j += 1
-                continue
-            end
-            # Collect children from the stack for this internal node
-            k = length(stack) + 1
-            while k > 1 && r.first_token <= stack[k-1].first_token
-                k -= 1
-            end
-            srcrange = (stream.tokens[r.first_token-1].next_byte:
-                        stream.tokens[r.last_token].next_byte - 1)
-            children = (stack[n].node for n = k:length(stack))
-            node = make_node(head(r), srcrange, children)
-            resize!(stack, k-1)
-            if !isnothing(node)
-                push!(stack, (first_token=r.first_token, node=node))
-            end
-            j += 1
-        end
-    end
-    if length(stack) == 1
-        return only(stack).node
-    else
-        srcrange = (stream.tokens[1].next_byte:
-                    stream.tokens[end].next_byte - 1)
-        children = (x.node for x in stack)
-        return make_node(SyntaxHead(K"wrapper", EMPTY_FLAGS), srcrange, children)
-    end
-end
-
 function sourcetext(stream::ParseStream; steal_textbuf=false)
     Base.depwarn("Use of `sourcetext(::ParseStream)` is deprecated. Use `SourceFile(stream)` instead", :sourcetext)
     root = stream.text_root
@@ -1253,27 +1274,34 @@ Return the `Vector{UInt8}` text buffer being parsed by this `ParseStream`.
 """
 unsafe_textbuf(stream) = stream.textbuf
 
-first_byte(stream::ParseStream) = first(stream.tokens).next_byte # Use sentinel token
-last_byte(stream::ParseStream) = _next_byte(stream)-1
+first_byte(stream::ParseStream) = first(stream.output).byte_span + 1 # After sentinel
+last_byte(stream::ParseStream) = stream.next_byte - 1
 any_error(stream::ParseStream) = any_error(stream.diagnostics)
 
 # Return last non-whitespace byte which was parsed
 function last_non_whitespace_byte(stream::ParseStream)
-    for i = length(stream.tokens):-1:1
-        tok = stream.tokens[i]
-        if !(kind(tok) in KSet"Comment Whitespace NewlineWs ErrorEofMultiComment")
-            return tok.next_byte - 1
+    byte_pos = stream.next_byte
+    for i = length(stream.output):-1:1
+        node = stream.output[i]
+        if is_terminal(node)
+            if !(kind(node) in KSet"Comment Whitespace NewlineWs ErrorEofMultiComment")
+                return byte_pos - 1
+            end
+            byte_pos -= node.byte_span
         end
     end
     return first_byte(stream) - 1
 end
 
 function Base.empty!(stream::ParseStream)
-    t = last(stream.tokens)
-    empty!(stream.tokens)
-    # Restore sentinel token
-    push!(stream.tokens, SyntaxToken(SyntaxHead(K"TOMBSTONE",EMPTY_FLAGS),
-                                     K"TOMBSTONE", t.preceding_whitespace,
-                                     t.next_byte))
-    empty!(stream.ranges)
+    # Keep only the sentinel
+    if !isempty(stream.output) && kind(stream.output[1]) == K"TOMBSTONE"
+        resize!(stream.output, 1)
+    else
+        empty!(stream.output)
+        # Restore sentinel node
+        push!(stream.output, RawGreenNode(SyntaxHead(K"TOMBSTONE", EMPTY_FLAGS), 0, K"TOMBSTONE"))
+    end
+    # Reset next_byte to initial position
+    stream.next_byte = 1
 end
diff --git a/src/parser.jl b/src/parser.jl
index d1a91478..d593fe0b 100644
--- a/src/parser.jl
+++ b/src/parser.jl
@@ -340,6 +340,8 @@ function bump_dotsplit(ps, flags=EMPTY_FLAGS;
         bump_trivia(ps)
         mark = position(ps)
         k = remap_kind != K"None" ? remap_kind : kind(t)
+        # Split the dotted operator into . and the operator
+        # First split emits the . token (1 byte) at position mark.node_index+1
         pos = bump_split(ps, (1, K".", TRIVIA_FLAG), (-1, k, flags))
         if emit_dot_node
             pos = emit(ps, mark, K".")
diff --git a/src/parser_api.jl b/src/parser_api.jl
index 7931ef31..a3e2162b 100644
--- a/src/parser_api.jl
+++ b/src/parser_api.jl
@@ -190,13 +190,15 @@ emitted as `K"Identifier"` (the default) or as `K"+"`.
 function tokenize(text; operators_as_identifiers=true)
     ps = ParseStream(text)
     parse!(ps, rule=:all)
-    ts = ps.tokens
+    ts = ps.output
     output_tokens = Token[]
+    byte_start::UInt32 = ps.output[1].byte_span + 1
     for i = 2:length(ts)
-        if kind(ts[i]) == K"TOMBSTONE"
+        if kind(ts[i]) == K"TOMBSTONE" || is_non_terminal(ts[i])
             continue
         end
-        r = ts[i-1].next_byte:ts[i].next_byte-1
+        r = byte_start:(byte_start+ts[i].byte_span - 1)
+        byte_start = last(r) + 1
         k = kind(ts[i])
         if k == K"Identifier" && !operators_as_identifiers
             orig_k = ts[i].orig_kind
diff --git a/src/syntax_tree.jl b/src/syntax_tree.jl
index edc864e0..ba9ca3ee 100644
--- a/src/syntax_tree.jl
+++ b/src/syntax_tree.jl
@@ -56,19 +56,19 @@ const AbstractSyntaxNode = TreeNode{<:AbstractSyntaxData}
 
 struct SyntaxData <: AbstractSyntaxData
     source::SourceFile
-    raw::GreenNode{SyntaxHead}
-    position::Int
+    raw::RawGreenNode
+    byte_end::UInt32
     val::Any
 end
 
 Base.hash(data::SyntaxData, h::UInt) =
-    hash(data.source, hash(data.raw, hash(data.position,
+    hash(data.source, hash(data.raw, hash(data.byte_end,
         # Avoid dynamic dispatch:
         # This does not support custom `hash` implementation that may be defined for `typeof(data.val)`,
         # However, such custom user types should not generally appear in the AST.
         Core.invoke(hash, Tuple{Any,UInt}, data.val, h))))
 function Base.:(==)(a::SyntaxData, b::SyntaxData)
-    a.source == b.source && a.raw == b.raw && a.position == b.position && a.val === b.val
+    a.source == b.source && a.raw == b.raw && a.byte_end == b.byte_end && a.val === b.val
 end
 
 """
@@ -80,41 +80,38 @@ text by calling one of the parser API functions such as [`parseall`](@ref)
 """
 const SyntaxNode = TreeNode{SyntaxData}
 
-function SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead};
-                    keep_parens=false, position::Integer=1)
+function SyntaxNode(source::SourceFile, cursor::RedTreeCursor;
+                    keep_parens=false)
     GC.@preserve source begin
         raw_offset, txtbuf = _unsafe_wrap_substring(source.code)
         offset = raw_offset - source.byte_offset
-        _to_SyntaxNode(source, txtbuf, offset, raw, convert(Int, position), keep_parens)
+        _to_SyntaxNode(source, txtbuf, offset, cursor, keep_parens)
     end
 end
 
+should_include_node(child) = !is_trivia(child) || is_error(child)
+
 function _to_SyntaxNode(source::SourceFile, txtbuf::Vector{UInt8}, offset::Int,
-                        raw::GreenNode{SyntaxHead},
-                        position::Int, keep_parens::Bool)
-    if is_leaf(raw)
+                        cursor::RedTreeCursor, keep_parens::Bool)
+    if is_leaf(cursor)
         # Here we parse the values eagerly rather than representing them as
         # strings. Maybe this is good. Maybe not.
-        valrange = position:position + span(raw) - 1
-        val = parse_julia_literal(txtbuf, head(raw), valrange .+ offset)
-        return SyntaxNode(nothing, nothing, SyntaxData(source, raw, position, val))
+        valrange = byte_range(cursor)
+        val = parse_julia_literal(txtbuf, head(cursor), valrange .+ offset)
+        return SyntaxNode(nothing, nothing, SyntaxData(source, this(cursor.green), cursor.byte_end, val))
     else
         cs = SyntaxNode[]
         pos = position
-        for (i,rawchild) in enumerate(children(raw))
+        for child in reverse(cursor)
             # FIXME: Allowing trivia is_error nodes here corrupts the tree layout.
-            if !is_trivia(rawchild) || is_error(rawchild)
-                push!(cs, _to_SyntaxNode(source, txtbuf, offset, rawchild, pos, keep_parens))
+            if should_include_node(child)
+                pushfirst!(cs, _to_SyntaxNode(source, txtbuf, offset, child, keep_parens))
             end
-            pos += Int(rawchild.span)
-        end
-        if !keep_parens && kind(raw) == K"parens" && length(cs) == 1
-            return cs[1]
         end
-        if kind(raw) == K"wrapper" && length(cs) == 1
+        if !keep_parens && kind(cursor) == K"parens" && length(cs) == 1
             return cs[1]
         end
-        node = SyntaxNode(nothing, cs, SyntaxData(source, raw, position, nothing))
+        node = SyntaxNode(nothing, cs, SyntaxData(source, this(cursor.green), cursor.byte_end, nothing))
         for c in cs
             c.parent = node
         end
@@ -162,9 +159,12 @@ structure.
 """
 head(node::AbstractSyntaxNode) = head(node.raw)
 
-span(node::AbstractSyntaxNode) = span(node.raw)
+span(node::AbstractSyntaxNode) = node.raw.byte_span
 
-byte_range(node::AbstractSyntaxNode) = node.position:(node.position + span(node) - 1)
+byte_range(node::AbstractSyntaxNode) = (node.byte_end - span(node) + 1):node.byte_end
+
+first_byte(node::AbstractSyntaxNode) = first(byte_range(node))
+last_byte(node::AbstractSyntaxNode) = last(byte_range(node))
 
 sourcefile(node::AbstractSyntaxNode) = node.source
 
@@ -271,13 +271,31 @@ function Base.copy(node::TreeNode)
 end
 
 # shallow-copy the data
-Base.copy(data::SyntaxData) = SyntaxData(data.source, data.raw, data.position, data.val)
+Base.copy(data::SyntaxData) = SyntaxData(data.source, data.raw, data.byte_end, data.val)
 
 function build_tree(::Type{SyntaxNode}, stream::ParseStream;
                     filename=nothing, first_line=1, keep_parens=false, kws...)
-    green_tree = build_tree(GreenNode, stream; kws...)
     source = SourceFile(stream, filename=filename, first_line=first_line)
-    SyntaxNode(source, green_tree, position=first_byte(stream), keep_parens=keep_parens)
+    cursor = RedTreeCursor(stream)
+    if has_toplevel_siblings(cursor)
+        # There are multiple toplevel nodes, e.g. because we're using this
+        # to test a partial parse. Wrap everything in K"wrapper"
+        cs = SyntaxNode[]
+        for child in
+                Iterators.filter(should_include_node, reverse_toplevel_siblings(cursor))
+            pushfirst!(cs, SyntaxNode(source, child, keep_parens=keep_parens))
+        end
+        length(cs) == 1 && return only(cs)
+        node = SyntaxNode(nothing, cs, SyntaxData(source,
+            RawGreenNode(SyntaxHead(K"wrapper", NON_TERMINAL_FLAG),
+            stream.next_byte-1, length(stream.output)-1), stream.next_byte-1, nothing))
+        for c in cs
+            c.parent = node
+        end
+        return node
+    else
+        return SyntaxNode(source, cursor, keep_parens=keep_parens)
+    end
 end
 
 @deprecate haschildren(x) !is_leaf(x) false
diff --git a/src/tree_cursors.jl b/src/tree_cursors.jl
new file mode 100644
index 00000000..9b0e8c65
--- /dev/null
+++ b/src/tree_cursors.jl
@@ -0,0 +1,145 @@
+using Base.Iterators: Reverse
+
+"""
+    prev_sibling_assumed(cursor::GreenTreeCursor)::Union{Nothing, GreenTreeCursor}
+    prev_sibling_assumed(cursor::RedTreeCursor)::Union{Nothing, RedTreeCursor}
+
+Return the previous sibling of the current node, assuming that either one
+exists or that we are at the top level.
+Without knowing the parent, we cannot otherwise tell where the run of siblings
+ends; only at the top level is this detected, in which case `nothing` is returned.
+"""
+function prev_sibling_assumed end
+
+"""
+    GreenTreeCursor
+
+Represents a cursor into a ParseStream output buffer that makes it easy to
+work with the green tree representation.
+"""
+struct GreenTreeCursor
+    parser_output::Vector{RawGreenNode}
+    position::UInt32
+end
+GreenTreeCursor(stream::ParseStream) = GreenTreeCursor(stream.output, length(stream.output))
+this(node::GreenTreeCursor) = node.parser_output[node.position]
+
+const SENTINEL_INDEX = UInt32(1)
+function prev_sibling_assumed(cursor::GreenTreeCursor)
+    next_idx = cursor.position - this(cursor).node_span - UInt32(1)
+    next_idx == SENTINEL_INDEX && return nothing
+    GreenTreeCursor(cursor.parser_output, next_idx)
+end
+
+# Debug printing
+function Base.show(io::IO, node::GreenTreeCursor)
+    print(io, Base.summary(this(node)), " @", node.position)
+end
+
+# Reverse iterator interface
+Base.reverse(node::GreenTreeCursor) = Base.Iterators.Reverse(node)
+Base.IteratorSize(::Type{Reverse{GreenTreeCursor}}) = Base.SizeUnknown()
+@inline function Base.iterate(node::Reverse{GreenTreeCursor},
+                              next_idx::UInt32 = node.itr.position-UInt32(1))::Union{Nothing, Tuple{GreenTreeCursor, UInt32}}
+    node = node.itr
+    while true
+        next_idx == node.position - this(node).node_span - UInt32(1) && return nothing
+        next_node = GreenTreeCursor(node.parser_output, next_idx)
+        if kind(next_node) == K"TOMBSTONE"
+            # TOMBSTONED nodes are counted as part of the size of the tree, but
+            # do not contribute either byte ranges or children.
+            next_idx -= UInt32(1)
+            continue
+        end
+        # Inlined prev_sibling_assumed
+        new_next_idx = next_idx - this(next_node).node_span - UInt32(1)
+        return (next_node, new_next_idx)
+    end
+end
+
+# Accessors / predicates
+is_leaf(node::GreenTreeCursor)     = !is_non_terminal(this(node))
+head(node::GreenTreeCursor)        = this(node).head
+treesize(node::GreenTreeCursor)    = this(node).node_span
+is_non_terminal(node::GreenTreeCursor) = is_non_terminal(this(node))
+
+"""
+    span(node)
+
+Get the number of bytes this node covers in the source text.
+"""
+span(node::GreenTreeCursor) = this(node).byte_span
+
+"""
+    RedTreeCursor
+
+Wraps a `GreenTreeCursor` to keep track of the absolute position of the node
+in the original source text.
+"""
+struct RedTreeCursor
+    green::GreenTreeCursor
+    # The last byte that is still part of the node
+    byte_end::UInt32
+end
+RedTreeCursor(stream::ParseStream) = RedTreeCursor(
+    GreenTreeCursor(stream), stream.next_byte - UInt32(1))
+
+function prev_sibling_assumed(cursor::RedTreeCursor)
+    prevgreen = prev_sibling_assumed(cursor.green)
+    if prevgreen === nothing
+        return nothing
+    end
+    return RedTreeCursor(prevgreen, cursor.byte_end - span(cursor))
+end
+
+
+Base.reverse(node::RedTreeCursor) = Base.Iterators.Reverse(node)
+Base.IteratorSize(::Type{Reverse{RedTreeCursor}}) = Base.SizeUnknown()
+@inline function Base.iterate(
+        node::Reverse{RedTreeCursor},
+        (next_byte_end, next_idx)::NTuple{2, UInt32} =
+            (node.itr.byte_end, node.itr.green.position-UInt32(1)))::Union{Nothing, Tuple{RedTreeCursor, NTuple{2, UInt32}}}
+    r = iterate(Reverse(node.itr.green), next_idx)
+    r === nothing && return nothing
+    next_node, next_idx = r
+    return RedTreeCursor(next_node, next_byte_end),
+           (next_byte_end - span(next_node), next_idx)
+end
+
+is_leaf(node::RedTreeCursor)     = is_leaf(node.green)
+head(node::RedTreeCursor)        = head(node.green)
+span(node::RedTreeCursor)        = span(node.green)
+byte_range(node::RedTreeCursor)  = (node.byte_end - span(node.green) + UInt32(1)):node.byte_end
+treesize(node::RedTreeCursor)    = treesize(node.green)
+is_non_terminal(node::RedTreeCursor) = is_non_terminal(node.green)
+
+function Base.show(io::IO, node::RedTreeCursor)
+    print(io, node.green, " [", byte_range(node), "]")
+end
+
+has_toplevel_siblings(cursor::GreenTreeCursor) =
+    treesize(cursor)+1 != length(cursor.parser_output)-1
+has_toplevel_siblings(cursor::RedTreeCursor) =
+    has_toplevel_siblings(cursor.green)
+struct TopLevelSiblingIterator{C}
+    cursor::C
+end
+
+function reverse_toplevel_siblings(cursor::RedTreeCursor)
+    @assert cursor.green.position == length(cursor.green.parser_output)
+    TopLevelSiblingIterator(cursor)
+end
+
+function reverse_toplevel_siblings(cursor::GreenTreeCursor)
+    @assert cursor.position == length(cursor.parser_output)
+    TopLevelSiblingIterator(cursor)
+end
+
+function Base.iterate(tsi::TopLevelSiblingIterator)
+    return (tsi.cursor, tsi.cursor)
+end
+function Base.iterate(cursor::TopLevelSiblingIterator{C}, last::C) where {C}
+    this = prev_sibling_assumed(last)
+    this === nothing && return nothing
+    return (this, this)
+end
diff --git a/test/expr.jl b/test/expr.jl
index 200e8764..7651347c 100644
--- a/test/expr.jl
+++ b/test/expr.jl
@@ -379,7 +379,7 @@
             Expr(:call, :f, Expr(:parameters, Expr(:kw, :b, 2)))
         @test parsestmt("f(a=1; b=2)") ==
             Expr(:call, :f, Expr(:parameters, Expr(:kw, :b, 2)), Expr(:kw, :a, 1))
-        @test parsestmt("f(a; b; c)") == 
+        @test parsestmt("f(a; b; c)") ==
             Expr(:call, :f, Expr(:parameters, Expr(:parameters, :c), :b), :a)
         @test parsestmt("+(a=1,)") ==
             Expr(:call, :+, Expr(:kw, :a, 1))
@@ -389,11 +389,11 @@
         # Operator calls:  = is not :kw
         @test parsestmt("(x=1) != 2") ==
             Expr(:call, :!=, Expr(:(=), :x, 1), 2)
-        @test parsestmt("+(a=1)") == 
+        @test parsestmt("+(a=1)") ==
             Expr(:call, :+, Expr(:(=), :a, 1))
-        @test parsestmt("(a=1)'") == 
+        @test parsestmt("(a=1)'") ==
             Expr(Symbol("'"), Expr(:(=), :a, 1))
-        @test parsestmt("(a=1)'ᵀ") == 
+        @test parsestmt("(a=1)'ᵀ") ==
             Expr(:call, Symbol("'ᵀ"), Expr(:(=), :a, 1))
 
         # Dotcall
@@ -611,8 +611,8 @@
             Expr(:generator, :x,
                  Expr(:filter, :z, Expr(:(=), :a, :as), Expr(:(=), :b, :bs)))
         @test parsestmt("(x for a in as, b in bs for c in cs, d in ds)") ==
-            Expr(:flatten, 
-                Expr(:generator, 
+            Expr(:flatten,
+                Expr(:generator,
                      Expr(:generator, :x, Expr(:(=), :c, :cs), Expr(:(=), :d, :ds)),
                      Expr(:(=), :a, :as), Expr(:(=), :b, :bs)))
         @test parsestmt("(x for a in as for b in bs if z)") ==
@@ -782,7 +782,7 @@
         @test parsestmt("global x ~ 1") == Expr(:global, Expr(:call, :~, :x, 1))
         @test parsestmt("global x += 1") == Expr(:global, Expr(:+=, :x, 1))
 
-        # Parsing of global/local with 
+        # Parsing of global/local with a parenthesized list of names
         @test parsestmt("global (x,y)") == Expr(:global, :x, :y)
         @test parsestmt("local (x,y)") == Expr(:local, :x, :y)
     end
diff --git a/test/green_node.jl b/test/green_node.jl
index 42d20f52..727c7178 100644
--- a/test/green_node.jl
+++ b/test/green_node.jl
@@ -3,7 +3,7 @@
 
     @test span(t) == 6
     @test !is_leaf(t)
-    @test head(t) == SyntaxHead(K"call", 0x0008)
+    @test head(t) == SyntaxHead(K"call", 0x0088)
     @test span.(children(t)) == [2,1,1,1,1]
     @test head.(children(t)) == [
          SyntaxHead(K"Identifier", 0x0000)
diff --git a/test/parse_stream.jl b/test/parse_stream.jl
index f5148f27..0eca59b7 100644
--- a/test/parse_stream.jl
+++ b/test/parse_stream.jl
@@ -20,7 +20,6 @@ using JuliaSyntax: ParseStream,
         yy
     end
     """
-
     st = ParseStream(code)
 
     p1 = position(st)
@@ -73,8 +72,6 @@ using JuliaSyntax: ParseStream,
     @test peek(st) == K"NewlineWs"
     bump(st, TRIVIA_FLAG)
     emit(st, p1, K"toplevel")
-
-    @test build_tree(GreenNode, st) isa JuliaSyntax.GreenNode
 end
 
 @testset "ParseStream constructors" begin
@@ -106,47 +103,48 @@ end
 end
 
 @testset "ParseStream tree traversal" begin
-    # NB: ParseStreamPosition.token_index includes an initial sentinel token so
-    # indices here are one more than "might be expected".
+    # NB: ParseStreamPosition.node_index includes an initial sentinel token so
+    # indices here are one more than "might be expected". Additionally, note that
+    # the byte index points to the first byte after the token.
     st = parse_sexpr("((a b) c)")
     child1_pos = first_child_position(st, position(st))
-    @test child1_pos == ParseStreamPosition(7, 1)
-    @test first_child_position(st, child1_pos) == ParseStreamPosition(4, 0)
-    @test last_child_position(st, position(st)) == ParseStreamPosition(9, 0)
-    @test last_child_position(st, child1_pos) == ParseStreamPosition(6, 0)
+    @test child1_pos == ParseStreamPosition(7, 8)
+    @test first_child_position(st, child1_pos) == ParseStreamPosition(4, 4)
+    @test last_child_position(st, position(st)) == ParseStreamPosition(9, 10)
+    @test last_child_position(st, child1_pos) == ParseStreamPosition(6, 6)
 
     st = parse_sexpr("( (a b) c)")
     child1_pos = first_child_position(st, position(st))
-    @test child1_pos == ParseStreamPosition(8, 1)
-    @test first_child_position(st, child1_pos) == ParseStreamPosition(5, 0)
-    @test last_child_position(st, position(st)) == ParseStreamPosition(10, 0)
-    @test last_child_position(st, child1_pos) == ParseStreamPosition(7, 0)
+    @test child1_pos == ParseStreamPosition(8, 9)
+    @test first_child_position(st, child1_pos) == ParseStreamPosition(5, 5)
+    @test last_child_position(st, position(st)) == ParseStreamPosition(10, 11)
+    @test last_child_position(st, child1_pos) == ParseStreamPosition(7, 7)
 
     st = parse_sexpr("(a (b c))")
-    @test first_child_position(st, position(st)) == ParseStreamPosition(3, 0)
+    @test first_child_position(st, position(st)) == ParseStreamPosition(3, 3)
     child2_pos = last_child_position(st, position(st))
-    @test child2_pos == ParseStreamPosition(9, 1)
-    @test first_child_position(st, child2_pos) == ParseStreamPosition(6, 0)
-    @test last_child_position(st, child2_pos) == ParseStreamPosition(8, 0)
+    @test child2_pos == ParseStreamPosition(9, 10)
+    @test first_child_position(st, child2_pos) == ParseStreamPosition(6, 6)
+    @test last_child_position(st, child2_pos) == ParseStreamPosition(8, 8)
 
     st = parse_sexpr("( a (b c))")
-    @test first_child_position(st, position(st)) == ParseStreamPosition(4, 0)
+    @test first_child_position(st, position(st)) == ParseStreamPosition(4, 4)
     child2_pos = last_child_position(st, position(st))
-    @test child2_pos == ParseStreamPosition(10, 1)
-    @test first_child_position(st, child2_pos) == ParseStreamPosition(7, 0)
-    @test last_child_position(st, child2_pos) == ParseStreamPosition(9, 0)
+    @test child2_pos == ParseStreamPosition(10, 11)
+    @test first_child_position(st, child2_pos) == ParseStreamPosition(7, 7)
+    @test last_child_position(st, child2_pos) == ParseStreamPosition(9, 9)
 
     st = parse_sexpr("a (b c)")
-    @test first_child_position(st, position(st)) == ParseStreamPosition(5, 0)
-    @test last_child_position(st, position(st)) == ParseStreamPosition(7, 0)
+    @test first_child_position(st, position(st)) == ParseStreamPosition(5, 5)
+    @test last_child_position(st, position(st)) == ParseStreamPosition(7, 7)
 
     st = parse_sexpr("(a) (b c)")
-    @test first_child_position(st, position(st)) == ParseStreamPosition(7, 0)
-    @test last_child_position(st, position(st)) == ParseStreamPosition(9, 0)
+    @test first_child_position(st, position(st)) == ParseStreamPosition(7, 8)
+    @test last_child_position(st, position(st)) == ParseStreamPosition(9, 10)
 
     st = parse_sexpr("(() ())")
-    @test first_child_position(st, position(st)) == ParseStreamPosition(4, 1)
-    @test last_child_position(st, position(st)) == ParseStreamPosition(7, 2)
+    @test first_child_position(st, position(st)) == ParseStreamPosition(4, 5)
+    @test last_child_position(st, position(st)) == ParseStreamPosition(7, 9)
 end
 
 @testset "SubString{GenericString} (issue #505)" begin
diff --git a/test/parser.jl b/test/parser.jl
index f208e24c..f0ff0f51 100644
--- a/test/parser.jl
+++ b/test/parser.jl
@@ -5,9 +5,7 @@ function parse_to_sexpr_str(production, code::AbstractString; v=v"1.6", show_kws
     stream = ParseStream(code, version=v)
     production(ParseState(stream))
     JuliaSyntax.validate_tokens(stream)
-    t = build_tree(GreenNode, stream)
-    source = SourceFile(code)
-    s = SyntaxNode(source, t, keep_parens=true)
+    s = build_tree(SyntaxNode, stream, keep_parens=true)
     return sprint(io->show(io, MIME("text/x.sexpression"), s; show_kws...))
 end
 
diff --git a/test/syntax_tree.jl b/test/syntax_tree.jl
index 2fac0d6b..3e2361ca 100644
--- a/test/syntax_tree.jl
+++ b/test/syntax_tree.jl
@@ -28,7 +28,6 @@
         "(call-i (call-i a::Identifier *::Identifier b::Identifier) +::Identifier c::Identifier)"
 
     @test sprint(highlight, t[1][3]) == "a*b + c\n# ╙"
-    @test sprint(highlight, t.source, t.raw, 1, 3) == "a*b + c\n# ╙"
 
     # Pass-through field access
     node = t[1][1]
@@ -40,7 +39,6 @@
     # Newline-terminated source
     t = parsestmt(SyntaxNode, "a*b + c\n")
     @test sprint(highlight, t[1][3]) == "a*b + c\n# ╙"
-    @test sprint(highlight, t.source, t.raw, 1, 3) == "a*b + c\n# ╙"
 
     # copy
     t = parsestmt(SyntaxNode, "a*b + c")
@@ -58,8 +56,8 @@
 
     # SyntaxNode with offsets
     t,_ = parsestmt(SyntaxNode, "begin a end\nbegin b end", 13)
-    @test t.position == 13
-    @test t[1].position == 19
+    @test first(byte_range(t)) == 13
+    @test first(byte_range(t[1])) == 19
     @test t[1].val == :b
 
     # Unicode character ranges
diff --git a/test/test_utils.jl b/test/test_utils.jl
index 7553bf1c..dae16cc0 100644
--- a/test/test_utils.jl
+++ b/test/test_utils.jl
@@ -18,6 +18,7 @@ using .JuliaSyntax:
     @K_str,
     # Nodes
     GreenNode,
+    RedTreeCursor,
     SyntaxNode,
     ErrorVal,
     # Node inspection
@@ -131,7 +132,7 @@ function exprs_roughly_equal(fl_ex, ex)
     args = ex.head in (:block, :quote, :toplevel) ?
            filter(x->!(x isa LineNumberNode), ex.args) :
            ex.args
-    if (fl_ex.head == :block && ex.head == :tuple && 
+    if (fl_ex.head == :block && ex.head == :tuple &&
         length(fl_args) == 2 && length(args) == 2 &&
         Meta.isexpr(args[1], :parameters, 1) &&
         exprs_roughly_equal(fl_args[2], args[1].args[1]) &&

From 0f09e2823e3de69d90ec7b50779f3c441eff0854 Mon Sep 17 00:00:00 2001
From: Keno Fischer <keno@juliahub.com>
Date: Mon, 9 Jun 2025 03:49:43 +0000
Subject: [PATCH 2/8] Fix perf and add compat for :position access

---
 src/syntax_tree.jl  | 10 ++++++++++
 src/tree_cursors.jl | 13 +++++++++++++
 2 files changed, 23 insertions(+)

diff --git a/src/syntax_tree.jl b/src/syntax_tree.jl
index ba9ca3ee..000fd629 100644
--- a/src/syntax_tree.jl
+++ b/src/syntax_tree.jl
@@ -60,6 +60,16 @@ struct SyntaxData <: AbstractSyntaxData
     byte_end::UInt32
     val::Any
 end
+function Base.getproperty(data::SyntaxData, name::Symbol)
+    if name === :position
+        # Previous versions of JuliaSyntax had `position::Int`.
+        # Allow access for compatibility. It was renamed (with changed) semantics
+        # to `byte_end::UInt32` to match the rest of the code base, which identified
+        # nodes, by their last byte.
+        return Int(getfield(data, :byte_end) - getfield(data, :raw).node_span + UInt32(1))
+    end
+    return getfield(data, name)
+end
 
 Base.hash(data::SyntaxData, h::UInt) =
     hash(data.source, hash(data.raw, hash(data.byte_end,
diff --git a/src/tree_cursors.jl b/src/tree_cursors.jl
index 9b0e8c65..c079904a 100644
--- a/src/tree_cursors.jl
+++ b/src/tree_cursors.jl
@@ -143,3 +143,16 @@ function Base.iterate(cursor::TopLevelSiblingIterator{C}, last::C) where {C}
     this === nothing && return nothing
     return (this, this)
 end
+
+# HACK: Force inlining of `filter` for our cursors to avoid significant perf
+# degradation.
+@inline function Base.iterate(f::Iterators.Filter{<:Any, Iterators.Reverse{T}}, state...) where {T<:Union{RedTreeCursor, GreenTreeCursor}}
+    y = iterate(f.itr, state...)
+    while y !== nothing
+        if f.flt(y[1])
+            return y
+        end
+        y = iterate(f.itr, y[2])
+    end
+    nothing
+end

From 24c89361afa3cb135516187d4bd6c311cdda8f6c Mon Sep 17 00:00:00 2001
From: Keno Fischer <keno@juliahub.com>
Date: Mon, 9 Jun 2025 04:02:03 +0000
Subject: [PATCH 3/8] Another perf tweak

---
 src/green_node.jl | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/src/green_node.jl b/src/green_node.jl
index e6a239fe..f2a95db2 100644
--- a/src/green_node.jl
+++ b/src/green_node.jl
@@ -137,16 +137,13 @@ function build_tree(T::Type{GreenNode}, stream::ParseStream; kws...)
         # to test a partial parse. Wrap everything in K"wrapper"
         all_processed = 0
         local cs
-        while true
-            c = GreenNode(cursor)
+        for child in reverse_toplevel_siblings(cursor)
+            c = GreenNode(child)
             if !@isdefined(cs)
-                cs = [c]
+                cs = GreenNode{SyntaxHead}[c]
             else
                 pushfirst!(cs, c)
             end
-            all_processed += treesize(cursor)+1
-            all_processed == length(stream.output)-1 && break
-            cursor = GreenTreeCursor(stream.output, length(stream.output) - all_processed)
         end
         @assert length(cs) != 1
         return GreenNode(SyntaxHead(K"wrapper", NON_TERMINAL_FLAG), stream.next_byte-1, cs)

From b01837a52fec3df609d8e234725076f48e213da1 Mon Sep 17 00:00:00 2001
From: Keno Fischer <keno@juliahub.com>
Date: Mon, 9 Jun 2025 04:59:20 +0000
Subject: [PATCH 4/8] Some more tweaks

---
 src/green_node.jl   | 10 ++++++----
 src/parse_stream.jl |  4 ++--
 src/tree_cursors.jl | 32 ++++++++++++++++++++------------
 3 files changed, 28 insertions(+), 18 deletions(-)

diff --git a/src/green_node.jl b/src/green_node.jl
index f2a95db2..a80d6e13 100644
--- a/src/green_node.jl
+++ b/src/green_node.jl
@@ -119,20 +119,22 @@ function Base.show(io::IO, ::MIME"text/plain", node::GreenNode, str::AbstractStr
 end
 
 function GreenNode(cursor::GreenTreeCursor)
+    chead = head(cursor)
+    T = typeof(chead)
     if is_leaf(cursor)
-        children = nothing
+        return GreenNode{T}(head(cursor), span(cursor), nothing)
     else
-        children = GreenNode{typeof(head(cursor))}[]
+        children = GreenNode{T}[]
         for child in reverse(cursor)
             pushfirst!(children, GreenNode(child))
         end
+        return GreenNode{T}(head(cursor), span(cursor), children)
     end
-    return GreenNode{typeof(head(cursor))}(head(cursor), span(cursor), children)
 end
 
 function build_tree(T::Type{GreenNode}, stream::ParseStream; kws...)
     cursor = GreenTreeCursor(stream)
-    if treesize(cursor)+1 != length(stream.output)-1 # First output is a tombstone =
+    if has_toplevel_siblings(cursor)
         # There are multiple toplevel nodes, e.g. because we're using this
         # to test a partial parse. Wrap everything in K"wrapper"
         all_processed = 0
diff --git a/src/parse_stream.jl b/src/parse_stream.jl
index 8ae51145..1000fdaa 100644
--- a/src/parse_stream.jl
+++ b/src/parse_stream.jl
@@ -360,10 +360,10 @@ end
 
 function Base.getproperty(rgn::RawGreenNode, name::Symbol)
     if name === :node_span
-        has_flags(rgn.head, NON_TERMINAL_FLAG) || return UInt32(0) # Leaf nodes have no children
+        has_flags(getfield(rgn, :head), NON_TERMINAL_FLAG) || return UInt32(0) # Leaf nodes have no children
         return getfield(rgn, :node_span_or_orig_kind)
     elseif name === :orig_kind
-        has_flags(rgn.head, NON_TERMINAL_FLAG) && error("Cannot access orig_kind for non-terminal node")
+        has_flags(getfield(rgn, :head), NON_TERMINAL_FLAG) && error("Cannot access orig_kind for non-terminal node")
         return Kind(getfield(rgn, :node_span_or_orig_kind))
     end
     getfield(rgn, name)
diff --git a/src/tree_cursors.jl b/src/tree_cursors.jl
index c079904a..3f65b6ce 100644
--- a/src/tree_cursors.jl
+++ b/src/tree_cursors.jl
@@ -40,20 +40,22 @@ end
 Base.reverse(node::GreenTreeCursor) = Base.Iterators.Reverse(node)
 Base.IteratorSize(::Type{Reverse{GreenTreeCursor}}) = Base.SizeUnknown()
 @inline function Base.iterate(node::Reverse{GreenTreeCursor},
-                              next_idx::UInt32 = node.itr.position-UInt32(1))::Union{Nothing, Tuple{GreenTreeCursor, UInt32}}
+                              (next_idx, final)::NTuple{2, UInt32} =
+                              (node.itr.position-UInt32(1), node.itr.position - this(node.itr).node_span - UInt32(1)))::Union{Nothing, Tuple{GreenTreeCursor, NTuple{2, UInt32}}}
     node = node.itr
     while true
-        next_idx == node.position - this(node).node_span - UInt32(1) && return nothing
+        next_idx == final && return nothing
         next_node = GreenTreeCursor(node.parser_output, next_idx)
-        if kind(next_node) == K"TOMBSTONE"
+        nrgn = this(next_node)
+        if getfield(nrgn, :head).kind == K"TOMBSTONE"
             # TOMBSTONED nodes are counted as part of the size of the tree, but
             # do not contribute either byte ranges or children.
             next_idx -= UInt32(1)
             continue
         end
         # Inlined prev_sibling_assumed
-        new_next_idx = next_idx - this(next_node).node_span - UInt32(1)
-        return (next_node, new_next_idx)
+        new_next_idx = next_idx - nrgn.node_span - UInt32(1)
+        return (next_node, (new_next_idx, final))
     end
 end
 
@@ -95,15 +97,21 @@ end
 
 Base.reverse(node::RedTreeCursor) = Base.Iterators.Reverse(node)
 Base.IteratorSize(::Type{Reverse{RedTreeCursor}}) = Base.SizeUnknown()
-@inline function Base.iterate(
-        node::Reverse{RedTreeCursor},
-        (next_byte_end, next_idx)::NTuple{2, UInt32} =
-            (node.itr.byte_end, node.itr.green.position-UInt32(1)))::Union{Nothing, Tuple{RedTreeCursor, NTuple{2, UInt32}}}
-    r = iterate(Reverse(node.itr.green), next_idx)
+@inline function Base.iterate(node::Reverse{RedTreeCursor})::Union{Nothing, Tuple{RedTreeCursor, NTuple{3, UInt32}}}
+    r = iterate(Reverse(node.itr.green))
+    return _iterate_red_cursor(r, node.itr.byte_end)
+end
+
+@inline function Base.iterate(node::Reverse{RedTreeCursor}, state::NTuple{3, UInt32})::Union{Nothing, Tuple{RedTreeCursor, NTuple{3, UInt32}}}
+    r = iterate(Reverse(node.itr.green), Base.tail(state))
+    return _iterate_red_cursor(r, first(state))
+end
+
+@inline function _iterate_red_cursor(r, byte_end)
     r === nothing && return nothing
     next_node, next_idx = r
-    return RedTreeCursor(next_node, next_byte_end),
-           (next_byte_end - span(next_node), next_idx)
+    return RedTreeCursor(next_node, byte_end),
+           (byte_end - span(next_node), next_idx...)
 end
 
 is_leaf(node::RedTreeCursor)     = is_leaf(node.green)

From ebdd3128ba4c4a9a7d8e53b9d11a77f85cb1c007 Mon Sep 17 00:00:00 2001
From: Keno Fischer <keno@juliahub.com>
Date: Mon, 9 Jun 2025 05:44:28 +0000
Subject: [PATCH 5/8] Work around https://github.com/JuliaLang/julia/pull/58674

---
 src/tokenize.jl | 6 +++---
 src/utils.jl    | 9 +++++++++
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/tokenize.jl b/src/tokenize.jl
index 0ea9be19..761455dd 100644
--- a/src/tokenize.jl
+++ b/src/tokenize.jl
@@ -2,7 +2,7 @@ module Tokenize
 
 export tokenize, untokenize
 
-using ..JuliaSyntax: JuliaSyntax, Kind, @K_str, @KSet_str
+using ..JuliaSyntax: JuliaSyntax, Kind, @K_str, @KSet_str, @callsite_inline
 
 import ..JuliaSyntax: kind,
     is_literal, is_contextual_keyword, is_word_operator
@@ -1303,14 +1303,14 @@ function lex_identifier(l::Lexer, c)
             @inbounds if (pc_byte == UInt8('!') && ppc == '=') || !ascii_is_identifier_char[pc_byte+1]
                 break
             end
-        elseif Unicode.isgraphemebreak!(graphemestate, c, pc)
+        elseif @callsite_inline Unicode.isgraphemebreak!(graphemestate, c, pc)
             if (pc == '!' && ppc == '=') || !is_identifier_char(pc)
                 break
             end
         elseif pc in ('\u200c','\u200d') # ZWNJ/ZWJ control characters
             # ZWJ/ZWNJ only within grapheme sequences, not at end
             graphemestate_peek[] = graphemestate[]
-            if Unicode.isgraphemebreak!(graphemestate_peek, pc, ppc)
+            if @callsite_inline Unicode.isgraphemebreak!(graphemestate_peek, pc, ppc)
                 break
             end
         end
diff --git a/src/utils.jl b/src/utils.jl
index 3f95c485..8c0614e3 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -14,6 +14,15 @@ if VERSION < v"1.5"
     import Base.peek
 end
 
+if VERSION < v"1.8"
+    macro callsite_inline(call)
+        esc(call)
+    end
+else
+    const var"@callsite_inline" = var"@inline"
+end
+
+
 _unsafe_wrap_substring(s) = (s.offset, unsafe_wrap(Vector{UInt8}, s.string))
 
 #--------------------------------------------------

From 6c900294b9665a079e70629c633b2098ae851503 Mon Sep 17 00:00:00 2001
From: Keno Fischer <keno@juliahub.com>
Date: Mon, 9 Jun 2025 05:47:14 +0000
Subject: [PATCH 6/8] 1.0 compat

---
 src/utils.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/utils.jl b/src/utils.jl
index 8c0614e3..c21c251e 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -14,7 +14,7 @@ if VERSION < v"1.5"
     import Base.peek
 end
 
-if VERSION < v"1.8"
+@static if VERSION < v"1.8"
     macro callsite_inline(call)
         esc(call)
     end

From cb9fdf6fd4612b74250db65a7211620d37a046f2 Mon Sep 17 00:00:00 2001
From: Keno Fischer <keno@juliahub.com>
Date: Wed, 11 Jun 2025 22:39:47 +0000
Subject: [PATCH 7/8] Restore raw::GreenNode

More packages than I thought are relying on `raw` being a GreenNode.
In general, as discussed on the PR, we'd probably like to do more
work on traversal trees anyway, so this keeps things consistent for
downstream while we do that. Then, when we have something better,
downstream can switch.
---
 src/green_node.jl  |  2 +-
 src/syntax_tree.jl | 66 ++++++++++++++++++++++++++++++++++------------
 2 files changed, 50 insertions(+), 18 deletions(-)

diff --git a/src/green_node.jl b/src/green_node.jl
index a80d6e13..61bdbb01 100644
--- a/src/green_node.jl
+++ b/src/green_node.jl
@@ -32,7 +32,7 @@ span(node::GreenNode) = node.span
 Base.getindex(node::GreenNode, i::Int) = children(node)[i]
 Base.getindex(node::GreenNode, rng::UnitRange) = view(children(node), rng)
 Base.firstindex(node::GreenNode) = 1
-Base.lastindex(node::GreenNode) = length(children(node))
+Base.lastindex(node::GreenNode) = children(node) === nothing ? 0 : length(children(node))
 
 """
 Get absolute position and span of the child of `node` at the given tree `path`.
diff --git a/src/syntax_tree.jl b/src/syntax_tree.jl
index 000fd629..64fb78ce 100644
--- a/src/syntax_tree.jl
+++ b/src/syntax_tree.jl
@@ -56,7 +56,7 @@ const AbstractSyntaxNode = TreeNode{<:AbstractSyntaxData}
 
 struct SyntaxData <: AbstractSyntaxData
     source::SourceFile
-    raw::RawGreenNode
+    raw::GreenNode{SyntaxHead}
     byte_end::UInt32
     val::Any
 end
@@ -92,36 +92,54 @@ const SyntaxNode = TreeNode{SyntaxData}
 
 function SyntaxNode(source::SourceFile, cursor::RedTreeCursor;
                     keep_parens=false)
+    # Build the full GreenNode tree once upfront (including trivia)
+    green = GreenNode(cursor.green)
+
+    GC.@preserve source begin
+        raw_offset, txtbuf = _unsafe_wrap_substring(source.code)
+        offset = raw_offset - source.byte_offset
+        _to_SyntaxNode(source, txtbuf, offset, cursor, green, keep_parens)
+    end
+end
+
+function SyntaxNode(source::SourceFile, cursor::RedTreeCursor, green::GreenNode{SyntaxHead};
+                    keep_parens=false)
     GC.@preserve source begin
         raw_offset, txtbuf = _unsafe_wrap_substring(source.code)
         offset = raw_offset - source.byte_offset
-        _to_SyntaxNode(source, txtbuf, offset, cursor, keep_parens)
+        _to_SyntaxNode(source, txtbuf, offset, cursor, green, keep_parens)
     end
 end
 
 should_include_node(child) = !is_trivia(child) || is_error(child)
 
 function _to_SyntaxNode(source::SourceFile, txtbuf::Vector{UInt8}, offset::Int,
-                        cursor::RedTreeCursor, keep_parens::Bool)
+                        cursor::RedTreeCursor, green::GreenNode{SyntaxHead}, keep_parens::Bool)
     if is_leaf(cursor)
         # Here we parse the values eagerly rather than representing them as
         # strings. Maybe this is good. Maybe not.
         valrange = byte_range(cursor)
         val = parse_julia_literal(txtbuf, head(cursor), valrange .+ offset)
-        return SyntaxNode(nothing, nothing, SyntaxData(source, this(cursor.green), cursor.byte_end, val))
+        return SyntaxNode(nothing, nothing, SyntaxData(source, green, cursor.byte_end, val))
     else
         cs = SyntaxNode[]
-        pos = position
-        for child in reverse(cursor)
-            # FIXME: Allowing trivia is_error nodes here corrupts the tree layout.
-            if should_include_node(child)
-                pushfirst!(cs, _to_SyntaxNode(source, txtbuf, offset, child, keep_parens))
+        green_children = children(green)
+
+        # We need to match up the filtered SyntaxNode children with the unfiltered GreenNode children
+        # Both cursor and green children need to be traversed in the same order
+        # Since cursor iterates in reverse, we need to match from the end of green_children
+        green_idx = green_children === nothing ? 0 : length(green_children)
+
+        for (i, child_cursor) in enumerate(reverse(cursor))
+            if should_include_node(child_cursor)
+                pushfirst!(cs, _to_SyntaxNode(source, txtbuf, offset, child_cursor, green[end-i+1], keep_parens))
             end
         end
+
         if !keep_parens && kind(cursor) == K"parens" && length(cs) == 1
             return cs[1]
         end
-        node = SyntaxNode(nothing, cs, SyntaxData(source, this(cursor.green), cursor.byte_end, nothing))
+        node = SyntaxNode(nothing, cs, SyntaxData(source, green, cursor.byte_end, nothing))
         for c in cs
             c.parent = node
         end
@@ -169,7 +187,7 @@ structure.
 """
 head(node::AbstractSyntaxNode) = head(node.raw)
 
-span(node::AbstractSyntaxNode) = node.raw.byte_span
+span(node::AbstractSyntaxNode) = node.raw.span
 
 byte_range(node::AbstractSyntaxNode) = (node.byte_end - span(node) + 1):node.byte_end
 
@@ -290,15 +308,29 @@ function build_tree(::Type{SyntaxNode}, stream::ParseStream;
     if has_toplevel_siblings(cursor)
         # There are multiple toplevel nodes, e.g. because we're using this
         # to test a partial parse. Wrap everything in K"wrapper"
+
+        # First build the full green tree for all children (including trivia)
+        green_children = GreenNode{SyntaxHead}[]
+        for child in reverse_toplevel_siblings(cursor)
+            pushfirst!(green_children, GreenNode(child.green))
+        end
+
+        # Create a wrapper GreenNode with children
+        green = GreenNode(SyntaxHead(K"wrapper", NON_TERMINAL_FLAG),
+                                  stream.next_byte-1, green_children)
+
+        # Now build SyntaxNodes, iterating through cursors and green nodes together
         cs = SyntaxNode[]
-        for child in
-                Iterators.filter(should_include_node, reverse_toplevel_siblings(cursor))
-            pushfirst!(cs, SyntaxNode(source, child, keep_parens=keep_parens))
+        for (i, child) in enumerate(reverse_toplevel_siblings(cursor))
+            if should_include_node(child)
+                pushfirst!(cs, SyntaxNode(source, child, green[end-i+1], keep_parens=keep_parens))
+            end
         end
+
         length(cs) == 1 && return only(cs)
-        node = SyntaxNode(nothing, cs, SyntaxData(source,
-            RawGreenNode(SyntaxHead(K"wrapper", NON_TERMINAL_FLAG),
-            stream.next_byte-1, length(stream.output)-1), stream.next_byte-1, nothing))
+
+        node = SyntaxNode(nothing, cs, SyntaxData(source, green,
+                                                   stream.next_byte-1, nothing))
         for c in cs
             c.parent = node
         end

From e1fe502484ecc1853ce64bd00a4a27f5580f1ec3 Mon Sep 17 00:00:00 2001
From: Keno Fischer <keno@alumni.harvard.edu>
Date: Wed, 11 Jun 2025 19:23:10 -0400
Subject: [PATCH 8/8] Update src/syntax_tree.jl

Co-authored-by: Em Chu <61633163+mlechu@users.noreply.github.com>
---
 src/syntax_tree.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/syntax_tree.jl b/src/syntax_tree.jl
index 64fb78ce..71b1be82 100644
--- a/src/syntax_tree.jl
+++ b/src/syntax_tree.jl
@@ -66,7 +66,7 @@ function Base.getproperty(data::SyntaxData, name::Symbol)
         # Allow access for compatibility. It was renamed (with changed) semantics
         # to `byte_end::UInt32` to match the rest of the code base, which identified
         # nodes, by their last byte.
-        return Int(getfield(data, :byte_end) - getfield(data, :raw).node_span + UInt32(1))
+        return Int(getfield(data, :byte_end) - getfield(data, :raw).span + UInt32(1))
     end
     return getfield(data, name)
 end