From 0584628e9ef4b335d796c6d3e72e41cba692a74f Mon Sep 17 00:00:00 2001 From: Boris Batkin Date: Thu, 11 Jun 2026 10:00:03 -0700 Subject: [PATCH 1/3] =?UTF-8?q?jit:=20table=20lock/unlock=20glue=20(wip=20?= =?UTF-8?q?=E2=80=94=20slot-walk=20groundwork)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Fable 5 --- include/daScript/simulate/aot_builtin_jit.h | 2 ++ src/builtin/module_jit.cpp | 14 ++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/include/daScript/simulate/aot_builtin_jit.h b/include/daScript/simulate/aot_builtin_jit.h index 8f8dc13a81..2d659b6673 100644 --- a/include/daScript/simulate/aot_builtin_jit.h +++ b/include/daScript/simulate/aot_builtin_jit.h @@ -35,6 +35,8 @@ namespace das { void * das_get_jit_free_persistent (); void * das_get_jit_array_lock (); void * das_get_jit_array_unlock (); + void * das_get_jit_table_lock (); + void * das_get_jit_table_unlock (); void * das_get_jit_table_at ( int32_t baseType, Context * context, LineInfoArg * at ); void * das_get_jit_table_erase ( int32_t baseType, Context * context, LineInfoArg * at ); void * das_get_jit_table_find ( int32_t baseType, Context * context, LineInfoArg * at ); diff --git a/src/builtin/module_jit.cpp b/src/builtin/module_jit.cpp index 47f02d8e29..485fbf8ea7 100644 --- a/src/builtin/module_jit.cpp +++ b/src/builtin/module_jit.cpp @@ -446,6 +446,14 @@ extern "C" { builtin_array_unlock_mutable(arr, context, at); } + DAS_API void jit_table_lock ( Table & tab, Context * context, LineInfoArg * at ) { + builtin_table_lock(tab, context, at); + } + + DAS_API void jit_table_unlock ( Table & tab, Context * context, LineInfoArg * at ) { + builtin_table_unlock(tab, context, at); + } + DAS_API int32_t jit_str_cmp ( char * a, char * b ) { return strcmp(a ? a : "",b ? 
b : ""); } @@ -638,6 +646,8 @@ extern "C" { void *das_get_jit_free_persistent() { return (void *)&jit_free_persistent; } void *das_get_jit_array_lock() { return (void *)&builtin_array_lock; } void *das_get_jit_array_unlock() { return (void *)&builtin_array_unlock; } + void *das_get_jit_table_lock() { return (void *)&builtin_table_lock; } + void *das_get_jit_table_unlock() { return (void *)&builtin_table_unlock; } void *das_get_jit_str_cmp() { return (void *)&jit_str_cmp; } void *das_get_jit_prologue() { return (void *)&jit_prologue; } void *das_get_jit_epilogue() { return (void *)&jit_epilogue; } @@ -1177,6 +1187,10 @@ extern "C" { SideEffects::none, "das_get_jit_array_lock"); addExtern(*this, lib, "get_jit_array_unlock", SideEffects::none, "das_get_jit_array_unlock"); + addExtern(*this, lib, "get_jit_table_lock", + SideEffects::none, "das_get_jit_table_lock"); + addExtern(*this, lib, "get_jit_table_unlock", + SideEffects::none, "das_get_jit_table_unlock"); addExtern(*this, lib, "get_jit_table_at", SideEffects::none, "das_get_jit_table_at"); addExtern(*this, lib, "get_jit_table_erase", From c191ddde550daec225b7bddac81e3209a74927db Mon Sep 17 00:00:00 2001 From: Boris Batkin Date: Thu, 11 Jun 2026 10:25:00 -0700 Subject: [PATCH 2/3] jit: inline table slot walk for keys/values for-loop sources MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A for-loop over keys(tab)/values(tab) (incl. the fused linq kv zips) compiled to a heap-allocated C++ TableIterator + first/next call per element per lane. Workhorse-keyed tables are open-addressed at every capacity (#3025), so the walk is a flat ctrl-byte scan — now emitted inline: lock once, scan ctrl[slot] > CTRL_TOMBSTONE, keys copy the slot key out (past-end guarded, like the C++ iterator), values bind a pointer into the data block, close re-checks the data base (modified-during-iteration on shared/hopeless tables that bypass the lock) and unlocks. 
String / non-workhorse keys keep the generic iterator (different liveness regimes). Detection: the daslib generics instantiate into the compiling module as builtin`keys` / builtin`values` — matched by that compiler-generated prefix (the plain-name + module-$ check never fired: the instances keep neither the plain name nor the `$` module). The skipped source call never allocates an iterator, mirroring count(). Glue: jit_table_lock/unlock (module_jit.cpp wrapping builtin_table_lock/unlock; engine mapping + DAS_API symbol for the exe/dll paths). LLVM_JIT_CODEGEN_VERSION 0x25 -> 0x26. m7 JIT spot numbers (ns/elem): count/sum/max_aggregate 13.4 -> 7.3, chained_where 17.8 -> 10.4, join_count 33 -> 25.2, join_probe 24 -> 16.6, groupby_count ~160 -> 44.1, reverse_take ~70 -> 19.3, point_lookup_scan 6.0 -> 3.0, last_match -> 12.0. Full sweep + results.md refresh after the table-arc PR (#3099) merges and this branch rebases onto it. Gates: JIT tests/linq 1962/1962, tests/language 1054/1054, jit_tests + decs + json green, exe-build smoke links (the #3025 dll-glob lesson), new tests/jit_tests/table_walk.das 8/8 INTERP+JIT (incl. is_jit_function firing checks, tombstones, by-ref values, break-unlock, locked-iteration panic, string-key fallback). CI lint clean. 
Co-Authored-By: Claude Fable 5 --- modules/dasLLVM/daslib/llvm_jit.das | 186 ++++++++++++++++++++- modules/dasLLVM/daslib/llvm_jit_common.das | 17 ++ modules/dasLLVM/daslib/llvm_jit_run.das | 2 +- tests/jit_tests/table_walk.das | 147 ++++++++++++++++ 4 files changed, 350 insertions(+), 2 deletions(-) create mode 100644 tests/jit_tests/table_walk.das diff --git a/modules/dasLLVM/daslib/llvm_jit.das b/modules/dasLLVM/daslib/llvm_jit.das index ac16dfc40d..bc8f91338a 100644 --- a/modules/dasLLVM/daslib/llvm_jit.das +++ b/modules/dasLLVM/daslib/llvm_jit.das @@ -331,6 +331,21 @@ def public is_workhorse_table_key(t : Type) : bool { || t == Type.tBool || t == Type.tFloat || t == Type.tDouble) } +// Per-source state of an inline table slot walk (a `keys(tab)` / `values(tab)` for-loop source +// over a workhorse-keyed table) — set up by the first() arm, advanced by next(), torn down by +// emit_iterator_close. Non-string keys are open-addressed at every capacity (1-byte CTRL array), +// so the walk is a flat `ctrl[slot] > CTRL_TOMBSTONE` scan with no regime branch. +struct TableWalkState { + slot : LLVMOpaqueValue? // alloca i64 — current slot cursor + cap : LLVMOpaqueValue? // i64 capacity, loaded once at first() + ctrlPtr : LLVMOpaqueValue? // i8* — open-addressed control bytes + basePtr : LLVMOpaqueValue? // typed keys* (keys lane) / i8* data (values lane) + origin : LLVMOpaqueValue? // raw KEYS/DATA at first() — modified-during-iteration check + tabPtr : LLVMOpaqueValue? // Table* — unlock + the origin recheck at close + isKeys : bool + stride : int // value stride in bytes (values lane; keys GEP by element type) +} + [macro] class public LlvmJitVisitor : AstVisitor { adapter : VisitorAdapter? 
@@ -358,6 +373,7 @@ class public LlvmJitVisitor : AstVisitor { forBodyToExpr : table // for-loop body block -> ExprFor (for iterator close on return path) skipCall : table range2 : table // for loop - range - where to + tableWalk : table // for loop - inline table slot-walk sources callBlock : LLVMOpaqueValue? g_builder : LLVMOpaqueBuilder? g_di_builder : LLVMOpaqueDIBuilder? @@ -3627,6 +3643,100 @@ class public LlvmJitVisitor : AstVisitor { return cf.func.flags.builtIn && cf.func._module.name == "$" && (cf.func.name == "count" || cf.func.name == "ucount") } + // A `keys(tab)` / `values(tab)` for-loop source over a workhorse-keyed table — the inline + // slot-walk fast path. String / non-workhorse keys keep the generic C++ iterator (different + // liveness regimes; see runtime_table.h tableLiveSlot). The daslib generics instantiate into + // the compiling module under the origin-prefixed name `builtin`keys`` — the backtick + // prefix is compiler-generated, so it can't collide with a user function. + def table_walk_is_keys_call(cf : ExprCall?) 
{ + return starts_with(cf.func.name, "builtin`keys`") + } + + def is_table_keys_values(expr : ExpressionPtr) { + if (!(expr is ExprCall)) return false + var cf = expr as ExprCall + if (cf.func == null || length(cf.arguments) != 1 + || !(starts_with(cf.func.name, "builtin`keys`") || starts_with(cf.func.name, "builtin`values`"))) return false + assume tabT = cf.arguments[0]._type + if (tabT == null || !tabT.isGoodTableType) return false + return is_workhorse_table_key(tabT.firstType.baseType) + } + + def build_table_lock_call(tab_ptr : LLVMOpaqueValue?; at : LineInfo) { + var params = fixed_array( + LLVMBuildPointerCast(g_builder, tab_ptr, types.LLVMVoidPtrType(), ""), + get_context_param(), + get_line_info_ptr(at), + ) + var typ = g_fn_types[FN_JIT_TABLE_LOCK] + LLVMBuildCall2(g_builder, typ, LLVMGetNamedFunction(g_mod, FN_JIT_TABLE_LOCK), params, "") + } + + def build_table_unlock_call(tab_ptr : LLVMOpaqueValue?; at : LineInfo) { + var params = fixed_array( + LLVMBuildPointerCast(g_builder, tab_ptr, types.LLVMVoidPtrType(), ""), + get_context_param(), + get_line_info_ptr(at), + ) + var typ = g_fn_types[FN_JIT_TABLE_UNLOCK] + LLVMBuildCall2(g_builder, typ, LLVMGetNamedFunction(g_mod, FN_JIT_TABLE_UNLOCK), params, "") + } + + // Advance state.slot to the next live slot (ctrl byte > CTRL_TOMBSTONE) — the open-addressed + // walk core shared by the table-walk first()/next() arms. Leaves slot == cap when exhausted. 
+ def build_table_walk_scan(state : TableWalkState) { + var bb_cond = append_basic_block("twalk_scan_cond") + var bb_check = append_basic_block("twalk_scan_check") + var bb_inc = append_basic_block("twalk_scan_inc") + var bb_done = append_basic_block("twalk_scan_done") + LLVMBuildBr(g_builder, bb_cond) + LLVMPositionBuilderAtEnd(g_builder, bb_cond) + var s = LLVMBuildLoad2(g_builder, types.t_int64, state.slot, "twalk.s") + var inb = LLVMBuildICmp(g_builder, LLVMIntPredicate.LLVMIntULT, s, state.cap, "twalk.inb") + LLVMBuildCondBr(g_builder, inb, bb_check, bb_done) + LLVMPositionBuilderAtEnd(g_builder, bb_check) + var cp = LLVMBuildGEP2(g_builder, types.t_int8, state.ctrlPtr, s, "twalk.cp") + var c = LLVMBuildLoad2(g_builder, types.t_int8, cp, "twalk.c") + var live = LLVMBuildICmp(g_builder, LLVMIntPredicate.LLVMIntUGT, c, LLVMConstInt(types.t_int8, 1ul, 0), "twalk.live") + LLVMBuildCondBr(g_builder, live, bb_done, bb_inc) + LLVMPositionBuilderAtEnd(g_builder, bb_inc) + var s1 = LLVMBuildAdd(g_builder, s, LLVMConstInt(types.t_int64, 1ul, 0), "twalk.s1") + LLVMBuildStore(g_builder, s1, state.slot) + LLVMBuildBr(g_builder, bb_cond) + LLVMPositionBuilderAtEnd(g_builder, bb_done) + } + + // Store the loop variable for the current slot: keys copy the key out (guarded against the + // past-end read on an exhausted/empty table when `guarded`); values store a pointer into the + // data block (safe at table_end, exactly like the C++ iterator). 
+ def build_table_walk_store_var(state : TableWalkState; svar : VariablePtr; guarded : bool) { + var s = LLVMBuildLoad2(g_builder, types.t_int64, state.slot, "twalk.s") + if (state.isKeys) { + var keyElemTy = type_to_llvm_type(svar._type) + if (guarded) { + var bb_read = append_basic_block("twalk_key_read") + var bb_skip = append_basic_block("twalk_key_skip") + var inb = LLVMBuildICmp(g_builder, LLVMIntPredicate.LLVMIntULT, s, state.cap, "twalk.kinb") + LLVMBuildCondBr(g_builder, inb, bb_read, bb_skip) + LLVMPositionBuilderAtEnd(g_builder, bb_read) + var kp = LLVMBuildGEP2(g_builder, keyElemTy, state.basePtr, s, "twalk.kp") + var k = LLVMBuildLoad2(g_builder, keyElemTy, kp, "twalk.k") + LLVMBuildStore(g_builder, k, getV(svar)) + LLVMBuildBr(g_builder, bb_skip) + LLVMPositionBuilderAtEnd(g_builder, bb_skip) + } else { + var kp = LLVMBuildGEP2(g_builder, keyElemTy, state.basePtr, s, "twalk.kp") + var k = LLVMBuildLoad2(g_builder, keyElemTy, kp, "twalk.k") + LLVMBuildStore(g_builder, k, getV(svar)) + } + } else { + var off = LLVMBuildMul(g_builder, s, LLVMConstInt(types.t_int64, uint64(state.stride), 0), "twalk.voff") + var vp = LLVMBuildGEP2(g_builder, types.t_int8, state.basePtr, off, "twalk.vp") + vp = LLVMBuildPointerCast(g_builder, vp, get_type_pointer(svar._type), "twalk.vpc") + LLVMBuildStore(g_builder, vp, getV(svar)) + } + } + def override preVisitExprFor(expr : ExprFor?) 
: void { var lblk = LoopBlock( loop_start = append_basic_block("for_start"), @@ -3641,7 +3751,7 @@ class public LlvmJitVisitor : AstVisitor { forBodyToExpr[expr.body as ExprBlock] = expr } for (ssrc in expr.sources) { - if (is_count_or_ucount(ssrc)) { + if (is_count_or_ucount(ssrc) || is_table_keys_values(ssrc)) { skipCall |> insert(ssrc as ExprCall) } } @@ -3734,6 +3844,51 @@ class public LlvmJitVisitor : AstVisitor { visit(ccount.arguments[0], adapter) visit(ccount.arguments[1], adapter) LLVMBuildStore(g_builder, getE(ccount.arguments[0]), getV(svar)) + } elif (is_table_keys_values(ssrc)) { + // table keys/values ->first(): inline slot walk — lock, load the header once, + // scan to the first live ctrl byte. The skipped call never allocates a C++ iterator. + var ctab = ssrc as ExprCall + visit(ctab.arguments[0], adapter) + var tab = getE(ctab.arguments[0]) + build_table_lock_call(tab, ssrc.at) + var hdr = load_table_header(tab) + var cap = LLVMBuildExtractValue(g_builder, hdr, uint(JIT_TABLE.CAPACITY), "twalk.cap") + var sizeV = LLVMBuildExtractValue(g_builder, hdr, uint(JIT_TABLE.SIZE), "twalk.size") + var ctrlV = LLVMBuildExtractValue(g_builder, hdr, uint(JIT_TABLE.HASHES), "twalk.ctrl") + let isKeys = table_walk_is_keys_call(ctab) + var rawBase = LLVMBuildExtractValue(g_builder, hdr, uint(isKeys ? JIT_TABLE.KEYS : JIT_TABLE.DATA), "twalk.base") + let i8ptr = LLVMPointerType(types.t_int8, 0u) + var state = TableWalkState( + cap = cap, + ctrlPtr = LLVMBuildPointerCast(g_builder, ctrlV, i8ptr, "twalk.ctrlp"), + origin = rawBase, + tabPtr = tab, + isKeys = isKeys, + stride = isKeys ? 
0 : ctab.arguments[0]._type.secondType.sizeOf + ) + if (isKeys) { + state.basePtr = LLVMBuildPointerCast(g_builder, rawBase, + LLVMPointerType(type_to_llvm_type(svar._type), 0u), "twalk.keysp") + } else { + state.basePtr = LLVMBuildPointerCast(g_builder, rawBase, i8ptr, "twalk.datap") + } + at_function_entry() { + state.slot = LLVMBuildAlloca(g_builder, types.t_int64, "twalk_slot_{svar.name}") + } + LLVMBuildStore(g_builder, LLVMConstInt(types.t_int64, 0ul, 0), state.slot) + tableWalk[ssrc] = state + // need_loop &= size != 0 (the C++ first() returns (bool)table->size) + var nonEmpty = LLVMBuildICmp(g_builder, LLVMIntPredicate.LLVMIntNE, sizeV, + LLVMConstInt(LLVMTypeOf(sizeV), 0ul, 0), "twalk.nonempty") + var okay = append_basic_block("for_{svar.name}_not_empty") + var not_okay = append_basic_block("for_{svar.name}_empty") + LLVMBuildCondBr(g_builder, nonEmpty, okay, not_okay) + LLVMPositionBuilderAtEnd(g_builder, not_okay) + LLVMBuildStore(g_builder, LLVMConstInt(types.t_int1, 0ul, 0), lblk.need_loop) // need loop, no + LLVMBuildBr(g_builder, okay) + LLVMPositionBuilderAtEnd(g_builder, okay) + build_table_walk_scan(state) + build_table_walk_store_var(state, svar, true) } else { var seq = LLVMBuildLoadData2Aligned(g_builder, type_to_llvm_type(ssrc._type), getE(ssrc), ssrc._type.alignOf, "") var piter = LLVMBuildExtractValue(g_builder, seq, uint(JIT_SEQUENCE.ITERATOR), "") @@ -3844,6 +3999,19 @@ class public LlvmJitVisitor : AstVisitor { var vvar = LLVMBuildLoadData2Aligned(g_builder, type_to_llvm_type(svar._type), getV(svar), svar._type.alignOf, "") var vadd = LLVMBuildAdd(g_builder, vvar, getE(ccount.arguments[1]), "") LLVMBuildStore(g_builder, vadd, getV(svar)) + } elif (is_table_keys_values(ssrc)) { + // table keys/values ->next(): advance the cursor, scan to the next live slot + var state = tableWalk[ssrc] + var s = LLVMBuildLoad2(g_builder, types.t_int64, state.slot, "twalk.ns") + var s1 = LLVMBuildAdd(g_builder, s, LLVMConstInt(types.t_int64, 1ul, 0), 
"twalk.ns1") + LLVMBuildStore(g_builder, s1, state.slot) + build_table_walk_scan(state) + var s2 = LLVMBuildLoad2(g_builder, types.t_int64, state.slot, "twalk.ns2") + var done = LLVMBuildICmp(g_builder, LLVMIntPredicate.LLVMIntEQ, s2, state.cap, "twalk.done") + var nextOk = append_basic_block("for_{svar.name}_next_ok") + LLVMBuildCondBr(g_builder, done, lblk.loop_end, nextOk) + LLVMPositionBuilderAtEnd(g_builder, nextOk) + build_table_walk_store_var(state, svar, false) } else { var rcond = build_iter_next(ssrc.at, getE(ssrc), getV(svar)) var nextOk = append_basic_block("for_{svar.name}_next_ok") @@ -3896,6 +4064,22 @@ class public LlvmJitVisitor : AstVisitor { if (is_count_or_ucount(ssrc)) { // we do nothing for count pass + } elif (is_table_keys_values(ssrc)) { + // table keys/values ->close(): the realloc check mirrors the C++ iterator + // (catches mutation on shared/hopeless tables that bypass the lock), then unlock + let state & = unsafe(tableWalk[ssrc]) + var hdr2 = load_table_header(state.tabPtr) + var base2 = LLVMBuildExtractValue(g_builder, hdr2, + uint(state.isKeys ? 
JIT_TABLE.KEYS : JIT_TABLE.DATA), "twalk.base2") + var moved = LLVMBuildICmp(g_builder, LLVMIntPredicate.LLVMIntNE, base2, state.origin, "twalk.moved") + var bb_bad = append_basic_block("twalk_close_moved") + var bb_ok = append_basic_block("twalk_close_ok") + LLVMBuildCondBr(g_builder, moved, bb_bad, bb_ok) + LLVMPositionBuilderAtEnd(g_builder, bb_bad) + build_exception("table was modified during iteration", ssrc.at) + LLVMBuildBr(g_builder, bb_ok) + LLVMPositionBuilderAtEnd(g_builder, bb_ok) + build_table_unlock_call(state.tabPtr, ssrc.at) } else { build_iter_close(getE(ssrc), getV(svar)) } diff --git a/modules/dasLLVM/daslib/llvm_jit_common.das b/modules/dasLLVM/daslib/llvm_jit_common.das index e6913b1b2b..6088057b74 100644 --- a/modules/dasLLVM/daslib/llvm_jit_common.das +++ b/modules/dasLLVM/daslib/llvm_jit_common.das @@ -58,6 +58,8 @@ let public FN_JIT_FREE_HEAP = "jit_free_heap" let public FN_JIT_FREE_PERSISTENT = "jit_free_persistent" let public FN_JIT_ARRAY_LOCK = "jit_array_lock" let public FN_JIT_ARRAY_UNLOCK = "jit_array_unlock" +let public FN_JIT_TABLE_LOCK = "jit_table_lock" +let public FN_JIT_TABLE_UNLOCK = "jit_table_unlock" let public FN_JIT_STR_CMP = "jit_str_cmp" let public FN_JIT_STR_CAT = "jit_str_cat" let public FN_JIT_PROLOGUE = "jit_prologue" @@ -612,6 +614,21 @@ def public init_jit(cg_opt_level : uint; target_triple : string = "") { LLVMAddAttributesToFunction(jit_array_unlock, fixed_array(nounwind, willreturn)) LLVMAddAttributeToFunctionArgumentRange(jit_array_unlock, urange(0, 2), nocapture) + // void jit_table_lock ( Table & tab, Context * context, LineInfoArg * at ) + var jit_table_lock = LLVMAddFunctionWithType(g_mod, FN_JIT_TABLE_LOCK, + LLVMFunctionType(g_prim_t.t_void, + fixed_array(g_prim_t.LLVMVoidPtrType(), g_prim_t.LLVMVoidPtrType(), g_prim_t.LLVMVoidPtrType()))) + LLVMAddGlobalMapping(g_engine, jit_table_lock, get_jit_table_lock()) + LLVMAddAttributesToFunction(jit_table_lock, fixed_array(nounwind, willreturn)) + 
LLVMAddAttributeToFunctionArgumentRange(jit_table_lock, urange(0, 2), nocapture) + // void jit_table_unlock ( Table & tab, Context * context, LineInfoArg * at ) + var jit_table_unlock = LLVMAddFunctionWithType(g_mod, FN_JIT_TABLE_UNLOCK, + LLVMFunctionType(g_prim_t.t_void, + fixed_array(g_prim_t.LLVMVoidPtrType(), g_prim_t.LLVMVoidPtrType(), g_prim_t.LLVMVoidPtrType()))) + LLVMAddGlobalMapping(g_engine, jit_table_unlock, get_jit_table_unlock()) + LLVMAddAttributesToFunction(jit_table_unlock, fixed_array(nounwind, willreturn)) + LLVMAddAttributeToFunctionArgumentRange(jit_table_unlock, urange(0, 2), nocapture) + // int jit_str_cmp ( char * a, char * b ) var jit_str_cmp = LLVMAddFunctionWithType(g_mod, FN_JIT_STR_CMP, LLVMFunctionType(g_prim_t.t_int32, diff --git a/modules/dasLLVM/daslib/llvm_jit_run.das b/modules/dasLLVM/daslib/llvm_jit_run.das index fd589d4702..83d48a8aa2 100644 --- a/modules/dasLLVM/daslib/llvm_jit_run.das +++ b/modules/dasLLVM/daslib/llvm_jit_run.das @@ -33,7 +33,7 @@ var LINK_WHOLE_LIB = false // when true, standalone exe links against the whole // invalidates cached DLLs (e.g. edits to llvm_jit.das, llvm_macro.das, llvm_jit_common.das, // runtime helper ABI, default target triple). Cache filenames fold this in, so a bump // makes every previously written DLL miss the cache on the next run and get GC'd. -let LLVM_JIT_CODEGEN_VERSION : uint64 = 0x25ul +let LLVM_JIT_CODEGEN_VERSION : uint64 = 0x26ul let JIT_FNV_PRIME : uint64 = 1099511628211ul diff --git a/tests/jit_tests/table_walk.das b/tests/jit_tests/table_walk.das new file mode 100644 index 0000000000..e9639f2d22 --- /dev/null +++ b/tests/jit_tests/table_walk.das @@ -0,0 +1,147 @@ +// Inline table slot-walk (keys/values for-loop sources over workhorse-keyed tables): +// the JIT scans the open-addressed ctrl bytes natively instead of calling the C++ +// iterator per element. String keys keep the generic iterator path. 
+options gen2 +require dastest/testing_boost + +def kv_sums(t : table) : tuple { + var n = 0 + var ks = 0 + var vs = 0 + for (k, v in keys(t), values(t)) { + n++ + ks += k + vs += v + } + return (n = n, ks = ks, vs = vs) +} + +def keys_sum(t : table) : int { + var s = 0 + for (k in keys(t)) { + s += k + } + return s +} + +def values_bump(var t : table) { + for (v in values(t)) { + v++ + } +} + +def keys_break_at(t : table; stop : int) : int { + var n = 0 + for (_k in keys(t)) { + n++ + break if (n == stop) + } + return n +} + +def string_kv_sum(t : table) : int { + var s = 0 + for (_k, v in keys(t), values(t)) { + s += v + } + return s +} + +def keys64_sum(t : table) : int64 { + var s = 0l + for (k in keys(t)) { + s += k + } + return s +} + +def insert_during_iteration(var t : table) : bool { + var caught = false + try { + for (k in keys(t)) { + t[100 + k] = 1 + } + } recover { + caught = true + } + return caught +} + +[test] +def test_table_walk(t : T?) { + t |> run("kv zip with tombstones") @(t : T?) { + t |> success(!jit_enabled() || is_jit_function(@@kv_sums)) + var tab : table + for (i in range(10)) { + tab[i] = i * 10 + } + tab |> erase(3) + tab |> erase(7) + let r = kv_sums(tab) + t |> equal(r.n, 8) + t |> equal(r.ks, 35) + t |> equal(r.vs, 350) + delete tab + } + t |> run("keys-only walk") @(t : T?) { + t |> success(!jit_enabled() || is_jit_function(@@keys_sum)) + var tab : table + for (i in range(5)) { + tab[i] = i + } + t |> equal(keys_sum(tab), 10) + var e : table + t |> equal(keys_sum(e), 0) + delete tab + delete e + } + t |> run("values walk mutates by ref") @(t : T?) { + t |> success(!jit_enabled() || is_jit_function(@@values_bump)) + var tab : table + for (i in range(4)) { + tab[i] = i * 10 + } + values_bump(tab) + let r = kv_sums(tab) + t |> equal(r.vs, 60 + 4) + delete tab + } + t |> run("break exits the walk and unlocks") @(t : T?) 
{ + t |> success(!jit_enabled() || is_jit_function(@@keys_break_at)) + var tab : table + for (i in range(8)) { + tab[i] = i + } + t |> equal(keys_break_at(tab, 3), 3) + tab[100] = 1 // table must be unlocked after the break + t |> equal(length(tab), 9) + delete tab + } + t |> run("string keys keep the generic path") @(t : T?) { + t |> success(!jit_enabled() || is_jit_function(@@string_kv_sum)) + var tab : table + tab["a"] = 1 + tab["b"] = 2 + tab["c"] = 4 + t |> equal(string_kv_sum(tab), 7) + delete tab + } + t |> run("int64 keys ride the walk") @(t : T?) { + t |> success(!jit_enabled() || is_jit_function(@@keys64_sum)) + var tab : table + tab[10l] = 1 + tab[20l] = 2 + tab[30l] = 3 + t |> equal(keys64_sum(tab), 60l) + delete tab + } + t |> run("insert during locked iteration panics") @(t : T?) { + // no delete: the panic leaves the table locked by design (panic is not resumable); + // the context teardown reclaims it — same shape as array.das's locked-resize test + var tab : table + for (i in range(5)) { + tab[i] = i + } + t |> success(insert_during_iteration(tab)) + } +} From c9dd28bab2dd5ef600e67240349c9f7c494965d8 Mon Sep 17 00:00:00 2001 From: Boris Batkin Date: Thu, 11 Jun 2026 10:59:49 -0700 Subject: [PATCH 3/3] bench: re-sweep benchmarks/sql after the JIT table walk results.md regenerated (2026-06-11, INTERP+JIT matrices). INTERP flat within noise; JIT m7 halves on walk-dominated families (chained_where 17.8->10.4, count_aggregate 13.5->7.3, join_probe 24.2->16.7, last_match 22.8->12.1, point_lookup_scan 6.0->3.0, select_where 28.2->17.9 ns/op). groupby stays ~44 (tier-2 group cascade dominates, source walk was never the bottleneck there). One prose line added to the m7 bullet: the JIT column is now fused codegen end to end. 
Co-Authored-By: Claude Fable 5 --- benchmarks/sql/results.md | 300 +++++++++++++++++++------------------- 1 file changed, 151 insertions(+), 149 deletions(-) diff --git a/benchmarks/sql/results.md b/benchmarks/sql/results.md index bb13ccbc7b..6ddc837b68 100644 --- a/benchmarks/sql/results.md +++ b/benchmarks/sql/results.md @@ -21,7 +21,9 @@ are stable now). joined on its bare key probes the table instead of building the join hash — the `join_probe` / `join_probe_build` pair measures it; a trailing `to_table()` inserts straight into the result table with no intermediate array — the `to_table` / `to_table_staged` pair measures it; - group_by / reverse defer to tier-2). + group_by / reverse defer to tier-2). Under JIT, `keys`/`values` for-loop sources compile to an + inline open-addressed slot walk (no per-element C++ iterator calls), so the m7 JIT column is + fused codegen end to end. `0.00` = early-exit terminator below timer resolution ("free"). Chain shapes are in `benchmarks/README.md`; the splice arms each fires are in `doc/source/reference/linq_fold_patterns.rst`. 
@@ -36,175 +38,175 @@ signal, JIT deltas as indicative.** | Benchmark | SQL (m1) | Array (m3f) | Decs (m4) | XML fold (m5f) | JSON fold (m6f) | Table fold (m7) | |---|---:|---:|---:|---:|---:|---:| -| `aggregate_match` | 35.0 | 5.9 | 5.9 | 60.5 | 159.7 | 19.0 | -| `all_match` | 27.7 | 3.5 | 3.4 | 56.1 | 153.8 | 15.8 | +| `aggregate_match` | 34.9 | 5.9 | 5.9 | 60.5 | 158.9 | 19.8 | +| `all_match` | 27.8 | 3.5 | 3.5 | 56.5 | 156.6 | 15.8 | | `any_match` | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | -| `average_aggregate` | 30.1 | 6.0 | 8.8 | 60.1 | 163.7 | 17.2 | -| `bare_last` | — | 4.2 | 0.0 | 0.0 | 4.2 | 30.1 | -| `bare_order_where` | 278.1 | 117.1 | 126.5 | 302.8 | 288.8 | 163.0 | -| `chained_select_collapse` | — | 17.9 | 17.6 | 70.7 | 172.6 | 27.9 | -| `chained_where` | 36.9 | 6.6 | 7.1 | 105.4 | 183.4 | 23.8 | -| `contains_match` | 0.0 | 2.2 | 1.4 | 29.0 | 72.4 | 6.5 | -| `count_aggregate` | 29.7 | 4.2 | 4.1 | 63.6 | 154.3 | 20.1 | -| `cross_join` | 12597.0 | 3721.0 | — | 4040.3 | 4113.3 | — | -| `decs_count_bare_pred` | — | — | 4.2 | — | — | — | -| `distinct_by_count` | 41.6 | 16.4 | 15.8 | 70.8 | 162.9 | 26.9 | -| `distinct_by_order_take` | 241.1 | 22.1 | 23.7 | 124.2 | 162.4 | 49.2 | -| `distinct_by_order_to_array` | 241.0 | 22.2 | 23.8 | 125.0 | 163.2 | 48.9 | -| `distinct_count` | 41.8 | 15.7 | 15.9 | 70.7 | 162.9 | 27.1 | -| `distinct_count_pred` | 253.4 | 15.9 | 15.9 | 112.7 | 179.4 | 26.7 | +| `average_aggregate` | 30.2 | 6.1 | 8.7 | 58.8 | 157.2 | 17.2 | +| `bare_last` | — | 4.2 | 0.0 | 0.0 | 4.1 | 30.2 | +| `bare_order_where` | 279.9 | 117.8 | 125.7 | 302.3 | 292.5 | 163.9 | +| `chained_select_collapse` | — | 17.6 | 17.4 | 70.6 | 154.1 | 28.4 | +| `chained_where` | 36.5 | 6.6 | 7.1 | 105.8 | 177.6 | 23.9 | +| `contains_match` | 0.0 | 2.2 | 1.4 | 27.8 | 70.3 | 6.5 | +| `count_aggregate` | 29.7 | 4.2 | 4.1 | 64.1 | 158.1 | 20.3 | +| `cross_join` | 12594.5 | 3704.9 | — | 4030.6 | 4063.0 | — | +| `decs_count_bare_pred` | — | — | 4.1 | — | — | — | +| 
`distinct_by_count` | 41.6 | 15.8 | 15.7 | 70.7 | 156.5 | 27.3 | +| `distinct_by_order_take` | 240.5 | 22.2 | 23.4 | 124.6 | 159.8 | 49.5 | +| `distinct_by_order_to_array` | 240.7 | 22.1 | 23.4 | 125.1 | 163.6 | 49.2 | +| `distinct_count` | 41.2 | 15.6 | 15.6 | 70.6 | 161.2 | 27.5 | +| `distinct_count_pred` | 253.4 | 15.9 | 15.9 | 112.6 | 173.8 | 27.4 | | `distinct_take` | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | | `element_at_match` | 0.0 | 0.0 | 0.0 | 0.4 | 0.3 | 0.0 | | `first_match` | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | | `first_or_default_match` | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | -| `groupby_average` | 173.6 | 29.2 | 29.3 | 123.6 | 195.4 | 198.4 | -| `groupby_count` | 144.5 | 19.2 | 19.2 | 75.0 | 168.4 | 164.3 | -| `groupby_first` | 253.9 | 19.1 | 19.8 | 72.7 | 163.4 | 164.1 | -| `groupby_having_count` | 142.6 | 19.2 | 19.2 | 75.4 | 168.9 | 186.7 | -| `groupby_having_hidden_sum` | 176.8 | 22.2 | 22.9 | 118.6 | 192.0 | 216.5 | -| `groupby_having_post_where` | 172.4 | 20.5 | 20.5 | 114.6 | 188.7 | 194.8 | -| `groupby_max` | 175.4 | 24.9 | 25.2 | 120.0 | 192.4 | 202.4 | -| `groupby_min` | 175.2 | 24.9 | 25.3 | 120.7 | 193.2 | 204.5 | -| `groupby_multi_reducer` | 192.0 | 30.8 | 30.2 | 125.6 | 196.8 | 232.3 | -| `groupby_select_order` | 172.8 | 20.5 | 20.5 | 115.2 | 188.1 | 195.3 | -| `groupby_select_sum` | 199.8 | 38.7 | 38.7 | 102.3 | 193.5 | 191.2 | -| `groupby_sum` | 176.8 | 20.5 | 20.5 | 114.9 | 188.1 | 194.5 | -| `groupby_where_count` | 76.2 | 13.8 | 14.5 | 116.0 | 185.7 | 165.2 | -| `groupby_where_sum` | 87.7 | 14.1 | 14.9 | 116.5 | 187.3 | 180.6 | -| `join_count` | 38.0 | 51.3 | 64.7 | 113.3 | 183.5 | 66.0 | -| `join_groupby_count` | 160.1 | 76.7 | 89.9 | 178.6 | 230.9 | 259.6 | -| `join_groupby_to_array` | 194.1 | 78.4 | 91.4 | 216.3 | 212.7 | 290.0 | +| `groupby_average` | 171.3 | 29.1 | 29.4 | 124.3 | 187.5 | 197.3 | +| `groupby_count` | 143.5 | 19.1 | 19.1 | 74.8 | 159.1 | 163.1 | +| `groupby_first` | 251.7 | 19.0 | 19.8 | 72.4 | 155.1 | 163.1 | +| 
`groupby_having_count` | 142.2 | 19.1 | 19.1 | 75.2 | 160.2 | 185.4 | +| `groupby_having_hidden_sum` | 175.9 | 22.2 | 22.3 | 119.0 | 183.5 | 215.6 | +| `groupby_having_post_where` | 172.8 | 20.4 | 20.5 | 115.3 | 181.3 | 194.2 | +| `groupby_max` | 173.1 | 24.9 | 24.8 | 120.5 | 184.1 | 201.8 | +| `groupby_min` | 173.6 | 25.6 | 25.2 | 120.7 | 184.3 | 204.0 | +| `groupby_multi_reducer` | 190.4 | 30.4 | 30.3 | 126.3 | 188.2 | 231.5 | +| `groupby_select_order` | 172.9 | 20.5 | 20.4 | 118.6 | 179.8 | 194.6 | +| `groupby_select_sum` | 198.2 | 38.5 | 38.8 | 102.1 | 185.0 | 188.0 | +| `groupby_sum` | 171.1 | 20.4 | 20.4 | 115.0 | 179.5 | 194.2 | +| `groupby_where_count` | 76.8 | 13.9 | 14.5 | 115.3 | 188.6 | 164.2 | +| `groupby_where_sum` | 87.4 | 14.2 | 14.8 | 116.7 | 187.1 | 179.4 | +| `join_count` | 38.6 | 51.8 | 63.9 | 112.3 | 185.6 | 64.2 | +| `join_groupby_count` | 158.4 | 76.9 | 88.5 | 178.1 | 225.8 | 259.3 | +| `join_groupby_to_array` | 190.4 | 78.3 | 90.6 | 215.6 | 212.2 | 290.2 | | `join_probe` | — | — | — | — | — | 46.6 | -| `join_probe_build` | — | — | — | — | — | 79.9 | -| `join_select` | 150.3 | 72.7 | 86.0 | 190.7 | 215.4 | 222.9 | -| `join_where_count` | 39.1 | 61.6 | 79.4 | 161.2 | 198.1 | 80.1 | -| `last_match` | 0.0 | 5.9 | 13.9 | 64.8 | 159.6 | 31.0 | -| `long_count_aggregate` | 29.5 | 4.1 | 4.1 | 63.2 | 155.1 | 20.2 | -| `max_aggregate` | 30.8 | 6.2 | 7.0 | 58.6 | 163.2 | 17.4 | -| `min_aggregate` | 31.1 | 6.2 | 6.8 | 58.6 | 163.5 | 17.3 | -| `order_by_multi_key` | 336.9 | 274.1 | 281.9 | 458.6 | 445.5 | 335.6 | -| `order_distinct_take` | 140.6 | 15.9 | 99.4 | 72.3 | 163.8 | 31.6 | -| `order_reverse_normalized` | 38.6 | 16.3 | 20.0 | 70.1 | 170.7 | 33.1 | -| `order_take_desc` | 38.3 | 16.5 | 20.6 | 70.1 | 170.9 | 33.1 | +| `join_probe_build` | — | — | — | — | — | 79.5 | +| `join_select` | 150.9 | 73.5 | 84.6 | 187.9 | 207.1 | 223.5 | +| `join_where_count` | 39.8 | 61.9 | 75.8 | 161.2 | 192.9 | 79.8 | +| `last_match` | 0.0 | 5.8 | 13.9 | 65.3 | 157.9 | 
30.9 | +| `long_count_aggregate` | 29.7 | 4.2 | 4.1 | 63.7 | 158.0 | 20.1 | +| `max_aggregate` | 31.0 | 6.1 | 6.8 | 58.8 | 157.6 | 17.0 | +| `min_aggregate` | 30.9 | 6.1 | 6.8 | 59.0 | 159.3 | 17.0 | +| `order_by_multi_key` | 338.1 | 274.4 | 282.8 | 459.3 | 445.2 | 341.9 | +| `order_distinct_take` | 138.5 | 15.7 | 98.9 | 72.8 | 155.0 | 31.7 | +| `order_reverse_normalized` | 38.4 | 16.2 | 19.9 | 70.5 | 162.1 | 33.1 | +| `order_take_desc` | 38.3 | 16.4 | 19.9 | 70.6 | 162.5 | 33.0 | | `point_lookup` | — | — | — | — | — | 0.0 | -| `point_lookup_scan` | — | — | — | — | — | 8.4 | -| `reverse_distinct_by` | 308.2 | 21.2 | 27.9 | 70.8 | 163.1 | 44.6 | -| `reverse_take` | 0.1 | 0.0 | 0.2 | 0.0 | 26.4 | 58.9 | -| `reverse_take_select` | 0.0 | 0.0 | 0.2 | 0.0 | 26.3 | 58.6 | -| `select_count` | 0.1 | 0.0 | 2.2 | 68.5 | 2.2 | 0.0 | -| `select_many` | — | 192.1 | — | — | — | — | -| `select_where` | 197.4 | 11.2 | 19.4 | 196.4 | 183.1 | 37.8 | -| `select_where_count` | 32.6 | 5.1 | 7.4 | 64.4 | 157.5 | 22.8 | -| `select_where_order_take` | 36.6 | 12.5 | 15.1 | 72.3 | 164.9 | 35.1 | -| `select_where_sum` | 37.1 | 7.4 | 7.5 | 66.3 | 162.5 | 23.6 | -| `single_match` | 0.0 | 2.8 | 5.4 | 58.0 | 151.0 | 22.8 | -| `skip_take` | 0.5 | 0.1 | 0.2 | 3.0 | 2.8 | 0.3 | -| `skip_while_match` | 3.4 | 5.3 | 5.3 | 59.9 | 153.2 | 18.3 | -| `sort_first` | 37.9 | 11.1 | 13.4 | 65.2 | 166.1 | 32.2 | -| `sort_take` | 38.3 | 16.3 | 20.4 | 70.3 | 171.0 | 33.1 | -| `sort_take_select` | 38.3 | 16.3 | 20.2 | 70.7 | 170.5 | 33.2 | -| `sum_aggregate` | 30.2 | 2.1 | 2.1 | 53.9 | 153.3 | 13.5 | -| `sum_where` | 32.8 | 4.2 | 4.3 | 63.4 | 154.2 | 20.5 | -| `take_count` | 3.6 | 0.2 | 0.4 | 2.9 | 2.7 | 0.5 | -| `take_count_filtered` | 1.1 | 0.2 | 0.2 | 1.3 | 1.1 | 0.3 | +| `point_lookup_scan` | — | — | — | — | — | 8.3 | +| `reverse_distinct_by` | 296.9 | 21.1 | 27.7 | 71.7 | 154.5 | 44.4 | +| `reverse_take` | 0.1 | 0.0 | 0.2 | 0.0 | 26.2 | 58.5 | +| `reverse_take_select` | 0.0 | 0.0 | 0.2 | 0.0 | 26.2 | 58.7 | 
+| `select_count` | 0.1 | 0.0 | 2.2 | 63.4 | 2.2 | 0.0 | +| `select_many` | — | 189.8 | — | — | — | — | +| `select_where` | 199.2 | 11.2 | 19.2 | 197.4 | 186.5 | 37.8 | +| `select_where_count` | 33.0 | 5.2 | 7.5 | 65.2 | 150.0 | 23.2 | +| `select_where_order_take` | 37.0 | 12.2 | 14.9 | 72.5 | 163.1 | 34.7 | +| `select_where_sum` | 37.1 | 7.5 | 7.5 | 66.2 | 158.2 | 24.2 | +| `single_match` | 0.0 | 2.9 | 5.4 | 56.2 | 148.2 | 22.8 | +| `skip_take` | 0.5 | 0.1 | 0.2 | 3.1 | 2.8 | 0.3 | +| `skip_while_match` | 3.5 | 5.3 | 5.3 | 57.9 | 150.2 | 18.2 | +| `sort_first` | 38.4 | 11.0 | 13.4 | 65.7 | 162.3 | 31.7 | +| `sort_take` | 38.6 | 16.1 | 20.3 | 70.7 | 163.1 | 33.2 | +| `sort_take_select` | 38.6 | 16.4 | 20.2 | 70.9 | 161.9 | 33.2 | +| `sum_aggregate` | 30.0 | 2.1 | 2.1 | 54.8 | 156.9 | 13.5 | +| `sum_where` | 33.0 | 4.3 | 4.3 | 63.7 | 157.9 | 20.7 | +| `take_count` | 3.7 | 0.2 | 0.4 | 2.9 | 2.7 | 0.5 | +| `take_count_filtered` | 1.1 | 0.2 | 0.2 | 1.4 | 1.1 | 0.3 | | `take_sum_aggregate` | 0.8 | 0.1 | 0.1 | 0.6 | 0.5 | 0.1 | | `take_where_count` | 0.9 | 0.1 | 0.1 | 0.7 | 0.6 | 0.2 | -| `take_while_match` | 7.8 | 2.4 | 2.4 | 30.1 | 75.7 | 16.4 | -| `to_array_filter` | 70.3 | 11.8 | 11.8 | 70.9 | 163.7 | 29.0 | -| `to_table` | — | 18.7 | 144.0 | 118.2 | 144.3 | 32.2 | -| `to_table_staged` | — | 54.8 | 56.8 | 144.8 | 166.8 | 69.0 | -| `where_join_count` | 41.2 | 29.1 | 41.8 | 131.7 | 167.5 | 46.8 | -| `zip_count_pred` | 39.4 | 15.9 | — | 317.3 | 319.1 | — | -| `zip_dot_product` | 46.6 | 12.7 | 10.6 | 314.0 | 316.5 | — | -| `zip_dot_product_3arg` | 46.8 | 12.8 | — | 313.0 | 316.7 | — | -| `zip_reverse_to_array` | — | 31.7 | — | 349.3 | 351.4 | — | +| `take_while_match` | 7.8 | 2.4 | 2.5 | 29.0 | 72.6 | 16.9 | +| `to_array_filter` | 70.7 | 11.7 | 11.8 | 71.7 | 163.7 | 28.9 | +| `to_table` | — | 18.6 | 141.9 | 118.5 | 140.3 | 32.1 | +| `to_table_staged` | — | 54.7 | 56.6 | 143.3 | 165.1 | 68.5 | +| `where_join_count` | 41.6 | 29.4 | 41.0 | 132.1 | 171.8 | 46.7 | +| 
`zip_count_pred` | 39.4 | 15.8 | — | 316.5 | 317.6 | — | +| `zip_dot_product` | 49.8 | 12.6 | 10.6 | 312.4 | 313.7 | — | +| `zip_dot_product_3arg` | 50.2 | 12.7 | — | 312.5 | 313.9 | — | +| `zip_reverse_to_array` | — | 32.1 | — | 347.2 | 351.8 | — | ## JIT | Benchmark | SQL (m1) | Array (m3f) | Decs (m4) | XML fold (m5f) | JSON fold (m6f) | Table fold (m7) | |---|---:|---:|---:|---:|---:|---:| -| `aggregate_match` | 35.0 | 0.3 | 0.7 | 29.8 | 27.2 | 13.5 | -| `all_match` | 27.9 | 0.3 | 0.2 | 18.8 | 26.2 | 13.5 | +| `aggregate_match` | 35.0 | 0.3 | 0.7 | 29.7 | 27.3 | 7.3 | +| `all_match` | 27.7 | 0.3 | 0.2 | 18.8 | 25.3 | 7.2 | | `any_match` | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | -| `average_aggregate` | 30.2 | 1.0 | 3.5 | 18.8 | 25.7 | 13.5 | -| `bare_last` | — | 0.4 | 0.0 | 0.0 | 0.0 | 17.2 | -| `bare_order_where` | 185.1 | 34.2 | 35.0 | 105.5 | 53.0 | 78.8 | -| `chained_select_collapse` | — | 1.1 | 1.1 | 20.6 | 33.9 | 14.0 | -| `chained_where` | 36.9 | 0.6 | 0.8 | 36.6 | 32.1 | 17.8 | -| `contains_match` | 0.0 | 0.2 | 0.1 | 17.5 | 9.4 | 4.7 | -| `count_aggregate` | 29.5 | 0.3 | 0.6 | 29.5 | 26.4 | 13.5 | -| `cross_join` | 5991.6 | 734.4 | — | 834.6 | 771.2 | — | +| `average_aggregate` | 30.3 | 1.0 | 3.6 | 18.8 | 24.4 | 7.4 | +| `bare_last` | — | 0.4 | 0.0 | 0.0 | 0.0 | 8.9 | +| `bare_order_where` | 185.9 | 33.8 | 35.0 | 106.0 | 51.7 | 68.2 | +| `chained_select_collapse` | — | 1.1 | 1.1 | 20.6 | 32.1 | 8.1 | +| `chained_where` | 36.7 | 0.6 | 0.9 | 36.4 | 31.8 | 10.4 | +| `contains_match` | 0.0 | 0.2 | 0.1 | 16.8 | 9.2 | 2.5 | +| `count_aggregate` | 29.4 | 0.3 | 0.6 | 29.5 | 25.1 | 7.3 | +| `cross_join` | 5962.9 | 719.2 | — | 833.9 | 771.0 | — | | `decs_count_bare_pred` | — | — | 0.6 | — | — | — | -| `distinct_by_count` | 42.1 | 1.1 | 1.1 | 20.6 | 33.9 | 14.1 | -| `distinct_by_order_take` | 249.6 | 1.7 | 2.6 | 45.2 | 39.0 | 30.3 | -| `distinct_by_order_to_array` | 252.5 | 1.7 | 2.7 | 45.5 | 38.9 | 30.2 | -| `distinct_count` | 41.7 | 1.1 | 1.1 | 20.6 | 33.7 | 14.1 | 
-| `distinct_count_pred` | 265.8 | 1.1 | 1.3 | 37.8 | 43.6 | 14.0 | +| `distinct_by_count` | 41.6 | 1.1 | 1.1 | 20.6 | 31.9 | 8.0 | +| `distinct_by_order_take` | 238.9 | 1.7 | 2.6 | 45.3 | 37.2 | 19.6 | +| `distinct_by_order_to_array` | 239.5 | 1.7 | 2.7 | 45.5 | 37.0 | 19.5 | +| `distinct_count` | 41.4 | 1.1 | 1.1 | 20.7 | 33.1 | 8.0 | +| `distinct_count_pred` | 252.1 | 1.1 | 1.3 | 37.7 | 43.6 | 8.0 | | `distinct_take` | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | | `element_at_match` | 0.0 | 0.0 | 0.0 | 0.1 | 0.0 | 0.0 | | `first_match` | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | | `first_or_default_match` | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | -| `groupby_average` | 177.2 | 1.6 | 1.9 | 37.2 | 45.6 | 51.9 | -| `groupby_count` | 145.8 | 1.3 | 1.5 | 20.6 | 34.1 | 43.9 | -| `groupby_first` | 265.0 | 1.3 | 2.3 | 20.7 | 34.6 | 43.7 | -| `groupby_having_count` | 144.6 | 1.3 | 1.5 | 20.7 | 34.1 | 46.7 | -| `groupby_having_hidden_sum` | 180.4 | 1.5 | 1.7 | 37.0 | 45.4 | 55.0 | -| `groupby_having_post_where` | 177.4 | 1.4 | 2.0 | 37.0 | 44.2 | 51.4 | -| `groupby_max` | 179.1 | 1.5 | 1.9 | 37.1 | 46.0 | 52.0 | -| `groupby_min` | 179.2 | 1.5 | 1.8 | 37.0 | 46.1 | 52.4 | -| `groupby_multi_reducer` | 195.2 | 1.6 | 2.0 | 37.1 | 45.9 | 61.3 | -| `groupby_select_order` | 176.3 | 1.4 | 1.9 | 37.0 | 44.4 | 51.4 | -| `groupby_select_sum` | 205.9 | 2.8 | 3.2 | 33.2 | 39.7 | 73.0 | -| `groupby_sum` | 175.9 | 1.4 | 1.6 | 37.0 | 44.5 | 51.9 | -| `groupby_where_count` | 76.5 | 0.9 | 1.3 | 37.2 | 41.9 | 52.2 | -| `groupby_where_sum` | 87.7 | 0.9 | 1.3 | 36.9 | 42.0 | 56.1 | -| `join_count` | 38.7 | 11.0 | 11.7 | 40.9 | 71.4 | 31.8 | -| `join_groupby_count` | 160.1 | 17.4 | 19.7 | 66.4 | 90.1 | 72.9 | -| `join_groupby_to_array` | 194.1 | 17.9 | 19.8 | 78.4 | 36.1 | 81.1 | -| `join_probe` | — | — | — | — | — | 24.2 | -| `join_probe_build` | — | — | — | — | — | 39.8 | -| `join_select` | 94.0 | 19.7 | 21.8 | 72.2 | 94.4 | 70.1 | -| `join_where_count` | 39.6 | 19.3 | 20.6 | 63.2 | 78.2 | 38.0 | -| 
`last_match` | 0.0 | 0.5 | 1.4 | 19.6 | 26.9 | 22.8 | -| `long_count_aggregate` | 29.8 | 0.3 | 0.6 | 29.4 | 26.5 | 13.8 | -| `max_aggregate` | 31.0 | 0.3 | 0.5 | 29.8 | 27.9 | 13.5 | -| `min_aggregate` | 31.2 | 0.3 | 0.5 | 29.8 | 27.7 | 13.5 | -| `order_by_multi_key` | 251.0 | 54.8 | 54.8 | 124.4 | 71.8 | 129.5 | -| `order_distinct_take` | 142.6 | 1.1 | 75.8 | 21.0 | 35.8 | 14.0 | -| `order_reverse_normalized` | 38.7 | 0.7 | 1.4 | 19.8 | 28.6 | 17.8 | -| `order_take_desc` | 38.6 | 0.7 | 1.3 | 19.7 | 28.4 | 17.8 | +| `groupby_average` | 171.5 | 1.5 | 1.8 | 37.2 | 44.8 | 51.8 | +| `groupby_count` | 162.3 | 1.3 | 1.5 | 20.7 | 32.2 | 43.5 | +| `groupby_first` | 251.8 | 1.3 | 2.3 | 20.7 | 34.0 | 43.4 | +| `groupby_having_count` | 142.7 | 1.3 | 1.5 | 20.6 | 33.4 | 46.2 | +| `groupby_having_hidden_sum` | 175.1 | 1.5 | 1.9 | 37.0 | 43.0 | 54.5 | +| `groupby_having_post_where` | 171.4 | 1.4 | 1.9 | 37.0 | 42.0 | 51.4 | +| `groupby_max` | 173.0 | 1.5 | 1.9 | 37.1 | 43.6 | 51.9 | +| `groupby_min` | 172.4 | 1.5 | 1.9 | 38.2 | 43.4 | 52.6 | +| `groupby_multi_reducer` | 193.2 | 1.6 | 1.9 | 37.2 | 43.7 | 60.8 | +| `groupby_select_order` | 170.5 | 1.4 | 1.6 | 37.9 | 41.9 | 51.8 | +| `groupby_select_sum` | 196.9 | 2.8 | 3.2 | 33.5 | 37.7 | 73.3 | +| `groupby_sum` | 171.5 | 1.4 | 1.9 | 37.9 | 42.0 | 52.1 | +| `groupby_where_count` | 76.5 | 0.9 | 1.3 | 37.2 | 39.7 | 53.7 | +| `groupby_where_sum` | 87.4 | 0.9 | 1.3 | 37.1 | 39.7 | 57.7 | +| `join_count` | 38.3 | 11.2 | 12.5 | 40.8 | 68.0 | 25.2 | +| `join_groupby_count` | 157.5 | 17.2 | 19.3 | 66.4 | 86.0 | 73.1 | +| `join_groupby_to_array` | 190.7 | 17.8 | 19.7 | 78.6 | 35.8 | 81.4 | +| `join_probe` | — | — | — | — | — | 16.7 | +| `join_probe_build` | — | — | — | — | — | 33.2 | +| `join_select` | 91.8 | 19.6 | 21.7 | 73.5 | 89.8 | 70.1 | +| `join_where_count` | 39.2 | 19.2 | 20.6 | 63.3 | 77.3 | 31.7 | +| `last_match` | 0.0 | 0.5 | 1.4 | 19.6 | 25.1 | 12.1 | +| `long_count_aggregate` | 30.0 | 0.3 | 0.6 | 29.4 | 25.1 | 7.3 | +| 
`max_aggregate` | 31.0 | 0.3 | 0.5 | 29.7 | 26.3 | 7.5 | +| `min_aggregate` | 31.0 | 0.3 | 0.5 | 29.7 | 26.2 | 7.4 | +| `order_by_multi_key` | 242.6 | 53.3 | 54.4 | 124.6 | 70.5 | 119.3 | +| `order_distinct_take` | 138.6 | 1.1 | 75.8 | 20.9 | 34.1 | 8.1 | +| `order_reverse_normalized` | 38.5 | 0.7 | 1.3 | 19.8 | 27.0 | 11.1 | +| `order_take_desc` | 38.8 | 0.7 | 1.3 | 19.8 | 26.9 | 10.0 | | `point_lookup` | — | — | — | — | — | 0.0 | -| `point_lookup_scan` | — | — | — | — | — | 6.0 | -| `reverse_distinct_by` | 297.0 | 1.6 | 3.1 | 20.6 | 34.6 | 18.8 | -| `reverse_take` | 0.0 | 0.0 | 0.0 | 0.0 | 3.8 | 27.0 | -| `reverse_take_select` | 0.0 | 0.0 | 0.0 | 0.0 | 3.9 | 27.1 | -| `select_count` | 0.1 | 0.0 | 0.0 | 68.1 | 0.0 | 0.0 | -| `select_many` | — | 62.7 | — | — | — | — | -| `select_where` | 108.3 | 4.1 | 5.3 | 75.4 | 23.1 | 28.2 | -| `select_where_count` | 32.9 | 0.3 | 0.6 | 29.9 | 27.2 | 13.5 | -| `select_where_order_take` | 37.0 | 0.7 | 1.4 | 19.8 | 27.9 | 23.3 | -| `select_where_sum` | 37.4 | 0.4 | 0.6 | 20.4 | 26.2 | 13.4 | -| `single_match` | 0.0 | 0.4 | 1.1 | 46.1 | 23.2 | 17.4 | -| `skip_take` | 0.3 | 0.0 | 0.0 | 1.3 | 0.2 | 0.2 | -| `skip_while_match` | 3.5 | 0.4 | 0.4 | 46.0 | 22.2 | 13.3 | -| `sort_first` | 38.3 | 0.4 | 1.3 | 18.9 | 27.5 | 17.3 | -| `sort_take` | 38.3 | 0.7 | 1.4 | 19.7 | 28.5 | 17.8 | -| `sort_take_select` | 38.4 | 0.7 | 1.3 | 19.8 | 28.4 | 17.7 | -| `sum_aggregate` | 30.4 | 0.3 | 0.1 | 23.3 | 25.6 | 13.5 | -| `sum_where` | 33.1 | 0.3 | 0.6 | 29.5 | 27.1 | 13.5 | -| `take_count` | 1.8 | 0.1 | 0.1 | 1.2 | 0.2 | 0.3 | -| `take_count_filtered` | 1.1 | 0.0 | 0.0 | 0.5 | 0.1 | 0.2 | -| `take_sum_aggregate` | 0.8 | 0.0 | 0.0 | 0.2 | 0.0 | 0.1 | -| `take_where_count` | 0.9 | 0.0 | 0.0 | 0.3 | 0.0 | 0.1 | -| `take_while_match` | 7.8 | 0.2 | 0.3 | 17.3 | 9.3 | 13.5 | -| `to_array_filter` | 48.5 | 3.3 | 3.4 | 22.2 | 35.4 | 20.3 | -| `to_table` | — | 14.1 | 37.4 | 49.7 | 54.3 | 29.2 | -| `to_table_staged` | — | 25.8 | 26.1 | 53.5 | 64.1 | 42.1 | -| 
`where_join_count` | 39.6 | 5.8 | 6.8 | 47.7 | 42.1 | 26.9 | -| `zip_count_pred` | 39.2 | 0.1 | — | 112.6 | 34.2 | — | -| `zip_dot_product` | 46.9 | 0.1 | 0.1 | 112.4 | 34.1 | — | -| `zip_dot_product_3arg` | 46.9 | 0.1 | — | 112.4 | 34.1 | — | -| `zip_reverse_to_array` | — | 4.6 | — | 123.5 | 51.8 | — | +| `point_lookup_scan` | — | — | — | — | — | 3.0 | +| `reverse_distinct_by` | 296.3 | 1.6 | 3.2 | 20.6 | 32.6 | 10.9 | +| `reverse_take` | 0.0 | 0.0 | 0.0 | 0.0 | 3.8 | 19.3 | +| `reverse_take_select` | 0.0 | 0.0 | 0.0 | 0.0 | 3.8 | 19.2 | +| `select_count` | 0.1 | 0.0 | 0.0 | 67.0 | 0.0 | 0.0 | +| `select_many` | — | 61.4 | — | — | — | — | +| `select_where` | 107.8 | 4.1 | 5.3 | 76.0 | 22.2 | 17.9 | +| `select_where_count` | 32.6 | 0.3 | 0.6 | 29.5 | 25.9 | 7.4 | +| `select_where_order_take` | 36.8 | 0.7 | 1.4 | 19.8 | 26.6 | 13.0 | +| `select_where_sum` | 37.4 | 0.4 | 0.6 | 20.4 | 24.8 | 7.5 | +| `single_match` | 0.0 | 0.4 | 1.1 | 45.9 | 22.2 | 9.7 | +| `skip_take` | 0.3 | 0.0 | 0.0 | 1.3 | 0.2 | 0.1 | +| `skip_while_match` | 3.5 | 0.4 | 0.4 | 46.1 | 21.7 | 7.8 | +| `sort_first` | 38.1 | 0.4 | 1.3 | 18.8 | 26.1 | 9.3 | +| `sort_take` | 38.6 | 0.7 | 1.3 | 19.8 | 27.0 | 9.7 | +| `sort_take_select` | 38.6 | 0.7 | 1.3 | 19.8 | 26.9 | 9.6 | +| `sum_aggregate` | 30.2 | 0.3 | 0.0 | 22.5 | 24.2 | 7.3 | +| `sum_where` | 32.9 | 0.3 | 0.6 | 29.6 | 25.8 | 7.3 | +| `take_count` | 1.8 | 0.1 | 0.1 | 1.2 | 0.2 | 0.1 | +| `take_count_filtered` | 1.1 | 0.0 | 0.0 | 0.5 | 0.1 | 0.0 | +| `take_sum_aggregate` | 0.8 | 0.0 | 0.0 | 0.2 | 0.0 | 0.0 | +| `take_where_count` | 0.9 | 0.0 | 0.0 | 0.3 | 0.0 | 0.0 | +| `take_while_match` | 7.8 | 0.2 | 0.3 | 16.9 | 9.0 | 7.3 | +| `to_array_filter` | 48.4 | 3.3 | 3.3 | 22.2 | 33.7 | 13.2 | +| `to_table` | — | 14.1 | 37.1 | 49.6 | 52.2 | 20.6 | +| `to_table_staged` | — | 25.8 | 26.2 | 53.2 | 61.3 | 33.6 | +| `where_join_count` | 41.6 | 6.0 | 6.7 | 48.0 | 40.6 | 19.9 | +| `zip_count_pred` | 39.3 | 0.1 | — | 114.0 | 33.7 | — | +| `zip_dot_product` | 
46.9 | 0.1 | 0.1 | 113.6 | 33.5 | — | +| `zip_dot_product_3arg` | 46.6 | 0.1 | — | 113.9 | 33.7 | — | +| `zip_reverse_to_array` | — | 4.5 | — | 125.3 | 50.7 | — | ## Missing lanes (the `—` cells)