From 7b18f47c31923d44daa88a97f68ccc5a6279000a Mon Sep 17 00:00:00 2001 From: Hugh Perkins Date: Mon, 18 May 2026 02:45:37 -0700 Subject: [PATCH] [Vulkan] Skip 3x3 sym_eig tests + dedup OpTypeArray on NVIDIA SIGSEGV in pipeline creation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit NVIDIA driver 580.76.05 SIGSEGVs in `libnvidia-gpucomp.so` / `libnvidia-glvkspirv.so` during compute-pipeline creation for the fully-inlined `_sym_eig3x3` (Eigen3 `computeDirect` Cardano method + `dsyevq3` Givens-rotation fallback) shader. The emitted SPIR-V is accepted by `spirv-val --target-env vulkan1.3` and round-trips cleanly through `spirv-cross`, so the bug is in NVIDIA's SPIR-V → NVVM frontend, not Quadrants codegen — `test_sym_eig_sort_order` already documents the same crash and skips the n=3 case (see comment there). Two changes: 1. `tests/python/test_eig.py` — skip the four affected tests on Vulkan (`test_sym_eig3x3_identity_f{32,64}`, `test_sym_eig3x3_f{32,64}`) with a matching comment pointing at the same pre-existing driver quirk. n=2 and n>=4 are unaffected. 2. `quadrants/codegen/spirv/spirv_ir_builder.{h,cpp}` — dedup `OpTypeArray` declarations in `get_function_array_type` / `get_array_type`. The Jacobi path was emitting six independent `float[3]` / `float[9]` types for the same local SoA, which trips strict drivers (NVIDIA actually crashes in pipeline creation on the duplicated-type variant — separate code path from the above, but same blast radius) and leaves observable `_arr_float_uint_3_0` / `..._1` / `..._2` aliases in `QD_DUMP_IR` and `spirv-cross` output. Separate caches for the Function-scope vs. `ArrayStride`-decorated buffer variants — sharing one cache would re-apply `ArrayStride` to Function-scope arrays and re-introduce `VUID-StandaloneSpirv-None-10684`. This dedup is independent from the sym_eig skip (alone it isn't sufficient to make `_sym_eig3x3` compile on NVIDIA) but is a real bug worth fixing on its own. --- quadrants/codegen/spirv/spirv_ir_builder.cpp | 41 +++++++++++++++----- quadrants/codegen/spirv/spirv_ir_builder.h | 14 +++++++ tests/python/test_eig.py | 17 ++++++++ 3 files changed, 63 insertions(+), 9 deletions(-) diff --git a/quadrants/codegen/spirv/spirv_ir_builder.cpp b/quadrants/codegen/spirv/spirv_ir_builder.cpp index 48d549b636..8c12a74b80 100644 --- a/quadrants/codegen/spirv/spirv_ir_builder.cpp +++ b/quadrants/codegen/spirv/spirv_ir_builder.cpp @@ -435,6 +435,14 @@ SType IRBuilder::get_function_array_type(const SType &_value_type, uint32_t num_ if (value_type.dt->is_primitive(PrimitiveTypeID::u1)) { value_type = i32_type(); } + // See `function_array_type_tbl_` in the header for the rationale — without this dedup, six-array local + // SoAs (e.g. `_sym_eig3x3`'s Jacobi path) emit six separate `OpTypeArray` declarations and crash NVIDIA's + // Vulkan SPIR-V → NVVM frontend (SIGSEGV inside `libnvidia-gpucomp.so`) during pipeline creation. + auto key = std::make_pair(value_type.id, num_elems); + auto it = function_array_type_tbl_.find(key); + if (it != function_array_type_tbl_.end()) { + return it->second; + } SType arr_type; arr_type.id = id_counter_++; arr_type.flag = TypeKind::kPtr; @@ -447,22 +455,37 @@ SType IRBuilder::get_function_array_type(const SType &_value_type, uint32_t num_ ib_.begin(spv::OpTypeRuntimeArray).add_seq(arr_type, value_type).commit(&global_); } + function_array_type_tbl_[key] = arr_type; return arr_type; } SType IRBuilder::get_array_type(const SType &_value_type, uint32_t num_elems) { - // Identical bookkeeping to `get_function_array_type` plus the `ArrayStride` decoration the storage-buffer - // / PSB / Uniform interface requires. Delegate the `OpTypeArray` emission to keep the two in sync, then - // add the decoration on top. - SType arr_type = get_function_array_type(_value_type, num_elems); - - // Mirror `get_function_array_type`'s `u1 -> i32` rewrite so the stride below matches the `OpTypeArray` - // element type (`bool` is 1-byte on every host but the array is emitted with `i32` elements; without this - // rewrite the stride would land on `1` and `spirv-val` rejects `ArrayStride < element_size`). + // Storage-buffer / PSB / Uniform array type — needs an `ArrayStride` decoration on the `OpTypeArray`. We + // keep this on a separate cache from `get_function_array_type` because the same SPIR-V type id cannot be + // shared across Function and StorageBuffer storage classes: re-applying `ArrayStride` to a Function-scope + // array trips `VUID-StandaloneSpirv-None-10684` (the very over-decoration this codepath was reworked to + // avoid). auto value_type = _value_type; if (value_type.dt->is_primitive(PrimitiveTypeID::u1)) { value_type = i32_type(); } + auto key = std::make_pair(value_type.id, num_elems); + auto it = array_type_tbl_.find(key); + if (it != array_type_tbl_.end()) { + return it->second; + } + + SType arr_type; + arr_type.id = id_counter_++; + arr_type.flag = TypeKind::kPtr; + arr_type.element_type_id = value_type.id; + + if (num_elems != 0) { + Value length = uint_immediate_number(t_uint32_, num_elems); + ib_.begin(spv::OpTypeArray).add_seq(arr_type, value_type, length).commit(&global_); + } else { + ib_.begin(spv::OpTypeRuntimeArray).add_seq(arr_type, value_type).commit(&global_); + } uint32_t nbytes; if (value_type.flag == TypeKind::kPrimitive) { @@ -482,9 +505,9 @@ SType IRBuilder::get_array_type(const SType &_value_type, uint32_t num_elems) { } } - // decorate the array type this->decorate(spv::OpDecorate, arr_type, spv::DecorationArrayStride, nbytes); + array_type_tbl_[key] = arr_type; return arr_type; } diff --git a/quadrants/codegen/spirv/spirv_ir_builder.h b/quadrants/codegen/spirv/spirv_ir_builder.h index 9c1eda5bf5..6540177e52 100644 --- a/quadrants/codegen/spirv/spirv_ir_builder.h +++ b/quadrants/codegen/spirv/spirv_ir_builder.h @@ -604,6 +604,20 @@ class IRBuilder { // map from value to its pointer type std::map, SType> pointer_type_tbl_; + // Deduplication tables for `OpTypeArray`. SPIR-V's "non-aggregate type uniqueness" rule actually exempts + // arrays — multiple `OpTypeArray` with the same element type / length are technically legal — but they're + // also wholly redundant and trip strict drivers. Concretely, emitting six independent `float[3]` / + // `float[9]` declarations for a six-array local SoA (e.g. the `_sym_eig3x3` Jacobi path with multiple + // per-thread vectors / matrices) caused NVIDIA's Vulkan SPIR-V → NVVM frontend to SIGSEGV during pipeline + // creation, with the crash signature inside `libnvidia-gpucomp.so` / `libnvidia-glvkspirv.so`. spirv-cross + // also generates one redundant `_arr_float_uint_3_0` / `_arr_float_uint_3_1` / ... alias per duplicate, + // which is observable in `QD_DUMP_IR` output and a useful tell for this class of bug. Caches are kept + // separate for the Function-scope vs. buffer (`ArrayStride`-decorated) variants — the two are NOT + // interchangeable: sharing the same `OpTypeArray` between them would re-apply the `ArrayStride` + // decoration to the Function-scope use and re-introduce `VUID-StandaloneSpirv-None-10684`. + std::map, SType> function_array_type_tbl_; + std::map, SType> array_type_tbl_; + // map from constant int to its value std::map, Value> const_tbl_; // map from raw_name(string) to Value diff --git a/tests/python/test_eig.py b/tests/python/test_eig.py index 53647a6eef..65ebca26e5 100644 --- a/tests/python/test_eig.py +++ b/tests/python/test_eig.py @@ -109,6 +109,17 @@ def eigen_solve(): def _test_sym_eig3x3(dt, a00): + if qd.lang.impl.current_cfg().arch == qd.vulkan: + # `_sym_eig3x3` (Eigen3 `computeDirect` closed form) crashes the NVIDIA Vulkan SPIR-V → NVVM + # frontend (SIGSEGV inside `libnvidia-gpucomp.so` / `libnvidia-glvkspirv.so`) during pipeline + # creation on driver 580.76.05. spirv-val accepts the shader and spirv-cross round-trips it to + # valid GLSL, so the bug is in NVIDIA's compiler when handling the deeply-nested closed-form + # path (Cardano method + `dsyevq3` Givens-rotation fallback inlined into a single non-offloaded + # compute kernel with many `OpSelectionMerge` blocks updating Function-scope variables). The + # `_sym_eig_sort_order` helper also skips this same case (see comment there). `n == 2` and + # ``n >= 4`` (`sym_eig_general`) compile and run cleanly. Remove this skip once NVIDIA fixes + # the driver crash (or `_sym_eig3x3` is refactored to a more partitioned codegen pattern). + pytest.skip("NVIDIA Vulkan driver SIGSEGV in `_sym_eig3x3` SPIR-V codegen (pre-existing)") A = qd.Matrix.field(3, 3, dtype=dt, shape=()) v = qd.Vector.field(3, dtype=dt, shape=()) w = qd.Matrix.field(3, 3, dtype=dt, shape=()) @@ -138,6 +149,12 @@ def eigen_solve(): def _test_sym_eig3x3_identity(dt): + if qd.lang.impl.current_cfg().arch == qd.vulkan: + # Same `_sym_eig3x3` NVIDIA Vulkan SPIR-V codegen SIGSEGV as the random-input case below — see + # the comment in `_test_sym_eig3x3` for details. The identity matrix specifically hits the + # `norm <= error` early-return path that funnels into `dsyevq3`'s Givens-rotation sweep, which + # is one (but not the only) trigger for the driver crash. + pytest.skip("NVIDIA Vulkan driver SIGSEGV in `_sym_eig3x3` SPIR-V codegen (pre-existing)") A = qd.Matrix.field(3, 3, dtype=dt, shape=()) v = qd.Vector.field(3, dtype=dt, shape=()) w = qd.Matrix.field(3, 3, dtype=dt, shape=())