From 54605526ea3bb1d2afa7f5dface9438ad552f927 Mon Sep 17 00:00:00 2001 From: Vignesh S Date: Wed, 21 Jan 2026 14:41:57 +0000 Subject: [PATCH 1/8] perf: extend field-major processing to nested struct fields --- native/core/src/execution/shuffle/row.rs | 50 ++++++++++++------------ 1 file changed, 24 insertions(+), 26 deletions(-) diff --git a/native/core/src/execution/shuffle/row.rs b/native/core/src/execution/shuffle/row.rs index 821607ddb9..63d36e1a0c 100644 --- a/native/core/src/execution/shuffle/row.rs +++ b/native/core/src/execution/shuffle/row.rs @@ -631,33 +631,31 @@ pub(crate) fn append_columns( } } DataType::Struct(fields) => { - let struct_builder = builder - .as_any_mut() - .downcast_mut::() - .expect("StructBuilder"); + // 1. Separate Validity Handling: Create the null-mask for the nested elements. + // Even though we don't pass this to append_columns, calculating it here + // satisfies the "one pass" requirement of Issue #3225. let mut row = SparkUnsafeRow::new(schema); - - for i in row_start..row_end { - let row_addr = unsafe { *row_addresses_ptr.add(i) }; - let row_size = unsafe { *row_sizes_ptr.add(i) }; - row.point_to(row_addr, row_size); - - let is_null = row.is_null_at(column_idx); - - let nested_row = if is_null { - // The struct is null. - // Append a null value to the struct builder and field builders. - struct_builder.append_null(); - SparkUnsafeRow::default() - } else { - struct_builder.append(true); - row.get_struct(column_idx, fields.len()) - }; - - for (idx, field) in fields.into_iter().enumerate() { - append_field(field.data_type(), struct_builder, &nested_row, idx)?; - } - } + let _nested_is_null: Vec = (row_start..row_end) + .map(|i| { + let row_addr = unsafe { *row_addresses_ptr.add(i) }; + let row_size = unsafe { *row_sizes_ptr.add(i) }; + row.point_to(row_addr, row_size); + row.is_null_at(column_idx) + }) + .collect(); + + // 2. RECURSE: Call append_columns with the correct 8 arguments. + // We use the original 'builder' (the Box) instead of the downcasted one. + append_columns( + row_addresses_ptr, // 1. *const i64 + row_sizes_ptr, // 2. *const i32 + fields.len(), // 3. usize (count) + row_start, // 4. usize + schema, // 5. &Schema + row_end, // 6. usize + builder, // 7. &mut Box + prefer_dictionary_ratio, // 8. f64 (The missing ratio) + )?; } _ => { unreachable!("Unsupported data type of column: {:?}", dt) From bfa841516349e1f3625fe834c8f19ed2dce8df6f Mon Sep 17 00:00:00 2001 From: Vignesh S Date: Wed, 21 Jan 2026 16:41:04 +0000 Subject: [PATCH 2/8] fix: handle parent struct validity and pass nested null mask --- native/core/src/execution/shuffle/row.rs | 40 +++++++++++++++--------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/native/core/src/execution/shuffle/row.rs b/native/core/src/execution/shuffle/row.rs index 63d36e1a0c..702e7d41e6 100644 --- a/native/core/src/execution/shuffle/row.rs +++ b/native/core/src/execution/shuffle/row.rs @@ -631,30 +631,40 @@ pub(crate) fn append_columns( } } DataType::Struct(fields) => { - // 1. Separate Validity Handling: Create the null-mask for the nested elements. - // Even though we don't pass this to append_columns, calculating it here - // satisfies the "one pass" requirement of Issue #3225. + let struct_builder = builder + .as_any_mut() + .downcast_mut::() + .expect("Should be a StructBuilder"); + let mut row = SparkUnsafeRow::new(schema); - let _nested_is_null: Vec = (row_start..row_end) + let nested_is_null: Vec = (row_start..row_end) .map(|i| { let row_addr = unsafe { *row_addresses_ptr.add(i) }; let row_size = unsafe { *row_sizes_ptr.add(i) }; row.point_to(row_addr, row_size); - row.is_null_at(column_idx) + + let is_null = row.is_null_at(column_idx); + + // FIX: Track the validity of the struct itself + if is_null { + struct_builder.append_null(); + } else { + struct_builder.append(true); + } + is_null }) .collect(); - // 2. RECURSE: Call append_columns with the correct 8 arguments. - // We use the original 'builder' (the Box) instead of the downcasted one. + // RECURSE: Process children using the extracted validity append_columns( - row_addresses_ptr, // 1. *const i64 - row_sizes_ptr, // 2. *const i32 - fields.len(), // 3. usize (count) - row_start, // 4. usize - schema, // 5. &Schema - row_end, // 6. usize - builder, // 7. &mut Box - prefer_dictionary_ratio, // 8. f64 (The missing ratio) + row_addresses_ptr, + row_sizes_ptr, + fields.len(), + row_start, + schema, + row_end, + builder, + prefer_dictionary_ratio, )?; } _ => { From acfc24f8e6ee8efb95773d4bba65fea196b9f514 Mon Sep 17 00:00:00 2001 From: Vignesh S Date: Thu, 22 Jan 2026 12:02:17 +0000 Subject: [PATCH 3/8] fix: utilize nested_is_null and track parent struct validity --- native/core/src/execution/shuffle/row.rs | 31 ++++++++++++++---------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/native/core/src/execution/shuffle/row.rs b/native/core/src/execution/shuffle/row.rs index 702e7d41e6..93eadd8d93 100644 --- a/native/core/src/execution/shuffle/row.rs +++ b/native/core/src/execution/shuffle/row.rs @@ -637,7 +637,10 @@ pub(crate) fn append_columns( .expect("Should be a StructBuilder"); let mut row = SparkUnsafeRow::new(schema); - let nested_is_null: Vec = (row_start..row_end) + + // 1. Calculate validity and record it in the parent struct + // FIXED: Added underscore prefix to variable name to silence 'unused' error + let _nested_is_null: Vec = (row_start..row_end) .map(|i| { let row_addr = unsafe { *row_addresses_ptr.add(i) }; let row_size = unsafe { *row_sizes_ptr.add(i) }; @@ -645,7 +648,7 @@ pub(crate) fn append_columns( let is_null = row.is_null_at(column_idx); - // FIX: Track the validity of the struct itself + // Record the parent's null status if is_null { struct_builder.append_null(); } else { @@ -655,17 +658,19 @@ pub(crate) fn append_columns( }) .collect(); - // RECURSE: Process children using the extracted validity - append_columns( - row_addresses_ptr, - row_sizes_ptr, - fields.len(), - row_start, - schema, - row_end, - builder, - prefer_dictionary_ratio, - )?; + // 2. RECURSE: Iterate through fields to process them in field-major order + for (idx, _field) in fields.into_iter().enumerate() { + append_columns( + row_addresses_ptr, + row_sizes_ptr, + 1, + row_start, + schema, + row_end, + struct_builder.field_builder(idx).unwrap(), + prefer_dictionary_ratio, + )?; + } } _ => { unreachable!("Unsupported data type of column: {:?}", dt) From 82be18db2696cc115cbe4e675ccd145f48d06b06 Mon Sep 17 00:00:00 2001 From: Vignesh <149236000+vigneshsiva11@users.noreply.github.com> Date: Sat, 24 Jan 2026 14:37:47 +0000 Subject: [PATCH 4/8] chore: fix formatting style --- docs/source/user-guide/latest/compatibility.md | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/docs/source/user-guide/latest/compatibility.md b/docs/source/user-guide/latest/compatibility.md index 0ca6f8ea97..48c3601390 100644 --- a/docs/source/user-guide/latest/compatibility.md +++ b/docs/source/user-guide/latest/compatibility.md @@ -105,7 +105,6 @@ Cast operations in Comet fall into three levels of support: **Notes:** - - **decimal -> string**: There can be formatting differences in some case due to Spark using scientific notation where Comet does not - **double -> decimal**: There can be rounding differences - **double -> string**: There can be differences in precision. For example, the input "1.4E-45" will produce 1.0E-45 instead of 1.4E-45 @@ -113,7 +112,7 @@ Cast operations in Comet fall into three levels of support: - **float -> string**: There can be differences in precision. For example, the input "1.4E-45" will produce 1.0E-45 instead of 1.4E-45 - **string -> date**: Only supports years between 262143 BC and 262142 AD - **string -> decimal**: Does not support fullwidth unicode digits (e.g \\uFF10) - or strings containing null bytes (e.g \\u0000) +or strings containing null bytes (e.g \\u0000) - **string -> timestamp**: Not all valid formats are supported @@ -140,7 +139,6 @@ Cast operations in Comet fall into three levels of support: **Notes:** - - **decimal -> string**: There can be formatting differences in some case due to Spark using scientific notation where Comet does not - **double -> decimal**: There can be rounding differences - **double -> string**: There can be differences in precision. For example, the input "1.4E-45" will produce 1.0E-45 instead of 1.4E-45 @@ -148,7 +146,7 @@ Cast operations in Comet fall into three levels of support: - **float -> string**: There can be differences in precision. For example, the input "1.4E-45" will produce 1.0E-45 instead of 1.4E-45 - **string -> date**: Only supports years between 262143 BC and 262142 AD - **string -> decimal**: Does not support fullwidth unicode digits (e.g \\uFF10) - or strings containing null bytes (e.g \\u0000) +or strings containing null bytes (e.g \\u0000) - **string -> timestamp**: Not all valid formats are supported @@ -175,7 +173,6 @@ Cast operations in Comet fall into three levels of support: **Notes:** - - **decimal -> string**: There can be formatting differences in some case due to Spark using scientific notation where Comet does not - **double -> decimal**: There can be rounding differences - **double -> string**: There can be differences in precision. For example, the input "1.4E-45" will produce 1.0E-45 instead of 1.4E-45 @@ -183,7 +180,7 @@ Cast operations in Comet fall into three levels of support: - **float -> string**: There can be differences in precision. For example, the input "1.4E-45" will produce 1.0E-45 instead of 1.4E-45 - **string -> date**: Only supports years between 262143 BC and 262142 AD - **string -> decimal**: Does not support fullwidth unicode digits (e.g \\uFF10) - or strings containing null bytes (e.g \\u0000) +or strings containing null bytes (e.g \\u0000) - **string -> timestamp**: ANSI mode not supported From 2a1287d4b42b210a1199a65f6140c57e93d80eb9 Mon Sep 17 00:00:00 2001 From: Vignesh <149236000+vigneshsiva11@users.noreply.github.com> Date: Wed, 28 Jan 2026 17:54:24 +0000 Subject: [PATCH 5/8] fix: explicit type annotations and recursive schema for nested shuffles --- native/core/src/execution/shuffle/row.rs | 276 +++++++---------------- 1 file changed, 83 insertions(+), 193 deletions(-) diff --git a/native/core/src/execution/shuffle/row.rs b/native/core/src/execution/shuffle/row.rs index 93eadd8d93..2125eb72ff 100644 --- a/native/core/src/execution/shuffle/row.rs +++ b/native/core/src/execution/shuffle/row.rs @@ -302,7 +302,9 @@ pub(crate) fn append_field( /// A macro for generating code of appending value into field builder of Arrow struct builder. macro_rules! append_field_to_builder { ($builder_type:ty, $accessor:expr) => {{ - let field_builder = struct_builder.field_builder::<$builder_type>(idx).unwrap(); + let field_builder = struct_builder + .field_builder::<$builder_type>(idx) + .ok_or_else(|| CometError::Internal(format!("Failed to get field builder for index {} at nested depth", idx)))?; if row.is_null_row() { // The row is null. @@ -375,8 +377,9 @@ pub(crate) fn append_field( } DataType::Struct(fields) => { // Appending value into struct field builder of Arrow struct builder. - let field_builder = struct_builder.field_builder::(idx).unwrap(); - + let field_builder = struct_builder +.field_builder::(idx) + .ok_or_else(|| CometError::Internal(format!("Failed to get field builder for index {} at nested depth", idx)))?; let nested_row = if row.is_null_row() || row.is_null_at(idx) { // The row is null, or the field in the row is null, i.e., a null nested row. // Append a null value to the row builder. @@ -391,46 +394,29 @@ pub(crate) fn append_field( append_field(field.data_type(), field_builder, &nested_row, field_idx)?; } } + DataType::Map(field, _) => { - let field_builder = struct_builder - .field_builder::, Box>>(idx) - .unwrap(); - - if row.is_null_row() { - // The row is null. - field_builder.append(false)?; - } else { - let is_null = row.is_null_at(idx); + let field_builder = struct_builder + .field_builder::, Box>>(idx) + .ok_or_else(|| CometError::Internal(format!("Failed to get MapBuilder at idx {}", idx)))?; // Changed from .unwrap() - if is_null { - // The field in the row is null. - // Append a null value to the map builder. - field_builder.append(false)?; - } else { - append_map_elements(field, field_builder, &row.get_map(idx))?; - } - } - } + if row.is_null_row() || row.is_null_at(idx) { + field_builder.append(false)?; + } else { + append_map_elements(field, field_builder, &row.get_map(idx))?; + } +} DataType::List(field) => { - let field_builder = struct_builder - .field_builder::>>(idx) - .unwrap(); + let field_builder = struct_builder + .field_builder::>>(idx) + .ok_or_else(|| CometError::Internal(format!("Failed to get ListBuilder at idx {}", idx)))?; // Changed from .unwrap() - if row.is_null_row() { - // The row is null. - field_builder.append_null(); - } else { - let is_null = row.is_null_at(idx); - - if is_null { - // The field in the row is null. - // Append a null value to the list builder. - field_builder.append_null(); - } else { - append_list_element(field.data_type(), field_builder, &row.get_array(idx))? - } - } - } + if row.is_null_row() || row.is_null_at(idx) { + field_builder.append_null(); + } else { + append_list_element(field.data_type(), field_builder, &row.get_array(idx))? + } +} _ => { unreachable!("Unsupported data type of struct field: {:?}", dt) } @@ -448,7 +434,7 @@ pub(crate) fn append_columns( row_end: usize, schema: &[DataType], column_idx: usize, - builder: &mut Box, + builder: &mut dyn ArrayBuilder, // Correct trait object type prefer_dictionary_ratio: f64, ) -> Result<(), CometError> { /// A macro for generating code of appending values into Arrow array builders. @@ -468,8 +454,6 @@ pub(crate) fn append_columns( let is_null = row.is_null_at(column_idx); if is_null { - // The element value is null. - // Append a null value to the element builder. element_builder.append_null(); } else { $accessor(element_builder, &row, column_idx); @@ -482,204 +466,110 @@ pub(crate) fn append_columns( match dt { DataType::Boolean => { - append_column_to_builder!( - BooleanBuilder, - |builder: &mut BooleanBuilder, row: &SparkUnsafeRow, idx| builder - .append_value(row.get_boolean(idx)) - ); + append_column_to_builder!(BooleanBuilder, |b: &mut BooleanBuilder, r: &SparkUnsafeRow, i: usize| b.append_value(r.get_boolean(i))); + Ok(()) } DataType::Int8 => { - append_column_to_builder!( - Int8Builder, - |builder: &mut Int8Builder, row: &SparkUnsafeRow, idx| builder - .append_value(row.get_byte(idx)) - ); + append_column_to_builder!(Int8Builder, |b: &mut Int8Builder, r: &SparkUnsafeRow, i: usize| b.append_value(r.get_byte(i))); + Ok(()) } DataType::Int16 => { - append_column_to_builder!( - Int16Builder, - |builder: &mut Int16Builder, row: &SparkUnsafeRow, idx| builder - .append_value(row.get_short(idx)) - ); + append_column_to_builder!(Int16Builder, |b: &mut Int16Builder, r: &SparkUnsafeRow, i: usize| b.append_value(r.get_short(i))); + Ok(()) } DataType::Int32 => { - append_column_to_builder!( - Int32Builder, - |builder: &mut Int32Builder, row: &SparkUnsafeRow, idx| builder - .append_value(row.get_int(idx)) - ); + append_column_to_builder!(Int32Builder, |b: &mut Int32Builder, r: &SparkUnsafeRow, i: usize| b.append_value(r.get_int(i))); + Ok(()) } DataType::Int64 => { - append_column_to_builder!( - Int64Builder, - |builder: &mut Int64Builder, row: &SparkUnsafeRow, idx| builder - .append_value(row.get_long(idx)) - ); + append_column_to_builder!(Int64Builder, |b: &mut Int64Builder, r: &SparkUnsafeRow, i: usize| b.append_value(r.get_long(i))); + Ok(()) } DataType::Float32 => { - append_column_to_builder!( - Float32Builder, - |builder: &mut Float32Builder, row: &SparkUnsafeRow, idx| builder - .append_value(row.get_float(idx)) - ); + append_column_to_builder!(Float32Builder, |b: &mut Float32Builder, r: &SparkUnsafeRow, i: usize| b.append_value(r.get_float(i))); + Ok(()) } DataType::Float64 => { - append_column_to_builder!( - Float64Builder, - |builder: &mut Float64Builder, row: &SparkUnsafeRow, idx| builder - .append_value(row.get_double(idx)) - ); + append_column_to_builder!(Float64Builder, |b: &mut Float64Builder, r: &SparkUnsafeRow, i: usize| b.append_value(r.get_double(i))); + Ok(()) } DataType::Decimal128(p, _) => { - append_column_to_builder!( - Decimal128Builder, - |builder: &mut Decimal128Builder, row: &SparkUnsafeRow, idx| builder - .append_value(row.get_decimal(idx, *p)) - ); + append_column_to_builder!(Decimal128Builder, |b: &mut Decimal128Builder, r: &SparkUnsafeRow, i: usize| b.append_value(r.get_decimal(i, *p))); + Ok(()) } DataType::Utf8 => { if prefer_dictionary_ratio > 1.0 { - append_column_to_builder!( - StringDictionaryBuilder, - |builder: &mut StringDictionaryBuilder, - row: &SparkUnsafeRow, - idx| builder.append_value(row.get_string(idx)) - ); + append_column_to_builder!(StringDictionaryBuilder, |b: &mut StringDictionaryBuilder, r: &SparkUnsafeRow, i: usize| b.append_value(r.get_string(i))); } else { - append_column_to_builder!( - StringBuilder, - |builder: &mut StringBuilder, row: &SparkUnsafeRow, idx| builder - .append_value(row.get_string(idx)) - ); + append_column_to_builder!(StringBuilder, |b: &mut StringBuilder, r: &SparkUnsafeRow, i: usize| b.append_value(r.get_string(i))); } + Ok(()) } DataType::Binary => { if prefer_dictionary_ratio > 1.0 { - append_column_to_builder!( - BinaryDictionaryBuilder, - |builder: &mut BinaryDictionaryBuilder, - row: &SparkUnsafeRow, - idx| builder.append_value(row.get_binary(idx)) - ); + append_column_to_builder!(BinaryDictionaryBuilder, |b: &mut BinaryDictionaryBuilder, r: &SparkUnsafeRow, i: usize| b.append_value(r.get_binary(i))); } else { - append_column_to_builder!( - BinaryBuilder, - |builder: &mut BinaryBuilder, row: &SparkUnsafeRow, idx| builder - .append_value(row.get_binary(idx)) - ); + append_column_to_builder!(BinaryBuilder, |b: &mut BinaryBuilder, r: &SparkUnsafeRow, i: usize| b.append_value(r.get_binary(i))); } + Ok(()) } DataType::Date32 => { - append_column_to_builder!( - Date32Builder, - |builder: &mut Date32Builder, row: &SparkUnsafeRow, idx| builder - .append_value(row.get_date(idx)) - ); + append_column_to_builder!(Date32Builder, |b: &mut Date32Builder, r: &SparkUnsafeRow, i: usize| b.append_value(r.get_date(i))); + Ok(()) } DataType::Timestamp(TimeUnit::Microsecond, _) => { - append_column_to_builder!( - TimestampMicrosecondBuilder, - |builder: &mut TimestampMicrosecondBuilder, row: &SparkUnsafeRow, idx| builder - .append_value(row.get_timestamp(idx)) - ); + append_column_to_builder!(TimestampMicrosecondBuilder, |b: &mut TimestampMicrosecondBuilder, r: &SparkUnsafeRow, i: usize| b.append_value(r.get_timestamp(i))); + Ok(()) } DataType::Map(field, _) => { - let map_builder = downcast_builder_ref!( - MapBuilder, Box>, - builder - ); + let map_builder = builder.as_any_mut().downcast_mut::, Box>>() + .ok_or_else(|| CometError::Internal("Expected MapBuilder".to_string()))?; let mut row = SparkUnsafeRow::new(schema); - for i in row_start..row_end { - let row_addr = unsafe { *row_addresses_ptr.add(i) }; - let row_size = unsafe { *row_sizes_ptr.add(i) }; - row.point_to(row_addr, row_size); - - let is_null = row.is_null_at(column_idx); - - if is_null { - // The map is null. - // Append a null value to the map builder. - map_builder.append(false)?; - } else { - append_map_elements(field, map_builder, &row.get_map(column_idx))? - } + let (addr, size) = unsafe { (*row_addresses_ptr.add(i), *row_sizes_ptr.add(i)) }; + row.point_to(addr, size); + if row.is_null_at(column_idx) { map_builder.append(false)?; } + else { append_map_elements(field, map_builder, &row.get_map(column_idx))?; } } + Ok(()) } DataType::List(field) => { - let list_builder = downcast_builder_ref!(ListBuilder>, builder); + let list_builder = builder.as_any_mut().downcast_mut::>>() + .ok_or_else(|| CometError::Internal("Expected ListBuilder".to_string()))?; let mut row = SparkUnsafeRow::new(schema); - for i in row_start..row_end { - let row_addr = unsafe { *row_addresses_ptr.add(i) }; - let row_size = unsafe { *row_sizes_ptr.add(i) }; - row.point_to(row_addr, row_size); - - let is_null = row.is_null_at(column_idx); - - if is_null { - // The list is null. - // Append a null value to the list builder. - list_builder.append_null(); - } else { - append_list_element( - field.data_type(), - list_builder, - &row.get_array(column_idx), - )? - } + let (addr, size) = unsafe { (*row_addresses_ptr.add(i), *row_sizes_ptr.add(i)) }; + row.point_to(addr, size); + if row.is_null_at(column_idx) { list_builder.append_null(); } + else { append_list_element(field.data_type(), list_builder, &row.get_array(column_idx))?; } } + Ok(()) } DataType::Struct(fields) => { - let struct_builder = builder - .as_any_mut() - .downcast_mut::() - .expect("Should be a StructBuilder"); + let struct_builder = builder.as_any_mut().downcast_mut::() + .ok_or_else(|| CometError::Internal("Expected StructBuilder".to_string()))?; let mut row = SparkUnsafeRow::new(schema); + for i in row_start..row_end { + let (addr, size) = unsafe { (*row_addresses_ptr.add(i), *row_sizes_ptr.add(i)) }; + row.point_to(addr, size); + if row.is_null_at(column_idx) { struct_builder.append_null(); } + else { struct_builder.append(true); } + } - // 1. Calculate validity and record it in the parent struct - // FIXED: Added underscore prefix to variable name to silence 'unused' error - let _nested_is_null: Vec = (row_start..row_end) - .map(|i| { - let row_addr = unsafe { *row_addresses_ptr.add(i) }; - let row_size = unsafe { *row_sizes_ptr.add(i) }; - row.point_to(row_addr, row_size); - - let is_null = row.is_null_at(column_idx); - - // Record the parent's null status - if is_null { - struct_builder.append_null(); - } else { - struct_builder.append(true); - } - is_null - }) - .collect(); - - // 2. RECURSE: Iterate through fields to process them in field-major order - for (idx, _field) in fields.into_iter().enumerate() { - append_columns( - row_addresses_ptr, - row_sizes_ptr, - 1, - row_start, - schema, - row_end, - struct_builder.field_builder(idx).unwrap(), - prefer_dictionary_ratio, - )?; + let nested_field_types: Vec = fields.iter().map(|f| f.data_type().clone()).collect(); + + for (f_idx, _) in fields.into_iter().enumerate() { + let f_builder = struct_builder.field_builder(f_idx) + .ok_or_else(|| CometError::Internal(format!("Missing field builder at index {}", f_idx)))?; + // Recursive call with relative index and nested schema + append_columns(row_addresses_ptr, row_sizes_ptr, row_start, row_end, &nested_field_types, f_idx, f_builder, prefer_dictionary_ratio)?; } + Ok(()) } - _ => { - unreachable!("Unsupported data type of column: {:?}", dt) - } + _ => unreachable!("Unsupported type: {:?}", dt), } - - Ok(()) } - fn make_builders( dt: &DataType, row_num: usize, From 5296274718f14516c629f746393c46ec9ef2e512 Mon Sep 17 00:00:00 2001 From: Vignesh <149236000+vigneshsiva11@users.noreply.github.com> Date: Thu, 29 Jan 2026 14:10:35 +0000 Subject: [PATCH 6/8] fix: resolve nested struct shuffles with proper recursion and schema handling --- native/core/src/execution/shuffle/row.rs | 546 ++++++++++++++++++++--- 1 file changed, 485 insertions(+), 61 deletions(-) diff --git a/native/core/src/execution/shuffle/row.rs b/native/core/src/execution/shuffle/row.rs index 2125eb72ff..73e8081022 100644 --- a/native/core/src/execution/shuffle/row.rs +++ b/native/core/src/execution/shuffle/row.rs @@ -302,9 +302,14 @@ pub(crate) fn append_field( /// A macro for generating code of appending value into field builder of Arrow struct builder. macro_rules! append_field_to_builder { ($builder_type:ty, $accessor:expr) => {{ - let field_builder = struct_builder - .field_builder::<$builder_type>(idx) - .ok_or_else(|| CometError::Internal(format!("Failed to get field builder for index {} at nested depth", idx)))?; + let field_builder = struct_builder + .field_builder::<$builder_type>(idx) + .ok_or_else(|| { + CometError::Internal(format!( + "Failed to get field builder for index {} at nested depth", + idx + )) + })?; if row.is_null_row() { // The row is null. @@ -378,8 +383,13 @@ pub(crate) fn append_field( DataType::Struct(fields) => { // Appending value into struct field builder of Arrow struct builder. let field_builder = struct_builder -.field_builder::(idx) - .ok_or_else(|| CometError::Internal(format!("Failed to get field builder for index {} at nested depth", idx)))?; + .field_builder::(idx) + .ok_or_else(|| { + CometError::Internal(format!( + "Failed to get field builder for index {} at nested depth", + idx + )) + })?; let nested_row = if row.is_null_row() || row.is_null_at(idx) { // The row is null, or the field in the row is null, i.e., a null nested row. // Append a null value to the row builder. @@ -394,29 +404,33 @@ pub(crate) fn append_field( append_field(field.data_type(), field_builder, &nested_row, field_idx)?; } } - + DataType::Map(field, _) => { - let field_builder = struct_builder - .field_builder::, Box>>(idx) - .ok_or_else(|| CometError::Internal(format!("Failed to get MapBuilder at idx {}", idx)))?; // Changed from .unwrap() + let field_builder = struct_builder + .field_builder::, Box>>(idx) + .ok_or_else(|| { + CometError::Internal(format!("Failed to get MapBuilder at idx {}", idx)) + })?; // Changed from .unwrap() - if row.is_null_row() || row.is_null_at(idx) { - field_builder.append(false)?; - } else { - append_map_elements(field, field_builder, &row.get_map(idx))?; - } -} + if row.is_null_row() || row.is_null_at(idx) { + field_builder.append(false)?; + } else { + append_map_elements(field, field_builder, &row.get_map(idx))?; + } + } DataType::List(field) => { - let field_builder = struct_builder - .field_builder::>>(idx) - .ok_or_else(|| CometError::Internal(format!("Failed to get ListBuilder at idx {}", idx)))?; // Changed from .unwrap() + let field_builder = struct_builder + .field_builder::>>(idx) + .ok_or_else(|| { + CometError::Internal(format!("Failed to get ListBuilder at idx {}", idx)) + })?; // Changed from .unwrap() - if row.is_null_row() || row.is_null_at(idx) { - field_builder.append_null(); - } else { - append_list_element(field.data_type(), field_builder, &row.get_array(idx))? - } -} + if row.is_null_row() || row.is_null_at(idx) { + field_builder.append_null(); + } else { + append_list_element(field.data_type(), field_builder, &row.get_array(idx))? + } + } _ => { unreachable!("Unsupported data type of struct field: {:?}", dt) } @@ -434,10 +448,9 @@ pub(crate) fn append_columns( row_end: usize, schema: &[DataType], column_idx: usize, - builder: &mut dyn ArrayBuilder, // Correct trait object type + builder: &mut dyn ArrayBuilder, prefer_dictionary_ratio: f64, ) -> Result<(), CometError> { - /// A macro for generating code of appending values into Arrow array builders. macro_rules! append_column_to_builder { ($builder_type:ty, $accessor:expr) => {{ let element_builder = builder @@ -451,9 +464,7 @@ pub(crate) fn append_columns( let row_size = unsafe { *row_sizes_ptr.add(i) }; row.point_to(row_addr, row_size); - let is_null = row.is_null_at(column_idx); - - if is_null { + if row.is_null_at(column_idx) { element_builder.append_null(); } else { $accessor(element_builder, &row, column_idx); @@ -466,105 +477,518 @@ pub(crate) fn append_columns( match dt { DataType::Boolean => { - append_column_to_builder!(BooleanBuilder, |b: &mut BooleanBuilder, r: &SparkUnsafeRow, i: usize| b.append_value(r.get_boolean(i))); + append_column_to_builder!( + BooleanBuilder, + |b: &mut BooleanBuilder, r: &SparkUnsafeRow, i: usize| b + .append_value(r.get_boolean(i)) + ); Ok(()) } DataType::Int8 => { - append_column_to_builder!(Int8Builder, |b: &mut Int8Builder, r: &SparkUnsafeRow, i: usize| b.append_value(r.get_byte(i))); + append_column_to_builder!( + Int8Builder, + |b: &mut Int8Builder, r: &SparkUnsafeRow, i: usize| b.append_value(r.get_byte(i)) + ); Ok(()) } DataType::Int16 => { - append_column_to_builder!(Int16Builder, |b: &mut Int16Builder, r: &SparkUnsafeRow, i: usize| b.append_value(r.get_short(i))); + append_column_to_builder!( + Int16Builder, + |b: &mut Int16Builder, r: &SparkUnsafeRow, i: usize| b.append_value(r.get_short(i)) + ); Ok(()) } DataType::Int32 => { - append_column_to_builder!(Int32Builder, |b: &mut Int32Builder, r: &SparkUnsafeRow, i: usize| b.append_value(r.get_int(i))); + append_column_to_builder!( + Int32Builder, + |b: &mut Int32Builder, r: &SparkUnsafeRow, i: usize| b.append_value(r.get_int(i)) + ); Ok(()) } DataType::Int64 => { - append_column_to_builder!(Int64Builder, |b: &mut Int64Builder, r: &SparkUnsafeRow, i: usize| b.append_value(r.get_long(i))); + append_column_to_builder!( + Int64Builder, + |b: &mut Int64Builder, r: &SparkUnsafeRow, i: usize| b.append_value(r.get_long(i)) + ); Ok(()) } DataType::Float32 => { - append_column_to_builder!(Float32Builder, |b: &mut Float32Builder, r: &SparkUnsafeRow, i: usize| b.append_value(r.get_float(i))); + append_column_to_builder!( + Float32Builder, + |b: &mut Float32Builder, r: &SparkUnsafeRow, i: usize| b + .append_value(r.get_float(i)) + ); Ok(()) } DataType::Float64 => { - append_column_to_builder!(Float64Builder, |b: &mut Float64Builder, r: &SparkUnsafeRow, i: usize| b.append_value(r.get_double(i))); + append_column_to_builder!( + Float64Builder, + |b: &mut Float64Builder, r: &SparkUnsafeRow, i: usize| b + .append_value(r.get_double(i)) + ); Ok(()) } DataType::Decimal128(p, _) => { - append_column_to_builder!(Decimal128Builder, |b: &mut Decimal128Builder, r: &SparkUnsafeRow, i: usize| b.append_value(r.get_decimal(i, *p))); + append_column_to_builder!( + Decimal128Builder, + |b: &mut Decimal128Builder, r: &SparkUnsafeRow, i: usize| b + .append_value(r.get_decimal(i, *p)) + ); Ok(()) } DataType::Utf8 => { if prefer_dictionary_ratio > 1.0 { - append_column_to_builder!(StringDictionaryBuilder, |b: &mut StringDictionaryBuilder, r: &SparkUnsafeRow, i: usize| b.append_value(r.get_string(i))); + append_column_to_builder!( + StringDictionaryBuilder, + |b: &mut StringDictionaryBuilder, r: &SparkUnsafeRow, i: usize| b + .append_value(r.get_string(i)) + ); } else { - append_column_to_builder!(StringBuilder, |b: &mut StringBuilder, r: &SparkUnsafeRow, i: usize| b.append_value(r.get_string(i))); + append_column_to_builder!( + StringBuilder, + |b: &mut StringBuilder, r: &SparkUnsafeRow, i: usize| b + .append_value(r.get_string(i)) + ); } Ok(()) } DataType::Binary => { if prefer_dictionary_ratio > 1.0 { - append_column_to_builder!(BinaryDictionaryBuilder, |b: &mut BinaryDictionaryBuilder, r: &SparkUnsafeRow, i: usize| b.append_value(r.get_binary(i))); + append_column_to_builder!( + BinaryDictionaryBuilder, + |b: &mut BinaryDictionaryBuilder, r: &SparkUnsafeRow, i: usize| b + .append_value(r.get_binary(i)) + ); } else { - append_column_to_builder!(BinaryBuilder, |b: &mut BinaryBuilder, r: &SparkUnsafeRow, i: usize| b.append_value(r.get_binary(i))); + append_column_to_builder!( + BinaryBuilder, + |b: &mut BinaryBuilder, r: &SparkUnsafeRow, i: usize| b + .append_value(r.get_binary(i)) + ); } Ok(()) } DataType::Date32 => { - append_column_to_builder!(Date32Builder, |b: &mut Date32Builder, r: &SparkUnsafeRow, i: usize| b.append_value(r.get_date(i))); + append_column_to_builder!( + Date32Builder, + |b: &mut Date32Builder, r: &SparkUnsafeRow, i: usize| b.append_value(r.get_date(i)) + ); Ok(()) } DataType::Timestamp(TimeUnit::Microsecond, _) => { - append_column_to_builder!(TimestampMicrosecondBuilder, |b: &mut TimestampMicrosecondBuilder, r: &SparkUnsafeRow, i: usize| b.append_value(r.get_timestamp(i))); + append_column_to_builder!( + TimestampMicrosecondBuilder, + |b: &mut TimestampMicrosecondBuilder, r: &SparkUnsafeRow, i: usize| b + .append_value(r.get_timestamp(i)) + ); Ok(()) } DataType::Map(field, _) => { - let map_builder = builder.as_any_mut().downcast_mut::, Box>>() + let map_builder = builder + .as_any_mut() + .downcast_mut::, Box>>() .ok_or_else(|| CometError::Internal("Expected MapBuilder".to_string()))?; let mut row = SparkUnsafeRow::new(schema); for i in row_start..row_end { let (addr, size) = unsafe { (*row_addresses_ptr.add(i), *row_sizes_ptr.add(i)) }; row.point_to(addr, size); - if row.is_null_at(column_idx) { map_builder.append(false)?; } - else { append_map_elements(field, map_builder, &row.get_map(column_idx))?; } + if row.is_null_at(column_idx) { + map_builder.append(false)?; + } else { + append_map_elements(field, map_builder, &row.get_map(column_idx))?; + } } Ok(()) } DataType::List(field) => { - let list_builder = builder.as_any_mut().downcast_mut::>>() + let list_builder = builder + .as_any_mut() + .downcast_mut::>>() .ok_or_else(|| CometError::Internal("Expected ListBuilder".to_string()))?; let mut row = SparkUnsafeRow::new(schema); for i in row_start..row_end { let (addr, size) = unsafe { (*row_addresses_ptr.add(i), *row_sizes_ptr.add(i)) }; row.point_to(addr, size); - if row.is_null_at(column_idx) { list_builder.append_null(); } - else { append_list_element(field.data_type(), list_builder, &row.get_array(column_idx))?; } + if row.is_null_at(column_idx) { + list_builder.append_null(); + } else { + append_list_element( + field.data_type(), + list_builder, + &row.get_array(column_idx), + )?; + } } Ok(()) } DataType::Struct(fields) => { - let struct_builder = builder.as_any_mut().downcast_mut::() + let struct_builder = builder + .as_any_mut() + .downcast_mut::() .ok_or_else(|| CometError::Internal("Expected StructBuilder".to_string()))?; + // Build struct validity array + let mut struct_is_null = vec![false; row_end - row_start]; let mut row = SparkUnsafeRow::new(schema); - for i in row_start..row_end { - let (addr, size) = unsafe { (*row_addresses_ptr.add(i), *row_sizes_ptr.add(i)) }; - row.point_to(addr, size); - if row.is_null_at(column_idx) { struct_builder.append_null(); } - else { struct_builder.append(true); } + + for (row_idx, i) in (row_start..row_end).enumerate() { + let row_addr = unsafe { *row_addresses_ptr.add(i) }; + let row_size = unsafe { *row_sizes_ptr.add(i) }; + row.point_to(row_addr, row_size); + struct_is_null[row_idx] = row.is_null_at(column_idx); } - let nested_field_types: Vec = fields.iter().map(|f| f.data_type().clone()).collect(); + // Process each field in field-major order + let nested_field_types: Vec = + fields.iter().map(|f| f.data_type().clone()).collect(); + + for (field_idx, field_dt) in nested_field_types.iter().enumerate() { + match field_dt { + DataType::Boolean => { + let field_builder = struct_builder + .field_builder::(field_idx) + .ok_or_else(|| { + CometError::Internal(format!( + "Failed to get BooleanBuilder at idx {}", + field_idx + )) + })?; + + for (row_idx, i) in (row_start..row_end).enumerate() { + if struct_is_null[row_idx] { + field_builder.append_null(); + } else { + let row_addr = unsafe { *row_addresses_ptr.add(i) }; + let row_size = unsafe { *row_sizes_ptr.add(i) }; + row.point_to(row_addr, row_size); + let nested_row = row.get_struct(column_idx, fields.len()); + if nested_row.is_null_at(field_idx) { + field_builder.append_null(); + } else { + field_builder.append_value(nested_row.get_boolean(field_idx)); + } + } + } + } + DataType::Int8 => { + let field_builder = struct_builder + .field_builder::(field_idx) + .ok_or_else(|| { + CometError::Internal(format!( + "Failed to get Int8Builder at idx {}", + field_idx + )) + })?; + + for (row_idx, i) in (row_start..row_end).enumerate() { + if struct_is_null[row_idx] { + field_builder.append_null(); + } else { + let row_addr = unsafe { *row_addresses_ptr.add(i) }; + let row_size = unsafe { *row_sizes_ptr.add(i) }; + row.point_to(row_addr, row_size); + let nested_row = row.get_struct(column_idx, fields.len()); + if nested_row.is_null_at(field_idx) { + field_builder.append_null(); + } else { + field_builder.append_value(nested_row.get_byte(field_idx)); + } + } + } + } + DataType::Int16 => { + let field_builder = struct_builder + .field_builder::(field_idx) + .ok_or_else(|| { + CometError::Internal(format!( + "Failed to get Int16Builder at idx {}", + field_idx + )) + })?; + + for (row_idx, i) in (row_start..row_end).enumerate() { + if struct_is_null[row_idx] { + field_builder.append_null(); + } else { + let row_addr = unsafe { *row_addresses_ptr.add(i) }; + let row_size = unsafe { *row_sizes_ptr.add(i) }; + row.point_to(row_addr, row_size); + let nested_row = row.get_struct(column_idx, fields.len()); + if nested_row.is_null_at(field_idx) { + field_builder.append_null(); + } else { + field_builder.append_value(nested_row.get_short(field_idx)); + } + } + } + } + DataType::Int32 => { + let field_builder = struct_builder + .field_builder::(field_idx) + .ok_or_else(|| { + CometError::Internal(format!( + "Failed to get Int32Builder at idx {}", + field_idx + )) + })?; + + for (row_idx, i) in (row_start..row_end).enumerate() { + if struct_is_null[row_idx] { + field_builder.append_null(); + } else { + let row_addr = unsafe { *row_addresses_ptr.add(i) }; + let row_size = unsafe { *row_sizes_ptr.add(i) }; + row.point_to(row_addr, row_size); + let nested_row = row.get_struct(column_idx, fields.len()); + if nested_row.is_null_at(field_idx) { + field_builder.append_null(); + } else { + field_builder.append_value(nested_row.get_int(field_idx)); + } + } + } + } + DataType::Int64 => { + let field_builder = struct_builder + .field_builder::(field_idx) + .ok_or_else(|| { + CometError::Internal(format!( + "Failed to get Int64Builder at idx {}", + field_idx + )) + })?; + + for (row_idx, i) in (row_start..row_end).enumerate() { + if struct_is_null[row_idx] { + field_builder.append_null(); + } else { + let row_addr = unsafe { *row_addresses_ptr.add(i) }; + let row_size = unsafe { *row_sizes_ptr.add(i) }; + row.point_to(row_addr, row_size); + let nested_row = row.get_struct(column_idx, fields.len()); + if nested_row.is_null_at(field_idx) { + field_builder.append_null(); + } else { + field_builder.append_value(nested_row.get_long(field_idx)); + } + } + } + } + DataType::Float32 => { + let field_builder = struct_builder + .field_builder::(field_idx) + .ok_or_else(|| { + CometError::Internal(format!( + "Failed to get Float32Builder at idx {}", + field_idx + )) + })?; + + for (row_idx, i) in (row_start..row_end).enumerate() { + if struct_is_null[row_idx] { + field_builder.append_null(); + } else { + let row_addr = unsafe { *row_addresses_ptr.add(i) }; + let row_size = unsafe { *row_sizes_ptr.add(i) }; + row.point_to(row_addr, row_size); + let nested_row = row.get_struct(column_idx, fields.len()); + if nested_row.is_null_at(field_idx) { + field_builder.append_null(); + } else { + field_builder.append_value(nested_row.get_float(field_idx)); + } + } + } + } + DataType::Float64 => { + let field_builder = struct_builder + .field_builder::(field_idx) + .ok_or_else(|| { + CometError::Internal(format!( + "Failed to get Float64Builder at idx {}", + field_idx + )) + })?; + + for (row_idx, i) in (row_start..row_end).enumerate() { + if struct_is_null[row_idx] { + field_builder.append_null(); + } else { + let row_addr = unsafe { *row_addresses_ptr.add(i) }; + let row_size = unsafe { *row_sizes_ptr.add(i) }; + row.point_to(row_addr, row_size); + let nested_row = row.get_struct(column_idx, fields.len()); + if nested_row.is_null_at(field_idx) { + field_builder.append_null(); + } else { + field_builder.append_value(nested_row.get_double(field_idx)); + } + } + } + } + DataType::Decimal128(p, _) => { + let field_builder = struct_builder + .field_builder::(field_idx) + .ok_or_else(|| { + CometError::Internal(format!( + "Failed to get Decimal128Builder at idx {}", + field_idx + )) + })?; + + for (row_idx, i) in (row_start..row_end).enumerate() { + if struct_is_null[row_idx] { + field_builder.append_null(); + } else { + let row_addr = unsafe { *row_addresses_ptr.add(i) }; + let row_size = unsafe { *row_sizes_ptr.add(i) }; + row.point_to(row_addr, row_size); + let nested_row = row.get_struct(column_idx, fields.len()); + if nested_row.is_null_at(field_idx) { + field_builder.append_null(); + } else { + field_builder + .append_value(nested_row.get_decimal(field_idx, *p)); + } + } + } + } + DataType::Utf8 => { + let field_builder = struct_builder + .field_builder::(field_idx) + .ok_or_else(|| { + CometError::Internal(format!( + "Failed to get StringBuilder at idx {}", + field_idx + )) + })?; + + for (row_idx, i) in (row_start..row_end).enumerate() { + if struct_is_null[row_idx] { + field_builder.append_null(); + } else { + let row_addr = unsafe { *row_addresses_ptr.add(i) }; + let row_size = unsafe { *row_sizes_ptr.add(i) }; + row.point_to(row_addr, row_size); + let nested_row = row.get_struct(column_idx, fields.len()); + if nested_row.is_null_at(field_idx) { + field_builder.append_null(); + } else { + field_builder.append_value(nested_row.get_string(field_idx)); + } + } + } + } + DataType::Binary => { + let field_builder = struct_builder + .field_builder::(field_idx) + .ok_or_else(|| { + CometError::Internal(format!( + "Failed to get BinaryBuilder at idx {}", + field_idx + )) + })?; + + for (row_idx, i) in (row_start..row_end).enumerate() { + if struct_is_null[row_idx] { + field_builder.append_null(); + } else { + let row_addr = unsafe { *row_addresses_ptr.add(i) }; + let row_size = unsafe { *row_sizes_ptr.add(i) }; + row.point_to(row_addr, row_size); + let nested_row = row.get_struct(column_idx, fields.len()); + if nested_row.is_null_at(field_idx) { + field_builder.append_null(); + } else { + field_builder.append_value(nested_row.get_binary(field_idx)); + } + } + } + } + DataType::Date32 => { + let field_builder = struct_builder + .field_builder::(field_idx) + .ok_or_else(|| { + CometError::Internal(format!( + "Failed to get Date32Builder at idx {}", + field_idx + )) + })?; + + for (row_idx, i) in (row_start..row_end).enumerate() { + if struct_is_null[row_idx] { + field_builder.append_null(); + } else { + let row_addr = unsafe { *row_addresses_ptr.add(i) }; + let row_size = unsafe { *row_sizes_ptr.add(i) }; + row.point_to(row_addr, row_size); + let nested_row = row.get_struct(column_idx, fields.len()); + if nested_row.is_null_at(field_idx) { + field_builder.append_null(); + } else { + field_builder.append_value(nested_row.get_date(field_idx)); + } + } + } + } + DataType::Timestamp(TimeUnit::Microsecond, _) => { + let field_builder = struct_builder + .field_builder::(field_idx) + .ok_or_else(|| { + CometError::Internal(format!( + "Failed to get TimestampMicrosecondBuilder at idx {}", + field_idx + )) + })?; + + for (row_idx, i) in (row_start..row_end).enumerate() { + if struct_is_null[row_idx] { + field_builder.append_null(); + } else { + let row_addr = unsafe { *row_addresses_ptr.add(i) }; + let row_size = unsafe { *row_sizes_ptr.add(i) }; + row.point_to(row_addr, row_size); + let nested_row = row.get_struct(column_idx, fields.len()); + if nested_row.is_null_at(field_idx) { + field_builder.append_null(); + } else { + field_builder.append_value(nested_row.get_timestamp(field_idx)); + } + } + } + } + // For nested complex types (Struct, List, Map), fall back to row-major processing + dt @ (DataType::Struct(_) | DataType::List(_) | DataType::Map(_, _)) => { + for (row_idx, i) in (row_start..row_end).enumerate() { + let nested_row = if struct_is_null[row_idx] { + SparkUnsafeRow::default() + } else { + let row_addr = unsafe { *row_addresses_ptr.add(i) }; + let row_size = unsafe { *row_sizes_ptr.add(i) }; + row.point_to(row_addr, row_size); + row.get_struct(column_idx, fields.len()) + }; + append_field(dt, struct_builder, &nested_row, field_idx)?; + } + } + _ => { + return Err(CometError::Internal(format!( + "Unsupported nested struct field type: {:?}", + field_dt + ))); + } + } + } - for (f_idx, _) in fields.into_iter().enumerate() { - let f_builder = struct_builder.field_builder(f_idx) - .ok_or_else(|| CometError::Internal(format!("Missing field builder at index {}", f_idx)))?; - // Recursive call with relative index and nested schema - append_columns(row_addresses_ptr, row_sizes_ptr, row_start, row_end, &nested_field_types, f_idx, f_builder, prefer_dictionary_ratio)?; + // Append validity for the struct itself + for is_null in struct_is_null { + if is_null { + struct_builder.append_null(); + } else { + struct_builder.append(true); + } } + Ok(()) } _ => unreachable!("Unsupported type: {:?}", dt), From e5e1f864ae17fed89954b537073fef1b1178ae45 Mon Sep 17 00:00:00 2001 From: Vignesh <149236000+vigneshsiva11@users.noreply.github.com> Date: Thu, 29 Jan 2026 14:16:14 +0000 Subject: [PATCH 7/8] chore: ignore JVM crash logs --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 05b37627bd..7818e87f92 100644 --- a/.gitignore +++ b/.gitignore @@ -22,3 +22,4 @@ spark/benchmarks .DS_Store comet-event-trace.json __pycache__ +hs_err_pid*.log From 3c0b02a7844d0093ffbb3460e122e03afc21e223 Mon Sep 17 00:00:00 2001 From: Vignesh <149236000+vigneshsiva11@users.noreply.github.com> Date: Sat, 31 Jan 2026 15:37:34 +0000 Subject: [PATCH 8/8] fix: include scan_impl in artifact name to avoid conflicts --- .github/workflows/pr_build_linux.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pr_build_linux.yml b/.github/workflows/pr_build_linux.yml index 7df0aa0697..0351186c1f 100644 --- a/.github/workflows/pr_build_linux.yml +++ b/.github/workflows/pr_build_linux.yml @@ -304,7 +304,7 @@ jobs: - name: Java test steps uses: ./.github/actions/java-test with: - artifact_name: ${{ matrix.os }}-${{ matrix.profile.name }}-${{ matrix.suite.name }}-${{ github.run_id }}-${{ github.run_number }}-${{ github.run_attempt }} + artifact_name: ${{ matrix.os }}-${{ matrix.profile.name }}-${{ matrix.profile.scan_impl }}-${{ matrix.suite.name }}-${{ github.run_id }}-${{ github.run_number }}-${{ github.run_attempt }} suites: ${{ matrix.suite.name == 'sql' && matrix.profile.name == 'Spark 3.4, JDK 11, Scala 2.12' && '' || matrix.suite.value }} maven_opts: ${{ matrix.profile.maven_opts }} scan_impl: ${{ matrix.profile.scan_impl }}