Skip to content

Commit fdb130f

Browse files
authored
[GLUTEN-11425][CORE] Upgrade Spark 4.1.0 to 4.1.1 (#11426)
* [CORE] Upgrade Spark 4.1.0 to 4.1.1 * update spark 411 test * Remove exclusion for SPARK-53413 in VeloxTestSettings
1 parent 6782c58 commit fdb130f

9 files changed

Lines changed: 280 additions & 50 deletions

File tree

.github/workflows/util/install-spark-resources.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ case "$1" in
122122
4.1)
123123
# Spark-4.x, scala 2.12 // using 2.12 as a hack as 4.0 does not have 2.13 suffix
124124
cd ${INSTALL_DIR} && \
125-
install_spark "4.1.0" "3" "2.12"
125+
install_spark "4.1.1" "3" "2.12"
126126
;;
127127
*)
128128
echo "Spark version is expected to be specified."

gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/identifier-clause.sql

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -367,6 +367,12 @@ SELECT * FROM unpivot_test UNPIVOT (val FOR col IN (a AS IDENTIFIER('col_a'), b
367367
SELECT * FROM unpivot_test UNPIVOT ((v1, v2) FOR col IN ((a, b) AS IDENTIFIER('cols_ab'), (b, c) AS IDENTIFIER('cols_bc'))) ORDER BY ALL;
368368
DROP TABLE unpivot_test;
369369

370+
-- DESCRIBE column with IDENTIFIER()
371+
CREATE TABLE describe_col_test(c1 INT, c2 STRING, c3 DOUBLE) USING CSV;
372+
DESCRIBE describe_col_test IDENTIFIER('c1');
373+
DESCRIBE describe_col_test IDENTIFIER('c2');
374+
DROP TABLE describe_col_test;
375+
370376
-- All the following tests fail because they are not about "true" identifiers
371377

372378
-- This should fail - named parameters don't support IDENTIFIER()

gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/inputs/kllquantiles.sql

Lines changed: 46 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -64,31 +64,31 @@ VALUES
6464
(CAST(7 AS DOUBLE), CAST(11 AS DOUBLE)) AS tab(col1, col2);
6565

6666
-- BIGINT sketches
67-
SELECT split(kll_sketch_to_string_bigint(agg), '\n')[1] LIKE '%Kll%' AS str_contains_kll,
67+
SELECT lower(kll_sketch_to_string_bigint(agg)) LIKE '%kll%' AS str_contains_kll,
6868
abs(kll_sketch_get_quantile_bigint(agg, 0.5) - 4) < 1 AS median_close_to_4,
6969
abs(kll_sketch_get_rank_bigint(agg, 3) - 0.4) < 0.1 AS rank3_close_to_0_4
7070
FROM (
7171
SELECT kll_sketch_agg_bigint(col1) AS agg
7272
FROM t_byte_1_5_through_7_11
7373
);
7474

75-
SELECT split(kll_sketch_to_string_bigint(agg), '\n')[1] LIKE '%Kll%' AS str_contains_kll,
75+
SELECT lower(kll_sketch_to_string_bigint(agg)) LIKE '%kll%' AS str_contains_kll,
7676
abs(kll_sketch_get_quantile_bigint(agg, 0.5) - 4) < 1 AS median_close_to_4,
7777
abs(kll_sketch_get_rank_bigint(agg, 3) - 0.4) < 0.1 AS rank3_close_to_0_4
7878
FROM (
7979
SELECT kll_sketch_agg_bigint(col1) AS agg
8080
FROM t_int_1_5_through_7_11
8181
);
8282

83-
SELECT split(kll_sketch_to_string_bigint(agg), '\n')[1] LIKE '%Kll%' AS str_contains_kll,
83+
SELECT lower(kll_sketch_to_string_bigint(agg)) LIKE '%kll%' AS str_contains_kll,
8484
abs(kll_sketch_get_quantile_bigint(agg, 0.5) - 4) < 1 AS median_close_to_4,
8585
abs(kll_sketch_get_rank_bigint(agg, 3) - 0.4) < 0.1 AS rank3_close_to_0_4
8686
FROM (
8787
SELECT kll_sketch_agg_bigint(col1) AS agg
8888
FROM t_long_1_5_through_7_11
8989
);
9090

91-
SELECT split(kll_sketch_to_string_bigint(agg), '\n')[1] LIKE '%Kll%' AS str_contains_kll,
91+
SELECT lower(kll_sketch_to_string_bigint(agg)) LIKE '%kll%' AS str_contains_kll,
9292
abs(kll_sketch_get_quantile_bigint(agg, 0.5) - 4) < 1 AS median_close_to_4,
9393
abs(kll_sketch_get_rank_bigint(agg, 3) - 0.4) < 0.1 AS rank3_close_to_0_4
9494
FROM (
@@ -97,7 +97,7 @@ FROM (
9797
);
9898

9999
-- FLOAT sketches (only accepts float types to avoid precision loss)
100-
SELECT split(kll_sketch_to_string_float(agg), '\n')[1] LIKE '%Kll%' AS str_contains_kll,
100+
SELECT lower(kll_sketch_to_string_float(agg)) LIKE '%kll%' AS str_contains_kll,
101101
abs(kll_sketch_get_quantile_float(agg, 0.5) - 4.0) < 0.5 AS median_close_to_4,
102102
abs(kll_sketch_get_rank_float(agg, 3) - 0.4) < 0.1 AS rank3_close_to_0_4
103103
FROM (
@@ -106,7 +106,7 @@ FROM (
106106
);
107107

108108
-- DOUBLE sketches (accepts float and double types to avoid precision loss from integer conversion)
109-
SELECT split(kll_sketch_to_string_double(agg), '\n')[1] LIKE '%Kll%' AS str_contains_kll,
109+
SELECT lower(kll_sketch_to_string_double(agg)) LIKE '%kll%' AS str_contains_kll,
110110
abs(kll_sketch_get_quantile_double(agg, 0.5) - 4.0) < 0.5 AS median_close_to_4,
111111
abs(kll_sketch_get_rank_double(agg, 3) - 0.4) < 0.1 AS rank3_close_to_0_4
112112
FROM (
@@ -115,7 +115,7 @@ FROM (
115115
);
116116

117117
-- Test float column with double sketch (valid type promotion)
118-
SELECT split(kll_sketch_to_string_double(agg), '\n')[1] LIKE '%Kll%' AS str_contains_kll,
118+
SELECT lower(kll_sketch_to_string_double(agg)) LIKE '%kll%' AS str_contains_kll,
119119
abs(kll_sketch_get_quantile_double(agg, 0.5) - 4.0) < 0.5 AS median_close_to_4,
120120
abs(kll_sketch_get_rank_double(agg, 3) - 0.4) < 0.1 AS rank3_close_to_0_4
121121
FROM (
@@ -268,7 +268,7 @@ FROM t_long_1_5_through_7_11;
268268
-- These queries should fail with type mismatch or validation errors
269269

270270
-- Type mismatch: BIGINT sketch does not accept DOUBLE columns
271-
SELECT split(kll_sketch_to_string_bigint(agg), '\n')[1] LIKE '%Kll%' AS str_contains_kll,
271+
SELECT lower(kll_sketch_to_string_bigint(agg)) LIKE '%kll%' AS str_contains_kll,
272272
abs(kll_sketch_get_quantile_bigint(agg, 0.5) - 4) < 1 AS median_close_to_4,
273273
abs(kll_sketch_get_rank_bigint(agg, 3) - 0.4) < 0.1 AS rank3_close_to_0_4
274274
FROM (
@@ -277,7 +277,7 @@ FROM (
277277
);
278278

279279
-- Type mismatch: BIGINT sketch does not accept FLOAT columns
280-
SELECT split(kll_sketch_to_string_bigint(agg), '\n')[1] LIKE '%Kll%' AS str_contains_kll,
280+
SELECT lower(kll_sketch_to_string_bigint(agg)) LIKE '%kll%' AS str_contains_kll,
281281
abs(kll_sketch_get_quantile_bigint(agg, 0.5) - 4) < 1 AS median_close_to_4,
282282
abs(kll_sketch_get_rank_bigint(agg, 3) - 0.4) < 0.1 AS rank3_close_to_0_4
283283
FROM (
@@ -286,7 +286,7 @@ FROM (
286286
);
287287

288288
-- Type mismatch: FLOAT sketch does not accept DOUBLE columns
289-
SELECT split(kll_sketch_to_string_float(agg), '\n')[1] LIKE '%Kll%' AS str_contains_kll,
289+
SELECT lower(kll_sketch_to_string_float(agg)) LIKE '%kll%' AS str_contains_kll,
290290
abs(kll_sketch_get_quantile_float(agg, 0.5) - 4.0) < 0.5 AS median_close_to_4,
291291
abs(kll_sketch_get_rank_float(agg, 3) - 0.4) < 0.1 AS rank3_close_to_0_4
292292
FROM (
@@ -378,7 +378,7 @@ FROM (
378378
-- interpret the binary data. This query succeeds even though we're using a DOUBLE
379379
-- to_string function on a BIGINT sketch. The function reads the binary representation
380380
-- and produces output, but the numeric values will be incorrectly interpreted.
381-
SELECT kll_sketch_to_string_double(agg) LIKE '%Kll%' AS contains_kll_header
381+
SELECT lower(kll_sketch_to_string_double(agg)) LIKE '%kll%' AS contains_kll_header
382382
FROM (
383383
SELECT kll_sketch_agg_bigint(col1) AS agg
384384
FROM t_long_1_5_through_7_11
@@ -464,6 +464,41 @@ FROM (
464464
FROM t_double_1_5_through_7_11
465465
);
466466

467+
-- Negative tests for non-foldable (non-constant) rank/quantile arguments
468+
-- These tests verify that get_quantile and get_rank functions require compile-time constant arguments
469+
470+
-- Non-foldable scalar rank argument to get_quantile (column reference)
471+
SELECT kll_sketch_get_quantile_bigint(agg, CAST(col1 AS DOUBLE) / 10.0) AS non_foldable_scalar_rank
472+
FROM (
473+
SELECT kll_sketch_agg_bigint(col1) AS agg, col1
474+
FROM t_long_1_5_through_7_11
475+
GROUP BY col1
476+
);
477+
478+
-- Non-foldable array rank argument to get_quantile (array containing column reference)
479+
SELECT kll_sketch_get_quantile_bigint(agg, array(0.25, CAST(col1 AS DOUBLE) / 10.0, 0.75)) AS non_foldable_array_rank
480+
FROM (
481+
SELECT kll_sketch_agg_bigint(col1) AS agg, col1
482+
FROM t_long_1_5_through_7_11
483+
GROUP BY col1
484+
);
485+
486+
-- Non-foldable scalar quantile argument to get_rank (column reference)
487+
SELECT kll_sketch_get_rank_bigint(agg, col1) AS non_foldable_scalar_quantile
488+
FROM (
489+
SELECT kll_sketch_agg_bigint(col1) AS agg, col1
490+
FROM t_long_1_5_through_7_11
491+
GROUP BY col1
492+
);
493+
494+
-- Non-foldable array quantile argument to get_rank (array containing column reference)
495+
SELECT kll_sketch_get_rank_bigint(agg, array(1L, col1, 5L)) AS non_foldable_array_quantile
496+
FROM (
497+
SELECT kll_sketch_agg_bigint(col1) AS agg, col1
498+
FROM t_long_1_5_through_7_11
499+
GROUP BY col1
500+
);
501+
467502
-- Clean up
468503
DROP TABLE IF EXISTS t_int_1_5_through_7_11;
469504
DROP TABLE IF EXISTS t_long_1_5_through_7_11;

gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/results/identifier-clause-legacy.sql.out

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2740,6 +2740,54 @@ struct<>
27402740

27412741

27422742

2743+
-- !query
2744+
CREATE TABLE describe_col_test(c1 INT, c2 STRING, c3 DOUBLE) USING CSV
2745+
-- !query schema
2746+
struct<>
2747+
-- !query output
2748+
2749+
2750+
2751+
-- !query
2752+
DESCRIBE describe_col_test IDENTIFIER('c1')
2753+
-- !query schema
2754+
struct<>
2755+
-- !query output
2756+
org.apache.spark.sql.catalyst.parser.ParseException
2757+
{
2758+
"errorClass" : "PARSE_SYNTAX_ERROR",
2759+
"sqlState" : "42601",
2760+
"messageParameters" : {
2761+
"error" : "'('",
2762+
"hint" : ""
2763+
}
2764+
}
2765+
2766+
2767+
-- !query
2768+
DESCRIBE describe_col_test IDENTIFIER('c2')
2769+
-- !query schema
2770+
struct<>
2771+
-- !query output
2772+
org.apache.spark.sql.catalyst.parser.ParseException
2773+
{
2774+
"errorClass" : "PARSE_SYNTAX_ERROR",
2775+
"sqlState" : "42601",
2776+
"messageParameters" : {
2777+
"error" : "'('",
2778+
"hint" : ""
2779+
}
2780+
}
2781+
2782+
2783+
-- !query
2784+
DROP TABLE describe_col_test
2785+
-- !query schema
2786+
struct<>
2787+
-- !query output
2788+
2789+
2790+
27432791
-- !query
27442792
SELECT :IDENTIFIER('param1') FROM VALUES(1) AS T(c1)
27452793
-- !query schema

gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/results/identifier-clause.sql.out

Lines changed: 40 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -2227,24 +2227,11 @@ struct<result:int>
22272227
EXECUTE IMMEDIATE 'SELECT IDENTIFIER(:alias ''.c1'') FROM integration_test AS IDENTIFIER(:alias) ORDER BY ALL'
22282228
USING 't' AS alias
22292229
-- !query schema
2230-
struct<>
2230+
struct<c1:int>
22312231
-- !query output
2232-
org.apache.spark.sql.catalyst.ExtendedAnalysisException
2233-
{
2234-
"errorClass" : "UNRESOLVED_COLUMN.WITH_SUGGESTION",
2235-
"sqlState" : "42703",
2236-
"messageParameters" : {
2237-
"objectName" : "`t`.`c1`",
2238-
"proposal" : "`IDENTIFIER('t')`.`c1`, `IDENTIFIER('t')`.`c2`, `IDENTIFIER('t')`.`c4`"
2239-
},
2240-
"queryContext" : [ {
2241-
"objectType" : "EXECUTE IMMEDIATE",
2242-
"objectName" : "",
2243-
"startIndex" : 8,
2244-
"stopIndex" : 31,
2245-
"fragment" : "IDENTIFIER(:alias '.c1')"
2246-
} ]
2247-
}
2232+
1
2233+
2
2234+
3
22482235

22492236

22502237
-- !query
@@ -2396,6 +2383,42 @@ struct<>
23962383

23972384

23982385

2386+
-- !query
2387+
CREATE TABLE describe_col_test(c1 INT, c2 STRING, c3 DOUBLE) USING CSV
2388+
-- !query schema
2389+
struct<>
2390+
-- !query output
2391+
2392+
2393+
2394+
-- !query
2395+
DESCRIBE describe_col_test IDENTIFIER('c1')
2396+
-- !query schema
2397+
struct<info_name:string,info_value:string>
2398+
-- !query output
2399+
col_name c1
2400+
data_type int
2401+
comment NULL
2402+
2403+
2404+
-- !query
2405+
DESCRIBE describe_col_test IDENTIFIER('c2')
2406+
-- !query schema
2407+
struct<info_name:string,info_value:string>
2408+
-- !query output
2409+
col_name c2
2410+
data_type string
2411+
comment NULL
2412+
2413+
2414+
-- !query
2415+
DROP TABLE describe_col_test
2416+
-- !query schema
2417+
struct<>
2418+
-- !query output
2419+
2420+
2421+
23992422
-- !query
24002423
SELECT :IDENTIFIER('param1') FROM VALUES(1) AS T(c1)
24012424
-- !query schema

0 commit comments

Comments
 (0)