From 05126e4af2e8c0a084c63bf214886c394f166b85 Mon Sep 17 00:00:00 2001 From: Konstantin Bereznyakov Date: Fri, 10 Apr 2026 19:09:38 -0700 Subject: [PATCH 1/5] HIVE-29559: SemanticAnalyzer.materializeCTE to use CreateTableAnalyzer instead of SemanticAnalyzer --- .../hive/ql/parse/SemanticAnalyzer.java | 3 +- .../clientpositive/cte_materialize_non_aggr.q | 17 +++ .../llap/cte_materialize_non_aggr.q.out | 138 ++++++++++++++++++ 3 files changed, 157 insertions(+), 1 deletion(-) create mode 100644 ql/src/test/queries/clientpositive/cte_materialize_non_aggr.q create mode 100644 ql/src/test/results/clientpositive/llap/cte_materialize_non_aggr.q.out diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 5c4f049f0350..c28782aff44a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -117,6 +117,7 @@ import org.apache.hadoop.hive.ql.ddl.DDLDescWithTableProperties; import org.apache.hadoop.hive.ql.ddl.DDLWork; import org.apache.hadoop.hive.ql.ddl.misc.hooks.InsertCommitHookDesc; +import org.apache.hadoop.hive.ql.ddl.table.create.CreateTableAnalyzer; import org.apache.hadoop.hive.ql.ddl.table.create.CreateTableDesc; import org.apache.hadoop.hive.ql.ddl.table.misc.preinsert.PreInsertTableDesc; import org.apache.hadoop.hive.ql.ddl.table.misc.properties.AlterTableUnsetPropertiesDesc; @@ -1568,7 +1569,7 @@ Table materializeCTE(String cteName, CTEClause cte) throws HiveException { createTable.addChild(temporary); createTable.addChild(cte.cteNode); - SemanticAnalyzer analyzer = new SemanticAnalyzer(queryState); + CreateTableAnalyzer analyzer = new CreateTableAnalyzer(queryState); analyzer.initCtx(ctx); analyzer.init(false); diff --git a/ql/src/test/queries/clientpositive/cte_materialize_non_aggr.q b/ql/src/test/queries/clientpositive/cte_materialize_non_aggr.q new file mode 100644 index 
000000000000..452be1e40cf4 --- /dev/null +++ b/ql/src/test/queries/clientpositive/cte_materialize_non_aggr.q @@ -0,0 +1,17 @@ +-- HIVE-28724 regression: SemanticAnalyzer.materializeCTE uses wrong analyzer class +-- CalcitePlanner.materializeCTE was fixed to use CreateTableAnalyzer, +-- but SemanticAnalyzer.materializeCTE still uses SemanticAnalyzer directly. +-- Bug triggers when: CBO disabled + non-aggregate CTE materialization + +set hive.optimize.cte.materialize.full.aggregate.only=false; +set hive.cbo.enable=false; + +explain +WITH cte AS ( + SELECT 1 as id +) +SELECT * FROM cte +UNION ALL +SELECT * FROM cte +UNION ALL +SELECT * FROM cte; diff --git a/ql/src/test/results/clientpositive/llap/cte_materialize_non_aggr.q.out b/ql/src/test/results/clientpositive/llap/cte_materialize_non_aggr.q.out new file mode 100644 index 000000000000..5a8ed1b23751 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/cte_materialize_non_aggr.q.out @@ -0,0 +1,138 @@ +PREHOOK: query: explain +WITH cte AS ( + SELECT 1 as id +) +SELECT * FROM cte +UNION ALL +SELECT * FROM cte +UNION ALL +SELECT * FROM cte +PREHOOK: type: QUERY +PREHOOK: Input: default@cte +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +WITH cte AS ( + SELECT 1 as id +) +SELECT * FROM cte +UNION ALL +SELECT * FROM cte +UNION ALL +SELECT * FROM cte +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cte +POSTHOOK: Output: hdfs://### HDFS PATH ### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-4 depends on stages: Stage-2, Stage-0 + Stage-0 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-4 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 1 (type: int) + outputColumnNames: _col0 + Statistics: Num 
rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.cte + Execution mode: vectorized, llap + LLAP IO: no inputs + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-4 + Tez +#### A masked pattern was here #### + Edges: + Map 2 <- Union 3 (CONTAINS) + Map 4 <- Union 3 (CONTAINS) + Map 5 <- Union 3 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: cte + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: id (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: cte + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: id (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 5 + Map Operator Tree: + TableScan + alias: cte + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: id (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Union 3 + Vertex: Union 3 + + Stage: Stage-0 + Move Operator + files: + hdfs directory: true + destination: hdfs://### HDFS PATH ### + + Stage: Stage-3 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + From 02ff8188088b2aa6bdfba490d361e14e466adea0 Mon Sep 17 00:00:00 2001 From: Konstantin Bereznyakov Date: Sat, 11 Apr 2026 11:43:25 -0700 Subject: [PATCH 2/5] HIVE-29559: unit test, new .q test file with minimal config changes, .out file generated with proper test driver this time --- .../hive/ql/parse/TestSemanticAnalyzer.java | 21 +++++ ...ze_non_aggr.q => cte_materialize_no_cbo.q} | 5 +- ...ggr.q.out => cte_materialize_no_cbo.q.out} | 92 +++++++++++-------- 3 files changed, 79 insertions(+), 39 deletions(-) rename ql/src/test/queries/clientpositive/{cte_materialize_non_aggr.q => cte_materialize_no_cbo.q} (70%) rename ql/src/test/results/clientpositive/llap/{cte_materialize_non_aggr.q.out => cte_materialize_no_cbo.q.out} (56%) diff --git a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSemanticAnalyzer.java b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSemanticAnalyzer.java index dbdc79769dc8..1b07a19d9180 100644 --- 
a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSemanticAnalyzer.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSemanticAnalyzer.java @@ -493,4 +493,25 @@ private void checkTablesUsed(String query, Set tables) throws Exception Assert.assertEquals(new TreeSet<>(tables), new TreeSet<>(result)); } + + @Test + public void testMaterializeCTEWithCBODisabled() throws Exception { + HiveConf testConf = new HiveConf(conf); + testConf.setBoolVar(HiveConf.ConfVars.HIVE_CBO_ENABLED, false); + testConf.setIntVar(HiveConf.ConfVars.HIVE_CTE_MATERIALIZE_THRESHOLD, 2); + + SessionState.start(testConf); + Context ctx = new Context(testConf); + + String query = "WITH cte AS (SELECT COUNT(*) as cnt FROM table1) " + + "SELECT * FROM cte UNION ALL SELECT * FROM cte"; + + ASTNode astNode = ParseUtils.parse(query, ctx); + QueryState queryState = new QueryState.Builder().withHiveConf(testConf).build(); + BaseSemanticAnalyzer analyzer = SemanticAnalyzerFactory.get(queryState, astNode); + analyzer.initCtx(ctx); + + // This should not throw NPE after the fix + analyzer.analyze(astNode, ctx); + } } diff --git a/ql/src/test/queries/clientpositive/cte_materialize_non_aggr.q b/ql/src/test/queries/clientpositive/cte_materialize_no_cbo.q similarity index 70% rename from ql/src/test/queries/clientpositive/cte_materialize_non_aggr.q rename to ql/src/test/queries/clientpositive/cte_materialize_no_cbo.q index 452be1e40cf4..04b9a153aeca 100644 --- a/ql/src/test/queries/clientpositive/cte_materialize_non_aggr.q +++ b/ql/src/test/queries/clientpositive/cte_materialize_no_cbo.q @@ -1,14 +1,13 @@ -- HIVE-28724 regression: SemanticAnalyzer.materializeCTE uses wrong analyzer class -- CalcitePlanner.materializeCTE was fixed to use CreateTableAnalyzer, -- but SemanticAnalyzer.materializeCTE still uses SemanticAnalyzer directly. 
--- Bug triggers when: CBO disabled + non-aggregate CTE materialization +-- Bug triggers when CBO is disabled -set hive.optimize.cte.materialize.full.aggregate.only=false; set hive.cbo.enable=false; explain WITH cte AS ( - SELECT 1 as id + SELECT COUNT(*) as cnt FROM (SELECT 1 as id) t ) SELECT * FROM cte UNION ALL diff --git a/ql/src/test/results/clientpositive/llap/cte_materialize_non_aggr.q.out b/ql/src/test/results/clientpositive/llap/cte_materialize_no_cbo.q.out similarity index 56% rename from ql/src/test/results/clientpositive/llap/cte_materialize_non_aggr.q.out rename to ql/src/test/results/clientpositive/llap/cte_materialize_no_cbo.q.out index 5a8ed1b23751..37210c0049d9 100644 --- a/ql/src/test/results/clientpositive/llap/cte_materialize_non_aggr.q.out +++ b/ql/src/test/results/clientpositive/llap/cte_materialize_no_cbo.q.out @@ -1,6 +1,6 @@ PREHOOK: query: explain WITH cte AS ( - SELECT 1 as id + SELECT COUNT(*) as cnt FROM (SELECT 1 as id) t ) SELECT * FROM cte UNION ALL @@ -9,10 +9,10 @@ UNION ALL SELECT * FROM cte PREHOOK: type: QUERY PREHOOK: Input: default@cte -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: explain WITH cte AS ( - SELECT 1 as id + SELECT COUNT(*) as cnt FROM (SELECT 1 as id) t ) SELECT * FROM cte UNION ALL @@ -21,7 +21,7 @@ UNION ALL SELECT * FROM cte POSTHOOK: type: QUERY POSTHOOK: Input: default@cte -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -32,6 +32,9 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -41,19 +44,36 @@ STAGE PLANS: Row Limit Per Split: 1 Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: 1 (type: int) - outputColumnNames: _col0 - Statistics: 
Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.cte + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.4 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.cte Stage: Stage-2 Dependency Collection @@ -62,73 +82,73 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Union 3 (CONTAINS) - Map 4 <- Union 3 (CONTAINS) - Map 5 <- Union 3 (CONTAINS) + Map 3 <- Union 4 (CONTAINS) + Map 5 <- Union 4 (CONTAINS) + Map 6 <- Union 4 (CONTAINS) #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: cte - Statistics: Num rows: 1 Data size: 4 Basic stats: 
COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: id (type: int) + expressions: cnt (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: cte - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: id (type: int) + expressions: cnt (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs - Map 5 + Map 6 Map Operator Tree: TableScan alias: cte - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic 
stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: id (type: int) + expressions: cnt (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs - Union 3 - Vertex: Union 3 + Union 4 + Vertex: Union 4 Stage: Stage-0 Move Operator files: hdfs directory: true - destination: hdfs://### HDFS PATH ### +#### A masked pattern was here #### Stage: Stage-3 Fetch Operator From 9c6f55e5adbce2dc36c7dfebd82c094aca5cc784 Mon Sep 17 00:00:00 2001 From: Konstantin Bereznyakov Date: Mon, 13 Apr 2026 10:50:44 -0700 Subject: [PATCH 3/5] HIVE-29559: refactoring to truly use DDLSemanticAnalyzerFactory as originally intended by HIVE-28724 --- .../ddl/table/create/CreateTableAnalyzer.java | 10 ++ .../hive/ql/parse/BaseSemanticAnalyzer.java | 75 +++++++++ .../hadoop/hive/ql/parse/CalcitePlanner.java | 46 ----- .../hive/ql/parse/SemanticAnalyzer.java | 52 +----- .../hive/ql/parse/TestSemanticAnalyzer.java | 43 ++++- .../queries/clientpositive/cte_materialize.q | 27 +++ .../clientpositive/cte_materialize_no_cbo.q | 16 -- ...ize_no_cbo.q.out => cte_materialize.q.out} | 158 ++++++++++++++++++ 8 files changed, 314 insertions(+), 113 deletions(-) create mode 100644 ql/src/test/queries/clientpositive/cte_materialize.q delete mode 100644 ql/src/test/queries/clientpositive/cte_materialize_no_cbo.q rename ql/src/test/results/clientpositive/llap/{cte_materialize_no_cbo.q.out => 
cte_materialize.q.out} (50%) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/CreateTableAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/CreateTableAnalyzer.java index 49e5b5020f07..f746f8f52e96 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/CreateTableAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/CreateTableAnalyzer.java @@ -137,6 +137,16 @@ protected boolean genResolvedParseTree(ASTNode ast, PlannerContext plannerCtx) return analyzeAndResolveChildTree(child, plannerCtx); } + @Override + public void acceptCTEContext(Map aliasToCTEs) { + this.aliasToCTEs.putAll(aliasToCTEs); + } + + @Override + public CreateTableDesc getCreatedTableDesc() { + return getQB().getTableDesc(); + } + /** * Checks to see if given partition columns has DEFAULT or CHECK constraints (whether ENABLED or DISABLED) * Or has NOT NULL constraints (only ENABLED) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java index 986dcb7fcbbb..0888c2801e45 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java @@ -69,7 +69,9 @@ import org.apache.hadoop.hive.ql.cache.results.CacheUsage; import org.apache.hadoop.hive.ql.ddl.DDLDesc.DDLDescWithWriteId; import org.apache.hadoop.hive.ql.ddl.table.constraint.ConstraintsUtils; +import org.apache.hadoop.hive.ql.ddl.table.create.CreateTableDesc; import org.apache.hadoop.hive.ql.exec.FetchTask; +import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.exec.TaskFactory; import org.apache.hadoop.hive.ql.exec.Utilities; @@ -2174,4 +2176,77 @@ protected void setSqlKind(SqlKind sqlKind) { queryState.setSqlKind(sqlKind); } } + + /** + * Returns the sink operator for query plans that produce one. 
+ * @return the sink operator, or throws if not supported + */ + public Operator getSinkOp() { + throw new UnsupportedOperationException( + "getSinkOp not supported for " + getClass().getSimpleName()); + } + + /** + * Accepts CTE context from another analyzer for CTE materialization. + * @param aliasToCTEs the CTE alias to clause mapping to import + */ + public void acceptCTEContext(Map aliasToCTEs) { + throw new UnsupportedOperationException( + "acceptCTEContext not supported for " + getClass().getSimpleName()); + } + + /** + * Returns the table descriptor created during analysis (e.g., for CTAS or CTE materialization). + * @return the created table descriptor, or throws if not supported + */ + public CreateTableDesc getCreatedTableDesc() { + throw new UnsupportedOperationException( + "getCreatedTableDesc not supported for " + getClass().getSimpleName()); + } + + /** + * Represents a Common Table Expression (CTE) clause. + */ + public class CTEClause { + public CTEClause(String alias, ASTNode cteNode, ASTNode withColList) { + this.alias = alias; + this.cteNode = cteNode; + this.withColList = withColList; + } + + public String alias; + public ASTNode cteNode; + public ASTNode withColList; + public boolean materialize; + public int reference; + public QBExpr qbExpr; + public List<CTEClause> parents = new ArrayList<CTEClause>(); + + // materialized + public BaseSemanticAnalyzer source; + + public List<Task<?>> getTasks() { + return source == null ? null : source.rootTasks; + } + + public List<CTEClause> asExecutionOrder() { + List<CTEClause> execution = new ArrayList<CTEClause>(); + asExecutionOrder(new HashSet<CTEClause>(), execution); + return execution; + } + + public void asExecutionOrder(Set<CTEClause> visited, List<CTEClause> execution) { + for (CTEClause parent : parents) { + if (visited.add(parent)) { + parent.asExecutionOrder(visited, execution); + } + } + execution.add(this); + } + + @Override + public String toString() { + return alias == null ?
"" : alias; + } + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index d5c683daa303..47e2bf03fa64 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -144,7 +144,6 @@ import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.QueryProperties; import org.apache.hadoop.hive.ql.QueryState; -import org.apache.hadoop.hive.ql.ddl.table.create.CreateTableAnalyzer; import org.apache.hadoop.hive.ql.exec.ColumnInfo; import org.apache.hadoop.hive.ql.exec.FunctionInfo; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; @@ -1051,51 +1050,6 @@ boolean continueJoinMerge() { return !(runCBO && disableSemJoinReordering); } - @Override - Table materializeCTE(String cteName, CTEClause cte) throws HiveException { - - ASTNode createTable = new ASTNode(new ClassicToken(HiveParser.TOK_CREATETABLE)); - - ASTNode tableName = new ASTNode(new ClassicToken(HiveParser.TOK_TABNAME)); - tableName.addChild(new ASTNode(new ClassicToken(HiveParser.Identifier, cteName))); - - ASTNode temporary = new ASTNode(new ClassicToken(HiveParser.KW_TEMPORARY, MATERIALIZATION_MARKER)); - - createTable.addChild(tableName); - createTable.addChild(temporary); - createTable.addChild(cte.cteNode); - - CreateTableAnalyzer analyzer = new CreateTableAnalyzer(queryState); - analyzer.initCtx(ctx); - analyzer.init(false); - - // should share cte contexts - analyzer.aliasToCTEs.putAll(aliasToCTEs); - - HiveOperation operation = queryState.getHiveOperation(); - try { - analyzer.analyzeInternal(createTable); - } finally { - queryState.setCommandType(operation); - } - - Table table = analyzer.tableDesc.toTable(conf); - Path location = table.getDataLocation(); - try { - location.getFileSystem(conf).mkdirs(location); - } catch (IOException e) { - throw new HiveException(e); - } - 
table.setMaterializedTable(true); - - LOG.info(cteName + " will be materialized into " + location); - cte.source = analyzer; - - ctx.addMaterializedTable(cteName, table, getMaterializedTableStats(analyzer.getSinkOp())); - - return table; - } - @Override String fixCtasColumnName(String colName) { if (runCBO) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index c28782aff44a..5a96ef04ad67 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -117,7 +117,7 @@ import org.apache.hadoop.hive.ql.ddl.DDLDescWithTableProperties; import org.apache.hadoop.hive.ql.ddl.DDLWork; import org.apache.hadoop.hive.ql.ddl.misc.hooks.InsertCommitHookDesc; -import org.apache.hadoop.hive.ql.ddl.table.create.CreateTableAnalyzer; +import org.apache.hadoop.hive.ql.ddl.DDLSemanticAnalyzerFactory; import org.apache.hadoop.hive.ql.ddl.table.create.CreateTableDesc; import org.apache.hadoop.hive.ql.ddl.table.misc.preinsert.PreInsertTableDesc; import org.apache.hadoop.hive.ql.ddl.table.misc.properties.AlterTableUnsetPropertiesDesc; @@ -1440,47 +1440,6 @@ public Set getAllOutputs() { return writeEntities; } - class CTEClause { - CTEClause(String alias, ASTNode cteNode, ASTNode withColList) { - this.alias = alias; - this.cteNode = cteNode; - this.withColList = withColList; - } - String alias; - ASTNode cteNode; - ASTNode withColList; - boolean materialize; - int reference; - QBExpr qbExpr; - List parents = new ArrayList(); - - // materialized - SemanticAnalyzer source; - - List> getTasks() { - return source == null ? 
null : source.rootTasks; - } - - List asExecutionOrder() { - List execution = new ArrayList(); - asExecutionOrder(new HashSet(), execution); - return execution; - } - - void asExecutionOrder(Set visited, List execution) { - for (CTEClause parent : parents) { - if (visited.add(parent)) { - parent.asExecutionOrder(visited, execution); - } - } - execution.add(this); - } - - @Override - public String toString() { - return alias == null ? "" : alias; - } - } private List> getRealTasks(CTEClause cte) { if (cte == rootClause) { @@ -1569,12 +1528,12 @@ Table materializeCTE(String cteName, CTEClause cte) throws HiveException { createTable.addChild(temporary); createTable.addChild(cte.cteNode); - CreateTableAnalyzer analyzer = new CreateTableAnalyzer(queryState); + BaseSemanticAnalyzer analyzer = DDLSemanticAnalyzerFactory.getAnalyzer(createTable, queryState); analyzer.initCtx(ctx); analyzer.init(false); // should share cte contexts - analyzer.aliasToCTEs.putAll(aliasToCTEs); + analyzer.acceptCTEContext(aliasToCTEs); HiveOperation operation = queryState.getHiveOperation(); try { @@ -1583,7 +1542,7 @@ Table materializeCTE(String cteName, CTEClause cte) throws HiveException { queryState.setCommandType(operation); } - Table table = analyzer.tableDesc.toTable(conf); + Table table = analyzer.getCreatedTableDesc().toTable(conf); Path location = table.getDataLocation(); try { location.getFileSystem(conf).mkdirs(location); @@ -15458,7 +15417,8 @@ public Map getViewAliasToInput() { return viewAliasToInput; } - public Operator getSinkOp() { + @Override + public Operator getSinkOp() { return sinkOp; } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSemanticAnalyzer.java b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSemanticAnalyzer.java index 1b07a19d9180..7777b797f1f3 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSemanticAnalyzer.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSemanticAnalyzer.java @@ -20,12 +20,16 @@ import static 
org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.ArgumentMatchers.argThat; +import static org.mockito.Mockito.CALLS_REAL_METHODS; import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.mockStatic; import static org.mockito.Mockito.spy; import static org.mockito.Mockito.when; @@ -46,6 +50,8 @@ import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; import org.apache.hadoop.hive.ql.Context; +import org.apache.hadoop.hive.ql.ddl.DDLSemanticAnalyzerFactory; +import org.apache.hadoop.hive.ql.ddl.table.create.CreateTableAnalyzer; import org.apache.hadoop.hive.ql.QueryProperties; import org.apache.hadoop.hive.ql.QueryProperties.QueryType; import org.apache.hadoop.hive.ql.QueryState; @@ -65,6 +71,7 @@ import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Test; +import org.mockito.MockedStatic; import org.mockito.stubbing.Answer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -496,22 +503,48 @@ private void checkTablesUsed(String query, Set tables) throws Exception @Test public void testMaterializeCTEWithCBODisabled() throws Exception { + testMaterializeCTEUsesDDLFactory(false); + } + + @Test + public void testMaterializeCTEWithCBOEnabled() throws Exception { + testMaterializeCTEUsesDDLFactory(true); + } + + private void testMaterializeCTEUsesDDLFactory(boolean cboEnabled) throws Exception { HiveConf testConf = new HiveConf(conf); - testConf.setBoolVar(HiveConf.ConfVars.HIVE_CBO_ENABLED, false); - testConf.setIntVar(HiveConf.ConfVars.HIVE_CTE_MATERIALIZE_THRESHOLD, 2); + testConf.setBoolVar(HiveConf.ConfVars.HIVE_CBO_ENABLED, cboEnabled); 
SessionState.start(testConf); Context ctx = new Context(testConf); + // Reference CTE 3 times to exceed default materialization threshold of 2 String query = "WITH cte AS (SELECT COUNT(*) as cnt FROM table1) " + - "SELECT * FROM cte UNION ALL SELECT * FROM cte"; + "SELECT * FROM cte UNION ALL SELECT * FROM cte UNION ALL SELECT * FROM cte"; ASTNode astNode = ParseUtils.parse(query, ctx); QueryState queryState = new QueryState.Builder().withHiveConf(testConf).build(); BaseSemanticAnalyzer analyzer = SemanticAnalyzerFactory.get(queryState, astNode); analyzer.initCtx(ctx); - // This should not throw NPE after the fix - analyzer.analyze(astNode, ctx); + try (MockedStatic mocked = + mockStatic(DDLSemanticAnalyzerFactory.class, CALLS_REAL_METHODS)) { + BaseSemanticAnalyzer[] cteAnalyzer = new BaseSemanticAnalyzer[1]; + + mocked.when(() -> DDLSemanticAnalyzerFactory.getAnalyzer(any(ASTNode.class), any(QueryState.class))) + .thenAnswer(invocation -> { + BaseSemanticAnalyzer result = (BaseSemanticAnalyzer) invocation.callRealMethod(); + if (invocation.getArgument(0, ASTNode.class).getType() == HiveParser.TOK_CREATETABLE) { + cteAnalyzer[0] = result; + } + return result; + }); + + analyzer.analyze(astNode, ctx); + + assertNotNull("DDLSemanticAnalyzerFactory should be called for CTE materialization", cteAnalyzer[0]); + assertTrue("CTE materialization should use CreateTableAnalyzer", + cteAnalyzer[0] instanceof CreateTableAnalyzer); + } } } diff --git a/ql/src/test/queries/clientpositive/cte_materialize.q b/ql/src/test/queries/clientpositive/cte_materialize.q new file mode 100644 index 000000000000..b56c0d393b44 --- /dev/null +++ b/ql/src/test/queries/clientpositive/cte_materialize.q @@ -0,0 +1,27 @@ +-- Test CTE materialization with both CBO enabled and disabled +-- Verifies DDLSemanticAnalyzerFactory is used for CTE materialization +-- Also ensures that an NPE is no longer triggered with CBO off (HIVE-28724 regression) + +-- Test with CBO enabled (default) +explain +WITH cte 
AS ( + SELECT COUNT(*) as cnt FROM (SELECT 1 as id) t +) +SELECT * FROM cte +UNION ALL +SELECT * FROM cte +UNION ALL +SELECT * FROM cte; + +-- Test with CBO disabled +set hive.cbo.enable=false; + +explain +WITH cte AS ( + SELECT COUNT(*) as cnt FROM (SELECT 1 as id) t +) +SELECT * FROM cte +UNION ALL +SELECT * FROM cte +UNION ALL +SELECT * FROM cte; diff --git a/ql/src/test/queries/clientpositive/cte_materialize_no_cbo.q b/ql/src/test/queries/clientpositive/cte_materialize_no_cbo.q deleted file mode 100644 index 04b9a153aeca..000000000000 --- a/ql/src/test/queries/clientpositive/cte_materialize_no_cbo.q +++ /dev/null @@ -1,16 +0,0 @@ --- HIVE-28724 regression: SemanticAnalyzer.materializeCTE uses wrong analyzer class --- CalcitePlanner.materializeCTE was fixed to use CreateTableAnalyzer, --- but SemanticAnalyzer.materializeCTE still uses SemanticAnalyzer directly. --- Bug triggers when CBO is disabled - -set hive.cbo.enable=false; - -explain -WITH cte AS ( - SELECT COUNT(*) as cnt FROM (SELECT 1 as id) t -) -SELECT * FROM cte -UNION ALL -SELECT * FROM cte -UNION ALL -SELECT * FROM cte; diff --git a/ql/src/test/results/clientpositive/llap/cte_materialize_no_cbo.q.out b/ql/src/test/results/clientpositive/llap/cte_materialize.q.out similarity index 50% rename from ql/src/test/results/clientpositive/llap/cte_materialize_no_cbo.q.out rename to ql/src/test/results/clientpositive/llap/cte_materialize.q.out index 37210c0049d9..827963eb8514 100644 --- a/ql/src/test/results/clientpositive/llap/cte_materialize_no_cbo.q.out +++ b/ql/src/test/results/clientpositive/llap/cte_materialize.q.out @@ -156,3 +156,161 @@ STAGE PLANS: Processor Tree: ListSink +PREHOOK: query: explain +WITH cte AS ( + SELECT COUNT(*) as cnt FROM (SELECT 1 as id) t +) +SELECT * FROM cte +UNION ALL +SELECT * FROM cte +UNION ALL +SELECT * FROM cte +PREHOOK: type: QUERY +PREHOOK: Input: default@cte +#### A masked pattern was here #### +POSTHOOK: query: explain +WITH cte AS ( + SELECT COUNT(*) as cnt FROM 
(SELECT 1 as id) t +) +SELECT * FROM cte +UNION ALL +SELECT * FROM cte +UNION ALL +SELECT * FROM cte +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cte +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-4 depends on stages: Stage-2, Stage-0 + Stage-0 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-4 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.4 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.cte + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-4 + Tez +#### A masked pattern was here #### + 
Edges: + Map 3 <- Union 4 (CONTAINS) + Map 5 <- Union 4 (CONTAINS) + Map 6 <- Union 4 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: cte + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cnt (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 5 + Map Operator Tree: + TableScan + alias: cte + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cnt (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 6 + Map Operator Tree: + TableScan + alias: cte + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cnt (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Union 4 + Vertex: Union 4 + + Stage: Stage-0 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-3 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + From 5503ff10f8d04379d000228040280f51aa5d5eb3 Mon Sep 17 00:00:00 2001 From: Konstantin Bereznyakov Date: Mon, 13 Apr 2026 16:48:04 -0700 Subject: [PATCH 4/5] HIVE-29559: SonarQube feedback + reverted some unintended "refactoring" --- .../ddl/table/create/CreateTableAnalyzer.java | 5 --- .../hive/ql/parse/BaseSemanticAnalyzer.java | 32 ++++++++----------- .../hadoop/hive/ql/parse/CalcitePlanner.java | 3 -- .../hive/ql/parse/SemanticAnalyzer.java | 7 +++- .../hive/ql/parse/TestSemanticAnalyzer.java | 1 - 5 files changed, 20 insertions(+), 28 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/CreateTableAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/CreateTableAnalyzer.java index f746f8f52e96..d98de83bf596 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/CreateTableAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/CreateTableAnalyzer.java @@ -137,11 +137,6 @@ protected boolean genResolvedParseTree(ASTNode ast, PlannerContext plannerCtx) return analyzeAndResolveChildTree(child, plannerCtx); } - @Override - public void acceptCTEContext(Map aliasToCTEs) { - this.aliasToCTEs.putAll(aliasToCTEs); - } - @Override public CreateTableDesc getCreatedTableDesc() { return getQB().getTableDesc(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java index 0888c2801e45..cae7e29e6483 100644 --- 
a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java @@ -2181,7 +2181,7 @@ protected void setSqlKind(SqlKind sqlKind) { * Returns the sink operator for query plans that produce one. * @return the sink operator, or throws if not supported */ - public Operator getSinkOp() { + public Operator getSinkOp() { throw new UnsupportedOperationException( "getSinkOp not supported for " + getClass().getSimpleName()); } @@ -2204,38 +2204,34 @@ public CreateTableDesc getCreatedTableDesc() { "getCreatedTableDesc not supported for " + getClass().getSimpleName()); } - /** - * Represents a Common Table Expression (CTE) clause. - */ - public class CTEClause { - public CTEClause(String alias, ASTNode cteNode, ASTNode withColList) { + static class CTEClause { + CTEClause(String alias, ASTNode cteNode, ASTNode withColList) { this.alias = alias; this.cteNode = cteNode; this.withColList = withColList; } - - public String alias; - public ASTNode cteNode; - public ASTNode withColList; - public boolean materialize; - public int reference; - public QBExpr qbExpr; - public List parents = new ArrayList(); + String alias; + ASTNode cteNode; + ASTNode withColList; + boolean materialize; + int reference; + QBExpr qbExpr; + List parents = new ArrayList(); // materialized - public BaseSemanticAnalyzer source; + BaseSemanticAnalyzer source; - public List> getTasks() { + List> getTasks() { return source == null ? 
null : source.rootTasks; } - public List asExecutionOrder() { + List asExecutionOrder() { List execution = new ArrayList(); asExecutionOrder(new HashSet(), execution); return execution; } - public void asExecutionOrder(Set visited, List execution) { + void asExecutionOrder(Set visited, List execution) { for (CTEClause parent : parents) { if (visited.add(parent)) { parent.asExecutionOrder(visited, execution); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 47e2bf03fa64..fbc32eaa358e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -31,7 +31,6 @@ import java.util.Optional; import java.util.function.Function; import java.util.regex.Pattern; -import org.antlr.runtime.ClassicToken; import org.antlr.runtime.CommonToken; import org.antlr.runtime.tree.Tree; import org.antlr.runtime.tree.TreeVisitor; @@ -318,7 +317,6 @@ import org.apache.hadoop.hive.ql.parse.type.TypeCheckProcFactory; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; -import org.apache.hadoop.hive.ql.plan.HiveOperation; import org.apache.hadoop.hive.ql.plan.SelectDesc; import org.apache.hadoop.hive.ql.plan.mapper.EmptyStatsSource; import org.apache.hadoop.hive.ql.plan.mapper.StatsSource; @@ -337,7 +335,6 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.joda.time.Interval; -import java.io.IOException; import java.lang.reflect.Field; import java.lang.reflect.InvocationTargetException; import java.math.BigDecimal; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 5a96ef04ad67..e5a1c541f6f5 100644 --- 
a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -15418,10 +15418,15 @@ public Map getViewAliasToInput() { } @Override - public Operator getSinkOp() { + public Operator getSinkOp() { return sinkOp; } + @Override + public void acceptCTEContext(Map aliasToCTEs) { + this.aliasToCTEs.putAll(aliasToCTEs); + } + protected enum MaterializationRebuildMode { NONE, INSERT_OVERWRITE_REBUILD, diff --git a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSemanticAnalyzer.java b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSemanticAnalyzer.java index 7777b797f1f3..d0695ff4067b 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSemanticAnalyzer.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSemanticAnalyzer.java @@ -25,7 +25,6 @@ import static org.junit.Assert.fail; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyString; -import static org.mockito.ArgumentMatchers.argThat; import static org.mockito.Mockito.CALLS_REAL_METHODS; import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.mock; From ec7448b99dfd593f0bbf6dec2b373158c9234bf0 Mon Sep 17 00:00:00 2001 From: Konstantin Bereznyakov Date: Tue, 14 Apr 2026 08:32:10 -0700 Subject: [PATCH 5/5] HIVE-29559: using cast greatly reduces the need of refactoring --- .../ddl/table/create/CreateTableAnalyzer.java | 5 -- .../hive/ql/parse/BaseSemanticAnalyzer.java | 71 ------------------- .../hive/ql/parse/SemanticAnalyzer.java | 53 +++++++++++--- 3 files changed, 44 insertions(+), 85 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/CreateTableAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/CreateTableAnalyzer.java index d98de83bf596..49e5b5020f07 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/CreateTableAnalyzer.java +++ 
b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/CreateTableAnalyzer.java @@ -137,11 +137,6 @@ protected boolean genResolvedParseTree(ASTNode ast, PlannerContext plannerCtx) return analyzeAndResolveChildTree(child, plannerCtx); } - @Override - public CreateTableDesc getCreatedTableDesc() { - return getQB().getTableDesc(); - } - /** * Checks to see if given partition columns has DEFAULT or CHECK constraints (whether ENABLED or DISABLED) * Or has NOT NULL constraints (only ENABLED) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java index cae7e29e6483..986dcb7fcbbb 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java @@ -69,9 +69,7 @@ import org.apache.hadoop.hive.ql.cache.results.CacheUsage; import org.apache.hadoop.hive.ql.ddl.DDLDesc.DDLDescWithWriteId; import org.apache.hadoop.hive.ql.ddl.table.constraint.ConstraintsUtils; -import org.apache.hadoop.hive.ql.ddl.table.create.CreateTableDesc; import org.apache.hadoop.hive.ql.exec.FetchTask; -import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.exec.TaskFactory; import org.apache.hadoop.hive.ql.exec.Utilities; @@ -2176,73 +2174,4 @@ protected void setSqlKind(SqlKind sqlKind) { queryState.setSqlKind(sqlKind); } } - - /** - * Returns the sink operator for query plans that produce one. - * @return the sink operator, or throws if not supported - */ - public Operator getSinkOp() { - throw new UnsupportedOperationException( - "getSinkOp not supported for " + getClass().getSimpleName()); - } - - /** - * Accepts CTE context from another analyzer for CTE materialization. 
- * @param aliasToCTEs the CTE alias to clause mapping to import - */ - public void acceptCTEContext(Map aliasToCTEs) { - throw new UnsupportedOperationException( - "acceptCTEContext not supported for " + getClass().getSimpleName()); - } - - /** - * Returns the table descriptor created during analysis (e.g., for CTAS or CTE materialization). - * @return the created table descriptor, or throws if not supported - */ - public CreateTableDesc getCreatedTableDesc() { - throw new UnsupportedOperationException( - "getCreatedTableDesc not supported for " + getClass().getSimpleName()); - } - - static class CTEClause { - CTEClause(String alias, ASTNode cteNode, ASTNode withColList) { - this.alias = alias; - this.cteNode = cteNode; - this.withColList = withColList; - } - String alias; - ASTNode cteNode; - ASTNode withColList; - boolean materialize; - int reference; - QBExpr qbExpr; - List parents = new ArrayList(); - - // materialized - BaseSemanticAnalyzer source; - - List> getTasks() { - return source == null ? null : source.rootTasks; - } - - List asExecutionOrder() { - List execution = new ArrayList(); - asExecutionOrder(new HashSet(), execution); - return execution; - } - - void asExecutionOrder(Set visited, List execution) { - for (CTEClause parent : parents) { - if (visited.add(parent)) { - parent.asExecutionOrder(visited, execution); - } - } - execution.add(this); - } - - @Override - public String toString() { - return alias == null ? 
"" : alias; - } - } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index e5a1c541f6f5..66225b873a56 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -1440,6 +1440,47 @@ public Set getAllOutputs() { return writeEntities; } + class CTEClause { + CTEClause(String alias, ASTNode cteNode, ASTNode withColList) { + this.alias = alias; + this.cteNode = cteNode; + this.withColList = withColList; + } + String alias; + ASTNode cteNode; + ASTNode withColList; + boolean materialize; + int reference; + QBExpr qbExpr; + List parents = new ArrayList(); + + // materialized + SemanticAnalyzer source; + + List> getTasks() { + return source == null ? null : source.rootTasks; + } + + List asExecutionOrder() { + List execution = new ArrayList(); + asExecutionOrder(new HashSet(), execution); + return execution; + } + + void asExecutionOrder(Set visited, List execution) { + for (CTEClause parent : parents) { + if (visited.add(parent)) { + parent.asExecutionOrder(visited, execution); + } + } + execution.add(this); + } + + @Override + public String toString() { + return alias == null ? 
"" : alias; + } + } private List> getRealTasks(CTEClause cte) { if (cte == rootClause) { @@ -1528,12 +1569,12 @@ Table materializeCTE(String cteName, CTEClause cte) throws HiveException { createTable.addChild(temporary); createTable.addChild(cte.cteNode); - BaseSemanticAnalyzer analyzer = DDLSemanticAnalyzerFactory.getAnalyzer(createTable, queryState); + SemanticAnalyzer analyzer = (SemanticAnalyzer) DDLSemanticAnalyzerFactory.getAnalyzer(createTable, queryState); analyzer.initCtx(ctx); analyzer.init(false); // should share cte contexts - analyzer.acceptCTEContext(aliasToCTEs); + analyzer.aliasToCTEs.putAll(aliasToCTEs); HiveOperation operation = queryState.getHiveOperation(); try { @@ -1542,7 +1583,7 @@ Table materializeCTE(String cteName, CTEClause cte) throws HiveException { queryState.setCommandType(operation); } - Table table = analyzer.getCreatedTableDesc().toTable(conf); + Table table = analyzer.tableDesc.toTable(conf); Path location = table.getDataLocation(); try { location.getFileSystem(conf).mkdirs(location); @@ -15417,16 +15458,10 @@ public Map getViewAliasToInput() { return viewAliasToInput; } - @Override public Operator getSinkOp() { return sinkOp; } - @Override - public void acceptCTEContext(Map aliasToCTEs) { - this.aliasToCTEs.putAll(aliasToCTEs); - } - protected enum MaterializationRebuildMode { NONE, INSERT_OVERWRITE_REBUILD,