diff --git a/fixtures/dremio/expected/12.sql b/fixtures/dremio/expected/12.sql index 6c0c5b6..f924cd6 100644 --- a/fixtures/dremio/expected/12.sql +++ b/fixtures/dremio/expected/12.sql @@ -7,4 +7,6 @@ FROM demoCatalog.sales."orders_history" AT REF "refs/heads/main"; CREATE TABLE demoCatalog.sales.snapshot_orders AS -SELECT * FROM demoCatalog.sales."orders_history"; \ No newline at end of file +SELECT * +FROM demoCatalog.sales."orders_history" +AT COMMIT 'a1b2c3d4'; \ No newline at end of file diff --git a/fixtures/dremio/expected/17.sql b/fixtures/dremio/expected/17.sql new file mode 100644 index 0000000..da8be76 --- /dev/null +++ b/fixtures/dremio/expected/17.sql @@ -0,0 +1,19 @@ +CREATE TABLE a AS +SELECT * +FROM s.t +AT TAG v1; + +CREATE VIEW v AS +SELECT * +FROM s.t +AT BRANCH release +AS OF TIMESTAMP '2025-01-01 00:00:00'; + +INSERT INTO a +SELECT * +FROM s.t +AT COMMIT 'a1b2c3d4'; + +SELECT * +FROM s.t +AT REF "refs/heads/main"; \ No newline at end of file diff --git a/fixtures/dremio/in/17.sql b/fixtures/dremio/in/17.sql new file mode 100644 index 0000000..8ef21da --- /dev/null +++ b/fixtures/dremio/in/17.sql @@ -0,0 +1,4 @@ +create table a as select * from s.t at tag v1; +create view v as select * from s.t at branch release as of timestamp '2025-01-01 00:00:00'; +insert into a select * from s.t at commit 'a1b2c3d4'; +select * from s.t at ref "refs/heads/main"; diff --git a/fixtures/dremio/out/12.sql b/fixtures/dremio/out/12.sql index 6c0c5b6..f924cd6 100644 --- a/fixtures/dremio/out/12.sql +++ b/fixtures/dremio/out/12.sql @@ -7,4 +7,6 @@ FROM demoCatalog.sales."orders_history" AT REF "refs/heads/main"; CREATE TABLE demoCatalog.sales.snapshot_orders AS -SELECT * FROM demoCatalog.sales."orders_history"; \ No newline at end of file +SELECT * +FROM demoCatalog.sales."orders_history" +AT COMMIT 'a1b2c3d4'; \ No newline at end of file diff --git a/fixtures/dremio/out/17.sql b/fixtures/dremio/out/17.sql new file mode 100644 index 0000000..da8be76 --- /dev/null +++ b/fixtures/dremio/out/17.sql @@ -0,0 +1,19 @@ +CREATE TABLE a AS +SELECT * +FROM s.t +AT TAG v1; + +CREATE VIEW v AS +SELECT * +FROM s.t +AT BRANCH release +AS OF TIMESTAMP '2025-01-01 00:00:00'; + +INSERT INTO a +SELECT * +FROM s.t +AT COMMIT 'a1b2c3d4'; + +SELECT * +FROM s.t +AT REF "refs/heads/main"; \ No newline at end of file diff --git a/src/format/sql/mod.rs b/src/format/sql/mod.rs index e31feba..1a89409 100644 --- a/src/format/sql/mod.rs +++ b/src/format/sql/mod.rs @@ -1,4 +1,5 @@ use anyhow::{Context, Result}; +use regex::Regex; use sqlparser::ast::{ ArrayElemTypeDef, CastFormat, CastKind, ColumnDef, CreateTableOptions, DataType, DateTimeField, Expr, Function, GroupByExpr, HiveDistributionStyle, HiveFormat, Insert, Interval, ObjectName, @@ -257,6 +258,7 @@ fn format_statement( .as_ref() .expect("query exists for ctas") .as_ref(), + version.clone(), TableFormatOptions { if_not_exists: create_table.if_not_exists, or_replace: create_table.or_replace, @@ -273,7 +275,12 @@ fn format_statement( }, ) } else { - Ok(Doc::Text(stringify_with_alias_styles(stmt, alias_tracker))) + Ok(Doc::Text(stringify_with_alias_styles( + stmt, + alias_tracker, + version.as_ref(), + cfg, + ))) } } Statement::CreateView(create_view) => { @@ -297,15 +304,26 @@ fn format_statement( if_not_exists: create_view.if_not_exists, temporary: create_view.temporary, }, + version.clone(), cfg, alias_tracker, ) } else { - Ok(Doc::Text(stringify_with_alias_styles(stmt, alias_tracker))) + Ok(Doc::Text(stringify_with_alias_styles( + stmt, + alias_tracker, + version.as_ref(), + cfg, + ))) } } - Statement::Insert(insert) => format_insert(insert, cfg, alias_tracker), - other => Ok(Doc::Text(stringify_with_alias_styles(other, alias_tracker))), + Statement::Insert(insert) => format_insert(insert, cfg, version, alias_tracker), + other => Ok(Doc::Text(stringify_with_alias_styles( + other, + alias_tracker, + None, + cfg, + ))), } } @@ -347,6 +365,7 @@ struct CreateTableLayout<'a> { fn format_create_table_with_query( name: &ObjectName, query: &Query, + version: Option, opts: TableFormatOptions, cfg: &FormatterConfig, alias_tracker: &mut RelationAliasTracker, @@ -409,9 +428,14 @@ fn format_create_table_with_query( let prefer_multiline = has_pre_as_clause || (cfg.dialect == DialectKind::Dremio && should_prefer_multiline_ctas(query)); - let body = - format_query_with_layout_preference(query, cfg, None, alias_tracker, prefer_multiline) - .unwrap_or_else(|_| Doc::Text(query.to_string())); + let body = format_query_with_layout_preference( + query, + cfg, + version.clone(), + alias_tracker, + prefer_multiline, + ) + .unwrap_or_else(|_| Doc::Text(stringify_query_with_version(query, version.as_ref(), cfg))); parts.push(Doc::Line); parts.push(body); @@ -435,11 +459,18 @@ fn should_prefer_multiline_ctas(query: &Query) -> bool { fn format_insert( insert: &Insert, cfg: &FormatterConfig, + version: Option, alias_tracker: &mut RelationAliasTracker, ) -> Result { let source = match &insert.source { Some(query) => query, - None => return Ok(Doc::Text(Statement::Insert(insert.clone()).to_string())), + None => { + return Ok(Doc::Text(reattach_dremio_version_clause( + &Statement::Insert(insert.clone()).to_string(), + version.as_ref(), + cfg, + ))); + } }; let complex = insert.overwrite @@ -454,7 +485,11 @@ fn format_insert( || insert.or.is_some(); if complex { - return Ok(Doc::Text(Statement::Insert(insert.clone()).to_string())); + return Ok(Doc::Text(reattach_dremio_version_clause( + &Statement::Insert(insert.clone()).to_string(), + version.as_ref(), + cfg, + ))); } let mut head = Vec::new(); @@ -477,8 +512,8 @@ fn format_insert( head.push(format_parenthesized_inline(cols)); } - let body = format_query(source, cfg, None, alias_tracker) - .unwrap_or_else(|_| Doc::Text(source.to_string())); + let body = format_query(source, cfg, version.clone(), alias_tracker) + .unwrap_or_else(|_| Doc::Text(stringify_query_with_version(source, version.as_ref(), cfg))); Ok(Doc::Group(vec![Doc::Group(head), Doc::Line, body])) } @@ -520,6 +555,7 @@ fn format_create_view( columns: &[ViewColumnDef], query: &Query, opts: CreateViewOptions, + version: Option, cfg: &FormatterConfig, alias_tracker: &mut RelationAliasTracker, ) -> Result { @@ -547,8 +583,8 @@ fn format_create_view( parts.push(format_parenthesized_inline(cols)); } - let body = format_query(query, cfg, None, alias_tracker) - .unwrap_or_else(|_| Doc::Text(query.to_string())); + let body = format_query(query, cfg, version.clone(), alias_tracker) + .unwrap_or_else(|_| Doc::Text(stringify_query_with_version(query, version.as_ref(), cfg))); parts.push(Doc::Space); parts.push(keyword_doc(cfg, "AS")); @@ -1167,10 +1203,63 @@ fn render_option_content(rest: &str, cfg: &FormatterConfig) -> Doc { fn stringify_with_alias_styles( stmt: &Statement, alias_tracker: &mut RelationAliasTracker, + version: Option<&DremioVersionClause>, + cfg: &FormatterConfig, ) -> String { let mut text = stmt.to_string(); strip_relation_aliases_in_statement(&mut text, stmt, alias_tracker); - text + reattach_dremio_version_clause(&text, version, cfg) +} + +fn stringify_query_with_version( + query: &Query, + version: Option<&DremioVersionClause>, + cfg: &FormatterConfig, +) -> String { + let text = query.to_string(); + reattach_dremio_version_clause(&text, version, cfg) +} + +fn reattach_dremio_version_clause( + sql: &str, + version: Option<&DremioVersionClause>, + cfg: &FormatterConfig, +) -> String { + let Some(version) = version else { + return sql.to_string(); + }; + + let from_re = Regex::new(r"(?is)\bFROM\b\s+").expect("from regex"); + let Some(from_match) = from_re.find(sql) else { + return sql.to_string(); + }; + + let boundary_re = Regex::new( + r"(?is)\b(WHERE|GROUP\s+BY|HAVING|QUALIFY|WINDOW|ORDER\s+BY|LIMIT|OFFSET|JOIN|INNER|LEFT|RIGHT|FULL|CROSS|UNION|EXCEPT|INTERSECT)\b", + ) + .expect("boundary regex"); + + let after_from = &sql[from_match.end()..]; + let boundary_pos = boundary_re + .find(after_from) + .map(|m| from_match.end() + m.start()) + .unwrap_or(sql.len()); + + let head = sql[..boundary_pos].trim_end(); + let tail = sql[boundary_pos..].trim_start(); + let version_text = format_doc( + &format_dremio_version_clause(version, cfg), + &PrintConfig { + line_length: cfg.line_length, + indent_width: cfg.indent_width, + }, + ); + + if tail.is_empty() { + format!("{head}\n{version_text}") + } else { + format!("{head}\n{version_text}\n{tail}") + } } fn strip_relation_aliases_in_statement( @@ -1561,6 +1650,41 @@ WHERE NOT EXISTS ( ); } + #[test] + fn preserves_dremio_version_clause_in_ctas_view_insert_and_select() { + let cfg = FormatterConfig { + dialect: DialectKind::Dremio, + ..Default::default() + }; + let sql = "\ +CREATE TABLE a AS SELECT * FROM s.t AT TAG v1; +CREATE VIEW v AS SELECT * FROM s.t AT TAG v1; +INSERT INTO a SELECT * FROM s.t AT TAG v1; +SELECT * FROM s.t AT TAG v1;"; + let out = format_str(sql, &cfg); + assert_eq!( + out.trim(), + "CREATE TABLE a AS\nSELECT *\nFROM s.t\nAT TAG v1;\n\nCREATE VIEW v AS\nSELECT *\nFROM s.t\nAT TAG v1;\n\nINSERT INTO a\nSELECT *\nFROM s.t\nAT TAG v1;\n\nSELECT *\nFROM s.t\nAT TAG v1;" + ); + } + + #[test] + fn preserves_dremio_as_of_timestamp_in_non_top_level_statements() { + let cfg = FormatterConfig { + dialect: DialectKind::Dremio, + ..Default::default() + }; + let sql = "\ +CREATE TABLE a AS SELECT * FROM s.t AT BRANCH release AS OF TIMESTAMP '2025-01-01 00:00:00'; +CREATE VIEW v AS SELECT * FROM s.t AT BRANCH release AS OF TIMESTAMP '2025-01-01 00:00:00'; +INSERT INTO a SELECT * FROM s.t AT BRANCH release AS OF TIMESTAMP '2025-01-01 00:00:00';"; + let out = format_str(sql, &cfg); + assert_eq!( + out.trim(), + "CREATE TABLE a AS\nSELECT *\nFROM s.t\nAT BRANCH release\nAS OF TIMESTAMP '2025-01-01 00:00:00';\n\nCREATE VIEW v AS\nSELECT *\nFROM s.t\nAT BRANCH release\nAS OF TIMESTAMP '2025-01-01 00:00:00';\n\nINSERT INTO a\nSELECT *\nFROM s.t\nAT BRANCH release\nAS OF TIMESTAMP '2025-01-01 00:00:00';" + ); + } + #[test] fn passes_through_dremio_use_command() { let cfg = FormatterConfig { diff --git a/src/parser.rs b/src/parser.rs index 4b5aae7..2b51653 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1012,6 +1012,55 @@ mod tests { )); } + #[test] + fn captures_dremio_version_clause_for_ctas_view_and_insert() { + let sql = "\ +CREATE TABLE a AS SELECT * FROM s.t AT TAG v1; +CREATE VIEW v AS SELECT * FROM s.t AT BRANCH dev AS OF TIMESTAMP '2025-01-01 00:00:00'; +INSERT INTO a SELECT * FROM s.t AT COMMIT 'a1b2c3d4';"; + let stmts = parse_sql(sql, DialectKind::Dremio).expect("parse dremio versioned statements"); + assert_eq!(stmts.len(), 3); + + match &stmts[0] { + ParsedStatement::Sql { version, .. } => { + assert!(matches!( + version, + Some(DremioVersionClause { + at: Some(VersionSelector::Tag(tag)), + as_of_timestamp: None + }) if tag == "v1" + )); + } + other => panic!("expected SQL statement, got {other:?}"), + } + + match &stmts[1] { + ParsedStatement::Sql { version, .. } => { + assert!(matches!( + version, + Some(DremioVersionClause { + at: Some(VersionSelector::Branch(branch)), + as_of_timestamp: Some(ts) + }) if branch == "dev" && ts == "'2025-01-01 00:00:00'" + )); + } + other => panic!("expected SQL statement, got {other:?}"), + } + + match &stmts[2] { + ParsedStatement::Sql { version, .. } => { + assert!(matches!( + version, + Some(DremioVersionClause { + at: Some(VersionSelector::Commit(commit)), + as_of_timestamp: None + }) if commit == "'a1b2c3d4'" + )); + } + other => panic!("expected SQL statement, got {other:?}"), + } + } + #[test] fn parses_dremio_show_reflections() { let sql = "show reflections in my_space";