diff --git a/CHANGELOG b/CHANGELOG index 2e670bc..48c4a7b 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,106 @@ +2026-05-03 v1.0.19 + + Bug fixes: + - Refactored comment-related query construction into dedicated + helper methods on `Dump`: + `build_tables_standalone_query`, + `build_regular_views_query`, + `build_materialized_views_query`, + `build_view_column_comments_query`, + `build_sequences_standalone_query`, + and on `Table`: `build_indexes_bulk_query`. + - Kept `pg_description` joins scoped to relation comments by + filtering on `pg_description.classoid = 'pg_class'::regclass` + and `pg_description.objsubid = 0` to avoid catalog + collisions. + - Normalized SQL formatting in the new helper query strings. + - `Comparer::compare_grants` no longer runs a duplicate + `iter().find(...)` over `from_cols` for every TO column + whose own ACL is empty. The first scan was discarded + and the second was redone unconditionally; both are + now replaced by a single per-table + `HashMap<&str, &[String]>` lookup, which also reduces + the column-grants emission from O(C^2) to O(C) on + wide tables. + - `Comparer::mark_serial_columns` now keys + `serial_columns` by `(schema, table, column)` tuple + instead of a `format!("{}.{}.{}", ...)` string that + was parsed back via `splitn(3, '.')`. The string + form silently misparsed any identifier containing a + literal `.` (legal in PostgreSQL when quoted), + leaving the corresponding column unmarked. + - `Comparer::compare_sequences` had a dead + `dropped_sequences: HashSet` that was + checked and updated on every iteration of a loop + over `self.from.sequences`. Sequences are unique by + `(schema, name)` so the dedupe could never fire; + the set and its per-iteration `format!`/`contains`/ + `insert` calls are removed. + - `Comparer::compare_routines` and + `Comparer::compare_routines_and_views` no longer + clone every `Routine` (each carries the full + `source_code` string). 
The drop path now holds + `routines_to_drop: Vec<&Routine>` borrowing into + `self.from.routines`. The create/update path — + previously forced to clone because + `process_target_routine` took `&mut self` and so + conflicted with any borrow into `self.from` / + `self.to` — is unblocked by refactoring that + method into a free associated function + `Self::emit_routine_diff(&mut script, use_drop, + routine, from_routine)`. Disjoint-field split + borrows now allow `&mut self.script` to coexist + with `&Routine` borrows out of the dump fields, + removing the per-emit clones. Same pattern as the + pre-existing `Self::emit_drop`. + + Performance: + - `Dump::process` no longer materializes the entire + serialized dump as a `String` before handing it to + the zip writer. The JSON payload is now streamed + into `ZipWriter` via `serde_json::to_writer` + through a 256 KiB `BufWriter`, bounding peak + memory at the buffer plus zlib's internal state + rather than ~2x the uncompressed dump size. The + `BufWriter` is required for speed: `to_writer` + emits one write per JSON token, and feeding those + straight into the deflate stream paid a per-call + cost on every one (an early unbuffered version of + this change made dumps roughly 10x slower). The + write path is exposed as `Dump::write_to_file`, + mirroring the existing `Dump::read_from_file` and + making the round-trip directly testable. + + Tests: + - Added regression tests proving every new query + builder includes the `pg_class` classoid filter: + `build_tables_standalone_query_filters_by_pg_class`, + `build_regular_views_query_filters_by_pg_class`, + `build_materialized_views_query_filters_by_pg_class`, + `build_view_column_comments_query_filters_by_pg_class`, + `build_sequences_standalone_query_filters_by_pg_class`, + and `build_indexes_bulk_query_filters_by_pg_class`. 
+ - `write_to_file_round_trips_via_read_from_file` + builds a Dump with schemas, extensions, tables, + views, sequences, and routines, writes it via the + new streaming path, reads it back, and asserts + every collection size and a few content fields + match. + - `compare_column_grants_dispatches_per_column_acl_correctly` + builds a table with three columns whose effective + `from_acl` differs (kept / revoked / newly + granted) and verifies that the per-table + column-ACL HashMap dispatches each column to the + right diff outcome — guarding against off-by-one + mistakes that single-column tests would miss. + - `mark_serial_columns_handles_dotted_identifier_names` + drives the `mark_serial_columns` path with a + schema, table, and column name that all contain a + literal `.`, asserting that the new tuple key + still locates the target column. The pre-fix + `splitn(3, '.')` parser would have failed on this + input. + 2026-04-25 v1.0.18 Bug fixes: diff --git a/app/Cargo.lock b/app/Cargo.lock index f07f20c..7a42b19 100644 --- a/app/Cargo.lock +++ b/app/Cargo.lock @@ -1146,7 +1146,7 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" [[package]] name = "pgc" -version = "1.0.18" +version = "1.0.19" dependencies = [ "chrono", "clap", diff --git a/app/Cargo.toml b/app/Cargo.toml index 964a871..196bdea 100644 --- a/app/Cargo.toml +++ b/app/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "pgc" -version = "1.0.18" +version = "1.0.19" edition = "2024" license = "MIT" diff --git a/app/src/comparer/core.rs b/app/src/comparer/core.rs index 607cbc1..dfef6b7 100644 --- a/app/src/comparer/core.rs +++ b/app/src/comparer/core.rs @@ -37,8 +37,9 @@ pub struct Comparer { // default privileges and must be accounted for in compare_grants. recreated_tables: HashSet, // Tracks columns that should use serial/bigserial/smallserial type. - // Key: "schema.table.column", Value: "serial", "bigserial", or "smallserial". 
-    serial_columns: HashMap<String, String>,
+    // Key: (schema, table, column); Value: "serial", "bigserial", or "smallserial".
+    // The tuple form avoids ambiguity when any identifier contains a literal '.'.
+    serial_columns: HashMap<(String, String, String), String>,
 }
 
 impl Comparer {
@@ -165,20 +166,16 @@ impl Comparer {
             .map(|(i, t)| ((t.schema.as_str(), t.name.as_str()), i))
             .collect();
 
-        // Collect indices first to avoid borrow conflict
+        // Collect indices first to avoid borrow conflict between the
+        // shared borrow of `self.serial_columns` and the mutable borrow
+        // of `self.to.tables` below.
         let updates: Vec<(usize, String, String)> = self
             .serial_columns
             .iter()
-            .filter_map(|(key, serial_type)| {
-                let parts: Vec<&str> = key.splitn(3, '.').collect();
-                if parts.len() == 3 {
-                    let (schema, table, column) = (parts[0], parts[1], parts[2]);
-                    to_table_idx
-                        .get(&(schema, table))
-                        .map(|&idx| (idx, column.to_string(), serial_type.clone()))
-                } else {
-                    None
-                }
+            .filter_map(|((schema, table, column), serial_type)| {
+                to_table_idx
+                    .get(&(schema.as_str(), table.as_str()))
+                    .map(|&idx| (idx, column.clone(), serial_type.clone()))
             })
             .collect();
 
@@ -202,9 +199,22 @@ impl Comparer {
         }
     }
 
-    fn process_target_routine(&mut self, routine: &Routine, from_routine: Option<&Routine>) {
+    /// Emit the diff for a single TO-side routine into `script`. Free
+    /// associated function (rather than `&mut self` method) so callers
+    /// can pass `&Routine` borrows into `self.from` / `self.to` while
+    /// also passing `&mut self.script` — disjoint fields allow split
+    /// borrows, which lets the dependency-sorted emit loops avoid
+    /// cloning each `Routine` (with its potentially large `source_code`
+    /// string) just to satisfy the borrow checker. Same rationale as
+    /// [`Comparer::emit_drop`].
+    fn emit_routine_diff(
+        script: &mut String,
+        use_drop: bool,
+        routine: &Routine,
+        from_routine: Option<&Routine>,
+    ) {
         if routine.hash.is_none() {
-            self.script.push_str(
+            script.push_str(
                 format!(
                     "/* Skipping routine {}.{}({}) due to missing hash. */\n",
                     routine.schema, routine.name, routine.arguments
@@ -216,7 +226,7 @@ impl Comparer {
 
         if let Some(from_routine) = from_routine {
             if from_routine.hash.is_none() {
-                self.script.push_str(
+                script.push_str(
                     format!(
                         "/* Skipping routine {}.{}({}) due to missing hash. */\n",
                         from_routine.schema, from_routine.name, from_routine.arguments
@@ -231,37 +241,26 @@ impl Comparer {
                 || from_routine.arguments != routine.arguments
                 || from_routine.arguments_defaults != routine.arguments_defaults
             {
-                let drop_script = from_routine.get_drop_script();
-                if self.use_drop {
-                    self.script.push_str(drop_script.as_str());
-                } else {
-                    self.script.push_str(
-                        drop_script
-                            .lines()
-                            .map(|l| format!("-- {}\n", l))
-                            .collect::<String>()
-                            .as_str(),
-                    );
-                }
+                Self::emit_drop(script, use_drop, &from_routine.get_drop_script());
             }
-                self.script.push_str(
+                script.push_str(
                     format!(
                         "/* Routine: {}.{}({})*/\n",
                         routine.schema, routine.name, routine.arguments
                     )
                     .as_str(),
                 );
-                self.script.push_str(routine.get_script().as_str());
+                script.push_str(routine.get_script().as_str());
             }
         } else {
-            self.script.push_str(
+            script.push_str(
                 format!(
                     "/* Routine: {}.{}({})*/\n",
                     routine.schema, routine.name, routine.arguments
                 )
                 .as_str(),
            );
-            self.script.push_str(routine.get_script().as_str());
+            script.push_str(routine.get_script().as_str());
         }
     }
 
@@ -1125,8 +1124,10 @@ impl Comparer {
                     "smallint" => "smallserial",
                     _ => "serial",
                 };
-                let key = format!("{}.{}.{}", schema, table, column);
-                self.serial_columns.insert(key, serial_type.to_string());
+                self.serial_columns.insert(
+                    (schema.clone(), table.clone(), column.clone()),
+                    serial_type.to_string(),
+                );
             }
             self.script.push_str(
                 format!(
@@ -1148,12 +1149,7 @@ impl Comparer {
         }
 
         {
-            let mut 
dropped_sequences = HashSet::new();
             for sequence in &self.from.sequences {
-                if dropped_sequences.contains(&format!("{}.{}", sequence.schema, sequence.name)) {
-                    continue;
-                }
-
                 if to_seq_keys.contains(&(sequence.schema.as_str(), sequence.name.as_str())) {
                     continue; // Sequence is present in both dumps, we already processed it
                 }
@@ -1237,7 +1233,6 @@ impl Comparer {
                     .as_str(),
                 );
             }
-                dropped_sequences.insert(format!("{}.{}", sequence.schema, sequence.name));
             }
         }
 
@@ -1274,14 +1269,16 @@ impl Comparer {
             .collect();
 
         // ── Drop routines not present in TO (reverse dependency order) ──
-        let routines_to_drop: Vec<Routine> = self
+        // Borrow rather than clone: we only need read access to each Routine
+        // for dep analysis and emission, and `self.from.routines` is never
+        // mutated through this function.
+        let routines_to_drop: Vec<&Routine> = self
             .from
             .routines
             .iter()
             .filter(|r| {
                 !to_routine_keys.contains(&(r.schema.clone(), r.name.clone(), r.arguments.clone()))
             })
-            .cloned()
             .collect();
 
         if !routines_to_drop.is_empty() {
@@ -1327,7 +1324,7 @@
             drop_order.reverse();
 
             for idx in drop_order {
-                let routine = &routines_to_drop[idx];
+                let routine = routines_to_drop[idx];
                 self.script.push_str(
                     format!(
                         "/* Routine: {}.{}({})*/\n",
@@ -1353,13 +1350,13 @@
         }
 
         // ── Create / update routines in dependency order ──
-        let create_routines: Vec<(usize, Routine)> = self
+        // Borrow into self.to.routines instead of cloning each Routine.
+ let create_routines: Vec<(usize, &Routine)> = self .to .routines .iter() .enumerate() .filter(|(_, r)| r.hash.is_some()) - .map(|(i, r)| (i, r.clone())) .collect(); if !create_routines.is_empty() { @@ -1399,7 +1396,7 @@ impl Comparer { } let sorted = Self::kahn_toposort(n, &depends_on, |i| { - let r = &create_routines[i].1; + let r = create_routines[i].1; let priority: u8 = if r.lang.eq_ignore_ascii_case("sql") { 1 } else { @@ -1413,16 +1410,32 @@ impl Comparer { ) }); - for idx in sorted { - let routine = &create_routines[idx].1; - let from_routine = from_routine_map - .get(&( - routine.schema.clone(), - routine.name.clone(), - routine.arguments.clone(), - )) - .map(|&i| self.from.routines[i].clone()); - self.process_target_routine(routine, from_routine.as_ref()); + // Resolve the emit plan into (to_idx, optional from_idx) pairs + // up front. The actual emission borrows the routines directly + // out of `self.to` / `self.from` and writes through + // `&mut self.script`; `Self::emit_routine_diff` is a free + // associated function precisely so these disjoint-field + // borrows can coexist without cloning each `Routine`. 
+            let plan: Vec<(usize, Option<usize>)> = sorted
+                .iter()
+                .map(|&idx| {
+                    let (to_idx, routine) = create_routines[idx];
+                    let from_idx = from_routine_map
+                        .get(&(
+                            routine.schema.clone(),
+                            routine.name.clone(),
+                            routine.arguments.clone(),
+                        ))
+                        .copied();
+                    (to_idx, from_idx)
+                })
+                .collect();
+            drop(create_routines);
+
+            for (to_idx, from_idx) in plan {
+                let routine = &self.to.routines[to_idx];
+                let from_routine = from_idx.map(|i| &self.from.routines[i]);
+                Self::emit_routine_diff(&mut self.script, self.use_drop, routine, from_routine);
+            }
         }
 
@@ -2863,14 +2876,17 @@ impl Comparer {
         // ──────────────────────────────────────────────────────────
         // Phase 1 – Drop routines that are not present in TO
         // ──────────────────────────────────────────────────────────
-        let routines_to_drop: Vec<Routine> = self
+        // Borrow rather than clone: dependency analysis and emission
+        // only need read access; `self.from.routines` is not mutated
+        // during this function, and emission only mutates `self.script`
+        // (a disjoint field).
+ let routines_to_drop: Vec<&Routine> = self .from .routines .iter() .filter(|r| { !to_routine_keys.contains(&(r.schema.clone(), r.name.clone(), r.arguments.clone())) }) - .cloned() .collect(); if !routines_to_drop.is_empty() { @@ -2918,7 +2934,7 @@ impl Comparer { drop_order.reverse(); for idx in drop_order { - let routine = &routines_to_drop[idx]; + let routine = routines_to_drop[idx]; self.script.push_str( format!( "/* Routine: {}.{}({})*/\n", @@ -3132,15 +3148,23 @@ impl Comparer { let view = self.to.views[orig_idx].clone(); self.emit_view_create(&view); } else { - let routine = self.to.routines[orig_idx].clone(); - let from_routine = from_routine_map - .get(&( - routine.schema.clone(), - routine.name.clone(), - routine.arguments.clone(), - )) - .map(|&i| self.from.routines[i].clone()); - self.process_target_routine(&routine, from_routine.as_ref()); + // Resolve the matching FROM-side routine by index so + // `emit_routine_diff` can borrow the `Routine` out of + // `self.from` / `self.to` directly — no clones of the + // potentially-large `source_code` string. + let from_idx = { + let routine = &self.to.routines[orig_idx]; + from_routine_map + .get(&( + routine.schema.clone(), + routine.name.clone(), + routine.arguments.clone(), + )) + .copied() + }; + let routine = &self.to.routines[orig_idx]; + let from_routine = from_idx.map(|i| &self.from.routines[i]); + Self::emit_routine_diff(&mut self.script, self.use_drop, routine, from_routine); } } @@ -3662,22 +3686,19 @@ impl Comparer { .copied() .unwrap_or(&[]); + // Index from-columns by name once per table so the per-column + // lookup below is O(1) instead of an O(C²) linear scan. 
+ let from_col_acls: HashMap<&str, &[String]> = from_cols + .iter() + .map(|c| (c.name.as_str(), c.acl.as_slice())) + .collect(); + for col in &table.columns { - if col.acl.is_empty() { - let from_col_acl: &[String] = from_cols - .iter() - .find(|c| c.name == col.name) - .map(|c| c.acl.as_slice()) - .unwrap_or(&[]); - if from_col_acl.is_empty() { - continue; - } + let from_col_acl: &[String] = + from_col_acls.get(col.name.as_str()).copied().unwrap_or(&[]); + if col.acl.is_empty() && from_col_acl.is_empty() { + continue; } - let from_col_acl: &[String] = from_cols - .iter() - .find(|c| c.name == col.name) - .map(|c| c.acl.as_slice()) - .unwrap_or(&[]); let col_grants = acl::generate_column_grants_script( from_col_acl, &col.acl, diff --git a/app/src/comparer/core_tests.rs b/app/src/comparer/core_tests.rs index 84605b3..bc03610 100644 --- a/app/src/comparer/core_tests.rs +++ b/app/src/comparer/core_tests.rs @@ -6806,3 +6806,167 @@ async fn compare_column_grants_revokes_former_owner_when_to_has_no_column_acl() "Current owner must never appear in column grant output, got: {script}" ); } + +/// Regression test for the per-table column-ACL HashMap rewrite. Previously +/// each TO column did a linear scan over `from_cols`; the rewrite indexes +/// `from_cols` by name once per table. This test exercises a table with +/// multiple columns where each column's effective `from_acl` differs, to +/// catch off-by-one mistakes that a single-column test would miss. +#[tokio::test] +async fn compare_column_grants_dispatches_per_column_acl_correctly() { + let mut from_dump = Dump::new(DumpConfig::default()); + let mut to_dump = Dump::new(DumpConfig::default()); + + // FROM: three columns with distinct ACL states. 
+ let mut from_a = int_column("public", "t", "a", 1); + from_a.acl = vec!["reader=r/owner".to_string()]; + let mut from_b = int_column("public", "t", "b", 2); + from_b.acl = vec!["reader=r/owner".to_string()]; + let from_c = int_column("public", "t", "c", 3); // no ACL in FROM + + let from_table = Table::new( + "public".to_string(), + "t".to_string(), + "public".to_string(), + "t".to_string(), + "owner".to_string(), + None, + vec![from_a, from_b, from_c], + Vec::new(), + Vec::new(), + Vec::new(), + None, + ); + + // TO: a kept, b loses its grant, c gains a grant. + let mut to_a = int_column("public", "t", "a", 1); + to_a.acl = vec!["reader=r/owner".to_string()]; + let to_b = int_column("public", "t", "b", 2); // grant should be revoked + let mut to_c = int_column("public", "t", "c", 3); + to_c.acl = vec!["writer=a/owner".to_string()]; // INSERT grant added + + let to_table = Table::new( + "public".to_string(), + "t".to_string(), + "public".to_string(), + "t".to_string(), + "owner".to_string(), + None, + vec![to_a, to_b, to_c], + Vec::new(), + Vec::new(), + Vec::new(), + None, + ); + + from_dump.tables.push(from_table); + to_dump.tables.push(to_table); + + let mut comparer = Comparer::new(from_dump, to_dump, false, false, true, GrantsMode::Full); + comparer.compare_grants().await.unwrap(); + let script = comparer.get_script(); + + // a: identical → nothing emitted for column a. + assert!( + !script.contains("(a)"), + "column a is unchanged and must not appear, got: {script}" + ); + // b: REVOKE for the dropped grant. + assert!( + script.contains("REVOKE SELECT (b) ON TABLE public.t FROM reader;"), + "expected REVOKE for column b, got: {script}" + ); + // c: GRANT for the added INSERT privilege. + assert!( + script.contains("GRANT INSERT (c) ON TABLE public.t TO writer;"), + "expected GRANT INSERT on column c, got: {script}" + ); + // Sanity: no cross-talk where column b's REVOKE refers to writer/c, etc. 
+ assert!( + !script.contains("REVOKE SELECT (c)"), + "column c had no FROM grant and must not be revoked, got: {script}" + ); + assert!( + !script.contains("GRANT INSERT (a)") && !script.contains("GRANT INSERT (b)"), + "INSERT grant must be scoped to column c only, got: {script}" + ); +} + +/// Regression test for the `serial_columns` key change from a joined +/// `"schema.table.column"` `String` to a `(String, String, String)` tuple. +/// The old form was parsed back via `splitn(3, '.')`, which silently +/// misparsed any identifier containing a literal `.` (legal in PostgreSQL +/// when quoted). With the tuple key, dotted identifiers round-trip cleanly +/// and `mark_serial_columns` still finds the target column. +#[tokio::test] +async fn mark_serial_columns_handles_dotted_identifier_names() { + let from_dump = Dump::new(DumpConfig::default()); + let mut to_dump = Dump::new(DumpConfig::default()); + + // Schema, table, and column names all contain a literal dot — the + // pre-fix `splitn(3, '.')` would slice these in the wrong place and + // fail to locate the column. 
+ let schema = "weird.schema"; + let table = "weird.table"; + let column = "weird.id"; + + let serial_seq = Sequence::new( + schema.to_string(), + format!("{table}_{column}_seq"), + "postgres".to_string(), + "integer".to_string(), + Some(1), + Some(1), + Some(2147483647), + Some(1), + false, + Some(1), + Some(1), + Some(schema.to_string()), + Some(table.to_string()), + Some(column.to_string()), + ); + to_dump.sequences.push(serial_seq); + + let mut col = int_column(schema, table, column, 1); + col.column_default = Some(format!( + "nextval('{schema}.{table}_{column}_seq'::regclass)" + )); + col.is_nullable = false; + + let table_obj = Table::new( + schema.to_string(), + table.to_string(), + schema.to_string(), + table.to_string(), + "postgres".to_string(), + None, + vec![col], + Vec::new(), + Vec::new(), + Vec::new(), + None, + ); + to_dump.tables.push(table_obj); + + let mut comparer = Comparer::new(from_dump, to_dump, false, false, true, GrantsMode::Ignore); + comparer.compare_sequences().await.unwrap(); + comparer.mark_serial_columns(); + + let to_table = comparer + .to + .tables + .iter() + .find(|t| t.schema == schema && t.name == table) + .expect("table must round-trip"); + let to_column = to_table + .columns + .iter() + .find(|c| c.name == column) + .expect("column must round-trip"); + assert_eq!( + to_column.serial_type.as_deref(), + Some("serial"), + "dotted-name column must still be marked as serial" + ); +} diff --git a/app/src/dump/core.rs b/app/src/dump/core.rs index 06abb90..c40b708 100644 --- a/app/src/dump/core.rs +++ b/app/src/dump/core.rs @@ -25,8 +25,7 @@ use sqlx::postgres::types::Oid; use std::cmp::Ordering; use std::collections::HashMap; use std::fs::File; -use std::io::Write; -use std::io::{Error, Read}; +use std::io::{BufWriter, Error, Read, Write}; use zip::ZipWriter; use zip::write::SimpleFileOptions; @@ -221,18 +220,31 @@ impl Dump { pool.close().await; - // Serialize the dump to a file. 
- let serialized_data = serde_json::to_string(&self) - .map_err(|e| Error::other(format!("Failed to serialize dump: {e}.")))?; - let serialized_bytes = serialized_data.as_bytes(); + self.write_to_file(&self.configuration.file) + } - let file = File::create(&self.configuration.file)?; + /// Persist this dump to `path` as a zip-compressed JSON archive. + /// + /// The JSON payload is streamed into the zip writer through a 256 KiB + /// `BufWriter`, which is required for performance: `serde_json::to_writer` + /// emits many small writes (one per JSON token), and feeding them + /// straight into the deflate stream pays a per-call cost on every one. + /// Buffering keeps peak memory bounded (no full intermediate `String` + /// copy of the payload) while still amortising the compressor overhead. + /// Pairs with [`Dump::read_from_file`]. + pub fn write_to_file(&self, path: &str) -> Result<(), Error> { + const WRITE_BUF_BYTES: usize = 256 * 1024; + let file = File::create(path)?; let mut zip = ZipWriter::new(file); let options = SimpleFileOptions::default() .compression_method(zip::CompressionMethod::Deflated) .unix_permissions(0o644); zip.start_file("dump.io", options)?; - zip.write_all(serialized_bytes)?; + let mut buf = BufWriter::with_capacity(WRITE_BUF_BYTES, &mut zip); + serde_json::to_writer(&mut buf, self) + .map_err(|e| Error::other(format!("Failed to serialize dump: {e}.")))?; + buf.flush()?; + drop(buf); zip.finish()?; Ok(()) } @@ -889,48 +901,7 @@ impl Dump { pool: &PgPool, schema_filter: &str, ) -> Result, Error> { - let query = format!( - " - select - quote_ident(seq.schemaname) as schemaname, - quote_ident(seq.sequencename) as sequencename, - quote_ident(seq.sequenceowner) as sequenceowner, - seq.data_type::varchar as sequencedatatype, - seq.start_value, - seq.min_value, - seq.max_value, - seq.increment_by, - seq.cycle, - seq.cache_size, - seq.last_value, - quote_ident(owner_ns.nspname) as owned_by_schema, - quote_ident(owner_table.relname) as 
owned_by_table, - quote_ident(owner_attr.attname) as owned_by_column, - dep.deptype::text as dependency_type, - seq_desc.description as seq_comment, - seq_class.relacl::text[] as seq_acl, - seq_class.relpersistence::text as seq_persistence - from - pg_sequences seq - left join pg_namespace seq_ns on seq_ns.nspname = seq.schemaname - left join pg_class seq_class on seq_class.relname = seq.sequencename - and seq_class.relnamespace = seq_ns.oid - left join pg_description seq_desc on seq_desc.objoid = seq_class.oid and seq_desc.objsubid = 0 - left join pg_depend dep on dep.objid = seq_class.oid - and dep.deptype in ('a', 'i') - left join pg_class owner_table on owner_table.oid = dep.refobjid - left join pg_namespace owner_ns on owner_ns.oid = owner_table.relnamespace - left join pg_attribute owner_attr on owner_attr.attrelid = dep.refobjid - and owner_attr.attnum = dep.refobjsubid - where - seq.schemaname in {} - and not exists ( - select 1 from pg_depend ext_dep - where ext_dep.objid = seq_class.oid - and ext_dep.deptype = 'e' - )", - schema_filter - ); + let query = Self::build_sequences_standalone_query(schema_filter); let rows = sqlx::query(query.as_str()) .fetch_all(pool) @@ -985,6 +956,52 @@ impl Dump { Ok(sequences) } + fn build_sequences_standalone_query(schema_filter: &str) -> String { + format!( + "select + quote_ident(seq.schemaname) as schemaname, + quote_ident(seq.sequencename) as sequencename, + quote_ident(seq.sequenceowner) as sequenceowner, + seq.data_type::varchar as sequencedatatype, + seq.start_value, + seq.min_value, + seq.max_value, + seq.increment_by, + seq.cycle, + seq.cache_size, + seq.last_value, + quote_ident(owner_ns.nspname) as owned_by_schema, + quote_ident(owner_table.relname) as owned_by_table, + quote_ident(owner_attr.attname) as owned_by_column, + dep.deptype::text as dependency_type, + seq_desc.description as seq_comment, + seq_class.relacl::text[] as seq_acl, + seq_class.relpersistence::text as seq_persistence + from + pg_sequences 
seq + left join pg_namespace seq_ns on seq_ns.nspname = seq.schemaname + left join pg_class seq_class on seq_class.relname = seq.sequencename + and seq_class.relnamespace = seq_ns.oid + left join pg_description seq_desc on seq_desc.objoid = seq_class.oid + and seq_desc.classoid = 'pg_class'::regclass + and seq_desc.objsubid = 0 + left join pg_depend dep on dep.objid = seq_class.oid + and dep.deptype in ('a', 'i') + left join pg_class owner_table on owner_table.oid = dep.refobjid + left join pg_namespace owner_ns on owner_ns.oid = owner_table.relnamespace + left join pg_attribute owner_attr on owner_attr.attrelid = dep.refobjid + and owner_attr.attnum = dep.refobjsubid + where + seq.schemaname in {} + and not exists ( + select 1 from pg_depend ext_dep + where ext_dep.objid = seq_class.oid + and ext_dep.deptype = 'e' + )", + schema_filter + ) + } + async fn fetch_routines_standalone( pool: &PgPool, schema_filter: &str, @@ -1225,55 +1242,7 @@ impl Dump { // Probe catalog capabilities once for the entire dump run. let caps = PgCatalogCaps::detect(pool, pg_version).await; - let query = format!( - " - select - quote_ident(t.schemaname) as schemaname, - quote_ident(t.tablename) as tablename, - quote_ident(t.tableowner) as tableowner, - t.schemaname as raw_schema_name, - t.tablename as raw_table_name, - t.tablespace, - t.hasindexes, - t.hastriggers, - t.hasrules, - t.rowsecurity, - d.description as table_comment, - c.relacl::text[] as table_acl, - am.amname as access_method, - c.relpersistence as relpersistence, - c.reloptions as reloptions, - c.relreplident as relreplident, - c.relforcerowsecurity as relforcerowsecurity, - case when c.reloftype <> 0 then c.reloftype::regtype::text else null end as typed_table_type, - array( - select quote_ident(pn.nspname) || '.' 
|| quote_ident(pc.relname) - from pg_inherits pi2 - join pg_class pc on pc.oid = pi2.inhparent - join pg_namespace pn on pn.oid = pc.relnamespace - where pi2.inhrelid = c.oid - and not exists ( - select 1 from pg_partitioned_table pt where pt.partrelid = pi2.inhparent - ) - order by pi2.inhseqno - ) as inherits_from - from pg_tables t - left join pg_class c on c.relname = t.tablename - and c.relkind in ('r','p') - and c.relnamespace = (select oid from pg_namespace where nspname = t.schemaname) - left join pg_am am on am.oid = c.relam - left join pg_description d on d.objoid = c.oid and d.objsubid = 0 - where - t.schemaname not in ('pg_catalog', 'information_schema') - and t.schemaname in {} - and t.tablename not like 'pg_%' - and not exists ( - select 1 from pg_depend ext_dep - where ext_dep.objid = c.oid - and ext_dep.deptype = 'e' - );", - schema_filter - ); + let query = Self::build_tables_standalone_query(schema_filter); let rows = sqlx::query(query.as_str()) .fetch_all(pool) @@ -1370,89 +1339,68 @@ impl Dump { Ok(shell_tables) } + fn build_tables_standalone_query(schema_filter: &str) -> String { + format!( + " + select + quote_ident(t.schemaname) as schemaname, + quote_ident(t.tablename) as tablename, + quote_ident(t.tableowner) as tableowner, + t.schemaname as raw_schema_name, + t.tablename as raw_table_name, + t.tablespace, + t.hasindexes, + t.hastriggers, + t.hasrules, + t.rowsecurity, + d.description as table_comment, + c.relacl::text[] as table_acl, + am.amname as access_method, + c.relpersistence as relpersistence, + c.reloptions as reloptions, + c.relreplident as relreplident, + c.relforcerowsecurity as relforcerowsecurity, + case when c.reloftype <> 0 then c.reloftype::regtype::text else null end as typed_table_type, + array( + select quote_ident(pn.nspname) || '.' 
|| quote_ident(pc.relname) + from pg_inherits pi2 + join pg_class pc on pc.oid = pi2.inhparent + join pg_namespace pn on pn.oid = pc.relnamespace + where pi2.inhrelid = c.oid + and not exists ( + select 1 from pg_partitioned_table pt where pt.partrelid = pi2.inhparent + ) + order by pi2.inhseqno + ) as inherits_from + from pg_tables t + left join pg_class c on c.relname = t.tablename + and c.relkind in ('r','p') + and c.relnamespace = (select oid from pg_namespace where nspname = t.schemaname) + left join pg_am am on am.oid = c.relam + left join pg_description d on d.objoid = c.oid + and d.classoid = 'pg_class'::regclass + and d.objsubid = 0 + where + t.schemaname not in ('pg_catalog', 'information_schema') + and t.schemaname in {} + and t.tablename not like 'pg_%' + and not exists ( + select 1 from pg_depend ext_dep + where ext_dep.objid = c.oid + and ext_dep.deptype = 'e' + );", + schema_filter + ) + } + async fn fetch_views_standalone( pool: &PgPool, schema_filter: &str, ) -> Result, Error> { // Fetch regular and materialized views concurrently. - let regular_query = format!( - "select - quote_ident(v.table_schema) as table_schema, - quote_ident(v.table_name) as table_name, - v.view_definition, - quote_ident(pv.viewowner) as view_owner, - array_agg(distinct vtu.table_schema || '.' 
|| vtu.table_name) as table_relation, - d.description as view_comment, - (select cc.relacl::text[] from pg_class cc where cc.oid = c.oid) as view_acl, - coalesce(c.reloptions::text[] @> array['security_invoker=true']::text[], false) as security_invoker, - v.check_option - from information_schema.views v - join information_schema.view_table_usage vtu on v.table_name = vtu.view_name and v.table_schema = vtu.view_schema - left join pg_views pv on pv.schemaname = v.table_schema and pv.viewname = v.table_name - left join pg_class c on c.relname = v.table_name and c.relnamespace = (select oid from pg_namespace where nspname = v.table_schema) - left join pg_description d on d.objoid = c.oid and d.objsubid = 0 - where - v.table_schema not in ('pg_catalog', 'information_schema') - and v.table_schema in {} - and not exists ( - select 1 from pg_depend ext_dep - where ext_dep.objid = c.oid - and ext_dep.deptype = 'e' - ) - group by v.table_schema, v.table_name, v.view_definition, pv.viewowner, d.description, c.oid, c.reloptions, v.check_option;", - schema_filter - ); - - let mat_query = format!( - "select - mv.schemaname as table_schema, - mv.matviewname as table_name, - mv.definition as view_definition, - mv.matviewowner as view_owner, - array( - select distinct n.nspname || '.' 
|| dc.relname - from pg_depend dep - join pg_class dc on dc.oid = dep.refobjid - join pg_namespace n on n.oid = dc.relnamespace - where dep.objid = c.oid - and dep.deptype = 'n' - and dc.relkind in ('r', 'v', 'm') - ) as table_relation, - d.description as view_comment, - c.relacl::text[] as view_acl, - c.reloptions as storage_options, - (select spcname from pg_tablespace where oid = c.reltablespace) as tablespace_name - from pg_matviews mv - join pg_class c on c.relname = mv.matviewname - and c.relnamespace = (select oid from pg_namespace where nspname = mv.schemaname) - left join pg_description d on d.objoid = c.oid and d.objsubid = 0 - where mv.schemaname not in ('pg_catalog', 'information_schema') - and mv.schemaname in {} - and not exists ( - select 1 from pg_depend ext_dep - where ext_dep.objid = c.oid - and ext_dep.deptype = 'e' - );", - schema_filter - ); - - // Column comments query (works for both regular and materialized views) - let col_comments_query = format!( - "select - quote_ident(n.nspname) as schema_name, - quote_ident(c.relname) as view_name, - quote_ident(a.attname) as column_name, - d.description as col_comment - from pg_class c - join pg_namespace n on n.oid = c.relnamespace - join pg_attribute a on a.attrelid = c.oid and a.attnum > 0 and not a.attisdropped - join pg_description d on d.objoid = c.oid and d.objsubid = a.attnum - where c.relkind in ('v', 'm') - and n.nspname not in ('pg_catalog', 'information_schema') - and n.nspname in {} - order by n.nspname, c.relname, a.attnum;", - schema_filter - ); + let regular_query = Self::build_regular_views_query(schema_filter); + let mat_query = Self::build_materialized_views_query(schema_filter); + let col_comments_query = Self::build_view_column_comments_query(schema_filter); let (regular_rows, mat_rows, col_comment_rows) = tokio::try_join!( async { @@ -1599,6 +1547,96 @@ impl Dump { Ok(views) } + fn build_regular_views_query(schema_filter: &str) -> String { + format!( + "select + 
quote_ident(v.table_schema) as table_schema, + quote_ident(v.table_name) as table_name, + v.view_definition, + quote_ident(pv.viewowner) as view_owner, + array_agg(distinct vtu.table_schema || '.' || vtu.table_name) as table_relation, + d.description as view_comment, + (select cc.relacl::text[] from pg_class cc where cc.oid = c.oid) as view_acl, + coalesce(c.reloptions::text[] @> array['security_invoker=true']::text[], false) as security_invoker, + v.check_option + from information_schema.views v + join information_schema.view_table_usage vtu on v.table_name = vtu.view_name and v.table_schema = vtu.view_schema + left join pg_views pv on pv.schemaname = v.table_schema and pv.viewname = v.table_name + left join pg_class c on c.relname = v.table_name and c.relnamespace = (select oid from pg_namespace where nspname = v.table_schema) + left join pg_description d on d.objoid = c.oid + and d.classoid = 'pg_class'::regclass + and d.objsubid = 0 + where + v.table_schema not in ('pg_catalog', 'information_schema') + and v.table_schema in {} + and not exists ( + select 1 from pg_depend ext_dep + where ext_dep.objid = c.oid + and ext_dep.deptype = 'e' + ) + group by v.table_schema, v.table_name, v.view_definition, pv.viewowner, d.description, c.oid, c.reloptions, v.check_option;", + schema_filter + ) + } + + fn build_materialized_views_query(schema_filter: &str) -> String { + format!( + "select + mv.schemaname as table_schema, + mv.matviewname as table_name, + mv.definition as view_definition, + mv.matviewowner as view_owner, + array( + select distinct n.nspname || '.' 
|| dc.relname + from pg_depend dep + join pg_class dc on dc.oid = dep.refobjid + join pg_namespace n on n.oid = dc.relnamespace + where dep.objid = c.oid + and dep.deptype = 'n' + and dc.relkind in ('r', 'v', 'm') + ) as table_relation, + d.description as view_comment, + c.relacl::text[] as view_acl, + c.reloptions as storage_options, + (select spcname from pg_tablespace where oid = c.reltablespace) as tablespace_name + from pg_matviews mv + join pg_class c on c.relname = mv.matviewname + and c.relnamespace = (select oid from pg_namespace where nspname = mv.schemaname) + left join pg_description d on d.objoid = c.oid + and d.classoid = 'pg_class'::regclass + and d.objsubid = 0 + where mv.schemaname not in ('pg_catalog', 'information_schema') + and mv.schemaname in {} + and not exists ( + select 1 from pg_depend ext_dep + where ext_dep.objid = c.oid + and ext_dep.deptype = 'e' + );", + schema_filter + ) + } + + fn build_view_column_comments_query(schema_filter: &str) -> String { + format!( + "select + quote_ident(n.nspname) as schema_name, + quote_ident(c.relname) as view_name, + quote_ident(a.attname) as column_name, + d.description as col_comment + from pg_class c + join pg_namespace n on n.oid = c.relnamespace + join pg_attribute a on a.attrelid = c.oid and a.attnum > 0 and not a.attisdropped + join pg_description d on d.objoid = c.oid + and d.classoid = 'pg_class'::regclass + and d.objsubid = a.attnum + where c.relkind in ('v', 'm') + and n.nspname not in ('pg_catalog', 'information_schema') + and n.nspname in {} + order by n.nspname, c.relname, a.attnum;", + schema_filter + ) + } + async fn fetch_foreign_tables_standalone( pool: &PgPool, schema_filter: &str, @@ -3171,6 +3209,38 @@ mod tests { use crate::dump::table_constraint::TableConstraint; use crate::dump::view::View; use sqlx::postgres::types::Oid; + use std::path::PathBuf; + use std::sync::atomic::{AtomicU64, Ordering}; + + /// RAII guard for a temp file used by a test. 
The path is deleted + /// on drop, including when an assertion panics earlier in the + /// test, so failures don't leave files behind in the temp dir. + /// Using a per-test-process atomic counter (plus PID) for the name + /// also avoids collisions when several tests in the same binary + /// reach for a temp file concurrently. + struct TempPath(PathBuf); + + impl TempPath { + fn new(prefix: &str, suffix: &str) -> Self { + static COUNTER: AtomicU64 = AtomicU64::new(0); + let n = COUNTER.fetch_add(1, Ordering::Relaxed); + let name = format!("{}_{}_{}.{}", prefix, std::process::id(), n, suffix); + Self(std::env::temp_dir().join(name)) + } + + fn as_str(&self) -> std::borrow::Cow<'_, str> { + self.0.to_string_lossy() + } + } + + impl Drop for TempPath { + fn drop(&mut self) { + // Ignore NotFound (e.g. test never wrote the file) and + // any other error — best-effort cleanup must not mask + // the original test failure. + let _ = std::fs::remove_file(&self.0); + } + } fn empty_dump() -> Dump { Dump::new(DumpConfig { @@ -3764,4 +3834,101 @@ mod tests { "regular_a before regular_b alphabetically" ); } + + #[test] + fn build_tables_standalone_query_filters_by_pg_class() { + let query = Dump::build_tables_standalone_query("('public')"); + assert!( + query.contains("d.classoid = 'pg_class'::regclass"), + "expected pg_class classoid filter for table comments" + ); + } + + #[test] + fn build_regular_views_query_filters_by_pg_class() { + let query = Dump::build_regular_views_query("('public')"); + assert!( + query.contains("d.classoid = 'pg_class'::regclass"), + "expected pg_class classoid filter for regular view comments" + ); + } + + #[test] + fn build_materialized_views_query_filters_by_pg_class() { + let query = Dump::build_materialized_views_query("('public')"); + assert!( + query.contains("d.classoid = 'pg_class'::regclass"), + "expected pg_class classoid filter for materialized view comments" + ); + } + + #[test] + fn 
build_view_column_comments_query_filters_by_pg_class() { + let query = Dump::build_view_column_comments_query("('public')"); + assert!( + query.contains("d.classoid = 'pg_class'::regclass"), + "expected pg_class classoid filter for view column comments" + ); + } + + #[test] + fn build_sequences_standalone_query_filters_by_pg_class() { + let query = Dump::build_sequences_standalone_query("('public')"); + assert!( + query.contains("seq_desc.classoid = 'pg_class'::regclass"), + "expected pg_class classoid filter for sequence comments" + ); + } + + /// Regression test for the streaming dump-write change: a Dump + /// serialized via [`Dump::write_to_file`] (which streams JSON + /// directly into the zip writer instead of materializing the whole + /// payload as a `String`) must still round-trip identically through + /// [`Dump::read_from_file`]. + #[tokio::test] + async fn write_to_file_round_trips_via_read_from_file() { + let mut dump = empty_dump(); + dump.schemas.push(make_schema("public")); + dump.schemas.push(make_schema("data")); + dump.extensions.push(make_extension("pgcrypto")); + dump.tables.push(make_table("public", "users")); + dump.tables.push(make_table("data", "events")); + dump.views.push(make_view("public", "active_users")); + dump.sequences.push(make_sequence("public", "users_id_seq")); + dump.routines.push(make_routine("public", "noop")); + + // RAII guard cleans up the temp file even if a later assertion + // panics, so failures don't pollute the temp dir. 
+ let path = TempPath::new("pgc_dump_roundtrip", "zip"); + + dump.write_to_file(&path.as_str()) + .expect("write_to_file failed"); + + let restored = Dump::read_from_file(&path.as_str()) + .await + .expect("read_from_file failed"); + + assert_eq!(restored.schemas.len(), dump.schemas.len()); + assert_eq!(restored.extensions.len(), dump.extensions.len()); + assert_eq!(restored.tables.len(), dump.tables.len()); + assert_eq!(restored.views.len(), dump.views.len()); + assert_eq!(restored.sequences.len(), dump.sequences.len()); + assert_eq!(restored.routines.len(), dump.routines.len()); + + let restored_schemas: Vec<&str> = + restored.schemas.iter().map(|s| s.name.as_str()).collect(); + assert!(restored_schemas.contains(&"public")); + assert!(restored_schemas.contains(&"data")); + + let restored_table_keys: Vec<(&str, &str)> = restored + .tables + .iter() + .map(|t| (t.schema.as_str(), t.name.as_str())) + .collect(); + assert!(restored_table_keys.contains(&("public", "users"))); + assert!(restored_table_keys.contains(&("data", "events"))); + + assert_eq!(restored.routines[0].name, "noop"); + assert_eq!(restored.sequences[0].name, "users_id_seq"); + } } diff --git a/app/src/dump/table.rs b/app/src/dump/table.rs index 71fd5e8..a4ce45a 100644 --- a/app/src/dump/table.rs +++ b/app/src/dump/table.rs @@ -421,6 +421,7 @@ impl Table { AND a.attisdropped = false LEFT JOIN pg_description pd ON pd.objoid = cls.oid + AND pd.classoid = 'pg_class'::regclass AND pd.objsubid = a.attnum WHERE c.table_schema IN {schema_filter} ORDER BY c.table_schema, c.table_name, c.ordinal_position" @@ -545,29 +546,7 @@ impl Table { pool: &PgPool, schema_filter: &str, ) -> Result>, Error> { - let query = format!( - "SELECT - quote_ident(i.schemaname) as schemaname, - quote_ident(i.tablename) as tablename, - i.schemaname as raw_schemaname, - i.tablename as raw_tablename, - quote_ident(i.indexname) as indexname, - i.tablespace, - i.indexdef, - EXISTS (SELECT 1 FROM pg_inherits inh WHERE inh.inhrelid = 
ic.oid) AS is_partition_index, - d.description as index_comment - FROM pg_indexes i - JOIN pg_class ic ON ic.relname = i.indexname - JOIN pg_namespace n ON n.oid = ic.relnamespace AND n.nspname = i.schemaname - JOIN pg_index idx ON idx.indexrelid = ic.oid - LEFT JOIN pg_constraint puc ON puc.conindid = ic.oid AND puc.contype IN ('p', 'u') - LEFT JOIN pg_description d ON d.objoid = ic.oid AND d.objsubid = 0 - WHERE idx.indisprimary = false - AND (idx.indisunique = false OR puc.oid IS NULL) - AND NOT EXISTS (SELECT 1 FROM pg_constraint xc WHERE xc.conindid = ic.oid AND xc.contype = 'x') - AND i.schemaname IN {schema_filter} - ORDER BY i.schemaname, i.tablename, i.indexname" - ); + let query = Self::build_indexes_bulk_query(schema_filter); let rows = sqlx::query(&query).fetch_all(pool).await?; let mut indexes_by_key: HashMap<(String, String), Vec> = HashMap::new(); @@ -592,6 +571,44 @@ impl Table { Ok(indexes_by_key) } + fn build_indexes_bulk_query(schema_filter: &str) -> String { + format!( + "select + quote_ident(i.schemaname) as schemaname, + quote_ident(i.tablename) as tablename, + i.schemaname as raw_schemaname, + i.tablename as raw_tablename, + quote_ident(i.indexname) as indexname, + i.tablespace, + i.indexdef, + EXISTS (SELECT 1 FROM pg_inherits inh WHERE inh.inhrelid = ic.oid) AS is_partition_index, + d.description as index_comment + from + pg_indexes i + join pg_class ic on ic.relname = i.indexname + join pg_namespace n on n.oid = ic.relnamespace and n.nspname = i.schemaname + join pg_index idx on idx.indexrelid = ic.oid + left join pg_constraint puc on puc.conindid = ic.oid and puc.contype in ('p', 'u') + left join pg_description d + on d.objoid = ic.oid + and d.classoid = 'pg_class'::regclass + and d.objsubid = 0 + where + idx.indisprimary = false + and (idx.indisunique = false or puc.oid is null) + and not exists ( + select 1 from pg_constraint xc + where xc.conindid = ic.oid + and xc.contype = 'x' + ) + and i.schemaname in {schema_filter} + order by + 
i.schemaname, + i.tablename, + i.indexname" + ) + } + /// Fetch constraints for every table in the accessible schemas in one query. async fn fetch_constraints_bulk( pool: &PgPool, @@ -4511,5 +4528,19 @@ mod tests { query.contains("a.attstattarget::int4"), "expected ::int4 cast for attstattarget" ); + assert!( + query.contains("pd.classoid = 'pg_class'::regclass"), + "expected pg_class classoid filter for table column comments" + ); + } + + #[test] + fn build_indexes_bulk_query_filters_by_pg_class() { + let query = Table::build_indexes_bulk_query("('public')"); + + assert!( + query.contains("d.classoid = 'pg_class'::regclass"), + "expected pg_class classoid filter for table index comments" + ); } }