From 456d1aba96c674b93368497985197700f901f99f Mon Sep 17 00:00:00 2001 From: Josh Carp Date: Fri, 15 May 2026 15:21:43 -0400 Subject: [PATCH] Revert retention policy for clickhouse system.query_log. As described in #10444, there's a bug in setting retention policies on `system.query_log`: because we configure the ttl in config.xml and then optionally overwrite it in sql via retention policies, user-configured ttls on the query log table don't survive clickhouse restarts. Instead, clickhouse moves `system.query_log` to `system.query_log_$ORDINAL`, and creates a new `system.query_log` table using the ttl from config.xml. We could solve this by persisting the user-configured ttl in cockroachdb and propagating that value to config.xml somehow. However, it's simpler to instead make `system.query_log` small enough that we don't have to care about its ttl, and remove it from the retention policy concept. We dropped the size of this table significantly in #10443, and will drop it further by omitting query logs for fast queries in a follow-up patch. In this patch, we drop logic to manage the query log ttl. Fixes #10444. --- clickhouse-admin/src/context.rs | 34 +++----------------- oximeter/db/src/client/mod.rs | 56 +++------------------------------ 2 files changed, 10 insertions(+), 80 deletions(-) diff --git a/clickhouse-admin/src/context.rs b/clickhouse-admin/src/context.rs index 6484156a2e1..b8a872e7b59 100644 --- a/clickhouse-admin/src/context.rs +++ b/clickhouse-admin/src/context.rs @@ -899,37 +899,13 @@ mod tests { ErrorStatusCode::SERVICE_UNAVAILABLE )); - // Initialize the database, then check the retention policy again. - // - // We need to wait for the `system.query_log` table to exist, which can - // take a while. context.init_db(false).await.expect("failed to initialize database"); - let policy = dev::poll::wait_for_condition( - || async { - match context.retention_policy().await { - Ok(pol) => { - if pol.tables.contains_key("query_log") { - Ok(pol) - } else { - Err(dev::poll::CondCheckError::<()>::NotYet) - } - } - Err(_) => Err(dev::poll::CondCheckError::<()>::NotYet), - } - }, - &std::time::Duration::from_millis(100), - &std::time::Duration::from_secs(30), - ) - .await - .expect("failed to get retention policy"); - assert!(!policy.tables.is_empty()); - - // The query log defaults to 7 days TTL, everything else is 30. - assert!(policy.tables.iter().all(|pol| { - let expected = if pol.table == "query_log" { 7 } else { 30 }; - u8::from(pol.days) == expected - })); + let policy = context + .retention_policy() + .await + .expect("failed to get retention policy"); + assert!(policy.tables.iter().all(|pol| { u8::from(pol.days) == 30 })); // Set everything to 3, and ensure we can read it back. let days = Days::new(3).unwrap(); diff --git a/oximeter/db/src/client/mod.rs b/oximeter/db/src/client/mod.rs index 0b8acf7b304..f4e2f0dbb39 100644 --- a/oximeter/db/src/client/mod.rs +++ b/oximeter/db/src/client/mod.rs @@ -1069,8 +1069,6 @@ impl Client { Some("toDateTime(timestamp)") } else if table.contains("fields") { Some("last_updated_at") - } else if table.contains("query_log") { - Some("event_date") } else { None } @@ -1098,32 +1096,9 @@ impl Client { tables.push(format!("{}.{}", crate::DATABASE_NAME, table)); } - // TTL applies to the query log table too, if it exists. - if self.database_has_query_log_table(handle).await? { - tables.push(String::from("system.query_log")); - } Ok(tables) } - /// Return true if the database has the `system.query_log` table. - async fn database_has_query_log_table( - &self, - handle: &mut Handle, - ) -> Result { - const SQL: &str = "\ - SELECT 1 \ - FROM system.tables \ - WHERE name = 'query_log' \ - LIMIT 1"; - let n_rows = self - .execute_with_block(handle, SQL) - .await? - .data - .ok_or_else(|| Error::QueryMissingData { query: SQL.to_string() })? - .n_rows(); - Ok(n_rows > 0) - } - /// Return the retention policy for the oximeter database tables. /// /// An error is returned if the database cannot be reached or the policy @@ -1152,7 +1127,7 @@ impl Client { startsWith(name, 'fields') \ ) \ AND position(engine, 'MergeTree') != 0\ - ) OR (database = 'system' AND name = 'query_log');"; + );"; let result = self .execute_with_block(&mut self.claim_connection().await?, SQL) .await?; @@ -1213,10 +1188,9 @@ impl Client { ifNull(total_bytes, 0) AS total_bytes, \ ifNull(total_rows, 0) AS total_rows \ FROM system.tables \ - WHERE (\ - database = '{}' OR \ - (database = 'system' AND name = 'query_log')\ - ) AND has_own_data", + WHERE \ + database = '{}'\ + AND has_own_data", crate::DATABASE_NAME, ); let columns = self @@ -1705,26 +1679,15 @@ impl Client { // // for the field tables. // -// Lastly, the TTL line could look like this -// -// ``` -// TTL event_date + toIntervalDay() -// ``` -// -// for the system.query_log table. -// -// // We're picking out the number of days from the function argument as a string. fn extract_ttl_in_days_from_create_table_query( row: &str, ) -> Result { - const NEEDLES: [&str; 3] = [ + const NEEDLES: [&str; 2] = [ // For measurement tables "TTL toDateTime(timestamp) + toIntervalDay(", // For field tables "TTL last_updated_at + toIntervalDay(", - // For system.query_log - "TTL event_date + toIntervalDay(", ]; // Find the first needle that matches, and error if none do. @@ -5370,15 +5333,6 @@ mod tests { "some junk TTL last_updated_at + toIntervalDay(7) other stuff" ).unwrap()), ); - assert_eq!( - 7u8, - u8::from( - extract_ttl_in_days_from_create_table_query( - "some junk TTL event_date + toIntervalDay(7) other stuff" - ) - .unwrap() - ), - ); for invalid in [ "some junk TTL toDateTime(timestamp) + toIntervalDay(-1) other stuf",