diff --git a/src/export/clickhouse_exporter.cc b/src/export/clickhouse_exporter.cc index 23fe1c6..b4cbe9a 100644 --- a/src/export/clickhouse_exporter.cc +++ b/src/export/clickhouse_exporter.cc @@ -157,54 +157,48 @@ class ClickHouseExporter : public StatsExporter { MemoryContextDelete(conn_cxt_); } - // Server casts String to LowCardinality(String) on INSERT - shared_ptr> TagString(string_view name) final { + // On the CH-native side, the LC/HC distinction is a hint: the server-side + // LowCardinality() wrap (declared in the schema) is what actually + // applies dictionary encoding on write. clickhouse-c speaks plain typed + // columns either way, so StatLC* and StatHC* of the same C++ type produce + // identical wire bytes here. The new Arrow exporter is where the LC/HC + // distinction materially changes the wire shape (DictBuilder vs plain). + + // Low-cardinality columns + shared_ptr> StatLCString(string_view name) final { return MakeCol>(name); } - shared_ptr> MetricInt16(string_view name) final { - return MakeCol>(name, "Int16"); - } - shared_ptr> MetricInt32(string_view name) final { - return MakeCol>(name, "Int32"); - } - shared_ptr> MetricInt64(string_view name) final { - return MakeCol>(name, "Int64"); - } - shared_ptr> MetricUInt8(string_view name) final { + shared_ptr> StatLCUInt8(string_view name) final { return MakeCol>(name, "UInt8"); } - shared_ptr> MetricUInt64(string_view name) final { - return MakeCol>(name, "UInt64"); - } - - shared_ptr> RecordInt16(string_view name) final { + shared_ptr> StatLCInt16(string_view name) final { return MakeCol>(name, "Int16"); } - shared_ptr> RecordInt32(string_view name) final { + shared_ptr> StatLCInt32(string_view name) final { return MakeCol>(name, "Int32"); } - shared_ptr> RecordInt64(string_view name) final { - return MakeCol>(name, "Int64"); + + // High-cardinality columns + shared_ptr> StatHCString(string_view name) final { + return MakeCol>(name); } - shared_ptr> RecordUInt8(string_view name) final { - return MakeCol>(name, "UInt8"); + shared_ptr> StatHCInt64(string_view name) final { + return MakeCol>(name, "Int64"); } - shared_ptr> RecordUInt64(string_view name) final { + shared_ptr> StatHCUInt64(string_view name) final { return MakeCol>(name, "UInt64"); } - shared_ptr> RecordDateTime(string_view name) final { + + shared_ptr> StatTimestamp(string_view name) final { return MakeCol>(name, "DateTime64(6)"); } - shared_ptr> RecordString(string_view name) final { - return MakeCol>(name); - } // Semantic columns - shared_ptr> DbNameColumn() final { return TagString("db_name"); } - shared_ptr> DbUserColumn() final { return TagString("db_user"); } - shared_ptr> DbDurationColumn() final { return MetricUInt64("duration_us"); } - shared_ptr> DbOperationColumn() final { return TagString("db_operation"); } - shared_ptr> DbQueryTextColumn() final { return RecordString("query_text"); } + shared_ptr> DbNameColumn() final { return StatLCString("db_name"); } + shared_ptr> DbUserColumn() final { return StatLCString("db_user"); } + shared_ptr> DbDurationColumn() final { return StatHCUInt64("duration_us"); } + shared_ptr> DbOperationColumn() final { return StatLCString("db_operation"); } + shared_ptr> DbQueryTextColumn() final { return StatHCString("query_text"); } void BeginBatch() final { for (auto& col : columns_) diff --git a/src/export/exporter_interface.h b/src/export/exporter_interface.h index 249e82e..ec86e0d 100644 --- a/src/export/exporter_interface.h +++ b/src/export/exporter_interface.h @@ -28,24 +28,44 @@ class StatsExporter { }; public: - // Tags: Columns that serve as narrowing criteria for metrics. - virtual shared_ptr> TagString(string_view name) = 0; + // =========================================================================== + // Cardinality-typed columns. + // + // Stat(name) declares both the wire type and the *cardinality + // intent* of the column. Each backend honors that intent appropriately: + // + // ClickHouse: LC -> may be stored as LowCardinality(); HC -> plain. + // Schema-declared encoding wins on write; the LC hint helps + // the producer pick the cheapest column representation. + // Arrow IPC: LC -> DictBuilder (dictionary-encoded array); HC -> plain + // typed builder. Required for batch-rate efficiency on + // low-cardinality dimensions. + // OTel: LC -> eligible as a histogram dimension or metric label; + // HC -> log attribute only, *never* a metric dimension + // (cardinality explosion). + // + // Cardinality is a property of the data, not just the type width. err_elevel + // (UInt8) is LC because values repeat (~5 distinct codes). query_id (Int64) + // is HC because every distinct query produces a distinct value. The author + // declares intent; the backend implements it. + // =========================================================================== + + // Low-cardinality columns: dimensions you'd group/filter by. + virtual shared_ptr> StatLCString(string_view name) = 0; + virtual shared_ptr> StatLCUInt8(string_view name) = 0; + virtual shared_ptr> StatLCInt16(string_view name) = 0; + virtual shared_ptr> StatLCInt32(string_view name) = 0; - // Metrics: Columns that are generally bucketed into histograms. - virtual shared_ptr> MetricInt16(string_view name) = 0; - virtual shared_ptr> MetricInt32(string_view name) = 0; - virtual shared_ptr> MetricInt64(string_view name) = 0; - virtual shared_ptr> MetricUInt8(string_view name) = 0; - virtual shared_ptr> MetricUInt64(string_view name) = 0; + // High-cardinality columns: values you observe, not dimensions you group by. + virtual shared_ptr> StatHCString(string_view name) = 0; + virtual shared_ptr> StatHCInt64(string_view name) = 0; + virtual shared_ptr> StatHCUInt64(string_view name) = 0; - // Records: Data columns you wouldn't want to filter by. - virtual shared_ptr> RecordInt16(string_view name) = 0; - virtual shared_ptr> RecordInt32(string_view name) = 0; - virtual shared_ptr> RecordInt64(string_view name) = 0; - virtual shared_ptr> RecordUInt8(string_view name) = 0; - virtual shared_ptr> RecordUInt64(string_view name) = 0; - virtual shared_ptr> RecordDateTime(string_view name) = 0; - virtual shared_ptr> RecordString(string_view name) = 0; + // Domain-specific. Caller appends a Unix-epoch microsecond timestamp. + // (PG-epoch values must be offset by kPostgresEpochOffsetUs before append; + // CH DateTime64(6) and OTel time_unix_nano both interpret the wire value + // as Unix-epoch.) + virtual shared_ptr> StatTimestamp(string_view name) = 0; // =========================================================================== // Semantic columns: name, unit, and instrument type may vary by backend. @@ -54,17 +74,17 @@ class StatsExporter { // instrument). Pure virtuals enforce explicit handling in every exporter. // =========================================================================== - // Database name. CH: TagString "db_name"; OTel semconv: "db.name" tag. + // Database name. CH: StatLCString "db_name"; OTel semconv: "db.name" tag. virtual shared_ptr> DbNameColumn() = 0; - // Authenticated user. CH: TagString "db_user"; OTel semconv: "db.user" tag. + // Authenticated user. CH: StatLCString "db_user"; OTel semconv: "db.user" tag. virtual shared_ptr> DbUserColumn() = 0; - // Query duration. Caller appends microseconds. CH: MetricUInt64 "duration_us"; + // Query duration. Caller appends microseconds. CH: StatHCUInt64 "duration_us"; // OTel: converts to seconds, records as Histogram "db.client.operation.duration". virtual shared_ptr> DbDurationColumn() = 0; - // SQL command type. CH: TagString "db_operation"; OTel: TagString "db.operation.name" + // SQL command type. CH: StatLCString "db_operation"; OTel: StatLCString "db.operation.name" // (used as a dimension on the duration histogram). virtual shared_ptr> DbOperationColumn() = 0; - // Query text. CH: RecordString "query_text"; OTel semconv: "db.query.text". + // Query text. CH: StatHCString "query_text"; OTel semconv: "db.query.text". virtual shared_ptr> DbQueryTextColumn() = 0; virtual void BeginBatch() = 0; @@ -95,8 +115,8 @@ void RecordExporterFailure(const char* message); // Expected usage: // void ProcessBatch(StatsExporter *exporter) { // exporter->BeginBatch(); // no op or ClickHouse column reset -// auto col_user = exporter->TagString("db_user"); -// auto col_rows = exporter->MetricUInt64("rows"); +// auto col_user = exporter->StatLCString("db_user"); +// auto col_rows = exporter->StatHCUInt64("rows"); // // for (const auto &ev : events) { // exporter->BeginRow(); // no-op or initialize tag map diff --git a/src/export/otel_exporter.cc b/src/export/otel_exporter.cc index 3c7f8d6..d588ee4 100644 --- a/src/export/otel_exporter.cc +++ b/src/export/otel_exporter.cc @@ -152,43 +152,38 @@ class OTelExporter : public StatsExporter { bool CommitBatch() final; // -- Column factories (all write directly to the arena-allocated LogRecord) -- - - shared_ptr> TagString(string_view name) final { return MakeStringCol(name); } - - shared_ptr> MetricInt16(string_view name) final { - return MakeIntCol(name); - } - shared_ptr> MetricInt32(string_view name) final { - return MakeIntCol(name); - } - shared_ptr> MetricInt64(string_view name) final { - return MakeIntCol(name); - } - shared_ptr> MetricUInt8(string_view name) final { + // + // OTel logs are flat attribute bags, so the LC/HC distinction collapses on + // this side — both StatLC* and StatHC* of the same wire type produce + // identical log attributes. The intent declared at the interface layer + // matters for downstream metric processors (LC -> eligible as dimension; + // HC -> log-attribute-only) but those decisions live in the collector + // config, not here. + + // Low-cardinality columns + shared_ptr> StatLCString(string_view name) final { return MakeStringCol(name); } + shared_ptr> StatLCUInt8(string_view name) final { return MakeIntCol(name); } - shared_ptr> MetricUInt64(string_view name) final { - return MakeIntCol(name); - } - shared_ptr> RecordInt16(string_view name) final { + shared_ptr> StatLCInt16(string_view name) final { return MakeIntCol(name); } - shared_ptr> RecordInt32(string_view name) final { + shared_ptr> StatLCInt32(string_view name) final { return MakeIntCol(name); } - shared_ptr> RecordInt64(string_view name) final { + + // High-cardinality columns + shared_ptr> StatHCString(string_view name) final { return MakeSvCol(name); } + shared_ptr> StatHCInt64(string_view name) final { return MakeIntCol(name); } - shared_ptr> RecordUInt8(string_view name) final { - return MakeIntCol(name); - } - shared_ptr> RecordUInt64(string_view name) final { + shared_ptr> StatHCUInt64(string_view name) final { return MakeIntCol(name); } - shared_ptr> RecordDateTime(string_view name) final { + + shared_ptr> StatTimestamp(string_view name) final { return MakeDateTimeCol(name); } - shared_ptr> RecordString(string_view name) final { return MakeSvCol(name); } // Semantic columns shared_ptr> DbNameColumn() final { return MakeStringCol("db.name"); } diff --git a/src/export/stats_exporter.cc b/src/export/stats_exporter.cc index 2a2899d..f586238 100644 --- a/src/export/stats_exporter.cc +++ b/src/export/stats_exporter.cc @@ -233,57 +233,57 @@ void ExportEventStatsInternal(const std::vector& events, StatsExporte exporter->BeginBatch(); - auto col_ts = exporter->RecordDateTime("ts"); + auto col_ts = exporter->StatTimestamp("ts"); auto col_duration_us = exporter->DbDurationColumn(); auto col_db_name = exporter->DbNameColumn(); auto col_db_user = exporter->DbUserColumn(); - auto col_pid = exporter->RecordInt32("pid"); - auto col_query_id = exporter->RecordInt64("query_id"); + auto col_pid = exporter->StatLCInt32("pid"); + auto col_query_id = exporter->StatHCInt64("query_id"); auto col_db_operation = exporter->DbOperationColumn(); - auto col_rows = exporter->MetricUInt64("rows"); + auto col_rows = exporter->StatHCUInt64("rows"); auto col_query_text = exporter->DbQueryTextColumn(); - auto col_shared_blks_hit = exporter->MetricInt64("shared_blks_hit"); - auto col_shared_blks_read = exporter->MetricInt64("shared_blks_read"); - auto col_shared_blks_dirtied = exporter->RecordInt64("shared_blks_dirtied"); - auto col_shared_blks_written = exporter->RecordInt64("shared_blks_written"); - auto col_local_blks_hit = exporter->RecordInt64("local_blks_hit"); - auto col_local_blks_read = exporter->RecordInt64("local_blks_read"); - auto col_local_blks_dirtied = exporter->RecordInt64("local_blks_dirtied"); - auto col_local_blks_written = exporter->RecordInt64("local_blks_written"); - auto col_temp_blks_read = exporter->RecordInt64("temp_blks_read"); - auto col_temp_blks_written = exporter->RecordInt64("temp_blks_written"); - - auto col_shared_blk_read_time_us = exporter->RecordInt64("shared_blk_read_time_us"); - auto col_shared_blk_write_time_us = exporter->RecordInt64("shared_blk_write_time_us"); - auto col_local_blk_read_time_us = exporter->RecordInt64("local_blk_read_time_us"); - auto col_local_blk_write_time_us = exporter->RecordInt64("local_blk_write_time_us"); - auto col_temp_blk_read_time_us = exporter->RecordInt64("temp_blk_read_time_us"); - auto col_temp_blk_write_time_us = exporter->RecordInt64("temp_blk_write_time_us"); - - auto col_wal_records = exporter->RecordInt64("wal_records"); - auto col_wal_fpi = exporter->RecordInt64("wal_fpi"); - auto col_wal_bytes = exporter->RecordUInt64("wal_bytes"); - - auto col_cpu_user_time_us = exporter->RecordInt64("cpu_user_time_us"); - auto col_cpu_sys_time_us = exporter->RecordInt64("cpu_sys_time_us"); - - auto col_jit_functions = exporter->RecordInt32("jit_functions"); - auto col_jit_generation_time_us = exporter->RecordInt32("jit_generation_time_us"); - auto col_jit_deform_time_us = exporter->RecordInt32("jit_deform_time_us"); - auto col_jit_inlining_time_us = exporter->RecordInt32("jit_inlining_time_us"); - auto col_jit_optimization_time_us = exporter->RecordInt32("jit_optimization_time_us"); - auto col_jit_emission_time_us = exporter->RecordInt32("jit_emission_time_us"); - - auto col_parallel_workers_planned = exporter->RecordInt16("parallel_workers_planned"); - auto col_parallel_workers_launched = exporter->RecordInt16("parallel_workers_launched"); - - auto col_err_sqlstate = exporter->TagString("err_sqlstate"); - auto col_err_elevel = exporter->RecordUInt8("err_elevel"); - auto col_err_message = exporter->RecordString("err_message"); - - auto col_app = exporter->RecordString("app"); - auto col_client_addr = exporter->RecordString("client_addr"); + auto col_shared_blks_hit = exporter->StatHCInt64("shared_blks_hit"); + auto col_shared_blks_read = exporter->StatHCInt64("shared_blks_read"); + auto col_shared_blks_dirtied = exporter->StatHCInt64("shared_blks_dirtied"); + auto col_shared_blks_written = exporter->StatHCInt64("shared_blks_written"); + auto col_local_blks_hit = exporter->StatHCInt64("local_blks_hit"); + auto col_local_blks_read = exporter->StatHCInt64("local_blks_read"); + auto col_local_blks_dirtied = exporter->StatHCInt64("local_blks_dirtied"); + auto col_local_blks_written = exporter->StatHCInt64("local_blks_written"); + auto col_temp_blks_read = exporter->StatHCInt64("temp_blks_read"); + auto col_temp_blks_written = exporter->StatHCInt64("temp_blks_written"); + + auto col_shared_blk_read_time_us = exporter->StatHCInt64("shared_blk_read_time_us"); + auto col_shared_blk_write_time_us = exporter->StatHCInt64("shared_blk_write_time_us"); + auto col_local_blk_read_time_us = exporter->StatHCInt64("local_blk_read_time_us"); + auto col_local_blk_write_time_us = exporter->StatHCInt64("local_blk_write_time_us"); + auto col_temp_blk_read_time_us = exporter->StatHCInt64("temp_blk_read_time_us"); + auto col_temp_blk_write_time_us = exporter->StatHCInt64("temp_blk_write_time_us"); + + auto col_wal_records = exporter->StatHCInt64("wal_records"); + auto col_wal_fpi = exporter->StatHCInt64("wal_fpi"); + auto col_wal_bytes = exporter->StatHCUInt64("wal_bytes"); + + auto col_cpu_user_time_us = exporter->StatHCInt64("cpu_user_time_us"); + auto col_cpu_sys_time_us = exporter->StatHCInt64("cpu_sys_time_us"); + + auto col_jit_functions = exporter->StatLCInt32("jit_functions"); + auto col_jit_generation_time_us = exporter->StatLCInt32("jit_generation_time_us"); + auto col_jit_deform_time_us = exporter->StatLCInt32("jit_deform_time_us"); + auto col_jit_inlining_time_us = exporter->StatLCInt32("jit_inlining_time_us"); + auto col_jit_optimization_time_us = exporter->StatLCInt32("jit_optimization_time_us"); + auto col_jit_emission_time_us = exporter->StatLCInt32("jit_emission_time_us"); + + auto col_parallel_workers_planned = exporter->StatLCInt16("parallel_workers_planned"); + auto col_parallel_workers_launched = exporter->StatLCInt16("parallel_workers_launched"); + + auto col_err_sqlstate = exporter->StatLCString("err_sqlstate"); + auto col_err_elevel = exporter->StatLCUInt8("err_elevel"); + auto col_err_message = exporter->StatHCString("err_message"); + + auto col_app = exporter->StatLCString("app"); + auto col_client_addr = exporter->StatHCString("client_addr"); for (const auto& ev : events) { exporter->BeginRow(); diff --git a/t/024_otel_export.pl b/t/024_otel_export.pl index e5e7e60..02b42ad 100755 --- a/t/024_otel_export.pl +++ b/t/024_otel_export.pl @@ -86,8 +86,10 @@ }; # Test 4: Metric labels populated -# Tags (db, username) appear as Prometheus labels on histogram metrics. -# The OTel exporter maps TagString columns to both log attributes and metric tags. +# StatLCString columns (db_name, db_user) appear as Prometheus labels on +# histogram metrics. The producer-side OTel exporter emits these as OTLP +# log attributes only; the downstream collector's log-to-metric processor +# is what promotes them to Prometheus labels. subtest 'metric labels populated' => sub { $node->safe_psql('postgres', 'CREATE TABLE test_otel_labels(id int)'); $node->safe_psql('postgres', "INSERT INTO test_otel_labels VALUES (1), (2), (3)"); diff --git a/t/psch.pm b/t/psch.pm index 02f12c1..0fdf6cd 100644 --- a/t/psch.pm +++ b/t/psch.pm @@ -290,7 +290,7 @@ sub psch_get_otel_histogram_total { } # Return true if any Prometheus metric line for $metric_base contains a label -# with the given key=value pair. Useful for verifying TagString columns. +# with the given key=value pair. Useful for verifying StatLCString columns. # # Example: psch_otel_metric_has_label("pg_stat_ch_duration_us", "db", "postgres") sub psch_otel_metric_has_label {