Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 26 additions & 32 deletions src/export/clickhouse_exporter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -157,54 +157,48 @@ class ClickHouseExporter : public StatsExporter {
MemoryContextDelete(conn_cxt_);
}

// Server casts String to LowCardinality(String) on INSERT
shared_ptr<Column<string>> TagString(string_view name) final {
// On the CH-native side, the LC/HC distinction is a hint: the server-side
// LowCardinality(<Type>) wrap (declared in the schema) is what actually
// applies dictionary encoding on write. clickhouse-c speaks plain typed
// columns either way, so StatLC* and StatHC* of the same C++ type produce
// identical wire bytes here. The new Arrow exporter is where the LC/HC
// distinction materially changes the wire shape (DictBuilder vs plain).

// Low-cardinality columns
shared_ptr<Column<string>> StatLCString(string_view name) final {
return MakeCol<StringCol<string>>(name);
}
shared_ptr<Column<int16_t>> MetricInt16(string_view name) final {
return MakeCol<FixedCol<int16_t>>(name, "Int16");
}
shared_ptr<Column<int32_t>> MetricInt32(string_view name) final {
return MakeCol<FixedCol<int32_t>>(name, "Int32");
}
shared_ptr<Column<int64_t>> MetricInt64(string_view name) final {
return MakeCol<FixedCol<int64_t>>(name, "Int64");
}
shared_ptr<Column<uint8_t>> MetricUInt8(string_view name) final {
shared_ptr<Column<uint8_t>> StatLCUInt8(string_view name) final {
return MakeCol<FixedCol<uint8_t>>(name, "UInt8");
}
shared_ptr<Column<uint64_t>> MetricUInt64(string_view name) final {
return MakeCol<FixedCol<uint64_t>>(name, "UInt64");
}

shared_ptr<Column<int16_t>> RecordInt16(string_view name) final {
shared_ptr<Column<int16_t>> StatLCInt16(string_view name) final {
return MakeCol<FixedCol<int16_t>>(name, "Int16");
}
shared_ptr<Column<int32_t>> RecordInt32(string_view name) final {
shared_ptr<Column<int32_t>> StatLCInt32(string_view name) final {
return MakeCol<FixedCol<int32_t>>(name, "Int32");
}
shared_ptr<Column<int64_t>> RecordInt64(string_view name) final {
return MakeCol<FixedCol<int64_t>>(name, "Int64");

// High-cardinality columns
shared_ptr<Column<string_view>> StatHCString(string_view name) final {
return MakeCol<StringCol<string_view>>(name);
}
shared_ptr<Column<uint8_t>> RecordUInt8(string_view name) final {
return MakeCol<FixedCol<uint8_t>>(name, "UInt8");
shared_ptr<Column<int64_t>> StatHCInt64(string_view name) final {
return MakeCol<FixedCol<int64_t>>(name, "Int64");
}
shared_ptr<Column<uint64_t>> RecordUInt64(string_view name) final {
shared_ptr<Column<uint64_t>> StatHCUInt64(string_view name) final {
return MakeCol<FixedCol<uint64_t>>(name, "UInt64");
}
shared_ptr<Column<int64_t>> RecordDateTime(string_view name) final {

shared_ptr<Column<int64_t>> StatTimestamp(string_view name) final {
return MakeCol<FixedCol<int64_t>>(name, "DateTime64(6)");
}
shared_ptr<Column<string_view>> RecordString(string_view name) final {
return MakeCol<StringCol<string_view>>(name);
}

// Semantic columns
shared_ptr<Column<string>> DbNameColumn() final { return TagString("db_name"); }
shared_ptr<Column<string>> DbUserColumn() final { return TagString("db_user"); }
shared_ptr<Column<uint64_t>> DbDurationColumn() final { return MetricUInt64("duration_us"); }
shared_ptr<Column<string>> DbOperationColumn() final { return TagString("db_operation"); }
shared_ptr<Column<string_view>> DbQueryTextColumn() final { return RecordString("query_text"); }
shared_ptr<Column<string>> DbNameColumn() final { return StatLCString("db_name"); }
shared_ptr<Column<string>> DbUserColumn() final { return StatLCString("db_user"); }
shared_ptr<Column<uint64_t>> DbDurationColumn() final { return StatHCUInt64("duration_us"); }
shared_ptr<Column<string>> DbOperationColumn() final { return StatLCString("db_operation"); }
shared_ptr<Column<string_view>> DbQueryTextColumn() final { return StatHCString("query_text"); }

void BeginBatch() final {
for (auto& col : columns_)
Expand Down
66 changes: 43 additions & 23 deletions src/export/exporter_interface.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,24 +28,44 @@ class StatsExporter {
};

public:
// Tags: Columns that serve as narrowing criteria for metrics.
virtual shared_ptr<Column<string>> TagString(string_view name) = 0;
// ===========================================================================
// Cardinality-typed columns.
//
// Stat<LC|HC><Type>(name) declares both the wire type and the *cardinality
// intent* of the column. Each backend honors that intent appropriately:
//
// ClickHouse: LC -> may be stored as LowCardinality(<Type>); HC -> plain.
// Schema-declared encoding wins on write; the LC hint helps
// the producer pick the cheapest column representation.
// Arrow IPC: LC -> DictBuilder (dictionary-encoded array); HC -> plain
// typed builder. Required for batch-rate efficiency on
// low-cardinality dimensions.
// OTel: LC -> eligible as a histogram dimension or metric label;
// HC -> log attribute only, *never* a metric dimension
// (cardinality explosion).
//
// Cardinality is a property of the data, not just the type width. err_elevel
// (UInt8) is LC because values repeat (~5 distinct codes). query_id (Int64)
// is HC because every distinct query produces a distinct value. The author
// declares intent; the backend implements it.
// ===========================================================================

// Low-cardinality columns: dimensions you'd group/filter by.
virtual shared_ptr<Column<string>> StatLCString(string_view name) = 0;
virtual shared_ptr<Column<uint8_t>> StatLCUInt8(string_view name) = 0;
virtual shared_ptr<Column<int16_t>> StatLCInt16(string_view name) = 0;
virtual shared_ptr<Column<int32_t>> StatLCInt32(string_view name) = 0;

// Metrics: Columns that are generally bucketed into histograms.
virtual shared_ptr<Column<int16_t>> MetricInt16(string_view name) = 0;
virtual shared_ptr<Column<int32_t>> MetricInt32(string_view name) = 0;
virtual shared_ptr<Column<int64_t>> MetricInt64(string_view name) = 0;
virtual shared_ptr<Column<uint8_t>> MetricUInt8(string_view name) = 0;
virtual shared_ptr<Column<uint64_t>> MetricUInt64(string_view name) = 0;
// High-cardinality columns: values you observe, not dimensions you group by.
virtual shared_ptr<Column<string_view>> StatHCString(string_view name) = 0;
virtual shared_ptr<Column<int64_t>> StatHCInt64(string_view name) = 0;
virtual shared_ptr<Column<uint64_t>> StatHCUInt64(string_view name) = 0;

// Records: Data columns you wouldn't want to filter by.
virtual shared_ptr<Column<int16_t>> RecordInt16(string_view name) = 0;
virtual shared_ptr<Column<int32_t>> RecordInt32(string_view name) = 0;
virtual shared_ptr<Column<int64_t>> RecordInt64(string_view name) = 0;
virtual shared_ptr<Column<uint8_t>> RecordUInt8(string_view name) = 0;
virtual shared_ptr<Column<uint64_t>> RecordUInt64(string_view name) = 0;
virtual shared_ptr<Column<int64_t>> RecordDateTime(string_view name) = 0;
virtual shared_ptr<Column<string_view>> RecordString(string_view name) = 0;
// Domain-specific. Caller appends a Unix-epoch microsecond timestamp.
// (PG-epoch values must be offset by kPostgresEpochOffsetUs before append;
// CH DateTime64(6) and OTel time_unix_nano both interpret the wire value
// as Unix-epoch.)
virtual shared_ptr<Column<int64_t>> StatTimestamp(string_view name) = 0;

// ===========================================================================
// Semantic columns: name, unit, and instrument type may vary by backend.
Expand All @@ -54,17 +74,17 @@ class StatsExporter {
// instrument). Pure virtuals enforce explicit handling in every exporter.
// ===========================================================================

// Database name. CH: TagString "db_name"; OTel semconv: "db.name" tag.
// Database name. CH: StatLCString "db_name"; OTel semconv: "db.name" tag.
virtual shared_ptr<Column<string>> DbNameColumn() = 0;
// Authenticated user. CH: TagString "db_user"; OTel semconv: "db.user" tag.
// Authenticated user. CH: StatLCString "db_user"; OTel semconv: "db.user" tag.
virtual shared_ptr<Column<string>> DbUserColumn() = 0;
// Query duration. Caller appends microseconds. CH: MetricUInt64 "duration_us";
// Query duration. Caller appends microseconds. CH: StatHCUInt64 "duration_us";
// OTel: converts to seconds, records as Histogram<double> "db.client.operation.duration".
virtual shared_ptr<Column<uint64_t>> DbDurationColumn() = 0;
// SQL command type. CH: TagString "db_operation"; OTel: TagString "db.operation.name"
// SQL command type. CH: StatLCString "db_operation"; OTel: StatLCString "db.operation.name"
// (used as a dimension on the duration histogram).
virtual shared_ptr<Column<string>> DbOperationColumn() = 0;
// Query text. CH: RecordString "query_text"; OTel semconv: "db.query.text".
// Query text. CH: StatHCString "query_text"; OTel semconv: "db.query.text".
virtual shared_ptr<Column<string_view>> DbQueryTextColumn() = 0;

virtual void BeginBatch() = 0;
Expand Down Expand Up @@ -95,8 +115,8 @@ void RecordExporterFailure(const char* message);
// Expected usage:
// void ProcessBatch(StatsExporter *exporter) {
// exporter->BeginBatch(); // no op or ClickHouse column reset
// auto col_user = exporter->TagString("db_user");
// auto col_rows = exporter->MetricUInt64("rows");
// auto col_user = exporter->StatLCString("db_user");
// auto col_rows = exporter->StatHCUInt64("rows");
//
// for (const auto &ev : events) {
// exporter->BeginRow(); // no-op or initialize tag map
Expand Down
45 changes: 20 additions & 25 deletions src/export/otel_exporter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -152,43 +152,38 @@ class OTelExporter : public StatsExporter {
bool CommitBatch() final;

// -- Column factories (all write directly to the arena-allocated LogRecord) --

shared_ptr<Column<string>> TagString(string_view name) final { return MakeStringCol(name); }

shared_ptr<Column<int16_t>> MetricInt16(string_view name) final {
return MakeIntCol<int16_t>(name);
}
shared_ptr<Column<int32_t>> MetricInt32(string_view name) final {
return MakeIntCol<int32_t>(name);
}
shared_ptr<Column<int64_t>> MetricInt64(string_view name) final {
return MakeIntCol<int64_t>(name);
}
shared_ptr<Column<uint8_t>> MetricUInt8(string_view name) final {
//
// OTel logs are flat attribute bags, so the LC/HC distinction collapses on
// this side — both StatLC* and StatHC* of the same wire type produce
// identical log attributes. The intent declared at the interface layer
// matters for downstream metric processors (LC -> eligible as dimension;
// HC -> log-attribute-only) but those decisions live in the collector
// config, not here.

// Low-cardinality columns
shared_ptr<Column<string>> StatLCString(string_view name) final { return MakeStringCol(name); }
shared_ptr<Column<uint8_t>> StatLCUInt8(string_view name) final {
return MakeIntCol<uint8_t>(name);
}
shared_ptr<Column<uint64_t>> MetricUInt64(string_view name) final {
return MakeIntCol<uint64_t>(name);
}
shared_ptr<Column<int16_t>> RecordInt16(string_view name) final {
shared_ptr<Column<int16_t>> StatLCInt16(string_view name) final {
return MakeIntCol<int16_t>(name);
}
shared_ptr<Column<int32_t>> RecordInt32(string_view name) final {
shared_ptr<Column<int32_t>> StatLCInt32(string_view name) final {
return MakeIntCol<int32_t>(name);
}
shared_ptr<Column<int64_t>> RecordInt64(string_view name) final {

// High-cardinality columns
shared_ptr<Column<string_view>> StatHCString(string_view name) final { return MakeSvCol(name); }
shared_ptr<Column<int64_t>> StatHCInt64(string_view name) final {
return MakeIntCol<int64_t>(name);
}
shared_ptr<Column<uint8_t>> RecordUInt8(string_view name) final {
return MakeIntCol<uint8_t>(name);
}
shared_ptr<Column<uint64_t>> RecordUInt64(string_view name) final {
shared_ptr<Column<uint64_t>> StatHCUInt64(string_view name) final {
return MakeIntCol<uint64_t>(name);
}
shared_ptr<Column<int64_t>> RecordDateTime(string_view name) final {

shared_ptr<Column<int64_t>> StatTimestamp(string_view name) final {
return MakeDateTimeCol(name);
}
shared_ptr<Column<string_view>> RecordString(string_view name) final { return MakeSvCol(name); }

// Semantic columns
shared_ptr<Column<string>> DbNameColumn() final { return MakeStringCol("db.name"); }
Expand Down
90 changes: 45 additions & 45 deletions src/export/stats_exporter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -233,57 +233,57 @@ void ExportEventStatsInternal(const std::vector<PschEvent>& events, StatsExporte

exporter->BeginBatch();

auto col_ts = exporter->RecordDateTime("ts");
auto col_ts = exporter->StatTimestamp("ts");
auto col_duration_us = exporter->DbDurationColumn();
auto col_db_name = exporter->DbNameColumn();
auto col_db_user = exporter->DbUserColumn();
auto col_pid = exporter->RecordInt32("pid");
auto col_query_id = exporter->RecordInt64("query_id");
auto col_pid = exporter->StatLCInt32("pid");
auto col_query_id = exporter->StatHCInt64("query_id");
auto col_db_operation = exporter->DbOperationColumn();
auto col_rows = exporter->MetricUInt64("rows");
auto col_rows = exporter->StatHCUInt64("rows");
auto col_query_text = exporter->DbQueryTextColumn();

auto col_shared_blks_hit = exporter->MetricInt64("shared_blks_hit");
auto col_shared_blks_read = exporter->MetricInt64("shared_blks_read");
auto col_shared_blks_dirtied = exporter->RecordInt64("shared_blks_dirtied");
auto col_shared_blks_written = exporter->RecordInt64("shared_blks_written");
auto col_local_blks_hit = exporter->RecordInt64("local_blks_hit");
auto col_local_blks_read = exporter->RecordInt64("local_blks_read");
auto col_local_blks_dirtied = exporter->RecordInt64("local_blks_dirtied");
auto col_local_blks_written = exporter->RecordInt64("local_blks_written");
auto col_temp_blks_read = exporter->RecordInt64("temp_blks_read");
auto col_temp_blks_written = exporter->RecordInt64("temp_blks_written");

auto col_shared_blk_read_time_us = exporter->RecordInt64("shared_blk_read_time_us");
auto col_shared_blk_write_time_us = exporter->RecordInt64("shared_blk_write_time_us");
auto col_local_blk_read_time_us = exporter->RecordInt64("local_blk_read_time_us");
auto col_local_blk_write_time_us = exporter->RecordInt64("local_blk_write_time_us");
auto col_temp_blk_read_time_us = exporter->RecordInt64("temp_blk_read_time_us");
auto col_temp_blk_write_time_us = exporter->RecordInt64("temp_blk_write_time_us");

auto col_wal_records = exporter->RecordInt64("wal_records");
auto col_wal_fpi = exporter->RecordInt64("wal_fpi");
auto col_wal_bytes = exporter->RecordUInt64("wal_bytes");

auto col_cpu_user_time_us = exporter->RecordInt64("cpu_user_time_us");
auto col_cpu_sys_time_us = exporter->RecordInt64("cpu_sys_time_us");

auto col_jit_functions = exporter->RecordInt32("jit_functions");
auto col_jit_generation_time_us = exporter->RecordInt32("jit_generation_time_us");
auto col_jit_deform_time_us = exporter->RecordInt32("jit_deform_time_us");
auto col_jit_inlining_time_us = exporter->RecordInt32("jit_inlining_time_us");
auto col_jit_optimization_time_us = exporter->RecordInt32("jit_optimization_time_us");
auto col_jit_emission_time_us = exporter->RecordInt32("jit_emission_time_us");

auto col_parallel_workers_planned = exporter->RecordInt16("parallel_workers_planned");
auto col_parallel_workers_launched = exporter->RecordInt16("parallel_workers_launched");

auto col_err_sqlstate = exporter->TagString("err_sqlstate");
auto col_err_elevel = exporter->RecordUInt8("err_elevel");
auto col_err_message = exporter->RecordString("err_message");

auto col_app = exporter->RecordString("app");
auto col_client_addr = exporter->RecordString("client_addr");
auto col_shared_blks_hit = exporter->StatHCInt64("shared_blks_hit");
auto col_shared_blks_read = exporter->StatHCInt64("shared_blks_read");
auto col_shared_blks_dirtied = exporter->StatHCInt64("shared_blks_dirtied");
auto col_shared_blks_written = exporter->StatHCInt64("shared_blks_written");
auto col_local_blks_hit = exporter->StatHCInt64("local_blks_hit");
auto col_local_blks_read = exporter->StatHCInt64("local_blks_read");
auto col_local_blks_dirtied = exporter->StatHCInt64("local_blks_dirtied");
auto col_local_blks_written = exporter->StatHCInt64("local_blks_written");
auto col_temp_blks_read = exporter->StatHCInt64("temp_blks_read");
auto col_temp_blks_written = exporter->StatHCInt64("temp_blks_written");

auto col_shared_blk_read_time_us = exporter->StatHCInt64("shared_blk_read_time_us");
auto col_shared_blk_write_time_us = exporter->StatHCInt64("shared_blk_write_time_us");
auto col_local_blk_read_time_us = exporter->StatHCInt64("local_blk_read_time_us");
auto col_local_blk_write_time_us = exporter->StatHCInt64("local_blk_write_time_us");
auto col_temp_blk_read_time_us = exporter->StatHCInt64("temp_blk_read_time_us");
auto col_temp_blk_write_time_us = exporter->StatHCInt64("temp_blk_write_time_us");

auto col_wal_records = exporter->StatHCInt64("wal_records");
auto col_wal_fpi = exporter->StatHCInt64("wal_fpi");
auto col_wal_bytes = exporter->StatHCUInt64("wal_bytes");

auto col_cpu_user_time_us = exporter->StatHCInt64("cpu_user_time_us");
auto col_cpu_sys_time_us = exporter->StatHCInt64("cpu_sys_time_us");

auto col_jit_functions = exporter->StatLCInt32("jit_functions");
auto col_jit_generation_time_us = exporter->StatLCInt32("jit_generation_time_us");
auto col_jit_deform_time_us = exporter->StatLCInt32("jit_deform_time_us");
auto col_jit_inlining_time_us = exporter->StatLCInt32("jit_inlining_time_us");
auto col_jit_optimization_time_us = exporter->StatLCInt32("jit_optimization_time_us");
auto col_jit_emission_time_us = exporter->StatLCInt32("jit_emission_time_us");

auto col_parallel_workers_planned = exporter->StatLCInt16("parallel_workers_planned");
auto col_parallel_workers_launched = exporter->StatLCInt16("parallel_workers_launched");

auto col_err_sqlstate = exporter->StatLCString("err_sqlstate");
auto col_err_elevel = exporter->StatLCUInt8("err_elevel");
auto col_err_message = exporter->StatHCString("err_message");

auto col_app = exporter->StatLCString("app");
auto col_client_addr = exporter->StatHCString("client_addr");

for (const auto& ev : events) {
exporter->BeginRow();
Expand Down
6 changes: 4 additions & 2 deletions t/024_otel_export.pl
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,10 @@
};

# Test 4: Metric labels populated
# Tags (db, username) appear as Prometheus labels on histogram metrics.
# The OTel exporter maps TagString columns to both log attributes and metric tags.
# StatLCString columns (db_name, db_user) appear as Prometheus labels on
# histogram metrics. The producer-side OTel exporter emits these as OTLP
# log attributes only; the downstream collector's log-to-metric processor
# is what promotes them to Prometheus labels.
subtest 'metric labels populated' => sub {
$node->safe_psql('postgres', 'CREATE TABLE test_otel_labels(id int)');
$node->safe_psql('postgres', "INSERT INTO test_otel_labels VALUES (1), (2), (3)");
Expand Down
2 changes: 1 addition & 1 deletion t/psch.pm
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ sub psch_get_otel_histogram_total {
}

# Return true if any Prometheus metric line for $metric_base contains a label
# with the given key=value pair. Useful for verifying TagString columns.
# with the given key=value pair. Useful for verifying StatLCString columns.
#
# Example: psch_otel_metric_has_label("pg_stat_ch_duration_us", "db", "postgres")
sub psch_otel_metric_has_label {
Expand Down
Loading