Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,13 @@ of the new YAML fields below until the version that ships them.
docs.
([docs/quickstart-operator.md])

- **Hostname cardinality override for metrics.** `proxy.metrics.cardinality.hostname_cap`
overrides the `hostname` label budget independently of the default
per-label cap, enabling deterministic overflow tests and tighter
multi-tenant Prometheus budgets.
([crates/sbproxy-config/src/types.rs],
[crates/sbproxy-observe/src/cardinality.rs])

- **`release-fast` build profile for CI images.** Docker-based CI and
local kind smoke-test builds can now use `CARGO_PROFILE=release-fast`
to skip fat LTO and use more codegen units, cutting link memory/time
Expand Down
11 changes: 11 additions & 0 deletions crates/sbproxy-config/src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -849,12 +849,16 @@ pub struct MetricsConfig {
/// collapsed to `__other__`. Defaults to 1 000.
#[serde(default = "default_max_cardinality")]
pub max_cardinality_per_label: usize,
/// Per-label cardinality overrides.
#[serde(default)]
pub cardinality: MetricsCardinalityConfig,
}

impl Default for MetricsConfig {
    /// Default metrics settings: the stock per-label cap and an empty
    /// set of per-label overrides.
    fn default() -> Self {
        let max_cardinality_per_label = default_max_cardinality();
        let cardinality = MetricsCardinalityConfig::default();
        Self {
            max_cardinality_per_label,
            cardinality,
        }
    }
}
Expand All @@ -863,6 +867,13 @@ fn default_max_cardinality() -> usize {
1000
}

/// Per-label metrics cardinality overrides.
///
/// Every field is optional; the derived `Default` leaves all overrides
/// unset.
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
pub struct MetricsCardinalityConfig {
    /// Optional override for the `hostname` label cap.
    ///
    /// `None` means no override is configured.
    pub hostname_cap: Option<usize>,
}

// --- Access Log Config ---

/// Structured-JSON access-log emission, off by default.
Expand Down
9 changes: 9 additions & 0 deletions crates/sbproxy-core/src/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11146,6 +11146,15 @@ pub fn run(config_path: &str) -> anyhow::Result<()> {
let server_config = compiled.server.clone();
let hostnames: Vec<String> = compiled.host_map.keys().map(|k| k.to_string()).collect();

if let Some(metrics_cfg) = server_config.metrics.as_ref() {
let _ = sbproxy_observe::metrics::init_cardinality_limiter(
sbproxy_observe::CardinalityConfig {
max_per_label: metrics_cfg.max_cardinality_per_label,
hostname_cap: metrics_cfg.cardinality.hostname_cap,
},
);
}

// Initialise the AI provider catalog from the embedded YAML, with
// an optional override path from `proxy.ai_providers_file`: use
// the override file when readable, fall back to the embedded
Expand Down
38 changes: 36 additions & 2 deletions crates/sbproxy-observe/src/cardinality.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,30 @@ pub fn log_demotion(label_name: &str, value: &str) {
}

/// Settings that bound how many distinct values a metric label may take.
#[derive(Debug, Clone)]
pub struct CardinalityConfig {
    /// Max unique values per label name. Default: 1000.
    pub max_per_label: usize,
    /// Hostname-specific cap. `None` means no override is configured.
    /// Handy for tests and for deployments that want a tighter
    /// route-cardinality budget.
    pub hostname_cap: Option<usize>,
}

impl Default for CardinalityConfig {
    /// A 1000-value per-label cap with no hostname override.
    fn default() -> Self {
        let max_per_label: usize = 1_000;
        CardinalityConfig {
            max_per_label,
            hostname_cap: None,
        }
    }
}

impl CardinalityConfig {
    /// Resolve the cap that applies to `label_name`.
    ///
    /// A configured `hostname_cap` takes precedence for the `hostname`
    /// label; every other case defers to `budget_for_label`.
    pub fn cap_for_label(&self, label_name: &str) -> usize {
        match self.hostname_cap {
            Some(cap) if label_name == "hostname" => cap,
            _ => budget_for_label(label_name),
        }
    }
}
Expand Down Expand Up @@ -111,7 +126,7 @@ impl CardinalityLimiter {
/// callers of [`sanitize`](Self::sanitize) keep the workspace
/// default.
pub fn sanitize_budget(&self, label_name: &str, value: &str) -> String {
let cap = budget_for_label(label_name);
let cap = self.config.cap_for_label(label_name);
self.sanitize_with_cap(label_name, value, cap)
}

Expand Down Expand Up @@ -164,7 +179,10 @@ mod tests {
use std::sync::Arc;

// Test helper: a limiter with the given per-label cap and no hostname
// override. Only the complete constructor is kept; the stale one
// omitted the `hostname_cap` field.
fn limiter_with_max(max: usize) -> CardinalityLimiter {
    CardinalityLimiter::new(CardinalityConfig {
        max_per_label: max,
        hostname_cap: None,
    })
}

// --- log_demotion ---
Expand Down Expand Up @@ -299,12 +317,25 @@ mod tests {
assert_eq!(budget_for_label("totally-novel-label"), 1000);
}

#[test]
fn hostname_cap_override_wins_over_adr_budget() {
    // With a hostname cap of 2 and a much larger generic per-label
    // maximum, the first two hostnames are admitted and the third is
    // demoted to the overflow sentinel.
    let lim = CardinalityLimiter::new(CardinalityConfig {
        max_per_label: 1_000_000,
        hostname_cap: Some(2),
    });
    let expectations = [
        ("a.example", "a.example"),
        ("b.example", "b.example"),
        ("c.example", OTHER_LABEL),
    ];
    for (input, expected) in expectations {
        assert_eq!(lim.sanitize_budget("hostname", input), expected);
    }
}

#[test]
fn sanitize_budget_demotes_at_per_label_cap_for_agent_class() {
// agent_class budget is 8 per ADR. Insert 8 distinct values,
// then verify the 9th overflows to __other__.
let lim = CardinalityLimiter::new(CardinalityConfig {
max_per_label: 1_000_000,
hostname_cap: None,
});
for i in 0..8 {
let v = format!("class-{i}");
Expand All @@ -321,6 +352,7 @@ mod tests {
fn sanitize_budget_payment_rail_caps_at_six() {
let lim = CardinalityLimiter::new(CardinalityConfig {
max_per_label: 1_000_000,
hostname_cap: None,
});
for v in &[
"none",
Expand All @@ -345,6 +377,7 @@ mod tests {
// fallback path doesn't accidentally cap at zero.
let lim = CardinalityLimiter::new(CardinalityConfig {
max_per_label: 1_000_000,
hostname_cap: None,
});
assert_eq!(lim.sanitize_budget("oddball", "value-1"), "value-1");
assert_eq!(lim.sanitize_budget("oddball", "value-2"), "value-2");
Expand All @@ -359,6 +392,7 @@ mod tests {
// string or "__other__", and the accepted count must not exceed 500.
let lim = Arc::new(CardinalityLimiter::new(CardinalityConfig {
max_per_label: 500,
hostname_cap: None,
}));

let mut handles = Vec::new();
Expand Down
16 changes: 11 additions & 5 deletions crates/sbproxy-observe/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -557,7 +557,8 @@ pub fn record_request_with_labels(
bytes_out: u64,
agent: AgentLabels<'_>,
) {
let origin_san = sanitize_label("origin", origin);
let hostname_san = sanitize_label_budget("sbproxy_requests_total", "hostname", origin);
let origin_san = sanitize_label_budget("sbproxy_origin_requests_total", "origin", origin);
let status_str = status.to_string();

// --- Wave 1 / G1.6: per-agent labels on sbproxy_requests_total ---
Expand All @@ -582,11 +583,12 @@ pub fn record_request_with_labels(

let m = metrics();
// sbproxy_requests_total now carries the full Wave 1 label set.
// Sanitised hostname is reused via origin_san (cardinality cap
// 200 per ADR; same numeric cap, different label name).
// Sanitise with the metric's public label name (`hostname`) so
// `metrics.cardinality.hostname_cap` can lower this budget without
// affecting the per-origin views below.
m.requests_total
.with_label_values(&[
&origin_san,
&hostname_san,
method,
&status_str,
&agent_id,
Expand Down Expand Up @@ -1143,7 +1145,10 @@ mod tests {
#[test]
fn test_cardinality_limiter_overflow_to_other() {
// Use a fresh limiter with a tiny cap to test overflow.
let lim = CardinalityLimiter::new(CardinalityConfig { max_per_label: 3 });
let lim = CardinalityLimiter::new(CardinalityConfig {
max_per_label: 3,
hostname_cap: None,
});

let a = lim.sanitize("origin", "a.com");
let b = lim.sanitize("origin", "b.com");
Expand All @@ -1169,6 +1174,7 @@ mod tests {
// via a dedicated limiter (we can't reset the global one safely in tests).
let lim = CardinalityLimiter::new(CardinalityConfig {
max_per_label: 1000,
hostname_cap: None,
});
for i in 0..1000 {
lim.sanitize("origin", &format!("overflow-origin-{i}.example.com"));
Expand Down
5 changes: 4 additions & 1 deletion docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,8 @@ proxy:

metrics:
max_cardinality_per_label: 1000
cardinality:
hostname_cap: 200

admin:
enabled: false
Expand Down Expand Up @@ -216,7 +218,8 @@ socket level.

| Field | Type | Default | Description |
|-------|------|---------|-------------|
| `max_cardinality_per_label` | int | 1000 | Cap on unique label values per metric. New values are collapsed to `__other__`. |
| `max_cardinality_per_label` | int | 1000 | Default cap on unique values per label. New values past the cap are collapsed to `__other__`. |
| `cardinality.hostname_cap` | int | unset (ADR budget: 200) | Optional override for the `hostname` label budget. When unset, the `hostname` label uses its ADR budget. Useful for high-tenant-count deployments and deterministic overflow tests. |

### access_log

Expand Down
2 changes: 2 additions & 0 deletions docs/features.md
Original file line number Diff line number Diff line change
Expand Up @@ -1424,6 +1424,8 @@ proxy:
port: 9090
metrics:
max_cardinality_per_label: 1000
cardinality:
hostname_cap: 200
```

```bash
Expand Down
2 changes: 1 addition & 1 deletion docs/manual.md
Original file line number Diff line number Diff line change
Expand Up @@ -420,7 +420,7 @@ The proxy serves `/metrics` on its main HTTP port (`http_bind_port`, default `80
GET http://localhost:8080/metrics
```

Label cardinality is capped by `metrics.max_cardinality_per_label` (default `1000`). Values past the cap collapse into the literal `__other__`.
Label cardinality is capped by `metrics.max_cardinality_per_label` (default `1000`). The `hostname` label uses its ADR budget by default and can be overridden with `metrics.cardinality.hostname_cap`. Values past the effective cap collapse into the literal `__other__`.

#### Hostname-scoped metrics

Expand Down
4 changes: 3 additions & 1 deletion e2e/tests/metrics_per_agent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ use sbproxy_e2e::ProxyHarness;
const FIXTURE: &str = r#"
proxy:
http_bind_port: 0 # overridden by the harness
metrics:
cardinality:
hostname_cap: 100
origins:
"blog.localhost":
action:
Expand Down Expand Up @@ -132,7 +135,6 @@ fn cardinality_cap_keeps_series_count_bounded() {
// --- Test 3: overflow sentinel + demotion counter ---

#[test]
#[ignore = "TODO(wave3): hostname cardinality cap is above 250 in default config; overflow sentinel + demotion counter wired but not triggered by this fixture. Needs either a higher-volume fixture or a config knob to lower the cap for tests."]
fn cardinality_overflow_emits_sentinel_and_increments_demotion_counter() {
let harness = ProxyHarness::start_with_yaml(FIXTURE).expect("start proxy");

Expand Down
Loading