From 5cc6800c797f508dcd7b6548e47b576caa8bcf49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Antonsson?= Date: Mon, 9 Feb 2026 16:54:35 +0100 Subject: [PATCH 1/8] perf(sampling): add simple sampling benchmark --- datadog-opentelemetry/Cargo.toml | 12 + .../benches/datadog_sampling_benchmark.rs | 459 ++++++++++++++++ .../benches/otel_sampling_benchmark.rs | 516 ++++++++++++++++++ datadog-opentelemetry/src/lib.rs | 16 +- datadog-opentelemetry/src/sampler.rs | 80 ++- .../src/sampling/datadog_sampler.rs | 10 +- datadog-opentelemetry/src/span_processor.rs | 29 +- 7 files changed, 1087 insertions(+), 35 deletions(-) create mode 100644 datadog-opentelemetry/benches/datadog_sampling_benchmark.rs create mode 100644 datadog-opentelemetry/benches/otel_sampling_benchmark.rs diff --git a/datadog-opentelemetry/Cargo.toml b/datadog-opentelemetry/Cargo.toml index f2a3b5bc..b18c14b3 100644 --- a/datadog-opentelemetry/Cargo.toml +++ b/datadog-opentelemetry/Cargo.toml @@ -134,3 +134,15 @@ path = "benches/inject_benchmark.rs" name = "extract_benchmark" harness = false path = "benches/extract_benchmark.rs" + +[[bench]] +name = "datadog_sampling_benchmark" +harness = false +path = "benches/datadog_sampling_benchmark.rs" +required-features = ["test-utils"] + +[[bench]] +name = "otel_sampling_benchmark" +harness = false +path = "benches/otel_sampling_benchmark.rs" +required-features = ["test-utils"] diff --git a/datadog-opentelemetry/benches/datadog_sampling_benchmark.rs b/datadog-opentelemetry/benches/datadog_sampling_benchmark.rs new file mode 100644 index 00000000..40ae58e9 --- /dev/null +++ b/datadog-opentelemetry/benches/datadog_sampling_benchmark.rs @@ -0,0 +1,459 @@ +// Copyright 2026-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +use criterion::{criterion_group, criterion_main, Criterion}; +use datadog_opentelemetry::core_pub_hack::test_utils::benchmarks::{ + memory_allocated_measurement, MeasurementName, ReportingAllocator, +}; +use datadog_opentelemetry::sampling::DatadogSampler; +use datadog_opentelemetry::sampling::SamplingRule; +use opentelemetry::{trace::SpanKind, KeyValue, TraceId}; +use std::collections::HashMap; +use std::hint::black_box; +use std::sync::{Arc, RwLock}; + +#[global_allocator] +static GLOBAL: ReportingAllocator = ReportingAllocator::new(std::alloc::System); + +struct BenchmarkConfig { + name: &'static str, + rules: Vec, + resource: opentelemetry_sdk::Resource, + trace_id: TraceId, + span_name: &'static str, + span_kind: SpanKind, + attributes: Vec, + is_parent_sampled: Option, + should_keep: Option, +} + +fn create_benchmark_configs() -> Vec { + let trace_id = TraceId::from(0x12345678901234567890123456789012_u128); + + vec![ + // 1. All spans rule (baseline) + BenchmarkConfig { + name: "rule_all_spans_only_rate", + rules: vec![SamplingRule::new(1.0, None, None, None, None, None)], + resource: opentelemetry_sdk::Resource::builder_empty().build(), + trace_id, + span_name: "something", + span_kind: SpanKind::Server, + attributes: vec![KeyValue::new("foo", "bar"), KeyValue::new("bar", "baz")], + is_parent_sampled: None, + should_keep: Some(true), + }, + // 2. Service rule - matching + BenchmarkConfig { + name: "service_rule_matching", + rules: vec![SamplingRule::new( + 1.0, + Some("test-service".to_string()), + None, + None, + None, + None, + )], + resource: opentelemetry_sdk::Resource::builder() + .with_service_name("test-service") + .build(), + trace_id, + span_name: "test-operation", + span_kind: SpanKind::Server, + attributes: vec![KeyValue::new("foo", "bar")], + is_parent_sampled: None, + should_keep: Some(true), + }, + // 3. Service rule - not matching + BenchmarkConfig { + name: "service_rule_not_matching", + rules: vec![SamplingRule::new( + 1.0, + Some("test-service".to_string()), + None, + None, + None, + None, + )], + resource: opentelemetry_sdk::Resource::builder() + .with_service_name("other-service") + .build(), + trace_id, + span_name: "test-operation", + span_kind: SpanKind::Server, + attributes: vec![KeyValue::new("foo", "bar")], + is_parent_sampled: None, + should_keep: Some(true), + }, + // 4. Name pattern rule - matching + BenchmarkConfig { + name: "name_pattern_rule_matching", + rules: vec![SamplingRule::new( + 1.0, + None, + Some("http.*".to_string()), + None, + None, + None, + )], + resource: opentelemetry_sdk::Resource::builder_empty().build(), + trace_id, + span_name: "http.request", + span_kind: SpanKind::Server, + attributes: vec![KeyValue::new("http.method", "GET")], + is_parent_sampled: None, + should_keep: Some(true), + }, + // 5. Name pattern rule - not matching + BenchmarkConfig { + name: "name_pattern_rule_not_matching", + rules: vec![SamplingRule::new( + 1.0, + None, + Some("http.*".to_string()), + None, + None, + None, + )], + resource: opentelemetry_sdk::Resource::builder_empty().build(), + trace_id, + span_name: "grpc.request", + span_kind: SpanKind::Server, + attributes: vec![KeyValue::new("rpc.method", "GetUser")], + is_parent_sampled: None, + should_keep: Some(true), + }, + // 6. Resource pattern rule - matching + BenchmarkConfig { + name: "resource_pattern_rule_matching", + rules: vec![SamplingRule::new( + 1.0, + None, + None, + Some("/api/*".to_string()), + None, + None, + )], + resource: opentelemetry_sdk::Resource::builder_empty().build(), + trace_id, + span_name: "http.request", + span_kind: SpanKind::Server, + attributes: vec![ + KeyValue::new("http.method", "GET"), + KeyValue::new("http.route", "/api/users"), + ], + is_parent_sampled: None, + should_keep: Some(true), + }, + // 7. Resource pattern rule - not matching + BenchmarkConfig { + name: "resource_pattern_rule_not_matching", + rules: vec![SamplingRule::new( + 1.0, + None, + None, + Some("/api/*".to_string()), + None, + None, + )], + resource: opentelemetry_sdk::Resource::builder_empty().build(), + trace_id, + span_name: "http.request", + span_kind: SpanKind::Server, + attributes: vec![ + KeyValue::new("http.method", "GET"), + KeyValue::new("http.route", "/health"), + ], + is_parent_sampled: None, + should_keep: Some(true), + }, + // 8. Tag rule - matching + BenchmarkConfig { + name: "tag_rule_matching", + rules: vec![SamplingRule::new( + 1.0, + None, + None, + None, + Some(HashMap::from([( + "environment".to_string(), + "production".to_string(), + )])), + None, + )], + resource: opentelemetry_sdk::Resource::builder_empty().build(), + trace_id, + span_name: "test-operation", + span_kind: SpanKind::Server, + attributes: vec![ + KeyValue::new("environment", "production"), + KeyValue::new("foo", "bar"), + ], + is_parent_sampled: None, + should_keep: Some(true), + }, + // 9. Tag rule - not matching + BenchmarkConfig { + name: "tag_rule_not_matching", + rules: vec![SamplingRule::new( + 1.0, + None, + None, + None, + Some(HashMap::from([( + "environment".to_string(), + "production".to_string(), + )])), + None, + )], + resource: opentelemetry_sdk::Resource::builder_empty().build(), + trace_id, + span_name: "test-operation", + span_kind: SpanKind::Server, + attributes: vec![ + KeyValue::new("environment", "staging"), + KeyValue::new("foo", "bar"), + ], + is_parent_sampled: None, + should_keep: Some(true), + }, + // 10. Complex rule - matching + BenchmarkConfig { + name: "complex_rule_matching", + rules: vec![SamplingRule::new( + 0.5, + Some("api-service".to_string()), + Some("http.*".to_string()), + Some("/api/v1/*".to_string()), + Some(HashMap::from([( + "environment".to_string(), + "production".to_string(), + )])), + None, + )], + resource: opentelemetry_sdk::Resource::builder() + .with_service_name("api-service") + .build(), + trace_id, + span_name: "http.request", + span_kind: SpanKind::Server, + attributes: vec![ + KeyValue::new("environment", "production"), + KeyValue::new("http.method", "POST"), + KeyValue::new("http.route", "/api/v1/users"), + ], + is_parent_sampled: None, + should_keep: None, // Probabilistic sampling at 0.5 rate + }, + // 11. Complex rule - partial match + BenchmarkConfig { + name: "complex_rule_partial_match", + rules: vec![SamplingRule::new( + 0.5, + Some("api-service".to_string()), + Some("http.*".to_string()), + Some("/api/v1/*".to_string()), + Some(HashMap::from([( + "environment".to_string(), + "production".to_string(), + )])), + None, + )], + resource: opentelemetry_sdk::Resource::builder() + .with_service_name("api-service") + .build(), + trace_id, + span_name: "http.request", + span_kind: SpanKind::Server, + attributes: vec![ + KeyValue::new("environment", "staging"), + KeyValue::new("http.method", "POST"), + KeyValue::new("http.route", "/health"), + ], + is_parent_sampled: None, + should_keep: Some(true), + }, + // 12. Multiple rules - first match + BenchmarkConfig { + name: "multiple_rules_first_match", + rules: vec![ + SamplingRule::new(0.1, Some("api-service".to_string()), None, None, None, None), + SamplingRule::new(0.5, Some("web-service".to_string()), None, None, None, None), + SamplingRule::new(1.0, None, None, None, None, None), + ], + resource: opentelemetry_sdk::Resource::builder() + .with_service_name("api-service") + .build(), + trace_id, + span_name: "test-operation", + span_kind: SpanKind::Server, + attributes: vec![KeyValue::new("foo", "bar")], + is_parent_sampled: None, + should_keep: None, // Probabilistic sampling at 0.1 rate + }, + // 13. Multiple rules - last match + BenchmarkConfig { + name: "multiple_rules_last_match", + rules: vec![ + SamplingRule::new(0.1, Some("api-service".to_string()), None, None, None, None), + SamplingRule::new(0.5, Some("web-service".to_string()), None, None, None, None), + SamplingRule::new(1.0, None, None, None, None, None), + ], + resource: opentelemetry_sdk::Resource::builder() + .with_service_name("other-service") + .build(), + trace_id, + span_name: "test-operation", + span_kind: SpanKind::Server, + attributes: vec![KeyValue::new("foo", "bar")], + is_parent_sampled: None, + should_keep: Some(true), + }, + // 14. Many attributes + BenchmarkConfig { + name: "many_attributes", + rules: vec![SamplingRule::new( + 1.0, + None, + None, + None, + Some(HashMap::from([( + "key10".to_string(), + "value10".to_string(), + )])), + None, + )], + resource: opentelemetry_sdk::Resource::builder_empty().build(), + trace_id, + span_name: "test-operation", + span_kind: SpanKind::Server, + attributes: (0..20) + .map(|i| KeyValue::new(format!("key{}", i), format!("value{}", i))) + .collect(), + is_parent_sampled: None, + should_keep: Some(true), + }, + // 15. Parent sampled - short circuit with many attributes and complex rules + BenchmarkConfig { + name: "parent_sampled_short_circuit", + rules: vec![SamplingRule::new( + 1.0, + Some("test-service".to_string()), + Some("http.*".to_string()), + Some("/api/*".to_string()), + Some(HashMap::from([ + ("environment".to_string(), "production".to_string()), + ("region".to_string(), "us-east-1".to_string()), + ("version".to_string(), "v1.2.3".to_string()), + ])), + None, + )], + resource: opentelemetry_sdk::Resource::builder() + .with_service_name("test-service") + .build(), + trace_id, + span_name: "http.request", + span_kind: SpanKind::Server, + attributes: (0..50) + .map(|i| KeyValue::new(format!("key{}", i), format!("value{}", i))) + .collect(), + is_parent_sampled: Some(true), + should_keep: Some(true), + }, + // 16. Parent not sampled - short circuit with many attributes and complex rules + BenchmarkConfig { + name: "parent_not_sampled_short_circuit", + rules: vec![SamplingRule::new( + 1.0, + Some("test-service".to_string()), + Some("http.*".to_string()), + Some("/api/*".to_string()), + Some(HashMap::from([ + ("environment".to_string(), "production".to_string()), + ("region".to_string(), "us-east-1".to_string()), + ("version".to_string(), "v1.2.3".to_string()), + ])), + None, + )], + resource: opentelemetry_sdk::Resource::builder() + .with_service_name("test-service") + .build(), + trace_id, + span_name: "http.request", + span_kind: SpanKind::Server, + attributes: (0..50) + .map(|i| KeyValue::new(format!("key{}", i), format!("value{}", i))) + .collect(), + is_parent_sampled: Some(false), + should_keep: Some(false), + }, + ] +} + +fn bench_datadog_sampling( + c: &mut Criterion, +) { + let configs = create_benchmark_configs(); + + for config in configs { + let sampler = DatadogSampler::new(config.rules, -1, Arc::new(RwLock::new(config.resource))); + + c.bench_function( + &format!("datadog_sample_span/{}/{}", config.name, M::name()), + |b| { + b.iter_batched( + || (), + |_| { + bench_sample( + &sampler, + config.is_parent_sampled, + config.trace_id, + config.span_name, + &config.span_kind, + &config.attributes, + config.should_keep, + ); + }, + criterion::BatchSize::LargeInput, + ) + }, + ); + } +} + +#[inline(never)] +fn bench_sample( + sampler: &DatadogSampler, + is_parent_sampled: Option, + trace_id: TraceId, + span_name: &str, + span_kind: &SpanKind, + attributes: &[KeyValue], + should_keep: Option, +) { + let result = black_box(sampler).sample( + black_box(is_parent_sampled), + black_box(trace_id), + black_box(span_name), + black_box(span_kind), + black_box(attributes), + ); + if let Some(should_keep) = should_keep { + assert_eq!(result.is_keep, should_keep); + black_box(result); + } else { + black_box(result); + } +} + +criterion_group!( + name = memory_benches; + config = memory_allocated_measurement(&GLOBAL); + targets = bench_datadog_sampling +); +criterion_group!( + name = wall_time_benches; + config = Criterion::default(); + targets = bench_datadog_sampling +); +criterion_main!(memory_benches, wall_time_benches); diff --git a/datadog-opentelemetry/benches/otel_sampling_benchmark.rs b/datadog-opentelemetry/benches/otel_sampling_benchmark.rs new file mode 100644 index 00000000..07b56194 --- /dev/null +++ b/datadog-opentelemetry/benches/otel_sampling_benchmark.rs @@ -0,0 +1,516 @@ +// Copyright 2026-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +use criterion::{criterion_group, criterion_main, Criterion}; +use datadog_opentelemetry::configuration::{Config, SamplingRuleConfig}; +use datadog_opentelemetry::core_pub_hack::test_utils::benchmarks::{ + memory_allocated_measurement, MeasurementName, ReportingAllocator, +}; +use datadog_opentelemetry::sampler::Sampler; +use opentelemetry::{trace::SamplingDecision, trace::SpanKind, KeyValue, TraceId}; +use opentelemetry_sdk::trace::ShouldSample; +use std::collections::HashMap; +use std::hint::black_box; +use std::sync::{Arc, RwLock}; + +#[global_allocator] +static GLOBAL: ReportingAllocator = ReportingAllocator::new(std::alloc::System); + +struct BenchmarkConfig { + name: &'static str, + rules: Vec, + resource: opentelemetry_sdk::Resource, + trace_id: TraceId, + span_name: &'static str, + span_kind: SpanKind, + attributes: Vec, + parent_context: Option, + expected_decision: Option, +} + +fn create_benchmark_configs() -> Vec { + use opentelemetry::trace::{SpanContext, SpanId, TraceContextExt, TraceFlags, TraceState}; + + let trace_id = TraceId::from(0x12345678901234567890123456789012_u128); + + // Helper to create parent context + let create_parent_context = |is_sampled: bool| { + let flags = if is_sampled { + TraceFlags::SAMPLED + } else { + TraceFlags::default() + }; + let span_context = SpanContext::new( + trace_id, + SpanId::from(0x1234567890123456_u64), + flags, + false, + TraceState::default(), + ); + opentelemetry::Context::current().with_remote_span_context(span_context) + }; + + vec![ + // 1. All spans rule (baseline) + BenchmarkConfig { + name: "rule_all_spans_only_rate", + rules: vec![SamplingRuleConfig { + sample_rate: 1.0, + service: None, + name: None, + resource: None, + tags: HashMap::new(), + provenance: "".to_string(), + }], + resource: opentelemetry_sdk::Resource::builder_empty().build(), + trace_id, + span_name: "something", + span_kind: SpanKind::Server, + attributes: vec![KeyValue::new("foo", "bar"), KeyValue::new("bar", "baz")], + parent_context: None, + expected_decision: Some(SamplingDecision::RecordAndSample), + }, + // 2. Service rule - matching + BenchmarkConfig { + name: "service_rule_matching", + rules: vec![SamplingRuleConfig { + sample_rate: 1.0, + service: Some("test-service".to_string()), + name: None, + resource: None, + tags: HashMap::new(), + provenance: "".to_string(), + }], + resource: opentelemetry_sdk::Resource::builder() + .with_service_name("test-service") + .build(), + trace_id, + span_name: "test-operation", + span_kind: SpanKind::Server, + attributes: vec![KeyValue::new("foo", "bar")], + parent_context: None, + expected_decision: Some(SamplingDecision::RecordAndSample), + }, + // 3. Service rule - not matching + BenchmarkConfig { + name: "service_rule_not_matching", + rules: vec![SamplingRuleConfig { + sample_rate: 1.0, + service: Some("test-service".to_string()), + name: None, + resource: None, + tags: HashMap::new(), + provenance: "".to_string(), + }], + resource: opentelemetry_sdk::Resource::builder() + .with_service_name("other-service") + .build(), + trace_id, + span_name: "test-operation", + span_kind: SpanKind::Server, + attributes: vec![KeyValue::new("foo", "bar")], + parent_context: None, + expected_decision: Some(SamplingDecision::RecordAndSample), + }, + // 4. Name pattern rule - matching + BenchmarkConfig { + name: "name_pattern_rule_matching", + rules: vec![SamplingRuleConfig { + sample_rate: 1.0, + service: None, + name: Some("http.*".to_string()), + resource: None, + tags: HashMap::new(), + provenance: "".to_string(), + }], + resource: opentelemetry_sdk::Resource::builder_empty().build(), + trace_id, + span_name: "http.request", + span_kind: SpanKind::Server, + attributes: vec![KeyValue::new("http.method", "GET")], + parent_context: None, + expected_decision: Some(SamplingDecision::RecordAndSample), + }, + // 5. Name pattern rule - not matching + BenchmarkConfig { + name: "name_pattern_rule_not_matching", + rules: vec![SamplingRuleConfig { + sample_rate: 1.0, + service: None, + name: Some("http.*".to_string()), + resource: None, + tags: HashMap::new(), + provenance: "".to_string(), + }], + resource: opentelemetry_sdk::Resource::builder_empty().build(), + trace_id, + span_name: "grpc.request", + span_kind: SpanKind::Server, + attributes: vec![KeyValue::new("rpc.method", "GetUser")], + parent_context: None, + expected_decision: Some(SamplingDecision::RecordAndSample), + }, + // 6. Resource pattern rule - matching + BenchmarkConfig { + name: "resource_pattern_rule_matching", + rules: vec![SamplingRuleConfig { + sample_rate: 1.0, + service: None, + name: None, + resource: Some("/api/*".to_string()), + tags: HashMap::new(), + provenance: "".to_string(), + }], + resource: opentelemetry_sdk::Resource::builder_empty().build(), + trace_id, + span_name: "http.request", + span_kind: SpanKind::Server, + attributes: vec![ + KeyValue::new("http.method", "GET"), + KeyValue::new("http.route", "/api/users"), + ], + parent_context: None, + expected_decision: Some(SamplingDecision::RecordAndSample), + }, + // 7. Resource pattern rule - not matching + BenchmarkConfig { + name: "resource_pattern_rule_not_matching", + rules: vec![SamplingRuleConfig { + sample_rate: 1.0, + service: None, + name: None, + resource: Some("/api/*".to_string()), + tags: HashMap::new(), + provenance: "".to_string(), + }], + resource: opentelemetry_sdk::Resource::builder_empty().build(), + trace_id, + span_name: "http.request", + span_kind: SpanKind::Server, + attributes: vec![ + KeyValue::new("http.method", "GET"), + KeyValue::new("http.route", "/health"), + ], + parent_context: None, + expected_decision: Some(SamplingDecision::RecordAndSample), + }, + // 8. Tag rule - matching + BenchmarkConfig { + name: "tag_rule_matching", + rules: vec![SamplingRuleConfig { + sample_rate: 1.0, + service: None, + name: None, + resource: None, + tags: HashMap::from([("environment".to_string(), "production".to_string())]), + provenance: "".to_string(), + }], + resource: opentelemetry_sdk::Resource::builder_empty().build(), + trace_id, + span_name: "test-operation", + span_kind: SpanKind::Server, + attributes: vec![ + KeyValue::new("environment", "production"), + KeyValue::new("foo", "bar"), + ], + parent_context: None, + expected_decision: Some(SamplingDecision::RecordAndSample), + }, + // 9. Tag rule - not matching + BenchmarkConfig { + name: "tag_rule_not_matching", + rules: vec![SamplingRuleConfig { + sample_rate: 1.0, + service: None, + name: None, + resource: None, + tags: HashMap::from([("environment".to_string(), "production".to_string())]), + provenance: "".to_string(), + }], + resource: opentelemetry_sdk::Resource::builder_empty().build(), + trace_id, + span_name: "test-operation", + span_kind: SpanKind::Server, + attributes: vec![ + KeyValue::new("environment", "staging"), + KeyValue::new("foo", "bar"), + ], + parent_context: None, + expected_decision: Some(SamplingDecision::RecordAndSample), + }, + // 10. Complex rule - matching + BenchmarkConfig { + name: "complex_rule_matching", + rules: vec![SamplingRuleConfig { + sample_rate: 0.5, + service: Some("api-service".to_string()), + name: Some("http.*".to_string()), + resource: Some("/api/v1/*".to_string()), + tags: HashMap::from([("environment".to_string(), "production".to_string())]), + provenance: "".to_string(), + }], + resource: opentelemetry_sdk::Resource::builder() + .with_service_name("api-service") + .build(), + trace_id, + span_name: "http.request", + span_kind: SpanKind::Server, + attributes: vec![ + KeyValue::new("environment", "production"), + KeyValue::new("http.method", "POST"), + KeyValue::new("http.route", "/api/v1/users"), + ], + parent_context: None, + expected_decision: None, // Probabilistic sampling at 0.5 rate + }, + // 11. Complex rule - partial match + BenchmarkConfig { + name: "complex_rule_partial_match", + rules: vec![SamplingRuleConfig { + sample_rate: 0.5, + service: Some("api-service".to_string()), + name: Some("http.*".to_string()), + resource: Some("/api/v1/*".to_string()), + tags: HashMap::from([("environment".to_string(), "production".to_string())]), + provenance: "".to_string(), + }], + resource: opentelemetry_sdk::Resource::builder() + .with_service_name("api-service") + .build(), + trace_id, + span_name: "http.request", + span_kind: SpanKind::Server, + attributes: vec![ + KeyValue::new("environment", "staging"), + KeyValue::new("http.method", "POST"), + KeyValue::new("http.route", "/health"), + ], + parent_context: None, + expected_decision: Some(SamplingDecision::RecordAndSample), + }, + // 12. Multiple rules - first match + BenchmarkConfig { + name: "multiple_rules_first_match", + rules: vec![ + SamplingRuleConfig { + sample_rate: 0.1, + service: Some("api-service".to_string()), + name: None, + resource: None, + tags: HashMap::new(), + provenance: "".to_string(), + }, + SamplingRuleConfig { + sample_rate: 0.5, + service: Some("web-service".to_string()), + name: None, + resource: None, + tags: HashMap::new(), + provenance: "".to_string(), + }, + SamplingRuleConfig { + sample_rate: 1.0, + service: None, + name: None, + resource: None, + tags: HashMap::new(), + provenance: "".to_string(), + }, + ], + resource: opentelemetry_sdk::Resource::builder() + .with_service_name("api-service") + .build(), + trace_id, + span_name: "test-operation", + span_kind: SpanKind::Server, + attributes: vec![KeyValue::new("foo", "bar")], + parent_context: None, + expected_decision: None, // Probabilistic sampling at 0.1 rate + }, + // 13. Multiple rules - last match + BenchmarkConfig { + name: "multiple_rules_last_match", + rules: vec![ + SamplingRuleConfig { + sample_rate: 0.1, + service: Some("api-service".to_string()), + name: None, + resource: None, + tags: HashMap::new(), + provenance: "".to_string(), + }, + SamplingRuleConfig { + sample_rate: 0.5, + service: Some("web-service".to_string()), + name: None, + resource: None, + tags: HashMap::new(), + provenance: "".to_string(), + }, + SamplingRuleConfig { + sample_rate: 1.0, + service: None, + name: None, + resource: None, + tags: HashMap::new(), + provenance: "".to_string(), + }, + ], + resource: opentelemetry_sdk::Resource::builder() + .with_service_name("other-service") + .build(), + trace_id, + span_name: "test-operation", + span_kind: SpanKind::Server, + attributes: vec![KeyValue::new("foo", "bar")], + parent_context: None, + expected_decision: Some(SamplingDecision::RecordAndSample), + }, + // 14. Many attributes + BenchmarkConfig { + name: "many_attributes", + rules: vec![SamplingRuleConfig { + sample_rate: 1.0, + service: None, + name: None, + resource: None, + tags: HashMap::from([("key10".to_string(), "value10".to_string())]), + provenance: "".to_string(), + }], + resource: opentelemetry_sdk::Resource::builder_empty().build(), + trace_id, + span_name: "test-operation", + span_kind: SpanKind::Server, + attributes: (0..20) + .map(|i| KeyValue::new(format!("key{}", i), format!("value{}", i))) + .collect(), + parent_context: None, + expected_decision: Some(SamplingDecision::RecordAndSample), + }, + // 15. Parent sampled - short circuit with many attributes and complex rules + BenchmarkConfig { + name: "parent_sampled_short_circuit", + rules: vec![SamplingRuleConfig { + sample_rate: 1.0, + service: Some("test-service".to_string()), + name: Some("http.*".to_string()), + resource: Some("/api/*".to_string()), + tags: HashMap::from([ + ("environment".to_string(), "production".to_string()), + ("region".to_string(), "us-east-1".to_string()), + ("version".to_string(), "v1.2.3".to_string()), + ]), + provenance: "".to_string(), + }], + resource: opentelemetry_sdk::Resource::builder() + .with_service_name("test-service") + .build(), + trace_id, + span_name: "http.request", + span_kind: SpanKind::Server, + attributes: (0..50) + .map(|i| KeyValue::new(format!("key{}", i), format!("value{}", i))) + .collect(), + parent_context: Some(create_parent_context(true)), + expected_decision: Some(SamplingDecision::RecordAndSample), + }, + // 16. Parent not sampled - short circuit with many attributes and complex rules + BenchmarkConfig { + name: "parent_not_sampled_short_circuit", + rules: vec![SamplingRuleConfig { + sample_rate: 1.0, + service: Some("test-service".to_string()), + name: Some("http.*".to_string()), + resource: Some("/api/*".to_string()), + tags: HashMap::from([ + ("environment".to_string(), "production".to_string()), + ("region".to_string(), "us-east-1".to_string()), + ("version".to_string(), "v1.2.3".to_string()), + ]), + provenance: "".to_string(), + }], + resource: opentelemetry_sdk::Resource::builder() + .with_service_name("test-service") + .build(), + trace_id, + span_name: "http.request", + span_kind: SpanKind::Server, + attributes: (0..50) + .map(|i| KeyValue::new(format!("key{}", i), format!("value{}", i))) + .collect(), + parent_context: Some(create_parent_context(false)), + expected_decision: Some(SamplingDecision::RecordOnly), + }, + ] +} + +fn bench_otel_span_sampling( + c: &mut Criterion, +) { + let configs = create_benchmark_configs(); + let links: Vec = vec![]; + + for config in configs { + let test_resource = Arc::new(RwLock::new(config.resource)); + let test_config = Arc::new( + Config::builder() + .set_trace_rate_limit(-1) + .set_trace_sampling_rules(config.rules) + .build(), + ); + let test_sampler = Sampler::new(test_config, test_resource.clone(), None); + + c.bench_function( + &format!("otel_sample_span/{}/{}", config.name, M::name()), + |b| { + b.iter_batched( + || (), + |_| { + bench_sample( + &test_sampler, + config.parent_context.as_ref(), + config.trace_id, + config.span_name, + &config.span_kind, + &config.attributes, + &links, + &config.expected_decision, + ); + }, + criterion::BatchSize::LargeInput, + ) + }, + ); + } +} + +#[inline(never)] +fn bench_sample( + sampler: &Sampler, + parent_context: Option<&opentelemetry::Context>, + trace_id: TraceId, + span_name: &str, + span_kind: &SpanKind, + attributes: &[KeyValue], + links: &[opentelemetry::trace::Link], + expected_decision: &Option, +) { + let result = black_box(sampler).should_sample( + black_box(parent_context), + black_box(trace_id), + black_box(span_name), + black_box(span_kind), + black_box(attributes), + black_box(links), + ); + if let Some(expected_decision) = expected_decision { + assert_eq!(result.decision, *expected_decision); + black_box(result); + } else { + black_box(result); + } +} + +criterion_group!(name = memory_benches; config = memory_allocated_measurement(&GLOBAL); targets = bench_otel_span_sampling); +criterion_group!(name = wall_time_benches; config = Criterion::default(); targets = bench_otel_span_sampling); +criterion_main!(memory_benches, wall_time_benches); diff --git a/datadog-opentelemetry/src/lib.rs b/datadog-opentelemetry/src/lib.rs index 97ad45ef..bc7332d0 100644 --- a/datadog-opentelemetry/src/lib.rs +++ b/datadog-opentelemetry/src/lib.rs @@ -268,14 +268,22 @@ pub mod mappings; #[cfg(feature = "test-utils")] pub mod propagation; #[cfg(feature = "test-utils")] +pub mod sampler; +#[cfg(feature = "test-utils")] pub mod sampling; +#[cfg(feature = "test-utils")] +pub mod span_processor; #[cfg(not(feature = "test-utils"))] pub(crate) mod mappings; #[cfg(not(feature = "test-utils"))] pub(crate) mod propagation; #[cfg(not(feature = "test-utils"))] +mod sampler; +#[cfg(not(feature = "test-utils"))] pub(crate) mod sampling; +#[cfg(not(feature = "test-utils"))] +mod span_processor; mod ddtrace_transform; #[cfg(any(feature = "logs-grpc", feature = "logs-http"))] @@ -283,9 +291,7 @@ mod logs_reader; #[cfg(any(feature = "metrics-grpc", feature = "metrics-http"))] mod metrics_reader; mod otlp_utils; -mod sampler; mod span_exporter; -mod span_processor; mod spans_metrics; #[cfg(any(feature = "logs-grpc", feature = "logs-http"))] mod telemetry_logs_exporter; @@ -496,7 +502,11 @@ fn make_tracer( let resource_slot = Arc::new(RwLock::new(Resource::builder_empty().build())); // Sampler only needs config for initialization (reads initial sampling rules) // Runtime updates come via config callback, so no need for shared config - let sampler = Sampler::new(config.clone(), resource_slot.clone(), registry.clone()); + let sampler = Sampler::new( + config.clone(), + resource_slot.clone(), + Some(registry.clone()), + ); let agent_response_handler = sampler.on_agent_response(); diff --git a/datadog-opentelemetry/src/sampler.rs b/datadog-opentelemetry/src/sampler.rs index 00553165..73b7b7ad 100644 --- a/datadog-opentelemetry/src/sampler.rs +++ b/datadog-opentelemetry/src/sampler.rs @@ -1,6 +1,8 @@ // Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 +//! Datadog OTel Sampler + use opentelemetry::trace::{TraceContextExt, TraceState}; use opentelemetry_sdk::{trace::ShouldSample, Resource}; use std::sync::{Arc, RwLock}; @@ -16,18 +18,31 @@ use crate::{ TraceRegistry, }; +/// OpenTelemetry sampler implementation for Datadog tracing. +/// +/// Implements the `ShouldSample` trait to make sampling decisions for traces based on +/// Datadog's sampling rules, rate limits, and service-based sampling rates. #[derive(Debug, Clone)] pub struct Sampler { sampler: DatadogSampler, - trace_registry: TraceRegistry, + trace_registry: Option, cfg: Arc, } impl Sampler { + /// Creates a new Datadog sampler. + /// + /// # Arguments + /// + /// * `cfg` - Configuration containing sampling rules and rate limits + /// * `resource` - OpenTelemetry resource with service information + /// * `trace_registry` - Optional trace registry for managing in-flight traces (None for + /// benchmarking) pub fn new( cfg: Arc, resource: Arc>, - trace_registry: TraceRegistry, + // This is an Option to allow benchmarking different parts of sampling + trace_registry: Option, ) -> Self { let rules = SamplingRule::from_configs(cfg.trace_sampling_rules().to_vec()); let sampler = DatadogSampler::new(rules, cfg.trace_rate_limit(), resource); @@ -38,6 +53,9 @@ impl Sampler { } } + /// Returns a callback for processing agent responses. + /// + /// The callback updates service-based sampling rates based on the agent's response. pub fn on_agent_response(&self) -> Box Fn(&'a str) + Send + Sync> { self.sampler.on_agent_response() } @@ -140,30 +158,30 @@ impl ShouldSample for Sampler { None }; if let Some(trace_propagation_data) = trace_propagation_data { - match self - .trace_registry - .register_local_root_trace_propagation_data( + if let Some(trace_registry) = &self.trace_registry { + match trace_registry.register_local_root_trace_propagation_data( trace_id.to_bytes(), trace_propagation_data, ) { - RegisterTracePropagationResult::Existing(sampling_decision) => { - return opentelemetry::trace::SamplingResult { - // If at this point the sampling decision is still None, we will - // end up sending the span to the agent without a sampling priority, which - // will latter take a decision. - // So the span is marked as RecordAndSample because we treat it as such - decision: if sampling_decision.priority.is_none_or(|p| p.is_keep()) { - opentelemetry::trace::SamplingDecision::RecordAndSample - } else { - opentelemetry::trace::SamplingDecision::RecordOnly - }, - attributes: Vec::new(), - trace_state: parent_context - .map(|c| c.span().span_context().trace_state().clone()) - .unwrap_or_default(), - }; + RegisterTracePropagationResult::Existing(sampling_decision) => { + return opentelemetry::trace::SamplingResult { + // If at this point the sampling decision is still None, we will + // end up sending the span to the agent without a sampling priority, + // which will later take a decision. + // So the span is marked as RecordAndSample because we treat it as such + decision: if sampling_decision.priority.is_none_or(|p| p.is_keep()) { + opentelemetry::trace::SamplingDecision::RecordAndSample + } else { + opentelemetry::trace::SamplingDecision::RecordOnly + }, + attributes: Vec::new(), + trace_state: parent_context + .map(|c| c.span().span_context().trace_state().clone()) + .unwrap_or_default(), + }; + } + RegisterTracePropagationResult::New => {} } - RegisterTracePropagationResult::New => {} } } @@ -205,7 +223,11 @@ mod tests { ); let test_resource = Arc::new(RwLock::new(Resource::builder().build())); - let sampler = Sampler::new(config.clone(), test_resource, TraceRegistry::new(config)); + let sampler = Sampler::new( + config.clone(), + test_resource, + Some(TraceRegistry::new(config)), + ); let trace_id_bytes = [1; 16]; let trace_id = TraceId::from_bytes(trace_id_bytes); @@ -227,7 +249,11 @@ mod tests { let config = Arc::new(Config::builder().build()); let test_resource = Arc::new(RwLock::new(Resource::builder_empty().build())); - let sampler = Sampler::new(config.clone(), test_resource, TraceRegistry::new(config)); + let sampler = Sampler::new( + config.clone(), + test_resource, + Some(TraceRegistry::new(config)), + ); let trace_id_bytes = [2; 16]; let trace_id = TraceId::from_bytes(trace_id_bytes); @@ -246,7 +272,11 @@ mod tests { let config = Arc::new(Config::builder().build()); let test_resource = Arc::new(RwLock::new(Resource::builder_empty().build())); - let sampler = Sampler::new(config.clone(), test_resource, TraceRegistry::new(config)); + let sampler = Sampler::new( + config.clone(), + test_resource, + Some(TraceRegistry::new(config)), + ); let trace_id = TraceId::from_bytes([2; 16]); let span_id = SpanId::from_bytes([3; 8]); diff --git a/datadog-opentelemetry/src/sampling/datadog_sampler.rs b/datadog-opentelemetry/src/sampling/datadog_sampler.rs index 62ea1e09..f7c65329 100644 --- a/datadog-opentelemetry/src/sampling/datadog_sampler.rs +++ b/datadog-opentelemetry/src/sampling/datadog_sampler.rs @@ -372,11 +372,11 @@ impl DatadogSampler { } /// Sample an incoming span based on the parent context and attributes - pub(crate) fn sample( + pub fn sample( &self, is_parent_sampled: Option, trace_id: TraceId, - _name: &str, + name: &str, span_kind: &opentelemetry::trace::SpanKind, attributes: &[KeyValue], ) -> DdSamplingResult { @@ -389,7 +389,7 @@ impl DatadogSampler { } // Apply rules-based sampling - self.sample_root(trace_id, _name, span_kind, attributes) + self.sample_root(trace_id, name, span_kind, attributes) } /// Sample the root span of a trace @@ -501,12 +501,12 @@ fn format_sampling_rate(rate: f64) -> Option { }) } -pub(crate) struct DdSamplingResult { +pub struct DdSamplingResult { pub is_keep: bool, pub trace_root_info: Option, } -pub(crate) struct TraceRootSamplingInfo { +pub struct TraceRootSamplingInfo { pub priority: SamplingPriority, pub mechanism: SamplingMechanism, pub rate: f64, diff --git a/datadog-opentelemetry/src/span_processor.rs b/datadog-opentelemetry/src/span_processor.rs index df5f2b02..e594f8ac 100644 --- a/datadog-opentelemetry/src/span_processor.rs +++ b/datadog-opentelemetry/src/span_processor.rs @@ -1,6 +1,8 @@ // Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 +//! Datadog OTel SpanProcessor + use hashbrown::{hash_map, HashMap as BHashMap}; use std::{ collections::HashMap, @@ -69,7 +71,7 @@ struct InnerTraceRegistry { config: Arc, } -pub enum RegisterTracePropagationResult { +pub(crate) enum RegisterTracePropagationResult { Existing(SamplingDecision), New, } @@ -262,7 +264,7 @@ struct CachePadded(T); /// - The finished spans of the trace /// - The number of open spans in the trace /// - The sampling decision of the trace -pub(crate) struct TraceRegistry { +pub struct TraceRegistry { // Example: // inner: Arc<[CacheAligned>; N]>; // to access a trace we do inner[hash(trace_id) % N].read() @@ -271,6 +273,10 @@ pub(crate) struct TraceRegistry { } impl TraceRegistry { + /// Creates a new trace registry. + /// + /// The registry uses sharding to minimize lock contention when multiple threads + /// are creating and finishing spans concurrently. pub fn new(config: Arc) -> Self { Self { inner: Arc::new(std::array::from_fn(|_| { @@ -297,6 +303,7 @@ impl TraceRegistry { /// /// If the trace is already registered with a non None sampling decision, /// it will return the existing sampling decision instead + #[allow(private_interfaces)] pub fn register_local_root_trace_propagation_data( &self, trace_id: [u8; 16], @@ -321,6 +328,7 @@ impl TraceRegistry { } /// Register a new span with the given trace ID and span ID. + #[allow(private_interfaces)] pub fn register_span( &self, trace_id: [u8; 16], @@ -345,6 +353,10 @@ impl TraceRegistry { inner.finish_span(trace_id, span_data) } + /// Retrieves the trace propagation data for a given trace ID. + /// + /// Returns the sampling decision, origin, and internal tags associated with the trace. + #[allow(private_interfaces)] pub fn get_trace_propagation_data(&self, trace_id: [u8; 16]) -> TracePropagationData { let inner = self .get_shard(trace_id) @@ -354,6 +366,10 @@ impl TraceRegistry { inner.get_trace_propagation_data(trace_id).clone() } + /// Aggregates and returns metrics from all registry shards. + /// + /// Collects counters for spans created/finished, trace segments, and partial flushes + /// across all shards in the registry. pub fn get_metrics(&self) -> TraceRegistryMetrics { let mut stats = TraceRegistryMetrics::default(); for shard_idx in 0..TRACE_REGISTRY_SHARDS { @@ -369,12 +385,21 @@ impl TraceRegistry { } } +/// Metrics collected by the trace registry. +/// +/// Tracks the lifecycle of spans and traces through the registry, useful for +/// monitoring and debugging trace collection behavior. #[derive(Default, Debug)] pub struct TraceRegistryMetrics { + /// Number of spans created and registered in the registry. pub spans_created: usize, + /// Number of spans that have finished processing. pub spans_finished: usize, + /// Number of trace segments created (complete or partial traces). pub trace_segments_created: usize, + /// Number of trace segments closed and sent to the exporter. pub trace_segments_closed: usize, + /// Number of times traces were partially flushed before completion. pub trace_partial_flush_count: usize, } From 7083b179077e6f7e6d58230d79a40860c67fb013 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Antonsson?= Date: Mon, 9 Feb 2026 16:54:44 +0100 Subject: [PATCH 2/8] refactor(sampling): add traits for span information --- .../benches/datadog_sampling_benchmark.rs | 41 +- .../benches/otel_sampling_benchmark.rs | 1 + datadog-opentelemetry/src/core/sampling.rs | 20 +- .../src/mappings/transform/mod.rs | 2 +- .../src/mappings/transform/otel_util.rs | 11 +- datadog-opentelemetry/src/sampler.rs | 30 +- .../src/sampling/datadog_sampler.rs | 625 ++++++++++-------- datadog-opentelemetry/src/sampling/mod.rs | 5 + .../src/sampling/otel_mappings.rs | 178 ++++- .../src/sampling/rate_sampler.rs | 20 +- .../src/sampling/rules_sampler.rs | 4 +- datadog-opentelemetry/src/sampling/types.rs | 207 ++++++ 12 files changed, 795 insertions(+), 349 deletions(-) create mode 100644 datadog-opentelemetry/src/sampling/types.rs diff --git a/datadog-opentelemetry/benches/datadog_sampling_benchmark.rs b/datadog-opentelemetry/benches/datadog_sampling_benchmark.rs index 40ae58e9..122061c5 100644 --- a/datadog-opentelemetry/benches/datadog_sampling_benchmark.rs +++ b/datadog-opentelemetry/benches/datadog_sampling_benchmark.rs @@ -5,8 +5,9 @@ use criterion::{criterion_group, criterion_main, Criterion}; use datadog_opentelemetry::core_pub_hack::test_utils::benchmarks::{ memory_allocated_measurement, MeasurementName, ReportingAllocator, }; -use datadog_opentelemetry::sampling::DatadogSampler; +use datadog_opentelemetry::sampling::OtelSamplingData; use datadog_opentelemetry::sampling::SamplingRule; +use datadog_opentelemetry::sampling::{DatadogSampler, SamplingData}; use opentelemetry::{trace::SpanKind, KeyValue, TraceId}; use std::collections::HashMap; use std::hint::black_box; @@ -397,6 +398,14 @@ fn bench_datadog_sampling, - trace_id: TraceId, - span_name: &str, - span_kind: &SpanKind, - attributes: &[KeyValue], - should_keep: Option, -) { - let result = black_box(sampler).sample( - black_box(is_parent_sampled), - black_box(trace_id), - black_box(span_name), - black_box(span_kind), - black_box(attributes), - ); +fn bench_sample(sampler: &DatadogSampler, data: &impl SamplingData, should_keep: Option) { + let result = black_box(sampler).sample(black_box(data)); if let Some(should_keep) = should_keep { - assert_eq!(result.is_keep, should_keep); + assert_eq!(result.get_priority().is_keep(), should_keep); black_box(result); } else { black_box(result); diff --git a/datadog-opentelemetry/benches/otel_sampling_benchmark.rs b/datadog-opentelemetry/benches/otel_sampling_benchmark.rs index 07b56194..b1e92aeb 100644 --- a/datadog-opentelemetry/benches/otel_sampling_benchmark.rs +++ b/datadog-opentelemetry/benches/otel_sampling_benchmark.rs @@ -485,6 +485,7 @@ fn bench_otel_span_sampling, diff --git a/datadog-opentelemetry/src/core/sampling.rs b/datadog-opentelemetry/src/core/sampling.rs index 398a62d0..6532afad 100644 --- a/datadog-opentelemetry/src/core/sampling.rs +++ b/datadog-opentelemetry/src/core/sampling.rs @@ -38,7 +38,25 @@ impl SamplingPriority { self.value } - pub(crate) fn is_keep(&self) -> bool { + /// Returns whether this sampling priority indicates the trace should be kept. + /// + /// # Returns + /// + /// `true` if the priority value is positive (indicating the trace should be kept), + /// `false` otherwise (indicating the trace should be dropped). + /// + /// # Examples + /// + /// ``` + /// use datadog_opentelemetry::core_pub_hack::sampling::priority; + /// + /// assert!(priority::AUTO_KEEP.is_keep()); + /// assert!(priority::USER_KEEP.is_keep()); + /// assert!(!priority::AUTO_REJECT.is_keep()); + /// assert!(!priority::USER_REJECT.is_keep()); + /// ``` + #[inline(always)] + pub fn is_keep(&self) -> bool { self.value > 0 } } diff --git a/datadog-opentelemetry/src/mappings/transform/mod.rs b/datadog-opentelemetry/src/mappings/transform/mod.rs index e4b80081..7b4aa996 100644 --- a/datadog-opentelemetry/src/mappings/transform/mod.rs +++ b/datadog-opentelemetry/src/mappings/transform/mod.rs @@ -212,7 +212,7 @@ fn otel_span_to_dd_span_minimal<'a>( let code: u32 = if let Some(http_status_code) = span.get_attr_num(DATADOG_HTTP_STATUS_CODE) { http_status_code } else { - get_otel_status_code(span) + get_otel_status_code(span).unwrap_or(0) }; if code != 0 { dd_span.meta.insert( diff --git a/datadog-opentelemetry/src/mappings/transform/otel_util.rs b/datadog-opentelemetry/src/mappings/transform/otel_util.rs index e9194088..343cd36f 100644 --- a/datadog-opentelemetry/src/mappings/transform/otel_util.rs +++ b/datadog-opentelemetry/src/mappings/transform/otel_util.rs @@ -198,14 +198,9 @@ pub fn get_otel_resource_v2<'a>(span: &impl OtelSpan<'a>) -> Cow<'a, str> { } // https://github.com/DataDog/datadog-agent/blob/main/pkg/trace/traceutil/otel_util.go#L571 -pub(crate) fn get_otel_status_code<'a>(span: &impl OtelSpan<'a>) -> u32 { - if let Some(code) = span.get_attr_num(HTTP_RESPONSE_STATUS_CODE) { - return code; - } - if let Some(code) = span.get_attr_num(HTTP_STATUS_CODE) { - return code; - } - 0 +pub(crate) fn get_otel_status_code<'a>(span: &impl OtelSpan<'a>) -> Option { + span.get_attr_num(HTTP_RESPONSE_STATUS_CODE) + .or_else(|| span.get_attr_num(HTTP_STATUS_CODE)) } const SPAN_TYPE_SQL: &str = "sql"; diff --git a/datadog-opentelemetry/src/sampler.rs b/datadog-opentelemetry/src/sampler.rs index 73b7b7ad..67ed2c57 100644 --- a/datadog-opentelemetry/src/sampler.rs +++ b/datadog-opentelemetry/src/sampler.rs @@ -12,7 +12,7 @@ use crate::{ configuration::Config, constants::SAMPLING_DECISION_MAKER_TAG_KEY, sampling::SamplingDecision, }, - sampling::{DatadogSampler, SamplingRule, SamplingRulesCallback}, + sampling::{DatadogSampler, OtelSamplingData, SamplingRule, SamplingRulesCallback}, span_processor::{RegisterTracePropagationResult, TracePropagationData}, text_map_propagator::{self, DatadogExtractData}, TraceRegistry, @@ -99,10 +99,18 @@ impl ShouldSample for Sampler { .filter(|c| !is_parent_deferred && c.has_active_span()) .map(|c| c.span().span_context().trace_flags().is_sampled()); - let result = self - .sampler - .sample(is_parent_sampled, trace_id, name, span_kind, attributes); - let trace_propagation_data = if let Some(trace_root_info) = &result.trace_root_info { + let data = OtelSamplingData::new( + is_parent_sampled, + &trace_id, + name, + span_kind.clone(), + attributes, + self.sampler.resource(), + ); + let result = self.sampler.sample(&data); + let trace_propagation_data = if let Some(trace_root_info) = + result.get_trace_root_sampling_info() + { // If the parent was deferred, we try to merge propagation tags with what we extracted let (mut tags, origin) = if is_parent_deferred { if let Some(DatadogExtractData { @@ -118,7 +126,7 @@ impl ShouldSample for Sampler { } else { (None, None) }; - let mechanism = trace_root_info.mechanism; + let mechanism = trace_root_info.mechanism(); tags.get_or_insert_default().insert( SAMPLING_DECISION_MAKER_TAG_KEY.to_string(), mechanism.to_cow().into_owned(), @@ -126,7 +134,7 @@ impl ShouldSample for Sampler { Some(TracePropagationData { sampling_decision: SamplingDecision { - priority: Some(trace_root_info.priority), + priority: Some(result.get_priority()), mechanism: Some(mechanism), }, origin, @@ -186,8 +194,12 @@ impl ShouldSample for Sampler { } opentelemetry::trace::SamplingResult { - decision: result.to_otel_decision(), - attributes: result.to_dd_sampling_tags(), + decision: crate::sampling::otel_mappings::priority_to_otel_decision( + result.get_priority(), + ), + attributes: result + .to_dd_sampling_tags(&crate::sampling::OtelAttributeFactory) + .unwrap_or_default(), trace_state: parent_context .map(|c| c.span().span_context().trace_state().clone()) .unwrap_or_default(), diff --git a/datadog-opentelemetry/src/sampling/datadog_sampler.rs b/datadog-opentelemetry/src/sampling/datadog_sampler.rs index f7c65329..a9ed2d2a 100644 --- a/datadog-opentelemetry/src/sampling/datadog_sampler.rs +++ b/datadog-opentelemetry/src/sampling/datadog_sampler.rs @@ -6,19 +6,13 @@ use crate::core::constants::{ RL_EFFECTIVE_RATE, SAMPLING_AGENT_RATE_TAG_KEY, SAMPLING_DECISION_MAKER_TAG_KEY, SAMPLING_KNUTH_RATE_TAG_KEY, SAMPLING_PRIORITY_TAG_KEY, SAMPLING_RULE_RATE_TAG_KEY, }; -use crate::core::sampling::{mechanism, SamplingMechanism, SamplingPriority}; +use crate::core::sampling::{mechanism, priority, SamplingMechanism, SamplingPriority}; /// Type alias for sampling rules update callback /// Consolidated callback type used across crates for remote config sampling updates pub type SamplingRulesCallback = Box Fn(&'a [SamplingRuleConfig]) + Send + Sync>; -use crate::mappings::{ - get_dd_key_for_otlp_attribute, get_otel_env, get_otel_operation_name_v2, get_otel_resource_v2, - get_otel_service, get_otel_status_code, OtelSpan, -}; -use opentelemetry::trace::SamplingDecision; -use opentelemetry::trace::TraceId; -use opentelemetry::KeyValue; +use crate::sampling::{AttributeLike, SamplingData, SpanProperties, TraceIdLike, ValueLike}; use std::collections::HashMap; use std::sync::{Arc, RwLock}; @@ -26,11 +20,9 @@ use super::agent_service_sampler::{AgentRates, ServicesSampler}; // Import the attr constants use super::constants::pattern::NO_RULE; use super::glob_matcher::GlobMatcher; -use super::otel_mappings::PreSampledSpan; use super::rate_limiter::RateLimiter; use super::rate_sampler::RateSampler; use super::rules_sampler::RulesSampler; -use super::utils; fn matcher_from_rule(rule: &str) -> Option { (rule != NO_RULE).then(|| GlobMatcher::new(rule)) @@ -110,9 +102,9 @@ impl SamplingRule { /// Checks if this rule matches the given span's attributes and name /// The name is derived from the attributes and span kind - fn matches(&self, span: &PreSampledSpan) -> bool { - // Get the operation name from the attributes and span kind - let name: std::borrow::Cow<'_, str> = get_otel_operation_name_v2(span); + fn matches(&self, span: &impl SpanProperties) -> bool { + // Get the operation name from the span + let name = span.operation_name(); // Check name using glob matcher if specified if let Some(ref matcher) = self.name_matcher { @@ -123,21 +115,21 @@ impl SamplingRule { // Check service if specified using glob matcher if let Some(ref matcher) = self.service_matcher { - // Get service directly from the resource - let service_from_resource = get_otel_service(span); + // Get service from the span + let service = span.service(); - // Match against the service from resource - if !matcher.matches(&service_from_resource) { + // Match against the service + if !matcher.matches(&service) { return false; } } // Get the resource string for matching - let resource_str: std::borrow::Cow<'_, str> = get_otel_resource_v2(span); + let resource_str = span.resource(); // Check resource if specified using glob matcher if let Some(ref matcher) = self.resource_matcher { - // Use the resource generated by get_otel_resource_v2 + // Use the resource from the span if !matcher.matches(resource_str.as_ref()) { return false; } @@ -161,10 +153,9 @@ impl SamplingRule { // Logic for other tags: // First, try to match directly with the provided tag key let direct_match = span - .attributes - .iter() - .find(|kv| kv.key.as_str() == rule_tag_key_str) - .and_then(|kv| self.match_attribute_value(&kv.value, matcher)); + .attributes() + .find(|attr| attr.key() == rule_tag_key_str) + .and_then(|attr| self.match_attribute_value(attr.value(), matcher)); if direct_match.unwrap_or(false) { continue; @@ -175,12 +166,13 @@ impl SamplingRule { // is a Datadog key (e.g., "http.method") and the attribute is an // OTel key (e.g., "http.request.method") if rule_tag_key_str.starts_with("http.") { - let tag_match = span.attributes.iter().any(|kv| { - let dd_key_from_otel_attr = get_dd_key_for_otlp_attribute(kv.key.as_str()); - if dd_key_from_otel_attr == rule_tag_key_str { - return self - .match_attribute_value(&kv.value, matcher) - .unwrap_or(false); + let tag_match = span.attributes().any(|attr| { + if let Some(alternate_key) = span.get_alternate_key(attr.key()) { + if alternate_key == rule_tag_key_str { + return self + .match_attribute_value(attr.value(), matcher) + .unwrap_or(false); + } } false }); @@ -206,26 +198,18 @@ impl SamplingRule { fn match_http_status_code_rule( &self, matcher: &GlobMatcher, - span: &PreSampledSpan, + span: &impl SpanProperties, ) -> Option { - let status_code_u32 = get_otel_status_code(span); - if status_code_u32 != 0 { - // Assuming 0 means not found - let status_value = opentelemetry::Value::I64(i64::from(status_code_u32)); + span.status_code().and_then(|status_code| { + let status_value = opentelemetry::Value::I64(i64::from(status_code)); self.match_attribute_value(&status_value, matcher) - } else { - None // Status code not found in attributes - } + }) } // Helper method to match attribute values considering different value types - fn match_attribute_value( - &self, - value: &opentelemetry::Value, - matcher: &GlobMatcher, - ) -> Option { + fn match_attribute_value(&self, value: &impl ValueLike, matcher: &GlobMatcher) -> Option { // Floating point values are handled with special rules - if let Some(float_val) = utils::extract_float_value(value) { + if let Some(float_val) = value.extract_float() { // Check if the float has a non-zero decimal part let has_decimal = float_val != (float_val as i64) as f64; @@ -240,11 +224,13 @@ impl SamplingRule { } // For non-float values, use normal matching - utils::extract_string_value(value).map(|string_value| matcher.matches(&string_value)) + value + .extract_string() + .map(|string_value| matcher.matches(&string_value)) } /// Samples a trace ID using this rule's sample rate - pub fn sample(&self, trace_id: TraceId) -> bool { + pub fn sample(&self, trace_id: &impl TraceIdLike) -> bool { // Delegate to the internal rate sampler's new sample method self.rate_sampler.sample(trace_id) } @@ -303,6 +289,11 @@ impl DatadogSampler { } } + /// Returns a reference to the resource + pub fn resource(&self) -> &RwLock { + self.resource.as_ref() + } + // used for tests #[allow(dead_code)] pub(crate) fn update_service_rates(&self, rates: impl IntoIterator) { @@ -335,17 +326,17 @@ impl DatadogSampler { } /// Computes a key for service-based sampling - fn service_key<'a>(&self, span: &impl OtelSpan<'a>) -> String { - // Get service directly from resource - let service = get_otel_service(span).into_owned(); - // Get env from attributes - let env = get_otel_env(span); + fn service_key(&self, span: &impl SpanProperties) -> String { + // Get service from span + let service = span.service().into_owned(); + // Get env from span + let env = span.env(); format!("service:{service},env:{env}") } /// Finds the highest precedence rule that matches the span - fn find_matching_rule(&self, span: &PreSampledSpan) -> Option { + fn find_matching_rule(&self, span: &impl SpanProperties) -> Option { self.rules.find_matching_rule(|rule| rule.matches(span)) } @@ -372,44 +363,37 @@ impl DatadogSampler { } /// Sample an incoming span based on the parent context and attributes - pub fn sample( - &self, - is_parent_sampled: Option, - trace_id: TraceId, - name: &str, - span_kind: &opentelemetry::trace::SpanKind, - attributes: &[KeyValue], - ) -> DdSamplingResult { - if let Some(is_parent_sampled) = is_parent_sampled { + pub fn sample(&self, data: &impl SamplingData) -> DdSamplingResult { + if let Some(is_parent_sampled) = data.is_parent_sampled() { + let priority = match is_parent_sampled { + false => priority::AUTO_REJECT, + true => priority::AUTO_KEEP, + }; // If a parent exists, inherit its sampling decision and trace state return DdSamplingResult { - is_keep: is_parent_sampled, + priority, trace_root_info: None, }; } // Apply rules-based sampling - self.sample_root(trace_id, name, span_kind, attributes) + data.with_span_properties(self, |sampler, span| sampler.sample_root(data, span)) } /// Sample the root span of a trace fn sample_root( &self, - trace_id: TraceId, - name: &str, - span_kind: &opentelemetry::trace::SpanKind, - attributes: &[KeyValue], + data: &impl SamplingData, + span: &impl SpanProperties, ) -> DdSamplingResult { let mut is_keep = true; let mut used_agent_sampler = false; let sample_rate; let mut rl_effective_rate: Option = None; - - let resource_guard = self.resource.read().unwrap(); - let span = PreSampledSpan::new(name, span_kind.clone(), attributes, &resource_guard); + let trace_id = data.trace_id(); // Find a matching rule - let matching_rule = self.find_matching_rule(&span); + let matching_rule = self.find_matching_rule(span); // Apply sampling logic if let Some(rule) = &matching_rule { @@ -426,7 +410,7 @@ impl DatadogSampler { } } else { // Try service-based sampling from Agent - let service_key = self.service_key(&span); + let service_key = self.service_key(span); if let Some(sampler) = self.service_samplers.get(&service_key) { // Use the service-based sampler used_agent_sampler = true; @@ -447,10 +431,9 @@ impl DatadogSampler { let mechanism = self.get_sampling_mechanism(matching_rule.as_ref(), used_agent_sampler); DdSamplingResult { - is_keep, + priority: mechanism.to_priority(is_keep), trace_root_info: Some(TraceRootSamplingInfo { mechanism, - priority: mechanism.to_priority(is_keep), rate: sample_rate, rl_effective_rate, }), @@ -501,93 +484,115 @@ fn format_sampling_rate(rate: f64) -> Option { }) } -pub struct DdSamplingResult { - pub is_keep: bool, - pub trace_root_info: Option, +pub struct TraceRootSamplingInfo { + mechanism: SamplingMechanism, + rate: f64, + rl_effective_rate: Option, } -pub struct TraceRootSamplingInfo { - pub priority: SamplingPriority, - pub mechanism: SamplingMechanism, - pub rate: f64, - pub rl_effective_rate: Option, +impl TraceRootSamplingInfo { + /// Returns the sampling mechanism used for this trace root + pub fn mechanism(&self) -> SamplingMechanism { + self.mechanism + } + + /// Returns the sample rate used for this trace root + pub fn rate(&self) -> f64 { + self.rate + } + + /// Returns the effective rate limit if rate limiting was applied + pub fn rl_effective_rate(&self) -> Option { + self.rl_effective_rate + } +} + +pub struct DdSamplingResult { + priority: SamplingPriority, + trace_root_info: Option, } impl DdSamplingResult { + #[inline(always)] + pub fn get_priority(&self) -> SamplingPriority { + self.priority + } + + pub fn get_trace_root_sampling_info(&self) -> &Option { + &self.trace_root_info + } + /// Returns Datadog-specific sampling tags to be added as attributes /// /// # Parameters - /// * `decision` - The sampling decision (RecordAndSample or Drop) - /// * `mechanism` - The sampling mechanism used to make the decision - /// * `sample_rate` - The sample rate to use for the decision - /// * `rl_effective_rate` - The effective rate limit if rate limiting was applied + /// * `factory` - The attribute factory to use for creating attributes /// /// # Returns - /// A vector of attributes to add to the sampling result - pub fn to_dd_sampling_tags(&self) -> Vec { - let mut result = Vec::new(); + /// An optional vector of attributes to add to the sampling result + pub fn to_dd_sampling_tags(&self, factory: &F) -> Option> + where + F: crate::sampling::AttributeFactory, + { let Some(root_info) = &self.trace_root_info else { - return result; // No root info, return empty attributes + return None; // No root info, return empty attributes }; + let mut result: Vec; // Add rate limiting tag if applicable - if let Some(limit) = root_info.rl_effective_rate { - result.push(KeyValue::new(RL_EFFECTIVE_RATE, limit)); + if let Some(limit) = root_info.rl_effective_rate() { + result = Vec::with_capacity(4); + result.push(factory.create_f64(RL_EFFECTIVE_RATE, limit)); + } else { + result = Vec::with_capacity(3); } // Add the sampling decision trace tag with the mechanism - let mechanism = root_info.mechanism; - result.push(KeyValue::new( - SAMPLING_DECISION_MAKER_TAG_KEY, - mechanism.to_cow(), - )); + let mechanism = root_info.mechanism(); + result.push(factory.create_string(SAMPLING_DECISION_MAKER_TAG_KEY, mechanism.to_cow())); // Add the sample rate tag with the correct key based on the mechanism match mechanism { mechanism::AGENT_RATE_BY_SERVICE => { - result.push(KeyValue::new(SAMPLING_AGENT_RATE_TAG_KEY, root_info.rate)); - if let Some(rate_str) = format_sampling_rate(root_info.rate) { - result.push(KeyValue::new(SAMPLING_KNUTH_RATE_TAG_KEY, rate_str)); + result.push(factory.create_f64(SAMPLING_AGENT_RATE_TAG_KEY, root_info.rate())); + if let Some(rate_str) = format_sampling_rate(root_info.rate()) { + result.push(factory.create_string( + SAMPLING_KNUTH_RATE_TAG_KEY, + std::borrow::Cow::Owned(rate_str), + )); } } mechanism::REMOTE_USER_TRACE_SAMPLING_RULE | mechanism::REMOTE_DYNAMIC_TRACE_SAMPLING_RULE | mechanism::LOCAL_USER_TRACE_SAMPLING_RULE => { - result.push(KeyValue::new(SAMPLING_RULE_RATE_TAG_KEY, root_info.rate)); - if let Some(rate_str) = format_sampling_rate(root_info.rate) { - result.push(KeyValue::new(SAMPLING_KNUTH_RATE_TAG_KEY, rate_str)); + result.push(factory.create_f64(SAMPLING_RULE_RATE_TAG_KEY, root_info.rate())); + if let Some(rate_str) = format_sampling_rate(root_info.rate()) { + result.push(factory.create_string( + SAMPLING_KNUTH_RATE_TAG_KEY, + std::borrow::Cow::Owned(rate_str), + )); } } _ => {} } - let priority = root_info.priority; - result.push(KeyValue::new( - SAMPLING_PRIORITY_TAG_KEY, - priority.into_i8() as i64, - )); - - result - } + let priority = self.priority; + result.push(factory.create_i64(SAMPLING_PRIORITY_TAG_KEY, priority.into_i8() as i64)); - /// Converts the sampling result to a SamplingResult for OpenTelemetry - pub fn to_otel_decision(&self) -> SamplingDecision { - if self.is_keep { - SamplingDecision::RecordAndSample - } else { - SamplingDecision::RecordOnly - } + Some(result) } } #[cfg(test)] mod tests { use super::*; + use crate::mappings::get_otel_operation_name_v2; use crate::sampling::constants::{ attr::{ENV_TAG, RESOURCE_TAG}, pattern, }; - use opentelemetry::{trace::SpanKind, Key, KeyValue, Value}; + use crate::sampling::otel_mappings::{OtelSamplingData, PreSampledSpan}; + use opentelemetry::trace::{SpanKind, TraceId}; + use opentelemetry::{Key, KeyValue, Value}; use opentelemetry_sdk::Resource as SdkResource; use opentelemetry_semantic_conventions::{ attribute::{ @@ -632,6 +637,25 @@ mod tests { ] } + // Helper function to create SamplingData for testing + fn create_sampling_data<'a>( + is_parent_sampled: Option, + trace_id: &'a TraceId, + name: &'a str, + span_kind: SpanKind, + attributes: &'a [KeyValue], + resource: &'a RwLock, + ) -> OtelSamplingData<'a> { + OtelSamplingData::new( + is_parent_sampled, + trace_id, + name, + span_kind, + attributes, + resource, + ) + } + #[test] fn test_sampling_rule_creation() { let rule = SamplingRule::new( @@ -741,10 +765,10 @@ mod tests { let trace_id = create_trace_id(); // Rule with rate 1.0 should always sample - assert!(rule_always.sample(trace_id)); + assert!(rule_always.sample(&trace_id)); // Rule with rate 0.0 should never sample - assert!(!rule_never.sample(trace_id)); + assert!(!rule_never.sample(&trace_id)); } #[test] @@ -970,16 +994,17 @@ mod tests { let is_sampled = true; let mechanism = mechanism::LOCAL_USER_TRACE_SAMPLING_RULE; let sampling_result = DdSamplingResult { - is_keep: true, + priority: mechanism.to_priority(is_sampled), trace_root_info: Some(TraceRootSamplingInfo { - priority: mechanism.to_priority(is_sampled), mechanism, rate: 0.5, rl_effective_rate: None, }), }; - let attrs = sampling_result.to_dd_sampling_tags(); + let attrs = sampling_result + .to_dd_sampling_tags(&crate::sampling::OtelAttributeFactory) + .unwrap_or_default(); // Verify the number of attributes (decision_maker + priority + rule_rate + ksr) assert_eq!(attrs.len(), 4); @@ -1041,15 +1066,16 @@ mod tests { let is_sampled = false; let mechanism = mechanism::LOCAL_USER_TRACE_SAMPLING_RULE; let sampling_result = DdSamplingResult { - is_keep: false, + priority: mechanism.to_priority(is_sampled), trace_root_info: Some(TraceRootSamplingInfo { - priority: mechanism.to_priority(is_sampled), mechanism, rate: 0.5, rl_effective_rate: Some(rate_limit), }), }; - let attrs_with_limit = sampling_result.to_dd_sampling_tags(); + let attrs_with_limit = sampling_result + .to_dd_sampling_tags(&crate::sampling::OtelAttributeFactory) + .unwrap_or_default(); // With rate limiting, there should be one more attribute assert_eq!(attrs_with_limit.len(), 5); @@ -1076,16 +1102,17 @@ mod tests { let is_sampled = false; let mechanism = mechanism::AGENT_RATE_BY_SERVICE; let sampling_result = DdSamplingResult { - is_keep: false, + priority: mechanism.to_priority(is_sampled), trace_root_info: Some(TraceRootSamplingInfo { - priority: mechanism.to_priority(is_sampled), mechanism, rate: agent_rate, rl_effective_rate: None, }), }; - let agent_attrs = sampling_result.to_dd_sampling_tags(); + let agent_attrs = sampling_result + .to_dd_sampling_tags(&crate::sampling::OtelAttributeFactory) + .unwrap_or_default(); // Verify the number of attributes (should be 4: decision_maker + priority + // agent_rate + ksr) @@ -1175,39 +1202,42 @@ mod tests { // Create empty slices for attributes and links let empty_attrs: &[KeyValue] = &[]; + let trace_id = create_trace_id(); + let span_kind = SpanKind::Client; // Test with sampled parent context - // let parent_sampled = create_parent_context(true); - let result_sampled = sampler.sample( + let data_sampled = create_sampling_data( Some(true), - create_trace_id(), + &trace_id, "span", - &SpanKind::Client, + span_kind.clone(), empty_attrs, + sampler.resource.as_ref(), ); + let result_sampled = sampler.sample(&data_sampled); // Should inherit the sampling decision from parent - assert_eq!( - result_sampled.to_otel_decision(), - SamplingDecision::RecordAndSample - ); - assert!(result_sampled.to_dd_sampling_tags().is_empty()); + assert!(result_sampled.get_priority().is_keep()); + assert!(result_sampled + .to_dd_sampling_tags(&crate::sampling::OtelAttributeFactory) + .is_none()); // Test with non-sampled parent context - let result_not_sampled = sampler.sample( + let data_not_sampled = create_sampling_data( Some(false), - create_trace_id(), + &trace_id, "span", - &SpanKind::Client, + span_kind, empty_attrs, + sampler.resource.as_ref(), ); + let result_not_sampled = sampler.sample(&data_not_sampled); // Should inherit the sampling decision from parent - assert_eq!( - result_not_sampled.to_otel_decision(), - SamplingDecision::RecordOnly - ); - assert!(result_not_sampled.to_dd_sampling_tags().is_empty()); + assert!(!result_not_sampled.get_priority().is_keep()); + assert!(result_not_sampled + .to_dd_sampling_tags(&crate::sampling::OtelAttributeFactory) + .is_none()); } #[test] @@ -1224,36 +1254,44 @@ mod tests { let sampler = DatadogSampler::new(vec![rule], 100, create_empty_resource_arc()); + let trace_id = create_trace_id(); + let span_kind = SpanKind::Client; + // Test with matching attributes let attrs = create_attributes("resource", "prod"); - let result = sampler.sample( + let data = create_sampling_data( None, - create_trace_id(), + &trace_id, "span", - &SpanKind::Client, + span_kind.clone(), attrs.as_slice(), + sampler.resource.as_ref(), ); + let result = sampler.sample(&data); // Should sample and add attributes - assert_eq!(result.to_otel_decision(), SamplingDecision::RecordAndSample); - assert!(!result.to_dd_sampling_tags().is_empty()); + assert!(result.get_priority().is_keep()); + assert!(result + .to_dd_sampling_tags(&crate::sampling::OtelAttributeFactory) + .is_some()); // Test with non-matching attributes let attrs_no_match = create_attributes("other-resource", "prod"); - let result_no_match = sampler.sample( + let data_no_match = create_sampling_data( None, - create_trace_id(), + &trace_id, "span", - &SpanKind::Client, + span_kind, attrs_no_match.as_slice(), + sampler.resource.as_ref(), ); + let result_no_match = sampler.sample(&data_no_match); // Should still sample (default behavior when no rules match) and add attributes - assert_eq!( - result_no_match.to_otel_decision(), - SamplingDecision::RecordAndSample - ); - assert!(!result_no_match.to_dd_sampling_tags().is_empty()); + assert!(result_no_match.get_priority().is_keep()); + assert!(result_no_match + .to_dd_sampling_tags(&crate::sampling::OtelAttributeFactory) + .is_some()); } #[test] @@ -1270,21 +1308,25 @@ mod tests { sampler.update_service_rates(rates); + let trace_id = create_trace_id(); + let span_kind = SpanKind::Client; + // Test with attributes that should lead to "service:test-service,env:prod" key // Sampler's resource is already for "test-service" let attrs_sample = create_attributes("any_resource_name_matching_env", "prod"); - let result_sample = sampler.sample( + let data_sample = create_sampling_data( None, - create_trace_id(), + &trace_id, "span_for_test_service", - &SpanKind::Client, + span_kind.clone(), attrs_sample.as_slice(), + sampler.resource.as_ref(), ); + let result_sample = sampler.sample(&data_sample); // Expect RecordAndSample because service_key will be "service:test-service,env:prod" -> // rate 1.0 - assert_eq!( - result_sample.to_otel_decision(), - SamplingDecision::RecordAndSample, + assert!( + result_sample.get_priority().is_keep(), "Span for test-service/prod should be sampled" ); @@ -1292,17 +1334,18 @@ mod tests { // Update sampler's resource to be "other-service" sampler.resource = create_resource("other-service".to_string()); let attrs_no_sample = create_attributes("any_resource_name_matching_env", "prod"); - let result_no_sample = sampler.sample( + let data_no_sample = create_sampling_data( None, - create_trace_id(), + &trace_id, "span_for_other_service", - &SpanKind::Client, + span_kind, attrs_no_sample.as_slice(), + sampler.resource.as_ref(), ); + let result_no_sample = sampler.sample(&data_no_sample); // Expect Drop because service_key will be "service:other-service,env:prod" -> rate 0.0 - assert_eq!( - result_no_sample.to_otel_decision(), - SamplingDecision::RecordOnly, + assert!( + !result_no_sample.get_priority().is_keep(), "Span for other-service/prod should be dropped" ); } @@ -1332,12 +1375,14 @@ mod tests { // Should match integer float let integer_float_attrs = create_attributes_with_float("float_tag", 42.0); - assert!(rule_integer.matches(&PreSampledSpan::new( + let resource = create_empty_resource(); + let span = PreSampledSpan::new( "test-span", SpanKind::Client, integer_float_attrs.as_slice(), - &create_empty_resource() - ))); + &resource, + ); + assert!(rule_integer.matches(&span)); // Test case 2: Rule with wildcard pattern and non-integer float let rule_wildcard = SamplingRule::new( @@ -1351,12 +1396,14 @@ mod tests { // Should match non-integer float with wildcard pattern let decimal_float_attrs = create_attributes_with_float("float_tag", 42.5); - assert!(rule_wildcard.matches(&PreSampledSpan::new( + let resource = create_empty_resource(); + let span = PreSampledSpan::new( "test-span", SpanKind::Client, decimal_float_attrs.as_slice(), - &create_empty_resource() - ))); + &resource, + ); + assert!(rule_wildcard.matches(&span)); // Test case 3: Rule with specific pattern and non-integer float // With our simplified logic, non-integer floats will never match non-wildcard patterns @@ -1374,12 +1421,14 @@ mod tests { // Should NOT match the exact decimal value because non-integer floats only match wildcards let decimal_float_attrs = create_attributes_with_float("float_tag", 42.5); - assert!(!rule_specific.matches(&PreSampledSpan::new( + let resource = create_empty_resource(); + let span = PreSampledSpan::new( "test-span", SpanKind::Client, decimal_float_attrs.as_slice(), - &create_empty_resource() - ))); + &resource, + ); + assert!(!rule_specific.matches(&span)); // Test case 4: Pattern with partial wildcard '*' for suffix let rule_prefix = SamplingRule::new( 0.5, @@ -1395,12 +1444,14 @@ mod tests { // Should NOT match decimal values as we don't do partial pattern matching for non-integer // floats - assert!(!rule_prefix.matches(&PreSampledSpan::new( + let resource = create_empty_resource(); + let span = PreSampledSpan::new( "test-span", SpanKind::Client, decimal_float_attrs.as_slice(), - &create_empty_resource() - ))); + &resource, + ); + assert!(!rule_prefix.matches(&span)); } #[test] @@ -1422,30 +1473,36 @@ mod tests { let otel_attrs = vec![KeyValue::new("http.response.status_code", 500)]; // The rule should match because both use the same OpenTelemetry attribute name - assert!(rule.matches(&PreSampledSpan::new( + let resource = create_empty_resource(); + let span = PreSampledSpan::new( "test-span", SpanKind::Client, otel_attrs.as_slice(), - &create_empty_resource() - ))); + &resource, + ); + assert!(rule.matches(&span)); // Attributes that don't match the value pattern shouldn't match let non_matching_attrs = vec![KeyValue::new("http.response.status_code", 200)]; - assert!(!rule.matches(&PreSampledSpan::new( + let resource = create_empty_resource(); + let span = PreSampledSpan::new( "test-span", SpanKind::Client, non_matching_attrs.as_slice(), - &create_empty_resource() - ))); + &resource, + ); + assert!(!rule.matches(&span)); // Attributes that have no mapping to the rule tag shouldn't match let unrelated_attrs = vec![KeyValue::new("unrelated.attribute", "value")]; - assert!(!rule.matches(&PreSampledSpan::new( + let resource = create_empty_resource(); + let span = PreSampledSpan::new( "test-span", SpanKind::Client, unrelated_attrs.as_slice(), - &create_empty_resource() - ))); + &resource, + ); + assert!(!rule.matches(&span)); } #[test] @@ -1469,12 +1526,9 @@ mod tests { ]; // The rule should match because all three criteria are satisfied through mapping - assert!(rule.matches(&PreSampledSpan::new( - "test-span", - SpanKind::Client, - &mixed_attrs, - &create_empty_resource() - ),)); + let resource = create_empty_resource(); + let span = PreSampledSpan::new("test-span", SpanKind::Client, &mixed_attrs, &resource); + assert!(rule.matches(&span)); // If any criteria is not met, the rule shouldn't match let missing_method = vec![ @@ -1483,12 +1537,9 @@ mod tests { KeyValue::new("url.full", "https://example.com/api/v1/resource"), ]; - assert!(!rule.matches(&PreSampledSpan::new( - "test-span", - SpanKind::Client, - &missing_method, - &create_empty_resource() - ),)); + let resource = create_empty_resource(); + let span = PreSampledSpan::new("test-span", SpanKind::Client, &missing_method, &resource); + assert!(!rule.matches(&span)); // Wrong value should also not match let wrong_method = vec![ @@ -1497,12 +1548,9 @@ mod tests { KeyValue::new("url.full", "https://example.com/api/v1/resource"), ]; - assert!(!rule.matches(&PreSampledSpan::new( - "test-span", - SpanKind::Client, - &wrong_method, - &create_empty_resource() - ),)); + let resource = create_empty_resource(); + let span = PreSampledSpan::new("test-span", SpanKind::Client, &wrong_method, &resource); + assert!(!rule.matches(&span)); } #[test] @@ -1529,34 +1577,37 @@ mod tests { KeyValue::new(otel_response_status_key_str, 503), KeyValue::new(custom_tag_key, custom_tag_value), ]; - assert!(rule1.matches(&PreSampledSpan::new( + let span = PreSampledSpan::new( "test-span", span_kind_client, &mixed_attrs_match, - &empty_resource - )), "Rule with dd_status_key (5*) and custom.tag should match span with otel_response_status_key (503) and custom.tag"); + &empty_resource, + ); + assert!(rule1.matches(&span), "Rule with dd_status_key (5*) and custom.tag should match span with otel_response_status_key (503) and custom.tag"); // Case 2: Datadog convention for status code (503 matches "5*") + Direct custom.tag match let dd_attrs_match = vec![ KeyValue::new(dd_status_key_str, 503), KeyValue::new(custom_tag_key, custom_tag_value), ]; - assert!(rule1.matches(&PreSampledSpan::new( + let span = PreSampledSpan::new( "test-span", SpanKind::Client, &dd_attrs_match, - &empty_resource - )), "Rule with dd_status_key (5*) and custom.tag should match span with dd_status_key (503) and custom.tag"); + &empty_resource, + ); + assert!(rule1.matches(&span), "Rule with dd_status_key (5*) and custom.tag should match span with dd_status_key (503) and custom.tag"); // Case 3: Missing the custom tag should fail (status code would match) let missing_custom_tag_attrs = vec![KeyValue::new(otel_response_status_key_str, 503)]; + let span = PreSampledSpan::new( + "test-span", + SpanKind::Client, + &missing_custom_tag_attrs, + &empty_resource, + ); assert!( - !rule1.matches(&PreSampledSpan::new( - "test-span", - SpanKind::Client, - &missing_custom_tag_attrs, - &empty_resource - )), + !rule1.matches(&span), "Rule with dd_status_key (5*) and custom.tag should NOT match span missing custom.tag" ); @@ -1565,24 +1616,26 @@ mod tests { KeyValue::new(otel_response_status_key_str, 200), KeyValue::new(custom_tag_key, custom_tag_value), ]; - assert!(!rule1.matches(&PreSampledSpan::new( + let span = PreSampledSpan::new( "test-span", SpanKind::Client, &non_matching_otel_status_attrs, - &empty_resource - )), "Rule with dd_status_key (5*) and custom.tag should NOT match span with non-matching otel_response_status_key (200)"); + &empty_resource, + ); + assert!(!rule1.matches(&span), "Rule with dd_status_key (5*) and custom.tag should NOT match span with non-matching otel_response_status_key (200)"); // Case 5: No recognizable status code + custom.tag present let no_status_code_attrs = vec![ KeyValue::new("another.tag", "irrelevant"), KeyValue::new(custom_tag_key, custom_tag_value), ]; - assert!(!rule1.matches(&PreSampledSpan::new( + let span = PreSampledSpan::new( "test-span", SpanKind::Client, &no_status_code_attrs, - &empty_resource - )), "Rule with dd_status_key (5*) and custom.tag should NOT match span with no status code attribute"); + &empty_resource, + ); + assert!(!rule1.matches(&span), "Rule with dd_status_key (5*) and custom.tag should NOT match span with no status code attribute"); // Case 6: Rule uses OTel key http.response.status_code directly, span has matching OTel // key. @@ -1595,12 +1648,13 @@ mod tests { KeyValue::new(otel_response_status_key_str, 200), KeyValue::new(custom_tag_key, custom_tag_value), ]; - assert!(rule2.matches(&PreSampledSpan::new( + let span = PreSampledSpan::new( "test-span", SpanKind::Client, &otel_key_rule_match_attrs, - &empty_resource - )), "Rule with otel_response_status_key (200) and custom.tag should match span with otel_response_status_key (200) and custom.tag"); + &empty_resource, + ); + assert!(rule2.matches(&span), "Rule with otel_response_status_key (200) and custom.tag should match span with otel_response_status_key (200) and custom.tag"); } #[test] @@ -1653,27 +1707,26 @@ mod tests { let empty_resource: SdkResource = create_empty_resource(); // Print the operation name that will be generated - let http_client_op_name = get_otel_operation_name_v2(&PreSampledSpan::new( - "", - SpanKind::Client, - &http_client_attrs, - &empty_resource, - )); + let span = PreSampledSpan::new("", SpanKind::Client, &http_client_attrs, &empty_resource); + let http_client_op_name = get_otel_operation_name_v2(&span); assert_eq!( http_client_op_name, "http.client.request", "HTTP client operation name should be correct" ); - let result = sampler.sample( + let span_kind_client = SpanKind::Client; + let data = create_sampling_data( None, - trace_id, + &trace_id, "test-span", - &SpanKind::Client, + span_kind_client.clone(), &http_client_attrs, + sampler.resource.as_ref(), ); + let result = sampler.sample(&data); // Should be sampled due to matching the http_rule - assert_eq!(result.to_otel_decision(), SamplingDecision::RecordAndSample); + assert!(result.get_priority().is_keep()); // 2. HTTP server request let http_server_attrs = vec![KeyValue::new( @@ -1682,27 +1735,26 @@ mod tests { )]; // Print the operation name that will be generated - let http_server_op_name = get_otel_operation_name_v2(&PreSampledSpan::new( - "", - SpanKind::Server, - &http_server_attrs, - &empty_resource, - )); + let span = PreSampledSpan::new("", SpanKind::Server, &http_server_attrs, &empty_resource); + let http_server_op_name = get_otel_operation_name_v2(&span); assert_eq!( http_server_op_name, "http.server.request", "HTTP server operation name should be correct" ); - let result = sampler.sample( + let span_kind_server = SpanKind::Server; + let data = create_sampling_data( None, - trace_id, + &trace_id, "test-span", - &SpanKind::Server, + span_kind_server.clone(), &http_server_attrs, + sampler.resource.as_ref(), ); + let result = sampler.sample(&data); // Should be sampled due to matching the http_rule - assert_eq!(result.to_otel_decision(), SamplingDecision::RecordAndSample); + assert!(result.get_priority().is_keep()); // 3. Database query let db_attrs = vec![KeyValue::new( @@ -1711,27 +1763,25 @@ mod tests { )]; // Print the operation name that will be generated - let db_op_name = get_otel_operation_name_v2(&PreSampledSpan::new( - "", - SpanKind::Client, - &db_attrs, - &empty_resource, - )); + let span = PreSampledSpan::new("", SpanKind::Client, &db_attrs, &empty_resource); + let db_op_name = get_otel_operation_name_v2(&span); assert_eq!( db_op_name, "postgresql.query", "Database operation name should be correct" ); - let result = sampler.sample( + let data = create_sampling_data( None, - trace_id, + &trace_id, "test-span", - &SpanKind::Client, // DB queries use client span kind + span_kind_client, // DB queries use client span kind &db_attrs, + sampler.resource.as_ref(), ); + let result = sampler.sample(&data); // Should be sampled due to matching the db_rule - assert_eq!(result.to_otel_decision(), SamplingDecision::RecordAndSample); + assert!(result.get_priority().is_keep()); // 4. Messaging operation let messaging_attrs = vec![ @@ -1746,53 +1796,51 @@ mod tests { ]; // Print the operation name that will be generated - let messaging_op_name = get_otel_operation_name_v2(&PreSampledSpan::new( - "", - SpanKind::Consumer, - &messaging_attrs, - &empty_resource, - )); + let span = PreSampledSpan::new("", SpanKind::Consumer, &messaging_attrs, &empty_resource); + let messaging_op_name = get_otel_operation_name_v2(&span); assert_eq!( messaging_op_name, "kafka.process", "Messaging operation name should be correct" ); - let result = sampler.sample( + let span_kind_consumer = SpanKind::Consumer; + let data = create_sampling_data( None, - trace_id, + &trace_id, "test-span", - &SpanKind::Consumer, // Messaging uses consumer span kind + span_kind_consumer, // Messaging uses consumer span kind &messaging_attrs, + sampler.resource.as_ref(), ); + let result = sampler.sample(&data); // Should be sampled due to matching the messaging_rule - assert_eq!(result.to_otel_decision(), SamplingDecision::RecordAndSample); + assert!(result.get_priority().is_keep()); // 5. Generic internal span (should not match any rules) let internal_attrs = vec![KeyValue::new("custom.tag", "value")]; // Print the operation name that will be generated - let internal_op_name = get_otel_operation_name_v2(&PreSampledSpan::new( - "", - SpanKind::Internal, - &internal_attrs, - &empty_resource, - )); + let span = PreSampledSpan::new("", SpanKind::Internal, &internal_attrs, &empty_resource); + let internal_op_name = get_otel_operation_name_v2(&span); assert_eq!( internal_op_name, "internal", "Internal operation name should be the span kind" ); - let result = sampler.sample( + let span_kind_internal = SpanKind::Internal; + let data = create_sampling_data( None, - trace_id, + &trace_id, "test-span", - &SpanKind::Internal, + span_kind_internal, &internal_attrs, + sampler.resource.as_ref(), ); + let result = sampler.sample(&data); // Should still be sampled (default behavior when no rules match) - assert_eq!(result.to_otel_decision(), SamplingDecision::RecordAndSample); + assert!(result.get_priority().is_keep()); // 6. Server with protocol but no HTTP method let server_protocol_attrs = vec![KeyValue::new( @@ -1801,28 +1849,31 @@ mod tests { )]; // Print the operation name that will be generated - let server_protocol_op_name = get_otel_operation_name_v2(&PreSampledSpan::new( + let span = PreSampledSpan::new( "", SpanKind::Server, &server_protocol_attrs, &empty_resource, - )); + ); + let server_protocol_op_name = get_otel_operation_name_v2(&span); assert_eq!( server_protocol_op_name, "http.server.request", "Server with protocol operation name should use protocol" ); - let result = sampler.sample( + let data = create_sampling_data( None, - trace_id, + &trace_id, "test-span", - &SpanKind::Server, + span_kind_server, &server_protocol_attrs, + sampler.resource.as_ref(), ); + let result = sampler.sample(&data); // Should not match our http rule since operation name would be "http.server.request" // But should still be sampled (default behavior) - assert_eq!(result.to_otel_decision(), SamplingDecision::RecordAndSample); + assert!(result.get_priority().is_keep()); } #[test] diff --git a/datadog-opentelemetry/src/sampling/mod.rs b/datadog-opentelemetry/src/sampling/mod.rs index 2f995a30..1dd05d7d 100644 --- a/datadog-opentelemetry/src/sampling/mod.rs +++ b/datadog-opentelemetry/src/sampling/mod.rs @@ -11,7 +11,12 @@ pub(crate) mod otel_mappings; pub(crate) mod rate_limiter; pub(crate) mod rate_sampler; pub(crate) mod rules_sampler; +mod types; pub(crate) mod utils; // Re-export key public types pub use datadog_sampler::{DatadogSampler, SamplingRule, SamplingRulesCallback}; +pub use otel_mappings::{OtelAttributeFactory, OtelSamplingData}; +pub use types::{ + AttributeFactory, AttributeLike, SamplingData, SpanProperties, TraceIdLike, ValueLike, +}; diff --git a/datadog-opentelemetry/src/sampling/otel_mappings.rs b/datadog-opentelemetry/src/sampling/otel_mappings.rs index 95d763b7..5ccd5e2c 100644 --- a/datadog-opentelemetry/src/sampling/otel_mappings.rs +++ b/datadog-opentelemetry/src/sampling/otel_mappings.rs @@ -2,11 +2,16 @@ // SPDX-License-Identifier: Apache-2.0 use std::borrow::Cow; +use std::sync::RwLock; -use crate::mappings::{AttributeIndices, AttributeKey, OtelSpan}; -use opentelemetry::Key; +use crate::mappings::{ + get_dd_key_for_otlp_attribute, get_otel_env, get_otel_operation_name_v2, get_otel_resource_v2, + get_otel_service, get_otel_status_code, AttributeIndices, AttributeKey, OtelSpan, +}; +use crate::sampling::{SamplingData, SpanProperties}; +use opentelemetry::{Key, KeyValue}; -pub(crate) struct PreSampledSpan<'a> { +pub struct PreSampledSpan<'a> { pub name: &'a str, pub span_kind: opentelemetry::trace::SpanKind, pub attributes: &'a [opentelemetry::KeyValue], @@ -73,3 +78,170 @@ impl<'a> OtelSpan<'a> for PreSampledSpan<'a> { self.resource.len() } } + +impl SpanProperties for PreSampledSpan<'_> { + type Attribute = opentelemetry::KeyValue; + type AttributesIter<'b> + = std::slice::Iter<'b, opentelemetry::KeyValue> + where + Self: 'b; + + fn operation_name(&self) -> Cow<'_, str> { + get_otel_operation_name_v2(self) + } + + fn service(&self) -> Cow<'_, str> { + get_otel_service(self) + } + + fn env(&self) -> Cow<'_, str> { + get_otel_env(self) + } + + fn resource(&self) -> Cow<'_, str> { + get_otel_resource_v2(self) + } + + fn status_code(&self) -> Option { + get_otel_status_code(self) + } + + fn attributes(&self) -> Self::AttributesIter<'_> { + self.attributes.iter() + } + + fn get_alternate_key<'b>(&self, key: &'b str) -> Option> { + let mapped = get_dd_key_for_otlp_attribute(key); + // If the mapping returned an empty string or the same key, there's no alternate + if mapped.is_empty() || mapped.as_ref() == key { + None + } else { + Some(mapped) + } + } +} + +/// OpenTelemetry Sampling Data implementation. +/// +/// Provides the necessary data for making sampling decisions on OpenTelemetry spans. +/// This struct contains references to span metadata including the trace ID, span name, +/// span kind, attributes, and resource information. +pub struct OtelSamplingData<'a> { + is_parent_sampled: Option, + trace_id: &'a opentelemetry::trace::TraceId, + name: &'a str, + span_kind: opentelemetry::trace::SpanKind, + attributes: &'a [KeyValue], + resource: &'a RwLock, +} + +impl<'a> OtelSamplingData<'a> { + /// Creates a new OpenTelemetry sampling data instance. + /// + /// # Arguments + /// + /// * `is_parent_sampled` - Whether the parent span was sampled, if known + /// * `trace_id` - The trace ID for this span + /// * `name` - The span name + /// * `span_kind` - The kind of span (e.g., Server, Client) + /// * `attributes` - The span's attributes + /// * `resource` - The OpenTelemetry resource containing service metadata + pub fn new( + is_parent_sampled: Option, + trace_id: &'a opentelemetry::trace::TraceId, + name: &'a str, + span_kind: opentelemetry::trace::SpanKind, + attributes: &'a [KeyValue], + resource: &'a RwLock, + ) -> Self { + Self { + is_parent_sampled, + trace_id, + name, + span_kind, + attributes, + resource, + } + } +} + +impl SamplingData for OtelSamplingData<'_> { + type TraceId = opentelemetry::trace::TraceId; + type Properties<'b> + = PreSampledSpan<'b> + where + Self: 'b; + + fn is_parent_sampled(&self) -> Option { + self.is_parent_sampled + } + fn trace_id(&self) -> &Self::TraceId { + self.trace_id + } + + fn with_span_properties(&self, s: &S, f: F) -> T + where + F: for<'b> Fn(&S, &PreSampledSpan<'b>) -> T, + { + let resource_guard = self.resource.read().unwrap(); + let span = PreSampledSpan::new( + self.name, + self.span_kind.clone(), + self.attributes, + &resource_guard, + ); + f(s, &span) + } +} + +impl crate::sampling::TraceIdLike for opentelemetry::trace::TraceId { + type Item = opentelemetry::trace::TraceId; + + fn to_u128(&self) -> u128 { + u128::from_be_bytes(self.to_bytes()) + } + + fn inner(&self) -> &Self::Item { + self + } +} + +/// Factory for creating OpenTelemetry KeyValue attributes. +pub struct OtelAttributeFactory; + +impl crate::sampling::AttributeFactory for OtelAttributeFactory { + type Attribute = opentelemetry::KeyValue; + + fn create_i64(&self, key: &'static str, value: i64) -> Self::Attribute { + opentelemetry::KeyValue::new(key, value) + } + + fn create_f64(&self, key: &'static str, value: f64) -> Self::Attribute { + opentelemetry::KeyValue::new(key, value) + } + + fn create_string(&self, key: &'static str, value: Cow<'static, str>) -> Self::Attribute { + opentelemetry::KeyValue::new(key, value) + } +} + +/// Converts a Datadog sampling priority to an OpenTelemetry sampling decision. +/// +/// # Arguments +/// +/// * `priority` - The Datadog sampling priority +/// +/// # Returns +/// +/// The corresponding OpenTelemetry sampling decision: +/// - `RecordAndSample` if the priority indicates the trace should be kept +/// - `RecordOnly` if the priority indicates the trace should be dropped +pub(crate) fn priority_to_otel_decision( + priority: crate::core::sampling::SamplingPriority, +) -> opentelemetry::trace::SamplingDecision { + if priority.is_keep() { + opentelemetry::trace::SamplingDecision::RecordAndSample + } else { + opentelemetry::trace::SamplingDecision::RecordOnly + } +} diff --git a/datadog-opentelemetry/src/sampling/rate_sampler.rs b/datadog-opentelemetry/src/sampling/rate_sampler.rs index ba6d77de..97a2304b 100644 --- a/datadog-opentelemetry/src/sampling/rate_sampler.rs +++ b/datadog-opentelemetry/src/sampling/rate_sampler.rs @@ -1,11 +1,10 @@ // Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 -use opentelemetry::trace::TraceId; -use std::fmt; - use super::constants::{numeric, rate}; +use crate::sampling::TraceIdLike; use numeric::{KNUTH_FACTOR, MAX_UINT_64BITS}; +use std::fmt; /// Keeps (100 * `sample_rate`)% of the traces randomly. #[derive(Clone)] @@ -50,7 +49,7 @@ impl RateSampler { /// Determines if a trace should be sampled based on its trace_id and the configured rate. /// Returns true if the trace should be kept, false otherwise. - pub fn sample(&self, trace_id: TraceId) -> bool { + pub fn sample(&self, trace_id: &T) -> bool { // Fast-path for sample rate of 0.0 (always drop) or 1.0 (always sample) if self.sample_rate <= rate::MIN_SAMPLE_RATE { return false; @@ -60,8 +59,7 @@ impl RateSampler { } // Convert trace_id to u128 and then cast to u64 to get the lower 64 bits - let trace_id_u128 = u128::from_be_bytes(trace_id.to_bytes()); - let trace_id_64bits = trace_id_u128 as u64; + let trace_id_64bits = trace_id.to_u128() as u64; let hashed_id = trace_id_64bits.wrapping_mul(KNUTH_FACTOR); @@ -124,7 +122,7 @@ mod tests { bytes_zero[15] = 1; // Example ID let trace_id_zero = TraceId::from_bytes(bytes_zero); assert!( - !sampler_zero.sample(trace_id_zero), + !sampler_zero.sample(&trace_id_zero), "sampler_zero should return false" ); @@ -134,7 +132,7 @@ mod tests { bytes_one[15] = 2; // Example ID let trace_id_one = TraceId::from_bytes(bytes_one); assert!( - sampler_one.sample(trace_id_one), + sampler_one.sample(&trace_id_one), "sampler_one should return true" ); @@ -149,7 +147,7 @@ mod tests { let sample_hash = sample_u64.wrapping_mul(KNUTH_FACTOR); assert!(sample_hash <= threshold); assert!( - sampler_half.sample(trace_id_sample), + sampler_half.sample(&trace_id_sample), "sampler_half should sample trace_id_sample" ); @@ -165,7 +163,7 @@ mod tests { "Drop hash {drop_hash} should be > threshold {threshold}", ); assert!( - !sampler_half.sample(trace_id_drop), + !sampler_half.sample(&trace_id_drop), "sampler_half should drop trace_id_drop" ); } @@ -177,7 +175,7 @@ mod tests { let bytes_to_sample = [0u8; 16]; let trace_id_to_sample = TraceId::from_bytes(bytes_to_sample); assert!( - sampler_half.sample(trace_id_to_sample), + sampler_half.sample(&trace_id_to_sample), "Sampler with 0.5 rate should sample trace ID 0" ); } diff --git a/datadog-opentelemetry/src/sampling/rules_sampler.rs b/datadog-opentelemetry/src/sampling/rules_sampler.rs index e679a09e..7e412e9d 100644 --- a/datadog-opentelemetry/src/sampling/rules_sampler.rs +++ b/datadog-opentelemetry/src/sampling/rules_sampler.rs @@ -24,9 +24,9 @@ impl RulesSampler { } /// Finds the first matching rule for a span - pub fn find_matching_rule(&self, matcher: F) -> Option + pub fn find_matching_rule(&self, mut matcher: F) -> Option where - F: Fn(&SamplingRule) -> bool, + F: FnMut(&SamplingRule) -> bool, { self.inner .read() diff --git a/datadog-opentelemetry/src/sampling/types.rs b/datadog-opentelemetry/src/sampling/types.rs new file mode 100644 index 00000000..3e7e164a --- /dev/null +++ b/datadog-opentelemetry/src/sampling/types.rs @@ -0,0 +1,207 @@ +// Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +//! Type definitions and traits for sampling + +use std::borrow::Cow; + +/// A trait for converting trace IDs to a numeric representation. +/// +/// Provides a common interface for converting trace IDs from different tracing systems +/// into a 128-bit unsigned integer for use in hash-based operations. +/// +/// # Examples +/// +/// ```ignore +/// use opentelemetry::trace::TraceId; +/// use datadog_opentelemetry::sampling::TraceIdLike; +/// +/// #[derive(Clone, PartialEq, Eq)] +/// struct MyTraceId(TraceId); +/// +/// impl TraceIdLike for MyTraceId { +/// type Item = TraceId; +/// +/// fn to_u128(&self) -> u128 { +/// u128::from_be_bytes(self.0.to_bytes()) +/// } +/// +/// fn inner(&self) -> &Self::Item { +/// &self.0 +/// } +/// } +/// ``` +pub trait TraceIdLike { + /// The underlying trace ID type. + type Item: PartialEq + Eq; + + /// Converts the trace ID to a 128-bit unsigned integer. + /// + /// The conversion should be deterministic: the same trace ID must always produce + /// the same `u128` value. Typically implemented by interpreting the trace ID's + /// bytes as a big-endian integer. + fn to_u128(&self) -> u128; + + /// Returns a reference to the underlying trace ID. + /// + /// Used internally for trait object equality comparisons. + fn inner(&self) -> &Self::Item; +} + +impl PartialEq for dyn TraceIdLike { + fn eq(&self, other: &Self) -> bool { + self.inner() == other.inner() + } +} + +/// A trait for accessing span attribute key-value pairs. +/// +/// Provides methods for retrieving the key and value of a span attribute. +pub trait AttributeLike { + /// The type of the value that implements `ValueLike`. + type Value: ValueLike; + + /// Returns the attribute key as a string. + fn key(&self) -> &str; + + /// Returns a reference to the attribute value. + fn value(&self) -> &Self::Value; +} + +/// A trait for extracting typed values from attribute values. +/// +/// Provides methods for converting attribute values to common types used in sampling logic. +pub trait ValueLike { + /// Extracts a float value if the value can be represented as `f64`. + /// + /// Returns `Some(f64)` for numeric types, `None` otherwise. + fn extract_float(&self) -> Option; + + /// Extracts a string representation of the value. + /// + /// Returns `Some(Cow)` for types that can be converted to strings, `None` otherwise. + fn extract_string(&self) -> Option>; +} + +impl AttributeLike for opentelemetry::KeyValue { + type Value = opentelemetry::Value; + + fn key(&self) -> &str { + self.key.as_str() + } + + fn value(&self) -> &Self::Value { + &self.value + } +} + +impl ValueLike for opentelemetry::Value { + fn extract_float(&self) -> Option { + crate::sampling::utils::extract_float_value(self) + } + + fn extract_string(&self) -> Option> { + crate::sampling::utils::extract_string_value(self) + } +} + +/// A trait for creating sampling attributes. +/// +/// This trait abstracts the creation of attributes for sampling tags, +/// allowing different implementations for different attribute types. +pub trait AttributeFactory { + /// The type of attribute created by this factory. + type Attribute: Sized; + + /// Creates an attribute with an i64 value. + fn create_i64(&self, key: &'static str, value: i64) -> Self::Attribute; + + /// Creates an attribute with an f64 value. + fn create_f64(&self, key: &'static str, value: f64) -> Self::Attribute; + + /// Creates an attribute with a string value. + fn create_string(&self, key: &'static str, value: Cow<'static, str>) -> Self::Attribute; +} + +/// A trait for accessing span properties needed for sampling decisions. +/// +/// Provides methods for retrieving span metadata like operation name, service, environment, +/// resource name, and status codes used by sampling rules. +pub trait SpanProperties { + /// The type of attribute that implements `AttributeLike`. + type Attribute: AttributeLike; + + /// The type of iterator over span attributes. + type AttributesIter<'a>: Iterator + where + Self: 'a; + + /// Returns the operation name for the span. + /// + /// The operation name is derived from span attributes and kind according to + /// OpenTelemetry semantic conventions. + fn operation_name(&self) -> Cow<'_, str>; + + /// Returns the service name for the span. + /// + /// The service name is extracted from resource attributes. + fn service(&self) -> Cow<'_, str>; + + /// Returns the environment name for the span. + /// + /// The environment is extracted from span or resource attributes. + fn env(&self) -> Cow<'_, str>; + + /// Returns the resource name for the span. + /// + /// The resource name is derived from span attributes and kind. + fn resource(&self) -> Cow<'_, str>; + + /// Returns the HTTP status code if present. + /// + /// Returns `None` if the span does not have an HTTP status code attribute. + fn status_code(&self) -> Option; + + /// Returns an iterator over span attributes. + fn attributes(&self) -> Self::AttributesIter<'_>; + + /// Returns an alternate key for the given attribute key. + /// + /// This is used for mapping between different attribute naming conventions + /// (e.g., OpenTelemetry to Datadog). Returns `Some(alternate_key)` if a mapping exists, + /// or `None` if the attribute key has no alternate mapping. + fn get_alternate_key<'b>(&self, key: &'b str) -> Option>; +} + +/// A trait for accessing sampling data, combining trace ID and span properties. +/// +/// This trait provides unified access to both the trace ID and span properties +/// needed for making sampling decisions. +pub trait SamplingData { + /// The type that implements `TraceIdLike`. + type TraceId: TraceIdLike; + + /// The type that implements `SpanProperties`. + type Properties<'a>: SpanProperties + where + Self: 'a; + + /// Returns whether the parent span was sampled. + /// + /// Returns: + /// - `Some(true)` if the parent span was sampled + /// - `Some(false)` if the parent span was not sampled + /// - `None` if there is no parent sampling information + fn is_parent_sampled(&self) -> Option; + + /// Returns a reference to the trace ID. + fn trace_id(&self) -> &Self::TraceId; + + /// Returns the span properties via a callback. + /// + /// This method constructs the span properties and passes them to the provided + /// callback function. The properties are only valid for the duration of the callback. + fn with_span_properties(&self, s: &S, f: F) -> T + where + F: Fn(&S, &Self::Properties<'_>) -> T; +} From 81c1a70d241f4c13af8e3d6472436ad7cf547361 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Antonsson?= Date: Fri, 13 Feb 2026 14:40:28 +0100 Subject: [PATCH 3/8] refactor(sampling): remove resource from datadog_sampler --- .../benches/datadog_sampling_benchmark.rs | 5 +- datadog-opentelemetry/src/sampler.rs | 6 +- .../src/sampling/datadog_sampler.rs | 112 +++++++----------- 3 files changed, 52 insertions(+), 71 deletions(-) diff --git a/datadog-opentelemetry/benches/datadog_sampling_benchmark.rs b/datadog-opentelemetry/benches/datadog_sampling_benchmark.rs index 122061c5..21e77183 100644 --- a/datadog-opentelemetry/benches/datadog_sampling_benchmark.rs +++ b/datadog-opentelemetry/benches/datadog_sampling_benchmark.rs @@ -397,14 +397,15 @@ fn bench_datadog_sampling>, trace_registry: Option, cfg: Arc, } @@ -45,10 +46,11 @@ impl Sampler { trace_registry: Option, ) -> Self { let rules = SamplingRule::from_configs(cfg.trace_sampling_rules().to_vec()); - let sampler = DatadogSampler::new(rules, cfg.trace_rate_limit(), resource); + let sampler = DatadogSampler::new(rules, cfg.trace_rate_limit()); Self { cfg, sampler, + resource, trace_registry, } } @@ -105,7 +107,7 @@ impl ShouldSample for Sampler { name, span_kind.clone(), attributes, - self.sampler.resource(), + self.resource.as_ref(), ); let result = self.sampler.sample(&data); let trace_propagation_data = if let Some(trace_root_info) = diff --git a/datadog-opentelemetry/src/sampling/datadog_sampler.rs b/datadog-opentelemetry/src/sampling/datadog_sampler.rs index a9ed2d2a..143a569d 100644 --- a/datadog-opentelemetry/src/sampling/datadog_sampler.rs +++ b/datadog-opentelemetry/src/sampling/datadog_sampler.rs @@ -14,7 +14,6 @@ pub type SamplingRulesCallback = Box Fn(&'a [SamplingRuleConfig]) + use crate::sampling::{AttributeLike, SamplingData, SpanProperties, TraceIdLike, ValueLike}; use std::collections::HashMap; -use std::sync::{Arc, RwLock}; use super::agent_service_sampler::{AgentRates, ServicesSampler}; // Import the attr constants @@ -266,18 +265,11 @@ pub struct DatadogSampler { /// Rate limiter for limiting the number of spans per second rate_limiter: RateLimiter, - - /// Resource with service information, wrapped in Arc> for sharing - resource: Arc>, } impl DatadogSampler { /// Creates a new DatadogSampler with the given rules - pub fn new( - rules: Vec, - rate_limit: i32, - resource: Arc>, - ) -> Self { + pub fn new(rules: Vec, rate_limit: i32) -> Self { // Create rate limiter with default value of 100 if not provided let limiter = RateLimiter::new(rate_limit, None); @@ -285,15 +277,9 @@ impl DatadogSampler { rules: RulesSampler::new(rules), service_samplers: ServicesSampler::default(), rate_limiter: limiter, - resource, } } - /// Returns a reference to the resource - pub fn resource(&self) -> &RwLock { - self.resource.as_ref() - } - // used for tests #[allow(dead_code)] pub(crate) fn update_service_rates(&self, rates: impl IntoIterator) { @@ -601,6 +587,7 @@ mod tests { resource::SERVICE_NAME, trace::{HTTP_RESPONSE_STATUS_CODE, NETWORK_PROTOCOL_NAME}, }; + use std::sync::{Arc, RwLock}; fn create_empty_resource() -> opentelemetry_sdk::Resource { opentelemetry_sdk::Resource::builder_empty().build() @@ -774,32 +761,32 @@ mod tests { #[test] fn test_datadog_sampler_creation() { // Create a sampler with default config - let sampler = DatadogSampler::new(vec![], 100, create_empty_resource_arc()); + let sampler = DatadogSampler::new(vec![], 100); assert!(sampler.rules.is_empty()); assert!(sampler.service_samplers.is_empty()); // Create a sampler with rules let rule = SamplingRule::new(0.5, None, None, None, None, None); - let sampler_with_rules = DatadogSampler::new(vec![rule], 200, create_empty_resource_arc()); + let sampler_with_rules = DatadogSampler::new(vec![rule], 200); assert_eq!(sampler_with_rules.rules.len(), 1); } #[test] fn test_service_key_generation() { - // Use create_resource to initialize the sampler with a service name in its resource + // Create resource with test service name let test_service_name = "test-service".to_string(); - let sampler_resource = create_resource(test_service_name.clone()); - let sampler = DatadogSampler::new(vec![], 100, sampler_resource); + let resource = create_resource(test_service_name.clone()); + let sampler = DatadogSampler::new(vec![], 100); // Test with service and env // The 'service' in create_attributes is not used for the service part of the key, // but ENV_TAG is still correctly picked up from attributes. let attrs = create_attributes("resource", "production"); - let res = &sampler.resource.read().unwrap(); + let res = &resource.read().unwrap(); let span = PreSampledSpan::new("test-span", SpanKind::Internal, attrs.as_slice(), res); assert_eq!( sampler.service_key(&span), - // Expect the service name from the sampler's resource + // Expect the service name from the resource format!("service:{test_service_name},env:production") ); @@ -814,14 +801,14 @@ mod tests { ); assert_eq!( sampler.service_key(&span), - // Expect the service name from the sampler's resource and an empty env + // Expect the service name from the resource and an empty env format!("service:{test_service_name},env:") ); } #[test] fn test_update_service_rates() { - let sampler = DatadogSampler::new(vec![], 100, create_empty_resource_arc()); + let sampler = DatadogSampler::new(vec![], 100); // Update with service rates let mut rates = HashMap::new(); @@ -885,18 +872,13 @@ mod tests { Some("default".to_string()), // Lowest priority ); - // Sampler is mutable to allow resource updates - let mut sampler = DatadogSampler::new( - vec![rule1.clone(), rule2.clone(), rule3.clone()], - 100, - create_empty_resource_arc(), // Initial resource, will be updated before each check - ); + let sampler = DatadogSampler::new(vec![rule1.clone(), rule2.clone(), rule3.clone()], 100); // Test with a specific service that should match the first rule (rule1) { - sampler.resource = create_resource("service1".to_string()); + let resource = create_resource("service1".to_string()); let attrs1 = create_attributes("resource_val_for_attr1", "prod"); - let res = sampler.resource.read().unwrap(); + let res = resource.read().unwrap(); let span = PreSampledSpan::new("test-span", SpanKind::Client, attrs1.as_slice(), &res); let matching_rule_for_attrs1 = sampler.find_matching_rule(&span); assert!( @@ -910,9 +892,9 @@ mod tests { // Test with a specific service that should match the second rule (rule2) { - sampler.resource = create_resource("service2".to_string()); + let resource = create_resource("service2".to_string()); let attrs2 = create_attributes("resource_val_for_attr2", "prod"); - let res = sampler.resource.read().unwrap(); + let res = resource.read().unwrap(); let span = PreSampledSpan::new("test-span", SpanKind::Client, attrs2.as_slice(), &res); let matching_rule_for_attrs2 = sampler.find_matching_rule(&span); assert!( @@ -926,9 +908,9 @@ mod tests { // Test with a service that matches the wildcard rule (rule3) { - sampler.resource = create_resource("service3".to_string()); + let resource = create_resource("service3".to_string()); let attrs3 = create_attributes("resource_val_for_attr3", "prod"); - let res = sampler.resource.read().unwrap(); + let res = resource.read().unwrap(); let span = PreSampledSpan::new("test-span", SpanKind::Client, attrs3.as_slice(), &res); let matching_rule_for_attrs3 = sampler.find_matching_rule(&span); assert!( @@ -942,9 +924,9 @@ mod tests { // Test with a service that doesn't match any rule's service pattern { - sampler.resource = create_resource("other_sampler_service".to_string()); + let resource = create_resource("other_sampler_service".to_string()); let attrs4 = create_attributes("resource_val_for_attr4", "prod"); - let res = sampler.resource.read().unwrap(); + let res = resource.read().unwrap(); let span = PreSampledSpan::new("test-span", SpanKind::Client, attrs4.as_slice(), &res); let matching_rule_for_attrs4 = sampler.find_matching_rule(&span); assert!( @@ -956,7 +938,7 @@ mod tests { #[test] fn test_get_sampling_mechanism() { - let sampler = DatadogSampler::new(vec![], 100, create_empty_resource_arc()); + let sampler = DatadogSampler::new(vec![], 100); // Create rules with different provenances let rule_customer = @@ -1198,12 +1180,13 @@ mod tests { #[test] fn test_should_sample_parent_context() { - let sampler = DatadogSampler::new(vec![], 100, create_empty_resource_arc()); + let sampler = DatadogSampler::new(vec![], 100); // Create empty slices for attributes and links let empty_attrs: &[KeyValue] = &[]; let trace_id = create_trace_id(); let span_kind = SpanKind::Client; + let resource = create_empty_resource_arc(); // Test with sampled parent context let data_sampled = create_sampling_data( @@ -1212,7 +1195,7 @@ mod tests { "span", span_kind.clone(), empty_attrs, - sampler.resource.as_ref(), + resource.as_ref(), ); let result_sampled = sampler.sample(&data_sampled); @@ -1229,7 +1212,7 @@ mod tests { "span", span_kind, empty_attrs, - sampler.resource.as_ref(), + resource.as_ref(), ); let result_not_sampled = sampler.sample(&data_not_sampled); @@ -1252,10 +1235,11 @@ mod tests { None, ); - let sampler = DatadogSampler::new(vec![rule], 100, create_empty_resource_arc()); + let sampler = DatadogSampler::new(vec![rule], 100); let trace_id = create_trace_id(); let span_kind = SpanKind::Client; + let resource = create_empty_resource_arc(); // Test with matching attributes let attrs = create_attributes("resource", "prod"); @@ -1265,7 +1249,7 @@ mod tests { "span", span_kind.clone(), attrs.as_slice(), - sampler.resource.as_ref(), + resource.as_ref(), ); let result = sampler.sample(&data); @@ -1283,7 +1267,7 @@ mod tests { "span", span_kind, attrs_no_match.as_slice(), - sampler.resource.as_ref(), + resource.as_ref(), ); let result_no_match = sampler.sample(&data_no_match); @@ -1296,10 +1280,8 @@ mod tests { #[test] fn test_should_sample_with_service_rates() { - // Initialize sampler with a default service, e.g., "test-service" - // The sampler's own service name will be used for the 'service:' part of the service_key - let mut sampler = - DatadogSampler::new(vec![], 100, create_resource("test-service".to_string())); + // Initialize sampler + let sampler = DatadogSampler::new(vec![], 100); // Add service rates for different service+env combinations let mut rates = HashMap::new(); @@ -1312,7 +1294,7 @@ mod tests { let span_kind = SpanKind::Client; // Test with attributes that should lead to "service:test-service,env:prod" key - // Sampler's resource is already for "test-service" + let resource_test_service = create_resource("test-service".to_string()); let attrs_sample = create_attributes("any_resource_name_matching_env", "prod"); let data_sample = create_sampling_data( None, @@ -1320,7 +1302,7 @@ mod tests { "span_for_test_service", span_kind.clone(), attrs_sample.as_slice(), - sampler.resource.as_ref(), + resource_test_service.as_ref(), ); let result_sample = sampler.sample(&data_sample); // Expect RecordAndSample because service_key will be "service:test-service,env:prod" -> @@ -1331,8 +1313,7 @@ mod tests { ); // Test with attributes that should lead to "service:other-service,env:prod" key - // Update sampler's resource to be "other-service" - sampler.resource = create_resource("other-service".to_string()); + let resource_other_service = create_resource("other-service".to_string()); let attrs_no_sample = create_attributes("any_resource_name_matching_env", "prod"); let data_no_sample = create_sampling_data( None, @@ -1340,7 +1321,7 @@ mod tests { "span_for_other_service", span_kind, attrs_no_sample.as_slice(), - sampler.resource.as_ref(), + resource_other_service.as_ref(), ); let result_no_sample = sampler.sample(&data_no_sample); // Expect Drop because service_key will be "service:other-service,env:prod" -> rate 0.0 @@ -1688,14 +1669,11 @@ mod tests { ); // Create a sampler with these rules - let sampler = DatadogSampler::new( - vec![http_rule, db_rule, messaging_rule], - 100, - create_empty_resource_arc(), - ); + let sampler = DatadogSampler::new(vec![http_rule, db_rule, messaging_rule], 100); // Create a trace ID for testing let trace_id = create_trace_id(); + let resource = create_empty_resource_arc(); // Test cases for different span kinds and attributes @@ -1721,7 +1699,7 @@ mod tests { "test-span", span_kind_client.clone(), &http_client_attrs, - sampler.resource.as_ref(), + resource.as_ref(), ); let result = sampler.sample(&data); @@ -1749,7 +1727,7 @@ mod tests { "test-span", span_kind_server.clone(), &http_server_attrs, - sampler.resource.as_ref(), + resource.as_ref(), ); let result = sampler.sample(&data); @@ -1776,7 +1754,7 @@ mod tests { "test-span", span_kind_client, // DB queries use client span kind &db_attrs, - sampler.resource.as_ref(), + resource.as_ref(), ); let result = sampler.sample(&data); @@ -1810,7 +1788,7 @@ mod tests { "test-span", span_kind_consumer, // Messaging uses consumer span kind &messaging_attrs, - sampler.resource.as_ref(), + resource.as_ref(), ); let result = sampler.sample(&data); @@ -1835,7 +1813,7 @@ mod tests { "test-span", span_kind_internal, &internal_attrs, - sampler.resource.as_ref(), + resource.as_ref(), ); let result = sampler.sample(&data); @@ -1867,7 +1845,7 @@ mod tests { "test-span", span_kind_server, &server_protocol_attrs, - sampler.resource.as_ref(), + resource.as_ref(), ); let result = sampler.sample(&data); @@ -1895,7 +1873,7 @@ mod tests { .build(), )); - let sampler = DatadogSampler::new(vec![initial_rule], 100, test_resource); + let sampler = DatadogSampler::new(vec![initial_rule], 100); // Verify initial state assert_eq!(sampler.rules.len(), 1); @@ -1935,7 +1913,7 @@ mod tests { KeyValue::new(HTTP_REQUEST_METHOD, "GET"), /* This will make operation name * "http.client.request" */ ]; - let resource_guard = sampler.resource.read().unwrap(); + let resource_guard = test_resource.read().unwrap(); let span = PreSampledSpan::new( "test-span", SpanKind::Client, From cdb4a04442bfcba15d7706b1fcfba04aed7fc0db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Antonsson?= Date: Mon, 16 Mar 2026 14:51:49 +0100 Subject: [PATCH 4/8] chore(sampling): apply review comments --- .../src/sampling/otel_mappings.rs | 15 +++------ .../src/sampling/rules_sampler.rs | 4 +-- datadog-opentelemetry/src/sampling/types.rs | 31 +++---------------- 3 files changed, 10 insertions(+), 40 deletions(-) diff --git a/datadog-opentelemetry/src/sampling/otel_mappings.rs b/datadog-opentelemetry/src/sampling/otel_mappings.rs index 5ccd5e2c..fd909341 100644 --- a/datadog-opentelemetry/src/sampling/otel_mappings.rs +++ b/datadog-opentelemetry/src/sampling/otel_mappings.rs @@ -81,10 +81,6 @@ impl<'a> OtelSpan<'a> for PreSampledSpan<'a> { impl SpanProperties for PreSampledSpan<'_> { type Attribute = opentelemetry::KeyValue; - type AttributesIter<'b> - = std::slice::Iter<'b, opentelemetry::KeyValue> - where - Self: 'b; fn operation_name(&self) -> Cow<'_, str> { get_otel_operation_name_v2(self) @@ -106,7 +102,10 @@ impl SpanProperties for PreSampledSpan<'_> { get_otel_status_code(self) } - fn attributes(&self) -> Self::AttributesIter<'_> { + fn attributes<'a>(&'a self) -> impl Iterator + where + Self: 'a, + { self.attributes.iter() } @@ -195,15 +194,9 @@ impl SamplingData for OtelSamplingData<'_> { } impl crate::sampling::TraceIdLike for opentelemetry::trace::TraceId { - type Item = opentelemetry::trace::TraceId; - fn to_u128(&self) -> u128 { u128::from_be_bytes(self.to_bytes()) } - - fn inner(&self) -> &Self::Item { - self - } } /// Factory for creating OpenTelemetry KeyValue attributes. diff --git a/datadog-opentelemetry/src/sampling/rules_sampler.rs b/datadog-opentelemetry/src/sampling/rules_sampler.rs index 7e412e9d..e679a09e 100644 --- a/datadog-opentelemetry/src/sampling/rules_sampler.rs +++ b/datadog-opentelemetry/src/sampling/rules_sampler.rs @@ -24,9 +24,9 @@ impl RulesSampler { } /// Finds the first matching rule for a span - pub fn find_matching_rule(&self, mut matcher: F) -> Option + pub fn find_matching_rule(&self, matcher: F) -> Option where - F: FnMut(&SamplingRule) -> bool, + F: Fn(&SamplingRule) -> bool, { self.inner .read() diff --git a/datadog-opentelemetry/src/sampling/types.rs b/datadog-opentelemetry/src/sampling/types.rs index 3e7e164a..6202cde3 100644 --- a/datadog-opentelemetry/src/sampling/types.rs +++ b/datadog-opentelemetry/src/sampling/types.rs @@ -20,38 +20,18 @@ use std::borrow::Cow; /// struct MyTraceId(TraceId); /// /// impl TraceIdLike for MyTraceId { -/// type Item = TraceId; -/// /// fn to_u128(&self) -> u128 { /// u128::from_be_bytes(self.0.to_bytes()) /// } -/// -/// fn inner(&self) -> &Self::Item { -/// &self.0 -/// } /// } /// ``` -pub trait TraceIdLike { - /// The underlying trace ID type. - type Item: PartialEq + Eq; - +pub trait TraceIdLike: PartialEq + Eq { /// Converts the trace ID to a 128-bit unsigned integer. /// /// The conversion should be deterministic: the same trace ID must always produce /// the same `u128` value. Typically implemented by interpreting the trace ID's /// bytes as a big-endian integer. fn to_u128(&self) -> u128; - - /// Returns a reference to the underlying trace ID. - /// - /// Used internally for trait object equality comparisons. - fn inner(&self) -> &Self::Item; -} - -impl PartialEq for dyn TraceIdLike { - fn eq(&self, other: &Self) -> bool { - self.inner() == other.inner() - } } /// A trait for accessing span attribute key-value pairs. @@ -131,11 +111,6 @@ pub trait SpanProperties { /// The type of attribute that implements `AttributeLike`. type Attribute: AttributeLike; - /// The type of iterator over span attributes. - type AttributesIter<'a>: Iterator - where - Self: 'a; - /// Returns the operation name for the span. /// /// The operation name is derived from span attributes and kind according to @@ -163,7 +138,9 @@ pub trait SpanProperties { fn status_code(&self) -> Option; /// Returns an iterator over span attributes. - fn attributes(&self) -> Self::AttributesIter<'_>; + fn attributes<'a>(&'a self) -> impl Iterator + where + Self: 'a; /// Returns an alternate key for the given attribute key. /// From 367f3ed1af30f5221291f91b6830405b4783e60d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Antonsson?= Date: Fri, 13 Feb 2026 14:50:43 +0100 Subject: [PATCH 5/8] refactor(sampling): move otel tests and types from datadog tracer --- .../src/sampling/datadog_sampler.rs | 990 +++++++----------- .../src/sampling/otel_mappings.rs | 392 +++++++ .../src/sampling/rate_sampler.rs | 33 +- datadog-opentelemetry/src/sampling/types.rs | 29 +- 4 files changed, 803 insertions(+), 641 deletions(-) diff --git a/datadog-opentelemetry/src/sampling/datadog_sampler.rs b/datadog-opentelemetry/src/sampling/datadog_sampler.rs index 143a569d..bb8c5c71 100644 --- a/datadog-opentelemetry/src/sampling/datadog_sampler.rs +++ b/datadog-opentelemetry/src/sampling/datadog_sampler.rs @@ -23,6 +23,10 @@ use super::rate_limiter::RateLimiter; use super::rate_sampler::RateSampler; use super::rules_sampler::RulesSampler; +// HTTP status code attribute constants +const HTTP_RESPONSE_STATUS_CODE: &str = "http.response.status_code"; +const HTTP_STATUS_CODE: &str = "http.status_code"; + fn matcher_from_rule(rule: &str) -> Option { (rule != NO_RULE).then(|| GlobMatcher::new(rule)) } @@ -140,9 +144,7 @@ impl SamplingRule { // Special handling for rules defined with "http.status_code" or // "http.response.status_code" - if rule_tag_key_str == "http.status_code" - || rule_tag_key_str - == opentelemetry_semantic_conventions::trace::HTTP_RESPONSE_STATUS_CODE + if rule_tag_key_str == HTTP_STATUS_CODE || rule_tag_key_str == HTTP_RESPONSE_STATUS_CODE { match self.match_http_status_code_rule(matcher, span) { Some(true) => continue, // Status code matched @@ -200,7 +202,7 @@ impl SamplingRule { span: &impl SpanProperties, ) -> Option { span.status_code().and_then(|status_code| { - let status_value = opentelemetry::Value::I64(i64::from(status_code)); + let status_value = ValueI64(i64::from(status_code)); self.match_attribute_value(&status_value, matcher) }) } @@ -470,6 +472,18 @@ fn format_sampling_rate(rate: f64) -> Option { }) } +struct ValueI64(i64); + +impl ValueLike for ValueI64 { + fn extract_float(&self) -> Option { + Some(self.0 as f64) + } + + fn extract_string(&self) -> Option> { + Some(std::borrow::Cow::Owned(self.0.to_string())) + } +} + pub struct TraceRootSamplingInfo { mechanism: SamplingMechanism, rate: f64, @@ -571,76 +585,292 @@ impl DdSamplingResult { #[cfg(test)] mod tests { use super::*; - use crate::mappings::get_otel_operation_name_v2; use crate::sampling::constants::{ attr::{ENV_TAG, RESOURCE_TAG}, pattern, }; - use crate::sampling::otel_mappings::{OtelSamplingData, PreSampledSpan}; - use opentelemetry::trace::{SpanKind, TraceId}; - use opentelemetry::{Key, KeyValue, Value}; - use opentelemetry_sdk::Resource as SdkResource; - use opentelemetry_semantic_conventions::{ - attribute::{ - DB_SYSTEM_NAME, HTTP_REQUEST_METHOD, MESSAGING_OPERATION_TYPE, MESSAGING_SYSTEM, - }, - resource::SERVICE_NAME, - trace::{HTTP_RESPONSE_STATUS_CODE, NETWORK_PROTOCOL_NAME}, - }; - use std::sync::{Arc, RwLock}; + use std::borrow::Cow; + use std::collections::HashMap; - fn create_empty_resource() -> opentelemetry_sdk::Resource { - opentelemetry_sdk::Resource::builder_empty().build() + // Test-only semantic convention constants + const HTTP_REQUEST_METHOD: &str = "http.request.method"; + const SERVICE_NAME: &str = "service.name"; + + use super::{HTTP_RESPONSE_STATUS_CODE, HTTP_STATUS_CODE}; + + // ============================================================================ + // Test-only data structures + // ============================================================================ + + #[derive(Clone, Debug, PartialEq, Eq)] + struct TestTraceId { + bytes: [u8; 16], + } + + impl TestTraceId { + fn from_bytes(bytes: [u8; 16]) -> Self { + Self { bytes } + } } - // Helper function to create an empty resource wrapped in Arc for DatadogSampler - fn create_empty_resource_arc() -> Arc> { - Arc::new(RwLock::new( - opentelemetry_sdk::Resource::builder_empty().build(), - )) + impl TraceIdLike for TestTraceId { + fn to_u128(&self) -> u128 { + u128::from_be_bytes(self.bytes) + } } - fn create_resource(res: String) -> Arc> { - let attributes = vec![ - KeyValue::new(SERVICE_NAME, res), // String `res` is Into - ]; - let resource: SdkResource = SdkResource::builder_empty() - .with_attributes(attributes) - .build(); - Arc::new(RwLock::new(resource)) + #[derive(Clone, Debug, PartialEq)] + enum TestValue { + String(String), + I64(i64), + F64(f64), + } + + impl ValueLike for TestValue { + fn extract_float(&self) -> Option { + match self { + TestValue::I64(i) => Some(*i as f64), + TestValue::F64(f) => Some(*f), + _ => None, + } + } + + fn extract_string(&self) -> Option> { + match self { + TestValue::String(s) => Some(Cow::Borrowed(s.as_str())), + TestValue::I64(i) => Some(Cow::Owned(i.to_string())), + TestValue::F64(f) => Some(Cow::Owned(f.to_string())), + } + } + } + + #[derive(Clone, Debug)] + struct TestAttribute { + key: String, + value: TestValue, + } + + impl TestAttribute { + fn new(key: impl Into, value: impl Into) -> Self { + Self { + key: key.into(), + value: value.into(), + } + } + } + + impl AttributeLike for TestAttribute { + type Value = TestValue; + + fn key(&self) -> &str { + &self.key + } + + fn value(&self) -> &Self::Value { + &self.value + } + } + + impl From<&str> for TestValue { + fn from(s: &str) -> Self { + TestValue::String(s.to_string()) + } + } + + impl From for TestValue { + fn from(s: String) -> Self { + TestValue::String(s) + } + } + + struct TestSpan<'a> { + name: &'a str, + attributes: &'a [TestAttribute], + } + + impl<'a> TestSpan<'a> { + fn new(name: &'a str, attributes: &'a [TestAttribute]) -> Self { + Self { name, attributes } + } + + fn get_operation_name(&self) -> Cow<'_, str> { + // Check for HTTP spans - label them all as client spans + if self + .attributes + .iter() + .any(|attr| attr.key() == HTTP_REQUEST_METHOD) + { + return Cow::Borrowed("http.client.request"); + } + + // Default fallback + Cow::Borrowed("internal") + } } + impl<'a> SpanProperties for TestSpan<'a> { + type Attribute = TestAttribute; + + fn operation_name(&self) -> Cow<'_, str> { + self.get_operation_name() + } + + fn service(&self) -> Cow<'_, str> { + self.attributes + .iter() + .find(|attr| attr.key() == SERVICE_NAME) + .and_then(|attr| attr.value().extract_string()) + .unwrap_or(Cow::Borrowed("")) + } + + fn env(&self) -> Cow<'_, str> { + self.attributes + .iter() + .find(|attr| attr.key() == "datadog.env" || attr.key() == ENV_TAG) + .and_then(|attr| attr.value().extract_string()) + .unwrap_or(Cow::Borrowed("")) + } + + fn resource(&self) -> Cow<'_, str> { + self.attributes + .iter() + .find(|attr| attr.key() == RESOURCE_TAG) + .and_then(|attr| attr.value().extract_string()) + .unwrap_or(Cow::Borrowed(self.name)) + } + + fn status_code(&self) -> Option { + self.attributes + .iter() + .find(|attr| { + attr.key() == HTTP_RESPONSE_STATUS_CODE || attr.key() == HTTP_STATUS_CODE + }) + .and_then(|attr| match attr.value() { + TestValue::I64(i) => Some(*i as u32), + _ => None, + }) + } + + fn attributes<'b>(&'b self) -> impl Iterator + where + Self: 'b, + { + self.attributes.iter() + } + + fn get_alternate_key<'b>(&self, key: &'b str) -> Option> { + match key { + HTTP_RESPONSE_STATUS_CODE => Some(Cow::Borrowed(HTTP_STATUS_CODE)), + HTTP_REQUEST_METHOD => Some(Cow::Borrowed("http.method")), + _ => None, + } + } + } + + struct TestSamplingData<'a> { + is_parent_sampled: Option, + trace_id: &'a TestTraceId, + name: &'a str, + attributes: &'a [TestAttribute], + } + + impl<'a> TestSamplingData<'a> { + fn new( + is_parent_sampled: Option, + trace_id: &'a TestTraceId, + name: &'a str, + attributes: &'a [TestAttribute], + ) -> Self { + Self { + is_parent_sampled, + trace_id, + name, + attributes, + } + } + } + + impl<'a> SamplingData for TestSamplingData<'a> { + type TraceId = TestTraceId; + type Properties<'b> + = TestSpan<'b> + where + Self: 'b; + + fn is_parent_sampled(&self) -> Option { + self.is_parent_sampled + } + + fn trace_id(&self) -> &Self::TraceId { + self.trace_id + } + + fn with_span_properties(&self, s: &S, f: F) -> T + where + F: for<'b> Fn(&S, &TestSpan<'b>) -> T, + { + let span = TestSpan::new(self.name, self.attributes); + f(s, &span) + } + } + + struct TestAttributeFactory; + + impl crate::sampling::AttributeFactory for TestAttributeFactory { + type Attribute = TestAttribute; + + fn create_i64(&self, key: &'static str, value: i64) -> Self::Attribute { + TestAttribute::new(key, TestValue::I64(value)) + } + + fn create_f64(&self, key: &'static str, value: f64) -> Self::Attribute { + TestAttribute::new(key, TestValue::F64(value)) + } + + fn create_string(&self, key: &'static str, value: Cow<'static, str>) -> Self::Attribute { + TestAttribute::new(key, TestValue::String(value.into_owned())) + } + } + + // ============================================================================ + // Test helper functions + // ============================================================================ + // Helper function to create a trace ID - fn create_trace_id() -> TraceId { + fn create_trace_id() -> TestTraceId { let bytes = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; - TraceId::from_bytes(bytes) + TestTraceId::from_bytes(bytes) + } + + // Helper function to create attributes for testing (with resource and env) + fn create_attributes(resource: &'static str, env: &'static str) -> Vec { + vec![ + TestAttribute::new(RESOURCE_TAG, resource), + TestAttribute::new("datadog.env", env), + ] } - // Helper function to create attributes for testing - fn create_attributes(resource: &'static str, env: &'static str) -> Vec { + // Helper function to create attributes with service + fn create_attributes_with_service( + service: String, + resource: &'static str, + env: &'static str, + ) -> Vec { vec![ - KeyValue::new(RESOURCE_TAG, resource), - KeyValue::new("datadog.env", env), + TestAttribute::new(SERVICE_NAME, service), + TestAttribute::new(RESOURCE_TAG, resource), + TestAttribute::new("datadog.env", env), ] } // Helper function to create SamplingData for testing fn create_sampling_data<'a>( is_parent_sampled: Option, - trace_id: &'a TraceId, + trace_id: &'a TestTraceId, name: &'a str, - span_kind: SpanKind, - attributes: &'a [KeyValue], - resource: &'a RwLock, - ) -> OtelSamplingData<'a> { - OtelSamplingData::new( - is_parent_sampled, - trace_id, - name, - span_kind, - attributes, - resource, - ) + attributes: &'a [TestAttribute], + ) -> TestSamplingData<'a> { + TestSamplingData::new(is_parent_sampled, trace_id, name, attributes) } #[test] @@ -718,11 +948,8 @@ mod tests { // Create a span with some attributes let attributes = create_attributes("some-resource", "some-env"); - // Empty resource for testing (unwrapped for the test) - let empty_resource = create_empty_resource(); - // Both rules should match any span since they have no criteria - let span = PreSampledSpan::new("", SpanKind::Client, &attributes, &empty_resource); + let span = TestSpan::new("", &attributes); assert!(rule.matches(&span)); assert!(rule_with_empty_strings.matches(&span)); } @@ -773,35 +1000,26 @@ mod tests { #[test] fn test_service_key_generation() { - // Create resource with test service name let test_service_name = "test-service".to_string(); - let resource = create_resource(test_service_name.clone()); let sampler = DatadogSampler::new(vec![], 100); // Test with service and env - // The 'service' in create_attributes is not used for the service part of the key, - // but ENV_TAG is still correctly picked up from attributes. - let attrs = create_attributes("resource", "production"); - let res = &resource.read().unwrap(); - let span = PreSampledSpan::new("test-span", SpanKind::Internal, attrs.as_slice(), res); + let attrs = + create_attributes_with_service(test_service_name.clone(), "resource", "production"); + let span = TestSpan::new("test-span", attrs.as_slice()); assert_eq!( sampler.service_key(&span), - // Expect the service name from the resource format!("service:{test_service_name},env:production") ); // Test with missing env - // The 'service' in these attributes is also not used for the service part of the key. - let attrs_no_env = vec![KeyValue::new(RESOURCE_TAG, "resource")]; - let span = PreSampledSpan::new( - "test-span", - SpanKind::Internal, - attrs_no_env.as_slice(), - res, - ); + let attrs_no_env = vec![ + TestAttribute::new(SERVICE_NAME, test_service_name.clone()), + TestAttribute::new(RESOURCE_TAG, "resource"), + ]; + let span = TestSpan::new("test-span", attrs_no_env.as_slice()); assert_eq!( sampler.service_key(&span), - // Expect the service name from the resource and an empty env format!("service:{test_service_name},env:") ); } @@ -876,10 +1094,12 @@ mod tests { // Test with a specific service that should match the first rule (rule1) { - let resource = create_resource("service1".to_string()); - let attrs1 = create_attributes("resource_val_for_attr1", "prod"); - let res = resource.read().unwrap(); - let span = PreSampledSpan::new("test-span", SpanKind::Client, attrs1.as_slice(), &res); + let attrs1 = create_attributes_with_service( + "service1".to_string(), + "resource_val_for_attr1", + "prod", + ); + let span = TestSpan::new("test-span", attrs1.as_slice()); let matching_rule_for_attrs1 = sampler.find_matching_rule(&span); assert!( matching_rule_for_attrs1.is_some(), @@ -892,10 +1112,12 @@ mod tests { // Test with a specific service that should match the second rule (rule2) { - let resource = create_resource("service2".to_string()); - let attrs2 = create_attributes("resource_val_for_attr2", "prod"); - let res = resource.read().unwrap(); - let span = PreSampledSpan::new("test-span", SpanKind::Client, attrs2.as_slice(), &res); + let attrs2 = create_attributes_with_service( + "service2".to_string(), + "resource_val_for_attr2", + "prod", + ); + let span = TestSpan::new("test-span", attrs2.as_slice()); let matching_rule_for_attrs2 = sampler.find_matching_rule(&span); assert!( matching_rule_for_attrs2.is_some(), @@ -908,10 +1130,12 @@ mod tests { // Test with a service that matches the wildcard rule (rule3) { - let resource = create_resource("service3".to_string()); - let attrs3 = create_attributes("resource_val_for_attr3", "prod"); - let res = resource.read().unwrap(); - let span = PreSampledSpan::new("test-span", SpanKind::Client, attrs3.as_slice(), &res); + let attrs3 = create_attributes_with_service( + "service3".to_string(), + "resource_val_for_attr3", + "prod", + ); + let span = TestSpan::new("test-span", attrs3.as_slice()); let matching_rule_for_attrs3 = sampler.find_matching_rule(&span); assert!( matching_rule_for_attrs3.is_some(), @@ -924,10 +1148,12 @@ mod tests { // Test with a service that doesn't match any rule's service pattern { - let resource = create_resource("other_sampler_service".to_string()); - let attrs4 = create_attributes("resource_val_for_attr4", "prod"); - let res = resource.read().unwrap(); - let span = PreSampledSpan::new("test-span", SpanKind::Client, attrs4.as_slice(), &res); + let attrs4 = create_attributes_with_service( + "other_sampler_service".to_string(), + "resource_val_for_attr4", + "prod", + ); + let span = TestSpan::new("test-span", attrs4.as_slice()); let matching_rule_for_attrs4 = sampler.find_matching_rule(&span); assert!( matching_rule_for_attrs4.is_none(), @@ -985,7 +1211,7 @@ mod tests { }; let attrs = sampling_result - .to_dd_sampling_tags(&crate::sampling::OtelAttributeFactory) + .to_dd_sampling_tags(&TestAttributeFactory) .unwrap_or_default(); // Verify the number of attributes (decision_maker + priority + rule_rate + ksr) @@ -998,10 +1224,10 @@ mod tests { let mut found_ksr = false; for attr in &attrs { - match attr.key.as_str() { + match attr.key() { SAMPLING_DECISION_MAKER_TAG_KEY => { - let value_str = match &attr.value { - opentelemetry::Value::String(s) => s.to_string(), + let value_str = match attr.value() { + TestValue::String(s) => s.to_string(), _ => panic!("Expected string value for decision maker tag"), }; assert_eq!(value_str, mechanism.to_cow()); @@ -1011,16 +1237,16 @@ mod tests { // For LocalUserTraceSamplingRule with KEEP, it should be USER_KEEP let expected_priority = mechanism.to_priority(true).into_i8() as i64; - let value_int = match attr.value { - opentelemetry::Value::I64(i) => i, + let value_int = match attr.value() { + TestValue::I64(i) => *i, _ => panic!("Expected integer value for priority tag"), }; assert_eq!(value_int, expected_priority); found_priority = true; } SAMPLING_RULE_RATE_TAG_KEY => { - let value_float = match attr.value { - opentelemetry::Value::F64(f) => f, + let value_float = match attr.value() { + TestValue::F64(f) => *f, _ => panic!("Expected float value for rule rate tag"), }; assert_eq!(value_float, sample_rate); @@ -1056,7 +1282,7 @@ mod tests { }), }; let attrs_with_limit = sampling_result - .to_dd_sampling_tags(&crate::sampling::OtelAttributeFactory) + .to_dd_sampling_tags(&TestAttributeFactory) .unwrap_or_default(); // With rate limiting, there should be one more attribute @@ -1065,9 +1291,9 @@ mod tests { // Check for rate limit attribute let mut found_limit = false; for attr in &attrs_with_limit { - if attr.key.as_str() == RL_EFFECTIVE_RATE { - let value_float = match attr.value { - opentelemetry::Value::F64(f) => f, + if attr.key() == RL_EFFECTIVE_RATE { + let value_float = match attr.value() { + TestValue::F64(f) => *f, _ => panic!("Expected float value for rate limit tag"), }; assert_eq!(value_float, rate_limit); @@ -1093,7 +1319,7 @@ mod tests { }; let agent_attrs = sampling_result - .to_dd_sampling_tags(&crate::sampling::OtelAttributeFactory) + .to_dd_sampling_tags(&TestAttributeFactory) .unwrap_or_default(); // Verify the number of attributes (should be 4: decision_maker + priority + @@ -1104,18 +1330,18 @@ mod tests { let mut found_agent_rate = false; let mut found_ksr = false; for attr in &agent_attrs { - match attr.key.as_str() { + match attr.key() { SAMPLING_AGENT_RATE_TAG_KEY => { - let value_float = match attr.value { - opentelemetry::Value::F64(f) => f, + let value_float = match attr.value() { + TestValue::F64(f) => *f, _ => panic!("Expected float value for agent rate tag"), }; assert_eq!(value_float, agent_rate); found_agent_rate = true; } SAMPLING_KNUTH_RATE_TAG_KEY => { - let value_str = match &attr.value { - opentelemetry::Value::String(s) => s.to_string(), + let value_str = match attr.value() { + TestValue::String(s) => s.to_string(), _ => panic!("Expected string value for ksr tag"), }; assert_eq!(value_str, "0.75"); @@ -1134,7 +1360,7 @@ mod tests { // Also check that the SAMPLING_RULE_RATE_TAG_KEY is NOT present for agent mechanism for attr in &agent_attrs { assert_ne!( - attr.key.as_str(), + attr.key(), SAMPLING_RULE_RATE_TAG_KEY, "Rule rate tag should not be present for agent mechanism" ); @@ -1183,20 +1409,11 @@ mod tests { let sampler = DatadogSampler::new(vec![], 100); // Create empty slices for attributes and links - let empty_attrs: &[KeyValue] = &[]; + let empty_attrs: &[TestAttribute] = &[]; let trace_id = create_trace_id(); - let span_kind = SpanKind::Client; - let resource = create_empty_resource_arc(); // Test with sampled parent context - let data_sampled = create_sampling_data( - Some(true), - &trace_id, - "span", - span_kind.clone(), - empty_attrs, - resource.as_ref(), - ); + let data_sampled = create_sampling_data(Some(true), &trace_id, "span", empty_attrs); let result_sampled = sampler.sample(&data_sampled); // Should inherit the sampling decision from parent @@ -1206,14 +1423,7 @@ mod tests { .is_none()); // Test with non-sampled parent context - let data_not_sampled = create_sampling_data( - Some(false), - &trace_id, - "span", - span_kind, - empty_attrs, - resource.as_ref(), - ); + let data_not_sampled = create_sampling_data(Some(false), &trace_id, "span", empty_attrs); let result_not_sampled = sampler.sample(&data_not_sampled); // Should inherit the sampling decision from parent @@ -1238,19 +1448,10 @@ mod tests { let sampler = DatadogSampler::new(vec![rule], 100); let trace_id = create_trace_id(); - let span_kind = SpanKind::Client; - let resource = create_empty_resource_arc(); // Test with matching attributes let attrs = create_attributes("resource", "prod"); - let data = create_sampling_data( - None, - &trace_id, - "span", - span_kind.clone(), - attrs.as_slice(), - resource.as_ref(), - ); + let data = create_sampling_data(None, &trace_id, "span", attrs.as_slice()); let result = sampler.sample(&data); // Should sample and add attributes @@ -1261,14 +1462,8 @@ mod tests { // Test with non-matching attributes let attrs_no_match = create_attributes("other-resource", "prod"); - let data_no_match = create_sampling_data( - None, - &trace_id, - "span", - span_kind, - attrs_no_match.as_slice(), - resource.as_ref(), - ); + let data_no_match = + create_sampling_data(None, &trace_id, "span", attrs_no_match.as_slice()); let result_no_match = sampler.sample(&data_no_match); // Should still sample (default behavior when no rules match) and add attributes @@ -1291,18 +1486,18 @@ mod tests { sampler.update_service_rates(rates); let trace_id = create_trace_id(); - let span_kind = SpanKind::Client; // Test with attributes that should lead to "service:test-service,env:prod" key - let resource_test_service = create_resource("test-service".to_string()); - let attrs_sample = create_attributes("any_resource_name_matching_env", "prod"); + let attrs_sample = create_attributes_with_service( + "test-service".to_string(), + "any_resource_name_matching_env", + "prod", + ); let data_sample = create_sampling_data( None, &trace_id, "span_for_test_service", - span_kind.clone(), attrs_sample.as_slice(), - resource_test_service.as_ref(), ); let result_sample = sampler.sample(&data_sample); // Expect RecordAndSample because service_key will be "service:test-service,env:prod" -> @@ -1313,15 +1508,16 @@ mod tests { ); // Test with attributes that should lead to "service:other-service,env:prod" key - let resource_other_service = create_resource("other-service".to_string()); - let attrs_no_sample = create_attributes("any_resource_name_matching_env", "prod"); + let attrs_no_sample = create_attributes_with_service( + "other-service".to_string(), + "any_resource_name_matching_env", + "prod", + ); let data_no_sample = create_sampling_data( None, &trace_id, "span_for_other_service", - span_kind, attrs_no_sample.as_slice(), - resource_other_service.as_ref(), ); let result_no_sample = sampler.sample(&data_no_sample); // Expect Drop because service_key will be "service:other-service,env:prod" -> rate 0.0 @@ -1333,14 +1529,15 @@ mod tests { #[test] fn test_sampling_rule_matches_float_attributes() { - use opentelemetry::Value; - // Helper to create attributes with a float value - fn create_attributes_with_float(tag_key: &'static str, float_value: f64) -> Vec { + fn create_attributes_with_float( + tag_key: &'static str, + float_value: f64, + ) -> Vec { vec![ - KeyValue::new(RESOURCE_TAG, "resource"), - KeyValue::new(ENV_TAG, "prod"), - KeyValue::new(tag_key, Value::F64(float_value)), + TestAttribute::new(RESOURCE_TAG, "resource"), + TestAttribute::new(ENV_TAG, "prod"), + TestAttribute::new(tag_key, TestValue::F64(float_value)), ] } @@ -1356,13 +1553,7 @@ mod tests { // Should match integer float let integer_float_attrs = create_attributes_with_float("float_tag", 42.0); - let resource = create_empty_resource(); - let span = PreSampledSpan::new( - "test-span", - SpanKind::Client, - integer_float_attrs.as_slice(), - &resource, - ); + let span = TestSpan::new("test-span", integer_float_attrs.as_slice()); assert!(rule_integer.matches(&span)); // Test case 2: Rule with wildcard pattern and non-integer float @@ -1377,13 +1568,7 @@ mod tests { // Should match non-integer float with wildcard pattern let decimal_float_attrs = create_attributes_with_float("float_tag", 42.5); - let resource = create_empty_resource(); - let span = PreSampledSpan::new( - "test-span", - SpanKind::Client, - decimal_float_attrs.as_slice(), - &resource, - ); + let span = TestSpan::new("test-span", decimal_float_attrs.as_slice()); assert!(rule_wildcard.matches(&span)); // Test case 3: Rule with specific pattern and non-integer float @@ -1402,13 +1587,7 @@ mod tests { // Should NOT match the exact decimal value because non-integer floats only match wildcards let decimal_float_attrs = create_attributes_with_float("float_tag", 42.5); - let resource = create_empty_resource(); - let span = PreSampledSpan::new( - "test-span", - SpanKind::Client, - decimal_float_attrs.as_slice(), - &resource, - ); + let span = TestSpan::new("test-span", decimal_float_attrs.as_slice()); assert!(!rule_specific.matches(&span)); // Test case 4: Pattern with partial wildcard '*' for suffix let rule_prefix = SamplingRule::new( @@ -1425,433 +1604,37 @@ mod tests { // Should NOT match decimal values as we don't do partial pattern matching for non-integer // floats - let resource = create_empty_resource(); - let span = PreSampledSpan::new( - "test-span", - SpanKind::Client, - decimal_float_attrs.as_slice(), - &resource, - ); + let span = TestSpan::new("test-span", decimal_float_attrs.as_slice()); assert!(!rule_prefix.matches(&span)); } #[test] - fn test_otel_to_datadog_attribute_mapping() { - // Test with a rule that matches against a Datadog attribute name - let rule = SamplingRule::new( - 1.0, - None, - None, - None, - Some(HashMap::from([( - "http.response.status_code".to_string(), - "5*".to_string(), - )])), - None, - ); - - // Create attributes with OpenTelemetry naming convention - let otel_attrs = vec![KeyValue::new("http.response.status_code", 500)]; - - // The rule should match because both use the same OpenTelemetry attribute name - let resource = create_empty_resource(); - let span = PreSampledSpan::new( - "test-span", - SpanKind::Client, - otel_attrs.as_slice(), - &resource, - ); - assert!(rule.matches(&span)); - - // Attributes that don't match the value pattern shouldn't match - let non_matching_attrs = vec![KeyValue::new("http.response.status_code", 200)]; - let resource = create_empty_resource(); - let span = PreSampledSpan::new( - "test-span", - SpanKind::Client, - non_matching_attrs.as_slice(), - &resource, - ); - assert!(!rule.matches(&span)); - - // Attributes that have no mapping to the rule tag shouldn't match - let unrelated_attrs = vec![KeyValue::new("unrelated.attribute", "value")]; - let resource = create_empty_resource(); - let span = PreSampledSpan::new( - "test-span", - SpanKind::Client, - unrelated_attrs.as_slice(), - &resource, - ); - assert!(!rule.matches(&span)); - } - - #[test] - fn test_multiple_otel_attribute_mappings() { - // Test with a rule that has multiple tag criteria - let mut tags = HashMap::new(); - tags.insert("http.status_code".to_string(), "5*".to_string()); - tags.insert("http.method".to_string(), "POST".to_string()); - tags.insert("http.url".to_string(), "*api*".to_string()); - - let rule = SamplingRule::new(1.0, None, None, None, Some(tags), None); - - // Create attributes with mixed OpenTelemetry and Datadog naming - let mixed_attrs = vec![ - // OTel attribute that maps to http.status_code - KeyValue::new("http.response.status_code", 503), - // OTel attribute that maps to http.method - KeyValue::new("http.request.method", "POST"), - // OTel attribute that maps to http.url - KeyValue::new("url.full", "https://example.com/api/v1/resource"), - ]; - - // The rule should match because all three criteria are satisfied through mapping - let resource = create_empty_resource(); - let span = PreSampledSpan::new("test-span", SpanKind::Client, &mixed_attrs, &resource); - assert!(rule.matches(&span)); - - // If any criteria is not met, the rule shouldn't match - let missing_method = vec![ - KeyValue::new("http.response.status_code", 503), - // Missing http.method/http.request.method - KeyValue::new("url.full", "https://example.com/api/v1/resource"), - ]; - - let resource = create_empty_resource(); - let span = PreSampledSpan::new("test-span", SpanKind::Client, &missing_method, &resource); - assert!(!rule.matches(&span)); - - // Wrong value should also not match - let wrong_method = vec![ - KeyValue::new("http.response.status_code", 503), - KeyValue::new("http.request.method", "GET"), // Not POST - KeyValue::new("url.full", "https://example.com/api/v1/resource"), - ]; - - let resource = create_empty_resource(); - let span = PreSampledSpan::new("test-span", SpanKind::Client, &wrong_method, &resource); - assert!(!rule.matches(&span)); - } - - #[test] - fn test_direct_and_mapped_mixed_attributes() { - // Constants for key names to improve readability and ensure consistency - let dd_status_key_str = HTTP_RESPONSE_STATUS_CODE; - let otel_response_status_key_str = HTTP_RESPONSE_STATUS_CODE; - let custom_tag_key = "custom.tag"; - let custom_tag_value = "value"; - - let empty_resource = create_empty_resource(); - let span_kind_client = SpanKind::Client; - - // Test with both direct matches and mapped attributes - let mut tags_rule1 = HashMap::new(); - tags_rule1.insert(dd_status_key_str.to_string(), "5*".to_string()); - tags_rule1.insert(custom_tag_key.to_string(), custom_tag_value.to_string()); - - let rule1 = SamplingRule::new(1.0, None, None, None, Some(tags_rule1), None); - - // Case 1: OTel attribute that maps to http.status_code (503 matches "5*") + Direct - // custom.tag match - let mixed_attrs_match = vec![ - KeyValue::new(otel_response_status_key_str, 503), - KeyValue::new(custom_tag_key, custom_tag_value), - ]; - let span = PreSampledSpan::new( - "test-span", - span_kind_client, - &mixed_attrs_match, - &empty_resource, - ); - assert!(rule1.matches(&span), "Rule with dd_status_key (5*) and custom.tag should match span with otel_response_status_key (503) and custom.tag"); - - // Case 2: Datadog convention for status code (503 matches "5*") + Direct custom.tag match - let dd_attrs_match = vec![ - KeyValue::new(dd_status_key_str, 503), - KeyValue::new(custom_tag_key, custom_tag_value), - ]; - let span = PreSampledSpan::new( - "test-span", - SpanKind::Client, - &dd_attrs_match, - &empty_resource, - ); - assert!(rule1.matches(&span), "Rule with dd_status_key (5*) and custom.tag should match span with dd_status_key (503) and custom.tag"); - - // Case 3: Missing the custom tag should fail (status code would match) - let missing_custom_tag_attrs = vec![KeyValue::new(otel_response_status_key_str, 503)]; - let span = PreSampledSpan::new( - "test-span", - SpanKind::Client, - &missing_custom_tag_attrs, - &empty_resource, - ); - assert!( - !rule1.matches(&span), - "Rule with dd_status_key (5*) and custom.tag should NOT match span missing custom.tag" - ); - - // Case 4: OTel status code 200 (does NOT match "5*") + custom.tag present - let non_matching_otel_status_attrs = vec![ - KeyValue::new(otel_response_status_key_str, 200), - KeyValue::new(custom_tag_key, custom_tag_value), - ]; - let span = PreSampledSpan::new( - "test-span", - SpanKind::Client, - &non_matching_otel_status_attrs, - &empty_resource, - ); - assert!(!rule1.matches(&span), "Rule with dd_status_key (5*) and custom.tag should NOT match span with non-matching otel_response_status_key (200)"); - - // Case 5: No recognizable status code + custom.tag present - let no_status_code_attrs = vec![ - KeyValue::new("another.tag", "irrelevant"), - KeyValue::new(custom_tag_key, custom_tag_value), - ]; - let span = PreSampledSpan::new( - "test-span", - SpanKind::Client, - &no_status_code_attrs, - &empty_resource, - ); - assert!(!rule1.matches(&span), "Rule with dd_status_key (5*) and custom.tag should NOT match span with no status code attribute"); - - // Case 6: Rule uses OTel key http.response.status_code directly, span has matching OTel - // key. - let mut tags_rule2 = HashMap::new(); - tags_rule2.insert(otel_response_status_key_str.to_string(), "200".to_string()); - tags_rule2.insert(custom_tag_key.to_string(), custom_tag_value.to_string()); - let rule2 = SamplingRule::new(1.0, None, None, None, Some(tags_rule2), None); - - let otel_key_rule_match_attrs = vec![ - KeyValue::new(otel_response_status_key_str, 200), - KeyValue::new(custom_tag_key, custom_tag_value), - ]; - let span = PreSampledSpan::new( - "test-span", - SpanKind::Client, - &otel_key_rule_match_attrs, - &empty_resource, - ); - assert!(rule2.matches(&span), "Rule with otel_response_status_key (200) and custom.tag should match span with otel_response_status_key (200) and custom.tag"); - } + fn test_operation_name() { + // Test that the sampler correctly matches rules based on operation names + // Operation name generation itself is tested in otel_mappings unit tests - #[test] - fn test_operation_name_integration() { - // Create rules that match different operation name patterns let http_rule = SamplingRule::new( - 1.0, // 100% sample rate - None, // no service matcher - Some("http.*.request".to_string()), // matches both client and server HTTP requests - None, // no resource matcher - None, // no tag matchers - Some("default".to_string()), // rule name - default provenance - ); - - let db_rule = SamplingRule::new( - 1.0, // 100% sample rate - None, // no service matcher - Some("postgresql.query".to_string()), // matches database queries - None, // no resource matcher - None, // no tag matchers - Some("default".to_string()), // rule name - default provenance - ); - - let messaging_rule = SamplingRule::new( - 1.0, // 100% sample rate - None, // no service matcher - Some("kafka.process".to_string()), // matches Kafka messaging operations - None, // no resource matcher - None, // no tag matchers - Some("default".to_string()), // rule name - default provenance - ); - - // Create a sampler with these rules - let sampler = DatadogSampler::new(vec![http_rule, db_rule, messaging_rule], 100); - - // Create a trace ID for testing - let trace_id = create_trace_id(); - let resource = create_empty_resource_arc(); - - // Test cases for different span kinds and attributes - - // 1. HTTP client request - let http_client_attrs = vec![KeyValue::new( - Key::from_static_str(HTTP_REQUEST_METHOD), - Value::String("GET".into()), - )]; - - let empty_resource: SdkResource = create_empty_resource(); - // Print the operation name that will be generated - let span = PreSampledSpan::new("", SpanKind::Client, &http_client_attrs, &empty_resource); - let http_client_op_name = get_otel_operation_name_v2(&span); - assert_eq!( - http_client_op_name, "http.client.request", - "HTTP client operation name should be correct" - ); - - let span_kind_client = SpanKind::Client; - let data = create_sampling_data( + 1.0, None, - &trace_id, - "test-span", - span_kind_client.clone(), - &http_client_attrs, - resource.as_ref(), - ); - let result = sampler.sample(&data); - - // Should be sampled due to matching the http_rule - assert!(result.get_priority().is_keep()); - - // 2. HTTP server request - let http_server_attrs = vec![KeyValue::new( - Key::from_static_str(HTTP_REQUEST_METHOD), - Value::String("POST".into()), - )]; - - // Print the operation name that will be generated - let span = PreSampledSpan::new("", SpanKind::Server, &http_server_attrs, &empty_resource); - let http_server_op_name = get_otel_operation_name_v2(&span); - assert_eq!( - http_server_op_name, "http.server.request", - "HTTP server operation name should be correct" - ); - - let span_kind_server = SpanKind::Server; - let data = create_sampling_data( + Some("http.*.request".to_string()), None, - &trace_id, - "test-span", - span_kind_server.clone(), - &http_server_attrs, - resource.as_ref(), - ); - let result = sampler.sample(&data); - - // Should be sampled due to matching the http_rule - assert!(result.get_priority().is_keep()); - - // 3. Database query - let db_attrs = vec![KeyValue::new( - Key::from_static_str(DB_SYSTEM_NAME), - Value::String("postgresql".into()), - )]; - - // Print the operation name that will be generated - let span = PreSampledSpan::new("", SpanKind::Client, &db_attrs, &empty_resource); - let db_op_name = get_otel_operation_name_v2(&span); - assert_eq!( - db_op_name, "postgresql.query", - "Database operation name should be correct" - ); - - let data = create_sampling_data( None, - &trace_id, - "test-span", - span_kind_client, // DB queries use client span kind - &db_attrs, - resource.as_ref(), - ); - let result = sampler.sample(&data); - - // Should be sampled due to matching the db_rule - assert!(result.get_priority().is_keep()); - - // 4. Messaging operation - let messaging_attrs = vec![ - KeyValue::new( - Key::from_static_str(MESSAGING_SYSTEM), - Value::String("kafka".into()), - ), - KeyValue::new( - Key::from_static_str(MESSAGING_OPERATION_TYPE), - Value::String("process".into()), - ), - ]; - - // Print the operation name that will be generated - let span = PreSampledSpan::new("", SpanKind::Consumer, &messaging_attrs, &empty_resource); - let messaging_op_name = get_otel_operation_name_v2(&span); - assert_eq!( - messaging_op_name, "kafka.process", - "Messaging operation name should be correct" - ); - - let span_kind_consumer = SpanKind::Consumer; - let data = create_sampling_data( - None, - &trace_id, - "test-span", - span_kind_consumer, // Messaging uses consumer span kind - &messaging_attrs, - resource.as_ref(), - ); - let result = sampler.sample(&data); - - // Should be sampled due to matching the messaging_rule - assert!(result.get_priority().is_keep()); - - // 5. Generic internal span (should not match any rules) - let internal_attrs = vec![KeyValue::new("custom.tag", "value")]; - - // Print the operation name that will be generated - let span = PreSampledSpan::new("", SpanKind::Internal, &internal_attrs, &empty_resource); - let internal_op_name = get_otel_operation_name_v2(&span); - assert_eq!( - internal_op_name, "internal", - "Internal operation name should be the span kind" - ); - - let span_kind_internal = SpanKind::Internal; - let data = create_sampling_data( - None, - &trace_id, - "test-span", - span_kind_internal, - &internal_attrs, - resource.as_ref(), + Some("default".to_string()), ); - let result = sampler.sample(&data); - // Should still be sampled (default behavior when no rules match) - assert!(result.get_priority().is_keep()); + let sampler = DatadogSampler::new(vec![http_rule], 100); - // 6. Server with protocol but no HTTP method - let server_protocol_attrs = vec![KeyValue::new( - Key::from_static_str(NETWORK_PROTOCOL_NAME), - Value::String("http".into()), - )]; - - // Print the operation name that will be generated - let span = PreSampledSpan::new( - "", - SpanKind::Server, - &server_protocol_attrs, - &empty_resource, - ); - let server_protocol_op_name = get_otel_operation_name_v2(&span); - assert_eq!( - server_protocol_op_name, "http.server.request", - "Server with protocol operation name should use protocol" - ); + let trace_id = create_trace_id(); - let data = create_sampling_data( - None, - &trace_id, - "test-span", - span_kind_server, - &server_protocol_attrs, - resource.as_ref(), - ); - let result = sampler.sample(&data); + // HTTP client request should match http_rule (operation name: http.client.request) + let http_client_attrs = vec![TestAttribute::new(HTTP_REQUEST_METHOD, "GET")]; + let data = create_sampling_data(None, &trace_id, "test-span", &http_client_attrs); + assert!(sampler.sample(&data).get_priority().is_keep()); - // Should not match our http rule since operation name would be "http.server.request" - // But should still be sampled (default behavior) - assert!(result.get_priority().is_keep()); + // Span that doesn't match the rule should still be sampled (default behavior) + let internal_attrs = vec![TestAttribute::new("custom.tag", "value")]; + let data = create_sampling_data(None, &trace_id, "test-span", &internal_attrs); + assert!(sampler.sample(&data).get_priority().is_keep()); } #[test] @@ -1866,13 +1649,6 @@ mod tests { Some("default".to_string()), ); - // Create a resource with a service name that will match our test rule - let test_resource = Arc::new(RwLock::new( - opentelemetry_sdk::Resource::builder_empty() - .with_attributes(vec![KeyValue::new(SERVICE_NAME, "web-frontend")]) - .build(), - )); - let sampler = DatadogSampler::new(vec![initial_rule], 100); // Verify initial state @@ -1909,17 +1685,13 @@ mod tests { // Test that the new rules work by finding a matching rule // Create attributes that will generate an operation name matching "http.*" + // and service matching "web-*" let attrs = vec![ - KeyValue::new(HTTP_REQUEST_METHOD, "GET"), /* This will make operation name - * "http.client.request" */ + TestAttribute::new(SERVICE_NAME, "web-frontend"), + TestAttribute::new(HTTP_REQUEST_METHOD, "GET"), /* This will make operation name + * "http.client.request" */ ]; - let resource_guard = test_resource.read().unwrap(); - let span = PreSampledSpan::new( - "test-span", - SpanKind::Client, - attrs.as_slice(), - &resource_guard, - ); + let span = TestSpan::new("test-span", attrs.as_slice()); let matching_rule = sampler.find_matching_rule(&span); assert!(matching_rule.is_some(), "Expected to find a matching rule for service 'web-frontend' and name 'http.client.request'"); diff --git a/datadog-opentelemetry/src/sampling/otel_mappings.rs b/datadog-opentelemetry/src/sampling/otel_mappings.rs index fd909341..0fc607c6 100644 --- a/datadog-opentelemetry/src/sampling/otel_mappings.rs +++ b/datadog-opentelemetry/src/sampling/otel_mappings.rs @@ -120,6 +120,28 @@ impl SpanProperties for PreSampledSpan<'_> { } } +impl crate::sampling::AttributeLike for opentelemetry::KeyValue { + type Value = opentelemetry::Value; + + fn key(&self) -> &str { + self.key.as_str() + } + + fn value(&self) -> &Self::Value { + &self.value + } +} + +impl crate::sampling::ValueLike for opentelemetry::Value { + fn extract_float(&self) -> Option { + crate::sampling::utils::extract_float_value(self) + } + + fn extract_string(&self) -> Option> { + crate::sampling::utils::extract_string_value(self) + } +} + /// OpenTelemetry Sampling Data implementation. /// /// Provides the necessary data for making sampling decisions on OpenTelemetry spans. @@ -238,3 +260,373 @@ pub(crate) fn priority_to_otel_decision( opentelemetry::trace::SamplingDecision::RecordOnly } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::mappings::get_otel_operation_name_v2; + use opentelemetry::trace::SpanKind; + use opentelemetry::{Key, KeyValue, Value}; + use opentelemetry_semantic_conventions::attribute::{ + DB_SYSTEM_NAME, HTTP_REQUEST_METHOD, MESSAGING_OPERATION_TYPE, MESSAGING_SYSTEM, + }; + use opentelemetry_semantic_conventions::trace::{ + HTTP_RESPONSE_STATUS_CODE, NETWORK_PROTOCOL_NAME, + }; + + fn create_empty_resource() -> opentelemetry_sdk::Resource { + opentelemetry_sdk::Resource::builder_empty().build() + } + + #[test] + fn test_operation_name_http_client() { + let attrs = vec![KeyValue::new( + Key::from_static_str(HTTP_REQUEST_METHOD), + Value::String("GET".into()), + )]; + let resource = create_empty_resource(); + let span = PreSampledSpan::new("", SpanKind::Client, &attrs, &resource); + + let op_name = span.operation_name(); + assert_eq!(op_name, "http.client.request"); + + // Also verify using get_otel_operation_name_v2 + let op_name_v2 = get_otel_operation_name_v2(&span); + assert_eq!(op_name_v2, "http.client.request"); + } + + #[test] + fn test_operation_name_http_server() { + let attrs = vec![KeyValue::new( + Key::from_static_str(HTTP_REQUEST_METHOD), + Value::String("POST".into()), + )]; + let resource = create_empty_resource(); + let span = PreSampledSpan::new("", SpanKind::Server, &attrs, &resource); + + let op_name = span.operation_name(); + assert_eq!(op_name, "http.server.request"); + + let op_name_v2 = get_otel_operation_name_v2(&span); + assert_eq!(op_name_v2, "http.server.request"); + } + + #[test] + fn test_operation_name_database() { + let attrs = vec![KeyValue::new( + Key::from_static_str(DB_SYSTEM_NAME), + Value::String("postgresql".into()), + )]; + let resource = create_empty_resource(); + let span = PreSampledSpan::new("", SpanKind::Client, &attrs, &resource); + + let op_name = span.operation_name(); + assert_eq!(op_name, "postgresql.query"); + + let op_name_v2 = get_otel_operation_name_v2(&span); + assert_eq!(op_name_v2, "postgresql.query"); + } + + #[test] + fn test_operation_name_messaging() { + let attrs = vec![ + KeyValue::new( + Key::from_static_str(MESSAGING_SYSTEM), + Value::String("kafka".into()), + ), + KeyValue::new( + Key::from_static_str(MESSAGING_OPERATION_TYPE), + Value::String("process".into()), + ), + ]; + let resource = create_empty_resource(); + let span = PreSampledSpan::new("", SpanKind::Consumer, &attrs, &resource); + + let op_name = span.operation_name(); + assert_eq!(op_name, "kafka.process"); + + let op_name_v2 = get_otel_operation_name_v2(&span); + assert_eq!(op_name_v2, "kafka.process"); + } + + #[test] + fn test_operation_name_generic_server_with_protocol() { + let attrs = vec![KeyValue::new( + Key::from_static_str(NETWORK_PROTOCOL_NAME), + Value::String("http".into()), + )]; + let resource = create_empty_resource(); + let span = PreSampledSpan::new("", SpanKind::Server, &attrs, &resource); + + let op_name = span.operation_name(); + assert_eq!(op_name, "http.server.request"); + + let op_name_v2 = get_otel_operation_name_v2(&span); + assert_eq!(op_name_v2, "http.server.request"); + } + + #[test] + fn test_operation_name_internal_fallback() { + let attrs = vec![KeyValue::new("custom.tag", "value")]; + let resource = create_empty_resource(); + let span = PreSampledSpan::new("", SpanKind::Internal, &attrs, &resource); + + let op_name = span.operation_name(); + assert_eq!(op_name, "internal"); + + let op_name_v2 = get_otel_operation_name_v2(&span); + assert_eq!(op_name_v2, "internal"); + } + + #[test] + fn test_service_from_resource() { + use opentelemetry_semantic_conventions::resource::SERVICE_NAME; + + let resource = opentelemetry_sdk::Resource::builder_empty() + .with_attributes(vec![KeyValue::new(SERVICE_NAME, "my-service")]) + .build(); + let attrs = vec![]; + let span = PreSampledSpan::new("test", SpanKind::Server, &attrs, &resource); + + assert_eq!(span.service(), "my-service"); + } + + #[test] + fn test_env_from_attributes() { + let attrs = vec![KeyValue::new("datadog.env", "production")]; + let resource = create_empty_resource(); + let span = PreSampledSpan::new("test", SpanKind::Server, &attrs, &resource); + + assert_eq!(span.env(), "production"); + } + + #[test] + fn test_env_empty_when_not_present() { + let attrs = vec![]; + let resource = create_empty_resource(); + let span = PreSampledSpan::new("test", SpanKind::Server, &attrs, &resource); + + assert_eq!(span.env(), ""); + } + + #[test] + fn test_status_code_from_attributes() { + let attrs = vec![KeyValue::new( + Key::from_static_str(HTTP_RESPONSE_STATUS_CODE), + Value::I64(404), + )]; + let resource = create_empty_resource(); + let span = PreSampledSpan::new("test", SpanKind::Server, &attrs, &resource); + + assert_eq!(span.status_code(), Some(404)); + } + + #[test] + fn test_status_code_none_when_not_present() { + let attrs = vec![]; + let resource = create_empty_resource(); + let span = PreSampledSpan::new("test", SpanKind::Server, &attrs, &resource); + + assert_eq!(span.status_code(), None); + } + + #[test] + fn test_attributes_iteration() { + let attrs = vec![ + KeyValue::new("key1", "value1"), + KeyValue::new("key2", Value::I64(42)), + ]; + let resource = create_empty_resource(); + let span = PreSampledSpan::new("test", SpanKind::Server, &attrs, &resource); + + let collected: Vec<_> = span.attributes().collect(); + assert_eq!(collected.len(), 2); + assert_eq!(collected[0].key.as_str(), "key1"); + assert_eq!(collected[1].key.as_str(), "key2"); + } + + #[test] + fn test_get_alternate_key_http_status() { + let attrs = vec![]; + let resource = create_empty_resource(); + let span = PreSampledSpan::new("test", SpanKind::Server, &attrs, &resource); + + // Test HTTP status code mapping + let alternate = span.get_alternate_key("http.response.status_code"); + assert_eq!(alternate, Some("http.status_code".into())); + } + + #[test] + fn test_get_alternate_key_http_method() { + let attrs = vec![]; + let resource = create_empty_resource(); + let span = PreSampledSpan::new("test", SpanKind::Server, &attrs, &resource); + + // Test HTTP method mapping + let alternate = span.get_alternate_key("http.request.method"); + assert_eq!(alternate, Some("http.method".into())); + } + + #[test] + fn test_get_alternate_key_no_mapping() { + let attrs = vec![]; + let resource = create_empty_resource(); + let span = PreSampledSpan::new("test", SpanKind::Server, &attrs, &resource); + + // Test key with no mapping + let alternate = span.get_alternate_key("custom.attribute"); + assert_eq!(alternate, None); + } + + #[test] + fn test_attribute_key_mapping_comprehensive() { + // Test that OTel attribute keys are correctly mapped to Datadog keys + let attrs = vec![]; + let resource = create_empty_resource(); + let span = PreSampledSpan::new("test", SpanKind::Server, &attrs, &resource); + + // HTTP attribute mappings (OTel -> DD) + assert_eq!( + span.get_alternate_key("http.response.status_code"), + Some("http.status_code".into()) + ); + assert_eq!( + span.get_alternate_key("http.request.method"), + Some("http.method".into()) + ); + assert_eq!(span.get_alternate_key("url.full"), Some("http.url".into())); + assert_eq!( + span.get_alternate_key("user_agent.original"), + Some("http.useragent".into()) + ); + assert_eq!( + span.get_alternate_key("server.address"), + Some("http.server_name".into()) + ); + assert_eq!( + span.get_alternate_key("client.address"), + Some("http.client_ip".into()) + ); + + // Keys without mappings (same in both OTel and DD) should return None + assert_eq!(span.get_alternate_key("custom.tag"), None); + assert_eq!(span.get_alternate_key("application.name"), None); + assert_eq!(span.get_alternate_key("http.route"), None); // Maps to itself + + // Datadog convention keys map to empty string (filtered out), which get_alternate_key + // returns as None + assert_eq!(span.get_alternate_key("service.name"), None); + assert_eq!(span.get_alternate_key("operation.name"), None); + assert_eq!(span.get_alternate_key("datadog.custom"), None); + } + + #[test] + fn test_otel_to_datadog_attribute_mapping_in_span() { + // Test that a span with OTel attribute names can be queried using DD keys via + // get_alternate_key + let otel_attrs = vec![ + KeyValue::new("http.response.status_code", Value::I64(500)), + KeyValue::new("http.request.method", "POST"), + KeyValue::new("url.full", "https://example.com/api"), + ]; + let resource = create_empty_resource(); + let span = PreSampledSpan::new("test-span", SpanKind::Client, &otel_attrs, &resource); + + // Verify the OTel attributes are present + let attrs: Vec<_> = span.attributes().collect(); + assert_eq!(attrs.len(), 3); + + // Verify we can find the DD equivalent keys for these OTel attributes + assert_eq!( + span.get_alternate_key("http.response.status_code"), + Some("http.status_code".into()) + ); + assert_eq!( + span.get_alternate_key("http.request.method"), + Some("http.method".into()) + ); + assert_eq!(span.get_alternate_key("url.full"), Some("http.url".into())); + + // Verify the actual attributes can be found by their original keys + let status_code_attr = attrs + .iter() + .find(|a| a.key.as_str() == "http.response.status_code"); + assert!(status_code_attr.is_some()); + + let method_attr = attrs + .iter() + .find(|a| a.key.as_str() == "http.request.method"); + assert!(method_attr.is_some()); + } + + #[test] + fn test_multiple_attribute_mappings() { + // Test that a span with multiple OTel attributes correctly maps them all to DD keys + let mixed_attrs = vec![ + KeyValue::new("http.response.status_code", Value::I64(503)), + KeyValue::new("http.request.method", "POST"), + KeyValue::new("url.full", "https://example.com/api/v1/resource"), + ]; + let resource = create_empty_resource(); + let span = PreSampledSpan::new("test-span", SpanKind::Client, &mixed_attrs, &resource); + + // Verify all three OTel attributes have correct DD mappings + assert_eq!( + span.get_alternate_key("http.response.status_code"), + Some("http.status_code".into()) + ); + assert_eq!( + span.get_alternate_key("http.request.method"), + Some("http.method".into()) + ); + assert_eq!(span.get_alternate_key("url.full"), Some("http.url".into())); + + // Verify all attributes are present + let attrs: Vec<_> = span.attributes().collect(); + assert_eq!(attrs.len(), 3); + + // Verify each attribute can be found by its original OTel key + assert!(attrs + .iter() + .any(|a| a.key.as_str() == "http.response.status_code")); + assert!(attrs + .iter() + .any(|a| a.key.as_str() == "http.request.method")); + assert!(attrs.iter().any(|a| a.key.as_str() == "url.full")); + } + + #[test] + fn test_mixed_direct_and_mapped_attributes() { + // Test that spans with both OTel attributes (that have DD mappings) and + // custom attributes (that don't have mappings) work correctly together + let mixed_attrs = vec![ + // OTel attribute with DD mapping + KeyValue::new("http.response.status_code", Value::I64(503)), + // Custom attribute without mapping + KeyValue::new("custom.tag", "custom_value"), + ]; + let resource = create_empty_resource(); + let span = PreSampledSpan::new("test-span", SpanKind::Client, &mixed_attrs, &resource); + + // OTel attribute should have alternate DD key + assert_eq!( + span.get_alternate_key("http.response.status_code"), + Some("http.status_code".into()) + ); + + // Custom attribute should not have alternate key + assert_eq!(span.get_alternate_key("custom.tag"), None); + + // Both attributes should be present and accessible + let attrs: Vec<_> = span.attributes().collect(); + assert_eq!(attrs.len(), 2); + + assert!(attrs + .iter() + .any(|a| a.key.as_str() == "http.response.status_code")); + assert!(attrs.iter().any(|a| a.key.as_str() == "custom.tag")); + + // Verify the status code is accessible + assert_eq!(span.status_code(), Some(503)); + } +} diff --git a/datadog-opentelemetry/src/sampling/rate_sampler.rs b/datadog-opentelemetry/src/sampling/rate_sampler.rs index 97a2304b..9ca2d794 100644 --- a/datadog-opentelemetry/src/sampling/rate_sampler.rs +++ b/datadog-opentelemetry/src/sampling/rate_sampler.rs @@ -71,7 +71,28 @@ impl RateSampler { #[cfg(test)] mod tests { use super::*; - use opentelemetry::trace::TraceId; + + // Test-only TraceId implementation + #[derive(Clone, Debug, PartialEq, Eq)] + struct TestTraceId { + bytes: [u8; 16], + } + + impl TestTraceId { + fn from_bytes(bytes: [u8; 16]) -> Self { + Self { bytes } + } + + fn to_bytes(&self) -> [u8; 16] { + self.bytes + } + } + + impl TraceIdLike for TestTraceId { + fn to_u128(&self) -> u128 { + u128::from_be_bytes(self.bytes) + } + } #[test] fn check_debug_impl() { @@ -120,7 +141,7 @@ mod tests { let sampler_zero = RateSampler::new(0.0); let mut bytes_zero = [0u8; 16]; bytes_zero[15] = 1; // Example ID - let trace_id_zero = TraceId::from_bytes(bytes_zero); + let trace_id_zero = TestTraceId::from_bytes(bytes_zero); assert!( !sampler_zero.sample(&trace_id_zero), "sampler_zero should return false" @@ -130,7 +151,7 @@ mod tests { let sampler_one = RateSampler::new(1.0); let mut bytes_one = [0u8; 16]; bytes_one[15] = 2; // Example ID - let trace_id_one = TraceId::from_bytes(bytes_one); + let trace_id_one = TestTraceId::from_bytes(bytes_one); assert!( sampler_one.sample(&trace_id_one), "sampler_one should return true" @@ -142,7 +163,7 @@ mod tests { // Trace ID that should be sampled (hashed value <= threshold) let bytes_sample = [0u8; 16]; // Hashes to 0 - let trace_id_sample = TraceId::from_bytes(bytes_sample); + let trace_id_sample = TestTraceId::from_bytes(bytes_sample); let sample_u64 = u128::from_be_bytes(trace_id_sample.to_bytes()) as u64; let sample_hash = sample_u64.wrapping_mul(KNUTH_FACTOR); assert!(sample_hash <= threshold); @@ -154,7 +175,7 @@ mod tests { // Trace ID that should be dropped (hashed value > threshold) let mut bytes_drop = [0u8; 16]; bytes_drop[8..16].copy_from_slice(&u64::MAX.to_be_bytes()); // High lower 64 bits - let trace_id_drop = TraceId::from_bytes(bytes_drop); + let trace_id_drop = TestTraceId::from_bytes(bytes_drop); let drop_u64 = u128::from_be_bytes(trace_id_drop.to_bytes()) as u64; let drop_hash = drop_u64.wrapping_mul(KNUTH_FACTOR); // For rate 0.5, threshold is MAX/2. Hashing MAX should result in something > MAX/2 @@ -173,7 +194,7 @@ mod tests { let sampler_half = RateSampler::new(0.5); // Trace ID with all zeros hashes to 0, which is always <= threshold for rate > 0 let bytes_to_sample = [0u8; 16]; - let trace_id_to_sample = TraceId::from_bytes(bytes_to_sample); + let trace_id_to_sample = TestTraceId::from_bytes(bytes_to_sample); assert!( sampler_half.sample(&trace_id_to_sample), "Sampler with 0.5 rate should sample trace ID 0" diff --git a/datadog-opentelemetry/src/sampling/types.rs b/datadog-opentelemetry/src/sampling/types.rs index 6202cde3..173589e9 100644 --- a/datadog-opentelemetry/src/sampling/types.rs +++ b/datadog-opentelemetry/src/sampling/types.rs @@ -12,16 +12,15 @@ use std::borrow::Cow; /// /// # Examples /// -/// ```ignore -/// use opentelemetry::trace::TraceId; +/// ``` /// use datadog_opentelemetry::sampling::TraceIdLike; /// /// #[derive(Clone, PartialEq, Eq)] -/// struct MyTraceId(TraceId); +/// struct MyTraceId(u128); /// /// impl TraceIdLike for MyTraceId { /// fn to_u128(&self) -> u128 { -/// u128::from_be_bytes(self.0.to_bytes()) +/// self.0 /// } /// } /// ``` @@ -63,28 +62,6 @@ pub trait ValueLike { fn extract_string(&self) -> Option>; } -impl AttributeLike for opentelemetry::KeyValue { - type Value = opentelemetry::Value; - - fn key(&self) -> &str { - self.key.as_str() - } - - fn value(&self) -> &Self::Value { - &self.value - } -} - -impl ValueLike for opentelemetry::Value { - fn extract_float(&self) -> Option { - crate::sampling::utils::extract_float_value(self) - } - - fn extract_string(&self) -> Option> { - crate::sampling::utils::extract_string_value(self) - } -} - /// A trait for creating sampling attributes. /// /// This trait abstracts the creation of attributes for sampling tags, From 8338fa2ee6e6c489dcaa55f4d8bb4a87ee3e061a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Antonsson?= Date: Fri, 13 Feb 2026 14:50:47 +0100 Subject: [PATCH 6/8] refactor(sampling): move sampling rules and configuration to their own files --- .../src/core/configuration/configuration.rs | 90 +----- .../src/core/configuration/mod.rs | 6 +- .../configuration/sampling_rule_config.rs | 88 ++++++ .../src/sampling/datadog_sampler.rs | 262 +----------------- datadog-opentelemetry/src/sampling/mod.rs | 4 +- .../src/sampling/rules_sampler.rs | 2 +- .../src/sampling/sampling_rule.rs | 256 +++++++++++++++++ 7 files changed, 365 insertions(+), 343 deletions(-) create mode 100644 datadog-opentelemetry/src/core/configuration/sampling_rule_config.rs create mode 100644 datadog-opentelemetry/src/sampling/sampling_rule.rs diff --git a/datadog-opentelemetry/src/core/configuration/configuration.rs b/datadog-opentelemetry/src/core/configuration/configuration.rs index 22010824..79b81c0b 100644 --- a/datadog-opentelemetry/src/core/configuration/configuration.rs +++ b/datadog-opentelemetry/src/core/configuration/configuration.rs @@ -2,15 +2,17 @@ // SPDX-License-Identifier: Apache-2.0 use libdd_telemetry::data::Configuration; -use serde::{Deserialize, Serialize}; -use std::collections::{HashMap, HashSet, VecDeque}; +use std::collections::{HashSet, VecDeque}; +use std::fmt::Display; use std::ops::Deref; +use std::str::FromStr; use std::sync::{Arc, Mutex}; use std::time::Duration; -use std::{borrow::Cow, fmt::Display, str::FromStr, sync::OnceLock}; +use std::{borrow::Cow, sync::OnceLock}; use rustc_version_runtime::version; +use crate::core::configuration::sampling_rule_config::{ParsedSamplingRules, SamplingRuleConfig}; use crate::core::configuration::sources::{ CompositeConfigSourceResult, CompositeSource, ConfigKey, ConfigSourceOrigin, }; @@ -83,92 +85,11 @@ impl Default for RemoteConfigCallbacks { Self::new() } } - -/// Configuration for a single sampling rule -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)] -pub struct SamplingRuleConfig { - /// The sample rate to apply (0.0-1.0) - pub sample_rate: f64, - - /// Optional service name pattern to match - #[serde(default)] - pub service: Option, - - /// Optional span name pattern to match - #[serde(default)] - pub name: Option, - - /// Optional resource name pattern to match - #[serde(default)] - pub resource: Option, - - /// Tags that must match (key-value pairs) - #[serde(default)] - pub tags: HashMap, - - /// Where this rule comes from (customer, dynamic, default) - // TODO(paullgdc): this value should not be definable by customers - #[serde(default = "default_provenance")] - pub provenance: String, -} - -impl Display for SamplingRuleConfig { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", serde_json::json!(self)) - } -} - -fn default_provenance() -> String { - "default".to_string() -} - pub const TRACER_VERSION: &str = env!("CARGO_PKG_VERSION"); const DATADOG_TAGS_MAX_LENGTH: usize = 512; const RC_DEFAULT_POLL_INTERVAL: f64 = 5.0; // 5 seconds is the highest interval allowed by the spec -#[derive(Debug, Default, Clone, PartialEq)] -struct ParsedSamplingRules { - rules: Vec, -} - -impl Deref for ParsedSamplingRules { - type Target = [SamplingRuleConfig]; - - fn deref(&self) -> &Self::Target { - &self.rules - } -} - -impl From for Vec { - fn from(parsed: ParsedSamplingRules) -> Self { - parsed.rules - } -} - -impl FromStr for ParsedSamplingRules { - type Err = serde_json::Error; - - fn from_str(s: &str) -> Result { - if s.trim().is_empty() { - return Ok(ParsedSamplingRules::default()); - } - // DD_TRACE_SAMPLING_RULES is expected to be a JSON array of SamplingRuleConfig objects. - let rules_vec: Vec = serde_json::from_str(s)?; - Ok(ParsedSamplingRules { rules: rules_vec }) - } -} - -impl Display for ParsedSamplingRules { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "{}", - serde_json::to_string(&self.rules).unwrap_or_default() - ) - } -} - /// OTLP protocol types for OTLP export. #[derive(Debug, Clone, Copy, PartialEq, Eq)] #[non_exhaustive] @@ -2458,6 +2379,7 @@ impl ConfigBuilder { #[cfg(test)] mod tests { use libdd_telemetry::data::ConfigurationOrigin; + use std::collections::HashMap; use super::Config; use super::*; diff --git a/datadog-opentelemetry/src/core/configuration/mod.rs b/datadog-opentelemetry/src/core/configuration/mod.rs index 94cf7238..4732f820 100644 --- a/datadog-opentelemetry/src/core/configuration/mod.rs +++ b/datadog-opentelemetry/src/core/configuration/mod.rs @@ -22,10 +22,10 @@ #[allow(clippy::module_inception)] mod configuration; pub(crate) mod remote_config; +mod sampling_rule_config; mod sources; mod supported_configurations; -pub use configuration::{ - Config, ConfigBuilder, OtlpProtocol, SamplingRuleConfig, TracePropagationStyle, -}; +pub use configuration::{Config, ConfigBuilder, OtlpProtocol, TracePropagationStyle}; pub(crate) use configuration::{ConfigurationProvider, RemoteConfigUpdate}; +pub use sampling_rule_config::SamplingRuleConfig; diff --git a/datadog-opentelemetry/src/core/configuration/sampling_rule_config.rs b/datadog-opentelemetry/src/core/configuration/sampling_rule_config.rs new file mode 100644 index 00000000..7e3f83ff --- /dev/null +++ b/datadog-opentelemetry/src/core/configuration/sampling_rule_config.rs @@ -0,0 +1,88 @@ +// Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; +use std::fmt::Display; +use std::ops::Deref; +use std::str::FromStr; + +/// Configuration for a single sampling rule +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)] +pub struct SamplingRuleConfig { + /// The sample rate to apply (0.0-1.0) + pub sample_rate: f64, + + /// Optional service name pattern to match + #[serde(default)] + pub service: Option, + + /// Optional span name pattern to match + #[serde(default)] + pub name: Option, + + /// Optional resource name pattern to match + #[serde(default)] + pub resource: Option, + + /// Tags that must match (key-value pairs) + #[serde(default)] + pub tags: HashMap, + + /// Where this rule comes from (customer, dynamic, default) + // TODO(paullgdc): this value should not be definable by customers + #[serde(default = "default_provenance")] + pub provenance: String, +} + +impl Display for SamplingRuleConfig { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", serde_json::json!(self)) + } +} + +fn default_provenance() -> String { + "default".to_string() +} + +#[derive(Debug, Default, Clone, PartialEq)] +pub(crate) struct ParsedSamplingRules { + pub(crate) rules: Vec, +} + +impl Deref for ParsedSamplingRules { + type Target = [SamplingRuleConfig]; + + fn deref(&self) -> &Self::Target { + &self.rules + } +} + +impl From for Vec { + fn from(parsed: ParsedSamplingRules) -> Self { + parsed.rules + } +} + +impl FromStr for ParsedSamplingRules { + type Err = serde_json::Error; + + fn from_str(s: &str) -> Result { + if s.trim().is_empty() { + return Ok(ParsedSamplingRules::default()); + } + // DD_TRACE_SAMPLING_RULES is expected to be a JSON array of SamplingRuleConfig objects. + let rules_vec: Vec = serde_json::from_str(s)?; + Ok(ParsedSamplingRules { rules: rules_vec }) + } +} + +impl Display for ParsedSamplingRules { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{}", + serde_json::to_string(&self.rules).unwrap_or_default() + ) + } +} diff --git a/datadog-opentelemetry/src/sampling/datadog_sampler.rs b/datadog-opentelemetry/src/sampling/datadog_sampler.rs index bb8c5c71..da0cbccc 100644 --- a/datadog-opentelemetry/src/sampling/datadog_sampler.rs +++ b/datadog-opentelemetry/src/sampling/datadog_sampler.rs @@ -12,249 +12,12 @@ use crate::core::sampling::{mechanism, priority, SamplingMechanism, SamplingPrio /// Consolidated callback type used across crates for remote config sampling updates pub type SamplingRulesCallback = Box Fn(&'a [SamplingRuleConfig]) + Send + Sync>; -use crate::sampling::{AttributeLike, SamplingData, SpanProperties, TraceIdLike, ValueLike}; -use std::collections::HashMap; +use crate::sampling::{SamplingData, SpanProperties}; use super::agent_service_sampler::{AgentRates, ServicesSampler}; -// Import the attr constants -use super::constants::pattern::NO_RULE; -use super::glob_matcher::GlobMatcher; use super::rate_limiter::RateLimiter; -use super::rate_sampler::RateSampler; use super::rules_sampler::RulesSampler; - -// HTTP status code attribute constants -const HTTP_RESPONSE_STATUS_CODE: &str = "http.response.status_code"; -const HTTP_STATUS_CODE: &str = "http.status_code"; - -fn matcher_from_rule(rule: &str) -> Option { - (rule != NO_RULE).then(|| GlobMatcher::new(rule)) -} - -/// Represents a sampling rule with criteria for matching spans -#[derive(Clone, Debug)] -pub struct SamplingRule { - /// The sample rate to apply when this rule matches (0.0-1.0) - sample_rate: f64, - - /// Where this rule comes from (customer, dynamic, default) - provenance: String, - - /// Internal rate sampler used when this rule matches - rate_sampler: RateSampler, - - /// Glob matchers for pattern matching - name_matcher: Option, - service_matcher: Option, - resource_matcher: Option, - tag_matchers: HashMap, -} - -impl SamplingRule { - /// Converts a vector of SamplingRuleConfig into SamplingRule objects - /// Centralizes the conversion logic - pub fn from_configs(configs: Vec) -> Vec { - configs - .into_iter() - .map(|config| { - Self::new( - config.sample_rate, - config.service, - config.name, - config.resource, - Some(config.tags), - Some(config.provenance), - ) - }) - .collect() - } - - /// Creates a new sampling rule - pub fn new( - sample_rate: f64, - service: Option, - name: Option, - resource: Option, - tags: Option>, - provenance: Option, - ) -> Self { - // Create glob matchers for the patterns - let name_matcher = name.as_deref().and_then(matcher_from_rule); - let service_matcher = service.as_deref().and_then(matcher_from_rule); - let resource_matcher = resource.as_deref().and_then(matcher_from_rule); - - // Create matchers for tag values - let tag_map = tags.clone().unwrap_or_default(); - let mut tag_matchers = HashMap::with_capacity(tag_map.len()); - for (key, value) in &tag_map { - if let Some(matcher) = matcher_from_rule(value) { - tag_matchers.insert(key.clone(), matcher); - } - } - - SamplingRule { - sample_rate, - provenance: provenance.unwrap_or_else(|| "default".to_string()), - rate_sampler: RateSampler::new(sample_rate), - name_matcher, - service_matcher, - resource_matcher, - tag_matchers, - } - } - - /// Checks if this rule matches the given span's attributes and name - /// The name is derived from the attributes and span kind - fn matches(&self, span: &impl SpanProperties) -> bool { - // Get the operation name from the span - let name = span.operation_name(); - - // Check name using glob matcher if specified - if let Some(ref matcher) = self.name_matcher { - if !matcher.matches(name.as_ref()) { - return false; - } - } - - // Check service if specified using glob matcher - if let Some(ref matcher) = self.service_matcher { - // Get service from the span - let service = span.service(); - - // Match against the service - if !matcher.matches(&service) { - return false; - } - } - - // Get the resource string for matching - let resource_str = span.resource(); - - // Check resource if specified using glob matcher - if let Some(ref matcher) = self.resource_matcher { - // Use the resource from the span - if !matcher.matches(resource_str.as_ref()) { - return false; - } - } - - // Check all tags using glob matchers - for (key, matcher) in &self.tag_matchers { - let rule_tag_key_str = key.as_str(); - - // Special handling for rules defined with "http.status_code" or - // "http.response.status_code" - if rule_tag_key_str == HTTP_STATUS_CODE || rule_tag_key_str == HTTP_RESPONSE_STATUS_CODE - { - match self.match_http_status_code_rule(matcher, span) { - Some(true) => continue, // Status code matched - Some(false) | None => return false, // Status code didn't match or wasn't found - } - } else { - // Logic for other tags: - // First, try to match directly with the provided tag key - let direct_match = span - .attributes() - .find(|attr| attr.key() == rule_tag_key_str) - .and_then(|attr| self.match_attribute_value(attr.value(), matcher)); - - if direct_match.unwrap_or(false) { - continue; - } - - // If no direct match, try to find the corresponding OpenTelemetry attribute that - // maps to the Datadog tag key This handles cases where the rule key - // is a Datadog key (e.g., "http.method") and the attribute is an - // OTel key (e.g., "http.request.method") - if rule_tag_key_str.starts_with("http.") { - let tag_match = span.attributes().any(|attr| { - if let Some(alternate_key) = span.get_alternate_key(attr.key()) { - if alternate_key == rule_tag_key_str { - return self - .match_attribute_value(attr.value(), matcher) - .unwrap_or(false); - } - } - false - }); - - if !tag_match { - return false; // Mapped attribute not found or did not match - } - // If tag_match is true, loop continues to next rule_tag_key. - } else { - // For non-HTTP attributes, if we don't have a direct match, the rule doesn't - // match - return false; - } - } - } - - true - } - - /// Helper method to specifically match a rule against an HTTP status code extracted from - /// attributes. Returns Some(true) if status code found and matches, Some(false) if found - /// but not matched, None if not found. - fn match_http_status_code_rule( - &self, - matcher: &GlobMatcher, - span: &impl SpanProperties, - ) -> Option { - span.status_code().and_then(|status_code| { - let status_value = ValueI64(i64::from(status_code)); - self.match_attribute_value(&status_value, matcher) - }) - } - - // Helper method to match attribute values considering different value types - fn match_attribute_value(&self, value: &impl ValueLike, matcher: &GlobMatcher) -> Option { - // Floating point values are handled with special rules - if let Some(float_val) = value.extract_float() { - // Check if the float has a non-zero decimal part - let has_decimal = float_val != (float_val as i64) as f64; - - // For non-integer floats, only match if it's a wildcard pattern - if has_decimal { - // All '*' pattern returns true, any other pattern returns false - return Some(matcher.pattern().chars().all(|c| c == '*')); - } - - // For integer floats, convert to string for matching - return Some(matcher.matches(&float_val.to_string())); - } - - // For non-float values, use normal matching - value - .extract_string() - .map(|string_value| matcher.matches(&string_value)) - } - - /// Samples a trace ID using this rule's sample rate - pub fn sample(&self, trace_id: &impl TraceIdLike) -> bool { - // Delegate to the internal rate sampler's new sample method - self.rate_sampler.sample(trace_id) - } -} - -/// Represents a priority for sampling rules -#[allow(dead_code)] -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] -pub enum RuleProvenance { - Customer = 0, - Dynamic = 1, - Default = 2, -} - -impl From<&str> for RuleProvenance { - fn from(s: &str) -> Self { - match s { - "customer" => RuleProvenance::Customer, - "dynamic" => RuleProvenance::Dynamic, - _ => RuleProvenance::Default, - } - } -} +use super::sampling_rule::SamplingRule; /// A composite sampler that applies rules in order of precedence #[derive(Clone, Debug)] @@ -472,18 +235,6 @@ fn format_sampling_rate(rate: f64) -> Option { }) } -struct ValueI64(i64); - -impl ValueLike for ValueI64 { - fn extract_float(&self) -> Option { - Some(self.0 as f64) - } - - fn extract_string(&self) -> Option> { - Some(std::borrow::Cow::Owned(self.0.to_string())) - } -} - pub struct TraceRootSamplingInfo { mechanism: SamplingMechanism, rate: f64, @@ -589,6 +340,7 @@ mod tests { attr::{ENV_TAG, RESOURCE_TAG}, pattern, }; + use crate::sampling::{AttributeLike, TraceIdLike, ValueLike}; use std::borrow::Cow; use std::collections::HashMap; @@ -596,7 +348,9 @@ mod tests { const HTTP_REQUEST_METHOD: &str = "http.request.method"; const SERVICE_NAME: &str = "service.name"; - use super::{HTTP_RESPONSE_STATUS_CODE, HTTP_STATUS_CODE}; + // HTTP status code attribute constants (for tests) + const HTTP_RESPONSE_STATUS_CODE: &str = "http.response.status_code"; + const HTTP_STATUS_CODE: &str = "http.status_code"; // ============================================================================ // Test-only data structures @@ -1253,8 +1007,8 @@ mod tests { found_rule_rate = true; } SAMPLING_KNUTH_RATE_TAG_KEY => { - let value_str = match &attr.value { - opentelemetry::Value::String(s) => s.to_string(), + let value_str = match attr.value() { + TestValue::String(s) => s.to_string(), _ => panic!("Expected string value for ksr tag"), }; assert_eq!(value_str, "0.5"); diff --git a/datadog-opentelemetry/src/sampling/mod.rs b/datadog-opentelemetry/src/sampling/mod.rs index 1dd05d7d..f9174c75 100644 --- a/datadog-opentelemetry/src/sampling/mod.rs +++ b/datadog-opentelemetry/src/sampling/mod.rs @@ -11,12 +11,14 @@ pub(crate) mod otel_mappings; pub(crate) mod rate_limiter; pub(crate) mod rate_sampler; pub(crate) mod rules_sampler; +pub(crate) mod sampling_rule; mod types; pub(crate) mod utils; // Re-export key public types -pub use datadog_sampler::{DatadogSampler, SamplingRule, SamplingRulesCallback}; +pub use datadog_sampler::{DatadogSampler, SamplingRulesCallback}; pub use otel_mappings::{OtelAttributeFactory, OtelSamplingData}; +pub use sampling_rule::SamplingRule; pub use types::{ AttributeFactory, AttributeLike, SamplingData, SpanProperties, TraceIdLike, ValueLike, }; diff --git a/datadog-opentelemetry/src/sampling/rules_sampler.rs b/datadog-opentelemetry/src/sampling/rules_sampler.rs index e679a09e..3049ff6e 100644 --- a/datadog-opentelemetry/src/sampling/rules_sampler.rs +++ b/datadog-opentelemetry/src/sampling/rules_sampler.rs @@ -3,7 +3,7 @@ use std::sync::{Arc, RwLock}; -use super::datadog_sampler::SamplingRule; +use super::sampling_rule::SamplingRule; /// Thread-safe container for sampling rules #[derive(Debug, Default, Clone)] diff --git a/datadog-opentelemetry/src/sampling/sampling_rule.rs b/datadog-opentelemetry/src/sampling/sampling_rule.rs new file mode 100644 index 00000000..38332f52 --- /dev/null +++ b/datadog-opentelemetry/src/sampling/sampling_rule.rs @@ -0,0 +1,256 @@ +// Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +use crate::core::configuration::SamplingRuleConfig; +use crate::sampling::{AttributeLike, SpanProperties, TraceIdLike, ValueLike}; +use std::collections::HashMap; + +use super::constants::pattern::NO_RULE; +use super::glob_matcher::GlobMatcher; +use super::rate_sampler::RateSampler; + +// HTTP status code attribute constants +const HTTP_RESPONSE_STATUS_CODE: &str = "http.response.status_code"; +const HTTP_STATUS_CODE: &str = "http.status_code"; + +fn matcher_from_rule(rule: &str) -> Option { + (rule != NO_RULE).then(|| GlobMatcher::new(rule)) +} + +/// Represents a sampling rule with criteria for matching spans +#[derive(Clone, Debug)] +pub struct SamplingRule { + /// The sample rate to apply when this rule matches (0.0-1.0) + pub(crate) sample_rate: f64, + + /// Where this rule comes from (customer, dynamic, default) + pub(crate) provenance: String, + + /// Internal rate sampler used when this rule matches + rate_sampler: RateSampler, + + /// Glob matchers for pattern matching + pub(crate) name_matcher: Option, + pub(crate) service_matcher: Option, + pub(crate) resource_matcher: Option, + pub(crate) tag_matchers: HashMap, +} + +impl SamplingRule { + /// Converts a vector of SamplingRuleConfig into SamplingRule objects + /// Centralizes the conversion logic + pub fn from_configs(configs: Vec) -> Vec { + configs + .into_iter() + .map(|config| { + Self::new( + config.sample_rate, + config.service, + config.name, + config.resource, + Some(config.tags), + Some(config.provenance), + ) + }) + .collect() + } + + /// Creates a new sampling rule + pub fn new( + sample_rate: f64, + service: Option, + name: Option, + resource: Option, + tags: Option>, + provenance: Option, + ) -> Self { + // Create glob matchers for the patterns + let name_matcher = name.as_deref().and_then(matcher_from_rule); + let service_matcher = service.as_deref().and_then(matcher_from_rule); + let resource_matcher = resource.as_deref().and_then(matcher_from_rule); + + // Create matchers for tag values + let tag_map = tags.clone().unwrap_or_default(); + let mut tag_matchers = HashMap::with_capacity(tag_map.len()); + for (key, value) in &tag_map { + if let Some(matcher) = matcher_from_rule(value) { + tag_matchers.insert(key.clone(), matcher); + } + } + + SamplingRule { + sample_rate, + provenance: provenance.unwrap_or_else(|| "default".to_string()), + rate_sampler: RateSampler::new(sample_rate), + name_matcher, + service_matcher, + resource_matcher, + tag_matchers, + } + } + + /// Checks if this rule matches the given span's attributes and name + /// The name is derived from the attributes and span kind + pub(crate) fn matches(&self, span: &impl SpanProperties) -> bool { + // Get the operation name from the span + let name = span.operation_name(); + + // Check name using glob matcher if specified + if let Some(ref matcher) = self.name_matcher { + if !matcher.matches(name.as_ref()) { + return false; + } + } + + // Check service if specified using glob matcher + if let Some(ref matcher) = self.service_matcher { + // Get service from the span + let service = span.service(); + + // Match against the service + if !matcher.matches(&service) { + return false; + } + } + + // Get the resource string for matching + let resource_str = span.resource(); + + // Check resource if specified using glob matcher + if let Some(ref matcher) = self.resource_matcher { + // Use the resource from the span + if !matcher.matches(resource_str.as_ref()) { + return false; + } + } + + // Check all tags using glob matchers + for (key, matcher) in &self.tag_matchers { + let rule_tag_key_str = key.as_str(); + + // Special handling for rules defined with "http.status_code" or + // "http.response.status_code" + if rule_tag_key_str == HTTP_STATUS_CODE || rule_tag_key_str == HTTP_RESPONSE_STATUS_CODE + { + match self.match_http_status_code_rule(matcher, span) { + Some(true) => continue, // Status code matched + Some(false) | None => return false, // Status code didn't match or wasn't found + } + } else { + // Logic for other tags: + // First, try to match directly with the provided tag key + let direct_match = span + .attributes() + .find(|attr| attr.key() == rule_tag_key_str) + .and_then(|attr| self.match_attribute_value(attr.value(), matcher)); + + if direct_match.unwrap_or(false) { + continue; + } + + // If no direct match, try to find the corresponding OpenTelemetry attribute that + // maps to the Datadog tag key This handles cases where the rule key + // is a Datadog key (e.g., "http.method") and the attribute is an + // OTel key (e.g., "http.request.method") + if rule_tag_key_str.starts_with("http.") { + let tag_match = span.attributes().any(|attr| { + if let Some(alternate_key) = span.get_alternate_key(attr.key()) { + if alternate_key == rule_tag_key_str { + return self + .match_attribute_value(attr.value(), matcher) + .unwrap_or(false); + } + } + false + }); + + if !tag_match { + return false; // Mapped attribute not found or did not match + } + // If tag_match is true, loop continues to next rule_tag_key. + } else { + // For non-HTTP attributes, if we don't have a direct match, the rule doesn't + // match + return false; + } + } + } + + true + } + + /// Helper method to specifically match a rule against an HTTP status code extracted from + /// attributes. Returns Some(true) if status code found and matches, Some(false) if found + /// but not matched, None if not found. + fn match_http_status_code_rule( + &self, + matcher: &GlobMatcher, + span: &impl SpanProperties, + ) -> Option { + span.status_code().and_then(|status_code| { + let status_value = ValueI64(i64::from(status_code)); + self.match_attribute_value(&status_value, matcher) + }) + } + + // Helper method to match attribute values considering different value types + fn match_attribute_value(&self, value: &impl ValueLike, matcher: &GlobMatcher) -> Option { + // Floating point values are handled with special rules + if let Some(float_val) = value.extract_float() { + // Check if the float has a non-zero decimal part + let has_decimal = float_val != (float_val as i64) as f64; + + // For non-integer floats, only match if it's a wildcard pattern + if has_decimal { + // All '*' pattern returns true, any other pattern returns false + return Some(matcher.pattern().chars().all(|c| c == '*')); + } + + // For integer floats, convert to string for matching + return Some(matcher.matches(&float_val.to_string())); + } + + // For non-float values, use normal matching + value + .extract_string() + .map(|string_value| matcher.matches(&string_value)) + } + + /// Samples a trace ID using this rule's sample rate + pub fn sample(&self, trace_id: &impl TraceIdLike) -> bool { + // Delegate to the internal rate sampler's new sample method + self.rate_sampler.sample(trace_id) + } +} + +/// Represents a priority for sampling rules +#[allow(dead_code)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +pub enum RuleProvenance { + Customer = 0, + Dynamic = 1, + Default = 2, +} + +impl From<&str> for RuleProvenance { + fn from(s: &str) -> Self { + match s { + "customer" => RuleProvenance::Customer, + "dynamic" => RuleProvenance::Dynamic, + _ => RuleProvenance::Default, + } + } +} + +/// Helper struct for representing i64 values as ValueLike +struct ValueI64(i64); + +impl ValueLike for ValueI64 { + fn extract_float(&self) -> Option { + Some(self.0 as f64) + } + + fn extract_string(&self) -> Option> { + Some(std::borrow::Cow::Owned(self.0.to_string())) + } +} From 2b8ee4d231f16b70c136167c23b5d2a113cdbd18 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Antonsson?= Date: Thu, 19 Feb 2026 17:14:33 +0100 Subject: [PATCH 7/8] refactor(sampling): move sampling to its own crate --- Cargo.lock | 13 ++ Cargo.toml | 1 + datadog-opentelemetry/Cargo.toml | 2 + .../src/core/configuration/configuration.rs | 2 +- .../src/core/configuration/mod.rs | 5 +- datadog-opentelemetry/src/core/mod.rs | 6 +- datadog-opentelemetry/src/sampling/mod.rs | 20 +-- .../src/sampling/otel_mappings.rs | 128 +++++++++++------- libdd-sampling/Cargo.toml | 23 ++++ .../src}/agent_service_sampler.rs | 6 +- .../src}/constants.rs | 0 .../src}/datadog_sampler.rs | 28 ++-- .../src/dd_constants.rs | 6 - .../src/dd_sampling.rs | 12 +- .../src}/glob_matcher.rs | 0 libdd-sampling/src/lib.rs | 37 +++++ .../src}/rate_limiter.rs | 2 +- .../src}/rate_sampler.rs | 4 +- .../src}/rules_sampler.rs | 0 .../src}/sampling_rule.rs | 11 +- .../src}/sampling_rule_config.rs | 4 +- .../sampling => libdd-sampling/src}/types.rs | 2 +- 22 files changed, 198 insertions(+), 114 deletions(-) create mode 100644 libdd-sampling/Cargo.toml rename {datadog-opentelemetry/src/sampling => libdd-sampling/src}/agent_service_sampler.rs (91%) rename {datadog-opentelemetry/src/sampling => libdd-sampling/src}/constants.rs (100%) rename {datadog-opentelemetry/src/sampling => libdd-sampling/src}/datadog_sampler.rs (98%) rename datadog-opentelemetry/src/core/constants.rs => libdd-sampling/src/dd_constants.rs (76%) rename datadog-opentelemetry/src/core/sampling.rs => libdd-sampling/src/dd_sampling.rs (96%) rename {datadog-opentelemetry/src/sampling => libdd-sampling/src}/glob_matcher.rs (100%) create mode 100644 libdd-sampling/src/lib.rs rename {datadog-opentelemetry/src/sampling => libdd-sampling/src}/rate_limiter.rs (99%) rename {datadog-opentelemetry/src/sampling => libdd-sampling/src}/rate_sampler.rs (99%) rename {datadog-opentelemetry/src/sampling => libdd-sampling/src}/rules_sampler.rs (100%) rename {datadog-opentelemetry/src/sampling => libdd-sampling/src}/sampling_rule.rs (97%) rename {datadog-opentelemetry/src/core/configuration => libdd-sampling/src}/sampling_rule_config.rs (96%) rename {datadog-opentelemetry/src/sampling => libdd-sampling/src}/types.rs (99%) diff --git a/Cargo.lock b/Cargo.lock index 54a3a437..dc5f12f7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -477,6 +477,7 @@ dependencies = [ "hyper-util", "libdd-common", "libdd-data-pipeline", + "libdd-sampling", "libdd-telemetry", "libdd-tinybytes", "libdd-trace-utils", @@ -1232,6 +1233,18 @@ dependencies = [ "tracing", ] +[[package]] +name = "libdd-sampling" +version = "0.3.1" +dependencies = [ + "criterion", + "foldhash 0.1.5", + "hashbrown 0.15.5", + "lru", + "serde", + "serde_json", +] + [[package]] name = "libdd-telemetry" version = "3.0.0" diff --git a/Cargo.toml b/Cargo.toml index 2f025c1e..e7f43413 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,6 +3,7 @@ [workspace] members = [ + "libdd-sampling", "datadog-opentelemetry", "datadog-opentelemetry/examples/propagator", "datadog-opentelemetry/examples/simple_tracing", diff --git a/datadog-opentelemetry/Cargo.toml b/datadog-opentelemetry/Cargo.toml index b18c14b3..44f1ac47 100644 --- a/datadog-opentelemetry/Cargo.toml +++ b/datadog-opentelemetry/Cargo.toml @@ -15,6 +15,8 @@ rustdoc-args = ["--cfg", "docsrs"] [dependencies] +# Internal dependencies +libdd-sampling = { path = "../libdd-sampling" } # External dependencies foldhash = { workspace = true } diff --git a/datadog-opentelemetry/src/core/configuration/configuration.rs b/datadog-opentelemetry/src/core/configuration/configuration.rs index 79b81c0b..3e29460e 100644 --- a/datadog-opentelemetry/src/core/configuration/configuration.rs +++ b/datadog-opentelemetry/src/core/configuration/configuration.rs @@ -12,7 +12,6 @@ use std::{borrow::Cow, sync::OnceLock}; use rustc_version_runtime::version; -use crate::core::configuration::sampling_rule_config::{ParsedSamplingRules, SamplingRuleConfig}; use crate::core::configuration::sources::{ CompositeConfigSourceResult, CompositeSource, ConfigKey, ConfigSourceOrigin, }; @@ -20,6 +19,7 @@ use crate::core::configuration::supported_configurations::SupportedConfiguration use crate::core::log::LevelFilter; use crate::core::telemetry; use crate::{dd_error, dd_warn}; +use libdd_sampling::{ParsedSamplingRules, SamplingRuleConfig}; /// Different types of remote configuration updates that can trigger callbacks #[derive(Debug, Clone)] diff --git a/datadog-opentelemetry/src/core/configuration/mod.rs b/datadog-opentelemetry/src/core/configuration/mod.rs index 4732f820..fc73d142 100644 --- a/datadog-opentelemetry/src/core/configuration/mod.rs +++ b/datadog-opentelemetry/src/core/configuration/mod.rs @@ -22,10 +22,11 @@ #[allow(clippy::module_inception)] mod configuration; pub(crate) mod remote_config; -mod sampling_rule_config; mod sources; mod supported_configurations; pub use configuration::{Config, ConfigBuilder, OtlpProtocol, TracePropagationStyle}; pub(crate) use configuration::{ConfigurationProvider, RemoteConfigUpdate}; -pub use sampling_rule_config::SamplingRuleConfig; + +// Re-export from libdd-sampling +pub use libdd_sampling::SamplingRuleConfig; diff --git a/datadog-opentelemetry/src/core/mod.rs b/datadog-opentelemetry/src/core/mod.rs index b38404f0..0f4f11cb 100644 --- a/datadog-opentelemetry/src/core/mod.rs +++ b/datadog-opentelemetry/src/core/mod.rs @@ -4,9 +4,9 @@ //! Core components of the SDK pub mod configuration; -/// Trace propagation and sampling constant keys. -pub mod constants; -pub mod sampling; + +// Re-export from libdd-sampling +pub use libdd_sampling::{dd_constants as constants, dd_sampling as sampling}; mod error; diff --git a/datadog-opentelemetry/src/sampling/mod.rs b/datadog-opentelemetry/src/sampling/mod.rs index f9174c75..6ffa55b2 100644 --- a/datadog-opentelemetry/src/sampling/mod.rs +++ b/datadog-opentelemetry/src/sampling/mod.rs @@ -3,22 +3,14 @@ //! Datadog sampling logic -pub(crate) mod agent_service_sampler; -pub(crate) mod constants; -pub(crate) mod datadog_sampler; -pub(crate) mod glob_matcher; pub(crate) mod otel_mappings; -pub(crate) mod rate_limiter; -pub(crate) mod rate_sampler; -pub(crate) mod rules_sampler; -pub(crate) mod sampling_rule; -mod types; pub(crate) mod utils; +// Re-export from libdd-sampling +pub use libdd_sampling::{ + AttributeFactory, AttributeLike, DatadogSampler, SamplingData, SamplingRule, + SamplingRulesCallback, SpanProperties, TraceIdLike, ValueLike, +}; + // Re-export key public types -pub use datadog_sampler::{DatadogSampler, SamplingRulesCallback}; pub use otel_mappings::{OtelAttributeFactory, OtelSamplingData}; -pub use sampling_rule::SamplingRule; -pub use types::{ - AttributeFactory, AttributeLike, SamplingData, SpanProperties, TraceIdLike, ValueLike, -}; diff --git a/datadog-opentelemetry/src/sampling/otel_mappings.rs b/datadog-opentelemetry/src/sampling/otel_mappings.rs index 0fc607c6..a58d6e3e 100644 --- a/datadog-opentelemetry/src/sampling/otel_mappings.rs +++ b/datadog-opentelemetry/src/sampling/otel_mappings.rs @@ -8,9 +8,69 @@ use crate::mappings::{ get_dd_key_for_otlp_attribute, get_otel_env, get_otel_operation_name_v2, get_otel_resource_v2, get_otel_service, get_otel_status_code, AttributeIndices, AttributeKey, OtelSpan, }; -use crate::sampling::{SamplingData, SpanProperties}; +use crate::sampling::{AttributeLike, SamplingData, SpanProperties, TraceIdLike, ValueLike}; use opentelemetry::{Key, KeyValue}; +/// Wrapper around OpenTelemetry TraceId for trait implementations +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct OtelTraceId(opentelemetry::trace::TraceId); + +impl TraceIdLike for OtelTraceId { + fn to_u128(&self) -> u128 { + u128::from_be_bytes(self.0.to_bytes()) + } +} + +/// Transparent wrapper around OpenTelemetry Value for trait implementations +#[repr(transparent)] +#[derive(Debug)] +pub struct OtelValue(opentelemetry::Value); + +impl OtelValue { + /// Convert a reference to opentelemetry::Value to a reference to OtelValue + /// This is safe because OtelValue is repr(transparent) + fn from_ref(value: &opentelemetry::Value) -> &Self { + // Safety: OtelValue is repr(transparent) over opentelemetry::Value + unsafe { &*(value as *const opentelemetry::Value as *const OtelValue) } + } +} + +impl ValueLike for OtelValue { + fn extract_float(&self) -> Option { + crate::sampling::utils::extract_float_value(&self.0) + } + + fn extract_string(&self) -> Option> { + crate::sampling::utils::extract_string_value(&self.0) + } +} + +/// Transparent wrapper around OpenTelemetry KeyValue for trait implementations +#[repr(transparent)] +#[derive(Debug)] +pub struct OtelKeyValue(opentelemetry::KeyValue); + +impl OtelKeyValue { + /// Convert a reference to opentelemetry::KeyValue to a reference to OtelKeyValue + /// This is safe because OtelKeyValue is repr(transparent) + fn from_ref(kv: &opentelemetry::KeyValue) -> &Self { + // Safety: OtelKeyValue is repr(transparent) over opentelemetry::KeyValue + unsafe { &*(kv as *const opentelemetry::KeyValue as *const OtelKeyValue) } + } +} + +impl AttributeLike for OtelKeyValue { + type Value = OtelValue; + + fn key(&self) -> &str { + self.0.key.as_str() + } + + fn value(&self) -> &Self::Value { + OtelValue::from_ref(&self.0.value) + } +} + pub struct PreSampledSpan<'a> { pub name: &'a str, pub span_kind: opentelemetry::trace::SpanKind, @@ -80,7 +140,7 @@ impl<'a> OtelSpan<'a> for PreSampledSpan<'a> { } impl SpanProperties for PreSampledSpan<'_> { - type Attribute = opentelemetry::KeyValue; + type Attribute = OtelKeyValue; fn operation_name(&self) -> Cow<'_, str> { get_otel_operation_name_v2(self) @@ -106,7 +166,7 @@ impl SpanProperties for PreSampledSpan<'_> { where Self: 'a, { - self.attributes.iter() + self.attributes.iter().map(OtelKeyValue::from_ref) } fn get_alternate_key<'b>(&self, key: &'b str) -> Option> { @@ -120,28 +180,6 @@ impl SpanProperties for PreSampledSpan<'_> { } } -impl crate::sampling::AttributeLike for opentelemetry::KeyValue { - type Value = opentelemetry::Value; - - fn key(&self) -> &str { - self.key.as_str() - } - - fn value(&self) -> &Self::Value { - &self.value - } -} - -impl crate::sampling::ValueLike for opentelemetry::Value { - fn extract_float(&self) -> Option { - crate::sampling::utils::extract_float_value(self) - } - - fn extract_string(&self) -> Option> { - crate::sampling::utils::extract_string_value(self) - } -} - /// OpenTelemetry Sampling Data implementation. /// /// Provides the necessary data for making sampling decisions on OpenTelemetry spans. @@ -149,7 +187,7 @@ impl crate::sampling::ValueLike for opentelemetry::Value { /// span kind, attributes, and resource information. pub struct OtelSamplingData<'a> { is_parent_sampled: Option, - trace_id: &'a opentelemetry::trace::TraceId, + trace_id: OtelTraceId, name: &'a str, span_kind: opentelemetry::trace::SpanKind, attributes: &'a [KeyValue], @@ -177,7 +215,7 @@ impl<'a> OtelSamplingData<'a> { ) -> Self { Self { is_parent_sampled, - trace_id, + trace_id: OtelTraceId(*trace_id), name, span_kind, attributes, @@ -187,7 +225,7 @@ impl<'a> OtelSamplingData<'a> { } impl SamplingData for OtelSamplingData<'_> { - type TraceId = opentelemetry::trace::TraceId; + type TraceId = OtelTraceId; type Properties<'b> = PreSampledSpan<'b> where @@ -197,7 +235,7 @@ impl SamplingData for OtelSamplingData<'_> { self.is_parent_sampled } fn trace_id(&self) -> &Self::TraceId { - self.trace_id + &self.trace_id } fn with_span_properties(&self, s: &S, f: F) -> T @@ -215,12 +253,6 @@ impl SamplingData for OtelSamplingData<'_> { } } -impl crate::sampling::TraceIdLike for opentelemetry::trace::TraceId { - fn to_u128(&self) -> u128 { - u128::from_be_bytes(self.to_bytes()) - } -} - /// Factory for creating OpenTelemetry KeyValue attributes. pub struct OtelAttributeFactory; @@ -441,8 +473,8 @@ mod tests { let collected: Vec<_> = span.attributes().collect(); assert_eq!(collected.len(), 2); - assert_eq!(collected[0].key.as_str(), "key1"); - assert_eq!(collected[1].key.as_str(), "key2"); + assert_eq!(collected[0].key(), "key1"); + assert_eq!(collected[1].key(), "key2"); } #[test] @@ -550,12 +582,10 @@ mod tests { // Verify the actual attributes can be found by their original keys let status_code_attr = attrs .iter() - .find(|a| a.key.as_str() == "http.response.status_code"); + .find(|a| a.key() == "http.response.status_code"); assert!(status_code_attr.is_some()); - let method_attr = attrs - .iter() - .find(|a| a.key.as_str() == "http.request.method"); + let method_attr = attrs.iter().find(|a| a.key() == "http.request.method"); assert!(method_attr.is_some()); } @@ -586,13 +616,9 @@ mod tests { assert_eq!(attrs.len(), 3); // Verify each attribute can be found by its original OTel key - assert!(attrs - .iter() - .any(|a| a.key.as_str() == "http.response.status_code")); - assert!(attrs - .iter() - .any(|a| a.key.as_str() == "http.request.method")); - assert!(attrs.iter().any(|a| a.key.as_str() == "url.full")); + assert!(attrs.iter().any(|a| a.key() == "http.response.status_code")); + assert!(attrs.iter().any(|a| a.key() == "http.request.method")); + assert!(attrs.iter().any(|a| a.key() == "url.full")); } #[test] @@ -621,10 +647,8 @@ mod tests { let attrs: Vec<_> = span.attributes().collect(); assert_eq!(attrs.len(), 2); - assert!(attrs - .iter() - .any(|a| a.key.as_str() == "http.response.status_code")); - assert!(attrs.iter().any(|a| a.key.as_str() == "custom.tag")); + assert!(attrs.iter().any(|a| a.key() == "http.response.status_code")); + assert!(attrs.iter().any(|a| a.key() == "custom.tag")); // Verify the status code is accessible assert_eq!(span.status_code(), Some(503)); diff --git a/libdd-sampling/Cargo.toml b/libdd-sampling/Cargo.toml new file mode 100644 index 00000000..1dea6a65 --- /dev/null +++ b/libdd-sampling/Cargo.toml @@ -0,0 +1,23 @@ +# Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/ +# SPDX-License-Identifier: Apache-2.0 + +[package] +name = "libdd-sampling" +version.workspace = true +edition.workspace = true +rust-version.workspace = true +license.workspace = true +repository.workspace = true +readme.workspace = true +description = "Core sampling logic for Datadog tracing" +authors.workspace = true + +[dependencies] +serde = { workspace = true, features = ["derive"] } +serde_json = { workspace = true } +hashbrown = { workspace = true } +foldhash = { workspace = true } +lru = "0.16.3" + +[dev-dependencies] +criterion = "0.5" diff --git a/datadog-opentelemetry/src/sampling/agent_service_sampler.rs b/libdd-sampling/src/agent_service_sampler.rs similarity index 91% rename from datadog-opentelemetry/src/sampling/agent_service_sampler.rs rename to libdd-sampling/src/agent_service_sampler.rs index 5100f1ad..a8841027 100644 --- a/datadog-opentelemetry/src/sampling/agent_service_sampler.rs +++ b/libdd-sampling/src/agent_service_sampler.rs @@ -6,16 +6,16 @@ use std::{ sync::{Arc, RwLock}, }; -use super::rate_sampler::RateSampler; +use crate::rate_sampler::RateSampler; #[derive(Debug, serde::Deserialize)] -pub(crate) struct AgentRates<'a> { +pub struct AgentRates<'a> { #[serde(borrow)] pub rates_by_service: Option>, } #[derive(Debug, Default, Clone)] -pub(crate) struct ServicesSampler { +pub struct ServicesSampler { inner: Arc>>, } diff --git a/datadog-opentelemetry/src/sampling/constants.rs b/libdd-sampling/src/constants.rs similarity index 100% rename from datadog-opentelemetry/src/sampling/constants.rs rename to libdd-sampling/src/constants.rs diff --git a/datadog-opentelemetry/src/sampling/datadog_sampler.rs b/libdd-sampling/src/datadog_sampler.rs similarity index 98% rename from datadog-opentelemetry/src/sampling/datadog_sampler.rs rename to libdd-sampling/src/datadog_sampler.rs index da0cbccc..eb1b8207 100644 --- a/datadog-opentelemetry/src/sampling/datadog_sampler.rs +++ b/libdd-sampling/src/datadog_sampler.rs @@ -1,18 +1,18 @@ // Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 -use crate::core::configuration::SamplingRuleConfig; -use crate::core::constants::{ +use crate::dd_constants::{ RL_EFFECTIVE_RATE, SAMPLING_AGENT_RATE_TAG_KEY, SAMPLING_DECISION_MAKER_TAG_KEY, SAMPLING_KNUTH_RATE_TAG_KEY, SAMPLING_PRIORITY_TAG_KEY, SAMPLING_RULE_RATE_TAG_KEY, }; -use crate::core::sampling::{mechanism, priority, SamplingMechanism, SamplingPriority}; +use crate::dd_sampling::{mechanism, priority, SamplingMechanism, SamplingPriority}; +use crate::sampling_rule_config::SamplingRuleConfig; /// Type alias for sampling rules update callback /// Consolidated callback type used across crates for remote config sampling updates pub type SamplingRulesCallback = Box Fn(&'a [SamplingRuleConfig]) + Send + Sync>; -use crate::sampling::{SamplingData, SpanProperties}; +use crate::types::{SamplingData, SpanProperties}; use super::agent_service_sampler::{AgentRates, ServicesSampler}; use super::rate_limiter::RateLimiter; @@ -51,7 +51,7 @@ impl DatadogSampler { self.service_samplers.update_rates(rates); } - pub(crate) fn on_agent_response(&self) -> Box Fn(&'a str) + Send + Sync> { + pub fn on_agent_response(&self) -> Box Fn(&'a str) + Send + Sync> { let service_samplers = self.service_samplers.clone(); Box::new(move |s: &str| { let Ok(new_rates) = serde_json::de::from_str::(s) else { @@ -282,7 +282,7 @@ impl DdSamplingResult { /// An optional vector of attributes to add to the sampling result pub fn to_dd_sampling_tags(&self, factory: &F) -> Option> where - F: crate::sampling::AttributeFactory, + F: crate::types::AttributeFactory, { let Some(root_info) = &self.trace_root_info else { return None; // No root info, return empty attributes @@ -336,11 +336,11 @@ impl DdSamplingResult { #[cfg(test)] mod tests { use super::*; - use crate::sampling::constants::{ + use crate::constants::{ attr::{ENV_TAG, RESOURCE_TAG}, pattern, }; - use crate::sampling::{AttributeLike, TraceIdLike, ValueLike}; + use crate::types::{AttributeLike, TraceIdLike, ValueLike}; use std::borrow::Cow; use std::collections::HashMap; @@ -570,7 +570,7 @@ mod tests { struct TestAttributeFactory; - impl crate::sampling::AttributeFactory for TestAttributeFactory { + impl crate::types::AttributeFactory for TestAttributeFactory { type Attribute = TestAttribute; fn create_i64(&self, key: &'static str, value: i64) -> Self::Attribute { @@ -1173,7 +1173,7 @@ mod tests { // Should inherit the sampling decision from parent assert!(result_sampled.get_priority().is_keep()); assert!(result_sampled - .to_dd_sampling_tags(&crate::sampling::OtelAttributeFactory) + .to_dd_sampling_tags(&TestAttributeFactory) .is_none()); // Test with non-sampled parent context @@ -1183,7 +1183,7 @@ mod tests { // Should inherit the sampling decision from parent assert!(!result_not_sampled.get_priority().is_keep()); assert!(result_not_sampled - .to_dd_sampling_tags(&crate::sampling::OtelAttributeFactory) + .to_dd_sampling_tags(&TestAttributeFactory) .is_none()); } @@ -1210,9 +1210,7 @@ mod tests { // Should sample and add attributes assert!(result.get_priority().is_keep()); - assert!(result - .to_dd_sampling_tags(&crate::sampling::OtelAttributeFactory) - .is_some()); + assert!(result.to_dd_sampling_tags(&TestAttributeFactory).is_some()); // Test with non-matching attributes let attrs_no_match = create_attributes("other-resource", "prod"); @@ -1223,7 +1221,7 @@ mod tests { // Should still sample (default behavior when no rules match) and add attributes assert!(result_no_match.get_priority().is_keep()); assert!(result_no_match - .to_dd_sampling_tags(&crate::sampling::OtelAttributeFactory) + .to_dd_sampling_tags(&TestAttributeFactory) .is_some()); } diff --git a/datadog-opentelemetry/src/core/constants.rs b/libdd-sampling/src/dd_constants.rs similarity index 76% rename from datadog-opentelemetry/src/core/constants.rs rename to libdd-sampling/src/dd_constants.rs index a4fbaee7..5d6de6b8 100644 --- a/datadog-opentelemetry/src/core/constants.rs +++ b/libdd-sampling/src/dd_constants.rs @@ -9,22 +9,16 @@ pub const HIGHER_ORDER_TRACE_ID_BITS_TAG: &str = "_dd.p.tid"; #[allow(unused)] pub const SPAN_KIND_TAG: &str = "span.kind"; -/// Event extraction sampling rate metric key. pub const SAMPLING_RATE_EVENT_EXTRACTION_KEY: &str = "_dd1.sr.eausr"; -/// Sampling priority metric key. pub const SAMPLING_PRIORITY_TAG_KEY: &str = "_sampling_priority_v1"; -/// Sampling decision maker propagation tag key. pub const SAMPLING_DECISION_MAKER_TAG_KEY: &str = "_dd.p.dm"; -/// Sampling rule rate metric key. pub const SAMPLING_RULE_RATE_TAG_KEY: &str = "_dd.rule_psr"; -/// Sampling agent rate metric key. pub const SAMPLING_AGENT_RATE_TAG_KEY: &str = "_dd.agent_psr"; -/// Rate limiter effective rate metric key. pub const RL_EFFECTIVE_RATE: &str = "_dd.limit_psr"; /// Knuth Sampling Rate propagated tag key. diff --git a/datadog-opentelemetry/src/core/sampling.rs b/libdd-sampling/src/dd_sampling.rs similarity index 96% rename from datadog-opentelemetry/src/core/sampling.rs rename to libdd-sampling/src/dd_sampling.rs index 6532afad..f80482bb 100644 --- a/datadog-opentelemetry/src/core/sampling.rs +++ b/libdd-sampling/src/dd_sampling.rs @@ -30,11 +30,11 @@ pub struct SamplingPriority { } impl SamplingPriority { - pub(crate) const fn from_i8(value: i8) -> Self { + pub const fn from_i8(value: i8) -> Self { Self { value } } - pub(crate) fn into_i8(self) -> i8 { + pub fn into_i8(self) -> i8 { self.value } @@ -48,7 +48,7 @@ impl SamplingPriority { /// # Examples /// /// ``` - /// use datadog_opentelemetry::core_pub_hack::sampling::priority; + /// use libdd_sampling::priority; /// /// assert!(priority::AUTO_KEEP.is_keep()); /// assert!(priority::USER_KEEP.is_keep()); @@ -105,15 +105,15 @@ pub struct SamplingMechanism { } impl SamplingMechanism { - pub(crate) const fn from_u8(value: u8) -> Self { + pub const fn from_u8(value: u8) -> Self { Self { value } } - pub(crate) fn into_u8(self) -> u8 { + pub fn into_u8(self) -> u8 { self.value } - pub(crate) fn to_priority(self, is_keep: bool) -> SamplingPriority { + pub fn to_priority(self, is_keep: bool) -> SamplingPriority { const AUTO_PAIR: PriorityPair = PriorityPair { keep: priority::AUTO_KEEP, reject: priority::AUTO_REJECT, diff --git a/datadog-opentelemetry/src/sampling/glob_matcher.rs b/libdd-sampling/src/glob_matcher.rs similarity index 100% rename from datadog-opentelemetry/src/sampling/glob_matcher.rs rename to libdd-sampling/src/glob_matcher.rs diff --git a/libdd-sampling/src/lib.rs b/libdd-sampling/src/lib.rs new file mode 100644 index 00000000..dbb709fd --- /dev/null +++ b/libdd-sampling/src/lib.rs @@ -0,0 +1,37 @@ +// Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +//! Core sampling logic for Datadog tracing +//! +//! This crate provides generic sampling infrastructure including: +//! - Trait abstractions for trace IDs, attributes, and span properties +//! - Rate-based sampling algorithms +//! - Rate limiting functionality +//! - Glob pattern matching for sampling rules +//! - Sampling-related constants +//! - Rule-based sampling with pattern matching +//! - Agent-provided sampling rates +//! - Complete Datadog sampler implementation + +pub mod agent_service_sampler; +pub mod constants; +pub mod datadog_sampler; +pub mod dd_constants; +pub mod dd_sampling; +pub mod glob_matcher; +pub mod rate_limiter; +pub mod rate_sampler; +pub mod rules_sampler; +pub mod sampling_rule; +pub mod sampling_rule_config; +pub mod types; + +// Re-export key types for convenience +pub use agent_service_sampler::ServicesSampler; +pub use datadog_sampler::{DatadogSampler, SamplingRulesCallback}; +pub use dd_sampling::{mechanism, priority, SamplingDecision, SamplingMechanism, SamplingPriority}; +pub use sampling_rule::SamplingRule; +pub use sampling_rule_config::{ParsedSamplingRules, SamplingRuleConfig}; +pub use types::{ + AttributeFactory, AttributeLike, SamplingData, SpanProperties, TraceIdLike, ValueLike, +}; diff --git a/datadog-opentelemetry/src/sampling/rate_limiter.rs b/libdd-sampling/src/rate_limiter.rs similarity index 99% rename from datadog-opentelemetry/src/sampling/rate_limiter.rs rename to libdd-sampling/src/rate_limiter.rs index bc69c312..ff9ebbc6 100644 --- a/datadog-opentelemetry/src/sampling/rate_limiter.rs +++ b/libdd-sampling/src/rate_limiter.rs @@ -7,7 +7,7 @@ use std::time::Instant; /// A token bucket rate limiter implementation #[derive(Clone)] -pub(crate) struct RateLimiter { +pub struct RateLimiter { /// Rate limit value that doesn't need to be protected by mutex rate_limit: i32, diff --git a/datadog-opentelemetry/src/sampling/rate_sampler.rs b/libdd-sampling/src/rate_sampler.rs similarity index 99% rename from datadog-opentelemetry/src/sampling/rate_sampler.rs rename to libdd-sampling/src/rate_sampler.rs index 9ca2d794..fd20fb2d 100644 --- a/datadog-opentelemetry/src/sampling/rate_sampler.rs +++ b/libdd-sampling/src/rate_sampler.rs @@ -2,13 +2,13 @@ // SPDX-License-Identifier: Apache-2.0 use super::constants::{numeric, rate}; -use crate::sampling::TraceIdLike; +use crate::types::TraceIdLike; use numeric::{KNUTH_FACTOR, MAX_UINT_64BITS}; use std::fmt; /// Keeps (100 * `sample_rate`)% of the traces randomly. #[derive(Clone)] -pub(crate) struct RateSampler { +pub struct RateSampler { sample_rate: f64, sampling_id_threshold: u64, } diff --git a/datadog-opentelemetry/src/sampling/rules_sampler.rs b/libdd-sampling/src/rules_sampler.rs similarity index 100% rename from datadog-opentelemetry/src/sampling/rules_sampler.rs rename to libdd-sampling/src/rules_sampler.rs diff --git a/datadog-opentelemetry/src/sampling/sampling_rule.rs b/libdd-sampling/src/sampling_rule.rs similarity index 97% rename from datadog-opentelemetry/src/sampling/sampling_rule.rs rename to libdd-sampling/src/sampling_rule.rs index 38332f52..d721630d 100644 --- a/datadog-opentelemetry/src/sampling/sampling_rule.rs +++ b/libdd-sampling/src/sampling_rule.rs @@ -1,14 +1,13 @@ // Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 -use crate::core::configuration::SamplingRuleConfig; -use crate::sampling::{AttributeLike, SpanProperties, TraceIdLike, ValueLike}; +use crate::constants::pattern::NO_RULE; +use crate::glob_matcher::GlobMatcher; +use crate::rate_sampler::RateSampler; +use crate::sampling_rule_config::SamplingRuleConfig; +use crate::types::{AttributeLike, SpanProperties, TraceIdLike, ValueLike}; use std::collections::HashMap; -use super::constants::pattern::NO_RULE; -use super::glob_matcher::GlobMatcher; -use super::rate_sampler::RateSampler; - // HTTP status code attribute constants const HTTP_RESPONSE_STATUS_CODE: &str = "http.response.status_code"; const HTTP_STATUS_CODE: &str = "http.status_code"; diff --git a/datadog-opentelemetry/src/core/configuration/sampling_rule_config.rs b/libdd-sampling/src/sampling_rule_config.rs similarity index 96% rename from datadog-opentelemetry/src/core/configuration/sampling_rule_config.rs rename to libdd-sampling/src/sampling_rule_config.rs index 7e3f83ff..308107f9 100644 --- a/datadog-opentelemetry/src/core/configuration/sampling_rule_config.rs +++ b/libdd-sampling/src/sampling_rule_config.rs @@ -46,8 +46,8 @@ fn default_provenance() -> String { } #[derive(Debug, Default, Clone, PartialEq)] -pub(crate) struct ParsedSamplingRules { - pub(crate) rules: Vec, +pub struct ParsedSamplingRules { + pub rules: Vec, } impl Deref for ParsedSamplingRules { diff --git a/datadog-opentelemetry/src/sampling/types.rs b/libdd-sampling/src/types.rs similarity index 99% rename from datadog-opentelemetry/src/sampling/types.rs rename to libdd-sampling/src/types.rs index 173589e9..07aa7ccc 100644 --- a/datadog-opentelemetry/src/sampling/types.rs +++ b/libdd-sampling/src/types.rs @@ -13,7 +13,7 @@ use std::borrow::Cow; /// # Examples /// /// ``` -/// use datadog_opentelemetry::sampling::TraceIdLike; +/// use libdd_sampling::TraceIdLike; /// /// #[derive(Clone, PartialEq, Eq)] /// struct MyTraceId(u128); From de4f688845925a094d3d7bcbc89ac64113b50a73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Antonsson?= Date: Fri, 20 Mar 2026 17:40:53 +0100 Subject: [PATCH 8/8] refactor(sampling): apply code review comments --- .../benches/otel_sampling_benchmark.rs | 20 --- .../src/core/configuration/configuration.rs | 134 +++++++++++++----- .../src/core/configuration/mod.rs | 4 +- .../configuration/sampling_rule_config.rs | 115 +++++++++++++++ datadog-opentelemetry/src/sampler.rs | 9 +- .../src/sampling/otel_mappings.rs | 14 +- .../integration_tests/opentelemetry_api.rs | 1 - libdd-sampling/src/dd_constants.rs | 6 + libdd-sampling/src/lib.rs | 20 +-- libdd-sampling/src/sampling_rule_config.rs | 5 +- 10 files changed, 251 insertions(+), 77 deletions(-) create mode 100644 datadog-opentelemetry/src/core/configuration/sampling_rule_config.rs diff --git a/datadog-opentelemetry/benches/otel_sampling_benchmark.rs b/datadog-opentelemetry/benches/otel_sampling_benchmark.rs index b1e92aeb..aaf6dc59 100644 --- a/datadog-opentelemetry/benches/otel_sampling_benchmark.rs +++ b/datadog-opentelemetry/benches/otel_sampling_benchmark.rs @@ -60,7 +60,6 @@ fn create_benchmark_configs() -> Vec { name: None, resource: None, tags: HashMap::new(), - provenance: "".to_string(), }], resource: opentelemetry_sdk::Resource::builder_empty().build(), trace_id, @@ -79,7 +78,6 @@ fn create_benchmark_configs() -> Vec { name: None, resource: None, tags: HashMap::new(), - provenance: "".to_string(), }], resource: opentelemetry_sdk::Resource::builder() .with_service_name("test-service") @@ -100,7 +98,6 @@ fn create_benchmark_configs() -> Vec { name: None, resource: None, tags: HashMap::new(), - provenance: "".to_string(), }], resource: opentelemetry_sdk::Resource::builder() .with_service_name("other-service") @@ -121,7 +118,6 @@ fn create_benchmark_configs() -> Vec { name: Some("http.*".to_string()), resource: None, tags: HashMap::new(), - provenance: "".to_string(), }], resource: opentelemetry_sdk::Resource::builder_empty().build(), trace_id, @@ -140,7 +136,6 @@ fn create_benchmark_configs() -> Vec { name: Some("http.*".to_string()), resource: None, tags: HashMap::new(), - provenance: "".to_string(), }], resource: opentelemetry_sdk::Resource::builder_empty().build(), trace_id, @@ -159,7 +154,6 @@ fn create_benchmark_configs() -> Vec { name: None, resource: Some("/api/*".to_string()), tags: HashMap::new(), - provenance: "".to_string(), }], resource: opentelemetry_sdk::Resource::builder_empty().build(), trace_id, @@ -181,7 +175,6 @@ fn create_benchmark_configs() -> Vec { name: None, resource: Some("/api/*".to_string()), tags: HashMap::new(), - provenance: "".to_string(), }], resource: opentelemetry_sdk::Resource::builder_empty().build(), trace_id, @@ -203,7 +196,6 @@ fn create_benchmark_configs() -> Vec { name: None, resource: None, tags: HashMap::from([("environment".to_string(), "production".to_string())]), - provenance: "".to_string(), }], resource: opentelemetry_sdk::Resource::builder_empty().build(), trace_id, @@ -225,7 +217,6 @@ fn create_benchmark_configs() -> Vec { name: None, resource: None, tags: HashMap::from([("environment".to_string(), "production".to_string())]), - provenance: "".to_string(), }], resource: opentelemetry_sdk::Resource::builder_empty().build(), trace_id, @@ -247,7 +238,6 @@ fn create_benchmark_configs() -> Vec { name: Some("http.*".to_string()), resource: Some("/api/v1/*".to_string()), tags: HashMap::from([("environment".to_string(), "production".to_string())]), - provenance: "".to_string(), }], resource: opentelemetry_sdk::Resource::builder() .with_service_name("api-service") @@ -272,7 +262,6 @@ fn create_benchmark_configs() -> Vec { name: Some("http.*".to_string()), resource: Some("/api/v1/*".to_string()), tags: HashMap::from([("environment".to_string(), "production".to_string())]), - provenance: "".to_string(), }], resource: opentelemetry_sdk::Resource::builder() .with_service_name("api-service") @@ -298,7 +287,6 @@ fn create_benchmark_configs() -> Vec { name: None, resource: None, tags: HashMap::new(), - provenance: "".to_string(), }, SamplingRuleConfig { sample_rate: 0.5, @@ -306,7 +294,6 @@ fn create_benchmark_configs() -> Vec { name: None, resource: None, tags: HashMap::new(), - provenance: "".to_string(), }, SamplingRuleConfig { sample_rate: 1.0, @@ -314,7 +301,6 @@ fn create_benchmark_configs() -> Vec { name: None, resource: None, tags: HashMap::new(), - provenance: "".to_string(), }, ], resource: opentelemetry_sdk::Resource::builder() @@ -337,7 +323,6 @@ fn create_benchmark_configs() -> Vec { name: None, resource: None, tags: HashMap::new(), - provenance: "".to_string(), }, SamplingRuleConfig { sample_rate: 0.5, @@ -345,7 +330,6 @@ fn create_benchmark_configs() -> Vec { name: None, resource: None, tags: HashMap::new(), - provenance: "".to_string(), }, SamplingRuleConfig { sample_rate: 1.0, @@ -353,7 +337,6 @@ fn create_benchmark_configs() -> Vec { name: None, resource: None, tags: HashMap::new(), - provenance: "".to_string(), }, ], resource: opentelemetry_sdk::Resource::builder() @@ -375,7 +358,6 @@ fn create_benchmark_configs() -> Vec { name: None, resource: None, tags: HashMap::from([("key10".to_string(), "value10".to_string())]), - provenance: "".to_string(), }], resource: opentelemetry_sdk::Resource::builder_empty().build(), trace_id, @@ -400,7 +382,6 @@ fn create_benchmark_configs() -> Vec { ("region".to_string(), "us-east-1".to_string()), ("version".to_string(), "v1.2.3".to_string()), ]), - provenance: "".to_string(), }], resource: opentelemetry_sdk::Resource::builder() .with_service_name("test-service") @@ -427,7 +408,6 @@ fn create_benchmark_configs() -> Vec { ("region".to_string(), "us-east-1".to_string()), ("version".to_string(), "v1.2.3".to_string()), ]), - provenance: "".to_string(), }], resource: opentelemetry_sdk::Resource::builder() .with_service_name("test-service") diff --git a/datadog-opentelemetry/src/core/configuration/configuration.rs b/datadog-opentelemetry/src/core/configuration/configuration.rs index 3e29460e..47006dec 100644 --- a/datadog-opentelemetry/src/core/configuration/configuration.rs +++ b/datadog-opentelemetry/src/core/configuration/configuration.rs @@ -12,6 +12,7 @@ use std::{borrow::Cow, sync::OnceLock}; use rustc_version_runtime::version; +use super::{ParsedSamplingRules, SamplingRuleConfig}; use crate::core::configuration::sources::{ CompositeConfigSourceResult, CompositeSource, ConfigKey, ConfigSourceOrigin, }; @@ -19,13 +20,13 @@ use crate::core::configuration::supported_configurations::SupportedConfiguration use crate::core::log::LevelFilter; use crate::core::telemetry; use crate::{dd_error, dd_warn}; -use libdd_sampling::{ParsedSamplingRules, SamplingRuleConfig}; /// Different types of remote configuration updates that can trigger callbacks #[derive(Debug, Clone)] pub enum RemoteConfigUpdate { - /// Sampling rules were updated from remote configuration - SamplingRules(Vec), + /// Sampling rules were updated from remote configuration. + /// Uses the internal type to preserve provenance from remote config. + SamplingRules(Vec), // Future remote config update types should be added here as new variants. // E.g. // - FeatureFlags(HashMap) @@ -1490,24 +1491,29 @@ impl Config { rules_json: &str, config_id: Option, ) -> Result<(), String> { - // Parse the JSON into SamplingRuleConfig objects - let rules: Vec = serde_json::from_str(rules_json) - .map_err(|e| format!("Failed to parse sampling rules JSON: {e}"))?; + // Parse the JSON into the internal type to preserve provenance from remote config. + let internal_rules: Vec = + serde_json::from_str(rules_json) + .map_err(|e| format!("Failed to parse sampling rules JSON: {e}"))?; // If remote config sends empty rules, clear remote config to fall back to local rules - if rules.is_empty() { + if internal_rules.is_empty() { self.clear_remote_sampling_rules(config_id); } else { + // Convert to public type for storage (provenance is dropped). + let rules: Vec = + internal_rules.iter().cloned().map(Into::into).collect(); self.trace_sampling_rules.set_override_value( ParsedSamplingRules { rules }, ConfigSourceOrigin::RemoteConfig, ); self.trace_sampling_rules.set_config_id(config_id); - // Notify callbacks about the sampling rules update - self.remote_config_callbacks.lock().unwrap().notify_update( - &RemoteConfigUpdate::SamplingRules(self.trace_sampling_rules().to_vec()), - ); + // Notify callbacks with the internal rules (preserves provenance) + self.remote_config_callbacks + .lock() + .unwrap() + .notify_update(&RemoteConfigUpdate::SamplingRules(internal_rules)); telemetry::notify_configuration_update(&self.trace_sampling_rules); } @@ -1543,9 +1549,17 @@ impl Config { self.trace_sampling_rules.unset_override_value(); self.trace_sampling_rules.set_config_id(config_id); - self.remote_config_callbacks.lock().unwrap().notify_update( - &RemoteConfigUpdate::SamplingRules(self.trace_sampling_rules().to_vec()), - ); + // Fallback rules are locally defined, so "local" provenance is correct + let internal: Vec = self + .trace_sampling_rules() + .iter() + .cloned() + .map(Into::into) + .collect(); + self.remote_config_callbacks + .lock() + .unwrap() + .notify_update(&RemoteConfigUpdate::SamplingRules(internal)); telemetry::notify_configuration_update(&self.trace_sampling_rules); } @@ -2412,7 +2426,6 @@ mod tests { &SamplingRuleConfig { sample_rate: 0.5, service: Some("web-api".to_string()), - provenance: "customer".to_string(), ..SamplingRuleConfig::default() } ); @@ -2439,7 +2452,6 @@ mod tests { &SamplingRuleConfig { sample_rate: 0.5, service: Some("test-service".to_string()), - provenance: "customer".to_string(), ..SamplingRuleConfig::default() } ); @@ -2462,7 +2474,6 @@ mod tests { name: None, resource: None, tags: HashMap::new(), - provenance: "manual".to_string(), }]) .set_trace_rate_limit(200) .set_service("manual-service".to_string()) @@ -2478,7 +2489,6 @@ mod tests { &SamplingRuleConfig { sample_rate: 0.8, service: Some("manual-service".to_string()), - provenance: "manual".to_string(), ..SamplingRuleConfig::default() } ); @@ -2786,16 +2796,15 @@ mod tests { fn test_sampling_rules_update_callbacks() { let config = Config::builder().build(); - // Track callback invocations + // Track callback invocations — uses internal type to verify provenance preservation let callback_called = Arc::new(Mutex::new(false)); - let callback_rules = Arc::new(Mutex::new(Vec::::new())); + let callback_rules = Arc::new(Mutex::new(Vec::::new())); let callback_called_clone = callback_called.clone(); let callback_rules_clone = callback_rules.clone(); config.set_sampling_rules_callback(move |update| { *callback_called_clone.lock().unwrap() = true; - // Store the rules - for now we only have SamplingRules variant let RemoteConfigUpdate::SamplingRules(rules) = update; *callback_rules_clone.lock().unwrap() = rules.clone(); }); @@ -2804,22 +2813,20 @@ mod tests { assert!(!*callback_called.lock().unwrap()); assert!(callback_rules.lock().unwrap().is_empty()); - // Update rules from remote config - let new_rules = vec![SamplingRuleConfig { - sample_rate: 0.5, - service: Some("test-service".to_string()), - provenance: "remote".to_string(), - ..SamplingRuleConfig::default() - }]; - - let rules_json = serde_json::to_string(&new_rules).unwrap(); + // Update rules from remote config with provenance "dynamic" + let rules_json = r#"[{"sample_rate":0.5,"service":"test-service","provenance":"dynamic"}]"#; config - .update_sampling_rules_from_remote(&rules_json, None) + .update_sampling_rules_from_remote(rules_json, None) .unwrap(); - // Callback should be called with the new rules + // Callback should be called with the new rules, provenance preserved assert!(*callback_called.lock().unwrap()); - assert_eq!(*callback_rules.lock().unwrap(), new_rules); + let received = callback_rules.lock().unwrap(); + assert_eq!(received.len(), 1); + assert_eq!(received[0].sample_rate, 0.5); + assert_eq!(received[0].service, Some("test-service".to_string())); + assert_eq!(received[0].provenance, "dynamic"); + drop(received); // Test clearing rules *callback_called.lock().unwrap() = false; @@ -2833,6 +2840,55 @@ mod tests { assert!(callback_rules.lock().unwrap().is_empty()); } + #[test] + fn test_clear_remote_rules_callback_has_local_provenance() { + let config = Config::builder() + .set_trace_sampling_rules(vec![SamplingRuleConfig { + sample_rate: 0.5, + service: Some("local-svc".to_string()), + ..SamplingRuleConfig::default() + }]) + .build(); + + let callback_rules = Arc::new(Mutex::new(Vec::::new())); + let clone = callback_rules.clone(); + config.set_sampling_rules_callback(move |update| { + let RemoteConfigUpdate::SamplingRules(rules) = update; + *clone.lock().unwrap() = rules.clone(); + }); + + // Push remote rules then clear to trigger fallback + config + .update_sampling_rules_from_remote( + r#"[{"sample_rate":0.9,"provenance":"dynamic"}]"#, + None, + ) + .unwrap(); + config.clear_remote_sampling_rules(None); + + // Fallback rules should have "local" provenance + let received = callback_rules.lock().unwrap(); + assert_eq!(received.len(), 1); + assert_eq!(received[0].sample_rate, 0.5); + assert_eq!(received[0].provenance, "local"); + } + + #[test] + fn test_public_sampling_rule_config_ignores_provenance_in_json() { + // The public SamplingRuleConfig should silently ignore a "provenance" field in JSON, + // since serde skips unknown fields by default. + let json = r#"[{"sample_rate":0.5,"service":"svc","provenance":"dynamic"}]"#; + let parsed: ParsedSamplingRules = json.parse().unwrap(); + assert_eq!(parsed.rules.len(), 1); + assert_eq!(parsed.rules[0].sample_rate, 0.5); + assert_eq!(parsed.rules[0].service, Some("svc".to_string())); + // No provenance field on the public type — it was silently dropped. + + // Round-trip: serialized output should NOT contain provenance + let serialized = parsed.to_string(); + assert!(!serialized.contains("provenance")); + } + #[test] fn test_config_item_priority() { // Test that ConfigItem respects priority: remote_config > code > env_var > default @@ -2928,7 +2984,6 @@ mod tests { rules: vec![SamplingRuleConfig { sample_rate: 0.3, service: Some("local-service".to_string()), - provenance: "local".to_string(), ..SamplingRuleConfig::default() }], }; @@ -2993,7 +3048,6 @@ mod tests { let new_rules = vec![SamplingRuleConfig { sample_rate: 0.5, service: Some("test-service".to_string()), - provenance: "remote".to_string(), ..SamplingRuleConfig::default() }]; @@ -3258,8 +3312,9 @@ mod tests { let config = Config::builder_with_sources(&sources).build(); let expected = ParsedSamplingRules::from_str( - r#"[{"sample_rate":0.5,"service":"web-api","name":null,"resource":null,"tags":{},"provenance":"customer"}]"# - ).unwrap(); + r#"[{"sample_rate":0.5,"service":"web-api","name":null,"resource":null,"tags":{}}]"#, + ) + .unwrap(); let configurations = &config.trace_sampling_rules.get_all_configurations(); // active config is the one with highest seq_id @@ -3274,7 +3329,10 @@ mod tests { ); // Update ConfigItemRc via RC - let expected_rc = ParsedSamplingRules::from_str(r#"[{"sample_rate":1,"service":"web-api","name":null,"resource":null,"tags":{},"provenance":"customer"}]"#).unwrap(); + let expected_rc = ParsedSamplingRules::from_str( + r#"[{"sample_rate":1,"service":"web-api","name":null,"resource":null,"tags":{}}]"#, + ) + .unwrap(); config .trace_sampling_rules .set_override_value(expected_rc.clone(), ConfigSourceOrigin::RemoteConfig); diff --git a/datadog-opentelemetry/src/core/configuration/mod.rs b/datadog-opentelemetry/src/core/configuration/mod.rs index fc73d142..4922f18c 100644 --- a/datadog-opentelemetry/src/core/configuration/mod.rs +++ b/datadog-opentelemetry/src/core/configuration/mod.rs @@ -28,5 +28,5 @@ mod supported_configurations; pub use configuration::{Config, ConfigBuilder, OtlpProtocol, TracePropagationStyle}; pub(crate) use configuration::{ConfigurationProvider, RemoteConfigUpdate}; -// Re-export from libdd-sampling -pub use libdd_sampling::SamplingRuleConfig; +mod sampling_rule_config; +pub use sampling_rule_config::{ParsedSamplingRules, SamplingRuleConfig}; diff --git a/datadog-opentelemetry/src/core/configuration/sampling_rule_config.rs b/datadog-opentelemetry/src/core/configuration/sampling_rule_config.rs new file mode 100644 index 00000000..c771f4c7 --- /dev/null +++ b/datadog-opentelemetry/src/core/configuration/sampling_rule_config.rs @@ -0,0 +1,115 @@ +// Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; +use std::fmt::Display; +use std::ops::Deref; +use std::str::FromStr; + +/// Configuration for a single sampling rule. +// +// This is the public-facing type. The `provenance` field from +// [`libdd_sampling::SamplingRuleConfig`] is intentionally omitted — +// it is set automatically when the rule reaches the sampler. + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)] +pub struct SamplingRuleConfig { + /// The sample rate to apply (0.0-1.0) + pub sample_rate: f64, + + /// Optional service name pattern to match + #[serde(default)] + pub service: Option, + + /// Optional span name pattern to match + #[serde(default)] + pub name: Option, + + /// Optional resource name pattern to match + #[serde(default)] + pub resource: Option, + + /// Tags that must match (key-value pairs) + #[serde(default)] + pub tags: HashMap, +} + +impl Display for SamplingRuleConfig { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", serde_json::json!(self)) + } +} + +/// Wrapper for parsed sampling rules (from JSON env var or API). +#[derive(Debug, Default, Clone, PartialEq)] +pub struct ParsedSamplingRules { + /// The individual sampling rules. + pub rules: Vec, +} + +impl Deref for ParsedSamplingRules { + type Target = [SamplingRuleConfig]; + + fn deref(&self) -> &Self::Target { + &self.rules + } +} + +impl From for Vec { + fn from(parsed: ParsedSamplingRules) -> Self { + parsed.rules + } +} + +impl FromStr for ParsedSamplingRules { + type Err = serde_json::Error; + + fn from_str(s: &str) -> Result { + if s.trim().is_empty() { + return Ok(ParsedSamplingRules::default()); + } + // DD_TRACE_SAMPLING_RULES is expected to be a JSON array of SamplingRuleConfig objects. + let rules_vec: Vec = serde_json::from_str(s)?; + Ok(ParsedSamplingRules { rules: rules_vec }) + } +} + +impl Display for ParsedSamplingRules { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{}", + serde_json::to_string(&self.rules).unwrap_or_default() + ) + } +} + +// --------------------------------------------------------------------------- +// Conversions to/from internal libdd_sampling types +// --------------------------------------------------------------------------- + +impl From for libdd_sampling::SamplingRuleConfig { + fn from(public: SamplingRuleConfig) -> Self { + Self { + sample_rate: public.sample_rate, + service: public.service, + name: public.name, + resource: public.resource, + tags: public.tags, + provenance: "local".to_string(), + } + } +} + +impl From for SamplingRuleConfig { + fn from(internal: libdd_sampling::SamplingRuleConfig) -> Self { + Self { + sample_rate: internal.sample_rate, + service: internal.service, + name: internal.name, + resource: internal.resource, + tags: internal.tags, + } + } +} diff --git a/datadog-opentelemetry/src/sampler.rs b/datadog-opentelemetry/src/sampler.rs index db022d0a..00c6a69b 100644 --- a/datadog-opentelemetry/src/sampler.rs +++ b/datadog-opentelemetry/src/sampler.rs @@ -45,7 +45,13 @@ impl Sampler { // This is an Option to allow benchmarking different parts of sampling trace_registry: Option, ) -> Self { - let rules = SamplingRule::from_configs(cfg.trace_sampling_rules().to_vec()); + let internal_configs: Vec = cfg + .trace_sampling_rules() + .iter() + .cloned() + .map(Into::into) + .collect(); + let rules = SamplingRule::from_configs(internal_configs); let sampler = DatadogSampler::new(rules, cfg.trace_rate_limit()); Self { cfg, @@ -231,7 +237,6 @@ mod tests { name: None, resource: None, tags: HashMap::new(), - provenance: "customer".to_string(), }]) .build(), ); diff --git a/datadog-opentelemetry/src/sampling/otel_mappings.rs b/datadog-opentelemetry/src/sampling/otel_mappings.rs index a58d6e3e..9e910523 100644 --- a/datadog-opentelemetry/src/sampling/otel_mappings.rs +++ b/datadog-opentelemetry/src/sampling/otel_mappings.rs @@ -21,10 +21,16 @@ impl TraceIdLike for OtelTraceId { } } -/// Transparent wrapper around OpenTelemetry Value for trait implementations +/// Transparent wrapper around OpenTelemetry Value for trait implementations. +/// +/// `#[repr(transparent)]` guarantees the same memory layout as the inner type, +/// which makes the `from_ref` pointer cast sound. The compile-time assertion +/// below ensures this invariant is never accidentally broken. #[repr(transparent)] #[derive(Debug)] pub struct OtelValue(opentelemetry::Value); +const _: () = + assert!(std::mem::size_of::() == std::mem::size_of::()); impl OtelValue { /// Convert a reference to opentelemetry::Value to a reference to OtelValue @@ -45,10 +51,14 @@ impl ValueLike for OtelValue { } } -/// Transparent wrapper around OpenTelemetry KeyValue for trait implementations +/// Transparent wrapper around OpenTelemetry KeyValue for trait implementations. +/// +/// See `OtelValue` for the safety rationale behind `#[repr(transparent)]`. #[repr(transparent)] #[derive(Debug)] pub struct OtelKeyValue(opentelemetry::KeyValue); +const _: () = + assert!(std::mem::size_of::() == std::mem::size_of::()); impl OtelKeyValue { /// Convert a reference to opentelemetry::KeyValue to a reference to OtelKeyValue diff --git a/datadog-opentelemetry/tests/integration_tests/opentelemetry_api.rs b/datadog-opentelemetry/tests/integration_tests/opentelemetry_api.rs index 0b3936a6..922979e3 100644 --- a/datadog-opentelemetry/tests/integration_tests/opentelemetry_api.rs +++ b/datadog-opentelemetry/tests/integration_tests/opentelemetry_api.rs @@ -233,7 +233,6 @@ async fn test_remote_config_sampling_rates() { vec![SamplingRuleConfig { resource: Some("test-span".into()), sample_rate: 1.0, - provenance: "customer".into(), ..Default::default() }] ); diff --git a/libdd-sampling/src/dd_constants.rs b/libdd-sampling/src/dd_constants.rs index 5d6de6b8..a4fbaee7 100644 --- a/libdd-sampling/src/dd_constants.rs +++ b/libdd-sampling/src/dd_constants.rs @@ -9,16 +9,22 @@ pub const HIGHER_ORDER_TRACE_ID_BITS_TAG: &str = "_dd.p.tid"; #[allow(unused)] pub const SPAN_KIND_TAG: &str = "span.kind"; +/// Event extraction sampling rate metric key. pub const SAMPLING_RATE_EVENT_EXTRACTION_KEY: &str = "_dd1.sr.eausr"; +/// Sampling priority metric key. pub const SAMPLING_PRIORITY_TAG_KEY: &str = "_sampling_priority_v1"; +/// Sampling decision maker propagation tag key. pub const SAMPLING_DECISION_MAKER_TAG_KEY: &str = "_dd.p.dm"; +/// Sampling rule rate metric key. pub const SAMPLING_RULE_RATE_TAG_KEY: &str = "_dd.rule_psr"; +/// Sampling agent rate metric key. pub const SAMPLING_AGENT_RATE_TAG_KEY: &str = "_dd.agent_psr"; +/// Rate limiter effective rate metric key. pub const RL_EFFECTIVE_RATE: &str = "_dd.limit_psr"; /// Knuth Sampling Rate propagated tag key. diff --git a/libdd-sampling/src/lib.rs b/libdd-sampling/src/lib.rs index dbb709fd..f6ac72ee 100644 --- a/libdd-sampling/src/lib.rs +++ b/libdd-sampling/src/lib.rs @@ -13,18 +13,18 @@ //! - Agent-provided sampling rates //! - Complete Datadog sampler implementation -pub mod agent_service_sampler; -pub mod constants; -pub mod datadog_sampler; +pub(crate) mod agent_service_sampler; +pub(crate) mod constants; +pub(crate) mod datadog_sampler; pub mod dd_constants; pub mod dd_sampling; -pub mod glob_matcher; -pub mod rate_limiter; -pub mod rate_sampler; -pub mod rules_sampler; -pub mod sampling_rule; -pub mod sampling_rule_config; -pub mod types; +pub(crate) mod glob_matcher; +pub(crate) mod rate_limiter; +pub(crate) mod rate_sampler; +pub(crate) mod rules_sampler; +pub(crate) mod sampling_rule; +pub(crate) mod sampling_rule_config; +pub(crate) mod types; // Re-export key types for convenience pub use agent_service_sampler::ServicesSampler; diff --git a/libdd-sampling/src/sampling_rule_config.rs b/libdd-sampling/src/sampling_rule_config.rs index 308107f9..604bcecf 100644 --- a/libdd-sampling/src/sampling_rule_config.rs +++ b/libdd-sampling/src/sampling_rule_config.rs @@ -29,8 +29,9 @@ pub struct SamplingRuleConfig { #[serde(default)] pub tags: HashMap, - /// Where this rule comes from (customer, dynamic, default) - // TODO(paullgdc): this value should not be definable by customers + /// Where this rule comes from (customer, dynamic, default). + /// Not exposed in the public `datadog-opentelemetry` API — set automatically + /// during conversion from the public `SamplingRuleConfig` type. #[serde(default = "default_provenance")] pub provenance: String, }