diff --git a/Cargo.lock b/Cargo.lock index 54a3a437..dc5f12f7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -477,6 +477,7 @@ dependencies = [ "hyper-util", "libdd-common", "libdd-data-pipeline", + "libdd-sampling", "libdd-telemetry", "libdd-tinybytes", "libdd-trace-utils", @@ -1232,6 +1233,18 @@ dependencies = [ "tracing", ] +[[package]] +name = "libdd-sampling" +version = "0.3.1" +dependencies = [ + "criterion", + "foldhash 0.1.5", + "hashbrown 0.15.5", + "lru", + "serde", + "serde_json", +] + [[package]] name = "libdd-telemetry" version = "3.0.0" diff --git a/Cargo.toml b/Cargo.toml index 2f025c1e..e7f43413 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,6 +3,7 @@ [workspace] members = [ + "libdd-sampling", "datadog-opentelemetry", "datadog-opentelemetry/examples/propagator", "datadog-opentelemetry/examples/simple_tracing", diff --git a/datadog-opentelemetry/Cargo.toml b/datadog-opentelemetry/Cargo.toml index f2a3b5bc..44f1ac47 100644 --- a/datadog-opentelemetry/Cargo.toml +++ b/datadog-opentelemetry/Cargo.toml @@ -15,6 +15,8 @@ rustdoc-args = ["--cfg", "docsrs"] [dependencies] +# Internal dependencies +libdd-sampling = { path = "../libdd-sampling" } # External dependencies foldhash = { workspace = true } @@ -134,3 +136,15 @@ path = "benches/inject_benchmark.rs" name = "extract_benchmark" harness = false path = "benches/extract_benchmark.rs" + +[[bench]] +name = "datadog_sampling_benchmark" +harness = false +path = "benches/datadog_sampling_benchmark.rs" +required-features = ["test-utils"] + +[[bench]] +name = "otel_sampling_benchmark" +harness = false +path = "benches/otel_sampling_benchmark.rs" +required-features = ["test-utils"] diff --git a/datadog-opentelemetry/benches/datadog_sampling_benchmark.rs b/datadog-opentelemetry/benches/datadog_sampling_benchmark.rs new file mode 100644 index 00000000..21e77183 --- /dev/null +++ b/datadog-opentelemetry/benches/datadog_sampling_benchmark.rs @@ -0,0 +1,447 @@ +// Copyright 2026-Present Datadog, Inc. 
https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +use criterion::{criterion_group, criterion_main, Criterion}; +use datadog_opentelemetry::core_pub_hack::test_utils::benchmarks::{ + memory_allocated_measurement, MeasurementName, ReportingAllocator, +}; +use datadog_opentelemetry::sampling::OtelSamplingData; +use datadog_opentelemetry::sampling::SamplingRule; +use datadog_opentelemetry::sampling::{DatadogSampler, SamplingData}; +use opentelemetry::{trace::SpanKind, KeyValue, TraceId}; +use std::collections::HashMap; +use std::hint::black_box; +use std::sync::{Arc, RwLock}; + +#[global_allocator] +static GLOBAL: ReportingAllocator = ReportingAllocator::new(std::alloc::System); + +struct BenchmarkConfig { + name: &'static str, + rules: Vec, + resource: opentelemetry_sdk::Resource, + trace_id: TraceId, + span_name: &'static str, + span_kind: SpanKind, + attributes: Vec, + is_parent_sampled: Option, + should_keep: Option, +} + +fn create_benchmark_configs() -> Vec { + let trace_id = TraceId::from(0x12345678901234567890123456789012_u128); + + vec![ + // 1. All spans rule (baseline) + BenchmarkConfig { + name: "rule_all_spans_only_rate", + rules: vec![SamplingRule::new(1.0, None, None, None, None, None)], + resource: opentelemetry_sdk::Resource::builder_empty().build(), + trace_id, + span_name: "something", + span_kind: SpanKind::Server, + attributes: vec![KeyValue::new("foo", "bar"), KeyValue::new("bar", "baz")], + is_parent_sampled: None, + should_keep: Some(true), + }, + // 2. Service rule - matching + BenchmarkConfig { + name: "service_rule_matching", + rules: vec![SamplingRule::new( + 1.0, + Some("test-service".to_string()), + None, + None, + None, + None, + )], + resource: opentelemetry_sdk::Resource::builder() + .with_service_name("test-service") + .build(), + trace_id, + span_name: "test-operation", + span_kind: SpanKind::Server, + attributes: vec![KeyValue::new("foo", "bar")], + is_parent_sampled: None, + should_keep: Some(true), + }, + // 3. 
Service rule - not matching + BenchmarkConfig { + name: "service_rule_not_matching", + rules: vec![SamplingRule::new( + 1.0, + Some("test-service".to_string()), + None, + None, + None, + None, + )], + resource: opentelemetry_sdk::Resource::builder() + .with_service_name("other-service") + .build(), + trace_id, + span_name: "test-operation", + span_kind: SpanKind::Server, + attributes: vec![KeyValue::new("foo", "bar")], + is_parent_sampled: None, + should_keep: Some(true), + }, + // 4. Name pattern rule - matching + BenchmarkConfig { + name: "name_pattern_rule_matching", + rules: vec![SamplingRule::new( + 1.0, + None, + Some("http.*".to_string()), + None, + None, + None, + )], + resource: opentelemetry_sdk::Resource::builder_empty().build(), + trace_id, + span_name: "http.request", + span_kind: SpanKind::Server, + attributes: vec![KeyValue::new("http.method", "GET")], + is_parent_sampled: None, + should_keep: Some(true), + }, + // 5. Name pattern rule - not matching + BenchmarkConfig { + name: "name_pattern_rule_not_matching", + rules: vec![SamplingRule::new( + 1.0, + None, + Some("http.*".to_string()), + None, + None, + None, + )], + resource: opentelemetry_sdk::Resource::builder_empty().build(), + trace_id, + span_name: "grpc.request", + span_kind: SpanKind::Server, + attributes: vec![KeyValue::new("rpc.method", "GetUser")], + is_parent_sampled: None, + should_keep: Some(true), + }, + // 6. Resource pattern rule - matching + BenchmarkConfig { + name: "resource_pattern_rule_matching", + rules: vec![SamplingRule::new( + 1.0, + None, + None, + Some("/api/*".to_string()), + None, + None, + )], + resource: opentelemetry_sdk::Resource::builder_empty().build(), + trace_id, + span_name: "http.request", + span_kind: SpanKind::Server, + attributes: vec![ + KeyValue::new("http.method", "GET"), + KeyValue::new("http.route", "/api/users"), + ], + is_parent_sampled: None, + should_keep: Some(true), + }, + // 7. 
Resource pattern rule - not matching + BenchmarkConfig { + name: "resource_pattern_rule_not_matching", + rules: vec![SamplingRule::new( + 1.0, + None, + None, + Some("/api/*".to_string()), + None, + None, + )], + resource: opentelemetry_sdk::Resource::builder_empty().build(), + trace_id, + span_name: "http.request", + span_kind: SpanKind::Server, + attributes: vec![ + KeyValue::new("http.method", "GET"), + KeyValue::new("http.route", "/health"), + ], + is_parent_sampled: None, + should_keep: Some(true), + }, + // 8. Tag rule - matching + BenchmarkConfig { + name: "tag_rule_matching", + rules: vec![SamplingRule::new( + 1.0, + None, + None, + None, + Some(HashMap::from([( + "environment".to_string(), + "production".to_string(), + )])), + None, + )], + resource: opentelemetry_sdk::Resource::builder_empty().build(), + trace_id, + span_name: "test-operation", + span_kind: SpanKind::Server, + attributes: vec![ + KeyValue::new("environment", "production"), + KeyValue::new("foo", "bar"), + ], + is_parent_sampled: None, + should_keep: Some(true), + }, + // 9. Tag rule - not matching + BenchmarkConfig { + name: "tag_rule_not_matching", + rules: vec![SamplingRule::new( + 1.0, + None, + None, + None, + Some(HashMap::from([( + "environment".to_string(), + "production".to_string(), + )])), + None, + )], + resource: opentelemetry_sdk::Resource::builder_empty().build(), + trace_id, + span_name: "test-operation", + span_kind: SpanKind::Server, + attributes: vec![ + KeyValue::new("environment", "staging"), + KeyValue::new("foo", "bar"), + ], + is_parent_sampled: None, + should_keep: Some(true), + }, + // 10. 
Complex rule - matching + BenchmarkConfig { + name: "complex_rule_matching", + rules: vec![SamplingRule::new( + 0.5, + Some("api-service".to_string()), + Some("http.*".to_string()), + Some("/api/v1/*".to_string()), + Some(HashMap::from([( + "environment".to_string(), + "production".to_string(), + )])), + None, + )], + resource: opentelemetry_sdk::Resource::builder() + .with_service_name("api-service") + .build(), + trace_id, + span_name: "http.request", + span_kind: SpanKind::Server, + attributes: vec![ + KeyValue::new("environment", "production"), + KeyValue::new("http.method", "POST"), + KeyValue::new("http.route", "/api/v1/users"), + ], + is_parent_sampled: None, + should_keep: None, // Probabilistic sampling at 0.5 rate + }, + // 11. Complex rule - partial match + BenchmarkConfig { + name: "complex_rule_partial_match", + rules: vec![SamplingRule::new( + 0.5, + Some("api-service".to_string()), + Some("http.*".to_string()), + Some("/api/v1/*".to_string()), + Some(HashMap::from([( + "environment".to_string(), + "production".to_string(), + )])), + None, + )], + resource: opentelemetry_sdk::Resource::builder() + .with_service_name("api-service") + .build(), + trace_id, + span_name: "http.request", + span_kind: SpanKind::Server, + attributes: vec![ + KeyValue::new("environment", "staging"), + KeyValue::new("http.method", "POST"), + KeyValue::new("http.route", "/health"), + ], + is_parent_sampled: None, + should_keep: Some(true), + }, + // 12. 
Multiple rules - first match + BenchmarkConfig { + name: "multiple_rules_first_match", + rules: vec![ + SamplingRule::new(0.1, Some("api-service".to_string()), None, None, None, None), + SamplingRule::new(0.5, Some("web-service".to_string()), None, None, None, None), + SamplingRule::new(1.0, None, None, None, None, None), + ], + resource: opentelemetry_sdk::Resource::builder() + .with_service_name("api-service") + .build(), + trace_id, + span_name: "test-operation", + span_kind: SpanKind::Server, + attributes: vec![KeyValue::new("foo", "bar")], + is_parent_sampled: None, + should_keep: None, // Probabilistic sampling at 0.1 rate + }, + // 13. Multiple rules - last match + BenchmarkConfig { + name: "multiple_rules_last_match", + rules: vec![ + SamplingRule::new(0.1, Some("api-service".to_string()), None, None, None, None), + SamplingRule::new(0.5, Some("web-service".to_string()), None, None, None, None), + SamplingRule::new(1.0, None, None, None, None, None), + ], + resource: opentelemetry_sdk::Resource::builder() + .with_service_name("other-service") + .build(), + trace_id, + span_name: "test-operation", + span_kind: SpanKind::Server, + attributes: vec![KeyValue::new("foo", "bar")], + is_parent_sampled: None, + should_keep: Some(true), + }, + // 14. Many attributes + BenchmarkConfig { + name: "many_attributes", + rules: vec![SamplingRule::new( + 1.0, + None, + None, + None, + Some(HashMap::from([( + "key10".to_string(), + "value10".to_string(), + )])), + None, + )], + resource: opentelemetry_sdk::Resource::builder_empty().build(), + trace_id, + span_name: "test-operation", + span_kind: SpanKind::Server, + attributes: (0..20) + .map(|i| KeyValue::new(format!("key{}", i), format!("value{}", i))) + .collect(), + is_parent_sampled: None, + should_keep: Some(true), + }, + // 15. 
Parent sampled - short circuit with many attributes and complex rules + BenchmarkConfig { + name: "parent_sampled_short_circuit", + rules: vec![SamplingRule::new( + 1.0, + Some("test-service".to_string()), + Some("http.*".to_string()), + Some("/api/*".to_string()), + Some(HashMap::from([ + ("environment".to_string(), "production".to_string()), + ("region".to_string(), "us-east-1".to_string()), + ("version".to_string(), "v1.2.3".to_string()), + ])), + None, + )], + resource: opentelemetry_sdk::Resource::builder() + .with_service_name("test-service") + .build(), + trace_id, + span_name: "http.request", + span_kind: SpanKind::Server, + attributes: (0..50) + .map(|i| KeyValue::new(format!("key{}", i), format!("value{}", i))) + .collect(), + is_parent_sampled: Some(true), + should_keep: Some(true), + }, + // 16. Parent not sampled - short circuit with many attributes and complex rules + BenchmarkConfig { + name: "parent_not_sampled_short_circuit", + rules: vec![SamplingRule::new( + 1.0, + Some("test-service".to_string()), + Some("http.*".to_string()), + Some("/api/*".to_string()), + Some(HashMap::from([ + ("environment".to_string(), "production".to_string()), + ("region".to_string(), "us-east-1".to_string()), + ("version".to_string(), "v1.2.3".to_string()), + ])), + None, + )], + resource: opentelemetry_sdk::Resource::builder() + .with_service_name("test-service") + .build(), + trace_id, + span_name: "http.request", + span_kind: SpanKind::Server, + attributes: (0..50) + .map(|i| KeyValue::new(format!("key{}", i), format!("value{}", i))) + .collect(), + is_parent_sampled: Some(false), + should_keep: Some(false), + }, + ] +} + +fn bench_datadog_sampling( + c: &mut Criterion, +) { + let configs = create_benchmark_configs(); + + for config in configs { + let sampler = DatadogSampler::new(config.rules, -1); + let resource = Arc::new(RwLock::new(config.resource)); + let data = OtelSamplingData::new( + black_box(config.is_parent_sampled), + black_box(&config.trace_id), + 
black_box(config.span_name), + black_box(config.span_kind.clone()), + black_box(&config.attributes), + black_box(resource.as_ref()), + ); + + c.bench_function( + &format!("datadog_sample_span/{}/{}", config.name, M::name()), + |b| { + b.iter_batched( + || (), + |_| { + bench_sample(&sampler, &data, config.should_keep); + }, + criterion::BatchSize::LargeInput, + ) + }, + ); + } +} + +#[inline(never)] +fn bench_sample(sampler: &DatadogSampler, data: &impl SamplingData, should_keep: Option) { + let result = black_box(sampler).sample(black_box(data)); + if let Some(should_keep) = should_keep { + assert_eq!(result.get_priority().is_keep(), should_keep); + black_box(result); + } else { + black_box(result); + } +} + +criterion_group!( + name = memory_benches; + config = memory_allocated_measurement(&GLOBAL); + targets = bench_datadog_sampling +); +criterion_group!( + name = wall_time_benches; + config = Criterion::default(); + targets = bench_datadog_sampling +); +criterion_main!(memory_benches, wall_time_benches); diff --git a/datadog-opentelemetry/benches/otel_sampling_benchmark.rs b/datadog-opentelemetry/benches/otel_sampling_benchmark.rs new file mode 100644 index 00000000..aaf6dc59 --- /dev/null +++ b/datadog-opentelemetry/benches/otel_sampling_benchmark.rs @@ -0,0 +1,497 @@ +// Copyright 2026-Present Datadog, Inc. 
https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +use criterion::{criterion_group, criterion_main, Criterion}; +use datadog_opentelemetry::configuration::{Config, SamplingRuleConfig}; +use datadog_opentelemetry::core_pub_hack::test_utils::benchmarks::{ + memory_allocated_measurement, MeasurementName, ReportingAllocator, +}; +use datadog_opentelemetry::sampler::Sampler; +use opentelemetry::{trace::SamplingDecision, trace::SpanKind, KeyValue, TraceId}; +use opentelemetry_sdk::trace::ShouldSample; +use std::collections::HashMap; +use std::hint::black_box; +use std::sync::{Arc, RwLock}; + +#[global_allocator] +static GLOBAL: ReportingAllocator = ReportingAllocator::new(std::alloc::System); + +struct BenchmarkConfig { + name: &'static str, + rules: Vec, + resource: opentelemetry_sdk::Resource, + trace_id: TraceId, + span_name: &'static str, + span_kind: SpanKind, + attributes: Vec, + parent_context: Option, + expected_decision: Option, +} + +fn create_benchmark_configs() -> Vec { + use opentelemetry::trace::{SpanContext, SpanId, TraceContextExt, TraceFlags, TraceState}; + + let trace_id = TraceId::from(0x12345678901234567890123456789012_u128); + + // Helper to create parent context + let create_parent_context = |is_sampled: bool| { + let flags = if is_sampled { + TraceFlags::SAMPLED + } else { + TraceFlags::default() + }; + let span_context = SpanContext::new( + trace_id, + SpanId::from(0x1234567890123456_u64), + flags, + false, + TraceState::default(), + ); + opentelemetry::Context::current().with_remote_span_context(span_context) + }; + + vec![ + // 1. 
All spans rule (baseline) + BenchmarkConfig { + name: "rule_all_spans_only_rate", + rules: vec![SamplingRuleConfig { + sample_rate: 1.0, + service: None, + name: None, + resource: None, + tags: HashMap::new(), + }], + resource: opentelemetry_sdk::Resource::builder_empty().build(), + trace_id, + span_name: "something", + span_kind: SpanKind::Server, + attributes: vec![KeyValue::new("foo", "bar"), KeyValue::new("bar", "baz")], + parent_context: None, + expected_decision: Some(SamplingDecision::RecordAndSample), + }, + // 2. Service rule - matching + BenchmarkConfig { + name: "service_rule_matching", + rules: vec![SamplingRuleConfig { + sample_rate: 1.0, + service: Some("test-service".to_string()), + name: None, + resource: None, + tags: HashMap::new(), + }], + resource: opentelemetry_sdk::Resource::builder() + .with_service_name("test-service") + .build(), + trace_id, + span_name: "test-operation", + span_kind: SpanKind::Server, + attributes: vec![KeyValue::new("foo", "bar")], + parent_context: None, + expected_decision: Some(SamplingDecision::RecordAndSample), + }, + // 3. Service rule - not matching + BenchmarkConfig { + name: "service_rule_not_matching", + rules: vec![SamplingRuleConfig { + sample_rate: 1.0, + service: Some("test-service".to_string()), + name: None, + resource: None, + tags: HashMap::new(), + }], + resource: opentelemetry_sdk::Resource::builder() + .with_service_name("other-service") + .build(), + trace_id, + span_name: "test-operation", + span_kind: SpanKind::Server, + attributes: vec![KeyValue::new("foo", "bar")], + parent_context: None, + expected_decision: Some(SamplingDecision::RecordAndSample), + }, + // 4. 
Name pattern rule - matching + BenchmarkConfig { + name: "name_pattern_rule_matching", + rules: vec![SamplingRuleConfig { + sample_rate: 1.0, + service: None, + name: Some("http.*".to_string()), + resource: None, + tags: HashMap::new(), + }], + resource: opentelemetry_sdk::Resource::builder_empty().build(), + trace_id, + span_name: "http.request", + span_kind: SpanKind::Server, + attributes: vec![KeyValue::new("http.method", "GET")], + parent_context: None, + expected_decision: Some(SamplingDecision::RecordAndSample), + }, + // 5. Name pattern rule - not matching + BenchmarkConfig { + name: "name_pattern_rule_not_matching", + rules: vec![SamplingRuleConfig { + sample_rate: 1.0, + service: None, + name: Some("http.*".to_string()), + resource: None, + tags: HashMap::new(), + }], + resource: opentelemetry_sdk::Resource::builder_empty().build(), + trace_id, + span_name: "grpc.request", + span_kind: SpanKind::Server, + attributes: vec![KeyValue::new("rpc.method", "GetUser")], + parent_context: None, + expected_decision: Some(SamplingDecision::RecordAndSample), + }, + // 6. Resource pattern rule - matching + BenchmarkConfig { + name: "resource_pattern_rule_matching", + rules: vec![SamplingRuleConfig { + sample_rate: 1.0, + service: None, + name: None, + resource: Some("/api/*".to_string()), + tags: HashMap::new(), + }], + resource: opentelemetry_sdk::Resource::builder_empty().build(), + trace_id, + span_name: "http.request", + span_kind: SpanKind::Server, + attributes: vec![ + KeyValue::new("http.method", "GET"), + KeyValue::new("http.route", "/api/users"), + ], + parent_context: None, + expected_decision: Some(SamplingDecision::RecordAndSample), + }, + // 7. 
Resource pattern rule - not matching + BenchmarkConfig { + name: "resource_pattern_rule_not_matching", + rules: vec![SamplingRuleConfig { + sample_rate: 1.0, + service: None, + name: None, + resource: Some("/api/*".to_string()), + tags: HashMap::new(), + }], + resource: opentelemetry_sdk::Resource::builder_empty().build(), + trace_id, + span_name: "http.request", + span_kind: SpanKind::Server, + attributes: vec![ + KeyValue::new("http.method", "GET"), + KeyValue::new("http.route", "/health"), + ], + parent_context: None, + expected_decision: Some(SamplingDecision::RecordAndSample), + }, + // 8. Tag rule - matching + BenchmarkConfig { + name: "tag_rule_matching", + rules: vec![SamplingRuleConfig { + sample_rate: 1.0, + service: None, + name: None, + resource: None, + tags: HashMap::from([("environment".to_string(), "production".to_string())]), + }], + resource: opentelemetry_sdk::Resource::builder_empty().build(), + trace_id, + span_name: "test-operation", + span_kind: SpanKind::Server, + attributes: vec![ + KeyValue::new("environment", "production"), + KeyValue::new("foo", "bar"), + ], + parent_context: None, + expected_decision: Some(SamplingDecision::RecordAndSample), + }, + // 9. Tag rule - not matching + BenchmarkConfig { + name: "tag_rule_not_matching", + rules: vec![SamplingRuleConfig { + sample_rate: 1.0, + service: None, + name: None, + resource: None, + tags: HashMap::from([("environment".to_string(), "production".to_string())]), + }], + resource: opentelemetry_sdk::Resource::builder_empty().build(), + trace_id, + span_name: "test-operation", + span_kind: SpanKind::Server, + attributes: vec![ + KeyValue::new("environment", "staging"), + KeyValue::new("foo", "bar"), + ], + parent_context: None, + expected_decision: Some(SamplingDecision::RecordAndSample), + }, + // 10. 
Complex rule - matching + BenchmarkConfig { + name: "complex_rule_matching", + rules: vec![SamplingRuleConfig { + sample_rate: 0.5, + service: Some("api-service".to_string()), + name: Some("http.*".to_string()), + resource: Some("/api/v1/*".to_string()), + tags: HashMap::from([("environment".to_string(), "production".to_string())]), + }], + resource: opentelemetry_sdk::Resource::builder() + .with_service_name("api-service") + .build(), + trace_id, + span_name: "http.request", + span_kind: SpanKind::Server, + attributes: vec![ + KeyValue::new("environment", "production"), + KeyValue::new("http.method", "POST"), + KeyValue::new("http.route", "/api/v1/users"), + ], + parent_context: None, + expected_decision: None, // Probabilistic sampling at 0.5 rate + }, + // 11. Complex rule - partial match + BenchmarkConfig { + name: "complex_rule_partial_match", + rules: vec![SamplingRuleConfig { + sample_rate: 0.5, + service: Some("api-service".to_string()), + name: Some("http.*".to_string()), + resource: Some("/api/v1/*".to_string()), + tags: HashMap::from([("environment".to_string(), "production".to_string())]), + }], + resource: opentelemetry_sdk::Resource::builder() + .with_service_name("api-service") + .build(), + trace_id, + span_name: "http.request", + span_kind: SpanKind::Server, + attributes: vec![ + KeyValue::new("environment", "staging"), + KeyValue::new("http.method", "POST"), + KeyValue::new("http.route", "/health"), + ], + parent_context: None, + expected_decision: Some(SamplingDecision::RecordAndSample), + }, + // 12. 
Multiple rules - first match + BenchmarkConfig { + name: "multiple_rules_first_match", + rules: vec![ + SamplingRuleConfig { + sample_rate: 0.1, + service: Some("api-service".to_string()), + name: None, + resource: None, + tags: HashMap::new(), + }, + SamplingRuleConfig { + sample_rate: 0.5, + service: Some("web-service".to_string()), + name: None, + resource: None, + tags: HashMap::new(), + }, + SamplingRuleConfig { + sample_rate: 1.0, + service: None, + name: None, + resource: None, + tags: HashMap::new(), + }, + ], + resource: opentelemetry_sdk::Resource::builder() + .with_service_name("api-service") + .build(), + trace_id, + span_name: "test-operation", + span_kind: SpanKind::Server, + attributes: vec![KeyValue::new("foo", "bar")], + parent_context: None, + expected_decision: None, // Probabilistic sampling at 0.1 rate + }, + // 13. Multiple rules - last match + BenchmarkConfig { + name: "multiple_rules_last_match", + rules: vec![ + SamplingRuleConfig { + sample_rate: 0.1, + service: Some("api-service".to_string()), + name: None, + resource: None, + tags: HashMap::new(), + }, + SamplingRuleConfig { + sample_rate: 0.5, + service: Some("web-service".to_string()), + name: None, + resource: None, + tags: HashMap::new(), + }, + SamplingRuleConfig { + sample_rate: 1.0, + service: None, + name: None, + resource: None, + tags: HashMap::new(), + }, + ], + resource: opentelemetry_sdk::Resource::builder() + .with_service_name("other-service") + .build(), + trace_id, + span_name: "test-operation", + span_kind: SpanKind::Server, + attributes: vec![KeyValue::new("foo", "bar")], + parent_context: None, + expected_decision: Some(SamplingDecision::RecordAndSample), + }, + // 14. 
Many attributes + BenchmarkConfig { + name: "many_attributes", + rules: vec![SamplingRuleConfig { + sample_rate: 1.0, + service: None, + name: None, + resource: None, + tags: HashMap::from([("key10".to_string(), "value10".to_string())]), + }], + resource: opentelemetry_sdk::Resource::builder_empty().build(), + trace_id, + span_name: "test-operation", + span_kind: SpanKind::Server, + attributes: (0..20) + .map(|i| KeyValue::new(format!("key{}", i), format!("value{}", i))) + .collect(), + parent_context: None, + expected_decision: Some(SamplingDecision::RecordAndSample), + }, + // 15. Parent sampled - short circuit with many attributes and complex rules + BenchmarkConfig { + name: "parent_sampled_short_circuit", + rules: vec![SamplingRuleConfig { + sample_rate: 1.0, + service: Some("test-service".to_string()), + name: Some("http.*".to_string()), + resource: Some("/api/*".to_string()), + tags: HashMap::from([ + ("environment".to_string(), "production".to_string()), + ("region".to_string(), "us-east-1".to_string()), + ("version".to_string(), "v1.2.3".to_string()), + ]), + }], + resource: opentelemetry_sdk::Resource::builder() + .with_service_name("test-service") + .build(), + trace_id, + span_name: "http.request", + span_kind: SpanKind::Server, + attributes: (0..50) + .map(|i| KeyValue::new(format!("key{}", i), format!("value{}", i))) + .collect(), + parent_context: Some(create_parent_context(true)), + expected_decision: Some(SamplingDecision::RecordAndSample), + }, + // 16. 
Parent not sampled - short circuit with many attributes and complex rules + BenchmarkConfig { + name: "parent_not_sampled_short_circuit", + rules: vec![SamplingRuleConfig { + sample_rate: 1.0, + service: Some("test-service".to_string()), + name: Some("http.*".to_string()), + resource: Some("/api/*".to_string()), + tags: HashMap::from([ + ("environment".to_string(), "production".to_string()), + ("region".to_string(), "us-east-1".to_string()), + ("version".to_string(), "v1.2.3".to_string()), + ]), + }], + resource: opentelemetry_sdk::Resource::builder() + .with_service_name("test-service") + .build(), + trace_id, + span_name: "http.request", + span_kind: SpanKind::Server, + attributes: (0..50) + .map(|i| KeyValue::new(format!("key{}", i), format!("value{}", i))) + .collect(), + parent_context: Some(create_parent_context(false)), + expected_decision: Some(SamplingDecision::RecordOnly), + }, + ] +} + +fn bench_otel_span_sampling( + c: &mut Criterion, +) { + let configs = create_benchmark_configs(); + let links: Vec = vec![]; + + for config in configs { + let test_resource = Arc::new(RwLock::new(config.resource)); + let test_config = Arc::new( + Config::builder() + .set_trace_rate_limit(-1) + .set_trace_sampling_rules(config.rules) + .build(), + ); + let test_sampler = Sampler::new(test_config, test_resource.clone(), None); + + c.bench_function( + &format!("otel_sample_span/{}/{}", config.name, M::name()), + |b| { + b.iter_batched( + || (), + |_| { + bench_sample( + &test_sampler, + config.parent_context.as_ref(), + config.trace_id, + config.span_name, + &config.span_kind, + &config.attributes, + &links, + &config.expected_decision, + ); + }, + criterion::BatchSize::LargeInput, + ) + }, + ); + } +} + +#[inline(never)] +#[allow(clippy::too_many_arguments)] +fn bench_sample( + sampler: &Sampler, + parent_context: Option<&opentelemetry::Context>, + trace_id: TraceId, + span_name: &str, + span_kind: &SpanKind, + attributes: &[KeyValue], + links: 
&[opentelemetry::trace::Link], + expected_decision: &Option, +) { + let result = black_box(sampler).should_sample( + black_box(parent_context), + black_box(trace_id), + black_box(span_name), + black_box(span_kind), + black_box(attributes), + black_box(links), + ); + if let Some(expected_decision) = expected_decision { + assert_eq!(result.decision, *expected_decision); + black_box(result); + } else { + black_box(result); + } +} + +criterion_group!(name = memory_benches; config = memory_allocated_measurement(&GLOBAL); targets = bench_otel_span_sampling); +criterion_group!(name = wall_time_benches; config = Criterion::default(); targets = bench_otel_span_sampling); +criterion_main!(memory_benches, wall_time_benches); diff --git a/datadog-opentelemetry/src/core/configuration/configuration.rs b/datadog-opentelemetry/src/core/configuration/configuration.rs index 22010824..47006dec 100644 --- a/datadog-opentelemetry/src/core/configuration/configuration.rs +++ b/datadog-opentelemetry/src/core/configuration/configuration.rs @@ -2,15 +2,17 @@ // SPDX-License-Identifier: Apache-2.0 use libdd_telemetry::data::Configuration; -use serde::{Deserialize, Serialize}; -use std::collections::{HashMap, HashSet, VecDeque}; +use std::collections::{HashSet, VecDeque}; +use std::fmt::Display; use std::ops::Deref; +use std::str::FromStr; use std::sync::{Arc, Mutex}; use std::time::Duration; -use std::{borrow::Cow, fmt::Display, str::FromStr, sync::OnceLock}; +use std::{borrow::Cow, sync::OnceLock}; use rustc_version_runtime::version; +use super::{ParsedSamplingRules, SamplingRuleConfig}; use crate::core::configuration::sources::{ CompositeConfigSourceResult, CompositeSource, ConfigKey, ConfigSourceOrigin, }; @@ -22,8 +24,9 @@ use crate::{dd_error, dd_warn}; /// Different types of remote configuration updates that can trigger callbacks #[derive(Debug, Clone)] pub enum RemoteConfigUpdate { - /// Sampling rules were updated from remote configuration - SamplingRules(Vec), + /// Sampling rules 
were updated from remote configuration. + /// Uses the internal type to preserve provenance from remote config. + SamplingRules(Vec), // Future remote config update types should be added here as new variants. // E.g. // - FeatureFlags(HashMap) @@ -83,92 +86,11 @@ impl Default for RemoteConfigCallbacks { Self::new() } } - -/// Configuration for a single sampling rule -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)] -pub struct SamplingRuleConfig { - /// The sample rate to apply (0.0-1.0) - pub sample_rate: f64, - - /// Optional service name pattern to match - #[serde(default)] - pub service: Option, - - /// Optional span name pattern to match - #[serde(default)] - pub name: Option, - - /// Optional resource name pattern to match - #[serde(default)] - pub resource: Option, - - /// Tags that must match (key-value pairs) - #[serde(default)] - pub tags: HashMap, - - /// Where this rule comes from (customer, dynamic, default) - // TODO(paullgdc): this value should not be definable by customers - #[serde(default = "default_provenance")] - pub provenance: String, -} - -impl Display for SamplingRuleConfig { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", serde_json::json!(self)) - } -} - -fn default_provenance() -> String { - "default".to_string() -} - pub const TRACER_VERSION: &str = env!("CARGO_PKG_VERSION"); const DATADOG_TAGS_MAX_LENGTH: usize = 512; const RC_DEFAULT_POLL_INTERVAL: f64 = 5.0; // 5 seconds is the highest interval allowed by the spec -#[derive(Debug, Default, Clone, PartialEq)] -struct ParsedSamplingRules { - rules: Vec, -} - -impl Deref for ParsedSamplingRules { - type Target = [SamplingRuleConfig]; - - fn deref(&self) -> &Self::Target { - &self.rules - } -} - -impl From for Vec { - fn from(parsed: ParsedSamplingRules) -> Self { - parsed.rules - } -} - -impl FromStr for ParsedSamplingRules { - type Err = serde_json::Error; - - fn from_str(s: &str) -> Result { - if s.trim().is_empty() { - return 
Ok(ParsedSamplingRules::default()); - } - // DD_TRACE_SAMPLING_RULES is expected to be a JSON array of SamplingRuleConfig objects. - let rules_vec: Vec = serde_json::from_str(s)?; - Ok(ParsedSamplingRules { rules: rules_vec }) - } -} - -impl Display for ParsedSamplingRules { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "{}", - serde_json::to_string(&self.rules).unwrap_or_default() - ) - } -} - /// OTLP protocol types for OTLP export. #[derive(Debug, Clone, Copy, PartialEq, Eq)] #[non_exhaustive] @@ -1569,24 +1491,29 @@ impl Config { rules_json: &str, config_id: Option, ) -> Result<(), String> { - // Parse the JSON into SamplingRuleConfig objects - let rules: Vec = serde_json::from_str(rules_json) - .map_err(|e| format!("Failed to parse sampling rules JSON: {e}"))?; + // Parse the JSON into the internal type to preserve provenance from remote config. + let internal_rules: Vec = + serde_json::from_str(rules_json) + .map_err(|e| format!("Failed to parse sampling rules JSON: {e}"))?; // If remote config sends empty rules, clear remote config to fall back to local rules - if rules.is_empty() { + if internal_rules.is_empty() { self.clear_remote_sampling_rules(config_id); } else { + // Convert to public type for storage (provenance is dropped). 
+ let rules: Vec = + internal_rules.iter().cloned().map(Into::into).collect(); self.trace_sampling_rules.set_override_value( ParsedSamplingRules { rules }, ConfigSourceOrigin::RemoteConfig, ); self.trace_sampling_rules.set_config_id(config_id); - // Notify callbacks about the sampling rules update - self.remote_config_callbacks.lock().unwrap().notify_update( - &RemoteConfigUpdate::SamplingRules(self.trace_sampling_rules().to_vec()), - ); + // Notify callbacks with the internal rules (preserves provenance) + self.remote_config_callbacks + .lock() + .unwrap() + .notify_update(&RemoteConfigUpdate::SamplingRules(internal_rules)); telemetry::notify_configuration_update(&self.trace_sampling_rules); } @@ -1622,9 +1549,17 @@ impl Config { self.trace_sampling_rules.unset_override_value(); self.trace_sampling_rules.set_config_id(config_id); - self.remote_config_callbacks.lock().unwrap().notify_update( - &RemoteConfigUpdate::SamplingRules(self.trace_sampling_rules().to_vec()), - ); + // Fallback rules are locally defined, so "local" provenance is correct + let internal: Vec = self + .trace_sampling_rules() + .iter() + .cloned() + .map(Into::into) + .collect(); + self.remote_config_callbacks + .lock() + .unwrap() + .notify_update(&RemoteConfigUpdate::SamplingRules(internal)); telemetry::notify_configuration_update(&self.trace_sampling_rules); } @@ -2458,6 +2393,7 @@ impl ConfigBuilder { #[cfg(test)] mod tests { use libdd_telemetry::data::ConfigurationOrigin; + use std::collections::HashMap; use super::Config; use super::*; @@ -2490,7 +2426,6 @@ mod tests { &SamplingRuleConfig { sample_rate: 0.5, service: Some("web-api".to_string()), - provenance: "customer".to_string(), ..SamplingRuleConfig::default() } ); @@ -2517,7 +2452,6 @@ mod tests { &SamplingRuleConfig { sample_rate: 0.5, service: Some("test-service".to_string()), - provenance: "customer".to_string(), ..SamplingRuleConfig::default() } ); @@ -2540,7 +2474,6 @@ mod tests { name: None, resource: None, tags: HashMap::new(), 
- provenance: "manual".to_string(), }]) .set_trace_rate_limit(200) .set_service("manual-service".to_string()) @@ -2556,7 +2489,6 @@ mod tests { &SamplingRuleConfig { sample_rate: 0.8, service: Some("manual-service".to_string()), - provenance: "manual".to_string(), ..SamplingRuleConfig::default() } ); @@ -2864,16 +2796,15 @@ mod tests { fn test_sampling_rules_update_callbacks() { let config = Config::builder().build(); - // Track callback invocations + // Track callback invocations — uses internal type to verify provenance preservation let callback_called = Arc::new(Mutex::new(false)); - let callback_rules = Arc::new(Mutex::new(Vec::::new())); + let callback_rules = Arc::new(Mutex::new(Vec::::new())); let callback_called_clone = callback_called.clone(); let callback_rules_clone = callback_rules.clone(); config.set_sampling_rules_callback(move |update| { *callback_called_clone.lock().unwrap() = true; - // Store the rules - for now we only have SamplingRules variant let RemoteConfigUpdate::SamplingRules(rules) = update; *callback_rules_clone.lock().unwrap() = rules.clone(); }); @@ -2882,22 +2813,20 @@ mod tests { assert!(!*callback_called.lock().unwrap()); assert!(callback_rules.lock().unwrap().is_empty()); - // Update rules from remote config - let new_rules = vec![SamplingRuleConfig { - sample_rate: 0.5, - service: Some("test-service".to_string()), - provenance: "remote".to_string(), - ..SamplingRuleConfig::default() - }]; - - let rules_json = serde_json::to_string(&new_rules).unwrap(); + // Update rules from remote config with provenance "dynamic" + let rules_json = r#"[{"sample_rate":0.5,"service":"test-service","provenance":"dynamic"}]"#; config - .update_sampling_rules_from_remote(&rules_json, None) + .update_sampling_rules_from_remote(rules_json, None) .unwrap(); - // Callback should be called with the new rules + // Callback should be called with the new rules, provenance preserved assert!(*callback_called.lock().unwrap()); - 
assert_eq!(*callback_rules.lock().unwrap(), new_rules); + let received = callback_rules.lock().unwrap(); + assert_eq!(received.len(), 1); + assert_eq!(received[0].sample_rate, 0.5); + assert_eq!(received[0].service, Some("test-service".to_string())); + assert_eq!(received[0].provenance, "dynamic"); + drop(received); // Test clearing rules *callback_called.lock().unwrap() = false; @@ -2911,6 +2840,55 @@ mod tests { assert!(callback_rules.lock().unwrap().is_empty()); } + #[test] + fn test_clear_remote_rules_callback_has_local_provenance() { + let config = Config::builder() + .set_trace_sampling_rules(vec![SamplingRuleConfig { + sample_rate: 0.5, + service: Some("local-svc".to_string()), + ..SamplingRuleConfig::default() + }]) + .build(); + + let callback_rules = Arc::new(Mutex::new(Vec::::new())); + let clone = callback_rules.clone(); + config.set_sampling_rules_callback(move |update| { + let RemoteConfigUpdate::SamplingRules(rules) = update; + *clone.lock().unwrap() = rules.clone(); + }); + + // Push remote rules then clear to trigger fallback + config + .update_sampling_rules_from_remote( + r#"[{"sample_rate":0.9,"provenance":"dynamic"}]"#, + None, + ) + .unwrap(); + config.clear_remote_sampling_rules(None); + + // Fallback rules should have "local" provenance + let received = callback_rules.lock().unwrap(); + assert_eq!(received.len(), 1); + assert_eq!(received[0].sample_rate, 0.5); + assert_eq!(received[0].provenance, "local"); + } + + #[test] + fn test_public_sampling_rule_config_ignores_provenance_in_json() { + // The public SamplingRuleConfig should silently ignore a "provenance" field in JSON, + // since serde skips unknown fields by default. 
+ let json = r#"[{"sample_rate":0.5,"service":"svc","provenance":"dynamic"}]"#; + let parsed: ParsedSamplingRules = json.parse().unwrap(); + assert_eq!(parsed.rules.len(), 1); + assert_eq!(parsed.rules[0].sample_rate, 0.5); + assert_eq!(parsed.rules[0].service, Some("svc".to_string())); + // No provenance field on the public type — it was silently dropped. + + // Round-trip: serialized output should NOT contain provenance + let serialized = parsed.to_string(); + assert!(!serialized.contains("provenance")); + } + #[test] fn test_config_item_priority() { // Test that ConfigItem respects priority: remote_config > code > env_var > default @@ -3006,7 +2984,6 @@ mod tests { rules: vec![SamplingRuleConfig { sample_rate: 0.3, service: Some("local-service".to_string()), - provenance: "local".to_string(), ..SamplingRuleConfig::default() }], }; @@ -3071,7 +3048,6 @@ mod tests { let new_rules = vec![SamplingRuleConfig { sample_rate: 0.5, service: Some("test-service".to_string()), - provenance: "remote".to_string(), ..SamplingRuleConfig::default() }]; @@ -3336,8 +3312,9 @@ mod tests { let config = Config::builder_with_sources(&sources).build(); let expected = ParsedSamplingRules::from_str( - r#"[{"sample_rate":0.5,"service":"web-api","name":null,"resource":null,"tags":{},"provenance":"customer"}]"# - ).unwrap(); + r#"[{"sample_rate":0.5,"service":"web-api","name":null,"resource":null,"tags":{}}]"#, + ) + .unwrap(); let configurations = &config.trace_sampling_rules.get_all_configurations(); // active config is the one with highest seq_id @@ -3352,7 +3329,10 @@ mod tests { ); // Update ConfigItemRc via RC - let expected_rc = ParsedSamplingRules::from_str(r#"[{"sample_rate":1,"service":"web-api","name":null,"resource":null,"tags":{},"provenance":"customer"}]"#).unwrap(); + let expected_rc = ParsedSamplingRules::from_str( + r#"[{"sample_rate":1,"service":"web-api","name":null,"resource":null,"tags":{}}]"#, + ) + .unwrap(); config .trace_sampling_rules 
.set_override_value(expected_rc.clone(), ConfigSourceOrigin::RemoteConfig); diff --git a/datadog-opentelemetry/src/core/configuration/mod.rs b/datadog-opentelemetry/src/core/configuration/mod.rs index 94cf7238..4922f18c 100644 --- a/datadog-opentelemetry/src/core/configuration/mod.rs +++ b/datadog-opentelemetry/src/core/configuration/mod.rs @@ -25,7 +25,8 @@ pub(crate) mod remote_config; mod sources; mod supported_configurations; -pub use configuration::{ - Config, ConfigBuilder, OtlpProtocol, SamplingRuleConfig, TracePropagationStyle, -}; +pub use configuration::{Config, ConfigBuilder, OtlpProtocol, TracePropagationStyle}; pub(crate) use configuration::{ConfigurationProvider, RemoteConfigUpdate}; + +mod sampling_rule_config; +pub use sampling_rule_config::{ParsedSamplingRules, SamplingRuleConfig}; diff --git a/datadog-opentelemetry/src/core/configuration/sampling_rule_config.rs b/datadog-opentelemetry/src/core/configuration/sampling_rule_config.rs new file mode 100644 index 00000000..c771f4c7 --- /dev/null +++ b/datadog-opentelemetry/src/core/configuration/sampling_rule_config.rs @@ -0,0 +1,115 @@ +// Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; +use std::fmt::Display; +use std::ops::Deref; +use std::str::FromStr; + +/// Configuration for a single sampling rule. +// +// This is the public-facing type. The `provenance` field from +// [`libdd_sampling::SamplingRuleConfig`] is intentionally omitted — +// it is set automatically when the rule reaches the sampler. 
+ +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)] +pub struct SamplingRuleConfig { + /// The sample rate to apply (0.0-1.0) + pub sample_rate: f64, + + /// Optional service name pattern to match + #[serde(default)] + pub service: Option, + + /// Optional span name pattern to match + #[serde(default)] + pub name: Option, + + /// Optional resource name pattern to match + #[serde(default)] + pub resource: Option, + + /// Tags that must match (key-value pairs) + #[serde(default)] + pub tags: HashMap, +} + +impl Display for SamplingRuleConfig { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", serde_json::json!(self)) + } +} + +/// Wrapper for parsed sampling rules (from JSON env var or API). +#[derive(Debug, Default, Clone, PartialEq)] +pub struct ParsedSamplingRules { + /// The individual sampling rules. + pub rules: Vec, +} + +impl Deref for ParsedSamplingRules { + type Target = [SamplingRuleConfig]; + + fn deref(&self) -> &Self::Target { + &self.rules + } +} + +impl From for Vec { + fn from(parsed: ParsedSamplingRules) -> Self { + parsed.rules + } +} + +impl FromStr for ParsedSamplingRules { + type Err = serde_json::Error; + + fn from_str(s: &str) -> Result { + if s.trim().is_empty() { + return Ok(ParsedSamplingRules::default()); + } + // DD_TRACE_SAMPLING_RULES is expected to be a JSON array of SamplingRuleConfig objects. 
+ let rules_vec: Vec = serde_json::from_str(s)?; + Ok(ParsedSamplingRules { rules: rules_vec }) + } +} + +impl Display for ParsedSamplingRules { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{}", + serde_json::to_string(&self.rules).unwrap_or_default() + ) + } +} + +// --------------------------------------------------------------------------- +// Conversions to/from internal libdd_sampling types +// --------------------------------------------------------------------------- + +impl From for libdd_sampling::SamplingRuleConfig { + fn from(public: SamplingRuleConfig) -> Self { + Self { + sample_rate: public.sample_rate, + service: public.service, + name: public.name, + resource: public.resource, + tags: public.tags, + provenance: "local".to_string(), + } + } +} + +impl From for SamplingRuleConfig { + fn from(internal: libdd_sampling::SamplingRuleConfig) -> Self { + Self { + sample_rate: internal.sample_rate, + service: internal.service, + name: internal.name, + resource: internal.resource, + tags: internal.tags, + } + } +} diff --git a/datadog-opentelemetry/src/core/mod.rs b/datadog-opentelemetry/src/core/mod.rs index b38404f0..0f4f11cb 100644 --- a/datadog-opentelemetry/src/core/mod.rs +++ b/datadog-opentelemetry/src/core/mod.rs @@ -4,9 +4,9 @@ //! Core components of the SDK pub mod configuration; -/// Trace propagation and sampling constant keys. 
-pub mod constants; -pub mod sampling; + +// Re-export from libdd-sampling +pub use libdd_sampling::{dd_constants as constants, dd_sampling as sampling}; mod error; diff --git a/datadog-opentelemetry/src/lib.rs b/datadog-opentelemetry/src/lib.rs index 97ad45ef..bc7332d0 100644 --- a/datadog-opentelemetry/src/lib.rs +++ b/datadog-opentelemetry/src/lib.rs @@ -268,14 +268,22 @@ pub mod mappings; #[cfg(feature = "test-utils")] pub mod propagation; #[cfg(feature = "test-utils")] +pub mod sampler; +#[cfg(feature = "test-utils")] pub mod sampling; +#[cfg(feature = "test-utils")] +pub mod span_processor; #[cfg(not(feature = "test-utils"))] pub(crate) mod mappings; #[cfg(not(feature = "test-utils"))] pub(crate) mod propagation; #[cfg(not(feature = "test-utils"))] +mod sampler; +#[cfg(not(feature = "test-utils"))] pub(crate) mod sampling; +#[cfg(not(feature = "test-utils"))] +mod span_processor; mod ddtrace_transform; #[cfg(any(feature = "logs-grpc", feature = "logs-http"))] @@ -283,9 +291,7 @@ mod logs_reader; #[cfg(any(feature = "metrics-grpc", feature = "metrics-http"))] mod metrics_reader; mod otlp_utils; -mod sampler; mod span_exporter; -mod span_processor; mod spans_metrics; #[cfg(any(feature = "logs-grpc", feature = "logs-http"))] mod telemetry_logs_exporter; @@ -496,7 +502,11 @@ fn make_tracer( let resource_slot = Arc::new(RwLock::new(Resource::builder_empty().build())); // Sampler only needs config for initialization (reads initial sampling rules) // Runtime updates come via config callback, so no need for shared config - let sampler = Sampler::new(config.clone(), resource_slot.clone(), registry.clone()); + let sampler = Sampler::new( + config.clone(), + resource_slot.clone(), + Some(registry.clone()), + ); let agent_response_handler = sampler.on_agent_response(); diff --git a/datadog-opentelemetry/src/mappings/transform/mod.rs b/datadog-opentelemetry/src/mappings/transform/mod.rs index e4b80081..7b4aa996 100644 --- 
a/datadog-opentelemetry/src/mappings/transform/mod.rs +++ b/datadog-opentelemetry/src/mappings/transform/mod.rs @@ -212,7 +212,7 @@ fn otel_span_to_dd_span_minimal<'a>( let code: u32 = if let Some(http_status_code) = span.get_attr_num(DATADOG_HTTP_STATUS_CODE) { http_status_code } else { - get_otel_status_code(span) + get_otel_status_code(span).unwrap_or(0) }; if code != 0 { dd_span.meta.insert( diff --git a/datadog-opentelemetry/src/mappings/transform/otel_util.rs b/datadog-opentelemetry/src/mappings/transform/otel_util.rs index e9194088..343cd36f 100644 --- a/datadog-opentelemetry/src/mappings/transform/otel_util.rs +++ b/datadog-opentelemetry/src/mappings/transform/otel_util.rs @@ -198,14 +198,9 @@ pub fn get_otel_resource_v2<'a>(span: &impl OtelSpan<'a>) -> Cow<'a, str> { } // https://github.com/DataDog/datadog-agent/blob/main/pkg/trace/traceutil/otel_util.go#L571 -pub(crate) fn get_otel_status_code<'a>(span: &impl OtelSpan<'a>) -> u32 { - if let Some(code) = span.get_attr_num(HTTP_RESPONSE_STATUS_CODE) { - return code; - } - if let Some(code) = span.get_attr_num(HTTP_STATUS_CODE) { - return code; - } - 0 +pub(crate) fn get_otel_status_code<'a>(span: &impl OtelSpan<'a>) -> Option { + span.get_attr_num(HTTP_RESPONSE_STATUS_CODE) + .or_else(|| span.get_attr_num(HTTP_STATUS_CODE)) } const SPAN_TYPE_SQL: &str = "sql"; diff --git a/datadog-opentelemetry/src/sampler.rs b/datadog-opentelemetry/src/sampler.rs index 00553165..00c6a69b 100644 --- a/datadog-opentelemetry/src/sampler.rs +++ b/datadog-opentelemetry/src/sampler.rs @@ -1,6 +1,8 @@ // Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 +//! 
Datadog OTel Sampler + use opentelemetry::trace::{TraceContextExt, TraceState}; use opentelemetry_sdk::{trace::ShouldSample, Resource}; use std::sync::{Arc, RwLock}; @@ -10,34 +12,58 @@ use crate::{ configuration::Config, constants::SAMPLING_DECISION_MAKER_TAG_KEY, sampling::SamplingDecision, }, - sampling::{DatadogSampler, SamplingRule, SamplingRulesCallback}, + sampling::{DatadogSampler, OtelSamplingData, SamplingRule, SamplingRulesCallback}, span_processor::{RegisterTracePropagationResult, TracePropagationData}, text_map_propagator::{self, DatadogExtractData}, TraceRegistry, }; +/// OpenTelemetry sampler implementation for Datadog tracing. +/// +/// Implements the `ShouldSample` trait to make sampling decisions for traces based on +/// Datadog's sampling rules, rate limits, and service-based sampling rates. #[derive(Debug, Clone)] pub struct Sampler { sampler: DatadogSampler, - trace_registry: TraceRegistry, + resource: Arc>, + trace_registry: Option, cfg: Arc, } impl Sampler { + /// Creates a new Datadog sampler. 
+ /// + /// # Arguments + /// + /// * `cfg` - Configuration containing sampling rules and rate limits + /// * `resource` - OpenTelemetry resource with service information + /// * `trace_registry` - Optional trace registry for managing in-flight traces (None for + /// benchmarking) pub fn new( cfg: Arc, resource: Arc>, - trace_registry: TraceRegistry, + // This is an Option to allow benchmarking different parts of sampling + trace_registry: Option, ) -> Self { - let rules = SamplingRule::from_configs(cfg.trace_sampling_rules().to_vec()); - let sampler = DatadogSampler::new(rules, cfg.trace_rate_limit(), resource); + let internal_configs: Vec = cfg + .trace_sampling_rules() + .iter() + .cloned() + .map(Into::into) + .collect(); + let rules = SamplingRule::from_configs(internal_configs); + let sampler = DatadogSampler::new(rules, cfg.trace_rate_limit()); Self { cfg, sampler, + resource, trace_registry, } } + /// Returns a callback for processing agent responses. + /// + /// The callback updates service-based sampling rates based on the agent's response. 
pub fn on_agent_response(&self) -> Box Fn(&'a str) + Send + Sync> { self.sampler.on_agent_response() } @@ -81,10 +107,18 @@ impl ShouldSample for Sampler { .filter(|c| !is_parent_deferred && c.has_active_span()) .map(|c| c.span().span_context().trace_flags().is_sampled()); - let result = self - .sampler - .sample(is_parent_sampled, trace_id, name, span_kind, attributes); - let trace_propagation_data = if let Some(trace_root_info) = &result.trace_root_info { + let data = OtelSamplingData::new( + is_parent_sampled, + &trace_id, + name, + span_kind.clone(), + attributes, + self.resource.as_ref(), + ); + let result = self.sampler.sample(&data); + let trace_propagation_data = if let Some(trace_root_info) = + result.get_trace_root_sampling_info() + { // If the parent was deferred, we try to merge propagation tags with what we extracted let (mut tags, origin) = if is_parent_deferred { if let Some(DatadogExtractData { @@ -100,7 +134,7 @@ impl ShouldSample for Sampler { } else { (None, None) }; - let mechanism = trace_root_info.mechanism; + let mechanism = trace_root_info.mechanism(); tags.get_or_insert_default().insert( SAMPLING_DECISION_MAKER_TAG_KEY.to_string(), mechanism.to_cow().into_owned(), @@ -108,7 +142,7 @@ impl ShouldSample for Sampler { Some(TracePropagationData { sampling_decision: SamplingDecision { - priority: Some(trace_root_info.priority), + priority: Some(result.get_priority()), mechanism: Some(mechanism), }, origin, @@ -140,36 +174,40 @@ impl ShouldSample for Sampler { None }; if let Some(trace_propagation_data) = trace_propagation_data { - match self - .trace_registry - .register_local_root_trace_propagation_data( + if let Some(trace_registry) = &self.trace_registry { + match trace_registry.register_local_root_trace_propagation_data( trace_id.to_bytes(), trace_propagation_data, ) { - RegisterTracePropagationResult::Existing(sampling_decision) => { - return opentelemetry::trace::SamplingResult { - // If at this point the sampling decision is still None, 
we will - // end up sending the span to the agent without a sampling priority, which - // will latter take a decision. - // So the span is marked as RecordAndSample because we treat it as such - decision: if sampling_decision.priority.is_none_or(|p| p.is_keep()) { - opentelemetry::trace::SamplingDecision::RecordAndSample - } else { - opentelemetry::trace::SamplingDecision::RecordOnly - }, - attributes: Vec::new(), - trace_state: parent_context - .map(|c| c.span().span_context().trace_state().clone()) - .unwrap_or_default(), - }; + RegisterTracePropagationResult::Existing(sampling_decision) => { + return opentelemetry::trace::SamplingResult { + // If at this point the sampling decision is still None, we will + // end up sending the span to the agent without a sampling priority, + // which will later take a decision. + // So the span is marked as RecordAndSample because we treat it as such + decision: if sampling_decision.priority.is_none_or(|p| p.is_keep()) { + opentelemetry::trace::SamplingDecision::RecordAndSample + } else { + opentelemetry::trace::SamplingDecision::RecordOnly + }, + attributes: Vec::new(), + trace_state: parent_context + .map(|c| c.span().span_context().trace_state().clone()) + .unwrap_or_default(), + }; + } + RegisterTracePropagationResult::New => {} } - RegisterTracePropagationResult::New => {} } } opentelemetry::trace::SamplingResult { - decision: result.to_otel_decision(), - attributes: result.to_dd_sampling_tags(), + decision: crate::sampling::otel_mappings::priority_to_otel_decision( + result.get_priority(), + ), + attributes: result + .to_dd_sampling_tags(&crate::sampling::OtelAttributeFactory) + .unwrap_or_default(), trace_state: parent_context .map(|c| c.span().span_context().trace_state().clone()) .unwrap_or_default(), @@ -199,13 +237,16 @@ mod tests { name: None, resource: None, tags: HashMap::new(), - provenance: "customer".to_string(), }]) .build(), ); let test_resource = Arc::new(RwLock::new(Resource::builder().build())); - let 
sampler = Sampler::new(config.clone(), test_resource, TraceRegistry::new(config)); + let sampler = Sampler::new( + config.clone(), + test_resource, + Some(TraceRegistry::new(config)), + ); let trace_id_bytes = [1; 16]; let trace_id = TraceId::from_bytes(trace_id_bytes); @@ -227,7 +268,11 @@ mod tests { let config = Arc::new(Config::builder().build()); let test_resource = Arc::new(RwLock::new(Resource::builder_empty().build())); - let sampler = Sampler::new(config.clone(), test_resource, TraceRegistry::new(config)); + let sampler = Sampler::new( + config.clone(), + test_resource, + Some(TraceRegistry::new(config)), + ); let trace_id_bytes = [2; 16]; let trace_id = TraceId::from_bytes(trace_id_bytes); @@ -246,7 +291,11 @@ mod tests { let config = Arc::new(Config::builder().build()); let test_resource = Arc::new(RwLock::new(Resource::builder_empty().build())); - let sampler = Sampler::new(config.clone(), test_resource, TraceRegistry::new(config)); + let sampler = Sampler::new( + config.clone(), + test_resource, + Some(TraceRegistry::new(config)), + ); let trace_id = TraceId::from_bytes([2; 16]); let span_id = SpanId::from_bytes([3; 8]); diff --git a/datadog-opentelemetry/src/sampling/datadog_sampler.rs b/datadog-opentelemetry/src/sampling/datadog_sampler.rs deleted file mode 100644 index 62ea1e09..00000000 --- a/datadog-opentelemetry/src/sampling/datadog_sampler.rs +++ /dev/null @@ -1,1905 +0,0 @@ -// Copyright 2025-Present Datadog, Inc. 
https://www.datadoghq.com/ -// SPDX-License-Identifier: Apache-2.0 - -use crate::core::configuration::SamplingRuleConfig; -use crate::core::constants::{ - RL_EFFECTIVE_RATE, SAMPLING_AGENT_RATE_TAG_KEY, SAMPLING_DECISION_MAKER_TAG_KEY, - SAMPLING_KNUTH_RATE_TAG_KEY, SAMPLING_PRIORITY_TAG_KEY, SAMPLING_RULE_RATE_TAG_KEY, -}; -use crate::core::sampling::{mechanism, SamplingMechanism, SamplingPriority}; - -/// Type alias for sampling rules update callback -/// Consolidated callback type used across crates for remote config sampling updates -pub type SamplingRulesCallback = Box Fn(&'a [SamplingRuleConfig]) + Send + Sync>; - -use crate::mappings::{ - get_dd_key_for_otlp_attribute, get_otel_env, get_otel_operation_name_v2, get_otel_resource_v2, - get_otel_service, get_otel_status_code, OtelSpan, -}; -use opentelemetry::trace::SamplingDecision; -use opentelemetry::trace::TraceId; -use opentelemetry::KeyValue; -use std::collections::HashMap; -use std::sync::{Arc, RwLock}; - -use super::agent_service_sampler::{AgentRates, ServicesSampler}; -// Import the attr constants -use super::constants::pattern::NO_RULE; -use super::glob_matcher::GlobMatcher; -use super::otel_mappings::PreSampledSpan; -use super::rate_limiter::RateLimiter; -use super::rate_sampler::RateSampler; -use super::rules_sampler::RulesSampler; -use super::utils; - -fn matcher_from_rule(rule: &str) -> Option { - (rule != NO_RULE).then(|| GlobMatcher::new(rule)) -} - -/// Represents a sampling rule with criteria for matching spans -#[derive(Clone, Debug)] -pub struct SamplingRule { - /// The sample rate to apply when this rule matches (0.0-1.0) - sample_rate: f64, - - /// Where this rule comes from (customer, dynamic, default) - provenance: String, - - /// Internal rate sampler used when this rule matches - rate_sampler: RateSampler, - - /// Glob matchers for pattern matching - name_matcher: Option, - service_matcher: Option, - resource_matcher: Option, - tag_matchers: HashMap, -} - -impl SamplingRule { - /// 
Converts a vector of SamplingRuleConfig into SamplingRule objects - /// Centralizes the conversion logic - pub fn from_configs(configs: Vec) -> Vec { - configs - .into_iter() - .map(|config| { - Self::new( - config.sample_rate, - config.service, - config.name, - config.resource, - Some(config.tags), - Some(config.provenance), - ) - }) - .collect() - } - - /// Creates a new sampling rule - pub fn new( - sample_rate: f64, - service: Option, - name: Option, - resource: Option, - tags: Option>, - provenance: Option, - ) -> Self { - // Create glob matchers for the patterns - let name_matcher = name.as_deref().and_then(matcher_from_rule); - let service_matcher = service.as_deref().and_then(matcher_from_rule); - let resource_matcher = resource.as_deref().and_then(matcher_from_rule); - - // Create matchers for tag values - let tag_map = tags.clone().unwrap_or_default(); - let mut tag_matchers = HashMap::with_capacity(tag_map.len()); - for (key, value) in &tag_map { - if let Some(matcher) = matcher_from_rule(value) { - tag_matchers.insert(key.clone(), matcher); - } - } - - SamplingRule { - sample_rate, - provenance: provenance.unwrap_or_else(|| "default".to_string()), - rate_sampler: RateSampler::new(sample_rate), - name_matcher, - service_matcher, - resource_matcher, - tag_matchers, - } - } - - /// Checks if this rule matches the given span's attributes and name - /// The name is derived from the attributes and span kind - fn matches(&self, span: &PreSampledSpan) -> bool { - // Get the operation name from the attributes and span kind - let name: std::borrow::Cow<'_, str> = get_otel_operation_name_v2(span); - - // Check name using glob matcher if specified - if let Some(ref matcher) = self.name_matcher { - if !matcher.matches(name.as_ref()) { - return false; - } - } - - // Check service if specified using glob matcher - if let Some(ref matcher) = self.service_matcher { - // Get service directly from the resource - let service_from_resource = get_otel_service(span); - - // 
Match against the service from resource - if !matcher.matches(&service_from_resource) { - return false; - } - } - - // Get the resource string for matching - let resource_str: std::borrow::Cow<'_, str> = get_otel_resource_v2(span); - - // Check resource if specified using glob matcher - if let Some(ref matcher) = self.resource_matcher { - // Use the resource generated by get_otel_resource_v2 - if !matcher.matches(resource_str.as_ref()) { - return false; - } - } - - // Check all tags using glob matchers - for (key, matcher) in &self.tag_matchers { - let rule_tag_key_str = key.as_str(); - - // Special handling for rules defined with "http.status_code" or - // "http.response.status_code" - if rule_tag_key_str == "http.status_code" - || rule_tag_key_str - == opentelemetry_semantic_conventions::trace::HTTP_RESPONSE_STATUS_CODE - { - match self.match_http_status_code_rule(matcher, span) { - Some(true) => continue, // Status code matched - Some(false) | None => return false, // Status code didn't match or wasn't found - } - } else { - // Logic for other tags: - // First, try to match directly with the provided tag key - let direct_match = span - .attributes - .iter() - .find(|kv| kv.key.as_str() == rule_tag_key_str) - .and_then(|kv| self.match_attribute_value(&kv.value, matcher)); - - if direct_match.unwrap_or(false) { - continue; - } - - // If no direct match, try to find the corresponding OpenTelemetry attribute that - // maps to the Datadog tag key This handles cases where the rule key - // is a Datadog key (e.g., "http.method") and the attribute is an - // OTel key (e.g., "http.request.method") - if rule_tag_key_str.starts_with("http.") { - let tag_match = span.attributes.iter().any(|kv| { - let dd_key_from_otel_attr = get_dd_key_for_otlp_attribute(kv.key.as_str()); - if dd_key_from_otel_attr == rule_tag_key_str { - return self - .match_attribute_value(&kv.value, matcher) - .unwrap_or(false); - } - false - }); - - if !tag_match { - return false; // Mapped attribute 
not found or did not match - } - // If tag_match is true, loop continues to next rule_tag_key. - } else { - // For non-HTTP attributes, if we don't have a direct match, the rule doesn't - // match - return false; - } - } - } - - true - } - - /// Helper method to specifically match a rule against an HTTP status code extracted from - /// attributes. Returns Some(true) if status code found and matches, Some(false) if found - /// but not matched, None if not found. - fn match_http_status_code_rule( - &self, - matcher: &GlobMatcher, - span: &PreSampledSpan, - ) -> Option { - let status_code_u32 = get_otel_status_code(span); - if status_code_u32 != 0 { - // Assuming 0 means not found - let status_value = opentelemetry::Value::I64(i64::from(status_code_u32)); - self.match_attribute_value(&status_value, matcher) - } else { - None // Status code not found in attributes - } - } - - // Helper method to match attribute values considering different value types - fn match_attribute_value( - &self, - value: &opentelemetry::Value, - matcher: &GlobMatcher, - ) -> Option { - // Floating point values are handled with special rules - if let Some(float_val) = utils::extract_float_value(value) { - // Check if the float has a non-zero decimal part - let has_decimal = float_val != (float_val as i64) as f64; - - // For non-integer floats, only match if it's a wildcard pattern - if has_decimal { - // All '*' pattern returns true, any other pattern returns false - return Some(matcher.pattern().chars().all(|c| c == '*')); - } - - // For integer floats, convert to string for matching - return Some(matcher.matches(&float_val.to_string())); - } - - // For non-float values, use normal matching - utils::extract_string_value(value).map(|string_value| matcher.matches(&string_value)) - } - - /// Samples a trace ID using this rule's sample rate - pub fn sample(&self, trace_id: TraceId) -> bool { - // Delegate to the internal rate sampler's new sample method - self.rate_sampler.sample(trace_id) - } -} 
- -/// Represents a priority for sampling rules -#[allow(dead_code)] -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] -pub enum RuleProvenance { - Customer = 0, - Dynamic = 1, - Default = 2, -} - -impl From<&str> for RuleProvenance { - fn from(s: &str) -> Self { - match s { - "customer" => RuleProvenance::Customer, - "dynamic" => RuleProvenance::Dynamic, - _ => RuleProvenance::Default, - } - } -} - -/// A composite sampler that applies rules in order of precedence -#[derive(Clone, Debug)] -pub struct DatadogSampler { - /// Sampling rules to apply, in order of precedence - rules: RulesSampler, - - /// Service-based samplers provided by the Agent - service_samplers: ServicesSampler, - - /// Rate limiter for limiting the number of spans per second - rate_limiter: RateLimiter, - - /// Resource with service information, wrapped in Arc> for sharing - resource: Arc>, -} - -impl DatadogSampler { - /// Creates a new DatadogSampler with the given rules - pub fn new( - rules: Vec, - rate_limit: i32, - resource: Arc>, - ) -> Self { - // Create rate limiter with default value of 100 if not provided - let limiter = RateLimiter::new(rate_limit, None); - - DatadogSampler { - rules: RulesSampler::new(rules), - service_samplers: ServicesSampler::default(), - rate_limiter: limiter, - resource, - } - } - - // used for tests - #[allow(dead_code)] - pub(crate) fn update_service_rates(&self, rates: impl IntoIterator) { - self.service_samplers.update_rates(rates); - } - - pub(crate) fn on_agent_response(&self) -> Box Fn(&'a str) + Send + Sync> { - let service_samplers = self.service_samplers.clone(); - Box::new(move |s: &str| { - let Ok(new_rates) = serde_json::de::from_str::(s) else { - return; - }; - let Some(new_rates) = new_rates.rates_by_service else { - return; - }; - service_samplers.update_rates(new_rates.into_iter().map(|(k, v)| (k.to_string(), v))); - }) - } - - /// Creates a callback for updating sampling rules from remote configuration - /// # Returns - /// A 
boxed function that takes a slice of SamplingRuleConfig and updates the sampling rules - pub fn on_rules_update(&self) -> SamplingRulesCallback { - let rules_sampler = self.rules.clone(); - Box::new(move |rule_configs: &[SamplingRuleConfig]| { - let new_rules = SamplingRule::from_configs(rule_configs.to_vec()); - - rules_sampler.update_rules(new_rules); - }) - } - - /// Computes a key for service-based sampling - fn service_key<'a>(&self, span: &impl OtelSpan<'a>) -> String { - // Get service directly from resource - let service = get_otel_service(span).into_owned(); - // Get env from attributes - let env = get_otel_env(span); - - format!("service:{service},env:{env}") - } - - /// Finds the highest precedence rule that matches the span - fn find_matching_rule(&self, span: &PreSampledSpan) -> Option { - self.rules.find_matching_rule(|rule| rule.matches(span)) - } - - /// Returns the sampling mechanism used for the decision - fn get_sampling_mechanism( - &self, - rule: Option<&SamplingRule>, - used_agent_sampler: bool, - ) -> SamplingMechanism { - if let Some(rule) = rule { - match rule.provenance.as_str() { - // Provenance will not be set for rules until we implement remote configuration - "customer" => mechanism::REMOTE_USER_TRACE_SAMPLING_RULE, - "dynamic" => mechanism::REMOTE_DYNAMIC_TRACE_SAMPLING_RULE, - _ => mechanism::LOCAL_USER_TRACE_SAMPLING_RULE, - } - } else if used_agent_sampler { - // If using service-based sampling from the agent - mechanism::AGENT_RATE_BY_SERVICE - } else { - // Should not happen, but just in case - mechanism::DEFAULT - } - } - - /// Sample an incoming span based on the parent context and attributes - pub(crate) fn sample( - &self, - is_parent_sampled: Option, - trace_id: TraceId, - _name: &str, - span_kind: &opentelemetry::trace::SpanKind, - attributes: &[KeyValue], - ) -> DdSamplingResult { - if let Some(is_parent_sampled) = is_parent_sampled { - // If a parent exists, inherit its sampling decision and trace state - return 
DdSamplingResult { - is_keep: is_parent_sampled, - trace_root_info: None, - }; - } - - // Apply rules-based sampling - self.sample_root(trace_id, _name, span_kind, attributes) - } - - /// Sample the root span of a trace - fn sample_root( - &self, - trace_id: TraceId, - name: &str, - span_kind: &opentelemetry::trace::SpanKind, - attributes: &[KeyValue], - ) -> DdSamplingResult { - let mut is_keep = true; - let mut used_agent_sampler = false; - let sample_rate; - let mut rl_effective_rate: Option = None; - - let resource_guard = self.resource.read().unwrap(); - let span = PreSampledSpan::new(name, span_kind.clone(), attributes, &resource_guard); - - // Find a matching rule - let matching_rule = self.find_matching_rule(&span); - - // Apply sampling logic - if let Some(rule) = &matching_rule { - // Get the sample rate from the rule - sample_rate = rule.sample_rate; - - // First check if the span should be sampled according to the rule - if !rule.sample(trace_id) { - is_keep = false; - // If the span should be sampled, then apply rate limiting - } else if !self.rate_limiter.is_allowed() { - is_keep = false; - rl_effective_rate = Some(self.rate_limiter.effective_rate()); - } - } else { - // Try service-based sampling from Agent - let service_key = self.service_key(&span); - if let Some(sampler) = self.service_samplers.get(&service_key) { - // Use the service-based sampler - used_agent_sampler = true; - sample_rate = sampler.sample_rate(); // Get rate for reporting - - // Check if the service sampler decides to drop - if !sampler.sample(trace_id) { - is_keep = false; - } - } else { - // Default sample rate, should never happen in practice if agent provides rates - sample_rate = 1.0; - // Keep the default decision (RecordAndSample) - } - } - - // Determine the sampling mechanism - let mechanism = self.get_sampling_mechanism(matching_rule.as_ref(), used_agent_sampler); - - DdSamplingResult { - is_keep, - trace_root_info: Some(TraceRootSamplingInfo { - mechanism, - priority: 
mechanism.to_priority(is_keep), - rate: sample_rate, - rl_effective_rate, - }), - } - } -} - -/// Formats a sampling rate with up to 6 significant digits, stripping trailing zeros. -/// -/// This matches the Go behavior of `strconv.FormatFloat(rate, 'g', 6, 64)`. -/// -/// # Examples -/// - `1.0` → `Some("1")` -/// - `0.5` → `Some("0.5")` -/// - `0.7654321` → `Some("0.765432")` -/// - `0.100000` → `Some("0.1")` -/// - `-0.1` → `None` -/// - `1.1` → `None` -fn format_sampling_rate(rate: f64) -> Option { - if rate.is_nan() || !(0.0..=1.0).contains(&rate) { - return None; - } - - if rate == 0.0 { - return Some("0".to_string()); - } - - let digits = 6_i32; - let magnitude = rate.abs().log10().floor() as i32; - let scale = 10f64.powi(digits - 1 - magnitude); - let rounded = (rate * scale).round() / scale; - - // Determine decimal places needed for 6 significant digits - let decimal_places = if magnitude >= digits - 1 { - 0 - } else { - (digits - 1 - magnitude) as usize - }; - - let s = format!("{:.prec$}", rounded, prec = decimal_places); - // Strip trailing zeros after decimal point - Some(if s.contains('.') { - let s = s.trim_end_matches('0'); - let s = s.trim_end_matches('.'); - s.to_string() - } else { - s - }) -} - -pub(crate) struct DdSamplingResult { - pub is_keep: bool, - pub trace_root_info: Option, -} - -pub(crate) struct TraceRootSamplingInfo { - pub priority: SamplingPriority, - pub mechanism: SamplingMechanism, - pub rate: f64, - pub rl_effective_rate: Option, -} - -impl DdSamplingResult { - /// Returns Datadog-specific sampling tags to be added as attributes - /// - /// # Parameters - /// * `decision` - The sampling decision (RecordAndSample or Drop) - /// * `mechanism` - The sampling mechanism used to make the decision - /// * `sample_rate` - The sample rate to use for the decision - /// * `rl_effective_rate` - The effective rate limit if rate limiting was applied - /// - /// # Returns - /// A vector of attributes to add to the sampling result - pub fn 
to_dd_sampling_tags(&self) -> Vec { - let mut result = Vec::new(); - let Some(root_info) = &self.trace_root_info else { - return result; // No root info, return empty attributes - }; - - // Add rate limiting tag if applicable - if let Some(limit) = root_info.rl_effective_rate { - result.push(KeyValue::new(RL_EFFECTIVE_RATE, limit)); - } - - // Add the sampling decision trace tag with the mechanism - let mechanism = root_info.mechanism; - result.push(KeyValue::new( - SAMPLING_DECISION_MAKER_TAG_KEY, - mechanism.to_cow(), - )); - - // Add the sample rate tag with the correct key based on the mechanism - match mechanism { - mechanism::AGENT_RATE_BY_SERVICE => { - result.push(KeyValue::new(SAMPLING_AGENT_RATE_TAG_KEY, root_info.rate)); - if let Some(rate_str) = format_sampling_rate(root_info.rate) { - result.push(KeyValue::new(SAMPLING_KNUTH_RATE_TAG_KEY, rate_str)); - } - } - mechanism::REMOTE_USER_TRACE_SAMPLING_RULE - | mechanism::REMOTE_DYNAMIC_TRACE_SAMPLING_RULE - | mechanism::LOCAL_USER_TRACE_SAMPLING_RULE => { - result.push(KeyValue::new(SAMPLING_RULE_RATE_TAG_KEY, root_info.rate)); - if let Some(rate_str) = format_sampling_rate(root_info.rate) { - result.push(KeyValue::new(SAMPLING_KNUTH_RATE_TAG_KEY, rate_str)); - } - } - _ => {} - } - - let priority = root_info.priority; - result.push(KeyValue::new( - SAMPLING_PRIORITY_TAG_KEY, - priority.into_i8() as i64, - )); - - result - } - - /// Converts the sampling result to a SamplingResult for OpenTelemetry - pub fn to_otel_decision(&self) -> SamplingDecision { - if self.is_keep { - SamplingDecision::RecordAndSample - } else { - SamplingDecision::RecordOnly - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::sampling::constants::{ - attr::{ENV_TAG, RESOURCE_TAG}, - pattern, - }; - use opentelemetry::{trace::SpanKind, Key, KeyValue, Value}; - use opentelemetry_sdk::Resource as SdkResource; - use opentelemetry_semantic_conventions::{ - attribute::{ - DB_SYSTEM_NAME, HTTP_REQUEST_METHOD, 
MESSAGING_OPERATION_TYPE, MESSAGING_SYSTEM, - }, - resource::SERVICE_NAME, - trace::{HTTP_RESPONSE_STATUS_CODE, NETWORK_PROTOCOL_NAME}, - }; - - fn create_empty_resource() -> opentelemetry_sdk::Resource { - opentelemetry_sdk::Resource::builder_empty().build() - } - - // Helper function to create an empty resource wrapped in Arc for DatadogSampler - fn create_empty_resource_arc() -> Arc> { - Arc::new(RwLock::new( - opentelemetry_sdk::Resource::builder_empty().build(), - )) - } - - fn create_resource(res: String) -> Arc> { - let attributes = vec![ - KeyValue::new(SERVICE_NAME, res), // String `res` is Into - ]; - let resource: SdkResource = SdkResource::builder_empty() - .with_attributes(attributes) - .build(); - Arc::new(RwLock::new(resource)) - } - - // Helper function to create a trace ID - fn create_trace_id() -> TraceId { - let bytes = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; - TraceId::from_bytes(bytes) - } - - // Helper function to create attributes for testing - fn create_attributes(resource: &'static str, env: &'static str) -> Vec { - vec![ - KeyValue::new(RESOURCE_TAG, resource), - KeyValue::new("datadog.env", env), - ] - } - - #[test] - fn test_sampling_rule_creation() { - let rule = SamplingRule::new( - 0.5, - Some("test-service".to_string()), - Some("test-name".to_string()), - Some("test-resource".to_string()), - Some(HashMap::from([( - "custom-tag".to_string(), - "tag-value".to_string(), - )])), - Some("customer".to_string()), - ); - - assert_eq!(rule.sample_rate, 0.5); - assert_eq!(rule.service_matcher.unwrap().pattern(), "test-service"); - assert_eq!(rule.name_matcher.unwrap().pattern(), "test-name"); - assert_eq!( - rule.resource_matcher.unwrap().pattern(), - "test-resource".to_string() - ); - assert_eq!( - rule.tag_matchers.get("custom-tag").unwrap().pattern(), - "tag-value" - ); - assert_eq!(rule.provenance, "customer"); - } - - #[test] - fn test_sampling_rule_with_no_rule() { - // Create a rule without specifying any criteria - let 
rule = SamplingRule::new( - 0.5, None, // No service - None, // No name - None, // No resource - None, // No tags - None, // Default provenance - ); - - // Verify fields are set to None or empty - assert_eq!(rule.sample_rate, 0.5); - assert!(rule.service_matcher.is_none()); - assert!(rule.name_matcher.is_none()); - assert!(rule.resource_matcher.is_none()); - assert!(rule.tag_matchers.is_empty()); - assert_eq!(rule.provenance, "default"); - - // Verify no matchers were created - assert!(rule.service_matcher.is_none()); - assert!(rule.name_matcher.is_none()); - assert!(rule.resource_matcher.is_none()); - assert!(rule.tag_matchers.is_empty()); - - // Test that a rule with NO_RULE constants behaves the same as None - let rule_with_empty_strings = SamplingRule::new( - 0.5, - Some(pattern::NO_RULE.to_string()), // Empty service string - Some(pattern::NO_RULE.to_string()), // Empty name string - Some(pattern::NO_RULE.to_string()), // Empty resource string - Some(HashMap::from([( - pattern::NO_RULE.to_string(), - pattern::NO_RULE.to_string(), - )])), // Empty tag - None, - ); - - // Verify that matchers aren't created for NO_RULE values - assert!(rule_with_empty_strings.service_matcher.is_none()); - assert!(rule_with_empty_strings.name_matcher.is_none()); - assert!(rule_with_empty_strings.resource_matcher.is_none()); - assert!(rule_with_empty_strings.tag_matchers.is_empty()); - - // Create a span with some attributes - let attributes = create_attributes("some-resource", "some-env"); - - // Empty resource for testing (unwrapped for the test) - let empty_resource = create_empty_resource(); - - // Both rules should match any span since they have no criteria - let span = PreSampledSpan::new("", SpanKind::Client, &attributes, &empty_resource); - assert!(rule.matches(&span)); - assert!(rule_with_empty_strings.matches(&span)); - } - - #[test] - fn test_sampling_rule_matches() { - // Create a rule with specific service and name patterns - let _rule = SamplingRule::new( - 0.5, - 
Some("web-*".to_string()), - Some("http.*".to_string()), - None, - Some(HashMap::from([( - "custom_key".to_string(), - "custom_value".to_string(), - )])), - None, - ); - } - - #[test] - fn test_sample_method() { - // Create two rules with different rates - let rule_always = SamplingRule::new(1.0, None, None, None, None, None); - let rule_never = SamplingRule::new(0.0, None, None, None, None, None); - - let trace_id = create_trace_id(); - - // Rule with rate 1.0 should always sample - assert!(rule_always.sample(trace_id)); - - // Rule with rate 0.0 should never sample - assert!(!rule_never.sample(trace_id)); - } - - #[test] - fn test_datadog_sampler_creation() { - // Create a sampler with default config - let sampler = DatadogSampler::new(vec![], 100, create_empty_resource_arc()); - assert!(sampler.rules.is_empty()); - assert!(sampler.service_samplers.is_empty()); - - // Create a sampler with rules - let rule = SamplingRule::new(0.5, None, None, None, None, None); - let sampler_with_rules = DatadogSampler::new(vec![rule], 200, create_empty_resource_arc()); - assert_eq!(sampler_with_rules.rules.len(), 1); - } - - #[test] - fn test_service_key_generation() { - // Use create_resource to initialize the sampler with a service name in its resource - let test_service_name = "test-service".to_string(); - let sampler_resource = create_resource(test_service_name.clone()); - let sampler = DatadogSampler::new(vec![], 100, sampler_resource); - - // Test with service and env - // The 'service' in create_attributes is not used for the service part of the key, - // but ENV_TAG is still correctly picked up from attributes. 
- let attrs = create_attributes("resource", "production"); - let res = &sampler.resource.read().unwrap(); - let span = PreSampledSpan::new("test-span", SpanKind::Internal, attrs.as_slice(), res); - assert_eq!( - sampler.service_key(&span), - // Expect the service name from the sampler's resource - format!("service:{test_service_name},env:production") - ); - - // Test with missing env - // The 'service' in these attributes is also not used for the service part of the key. - let attrs_no_env = vec![KeyValue::new(RESOURCE_TAG, "resource")]; - let span = PreSampledSpan::new( - "test-span", - SpanKind::Internal, - attrs_no_env.as_slice(), - res, - ); - assert_eq!( - sampler.service_key(&span), - // Expect the service name from the sampler's resource and an empty env - format!("service:{test_service_name},env:") - ); - } - - #[test] - fn test_update_service_rates() { - let sampler = DatadogSampler::new(vec![], 100, create_empty_resource_arc()); - - // Update with service rates - let mut rates = HashMap::new(); - rates.insert("service:web,env:prod".to_string(), 0.5); - rates.insert("service:api,env:prod".to_string(), 0.75); - - sampler.service_samplers.update_rates(rates); - - // Check number of samplers - assert_eq!(sampler.service_samplers.len(), 2); - - // Verify keys exist - assert!(sampler - .service_samplers - .contains_key("service:web,env:prod")); - assert!(sampler - .service_samplers - .contains_key("service:api,env:prod")); - - // Verify the sampling rates are correctly set - if let Some(web_sampler) = sampler.service_samplers.get("service:web,env:prod") { - assert_eq!(web_sampler.sample_rate(), 0.5); - } else { - panic!("Web service sampler not found"); - } - - if let Some(api_sampler) = sampler.service_samplers.get("service:api,env:prod") { - assert_eq!(api_sampler.sample_rate(), 0.75); - } else { - panic!("API service sampler not found"); - } - } - - #[test] - fn test_find_matching_rule() { - // Create rules with different priorities and service matchers - 
let rule1 = SamplingRule::new( - 0.1, - Some("service1".to_string()), - None, - None, - None, - Some("customer".to_string()), // Highest priority - ); - - let rule2 = SamplingRule::new( - 0.2, - Some("service2".to_string()), - None, - None, - None, - Some("dynamic".to_string()), // Middle priority - ); - - let rule3 = SamplingRule::new( - 0.3, - Some("service*".to_string()), // Wildcard service - None, - None, - None, - Some("default".to_string()), // Lowest priority - ); - - // Sampler is mutable to allow resource updates - let mut sampler = DatadogSampler::new( - vec![rule1.clone(), rule2.clone(), rule3.clone()], - 100, - create_empty_resource_arc(), // Initial resource, will be updated before each check - ); - - // Test with a specific service that should match the first rule (rule1) - { - sampler.resource = create_resource("service1".to_string()); - let attrs1 = create_attributes("resource_val_for_attr1", "prod"); - let res = sampler.resource.read().unwrap(); - let span = PreSampledSpan::new("test-span", SpanKind::Client, attrs1.as_slice(), &res); - let matching_rule_for_attrs1 = sampler.find_matching_rule(&span); - assert!( - matching_rule_for_attrs1.is_some(), - "Expected rule1 to match for service1" - ); - let rule = matching_rule_for_attrs1.unwrap(); - assert_eq!(rule.sample_rate, 0.1, "Expected rule1 sample rate"); - assert_eq!(rule.provenance, "customer", "Expected rule1 provenance"); - } - - // Test with a specific service that should match the second rule (rule2) - { - sampler.resource = create_resource("service2".to_string()); - let attrs2 = create_attributes("resource_val_for_attr2", "prod"); - let res = sampler.resource.read().unwrap(); - let span = PreSampledSpan::new("test-span", SpanKind::Client, attrs2.as_slice(), &res); - let matching_rule_for_attrs2 = sampler.find_matching_rule(&span); - assert!( - matching_rule_for_attrs2.is_some(), - "Expected rule2 to match for service2" - ); - let rule = matching_rule_for_attrs2.unwrap(); - 
assert_eq!(rule.sample_rate, 0.2, "Expected rule2 sample rate"); - assert_eq!(rule.provenance, "dynamic", "Expected rule2 provenance"); - } - - // Test with a service that matches the wildcard rule (rule3) - { - sampler.resource = create_resource("service3".to_string()); - let attrs3 = create_attributes("resource_val_for_attr3", "prod"); - let res = sampler.resource.read().unwrap(); - let span = PreSampledSpan::new("test-span", SpanKind::Client, attrs3.as_slice(), &res); - let matching_rule_for_attrs3 = sampler.find_matching_rule(&span); - assert!( - matching_rule_for_attrs3.is_some(), - "Expected rule3 to match for service3" - ); - let rule = matching_rule_for_attrs3.unwrap(); - assert_eq!(rule.sample_rate, 0.3, "Expected rule3 sample rate"); - assert_eq!(rule.provenance, "default", "Expected rule3 provenance"); - } - - // Test with a service that doesn't match any rule's service pattern - { - sampler.resource = create_resource("other_sampler_service".to_string()); - let attrs4 = create_attributes("resource_val_for_attr4", "prod"); - let res = sampler.resource.read().unwrap(); - let span = PreSampledSpan::new("test-span", SpanKind::Client, attrs4.as_slice(), &res); - let matching_rule_for_attrs4 = sampler.find_matching_rule(&span); - assert!( - matching_rule_for_attrs4.is_none(), - "Expected no rule to match for service 'other_sampler_service'" - ); - } - } - - #[test] - fn test_get_sampling_mechanism() { - let sampler = DatadogSampler::new(vec![], 100, create_empty_resource_arc()); - - // Create rules with different provenances - let rule_customer = - SamplingRule::new(0.1, None, None, None, None, Some("customer".to_string())); - let rule_dynamic = - SamplingRule::new(0.2, None, None, None, None, Some("dynamic".to_string())); - let rule_default = - SamplingRule::new(0.3, None, None, None, None, Some("default".to_string())); - - // Test with customer rule - let mechanism1 = sampler.get_sampling_mechanism(Some(&rule_customer), false); - assert_eq!(mechanism1, 
mechanism::REMOTE_USER_TRACE_SAMPLING_RULE); - - // Test with dynamic rule - let mechanism2 = sampler.get_sampling_mechanism(Some(&rule_dynamic), false); - assert_eq!(mechanism2, mechanism::REMOTE_DYNAMIC_TRACE_SAMPLING_RULE); - - // Test with default rule - let mechanism3 = sampler.get_sampling_mechanism(Some(&rule_default), false); - assert_eq!(mechanism3, mechanism::LOCAL_USER_TRACE_SAMPLING_RULE); - - // Test with agent sampler - let mechanism4 = sampler.get_sampling_mechanism(None, true); - assert_eq!(mechanism4, mechanism::AGENT_RATE_BY_SERVICE); - - // Test fallback case - let mechanism5 = sampler.get_sampling_mechanism(None, false); - assert_eq!(mechanism5, mechanism::DEFAULT); - } - - #[test] - fn test_add_dd_sampling_tags() { - // Test with RecordAndSample decision and LocalUserTraceSamplingRule mechanism - let sample_rate = 0.5; - let is_sampled = true; - let mechanism = mechanism::LOCAL_USER_TRACE_SAMPLING_RULE; - let sampling_result = DdSamplingResult { - is_keep: true, - trace_root_info: Some(TraceRootSamplingInfo { - priority: mechanism.to_priority(is_sampled), - mechanism, - rate: 0.5, - rl_effective_rate: None, - }), - }; - - let attrs = sampling_result.to_dd_sampling_tags(); - - // Verify the number of attributes (decision_maker + priority + rule_rate + ksr) - assert_eq!(attrs.len(), 4); - - // Check individual attributes - let mut found_decision_maker = false; - let mut found_priority = false; - let mut found_rule_rate = false; - let mut found_ksr = false; - - for attr in &attrs { - match attr.key.as_str() { - SAMPLING_DECISION_MAKER_TAG_KEY => { - let value_str = match &attr.value { - opentelemetry::Value::String(s) => s.to_string(), - _ => panic!("Expected string value for decision maker tag"), - }; - assert_eq!(value_str, mechanism.to_cow()); - found_decision_maker = true; - } - SAMPLING_PRIORITY_TAG_KEY => { - // For LocalUserTraceSamplingRule with KEEP, it should be USER_KEEP - let expected_priority = mechanism.to_priority(true).into_i8() as 
i64; - - let value_int = match attr.value { - opentelemetry::Value::I64(i) => i, - _ => panic!("Expected integer value for priority tag"), - }; - assert_eq!(value_int, expected_priority); - found_priority = true; - } - SAMPLING_RULE_RATE_TAG_KEY => { - let value_float = match attr.value { - opentelemetry::Value::F64(f) => f, - _ => panic!("Expected float value for rule rate tag"), - }; - assert_eq!(value_float, sample_rate); - found_rule_rate = true; - } - SAMPLING_KNUTH_RATE_TAG_KEY => { - let value_str = match &attr.value { - opentelemetry::Value::String(s) => s.to_string(), - _ => panic!("Expected string value for ksr tag"), - }; - assert_eq!(value_str, "0.5"); - found_ksr = true; - } - _ => {} - } - } - - assert!(found_decision_maker, "Missing decision maker tag"); - assert!(found_priority, "Missing priority tag"); - assert!(found_rule_rate, "Missing rule rate tag"); - assert!(found_ksr, "Missing knuth sampling rate tag"); - - // Test with rate limiting - let rate_limit = 0.5; - let is_sampled = false; - let mechanism = mechanism::LOCAL_USER_TRACE_SAMPLING_RULE; - let sampling_result = DdSamplingResult { - is_keep: false, - trace_root_info: Some(TraceRootSamplingInfo { - priority: mechanism.to_priority(is_sampled), - mechanism, - rate: 0.5, - rl_effective_rate: Some(rate_limit), - }), - }; - let attrs_with_limit = sampling_result.to_dd_sampling_tags(); - - // With rate limiting, there should be one more attribute - assert_eq!(attrs_with_limit.len(), 5); - - // Check for rate limit attribute - let mut found_limit = false; - for attr in &attrs_with_limit { - if attr.key.as_str() == RL_EFFECTIVE_RATE { - let value_float = match attr.value { - opentelemetry::Value::F64(f) => f, - _ => panic!("Expected float value for rate limit tag"), - }; - assert_eq!(value_float, rate_limit); - found_limit = true; - break; - } - } - - assert!(found_limit, "Missing rate limit tag"); - - // Test with AgentRateByService mechanism to check for SAMPLING_AGENT_RATE_TAG_KEY - - let 
agent_rate = 0.75; - let is_sampled = false; - let mechanism = mechanism::AGENT_RATE_BY_SERVICE; - let sampling_result = DdSamplingResult { - is_keep: false, - trace_root_info: Some(TraceRootSamplingInfo { - priority: mechanism.to_priority(is_sampled), - mechanism, - rate: agent_rate, - rl_effective_rate: None, - }), - }; - - let agent_attrs = sampling_result.to_dd_sampling_tags(); - - // Verify the number of attributes (should be 4: decision_maker + priority + - // agent_rate + ksr) - assert_eq!(agent_attrs.len(), 4); - - // Check for agent rate tag and ksr tag - let mut found_agent_rate = false; - let mut found_ksr = false; - for attr in &agent_attrs { - match attr.key.as_str() { - SAMPLING_AGENT_RATE_TAG_KEY => { - let value_float = match attr.value { - opentelemetry::Value::F64(f) => f, - _ => panic!("Expected float value for agent rate tag"), - }; - assert_eq!(value_float, agent_rate); - found_agent_rate = true; - } - SAMPLING_KNUTH_RATE_TAG_KEY => { - let value_str = match &attr.value { - opentelemetry::Value::String(s) => s.to_string(), - _ => panic!("Expected string value for ksr tag"), - }; - assert_eq!(value_str, "0.75"); - found_ksr = true; - } - _ => {} - } - } - - assert!(found_agent_rate, "Missing agent rate tag"); - assert!( - found_ksr, - "Missing knuth sampling rate tag for agent mechanism" - ); - - // Also check that the SAMPLING_RULE_RATE_TAG_KEY is NOT present for agent mechanism - for attr in &agent_attrs { - assert_ne!( - attr.key.as_str(), - SAMPLING_RULE_RATE_TAG_KEY, - "Rule rate tag should not be present for agent mechanism" - ); - } - } - - #[test] - fn test_format_sampling_rate() { - // Exact values - assert_eq!(format_sampling_rate(1.0), Some("1".to_string())); - assert_eq!(format_sampling_rate(0.5), Some("0.5".to_string())); - assert_eq!(format_sampling_rate(0.1), Some("0.1".to_string())); - assert_eq!(format_sampling_rate(0.0), Some("0".to_string())); - - // Trailing zeros should be stripped - 
assert_eq!(format_sampling_rate(0.100000), Some("0.1".to_string())); - assert_eq!(format_sampling_rate(0.500000), Some("0.5".to_string())); - - // Truncation to 6 significant digits - assert_eq!( - format_sampling_rate(0.7654321), - Some("0.765432".to_string()) - ); - assert_eq!( - format_sampling_rate(0.123456789), - Some("0.123457".to_string()) - ); - - // Small values - assert_eq!(format_sampling_rate(0.001), Some("0.001".to_string())); - - // Boundary values - assert_eq!(format_sampling_rate(0.75), Some("0.75".to_string())); - assert_eq!(format_sampling_rate(0.999999), Some("0.999999".to_string())); - - // Invalid rates - assert_eq!(format_sampling_rate(-0.1), None); - assert_eq!(format_sampling_rate(1.1), None); - assert_eq!(format_sampling_rate(f64::NAN), None); - assert_eq!(format_sampling_rate(f64::INFINITY), None); - assert_eq!(format_sampling_rate(f64::NEG_INFINITY), None); - } - - #[test] - fn test_should_sample_parent_context() { - let sampler = DatadogSampler::new(vec![], 100, create_empty_resource_arc()); - - // Create empty slices for attributes and links - let empty_attrs: &[KeyValue] = &[]; - - // Test with sampled parent context - // let parent_sampled = create_parent_context(true); - let result_sampled = sampler.sample( - Some(true), - create_trace_id(), - "span", - &SpanKind::Client, - empty_attrs, - ); - - // Should inherit the sampling decision from parent - assert_eq!( - result_sampled.to_otel_decision(), - SamplingDecision::RecordAndSample - ); - assert!(result_sampled.to_dd_sampling_tags().is_empty()); - - // Test with non-sampled parent context - let result_not_sampled = sampler.sample( - Some(false), - create_trace_id(), - "span", - &SpanKind::Client, - empty_attrs, - ); - - // Should inherit the sampling decision from parent - assert_eq!( - result_not_sampled.to_otel_decision(), - SamplingDecision::RecordOnly - ); - assert!(result_not_sampled.to_dd_sampling_tags().is_empty()); - } - - #[test] - fn test_should_sample_with_rule() { - // 
Create a rule that always samples - let rule = SamplingRule::new( - 1.0, - Some("test-service".to_string()), - None, - None, - None, - None, - ); - - let sampler = DatadogSampler::new(vec![rule], 100, create_empty_resource_arc()); - - // Test with matching attributes - let attrs = create_attributes("resource", "prod"); - let result = sampler.sample( - None, - create_trace_id(), - "span", - &SpanKind::Client, - attrs.as_slice(), - ); - - // Should sample and add attributes - assert_eq!(result.to_otel_decision(), SamplingDecision::RecordAndSample); - assert!(!result.to_dd_sampling_tags().is_empty()); - - // Test with non-matching attributes - let attrs_no_match = create_attributes("other-resource", "prod"); - let result_no_match = sampler.sample( - None, - create_trace_id(), - "span", - &SpanKind::Client, - attrs_no_match.as_slice(), - ); - - // Should still sample (default behavior when no rules match) and add attributes - assert_eq!( - result_no_match.to_otel_decision(), - SamplingDecision::RecordAndSample - ); - assert!(!result_no_match.to_dd_sampling_tags().is_empty()); - } - - #[test] - fn test_should_sample_with_service_rates() { - // Initialize sampler with a default service, e.g., "test-service" - // The sampler's own service name will be used for the 'service:' part of the service_key - let mut sampler = - DatadogSampler::new(vec![], 100, create_resource("test-service".to_string())); - - // Add service rates for different service+env combinations - let mut rates = HashMap::new(); - rates.insert("service:test-service,env:prod".to_string(), 1.0); // Always sample for test-service in prod - rates.insert("service:other-service,env:prod".to_string(), 0.0); // Never sample for other-service in prod - - sampler.update_service_rates(rates); - - // Test with attributes that should lead to "service:test-service,env:prod" key - // Sampler's resource is already for "test-service" - let attrs_sample = create_attributes("any_resource_name_matching_env", "prod"); - let 
result_sample = sampler.sample( - None, - create_trace_id(), - "span_for_test_service", - &SpanKind::Client, - attrs_sample.as_slice(), - ); - // Expect RecordAndSample because service_key will be "service:test-service,env:prod" -> - // rate 1.0 - assert_eq!( - result_sample.to_otel_decision(), - SamplingDecision::RecordAndSample, - "Span for test-service/prod should be sampled" - ); - - // Test with attributes that should lead to "service:other-service,env:prod" key - // Update sampler's resource to be "other-service" - sampler.resource = create_resource("other-service".to_string()); - let attrs_no_sample = create_attributes("any_resource_name_matching_env", "prod"); - let result_no_sample = sampler.sample( - None, - create_trace_id(), - "span_for_other_service", - &SpanKind::Client, - attrs_no_sample.as_slice(), - ); - // Expect Drop because service_key will be "service:other-service,env:prod" -> rate 0.0 - assert_eq!( - result_no_sample.to_otel_decision(), - SamplingDecision::RecordOnly, - "Span for other-service/prod should be dropped" - ); - } - - #[test] - fn test_sampling_rule_matches_float_attributes() { - use opentelemetry::Value; - - // Helper to create attributes with a float value - fn create_attributes_with_float(tag_key: &'static str, float_value: f64) -> Vec { - vec![ - KeyValue::new(RESOURCE_TAG, "resource"), - KeyValue::new(ENV_TAG, "prod"), - KeyValue::new(tag_key, Value::F64(float_value)), - ] - } - - // Test case 1: Rule with exact value matching integer float - let rule_integer = SamplingRule::new( - 0.5, - None, - None, - None, - Some(HashMap::from([("float_tag".to_string(), "42".to_string())])), - None, - ); - - // Should match integer float - let integer_float_attrs = create_attributes_with_float("float_tag", 42.0); - assert!(rule_integer.matches(&PreSampledSpan::new( - "test-span", - SpanKind::Client, - integer_float_attrs.as_slice(), - &create_empty_resource() - ))); - - // Test case 2: Rule with wildcard pattern and non-integer float - 
let rule_wildcard = SamplingRule::new( - 0.5, - None, - None, - None, - Some(HashMap::from([("float_tag".to_string(), "*".to_string())])), - None, - ); - - // Should match non-integer float with wildcard pattern - let decimal_float_attrs = create_attributes_with_float("float_tag", 42.5); - assert!(rule_wildcard.matches(&PreSampledSpan::new( - "test-span", - SpanKind::Client, - decimal_float_attrs.as_slice(), - &create_empty_resource() - ))); - - // Test case 3: Rule with specific pattern and non-integer float - // With our simplified logic, non-integer floats will never match non-wildcard patterns - let rule_specific = SamplingRule::new( - 0.5, - None, - None, - None, - Some(HashMap::from([( - "float_tag".to_string(), - "42.5".to_string(), - )])), - None, - ); - - // Should NOT match the exact decimal value because non-integer floats only match wildcards - let decimal_float_attrs = create_attributes_with_float("float_tag", 42.5); - assert!(!rule_specific.matches(&PreSampledSpan::new( - "test-span", - SpanKind::Client, - decimal_float_attrs.as_slice(), - &create_empty_resource() - ))); - // Test case 4: Pattern with partial wildcard '*' for suffix - let rule_prefix = SamplingRule::new( - 0.5, - None, - None, - None, - Some(HashMap::from([( - "float_tag".to_string(), - "42.*".to_string(), - )])), - None, - ); - - // Should NOT match decimal values as we don't do partial pattern matching for non-integer - // floats - assert!(!rule_prefix.matches(&PreSampledSpan::new( - "test-span", - SpanKind::Client, - decimal_float_attrs.as_slice(), - &create_empty_resource() - ))); - } - - #[test] - fn test_otel_to_datadog_attribute_mapping() { - // Test with a rule that matches against a Datadog attribute name - let rule = SamplingRule::new( - 1.0, - None, - None, - None, - Some(HashMap::from([( - "http.response.status_code".to_string(), - "5*".to_string(), - )])), - None, - ); - - // Create attributes with OpenTelemetry naming convention - let otel_attrs = 
vec![KeyValue::new("http.response.status_code", 500)]; - - // The rule should match because both use the same OpenTelemetry attribute name - assert!(rule.matches(&PreSampledSpan::new( - "test-span", - SpanKind::Client, - otel_attrs.as_slice(), - &create_empty_resource() - ))); - - // Attributes that don't match the value pattern shouldn't match - let non_matching_attrs = vec![KeyValue::new("http.response.status_code", 200)]; - assert!(!rule.matches(&PreSampledSpan::new( - "test-span", - SpanKind::Client, - non_matching_attrs.as_slice(), - &create_empty_resource() - ))); - - // Attributes that have no mapping to the rule tag shouldn't match - let unrelated_attrs = vec![KeyValue::new("unrelated.attribute", "value")]; - assert!(!rule.matches(&PreSampledSpan::new( - "test-span", - SpanKind::Client, - unrelated_attrs.as_slice(), - &create_empty_resource() - ))); - } - - #[test] - fn test_multiple_otel_attribute_mappings() { - // Test with a rule that has multiple tag criteria - let mut tags = HashMap::new(); - tags.insert("http.status_code".to_string(), "5*".to_string()); - tags.insert("http.method".to_string(), "POST".to_string()); - tags.insert("http.url".to_string(), "*api*".to_string()); - - let rule = SamplingRule::new(1.0, None, None, None, Some(tags), None); - - // Create attributes with mixed OpenTelemetry and Datadog naming - let mixed_attrs = vec![ - // OTel attribute that maps to http.status_code - KeyValue::new("http.response.status_code", 503), - // OTel attribute that maps to http.method - KeyValue::new("http.request.method", "POST"), - // OTel attribute that maps to http.url - KeyValue::new("url.full", "https://example.com/api/v1/resource"), - ]; - - // The rule should match because all three criteria are satisfied through mapping - assert!(rule.matches(&PreSampledSpan::new( - "test-span", - SpanKind::Client, - &mixed_attrs, - &create_empty_resource() - ),)); - - // If any criteria is not met, the rule shouldn't match - let missing_method = vec![ - 
KeyValue::new("http.response.status_code", 503), - // Missing http.method/http.request.method - KeyValue::new("url.full", "https://example.com/api/v1/resource"), - ]; - - assert!(!rule.matches(&PreSampledSpan::new( - "test-span", - SpanKind::Client, - &missing_method, - &create_empty_resource() - ),)); - - // Wrong value should also not match - let wrong_method = vec![ - KeyValue::new("http.response.status_code", 503), - KeyValue::new("http.request.method", "GET"), // Not POST - KeyValue::new("url.full", "https://example.com/api/v1/resource"), - ]; - - assert!(!rule.matches(&PreSampledSpan::new( - "test-span", - SpanKind::Client, - &wrong_method, - &create_empty_resource() - ),)); - } - - #[test] - fn test_direct_and_mapped_mixed_attributes() { - // Constants for key names to improve readability and ensure consistency - let dd_status_key_str = HTTP_RESPONSE_STATUS_CODE; - let otel_response_status_key_str = HTTP_RESPONSE_STATUS_CODE; - let custom_tag_key = "custom.tag"; - let custom_tag_value = "value"; - - let empty_resource = create_empty_resource(); - let span_kind_client = SpanKind::Client; - - // Test with both direct matches and mapped attributes - let mut tags_rule1 = HashMap::new(); - tags_rule1.insert(dd_status_key_str.to_string(), "5*".to_string()); - tags_rule1.insert(custom_tag_key.to_string(), custom_tag_value.to_string()); - - let rule1 = SamplingRule::new(1.0, None, None, None, Some(tags_rule1), None); - - // Case 1: OTel attribute that maps to http.status_code (503 matches "5*") + Direct - // custom.tag match - let mixed_attrs_match = vec![ - KeyValue::new(otel_response_status_key_str, 503), - KeyValue::new(custom_tag_key, custom_tag_value), - ]; - assert!(rule1.matches(&PreSampledSpan::new( - "test-span", - span_kind_client, - &mixed_attrs_match, - &empty_resource - )), "Rule with dd_status_key (5*) and custom.tag should match span with otel_response_status_key (503) and custom.tag"); - - // Case 2: Datadog convention for status code (503 matches 
"5*") + Direct custom.tag match - let dd_attrs_match = vec![ - KeyValue::new(dd_status_key_str, 503), - KeyValue::new(custom_tag_key, custom_tag_value), - ]; - assert!(rule1.matches(&PreSampledSpan::new( - "test-span", - SpanKind::Client, - &dd_attrs_match, - &empty_resource - )), "Rule with dd_status_key (5*) and custom.tag should match span with dd_status_key (503) and custom.tag"); - - // Case 3: Missing the custom tag should fail (status code would match) - let missing_custom_tag_attrs = vec![KeyValue::new(otel_response_status_key_str, 503)]; - assert!( - !rule1.matches(&PreSampledSpan::new( - "test-span", - SpanKind::Client, - &missing_custom_tag_attrs, - &empty_resource - )), - "Rule with dd_status_key (5*) and custom.tag should NOT match span missing custom.tag" - ); - - // Case 4: OTel status code 200 (does NOT match "5*") + custom.tag present - let non_matching_otel_status_attrs = vec![ - KeyValue::new(otel_response_status_key_str, 200), - KeyValue::new(custom_tag_key, custom_tag_value), - ]; - assert!(!rule1.matches(&PreSampledSpan::new( - "test-span", - SpanKind::Client, - &non_matching_otel_status_attrs, - &empty_resource - )), "Rule with dd_status_key (5*) and custom.tag should NOT match span with non-matching otel_response_status_key (200)"); - - // Case 5: No recognizable status code + custom.tag present - let no_status_code_attrs = vec![ - KeyValue::new("another.tag", "irrelevant"), - KeyValue::new(custom_tag_key, custom_tag_value), - ]; - assert!(!rule1.matches(&PreSampledSpan::new( - "test-span", - SpanKind::Client, - &no_status_code_attrs, - &empty_resource - )), "Rule with dd_status_key (5*) and custom.tag should NOT match span with no status code attribute"); - - // Case 6: Rule uses OTel key http.response.status_code directly, span has matching OTel - // key. 
- let mut tags_rule2 = HashMap::new(); - tags_rule2.insert(otel_response_status_key_str.to_string(), "200".to_string()); - tags_rule2.insert(custom_tag_key.to_string(), custom_tag_value.to_string()); - let rule2 = SamplingRule::new(1.0, None, None, None, Some(tags_rule2), None); - - let otel_key_rule_match_attrs = vec![ - KeyValue::new(otel_response_status_key_str, 200), - KeyValue::new(custom_tag_key, custom_tag_value), - ]; - assert!(rule2.matches(&PreSampledSpan::new( - "test-span", - SpanKind::Client, - &otel_key_rule_match_attrs, - &empty_resource - )), "Rule with otel_response_status_key (200) and custom.tag should match span with otel_response_status_key (200) and custom.tag"); - } - - #[test] - fn test_operation_name_integration() { - // Create rules that match different operation name patterns - let http_rule = SamplingRule::new( - 1.0, // 100% sample rate - None, // no service matcher - Some("http.*.request".to_string()), // matches both client and server HTTP requests - None, // no resource matcher - None, // no tag matchers - Some("default".to_string()), // rule name - default provenance - ); - - let db_rule = SamplingRule::new( - 1.0, // 100% sample rate - None, // no service matcher - Some("postgresql.query".to_string()), // matches database queries - None, // no resource matcher - None, // no tag matchers - Some("default".to_string()), // rule name - default provenance - ); - - let messaging_rule = SamplingRule::new( - 1.0, // 100% sample rate - None, // no service matcher - Some("kafka.process".to_string()), // matches Kafka messaging operations - None, // no resource matcher - None, // no tag matchers - Some("default".to_string()), // rule name - default provenance - ); - - // Create a sampler with these rules - let sampler = DatadogSampler::new( - vec![http_rule, db_rule, messaging_rule], - 100, - create_empty_resource_arc(), - ); - - // Create a trace ID for testing - let trace_id = create_trace_id(); - - // Test cases for different span kinds 
and attributes - - // 1. HTTP client request - let http_client_attrs = vec![KeyValue::new( - Key::from_static_str(HTTP_REQUEST_METHOD), - Value::String("GET".into()), - )]; - - let empty_resource: SdkResource = create_empty_resource(); - // Print the operation name that will be generated - let http_client_op_name = get_otel_operation_name_v2(&PreSampledSpan::new( - "", - SpanKind::Client, - &http_client_attrs, - &empty_resource, - )); - assert_eq!( - http_client_op_name, "http.client.request", - "HTTP client operation name should be correct" - ); - - let result = sampler.sample( - None, - trace_id, - "test-span", - &SpanKind::Client, - &http_client_attrs, - ); - - // Should be sampled due to matching the http_rule - assert_eq!(result.to_otel_decision(), SamplingDecision::RecordAndSample); - - // 2. HTTP server request - let http_server_attrs = vec![KeyValue::new( - Key::from_static_str(HTTP_REQUEST_METHOD), - Value::String("POST".into()), - )]; - - // Print the operation name that will be generated - let http_server_op_name = get_otel_operation_name_v2(&PreSampledSpan::new( - "", - SpanKind::Server, - &http_server_attrs, - &empty_resource, - )); - assert_eq!( - http_server_op_name, "http.server.request", - "HTTP server operation name should be correct" - ); - - let result = sampler.sample( - None, - trace_id, - "test-span", - &SpanKind::Server, - &http_server_attrs, - ); - - // Should be sampled due to matching the http_rule - assert_eq!(result.to_otel_decision(), SamplingDecision::RecordAndSample); - - // 3. 
Database query - let db_attrs = vec![KeyValue::new( - Key::from_static_str(DB_SYSTEM_NAME), - Value::String("postgresql".into()), - )]; - - // Print the operation name that will be generated - let db_op_name = get_otel_operation_name_v2(&PreSampledSpan::new( - "", - SpanKind::Client, - &db_attrs, - &empty_resource, - )); - assert_eq!( - db_op_name, "postgresql.query", - "Database operation name should be correct" - ); - - let result = sampler.sample( - None, - trace_id, - "test-span", - &SpanKind::Client, // DB queries use client span kind - &db_attrs, - ); - - // Should be sampled due to matching the db_rule - assert_eq!(result.to_otel_decision(), SamplingDecision::RecordAndSample); - - // 4. Messaging operation - let messaging_attrs = vec![ - KeyValue::new( - Key::from_static_str(MESSAGING_SYSTEM), - Value::String("kafka".into()), - ), - KeyValue::new( - Key::from_static_str(MESSAGING_OPERATION_TYPE), - Value::String("process".into()), - ), - ]; - - // Print the operation name that will be generated - let messaging_op_name = get_otel_operation_name_v2(&PreSampledSpan::new( - "", - SpanKind::Consumer, - &messaging_attrs, - &empty_resource, - )); - assert_eq!( - messaging_op_name, "kafka.process", - "Messaging operation name should be correct" - ); - - let result = sampler.sample( - None, - trace_id, - "test-span", - &SpanKind::Consumer, // Messaging uses consumer span kind - &messaging_attrs, - ); - - // Should be sampled due to matching the messaging_rule - assert_eq!(result.to_otel_decision(), SamplingDecision::RecordAndSample); - - // 5. 
Generic internal span (should not match any rules) - let internal_attrs = vec![KeyValue::new("custom.tag", "value")]; - - // Print the operation name that will be generated - let internal_op_name = get_otel_operation_name_v2(&PreSampledSpan::new( - "", - SpanKind::Internal, - &internal_attrs, - &empty_resource, - )); - assert_eq!( - internal_op_name, "internal", - "Internal operation name should be the span kind" - ); - - let result = sampler.sample( - None, - trace_id, - "test-span", - &SpanKind::Internal, - &internal_attrs, - ); - - // Should still be sampled (default behavior when no rules match) - assert_eq!(result.to_otel_decision(), SamplingDecision::RecordAndSample); - - // 6. Server with protocol but no HTTP method - let server_protocol_attrs = vec![KeyValue::new( - Key::from_static_str(NETWORK_PROTOCOL_NAME), - Value::String("http".into()), - )]; - - // Print the operation name that will be generated - let server_protocol_op_name = get_otel_operation_name_v2(&PreSampledSpan::new( - "", - SpanKind::Server, - &server_protocol_attrs, - &empty_resource, - )); - assert_eq!( - server_protocol_op_name, "http.server.request", - "Server with protocol operation name should use protocol" - ); - - let result = sampler.sample( - None, - trace_id, - "test-span", - &SpanKind::Server, - &server_protocol_attrs, - ); - - // Should not match our http rule since operation name would be "http.server.request" - // But should still be sampled (default behavior) - assert_eq!(result.to_otel_decision(), SamplingDecision::RecordAndSample); - } - - #[test] - fn test_on_rules_update_callback() { - // Create a sampler with initial rules - let initial_rule = SamplingRule::new( - 0.1, - Some("initial-service".to_string()), - None, - None, - None, - Some("default".to_string()), - ); - - // Create a resource with a service name that will match our test rule - let test_resource = Arc::new(RwLock::new( - opentelemetry_sdk::Resource::builder_empty() - 
.with_attributes(vec![KeyValue::new(SERVICE_NAME, "web-frontend")]) - .build(), - )); - - let sampler = DatadogSampler::new(vec![initial_rule], 100, test_resource); - - // Verify initial state - assert_eq!(sampler.rules.len(), 1); - - // Get the callback - let callback = sampler.on_rules_update(); - - // Create new rules directly as SamplingRuleConfig objects - let new_rules = vec![ - SamplingRuleConfig { - sample_rate: 0.5, - service: Some("web-*".to_string()), - name: Some("http.*".to_string()), - resource: None, - tags: std::collections::HashMap::new(), - provenance: "customer".to_string(), - }, - SamplingRuleConfig { - sample_rate: 0.2, - service: Some("api-*".to_string()), - name: None, - resource: Some("/api/*".to_string()), - tags: [("env".to_string(), "prod".to_string())].into(), - provenance: "dynamic".to_string(), - }, - ]; - - // Apply the update - callback(&new_rules); - - // Verify the rules were updated - assert_eq!(sampler.rules.len(), 2); - - // Test that the new rules work by finding a matching rule - // Create attributes that will generate an operation name matching "http.*" - let attrs = vec![ - KeyValue::new(HTTP_REQUEST_METHOD, "GET"), /* This will make operation name - * "http.client.request" */ - ]; - let resource_guard = sampler.resource.read().unwrap(); - let span = PreSampledSpan::new( - "test-span", - SpanKind::Client, - attrs.as_slice(), - &resource_guard, - ); - - let matching_rule = sampler.find_matching_rule(&span); - assert!(matching_rule.is_some(), "Expected to find a matching rule for service 'web-frontend' and name 'http.client.request'"); - let rule = matching_rule.unwrap(); - assert_eq!(rule.sample_rate, 0.5); - assert_eq!(rule.provenance, "customer"); - - // Test with empty rules array - callback(&[]); - assert_eq!(sampler.rules.len(), 0); // Should now have no rules - } -} diff --git a/datadog-opentelemetry/src/sampling/mod.rs b/datadog-opentelemetry/src/sampling/mod.rs index 2f995a30..6ffa55b2 100644 --- 
a/datadog-opentelemetry/src/sampling/mod.rs +++ b/datadog-opentelemetry/src/sampling/mod.rs @@ -3,15 +3,14 @@ //! Datadog sampling logic -pub(crate) mod agent_service_sampler; -pub(crate) mod constants; -pub(crate) mod datadog_sampler; -pub(crate) mod glob_matcher; pub(crate) mod otel_mappings; -pub(crate) mod rate_limiter; -pub(crate) mod rate_sampler; -pub(crate) mod rules_sampler; pub(crate) mod utils; +// Re-export from libdd-sampling +pub use libdd_sampling::{ + AttributeFactory, AttributeLike, DatadogSampler, SamplingData, SamplingRule, + SamplingRulesCallback, SpanProperties, TraceIdLike, ValueLike, +}; + // Re-export key public types -pub use datadog_sampler::{DatadogSampler, SamplingRule, SamplingRulesCallback}; +pub use otel_mappings::{OtelAttributeFactory, OtelSamplingData}; diff --git a/datadog-opentelemetry/src/sampling/otel_mappings.rs b/datadog-opentelemetry/src/sampling/otel_mappings.rs index 95d763b7..9e910523 100644 --- a/datadog-opentelemetry/src/sampling/otel_mappings.rs +++ b/datadog-opentelemetry/src/sampling/otel_mappings.rs @@ -2,11 +2,86 @@ // SPDX-License-Identifier: Apache-2.0 use std::borrow::Cow; +use std::sync::RwLock; -use crate::mappings::{AttributeIndices, AttributeKey, OtelSpan}; -use opentelemetry::Key; +use crate::mappings::{ + get_dd_key_for_otlp_attribute, get_otel_env, get_otel_operation_name_v2, get_otel_resource_v2, + get_otel_service, get_otel_status_code, AttributeIndices, AttributeKey, OtelSpan, +}; +use crate::sampling::{AttributeLike, SamplingData, SpanProperties, TraceIdLike, ValueLike}; +use opentelemetry::{Key, KeyValue}; -pub(crate) struct PreSampledSpan<'a> { +/// Wrapper around OpenTelemetry TraceId for trait implementations +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct OtelTraceId(opentelemetry::trace::TraceId); + +impl TraceIdLike for OtelTraceId { + fn to_u128(&self) -> u128 { + u128::from_be_bytes(self.0.to_bytes()) + } +} + +/// Transparent wrapper around OpenTelemetry Value for trait 
implementations. +/// +/// `#[repr(transparent)]` guarantees the same memory layout as the inner type, +/// which makes the `from_ref` pointer cast sound. The compile-time assertion +/// below ensures this invariant is never accidentally broken. +#[repr(transparent)] +#[derive(Debug)] +pub struct OtelValue(opentelemetry::Value); +const _: () = + assert!(std::mem::size_of::() == std::mem::size_of::()); + +impl OtelValue { + /// Convert a reference to opentelemetry::Value to a reference to OtelValue + /// This is safe because OtelValue is repr(transparent) + fn from_ref(value: &opentelemetry::Value) -> &Self { + // Safety: OtelValue is repr(transparent) over opentelemetry::Value + unsafe { &*(value as *const opentelemetry::Value as *const OtelValue) } + } +} + +impl ValueLike for OtelValue { + fn extract_float(&self) -> Option { + crate::sampling::utils::extract_float_value(&self.0) + } + + fn extract_string(&self) -> Option> { + crate::sampling::utils::extract_string_value(&self.0) + } +} + +/// Transparent wrapper around OpenTelemetry KeyValue for trait implementations. +/// +/// See `OtelValue` for the safety rationale behind `#[repr(transparent)]`. 
+#[repr(transparent)] +#[derive(Debug)] +pub struct OtelKeyValue(opentelemetry::KeyValue); +const _: () = + assert!(std::mem::size_of::() == std::mem::size_of::()); + +impl OtelKeyValue { + /// Convert a reference to opentelemetry::KeyValue to a reference to OtelKeyValue + /// This is safe because OtelKeyValue is repr(transparent) + fn from_ref(kv: &opentelemetry::KeyValue) -> &Self { + // Safety: OtelKeyValue is repr(transparent) over opentelemetry::KeyValue + unsafe { &*(kv as *const opentelemetry::KeyValue as *const OtelKeyValue) } + } +} + +impl AttributeLike for OtelKeyValue { + type Value = OtelValue; + + fn key(&self) -> &str { + self.0.key.as_str() + } + + fn value(&self) -> &Self::Value { + OtelValue::from_ref(&self.0.value) + } +} + +pub struct PreSampledSpan<'a> { pub name: &'a str, pub span_kind: opentelemetry::trace::SpanKind, pub attributes: &'a [opentelemetry::KeyValue], @@ -73,3 +148,519 @@ impl<'a> OtelSpan<'a> for PreSampledSpan<'a> { self.resource.len() } } + +impl SpanProperties for PreSampledSpan<'_> { + type Attribute = OtelKeyValue; + + fn operation_name(&self) -> Cow<'_, str> { + get_otel_operation_name_v2(self) + } + + fn service(&self) -> Cow<'_, str> { + get_otel_service(self) + } + + fn env(&self) -> Cow<'_, str> { + get_otel_env(self) + } + + fn resource(&self) -> Cow<'_, str> { + get_otel_resource_v2(self) + } + + fn status_code(&self) -> Option { + get_otel_status_code(self) + } + + fn attributes<'a>(&'a self) -> impl Iterator + where + Self: 'a, + { + self.attributes.iter().map(OtelKeyValue::from_ref) + } + + fn get_alternate_key<'b>(&self, key: &'b str) -> Option> { + let mapped = get_dd_key_for_otlp_attribute(key); + // If the mapping returned an empty string or the same key, there's no alternate + if mapped.is_empty() || mapped.as_ref() == key { + None + } else { + Some(mapped) + } + } +} + +/// OpenTelemetry Sampling Data implementation. +/// +/// Provides the necessary data for making sampling decisions on OpenTelemetry spans. 
+/// This struct contains references to span metadata including the trace ID, span name, +/// span kind, attributes, and resource information. +pub struct OtelSamplingData<'a> { + is_parent_sampled: Option, + trace_id: OtelTraceId, + name: &'a str, + span_kind: opentelemetry::trace::SpanKind, + attributes: &'a [KeyValue], + resource: &'a RwLock, +} + +impl<'a> OtelSamplingData<'a> { + /// Creates a new OpenTelemetry sampling data instance. + /// + /// # Arguments + /// + /// * `is_parent_sampled` - Whether the parent span was sampled, if known + /// * `trace_id` - The trace ID for this span + /// * `name` - The span name + /// * `span_kind` - The kind of span (e.g., Server, Client) + /// * `attributes` - The span's attributes + /// * `resource` - The OpenTelemetry resource containing service metadata + pub fn new( + is_parent_sampled: Option, + trace_id: &'a opentelemetry::trace::TraceId, + name: &'a str, + span_kind: opentelemetry::trace::SpanKind, + attributes: &'a [KeyValue], + resource: &'a RwLock, + ) -> Self { + Self { + is_parent_sampled, + trace_id: OtelTraceId(*trace_id), + name, + span_kind, + attributes, + resource, + } + } +} + +impl SamplingData for OtelSamplingData<'_> { + type TraceId = OtelTraceId; + type Properties<'b> + = PreSampledSpan<'b> + where + Self: 'b; + + fn is_parent_sampled(&self) -> Option { + self.is_parent_sampled + } + fn trace_id(&self) -> &Self::TraceId { + &self.trace_id + } + + fn with_span_properties(&self, s: &S, f: F) -> T + where + F: for<'b> Fn(&S, &PreSampledSpan<'b>) -> T, + { + let resource_guard = self.resource.read().unwrap(); + let span = PreSampledSpan::new( + self.name, + self.span_kind.clone(), + self.attributes, + &resource_guard, + ); + f(s, &span) + } +} + +/// Factory for creating OpenTelemetry KeyValue attributes. 
+pub struct OtelAttributeFactory; + +impl crate::sampling::AttributeFactory for OtelAttributeFactory { + type Attribute = opentelemetry::KeyValue; + + fn create_i64(&self, key: &'static str, value: i64) -> Self::Attribute { + opentelemetry::KeyValue::new(key, value) + } + + fn create_f64(&self, key: &'static str, value: f64) -> Self::Attribute { + opentelemetry::KeyValue::new(key, value) + } + + fn create_string(&self, key: &'static str, value: Cow<'static, str>) -> Self::Attribute { + opentelemetry::KeyValue::new(key, value) + } +} + +/// Converts a Datadog sampling priority to an OpenTelemetry sampling decision. +/// +/// # Arguments +/// +/// * `priority` - The Datadog sampling priority +/// +/// # Returns +/// +/// The corresponding OpenTelemetry sampling decision: +/// - `RecordAndSample` if the priority indicates the trace should be kept +/// - `RecordOnly` if the priority indicates the trace should be dropped +pub(crate) fn priority_to_otel_decision( + priority: crate::core::sampling::SamplingPriority, +) -> opentelemetry::trace::SamplingDecision { + if priority.is_keep() { + opentelemetry::trace::SamplingDecision::RecordAndSample + } else { + opentelemetry::trace::SamplingDecision::RecordOnly + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::mappings::get_otel_operation_name_v2; + use opentelemetry::trace::SpanKind; + use opentelemetry::{Key, KeyValue, Value}; + use opentelemetry_semantic_conventions::attribute::{ + DB_SYSTEM_NAME, HTTP_REQUEST_METHOD, MESSAGING_OPERATION_TYPE, MESSAGING_SYSTEM, + }; + use opentelemetry_semantic_conventions::trace::{ + HTTP_RESPONSE_STATUS_CODE, NETWORK_PROTOCOL_NAME, + }; + + fn create_empty_resource() -> opentelemetry_sdk::Resource { + opentelemetry_sdk::Resource::builder_empty().build() + } + + #[test] + fn test_operation_name_http_client() { + let attrs = vec![KeyValue::new( + Key::from_static_str(HTTP_REQUEST_METHOD), + Value::String("GET".into()), + )]; + let resource = create_empty_resource(); + let 
span = PreSampledSpan::new("", SpanKind::Client, &attrs, &resource); + + let op_name = span.operation_name(); + assert_eq!(op_name, "http.client.request"); + + // Also verify using get_otel_operation_name_v2 + let op_name_v2 = get_otel_operation_name_v2(&span); + assert_eq!(op_name_v2, "http.client.request"); + } + + #[test] + fn test_operation_name_http_server() { + let attrs = vec![KeyValue::new( + Key::from_static_str(HTTP_REQUEST_METHOD), + Value::String("POST".into()), + )]; + let resource = create_empty_resource(); + let span = PreSampledSpan::new("", SpanKind::Server, &attrs, &resource); + + let op_name = span.operation_name(); + assert_eq!(op_name, "http.server.request"); + + let op_name_v2 = get_otel_operation_name_v2(&span); + assert_eq!(op_name_v2, "http.server.request"); + } + + #[test] + fn test_operation_name_database() { + let attrs = vec![KeyValue::new( + Key::from_static_str(DB_SYSTEM_NAME), + Value::String("postgresql".into()), + )]; + let resource = create_empty_resource(); + let span = PreSampledSpan::new("", SpanKind::Client, &attrs, &resource); + + let op_name = span.operation_name(); + assert_eq!(op_name, "postgresql.query"); + + let op_name_v2 = get_otel_operation_name_v2(&span); + assert_eq!(op_name_v2, "postgresql.query"); + } + + #[test] + fn test_operation_name_messaging() { + let attrs = vec![ + KeyValue::new( + Key::from_static_str(MESSAGING_SYSTEM), + Value::String("kafka".into()), + ), + KeyValue::new( + Key::from_static_str(MESSAGING_OPERATION_TYPE), + Value::String("process".into()), + ), + ]; + let resource = create_empty_resource(); + let span = PreSampledSpan::new("", SpanKind::Consumer, &attrs, &resource); + + let op_name = span.operation_name(); + assert_eq!(op_name, "kafka.process"); + + let op_name_v2 = get_otel_operation_name_v2(&span); + assert_eq!(op_name_v2, "kafka.process"); + } + + #[test] + fn test_operation_name_generic_server_with_protocol() { + let attrs = vec![KeyValue::new( + 
Key::from_static_str(NETWORK_PROTOCOL_NAME), + Value::String("http".into()), + )]; + let resource = create_empty_resource(); + let span = PreSampledSpan::new("", SpanKind::Server, &attrs, &resource); + + let op_name = span.operation_name(); + assert_eq!(op_name, "http.server.request"); + + let op_name_v2 = get_otel_operation_name_v2(&span); + assert_eq!(op_name_v2, "http.server.request"); + } + + #[test] + fn test_operation_name_internal_fallback() { + let attrs = vec![KeyValue::new("custom.tag", "value")]; + let resource = create_empty_resource(); + let span = PreSampledSpan::new("", SpanKind::Internal, &attrs, &resource); + + let op_name = span.operation_name(); + assert_eq!(op_name, "internal"); + + let op_name_v2 = get_otel_operation_name_v2(&span); + assert_eq!(op_name_v2, "internal"); + } + + #[test] + fn test_service_from_resource() { + use opentelemetry_semantic_conventions::resource::SERVICE_NAME; + + let resource = opentelemetry_sdk::Resource::builder_empty() + .with_attributes(vec![KeyValue::new(SERVICE_NAME, "my-service")]) + .build(); + let attrs = vec![]; + let span = PreSampledSpan::new("test", SpanKind::Server, &attrs, &resource); + + assert_eq!(span.service(), "my-service"); + } + + #[test] + fn test_env_from_attributes() { + let attrs = vec![KeyValue::new("datadog.env", "production")]; + let resource = create_empty_resource(); + let span = PreSampledSpan::new("test", SpanKind::Server, &attrs, &resource); + + assert_eq!(span.env(), "production"); + } + + #[test] + fn test_env_empty_when_not_present() { + let attrs = vec![]; + let resource = create_empty_resource(); + let span = PreSampledSpan::new("test", SpanKind::Server, &attrs, &resource); + + assert_eq!(span.env(), ""); + } + + #[test] + fn test_status_code_from_attributes() { + let attrs = vec![KeyValue::new( + Key::from_static_str(HTTP_RESPONSE_STATUS_CODE), + Value::I64(404), + )]; + let resource = create_empty_resource(); + let span = PreSampledSpan::new("test", SpanKind::Server, &attrs, 
&resource); + + assert_eq!(span.status_code(), Some(404)); + } + + #[test] + fn test_status_code_none_when_not_present() { + let attrs = vec![]; + let resource = create_empty_resource(); + let span = PreSampledSpan::new("test", SpanKind::Server, &attrs, &resource); + + assert_eq!(span.status_code(), None); + } + + #[test] + fn test_attributes_iteration() { + let attrs = vec![ + KeyValue::new("key1", "value1"), + KeyValue::new("key2", Value::I64(42)), + ]; + let resource = create_empty_resource(); + let span = PreSampledSpan::new("test", SpanKind::Server, &attrs, &resource); + + let collected: Vec<_> = span.attributes().collect(); + assert_eq!(collected.len(), 2); + assert_eq!(collected[0].key(), "key1"); + assert_eq!(collected[1].key(), "key2"); + } + + #[test] + fn test_get_alternate_key_http_status() { + let attrs = vec![]; + let resource = create_empty_resource(); + let span = PreSampledSpan::new("test", SpanKind::Server, &attrs, &resource); + + // Test HTTP status code mapping + let alternate = span.get_alternate_key("http.response.status_code"); + assert_eq!(alternate, Some("http.status_code".into())); + } + + #[test] + fn test_get_alternate_key_http_method() { + let attrs = vec![]; + let resource = create_empty_resource(); + let span = PreSampledSpan::new("test", SpanKind::Server, &attrs, &resource); + + // Test HTTP method mapping + let alternate = span.get_alternate_key("http.request.method"); + assert_eq!(alternate, Some("http.method".into())); + } + + #[test] + fn test_get_alternate_key_no_mapping() { + let attrs = vec![]; + let resource = create_empty_resource(); + let span = PreSampledSpan::new("test", SpanKind::Server, &attrs, &resource); + + // Test key with no mapping + let alternate = span.get_alternate_key("custom.attribute"); + assert_eq!(alternate, None); + } + + #[test] + fn test_attribute_key_mapping_comprehensive() { + // Test that OTel attribute keys are correctly mapped to Datadog keys + let attrs = vec![]; + let resource = 
create_empty_resource(); + let span = PreSampledSpan::new("test", SpanKind::Server, &attrs, &resource); + + // HTTP attribute mappings (OTel -> DD) + assert_eq!( + span.get_alternate_key("http.response.status_code"), + Some("http.status_code".into()) + ); + assert_eq!( + span.get_alternate_key("http.request.method"), + Some("http.method".into()) + ); + assert_eq!(span.get_alternate_key("url.full"), Some("http.url".into())); + assert_eq!( + span.get_alternate_key("user_agent.original"), + Some("http.useragent".into()) + ); + assert_eq!( + span.get_alternate_key("server.address"), + Some("http.server_name".into()) + ); + assert_eq!( + span.get_alternate_key("client.address"), + Some("http.client_ip".into()) + ); + + // Keys without mappings (same in both OTel and DD) should return None + assert_eq!(span.get_alternate_key("custom.tag"), None); + assert_eq!(span.get_alternate_key("application.name"), None); + assert_eq!(span.get_alternate_key("http.route"), None); // Maps to itself + + // Datadog convention keys map to empty string (filtered out), which get_alternate_key + // returns as None + assert_eq!(span.get_alternate_key("service.name"), None); + assert_eq!(span.get_alternate_key("operation.name"), None); + assert_eq!(span.get_alternate_key("datadog.custom"), None); + } + + #[test] + fn test_otel_to_datadog_attribute_mapping_in_span() { + // Test that a span with OTel attribute names can be queried using DD keys via + // get_alternate_key + let otel_attrs = vec![ + KeyValue::new("http.response.status_code", Value::I64(500)), + KeyValue::new("http.request.method", "POST"), + KeyValue::new("url.full", "https://example.com/api"), + ]; + let resource = create_empty_resource(); + let span = PreSampledSpan::new("test-span", SpanKind::Client, &otel_attrs, &resource); + + // Verify the OTel attributes are present + let attrs: Vec<_> = span.attributes().collect(); + assert_eq!(attrs.len(), 3); + + // Verify we can find the DD equivalent keys for these OTel attributes + 
assert_eq!( + span.get_alternate_key("http.response.status_code"), + Some("http.status_code".into()) + ); + assert_eq!( + span.get_alternate_key("http.request.method"), + Some("http.method".into()) + ); + assert_eq!(span.get_alternate_key("url.full"), Some("http.url".into())); + + // Verify the actual attributes can be found by their original keys + let status_code_attr = attrs + .iter() + .find(|a| a.key() == "http.response.status_code"); + assert!(status_code_attr.is_some()); + + let method_attr = attrs.iter().find(|a| a.key() == "http.request.method"); + assert!(method_attr.is_some()); + } + + #[test] + fn test_multiple_attribute_mappings() { + // Test that a span with multiple OTel attributes correctly maps them all to DD keys + let mixed_attrs = vec![ + KeyValue::new("http.response.status_code", Value::I64(503)), + KeyValue::new("http.request.method", "POST"), + KeyValue::new("url.full", "https://example.com/api/v1/resource"), + ]; + let resource = create_empty_resource(); + let span = PreSampledSpan::new("test-span", SpanKind::Client, &mixed_attrs, &resource); + + // Verify all three OTel attributes have correct DD mappings + assert_eq!( + span.get_alternate_key("http.response.status_code"), + Some("http.status_code".into()) + ); + assert_eq!( + span.get_alternate_key("http.request.method"), + Some("http.method".into()) + ); + assert_eq!(span.get_alternate_key("url.full"), Some("http.url".into())); + + // Verify all attributes are present + let attrs: Vec<_> = span.attributes().collect(); + assert_eq!(attrs.len(), 3); + + // Verify each attribute can be found by its original OTel key + assert!(attrs.iter().any(|a| a.key() == "http.response.status_code")); + assert!(attrs.iter().any(|a| a.key() == "http.request.method")); + assert!(attrs.iter().any(|a| a.key() == "url.full")); + } + + #[test] + fn test_mixed_direct_and_mapped_attributes() { + // Test that spans with both OTel attributes (that have DD mappings) and + // custom attributes (that don't have 
mappings) work correctly together + let mixed_attrs = vec![ + // OTel attribute with DD mapping + KeyValue::new("http.response.status_code", Value::I64(503)), + // Custom attribute without mapping + KeyValue::new("custom.tag", "custom_value"), + ]; + let resource = create_empty_resource(); + let span = PreSampledSpan::new("test-span", SpanKind::Client, &mixed_attrs, &resource); + + // OTel attribute should have alternate DD key + assert_eq!( + span.get_alternate_key("http.response.status_code"), + Some("http.status_code".into()) + ); + + // Custom attribute should not have alternate key + assert_eq!(span.get_alternate_key("custom.tag"), None); + + // Both attributes should be present and accessible + let attrs: Vec<_> = span.attributes().collect(); + assert_eq!(attrs.len(), 2); + + assert!(attrs.iter().any(|a| a.key() == "http.response.status_code")); + assert!(attrs.iter().any(|a| a.key() == "custom.tag")); + + // Verify the status code is accessible + assert_eq!(span.status_code(), Some(503)); + } +} diff --git a/datadog-opentelemetry/src/span_processor.rs b/datadog-opentelemetry/src/span_processor.rs index df5f2b02..e594f8ac 100644 --- a/datadog-opentelemetry/src/span_processor.rs +++ b/datadog-opentelemetry/src/span_processor.rs @@ -1,6 +1,8 @@ // Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 +//! 
Datadog OTel SpanProcessor + use hashbrown::{hash_map, HashMap as BHashMap}; use std::{ collections::HashMap, @@ -69,7 +71,7 @@ struct InnerTraceRegistry { config: Arc, } -pub enum RegisterTracePropagationResult { +pub(crate) enum RegisterTracePropagationResult { Existing(SamplingDecision), New, } @@ -262,7 +264,7 @@ struct CachePadded(T); /// - The finished spans of the trace /// - The number of open spans in the trace /// - The sampling decision of the trace -pub(crate) struct TraceRegistry { +pub struct TraceRegistry { // Example: // inner: Arc<[CacheAligned>; N]>; // to access a trace we do inner[hash(trace_id) % N].read() @@ -271,6 +273,10 @@ pub(crate) struct TraceRegistry { } impl TraceRegistry { + /// Creates a new trace registry. + /// + /// The registry uses sharding to minimize lock contention when multiple threads + /// are creating and finishing spans concurrently. pub fn new(config: Arc) -> Self { Self { inner: Arc::new(std::array::from_fn(|_| { @@ -297,6 +303,7 @@ impl TraceRegistry { /// /// If the trace is already registered with a non None sampling decision, /// it will return the existing sampling decision instead + #[allow(private_interfaces)] pub fn register_local_root_trace_propagation_data( &self, trace_id: [u8; 16], @@ -321,6 +328,7 @@ impl TraceRegistry { } /// Register a new span with the given trace ID and span ID. + #[allow(private_interfaces)] pub fn register_span( &self, trace_id: [u8; 16], @@ -345,6 +353,10 @@ impl TraceRegistry { inner.finish_span(trace_id, span_data) } + /// Retrieves the trace propagation data for a given trace ID. + /// + /// Returns the sampling decision, origin, and internal tags associated with the trace. 
+ #[allow(private_interfaces)] pub fn get_trace_propagation_data(&self, trace_id: [u8; 16]) -> TracePropagationData { let inner = self .get_shard(trace_id) @@ -354,6 +366,10 @@ impl TraceRegistry { inner.get_trace_propagation_data(trace_id).clone() } + /// Aggregates and returns metrics from all registry shards. + /// + /// Collects counters for spans created/finished, trace segments, and partial flushes + /// across all shards in the registry. pub fn get_metrics(&self) -> TraceRegistryMetrics { let mut stats = TraceRegistryMetrics::default(); for shard_idx in 0..TRACE_REGISTRY_SHARDS { @@ -369,12 +385,21 @@ impl TraceRegistry { } } +/// Metrics collected by the trace registry. +/// +/// Tracks the lifecycle of spans and traces through the registry, useful for +/// monitoring and debugging trace collection behavior. #[derive(Default, Debug)] pub struct TraceRegistryMetrics { + /// Number of spans created and registered in the registry. pub spans_created: usize, + /// Number of spans that have finished processing. pub spans_finished: usize, + /// Number of trace segments created (complete or partial traces). pub trace_segments_created: usize, + /// Number of trace segments closed and sent to the exporter. pub trace_segments_closed: usize, + /// Number of times traces were partially flushed before completion. 
pub trace_partial_flush_count: usize, } diff --git a/datadog-opentelemetry/tests/integration_tests/opentelemetry_api.rs b/datadog-opentelemetry/tests/integration_tests/opentelemetry_api.rs index 0b3936a6..922979e3 100644 --- a/datadog-opentelemetry/tests/integration_tests/opentelemetry_api.rs +++ b/datadog-opentelemetry/tests/integration_tests/opentelemetry_api.rs @@ -233,7 +233,6 @@ async fn test_remote_config_sampling_rates() { vec![SamplingRuleConfig { resource: Some("test-span".into()), sample_rate: 1.0, - provenance: "customer".into(), ..Default::default() }] ); diff --git a/libdd-sampling/Cargo.toml b/libdd-sampling/Cargo.toml new file mode 100644 index 00000000..1dea6a65 --- /dev/null +++ b/libdd-sampling/Cargo.toml @@ -0,0 +1,23 @@ +# Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/ +# SPDX-License-Identifier: Apache-2.0 + +[package] +name = "libdd-sampling" +version.workspace = true +edition.workspace = true +rust-version.workspace = true +license.workspace = true +repository.workspace = true +readme.workspace = true +description = "Core sampling logic for Datadog tracing" +authors.workspace = true + +[dependencies] +serde = { workspace = true, features = ["derive"] } +serde_json = { workspace = true } +hashbrown = { workspace = true } +foldhash = { workspace = true } +lru = "0.16.3" + +[dev-dependencies] +criterion = "0.5" diff --git a/datadog-opentelemetry/src/sampling/agent_service_sampler.rs b/libdd-sampling/src/agent_service_sampler.rs similarity index 91% rename from datadog-opentelemetry/src/sampling/agent_service_sampler.rs rename to libdd-sampling/src/agent_service_sampler.rs index 5100f1ad..a8841027 100644 --- a/datadog-opentelemetry/src/sampling/agent_service_sampler.rs +++ b/libdd-sampling/src/agent_service_sampler.rs @@ -6,16 +6,16 @@ use std::{ sync::{Arc, RwLock}, }; -use super::rate_sampler::RateSampler; +use crate::rate_sampler::RateSampler; #[derive(Debug, serde::Deserialize)] -pub(crate) struct AgentRates<'a> { +pub struct 
AgentRates<'a> { #[serde(borrow)] pub rates_by_service: Option>, } #[derive(Debug, Default, Clone)] -pub(crate) struct ServicesSampler { +pub struct ServicesSampler { inner: Arc>>, } diff --git a/datadog-opentelemetry/src/sampling/constants.rs b/libdd-sampling/src/constants.rs similarity index 100% rename from datadog-opentelemetry/src/sampling/constants.rs rename to libdd-sampling/src/constants.rs diff --git a/libdd-sampling/src/datadog_sampler.rs b/libdd-sampling/src/datadog_sampler.rs new file mode 100644 index 00000000..eb1b8207 --- /dev/null +++ b/libdd-sampling/src/datadog_sampler.rs @@ -0,0 +1,1458 @@ +// Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +use crate::dd_constants::{ + RL_EFFECTIVE_RATE, SAMPLING_AGENT_RATE_TAG_KEY, SAMPLING_DECISION_MAKER_TAG_KEY, + SAMPLING_KNUTH_RATE_TAG_KEY, SAMPLING_PRIORITY_TAG_KEY, SAMPLING_RULE_RATE_TAG_KEY, +}; +use crate::dd_sampling::{mechanism, priority, SamplingMechanism, SamplingPriority}; +use crate::sampling_rule_config::SamplingRuleConfig; + +/// Type alias for sampling rules update callback +/// Consolidated callback type used across crates for remote config sampling updates +pub type SamplingRulesCallback = Box Fn(&'a [SamplingRuleConfig]) + Send + Sync>; + +use crate::types::{SamplingData, SpanProperties}; + +use super::agent_service_sampler::{AgentRates, ServicesSampler}; +use super::rate_limiter::RateLimiter; +use super::rules_sampler::RulesSampler; +use super::sampling_rule::SamplingRule; + +/// A composite sampler that applies rules in order of precedence +#[derive(Clone, Debug)] +pub struct DatadogSampler { + /// Sampling rules to apply, in order of precedence + rules: RulesSampler, + + /// Service-based samplers provided by the Agent + service_samplers: ServicesSampler, + + /// Rate limiter for limiting the number of spans per second + rate_limiter: RateLimiter, +} + +impl DatadogSampler { + /// Creates a new DatadogSampler with the given rules + 
pub fn new(rules: Vec, rate_limit: i32) -> Self { + // Create rate limiter with default value of 100 if not provided + let limiter = RateLimiter::new(rate_limit, None); + + DatadogSampler { + rules: RulesSampler::new(rules), + service_samplers: ServicesSampler::default(), + rate_limiter: limiter, + } + } + + // used for tests + #[allow(dead_code)] + pub(crate) fn update_service_rates(&self, rates: impl IntoIterator) { + self.service_samplers.update_rates(rates); + } + + pub fn on_agent_response(&self) -> Box Fn(&'a str) + Send + Sync> { + let service_samplers = self.service_samplers.clone(); + Box::new(move |s: &str| { + let Ok(new_rates) = serde_json::de::from_str::(s) else { + return; + }; + let Some(new_rates) = new_rates.rates_by_service else { + return; + }; + service_samplers.update_rates(new_rates.into_iter().map(|(k, v)| (k.to_string(), v))); + }) + } + + /// Creates a callback for updating sampling rules from remote configuration + /// # Returns + /// A boxed function that takes a slice of SamplingRuleConfig and updates the sampling rules + pub fn on_rules_update(&self) -> SamplingRulesCallback { + let rules_sampler = self.rules.clone(); + Box::new(move |rule_configs: &[SamplingRuleConfig]| { + let new_rules = SamplingRule::from_configs(rule_configs.to_vec()); + + rules_sampler.update_rules(new_rules); + }) + } + + /// Computes a key for service-based sampling + fn service_key(&self, span: &impl SpanProperties) -> String { + // Get service from span + let service = span.service().into_owned(); + // Get env from span + let env = span.env(); + + format!("service:{service},env:{env}") + } + + /// Finds the highest precedence rule that matches the span + fn find_matching_rule(&self, span: &impl SpanProperties) -> Option { + self.rules.find_matching_rule(|rule| rule.matches(span)) + } + + /// Returns the sampling mechanism used for the decision + fn get_sampling_mechanism( + &self, + rule: Option<&SamplingRule>, + used_agent_sampler: bool, + ) -> 
SamplingMechanism { + if let Some(rule) = rule { + match rule.provenance.as_str() { + // Provenance will not be set for rules until we implement remote configuration + "customer" => mechanism::REMOTE_USER_TRACE_SAMPLING_RULE, + "dynamic" => mechanism::REMOTE_DYNAMIC_TRACE_SAMPLING_RULE, + _ => mechanism::LOCAL_USER_TRACE_SAMPLING_RULE, + } + } else if used_agent_sampler { + // If using service-based sampling from the agent + mechanism::AGENT_RATE_BY_SERVICE + } else { + // Should not happen, but just in case + mechanism::DEFAULT + } + } + + /// Sample an incoming span based on the parent context and attributes + pub fn sample(&self, data: &impl SamplingData) -> DdSamplingResult { + if let Some(is_parent_sampled) = data.is_parent_sampled() { + let priority = match is_parent_sampled { + false => priority::AUTO_REJECT, + true => priority::AUTO_KEEP, + }; + // If a parent exists, inherit its sampling decision and trace state + return DdSamplingResult { + priority, + trace_root_info: None, + }; + } + + // Apply rules-based sampling + data.with_span_properties(self, |sampler, span| sampler.sample_root(data, span)) + } + + /// Sample the root span of a trace + fn sample_root( + &self, + data: &impl SamplingData, + span: &impl SpanProperties, + ) -> DdSamplingResult { + let mut is_keep = true; + let mut used_agent_sampler = false; + let sample_rate; + let mut rl_effective_rate: Option = None; + let trace_id = data.trace_id(); + + // Find a matching rule + let matching_rule = self.find_matching_rule(span); + + // Apply sampling logic + if let Some(rule) = &matching_rule { + // Get the sample rate from the rule + sample_rate = rule.sample_rate; + + // First check if the span should be sampled according to the rule + if !rule.sample(trace_id) { + is_keep = false; + // If the span should be sampled, then apply rate limiting + } else if !self.rate_limiter.is_allowed() { + is_keep = false; + rl_effective_rate = Some(self.rate_limiter.effective_rate()); + } + } else { + // Try 
service-based sampling from Agent + let service_key = self.service_key(span); + if let Some(sampler) = self.service_samplers.get(&service_key) { + // Use the service-based sampler + used_agent_sampler = true; + sample_rate = sampler.sample_rate(); // Get rate for reporting + + // Check if the service sampler decides to drop + if !sampler.sample(trace_id) { + is_keep = false; + } + } else { + // Default sample rate, should never happen in practice if agent provides rates + sample_rate = 1.0; + // Keep the default decision (RecordAndSample) + } + } + + // Determine the sampling mechanism + let mechanism = self.get_sampling_mechanism(matching_rule.as_ref(), used_agent_sampler); + + DdSamplingResult { + priority: mechanism.to_priority(is_keep), + trace_root_info: Some(TraceRootSamplingInfo { + mechanism, + rate: sample_rate, + rl_effective_rate, + }), + } + } +} + +/// Formats a sampling rate with up to 6 significant digits, stripping trailing zeros. +/// +/// This matches the Go behavior of `strconv.FormatFloat(rate, 'g', 6, 64)`. 
/// Formats a sampling rate with up to 6 significant digits, stripping trailing zeros.
///
/// The output is always plain decimal notation. For rates in `[1e-4, 1]` this is the
/// same text Go's `strconv.FormatFloat(rate, 'g', 6, 64)` produces.
/// NOTE(review): Go's `'g'` verb switches to exponent notation below `1e-4`
/// (e.g. `1e-05`), whereas this function renders `0.00001` — confirm which form
/// consumers of the tag expect for very small rates.
///
/// Returns `None` when `rate` is NaN or outside `[0, 1]`.
///
/// Rates so small that the rounding scale (`10^(5 - magnitude)`) overflows `f64`
/// are formatted without the intermediate rounding step, so the result is always
/// a decimal number and never the string `"NaN"`.
///
/// # Examples
/// - `1.0` → `Some("1")`
/// - `0.5` → `Some("0.5")`
/// - `0.7654321` → `Some("0.765432")`
/// - `0.100000` → `Some("0.1")`
/// - `-0.1` → `None`
/// - `1.1` → `None`
fn format_sampling_rate(rate: f64) -> Option<String> {
    const SIGNIFICANT_DIGITS: i32 = 6;

    if rate.is_nan() || !(0.0..=1.0).contains(&rate) {
        return None;
    }

    if rate == 0.0 {
        return Some("0".to_string());
    }

    // `rate` is in (0, 1] here, so `magnitude` <= 0 and `fraction_digits` >= 5.
    let magnitude = rate.log10().floor() as i32;
    let fraction_digits = SIGNIFICANT_DIGITS - 1 - magnitude;

    // Round to 6 significant digits. For rates below roughly 1e-303 the scale
    // overflows to infinity and `inf / inf` would yield NaN, so in that case the
    // value is left as-is and the precision-limited formatting below does the rounding.
    let scale = 10f64.powi(fraction_digits);
    let rounded = if scale.is_finite() {
        (rate * scale).round() / scale
    } else {
        rate
    };

    // Decimal places needed to show 6 significant digits.
    let decimal_places = usize::try_from(fraction_digits).unwrap_or(0);
    let formatted = format!("{:.prec$}", rounded, prec = decimal_places);

    // Strip trailing zeros (and a then-dangling decimal point).
    Some(if formatted.contains('.') {
        formatted
            .trim_end_matches('0')
            .trim_end_matches('.')
            .to_string()
    } else {
        formatted
    })
}
optional vector of attributes to add to the sampling result + pub fn to_dd_sampling_tags(&self, factory: &F) -> Option> + where + F: crate::types::AttributeFactory, + { + let Some(root_info) = &self.trace_root_info else { + return None; // No root info, return empty attributes + }; + + let mut result: Vec; + // Add rate limiting tag if applicable + if let Some(limit) = root_info.rl_effective_rate() { + result = Vec::with_capacity(4); + result.push(factory.create_f64(RL_EFFECTIVE_RATE, limit)); + } else { + result = Vec::with_capacity(3); + } + + // Add the sampling decision trace tag with the mechanism + let mechanism = root_info.mechanism(); + result.push(factory.create_string(SAMPLING_DECISION_MAKER_TAG_KEY, mechanism.to_cow())); + + // Add the sample rate tag with the correct key based on the mechanism + match mechanism { + mechanism::AGENT_RATE_BY_SERVICE => { + result.push(factory.create_f64(SAMPLING_AGENT_RATE_TAG_KEY, root_info.rate())); + if let Some(rate_str) = format_sampling_rate(root_info.rate()) { + result.push(factory.create_string( + SAMPLING_KNUTH_RATE_TAG_KEY, + std::borrow::Cow::Owned(rate_str), + )); + } + } + mechanism::REMOTE_USER_TRACE_SAMPLING_RULE + | mechanism::REMOTE_DYNAMIC_TRACE_SAMPLING_RULE + | mechanism::LOCAL_USER_TRACE_SAMPLING_RULE => { + result.push(factory.create_f64(SAMPLING_RULE_RATE_TAG_KEY, root_info.rate())); + if let Some(rate_str) = format_sampling_rate(root_info.rate()) { + result.push(factory.create_string( + SAMPLING_KNUTH_RATE_TAG_KEY, + std::borrow::Cow::Owned(rate_str), + )); + } + } + _ => {} + } + + let priority = self.priority; + result.push(factory.create_i64(SAMPLING_PRIORITY_TAG_KEY, priority.into_i8() as i64)); + + Some(result) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::constants::{ + attr::{ENV_TAG, RESOURCE_TAG}, + pattern, + }; + use crate::types::{AttributeLike, TraceIdLike, ValueLike}; + use std::borrow::Cow; + use std::collections::HashMap; + + // Test-only semantic convention 
constants + const HTTP_REQUEST_METHOD: &str = "http.request.method"; + const SERVICE_NAME: &str = "service.name"; + + // HTTP status code attribute constants (for tests) + const HTTP_RESPONSE_STATUS_CODE: &str = "http.response.status_code"; + const HTTP_STATUS_CODE: &str = "http.status_code"; + + // ============================================================================ + // Test-only data structures + // ============================================================================ + + #[derive(Clone, Debug, PartialEq, Eq)] + struct TestTraceId { + bytes: [u8; 16], + } + + impl TestTraceId { + fn from_bytes(bytes: [u8; 16]) -> Self { + Self { bytes } + } + } + + impl TraceIdLike for TestTraceId { + fn to_u128(&self) -> u128 { + u128::from_be_bytes(self.bytes) + } + } + + #[derive(Clone, Debug, PartialEq)] + enum TestValue { + String(String), + I64(i64), + F64(f64), + } + + impl ValueLike for TestValue { + fn extract_float(&self) -> Option { + match self { + TestValue::I64(i) => Some(*i as f64), + TestValue::F64(f) => Some(*f), + _ => None, + } + } + + fn extract_string(&self) -> Option> { + match self { + TestValue::String(s) => Some(Cow::Borrowed(s.as_str())), + TestValue::I64(i) => Some(Cow::Owned(i.to_string())), + TestValue::F64(f) => Some(Cow::Owned(f.to_string())), + } + } + } + + #[derive(Clone, Debug)] + struct TestAttribute { + key: String, + value: TestValue, + } + + impl TestAttribute { + fn new(key: impl Into, value: impl Into) -> Self { + Self { + key: key.into(), + value: value.into(), + } + } + } + + impl AttributeLike for TestAttribute { + type Value = TestValue; + + fn key(&self) -> &str { + &self.key + } + + fn value(&self) -> &Self::Value { + &self.value + } + } + + impl From<&str> for TestValue { + fn from(s: &str) -> Self { + TestValue::String(s.to_string()) + } + } + + impl From for TestValue { + fn from(s: String) -> Self { + TestValue::String(s) + } + } + + struct TestSpan<'a> { + name: &'a str, + attributes: &'a [TestAttribute], + } + + 
impl<'a> TestSpan<'a> { + fn new(name: &'a str, attributes: &'a [TestAttribute]) -> Self { + Self { name, attributes } + } + + fn get_operation_name(&self) -> Cow<'_, str> { + // Check for HTTP spans - label them all as client spans + if self + .attributes + .iter() + .any(|attr| attr.key() == HTTP_REQUEST_METHOD) + { + return Cow::Borrowed("http.client.request"); + } + + // Default fallback + Cow::Borrowed("internal") + } + } + + impl<'a> SpanProperties for TestSpan<'a> { + type Attribute = TestAttribute; + + fn operation_name(&self) -> Cow<'_, str> { + self.get_operation_name() + } + + fn service(&self) -> Cow<'_, str> { + self.attributes + .iter() + .find(|attr| attr.key() == SERVICE_NAME) + .and_then(|attr| attr.value().extract_string()) + .unwrap_or(Cow::Borrowed("")) + } + + fn env(&self) -> Cow<'_, str> { + self.attributes + .iter() + .find(|attr| attr.key() == "datadog.env" || attr.key() == ENV_TAG) + .and_then(|attr| attr.value().extract_string()) + .unwrap_or(Cow::Borrowed("")) + } + + fn resource(&self) -> Cow<'_, str> { + self.attributes + .iter() + .find(|attr| attr.key() == RESOURCE_TAG) + .and_then(|attr| attr.value().extract_string()) + .unwrap_or(Cow::Borrowed(self.name)) + } + + fn status_code(&self) -> Option { + self.attributes + .iter() + .find(|attr| { + attr.key() == HTTP_RESPONSE_STATUS_CODE || attr.key() == HTTP_STATUS_CODE + }) + .and_then(|attr| match attr.value() { + TestValue::I64(i) => Some(*i as u32), + _ => None, + }) + } + + fn attributes<'b>(&'b self) -> impl Iterator + where + Self: 'b, + { + self.attributes.iter() + } + + fn get_alternate_key<'b>(&self, key: &'b str) -> Option> { + match key { + HTTP_RESPONSE_STATUS_CODE => Some(Cow::Borrowed(HTTP_STATUS_CODE)), + HTTP_REQUEST_METHOD => Some(Cow::Borrowed("http.method")), + _ => None, + } + } + } + + struct TestSamplingData<'a> { + is_parent_sampled: Option, + trace_id: &'a TestTraceId, + name: &'a str, + attributes: &'a [TestAttribute], + } + + impl<'a> TestSamplingData<'a> { + 
fn new( + is_parent_sampled: Option, + trace_id: &'a TestTraceId, + name: &'a str, + attributes: &'a [TestAttribute], + ) -> Self { + Self { + is_parent_sampled, + trace_id, + name, + attributes, + } + } + } + + impl<'a> SamplingData for TestSamplingData<'a> { + type TraceId = TestTraceId; + type Properties<'b> + = TestSpan<'b> + where + Self: 'b; + + fn is_parent_sampled(&self) -> Option { + self.is_parent_sampled + } + + fn trace_id(&self) -> &Self::TraceId { + self.trace_id + } + + fn with_span_properties(&self, s: &S, f: F) -> T + where + F: for<'b> Fn(&S, &TestSpan<'b>) -> T, + { + let span = TestSpan::new(self.name, self.attributes); + f(s, &span) + } + } + + struct TestAttributeFactory; + + impl crate::types::AttributeFactory for TestAttributeFactory { + type Attribute = TestAttribute; + + fn create_i64(&self, key: &'static str, value: i64) -> Self::Attribute { + TestAttribute::new(key, TestValue::I64(value)) + } + + fn create_f64(&self, key: &'static str, value: f64) -> Self::Attribute { + TestAttribute::new(key, TestValue::F64(value)) + } + + fn create_string(&self, key: &'static str, value: Cow<'static, str>) -> Self::Attribute { + TestAttribute::new(key, TestValue::String(value.into_owned())) + } + } + + // ============================================================================ + // Test helper functions + // ============================================================================ + + // Helper function to create a trace ID + fn create_trace_id() -> TestTraceId { + let bytes = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + TestTraceId::from_bytes(bytes) + } + + // Helper function to create attributes for testing (with resource and env) + fn create_attributes(resource: &'static str, env: &'static str) -> Vec { + vec![ + TestAttribute::new(RESOURCE_TAG, resource), + TestAttribute::new("datadog.env", env), + ] + } + + // Helper function to create attributes with service + fn create_attributes_with_service( + service: String, + resource: 
&'static str, + env: &'static str, + ) -> Vec { + vec![ + TestAttribute::new(SERVICE_NAME, service), + TestAttribute::new(RESOURCE_TAG, resource), + TestAttribute::new("datadog.env", env), + ] + } + + // Helper function to create SamplingData for testing + fn create_sampling_data<'a>( + is_parent_sampled: Option, + trace_id: &'a TestTraceId, + name: &'a str, + attributes: &'a [TestAttribute], + ) -> TestSamplingData<'a> { + TestSamplingData::new(is_parent_sampled, trace_id, name, attributes) + } + + #[test] + fn test_sampling_rule_creation() { + let rule = SamplingRule::new( + 0.5, + Some("test-service".to_string()), + Some("test-name".to_string()), + Some("test-resource".to_string()), + Some(HashMap::from([( + "custom-tag".to_string(), + "tag-value".to_string(), + )])), + Some("customer".to_string()), + ); + + assert_eq!(rule.sample_rate, 0.5); + assert_eq!(rule.service_matcher.unwrap().pattern(), "test-service"); + assert_eq!(rule.name_matcher.unwrap().pattern(), "test-name"); + assert_eq!( + rule.resource_matcher.unwrap().pattern(), + "test-resource".to_string() + ); + assert_eq!( + rule.tag_matchers.get("custom-tag").unwrap().pattern(), + "tag-value" + ); + assert_eq!(rule.provenance, "customer"); + } + + #[test] + fn test_sampling_rule_with_no_rule() { + // Create a rule without specifying any criteria + let rule = SamplingRule::new( + 0.5, None, // No service + None, // No name + None, // No resource + None, // No tags + None, // Default provenance + ); + + // Verify fields are set to None or empty + assert_eq!(rule.sample_rate, 0.5); + assert!(rule.service_matcher.is_none()); + assert!(rule.name_matcher.is_none()); + assert!(rule.resource_matcher.is_none()); + assert!(rule.tag_matchers.is_empty()); + assert_eq!(rule.provenance, "default"); + + // Verify no matchers were created + assert!(rule.service_matcher.is_none()); + assert!(rule.name_matcher.is_none()); + assert!(rule.resource_matcher.is_none()); + assert!(rule.tag_matchers.is_empty()); + + // Test 
that a rule with NO_RULE constants behaves the same as None + let rule_with_empty_strings = SamplingRule::new( + 0.5, + Some(pattern::NO_RULE.to_string()), // Empty service string + Some(pattern::NO_RULE.to_string()), // Empty name string + Some(pattern::NO_RULE.to_string()), // Empty resource string + Some(HashMap::from([( + pattern::NO_RULE.to_string(), + pattern::NO_RULE.to_string(), + )])), // Empty tag + None, + ); + + // Verify that matchers aren't created for NO_RULE values + assert!(rule_with_empty_strings.service_matcher.is_none()); + assert!(rule_with_empty_strings.name_matcher.is_none()); + assert!(rule_with_empty_strings.resource_matcher.is_none()); + assert!(rule_with_empty_strings.tag_matchers.is_empty()); + + // Create a span with some attributes + let attributes = create_attributes("some-resource", "some-env"); + + // Both rules should match any span since they have no criteria + let span = TestSpan::new("", &attributes); + assert!(rule.matches(&span)); + assert!(rule_with_empty_strings.matches(&span)); + } + + #[test] + fn test_sampling_rule_matches() { + // Create a rule with specific service and name patterns + let _rule = SamplingRule::new( + 0.5, + Some("web-*".to_string()), + Some("http.*".to_string()), + None, + Some(HashMap::from([( + "custom_key".to_string(), + "custom_value".to_string(), + )])), + None, + ); + } + + #[test] + fn test_sample_method() { + // Create two rules with different rates + let rule_always = SamplingRule::new(1.0, None, None, None, None, None); + let rule_never = SamplingRule::new(0.0, None, None, None, None, None); + + let trace_id = create_trace_id(); + + // Rule with rate 1.0 should always sample + assert!(rule_always.sample(&trace_id)); + + // Rule with rate 0.0 should never sample + assert!(!rule_never.sample(&trace_id)); + } + + #[test] + fn test_datadog_sampler_creation() { + // Create a sampler with default config + let sampler = DatadogSampler::new(vec![], 100); + assert!(sampler.rules.is_empty()); + 
assert!(sampler.service_samplers.is_empty()); + + // Create a sampler with rules + let rule = SamplingRule::new(0.5, None, None, None, None, None); + let sampler_with_rules = DatadogSampler::new(vec![rule], 200); + assert_eq!(sampler_with_rules.rules.len(), 1); + } + + #[test] + fn test_service_key_generation() { + let test_service_name = "test-service".to_string(); + let sampler = DatadogSampler::new(vec![], 100); + + // Test with service and env + let attrs = + create_attributes_with_service(test_service_name.clone(), "resource", "production"); + let span = TestSpan::new("test-span", attrs.as_slice()); + assert_eq!( + sampler.service_key(&span), + format!("service:{test_service_name},env:production") + ); + + // Test with missing env + let attrs_no_env = vec![ + TestAttribute::new(SERVICE_NAME, test_service_name.clone()), + TestAttribute::new(RESOURCE_TAG, "resource"), + ]; + let span = TestSpan::new("test-span", attrs_no_env.as_slice()); + assert_eq!( + sampler.service_key(&span), + format!("service:{test_service_name},env:") + ); + } + + #[test] + fn test_update_service_rates() { + let sampler = DatadogSampler::new(vec![], 100); + + // Update with service rates + let mut rates = HashMap::new(); + rates.insert("service:web,env:prod".to_string(), 0.5); + rates.insert("service:api,env:prod".to_string(), 0.75); + + sampler.service_samplers.update_rates(rates); + + // Check number of samplers + assert_eq!(sampler.service_samplers.len(), 2); + + // Verify keys exist + assert!(sampler + .service_samplers + .contains_key("service:web,env:prod")); + assert!(sampler + .service_samplers + .contains_key("service:api,env:prod")); + + // Verify the sampling rates are correctly set + if let Some(web_sampler) = sampler.service_samplers.get("service:web,env:prod") { + assert_eq!(web_sampler.sample_rate(), 0.5); + } else { + panic!("Web service sampler not found"); + } + + if let Some(api_sampler) = sampler.service_samplers.get("service:api,env:prod") { + 
assert_eq!(api_sampler.sample_rate(), 0.75); + } else { + panic!("API service sampler not found"); + } + } + + #[test] + fn test_find_matching_rule() { + // Create rules with different priorities and service matchers + let rule1 = SamplingRule::new( + 0.1, + Some("service1".to_string()), + None, + None, + None, + Some("customer".to_string()), // Highest priority + ); + + let rule2 = SamplingRule::new( + 0.2, + Some("service2".to_string()), + None, + None, + None, + Some("dynamic".to_string()), // Middle priority + ); + + let rule3 = SamplingRule::new( + 0.3, + Some("service*".to_string()), // Wildcard service + None, + None, + None, + Some("default".to_string()), // Lowest priority + ); + + let sampler = DatadogSampler::new(vec![rule1.clone(), rule2.clone(), rule3.clone()], 100); + + // Test with a specific service that should match the first rule (rule1) + { + let attrs1 = create_attributes_with_service( + "service1".to_string(), + "resource_val_for_attr1", + "prod", + ); + let span = TestSpan::new("test-span", attrs1.as_slice()); + let matching_rule_for_attrs1 = sampler.find_matching_rule(&span); + assert!( + matching_rule_for_attrs1.is_some(), + "Expected rule1 to match for service1" + ); + let rule = matching_rule_for_attrs1.unwrap(); + assert_eq!(rule.sample_rate, 0.1, "Expected rule1 sample rate"); + assert_eq!(rule.provenance, "customer", "Expected rule1 provenance"); + } + + // Test with a specific service that should match the second rule (rule2) + { + let attrs2 = create_attributes_with_service( + "service2".to_string(), + "resource_val_for_attr2", + "prod", + ); + let span = TestSpan::new("test-span", attrs2.as_slice()); + let matching_rule_for_attrs2 = sampler.find_matching_rule(&span); + assert!( + matching_rule_for_attrs2.is_some(), + "Expected rule2 to match for service2" + ); + let rule = matching_rule_for_attrs2.unwrap(); + assert_eq!(rule.sample_rate, 0.2, "Expected rule2 sample rate"); + assert_eq!(rule.provenance, "dynamic", "Expected rule2 
provenance"); + } + + // Test with a service that matches the wildcard rule (rule3) + { + let attrs3 = create_attributes_with_service( + "service3".to_string(), + "resource_val_for_attr3", + "prod", + ); + let span = TestSpan::new("test-span", attrs3.as_slice()); + let matching_rule_for_attrs3 = sampler.find_matching_rule(&span); + assert!( + matching_rule_for_attrs3.is_some(), + "Expected rule3 to match for service3" + ); + let rule = matching_rule_for_attrs3.unwrap(); + assert_eq!(rule.sample_rate, 0.3, "Expected rule3 sample rate"); + assert_eq!(rule.provenance, "default", "Expected rule3 provenance"); + } + + // Test with a service that doesn't match any rule's service pattern + { + let attrs4 = create_attributes_with_service( + "other_sampler_service".to_string(), + "resource_val_for_attr4", + "prod", + ); + let span = TestSpan::new("test-span", attrs4.as_slice()); + let matching_rule_for_attrs4 = sampler.find_matching_rule(&span); + assert!( + matching_rule_for_attrs4.is_none(), + "Expected no rule to match for service 'other_sampler_service'" + ); + } + } + + #[test] + fn test_get_sampling_mechanism() { + let sampler = DatadogSampler::new(vec![], 100); + + // Create rules with different provenances + let rule_customer = + SamplingRule::new(0.1, None, None, None, None, Some("customer".to_string())); + let rule_dynamic = + SamplingRule::new(0.2, None, None, None, None, Some("dynamic".to_string())); + let rule_default = + SamplingRule::new(0.3, None, None, None, None, Some("default".to_string())); + + // Test with customer rule + let mechanism1 = sampler.get_sampling_mechanism(Some(&rule_customer), false); + assert_eq!(mechanism1, mechanism::REMOTE_USER_TRACE_SAMPLING_RULE); + + // Test with dynamic rule + let mechanism2 = sampler.get_sampling_mechanism(Some(&rule_dynamic), false); + assert_eq!(mechanism2, mechanism::REMOTE_DYNAMIC_TRACE_SAMPLING_RULE); + + // Test with default rule + let mechanism3 = sampler.get_sampling_mechanism(Some(&rule_default), false); 
+ assert_eq!(mechanism3, mechanism::LOCAL_USER_TRACE_SAMPLING_RULE); + + // Test with agent sampler + let mechanism4 = sampler.get_sampling_mechanism(None, true); + assert_eq!(mechanism4, mechanism::AGENT_RATE_BY_SERVICE); + + // Test fallback case + let mechanism5 = sampler.get_sampling_mechanism(None, false); + assert_eq!(mechanism5, mechanism::DEFAULT); + } + + #[test] + fn test_add_dd_sampling_tags() { + // Test with RecordAndSample decision and LocalUserTraceSamplingRule mechanism + let sample_rate = 0.5; + let is_sampled = true; + let mechanism = mechanism::LOCAL_USER_TRACE_SAMPLING_RULE; + let sampling_result = DdSamplingResult { + priority: mechanism.to_priority(is_sampled), + trace_root_info: Some(TraceRootSamplingInfo { + mechanism, + rate: 0.5, + rl_effective_rate: None, + }), + }; + + let attrs = sampling_result + .to_dd_sampling_tags(&TestAttributeFactory) + .unwrap_or_default(); + + // Verify the number of attributes (decision_maker + priority + rule_rate + ksr) + assert_eq!(attrs.len(), 4); + + // Check individual attributes + let mut found_decision_maker = false; + let mut found_priority = false; + let mut found_rule_rate = false; + let mut found_ksr = false; + + for attr in &attrs { + match attr.key() { + SAMPLING_DECISION_MAKER_TAG_KEY => { + let value_str = match attr.value() { + TestValue::String(s) => s.to_string(), + _ => panic!("Expected string value for decision maker tag"), + }; + assert_eq!(value_str, mechanism.to_cow()); + found_decision_maker = true; + } + SAMPLING_PRIORITY_TAG_KEY => { + // For LocalUserTraceSamplingRule with KEEP, it should be USER_KEEP + let expected_priority = mechanism.to_priority(true).into_i8() as i64; + + let value_int = match attr.value() { + TestValue::I64(i) => *i, + _ => panic!("Expected integer value for priority tag"), + }; + assert_eq!(value_int, expected_priority); + found_priority = true; + } + SAMPLING_RULE_RATE_TAG_KEY => { + let value_float = match attr.value() { + TestValue::F64(f) => *f, + _ => 
panic!("Expected float value for rule rate tag"), + }; + assert_eq!(value_float, sample_rate); + found_rule_rate = true; + } + SAMPLING_KNUTH_RATE_TAG_KEY => { + let value_str = match attr.value() { + TestValue::String(s) => s.to_string(), + _ => panic!("Expected string value for ksr tag"), + }; + assert_eq!(value_str, "0.5"); + found_ksr = true; + } + _ => {} + } + } + + assert!(found_decision_maker, "Missing decision maker tag"); + assert!(found_priority, "Missing priority tag"); + assert!(found_rule_rate, "Missing rule rate tag"); + assert!(found_ksr, "Missing knuth sampling rate tag"); + + // Test with rate limiting + let rate_limit = 0.5; + let is_sampled = false; + let mechanism = mechanism::LOCAL_USER_TRACE_SAMPLING_RULE; + let sampling_result = DdSamplingResult { + priority: mechanism.to_priority(is_sampled), + trace_root_info: Some(TraceRootSamplingInfo { + mechanism, + rate: 0.5, + rl_effective_rate: Some(rate_limit), + }), + }; + let attrs_with_limit = sampling_result + .to_dd_sampling_tags(&TestAttributeFactory) + .unwrap_or_default(); + + // With rate limiting, there should be one more attribute + assert_eq!(attrs_with_limit.len(), 5); + + // Check for rate limit attribute + let mut found_limit = false; + for attr in &attrs_with_limit { + if attr.key() == RL_EFFECTIVE_RATE { + let value_float = match attr.value() { + TestValue::F64(f) => *f, + _ => panic!("Expected float value for rate limit tag"), + }; + assert_eq!(value_float, rate_limit); + found_limit = true; + break; + } + } + + assert!(found_limit, "Missing rate limit tag"); + + // Test with AgentRateByService mechanism to check for SAMPLING_AGENT_RATE_TAG_KEY + + let agent_rate = 0.75; + let is_sampled = false; + let mechanism = mechanism::AGENT_RATE_BY_SERVICE; + let sampling_result = DdSamplingResult { + priority: mechanism.to_priority(is_sampled), + trace_root_info: Some(TraceRootSamplingInfo { + mechanism, + rate: agent_rate, + rl_effective_rate: None, + }), + }; + + let agent_attrs = 
sampling_result + .to_dd_sampling_tags(&TestAttributeFactory) + .unwrap_or_default(); + + // Verify the number of attributes (should be 4: decision_maker + priority + + // agent_rate + ksr) + assert_eq!(agent_attrs.len(), 4); + + // Check for agent rate tag and ksr tag + let mut found_agent_rate = false; + let mut found_ksr = false; + for attr in &agent_attrs { + match attr.key() { + SAMPLING_AGENT_RATE_TAG_KEY => { + let value_float = match attr.value() { + TestValue::F64(f) => *f, + _ => panic!("Expected float value for agent rate tag"), + }; + assert_eq!(value_float, agent_rate); + found_agent_rate = true; + } + SAMPLING_KNUTH_RATE_TAG_KEY => { + let value_str = match attr.value() { + TestValue::String(s) => s.to_string(), + _ => panic!("Expected string value for ksr tag"), + }; + assert_eq!(value_str, "0.75"); + found_ksr = true; + } + _ => {} + } + } + + assert!(found_agent_rate, "Missing agent rate tag"); + assert!( + found_ksr, + "Missing knuth sampling rate tag for agent mechanism" + ); + + // Also check that the SAMPLING_RULE_RATE_TAG_KEY is NOT present for agent mechanism + for attr in &agent_attrs { + assert_ne!( + attr.key(), + SAMPLING_RULE_RATE_TAG_KEY, + "Rule rate tag should not be present for agent mechanism" + ); + } + } + + #[test] + fn test_format_sampling_rate() { + // Exact values + assert_eq!(format_sampling_rate(1.0), Some("1".to_string())); + assert_eq!(format_sampling_rate(0.5), Some("0.5".to_string())); + assert_eq!(format_sampling_rate(0.1), Some("0.1".to_string())); + assert_eq!(format_sampling_rate(0.0), Some("0".to_string())); + + // Trailing zeros should be stripped + assert_eq!(format_sampling_rate(0.100000), Some("0.1".to_string())); + assert_eq!(format_sampling_rate(0.500000), Some("0.5".to_string())); + + // Truncation to 6 significant digits + assert_eq!( + format_sampling_rate(0.7654321), + Some("0.765432".to_string()) + ); + assert_eq!( + format_sampling_rate(0.123456789), + Some("0.123457".to_string()) + ); + + // Small 
values + assert_eq!(format_sampling_rate(0.001), Some("0.001".to_string())); + + // Boundary values + assert_eq!(format_sampling_rate(0.75), Some("0.75".to_string())); + assert_eq!(format_sampling_rate(0.999999), Some("0.999999".to_string())); + + // Invalid rates + assert_eq!(format_sampling_rate(-0.1), None); + assert_eq!(format_sampling_rate(1.1), None); + assert_eq!(format_sampling_rate(f64::NAN), None); + assert_eq!(format_sampling_rate(f64::INFINITY), None); + assert_eq!(format_sampling_rate(f64::NEG_INFINITY), None); + } + + #[test] + fn test_should_sample_parent_context() { + let sampler = DatadogSampler::new(vec![], 100); + + // Create empty slices for attributes and links + let empty_attrs: &[TestAttribute] = &[]; + let trace_id = create_trace_id(); + + // Test with sampled parent context + let data_sampled = create_sampling_data(Some(true), &trace_id, "span", empty_attrs); + let result_sampled = sampler.sample(&data_sampled); + + // Should inherit the sampling decision from parent + assert!(result_sampled.get_priority().is_keep()); + assert!(result_sampled + .to_dd_sampling_tags(&TestAttributeFactory) + .is_none()); + + // Test with non-sampled parent context + let data_not_sampled = create_sampling_data(Some(false), &trace_id, "span", empty_attrs); + let result_not_sampled = sampler.sample(&data_not_sampled); + + // Should inherit the sampling decision from parent + assert!(!result_not_sampled.get_priority().is_keep()); + assert!(result_not_sampled + .to_dd_sampling_tags(&TestAttributeFactory) + .is_none()); + } + + #[test] + fn test_should_sample_with_rule() { + // Create a rule that always samples + let rule = SamplingRule::new( + 1.0, + Some("test-service".to_string()), + None, + None, + None, + None, + ); + + let sampler = DatadogSampler::new(vec![rule], 100); + + let trace_id = create_trace_id(); + + // Test with matching attributes + let attrs = create_attributes("resource", "prod"); + let data = create_sampling_data(None, &trace_id, "span", 
attrs.as_slice()); + let result = sampler.sample(&data); + + // Should sample and add attributes + assert!(result.get_priority().is_keep()); + assert!(result.to_dd_sampling_tags(&TestAttributeFactory).is_some()); + + // Test with non-matching attributes + let attrs_no_match = create_attributes("other-resource", "prod"); + let data_no_match = + create_sampling_data(None, &trace_id, "span", attrs_no_match.as_slice()); + let result_no_match = sampler.sample(&data_no_match); + + // Should still sample (default behavior when no rules match) and add attributes + assert!(result_no_match.get_priority().is_keep()); + assert!(result_no_match + .to_dd_sampling_tags(&TestAttributeFactory) + .is_some()); + } + + #[test] + fn test_should_sample_with_service_rates() { + // Initialize sampler + let sampler = DatadogSampler::new(vec![], 100); + + // Add service rates for different service+env combinations + let mut rates = HashMap::new(); + rates.insert("service:test-service,env:prod".to_string(), 1.0); // Always sample for test-service in prod + rates.insert("service:other-service,env:prod".to_string(), 0.0); // Never sample for other-service in prod + + sampler.update_service_rates(rates); + + let trace_id = create_trace_id(); + + // Test with attributes that should lead to "service:test-service,env:prod" key + let attrs_sample = create_attributes_with_service( + "test-service".to_string(), + "any_resource_name_matching_env", + "prod", + ); + let data_sample = create_sampling_data( + None, + &trace_id, + "span_for_test_service", + attrs_sample.as_slice(), + ); + let result_sample = sampler.sample(&data_sample); + // Expect RecordAndSample because service_key will be "service:test-service,env:prod" -> + // rate 1.0 + assert!( + result_sample.get_priority().is_keep(), + "Span for test-service/prod should be sampled" + ); + + // Test with attributes that should lead to "service:other-service,env:prod" key + let attrs_no_sample = create_attributes_with_service( + 
"other-service".to_string(), + "any_resource_name_matching_env", + "prod", + ); + let data_no_sample = create_sampling_data( + None, + &trace_id, + "span_for_other_service", + attrs_no_sample.as_slice(), + ); + let result_no_sample = sampler.sample(&data_no_sample); + // Expect Drop because service_key will be "service:other-service,env:prod" -> rate 0.0 + assert!( + !result_no_sample.get_priority().is_keep(), + "Span for other-service/prod should be dropped" + ); + } + + #[test] + fn test_sampling_rule_matches_float_attributes() { + // Helper to create attributes with a float value + fn create_attributes_with_float( + tag_key: &'static str, + float_value: f64, + ) -> Vec { + vec![ + TestAttribute::new(RESOURCE_TAG, "resource"), + TestAttribute::new(ENV_TAG, "prod"), + TestAttribute::new(tag_key, TestValue::F64(float_value)), + ] + } + + // Test case 1: Rule with exact value matching integer float + let rule_integer = SamplingRule::new( + 0.5, + None, + None, + None, + Some(HashMap::from([("float_tag".to_string(), "42".to_string())])), + None, + ); + + // Should match integer float + let integer_float_attrs = create_attributes_with_float("float_tag", 42.0); + let span = TestSpan::new("test-span", integer_float_attrs.as_slice()); + assert!(rule_integer.matches(&span)); + + // Test case 2: Rule with wildcard pattern and non-integer float + let rule_wildcard = SamplingRule::new( + 0.5, + None, + None, + None, + Some(HashMap::from([("float_tag".to_string(), "*".to_string())])), + None, + ); + + // Should match non-integer float with wildcard pattern + let decimal_float_attrs = create_attributes_with_float("float_tag", 42.5); + let span = TestSpan::new("test-span", decimal_float_attrs.as_slice()); + assert!(rule_wildcard.matches(&span)); + + // Test case 3: Rule with specific pattern and non-integer float + // With our simplified logic, non-integer floats will never match non-wildcard patterns + let rule_specific = SamplingRule::new( + 0.5, + None, + None, + None, + 
Some(HashMap::from([( + "float_tag".to_string(), + "42.5".to_string(), + )])), + None, + ); + + // Should NOT match the exact decimal value because non-integer floats only match wildcards + let decimal_float_attrs = create_attributes_with_float("float_tag", 42.5); + let span = TestSpan::new("test-span", decimal_float_attrs.as_slice()); + assert!(!rule_specific.matches(&span)); + // Test case 4: Pattern with partial wildcard '*' for suffix + let rule_prefix = SamplingRule::new( + 0.5, + None, + None, + None, + Some(HashMap::from([( + "float_tag".to_string(), + "42.*".to_string(), + )])), + None, + ); + + // Should NOT match decimal values as we don't do partial pattern matching for non-integer + // floats + let span = TestSpan::new("test-span", decimal_float_attrs.as_slice()); + assert!(!rule_prefix.matches(&span)); + } + + #[test] + fn test_operation_name() { + // Test that the sampler correctly matches rules based on operation names + // Operation name generation itself is tested in otel_mappings unit tests + + let http_rule = SamplingRule::new( + 1.0, + None, + Some("http.*.request".to_string()), + None, + None, + Some("default".to_string()), + ); + + let sampler = DatadogSampler::new(vec![http_rule], 100); + + let trace_id = create_trace_id(); + + // HTTP client request should match http_rule (operation name: http.client.request) + let http_client_attrs = vec![TestAttribute::new(HTTP_REQUEST_METHOD, "GET")]; + let data = create_sampling_data(None, &trace_id, "test-span", &http_client_attrs); + assert!(sampler.sample(&data).get_priority().is_keep()); + + // Span that doesn't match the rule should still be sampled (default behavior) + let internal_attrs = vec![TestAttribute::new("custom.tag", "value")]; + let data = create_sampling_data(None, &trace_id, "test-span", &internal_attrs); + assert!(sampler.sample(&data).get_priority().is_keep()); + } + + #[test] + fn test_on_rules_update_callback() { + // Create a sampler with initial rules + let initial_rule = 
SamplingRule::new( + 0.1, + Some("initial-service".to_string()), + None, + None, + None, + Some("default".to_string()), + ); + + let sampler = DatadogSampler::new(vec![initial_rule], 100); + + // Verify initial state + assert_eq!(sampler.rules.len(), 1); + + // Get the callback + let callback = sampler.on_rules_update(); + + // Create new rules directly as SamplingRuleConfig objects + let new_rules = vec![ + SamplingRuleConfig { + sample_rate: 0.5, + service: Some("web-*".to_string()), + name: Some("http.*".to_string()), + resource: None, + tags: std::collections::HashMap::new(), + provenance: "customer".to_string(), + }, + SamplingRuleConfig { + sample_rate: 0.2, + service: Some("api-*".to_string()), + name: None, + resource: Some("/api/*".to_string()), + tags: [("env".to_string(), "prod".to_string())].into(), + provenance: "dynamic".to_string(), + }, + ]; + + // Apply the update + callback(&new_rules); + + // Verify the rules were updated + assert_eq!(sampler.rules.len(), 2); + + // Test that the new rules work by finding a matching rule + // Create attributes that will generate an operation name matching "http.*" + // and service matching "web-*" + let attrs = vec![ + TestAttribute::new(SERVICE_NAME, "web-frontend"), + TestAttribute::new(HTTP_REQUEST_METHOD, "GET"), /* This will make operation name + * "http.client.request" */ + ]; + let span = TestSpan::new("test-span", attrs.as_slice()); + + let matching_rule = sampler.find_matching_rule(&span); + assert!(matching_rule.is_some(), "Expected to find a matching rule for service 'web-frontend' and name 'http.client.request'"); + let rule = matching_rule.unwrap(); + assert_eq!(rule.sample_rate, 0.5); + assert_eq!(rule.provenance, "customer"); + + // Test with empty rules array + callback(&[]); + assert_eq!(sampler.rules.len(), 0); // Should now have no rules + } +} diff --git a/datadog-opentelemetry/src/core/constants.rs b/libdd-sampling/src/dd_constants.rs similarity index 100% rename from 
datadog-opentelemetry/src/core/constants.rs rename to libdd-sampling/src/dd_constants.rs diff --git a/datadog-opentelemetry/src/core/sampling.rs b/libdd-sampling/src/dd_sampling.rs similarity index 89% rename from datadog-opentelemetry/src/core/sampling.rs rename to libdd-sampling/src/dd_sampling.rs index 398a62d0..f80482bb 100644 --- a/datadog-opentelemetry/src/core/sampling.rs +++ b/libdd-sampling/src/dd_sampling.rs @@ -30,15 +30,33 @@ pub struct SamplingPriority { } impl SamplingPriority { - pub(crate) const fn from_i8(value: i8) -> Self { + pub const fn from_i8(value: i8) -> Self { Self { value } } - pub(crate) fn into_i8(self) -> i8 { + pub fn into_i8(self) -> i8 { self.value } - pub(crate) fn is_keep(&self) -> bool { + /// Returns whether this sampling priority indicates the trace should be kept. + /// + /// # Returns + /// + /// `true` if the priority value is positive (indicating the trace should be kept), + /// `false` otherwise (indicating the trace should be dropped). + /// + /// # Examples + /// + /// ``` + /// use libdd_sampling::priority; + /// + /// assert!(priority::AUTO_KEEP.is_keep()); + /// assert!(priority::USER_KEEP.is_keep()); + /// assert!(!priority::AUTO_REJECT.is_keep()); + /// assert!(!priority::USER_REJECT.is_keep()); + /// ``` + #[inline(always)] + pub fn is_keep(&self) -> bool { self.value > 0 } } @@ -87,15 +105,15 @@ pub struct SamplingMechanism { } impl SamplingMechanism { - pub(crate) const fn from_u8(value: u8) -> Self { + pub const fn from_u8(value: u8) -> Self { Self { value } } - pub(crate) fn into_u8(self) -> u8 { + pub fn into_u8(self) -> u8 { self.value } - pub(crate) fn to_priority(self, is_keep: bool) -> SamplingPriority { + pub fn to_priority(self, is_keep: bool) -> SamplingPriority { const AUTO_PAIR: PriorityPair = PriorityPair { keep: priority::AUTO_KEEP, reject: priority::AUTO_REJECT, diff --git a/datadog-opentelemetry/src/sampling/glob_matcher.rs b/libdd-sampling/src/glob_matcher.rs similarity index 100% rename from 
datadog-opentelemetry/src/sampling/glob_matcher.rs rename to libdd-sampling/src/glob_matcher.rs diff --git a/libdd-sampling/src/lib.rs b/libdd-sampling/src/lib.rs new file mode 100644 index 00000000..f6ac72ee --- /dev/null +++ b/libdd-sampling/src/lib.rs @@ -0,0 +1,37 @@ +// Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +//! Core sampling logic for Datadog tracing +//! +//! This crate provides generic sampling infrastructure including: +//! - Trait abstractions for trace IDs, attributes, and span properties +//! - Rate-based sampling algorithms +//! - Rate limiting functionality +//! - Glob pattern matching for sampling rules +//! - Sampling-related constants +//! - Rule-based sampling with pattern matching +//! - Agent-provided sampling rates +//! - Complete Datadog sampler implementation + +pub(crate) mod agent_service_sampler; +pub(crate) mod constants; +pub(crate) mod datadog_sampler; +pub mod dd_constants; +pub mod dd_sampling; +pub(crate) mod glob_matcher; +pub(crate) mod rate_limiter; +pub(crate) mod rate_sampler; +pub(crate) mod rules_sampler; +pub(crate) mod sampling_rule; +pub(crate) mod sampling_rule_config; +pub(crate) mod types; + +// Re-export key types for convenience +pub use agent_service_sampler::ServicesSampler; +pub use datadog_sampler::{DatadogSampler, SamplingRulesCallback}; +pub use dd_sampling::{mechanism, priority, SamplingDecision, SamplingMechanism, SamplingPriority}; +pub use sampling_rule::SamplingRule; +pub use sampling_rule_config::{ParsedSamplingRules, SamplingRuleConfig}; +pub use types::{ + AttributeFactory, AttributeLike, SamplingData, SpanProperties, TraceIdLike, ValueLike, +}; diff --git a/datadog-opentelemetry/src/sampling/rate_limiter.rs b/libdd-sampling/src/rate_limiter.rs similarity index 99% rename from datadog-opentelemetry/src/sampling/rate_limiter.rs rename to libdd-sampling/src/rate_limiter.rs index bc69c312..ff9ebbc6 100644 --- 
a/datadog-opentelemetry/src/sampling/rate_limiter.rs +++ b/libdd-sampling/src/rate_limiter.rs @@ -7,7 +7,7 @@ use std::time::Instant; /// A token bucket rate limiter implementation #[derive(Clone)] -pub(crate) struct RateLimiter { +pub struct RateLimiter { /// Rate limit value that doesn't need to be protected by mutex rate_limit: i32, diff --git a/datadog-opentelemetry/src/sampling/rate_sampler.rs b/libdd-sampling/src/rate_sampler.rs similarity index 82% rename from datadog-opentelemetry/src/sampling/rate_sampler.rs rename to libdd-sampling/src/rate_sampler.rs index ba6d77de..fd20fb2d 100644 --- a/datadog-opentelemetry/src/sampling/rate_sampler.rs +++ b/libdd-sampling/src/rate_sampler.rs @@ -1,15 +1,14 @@ // Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 -use opentelemetry::trace::TraceId; -use std::fmt; - use super::constants::{numeric, rate}; +use crate::types::TraceIdLike; use numeric::{KNUTH_FACTOR, MAX_UINT_64BITS}; +use std::fmt; /// Keeps (100 * `sample_rate`)% of the traces randomly. #[derive(Clone)] -pub(crate) struct RateSampler { +pub struct RateSampler { sample_rate: f64, sampling_id_threshold: u64, } @@ -50,7 +49,7 @@ impl RateSampler { /// Determines if a trace should be sampled based on its trace_id and the configured rate. /// Returns true if the trace should be kept, false otherwise. 
- pub fn sample(&self, trace_id: TraceId) -> bool { + pub fn sample(&self, trace_id: &T) -> bool { // Fast-path for sample rate of 0.0 (always drop) or 1.0 (always sample) if self.sample_rate <= rate::MIN_SAMPLE_RATE { return false; @@ -60,8 +59,7 @@ impl RateSampler { } // Convert trace_id to u128 and then cast to u64 to get the lower 64 bits - let trace_id_u128 = u128::from_be_bytes(trace_id.to_bytes()); - let trace_id_64bits = trace_id_u128 as u64; + let trace_id_64bits = trace_id.to_u128() as u64; let hashed_id = trace_id_64bits.wrapping_mul(KNUTH_FACTOR); @@ -73,7 +71,28 @@ impl RateSampler { #[cfg(test)] mod tests { use super::*; - use opentelemetry::trace::TraceId; + + // Test-only TraceId implementation + #[derive(Clone, Debug, PartialEq, Eq)] + struct TestTraceId { + bytes: [u8; 16], + } + + impl TestTraceId { + fn from_bytes(bytes: [u8; 16]) -> Self { + Self { bytes } + } + + fn to_bytes(&self) -> [u8; 16] { + self.bytes + } + } + + impl TraceIdLike for TestTraceId { + fn to_u128(&self) -> u128 { + u128::from_be_bytes(self.bytes) + } + } #[test] fn check_debug_impl() { @@ -122,9 +141,9 @@ mod tests { let sampler_zero = RateSampler::new(0.0); let mut bytes_zero = [0u8; 16]; bytes_zero[15] = 1; // Example ID - let trace_id_zero = TraceId::from_bytes(bytes_zero); + let trace_id_zero = TestTraceId::from_bytes(bytes_zero); assert!( - !sampler_zero.sample(trace_id_zero), + !sampler_zero.sample(&trace_id_zero), "sampler_zero should return false" ); @@ -132,9 +151,9 @@ mod tests { let sampler_one = RateSampler::new(1.0); let mut bytes_one = [0u8; 16]; bytes_one[15] = 2; // Example ID - let trace_id_one = TraceId::from_bytes(bytes_one); + let trace_id_one = TestTraceId::from_bytes(bytes_one); assert!( - sampler_one.sample(trace_id_one), + sampler_one.sample(&trace_id_one), "sampler_one should return true" ); @@ -144,19 +163,19 @@ mod tests { // Trace ID that should be sampled (hashed value <= threshold) let bytes_sample = [0u8; 16]; // Hashes to 0 - let 
trace_id_sample = TraceId::from_bytes(bytes_sample); + let trace_id_sample = TestTraceId::from_bytes(bytes_sample); let sample_u64 = u128::from_be_bytes(trace_id_sample.to_bytes()) as u64; let sample_hash = sample_u64.wrapping_mul(KNUTH_FACTOR); assert!(sample_hash <= threshold); assert!( - sampler_half.sample(trace_id_sample), + sampler_half.sample(&trace_id_sample), "sampler_half should sample trace_id_sample" ); // Trace ID that should be dropped (hashed value > threshold) let mut bytes_drop = [0u8; 16]; bytes_drop[8..16].copy_from_slice(&u64::MAX.to_be_bytes()); // High lower 64 bits - let trace_id_drop = TraceId::from_bytes(bytes_drop); + let trace_id_drop = TestTraceId::from_bytes(bytes_drop); let drop_u64 = u128::from_be_bytes(trace_id_drop.to_bytes()) as u64; let drop_hash = drop_u64.wrapping_mul(KNUTH_FACTOR); // For rate 0.5, threshold is MAX/2. Hashing MAX should result in something > MAX/2 @@ -165,7 +184,7 @@ mod tests { "Drop hash {drop_hash} should be > threshold {threshold}", ); assert!( - !sampler_half.sample(trace_id_drop), + !sampler_half.sample(&trace_id_drop), "sampler_half should drop trace_id_drop" ); } @@ -175,9 +194,9 @@ mod tests { let sampler_half = RateSampler::new(0.5); // Trace ID with all zeros hashes to 0, which is always <= threshold for rate > 0 let bytes_to_sample = [0u8; 16]; - let trace_id_to_sample = TraceId::from_bytes(bytes_to_sample); + let trace_id_to_sample = TestTraceId::from_bytes(bytes_to_sample); assert!( - sampler_half.sample(trace_id_to_sample), + sampler_half.sample(&trace_id_to_sample), "Sampler with 0.5 rate should sample trace ID 0" ); } diff --git a/datadog-opentelemetry/src/sampling/rules_sampler.rs b/libdd-sampling/src/rules_sampler.rs similarity index 96% rename from datadog-opentelemetry/src/sampling/rules_sampler.rs rename to libdd-sampling/src/rules_sampler.rs index e679a09e..3049ff6e 100644 --- a/datadog-opentelemetry/src/sampling/rules_sampler.rs +++ b/libdd-sampling/src/rules_sampler.rs @@ -3,7 +3,7 @@ 
use std::sync::{Arc, RwLock}; -use super::datadog_sampler::SamplingRule; +use super::sampling_rule::SamplingRule; /// Thread-safe container for sampling rules #[derive(Debug, Default, Clone)] diff --git a/libdd-sampling/src/sampling_rule.rs b/libdd-sampling/src/sampling_rule.rs new file mode 100644 index 00000000..d721630d --- /dev/null +++ b/libdd-sampling/src/sampling_rule.rs @@ -0,0 +1,255 @@ +// Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +use crate::constants::pattern::NO_RULE; +use crate::glob_matcher::GlobMatcher; +use crate::rate_sampler::RateSampler; +use crate::sampling_rule_config::SamplingRuleConfig; +use crate::types::{AttributeLike, SpanProperties, TraceIdLike, ValueLike}; +use std::collections::HashMap; + +// HTTP status code attribute constants +const HTTP_RESPONSE_STATUS_CODE: &str = "http.response.status_code"; +const HTTP_STATUS_CODE: &str = "http.status_code"; + +fn matcher_from_rule(rule: &str) -> Option { + (rule != NO_RULE).then(|| GlobMatcher::new(rule)) +} + +/// Represents a sampling rule with criteria for matching spans +#[derive(Clone, Debug)] +pub struct SamplingRule { + /// The sample rate to apply when this rule matches (0.0-1.0) + pub(crate) sample_rate: f64, + + /// Where this rule comes from (customer, dynamic, default) + pub(crate) provenance: String, + + /// Internal rate sampler used when this rule matches + rate_sampler: RateSampler, + + /// Glob matchers for pattern matching + pub(crate) name_matcher: Option, + pub(crate) service_matcher: Option, + pub(crate) resource_matcher: Option, + pub(crate) tag_matchers: HashMap, +} + +impl SamplingRule { + /// Converts a vector of SamplingRuleConfig into SamplingRule objects + /// Centralizes the conversion logic + pub fn from_configs(configs: Vec) -> Vec { + configs + .into_iter() + .map(|config| { + Self::new( + config.sample_rate, + config.service, + config.name, + config.resource, + Some(config.tags), + 
Some(config.provenance), + ) + }) + .collect() + } + + /// Creates a new sampling rule + pub fn new( + sample_rate: f64, + service: Option, + name: Option, + resource: Option, + tags: Option>, + provenance: Option, + ) -> Self { + // Create glob matchers for the patterns + let name_matcher = name.as_deref().and_then(matcher_from_rule); + let service_matcher = service.as_deref().and_then(matcher_from_rule); + let resource_matcher = resource.as_deref().and_then(matcher_from_rule); + + // Create matchers for tag values + let tag_map = tags.clone().unwrap_or_default(); + let mut tag_matchers = HashMap::with_capacity(tag_map.len()); + for (key, value) in &tag_map { + if let Some(matcher) = matcher_from_rule(value) { + tag_matchers.insert(key.clone(), matcher); + } + } + + SamplingRule { + sample_rate, + provenance: provenance.unwrap_or_else(|| "default".to_string()), + rate_sampler: RateSampler::new(sample_rate), + name_matcher, + service_matcher, + resource_matcher, + tag_matchers, + } + } + + /// Checks if this rule matches the given span's attributes and name + /// The name is derived from the attributes and span kind + pub(crate) fn matches(&self, span: &impl SpanProperties) -> bool { + // Get the operation name from the span + let name = span.operation_name(); + + // Check name using glob matcher if specified + if let Some(ref matcher) = self.name_matcher { + if !matcher.matches(name.as_ref()) { + return false; + } + } + + // Check service if specified using glob matcher + if let Some(ref matcher) = self.service_matcher { + // Get service from the span + let service = span.service(); + + // Match against the service + if !matcher.matches(&service) { + return false; + } + } + + // Get the resource string for matching + let resource_str = span.resource(); + + // Check resource if specified using glob matcher + if let Some(ref matcher) = self.resource_matcher { + // Use the resource from the span + if !matcher.matches(resource_str.as_ref()) { + return false; + } + } + 
+ // Check all tags using glob matchers + for (key, matcher) in &self.tag_matchers { + let rule_tag_key_str = key.as_str(); + + // Special handling for rules defined with "http.status_code" or + // "http.response.status_code" + if rule_tag_key_str == HTTP_STATUS_CODE || rule_tag_key_str == HTTP_RESPONSE_STATUS_CODE + { + match self.match_http_status_code_rule(matcher, span) { + Some(true) => continue, // Status code matched + Some(false) | None => return false, // Status code didn't match or wasn't found + } + } else { + // Logic for other tags: + // First, try to match directly with the provided tag key + let direct_match = span + .attributes() + .find(|attr| attr.key() == rule_tag_key_str) + .and_then(|attr| self.match_attribute_value(attr.value(), matcher)); + + if direct_match.unwrap_or(false) { + continue; + } + + // If no direct match, try to find the corresponding OpenTelemetry attribute that + // maps to the Datadog tag key This handles cases where the rule key + // is a Datadog key (e.g., "http.method") and the attribute is an + // OTel key (e.g., "http.request.method") + if rule_tag_key_str.starts_with("http.") { + let tag_match = span.attributes().any(|attr| { + if let Some(alternate_key) = span.get_alternate_key(attr.key()) { + if alternate_key == rule_tag_key_str { + return self + .match_attribute_value(attr.value(), matcher) + .unwrap_or(false); + } + } + false + }); + + if !tag_match { + return false; // Mapped attribute not found or did not match + } + // If tag_match is true, loop continues to next rule_tag_key. + } else { + // For non-HTTP attributes, if we don't have a direct match, the rule doesn't + // match + return false; + } + } + } + + true + } + + /// Helper method to specifically match a rule against an HTTP status code extracted from + /// attributes. Returns Some(true) if status code found and matches, Some(false) if found + /// but not matched, None if not found. 
+ fn match_http_status_code_rule( + &self, + matcher: &GlobMatcher, + span: &impl SpanProperties, + ) -> Option { + span.status_code().and_then(|status_code| { + let status_value = ValueI64(i64::from(status_code)); + self.match_attribute_value(&status_value, matcher) + }) + } + + // Helper method to match attribute values considering different value types + fn match_attribute_value(&self, value: &impl ValueLike, matcher: &GlobMatcher) -> Option { + // Floating point values are handled with special rules + if let Some(float_val) = value.extract_float() { + // Check if the float has a non-zero decimal part + let has_decimal = float_val != (float_val as i64) as f64; + + // For non-integer floats, only match if it's a wildcard pattern + if has_decimal { + // All '*' pattern returns true, any other pattern returns false + return Some(matcher.pattern().chars().all(|c| c == '*')); + } + + // For integer floats, convert to string for matching + return Some(matcher.matches(&float_val.to_string())); + } + + // For non-float values, use normal matching + value + .extract_string() + .map(|string_value| matcher.matches(&string_value)) + } + + /// Samples a trace ID using this rule's sample rate + pub fn sample(&self, trace_id: &impl TraceIdLike) -> bool { + // Delegate to the internal rate sampler's new sample method + self.rate_sampler.sample(trace_id) + } +} + +/// Represents a priority for sampling rules +#[allow(dead_code)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +pub enum RuleProvenance { + Customer = 0, + Dynamic = 1, + Default = 2, +} + +impl From<&str> for RuleProvenance { + fn from(s: &str) -> Self { + match s { + "customer" => RuleProvenance::Customer, + "dynamic" => RuleProvenance::Dynamic, + _ => RuleProvenance::Default, + } + } +} + +/// Helper struct for representing i64 values as ValueLike +struct ValueI64(i64); + +impl ValueLike for ValueI64 { + fn extract_float(&self) -> Option { + Some(self.0 as f64) + } + + fn extract_string(&self) -> 
Option> { + Some(std::borrow::Cow::Owned(self.0.to_string())) + } +} diff --git a/libdd-sampling/src/sampling_rule_config.rs b/libdd-sampling/src/sampling_rule_config.rs new file mode 100644 index 00000000..604bcecf --- /dev/null +++ b/libdd-sampling/src/sampling_rule_config.rs @@ -0,0 +1,89 @@ +// Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; +use std::fmt::Display; +use std::ops::Deref; +use std::str::FromStr; + +/// Configuration for a single sampling rule +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)] +pub struct SamplingRuleConfig { + /// The sample rate to apply (0.0-1.0) + pub sample_rate: f64, + + /// Optional service name pattern to match + #[serde(default)] + pub service: Option, + + /// Optional span name pattern to match + #[serde(default)] + pub name: Option, + + /// Optional resource name pattern to match + #[serde(default)] + pub resource: Option, + + /// Tags that must match (key-value pairs) + #[serde(default)] + pub tags: HashMap, + + /// Where this rule comes from (customer, dynamic, default). + /// Not exposed in the public `datadog-opentelemetry` API — set automatically + /// during conversion from the public `SamplingRuleConfig` type. 
+ #[serde(default = "default_provenance")] + pub provenance: String, +} + +impl Display for SamplingRuleConfig { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", serde_json::json!(self)) + } +} + +fn default_provenance() -> String { + "default".to_string() +} + +#[derive(Debug, Default, Clone, PartialEq)] +pub struct ParsedSamplingRules { + pub rules: Vec, +} + +impl Deref for ParsedSamplingRules { + type Target = [SamplingRuleConfig]; + + fn deref(&self) -> &Self::Target { + &self.rules + } +} + +impl From for Vec { + fn from(parsed: ParsedSamplingRules) -> Self { + parsed.rules + } +} + +impl FromStr for ParsedSamplingRules { + type Err = serde_json::Error; + + fn from_str(s: &str) -> Result { + if s.trim().is_empty() { + return Ok(ParsedSamplingRules::default()); + } + // DD_TRACE_SAMPLING_RULES is expected to be a JSON array of SamplingRuleConfig objects. + let rules_vec: Vec = serde_json::from_str(s)?; + Ok(ParsedSamplingRules { rules: rules_vec }) + } +} + +impl Display for ParsedSamplingRules { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{}", + serde_json::to_string(&self.rules).unwrap_or_default() + ) + } +} diff --git a/libdd-sampling/src/types.rs b/libdd-sampling/src/types.rs new file mode 100644 index 00000000..07aa7ccc --- /dev/null +++ b/libdd-sampling/src/types.rs @@ -0,0 +1,161 @@ +// Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +//! Type definitions and traits for sampling + +use std::borrow::Cow; + +/// A trait for converting trace IDs to a numeric representation. +/// +/// Provides a common interface for converting trace IDs from different tracing systems +/// into a 128-bit unsigned integer for use in hash-based operations. 
+/// +/// # Examples +/// +/// ``` +/// use libdd_sampling::TraceIdLike; +/// +/// #[derive(Clone, PartialEq, Eq)] +/// struct MyTraceId(u128); +/// +/// impl TraceIdLike for MyTraceId { +/// fn to_u128(&self) -> u128 { +/// self.0 +/// } +/// } +/// ``` +pub trait TraceIdLike: PartialEq + Eq { + /// Converts the trace ID to a 128-bit unsigned integer. + /// + /// The conversion should be deterministic: the same trace ID must always produce + /// the same `u128` value. Typically implemented by interpreting the trace ID's + /// bytes as a big-endian integer. + fn to_u128(&self) -> u128; +} + +/// A trait for accessing span attribute key-value pairs. +/// +/// Provides methods for retrieving the key and value of a span attribute. +pub trait AttributeLike { + /// The type of the value that implements `ValueLike`. + type Value: ValueLike; + + /// Returns the attribute key as a string. + fn key(&self) -> &str; + + /// Returns a reference to the attribute value. + fn value(&self) -> &Self::Value; +} + +/// A trait for extracting typed values from attribute values. +/// +/// Provides methods for converting attribute values to common types used in sampling logic. +pub trait ValueLike { + /// Extracts a float value if the value can be represented as `f64`. + /// + /// Returns `Some(f64)` for numeric types, `None` otherwise. + fn extract_float(&self) -> Option; + + /// Extracts a string representation of the value. + /// + /// Returns `Some(Cow)` for types that can be converted to strings, `None` otherwise. + fn extract_string(&self) -> Option>; +} + +/// A trait for creating sampling attributes. +/// +/// This trait abstracts the creation of attributes for sampling tags, +/// allowing different implementations for different attribute types. +pub trait AttributeFactory { + /// The type of attribute created by this factory. + type Attribute: Sized; + + /// Creates an attribute with an i64 value. 
+ fn create_i64(&self, key: &'static str, value: i64) -> Self::Attribute; + + /// Creates an attribute with an f64 value. + fn create_f64(&self, key: &'static str, value: f64) -> Self::Attribute; + + /// Creates an attribute with a string value. + fn create_string(&self, key: &'static str, value: Cow<'static, str>) -> Self::Attribute; +} + +/// A trait for accessing span properties needed for sampling decisions. +/// +/// Provides methods for retrieving span metadata like operation name, service, environment, +/// resource name, and status codes used by sampling rules. +pub trait SpanProperties { + /// The type of attribute that implements `AttributeLike`. + type Attribute: AttributeLike; + + /// Returns the operation name for the span. + /// + /// The operation name is derived from span attributes and kind according to + /// OpenTelemetry semantic conventions. + fn operation_name(&self) -> Cow<'_, str>; + + /// Returns the service name for the span. + /// + /// The service name is extracted from resource attributes. + fn service(&self) -> Cow<'_, str>; + + /// Returns the environment name for the span. + /// + /// The environment is extracted from span or resource attributes. + fn env(&self) -> Cow<'_, str>; + + /// Returns the resource name for the span. + /// + /// The resource name is derived from span attributes and kind. + fn resource(&self) -> Cow<'_, str>; + + /// Returns the HTTP status code if present. + /// + /// Returns `None` if the span does not have an HTTP status code attribute. + fn status_code(&self) -> Option; + + /// Returns an iterator over span attributes. + fn attributes<'a>(&'a self) -> impl Iterator + where + Self: 'a; + + /// Returns an alternate key for the given attribute key. + /// + /// This is used for mapping between different attribute naming conventions + /// (e.g., OpenTelemetry to Datadog). Returns `Some(alternate_key)` if a mapping exists, + /// or `None` if the attribute key has no alternate mapping. 
+ fn get_alternate_key<'b>(&self, key: &'b str) -> Option>; +} + +/// A trait for accessing sampling data, combining trace ID and span properties. +/// +/// This trait provides unified access to both the trace ID and span properties +/// needed for making sampling decisions. +pub trait SamplingData { + /// The type that implements `TraceIdLike`. + type TraceId: TraceIdLike; + + /// The type that implements `SpanProperties`. + type Properties<'a>: SpanProperties + where + Self: 'a; + + /// Returns whether the parent span was sampled. + /// + /// Returns: + /// - `Some(true)` if the parent span was sampled + /// - `Some(false)` if the parent span was not sampled + /// - `None` if there is no parent sampling information + fn is_parent_sampled(&self) -> Option; + + /// Returns a reference to the trace ID. + fn trace_id(&self) -> &Self::TraceId; + + /// Returns the span properties via a callback. + /// + /// This method constructs the span properties and passes them to the provided + /// callback function. The properties are only valid for the duration of the callback. + fn with_span_properties(&self, s: &S, f: F) -> T + where + F: Fn(&S, &Self::Properties<'_>) -> T; +}