From 86fc42316f86bca169a9b99bb9681c63b6dc6491 Mon Sep 17 00:00:00 2001 From: Jordan Gonzalez <30836115+duncanista@users.noreply.github.com> Date: Thu, 2 Apr 2026 16:00:13 -0400 Subject: [PATCH 1/4] feat(agent-config): allow extensible configuration via ConfigExtension trait Introduces a generic `Config` type that lets consumers define additional configuration fields without modifying or copy-pasting the core crate. Includes a unified `Source` type for dual extraction from both env vars and YAML, a `merge_fields!` macro to reduce merge boilerplate, and moves Lambda-specific fields out of the core Config struct. Also restructures the crate to use a conventional `src/` layout and adds a README documenting the extension API. --- crates/datadog-agent-config/Cargo.toml | 3 - crates/datadog-agent-config/README.md | 110 +++++ .../{ => src}/additional_endpoints.rs | 0 .../{ => src}/apm_replace_rule.rs | 0 crates/datadog-agent-config/{ => src}/env.rs | 405 +----------------- .../{ => src}/flush_strategy.rs | 0 .../{mod.rs => src/lib.rs} | 370 +++++++++++----- .../{ => src}/log_level.rs | 0 .../{ => src}/logs_additional_endpoints.rs | 0 .../{ => src}/processing_rule.rs | 0 .../{ => src}/service_mapping.rs | 0 crates/datadog-agent-config/{ => src}/yaml.rs | 158 ++----- 12 files changed, 415 insertions(+), 631 deletions(-) create mode 100644 crates/datadog-agent-config/README.md rename crates/datadog-agent-config/{ => src}/additional_endpoints.rs (100%) rename crates/datadog-agent-config/{ => src}/apm_replace_rule.rs (100%) rename crates/datadog-agent-config/{ => src}/env.rs (75%) rename crates/datadog-agent-config/{ => src}/flush_strategy.rs (100%) rename crates/datadog-agent-config/{mod.rs => src/lib.rs} (86%) rename crates/datadog-agent-config/{ => src}/log_level.rs (100%) rename crates/datadog-agent-config/{ => src}/logs_additional_endpoints.rs (100%) rename crates/datadog-agent-config/{ => src}/processing_rule.rs (100%) rename crates/datadog-agent-config/{ => 
src}/service_mapping.rs (100%) rename crates/datadog-agent-config/{ => src}/yaml.rs (85%) diff --git a/crates/datadog-agent-config/Cargo.toml b/crates/datadog-agent-config/Cargo.toml index 222d726..b9477ac 100644 --- a/crates/datadog-agent-config/Cargo.toml +++ b/crates/datadog-agent-config/Cargo.toml @@ -4,9 +4,6 @@ version = "0.1.0" edition.workspace = true license.workspace = true -[lib] -path = "mod.rs" - [dependencies] figment = { version = "0.10", default-features = false, features = ["yaml", "env"] } libdd-trace-obfuscation = { git = "https://github.com/DataDog/libdatadog", rev = "8c88979985154d6d97c0fc2ca9039682981eacad" } diff --git a/crates/datadog-agent-config/README.md b/crates/datadog-agent-config/README.md new file mode 100644 index 0000000..58f3962 --- /dev/null +++ b/crates/datadog-agent-config/README.md @@ -0,0 +1,110 @@ +# datadog-agent-config + +Shared configuration crate for Datadog serverless agents. Provides a typed `Config` struct with built-in loading from environment variables (`DD_*`) and YAML files (`datadog.yaml`), with environment variables taking precedence. 
+ +## Core features + +- **Typed config struct** with fields for site, API key, proxy, logs, APM, metrics, DogStatsD, OTLP, and trace propagation +- **Two built-in sources**: `EnvConfigSource` (reads `DD_*` / `DATADOG_*` env vars) and `YamlConfigSource` (reads `datadog.yaml`) +- **Graceful deserialization**: every field uses forgiving deserializers that fall back to defaults on bad input, so one misconfigured value never crashes the whole config +- **Extensible via `ConfigExtension`**: consumers can define additional configuration fields without modifying this crate + +## Quick start + +```rust +use std::path::Path; +use datadog_agent_config::get_config; + +let config = get_config(Path::new("/var/task")); +println!("site: {}", config.site); +println!("api_key: {}", config.api_key); +``` + +## Extensible configuration + +Consumers that need additional fields (e.g., Lambda-specific settings) implement the `ConfigExtension` trait instead of forking or copy-pasting the crate. + +### 1. Define the extension and its source + +```rust +use datadog_agent_config::{ + ConfigExtension, merge_fields, + deserialize_optional_string, deserialize_optional_bool_from_anything, +}; +use serde::Deserialize; + +#[derive(Debug, PartialEq, Clone)] +pub struct MyExtension { + pub custom_flag: bool, + pub custom_name: String, +} + +impl Default for MyExtension { + fn default() -> Self { + Self { custom_flag: false, custom_name: String::new() } + } +} + +/// Source struct for deserialization. Must use #[serde(default)] and +/// graceful deserializers so one bad field doesn't fail the whole extraction. 
+#[derive(Debug, Clone, Default, Deserialize)] +#[serde(default)] +pub struct MySource { + #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] + pub custom_flag: Option<bool>, + #[serde(deserialize_with = "deserialize_optional_string")] + pub custom_name: Option<String>, +} + +impl ConfigExtension for MyExtension { + type Source = MySource; + + fn merge_from(&mut self, source: &MySource) { + merge_fields!(self, source, + string: [custom_name], + value: [custom_flag], + ); + } +} +``` + +### 2. Load config with the extension + +```rust +use std::path::Path; +use datadog_agent_config::{Config, get_config_with_extension}; + +type MyConfig = Config<MyExtension>; + +let config: MyConfig = get_config_with_extension(Path::new("/var/task")); + +// Core fields +println!("site: {}", config.site); + +// Extension fields +println!("custom_flag: {}", config.ext.custom_flag); +println!("custom_name: {}", config.ext.custom_name); +``` + +Extension fields are populated from both `DD_*` environment variables and `datadog.yaml` using dual extraction: the core fields and extension fields are extracted independently from the same figment instance, so they don't interfere with each other. + +### Flat fields only + +The single `Source` type is used for both env var and YAML extraction. This works when extension fields are top-level (flat) in the YAML file, which is the common case. If you need nested YAML structures that differ from the flat env var layout, implement `merge_from` with a nested source struct and handle the mapping manually. + +### merge_fields! macro + +The `merge_fields!` macro reduces boilerplate in `merge_from` by batching fields by merge strategy: + +- `string`: merges `Option<String>` into `String` (sets value if `Some`) +- `value`: merges `Option<T>` into `T` (sets value if `Some`) +- `option`: merges `Option<T>` into `Option<T>` (overwrites if `Some`) + +Custom merge logic (e.g., OR-ing two boolean fields together) goes after the macro call in the same method. 
+ +## Config loading precedence + +1. `Config::default()` (hardcoded defaults) +2. `datadog.yaml` values (lower priority) +3. `DD_*` environment variables (highest priority) +4. Post-processing defaults (site, proxy, logs/APM URL construction) diff --git a/crates/datadog-agent-config/additional_endpoints.rs b/crates/datadog-agent-config/src/additional_endpoints.rs similarity index 100% rename from crates/datadog-agent-config/additional_endpoints.rs rename to crates/datadog-agent-config/src/additional_endpoints.rs diff --git a/crates/datadog-agent-config/apm_replace_rule.rs b/crates/datadog-agent-config/src/apm_replace_rule.rs similarity index 100% rename from crates/datadog-agent-config/apm_replace_rule.rs rename to crates/datadog-agent-config/src/apm_replace_rule.rs diff --git a/crates/datadog-agent-config/env.rs b/crates/datadog-agent-config/src/env.rs similarity index 75% rename from crates/datadog-agent-config/env.rs rename to crates/datadog-agent-config/src/env.rs index f24d6be..78853e0 100644 --- a/crates/datadog-agent-config/env.rs +++ b/crates/datadog-agent-config/src/env.rs @@ -1,22 +1,18 @@ use figment::{Figment, providers::Env}; use serde::Deserialize; use std::collections::HashMap; -use std::time::Duration; use dogstatsd::util::parse_metric_namespace; use libdd_trace_obfuscation::replacer::ReplaceRule; use crate::{ - Config, ConfigError, ConfigSource, TracePropagationStyle, + Config, ConfigError, ConfigExtension, ConfigSource, TracePropagationStyle, additional_endpoints::deserialize_additional_endpoints, apm_replace_rule::deserialize_apm_replace_rules, deserialize_apm_filter_tags, deserialize_array_from_comma_separated_string, deserialize_key_value_pairs, deserialize_option_lossless, - deserialize_optional_bool_from_anything, deserialize_optional_duration_from_microseconds, - deserialize_optional_duration_from_seconds, - deserialize_optional_duration_from_seconds_ignore_zero, deserialize_optional_string, + deserialize_optional_bool_from_anything, 
deserialize_optional_string, deserialize_string_or_int, deserialize_trace_propagation_style, deserialize_with_default, - flush_strategy::FlushStrategy, log_level::LogLevel, logs_additional_endpoints::{LogsAdditionalEndpoint, deserialize_logs_additional_endpoints}, merge_hashmap, merge_option, merge_option_to_value, merge_string, merge_vec, @@ -369,119 +365,10 @@ pub struct EnvConfig { /// @env `DD_OTLP_CONFIG_LOGS_ENABLED` #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] pub otlp_config_logs_enabled: Option, - - // AWS Lambda - /// @env `DD_API_KEY_SECRET_ARN` - /// - /// The AWS ARN of the secret containing the Datadog API key. - #[serde(deserialize_with = "deserialize_optional_string")] - pub api_key_secret_arn: Option, - /// @env `DD_KMS_API_KEY` - /// - /// The AWS KMS API key to use for the Datadog Agent. - #[serde(deserialize_with = "deserialize_optional_string")] - pub kms_api_key: Option, - /// @env `DD_API_KEY_SSM_ARN` - /// - /// The AWS Systems Manager Parameter Store parameter ARN containing the Datadog API key. - #[serde(deserialize_with = "deserialize_optional_string")] - pub api_key_ssm_arn: Option, - /// @env `DD_SERVERLESS_LOGS_ENABLED` - /// - /// Enable logs for AWS Lambda. Default is `true`. - #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] - pub serverless_logs_enabled: Option, - /// @env `DD_LOGS_ENABLED` - /// - /// Enable logs for AWS Lambda. Alias for `DD_SERVERLESS_LOGS_ENABLED`. Default is `true`. - #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] - pub logs_enabled: Option, - /// @env `DD_SERVERLESS_FLUSH_STRATEGY` - /// - /// The flush strategy to use for AWS Lambda. - #[serde(deserialize_with = "deserialize_with_default")] - pub serverless_flush_strategy: Option, - /// @env `DD_ENHANCED_METRICS` - /// - /// Enable enhanced metrics for AWS Lambda. Default is `true`. 
- #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] - pub enhanced_metrics: Option, - /// @env `DD_LAMBDA_PROC_ENHANCED_METRICS` - /// - /// Enable Lambda process metrics for AWS Lambda. Default is `true`. - /// - /// This is for metrics like: - /// - CPU usage - /// - Network usage - /// - File descriptor count - /// - Thread count - /// - Temp directory usage - #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] - pub lambda_proc_enhanced_metrics: Option, - /// @env `DD_CAPTURE_LAMBDA_PAYLOAD` - /// - /// Enable capture of the Lambda request and response payloads. - /// Default is `false`. - #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] - pub capture_lambda_payload: Option, - /// @env `DD_CAPTURE_LAMBDA_PAYLOAD_MAX_DEPTH` - /// - /// The maximum depth of the Lambda payload to capture. - /// Default is `10`. Requires `capture_lambda_payload` to be `true`. - #[serde(deserialize_with = "deserialize_option_lossless")] - pub capture_lambda_payload_max_depth: Option, - /// @env `DD_COMPUTE_TRACE_STATS_ON_EXTENSION` - /// - /// If true, enable computation of trace stats on the extension side. - /// If false, trace stats will be computed on the backend side. - /// Default is `false`. - #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] - pub compute_trace_stats_on_extension: Option, - /// @env `DD_SPAN_DEDUP_TIMEOUT` - /// - /// The timeout for the span deduplication service to check if a span key exists, in seconds. - /// For now, this is a temporary field added to debug the failure of `check_and_add()` in span dedup service. - /// Do not use this field extensively in production. - #[serde(deserialize_with = "deserialize_optional_duration_from_seconds_ignore_zero")] - pub span_dedup_timeout: Option, - /// @env `DD_API_KEY_SECRET_RELOAD_INTERVAL` - /// - /// The interval at which the Datadog API key is reloaded, in seconds. - /// If None, the API key will not be reloaded. 
- /// Default is `None`. - #[serde(deserialize_with = "deserialize_optional_duration_from_seconds_ignore_zero")] - pub api_key_secret_reload_interval: Option, - /// @env `DD_SERVERLESS_APPSEC_ENABLED` - /// - /// Enable Application and API Protection (AAP), previously known as AppSec/ASM, for AWS Lambda. - /// Default is `false`. - /// - #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] - pub serverless_appsec_enabled: Option, - /// @env `DD_APPSEC_RULES` - /// - /// The path to a user-configured App & API Protection ruleset (in JSON format). - #[serde(deserialize_with = "deserialize_optional_string")] - pub appsec_rules: Option, - /// @env `DD_APPSEC_WAF_TIMEOUT` - /// - /// The timeout for the WAF to process a request, in microseconds. - #[serde(deserialize_with = "deserialize_optional_duration_from_microseconds")] - pub appsec_waf_timeout: Option, - /// @env `DD_API_SECURITY_ENABLED` - /// - /// Enable API Security for AWS Lambda. - #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] - pub api_security_enabled: Option, - /// @env `DD_API_SECURITY_SAMPLE_DELAY` - /// - /// The delay between two samples of the API Security schema collection, in seconds. 
- #[serde(deserialize_with = "deserialize_optional_duration_from_seconds")] - pub api_security_sample_delay: Option, } #[allow(clippy::too_many_lines)] -fn merge_config(config: &mut Config, env_config: &EnvConfig) { +fn merge_config(config: &mut Config, env_config: &EnvConfig) { // Basic fields merge_string!(config, env_config, site); merge_string!(config, env_config, api_key); @@ -654,44 +541,19 @@ fn merge_config(config: &mut Config, env_config: &EnvConfig) { otlp_config_traces_probabilistic_sampler_sampling_percentage ); merge_option_to_value!(config, env_config, otlp_config_logs_enabled); - - // AWS Lambda - merge_string!(config, env_config, api_key_secret_arn); - merge_string!(config, env_config, kms_api_key); - merge_string!(config, env_config, api_key_ssm_arn); - merge_option_to_value!(config, env_config, serverless_logs_enabled); - - // Handle serverless_logs_enabled with OR logic: if either DD_LOGS_ENABLED or DD_SERVERLESS_LOGS_ENABLED is true, enable logs - if env_config.serverless_logs_enabled.is_some() || env_config.logs_enabled.is_some() { - config.serverless_logs_enabled = env_config.serverless_logs_enabled.unwrap_or(false) - || env_config.logs_enabled.unwrap_or(false); - } - - merge_option_to_value!(config, env_config, serverless_flush_strategy); - merge_option_to_value!(config, env_config, enhanced_metrics); - merge_option_to_value!(config, env_config, lambda_proc_enhanced_metrics); - merge_option_to_value!(config, env_config, capture_lambda_payload); - merge_option_to_value!(config, env_config, capture_lambda_payload_max_depth); - merge_option_to_value!(config, env_config, compute_trace_stats_on_extension); - merge_option!(config, env_config, span_dedup_timeout); - merge_option!(config, env_config, api_key_secret_reload_interval); - merge_option_to_value!(config, env_config, serverless_appsec_enabled); - merge_option!(config, env_config, appsec_rules); - merge_option_to_value!(config, env_config, appsec_waf_timeout); - 
merge_option_to_value!(config, env_config, api_security_enabled); - merge_option_to_value!(config, env_config, api_security_sample_delay); } #[derive(Debug, PartialEq, Clone, Copy)] #[allow(clippy::module_name_repetitions)] pub struct EnvConfigSource; -impl ConfigSource for EnvConfigSource { - fn load(&self, config: &mut Config) -> Result<(), ConfigError> { +impl ConfigSource for EnvConfigSource { + fn load(&self, config: &mut Config) -> Result<(), ConfigError> { let figment = Figment::new() .merge(Env::prefixed("DATADOG_")) .merge(Env::prefixed("DD_")); + // Extract core config fields match figment.extract::() { Ok(env_config) => merge_config(config, &env_config), Err(e) => { @@ -701,6 +563,16 @@ impl ConfigSource for EnvConfigSource { } } + // Extract extension fields via dual extraction + match figment.extract::() { + Ok(ext_source) => config.ext.merge_from(&ext_source), + Err(e) => { + tracing::warn!( + "Failed to parse extension config from environment variables: {e}, using default extension config." + ); + } + } + Ok(()) } } @@ -708,12 +580,9 @@ impl ConfigSource for EnvConfigSource { #[cfg_attr(coverage_nightly, coverage(off))] // Test modules skew coverage metrics #[cfg(test)] mod tests { - use std::time::Duration; - use super::*; use crate::{ Config, TracePropagationStyle, - flush_strategy::{FlushStrategy, PeriodicStrategy}, log_level::LogLevel, processing_rule::{Kind, ProcessingRule}, }; @@ -727,6 +596,7 @@ mod tests { /// corresponding entry in the arrays below. #[test] #[allow(clippy::too_many_lines)] + #[allow(clippy::field_reassign_with_default)] fn test_all_env_fields_wrong_type_fallback_to_default() { // Non-string fields → invalid values that exercise graceful fallback. 
let invalid_non_string_env_vars: &[(&str, &str)] = &[ @@ -736,7 +606,6 @@ mod tests { ("DD_LOGS_CONFIG_COMPRESSION_LEVEL", "not_a_number"), ("DD_APM_CONFIG_COMPRESSION_LEVEL", "not_a_number"), ("DD_METRICS_CONFIG_COMPRESSION_LEVEL", "not_a_number"), - ("DD_CAPTURE_LAMBDA_PAYLOAD_MAX_DEPTH", "not_a_number"), ("DD_DOGSTATSD_SO_RCVBUF", "not_a_number"), ("DD_DOGSTATSD_BUFFER_SIZE", "not_a_number"), ("DD_DOGSTATSD_QUEUE_SIZE", "not_a_number"), @@ -763,12 +632,6 @@ mod tests { ("DD_TRACE_PROPAGATION_EXTRACT_FIRST", "not_a_bool"), ("DD_TRACE_PROPAGATION_HTTP_BAGGAGE_ENABLED", "not_a_bool"), ("DD_TRACE_AWS_SERVICE_REPRESENTATION_ENABLED", "not_a_bool"), - ("DD_ENHANCED_METRICS", "not_a_bool"), - ("DD_LAMBDA_PROC_ENHANCED_METRICS", "not_a_bool"), - ("DD_CAPTURE_LAMBDA_PAYLOAD", "not_a_bool"), - ("DD_COMPUTE_TRACE_STATS_ON_EXTENSION", "not_a_bool"), - ("DD_SERVERLESS_APPSEC_ENABLED", "not_a_bool"), - ("DD_API_SECURITY_ENABLED", "not_a_bool"), ("DD_OTLP_CONFIG_TRACES_ENABLED", "not_a_bool"), ( "DD_OTLP_CONFIG_TRACES_SPAN_NAME_AS_RESOURCE_NAME", @@ -797,16 +660,8 @@ mod tests { "DD_OBSERVABILITY_PIPELINES_WORKER_LOGS_ENABLED", "not_a_bool", ), - ("DD_SERVERLESS_LOGS_ENABLED", "not_a_bool"), - ("DD_LOGS_ENABLED", "not_a_bool"), // Enum ("DD_LOG_LEVEL", "invalid_level_999"), - ("DD_SERVERLESS_FLUSH_STRATEGY", "[[[invalid"), - // Duration - ("DD_SPAN_DEDUP_TIMEOUT", "not_a_number"), - ("DD_API_KEY_SECRET_RELOAD_INTERVAL", "not_a_number"), - ("DD_APPSEC_WAF_TIMEOUT", "not_a_number"), - ("DD_API_SECURITY_SAMPLE_DELAY", "not_a_number"), // JSON ("DD_ADDITIONAL_ENDPOINTS", "not_json{{"), ("DD_APM_ADDITIONAL_ENDPOINTS", "not_json{{"), @@ -870,16 +725,6 @@ mod tests { "keep", ), ("DD_OTLP_CONFIG_METRICS_SUMMARIES_MODE", "noquantiles"), - ( - "DD_API_KEY_SECRET_ARN", - "arn:aws:secretsmanager:us-east-1:123:secret:key", - ), - ("DD_KMS_API_KEY", "kms-encrypted-key"), - ( - "DD_API_KEY_SSM_ARN", - "arn:aws:ssm:us-east-1:123:parameter/key", - ), - ("DD_APPSEC_RULES", 
"/opt/custom-rules.json"), ]; // Programmatic guard: count `pub ` fields in the EnvConfig struct from @@ -912,7 +757,7 @@ mod tests { jail.set_env(key, value); } - let mut config = Config::default(); + let mut config: Config = Config::default(); // This MUST succeed — no single field should crash the whole config EnvConfigSource .load(&mut config) @@ -920,7 +765,7 @@ mod tests { // Build expected: string fields have their non-default values, // all non-string fields stay at defaults. - let mut expected = Config::default(); + let mut expected: Config = Config::default(); // String fields (merge_string! → Config String) expected.site = "custom-site.example.com".to_string(); expected.api_key = "test-api-key-12345".to_string(); @@ -930,10 +775,6 @@ mod tests { expected.observability_pipelines_worker_logs_url = "https://opw.example.com".to_string(); expected.apm_dd_url = "https://custom-apm.example.com".to_string(); - expected.api_key_secret_arn = - "arn:aws:secretsmanager:us-east-1:123:secret:key".to_string(); - expected.kms_api_key = "kms-encrypted-key".to_string(); - expected.api_key_ssm_arn = "arn:aws:ssm:us-east-1:123:parameter/key".to_string(); // Option fields (merge_option! 
→ Config Option) expected.proxy_https = Some("https://proxy.example.com".to_string()); expected.http_protocol = Some("http1".to_string()); @@ -954,7 +795,6 @@ mod tests { expected.otlp_config_metrics_sums_initial_cumulativ_monotonic_value = Some("keep".to_string()); expected.otlp_config_metrics_summaries_mode = Some("noquantiles".to_string()); - expected.appsec_rules = Some("/opt/custom-rules.json".to_string()); assert_eq!(config, expected); Ok(()) @@ -1104,28 +944,7 @@ mod tests { jail.set_env("DD_DOGSTATSD_BUFFER_SIZE", "65507"); jail.set_env("DD_DOGSTATSD_QUEUE_SIZE", "2048"); - // AWS Lambda - jail.set_env( - "DD_API_KEY_SECRET_ARN", - "arn:aws:secretsmanager:region:account:secret:datadog-api-key", - ); - jail.set_env("DD_KMS_API_KEY", "test-kms-key"); - jail.set_env("DD_SERVERLESS_LOGS_ENABLED", "false"); - jail.set_env("DD_SERVERLESS_FLUSH_STRATEGY", "periodically,60000"); - jail.set_env("DD_ENHANCED_METRICS", "false"); - jail.set_env("DD_LAMBDA_PROC_ENHANCED_METRICS", "false"); - jail.set_env("DD_CAPTURE_LAMBDA_PAYLOAD", "true"); - jail.set_env("DD_CAPTURE_LAMBDA_PAYLOAD_MAX_DEPTH", "5"); - jail.set_env("DD_COMPUTE_TRACE_STATS_ON_EXTENSION", "true"); - jail.set_env("DD_SPAN_DEDUP_TIMEOUT", "5"); - jail.set_env("DD_API_KEY_SECRET_RELOAD_INTERVAL", "10"); - jail.set_env("DD_SERVERLESS_APPSEC_ENABLED", "true"); - jail.set_env("DD_APPSEC_RULES", "/path/to/rules.json"); - jail.set_env("DD_APPSEC_WAF_TIMEOUT", "1000000"); // Microseconds - jail.set_env("DD_API_SECURITY_ENABLED", "0"); // Seconds - jail.set_env("DD_API_SECURITY_SAMPLE_DELAY", "60"); // Seconds - - let mut config = Config::default(); + let mut config: Config = Config::default(); let env_config_source = EnvConfigSource; env_config_source .load(&mut config) @@ -1262,26 +1081,7 @@ mod tests { dogstatsd_so_rcvbuf: Some(1_048_576), dogstatsd_buffer_size: Some(65507), dogstatsd_queue_size: Some(2048), - api_key_secret_arn: "arn:aws:secretsmanager:region:account:secret:datadog-api-key" - .to_string(), - 
kms_api_key: "test-kms-key".to_string(), - api_key_ssm_arn: String::default(), - serverless_logs_enabled: false, - serverless_flush_strategy: FlushStrategy::Periodically(PeriodicStrategy { - interval: 60000, - }), - enhanced_metrics: false, - lambda_proc_enhanced_metrics: false, - capture_lambda_payload: true, - capture_lambda_payload_max_depth: 5, - compute_trace_stats_on_extension: true, - span_dedup_timeout: Some(Duration::from_secs(5)), - api_key_secret_reload_interval: Some(Duration::from_secs(10)), - serverless_appsec_enabled: true, - appsec_rules: Some("/path/to/rules.json".to_string()), - appsec_waf_timeout: Duration::from_secs(1), - api_security_enabled: false, - api_security_sample_delay: Duration::from_secs(60), + ext: crate::NoExtension, }; assert_eq!(config, expected_config); @@ -1290,165 +1090,6 @@ mod tests { }); } - #[test] - fn test_dd_logs_enabled_true() { - figment::Jail::expect_with(|jail| { - jail.clear_env(); - jail.set_env("DD_LOGS_ENABLED", "true"); - - let mut config = Config::default(); - let env_config_source = EnvConfigSource; - env_config_source - .load(&mut config) - .expect("Failed to load config"); - - assert!(config.serverless_logs_enabled); - Ok(()) - }); - } - - #[test] - fn test_dd_logs_enabled_false() { - figment::Jail::expect_with(|jail| { - jail.clear_env(); - jail.set_env("DD_LOGS_ENABLED", "false"); - - let mut config = Config::default(); - let env_config_source = EnvConfigSource; - env_config_source - .load(&mut config) - .expect("Failed to load config"); - - assert!(!config.serverless_logs_enabled); - Ok(()) - }); - } - - #[test] - fn test_dd_serverless_logs_enabled_true() { - figment::Jail::expect_with(|jail| { - jail.clear_env(); - jail.set_env("DD_SERVERLESS_LOGS_ENABLED", "true"); - - let mut config = Config::default(); - let env_config_source = EnvConfigSource; - env_config_source - .load(&mut config) - .expect("Failed to load config"); - - assert!(config.serverless_logs_enabled); - Ok(()) - }); - } - - #[test] - fn 
test_dd_serverless_logs_enabled_false() { - figment::Jail::expect_with(|jail| { - jail.clear_env(); - jail.set_env("DD_SERVERLESS_LOGS_ENABLED", "false"); - - let mut config = Config::default(); - let env_config_source = EnvConfigSource; - env_config_source - .load(&mut config) - .expect("Failed to load config"); - - assert!(!config.serverless_logs_enabled); - Ok(()) - }); - } - - #[test] - fn test_both_logs_enabled_true() { - figment::Jail::expect_with(|jail| { - jail.clear_env(); - jail.set_env("DD_LOGS_ENABLED", "true"); - jail.set_env("DD_SERVERLESS_LOGS_ENABLED", "true"); - - let mut config = Config::default(); - let env_config_source = EnvConfigSource; - env_config_source - .load(&mut config) - .expect("Failed to load config"); - - assert!(config.serverless_logs_enabled); - Ok(()) - }); - } - - #[test] - fn test_both_logs_enabled_false() { - figment::Jail::expect_with(|jail| { - jail.clear_env(); - jail.set_env("DD_LOGS_ENABLED", "false"); - jail.set_env("DD_SERVERLESS_LOGS_ENABLED", "false"); - - let mut config = Config::default(); - let env_config_source = EnvConfigSource; - env_config_source - .load(&mut config) - .expect("Failed to load config"); - - assert!(!config.serverless_logs_enabled); - Ok(()) - }); - } - - #[test] - fn test_logs_enabled_true_serverless_logs_enabled_false() { - figment::Jail::expect_with(|jail| { - jail.clear_env(); - jail.set_env("DD_LOGS_ENABLED", "true"); - jail.set_env("DD_SERVERLESS_LOGS_ENABLED", "false"); - - let mut config = Config::default(); - let env_config_source = EnvConfigSource; - env_config_source - .load(&mut config) - .expect("Failed to load config"); - - // OR logic: if either is true, logs are enabled - assert!(config.serverless_logs_enabled); - Ok(()) - }); - } - - #[test] - fn test_logs_enabled_false_serverless_logs_enabled_true() { - figment::Jail::expect_with(|jail| { - jail.clear_env(); - jail.set_env("DD_LOGS_ENABLED", "false"); - jail.set_env("DD_SERVERLESS_LOGS_ENABLED", "true"); - - let mut config = 
Config::default(); - let env_config_source = EnvConfigSource; - env_config_source - .load(&mut config) - .expect("Failed to load config"); - - // OR logic: if either is true, logs are enabled - assert!(config.serverless_logs_enabled); - Ok(()) - }); - } - - #[test] - fn test_neither_logs_enabled_set_uses_default() { - figment::Jail::expect_with(|jail| { - jail.clear_env(); - - let mut config = Config::default(); - let env_config_source = EnvConfigSource; - env_config_source - .load(&mut config) - .expect("Failed to load config"); - - // Default value is true - assert!(config.serverless_logs_enabled); - Ok(()) - }); - } - #[test] fn test_dogstatsd_config_from_env() { figment::Jail::expect_with(|jail| { @@ -1457,7 +1098,7 @@ mod tests { jail.set_env("DD_DOGSTATSD_BUFFER_SIZE", "65507"); jail.set_env("DD_DOGSTATSD_QUEUE_SIZE", "2048"); - let mut config = Config::default(); + let mut config: Config = Config::default(); let env_config_source = EnvConfigSource; env_config_source .load(&mut config) @@ -1475,7 +1116,7 @@ mod tests { figment::Jail::expect_with(|jail| { jail.clear_env(); - let mut config = Config::default(); + let mut config: Config = Config::default(); let env_config_source = EnvConfigSource; env_config_source .load(&mut config) diff --git a/crates/datadog-agent-config/flush_strategy.rs b/crates/datadog-agent-config/src/flush_strategy.rs similarity index 100% rename from crates/datadog-agent-config/flush_strategy.rs rename to crates/datadog-agent-config/src/flush_strategy.rs diff --git a/crates/datadog-agent-config/mod.rs b/crates/datadog-agent-config/src/lib.rs similarity index 86% rename from crates/datadog-agent-config/mod.rs rename to crates/datadog-agent-config/src/lib.rs index 6fc858a..d334d5e 100644 --- a/crates/datadog-agent-config/mod.rs +++ b/crates/datadog-agent-config/src/lib.rs @@ -25,7 +25,6 @@ use tracing::{debug, error, warn}; use crate::{ apm_replace_rule::deserialize_apm_replace_rules, env::EnvConfigSource, - flush_strategy::FlushStrategy, 
log_level::LogLevel, logs_additional_endpoints::LogsAdditionalEndpoint, processing_rule::{ProcessingRule, deserialize_processing_rules}, @@ -138,6 +137,87 @@ macro_rules! merge_hashmap { }; } +/// Trait that extension configs must implement to add additional configuration +/// fields beyond what the core provides. +/// +/// Extensions allow consumers to define their own external configuration fields +/// that are deserialized from environment variables and YAML files alongside +/// core fields via dual extraction. +/// +/// # Source type requirements +/// +/// The `Source` type must use `#[serde(default)]` on the struct and graceful +/// deserializers (e.g., `deserialize_optional_bool_from_anything`) on each field +/// to ensure that a single bad value doesn't fail the entire extraction. +/// +/// # Flat fields only +/// +/// A single `Source` type is used for both environment variable and YAML +/// extraction. This works when all extension fields are top-level (flat) in +/// the YAML file, which is the common case for extension configs: +/// +/// ```yaml +/// # Works: flat fields map naturally to both DD_* env vars and YAML keys +/// enhanced_metrics: true +/// capture_lambda_payload: false +/// ``` +/// +/// If you need nested YAML structures (e.g., `lambda: { enhanced_metrics: true }`) +/// that differ from the flat env var layout, implement `merge_from` with a +/// nested source struct and handle the mapping manually instead of using +/// `merge_fields!`. +pub trait ConfigExtension: Clone + Default + std::fmt::Debug + PartialEq { + /// Intermediate type for deserializing extension fields. + /// Used for both environment variable and YAML extraction. + type Source: Default + serde::de::DeserializeOwned + Clone + std::fmt::Debug; + + /// Merge parsed source fields into self. + fn merge_from(&mut self, source: &Self::Source); +} + +/// Batch-merge extension fields from a source struct. 
+/// +/// Groups fields by merge strategy so you don't have to write individual +/// `merge_string!` / `merge_option_to_value!` / `merge_option!` calls. +/// +/// ```ignore +/// merge_fields!(self, source, +/// string: [api_key_secret_arn, kms_api_key], +/// value: [enhanced_metrics, capture_lambda_payload], +/// option: [span_dedup_timeout, appsec_rules], +/// ); +/// ``` +#[macro_export] +macro_rules! merge_fields { + // Internal rules dispatched by keyword + (@string $config:expr, $source:expr, [$($field:ident),* $(,)?]) => { + $( $crate::merge_string!($config, $source, $field); )* + }; + (@value $config:expr, $source:expr, [$($field:ident),* $(,)?]) => { + $( $crate::merge_option_to_value!($config, $source, $field); )* + }; + (@option $config:expr, $source:expr, [$($field:ident),* $(,)?]) => { + $( $crate::merge_option!($config, $source, $field); )* + }; + // Public entry point: accepts any combination of groups in any order + ($config:expr, $source:expr, $($kind:ident: [$($field:ident),* $(,)?]),* $(,)?) => { + $( $crate::merge_fields!(@$kind $config, $source, [$($field),*]); )* + }; +} + +/// A no-op extension for consumers that don't need extra fields. +#[derive(Clone, Default, Debug, PartialEq)] +pub struct NoExtension; + +/// A no-op source for deserialization that accepts (and ignores) any input. 
+#[derive(Clone, Default, Debug, Deserialize)] +pub struct NoExtensionSource; + +impl ConfigExtension for NoExtension { + type Source = NoExtensionSource; + fn merge_from(&mut self, _source: &Self::Source) {} +} + #[derive(Debug, PartialEq)] #[allow(clippy::module_name_repetitions)] pub enum ConfigError { @@ -146,26 +226,34 @@ pub enum ConfigError { } #[allow(clippy::module_name_repetitions)] -pub trait ConfigSource { - fn load(&self, config: &mut Config) -> Result<(), ConfigError>; +pub trait ConfigSource { + fn load(&self, config: &mut Config) -> Result<(), ConfigError>; } -#[derive(Default)] #[allow(clippy::module_name_repetitions)] -pub struct ConfigBuilder { - sources: Vec>, - config: Config, +pub struct ConfigBuilder { + sources: Vec>>, + config: Config, +} + +impl Default for ConfigBuilder { + fn default() -> Self { + Self { + sources: Vec::new(), + config: Config::default(), + } + } } #[allow(clippy::module_name_repetitions)] -impl ConfigBuilder { +impl ConfigBuilder { #[must_use] - pub fn add_source(mut self, source: Box) -> Self { + pub fn add_source(mut self, source: Box>) -> Self { self.sources.push(source); self } - pub fn build(&mut self) -> Config { + pub fn build(&mut self) -> Config { let mut failed_sources = 0; for source in &self.sources { match source.load(&mut self.config) { @@ -238,7 +326,7 @@ impl ConfigBuilder { #[derive(Debug, PartialEq, Clone)] #[allow(clippy::module_name_repetitions)] #[allow(clippy::struct_excessive_bools)] -pub struct Config { +pub struct Config { pub site: String, pub api_key: String, pub log_level: LogLevel, @@ -349,28 +437,12 @@ pub struct Config { // - Logs pub otlp_config_logs_enabled: bool, - // AWS Lambda - pub api_key_secret_arn: String, - pub kms_api_key: String, - pub api_key_ssm_arn: String, - pub serverless_logs_enabled: bool, - pub serverless_flush_strategy: FlushStrategy, - pub enhanced_metrics: bool, - pub lambda_proc_enhanced_metrics: bool, - pub capture_lambda_payload: bool, - pub 
capture_lambda_payload_max_depth: u32, - pub compute_trace_stats_on_extension: bool, - pub span_dedup_timeout: Option, - pub api_key_secret_reload_interval: Option, - - pub serverless_appsec_enabled: bool, - pub appsec_rules: Option, - pub appsec_waf_timeout: Duration, - pub api_security_enabled: bool, - pub api_security_sample_delay: Duration, + /// Agent-specific extension fields defined by the consumer. + /// Use `NoExtension` (the default) when no extra fields are needed. + pub ext: E, } -impl Default for Config { +impl Default for Config { fn default() -> Self { Self { site: String::default(), @@ -464,25 +536,7 @@ impl Default for Config { otlp_config_traces_probabilistic_sampler_sampling_percentage: None, otlp_config_logs_enabled: false, - // AWS Lambda - api_key_secret_arn: String::default(), - kms_api_key: String::default(), - api_key_ssm_arn: String::default(), - serverless_logs_enabled: true, - serverless_flush_strategy: FlushStrategy::Default, - enhanced_metrics: true, - lambda_proc_enhanced_metrics: true, - capture_lambda_payload: false, - capture_lambda_payload_max_depth: 10, - compute_trace_stats_on_extension: false, - span_dedup_timeout: None, - api_key_secret_reload_interval: None, - - serverless_appsec_enabled: false, - appsec_rules: None, - appsec_waf_timeout: Duration::from_millis(5), - api_security_enabled: true, - api_security_sample_delay: Duration::from_secs(30), + ext: E::default(), } } } @@ -491,6 +545,17 @@ impl Default for Config { #[inline] #[must_use] pub fn get_config(config_directory: &Path) -> Config { + get_config_with_extension(config_directory) +} + +/// Load configuration with a custom extension type. +/// +/// Consumers that need agent-specific fields (e.g., Lambda, Cloud Run) should +/// call this with their extension type instead of `get_config`. 
+#[allow(clippy::module_name_repetitions)] +#[inline] +#[must_use] +pub fn get_config_with_extension(config_directory: &Path) -> Config { let path: std::path::PathBuf = config_directory.join("datadog.yaml"); ConfigBuilder::default() .add_source(Box::new(YamlConfigSource { path })) @@ -886,12 +951,7 @@ pub mod tests { use super::*; - use crate::{ - TracePropagationStyle, - flush_strategy::{FlushStrategy, PeriodicStrategy}, - log_level::LogLevel, - processing_rule::ProcessingRule, - }; + use crate::{TracePropagationStyle, log_level::LogLevel, processing_rule::ProcessingRule}; #[test] fn test_default_logs_intake_url() { @@ -1158,56 +1218,6 @@ pub mod tests { }); } - #[test] - fn test_parse_flush_strategy_end() { - figment::Jail::expect_with(|jail| { - jail.clear_env(); - jail.set_env("DD_SERVERLESS_FLUSH_STRATEGY", "end"); - let config = get_config(Path::new("")); - assert_eq!(config.serverless_flush_strategy, FlushStrategy::End); - Ok(()) - }); - } - - #[test] - fn test_parse_flush_strategy_periodically() { - figment::Jail::expect_with(|jail| { - jail.clear_env(); - jail.set_env("DD_SERVERLESS_FLUSH_STRATEGY", "periodically,100000"); - let config = get_config(Path::new("")); - assert_eq!( - config.serverless_flush_strategy, - FlushStrategy::Periodically(PeriodicStrategy { interval: 100_000 }) - ); - Ok(()) - }); - } - - #[test] - fn test_parse_flush_strategy_invalid() { - figment::Jail::expect_with(|jail| { - jail.clear_env(); - jail.set_env("DD_SERVERLESS_FLUSH_STRATEGY", "invalid_strategy"); - let config = get_config(Path::new("")); - assert_eq!(config.serverless_flush_strategy, FlushStrategy::Default); - Ok(()) - }); - } - - #[test] - fn test_parse_flush_strategy_invalid_periodic() { - figment::Jail::expect_with(|jail| { - jail.clear_env(); - jail.set_env( - "DD_SERVERLESS_FLUSH_STRATEGY", - "periodically,invalid_interval", - ); - let config = get_config(Path::new("")); - assert_eq!(config.serverless_flush_strategy, FlushStrategy::Default); - Ok(()) - }); - } - 
#[test] fn parse_number_or_string_env_vars() { figment::Jail::expect_with(|jail| { @@ -1476,15 +1486,11 @@ pub mod tests { fn test_parse_bool_from_anything() { figment::Jail::expect_with(|jail| { jail.clear_env(); - jail.set_env("DD_SERVERLESS_LOGS_ENABLED", "true"); - jail.set_env("DD_ENHANCED_METRICS", "1"); jail.set_env("DD_LOGS_CONFIG_USE_COMPRESSION", "TRUE"); - jail.set_env("DD_CAPTURE_LAMBDA_PAYLOAD", "0"); + jail.set_env("DD_SKIP_SSL_VALIDATION", "1"); let config = get_config(Path::new("")); - assert!(config.serverless_logs_enabled); - assert!(config.enhanced_metrics); assert!(config.logs_config_use_compression); - assert!(!config.capture_lambda_payload); + assert!(config.skip_ssl_validation); Ok(()) }); } @@ -1708,4 +1714,144 @@ pub mod tests { serde_json::from_str::(r#"{"tags": []}"#).expect("failed to parse JSON"); assert_eq!(result.tags, HashMap::new()); } + + // -- ConfigExtension tests -- + + /// A test extension with a few fields, mimicking what a consumer like Lambda would define. 
+ #[derive(Clone, Default, Debug, PartialEq)] + struct TestExtension { + custom_flag: bool, + custom_name: String, + } + + #[derive(Clone, Default, Debug, Deserialize)] + #[serde(default)] + struct TestExtSource { + #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] + custom_flag: Option, + #[serde(deserialize_with = "deserialize_optional_string")] + custom_name: Option, + } + + impl ConfigExtension for TestExtension { + type Source = TestExtSource; + + fn merge_from(&mut self, source: &TestExtSource) { + merge_fields!(self, source, + string: [custom_name], + value: [custom_flag], + ); + } + } + + #[test] + fn test_no_extension_config_works() { + figment::Jail::expect_with(|jail| { + jail.clear_env(); + jail.set_env("DD_SITE", "datad0g.com"); + let config = get_config(Path::new("")); + assert_eq!(config.site, "datad0g.com"); + assert_eq!(config.ext, NoExtension); + Ok(()) + }); + } + + #[test] + fn test_extension_receives_env_vars() { + figment::Jail::expect_with(|jail| { + jail.clear_env(); + jail.set_env("DD_SITE", "datad0g.com"); + jail.set_env("DD_CUSTOM_FLAG", "true"); + jail.set_env("DD_CUSTOM_NAME", "my-extension"); + + let config: Config = get_config_with_extension(Path::new("")); + + // Core fields work + assert_eq!(config.site, "datad0g.com"); + // Extension fields are populated + assert!(config.ext.custom_flag); + assert_eq!(config.ext.custom_name, "my-extension"); + Ok(()) + }); + } + + #[test] + fn test_extension_receives_yaml_fields() { + figment::Jail::expect_with(|jail| { + jail.clear_env(); + jail.create_file( + "datadog.yaml", + r#" +site: "datad0g.com" +custom_flag: true +custom_name: "yaml-ext" +"#, + )?; + + let config: Config = get_config_with_extension(Path::new("")); + + assert_eq!(config.site, "datad0g.com"); + assert!(config.ext.custom_flag); + assert_eq!(config.ext.custom_name, "yaml-ext"); + Ok(()) + }); + } + + #[test] + fn test_extension_env_overrides_yaml() { + figment::Jail::expect_with(|jail| { + 
jail.clear_env(); + jail.create_file( + "datadog.yaml", + r#" +custom_name: "yaml-value" +custom_flag: false +"#, + )?; + jail.set_env("DD_CUSTOM_NAME", "env-value"); + jail.set_env("DD_CUSTOM_FLAG", "true"); + + let config: Config = get_config_with_extension(Path::new("")); + + // Env should override YAML (env source loaded after yaml) + assert!(config.ext.custom_flag); + assert_eq!(config.ext.custom_name, "env-value"); + Ok(()) + }); + } + + #[test] + fn test_extension_defaults_when_not_set() { + figment::Jail::expect_with(|jail| { + jail.clear_env(); + + let config: Config = get_config_with_extension(Path::new("")); + + // Extension fields should be at their defaults + assert!(!config.ext.custom_flag); + assert_eq!(config.ext.custom_name, ""); + // Core fields should have post-processing defaults + assert_eq!(config.site, "datadoghq.com"); + Ok(()) + }); + } + + #[test] + fn test_extension_does_not_interfere_with_core() { + figment::Jail::expect_with(|jail| { + jail.clear_env(); + jail.set_env("DD_SITE", "us5.datadoghq.com"); + jail.set_env("DD_API_KEY", "test-key"); + jail.set_env("DD_CUSTOM_FLAG", "true"); + + let config: Config = get_config_with_extension(Path::new("")); + + // Core fields are not affected by extension env vars + assert_eq!(config.site, "us5.datadoghq.com"); + assert_eq!(config.api_key, "test-key"); + // Extension fields work alongside core + assert!(config.ext.custom_flag); + Ok(()) + }); + } } diff --git a/crates/datadog-agent-config/log_level.rs b/crates/datadog-agent-config/src/log_level.rs similarity index 100% rename from crates/datadog-agent-config/log_level.rs rename to crates/datadog-agent-config/src/log_level.rs diff --git a/crates/datadog-agent-config/logs_additional_endpoints.rs b/crates/datadog-agent-config/src/logs_additional_endpoints.rs similarity index 100% rename from crates/datadog-agent-config/logs_additional_endpoints.rs rename to crates/datadog-agent-config/src/logs_additional_endpoints.rs diff --git 
a/crates/datadog-agent-config/processing_rule.rs b/crates/datadog-agent-config/src/processing_rule.rs similarity index 100% rename from crates/datadog-agent-config/processing_rule.rs rename to crates/datadog-agent-config/src/processing_rule.rs diff --git a/crates/datadog-agent-config/service_mapping.rs b/crates/datadog-agent-config/src/service_mapping.rs similarity index 100% rename from crates/datadog-agent-config/service_mapping.rs rename to crates/datadog-agent-config/src/service_mapping.rs diff --git a/crates/datadog-agent-config/yaml.rs b/crates/datadog-agent-config/src/yaml.rs similarity index 85% rename from crates/datadog-agent-config/yaml.rs rename to crates/datadog-agent-config/src/yaml.rs index 06b7851..6e4eebc 100644 --- a/crates/datadog-agent-config/yaml.rs +++ b/crates/datadog-agent-config/src/yaml.rs @@ -1,15 +1,12 @@ -use std::time::Duration; use std::{collections::HashMap, path::PathBuf}; use crate::{ - Config, ConfigError, ConfigSource, ProcessingRule, TracePropagationStyle, + Config, ConfigError, ConfigExtension, ConfigSource, ProcessingRule, TracePropagationStyle, additional_endpoints::deserialize_additional_endpoints, deserialize_apm_replace_rules, deserialize_key_value_pair_array_to_hashmap, deserialize_option_lossless, - deserialize_optional_bool_from_anything, deserialize_optional_duration_from_microseconds, - deserialize_optional_duration_from_seconds, - deserialize_optional_duration_from_seconds_ignore_zero, deserialize_optional_string, + deserialize_optional_bool_from_anything, deserialize_optional_string, deserialize_processing_rules, deserialize_string_or_int, deserialize_trace_propagation_style, - deserialize_with_default, flush_strategy::FlushStrategy, log_level::LogLevel, + deserialize_with_default, log_level::LogLevel, logs_additional_endpoints::LogsAdditionalEndpoint, merge_hashmap, merge_option, merge_option_to_value, merge_string, merge_vec, service_mapping::deserialize_service_mapping, }; @@ -108,40 +105,6 @@ pub struct 
YamlConfig { // OTLP #[serde(deserialize_with = "deserialize_with_default")] pub otlp_config: Option, - - // AWS Lambda - #[serde(deserialize_with = "deserialize_optional_string")] - pub api_key_secret_arn: Option, - #[serde(deserialize_with = "deserialize_optional_string")] - pub kms_api_key: Option, - #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] - pub serverless_logs_enabled: Option, - #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] - pub logs_enabled: Option, - #[serde(deserialize_with = "deserialize_with_default")] - pub serverless_flush_strategy: Option, - #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] - pub enhanced_metrics: Option, - #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] - pub lambda_proc_enhanced_metrics: Option, - #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] - pub capture_lambda_payload: Option, - #[serde(deserialize_with = "deserialize_option_lossless")] - pub capture_lambda_payload_max_depth: Option, - #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] - pub compute_trace_stats_on_extension: Option, - #[serde(deserialize_with = "deserialize_optional_duration_from_seconds_ignore_zero")] - pub api_key_secret_reload_interval: Option, - #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] - pub serverless_appsec_enabled: Option, - #[serde(deserialize_with = "deserialize_optional_string")] - pub appsec_rules: Option, - #[serde(deserialize_with = "deserialize_optional_duration_from_microseconds")] - pub appsec_waf_timeout: Option, - #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] - pub api_security_enabled: Option, - #[serde(deserialize_with = "deserialize_optional_duration_from_seconds")] - pub api_security_sample_delay: Option, } /// Proxy Config @@ -443,7 +406,7 @@ impl OtlpConfig { } #[allow(clippy::too_many_lines)] -fn merge_config(config: &mut Config, yaml_config: 
&YamlConfig) { +fn merge_config(config: &mut Config, yaml_config: &YamlConfig) { // Basic fields merge_string!(config, yaml_config, site); merge_string!(config, yaml_config, api_key); @@ -720,29 +683,6 @@ fn merge_config(config: &mut Config, yaml_config: &YamlConfig) { merge_option_to_value!(config, otlp_config_logs_enabled, logs, enabled); } } - - // AWS Lambda - merge_string!(config, yaml_config, api_key_secret_arn); - merge_string!(config, yaml_config, kms_api_key); - - // Handle serverless_logs_enabled with OR logic: if either logs_enabled or serverless_logs_enabled is true, enable logs - if yaml_config.serverless_logs_enabled.is_some() || yaml_config.logs_enabled.is_some() { - config.serverless_logs_enabled = yaml_config.serverless_logs_enabled.unwrap_or(false) - || yaml_config.logs_enabled.unwrap_or(false); - } - - merge_option_to_value!(config, yaml_config, serverless_flush_strategy); - merge_option_to_value!(config, yaml_config, enhanced_metrics); - merge_option_to_value!(config, yaml_config, lambda_proc_enhanced_metrics); - merge_option_to_value!(config, yaml_config, capture_lambda_payload); - merge_option_to_value!(config, yaml_config, capture_lambda_payload_max_depth); - merge_option_to_value!(config, yaml_config, compute_trace_stats_on_extension); - merge_option!(config, yaml_config, api_key_secret_reload_interval); - merge_option_to_value!(config, yaml_config, serverless_appsec_enabled); - merge_option!(config, yaml_config, appsec_rules); - merge_option_to_value!(config, yaml_config, appsec_waf_timeout); - merge_option_to_value!(config, yaml_config, api_security_enabled); - merge_option_to_value!(config, yaml_config, api_security_sample_delay); } #[derive(Debug, PartialEq, Clone)] @@ -751,8 +691,8 @@ pub struct YamlConfigSource { pub path: PathBuf, } -impl ConfigSource for YamlConfigSource { - fn load(&self, config: &mut Config) -> Result<(), ConfigError> { +impl ConfigSource for YamlConfigSource { + fn load(&self, config: &mut Config) -> Result<(), 
ConfigError> { let figment = Figment::new().merge(Yaml::file(self.path.clone())); match figment.extract::() { @@ -764,6 +704,16 @@ impl ConfigSource for YamlConfigSource { } } + // Extract extension fields via dual extraction + match figment.extract::() { + Ok(ext_source) => config.ext.merge_from(&ext_source), + Err(e) => { + tracing::warn!( + "Failed to parse extension config from yaml file: {e}, using default extension config." + ); + } + } + Ok(()) } } @@ -772,9 +722,8 @@ impl ConfigSource for YamlConfigSource { #[cfg(test)] mod tests { use std::path::Path; - use std::time::Duration; - use crate::{flush_strategy::PeriodicStrategy, log_level::LogLevel, processing_rule::Kind}; + use crate::{log_level::LogLevel, processing_rule::Kind}; use super::*; @@ -784,6 +733,7 @@ mod tests { /// When adding a new field to YamlConfig or any nested struct, add an entry /// here with the wrong type to ensure graceful deserialization is in place. #[test] + #[allow(clippy::field_reassign_with_default)] fn test_all_yaml_fields_wrong_type_fallback_to_default() { figment::Jail::expect_with(|jail| { jail.clear_env(); @@ -890,28 +840,10 @@ otlp_config: mode: "noquantiles" logs: enabled: [1, 2, 3] - -# AWS Lambda -api_key_secret_arn: "arn:aws:secretsmanager:us-east-1:123:secret:key" -kms_api_key: "kms-encrypted-key" -serverless_logs_enabled: [1, 2, 3] -logs_enabled: [1, 2, 3] -serverless_flush_strategy: [1, 2, 3] -enhanced_metrics: [1, 2, 3] -lambda_proc_enhanced_metrics: [1, 2, 3] -capture_lambda_payload: [1, 2, 3] -capture_lambda_payload_max_depth: [1, 2, 3] -compute_trace_stats_on_extension: [1, 2, 3] -api_key_secret_reload_interval: [1, 2, 3] -serverless_appsec_enabled: [1, 2, 3] -appsec_rules: "/opt/custom-rules.json" -appsec_waf_timeout: [1, 2, 3] -api_security_enabled: [1, 2, 3] -api_security_sample_delay: [1, 2, 3] "#, )?; - let mut config = Config::default(); + let mut config: Config = Config::default(); let source = YamlConfigSource { path: PathBuf::from("datadog.yaml"), }; @@ 
-922,15 +854,12 @@ api_security_sample_delay: [1, 2, 3] // Build expected: string fields have their non-default values, // all non-string fields stay at defaults. - let mut expected = Config::default(); + let mut expected: Config = Config::default(); expected.site = "custom-site.example.com".to_string(); expected.api_key = "test-api-key-12345".to_string(); expected.dd_url = "https://custom-metrics.example.com".to_string(); expected.logs_config_logs_dd_url = "https://custom-logs.example.com".to_string(); expected.apm_dd_url = "https://custom-apm.example.com".to_string(); - expected.api_key_secret_arn = - "arn:aws:secretsmanager:us-east-1:123:secret:key".to_string(); - expected.kms_api_key = "kms-encrypted-key".to_string(); // Option fields expected.proxy_https = Some("https://proxy.example.com".to_string()); expected.http_protocol = Some("http1".to_string()); @@ -950,7 +879,6 @@ api_security_sample_delay: [1, 2, 3] expected.otlp_config_metrics_sums_initial_cumulativ_monotonic_value = Some("keep".to_string()); expected.otlp_config_metrics_summaries_mode = Some("noquantiles".to_string()); - expected.appsec_rules = Some("/opt/custom-rules.json".to_string()); assert_eq!(config, expected); Ok(()) @@ -1081,27 +1009,10 @@ otlp_config: mode: "quantiles" logs: enabled: true - -# AWS Lambda -api_key_secret_arn: "arn:aws:secretsmanager:region:account:secret:datadog-api-key" -kms_api_key: "test-kms-key" -serverless_logs_enabled: false -serverless_flush_strategy: "periodically,60000" -enhanced_metrics: false -lambda_proc_enhanced_metrics: false -capture_lambda_payload: true -capture_lambda_payload_max_depth: 5 -compute_trace_stats_on_extension: true -api_key_secret_reload_interval: 0 -serverless_appsec_enabled: true -appsec_rules: "/path/to/rules.json" -appsec_waf_timeout: 1000000 # Microseconds -api_security_enabled: false -api_security_sample_delay: 60 # Seconds "#, )?; - let mut config = Config::default(); + let mut config: Config = Config::default(); let yaml_config_source = 
YamlConfigSource { path: Path::new("datadog.yaml").to_path_buf(), }; @@ -1215,28 +1126,6 @@ api_security_sample_delay: 60 # Seconds otlp_config_metrics_summaries_mode: Some("quantiles".to_string()), otlp_config_traces_probabilistic_sampler_sampling_percentage: Some(50), otlp_config_logs_enabled: true, - api_key_secret_arn: "arn:aws:secretsmanager:region:account:secret:datadog-api-key" - .to_string(), - kms_api_key: "test-kms-key".to_string(), - api_key_ssm_arn: String::default(), - serverless_logs_enabled: false, - serverless_flush_strategy: FlushStrategy::Periodically(PeriodicStrategy { - interval: 60000, - }), - enhanced_metrics: false, - lambda_proc_enhanced_metrics: false, - capture_lambda_payload: true, - capture_lambda_payload_max_depth: 5, - compute_trace_stats_on_extension: true, - span_dedup_timeout: None, - api_key_secret_reload_interval: None, - - serverless_appsec_enabled: true, - appsec_rules: Some("/path/to/rules.json".to_string()), - appsec_waf_timeout: Duration::from_secs(1), - api_security_enabled: false, - api_security_sample_delay: Duration::from_secs(60), - apm_filter_tags_require: None, apm_filter_tags_reject: None, apm_filter_tags_regex_require: None, @@ -1245,6 +1134,7 @@ api_security_sample_delay: 60 # Seconds dogstatsd_so_rcvbuf: Some(1_048_576), dogstatsd_buffer_size: Some(65507), dogstatsd_queue_size: Some(2048), + ext: crate::NoExtension, }; // Assert that @@ -1266,7 +1156,7 @@ dogstatsd_buffer_size: 16384 dogstatsd_queue_size: 512 ", )?; - let mut config = Config::default(); + let mut config: Config = Config::default(); let yaml_config_source = YamlConfigSource { path: Path::new("datadog.yaml").to_path_buf(), }; @@ -1286,7 +1176,7 @@ dogstatsd_queue_size: 512 figment::Jail::expect_with(|jail| { jail.clear_env(); jail.create_file("datadog.yaml", "")?; - let mut config = Config::default(); + let mut config: Config = Config::default(); let yaml_config_source = YamlConfigSource { path: Path::new("datadog.yaml").to_path_buf(), }; From 
214fd8e35e6b9b448a67792f8f13c5109939f2dc Mon Sep 17 00:00:00 2001 From: Jordan Gonzalez <30836115+duncanista@users.noreply.github.com> Date: Thu, 2 Apr 2026 16:10:27 -0400 Subject: [PATCH 2/4] refactor(agent-config): organize crate into sources/ and deserializers/ modules Move config source implementations (env, yaml) into `src/sources/` and type definitions with custom deserialization into `src/deserializers/`. Re-exports at the crate root preserve all existing import paths. --- .../additional_endpoints.rs | 0 .../{ => deserializers}/apm_replace_rule.rs | 0 .../src/{ => deserializers}/flush_strategy.rs | 0 .../src/{ => deserializers}/log_level.rs | 0 .../logs_additional_endpoints.rs | 0 .../src/deserializers/mod.rs | 7 +++++++ .../{ => deserializers}/processing_rule.rs | 0 .../{ => deserializers}/service_mapping.rs | 0 crates/datadog-agent-config/src/lib.rs | 19 ++++++++++--------- .../src/{ => sources}/env.rs | 0 .../datadog-agent-config/src/sources/mod.rs | 2 ++ .../src/{ => sources}/yaml.rs | 0 12 files changed, 19 insertions(+), 9 deletions(-) rename crates/datadog-agent-config/src/{ => deserializers}/additional_endpoints.rs (100%) rename crates/datadog-agent-config/src/{ => deserializers}/apm_replace_rule.rs (100%) rename crates/datadog-agent-config/src/{ => deserializers}/flush_strategy.rs (100%) rename crates/datadog-agent-config/src/{ => deserializers}/log_level.rs (100%) rename crates/datadog-agent-config/src/{ => deserializers}/logs_additional_endpoints.rs (100%) create mode 100644 crates/datadog-agent-config/src/deserializers/mod.rs rename crates/datadog-agent-config/src/{ => deserializers}/processing_rule.rs (100%) rename crates/datadog-agent-config/src/{ => deserializers}/service_mapping.rs (100%) rename crates/datadog-agent-config/src/{ => sources}/env.rs (100%) create mode 100644 crates/datadog-agent-config/src/sources/mod.rs rename crates/datadog-agent-config/src/{ => sources}/yaml.rs (100%) diff --git 
a/crates/datadog-agent-config/src/additional_endpoints.rs b/crates/datadog-agent-config/src/deserializers/additional_endpoints.rs similarity index 100% rename from crates/datadog-agent-config/src/additional_endpoints.rs rename to crates/datadog-agent-config/src/deserializers/additional_endpoints.rs diff --git a/crates/datadog-agent-config/src/apm_replace_rule.rs b/crates/datadog-agent-config/src/deserializers/apm_replace_rule.rs similarity index 100% rename from crates/datadog-agent-config/src/apm_replace_rule.rs rename to crates/datadog-agent-config/src/deserializers/apm_replace_rule.rs diff --git a/crates/datadog-agent-config/src/flush_strategy.rs b/crates/datadog-agent-config/src/deserializers/flush_strategy.rs similarity index 100% rename from crates/datadog-agent-config/src/flush_strategy.rs rename to crates/datadog-agent-config/src/deserializers/flush_strategy.rs diff --git a/crates/datadog-agent-config/src/log_level.rs b/crates/datadog-agent-config/src/deserializers/log_level.rs similarity index 100% rename from crates/datadog-agent-config/src/log_level.rs rename to crates/datadog-agent-config/src/deserializers/log_level.rs diff --git a/crates/datadog-agent-config/src/logs_additional_endpoints.rs b/crates/datadog-agent-config/src/deserializers/logs_additional_endpoints.rs similarity index 100% rename from crates/datadog-agent-config/src/logs_additional_endpoints.rs rename to crates/datadog-agent-config/src/deserializers/logs_additional_endpoints.rs diff --git a/crates/datadog-agent-config/src/deserializers/mod.rs b/crates/datadog-agent-config/src/deserializers/mod.rs new file mode 100644 index 0000000..c9f7262 --- /dev/null +++ b/crates/datadog-agent-config/src/deserializers/mod.rs @@ -0,0 +1,7 @@ +pub mod additional_endpoints; +pub mod apm_replace_rule; +pub mod flush_strategy; +pub mod log_level; +pub mod logs_additional_endpoints; +pub mod processing_rule; +pub mod service_mapping; diff --git a/crates/datadog-agent-config/src/processing_rule.rs 
b/crates/datadog-agent-config/src/deserializers/processing_rule.rs similarity index 100% rename from crates/datadog-agent-config/src/processing_rule.rs rename to crates/datadog-agent-config/src/deserializers/processing_rule.rs diff --git a/crates/datadog-agent-config/src/service_mapping.rs b/crates/datadog-agent-config/src/deserializers/service_mapping.rs similarity index 100% rename from crates/datadog-agent-config/src/service_mapping.rs rename to crates/datadog-agent-config/src/deserializers/service_mapping.rs diff --git a/crates/datadog-agent-config/src/lib.rs b/crates/datadog-agent-config/src/lib.rs index d334d5e..28b37f7 100644 --- a/crates/datadog-agent-config/src/lib.rs +++ b/crates/datadog-agent-config/src/lib.rs @@ -1,12 +1,13 @@ -pub mod additional_endpoints; -pub mod apm_replace_rule; -pub mod env; -pub mod flush_strategy; -pub mod log_level; -pub mod logs_additional_endpoints; -pub mod processing_rule; -pub mod service_mapping; -pub mod yaml; +pub mod deserializers; +pub mod sources; + +// Re-export submodules at the crate root so existing imports like +// `crate::flush_strategy::FlushStrategy` and `crate::env::EnvConfigSource` keep working. 
+pub use deserializers::{ + additional_endpoints, apm_replace_rule, flush_strategy, log_level, + logs_additional_endpoints, processing_rule, service_mapping, +}; +pub use sources::{env, yaml}; pub use datadog_opentelemetry::configuration::TracePropagationStyle; diff --git a/crates/datadog-agent-config/src/env.rs b/crates/datadog-agent-config/src/sources/env.rs similarity index 100% rename from crates/datadog-agent-config/src/env.rs rename to crates/datadog-agent-config/src/sources/env.rs diff --git a/crates/datadog-agent-config/src/sources/mod.rs b/crates/datadog-agent-config/src/sources/mod.rs new file mode 100644 index 0000000..dc4d398 --- /dev/null +++ b/crates/datadog-agent-config/src/sources/mod.rs @@ -0,0 +1,2 @@ +pub mod env; +pub mod yaml; diff --git a/crates/datadog-agent-config/src/yaml.rs b/crates/datadog-agent-config/src/sources/yaml.rs similarity index 100% rename from crates/datadog-agent-config/src/yaml.rs rename to crates/datadog-agent-config/src/sources/yaml.rs From a4a35ef21c3e4d08f00a348f3eafca97fcf24715 Mon Sep 17 00:00:00 2001 From: Jordan Gonzalez <30836115+duncanista@users.noreply.github.com> Date: Thu, 2 Apr 2026 16:18:24 -0400 Subject: [PATCH 3/4] refactor(agent-config): move inline deserializer helpers to deserializers/helpers.rs Extracts all generic deserializer functions (deserialize_optional_string, deserialize_with_default, duration parsers, key-value parsers, etc.) from lib.rs into src/deserializers/helpers.rs. Re-exported at the crate root so all existing import paths continue to work. 
--- .../src/deserializers/helpers.rs | 372 +++++++++++++++++ .../src/deserializers/mod.rs | 1 + crates/datadog-agent-config/src/lib.rs | 376 +----------------- 3 files changed, 381 insertions(+), 368 deletions(-) create mode 100644 crates/datadog-agent-config/src/deserializers/helpers.rs diff --git a/crates/datadog-agent-config/src/deserializers/helpers.rs b/crates/datadog-agent-config/src/deserializers/helpers.rs new file mode 100644 index 0000000..058c0ce --- /dev/null +++ b/crates/datadog-agent-config/src/deserializers/helpers.rs @@ -0,0 +1,372 @@ +use serde::{Deserialize, Deserializer}; +use serde_aux::prelude::deserialize_bool_from_anything; +use serde_json::Value; + +use std::collections::HashMap; +use std::fmt; +use std::time::Duration; +use tracing::warn; + +use crate::TracePropagationStyle; + +pub fn deserialize_optional_string<'de, D>(deserializer: D) -> Result, D::Error> +where + D: Deserializer<'de>, +{ + match Value::deserialize(deserializer)? { + Value::String(s) => Ok(Some(s)), + other => { + warn!( + "Failed to parse value, expected a string, got: {}, ignoring", + other + ); + Ok(None) + } + } +} + +pub fn deserialize_string_or_int<'de, D>(deserializer: D) -> Result, D::Error> +where + D: Deserializer<'de>, +{ + let value = Value::deserialize(deserializer)?; + match value { + Value::String(s) => { + if s.trim().is_empty() { + Ok(None) + } else { + Ok(Some(s)) + } + } + Value::Number(n) => Ok(Some(n.to_string())), + _ => { + warn!("Failed to parse value, expected a string or an integer, ignoring"); + Ok(None) + } + } +} + +pub fn deserialize_optional_bool_from_anything<'de, D>( + deserializer: D, +) -> Result, D::Error> +where + D: Deserializer<'de>, +{ + // First try to deserialize as Option<_> to handle null/missing values + let opt: Option = Option::deserialize(deserializer)?; + + match opt { + None => Ok(None), + Some(value) => match deserialize_bool_from_anything(value) { + Ok(bool_result) => Ok(Some(bool_result)), + Err(e) => { + warn!("Failed 
to parse bool value: {}, ignoring", e); + Ok(None) + } + }, + } +} + +/// Parse a single "key:value" string into a (key, value) tuple +/// Returns None if the string is invalid (e.g., missing colon, empty key/value) +fn parse_key_value_tag(tag: &str) -> Option<(String, String)> { + let parts: Vec<&str> = tag.splitn(2, ':').collect(); + if parts.len() == 2 && !parts[0].is_empty() && !parts[1].is_empty() { + Some((parts[0].to_string(), parts[1].to_string())) + } else { + warn!( + "Failed to parse tag '{}', expected format 'key:value', ignoring", + tag + ); + None + } +} + +pub fn deserialize_key_value_pairs<'de, D>( + deserializer: D, +) -> Result, D::Error> +where + D: Deserializer<'de>, +{ + struct KeyValueVisitor; + + impl serde::de::Visitor<'_> for KeyValueVisitor { + type Value = HashMap; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("a string in format 'key1:value1,key2:value2' or 'key1:value1'") + } + + fn visit_str(self, value: &str) -> Result + where + E: serde::de::Error, + { + let mut map = HashMap::new(); + for tag in value.split(&[',', ' ']) { + if tag.is_empty() { + continue; + } + if let Some((key, val)) = parse_key_value_tag(tag) { + map.insert(key, val); + } + } + + Ok(map) + } + + fn visit_u64(self, value: u64) -> Result + where + E: serde::de::Error, + { + warn!( + "Failed to parse tags: expected string in format 'key:value', got number {}, ignoring", + value + ); + Ok(HashMap::new()) + } + + fn visit_i64(self, value: i64) -> Result + where + E: serde::de::Error, + { + warn!( + "Failed to parse tags: expected string in format 'key:value', got number {}, ignoring", + value + ); + Ok(HashMap::new()) + } + + fn visit_f64(self, value: f64) -> Result + where + E: serde::de::Error, + { + warn!( + "Failed to parse tags: expected string in format 'key:value', got number {}, ignoring", + value + ); + Ok(HashMap::new()) + } + + fn visit_bool(self, value: bool) -> Result + where + E: serde::de::Error, + { + 
warn!( + "Failed to parse tags: expected string in format 'key:value', got boolean {}, ignoring", + value + ); + Ok(HashMap::new()) + } + } + + deserializer.deserialize_any(KeyValueVisitor) +} + +pub fn deserialize_array_from_comma_separated_string<'de, D>( + deserializer: D, +) -> Result, D::Error> +where + D: Deserializer<'de>, +{ + let s: String = String::deserialize(deserializer)?; + Ok(s.split(',') + .map(|feature| feature.trim().to_string()) + .filter(|feature| !feature.is_empty()) + .collect()) +} + +pub fn deserialize_key_value_pair_array_to_hashmap<'de, D>( + deserializer: D, +) -> Result, D::Error> +where + D: Deserializer<'de>, +{ + let array: Vec = match Vec::deserialize(deserializer) { + Ok(v) => v, + Err(e) => { + warn!("Failed to deserialize tags array: {e}, ignoring"); + return Ok(HashMap::new()); + } + }; + let mut map = HashMap::new(); + for s in array { + if let Some((key, val)) = parse_key_value_tag(&s) { + map.insert(key, val); + } + } + Ok(map) +} + +/// Deserialize APM filter tags from space-separated "key:value" pairs, also support key-only tags +pub fn deserialize_apm_filter_tags<'de, D>(deserializer: D) -> Result>, D::Error> +where + D: Deserializer<'de>, +{ + let opt: Option = Option::deserialize(deserializer)?; + + match opt { + None => Ok(None), + Some(s) if s.trim().is_empty() => Ok(None), + Some(s) => { + let tags: Vec = s + .split_whitespace() + .filter_map(|pair| { + let parts: Vec<&str> = pair.splitn(2, ':').collect(); + if parts.len() == 2 { + let key = parts[0].trim(); + let value = parts[1].trim(); + if key.is_empty() { + None + } else if value.is_empty() { + Some(key.to_string()) + } else { + Some(format!("{key}:{value}")) + } + } else if parts.len() == 1 { + let key = parts[0].trim(); + if key.is_empty() { + None + } else { + Some(key.to_string()) + } + } else { + None + } + }) + .collect(); + + if tags.is_empty() { + Ok(None) + } else { + Ok(Some(tags)) + } + } + } +} + +pub fn deserialize_option_lossless<'de, D, 
T>(deserializer: D) -> Result, D::Error> +where + D: Deserializer<'de>, + T: Deserialize<'de>, +{ + match Option::::deserialize(deserializer) { + Ok(value) => Ok(value), + Err(e) => { + warn!("Failed to deserialize optional value: {}, ignoring", e); + Ok(None) + } + } +} + +/// Gracefully deserialize any field, falling back to `T::default()` on error. +/// +/// This ensures that a single field with the wrong type never fails the entire +/// struct extraction. Works for any `T` that implements `Deserialize + Default`: +/// - `Option` defaults to `None` +/// - `Vec` defaults to `[]` +/// - `HashMap` defaults to `{}` +/// - Structs with `#[derive(Default)]` use their default +pub fn deserialize_with_default<'de, D, T>(deserializer: D) -> Result +where + D: Deserializer<'de>, + T: Deserialize<'de> + Default, +{ + match T::deserialize(deserializer) { + Ok(value) => Ok(value), + Err(e) => { + warn!("Failed to deserialize field: {}, using default", e); + Ok(T::default()) + } + } +} + +pub fn deserialize_optional_duration_from_microseconds<'de, D: Deserializer<'de>>( + deserializer: D, +) -> Result, D::Error> { + match Option::::deserialize(deserializer) { + Ok(opt) => Ok(opt.map(Duration::from_micros)), + Err(e) => { + warn!("Failed to deserialize duration (microseconds): {e}, ignoring"); + Ok(None) + } + } +} + +pub fn deserialize_optional_duration_from_seconds<'de, D: Deserializer<'de>>( + deserializer: D, +) -> Result, D::Error> { + // Deserialize into a generic Value first to avoid propagating type errors, + // then try to extract a duration from it. 
+ match Value::deserialize(deserializer) { + Ok(Value::Number(n)) => { + if let Some(u) = n.as_u64() { + Ok(Some(Duration::from_secs(u))) + } else if let Some(i) = n.as_i64() { + if i < 0 { + warn!("Failed to parse duration: negative durations are not allowed, ignoring"); + Ok(None) + } else { + Ok(Some(Duration::from_secs(i as u64))) + } + } else if let Some(f) = n.as_f64() { + if f < 0.0 { + warn!("Failed to parse duration: negative durations are not allowed, ignoring"); + Ok(None) + } else { + Ok(Some(Duration::from_secs_f64(f))) + } + } else { + warn!("Failed to parse duration: unsupported number format, ignoring"); + Ok(None) + } + } + Ok(Value::Null) => Ok(None), + Ok(other) => { + warn!("Failed to parse duration: expected number, got {other}, ignoring"); + Ok(None) + } + Err(e) => { + warn!("Failed to deserialize duration: {e}, ignoring"); + Ok(None) + } + } +} + +// Like deserialize_optional_duration_from_seconds(), but return None if the value is 0 +pub fn deserialize_optional_duration_from_seconds_ignore_zero<'de, D: Deserializer<'de>>( + deserializer: D, +) -> Result, D::Error> { + let duration: Option = deserialize_optional_duration_from_seconds(deserializer)?; + if duration.is_some_and(|d| d.as_secs() == 0) { + return Ok(None); + } + Ok(duration) +} + +pub fn deserialize_trace_propagation_style<'de, D>( + deserializer: D, +) -> Result, D::Error> +where + D: Deserializer<'de>, +{ + use std::str::FromStr; + let s: String = match String::deserialize(deserializer) { + Ok(s) => s, + Err(e) => { + warn!("Failed to deserialize trace propagation style: {e}, ignoring"); + return Ok(Vec::new()); + } + }; + + Ok(s.split(',') + .filter_map( + |style| match TracePropagationStyle::from_str(style.trim()) { + Ok(parsed_style) => Some(parsed_style), + Err(e) => { + warn!("Failed to parse trace propagation style: {e}, ignoring"); + None + } + }, + ) + .collect()) +} diff --git a/crates/datadog-agent-config/src/deserializers/mod.rs 
b/crates/datadog-agent-config/src/deserializers/mod.rs index c9f7262..e88c90b 100644 --- a/crates/datadog-agent-config/src/deserializers/mod.rs +++ b/crates/datadog-agent-config/src/deserializers/mod.rs @@ -1,6 +1,7 @@ pub mod additional_endpoints; pub mod apm_replace_rule; pub mod flush_strategy; +pub mod helpers; pub mod log_level; pub mod logs_additional_endpoints; pub mod processing_rule; diff --git a/crates/datadog-agent-config/src/lib.rs b/crates/datadog-agent-config/src/lib.rs index 28b37f7..0d47d75 100644 --- a/crates/datadog-agent-config/src/lib.rs +++ b/crates/datadog-agent-config/src/lib.rs @@ -10,18 +10,18 @@ pub use deserializers::{ pub use sources::{env, yaml}; pub use datadog_opentelemetry::configuration::TracePropagationStyle; +// Re-export all helper deserializers so consumers and internal modules can +// use `crate::deserialize_optional_string` etc. without reaching into submodules. +pub use deserializers::helpers::*; use libdd_trace_obfuscation::replacer::ReplaceRule; use libdd_trace_utils::config_utils::{trace_intake_url, trace_intake_url_prefixed}; -use serde::{Deserialize, Deserializer}; -use serde_aux::prelude::deserialize_bool_from_anything; -use serde_json::Value; +use serde::Deserialize; use std::path::Path; -use std::time::Duration; -use std::{collections::HashMap, fmt}; -use tracing::{debug, error, warn}; +use std::collections::HashMap; +use tracing::{debug, error}; use crate::{ apm_replace_rule::deserialize_apm_replace_rules, @@ -583,368 +583,6 @@ fn logs_intake_url(url: &str) -> String { format!("https://{url}") } -pub fn deserialize_optional_string<'de, D>(deserializer: D) -> Result, D::Error> -where - D: Deserializer<'de>, -{ - match Value::deserialize(deserializer)? 
{ - Value::String(s) => Ok(Some(s)), - other => { - warn!( - "Failed to parse value, expected a string, got: {}, ignoring", - other - ); - Ok(None) - } - } -} - -pub fn deserialize_string_or_int<'de, D>(deserializer: D) -> Result, D::Error> -where - D: Deserializer<'de>, -{ - let value = Value::deserialize(deserializer)?; - match value { - Value::String(s) => { - if s.trim().is_empty() { - Ok(None) - } else { - Ok(Some(s)) - } - } - Value::Number(n) => Ok(Some(n.to_string())), - _ => { - warn!("Failed to parse value, expected a string or an integer, ignoring"); - Ok(None) - } - } -} - -pub fn deserialize_optional_bool_from_anything<'de, D>( - deserializer: D, -) -> Result, D::Error> -where - D: Deserializer<'de>, -{ - // First try to deserialize as Option<_> to handle null/missing values - let opt: Option = Option::deserialize(deserializer)?; - - match opt { - None => Ok(None), - Some(value) => match deserialize_bool_from_anything(value) { - Ok(bool_result) => Ok(Some(bool_result)), - Err(e) => { - warn!("Failed to parse bool value: {}, ignoring", e); - Ok(None) - } - }, - } -} - -/// Parse a single "key:value" string into a (key, value) tuple -/// Returns None if the string is invalid (e.g., missing colon, empty key/value) -fn parse_key_value_tag(tag: &str) -> Option<(String, String)> { - let parts: Vec<&str> = tag.splitn(2, ':').collect(); - if parts.len() == 2 && !parts[0].is_empty() && !parts[1].is_empty() { - Some((parts[0].to_string(), parts[1].to_string())) - } else { - warn!( - "Failed to parse tag '{}', expected format 'key:value', ignoring", - tag - ); - None - } -} - -pub fn deserialize_key_value_pairs<'de, D>( - deserializer: D, -) -> Result, D::Error> -where - D: Deserializer<'de>, -{ - struct KeyValueVisitor; - - impl serde::de::Visitor<'_> for KeyValueVisitor { - type Value = HashMap; - - fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - formatter.write_str("a string in format 'key1:value1,key2:value2' or 'key1:value1'") - } - - 
fn visit_str(self, value: &str) -> Result - where - E: serde::de::Error, - { - let mut map = HashMap::new(); - for tag in value.split(&[',', ' ']) { - if tag.is_empty() { - continue; - } - if let Some((key, val)) = parse_key_value_tag(tag) { - map.insert(key, val); - } - } - - Ok(map) - } - - fn visit_u64(self, value: u64) -> Result - where - E: serde::de::Error, - { - warn!( - "Failed to parse tags: expected string in format 'key:value', got number {}, ignoring", - value - ); - Ok(HashMap::new()) - } - - fn visit_i64(self, value: i64) -> Result - where - E: serde::de::Error, - { - warn!( - "Failed to parse tags: expected string in format 'key:value', got number {}, ignoring", - value - ); - Ok(HashMap::new()) - } - - fn visit_f64(self, value: f64) -> Result - where - E: serde::de::Error, - { - warn!( - "Failed to parse tags: expected string in format 'key:value', got number {}, ignoring", - value - ); - Ok(HashMap::new()) - } - - fn visit_bool(self, value: bool) -> Result - where - E: serde::de::Error, - { - warn!( - "Failed to parse tags: expected string in format 'key:value', got boolean {}, ignoring", - value - ); - Ok(HashMap::new()) - } - } - - deserializer.deserialize_any(KeyValueVisitor) -} - -pub fn deserialize_array_from_comma_separated_string<'de, D>( - deserializer: D, -) -> Result, D::Error> -where - D: Deserializer<'de>, -{ - let s: String = String::deserialize(deserializer)?; - Ok(s.split(',') - .map(|feature| feature.trim().to_string()) - .filter(|feature| !feature.is_empty()) - .collect()) -} - -pub fn deserialize_key_value_pair_array_to_hashmap<'de, D>( - deserializer: D, -) -> Result, D::Error> -where - D: Deserializer<'de>, -{ - let array: Vec = match Vec::deserialize(deserializer) { - Ok(v) => v, - Err(e) => { - warn!("Failed to deserialize tags array: {e}, ignoring"); - return Ok(HashMap::new()); - } - }; - let mut map = HashMap::new(); - for s in array { - if let Some((key, val)) = parse_key_value_tag(&s) { - map.insert(key, val); - } - } - 
Ok(map) -} - -/// Deserialize APM filter tags from space-separated "key:value" pairs, also support key-only tags -pub fn deserialize_apm_filter_tags<'de, D>(deserializer: D) -> Result>, D::Error> -where - D: Deserializer<'de>, -{ - let opt: Option = Option::deserialize(deserializer)?; - - match opt { - None => Ok(None), - Some(s) if s.trim().is_empty() => Ok(None), - Some(s) => { - let tags: Vec = s - .split_whitespace() - .filter_map(|pair| { - let parts: Vec<&str> = pair.splitn(2, ':').collect(); - if parts.len() == 2 { - let key = parts[0].trim(); - let value = parts[1].trim(); - if key.is_empty() { - None - } else if value.is_empty() { - Some(key.to_string()) - } else { - Some(format!("{key}:{value}")) - } - } else if parts.len() == 1 { - let key = parts[0].trim(); - if key.is_empty() { - None - } else { - Some(key.to_string()) - } - } else { - None - } - }) - .collect(); - - if tags.is_empty() { - Ok(None) - } else { - Ok(Some(tags)) - } - } - } -} - -pub fn deserialize_option_lossless<'de, D, T>(deserializer: D) -> Result, D::Error> -where - D: Deserializer<'de>, - T: Deserialize<'de>, -{ - match Option::::deserialize(deserializer) { - Ok(value) => Ok(value), - Err(e) => { - warn!("Failed to deserialize optional value: {}, ignoring", e); - Ok(None) - } - } -} - -/// Gracefully deserialize any field, falling back to `T::default()` on error. -/// -/// This ensures that a single field with the wrong type never fails the entire -/// struct extraction. 
Works for any `T` that implements `Deserialize + Default`: -/// - `Option` defaults to `None` -/// - `Vec` defaults to `[]` -/// - `HashMap` defaults to `{}` -/// - Structs with `#[derive(Default)]` use their default -pub fn deserialize_with_default<'de, D, T>(deserializer: D) -> Result -where - D: Deserializer<'de>, - T: Deserialize<'de> + Default, -{ - match T::deserialize(deserializer) { - Ok(value) => Ok(value), - Err(e) => { - warn!("Failed to deserialize field: {}, using default", e); - Ok(T::default()) - } - } -} - -pub fn deserialize_optional_duration_from_microseconds<'de, D: Deserializer<'de>>( - deserializer: D, -) -> Result, D::Error> { - match Option::::deserialize(deserializer) { - Ok(opt) => Ok(opt.map(Duration::from_micros)), - Err(e) => { - warn!("Failed to deserialize duration (microseconds): {e}, ignoring"); - Ok(None) - } - } -} - -pub fn deserialize_optional_duration_from_seconds<'de, D: Deserializer<'de>>( - deserializer: D, -) -> Result, D::Error> { - // Deserialize into a generic Value first to avoid propagating type errors, - // then try to extract a duration from it. 
- match Value::deserialize(deserializer) { - Ok(Value::Number(n)) => { - if let Some(u) = n.as_u64() { - Ok(Some(Duration::from_secs(u))) - } else if let Some(i) = n.as_i64() { - if i < 0 { - warn!("Failed to parse duration: negative durations are not allowed, ignoring"); - Ok(None) - } else { - Ok(Some(Duration::from_secs(i as u64))) - } - } else if let Some(f) = n.as_f64() { - if f < 0.0 { - warn!("Failed to parse duration: negative durations are not allowed, ignoring"); - Ok(None) - } else { - Ok(Some(Duration::from_secs_f64(f))) - } - } else { - warn!("Failed to parse duration: unsupported number format, ignoring"); - Ok(None) - } - } - Ok(Value::Null) => Ok(None), - Ok(other) => { - warn!("Failed to parse duration: expected number, got {other}, ignoring"); - Ok(None) - } - Err(e) => { - warn!("Failed to deserialize duration: {e}, ignoring"); - Ok(None) - } - } -} - -// Like deserialize_optional_duration_from_seconds(), but return None if the value is 0 -pub fn deserialize_optional_duration_from_seconds_ignore_zero<'de, D: Deserializer<'de>>( - deserializer: D, -) -> Result, D::Error> { - let duration: Option = deserialize_optional_duration_from_seconds(deserializer)?; - if duration.is_some_and(|d| d.as_secs() == 0) { - return Ok(None); - } - Ok(duration) -} - -pub fn deserialize_trace_propagation_style<'de, D>( - deserializer: D, -) -> Result, D::Error> -where - D: Deserializer<'de>, -{ - use std::str::FromStr; - let s: String = match String::deserialize(deserializer) { - Ok(s) => s, - Err(e) => { - warn!("Failed to deserialize trace propagation style: {e}, ignoring"); - return Ok(Vec::new()); - } - }; - - Ok(s.split(',') - .filter_map( - |style| match TracePropagationStyle::from_str(style.trim()) { - Ok(parsed_style) => Some(parsed_style), - Err(e) => { - warn!("Failed to parse trace propagation style: {e}, ignoring"); - None - } - }, - ) - .collect()) -} - #[cfg_attr(coverage_nightly, coverage(off))] // Test modules skew coverage metrics #[cfg(test)] pub mod 
tests { @@ -952,6 +590,8 @@ pub mod tests { use super::*; + use std::time::Duration; + use crate::{TracePropagationStyle, log_level::LogLevel, processing_rule::ProcessingRule}; #[test] From 7c6eb51564f64f4f5cf5de42c61af175f263e111 Mon Sep 17 00:00:00 2001 From: Jordan Gonzalez <30836115+duncanista@users.noreply.github.com> Date: Thu, 2 Apr 2026 16:25:47 -0400 Subject: [PATCH 4/4] refactor(agent-config): reorder lib.rs so Config struct is visible first Reorganize lib.rs so an engineer opening the file immediately sees the Config struct and its fields, followed by the loading entry points, then the extension trait, builder, and macros. Sections are separated with headers for quick scanning. --- crates/datadog-agent-config/src/lib.rs | 757 +++++++++++++------------ 1 file changed, 388 insertions(+), 369 deletions(-) diff --git a/crates/datadog-agent-config/src/lib.rs b/crates/datadog-agent-config/src/lib.rs index 0d47d75..3544ab5 100644 --- a/crates/datadog-agent-config/src/lib.rs +++ b/crates/datadog-agent-config/src/lib.rs @@ -4,8 +4,8 @@ pub mod sources; // Re-export submodules at the crate root so existing imports like // `crate::flush_strategy::FlushStrategy` and `crate::env::EnvConfigSource` keep working. 
pub use deserializers::{ - additional_endpoints, apm_replace_rule, flush_strategy, log_level, - logs_additional_endpoints, processing_rule, service_mapping, + additional_endpoints, apm_replace_rule, flush_strategy, log_level, logs_additional_endpoints, + processing_rule, service_mapping, }; pub use sources::{env, yaml}; @@ -19,8 +19,8 @@ use libdd_trace_utils::config_utils::{trace_intake_url, trace_intake_url_prefixe use serde::Deserialize; -use std::path::Path; use std::collections::HashMap; +use std::path::Path; use tracing::{debug, error}; use crate::{ @@ -32,297 +32,9 @@ use crate::{ yaml::YamlConfigSource, }; -/// Helper macro to merge Option fields to String fields -/// -/// Providing one field argument will merge the value from the source config field into the config -/// field. -/// -/// Providing two field arguments will merge the value from the source config field into the config -/// field if the value is not empty. -#[macro_export] -macro_rules! merge_string { - ($config:expr, $config_field:ident, $source:expr, $source_field:ident) => { - if let Some(value) = &$source.$source_field { - $config.$config_field.clone_from(value); - } - }; - ($config:expr, $source:expr, $field:ident) => { - if let Some(value) = &$source.$field { - $config.$field.clone_from(value); - } - }; -} - -/// Helper macro to merge Option fields where T implements Clone -/// -/// Providing one field argument will merge the value from the source config field into the config -/// field. -/// -/// Providing two field arguments will merge the value from the source config field into the config -/// field if the value is not empty. -#[macro_export] -macro_rules! 
merge_option { - ($config:expr, $config_field:ident, $source:expr, $source_field:ident) => { - if $source.$source_field.is_some() { - $config.$config_field.clone_from(&$source.$source_field); - } - }; - ($config:expr, $source:expr, $field:ident) => { - if $source.$field.is_some() { - $config.$field.clone_from(&$source.$field); - } - }; -} - -/// Helper macro to merge Option fields to T fields when Option is Some -/// -/// Providing one field argument will merge the value from the source config field into the config -/// field. -/// -/// Providing two field arguments will merge the value from the source config field into the config -/// field if the value is not empty. -#[macro_export] -macro_rules! merge_option_to_value { - ($config:expr, $config_field:ident, $source:expr, $source_field:ident) => { - if let Some(value) = &$source.$source_field { - $config.$config_field = value.clone(); - } - }; - ($config:expr, $source:expr, $field:ident) => { - if let Some(value) = &$source.$field { - $config.$field = value.clone(); - } - }; -} - -/// Helper macro to merge `Vec` fields when `Vec` is not empty -/// -/// Providing one field argument will merge the value from the source config field into the config -/// field. -/// -/// Providing two field arguments will merge the value from the source config field into the config -/// field if the value is not empty. -#[macro_export] -macro_rules! merge_vec { - ($config:expr, $config_field:ident, $source:expr, $source_field:ident) => { - if !$source.$source_field.is_empty() { - $config.$config_field.clone_from(&$source.$source_field); - } - }; - ($config:expr, $source:expr, $field:ident) => { - if !$source.$field.is_empty() { - $config.$field.clone_from(&$source.$field); - } - }; -} - -// nit: these will replace one map with the other, not merge the maps togehter, right? 
-/// Helper macro to merge `HashMap` fields when `HashMap` is not empty -/// -/// Providing one field argument will merge the value from the source config field into the config -/// field. -/// -/// Providing two field arguments will merge the value from the source config field into the config -/// field if the value is not empty. -#[macro_export] -macro_rules! merge_hashmap { - ($config:expr, $config_field:ident, $source:expr, $source_field:ident) => { - if !$source.$source_field.is_empty() { - $config.$config_field.clone_from(&$source.$source_field); - } - }; - ($config:expr, $source:expr, $field:ident) => { - if !$source.$field.is_empty() { - $config.$field.clone_from(&$source.$field); - } - }; -} - -/// Trait that extension configs must implement to add additional configuration -/// fields beyond what the core provides. -/// -/// Extensions allow consumers to define their own external configuration fields -/// that are deserialized from environment variables and YAML files alongside -/// core fields via dual extraction. -/// -/// # Source type requirements -/// -/// The `Source` type must use `#[serde(default)]` on the struct and graceful -/// deserializers (e.g., `deserialize_optional_bool_from_anything`) on each field -/// to ensure that a single bad value doesn't fail the entire extraction. -/// -/// # Flat fields only -/// -/// A single `Source` type is used for both environment variable and YAML -/// extraction. 
This works when all extension fields are top-level (flat) in -/// the YAML file, which is the common case for extension configs: -/// -/// ```yaml -/// # Works: flat fields map naturally to both DD_* env vars and YAML keys -/// enhanced_metrics: true -/// capture_lambda_payload: false -/// ``` -/// -/// If you need nested YAML structures (e.g., `lambda: { enhanced_metrics: true }`) -/// that differ from the flat env var layout, implement `merge_from` with a -/// nested source struct and handle the mapping manually instead of using -/// `merge_fields!`. -pub trait ConfigExtension: Clone + Default + std::fmt::Debug + PartialEq { - /// Intermediate type for deserializing extension fields. - /// Used for both environment variable and YAML extraction. - type Source: Default + serde::de::DeserializeOwned + Clone + std::fmt::Debug; - - /// Merge parsed source fields into self. - fn merge_from(&mut self, source: &Self::Source); -} - -/// Batch-merge extension fields from a source struct. -/// -/// Groups fields by merge strategy so you don't have to write individual -/// `merge_string!` / `merge_option_to_value!` / `merge_option!` calls. -/// -/// ```ignore -/// merge_fields!(self, source, -/// string: [api_key_secret_arn, kms_api_key], -/// value: [enhanced_metrics, capture_lambda_payload], -/// option: [span_dedup_timeout, appsec_rules], -/// ); -/// ``` -#[macro_export] -macro_rules! 
merge_fields { - // Internal rules dispatched by keyword - (@string $config:expr, $source:expr, [$($field:ident),* $(,)?]) => { - $( $crate::merge_string!($config, $source, $field); )* - }; - (@value $config:expr, $source:expr, [$($field:ident),* $(,)?]) => { - $( $crate::merge_option_to_value!($config, $source, $field); )* - }; - (@option $config:expr, $source:expr, [$($field:ident),* $(,)?]) => { - $( $crate::merge_option!($config, $source, $field); )* - }; - // Public entry point: accepts any combination of groups in any order - ($config:expr, $source:expr, $($kind:ident: [$($field:ident),* $(,)?]),* $(,)?) => { - $( $crate::merge_fields!(@$kind $config, $source, [$($field),*]); )* - }; -} - -/// A no-op extension for consumers that don't need extra fields. -#[derive(Clone, Default, Debug, PartialEq)] -pub struct NoExtension; - -/// A no-op source for deserialization that accepts (and ignores) any input. -#[derive(Clone, Default, Debug, Deserialize)] -pub struct NoExtensionSource; - -impl ConfigExtension for NoExtension { - type Source = NoExtensionSource; - fn merge_from(&mut self, _source: &Self::Source) {} -} - -#[derive(Debug, PartialEq)] -#[allow(clippy::module_name_repetitions)] -pub enum ConfigError { - ParseError(String), - UnsupportedField(String), -} - -#[allow(clippy::module_name_repetitions)] -pub trait ConfigSource { - fn load(&self, config: &mut Config) -> Result<(), ConfigError>; -} - -#[allow(clippy::module_name_repetitions)] -pub struct ConfigBuilder { - sources: Vec>>, - config: Config, -} - -impl Default for ConfigBuilder { - fn default() -> Self { - Self { - sources: Vec::new(), - config: Config::default(), - } - } -} - -#[allow(clippy::module_name_repetitions)] -impl ConfigBuilder { - #[must_use] - pub fn add_source(mut self, source: Box>) -> Self { - self.sources.push(source); - self - } - - pub fn build(&mut self) -> Config { - let mut failed_sources = 0; - for source in &self.sources { - match source.load(&mut self.config) { - Ok(()) => 
(), - Err(e) => { - error!("Failed to load config: {:?}", e); - failed_sources += 1; - } - } - } - - if !self.sources.is_empty() && failed_sources == self.sources.len() { - debug!("All sources failed to load config, using default config."); - } - - if self.config.site.is_empty() { - self.config.site = "datadoghq.com".to_string(); - } - - // If `proxy_https` is not set, set it from `HTTPS_PROXY` environment variable - // if it exists - if let Ok(https_proxy) = std::env::var("HTTPS_PROXY") - && self.config.proxy_https.is_none() - { - self.config.proxy_https = Some(https_proxy); - } - - // If `proxy_https` is set, check if the site is in `NO_PROXY` environment variable - // or in the `proxy_no_proxy` config field. - if self.config.proxy_https.is_some() { - let site_in_no_proxy = std::env::var("NO_PROXY") - .is_ok_and(|no_proxy| no_proxy.contains(&self.config.site)) - || self - .config - .proxy_no_proxy - .iter() - .any(|no_proxy| no_proxy.contains(&self.config.site)); - if site_in_no_proxy { - self.config.proxy_https = None; - } - } - - // If extraction is not set, set it to the same as the propagation style - if self.config.trace_propagation_style_extract.is_empty() { - self.config - .trace_propagation_style_extract - .clone_from(&self.config.trace_propagation_style); - } - - // If Logs URL is not set, set it to the default - if self.config.logs_config_logs_dd_url.trim().is_empty() { - self.config.logs_config_logs_dd_url = build_fqdn_logs(self.config.site.clone()); - } else { - self.config.logs_config_logs_dd_url = - logs_intake_url(self.config.logs_config_logs_dd_url.as_str()); - } - - // If APM URL is not set, set it to the default - if self.config.apm_dd_url.is_empty() { - self.config.apm_dd_url = trace_intake_url(self.config.site.clone().as_str()); - } else { - // If APM URL is set, add the site to the URL - self.config.apm_dd_url = trace_intake_url_prefixed(self.config.apm_dd_url.as_str()); - } - - self.config.clone() - } -} +// 
--------------------------------------------------------------------------- +// Config — the resolved configuration struct +// --------------------------------------------------------------------------- #[derive(Debug, PartialEq, Clone)] #[allow(clippy::module_name_repetitions)] @@ -480,88 +192,256 @@ impl Default for Config { observability_pipelines_worker_logs_enabled: false, observability_pipelines_worker_logs_url: String::default(), - // APM - service_mapping: HashMap::new(), - apm_dd_url: String::default(), - apm_replace_tags: None, - apm_config_obfuscation_http_remove_query_string: false, - apm_config_obfuscation_http_remove_paths_with_digits: false, - apm_config_compression_level: 3, - apm_features: vec![], - apm_additional_endpoints: HashMap::new(), - apm_filter_tags_require: None, - apm_filter_tags_reject: None, - apm_filter_tags_regex_require: None, - apm_filter_tags_regex_reject: None, - trace_aws_service_representation_enabled: true, - trace_propagation_style: vec![ - TracePropagationStyle::Datadog, - TracePropagationStyle::TraceContext, - ], - trace_propagation_style_extract: vec![], - trace_propagation_extract_first: false, - trace_propagation_http_baggage_enabled: false, + // APM + service_mapping: HashMap::new(), + apm_dd_url: String::default(), + apm_replace_tags: None, + apm_config_obfuscation_http_remove_query_string: false, + apm_config_obfuscation_http_remove_paths_with_digits: false, + apm_config_compression_level: 3, + apm_features: vec![], + apm_additional_endpoints: HashMap::new(), + apm_filter_tags_require: None, + apm_filter_tags_reject: None, + apm_filter_tags_regex_require: None, + apm_filter_tags_regex_reject: None, + trace_aws_service_representation_enabled: true, + trace_propagation_style: vec![ + TracePropagationStyle::Datadog, + TracePropagationStyle::TraceContext, + ], + trace_propagation_style_extract: vec![], + trace_propagation_extract_first: false, + trace_propagation_http_baggage_enabled: false, + + // Metrics + 
metrics_config_compression_level: 3, + statsd_metric_namespace: None, + + // DogStatsD + // Defaults to None, which uses the OS default. + dogstatsd_so_rcvbuf: None, + // Defaults to 8192 internally. + dogstatsd_buffer_size: None, + // Defaults to 1024 internally. + dogstatsd_queue_size: None, + + // OTLP + otlp_config_traces_enabled: true, + otlp_config_traces_span_name_as_resource_name: false, + otlp_config_traces_span_name_remappings: HashMap::new(), + otlp_config_ignore_missing_datadog_fields: false, + otlp_config_receiver_protocols_http_endpoint: None, + otlp_config_receiver_protocols_grpc_endpoint: None, + otlp_config_receiver_protocols_grpc_transport: None, + otlp_config_receiver_protocols_grpc_max_recv_msg_size_mib: None, + otlp_config_metrics_enabled: false, // TODO(duncanista): Go Agent default is to true + otlp_config_metrics_resource_attributes_as_tags: false, + otlp_config_metrics_instrumentation_scope_metadata_as_tags: false, + otlp_config_metrics_tag_cardinality: None, + otlp_config_metrics_delta_ttl: None, + otlp_config_metrics_histograms_mode: None, + otlp_config_metrics_histograms_send_count_sum_metrics: false, + otlp_config_metrics_histograms_send_aggregation_metrics: false, + otlp_config_metrics_sums_cumulative_monotonic_mode: None, + otlp_config_metrics_sums_initial_cumulativ_monotonic_value: None, + otlp_config_metrics_summaries_mode: None, + otlp_config_traces_probabilistic_sampler_sampling_percentage: None, + otlp_config_logs_enabled: false, + + ext: E::default(), + } + } +} + +// --------------------------------------------------------------------------- +// Loading — entry points for building a Config +// --------------------------------------------------------------------------- + +#[allow(clippy::module_name_repetitions)] +#[inline] +#[must_use] +pub fn get_config(config_directory: &Path) -> Config { + get_config_with_extension(config_directory) +} + +/// Load configuration with a custom extension type. 
+/// +/// Consumers that need additional fields should call this with their +/// extension type instead of `get_config`. +#[allow(clippy::module_name_repetitions)] +#[inline] +#[must_use] +pub fn get_config_with_extension(config_directory: &Path) -> Config { + let path: std::path::PathBuf = config_directory.join("datadog.yaml"); + ConfigBuilder::default() + .add_source(Box::new(YamlConfigSource { path })) + .add_source(Box::new(EnvConfigSource)) + .build() +} + +// --------------------------------------------------------------------------- +// ConfigExtension — trait for additional configuration fields +// --------------------------------------------------------------------------- + +/// Trait that extension configs must implement to add additional configuration +/// fields beyond what the core provides. +/// +/// Extensions allow consumers to define their own external configuration fields +/// that are deserialized from environment variables and YAML files alongside +/// core fields via dual extraction. +/// +/// # Source type requirements +/// +/// The `Source` type must use `#[serde(default)]` on the struct and graceful +/// deserializers (e.g., `deserialize_optional_bool_from_anything`) on each field +/// to ensure that a single bad value doesn't fail the entire extraction. +/// +/// # Flat fields only +/// +/// A single `Source` type is used for both environment variable and YAML +/// extraction. This works when all extension fields are top-level (flat) in +/// the YAML file, which is the common case for extension configs: +/// +/// ```yaml +/// # Works: flat fields map naturally to both DD_* env vars and YAML keys +/// enhanced_metrics: true +/// capture_lambda_payload: false +/// ``` +/// +/// If you need nested YAML structures (e.g., `lambda: { enhanced_metrics: true }`) +/// that differ from the flat env var layout, implement `merge_from` with a +/// nested source struct and handle the mapping manually instead of using +/// `merge_fields!`. 
+pub trait ConfigExtension: Clone + Default + std::fmt::Debug + PartialEq { + /// Intermediate type for deserializing extension fields. + /// Used for both environment variable and YAML extraction. + type Source: Default + serde::de::DeserializeOwned + Clone + std::fmt::Debug; + + /// Merge parsed source fields into self. + fn merge_from(&mut self, source: &Self::Source); +} + +/// A no-op extension for consumers that don't need extra fields. +#[derive(Clone, Default, Debug, PartialEq)] +pub struct NoExtension; + +/// A no-op source for deserialization that accepts (and ignores) any input. +#[derive(Clone, Default, Debug, Deserialize)] +pub struct NoExtensionSource; + +impl ConfigExtension for NoExtension { + type Source = NoExtensionSource; + fn merge_from(&mut self, _source: &Self::Source) {} +} + +// --------------------------------------------------------------------------- +// ConfigBuilder — orchestrates loading from multiple sources +// --------------------------------------------------------------------------- + +#[derive(Debug, PartialEq)] +#[allow(clippy::module_name_repetitions)] +pub enum ConfigError { + ParseError(String), + UnsupportedField(String), +} + +#[allow(clippy::module_name_repetitions)] +pub trait ConfigSource { + fn load(&self, config: &mut Config) -> Result<(), ConfigError>; +} + +#[allow(clippy::module_name_repetitions)] +pub struct ConfigBuilder { + sources: Vec>>, + config: Config, +} + +impl Default for ConfigBuilder { + fn default() -> Self { + Self { + sources: Vec::new(), + config: Config::default(), + } + } +} + +#[allow(clippy::module_name_repetitions)] +impl ConfigBuilder { + #[must_use] + pub fn add_source(mut self, source: Box>) -> Self { + self.sources.push(source); + self + } + + pub fn build(&mut self) -> Config { + let mut failed_sources = 0; + for source in &self.sources { + match source.load(&mut self.config) { + Ok(()) => (), + Err(e) => { + error!("Failed to load config: {:?}", e); + failed_sources += 1; + } + } + } + + 
if !self.sources.is_empty() && failed_sources == self.sources.len() { + debug!("All sources failed to load config, using default config."); + } + + if self.config.site.is_empty() { + self.config.site = "datadoghq.com".to_string(); + } - // Metrics - metrics_config_compression_level: 3, - statsd_metric_namespace: None, + // If `proxy_https` is not set, set it from `HTTPS_PROXY` environment variable + // if it exists + if let Ok(https_proxy) = std::env::var("HTTPS_PROXY") + && self.config.proxy_https.is_none() + { + self.config.proxy_https = Some(https_proxy); + } - // DogStatsD - // Defaults to None, which uses the OS default. - dogstatsd_so_rcvbuf: None, - // Defaults to 8192 internally. - dogstatsd_buffer_size: None, - // Defaults to 1024 internally. - dogstatsd_queue_size: None, + // If `proxy_https` is set, check if the site is in `NO_PROXY` environment variable + // or in the `proxy_no_proxy` config field. + if self.config.proxy_https.is_some() { + let site_in_no_proxy = std::env::var("NO_PROXY") + .is_ok_and(|no_proxy| no_proxy.contains(&self.config.site)) + || self + .config + .proxy_no_proxy + .iter() + .any(|no_proxy| no_proxy.contains(&self.config.site)); + if site_in_no_proxy { + self.config.proxy_https = None; + } + } - // OTLP - otlp_config_traces_enabled: true, - otlp_config_traces_span_name_as_resource_name: false, - otlp_config_traces_span_name_remappings: HashMap::new(), - otlp_config_ignore_missing_datadog_fields: false, - otlp_config_receiver_protocols_http_endpoint: None, - otlp_config_receiver_protocols_grpc_endpoint: None, - otlp_config_receiver_protocols_grpc_transport: None, - otlp_config_receiver_protocols_grpc_max_recv_msg_size_mib: None, - otlp_config_metrics_enabled: false, // TODO(duncanista): Go Agent default is to true - otlp_config_metrics_resource_attributes_as_tags: false, - otlp_config_metrics_instrumentation_scope_metadata_as_tags: false, - otlp_config_metrics_tag_cardinality: None, - otlp_config_metrics_delta_ttl: None, - 
otlp_config_metrics_histograms_mode: None, - otlp_config_metrics_histograms_send_count_sum_metrics: false, - otlp_config_metrics_histograms_send_aggregation_metrics: false, - otlp_config_metrics_sums_cumulative_monotonic_mode: None, - otlp_config_metrics_sums_initial_cumulativ_monotonic_value: None, - otlp_config_metrics_summaries_mode: None, - otlp_config_traces_probabilistic_sampler_sampling_percentage: None, - otlp_config_logs_enabled: false, + // If extraction is not set, set it to the same as the propagation style + if self.config.trace_propagation_style_extract.is_empty() { + self.config + .trace_propagation_style_extract + .clone_from(&self.config.trace_propagation_style); + } - ext: E::default(), + // If Logs URL is not set, set it to the default + if self.config.logs_config_logs_dd_url.trim().is_empty() { + self.config.logs_config_logs_dd_url = build_fqdn_logs(self.config.site.clone()); + } else { + self.config.logs_config_logs_dd_url = + logs_intake_url(self.config.logs_config_logs_dd_url.as_str()); } - } -} -#[allow(clippy::module_name_repetitions)] -#[inline] -#[must_use] -pub fn get_config(config_directory: &Path) -> Config { - get_config_with_extension(config_directory) -} + // If APM URL is not set, set it to the default + if self.config.apm_dd_url.is_empty() { + self.config.apm_dd_url = trace_intake_url(self.config.site.clone().as_str()); + } else { + // If APM URL is set, add the site to the URL + self.config.apm_dd_url = trace_intake_url_prefixed(self.config.apm_dd_url.as_str()); + } -/// Load configuration with a custom extension type. -/// -/// Consumers that need agent-specific fields (e.g., Lambda, Cloud Run) should -/// call this with their extension type instead of `get_config`. 
-#[allow(clippy::module_name_repetitions)] -#[inline] -#[must_use] -pub fn get_config_with_extension(config_directory: &Path) -> Config { - let path: std::path::PathBuf = config_directory.join("datadog.yaml"); - ConfigBuilder::default() - .add_source(Box::new(YamlConfigSource { path })) - .add_source(Box::new(EnvConfigSource)) - .build() + self.config.clone() + } } #[inline] @@ -583,6 +463,145 @@ fn logs_intake_url(url: &str) -> String { format!("https://{url}") } +// --------------------------------------------------------------------------- +// Merge macros — used by sources and extension implementations +// --------------------------------------------------------------------------- + +/// Helper macro to merge `Option<String>` fields to `String` fields +/// +/// Providing one field argument merges the source field into the config field of the same name +/// when the source value is `Some`. +/// +/// Providing two field arguments does the same, but names the config field and the source field +/// separately so that they may differ. +#[macro_export] +macro_rules! merge_string { + ($config:expr, $config_field:ident, $source:expr, $source_field:ident) => { + if let Some(value) = &$source.$source_field { + $config.$config_field.clone_from(value); + } + }; + ($config:expr, $source:expr, $field:ident) => { + if let Some(value) = &$source.$field { + $config.$field.clone_from(value); + } + }; +} + +/// Helper macro to merge `Option<T>` fields where `T` implements `Clone` +/// +/// Providing one field argument merges the source field into the config field of the same name +/// when the source value is `Some`. +/// +/// Providing two field arguments does the same, but names the config field and the source field +/// separately so that they may differ. +#[macro_export] +macro_rules!
merge_option { + ($config:expr, $config_field:ident, $source:expr, $source_field:ident) => { + if $source.$source_field.is_some() { + $config.$config_field.clone_from(&$source.$source_field); + } + }; + ($config:expr, $source:expr, $field:ident) => { + if $source.$field.is_some() { + $config.$field.clone_from(&$source.$field); + } + }; +} + +/// Helper macro to merge `Option<T>` fields to `T` fields when the `Option<T>` is `Some` +/// +/// Providing one field argument merges the source field into the config field of the same name +/// when the source value is `Some`. +/// +/// Providing two field arguments does the same, but names the config field and the source field +/// separately so that they may differ. +#[macro_export] +macro_rules! merge_option_to_value { + ($config:expr, $config_field:ident, $source:expr, $source_field:ident) => { + if let Some(value) = &$source.$source_field { + $config.$config_field = value.clone(); + } + }; + ($config:expr, $source:expr, $field:ident) => { + if let Some(value) = &$source.$field { + $config.$field = value.clone(); + } + }; +} + +/// Helper macro to merge `Vec` fields when `Vec` is not empty +/// +/// Providing one field argument merges the source field into the config field of the same name +/// when the source value is not empty. +/// +/// Providing two field arguments does the same, but names the config field and the source field +/// separately so that they may differ. +#[macro_export] +macro_rules! merge_vec { + ($config:expr, $config_field:ident, $source:expr, $source_field:ident) => { + if !$source.$source_field.is_empty() { + $config.$config_field.clone_from(&$source.$source_field); + } + }; + ($config:expr, $source:expr, $field:ident) => { + if !$source.$field.is_empty() { + $config.$field.clone_from(&$source.$field); + } + }; +} + +/// Helper macro to merge `HashMap` fields when `HashMap` is not empty +/// +/// Providing one field argument merges the source field into the config field of the same name +/// when the source value is not empty.
+/// +/// Providing two field arguments does the same, but names the config field and the source field +/// separately so that they may differ; either form merges only when the source value is not empty. +#[macro_export] +macro_rules! merge_hashmap { + ($config:expr, $config_field:ident, $source:expr, $source_field:ident) => { + if !$source.$source_field.is_empty() { + $config.$config_field.clone_from(&$source.$source_field); + } + }; + ($config:expr, $source:expr, $field:ident) => { + if !$source.$field.is_empty() { + $config.$field.clone_from(&$source.$field); + } + }; +} + +/// Batch-merge extension fields from a source struct. +/// +/// Groups fields by merge strategy so you don't have to write individual +/// `merge_string!` / `merge_option_to_value!` / `merge_option!` calls. +/// +/// ```ignore +/// merge_fields!(self, source, +/// string: [api_key_secret_arn, kms_api_key], +/// value: [enhanced_metrics, capture_lambda_payload], +/// option: [span_dedup_timeout, appsec_rules], +/// ); +/// ``` +#[macro_export] +macro_rules! merge_fields { + // Internal rules dispatched by keyword + (@string $config:expr, $source:expr, [$($field:ident),* $(,)?]) => { + $( $crate::merge_string!($config, $source, $field); )* + }; + (@value $config:expr, $source:expr, [$($field:ident),* $(,)?]) => { + $( $crate::merge_option_to_value!($config, $source, $field); )* + }; + (@option $config:expr, $source:expr, [$($field:ident),* $(,)?]) => { + $( $crate::merge_option!($config, $source, $field); )* + }; + // Public entry point: accepts any combination of groups in any order + ($config:expr, $source:expr, $($kind:ident: [$($field:ident),* $(,)?]),* $(,)?) => { + $( $crate::merge_fields!(@$kind $config, $source, [$($field),*]); )* + }; +} + #[cfg_attr(coverage_nightly, coverage(off))] // Test modules skew coverage metrics #[cfg(test)] pub mod tests {