diff --git a/crates/datadog-agent-config/Cargo.toml b/crates/datadog-agent-config/Cargo.toml index 222d726..b9477ac 100644 --- a/crates/datadog-agent-config/Cargo.toml +++ b/crates/datadog-agent-config/Cargo.toml @@ -4,9 +4,6 @@ version = "0.1.0" edition.workspace = true license.workspace = true -[lib] -path = "mod.rs" - [dependencies] figment = { version = "0.10", default-features = false, features = ["yaml", "env"] } libdd-trace-obfuscation = { git = "https://github.com/DataDog/libdatadog", rev = "8c88979985154d6d97c0fc2ca9039682981eacad" } diff --git a/crates/datadog-agent-config/README.md b/crates/datadog-agent-config/README.md new file mode 100644 index 0000000..58f3962 --- /dev/null +++ b/crates/datadog-agent-config/README.md @@ -0,0 +1,110 @@ +# datadog-agent-config + +Shared configuration crate for Datadog serverless agents. Provides a typed `Config` struct with built-in loading from environment variables (`DD_*`) and YAML files (`datadog.yaml`), with environment variables taking precedence. + +## Core features + +- **Typed config struct** with fields for site, API key, proxy, logs, APM, metrics, DogStatsD, OTLP, and trace propagation +- **Two built-in sources**: `EnvConfigSource` (reads `DD_*` / `DATADOG_*` env vars) and `YamlConfigSource` (reads `datadog.yaml`) +- **Graceful deserialization**: every field uses forgiving deserializers that fall back to defaults on bad input, so one misconfigured value never crashes the whole config +- **Extensible via `ConfigExtension`**: consumers can define additional configuration fields without modifying this crate + +## Quick start + +```rust +use std::path::Path; +use datadog_agent_config::get_config; + +let config = get_config(Path::new("/var/task")); +println!("site: {}", config.site); +println!("api_key: {}", config.api_key); +``` + +## Extensible configuration + +Consumers that need additional fields (e.g., Lambda-specific settings) implement the `ConfigExtension` trait instead of forking or copy-pasting the crate. + +### 1. Define the extension and its source + +```rust +use datadog_agent_config::{ + ConfigExtension, merge_fields, + deserialize_optional_string, deserialize_optional_bool_from_anything, +}; +use serde::Deserialize; + +#[derive(Debug, PartialEq, Clone)] +pub struct MyExtension { + pub custom_flag: bool, + pub custom_name: String, +} + +impl Default for MyExtension { + fn default() -> Self { + Self { custom_flag: false, custom_name: String::new() } + } +} + +/// Source struct for deserialization. Must use #[serde(default)] and +/// graceful deserializers so one bad field doesn't fail the whole extraction. +#[derive(Debug, Clone, Default, Deserialize)] +#[serde(default)] +pub struct MySource { + #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] + pub custom_flag: Option, + #[serde(deserialize_with = "deserialize_optional_string")] + pub custom_name: Option, +} + +impl ConfigExtension for MyExtension { + type Source = MySource; + + fn merge_from(&mut self, source: &MySource) { + merge_fields!(self, source, + string: [custom_name], + value: [custom_flag], + ); + } +} +``` + +### 2. Load config with the extension + +```rust +use std::path::Path; +use datadog_agent_config::{Config, get_config_with_extension}; + +type MyConfig = Config; + +let config: MyConfig = get_config_with_extension(Path::new("/var/task")); + +// Core fields +println!("site: {}", config.site); + +// Extension fields +println!("custom_flag: {}", config.ext.custom_flag); +println!("custom_name: {}", config.ext.custom_name); +``` + +Extension fields are populated from both `DD_*` environment variables and `datadog.yaml` using dual extraction: the core fields and extension fields are extracted independently from the same figment instance, so they don't interfere with each other. + +### Flat fields only + +The single `Source` type is used for both env var and YAML extraction. This works when extension fields are top-level (flat) in the YAML file, which is the common case. If you need nested YAML structures that differ from the flat env var layout, implement `merge_from` with a nested source struct and handle the mapping manually. + +### merge_fields! macro + +The `merge_fields!` macro reduces boilerplate in `merge_from` by batching fields by merge strategy: + +- `string`: merges `Option` into `String` (sets value if `Some`) +- `value`: merges `Option` into `T` (sets value if `Some`) +- `option`: merges `Option` into `Option` (overwrites if `Some`) + +Custom merge logic (e.g., OR-ing two boolean fields together) goes after the macro call in the same method. + +## Config loading precedence + +1. `Config::default()` (hardcoded defaults) +2. `datadog.yaml` values (lower priority) +3. `DD_*` environment variables (highest priority) +4. Post-processing defaults (site, proxy, logs/APM URL construction) diff --git a/crates/datadog-agent-config/additional_endpoints.rs b/crates/datadog-agent-config/src/deserializers/additional_endpoints.rs similarity index 100% rename from crates/datadog-agent-config/additional_endpoints.rs rename to crates/datadog-agent-config/src/deserializers/additional_endpoints.rs diff --git a/crates/datadog-agent-config/apm_replace_rule.rs b/crates/datadog-agent-config/src/deserializers/apm_replace_rule.rs similarity index 100% rename from crates/datadog-agent-config/apm_replace_rule.rs rename to crates/datadog-agent-config/src/deserializers/apm_replace_rule.rs diff --git a/crates/datadog-agent-config/flush_strategy.rs b/crates/datadog-agent-config/src/deserializers/flush_strategy.rs similarity index 100% rename from crates/datadog-agent-config/flush_strategy.rs rename to crates/datadog-agent-config/src/deserializers/flush_strategy.rs diff --git a/crates/datadog-agent-config/src/deserializers/helpers.rs b/crates/datadog-agent-config/src/deserializers/helpers.rs new file mode 100644 index 0000000..058c0ce --- /dev/null +++ b/crates/datadog-agent-config/src/deserializers/helpers.rs @@ -0,0 +1,372 @@ +use serde::{Deserialize, Deserializer}; +use serde_aux::prelude::deserialize_bool_from_anything; +use serde_json::Value; + +use std::collections::HashMap; +use std::fmt; +use std::time::Duration; +use tracing::warn; + +use crate::TracePropagationStyle; + +pub fn deserialize_optional_string<'de, D>(deserializer: D) -> Result, D::Error> +where + D: Deserializer<'de>, +{ + match Value::deserialize(deserializer)? { + Value::String(s) => Ok(Some(s)), + other => { + warn!( + "Failed to parse value, expected a string, got: {}, ignoring", + other + ); + Ok(None) + } + } +} + +pub fn deserialize_string_or_int<'de, D>(deserializer: D) -> Result, D::Error> +where + D: Deserializer<'de>, +{ + let value = Value::deserialize(deserializer)?; + match value { + Value::String(s) => { + if s.trim().is_empty() { + Ok(None) + } else { + Ok(Some(s)) + } + } + Value::Number(n) => Ok(Some(n.to_string())), + _ => { + warn!("Failed to parse value, expected a string or an integer, ignoring"); + Ok(None) + } + } +} + +pub fn deserialize_optional_bool_from_anything<'de, D>( + deserializer: D, +) -> Result, D::Error> +where + D: Deserializer<'de>, +{ + // First try to deserialize as Option<_> to handle null/missing values + let opt: Option = Option::deserialize(deserializer)?; + + match opt { + None => Ok(None), + Some(value) => match deserialize_bool_from_anything(value) { + Ok(bool_result) => Ok(Some(bool_result)), + Err(e) => { + warn!("Failed to parse bool value: {}, ignoring", e); + Ok(None) + } + }, + } +} + +/// Parse a single "key:value" string into a (key, value) tuple +/// Returns None if the string is invalid (e.g., missing colon, empty key/value) +fn parse_key_value_tag(tag: &str) -> Option<(String, String)> { + let parts: Vec<&str> = tag.splitn(2, ':').collect(); + if parts.len() == 2 && !parts[0].is_empty() && !parts[1].is_empty() { + Some((parts[0].to_string(), parts[1].to_string())) + } else { + warn!( + "Failed to parse tag '{}', expected format 'key:value', ignoring", + tag + ); + None + } +} + +pub fn deserialize_key_value_pairs<'de, D>( + deserializer: D, +) -> Result, D::Error> +where + D: Deserializer<'de>, +{ + struct KeyValueVisitor; + + impl serde::de::Visitor<'_> for KeyValueVisitor { + type Value = HashMap; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("a string in format 'key1:value1,key2:value2' or 'key1:value1'") + } + + fn visit_str(self, value: &str) -> Result + where + E: serde::de::Error, + { + let mut map = HashMap::new(); + for tag in value.split(&[',', ' ']) { + if tag.is_empty() { + continue; + } + if let Some((key, val)) = parse_key_value_tag(tag) { + map.insert(key, val); + } + } + + Ok(map) + } + + fn visit_u64(self, value: u64) -> Result + where + E: serde::de::Error, + { + warn!( + "Failed to parse tags: expected string in format 'key:value', got number {}, ignoring", + value + ); + Ok(HashMap::new()) + } + + fn visit_i64(self, value: i64) -> Result + where + E: serde::de::Error, + { + warn!( + "Failed to parse tags: expected string in format 'key:value', got number {}, ignoring", + value + ); + Ok(HashMap::new()) + } + + fn visit_f64(self, value: f64) -> Result + where + E: serde::de::Error, + { + warn!( + "Failed to parse tags: expected string in format 'key:value', got number {}, ignoring", + value + ); + Ok(HashMap::new()) + } + + fn visit_bool(self, value: bool) -> Result + where + E: serde::de::Error, + { + warn!( + "Failed to parse tags: expected string in format 'key:value', got boolean {}, ignoring", + value + ); + Ok(HashMap::new()) + } + } + + deserializer.deserialize_any(KeyValueVisitor) +} + +pub fn deserialize_array_from_comma_separated_string<'de, D>( + deserializer: D, +) -> Result, D::Error> +where + D: Deserializer<'de>, +{ + let s: String = String::deserialize(deserializer)?; + Ok(s.split(',') + .map(|feature| feature.trim().to_string()) + .filter(|feature| !feature.is_empty()) + .collect()) +} + +pub fn deserialize_key_value_pair_array_to_hashmap<'de, D>( + deserializer: D, +) -> Result, D::Error> +where + D: Deserializer<'de>, +{ + let array: Vec = match Vec::deserialize(deserializer) { + Ok(v) => v, + Err(e) => { + warn!("Failed to deserialize tags array: {e}, ignoring"); + return Ok(HashMap::new()); + } + }; + let mut map = HashMap::new(); + for s in array { + if let Some((key, val)) = parse_key_value_tag(&s) { + map.insert(key, val); + } + } + Ok(map) +} + +/// Deserialize APM filter tags from space-separated "key:value" pairs, also support key-only tags +pub fn deserialize_apm_filter_tags<'de, D>(deserializer: D) -> Result>, D::Error> +where + D: Deserializer<'de>, +{ + let opt: Option = Option::deserialize(deserializer)?; + + match opt { + None => Ok(None), + Some(s) if s.trim().is_empty() => Ok(None), + Some(s) => { + let tags: Vec = s + .split_whitespace() + .filter_map(|pair| { + let parts: Vec<&str> = pair.splitn(2, ':').collect(); + if parts.len() == 2 { + let key = parts[0].trim(); + let value = parts[1].trim(); + if key.is_empty() { + None + } else if value.is_empty() { + Some(key.to_string()) + } else { + Some(format!("{key}:{value}")) + } + } else if parts.len() == 1 { + let key = parts[0].trim(); + if key.is_empty() { + None + } else { + Some(key.to_string()) + } + } else { + None + } + }) + .collect(); + + if tags.is_empty() { + Ok(None) + } else { + Ok(Some(tags)) + } + } + } +} + +pub fn deserialize_option_lossless<'de, D, T>(deserializer: D) -> Result, D::Error> +where + D: Deserializer<'de>, + T: Deserialize<'de>, +{ + match Option::::deserialize(deserializer) { + Ok(value) => Ok(value), + Err(e) => { + warn!("Failed to deserialize optional value: {}, ignoring", e); + Ok(None) + } + } +} + +/// Gracefully deserialize any field, falling back to `T::default()` on error. +/// +/// This ensures that a single field with the wrong type never fails the entire +/// struct extraction. Works for any `T` that implements `Deserialize + Default`: +/// - `Option` defaults to `None` +/// - `Vec` defaults to `[]` +/// - `HashMap` defaults to `{}` +/// - Structs with `#[derive(Default)]` use their default +pub fn deserialize_with_default<'de, D, T>(deserializer: D) -> Result +where + D: Deserializer<'de>, + T: Deserialize<'de> + Default, +{ + match T::deserialize(deserializer) { + Ok(value) => Ok(value), + Err(e) => { + warn!("Failed to deserialize field: {}, using default", e); + Ok(T::default()) + } + } +} + +pub fn deserialize_optional_duration_from_microseconds<'de, D: Deserializer<'de>>( + deserializer: D, +) -> Result, D::Error> { + match Option::::deserialize(deserializer) { + Ok(opt) => Ok(opt.map(Duration::from_micros)), + Err(e) => { + warn!("Failed to deserialize duration (microseconds): {e}, ignoring"); + Ok(None) + } + } +} + +pub fn deserialize_optional_duration_from_seconds<'de, D: Deserializer<'de>>( + deserializer: D, +) -> Result, D::Error> { + // Deserialize into a generic Value first to avoid propagating type errors, + // then try to extract a duration from it. + match Value::deserialize(deserializer) { + Ok(Value::Number(n)) => { + if let Some(u) = n.as_u64() { + Ok(Some(Duration::from_secs(u))) + } else if let Some(i) = n.as_i64() { + if i < 0 { + warn!("Failed to parse duration: negative durations are not allowed, ignoring"); + Ok(None) + } else { + Ok(Some(Duration::from_secs(i as u64))) + } + } else if let Some(f) = n.as_f64() { + if f < 0.0 { + warn!("Failed to parse duration: negative durations are not allowed, ignoring"); + Ok(None) + } else { + Ok(Some(Duration::from_secs_f64(f))) + } + } else { + warn!("Failed to parse duration: unsupported number format, ignoring"); + Ok(None) + } + } + Ok(Value::Null) => Ok(None), + Ok(other) => { + warn!("Failed to parse duration: expected number, got {other}, ignoring"); + Ok(None) + } + Err(e) => { + warn!("Failed to deserialize duration: {e}, ignoring"); + Ok(None) + } + } +} + +// Like deserialize_optional_duration_from_seconds(), but return None if the value is 0 +pub fn deserialize_optional_duration_from_seconds_ignore_zero<'de, D: Deserializer<'de>>( + deserializer: D, +) -> Result, D::Error> { + let duration: Option = deserialize_optional_duration_from_seconds(deserializer)?; + if duration.is_some_and(|d| d.as_secs() == 0) { + return Ok(None); + } + Ok(duration) +} + +pub fn deserialize_trace_propagation_style<'de, D>( + deserializer: D, +) -> Result, D::Error> +where + D: Deserializer<'de>, +{ + use std::str::FromStr; + let s: String = match String::deserialize(deserializer) { + Ok(s) => s, + Err(e) => { + warn!("Failed to deserialize trace propagation style: {e}, ignoring"); + return Ok(Vec::new()); + } + }; + + Ok(s.split(',') + .filter_map( + |style| match TracePropagationStyle::from_str(style.trim()) { + Ok(parsed_style) => Some(parsed_style), + Err(e) => { + warn!("Failed to parse trace propagation style: {e}, ignoring"); + None + } + }, + ) + .collect()) +} diff --git a/crates/datadog-agent-config/log_level.rs b/crates/datadog-agent-config/src/deserializers/log_level.rs similarity index 100% rename from crates/datadog-agent-config/log_level.rs rename to crates/datadog-agent-config/src/deserializers/log_level.rs diff --git a/crates/datadog-agent-config/logs_additional_endpoints.rs b/crates/datadog-agent-config/src/deserializers/logs_additional_endpoints.rs similarity index 100% rename from crates/datadog-agent-config/logs_additional_endpoints.rs rename to crates/datadog-agent-config/src/deserializers/logs_additional_endpoints.rs diff --git a/crates/datadog-agent-config/src/deserializers/mod.rs b/crates/datadog-agent-config/src/deserializers/mod.rs new file mode 100644 index 0000000..e88c90b --- /dev/null +++ b/crates/datadog-agent-config/src/deserializers/mod.rs @@ -0,0 +1,8 @@ +pub mod additional_endpoints; +pub mod apm_replace_rule; +pub mod flush_strategy; +pub mod helpers; +pub mod log_level; +pub mod logs_additional_endpoints; +pub mod processing_rule; +pub mod service_mapping; diff --git a/crates/datadog-agent-config/processing_rule.rs b/crates/datadog-agent-config/src/deserializers/processing_rule.rs similarity index 100% rename from crates/datadog-agent-config/processing_rule.rs rename to crates/datadog-agent-config/src/deserializers/processing_rule.rs diff --git a/crates/datadog-agent-config/service_mapping.rs b/crates/datadog-agent-config/src/deserializers/service_mapping.rs similarity index 100% rename from crates/datadog-agent-config/service_mapping.rs rename to crates/datadog-agent-config/src/deserializers/service_mapping.rs diff --git a/crates/datadog-agent-config/mod.rs b/crates/datadog-agent-config/src/lib.rs similarity index 74% rename from crates/datadog-agent-config/mod.rs rename to crates/datadog-agent-config/src/lib.rs index 6fc858a..3544ab5 100644 --- a/crates/datadog-agent-config/mod.rs +++ b/crates/datadog-agent-config/src/lib.rs @@ -1,244 +1,45 @@ -pub mod additional_endpoints; -pub mod apm_replace_rule; -pub mod env; -pub mod flush_strategy; -pub mod log_level; -pub mod logs_additional_endpoints; -pub mod processing_rule; -pub mod service_mapping; -pub mod yaml; +pub mod deserializers; +pub mod sources; + +// Re-export submodules at the crate root so existing imports like +// `crate::flush_strategy::FlushStrategy` and `crate::env::EnvConfigSource` keep working. +pub use deserializers::{ + additional_endpoints, apm_replace_rule, flush_strategy, log_level, logs_additional_endpoints, + processing_rule, service_mapping, +}; +pub use sources::{env, yaml}; pub use datadog_opentelemetry::configuration::TracePropagationStyle; +// Re-export all helper deserializers so consumers and internal modules can +// use `crate::deserialize_optional_string` etc. without reaching into submodules. +pub use deserializers::helpers::*; use libdd_trace_obfuscation::replacer::ReplaceRule; use libdd_trace_utils::config_utils::{trace_intake_url, trace_intake_url_prefixed}; -use serde::{Deserialize, Deserializer}; -use serde_aux::prelude::deserialize_bool_from_anything; -use serde_json::Value; +use serde::Deserialize; +use std::collections::HashMap; use std::path::Path; -use std::time::Duration; -use std::{collections::HashMap, fmt}; -use tracing::{debug, error, warn}; +use tracing::{debug, error}; use crate::{ apm_replace_rule::deserialize_apm_replace_rules, env::EnvConfigSource, - flush_strategy::FlushStrategy, log_level::LogLevel, logs_additional_endpoints::LogsAdditionalEndpoint, processing_rule::{ProcessingRule, deserialize_processing_rules}, yaml::YamlConfigSource, }; -/// Helper macro to merge Option fields to String fields -/// -/// Providing one field argument will merge the value from the source config field into the config -/// field. -/// -/// Providing two field arguments will merge the value from the source config field into the config -/// field if the value is not empty. -#[macro_export] -macro_rules! merge_string { - ($config:expr, $config_field:ident, $source:expr, $source_field:ident) => { - if let Some(value) = &$source.$source_field { - $config.$config_field.clone_from(value); - } - }; - ($config:expr, $source:expr, $field:ident) => { - if let Some(value) = &$source.$field { - $config.$field.clone_from(value); - } - }; -} - -/// Helper macro to merge Option fields where T implements Clone -/// -/// Providing one field argument will merge the value from the source config field into the config -/// field. -/// -/// Providing two field arguments will merge the value from the source config field into the config -/// field if the value is not empty. -#[macro_export] -macro_rules! merge_option { - ($config:expr, $config_field:ident, $source:expr, $source_field:ident) => { - if $source.$source_field.is_some() { - $config.$config_field.clone_from(&$source.$source_field); - } - }; - ($config:expr, $source:expr, $field:ident) => { - if $source.$field.is_some() { - $config.$field.clone_from(&$source.$field); - } - }; -} - -/// Helper macro to merge Option fields to T fields when Option is Some -/// -/// Providing one field argument will merge the value from the source config field into the config -/// field. -/// -/// Providing two field arguments will merge the value from the source config field into the config -/// field if the value is not empty. -#[macro_export] -macro_rules! merge_option_to_value { - ($config:expr, $config_field:ident, $source:expr, $source_field:ident) => { - if let Some(value) = &$source.$source_field { - $config.$config_field = value.clone(); - } - }; - ($config:expr, $source:expr, $field:ident) => { - if let Some(value) = &$source.$field { - $config.$field = value.clone(); - } - }; -} - -/// Helper macro to merge `Vec` fields when `Vec` is not empty -/// -/// Providing one field argument will merge the value from the source config field into the config -/// field. -/// -/// Providing two field arguments will merge the value from the source config field into the config -/// field if the value is not empty. -#[macro_export] -macro_rules! merge_vec { - ($config:expr, $config_field:ident, $source:expr, $source_field:ident) => { - if !$source.$source_field.is_empty() { - $config.$config_field.clone_from(&$source.$source_field); - } - }; - ($config:expr, $source:expr, $field:ident) => { - if !$source.$field.is_empty() { - $config.$field.clone_from(&$source.$field); - } - }; -} - -// nit: these will replace one map with the other, not merge the maps togehter, right? -/// Helper macro to merge `HashMap` fields when `HashMap` is not empty -/// -/// Providing one field argument will merge the value from the source config field into the config -/// field. -/// -/// Providing two field arguments will merge the value from the source config field into the config -/// field if the value is not empty. -#[macro_export] -macro_rules! merge_hashmap { - ($config:expr, $config_field:ident, $source:expr, $source_field:ident) => { - if !$source.$source_field.is_empty() { - $config.$config_field.clone_from(&$source.$source_field); - } - }; - ($config:expr, $source:expr, $field:ident) => { - if !$source.$field.is_empty() { - $config.$field.clone_from(&$source.$field); - } - }; -} - -#[derive(Debug, PartialEq)] -#[allow(clippy::module_name_repetitions)] -pub enum ConfigError { - ParseError(String), - UnsupportedField(String), -} - -#[allow(clippy::module_name_repetitions)] -pub trait ConfigSource { - fn load(&self, config: &mut Config) -> Result<(), ConfigError>; -} - -#[derive(Default)] -#[allow(clippy::module_name_repetitions)] -pub struct ConfigBuilder { - sources: Vec>, - config: Config, -} - -#[allow(clippy::module_name_repetitions)] -impl ConfigBuilder { - #[must_use] - pub fn add_source(mut self, source: Box) -> Self { - self.sources.push(source); - self - } - - pub fn build(&mut self) -> Config { - let mut failed_sources = 0; - for source in &self.sources { - match source.load(&mut self.config) { - Ok(()) => (), - Err(e) => { - error!("Failed to load config: {:?}", e); - failed_sources += 1; - } - } - } - - if !self.sources.is_empty() && failed_sources == self.sources.len() { - debug!("All sources failed to load config, using default config."); - } - - if self.config.site.is_empty() { - self.config.site = "datadoghq.com".to_string(); - } - - // If `proxy_https` is not set, set it from `HTTPS_PROXY` environment variable - // if it exists - if let Ok(https_proxy) = std::env::var("HTTPS_PROXY") - && self.config.proxy_https.is_none() - { - self.config.proxy_https = Some(https_proxy); - } - - // If `proxy_https` is set, check if the site is in `NO_PROXY` environment variable - // or in the `proxy_no_proxy` config field. - if self.config.proxy_https.is_some() { - let site_in_no_proxy = std::env::var("NO_PROXY") - .is_ok_and(|no_proxy| no_proxy.contains(&self.config.site)) - || self - .config - .proxy_no_proxy - .iter() - .any(|no_proxy| no_proxy.contains(&self.config.site)); - if site_in_no_proxy { - self.config.proxy_https = None; - } - } - - // If extraction is not set, set it to the same as the propagation style - if self.config.trace_propagation_style_extract.is_empty() { - self.config - .trace_propagation_style_extract - .clone_from(&self.config.trace_propagation_style); - } - - // If Logs URL is not set, set it to the default - if self.config.logs_config_logs_dd_url.trim().is_empty() { - self.config.logs_config_logs_dd_url = build_fqdn_logs(self.config.site.clone()); - } else { - self.config.logs_config_logs_dd_url = - logs_intake_url(self.config.logs_config_logs_dd_url.as_str()); - } - - // If APM URL is not set, set it to the default - if self.config.apm_dd_url.is_empty() { - self.config.apm_dd_url = trace_intake_url(self.config.site.clone().as_str()); - } else { - // If APM URL is set, add the site to the URL - self.config.apm_dd_url = trace_intake_url_prefixed(self.config.apm_dd_url.as_str()); - } - - self.config.clone() - } -} +// --------------------------------------------------------------------------- +// Config — the resolved configuration struct +// --------------------------------------------------------------------------- #[derive(Debug, PartialEq, Clone)] #[allow(clippy::module_name_repetitions)] #[allow(clippy::struct_excessive_bools)] -pub struct Config { +pub struct Config { pub site: String, pub api_key: String, pub log_level: LogLevel, @@ -349,28 +150,12 @@ pub struct Config { // - Logs pub otlp_config_logs_enabled: bool, - // AWS Lambda - pub api_key_secret_arn: String, - pub kms_api_key: String, - pub api_key_ssm_arn: String, - pub serverless_logs_enabled: bool, - pub serverless_flush_strategy: FlushStrategy, - pub enhanced_metrics: bool, - pub lambda_proc_enhanced_metrics: bool, - pub capture_lambda_payload: bool, - pub capture_lambda_payload_max_depth: u32, - pub compute_trace_stats_on_extension: bool, - pub span_dedup_timeout: Option, - pub api_key_secret_reload_interval: Option, - - pub serverless_appsec_enabled: bool, - pub appsec_rules: Option, - pub appsec_waf_timeout: Duration, - pub api_security_enabled: bool, - pub api_security_sample_delay: Duration, + /// Agent-specific extension fields defined by the consumer. + /// Use `NoExtension` (the default) when no extra fields are needed. + pub ext: E, } -impl Default for Config { +impl Default for Config { fn default() -> Self { Self { site: String::default(), @@ -464,33 +249,30 @@ impl Default for Config { otlp_config_traces_probabilistic_sampler_sampling_percentage: None, otlp_config_logs_enabled: false, - // AWS Lambda - api_key_secret_arn: String::default(), - kms_api_key: String::default(), - api_key_ssm_arn: String::default(), - serverless_logs_enabled: true, - serverless_flush_strategy: FlushStrategy::Default, - enhanced_metrics: true, - lambda_proc_enhanced_metrics: true, - capture_lambda_payload: false, - capture_lambda_payload_max_depth: 10, - compute_trace_stats_on_extension: false, - span_dedup_timeout: None, - api_key_secret_reload_interval: None, - - serverless_appsec_enabled: false, - appsec_rules: None, - appsec_waf_timeout: Duration::from_millis(5), - api_security_enabled: true, - api_security_sample_delay: Duration::from_secs(30), + ext: E::default(), } } } +// --------------------------------------------------------------------------- +// Loading — entry points for building a Config +// --------------------------------------------------------------------------- + #[allow(clippy::module_name_repetitions)] #[inline] #[must_use] pub fn get_config(config_directory: &Path) -> Config { + get_config_with_extension(config_directory) +} + +/// Load configuration with a custom extension type. +/// +/// Consumers that need additional fields should call this with their +/// extension type instead of `get_config`. +#[allow(clippy::module_name_repetitions)] +#[inline] +#[must_use] +pub fn get_config_with_extension(config_directory: &Path) -> Config { let path: std::path::PathBuf = config_directory.join("datadog.yaml"); ConfigBuilder::default() .add_source(Box::new(YamlConfigSource { path })) @@ -498,385 +280,326 @@ pub fn get_config(config_directory: &Path) -> Config { .build() } -#[inline] -#[must_use] -fn build_fqdn_logs(site: String) -> String { - format!("https://http-intake.logs.{site}") +// --------------------------------------------------------------------------- +// ConfigExtension — trait for additional configuration fields +// --------------------------------------------------------------------------- + +/// Trait that extension configs must implement to add additional configuration +/// fields beyond what the core provides. +/// +/// Extensions allow consumers to define their own external configuration fields +/// that are deserialized from environment variables and YAML files alongside +/// core fields via dual extraction. +/// +/// # Source type requirements +/// +/// The `Source` type must use `#[serde(default)]` on the struct and graceful +/// deserializers (e.g., `deserialize_optional_bool_from_anything`) on each field +/// to ensure that a single bad value doesn't fail the entire extraction. +/// +/// # Flat fields only +/// +/// A single `Source` type is used for both environment variable and YAML +/// extraction. This works when all extension fields are top-level (flat) in +/// the YAML file, which is the common case for extension configs: +/// +/// ```yaml +/// # Works: flat fields map naturally to both DD_* env vars and YAML keys +/// enhanced_metrics: true +/// capture_lambda_payload: false +/// ``` +/// +/// If you need nested YAML structures (e.g., `lambda: { enhanced_metrics: true }`) +/// that differ from the flat env var layout, implement `merge_from` with a +/// nested source struct and handle the mapping manually instead of using +/// `merge_fields!`. +pub trait ConfigExtension: Clone + Default + std::fmt::Debug + PartialEq { + /// Intermediate type for deserializing extension fields. + /// Used for both environment variable and YAML extraction. + type Source: Default + serde::de::DeserializeOwned + Clone + std::fmt::Debug; + + /// Merge parsed source fields into self. + fn merge_from(&mut self, source: &Self::Source); } -#[inline] -#[must_use] -fn logs_intake_url(url: &str) -> String { - let url = url.trim(); - if url.is_empty() { - return url.to_string(); - } - if url.starts_with("https://") || url.starts_with("http://") { - return url.to_string(); - } - format!("https://{url}") +/// A no-op extension for consumers that don't need extra fields. +#[derive(Clone, Default, Debug, PartialEq)] +pub struct NoExtension; + +/// A no-op source for deserialization that accepts (and ignores) any input. +#[derive(Clone, Default, Debug, Deserialize)] +pub struct NoExtensionSource; + +impl ConfigExtension for NoExtension { + type Source = NoExtensionSource; + fn merge_from(&mut self, _source: &Self::Source) {} } -pub fn deserialize_optional_string<'de, D>(deserializer: D) -> Result, D::Error> -where - D: Deserializer<'de>, -{ - match Value::deserialize(deserializer)? { - Value::String(s) => Ok(Some(s)), - other => { - warn!( - "Failed to parse value, expected a string, got: {}, ignoring", - other - ); - Ok(None) - } - } +// --------------------------------------------------------------------------- +// ConfigBuilder — orchestrates loading from multiple sources +// --------------------------------------------------------------------------- + +#[derive(Debug, PartialEq)] +#[allow(clippy::module_name_repetitions)] +pub enum ConfigError { + ParseError(String), + UnsupportedField(String), } -pub fn deserialize_string_or_int<'de, D>(deserializer: D) -> Result, D::Error> -where - D: Deserializer<'de>, -{ - let value = Value::deserialize(deserializer)?; - match value { - Value::String(s) => { - if s.trim().is_empty() { - Ok(None) - } else { - Ok(Some(s)) - } - } - Value::Number(n) => Ok(Some(n.to_string())), - _ => { - warn!("Failed to parse value, expected a string or an integer, ignoring"); - Ok(None) - } - } +#[allow(clippy::module_name_repetitions)] +pub trait ConfigSource { + fn load(&self, config: &mut Config) -> Result<(), ConfigError>; } -pub fn deserialize_optional_bool_from_anything<'de, D>( - deserializer: D, -) -> Result, D::Error> -where - D: Deserializer<'de>, -{ - // First try to deserialize as Option<_> to handle null/missing values - let opt: Option = Option::deserialize(deserializer)?; - - match opt { - None => Ok(None), - Some(value) => match deserialize_bool_from_anything(value) { - Ok(bool_result) => Ok(Some(bool_result)), - Err(e) => { - warn!("Failed to parse bool value: {}, ignoring", e); - Ok(None) - } - }, - } +#[allow(clippy::module_name_repetitions)] +pub struct ConfigBuilder { + sources: Vec>>, + config: Config, } -/// Parse a single "key:value" string into a (key, value) tuple -/// Returns None if the string is invalid (e.g., missing colon, empty key/value) -fn parse_key_value_tag(tag: &str) -> Option<(String, String)> { - let parts: Vec<&str> = tag.splitn(2, ':').collect(); - if parts.len() == 2 && !parts[0].is_empty() && !parts[1].is_empty() { - Some((parts[0].to_string(), parts[1].to_string())) - } else { - warn!( - "Failed to parse tag '{}', expected format 'key:value', ignoring", - tag - ); - None +impl Default for ConfigBuilder { + fn default() -> Self { + Self { + sources: Vec::new(), + config: Config::default(), + } } } -pub fn deserialize_key_value_pairs<'de, D>( - deserializer: D, -) -> Result, D::Error> -where - D: Deserializer<'de>, -{ - struct KeyValueVisitor; - - impl serde::de::Visitor<'_> for KeyValueVisitor { - type Value = HashMap; - - fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - formatter.write_str("a string in format 'key1:value1,key2:value2' or 'key1:value1'") - } +#[allow(clippy::module_name_repetitions)] +impl ConfigBuilder { + #[must_use] + pub fn add_source(mut self, source: Box>) -> Self { + self.sources.push(source); + self + } - fn visit_str(self, value: &str) -> Result - where - E: serde::de::Error, - { - let mut map = HashMap::new(); - for tag in value.split(&[',', ' ']) { - if tag.is_empty() { - continue; - } - if let Some((key, val)) = parse_key_value_tag(tag) { - map.insert(key, val); + pub fn build(&mut self) -> Config { + let mut failed_sources = 0; + for source in &self.sources { + match source.load(&mut self.config) { + Ok(()) => (), + Err(e) => { + error!("Failed to load config: {:?}", e); + failed_sources += 1; } } + } - Ok(map) + if !self.sources.is_empty() && failed_sources == self.sources.len() { + debug!("All sources failed to load config, using default config."); } - fn visit_u64(self, value: u64) -> Result - where - E: serde::de::Error, - { - warn!( - "Failed to parse tags: expected string in format 'key:value', got number {}, ignoring", - value - ); - Ok(HashMap::new()) + if self.config.site.is_empty() { + self.config.site = "datadoghq.com".to_string(); } - fn visit_i64(self, value: i64) -> Result - where - E: serde::de::Error, + // If `proxy_https` is not set, set it from `HTTPS_PROXY` environment variable + // if it exists + if let Ok(https_proxy) = std::env::var("HTTPS_PROXY") + && self.config.proxy_https.is_none() { - warn!( - "Failed to parse tags: expected string in format 'key:value', got number {}, ignoring", - value - ); - Ok(HashMap::new()) + self.config.proxy_https = Some(https_proxy); } - fn visit_f64(self, value: f64) -> Result - where - E: serde::de::Error, - { - warn!( - "Failed to parse tags: expected string in format 'key:value', got number {}, ignoring", - value - ); - Ok(HashMap::new()) + // If `proxy_https` is set, check if the site is in `NO_PROXY` environment variable + // or in the `proxy_no_proxy` config field. + if self.config.proxy_https.is_some() { + let site_in_no_proxy = std::env::var("NO_PROXY") + .is_ok_and(|no_proxy| no_proxy.contains(&self.config.site)) + || self + .config + .proxy_no_proxy + .iter() + .any(|no_proxy| no_proxy.contains(&self.config.site)); + if site_in_no_proxy { + self.config.proxy_https = None; + } + } + + // If extraction is not set, set it to the same as the propagation style + if self.config.trace_propagation_style_extract.is_empty() { + self.config + .trace_propagation_style_extract + .clone_from(&self.config.trace_propagation_style); + } + + // If Logs URL is not set, set it to the default + if self.config.logs_config_logs_dd_url.trim().is_empty() { + self.config.logs_config_logs_dd_url = build_fqdn_logs(self.config.site.clone()); + } else { + self.config.logs_config_logs_dd_url = + logs_intake_url(self.config.logs_config_logs_dd_url.as_str()); } - fn visit_bool(self, value: bool) -> Result - where - E: serde::de::Error, - { - warn!( - "Failed to parse tags: expected string in format 'key:value', got boolean {}, ignoring", - value - ); - Ok(HashMap::new()) + // If APM URL is not set, set it to the default + if self.config.apm_dd_url.is_empty() { + self.config.apm_dd_url = trace_intake_url(self.config.site.clone().as_str()); + } else { + // If APM URL is set, add the site to the URL + self.config.apm_dd_url = trace_intake_url_prefixed(self.config.apm_dd_url.as_str()); } + + self.config.clone() } +} - deserializer.deserialize_any(KeyValueVisitor) +#[inline] +#[must_use] +fn build_fqdn_logs(site: String) -> String { + format!("https://http-intake.logs.{site}") } -pub fn deserialize_array_from_comma_separated_string<'de, D>( - deserializer: D, -) -> Result, D::Error> -where - D: Deserializer<'de>, -{ - let s: String = String::deserialize(deserializer)?; - Ok(s.split(',') - .map(|feature| feature.trim().to_string()) - .filter(|feature| !feature.is_empty()) - .collect()) +#[inline] +#[must_use] +fn logs_intake_url(url: &str) -> String { + let url = url.trim(); + if url.is_empty() { + return url.to_string(); + } + if url.starts_with("https://") || url.starts_with("http://") { + return url.to_string(); + } + format!("https://{url}") } -pub fn deserialize_key_value_pair_array_to_hashmap<'de, D>( - deserializer: D, -) -> Result, D::Error> -where - D: Deserializer<'de>, -{ - let array: Vec = match Vec::deserialize(deserializer) { - Ok(v) => v, - Err(e) => { - warn!("Failed to deserialize tags array: {e}, ignoring"); - return Ok(HashMap::new()); +// --------------------------------------------------------------------------- +// Merge macros — used by sources and extension implementations +// --------------------------------------------------------------------------- + +/// Helper macro to merge Option fields to String fields +/// +/// Providing one field argument will merge the value from the source config field into the config +/// field. +/// +/// Providing two field arguments will merge the value from the source config field into the config +/// field if the value is not empty. +#[macro_export] +macro_rules! merge_string { + ($config:expr, $config_field:ident, $source:expr, $source_field:ident) => { + if let Some(value) = &$source.$source_field { + $config.$config_field.clone_from(value); } }; - let mut map = HashMap::new(); - for s in array { - if let Some((key, val)) = parse_key_value_tag(&s) { - map.insert(key, val); + ($config:expr, $source:expr, $field:ident) => { + if let Some(value) = &$source.$field { + $config.$field.clone_from(value); } - } - Ok(map) + }; } -/// Deserialize APM filter tags from space-separated "key:value" pairs, also support key-only tags -pub fn deserialize_apm_filter_tags<'de, D>(deserializer: D) -> Result>, D::Error> -where - D: Deserializer<'de>, -{ - let opt: Option = Option::deserialize(deserializer)?; - - match opt { - None => Ok(None), - Some(s) if s.trim().is_empty() => Ok(None), - Some(s) => { - let tags: Vec = s - .split_whitespace() - .filter_map(|pair| { - let parts: Vec<&str> = pair.splitn(2, ':').collect(); - if parts.len() == 2 { - let key = parts[0].trim(); - let value = parts[1].trim(); - if key.is_empty() { - None - } else if value.is_empty() { - Some(key.to_string()) - } else { - Some(format!("{key}:{value}")) - } - } else if parts.len() == 1 { - let key = parts[0].trim(); - if key.is_empty() { - None - } else { - Some(key.to_string()) - } - } else { - None - } - }) - .collect(); - - if tags.is_empty() { - Ok(None) - } else { - Ok(Some(tags)) - } +/// Helper macro to merge Option fields where T implements Clone +/// +/// Providing one field argument will merge the value from the source config field into the config +/// field. +/// +/// Providing two field arguments will merge the value from the source config field into the config +/// field if the value is not empty. +#[macro_export] +macro_rules! merge_option { + ($config:expr, $config_field:ident, $source:expr, $source_field:ident) => { + if $source.$source_field.is_some() { + $config.$config_field.clone_from(&$source.$source_field); } - } -} - -pub fn deserialize_option_lossless<'de, D, T>(deserializer: D) -> Result, D::Error> -where - D: Deserializer<'de>, - T: Deserialize<'de>, -{ - match Option::::deserialize(deserializer) { - Ok(value) => Ok(value), - Err(e) => { - warn!("Failed to deserialize optional value: {}, ignoring", e); - Ok(None) + }; + ($config:expr, $source:expr, $field:ident) => { + if $source.$field.is_some() { + $config.$field.clone_from(&$source.$field); } - } + }; } -/// Gracefully deserialize any field, falling back to `T::default()` on error. +/// Helper macro to merge Option fields to T fields when Option is Some +/// +/// Providing one field argument will merge the value from the source config field into the config +/// field. /// -/// This ensures that a single field with the wrong type never fails the entire -/// struct extraction. Works for any `T` that implements `Deserialize + Default`: -/// - `Option` defaults to `None` -/// - `Vec` defaults to `[]` -/// - `HashMap` defaults to `{}` -/// - Structs with `#[derive(Default)]` use their default -pub fn deserialize_with_default<'de, D, T>(deserializer: D) -> Result -where - D: Deserializer<'de>, - T: Deserialize<'de> + Default, -{ - match T::deserialize(deserializer) { - Ok(value) => Ok(value), - Err(e) => { - warn!("Failed to deserialize field: {}, using default", e); - Ok(T::default()) +/// Providing two field arguments will merge the value from the source config field into the config +/// field if the value is not empty. +#[macro_export] +macro_rules! merge_option_to_value { + ($config:expr, $config_field:ident, $source:expr, $source_field:ident) => { + if let Some(value) = &$source.$source_field { + $config.$config_field = value.clone(); } - } -} - -pub fn deserialize_optional_duration_from_microseconds<'de, D: Deserializer<'de>>( - deserializer: D, -) -> Result, D::Error> { - match Option::::deserialize(deserializer) { - Ok(opt) => Ok(opt.map(Duration::from_micros)), - Err(e) => { - warn!("Failed to deserialize duration (microseconds): {e}, ignoring"); - Ok(None) + }; + ($config:expr, $source:expr, $field:ident) => { + if let Some(value) = &$source.$field { + $config.$field = value.clone(); } - } + }; } -pub fn deserialize_optional_duration_from_seconds<'de, D: Deserializer<'de>>( - deserializer: D, -) -> Result, D::Error> { - // Deserialize into a generic Value first to avoid propagating type errors, - // then try to extract a duration from it. - match Value::deserialize(deserializer) { - Ok(Value::Number(n)) => { - if let Some(u) = n.as_u64() { - Ok(Some(Duration::from_secs(u))) - } else if let Some(i) = n.as_i64() { - if i < 0 { - warn!("Failed to parse duration: negative durations are not allowed, ignoring"); - Ok(None) - } else { - Ok(Some(Duration::from_secs(i as u64))) - } - } else if let Some(f) = n.as_f64() { - if f < 0.0 { - warn!("Failed to parse duration: negative durations are not allowed, ignoring"); - Ok(None) - } else { - Ok(Some(Duration::from_secs_f64(f))) - } - } else { - warn!("Failed to parse duration: unsupported number format, ignoring"); - Ok(None) - } - } - Ok(Value::Null) => Ok(None), - Ok(other) => { - warn!("Failed to parse duration: expected number, got {other}, ignoring"); - Ok(None) +/// Helper macro to merge `Vec` fields when `Vec` is not empty +/// +/// Providing one field argument will merge the value from the source config field into the config +/// field. +/// +/// Providing two field arguments will merge the value from the source config field into the config +/// field if the value is not empty. +#[macro_export] +macro_rules! merge_vec { + ($config:expr, $config_field:ident, $source:expr, $source_field:ident) => { + if !$source.$source_field.is_empty() { + $config.$config_field.clone_from(&$source.$source_field); } - Err(e) => { - warn!("Failed to deserialize duration: {e}, ignoring"); - Ok(None) + }; + ($config:expr, $source:expr, $field:ident) => { + if !$source.$field.is_empty() { + $config.$field.clone_from(&$source.$field); } - } -} - -// Like deserialize_optional_duration_from_seconds(), but return None if the value is 0 -pub fn deserialize_optional_duration_from_seconds_ignore_zero<'de, D: Deserializer<'de>>( - deserializer: D, -) -> Result, D::Error> { - let duration: Option = deserialize_optional_duration_from_seconds(deserializer)?; - if duration.is_some_and(|d| d.as_secs() == 0) { - return Ok(None); - } - Ok(duration) + }; } -pub fn deserialize_trace_propagation_style<'de, D>( - deserializer: D, -) -> Result, D::Error> -where - D: Deserializer<'de>, -{ - use std::str::FromStr; - let s: String = match String::deserialize(deserializer) { - Ok(s) => s, - Err(e) => { - warn!("Failed to deserialize trace propagation style: {e}, ignoring"); - return Ok(Vec::new()); +/// Helper macro to merge `HashMap` fields when `HashMap` is not empty +/// +/// Providing one field argument will merge the value from the source config field into the config +/// field. +/// +/// Providing two field arguments will merge the value from the source config field into the config +/// field if the value is not empty. +#[macro_export] +macro_rules! merge_hashmap { + ($config:expr, $config_field:ident, $source:expr, $source_field:ident) => { + if !$source.$source_field.is_empty() { + $config.$config_field.clone_from(&$source.$source_field); + } + }; + ($config:expr, $source:expr, $field:ident) => { + if !$source.$field.is_empty() { + $config.$field.clone_from(&$source.$field); } }; +} - Ok(s.split(',') - .filter_map( - |style| match TracePropagationStyle::from_str(style.trim()) { - Ok(parsed_style) => Some(parsed_style), - Err(e) => { - warn!("Failed to parse trace propagation style: {e}, ignoring"); - None - } - }, - ) - .collect()) +/// Batch-merge extension fields from a source struct. +/// +/// Groups fields by merge strategy so you don't have to write individual +/// `merge_string!` / `merge_option_to_value!` / `merge_option!` calls. +/// +/// ```ignore +/// merge_fields!(self, source, +/// string: [api_key_secret_arn, kms_api_key], +/// value: [enhanced_metrics, capture_lambda_payload], +/// option: [span_dedup_timeout, appsec_rules], +/// ); +/// ``` +#[macro_export] +macro_rules! merge_fields { + // Internal rules dispatched by keyword + (@string $config:expr, $source:expr, [$($field:ident),* $(,)?]) => { + $( $crate::merge_string!($config, $source, $field); )* + }; + (@value $config:expr, $source:expr, [$($field:ident),* $(,)?]) => { + $( $crate::merge_option_to_value!($config, $source, $field); )* + }; + (@option $config:expr, $source:expr, [$($field:ident),* $(,)?]) => { + $( $crate::merge_option!($config, $source, $field); )* + }; + // Public entry point: accepts any combination of groups in any order + ($config:expr, $source:expr, $($kind:ident: [$($field:ident),* $(,)?]),* $(,)?) => { + $( $crate::merge_fields!(@$kind $config, $source, [$($field),*]); )* + }; } #[cfg_attr(coverage_nightly, coverage(off))] // Test modules skew coverage metrics @@ -886,12 +609,9 @@ pub mod tests { use super::*; - use crate::{ - TracePropagationStyle, - flush_strategy::{FlushStrategy, PeriodicStrategy}, - log_level::LogLevel, - processing_rule::ProcessingRule, - }; + use std::time::Duration; + + use crate::{TracePropagationStyle, log_level::LogLevel, processing_rule::ProcessingRule}; #[test] fn test_default_logs_intake_url() { @@ -1158,56 +878,6 @@ pub mod tests { }); } - #[test] - fn test_parse_flush_strategy_end() { - figment::Jail::expect_with(|jail| { - jail.clear_env(); - jail.set_env("DD_SERVERLESS_FLUSH_STRATEGY", "end"); - let config = get_config(Path::new("")); - assert_eq!(config.serverless_flush_strategy, FlushStrategy::End); - Ok(()) - }); - } - - #[test] - fn test_parse_flush_strategy_periodically() { - figment::Jail::expect_with(|jail| { - jail.clear_env(); - jail.set_env("DD_SERVERLESS_FLUSH_STRATEGY", "periodically,100000"); - let config = get_config(Path::new("")); - assert_eq!( - config.serverless_flush_strategy, - FlushStrategy::Periodically(PeriodicStrategy { interval: 100_000 }) - ); - Ok(()) - }); - } - - #[test] - fn test_parse_flush_strategy_invalid() { - figment::Jail::expect_with(|jail| { - jail.clear_env(); - jail.set_env("DD_SERVERLESS_FLUSH_STRATEGY", "invalid_strategy"); - let config = get_config(Path::new("")); - assert_eq!(config.serverless_flush_strategy, FlushStrategy::Default); - Ok(()) - }); - } - - #[test] - fn test_parse_flush_strategy_invalid_periodic() { - figment::Jail::expect_with(|jail| { - jail.clear_env(); - jail.set_env( - "DD_SERVERLESS_FLUSH_STRATEGY", - "periodically,invalid_interval", - ); - let config = get_config(Path::new("")); - assert_eq!(config.serverless_flush_strategy, FlushStrategy::Default); - Ok(()) - }); - } - #[test] fn parse_number_or_string_env_vars() { figment::Jail::expect_with(|jail| { @@ -1476,15 +1146,11 @@ pub mod tests { fn test_parse_bool_from_anything() { figment::Jail::expect_with(|jail| { jail.clear_env(); - jail.set_env("DD_SERVERLESS_LOGS_ENABLED", "true"); - jail.set_env("DD_ENHANCED_METRICS", "1"); jail.set_env("DD_LOGS_CONFIG_USE_COMPRESSION", "TRUE"); - jail.set_env("DD_CAPTURE_LAMBDA_PAYLOAD", "0"); + jail.set_env("DD_SKIP_SSL_VALIDATION", "1"); let config = get_config(Path::new("")); - assert!(config.serverless_logs_enabled); - assert!(config.enhanced_metrics); assert!(config.logs_config_use_compression); - assert!(!config.capture_lambda_payload); + assert!(config.skip_ssl_validation); Ok(()) }); } @@ -1708,4 +1374,144 @@ pub mod tests { serde_json::from_str::(r#"{"tags": []}"#).expect("failed to parse JSON"); assert_eq!(result.tags, HashMap::new()); } + + // -- ConfigExtension tests -- + + /// A test extension with a few fields, mimicking what a consumer like Lambda would define. + #[derive(Clone, Default, Debug, PartialEq)] + struct TestExtension { + custom_flag: bool, + custom_name: String, + } + + #[derive(Clone, Default, Debug, Deserialize)] + #[serde(default)] + struct TestExtSource { + #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] + custom_flag: Option, + #[serde(deserialize_with = "deserialize_optional_string")] + custom_name: Option, + } + + impl ConfigExtension for TestExtension { + type Source = TestExtSource; + + fn merge_from(&mut self, source: &TestExtSource) { + merge_fields!(self, source, + string: [custom_name], + value: [custom_flag], + ); + } + } + + #[test] + fn test_no_extension_config_works() { + figment::Jail::expect_with(|jail| { + jail.clear_env(); + jail.set_env("DD_SITE", "datad0g.com"); + let config = get_config(Path::new("")); + assert_eq!(config.site, "datad0g.com"); + assert_eq!(config.ext, NoExtension); + Ok(()) + }); + } + + #[test] + fn test_extension_receives_env_vars() { + figment::Jail::expect_with(|jail| { + jail.clear_env(); + jail.set_env("DD_SITE", "datad0g.com"); + jail.set_env("DD_CUSTOM_FLAG", "true"); + jail.set_env("DD_CUSTOM_NAME", "my-extension"); + + let config: Config = get_config_with_extension(Path::new("")); + + // Core fields work + assert_eq!(config.site, "datad0g.com"); + // Extension fields are populated + assert!(config.ext.custom_flag); + assert_eq!(config.ext.custom_name, "my-extension"); + Ok(()) + }); + } + + #[test] + fn test_extension_receives_yaml_fields() { + figment::Jail::expect_with(|jail| { + jail.clear_env(); + jail.create_file( + "datadog.yaml", + r#" +site: "datad0g.com" +custom_flag: true +custom_name: "yaml-ext" +"#, + )?; + + let config: Config = get_config_with_extension(Path::new("")); + + assert_eq!(config.site, "datad0g.com"); + assert!(config.ext.custom_flag); + assert_eq!(config.ext.custom_name, "yaml-ext"); + Ok(()) + }); + } + + #[test] + fn test_extension_env_overrides_yaml() { + figment::Jail::expect_with(|jail| { + jail.clear_env(); + jail.create_file( + "datadog.yaml", + r#" +custom_name: "yaml-value" +custom_flag: false +"#, + )?; + jail.set_env("DD_CUSTOM_NAME", "env-value"); + jail.set_env("DD_CUSTOM_FLAG", "true"); + + let config: Config = get_config_with_extension(Path::new("")); + + // Env should override YAML (env source loaded after yaml) + assert!(config.ext.custom_flag); + assert_eq!(config.ext.custom_name, "env-value"); + Ok(()) + }); + } + + #[test] + fn test_extension_defaults_when_not_set() { + figment::Jail::expect_with(|jail| { + jail.clear_env(); + + let config: Config = get_config_with_extension(Path::new("")); + + // Extension fields should be at their defaults + assert!(!config.ext.custom_flag); + assert_eq!(config.ext.custom_name, ""); + // Core fields should have post-processing defaults + assert_eq!(config.site, "datadoghq.com"); + Ok(()) + }); + } + + #[test] + fn test_extension_does_not_interfere_with_core() { + figment::Jail::expect_with(|jail| { + jail.clear_env(); + jail.set_env("DD_SITE", "us5.datadoghq.com"); + jail.set_env("DD_API_KEY", "test-key"); + jail.set_env("DD_CUSTOM_FLAG", "true"); + + let config: Config = get_config_with_extension(Path::new("")); + + // Core fields are not affected by extension env vars + assert_eq!(config.site, "us5.datadoghq.com"); + assert_eq!(config.api_key, "test-key"); + // Extension fields work alongside core + assert!(config.ext.custom_flag); + Ok(()) + }); + } } diff --git a/crates/datadog-agent-config/env.rs b/crates/datadog-agent-config/src/sources/env.rs similarity index 75% rename from crates/datadog-agent-config/env.rs rename to crates/datadog-agent-config/src/sources/env.rs index f24d6be..78853e0 100644 --- a/crates/datadog-agent-config/env.rs +++ b/crates/datadog-agent-config/src/sources/env.rs @@ -1,22 +1,18 @@ use figment::{Figment, providers::Env}; use serde::Deserialize; use std::collections::HashMap; -use std::time::Duration; use dogstatsd::util::parse_metric_namespace; use libdd_trace_obfuscation::replacer::ReplaceRule; use crate::{ - Config, ConfigError, ConfigSource, TracePropagationStyle, + Config, ConfigError, ConfigExtension, ConfigSource, TracePropagationStyle, additional_endpoints::deserialize_additional_endpoints, apm_replace_rule::deserialize_apm_replace_rules, deserialize_apm_filter_tags, deserialize_array_from_comma_separated_string, deserialize_key_value_pairs, deserialize_option_lossless, - deserialize_optional_bool_from_anything, deserialize_optional_duration_from_microseconds, - deserialize_optional_duration_from_seconds, - deserialize_optional_duration_from_seconds_ignore_zero, deserialize_optional_string, + deserialize_optional_bool_from_anything, deserialize_optional_string, deserialize_string_or_int, deserialize_trace_propagation_style, deserialize_with_default, - flush_strategy::FlushStrategy, log_level::LogLevel, logs_additional_endpoints::{LogsAdditionalEndpoint, deserialize_logs_additional_endpoints}, merge_hashmap, merge_option, merge_option_to_value, merge_string, merge_vec, @@ -369,119 +365,10 @@ pub struct EnvConfig { /// @env `DD_OTLP_CONFIG_LOGS_ENABLED` #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] pub otlp_config_logs_enabled: Option, - - // AWS Lambda - /// @env `DD_API_KEY_SECRET_ARN` - /// - /// The AWS ARN of the secret containing the Datadog API key. - #[serde(deserialize_with = "deserialize_optional_string")] - pub api_key_secret_arn: Option, - /// @env `DD_KMS_API_KEY` - /// - /// The AWS KMS API key to use for the Datadog Agent. - #[serde(deserialize_with = "deserialize_optional_string")] - pub kms_api_key: Option, - /// @env `DD_API_KEY_SSM_ARN` - /// - /// The AWS Systems Manager Parameter Store parameter ARN containing the Datadog API key. - #[serde(deserialize_with = "deserialize_optional_string")] - pub api_key_ssm_arn: Option, - /// @env `DD_SERVERLESS_LOGS_ENABLED` - /// - /// Enable logs for AWS Lambda. Default is `true`. - #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] - pub serverless_logs_enabled: Option, - /// @env `DD_LOGS_ENABLED` - /// - /// Enable logs for AWS Lambda. Alias for `DD_SERVERLESS_LOGS_ENABLED`. Default is `true`. - #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] - pub logs_enabled: Option, - /// @env `DD_SERVERLESS_FLUSH_STRATEGY` - /// - /// The flush strategy to use for AWS Lambda. - #[serde(deserialize_with = "deserialize_with_default")] - pub serverless_flush_strategy: Option, - /// @env `DD_ENHANCED_METRICS` - /// - /// Enable enhanced metrics for AWS Lambda. Default is `true`. - #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] - pub enhanced_metrics: Option, - /// @env `DD_LAMBDA_PROC_ENHANCED_METRICS` - /// - /// Enable Lambda process metrics for AWS Lambda. Default is `true`. - /// - /// This is for metrics like: - /// - CPU usage - /// - Network usage - /// - File descriptor count - /// - Thread count - /// - Temp directory usage - #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] - pub lambda_proc_enhanced_metrics: Option, - /// @env `DD_CAPTURE_LAMBDA_PAYLOAD` - /// - /// Enable capture of the Lambda request and response payloads. - /// Default is `false`. - #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] - pub capture_lambda_payload: Option, - /// @env `DD_CAPTURE_LAMBDA_PAYLOAD_MAX_DEPTH` - /// - /// The maximum depth of the Lambda payload to capture. - /// Default is `10`. Requires `capture_lambda_payload` to be `true`. - #[serde(deserialize_with = "deserialize_option_lossless")] - pub capture_lambda_payload_max_depth: Option, - /// @env `DD_COMPUTE_TRACE_STATS_ON_EXTENSION` - /// - /// If true, enable computation of trace stats on the extension side. - /// If false, trace stats will be computed on the backend side. - /// Default is `false`. - #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] - pub compute_trace_stats_on_extension: Option, - /// @env `DD_SPAN_DEDUP_TIMEOUT` - /// - /// The timeout for the span deduplication service to check if a span key exists, in seconds. - /// For now, this is a temporary field added to debug the failure of `check_and_add()` in span dedup service. - /// Do not use this field extensively in production. - #[serde(deserialize_with = "deserialize_optional_duration_from_seconds_ignore_zero")] - pub span_dedup_timeout: Option, - /// @env `DD_API_KEY_SECRET_RELOAD_INTERVAL` - /// - /// The interval at which the Datadog API key is reloaded, in seconds. - /// If None, the API key will not be reloaded. - /// Default is `None`. - #[serde(deserialize_with = "deserialize_optional_duration_from_seconds_ignore_zero")] - pub api_key_secret_reload_interval: Option, - /// @env `DD_SERVERLESS_APPSEC_ENABLED` - /// - /// Enable Application and API Protection (AAP), previously known as AppSec/ASM, for AWS Lambda. - /// Default is `false`. - /// - #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] - pub serverless_appsec_enabled: Option, - /// @env `DD_APPSEC_RULES` - /// - /// The path to a user-configured App & API Protection ruleset (in JSON format). - #[serde(deserialize_with = "deserialize_optional_string")] - pub appsec_rules: Option, - /// @env `DD_APPSEC_WAF_TIMEOUT` - /// - /// The timeout for the WAF to process a request, in microseconds. - #[serde(deserialize_with = "deserialize_optional_duration_from_microseconds")] - pub appsec_waf_timeout: Option, - /// @env `DD_API_SECURITY_ENABLED` - /// - /// Enable API Security for AWS Lambda. - #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] - pub api_security_enabled: Option, - /// @env `DD_API_SECURITY_SAMPLE_DELAY` - /// - /// The delay between two samples of the API Security schema collection, in seconds. - #[serde(deserialize_with = "deserialize_optional_duration_from_seconds")] - pub api_security_sample_delay: Option, } #[allow(clippy::too_many_lines)] -fn merge_config(config: &mut Config, env_config: &EnvConfig) { +fn merge_config(config: &mut Config, env_config: &EnvConfig) { // Basic fields merge_string!(config, env_config, site); merge_string!(config, env_config, api_key); @@ -654,44 +541,19 @@ fn merge_config(config: &mut Config, env_config: &EnvConfig) { otlp_config_traces_probabilistic_sampler_sampling_percentage ); merge_option_to_value!(config, env_config, otlp_config_logs_enabled); - - // AWS Lambda - merge_string!(config, env_config, api_key_secret_arn); - merge_string!(config, env_config, kms_api_key); - merge_string!(config, env_config, api_key_ssm_arn); - merge_option_to_value!(config, env_config, serverless_logs_enabled); - - // Handle serverless_logs_enabled with OR logic: if either DD_LOGS_ENABLED or DD_SERVERLESS_LOGS_ENABLED is true, enable logs - if env_config.serverless_logs_enabled.is_some() || env_config.logs_enabled.is_some() { - config.serverless_logs_enabled = env_config.serverless_logs_enabled.unwrap_or(false) - || env_config.logs_enabled.unwrap_or(false); - } - - merge_option_to_value!(config, env_config, serverless_flush_strategy); - merge_option_to_value!(config, env_config, enhanced_metrics); - merge_option_to_value!(config, env_config, lambda_proc_enhanced_metrics); - merge_option_to_value!(config, env_config, capture_lambda_payload); - merge_option_to_value!(config, env_config, capture_lambda_payload_max_depth); - merge_option_to_value!(config, env_config, compute_trace_stats_on_extension); - merge_option!(config, env_config, span_dedup_timeout); - merge_option!(config, env_config, api_key_secret_reload_interval); - merge_option_to_value!(config, env_config, serverless_appsec_enabled); - merge_option!(config, env_config, appsec_rules); - merge_option_to_value!(config, env_config, appsec_waf_timeout); - merge_option_to_value!(config, env_config, api_security_enabled); - merge_option_to_value!(config, env_config, api_security_sample_delay); } #[derive(Debug, PartialEq, Clone, Copy)] #[allow(clippy::module_name_repetitions)] pub struct EnvConfigSource; -impl ConfigSource for EnvConfigSource { - fn load(&self, config: &mut Config) -> Result<(), ConfigError> { +impl ConfigSource for EnvConfigSource { + fn load(&self, config: &mut Config) -> Result<(), ConfigError> { let figment = Figment::new() .merge(Env::prefixed("DATADOG_")) .merge(Env::prefixed("DD_")); + // Extract core config fields match figment.extract::() { Ok(env_config) => merge_config(config, &env_config), Err(e) => { @@ -701,6 +563,16 @@ impl ConfigSource for EnvConfigSource { } } + // Extract extension fields via dual extraction + match figment.extract::() { + Ok(ext_source) => config.ext.merge_from(&ext_source), + Err(e) => { + tracing::warn!( + "Failed to parse extension config from environment variables: {e}, using default extension config." + ); + } + } + Ok(()) } } @@ -708,12 +580,9 @@ impl ConfigSource for EnvConfigSource { #[cfg_attr(coverage_nightly, coverage(off))] // Test modules skew coverage metrics #[cfg(test)] mod tests { - use std::time::Duration; - use super::*; use crate::{ Config, TracePropagationStyle, - flush_strategy::{FlushStrategy, PeriodicStrategy}, log_level::LogLevel, processing_rule::{Kind, ProcessingRule}, }; @@ -727,6 +596,7 @@ mod tests { /// corresponding entry in the arrays below. #[test] #[allow(clippy::too_many_lines)] + #[allow(clippy::field_reassign_with_default)] fn test_all_env_fields_wrong_type_fallback_to_default() { // Non-string fields → invalid values that exercise graceful fallback. let invalid_non_string_env_vars: &[(&str, &str)] = &[ @@ -736,7 +606,6 @@ mod tests { ("DD_LOGS_CONFIG_COMPRESSION_LEVEL", "not_a_number"), ("DD_APM_CONFIG_COMPRESSION_LEVEL", "not_a_number"), ("DD_METRICS_CONFIG_COMPRESSION_LEVEL", "not_a_number"), - ("DD_CAPTURE_LAMBDA_PAYLOAD_MAX_DEPTH", "not_a_number"), ("DD_DOGSTATSD_SO_RCVBUF", "not_a_number"), ("DD_DOGSTATSD_BUFFER_SIZE", "not_a_number"), ("DD_DOGSTATSD_QUEUE_SIZE", "not_a_number"), @@ -763,12 +632,6 @@ mod tests { ("DD_TRACE_PROPAGATION_EXTRACT_FIRST", "not_a_bool"), ("DD_TRACE_PROPAGATION_HTTP_BAGGAGE_ENABLED", "not_a_bool"), ("DD_TRACE_AWS_SERVICE_REPRESENTATION_ENABLED", "not_a_bool"), - ("DD_ENHANCED_METRICS", "not_a_bool"), - ("DD_LAMBDA_PROC_ENHANCED_METRICS", "not_a_bool"), - ("DD_CAPTURE_LAMBDA_PAYLOAD", "not_a_bool"), - ("DD_COMPUTE_TRACE_STATS_ON_EXTENSION", "not_a_bool"), - ("DD_SERVERLESS_APPSEC_ENABLED", "not_a_bool"), - ("DD_API_SECURITY_ENABLED", "not_a_bool"), ("DD_OTLP_CONFIG_TRACES_ENABLED", "not_a_bool"), ( "DD_OTLP_CONFIG_TRACES_SPAN_NAME_AS_RESOURCE_NAME", @@ -797,16 +660,8 @@ mod tests { "DD_OBSERVABILITY_PIPELINES_WORKER_LOGS_ENABLED", "not_a_bool", ), - ("DD_SERVERLESS_LOGS_ENABLED", "not_a_bool"), - ("DD_LOGS_ENABLED", "not_a_bool"), // Enum ("DD_LOG_LEVEL", "invalid_level_999"), - ("DD_SERVERLESS_FLUSH_STRATEGY", "[[[invalid"), - // Duration - ("DD_SPAN_DEDUP_TIMEOUT", "not_a_number"), - ("DD_API_KEY_SECRET_RELOAD_INTERVAL", "not_a_number"), - ("DD_APPSEC_WAF_TIMEOUT", "not_a_number"), - ("DD_API_SECURITY_SAMPLE_DELAY", "not_a_number"), // JSON ("DD_ADDITIONAL_ENDPOINTS", "not_json{{"), ("DD_APM_ADDITIONAL_ENDPOINTS", "not_json{{"), @@ -870,16 +725,6 @@ mod tests { "keep", ), ("DD_OTLP_CONFIG_METRICS_SUMMARIES_MODE", "noquantiles"), - ( - "DD_API_KEY_SECRET_ARN", - "arn:aws:secretsmanager:us-east-1:123:secret:key", - ), - ("DD_KMS_API_KEY", "kms-encrypted-key"), - ( - "DD_API_KEY_SSM_ARN", - "arn:aws:ssm:us-east-1:123:parameter/key", - ), - ("DD_APPSEC_RULES", "/opt/custom-rules.json"), ]; // Programmatic guard: count `pub ` fields in the EnvConfig struct from @@ -912,7 +757,7 @@ mod tests { jail.set_env(key, value); } - let mut config = Config::default(); + let mut config: Config = Config::default(); // This MUST succeed — no single field should crash the whole config EnvConfigSource .load(&mut config) @@ -920,7 +765,7 @@ mod tests { // Build expected: string fields have their non-default values, // all non-string fields stay at defaults. - let mut expected = Config::default(); + let mut expected: Config = Config::default(); // String fields (merge_string! → Config String) expected.site = "custom-site.example.com".to_string(); expected.api_key = "test-api-key-12345".to_string(); @@ -930,10 +775,6 @@ mod tests { expected.observability_pipelines_worker_logs_url = "https://opw.example.com".to_string(); expected.apm_dd_url = "https://custom-apm.example.com".to_string(); - expected.api_key_secret_arn = - "arn:aws:secretsmanager:us-east-1:123:secret:key".to_string(); - expected.kms_api_key = "kms-encrypted-key".to_string(); - expected.api_key_ssm_arn = "arn:aws:ssm:us-east-1:123:parameter/key".to_string(); // Option fields (merge_option! → Config Option) expected.proxy_https = Some("https://proxy.example.com".to_string()); expected.http_protocol = Some("http1".to_string()); @@ -954,7 +795,6 @@ mod tests { expected.otlp_config_metrics_sums_initial_cumulativ_monotonic_value = Some("keep".to_string()); expected.otlp_config_metrics_summaries_mode = Some("noquantiles".to_string()); - expected.appsec_rules = Some("/opt/custom-rules.json".to_string()); assert_eq!(config, expected); Ok(()) @@ -1104,28 +944,7 @@ mod tests { jail.set_env("DD_DOGSTATSD_BUFFER_SIZE", "65507"); jail.set_env("DD_DOGSTATSD_QUEUE_SIZE", "2048"); - // AWS Lambda - jail.set_env( - "DD_API_KEY_SECRET_ARN", - "arn:aws:secretsmanager:region:account:secret:datadog-api-key", - ); - jail.set_env("DD_KMS_API_KEY", "test-kms-key"); - jail.set_env("DD_SERVERLESS_LOGS_ENABLED", "false"); - jail.set_env("DD_SERVERLESS_FLUSH_STRATEGY", "periodically,60000"); - jail.set_env("DD_ENHANCED_METRICS", "false"); - jail.set_env("DD_LAMBDA_PROC_ENHANCED_METRICS", "false"); - jail.set_env("DD_CAPTURE_LAMBDA_PAYLOAD", "true"); - jail.set_env("DD_CAPTURE_LAMBDA_PAYLOAD_MAX_DEPTH", "5"); - jail.set_env("DD_COMPUTE_TRACE_STATS_ON_EXTENSION", "true"); - jail.set_env("DD_SPAN_DEDUP_TIMEOUT", "5"); - jail.set_env("DD_API_KEY_SECRET_RELOAD_INTERVAL", "10"); - jail.set_env("DD_SERVERLESS_APPSEC_ENABLED", "true"); - jail.set_env("DD_APPSEC_RULES", "/path/to/rules.json"); - jail.set_env("DD_APPSEC_WAF_TIMEOUT", "1000000"); // Microseconds - jail.set_env("DD_API_SECURITY_ENABLED", "0"); // Seconds - jail.set_env("DD_API_SECURITY_SAMPLE_DELAY", "60"); // Seconds - - let mut config = Config::default(); + let mut config: Config = Config::default(); let env_config_source = EnvConfigSource; env_config_source .load(&mut config) @@ -1262,26 +1081,7 @@ mod tests { dogstatsd_so_rcvbuf: Some(1_048_576), dogstatsd_buffer_size: Some(65507), dogstatsd_queue_size: Some(2048), - api_key_secret_arn: "arn:aws:secretsmanager:region:account:secret:datadog-api-key" - .to_string(), - kms_api_key: "test-kms-key".to_string(), - api_key_ssm_arn: String::default(), - serverless_logs_enabled: false, - serverless_flush_strategy: FlushStrategy::Periodically(PeriodicStrategy { - interval: 60000, - }), - enhanced_metrics: false, - lambda_proc_enhanced_metrics: false, - capture_lambda_payload: true, - capture_lambda_payload_max_depth: 5, - compute_trace_stats_on_extension: true, - span_dedup_timeout: Some(Duration::from_secs(5)), - api_key_secret_reload_interval: Some(Duration::from_secs(10)), - serverless_appsec_enabled: true, - appsec_rules: Some("/path/to/rules.json".to_string()), - appsec_waf_timeout: Duration::from_secs(1), - api_security_enabled: false, - api_security_sample_delay: Duration::from_secs(60), + ext: crate::NoExtension, }; assert_eq!(config, expected_config); @@ -1290,165 +1090,6 @@ mod tests { }); } - #[test] - fn test_dd_logs_enabled_true() { - figment::Jail::expect_with(|jail| { - jail.clear_env(); - jail.set_env("DD_LOGS_ENABLED", "true"); - - let mut config = Config::default(); - let env_config_source = EnvConfigSource; - env_config_source - .load(&mut config) - .expect("Failed to load config"); - - assert!(config.serverless_logs_enabled); - Ok(()) - }); - } - - #[test] - fn test_dd_logs_enabled_false() { - figment::Jail::expect_with(|jail| { - jail.clear_env(); - jail.set_env("DD_LOGS_ENABLED", "false"); - - let mut config = Config::default(); - let env_config_source = EnvConfigSource; - env_config_source - .load(&mut config) - .expect("Failed to load config"); - - assert!(!config.serverless_logs_enabled); - Ok(()) - }); - } - - #[test] - fn test_dd_serverless_logs_enabled_true() { - figment::Jail::expect_with(|jail| { - jail.clear_env(); - jail.set_env("DD_SERVERLESS_LOGS_ENABLED", "true"); - - let mut config = Config::default(); - let env_config_source = EnvConfigSource; - env_config_source - .load(&mut config) - .expect("Failed to load config"); - - assert!(config.serverless_logs_enabled); - Ok(()) - }); - } - - #[test] - fn test_dd_serverless_logs_enabled_false() { - figment::Jail::expect_with(|jail| { - jail.clear_env(); - jail.set_env("DD_SERVERLESS_LOGS_ENABLED", "false"); - - let mut config = Config::default(); - let env_config_source = EnvConfigSource; - env_config_source - .load(&mut config) - .expect("Failed to load config"); - - assert!(!config.serverless_logs_enabled); - Ok(()) - }); - } - - #[test] - fn test_both_logs_enabled_true() { - figment::Jail::expect_with(|jail| { - jail.clear_env(); - jail.set_env("DD_LOGS_ENABLED", "true"); - jail.set_env("DD_SERVERLESS_LOGS_ENABLED", "true"); - - let mut config = Config::default(); - let env_config_source = EnvConfigSource; - env_config_source - .load(&mut config) - .expect("Failed to load config"); - - assert!(config.serverless_logs_enabled); - Ok(()) - }); - } - - #[test] - fn test_both_logs_enabled_false() { - figment::Jail::expect_with(|jail| { - jail.clear_env(); - jail.set_env("DD_LOGS_ENABLED", "false"); - jail.set_env("DD_SERVERLESS_LOGS_ENABLED", "false"); - - let mut config = Config::default(); - let env_config_source = EnvConfigSource; - env_config_source - .load(&mut config) - .expect("Failed to load config"); - - assert!(!config.serverless_logs_enabled); - Ok(()) - }); - } - - #[test] - fn test_logs_enabled_true_serverless_logs_enabled_false() { - figment::Jail::expect_with(|jail| { - jail.clear_env(); - jail.set_env("DD_LOGS_ENABLED", "true"); - jail.set_env("DD_SERVERLESS_LOGS_ENABLED", "false"); - - let mut config = Config::default(); - let env_config_source = EnvConfigSource; - env_config_source - .load(&mut config) - .expect("Failed to load config"); - - // OR logic: if either is true, logs are enabled - assert!(config.serverless_logs_enabled); - Ok(()) - }); - } - - #[test] - fn test_logs_enabled_false_serverless_logs_enabled_true() { - figment::Jail::expect_with(|jail| { - jail.clear_env(); - jail.set_env("DD_LOGS_ENABLED", "false"); - jail.set_env("DD_SERVERLESS_LOGS_ENABLED", "true"); - - let mut config = Config::default(); - let env_config_source = EnvConfigSource; - env_config_source - .load(&mut config) - .expect("Failed to load config"); - - // OR logic: if either is true, logs are enabled - assert!(config.serverless_logs_enabled); - Ok(()) - }); - } - - #[test] - fn test_neither_logs_enabled_set_uses_default() { - figment::Jail::expect_with(|jail| { - jail.clear_env(); - - let mut config = Config::default(); - let env_config_source = EnvConfigSource; - env_config_source - .load(&mut config) - .expect("Failed to load config"); - - // Default value is true - assert!(config.serverless_logs_enabled); - Ok(()) - }); - } - #[test] fn test_dogstatsd_config_from_env() { figment::Jail::expect_with(|jail| { @@ -1457,7 +1098,7 @@ mod tests { jail.set_env("DD_DOGSTATSD_BUFFER_SIZE", "65507"); jail.set_env("DD_DOGSTATSD_QUEUE_SIZE", "2048"); - let mut config = Config::default(); + let mut config: Config = Config::default(); let env_config_source = EnvConfigSource; env_config_source .load(&mut config) @@ -1475,7 +1116,7 @@ mod tests { figment::Jail::expect_with(|jail| { jail.clear_env(); - let mut config = Config::default(); + let mut config: Config = Config::default(); let env_config_source = EnvConfigSource; env_config_source .load(&mut config) diff --git a/crates/datadog-agent-config/src/sources/mod.rs b/crates/datadog-agent-config/src/sources/mod.rs new file mode 100644 index 0000000..dc4d398 --- /dev/null +++ b/crates/datadog-agent-config/src/sources/mod.rs @@ -0,0 +1,2 @@ +pub mod env; +pub mod yaml; diff --git a/crates/datadog-agent-config/yaml.rs b/crates/datadog-agent-config/src/sources/yaml.rs similarity index 85% rename from crates/datadog-agent-config/yaml.rs rename to crates/datadog-agent-config/src/sources/yaml.rs index 06b7851..6e4eebc 100644 --- a/crates/datadog-agent-config/yaml.rs +++ b/crates/datadog-agent-config/src/sources/yaml.rs @@ -1,15 +1,12 @@ -use std::time::Duration; use std::{collections::HashMap, path::PathBuf}; use crate::{ - Config, ConfigError, ConfigSource, ProcessingRule, TracePropagationStyle, + Config, ConfigError, ConfigExtension, ConfigSource, ProcessingRule, TracePropagationStyle, additional_endpoints::deserialize_additional_endpoints, deserialize_apm_replace_rules, deserialize_key_value_pair_array_to_hashmap, deserialize_option_lossless, - deserialize_optional_bool_from_anything, deserialize_optional_duration_from_microseconds, - deserialize_optional_duration_from_seconds, - deserialize_optional_duration_from_seconds_ignore_zero, deserialize_optional_string, + deserialize_optional_bool_from_anything, deserialize_optional_string, deserialize_processing_rules, deserialize_string_or_int, deserialize_trace_propagation_style, - deserialize_with_default, flush_strategy::FlushStrategy, log_level::LogLevel, + deserialize_with_default, log_level::LogLevel, logs_additional_endpoints::LogsAdditionalEndpoint, merge_hashmap, merge_option, merge_option_to_value, merge_string, merge_vec, service_mapping::deserialize_service_mapping, }; @@ -108,40 +105,6 @@ pub struct YamlConfig { // OTLP #[serde(deserialize_with = "deserialize_with_default")] pub otlp_config: Option, - - // AWS Lambda - #[serde(deserialize_with = "deserialize_optional_string")] - pub api_key_secret_arn: Option, - #[serde(deserialize_with = "deserialize_optional_string")] - pub kms_api_key: Option, - #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] - pub serverless_logs_enabled: Option, - #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] - pub logs_enabled: Option, - #[serde(deserialize_with = "deserialize_with_default")] - pub serverless_flush_strategy: Option, - #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] - pub enhanced_metrics: Option, - #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] - pub lambda_proc_enhanced_metrics: Option, - #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] - pub capture_lambda_payload: Option, - #[serde(deserialize_with = "deserialize_option_lossless")] - pub capture_lambda_payload_max_depth: Option, - #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] - pub compute_trace_stats_on_extension: Option, - #[serde(deserialize_with = "deserialize_optional_duration_from_seconds_ignore_zero")] - pub api_key_secret_reload_interval: Option, - #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] - pub serverless_appsec_enabled: Option, - #[serde(deserialize_with = "deserialize_optional_string")] - pub appsec_rules: Option, - #[serde(deserialize_with = "deserialize_optional_duration_from_microseconds")] - pub appsec_waf_timeout: Option, - #[serde(deserialize_with = "deserialize_optional_bool_from_anything")] - pub api_security_enabled: Option, - #[serde(deserialize_with = "deserialize_optional_duration_from_seconds")] - pub api_security_sample_delay: Option, } /// Proxy Config @@ -443,7 +406,7 @@ impl OtlpConfig { } #[allow(clippy::too_many_lines)] -fn merge_config(config: &mut Config, yaml_config: &YamlConfig) { +fn merge_config(config: &mut Config, yaml_config: &YamlConfig) { // Basic fields merge_string!(config, yaml_config, site); merge_string!(config, yaml_config, api_key); @@ -720,29 +683,6 @@ fn merge_config(config: &mut Config, yaml_config: &YamlConfig) { merge_option_to_value!(config, otlp_config_logs_enabled, logs, enabled); } } - - // AWS Lambda - merge_string!(config, yaml_config, api_key_secret_arn); - merge_string!(config, yaml_config, kms_api_key); - - // Handle serverless_logs_enabled with OR logic: if either logs_enabled or serverless_logs_enabled is true, enable logs - if yaml_config.serverless_logs_enabled.is_some() || yaml_config.logs_enabled.is_some() { - config.serverless_logs_enabled = yaml_config.serverless_logs_enabled.unwrap_or(false) - || yaml_config.logs_enabled.unwrap_or(false); - } - - merge_option_to_value!(config, yaml_config, serverless_flush_strategy); - merge_option_to_value!(config, yaml_config, enhanced_metrics); - merge_option_to_value!(config, yaml_config, lambda_proc_enhanced_metrics); - merge_option_to_value!(config, yaml_config, capture_lambda_payload); - merge_option_to_value!(config, yaml_config, capture_lambda_payload_max_depth); - merge_option_to_value!(config, yaml_config, compute_trace_stats_on_extension); - merge_option!(config, yaml_config, api_key_secret_reload_interval); - merge_option_to_value!(config, yaml_config, serverless_appsec_enabled); - merge_option!(config, yaml_config, appsec_rules); - merge_option_to_value!(config, yaml_config, appsec_waf_timeout); - merge_option_to_value!(config, yaml_config, api_security_enabled); - merge_option_to_value!(config, yaml_config, api_security_sample_delay); } #[derive(Debug, PartialEq, Clone)] @@ -751,8 +691,8 @@ pub struct YamlConfigSource { pub path: PathBuf, } -impl ConfigSource for YamlConfigSource { - fn load(&self, config: &mut Config) -> Result<(), ConfigError> { +impl ConfigSource for YamlConfigSource { + fn load(&self, config: &mut Config) -> Result<(), ConfigError> { let figment = Figment::new().merge(Yaml::file(self.path.clone())); match figment.extract::() { @@ -764,6 +704,16 @@ impl ConfigSource for YamlConfigSource { } } + // Extract extension fields via dual extraction + match figment.extract::() { + Ok(ext_source) => config.ext.merge_from(&ext_source), + Err(e) => { + tracing::warn!( + "Failed to parse extension config from yaml file: {e}, using default extension config." + ); + } + } + Ok(()) } } @@ -772,9 +722,8 @@ impl ConfigSource for YamlConfigSource { #[cfg(test)] mod tests { use std::path::Path; - use std::time::Duration; - use crate::{flush_strategy::PeriodicStrategy, log_level::LogLevel, processing_rule::Kind}; + use crate::{log_level::LogLevel, processing_rule::Kind}; use super::*; @@ -784,6 +733,7 @@ mod tests { /// When adding a new field to YamlConfig or any nested struct, add an entry /// here with the wrong type to ensure graceful deserialization is in place. #[test] + #[allow(clippy::field_reassign_with_default)] fn test_all_yaml_fields_wrong_type_fallback_to_default() { figment::Jail::expect_with(|jail| { jail.clear_env(); @@ -890,28 +840,10 @@ otlp_config: mode: "noquantiles" logs: enabled: [1, 2, 3] - -# AWS Lambda -api_key_secret_arn: "arn:aws:secretsmanager:us-east-1:123:secret:key" -kms_api_key: "kms-encrypted-key" -serverless_logs_enabled: [1, 2, 3] -logs_enabled: [1, 2, 3] -serverless_flush_strategy: [1, 2, 3] -enhanced_metrics: [1, 2, 3] -lambda_proc_enhanced_metrics: [1, 2, 3] -capture_lambda_payload: [1, 2, 3] -capture_lambda_payload_max_depth: [1, 2, 3] -compute_trace_stats_on_extension: [1, 2, 3] -api_key_secret_reload_interval: [1, 2, 3] -serverless_appsec_enabled: [1, 2, 3] -appsec_rules: "/opt/custom-rules.json" -appsec_waf_timeout: [1, 2, 3] -api_security_enabled: [1, 2, 3] -api_security_sample_delay: [1, 2, 3] "#, )?; - let mut config = Config::default(); + let mut config: Config = Config::default(); let source = YamlConfigSource { path: PathBuf::from("datadog.yaml"), }; @@ -922,15 +854,12 @@ api_security_sample_delay: [1, 2, 3] // Build expected: string fields have their non-default values, // all non-string fields stay at defaults. - let mut expected = Config::default(); + let mut expected: Config = Config::default(); expected.site = "custom-site.example.com".to_string(); expected.api_key = "test-api-key-12345".to_string(); expected.dd_url = "https://custom-metrics.example.com".to_string(); expected.logs_config_logs_dd_url = "https://custom-logs.example.com".to_string(); expected.apm_dd_url = "https://custom-apm.example.com".to_string(); - expected.api_key_secret_arn = - "arn:aws:secretsmanager:us-east-1:123:secret:key".to_string(); - expected.kms_api_key = "kms-encrypted-key".to_string(); // Option fields expected.proxy_https = Some("https://proxy.example.com".to_string()); expected.http_protocol = Some("http1".to_string()); @@ -950,7 +879,6 @@ api_security_sample_delay: [1, 2, 3] expected.otlp_config_metrics_sums_initial_cumulativ_monotonic_value = Some("keep".to_string()); expected.otlp_config_metrics_summaries_mode = Some("noquantiles".to_string()); - expected.appsec_rules = Some("/opt/custom-rules.json".to_string()); assert_eq!(config, expected); Ok(()) @@ -1081,27 +1009,10 @@ otlp_config: mode: "quantiles" logs: enabled: true - -# AWS Lambda -api_key_secret_arn: "arn:aws:secretsmanager:region:account:secret:datadog-api-key" -kms_api_key: "test-kms-key" -serverless_logs_enabled: false -serverless_flush_strategy: "periodically,60000" -enhanced_metrics: false -lambda_proc_enhanced_metrics: false -capture_lambda_payload: true -capture_lambda_payload_max_depth: 5 -compute_trace_stats_on_extension: true -api_key_secret_reload_interval: 0 -serverless_appsec_enabled: true -appsec_rules: "/path/to/rules.json" -appsec_waf_timeout: 1000000 # Microseconds -api_security_enabled: false -api_security_sample_delay: 60 # Seconds "#, )?; - let mut config = Config::default(); + let mut config: Config = Config::default(); let yaml_config_source = YamlConfigSource { path: Path::new("datadog.yaml").to_path_buf(), }; @@ -1215,28 +1126,6 @@ api_security_sample_delay: 60 # Seconds otlp_config_metrics_summaries_mode: Some("quantiles".to_string()), otlp_config_traces_probabilistic_sampler_sampling_percentage: Some(50), otlp_config_logs_enabled: true, - api_key_secret_arn: "arn:aws:secretsmanager:region:account:secret:datadog-api-key" - .to_string(), - kms_api_key: "test-kms-key".to_string(), - api_key_ssm_arn: String::default(), - serverless_logs_enabled: false, - serverless_flush_strategy: FlushStrategy::Periodically(PeriodicStrategy { - interval: 60000, - }), - enhanced_metrics: false, - lambda_proc_enhanced_metrics: false, - capture_lambda_payload: true, - capture_lambda_payload_max_depth: 5, - compute_trace_stats_on_extension: true, - span_dedup_timeout: None, - api_key_secret_reload_interval: None, - - serverless_appsec_enabled: true, - appsec_rules: Some("/path/to/rules.json".to_string()), - appsec_waf_timeout: Duration::from_secs(1), - api_security_enabled: false, - api_security_sample_delay: Duration::from_secs(60), - apm_filter_tags_require: None, apm_filter_tags_reject: None, apm_filter_tags_regex_require: None, @@ -1245,6 +1134,7 @@ api_security_sample_delay: 60 # Seconds dogstatsd_so_rcvbuf: Some(1_048_576), dogstatsd_buffer_size: Some(65507), dogstatsd_queue_size: Some(2048), + ext: crate::NoExtension, }; // Assert that @@ -1266,7 +1156,7 @@ dogstatsd_buffer_size: 16384 dogstatsd_queue_size: 512 ", )?; - let mut config = Config::default(); + let mut config: Config = Config::default(); let yaml_config_source = YamlConfigSource { path: Path::new("datadog.yaml").to_path_buf(), }; @@ -1286,7 +1176,7 @@ dogstatsd_queue_size: 512 figment::Jail::expect_with(|jail| { jail.clear_env(); jail.create_file("datadog.yaml", "")?; - let mut config = Config::default(); + let mut config: Config = Config::default(); let yaml_config_source = YamlConfigSource { path: Path::new("datadog.yaml").to_path_buf(), };