From 505443e1a9995f0736fa5d365b531b2805410b36 Mon Sep 17 00:00:00 2001 From: James Brown Date: Fri, 30 Jan 2026 13:39:27 -0800 Subject: [PATCH] allow parsing all legal environment variable names --- dotenvy/src/parse.rs | 67 ++++++++++++++++++++++++++++++++++++++------ 1 file changed, 58 insertions(+), 9 deletions(-) diff --git a/dotenvy/src/parse.rs b/dotenvy/src/parse.rs index 671de97..205814a 100644 --- a/dotenvy/src/parse.rs +++ b/dotenvy/src/parse.rs @@ -68,17 +68,28 @@ impl<'a> LineParser<'a> { Ok(Some((key, parsed_value))) } + #[inline(always)] + fn valid_env_var_character(c: char) -> bool { + // POSIX 2018 § 8.1 tells us that environment variables can be any ASCII portable + // character other than `=` and `\0`. Because we're loading them from a file, we also + // rule out whitespace and control characters + // See + // + // for the range of portable ASCII; it's essentially 0x21 through 0x7e, with some + // whitespace that we can ignore + c.is_ascii() && c >= '\u{0021}' && c <= '\u{007e}' && c != '=' + } + fn parse_key(&mut self) -> Result { + // POSIX: "Other applications may have difficulty dealing with environment variable names that start with a digit. + // For this reason, use of such names is not recommended anywhere." if !self .line - .starts_with(|c: char| c.is_ascii_alphabetic() || c == '_') + .starts_with(|c| Self::valid_env_var_character(c) && !c.is_numeric()) { return Err(self.err()); } - let index = match self - .line - .find(|c: char| !(c.is_ascii_alphanumeric() || c == '_' || c == '.')) - { + let index = match self.line.find(|c| !Self::valid_env_var_character(c)) { Some(index) => index, None => self.line.len(), }; @@ -289,6 +300,8 @@ KEY10 ="whitespace before =" KEY11= "whitespace after =" export="export as key" export SHELL_LOVER=1 +export KEYS:CAN:HAVE_COLONS=1 +%TEMP%=/tmp "# .as_bytes(), ); @@ -307,6 +320,8 @@ export SHELL_LOVER=1 ("KEY11", "whitespace after ="), ("export", "export as key"), ("SHELL_LOVER", "1"), + ("KEYS:CAN:HAVE_COLONS", "1"), + ("%TEMP%", "/tmp"), ] .into_iter() .map(|(key, value)| (key.to_owned(), value.to_owned())); @@ -316,7 +331,7 @@ export SHELL_LOVER=1 assert_eq!(expected, actual?); count += 1; } - assert_eq!(count, 13); + assert_eq!(count, 15); Ok(()) } @@ -554,6 +569,17 @@ mod substitution_tests { vec![("KEY2", "_2"), ("KEY", "><>_2<")], ) } + + #[test] + fn nonalphanumeric_substitutions() -> Result<(), ParseBufError> { + assert_str( + r" + KEY:ONE=one + KEY:TWO=${KEY:ONE}11 + ", + vec![("KEY:ONE", "one"), ("KEY:TWO", "one11")], + ) + } } #[cfg(test)] @@ -588,8 +614,8 @@ mod error_tests { } #[test] - fn should_not_allow_dot_as_first_char_of_key() { - let invalid_key = ".KEY=value"; + fn should_not_allow_number_as_first_char_of_key() { + let invalid_key = "1KEY=value"; let iter = Iter::new(invalid_key.as_bytes()).collect::>(); @@ -601,7 +627,7 @@ mod error_tests { #[test] fn should_not_parse_invalid_format() { - let invalid_fmt = r"<><><>"; + let invalid_fmt = r"======"; let iter = Iter::new(invalid_fmt.as_bytes()).collect::>(); assert!(matches!( @@ -610,6 +636,29 @@ mod error_tests { )); } + #[test] + fn should_not_parse_invalid_keys() { + for (key, bad_index) in [ + ("0foo", 0), // starts with a digit, discouraged by posix + ("foö", 2), // trailing unicode + ("Πoo", 0), // leading unicode + ("foo\x07", 3), // contains control code + ("bar\x00", 3), // contains embedded nul byte + ("baz zed", 4), // whitespace + ] { + let invalid_fmt = format!("{key}=1"); + let iter = Iter::new(invalid_fmt.as_bytes()).collect::>(); + + assert!( + matches!( + iter[0], + Err(ParseBufError::LineParse(ref v, idx)) if *v == invalid_fmt && idx == bad_index + ), + "should fail to parse {key:?}" + ); + } + } + #[test] fn should_not_parse_invalid_escape() { let invalid_esc = r">\f<";