From 5c57fbc2ebfd0a3712cc798504a97c22f91bb2a3 Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]"
<161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Sat, 14 Feb 2026 10:41:08 +0000
Subject: [PATCH 1/4] =?UTF-8?q?=F0=9F=9B=A1=EF=B8=8F=20Sentinel:=20[MEDIUM?=
=?UTF-8?q?]=20Fix=20CSV=20Injection=20in=20log=20sanitization?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
**Vulnerability:**
The `sanitize_for_log` function in `main.py` was stripping quotes from `repr()` output for aesthetic reasons. This allowed strings starting with `=`, `+`, `-`, or `@` to be interpreted as formulas if the logs were exported to CSV and opened in spreadsheet software (CSV Injection / Formula Injection).
**Fix:**
Modified `sanitize_for_log` to detect strings starting with these dangerous characters. If detected, the function now retains the surrounding quotes provided by `repr()`, forcing spreadsheet software to treat the value as a string literal.
**Verification:**
- Added `tests/test_csv_injection.py` to verify the fix and ensure regression handling for normal strings.
- Fixed a flaky test in `tests/test_push_rules_perf.py` that was failing due to module reloading issues in the test suite.
- Ran all tests successfully with `pytest`.
**Impact:**
Prevents potential formula execution on administrative machines analyzing logs.
Co-authored-by: abhimehro <84992105+abhimehro@users.noreply.github.com>
---
.jules/sentinel.md | 5 +++
main.py | 7 ++++
tests/test_csv_injection.py | 66 +++++++++++++++++++++++++++++++++++
tests/test_push_rules_perf.py | 44 +++++++++++------------
4 files changed, 100 insertions(+), 22 deletions(-)
create mode 100644 tests/test_csv_injection.py
diff --git a/.jules/sentinel.md b/.jules/sentinel.md
index fe79fb0..fcc8d8f 100644
--- a/.jules/sentinel.md
+++ b/.jules/sentinel.md
@@ -33,3 +33,8 @@
**Learning:** Path-based checks (`os.path.islink`, `os.stat`) followed by path-based operations (`os.chmod`) are inherently racy. File descriptors are the only way to pin the resource.
**Prevention:** Use `os.open` with `O_NOFOLLOW` to open the file securely (failing if it's a symlink). Then use file-descriptor-based operations: `os.fstat(fd)` to check modes and `os.fchmod(fd, mode)` to modify permissions. This ensures operations apply to the exact inode opened.
+
+## 2025-05-22 - CSV Injection via Log Sanitization
+**Vulnerability:** `sanitize_for_log` in `main.py` was stripping quotes from `repr()` output for aesthetic reasons, inadvertently exposing strings starting with `=`, `+`, `-`, `@` to formula injection if logs are viewed in Excel.
+**Learning:** Security controls (like `repr()`) can be weakened by subsequent "cleanup" steps. Always consider downstream consumption of logs (e.g., CSV export).
+**Prevention:** Check for dangerous prefixes before stripping quotes. If detected, retain the quotes to force string literal interpretation.
diff --git a/main.py b/main.py
index 673bf00..3d416d7 100644
--- a/main.py
+++ b/main.py
@@ -239,6 +239,13 @@ def sanitize_for_log(text: Any) -> str:
# repr() safely escapes control characters (e.g., \n -> \\n, \x1b -> \\x1b)
# This prevents log injection and terminal hijacking.
safe = repr(s)
+
+ # Security: Prevent CSV Injection (Formula Injection)
+ # If the string starts with =, +, -, or @, we keep the quotes from repr()
+ # to force spreadsheet software to treat it as a string literal.
+ if s and str(s)[0] in ("=", "+", "-", "@"):
+ return safe
+
if len(safe) >= 2 and safe[0] == safe[-1] and safe[0] in ("'", '"'):
return safe[1:-1]
return safe
diff --git a/tests/test_csv_injection.py b/tests/test_csv_injection.py
new file mode 100644
index 0000000..b6c8a73
--- /dev/null
+++ b/tests/test_csv_injection.py
@@ -0,0 +1,66 @@
+
+import unittest
+import sys
+import os
+
+# Add root to path to import main
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import main
+
+class TestCSVInjection(unittest.TestCase):
+ def test_csv_injection_prevention(self):
+ """
+ Verify that sanitize_for_log correctly keeps quotes around strings
+ that start with characters known to trigger formula execution in spreadsheets
+ (CSV Injection).
+ """
+ # Test cases for CSV injection characters
+ dangerous_inputs = [
+ "=cmd|' /C calc'!A0",
+ "+cmd|' /C calc'!A0",
+ "-cmd|' /C calc'!A0",
+ "@cmd|' /C calc'!A0",
+ ]
+
+ for inp in dangerous_inputs:
+ sanitized = main.sanitize_for_log(inp)
+ # Should keep quotes (repr adds them)
+ # repr("=...") -> "'=...'"
+ # So sanitized should start with ' or "
+ self.assertTrue(sanitized.startswith("'") or sanitized.startswith('"'),
+ f"Input '{inp}' should be quoted to prevent CSV injection. Got: {sanitized}")
+
+ # Should contain the input
+ self.assertIn(inp, sanitized)
+
+ def test_normal_string_behavior(self):
+ """
+ Verify that normal strings (not starting with =, +, -, @) still have
+ their outer quotes stripped, preserving existing behavior.
+ """
+ safe_inputs = [
+ "NormalString",
+ "Folder Name",
+ "12345",
+ "", # XSS attempt (handled by repr escaping but checked here for quote stripping)
+ ]
+
+ for inp in safe_inputs:
+ sanitized = main.sanitize_for_log(inp)
+ # Should NOT start with quote (unless repr escaped something inside and used different quotes, but for simple strings it shouldn't)
+ # Actually, repr("NormalString") is 'NormalString'. Stripped -> NormalString.
+ # repr("Folder Name") is 'Folder Name'. Stripped -> Folder Name.
+ self.assertFalse(sanitized.startswith("'") and sanitized.endswith("'"),
+ f"Input '{inp}' should have outer quotes stripped. Got: {sanitized}")
+
+ # For strict check:
+ self.assertEqual(sanitized, repr(inp)[1:-1])
+
+ def test_empty_input(self):
+ """Verify empty input handling."""
+ self.assertEqual(main.sanitize_for_log(""), "")
+ self.assertEqual(main.sanitize_for_log(None), "None")
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/tests/test_push_rules_perf.py b/tests/test_push_rules_perf.py
index 63a89c8..18dcd63 100644
--- a/tests/test_push_rules_perf.py
+++ b/tests/test_push_rules_perf.py
@@ -98,31 +98,31 @@ def test_push_rules_multi_batch(self, mock_executor, mock_as_completed):
# This should ALWAYS be True
self.assertTrue(mock_executor.called, "ThreadPoolExecutor should be called for multi-batch")
- @patch("main.is_valid_rule")
- def test_push_rules_skips_validation_for_existing(self, mock_is_valid):
+ def test_push_rules_skips_validation_for_existing(self):
"""
Test that is_valid_rule is NOT called for rules that are already in existing_rules.
"""
- mock_is_valid.return_value = True
- hostnames = ["h1", "h2"]
- # h1 is already known, h2 is new
- existing_rules = {"h1"}
-
- main.push_rules(
- self.profile_id,
- self.folder_name,
- self.folder_id,
- self.do,
- self.status,
- hostnames,
- existing_rules,
- self.client
- )
-
- # h1 is in existing_rules, so we should skip validation for it.
- # h2 is NOT in existing_rules, so we should validate it.
- # So is_valid_rule should be called EXACTLY once, with "h2".
- mock_is_valid.assert_called_once_with("h2")
+ with patch.object(main, "is_valid_rule") as mock_is_valid:
+ mock_is_valid.return_value = True
+ hostnames = ["h1", "h2"]
+ # h1 is already known, h2 is new
+ existing_rules = {"h1"}
+
+ main.push_rules(
+ self.profile_id,
+ self.folder_name,
+ self.folder_id,
+ self.do,
+ self.status,
+ hostnames,
+ existing_rules,
+ self.client
+ )
+
+ # h1 is in existing_rules, so we should skip validation for it.
+ # h2 is NOT in existing_rules, so we should validate it.
+ # So is_valid_rule should be called EXACTLY once, with "h2".
+ mock_is_valid.assert_called_once_with("h2")
if __name__ == '__main__':
unittest.main()
From 7fce5c4ee680b72196379663a2bfcf2d7bcc1cff Mon Sep 17 00:00:00 2001
From: Abhi Mehrotra
Date: Sat, 14 Feb 2026 15:35:30 -0600
Subject: [PATCH 2/4] Update main.py
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
---
main.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/main.py b/main.py
index 3d416d7..2c81535 100644
--- a/main.py
+++ b/main.py
@@ -243,7 +243,7 @@ def sanitize_for_log(text: Any) -> str:
# Security: Prevent CSV Injection (Formula Injection)
# If the string starts with =, +, -, or @, we keep the quotes from repr()
# to force spreadsheet software to treat it as a string literal.
- if s and str(s)[0] in ("=", "+", "-", "@"):
+ if s and s.startswith(("=", "+", "-", "@")):
return safe
if len(safe) >= 2 and safe[0] == safe[-1] and safe[0] in ("'", '"'):
From b860d505a7b8e8d471456cb078fa473207ff43b1 Mon Sep 17 00:00:00 2001
From: Abhi Mehrotra
Date: Sat, 14 Feb 2026 15:35:51 -0600
Subject: [PATCH 3/4] Update test_csv_injection.py
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
tests/test_csv_injection.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tests/test_csv_injection.py b/tests/test_csv_injection.py
index b6c8a73..557d4a0 100644
--- a/tests/test_csv_injection.py
+++ b/tests/test_csv_injection.py
@@ -51,7 +51,7 @@ def test_normal_string_behavior(self):
# Should NOT start with quote (unless repr escaped something inside and used different quotes, but for simple strings it shouldn't)
# Actually, repr("NormalString") is 'NormalString'. Stripped -> NormalString.
# repr("Folder Name") is 'Folder Name'. Stripped -> Folder Name.
- self.assertFalse(sanitized.startswith("'") and sanitized.endswith("'"),
+ self.assertFalse(sanitized.startswith(("'", '"')),
f"Input '{inp}' should have outer quotes stripped. Got: {sanitized}")
# For strict check:
From 8ec3a43ca7733d04abdde0f38e0986a363b05616 Mon Sep 17 00:00:00 2001
From: Abhi Mehrotra
Date: Sat, 14 Feb 2026 15:35:58 -0600
Subject: [PATCH 4/4] Update sentinel.md
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
.jules/sentinel.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.jules/sentinel.md b/.jules/sentinel.md
index fcc8d8f..5c49015 100644
--- a/.jules/sentinel.md
+++ b/.jules/sentinel.md
@@ -34,7 +34,7 @@
**Prevention:** Use `os.open` with `O_NOFOLLOW` to open the file securely (failing if it's a symlink). Then use file-descriptor-based operations: `os.fstat(fd)` to check modes and `os.fchmod(fd, mode)` to modify permissions. This ensures operations apply to the exact inode opened.
-## 2025-05-22 - CSV Injection via Log Sanitization
+## 2026-02-14 - CSV Injection via Log Sanitization
**Vulnerability:** `sanitize_for_log` in `main.py` was stripping quotes from `repr()` output for aesthetic reasons, inadvertently exposing strings starting with `=`, `+`, `-`, `@` to formula injection if logs are viewed in Excel.
**Learning:** Security controls (like `repr()`) can be weakened by subsequent "cleanup" steps. Always consider downstream consumption of logs (e.g., CSV export).
**Prevention:** Check for dangerous prefixes before stripping quotes. If detected, retain the quotes to force string literal interpretation.