From 7af7e51c0214ee72fdc16cf15c6f52087e539912 Mon Sep 17 00:00:00 2001
From: Luke Inglis
Date: Fri, 1 May 2026 14:54:37 -0400
Subject: [PATCH] feat: add security hygiene eval dimension (bandit + npm audit)

Add a 7th mandatory hygiene dimension that runs security scanners on
detected sub-projects:

- Python: runs bandit with JSON output, counts issues
- Node.js: runs npm audit with JSON output, sums vulnerabilities
- Returns a neutral score (0.5) when no scanner is detected

Rebalances HYGIENE_WEIGHTS to maintain a sum of 1.0 with the new
dimension at 0.08 weight. Tests and coverage keep the highest weights.

Includes 7 tests covering clean scans, issue parsing, the tool-not-found
fallback, and the score floor at zero. Updates existing tests for the
new dimension counts (6 -> 7 hygiene, 11 -> 12 total with growth).

Closes #128 (the "Good First Issues" item in contributing.md).

Signed-off-by: Luke Inglis
---
 factory/eval/hygiene.py    | 83 ++++++++++++++++++++++++++++++++----
 tests/eval/test_hygiene.py | 87 ++++++++++++++++++++++++++++++++++++--
 tests/eval/test_runner.py  |  6 +--
 3 files changed, 160 insertions(+), 16 deletions(-)

diff --git a/factory/eval/hygiene.py b/factory/eval/hygiene.py
index 46c3fbe..3ed0165 100644
--- a/factory/eval/hygiene.py
+++ b/factory/eval/hygiene.py
@@ -1,17 +1,18 @@
 """Universal hygiene eval dimensions applied to every factory-managed project.
 
-These 6 dimensions are mandatory and cannot be removed. They are computed by
+These 7 dimensions are mandatory and cannot be removed. They are computed by
 the factory itself (not by per-project eval/score.py) and auto-detect the
 project's tooling. Projects can ADD dimensions via eval/score.py but cannot
 remove any of these.
 
-Together with the 5 growth dimensions in growth.py, these form the 11
+Together with the 5 growth dimensions in growth.py, these form the 12
 mandatory eval dimensions that define the factory's quality baseline.
 
 All functions take a project_path and return an EvalResult-compatible dict.
 If a tool is not detected for a dimension, score is 0.5 (neutral), not 0.
 """
 
+import json
 import os
 import re
 import subprocess
@@ -20,12 +21,13 @@
 # Relative weights within the hygiene category (sum to 1.0).
 # The runner normalizes these so that hygiene gets 50% of the composite.
 HYGIENE_WEIGHTS = {
-    "tests": 0.30,
-    "lint": 0.15,
-    "type_check": 0.10,
-    "coverage": 0.25,
-    "guard_patterns": 0.10,
-    "config_parser": 0.10,
+    "tests": 0.28,
+    "lint": 0.14,
+    "type_check": 0.09,
+    "coverage": 0.23,
+    "guard_patterns": 0.09,
+    "config_parser": 0.09,
+    "security": 0.08,
 }
 
 
@@ -523,11 +525,73 @@ def eval_config_parser(project_path: Path) -> dict:
     }
 
 
+# ── Dimension 7: security (weight 0.08) ─────────────────────────
+
+
+def eval_security(project_path: Path) -> dict:
+    """Run security scanners across detected sub-projects. Partial credit per issue."""
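+    # Scoring: each bandit issue or npm audit vulnerability deducts 0.1 from
+    # a perfect 1.0; if no scanner could run at all, the dimension returns
+    # the neutral 0.5 rather than penalizing the project.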
+    sub_projects = _find_sub_projects(project_path)
+    total_issues = 0
+    ran_any = False
+    details_parts: list[str] = []
+
+    for sp in sub_projects:
+        if _detect_python_project(sp):
+            rc, stdout, stderr = _run_cmd(
+                ["python", "-m", "bandit", "-r", ".", "-f", "json", "-q"], sp,
+            )
+            # No `continue` when bandit is missing: a sub-project can be both
+            # Python and Node, and the npm audit check below should still run.
+            if not (rc == 1 and "Command not found" in stderr):
+                try:
+                    data = json.loads(stdout) if stdout.strip() else {}
+                    issues = data.get("results", [])
+                    count = len(issues)
+                except (json.JSONDecodeError, TypeError):
+                    count = 0
+                if rc == 0 and count == 0:
+                    ran_any = True
+                    details_parts.append(f"{sp.name}: clean")
+                elif count > 0:
+                    ran_any = True
+                    total_issues += count
+                    details_parts.append(f"{sp.name}: {count} issues")
+
+        if _detect_node_project(sp):
+            rc, stdout, stderr = _run_cmd(["npm", "audit", "--json"], sp, timeout=180)
+            if rc == 1 and "Command not found" in stderr:
+                continue
+            try:
+                data = json.loads(stdout) if stdout.strip() else {}
+                vulns = data.get("metadata", {}).get("vulnerabilities", {})
+                count = sum(vulns.get(sev, 0) for sev in ("low", "moderate", "high", "critical"))
+            except (json.JSONDecodeError, TypeError):
+                count = 0
+            if count == 0 and rc == 0:
+                ran_any = True
+                details_parts.append(f"{sp.name}(js): clean")
+            elif count > 0:
+                ran_any = True
+                total_issues += count
+                details_parts.append(f"{sp.name}(js): {count} vulnerabilities")
+
+    if not ran_any:
+        return _neutral("security", "no security scanner detected")
+
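+    # Linear penalty: ten or more combined findings drive the raw score to
+    # the 0.0 floor.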
"guard_patterns", "config_parser", "security"} def test_all_have_required_keys(self, tmp_path): results = compute_hygiene_results(tmp_path) @@ -139,3 +144,77 @@ def test_all_have_required_keys(self, tmp_path): assert "weight" in r assert "passed" in r assert "details" in r + + +class TestEvalSecurity: + def test_no_scanner_returns_neutral(self, tmp_path): + result = eval_security(tmp_path) + assert result["name"] == "security" + assert result["score"] == 0.5 + assert "Not detected" in result["details"] + + def test_python_bandit_clean(self, tmp_path): + (tmp_path / "pyproject.toml").write_text("[project]\n") + bandit_output = json.dumps({"results": []}) + with patch("factory.eval.hygiene._run_cmd") as mock: + mock.return_value = (0, bandit_output, "") + result = eval_security(tmp_path) + assert result["score"] == 1.0 + assert result["passed"] is True + assert "clean" in result["details"] + + def test_python_bandit_issues(self, tmp_path): + (tmp_path / "pyproject.toml").write_text("[project]\n") + bandit_output = json.dumps({ + "results": [ + {"issue_severity": "HIGH", "issue_text": "Use of exec"}, + {"issue_severity": "MEDIUM", "issue_text": "Hardcoded password"}, + {"issue_severity": "LOW", "issue_text": "Assert used"}, + ], + }) + with patch("factory.eval.hygiene._run_cmd") as mock: + mock.return_value = (1, bandit_output, "") + result = eval_security(tmp_path) + assert result["score"] == round(1.0 - 3 * 0.1, 4) + assert result["passed"] is False + assert "3 issues" in result["details"] + + def test_node_npm_audit_clean(self, tmp_path): + (tmp_path / "package.json").write_text("{}\n") + audit_output = json.dumps({ + "metadata": {"vulnerabilities": {"low": 0, "moderate": 0, "high": 0, "critical": 0}}, + }) + with patch("factory.eval.hygiene._run_cmd") as mock: + mock.return_value = (0, audit_output, "") + result = eval_security(tmp_path) + assert result["score"] == 1.0 + assert result["passed"] is True + assert "js" in result["details"] + + def test_node_npm_audit_vulnerabilities(self, tmp_path): + (tmp_path / "package.json").write_text("{}\n") + audit_output = json.dumps({ + "metadata": {"vulnerabilities": {"low": 2, "moderate": 1, "high": 1, "critical": 0}}, + }) + with patch("factory.eval.hygiene._run_cmd") as mock: + mock.return_value = (1, audit_output, "") + result = eval_security(tmp_path) + assert result["passed"] is False + assert "4 vulnerabilities" in result["details"] + + def test_bandit_not_installed(self, tmp_path): + (tmp_path / "pyproject.toml").write_text("[project]\n") + with patch("factory.eval.hygiene._run_cmd") as mock: + mock.return_value = (1, "", "Command not found: bandit") + result = eval_security(tmp_path) + assert result["score"] == 0.5 + assert "Not detected" in result["details"] + + def test_score_floor_at_zero(self, tmp_path): + (tmp_path / "pyproject.toml").write_text("[project]\n") + issues = [{"issue_severity": "HIGH", "issue_text": f"issue {i}"} for i in range(15)] + bandit_output = json.dumps({"results": issues}) + with patch("factory.eval.hygiene._run_cmd") as mock: + mock.return_value = (1, bandit_output, "") + result = eval_security(tmp_path) + assert result["score"] == 0.0 diff --git a/tests/eval/test_runner.py b/tests/eval/test_runner.py index 38a58fc..cde607f 100644 --- a/tests/eval/test_runner.py +++ b/tests/eval/test_runner.py @@ -62,7 +62,7 @@ async def test_failed_project_eval_still_has_mandatory(self, tmp_path): result = await run_eval("nonexistent_command_xyz", tmp_path, threshold=0.0) names = {r.name for r in result.results} # All 11 
+    def test_score_floor_at_zero(self, tmp_path):
+        (tmp_path / "pyproject.toml").write_text("[project]\n")
+        issues = [{"issue_severity": "HIGH", "issue_text": f"issue {i}"} for i in range(15)]
+        bandit_output = json.dumps({"results": issues})
+        with patch("factory.eval.hygiene._run_cmd") as mock:
+            mock.return_value = (1, bandit_output, "")
+            result = eval_security(tmp_path)
+        assert result["score"] == 0.0
diff --git a/tests/eval/test_runner.py b/tests/eval/test_runner.py
index 38a58fc..cde607f 100644
--- a/tests/eval/test_runner.py
+++ b/tests/eval/test_runner.py
@@ -62,7 +62,7 @@ async def test_failed_project_eval_still_has_mandatory(self, tmp_path):
         result = await run_eval("nonexistent_command_xyz", tmp_path, threshold=0.0)
         names = {r.name for r in result.results}
-        # All 11 mandatory should still be present
-        assert len(names) >= 11
+        # All 12 mandatory dimensions should still be present
+        assert len(names) >= 12
         assert "tests" in names
         assert "capability_surface" in names
 
@@ -79,12 +79,12 @@ async def test_timeout_project_eval(self, tmp_path):
         result = await run_eval(f"{sys.executable} {script}", tmp_path, threshold=0.0, timeout=1.0)
         # Mandatory dimensions still computed
         names = {r.name for r in result.results}
-        assert len(names) >= 11
+        assert len(names) >= 12
 
     async def test_weight_split_is_50_50(self, tmp_path):
         """Hygiene dimensions get 50% total weight, growth gets 50%."""
         result = await run_eval("true", tmp_path, threshold=0.0)
-        hygiene_names = {"tests", "lint", "type_check", "coverage", "guard_patterns", "config_parser"}
+        hygiene_names = {"tests", "lint", "type_check", "coverage", "guard_patterns", "config_parser", "security"}
         growth_names = {
             "capability_surface", "experiment_diversity", "observability", "research_grounding", "factory_effectiveness",