Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 61 additions & 9 deletions factory/eval/hygiene.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
"""Universal hygiene eval dimensions applied to every factory-managed project.

These 7 dimensions are mandatory and cannot be removed. They are computed by
the factory itself (not by per-project eval/score.py) and auto-detect the
project's tooling. Projects can ADD dimensions via eval/score.py but cannot
remove any of these.

Together with the 5 growth dimensions in growth.py, these form the 12
mandatory eval dimensions that define the factory's quality baseline.

All functions take a project_path and return an EvalResult-compatible dict.
Expand All @@ -17,15 +17,20 @@
import subprocess
from pathlib import Path

import structlog

log = structlog.get_logger()

# Relative weights within the hygiene category (sum to 1.0).
# The runner normalizes these so that hygiene gets 50% of the composite.
# Keys must be unique: a repeated key in a dict literal silently keeps only
# its last value, which would hide the weights that are actually in effect.
HYGIENE_WEIGHTS = {
    "tests": 0.28,
    "lint": 0.14,
    "type_check": 0.09,
    "coverage": 0.23,
    "guard_patterns": 0.09,
    "config_parser": 0.09,
    "security": 0.08,
}


Expand Down Expand Up @@ -523,16 +528,63 @@ def eval_config_parser(project_path: Path) -> dict:
}


# ── Dimension 7: security (weight 0.08) ─────────────────────────


def eval_security(project_path: Path) -> dict:
    """Run security scanners via the pluggable scanner registry.

    Delegates to factory.security.ScannerRegistry for scanner detection and
    execution. Each registered scanner (bandit, npm-audit, semgrep, trivy,
    git-secrets) auto-detects applicability and runs if appropriate.

    Scoring: partial credit, deducting 0.1 per issue found across all scanners.
    A scan that reports ``passed=False`` without listing any issue (for
    example because the tool crashed) is charged the same 0.1 and fails the
    dimension: an unknown security state must not be reported as "clean".
    Returns the ``_neutral`` result when no scanner produced a result for any
    sub-project.
    """
    # Local import keeps the security subsystem out of module import time.
    from factory.security import get_default_registry

    registry = get_default_registry()
    total_issues = 0
    failed_scans = 0
    ran_any = False
    details_parts: list[str] = []

    for sp in _find_sub_projects(project_path):
        for result in registry.scan(sp):
            ran_any = True
            count = result.issue_count
            total_issues += count
            label = f"{sp.name}({result.scanner_name})"
            if count > 0:
                details_parts.append(f"{label}: {count} issues")
            elif not result.passed:
                # Zero issues but not passed: the scan itself failed, so the
                # empty issue list says nothing about the project's safety.
                failed_scans += 1
                reason = result.details or "scan failed"
                details_parts.append(f"{label}: failed ({reason})")
            else:
                details_parts.append(f"{label}: clean")

    if not ran_any:
        return _neutral("security", "no security scanner detected")

    penalties = total_issues + failed_scans
    score = max(0.0, 1.0 - penalties * 0.1)
    return {
        "name": "security",
        # Rounding hides float noise such as 1.0 - 3 * 0.1 == 0.7000000000000001.
        "score": round(score, 4),
        "weight": HYGIENE_WEIGHTS["security"],
        "passed": penalties == 0,
        "details": "; ".join(details_parts),
    }


# ── Public API ─────────────────────────────────────────────────────


def compute_hygiene_results(project_path: Path) -> list[dict]:
    """Compute all 7 mandatory hygiene dimensions for a project.

    Calls each hygiene evaluator once with ``project_path`` and returns their
    result dicts in a fixed order, one entry per evaluator listed below.
    """
    return [
        eval_tests(project_path),
        eval_lint(project_path),
        eval_type_check(project_path),
        eval_coverage(project_path),
        eval_guard_patterns(project_path),
        eval_config_parser(project_path),
        eval_security(project_path),
    ]
5 changes: 3 additions & 2 deletions factory/eval/runner.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
"""EvalRunner — compute mandatory dimensions and merge with project-specific evals.

The factory's eval system has mandatory dimensions that apply to every project:
- 7 hygiene dimensions (tests, lint, type_check, coverage, guard_patterns,
config_parser, security)
- 5 growth dimensions (capability_surface, experiment_diversity, observability,
research_grounding, factory_effectiveness)

Expand Down Expand Up @@ -123,7 +124,7 @@ async def _run_project_eval(
"""Run the project's eval/score.py (if it exists) and return additional results.

Returns an empty list if the command fails or returns no results.
These are project-specific ADDITIONS to the mandatory 12 dimensions.
"""
parts = eval_command.split()

Expand Down
154 changes: 154 additions & 0 deletions factory/security/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
"""Security scanning subsystem: pluggable scanner architecture.

Provides a Protocol-based scanner interface, a registry with auto-detection,
and concrete scanner implementations for multiple security tools.

Usage:
from factory.security import get_default_registry

registry = get_default_registry()  # pre-populated with the built-in scanners
results = registry.scan(project_path)
"""

from __future__ import annotations

import structlog
from pathlib import Path
from typing import Protocol, runtime_checkable

from factory.security.models import SecurityScanResult

log = structlog.get_logger()


@runtime_checkable
class SecurityScanner(Protocol):
    """Structural contract satisfied by every security scanning tool.

    An implementation provides three members:
    - name: identifier used when logging and labelling results
    - detect: decides whether the scanner applies to a project
    - run: performs the scan and hands back a structured result
    """

    @property
    def name(self) -> str:
        """Short scanner identifier, such as 'bandit' or 'npm-audit'."""
        ...

    def detect(self, project_path: Path) -> bool:
        """Tell whether this scanner should run against ``project_path``.

        Covers both halves of applicability: the project is of a kind the
        tool understands (e.g. a Python project for bandit) and the tool
        itself is available (e.g. bandit is installed).
        """
        ...

    def run(self, project_path: Path) -> SecurityScanResult:
        """Scan ``project_path`` and return the findings.

        Implementations are expected not to raise. A scan that cannot
        complete is reported as a SecurityScanResult with passed=False and
        details describing what went wrong.
        """
        ...


class ScannerRegistry:
    """Ordered collection of security scanners with per-project detection.

    Scanners are kept in registration order. For a given project path the
    registry asks each scanner whether it applies and runs the ones that do,
    turning any unexpected scanner exception into a logged warning.
    """

    def __init__(self) -> None:
        self._scanners: list[SecurityScanner] = []

    def register(self, scanner: SecurityScanner) -> None:
        """Append ``scanner`` to the end of the registry."""
        self._scanners.append(scanner)

    @property
    def scanners(self) -> list[SecurityScanner]:
        """Snapshot of the registered scanners; mutating it is harmless."""
        return self._scanners.copy()

    @staticmethod
    def _is_applicable(scanner: SecurityScanner, project_path: Path) -> bool:
        """Ask one scanner whether it applies, treating an error as "no"."""
        try:
            return bool(scanner.detect(project_path))
        except Exception:
            log.warning("scanner_detect_error", scanner=scanner.name, exc_info=True)
            return False

    def detect(self, project_path: Path) -> list[SecurityScanner]:
        """List the registered scanners that accept ``project_path``."""
        return [
            candidate
            for candidate in self._scanners
            if self._is_applicable(candidate, project_path)
        ]

    def scan(self, project_path: Path) -> list[SecurityScanResult]:
        """Run every applicable scanner and collect one result per scanner.

        Scanners whose detect() rejects the project are not run. A scanner
        that raises is recorded as a failed SecurityScanResult rather than
        aborting the remaining scans.
        """
        candidates = self.detect(project_path)
        if not candidates:
            log.debug("no_applicable_scanners", project=str(project_path))
            return []

        collected: list[SecurityScanResult] = []
        for tool in candidates:
            try:
                outcome = tool.run(project_path)
                collected.append(outcome)
                log.debug(
                    "scanner_completed",
                    scanner=tool.name,
                    issues=outcome.issue_count,
                    passed=outcome.passed,
                )
            except Exception:
                log.warning("scanner_run_error", scanner=tool.name, exc_info=True)
                failure = SecurityScanResult(
                    scanner_name=tool.name,
                    passed=False,
                    details=f"Scanner {tool.name} failed with an unexpected error",
                )
                collected.append(failure)

        return collected


# Global default registry instance, pre-populated with all built-in scanners.
# Stays None until get_default_registry() has finished building it.
_default_registry: ScannerRegistry | None = None


def get_default_registry() -> ScannerRegistry:
    """Return the global default scanner registry, creating it on first call.

    Lazily imports and registers all built-in scanners to avoid circular
    imports and unnecessary work if the security subsystem is not used.

    The registry is built in a local variable and published to the module
    global only once every scanner is registered. If the import or a scanner
    constructor raises, the exception propagates and the next call retries,
    instead of returning a cached empty or half-populated registry.
    """
    global _default_registry
    if _default_registry is not None:
        return _default_registry

    # Import and register built-in scanners
    from factory.security.scanners import (
        BanditScanner,
        GitSecretsScanner,
        NpmAuditScanner,
        SemgrepScanner,
        TrivyScanner,
    )

    registry = ScannerRegistry()
    # Registration order determines the order scanners are detected and run.
    for scanner_cls in (
        BanditScanner,
        NpmAuditScanner,
        SemgrepScanner,
        TrivyScanner,
        GitSecretsScanner,
    ):
        registry.register(scanner_cls())

    _default_registry = registry
    return registry
55 changes: 55 additions & 0 deletions factory/security/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
"""Data models for the security scanning subsystem.

SecurityIssue represents a single finding from any scanner.
SecurityScanResult aggregates issues from a single scanner run.
"""

from __future__ import annotations

from enum import Enum

from pydantic import BaseModel, ConfigDict


class SecuritySeverity(str, Enum):
    """Severity of a security finding.

    Members are declared from most to least critical. Mixing in ``str``
    makes each member compare equal to its lowercase string value.
    """

    # Declaration order is the iteration order: worst first.
    CRITICAL = "critical"
    HIGH = "high"
    MEDIUM = "medium"
    LOW = "low"
    INFO = "info"


class SecurityIssue(BaseModel):
    """One finding reported by a security scanner.

    Only ``severity`` and ``category`` are required; every other field has
    an empty default.
    """

    # Strict typing, and unknown fields are rejected.
    model_config = ConfigDict(strict=True, extra="forbid")

    # Required fields.
    severity: SecuritySeverity
    category: str

    # Optional location of the finding.
    file: str = ""
    line: int | None = None

    # Optional free-text context.
    message: str = ""
    remediation: str = ""
    scanner: str = ""


class SecurityScanResult(BaseModel):
    """Outcome of running one scanner once.

    Carries the scanner's name, the issues it reported, a pass/fail flag,
    free-text details and the run duration in seconds.
    """

    # Strict typing, and unknown fields are rejected.
    model_config = ConfigDict(strict=True, extra="forbid")

    scanner_name: str
    issues: list[SecurityIssue] = []
    passed: bool = True
    details: str = ""
    duration_seconds: float = 0.0

    @property
    def issue_count(self) -> int:
        """Number of issues recorded in this result."""
        return len(self.issues)

    def issues_by_severity(self, severity: SecuritySeverity) -> list[SecurityIssue]:
        """Return the issues whose severity equals ``severity``, in order."""
        matching: list[SecurityIssue] = []
        for issue in self.issues:
            if issue.severity == severity:
                matching.append(issue)
        return matching
Loading
Loading