Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 54 additions & 1 deletion src/arbiter/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,14 @@ def _run_analysis(repo_path: Path, analyzers: list[Analyzer], exclude_paths: lis
return all_findings


_FOOTER = "\n Powered by HUMMBL — https://hummbl.io/audit"


def _print_footer() -> None:
    """Emit the module-level attribution footer on standard output."""
    footer_text = _FOOTER
    print(footer_text)


def _find_git_root(path: Path) -> Path | None:
"""Walk up from path to find the nearest .git directory."""
current = path
Expand Down Expand Up @@ -165,7 +173,7 @@ def cmd_score(args: argparse.Namespace) -> None:
}, indent=2))
else:
print(f"Score: {score.overall} ({score.grade}) | Lint: {score.lint_score} | Security: {score.security_score} | Complexity: {score.complexity_score} | Findings: {score.total_findings} | LOC: {loc:,}")

_print_footer()

def cmd_agents(args: argparse.Namespace) -> None:
"""Print agent leaderboard."""
Expand Down Expand Up @@ -478,6 +486,42 @@ def cmd_commits(args: argparse.Namespace) -> None:
f"+{c['loc_added']}/-{c['loc_removed']} {c['timestamp'][:16]}")


def _parse_exclude(args: argparse.Namespace) -> list[str] | None:
"""Parse --exclude into a list of paths."""
if not args.exclude:
return None
return [p.strip() for p in args.exclude.split(",") if p.strip()]


def cmd_report(args: argparse.Namespace) -> None:
    """Generate an HTML audit report for a repository, optionally with a PDF.

    Reads ``args.repo``, ``args.tier``, ``args.output``, ``args.pdf`` and
    ``args.exclude``. Progress and PDF-fallback messages go to stderr; the
    written report paths, the score summary and the footer go to stdout.
    """
    from arbiter.report import generate_report, render_html, render_pdf

    repo_path = Path(args.repo).resolve()
    exclude_paths = _parse_exclude(args)
    analyzers = _get_analyzers()

    print(f"Generating {args.tier} report for {repo_path.name}...", file=sys.stderr)
    report = generate_report(repo_path, analyzers, exclude_paths=exclude_paths)

    output = Path(args.output) if args.output else Path(f"{repo_path.name}-audit.html")
    html = render_html(report, tier=args.tier)
    # Allow --output to point into a directory that does not exist yet.
    output.parent.mkdir(parents=True, exist_ok=True)
    # Write as UTF-8 explicitly so the file does not depend on the platform's
    # locale encoding.
    output.write_text(html, encoding="utf-8")
    print(f"HTML report: {output}")

    if args.pdf:
        pdf_path = output.with_suffix(".pdf")
        try:
            render_pdf(report, pdf_path, tier=args.tier)
            print(f"PDF report: {pdf_path}")
        except RuntimeError as e:
            # render_pdf raises RuntimeError when it falls back to HTML only;
            # report that and keep going rather than aborting the command.
            print(f"PDF fallback: {e}", file=sys.stderr)

    print(f"\nScore: {report.score.overall} ({report.score.grade}) | "
          f"{report.score.total_findings} findings | {report.loc:,} LOC")
    _print_footer()


def main() -> None:
parser = argparse.ArgumentParser(description="Arbiter — Agent-aware code quality system")
parser.add_argument("--db", help="Path to SQLite database (default: arbiter_data.db)")
Expand Down Expand Up @@ -535,6 +579,14 @@ def main() -> None:
# fleet-report
subparsers.add_parser("fleet-report", help="Print fleet quality report")

# report
p_report = subparsers.add_parser("report", help="Generate HTML/PDF audit report")
p_report.add_argument("repo", help="Path to git repository")
p_report.add_argument("--tier", choices=["free", "paid"], default="paid", help="Report tier (default: paid)")
p_report.add_argument("--output", "-o", help="Output path (default: <repo>-audit.html)")
p_report.add_argument("--pdf", action="store_true", help="Also generate PDF (requires weasyprint)")
p_report.add_argument("--exclude", type=str, default="", help="Comma-separated paths to exclude")

# serve
p_serve = subparsers.add_parser("serve", help="Start API + dashboard")
p_serve.add_argument("--port", type=int, default=8080, help="Port")
Expand All @@ -554,6 +606,7 @@ def main() -> None:
"fleet-report": cmd_fleet_report,
"triage": cmd_triage,
"fix": cmd_fix,
"report": cmd_report,
}

handler = commands.get(args.command)
Expand Down
15 changes: 11 additions & 4 deletions src/arbiter/analyzers/dead_code_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,18 @@ def is_available(self) -> bool:
except (FileNotFoundError, subprocess.TimeoutExpired):
return False

# Directories that should never be scanned for dead code
_DEFAULT_EXCLUDES = ".venv,venv,node_modules,.git,__pycache__,.tox,.eggs,build,dist"

def analyze_repo(self, repo_path: Path, exclude_paths: list[str] | None = None) -> list[Finding]:
result = subprocess.run(
["vulture", str(repo_path), "--min-confidence", "80"],
capture_output=True, text=True, timeout=120,
)
cmd = ["vulture", str(repo_path), "--min-confidence", "80",
"--exclude", self._DEFAULT_EXCLUDES]
whitelist = repo_path / "vulture_whitelist.py"
if whitelist.exists():
cmd.append(str(whitelist))
if exclude_paths:
cmd[cmd.index("--exclude") + 1] += "," + ",".join(exclude_paths)
result = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
if not result.stdout.strip():
return []

Expand Down
251 changes: 251 additions & 0 deletions src/arbiter/report.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,251 @@
"""Arbiter Report Generator — HTML + PDF audit reports for clients.

Generates self-contained HTML reports using hummbl.io design tokens.
Two tiers: free (grade + breakdown) and paid (full findings + remediation).

Usage:
from arbiter.report import generate_report, render_html, render_pdf

report = generate_report(repo_path, analyzers)
html = render_html(report, tier="paid")
render_pdf(report, output_path) # requires weasyprint
"""

from __future__ import annotations

import json
from dataclasses import asdict, dataclass, field
from datetime import datetime, timezone
from pathlib import Path

from arbiter.analyzers.base import Finding
from arbiter.scoring import RepoScore


@dataclass
class AuditReport:
    """Complete audit data for a single repo.

    ``findings_by_file``, ``top_findings`` and ``remediation_steps`` are
    derived from ``findings``/``score``; use :meth:`build` to populate them.
    """

    repo_name: str
    audit_date: str
    score: RepoScore
    loc: int
    findings: list[Finding]
    findings_by_file: dict[str, list[Finding]] = field(default_factory=dict)
    top_findings: list[Finding] = field(default_factory=list)
    remediation_steps: list[str] = field(default_factory=list)

    @classmethod
    def build(cls, repo_name: str, score: RepoScore, loc: int,
              findings: list[Finding]) -> AuditReport:
        """Build a report with derived fields.

        Groups findings by ``file_path``, keeps the 20 highest-severity
        findings, generates remediation steps, and stamps the report with
        the current UTC date (``YYYY-MM-DD``).
        """
        by_file: dict[str, list[Finding]] = {}
        for f in findings:
            by_file.setdefault(f.file_path, []).append(f)

        # Unknown severities rank below LOW; sorted() is stable, so findings
        # of equal severity keep their original relative order.
        sev_rank = {"CRITICAL": 4, "HIGH": 3, "MEDIUM": 2, "LOW": 1}
        top = sorted(findings, key=lambda f: -sev_rank.get(f.severity, 0))[:20]

        remediation = _generate_remediation(score, findings)

        return cls(
            repo_name=repo_name,
            audit_date=datetime.now(timezone.utc).strftime("%Y-%m-%d"),
            score=score,
            loc=loc,
            findings=findings,
            findings_by_file=by_file,
            top_findings=top,
            remediation_steps=remediation,
        )

    def to_json(self) -> str:
        """Serialize to JSON that is safe for template injection.

        The result is valid JSON that parses to the same data as a plain
        ``json.dumps`` would produce, but ``<``, ``>`` and ``&`` are written
        as ``\\uXXXX`` escapes so that a value such as ``</script>`` cannot
        terminate an enclosing HTML ``<script>`` element.
        """
        data = {
            "repo_name": self.repo_name,
            "audit_date": self.audit_date,
            "overall": self.score.overall,
            "grade": self.score.grade,
            "lint_score": self.score.lint_score,
            "security_score": self.score.security_score,
            "complexity_score": self.score.complexity_score,
            "loc": self.loc,
            "total_findings": self.score.total_findings,
            "findings_by_severity": self.score.findings_by_severity,
            "findings_by_tool": self.score.findings_by_tool,
            "top_findings": [
                {"file": f.file_path, "line": f.line, "severity": f.severity,
                 "rule": f.rule_id, "message": f.message, "tool": f.tool}
                for f in self.top_findings
            ],
            "files_affected": len(self.findings_by_file),
            "remediation": self.remediation_steps,
        }
        payload = json.dumps(data, indent=2)
        # These characters can only occur inside JSON string literals, so
        # swapping them for \u escapes keeps the document equivalent.
        return (
            payload
            .replace("&", "\\u0026")
            .replace("<", "\\u003c")
            .replace(">", "\\u003e")
        )


def _generate_remediation(score: RepoScore, findings: list[Finding]) -> list[str]:
"""Generate prioritized remediation steps from findings."""
steps = []

crit_count = score.findings_by_severity.get("CRITICAL", 0)
high_count = score.findings_by_severity.get("HIGH", 0)

if crit_count:
steps.append(f"URGENT: Fix {crit_count} critical finding(s) — security vulnerabilities or fatal code issues")

if high_count:
steps.append(f"Fix {high_count} high-severity finding(s) — run `ruff check --fix` for auto-remediable lint issues")

if score.lint_score < 90:
steps.append("Run `ruff check --fix --unsafe-fixes` to auto-remediate lint findings")

if score.complexity_score < 90:
complex_files = [f for f in findings if f.tool == "radon" or "complexity" in f.rule_id.lower()]
if complex_files:
worst = complex_files[0]
steps.append(f"Reduce complexity in {worst.file_path} — extract helper functions from high-CC methods")

if score.security_score < 100:
steps.append("Address security findings — run `bandit -r src/` for details")

dead_code = [f for f in findings if f.tool == "vulture"]
if len(dead_code) > 10:
steps.append(f"Remove {len(dead_code)} unused code items or add a vulture whitelist")

if not steps:
steps.append("No remediation needed — codebase is in excellent shape")

return steps


def generate_report(repo_path: Path, analyzers, exclude_paths=None) -> AuditReport:
    """Analyze ``repo_path`` and assemble the resulting ``AuditReport``.

    Runs the given analyzers (honoring ``exclude_paths``), counts lines of
    code, scores the findings, and hands everything to ``AuditReport.build``
    under the repository directory's name.
    """
    # Imported inside the function, as in the original implementation.
    from arbiter.__main__ import _run_analysis
    from arbiter.git_historian import count_loc
    from arbiter.scoring import score_findings

    all_findings = _run_analysis(repo_path, analyzers, exclude_paths=exclude_paths)
    line_count = count_loc(repo_path)
    repo_score = score_findings(all_findings, line_count)

    return AuditReport.build(
        repo_name=repo_path.name,
        score=repo_score,
        loc=line_count,
        findings=all_findings,
    )


def render_html(report: AuditReport, tier: str = "free") -> str:
    """Render audit report as self-contained HTML.

    Loads ``templates/report_<tier>.html`` next to this module (falling back
    to ``report_free.html`` when that file does not exist) and fills in its
    ``{{PLACEHOLDER}}`` tokens. The findings table and remediation list are
    only populated for the ``"paid"`` tier.

    Returns the rendered HTML document as a string.
    """
    import re

    template_dir = Path(__file__).parent / "templates"
    template_file = template_dir / f"report_{tier}.html"
    if not template_file.exists():
        template_file = template_dir / "report_free.html"

    # Read with an explicit encoding instead of the platform locale default.
    # NOTE(review): assumes the bundled templates are UTF-8 — confirm.
    template = template_file.read_text(encoding="utf-8")

    # Build substitution values
    grade_color = _grade_color(report.score.grade)
    lint_bar = _score_bar(report.score.lint_score)
    security_bar = _score_bar(report.score.security_score)
    complexity_bar = _score_bar(report.score.complexity_score)

    severity_summary = ", ".join(
        f"{k}: {v}" for k, v in sorted(report.score.findings_by_severity.items())
        if v > 0
    ) or "none"

    findings_html = ""
    if tier == "paid" and report.top_findings:
        rows = []
        for f in report.top_findings:
            # Severity comes from analyzer output, so escape it like the
            # other finding fields before placing it in markup.
            severity = _escape(f.severity)
            sev_class = _escape(f.severity.lower())
            rows.append(
                f'<tr class="finding-row {sev_class}">'
                f'<td class="sev">{severity}</td>'
                f'<td class="file">{_escape(f.file_path)}:{f.line}</td>'
                f'<td class="rule">{_escape(f.rule_id)}</td>'
                f'<td class="msg">{_escape(f.message[:80])}</td>'
                f'</tr>'
            )
        findings_html = "\n".join(rows)

    remediation_html = ""
    if tier == "paid" and report.remediation_steps:
        items = [f"<li>{_escape(step)}</li>" for step in report.remediation_steps]
        remediation_html = "\n".join(items)

    # The JSON payload is expected to be embedded in a <script> element by
    # the template. Write <, > and & as \u escapes (still valid JSON) so a
    # value such as "</script>" cannot close that element early.
    report_json = (
        report.to_json()
        .replace("&", "\\u0026")
        .replace("<", "\\u003c")
        .replace(">", "\\u003e")
    )

    replacements = {
        "{{REPO_NAME}}": _escape(report.repo_name),
        "{{AUDIT_DATE}}": report.audit_date,
        "{{GRADE}}": report.score.grade,
        "{{GRADE_COLOR}}": grade_color,
        "{{OVERALL_SCORE}}": f"{report.score.overall:.1f}",
        "{{LOC}}": f"{report.loc:,}",
        "{{TOTAL_FINDINGS}}": str(report.score.total_findings),
        "{{SEVERITY_SUMMARY}}": severity_summary,
        "{{LINT_SCORE}}": f"{report.score.lint_score:.1f}",
        "{{LINT_BAR}}": lint_bar,
        "{{SECURITY_SCORE}}": f"{report.score.security_score:.1f}",
        "{{SECURITY_BAR}}": security_bar,
        "{{COMPLEXITY_SCORE}}": f"{report.score.complexity_score:.1f}",
        "{{COMPLEXITY_BAR}}": complexity_bar,
        "{{FILES_AFFECTED}}": str(len(report.findings_by_file)),
        "{{FINDINGS_ROWS}}": findings_html,
        "{{REMEDIATION_ITEMS}}": remediation_html,
        "{{REPORT_JSON}}": report_json,
    }

    # Substitute every placeholder in a single pass over the template, so
    # placeholder-looking text inside an inserted value (for example a repo
    # name or finding message) is never expanded by a later replacement.
    placeholder = re.compile("|".join(re.escape(key) for key in replacements))
    return placeholder.sub(lambda match: replacements[match.group(0)], template)


def render_pdf(report: AuditReport, output_path: Path, tier: str = "paid") -> Path:
    """Render report as PDF. Requires weasyprint.

    Returns ``output_path`` after the PDF has been written.

    Raises:
        RuntimeError: if importing weasyprint fails. In that case the
            rendered HTML is written next to ``output_path`` (same name,
            ``.html`` suffix) so it can be printed to PDF manually.
    """
    html = render_html(report, tier=tier)
    try:
        from weasyprint import HTML
        HTML(string=html).write_pdf(str(output_path))
        return output_path
    except ImportError as exc:
        # Fallback: write HTML and let user print
        html_path = output_path.with_suffix(".html")
        # Explicit UTF-8 so the fallback file does not depend on the
        # platform's locale encoding.
        html_path.write_text(html, encoding="utf-8")
        raise RuntimeError(
            f"weasyprint not installed. HTML written to {html_path}. "
            f"Open in browser and print to PDF, or: pip install weasyprint"
        ) from exc


def _grade_color(grade: str) -> str:
"""Map grade to CSS color."""
return {
"A": "#00ff88",
"B": "#88ff00",
"C": "#ffcc00",
"D": "#ff6b35",
"F": "#ff3333",
}.get(grade, "#666")


def _score_bar(score: float) -> str:
"""Generate an inline SVG score bar."""
width = max(0, min(100, score))
color = "#00ff88" if score >= 90 else "#ffcc00" if score >= 70 else "#ff6b35" if score >= 50 else "#ff3333"
return (
f'<svg width="200" height="8" style="vertical-align:middle">'
f'<rect width="200" height="8" rx="4" fill="#1a1a1a"/>'
f'<rect width="{width * 2}" height="8" rx="4" fill="{color}"/>'
f'</svg>'
)


def _escape(text: str) -> str:
"""HTML-escape text."""
return text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;").replace('"', "&quot;")
Loading
Loading