diff --git a/backend/secuscan/reporting.py b/backend/secuscan/reporting.py index fb2e8987..084cf827 100644 --- a/backend/secuscan/reporting.py +++ b/backend/secuscan/reporting.py @@ -283,18 +283,44 @@ def _format_timestamp(value: str) -> str: continue return value + @classmethod + def _build_pdf_finding_markup(cls, finding: Dict[str, Any], target: str, critical_icon: str) -> str: + evidence_html = f"

Evidence

{cls._escape_html(finding['proof'])}
" if finding.get("proof") else "" + remediation_html = f"

Recommended action

{cls._escape_html(finding['remediation'])}

" if finding.get("remediation") else "" + cve_html = f"

CVE: {cls._escape_html(finding['cve'])}

" if finding.get("cve") else "" + + return f""" +
+ + + + + +
{cls._escape_html(finding['severity'])} +

{cls._escape_html(finding['title'])}

+

{cls._escape_html(finding['category'])} | {cls._escape_html_with_breaks(finding['target'] or target, " ")}

+
+

Description

+

{cls._escape_html(finding['description'])}

+ {evidence_html} + {remediation_html} + {cve_html} +
+ """ + @classmethod def _generate_pdf_html_report(cls, task: Dict[str, Any], result: Dict[str, Any]) -> str: """Generate conservative HTML/CSS that xhtml2pdf can paginate reliably.""" payload = cls._build_report_payload(task, result) - findings = payload["findings"] severity_counts = payload["severity_counts"] + shield_icon = cls._icon_data_uri("shield", "1e3a5f") target_icon = cls._icon_data_uri("target", "2563eb") findings_icon = cls._icon_data_uri("findings", "0f172a") critical_icon = cls._icon_data_uri("critical", "991b1b") rows_icon = cls._icon_data_uri("rows", "2563eb") clock_icon = cls._icon_data_uri("clock", "475569") + target_html = cls._escape_html_with_breaks(payload["target"], " ") summary_markup = "".join( @@ -304,26 +330,10 @@ def _generate_pdf_html_report(cls, task: Dict[str, Any], result: Dict[str, Any]) f"{cls._escape_html(item['value'])}" for item in payload["scan_parameters"] ) + finding_markup = "".join( - f""" -
- - - - - -
{cls._escape_html(finding['severity'])} -

{cls._escape_html(finding['title'])}

-

{cls._escape_html(finding['category'])} | {cls._escape_html_with_breaks(finding['target'] or payload['target'], " ")}

-
-

Description

-

{cls._escape_html(finding['description'])}

- {f"

Evidence

{cls._escape_html(finding['proof'])}
" if finding['proof'] else ""} - {f"

Recommended action

{cls._escape_html(finding['remediation'])}

" if finding['remediation'] else ""} - {f"

CVE: {cls._escape_html(finding['cve'])}

" if finding['cve'] else ""} -
- """ - for finding in findings + cls._build_pdf_finding_markup(finding, payload["target"], critical_icon) + for finding in payload["findings"] ) if not finding_markup: @@ -546,7 +556,7 @@ def _generate_pdf_html_report(cls, task: Dict[str, Any], result: Dict[str, Any]) - + @@ -589,18 +599,46 @@ def generate_pdf_report(cls, task: Dict[str, Any], result: Dict[str, Any]) -> by raise RuntimeError("Failed to render SecuScan HTML report as PDF") return output.getvalue() + @classmethod + def _build_web_finding_markup(cls, finding: Dict[str, Any], target: str, critical_icon: str) -> str: + evidence_html = f"

Evidence

{cls._escape_html(finding['proof'])}
" if finding.get("proof") else "" + remediation_html = f"

Recommended action

{cls._escape_html(finding['remediation'])}

" if finding.get("remediation") else "" + cve_html = f"
CVE: {cls._escape_html(finding['cve'])}
" if finding.get("cve") else "" + + return f""" +
+
+ {cls._escape_html(finding['severity'])} +
+

{cls._escape_html(finding['title'])}

+

{cls._escape_html(finding['category'])} | {cls._escape_html_with_breaks(finding['target'] or target)}

+
+
+
+
+

Description

+

{cls._escape_html(finding['description'])}

+
+ {evidence_html} + {remediation_html} + {cve_html} +
+
+ """ + @classmethod def generate_html_report(cls, task: Dict[str, Any], result: Dict[str, Any]) -> str: """Generate a modern HTML report suitable for direct download.""" payload = cls._build_report_payload(task, result) - findings = payload["findings"] severity_counts = payload["severity_counts"] + shield_icon = cls._icon_data_uri("shield", "1e3a5f") target_icon = cls._icon_data_uri("target", "2563eb") findings_icon = cls._icon_data_uri("findings", "0f172a") critical_icon = cls._icon_data_uri("critical", "991b1b") rows_icon = cls._icon_data_uri("rows", "2563eb") clock_icon = cls._icon_data_uri("clock", "475569") + target_html = cls._escape_html_with_breaks(payload["target"]) summary_markup = "".join( @@ -610,28 +648,10 @@ def generate_html_report(cls, task: Dict[str, Any], result: Dict[str, Any]) -> s f"
{cls._escape_html(item['value'])}
" for item in payload["scan_parameters"] ) + finding_markup = "".join( - f""" -
-
- {cls._escape_html(finding['severity'])} -
-

{cls._escape_html(finding['title'])}

-

{cls._escape_html(finding['category'])} | {cls._escape_html_with_breaks(finding['target'] or payload['target'])}

-
-
-
-
-

Description

-

{cls._escape_html(finding['description'])}

-
- {f"

Evidence

{cls._escape_html(finding['proof'])}
" if finding['proof'] else ""} - {f"

Recommended action

{cls._escape_html(finding['remediation'])}

" if finding['remediation'] else ""} - {f"
CVE: {cls._escape_html(finding['cve'])}
" if finding['cve'] else ""} -
-
- """ - for finding in findings + cls._build_web_finding_markup(finding, payload["target"], critical_icon) + for finding in payload["findings"] ) if not finding_markup: @@ -914,7 +934,7 @@ def generate_html_report(cls, task: Dict[str, Any], result: Dict[str, Any]) -> s
-
{len(findings)}
+
{len(payload['findings'])}
{severity_counts['CRITICAL']}
{severity_counts['HIGH']}
{len(payload['rows'])}
@@ -976,13 +996,58 @@ def generate_csv_report(cls, task: Dict[str, Any], result: Dict[str, Any]) -> st ) return output.getvalue() + @classmethod + def _extract_sarif_rule_id(cls, finding: Dict[str, Any]) -> str: + """Extract a stable, deterministic rule ID for SARIF.""" + raw_rule_id = None + + cve = finding.get("cve") + if cve and isinstance(cve, str) and cve.strip(): + raw_rule_id = cve.strip() + + if not raw_rule_id: + cwe = finding.get("cwe") or finding.get("metadata", {}).get("cwe") + if cwe and isinstance(cwe, str) and cwe.strip(): + raw_rule_id = cwe.strip() + + if not raw_rule_id: + for key in ["check_id", "plugin_rule_id", "rule_id", "id"]: + val = finding.get(key) or finding.get("metadata", {}).get(key) + if val and isinstance(val, str) and val.strip(): + raw_rule_id = val.strip() + break + + if not raw_rule_id: + raw_rule_id = finding.get("title") or "security-finding" + + rule_id = re.sub(r"[^a-zA-Z0-9\-]", "-", raw_rule_id).lower() + rule_id = re.sub(r"-+", "-", rule_id).strip("-") + return rule_id if rule_id else "security-finding" + + @classmethod + def _extract_sarif_locations(cls, finding: Dict[str, Any], default_target: str) -> List[Dict[str, Any]]: + """Extract location data for a SARIF finding.""" + target = finding.get("target") or default_target + if not target: + return [] + + is_url = "://" in target or target.startswith(("http://", "https://")) + location = {"physicalLocation": {"artifactLocation": {"uri": target}}} + + if not is_url and ":" in target: + parts = target.split(":") + if parts[-1].isdigit(): + location["physicalLocation"]["artifactLocation"]["uri"] = ":".join(parts[:-1]) + location["physicalLocation"]["region"] = {"startLine": int(parts[-1])} + + return [location] + @classmethod def generate_sarif_report(cls, task: Dict[str, Any], result: Dict[str, Any]) -> str: """Generate a SARIF v2.1.0 report for GitHub Code Scanning.""" payload = cls._build_report_payload(task, result) tool_name = payload["tool_name"] - # Define severity mapping to SARIF levels severity_map = { "CRITICAL": "error", "HIGH": "error", @@ -996,37 +1061,7 @@ def generate_sarif_report(cls, task: Dict[str, Any], result: Dict[str, Any]) -> results = [] for finding in payload["findings"]: - # Derive a stable, deterministic rule ID from finding-specific identifiers - raw_rule_id = None - - # 1. Check CVE - cve = finding.get("cve") - if cve and isinstance(cve, str) and cve.strip(): - raw_rule_id = cve.strip() - - # 2. Check CWE (direct or in metadata) - if not raw_rule_id: - cwe = finding.get("cwe") or finding.get("metadata", {}).get("cwe") - if cwe and isinstance(cwe, str) and cwe.strip(): - raw_rule_id = cwe.strip() - - # 3. Check specific check/plugin/finding identifiers - if not raw_rule_id: - for key in ["check_id", "plugin_rule_id", "rule_id", "id"]: - val = finding.get(key) or finding.get("metadata", {}).get(key) - if val and isinstance(val, str) and val.strip(): - raw_rule_id = val.strip() - break - - # 4. Fallback to sanitized title - if not raw_rule_id: - raw_rule_id = finding.get("title") or "security-finding" - - # Sanitize raw rule ID (lowercase, replace non-alphanumeric with hyphens) - rule_id = re.sub(r"[^a-zA-Z0-9\-]", "-", raw_rule_id).lower() - rule_id = re.sub(r"-+", "-", rule_id).strip("-") - if not rule_id: - rule_id = "security-finding" + rule_id = cls._extract_sarif_rule_id(finding) if rule_id not in rule_indices: rule_indices[rule_id] = len(rules) @@ -1054,34 +1089,8 @@ def generate_sarif_report(cls, task: Dict[str, Any], result: Dict[str, Any]) -> "text": finding.get("description", "Security finding detected") }, "level": severity_map.get(finding["severity"], "note"), - "locations": [] + "locations": cls._extract_sarif_locations(finding, payload["target"]) } - - # Attempt to extract location if available - target = finding.get("target") or payload["target"] - # Check if target looks like a file path or URI - if target: - is_url = "://" in target or target.startswith(("http://", "https://")) - - location = { - "physicalLocation": { - "artifactLocation": { - "uri": target - } - } - } - - # If target has a line number like file.py:123 and is NOT a web URL - if not is_url and ":" in target: - parts = target.split(":") - if parts[-1].isdigit(): - location["physicalLocation"]["artifactLocation"]["uri"] = ":".join(parts[:-1]) - location["physicalLocation"]["region"] = { - "startLine": int(parts[-1]) - } - - sarif_result["locations"].append(location) - results.append(sarif_result) sarif_output = {
{len(findings)}{len(payload['findings'])} {severity_counts['CRITICAL']} {severity_counts['HIGH']} {len(payload['rows'])}