-
{len(findings)}
+
{len(payload['findings'])}
{severity_counts['CRITICAL']}
{severity_counts['HIGH']}
{len(payload['rows'])}
@@ -976,13 +996,58 @@ def generate_csv_report(cls, task: Dict[str, Any], result: Dict[str, Any]) -> st
)
return output.getvalue()
@classmethod
def _extract_sarif_rule_id(cls, finding: Dict[str, Any]) -> str:
    """Extract a stable, deterministic rule ID for SARIF.

    Candidate identifiers are tried in priority order: ``cve`` (finding
    only), then ``cwe``, ``check_id``, ``plugin_rule_id``, ``rule_id`` and
    ``id`` (each read from the finding first, then from its ``metadata``
    mapping). If none yields a non-blank string, the finding's ``title``
    is used, and finally the literal ``"security-finding"``.

    Args:
        finding: A single finding dictionary.

    Returns:
        A lowercase ID containing only ``a-z``, ``0-9`` and single
        hyphens, never empty and never starting or ending with a hyphen.
    """
    # ``metadata`` may be missing, explicitly None, or not a mapping at all;
    # normalise it once so the lookups below can never raise.
    metadata = finding.get("metadata")
    if not isinstance(metadata, dict):
        metadata = {}

    def _clean(value: Any) -> str:
        """Return the stripped string, or "" for blank / non-string values."""
        return value.strip() if isinstance(value, str) else ""

    raw_rule_id = _clean(finding.get("cve"))
    if not raw_rule_id:
        for key in ("cwe", "check_id", "plugin_rule_id", "rule_id", "id"):
            raw_rule_id = _clean(finding.get(key) or metadata.get(key))
            if raw_rule_id:
                break

    if not raw_rule_id:
        # A non-string title (e.g. an int) must not reach re.sub().
        raw_rule_id = _clean(finding.get("title")) or "security-finding"

    # Substitute before lowercasing so that non-ASCII characters are dropped
    # rather than case-folded into ASCII look-alikes. Each run of
    # non-alphanumerics (hyphens included) becomes exactly one hyphen.
    rule_id = re.sub(r"[^a-zA-Z0-9]+", "-", raw_rule_id).lower().strip("-")
    return rule_id or "security-finding"
+
@classmethod
def _extract_sarif_locations(
    cls, finding: Dict[str, Any], default_target: str
) -> List[Dict[str, Any]]:
    """Extract location data for a SARIF finding.

    The finding's own ``target`` is preferred; ``default_target`` is used
    when the finding has none (or has one that is not a usable string).

    Args:
        finding: A single finding dictionary.
        default_target: Target to fall back on.

    Returns:
        A list with one SARIF ``location`` object, or an empty list when no
        target string is available. For non-URL targets shaped like
        ``path:<digits>`` the trailing number is removed from the URI and,
        when it is at least 1, reported as ``region.startLine``.
    """
    # Only non-empty strings can be inspected for "://" or split on ":".
    target = next(
        (
            candidate
            for candidate in (finding.get("target"), default_target)
            if isinstance(candidate, str) and candidate
        ),
        None,
    )
    if target is None:
        return []

    physical_location: Dict[str, Any] = {"artifactLocation": {"uri": target}}

    # Anything containing a scheme separator is treated as a URL and left
    # intact, so a port such as ":8443" is never mistaken for a line number.
    if "://" not in target:
        path, separator, line = target.rpartition(":")
        # isascii() guards against digits like "²" that pass isdigit() but
        # make int() raise ValueError.
        # NOTE(review): a bare "host:port" target is also matched here, as
        # it was before; confirm whether such targets can occur.
        if separator and path and line.isascii() and line.isdigit():
            physical_location["artifactLocation"]["uri"] = path
            line_number = int(line)
            # SARIF requires startLine >= 1; omit the region rather than
            # emit an invalid document for "file:0".
            if line_number >= 1:
                physical_location["region"] = {"startLine": line_number}

    return [{"physicalLocation": physical_location}]
+
@classmethod
def generate_sarif_report(cls, task: Dict[str, Any], result: Dict[str, Any]) -> str:
"""Generate a SARIF v2.1.0 report for GitHub Code Scanning."""
payload = cls._build_report_payload(task, result)
tool_name = payload["tool_name"]
- # Define severity mapping to SARIF levels
severity_map = {
"CRITICAL": "error",
"HIGH": "error",
@@ -996,37 +1061,7 @@ def generate_sarif_report(cls, task: Dict[str, Any], result: Dict[str, Any]) ->
results = []
for finding in payload["findings"]:
- # Derive a stable, deterministic rule ID from finding-specific identifiers
- raw_rule_id = None
-
- # 1. Check CVE
- cve = finding.get("cve")
- if cve and isinstance(cve, str) and cve.strip():
- raw_rule_id = cve.strip()
-
- # 2. Check CWE (direct or in metadata)
- if not raw_rule_id:
- cwe = finding.get("cwe") or finding.get("metadata", {}).get("cwe")
- if cwe and isinstance(cwe, str) and cwe.strip():
- raw_rule_id = cwe.strip()
-
- # 3. Check specific check/plugin/finding identifiers
- if not raw_rule_id:
- for key in ["check_id", "plugin_rule_id", "rule_id", "id"]:
- val = finding.get(key) or finding.get("metadata", {}).get(key)
- if val and isinstance(val, str) and val.strip():
- raw_rule_id = val.strip()
- break
-
- # 4. Fallback to sanitized title
- if not raw_rule_id:
- raw_rule_id = finding.get("title") or "security-finding"
-
- # Sanitize raw rule ID (lowercase, replace non-alphanumeric with hyphens)
- rule_id = re.sub(r"[^a-zA-Z0-9\-]", "-", raw_rule_id).lower()
- rule_id = re.sub(r"-+", "-", rule_id).strip("-")
- if not rule_id:
- rule_id = "security-finding"
+ rule_id = cls._extract_sarif_rule_id(finding)
if rule_id not in rule_indices:
rule_indices[rule_id] = len(rules)
@@ -1054,34 +1089,8 @@ def generate_sarif_report(cls, task: Dict[str, Any], result: Dict[str, Any]) ->
"text": finding.get("description", "Security finding detected")
},
"level": severity_map.get(finding["severity"], "note"),
- "locations": []
+ "locations": cls._extract_sarif_locations(finding, payload["target"])
}
-
- # Attempt to extract location if available
- target = finding.get("target") or payload["target"]
- # Check if target looks like a file path or URI
- if target:
- is_url = "://" in target or target.startswith(("http://", "https://"))
-
- location = {
- "physicalLocation": {
- "artifactLocation": {
- "uri": target
- }
- }
- }
-
- # If target has a line number like file.py:123 and is NOT a web URL
- if not is_url and ":" in target:
- parts = target.split(":")
- if parts[-1].isdigit():
- location["physicalLocation"]["artifactLocation"]["uri"] = ":".join(parts[:-1])
- location["physicalLocation"]["region"] = {
- "startLine": int(parts[-1])
- }
-
- sarif_result["locations"].append(location)
-
results.append(sarif_result)
sarif_output = {