Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 26 additions & 3 deletions src/skillspector/llm_analyzer_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
from typing import Literal

from langchain_core.messages import BaseMessage
from pydantic import BaseModel, Field
from pydantic import BaseModel, Field, field_validator

from skillspector.llm_utils import get_chat_model
from skillspector.logging_config import get_logger
Expand Down Expand Up @@ -62,12 +62,35 @@ class LLMFinding(BaseModel):
rule_id: str = Field(description="Identifier for the type of finding")
message: str = Field(description="Short description of the finding")
severity: Literal["LOW", "MEDIUM", "HIGH", "CRITICAL"] = Field(description="Severity level")
start_line: int = Field(ge=1, description="Starting line number")
# start_line and confidence carry no ge/le Field bounds on purpose. Pydantic
# bounds emit JSON-schema minimum/maximum, which some OpenAI-compatible
# structured-output / tool-calling endpoints reject when they validate the
# response schema, failing the whole call. The ranges are enforced by the
# validators below instead, so the guarantee holds without those keywords in
# the emitted schema. start_line stays required (no default), so a finding
# with no location is still rejected rather than materialised at line 1;
# only the numeric bound is removed, not the requiredness.
start_line: int = Field(description="Starting line number (>= 1)")
end_line: int | None = Field(default=None, description="Ending line number (optional)")
confidence: float = Field(ge=0.0, le=1.0, default=0.5, description="Confidence score")
confidence: float = Field(default=0.5, description="Confidence score between 0.0 and 1.0")
explanation: str = Field(default="", description="Why this is a finding (2-3 sentences)")
remediation: str = Field(default="", description="Actionable steps to fix the issue")

@field_validator("start_line")
@classmethod
def _clamp_start_line(cls, v: int) -> int:
# Clamp rather than raise: an LLM occasionally returns 0 for a
# whole-file finding, and normalising to the first line is better than
# dropping the finding over an off-by-one.
return v if v >= 1 else 1

@field_validator("confidence")
@classmethod
def _clamp_confidence(cls, v: float) -> float:
# Clamp into [0.0, 1.0] so a slightly out-of-range model value
# normalises instead of failing the structured-output parse.
return min(1.0, max(0.0, v))

def to_finding(self, file: str) -> Finding:
"""Convert to a :class:`Finding` for the graph state."""
return Finding(
Expand Down
12 changes: 11 additions & 1 deletion src/skillspector/nodes/meta_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,10 @@ class MetaAnalyzerFinding(BaseModel):
description="The end line number from the finding's Location, if available.",
)
is_vulnerability: bool = Field(description="Whether this is a true vulnerability")
confidence: float = Field(ge=0.0, le=1.0, description="Confidence score between 0.0 and 1.0")
# No ge/le bound on purpose: Pydantic bounds emit JSON-schema
# minimum/maximum, which some OpenAI-compatible structured-output endpoints
# reject. The range is enforced by the validator below instead.
confidence: float = Field(description="Confidence score between 0.0 and 1.0")
intent: Literal["malicious", "negligent", "benign"] = Field(
description="Likely intent behind the finding"
)
Expand All @@ -73,6 +76,13 @@ class MetaAnalyzerFinding(BaseModel):
explanation: str = Field(default="", description="Why this is dangerous (2-3 sentences)")
remediation: str = Field(default="", description="How to fix the issue (actionable steps)")

@field_validator("confidence")
@classmethod
def _clamp_confidence(cls, v: float) -> float:
# Clamp into [0.0, 1.0] so a slightly out-of-range model value
# normalises instead of failing the structured-output parse.
return min(1.0, max(0.0, v))


class OverallAssessment(BaseModel):
"""Overall risk assessment for the analyzed file."""
Expand Down
92 changes: 74 additions & 18 deletions tests/nodes/test_llm_analyzer_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -566,15 +566,13 @@ def test_valid_finding(self) -> None:
assert len(result.findings) == 1
assert result.findings[0].confidence == 0.9

def test_confidence_validation(self) -> None:
with pytest.raises(ValueError):
LLMFinding(
rule_id="X",
message="x",
severity="LOW",
start_line=1,
confidence=1.5,
)
def test_confidence_is_clamped(self) -> None:
"""Out-of-range confidence is clamped, not rejected, so a slightly off
model value does not fail the whole structured-output parse."""
hi = LLMFinding(rule_id="X", message="x", severity="LOW", start_line=1, confidence=1.5)
lo = LLMFinding(rule_id="X", message="x", severity="LOW", start_line=1, confidence=-0.3)
assert hi.confidence == 1.0
assert lo.confidence == 0.0

def test_severity_validation(self) -> None:
with pytest.raises(ValueError):
Expand Down Expand Up @@ -660,15 +658,25 @@ def test_valid_finding(self) -> None:
assert len(result.findings) == 1
assert result.findings[0].confidence == 0.9

def test_confidence_validation(self) -> None:
with pytest.raises(ValueError):
MetaAnalyzerFinding(
pattern_id="E1",
is_vulnerability=True,
confidence=1.5,
intent="malicious",
impact="high",
)
def test_confidence_is_clamped(self) -> None:
"""Out-of-range confidence is clamped, not rejected, so a slightly off
model value does not fail the whole structured-output parse."""
high = MetaAnalyzerFinding(
pattern_id="E1",
is_vulnerability=True,
confidence=1.5,
intent="malicious",
impact="high",
)
low = MetaAnalyzerFinding(
pattern_id="E1",
is_vulnerability=True,
confidence=-0.2,
intent="malicious",
impact="high",
)
assert high.confidence == 1.0
assert low.confidence == 0.0

def test_intent_validation(self) -> None:
with pytest.raises(ValueError):
Expand Down Expand Up @@ -719,6 +727,54 @@ def test_model_dump(self) -> None:
assert d["start_line"] is None


class TestStructuredOutputSchema:
"""The response schemas must stay portable across structured-output backends.

Pydantic ge/le bounds emit JSON-schema ``minimum`` / ``maximum``, which some
OpenAI-compatible structured-output / tool-calling endpoints reject when they
validate the response schema. The ranges are enforced by runtime validators
instead, so these keywords must not appear in the emitted schema.
"""

@staticmethod
def _numeric_keywords(schema: dict) -> set[str]:
found: set[str] = set()

def walk(node: object) -> None:
if isinstance(node, dict):
found.update(k for k in ("minimum", "maximum") if k in node)
for value in node.values():
walk(value)
elif isinstance(node, list):
for value in node:
walk(value)

walk(schema)
return found

def test_llm_finding_schema_has_no_numeric_bounds(self) -> None:
assert self._numeric_keywords(LLMFinding.model_json_schema()) == set()

def test_meta_finding_schema_has_no_numeric_bounds(self) -> None:
assert self._numeric_keywords(MetaAnalyzerFinding.model_json_schema()) == set()

def test_llm_finding_clamps_confidence(self) -> None:
hi = LLMFinding(rule_id="R", message="m", severity="LOW", start_line=1, confidence=1.5)
lo = LLMFinding(rule_id="R", message="m", severity="LOW", start_line=1, confidence=-0.3)
assert hi.confidence == 1.0
assert lo.confidence == 0.0

def test_llm_finding_clamps_start_line(self) -> None:
assert LLMFinding(rule_id="R", message="m", severity="LOW", start_line=0).start_line == 1
assert LLMFinding(rule_id="R", message="m", severity="LOW", start_line=42).start_line == 42

def test_llm_finding_start_line_is_required(self) -> None:
"""start_line stays required: a finding with no location is rejected,
not materialised at line 1."""
with pytest.raises(ValueError):
LLMFinding(rule_id="R", message="m", severity="LOW")


# ---------------------------------------------------------------------------
# LLMMetaAnalyzer.get_batches
# ---------------------------------------------------------------------------
Expand Down