diff --git a/src/skillspector/llm_analyzer_base.py b/src/skillspector/llm_analyzer_base.py index aa3e7e9..b6be791 100644 --- a/src/skillspector/llm_analyzer_base.py +++ b/src/skillspector/llm_analyzer_base.py @@ -33,7 +33,7 @@ from typing import Literal from langchain_core.messages import BaseMessage -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, field_validator from skillspector.llm_utils import get_chat_model from skillspector.logging_config import get_logger @@ -62,12 +62,35 @@ class LLMFinding(BaseModel): rule_id: str = Field(description="Identifier for the type of finding") message: str = Field(description="Short description of the finding") severity: Literal["LOW", "MEDIUM", "HIGH", "CRITICAL"] = Field(description="Severity level") - start_line: int = Field(ge=1, description="Starting line number") + # start_line and confidence carry no ge/le Field bounds on purpose. Pydantic + # bounds emit JSON-schema minimum/maximum, which some OpenAI-compatible + # structured-output / tool-calling endpoints reject when they validate the + # response schema, failing the whole call. The ranges are enforced by the + # validators below instead, so the guarantee holds without those keywords in + # the emitted schema. start_line stays required (no default), so a finding + # with no location is still rejected rather than materialised at line 1; + # only the numeric bound is removed, not the requiredness. + start_line: int = Field(description="Starting line number (>= 1)") end_line: int | None = Field(default=None, description="Ending line number (optional)") - confidence: float = Field(ge=0.0, le=1.0, default=0.5, description="Confidence score") + confidence: float = Field(default=0.5, description="Confidence score between 0.0 and 1.0") explanation: str = Field(default="", description="Why this is a finding (2-3 sentences)") remediation: str = Field(default="", description="Actionable steps to fix the issue") + @field_validator("start_line") + @classmethod + def _clamp_start_line(cls, v: int) -> int: + # Clamp rather than raise: an LLM occasionally returns 0 for a + # whole-file finding, and normalising to the first line is better than + # dropping the finding over an off-by-one. + return v if v >= 1 else 1 + + @field_validator("confidence") + @classmethod + def _clamp_confidence(cls, v: float) -> float: + # Clamp into [0.0, 1.0] so a slightly out-of-range model value + # normalises instead of failing the structured-output parse. + return min(1.0, max(0.0, v)) + def to_finding(self, file: str) -> Finding: """Convert to a :class:`Finding` for the graph state.""" return Finding( diff --git a/src/skillspector/nodes/meta_analyzer.py b/src/skillspector/nodes/meta_analyzer.py index 8f2b541..c1ae287 100644 --- a/src/skillspector/nodes/meta_analyzer.py +++ b/src/skillspector/nodes/meta_analyzer.py @@ -63,7 +63,10 @@ class MetaAnalyzerFinding(BaseModel): description="The end line number from the finding's Location, if available.", ) is_vulnerability: bool = Field(description="Whether this is a true vulnerability") - confidence: float = Field(ge=0.0, le=1.0, description="Confidence score between 0.0 and 1.0") + # No ge/le bound on purpose: Pydantic bounds emit JSON-schema + # minimum/maximum, which some OpenAI-compatible structured-output endpoints + # reject. The range is enforced by the validator below instead. + confidence: float = Field(description="Confidence score between 0.0 and 1.0") intent: Literal["malicious", "negligent", "benign"] = Field( description="Likely intent behind the finding" ) @@ -73,6 +76,13 @@ class MetaAnalyzerFinding(BaseModel): explanation: str = Field(default="", description="Why this is dangerous (2-3 sentences)") remediation: str = Field(default="", description="How to fix the issue (actionable steps)") + @field_validator("confidence") + @classmethod + def _clamp_confidence(cls, v: float) -> float: + # Clamp into [0.0, 1.0] so a slightly out-of-range model value + # normalises instead of failing the structured-output parse. + return min(1.0, max(0.0, v)) + class OverallAssessment(BaseModel): """Overall risk assessment for the analyzed file.""" diff --git a/tests/nodes/test_llm_analyzer_base.py b/tests/nodes/test_llm_analyzer_base.py index c1fabca..5b42db1 100644 --- a/tests/nodes/test_llm_analyzer_base.py +++ b/tests/nodes/test_llm_analyzer_base.py @@ -566,15 +566,13 @@ def test_valid_finding(self) -> None: assert len(result.findings) == 1 assert result.findings[0].confidence == 0.9 - def test_confidence_validation(self) -> None: - with pytest.raises(ValueError): - LLMFinding( - rule_id="X", - message="x", - severity="LOW", - start_line=1, - confidence=1.5, - ) + def test_confidence_is_clamped(self) -> None: + """Out-of-range confidence is clamped, not rejected, so a slightly off + model value does not fail the whole structured-output parse.""" + hi = LLMFinding(rule_id="X", message="x", severity="LOW", start_line=1, confidence=1.5) + lo = LLMFinding(rule_id="X", message="x", severity="LOW", start_line=1, confidence=-0.3) + assert hi.confidence == 1.0 + assert lo.confidence == 0.0 def test_severity_validation(self) -> None: with pytest.raises(ValueError): @@ -660,15 +658,25 @@ def test_valid_finding(self) -> None: assert len(result.findings) == 1 assert result.findings[0].confidence == 0.9 - def test_confidence_validation(self) -> None: - with pytest.raises(ValueError): - MetaAnalyzerFinding( - pattern_id="E1", - is_vulnerability=True, - confidence=1.5, - intent="malicious", - impact="high", - ) + def test_confidence_is_clamped(self) -> None: + """Out-of-range confidence is clamped, not rejected, so a slightly off + model value does not fail the whole structured-output parse.""" + high = MetaAnalyzerFinding( + pattern_id="E1", + is_vulnerability=True, + confidence=1.5, + intent="malicious", + impact="high", + ) + low = MetaAnalyzerFinding( + pattern_id="E1", + is_vulnerability=True, + confidence=-0.2, + intent="malicious", + impact="high", + ) + assert high.confidence == 1.0 + assert low.confidence == 0.0 def test_intent_validation(self) -> None: with pytest.raises(ValueError): @@ -719,6 +727,54 @@ def test_model_dump(self) -> None: assert d["start_line"] is None +class TestStructuredOutputSchema: + """The response schemas must stay portable across structured-output backends. + + Pydantic ge/le bounds emit JSON-schema ``minimum`` / ``maximum``, which some + OpenAI-compatible structured-output / tool-calling endpoints reject when they + validate the response schema. The ranges are enforced by runtime validators + instead, so these keywords must not appear in the emitted schema. + """ + + @staticmethod + def _numeric_keywords(schema: dict) -> set[str]: + found: set[str] = set() + + def walk(node: object) -> None: + if isinstance(node, dict): + found.update(k for k in ("minimum", "maximum") if k in node) + for value in node.values(): + walk(value) + elif isinstance(node, list): + for value in node: + walk(value) + + walk(schema) + return found + + def test_llm_finding_schema_has_no_numeric_bounds(self) -> None: + assert self._numeric_keywords(LLMFinding.model_json_schema()) == set() + + def test_meta_finding_schema_has_no_numeric_bounds(self) -> None: + assert self._numeric_keywords(MetaAnalyzerFinding.model_json_schema()) == set() + + def test_llm_finding_clamps_confidence(self) -> None: + hi = LLMFinding(rule_id="R", message="m", severity="LOW", start_line=1, confidence=1.5) + lo = LLMFinding(rule_id="R", message="m", severity="LOW", start_line=1, confidence=-0.3) + assert hi.confidence == 1.0 + assert lo.confidence == 0.0 + + def test_llm_finding_clamps_start_line(self) -> None: + assert LLMFinding(rule_id="R", message="m", severity="LOW", start_line=0).start_line == 1 + assert LLMFinding(rule_id="R", message="m", severity="LOW", start_line=42).start_line == 42 + + def test_llm_finding_start_line_is_required(self) -> None: + """start_line stays required: a finding with no location is rejected, + not materialised at line 1.""" + with pytest.raises(ValueError): + LLMFinding(rule_id="R", message="m", severity="LOW") + + # --------------------------------------------------------------------------- # LLMMetaAnalyzer.get_batches # ---------------------------------------------------------------------------