diff --git a/docs/redacted_route_review_runbook.md b/docs/redacted_route_review_runbook.md new file mode 100644 index 0000000..9c5a2cd --- /dev/null +++ b/docs/redacted_route_review_runbook.md @@ -0,0 +1,49 @@ +# Redacted Route Quality Review Runbook + +Use this runbook to evaluate route quality without exposing raw prompts or private logs. + +## 1) Collect redacted samples only + +Create a JSONL file from review findings where every line is a **synthetic or redacted** sample: + +- MUST set `redacted: true` +- MUST include non-empty `text` +- MUST include non-empty `expect` as a **route_id** (for example `fast`, `strong`) +- MUST NOT use `target_model` names in `expect` (for example `pro-router` is invalid) + +Optional fields: + +- `source` (written to the output with a `production_review:` prefix, for example `production_review:synthetic`) +- `note` (operator hint) + +## 2) Import with route config validation + +Convert JSONL into eval-case YAML and validate expected routes against config: + +```bash +uv run python scripts/import_review_samples.py \ + --input tests/samples/redacted_review_samples.synthetic.jsonl \ + --output /tmp/redacted_review_cases.yaml \ + --routes config/routes.yaml +``` + +If a sample is not redacted or `expect` is not a configured route_id, import fails. + +## 3) Run review against the decision endpoint + +```bash +uv run python scripts/review_decisions.py \ + --endpoint http://127.0.0.1:8080/v1/semantic-router/decision \ + --cases /tmp/redacted_review_cases.yaml \ + --routes config/routes.yaml +``` + +Use `--output json` for machine-readable audit output. + +## 4) Interpret PASS/FAIL safely + +- `PASS`: selected `route_id` equals expected `route_id` +- `FAIL`: selected `route_id` differs from expected `route_id` +- `ERROR`: decision endpoint failure (network/HTTP/request) + +For audits, store the result table or JSON output, plus the redacted input file and route config revision. Do not attach raw prompts or private logs.
diff --git a/tests/samples/redacted_review_samples.synthetic.jsonl b/tests/samples/redacted_review_samples.synthetic.jsonl
new file mode 100644
index 0000000..9cb04ae
--- /dev/null
+++ b/tests/samples/redacted_review_samples.synthetic.jsonl
@@ -0,0 +1 @@
+{"text":"[REDACTED] incident triage summary","expect":"strong","redacted":true,"source":"synthetic","note":"operator sanity check"}
diff --git a/tests/test_import_review_samples.py b/tests/test_import_review_samples.py
index d2a5004..d64e9b0 100644
--- a/tests/test_import_review_samples.py
+++ b/tests/test_import_review_samples.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import json
+from pathlib import Path
 
 import pytest
 import yaml
@@ -197,3 +198,49 @@ def test_main_invalid_unredacted_input_fails_before_writing(tmp_path, monkeypatc
     with pytest.raises(ReviewSampleError, match="redacted=true"):
         import_review_samples.main()
     assert not output_path.exists()
+
+
+def _synthetic_fixture_lines() -> list[str]:
+    """Return the lines of the checked-in synthetic redacted JSONL fixture.
+
+    The path is resolved relative to this test module so the tests do not
+    depend on the directory pytest is launched from.
+    """
+    fixture_path = (
+        Path(__file__).resolve().parent
+        / "samples"
+        / "redacted_review_samples.synthetic.jsonl"
+    )
+    return fixture_path.read_text(encoding="utf-8").splitlines()
+
+
+def test_synthetic_redacted_fixture_is_strictly_redacted_and_route_id_based():
+    """Every fixture sample is redacted, has text, and has a non-empty expect."""
+    lines = _synthetic_fixture_lines()
+    assert lines
+
+    for line in lines:
+        sample = json.loads(line)
+        assert sample.get("redacted") is True
+        assert isinstance(sample.get("text"), str) and sample["text"].strip()
+        assert isinstance(sample.get("expect"), str) and sample["expect"].strip()
+        # "pro-router" is a target_model name, which is not a valid expect value.
+        assert sample["expect"] != "pro-router"
+
+
+def test_synthetic_redacted_fixture_converts_with_route_validation():
+    """The fixture converts to eval cases when its routes are allowed."""
+    lines = _synthetic_fixture_lines()
+
+    result = convert_review_samples(lines, allowed_route_ids={"fast", "strong"})
+
+    assert result == {
+        "cases": [
+            {
+                "text": "[REDACTED] incident triage summary",
+                "expect": "strong",
+                "source": "production_review:synthetic",
+                "note": "operator sanity check",
+            }
+        ]
+    }