Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 29 additions & 2 deletions scripts/quality/eval_smoke.sh
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,35 @@ python -m quality.pipeline.normalize --input "${INPUT_PATH}" --output "${NORMALI

INPUTS_PATH="${WORK_DIR}/inputs.jsonl"
EXPECTED_PATH="${WORK_DIR}/expected.jsonl"
printf '%s\n' '{"id": "smoke", "output": "normalized"}' >"${INPUTS_PATH}"
printf '%s\n' '{"id": "smoke", "expected": "normalized"}' >"${EXPECTED_PATH}"
python - <<'PY' "${INPUTS_PATH}" "${EXPECTED_PATH}"
"""Write single-record inputs/expected JSONL fixtures for the eval smoke test.

Usage (from the shell wrapper): reads the two destination paths from argv and
an optional JSON-object metadata payload from EVAL_SMOKE_METADATA.
"""
import json
import os
import sys

# Destination paths supplied by the enclosing shell script.
inputs_path = sys.argv[1]
expected_path = sys.argv[2]

# Optional metadata override; falls back to a minimal smoke-test tag.
metadata_raw = os.environ.get("EVAL_SMOKE_METADATA")
if metadata_raw:
    try:
        metadata = json.loads(metadata_raw)
    except json.JSONDecodeError as exc:  # pragma: no cover - defensive guard
        # Chain the cause (PEP 3134) so the original decode error and its
        # position stay visible in the traceback instead of being swallowed.
        raise SystemExit(f"Invalid EVAL_SMOKE_METADATA: {exc}") from exc
else:
    metadata = {"task_type": "smoke"}

# Both records must carry the same dict so downstream metadata checks match.
if not isinstance(metadata, dict):
    raise SystemExit("EVAL_SMOKE_METADATA must decode to a JSON object")

inputs_record = {"id": "smoke", "output": "normalized", "metadata": metadata}
expected_record = {"id": "smoke", "expected": "normalized", "metadata": metadata}

# sort_keys keeps the serialized fixtures byte-stable across runs.
with open(inputs_path, "w", encoding="utf-8") as stream:
    stream.write(json.dumps(inputs_record, sort_keys=True) + "\n")

with open(expected_path, "w", encoding="utf-8") as stream:
    stream.write(json.dumps(expected_record, sort_keys=True) + "\n")
PY

DEFAULT_METRICS_PATH="${PROJECT_ROOT}/scripts/quality/metrics.json"
METRICS_PATH="${EVAL_SMOKE_METRICS_PATH:-${DEFAULT_METRICS_PATH}}"
Expand Down
35 changes: 34 additions & 1 deletion tests/scripts/quality/test_eval_smoke.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ def parse_args(argv: Iterable[str] | None = None) -> argparse.Namespace:
parser.add_argument("--inputs")
parser.add_argument("--expected")
parser.add_argument("--output")
parser.add_argument("--generated-at")
parser.add_argument("extras", nargs="*")
return parser.parse_args(list(argv) if argv is not None else None)

Expand All @@ -82,13 +83,34 @@ def main(argv: Iterable[str] | None = None) -> int:
if args.output is None:
raise AssertionError("--output must be provided")

with Path(args.inputs).open("r", encoding="utf-8") as stream:
inputs_records = [json.loads(line) for line in stream if line.strip()]
with Path(args.expected).open("r", encoding="utf-8") as stream:
expected_records = [json.loads(line) for line in stream if line.strip()]

if not inputs_records:
raise AssertionError("inputs.jsonl must contain at least one record")
if not expected_records:
raise AssertionError("expected.jsonl must contain at least one record")

input_metadata = inputs_records[0].get("metadata")
expected_metadata = expected_records[0].get("metadata")
if input_metadata is None:
raise AssertionError("inputs metadata must be provided")
if expected_metadata is None:
raise AssertionError("expected metadata must be provided")
if input_metadata != expected_metadata:
raise AssertionError("inputs and expected metadata must match")

record_path = Path(os.environ["EVAL_SMOKE_RECORD_PATH"])
record_path.parent.mkdir(parents=True, exist_ok=True)
with record_path.open("a", encoding="utf-8") as stream:
stream.write(f"ruleset={rules_path}\\n")
stream.write("severities=" + ",".join(severities) + "\\n")
stream.write(f"inputs={args.inputs}\\n")
stream.write(f"expected={args.expected}\\n")
stream.write("inputs_metadata=" + json.dumps(input_metadata, sort_keys=True) + "\\n")
stream.write("expected_metadata=" + json.dumps(expected_metadata, sort_keys=True) + "\\n")
stream.write("bert_score\\n")
stream.write("rouge\\n")

Expand Down Expand Up @@ -209,11 +231,22 @@ def test_eval_smoke_pipeline_invokes_stubs(
lines = record_path.read_text(encoding="utf-8").splitlines()
assert lines[0] == "normalize"
severity_line = next(line for line in lines if line.startswith("severities="))
assert severity_line == "severities=minor,major,critical"
severities = severity_line.split("=", 1)[1].split(",")
assert {"minor", "major", "critical"}.issubset(severities)
inputs_line = next(line for line in lines if line.startswith("inputs="))
expected_line = next(line for line in lines if line.startswith("expected="))
inputs_metadata_line = next(
line for line in lines if line.startswith("inputs_metadata=")
)
expected_metadata_line = next(
line for line in lines if line.startswith("expected_metadata=")
)
assert inputs_line.endswith("inputs.jsonl")
assert expected_line.endswith("expected.jsonl")
inputs_metadata = json.loads(inputs_metadata_line.split("=", 1)[1])
expected_metadata = json.loads(expected_metadata_line.split("=", 1)[1])
assert inputs_metadata == {"task_type": "smoke"}
assert expected_metadata == inputs_metadata
assert "bert_score" in lines
assert "rouge" in lines

Expand Down