From 0b429cfa583ddf74f611e6f93155f573c0052d8e Mon Sep 17 00:00:00 2001 From: zp6 <373669493@qq.com> Date: Sat, 16 May 2026 00:38:59 +0800 Subject: [PATCH 1/2] Improve install docs with macOS, Linux, and Windows paths --- README.md | 62 ++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 55 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index e6e2b50..94e1684 100644 --- a/README.md +++ b/README.md @@ -152,6 +152,7 @@ More precisely: So if you are looking for a practical Opus 4.7 workaround, guardrail, or mitigation for stale factual answers, this repo is aimed directly at that problem. + ## Install ### 1. Clone the repo @@ -164,26 +165,73 @@ git clone https://github.com//claude-code-fact-verification-hook. This repo assumes you will reference the hook scripts by absolute path from your Claude Code settings. +Recommended stable locations: + +| Platform | Path | +|----------|------| +| Windows | `C:\Users\<you>\.claude\hooks\fact-verification\` | +| macOS | `/Users/<you>/.claude/hooks/fact-verification/` | +| Linux | `/home/<you>/.claude/hooks/fact-verification/` | + ### 3. Optional: create a config file -Copy: +Copy `config.example.json` to one of: -- `config.example.json` +| Platform | Config location | +|----------|-----------------| +| Windows | `%APPDATA%\claude-code-fact-verification\config.json` | +| macOS | `~/.config/claude-code-fact-verification/config.json` | +| Linux | `~/.config/claude-code-fact-verification/config.json` | -Then set: +Then set the environment variable: -- `FACT_VERIFICATION_CONFIG_PATH` +```bash +# macOS / Linux +export FACT_VERIFICATION_CONFIG_PATH=~/.config/claude-code-fact-verification/config.json -if you want custom thresholds or MCP regex patterns. +# Windows (PowerShell) +$env:FACT_VERIFICATION_CONFIG_PATH = "$env:APPDATA\claude-code-fact-verification\config.json" +``` ### 4. Register the hooks -Use `settings.example.json` as a starting point and replace `` with your real absolute path. 
+Claude Code settings live in: + +| Platform | Settings file | +|----------|--------------| +| Windows | `%APPDATA%\claude-code\settings.json` | +| macOS | `~/Library/Application Support/claude-code/settings.json` | +| Linux | `~/.config/claude-code/settings.json` | + +Use `settings.example.json` as a starting point. Replace the placeholder path in it with the absolute path where you cloned this repo. + +**macOS / Linux example:** + +```json +{ + "hooks": { + "UserPromptSubmit": [{"command": "python3 /Users/you/.claude/hooks/fact-verification/hooks/fact_prompt_gate.py"}], + "PostToolUse": [{"command": "python3 /Users/you/.claude/hooks/fact-verification/hooks/track_verification.py"}], + "Stop": [{"command": "python3 /Users/you/.claude/hooks/fact-verification/hooks/verification_stop_gate.py"}] + } +} +``` + +**Windows example:** + +```json +{ + "hooks": { + "UserPromptSubmit": [{"command": "python C:\\Users\\you\\.claude\\hooks\\fact-verification\\hooks\\fact_prompt_gate.py"}], + "PostToolUse": [{"command": "python C:\\Users\\you\\.claude\\hooks\\fact-verification\\hooks\\track_verification.py"}], + "Stop": [{"command": "python C:\\Users\\you\\.claude\\hooks\\fact-verification\\hooks\\verification_stop_gate.py"}] + } +} +``` ### 5. Restart Claude Code Hook registration changes are safest after a restart or a fresh session. - ## Quick smoke test ```bash From f0a5a1d9f7da7e10127e9e35661ba88ed59ee5c4 Mon Sep 17 00:00:00 2001 From: zp6 <373669493@qq.com> Date: Sat, 16 May 2026 00:40:57 +0800 Subject: [PATCH 2/2] Expand smoke tests for transcript payload variants --- scripts/smoke_test.py | 203 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 186 insertions(+), 17 deletions(-) diff --git a/scripts/smoke_test.py b/scripts/smoke_test.py index 364a418..f8ca3f4 100644 --- a/scripts/smoke_test.py +++ b/scripts/smoke_test.py @@ -1,5 +1,7 @@ """ Local smoke test for the standalone hook set. + +Includes expanded tests for transcript payload variants as requested in #2. 
""" from __future__ import annotations @@ -39,10 +41,45 @@ def expect(name: str, result: dict, predicate, failures: list[str]) -> None: failures.append(f"{name} failed: {json.dumps(result, ensure_ascii=True)}") +def fresh_session(session_id: str) -> None: + """Reset state for a fresh session.""" + state = TEMP / f"fact-verification-{session_id}" + if state.exists(): + shutil.rmtree(state) + state.mkdir(parents=True, exist_ok=True) + + +def run_gate(session_id: str, prompt: str) -> dict: + """Run fact_prompt_gate for a session.""" + return run("fact_prompt_gate.py", {"session_id": session_id, "prompt": prompt}) + + +def run_track(session_id: str, tool_name: str, tool_input: dict) -> dict: + """Run track_verification for a session.""" + return run("track_verification.py", { + "session_id": session_id, + "tool_name": tool_name, + "tool_input": tool_input, + }) + + +def run_stop(session_id: str, msg: str, **extra) -> dict: + """Run verification_stop_gate for a session.""" + payload = {"session_id": session_id, "last_assistant_message": msg} + payload.update(extra) + return run("verification_stop_gate.py", payload) + + +def write_transcript(path: Path, lines: list[dict]) -> None: + """Write a JSONL transcript file.""" + path.parent.mkdir(parents=True, exist_ok=True) + with open(path, "w", encoding="utf-8") as f: + for line in lines: + f.write(json.dumps(line) + "\n") + + def main() -> None: - if STATE.exists(): - shutil.rmtree(STATE) - STATE.mkdir(parents=True, exist_ok=True) + fresh_session(SESSION) results = [] prompt_payload = { @@ -92,9 +129,7 @@ def main() -> None: results.append(("stop_blocks_unstructured_after_verification", run("verification_stop_gate.py", unverified_stop))) results.append(("stop_allows_structured_verified", run("verification_stop_gate.py", structured_stop))) - if STATE.exists(): - shutil.rmtree(STATE) - STATE.mkdir(parents=True, exist_ok=True) + fresh_session(SESSION) run("fact_prompt_gate.py", prompt_payload) caveated_stop = { "session_id": 
SESSION, @@ -102,9 +137,7 @@ def main() -> None: } results.append(("stop_blocks_caveat_without_attempt", run("verification_stop_gate.py", caveated_stop))) - if STATE.exists(): - shutil.rmtree(STATE) - STATE.mkdir(parents=True, exist_ok=True) + fresh_session(SESSION) run("fact_prompt_gate.py", prompt_payload) searched = { "session_id": SESSION, @@ -114,9 +147,7 @@ def main() -> None: results.append(("track_web_search", run("track_verification.py", searched))) results.append(("stop_allows_websearch_verified", run("verification_stop_gate.py", structured_stop))) - if STATE.exists(): - shutil.rmtree(STATE) - STATE.mkdir(parents=True, exist_ok=True) + fresh_session(SESSION) run("fact_prompt_gate.py", narrative_prompt_payload) missing_message_stop = { "session_id": SESSION, @@ -149,24 +180,162 @@ def main() -> None: ) ) + # ================================================================ + # Expanded tests: transcript payload variants (#2) + # ================================================================ + + S2 = f"{SESSION}-transcript" + + # Test: transcript with assistant content in nested "message" dict + fresh_session(S2) + run_gate(S2, "What is the current price of Bitcoin today?") + run_track(S2, "WebSearch", {"query": "bitcoin price"}) + transcript_path = TEMP / f"fact-verification-{S2}" / "transcript.jsonl" + write_transcript(transcript_path, [ + {"role": "user", "content": "What is the current price of Bitcoin today?"}, + {"role": "assistant", "content": {"type": "text", "text": "Let me check that for you."}}, + {"role": "assistant", "content": [ + {"type": "text", "text": "Bottom line: BTC is around $67,000.\nVerified facts:\n- Coingecko reports ~$67K\nSources:\n- [CoinGecko](https://coingecko.com)"} + ]}, + ]) + transcript_stop = { + "session_id": S2, + "transcript_path": str(transcript_path), + } + results.append(("transcript_nested_content_list", run("verification_stop_gate.py", transcript_stop))) + + # Test: clarifying question detection via transcript + 
fresh_session(S2) + run_gate(S2, "What is the latest Node.js LTS version as of today?") + transcript_path2 = TEMP / f"fact-verification-{S2}" / "transcript2.jsonl" + write_transcript(transcript_path2, [ + {"role": "user", "content": "What is the latest Node.js LTS version as of today?"}, + {"role": "assistant", "content": "Could you clarify whether you mean the Active LTS or the Maintenance LTS version?"}, + ]) + transcript_clarifying = { + "session_id": S2, + "transcript_path": str(transcript_path2), + } + results.append(("transcript_clarifying_question", run("verification_stop_gate.py", transcript_clarifying))) + + # Test: caveat detection edge case - "best-effort" wording + fresh_session(S2) + run_gate(S2, "What are the current top 10 movies on Netflix?") + transcript_path3 = TEMP / f"fact-verification-{S2}" / "transcript3.jsonl" + write_transcript(transcript_path3, [ + {"role": "user", "content": "What are the current top 10 movies on Netflix?"}, + {"role": "assistant", "content": "This is a best-effort answer and may not reflect the current moment."}, + ]) + transcript_caveat = { + "session_id": S2, + "transcript_path": str(transcript_path3), + } + results.append(("transcript_best_effort_caveat", run("verification_stop_gate.py", transcript_caveat))) + + # Test: empty transcript lines should fail open, not crash + fresh_session(S2) + run_gate(S2, "Who won the last Super Bowl?") + transcript_path4 = TEMP / f"fact-verification-{S2}" / "transcript4.jsonl" + write_transcript(transcript_path4, [ + "", + "not json", + "", + ]) + transcript_empty = { + "session_id": S2, + "transcript_path": str(transcript_path4), + } + results.append(("transcript_empty_malformed_lines", run("verification_stop_gate.py", transcript_empty))) + + # Test: transcript with assistant content nested under "message" key + fresh_session(S2) + run_gate(S2, "What is the latest Python version as of today?") + run_track(S2, "WebSearch", {"query": "latest python version"}) + transcript_path5 = TEMP / 
f"fact-verification-{S2}" / "transcript5.jsonl" + write_transcript(transcript_path5, [ + {"role": "user", "message": {"content": "What is the latest Python version as of today?"}}, + {"role": "assistant", "message": {"content": "Bottom line: Python 3.13 is the latest.\nVerified facts:\n- python.org shows 3.13\nSources:\n- [Python.org](https://python.org)"}}, + ]) + transcript_msg_nested = { + "session_id": S2, + "transcript_path": str(transcript_path5), + } + results.append(("transcript_message_nested", run("verification_stop_gate.py", transcript_msg_nested))) + + # Test: "unable to verify" caveat via transcript + fresh_session(S2) + run_gate(S2, "What is the GDP of Tuvalu in 2026?") + transcript_path6 = TEMP / f"fact-verification-{S2}" / "transcript6.jsonl" + write_transcript(transcript_path6, [ + {"role": "user", "content": "What is the GDP of Tuvalu in 2026?"}, + {"role": "assistant", "content": "I was unable to verify this figure from authoritative sources. Treat as provisional."}, + ]) + transcript_unable = { + "session_id": S2, + "transcript_path": str(transcript_path6), + } + results.append(("transcript_unable_to_verify", run("verification_stop_gate.py", transcript_unable))) + + # Test: transcript with only user messages (no assistant message) + fresh_session(S2) + run_gate(S2, "What is the current population of Tokyo?") + transcript_path7 = TEMP / f"fact-verification-{S2}" / "transcript7.jsonl" + write_transcript(transcript_path7, [ + {"role": "user", "content": "What is the current population of Tokyo?"}, + ]) + transcript_no_assistant = { + "session_id": S2, + "transcript_path": str(transcript_path7), + } + results.append(("transcript_no_assistant_message", run("verification_stop_gate.py", transcript_no_assistant))) + + # Test: transcript path that does not exist + fresh_session(S2) + run_gate(S2, "What is the current price of Ethereum?") + transcript_missing = { + "session_id": S2, + "transcript_path": "/nonexistent/path/transcript.jsonl", + } + 
results.append(("transcript_missing_file", run("verification_stop_gate.py", transcript_missing))) + + # Cleanup + for sid in [SESSION, S2]: + state = TEMP / f"fact-verification-{sid}" + if state.exists(): + shutil.rmtree(state) + + # ================================================================ + # Assertions + # ================================================================ + result_map = dict(results) expect("prompt_gate", result_map["prompt_gate"], lambda item: bool(item["stdout"]), failures) expect("narrative_prompt_gate", result_map["narrative_prompt_gate"], lambda item: bool(item["stdout"]), failures) expect("declarative_comparative_gate", result_map["declarative_comparative_gate"], lambda item: bool(item["stdout"]), failures) - expect("stop_blocks_unverified", result_map["stop_blocks_unverified"], lambda item: "\"decision\": \"block\"" in item["stdout"], failures) + expect("stop_blocks_unverified", result_map["stop_blocks_unverified"], lambda item: '"decision": "block"' in item["stdout"], failures) expect("track_read", result_map["track_read"], lambda item: item["code"] == 0, failures) - expect("stop_blocks_unstructured_after_verification", result_map["stop_blocks_unstructured_after_verification"], lambda item: "\"decision\": \"block\"" in item["stdout"], failures) + expect("stop_blocks_unstructured_after_verification", result_map["stop_blocks_unstructured_after_verification"], lambda item: '"decision": "block"' in item["stdout"], failures) expect("stop_allows_structured_verified", result_map["stop_allows_structured_verified"], lambda item: item["code"] == 0 and not item["stdout"], failures) - expect("stop_blocks_caveat_without_attempt", result_map["stop_blocks_caveat_without_attempt"], lambda item: "\"decision\": \"block\"" in item["stdout"], failures) + expect("stop_blocks_caveat_without_attempt", result_map["stop_blocks_caveat_without_attempt"], lambda item: '"decision": "block"' in item["stdout"], failures) expect("track_web_search", 
result_map["track_web_search"], lambda item: item["code"] == 0, failures) expect("stop_allows_websearch_verified", result_map["stop_allows_websearch_verified"], lambda item: item["code"] == 0 and not item["stdout"], failures) - expect("stop_blocks_missing_message", result_map["stop_blocks_missing_message"], lambda item: "\"decision\": \"block\"" in item["stdout"], failures) + expect("stop_blocks_missing_message", result_map["stop_blocks_missing_message"], lambda item: '"decision": "block"' in item["stdout"], failures) expect("py_compile", result_map["py_compile"], lambda item: item["code"] == 0, failures) + # Transcript variant assertions + expect("transcript_nested_content_list", result_map["transcript_nested_content_list"], lambda item: item["code"] == 0, failures) + expect("transcript_clarifying_question", result_map["transcript_clarifying_question"], lambda item: item["code"] == 0, failures) + expect("transcript_best_effort_caveat", result_map["transcript_best_effort_caveat"], lambda item: item["code"] == 0, failures) + expect("transcript_empty_malformed_lines", result_map["transcript_empty_malformed_lines"], lambda item: item["code"] == 0, failures) + expect("transcript_message_nested", result_map["transcript_message_nested"], lambda item: item["code"] == 0, failures) + expect("transcript_unable_to_verify", result_map["transcript_unable_to_verify"], lambda item: item["code"] == 0, failures) + expect("transcript_no_assistant_message", result_map["transcript_no_assistant_message"], lambda item: '"decision": "block"' in item["stdout"], failures) + expect("transcript_missing_file", result_map["transcript_missing_file"], lambda item: '"decision": "block"' in item["stdout"], failures) + print(json.dumps(results, indent=2)) if failures: raise SystemExit("\n".join(failures)) if __name__ == "__main__": - main() + main() \ No newline at end of file