From a7a118192aa049f84b08a61c100c350493b72fa0 Mon Sep 17 00:00:00 2001 From: Darren Cheng Date: Tue, 5 May 2026 17:37:40 -0700 Subject: [PATCH 1/3] Auto-capture sessions and synthesize knowledge in dream Add a SessionEnd hook that writes a structured raw note to memory/inbox/ whenever a session authored a commit. Tags `commit-merged` (high-value) when commits land on origin/main|master, `work-in-progress` otherwise. Captures intent, commit shas, files touched, and session metadata. Upgrade the dream skill to translate raw data into knowledge: - Phase 0 ingests yesterday's Granola + Notion meetings into the inbox - Phase 3 distills inbox captures (decisions, people facts, conventions) into existing topical docs instead of just filing the raw note - High-value (commit-merged) captures process first Includes a guarded test suite. Hardens against the harness's silent remote-collision pitfall: aborts loudly if cwd isn't a temp dir or if origin doesn't point at the temp bare repo. Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/skill-tests/test_session_capture.sh | 254 ++++++++++++++++++++ README.md | 4 +- agents/hooks/session-end-capture.sh | 165 +++++++++++++ agents/skills/dream/SKILL.md | 93 +++++-- cli/commands/install/agents.go | 55 +++++ 5 files changed, 544 insertions(+), 27 deletions(-) create mode 100755 .github/skill-tests/test_session_capture.sh create mode 100755 agents/hooks/session-end-capture.sh diff --git a/.github/skill-tests/test_session_capture.sh b/.github/skill-tests/test_session_capture.sh new file mode 100755 index 00000000..3dec6684 --- /dev/null +++ b/.github/skill-tests/test_session_capture.sh @@ -0,0 +1,254 @@ +#!/usr/bin/env bash +source "$(dirname "$0")/harness.sh" + +# Hook lives outside agents/skills, so SCRIPTS_DIR (which points at +# agents/skills) doesn't help — derive the hook path from the test file. 
+HOOK="$(cd "$(dirname "${BASH_SOURCE[0]}")/../../agents/hooks" && pwd)/session-end-capture.sh" + +# Build an isolated test environment: +# - fresh git repo as cwd +# - bare "remote" so origin/master exists +# - synthetic transcript JSONL +# - fake argus(1) on PATH that returns the temp vault path +# - HOME redirected to keep ~/.dots/sys writes contained +# Returns: prints VAULT path on stdout +_setup_session_env() { + # make_test_repo / add_test_remote rely on `cd` taking effect in the + # caller's shell — so they must be called WITHOUT `$()` wrapping. + # Capture the resulting cwd via pwd after the cd lands. + make_test_repo >/dev/null + add_test_remote "origin" >/dev/null + local repo + repo=$(pwd) + + # SAFETY: refuse to proceed if cd-to-temp-dir didn't take effect. Without + # this guard, downstream `git push -q origin master` runs against the + # caller's real remote (`add_test_remote` silently leaves the existing + # `origin` URL when `git remote add` collides). A previous version of this + # test pushed junk commits to GitHub master before the bug was caught. + case "$repo" in + *"/skill-test-"*) ;; + *) + echo "FATAL: test setup did not cd into a temp dir (cwd=$repo). Aborting to avoid pushing to a real remote." >&2 + exit 1 + ;; + esac + # Belt-and-suspenders: verify that origin actually points at the temp bare + # repo, regardless of whatever `add_test_remote` did. If `git remote get-url` + # fails (e.g. cwd is not a git repo at all) or returns any other URL, bail. + local temp_remote + temp_remote=$(git remote get-url origin 2>/dev/null) + case "$temp_remote" in + *"/skill-remote-"*) ;; + *) + echo "FATAL: origin URL is not a temp bare repo (got: $temp_remote). Aborting." >&2 + exit 1 + ;; + esac + + # Author identity is required by the hook (filters commits by author). + git config user.email "test@test.com" + + # Make a session-eligible commit on master so origin/master is non-empty + # and the hook's merge-base check has a target. Push it so origin has it. 
+ # Ensure local branch is `master` regardless of the host's `init.defaultBranch` + # so the hook's `origin/master` lookup hits. + git branch -M master 2>/dev/null + echo "seed" > seed.txt + git add seed.txt + git commit -q -m "seed commit" + git push -q origin master + + # Vault dir under the temp tree so cleanup happens automatically. + local vault="$repo/.test-vault" + mkdir -p "$vault/memory" + _TEST_VAULT="$vault" + + # Transcript: timestamp captured AFTER seed setup so the seed isn't picked + # up as a session commit. The 1-second sleep ensures the seed commit's + # committer date is strictly LESS than this timestamp (git's --since uses + # second-level granularity, so same-second commits are inclusive). + sleep 1 + local ts + ts=$(date -u +%Y-%m-%dT%H:%M:%SZ) + local transcript="$repo/.test-transcript.jsonl" + printf '{"type":"user","timestamp":"%s","message":{"role":"user","content":"fix the bug"}}\n' "$ts" \ + > "$transcript" + _TEST_TRANSCRIPT="$transcript" + _TEST_TS="$ts" + + # Stub argus(1) with a tiny shell wrapper that satisfies the hook's only + # call: `argus kb status`. Place it on PATH ahead of the real argus. + local stub_dir="$repo/.test-bin" + mkdir -p "$stub_dir" + cat > "$stub_dir/argus" </dev/null | head -1 +} + +test_skips_session_with_no_commits() { + _setup_session_env >/dev/null + local cwd + cwd=$(pwd) + + # No new commits past the seed (seed is the session-start ancestor); the + # hook should bail before touching the inbox. + local input + input=$(_session_input "test-skip-1" "$cwd") + + HOME="$_TEST_HOME" PATH="$_TEST_PATH" bash "$HOOK" <<< "$input" || true + + local file + file=$(_find_inbox_file "$_TEST_VAULT") + assert_eq "$file" "" "no inbox file should be created when no commits in session" +} + +test_skips_session_outside_git_repo() { + local non_repo + non_repo=$(mktemp -d "${TMPDIR:-/tmp}/skill-test-XXXXXX") + _TMPDIRS+=("$non_repo") + + # Set up a vault and stub argus, but point cwd at a non-repo dir. 
+ local vault="$non_repo/.test-vault" + mkdir -p "$vault/memory" + local stub_dir="$non_repo/.test-bin" + mkdir -p "$stub_dir" + cat > "$stub_dir/argus" < "$transcript" + + local input + input=$(jq -nc --arg sid "no-repo" --arg cwd "$non_repo" --arg tp "$transcript" \ + '{session_id:$sid,cwd:$cwd,transcript_path:$tp}') + + HOME="$non_repo/.home" PATH="$stub_dir:$PATH" bash "$HOOK" <<< "$input" || true + + local file + file=$(find "$vault/memory/inbox" -name '*.md' 2>/dev/null | head -1) + assert_eq "$file" "" "no inbox file when cwd is not a git repo" +} + +test_captures_with_commit_merged_to_master() { + _setup_session_env >/dev/null + local cwd + cwd=$(pwd) + + # Sleep 1s so the new commit is strictly after the transcript timestamp. + # `git log --since` is exclusive on the boundary in some git versions. + sleep 1 + echo "feature" > feature.txt + git add feature.txt + git commit -q -m "ship feature foo" + git push -q origin master + + local input + input=$(_session_input "merged-session" "$cwd") + + HOME="$_TEST_HOME" PATH="$_TEST_PATH" bash "$HOOK" <<< "$input" || true + + local file + file=$(_find_inbox_file "$_TEST_VAULT") + assert_match "$file" '\.md$' "inbox file should be created" + + if [ -n "$file" ]; then + local body + body=$(cat "$file") + assert_contains "$body" "high-value" "merged commit should tag high-value" + assert_contains "$body" "commit-merged" "merged commit should tag commit-merged" + assert_contains "$body" "ship feature foo" "commit subject should appear in body" + assert_contains "$body" "[merged]" "commit status should be merged" + assert_contains "$body" "fix the bug" "user intent from transcript should be captured" + assert_contains "$body" "feature.txt" "files touched should appear" + assert_not_contains "$body" "work-in-progress" "merged commit should not tag work-in-progress" + fi +} + +test_captures_wip_commit_not_on_master() { + _setup_session_env >/dev/null + local cwd + cwd=$(pwd) + + # Branch off master and commit there so the 
commit is NOT on origin/master. + sleep 1 + git checkout -q -b wip-branch + echo "wip" > wip.txt + git add wip.txt + git commit -q -m "draft work in progress" + + local input + input=$(_session_input "wip-session" "$cwd") + + HOME="$_TEST_HOME" PATH="$_TEST_PATH" bash "$HOOK" <<< "$input" || true + + local file + file=$(_find_inbox_file "$_TEST_VAULT") + assert_match "$file" '\.md$' "inbox file should be created for wip commits" + + if [ -n "$file" ]; then + local body + body=$(cat "$file") + assert_contains "$body" "work-in-progress" "unmerged commit should tag work-in-progress" + assert_not_contains "$body" "high-value" "unmerged commit should not be high-value" + assert_not_contains "$body" "commit-merged" "unmerged commit should not be commit-merged" + assert_contains "$body" "[wip]" "commit status should be wip" + fi +} + +test_logs_change_to_kb_changes_file() { + _setup_session_env >/dev/null + local cwd + cwd=$(pwd) + + sleep 1 + echo "ship" > ship.txt + git add ship.txt + git commit -q -m "another ship" + git push -q origin master + + local input + input=$(_session_input "logged-session" "$cwd") + + HOME="$_TEST_HOME" PATH="$_TEST_PATH" bash "$HOOK" <<< "$input" || true + + local log="$_TEST_HOME/.dots/sys/kb-changes/changes.jsonl" + assert_match "$(cat "$log" 2>/dev/null || echo "")" "session-end-capture" "kb-changes log should record the capture" +} + +run_tests diff --git a/README.md b/README.md index ee551824..bde12ffe 100644 --- a/README.md +++ b/README.md @@ -53,7 +53,7 @@ dots docker stop-all # Stop all Docker containers | Component | What it installs | |-----------|------------------| -| `agents` | Agent skills, custom agents, hooks, and status line (symlinks `agents/skills/` → `~/.claude/skills/` + `~/.agents/skills/`, `agents/custom/` → `~/.claude/agents/`, registers hooks and status line in `~/.claude/settings.json`) | +| `agents` | Agent skills, custom agents, hooks, and status line (symlinks `agents/skills/` → `~/.claude/skills/` + 
`~/.agents/skills/`, `agents/custom/` → `~/.claude/agents/`, registers SessionStart/SessionEnd/PostToolUse hooks and status line in `~/.claude/settings.json`) | | `bin` | Custom shell scripts and Go utilities to `~/bin` | | `git` | `.gitconfig`, `.gitignore_global`, git extensions | | `home` | Dotfiles symlinked to `~/` (`.zshrc`, `.vimrc`, `.tmux.conf`, `.gitconfig`, etc.) | @@ -99,7 +99,7 @@ Dots includes 54 reusable slash-command skills for AI coding agents, following t | `/standup` | Daily standup summary from git activity | | `/pdf` | Export conversation content to styled PDF | | `/knowledge` | Initialize or update a project knowledge base | -| `/dream` | Audit and fix knowledge base hygiene — frontmatter, sizing, naming | +| `/dream` | Ingest meetings + session captures into the inbox, synthesize raw notes into existing topical docs, and audit KB hygiene — frontmatter, sizing, naming | | `/retro` | Structured retrospective or post-incident review | | `/logo` | Logo generation | | `/improve` | Improve skills, capture context and knowledge | diff --git a/agents/hooks/session-end-capture.sh b/agents/hooks/session-end-capture.sh new file mode 100755 index 00000000..34318a51 --- /dev/null +++ b/agents/hooks/session-end-capture.sh @@ -0,0 +1,165 @@ +#!/usr/bin/env bash +# Hook: SessionEnd — captures a structured raw note into memory/inbox/ when +# the session shipped real work. Only writes when at least one commit was +# authored from the session's cwd. Tagged `commit-merged` (high-value) when +# any of those commits has landed on origin/main or origin/master. +# +# Why: gives /dream a steady stream of inbox captures it can synthesize into +# topical KB docs. Sessions that never produced a commit are skipped — the +# inbox stays focused on shipped work, not exploration. +# +# Fail-soft on every error path so a missing dep, broken transcript, or +# detached daemon never blocks session shutdown. 
+ +set -uo pipefail + +# Catch any unexpected error and exit clean — the hook is best-effort. +trap 'exit 0' ERR + +# stdin: { session_id, transcript_path, cwd, hook_event_name, ... } +INPUT=$(cat) +[ -n "$INPUT" ] || exit 0 + +command -v jq >/dev/null 2>&1 || exit 0 + +SESSION_ID=$(echo "$INPUT" | jq -r '.session_id // empty') +CWD=$(echo "$INPUT" | jq -r '.cwd // empty') +TRANSCRIPT=$(echo "$INPUT" | jq -r '.transcript_path // empty') + +[ -n "$SESSION_ID" ] || exit 0 +[ -n "$CWD" ] || exit 0 +[ -d "$CWD" ] || exit 0 + +# Only capture from git repos. Sessions in a scratch dir or $HOME aren't +# coding sessions worth preserving. +git -C "$CWD" rev-parse --git-dir >/dev/null 2>&1 || exit 0 + +# Session start: timestamp on the first transcript line. Falls back to +# transcript file mtime if jq parsing fails. +SESSION_START="" +if [ -n "$TRANSCRIPT" ] && [ -r "$TRANSCRIPT" ]; then + SESSION_START=$(head -1 "$TRANSCRIPT" 2>/dev/null | jq -r '.timestamp // empty' 2>/dev/null) + if [ -z "$SESSION_START" ]; then + if SS=$(stat -f %SB -t %Y-%m-%dT%H:%M:%SZ "$TRANSCRIPT" 2>/dev/null); then + SESSION_START="$SS" + fi + fi +fi +[ -n "$SESSION_START" ] || exit 0 + +# Commits authored in this cwd since the session started, by the local git +# user. Restricting to author dodges the case where a rebase sweeps in +# upstream commits authored by teammates. +USER_EMAIL=$(git -C "$CWD" config user.email 2>/dev/null || echo "") +[ -n "$USER_EMAIL" ] || exit 0 +COMMITS=$(git -C "$CWD" log --since="$SESSION_START" --format='%H|%s' --author="$USER_EMAIL" 2>/dev/null | head -20) +[ -n "$COMMITS" ] || exit 0 + +# Refresh remote refs so merge-base is accurate. Quiet, no-tags, capped via +# timeout-equivalent (single fetch). Failure is tolerable — we just won't +# see fresh merges. +git -C "$CWD" fetch --quiet --no-tags origin 2>/dev/null + +# Pick the base remote that exists. 
+BASE_REMOTE="" +for branch in origin/main origin/master; do + if git -C "$CWD" rev-parse --verify --quiet "$branch" >/dev/null; then + BASE_REMOTE="$branch" + break + fi +done + +# Walk each commit, classify merged vs. wip. Build a markdown bullet list. +MERGED_COUNT=0 +TOTAL_COUNT=0 +COMMIT_LIST="" +while IFS='|' read -r sha subject; do + [ -n "$sha" ] || continue + TOTAL_COUNT=$((TOTAL_COUNT + 1)) + status="wip" + if [ -n "$BASE_REMOTE" ] && git -C "$CWD" merge-base --is-ancestor "$sha" "$BASE_REMOTE" 2>/dev/null; then + status="merged" + MERGED_COUNT=$((MERGED_COUNT + 1)) + fi + COMMIT_LIST+="- ${sha:0:10} [$status] $subject"$'\n' +done <<< "$COMMITS" + +# Tag selection. high-value when any commit landed; work-in-progress otherwise. +if [ "$MERGED_COUNT" -gt 0 ]; then + TAGS='[session-capture, high-value, commit-merged]' +elif [ "$TOTAL_COUNT" -gt 0 ]; then + TAGS='[session-capture, work-in-progress]' +else + TAGS='[session-capture]' +fi + +REPO=$(basename "$(git -C "$CWD" rev-parse --show-toplevel 2>/dev/null || echo "$CWD")") +BRANCH=$(git -C "$CWD" rev-parse --abbrev-ref HEAD 2>/dev/null || echo "") + +# First user prompt = the session intent. Truncate to keep the capture lean. +INTENT="" +if [ -n "$TRANSCRIPT" ] && [ -r "$TRANSCRIPT" ]; then + INTENT=$(jq -rs ' + [.[] | select(.type == "user" and (.message.content | type) == "string")] + | .[0].message.content // "" + ' "$TRANSCRIPT" 2>/dev/null) + INTENT="${INTENT:0:600}" +fi + +# Files touched: union across all session commits. Capped at 30 so a big +# refactor doesn't blow up the capture. Using --pretty=format: + --name-only +# avoids needing each commit to have a parent — diff-style range fails when +# a session's first commit is the repo's root commit. +FILES_TOUCHED=$(git -C "$CWD" log --since="$SESSION_START" --author="$USER_EMAIL" \ + --name-only --pretty=format: 2>/dev/null | sort -u | grep -v '^$' | head -30 || true) + +# Vault path. 
Bail if the daemon doesn't know where the vault lives — the +# hook has no other place to write. +VAULT=$(argus kb status 2>/dev/null | awk -F': *' '/^Vault/ {print $2; exit}') +[ -n "${VAULT:-}" ] || exit 0 +[ -d "$VAULT" ] || exit 0 + +DATE=$(date -u +%Y-%m-%d) +SHORT_SESSION=${SESSION_ID:0:8} +SLUG=$(printf '%s-%s' "$REPO" "$BRANCH" | tr '[:upper:]' '[:lower:]' | tr '/_' '--' | sed 's/[^a-z0-9-]//g' | cut -c1-40) +[ -n "$SLUG" ] || SLUG="session" +INBOX_DIR="$VAULT/memory/inbox" +INBOX_FILE="$INBOX_DIR/$DATE-$SHORT_SESSION-$SLUG.md" +mkdir -p "$INBOX_DIR" + +CAPTURED_AT=$(date -u +%Y-%m-%dT%H:%M:%SZ) + +{ + printf -- '---\n' + printf 'title: "Session: %s @ %s"\n' "$REPO" "$BRANCH" + printf 'tags: %s\n' "$TAGS" + printf -- '---\n\n' + printf 'Session shipped %d of %d commits from `%s` on branch `%s`.\n\n' \ + "$MERGED_COUNT" "$TOTAL_COUNT" "$REPO" "$BRANCH" + printf '## Intent\n\n%s\n\n' "${INTENT:-}" + printf '## Commits\n\n%s\n' "$COMMIT_LIST" + if [ -n "$FILES_TOUCHED" ]; then + printf '\n## Files Touched\n\n' + while IFS= read -r f; do + [ -n "$f" ] && printf -- '- `%s`\n' "$f" + done <<< "$FILES_TOUCHED" + fi + printf '\n## Metadata\n\n' + printf -- '- Session ID: `%s`\n' "$SESSION_ID" + printf -- '- CWD: `%s`\n' "$CWD" + printf -- '- Started: %s\n' "$SESSION_START" + printf -- '- Captured: %s\n' "$CAPTURED_AT" +} > "$INBOX_FILE" + +# Mirror the write to the changes log so the next /dream sees the new +# inbox doc without a full rescan. 
+mkdir -p ~/.dots/sys/kb-changes +jq -nc \ + --arg ts "$CAPTURED_AT" \ + --arg path "memory/inbox/$DATE-$SHORT_SESSION-$SLUG.md" \ + --arg session_id "$SESSION_ID" \ + --arg cwd "$CWD" \ + '{ts:$ts,path:$path,session_id:$session_id,cwd:$cwd,source:"session-end-capture"}' \ + >> ~/.dots/sys/kb-changes/changes.jsonl + +exit 0 diff --git a/agents/skills/dream/SKILL.md b/agents/skills/dream/SKILL.md index 8a1a8060..b9772292 100644 --- a/agents/skills/dream/SKILL.md +++ b/agents/skills/dream/SKILL.md @@ -1,12 +1,14 @@ --- name: dream -description: Scheduled KB maintenance — auto-triage inbox captures, resolve conflicts, age out stale entries, fix frontmatter and links. Runs unattended; never asks for confirmation. Use for KB maintenance, knowledge base cleanup, dream consolidation, memory hygiene, or as a scheduled daily KB pass. -allowed-tools: mcp__argus__kb_list, mcp__argus__kb_read, mcp__argus__kb_ingest, mcp__argus__kb_delete, mcp__argus-kb__kb_list, mcp__argus-kb__kb_read, mcp__argus-kb__kb_ingest, mcp__argus-kb__kb_delete +description: Scheduled KB maintenance — ingest yesterday's meetings + session captures, synthesize raw notes into existing topical docs (decisions, people changes, conventions), resolve conflicts, age out stale entries, fix frontmatter and links. Translates raw data into knowledge. Runs unattended; never asks for confirmation. Use for KB maintenance, knowledge base cleanup, dream consolidation, memory hygiene, or as a scheduled daily KB pass. 
+allowed-tools: mcp__argus__kb_list, mcp__argus__kb_read, mcp__argus__kb_ingest, mcp__argus__kb_delete, mcp__argus-kb__kb_list, mcp__argus-kb__kb_read, mcp__argus-kb__kb_ingest, mcp__argus-kb__kb_delete, mcp__argus__kb_search, mcp__argus-kb__kb_search, mcp__granola__list_meetings, mcp__granola__get_meetings, mcp__granola__query_granola_meetings, mcp__claude_ai_Notion__notion-query-meeting-notes, mcp__claude_ai_Notion__notion-search, mcp__notion__notion-query-meeting-notes --- # Dream — Scheduled Knowledge Base Maintenance -Audit the argus-kb knowledge base, then **apply every fix** without confirmation: triage new captures from `memory/inbox/`, resolve conflicting facts in favor of the most recent, archive entries that have aged out, and fix schema/link/naming violations. Dream is a scheduled task — it must never block on user input. +Audit the argus-kb knowledge base, then **apply every fix** without confirmation: ingest yesterday's meetings and session captures into the inbox, distill those raw captures into knowledge inside existing topical docs, resolve conflicting facts in favor of the most recent, archive entries that have aged out, and fix schema/link/naming violations. Dream is a scheduled task — it must never block on user input. + +**Translate raw data into knowledge.** A session transcript or meeting summary is signal, not knowledge. Dream's job is to extract the durable facts (decisions, people changes, conventions, gotchas) and merge them into the topical docs that already track each subject. Filing the raw note is a fallback when nothing distillable was captured — not the goal. ## Operating principle @@ -33,12 +35,26 @@ The Argus KB MCP server is registered as `argus` (current) or `argus-kb` (legacy ## Instructions -Run the seven phases below in order. Apply every fix. Never prompt for confirmation. +Run the eight phases below in order. Apply every fix. Never prompt for confirmation. 
- If `$ARGUMENTS` contains `--dry-run`, replace every "apply" step with "report what would change." This is the only short-circuit on writes. - If the change log (`~/.dots/sys/kb-changes/changes.jsonl`) shows no writes since the timestamp of the last successful dream run (latest file under `~/.dots/sys/dream-runs/`), exit immediately with an empty report — saves work when the KB is quiet. - If `$ARGUMENTS` contains a bare path prefix, pass it to `kb_list` as the prefix filter to scope the audit. The triage and decay phases still scan their respective folders (`memory/inbox/`, full vault) regardless. +### Phase 0: Ingest (Meetings + Sessions) + +Pull yesterday's signal into `memory/inbox/` so the rest of dream can synthesize it. Skip silently when an upstream is unavailable — meetings are best-effort signal, not a hard dependency. + +1. **Granola meetings.** If `mcp__granola__list_meetings` is available, call it with `time_range: "last_day"` (fall back to `this_week` if the tool false-negatives — see `[[granola]]` for the same-day quirk). For each meeting: + - Skip if `memory/inbox/` already contains a doc with the meeting ID in the slug (idempotent on re-runs). + - Fetch summary + AI notes via `get_meetings(meeting_ids=[id])`. + - Write a raw inbox doc at `memory/inbox/-meeting--.md` with: + - `tags: [meeting-capture, granola, ]` + - Body: meeting title, attendees, AI notes, any decisions/action items the AI surfaced. +2. **Notion meeting notes.** If `mcp__claude_ai_Notion__notion-query-meeting-notes` (or the cortex Notion equivalent) is available, query for yesterday's meeting notes. Same dedupe + write pattern, tag with `meeting-capture, notion`. +3. **Session captures already in inbox.** The `session-end-capture` hook writes session summaries directly into `memory/inbox/` as Claude Code sessions wrap up. Don't re-fetch — these are already on disk before dream starts and will be processed in Phase 3. +4. Don't synthesize here. 
Phase 0's only job is to land raw captures in the inbox so Phase 3 can distill them. If meeting fetch fails entirely (no MCP, network down, daemon offline), proceed without it; subsequent phases still run on whatever is already in the inbox. + ### Phase 1: Orient 1. Call `kb_list` (with prefix filter if provided) to get all document paths. @@ -92,20 +108,34 @@ Rules from the kb_ingest schema: Record each violation with: document path, rule violated, current value, and suggested fix. -### Phase 3: Triage Inbox - -The inbox holds raw captures from `/improve` (and other capture flows) that haven't been classified yet. Goal: route every inbox doc to its proper folder OR merge it into an existing entry. - -1. Filter the doc list from Phase 1 down to paths under `memory/inbox/`. -2. For each inbox doc: - - The full content is already in memory from Phase 2 — re-read via `kb_read` only if truncated. - - Run a `kb_search` using the doc's title + key entities to find existing related entries. - - Decide one of: - - **Merge** — content overlaps an existing doc. Append/integrate into that doc's body, preserve frontmatter, write back via `kb_ingest` with the existing path. Then delete the inbox source via `kb_delete`. - - **Re-file** — content is genuinely new. Determine the correct destination folder using the routing rules below, write via `kb_ingest` to the new path, then `kb_delete` the inbox copy. - - **Hold** — too ambiguous to classify (rare). Leave in inbox and flag in the report. - -**Routing rules** (apply in order, first match wins): +### Phase 3: Triage & Synthesize Inbox + +The inbox holds raw captures from `/improve`, the `session-end-capture` hook (committed work), and Phase 0's meeting ingest. Goal: **extract durable knowledge into existing topical docs**, not just file the raw note. + +**Process order** (highest synthesis value first): +1. Captures tagged `high-value, commit-merged` — work that shipped to main/master. 
These had verified outcomes; their facts have the highest credibility. +2. Captures tagged `meeting-capture` — decisions, people changes, action items. +3. Captures tagged `session-capture, work-in-progress` — record intent + files touched, but discount unverified claims. +4. Everything else (legacy `/improve` captures). + +For each inbox doc: + +1. The full content is already in memory from Phase 2 — re-read via `kb_read` only if truncated. +2. Run `kb_search` on the doc's key entities (project names, people, tools, file paths, decisions) to surface candidate target docs. +3. **Synthesize first.** Walk the body and extract durable items into one of these shapes: + - **Decision** ("we decided to use X for Y") → merge into the relevant project doc as a `## Decision: ` section, with a one-line rationale. If a previous decision on the same topic exists, mark it superseded (Phase 4 mechanics) and link to the new one. + - **People fact** (role change, joined team, scope shift, area of ownership) → update `thanx/people-*` or the relevant people doc. Add `Previously: — superseded ` if it overrides existing data. + - **Convention / pattern** (a way the team does something, a gotcha, a workflow rule) → merge into `patterns/`, `thanx/dev-tools.md`, or the closest existing convention doc. Cross-link with `[[wikilinks]]`. + - **Action item** with a clear owner + deadline → if it's a recurring task or a follow-up that maps to an existing project doc, append a `## Open Action Items` section. Otherwise skip — action items rot fast and a stale "follow up next week" entry is noise. + - **Tool / vendor evaluation** → merge into `[[vendor-evaluations]]` or the tool's dedicated doc. +4. **For each fact merged, run a conflict check** before writing: does this contradict an existing fact in the target doc? If yes, apply the supersession pattern from Phase 4 (canonical = newest, mark prior as historical). +5. 
After synthesis is done, decide what to do with the raw inbox capture: + - **All durable content distilled** (most session-capture and meeting-capture docs) → `kb_delete` the inbox source. The knowledge survives in topical docs; the raw transcript was scaffolding. + - **Some content distilled, some narrative left** (long meeting with backstory worth preserving) → re-file the raw to `memory/archive/meetings/-.md` instead of deleting; the topical docs cite back to it via wikilink. + - **Nothing distillable** (genuinely raw observation that needs a home but doesn't update an existing topic) → fall through to the routing rules below and re-file as a new topical doc. + - **Too degraded to classify** (empty body, malformed frontmatter that can't be salvaged) → **Hold** in inbox, note path in the report. + +**Routing rules** (when synthesis didn't apply and the doc needs a new home — apply in order, first match wins): 1. Frontmatter `tags` contain a clear domain tag matching an existing top-level folder (e.g. `homelab`, `tools`, `patterns`, `health`, `home`, `personal`, or any user-defined domain folder) → match that folder. 2. Tags include `user` / `preference` → `memory/user/.md`. 3. Tags include `feedback` / `correction` → `memory/feedback/.md`. @@ -115,7 +145,9 @@ The inbox holds raw captures from `/improve` (and other capture flows) that have When choosing a filename, follow the existing schema (kebab-case, 2-3 words, topic noun). Strip the date prefix from inbox filenames before re-filing. -Apply every triage decision immediately. Do not batch and confirm. The "Hold" path exists only when the doc is so degraded (empty body, malformed frontmatter that can't be salvaged) that any classification would be wrong; in that case leave it in inbox and note the path in the report so a future run with more context can revisit it. +Apply every triage and synthesis decision immediately. Do not batch and confirm. 
The "Hold" path is the rare escape hatch for unsalvageable docs; in normal operation every inbox doc either contributes facts to existing docs (and is deleted) or becomes a new topical doc. + +**Be ruthless about discarding low-signal content.** A session capture that just says "edited a few files" with no decision, no convention, no people fact contributes nothing durable — delete the raw without re-filing. The inbox shouldn't become a graveyard of low-value captures. ### Phase 4: Conflict Detection & Supersession @@ -243,15 +275,26 @@ For decisions made under "Apply with judgment" (oversized/multi-topic splits, na If `--dry-run` was specified, label the report "KB Hygiene Report (Dry Run)" and note that no changes were made. -#### Extra report sections (Phases 1-3) +#### Extra report sections (Phases 0, 3-5) -Add these sections to the report — they cover triage, conflicts, and decay: +Add these sections to the report — they cover ingest, triage/synthesis, conflicts, and decay: ``` -### Inbox Triage (Phase 3) -| Inbox Doc | Action | Destination | -|-----------|--------|------------| -| memory/inbox/ | merge / re-file / hold | | +### Ingested (Phase 0) +| Source | Count | Notes | +|--------|-------|-------| +| granola | N | yesterday's meetings, deduped against existing inbox | +| notion | N | yesterday's meeting notes | +| session-end hook | N | session captures already on disk | + +### Inbox Triage & Synthesis (Phase 3) +| Inbox Doc | Tags | Action | Knowledge Distilled Into | +|-----------|------|--------|-------------------------| +| memory/inbox/ | high-value, commit-merged | synthesize+delete | thanx/dev-tools.md (new convention), memory/project/foo.md (decision) | +| memory/inbox/ | meeting-capture | synthesize+archive | thanx/people-engineering.md (role change) | +| memory/inbox/ | session-capture, work-in-progress | discard | nothing distillable | +| memory/inbox/ | | re-file | | +| memory/inbox/ | | hold | (kept in inbox — too degraded) | ### Conflicts 
Resolved (Phase 4) | Topic | Canonical | Superseded | Strategy | diff --git a/cli/commands/install/agents.go b/cli/commands/install/agents.go index 56b53958..e963799a 100644 --- a/cli/commands/install/agents.go +++ b/cli/commands/install/agents.go @@ -38,6 +38,9 @@ func Agents() { // Register Argus KB memory injection on SessionStart registerSessionStartMemoryHook() + // Register session-end raw capture into memory/inbox/ + registerSessionEndCaptureHook() + // Register Argus KB write logging on PostToolUse registerKBChangeTrackingHook() @@ -186,6 +189,58 @@ func registerSessionStartMemoryHook() { } } +// registerSessionEndCaptureHook adds a SessionEnd hook that writes a raw +// session summary into memory/inbox/ when the session shipped a commit. +// /dream synthesizes those captures into topical KB docs on its next pass. +func registerSessionEndCaptureHook() { + changed := mutateSettings(func(settings map[string]any) bool { + hookCmd := "bash \"" + path.FromDots("agents/hooks/session-end-capture.sh") + "\"" + hookEntry := map[string]any{ + "hooks": []any{ + map[string]any{ + "type": "command", + "command": hookCmd, + }, + }, + } + + hooks, _ := settings["hooks"].(map[string]any) + if hooks == nil { + hooks = make(map[string]any) + } + + sessionEnd, _ := hooks["SessionEnd"].([]any) + + // SessionEnd entries (like SessionStart) don't carry a `matcher` + // field, so dedupe by walking the inner command list. 
+ for _, existing := range sessionEnd { + entry, ok := existing.(map[string]any) + if !ok { + continue + } + inner, ok := entry["hooks"].([]any) + if !ok { + continue + } + for _, h := range inner { + cmd, _ := h.(map[string]any) + if cmd != nil && cmd["command"] == hookCmd { + return false // already registered + } + } + } + + sessionEnd = append(sessionEnd, hookEntry) + hooks["SessionEnd"] = sessionEnd + settings["hooks"] = hooks + return true + }) + + if changed { + log.Success("Registered session-end inbox capture hook (SessionEnd)") + } +} + // registerKBChangeTrackingHook adds a PostToolUse hook that appends every // kb_ingest call to a JSONL change log so /dream can triage incrementally. func registerKBChangeTrackingHook() { From a8f614e478d5c1cb8309d00003e9bda01aa29e76 Mon Sep 17 00:00:00 2001 From: Darren Cheng Date: Tue, 5 May 2026 17:47:38 -0700 Subject: [PATCH 2/3] Address review findings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - session-end-capture.sh: add `|| true` to git fetch, COMMITS pipe, INTENT jq so the ERR trap doesn't kill the hook on fail-soft branches - session-end-capture.sh: sanitize SHORT_SESSION before use in filename - session-end-capture.sh: strip leading/trailing hyphens from SLUG - session-end-capture.sh: add Linux GNU stat fallback for the transcript mtime path - session-end-capture.sh: hoist MAX_COMMITS / MAX_FILES / MAX_INTENT_CHARS to named constants - session-end-capture.sh: add `command -v argus` guard mirroring peer hooks - session-end-capture.sh: document the `set -uo pipefail` + ERR trap divergence from peer hooks - dream/SKILL.md: replace `thanx/` paths in the synthesis examples and report templates with generic `/`, `patterns/`, `memory/people/` for public-repo policy compliance (pre-existing thanx/ refs left in place — out of scope for this PR) - dream/SKILL.md: drop the private `[[granola]]` wikilink in Phase 0; inline the same-day quirk explanation - dream/SKILL.md: note 
that the original Claude Code transcript at transcript_path is the recovery path after Phase 3 deletes inbox notes - dream/SKILL.md: explicit "skip silently on tool-not-found" instruction for Phase 0 MCP tools - test_session_capture.sh: add a comment explaining why the sleep 1 calls are required (git --since second-level granularity) Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/skill-tests/test_session_capture.sh | 6 +- agents/hooks/session-end-capture.sh | 78 ++++++++++++++++----- agents/skills/dream/SKILL.md | 16 ++--- 3 files changed, 72 insertions(+), 28 deletions(-) diff --git a/.github/skill-tests/test_session_capture.sh b/.github/skill-tests/test_session_capture.sh index 3dec6684..e88f5097 100755 --- a/.github/skill-tests/test_session_capture.sh +++ b/.github/skill-tests/test_session_capture.sh @@ -171,7 +171,11 @@ test_captures_with_commit_merged_to_master() { cwd=$(pwd) # Sleep 1s so the new commit is strictly after the transcript timestamp. - # `git log --since` is exclusive on the boundary in some git versions. + # `git log --since` uses second-level granularity and treats same-second + # commits as inclusive, so without the sleep, the seed commit could be + # picked up alongside this test's commit. Don't remove this sleep without + # also reworking the timestamp strategy. (Same applies to the other + # `sleep 1` calls in this file.) sleep 1 echo "feature" > feature.txt git add feature.txt diff --git a/agents/hooks/session-end-capture.sh b/agents/hooks/session-end-capture.sh index 34318a51..ec83bb21 100755 --- a/agents/hooks/session-end-capture.sh +++ b/agents/hooks/session-end-capture.sh @@ -10,17 +10,33 @@ # # Fail-soft on every error path so a missing dep, broken transcript, or # detached daemon never blocks session shutdown. +# +# Shell semantics: +# `set -uo pipefail` (no `-e`) + `trap 'exit 0' ERR` is the fail-soft +# mechanism. The ERR trap fires on any non-zero exit from a simple +# command or pipeline (including SIGPIPE from `... 
| head -N`), so any +# line that we *expect* to fail must end with `|| true` to suppress the +# trap. Compare with peer hooks (track-kb-change.sh, session-start-memory.sh) +# that use `set -euo pipefail` + named guards — same effect, different +# ergonomics. We use the trap form here because the script has many +# independent fail-soft branches and `|| true` per-call would be noisier. set -uo pipefail - -# Catch any unexpected error and exit clean — the hook is best-effort. trap 'exit 0' ERR +# Tunables — limits keep the capture lean and bound the work the hook does +# at session shutdown. All centralized so a future tuner doesn't have to +# hunt through the body. +MAX_COMMITS=20 +MAX_FILES=30 +MAX_INTENT_CHARS=600 + # stdin: { session_id, transcript_path, cwd, hook_event_name, ... } INPUT=$(cat) [ -n "$INPUT" ] || exit 0 command -v jq >/dev/null 2>&1 || exit 0 +command -v argus >/dev/null 2>&1 || exit 0 SESSION_ID=$(echo "$INPUT" | jq -r '.session_id // empty') CWD=$(echo "$INPUT" | jq -r '.cwd // empty') @@ -35,13 +51,17 @@ TRANSCRIPT=$(echo "$INPUT" | jq -r '.transcript_path // empty') git -C "$CWD" rev-parse --git-dir >/dev/null 2>&1 || exit 0 # Session start: timestamp on the first transcript line. Falls back to -# transcript file mtime if jq parsing fails. +# transcript file mtime via BSD `stat -f` (macOS) or GNU `stat -c` (Linux). SESSION_START="" if [ -n "$TRANSCRIPT" ] && [ -r "$TRANSCRIPT" ]; then SESSION_START=$(head -1 "$TRANSCRIPT" 2>/dev/null | jq -r '.timestamp // empty' 2>/dev/null) if [ -z "$SESSION_START" ]; then if SS=$(stat -f %SB -t %Y-%m-%dT%H:%M:%SZ "$TRANSCRIPT" 2>/dev/null); then SESSION_START="$SS" + elif SS=$(stat -c %y "$TRANSCRIPT" 2>/dev/null); then + # GNU stat returns "2026-05-04 09:30:00.000000000 -0700"; reformat + # to the same UTC ISO-8601 shape as the BSD branch. 
+ SESSION_START=$(date -u -d "$SS" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null || echo "") fi fi fi @@ -49,21 +69,26 @@ fi # Commits authored in this cwd since the session started, by the local git # user. Restricting to author dodges the case where a rebase sweeps in -# upstream commits authored by teammates. +# upstream commits authored by teammates. The `|| true` after `head` is +# required because head closes early on long output → git dies of SIGPIPE +# (exit status 141) → pipefail propagates → ERR trap kills the hook. USER_EMAIL=$(git -C "$CWD" config user.email 2>/dev/null || echo "") [ -n "$USER_EMAIL" ] || exit 0 -COMMITS=$(git -C "$CWD" log --since="$SESSION_START" --format='%H|%s' --author="$USER_EMAIL" 2>/dev/null | head -20) +COMMITS=$(git -C "$CWD" log --since="$SESSION_START" --format='%H|%s' --author="$USER_EMAIL" 2>/dev/null | head -"$MAX_COMMITS" || true) [ -n "$COMMITS" ] || exit 0 -# Refresh remote refs so merge-base is accurate. Quiet, no-tags, capped via -# timeout-equivalent (single fetch). Failure is tolerable — we just won't -# see fresh merges. -git -C "$CWD" fetch --quiet --no-tags origin 2>/dev/null +# Refresh remote refs so merge-base is accurate. Failure is tolerable — we +# just won't see fresh merges. The `|| true` is required because the ERR +# trap above otherwise kills the hook on any non-zero exit (no `origin` +# remote, network down, etc.) and no inbox file gets written. We do not +# add a hard timeout because GNU `timeout` isn't on macOS by default; in +# practice git's own connect timeout bounds this within seconds. +git -C "$CWD" fetch --quiet --no-tags origin 2>/dev/null || true # Pick the base remote that exists. 
BASE_REMOTE="" for branch in origin/main origin/master; do - if git -C "$CWD" rev-parse --verify --quiet "$branch" >/dev/null; then + if git -C "$CWD" rev-parse --verify --quiet "$branch" >/dev/null 2>&1; then BASE_REMOTE="$branch" break fi @@ -102,16 +127,16 @@ if [ -n "$TRANSCRIPT" ] && [ -r "$TRANSCRIPT" ]; then INTENT=$(jq -rs ' [.[] | select(.type == "user" and (.message.content | type) == "string")] | .[0].message.content // "" - ' "$TRANSCRIPT" 2>/dev/null) - INTENT="${INTENT:0:600}" + ' "$TRANSCRIPT" 2>/dev/null || echo "") + INTENT="${INTENT:0:$MAX_INTENT_CHARS}" fi -# Files touched: union across all session commits. Capped at 30 so a big -# refactor doesn't blow up the capture. Using --pretty=format: + --name-only -# avoids needing each commit to have a parent — diff-style range fails when -# a session's first commit is the repo's root commit. +# Files touched: union across all session commits. Capped via $MAX_FILES so +# a big refactor doesn't blow up the capture. Using --pretty=format: + +# --name-only avoids needing each commit to have a parent — diff-style range +# fails when a session's first commit is the repo's root commit. FILES_TOUCHED=$(git -C "$CWD" log --since="$SESSION_START" --author="$USER_EMAIL" \ - --name-only --pretty=format: 2>/dev/null | sort -u | grep -v '^$' | head -30 || true) + --name-only --pretty=format: 2>/dev/null | sort -u | grep -v '^$' | head -"$MAX_FILES" || true) # Vault path. Bail if the daemon doesn't know where the vault lives — the # hook has no other place to write. @@ -119,9 +144,24 @@ VAULT=$(argus kb status 2>/dev/null | awk -F': *' '/^Vault/ {print $2; exit}') [ -n "${VAULT:-}" ] || exit 0 [ -d "$VAULT" ] || exit 0 +# Sanitize SESSION_ID before use in the filename. UUIDs from Claude Code are +# alphanumeric+hyphens by spec, but defense-in-depth blocks a hostile or +# malformed session_id from emitting `..` path components. 
DATE=$(date -u +%Y-%m-%d) -SHORT_SESSION=${SESSION_ID:0:8} -SLUG=$(printf '%s-%s' "$REPO" "$BRANCH" | tr '[:upper:]' '[:lower:]' | tr '/_' '--' | sed 's/[^a-z0-9-]//g' | cut -c1-40) +SHORT_SESSION=$(printf '%s' "${SESSION_ID:0:8}" | tr -dc 'a-zA-Z0-9') +[ -n "$SHORT_SESSION" ] || SHORT_SESSION="anon" +# Build slug from REPO/BRANCH; strip leading/trailing hyphens after the +# character-class filter so "myrepo-" (empty branch) becomes "myrepo". +# Note on collisions: two sessions in the same repo+branch on the same date +# whose UUIDs share their first 8 chars would clobber. UUID collision odds +# at 8 hex chars are ~1 in 4 billion per day per repo+branch — acceptable +# for a best-effort capture system; /dream re-reads the file as the source +# of truth anyway. +SLUG=$(printf '%s-%s' "$REPO" "$BRANCH" \ + | tr '[:upper:]' '[:lower:]' \ + | tr '/_' '--' \ + | sed -e 's/[^a-z0-9-]//g' -e 's/^-*//' -e 's/-*$//' \ + | cut -c1-40) [ -n "$SLUG" ] || SLUG="session" INBOX_DIR="$VAULT/memory/inbox" INBOX_FILE="$INBOX_DIR/$DATE-$SHORT_SESSION-$SLUG.md" diff --git a/agents/skills/dream/SKILL.md b/agents/skills/dream/SKILL.md index b9772292..2f4343d0 100644 --- a/agents/skills/dream/SKILL.md +++ b/agents/skills/dream/SKILL.md @@ -45,13 +45,13 @@ Run the eight phases below in order. Apply every fix. Never prompt for confirmat Pull yesterday's signal into `memory/inbox/` so the rest of dream can synthesize it. Skip silently when an upstream is unavailable — meetings are best-effort signal, not a hard dependency. -1. **Granola meetings.** If `mcp__granola__list_meetings` is available, call it with `time_range: "last_day"` (fall back to `this_week` if the tool false-negatives — see `[[granola]]` for the same-day quirk). For each meeting: +1. **Granola meetings.** If `mcp__granola__list_meetings` is available, call it with `time_range: "last_day"`. 
If the result is empty for a day you know had meetings, retry with `this_week` — Granola's `query_granola_meetings` regularly false-negatives on same-day captures, so `list_meetings` is the more reliable discovery primitive. If the tool returns tool-not-found, skip silently. For each meeting: - Skip if `memory/inbox/` already contains a doc with the meeting ID in the slug (idempotent on re-runs). - Fetch summary + AI notes via `get_meetings(meeting_ids=[id])`. - Write a raw inbox doc at `memory/inbox/-meeting--.md` with: - `tags: [meeting-capture, granola, ]` - Body: meeting title, attendees, AI notes, any decisions/action items the AI surfaced. -2. **Notion meeting notes.** If `mcp__claude_ai_Notion__notion-query-meeting-notes` (or the cortex Notion equivalent) is available, query for yesterday's meeting notes. Same dedupe + write pattern, tag with `meeting-capture, notion`. +2. **Notion meeting notes.** If `mcp__claude_ai_Notion__notion-query-meeting-notes` (or the cortex Notion equivalent) is available, query for yesterday's meeting notes. Same dedupe + write pattern, tag with `meeting-capture, notion`. If the tool returns tool-not-found, skip silently and proceed to step 3. 3. **Session captures already in inbox.** The `session-end-capture` hook writes session summaries directly into `memory/inbox/` as Claude Code sessions wrap up. Don't re-fetch — these are already on disk before dream starts and will be processed in Phase 3. 4. Don't synthesize here. Phase 0's only job is to land raw captures in the inbox so Phase 3 can distill them. If meeting fetch fails entirely (no MCP, network down, daemon offline), proceed without it; subsequent phases still run on whatever is already in the inbox. @@ -124,13 +124,13 @@ For each inbox doc: 2. Run `kb_search` on the doc's key entities (project names, people, tools, file paths, decisions) to surface candidate target docs. 3. 
**Synthesize first.** Walk the body and extract durable items into one of these shapes: - **Decision** ("we decided to use X for Y") → merge into the relevant project doc as a `## Decision: ` section, with a one-line rationale. If a previous decision on the same topic exists, mark it superseded (Phase 4 mechanics) and link to the new one. - - **People fact** (role change, joined team, scope shift, area of ownership) → update `thanx/people-*` or the relevant people doc. Add `Previously: — superseded ` if it overrides existing data. - - **Convention / pattern** (a way the team does something, a gotcha, a workflow rule) → merge into `patterns/`, `thanx/dev-tools.md`, or the closest existing convention doc. Cross-link with `[[wikilinks]]`. + - **People fact** (role change, joined team, scope shift, area of ownership) → update the relevant `/people-*` or `memory/people/` doc. Add `Previously: — superseded ` if it overrides existing data. + - **Convention / pattern** (a way the team does something, a gotcha, a workflow rule) → merge into `patterns/` or the closest existing convention doc. Cross-link with `[[wikilinks]]`. - **Action item** with a clear owner + deadline → if it's a recurring task or a follow-up that maps to an existing project doc, append a `## Open Action Items` section. Otherwise skip — action items rot fast and a stale "follow up next week" entry is noise. - - **Tool / vendor evaluation** → merge into `[[vendor-evaluations]]` or the tool's dedicated doc. + - **Tool / vendor evaluation** → merge into the existing `vendor-evaluations` (or equivalent) doc, or the tool's dedicated doc. Use `[[wikilinks]]` for cross-references. 4. **For each fact merged, run a conflict check** before writing: does this contradict an existing fact in the target doc? If yes, apply the supersession pattern from Phase 4 (canonical = newest, mark prior as historical). 5. 
After synthesis is done, decide what to do with the raw inbox capture: - - **All durable content distilled** (most session-capture and meeting-capture docs) → `kb_delete` the inbox source. The knowledge survives in topical docs; the raw transcript was scaffolding. + - **All durable content distilled** (most session-capture and meeting-capture docs) → `kb_delete` the inbox source. The knowledge survives in topical docs; the raw inbox note was scaffolding. The original Claude Code session transcript at `transcript_path` (typically `~/.claude/projects/<project>/<session_id>.jsonl`) is unaffected and remains the ground-truth recovery path if synthesis later turns out to have missed something. - **Some content distilled, some narrative left** (long meeting with backstory worth preserving) → re-file the raw to `memory/archive/meetings/-.md` instead of deleting; the topical docs cite back to it via wikilink. - **Nothing distillable** (genuinely raw observation that needs a home but doesn't update an existing topic) → fall through to the routing rules below and re-file as a new topical doc. - **Too degraded to classify** (empty body, malformed frontmatter that can't be salvaged) → **Hold** in inbox, note path in the report. 
@@ -290,8 +290,8 @@ Add these sections to the report — they cover ingest, triage/synthesis, confli ### Inbox Triage & Synthesis (Phase 3) | Inbox Doc | Tags | Action | Knowledge Distilled Into | |-----------|------|--------|-------------------------| -| memory/inbox/ | high-value, commit-merged | synthesize+delete | thanx/dev-tools.md (new convention), memory/project/foo.md (decision) | -| memory/inbox/ | meeting-capture | synthesize+archive | thanx/people-engineering.md (role change) | +| memory/inbox/ | high-value, commit-merged | synthesize+delete | patterns/dev-tools.md (new convention), memory/project/foo.md (decision) | +| memory/inbox/ | meeting-capture | synthesize+archive | memory/people/engineering.md (role change) | | memory/inbox/ | session-capture, work-in-progress | discard | nothing distillable | | memory/inbox/ | | re-file | | | memory/inbox/ | | hold | (kept in inbox — too degraded) | From 8d37f2cb9ff7484f09287919eb54694742a16c0f Mon Sep 17 00:00:00 2001 From: Darren Cheng Date: Tue, 5 May 2026 17:48:46 -0700 Subject: [PATCH 3/3] Harden test harness against silent remote collision MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `add_test_remote` previously swallowed the "remote already exists" error from `git remote add`, which let the subsequent `git push origin master` push test commits to whatever the parent repo's origin pointed at — in practice, the real GitHub remote. Refuse to proceed when the colliding remote URL doesn't match the temp bare we just created. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/skill-tests/harness.sh | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/.github/skill-tests/harness.sh b/.github/skill-tests/harness.sh index f7ed1980..8b5b8bf0 100644 --- a/.github/skill-tests/harness.sh +++ b/.github/skill-tests/harness.sh @@ -89,14 +89,29 @@ make_test_repo() { echo "$dir" } -# Create a bare "remote" repo and add it as origin +# Create a bare "remote" repo and add it as origin. +# +# Safety: if `git remote add` collides (because a previous `make_test_repo` +# call cd'd to a temp dir that was itself inside a real git repo, or because +# cd failed and we're still in the parent worktree), the silent failure +# would historically leave `$name` pointing at the parent repo's real +# remote. The subsequent `git push origin master` would then push test +# commits to that real remote (e.g. GitHub). We refuse to proceed in that +# case — better a loud abort than a quiet master-clobber. add_test_remote() { local name="${1:-origin}" local remote_dir remote_dir=$(mktemp -d "${TMPDIR:-/tmp}/skill-remote-XXXXXX") _TMPDIRS+=("$remote_dir") git init -q --bare "$remote_dir" - git remote add "$name" "$remote_dir" 2>/dev/null || true + if ! git remote add "$name" "$remote_dir" 2>/dev/null; then + local actual + actual=$(git remote get-url "$name" 2>/dev/null || echo "") + if [ "$actual" != "$remote_dir" ]; then + echo "FATAL: harness setup error — remote '$name' points at '$actual' (expected temp bare at '$remote_dir'). Refusing to push (this would clobber a real remote)." >&2 + exit 1 + fi + fi git push -q "$name" HEAD:master 2>/dev/null || true echo "$remote_dir" }