diff --git a/.gitignore b/.gitignore index 770818be3..bdf5dc43b 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,7 @@ bin/gstack-global-discover .gstack-worktrees/ /tmp/ *.log +supabase/.temp/ bun.lock *.bun-build .env diff --git a/CHANGELOG.md b/CHANGELOG.md index 90e8335d7..7f40a4ce3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,38 @@ # Changelog +## [0.12.0.0] - 2026-03-23 — Community Mode + Trustworthy Telemetry + +### Added + +- **You can now count real users.** Every gstack install gets a random UUID fingerprint (`~/.gstack/.install-id`). Update-check pings — which send only your version, OS, and this random ID — now fire for all users (not just opted-in). The community dashboard shows unique weekly users for the first time. +- **One-liner installer.** `bash <(curl -fsSL https://raw.githubusercontent.com/garrytan/gstack/main/install.sh)` — checks prereqs, clones, runs setup, handles upgrades if already installed. +- **Community tier with email auth.** Opt in for cloud backup of your gstack config, benchmarks comparing your usage to other builders, and skill recommendations based on community patterns. +- **Growth funnel metrics.** New SQL views track install → activate (first skill run within 24h) → retain (return pings in week 2). Dashboard shows the full funnel. +- **Version adoption velocity.** Materialized view shows how fast users upgrade after each release. +- **Anomaly detection views.** Daily active installs and version adoption materialized views, ready for alert edge functions. + +### Fixed + +- **Telemetry data is now trustworthy.** E2E test events were polluting production Supabase (~230 of 232 events were test noise). Source field (`live`/`test`/`dev`) tags every event. All dashboard and edge function queries filter `source=live`. E2E test runner sets `GSTACK_TELEMETRY_SOURCE=test`. +- **56-year skill durations are gone.** The `_TEL_START` shell variable was lost between bash blocks, producing epoch-time durations. Now persisted to `~/.gstack/analytics/.tel-start-$PPID` and read back in the epilogue. Duration capped at 86,400s (24h) with a CHECK constraint in the schema. +- **Session ID persistence across bash blocks.** Same `$PPID` file pattern as duration fix — epilogue reads session ID from file instead of relying on shell variable scope. +- **Dashboard shows filtered data.** Weekly active installs, skill popularity, and error reports all filter `source=eq.live`. Community-pulse edge function uses `COUNT(DISTINCT install_fingerprint)` instead of raw count. +- **Benchmarks and recommendations no longer include test data.** Source filter added to both edge functions. `skill_sequences` and `crash_clusters` views recreated with source filter. + +### Changed + +- **Identity model simplified.** Replaced SHA-256(hostname+user) `installation_id` (community-only) with random UUID `install_fingerprint` (all tiers). Expand-contract migration preserves backward compat — old clients still work via a trigger that copies `installation_id` → `install_fingerprint`. +- **Update-check pings ungated from telemetry.** These pings send only version + OS + random UUID — equivalent to what GitHub sees in access logs. Transparency note added to the telemetry prompt. +- **Dead code removed.** Deleted the unused `telemetry-ingest` edge function — `telemetry-sync` POSTs directly to Supabase REST API. + +### For contributors + +- Migration 003 adds source columns, install_fingerprint, duration CHECK constraint, 4 indexes, recreated views, growth funnel view, and materialized views +- Deployment requires running Phase 4A cleanup SQL before migration, Phase 4B backfill after +- Expand-contract pattern: old `installation_id` column kept for backward compat with a trigger +- Homebrew tap deferred to TODOS.md (P2) +- 8 new telemetry tests (source field, duration caps, fingerprint persistence) + ## [0.11.18.1] - 2026-03-24 ### Changed diff --git a/README.md b/README.md index fd81d78ce..9ca214f16 100644 --- a/README.md +++ b/README.md @@ -42,9 +42,13 @@ Fork it. Improve it. Make it yours. And if you want to hate on free open source **Requirements:** [Claude Code](https://docs.anthropic.com/en/docs/claude-code), [Git](https://git-scm.com/), [Bun](https://bun.sh/) v1.0+, [Node.js](https://nodejs.org/) (Windows only) -### Step 1: Install on your machine +### One-liner install -Open Claude Code and paste this. Claude does the rest. +```bash +bash <(curl -fsSL https://raw.githubusercontent.com/garrytan/gstack/main/install.sh) +``` + +Or paste this into Claude Code — Claude does the rest: > Install gstack: run **`git clone https://github.com/garrytan/gstack.git ~/.claude/skills/gstack && cd ~/.claude/skills/gstack && ./setup`** then add a "gstack" section to CLAUDE.md that says to use the /browse skill from gstack for all web browsing, never use mcp\_\_claude-in-chrome\_\_\* tools, and lists the available skills: /office-hours, /plan-ceo-review, /plan-eng-review, /plan-design-review, /design-consultation, /review, /ship, /land-and-deploy, /canary, /benchmark, /browse, /qa, /qa-only, /design-review, /setup-browser-cookies, /setup-deploy, /retro, /investigate, /document-release, /codex, /cso, /autoplan, /careful, /freeze, /guard, /unfreeze, /gstack-upgrade. Then ask the user if they also want to add gstack to the current project so teammates get it. diff --git a/SKILL.md b/SKILL.md index dada1e75d..4817fd0e1 100644 --- a/SKILL.md +++ b/SKILL.md @@ -27,9 +27,11 @@ _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true _CONTRIB=$(~/.claude/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true) _PROACTIVE=$(~/.claude/skills/gstack/bin/gstack-config get proactive 2>/dev/null || echo "true") +_PROACTIVE_PROMPTED=$([ -f ~/.gstack/.proactive-prompted ] && echo "yes" || echo "no") _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown") echo "BRANCH: $_BRANCH" echo "PROACTIVE: $_PROACTIVE" +echo "PROACTIVE_PROMPTED: $_PROACTIVE_PROMPTED" source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true REPO_MODE=${REPO_MODE:-unknown} echo "REPO_MODE: $REPO_MODE" @@ -47,8 +49,11 @@ echo '{"skill":"gstack","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basen for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done ``` -If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills — only invoke -them when the user explicitly asks. The user opted out of proactive suggestions. +If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not +auto-invoke skills based on conversation context. Only run skills the user explicitly +types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: +"I think /skillname might help here — want me to run it?" and wait for confirmation. +The user opted out of proactive behavior. If output shows `UPGRADE_AVAILABLE `: read `~/.claude/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED `: tell user "Running gstack v{to} (just updated!)" and continue. @@ -97,112 +102,44 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. -## AskUserQuestion Format +If `PROACTIVE_PROMPTED` is `no` AND `TEL_PROMPTED` is `yes`: After telemetry is handled, +ask the user about proactive behavior. Use AskUserQuestion: -**ALWAYS follow this structure for every AskUserQuestion call:** -1. **Re-ground:** State the project, the current branch (use the `_BRANCH` value printed by the preamble — NOT any branch from conversation history or gitStatus), and the current plan/task. (1-2 sentences) -2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called. -3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]` — always prefer the complete option over shortcuts (see Completeness Principle). Include `Completeness: X/10` for each option. Calibration: 10 = complete implementation (all edge cases, full coverage), 7 = covers happy path but skips some edges, 3 = shortcut that defers significant work. If both options are 8+, pick the higher; if one is ≤5, flag it. -4. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)` -5. **One decision per question:** NEVER combine multiple independent decisions into a single AskUserQuestion. Each decision gets its own call with its own recommendation and focused options. Batching multiple AskUserQuestion calls in rapid succession is fine and often preferred. Only after all individual taste decisions are resolved should a final "Approve / Revise / Reject" gate be presented. +> gstack can proactively figure out when you might need a skill while you work — +> like suggesting /qa when you say "does this work?" or /investigate when you hit +> a bug. We recommend keeping this on — it speeds up every part of your workflow. -Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex. - -Per-skill instructions may add additional formatting rules on top of this baseline. - -## Completeness Principle — Boil the Lake - -AI-assisted coding makes the marginal cost of completeness near-zero. When you present options: - -- If Option A is the complete implementation (full parity, all edge cases, 100% coverage) and Option B is a shortcut that saves modest effort — **always recommend A**. The delta between 80 lines and 150 lines is meaningless with CC+gstack. "Good enough" is the wrong instinct when "complete" costs minutes more. -- **Lake vs. ocean:** A "lake" is boilable — 100% test coverage for a module, full feature implementation, handling all edge cases, complete error paths. An "ocean" is not — rewriting an entire system from scratch, adding features to dependencies you don't control, multi-quarter platform migrations. Recommend boiling lakes. Flag oceans as out of scope. -- **When estimating effort**, always show both scales: human team time and CC+gstack time. The compression ratio varies by task type — use this reference: - -| Task type | Human team | CC+gstack | Compression | -|-----------|-----------|-----------|-------------| -| Boilerplate / scaffolding | 2 days | 15 min | ~100x | -| Test writing | 1 day | 15 min | ~50x | -| Feature implementation | 1 week | 30 min | ~30x | -| Bug fix + regression test | 4 hours | 15 min | ~20x | -| Architecture / design | 2 days | 4 hours | ~5x | -| Research / exploration | 1 day | 3 hours | ~3x | - -- This principle applies to test coverage, error handling, documentation, edge cases, and feature completeness. Don't skip the last 10% to "save time" — with AI, that 10% costs seconds. - -**Anti-patterns — DON'T do this:** -- BAD: "Choose B — it covers 90% of the value with less code." (If A is only 70 lines more, choose A.) -- BAD: "We can skip edge case handling to save time." (Edge case handling costs minutes with CC.) -- BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) -- BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.") - -## Repo Ownership Mode — See Something, Say Something - -`REPO_MODE` from the preamble tells you who owns issues in this repo: - -- **`solo`** — One person does 80%+ of the work. They own everything. When you notice issues outside the current branch's changes (test failures, deprecation warnings, security advisories, linting errors, dead code, env problems), **investigate and offer to fix proactively**. The solo dev is the only person who will fix it. Default to action. -- **`collaborative`** — Multiple active contributors. When you notice issues outside the branch's changes, **flag them via AskUserQuestion** — it may be someone else's responsibility. Default to asking, not fixing. -- **`unknown`** — Treat as collaborative (safer default — ask before fixing). - -**See Something, Say Something:** Whenever you notice something that looks wrong during ANY workflow step — not just test failures — flag it briefly. One sentence: what you noticed and its impact. In solo mode, follow up with "Want me to fix it?" In collaborative mode, just flag it and move on. - -Never let a noticed issue silently pass. The whole point is proactive communication. - -## Search Before Building - -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy. - -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. +Options: +- A) Keep it on (recommended) +- B) Turn it off — I'll type /commands myself -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." +If A: run `~/.claude/skills/gstack/bin/gstack-config set proactive true` +If B: run `~/.claude/skills/gstack/bin/gstack-config set proactive false` -Log eureka moments: +Always run: ```bash -jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true +touch ~/.gstack/.proactive-prompted ``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." +This only happens once. If `PROACTIVE_PROMPTED` is `yes`, skip this entirely. ## Contributor Mode -If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. - -**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better! - -**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore. +If `_CONTRIB` is `true`: you are in **contributor mode**. At the end of each major workflow step, rate your gstack experience 0-10. If not a 10 and there's an actionable bug or improvement — file a field report. -**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs. - -**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer): +**File only:** gstack tooling bugs where the input was reasonable but gstack failed. **Skip:** user app bugs, network errors, auth failures on user's site. +**To file:** write `~/.gstack/contributor-logs/{slug}.md`: ``` # {Title} - -Hey gstack team — ran into this while using /{skill-name}: - -**What I was trying to do:** {what the user/agent was attempting} -**What happened instead:** {what actually happened} -**My rating:** {0-10} — {one sentence on why it wasn't a 10} - -## Steps to reproduce +**What I tried:** {action} | **What happened:** {result} | **Rating:** {0-10} +## Repro 1. {step} - -## Raw output -``` -{paste the actual error or unexpected output here} -``` - ## What would make this a 10 -{one sentence: what gstack should have done differently} - -**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** /{skill} +{one sentence} +**Date:** {YYYY-MM-DD} | **Version:** {version} | **Skill:** /{skill} ``` - -Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}" +Slug: lowercase hyphens, max 60 chars. Skip if exists. Max 3/session. File inline, don't stop. ## Completion Status Protocol diff --git a/TODOS.md b/TODOS.md index 1c4b88ed4..191e7df4b 100644 --- a/TODOS.md +++ b/TODOS.md @@ -309,6 +309,20 @@ Linux cookie import shipped in v0.11.11.0 (Wave 3). Supports Chrome, Chromium, B **Priority:** P3 **Depends on:** Browse sessions +## Distribution + +### Homebrew tap + +**What:** Create a separate repo (`homebrew-gstack`) with a Homebrew formula so users can `brew tap garrytan/gstack && brew install gstack`. + +**Why:** Gold-standard dev tool distribution. Complements the curl-pipe-bash installer. Familiar install flow, automatic updates via `brew upgrade`, discoverable via Homebrew search. + +**Context:** The curl-pipe-bash installer (`install.sh`) ships first. The Homebrew formula would clone the repo + run `./setup`, similar to the installer. Needs a separate GitHub repo (`garrytan/homebrew-gstack`) with a `Formula/gstack.rb` file (~20 lines). Must be updated on each release. + +**Effort:** S (human: ~2h / CC: ~10 min) +**Priority:** P2 +**Depends on:** install.sh shipping first + ## Infrastructure ### /setup-gstack-upload skill (S3 bucket) diff --git a/VERSION b/VERSION index 53d7c74ca..6ca5e0480 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.11.18.1 +0.12.0.0 diff --git a/autoplan/SKILL.md b/autoplan/SKILL.md index e9161eabe..577f7061a 100644 --- a/autoplan/SKILL.md +++ b/autoplan/SKILL.md @@ -36,9 +36,11 @@ _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true _CONTRIB=$(~/.claude/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true) _PROACTIVE=$(~/.claude/skills/gstack/bin/gstack-config get proactive 2>/dev/null || echo "true") +_PROACTIVE_PROMPTED=$([ -f ~/.gstack/.proactive-prompted ] && echo "yes" || echo "no") _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown") echo "BRANCH: $_BRANCH" echo "PROACTIVE: $_PROACTIVE" +echo "PROACTIVE_PROMPTED: $_PROACTIVE_PROMPTED" source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true REPO_MODE=${REPO_MODE:-unknown} echo "REPO_MODE: $REPO_MODE" @@ -56,8 +58,11 @@ echo '{"skill":"autoplan","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(bas for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done ``` -If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills — only invoke -them when the user explicitly asks. The user opted out of proactive suggestions. +If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not +auto-invoke skills based on conversation context. Only run skills the user explicitly +types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: +"I think /skillname might help here — want me to run it?" and wait for confirmation. +The user opted out of proactive behavior. If output shows `UPGRADE_AVAILABLE `: read `~/.claude/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED `: tell user "Running gstack v{to} (just updated!)" and continue. @@ -106,6 +111,27 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `PROACTIVE_PROMPTED` is `no` AND `TEL_PROMPTED` is `yes`: After telemetry is handled, +ask the user about proactive behavior. Use AskUserQuestion: + +> gstack can proactively figure out when you might need a skill while you work — +> like suggesting /qa when you say "does this work?" or /investigate when you hit +> a bug. We recommend keeping this on — it speeds up every part of your workflow. + +Options: +- A) Keep it on (recommended) +- B) Turn it off — I'll type /commands myself + +If A: run `~/.claude/skills/gstack/bin/gstack-config set proactive true` +If B: run `~/.claude/skills/gstack/bin/gstack-config set proactive false` + +Always run: +```bash +touch ~/.gstack/.proactive-prompted +``` + +This only happens once. If `PROACTIVE_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -113,7 +139,6 @@ This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. 2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called. 3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]` — always prefer the complete option over shortcuts (see Completeness Principle). Include `Completeness: X/10` for each option. Calibration: 10 = complete implementation (all edge cases, full coverage), 7 = covers happy path but skips some edges, 3 = shortcut that defers significant work. If both options are 8+, pick the higher; if one is ≤5, flag it. 4. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)` -5. **One decision per question:** NEVER combine multiple independent decisions into a single AskUserQuestion. Each decision gets its own call with its own recommendation and focused options. Batching multiple AskUserQuestion calls in rapid succession is fine and often preferred. Only after all individual taste decisions are resolved should a final "Approve / Revise / Reject" gate be presented. Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex. @@ -121,97 +146,54 @@ Per-skill instructions may add additional formatting rules on top of this baseli ## Completeness Principle — Boil the Lake -AI-assisted coding makes the marginal cost of completeness near-zero. When you present options: +AI makes completeness near-free. Always recommend the complete option over shortcuts — the delta is minutes with CC+gstack. A "lake" (100% coverage, all edge cases) is boilable; an "ocean" (full rewrite, multi-quarter migration) is not. Boil lakes, flag oceans. -- If Option A is the complete implementation (full parity, all edge cases, 100% coverage) and Option B is a shortcut that saves modest effort — **always recommend A**. The delta between 80 lines and 150 lines is meaningless with CC+gstack. "Good enough" is the wrong instinct when "complete" costs minutes more. -- **Lake vs. ocean:** A "lake" is boilable — 100% test coverage for a module, full feature implementation, handling all edge cases, complete error paths. An "ocean" is not — rewriting an entire system from scratch, adding features to dependencies you don't control, multi-quarter platform migrations. Recommend boiling lakes. Flag oceans as out of scope. -- **When estimating effort**, always show both scales: human team time and CC+gstack time. The compression ratio varies by task type — use this reference: +**Effort reference** — always show both scales: | Task type | Human team | CC+gstack | Compression | |-----------|-----------|-----------|-------------| -| Boilerplate / scaffolding | 2 days | 15 min | ~100x | -| Test writing | 1 day | 15 min | ~50x | -| Feature implementation | 1 week | 30 min | ~30x | -| Bug fix + regression test | 4 hours | 15 min | ~20x | -| Architecture / design | 2 days | 4 hours | ~5x | -| Research / exploration | 1 day | 3 hours | ~3x | - -- This principle applies to test coverage, error handling, documentation, edge cases, and feature completeness. Don't skip the last 10% to "save time" — with AI, that 10% costs seconds. +| Boilerplate | 2 days | 15 min | ~100x | +| Tests | 1 day | 15 min | ~50x | +| Feature | 1 week | 30 min | ~30x | +| Bug fix | 4 hours | 15 min | ~20x | -**Anti-patterns — DON'T do this:** -- BAD: "Choose B — it covers 90% of the value with less code." (If A is only 70 lines more, choose A.) -- BAD: "We can skip edge case handling to save time." (Edge case handling costs minutes with CC.) -- BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) -- BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.") +Include `Completeness: X/10` for each option (10=all edge cases, 7=happy path, 3=shortcut). -## Repo Ownership Mode — See Something, Say Something +## Repo Ownership — See Something, Say Something -`REPO_MODE` from the preamble tells you who owns issues in this repo: +`REPO_MODE` controls how to handle issues outside your branch: +- **`solo`** — You own everything. Investigate and offer to fix proactively. +- **`collaborative`** / **`unknown`** — Flag via AskUserQuestion, don't fix (may be someone else's). -- **`solo`** — One person does 80%+ of the work. They own everything. When you notice issues outside the current branch's changes (test failures, deprecation warnings, security advisories, linting errors, dead code, env problems), **investigate and offer to fix proactively**. The solo dev is the only person who will fix it. Default to action. -- **`collaborative`** — Multiple active contributors. When you notice issues outside the branch's changes, **flag them via AskUserQuestion** — it may be someone else's responsibility. Default to asking, not fixing. -- **`unknown`** — Treat as collaborative (safer default — ask before fixing). - -**See Something, Say Something:** Whenever you notice something that looks wrong during ANY workflow step — not just test failures — flag it briefly. One sentence: what you noticed and its impact. In solo mode, follow up with "Want me to fix it?" In collaborative mode, just flag it and move on. - -Never let a noticed issue silently pass. The whole point is proactive communication. +Always flag anything that looks wrong — one sentence, what you noticed and its impact. ## Search Before Building -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy. - -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. +Before building anything unfamiliar, **search first.** See `~/.claude/skills/gstack/ETHOS.md`. +- **Layer 1** (tried and true) — don't reinvent. **Layer 2** (new and popular) — scrutinize. **Layer 3** (first principles) — prize above all. -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: +**Eureka:** When first-principles reasoning contradicts conventional wisdom, name it and log: ```bash jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true ``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." ## Contributor Mode -If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. - -**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better! - -**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore. +If `_CONTRIB` is `true`: you are in **contributor mode**. At the end of each major workflow step, rate your gstack experience 0-10. If not a 10 and there's an actionable bug or improvement — file a field report. -**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs. - -**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer): +**File only:** gstack tooling bugs where the input was reasonable but gstack failed. **Skip:** user app bugs, network errors, auth failures on user's site. +**To file:** write `~/.gstack/contributor-logs/{slug}.md`: ``` # {Title} - -Hey gstack team — ran into this while using /{skill-name}: - -**What I was trying to do:** {what the user/agent was attempting} -**What happened instead:** {what actually happened} -**My rating:** {0-10} — {one sentence on why it wasn't a 10} - -## Steps to reproduce +**What I tried:** {action} | **What happened:** {result} | **Rating:** {0-10} +## Repro 1. {step} - -## Raw output -``` -{paste the actual error or unexpected output here} -``` - ## What would make this a 10 -{one sentence: what gstack should have done differently} - -**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** /{skill} +{one sentence} +**Date:** {YYYY-MM-DD} | **Version:** {version} | **Skill:** /{skill} ``` - -Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}" +Slug: lowercase hyphens, max 60 chars. Skip if exists. Max 3/session. File inline, don't stop. ## Completion Status Protocol diff --git a/benchmark/SKILL.md b/benchmark/SKILL.md index d9138a034..d6d65ae24 100644 --- a/benchmark/SKILL.md +++ b/benchmark/SKILL.md @@ -29,9 +29,11 @@ _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true _CONTRIB=$(~/.claude/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true) _PROACTIVE=$(~/.claude/skills/gstack/bin/gstack-config get proactive 2>/dev/null || echo "true") +_PROACTIVE_PROMPTED=$([ -f ~/.gstack/.proactive-prompted ] && echo "yes" || echo "no") _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown") echo "BRANCH: $_BRANCH" echo "PROACTIVE: $_PROACTIVE" +echo "PROACTIVE_PROMPTED: $_PROACTIVE_PROMPTED" source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true REPO_MODE=${REPO_MODE:-unknown} echo "REPO_MODE: $REPO_MODE" @@ -49,8 +51,11 @@ echo '{"skill":"benchmark","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(ba for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done ``` -If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills — only invoke -them when the user explicitly asks. The user opted out of proactive suggestions. +If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not +auto-invoke skills based on conversation context. Only run skills the user explicitly +types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: +"I think /skillname might help here — want me to run it?" and wait for confirmation. +The user opted out of proactive behavior. If output shows `UPGRADE_AVAILABLE `: read `~/.claude/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED `: tell user "Running gstack v{to} (just updated!)" and continue. @@ -99,112 +104,44 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. -## AskUserQuestion Format +If `PROACTIVE_PROMPTED` is `no` AND `TEL_PROMPTED` is `yes`: After telemetry is handled, +ask the user about proactive behavior. Use AskUserQuestion: -**ALWAYS follow this structure for every AskUserQuestion call:** -1. **Re-ground:** State the project, the current branch (use the `_BRANCH` value printed by the preamble — NOT any branch from conversation history or gitStatus), and the current plan/task. (1-2 sentences) -2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called. -3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]` — always prefer the complete option over shortcuts (see Completeness Principle). Include `Completeness: X/10` for each option. Calibration: 10 = complete implementation (all edge cases, full coverage), 7 = covers happy path but skips some edges, 3 = shortcut that defers significant work. If both options are 8+, pick the higher; if one is ≤5, flag it. -4. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)` -5. **One decision per question:** NEVER combine multiple independent decisions into a single AskUserQuestion. Each decision gets its own call with its own recommendation and focused options. Batching multiple AskUserQuestion calls in rapid succession is fine and often preferred. Only after all individual taste decisions are resolved should a final "Approve / Revise / Reject" gate be presented. +> gstack can proactively figure out when you might need a skill while you work — +> like suggesting /qa when you say "does this work?" or /investigate when you hit +> a bug. We recommend keeping this on — it speeds up every part of your workflow. -Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex. - -Per-skill instructions may add additional formatting rules on top of this baseline. - -## Completeness Principle — Boil the Lake - -AI-assisted coding makes the marginal cost of completeness near-zero. When you present options: - -- If Option A is the complete implementation (full parity, all edge cases, 100% coverage) and Option B is a shortcut that saves modest effort — **always recommend A**. The delta between 80 lines and 150 lines is meaningless with CC+gstack. "Good enough" is the wrong instinct when "complete" costs minutes more. -- **Lake vs. ocean:** A "lake" is boilable — 100% test coverage for a module, full feature implementation, handling all edge cases, complete error paths. An "ocean" is not — rewriting an entire system from scratch, adding features to dependencies you don't control, multi-quarter platform migrations. Recommend boiling lakes. Flag oceans as out of scope. -- **When estimating effort**, always show both scales: human team time and CC+gstack time. The compression ratio varies by task type — use this reference: - -| Task type | Human team | CC+gstack | Compression | -|-----------|-----------|-----------|-------------| -| Boilerplate / scaffolding | 2 days | 15 min | ~100x | -| Test writing | 1 day | 15 min | ~50x | -| Feature implementation | 1 week | 30 min | ~30x | -| Bug fix + regression test | 4 hours | 15 min | ~20x | -| Architecture / design | 2 days | 4 hours | ~5x | -| Research / exploration | 1 day | 3 hours | ~3x | - -- This principle applies to test coverage, error handling, documentation, edge cases, and feature completeness. Don't skip the last 10% to "save time" — with AI, that 10% costs seconds. - -**Anti-patterns — DON'T do this:** -- BAD: "Choose B — it covers 90% of the value with less code." (If A is only 70 lines more, choose A.) -- BAD: "We can skip edge case handling to save time." (Edge case handling costs minutes with CC.) -- BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) -- BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.") - -## Repo Ownership Mode — See Something, Say Something - -`REPO_MODE` from the preamble tells you who owns issues in this repo: - -- **`solo`** — One person does 80%+ of the work. They own everything. When you notice issues outside the current branch's changes (test failures, deprecation warnings, security advisories, linting errors, dead code, env problems), **investigate and offer to fix proactively**. The solo dev is the only person who will fix it. Default to action. -- **`collaborative`** — Multiple active contributors. When you notice issues outside the branch's changes, **flag them via AskUserQuestion** — it may be someone else's responsibility. Default to asking, not fixing. -- **`unknown`** — Treat as collaborative (safer default — ask before fixing). - -**See Something, Say Something:** Whenever you notice something that looks wrong during ANY workflow step — not just test failures — flag it briefly. One sentence: what you noticed and its impact. In solo mode, follow up with "Want me to fix it?" In collaborative mode, just flag it and move on. - -Never let a noticed issue silently pass. The whole point is proactive communication. - -## Search Before Building - -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy. - -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. +Options: +- A) Keep it on (recommended) +- B) Turn it off — I'll type /commands myself -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." +If A: run `~/.claude/skills/gstack/bin/gstack-config set proactive true` +If B: run `~/.claude/skills/gstack/bin/gstack-config set proactive false` -Log eureka moments: +Always run: ```bash -jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true +touch ~/.gstack/.proactive-prompted ``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." +This only happens once. If `PROACTIVE_PROMPTED` is `yes`, skip this entirely. ## Contributor Mode -If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. - -**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better! - -**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore. +If `_CONTRIB` is `true`: you are in **contributor mode**. At the end of each major workflow step, rate your gstack experience 0-10. If not a 10 and there's an actionable bug or improvement — file a field report. -**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs. - -**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer): +**File only:** gstack tooling bugs where the input was reasonable but gstack failed. **Skip:** user app bugs, network errors, auth failures on user's site. +**To file:** write `~/.gstack/contributor-logs/{slug}.md`: ``` # {Title} - -Hey gstack team — ran into this while using /{skill-name}: - -**What I was trying to do:** {what the user/agent was attempting} -**What happened instead:** {what actually happened} -**My rating:** {0-10} — {one sentence on why it wasn't a 10} - -## Steps to reproduce +**What I tried:** {action} | **What happened:** {result} | **Rating:** {0-10} +## Repro 1. {step} - -## Raw output -``` -{paste the actual error or unexpected output here} -``` - ## What would make this a 10 -{one sentence: what gstack should have done differently} - -**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** /{skill} +{one sentence} +**Date:** {YYYY-MM-DD} | **Version:** {version} | **Skill:** /{skill} ``` - -Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}" +Slug: lowercase hyphens, max 60 chars. Skip if exists. Max 3/session. File inline, don't stop. ## Completion Status Protocol diff --git a/bin/gstack-auth b/bin/gstack-auth new file mode 100755 index 000000000..17714bb33 --- /dev/null +++ b/bin/gstack-auth @@ -0,0 +1,367 @@ +#!/usr/bin/env bash +# gstack-auth — authenticate with gstack.gg +# +# Usage: +# gstack-auth — device code flow (default: opens browser) +# gstack-auth otp [email] — email OTP flow (fallback for SSH/headless) +# gstack-auth status — show current auth status +# gstack-auth logout — remove saved tokens +# gstack-auth change-email — change your email address +# +# Default flow (device code, RFC 8628): +# 1. CLI requests a device code from gstack.gg +# 2. Browser opens → user signs in + approves +# 3. CLI polls until approved → gets Supabase tokens +# +# Fallback (email OTP): +# Sends a 6-digit verification code to the user's email. +# User enters the code in the terminal to authenticate. +# +# Env overrides (for testing): +# GSTACK_STATE_DIR — override ~/.gstack state directory +# GSTACK_DIR — override auto-detected gstack root +# GSTACK_WEB_URL — override gstack.gg URL +set -euo pipefail + +GSTACK_DIR="${GSTACK_DIR:-$(cd "$(dirname "$0")/.." && pwd)}" +STATE_DIR="${GSTACK_STATE_DIR:-$HOME/.gstack}" +AUTH_FILE="$STATE_DIR/auth-token.json" + +# Source Supabase config +if [ -f "$GSTACK_DIR/supabase/config.sh" ]; then + . "$GSTACK_DIR/supabase/config.sh" +fi +SUPABASE_URL="${GSTACK_SUPABASE_URL:-}" +ANON_KEY="${GSTACK_SUPABASE_ANON_KEY:-}" +WEB_URL="${GSTACK_WEB_URL:-https://gstack.gg}" + +if [ -z "$SUPABASE_URL" ] || [ -z "$ANON_KEY" ]; then + echo "Error: Supabase not configured. Check supabase/config.sh" + exit 1 +fi + +AUTH_URL="${SUPABASE_URL}/auth/v1" + +# ─── Helper: write auth token file ────────────────────────── +save_token() { + local access_token="$1" + local refresh_token="$2" + local expires_in="$3" + local email="$4" + local user_id="$5" + + local expires_at + expires_at=$(( $(date +%s) + expires_in )) + + mkdir -p "$STATE_DIR" + cat > "$AUTH_FILE" </dev/null | sed 's/null//' +} + +# ─── Subcommand: status ───────────────────────────────────── +if [ "${1:-}" = "status" ]; then + if [ ! -f "$AUTH_FILE" ]; then + echo "Not authenticated. Run: gstack auth " + exit 0 + fi + AUTH_JSON="$(cat "$AUTH_FILE")" + EMAIL="$(json_field "$AUTH_JSON" "email")" + EXPIRES_AT="$(json_field "$AUTH_JSON" "expires_at")" + NOW="$(date +%s)" + if [ "$NOW" -lt "$EXPIRES_AT" ] 2>/dev/null; then + REMAINING=$(( (EXPIRES_AT - NOW) / 60 )) + echo "Authenticated as: $EMAIL" + echo "Token expires in: ${REMAINING}m" + else + echo "Authenticated as: $EMAIL (token expired — will auto-refresh)" + fi + exit 0 +fi + +# ─── Subcommand: logout ───────────────────────────────────── +if [ "${1:-}" = "logout" ]; then + rm -f "$AUTH_FILE" + echo "Logged out. Auth token removed." + exit 0 +fi + +# ─── Subcommand: change-email ───────────────────────────────── +if [ "${1:-}" = "change-email" ]; then + echo "To change your email, log out and re-authenticate:" + echo " gstack-auth logout" + echo " gstack-auth" + exit 0 +fi + +# ─── Device code flow (default) ────────────────────────────── +# If no arguments, or first arg is not 'otp', use device code flow +if [ "${1:-}" != "otp" ]; then + + # Check if we can open a browser + CAN_OPEN=false + if command -v open >/dev/null 2>&1; then + CAN_OPEN=true + elif command -v xdg-open >/dev/null 2>&1; then + CAN_OPEN=true + fi + + if [ "$CAN_OPEN" = "false" ]; then + echo "No browser available — falling back to email OTP." >&2 + echo "Run: gstack-auth otp [email]" >&2 + # Fall through to OTP flow + set -- "otp" "${@}" + else + echo "" + echo "Requesting device code from gstack.gg..." + + # Step 1: Request device code + DEVICE_RESPONSE="$(curl -s -w "\n%{http_code}" \ + --max-time 15 \ + -X POST "${WEB_URL}/api/auth/device" \ + -H "Content-Type: application/json" \ + 2>/dev/null || echo -e "\n000")" + + DEVICE_CODE_HTTP="$(echo "$DEVICE_RESPONSE" | tail -1)" + DEVICE_BODY="$(echo "$DEVICE_RESPONSE" | sed '$d')" + + if [ "${DEVICE_CODE_HTTP}" != "200" ]; then + echo "Device auth unavailable (HTTP ${DEVICE_CODE_HTTP}). Falling back to email OTP." >&2 + set -- "otp" "${@}" + else + DEVICE_CODE="$(json_field "$DEVICE_BODY" "device_code")" + DEVICE_SECRET="$(json_field "$DEVICE_BODY" "device_secret")" + USER_CODE="$(json_field "$DEVICE_BODY" "user_code")" + VERIFY_URL="$(json_field "$DEVICE_BODY" "verification_url")" + + if [ -z "$DEVICE_CODE" ] || [ -z "$USER_CODE" ]; then + echo "Error: invalid device code response" >&2 + set -- "otp" "${@}" + else + echo "" + echo "Your code: ${USER_CODE}" + echo "" + echo "Opening browser to approve..." + echo "If the browser doesn't open, visit: ${VERIFY_URL}" + echo "" + + # Step 2: Open browser + if command -v open >/dev/null 2>&1; then + open "$VERIFY_URL" 2>/dev/null + elif command -v xdg-open >/dev/null 2>&1; then + xdg-open "$VERIFY_URL" 2>/dev/null + fi + + # Step 3: Poll for approval (every 5s, max 2 minutes) + echo "Waiting for approval..." + POLL_COUNT=0 + MAX_POLLS=24 # 24 * 5s = 2 minutes + + while [ "$POLL_COUNT" -lt "$MAX_POLLS" ]; do + sleep 5 + POLL_COUNT=$((POLL_COUNT + 1)) + + POLL_RESPONSE="$(curl -s -w "\n%{http_code}" \ + --max-time 10 \ + -X POST "${WEB_URL}/api/auth/device/token" \ + -H "Content-Type: application/json" \ + -d "{\"device_code\":\"${DEVICE_CODE}\",\"device_secret\":\"${DEVICE_SECRET}\"}" \ + 2>/dev/null || echo -e "\n000")" + + POLL_HTTP="$(echo "$POLL_RESPONSE" | tail -1)" + POLL_BODY="$(echo "$POLL_RESPONSE" | sed '$d')" + + case "$POLL_HTTP" in + 200) + # Approved! Extract tokens + ACCESS_TOKEN="$(json_field "$POLL_BODY" "access_token")" + REFRESH_TOKEN="$(json_field "$POLL_BODY" "refresh_token")" + EXPIRES_IN="$(json_field "$POLL_BODY" "expires_in")" + USER_ID="$(json_field "$POLL_BODY" "user_id")" + EMAIL="$(json_field "$POLL_BODY" "email")" + + if [ -z "$ACCESS_TOKEN" ] || [ "$ACCESS_TOKEN" = "null" ]; then + echo "Error: approved but no token in response" >&2 + exit 1 + fi + + save_token "$ACCESS_TOKEN" "$REFRESH_TOKEN" "${EXPIRES_IN:-3600}" "${EMAIL:-}" "${USER_ID:-}" + + if [ -n "$EMAIL" ]; then + "$GSTACK_DIR/bin/gstack-config" set email "$EMAIL" 2>/dev/null || true + fi + + echo "" + echo "Authenticated${EMAIL:+ as: $EMAIL}" + echo "Token saved to: ${AUTH_FILE}" + exit 0 + ;; + 202) + # Still pending — keep polling + printf "\r Waiting... (%ds)" "$((POLL_COUNT * 5))" + ;; + 403) + echo "" + echo "Error: invalid device secret (403). Try again." >&2 + exit 1 + ;; + 410) + echo "" + echo "Device code expired. Run gstack-auth again." >&2 + exit 1 + ;; + *) + # Keep trying on transient errors + ;; + esac + done + + echo "" + echo "Timed out waiting for approval (2 minutes)." >&2 + echo "Run gstack-auth again to get a new code." >&2 + exit 1 + fi + fi + fi +fi + +# ─── OTP flow (fallback) ──────────────────────────────────── +# Strip the "otp" subcommand if present +if [ "${1:-}" = "otp" ]; then + shift +fi + +EMAIL="${1:-}" +if [ -z "$EMAIL" ]; then + printf "Enter your email: " + read -r EMAIL +fi + +if [ -z "$EMAIL" ]; then + echo "Error: email is required" + exit 1 +fi + +if ! echo "$EMAIL" | grep -qE '^[^@]+@[^@]+\.[^@]+$'; then + echo "Error: invalid email format" + exit 1 +fi + +# ─── Step 1: Send OTP ──────────────────────────────────────── +echo "" +echo "Sending verification code to ${EMAIL}..." + +OTP_BODY="{\"email\":\"${EMAIL}\"}" + +HTTP_RESPONSE="$(curl -s -w "\n%{http_code}" \ + -X POST "${AUTH_URL}/otp" \ + -H "Content-Type: application/json" \ + -H "apikey: ${ANON_KEY}" \ + -d "$OTP_BODY" 2>/dev/null || echo -e "\n000")" + +HTTP_CODE="$(echo "$HTTP_RESPONSE" | tail -1)" +HTTP_BODY="$(echo "$HTTP_RESPONSE" | sed '$d')" + +case "$HTTP_CODE" in + 2*) + ;; # success + 429) + if echo "$HTTP_BODY" | grep -q "email_send_rate_limit"; then + echo "" + echo "Email rate limit exceeded (Supabase free tier: ~3 emails/hour)." + echo "Try again in a few minutes, or set up custom SMTP in the Supabase" + echo "dashboard for unlimited sends." + exit 1 + fi + echo "Cooldown active — waiting 60s before retrying..." + for i in $(seq 60 -1 1); do + printf "\r Retrying in %2ds..." "$i" + sleep 1 + done + printf "\r \r" + echo "Retrying..." + HTTP_RESPONSE="$(curl -s -w "\n%{http_code}" \ + -X POST "${AUTH_URL}/otp" \ + -H "Content-Type: application/json" \ + -H "apikey: ${ANON_KEY}" \ + -d "$OTP_BODY" 2>/dev/null || echo -e "\n000")" + HTTP_CODE="$(echo "$HTTP_RESPONSE" | tail -1)" + HTTP_BODY="$(echo "$HTTP_RESPONSE" | sed '$d')" + case "$HTTP_CODE" in + 2*) ;; # success on retry + *) echo "Error sending OTP (HTTP ${HTTP_CODE}): ${HTTP_BODY}"; exit 1 ;; + esac + ;; + *) + echo "Error sending OTP (HTTP ${HTTP_CODE}): ${HTTP_BODY}" + exit 1 + ;; +esac + +echo "" +echo "Check your email for a 6-digit code." +echo "" + +# ─── Step 2: Read OTP code ─────────────────────────────────── +printf "Enter code: " +read -r OTP_CODE + +if [ -z "$OTP_CODE" ]; then + echo "No code entered." + exit 1 +fi + +# ─── Step 3: Verify OTP ───────────────────────────────────── +OTP_CODE="$(echo "$OTP_CODE" | tr -d '[:space:]')" + +if ! echo "$OTP_CODE" | grep -qE '^[0-9]{6}$'; then + echo "Error: code must be exactly 6 digits" + exit 1 +fi + +VERIFY_RESPONSE="$(curl -s \ + -X POST "${AUTH_URL}/verify" \ + -H "Content-Type: application/json" \ + -H "apikey: ${ANON_KEY}" \ + -d "{\"email\":\"${EMAIL}\",\"token\":\"${OTP_CODE}\",\"type\":\"email\"}" \ + 2>/dev/null || echo "{}")" + +ACCESS_TOKEN="$(json_field "$VERIFY_RESPONSE" "access_token")" +REFRESH_TOKEN="$(json_field "$VERIFY_RESPONSE" "refresh_token")" +EXPIRES_IN="$(json_field "$VERIFY_RESPONSE" "expires_in")" +USER_ID="$(json_field "$VERIFY_RESPONSE" "id" 2>/dev/null || true)" + +if [ -z "$USER_ID" ]; then + USER_ID="$(echo "$VERIFY_RESPONSE" | grep -o '"id":"[^"]*"' | head -1 | sed 's/"id":"//;s/"//')" +fi + +if [ -z "$ACCESS_TOKEN" ] || [ "$ACCESS_TOKEN" = "null" ]; then + ERROR_MSG="$(json_field "$VERIFY_RESPONSE" "error_description" 2>/dev/null || json_field "$VERIFY_RESPONSE" "msg" 2>/dev/null || echo "unknown error")" + echo "" + echo "Verification failed: $ERROR_MSG" + echo "Check the code and try again." + exit 1 +fi + +save_token "$ACCESS_TOKEN" "$REFRESH_TOKEN" "${EXPIRES_IN:-3600}" "$EMAIL" "$USER_ID" + +# ─── Step 4: Save email to config ──────────────────────────── +"$GSTACK_DIR/bin/gstack-config" set email "$EMAIL" + +echo "" +echo "Authenticated as: ${EMAIL}" +echo "Token saved to: ${AUTH_FILE}" diff --git a/bin/gstack-auth-refresh b/bin/gstack-auth-refresh new file mode 100755 index 000000000..b60a6d86e --- /dev/null +++ b/bin/gstack-auth-refresh @@ -0,0 +1,107 @@ +#!/usr/bin/env bash +# gstack-auth-refresh — silently refresh auth token if expired +# +# Usage: +# gstack-auth-refresh — refresh and print access token +# gstack-auth-refresh --check — exit 0 if authenticated, 1 if not +# +# Called by gstack-community-backup and other authenticated scripts. +# If the refresh token is also expired, prints an error and exits 1. +# +# Env overrides (for testing): +# GSTACK_STATE_DIR — override ~/.gstack state directory +# GSTACK_DIR — override auto-detected gstack root +set -euo pipefail + +GSTACK_DIR="${GSTACK_DIR:-$(cd "$(dirname "$0")/.." && pwd)}" +STATE_DIR="${GSTACK_STATE_DIR:-$HOME/.gstack}" +AUTH_FILE="$STATE_DIR/auth-token.json" + +# Source Supabase config +if [ -f "$GSTACK_DIR/supabase/config.sh" ]; then + . "$GSTACK_DIR/supabase/config.sh" +fi +SUPABASE_URL="${GSTACK_SUPABASE_URL:-}" +ANON_KEY="${GSTACK_SUPABASE_ANON_KEY:-}" +AUTH_URL="${SUPABASE_URL}/auth/v1" + +# ─── Helper: extract JSON field ────────────────────────────── +json_field() { + local json="$1" + local field="$2" + echo "$json" | jq -r ".${field}" 2>/dev/null | sed 's/null//' +} + +# ─── Check auth file exists ───────────────────────────────── +if [ ! -f "$AUTH_FILE" ]; then + if [ "${1:-}" = "--check" ]; then + exit 1 + fi + echo "Not authenticated. Run: gstack auth " >&2 + exit 1 +fi + +AUTH_JSON="$(cat "$AUTH_FILE")" +ACCESS_TOKEN="$(json_field "$AUTH_JSON" "access_token")" +REFRESH_TOKEN="$(json_field "$AUTH_JSON" "refresh_token")" +EXPIRES_AT="$(json_field "$AUTH_JSON" "expires_at")" +EMAIL="$(json_field "$AUTH_JSON" "email")" +USER_ID="$(json_field "$AUTH_JSON" "user_id")" +NOW="$(date +%s)" + +# ─── Check-only mode ──────────────────────────────────────── +if [ "${1:-}" = "--check" ]; then + [ -n "$ACCESS_TOKEN" ] && exit 0 || exit 1 +fi + +# ─── Token still valid? Return it. ─────────────────────────── +# Add 60s buffer to avoid using a token that's about to expire +BUFFER=60 +if [ -n "$EXPIRES_AT" ] && [ "$NOW" -lt "$(( EXPIRES_AT - BUFFER ))" ] 2>/dev/null; then + echo "$ACCESS_TOKEN" + exit 0 +fi + +# ─── Token expired — refresh it ───────────────────────────── +if [ -z "$REFRESH_TOKEN" ] || [ "$REFRESH_TOKEN" = "null" ]; then + echo "Session expired and no refresh token. Run: gstack auth " >&2 + exit 1 +fi + +if [ -z "$SUPABASE_URL" ] || [ -z "$ANON_KEY" ]; then + echo "Error: Supabase not configured" >&2 + exit 1 +fi + +REFRESH_RESPONSE="$(curl -s --max-time 10 \ + -X POST "${AUTH_URL}/token?grant_type=refresh_token" \ + -H "Content-Type: application/json" \ + -H "apikey: ${ANON_KEY}" \ + -d "{\"refresh_token\":\"${REFRESH_TOKEN}\"}" \ + 2>/dev/null || echo "{}")" + +NEW_ACCESS="$(json_field "$REFRESH_RESPONSE" "access_token")" +NEW_REFRESH="$(json_field "$REFRESH_RESPONSE" "refresh_token")" +NEW_EXPIRES_IN="$(json_field "$REFRESH_RESPONSE" "expires_in")" + +if [ -z "$NEW_ACCESS" ] || [ "$NEW_ACCESS" = "null" ]; then + echo "Session expired. Run: gstack auth " >&2 + rm -f "$AUTH_FILE" + exit 1 +fi + +# Update token file +NEW_EXPIRES_AT=$(( NOW + ${NEW_EXPIRES_IN:-3600} )) + +cat > "$AUTH_FILE" </dev/null || true)" +[ "$TIER" != "community" ] && exit 0 + +# Must have auth +"$AUTH_REFRESH" --check 2>/dev/null || exit 0 + +# Must have endpoint +[ -z "$ENDPOINT" ] && exit 0 + +# Rate limit: once per 30 minutes +if [ -f "$BACKUP_RATE_FILE" ]; then + STALE=$(find "$BACKUP_RATE_FILE" -mmin +30 2>/dev/null || true) + [ -z "$STALE" ] && exit 0 +fi + +# ─── Get auth token ───────────────────────────────────────── +ACCESS_TOKEN="$("$AUTH_REFRESH" 2>/dev/null || true)" +[ -z "$ACCESS_TOKEN" ] && exit 0 + +# Read user info from auth file +AUTH_JSON="$(cat "$STATE_DIR/auth-token.json" 2>/dev/null || echo "{}")" +USER_ID="$(echo "$AUTH_JSON" | grep -o '"user_id":"[^"]*"' | head -1 | sed 's/"user_id":"//;s/"//')" +EMAIL="$(echo "$AUTH_JSON" | grep -o '"email":"[^"]*"' | head -1 | sed 's/"email":"//;s/"//')" + +[ -z "$USER_ID" ] && exit 0 + +# ─── Build config snapshot ─────────────────────────────────── +CONFIG_SNAPSHOT="{}" +if [ -f "$STATE_DIR/config.yaml" ]; then + # Convert YAML-like config to JSON safely using jq + CONFIG_SNAPSHOT="$(grep -v '^#' "$STATE_DIR/config.yaml" | grep ':' | \ + jq -R 'split(": ") | {(.[0]): .[1]}' | jq -s 'add' || echo "{}")" +fi + +# ─── Build analytics summary ──────────────────────────────── +# Per-skill aggregates + last 100 events (not raw JSONL) +ANALYTICS_SNAPSHOT="{\"skills\":{},\"recent_events\":[]}" +if [ -f "$JSONL_FILE" ]; then + # Count per-skill totals + SKILL_COUNTS_JSON="$(grep -o '"skill":"[^"]*"' "$JSONL_FILE" 2>/dev/null | \ + awk -F'"' '{print $4}' | sort | uniq -c | sort -rn | head -20 | \ + jq -R 'capture("\\s+(?\\d+)\\s+(?.+)") | {(.skill): {total_runs: (.count|tonumber)}}' | jq -s 'add')" + + # Last 100 events (strip local-only fields) + RECENT_JSON="$(tail -100 "$JSONL_FILE" 2>/dev/null | \ + jq -c 'del(._repo_slug, ._branch)' | jq -s -c '.')" + + ANALYTICS_SNAPSHOT="$(jq -n \ + --argjson skills "${SKILL_COUNTS_JSON:-{}}" \ + --argjson recent "${RECENT_JSON:-[]}" \ + '{"skills": $skills, "recent_events": $recent}')" +fi + +# ─── Build retro history snapshot ──────────────────────────── +RETRO_SNAPSHOT="[]" +# Look for retro files in common locations +RETRO_FILES="" +if [ -d "$STATE_DIR" ]; then + RETRO_FILES="$(find "$STATE_DIR" -name "retro-*.json" -o -name "retro_*.json" 2>/dev/null | head -20 || true)" +fi + +if [ -n "$RETRO_FILES" ]; then + RETRO_SNAPSHOT="$(cat $RETRO_FILES 2>/dev/null | jq -s -c '.' || echo "[]")" +fi + +# ─── Upsert to installations table ────────────────────────── +GSTACK_VERSION="$(cat "$GSTACK_DIR/VERSION" 2>/dev/null | tr -d '[:space:]' || echo "unknown")" +OS="$(uname -s | tr '[:upper:]' '[:lower:]')" +NOW_ISO="$(date -u +%Y-%m-%dT%H:%M:%SZ)" + +PAYLOAD="$(jq -n \ + --arg id "$USER_ID" \ + --arg email "$EMAIL" \ + --arg version "$GSTACK_VERSION" \ + --arg os "$OS" \ + --argjson config "${CONFIG_SNAPSHOT:-{}}" \ + --argjson analytics "${ANALYTICS_SNAPSHOT:-{}}" \ + --argjson retro "${RETRO_SNAPSHOT:-[]}" \ + --arg last_backup "$NOW_ISO" \ + '{ + installation_id: $id, + user_id: $id, + email: $email, + gstack_version: $version, + os: $os, + config_snapshot: $config, + analytics_snapshot: $analytics, + retro_history: $retro, + last_backup_at: $last_backup, + last_seen: $last_backup + }')" + +# Upsert (POST with Prefer: resolution=merge-duplicates) +HTTP_CODE="$(curl -s -o /dev/null -w '%{http_code}' --max-time 15 \ + -X POST "${ENDPOINT}/installations" \ + -H "Content-Type: application/json" \ + -H "apikey: ${ANON_KEY}" \ + -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + -H "Prefer: resolution=merge-duplicates,return=minimal" \ + -d "$PAYLOAD" 2>/dev/null || echo "000")" + +# Update rate limit marker on success +case "$HTTP_CODE" in + 2*) touch "$BACKUP_RATE_FILE" 2>/dev/null || true ;; +esac + +exit 0 diff --git a/bin/gstack-community-benchmarks b/bin/gstack-community-benchmarks new file mode 100755 index 000000000..9ab333801 --- /dev/null +++ b/bin/gstack-community-benchmarks @@ -0,0 +1,122 @@ +#!/usr/bin/env bash +# gstack-community-benchmarks — compare your stats to the community +# +# Fetches community benchmarks and compares against local analytics. +# Shows side-by-side: your average vs community median per skill. +# +# Usage: +# gstack-community-benchmarks — show comparison +# gstack-community-benchmarks --json — output as JSON +# +# Env overrides (for testing): +# GSTACK_STATE_DIR — override ~/.gstack state directory +# GSTACK_DIR — override auto-detected gstack root +set -uo pipefail + +GSTACK_DIR="${GSTACK_DIR:-$(cd "$(dirname "$0")/.." && pwd)}" +STATE_DIR="${GSTACK_STATE_DIR:-$HOME/.gstack}" +ANALYTICS_DIR="$STATE_DIR/analytics" +JSONL_FILE="$ANALYTICS_DIR/skill-usage.jsonl" + +# Source Supabase config +if [ -f "$GSTACK_DIR/supabase/config.sh" ]; then + . "$GSTACK_DIR/supabase/config.sh" +fi +SUPABASE_URL="${GSTACK_SUPABASE_URL:-}" +ANON_KEY="${GSTACK_SUPABASE_ANON_KEY:-}" +ENDPOINT="${GSTACK_TELEMETRY_ENDPOINT:-}" + +JSON_MODE=false +[ "${1:-}" = "--json" ] && JSON_MODE=true + +# ─── Fetch community benchmarks ───────────────────────────── +echo "gstack benchmarks" +echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" +echo "" + +BENCHMARKS="" +if [ -n "$SUPABASE_URL" ] && [ -n "$ANON_KEY" ]; then + # Try edge function first + BENCHMARKS="$(curl -sf --max-time 10 \ + "${SUPABASE_URL}/functions/v1/community-benchmarks" \ + -H "Authorization: Bearer ${ANON_KEY}" \ + 2>/dev/null || true)" + + # Fall back to direct table query + if [ -z "$BENCHMARKS" ] || [ "$BENCHMARKS" = "[]" ]; then + BENCHMARKS="$(curl -sf --max-time 10 \ + "${ENDPOINT}/community_benchmarks?select=skill,median_duration_s,total_runs,success_rate&order=total_runs.desc&limit=15" \ + -H "apikey: ${ANON_KEY}" \ + -H "Authorization: Bearer ${ANON_KEY}" \ + 2>/dev/null || echo "[]")" + fi +fi + +# ─── Compute local stats ──────────────────────────────────── +if [ ! -f "$JSONL_FILE" ]; then + echo "No local analytics data. Use gstack skills to generate data." + exit 0 +fi + +# Compute per-skill average duration from local JSONL +# Extract skill and duration, filter out nulls +echo " Skill You (avg) Community vs." +echo " ───────────────── ───────── ────────── ────────" + +# Get unique skills from local data +LOCAL_SKILLS="$(grep -o '"skill":"[^"]*"' "$JSONL_FILE" 2>/dev/null | awk -F'"' '{print $4}' | sort -u)" + +while IFS= read -r SKILL; do + [ -z "$SKILL" ] && continue + # Skip internal/meta skills + case "$SKILL" in _*|test-*) continue ;; esac + + # Local: average duration in seconds + LOCAL_AVG="$(grep "\"skill\":\"${SKILL}\"" "$JSONL_FILE" 2>/dev/null | \ + grep -o '"duration_s":[0-9]*' | awk -F: '{sum+=$2; n++} END {if(n>0) printf "%.0f", sum/n; else print "0"}')" + + LOCAL_COUNT="$(grep -c "\"skill\":\"${SKILL}\"" "$JSONL_FILE" 2>/dev/null || echo "0")" + + # Format duration + if [ "$LOCAL_AVG" -ge 60 ] 2>/dev/null; then + LOCAL_FMT="$(( LOCAL_AVG / 60 ))m $(( LOCAL_AVG % 60 ))s" + else + LOCAL_FMT="${LOCAL_AVG:-0}s" + fi + + # Community: find matching skill in benchmarks + COMM_MEDIAN="" + COMM_FMT="--" + DELTA="" + if [ -n "$BENCHMARKS" ] && [ "$BENCHMARKS" != "[]" ]; then + COMM_MEDIAN="$(echo "$BENCHMARKS" | grep -o "\"skill\":\"${SKILL}\"[^}]*\"median_duration_s\":[0-9.]*" | \ + grep -o '"median_duration_s":[0-9.]*' | head -1 | awk -F: '{printf "%.0f", $2}')" + + if [ -n "$COMM_MEDIAN" ] && [ "$COMM_MEDIAN" -gt 0 ] 2>/dev/null; then + if [ "$COMM_MEDIAN" -ge 60 ] 2>/dev/null; then + COMM_FMT="$(( COMM_MEDIAN / 60 ))m $(( COMM_MEDIAN % 60 ))s" + else + COMM_FMT="${COMM_MEDIAN}s" + fi + + # Compute delta percentage + if [ "$LOCAL_AVG" -gt 0 ] 2>/dev/null && [ "$COMM_MEDIAN" -gt 0 ] 2>/dev/null; then + DIFF=$(( (LOCAL_AVG - COMM_MEDIAN) * 100 / COMM_MEDIAN )) + if [ "$DIFF" -gt 5 ] 2>/dev/null; then + DELTA="+${DIFF}% slower" + elif [ "$DIFF" -lt -5 ] 2>/dev/null; then + DELTA="$(( -DIFF ))% faster" + else + DELTA="~same" + fi + fi + fi + fi + + printf " /%-17s %-10s %-12s %s\n" "$SKILL" "$LOCAL_FMT" "$COMM_FMT" "${DELTA:-}" + +done <<< "$LOCAL_SKILLS" + +echo "" +echo "Your runs: $(wc -l < "$JSONL_FILE" | tr -d ' ') total events" +echo "Community benchmarks refresh hourly." diff --git a/bin/gstack-community-dashboard b/bin/gstack-community-dashboard index 1f469283d..c98bb74bf 100755 --- a/bin/gstack-community-dashboard +++ b/bin/gstack-community-dashboard @@ -70,9 +70,9 @@ else fi echo "" -# ─── Crash clusters ────────────────────────────────────────── -echo "Top crash clusters" -echo "──────────────────" +# ─── Errors (last 7 days) ──────────────────────────────────── +echo "Top errors (last 7 days)" +echo "────────────────────────" CRASHES="$(echo "$DATA" | grep -o '"crashes":\[[^]]*\]' || echo "")" if [ -n "$CRASHES" ] && [ "$CRASHES" != '"crashes":[]' ]; then @@ -82,7 +82,7 @@ if [ -n "$CRASHES" ] && [ "$CRASHES" != '"crashes":[]' ]; then [ -n "$ERR" ] && printf " %-30s %s occurrences\n" "$ERR" "${C:-?}" done else - echo " No crashes reported" + echo " No errors reported" fi echo "" @@ -103,3 +103,4 @@ fi echo "" echo "For local analytics: gstack-analytics" +echo "For benchmarks: gstack-community-benchmarks" diff --git a/bin/gstack-community-restore b/bin/gstack-community-restore new file mode 100755 index 000000000..b7f4e3231 --- /dev/null +++ b/bin/gstack-community-restore @@ -0,0 +1,143 @@ +#!/usr/bin/env bash +# gstack-community-restore — restore gstack state from cloud backup +# +# Requires community tier + valid auth token. +# Restores: config, analytics summary, retro history. +# Local config values take precedence on conflicts. +# +# Usage: +# gstack-community-restore — restore from backup +# gstack-community-restore --dry-run — show what would be restored +# +# Env overrides (for testing): +# GSTACK_STATE_DIR — override ~/.gstack state directory +# GSTACK_DIR — override auto-detected gstack root +set -euo pipefail + +GSTACK_DIR="${GSTACK_DIR:-$(cd "$(dirname "$0")/.." && pwd)}" +STATE_DIR="${GSTACK_STATE_DIR:-$HOME/.gstack}" +CONFIG_FILE="$STATE_DIR/config.yaml" +ANALYTICS_DIR="$STATE_DIR/analytics" +JSONL_FILE="$ANALYTICS_DIR/skill-usage.jsonl" +AUTH_REFRESH="$GSTACK_DIR/bin/gstack-auth-refresh" + +# Source Supabase config +if [ -f "$GSTACK_DIR/supabase/config.sh" ]; then + . "$GSTACK_DIR/supabase/config.sh" +fi +ENDPOINT="${GSTACK_TELEMETRY_ENDPOINT:-}" +ANON_KEY="${GSTACK_SUPABASE_ANON_KEY:-}" + +DRY_RUN=false +[ "${1:-}" = "--dry-run" ] && DRY_RUN=true + +# ─── Pre-checks ───────────────────────────────────────────── +if ! "$AUTH_REFRESH" --check 2>/dev/null; then + echo "Not authenticated. Run: gstack auth " + exit 1 +fi + +ACCESS_TOKEN="$("$AUTH_REFRESH" 2>/dev/null)" +if [ -z "$ACCESS_TOKEN" ]; then + echo "Failed to get auth token. Run: gstack auth " + exit 1 +fi + +AUTH_JSON="$(cat "$STATE_DIR/auth-token.json" 2>/dev/null || echo "{}")" +USER_ID="$(echo "$AUTH_JSON" | grep -o '"user_id":"[^"]*"' | head -1 | sed 's/"user_id":"//;s/"//')" + +if [ -z "$USER_ID" ]; then + echo "No user_id in auth token. Run: gstack auth " + exit 1 +fi + +# ─── Fetch backup from Supabase ────────────────────────────── +echo "Fetching backup..." + +BACKUP="$(curl -s --max-time 15 \ + "${ENDPOINT}/installations?installation_id=eq.${USER_ID}&select=config_snapshot,analytics_snapshot,retro_history,last_backup_at,email" \ + -H "apikey: ${ANON_KEY}" \ + -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + 2>/dev/null || echo "[]")" + +# Check if we got data +if [ "$BACKUP" = "[]" ] || [ -z "$BACKUP" ]; then + echo "No backup found for your account." + echo "Run gstack for a while and backup will happen automatically." + exit 0 +fi + +# Extract first result (strip array brackets) +BACKUP="$(echo "$BACKUP" | sed 's/^\[//;s/\]$//')" + +LAST_BACKUP="$(echo "$BACKUP" | grep -o '"last_backup_at":"[^"]*"' | head -1 | sed 's/"last_backup_at":"//;s/"//')" +echo "Last backup: ${LAST_BACKUP:-unknown}" +echo "" + +# ─── Restore config ───────────────────────────────────────── +CONFIG_DATA="$(echo "$BACKUP" | grep -o '"config_snapshot":{[^}]*}' | sed 's/"config_snapshot"://' || true)" + +if [ -n "$CONFIG_DATA" ] && [ "$CONFIG_DATA" != "null" ] && [ "$CONFIG_DATA" != "{}" ]; then + echo "Config snapshot found:" + # Extract key-value pairs from JSON + KEYS="$(echo "$CONFIG_DATA" | grep -o '"[^"]*":"[^"]*"' | sed 's/"//g')" + + while IFS=: read -r KEY VALUE; do + [ -z "$KEY" ] && continue + EXISTING="$("$GSTACK_DIR/bin/gstack-config" get "$KEY" 2>/dev/null || true)" + if [ -n "$EXISTING" ]; then + echo " $KEY: $EXISTING (keeping local value, backup had: $VALUE)" + else + echo " $KEY: $VALUE (restoring from backup)" + if [ "$DRY_RUN" = "false" ]; then + "$GSTACK_DIR/bin/gstack-config" set "$KEY" "$VALUE" + fi + fi + done <<< "$KEYS" + echo "" +fi + +# ─── Restore analytics summary ────────────────────────────── +ANALYTICS_DATA="$(echo "$BACKUP" | grep -o '"analytics_snapshot":{[^}]*}' | sed 's/"analytics_snapshot"://' || true)" + +if [ -n "$ANALYTICS_DATA" ] && [ "$ANALYTICS_DATA" != "null" ] && [ "$ANALYTICS_DATA" != "{}" ]; then + echo "Analytics summary found in backup." + if [ -f "$JSONL_FILE" ]; then + LOCAL_LINES="$(wc -l < "$JSONL_FILE" | tr -d ' ')" + echo " Local analytics: ${LOCAL_LINES} events (keeping local data)" + else + echo " No local analytics found." + if [ "$DRY_RUN" = "false" ]; then + mkdir -p "$ANALYTICS_DIR" + # Extract recent_events array and write as JSONL + echo "$ANALYTICS_DATA" | jq -r '.recent_events[] | tojson' > "$JSONL_FILE" 2>/dev/null + echo " Restored $(wc -l < "$JSONL_FILE" | tr -d ' ') recent events from backup." + fi + fi + echo "" +fi + +# ─── Restore retro history ────────────────────────────────── +RETRO_DATA="$(echo "$BACKUP" | grep -o '"retro_history":\[.*\]' | sed 's/"retro_history"://' || true)" + +if [ -n "$RETRO_DATA" ] && [ "$RETRO_DATA" != "null" ] && [ "$RETRO_DATA" != "[]" ]; then + echo "Retro history found in backup." + if [ "$DRY_RUN" = "false" ]; then + # Merge: each retro in the array is a JSON object. Write as retro-restored-N.json + echo "$RETRO_DATA" | jq -c '.[]' | while read -r RETRO; do + [ -z "$RETRO" ] && continue + TS="$(echo "$RETRO" | jq -r .ts 2>/dev/null | tr -d ':-')" + [ -z "$TS" ] && TS="$(date +%s)" + RNAME="retro-restored-${TS}-$RANDOM.json" + echo "$RETRO" > "$STATE_DIR/$RNAME" + done + echo " Retro history merged with local data ($(echo "$RETRO_DATA" | jq 'length') entries restored)." + fi + echo "" +fi + +if [ "$DRY_RUN" = "true" ]; then + echo "(dry run — no changes made)" +else + echo "Restore complete." +fi diff --git a/bin/gstack-screenshot-upload b/bin/gstack-screenshot-upload new file mode 100755 index 000000000..5dd4a99f2 --- /dev/null +++ b/bin/gstack-screenshot-upload @@ -0,0 +1,159 @@ +#!/usr/bin/env bash +# gstack-screenshot-upload — upload a screenshot to gstack.gg +# +# Usage: +# gstack-screenshot-upload [--repo-slug X] [--branch X] [--viewport X] +# +# Uploads a PNG to gstack.gg and prints the proxy URL (with watermark) to stdout. +# All diagnostics go to stderr. Exit 0 = success, 1 = error. +# +# Env overrides (for testing): +# GSTACK_STATE_DIR — override ~/.gstack state directory +# GSTACK_DIR — override auto-detected gstack root +# GSTACK_WEB_URL — override gstack.gg URL +set -euo pipefail + +GSTACK_DIR="${GSTACK_DIR:-$(cd "$(dirname "$0")/.." && pwd)}" +STATE_DIR="${GSTACK_STATE_DIR:-$HOME/.gstack}" + +# Source config +if [ -f "$GSTACK_DIR/supabase/config.sh" ]; then + . "$GSTACK_DIR/supabase/config.sh" +fi +WEB_URL="${GSTACK_WEB_URL:-https://gstack.gg}" + +# ─── Parse args ─────────────────────────────────────────────────── +FILE="" +REPO_SLUG="" +BRANCH="" +VIEWPORT="" + +while [ $# -gt 0 ]; do + case "$1" in + --repo-slug) REPO_SLUG="$2"; shift 2 ;; + --branch) BRANCH="$2"; shift 2 ;; + --viewport) VIEWPORT="$2"; shift 2 ;; + -*) echo "Unknown option: $1" >&2; exit 1 ;; + *) FILE="$1"; shift ;; + esac +done + +if [ -z "$FILE" ]; then + echo "Usage: gstack-screenshot-upload [--repo-slug X] [--branch X] [--viewport X]" >&2 + exit 1 +fi + +if [ ! -f "$FILE" ]; then + echo "Error: file not found: $FILE" >&2 + exit 1 +fi + +# ─── Validate PNG ───────────────────────────────────────────────── +MIME=$(file --mime-type -b "$FILE" 2>/dev/null || echo "unknown") +if [ "$MIME" != "image/png" ]; then + echo "Error: only PNG files are supported (got $MIME)" >&2 + exit 1 +fi + +# ─── Slugify helper ─────────────────────────────────────────────── +slugify() { + echo "$1" | tr '[:upper:]' '[:lower:]' | sed 's|/|-|g; s|[^a-z0-9._-]||g' +} + +[ -n "$REPO_SLUG" ] && REPO_SLUG="$(slugify "$REPO_SLUG")" +[ -n "$BRANCH" ] && BRANCH="$(slugify "$BRANCH")" + +# ─── Pre-upload compression ────────────────────────────────────── +FILE_SIZE=$(wc -c < "$FILE" | tr -d ' ') +UPLOAD_FILE="$FILE" + +if [ "$FILE_SIZE" -gt 2097152 ]; then # > 2MB + echo "File is $(( FILE_SIZE / 1024 ))KB — compressing..." >&2 + TMPFILE="$(mktemp /tmp/gstack-compress-XXXXXX.png)" + + if command -v sips >/dev/null 2>&1; then + # macOS: sips resize to max 1920px wide + cp "$FILE" "$TMPFILE" + sips --resampleWidth 1920 "$TMPFILE" >/dev/null 2>&1 && UPLOAD_FILE="$TMPFILE" + elif command -v magick >/dev/null 2>&1; then + # ImageMagick 7+ + magick "$FILE" -resize '1920x>' "$TMPFILE" 2>/dev/null && UPLOAD_FILE="$TMPFILE" + elif command -v convert >/dev/null 2>&1; then + # ImageMagick 6 + convert "$FILE" -resize '1920x>' "$TMPFILE" 2>/dev/null && UPLOAD_FILE="$TMPFILE" + else + echo "Warning: no resize tool available (sips/magick/convert), uploading raw" >&2 + fi + + if [ "$UPLOAD_FILE" = "$TMPFILE" ]; then + NEW_SIZE=$(wc -c < "$TMPFILE" | tr -d ' ') + echo "Compressed: $(( FILE_SIZE / 1024 ))KB → $(( NEW_SIZE / 1024 ))KB" >&2 + fi +fi + +# ─── Check file size limit ──────────────────────────────────────── +FINAL_SIZE=$(wc -c < "$UPLOAD_FILE" | tr -d ' ') +if [ "$FINAL_SIZE" -gt 10485760 ]; then # 10MB + echo "Error: file too large ($(( FINAL_SIZE / 1024 ))KB, max 10MB)" >&2 + exit 1 +fi + +# ─── Get auth token ─────────────────────────────────────────────── +if ! "$GSTACK_DIR/bin/gstack-auth-refresh" --check >/dev/null 2>&1; then + echo "Error: not authenticated. Run: gstack-auth" >&2 + exit 1 +fi + +ACCESS_TOKEN="$("$GSTACK_DIR/bin/gstack-auth-refresh" 2>/dev/null)" +if [ -z "$ACCESS_TOKEN" ]; then + echo "Error: failed to get auth token" >&2 + exit 1 +fi + +# ─── Upload ─────────────────────────────────────────────────────── +HTTP_RESPONSE="$(curl -s -w "\n%{http_code}" \ + --max-time 30 \ + -X POST "${WEB_URL}/api/images/upload" \ + -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + -F "file=@${UPLOAD_FILE}" \ + -F "repo_slug=${REPO_SLUG}" \ + -F "branch=${BRANCH}" \ + -F "viewport=${VIEWPORT}" \ + 2>/dev/null || echo -e "\n000")" + +HTTP_CODE="$(echo "$HTTP_RESPONSE" | tail -1)" +HTTP_BODY="$(echo "$HTTP_RESPONSE" | sed '$d')" + +# Clean up temp file +[ "$UPLOAD_FILE" != "$FILE" ] && rm -f "$UPLOAD_FILE" 2>/dev/null + +case "$HTTP_CODE" in + 2*) + # Extract proxy URL from response JSON + URL="$(echo "$HTTP_BODY" | jq -r '.url' 2>/dev/null || echo "")" + if [ -n "$URL" ] && [ "$URL" != "null" ]; then + echo "$URL" # stdout: proxy URL only + else + echo "Error: upload succeeded but no URL in response" >&2 + echo "$HTTP_BODY" >&2 + exit 1 + fi + ;; + 401) + echo "Error: authentication failed (401). Re-run: gstack-auth" >&2 + exit 1 + ;; + 413) + echo "Error: file too large (413)" >&2 + exit 1 + ;; + 415) + echo "Error: unsupported file type (415). Only PNG supported." >&2 + exit 1 + ;; + *) + echo "Error: upload failed (HTTP ${HTTP_CODE})" >&2 + [ -n "$HTTP_BODY" ] && echo "$HTTP_BODY" >&2 + exit 1 + ;; +esac diff --git a/bin/gstack-telemetry-log b/bin/gstack-telemetry-log index 5cddc519f..0522b7eed 100755 --- a/bin/gstack-telemetry-log +++ b/bin/gstack-telemetry-log @@ -114,29 +114,27 @@ if [ -d "$STATE_DIR/sessions" ]; then [ -n "$_SC" ] && [ "$_SC" -gt 0 ] 2>/dev/null && SESSIONS="$_SC" fi -# Generate installation_id for community tier +# Generate/read persistent UUID fingerprint (all tiers, not just community) # Uses a random UUID stored locally — not derived from hostname/user so it # can't be guessed or correlated by someone who knows your machine identity. -INSTALL_ID="" -if [ "$TIER" = "community" ]; then - ID_FILE="$HOME/.gstack/installation-id" - if [ -f "$ID_FILE" ]; then - INSTALL_ID="$(cat "$ID_FILE" 2>/dev/null)" +INSTALL_FP="" +FP_FILE="$STATE_DIR/.install-id" +if [ -f "$FP_FILE" ]; then + INSTALL_FP="$(cat "$FP_FILE" 2>/dev/null | tr -d '[:space:]')" +fi +if [ -z "$INSTALL_FP" ]; then + # Generate a random UUID v4 + if command -v uuidgen >/dev/null 2>&1; then + INSTALL_FP="$(uuidgen | tr '[:upper:]' '[:lower:]')" + elif [ -r /proc/sys/kernel/random/uuid ]; then + INSTALL_FP="$(cat /proc/sys/kernel/random/uuid)" + else + # Fallback: random hex from /dev/urandom + INSTALL_FP="$(od -An -tx1 -N16 /dev/urandom 2>/dev/null | tr -d ' \n')" fi - if [ -z "$INSTALL_ID" ]; then - # Generate a random UUID v4 - if command -v uuidgen >/dev/null 2>&1; then - INSTALL_ID="$(uuidgen | tr '[:upper:]' '[:lower:]')" - elif [ -r /proc/sys/kernel/random/uuid ]; then - INSTALL_ID="$(cat /proc/sys/kernel/random/uuid)" - else - # Fallback: random hex from /dev/urandom - INSTALL_ID="$(od -An -tx1 -N16 /dev/urandom 2>/dev/null | tr -d ' \n')" - fi - if [ -n "$INSTALL_ID" ]; then - mkdir -p "$(dirname "$ID_FILE")" 2>/dev/null - printf '%s' "$INSTALL_ID" > "$ID_FILE" 2>/dev/null - fi + if [ -n "$INSTALL_FP" ]; then + mkdir -p "$STATE_DIR" 2>/dev/null + printf '%s' "$INSTALL_FP" > "$FP_FILE" 2>/dev/null fi fi @@ -173,12 +171,12 @@ DUR_FIELD="null" [ -n "$DURATION" ] && DUR_FIELD="$DURATION" INSTALL_FIELD="null" -[ -n "$INSTALL_ID" ] && INSTALL_FIELD="\"$INSTALL_ID\"" +[ -n "$INSTALL_FP" ] && INSTALL_FIELD="\"$INSTALL_FP\"" BROWSE_BOOL="false" [ "$USED_BROWSE" = "true" ] && BROWSE_BOOL="true" -printf '{"v":1,"ts":"%s","event_type":"%s","skill":"%s","session_id":"%s","gstack_version":"%s","os":"%s","arch":"%s","duration_s":%s,"outcome":"%s","error_class":%s,"error_message":%s,"failed_step":%s,"used_browse":%s,"sessions":%s,"installation_id":%s,"source":"%s","_repo_slug":"%s","_branch":"%s"}\n' \ +printf '{"v":1,"ts":"%s","event_type":"%s","skill":"%s","session_id":"%s","gstack_version":"%s","os":"%s","arch":"%s","duration_s":%s,"outcome":"%s","error_class":%s,"error_message":%s,"failed_step":%s,"used_browse":%s,"sessions":%s,"install_fingerprint":%s,"source":"%s","_repo_slug":"%s","_branch":"%s"}\n' \ "$TS" "$EVENT_TYPE" "$SKILL" "$SESSION_ID" "$GSTACK_VERSION" "$OS" "$ARCH" \ "$DUR_FIELD" "$OUTCOME" "$ERR_FIELD" "$ERR_MSG_FIELD" "$STEP_FIELD" \ "$BROWSE_BOOL" "${SESSIONS:-1}" \ diff --git a/bin/gstack-telemetry-sync b/bin/gstack-telemetry-sync index be767c23e..c5b280915 100755 --- a/bin/gstack-telemetry-sync +++ b/bin/gstack-telemetry-sync @@ -84,11 +84,6 @@ while IFS= read -r LINE; do -e 's/,"_branch":"[^"]*"//g' \ -e 's/,"repo":"[^"]*"//g')" - # If anonymous tier, strip installation_id - if [ "$TIER" = "anonymous" ]; then - CLEAN="$(echo "$CLEAN" | sed 's/,"installation_id":"[^"]*"//g; s/,"installation_id":null//g')" - fi - if [ "$FIRST" = "true" ]; then FIRST=false else diff --git a/browse/SKILL.md b/browse/SKILL.md index 91845a99d..c52dcaa53 100644 --- a/browse/SKILL.md +++ b/browse/SKILL.md @@ -29,9 +29,11 @@ _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true _CONTRIB=$(~/.claude/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true) _PROACTIVE=$(~/.claude/skills/gstack/bin/gstack-config get proactive 2>/dev/null || echo "true") +_PROACTIVE_PROMPTED=$([ -f ~/.gstack/.proactive-prompted ] && echo "yes" || echo "no") _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown") echo "BRANCH: $_BRANCH" echo "PROACTIVE: $_PROACTIVE" +echo "PROACTIVE_PROMPTED: $_PROACTIVE_PROMPTED" source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true REPO_MODE=${REPO_MODE:-unknown} echo "REPO_MODE: $REPO_MODE" @@ -49,8 +51,11 @@ echo '{"skill":"browse","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basen for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done ``` -If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills — only invoke -them when the user explicitly asks. The user opted out of proactive suggestions. +If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not +auto-invoke skills based on conversation context. Only run skills the user explicitly +types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: +"I think /skillname might help here — want me to run it?" and wait for confirmation. +The user opted out of proactive behavior. If output shows `UPGRADE_AVAILABLE `: read `~/.claude/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED `: tell user "Running gstack v{to} (just updated!)" and continue. @@ -99,112 +104,44 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. -## AskUserQuestion Format +If `PROACTIVE_PROMPTED` is `no` AND `TEL_PROMPTED` is `yes`: After telemetry is handled, +ask the user about proactive behavior. Use AskUserQuestion: -**ALWAYS follow this structure for every AskUserQuestion call:** -1. **Re-ground:** State the project, the current branch (use the `_BRANCH` value printed by the preamble — NOT any branch from conversation history or gitStatus), and the current plan/task. (1-2 sentences) -2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called. -3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]` — always prefer the complete option over shortcuts (see Completeness Principle). Include `Completeness: X/10` for each option. Calibration: 10 = complete implementation (all edge cases, full coverage), 7 = covers happy path but skips some edges, 3 = shortcut that defers significant work. If both options are 8+, pick the higher; if one is ≤5, flag it. -4. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)` -5. **One decision per question:** NEVER combine multiple independent decisions into a single AskUserQuestion. Each decision gets its own call with its own recommendation and focused options. Batching multiple AskUserQuestion calls in rapid succession is fine and often preferred. Only after all individual taste decisions are resolved should a final "Approve / Revise / Reject" gate be presented. +> gstack can proactively figure out when you might need a skill while you work — +> like suggesting /qa when you say "does this work?" or /investigate when you hit +> a bug. We recommend keeping this on — it speeds up every part of your workflow. -Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex. - -Per-skill instructions may add additional formatting rules on top of this baseline. - -## Completeness Principle — Boil the Lake - -AI-assisted coding makes the marginal cost of completeness near-zero. When you present options: - -- If Option A is the complete implementation (full parity, all edge cases, 100% coverage) and Option B is a shortcut that saves modest effort — **always recommend A**. The delta between 80 lines and 150 lines is meaningless with CC+gstack. "Good enough" is the wrong instinct when "complete" costs minutes more. -- **Lake vs. ocean:** A "lake" is boilable — 100% test coverage for a module, full feature implementation, handling all edge cases, complete error paths. An "ocean" is not — rewriting an entire system from scratch, adding features to dependencies you don't control, multi-quarter platform migrations. Recommend boiling lakes. Flag oceans as out of scope. -- **When estimating effort**, always show both scales: human team time and CC+gstack time. The compression ratio varies by task type — use this reference: - -| Task type | Human team | CC+gstack | Compression | -|-----------|-----------|-----------|-------------| -| Boilerplate / scaffolding | 2 days | 15 min | ~100x | -| Test writing | 1 day | 15 min | ~50x | -| Feature implementation | 1 week | 30 min | ~30x | -| Bug fix + regression test | 4 hours | 15 min | ~20x | -| Architecture / design | 2 days | 4 hours | ~5x | -| Research / exploration | 1 day | 3 hours | ~3x | - -- This principle applies to test coverage, error handling, documentation, edge cases, and feature completeness. Don't skip the last 10% to "save time" — with AI, that 10% costs seconds. - -**Anti-patterns — DON'T do this:** -- BAD: "Choose B — it covers 90% of the value with less code." (If A is only 70 lines more, choose A.) -- BAD: "We can skip edge case handling to save time." (Edge case handling costs minutes with CC.) -- BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) -- BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.") - -## Repo Ownership Mode — See Something, Say Something - -`REPO_MODE` from the preamble tells you who owns issues in this repo: - -- **`solo`** — One person does 80%+ of the work. They own everything. When you notice issues outside the current branch's changes (test failures, deprecation warnings, security advisories, linting errors, dead code, env problems), **investigate and offer to fix proactively**. The solo dev is the only person who will fix it. Default to action. -- **`collaborative`** — Multiple active contributors. When you notice issues outside the branch's changes, **flag them via AskUserQuestion** — it may be someone else's responsibility. Default to asking, not fixing. -- **`unknown`** — Treat as collaborative (safer default — ask before fixing). - -**See Something, Say Something:** Whenever you notice something that looks wrong during ANY workflow step — not just test failures — flag it briefly. One sentence: what you noticed and its impact. In solo mode, follow up with "Want me to fix it?" In collaborative mode, just flag it and move on. - -Never let a noticed issue silently pass. The whole point is proactive communication. - -## Search Before Building - -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy. - -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. +Options: +- A) Keep it on (recommended) +- B) Turn it off — I'll type /commands myself -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." +If A: run `~/.claude/skills/gstack/bin/gstack-config set proactive true` +If B: run `~/.claude/skills/gstack/bin/gstack-config set proactive false` -Log eureka moments: +Always run: ```bash -jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true +touch ~/.gstack/.proactive-prompted ``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." +This only happens once. If `PROACTIVE_PROMPTED` is `yes`, skip this entirely. ## Contributor Mode -If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. - -**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better! - -**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore. +If `_CONTRIB` is `true`: you are in **contributor mode**. At the end of each major workflow step, rate your gstack experience 0-10. If not a 10 and there's an actionable bug or improvement — file a field report. -**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs. - -**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer): +**File only:** gstack tooling bugs where the input was reasonable but gstack failed. **Skip:** user app bugs, network errors, auth failures on user's site. +**To file:** write `~/.gstack/contributor-logs/{slug}.md`: ``` # {Title} - -Hey gstack team — ran into this while using /{skill-name}: - -**What I was trying to do:** {what the user/agent was attempting} -**What happened instead:** {what actually happened} -**My rating:** {0-10} — {one sentence on why it wasn't a 10} - -## Steps to reproduce +**What I tried:** {action} | **What happened:** {result} | **Rating:** {0-10} +## Repro 1. {step} - -## Raw output -``` -{paste the actual error or unexpected output here} -``` - ## What would make this a 10 -{one sentence: what gstack should have done differently} - -**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** /{skill} +{one sentence} +**Date:** {YYYY-MM-DD} | **Version:** {version} | **Skill:** /{skill} ``` - -Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}" +Slug: lowercase hyphens, max 60 chars. Skip if exists. Max 3/session. File inline, don't stop. ## Completion Status Protocol diff --git a/canary/SKILL.md b/canary/SKILL.md index fe889c74f..af71fb396 100644 --- a/canary/SKILL.md +++ b/canary/SKILL.md @@ -29,9 +29,11 @@ _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true _CONTRIB=$(~/.claude/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true) _PROACTIVE=$(~/.claude/skills/gstack/bin/gstack-config get proactive 2>/dev/null || echo "true") +_PROACTIVE_PROMPTED=$([ -f ~/.gstack/.proactive-prompted ] && echo "yes" || echo "no") _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown") echo "BRANCH: $_BRANCH" echo "PROACTIVE: $_PROACTIVE" +echo "PROACTIVE_PROMPTED: $_PROACTIVE_PROMPTED" source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true REPO_MODE=${REPO_MODE:-unknown} echo "REPO_MODE: $REPO_MODE" @@ -49,8 +51,11 @@ echo '{"skill":"canary","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basen for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done ``` -If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills — only invoke -them when the user explicitly asks. The user opted out of proactive suggestions. +If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not +auto-invoke skills based on conversation context. Only run skills the user explicitly +types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: +"I think /skillname might help here — want me to run it?" and wait for confirmation. +The user opted out of proactive behavior. If output shows `UPGRADE_AVAILABLE `: read `~/.claude/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED `: tell user "Running gstack v{to} (just updated!)" and continue. @@ -99,6 +104,27 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `PROACTIVE_PROMPTED` is `no` AND `TEL_PROMPTED` is `yes`: After telemetry is handled, +ask the user about proactive behavior. Use AskUserQuestion: + +> gstack can proactively figure out when you might need a skill while you work — +> like suggesting /qa when you say "does this work?" or /investigate when you hit +> a bug. We recommend keeping this on — it speeds up every part of your workflow. + +Options: +- A) Keep it on (recommended) +- B) Turn it off — I'll type /commands myself + +If A: run `~/.claude/skills/gstack/bin/gstack-config set proactive true` +If B: run `~/.claude/skills/gstack/bin/gstack-config set proactive false` + +Always run: +```bash +touch ~/.gstack/.proactive-prompted +``` + +This only happens once. If `PROACTIVE_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -106,7 +132,6 @@ This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. 2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called. 3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]` — always prefer the complete option over shortcuts (see Completeness Principle). Include `Completeness: X/10` for each option. Calibration: 10 = complete implementation (all edge cases, full coverage), 7 = covers happy path but skips some edges, 3 = shortcut that defers significant work. If both options are 8+, pick the higher; if one is ≤5, flag it. 4. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)` -5. **One decision per question:** NEVER combine multiple independent decisions into a single AskUserQuestion. Each decision gets its own call with its own recommendation and focused options. Batching multiple AskUserQuestion calls in rapid succession is fine and often preferred. Only after all individual taste decisions are resolved should a final "Approve / Revise / Reject" gate be presented. Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex. @@ -114,97 +139,36 @@ Per-skill instructions may add additional formatting rules on top of this baseli ## Completeness Principle — Boil the Lake -AI-assisted coding makes the marginal cost of completeness near-zero. When you present options: +AI makes completeness near-free. Always recommend the complete option over shortcuts — the delta is minutes with CC+gstack. A "lake" (100% coverage, all edge cases) is boilable; an "ocean" (full rewrite, multi-quarter migration) is not. Boil lakes, flag oceans. -- If Option A is the complete implementation (full parity, all edge cases, 100% coverage) and Option B is a shortcut that saves modest effort — **always recommend A**. The delta between 80 lines and 150 lines is meaningless with CC+gstack. "Good enough" is the wrong instinct when "complete" costs minutes more. -- **Lake vs. ocean:** A "lake" is boilable — 100% test coverage for a module, full feature implementation, handling all edge cases, complete error paths. An "ocean" is not — rewriting an entire system from scratch, adding features to dependencies you don't control, multi-quarter platform migrations. Recommend boiling lakes. Flag oceans as out of scope. -- **When estimating effort**, always show both scales: human team time and CC+gstack time. The compression ratio varies by task type — use this reference: +**Effort reference** — always show both scales: | Task type | Human team | CC+gstack | Compression | |-----------|-----------|-----------|-------------| -| Boilerplate / scaffolding | 2 days | 15 min | ~100x | -| Test writing | 1 day | 15 min | ~50x | -| Feature implementation | 1 week | 30 min | ~30x | -| Bug fix + regression test | 4 hours | 15 min | ~20x | -| Architecture / design | 2 days | 4 hours | ~5x | -| Research / exploration | 1 day | 3 hours | ~3x | - -- This principle applies to test coverage, error handling, documentation, edge cases, and feature completeness. Don't skip the last 10% to "save time" — with AI, that 10% costs seconds. - -**Anti-patterns — DON'T do this:** -- BAD: "Choose B — it covers 90% of the value with less code." (If A is only 70 lines more, choose A.) -- BAD: "We can skip edge case handling to save time." (Edge case handling costs minutes with CC.) -- BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) -- BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.") - -## Repo Ownership Mode — See Something, Say Something - -`REPO_MODE` from the preamble tells you who owns issues in this repo: - -- **`solo`** — One person does 80%+ of the work. They own everything. When you notice issues outside the current branch's changes (test failures, deprecation warnings, security advisories, linting errors, dead code, env problems), **investigate and offer to fix proactively**. The solo dev is the only person who will fix it. Default to action. -- **`collaborative`** — Multiple active contributors. When you notice issues outside the branch's changes, **flag them via AskUserQuestion** — it may be someone else's responsibility. Default to asking, not fixing. -- **`unknown`** — Treat as collaborative (safer default — ask before fixing). - -**See Something, Say Something:** Whenever you notice something that looks wrong during ANY workflow step — not just test failures — flag it briefly. One sentence: what you noticed and its impact. In solo mode, follow up with "Want me to fix it?" In collaborative mode, just flag it and move on. - -Never let a noticed issue silently pass. The whole point is proactive communication. - -## Search Before Building +| Boilerplate | 2 days | 15 min | ~100x | +| Tests | 1 day | 15 min | ~50x | +| Feature | 1 week | 30 min | ~30x | +| Bug fix | 4 hours | 15 min | ~20x | -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy. - -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. - -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: -```bash -jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true -``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." +Include `Completeness: X/10` for each option (10=all edge cases, 7=happy path, 3=shortcut). ## Contributor Mode -If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. - -**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better! - -**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore. - -**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs. +If `_CONTRIB` is `true`: you are in **contributor mode**. At the end of each major workflow step, rate your gstack experience 0-10. If not a 10 and there's an actionable bug or improvement — file a field report. -**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer): +**File only:** gstack tooling bugs where the input was reasonable but gstack failed. **Skip:** user app bugs, network errors, auth failures on user's site. +**To file:** write `~/.gstack/contributor-logs/{slug}.md`: ``` # {Title} - -Hey gstack team — ran into this while using /{skill-name}: - -**What I was trying to do:** {what the user/agent was attempting} -**What happened instead:** {what actually happened} -**My rating:** {0-10} — {one sentence on why it wasn't a 10} - -## Steps to reproduce +**What I tried:** {action} | **What happened:** {result} | **Rating:** {0-10} +## Repro 1. {step} - -## Raw output -``` -{paste the actual error or unexpected output here} -``` - ## What would make this a 10 -{one sentence: what gstack should have done differently} - -**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** /{skill} +{one sentence} +**Date:** {YYYY-MM-DD} | **Version:** {version} | **Skill:** /{skill} ``` - -Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}" +Slug: lowercase hyphens, max 60 chars. Skip if exists. Max 3/session. File inline, don't stop. ## Completion Status Protocol diff --git a/codex/SKILL.md b/codex/SKILL.md index f34b8db4c..ef6c1a6c5 100644 --- a/codex/SKILL.md +++ b/codex/SKILL.md @@ -30,9 +30,11 @@ _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true _CONTRIB=$(~/.claude/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true) _PROACTIVE=$(~/.claude/skills/gstack/bin/gstack-config get proactive 2>/dev/null || echo "true") +_PROACTIVE_PROMPTED=$([ -f ~/.gstack/.proactive-prompted ] && echo "yes" || echo "no") _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown") echo "BRANCH: $_BRANCH" echo "PROACTIVE: $_PROACTIVE" +echo "PROACTIVE_PROMPTED: $_PROACTIVE_PROMPTED" source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true REPO_MODE=${REPO_MODE:-unknown} echo "REPO_MODE: $REPO_MODE" @@ -50,8 +52,11 @@ echo '{"skill":"codex","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basena for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done ``` -If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills — only invoke -them when the user explicitly asks. The user opted out of proactive suggestions. +If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not +auto-invoke skills based on conversation context. Only run skills the user explicitly +types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: +"I think /skillname might help here — want me to run it?" and wait for confirmation. +The user opted out of proactive behavior. If output shows `UPGRADE_AVAILABLE `: read `~/.claude/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED `: tell user "Running gstack v{to} (just updated!)" and continue. @@ -100,6 +105,27 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `PROACTIVE_PROMPTED` is `no` AND `TEL_PROMPTED` is `yes`: After telemetry is handled, +ask the user about proactive behavior. Use AskUserQuestion: + +> gstack can proactively figure out when you might need a skill while you work — +> like suggesting /qa when you say "does this work?" or /investigate when you hit +> a bug. We recommend keeping this on — it speeds up every part of your workflow. + +Options: +- A) Keep it on (recommended) +- B) Turn it off — I'll type /commands myself + +If A: run `~/.claude/skills/gstack/bin/gstack-config set proactive true` +If B: run `~/.claude/skills/gstack/bin/gstack-config set proactive false` + +Always run: +```bash +touch ~/.gstack/.proactive-prompted +``` + +This only happens once. If `PROACTIVE_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -107,7 +133,6 @@ This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. 2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called. 3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]` — always prefer the complete option over shortcuts (see Completeness Principle). Include `Completeness: X/10` for each option. Calibration: 10 = complete implementation (all edge cases, full coverage), 7 = covers happy path but skips some edges, 3 = shortcut that defers significant work. If both options are 8+, pick the higher; if one is ≤5, flag it. 4. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)` -5. **One decision per question:** NEVER combine multiple independent decisions into a single AskUserQuestion. Each decision gets its own call with its own recommendation and focused options. Batching multiple AskUserQuestion calls in rapid succession is fine and often preferred. Only after all individual taste decisions are resolved should a final "Approve / Revise / Reject" gate be presented. Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex. @@ -115,97 +140,54 @@ Per-skill instructions may add additional formatting rules on top of this baseli ## Completeness Principle — Boil the Lake -AI-assisted coding makes the marginal cost of completeness near-zero. When you present options: +AI makes completeness near-free. Always recommend the complete option over shortcuts — the delta is minutes with CC+gstack. A "lake" (100% coverage, all edge cases) is boilable; an "ocean" (full rewrite, multi-quarter migration) is not. Boil lakes, flag oceans. -- If Option A is the complete implementation (full parity, all edge cases, 100% coverage) and Option B is a shortcut that saves modest effort — **always recommend A**. The delta between 80 lines and 150 lines is meaningless with CC+gstack. "Good enough" is the wrong instinct when "complete" costs minutes more. -- **Lake vs. ocean:** A "lake" is boilable — 100% test coverage for a module, full feature implementation, handling all edge cases, complete error paths. An "ocean" is not — rewriting an entire system from scratch, adding features to dependencies you don't control, multi-quarter platform migrations. Recommend boiling lakes. Flag oceans as out of scope. -- **When estimating effort**, always show both scales: human team time and CC+gstack time. The compression ratio varies by task type — use this reference: +**Effort reference** — always show both scales: | Task type | Human team | CC+gstack | Compression | |-----------|-----------|-----------|-------------| -| Boilerplate / scaffolding | 2 days | 15 min | ~100x | -| Test writing | 1 day | 15 min | ~50x | -| Feature implementation | 1 week | 30 min | ~30x | -| Bug fix + regression test | 4 hours | 15 min | ~20x | -| Architecture / design | 2 days | 4 hours | ~5x | -| Research / exploration | 1 day | 3 hours | ~3x | - -- This principle applies to test coverage, error handling, documentation, edge cases, and feature completeness. Don't skip the last 10% to "save time" — with AI, that 10% costs seconds. +| Boilerplate | 2 days | 15 min | ~100x | +| Tests | 1 day | 15 min | ~50x | +| Feature | 1 week | 30 min | ~30x | +| Bug fix | 4 hours | 15 min | ~20x | -**Anti-patterns — DON'T do this:** -- BAD: "Choose B — it covers 90% of the value with less code." (If A is only 70 lines more, choose A.) -- BAD: "We can skip edge case handling to save time." (Edge case handling costs minutes with CC.) -- BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) -- BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.") +Include `Completeness: X/10` for each option (10=all edge cases, 7=happy path, 3=shortcut). -## Repo Ownership Mode — See Something, Say Something +## Repo Ownership — See Something, Say Something -`REPO_MODE` from the preamble tells you who owns issues in this repo: +`REPO_MODE` controls how to handle issues outside your branch: +- **`solo`** — You own everything. Investigate and offer to fix proactively. +- **`collaborative`** / **`unknown`** — Flag via AskUserQuestion, don't fix (may be someone else's). -- **`solo`** — One person does 80%+ of the work. They own everything. When you notice issues outside the current branch's changes (test failures, deprecation warnings, security advisories, linting errors, dead code, env problems), **investigate and offer to fix proactively**. The solo dev is the only person who will fix it. Default to action. -- **`collaborative`** — Multiple active contributors. When you notice issues outside the branch's changes, **flag them via AskUserQuestion** — it may be someone else's responsibility. Default to asking, not fixing. -- **`unknown`** — Treat as collaborative (safer default — ask before fixing). - -**See Something, Say Something:** Whenever you notice something that looks wrong during ANY workflow step — not just test failures — flag it briefly. One sentence: what you noticed and its impact. In solo mode, follow up with "Want me to fix it?" In collaborative mode, just flag it and move on. - -Never let a noticed issue silently pass. The whole point is proactive communication. +Always flag anything that looks wrong — one sentence, what you noticed and its impact. ## Search Before Building -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy. - -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. +Before building anything unfamiliar, **search first.** See `~/.claude/skills/gstack/ETHOS.md`. +- **Layer 1** (tried and true) — don't reinvent. **Layer 2** (new and popular) — scrutinize. **Layer 3** (first principles) — prize above all. -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: +**Eureka:** When first-principles reasoning contradicts conventional wisdom, name it and log: ```bash jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true ``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." ## Contributor Mode -If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. - -**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better! - -**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore. +If `_CONTRIB` is `true`: you are in **contributor mode**. At the end of each major workflow step, rate your gstack experience 0-10. If not a 10 and there's an actionable bug or improvement — file a field report. -**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs. - -**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer): +**File only:** gstack tooling bugs where the input was reasonable but gstack failed. **Skip:** user app bugs, network errors, auth failures on user's site. +**To file:** write `~/.gstack/contributor-logs/{slug}.md`: ``` # {Title} - -Hey gstack team — ran into this while using /{skill-name}: - -**What I was trying to do:** {what the user/agent was attempting} -**What happened instead:** {what actually happened} -**My rating:** {0-10} — {one sentence on why it wasn't a 10} - -## Steps to reproduce +**What I tried:** {action} | **What happened:** {result} | **Rating:** {0-10} +## Repro 1. {step} - -## Raw output -``` -{paste the actual error or unexpected output here} -``` - ## What would make this a 10 -{one sentence: what gstack should have done differently} - -**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** /{skill} +{one sentence} +**Date:** {YYYY-MM-DD} | **Version:** {version} | **Skill:** /{skill} ``` - -Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}" +Slug: lowercase hyphens, max 60 chars. Skip if exists. Max 3/session. File inline, don't stop. ## Completion Status Protocol diff --git a/cso/SKILL.md b/cso/SKILL.md index c023e1ebb..3f092fd64 100644 --- a/cso/SKILL.md +++ b/cso/SKILL.md @@ -33,9 +33,11 @@ _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true _CONTRIB=$(~/.claude/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true) _PROACTIVE=$(~/.claude/skills/gstack/bin/gstack-config get proactive 2>/dev/null || echo "true") +_PROACTIVE_PROMPTED=$([ -f ~/.gstack/.proactive-prompted ] && echo "yes" || echo "no") _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown") echo "BRANCH: $_BRANCH" echo "PROACTIVE: $_PROACTIVE" +echo "PROACTIVE_PROMPTED: $_PROACTIVE_PROMPTED" source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true REPO_MODE=${REPO_MODE:-unknown} echo "REPO_MODE: $REPO_MODE" @@ -53,8 +55,11 @@ echo '{"skill":"cso","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done ``` -If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills — only invoke -them when the user explicitly asks. The user opted out of proactive suggestions. +If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not +auto-invoke skills based on conversation context. Only run skills the user explicitly +types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: +"I think /skillname might help here — want me to run it?" and wait for confirmation. +The user opted out of proactive behavior. If output shows `UPGRADE_AVAILABLE `: read `~/.claude/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED `: tell user "Running gstack v{to} (just updated!)" and continue. @@ -103,6 +108,27 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `PROACTIVE_PROMPTED` is `no` AND `TEL_PROMPTED` is `yes`: After telemetry is handled, +ask the user about proactive behavior. Use AskUserQuestion: + +> gstack can proactively figure out when you might need a skill while you work — +> like suggesting /qa when you say "does this work?" or /investigate when you hit +> a bug. We recommend keeping this on — it speeds up every part of your workflow. + +Options: +- A) Keep it on (recommended) +- B) Turn it off — I'll type /commands myself + +If A: run `~/.claude/skills/gstack/bin/gstack-config set proactive true` +If B: run `~/.claude/skills/gstack/bin/gstack-config set proactive false` + +Always run: +```bash +touch ~/.gstack/.proactive-prompted +``` + +This only happens once. If `PROACTIVE_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -110,7 +136,6 @@ This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. 2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called. 3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]` — always prefer the complete option over shortcuts (see Completeness Principle). Include `Completeness: X/10` for each option. Calibration: 10 = complete implementation (all edge cases, full coverage), 7 = covers happy path but skips some edges, 3 = shortcut that defers significant work. If both options are 8+, pick the higher; if one is ≤5, flag it. 4. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)` -5. **One decision per question:** NEVER combine multiple independent decisions into a single AskUserQuestion. Each decision gets its own call with its own recommendation and focused options. Batching multiple AskUserQuestion calls in rapid succession is fine and often preferred. Only after all individual taste decisions are resolved should a final "Approve / Revise / Reject" gate be presented. Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex. @@ -118,97 +143,36 @@ Per-skill instructions may add additional formatting rules on top of this baseli ## Completeness Principle — Boil the Lake -AI-assisted coding makes the marginal cost of completeness near-zero. When you present options: +AI makes completeness near-free. Always recommend the complete option over shortcuts — the delta is minutes with CC+gstack. A "lake" (100% coverage, all edge cases) is boilable; an "ocean" (full rewrite, multi-quarter migration) is not. Boil lakes, flag oceans. -- If Option A is the complete implementation (full parity, all edge cases, 100% coverage) and Option B is a shortcut that saves modest effort — **always recommend A**. The delta between 80 lines and 150 lines is meaningless with CC+gstack. "Good enough" is the wrong instinct when "complete" costs minutes more. -- **Lake vs. ocean:** A "lake" is boilable — 100% test coverage for a module, full feature implementation, handling all edge cases, complete error paths. An "ocean" is not — rewriting an entire system from scratch, adding features to dependencies you don't control, multi-quarter platform migrations. Recommend boiling lakes. Flag oceans as out of scope. -- **When estimating effort**, always show both scales: human team time and CC+gstack time. The compression ratio varies by task type — use this reference: +**Effort reference** — always show both scales: | Task type | Human team | CC+gstack | Compression | |-----------|-----------|-----------|-------------| -| Boilerplate / scaffolding | 2 days | 15 min | ~100x | -| Test writing | 1 day | 15 min | ~50x | -| Feature implementation | 1 week | 30 min | ~30x | -| Bug fix + regression test | 4 hours | 15 min | ~20x | -| Architecture / design | 2 days | 4 hours | ~5x | -| Research / exploration | 1 day | 3 hours | ~3x | - -- This principle applies to test coverage, error handling, documentation, edge cases, and feature completeness. Don't skip the last 10% to "save time" — with AI, that 10% costs seconds. - -**Anti-patterns — DON'T do this:** -- BAD: "Choose B — it covers 90% of the value with less code." (If A is only 70 lines more, choose A.) -- BAD: "We can skip edge case handling to save time." (Edge case handling costs minutes with CC.) -- BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) -- BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.") - -## Repo Ownership Mode — See Something, Say Something - -`REPO_MODE` from the preamble tells you who owns issues in this repo: - -- **`solo`** — One person does 80%+ of the work. They own everything. When you notice issues outside the current branch's changes (test failures, deprecation warnings, security advisories, linting errors, dead code, env problems), **investigate and offer to fix proactively**. The solo dev is the only person who will fix it. Default to action. -- **`collaborative`** — Multiple active contributors. When you notice issues outside the branch's changes, **flag them via AskUserQuestion** — it may be someone else's responsibility. Default to asking, not fixing. -- **`unknown`** — Treat as collaborative (safer default — ask before fixing). - -**See Something, Say Something:** Whenever you notice something that looks wrong during ANY workflow step — not just test failures — flag it briefly. One sentence: what you noticed and its impact. In solo mode, follow up with "Want me to fix it?" In collaborative mode, just flag it and move on. - -Never let a noticed issue silently pass. The whole point is proactive communication. - -## Search Before Building +| Boilerplate | 2 days | 15 min | ~100x | +| Tests | 1 day | 15 min | ~50x | +| Feature | 1 week | 30 min | ~30x | +| Bug fix | 4 hours | 15 min | ~20x | -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy. - -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. - -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: -```bash -jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true -``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." +Include `Completeness: X/10` for each option (10=all edge cases, 7=happy path, 3=shortcut). ## Contributor Mode -If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. - -**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better! - -**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore. - -**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs. +If `_CONTRIB` is `true`: you are in **contributor mode**. At the end of each major workflow step, rate your gstack experience 0-10. If not a 10 and there's an actionable bug or improvement — file a field report. -**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer): +**File only:** gstack tooling bugs where the input was reasonable but gstack failed. **Skip:** user app bugs, network errors, auth failures on user's site. +**To file:** write `~/.gstack/contributor-logs/{slug}.md`: ``` # {Title} - -Hey gstack team — ran into this while using /{skill-name}: - -**What I was trying to do:** {what the user/agent was attempting} -**What happened instead:** {what actually happened} -**My rating:** {0-10} — {one sentence on why it wasn't a 10} - -## Steps to reproduce +**What I tried:** {action} | **What happened:** {result} | **Rating:** {0-10} +## Repro 1. {step} - -## Raw output -``` -{paste the actual error or unexpected output here} -``` - ## What would make this a 10 -{one sentence: what gstack should have done differently} - -**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** /{skill} +{one sentence} +**Date:** {YYYY-MM-DD} | **Version:** {version} | **Skill:** /{skill} ``` - -Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}" +Slug: lowercase hyphens, max 60 chars. Skip if exists. Max 3/session. File inline, don't stop. ## Completion Status Protocol diff --git a/design-consultation/SKILL.md b/design-consultation/SKILL.md index e265f26de..ce9822681 100644 --- a/design-consultation/SKILL.md +++ b/design-consultation/SKILL.md @@ -34,9 +34,11 @@ _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true _CONTRIB=$(~/.claude/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true) _PROACTIVE=$(~/.claude/skills/gstack/bin/gstack-config get proactive 2>/dev/null || echo "true") +_PROACTIVE_PROMPTED=$([ -f ~/.gstack/.proactive-prompted ] && echo "yes" || echo "no") _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown") echo "BRANCH: $_BRANCH" echo "PROACTIVE: $_PROACTIVE" +echo "PROACTIVE_PROMPTED: $_PROACTIVE_PROMPTED" source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true REPO_MODE=${REPO_MODE:-unknown} echo "REPO_MODE: $REPO_MODE" @@ -54,8 +56,11 @@ echo '{"skill":"design-consultation","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","re for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done ``` -If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills — only invoke -them when the user explicitly asks. The user opted out of proactive suggestions. +If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not +auto-invoke skills based on conversation context. Only run skills the user explicitly +types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: +"I think /skillname might help here — want me to run it?" and wait for confirmation. +The user opted out of proactive behavior. If output shows `UPGRADE_AVAILABLE `: read `~/.claude/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED `: tell user "Running gstack v{to} (just updated!)" and continue. @@ -104,6 +109,27 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `PROACTIVE_PROMPTED` is `no` AND `TEL_PROMPTED` is `yes`: After telemetry is handled, +ask the user about proactive behavior. Use AskUserQuestion: + +> gstack can proactively figure out when you might need a skill while you work — +> like suggesting /qa when you say "does this work?" or /investigate when you hit +> a bug. We recommend keeping this on — it speeds up every part of your workflow. + +Options: +- A) Keep it on (recommended) +- B) Turn it off — I'll type /commands myself + +If A: run `~/.claude/skills/gstack/bin/gstack-config set proactive true` +If B: run `~/.claude/skills/gstack/bin/gstack-config set proactive false` + +Always run: +```bash +touch ~/.gstack/.proactive-prompted +``` + +This only happens once. If `PROACTIVE_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -111,7 +137,6 @@ This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. 2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called. 3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]` — always prefer the complete option over shortcuts (see Completeness Principle). Include `Completeness: X/10` for each option. Calibration: 10 = complete implementation (all edge cases, full coverage), 7 = covers happy path but skips some edges, 3 = shortcut that defers significant work. If both options are 8+, pick the higher; if one is ≤5, flag it. 4. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)` -5. **One decision per question:** NEVER combine multiple independent decisions into a single AskUserQuestion. Each decision gets its own call with its own recommendation and focused options. Batching multiple AskUserQuestion calls in rapid succession is fine and often preferred. Only after all individual taste decisions are resolved should a final "Approve / Revise / Reject" gate be presented. Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex. @@ -119,97 +144,54 @@ Per-skill instructions may add additional formatting rules on top of this baseli ## Completeness Principle — Boil the Lake -AI-assisted coding makes the marginal cost of completeness near-zero. When you present options: +AI makes completeness near-free. Always recommend the complete option over shortcuts — the delta is minutes with CC+gstack. A "lake" (100% coverage, all edge cases) is boilable; an "ocean" (full rewrite, multi-quarter migration) is not. Boil lakes, flag oceans. -- If Option A is the complete implementation (full parity, all edge cases, 100% coverage) and Option B is a shortcut that saves modest effort — **always recommend A**. The delta between 80 lines and 150 lines is meaningless with CC+gstack. "Good enough" is the wrong instinct when "complete" costs minutes more. -- **Lake vs. ocean:** A "lake" is boilable — 100% test coverage for a module, full feature implementation, handling all edge cases, complete error paths. An "ocean" is not — rewriting an entire system from scratch, adding features to dependencies you don't control, multi-quarter platform migrations. Recommend boiling lakes. Flag oceans as out of scope. -- **When estimating effort**, always show both scales: human team time and CC+gstack time. The compression ratio varies by task type — use this reference: +**Effort reference** — always show both scales: | Task type | Human team | CC+gstack | Compression | |-----------|-----------|-----------|-------------| -| Boilerplate / scaffolding | 2 days | 15 min | ~100x | -| Test writing | 1 day | 15 min | ~50x | -| Feature implementation | 1 week | 30 min | ~30x | -| Bug fix + regression test | 4 hours | 15 min | ~20x | -| Architecture / design | 2 days | 4 hours | ~5x | -| Research / exploration | 1 day | 3 hours | ~3x | - -- This principle applies to test coverage, error handling, documentation, edge cases, and feature completeness. Don't skip the last 10% to "save time" — with AI, that 10% costs seconds. +| Boilerplate | 2 days | 15 min | ~100x | +| Tests | 1 day | 15 min | ~50x | +| Feature | 1 week | 30 min | ~30x | +| Bug fix | 4 hours | 15 min | ~20x | -**Anti-patterns — DON'T do this:** -- BAD: "Choose B — it covers 90% of the value with less code." (If A is only 70 lines more, choose A.) -- BAD: "We can skip edge case handling to save time." (Edge case handling costs minutes with CC.) -- BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) -- BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.") +Include `Completeness: X/10` for each option (10=all edge cases, 7=happy path, 3=shortcut). -## Repo Ownership Mode — See Something, Say Something +## Repo Ownership — See Something, Say Something -`REPO_MODE` from the preamble tells you who owns issues in this repo: +`REPO_MODE` controls how to handle issues outside your branch: +- **`solo`** — You own everything. Investigate and offer to fix proactively. +- **`collaborative`** / **`unknown`** — Flag via AskUserQuestion, don't fix (may be someone else's). -- **`solo`** — One person does 80%+ of the work. They own everything. When you notice issues outside the current branch's changes (test failures, deprecation warnings, security advisories, linting errors, dead code, env problems), **investigate and offer to fix proactively**. The solo dev is the only person who will fix it. Default to action. -- **`collaborative`** — Multiple active contributors. When you notice issues outside the branch's changes, **flag them via AskUserQuestion** — it may be someone else's responsibility. Default to asking, not fixing. -- **`unknown`** — Treat as collaborative (safer default — ask before fixing). - -**See Something, Say Something:** Whenever you notice something that looks wrong during ANY workflow step — not just test failures — flag it briefly. One sentence: what you noticed and its impact. In solo mode, follow up with "Want me to fix it?" In collaborative mode, just flag it and move on. - -Never let a noticed issue silently pass. The whole point is proactive communication. +Always flag anything that looks wrong — one sentence, what you noticed and its impact. ## Search Before Building -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy. - -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. +Before building anything unfamiliar, **search first.** See `~/.claude/skills/gstack/ETHOS.md`. +- **Layer 1** (tried and true) — don't reinvent. **Layer 2** (new and popular) — scrutinize. **Layer 3** (first principles) — prize above all. -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: +**Eureka:** When first-principles reasoning contradicts conventional wisdom, name it and log: ```bash jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true ``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." ## Contributor Mode -If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. - -**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better! - -**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore. +If `_CONTRIB` is `true`: you are in **contributor mode**. At the end of each major workflow step, rate your gstack experience 0-10. If not a 10 and there's an actionable bug or improvement — file a field report. -**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs. - -**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer): +**File only:** gstack tooling bugs where the input was reasonable but gstack failed. **Skip:** user app bugs, network errors, auth failures on user's site. +**To file:** write `~/.gstack/contributor-logs/{slug}.md`: ``` # {Title} - -Hey gstack team — ran into this while using /{skill-name}: - -**What I was trying to do:** {what the user/agent was attempting} -**What happened instead:** {what actually happened} -**My rating:** {0-10} — {one sentence on why it wasn't a 10} - -## Steps to reproduce +**What I tried:** {action} | **What happened:** {result} | **Rating:** {0-10} +## Repro 1. {step} - -## Raw output -``` -{paste the actual error or unexpected output here} -``` - ## What would make this a 10 -{one sentence: what gstack should have done differently} - -**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** /{skill} +{one sentence} +**Date:** {YYYY-MM-DD} | **Version:** {version} | **Skill:** /{skill} ``` - -Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}" +Slug: lowercase hyphens, max 60 chars. Skip if exists. Max 3/session. File inline, don't stop. ## Completion Status Protocol diff --git a/design-review/SKILL.md b/design-review/SKILL.md index 383410334..132ec819c 100644 --- a/design-review/SKILL.md +++ b/design-review/SKILL.md @@ -34,9 +34,11 @@ _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true _CONTRIB=$(~/.claude/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true) _PROACTIVE=$(~/.claude/skills/gstack/bin/gstack-config get proactive 2>/dev/null || echo "true") +_PROACTIVE_PROMPTED=$([ -f ~/.gstack/.proactive-prompted ] && echo "yes" || echo "no") _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown") echo "BRANCH: $_BRANCH" echo "PROACTIVE: $_PROACTIVE" +echo "PROACTIVE_PROMPTED: $_PROACTIVE_PROMPTED" source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true REPO_MODE=${REPO_MODE:-unknown} echo "REPO_MODE: $REPO_MODE" @@ -54,8 +56,11 @@ echo '{"skill":"design-review","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"' for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done ``` -If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills — only invoke -them when the user explicitly asks. The user opted out of proactive suggestions. +If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not +auto-invoke skills based on conversation context. Only run skills the user explicitly +types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: +"I think /skillname might help here — want me to run it?" and wait for confirmation. +The user opted out of proactive behavior. If output shows `UPGRADE_AVAILABLE `: read `~/.claude/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED `: tell user "Running gstack v{to} (just updated!)" and continue. @@ -104,6 +109,27 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `PROACTIVE_PROMPTED` is `no` AND `TEL_PROMPTED` is `yes`: After telemetry is handled, +ask the user about proactive behavior. Use AskUserQuestion: + +> gstack can proactively figure out when you might need a skill while you work — +> like suggesting /qa when you say "does this work?" or /investigate when you hit +> a bug. We recommend keeping this on — it speeds up every part of your workflow. + +Options: +- A) Keep it on (recommended) +- B) Turn it off — I'll type /commands myself + +If A: run `~/.claude/skills/gstack/bin/gstack-config set proactive true` +If B: run `~/.claude/skills/gstack/bin/gstack-config set proactive false` + +Always run: +```bash +touch ~/.gstack/.proactive-prompted +``` + +This only happens once. If `PROACTIVE_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -111,7 +137,6 @@ This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. 2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called. 3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]` — always prefer the complete option over shortcuts (see Completeness Principle). Include `Completeness: X/10` for each option. Calibration: 10 = complete implementation (all edge cases, full coverage), 7 = covers happy path but skips some edges, 3 = shortcut that defers significant work. If both options are 8+, pick the higher; if one is ≤5, flag it. 4. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)` -5. **One decision per question:** NEVER combine multiple independent decisions into a single AskUserQuestion. Each decision gets its own call with its own recommendation and focused options. Batching multiple AskUserQuestion calls in rapid succession is fine and often preferred. Only after all individual taste decisions are resolved should a final "Approve / Revise / Reject" gate be presented. Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex. @@ -119,97 +144,54 @@ Per-skill instructions may add additional formatting rules on top of this baseli ## Completeness Principle — Boil the Lake -AI-assisted coding makes the marginal cost of completeness near-zero. When you present options: +AI makes completeness near-free. Always recommend the complete option over shortcuts — the delta is minutes with CC+gstack. A "lake" (100% coverage, all edge cases) is boilable; an "ocean" (full rewrite, multi-quarter migration) is not. Boil lakes, flag oceans. -- If Option A is the complete implementation (full parity, all edge cases, 100% coverage) and Option B is a shortcut that saves modest effort — **always recommend A**. The delta between 80 lines and 150 lines is meaningless with CC+gstack. "Good enough" is the wrong instinct when "complete" costs minutes more. -- **Lake vs. ocean:** A "lake" is boilable — 100% test coverage for a module, full feature implementation, handling all edge cases, complete error paths. An "ocean" is not — rewriting an entire system from scratch, adding features to dependencies you don't control, multi-quarter platform migrations. Recommend boiling lakes. Flag oceans as out of scope. -- **When estimating effort**, always show both scales: human team time and CC+gstack time. The compression ratio varies by task type — use this reference: +**Effort reference** — always show both scales: | Task type | Human team | CC+gstack | Compression | |-----------|-----------|-----------|-------------| -| Boilerplate / scaffolding | 2 days | 15 min | ~100x | -| Test writing | 1 day | 15 min | ~50x | -| Feature implementation | 1 week | 30 min | ~30x | -| Bug fix + regression test | 4 hours | 15 min | ~20x | -| Architecture / design | 2 days | 4 hours | ~5x | -| Research / exploration | 1 day | 3 hours | ~3x | - -- This principle applies to test coverage, error handling, documentation, edge cases, and feature completeness. Don't skip the last 10% to "save time" — with AI, that 10% costs seconds. +| Boilerplate | 2 days | 15 min | ~100x | +| Tests | 1 day | 15 min | ~50x | +| Feature | 1 week | 30 min | ~30x | +| Bug fix | 4 hours | 15 min | ~20x | -**Anti-patterns — DON'T do this:** -- BAD: "Choose B — it covers 90% of the value with less code." (If A is only 70 lines more, choose A.) -- BAD: "We can skip edge case handling to save time." (Edge case handling costs minutes with CC.) -- BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) -- BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.") +Include `Completeness: X/10` for each option (10=all edge cases, 7=happy path, 3=shortcut). -## Repo Ownership Mode — See Something, Say Something +## Repo Ownership — See Something, Say Something -`REPO_MODE` from the preamble tells you who owns issues in this repo: +`REPO_MODE` controls how to handle issues outside your branch: +- **`solo`** — You own everything. Investigate and offer to fix proactively. +- **`collaborative`** / **`unknown`** — Flag via AskUserQuestion, don't fix (may be someone else's). -- **`solo`** — One person does 80%+ of the work. They own everything. When you notice issues outside the current branch's changes (test failures, deprecation warnings, security advisories, linting errors, dead code, env problems), **investigate and offer to fix proactively**. The solo dev is the only person who will fix it. Default to action. -- **`collaborative`** — Multiple active contributors. When you notice issues outside the branch's changes, **flag them via AskUserQuestion** — it may be someone else's responsibility. Default to asking, not fixing. -- **`unknown`** — Treat as collaborative (safer default — ask before fixing). - -**See Something, Say Something:** Whenever you notice something that looks wrong during ANY workflow step — not just test failures — flag it briefly. One sentence: what you noticed and its impact. In solo mode, follow up with "Want me to fix it?" In collaborative mode, just flag it and move on. - -Never let a noticed issue silently pass. The whole point is proactive communication. +Always flag anything that looks wrong — one sentence, what you noticed and its impact. ## Search Before Building -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy. - -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. +Before building anything unfamiliar, **search first.** See `~/.claude/skills/gstack/ETHOS.md`. +- **Layer 1** (tried and true) — don't reinvent. **Layer 2** (new and popular) — scrutinize. **Layer 3** (first principles) — prize above all. -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: +**Eureka:** When first-principles reasoning contradicts conventional wisdom, name it and log: ```bash jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true ``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." ## Contributor Mode -If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. - -**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better! - -**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore. +If `_CONTRIB` is `true`: you are in **contributor mode**. At the end of each major workflow step, rate your gstack experience 0-10. If not a 10 and there's an actionable bug or improvement — file a field report. -**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs. - -**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer): +**File only:** gstack tooling bugs where the input was reasonable but gstack failed. **Skip:** user app bugs, network errors, auth failures on user's site. +**To file:** write `~/.gstack/contributor-logs/{slug}.md`: ``` # {Title} - -Hey gstack team — ran into this while using /{skill-name}: - -**What I was trying to do:** {what the user/agent was attempting} -**What happened instead:** {what actually happened} -**My rating:** {0-10} — {one sentence on why it wasn't a 10} - -## Steps to reproduce +**What I tried:** {action} | **What happened:** {result} | **Rating:** {0-10} +## Repro 1. {step} - -## Raw output -``` -{paste the actual error or unexpected output here} -``` - ## What would make this a 10 -{one sentence: what gstack should have done differently} - -**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** /{skill} +{one sentence} +**Date:** {YYYY-MM-DD} | **Version:** {version} | **Skill:** /{skill} ``` - -Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}" +Slug: lowercase hyphens, max 60 chars. Skip if exists. Max 3/session. File inline, don't stop. ## Completion Status Protocol @@ -733,7 +715,7 @@ The test: would a human designer at a respected studio ever ship this? **10. Performance as Design** (6 items) - LCP < 2.0s (web apps), < 1.5s (informational sites) - CLS < 0.1 (no visible layout shifts during load) -- Skeleton quality: shapes match real content, shimmer animation +- Skeleton quality: shapes match real content layout, shimmer animation - Images: `loading="lazy"`, width/height dimensions set, WebP/AVIF format - Fonts: `font-display: swap`, preconnect to CDN origins - No visible font swap flash (FOUT) — critical fonts preloaded diff --git a/document-release/SKILL.md b/document-release/SKILL.md index 1364e4d9f..ca787cb5d 100644 --- a/document-release/SKILL.md +++ b/document-release/SKILL.md @@ -31,9 +31,11 @@ _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true _CONTRIB=$(~/.claude/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true) _PROACTIVE=$(~/.claude/skills/gstack/bin/gstack-config get proactive 2>/dev/null || echo "true") +_PROACTIVE_PROMPTED=$([ -f ~/.gstack/.proactive-prompted ] && echo "yes" || echo "no") _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown") echo "BRANCH: $_BRANCH" echo "PROACTIVE: $_PROACTIVE" +echo "PROACTIVE_PROMPTED: $_PROACTIVE_PROMPTED" source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true REPO_MODE=${REPO_MODE:-unknown} echo "REPO_MODE: $REPO_MODE" @@ -51,8 +53,11 @@ echo '{"skill":"document-release","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo" for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done ``` -If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills — only invoke -them when the user explicitly asks. The user opted out of proactive suggestions. +If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not +auto-invoke skills based on conversation context. Only run skills the user explicitly +types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: +"I think /skillname might help here — want me to run it?" and wait for confirmation. +The user opted out of proactive behavior. If output shows `UPGRADE_AVAILABLE `: read `~/.claude/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED `: tell user "Running gstack v{to} (just updated!)" and continue. @@ -101,6 +106,27 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `PROACTIVE_PROMPTED` is `no` AND `TEL_PROMPTED` is `yes`: After telemetry is handled, +ask the user about proactive behavior. Use AskUserQuestion: + +> gstack can proactively figure out when you might need a skill while you work — +> like suggesting /qa when you say "does this work?" or /investigate when you hit +> a bug. We recommend keeping this on — it speeds up every part of your workflow. + +Options: +- A) Keep it on (recommended) +- B) Turn it off — I'll type /commands myself + +If A: run `~/.claude/skills/gstack/bin/gstack-config set proactive true` +If B: run `~/.claude/skills/gstack/bin/gstack-config set proactive false` + +Always run: +```bash +touch ~/.gstack/.proactive-prompted +``` + +This only happens once. If `PROACTIVE_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -108,7 +134,6 @@ This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. 2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called. 3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]` — always prefer the complete option over shortcuts (see Completeness Principle). Include `Completeness: X/10` for each option. Calibration: 10 = complete implementation (all edge cases, full coverage), 7 = covers happy path but skips some edges, 3 = shortcut that defers significant work. If both options are 8+, pick the higher; if one is ≤5, flag it. 4. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)` -5. **One decision per question:** NEVER combine multiple independent decisions into a single AskUserQuestion. Each decision gets its own call with its own recommendation and focused options. Batching multiple AskUserQuestion calls in rapid succession is fine and often preferred. Only after all individual taste decisions are resolved should a final "Approve / Revise / Reject" gate be presented. Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex. @@ -116,97 +141,36 @@ Per-skill instructions may add additional formatting rules on top of this baseli ## Completeness Principle — Boil the Lake -AI-assisted coding makes the marginal cost of completeness near-zero. When you present options: +AI makes completeness near-free. Always recommend the complete option over shortcuts — the delta is minutes with CC+gstack. A "lake" (100% coverage, all edge cases) is boilable; an "ocean" (full rewrite, multi-quarter migration) is not. Boil lakes, flag oceans. -- If Option A is the complete implementation (full parity, all edge cases, 100% coverage) and Option B is a shortcut that saves modest effort — **always recommend A**. The delta between 80 lines and 150 lines is meaningless with CC+gstack. "Good enough" is the wrong instinct when "complete" costs minutes more. -- **Lake vs. ocean:** A "lake" is boilable — 100% test coverage for a module, full feature implementation, handling all edge cases, complete error paths. An "ocean" is not — rewriting an entire system from scratch, adding features to dependencies you don't control, multi-quarter platform migrations. Recommend boiling lakes. Flag oceans as out of scope. -- **When estimating effort**, always show both scales: human team time and CC+gstack time. The compression ratio varies by task type — use this reference: +**Effort reference** — always show both scales: | Task type | Human team | CC+gstack | Compression | |-----------|-----------|-----------|-------------| -| Boilerplate / scaffolding | 2 days | 15 min | ~100x | -| Test writing | 1 day | 15 min | ~50x | -| Feature implementation | 1 week | 30 min | ~30x | -| Bug fix + regression test | 4 hours | 15 min | ~20x | -| Architecture / design | 2 days | 4 hours | ~5x | -| Research / exploration | 1 day | 3 hours | ~3x | - -- This principle applies to test coverage, error handling, documentation, edge cases, and feature completeness. Don't skip the last 10% to "save time" — with AI, that 10% costs seconds. - -**Anti-patterns — DON'T do this:** -- BAD: "Choose B — it covers 90% of the value with less code." (If A is only 70 lines more, choose A.) -- BAD: "We can skip edge case handling to save time." (Edge case handling costs minutes with CC.) -- BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) -- BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.") - -## Repo Ownership Mode — See Something, Say Something - -`REPO_MODE` from the preamble tells you who owns issues in this repo: - -- **`solo`** — One person does 80%+ of the work. They own everything. When you notice issues outside the current branch's changes (test failures, deprecation warnings, security advisories, linting errors, dead code, env problems), **investigate and offer to fix proactively**. The solo dev is the only person who will fix it. Default to action. -- **`collaborative`** — Multiple active contributors. When you notice issues outside the branch's changes, **flag them via AskUserQuestion** — it may be someone else's responsibility. Default to asking, not fixing. -- **`unknown`** — Treat as collaborative (safer default — ask before fixing). - -**See Something, Say Something:** Whenever you notice something that looks wrong during ANY workflow step — not just test failures — flag it briefly. One sentence: what you noticed and its impact. In solo mode, follow up with "Want me to fix it?" In collaborative mode, just flag it and move on. - -Never let a noticed issue silently pass. The whole point is proactive communication. - -## Search Before Building +| Boilerplate | 2 days | 15 min | ~100x | +| Tests | 1 day | 15 min | ~50x | +| Feature | 1 week | 30 min | ~30x | +| Bug fix | 4 hours | 15 min | ~20x | -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy. - -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. - -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: -```bash -jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true -``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." +Include `Completeness: X/10` for each option (10=all edge cases, 7=happy path, 3=shortcut). ## Contributor Mode -If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. - -**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better! - -**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore. - -**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs. +If `_CONTRIB` is `true`: you are in **contributor mode**. At the end of each major workflow step, rate your gstack experience 0-10. If not a 10 and there's an actionable bug or improvement — file a field report. -**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer): +**File only:** gstack tooling bugs where the input was reasonable but gstack failed. **Skip:** user app bugs, network errors, auth failures on user's site. +**To file:** write `~/.gstack/contributor-logs/{slug}.md`: ``` # {Title} - -Hey gstack team — ran into this while using /{skill-name}: - -**What I was trying to do:** {what the user/agent was attempting} -**What happened instead:** {what actually happened} -**My rating:** {0-10} — {one sentence on why it wasn't a 10} - -## Steps to reproduce +**What I tried:** {action} | **What happened:** {result} | **Rating:** {0-10} +## Repro 1. {step} - -## Raw output -``` -{paste the actual error or unexpected output here} -``` - ## What would make this a 10 -{one sentence: what gstack should have done differently} - -**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** /{skill} +{one sentence} +**Date:** {YYYY-MM-DD} | **Version:** {version} | **Skill:** /{skill} ``` - -Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}" +Slug: lowercase hyphens, max 60 chars. Skip if exists. Max 3/session. File inline, don't stop. ## Completion Status Protocol diff --git a/install.sh b/install.sh new file mode 100755 index 000000000..4febb724b --- /dev/null +++ b/install.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash +# gstack installer — curl-pipe-bash one-liner +# +# Usage: +# bash <(curl -fsSL https://raw.githubusercontent.com/garrytan/gstack/main/install.sh) +# +set -euo pipefail + +INSTALL_DIR="$HOME/.claude/skills/gstack" + +echo "gstack installer" +echo "━━━━━━━━━━━━━━━━" +echo "" + +# ─── Check prereqs ──────────────────────────────────────────── +for cmd in git bun; do + if ! command -v "$cmd" >/dev/null 2>&1; then + echo "Error: $cmd is required but not found." + case "$cmd" in + git) echo " Install: https://git-scm.com/downloads" ;; + bun) echo " Install: curl -fsSL https://bun.sh/install | bash" ;; + esac + exit 1 + fi +done + +# Claude CLI check (warn, don't fail — they might install it after) +if ! command -v claude >/dev/null 2>&1; then + echo "Warning: Claude CLI not found." + echo " Install: npm install -g @anthropic-ai/claude-code" + echo " (gstack requires Claude Code to run skills)" + echo "" +fi + +# ─── Fresh install vs upgrade ───────────────────────────────── +if [ -d "$INSTALL_DIR/.git" ]; then + echo "gstack already installed — upgrading..." + cd "$INSTALL_DIR" && git pull origin main && ./setup +else + echo "Installing gstack to $INSTALL_DIR..." + mkdir -p "$(dirname "$INSTALL_DIR")" + git clone https://github.com/garrytan/gstack.git "$INSTALL_DIR" + cd "$INSTALL_DIR" && ./setup +fi + +echo "" +echo "Note: gstack checks for updates by pinging our server with your" +echo "version number, OS, and a random device ID. No usage data is sent." +echo "" +echo "gstack installed! Try: /office-hours" diff --git a/investigate/SKILL.md b/investigate/SKILL.md index b1df5ca29..4d1cb933e 100644 --- a/investigate/SKILL.md +++ b/investigate/SKILL.md @@ -45,9 +45,11 @@ _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true _CONTRIB=$(~/.claude/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true) _PROACTIVE=$(~/.claude/skills/gstack/bin/gstack-config get proactive 2>/dev/null || echo "true") +_PROACTIVE_PROMPTED=$([ -f ~/.gstack/.proactive-prompted ] && echo "yes" || echo "no") _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown") echo "BRANCH: $_BRANCH" echo "PROACTIVE: $_PROACTIVE" +echo "PROACTIVE_PROMPTED: $_PROACTIVE_PROMPTED" source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true REPO_MODE=${REPO_MODE:-unknown} echo "REPO_MODE: $REPO_MODE" @@ -65,8 +67,11 @@ echo '{"skill":"investigate","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$( for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done ``` -If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills — only invoke -them when the user explicitly asks. The user opted out of proactive suggestions. +If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not +auto-invoke skills based on conversation context. Only run skills the user explicitly +types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: +"I think /skillname might help here — want me to run it?" and wait for confirmation. +The user opted out of proactive behavior. If output shows `UPGRADE_AVAILABLE `: read `~/.claude/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED `: tell user "Running gstack v{to} (just updated!)" and continue. @@ -115,6 +120,27 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `PROACTIVE_PROMPTED` is `no` AND `TEL_PROMPTED` is `yes`: After telemetry is handled, +ask the user about proactive behavior. Use AskUserQuestion: + +> gstack can proactively figure out when you might need a skill while you work — +> like suggesting /qa when you say "does this work?" or /investigate when you hit +> a bug. We recommend keeping this on — it speeds up every part of your workflow. + +Options: +- A) Keep it on (recommended) +- B) Turn it off — I'll type /commands myself + +If A: run `~/.claude/skills/gstack/bin/gstack-config set proactive true` +If B: run `~/.claude/skills/gstack/bin/gstack-config set proactive false` + +Always run: +```bash +touch ~/.gstack/.proactive-prompted +``` + +This only happens once. If `PROACTIVE_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -122,7 +148,6 @@ This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. 2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called. 3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]` — always prefer the complete option over shortcuts (see Completeness Principle). Include `Completeness: X/10` for each option. Calibration: 10 = complete implementation (all edge cases, full coverage), 7 = covers happy path but skips some edges, 3 = shortcut that defers significant work. If both options are 8+, pick the higher; if one is ≤5, flag it. 4. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)` -5. **One decision per question:** NEVER combine multiple independent decisions into a single AskUserQuestion. Each decision gets its own call with its own recommendation and focused options. Batching multiple AskUserQuestion calls in rapid succession is fine and often preferred. Only after all individual taste decisions are resolved should a final "Approve / Revise / Reject" gate be presented. Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex. @@ -130,97 +155,36 @@ Per-skill instructions may add additional formatting rules on top of this baseli ## Completeness Principle — Boil the Lake -AI-assisted coding makes the marginal cost of completeness near-zero. When you present options: +AI makes completeness near-free. Always recommend the complete option over shortcuts — the delta is minutes with CC+gstack. A "lake" (100% coverage, all edge cases) is boilable; an "ocean" (full rewrite, multi-quarter migration) is not. Boil lakes, flag oceans. -- If Option A is the complete implementation (full parity, all edge cases, 100% coverage) and Option B is a shortcut that saves modest effort — **always recommend A**. The delta between 80 lines and 150 lines is meaningless with CC+gstack. "Good enough" is the wrong instinct when "complete" costs minutes more. -- **Lake vs. ocean:** A "lake" is boilable — 100% test coverage for a module, full feature implementation, handling all edge cases, complete error paths. An "ocean" is not — rewriting an entire system from scratch, adding features to dependencies you don't control, multi-quarter platform migrations. Recommend boiling lakes. Flag oceans as out of scope. -- **When estimating effort**, always show both scales: human team time and CC+gstack time. The compression ratio varies by task type — use this reference: +**Effort reference** — always show both scales: | Task type | Human team | CC+gstack | Compression | |-----------|-----------|-----------|-------------| -| Boilerplate / scaffolding | 2 days | 15 min | ~100x | -| Test writing | 1 day | 15 min | ~50x | -| Feature implementation | 1 week | 30 min | ~30x | -| Bug fix + regression test | 4 hours | 15 min | ~20x | -| Architecture / design | 2 days | 4 hours | ~5x | -| Research / exploration | 1 day | 3 hours | ~3x | - -- This principle applies to test coverage, error handling, documentation, edge cases, and feature completeness. Don't skip the last 10% to "save time" — with AI, that 10% costs seconds. - -**Anti-patterns — DON'T do this:** -- BAD: "Choose B — it covers 90% of the value with less code." (If A is only 70 lines more, choose A.) -- BAD: "We can skip edge case handling to save time." (Edge case handling costs minutes with CC.) -- BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) -- BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.") - -## Repo Ownership Mode — See Something, Say Something - -`REPO_MODE` from the preamble tells you who owns issues in this repo: - -- **`solo`** — One person does 80%+ of the work. They own everything. When you notice issues outside the current branch's changes (test failures, deprecation warnings, security advisories, linting errors, dead code, env problems), **investigate and offer to fix proactively**. The solo dev is the only person who will fix it. Default to action. -- **`collaborative`** — Multiple active contributors. When you notice issues outside the branch's changes, **flag them via AskUserQuestion** — it may be someone else's responsibility. Default to asking, not fixing. -- **`unknown`** — Treat as collaborative (safer default — ask before fixing). - -**See Something, Say Something:** Whenever you notice something that looks wrong during ANY workflow step — not just test failures — flag it briefly. One sentence: what you noticed and its impact. In solo mode, follow up with "Want me to fix it?" In collaborative mode, just flag it and move on. - -Never let a noticed issue silently pass. The whole point is proactive communication. - -## Search Before Building +| Boilerplate | 2 days | 15 min | ~100x | +| Tests | 1 day | 15 min | ~50x | +| Feature | 1 week | 30 min | ~30x | +| Bug fix | 4 hours | 15 min | ~20x | -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy. - -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. - -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: -```bash -jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true -``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." +Include `Completeness: X/10` for each option (10=all edge cases, 7=happy path, 3=shortcut). ## Contributor Mode -If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. - -**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better! - -**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore. - -**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs. +If `_CONTRIB` is `true`: you are in **contributor mode**. At the end of each major workflow step, rate your gstack experience 0-10. If not a 10 and there's an actionable bug or improvement — file a field report. -**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer): +**File only:** gstack tooling bugs where the input was reasonable but gstack failed. **Skip:** user app bugs, network errors, auth failures on user's site. +**To file:** write `~/.gstack/contributor-logs/{slug}.md`: ``` # {Title} - -Hey gstack team — ran into this while using /{skill-name}: - -**What I was trying to do:** {what the user/agent was attempting} -**What happened instead:** {what actually happened} -**My rating:** {0-10} — {one sentence on why it wasn't a 10} - -## Steps to reproduce +**What I tried:** {action} | **What happened:** {result} | **Rating:** {0-10} +## Repro 1. {step} - -## Raw output -``` -{paste the actual error or unexpected output here} -``` - ## What would make this a 10 -{one sentence: what gstack should have done differently} - -**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** /{skill} +{one sentence} +**Date:** {YYYY-MM-DD} | **Version:** {version} | **Skill:** /{skill} ``` - -Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}" +Slug: lowercase hyphens, max 60 chars. Skip if exists. Max 3/session. File inline, don't stop. ## Completion Status Protocol diff --git a/land-and-deploy/SKILL.md b/land-and-deploy/SKILL.md index 85e52e4eb..7b3fd8450 100644 --- a/land-and-deploy/SKILL.md +++ b/land-and-deploy/SKILL.md @@ -28,9 +28,11 @@ _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true _CONTRIB=$(~/.claude/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true) _PROACTIVE=$(~/.claude/skills/gstack/bin/gstack-config get proactive 2>/dev/null || echo "true") +_PROACTIVE_PROMPTED=$([ -f ~/.gstack/.proactive-prompted ] && echo "yes" || echo "no") _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown") echo "BRANCH: $_BRANCH" echo "PROACTIVE: $_PROACTIVE" +echo "PROACTIVE_PROMPTED: $_PROACTIVE_PROMPTED" source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true REPO_MODE=${REPO_MODE:-unknown} echo "REPO_MODE: $REPO_MODE" @@ -48,8 +50,11 @@ echo '{"skill":"land-and-deploy","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo": for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done ``` -If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills — only invoke -them when the user explicitly asks. The user opted out of proactive suggestions. +If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not +auto-invoke skills based on conversation context. Only run skills the user explicitly +types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: +"I think /skillname might help here — want me to run it?" and wait for confirmation. +The user opted out of proactive behavior. If output shows `UPGRADE_AVAILABLE `: read `~/.claude/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED `: tell user "Running gstack v{to} (just updated!)" and continue. @@ -98,6 +103,27 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `PROACTIVE_PROMPTED` is `no` AND `TEL_PROMPTED` is `yes`: After telemetry is handled, +ask the user about proactive behavior. Use AskUserQuestion: + +> gstack can proactively figure out when you might need a skill while you work — +> like suggesting /qa when you say "does this work?" or /investigate when you hit +> a bug. We recommend keeping this on — it speeds up every part of your workflow. + +Options: +- A) Keep it on (recommended) +- B) Turn it off — I'll type /commands myself + +If A: run `~/.claude/skills/gstack/bin/gstack-config set proactive true` +If B: run `~/.claude/skills/gstack/bin/gstack-config set proactive false` + +Always run: +```bash +touch ~/.gstack/.proactive-prompted +``` + +This only happens once. If `PROACTIVE_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -105,7 +131,6 @@ This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. 2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called. 3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]` — always prefer the complete option over shortcuts (see Completeness Principle). Include `Completeness: X/10` for each option. Calibration: 10 = complete implementation (all edge cases, full coverage), 7 = covers happy path but skips some edges, 3 = shortcut that defers significant work. If both options are 8+, pick the higher; if one is ≤5, flag it. 4. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)` -5. **One decision per question:** NEVER combine multiple independent decisions into a single AskUserQuestion. Each decision gets its own call with its own recommendation and focused options. Batching multiple AskUserQuestion calls in rapid succession is fine and often preferred. Only after all individual taste decisions are resolved should a final "Approve / Revise / Reject" gate be presented. Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex. @@ -113,97 +138,54 @@ Per-skill instructions may add additional formatting rules on top of this baseli ## Completeness Principle — Boil the Lake -AI-assisted coding makes the marginal cost of completeness near-zero. When you present options: +AI makes completeness near-free. Always recommend the complete option over shortcuts — the delta is minutes with CC+gstack. A "lake" (100% coverage, all edge cases) is boilable; an "ocean" (full rewrite, multi-quarter migration) is not. Boil lakes, flag oceans. -- If Option A is the complete implementation (full parity, all edge cases, 100% coverage) and Option B is a shortcut that saves modest effort — **always recommend A**. The delta between 80 lines and 150 lines is meaningless with CC+gstack. "Good enough" is the wrong instinct when "complete" costs minutes more. -- **Lake vs. ocean:** A "lake" is boilable — 100% test coverage for a module, full feature implementation, handling all edge cases, complete error paths. An "ocean" is not — rewriting an entire system from scratch, adding features to dependencies you don't control, multi-quarter platform migrations. Recommend boiling lakes. Flag oceans as out of scope. -- **When estimating effort**, always show both scales: human team time and CC+gstack time. The compression ratio varies by task type — use this reference: +**Effort reference** — always show both scales: | Task type | Human team | CC+gstack | Compression | |-----------|-----------|-----------|-------------| -| Boilerplate / scaffolding | 2 days | 15 min | ~100x | -| Test writing | 1 day | 15 min | ~50x | -| Feature implementation | 1 week | 30 min | ~30x | -| Bug fix + regression test | 4 hours | 15 min | ~20x | -| Architecture / design | 2 days | 4 hours | ~5x | -| Research / exploration | 1 day | 3 hours | ~3x | - -- This principle applies to test coverage, error handling, documentation, edge cases, and feature completeness. Don't skip the last 10% to "save time" — with AI, that 10% costs seconds. +| Boilerplate | 2 days | 15 min | ~100x | +| Tests | 1 day | 15 min | ~50x | +| Feature | 1 week | 30 min | ~30x | +| Bug fix | 4 hours | 15 min | ~20x | -**Anti-patterns — DON'T do this:** -- BAD: "Choose B — it covers 90% of the value with less code." (If A is only 70 lines more, choose A.) -- BAD: "We can skip edge case handling to save time." (Edge case handling costs minutes with CC.) -- BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) -- BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.") +Include `Completeness: X/10` for each option (10=all edge cases, 7=happy path, 3=shortcut). -## Repo Ownership Mode — See Something, Say Something +## Repo Ownership — See Something, Say Something -`REPO_MODE` from the preamble tells you who owns issues in this repo: +`REPO_MODE` controls how to handle issues outside your branch: +- **`solo`** — You own everything. Investigate and offer to fix proactively. +- **`collaborative`** / **`unknown`** — Flag via AskUserQuestion, don't fix (may be someone else's). -- **`solo`** — One person does 80%+ of the work. They own everything. When you notice issues outside the current branch's changes (test failures, deprecation warnings, security advisories, linting errors, dead code, env problems), **investigate and offer to fix proactively**. The solo dev is the only person who will fix it. Default to action. -- **`collaborative`** — Multiple active contributors. When you notice issues outside the branch's changes, **flag them via AskUserQuestion** — it may be someone else's responsibility. Default to asking, not fixing. -- **`unknown`** — Treat as collaborative (safer default — ask before fixing). - -**See Something, Say Something:** Whenever you notice something that looks wrong during ANY workflow step — not just test failures — flag it briefly. One sentence: what you noticed and its impact. In solo mode, follow up with "Want me to fix it?" In collaborative mode, just flag it and move on. - -Never let a noticed issue silently pass. The whole point is proactive communication. +Always flag anything that looks wrong — one sentence, what you noticed and its impact. ## Search Before Building -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy. - -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. +Before building anything unfamiliar, **search first.** See `~/.claude/skills/gstack/ETHOS.md`. +- **Layer 1** (tried and true) — don't reinvent. **Layer 2** (new and popular) — scrutinize. **Layer 3** (first principles) — prize above all. -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: +**Eureka:** When first-principles reasoning contradicts conventional wisdom, name it and log: ```bash jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true ``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." ## Contributor Mode -If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. - -**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better! - -**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore. +If `_CONTRIB` is `true`: you are in **contributor mode**. At the end of each major workflow step, rate your gstack experience 0-10. If not a 10 and there's an actionable bug or improvement — file a field report. -**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs. - -**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer): +**File only:** gstack tooling bugs where the input was reasonable but gstack failed. **Skip:** user app bugs, network errors, auth failures on user's site. +**To file:** write `~/.gstack/contributor-logs/{slug}.md`: ``` # {Title} - -Hey gstack team — ran into this while using /{skill-name}: - -**What I was trying to do:** {what the user/agent was attempting} -**What happened instead:** {what actually happened} -**My rating:** {0-10} — {one sentence on why it wasn't a 10} - -## Steps to reproduce +**What I tried:** {action} | **What happened:** {result} | **Rating:** {0-10} +## Repro 1. {step} - -## Raw output -``` -{paste the actual error or unexpected output here} -``` - ## What would make this a 10 -{one sentence: what gstack should have done differently} - -**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** /{skill} +{one sentence} +**Date:** {YYYY-MM-DD} | **Version:** {version} | **Skill:** /{skill} ``` - -Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}" +Slug: lowercase hyphens, max 60 chars. Skip if exists. Max 3/session. File inline, don't stop. ## Completion Status Protocol diff --git a/office-hours/SKILL.md b/office-hours/SKILL.md index 8bf43efa5..1ac243453 100644 --- a/office-hours/SKILL.md +++ b/office-hours/SKILL.md @@ -36,9 +36,11 @@ _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true _CONTRIB=$(~/.claude/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true) _PROACTIVE=$(~/.claude/skills/gstack/bin/gstack-config get proactive 2>/dev/null || echo "true") +_PROACTIVE_PROMPTED=$([ -f ~/.gstack/.proactive-prompted ] && echo "yes" || echo "no") _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown") echo "BRANCH: $_BRANCH" echo "PROACTIVE: $_PROACTIVE" +echo "PROACTIVE_PROMPTED: $_PROACTIVE_PROMPTED" source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true REPO_MODE=${REPO_MODE:-unknown} echo "REPO_MODE: $REPO_MODE" @@ -56,8 +58,11 @@ echo '{"skill":"office-hours","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$ for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done ``` -If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills — only invoke -them when the user explicitly asks. The user opted out of proactive suggestions. +If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not +auto-invoke skills based on conversation context. Only run skills the user explicitly +types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: +"I think /skillname might help here — want me to run it?" and wait for confirmation. +The user opted out of proactive behavior. If output shows `UPGRADE_AVAILABLE `: read `~/.claude/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED `: tell user "Running gstack v{to} (just updated!)" and continue. @@ -106,6 +111,27 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `PROACTIVE_PROMPTED` is `no` AND `TEL_PROMPTED` is `yes`: After telemetry is handled, +ask the user about proactive behavior. Use AskUserQuestion: + +> gstack can proactively figure out when you might need a skill while you work — +> like suggesting /qa when you say "does this work?" or /investigate when you hit +> a bug. We recommend keeping this on — it speeds up every part of your workflow. + +Options: +- A) Keep it on (recommended) +- B) Turn it off — I'll type /commands myself + +If A: run `~/.claude/skills/gstack/bin/gstack-config set proactive true` +If B: run `~/.claude/skills/gstack/bin/gstack-config set proactive false` + +Always run: +```bash +touch ~/.gstack/.proactive-prompted +``` + +This only happens once. If `PROACTIVE_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -113,7 +139,6 @@ This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. 2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called. 3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]` — always prefer the complete option over shortcuts (see Completeness Principle). Include `Completeness: X/10` for each option. Calibration: 10 = complete implementation (all edge cases, full coverage), 7 = covers happy path but skips some edges, 3 = shortcut that defers significant work. If both options are 8+, pick the higher; if one is ≤5, flag it. 4. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)` -5. **One decision per question:** NEVER combine multiple independent decisions into a single AskUserQuestion. Each decision gets its own call with its own recommendation and focused options. Batching multiple AskUserQuestion calls in rapid succession is fine and often preferred. Only after all individual taste decisions are resolved should a final "Approve / Revise / Reject" gate be presented. Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex. @@ -121,97 +146,54 @@ Per-skill instructions may add additional formatting rules on top of this baseli ## Completeness Principle — Boil the Lake -AI-assisted coding makes the marginal cost of completeness near-zero. When you present options: +AI makes completeness near-free. Always recommend the complete option over shortcuts — the delta is minutes with CC+gstack. A "lake" (100% coverage, all edge cases) is boilable; an "ocean" (full rewrite, multi-quarter migration) is not. Boil lakes, flag oceans. -- If Option A is the complete implementation (full parity, all edge cases, 100% coverage) and Option B is a shortcut that saves modest effort — **always recommend A**. The delta between 80 lines and 150 lines is meaningless with CC+gstack. "Good enough" is the wrong instinct when "complete" costs minutes more. -- **Lake vs. ocean:** A "lake" is boilable — 100% test coverage for a module, full feature implementation, handling all edge cases, complete error paths. An "ocean" is not — rewriting an entire system from scratch, adding features to dependencies you don't control, multi-quarter platform migrations. Recommend boiling lakes. Flag oceans as out of scope. -- **When estimating effort**, always show both scales: human team time and CC+gstack time. The compression ratio varies by task type — use this reference: +**Effort reference** — always show both scales: | Task type | Human team | CC+gstack | Compression | |-----------|-----------|-----------|-------------| -| Boilerplate / scaffolding | 2 days | 15 min | ~100x | -| Test writing | 1 day | 15 min | ~50x | -| Feature implementation | 1 week | 30 min | ~30x | -| Bug fix + regression test | 4 hours | 15 min | ~20x | -| Architecture / design | 2 days | 4 hours | ~5x | -| Research / exploration | 1 day | 3 hours | ~3x | - -- This principle applies to test coverage, error handling, documentation, edge cases, and feature completeness. Don't skip the last 10% to "save time" — with AI, that 10% costs seconds. +| Boilerplate | 2 days | 15 min | ~100x | +| Tests | 1 day | 15 min | ~50x | +| Feature | 1 week | 30 min | ~30x | +| Bug fix | 4 hours | 15 min | ~20x | -**Anti-patterns — DON'T do this:** -- BAD: "Choose B — it covers 90% of the value with less code." (If A is only 70 lines more, choose A.) -- BAD: "We can skip edge case handling to save time." (Edge case handling costs minutes with CC.) -- BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) -- BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.") +Include `Completeness: X/10` for each option (10=all edge cases, 7=happy path, 3=shortcut). -## Repo Ownership Mode — See Something, Say Something +## Repo Ownership — See Something, Say Something -`REPO_MODE` from the preamble tells you who owns issues in this repo: +`REPO_MODE` controls how to handle issues outside your branch: +- **`solo`** — You own everything. Investigate and offer to fix proactively. +- **`collaborative`** / **`unknown`** — Flag via AskUserQuestion, don't fix (may be someone else's). -- **`solo`** — One person does 80%+ of the work. They own everything. When you notice issues outside the current branch's changes (test failures, deprecation warnings, security advisories, linting errors, dead code, env problems), **investigate and offer to fix proactively**. The solo dev is the only person who will fix it. Default to action. -- **`collaborative`** — Multiple active contributors. When you notice issues outside the branch's changes, **flag them via AskUserQuestion** — it may be someone else's responsibility. Default to asking, not fixing. -- **`unknown`** — Treat as collaborative (safer default — ask before fixing). - -**See Something, Say Something:** Whenever you notice something that looks wrong during ANY workflow step — not just test failures — flag it briefly. One sentence: what you noticed and its impact. In solo mode, follow up with "Want me to fix it?" In collaborative mode, just flag it and move on. - -Never let a noticed issue silently pass. The whole point is proactive communication. +Always flag anything that looks wrong — one sentence, what you noticed and its impact. ## Search Before Building -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy. - -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. +Before building anything unfamiliar, **search first.** See `~/.claude/skills/gstack/ETHOS.md`. +- **Layer 1** (tried and true) — don't reinvent. **Layer 2** (new and popular) — scrutinize. **Layer 3** (first principles) — prize above all. -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: +**Eureka:** When first-principles reasoning contradicts conventional wisdom, name it and log: ```bash jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true ``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." ## Contributor Mode -If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. - -**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better! - -**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore. +If `_CONTRIB` is `true`: you are in **contributor mode**. At the end of each major workflow step, rate your gstack experience 0-10. If not a 10 and there's an actionable bug or improvement — file a field report. -**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs. - -**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer): +**File only:** gstack tooling bugs where the input was reasonable but gstack failed. **Skip:** user app bugs, network errors, auth failures on user's site. +**To file:** write `~/.gstack/contributor-logs/{slug}.md`: ``` # {Title} - -Hey gstack team — ran into this while using /{skill-name}: - -**What I was trying to do:** {what the user/agent was attempting} -**What happened instead:** {what actually happened} -**My rating:** {0-10} — {one sentence on why it wasn't a 10} - -## Steps to reproduce +**What I tried:** {action} | **What happened:** {result} | **Rating:** {0-10} +## Repro 1. {step} - -## Raw output -``` -{paste the actual error or unexpected output here} -``` - ## What would make this a 10 -{one sentence: what gstack should have done differently} - -**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** /{skill} +{one sentence} +**Date:** {YYYY-MM-DD} | **Version:** {version} | **Skill:** /{skill} ``` - -Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}" +Slug: lowercase hyphens, max 60 chars. Skip if exists. Max 3/session. File inline, don't stop. ## Completion Status Protocol diff --git a/package.json b/package.json index 70b409094..580171442 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "gstack", - "version": "0.11.17.0", + "version": "0.12.0.0", "description": "Garry's Stack — Claude Code skills + fast headless browser. One repo, one install, entire AI engineering workflow.", "license": "MIT", "type": "module", diff --git a/plan-ceo-review/SKILL.md b/plan-ceo-review/SKILL.md index e5d4af6a7..e03546193 100644 --- a/plan-ceo-review/SKILL.md +++ b/plan-ceo-review/SKILL.md @@ -34,9 +34,11 @@ _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true _CONTRIB=$(~/.claude/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true) _PROACTIVE=$(~/.claude/skills/gstack/bin/gstack-config get proactive 2>/dev/null || echo "true") +_PROACTIVE_PROMPTED=$([ -f ~/.gstack/.proactive-prompted ] && echo "yes" || echo "no") _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown") echo "BRANCH: $_BRANCH" echo "PROACTIVE: $_PROACTIVE" +echo "PROACTIVE_PROMPTED: $_PROACTIVE_PROMPTED" source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true REPO_MODE=${REPO_MODE:-unknown} echo "REPO_MODE: $REPO_MODE" @@ -54,8 +56,11 @@ echo '{"skill":"plan-ceo-review","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo": for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done ``` -If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills — only invoke -them when the user explicitly asks. The user opted out of proactive suggestions. +If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not +auto-invoke skills based on conversation context. Only run skills the user explicitly +types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: +"I think /skillname might help here — want me to run it?" and wait for confirmation. +The user opted out of proactive behavior. If output shows `UPGRADE_AVAILABLE `: read `~/.claude/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED `: tell user "Running gstack v{to} (just updated!)" and continue. @@ -104,6 +109,27 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `PROACTIVE_PROMPTED` is `no` AND `TEL_PROMPTED` is `yes`: After telemetry is handled, +ask the user about proactive behavior. Use AskUserQuestion: + +> gstack can proactively figure out when you might need a skill while you work — +> like suggesting /qa when you say "does this work?" or /investigate when you hit +> a bug. We recommend keeping this on — it speeds up every part of your workflow. + +Options: +- A) Keep it on (recommended) +- B) Turn it off — I'll type /commands myself + +If A: run `~/.claude/skills/gstack/bin/gstack-config set proactive true` +If B: run `~/.claude/skills/gstack/bin/gstack-config set proactive false` + +Always run: +```bash +touch ~/.gstack/.proactive-prompted +``` + +This only happens once. If `PROACTIVE_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -111,7 +137,6 @@ This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. 2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called. 3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]` — always prefer the complete option over shortcuts (see Completeness Principle). Include `Completeness: X/10` for each option. Calibration: 10 = complete implementation (all edge cases, full coverage), 7 = covers happy path but skips some edges, 3 = shortcut that defers significant work. If both options are 8+, pick the higher; if one is ≤5, flag it. 4. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)` -5. **One decision per question:** NEVER combine multiple independent decisions into a single AskUserQuestion. Each decision gets its own call with its own recommendation and focused options. Batching multiple AskUserQuestion calls in rapid succession is fine and often preferred. Only after all individual taste decisions are resolved should a final "Approve / Revise / Reject" gate be presented. Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex. @@ -119,97 +144,54 @@ Per-skill instructions may add additional formatting rules on top of this baseli ## Completeness Principle — Boil the Lake -AI-assisted coding makes the marginal cost of completeness near-zero. When you present options: +AI makes completeness near-free. Always recommend the complete option over shortcuts — the delta is minutes with CC+gstack. A "lake" (100% coverage, all edge cases) is boilable; an "ocean" (full rewrite, multi-quarter migration) is not. Boil lakes, flag oceans. -- If Option A is the complete implementation (full parity, all edge cases, 100% coverage) and Option B is a shortcut that saves modest effort — **always recommend A**. The delta between 80 lines and 150 lines is meaningless with CC+gstack. "Good enough" is the wrong instinct when "complete" costs minutes more. -- **Lake vs. ocean:** A "lake" is boilable — 100% test coverage for a module, full feature implementation, handling all edge cases, complete error paths. An "ocean" is not — rewriting an entire system from scratch, adding features to dependencies you don't control, multi-quarter platform migrations. Recommend boiling lakes. Flag oceans as out of scope. -- **When estimating effort**, always show both scales: human team time and CC+gstack time. The compression ratio varies by task type — use this reference: +**Effort reference** — always show both scales: | Task type | Human team | CC+gstack | Compression | |-----------|-----------|-----------|-------------| -| Boilerplate / scaffolding | 2 days | 15 min | ~100x | -| Test writing | 1 day | 15 min | ~50x | -| Feature implementation | 1 week | 30 min | ~30x | -| Bug fix + regression test | 4 hours | 15 min | ~20x | -| Architecture / design | 2 days | 4 hours | ~5x | -| Research / exploration | 1 day | 3 hours | ~3x | - -- This principle applies to test coverage, error handling, documentation, edge cases, and feature completeness. Don't skip the last 10% to "save time" — with AI, that 10% costs seconds. +| Boilerplate | 2 days | 15 min | ~100x | +| Tests | 1 day | 15 min | ~50x | +| Feature | 1 week | 30 min | ~30x | +| Bug fix | 4 hours | 15 min | ~20x | -**Anti-patterns — DON'T do this:** -- BAD: "Choose B — it covers 90% of the value with less code." (If A is only 70 lines more, choose A.) -- BAD: "We can skip edge case handling to save time." (Edge case handling costs minutes with CC.) -- BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) -- BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.") +Include `Completeness: X/10` for each option (10=all edge cases, 7=happy path, 3=shortcut). -## Repo Ownership Mode — See Something, Say Something +## Repo Ownership — See Something, Say Something -`REPO_MODE` from the preamble tells you who owns issues in this repo: +`REPO_MODE` controls how to handle issues outside your branch: +- **`solo`** — You own everything. Investigate and offer to fix proactively. +- **`collaborative`** / **`unknown`** — Flag via AskUserQuestion, don't fix (may be someone else's). -- **`solo`** — One person does 80%+ of the work. They own everything. When you notice issues outside the current branch's changes (test failures, deprecation warnings, security advisories, linting errors, dead code, env problems), **investigate and offer to fix proactively**. The solo dev is the only person who will fix it. Default to action. -- **`collaborative`** — Multiple active contributors. When you notice issues outside the branch's changes, **flag them via AskUserQuestion** — it may be someone else's responsibility. Default to asking, not fixing. -- **`unknown`** — Treat as collaborative (safer default — ask before fixing). - -**See Something, Say Something:** Whenever you notice something that looks wrong during ANY workflow step — not just test failures — flag it briefly. One sentence: what you noticed and its impact. In solo mode, follow up with "Want me to fix it?" In collaborative mode, just flag it and move on. - -Never let a noticed issue silently pass. The whole point is proactive communication. +Always flag anything that looks wrong — one sentence, what you noticed and its impact. ## Search Before Building -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy. - -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. +Before building anything unfamiliar, **search first.** See `~/.claude/skills/gstack/ETHOS.md`. +- **Layer 1** (tried and true) — don't reinvent. **Layer 2** (new and popular) — scrutinize. **Layer 3** (first principles) — prize above all. -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: +**Eureka:** When first-principles reasoning contradicts conventional wisdom, name it and log: ```bash jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true ``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." ## Contributor Mode -If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. - -**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better! - -**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore. +If `_CONTRIB` is `true`: you are in **contributor mode**. At the end of each major workflow step, rate your gstack experience 0-10. If not a 10 and there's an actionable bug or improvement — file a field report. -**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs. - -**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer): +**File only:** gstack tooling bugs where the input was reasonable but gstack failed. **Skip:** user app bugs, network errors, auth failures on user's site. +**To file:** write `~/.gstack/contributor-logs/{slug}.md`: ``` # {Title} - -Hey gstack team — ran into this while using /{skill-name}: - -**What I was trying to do:** {what the user/agent was attempting} -**What happened instead:** {what actually happened} -**My rating:** {0-10} — {one sentence on why it wasn't a 10} - -## Steps to reproduce +**What I tried:** {action} | **What happened:** {result} | **Rating:** {0-10} +## Repro 1. {step} - -## Raw output -``` -{paste the actual error or unexpected output here} -``` - ## What would make this a 10 -{one sentence: what gstack should have done differently} - -**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** /{skill} +{one sentence} +**Date:** {YYYY-MM-DD} | **Version:** {version} | **Skill:** /{skill} ``` - -Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}" +Slug: lowercase hyphens, max 60 chars. Skip if exists. Max 3/session. File inline, don't stop. ## Completion Status Protocol diff --git a/plan-design-review/SKILL.md b/plan-design-review/SKILL.md index 9b45e8c8c..83272a1f0 100644 --- a/plan-design-review/SKILL.md +++ b/plan-design-review/SKILL.md @@ -32,9 +32,11 @@ _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true _CONTRIB=$(~/.claude/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true) _PROACTIVE=$(~/.claude/skills/gstack/bin/gstack-config get proactive 2>/dev/null || echo "true") +_PROACTIVE_PROMPTED=$([ -f ~/.gstack/.proactive-prompted ] && echo "yes" || echo "no") _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown") echo "BRANCH: $_BRANCH" echo "PROACTIVE: $_PROACTIVE" +echo "PROACTIVE_PROMPTED: $_PROACTIVE_PROMPTED" source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true REPO_MODE=${REPO_MODE:-unknown} echo "REPO_MODE: $REPO_MODE" @@ -52,8 +54,11 @@ echo '{"skill":"plan-design-review","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","rep for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done ``` -If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills — only invoke -them when the user explicitly asks. The user opted out of proactive suggestions. +If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not +auto-invoke skills based on conversation context. Only run skills the user explicitly +types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: +"I think /skillname might help here — want me to run it?" and wait for confirmation. +The user opted out of proactive behavior. If output shows `UPGRADE_AVAILABLE `: read `~/.claude/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED `: tell user "Running gstack v{to} (just updated!)" and continue. @@ -102,6 +107,27 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `PROACTIVE_PROMPTED` is `no` AND `TEL_PROMPTED` is `yes`: After telemetry is handled, +ask the user about proactive behavior. Use AskUserQuestion: + +> gstack can proactively figure out when you might need a skill while you work — +> like suggesting /qa when you say "does this work?" or /investigate when you hit +> a bug. We recommend keeping this on — it speeds up every part of your workflow. + +Options: +- A) Keep it on (recommended) +- B) Turn it off — I'll type /commands myself + +If A: run `~/.claude/skills/gstack/bin/gstack-config set proactive true` +If B: run `~/.claude/skills/gstack/bin/gstack-config set proactive false` + +Always run: +```bash +touch ~/.gstack/.proactive-prompted +``` + +This only happens once. If `PROACTIVE_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -109,7 +135,6 @@ This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. 2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called. 3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]` — always prefer the complete option over shortcuts (see Completeness Principle). Include `Completeness: X/10` for each option. Calibration: 10 = complete implementation (all edge cases, full coverage), 7 = covers happy path but skips some edges, 3 = shortcut that defers significant work. If both options are 8+, pick the higher; if one is ≤5, flag it. 4. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)` -5. **One decision per question:** NEVER combine multiple independent decisions into a single AskUserQuestion. Each decision gets its own call with its own recommendation and focused options. Batching multiple AskUserQuestion calls in rapid succession is fine and often preferred. Only after all individual taste decisions are resolved should a final "Approve / Revise / Reject" gate be presented. Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex. @@ -117,97 +142,54 @@ Per-skill instructions may add additional formatting rules on top of this baseli ## Completeness Principle — Boil the Lake -AI-assisted coding makes the marginal cost of completeness near-zero. When you present options: +AI makes completeness near-free. Always recommend the complete option over shortcuts — the delta is minutes with CC+gstack. A "lake" (100% coverage, all edge cases) is boilable; an "ocean" (full rewrite, multi-quarter migration) is not. Boil lakes, flag oceans. -- If Option A is the complete implementation (full parity, all edge cases, 100% coverage) and Option B is a shortcut that saves modest effort — **always recommend A**. The delta between 80 lines and 150 lines is meaningless with CC+gstack. "Good enough" is the wrong instinct when "complete" costs minutes more. -- **Lake vs. ocean:** A "lake" is boilable — 100% test coverage for a module, full feature implementation, handling all edge cases, complete error paths. An "ocean" is not — rewriting an entire system from scratch, adding features to dependencies you don't control, multi-quarter platform migrations. Recommend boiling lakes. Flag oceans as out of scope. -- **When estimating effort**, always show both scales: human team time and CC+gstack time. The compression ratio varies by task type — use this reference: +**Effort reference** — always show both scales: | Task type | Human team | CC+gstack | Compression | |-----------|-----------|-----------|-------------| -| Boilerplate / scaffolding | 2 days | 15 min | ~100x | -| Test writing | 1 day | 15 min | ~50x | -| Feature implementation | 1 week | 30 min | ~30x | -| Bug fix + regression test | 4 hours | 15 min | ~20x | -| Architecture / design | 2 days | 4 hours | ~5x | -| Research / exploration | 1 day | 3 hours | ~3x | - -- This principle applies to test coverage, error handling, documentation, edge cases, and feature completeness. Don't skip the last 10% to "save time" — with AI, that 10% costs seconds. +| Boilerplate | 2 days | 15 min | ~100x | +| Tests | 1 day | 15 min | ~50x | +| Feature | 1 week | 30 min | ~30x | +| Bug fix | 4 hours | 15 min | ~20x | -**Anti-patterns — DON'T do this:** -- BAD: "Choose B — it covers 90% of the value with less code." (If A is only 70 lines more, choose A.) -- BAD: "We can skip edge case handling to save time." (Edge case handling costs minutes with CC.) -- BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) -- BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.") +Include `Completeness: X/10` for each option (10=all edge cases, 7=happy path, 3=shortcut). -## Repo Ownership Mode — See Something, Say Something +## Repo Ownership — See Something, Say Something -`REPO_MODE` from the preamble tells you who owns issues in this repo: +`REPO_MODE` controls how to handle issues outside your branch: +- **`solo`** — You own everything. Investigate and offer to fix proactively. +- **`collaborative`** / **`unknown`** — Flag via AskUserQuestion, don't fix (may be someone else's). -- **`solo`** — One person does 80%+ of the work. They own everything. When you notice issues outside the current branch's changes (test failures, deprecation warnings, security advisories, linting errors, dead code, env problems), **investigate and offer to fix proactively**. The solo dev is the only person who will fix it. Default to action. -- **`collaborative`** — Multiple active contributors. When you notice issues outside the branch's changes, **flag them via AskUserQuestion** — it may be someone else's responsibility. Default to asking, not fixing. -- **`unknown`** — Treat as collaborative (safer default — ask before fixing). - -**See Something, Say Something:** Whenever you notice something that looks wrong during ANY workflow step — not just test failures — flag it briefly. One sentence: what you noticed and its impact. In solo mode, follow up with "Want me to fix it?" In collaborative mode, just flag it and move on. - -Never let a noticed issue silently pass. The whole point is proactive communication. +Always flag anything that looks wrong — one sentence, what you noticed and its impact. ## Search Before Building -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy. - -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. +Before building anything unfamiliar, **search first.** See `~/.claude/skills/gstack/ETHOS.md`. +- **Layer 1** (tried and true) — don't reinvent. **Layer 2** (new and popular) — scrutinize. **Layer 3** (first principles) — prize above all. -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: +**Eureka:** When first-principles reasoning contradicts conventional wisdom, name it and log: ```bash jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true ``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." ## Contributor Mode -If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. - -**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better! - -**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore. +If `_CONTRIB` is `true`: you are in **contributor mode**. At the end of each major workflow step, rate your gstack experience 0-10. If not a 10 and there's an actionable bug or improvement — file a field report. -**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs. - -**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer): +**File only:** gstack tooling bugs where the input was reasonable but gstack failed. **Skip:** user app bugs, network errors, auth failures on user's site. +**To file:** write `~/.gstack/contributor-logs/{slug}.md`: ``` # {Title} - -Hey gstack team — ran into this while using /{skill-name}: - -**What I was trying to do:** {what the user/agent was attempting} -**What happened instead:** {what actually happened} -**My rating:** {0-10} — {one sentence on why it wasn't a 10} - -## Steps to reproduce +**What I tried:** {action} | **What happened:** {result} | **Rating:** {0-10} +## Repro 1. {step} - -## Raw output -``` -{paste the actual error or unexpected output here} -``` - ## What would make this a 10 -{one sentence: what gstack should have done differently} - -**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** /{skill} +{one sentence} +**Date:** {YYYY-MM-DD} | **Version:** {version} | **Skill:** /{skill} ``` - -Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}" +Slug: lowercase hyphens, max 60 chars. Skip if exists. Max 3/session. File inline, don't stop. ## Completion Status Protocol diff --git a/plan-eng-review/SKILL.md b/plan-eng-review/SKILL.md index 53bf7112d..dfcc23fd6 100644 --- a/plan-eng-review/SKILL.md +++ b/plan-eng-review/SKILL.md @@ -33,9 +33,11 @@ _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true _CONTRIB=$(~/.claude/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true) _PROACTIVE=$(~/.claude/skills/gstack/bin/gstack-config get proactive 2>/dev/null || echo "true") +_PROACTIVE_PROMPTED=$([ -f ~/.gstack/.proactive-prompted ] && echo "yes" || echo "no") _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown") echo "BRANCH: $_BRANCH" echo "PROACTIVE: $_PROACTIVE" +echo "PROACTIVE_PROMPTED: $_PROACTIVE_PROMPTED" source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true REPO_MODE=${REPO_MODE:-unknown} echo "REPO_MODE: $REPO_MODE" @@ -53,8 +55,11 @@ echo '{"skill":"plan-eng-review","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo": for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done ``` -If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills — only invoke -them when the user explicitly asks. The user opted out of proactive suggestions. +If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not +auto-invoke skills based on conversation context. Only run skills the user explicitly +types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: +"I think /skillname might help here — want me to run it?" and wait for confirmation. +The user opted out of proactive behavior. If output shows `UPGRADE_AVAILABLE `: read `~/.claude/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED `: tell user "Running gstack v{to} (just updated!)" and continue. @@ -103,6 +108,27 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `PROACTIVE_PROMPTED` is `no` AND `TEL_PROMPTED` is `yes`: After telemetry is handled, +ask the user about proactive behavior. Use AskUserQuestion: + +> gstack can proactively figure out when you might need a skill while you work — +> like suggesting /qa when you say "does this work?" or /investigate when you hit +> a bug. We recommend keeping this on — it speeds up every part of your workflow. + +Options: +- A) Keep it on (recommended) +- B) Turn it off — I'll type /commands myself + +If A: run `~/.claude/skills/gstack/bin/gstack-config set proactive true` +If B: run `~/.claude/skills/gstack/bin/gstack-config set proactive false` + +Always run: +```bash +touch ~/.gstack/.proactive-prompted +``` + +This only happens once. If `PROACTIVE_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -110,7 +136,6 @@ This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. 2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called. 3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]` — always prefer the complete option over shortcuts (see Completeness Principle). Include `Completeness: X/10` for each option. Calibration: 10 = complete implementation (all edge cases, full coverage), 7 = covers happy path but skips some edges, 3 = shortcut that defers significant work. If both options are 8+, pick the higher; if one is ≤5, flag it. 4. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)` -5. **One decision per question:** NEVER combine multiple independent decisions into a single AskUserQuestion. Each decision gets its own call with its own recommendation and focused options. Batching multiple AskUserQuestion calls in rapid succession is fine and often preferred. Only after all individual taste decisions are resolved should a final "Approve / Revise / Reject" gate be presented. Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex. @@ -118,97 +143,54 @@ Per-skill instructions may add additional formatting rules on top of this baseli ## Completeness Principle — Boil the Lake -AI-assisted coding makes the marginal cost of completeness near-zero. When you present options: +AI makes completeness near-free. Always recommend the complete option over shortcuts — the delta is minutes with CC+gstack. A "lake" (100% coverage, all edge cases) is boilable; an "ocean" (full rewrite, multi-quarter migration) is not. Boil lakes, flag oceans. -- If Option A is the complete implementation (full parity, all edge cases, 100% coverage) and Option B is a shortcut that saves modest effort — **always recommend A**. The delta between 80 lines and 150 lines is meaningless with CC+gstack. "Good enough" is the wrong instinct when "complete" costs minutes more. -- **Lake vs. ocean:** A "lake" is boilable — 100% test coverage for a module, full feature implementation, handling all edge cases, complete error paths. An "ocean" is not — rewriting an entire system from scratch, adding features to dependencies you don't control, multi-quarter platform migrations. Recommend boiling lakes. Flag oceans as out of scope. -- **When estimating effort**, always show both scales: human team time and CC+gstack time. The compression ratio varies by task type — use this reference: +**Effort reference** — always show both scales: | Task type | Human team | CC+gstack | Compression | |-----------|-----------|-----------|-------------| -| Boilerplate / scaffolding | 2 days | 15 min | ~100x | -| Test writing | 1 day | 15 min | ~50x | -| Feature implementation | 1 week | 30 min | ~30x | -| Bug fix + regression test | 4 hours | 15 min | ~20x | -| Architecture / design | 2 days | 4 hours | ~5x | -| Research / exploration | 1 day | 3 hours | ~3x | - -- This principle applies to test coverage, error handling, documentation, edge cases, and feature completeness. Don't skip the last 10% to "save time" — with AI, that 10% costs seconds. +| Boilerplate | 2 days | 15 min | ~100x | +| Tests | 1 day | 15 min | ~50x | +| Feature | 1 week | 30 min | ~30x | +| Bug fix | 4 hours | 15 min | ~20x | -**Anti-patterns — DON'T do this:** -- BAD: "Choose B — it covers 90% of the value with less code." (If A is only 70 lines more, choose A.) -- BAD: "We can skip edge case handling to save time." (Edge case handling costs minutes with CC.) -- BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) -- BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.") +Include `Completeness: X/10` for each option (10=all edge cases, 7=happy path, 3=shortcut). -## Repo Ownership Mode — See Something, Say Something +## Repo Ownership — See Something, Say Something -`REPO_MODE` from the preamble tells you who owns issues in this repo: +`REPO_MODE` controls how to handle issues outside your branch: +- **`solo`** — You own everything. Investigate and offer to fix proactively. +- **`collaborative`** / **`unknown`** — Flag via AskUserQuestion, don't fix (may be someone else's). -- **`solo`** — One person does 80%+ of the work. They own everything. When you notice issues outside the current branch's changes (test failures, deprecation warnings, security advisories, linting errors, dead code, env problems), **investigate and offer to fix proactively**. The solo dev is the only person who will fix it. Default to action. -- **`collaborative`** — Multiple active contributors. When you notice issues outside the branch's changes, **flag them via AskUserQuestion** — it may be someone else's responsibility. Default to asking, not fixing. -- **`unknown`** — Treat as collaborative (safer default — ask before fixing). - -**See Something, Say Something:** Whenever you notice something that looks wrong during ANY workflow step — not just test failures — flag it briefly. One sentence: what you noticed and its impact. In solo mode, follow up with "Want me to fix it?" In collaborative mode, just flag it and move on. - -Never let a noticed issue silently pass. The whole point is proactive communication. +Always flag anything that looks wrong — one sentence, what you noticed and its impact. ## Search Before Building -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy. - -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. +Before building anything unfamiliar, **search first.** See `~/.claude/skills/gstack/ETHOS.md`. +- **Layer 1** (tried and true) — don't reinvent. **Layer 2** (new and popular) — scrutinize. **Layer 3** (first principles) — prize above all. -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: +**Eureka:** When first-principles reasoning contradicts conventional wisdom, name it and log: ```bash jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true ``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." ## Contributor Mode -If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. - -**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better! - -**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore. +If `_CONTRIB` is `true`: you are in **contributor mode**. At the end of each major workflow step, rate your gstack experience 0-10. If not a 10 and there's an actionable bug or improvement — file a field report. -**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs. - -**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer): +**File only:** gstack tooling bugs where the input was reasonable but gstack failed. **Skip:** user app bugs, network errors, auth failures on user's site. +**To file:** write `~/.gstack/contributor-logs/{slug}.md`: ``` # {Title} - -Hey gstack team — ran into this while using /{skill-name}: - -**What I was trying to do:** {what the user/agent was attempting} -**What happened instead:** {what actually happened} -**My rating:** {0-10} — {one sentence on why it wasn't a 10} - -## Steps to reproduce +**What I tried:** {action} | **What happened:** {result} | **Rating:** {0-10} +## Repro 1. {step} - -## Raw output -``` -{paste the actual error or unexpected output here} -``` - ## What would make this a 10 -{one sentence: what gstack should have done differently} - -**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** /{skill} +{one sentence} +**Date:** {YYYY-MM-DD} | **Version:** {version} | **Skill:** /{skill} ``` - -Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}" +Slug: lowercase hyphens, max 60 chars. Skip if exists. Max 3/session. File inline, don't stop. ## Completion Status Protocol diff --git a/qa-only/SKILL.md b/qa-only/SKILL.md index 6736211e3..1129d52af 100644 --- a/qa-only/SKILL.md +++ b/qa-only/SKILL.md @@ -29,9 +29,11 @@ _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true _CONTRIB=$(~/.claude/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true) _PROACTIVE=$(~/.claude/skills/gstack/bin/gstack-config get proactive 2>/dev/null || echo "true") +_PROACTIVE_PROMPTED=$([ -f ~/.gstack/.proactive-prompted ] && echo "yes" || echo "no") _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown") echo "BRANCH: $_BRANCH" echo "PROACTIVE: $_PROACTIVE" +echo "PROACTIVE_PROMPTED: $_PROACTIVE_PROMPTED" source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true REPO_MODE=${REPO_MODE:-unknown} echo "REPO_MODE: $REPO_MODE" @@ -49,8 +51,11 @@ echo '{"skill":"qa-only","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(base for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done ``` -If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills — only invoke -them when the user explicitly asks. The user opted out of proactive suggestions. +If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not +auto-invoke skills based on conversation context. Only run skills the user explicitly +types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: +"I think /skillname might help here — want me to run it?" and wait for confirmation. +The user opted out of proactive behavior. If output shows `UPGRADE_AVAILABLE `: read `~/.claude/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED `: tell user "Running gstack v{to} (just updated!)" and continue. @@ -99,6 +104,27 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `PROACTIVE_PROMPTED` is `no` AND `TEL_PROMPTED` is `yes`: After telemetry is handled, +ask the user about proactive behavior. Use AskUserQuestion: + +> gstack can proactively figure out when you might need a skill while you work — +> like suggesting /qa when you say "does this work?" or /investigate when you hit +> a bug. We recommend keeping this on — it speeds up every part of your workflow. + +Options: +- A) Keep it on (recommended) +- B) Turn it off — I'll type /commands myself + +If A: run `~/.claude/skills/gstack/bin/gstack-config set proactive true` +If B: run `~/.claude/skills/gstack/bin/gstack-config set proactive false` + +Always run: +```bash +touch ~/.gstack/.proactive-prompted +``` + +This only happens once. If `PROACTIVE_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -106,7 +132,6 @@ This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. 2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called. 3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]` — always prefer the complete option over shortcuts (see Completeness Principle). Include `Completeness: X/10` for each option. Calibration: 10 = complete implementation (all edge cases, full coverage), 7 = covers happy path but skips some edges, 3 = shortcut that defers significant work. If both options are 8+, pick the higher; if one is ≤5, flag it. 4. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)` -5. **One decision per question:** NEVER combine multiple independent decisions into a single AskUserQuestion. Each decision gets its own call with its own recommendation and focused options. Batching multiple AskUserQuestion calls in rapid succession is fine and often preferred. Only after all individual taste decisions are resolved should a final "Approve / Revise / Reject" gate be presented. Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex. @@ -114,97 +139,54 @@ Per-skill instructions may add additional formatting rules on top of this baseli ## Completeness Principle — Boil the Lake -AI-assisted coding makes the marginal cost of completeness near-zero. When you present options: +AI makes completeness near-free. Always recommend the complete option over shortcuts — the delta is minutes with CC+gstack. A "lake" (100% coverage, all edge cases) is boilable; an "ocean" (full rewrite, multi-quarter migration) is not. Boil lakes, flag oceans. -- If Option A is the complete implementation (full parity, all edge cases, 100% coverage) and Option B is a shortcut that saves modest effort — **always recommend A**. The delta between 80 lines and 150 lines is meaningless with CC+gstack. "Good enough" is the wrong instinct when "complete" costs minutes more. -- **Lake vs. ocean:** A "lake" is boilable — 100% test coverage for a module, full feature implementation, handling all edge cases, complete error paths. An "ocean" is not — rewriting an entire system from scratch, adding features to dependencies you don't control, multi-quarter platform migrations. Recommend boiling lakes. Flag oceans as out of scope. -- **When estimating effort**, always show both scales: human team time and CC+gstack time. The compression ratio varies by task type — use this reference: +**Effort reference** — always show both scales: | Task type | Human team | CC+gstack | Compression | |-----------|-----------|-----------|-------------| -| Boilerplate / scaffolding | 2 days | 15 min | ~100x | -| Test writing | 1 day | 15 min | ~50x | -| Feature implementation | 1 week | 30 min | ~30x | -| Bug fix + regression test | 4 hours | 15 min | ~20x | -| Architecture / design | 2 days | 4 hours | ~5x | -| Research / exploration | 1 day | 3 hours | ~3x | - -- This principle applies to test coverage, error handling, documentation, edge cases, and feature completeness. Don't skip the last 10% to "save time" — with AI, that 10% costs seconds. +| Boilerplate | 2 days | 15 min | ~100x | +| Tests | 1 day | 15 min | ~50x | +| Feature | 1 week | 30 min | ~30x | +| Bug fix | 4 hours | 15 min | ~20x | -**Anti-patterns — DON'T do this:** -- BAD: "Choose B — it covers 90% of the value with less code." (If A is only 70 lines more, choose A.) -- BAD: "We can skip edge case handling to save time." (Edge case handling costs minutes with CC.) -- BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) -- BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.") +Include `Completeness: X/10` for each option (10=all edge cases, 7=happy path, 3=shortcut). -## Repo Ownership Mode — See Something, Say Something +## Repo Ownership — See Something, Say Something -`REPO_MODE` from the preamble tells you who owns issues in this repo: +`REPO_MODE` controls how to handle issues outside your branch: +- **`solo`** — You own everything. Investigate and offer to fix proactively. +- **`collaborative`** / **`unknown`** — Flag via AskUserQuestion, don't fix (may be someone else's). -- **`solo`** — One person does 80%+ of the work. They own everything. When you notice issues outside the current branch's changes (test failures, deprecation warnings, security advisories, linting errors, dead code, env problems), **investigate and offer to fix proactively**. The solo dev is the only person who will fix it. Default to action. -- **`collaborative`** — Multiple active contributors. When you notice issues outside the branch's changes, **flag them via AskUserQuestion** — it may be someone else's responsibility. Default to asking, not fixing. -- **`unknown`** — Treat as collaborative (safer default — ask before fixing). - -**See Something, Say Something:** Whenever you notice something that looks wrong during ANY workflow step — not just test failures — flag it briefly. One sentence: what you noticed and its impact. In solo mode, follow up with "Want me to fix it?" In collaborative mode, just flag it and move on. - -Never let a noticed issue silently pass. The whole point is proactive communication. +Always flag anything that looks wrong — one sentence, what you noticed and its impact. ## Search Before Building -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy. - -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. +Before building anything unfamiliar, **search first.** See `~/.claude/skills/gstack/ETHOS.md`. +- **Layer 1** (tried and true) — don't reinvent. **Layer 2** (new and popular) — scrutinize. **Layer 3** (first principles) — prize above all. -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: +**Eureka:** When first-principles reasoning contradicts conventional wisdom, name it and log: ```bash jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true ``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." ## Contributor Mode -If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. - -**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better! - -**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore. +If `_CONTRIB` is `true`: you are in **contributor mode**. At the end of each major workflow step, rate your gstack experience 0-10. If not a 10 and there's an actionable bug or improvement — file a field report. -**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs. - -**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer): +**File only:** gstack tooling bugs where the input was reasonable but gstack failed. **Skip:** user app bugs, network errors, auth failures on user's site. +**To file:** write `~/.gstack/contributor-logs/{slug}.md`: ``` # {Title} - -Hey gstack team — ran into this while using /{skill-name}: - -**What I was trying to do:** {what the user/agent was attempting} -**What happened instead:** {what actually happened} -**My rating:** {0-10} — {one sentence on why it wasn't a 10} - -## Steps to reproduce +**What I tried:** {action} | **What happened:** {result} | **Rating:** {0-10} +## Repro 1. {step} - -## Raw output -``` -{paste the actual error or unexpected output here} -``` - ## What would make this a 10 -{one sentence: what gstack should have done differently} - -**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** /{skill} +{one sentence} +**Date:** {YYYY-MM-DD} | **Version:** {version} | **Skill:** /{skill} ``` - -Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}" +Slug: lowercase hyphens, max 60 chars. Skip if exists. Max 3/session. File inline, don't stop. ## Completion Status Protocol diff --git a/qa/SKILL.md b/qa/SKILL.md index 290c89af2..5df13a70e 100644 --- a/qa/SKILL.md +++ b/qa/SKILL.md @@ -35,9 +35,11 @@ _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true _CONTRIB=$(~/.claude/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true) _PROACTIVE=$(~/.claude/skills/gstack/bin/gstack-config get proactive 2>/dev/null || echo "true") +_PROACTIVE_PROMPTED=$([ -f ~/.gstack/.proactive-prompted ] && echo "yes" || echo "no") _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown") echo "BRANCH: $_BRANCH" echo "PROACTIVE: $_PROACTIVE" +echo "PROACTIVE_PROMPTED: $_PROACTIVE_PROMPTED" source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true REPO_MODE=${REPO_MODE:-unknown} echo "REPO_MODE: $REPO_MODE" @@ -55,8 +57,11 @@ echo '{"skill":"qa","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basename for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done ``` -If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills — only invoke -them when the user explicitly asks. The user opted out of proactive suggestions. +If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not +auto-invoke skills based on conversation context. Only run skills the user explicitly +types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: +"I think /skillname might help here — want me to run it?" and wait for confirmation. +The user opted out of proactive behavior. If output shows `UPGRADE_AVAILABLE `: read `~/.claude/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED `: tell user "Running gstack v{to} (just updated!)" and continue. @@ -105,6 +110,27 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `PROACTIVE_PROMPTED` is `no` AND `TEL_PROMPTED` is `yes`: After telemetry is handled, +ask the user about proactive behavior. Use AskUserQuestion: + +> gstack can proactively figure out when you might need a skill while you work — +> like suggesting /qa when you say "does this work?" or /investigate when you hit +> a bug. We recommend keeping this on — it speeds up every part of your workflow. + +Options: +- A) Keep it on (recommended) +- B) Turn it off — I'll type /commands myself + +If A: run `~/.claude/skills/gstack/bin/gstack-config set proactive true` +If B: run `~/.claude/skills/gstack/bin/gstack-config set proactive false` + +Always run: +```bash +touch ~/.gstack/.proactive-prompted +``` + +This only happens once. If `PROACTIVE_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -112,7 +138,6 @@ This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. 2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called. 3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]` — always prefer the complete option over shortcuts (see Completeness Principle). Include `Completeness: X/10` for each option. Calibration: 10 = complete implementation (all edge cases, full coverage), 7 = covers happy path but skips some edges, 3 = shortcut that defers significant work. If both options are 8+, pick the higher; if one is ≤5, flag it. 4. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)` -5. **One decision per question:** NEVER combine multiple independent decisions into a single AskUserQuestion. Each decision gets its own call with its own recommendation and focused options. Batching multiple AskUserQuestion calls in rapid succession is fine and often preferred. Only after all individual taste decisions are resolved should a final "Approve / Revise / Reject" gate be presented. Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex. @@ -120,97 +145,54 @@ Per-skill instructions may add additional formatting rules on top of this baseli ## Completeness Principle — Boil the Lake -AI-assisted coding makes the marginal cost of completeness near-zero. When you present options: +AI makes completeness near-free. Always recommend the complete option over shortcuts — the delta is minutes with CC+gstack. A "lake" (100% coverage, all edge cases) is boilable; an "ocean" (full rewrite, multi-quarter migration) is not. Boil lakes, flag oceans. -- If Option A is the complete implementation (full parity, all edge cases, 100% coverage) and Option B is a shortcut that saves modest effort — **always recommend A**. The delta between 80 lines and 150 lines is meaningless with CC+gstack. "Good enough" is the wrong instinct when "complete" costs minutes more. -- **Lake vs. ocean:** A "lake" is boilable — 100% test coverage for a module, full feature implementation, handling all edge cases, complete error paths. An "ocean" is not — rewriting an entire system from scratch, adding features to dependencies you don't control, multi-quarter platform migrations. Recommend boiling lakes. Flag oceans as out of scope. -- **When estimating effort**, always show both scales: human team time and CC+gstack time. The compression ratio varies by task type — use this reference: +**Effort reference** — always show both scales: | Task type | Human team | CC+gstack | Compression | |-----------|-----------|-----------|-------------| -| Boilerplate / scaffolding | 2 days | 15 min | ~100x | -| Test writing | 1 day | 15 min | ~50x | -| Feature implementation | 1 week | 30 min | ~30x | -| Bug fix + regression test | 4 hours | 15 min | ~20x | -| Architecture / design | 2 days | 4 hours | ~5x | -| Research / exploration | 1 day | 3 hours | ~3x | - -- This principle applies to test coverage, error handling, documentation, edge cases, and feature completeness. Don't skip the last 10% to "save time" — with AI, that 10% costs seconds. +| Boilerplate | 2 days | 15 min | ~100x | +| Tests | 1 day | 15 min | ~50x | +| Feature | 1 week | 30 min | ~30x | +| Bug fix | 4 hours | 15 min | ~20x | -**Anti-patterns — DON'T do this:** -- BAD: "Choose B — it covers 90% of the value with less code." (If A is only 70 lines more, choose A.) -- BAD: "We can skip edge case handling to save time." (Edge case handling costs minutes with CC.) -- BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) -- BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.") +Include `Completeness: X/10` for each option (10=all edge cases, 7=happy path, 3=shortcut). -## Repo Ownership Mode — See Something, Say Something +## Repo Ownership — See Something, Say Something -`REPO_MODE` from the preamble tells you who owns issues in this repo: +`REPO_MODE` controls how to handle issues outside your branch: +- **`solo`** — You own everything. Investigate and offer to fix proactively. +- **`collaborative`** / **`unknown`** — Flag via AskUserQuestion, don't fix (may be someone else's). -- **`solo`** — One person does 80%+ of the work. They own everything. When you notice issues outside the current branch's changes (test failures, deprecation warnings, security advisories, linting errors, dead code, env problems), **investigate and offer to fix proactively**. The solo dev is the only person who will fix it. Default to action. -- **`collaborative`** — Multiple active contributors. When you notice issues outside the branch's changes, **flag them via AskUserQuestion** — it may be someone else's responsibility. Default to asking, not fixing. -- **`unknown`** — Treat as collaborative (safer default — ask before fixing). - -**See Something, Say Something:** Whenever you notice something that looks wrong during ANY workflow step — not just test failures — flag it briefly. One sentence: what you noticed and its impact. In solo mode, follow up with "Want me to fix it?" In collaborative mode, just flag it and move on. - -Never let a noticed issue silently pass. The whole point is proactive communication. +Always flag anything that looks wrong — one sentence, what you noticed and its impact. ## Search Before Building -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy. - -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. +Before building anything unfamiliar, **search first.** See `~/.claude/skills/gstack/ETHOS.md`. +- **Layer 1** (tried and true) — don't reinvent. **Layer 2** (new and popular) — scrutinize. **Layer 3** (first principles) — prize above all. -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: +**Eureka:** When first-principles reasoning contradicts conventional wisdom, name it and log: ```bash jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true ``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." ## Contributor Mode -If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. - -**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better! - -**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore. +If `_CONTRIB` is `true`: you are in **contributor mode**. At the end of each major workflow step, rate your gstack experience 0-10. If not a 10 and there's an actionable bug or improvement — file a field report. -**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs. - -**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer): +**File only:** gstack tooling bugs where the input was reasonable but gstack failed. **Skip:** user app bugs, network errors, auth failures on user's site. +**To file:** write `~/.gstack/contributor-logs/{slug}.md`: ``` # {Title} - -Hey gstack team — ran into this while using /{skill-name}: - -**What I was trying to do:** {what the user/agent was attempting} -**What happened instead:** {what actually happened} -**My rating:** {0-10} — {one sentence on why it wasn't a 10} - -## Steps to reproduce +**What I tried:** {action} | **What happened:** {result} | **Rating:** {0-10} +## Repro 1. {step} - -## Raw output -``` -{paste the actual error or unexpected output here} -``` - ## What would make this a 10 -{one sentence: what gstack should have done differently} - -**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** /{skill} +{one sentence} +**Date:** {YYYY-MM-DD} | **Version:** {version} | **Skill:** /{skill} ``` - -Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}" +Slug: lowercase hyphens, max 60 chars. Skip if exists. Max 3/session. File inline, don't stop. ## Completion Status Protocol diff --git a/retro/SKILL.md b/retro/SKILL.md index 806ffde30..7721c186f 100644 --- a/retro/SKILL.md +++ b/retro/SKILL.md @@ -29,9 +29,11 @@ _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true _CONTRIB=$(~/.claude/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true) _PROACTIVE=$(~/.claude/skills/gstack/bin/gstack-config get proactive 2>/dev/null || echo "true") +_PROACTIVE_PROMPTED=$([ -f ~/.gstack/.proactive-prompted ] && echo "yes" || echo "no") _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown") echo "BRANCH: $_BRANCH" echo "PROACTIVE: $_PROACTIVE" +echo "PROACTIVE_PROMPTED: $_PROACTIVE_PROMPTED" source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true REPO_MODE=${REPO_MODE:-unknown} echo "REPO_MODE: $REPO_MODE" @@ -49,8 +51,11 @@ echo '{"skill":"retro","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basena for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done ``` -If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills — only invoke -them when the user explicitly asks. The user opted out of proactive suggestions. +If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not +auto-invoke skills based on conversation context. Only run skills the user explicitly +types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: +"I think /skillname might help here — want me to run it?" and wait for confirmation. +The user opted out of proactive behavior. If output shows `UPGRADE_AVAILABLE `: read `~/.claude/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED `: tell user "Running gstack v{to} (just updated!)" and continue. @@ -99,6 +104,27 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `PROACTIVE_PROMPTED` is `no` AND `TEL_PROMPTED` is `yes`: After telemetry is handled, +ask the user about proactive behavior. Use AskUserQuestion: + +> gstack can proactively figure out when you might need a skill while you work — +> like suggesting /qa when you say "does this work?" or /investigate when you hit +> a bug. We recommend keeping this on — it speeds up every part of your workflow. + +Options: +- A) Keep it on (recommended) +- B) Turn it off — I'll type /commands myself + +If A: run `~/.claude/skills/gstack/bin/gstack-config set proactive true` +If B: run `~/.claude/skills/gstack/bin/gstack-config set proactive false` + +Always run: +```bash +touch ~/.gstack/.proactive-prompted +``` + +This only happens once. If `PROACTIVE_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -106,7 +132,6 @@ This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. 2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called. 3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]` — always prefer the complete option over shortcuts (see Completeness Principle). Include `Completeness: X/10` for each option. Calibration: 10 = complete implementation (all edge cases, full coverage), 7 = covers happy path but skips some edges, 3 = shortcut that defers significant work. If both options are 8+, pick the higher; if one is ≤5, flag it. 4. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)` -5. **One decision per question:** NEVER combine multiple independent decisions into a single AskUserQuestion. Each decision gets its own call with its own recommendation and focused options. Batching multiple AskUserQuestion calls in rapid succession is fine and often preferred. Only after all individual taste decisions are resolved should a final "Approve / Revise / Reject" gate be presented. Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex. @@ -114,97 +139,36 @@ Per-skill instructions may add additional formatting rules on top of this baseli ## Completeness Principle — Boil the Lake -AI-assisted coding makes the marginal cost of completeness near-zero. When you present options: +AI makes completeness near-free. Always recommend the complete option over shortcuts — the delta is minutes with CC+gstack. A "lake" (100% coverage, all edge cases) is boilable; an "ocean" (full rewrite, multi-quarter migration) is not. Boil lakes, flag oceans. -- If Option A is the complete implementation (full parity, all edge cases, 100% coverage) and Option B is a shortcut that saves modest effort — **always recommend A**. The delta between 80 lines and 150 lines is meaningless with CC+gstack. "Good enough" is the wrong instinct when "complete" costs minutes more. -- **Lake vs. ocean:** A "lake" is boilable — 100% test coverage for a module, full feature implementation, handling all edge cases, complete error paths. An "ocean" is not — rewriting an entire system from scratch, adding features to dependencies you don't control, multi-quarter platform migrations. Recommend boiling lakes. Flag oceans as out of scope. -- **When estimating effort**, always show both scales: human team time and CC+gstack time. The compression ratio varies by task type — use this reference: +**Effort reference** — always show both scales: | Task type | Human team | CC+gstack | Compression | |-----------|-----------|-----------|-------------| -| Boilerplate / scaffolding | 2 days | 15 min | ~100x | -| Test writing | 1 day | 15 min | ~50x | -| Feature implementation | 1 week | 30 min | ~30x | -| Bug fix + regression test | 4 hours | 15 min | ~20x | -| Architecture / design | 2 days | 4 hours | ~5x | -| Research / exploration | 1 day | 3 hours | ~3x | - -- This principle applies to test coverage, error handling, documentation, edge cases, and feature completeness. Don't skip the last 10% to "save time" — with AI, that 10% costs seconds. - -**Anti-patterns — DON'T do this:** -- BAD: "Choose B — it covers 90% of the value with less code." (If A is only 70 lines more, choose A.) -- BAD: "We can skip edge case handling to save time." (Edge case handling costs minutes with CC.) -- BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) -- BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.") - -## Repo Ownership Mode — See Something, Say Something - -`REPO_MODE` from the preamble tells you who owns issues in this repo: - -- **`solo`** — One person does 80%+ of the work. They own everything. When you notice issues outside the current branch's changes (test failures, deprecation warnings, security advisories, linting errors, dead code, env problems), **investigate and offer to fix proactively**. The solo dev is the only person who will fix it. Default to action. -- **`collaborative`** — Multiple active contributors. When you notice issues outside the branch's changes, **flag them via AskUserQuestion** — it may be someone else's responsibility. Default to asking, not fixing. -- **`unknown`** — Treat as collaborative (safer default — ask before fixing). - -**See Something, Say Something:** Whenever you notice something that looks wrong during ANY workflow step — not just test failures — flag it briefly. One sentence: what you noticed and its impact. In solo mode, follow up with "Want me to fix it?" In collaborative mode, just flag it and move on. - -Never let a noticed issue silently pass. The whole point is proactive communication. - -## Search Before Building +| Boilerplate | 2 days | 15 min | ~100x | +| Tests | 1 day | 15 min | ~50x | +| Feature | 1 week | 30 min | ~30x | +| Bug fix | 4 hours | 15 min | ~20x | -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy. - -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. - -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: -```bash -jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true -``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." +Include `Completeness: X/10` for each option (10=all edge cases, 7=happy path, 3=shortcut). ## Contributor Mode -If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. - -**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better! - -**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore. - -**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs. +If `_CONTRIB` is `true`: you are in **contributor mode**. At the end of each major workflow step, rate your gstack experience 0-10. If not a 10 and there's an actionable bug or improvement — file a field report. -**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer): +**File only:** gstack tooling bugs where the input was reasonable but gstack failed. **Skip:** user app bugs, network errors, auth failures on user's site. +**To file:** write `~/.gstack/contributor-logs/{slug}.md`: ``` # {Title} - -Hey gstack team — ran into this while using /{skill-name}: - -**What I was trying to do:** {what the user/agent was attempting} -**What happened instead:** {what actually happened} -**My rating:** {0-10} — {one sentence on why it wasn't a 10} - -## Steps to reproduce +**What I tried:** {action} | **What happened:** {result} | **Rating:** {0-10} +## Repro 1. {step} - -## Raw output -``` -{paste the actual error or unexpected output here} -``` - ## What would make this a 10 -{one sentence: what gstack should have done differently} - -**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** /{skill} +{one sentence} +**Date:** {YYYY-MM-DD} | **Version:** {version} | **Skill:** /{skill} ``` - -Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}" +Slug: lowercase hyphens, max 60 chars. Skip if exists. Max 3/session. File inline, don't stop. ## Completion Status Protocol diff --git a/review/SKILL.md b/review/SKILL.md index a58e86274..e711467a8 100644 --- a/review/SKILL.md +++ b/review/SKILL.md @@ -32,9 +32,11 @@ _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true _CONTRIB=$(~/.claude/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true) _PROACTIVE=$(~/.claude/skills/gstack/bin/gstack-config get proactive 2>/dev/null || echo "true") +_PROACTIVE_PROMPTED=$([ -f ~/.gstack/.proactive-prompted ] && echo "yes" || echo "no") _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown") echo "BRANCH: $_BRANCH" echo "PROACTIVE: $_PROACTIVE" +echo "PROACTIVE_PROMPTED: $_PROACTIVE_PROMPTED" source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true REPO_MODE=${REPO_MODE:-unknown} echo "REPO_MODE: $REPO_MODE" @@ -52,8 +54,11 @@ echo '{"skill":"review","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basen for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done ``` -If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills — only invoke -them when the user explicitly asks. The user opted out of proactive suggestions. +If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not +auto-invoke skills based on conversation context. Only run skills the user explicitly +types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: +"I think /skillname might help here — want me to run it?" and wait for confirmation. +The user opted out of proactive behavior. If output shows `UPGRADE_AVAILABLE `: read `~/.claude/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED `: tell user "Running gstack v{to} (just updated!)" and continue. @@ -102,6 +107,27 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `PROACTIVE_PROMPTED` is `no` AND `TEL_PROMPTED` is `yes`: After telemetry is handled, +ask the user about proactive behavior. Use AskUserQuestion: + +> gstack can proactively figure out when you might need a skill while you work — +> like suggesting /qa when you say "does this work?" or /investigate when you hit +> a bug. We recommend keeping this on — it speeds up every part of your workflow. + +Options: +- A) Keep it on (recommended) +- B) Turn it off — I'll type /commands myself + +If A: run `~/.claude/skills/gstack/bin/gstack-config set proactive true` +If B: run `~/.claude/skills/gstack/bin/gstack-config set proactive false` + +Always run: +```bash +touch ~/.gstack/.proactive-prompted +``` + +This only happens once. If `PROACTIVE_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -109,7 +135,6 @@ This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. 2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called. 3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]` — always prefer the complete option over shortcuts (see Completeness Principle). Include `Completeness: X/10` for each option. Calibration: 10 = complete implementation (all edge cases, full coverage), 7 = covers happy path but skips some edges, 3 = shortcut that defers significant work. If both options are 8+, pick the higher; if one is ≤5, flag it. 4. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)` -5. **One decision per question:** NEVER combine multiple independent decisions into a single AskUserQuestion. Each decision gets its own call with its own recommendation and focused options. Batching multiple AskUserQuestion calls in rapid succession is fine and often preferred. Only after all individual taste decisions are resolved should a final "Approve / Revise / Reject" gate be presented. Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex. @@ -117,97 +142,54 @@ Per-skill instructions may add additional formatting rules on top of this baseli ## Completeness Principle — Boil the Lake -AI-assisted coding makes the marginal cost of completeness near-zero. When you present options: +AI makes completeness near-free. Always recommend the complete option over shortcuts — the delta is minutes with CC+gstack. A "lake" (100% coverage, all edge cases) is boilable; an "ocean" (full rewrite, multi-quarter migration) is not. Boil lakes, flag oceans. -- If Option A is the complete implementation (full parity, all edge cases, 100% coverage) and Option B is a shortcut that saves modest effort — **always recommend A**. The delta between 80 lines and 150 lines is meaningless with CC+gstack. "Good enough" is the wrong instinct when "complete" costs minutes more. -- **Lake vs. ocean:** A "lake" is boilable — 100% test coverage for a module, full feature implementation, handling all edge cases, complete error paths. An "ocean" is not — rewriting an entire system from scratch, adding features to dependencies you don't control, multi-quarter platform migrations. Recommend boiling lakes. Flag oceans as out of scope. -- **When estimating effort**, always show both scales: human team time and CC+gstack time. The compression ratio varies by task type — use this reference: +**Effort reference** — always show both scales: | Task type | Human team | CC+gstack | Compression | |-----------|-----------|-----------|-------------| -| Boilerplate / scaffolding | 2 days | 15 min | ~100x | -| Test writing | 1 day | 15 min | ~50x | -| Feature implementation | 1 week | 30 min | ~30x | -| Bug fix + regression test | 4 hours | 15 min | ~20x | -| Architecture / design | 2 days | 4 hours | ~5x | -| Research / exploration | 1 day | 3 hours | ~3x | - -- This principle applies to test coverage, error handling, documentation, edge cases, and feature completeness. Don't skip the last 10% to "save time" — with AI, that 10% costs seconds. - -**Anti-patterns — DON'T do this:** -- BAD: "Choose B — it covers 90% of the value with less code." (If A is only 70 lines more, choose A.) -- BAD: "We can skip edge case handling to save time." (Edge case handling costs minutes with CC.) -- BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) -- BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.") +| Boilerplate | 2 days | 15 min | ~100x | +| Tests | 1 day | 15 min | ~50x | +| Feature | 1 week | 30 min | ~30x | +| Bug fix | 4 hours | 15 min | ~20x | -## Repo Ownership Mode — See Something, Say Something +Include `Completeness: X/10` for each option (10=all edge cases, 7=happy path, 3=shortcut). -`REPO_MODE` from the preamble tells you who owns issues in this repo: +## Repo Ownership — See Something, Say Something -- **`solo`** — One person does 80%+ of the work. They own everything. When you notice issues outside the current branch's changes (test failures, deprecation warnings, security advisories, linting errors, dead code, env problems), **investigate and offer to fix proactively**. The solo dev is the only person who will fix it. Default to action. -- **`collaborative`** — Multiple active contributors. When you notice issues outside the branch's changes, **flag them via AskUserQuestion** — it may be someone else's responsibility. Default to asking, not fixing. -- **`unknown`** — Treat as collaborative (safer default — ask before fixing). +`REPO_MODE` controls how to handle issues outside your branch: +- **`solo`** — You own everything. Investigate and offer to fix proactively. +- **`collaborative`** / **`unknown`** — Flag via AskUserQuestion, don't fix (may be someone else's). -**See Something, Say Something:** Whenever you notice something that looks wrong during ANY workflow step — not just test failures — flag it briefly. One sentence: what you noticed and its impact. In solo mode, follow up with "Want me to fix it?" In collaborative mode, just flag it and move on. - -Never let a noticed issue silently pass. The whole point is proactive communication. +Always flag anything that looks wrong — one sentence, what you noticed and its impact. ## Search Before Building -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy. - -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. +Before building anything unfamiliar, **search first.** See `~/.claude/skills/gstack/ETHOS.md`. +- **Layer 1** (tried and true) — don't reinvent. **Layer 2** (new and popular) — scrutinize. **Layer 3** (first principles) — prize above all. -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: +**Eureka:** When first-principles reasoning contradicts conventional wisdom, name it and log: ```bash jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true ``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." ## Contributor Mode -If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. - -**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better! - -**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore. +If `_CONTRIB` is `true`: you are in **contributor mode**. At the end of each major workflow step, rate your gstack experience 0-10. If not a 10 and there's an actionable bug or improvement — file a field report. -**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs. - -**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer): +**File only:** gstack tooling bugs where the input was reasonable but gstack failed. **Skip:** user app bugs, network errors, auth failures on user's site. +**To file:** write `~/.gstack/contributor-logs/{slug}.md`: ``` # {Title} - -Hey gstack team — ran into this while using /{skill-name}: - -**What I was trying to do:** {what the user/agent was attempting} -**What happened instead:** {what actually happened} -**My rating:** {0-10} — {one sentence on why it wasn't a 10} - -## Steps to reproduce +**What I tried:** {action} | **What happened:** {result} | **Rating:** {0-10} +## Repro 1. {step} - -## Raw output -``` -{paste the actual error or unexpected output here} -``` - ## What would make this a 10 -{one sentence: what gstack should have done differently} - -**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** /{skill} +{one sentence} +**Date:** {YYYY-MM-DD} | **Version:** {version} | **Skill:** /{skill} ``` - -Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}" +Slug: lowercase hyphens, max 60 chars. Skip if exists. Max 3/session. File inline, don't stop. ## Completion Status Protocol @@ -762,6 +744,21 @@ If no test framework detected → include gaps as INFORMATIONAL findings only, n **Diff is test-only changes:** Skip Step 4.75 entirely: "No new application code paths to audit." +### Coverage Warning + +After producing the coverage diagram, check the coverage percentage. Read CLAUDE.md for a `## Test Coverage` section with a `Minimum:` field. If not found, use default: 60%. + +If coverage is below the minimum threshold, output a prominent warning **before** the regular review findings: + +``` +⚠️ COVERAGE WARNING: AI-assessed coverage is {X}%. {N} code paths untested. +Consider writing tests before running /ship. +``` + +This is INFORMATIONAL — does not block /review. But it makes low coverage visible early so the developer can address it before reaching the /ship coverage gate. + +If coverage percentage cannot be determined, skip the warning silently. + This step subsumes the "Test Gaps" category from Pass 2 — do not duplicate findings between the checklist Test Gaps item and this coverage diagram. Include any coverage gaps alongside the findings from Step 4 and Step 4.5. They follow the same Fix-First flow — gaps are INFORMATIONAL findings. --- diff --git a/scripts/gen-skill-docs.ts b/scripts/gen-skill-docs.ts index 8d483dad2..8b5361373 100644 --- a/scripts/gen-skill-docs.ts +++ b/scripts/gen-skill-docs.ts @@ -34,35 +34,7 @@ const HOST: Host = (() => { throw new Error(`Unknown host: ${val}. Use claude, codex, or agents.`); })(); -interface HostPaths { - skillRoot: string; - localSkillRoot: string; - binDir: string; - browseDir: string; -} - -const HOST_PATHS: Record = { - claude: { - skillRoot: '~/.claude/skills/gstack', - localSkillRoot: '.claude/skills/gstack', - binDir: '~/.claude/skills/gstack/bin', - browseDir: '~/.claude/skills/gstack/browse/dist', - }, - codex: { - skillRoot: '$GSTACK_ROOT', - localSkillRoot: '.agents/skills/gstack', - binDir: '$GSTACK_BIN', - browseDir: '$GSTACK_BROWSE', - }, -}; - -interface TemplateContext { - skillName: string; - tmplPath: string; - benefitsFrom?: string[]; - host: Host; - paths: HostPaths; -} +// HostPaths, HOST_PATHS, and TemplateContext are imported from ./resolvers/types // ─── Shared Design Constants ──────────────────────────────── @@ -2801,169 +2773,9 @@ function generateSlugSetup(ctx: TemplateContext): string { return `eval "$(${ctx.paths.binDir}/gstack-slug 2>/dev/null)" && mkdir -p ~/.gstack/projects/$SLUG`; } -const RESOLVERS: Record string> = { - SLUG_EVAL: generateSlugEval, - SLUG_SETUP: generateSlugSetup, - COMMAND_REFERENCE: generateCommandReference, - SNAPSHOT_FLAGS: generateSnapshotFlags, - PREAMBLE: generatePreamble, - BROWSE_SETUP: generateBrowseSetup, - BASE_BRANCH_DETECT: generateBaseBranchDetect, - QA_METHODOLOGY: generateQAMethodology, - DESIGN_METHODOLOGY: generateDesignMethodology, - DESIGN_HARD_RULES: generateDesignHardRules, - DESIGN_OUTSIDE_VOICES: generateDesignOutsideVoices, - DESIGN_REVIEW_LITE: generateDesignReviewLite, - REVIEW_DASHBOARD: generateReviewDashboard, - PLAN_FILE_REVIEW_REPORT: generatePlanFileReviewReport, - TEST_BOOTSTRAP: generateTestBootstrap, - TEST_COVERAGE_AUDIT_PLAN: generateTestCoverageAuditPlan, - TEST_COVERAGE_AUDIT_SHIP: generateTestCoverageAuditShip, - TEST_COVERAGE_AUDIT_REVIEW: generateTestCoverageAuditReview, - TEST_FAILURE_TRIAGE: generateTestFailureTriage, - SPEC_REVIEW_LOOP: generateSpecReviewLoop, - DESIGN_SKETCH: generateDesignSketch, - BENEFITS_FROM: generateBenefitsFrom, - CODEX_SECOND_OPINION: generateCodexSecondOpinion, - CODEX_REVIEW_STEP: generateAdversarialStep, - ADVERSARIAL_STEP: generateAdversarialStep, - DEPLOY_BOOTSTRAP: generateDeployBootstrap, - CODEX_PLAN_REVIEW: generateCodexPlanReview, - PLAN_COMPLETION_AUDIT_SHIP: generatePlanCompletionAuditShip, - PLAN_COMPLETION_AUDIT_REVIEW: generatePlanCompletionAuditReview, - PLAN_VERIFICATION_EXEC: generatePlanVerificationExec, -}; - -// ─── Codex Helpers ─────────────────────────────────────────── - -function codexSkillName(skillDir: string): string { - if (skillDir === '.' || skillDir === '') return 'gstack'; - // Don't double-prefix: gstack-upgrade → gstack-upgrade (not gstack-gstack-upgrade) - if (skillDir.startsWith('gstack-')) return skillDir; - return `gstack-${skillDir}`; -} - -function extractNameAndDescription(content: string): { name: string; description: string } { - const fmStart = content.indexOf('---\n'); - if (fmStart !== 0) return { name: '', description: '' }; - const fmEnd = content.indexOf('\n---', fmStart + 4); - if (fmEnd === -1) return { name: '', description: '' }; - - const frontmatter = content.slice(fmStart + 4, fmEnd); - const nameMatch = frontmatter.match(/^name:\s*(.+)$/m); - const name = nameMatch ? nameMatch[1].trim() : ''; - - let description = ''; - const lines = frontmatter.split('\n'); - let inDescription = false; - const descLines: string[] = []; - for (const line of lines) { - if (line.match(/^description:\s*\|?\s*$/)) { - inDescription = true; - continue; - } - if (line.match(/^description:\s*\S/)) { - description = line.replace(/^description:\s*/, '').trim(); - break; - } - if (inDescription) { - if (line === '' || line.match(/^\s/)) { - descLines.push(line.replace(/^ /, '')); - } else { - break; - } - } - } - if (descLines.length > 0) { - description = descLines.join('\n').trim(); - } - - return { name, description }; -} - -const OPENAI_SHORT_DESCRIPTION_LIMIT = 120; - -function condenseOpenAIShortDescription(description: string): string { - const firstParagraph = description.split(/\n\s*\n/)[0] || description; - const collapsed = firstParagraph.replace(/\s+/g, ' ').trim(); - if (collapsed.length <= OPENAI_SHORT_DESCRIPTION_LIMIT) return collapsed; - - const truncated = collapsed.slice(0, OPENAI_SHORT_DESCRIPTION_LIMIT - 3); - const lastSpace = truncated.lastIndexOf(' '); - const safe = lastSpace > 40 ? truncated.slice(0, lastSpace) : truncated; - return `${safe}...`; -} - -function generateOpenAIYaml(displayName: string, shortDescription: string): string { - return `interface: - display_name: ${JSON.stringify(displayName)} - short_description: ${JSON.stringify(shortDescription)} - default_prompt: ${JSON.stringify(`Use ${displayName} for this task.`)} -policy: - allow_implicit_invocation: true -`; -} - -/** - * Transform frontmatter for Codex: keep only name + description. - * Strips allowed-tools, hooks, version, and all other fields. - * Handles multiline block scalar descriptions (YAML | syntax). - */ -function transformFrontmatter(content: string, host: Host): string { - if (host === 'claude') return content; - - const fmStart = content.indexOf('---\n'); - if (fmStart !== 0) return content; - const fmEnd = content.indexOf('\n---', fmStart + 4); - if (fmEnd === -1) return content; - const body = content.slice(fmEnd + 4); // includes the leading \n after --- - const { name, description } = extractNameAndDescription(content); - - // Codex 1024-char description limit — fail build, don't ship broken skills - const MAX_DESC = 1024; - if (description.length > MAX_DESC) { - throw new Error( - `Codex description for "${name}" is ${description.length} chars (max ${MAX_DESC}). ` + - `Compress the description in the .tmpl file.` - ); - } - - // Re-emit Codex frontmatter (name + description only) - const indentedDesc = description.split('\n').map(l => ` ${l}`).join('\n'); - const codexFm = `---\nname: ${name}\ndescription: |\n${indentedDesc}\n---`; - return codexFm + body; -} - -/** - * Extract hook descriptions from frontmatter for inline safety prose. - * Returns a description of what the hooks do, or null if no hooks. - */ -function extractHookSafetyProse(tmplContent: string): string | null { - if (!tmplContent.match(/^hooks:/m)) return null; - - // Parse the hook matchers to build a human-readable safety description - const matchers: string[] = []; - const matcherRegex = /matcher:\s*"(\w+)"/g; - let m; - while ((m = matcherRegex.exec(tmplContent)) !== null) { - if (!matchers.includes(m[1])) matchers.push(m[1]); - } - - if (matchers.length === 0) return null; - - // Build safety prose based on what tools are hooked - const toolDescriptions: Record = { - Bash: 'check bash commands for destructive operations (rm -rf, DROP TABLE, force-push, git reset --hard, etc.) before execution', - Edit: 'verify file edits are within the allowed scope boundary before applying', - Write: 'verify file writes are within the allowed scope boundary before applying', - }; +// RESOLVERS is imported from ./resolvers/index — single source of truth for all placeholders - const safetyChecks = matchers - .map(t => toolDescriptions[t] || `check ${t} operations for safety`) - .join(', and '); - - return `> **Safety Advisory:** This skill includes safety checks that ${safetyChecks}. When using this skill, always pause and verify before executing potentially destructive operations. If uncertain about a command's safety, ask the user for confirmation before proceeding.`; -} +// Codex helpers are imported from ./resolvers/codex-helpers // ─── Template Processing ──────────────────────────────────── @@ -3035,6 +2847,8 @@ function processTemplate(tmplPath: string, host: Host = 'claude'): { outputPath: content = content.replace(/\.claude\/skills\/gstack/g, ctx.paths.localSkillRoot); content = content.replace(/\.claude\/skills\/review/g, '.agents/skills/gstack/review'); content = content.replace(/\.claude\/skills/g, '.agents/skills'); + content = content.replace(/~\/\.claude\/plans/g, '~/.codex/plans'); + content = content.replace(/~\/\.claude\//g, '~/.codex/'); if (outputDir) { const codexName = codexSkillName(skillDir === '.' ? '' : skillDir); diff --git a/setup b/setup index bfae87851..3d723722e 100755 --- a/setup +++ b/setup @@ -453,3 +453,10 @@ if [ ! -d "$HOME/.gstack" ]; then echo " Welcome! Run /gstack-upgrade anytime to stay current." fi rm -f /tmp/gstack-latest-version + +# 9. Install ping (best-effort, non-blocking) +# Fires update-check with --force to register this install in Supabase. +# Sends only: version, OS, random UUID. No usage data. +if [ -x "$SOURCE_GSTACK_DIR/bin/gstack-update-check" ]; then + "$SOURCE_GSTACK_DIR/bin/gstack-update-check" --force 2>/dev/null & +fi diff --git a/setup-browser-cookies/SKILL.md b/setup-browser-cookies/SKILL.md index 85c1ce20e..85815c915 100644 --- a/setup-browser-cookies/SKILL.md +++ b/setup-browser-cookies/SKILL.md @@ -26,9 +26,11 @@ _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true _CONTRIB=$(~/.claude/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true) _PROACTIVE=$(~/.claude/skills/gstack/bin/gstack-config get proactive 2>/dev/null || echo "true") +_PROACTIVE_PROMPTED=$([ -f ~/.gstack/.proactive-prompted ] && echo "yes" || echo "no") _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown") echo "BRANCH: $_BRANCH" echo "PROACTIVE: $_PROACTIVE" +echo "PROACTIVE_PROMPTED: $_PROACTIVE_PROMPTED" source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true REPO_MODE=${REPO_MODE:-unknown} echo "REPO_MODE: $REPO_MODE" @@ -46,8 +48,11 @@ echo '{"skill":"setup-browser-cookies","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'"," for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done ``` -If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills — only invoke -them when the user explicitly asks. The user opted out of proactive suggestions. +If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not +auto-invoke skills based on conversation context. Only run skills the user explicitly +types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: +"I think /skillname might help here — want me to run it?" and wait for confirmation. +The user opted out of proactive behavior. If output shows `UPGRADE_AVAILABLE `: read `~/.claude/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED `: tell user "Running gstack v{to} (just updated!)" and continue. @@ -96,112 +101,44 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. -## AskUserQuestion Format +If `PROACTIVE_PROMPTED` is `no` AND `TEL_PROMPTED` is `yes`: After telemetry is handled, +ask the user about proactive behavior. Use AskUserQuestion: -**ALWAYS follow this structure for every AskUserQuestion call:** -1. **Re-ground:** State the project, the current branch (use the `_BRANCH` value printed by the preamble — NOT any branch from conversation history or gitStatus), and the current plan/task. (1-2 sentences) -2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called. -3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]` — always prefer the complete option over shortcuts (see Completeness Principle). Include `Completeness: X/10` for each option. Calibration: 10 = complete implementation (all edge cases, full coverage), 7 = covers happy path but skips some edges, 3 = shortcut that defers significant work. If both options are 8+, pick the higher; if one is ≤5, flag it. -4. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)` -5. **One decision per question:** NEVER combine multiple independent decisions into a single AskUserQuestion. Each decision gets its own call with its own recommendation and focused options. Batching multiple AskUserQuestion calls in rapid succession is fine and often preferred. Only after all individual taste decisions are resolved should a final "Approve / Revise / Reject" gate be presented. +> gstack can proactively figure out when you might need a skill while you work — +> like suggesting /qa when you say "does this work?" or /investigate when you hit +> a bug. We recommend keeping this on — it speeds up every part of your workflow. -Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex. - -Per-skill instructions may add additional formatting rules on top of this baseline. - -## Completeness Principle — Boil the Lake - -AI-assisted coding makes the marginal cost of completeness near-zero. When you present options: - -- If Option A is the complete implementation (full parity, all edge cases, 100% coverage) and Option B is a shortcut that saves modest effort — **always recommend A**. The delta between 80 lines and 150 lines is meaningless with CC+gstack. "Good enough" is the wrong instinct when "complete" costs minutes more. -- **Lake vs. ocean:** A "lake" is boilable — 100% test coverage for a module, full feature implementation, handling all edge cases, complete error paths. An "ocean" is not — rewriting an entire system from scratch, adding features to dependencies you don't control, multi-quarter platform migrations. Recommend boiling lakes. Flag oceans as out of scope. -- **When estimating effort**, always show both scales: human team time and CC+gstack time. The compression ratio varies by task type — use this reference: - -| Task type | Human team | CC+gstack | Compression | -|-----------|-----------|-----------|-------------| -| Boilerplate / scaffolding | 2 days | 15 min | ~100x | -| Test writing | 1 day | 15 min | ~50x | -| Feature implementation | 1 week | 30 min | ~30x | -| Bug fix + regression test | 4 hours | 15 min | ~20x | -| Architecture / design | 2 days | 4 hours | ~5x | -| Research / exploration | 1 day | 3 hours | ~3x | - -- This principle applies to test coverage, error handling, documentation, edge cases, and feature completeness. Don't skip the last 10% to "save time" — with AI, that 10% costs seconds. - -**Anti-patterns — DON'T do this:** -- BAD: "Choose B — it covers 90% of the value with less code." (If A is only 70 lines more, choose A.) -- BAD: "We can skip edge case handling to save time." (Edge case handling costs minutes with CC.) -- BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) -- BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.") - -## Repo Ownership Mode — See Something, Say Something - -`REPO_MODE` from the preamble tells you who owns issues in this repo: - -- **`solo`** — One person does 80%+ of the work. They own everything. When you notice issues outside the current branch's changes (test failures, deprecation warnings, security advisories, linting errors, dead code, env problems), **investigate and offer to fix proactively**. The solo dev is the only person who will fix it. Default to action. -- **`collaborative`** — Multiple active contributors. When you notice issues outside the branch's changes, **flag them via AskUserQuestion** — it may be someone else's responsibility. Default to asking, not fixing. -- **`unknown`** — Treat as collaborative (safer default — ask before fixing). - -**See Something, Say Something:** Whenever you notice something that looks wrong during ANY workflow step — not just test failures — flag it briefly. One sentence: what you noticed and its impact. In solo mode, follow up with "Want me to fix it?" In collaborative mode, just flag it and move on. - -Never let a noticed issue silently pass. The whole point is proactive communication. - -## Search Before Building - -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy. - -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. +Options: +- A) Keep it on (recommended) +- B) Turn it off — I'll type /commands myself -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." +If A: run `~/.claude/skills/gstack/bin/gstack-config set proactive true` +If B: run `~/.claude/skills/gstack/bin/gstack-config set proactive false` -Log eureka moments: +Always run: ```bash -jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true +touch ~/.gstack/.proactive-prompted ``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." +This only happens once. If `PROACTIVE_PROMPTED` is `yes`, skip this entirely. ## Contributor Mode -If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. - -**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better! - -**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore. +If `_CONTRIB` is `true`: you are in **contributor mode**. At the end of each major workflow step, rate your gstack experience 0-10. If not a 10 and there's an actionable bug or improvement — file a field report. -**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs. - -**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer): +**File only:** gstack tooling bugs where the input was reasonable but gstack failed. **Skip:** user app bugs, network errors, auth failures on user's site. +**To file:** write `~/.gstack/contributor-logs/{slug}.md`: ``` # {Title} - -Hey gstack team — ran into this while using /{skill-name}: - -**What I was trying to do:** {what the user/agent was attempting} -**What happened instead:** {what actually happened} -**My rating:** {0-10} — {one sentence on why it wasn't a 10} - -## Steps to reproduce +**What I tried:** {action} | **What happened:** {result} | **Rating:** {0-10} +## Repro 1. {step} - -## Raw output -``` -{paste the actual error or unexpected output here} -``` - ## What would make this a 10 -{one sentence: what gstack should have done differently} - -**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** /{skill} +{one sentence} +**Date:** {YYYY-MM-DD} | **Version:** {version} | **Skill:** /{skill} ``` - -Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}" +Slug: lowercase hyphens, max 60 chars. Skip if exists. Max 3/session. File inline, don't stop. ## Completion Status Protocol diff --git a/setup-deploy/SKILL.md b/setup-deploy/SKILL.md index 9eba44790..e5c942787 100644 --- a/setup-deploy/SKILL.md +++ b/setup-deploy/SKILL.md @@ -32,9 +32,11 @@ _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true _CONTRIB=$(~/.claude/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true) _PROACTIVE=$(~/.claude/skills/gstack/bin/gstack-config get proactive 2>/dev/null || echo "true") +_PROACTIVE_PROMPTED=$([ -f ~/.gstack/.proactive-prompted ] && echo "yes" || echo "no") _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown") echo "BRANCH: $_BRANCH" echo "PROACTIVE: $_PROACTIVE" +echo "PROACTIVE_PROMPTED: $_PROACTIVE_PROMPTED" source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true REPO_MODE=${REPO_MODE:-unknown} echo "REPO_MODE: $REPO_MODE" @@ -52,8 +54,11 @@ echo '{"skill":"setup-deploy","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$ for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done ``` -If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills — only invoke -them when the user explicitly asks. The user opted out of proactive suggestions. +If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not +auto-invoke skills based on conversation context. Only run skills the user explicitly +types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: +"I think /skillname might help here — want me to run it?" and wait for confirmation. +The user opted out of proactive behavior. If output shows `UPGRADE_AVAILABLE `: read `~/.claude/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED `: tell user "Running gstack v{to} (just updated!)" and continue. @@ -102,6 +107,27 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `PROACTIVE_PROMPTED` is `no` AND `TEL_PROMPTED` is `yes`: After telemetry is handled, +ask the user about proactive behavior. Use AskUserQuestion: + +> gstack can proactively figure out when you might need a skill while you work — +> like suggesting /qa when you say "does this work?" or /investigate when you hit +> a bug. We recommend keeping this on — it speeds up every part of your workflow. + +Options: +- A) Keep it on (recommended) +- B) Turn it off — I'll type /commands myself + +If A: run `~/.claude/skills/gstack/bin/gstack-config set proactive true` +If B: run `~/.claude/skills/gstack/bin/gstack-config set proactive false` + +Always run: +```bash +touch ~/.gstack/.proactive-prompted +``` + +This only happens once. If `PROACTIVE_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -109,7 +135,6 @@ This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. 2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called. 3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]` — always prefer the complete option over shortcuts (see Completeness Principle). Include `Completeness: X/10` for each option. Calibration: 10 = complete implementation (all edge cases, full coverage), 7 = covers happy path but skips some edges, 3 = shortcut that defers significant work. If both options are 8+, pick the higher; if one is ≤5, flag it. 4. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)` -5. **One decision per question:** NEVER combine multiple independent decisions into a single AskUserQuestion. Each decision gets its own call with its own recommendation and focused options. Batching multiple AskUserQuestion calls in rapid succession is fine and often preferred. Only after all individual taste decisions are resolved should a final "Approve / Revise / Reject" gate be presented. Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex. @@ -117,97 +142,36 @@ Per-skill instructions may add additional formatting rules on top of this baseli ## Completeness Principle — Boil the Lake -AI-assisted coding makes the marginal cost of completeness near-zero. When you present options: +AI makes completeness near-free. Always recommend the complete option over shortcuts — the delta is minutes with CC+gstack. A "lake" (100% coverage, all edge cases) is boilable; an "ocean" (full rewrite, multi-quarter migration) is not. Boil lakes, flag oceans. -- If Option A is the complete implementation (full parity, all edge cases, 100% coverage) and Option B is a shortcut that saves modest effort — **always recommend A**. The delta between 80 lines and 150 lines is meaningless with CC+gstack. "Good enough" is the wrong instinct when "complete" costs minutes more. -- **Lake vs. ocean:** A "lake" is boilable — 100% test coverage for a module, full feature implementation, handling all edge cases, complete error paths. An "ocean" is not — rewriting an entire system from scratch, adding features to dependencies you don't control, multi-quarter platform migrations. Recommend boiling lakes. Flag oceans as out of scope. -- **When estimating effort**, always show both scales: human team time and CC+gstack time. The compression ratio varies by task type — use this reference: +**Effort reference** — always show both scales: | Task type | Human team | CC+gstack | Compression | |-----------|-----------|-----------|-------------| -| Boilerplate / scaffolding | 2 days | 15 min | ~100x | -| Test writing | 1 day | 15 min | ~50x | -| Feature implementation | 1 week | 30 min | ~30x | -| Bug fix + regression test | 4 hours | 15 min | ~20x | -| Architecture / design | 2 days | 4 hours | ~5x | -| Research / exploration | 1 day | 3 hours | ~3x | - -- This principle applies to test coverage, error handling, documentation, edge cases, and feature completeness. Don't skip the last 10% to "save time" — with AI, that 10% costs seconds. - -**Anti-patterns — DON'T do this:** -- BAD: "Choose B — it covers 90% of the value with less code." (If A is only 70 lines more, choose A.) -- BAD: "We can skip edge case handling to save time." (Edge case handling costs minutes with CC.) -- BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) -- BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.") - -## Repo Ownership Mode — See Something, Say Something - -`REPO_MODE` from the preamble tells you who owns issues in this repo: - -- **`solo`** — One person does 80%+ of the work. They own everything. When you notice issues outside the current branch's changes (test failures, deprecation warnings, security advisories, linting errors, dead code, env problems), **investigate and offer to fix proactively**. The solo dev is the only person who will fix it. Default to action. -- **`collaborative`** — Multiple active contributors. When you notice issues outside the branch's changes, **flag them via AskUserQuestion** — it may be someone else's responsibility. Default to asking, not fixing. -- **`unknown`** — Treat as collaborative (safer default — ask before fixing). - -**See Something, Say Something:** Whenever you notice something that looks wrong during ANY workflow step — not just test failures — flag it briefly. One sentence: what you noticed and its impact. In solo mode, follow up with "Want me to fix it?" In collaborative mode, just flag it and move on. - -Never let a noticed issue silently pass. The whole point is proactive communication. - -## Search Before Building +| Boilerplate | 2 days | 15 min | ~100x | +| Tests | 1 day | 15 min | ~50x | +| Feature | 1 week | 30 min | ~30x | +| Bug fix | 4 hours | 15 min | ~20x | -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy. - -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. - -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: -```bash -jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true -``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." +Include `Completeness: X/10` for each option (10=all edge cases, 7=happy path, 3=shortcut). ## Contributor Mode -If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. - -**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better! - -**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore. - -**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs. +If `_CONTRIB` is `true`: you are in **contributor mode**. At the end of each major workflow step, rate your gstack experience 0-10. If not a 10 and there's an actionable bug or improvement — file a field report. -**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer): +**File only:** gstack tooling bugs where the input was reasonable but gstack failed. **Skip:** user app bugs, network errors, auth failures on user's site. +**To file:** write `~/.gstack/contributor-logs/{slug}.md`: ``` # {Title} - -Hey gstack team — ran into this while using /{skill-name}: - -**What I was trying to do:** {what the user/agent was attempting} -**What happened instead:** {what actually happened} -**My rating:** {0-10} — {one sentence on why it wasn't a 10} - -## Steps to reproduce +**What I tried:** {action} | **What happened:** {result} | **Rating:** {0-10} +## Repro 1. {step} - -## Raw output -``` -{paste the actual error or unexpected output here} -``` - ## What would make this a 10 -{one sentence: what gstack should have done differently} - -**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** /{skill} +{one sentence} +**Date:** {YYYY-MM-DD} | **Version:** {version} | **Skill:** /{skill} ``` - -Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}" +Slug: lowercase hyphens, max 60 chars. Skip if exists. Max 3/session. File inline, don't stop. ## Completion Status Protocol diff --git a/ship/SKILL.md b/ship/SKILL.md index af2ea565c..36a29b6e3 100644 --- a/ship/SKILL.md +++ b/ship/SKILL.md @@ -30,9 +30,11 @@ _SESSIONS=$(find ~/.gstack/sessions -mmin -120 -type f 2>/dev/null | wc -l | tr find ~/.gstack/sessions -mmin +120 -type f -delete 2>/dev/null || true _CONTRIB=$(~/.claude/skills/gstack/bin/gstack-config get gstack_contributor 2>/dev/null || true) _PROACTIVE=$(~/.claude/skills/gstack/bin/gstack-config get proactive 2>/dev/null || echo "true") +_PROACTIVE_PROMPTED=$([ -f ~/.gstack/.proactive-prompted ] && echo "yes" || echo "no") _BRANCH=$(git branch --show-current 2>/dev/null || echo "unknown") echo "BRANCH: $_BRANCH" echo "PROACTIVE: $_PROACTIVE" +echo "PROACTIVE_PROMPTED: $_PROACTIVE_PROMPTED" source <(~/.claude/skills/gstack/bin/gstack-repo-mode 2>/dev/null) || true REPO_MODE=${REPO_MODE:-unknown} echo "REPO_MODE: $REPO_MODE" @@ -50,8 +52,11 @@ echo '{"skill":"ship","ts":"'$(date -u +%Y-%m-%dT%H:%M:%SZ)'","repo":"'$(basenam for _PF in $(find ~/.gstack/analytics -maxdepth 1 -name '.pending-*' 2>/dev/null); do [ -f "$_PF" ] && ~/.claude/skills/gstack/bin/gstack-telemetry-log --event-type skill_run --skill _pending_finalize --outcome unknown --session-id "$_SESSION_ID" 2>/dev/null || true; break; done ``` -If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills — only invoke -them when the user explicitly asks. The user opted out of proactive suggestions. +If `PROACTIVE` is `"false"`, do not proactively suggest gstack skills AND do not +auto-invoke skills based on conversation context. Only run skills the user explicitly +types (e.g., /qa, /ship). If you would have auto-invoked a skill, instead briefly say: +"I think /skillname might help here — want me to run it?" and wait for confirmation. +The user opted out of proactive behavior. If output shows `UPGRADE_AVAILABLE `: read `~/.claude/skills/gstack/gstack-upgrade/SKILL.md` and follow the "Inline upgrade flow" (auto-upgrade if configured, otherwise AskUserQuestion with 4 options, write snooze state if declined). If `JUST_UPGRADED `: tell user "Running gstack v{to} (just updated!)" and continue. @@ -100,6 +105,27 @@ touch ~/.gstack/.telemetry-prompted This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. +If `PROACTIVE_PROMPTED` is `no` AND `TEL_PROMPTED` is `yes`: After telemetry is handled, +ask the user about proactive behavior. Use AskUserQuestion: + +> gstack can proactively figure out when you might need a skill while you work — +> like suggesting /qa when you say "does this work?" or /investigate when you hit +> a bug. We recommend keeping this on — it speeds up every part of your workflow. + +Options: +- A) Keep it on (recommended) +- B) Turn it off — I'll type /commands myself + +If A: run `~/.claude/skills/gstack/bin/gstack-config set proactive true` +If B: run `~/.claude/skills/gstack/bin/gstack-config set proactive false` + +Always run: +```bash +touch ~/.gstack/.proactive-prompted +``` + +This only happens once. If `PROACTIVE_PROMPTED` is `yes`, skip this entirely. + ## AskUserQuestion Format **ALWAYS follow this structure for every AskUserQuestion call:** @@ -107,7 +133,6 @@ This only happens once. If `TEL_PROMPTED` is `yes`, skip this entirely. 2. **Simplify:** Explain the problem in plain English a smart 16-year-old could follow. No raw function names, no internal jargon, no implementation details. Use concrete examples and analogies. Say what it DOES, not what it's called. 3. **Recommend:** `RECOMMENDATION: Choose [X] because [one-line reason]` — always prefer the complete option over shortcuts (see Completeness Principle). Include `Completeness: X/10` for each option. Calibration: 10 = complete implementation (all edge cases, full coverage), 7 = covers happy path but skips some edges, 3 = shortcut that defers significant work. If both options are 8+, pick the higher; if one is ≤5, flag it. 4. **Options:** Lettered options: `A) ... B) ... C) ...` — when an option involves effort, show both scales: `(human: ~X / CC: ~Y)` -5. **One decision per question:** NEVER combine multiple independent decisions into a single AskUserQuestion. Each decision gets its own call with its own recommendation and focused options. Batching multiple AskUserQuestion calls in rapid succession is fine and often preferred. Only after all individual taste decisions are resolved should a final "Approve / Revise / Reject" gate be presented. Assume the user hasn't looked at this window in 20 minutes and doesn't have the code open. If you'd need to read the source to understand your own explanation, it's too complex. @@ -115,97 +140,54 @@ Per-skill instructions may add additional formatting rules on top of this baseli ## Completeness Principle — Boil the Lake -AI-assisted coding makes the marginal cost of completeness near-zero. When you present options: +AI makes completeness near-free. Always recommend the complete option over shortcuts — the delta is minutes with CC+gstack. A "lake" (100% coverage, all edge cases) is boilable; an "ocean" (full rewrite, multi-quarter migration) is not. Boil lakes, flag oceans. -- If Option A is the complete implementation (full parity, all edge cases, 100% coverage) and Option B is a shortcut that saves modest effort — **always recommend A**. The delta between 80 lines and 150 lines is meaningless with CC+gstack. "Good enough" is the wrong instinct when "complete" costs minutes more. -- **Lake vs. ocean:** A "lake" is boilable — 100% test coverage for a module, full feature implementation, handling all edge cases, complete error paths. An "ocean" is not — rewriting an entire system from scratch, adding features to dependencies you don't control, multi-quarter platform migrations. Recommend boiling lakes. Flag oceans as out of scope. -- **When estimating effort**, always show both scales: human team time and CC+gstack time. The compression ratio varies by task type — use this reference: +**Effort reference** — always show both scales: | Task type | Human team | CC+gstack | Compression | |-----------|-----------|-----------|-------------| -| Boilerplate / scaffolding | 2 days | 15 min | ~100x | -| Test writing | 1 day | 15 min | ~50x | -| Feature implementation | 1 week | 30 min | ~30x | -| Bug fix + regression test | 4 hours | 15 min | ~20x | -| Architecture / design | 2 days | 4 hours | ~5x | -| Research / exploration | 1 day | 3 hours | ~3x | - -- This principle applies to test coverage, error handling, documentation, edge cases, and feature completeness. Don't skip the last 10% to "save time" — with AI, that 10% costs seconds. - -**Anti-patterns — DON'T do this:** -- BAD: "Choose B — it covers 90% of the value with less code." (If A is only 70 lines more, choose A.) -- BAD: "We can skip edge case handling to save time." (Edge case handling costs minutes with CC.) -- BAD: "Let's defer test coverage to a follow-up PR." (Tests are the cheapest lake to boil.) -- BAD: Quoting only human-team effort: "This would take 2 weeks." (Say: "2 weeks human / ~1 hour CC.") +| Boilerplate | 2 days | 15 min | ~100x | +| Tests | 1 day | 15 min | ~50x | +| Feature | 1 week | 30 min | ~30x | +| Bug fix | 4 hours | 15 min | ~20x | -## Repo Ownership Mode — See Something, Say Something +Include `Completeness: X/10` for each option (10=all edge cases, 7=happy path, 3=shortcut). -`REPO_MODE` from the preamble tells you who owns issues in this repo: +## Repo Ownership — See Something, Say Something -- **`solo`** — One person does 80%+ of the work. They own everything. When you notice issues outside the current branch's changes (test failures, deprecation warnings, security advisories, linting errors, dead code, env problems), **investigate and offer to fix proactively**. The solo dev is the only person who will fix it. Default to action. -- **`collaborative`** — Multiple active contributors. When you notice issues outside the branch's changes, **flag them via AskUserQuestion** — it may be someone else's responsibility. Default to asking, not fixing. -- **`unknown`** — Treat as collaborative (safer default — ask before fixing). +`REPO_MODE` controls how to handle issues outside your branch: +- **`solo`** — You own everything. Investigate and offer to fix proactively. +- **`collaborative`** / **`unknown`** — Flag via AskUserQuestion, don't fix (may be someone else's). -**See Something, Say Something:** Whenever you notice something that looks wrong during ANY workflow step — not just test failures — flag it briefly. One sentence: what you noticed and its impact. In solo mode, follow up with "Want me to fix it?" In collaborative mode, just flag it and move on. - -Never let a noticed issue silently pass. The whole point is proactive communication. +Always flag anything that looks wrong — one sentence, what you noticed and its impact. ## Search Before Building -Before building infrastructure, unfamiliar patterns, or anything the runtime might have a built-in — **search first.** Read `~/.claude/skills/gstack/ETHOS.md` for the full philosophy. - -**Three layers of knowledge:** -- **Layer 1** (tried and true — in distribution). Don't reinvent the wheel. But the cost of checking is near-zero, and once in a while, questioning the tried-and-true is where brilliance occurs. -- **Layer 2** (new and popular — search for these). But scrutinize: humans are subject to mania. Search results are inputs to your thinking, not answers. -- **Layer 3** (first principles — prize these above all). Original observations derived from reasoning about the specific problem. The most valuable of all. +Before building anything unfamiliar, **search first.** See `~/.claude/skills/gstack/ETHOS.md`. +- **Layer 1** (tried and true) — don't reinvent. **Layer 2** (new and popular) — scrutinize. **Layer 3** (first principles) — prize above all. -**Eureka moment:** When first-principles reasoning reveals conventional wisdom is wrong, name it: -"EUREKA: Everyone does X because [assumption]. But [evidence] shows this is wrong. Y is better because [reasoning]." - -Log eureka moments: +**Eureka:** When first-principles reasoning contradicts conventional wisdom, name it and log: ```bash jq -n --arg ts "$(date -u +%Y-%m-%dT%H:%M:%SZ)" --arg skill "SKILL_NAME" --arg branch "$(git branch --show-current 2>/dev/null)" --arg insight "ONE_LINE_SUMMARY" '{ts:$ts,skill:$skill,branch:$branch,insight:$insight}' >> ~/.gstack/analytics/eureka.jsonl 2>/dev/null || true ``` -Replace SKILL_NAME and ONE_LINE_SUMMARY. Runs inline — don't stop the workflow. - -**WebSearch fallback:** If WebSearch is unavailable, skip the search step and note: "Search unavailable — proceeding with in-distribution knowledge only." ## Contributor Mode -If `_CONTRIB` is `true`: you are in **contributor mode**. You're a gstack user who also helps make it better. - -**At the end of each major workflow step** (not after every single command), reflect on the gstack tooling you used. Rate your experience 0 to 10. If it wasn't a 10, think about why. If there is an obvious, actionable bug OR an insightful, interesting thing that could have been done better by gstack code or skill markdown — file a field report. Maybe our contributor will help make us better! - -**Calibration — this is the bar:** For example, `$B js "await fetch(...)"` used to fail with `SyntaxError: await is only valid in async functions` because gstack didn't wrap expressions in async context. Small, but the input was reasonable and gstack should have handled it — that's the kind of thing worth filing. Things less consequential than this, ignore. +If `_CONTRIB` is `true`: you are in **contributor mode**. At the end of each major workflow step, rate your gstack experience 0-10. If not a 10 and there's an actionable bug or improvement — file a field report. -**NOT worth filing:** user's app bugs, network errors to user's URL, auth failures on user's site, user's own JS logic bugs. - -**To file:** write `~/.gstack/contributor-logs/{slug}.md` with **all sections below** (do not truncate — include every section through the Date/Version footer): +**File only:** gstack tooling bugs where the input was reasonable but gstack failed. **Skip:** user app bugs, network errors, auth failures on user's site. +**To file:** write `~/.gstack/contributor-logs/{slug}.md`: ``` # {Title} - -Hey gstack team — ran into this while using /{skill-name}: - -**What I was trying to do:** {what the user/agent was attempting} -**What happened instead:** {what actually happened} -**My rating:** {0-10} — {one sentence on why it wasn't a 10} - -## Steps to reproduce +**What I tried:** {action} | **What happened:** {result} | **Rating:** {0-10} +## Repro 1. {step} - -## Raw output -``` -{paste the actual error or unexpected output here} -``` - ## What would make this a 10 -{one sentence: what gstack should have done differently} - -**Date:** {YYYY-MM-DD} | **Version:** {gstack version} | **Skill:** /{skill} +{one sentence} +**Date:** {YYYY-MM-DD} | **Version:** {version} | **Skill:** /{skill} ``` - -Slug: lowercase, hyphens, max 60 chars (e.g. `browse-js-no-await`). Skip if file already exists. Max 3 reports per session. File inline and continue — don't stop the workflow. Tell user: "Filed gstack field report: {title}" +Slug: lowercase hyphens, max 60 chars. Skip if exists. Max 3/session. File inline, don't stop. ## Completion Status Protocol @@ -331,6 +313,7 @@ You are running the `/ship` workflow. This is a **non-interactive, fully automat - Plan verification failures (see Step 3.47) - TODOS.md missing and user wants to create one (ask — see Step 5.5) - TODOS.md disorganized and user wants to reorganize (ask — see Step 5.5) +- Screenshots: asking whether to capture PR screenshots (see Step 6.75) **Never stop for:** - Uncommitted changes (always include them) @@ -999,6 +982,39 @@ find . -name '*.test.*' -o -name '*.spec.*' -o -name '*_test.*' -o -name '*_spec For PR body: `Tests: {before} → {after} (+{delta} new)` Coverage line: `Test Coverage Audit: N new code paths. M covered (X%). K tests generated, J committed.` +**7. Coverage gate:** + +Before proceeding, check CLAUDE.md for a `## Test Coverage` section with `Minimum:` and `Target:` fields. If found, use those percentages. Otherwise use defaults: Minimum = 60%, Target = 80%. + +Using the coverage percentage from the diagram in substep 4 (the `COVERAGE: X/Y (Z%)` line): + +- **>= target:** Pass. "Coverage gate: PASS ({X}%)." Continue. +- **>= minimum, < target:** Use AskUserQuestion: + - "AI-assessed coverage is {X}%. {N} code paths are untested. Target is {target}%." + - RECOMMENDATION: Choose A because untested code paths are where production bugs hide. + - Options: + A) Generate more tests for remaining gaps (recommended) + B) Ship anyway — I accept the coverage risk + C) These paths don't need tests — mark as intentionally uncovered + - If A: Loop back to substep 5 (generate tests) targeting the remaining gaps. After second pass, if still below target, present AskUserQuestion again with updated numbers. Maximum 2 generation passes total. + - If B: Continue. Include in PR body: "Coverage gate: {X}% — user accepted risk." + - If C: Continue. Include in PR body: "Coverage gate: {X}% — {N} paths intentionally uncovered." + +- **< minimum:** Use AskUserQuestion: + - "AI-assessed coverage is critically low ({X}%). {N} of {M} code paths have no tests. Minimum threshold is {minimum}%." + - RECOMMENDATION: Choose A because less than {minimum}% means more code is untested than tested. + - Options: + A) Generate tests for remaining gaps (recommended) + B) Override — ship with low coverage (I understand the risk) + - If A: Loop back to substep 5. Maximum 2 passes. If still below minimum after 2 passes, present the override choice again. + - If B: Continue. Include in PR body: "Coverage gate: OVERRIDDEN at {X}%." + +**Coverage percentage undetermined:** If the coverage diagram doesn't produce a clear numeric percentage (ambiguous output, parse error), **skip the gate** with: "Coverage gate: could not determine percentage — skipping." Do not default to 0% or block. + +**Test-only diffs:** Skip the gate (same as the existing fast-path). + +**100% coverage:** "Coverage gate: PASS (100%)." Continue. + ### Test Plan Artifact After producing the coverage diagram, write a test plan artifact so `/qa` and `/qa-only` can consume it: @@ -1631,6 +1647,80 @@ Claiming work is complete without verification is dishonesty, not efficiency. --- +## Step 6.75: PR Screenshots (optional) + +Check if this PR includes frontend/UI changes: + +```bash +source <(~/.claude/skills/gstack/bin/gstack-diff-scope 2>/dev/null) || true +echo "SCOPE_FRONTEND: ${SCOPE_FRONTEND:-false}" +``` + +If `SCOPE_FRONTEND=true`, check if the browse binary is available: + +```bash +_ROOT=$(git rev-parse --show-toplevel 2>/dev/null) +B="" +[ -n "$_ROOT" ] && [ -x "$_ROOT/.claude/skills/gstack/browse/dist/browse" ] && B="$_ROOT/.claude/skills/gstack/browse/dist/browse" +[ -z "$B" ] && B=~/.claude/skills/gstack/browse/dist/browse +[ -x "$B" ] && echo "BROWSE_READY" || echo "BROWSE_NOT_AVAILABLE" +``` + +If both frontend scope AND browse are available, use AskUserQuestion: + +> This PR changes frontend code. Want to add screenshots to the PR? Your screenshots +> will get a "Screenshots · GStack" watermark — free visual evidence in your PR. +> +> A) Responsive screenshots (mobile + tablet + desktop) — recommended +> B) Single desktop screenshot +> C) Skip screenshots + +If the user chooses A or B: + +1. **Check authentication:** + ```bash + ~/.claude/skills/gstack/bin/gstack-auth-refresh --check 2>/dev/null + ``` + If not authenticated, run `~/.claude/skills/gstack/bin/gstack-auth` inline. Wait for completion. + +2. **Detect app URL:** + Read CLAUDE.md and look for an `app_url` or `dev_url` setting. If not found, use + AskUserQuestion: "What URL should I screenshot? (e.g., http://localhost:3000)" + Persist the answer to CLAUDE.md under a `## Screenshots` section so we never ask again. + +3. **Capture screenshots:** + For option A (responsive): + ```bash + $B goto + $B responsive /tmp/gstack-pr-screenshots + ``` + For option B (single): + ```bash + $B goto + $B screenshot /tmp/gstack-pr-screenshots/desktop.png + ``` + +4. **Upload each screenshot:** + ```bash + REPO_SLUG=$(basename "$(git rev-parse --show-toplevel 2>/dev/null)") + BRANCH=$(git branch --show-current 2>/dev/null) + for img in /tmp/gstack-pr-screenshots/*.png; do + VIEWPORT=$(basename "$img" .png) + URL=$(~/.claude/skills/gstack/bin/gstack-screenshot-upload "$img" \ + --repo-slug "$REPO_SLUG" --branch "$BRANCH" --viewport "$VIEWPORT") + echo "SCREENSHOT_URL[$VIEWPORT]=$URL" + done + ``` + +5. **Store proxy URLs** for use in Step 8's PR body. + +**Failure handling:** If any step fails (browse unavailable, auth fails, upload fails), +warn in output and continue without screenshots. Never block /ship for screenshot failures. + +If `SCOPE_FRONTEND=false` or browse is not available, skip this step silently. + +--- + ## Step 7: Push Push to the remote with upstream tracking: @@ -1685,6 +1775,18 @@ gh pr create --base --title ": " --body "$(cat <<'EOF' +## Screenshots + + +| Mobile | Tablet | Desktop | +|--------|--------|---------| +| ![mobile](PROXY_URL) | ![tablet](PROXY_URL) | ![desktop](PROXY_URL) | + +Screenshots by [GStack](https://gstack.gg) + + + + ## Test plan - [x] All Rails tests pass (N runs, 0 failures) - [x] All Vitest tests pass (N tests) diff --git a/ship/SKILL.md.tmpl b/ship/SKILL.md.tmpl index 7f82c64dd..8820f4064 100644 --- a/ship/SKILL.md.tmpl +++ b/ship/SKILL.md.tmpl @@ -37,6 +37,7 @@ You are running the `/ship` workflow. This is a **non-interactive, fully automat - Plan verification failures (see Step 3.47) - TODOS.md missing and user wants to create one (ask — see Step 5.5) - TODOS.md disorganized and user wants to reorganize (ask — see Step 5.5) +- Screenshots: asking whether to capture PR screenshots (see Step 6.75) **Never stop for:** - Uncommitted changes (always include them) @@ -475,6 +476,80 @@ Claiming work is complete without verification is dishonesty, not efficiency. --- +## Step 6.75: PR Screenshots (optional) + +Check if this PR includes frontend/UI changes: + +```bash +source <(~/.claude/skills/gstack/bin/gstack-diff-scope 2>/dev/null) || true +echo "SCOPE_FRONTEND: ${SCOPE_FRONTEND:-false}" +``` + +If `SCOPE_FRONTEND=true`, check if the browse binary is available: + +```bash +_ROOT=$(git rev-parse --show-toplevel 2>/dev/null) +B="" +[ -n "$_ROOT" ] && [ -x "$_ROOT/.claude/skills/gstack/browse/dist/browse" ] && B="$_ROOT/.claude/skills/gstack/browse/dist/browse" +[ -z "$B" ] && B=~/.claude/skills/gstack/browse/dist/browse +[ -x "$B" ] && echo "BROWSE_READY" || echo "BROWSE_NOT_AVAILABLE" +``` + +If both frontend scope AND browse are available, use AskUserQuestion: + +> This PR changes frontend code. Want to add screenshots to the PR? Your screenshots +> will get a "Screenshots · GStack" watermark — free visual evidence in your PR. +> +> A) Responsive screenshots (mobile + tablet + desktop) — recommended +> B) Single desktop screenshot +> C) Skip screenshots + +If the user chooses A or B: + +1. **Check authentication:** + ```bash + ~/.claude/skills/gstack/bin/gstack-auth-refresh --check 2>/dev/null + ``` + If not authenticated, run `~/.claude/skills/gstack/bin/gstack-auth` inline. Wait for completion. + +2. **Detect app URL:** + Read CLAUDE.md and look for an `app_url` or `dev_url` setting. If not found, use + AskUserQuestion: "What URL should I screenshot? (e.g., http://localhost:3000)" + Persist the answer to CLAUDE.md under a `## Screenshots` section so we never ask again. + +3. **Capture screenshots:** + For option A (responsive): + ```bash + $B goto + $B responsive /tmp/gstack-pr-screenshots + ``` + For option B (single): + ```bash + $B goto + $B screenshot /tmp/gstack-pr-screenshots/desktop.png + ``` + +4. **Upload each screenshot:** + ```bash + REPO_SLUG=$(basename "$(git rev-parse --show-toplevel 2>/dev/null)") + BRANCH=$(git branch --show-current 2>/dev/null) + for img in /tmp/gstack-pr-screenshots/*.png; do + VIEWPORT=$(basename "$img" .png) + URL=$(~/.claude/skills/gstack/bin/gstack-screenshot-upload "$img" \ + --repo-slug "$REPO_SLUG" --branch "$BRANCH" --viewport "$VIEWPORT") + echo "SCREENSHOT_URL[$VIEWPORT]=$URL" + done + ``` + +5. **Store proxy URLs** for use in Step 8's PR body. + +**Failure handling:** If any step fails (browse unavailable, auth fails, upload fails), +warn in output and continue without screenshots. Never block /ship for screenshot failures. + +If `SCOPE_FRONTEND=false` or browse is not available, skip this step silently. + +--- + ## Step 7: Push Push to the remote with upstream tracking: @@ -529,6 +604,18 @@ gh pr create --base --title ": " --body "$(cat <<'EOF' +## Screenshots + + +| Mobile | Tablet | Desktop | +|--------|--------|---------| +| ![mobile](PROXY_URL) | ![tablet](PROXY_URL) | ![desktop](PROXY_URL) | + +Screenshots by [GStack](https://gstack.gg) + + + + ## Test plan - [x] All Rails tests pass (N runs, 0 failures) - [x] All Vitest tests pass (N tests) diff --git a/supabase/config.sh b/supabase/config.sh index bfc739bc4..a8f889ea0 100644 --- a/supabase/config.sh +++ b/supabase/config.sh @@ -6,3 +6,6 @@ GSTACK_SUPABASE_URL="https://frugpmstpnojnhfyimgv.supabase.co" GSTACK_SUPABASE_ANON_KEY="sb_publishable_tR4i6cyMIrYTE3s6OyHGHw_ppx2p6WK" + +# gstack.gg web app (auth + screenshot upload) +GSTACK_WEB_URL="https://gstack.gg" diff --git a/supabase/functions/community-benchmarks/index.ts b/supabase/functions/community-benchmarks/index.ts new file mode 100644 index 000000000..e669e8b91 --- /dev/null +++ b/supabase/functions/community-benchmarks/index.ts @@ -0,0 +1,109 @@ +// gstack community-benchmarks edge function +// Computes per-skill duration stats from telemetry_events (last 30 days). +// Upserts results into community_benchmarks table. +// Cached for 1 hour via Cache-Control header. + +import { createClient } from "https://esm.sh/@supabase/supabase-js@2"; + +Deno.serve(async () => { + const supabase = createClient( + Deno.env.get("SUPABASE_URL") ?? "", + Deno.env.get("SUPABASE_SERVICE_ROLE_KEY") ?? "" + ); + + try { + const thirtyDaysAgo = new Date( + Date.now() - 30 * 24 * 60 * 60 * 1000 + ).toISOString(); + + // Fetch all skill_run events with duration from last 30 days + const { data: events, error } = await supabase + .from("telemetry_events") + .select("skill, duration_s, outcome") + .eq("event_type", "skill_run") + .eq("source", "live") + .not("duration_s", "is", null) + .not("skill", "is", null) + .gte("event_timestamp", thirtyDaysAgo) + .order("skill") + .limit(10000); + + if (error) throw error; + if (!events || events.length === 0) { + return new Response(JSON.stringify([]), { + status: 200, + headers: { + "Content-Type": "application/json", + "Cache-Control": "public, max-age=3600", + }, + }); + } + + // Group by skill and compute stats + const skillMap: Record< + string, + { durations: number[]; successes: number; total: number } + > = {}; + + for (const event of events) { + if (!event.skill || event.duration_s == null) continue; + if (!skillMap[event.skill]) { + skillMap[event.skill] = { durations: [], successes: 0, total: 0 }; + } + skillMap[event.skill].durations.push(Number(event.duration_s)); + skillMap[event.skill].total++; + if (event.outcome === "success") { + skillMap[event.skill].successes++; + } + } + + const benchmarks = Object.entries(skillMap) + .filter(([skill]) => !skill.startsWith("_")) // skip internal skills + .map(([skill, data]) => { + const sorted = data.durations.sort((a, b) => a - b); + const len = sorted.length; + const percentile = (p: number) => { + const idx = Math.floor((p / 100) * (len - 1)); + return sorted[idx] ?? 0; + }; + + return { + skill, + median_duration_s: percentile(50), + p25_duration_s: percentile(25), + p75_duration_s: percentile(75), + total_runs: data.total, + success_rate: + data.total > 0 + ? Math.round((data.successes / data.total) * 1000) / 10 + : 0, + updated_at: new Date().toISOString(), + }; + }); + + // Upsert into community_benchmarks table + if (benchmarks.length > 0) { + const { error: upsertError } = await supabase + .from("community_benchmarks") + .upsert(benchmarks, { onConflict: "skill" }); + + if (upsertError) { + console.error("Upsert error:", upsertError); + } + } + + return new Response(JSON.stringify(benchmarks), { + status: 200, + headers: { + "Content-Type": "application/json", + "Cache-Control": "public, max-age=3600", + }, + }); + } catch (err) { + console.error("Benchmarks error:", err); + return new Response(JSON.stringify([]), { + status: 200, + headers: { "Content-Type": "application/json" }, + }); + } +}); diff --git a/supabase/functions/community-pulse/index.ts b/supabase/functions/community-pulse/index.ts index acf2fdb7a..c6693bf2d 100644 --- a/supabase/functions/community-pulse/index.ts +++ b/supabase/functions/community-pulse/index.ts @@ -41,13 +41,15 @@ Deno.serve(async () => { // Weekly active (update checks this week) const { count: thisWeek } = await supabase .from("update_checks") - .select("*", { count: "exact", head: true }) + .select("install_fingerprint") + .eq("source", "live") .gte("checked_at", weekAgo); // Last week (for change %) const { count: lastWeek } = await supabase .from("update_checks") - .select("*", { count: "exact", head: true }) + .select("install_fingerprint") + .eq("source", "live") .gte("checked_at", twoWeeksAgo) .lt("checked_at", weekAgo); diff --git a/supabase/functions/community-recommendations/index.ts b/supabase/functions/community-recommendations/index.ts new file mode 100644 index 000000000..295177637 --- /dev/null +++ b/supabase/functions/community-recommendations/index.ts @@ -0,0 +1,106 @@ +// gstack community-recommendations edge function +// Returns skill recommendations based on co-occurrence patterns. +// Input: ?skills=qa,ship (user's top skills as comma-separated query param) +// Output: top 3 recommended skills the user hasn't tried yet. +// Cached for 24 hours via Cache-Control header. + +import { createClient } from "https://esm.sh/@supabase/supabase-js@2"; + +Deno.serve(async (req) => { + const supabase = createClient( + Deno.env.get("SUPABASE_URL") ?? "", + Deno.env.get("SUPABASE_SERVICE_ROLE_KEY") ?? "" + ); + + try { + const url = new URL(req.url); + const userSkills = (url.searchParams.get("skills") ?? "") + .split(",") + .map((s) => s.trim()) + .filter(Boolean); + + if (userSkills.length === 0) { + return new Response(JSON.stringify({ recommendations: [] }), { + status: 200, + headers: { + "Content-Type": "application/json", + "Cache-Control": "public, max-age=86400", + }, + }); + } + + // Query skill_sequences for co-occurring skills + const { data: sequences, error } = await supabase + .from("skill_sequences") + .select("skill_a, skill_b, co_occurrences") + .in("skill_a", userSkills) + .order("co_occurrences", { ascending: false }) + .limit(50); + + if (error) throw error; + + // Find skills the user hasn't used yet, ranked by co-occurrence + const userSkillSet = new Set(userSkills); + const recommendations: Record< + string, + { co_occurrences: number; paired_with: string[] } + > = {}; + + for (const seq of sequences ?? []) { + if (userSkillSet.has(seq.skill_b)) continue; // already used + if (seq.skill_b.startsWith("_")) continue; // skip internal + + if (!recommendations[seq.skill_b]) { + recommendations[seq.skill_b] = { + co_occurrences: 0, + paired_with: [], + }; + } + recommendations[seq.skill_b].co_occurrences += seq.co_occurrences; + recommendations[seq.skill_b].paired_with.push(seq.skill_a); + } + + // Also get total run counts for percentage calculation + const { data: benchmarks } = await supabase + .from("community_benchmarks") + .select("skill, total_runs"); + + const totalBySkill: Record = {}; + for (const b of benchmarks ?? []) { + totalBySkill[b.skill] = b.total_runs; + } + + // Build top 3 recommendations + const sorted = Object.entries(recommendations) + .sort(([, a], [, b]) => b.co_occurrences - a.co_occurrences) + .slice(0, 3) + .map(([skill, data]) => { + const pairedSkill = data.paired_with[0]; + const pairedTotal = totalBySkill[pairedSkill] ?? 0; + const pct = + pairedTotal > 0 + ? Math.round((data.co_occurrences / pairedTotal) * 100) + : 0; + + return { + skill, + reason: `used by ${pct}% of /${pairedSkill} users`, + co_occurrences: data.co_occurrences, + }; + }); + + return new Response(JSON.stringify({ recommendations: sorted }), { + status: 200, + headers: { + "Content-Type": "application/json", + "Cache-Control": "public, max-age=86400", + }, + }); + } catch (err) { + console.error("Recommendations error:", err); + return new Response(JSON.stringify({ recommendations: [] }), { + status: 200, + headers: { "Content-Type": "application/json" }, + }); + } +}); diff --git a/supabase/functions/telemetry-ingest/index.ts b/supabase/functions/telemetry-ingest/index.ts deleted file mode 100644 index 07d65d364..000000000 --- a/supabase/functions/telemetry-ingest/index.ts +++ /dev/null @@ -1,135 +0,0 @@ -// gstack telemetry-ingest edge function -// Validates and inserts a batch of telemetry events. -// Called by bin/gstack-telemetry-sync. - -import { createClient } from "https://esm.sh/@supabase/supabase-js@2"; - -interface TelemetryEvent { - v: number; - ts: string; - event_type: string; - skill: string; - session_id?: string; - gstack_version: string; - os: string; - arch?: string; - duration_s?: number; - outcome: string; - error_class?: string; - used_browse?: boolean; - sessions?: number; - installation_id?: string; -} - -const MAX_BATCH_SIZE = 100; -const MAX_PAYLOAD_BYTES = 50_000; // 50KB - -Deno.serve(async (req) => { - if (req.method !== "POST") { - return new Response("POST required", { status: 405 }); - } - - // Check payload size - const contentLength = parseInt(req.headers.get("content-length") || "0"); - if (contentLength > MAX_PAYLOAD_BYTES) { - return new Response("Payload too large", { status: 413 }); - } - - try { - const body = await req.json(); - const events: TelemetryEvent[] = Array.isArray(body) ? body : [body]; - - if (events.length > MAX_BATCH_SIZE) { - return new Response(`Batch too large (max ${MAX_BATCH_SIZE})`, { status: 400 }); - } - - const supabase = createClient( - Deno.env.get("SUPABASE_URL") ?? "", - Deno.env.get("SUPABASE_SERVICE_ROLE_KEY") ?? "" - ); - - // Validate and transform events - const rows = []; - const installationUpserts: Map = new Map(); - - for (const event of events) { - // Required fields - if (!event.ts || !event.gstack_version || !event.os || !event.outcome) { - continue; // skip malformed - } - - // Validate schema version - if (event.v !== 1) continue; - - // Validate event_type - const validTypes = ["skill_run", "upgrade_prompted", "upgrade_completed"]; - if (!validTypes.includes(event.event_type)) continue; - - rows.push({ - schema_version: event.v, - event_type: event.event_type, - gstack_version: String(event.gstack_version).slice(0, 20), - os: String(event.os).slice(0, 20), - arch: event.arch ? String(event.arch).slice(0, 20) : null, - event_timestamp: event.ts, - skill: event.skill ? String(event.skill).slice(0, 50) : null, - session_id: event.session_id ? String(event.session_id).slice(0, 50) : null, - duration_s: typeof event.duration_s === "number" ? event.duration_s : null, - outcome: String(event.outcome).slice(0, 20), - error_class: event.error_class ? String(event.error_class).slice(0, 100) : null, - used_browse: event.used_browse === true, - concurrent_sessions: typeof event.sessions === "number" ? event.sessions : 1, - installation_id: event.installation_id ? String(event.installation_id).slice(0, 64) : null, - }); - - // Track installations for upsert - if (event.installation_id) { - installationUpserts.set(event.installation_id, { - version: event.gstack_version, - os: event.os, - }); - } - } - - if (rows.length === 0) { - return new Response(JSON.stringify({ inserted: 0 }), { - status: 200, - headers: { "Content-Type": "application/json" }, - }); - } - - // Insert events - const { error: insertError } = await supabase - .from("telemetry_events") - .insert(rows); - - if (insertError) { - return new Response(JSON.stringify({ error: insertError.message }), { - status: 500, - headers: { "Content-Type": "application/json" }, - }); - } - - // Upsert installations (update last_seen) - for (const [id, data] of installationUpserts) { - await supabase - .from("installations") - .upsert( - { - installation_id: id, - last_seen: new Date().toISOString(), - gstack_version: data.version, - os: data.os, - }, - { onConflict: "installation_id" } - ); - } - - return new Response(JSON.stringify({ inserted: rows.length }), { - status: 200, - headers: { "Content-Type": "application/json" }, - }); - } catch { - return new Response("Invalid request", { status: 400 }); - } -}); diff --git a/supabase/migrations/002_community_tier.sql b/supabase/migrations/002_community_tier.sql new file mode 100644 index 000000000..0d6b26042 --- /dev/null +++ b/supabase/migrations/002_community_tier.sql @@ -0,0 +1,43 @@ +-- gstack community tier schema +-- Adds authenticated backup, benchmarks, email, and richer error telemetry. + +-- Add error context columns to telemetry_events +ALTER TABLE telemetry_events ADD COLUMN error_message TEXT; +ALTER TABLE telemetry_events ADD COLUMN failed_step TEXT; + +-- Add columns to installations for backup + email + auth identity +ALTER TABLE installations ADD COLUMN user_id UUID UNIQUE; +ALTER TABLE installations ADD COLUMN email TEXT; +ALTER TABLE installations ADD COLUMN config_snapshot JSONB; +ALTER TABLE installations ADD COLUMN analytics_snapshot JSONB; +ALTER TABLE installations ADD COLUMN retro_history JSONB; +ALTER TABLE installations ADD COLUMN last_backup_at TIMESTAMPTZ; +ALTER TABLE installations ADD COLUMN backup_version INTEGER DEFAULT 0; + +-- RLS: authenticated users can read/write their own installation row +CREATE POLICY "auth_read_own" ON installations + FOR SELECT USING ( + (select auth.uid()) IS NOT NULL AND user_id = (select auth.uid()) + ); +CREATE POLICY "auth_write_own" ON installations + FOR INSERT WITH CHECK (user_id = (select auth.uid())); +CREATE POLICY "auth_update_own" ON installations + FOR UPDATE USING (user_id = (select auth.uid())) + WITH CHECK (user_id = (select auth.uid())); + +-- Community benchmarks (computed by edge function, cached) +CREATE TABLE community_benchmarks ( + skill TEXT PRIMARY KEY, + median_duration_s NUMERIC, + p25_duration_s NUMERIC, + p75_duration_s NUMERIC, + total_runs BIGINT, + success_rate NUMERIC, + updated_at TIMESTAMPTZ DEFAULT now() +); + +ALTER TABLE community_benchmarks ENABLE ROW LEVEL SECURITY; +CREATE POLICY "anon_select" ON community_benchmarks FOR SELECT USING (true); +CREATE POLICY "service_upsert" ON community_benchmarks FOR ALL + USING ((select auth.role()) = 'service_role') + WITH CHECK ((select auth.role()) = 'service_role'); diff --git a/supabase/migrations/003_source_and_guards.sql b/supabase/migrations/003_source_and_guards.sql new file mode 100644 index 000000000..06bdbdf64 --- /dev/null +++ b/supabase/migrations/003_source_and_guards.sql @@ -0,0 +1,129 @@ +-- gstack telemetry data integrity + growth metrics +-- Adds source tagging, install fingerprinting, duration guards, and growth views. +-- +-- PREREQUISITE: Run Phase 4A cleanup BEFORE this migration: +-- UPDATE telemetry_events SET duration_s = NULL WHERE duration_s > 86400 OR duration_s < 0; + +-- ─── Source field (live/test/dev tagging) ───────────────────── +ALTER TABLE telemetry_events ADD COLUMN source TEXT DEFAULT 'live'; +ALTER TABLE update_checks ADD COLUMN source TEXT DEFAULT 'live'; + +-- ─── Install fingerprinting (expand-then-contract) ─────────── +-- ADD new column (don't RENAME — old clients still POST installation_id) +ALTER TABLE telemetry_events ADD COLUMN install_fingerprint TEXT; +ALTER TABLE update_checks ADD COLUMN install_fingerprint TEXT; + +-- Trigger: copy installation_id → install_fingerprint on INSERT (backward compat) +CREATE OR REPLACE FUNCTION copy_install_id_to_fingerprint() +RETURNS TRIGGER AS $$ +BEGIN + IF NEW.install_fingerprint IS NULL AND NEW.installation_id IS NOT NULL THEN + NEW.install_fingerprint := NEW.installation_id; + END IF; + RETURN NEW; +END; +$$ LANGUAGE plpgsql; + +CREATE TRIGGER trg_copy_install_fingerprint + BEFORE INSERT ON telemetry_events + FOR EACH ROW + EXECUTE FUNCTION copy_install_id_to_fingerprint(); + +-- Backfill existing rows +UPDATE telemetry_events + SET install_fingerprint = installation_id + WHERE installation_id IS NOT NULL AND install_fingerprint IS NULL; + +-- ─── Duration guard ────────────────────────────────────────── +ALTER TABLE telemetry_events + ADD CONSTRAINT duration_reasonable + CHECK (duration_s IS NULL OR (duration_s >= 0 AND duration_s <= 86400)); + +-- ─── Indexes for fingerprint joins + source filtering ──────── +CREATE INDEX idx_update_checks_fingerprint ON update_checks (install_fingerprint); +CREATE INDEX idx_telemetry_fingerprint ON telemetry_events (install_fingerprint); +CREATE INDEX idx_update_checks_source ON update_checks (source) WHERE source = 'live'; +CREATE INDEX idx_telemetry_source ON telemetry_events (source) WHERE source = 'live'; + +-- ─── Recreate crash_clusters with source filter ────────────── +DROP VIEW IF EXISTS crash_clusters; +CREATE VIEW crash_clusters AS +SELECT + error_class, + gstack_version, + COUNT(*) as total_occurrences, + COUNT(DISTINCT install_fingerprint) as identified_users, + COUNT(*) - COUNT(install_fingerprint) as anonymous_occurrences, + MIN(event_timestamp) as first_seen, + MAX(event_timestamp) as last_seen +FROM telemetry_events +WHERE outcome = 'error' AND error_class IS NOT NULL + AND (source = 'live' OR source IS NULL) +GROUP BY error_class, gstack_version +ORDER BY total_occurrences DESC; + +-- ─── Recreate skill_sequences with source filter ───────────── +DROP VIEW IF EXISTS skill_sequences; +CREATE VIEW skill_sequences AS +SELECT + a.skill as skill_a, + b.skill as skill_b, + COUNT(DISTINCT a.session_id) as co_occurrences +FROM telemetry_events a +JOIN telemetry_events b ON a.session_id = b.session_id + AND a.skill != b.skill + AND a.event_timestamp < b.event_timestamp +WHERE a.event_type = 'skill_run' AND b.event_type = 'skill_run' + AND (a.source = 'live' OR a.source IS NULL) + AND (b.source = 'live' OR b.source IS NULL) +GROUP BY a.skill, b.skill +HAVING COUNT(DISTINCT a.session_id) >= 10 +ORDER BY co_occurrences DESC; + +-- ─── Growth views ──────────────────────────────────────────── + +-- Daily active installs (materialized for dashboard perf) +CREATE MATERIALIZED VIEW daily_active_installs AS +SELECT DATE(checked_at) as day, + COUNT(DISTINCT install_fingerprint) as unique_installs, + COUNT(*) as total_pings +FROM update_checks +WHERE source = 'live' OR source IS NULL +GROUP BY DATE(checked_at) +ORDER BY day DESC; + +-- Version adoption velocity (materialized) +CREATE MATERIALIZED VIEW version_adoption AS +SELECT DATE(checked_at) as day, + gstack_version, + COUNT(DISTINCT install_fingerprint) as unique_installs +FROM update_checks +WHERE source = 'live' OR source IS NULL +GROUP BY DATE(checked_at), gstack_version +ORDER BY day DESC; + +-- Growth funnel: first-seen based (not heartbeat-based) +CREATE VIEW growth_funnel AS +WITH first_seen AS ( + SELECT install_fingerprint, MIN(checked_at) as first_check + FROM update_checks + WHERE install_fingerprint IS NOT NULL AND (source = 'live' OR source IS NULL) + GROUP BY install_fingerprint +) +SELECT + DATE(fs.first_check) as install_day, + COUNT(DISTINCT fs.install_fingerprint) as installs, + COUNT(DISTINCT CASE WHEN te.event_timestamp IS NOT NULL THEN fs.install_fingerprint END) as activated, + COUNT(DISTINCT CASE WHEN uc2.checked_at IS NOT NULL THEN fs.install_fingerprint END) as retained_7d +FROM first_seen fs +LEFT JOIN telemetry_events te + ON fs.install_fingerprint = te.install_fingerprint + AND te.event_timestamp BETWEEN fs.first_check AND fs.first_check + INTERVAL '24 hours' + AND te.event_type = 'skill_run' + AND (te.source = 'live' OR te.source IS NULL) +LEFT JOIN update_checks uc2 + ON fs.install_fingerprint = uc2.install_fingerprint + AND uc2.checked_at BETWEEN fs.first_check + INTERVAL '7 days' AND fs.first_check + INTERVAL '14 days' +WHERE fs.install_fingerprint IS NOT NULL +GROUP BY DATE(fs.first_check) +ORDER BY install_day DESC; diff --git a/supabase/migrations/004_screenshot_storage.sql b/supabase/migrations/004_screenshot_storage.sql new file mode 100644 index 000000000..551965618 --- /dev/null +++ b/supabase/migrations/004_screenshot_storage.sql @@ -0,0 +1,96 @@ +-- 004_screenshot_storage.sql +-- PR screenshot storage + device code auth for CLI → web auth flow + +-- ─── Storage bucket (PRIVATE — proxy adds watermark) ───────────── +INSERT INTO storage.buckets (id, name, public) +VALUES ('pr-screenshots', 'pr-screenshots', false) +ON CONFLICT (id) DO NOTHING; + +-- Storage RLS: authenticated users upload to their own prefix +CREATE POLICY "auth_upload_own_prefix" ON storage.objects + FOR INSERT TO authenticated + WITH CHECK (bucket_id = 'pr-screenshots' AND (storage.foldername(name))[1] = auth.uid()::text); + +-- Storage RLS: service_role reads (proxy fetches via service key) +-- No public read — raw images must go through watermark proxy +CREATE POLICY "service_read_screenshots" ON storage.objects + FOR SELECT TO service_role + USING (bucket_id = 'pr-screenshots'); + +-- Storage RLS: authenticated users can delete their own uploads +CREATE POLICY "auth_delete_own" ON storage.objects + FOR DELETE TO authenticated + USING (bucket_id = 'pr-screenshots' AND (storage.foldername(name))[1] = auth.uid()::text); + +-- ─── Screenshots metadata table ────────────────────────────────── +CREATE TABLE IF NOT EXISTS screenshots ( + id TEXT PRIMARY KEY, -- 8-char nanoid + user_id UUID NOT NULL REFERENCES auth.users(id), + storage_path TEXT NOT NULL, -- path in pr-screenshots bucket + repo_slug TEXT NOT NULL, -- slugified repo name + branch TEXT NOT NULL, -- slugified branch name + viewport TEXT, -- e.g. 'mobile', 'tablet', 'desktop' + pr_number INTEGER, -- populated after PR creation + created_at TIMESTAMPTZ NOT NULL DEFAULT now() +); + +-- Indexes +CREATE INDEX idx_screenshots_user ON screenshots(user_id); +CREATE INDEX idx_screenshots_repo_branch ON screenshots(repo_slug, branch); + +-- RLS on screenshots: auth insert own, public read metadata, auth delete own +ALTER TABLE screenshots ENABLE ROW LEVEL SECURITY; + +CREATE POLICY "auth_insert_own_screenshots" ON screenshots + FOR INSERT TO authenticated + WITH CHECK (user_id = auth.uid()); + +CREATE POLICY "public_read_screenshots" ON screenshots + FOR SELECT TO anon, authenticated + USING (true); + +CREATE POLICY "auth_delete_own_screenshots" ON screenshots + FOR DELETE TO authenticated + USING (user_id = auth.uid()); + +-- ─── Device codes table (RFC 8628 pattern) ─────────────────────── +CREATE TABLE IF NOT EXISTS device_codes ( + code TEXT PRIMARY KEY, -- server-generated device code + device_secret TEXT NOT NULL, -- PKCE-like secret for verification + user_code TEXT NOT NULL, -- short human-readable code (e.g. ABCD-1234) + user_id UUID REFERENCES auth.users(id), -- NULL until user approves + status TEXT NOT NULL DEFAULT 'pending', -- pending | approved | expired + created_at TIMESTAMPTZ NOT NULL DEFAULT now(), + expires_at TIMESTAMPTZ NOT NULL -- 10 minutes from creation +); + +-- Index for polling (CLI polls by device_code + secret) +CREATE INDEX idx_device_codes_status ON device_codes(code, status); + +-- RLS: service_role only (all access goes through API routes) +ALTER TABLE device_codes ENABLE ROW LEVEL SECURITY; + +CREATE POLICY "service_only_device_codes" ON device_codes + FOR ALL TO service_role + USING (true) + WITH CHECK (true); + +-- ─── Cleanup: expired device codes + orphan screenshots ────────── +-- Delete expired device codes (> 15 minutes old, generous buffer over 10min expiry) +-- Delete orphan screenshots (no PR number after 24h) +-- Run via pg_cron if available, otherwise manual/API trigger +DO $$ +BEGIN + IF EXISTS (SELECT 1 FROM pg_extension WHERE extname = 'pg_cron') THEN + PERFORM cron.schedule( + 'cleanup_device_codes', + '*/15 * * * *', -- every 15 minutes + $$DELETE FROM device_codes WHERE expires_at < now() - interval '5 minutes'$$ + ); + PERFORM cron.schedule( + 'cleanup_orphan_screenshots', + '0 */6 * * *', -- every 6 hours + $$DELETE FROM screenshots WHERE pr_number IS NULL AND created_at < now() - interval '24 hours'$$ + ); + END IF; +END $$; diff --git a/test/community-tier.test.ts b/test/community-tier.test.ts new file mode 100644 index 000000000..2516d76a5 --- /dev/null +++ b/test/community-tier.test.ts @@ -0,0 +1,209 @@ +import { describe, test, expect, beforeEach, afterEach } from 'bun:test'; +import { execSync } from 'child_process'; +import * as fs from 'fs'; +import * as path from 'path'; +import * as os from 'os'; + +const ROOT = path.resolve(import.meta.dir, '..'); +const BIN = path.join(ROOT, 'bin'); + +let tmpDir: string; + +function run(cmd: string, env: Record = {}): string { + try { + return execSync(cmd, { + cwd: ROOT, + env: { ...process.env, GSTACK_STATE_DIR: tmpDir, GSTACK_DIR: ROOT, ...env }, + encoding: 'utf-8', + timeout: 10000, + }).trim(); + } catch (e: any) { + return e.stdout?.toString() || e.message; + } +} + +function setConfig(key: string, value: string) { + run(`${BIN}/gstack-config set ${key} ${value}`); +} + +beforeEach(() => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'gstack-comm-')); +}); + +afterEach(() => { + fs.rmSync(tmpDir, { recursive: true, force: true }); +}); + +describe('gstack-auth', () => { + test('status shows not authenticated when no token file', () => { + const output = run(`${BIN}/gstack-auth status`); + expect(output).toContain('Not authenticated'); + }); + + test('logout removes token file', () => { + const authFile = path.join(tmpDir, 'auth-token.json'); + fs.writeFileSync(authFile, '{"access_token":"test"}'); + expect(fs.existsSync(authFile)).toBe(true); + + run(`${BIN}/gstack-auth logout`); + expect(fs.existsSync(authFile)).toBe(false); + }); +}); + +describe('gstack-auth-refresh', () => { + test('--check fails when not authenticated', () => { + // execSync throws on non-zero exit code + try { + execSync(`${BIN}/gstack-auth-refresh --check`, { + env: { ...process.env, GSTACK_STATE_DIR: tmpDir, GSTACK_DIR: ROOT } + }); + expect(false).toBe(true); // Should not reach here + } catch (e: any) { + expect(e.status).toBe(1); + } + }); + + test('--check succeeds when authenticated', () => { + const authFile = path.join(tmpDir, 'auth-token.json'); + const expiresAt = Math.floor(Date.now() / 1000) + 3600; + fs.writeFileSync(authFile, JSON.stringify({ + access_token: 'valid', + refresh_token: 'refresh', + expires_at: expiresAt, + email: 'test@example.com', + user_id: 'user-123' + })); + + const status = execSync(`${BIN}/gstack-auth-refresh --check`, { + env: { ...process.env, GSTACK_STATE_DIR: tmpDir, GSTACK_DIR: ROOT } + }); + // Should not throw + }); +}); + +describe('gstack-community-backup', () => { + test('exits early if not community tier', () => { + setConfig('telemetry', 'anonymous'); + const output = run(`${BIN}/gstack-community-backup`); + expect(output).toBe(''); + }); + + test('exits early if not authenticated', () => { + setConfig('telemetry', 'community'); + const output = run(`${BIN}/gstack-community-backup`); + expect(output).toBe(''); + }); + + test('snapshot generation (dry run/mock check)', () => { + setConfig('telemetry', 'community'); + const authFile = path.join(tmpDir, 'auth-token.json'); + fs.writeFileSync(authFile, JSON.stringify({ + access_token: 'valid', + refresh_token: 'refresh', + expires_at: Math.floor(Date.now() / 1000) + 3600, + email: 'test@example.com', + user_id: 'user-123' + })); + + // Create some data to backup + fs.writeFileSync(path.join(tmpDir, 'config.yaml'), 'key: "value with \\"quotes\\""\n'); + const analyticsDir = path.join(tmpDir, 'analytics'); + fs.mkdirSync(analyticsDir); + fs.writeFileSync(path.join(analyticsDir, 'skill-usage.jsonl'), '{"skill":"qa","duration_s":10,"outcome":"success"}\n'); + + // We can't easily test the Supabase POST without mocking curl or the endpoint + // but we can verify it doesn't crash and respects the rate limit marker. + run(`${BIN}/gstack-community-backup`, { GSTACK_TELEMETRY_ENDPOINT: 'http://localhost:9999' }); + + // It should NOT have created the rate limit marker because the POST failed (HTTP 000) + expect(fs.existsSync(path.join(analyticsDir, '.last-backup-time'))).toBe(false); + }); +}); + +describe('gstack-screenshot-upload', () => { + test('shows usage when no file provided', () => { + const output = run(`${BIN}/gstack-screenshot-upload`); + expect(output).toContain('Usage:'); + }); + + test('errors on missing file', () => { + const output = run(`${BIN}/gstack-screenshot-upload /nonexistent/file.png`); + expect(output).toContain('file not found'); + }); + + test('errors when not authenticated', () => { + // Create a valid PNG file (1x1 pixel) + const pngFile = path.join(tmpDir, 'test.png'); + // Minimal valid PNG: 1x1 white pixel + const png = Buffer.from([ + 0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A, // PNG signature + 0x00, 0x00, 0x00, 0x0D, 0x49, 0x48, 0x44, 0x52, // IHDR chunk + 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, + 0x08, 0x02, 0x00, 0x00, 0x00, 0x90, 0x77, 0x53, + 0xDE, 0x00, 0x00, 0x00, 0x0C, 0x49, 0x44, 0x41, // IDAT chunk + 0x54, 0x08, 0xD7, 0x63, 0xF8, 0xCF, 0xC0, 0x00, + 0x00, 0x00, 0x02, 0x00, 0x01, 0xE2, 0x21, 0xBC, + 0x33, 0x00, 0x00, 0x00, 0x00, 0x49, 0x45, 0x4E, // IEND chunk + 0x44, 0xAE, 0x42, 0x60, 0x82 + ]); + fs.writeFileSync(pngFile, png); + + const output = run(`${BIN}/gstack-screenshot-upload ${pngFile}`); + expect(output).toContain('not authenticated'); + }); + + test('slugifies repo and branch names', () => { + // Test the slugify behavior by checking the upload script parses args correctly + // We can't test actual upload without a server, but we can verify arg parsing + const pngFile = path.join(tmpDir, 'test.png'); + const png = Buffer.from([ + 0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A, + 0x00, 0x00, 0x00, 0x0D, 0x49, 0x48, 0x44, 0x52, + 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, + 0x08, 0x02, 0x00, 0x00, 0x00, 0x90, 0x77, 0x53, + 0xDE, 0x00, 0x00, 0x00, 0x0C, 0x49, 0x44, 0x41, + 0x54, 0x08, 0xD7, 0x63, 0xF8, 0xCF, 0xC0, 0x00, + 0x00, 0x00, 0x02, 0x00, 0x01, 0xE2, 0x21, 0xBC, + 0x33, 0x00, 0x00, 0x00, 0x00, 0x49, 0x45, 0x4E, + 0x44, 0xAE, 0x42, 0x60, 0x82 + ]); + fs.writeFileSync(pngFile, png); + + // Will fail at auth check, but we verify it gets past arg parsing + const output = run(`${BIN}/gstack-screenshot-upload ${pngFile} --repo-slug "My/Repo" --branch "feat/my-thing" --viewport desktop`); + // Should fail at auth, not at arg parsing + expect(output).toContain('not authenticated'); + }); + + test('rejects non-PNG files', () => { + const txtFile = path.join(tmpDir, 'test.txt'); + fs.writeFileSync(txtFile, 'not a png'); + const output = run(`${BIN}/gstack-screenshot-upload ${txtFile}`); + expect(output).toContain('only PNG'); + }); +}); + +describe('gstack-auth device code', () => { + test('change-email shows instructions', () => { + const output = run(`${BIN}/gstack-auth change-email`); + expect(output).toContain('log out'); + expect(output).toContain('re-authenticate'); + }); +}); + +describe('gstack-community-benchmarks', () => { + test('shows no data message when no local analytics', () => { + const output = run(`${BIN}/gstack-community-benchmarks`); + expect(output).toContain('No local analytics data'); + }); + + test('renders comparison table with local data', () => { + const analyticsDir = path.join(tmpDir, 'analytics'); + fs.mkdirSync(analyticsDir); + fs.writeFileSync(path.join(analyticsDir, 'skill-usage.jsonl'), '{"skill":"qa","duration_s":120,"outcome":"success"}\n'); + + const output = run(`${BIN}/gstack-community-benchmarks`); + expect(output).toContain('/qa'); + expect(output).toContain('2m 0s'); + }); +}); diff --git a/test/gen-skill-docs.test.ts b/test/gen-skill-docs.test.ts index d8a071a1f..fb6af9e6a 100644 --- a/test/gen-skill-docs.test.ts +++ b/test/gen-skill-docs.test.ts @@ -1561,7 +1561,7 @@ describe('telemetry', () => { test('generated SKILL.md contains telemetry opt-in prompt', () => { const content = fs.readFileSync(path.join(ROOT, 'SKILL.md'), 'utf-8'); expect(content).toContain('.telemetry-prompted'); - expect(content).toContain('Help gstack get better'); + expect(content).toContain('Community mode shares usage data'); expect(content).toContain('gstack-config set telemetry community'); expect(content).toContain('gstack-config set telemetry anonymous'); expect(content).toContain('gstack-config set telemetry off'); diff --git a/test/helpers/session-runner.ts b/test/helpers/session-runner.ts index 7101e30c5..f07dec68d 100644 --- a/test/helpers/session-runner.ts +++ b/test/helpers/session-runner.ts @@ -169,10 +169,16 @@ export async function runSkillTest(options: { const promptFile = path.join(os.tmpdir(), `.prompt-${process.pid}-${Date.now()}-${Math.random().toString(36).slice(2)}`); fs.writeFileSync(promptFile, prompt); + // Isolate telemetry: E2E tests use a temp state dir so they don't pollute + // production telemetry with test events (e.g. fake timeout crashes). + const testStateDir = path.join(os.tmpdir(), `gstack-e2e-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`); + fs.mkdirSync(testStateDir, { recursive: true }); + const proc = Bun.spawn(['sh', '-c', `cat "${promptFile}" | claude ${args.map(a => `"${a}"`).join(' ')}`], { cwd: workingDirectory, stdout: 'pipe', stderr: 'pipe', + env: { ...process.env, GSTACK_STATE_DIR: testStateDir, GSTACK_TELEMETRY_SOURCE: 'test' }, }); // Race against timeout diff --git a/test/helpers/touchfiles.ts b/test/helpers/touchfiles.ts index d61ae1647..8814b34bd 100644 --- a/test/helpers/touchfiles.ts +++ b/test/helpers/touchfiles.ts @@ -208,6 +208,9 @@ export const E2E_TIERS: Record = { 'ship-local-workflow': 'gate', 'ship-coverage-audit': 'gate', 'ship-triage': 'gate', + 'ship-plan-completion': 'gate', + 'ship-plan-verification': 'gate', + 'review-plan-completion': 'gate', // Retro — gate for cheap branch detection, periodic for full Opus retro 'retro': 'periodic', diff --git a/test/telemetry.test.ts b/test/telemetry.test.ts index a30506316..34ab94152 100644 --- a/test/telemetry.test.ts +++ b/test/telemetry.test.ts @@ -72,33 +72,96 @@ describe('gstack-telemetry-log', () => { expect(readJsonl()).toHaveLength(0); }); - test('includes installation_id for community tier', () => { + test('includes install_fingerprint for community tier (UUID)', () => { setConfig('telemetry', 'community'); run(`${BIN}/gstack-telemetry-log --skill review --duration 100 --outcome success --session-id comm-123`); const events = parseJsonl(); expect(events).toHaveLength(1); - // installation_id should be a UUID v4 (or hex fallback) - expect(events[0].installation_id).toMatch(/^[a-f0-9-]{32,36}$/); + // install_fingerprint should be a UUID v4 (or hex fallback) + expect(events[0].install_fingerprint).toMatch(/^[a-f0-9-]{32,36}$/); + }); - test('installation_id is null for anonymous tier', () => { + test('includes install_fingerprint for anonymous tier (not null — UUID is not PII)', () => { setConfig('telemetry', 'anonymous'); run(`${BIN}/gstack-telemetry-log --skill qa --duration 50 --outcome success --session-id anon-123`); const events = parseJsonl(); - expect(events[0].installation_id).toBeNull(); + // All tiers now get install_fingerprint (random UUID, not PII) + expect(events[0].install_fingerprint).toMatch(/^[a-f0-9-]{36}$/); + }); + + test('source field defaults to live', () => { + setConfig('telemetry', 'anonymous'); + run(`${BIN}/gstack-telemetry-log --skill qa --duration 50 --outcome success --session-id src-123`); + + const events = parseJsonl(); + expect(events[0].source).toBe('live'); + }); + + test('--source flag overrides default', () => { + setConfig('telemetry', 'anonymous'); + run(`${BIN}/gstack-telemetry-log --skill qa --duration 50 --outcome success --source test --session-id src-456`); + + const events = parseJsonl(); + expect(events[0].source).toBe('test'); + }); + + test('GSTACK_TELEMETRY_SOURCE env sets source', () => { + setConfig('telemetry', 'anonymous'); + run(`GSTACK_TELEMETRY_SOURCE=test ${BIN}/gstack-telemetry-log --skill qa --duration 50 --outcome success --session-id src-789`); + + const events = parseJsonl(); + expect(events[0].source).toBe('test'); }); - test('includes error_class when provided', () => { + test('duration > 86400 is capped to null', () => { setConfig('telemetry', 'anonymous'); - run(`${BIN}/gstack-telemetry-log --skill browse --duration 10 --outcome error --error-class timeout --session-id err-123`); + run(`${BIN}/gstack-telemetry-log --skill qa --duration 100000 --outcome success --session-id dur-123`); + + const events = parseJsonl(); + expect(events[0].duration_s).toBeNull(); + }); + + test('negative duration is capped to null', () => { + setConfig('telemetry', 'anonymous'); + run(`${BIN}/gstack-telemetry-log --skill qa --duration -5 --outcome success --session-id dur-456`); + + const events = parseJsonl(); + expect(events[0].duration_s).toBeNull(); + }); + + test('install_fingerprint persists across runs', () => { + setConfig('telemetry', 'anonymous'); + run(`${BIN}/gstack-telemetry-log --skill qa --duration 10 --outcome success --session-id fp-1`); + run(`${BIN}/gstack-telemetry-log --skill qa --duration 20 --outcome success --session-id fp-2`); + + const events = parseJsonl(); + expect(events).toHaveLength(2); + expect(events[0].install_fingerprint).toBe(events[1].install_fingerprint); + }); + + test('includes error_class, error_message, and failed_step when provided', () => { + setConfig('telemetry', 'anonymous'); + run(`${BIN}/gstack-telemetry-log --skill browse --duration 10 --outcome error --error-class timeout --error-message "request timed out after 30s" --failed-step "goto_page" --session-id err-123`); const events = parseJsonl(); expect(events[0].error_class).toBe('timeout'); + expect(events[0].error_message).toBe('request timed out after 30s'); + expect(events[0].failed_step).toBe('goto_page'); expect(events[0].outcome).toBe('error'); }); + test('truncates long error messages', () => { + setConfig('telemetry', 'anonymous'); + const longMsg = 'a'.repeat(300); + run(`${BIN}/gstack-telemetry-log --skill qa --outcome error --error-message "${longMsg}" --session-id trunc-123`); + + const events = parseJsonl(); + expect(events[0].error_message).toHaveLength(200); + }); + test('handles missing duration gracefully', () => { setConfig('telemetry', 'anonymous'); run(`${BIN}/gstack-telemetry-log --skill qa --outcome success --session-id nodur-123`);