From 34c7a0becba84e654e2be2ab8177727234daf8f0 Mon Sep 17 00:00:00 2001 From: drummerms Date: Wed, 13 May 2026 16:49:15 -0500 Subject: [PATCH] fix(gbrain-sync): cut source-id slugs on hyphen boundaries `constrainSourceId` truncated the slug with `slug.slice(-tailBudget)`, which cut mid-word when the boundary fell inside a token. For a repo where the combined `prefix-org-repo-pathhash` exceeded 32 chars, this produced embarrassing artifacts like `gstack-code-kill-270c0001-c32152` (from `drummerms-av-sow-wiz-skill-270c0001`). Two changes: 1. `constrainSourceId` now walks hyphen-separated tokens from the right, accumulating whole tokens until adding the next would exceed `tailBudget`. When no token fits, falls through to the existing `${prefix}-${hash}` form. 2. `deriveCodeSourceId` now retries with `repo-only-pathhash` (dropping the org segment) when the full `org-repo-pathhash` triggers truncation. Keeps the repo name readable when it fits at all. Before: `gstack-code-kill-270c0001-c32152` After: `gstack-code-270c0001-050d83` (repo+pathhash doesn't fit at 32 chars even after dropping org; pathhash + collision-hash is the clean deterministic fallback) Note that gbrain's 32-char source-id cap is the actual root constraint for mid-length org+repo names. Raising the cap on the gbrain side would let this resolve to `gstack-code-av-sow-wiz-skill-270c0001` (37 chars) or `gstack-code-drummerms-av-sow-wiz-skill-270c0001` (47 chars). That's a gbrain-side change not bundled here. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- bin/gstack-gbrain-sync.ts | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/bin/gstack-gbrain-sync.ts b/bin/gstack-gbrain-sync.ts index 36b265e42d..8098c03f1b 100644 --- a/bin/gstack-gbrain-sync.ts +++ b/bin/gstack-gbrain-sync.ts @@ -179,7 +179,14 @@ function deriveCodeSourceId(repoPath: string): string { if (remote) { const segs = remote.split("/").filter(Boolean); const slugSource = segs.slice(-2).join("-"); - return constrainSourceId("gstack-code", `${slugSource}-${pathHash}`); + const fullId = constrainSourceId("gstack-code", `${slugSource}-${pathHash}`); + // If the org+repo+pathhash fits cleanly (suffix preserved), use it. + if (fullId.endsWith(`-${pathHash}`)) return fullId; + // Otherwise drop the org prefix and retry with just repo+pathhash so the + // repo name stays readable. If that still doesn't fit, constrainSourceId + // falls back to a deterministic hashed-tail form. + const repoOnly = segs[segs.length - 1] || "repo"; + return constrainSourceId("gstack-code", `${repoOnly}-${pathHash}`); } const base = repoPath.split("/").pop() || "repo"; return constrainSourceId("gstack-code", `${base}-${pathHash}`); @@ -210,6 +217,10 @@ function deriveLegacyCodeSourceId(repoPath: string): string { * Build a gbrain-valid source id (1-32 lowercase alnum + interior hyphens). Sanitizes * `raw`, prefixes with `prefix`, and falls back to a hashed-tail form when total length * would exceed 32 chars. + * + * Truncation cuts on hyphen boundaries (whole-word units) from the right, never + * mid-word. Inputs like "drummerms-av-sow-wiz-skill-270c0001" produce + * "${prefix}-270c0001-${hash}", not "${prefix}-kill-270c0001-${hash}".
*/ function constrainSourceId(prefix: string, raw: string): string { const MAX = 32; @@ -228,7 +239,20 @@ function constrainSourceId(prefix: string, raw: string): string { // Total budget: prefix + "-" + tail + "-" + hash const tailBudget = MAX - prefix.length - 2 - hash.length; if (tailBudget < 1) return `${prefix}-${hash}`; - const tail = slug.slice(-tailBudget).replace(/^-+|-+$/g, ""); + // Cut on hyphen boundaries instead of mid-word. Walk tokens from the right, + // accumulating until adding the next token would exceed tailBudget. This + // preserves readable suffixes (pathhash, repo name) and avoids embarrassing + // mid-word artifacts like "skill" → "kill". + const tokens = slug.split("-").filter(Boolean); + const kept: string[] = []; + let len = 0; + for (let i = tokens.length - 1; i >= 0; i--) { + const add = kept.length === 0 ? tokens[i].length : tokens[i].length + 1; + if (len + add > tailBudget) break; + kept.unshift(tokens[i]); + len += add; + } + const tail = kept.join("-"); return tail ? `${prefix}-${tail}-${hash}` : `${prefix}-${hash}`; }