From 761f0a8d7255f988aaccd782c9fc79cf6f447aef Mon Sep 17 00:00:00 2001 From: Andriy Massimilla Date: Wed, 13 May 2026 14:36:20 -0400 Subject: [PATCH 1/4] fix: improve final output prompt legibility --- src/pipeline.ts | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/pipeline.ts b/src/pipeline.ts index 3044c9f..0d27eaa 100644 --- a/src/pipeline.ts +++ b/src/pipeline.ts @@ -1,6 +1,7 @@ import { readFile, writeFile, access } from "node:fs/promises"; -import { resolve, dirname } from "node:path"; +import { resolve, dirname, relative, isAbsolute } from "node:path"; import { fileURLToPath } from "node:url"; +import pc from "picocolors"; import { loadConfig, resolveChangelogPath, @@ -175,7 +176,8 @@ export async function runPipeline(opts: PipelineOptions): Promise { ); if (!opts.yes) { - const ok = await confirmPrompt(`write to ${changelogPath}? [y/N] `); + const displayPath = displayPathFor(changelogPath, loaded.projectRoot); + const ok = await confirmPrompt(pc.dim(`write to ${displayPath}? 
[y/N] `)); if (!ok) { process.stderr.write("cliff-notes: aborted\n"); return; @@ -238,6 +240,12 @@ async function readMaybe(p: string): Promise { } } +function displayPathFor(absPath: string, projectRoot: string): string { + const rel = relative(projectRoot, absPath); + if (!rel || rel.startsWith("..") || isAbsolute(rel)) return absPath; + return rel; +} + async function confirmPrompt(prompt: string): Promise { if (!process.stdin.isTTY) return false; process.stderr.write(prompt); From 7f0ce22240cdd85e9edeb27bdf113bdf7162fe34 Mon Sep 17 00:00:00 2001 From: Andriy Massimilla Date: Wed, 13 May 2026 15:21:46 -0400 Subject: [PATCH 2/4] feat: add curate step to group/omit entries using LLM --- README.md | 13 +- cliff-notes.example.toml | 23 +- planning/commit-grouping.md | 506 ++++++++++++++++++++++++++++++++++++ src/cli.ts | 2 + src/config.ts | 25 +- src/curation.ts | 383 +++++++++++++++++++++++++++ src/exec.ts | 33 +++ src/git-cliff.ts | 47 ++-- src/git-diff.ts | 59 +++++ src/git-remote.ts | 32 +-- src/github.ts | 102 +++++--- src/llm.ts | 93 ++++++- src/pipeline.ts | 114 +++++--- src/prompts/curate.ts | 84 ++++++ src/prompts/rewrite.ts | 8 + src/prompts/summary.ts | 12 +- src/prompts/system.ts | 1 + src/render.ts | 29 ++- src/schemas.ts | 135 ++++++++++ tests/curation.test.ts | 227 ++++++++++++++++ tests/render.test.ts | 80 +++++- tests/schemas.test.ts | 137 +++++++++- 22 files changed, 1999 insertions(+), 146 deletions(-) create mode 100644 planning/commit-grouping.md create mode 100644 src/curation.ts create mode 100644 src/exec.ts create mode 100644 src/git-diff.ts create mode 100644 src/prompts/curate.ts create mode 100644 tests/curation.test.ts diff --git a/README.md b/README.md index 7a00476..b70e332 100644 --- a/README.md +++ b/README.md @@ -37,14 +37,18 @@ model = "claude-sonnet-4-6" [project] name = "my-project" -audience = "internal-devs" -voice = "concise, technical, no marketing fluff" +audience = "end-users of the application" +voice = "clear, 
user-focused, concise, no marketing fluff" ``` Then export the relevant API key (`ANTHROPIC_API_KEY`, `OPENAI_API_KEY`, or AWS credentials for Bedrock). If the project doesn't already have a `cliff.toml`, cliff-notes uses a bundled default. Override via `git_cliff.config = "path/to/cliff.toml"`. +By default, cliff-notes runs a curation pass before rewriting entries. Commits sharing a PR number are grouped deterministically; remaining commits can be grouped or omitted by the model when the schema validates the full partition. Omission decisions use `project.audience` as free-form guidance, so external-user or operator notes can skip test-only and internal-only churn while maintainer notes can keep those changes when useful. Set `[curation] strategy = "by-pr-only"` for deterministic PR grouping only, or `"off"` for one bullet per commit. Use `--show-curation` to print the proposed groups and omissions before rewrite. + +GitHub PR enrichment reuses existing credentials: `GITHUB_TOKEN`, `GH_TOKEN`, or `gh auth token`. In GitHub Actions, `GITHUB_TOKEN` and `GITHUB_REPOSITORY` are enough for git-cliff PR enrichment; locally, an authenticated `gh` CLI is sufficient. Set `[github] enabled = false` to skip token/repo resolution. + ## Use ```sh @@ -62,6 +66,9 @@ cliff-notes --tag v1.2.3 --out release-notes.md # Skip the confirmation prompt cliff-notes --tag v1.2.3 --yes + +# Print grouping and omission decisions before rewrite +cliff-notes --unreleased --dry-run --show-curation ``` Either `--tag ` or `--unreleased` is required — cliff-notes does not infer version numbers. @@ -84,7 +91,7 @@ Each generated section ends with an HTML comment containing the raw git-cliff en --> ``` -The block makes drift between raw commits and LLM rewrites diffable in code review. The marker version (`v1`) lets future cliff-notes re-render from the raw input without re-querying git. +Grouped entries list every member commit in the audit block. 
Omitted commits do not appear as rendered bullets, but they are still listed in the audit block with the model's reason. The block makes drift between raw commits and LLM rewrites diffable in code review. The marker version (`v1`) lets future cliff-notes re-render from the raw input without re-querying git. ## Goreleaser integration (recipes) diff --git a/cliff-notes.example.toml b/cliff-notes.example.toml index afa43c0..db53469 100644 --- a/cliff-notes.example.toml +++ b/cliff-notes.example.toml @@ -17,8 +17,8 @@ model = "claude-sonnet-4-6" [project] name = "my-project" -audience = "internal-devs" -voice = "concise, technical, no marketing fluff" +audience = "end-users of the application" +voice = "clear, user-focused, concise, no marketing fluff" [prompt] # Optional. Appended to the baseline system prompt — use to teach the LLM @@ -37,6 +37,25 @@ voice = "concise, technical, no marketing fluff" # own bundled default. # config = "cliff.toml" +[curation] +# strategy = "auto" # default. PR prefilter + LLM curation on residual. +# "by-pr-only" runs only deterministic PR grouping. +# "off" keeps one changelog bullet per commit. +# omit_plumbing = true # LLM may suppress changes that are not meaningful +# to the configured project.audience. Audience examples are guidance, not an +# exhaustive list: external-user/operator notes can omit test-only changes and +# developer-only churn, while maintainer notes can keep those when useful. +# Omissions remain in the audit block. +# min_group_size = 2 +# max_per_group = 5 +# max_index_gap = 15 # LLM cannot group commits more than 15 positions apart. +# require_same_type = true # reject LLM groups that mix conventional commit groups. +# cache = true # content-hash cache for deterministic curation reruns. + +[github] +# enabled = true # reuse gh/GITHUB_TOKEN credentials for git-cliff PR enrichment. +# repo = "owner/name" # optional; default autodetected from env, gh, or origin. 
+ [output] changelog_file = "CHANGELOG.md" date_format = "%Y-%m-%d" diff --git a/planning/commit-grouping.md b/planning/commit-grouping.md new file mode 100644 index 0000000..0abbae3 --- /dev/null +++ b/planning/commit-grouping.md @@ -0,0 +1,506 @@ +# Plan: Curation pass for related commits (PR prefilter + LLM curation) + +## Context + +cliff-notes today renders **one bullet per commit** in a release. That's fine on squash-merge repos (each PR collapses to one commit carrying `(#N)`), but it breaks down for: + +- **Merge-commit PRs**: each PR contributes N constituent commits ("1", "2", "wip", "fix typo") plus a `Merge pull request #N from foo/branch` commit → N+1 entries per PR. +- **Rebase-merge PRs**: GitHub rewrites each rebased commit to carry `(#N)`, so the commits share a PR number — but cliff-notes still emits them as N separate entries. +- **Direct-to-main related commits**: a feature landed across multiple non-adjacent commits without a PR (e.g. "add user model", "wire user model into auth", "fix typo in user model"). No PR signal binds them; no deterministic rule reliably groups them. +- **Plumbing noise**: dependency bumps, lint/format fixes, comment-only edits, internal renames, test-only churn. Mechanically valid commits, but rarely interesting in a release-notes context. + +The first two are mechanical: same PR number → group. The third is semantic — only an LLM with subjects, bodies, and *what files each commit touched* can judge it. The fourth is editorial: only an LLM can sensibly decide "this commit shouldn't appear in the changelog." + +The goal is a **curation pass** that handles all four — grouping, primary-selection, and omission — while preserving cliff-notes' core invariants: + +- **Deterministic where deterministic suffices**, semantic only where it doesn't. +- **Strict Zod 1:1 input↔output contract** with the existing rewrite pass (src/schemas.ts:30–53) stays intact. 
+- **Audit block** (src/render.ts:42–46) still records every original commit, tagged with its curation disposition (grouped under X, omitted with reason, solo). + +## Recommended approach: PR prefilter + LLM curation on the residual + +### Two-stage pipeline + +1. **Free PR prefilter (no LLM call).** Resolve a PR number per commit, then group commits sharing a PR number. Sources of PR number, in order: + - `commit.remote.pr_number` from git-cliff's GitHub enrichment (most authoritative). + - `extractPRNumber(commit)` from src/git-cliff.ts:129 (subject `(#N)` / links). + + This step handles merge-commit and rebase-merge PRs uniformly. For squash-merge repos every commit has a unique PR number — the prefilter is a no-op (no commit shares a PR with any other). + +2. **Gate.** If fewer than 2 commits remain in the residual (commits not part of any multi-member PR group), skip the LLM curation call entirely. Most well-disciplined squash-merge releases exit here. + +3. **LLM curation pass on the residual.** Ask the model to classify each residual commit into exactly one of three dispositions: group with N others, stand alone, or omit (with reason). Strict partition schema, type-homogeneity guard, content-hash cached for reproducibility. + +The combined `groups[]` (prefilter + LLM) plus `omitted[]` is what flows forward. The existing **rewrite pass remains a separate, unchanged LLM call** consuming the surviving groups as `EntryInput[]`. + +### Why keep curation and rewrite as separate LLM calls + +- **Cache granularity.** Curation is the expensive call (full residual + file lists + diff stats in input). Rewrite is much smaller (just subjects/titles/types). Tweaks to the rewrite system prompt — `voice`, `audience`, `system_extra` — only invalidate the cheap cache, not the expensive curation one. +- **Independent retries.** Rewrite has strict 280-char length validation. Combined → re-pay curation cost on every rewrite-only retry. Separate → retry just rewrite. 
+- **Independent prompt tuning.** Curation needs "be conservative about merging and omission, justify every choice." Rewrite needs "be concise, technical, no marketing fluff." Each system prompt stays focused. +- **Lower-risk ship.** The existing rewrite pass works today and is tested. Curation lands as a new pre-stage that produces grouped `EntryInput[]` for the existing rewrite path. No churn in the working code path. +- **Reversibility.** If we later observe coherence problems (rewrite forgetting the group's narrative), combining is a small refactor. Going the other way is larger. + +### Reuse `gh auth` credentials + +cliff-notes already requires `gh` (src/github.ts:21, 76–86). Lift the existing OAuth token via `gh auth token` and pass it to git-cliff as `--github-token`. No new auth setup, no new env var, no new prompt. CI: GitHub Actions auto-sets `GITHUB_TOKEN` and `GITHUB_REPOSITORY`; git-cliff reads both via env. Token resolution falls back gracefully (env → `gh auth token` → `null`); if `null`, we degrade to subject-based PR extraction only. + +### Diff signal for the LLM pass + +Subjects alone are too thin to judge "are these the same feature." File overlap is the strongest practical signal you can feed without blowing the prompt budget. Per-commit: + +- **Touched files** via `git show --no-patch --name-only --format= `. ~5–20 paths. +- **Line stat** via `git show --no-patch --shortstat --format= `. ~one line. + +Batched into a single `git log --name-status --format=...` over the release range. No full diffs in v1 — file overlap alone resolves most calls correctly. If quality is insufficient after real-world use, a v2 fallback can send truncated diffs on a follow-up confirmation pass. + +### Default + +**`strategy = "auto"`** (PR prefilter + LLM curation on residual) is the default. `"by-pr-only"` skips the LLM pass (deterministic-only mode for users who want zero LLM curation cost). `"off"` keeps today's per-commit behavior. 
+ +`omit_plumbing = true` is the default for the curation pass — the prompt is conservative, every omission requires a reason, and every omitted commit stays visible in the audit block plus `--show-curation` output. Easy to set `false` if a team wants every commit represented as a bullet. + +## File-by-file changes + +### `src/exec.ts` (NEW) + +Extract the duplicated `execCapture` from src/git-cliff.ts:97 and src/github.ts:88 into one shared helper, reused by curation + diff modules. + +### `src/github.ts` (extend) + +Add token/repo resolution helpers reused by the git-cliff invocation: + +- `async function resolveGitHubToken(opts: { cwd: string; verbose?: boolean }): Promise` — try `process.env.GITHUB_TOKEN`, then `process.env.GH_TOKEN`, then `gh auth token` (capture stdout, trim). Return `null` on failure (do not throw). +- `async function resolveGitHubRepo(opts: { cwd: string; configOverride?: string }): Promise` — try `configOverride`, then `process.env.GITHUB_REPOSITORY`, then `gh repo view --json nameWithOwner -q .nameWithOwner`, then parse `git remote get-url origin`. Return `null` on failure. + +### `src/git-cliff.ts` (extend `CommitSchema` + plumb token/repo) + +1. Extend `CommitSchema` (lines 4–29) to parse the `remote` field: + + ``` + remote: z.object({ + pr_number: z.number().int().nullable().optional(), + pr_title: z.string().nullable().optional(), + pr_labels: z.array(z.string()).optional().default([]), + username: z.string().nullable().optional(), + }).passthrough().optional() + ``` + +2. Extend `CliffOptions` (lines 46–51) with `githubToken?: string` and `githubRepo?: string`. In `runGitCliff`, append `--github-token ` and `--github-repo ` when both are present. + +### `src/git-diff.ts` (NEW) + +One `git log` invocation per release that yields per-commit file lists + line stats. 
+ +- `interface CommitDiffStat { sha: string; files: string[]; additions: number; deletions: number }` +- `async function getDiffStats(shas: string[], cwd: string): Promise<Map<string, CommitDiffStat>>` — internally runs `git log --no-walk --name-only --shortstat --format='%H' ...` (or `git show --shortstat --name-only` per SHA, batched). Parse into the map. Skip on error (return empty map; not fatal). + +### `src/curation.ts` (NEW) + +The orchestrator for the two-stage pipeline. Pure logic (no shell-outs of its own); consumes `commits[]`, the optional `diffStats` map, and an `llm.curate` callback. + +**Exports:** + +``` +interface CuratedMember { + sha: string; + subject: string; + body: string; + type: string | null; + scope: string | null; + files: string[]; // from diff stats; may be empty if diff lookup failed + additions: number; + deletions: number; +} + +interface CommitGroup { + prNumber: number | null; + prUrl: string | null; + members: CuratedMember[]; // length >= 1 + type: string; // resolved for the group + scope: string | null; + author: string | null; + curatedBy: "solo" | "pr" | "llm"; // for audit / verbose output + llmReason?: string; // populated when curatedBy === "llm" +} + +interface OmittedCommit { + member: CuratedMember; + reason: string; +} + +interface CurationOptions { + strategy: "off" | "by-pr-only" | "auto"; + omitPlumbing: boolean; + minGroupSize: number; + cwd: string; + llm?: LLMClient; + diffStats?: Map<string, CommitDiffStat>; + verbose?: boolean; +} + +interface CurationResult { + groups: CommitGroup[]; + omitted: OmittedCommit[]; +} + +async function curateCommits( + commits: CliffCommit[], + opts: CurationOptions, +): Promise<CurationResult>; +``` + +**Algorithm:** + +1. If `strategy === "off"` → one `CommitGroup` per commit with `curatedBy: "solo"`; `omitted: []`. Pipeline shape is uniform regardless of strategy. +2. **PR prefilter.** For each commit, resolve a PR number via `commit.remote?.pr_number ?? extractPRNumber(commit)`. Group commits sharing a non-null PR number.
For each group: + - Pick `primary` = the commit whose subject best resembles a PR title (heuristic: longest non-terse subject in the group, ties broken by chronological order). + - Resolve `type` from member types via majority + precedence (see below). + - Tag `curatedBy: "pr"` when `members.length > 1`, else `"solo"`. +3. If `strategy === "by-pr-only"`, return `{ groups, omitted: [] }` now. +4. **LLM gate.** Collect the residual = commits not part of any multi-member PR group. If `residual.length < 2`, return `{ groups, omitted: [] }` now. +5. **LLM curation pass.** Call `llm.curate(residual, diffStats, { omitPlumbing, ... })`. The response is a partition: every residual index is either in exactly one `groups[].member_indices` or in `omitted[].index`. Apply: + - For each multi-member proposed group, replace the corresponding solo groups with one multi-member `CommitGroup` tagged `curatedBy: "llm"` with `llmReason`. + - For each omitted index, remove the corresponding solo group from `groups[]` and push an `OmittedCommit` with its reason. +6. If LLM validation fails at the schema level, log a stderr warning and keep the solo groups for the residual; `omitted` stays empty. Never block the changelog. +7. Apply `min_group_size` filter: any group with `< min_group_size` members → expand back to solos. (Defensive.) +8. Emit `groups[]` in source order (each group's first member's release index). + +**Type/scope resolution** (deterministic, used for both PR and LLM groups): + +- Any `Reverts` member → group type `Reverts`. +- Else most common member type, ties broken by precedence: `Features > Bug Fixes > Performance > Refactor > Security > Build > CI > Documentation > Tests > Chores > Other`. +- Scope: shared scope across all members, else `null`. +- Author: most-frequent member author. 
+ +### `src/schemas.ts` (extend) + +Add `EntryMember` and three new fields to `EntryInput` (lines 4–13): + +``` +export interface EntryMember { + sha: string; + subject: string; + body: string; + type: string | null; + scope: string | null; + files: string[]; + additions: number; + deletions: number; +} + +export interface EntryInput { + pr_number: number | null; + raw_subject: string; + pr_title: string | null; + pr_body: string | null; + type: string; + scope: string | null; + author: string | null; + url: string | null; + members: EntryMember[]; // NEW; length >= 1 + curated_by: "solo" | "pr" | "llm"; // NEW; for prompt + audit + llm_reason?: string; // NEW; only when curated_by === "llm" +} +``` + +`RewriteResponseSchema` / `buildRewriteSchema` (lines 24–53) **unchanged**: still enforce `entries.length === inputs.length`. Because `inputs.length === groups.length`, the contract holds; existing tests pass once construction sites add `members`/`curated_by` placeholders. + +Add a new partition schema for the LLM curation response: + +``` +const GroupSchema = z.object({ + member_indices: z.array(z.number().int().nonnegative()).min(1), + primary_index: z.number().int().nonnegative(), + reason: z.string().min(1).max(200), +}); + +const OmittedSchema = z.object({ + index: z.number().int().nonnegative(), + reason: z.string().min(1).max(200), +}); + +export const CurationResponseSchema = z.object({ + groups: z.array(GroupSchema).default([]), + omitted: z.array(OmittedSchema).default([]), +}); + +export function buildCurationSchema(residual: CurationInput[], opts: { + maxPerGroup: number; + maxIndexGap: number; + requireSameType: boolean; + allowOmissions: boolean; +}) { /* superRefine: + - union of groups.member_indices ∪ omitted.index = {0..n-1} + - no overlaps between groups, or between groups and omitted + - primary_index ∈ member_indices + - groups.member_indices.length ≤ maxPerGroup + - max - min member index ≤ maxIndexGap + - when requireSameType: all members in a 
group share the same type + - when !allowOmissions: omitted.length === 0 + */ } +``` + +`CurationInput` is a light shape (subject/body/type/scope/author/files/stat) — what we hand the LLM, separate from `EntryInput`. + +### `src/config.ts` (extend) + +Add to `ConfigSchema` (around line 39): + +``` +curation = z.object({ + strategy: z.enum(["off", "by-pr-only", "auto"]).default("auto"), + omit_plumbing: z.boolean().default(true), + min_group_size: z.number().int().min(1).default(2), + max_per_group: z.number().int().min(2).default(5), + max_index_gap: z.number().int().min(1).default(15), + require_same_type: z.boolean().default(true), + cache: z.boolean().default(true), +}).default({}) + +github = z.object({ + enabled: z.boolean().default(true), + repo: z.string().optional(), +}).default({}) +``` + +Document in `cliff-notes.example.toml`: + +```toml +[curation] +# strategy = "auto" # default. PR prefilter + LLM curation on residual. + # "by-pr-only" runs only the deterministic prefilter. + # "off" reverts to one bullet per commit. +# omit_plumbing = true # LLM may suppress obvious-noise commits (dep bumps, + # lint fixes, comment-only edits). Omissions always + # appear in the audit block with their reason. +# min_group_size = 2 +# max_per_group = 5 +# max_index_gap = 15 # LLM cannot group commits >15 positions apart +# cache = true # content-hash cache of LLM curations for determinism + +[github] +# enabled = true # uses gh auth credentials for PR enrichment +# repo = "owner/name" # optional; default autodetected +``` + +### `src/llm.ts` (extend `LLMClient`) + +Add a method (keeps `rewriteEntries` and `summarize` unchanged): + +``` +curate(residual: CurationInput[], opts: { + maxPerGroup: number; + maxIndexGap: number; + requireSameType: boolean; + allowOmissions: boolean; +}): Promise +``` + +Implementation parallels `rewriteEntries` (lines 38–48): one `generateObject` call with `buildCurationPrompt(residual, opts)` and `buildCurationSchema(residual, opts)`. 
Temperature 0. + +Wrap in a content-hash cache when `loaded.config.curation.cache`: hash `JSON.stringify(residual) + PROMPT_VERSION + opts`; key into `.cliff-notes/cache/curate-<hash>.json` under `projectRoot`. Same inputs → byte-identical curation across runs. + +### `src/prompts/curate.ts` (NEW) + +`buildCurationPrompt(residual: CurationInput[], opts): string`. Content: + +- Opening: "**Default to one disposition per commit.** Most commits should stand alone as their own changelog entry. Group commits only when they describe one logical change. Omit commits only when they clearly don't belong in user-facing release notes." +- **Grouping criteria.** Strong-evidence heuristics: file overlap, sequential indices with terse subjects, `fixup!`/`squash!` prefixes, shared scope + author within a small window. +- **Grouping negatives.** Different conventional types → separate (schema also enforces this). Different non-null PR numbers → separate. When in doubt → solo. +- **Omission criteria** (only when `opts.allowOmissions`): dependency bumps without behavioral impact, lint/format/whitespace fixes, comment-only edits, internal renames, test-only churn, CI config touches that don't change behavior. **When in doubt, do NOT omit.** Every omission must include a one-line reason. +- **Omission negatives.** Anything user-facing, bug fixes, performance changes, API changes, removed/added features — never omit. Borderline cases default to including the commit, not omitting it. +- Payload: full residual JSON (`index`, `subject`, `body` (truncated to 1500), `type`, `scope`, `author`, `pr_number`, `files`, `additions`, `deletions`). +- Output schema reminder: `{groups: [{member_indices, primary_index, reason}], omitted: [{index, reason}]}`. Both `reason` fields show up in the audit block — they're for the human reviewer.
+- Two worked examples: + - Group: ["add user model", "wire user model into auth", "fix typo in user model"] (no PRs, overlapping `files`) → one group, `primary_index=0`, reason="all touch src/user.ts; describe initial user model rollout." + - Omit: ["chore: bump @types/node from 20.4.1 to 20.4.2", "style: fix lint warnings"] → both in `omitted`, with reasons. + +Include `PROMPT_VERSION` constant used in the cache key. Bump it whenever prompt or schema semantics change. + +### `src/prompts/rewrite.ts` + `src/prompts/system.ts` (surface members to the rewrite pass) + +Extend the rewrite payload to include `member_commits: [{subject, type, files}]` when `members.length > 1`. Add to the system prompt: + +> "When `member_commits` is present, the entry represents a multi-commit change (multi-commit PR or a series of related commits identified by the curation pass). Write one sentence that captures the net change. Use `pr_title`/`pr_body` as primary signal; `member_commits` for supporting context. Do not enumerate individual commits." + +### `src/pipeline.ts` (insert token/repo + diff stats + curation) + +Reorder so `makeLLMClient` is constructed before curation runs. Three insertions in `runPipeline`: + +1. **Before** `runGitCliff` (around line 50): resolve token + repo if `loaded.config.github.enabled`. Pass through to `runGitCliff`. +2. **After** `target.commits` is in hand (around line 80): fetch diff stats via `getDiffStats(target.commits.map(c => c.id), loaded.projectRoot)`. +3. 
**Replace** lines 80–112 with a curation-driven version: + + ``` + progress.step("curation", `strategy=${loaded.config.curation.strategy}`); + const { groups, omitted } = await curateCommits(target.commits, { + strategy: loaded.config.curation.strategy, + omitPlumbing: loaded.config.curation.omit_plumbing, + minGroupSize: loaded.config.curation.min_group_size, + cwd: loaded.projectRoot, + llm, // constructed earlier in the pipeline + diffStats, + verbose: opts.verbose, + }); + + const prNumbers = groups.flatMap(g => g.prNumber !== null ? [g.prNumber] : []); + const prMap = await enrichPRs(prNumbers, { cwd: loaded.projectRoot, verbose: opts.verbose }); + + const inputs: EntryInput[] = groups.map(g => { + const pr = g.prNumber !== null ? (prMap.get(g.prNumber) ?? null) : null; + const subject = pr?.title + ?? g.members.find(m => m.subject.length > 8)?.subject + ?? g.members[0]!.subject.replace(/\s*\(#\d+\)\s*$/, ""); + return { + pr_number: g.prNumber, + raw_subject: subject, + pr_title: pr?.title ?? null, + pr_body: pr?.body ?? null, + type: g.type, + scope: g.scope, + author: pr?.author ?? g.author, + url: pr?.url ?? g.prUrl, + members: g.members, + curated_by: g.curatedBy, + llm_reason: g.llmReason, + }; + }); + ``` + + `omitted` is passed through to `assembleRender` so the audit block can list them. + +If `opts.showCuration` (new CLI flag, default false) is set: emit the proposed groups + omissions to stderr in a small tree before `rewriteEntries` runs. + +### `src/render.ts` (extend audit block) + +`assembleRender` now takes `omitted: OmittedCommit[]` in addition to `inputs`/`rewritten`. Audit block emits: +- One rawLine per original commit in `inputs`, with a group anchor when grouped, tagged with curation source. +- A trailing section listing every omitted commit with its reason. + +``` +const isGrouped = inp.members.length > 1; +if (isGrouped) { + const tag = inp.curated_by === "pr" + ? 
`grouped by PR #${inp.pr_number}` + : `grouped by model: ${inp.llm_reason ?? ""}`; + rawLines.push(`- group (${tag}): ${inp.raw_subject}`); + for (const m of inp.members) { + const sc = m.scope ? `(${m.scope})` : ""; + rawLines.push(` - ${m.sha.slice(0, 7)} ${m.type ?? "?"}${sc}: ${m.subject}`); + } +} else { + const m = inp.members[0]!; + const sc = inp.scope ? `(${inp.scope})` : ""; + const prSuffix = inp.pr_number !== null ? ` (PR #${inp.pr_number})` : ""; + rawLines.push(`- ${m.sha.slice(0, 7)} ${inp.type}${sc}: ${inp.raw_subject}${prSuffix}`); +} +// ... after all inputs: +for (const o of omitted) { + const sc = o.member.scope ? `(${o.member.scope})` : ""; + rawLines.push( + `- omitted (${o.reason}): ${o.member.sha.slice(0, 7)} ${o.member.type ?? "?"}${sc}: ${o.member.subject}` + ); +} +``` + +Every original commit still appears in the audit block — re-renders detect drift commit-for-commit, and humans can see exactly why each group formed and why each omission happened. + +`extract.ts`'s `stripAuditBlock` (line 63) regex matches anything between markers — no change needed. Keep marker `v1`. + +### `src/cli.ts` (add `--show-curation`) + +Add `--show-curation` flag forwarded into pipeline opts. Prints the proposed groups + omissions tree to stderr before rewrite kicks in. Useful for human eyeballing. + +### Docs + +- `cliff-notes.example.toml`: document `[curation]` and `[github]` sections. +- `README.md`: short paragraph on `gh auth token` reuse + CI envs + `--show-curation` flag + how omissions appear in the audit block but not the rendered bullets. 
+ +## Critical files + +- `/Users/andriymassimilla/Projects/a2-ai/cliff-notes/src/curation.ts` (new) +- `/Users/andriymassimilla/Projects/a2-ai/cliff-notes/src/git-diff.ts` (new) +- `/Users/andriymassimilla/Projects/a2-ai/cliff-notes/src/prompts/curate.ts` (new) +- `/Users/andriymassimilla/Projects/a2-ai/cliff-notes/src/exec.ts` (new) +- `/Users/andriymassimilla/Projects/a2-ai/cliff-notes/src/pipeline.ts` (token/repo, diff stats, curation call, EntryInput build, omitted passthrough) +- `/Users/andriymassimilla/Projects/a2-ai/cliff-notes/src/github.ts` (token + repo resolution helpers) +- `/Users/andriymassimilla/Projects/a2-ai/cliff-notes/src/git-cliff.ts` (extend `CommitSchema.remote`; plumb token/repo flags) +- `/Users/andriymassimilla/Projects/a2-ai/cliff-notes/src/schemas.ts` (extend `EntryInput`; add curation partition schema) +- `/Users/andriymassimilla/Projects/a2-ai/cliff-notes/src/llm.ts` (add `curate`; cache layer; `rewriteEntries`/`summarize` unchanged) +- `/Users/andriymassimilla/Projects/a2-ai/cliff-notes/src/config.ts` (add `[curation]` + `[github]`) +- `/Users/andriymassimilla/Projects/a2-ai/cliff-notes/src/render.ts` (audit block per-member + curation source tag + omitted section) +- `/Users/andriymassimilla/Projects/a2-ai/cliff-notes/src/prompts/rewrite.ts` + `src/prompts/system.ts` (member context) +- `/Users/andriymassimilla/Projects/a2-ai/cliff-notes/src/cli.ts` (`--show-curation`) +- `/Users/andriymassimilla/Projects/a2-ai/cliff-notes/cliff-notes.example.toml` + `README.md` (docs) + +## Step order (each step compiles) + +1. `src/exec.ts` (new): extract shared `execCapture`; update call sites in git-cliff.ts + github.ts. +2. `src/config.ts` + `cliff-notes.example.toml`: add `[curation]` + `[github]` schemas and examples. +3. `src/schemas.ts`: add `EntryMember`, extend `EntryInput` with `members`/`curated_by`/`llm_reason`; add `GroupSchema`/`OmittedSchema`/`CurationResponseSchema`/`buildCurationSchema`. 
Update construction sites with placeholders. +4. `src/git-cliff.ts`: extend `CommitSchema.remote`; add token/repo to `CliffOptions`/`runGitCliff`. +5. `src/github.ts`: add `resolveGitHubToken`, `resolveGitHubRepo`. +6. `src/git-diff.ts` (new): `getDiffStats`. +7. `src/prompts/curate.ts` (new) + `src/llm.ts`: `curate` method + content-hash cache. +8. `src/curation.ts` (new): orchestrator combining prefilter, gate, LLM call, type resolution, omission passthrough. +9. `src/pipeline.ts`: hoist `makeLLMClient` earlier; insert token/repo + diff stats + curation; rewrite inputs block; pass `omitted` to `assembleRender`. +10. `src/render.ts`: extend audit block (per-member + omitted section). +11. `src/prompts/rewrite.ts` + `src/prompts/system.ts`: surface `member_commits`. +12. `src/cli.ts`: `--show-curation`. +13. Docs + tests. + +## Verification + +**Unit tests (`tests/curation.test.ts`, new)** — inject the LLM callback and diff-stats map for testability: + +- `strategy: "off"` → one group per commit, all `curatedBy: "solo"`, `omitted: []`. +- `strategy: "by-pr-only"`: 3 commits sharing `pr_number = 42` → one PR group; LLM never invoked; `omitted: []`. +- `strategy: "by-pr-only"`: 3 commits all with `(#42)` in subject (rebase-merge), no `remote` enrichment → one PR group from subject fallback. +- `strategy: "auto"`, no PR-less residual → LLM never invoked (gate works). +- `strategy: "auto"`, 4 PR-less commits with overlapping `files` → LLM proposes a group; partition validates; combined groups returned with `curatedBy: "llm"` and `llmReason` populated. +- `strategy: "auto"`, `omit_plumbing: true`, residual contains a dep-bump commit → LLM proposes it in `omitted`; final result has it in `omitted[]`, not in `groups[]`. +- `strategy: "auto"`, `omit_plumbing: false` → schema rejects any non-empty `omitted` (`allowOmissions: false`); LLM has no omission option. 
+- `strategy: "auto"`, LLM proposes a group mixing `feat` + `chore` → schema rejects; fallback to solo with stderr warning. +- `strategy: "auto"`, LLM proposes indices spanning > `max_index_gap` → rejected. +- `strategy: "auto"`, LLM proposes oversized group (> `max_per_group`) → rejected. +- `strategy: "auto"`, LLM hallucinates an out-of-range index → rejected; solo fallback. +- `strategy: "auto"`, LLM omits an index that's also in a group → rejected (partition overlap). +- `min_group_size = 2`, LLM proposes a group of 1 → expanded to solo. + +**Unit tests for token/repo resolution (`tests/github-resolve.test.ts`, new)** — mock `execCapture`: + +- `GITHUB_TOKEN` env set → returned. +- No env, `gh auth token` succeeds → returned. +- No env, no `gh` → `null` (no throw). +- `[github] repo` config override → returned. +- `GITHUB_REPOSITORY` env → returned when no override. +- `gh repo view` succeeds when no env → returned. + +**Unit tests for `buildCurationSchema` (`tests/schemas.test.ts`, extend)**: + +- Solo-only partition (every index in its own group, `omitted: []`) → valid. +- Well-formed multi-member group with no omissions → valid. +- Well-formed mix of groups + omissions covering all indices → valid. +- Missing index, overlapping index, out-of-range index, empty `member_indices` → rejected. +- Same index in both a group and `omitted` → rejected (overlap). +- `primary_index` not in `member_indices` → rejected. +- Multi-member group with mixed types when `requireSameType: true` → rejected. +- Member span > `maxIndexGap` → rejected. +- Group size > `maxPerGroup` → rejected. +- Non-empty `omitted` when `allowOmissions: false` → rejected. + +**Existing tests** (`tests/schemas.test.ts`, `tests/render.test.ts`): add `members`/`curated_by` placeholders at construction sites; refresh the render snapshot for the new audit-block format (per-member SHAs + omitted section). + +**Manual end-to-end:** + +1. 
On a repo with a known merge-commit PR + a few direct-to-main commits + a few plumbing commits (dep bumps, lint fixes): + - `bun run dev -- --unreleased --dry-run --verbose --show-curation` (default config) → confirm: (a) PR group formed from merge-commit constituents, (b) LLM proposes a group for the related direct-to-main commits with a reason, (c) LLM proposes omissions for the plumbing commits with reasons, (d) audit block lists every constituent SHA tagged with curation source AND every omitted SHA with its reason, (e) rendered bullets do NOT include the omitted commits. + - Set `strategy = "by-pr-only"` → same PR grouping; direct-to-main commits stay solo; plumbing commits get their own bullets (no omission). LLM not called. + - Set `omit_plumbing = false` under `auto` → grouping still happens; omission disabled; plumbing commits get their own bullets. + - Set `strategy = "off"` → confirm the pre-change behavior (one bullet per commit, no grouping, no omissions) is still reachable. + - Set `[github] enabled = false` → PR grouping degrades to subject-only `(#N)` matching; merge-commit-only constituents stay solo (no rev-list fallback in v1). +2. On a squash-merge repo: confirm output is byte-identical with and without curation enabled (gate skips LLM when there's no residual to curate). +3. Determinism: run `auto` mode twice in a row; with `cache = true`, confirm the output is byte-identical (including the audit block). +4. `cliff-notes --extract <version>`: confirm audit block strips cleanly on a section produced with curation enabled (including the omitted section between markers). +5. CI smoke: run inside a GitHub Actions workflow with default-issued `GITHUB_TOKEN` + `GITHUB_REPOSITORY` set; confirm git-cliff enrichment activates without any explicit secrets.
diff --git a/src/cli.ts b/src/cli.ts index 9e1945f..f0580f2 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -29,6 +29,7 @@ program .option("--yes", "skip confirmation prompt before writing CHANGELOG.md") .option("--quiet", "suppress stage progress lines on stderr") .option("--verbose", "log token counts, raw git-cliff JSON, intermediate LLM payloads") + .option("--show-curation", "print commit grouping and omissions before rewrite") .action(async (opts) => { const progress = makeProgress({ quiet: !!opts.quiet, @@ -55,6 +56,7 @@ program modelOverride: opts.model, yes: !!opts.yes, verbose: !!opts.verbose, + showCuration: !!opts.showCuration, progress, }); } catch (err) { diff --git a/src/config.ts b/src/config.ts index ad686fe..2c26642 100644 --- a/src/config.ts +++ b/src/config.ts @@ -12,8 +12,8 @@ const ProviderSchema = z.object({ const ProjectSchema = z.object({ name: z.string().min(1), - audience: z.string().default("internal-devs"), - voice: z.string().default("concise, technical, no marketing fluff"), + audience: z.string().default("end-users of the application"), + voice: z.string().default("clear, user-focused, concise, no marketing fluff"), }); const PromptSchema = z @@ -36,12 +36,33 @@ const OutputSchema = z }) .default({}); +const CurationSchema = z + .object({ + strategy: z.enum(["off", "by-pr-only", "auto"]).default("auto"), + omit_plumbing: z.boolean().default(true), + min_group_size: z.number().int().min(1).default(2), + max_per_group: z.number().int().min(2).default(5), + max_index_gap: z.number().int().min(1).default(15), + require_same_type: z.boolean().default(true), + cache: z.boolean().default(true), + }) + .default({}); + +const GitHubSchema = z + .object({ + enabled: z.boolean().default(true), + repo: z.string().optional(), + }) + .default({}); + export const ConfigSchema = z.object({ provider: ProviderSchema, project: ProjectSchema, prompt: PromptSchema, git_cliff: GitCliffSchema, output: OutputSchema, + curation: CurationSchema, + github: 
GitHubSchema, }); export type Config = z.infer; diff --git a/src/curation.ts b/src/curation.ts new file mode 100644 index 0000000..20b2db6 --- /dev/null +++ b/src/curation.ts @@ -0,0 +1,383 @@ +import { extractPRNumber, extractPRUrl, firstLine, type CliffCommit } from "./git-cliff.ts"; +import type { CommitDiffStat } from "./git-diff.ts"; +import type { LLMClient } from "./llm.ts"; +import { + buildCurationSchema, + type CurationInput, + type CurationResponse, + type EntryMember, +} from "./schemas.ts"; + +export interface CuratedMember extends EntryMember { + author: string | null; + prNumber: number | null; + prUrl: string | null; + releaseIndex: number; +} + +export interface CommitGroup { + prNumber: number | null; + prUrl: string | null; + members: CuratedMember[]; + type: string; + scope: string | null; + author: string | null; + curatedBy: "solo" | "pr" | "llm"; + llmReason?: string; +} + +export interface OmittedCommit { + member: CuratedMember; + reason: string; +} + +export interface CurationOptions { + strategy: "off" | "by-pr-only" | "auto"; + omitPlumbing: boolean; + minGroupSize: number; + maxPerGroup: number; + maxIndexGap: number; + requireSameType: boolean; + cwd: string; + llm?: LLMClient; + diffStats?: Map; + verbose?: boolean; +} + +export interface CurationResult { + groups: CommitGroup[]; + omitted: OmittedCommit[]; +} + +export function describeCurationPlan( + commits: CliffCommit[], + strategy: CurationOptions["strategy"], +): string { + if (strategy === "off") { + const entryText = commits.length === 1 ? "an individual entry" : "individual entries"; + return `disabled; keeping ${plural(commits.length, "commit")} as ${entryText}`; + } + + const members = commits.map((commit, index) => toMember(commit, index)); + const { groups: prefiltered, residual } = prefilterByPR(members); + const prGroupedCommits = prefiltered.reduce((sum, group) => sum + group.members.length, 0); + const prPart = + prefiltered.length === 0 + ? 
"no multi-commit PR groups" + : `PR grouping ${plural(prGroupedCommits, "commit")} into ${plural( + prefiltered.length, + "entry", + "entries", + )}`; + + if (strategy === "by-pr-only") { + return `${prPart}; keeping ${plural(residual.length, "remaining commit")} solo`; + } + + if (residual.length < 2) { + return `${prPart}; keeping ${plural(residual.length, "remaining commit")} solo (model skipped)`; + } + + return `${prPart}; asking model to classify ${plural( + residual.length, + "remaining commit", + )} (group/solo/omit)`; +} + +export async function curateCommits( + commits: CliffCommit[], + opts: CurationOptions, +): Promise { + const members = commits.map((commit, index) => toMember(commit, index, opts.diffStats)); + + if (opts.strategy === "off") { + return { + groups: members.map((member) => makeGroup([member], "solo")), + omitted: [], + }; + } + + const { groups: prefiltered, residual } = prefilterByPR(members); + if (opts.strategy === "by-pr-only" || residual.length < 2) { + return { + groups: sortGroups([...prefiltered, ...residual.map((m) => makeGroup([m], "solo"))]), + omitted: [], + }; + } + + if (!opts.llm) { + warn( + opts, + "LLM curation requested but no LLM client was provided; keeping residual commits solo", + ); + return { + groups: sortGroups([...prefiltered, ...residual.map((m) => makeGroup([m], "solo"))]), + omitted: [], + }; + } + + let response: CurationResponse; + const residualInput = residual.map(toCurationInput); + const schemaOpts = { + maxPerGroup: opts.maxPerGroup, + maxIndexGap: opts.maxIndexGap, + requireSameType: opts.requireSameType, + allowOmissions: opts.omitPlumbing, + }; + + try { + const raw = await opts.llm.curate(residualInput, schemaOpts); + const parsed = buildCurationSchema(residualInput, schemaOpts).safeParse(raw); + if (!parsed.success) { + throw new Error(parsed.error.issues.map((i) => i.message).join("; ")); + } + rejectMixedPRGroups(parsed.data, residual); + response = parsed.data; + } catch (err) { + warn( + 
opts, + `LLM curation failed (${err instanceof Error ? err.message : String(err)}); keeping residual commits solo`, + ); + return { + groups: sortGroups([...prefiltered, ...residual.map((m) => makeGroup([m], "solo"))]), + omitted: [], + }; + } + + const omitted: OmittedCommit[] = response.omitted.map((o) => ({ + member: residual[o.index]!, + reason: o.reason, + })); + const llmGroups = response.groups.flatMap((group) => { + const groupMembers = group.member_indices.map((i) => residual[i]!).filter(Boolean); + if (groupMembers.length < opts.minGroupSize) { + return groupMembers.map((member) => makeGroup([member], "solo")); + } + return [makeGroup(groupMembers, groupMembers.length > 1 ? "llm" : "solo", group.reason)]; + }); + + return { groups: sortGroups([...prefiltered, ...llmGroups]), omitted }; +} + +export function formatCurationResult(result: CurationResult): string { + const lines = ["cliff-notes: curation result"]; + for (const group of sortGroups(result.groups)) { + if (group.members.length === 1) { + const m = group.members[0]!; + lines.push(` - solo ${m.sha.slice(0, 7)} ${m.subject}`); + continue; + } + const label = + group.curatedBy === "pr" + ? `PR #${group.prNumber}` + : `model${group.llmReason ? `: ${group.llmReason}` : ""}`; + lines.push(` - group (${label}) ${group.members[0]?.subject ?? ""}`); + for (const m of group.members) { + lines.push(` - ${m.sha.slice(0, 7)} ${m.subject}`); + } + } + for (const o of result.omitted) { + lines.push(` - omitted ${o.member.sha.slice(0, 7)} ${o.member.subject}: ${o.reason}`); + } + return lines.join("\n") + "\n"; +} + +function prefilterByPR(members: CuratedMember[]): { + groups: CommitGroup[]; + residual: CuratedMember[]; +} { + const byPR = new Map(); + for (const member of members) { + if (member.prNumber === null) continue; + const group = byPR.get(member.prNumber) ?? 
[]; + group.push(member); + byPR.set(member.prNumber, group); + } + + const grouped = new Set(); + const groups: CommitGroup[] = []; + for (const prMembers of byPR.values()) { + prMembers.forEach((member) => grouped.add(member)); + groups.push(makeGroup(prMembers, prMembers.length > 1 ? "pr" : "solo")); + } + + return { + groups, + residual: members.filter((member) => !grouped.has(member)), + }; +} + +function toMember( + commit: CliffCommit, + releaseIndex: number, + diffStats?: Map, +): CuratedMember { + const subject = firstLine(commit.message).replace(/\s*\(#\d+\)\s*$/, ""); + const body = commit.message.split("\n").slice(1).join("\n").trim(); + const stat = diffStats?.get(commit.id); + const prNumber = extractPRNumber(commit); + return { + sha: commit.id, + subject, + body, + type: commit.group ?? "Other", + scope: commit.scope ?? null, + files: stat?.files ?? [], + additions: stat?.additions ?? 0, + deletions: stat?.deletions ?? 0, + author: commit.author?.name ?? commit.remote?.username ?? 
null, + prNumber, + prUrl: extractPRUrl(commit), + releaseIndex, + }; +} + +function toCurationInput(member: CuratedMember): CurationInput { + return { + index: member.releaseIndex, + sha: member.sha, + subject: member.subject, + body: member.body, + type: member.type, + scope: member.scope, + files: member.files, + additions: member.additions, + deletions: member.deletions, + author: member.author, + pr_number: member.prNumber, + pr_url: member.prUrl, + }; +} + +function makeGroup( + members: CuratedMember[], + curatedBy: "solo" | "pr" | "llm", + llmReason?: string, +): CommitGroup { + return { + prNumber: commonPRNumber(members), + prUrl: commonPRUrl(members), + members: sortMembers(members), + type: resolveType(members), + scope: resolveScope(members), + author: mostFrequent(members.map((m) => m.author)), + curatedBy, + llmReason, + }; +} + +function resolveType(members: CuratedMember[]): string { + if (members.some((m) => m.type === "Reverts")) return "Reverts"; + const counts = new Map(); + for (const member of members) { + const type = member.type ?? "Other"; + counts.set(type, (counts.get(type) ?? 0) + 1); + } + let bestType = "Other"; + let bestCount = -1; + for (const [type, count] of counts) { + if (count > bestCount || (count === bestCount && typeRank(type) < typeRank(bestType))) { + bestType = type; + bestCount = count; + } + } + return bestType; +} + +function typeRank(type: string): number { + const precedence = [ + "Features", + "Bug Fixes", + "Performance", + "Refactor", + "Security", + "Build", + "CI", + "Documentation", + "Tests", + "Chores", + "Other", + ]; + const idx = precedence.indexOf(type); + return idx === -1 ? precedence.length : idx; +} + +function resolveScope(members: CuratedMember[]): string | null { + const first = members[0]?.scope ?? null; + if (!first) return null; + return members.every((m) => m.scope === first) ? 
first : null; +} + +function commonPRNumber(members: CuratedMember[]): number | null { + const numbers = [ + ...new Set(members.map((m) => m.prNumber).filter((n) => n !== null)), + ] as number[]; + return numbers.length === 1 ? numbers[0]! : null; +} + +function commonPRUrl(members: CuratedMember[]): string | null { + const urls = [...new Set(members.map((m) => m.prUrl).filter((u) => u !== null))] as string[]; + return urls.length === 1 ? urls[0]! : null; +} + +function mostFrequent(values: Array): string | null { + const counts = new Map(); + for (const value of values) { + if (!value) continue; + counts.set(value, (counts.get(value) ?? 0) + 1); + } + let best: string | null = null; + let bestCount = -1; + for (const [value, count] of counts) { + if (count > bestCount) { + best = value; + bestCount = count; + } + } + return best; +} + +function sortGroups(groups: CommitGroup[]): CommitGroup[] { + return ordered( + groups, + (a, b) => (a.members[0]?.releaseIndex ?? 0) - (b.members[0]?.releaseIndex ?? 0), + ); +} + +function sortMembers(members: CuratedMember[]): CuratedMember[] { + return ordered(members, (a, b) => a.releaseIndex - b.releaseIndex); +} + +function ordered(items: T[], compare: (a: T, b: T) => number): T[] { + const result: T[] = []; + for (const item of items) { + const insertAt = result.findIndex((existing) => compare(item, existing) < 0); + if (insertAt === -1) { + result.push(item); + } else { + result.splice(insertAt, 0, item); + } + } + return result; +} + +function rejectMixedPRGroups(response: CurationResponse, residual: CuratedMember[]): void { + for (const group of response.groups) { + const prNumbers = new Set( + group.member_indices.map((i) => residual[i]?.prNumber ?? 
null).filter((n) => n !== null), + ); + if (prNumbers.size > 1) { + throw new Error("curation grouped commits from different PRs"); + } + } +} + +function warn(opts: CurationOptions, message: string): void { + if (opts.verbose) { + process.stderr.write(`cliff-notes: ${message}\n`); + } +} + +function plural(count: number, singular: string, pluralForm = `${singular}s`): string { + return `${count} ${count === 1 ? singular : pluralForm}`; +} diff --git a/src/exec.ts b/src/exec.ts new file mode 100644 index 0000000..a390a3e --- /dev/null +++ b/src/exec.ts @@ -0,0 +1,33 @@ +import { spawn } from "node:child_process"; + +export interface ExecResult { + stdout: string; + stderr: string; + code: number; +} + +export function execCapture(cmd: string, args: string[], cwd: string): Promise { + return new Promise((resolve, reject) => { + let child; + try { + child = spawn(cmd, args, { cwd }); + } catch (err) { + reject(err); + return; + } + let stdout = ""; + let stderr = ""; + child.stdout.on("data", (d) => (stdout += d.toString())); + child.stderr.on("data", (d) => (stderr += d.toString())); + child.on("error", (err) => { + if ((err as NodeJS.ErrnoException).code === "ENOENT") { + resolve({ stdout: "", stderr: `ENOENT: ${cmd} not found`, code: 127 }); + } else { + reject(err); + } + }); + child.on("close", (code) => { + resolve({ stdout, stderr, code: code ?? 
0 }); + }); + }); +} diff --git a/src/git-cliff.ts b/src/git-cliff.ts index cedca80..52f0eed 100644 --- a/src/git-cliff.ts +++ b/src/git-cliff.ts @@ -1,5 +1,5 @@ -import { spawn } from "node:child_process"; import { z } from "zod"; +import { execCapture } from "./exec.ts"; const CommitSchema = z .object({ @@ -25,6 +25,15 @@ const CommitSchema = z }) .optional(), conventional: z.boolean().optional(), + remote: z + .object({ + pr_number: z.number().int().nullable().optional(), + pr_title: z.string().nullable().optional(), + pr_labels: z.array(z.string()).optional().default([]), + username: z.string().nullable().optional(), + }) + .passthrough() + .optional(), }) .passthrough(); @@ -48,6 +57,8 @@ export interface CliffOptions { configPath?: string; unreleased: boolean; tag?: string; + githubToken?: string | null; + githubRepo?: string | null; } export async function runGitCliff(opts: CliffOptions): Promise { @@ -61,6 +72,9 @@ export async function runGitCliff(opts: CliffOptions): Promise { if (opts.tag) { args.push("--tag", opts.tag); } + if (opts.githubToken && opts.githubRepo) { + args.push("--github-token", opts.githubToken, "--github-repo", opts.githubRepo); + } const { stdout, stderr, code } = await execCapture("git-cliff", args, opts.cwd); if (code !== 0) { @@ -94,39 +108,10 @@ export async function runGitCliff(opts: CliffOptions): Promise { return result.data; } -function execCapture( - cmd: string, - args: string[], - cwd: string, -): Promise<{ stdout: string; stderr: string; code: number }> { - return new Promise((resolve, reject) => { - let child; - try { - child = spawn(cmd, args, { cwd }); - } catch (err) { - reject(err); - return; - } - let stdout = ""; - let stderr = ""; - child.stdout.on("data", (d) => (stdout += d.toString())); - child.stderr.on("data", (d) => (stderr += d.toString())); - child.on("error", (err) => { - if ((err as NodeJS.ErrnoException).code === "ENOENT") { - resolve({ stdout: "", stderr: `ENOENT: ${cmd} not found`, code: 127 }); - } 
else { - reject(err); - } - }); - child.on("close", (code) => { - resolve({ stdout, stderr, code: code ?? 0 }); - }); - }); -} - // Pluck the PR number from links (first link with text like "#123") or fallback // to scraping the commit message footer for "(#123)". export function extractPRNumber(commit: CliffCommit): number | null { + if (typeof commit.remote?.pr_number === "number") return commit.remote.pr_number; for (const link of commit.links ?? []) { const m = link.text.match(/^#(\d+)$/); if (m && m[1]) return parseInt(m[1], 10); diff --git a/src/git-diff.ts b/src/git-diff.ts new file mode 100644 index 0000000..82c9823 --- /dev/null +++ b/src/git-diff.ts @@ -0,0 +1,59 @@ +import { execCapture } from "./exec.ts"; + +export interface CommitDiffStat { + sha: string; + files: string[]; + additions: number; + deletions: number; +} + +export async function getDiffStats( + shas: string[], + cwd: string, +): Promise> { + const unique = [...new Set(shas.filter(Boolean))]; + if (unique.length === 0) return new Map(); + + const out = new Map(); + const { stdout, code } = await execCapture( + "git", + [ + "show", + "--no-renames", + "--name-only", + "--shortstat", + "--format=__CLiff_NOTES_COMMIT__%H", + ...unique, + ], + cwd, + ); + if (code !== 0) return out; + + for (const rawChunk of stdout.split("__CLiff_NOTES_COMMIT__")) { + const chunk = rawChunk.trim(); + if (!chunk) continue; + const lines = chunk + .split("\n") + .map((l) => l.trim()) + .filter(Boolean); + const sha = lines.shift(); + if (!sha) continue; + + const files: string[] = []; + let additions = 0; + let deletions = 0; + for (const line of lines) { + const add = line.match(/(\d+) insertion/); + const del = line.match(/(\d+) deletion/); + if (add?.[1] || del?.[1]) { + additions = add?.[1] ? parseInt(add[1], 10) : 0; + deletions = del?.[1] ? 
parseInt(del[1], 10) : 0; + continue; + } + files.push(line); + } + out.set(sha, { sha, files, additions, deletions }); + } + + return out; +} diff --git a/src/git-remote.ts b/src/git-remote.ts index 6e8676d..be2e6fb 100644 --- a/src/git-remote.ts +++ b/src/git-remote.ts @@ -1,4 +1,4 @@ -import { spawn } from "node:child_process"; +import { execCapture } from "./exec.ts"; export interface RepoSlug { owner: string; @@ -46,33 +46,3 @@ export function buildCommitUrl(slug: RepoSlug, sha: string): string { export function shortSha(sha: string): string { return sha.slice(0, 7); } - -function execCapture( - cmd: string, - args: string[], - cwd: string, -): Promise<{ stdout: string; stderr: string; code: number }> { - return new Promise((resolve, reject) => { - let child; - try { - child = spawn(cmd, args, { cwd }); - } catch (err) { - reject(err); - return; - } - let stdout = ""; - let stderr = ""; - child.stdout.on("data", (d) => (stdout += d.toString())); - child.stderr.on("data", (d) => (stderr += d.toString())); - child.on("error", (err) => { - if ((err as NodeJS.ErrnoException).code === "ENOENT") { - resolve({ stdout: "", stderr: `ENOENT: ${cmd} not found`, code: 127 }); - } else { - reject(err); - } - }); - child.on("close", (code) => { - resolve({ stdout, stderr, code: code ?? 0 }); - }); - }); -} diff --git a/src/github.ts b/src/github.ts index 9c8fce8..fee5484 100644 --- a/src/github.ts +++ b/src/github.ts @@ -1,4 +1,5 @@ -import { spawn } from "node:child_process"; +import { execCapture } from "./exec.ts"; +import { parseGitHubRemote } from "./git-remote.ts"; export interface PRInfo { number: number; @@ -18,7 +19,16 @@ export async function enrichPRs( const out = new Map(); if (prNumbers.length === 0) return out; - await ensureGhAvailable(); + try { + await ensureGhAvailable(); + } catch (err) { + if (opts.verbose) { + process.stderr.write( + `cliff-notes: gh unavailable (${err instanceof Error ? 
err.message : String(err)}); skipping PR enrichment\n`, + ); + } + return out; + } const unique = [...new Set(prNumbers)]; let cursor = 0; @@ -48,6 +58,64 @@ export async function enrichPRs( return out; } +export async function resolveGitHubToken(opts: { + cwd: string; + verbose?: boolean; +}): Promise { + const fromEnv = process.env.GITHUB_TOKEN ?? process.env.GH_TOKEN; + if (fromEnv?.trim()) return fromEnv.trim(); + + try { + const { stdout, code, stderr } = await execCapture("gh", ["auth", "token"], opts.cwd); + if (code === 0 && stdout.trim()) return stdout.trim(); + if (opts.verbose) { + process.stderr.write( + `cliff-notes: gh auth token unavailable (${stderr.trim() || `exit ${code}`})\n`, + ); + } + } catch (err) { + if (opts.verbose) { + process.stderr.write( + `cliff-notes: gh auth token failed (${err instanceof Error ? err.message : String(err)})\n`, + ); + } + } + return null; +} + +export async function resolveGitHubRepo(opts: { + cwd: string; + configOverride?: string; + verbose?: boolean; +}): Promise { + if (opts.configOverride?.trim()) return opts.configOverride.trim(); + if (process.env.GITHUB_REPOSITORY?.trim()) return process.env.GITHUB_REPOSITORY.trim(); + + try { + const { stdout, code } = await execCapture( + "gh", + ["repo", "view", "--json", "nameWithOwner", "-q", ".nameWithOwner"], + opts.cwd, + ); + if (code === 0 && stdout.trim()) return stdout.trim(); + } catch (err) { + if (opts.verbose) { + process.stderr.write( + `cliff-notes: gh repo view failed (${err instanceof Error ? err.message : String(err)})\n`, + ); + } + } + + try { + const { stdout, code } = await execCapture("git", ["remote", "get-url", "origin"], opts.cwd); + if (code !== 0) return null; + const slug = parseGitHubRemote(stdout.trim()); + return slug ? 
`${slug.owner}/${slug.repo}` : null; + } catch { + return null; + } +} + async function fetchPR(n: number, cwd: string): Promise { const args = ["pr", "view", String(n), "--json", "number,title,body,url,author,labels"]; const { stdout, stderr, code } = await execCapture("gh", args, cwd); @@ -84,33 +152,3 @@ async function ensureGhAvailable(): Promise { } ghChecked = true; } - -function execCapture( - cmd: string, - args: string[], - cwd: string, -): Promise<{ stdout: string; stderr: string; code: number }> { - return new Promise((resolve, reject) => { - let child; - try { - child = spawn(cmd, args, { cwd }); - } catch (err) { - reject(err); - return; - } - let stdout = ""; - let stderr = ""; - child.stdout.on("data", (d) => (stdout += d.toString())); - child.stderr.on("data", (d) => (stderr += d.toString())); - child.on("error", (err) => { - if ((err as NodeJS.ErrnoException).code === "ENOENT") { - resolve({ stdout: "", stderr: `ENOENT: ${cmd} not found`, code: 127 }); - } else { - reject(err); - } - }); - child.on("close", (code) => { - resolve({ stdout, stderr, code: code ?? 
0 }); - }); - }); -} diff --git a/src/llm.ts b/src/llm.ts index 4905110..8ade368 100644 --- a/src/llm.ts +++ b/src/llm.ts @@ -1,13 +1,19 @@ import { generateObject } from "ai"; import type { LanguageModel } from "ai"; +import { createHash } from "node:crypto"; +import { mkdir, readFile, writeFile } from "node:fs/promises"; +import { join } from "node:path"; import type { Config } from "./config.ts"; -import type { EntryInput } from "./schemas.ts"; +import type { CurationInput, EntryInput } from "./schemas.ts"; import { + buildCurationSchema, buildRewriteSchema, SummaryResponseSchema, + type CurationResponse, type RewriteResponse, type SummaryResponse, } from "./schemas.ts"; +import { buildCurationPrompt, CURATION_PROMPT_VERSION } from "./prompts/curate.ts"; import { buildSystemPrompt } from "./prompts/system.ts"; import { buildRewritePrompt } from "./prompts/rewrite.ts"; import { buildSummaryPrompt } from "./prompts/summary.ts"; @@ -17,12 +23,22 @@ export interface LLMClient { readonly model: string; rewriteEntries(entries: EntryInput[]): Promise; summarize(entries: EntryInput[], rewritten: RewriteResponse["entries"]): Promise; + curate( + residual: CurationInput[], + opts: { + maxPerGroup: number; + maxIndexGap: number; + requireSameType: boolean; + allowOmissions: boolean; + }, + ): Promise; } export interface LLMOptions { providerOverride?: string; modelOverride?: string; verbose?: boolean; + projectRoot?: string; } export async function makeLLMClient(cfg: Config, opts: LLMOptions = {}): Promise { @@ -58,9 +74,84 @@ export async function makeLLMClient(cfg: Config, opts: LLMOptions = {}): Promise } return result.object; }, + async curate(residual, curateOpts) { + const audience = cfg.project.audience.trim() || "end-users of the application"; + const cacheKey = curationCacheKey(residual, curateOpts, audience); + if (cfg.curation.cache && opts.projectRoot) { + const cached = await readCurationCache(opts.projectRoot, cacheKey); + if (cached) return cached; + } + + 
const schema = buildCurationSchema(residual, curateOpts); + const result = await generateObject({ + model, + schema, + temperature: 0, + messages: buildMessages( + system, + buildCurationPrompt(residual, { + allowOmissions: curateOpts.allowOmissions, + audience, + }), + isAnthropic, + ), + }); + if (opts.verbose) { + process.stderr.write(`cliff-notes: curation tokens=${JSON.stringify(result.usage)}\n`); + } + if (cfg.curation.cache && opts.projectRoot) { + await writeCurationCache(opts.projectRoot, cacheKey, result.object); + } + return result.object; + }, }; } +function curationCacheKey( + residual: CurationInput[], + opts: { + maxPerGroup: number; + maxIndexGap: number; + requireSameType: boolean; + allowOmissions: boolean; + }, + audience: string, +): string { + return createHash("sha256") + .update(JSON.stringify({ residual, opts, audience, prompt: CURATION_PROMPT_VERSION })) + .digest("hex") + .slice(0, 32); +} + +async function readCurationCache( + projectRoot: string, + key: string, +): Promise { + try { + const raw = await readFile( + join(projectRoot, ".cliff-notes", "cache", `curate-${key}.json`), + "utf-8", + ); + return JSON.parse(raw) as CurationResponse; + } catch { + return null; + } +} + +async function writeCurationCache( + projectRoot: string, + key: string, + response: CurationResponse, +): Promise { + try { + const dir = join(projectRoot, ".cliff-notes", "cache"); + await mkdir(dir, { recursive: true }); + await writeFile(join(dir, `curate-${key}.json`), JSON.stringify(response, null, 2)); + } catch { + // Cache failures should not affect changelog generation. 
+ } +} + function buildMessages( system: string, user: string, diff --git a/src/pipeline.ts b/src/pipeline.ts index 0d27eaa..f92ff8d 100644 --- a/src/pipeline.ts +++ b/src/pipeline.ts @@ -8,15 +8,11 @@ import { resolveGitCliffConfig, type LoadedConfig, } from "./config.ts"; -import { - runGitCliff, - extractPRNumber, - extractPRUrl, - firstLine, - type CliffRelease, -} from "./git-cliff.ts"; -import { enrichPRs } from "./github.ts"; +import { runGitCliff, type CliffRelease } from "./git-cliff.ts"; +import { enrichPRs, resolveGitHubRepo, resolveGitHubToken } from "./github.ts"; import { getOriginGitHubSlug, buildCommitUrl } from "./git-remote.ts"; +import { getDiffStats } from "./git-diff.ts"; +import { curateCommits, describeCurationPlan, formatCurationResult } from "./curation.ts"; import { makeLLMClient } from "./llm.ts"; import type { EntryInput } from "./schemas.ts"; import { assembleRender, formatDate, renderSection } from "./render.ts"; @@ -33,6 +29,7 @@ export interface PipelineOptions { modelOverride?: string; yes: boolean; verbose?: boolean; + showCuration?: boolean; progress: Progress; } @@ -49,12 +46,27 @@ export async function runPipeline(opts: PipelineOptions): Promise { const cliffConfig = await chooseCliffConfig(loaded); const repoSlug = await getOriginGitHubSlug(loaded.projectRoot); + let githubToken: string | null = null; + let githubRepo: string | null = null; + if (loaded.config.github.enabled) { + [githubToken, githubRepo] = await Promise.all([ + resolveGitHubToken({ cwd: loaded.projectRoot, verbose: opts.verbose }), + resolveGitHubRepo({ + cwd: loaded.projectRoot, + configOverride: loaded.config.github.repo, + verbose: opts.verbose, + }), + ]); + } + progress.step("git-cliff", "collecting commits"); const releases = await runGitCliff({ cwd: loaded.projectRoot, configPath: cliffConfig, unreleased: opts.unreleased, tag: opts.tag, + githubToken, + githubRepo, }); if (opts.verbose) { @@ -80,15 +92,38 @@ export async function runPipeline(opts: 
PipelineOptions): Promise { loaded.config.output.date_format, ); - // Build EntryInput[] preserving commit order from git-cliff. - const prNumbers: number[] = []; - const inputsRaw = target.commits.map((c) => { - const prNumber = extractPRNumber(c); - if (prNumber !== null) prNumbers.push(prNumber); - return { commit: c, prNumber }; + const llm = await makeLLMClient(loaded.config, { + providerOverride: opts.providerOverride, + modelOverride: opts.modelOverride, + verbose: opts.verbose, + projectRoot: loaded.projectRoot, + }); + + progress.step("diff", `collecting stats for ${target.commits.length} commits`); + const diffStats = await getDiffStats( + target.commits.map((c) => c.id), + loaded.projectRoot, + ); + + progress.step("curation", describeCurationPlan(target.commits, loaded.config.curation.strategy)); + const curation = await curateCommits(target.commits, { + strategy: loaded.config.curation.strategy, + omitPlumbing: loaded.config.curation.omit_plumbing, + minGroupSize: loaded.config.curation.min_group_size, + maxPerGroup: loaded.config.curation.max_per_group, + maxIndexGap: loaded.config.curation.max_index_gap, + requireSameType: loaded.config.curation.require_same_type, + cwd: loaded.projectRoot, + llm, + diffStats, + verbose: opts.verbose, }); + if (opts.showCuration) { + process.stderr.write(formatCurationResult(curation)); + } // Enrich PR data via gh — best-effort, errors per-PR don't fail the run. + const prNumbers = curation.groups.flatMap((g) => (g.prNumber !== null ? [g.prNumber] : [])); if (prNumbers.length > 0) { const uniqueCount = new Set(prNumbers).size; progress.step("github", `enriching ${uniqueCount} PR${uniqueCount === 1 ? "" : "s"}`); @@ -98,24 +133,30 @@ export async function runPipeline(opts: PipelineOptions): Promise { verbose: opts.verbose, }); - const inputs: EntryInput[] = inputsRaw.map(({ commit, prNumber }) => { - const pr = prNumber !== null ? (prMap.get(prNumber) ?? 
null) : null; - const subject = firstLine(commit.message); - const subjectWithoutPRSuffix = subject.replace(/\s*\(#\d+\)\s*$/, ""); - const commitSha = commit.id || null; + const inputs: EntryInput[] = curation.groups.map((group) => { + const pr = group.prNumber !== null ? (prMap.get(group.prNumber) ?? null) : null; + const subject = + pr?.title ?? + group.members.find((m) => m.subject.length > 8)?.subject ?? + group.members[0]!.subject; + const isSolo = group.members.length === 1; + const commitSha = isSolo ? group.members[0]!.sha : null; const commitUrl = - prNumber === null && commitSha && repoSlug ? buildCommitUrl(repoSlug, commitSha) : null; + group.prNumber === null && commitSha && repoSlug ? buildCommitUrl(repoSlug, commitSha) : null; return { - pr_number: prNumber, - raw_subject: subjectWithoutPRSuffix, + pr_number: group.prNumber, + raw_subject: subject, pr_title: pr?.title ?? null, pr_body: pr?.body ?? null, - type: commit.group ?? "Other", - scope: commit.scope ?? null, - author: pr?.author ?? commit.author?.name ?? null, - url: pr?.url ?? extractPRUrl(commit), + type: group.type, + scope: group.scope, + author: pr?.author ?? group.author, + url: pr?.url ?? 
group.prUrl, commit_sha: commitSha, commit_url: commitUrl, + members: group.members, + curated_by: group.curatedBy, + llm_reason: group.llmReason, }; }); @@ -125,16 +166,11 @@ export async function runPipeline(opts: PipelineOptions): Promise { ); } - const llm = await makeLLMClient(loaded.config, { - providerOverride: opts.providerOverride, - modelOverride: opts.modelOverride, - verbose: opts.verbose, - }); - progress.step("model", `rewriting ${inputs.length} entries · ${llm.provider}/${llm.model}`); const rewriteResp = await llm.rewriteEntries(inputs); - progress.step("model", "generating summary"); + progress.step("summary", "generating release summary"); const summaryResp = await llm.summarize(inputs, rewriteResp.entries); + printGeneratedSummary(summaryResp.summary); const renderInput = assembleRender({ versionHeader, @@ -142,6 +178,7 @@ export async function runPipeline(opts: PipelineOptions): Promise { summary: summaryResp.summary, inputs, rewritten: rewriteResp.entries, + omitted: curation.omitted, groupForInput: (i) => inputs[i]?.type ?? 
"Other", }); const section = renderSection(renderInput); @@ -187,6 +224,17 @@ export async function runPipeline(opts: PipelineOptions): Promise { progress.done(`wrote ${changelogPath}`); } +function printGeneratedSummary(summary: string): void { + process.stderr.write("cliff-notes: generated summary:\n"); + process.stderr.write( + summary + .trim() + .split("\n") + .map((line) => ` ${line}`) + .join("\n") + "\n", + ); +} + function pickTargetRelease(releases: CliffRelease[], opts: PipelineOptions): CliffRelease | null { if (releases.length === 0) return null; if (opts.unreleased) { diff --git a/src/prompts/curate.ts b/src/prompts/curate.ts new file mode 100644 index 0000000..c431175 --- /dev/null +++ b/src/prompts/curate.ts @@ -0,0 +1,84 @@ +import type { CurationInput } from "../schemas.ts"; + +export const CURATION_PROMPT_VERSION = "curate-v4"; + +export function buildCurationPrompt( + residual: CurationInput[], + opts: { allowOmissions: boolean; audience: string }, +): string { + const payload = residual.map((entry, index) => ({ + index, + original_index: entry.index, + sha: entry.sha, + subject: entry.subject, + body: truncate(entry.body, 1500), + type: entry.type, + scope: entry.scope, + author: entry.author, + pr_number: entry.pr_number, + files: entry.files, + additions: entry.additions, + deletions: entry.deletions, + })); + + return [ + `Audience: ${opts.audience}.`, + "", + "Default to one disposition per commit. Most commits should stand alone as their own changelog entry.", + "Group commits only when they describe one logical change. Omit commits only when they clearly do not belong in release notes for this audience.", + "", + "Grouping criteria:", + "- Strong evidence includes overlapping files, sequential indices with terse subjects, fixup!/squash! prefixes, and shared scope plus author in a small window.", + "- Different conventional types should stay separate. 
Different non-null PR numbers must stay separate.", + "- When in doubt, return solo groups.", + "", + opts.allowOmissions + ? [ + "Omission criteria:", + "- Use the configured audience literally. It may describe any mix of users, operators, admins, developers, maintainers, or business stakeholders.", + "- The audience examples below are non-exhaustive guidance, not a closed list. Apply the same judgment to analogous audience descriptions.", + "- Omit only changes that are not meaningful to the configured audience.", + "- For external product users, operators, or sysadmins, omit developer-only changes such as test additions, test rewrites, test relocations, Playwright/Cypress/Jest setup, smoke/e2e/unit coverage expansion, internal refactors, internal renames, developer-only tooling, dependency bumps without security/compatibility/runtime/deployment impact, lint/format/whitespace fixes, comment-only edits, and CI config touches that do not change delivered behavior.", + "- For internal developer or maintainer audiences, keep refactors, tooling, infrastructure, build, test, and CI changes when they affect developer workflow, maintainability, APIs, release reliability, or operational behavior.", + "- For every audience, comment-only edits, pure formatting, lint-only cleanup, and dependency bumps without security, compatibility, runtime, or deployment impact are safe to omit.", + "- Do not omit user-facing changes, bug fixes, performance changes, API changes, removed features, or added features.", + "- When in doubt, do not omit. Every omission needs a one-line reason.", + ].join("\n") + : "Omissions are disabled. 
Return every commit in groups, using singleton groups where needed.", + "", + "Return JSON shaped as { groups: [{ member_indices, primary_index, reason }], omitted: [{ index, reason }] }.", + "Every input index must appear exactly once, either in one group or in omitted.", + "Reasons are shown to human reviewers, so keep them specific and under one sentence.", + "", + "Examples:", + JSON.stringify( + { + groups: [ + { + member_indices: [0, 1, 2], + primary_index: 0, + reason: "all touch src/user.ts and describe the initial user model rollout", + }, + { member_indices: [3], primary_index: 3, reason: "standalone user-facing fix" }, + ], + omitted: opts.allowOmissions + ? [ + { index: 4, reason: "dependency type bump with no runtime behavior change" }, + { index: 5, reason: "lint-only cleanup" }, + ] + : [], + }, + null, + 2, + ), + "", + "Commits (JSON):", + JSON.stringify(payload, null, 2), + ].join("\n"); +} + +function truncate(s: string | null | undefined, max: number): string { + if (!s) return ""; + if (s.length <= max) return s; + return s.slice(0, max) + "..."; +} diff --git a/src/prompts/rewrite.ts b/src/prompts/rewrite.ts index f3562fe..22ccf06 100644 --- a/src/prompts/rewrite.ts +++ b/src/prompts/rewrite.ts @@ -10,6 +10,14 @@ export function buildRewritePrompt(entries: EntryInput[]): string { pr_title: e.pr_title, pr_body: truncate(e.pr_body, 1500), author: e.author, + member_commits: + e.members.length > 1 + ? e.members.map((m) => ({ + subject: m.subject, + type: m.type, + files: m.files, + })) + : undefined, })); return [ "Rewrite the following changelog entries. 
Return an object with an `entries` array of the same length, in the same order.", diff --git a/src/prompts/summary.ts b/src/prompts/summary.ts index c3ad14e..d70f31c 100644 --- a/src/prompts/summary.ts +++ b/src/prompts/summary.ts @@ -1,6 +1,14 @@ import type { Config } from "../config.ts"; import type { EntryInput, RewrittenEntry } from "../schemas.ts"; +export const SUMMARY_PROMPT_GUIDANCE = [ + "Write a release summary in 1–2 short sentences, plain prose, no bullet list, no heading.", + "Lead with the user-visible outcome or operational impact, not implementation details or changelog mechanics.", + "Use highlighted entries as the main signal. Mention low-level details only when they change user behavior, compatibility, deployment, or operations.", + "Prefer one coherent release theme over enumerating entries.", + "Do not include PR numbers or links.", +] as const; + export function buildSummaryPrompt( inputs: EntryInput[], rewritten: RewrittenEntry[], @@ -17,9 +25,7 @@ export function buildSummaryPrompt( }); const style = cfg.prompt.summary_style?.trim(); return [ - "Write a release summary in 2–4 sentences, plain prose, no bullet list, no heading.", - "Lead with what changed for the audience, not the changelog mechanics.", - "Do not include PR numbers or links.", + ...SUMMARY_PROMPT_GUIDANCE, style ? `Style guidance: ${style}` : "", "", "Entries (JSON):", diff --git a/src/prompts/system.ts b/src/prompts/system.ts index d4ba3b3..a546eee 100644 --- a/src/prompts/system.ts +++ b/src/prompts/system.ts @@ -17,6 +17,7 @@ export function buildSystemPrompt(cfg: Config): string { "- Each rewritten entry must be one sentence under 280 characters.", "- pr_number in each output entry MUST equal the corresponding input pr_number — copy it verbatim.", "- Output the entries in the SAME ORDER as the input.", + "- When member_commits is present, the entry represents a multi-commit change. Write one sentence that captures the net change. 
Use pr_title/pr_body as primary signal and member_commits for supporting context. Do not enumerate individual commits.", extra ? `\nProject-specific guidance:\n${extra}` : "", ] .filter(Boolean) diff --git a/src/render.ts b/src/render.ts index a8f370c..a45339e 100644 --- a/src/render.ts +++ b/src/render.ts @@ -1,4 +1,5 @@ import type { EntryInput, RewrittenEntry } from "./schemas.ts"; +import type { OmittedCommit } from "./curation.ts"; import { shortSha } from "./git-remote.ts"; export interface RenderInput { @@ -70,6 +71,7 @@ export function assembleRender(args: { summary: string; inputs: EntryInput[]; rewritten: RewrittenEntry[]; + omitted?: OmittedCommit[]; groupForInput: (i: number) => string; }): RenderInput { const groupOrder: string[] = []; @@ -92,9 +94,30 @@ export function assembleRender(args: { commitSha: inp.commit_sha, commitUrl: inp.commit_url, }); - const scopeSuffix = inp.scope ? `(${inp.scope})` : ""; - const prSuffix = inp.pr_number !== null ? ` (PR #${inp.pr_number})` : ""; - rawLines.push(`- ${inp.type}${scopeSuffix}: ${inp.raw_subject}${prSuffix}`); + if (inp.members.length > 1) { + const tag = + inp.curated_by === "pr" + ? `grouped by PR #${inp.pr_number}` + : `grouped by model${inp.llm_reason ? `: ${inp.llm_reason}` : ""}`; + rawLines.push(`- group (${tag}): ${inp.raw_subject}`); + for (const m of inp.members) { + const scopeSuffix = m.scope ? `(${m.scope})` : ""; + rawLines.push(` - ${shortSha(m.sha)} ${m.type ?? "?"}${scopeSuffix}: ${m.subject}`); + } + } else { + const member = inp.members[0]; + const scopeSuffix = inp.scope ? `(${inp.scope})` : ""; + const prSuffix = inp.pr_number !== null ? ` (PR #${inp.pr_number})` : ""; + const shaPrefix = member?.sha ? `${shortSha(member.sha)} ` : ""; + rawLines.push(`- ${shaPrefix}${inp.type}${scopeSuffix}: ${inp.raw_subject}${prSuffix}`); + } + } + + for (const omitted of args.omitted ?? []) { + const scopeSuffix = omitted.member.scope ? 
`(${omitted.member.scope})` : ""; + rawLines.push( + `- omitted (${omitted.reason}): ${shortSha(omitted.member.sha)} ${omitted.member.type ?? "?"}${scopeSuffix}: ${omitted.member.subject}`, + ); } return { diff --git a/src/schemas.ts b/src/schemas.ts index dd7e1b1..46e155e 100644 --- a/src/schemas.ts +++ b/src/schemas.ts @@ -1,5 +1,16 @@ import { z } from "zod"; +export interface EntryMember { + sha: string; + subject: string; + body: string; + type: string | null; + scope: string | null; + files: string[]; + additions: number; + deletions: number; +} + // What we feed the LLM for entry rewriting. export interface EntryInput { pr_number: number | null; @@ -12,6 +23,16 @@ export interface EntryInput { url: string | null; commit_sha: string | null; commit_url: string | null; + members: EntryMember[]; + curated_by: "solo" | "pr" | "llm"; + llm_reason?: string; +} + +export interface CurationInput extends EntryMember { + index: number; + author: string | null; + pr_number: number | null; + pr_url: string | null; } // What we expect back from the LLM for each entry. 
@@ -59,3 +80,117 @@ export const SummaryResponseSchema = z.object({ }); export type SummaryResponse = z.infer; + +const CurationGroupSchema = z.object({ + member_indices: z.array(z.number().int().nonnegative()).min(1), + primary_index: z.number().int().nonnegative(), + reason: z.string().min(1).max(200), +}); + +const CurationOmittedSchema = z.object({ + index: z.number().int().nonnegative(), + reason: z.string().min(1).max(200), +}); + +export const CurationResponseSchema = z.object({ + groups: z.array(CurationGroupSchema).default([]), + omitted: z.array(CurationOmittedSchema).default([]), +}); + +export type CurationResponse = z.infer; + +export function buildCurationSchema( + residual: CurationInput[], + opts: { + maxPerGroup: number; + maxIndexGap: number; + requireSameType: boolean; + allowOmissions: boolean; + }, +) { + return CurationResponseSchema.superRefine((data, ctx) => { + if (!opts.allowOmissions && data.omitted.length > 0) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: "omissions are disabled", + path: ["omitted"], + }); + } + + const seen = new Map(); + const expected = new Set(residual.map((_, i) => i)); + + function record(index: number, path: (string | number)[]) { + if (!expected.has(index)) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: `index ${index} is out of range`, + path, + }); + return; + } + const prior = seen.get(index); + if (prior) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: `index ${index} appears more than once (${prior})`, + path, + }); + return; + } + seen.set(index, path.join(".")); + } + + data.groups.forEach((group, groupIdx) => { + const indices = group.member_indices; + if (!indices.includes(group.primary_index)) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: "primary_index must be included in member_indices", + path: ["groups", groupIdx, "primary_index"], + }); + } + if (indices.length > opts.maxPerGroup) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: 
`group exceeds max_per_group ${opts.maxPerGroup}`, + path: ["groups", groupIdx, "member_indices"], + }); + } + if (indices.length > 0 && Math.max(...indices) - Math.min(...indices) > opts.maxIndexGap) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: `group exceeds max_index_gap ${opts.maxIndexGap}`, + path: ["groups", groupIdx, "member_indices"], + }); + } + if (opts.requireSameType) { + const types = new Set(indices.map((i) => residual[i]?.type ?? null)); + if (types.size > 1) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: "group mixes commit types", + path: ["groups", groupIdx, "member_indices"], + }); + } + } + indices.forEach((index, memberIdx) => { + record(index, ["groups", groupIdx, "member_indices", memberIdx]); + }); + }); + + data.omitted.forEach((omitted, omittedIdx) => { + record(omitted.index, ["omitted", omittedIdx, "index"]); + }); + + for (const index of expected) { + if (!seen.has(index)) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: `missing disposition for index ${index}`, + path: [], + }); + } + } + }); +} diff --git a/tests/curation.test.ts b/tests/curation.test.ts new file mode 100644 index 0000000..d86c4dd --- /dev/null +++ b/tests/curation.test.ts @@ -0,0 +1,227 @@ +import { describe, expect, test } from "bun:test"; +import { curateCommits, describeCurationPlan } from "../src/curation.ts"; +import type { CliffCommit } from "../src/git-cliff.ts"; +import type { LLMClient } from "../src/llm.ts"; +import type { CurationResponse } from "../src/schemas.ts"; + +function commit( + id: string, + message: string, + opts: { + group?: string; + scope?: string | null; + prNumber?: number | null; + author?: string; + } = {}, +): CliffCommit { + return { + id, + message, + group: opts.group ?? "Features", + scope: opts.scope ?? null, + links: + opts.prNumber === undefined || opts.prNumber === null + ? [] + : [{ text: `#${opts.prNumber}`, href: `https://gh/pr/${opts.prNumber}` }], + author: { name: opts.author ?? 
"dev" }, + remote: + opts.prNumber === undefined + ? undefined + : { + pr_number: opts.prNumber, + pr_title: null, + pr_labels: [], + username: null, + }, + }; +} + +function llm(response: CurationResponse, calls: { count: number }): LLMClient { + return { + provider: "test", + model: "test", + async rewriteEntries() { + throw new Error("not used"); + }, + async summarize() { + throw new Error("not used"); + }, + async curate() { + calls.count++; + return response; + }, + }; +} + +function baseOpts(overrides: Partial[1]> = {}) { + return { + strategy: "auto" as const, + omitPlumbing: true, + minGroupSize: 2, + maxPerGroup: 5, + maxIndexGap: 15, + requireSameType: true, + cwd: process.cwd(), + ...overrides, + }; +} + +describe("curateCommits", () => { + test("describes auto curation with PR grouping and residual model classification", () => { + const message = describeCurationPlan( + [ + commit("aaa1111", "feat: add model", { prNumber: 42 }), + commit("bbb2222", "feat: wire model", { prNumber: 42 }), + commit("ccc3333", "feat: add settings"), + commit("ddd4444", "chore: fix lint", { group: "Chores" }), + ], + "auto", + ); + expect(message).toBe( + "PR grouping 2 commits into 1 entry; asking model to classify 2 remaining commits (group/solo/omit)", + ); + }); + + test("describes skipped model curation when auto has too little residual work", () => { + const message = describeCurationPlan( + [ + commit("aaa1111", "feat: add model", { prNumber: 42 }), + commit("bbb2222", "feat: wire model", { prNumber: 42 }), + ], + "auto", + ); + expect(message).toBe( + "PR grouping 2 commits into 1 entry; keeping 0 remaining commits solo (model skipped)", + ); + }); + + test("strategy off returns one solo group per commit", async () => { + const result = await curateCommits( + [commit("aaa1111", "feat: add a"), commit("bbb2222", "feat: add b")], + baseOpts({ strategy: "off" }), + ); + expect(result.omitted).toEqual([]); + expect(result.groups.map((g) => g.curatedBy)).toEqual(["solo", 
"solo"]); + }); + + test("by-pr-only groups commits sharing remote PR number without LLM", async () => { + const calls = { count: 0 }; + const result = await curateCommits( + [ + commit("aaa1111", "feat: add model", { prNumber: 42 }), + commit("bbb2222", "feat: wire model", { prNumber: 42 }), + commit("ccc3333", "feat: docs", { prNumber: 42 }), + ], + baseOpts({ strategy: "by-pr-only", llm: llm({ groups: [], omitted: [] }, calls) }), + ); + expect(calls.count).toBe(0); + expect(result.groups).toHaveLength(1); + expect(result.groups[0]?.curatedBy).toBe("pr"); + expect(result.groups[0]?.prNumber).toBe(42); + expect(result.groups[0]?.members).toHaveLength(3); + }); + + test("by-pr-only groups commits using subject PR fallback", async () => { + const result = await curateCommits( + [ + commit("aaa1111", "feat: add model (#42)", { prNumber: null }), + commit("bbb2222", "feat: wire model (#42)", { prNumber: null }), + commit("ccc3333", "feat: fix model (#42)", { prNumber: null }), + ], + baseOpts({ strategy: "by-pr-only" }), + ); + expect(result.groups).toHaveLength(1); + expect(result.groups[0]?.prNumber).toBe(42); + }); + + test("auto skips LLM when no residual needs curation", async () => { + const calls = { count: 0 }; + const result = await curateCommits( + [ + commit("aaa1111", "feat: add model", { prNumber: 42 }), + commit("bbb2222", "feat: wire model", { prNumber: 42 }), + commit("ccc3333", "feat: unrelated squash PR", { prNumber: 43 }), + ], + baseOpts({ llm: llm({ groups: [], omitted: [] }, calls) }), + ); + expect(calls.count).toBe(0); + expect(result.groups).toHaveLength(2); + expect(result.groups.map((g) => g.curatedBy)).toEqual(["pr", "solo"]); + }); + + test("auto applies validated LLM groups and omissions", async () => { + const calls = { count: 0 }; + const result = await curateCommits( + [ + commit("aaa1111", "feat: add user model"), + commit("bbb2222", "feat: wire user model"), + commit("ccc3333", "feat: add settings"), + commit("ddd4444", "chore: fix 
lint", { group: "Chores" }), + ], + baseOpts({ + requireSameType: false, + diffStats: new Map([ + ["aaa1111", { sha: "aaa1111", files: ["src/user.ts"], additions: 10, deletions: 0 }], + ["bbb2222", { sha: "bbb2222", files: ["src/user.ts"], additions: 6, deletions: 1 }], + ]), + llm: llm( + { + groups: [ + { member_indices: [0, 1], primary_index: 0, reason: "both touch src/user.ts" }, + { member_indices: [2], primary_index: 2, reason: "standalone settings feature" }, + ], + omitted: [{ index: 3, reason: "lint-only cleanup" }], + }, + calls, + ), + }), + ); + expect(calls.count).toBe(1); + expect(result.groups.map((g) => g.curatedBy)).toEqual(["llm", "solo"]); + expect(result.groups[0]?.llmReason).toBe("both touch src/user.ts"); + expect(result.omitted[0]?.reason).toBe("lint-only cleanup"); + }); + + test("auto falls back to solos when omissions are disabled but returned", async () => { + const calls = { count: 0 }; + const commits = [ + commit("aaa1111", "feat: add a"), + commit("bbb2222", "chore: lint", { group: "Chores" }), + ]; + const result = await curateCommits( + commits, + baseOpts({ + omitPlumbing: false, + requireSameType: false, + llm: llm( + { + groups: [{ member_indices: [0], primary_index: 0, reason: "solo" }], + omitted: [{ index: 1, reason: "lint" }], + }, + calls, + ), + }), + ); + expect(calls.count).toBe(1); + expect(result.omitted).toEqual([]); + expect(result.groups.map((g) => g.curatedBy)).toEqual(["solo", "solo"]); + }); + + test("auto falls back to solos for mixed-type LLM groups when same type is required", async () => { + const calls = { count: 0 }; + const result = await curateCommits( + [commit("aaa1111", "feat: add a"), commit("bbb2222", "chore: lint", { group: "Chores" })], + baseOpts({ + llm: llm( + { + groups: [{ member_indices: [0, 1], primary_index: 0, reason: "related" }], + omitted: [], + }, + calls, + ), + }), + ); + expect(calls.count).toBe(1); + expect(result.groups.map((g) => g.curatedBy)).toEqual(["solo", "solo"]); + }); +}); 
diff --git a/tests/render.test.ts b/tests/render.test.ts index 5400317..bbb2354 100644 --- a/tests/render.test.ts +++ b/tests/render.test.ts @@ -1,6 +1,24 @@ import { describe, test, expect } from "bun:test"; import { renderSection, assembleRender, formatDate } from "../src/render.ts"; -import type { EntryInput } from "../src/schemas.ts"; +import type { EntryInput, EntryMember } from "../src/schemas.ts"; + +function member( + sha: string, + subject: string, + type = "Features", + scope: string | null = null, +): EntryMember { + return { + sha, + subject, + body: "", + type, + scope, + files: [], + additions: 0, + deletions: 0, + }; +} describe("renderSection", () => { test("renders header, summary, grouped entries, and audit block", () => { @@ -152,6 +170,8 @@ describe("assembleRender", () => { url: "https://gh/pr/1", commit_sha: null, commit_url: null, + members: [member("abc1234", "add foo", "Features", "api")], + curated_by: "solo", }, { pr_number: 2, @@ -164,6 +184,8 @@ describe("assembleRender", () => { url: null, commit_sha: null, commit_url: null, + members: [member("def5678", "fix bar", "Bug Fixes")], + curated_by: "solo", }, { pr_number: 3, @@ -176,6 +198,8 @@ describe("assembleRender", () => { url: null, commit_sha: null, commit_url: null, + members: [member("fed4321", "add baz", "Features")], + curated_by: "solo", }, ]; const render = assembleRender({ @@ -193,9 +217,57 @@ describe("assembleRender", () => { expect(render.groupOrder).toEqual(["Features", "Bug Fixes"]); expect(render.byGroup.get("Features")?.map((e) => e.text)).toEqual(["Added foo", "Added baz"]); expect(render.rawLines).toEqual([ - "- Features(api): add foo (PR #1)", - "- Bug Fixes: fix bar (PR #2)", - "- Features: add baz (PR #3)", + "- abc1234 Features(api): add foo (PR #1)", + "- def5678 Bug Fixes: fix bar (PR #2)", + "- fed4321 Features: add baz (PR #3)", + ]); + }); + + test("records grouped members and omitted commits in audit block", () => { + const inputs: EntryInput[] = [ + { + 
pr_number: 42, + raw_subject: "add grouped feature", + pr_title: null, + pr_body: null, + type: "Features", + scope: null, + author: null, + url: "https://gh/pr/42", + commit_sha: null, + commit_url: null, + members: [ + member("aaa1111", "add model", "Features"), + member("bbb2222", "wire model", "Features"), + ], + curated_by: "pr", + }, + ]; + const render = assembleRender({ + versionHeader: "v1.0.0", + date: "2026-05-13", + summary: "s", + inputs, + rewritten: [{ pr_number: 42, rewritten: "Added grouped feature", highlight: false }], + omitted: [ + { + member: { + ...member("ccc3333", "fix lint", "Chores"), + author: null, + prNumber: null, + prUrl: null, + releaseIndex: 2, + }, + reason: "lint-only cleanup", + }, + ], + groupForInput: (i) => inputs[i]!.type, + }); + expect(render.rawLines).toEqual([ + "- group (grouped by PR #42): add grouped feature", + " - aaa1111 Features: add model", + " - bbb2222 Features: wire model", + "- omitted (lint-only cleanup): ccc3333 Chores: fix lint", ]); }); }); diff --git a/tests/schemas.test.ts b/tests/schemas.test.ts index 9ea2375..460a0f6 100644 --- a/tests/schemas.test.ts +++ b/tests/schemas.test.ts @@ -1,5 +1,10 @@ import { describe, test, expect } from "bun:test"; -import { buildRewriteSchema, type EntryInput } from "../src/schemas.ts"; +import { + buildCurationSchema, + buildRewriteSchema, + type CurationInput, + type EntryInput, +} from "../src/schemas.ts"; function input(pr: number | null): EntryInput { return { @@ -13,6 +18,36 @@ function input(pr: number | null): EntryInput { url: null, commit_sha: null, commit_url: null, + members: [ + { + sha: "abc1234", + subject: "x", + body: "", + type: "Features", + scope: null, + files: [], + additions: 0, + deletions: 0, + }, + ], + curated_by: "solo", + }; +} + +function curationInput(index: number, type = "Features"): CurationInput { + return { + index, + sha: `sha${index}`, + subject: `subject ${index}`, + body: "", + type, + scope: null, + files: [], + additions: 0, + 
deletions: 0, + author: null, + pr_number: null, + pr_url: null, }; } @@ -66,3 +101,103 @@ describe("buildRewriteSchema", () => { expect(bad.success).toBe(false); }); }); + +describe("buildCurationSchema", () => { + const residual = [curationInput(0), curationInput(1), curationInput(2)]; + const opts = { + maxPerGroup: 3, + maxIndexGap: 2, + requireSameType: true, + allowOmissions: true, + }; + + test("accepts solo-only partition", () => { + const schema = buildCurationSchema(residual, opts); + const result = schema.safeParse({ + groups: [ + { member_indices: [0], primary_index: 0, reason: "solo" }, + { member_indices: [1], primary_index: 1, reason: "solo" }, + { member_indices: [2], primary_index: 2, reason: "solo" }, + ], + omitted: [], + }); + expect(result.success).toBe(true); + }); + + test("accepts groups plus omissions covering all indices", () => { + const schema = buildCurationSchema(residual, opts); + const result = schema.safeParse({ + groups: [{ member_indices: [0, 1], primary_index: 0, reason: "related" }], + omitted: [{ index: 2, reason: "lint-only cleanup" }], + }); + expect(result.success).toBe(true); + }); + + test("rejects missing, duplicate, and out-of-range indices", () => { + const schema = buildCurationSchema(residual, opts); + expect( + schema.safeParse({ + groups: [{ member_indices: [0, 1], primary_index: 0, reason: "related" }], + omitted: [], + }).success, + ).toBe(false); + expect( + schema.safeParse({ + groups: [ + { member_indices: [0, 1], primary_index: 0, reason: "related" }, + { member_indices: [1, 2], primary_index: 1, reason: "related" }, + ], + omitted: [], + }).success, + ).toBe(false); + expect( + schema.safeParse({ + groups: [{ member_indices: [0, 1, 3], primary_index: 0, reason: "related" }], + omitted: [], + }).success, + ).toBe(false); + }); + + test("rejects invalid primary, mixed types, large spans, oversized groups, and disabled omissions", () => { + expect( + buildCurationSchema(residual, opts).safeParse({ + groups: [{ 
member_indices: [0, 1], primary_index: 2, reason: "related" }], + omitted: [{ index: 2, reason: "lint" }], + }).success, + ).toBe(false); + + expect( + buildCurationSchema([curationInput(0, "Features"), curationInput(1, "Chores")], { + ...opts, + maxIndexGap: 1, + }).safeParse({ + groups: [{ member_indices: [0, 1], primary_index: 0, reason: "related" }], + omitted: [], + }).success, + ).toBe(false); + + expect( + buildCurationSchema(residual, { ...opts, maxIndexGap: 1 }).safeParse({ + groups: [{ member_indices: [0, 2], primary_index: 0, reason: "related" }], + omitted: [{ index: 1, reason: "lint" }], + }).success, + ).toBe(false); + + expect( + buildCurationSchema(residual, { ...opts, maxPerGroup: 2 }).safeParse({ + groups: [{ member_indices: [0, 1, 2], primary_index: 0, reason: "related" }], + omitted: [], + }).success, + ).toBe(false); + + expect( + buildCurationSchema(residual, { ...opts, allowOmissions: false }).safeParse({ + groups: [ + { member_indices: [0, 1], primary_index: 0, reason: "related" }, + { member_indices: [2], primary_index: 2, reason: "solo" }, + ], + omitted: [{ index: 2, reason: "lint" }], + }).success, + ).toBe(false); + }); +}); From e6f2028755b43960102907762596decb18cf249b Mon Sep 17 00:00:00 2001 From: Andriy Massimilla Date: Wed, 13 May 2026 15:48:37 -0400 Subject: [PATCH 3/4] fix: use direct bun TS execution --- README.md | 2 +- package.json | 4 ++-- src/cli.ts | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index b70e332..2388aa5 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ One tool, multiple projects, multiple LLM providers (Anthropic / OpenAI / Bedroc ## Install -cliff-notes is published as a raw source repo (no compiled binary yet). Two ways to use it: +cliff-notes is published as a raw Bun source repo (no compiled binary yet). 
Two ways to use it: ```sh # Ad-hoc, no install diff --git a/package.json b/package.json index 17bb4ea..3cd80e6 100644 --- a/package.json +++ b/package.json @@ -3,10 +3,10 @@ "version": "0.1.0", "description": "Controlled LLM changelog generator (git-cliff + structured LLM)", "bin": { - "cliff-notes": "./dist/cli.js" + "cliff-notes": "./src/cli.ts" }, "files": [ - "dist", + "src", "cliff.toml", "cliff-notes.example.toml", "README.md" diff --git a/src/cli.ts b/src/cli.ts index f0580f2..c0bc527 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -1,4 +1,4 @@ -#!/usr/bin/env node +#!/usr/bin/env bun import { Command } from "commander"; import { runPipeline } from "./pipeline.ts"; import { runExtract } from "./extract.ts"; From 3198cbf2a53db03d773d94a2f64ffe6b2d9ce14d Mon Sep 17 00:00:00 2001 From: Andriy Massimilla Date: Wed, 13 May 2026 16:18:01 -0400 Subject: [PATCH 4/4] fmt: fix formatting --- planning/commit-grouping.md | 7 ++++--- planning/commit-sha-fallback-links.md | 14 +++++++++----- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/planning/commit-grouping.md b/planning/commit-grouping.md index 0abbae3..2d9ca72 100644 --- a/planning/commit-grouping.md +++ b/planning/commit-grouping.md @@ -9,7 +9,7 @@ cliff-notes today renders **one bullet per commit** in a release. That's fine on - **Direct-to-main related commits**: a feature landed across multiple non-adjacent commits without a PR (e.g. "add user model", "wire user model into auth", "fix typo in user model"). No PR signal binds them; no deterministic rule reliably groups them. - **Plumbing noise**: dependency bumps, lint/format fixes, comment-only edits, internal renames, test-only churn. Mechanically valid commits, but rarely interesting in a release-notes context. -The first two are mechanical: same PR number → group. The third is semantic — only an LLM with subjects, bodies, and *what files each commit touched* can judge it. 
The fourth is editorial: only an LLM can sensibly decide "this commit shouldn't appear in the changelog." +The first two are mechanical: same PR number → group. The third is semantic — only an LLM with subjects, bodies, and _what files each commit touched_ can judge it. The fourth is editorial: only an LLM can sensibly decide "this commit shouldn't appear in the changelog." The goal is a **curation pass** that handles all four — grouping, primary-selection, and omission — while preserving cliff-notes' core invariants: @@ -24,9 +24,9 @@ The goal is a **curation pass** that handles all four — grouping, primary-sele 1. **Free PR prefilter (no LLM call).** Resolve a PR number per commit, then group commits sharing a PR number. Sources of PR number, in order: - `commit.remote.pr_number` from git-cliff's GitHub enrichment (most authoritative). - `extractPRNumber(commit)` from src/git-cliff.ts:129 (subject `(#N)` / links). - + This step handles merge-commit and rebase-merge PRs uniformly. For squash-merge repos every commit has a unique PR number — the prefilter is a no-op (no commit shares a PR with any other). - + 2. **Gate.** If fewer than 2 commits remain in the residual (commits not part of any multi-member PR group), skip the LLM curation call entirely. Most well-disciplined squash-merge releases exit here. 3. **LLM curation pass on the residual.** Ask the model to classify each residual commit into exactly one of three dispositions: group with N others, stand alone, or omit (with reason). Strict partition schema, type-homogeneity guard, content-hash cached for reproducibility. @@ -375,6 +375,7 @@ If `opts.showCuration` (new CLI flag, default false) is set: emit the proposed g ### `src/render.ts` (extend audit block) `assembleRender` now takes `omitted: OmittedCommit[]` in addition to `inputs`/`rewritten`. Audit block emits: + - One rawLine per original commit in `inputs`, with a group anchor when grouped, tagged with curation source. 
- A trailing section listing every omitted commit with its reason. diff --git a/planning/commit-sha-fallback-links.md b/planning/commit-sha-fallback-links.md index 1b50248..b59ecea 100644 --- a/planning/commit-sha-fallback-links.md +++ b/planning/commit-sha-fallback-links.md @@ -20,19 +20,22 @@ This change adds a short-SHA fallback link (e.g. `([abc1234](https://github.com/ Single-purpose module so `pipeline.ts` stays tidy and the regex is unit-testable. ```ts -export interface RepoSlug { owner: string; repo: string; } +export interface RepoSlug { + owner: string; + repo: string; +} -export async function getOriginGitHubSlug(cwd: string): Promise +export async function getOriginGitHubSlug(cwd: string): Promise; // spawns `git remote get-url origin`; returns null on any failure // (no origin, non-github.com host, parse miss). -export function parseGitHubRemote(url: string): RepoSlug | null +export function parseGitHubRemote(url: string): RepoSlug | null; // pure; handles the three URL forms above. Exported for tests. -export function buildCommitUrl(slug: RepoSlug, sha: string): string +export function buildCommitUrl(slug: RepoSlug, sha: string): string; // returns `https://github.com/${owner}/${repo}/commit/${sha}`. -export function shortSha(sha: string): string +export function shortSha(sha: string): string; // returns sha.slice(0, 7). ``` @@ -46,6 +49,7 @@ Reuse the `execCapture` pattern already in `src/git-cliff.ts:97` and `src/github ### 3. Extend `EntryInput`: `src/schemas.ts` Add two optional fields to the existing schema (`src/schemas.ts:4-12`): + - `commit_sha: string | null` - `commit_url: string | null`