Skip to content

Commit 7afaff9

Browse files
kulvirgit and claude committed
feat: env-based skill selection — run LLM once, cache for session
Replace per-turn LLM skill selection with a one-time call using the environment fingerprint, cached for the session. Controlled by `experimental.env_fingerprint_skill_selection` (default: true).

- Cache the LLM result per session; subsequent turns get 0ms / zero API cost
- Use the configured default model with try-catch fallback
- Remove the messageText/MessageContext dependency — selection uses the fingerprint only
- Trim the fingerprint to data-engineering detections (dbt, sql, profiles.yml adapters); add airflow + databricks; remove generic detections
- Add config guard + integration tests in tool/skill.test.ts

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 15d2264 commit 7afaff9

8 files changed

Lines changed: 791 additions & 6 deletions

File tree

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
import { Filesystem } from "../../util/filesystem"
2+
import { Glob } from "../../util/glob"
3+
import { Log } from "../../util/log"
4+
import path from "path"
5+
6+
const log = Log.create({ service: "fingerprint" })

/**
 * Environment fingerprinting: probes a working directory (and optionally a
 * second "root" directory) for well-known data-engineering files and turns
 * what it finds into a flat list of tags.
 *
 * The most recent result is memoised in module state and keyed by `cwd`.
 */
export namespace Fingerprint {
  /** Outcome of one detection pass. */
  export interface Result {
    /** Deduplicated tags, e.g. "dbt", "sql", "airflow" or an adapter name. */
    tags: string[]
    /** `Date.now()` value captured when the result was assembled. */
    detectedAt: number
    /** Directory the detection was requested for; also the cache key. */
    cwd: string
  }

  // Adapter names recognised after a `type:` key in profiles.yml. The `g` flag
  // lets matchAll collect every output's adapter rather than only the first.
  const ADAPTER_PATTERN =
    /type:\s*(snowflake|bigquery|redshift|databricks|postgres|mysql|sqlite|duckdb|trino|spark|clickhouse)/gi

  let cached: Result | undefined
  // `root` argument of the detect() call that produced `cached`; kept so that
  // refresh() re-scans the same set of directories.
  let cachedRoot: string | undefined

  /** Returns the memoised result, or `undefined` if detect() has not completed yet. */
  export function get(): Result | undefined {
    return cached
  }

  /**
   * Discards the memoised result and runs detection again for the same
   * directories. Falls back to `process.cwd()` when nothing was cached.
   */
  export async function refresh(): Promise<Result> {
    const previousCwd = cached?.cwd ?? process.cwd()
    const previousRoot = cachedRoot
    cached = undefined
    return detect(previousCwd, previousRoot)
  }

  /**
   * Detects tags for `cwd` and, when `root` is given and differs, for `root` too.
   *
   * @param cwd  Directory to fingerprint; a cached result for the same `cwd` is returned as-is.
   * @param root Optional second directory to scan (ignored when equal to `cwd`).
   * @returns The freshly computed (or cached) result.
   */
  export async function detect(cwd: string, root?: string): Promise<Result> {
    if (cached && cached.cwd === cwd) return cached

    const timer = log.time("detect", { cwd, root })
    try {
      const dirs = root && root !== cwd ? [cwd, root] : [cwd]

      // Each directory yields its own list; concatenating in `dirs` order keeps
      // the tag order stable regardless of which scan finishes first.
      const perDir = await Promise.all(dirs.map((dir) => detectDir(dir)))
      const unique = [...new Set(perDir.flat())]

      const result: Result = {
        tags: unique,
        detectedAt: Date.now(),
        cwd,
      }

      cached = result
      cachedRoot = root
      log.info("detected", { tags: unique.join(","), cwd })
      return result
    } finally {
      // Stop the timer even if a probe rejects.
      timer.stop()
    }
  }

  /** Probes a single directory and returns the tags found there (may contain duplicates). */
  async function detectDir(dir: string): Promise<string[]> {
    const tags: string[] = []
    const at = (name: string) => path.join(dir, name)

    // Data-engineering detections only
    const [
      hasDbtProject,
      hasProfilesYml,
      hasSqlfluff,
      hasLegacyDbtPackagesYml,
      hasPackagesYml,
      hasDependenciesYml,
      hasAirflowCfg,
      hasDagsDir,
      hasDatabricksYml,
    ] = await Promise.all([
      Filesystem.exists(at("dbt_project.yml")),
      Filesystem.exists(at("profiles.yml")),
      Filesystem.exists(at(".sqlfluff")),
      Filesystem.exists(at("dbt_packages.yml")),
      Filesystem.exists(at("packages.yml")),
      Filesystem.exists(at("dependencies.yml")),
      Filesystem.exists(at("airflow.cfg")),
      Filesystem.isDir(at("dags")),
      Filesystem.exists(at("databricks.yml")),
    ])

    // dbt detection
    if (hasDbtProject) {
      tags.push("dbt", "data-engineering")
    }

    // dbt packages: dbt declares packages in packages.yml / dependencies.yml
    // next to dbt_project.yml. Those names are generic, so they only count
    // inside a dbt project. The original dbt_packages.yml probe is retained.
    if (hasLegacyDbtPackagesYml || (hasDbtProject && (hasPackagesYml || hasDependenciesYml))) {
      tags.push("dbt-packages")
    }

    // profiles.yml - extract every adapter type mentioned
    if (hasProfilesYml) {
      try {
        const content = await Filesystem.readText(at("profiles.yml"))
        for (const match of content.matchAll(ADAPTER_PATTERN)) {
          const adapter = match[1]
          if (adapter) tags.push(adapter.toLowerCase())
        }
      } catch (e) {
        log.debug("profiles.yml unreadable", { dir, error: e })
      }
    }

    // SQL - check for .sqlfluff or any .sql files directly in the directory
    if (hasSqlfluff) {
      tags.push("sql")
    } else {
      try {
        const sqlFiles = await Glob.scan("*.sql", {
          cwd: dir,
          include: "file",
        })
        if (sqlFiles.length > 0) {
          tags.push("sql")
        }
      } catch (e) {
        log.debug("sql glob scan failed", { dir, error: e })
      }
    }

    // Airflow
    if (hasAirflowCfg || hasDagsDir) {
      tags.push("airflow")
    }

    // Databricks
    if (hasDatabricksYml) {
      tags.push("databricks")
    }

    return tags
  }
}
Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
// altimate_change start - LLM-based dynamic skill selection
2+
import { generateObject } from "ai"
3+
import type { LanguageModelV2 } from "@openrouter/ai-sdk-provider"
4+
import z from "zod"
5+
import { Provider } from "../provider/provider"
6+
import { Log } from "../util/log"
7+
import type { Skill } from "../skill"
8+
import type { Fingerprint } from "./fingerprint"
9+
10+
const log = Log.create({ service: "skill-selector" })
11+
12+
const TIMEOUT_MS = 3_000
13+
const MAX_SKILLS = 15
14+
15+
// Session cache keyed by working directory — invalidates if project changes.
16+
let cachedResult: Skill.Info[] | undefined
17+
let cachedCwd: string | undefined
18+
19+
/** Reset the session cache (exported for testing) */
20+
export function resetSkillSelectorCache(): void {
21+
cachedResult = undefined
22+
cachedCwd = undefined
23+
}
24+
25+
export interface SkillSelectorDeps {
26+
resolveModel: () => Promise<LanguageModelV2 | undefined>
27+
generate: (params: {
28+
model: LanguageModelV2
29+
temperature: number
30+
schema: z.ZodType
31+
messages: Array<{ role: "system" | "user"; content: string }>
32+
}) => Promise<{ object: { selected: string[] } }>
33+
}
34+
35+
async function defaultResolveModel(): Promise<LanguageModelV2 | undefined> {
36+
try {
37+
const { providerID, modelID } = await Provider.defaultModel()
38+
const model = await Provider.getModel(providerID, modelID)
39+
return Provider.getLanguage(model)
40+
} catch {
41+
return undefined
42+
}
43+
}
44+
45+
const defaultDeps: SkillSelectorDeps = {
46+
resolveModel: defaultResolveModel,
47+
generate: generateObject as any,
48+
}
49+
50+
/**
51+
* Use the configured model to select relevant skills based on the project fingerprint.
52+
* Results are cached for the session — the LLM is only called once.
53+
*
54+
* Graceful fallback: returns ALL skills on any failure (matches pre-feature behavior).
55+
*/
56+
export async function selectSkillsWithLLM(
57+
skills: Skill.Info[],
58+
fingerprint: Fingerprint.Result | undefined,
59+
deps?: SkillSelectorDeps,
60+
): Promise<Skill.Info[]> {
61+
// Return cached result if cwd hasn't changed (0ms)
62+
const cwd = fingerprint?.cwd
63+
if (cachedResult && cwd === cachedCwd) {
64+
log.info("returning cached skill selection", {
65+
count: cachedResult.length,
66+
})
67+
return cachedResult
68+
}
69+
70+
const { resolveModel, generate } = deps ?? defaultDeps
71+
72+
function cache(result: Skill.Info[]): Skill.Info[] {
73+
cachedResult = result
74+
cachedCwd = cwd
75+
return result
76+
}
77+
78+
try {
79+
const model = await resolveModel()
80+
if (!model) {
81+
log.info("no small model available, returning all skills")
82+
return cache(skills)
83+
}
84+
85+
// Build compact skill list for the prompt
86+
const skillList = skills.map((s) => ({
87+
name: s.name,
88+
description: s.description,
89+
}))
90+
91+
const envContext =
92+
fingerprint && fingerprint.tags.length > 0
93+
? fingerprint.tags.join(", ")
94+
: "none detected"
95+
96+
const params = {
97+
model,
98+
temperature: 0,
99+
schema: z.object({ selected: z.array(z.string()) }),
100+
messages: [
101+
{
102+
role: "system" as const,
103+
content: [
104+
"You are a skill selector for a coding assistant.",
105+
"Given a project environment and available skills, select which skills are relevant for this project.",
106+
"Return ONLY skill names the user likely needs. Select 0-15 skills.",
107+
"Prefer fewer, more relevant skills over many loosely related ones.",
108+
].join("\n"),
109+
},
110+
{
111+
role: "user" as const,
112+
content: [
113+
`Project environment: ${envContext}`,
114+
"",
115+
`Available skills: ${JSON.stringify(skillList)}`,
116+
].join("\n"),
117+
},
118+
],
119+
}
120+
121+
const result = await Promise.race([
122+
generate(params),
123+
new Promise<never>((_, reject) =>
124+
setTimeout(() => reject(new Error("skill selection timeout")), TIMEOUT_MS),
125+
),
126+
])
127+
128+
const selected = result.object.selected.slice(0, MAX_SKILLS)
129+
130+
// Zero-selection guard
131+
if (selected.length === 0) {
132+
log.info("LLM returned zero skills, returning all")
133+
return cache(skills)
134+
}
135+
136+
// Filter skills by returned names
137+
const selectedSet = new Set(selected)
138+
const matched = skills.filter((s) => selectedSet.has(s.name))
139+
140+
// If no valid matches (LLM returned non-existent names), return all
141+
if (matched.length === 0) {
142+
log.info("LLM returned no valid skill names, returning all")
143+
return cache(skills)
144+
}
145+
146+
log.info("selected skills", {
147+
count: matched.length,
148+
names: matched.map((s) => s.name),
149+
})
150+
return cache(matched)
151+
} catch (e) {
152+
log.info("skill selection failed, returning all skills", {
153+
error: e instanceof Error ? e.message : String(e),
154+
})
155+
return cache(skills)
156+
}
157+
}
158+
// altimate_change end

packages/opencode/src/config/config.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1266,6 +1266,11 @@ export namespace Config {
12661266
.describe(
12671267
"Automatically enhance prompts with AI before sending (default: false). Uses a small model to rewrite rough prompts into clearer versions.",
12681268
),
1269+
// altimate_change start - env fingerprint skill selection toggle
1270+
env_fingerprint_skill_selection: z
1271+
.boolean()
1272+
.optional()
1273+
.describe("Use environment fingerprint to select relevant skills once per session (default: true). When false, all skills are provided every turn."),
12691274
// altimate_change end
12701275
})
12711276
.optional(),

packages/opencode/src/session/prompt.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,10 @@ import { iife } from "@/util/iife"
5050
import { Shell } from "@/shell/shell"
5151
import { Truncate } from "@/tool/truncation"
5252
import { decodeDataUrl } from "@/util/data-url"
53+
// altimate_change start - import fingerprint for env-based skill selection
54+
import { Fingerprint } from "../altimate/fingerprint"
55+
import { Config } from "../config/config"
56+
// altimate_change end
5357
import { Telemetry } from "@/telemetry" // altimate_change — session telemetry
5458

5559
// @ts-ignore
@@ -297,6 +301,14 @@ export namespace SessionPrompt {
297301

298302
let step = 0
299303
const session = await Session.get(sessionID)
304+
// altimate_change start - detect environment fingerprint at session start
305+
const altCfg = await Config.get()
306+
if (altCfg.experimental?.env_fingerprint_skill_selection !== false) {
307+
await Fingerprint.detect(Instance.directory, Instance.worktree).catch((e) => {
308+
log.warn("fingerprint detection failed", { error: e })
309+
})
310+
}
311+
// altimate_change end
300312
// altimate_change start — session telemetry tracking
301313
await Telemetry.init()
302314
Telemetry.setContext({ sessionId: sessionID, projectId: Instance.project?.id ?? "" })

0 commit comments

Comments
 (0)