diff --git a/crates/forge_config/.forge.toml b/crates/forge_config/.forge.toml index fa2331e690..601d1c3589 100644 --- a/crates/forge_config/.forge.toml +++ b/crates/forge_config/.forge.toml @@ -15,7 +15,6 @@ max_sem_search_results = 100 max_stdout_line_chars = 500 max_stdout_prefix_lines = 100 max_stdout_suffix_lines = 100 -max_tokens = 20480 max_tool_failure_per_turn = 3 model_cache_ttl_secs = 604800 restricted = false @@ -23,6 +22,10 @@ sem_search_top_k = 10 services_url = "https://api.forgecode.dev/" tool_supported = true tool_timeout_secs = 300 + +[[presets]] +id = "default" +max_tokens = 20480 top_k = 30 top_p = 0.8 diff --git a/crates/forge_config/src/config.rs b/crates/forge_config/src/config.rs index 404e0863eb..6af56c001c 100644 --- a/crates/forge_config/src/config.rs +++ b/crates/forge_config/src/config.rs @@ -7,7 +7,7 @@ use serde::{Deserialize, Serialize}; use crate::reader::ConfigReader; use crate::writer::ConfigWriter; -use crate::{AutoDumpFormat, Compact, HttpConfig, ModelConfig, RetryConfig, Update}; +use crate::{AutoDumpFormat, Compact, HttpConfig, ModelConfig, PresetConfig, RetryConfig, Update}; /// Top-level Forge configuration merged from all sources (defaults, file, /// environment). @@ -82,25 +82,10 @@ pub struct ForgeConfig { #[serde(skip_serializing_if = "Option::is_none")] pub updates: Option, - /// Output randomness for all agents; lower values are deterministic, higher - /// values are creative (0.0–2.0). - #[serde(default, skip_serializing_if = "Option::is_none")] - pub temperature: Option, - - /// Nucleus sampling threshold for all agents; limits token selection to the - /// top cumulative probability mass (0.0–1.0). - #[serde(default, skip_serializing_if = "Option::is_none")] - pub top_p: Option, - - /// Top-k vocabulary cutoff for all agents; restricts sampling to the k - /// highest-probability tokens (1–1000). - #[serde(default, skip_serializing_if = "Option::is_none")] - pub top_k: Option, - - /// Maximum tokens the model may generate per response for all agents - /// (1–100,000). - #[serde(default, skip_serializing_if = "Option::is_none")] - pub max_tokens: Option, + /// Named presets of LLM-specific sampling and generation parameters + /// applied to model configurations and agent definitions. + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub presets: Vec, /// Maximum tool failures per turn before the orchestrator forces /// completion. diff --git a/crates/forge_config/src/legacy.rs b/crates/forge_config/src/legacy.rs index 7310333814..c6b7af9d7b 100644 --- a/crates/forge_config/src/legacy.rs +++ b/crates/forge_config/src/legacy.rs @@ -55,16 +55,24 @@ impl LegacyConfig { fn into_forge_config(self) -> ForgeConfig { let session = self.provider.as_deref().map(|provider_id| { let model_id = self.model.get(provider_id).cloned(); - ModelConfig { provider_id: Some(provider_id.to_string()), model_id } + ModelConfig { + provider_id: Some(provider_id.to_string()), + model_id, + ..Default::default() + } }); - let commit = self - .commit - .map(|c| ModelConfig { provider_id: c.provider, model_id: c.model }); + let commit = self.commit.map(|c| ModelConfig { + provider_id: c.provider, + model_id: c.model, + ..Default::default() + }); - let suggest = self - .suggest - .map(|s| ModelConfig { provider_id: s.provider, model_id: s.model }); + let suggest = self.suggest.map(|s| ModelConfig { + provider_id: s.provider, + model_id: s.model, + ..Default::default() + }); ForgeConfig { session, commit, suggest, ..Default::default() } } diff --git a/crates/forge_config/src/lib.rs b/crates/forge_config/src/lib.rs index b0ba37a4b3..2bd9e609af 100644 --- a/crates/forge_config/src/lib.rs +++ b/crates/forge_config/src/lib.rs @@ -5,6 +5,7 @@ mod error; mod http; mod legacy; mod model; +mod preset; mod reader; mod retry; mod writer; @@ -15,6 +16,7 @@ pub use config::*; pub use error::Error; pub use http::*; pub use model::*; +pub use preset::*; pub use reader::*; pub use retry::*; pub use writer::*; diff --git a/crates/forge_config/src/model.rs b/crates/forge_config/src/model.rs index c993222700..fbec599b69 100644 --- a/crates/forge_config/src/model.rs +++ b/crates/forge_config/src/model.rs @@ -18,4 +18,6 @@ pub struct ModelConfig { pub provider_id: Option, /// The model to use for this operation. pub model_id: Option, + /// The preset ID to apply preset parameters to this configuration. + pub preset_id: Option, } diff --git a/crates/forge_config/src/preset.rs b/crates/forge_config/src/preset.rs new file mode 100644 index 0000000000..7f199b1135 --- /dev/null +++ b/crates/forge_config/src/preset.rs @@ -0,0 +1,79 @@ +use derive_setters::Setters; +use fake::Dummy; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; + +/// Effort level for reasoning; controls the depth of model thinking. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, JsonSchema, Dummy)] +#[serde(rename_all = "snake_case")] +pub enum Effort { + /// Minimal reasoning; fastest and cheapest. + Low, + /// Balanced reasoning effort. + Medium, + /// Maximum reasoning depth; slowest and most expensive. + High, + /// Beyond maximum reasoning depth; highest cost and latency. + XHigh, +} + +/// Reasoning configuration for a preset. +/// Controls how and whether models engage extended chain-of-thought reasoning. +#[derive(Debug, Setters, Clone, PartialEq, Serialize, Deserialize, JsonSchema, Dummy)] +#[serde(rename_all = "snake_case")] +#[setters(strip_option, into)] +pub struct ReasoningConfig { + /// Effort level for reasoning; controls the depth of model thinking. + /// Supported by OpenRouter and the Forge provider. + #[serde(skip_serializing_if = "Option::is_none")] + pub effort: Option, + + /// Maximum number of tokens the model may spend on reasoning. + /// Supported by OpenRouter, Anthropic, and the Forge provider. + #[serde(skip_serializing_if = "Option::is_none")] + pub max_tokens: Option, + + /// When true, the model reasons internally but reasoning output is hidden. + /// Supported by OpenRouter and the Forge provider. + #[serde(skip_serializing_if = "Option::is_none")] + pub exclude: Option, + + /// Enables reasoning at the "medium" effort level with no exclusions. + /// Supported by OpenRouter, Anthropic, and the Forge provider. + #[serde(skip_serializing_if = "Option::is_none")] + pub enabled: Option, +} + +/// A named collection of LLM-specific sampling and generation parameters. +/// Presets apply a consistent set of inference settings to model configurations +/// and agent definitions. +#[derive(Debug, Setters, Clone, PartialEq, Serialize, Deserialize, JsonSchema, Dummy)] +#[serde(rename_all = "snake_case")] +#[setters(strip_option, into)] +pub struct PresetConfig { + /// Unique identifier for this preset. + pub id: String, + + /// Output randomness; lower values are deterministic, higher values are + /// creative (0.0–2.0). + #[serde(skip_serializing_if = "Option::is_none")] + pub temperature: Option, + + /// Nucleus sampling threshold; limits token selection to the top + /// cumulative probability mass (0.0–1.0). + #[serde(skip_serializing_if = "Option::is_none")] + pub top_p: Option, + + /// Top-k vocabulary cutoff; restricts sampling to the k + /// highest-probability tokens (1–1000). + #[serde(skip_serializing_if = "Option::is_none")] + pub top_k: Option, + + /// Maximum tokens the model may generate per response (1–100,000). + #[serde(skip_serializing_if = "Option::is_none")] + pub max_tokens: Option, + + /// Reasoning configuration; controls extended chain-of-thought thinking. + #[serde(skip_serializing_if = "Option::is_none")] + pub reasoning: Option, +} diff --git a/crates/forge_config/src/reader.rs b/crates/forge_config/src/reader.rs index 2f2ed94821..fee01835e2 100644 --- a/crates/forge_config/src/reader.rs +++ b/crates/forge_config/src/reader.rs @@ -186,6 +186,7 @@ mod tests { let expected = Some(ModelConfig { provider_id: Some("fake-provider".to_string()), model_id: Some("fake-model".to_string()), + preset_id: None, }); assert_eq!(actual.session, expected); } diff --git a/crates/forge_domain/src/env.rs b/crates/forge_domain/src/env.rs index 1db2b2903c..db085455c0 100644 --- a/crates/forge_domain/src/env.rs +++ b/crates/forge_domain/src/env.rs @@ -23,6 +23,8 @@ pub struct SessionConfig { pub provider_id: Option, /// The model ID to use with this provider. pub model_id: Option, + /// The preset ID to apply preset parameters to this configuration. + pub preset_id: Option, } /// All discrete mutations that can be applied to the application configuration. diff --git a/crates/forge_infra/src/env.rs b/crates/forge_infra/src/env.rs index 04b17274ba..7a8dc7b774 100644 --- a/crates/forge_infra/src/env.rs +++ b/crates/forge_infra/src/env.rs @@ -3,7 +3,7 @@ use std::path::PathBuf; use std::sync::Arc; use forge_app::EnvironmentInfra; -use forge_config::{ConfigReader, ForgeConfig, ModelConfig}; +use forge_config::{ConfigReader, ForgeConfig, ModelConfig, PresetConfig}; use forge_domain::{ AutoDumpFormat, Compact, ConfigOperation, Environment, HttpConfig, MaxTokens, ModelId, RetryConfig, SessionConfig, Temperature, TlsBackend, TlsVersion, TopK, TopP, Update, @@ -17,6 +17,7 @@ fn to_session_config(mc: &ModelConfig) -> SessionConfig { SessionConfig { provider_id: mc.provider_id.clone(), model_id: mc.model_id.clone(), + preset_id: mc.preset_id.clone(), } } @@ -165,10 +166,10 @@ fn to_environment(fc: ForgeConfig, cwd: PathBuf) -> Environment { suggest: fc.suggest.as_ref().map(to_session_config), is_restricted: fc.restricted, tool_supported: fc.tool_supported, - temperature: fc.temperature.and_then(|v| Temperature::new(v).ok()), - top_p: fc.top_p.and_then(|v| TopP::new(v).ok()), - top_k: fc.top_k.and_then(|v| TopK::new(v).ok()), - max_tokens: fc.max_tokens.and_then(|v| MaxTokens::new(v).ok()), + temperature: fc.presets.iter().find(|p| p.id == "default").and_then(|p| p.temperature).and_then(|v| Temperature::new(v).ok()), + top_p: fc.presets.iter().find(|p| p.id == "default").and_then(|p| p.top_p).and_then(|v| TopP::new(v).ok()), + top_k: fc.presets.iter().find(|p| p.id == "default").and_then(|p| p.top_k).and_then(|v| TopK::new(v).ok()), + max_tokens: fc.presets.iter().find(|p| p.id == "default").and_then(|p| p.max_tokens).and_then(|v| MaxTokens::new(v).ok()), max_tool_failure_per_turn: fc.max_tool_failure_per_turn, max_requests_per_turn: fc.max_requests_per_turn, compact: fc.compact.map(to_compact), @@ -324,10 +325,22 @@ fn to_forge_config(env: &Environment) -> ForgeConfig { fc.tool_supported = env.tool_supported; // --- Workflow fields --- - fc.temperature = env.temperature.map(|t| t.value()); - fc.top_p = env.top_p.map(|t| t.value()); - fc.top_k = env.top_k.map(|t| t.value()); - fc.max_tokens = env.max_tokens.map(|t| t.value()); + let forge_default = PresetConfig { + id: "default".to_string(), + temperature: env.temperature.map(|t| t.value()), + top_p: env.top_p.map(|t| t.value()), + top_k: env.top_k.map(|t| t.value()), + max_tokens: env.max_tokens.map(|t| t.value()), + reasoning: None, + }; + if forge_default.temperature.is_some() + || forge_default.top_p.is_some() + || forge_default.top_k.is_some() + || forge_default.max_tokens.is_some() + || forge_default.reasoning.is_some() + { + fc.presets.push(forge_default); + } fc.max_tool_failure_per_turn = env.max_tool_failure_per_turn; fc.max_requests_per_turn = env.max_requests_per_turn; fc.compact = env.compact.as_ref().map(from_compact); @@ -337,14 +350,17 @@ fn to_forge_config(env: &Environment) -> ForgeConfig { fc.session = env.session.as_ref().map(|sc| ModelConfig { provider_id: sc.provider_id.clone(), model_id: sc.model_id.clone(), + preset_id: sc.preset_id.clone(), }); fc.commit = env.commit.as_ref().map(|sc| ModelConfig { provider_id: sc.provider_id.clone(), model_id: sc.model_id.clone(), + preset_id: sc.preset_id.clone(), }); fc.suggest = env.suggest.as_ref().map(|sc| ModelConfig { provider_id: sc.provider_id.clone(), model_id: sc.model_id.clone(), + preset_id: sc.preset_id.clone(), }); fc } diff --git a/forge.default.yaml b/forge.default.yaml deleted file mode 100644 index b311bbd4e8..0000000000 --- a/forge.default.yaml +++ /dev/null @@ -1,27 +0,0 @@ -# yaml-language-server: $schema=./forge.schema.json -variables: - operating_agent: Forge - # Define model anchors with simpler, purpose-based names - advanced_model: &advanced_model anthropic/claude-sonnet-4 - -max_requests_per_turn: 100 -max_tool_failure_per_turn: 3 -top_p: 0.8 -top_k: 30 -max_tokens: 20480 -max_walker_depth: 1 -tool_supported: true - -# Global compact configuration applied to all agents -compact: - max_tokens: 2000 - token_threshold: 100000 - retention_window: 6 - message_threshold: 200 - eviction_window: 0.2 - on_turn_end: false - -updates: - frequency: "daily" - auto_update: false -model: *advanced_model diff --git a/forge.schema.json b/forge.schema.json index 9d6ab4ac8f..e705d39d56 100644 --- a/forge.schema.json +++ b/forge.schema.json @@ -166,15 +166,6 @@ "format": "uint", "minimum": 0 }, - "max_tokens": { - "description": "Maximum tokens the model may generate per response for all agents\n(1–100,000).", - "type": [ - "integer", - "null" - ], - "format": "uint32", - "minimum": 0 - }, "max_tool_failure_per_turn": { "description": "Maximum tool failures per turn before the orchestrator forces\ncompletion.", "type": [ @@ -190,6 +181,13 @@ "format": "uint64", "minimum": 0 }, + "presets": { + "description": "Named presets of LLM-specific sampling and generation parameters\napplied to model configurations and agent definitions.", + "type": "array", + "items": { + "$ref": "#/$defs/PresetConfig" + } + }, "restricted": { "description": "Whether the application is running in restricted mode.\nWhen true, tool execution requires explicit permission grants.", "type": "boolean" @@ -239,14 +237,6 @@ ], "default": null }, - "temperature": { - "description": "Output randomness for all agents; lower values are deterministic, higher\nvalues are creative (0.0–2.0).", - "type": [ - "number", - "null" - ], - "format": "float" - }, "tool_supported": { "description": "Whether tool use is supported in the current environment.\nWhen false, tool calls are disabled regardless of agent configuration.", "type": "boolean" @@ -257,23 +247,6 @@ "format": "uint64", "minimum": 0 }, - "top_k": { - "description": "Top-k vocabulary cutoff for all agents; restricts sampling to the k\nhighest-probability tokens (1–1000).", - "type": [ - "integer", - "null" - ], - "format": "uint32", - "minimum": 0 - }, - "top_p": { - "description": "Nucleus sampling threshold for all agents; limits token selection to the\ntop cumulative probability mass (0.0–1.0).", - "type": [ - "number", - "null" - ], - "format": "float" - }, "updates": { "description": "Configuration for automatic forge updates", "anyOf": [ @@ -395,6 +368,31 @@ } } }, + "Effort": { + "description": "Effort level for reasoning; controls the depth of model thinking.", + "oneOf": [ + { + "description": "Minimal reasoning; fastest and cheapest.", + "type": "string", + "const": "low" + }, + { + "description": "Balanced reasoning effort.", + "type": "string", + "const": "medium" + }, + { + "description": "Maximum reasoning depth; slowest and most expensive.", + "type": "string", + "const": "high" + }, + { + "description": "Beyond maximum reasoning depth; highest cost and latency.", + "type": "string", + "const": "x_high" + } + ] + }, "HttpConfig": { "description": "HTTP client configuration.", "type": "object", @@ -515,6 +513,13 @@ "null" ] }, + "preset_id": { + "description": "The preset ID to apply preset parameters to this configuration.", + "type": [ + "string", + "null" + ] + }, "provider_id": { "description": "The provider to use for this operation.", "type": [ @@ -524,6 +529,104 @@ } } }, + "PresetConfig": { + "description": "A named collection of LLM-specific sampling and generation parameters.\nPresets apply a consistent set of inference settings to model configurations\nand agent definitions.", + "type": "object", + "properties": { + "id": { + "description": "Unique identifier for this preset.", + "type": "string" + }, + "max_tokens": { + "description": "Maximum tokens the model may generate per response (1–100,000).", + "type": [ + "integer", + "null" + ], + "format": "uint32", + "minimum": 0 + }, + "reasoning": { + "description": "Reasoning configuration; controls extended chain-of-thought thinking.", + "anyOf": [ + { + "$ref": "#/$defs/ReasoningConfig" + }, + { + "type": "null" + } + ] + }, + "temperature": { + "description": "Output randomness; lower values are deterministic, higher values are\ncreative (0.0–2.0).", + "type": [ + "number", + "null" + ], + "format": "float" + }, + "top_k": { + "description": "Top-k vocabulary cutoff; restricts sampling to the k\nhighest-probability tokens (1–1000).", + "type": [ + "integer", + "null" + ], + "format": "uint32", + "minimum": 0 + }, + "top_p": { + "description": "Nucleus sampling threshold; limits token selection to the top\ncumulative probability mass (0.0–1.0).", + "type": [ + "number", + "null" + ], + "format": "float" + } + }, + "required": [ + "id" + ] + }, + "ReasoningConfig": { + "description": "Reasoning configuration for a preset.\nControls how and whether models engage extended chain-of-thought reasoning.", + "type": "object", + "properties": { + "effort": { + "description": "Effort level for reasoning; controls the depth of model thinking.\nSupported by OpenRouter and the Forge provider.", + "anyOf": [ + { + "$ref": "#/$defs/Effort" + }, + { + "type": "null" + } + ] + }, + "enabled": { + "description": "Enables reasoning at the \"medium\" effort level with no exclusions.\nSupported by OpenRouter, Anthropic, and the Forge provider.", + "type": [ + "boolean", + "null" + ] + }, + "exclude": { + "description": "When true, the model reasons internally but reasoning output is hidden.\nSupported by OpenRouter and the Forge provider.", + "type": [ + "boolean", + "null" + ] + }, + "max_tokens": { + "description": "Maximum number of tokens the model may spend on reasoning.\nSupported by OpenRouter, Anthropic, and the Forge provider.", + "type": [ + "integer", + "null" + ], + "format": "uint", + "minimum": 0 + } + } + }, "RetryConfig": { "description": "Configuration for retry mechanism.", "type": "object",