From 669b107b08f88a2985956879f69e7655edb456a3 Mon Sep 17 00:00:00 2001
From: Tushar <tusharmath@gmail.com>
Date: Sun, 29 Mar 2026 16:05:44 +0530
Subject: [PATCH 1/6] chore: remove forge.default.yaml configuration

---
 forge.default.yaml | 27 ---------------------------
 1 file changed, 27 deletions(-)
 delete mode 100644 forge.default.yaml
diff --git a/forge.default.yaml b/forge.default.yaml
deleted file mode 100644
index b311bbd4e8..0000000000
--- a/forge.default.yaml
+++ /dev/null
@@ -1,27 +0,0 @@
-# yaml-language-server: $schema=./forge.schema.json
-variables:
-  operating_agent: Forge
-  # Define model anchors with simpler, purpose-based names
-  advanced_model: &advanced_model anthropic/claude-sonnet-4
-
-max_requests_per_turn: 100
-max_tool_failure_per_turn: 3
-top_p: 0.8
-top_k: 30
-max_tokens: 20480
-max_walker_depth: 1
-tool_supported: true
-
-# Global compact configuration applied to all agents
-compact:
-  max_tokens: 2000
-  token_threshold: 100000
-  retention_window: 6
-  message_threshold: 200
-  eviction_window: 0.2
-  on_turn_end: false
-
-updates:
-  frequency: "daily"
-  auto_update: false
-model: *advanced_model

From 1c4be687ba8f989b809a2d3d061015e4f546eeaa Mon Sep 17 00:00:00 2001
From: Tushar <tusharmath@gmail.com>
Date: Sun, 29 Mar 2026 16:20:52 +0530
Subject: [PATCH 2/6] feat(forge_config): add LLM sampling presets config

---
 crates/forge_config/src/config.rs | 27 ++++-------
 crates/forge_config/src/legacy.rs | 22 ++++++---
 crates/forge_config/src/lib.rs    |  2 +
 crates/forge_config/src/preset.rs | 81 +++++++++++++++++++++++++++++++
 4 files changed, 107 insertions(+), 25 deletions(-)
 create mode 100644 crates/forge_config/src/preset.rs

diff --git a/crates/forge_config/src/config.rs b/crates/forge_config/src/config.rs
index 404e0863eb..c05de4c0e7 100644
--- a/crates/forge_config/src/config.rs
+++ b/crates/forge_config/src/config.rs
@@ -7,7 +7,7 @@ use serde::{Deserialize, Serialize};
 
 use crate::reader::ConfigReader;
 use crate::writer::ConfigWriter;
-use crate::{AutoDumpFormat, Compact, HttpConfig, ModelConfig, RetryConfig, Update};
+use crate::{AutoDumpFormat, Compact, HttpConfig, ModelConfig, PresetConfig, RetryConfig, Update};
 
 /// Top-level Forge configuration merged from all sources (defaults, file,
 /// environment).
@@ -82,25 +82,16 @@ pub struct ForgeConfig {
     #[serde(skip_serializing_if = "Option::is_none")]
     pub updates: Option<Update>,
 
-    /// Output randomness for all agents; lower values are deterministic, higher
-    /// values are creative (0.0–2.0).
-    #[serde(default, skip_serializing_if = "Option::is_none")]
-    pub temperature: Option<f32>,
-
-    /// Nucleus sampling threshold for all agents; limits token selection to the
-    /// top cumulative probability mass (0.0–1.0).
-    #[serde(default, skip_serializing_if = "Option::is_none")]
-    pub top_p: Option<f32>,
-
-    /// Top-k vocabulary cutoff for all agents; restricts sampling to the k
-    /// highest-probability tokens (1–1000).
-    #[serde(default, skip_serializing_if = "Option::is_none")]
-    pub top_k: Option<u32>,
+    /// Named presets of LLM-specific sampling and generation parameters.
+    /// Each preset is identified by its `id` and may be referenced from model
+    /// configurations and agent definitions.
+    #[serde(default, skip_serializing_if = "Vec::is_empty")]
+    pub presets: Vec<PresetConfig>,
 
-    /// Maximum tokens the model may generate per response for all agents
-    /// (1–100,000).
+    /// Default LLM sampling parameters applied to all agents when no
+    /// agent-specific or role-specific preset is in effect.
     #[serde(default, skip_serializing_if = "Option::is_none")]
-    pub max_tokens: Option<u32>,
+    pub session_preset: Option<PresetConfig>,
 
     /// Maximum tool failures per turn before the orchestrator forces
     /// completion.
diff --git a/crates/forge_config/src/legacy.rs b/crates/forge_config/src/legacy.rs
index 7310333814..c6b7af9d7b 100644
--- a/crates/forge_config/src/legacy.rs
+++ b/crates/forge_config/src/legacy.rs
@@ -55,16 +55,24 @@ impl LegacyConfig {
     fn into_forge_config(self) -> ForgeConfig {
         let session = self.provider.as_deref().map(|provider_id| {
             let model_id = self.model.get(provider_id).cloned();
-            ModelConfig { provider_id: Some(provider_id.to_string()), model_id }
+            ModelConfig {
+                provider_id: Some(provider_id.to_string()),
+                model_id,
+                ..Default::default()
+            }
         });
 
-        let commit = self
-            .commit
-            .map(|c| ModelConfig { provider_id: c.provider, model_id: c.model });
+        let commit = self.commit.map(|c| ModelConfig {
+            provider_id: c.provider,
+            model_id: c.model,
+            ..Default::default()
+        });
 
-        let suggest = self
-            .suggest
-            .map(|s| ModelConfig { provider_id: s.provider, model_id: s.model });
+        let suggest = self.suggest.map(|s| ModelConfig {
+            provider_id: s.provider,
+            model_id: s.model,
+            ..Default::default()
+        });
 
         ForgeConfig { session, commit, suggest, ..Default::default() }
     }
diff --git a/crates/forge_config/src/lib.rs b/crates/forge_config/src/lib.rs
index b0ba37a4b3..2bd9e609af 100644
--- a/crates/forge_config/src/lib.rs
+++ b/crates/forge_config/src/lib.rs
@@ -5,6 +5,7 @@ mod error;
 mod http;
 mod legacy;
 mod model;
+mod preset;
 mod reader;
 mod retry;
 mod writer;
@@ -15,6 +16,7 @@ pub use config::*;
 pub use error::Error;
 pub use http::*;
 pub use model::*;
+pub use preset::*;
 pub use reader::*;
 pub use retry::*;
 pub use writer::*;
diff --git a/crates/forge_config/src/preset.rs b/crates/forge_config/src/preset.rs
new file mode 100644
index 0000000000..d49f328e11
--- /dev/null
+++ b/crates/forge_config/src/preset.rs
@@ -0,0 +1,81 @@
+use derive_setters::Setters;
+use fake::Dummy;
+use schemars::JsonSchema;
+use serde::{Deserialize, Serialize};
+
+/// Effort level for reasoning; controls the depth of model thinking.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, JsonSchema, Dummy)]
+#[serde(rename_all = "snake_case")]
+pub enum Effort {
+    /// Minimal reasoning; fastest and cheapest.
+    Low,
+    /// Balanced reasoning effort.
+    Medium,
+    /// Deep reasoning; slower and more expensive than `Medium`.
+    High,
+    /// Deepest tier, above `High` (serialized as `x_high`); highest cost and latency.
+    XHigh,
+}
+
+/// Reasoning configuration for a preset; every field is optional.
+/// Controls how and whether models engage extended chain-of-thought reasoning.
+#[derive(Debug, Setters, Clone, PartialEq, Serialize, Deserialize, JsonSchema, Dummy)]
+#[serde(rename_all = "snake_case")]
+#[setters(strip_option, into)]
+pub struct ReasoningConfig {
+    /// Effort level for reasoning; controls the depth of model thinking.
+    /// Supported by OpenRouter and the Forge provider.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub effort: Option<Effort>,
+
+    /// Maximum number of tokens the model may spend on reasoning.
+    /// Supported by OpenRouter, Anthropic, and the Forge provider.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub max_tokens: Option<usize>,
+
+    /// When true, the model reasons internally but reasoning output is hidden.
+    /// Supported by OpenRouter and the Forge provider.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub exclude: Option<bool>,
+
+    /// When true, enables reasoning at the "medium" effort level with no exclusions.
+    /// Supported by OpenRouter, Anthropic, and the Forge provider.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub enabled: Option<bool>,
+}
+
+/// A named collection of LLM-specific sampling and generation parameters.
+/// Presets are referenced by their `id` from model configurations and agent
+/// definitions to apply a consistent set of inference settings.
+#[derive(Debug, Setters, Clone, PartialEq, Serialize, Deserialize, JsonSchema, Dummy)]
+#[serde(rename_all = "snake_case")]
+#[setters(strip_option, into)]
+pub struct PresetConfig {
+    /// Unique identifier for this preset. Required for catalog entries in
+    /// [`ForgeConfig::presets`]; not meaningful for inline uses such as
+    /// [`ForgeConfig::session_preset`].
+    pub id: String,
+
+    /// Output randomness; lower values are deterministic, higher values are
+    /// creative (0.0–2.0).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub temperature: Option<f32>,
+
+    /// Nucleus sampling threshold; limits token selection to the top
+    /// cumulative probability mass (0.0–1.0).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub top_p: Option<f32>,
+
+    /// Top-k vocabulary cutoff; restricts sampling to the k
+    /// highest-probability tokens (1–1000).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub top_k: Option<u32>,
+
+    /// Maximum tokens the model may generate per response (1–100,000).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub max_tokens: Option<u32>,
+
+    /// Reasoning configuration; controls extended chain-of-thought thinking.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub reasoning: Option<ReasoningConfig>,
+}

From 2006cc2bfb4be92f8ae720f4100b136356d1deec Mon Sep 17 00:00:00 2001
From: Tushar <tusharmath@gmail.com>
Date: Sun, 29 Mar 2026 16:23:13 +0530
Subject: [PATCH 3/6] feat(config): move sampling params to session_preset

---
 crates/forge_config/.forge.toml   |   4 +-
 crates/forge_config/src/config.rs |   5 +-
 crates/forge_config/src/preset.rs |  11 +-
 crates/forge_infra/src/env.rs     |  22 ++--
 forge.schema.json                 | 168 ++++++++++++++++++++++++------
 5 files changed, 155 insertions(+), 55 deletions(-)

diff --git a/crates/forge_config/.forge.toml b/crates/forge_config/.forge.toml
index fa2331e690..e55d0f2112 100644
--- a/crates/forge_config/.forge.toml
+++ b/crates/forge_config/.forge.toml
@@ -15,7 +15,6 @@ max_sem_search_results = 100
 max_stdout_line_chars = 500
 max_stdout_prefix_lines = 100
 max_stdout_suffix_lines = 100
-max_tokens = 20480
 max_tool_failure_per_turn = 3
 model_cache_ttl_secs = 604800
 restricted = false
@@ -23,6 +22,9 @@ sem_search_top_k = 10
 services_url = "https://api.forgecode.dev/"
 tool_supported = true
 tool_timeout_secs = 300
+
+[session_preset]
+max_tokens = 20480
 top_k = 30
 top_p = 0.8
 
diff --git a/crates/forge_config/src/config.rs b/crates/forge_config/src/config.rs
index c05de4c0e7..4a044bf5ff 100644
--- a/crates/forge_config/src/config.rs
+++ b/crates/forge_config/src/config.rs
@@ -82,9 +82,8 @@ pub struct ForgeConfig {
     #[serde(skip_serializing_if = "Option::is_none")]
     pub updates: Option<Update>,
 
-    /// Named presets of LLM-specific sampling and generation parameters.
-    /// Each preset is identified by its `id` and may be referenced from model
-    /// configurations and agent definitions.
+    /// Named presets of LLM-specific sampling and generation parameters
+    /// applied to model configurations and agent definitions.
     #[serde(default, skip_serializing_if = "Vec::is_empty")]
     pub presets: Vec<PresetConfig>,
 
diff --git a/crates/forge_config/src/preset.rs b/crates/forge_config/src/preset.rs
index d49f328e11..89941e6fb9 100644
--- a/crates/forge_config/src/preset.rs
+++ b/crates/forge_config/src/preset.rs
@@ -45,17 +45,12 @@ pub struct ReasoningConfig {
 }
 
 /// A named collection of LLM-specific sampling and generation parameters.
-/// Presets are referenced by their `id` from model configurations and agent
-/// definitions to apply a consistent set of inference settings.
-#[derive(Debug, Setters, Clone, PartialEq, Serialize, Deserialize, JsonSchema, Dummy)]
+/// Presets apply a consistent set of inference settings to model configurations
+/// and agent definitions.
+#[derive(Debug, Default, Setters, Clone, PartialEq, Serialize, Deserialize, JsonSchema, Dummy)]
 #[serde(rename_all = "snake_case")]
 #[setters(strip_option, into)]
 pub struct PresetConfig {
-    /// Unique identifier for this preset. Required for catalog entries in
-    /// [`ForgeConfig::presets`]; not meaningful for inline uses such as
-    /// [`ForgeConfig::session_preset`].
-    pub id: String,
-
     /// Output randomness; lower values are deterministic, higher values are
     /// creative (0.0–2.0).
     #[serde(skip_serializing_if = "Option::is_none")]
diff --git a/crates/forge_infra/src/env.rs b/crates/forge_infra/src/env.rs
index 04b17274ba..5ef989a94a 100644
--- a/crates/forge_infra/src/env.rs
+++ b/crates/forge_infra/src/env.rs
@@ -3,7 +3,7 @@ use std::path::PathBuf;
 use std::sync::Arc;
 
 use forge_app::EnvironmentInfra;
-use forge_config::{ConfigReader, ForgeConfig, ModelConfig};
+use forge_config::{ConfigReader, ForgeConfig, ModelConfig, PresetConfig};
 use forge_domain::{
     AutoDumpFormat, Compact, ConfigOperation, Environment, HttpConfig, MaxTokens, ModelId,
     RetryConfig, SessionConfig, Temperature, TlsBackend, TlsVersion, TopK, TopP, Update,
@@ -165,10 +165,10 @@ fn to_environment(fc: ForgeConfig, cwd: PathBuf) -> Environment {
         suggest: fc.suggest.as_ref().map(to_session_config),
         is_restricted: fc.restricted,
         tool_supported: fc.tool_supported,
-        temperature: fc.temperature.and_then(|v| Temperature::new(v).ok()),
-        top_p: fc.top_p.and_then(|v| TopP::new(v).ok()),
-        top_k: fc.top_k.and_then(|v| TopK::new(v).ok()),
-        max_tokens: fc.max_tokens.and_then(|v| MaxTokens::new(v).ok()),
+        temperature: fc.session_preset.as_ref().and_then(|p| p.temperature).and_then(|v| Temperature::new(v).ok()),
+        top_p: fc.session_preset.as_ref().and_then(|p| p.top_p).and_then(|v| TopP::new(v).ok()),
+        top_k: fc.session_preset.as_ref().and_then(|p| p.top_k).and_then(|v| TopK::new(v).ok()),
+        max_tokens: fc.session_preset.as_ref().and_then(|p| p.max_tokens).and_then(|v| MaxTokens::new(v).ok()),
         max_tool_failure_per_turn: fc.max_tool_failure_per_turn,
         max_requests_per_turn: fc.max_requests_per_turn,
         compact: fc.compact.map(to_compact),
@@ -324,10 +324,14 @@ fn to_forge_config(env: &Environment) -> ForgeConfig {
     fc.tool_supported = env.tool_supported;
 
     // --- Workflow fields ---
-    fc.temperature = env.temperature.map(|t| t.value());
-    fc.top_p = env.top_p.map(|t| t.value());
-    fc.top_k = env.top_k.map(|t| t.value());
-    fc.max_tokens = env.max_tokens.map(|t| t.value());
+    let session_preset = PresetConfig {
+        temperature: env.temperature.map(|t| t.value()),
+        top_p: env.top_p.map(|t| t.value()),
+        top_k: env.top_k.map(|t| t.value()),
+        max_tokens: env.max_tokens.map(|t| t.value()),
+        reasoning: None,
+    };
+    fc.session_preset = if session_preset == PresetConfig::default() { None } else { Some(session_preset) };
     fc.max_tool_failure_per_turn = env.max_tool_failure_per_turn;
     fc.max_requests_per_turn = env.max_requests_per_turn;
     fc.compact = env.compact.as_ref().map(from_compact);
diff --git a/forge.schema.json b/forge.schema.json
index 9d6ab4ac8f..cdb90d8f85 100644
--- a/forge.schema.json
+++ b/forge.schema.json
@@ -166,15 +166,6 @@
       "format": "uint",
       "minimum": 0
     },
-    "max_tokens": {
-      "description": "Maximum tokens the model may generate per response for all agents\n(1–100,000).",
-      "type": [
-        "integer",
-        "null"
-      ],
-      "format": "uint32",
-      "minimum": 0
-    },
     "max_tool_failure_per_turn": {
       "description": "Maximum tool failures per turn before the orchestrator forces\ncompletion.",
       "type": [
@@ -190,6 +181,13 @@
       "format": "uint64",
       "minimum": 0
     },
+    "presets": {
+      "description": "Named presets of LLM-specific sampling and generation parameters.\nEach preset is identified by its `id` and may be referenced from model\nconfigurations and agent definitions.",
+      "type": "array",
+      "items": {
+        "$ref": "#/$defs/Preset"
+      }
+    },
     "restricted": {
       "description": "Whether the application is running in restricted mode.\nWhen true, tool execution requires explicit permission grants.",
       "type": "boolean"
@@ -227,6 +225,17 @@
       ],
       "default": null
     },
+    "session_preset": {
+      "description": "Default LLM sampling parameters applied to all agents when no\nagent-specific or role-specific preset is in effect.",
+      "anyOf": [
+        {
+          "$ref": "#/$defs/Preset"
+        },
+        {
+          "type": "null"
+        }
+      ]
+    },
     "suggest": {
       "description": "Provider and model to use for shell command suggestion generation",
       "anyOf": [
@@ -239,14 +248,6 @@
       ],
       "default": null
     },
-    "temperature": {
-      "description": "Output randomness for all agents; lower values are deterministic, higher\nvalues are creative (0.0–2.0).",
-      "type": [
-        "number",
-        "null"
-      ],
-      "format": "float"
-    },
     "tool_supported": {
       "description": "Whether tool use is supported in the current environment.\nWhen false, tool calls are disabled regardless of agent configuration.",
       "type": "boolean"
@@ -257,23 +258,6 @@
       "format": "uint64",
       "minimum": 0
     },
-    "top_k": {
-      "description": "Top-k vocabulary cutoff for all agents; restricts sampling to the k\nhighest-probability tokens (1–1000).",
-      "type": [
-        "integer",
-        "null"
-      ],
-      "format": "uint32",
-      "minimum": 0
-    },
-    "top_p": {
-      "description": "Nucleus sampling threshold for all agents; limits token selection to the\ntop cumulative probability mass (0.0–1.0).",
-      "type": [
-        "number",
-        "null"
-      ],
-      "format": "float"
-    },
     "updates": {
       "description": "Configuration for automatic forge updates",
       "anyOf": [
@@ -395,6 +379,26 @@
         }
       }
     },
+    "Effort": {
+      "description": "Effort level for reasoning; controls the depth of model thinking.",
+      "oneOf": [
+        {
+          "description": "Minimal reasoning; fastest and cheapest.",
+          "type": "string",
+          "const": "low"
+        },
+        {
+          "description": "Balanced reasoning effort.",
+          "type": "string",
+          "const": "medium"
+        },
+        {
+          "description": "Maximum reasoning depth; slowest and most expensive.",
+          "type": "string",
+          "const": "high"
+        }
+      ]
+    },
     "HttpConfig": {
       "description": "HTTP client configuration.",
       "type": "object",
@@ -524,6 +528,102 @@
         }
       }
     },
+    "Preset": {
+      "description": "A named collection of LLM-specific sampling and generation parameters.\nPresets are referenced by their `id` from model configurations and agent\ndefinitions to apply a consistent set of inference settings.",
+      "type": "object",
+      "properties": {
+        "id": {
+          "description": "Unique identifier for this preset. Required for catalog entries in\n[`ForgeConfig::presets`]; not meaningful for inline uses such as\n[`ForgeConfig::session_preset`].",
+          "type": "string",
+          "default": ""
+        },
+        "max_tokens": {
+          "description": "Maximum tokens the model may generate per response (1–100,000).",
+          "type": [
+            "integer",
+            "null"
+          ],
+          "format": "uint32",
+          "minimum": 0
+        },
+        "reasoning": {
+          "description": "Reasoning configuration; controls extended chain-of-thought thinking.",
+          "anyOf": [
+            {
+              "$ref": "#/$defs/ReasoningConfig"
+            },
+            {
+              "type": "null"
+            }
+          ]
+        },
+        "temperature": {
+          "description": "Output randomness; lower values are deterministic, higher values are\ncreative (0.0–2.0).",
+          "type": [
+            "number",
+            "null"
+          ],
+          "format": "float"
+        },
+        "top_k": {
+          "description": "Top-k vocabulary cutoff; restricts sampling to the k\nhighest-probability tokens (1–1000).",
+          "type": [
+            "integer",
+            "null"
+          ],
+          "format": "uint32",
+          "minimum": 0
+        },
+        "top_p": {
+          "description": "Nucleus sampling threshold; limits token selection to the top\ncumulative probability mass (0.0–1.0).",
+          "type": [
+            "number",
+            "null"
+          ],
+          "format": "float"
+        }
+      }
+    },
+    "ReasoningConfig": {
+      "description": "Reasoning configuration for a preset.\nControls how and whether models engage extended chain-of-thought reasoning.",
+      "type": "object",
+      "properties": {
+        "effort": {
+          "description": "Effort level for reasoning; controls the depth of model thinking.\nSupported by OpenRouter and the Forge provider.",
+          "anyOf": [
+            {
+              "$ref": "#/$defs/Effort"
+            },
+            {
+              "type": "null"
+            }
+          ]
+        },
+        "enabled": {
+          "description": "Enables reasoning at the \"medium\" effort level with no exclusions.\nSupported by OpenRouter, Anthropic, and the Forge provider.",
+          "type": [
+            "boolean",
+            "null"
+          ]
+        },
+        "exclude": {
+          "description": "When true, the model reasons internally but reasoning output is hidden.\nSupported by OpenRouter and the Forge provider.",
+          "type": [
+            "boolean",
+            "null"
+          ]
+        },
+        "max_tokens": {
+          "description": "Maximum number of tokens the model may spend on reasoning.\nSupported by OpenRouter, Anthropic, and the Forge provider.",
+          "type": [
+            "integer",
+            "null"
+          ],
+          "format": "uint",
+          "minimum": 0
+        }
+      }
+    },
     "RetryConfig": {
       "description": "Configuration for retry mechanism.",
       "type": "object",

From d49de0eb6502a218c9b3cb313204158830855595 Mon Sep 17 00:00:00 2001
From: Tushar <tusharmath@gmail.com>
Date: Sun, 29 Mar 2026 16:36:57 +0530
Subject: [PATCH 4/6] feat(forge_config): add default presets config

---
 crates/forge_config/.forge.toml   | 3 ++-
 crates/forge_config/src/config.rs | 5 -----
 crates/forge_config/src/preset.rs | 5 ++++-
 3 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/crates/forge_config/.forge.toml b/crates/forge_config/.forge.toml
index e55d0f2112..601d1c3589 100644
--- a/crates/forge_config/.forge.toml
+++ b/crates/forge_config/.forge.toml
@@ -23,7 +23,8 @@ services_url = "https://api.forgecode.dev/"
 tool_supported = true
 tool_timeout_secs = 300
 
-[session_preset]
+[[presets]]
+id = "default"
 max_tokens = 20480
 top_k = 30
 top_p = 0.8
diff --git a/crates/forge_config/src/config.rs b/crates/forge_config/src/config.rs
index 4a044bf5ff..6af56c001c 100644
--- a/crates/forge_config/src/config.rs
+++ b/crates/forge_config/src/config.rs
@@ -87,11 +87,6 @@ pub struct ForgeConfig {
     #[serde(default, skip_serializing_if = "Vec::is_empty")]
     pub presets: Vec<PresetConfig>,
 
-    /// Default LLM sampling parameters applied to all agents when no
-    /// agent-specific or role-specific preset is in effect.
-    #[serde(default, skip_serializing_if = "Option::is_none")]
-    pub session_preset: Option<PresetConfig>,
-
     /// Maximum tool failures per turn before the orchestrator forces
     /// completion.
     #[serde(default, skip_serializing_if = "Option::is_none")]
diff --git a/crates/forge_config/src/preset.rs b/crates/forge_config/src/preset.rs
index 89941e6fb9..7f199b1135 100644
--- a/crates/forge_config/src/preset.rs
+++ b/crates/forge_config/src/preset.rs
@@ -47,10 +47,13 @@ pub struct ReasoningConfig {
 /// A named collection of LLM-specific sampling and generation parameters.
 /// Presets apply a consistent set of inference settings to model configurations
 /// and agent definitions.
-#[derive(Debug, Default, Setters, Clone, PartialEq, Serialize, Deserialize, JsonSchema, Dummy)]
+#[derive(Debug, Setters, Clone, PartialEq, Serialize, Deserialize, JsonSchema, Dummy)]
 #[serde(rename_all = "snake_case")]
 #[setters(strip_option, into)]
 pub struct PresetConfig {
+    /// Unique identifier for this preset.
+    pub id: String,
+
     /// Output randomness; lower values are deterministic, higher values are
     /// creative (0.0–2.0).
     #[serde(skip_serializing_if = "Option::is_none")]

From 5572d4521f4b5e93f1c0c61f4ba4f40dbfa9f08a Mon Sep 17 00:00:00 2001
From: Tushar <tusharmath@gmail.com>
Date: Sun, 29 Mar 2026 16:38:02 +0530
Subject: [PATCH 5/6] refactor(forge_config)!: replace session_preset with
 presets default

---
 crates/forge_infra/src/env.rs | 20 ++++++++++++++------
 forge.schema.json             | 34 +++++++++++++++-------------------
 2 files changed, 29 insertions(+), 25 deletions(-)

diff --git a/crates/forge_infra/src/env.rs b/crates/forge_infra/src/env.rs
index 5ef989a94a..1e3021495a 100644
--- a/crates/forge_infra/src/env.rs
+++ b/crates/forge_infra/src/env.rs
@@ -165,10 +165,10 @@ fn to_environment(fc: ForgeConfig, cwd: PathBuf) -> Environment {
         suggest: fc.suggest.as_ref().map(to_session_config),
         is_restricted: fc.restricted,
         tool_supported: fc.tool_supported,
-        temperature: fc.session_preset.as_ref().and_then(|p| p.temperature).and_then(|v| Temperature::new(v).ok()),
-        top_p: fc.session_preset.as_ref().and_then(|p| p.top_p).and_then(|v| TopP::new(v).ok()),
-        top_k: fc.session_preset.as_ref().and_then(|p| p.top_k).and_then(|v| TopK::new(v).ok()),
-        max_tokens: fc.session_preset.as_ref().and_then(|p| p.max_tokens).and_then(|v| MaxTokens::new(v).ok()),
+        temperature: fc.presets.iter().find(|p| p.id == "default").and_then(|p| p.temperature).and_then(|v| Temperature::new(v).ok()),
+        top_p: fc.presets.iter().find(|p| p.id == "default").and_then(|p| p.top_p).and_then(|v| TopP::new(v).ok()),
+        top_k: fc.presets.iter().find(|p| p.id == "default").and_then(|p| p.top_k).and_then(|v| TopK::new(v).ok()),
+        max_tokens: fc.presets.iter().find(|p| p.id == "default").and_then(|p| p.max_tokens).and_then(|v| MaxTokens::new(v).ok()),
         max_tool_failure_per_turn: fc.max_tool_failure_per_turn,
         max_requests_per_turn: fc.max_requests_per_turn,
         compact: fc.compact.map(to_compact),
@@ -324,14 +324,22 @@ fn to_forge_config(env: &Environment) -> ForgeConfig {
     fc.tool_supported = env.tool_supported;
 
     // --- Workflow fields ---
-    let session_preset = PresetConfig {
+    let forge_default = PresetConfig {
+        id: "default".to_string(),
         temperature: env.temperature.map(|t| t.value()),
         top_p: env.top_p.map(|t| t.value()),
         top_k: env.top_k.map(|t| t.value()),
         max_tokens: env.max_tokens.map(|t| t.value()),
         reasoning: None,
     };
-    fc.session_preset = if session_preset == PresetConfig::default() { None } else { Some(session_preset) };
+    if forge_default.temperature.is_some()
+        || forge_default.top_p.is_some()
+        || forge_default.top_k.is_some()
+        || forge_default.max_tokens.is_some()
+        || forge_default.reasoning.is_some()
+    {
+        fc.presets.push(forge_default);
+    }
     fc.max_tool_failure_per_turn = env.max_tool_failure_per_turn;
     fc.max_requests_per_turn = env.max_requests_per_turn;
     fc.compact = env.compact.as_ref().map(from_compact);
diff --git a/forge.schema.json b/forge.schema.json
index cdb90d8f85..c768fbb640 100644
--- a/forge.schema.json
+++ b/forge.schema.json
@@ -182,10 +182,10 @@
       "minimum": 0
     },
     "presets": {
-      "description": "Named presets of LLM-specific sampling and generation parameters.\nEach preset is identified by its `id` and may be referenced from model\nconfigurations and agent definitions.",
+      "description": "Named presets of LLM-specific sampling and generation parameters\napplied to model configurations and agent definitions.",
       "type": "array",
       "items": {
-        "$ref": "#/$defs/Preset"
+        "$ref": "#/$defs/PresetConfig"
       }
     },
     "restricted": {
@@ -225,17 +225,6 @@
       ],
       "default": null
     },
-    "session_preset": {
-      "description": "Default LLM sampling parameters applied to all agents when no\nagent-specific or role-specific preset is in effect.",
-      "anyOf": [
-        {
-          "$ref": "#/$defs/Preset"
-        },
-        {
-          "type": "null"
-        }
-      ]
-    },
     "suggest": {
       "description": "Provider and model to use for shell command suggestion generation",
       "anyOf": [
@@ -396,6 +385,11 @@
           "description": "Maximum reasoning depth; slowest and most expensive.",
           "type": "string",
           "const": "high"
+        },
+        {
+          "description": "Beyond maximum reasoning depth; highest cost and latency.",
+          "type": "string",
+          "const": "x_high"
         }
       ]
     },
@@ -528,14 +522,13 @@
         }
       }
     },
-    "Preset": {
-      "description": "A named collection of LLM-specific sampling and generation parameters.\nPresets are referenced by their `id` from model configurations and agent\ndefinitions to apply a consistent set of inference settings.",
+    "PresetConfig": {
+      "description": "A named collection of LLM-specific sampling and generation parameters.\nPresets apply a consistent set of inference settings to model configurations\nand agent definitions.",
       "type": "object",
       "properties": {
         "id": {
-          "description": "Unique identifier for this preset. Required for catalog entries in\n[`ForgeConfig::presets`]; not meaningful for inline uses such as\n[`ForgeConfig::session_preset`].",
-          "type": "string",
-          "default": ""
+          "description": "Unique identifier for this preset.",
+          "type": "string"
         },
         "max_tokens": {
           "description": "Maximum tokens the model may generate per response (1–100,000).",
@@ -582,7 +575,10 @@
           ],
           "format": "float"
         }
-      }
+      },
+      "required": [
+        "id"
+      ]
     },
     "ReasoningConfig": {
       "description": "Reasoning configuration for a preset.\nControls how and whether models engage extended chain-of-thought reasoning.",

From 1fdadd69710946501fdada1fa75389b83e853ebc Mon Sep 17 00:00:00 2001
From: Tushar <tusharmath@gmail.com>
Date: Sun, 29 Mar 2026 16:45:34 +0530
Subject: [PATCH 6/6] feat(forge_config): add preset_id to session and models

---
 crates/forge_config/src/model.rs  | 2 ++
 crates/forge_config/src/reader.rs | 1 +
 crates/forge_domain/src/env.rs    | 2 ++
 crates/forge_infra/src/env.rs     | 4 ++++
 forge.schema.json                 | 7 +++++++
 5 files changed, 16 insertions(+)

diff --git a/crates/forge_config/src/model.rs b/crates/forge_config/src/model.rs
index c993222700..fbec599b69 100644
--- a/crates/forge_config/src/model.rs
+++ b/crates/forge_config/src/model.rs
@@ -18,4 +18,6 @@ pub struct ModelConfig {
     pub provider_id: Option<String>,
     /// The model to use for this operation.
     pub model_id: Option<String>,
+    /// The preset ID to apply preset parameters to this configuration.
+    pub preset_id: Option<String>,
 }
diff --git a/crates/forge_config/src/reader.rs b/crates/forge_config/src/reader.rs
index 2f2ed94821..fee01835e2 100644
--- a/crates/forge_config/src/reader.rs
+++ b/crates/forge_config/src/reader.rs
@@ -186,6 +186,7 @@ mod tests {
         let expected = Some(ModelConfig {
             provider_id: Some("fake-provider".to_string()),
             model_id: Some("fake-model".to_string()),
+            preset_id: None,
         });
         assert_eq!(actual.session, expected);
     }
diff --git a/crates/forge_domain/src/env.rs b/crates/forge_domain/src/env.rs
index 1db2b2903c..db085455c0 100644
--- a/crates/forge_domain/src/env.rs
+++ b/crates/forge_domain/src/env.rs
@@ -23,6 +23,8 @@ pub struct SessionConfig {
     pub provider_id: Option<String>,
     /// The model ID to use with this provider.
     pub model_id: Option<String>,
+    /// The preset ID to apply preset parameters to this configuration.
+    pub preset_id: Option<String>,
 }
 
 /// All discrete mutations that can be applied to the application configuration.
diff --git a/crates/forge_infra/src/env.rs b/crates/forge_infra/src/env.rs
index 1e3021495a..7a8dc7b774 100644
--- a/crates/forge_infra/src/env.rs
+++ b/crates/forge_infra/src/env.rs
@@ -17,6 +17,7 @@ fn to_session_config(mc: &ModelConfig) -> SessionConfig {
     SessionConfig {
         provider_id: mc.provider_id.clone(),
         model_id: mc.model_id.clone(),
+        preset_id: mc.preset_id.clone(),
     }
 }
 
@@ -349,14 +350,17 @@ fn to_forge_config(env: &Environment) -> ForgeConfig {
     fc.session = env.session.as_ref().map(|sc| ModelConfig {
         provider_id: sc.provider_id.clone(),
         model_id: sc.model_id.clone(),
+        preset_id: sc.preset_id.clone(),
     });
     fc.commit = env.commit.as_ref().map(|sc| ModelConfig {
         provider_id: sc.provider_id.clone(),
         model_id: sc.model_id.clone(),
+        preset_id: sc.preset_id.clone(),
     });
     fc.suggest = env.suggest.as_ref().map(|sc| ModelConfig {
         provider_id: sc.provider_id.clone(),
         model_id: sc.model_id.clone(),
+        preset_id: sc.preset_id.clone(),
     });
     fc
 }
diff --git a/forge.schema.json b/forge.schema.json
index c768fbb640..e705d39d56 100644
--- a/forge.schema.json
+++ b/forge.schema.json
@@ -513,6 +513,13 @@
             "null"
           ]
         },
+        "preset_id": {
+          "description": "The preset ID to apply preset parameters to this configuration.",
+          "type": [
+            "string",
+            "null"
+          ]
+        },
         "provider_id": {
           "description": "The provider to use for this operation.",
           "type": [