diff --git a/crates/forge_config/.forge.toml b/crates/forge_config/.forge.toml
index fa2331e690..601d1c3589 100644
--- a/crates/forge_config/.forge.toml
+++ b/crates/forge_config/.forge.toml
@@ -15,7 +15,6 @@ max_sem_search_results = 100
 max_stdout_line_chars = 500
 max_stdout_prefix_lines = 100
 max_stdout_suffix_lines = 100
-max_tokens = 20480
 max_tool_failure_per_turn = 3
 model_cache_ttl_secs = 604800
 restricted = false
@@ -23,6 +22,10 @@ sem_search_top_k = 10
 services_url = "https://api.forgecode.dev/"
 tool_supported = true
 tool_timeout_secs = 300
+
+[[presets]]
+id = "default"
+max_tokens = 20480
 top_k = 30
 top_p = 0.8
 
diff --git a/crates/forge_config/src/config.rs b/crates/forge_config/src/config.rs
index 404e0863eb..6af56c001c 100644
--- a/crates/forge_config/src/config.rs
+++ b/crates/forge_config/src/config.rs
@@ -7,7 +7,7 @@ use serde::{Deserialize, Serialize};
 
 use crate::reader::ConfigReader;
 use crate::writer::ConfigWriter;
-use crate::{AutoDumpFormat, Compact, HttpConfig, ModelConfig, RetryConfig, Update};
+use crate::{AutoDumpFormat, Compact, HttpConfig, ModelConfig, PresetConfig, RetryConfig, Update};
 
 /// Top-level Forge configuration merged from all sources (defaults, file,
 /// environment).
@@ -82,25 +82,10 @@ pub struct ForgeConfig {
     #[serde(skip_serializing_if = "Option::is_none")]
     pub updates: Option<Update>,
 
-    /// Output randomness for all agents; lower values are deterministic, higher
-    /// values are creative (0.0–2.0).
-    #[serde(default, skip_serializing_if = "Option::is_none")]
-    pub temperature: Option<f32>,
-
-    /// Nucleus sampling threshold for all agents; limits token selection to the
-    /// top cumulative probability mass (0.0–1.0).
-    #[serde(default, skip_serializing_if = "Option::is_none")]
-    pub top_p: Option<f32>,
-
-    /// Top-k vocabulary cutoff for all agents; restricts sampling to the k
-    /// highest-probability tokens (1–1000).
-    #[serde(default, skip_serializing_if = "Option::is_none")]
-    pub top_k: Option<u32>,
-
-    /// Maximum tokens the model may generate per response for all agents
-    /// (1–100,000).
-    #[serde(default, skip_serializing_if = "Option::is_none")]
-    pub max_tokens: Option<u32>,
+    /// Named presets of LLM-specific sampling and generation parameters
+    /// applied to model configurations and agent definitions.
+    #[serde(default, skip_serializing_if = "Vec::is_empty")]
+    pub presets: Vec<PresetConfig>,
 
     /// Maximum tool failures per turn before the orchestrator forces
     /// completion.
diff --git a/crates/forge_config/src/legacy.rs b/crates/forge_config/src/legacy.rs
index 7310333814..c6b7af9d7b 100644
--- a/crates/forge_config/src/legacy.rs
+++ b/crates/forge_config/src/legacy.rs
@@ -55,16 +55,24 @@ impl LegacyConfig {
     fn into_forge_config(self) -> ForgeConfig {
         let session = self.provider.as_deref().map(|provider_id| {
             let model_id = self.model.get(provider_id).cloned();
-            ModelConfig { provider_id: Some(provider_id.to_string()), model_id }
+            ModelConfig {
+                provider_id: Some(provider_id.to_string()),
+                model_id,
+                ..Default::default()
+            }
         });
 
-        let commit = self
-            .commit
-            .map(|c| ModelConfig { provider_id: c.provider, model_id: c.model });
+        let commit = self.commit.map(|c| ModelConfig {
+            provider_id: c.provider,
+            model_id: c.model,
+            ..Default::default()
+        });
 
-        let suggest = self
-            .suggest
-            .map(|s| ModelConfig { provider_id: s.provider, model_id: s.model });
+        let suggest = self.suggest.map(|s| ModelConfig {
+            provider_id: s.provider,
+            model_id: s.model,
+            ..Default::default()
+        });
 
         ForgeConfig { session, commit, suggest, ..Default::default() }
     }
diff --git a/crates/forge_config/src/lib.rs b/crates/forge_config/src/lib.rs
index b0ba37a4b3..2bd9e609af 100644
--- a/crates/forge_config/src/lib.rs
+++ b/crates/forge_config/src/lib.rs
@@ -5,6 +5,7 @@ mod error;
 mod http;
 mod legacy;
 mod model;
+mod preset;
 mod reader;
 mod retry;
 mod writer;
@@ -15,6 +16,7 @@ pub use config::*;
 pub use error::Error;
 pub use http::*;
 pub use model::*;
+pub use preset::*;
 pub use reader::*;
 pub use retry::*;
 pub use writer::*;
diff --git a/crates/forge_config/src/model.rs b/crates/forge_config/src/model.rs
index c993222700..fbec599b69 100644
--- a/crates/forge_config/src/model.rs
+++ b/crates/forge_config/src/model.rs
@@ -18,4 +18,6 @@ pub struct ModelConfig {
     pub provider_id: Option<String>,
     /// The model to use for this operation.
     pub model_id: Option<String>,
+    /// The preset ID to apply preset parameters to this configuration.
+    pub preset_id: Option<String>,
 }
diff --git a/crates/forge_config/src/preset.rs b/crates/forge_config/src/preset.rs
new file mode 100644
index 0000000000..7f199b1135
--- /dev/null
+++ b/crates/forge_config/src/preset.rs
@@ -0,0 +1,79 @@
+use derive_setters::Setters;
+use fake::Dummy;
+use schemars::JsonSchema;
+use serde::{Deserialize, Serialize};
+
+/// Effort level for reasoning; controls the depth of model thinking.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, JsonSchema, Dummy)]
+#[serde(rename_all = "snake_case")]
+pub enum Effort {
+    /// Minimal reasoning; fastest and cheapest.
+    Low,
+    /// Balanced reasoning effort.
+    Medium,
+    /// Maximum reasoning depth; slowest and most expensive.
+    High,
+    /// Beyond maximum reasoning depth; highest cost and latency.
+    XHigh,
+}
+
+/// Reasoning configuration for a preset.
+/// Controls how and whether models engage extended chain-of-thought reasoning.
+#[derive(Debug, Default, Setters, Clone, PartialEq, Serialize, Deserialize, JsonSchema, Dummy)]
+#[serde(rename_all = "snake_case")]
+#[setters(strip_option, into)]
+pub struct ReasoningConfig {
+    /// Effort level for reasoning; controls the depth of model thinking.
+    /// Supported by OpenRouter and the Forge provider.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub effort: Option<Effort>,
+
+    /// Maximum number of tokens the model may spend on reasoning.
+    /// Supported by OpenRouter, Anthropic, and the Forge provider.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub max_tokens: Option<usize>,
+
+    /// When true, the model reasons internally but reasoning output is hidden.
+    /// Supported by OpenRouter and the Forge provider.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub exclude: Option<bool>,
+
+    /// Enables reasoning at the "medium" effort level with no exclusions.
+    /// Supported by OpenRouter, Anthropic, and the Forge provider.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub enabled: Option<bool>,
+}
+
+/// A named collection of LLM-specific sampling and generation parameters.
+/// Presets apply a consistent set of inference settings to model configurations
+/// and agent definitions.
+#[derive(Debug, Setters, Clone, PartialEq, Serialize, Deserialize, JsonSchema, Dummy)]
+#[serde(rename_all = "snake_case")]
+#[setters(strip_option, into)]
+pub struct PresetConfig {
+    /// Unique identifier for this preset.
+    pub id: String,
+
+    /// Output randomness; lower values are deterministic, higher values are
+    /// creative (0.0–2.0).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub temperature: Option<f32>,
+
+    /// Nucleus sampling threshold; limits token selection to the top
+    /// cumulative probability mass (0.0–1.0).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub top_p: Option<f32>,
+
+    /// Top-k vocabulary cutoff; restricts sampling to the k
+    /// highest-probability tokens (1–1000).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub top_k: Option<u32>,
+
+    /// Maximum tokens the model may generate per response (1–100,000).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub max_tokens: Option<u32>,
+
+    /// Reasoning configuration; controls extended chain-of-thought thinking.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub reasoning: Option<ReasoningConfig>,
+}
diff --git a/crates/forge_config/src/reader.rs b/crates/forge_config/src/reader.rs
index 2f2ed94821..fee01835e2 100644
--- a/crates/forge_config/src/reader.rs
+++ b/crates/forge_config/src/reader.rs
@@ -186,6 +186,7 @@ mod tests {
         let expected = Some(ModelConfig {
             provider_id: Some("fake-provider".to_string()),
             model_id: Some("fake-model".to_string()),
+            preset_id: None,
         });
         assert_eq!(actual.session, expected);
     }
diff --git a/crates/forge_domain/src/env.rs b/crates/forge_domain/src/env.rs
index 1db2b2903c..db085455c0 100644
--- a/crates/forge_domain/src/env.rs
+++ b/crates/forge_domain/src/env.rs
@@ -23,6 +23,8 @@ pub struct SessionConfig {
     pub provider_id: Option<String>,
     /// The model ID to use with this provider.
     pub model_id: Option<String>,
+    /// The preset ID to apply preset parameters to this configuration.
+    pub preset_id: Option<String>,
 }
 
 /// All discrete mutations that can be applied to the application configuration.
diff --git a/crates/forge_infra/src/env.rs b/crates/forge_infra/src/env.rs
index 04b17274ba..7a8dc7b774 100644
--- a/crates/forge_infra/src/env.rs
+++ b/crates/forge_infra/src/env.rs
@@ -3,7 +3,7 @@ use std::path::PathBuf;
 use std::sync::Arc;
 
 use forge_app::EnvironmentInfra;
-use forge_config::{ConfigReader, ForgeConfig, ModelConfig};
+use forge_config::{ConfigReader, ForgeConfig, ModelConfig, PresetConfig};
 use forge_domain::{
     AutoDumpFormat, Compact, ConfigOperation, Environment, HttpConfig, MaxTokens, ModelId,
     RetryConfig, SessionConfig, Temperature, TlsBackend, TlsVersion, TopK, TopP, Update,
@@ -17,6 +17,7 @@ fn to_session_config(mc: &ModelConfig) -> SessionConfig {
     SessionConfig {
         provider_id: mc.provider_id.clone(),
         model_id: mc.model_id.clone(),
+        preset_id: mc.preset_id.clone(),
     }
 }
 
@@ -165,10 +166,10 @@ fn to_environment(fc: ForgeConfig, cwd: PathBuf) -> Environment {
         suggest: fc.suggest.as_ref().map(to_session_config),
         is_restricted: fc.restricted,
         tool_supported: fc.tool_supported,
-        temperature: fc.temperature.and_then(|v| Temperature::new(v).ok()),
-        top_p: fc.top_p.and_then(|v| TopP::new(v).ok()),
-        top_k: fc.top_k.and_then(|v| TopK::new(v).ok()),
-        max_tokens: fc.max_tokens.and_then(|v| MaxTokens::new(v).ok()),
+        temperature: fc.presets.iter().find(|p| p.id == "default").and_then(|p| p.temperature).and_then(|v| Temperature::new(v).ok()),
+        top_p: fc.presets.iter().find(|p| p.id == "default").and_then(|p| p.top_p).and_then(|v| TopP::new(v).ok()),
+        top_k: fc.presets.iter().find(|p| p.id == "default").and_then(|p| p.top_k).and_then(|v| TopK::new(v).ok()),
+        max_tokens: fc.presets.iter().find(|p| p.id == "default").and_then(|p| p.max_tokens).and_then(|v| MaxTokens::new(v).ok()),
         max_tool_failure_per_turn: fc.max_tool_failure_per_turn,
         max_requests_per_turn: fc.max_requests_per_turn,
         compact: fc.compact.map(to_compact),
@@ -324,10 +325,22 @@ fn to_forge_config(env: &Environment) -> ForgeConfig {
     fc.tool_supported = env.tool_supported;
 
     // --- Workflow fields ---
-    fc.temperature = env.temperature.map(|t| t.value());
-    fc.top_p = env.top_p.map(|t| t.value());
-    fc.top_k = env.top_k.map(|t| t.value());
-    fc.max_tokens = env.max_tokens.map(|t| t.value());
+    let forge_default = PresetConfig {
+        id: "default".to_string(),
+        temperature: env.temperature.map(|t| t.value()),
+        top_p: env.top_p.map(|t| t.value()),
+        top_k: env.top_k.map(|t| t.value()),
+        max_tokens: env.max_tokens.map(|t| t.value()),
+        reasoning: None,
+    };
+    // Skip an all-empty preset; `reasoning` is always `None` here, so it is not checked.
+    if forge_default.temperature.is_some()
+        || forge_default.top_p.is_some()
+        || forge_default.top_k.is_some()
+        || forge_default.max_tokens.is_some()
+    {
+        fc.presets.push(forge_default);
+    }
     fc.max_tool_failure_per_turn = env.max_tool_failure_per_turn;
     fc.max_requests_per_turn = env.max_requests_per_turn;
     fc.compact = env.compact.as_ref().map(from_compact);
@@ -337,14 +350,17 @@ fn to_forge_config(env: &Environment) -> ForgeConfig {
     fc.session = env.session.as_ref().map(|sc| ModelConfig {
         provider_id: sc.provider_id.clone(),
         model_id: sc.model_id.clone(),
+        preset_id: sc.preset_id.clone(),
     });
     fc.commit = env.commit.as_ref().map(|sc| ModelConfig {
         provider_id: sc.provider_id.clone(),
         model_id: sc.model_id.clone(),
+        preset_id: sc.preset_id.clone(),
     });
     fc.suggest = env.suggest.as_ref().map(|sc| ModelConfig {
         provider_id: sc.provider_id.clone(),
         model_id: sc.model_id.clone(),
+        preset_id: sc.preset_id.clone(),
     });
     fc
 }
diff --git a/forge.default.yaml b/forge.default.yaml
deleted file mode 100644
index b311bbd4e8..0000000000
--- a/forge.default.yaml
+++ /dev/null
@@ -1,27 +0,0 @@
-# yaml-language-server: $schema=./forge.schema.json
-variables:
-  operating_agent: Forge
-  # Define model anchors with simpler, purpose-based names
-  advanced_model: &advanced_model anthropic/claude-sonnet-4
-
-max_requests_per_turn: 100
-max_tool_failure_per_turn: 3
-top_p: 0.8
-top_k: 30
-max_tokens: 20480
-max_walker_depth: 1
-tool_supported: true
-
-# Global compact configuration applied to all agents
-compact:
-  max_tokens: 2000
-  token_threshold: 100000
-  retention_window: 6
-  message_threshold: 200
-  eviction_window: 0.2
-  on_turn_end: false
-
-updates:
-  frequency: "daily"
-  auto_update: false
-model: *advanced_model
diff --git a/forge.schema.json b/forge.schema.json
index 9d6ab4ac8f..e705d39d56 100644
--- a/forge.schema.json
+++ b/forge.schema.json
@@ -166,15 +166,6 @@
       "format": "uint",
       "minimum": 0
     },
-    "max_tokens": {
-      "description": "Maximum tokens the model may generate per response for all agents\n(1–100,000).",
-      "type": [
-        "integer",
-        "null"
-      ],
-      "format": "uint32",
-      "minimum": 0
-    },
     "max_tool_failure_per_turn": {
       "description": "Maximum tool failures per turn before the orchestrator forces\ncompletion.",
       "type": [
@@ -190,6 +181,13 @@
       "format": "uint64",
       "minimum": 0
     },
+    "presets": {
+      "description": "Named presets of LLM-specific sampling and generation parameters\napplied to model configurations and agent definitions.",
+      "type": "array",
+      "items": {
+        "$ref": "#/$defs/PresetConfig"
+      }
+    },
     "restricted": {
       "description": "Whether the application is running in restricted mode.\nWhen true, tool execution requires explicit permission grants.",
       "type": "boolean"
@@ -239,14 +237,6 @@
       ],
       "default": null
     },
-    "temperature": {
-      "description": "Output randomness for all agents; lower values are deterministic, higher\nvalues are creative (0.0–2.0).",
-      "type": [
-        "number",
-        "null"
-      ],
-      "format": "float"
-    },
     "tool_supported": {
       "description": "Whether tool use is supported in the current environment.\nWhen false, tool calls are disabled regardless of agent configuration.",
       "type": "boolean"
@@ -257,23 +247,6 @@
       "format": "uint64",
       "minimum": 0
     },
-    "top_k": {
-      "description": "Top-k vocabulary cutoff for all agents; restricts sampling to the k\nhighest-probability tokens (1–1000).",
-      "type": [
-        "integer",
-        "null"
-      ],
-      "format": "uint32",
-      "minimum": 0
-    },
-    "top_p": {
-      "description": "Nucleus sampling threshold for all agents; limits token selection to the\ntop cumulative probability mass (0.0–1.0).",
-      "type": [
-        "number",
-        "null"
-      ],
-      "format": "float"
-    },
     "updates": {
       "description": "Configuration for automatic forge updates",
       "anyOf": [
@@ -395,6 +368,31 @@
         }
       }
     },
+    "Effort": {
+      "description": "Effort level for reasoning; controls the depth of model thinking.",
+      "oneOf": [
+        {
+          "description": "Minimal reasoning; fastest and cheapest.",
+          "type": "string",
+          "const": "low"
+        },
+        {
+          "description": "Balanced reasoning effort.",
+          "type": "string",
+          "const": "medium"
+        },
+        {
+          "description": "Maximum reasoning depth; slowest and most expensive.",
+          "type": "string",
+          "const": "high"
+        },
+        {
+          "description": "Beyond maximum reasoning depth; highest cost and latency.",
+          "type": "string",
+          "const": "x_high"
+        }
+      ]
+    },
     "HttpConfig": {
       "description": "HTTP client configuration.",
       "type": "object",
@@ -515,6 +513,13 @@
             "null"
           ]
         },
+        "preset_id": {
+          "description": "The preset ID to apply preset parameters to this configuration.",
+          "type": [
+            "string",
+            "null"
+          ]
+        },
         "provider_id": {
           "description": "The provider to use for this operation.",
           "type": [
@@ -524,6 +529,104 @@
         }
       }
     },
+    "PresetConfig": {
+      "description": "A named collection of LLM-specific sampling and generation parameters.\nPresets apply a consistent set of inference settings to model configurations\nand agent definitions.",
+      "type": "object",
+      "properties": {
+        "id": {
+          "description": "Unique identifier for this preset.",
+          "type": "string"
+        },
+        "max_tokens": {
+          "description": "Maximum tokens the model may generate per response (1–100,000).",
+          "type": [
+            "integer",
+            "null"
+          ],
+          "format": "uint32",
+          "minimum": 0
+        },
+        "reasoning": {
+          "description": "Reasoning configuration; controls extended chain-of-thought thinking.",
+          "anyOf": [
+            {
+              "$ref": "#/$defs/ReasoningConfig"
+            },
+            {
+              "type": "null"
+            }
+          ]
+        },
+        "temperature": {
+          "description": "Output randomness; lower values are deterministic, higher values are\ncreative (0.0–2.0).",
+          "type": [
+            "number",
+            "null"
+          ],
+          "format": "float"
+        },
+        "top_k": {
+          "description": "Top-k vocabulary cutoff; restricts sampling to the k\nhighest-probability tokens (1–1000).",
+          "type": [
+            "integer",
+            "null"
+          ],
+          "format": "uint32",
+          "minimum": 0
+        },
+        "top_p": {
+          "description": "Nucleus sampling threshold; limits token selection to the top\ncumulative probability mass (0.0–1.0).",
+          "type": [
+            "number",
+            "null"
+          ],
+          "format": "float"
+        }
+      },
+      "required": [
+        "id"
+      ]
+    },
+    "ReasoningConfig": {
+      "description": "Reasoning configuration for a preset.\nControls how and whether models engage extended chain-of-thought reasoning.",
+      "type": "object",
+      "properties": {
+        "effort": {
+          "description": "Effort level for reasoning; controls the depth of model thinking.\nSupported by OpenRouter and the Forge provider.",
+          "anyOf": [
+            {
+              "$ref": "#/$defs/Effort"
+            },
+            {
+              "type": "null"
+            }
+          ]
+        },
+        "enabled": {
+          "description": "Enables reasoning at the \"medium\" effort level with no exclusions.\nSupported by OpenRouter, Anthropic, and the Forge provider.",
+          "type": [
+            "boolean",
+            "null"
+          ]
+        },
+        "exclude": {
+          "description": "When true, the model reasons internally but reasoning output is hidden.\nSupported by OpenRouter and the Forge provider.",
+          "type": [
+            "boolean",
+            "null"
+          ]
+        },
+        "max_tokens": {
+          "description": "Maximum number of tokens the model may spend on reasoning.\nSupported by OpenRouter, Anthropic, and the Forge provider.",
+          "type": [
+            "integer",
+            "null"
+          ],
+          "format": "uint",
+          "minimum": 0
+        }
+      }
+    },
     "RetryConfig": {
       "description": "Configuration for retry mechanism.",
       "type": "object",