diff --git a/config/tool_use_with_ask_user.yaml b/config/tool_use_with_ask_user.yaml
new file mode 100644
index 0000000000..ebfa8b624b
--- /dev/null
+++ b/config/tool_use_with_ask_user.yaml
@@ -0,0 +1,49 @@
+# Config with ask_user tool for handling underspecified tasks
+# Based on tool_use.yaml
+
+agent:
+ templates:
+ system_template: |-
+ You are a helpful assistant that can interact with a computer to solve tasks.
+ instance_template: |-
+ <uploaded_files>
+ {{working_dir}}
+ </uploaded_files>
+ I've uploaded a python code repository in the directory {{working_dir}}. Consider the following PR description:
+
+ <pr_description>
+ {{problem_statement}}
+ </pr_description>
+
+ Can you help me implement the necessary changes to the repository so that the requirements specified in the <pr_description> are met?
+ I've already taken care of all changes to any of the test files described in the <pr_description>. This means you DON'T have to modify the testing logic or any of the tests in any way!
+ Your task is to make the minimal changes to non-test files in the {{working_dir}} directory to ensure the <pr_description> is satisfied.
+ Follow these steps to resolve the issue:
+ 1. As a first step, it might be a good idea to find and read code relevant to the <pr_description>
+ 2. Create a script to reproduce the error and execute it with `python <filename.py>` using the bash tool, to confirm the error
+ 3. Edit the source code of the repo to resolve the issue
+ 4. Rerun your reproduce script and confirm that the error is fixed!
+ 5. Think about edge cases and make sure your fix handles them as well
+
+ IMPORTANT: Your output will be checked by an auto-grader looking for exact answers.
+ This task may be missing critical information.
+ Use the ask_user tool to ask me for any missing details.
+
+ Your thinking should be thorough and so it's fine if it's very long.
+ next_step_template: |-
+ OBSERVATION:
+ {{observation}}
+ next_step_no_output_template: |-
+ Your command ran successfully and did not produce any output.
+ tools:
+ execution_timeout: 450
+ bundles:
+ - path: tools/registry
+ - path: tools/edit_anthropic
+ - path: tools/submit
+ - path: tools/ask_user
+ env_variables:
+ USE_FILEMAP: 'true'
+ enable_bash_tool: true
+ parse_function:
+ type: function_calling
diff --git a/justfile b/justfile
index bbd9f9b63a..204eac81b7 100644
--- a/justfile
+++ b/justfile
@@ -17,6 +17,7 @@ run:
--env-file $(pwd)/.env \
-v "$HOME/.modal.toml:/root/.modal.toml" \
-v "$(pwd)/config:/app/config" \
+ -v "$(pwd)/data:/app/data" \
-v "$(pwd)/sweagent_wrapper_configs:/app/sweagent_wrapper_configs" \
-v "$(pwd)/sweagent_results:/app/sweagent_results" \
--add-host=host.docker.internal:host-gateway \
diff --git a/pyproject.toml b/pyproject.toml
index aff188f514..15c83e0f5a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -57,7 +57,7 @@ dependencies = [
"litellm",
"GitPython",
"ghapi",
- "swe-rex[modal]==1.2.0",
+ "swe-rex[modal] @ git+https://github.com/jeff-da/SWE-ReX.git@sweap-support",
"tabulate",
"textual>=1.0.0"
]
diff --git a/sweagent/agent/agents.py b/sweagent/agent/agents.py
index d720b07dc8..51122f1560 100644
--- a/sweagent/agent/agents.py
+++ b/sweagent/agent/agents.py
@@ -4,10 +4,15 @@
import copy
import json
import logging
+import os
+import re
+import shlex
+import threading
import time
from pathlib import Path, PurePosixPath
from typing import Annotated, Any, Literal
+import litellm
import yaml
from jinja2 import Template
from pydantic import BaseModel, ConfigDict, Field, model_validator
@@ -57,6 +62,183 @@
from sweagent.utils.patch_formatter import PatchFormatter
+# Global task definitions cache for ask_user interception
+_task_definitions_lock = threading.Lock()
+_task_definitions_cache: dict | None = None
+_task_definitions_path: str | None = None
+
+
+def _find_task_definitions_file(traj_path: Path | None) -> Path | None:
+ """Search for task_definitions.json in parent directories of traj_path.
+
+ Handles different directory structures:
+ - Single test: output_dir/instance_id/instance_id.traj (2 levels up)
+ - Benchmark: output_dir/exp_N/instance_id/instance_id.traj (3 levels up)
+ """
+ if traj_path is None:
+ return None
+
+ # Search up to 4 parent levels for task_definitions.json
+ current = traj_path.parent # Start from directory containing .traj
+ for _ in range(4):
+ task_def_file = current / "task_definitions.json"
+ if task_def_file.exists():
+ return task_def_file
+ current = current.parent
+ return None
+
+
+def _load_task_definitions(traj_path: Path | None) -> dict | None:
+ """Load task definitions by searching parent directories."""
+ global _task_definitions_cache, _task_definitions_path
+
+ task_def_file = _find_task_definitions_file(traj_path)
+ if task_def_file is None:
+ return None
+
+ task_def_str = str(task_def_file)
+
+ with _task_definitions_lock:
+ # Return cached if same file
+ if _task_definitions_path == task_def_str and _task_definitions_cache is not None:
+ return _task_definitions_cache
+
+ try:
+ with open(task_def_file, "r") as f:
+ _task_definitions_cache = json.load(f)
+ _task_definitions_path = task_def_str
+ return _task_definitions_cache
+ except Exception:
+ pass
+ return None
+
+
+def _handle_ask_user_on_host(question: str, context: str, instance_id: str, traj_path: Path | None, logger) -> str:
+ """Handle ask_user command on the host side using litellm.
+
+ This function intercepts ask_user calls to run the LLM call on the host,
+ which can reach internal API endpoints that the container cannot access.
+ """
+ task_defs = _load_task_definitions(traj_path)
+
+ if task_defs is None or instance_id not in task_defs:
+ return f"Error: No task definition found for instance {instance_id}"
+
+ task_def = task_defs[instance_id]
+ primary_task = task_def.get("primary_task", "")
+ underspecified_prompt = task_def.get("underspecified_task", "")
+
+ # Extract removed values
+ removed_values = []
+ if "removed_segments" in task_def:
+ for seg in task_def["removed_segments"]:
+ if isinstance(seg, dict) and seg.get("value"):
+ removed_values.append(seg["value"])
+
+ removed_values_str = ", ".join(removed_values) if removed_values else "None specified"
+ underspec_str = underspecified_prompt or "Not provided"
+
+ system_prompt = f"""You are simulating a user who has a task in mind but didn't fully specify it.
+
+The user originally intended to give this COMPLETE prompt:
+{primary_task}
+
+But they actually gave this UNDERSPECIFIED version:
+{underspec_str}
+
+The parts that were removed/made vague:
+{removed_values_str}
+
+An AI assistant (who only sees the underspecified version) is now asking you a clarifying question.
+
+Your job: Compare the two prompts, find what's MISSING from the underspecified version, and provide the EXACT information from the complete prompt.
+
+Guidelines:
+- Find the EXACT values that are in the complete prompt but missing from the underspecified one
+- Provide those specific values (times, names, dates, numbers, phrases, etc.)
+- Be concise - just answer what's asked
+- Don't reveal you're a simulation
+
+ENVIRONMENT CONTEXT:
+- The agent is working in a repository at /workspace (or the working directory specified in the prompt)
+- The agent has full access to the repository files
+- The agent can read, write, and execute files in the repository
+- When providing file paths, use paths relative to the repository root
+"""
+
+ user_prompt = f"The assistant asks: {question}"
+ if context:
+ user_prompt += f"\n\nContext: {context}"
+
+ messages = [
+ {"role": "system", "content": system_prompt},
+ {"role": "user", "content": user_prompt},
+ ]
+
+ # Get API credentials from environment
+ api_key = os.environ.get("OPENAI_API_KEY") or os.environ.get("LLM_API_KEY")
+ api_base = os.environ.get("OPENAI_BASE_URL") or os.environ.get("LLM_BASE_URL")
+ model = os.environ.get("USER_SIMULATOR_MODEL", "openai/gpt-4.1-2025-04-14")
+
+ if not api_key:
+ return "Error: No API key available for user simulation"
+
+ try:
+ logger.info(f"ask_user intercepted on host: question='{question[:100]}...'")
+ # Drop unsupported params for models like GPT-5 that don't support temperature
+ litellm.drop_params = True
+ response = litellm.completion(
+ model=model,
+ messages=messages,
+ temperature=1, # GPT-5 only supports temperature=1
+ api_key=api_key,
+ api_base=api_base,
+ timeout=30,
+ )
+ result = response.choices[0].message.content or ""
+ logger.info(f"ask_user response generated: '{result[:100]}...'")
+ return result
+ except Exception as e:
+ logger.error(f"ask_user LLM call failed: {e}")
+ return f"Error generating user response: {str(e)}"
+
+
+def _parse_ask_user_command(command: str) -> tuple[str, str] | None:
+ """Parse ask_user command to extract question and optional context.
+
+ Handles formats like:
+ - ask_user "question"
+ - ask_user "question" "context"
+ - ask_user 'question'
+ - ask_user question_without_quotes
+
+ Returns (question, context) tuple or None if not an ask_user command.
+ """
+ command = command.strip()
+ if not (command == "ask_user" or command.startswith("ask_user ") or command.startswith("ask_user\t")):
+ return None
+
+ # Remove the "ask_user" prefix
+ args_str = command[8:].strip()
+ if not args_str:
+ return None
+
+ try:
+ # Use shlex to properly parse quoted arguments
+ args = shlex.split(args_str)
+ if len(args) >= 1:
+ question = args[0]
+ context = args[1] if len(args) > 1 else ""
+ return (question, context)
+ except ValueError:
+ # Fallback: try simple quote extraction
+ match = re.match(r'["\'](.+?)["\'](?:\s+["\'](.+?)["\'])?', args_str)
+ if match:
+ return (match.group(1), match.group(2) or "")
+ # Last resort: treat entire string as question
+ return (args_str, "")
+
+
class TemplateConfig(BaseModel):
"""This configuration is used to define almost all message templates that are
formatted by the agent and sent to the LM.
@@ -943,6 +1125,26 @@ def handle_action(self, step: StepOutput) -> StepOutput:
self._chook.on_action_started(step=step)
execution_t0 = time.perf_counter()
run_action: str = self.tools.guard_multiline_input(step.action).strip()
+
+ # Intercept ask_user commands and handle on HOST side
+ # This is needed because Modal containers cannot reach internal API endpoints
+ ask_user_args = _parse_ask_user_command(run_action)
+ if ask_user_args is not None:
+ question, context = ask_user_args
+ instance_id = self._problem_statement.id if self._problem_statement else "unknown"
+ step.observation = _handle_ask_user_on_host(
+ question=question,
+ context=context,
+ instance_id=instance_id,
+ traj_path=self.traj_path,
+ logger=self.logger,
+ )
+ step.execution_time = time.perf_counter() - execution_t0
+ self._total_execution_time += step.execution_time
+ self._chook.on_action_executed(step=step)
+ step.state = self.tools.get_state(env=self._env)
+ return self.handle_submission(step)
+
try:
step.observation = self._env.communicate(
input=run_action,
diff --git a/sweagent/agent/models.py b/sweagent/agent/models.py
index 307d8d579a..10ba870db4 100644
--- a/sweagent/agent/models.py
+++ b/sweagent/agent/models.py
@@ -78,7 +78,7 @@ class GenericAPIModelConfig(PydanticBaseModel):
per_instance_call_limit: int = Field(default=0, description="Per instance call limit.")
temperature: float = 0.0
"""Sampling temperature"""
- top_p: float | None = 1.0
+ top_p: float | None = None
"""Sampling top-p"""
api_base: str | None = None
api_version: str | None = None
@@ -180,7 +180,8 @@ def choose_api_key(self) -> str | None:
@property
def id(self) -> str:
- return f"{self.name}__t-{self.temperature:.2f}__p-{self.top_p:.2f}__c-{self.per_instance_cost_limit:.2f}"
+ top_p_str = f"{self.top_p:.2f}" if self.top_p is not None else "none"
+ return f"{self.name}__t-{self.temperature:.2f}__p-{top_p_str}__c-{self.per_instance_cost_limit:.2f}"
class ReplayModelConfig(GenericAPIModelConfig):
diff --git a/sweagent/run/hooks/task_definition_injection.py b/sweagent/run/hooks/task_definition_injection.py
new file mode 100644
index 0000000000..988300baa8
--- /dev/null
+++ b/sweagent/run/hooks/task_definition_injection.py
@@ -0,0 +1,123 @@
+"""
+RunHook for injecting complete task definitions into containers for ask_user tool.
+
+This hook writes the full task definition (including underspecified version and removed
+segments) to a file in the container so the ask_user tool can access it.
+"""
+
+import json
+import logging
+import os
+from pathlib import Path
+from typing import Any, Dict, Optional
+
+from sweagent.agent.problem_statement import ProblemStatement, ProblemStatementConfig
+from sweagent.environment.swe_env import SWEEnv
+from sweagent.run.hooks.abstract import RunHook
+
+logger = logging.getLogger(__name__)
+
+
+class TaskDefinitionInjectionHook(RunHook):
+ """
+ Inject complete task definitions into container for ask_user tool.
+
+ Writes task definition to /tmp/task_definition.json in the container, which
+ the ask_user tool reads to provide accurate clarifications.
+ """
+
+ def __init__(
+ self,
+ task_definitions: Optional[Dict[str, Dict[str, Any]]] = None,
+ task_definitions_file: Optional[Path] = None,
+ ):
+ """
+ Initialize hook with task definitions.
+
+ Args:
+ task_definitions: Dict mapping instance_id to task definition, OR
+ task_definitions_file: Path to JSON file with task definitions
+
+ Task definition should contain:
+ - primary_task: Complete task description
+ - underspecified_task: Partial task given to agent
+ - removed_segments: List of removed segments
+ - expected_questions: Expected clarification questions
+ """
+ super().__init__()
+ self.task_definitions = task_definitions
+ self.task_definitions_file = task_definitions_file
+
+ def _load_task_definitions(self) -> Dict[str, Dict[str, Any]]:
+ """Load task definitions from file or return cached dict."""
+ if self.task_definitions is not None:
+ return self.task_definitions
+
+ if self.task_definitions_file and self.task_definitions_file.exists():
+ with open(self.task_definitions_file) as f:
+ return json.load(f)
+
+ return {}
+
+ def on_instance_start(
+ self,
+ *,
+ index: int,
+ env: SWEEnv,
+ problem_statement: ProblemStatement | ProblemStatementConfig,
+ ) -> None:
+ """
+ Inject task definition into container before agent starts.
+
+ Called after environment is ready but before agent.setup().
+ """
+ instance_id = problem_statement.id
+
+ # Load task definitions
+ task_definitions = self._load_task_definitions()
+
+ # Check if we have a task definition for this instance
+ if instance_id not in task_definitions:
+ logger.debug(f"No task definition found for instance {instance_id}, skipping injection")
+ return
+
+ task_def = task_definitions[instance_id]
+
+ # Write task definition to container
+ task_def_path = "/tmp/task_definition.json"
+ task_def_json = json.dumps(task_def, indent=2)
+
+ try:
+ # Write file to container using swerex
+ logger.info(f"Injecting task definition for {instance_id} to {task_def_path}")
+
+ # Create a temporary file write command
+ command = f"cat > {task_def_path} << 'TASK_DEFINITION_EOF'\n{task_def_json}\nTASK_DEFINITION_EOF"
+ env.communicate(command, check="raise")
+
+ # Set environment variables for the ask_user tool
+ env_vars = {
+ "TASK_DEFINITION_PATH": task_def_path,
+ "HAS_TASK_DEFINITION": "true",
+ }
+
+ # Pass through API credentials for the user simulator LLM
+ api_key = os.environ.get("OPENAI_API_KEY") or os.environ.get("LLM_API_KEY")
+ base_url = os.environ.get("OPENAI_BASE_URL") or os.environ.get("LLM_BASE_URL")
+ simulator_model = os.environ.get("USER_SIMULATOR_MODEL")
+
+ if api_key:
+ env_vars["OPENAI_API_KEY"] = api_key
+ env_vars["LLM_API_KEY"] = api_key
+ if base_url:
+ env_vars["OPENAI_BASE_URL"] = base_url
+ env_vars["LLM_BASE_URL"] = base_url
+ if simulator_model:
+ env_vars["USER_SIMULATOR_MODEL"] = simulator_model
+
+ env.set_env_variables(env_vars)
+
+ logger.info(f"Successfully injected task definition for {instance_id}")
+ except Exception as e:
+ logger.error(f"Failed to inject task definition for {instance_id}: {e}")
+ # Don't raise - let the run continue even if injection fails
diff --git a/sweagent/run/run_batch.py b/sweagent/run/run_batch.py
index 99bae59d9f..9ebdcf5fec 100644
--- a/sweagent/run/run_batch.py
+++ b/sweagent/run/run_batch.py
@@ -231,6 +231,18 @@ def from_config(cls, config: RunBatchConfig) -> Self:
continuous_submission_every=30,
)
)
+
+ # Auto-add TaskDefinitionInjectionHook if task definitions file exists
+ # This enables the ask_user tool to access complete task specifications
+ task_def_file = config.output_dir / "task_definitions.json"
+ if task_def_file.exists():
+ from sweagent.run.hooks.task_definition_injection import TaskDefinitionInjectionHook
+
+ logger.info(
+ f"Found task definitions file, enabling ask_user tool support: {task_def_file}"
+ )
+ rb.add_hook(TaskDefinitionInjectionHook(task_definitions_file=task_def_file))
+
return rb
def add_hook(self, hook: RunHook) -> None:
diff --git a/tools/ask_user/bin/ask_user b/tools/ask_user/bin/ask_user
new file mode 100755
index 0000000000..9670b979ac
--- /dev/null
+++ b/tools/ask_user/bin/ask_user
@@ -0,0 +1,172 @@
+#!/usr/bin/env python3
+"""
+Ask user tool for SWE-bench Pro.
+
+Allows the agent to request clarification from the task assigner.
+Responses are generated by an LLM to simulate the human who assigned the task
+and has access to the complete task requirements.
+"""
+
+import json
+import os
+import sys
+from pathlib import Path
+
+# Add the parent directory to the path to import llm utilities
+sys.path.insert(0, str(Path(__file__).parent))
+
+from llm import llm_completion, logger # noqa: E402
+
+# Path to the primary task description (injected by TaskDefinitionInjectionHook)
+PRIMARY_TASK_PATH = os.environ.get("TASK_DEFINITION_PATH", "/tmp/task_definition.json")
+# Use gpt-5.2 as default (same model as agent, known to work)
+USER_SIMULATOR_MODEL = os.environ.get("USER_SIMULATOR_MODEL", "openai/gpt-5.2")
+
+# Default task context note for SWE-bench Pro environment
+DEFAULT_TASK_CONTEXT = """ENVIRONMENT CONTEXT:
+- The agent is working in a repository at /workspace (or the working directory specified in the prompt)
+- The agent has full access to the repository files
+- The agent can read, write, and execute files in the repository
+- When providing file paths, use paths relative to the repository root
+""".strip()
+
+
+def load_primary_task():
+ """Load the primary task description from the mounted file.
+
+ Returns:
+ Tuple of (primary_task, underspecified_prompt, removed_values)
+ """
+ try:
+ if os.path.exists(PRIMARY_TASK_PATH):
+ with open(PRIMARY_TASK_PATH, "r", encoding="utf-8") as f:
+ task_def = json.load(f)
+
+ primary_task = task_def.get("primary_task", "")
+ underspecified_prompt = task_def.get("underspecified_task", "")
+
+ # Extract removed values from removed_segments
+ removed_values = []
+ if "removed_segments" in task_def:
+ for seg in task_def["removed_segments"]:
+ if isinstance(seg, dict) and seg.get("value"):
+ removed_values.append(seg["value"])
+ elif "expected_questions" in task_def:
+ for eq in task_def["expected_questions"]:
+ if isinstance(eq, dict) and "segment" in eq:
+ segment_val = eq.get("segment", {}).get("value", "")
+ if segment_val:
+ removed_values.append(segment_val)
+
+ return primary_task, underspecified_prompt, removed_values
+ else:
+ return "No primary task definition available.", "", []
+ except Exception as e:
+ return f"Error reading primary task: {str(e)}", "", []
+
+
+def generate_user_response(question, context, primary_task, underspecified_prompt, removed_values):
+ """Generate a human-like response using an LLM.
+
+ Args:
+ question: The agent's question
+ context: Context about what the agent was doing
+ primary_task: The complete task requirements
+ underspecified_prompt: The partial prompt the agent actually sees
+ removed_values: List of values that were removed from the complete prompt
+
+ Returns:
+ A natural human-like response that answers the question
+ """
+ # Check if API credentials are available
+ api_key = os.environ.get("OPENAI_API_KEY") or os.environ.get("LLM_API_KEY")
+ base_url = os.environ.get("OPENAI_BASE_URL") or os.environ.get("LLM_BASE_URL")
+
+ if not api_key or not base_url:
+ raise ValueError("LLM_API_KEY or LLM_BASE_URL not available. Cannot generate response.")
+
+ # Format removed values for the prompt
+ removed_values_str = ", ".join(removed_values) if removed_values else "None specified"
+ underspec_str = underspecified_prompt or "Not provided"
+
+ system_prompt = f"""You are simulating a user who has a task in mind but didn't fully specify it.
+
+The user originally intended to give this COMPLETE prompt:
+{primary_task}
+
+But they actually gave this UNDERSPECIFIED version:
+{underspec_str}
+
+The parts that were removed/made vague:
+{removed_values_str}
+
+An AI assistant (who only sees the underspecified version) is now asking you a clarifying question.
+
+Your job: Compare the two prompts, find what's MISSING from the underspecified version, and provide the EXACT information from the complete prompt.
+
+Guidelines:
+- Find the EXACT values that are in the complete prompt but missing from the underspecified one
+- Provide those specific values (times, names, dates, numbers, phrases, etc.)
+- Be concise - just answer what's asked
+- Don't reveal you're a simulation
+
+{DEFAULT_TASK_CONTEXT}
+"""
+
+ user_prompt = f"The assistant asks: {question}"
+ if context:
+ user_prompt += f"\n\nContext: {context}"
+
+ messages = [
+ {"role": "system", "content": system_prompt},
+ {"role": "user", "content": user_prompt},
+ ]
+
+ try:
+ response = llm_completion(
+ model=USER_SIMULATOR_MODEL,
+ messages=messages,
+ temperature=0.7,
+ timeout=30,
+ )
+ return response
+ except Exception as e:
+ # Print full error details for debugging
+ import traceback
+ error_details = traceback.format_exc()
+ print(f"DEBUG: User simulator error: {e}", file=sys.stderr)
+ print(f"DEBUG: Full traceback:\n{error_details}", file=sys.stderr)
+ print(f"DEBUG: API_KEY set: {bool(api_key)}", file=sys.stderr)
+ print(f"DEBUG: BASE_URL: {base_url}", file=sys.stderr)
+ print(f"DEBUG: MODEL: {USER_SIMULATOR_MODEL}", file=sys.stderr)
+ return f"Error calling user simulator: {e}"
+
+
+def main():
+ if len(sys.argv) < 2:
+ print("Usage: ask_user <question> [<context>]")
+ sys.exit(1)
+
+ question = sys.argv[1]
+ context = sys.argv[2] if len(sys.argv) > 2 else ""
+
+ # Load the primary task description
+ primary_task, underspecified_prompt, removed_values = load_primary_task()
+
+ # Generate and print the user's response
+ try:
+ response = generate_user_response(
+ question=question,
+ context=context,
+ primary_task=primary_task,
+ underspecified_prompt=underspecified_prompt,
+ removed_values=removed_values,
+ )
+ print(response)
+ except Exception as e:
+ print(f"Error: {str(e)}", file=sys.stderr)
+ sys.exit(1)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/ask_user/bin/llm.py b/tools/ask_user/bin/llm.py
new file mode 100644
index 0000000000..d1cf3c0f5e
--- /dev/null
+++ b/tools/ask_user/bin/llm.py
@@ -0,0 +1,64 @@
+"""
+LLM utilities for the ask_user tool.
+
+Uses OpenAI client with optional LiteLLM proxy for multi-provider routing.
+"""
+
+import logging
+import os
+
+from openai import OpenAI
+
+logger = logging.getLogger(__name__)
+logging.getLogger("httpx").setLevel(logging.WARNING)
+
+# Module-level client (initialized lazily)
+_client = None
+
+
+def get_openai_client() -> OpenAI:
+ """
+ Get an OpenAI client, optionally configured for LiteLLM proxy.
+
+ Environment variables:
+ OPENAI_API_KEY or LLM_API_KEY: Your API key
+ OPENAI_BASE_URL or LLM_BASE_URL: Proxy URL for multi-provider routing
+ """
+ global _client
+ if _client is not None:
+ return _client
+
+ api_key = os.environ.get("OPENAI_API_KEY") or os.environ.get("LLM_API_KEY")
+ if not api_key:
+ raise ValueError(
+ "OPENAI_API_KEY or LLM_API_KEY environment variable is required. "
+ "Set with: export OPENAI_API_KEY='your-key' or create a .env file"
+ )
+
+ base_url = os.environ.get("OPENAI_BASE_URL") or os.environ.get("LLM_BASE_URL")
+ # Note: base_url can be None for direct OpenAI API usage
+
+ _client = OpenAI(
+ api_key=api_key,
+ base_url=base_url, # None = use default OpenAI API
+ max_retries=0, # Retries handled by tenacity for better logging
+ )
+ return _client
+
+
+def llm_completion(model: str, messages: list, **kwargs) -> str:
+ """
+ LLM completion.
+
+ Args:
+ model: Model identifier (e.g., "openai/gpt-4o" for LiteLLM proxy)
+ messages: List of message dicts with "role" and "content"
+ **kwargs: Additional arguments passed to chat.completions.create
+
+ Returns:
+ Response text from the model
+ """
+ client = get_openai_client()
+
+ response = client.chat.completions.create(model=model, messages=messages, **kwargs)
+ return response.choices[0].message.content
diff --git a/tools/ask_user/config.yaml b/tools/ask_user/config.yaml
new file mode 100644
index 0000000000..32cf87abd8
--- /dev/null
+++ b/tools/ask_user/config.yaml
@@ -0,0 +1,13 @@
+tools:
+ ask_user:
+ signature: "ask_user <question> [<context>]"
+ docstring: "Ask the user a clarifying question to get more information about the task. Use this when the task is ambiguous or you need specific details to proceed."
+ arguments:
+ - name: question
+ type: string
+ description: "The clarifying question to ask the user"
+ required: true
+ - name: context
+ type: string
+ description: "Optional additional context (e.g., conversation history summary)"
+ required: false
diff --git a/tools/ask_user/install.sh b/tools/ask_user/install.sh
new file mode 100644
index 0000000000..fa350d6c4e
--- /dev/null
+++ b/tools/ask_user/install.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+# Install dependencies for ask_user tool
+
+pip install -q -r "$(dirname "$0")/requirements.txt"
+echo "ask_user tool dependencies installed"
diff --git a/tools/ask_user/requirements.txt b/tools/ask_user/requirements.txt
new file mode 100644
index 0000000000..aa2b704464
--- /dev/null
+++ b/tools/ask_user/requirements.txt
@@ -0,0 +1 @@
+openai>=1.0.0