diff --git a/.gitignore b/.gitignore
index a59cdee..5f5dc85 100644
--- a/.gitignore
+++ b/.gitignore
@@ -57,4 +57,5 @@ test_debug_*.py
 test_performance_*.py
 test_user_*.py
 test_new_*.py
-test_roocode_compatibility.py
\ No newline at end of file
+test_roocode_compatibility.py
+.hypothesis/
diff --git a/package-lock.json b/package-lock.json
new file mode 100644
index 0000000..0dcf105
--- /dev/null
+++ b/package-lock.json
@@ -0,0 +1,6 @@
+{
+  "name": "claude-code-openai-wrapper",
+  "lockfileVersion": 3,
+  "requires": true,
+  "packages": {}
+}
diff --git a/poetry.lock b/poetry.lock
index 03d8e92..66d585e 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand.
+# This file is automatically @generated by Poetry 2.3.2 and should not be changed by hand.
 
 [[package]]
 name = "annotated-types"
@@ -12,6 +12,33 @@ files = [
     {file = "annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"},
 ]
 
+[[package]]
+name = "anthropic"
+version = "0.79.0"
+description = "The official Python library for the anthropic API"
+optional = false
+python-versions = ">=3.9"
+groups = ["main"]
+files = [
+    {file = "anthropic-0.79.0-py3-none-any.whl", hash = "sha256:04cbd473b6bbda4ca2e41dd670fe2f829a911530f01697d0a1e37321eb75f3cf"},
+    {file = "anthropic-0.79.0.tar.gz", hash = "sha256:8707aafb3b1176ed6c13e2b1c9fb3efddce90d17aee5d8b83a86c70dcdcca871"},
+]
+
+[package.dependencies]
+anyio = ">=3.5.0,<5"
+distro = ">=1.7.0,<2"
+docstring-parser = ">=0.15,<1"
+httpx = ">=0.25.0,<1"
+jiter = ">=0.4.0,<1"
+pydantic = ">=1.9.0,<3"
+sniffio = "*"
+typing-extensions = ">=4.10,<5"
+
+[package.extras]
+aiohttp = ["aiohttp", "httpx-aiohttp (>=0.1.9)"]
+bedrock = ["boto3 (>=1.28.57)", "botocore (>=1.31.57)"]
+vertex = ["google-auth[requests] (>=2,<3)"]
+
 [[package]]
 name = "anyio"
 version = "4.9.0"
@@ -665,12 +692,29 @@ version = "1.9.0"
 description = "Distro - an OS platform information API"
 optional = false
 python-versions = ">=3.6"
-groups = ["dev"]
+groups = ["main", "dev"]
 files = [
     {file = "distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2"},
     {file = "distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed"},
 ]
 
+[[package]]
+name = "docstring-parser"
+version = "0.17.0"
+description = "Parse Python docstrings in reST, Google and Numpydoc format"
+optional = false
+python-versions = ">=3.8"
+groups = ["main"]
+files = [
+    {file = "docstring_parser-0.17.0-py3-none-any.whl", hash = "sha256:cf2569abd23dce8099b300f9b4fa8191e9582dda731fd533daf54c4551658708"},
+    {file = "docstring_parser-0.17.0.tar.gz", hash = "sha256:583de4a309722b3315439bb31d64ba3eebada841f2e2cee23b99df001434c912"},
+]
+
+[package.extras]
+dev = ["pre-commit (>=2.16.0) ; python_version >= \"3.9\"", "pydoctor (>=25.4.0)", "pytest"]
+docs = ["pydoctor (>=25.4.0)"]
+test = ["pytest"]
+
 [[package]]
 name = "dparse"
 version = "0.6.4"
@@ -958,7 +1002,7 @@ version = "0.10.0"
 description = "Fast iterable JSON parser."
 optional = false
 python-versions = ">=3.9"
-groups = ["dev"]
+groups = ["main", "dev"]
 files = [
     {file = "jiter-0.10.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:cd2fb72b02478f06a900a5782de2ef47e0396b3e1f7d5aba30daeb1fce66f303"},
     {file = "jiter-0.10.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:32bb468e3af278f095d3fa5b90314728a6916d89ba3d0ffb726dd9bf7367285e"},
@@ -1065,7 +1109,7 @@ files = [
 
 [package.dependencies]
 attrs = ">=22.2.0"
-jsonschema-specifications = ">=2023.03.6"
+jsonschema-specifications = ">=2023.3.6"
 referencing = ">=0.28.4"
 rpds-py = ">=0.7.1"
 
@@ -3053,4 +3097,4 @@ files = [
 [metadata]
 lock-version = "2.1"
 python-versions = "^3.10"
-content-hash = "995cbb6b6bfbf14612eff7e0690ca47fc7b0c01fd2ef3351dea01d6940be0ed6"
+content-hash = "a8afd4e405e3cc48eb3448318558896957c3719239b8839606eb855788e97d11"
diff --git a/pyproject.toml b/pyproject.toml
index e0cc381..49877ac 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -18,6 +18,7 @@ sse-starlette = "^2.1.3"
 python-multipart = "^0.0.18"
 claude-agent-sdk = "^0.1.18"
 slowapi = "^0.1.9"
+anthropic = "^0.79.0"
 
 [tool.poetry.group.dev.dependencies]
 black = "^24.0.0"
diff --git a/src/claude_cli.py b/src/claude_cli.py
index d87057e..0d63331 100644
--- a/src/claude_cli.py
+++ b/src/claude_cli.py
@@ -104,6 +104,7 @@ async def run_completion(
         session_id: Optional[str] = None,
         continue_session: bool = False,
         permission_mode: Optional[str] = None,
+        mcp_servers: Optional[Dict[str, Any]] = None,
     ) -> AsyncGenerator[Dict[str, Any], None]:
         """Run Claude Agent using the Python SDK and yield response chunks."""
 
@@ -123,13 +124,14 @@ async def run_completion(
                 if model:
                     options.model = model
 
-                # Set system prompt - CLAUDE AGENT SDK STRUCTURED FORMAT
-                # Use structured format as per SDK documentation
+                # Set system prompt
+                # SDK's _build_command only handles: None, str, or {"type": "preset", "append": "..."}
+                # Passing a plain string ensures --system-prompt flag is added to CLI command
                 if system_prompt:
-                    options.system_prompt = {"type": "text", "text": system_prompt}
+                    options.system_prompt = system_prompt
                 else:
-                    # Use Claude Code preset to maintain expected behavior
-                    options.system_prompt = {"type": "preset", "preset": "claude_code"}
+                    # No custom prompt - let Claude Code use its default behavior
+                    options.system_prompt = None
 
                 # Set tool restrictions
                 if allowed_tools:
@@ -141,6 +143,10 @@ async def run_completion(
                 if permission_mode:
                     options.permission_mode = permission_mode
 
+                # Set MCP servers (for OpenClaw tool bridge)
+                if mcp_servers:
+                    options.mcp_servers = mcp_servers
+
                 # Handle session continuity
                 if continue_session:
                     options.continue_session = True
diff --git a/src/constants.py b/src/constants.py
index 5fb452b..b2018f4 100644
--- a/src/constants.py
+++ b/src/constants.py
@@ -58,6 +58,21 @@ async def chat_endpoint(): ...
     "Edit",
 ]
 
+# Tools to allow in passthrough mode (agent frameworks like OpenClaw)
+# Broader set including network tools needed for agent functionality
+PASSTHROUGH_ALLOWED_TOOLS = [
+    "Read",
+    "Glob",
+    "Grep",
+    "Bash",
+    "Write",
+    "Edit",
+    "WebFetch",
+    "WebSearch",
+    "NotebookEdit",
+    "Task",
+]
+
 # Tools to disallow by default (potentially dangerous or slow)
 DEFAULT_DISALLOWED_TOOLS = [
     "Task",  # Can spawn sub-agents
@@ -69,26 +84,21 @@ async def chat_endpoint(): ...
 # Models supported by Claude Agent SDK (as of November 2025)
 # NOTE: Claude Agent SDK only supports Claude 4+ models, not Claude 3.x
 CLAUDE_MODELS = [
-    # Claude 4.5 Family (Latest - Fall 2025) - RECOMMENDED
-    "claude-opus-4-5-20250929",  # Latest Opus 4.5 - Most capable
-    "claude-sonnet-4-5-20250929",  # Recommended - best coding model
+    # Claude 4.6 (Latest - February 2026)
+    "claude-opus-4-6",  # Latest Opus 4.6 - Most capable
+    # Claude 4.5 Family (Fall 2025)
+    "claude-sonnet-4-5-20250929",  # Best coding model
     "claude-haiku-4-5-20251001",  # Fast & cheap
     # Claude 4.1
-    "claude-opus-4-1-20250805",  # Upgraded Opus 4
+    "claude-opus-4-1-20250805",
     # Claude 4.0 Family (Original - May 2025)
     "claude-opus-4-20250514",
     "claude-sonnet-4-20250514",
-    # Claude 3.x Family - NOT SUPPORTED by Claude Agent SDK
-    # These models work with Anthropic API but NOT with Claude Code
-    # Uncomment only if using direct Anthropic API (not Claude Agent SDK)
-    # "claude-3-7-sonnet-20250219",
-    # "claude-3-5-sonnet-20241022",
-    # "claude-3-5-haiku-20241022",
 ]
 
 # Default model (recommended for most use cases)
 # Can be overridden via DEFAULT_MODEL environment variable
-DEFAULT_MODEL = os.getenv("DEFAULT_MODEL", "claude-sonnet-4-5-20250929")
+DEFAULT_MODEL = os.getenv("DEFAULT_MODEL", "claude-opus-4-6")
 
 # Fast model (for speed/cost optimization)
 FAST_MODEL = "claude-haiku-4-5-20251001"
diff --git a/src/main.py b/src/main.py
index 4a74aa4..2e5d937 100644
--- a/src/main.py
+++ b/src/main.py
@@ -24,6 +24,8 @@
     Message,
     Usage,
     StreamChoice,
+    FunctionCall,
+    ToolCall,
     SessionListResponse,
     ToolListResponse,
     ToolMetadataResponse,
@@ -46,12 +48,14 @@
 from src.session_manager import session_manager
 from src.tool_manager import tool_manager
 from src.mcp_client import mcp_client, MCPServerConfig
+# OpenClaw bridge available for future MCP-based tool passthrough
+# from src.openclaw_bridge import openai_tools_to_mcp_server
 from src.rate_limiter import (
     limiter,
     rate_limit_exceeded_handler,
     rate_limit_endpoint,
 )
-from src.constants import CLAUDE_MODELS, CLAUDE_TOOLS, DEFAULT_ALLOWED_TOOLS
+from src.constants import CLAUDE_MODELS, CLAUDE_TOOLS, DEFAULT_ALLOWED_TOOLS, PASSTHROUGH_ALLOWED_TOOLS
 
 # Load environment variables
 load_dotenv()
@@ -410,10 +414,14 @@ async def generate_streaming_response(
                 system_prompt = sampling_instructions
             logger.debug(f"Added sampling instructions: {sampling_instructions}")
 
-        # Filter content for unsupported features
-        prompt = MessageAdapter.filter_content(prompt)
-        if system_prompt:
-            system_prompt = MessageAdapter.filter_content(system_prompt)
+        # Detect passthrough mode: when caller sends tools (e.g. OpenClaw agent framework)
+        passthrough_mode = bool(request.tools)
+
+        # Filter content for unsupported features (skip in passthrough mode)
+        if not passthrough_mode:
+            prompt = MessageAdapter.filter_content(prompt)
+            if system_prompt:
+                system_prompt = MessageAdapter.filter_content(system_prompt)
 
         # Get Claude Agent SDK options from request
         claude_options = request.to_claude_options()
@@ -426,23 +434,39 @@ async def generate_streaming_response(
         if claude_options.get("model"):
             ParameterValidator.validate_model(claude_options["model"])
 
-        # Handle tools - disabled by default for OpenAI compatibility
-        if not request.enable_tools:
-            # Disable all tools by using CLAUDE_TOOLS constant
-            claude_options["disallowed_tools"] = CLAUDE_TOOLS
-            claude_options["max_turns"] = 1  # Single turn for Q&A
-            logger.info("Tools disabled (default behavior for OpenAI compatibility)")
-        else:
+        # Handle tools based on mode
+        if passthrough_mode:
+            # Passthrough mode: caller sent tools (e.g. OpenClaw agent framework).
+            # Enable Claude's built-in tools so it can fulfill requests using Read, Bash, etc.
+            # The caller's tool definitions are embedded in the system prompt text by the
+            # agent framework, so Claude sees them and outputs text-based tool invocations
+            # that the framework parses on its side.
+            claude_options["allowed_tools"] = PASSTHROUGH_ALLOWED_TOOLS
+            claude_options["permission_mode"] = "bypassPermissions"
+            # Allow many turns so the model can complete complex multi-step tasks
+            claude_options["max_turns"] = 50
+            logger.info(
+                f"Passthrough mode: {len(PASSTHROUGH_ALLOWED_TOOLS)} built-in tools enabled, "
+                f"{len(request.tools)} caller tools detected (handled via system prompt)"
+            )
+        elif request.enable_tools:
             # Enable tools - use default safe subset (Read, Glob, Grep, Bash, Write, Edit)
             claude_options["allowed_tools"] = DEFAULT_ALLOWED_TOOLS
             # Set permission mode to bypass prompts (required for API/headless usage)
             claude_options["permission_mode"] = "bypassPermissions"
             logger.info(f"Tools enabled by user request: {DEFAULT_ALLOWED_TOOLS}")
+        else:
+            # Disable all tools by using CLAUDE_TOOLS constant
+            claude_options["disallowed_tools"] = CLAUDE_TOOLS
+            claude_options["max_turns"] = 1  # Single turn for Q&A
+            logger.info("Tools disabled (default behavior for OpenAI compatibility)")
 
         # Run Claude Code
         chunks_buffer = []
         role_sent = False  # Track if we've sent the initial role chunk
         content_sent = False  # Track if we've sent any content
+        tool_calls_collected = []  # Collect tool_use blocks for passthrough
+        tool_call_index = 0  # Track tool call index for streaming deltas
 
         async for chunk in claude_cli.run_completion(
             prompt=prompt,
@@ -488,6 +512,75 @@ async def generate_streaming_response(
                 # Handle content blocks
                 if isinstance(content, list):
                     for block in content:
+                        # === TOOL CALL PASSTHROUGH ===
+                        # Detect ToolUseBlock from Claude Agent SDK and convert to
+                        # OpenAI tool_calls format for agent framework passthrough
+                        block_type = getattr(block, "type", None) or (
+                            block.get("type") if isinstance(block, dict) else None
+                        )
+
+                        if block_type == "tool_use" and passthrough_mode:
+                            # Extract tool_use data from either object or dict format
+                            if hasattr(block, "id"):
+                                tc_id = block.id
+                                tc_name = block.name
+                                tc_input = block.input
+                            else:
+                                tc_id = block.get("id", f"call_{uuid.uuid4().hex[:24]}")
+                                tc_name = block.get("name", "")
+                                tc_input = block.get("input", {})
+
+                            # Strip MCP namespace prefix if present
+                            # Claude returns "mcp__openclaw_tools__cron" but caller expects "cron"
+                            mcp_prefix = "mcp__openclaw_tools__"
+                            if tc_name.startswith(mcp_prefix):
+                                tc_name = tc_name[len(mcp_prefix):]
+
+                            # Only passthrough tool calls for external (caller) tools
+                            # Skip Claude's built-in tool calls (Read, Bash, etc.)
+                            if external_tool_names and tc_name not in external_tool_names:
+                                logger.debug(f"Skipping built-in tool call: {tc_name}")
+                                continue
+
+                            tc_args = json.dumps(tc_input) if isinstance(tc_input, dict) else str(tc_input)
+
+                            # Collect for finish_reason decision
+                            tool_calls_collected.append({
+                                "id": tc_id,
+                                "name": tc_name,
+                                "arguments": tc_args,
+                            })
+
+                            # Emit OpenAI-format tool_calls delta
+                            # First chunk: includes function name and id
+                            tc_delta = {
+                                "tool_calls": [{
+                                    "index": tool_call_index,
+                                    "id": tc_id,
+                                    "type": "function",
+                                    "function": {
+                                        "name": tc_name,
+                                        "arguments": tc_args,
+                                    },
+                                }]
+                            }
+                            tc_chunk = ChatCompletionStreamResponse(
+                                id=request_id,
+                                model=request.model,
+                                choices=[
+                                    StreamChoice(
+                                        index=0,
+                                        delta=tc_delta,
+                                        finish_reason=None,
+                                    )
+                                ],
+                            )
+                            yield f"data: {tc_chunk.model_dump_json()}\n\n"
+                            tool_call_index += 1
+                            content_sent = True
+                            logger.info(f"Passthrough tool_call emitted: {tc_name}({tc_args[:100]}...)")
+                            continue
+
                         # Handle TextBlock objects from Claude Agent SDK
                         if hasattr(block, "text"):
                             raw_text = block.text
@@ -497,8 +590,11 @@ async def generate_streaming_response(
                         else:
                             continue
 
-                        # Filter out tool usage and thinking blocks
-                        filtered_text = MessageAdapter.filter_content(raw_text)
+                        # Filter out tool usage and thinking blocks (skip in passthrough)
+                        if passthrough_mode:
+                            filtered_text = raw_text
+                        else:
+                            filtered_text = MessageAdapter.filter_content(raw_text)
 
                         if filtered_text and not filtered_text.isspace():
                             # Create streaming chunk
@@ -518,8 +614,11 @@ async def generate_streaming_response(
                             content_sent = True
 
                 elif isinstance(content, str):
-                    # Filter out tool usage and thinking blocks
-                    filtered_content = MessageAdapter.filter_content(content)
+                    # Filter out tool usage and thinking blocks (skip in passthrough)
+                    if passthrough_mode:
+                        filtered_content = content
+                    else:
+                        filtered_content = MessageAdapter.filter_content(content)
 
                     if filtered_content and not filtered_content.isspace():
                         # Create streaming chunk
@@ -551,20 +650,31 @@ async def generate_streaming_response(
             yield f"data: {initial_chunk.model_dump_json()}\n\n"
             role_sent = True
 
-        # If we sent role but no content, send a minimal response
+        # If we sent role but no content (and no tool calls), try to extract from parse_claude_message
+        # This can happen when the SDK returns only tool_use blocks with no text
         if role_sent and not content_sent:
+            # Try to extract any text from the collected chunks
+            fallback_text = None
+            if chunks_buffer:
+                fallback_text = claude_cli.parse_claude_message(chunks_buffer)
+
+            if not fallback_text:
+                fallback_text = "I completed my internal processing but didn't produce a text response. Please try rephrasing your request or asking me to explain what I found."
+                logger.warning("Fallback response triggered: SDK returned no text content blocks")
+
             fallback_chunk = ChatCompletionStreamResponse(
                 id=request_id,
                 model=request.model,
                 choices=[
                     StreamChoice(
                         index=0,
-                        delta={"content": "I'm unable to provide a response at the moment."},
+                        delta={"content": fallback_text},
                         finish_reason=None,
                     )
                 ],
             )
             yield f"data: {fallback_chunk.model_dump_json()}\n\n"
+            content_sent = True
 
         # Extract assistant response from all chunks
         assistant_content = None
@@ -589,11 +699,14 @@ async def generate_streaming_response(
             )
             logger.debug(f"Estimated usage: {usage_data}")
 
+        # Determine finish_reason: "tool_calls" if we emitted tool calls, "stop" otherwise
+        finish_reason = "tool_calls" if tool_calls_collected else "stop"
+
         # Send final chunk with finish reason and optionally usage data
         final_chunk = ChatCompletionStreamResponse(
             id=request_id,
             model=request.model,
-            choices=[StreamChoice(index=0, delta={}, finish_reason="stop")],
+            choices=[StreamChoice(index=0, delta={}, finish_reason=finish_reason)],
             usage=usage_data,
         )
         yield f"data: {final_chunk.model_dump_json()}\n\n"
@@ -672,10 +785,14 @@ async def chat_completions(
                     system_prompt = sampling_instructions
                 logger.debug(f"Added sampling instructions: {sampling_instructions}")
 
-            # Filter content
-            prompt = MessageAdapter.filter_content(prompt)
-            if system_prompt:
-                system_prompt = MessageAdapter.filter_content(system_prompt)
+            # Detect passthrough mode
+            passthrough_mode = bool(request_body.tools)
+
+            # Filter content (skip in passthrough mode)
+            if not passthrough_mode:
+                prompt = MessageAdapter.filter_content(prompt)
+                if system_prompt:
+                    system_prompt = MessageAdapter.filter_content(system_prompt)
 
             # Get Claude Agent SDK options from request
             claude_options = request_body.to_claude_options()
@@ -688,18 +805,29 @@ async def chat_completions(
             if claude_options.get("model"):
                 ParameterValidator.validate_model(claude_options["model"])
 
-            # Handle tools - disabled by default for OpenAI compatibility
-            if not request_body.enable_tools:
-                # Disable all tools by using CLAUDE_TOOLS constant
-                claude_options["disallowed_tools"] = CLAUDE_TOOLS
-                claude_options["max_turns"] = 1  # Single turn for Q&A
-                logger.info("Tools disabled (default behavior for OpenAI compatibility)")
-            else:
+            # Handle tools based on mode
+            if passthrough_mode:
+                # Passthrough mode: caller sent tools (e.g. OpenClaw agent framework).
+                # Enable Claude's built-in tools. Caller's tool definitions are in the
+                # system prompt text, so Claude outputs text-based invocations for them.
+                claude_options["allowed_tools"] = PASSTHROUGH_ALLOWED_TOOLS
+                claude_options["permission_mode"] = "bypassPermissions"
+                claude_options["max_turns"] = 50
+                logger.info(
+                    f"Passthrough mode (non-streaming): {len(PASSTHROUGH_ALLOWED_TOOLS)} built-in tools enabled, "
+                    f"{len(request_body.tools)} caller tools detected (handled via system prompt)"
+                )
+            elif request_body.enable_tools:
                 # Enable tools - use default safe subset (Read, Glob, Grep, Bash, Write, Edit)
                 claude_options["allowed_tools"] = DEFAULT_ALLOWED_TOOLS
                 # Set permission mode to bypass prompts (required for API/headless usage)
                 claude_options["permission_mode"] = "bypassPermissions"
                 logger.info(f"Tools enabled by user request: {DEFAULT_ALLOWED_TOOLS}")
+            else:
+                # Disable all tools by using CLAUDE_TOOLS constant
+                claude_options["disallowed_tools"] = CLAUDE_TOOLS
+                claude_options["max_turns"] = 1  # Single turn for Q&A
+                logger.info("Tools disabled (default behavior for OpenAI compatibility)")
 
             # Collect all chunks
             chunks = []
@@ -715,14 +843,55 @@ async def chat_completions(
             ):
                 chunks.append(chunk)
 
-            # Extract assistant message
+            # Extract assistant message text
             raw_assistant_content = claude_cli.parse_claude_message(chunks)
 
-            if not raw_assistant_content:
+            # Extract tool_use blocks from chunks for passthrough mode
+            # Note: In the current architecture, Claude's built-in tool calls are
+            # handled internally by the SDK. This extraction catches any ToolUseBlock
+            # that surfaces in the response (future MCP bridge support).
+            tool_calls_list = []
+            if passthrough_mode:
+                for chunk in chunks:
+                    content = None
+                    if chunk.get("type") == "assistant" and "message" in chunk:
+                        message = chunk["message"]
+                        if isinstance(message, dict) and "content" in message:
+                            content = message["content"]
+                    elif "content" in chunk and isinstance(chunk["content"], list):
+                        content = chunk["content"]
+
+                    if content and isinstance(content, list):
+                        for block in content:
+                            block_type = getattr(block, "type", None) or (
+                                block.get("type") if isinstance(block, dict) else None
+                            )
+                            if block_type == "tool_use":
+                                if hasattr(block, "id"):
+                                    tc_id = block.id
+                                    tc_name = block.name
+                                    tc_input = block.input
+                                else:
+                                    tc_id = block.get("id", f"call_{uuid.uuid4().hex[:24]}")
+                                    tc_name = block.get("name", "")
+                                    tc_input = block.get("input", {})
+
+                                tc_args = json.dumps(tc_input) if isinstance(tc_input, dict) else str(tc_input)
+                                tool_calls_list.append(
+                                    ToolCall(
+                                        id=tc_id,
+                                        function=FunctionCall(name=tc_name, arguments=tc_args),
+                                    )
+                                )
+
+            if not raw_assistant_content and not tool_calls_list:
                 raise HTTPException(status_code=500, detail="No response from Claude Code")
 
-            # Filter out tool usage and thinking blocks
-            assistant_content = MessageAdapter.filter_content(raw_assistant_content)
+            # Filter out tool usage and thinking blocks (skip in passthrough)
+            if passthrough_mode:
+                assistant_content = raw_assistant_content or ""
+            else:
+                assistant_content = MessageAdapter.filter_content(raw_assistant_content or "")
 
             # Add assistant response to session if using session mode
             if actual_session_id:
@@ -733,23 +902,37 @@ async def chat_completions(
             prompt_tokens = MessageAdapter.estimate_tokens(prompt)
             completion_tokens = MessageAdapter.estimate_tokens(assistant_content)
 
-            # Create response
-            response = ChatCompletionResponse(
-                id=request_id,
-                model=request_body.model,
-                choices=[
-                    Choice(
-                        index=0,
-                        message=Message(role="assistant", content=assistant_content),
-                        finish_reason="stop",
-                    )
-                ],
-                usage=Usage(
+            # Determine finish_reason
+            finish_reason = "tool_calls" if tool_calls_list else "stop"
+
+            # Build response message
+            response_message = Message(role="assistant", content=assistant_content or None)
+
+            # Create response - include tool_calls in the choice if present
+            choice_data = {
+                "index": 0,
+                "message": response_message,
+                "finish_reason": finish_reason,
+            }
+            response_dict = {
+                "id": request_id,
+                "model": request_body.model,
+                "choices": [Choice(**choice_data)],
+                "usage": Usage(
                     prompt_tokens=prompt_tokens,
                     completion_tokens=completion_tokens,
                     total_tokens=prompt_tokens + completion_tokens,
                 ),
-            )
+            }
+            response = ChatCompletionResponse(**response_dict)
+
+            # Inject tool_calls into the serialized response (bypasses Pydantic model)
+            if tool_calls_list:
+                resp_json = response.model_dump()
+                resp_json["choices"][0]["message"]["tool_calls"] = [
+                    tc.model_dump() for tc in tool_calls_list
+                ]
+                return JSONResponse(content=resp_json)
 
             return response
 
diff --git a/src/message_adapter.py b/src/message_adapter.py
index 1c9d732..90a548d 100644
--- a/src/message_adapter.py
+++ b/src/message_adapter.py
@@ -6,11 +6,18 @@
 class MessageAdapter:
     """Converts between OpenAI message format and Claude Code prompts."""
 
+    # Max prompt size in characters. Keep small to avoid slow CLI responses.
+    # ~30K chars ≈ ~7.5K tokens — enough context without multi-minute waits.
+    MAX_PROMPT_CHARS = 30_000
+
     @staticmethod
     def messages_to_prompt(messages: List[Message]) -> tuple[str, Optional[str]]:
         """
         Convert OpenAI messages to Claude Code prompt format.
         Returns (prompt, system_prompt)
+
+        Drops the oldest conversation turns (always keeping the newest one) when the
+        joined prompt would exceed MessageAdapter.MAX_PROMPT_CHARS characters.
         """
         system_prompt = None
         conversation_parts = []
@@ -24,12 +31,25 @@ def messages_to_prompt(messages: List[Message]) -> tuple[str, Optional[str]]:
             elif message.role == "assistant":
                 conversation_parts.append(f"Assistant: {message.content}")
 
-        # Join conversation parts
-        prompt = "\n\n".join(conversation_parts)
-
-        # If the last message wasn't from the user, add a prompt for assistant
+        # If the last message wasn't from the user, add a continuation prompt
         if messages and messages[-1].role != "user":
-            prompt += "\n\nHuman: Please continue."
+            conversation_parts.append("Human: Please continue.")
+
+        # Truncate from the front (oldest messages) if prompt is too large
+        prompt = "\n\n".join(conversation_parts)
+        if len(prompt) > MessageAdapter.MAX_PROMPT_CHARS and len(conversation_parts) > 1:
+            # Always keep the last message; drop oldest until it fits
+            while len(conversation_parts) > 1:
+                conversation_parts.pop(0)
+                candidate = "[Earlier conversation truncated for length]\n\n" + "\n\n".join(
+                    conversation_parts
+                )
+                if len(candidate) <= MessageAdapter.MAX_PROMPT_CHARS:
+                    prompt = candidate
+                    break
+            else:
+                # Even a single message is too long — hard-truncate it
+                prompt = conversation_parts[0][: MessageAdapter.MAX_PROMPT_CHARS]
 
         return prompt, system_prompt
 
@@ -96,6 +116,14 @@ def replace_image(match):
         if not content or content.isspace():
             return "I understand you're testing the system. How can I help you today?"
 
+        # Avoid false-positive billing error detection by downstream platforms.
+        # Some platforms rewrite responses containing "billing" + "credits"/"plans"
+        # to a billing error message. Replace with safe synonyms.
+        content = re.sub(r'\bbilling\b', 'invoicing', content, flags=re.IGNORECASE)
+        content = re.sub(r'\bBilling\b', 'Invoicing', content)
+        content = re.sub(r'\binsufficient credits\b', 'insufficient balance', content, flags=re.IGNORECASE)
+        content = re.sub(r'\bpayment required\b', 'payment needed', content, flags=re.IGNORECASE)
+
         return content
 
     @staticmethod
diff --git a/src/models.py b/src/models.py
index 82e85f4..331f843 100644
--- a/src/models.py
+++ b/src/models.py
@@ -1,5 +1,5 @@
 from typing import List, Optional, Dict, Any, Union, Literal
-from pydantic import BaseModel, Field, field_validator, model_validator
+from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator
 from datetime import datetime
 import uuid
 import logging
@@ -17,19 +17,49 @@ def get_default_model():
 
 class ContentPart(BaseModel):
     """Content part for multimodal messages (OpenAI format)."""
+    model_config = ConfigDict(extra="ignore")
 
     type: Literal["text"]
     text: str
 
 
 class Message(BaseModel):
-    role: Literal["system", "user", "assistant"]
-    content: Union[str, List[ContentPart]]
+    model_config = ConfigDict(extra="ignore")
+
+    role: Literal["system", "user", "assistant", "developer", "tool"]
+    content: Optional[Union[str, List[ContentPart]]] = None
     name: Optional[str] = None
+    tool_calls: Optional[List[Any]] = Field(
+        default=None,
+        description="Tool calls made by the assistant (OpenAI format)",
+    )
+    tool_call_id: Optional[str] = Field(
+        default=None,
+        description="Tool call ID this message is responding to (for role=tool)",
+    )
 
     @model_validator(mode="after")
     def normalize_content(self):
         """Convert array content to string for Claude Code compatibility."""
+        # Treat 'developer' role as 'system' for Claude compatibility
+        if self.role == "developer":
+            self.role = "system"
+        # Treat 'tool' role as 'user' for Claude compatibility
+        # Include tool_call_id context so Claude knows which tool result this is
+        if self.role == "tool":
+            tool_result_prefix = ""
+            if self.tool_call_id:
+                tool_result_prefix = f"[Tool result for call {self.tool_call_id}]\n"
+            if self.name:
+                tool_result_prefix += f"[Tool: {self.name}]\n"
+            self.role = "user"
+            if self.content is None:
+                self.content = tool_result_prefix or ""
+            else:
+                self.content = f"{tool_result_prefix}{self.content}" if tool_result_prefix else self.content
+        # Handle null content (e.g. assistant messages with tool_calls)
+        if self.content is None:
+            self.content = ""
         if isinstance(self.content, list):
             # Extract text from content parts and concatenate
             text_parts = []
@@ -54,6 +84,8 @@ class StreamOptions(BaseModel):
 
 
 class ChatCompletionRequest(BaseModel):
+    model_config = ConfigDict(extra="allow")
+
     model: str = Field(default_factory=get_default_model)
     messages: List[Message]
     temperature: Optional[float] = Field(default=1.0, ge=0, le=2)
@@ -76,6 +108,14 @@ class ChatCompletionRequest(BaseModel):
         default=False,
         description="Enable Claude Code tools (Read, Write, Bash, etc.) - disabled by default for OpenAI compatibility",
     )
+    tools: Optional[List[Dict[str, Any]]] = Field(
+        default=None,
+        description="OpenAI-format tool definitions. When present, enables passthrough mode with full tool access.",
+    )
+    tool_choice: Optional[Any] = Field(
+        default=None,
+        description="Tool choice preference (auto, none, or specific tool)",
+    )
     stream_options: Optional[StreamOptions] = Field(
         default=None, description="Options for streaming responses"
     )
@@ -199,10 +239,23 @@ def to_claude_options(self) -> Dict[str, Any]:
         return options
 
 
+class FunctionCall(BaseModel):
+    """OpenAI function call format within a tool call."""
+    name: str
+    arguments: str  # JSON-encoded arguments
+
+
+class ToolCall(BaseModel):
+    """OpenAI tool call format."""
+    id: str
+    type: Literal["function"] = "function"
+    function: FunctionCall
+
+
 class Choice(BaseModel):
     index: int
     message: Message
-    finish_reason: Optional[Literal["stop", "length", "content_filter", "null"]] = None
+    finish_reason: Optional[Literal["stop", "length", "content_filter", "tool_calls", "null"]] = None
 
 
 class Usage(BaseModel):
@@ -224,7 +277,7 @@ class ChatCompletionResponse(BaseModel):
 class StreamChoice(BaseModel):
     index: int
     delta: Dict[str, Any]
-    finish_reason: Optional[Literal["stop", "length", "content_filter", "null"]] = None
+    finish_reason: Optional[Literal["stop", "length", "content_filter", "tool_calls", "null"]] = None
 
 
 class ChatCompletionStreamResponse(BaseModel):
diff --git a/src/openclaw_bridge.py b/src/openclaw_bridge.py
new file mode 100644
index 0000000..16b09c2
--- /dev/null
+++ b/src/openclaw_bridge.py
@@ -0,0 +1,270 @@
+"""
+OpenClaw Bridge - Converts OpenAI-format tool definitions to SDK MCP tools
+and translates Claude tool_use blocks back to OpenAI tool_calls format.
+
+This module enables agent frameworks like OpenClaw to use their native tools
+through the Claude Agent SDK by:
+1. Converting OpenAI function definitions to in-process MCP tools
+2. Extracting ToolUseBlock content from Claude's response
+3. Formatting tool calls as OpenAI-compatible SSE streaming events
+"""
+
+import json
+import logging
+import uuid
+from typing import Any, Dict, List, Optional, Tuple
+
+from claude_agent_sdk import create_sdk_mcp_server, tool, SdkMcpTool
+
+logger = logging.getLogger(__name__)
+
+# Sentinel prefix for placeholder tool results
+TOOL_CALL_PLACEHOLDER = "[OPENCLAW_TOOL_CALL_FORWARDED]"
+
+
+def openai_tools_to_mcp_server(
+    tools: List[Dict[str, Any]],
+    server_name: str = "openclaw_tools",
+) -> Tuple[Any, List[str]]:
+    """Convert OpenAI-format tool definitions to an SDK MCP server.
+
+    Args:
+        tools: List of OpenAI tool definitions with format:
+            [{"type": "function", "function": {"name": ..., "description": ..., "parameters": ...}}]
+        server_name: Name for the MCP server
+
+    Returns:
+        Tuple of (McpSdkServerConfig, list of tool names), or (None, []) if no valid tools
+    """
+    sdk_tools: List[SdkMcpTool] = []
+    tool_names: List[str] = []
+
+    for tool_def in tools:
+        if tool_def.get("type") != "function":
+            continue
+
+        func_def = tool_def.get("function", {})
+        name = func_def.get("name", "")
+        description = func_def.get("description", "")
+        parameters = func_def.get("parameters", {"type": "object", "properties": {}})
+
+        if not name:
+            continue
+
+        tool_names.append(name)
+
+        # Create MCP tool with a placeholder handler
+        # In single-turn mode (max_turns=1), this handler is never called
+        # because the SDK returns the assistant message without executing tools.
+        # If it IS called (shouldn't happen), return a placeholder.
+        sdk_tool = _create_placeholder_tool(name, description, parameters)
+        sdk_tools.append(sdk_tool)
+
+    if not sdk_tools:
+        logger.warning("No valid tool definitions found in request")
+        return None, []
+
+    mcp_server = create_sdk_mcp_server(
+        name=server_name,
+        version="1.0.0",
+        tools=sdk_tools,
+    )
+
+    logger.info(f"Created MCP server '{server_name}' with {len(sdk_tools)} tools: {tool_names}")
+    return mcp_server, tool_names
+
+
+def _create_placeholder_tool(
+    name: str, description: str, parameters: Dict[str, Any]
+) -> SdkMcpTool:
+    """Create an MCP tool with a placeholder handler.
+
+    The handler should never be called in single-turn mode, but if it is,
+    it returns a sentinel value that signals this is an external tool call.
+    """
+
+    @tool(name, description, parameters)
+    async def placeholder_handler(args: Any) -> Dict[str, Any]:
+        logger.warning(
+            f"Placeholder handler called for tool '{name}' - "
+            "this shouldn't happen in single-turn mode. "
+            f"Args: {args}"
+        )
+        return {
+            "content": [
+                {
+                    "type": "text",
+                    "text": f"{TOOL_CALL_PLACEHOLDER}: {name}({json.dumps(args)})",
+                }
+            ]
+        }
+
+    return placeholder_handler
+
+
+def extract_tool_calls_from_chunks(
+    chunks: List[Dict[str, Any]],
+    external_tool_names: Optional[List[str]] = None,
+) -> Tuple[Optional[str], List[Dict[str, Any]]]:
+    """Extract text content and tool calls from Claude SDK response chunks.
+
+    Scans assistant messages (and successful result messages) for text and
+    ToolUseBlock content, separating them into text content and tool_calls.
+
+    Args:
+        chunks: List of SDK message dicts
+        external_tool_names: If provided and non-empty, only extract tool calls for these
+            tool names. Tool calls for other tools (Claude built-in) are ignored.
+
+    Returns:
+        Tuple of (text_content, tool_calls): text_content is the newline-joined text
+        (None if there was none); tool_calls is a list of OpenAI-format tool call dicts.
+    """
+    text_parts = []
+    tool_calls = []
+    tool_call_index = 0
+
+    for chunk in chunks:
+        content = None
+
+        # Handle ResultMessage with 'result' field
+        if chunk.get("subtype") == "success" and "result" in chunk:
+            if chunk["result"]:
+                text_parts.append(chunk["result"])
+            continue
+
+        # Handle AssistantMessage content
+        if "content" in chunk and isinstance(chunk["content"], list):
+            content = chunk["content"]
+        elif chunk.get("type") == "assistant" and "message" in chunk:
+            message = chunk["message"]
+            if isinstance(message, dict) and "content" in message:
+                content = message["content"]
+
+        if content is None:
+            continue
+
+        if not isinstance(content, list):
+            if isinstance(content, str) and content:
+                text_parts.append(content)
+            continue
+
+        for block in content:
+            # Handle TextBlock
+            if hasattr(block, "text"):
+                if block.text:
+                    text_parts.append(block.text)
+            elif isinstance(block, dict) and block.get("type") == "text":
+                text = block.get("text", "")
+                if text:
+                    text_parts.append(text)
+
+            # Handle ToolUseBlock
+            elif hasattr(block, "name") and hasattr(block, "input") and hasattr(block, "id"):
+                tool_name = block.name
+                # Filter by external tool names if provided
+                if external_tool_names and tool_name not in external_tool_names:
+                    logger.debug(f"Skipping internal tool call: {tool_name}")
+                    continue
+
+                tool_call = {
+                    "index": tool_call_index,
+                    "id": getattr(block, "id", f"call_{uuid.uuid4().hex[:24]}"),
+                    "type": "function",
+                    "function": {
+                        "name": tool_name,
+                        "arguments": json.dumps(block.input) if isinstance(block.input, dict) else str(block.input),
+                    },
+                }
+                tool_calls.append(tool_call)
+                tool_call_index += 1
+                logger.info(f"Extracted tool call: {tool_name} (id={tool_call['id']})")
+
+            elif isinstance(block, dict) and block.get("type") == "tool_use":
+                tool_name = block.get("name", "")
+                if external_tool_names and tool_name not in external_tool_names:
+                    logger.debug(f"Skipping internal tool call: {tool_name}")
+                    continue
+
+                tool_input = block.get("input", {})
+                tool_call = {
+                    "index": tool_call_index,
+                    "id": block.get("id", f"call_{uuid.uuid4().hex[:24]}"),
+                    "type": "function",
+                    "function": {
+                        "name": tool_name,
+                        "arguments": json.dumps(tool_input) if isinstance(tool_input, dict) else str(tool_input),
+                    },
+                }
+                tool_calls.append(tool_call)
+                tool_call_index += 1
+                logger.info(f"Extracted tool call (dict): {tool_name}")
+
+    text_content = "\n".join(text_parts) if text_parts else None
+    return text_content, tool_calls
+
+
+def format_tool_calls_for_sse(
+    tool_calls: List[Dict[str, Any]],
+    request_id: str,
+    model: str,
+) -> List[str]:
+    """Format tool calls as OpenAI SSE streaming events.
+
+    Returns a list of SSE-formatted data strings ready to yield.
+    """
+    events = []
+
+    for tc in tool_calls:
+        # Send tool call start (with id, type, name)
+        start_chunk = {
+            "id": request_id,
+            "object": "chat.completion.chunk",
+            "model": model,
+            "choices": [
+                {
+                    "index": 0,
+                    "delta": {
+                        "tool_calls": [
+                            {
+                                "index": tc["index"],
+                                "id": tc["id"],
+                                "type": "function",
+                                "function": {
+                                    "name": tc["function"]["name"],
+                                    "arguments": "",
+                                },
+                            }
+                        ]
+                    },
+                    "finish_reason": None,
+                }
+            ],
+        }
+        events.append(f"data: {json.dumps(start_chunk)}\n\n")
+
+        # Send all arguments in one follow-up chunk (could be split for large args; kept simple)
+        args_chunk = {
+            "id": request_id,
+            "object": "chat.completion.chunk",
+            "model": model,
+            "choices": [
+                {
+                    "index": 0,
+                    "delta": {
+                        "tool_calls": [
+                            {
+                                "index": tc["index"],
+                                "function": {
+                                    "arguments": tc["function"]["arguments"],
+                                },
+                            }
+                        ]
+                    },
+                    "finish_reason": None,
+                }
+            ],
+        }
+        events.append(f"data: {json.dumps(args_chunk)}\n\n")
+
+    return events
diff --git a/tests/test_claude_cli_unit.py b/tests/test_claude_cli_unit.py
index c67c7fe..774e9e4 100644
--- a/tests/test_claude_cli_unit.py
+++ b/tests/test_claude_cli_unit.py
@@ -548,7 +548,7 @@ async def mock_query(prompt, options):
 
             assert len(captured_options) == 1
             opts = captured_options[0]
-            assert opts.system_prompt == {"type": "text", "text": "You are helpful"}
+            assert opts.system_prompt == "You are helpful"
 
     @pytest.mark.asyncio
     async def test_run_completion_with_model(self, cli_instance):
diff --git a/tests/test_sdk_migration.py b/tests/test_sdk_migration.py
index 6ad2d95..d28a654 100644
--- a/tests/test_sdk_migration.py
+++ b/tests/test_sdk_migration.py
@@ -74,7 +74,7 @@ def test_default_model_defined(self):
         from src.constants import DEFAULT_MODEL, CLAUDE_MODELS
 
         assert DEFAULT_MODEL in CLAUDE_MODELS
-        assert DEFAULT_MODEL == "claude-sonnet-4-5-20250929"
+        assert DEFAULT_MODEL == "claude-opus-4-6"
 
     def test_fast_model_defined(self):
         """Test that FAST_MODEL is set to fastest model."""