MINIMAL REPRO

xzrderek · xzrderek · commit 1165ff182669 · 2025-08-08T05:48:08.000Z
diff --git a/test_burst_client.py b/test_burst_client.py
@@ -0,0 +1,169 @@
+#!/usr/bin/env python3
+"""
+Burst Client Test - Simulates 50 threads calling envs.reset() -> get_initial_state
+Exact pattern: _execute_rollout() -> envs.reset() -> get_initial_state -> client.get()
+"""
+
+import asyncio
+import threading
+import time
+from typing import Any, Dict, List
+
+import httpx
+
+
+class EnvResetClient:
+    """
+    Simulates the exact pattern from your code:
+    50 threads -> _execute_rollout() -> envs.reset() -> get_initial_state -> client.get()
+    """
+
+    def __init__(self, base_url: str = "http://localhost:8000"):
+        self.base_url = base_url
+        self.initial_state_url = f"{base_url}/control/initial_state"
+
+    async def get_initial_state(self, thread_id: int) -> Dict[str, Any]:
+        """
+        Simulates the get_initial_state call from your McpGym code.
+        This is the slow HTTP call that happens during envs.reset().
+        """
+        headers = {"Content-Type": "application/json", "Accept": "application/json"}
+
+        start_time = time.time()
+
+        try:
+            async with httpx.AsyncClient(timeout=30.0) as client:
+                # This is the exact pattern from your code
+                initial_state_response = await client.get(
+                    self.initial_state_url,
+                    headers=headers,
+                    timeout=30.0,
+                )
+                initial_state_response.raise_for_status()
+                result = initial_state_response.json()
+
+                end_time = time.time()
+                duration = end_time - start_time
+
+                return {"thread_id": thread_id, "success": True, "duration": duration, "initial_state": result}
+
+        except Exception as e:
+            end_time = time.time()
+            duration = end_time - start_time
+            return {"thread_id": thread_id, "success": False, "duration": duration, "error": str(e)}
+
+    async def envs_reset(self, thread_id: int) -> Dict[str, Any]:
+        """
+        Simulates envs.reset() which internally calls get_initial_state.
+        This is what gets called from _execute_rollout().
+        """
+        print(f"🔄 Thread {thread_id}: envs.reset() called")
+
+        # This simulates the envs.reset() -> get_initial_state call chain
+        return await self.get_initial_state(thread_id)
+
+
+async def _execute_rollout(thread_id: int, client: EnvResetClient) -> Dict[str, Any]:
+    """
+    Simulates _execute_rollout() function that calls envs.reset().
+    This runs concurrently using asyncio, matching your actual pattern.
+    """
+    print(f"🚀 Rollout {thread_id}: _execute_rollout() started")
+
+    # This is where envs.reset() gets called
+    result = await client.envs_reset(thread_id)
+    return result
+
+
+async def run_burst_test(num_clients: int = 50, server_url: str = "http://localhost:8000"):
+    """
+    Run burst test simulating 50 concurrent _execute_rollout() calls.
+    Each one calls envs.reset() -> get_initial_state -> client.get()
+    """
+    print(f"🚀 Starting burst test with {num_clients} concurrent rollouts")
+    print(f"🎯 Target server: {server_url}")
+    print(f"📋 Pattern: _execute_rollout() -> envs.reset() -> get_initial_state -> client.get()")
+
+    client = EnvResetClient(server_url)
+
+    # Create tasks for concurrent rollouts (simulating your threading pattern)
+    start_time = time.time()
+    tasks = [_execute_rollout(i, client) for i in range(num_clients)]
+
+    # Run all rollouts concurrently
+    results = await asyncio.gather(*tasks, return_exceptions=True)
+
+    end_time = time.time()
+    total_duration = end_time - start_time
+
+    # Analyze results
+    successful = [r for r in results if isinstance(r, dict) and r.get("success")]
+    failed = [r for r in results if isinstance(r, dict) and not r.get("success")]
+    exceptions = [r for r in results if not isinstance(r, dict)]
+
+    print(f"\n📊 BURST TEST RESULTS:")
+    print(f"   Total rollouts: {num_clients}")
+    print(f"   Total time: {total_duration:.3f}s")
+    print(f"   Successful: {len(successful)}")
+    print(f"   Failed: {len(failed)}")
+    print(f"   Exceptions: {len(exceptions)}")
+
+    if successful:
+        avg_duration = sum(r["duration"] for r in successful) / len(successful)
+        min_duration = min(r["duration"] for r in successful)
+        max_duration = max(r["duration"] for r in successful)
+
+        print(f"   Average rollout duration: {avg_duration:.3f}s")
+        print(f"   Min rollout duration: {min_duration:.3f}s")
+        print(f"   Max rollout duration: {max_duration:.3f}s")
+
+        # Show sample successful result
+        sample = successful[0]
+        print(f"\n✅ Sample successful rollout:")
+        print(f"   Thread ID: {sample['thread_id']}")
+        print(f"   Initial state: {sample['initial_state']['observation']}")
+        print(f"   Timestamp: {sample['initial_state']['timestamp']}")
+
+    if failed:
+        print(f"\n❌ Sample failed rollouts:")
+        for fail in failed[:3]:  # Show first 3 failures
+            print(f"   Thread {fail['thread_id']}: {fail['error']}")
+
+    if exceptions:
+        print(f"\n💥 Sample exceptions:")
+        for exc in exceptions[:3]:  # Show first 3 exceptions
+            print(f"   {type(exc).__name__}: {exc}")
+
+    # Key test: If concurrent, should take ~1 second. If sequential, ~50 seconds.
+    if total_duration < 5:  # Allow some overhead
+        print(f"\n🎉 CONCURRENCY WORKING! Total time {total_duration:.3f}s (expected ~1s for concurrent)")
+    else:
+        print(f"\n⚠️  POSSIBLE SEQUENTIAL EXECUTION! Total time {total_duration:.3f}s (expected ~1s for concurrent)")
+
+    return len(successful) == num_clients
+
+
+def main():
+    """Run the burst test."""
+    import argparse
+
+    parser = argparse.ArgumentParser(
+        description="Envs Reset Burst Test - Simulates 50 rollouts calling get_initial_state"
+    )
+    parser.add_argument("--rollouts", type=int, default=50, help="Number of concurrent rollouts")
+    parser.add_argument("--server", default="http://localhost:8000", help="Server URL")
+
+    args = parser.parse_args()
+
+    success = asyncio.run(run_burst_test(args.rollouts, args.server))
+
+    if success:
+        print(f"\n🎉 ALL {args.rollouts} ROLLOUTS SUCCESSFUL!")
+        exit(0)
+    else:
+        print(f"\n💥 SOME ROLLOUTS FAILED!")
+        exit(1)
+
+
+if __name__ == "__main__":  # run the burst test only when executed as a script
+    main()
diff --git a/test_simple_mcp_server.py b/test_simple_mcp_server.py
@@ -0,0 +1,51 @@
+#!/usr/bin/env python3
+"""
+Simple MCP Server for Testing get_initial_state Concurrency
+Simulates the exact pattern: envs.reset() -> get_initial_state -> slow HTTP endpoint
+"""
+
+import asyncio
+import os
+import time
+
+from fastmcp import FastMCP
+from starlette.requests import Request
+from starlette.responses import JSONResponse
+
+# Shared FastMCP instance; the custom route and tool below are registered on it.
+mcp = FastMCP(name="TestServer")
+
+
+@mcp.custom_route("/control/initial_state", methods=["GET"])
+async def get_initial_state_endpoint(request: Request) -> JSONResponse:
+    """
+    Simulate the get_initial_state endpoint that's slow.
+    This mimics the pattern in your McpGym code.
+    """
+    print(f"🔍 get_initial_state called at {time.time()}")
+
+    # Simulate the slow operation (like environment initialization)
+    time.sleep(1)  # 1 second delay to test concurrency
+
+    # Return a dummy initial state
+    return JSONResponse({"observation": "dummy_initial_state", "session_id": "test_session", "timestamp": time.time()})
+
+
+@mcp.tool
+def dummy_tool() -> str:
+    """Dummy tool for MCP compatibility."""
+    return "dummy"  # fixed placeholder value; takes no input
+
+
+def main():
+    """Run the test server."""
+    port = int(os.environ.get("PORT", 8000))
+    print(f"🚀 Starting get_initial_state test server on port {port}")
+    print(f"📡 Endpoint: http://localhost:{port}/control/initial_state")
+
+    # Use FastMCP 2.0 run method with streamable-http transport
+    mcp.run(transport="http", host="0.0.0.0", port=port)
+
+
+if __name__ == "__main__":  # start the server only when executed as a script
+    main()