Skip to content

Commit 70d61cb

Browse files
authored
fix for windows (#417)
1 parent b011c79 commit 70d61cb

File tree

1 file changed

+149
-71
lines changed

1 file changed

+149
-71
lines changed

eval_protocol/rewards/code_execution.py

Lines changed: 149 additions & 71 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,6 @@
1919
import os
2020
import platform
2121
import re
22-
import resource
2322
import shlex # Added for robust splitting of arguments
2423
import signal
2524
import subprocess
@@ -174,7 +173,7 @@ def local_code_execution_reward(
174173
# Normalize content to string; Message.content may be str or list of content parts
175174
last_content = messages[-1].content
176175
response_content = (
177-
last_content if isinstance(last_content, str) else "".join([p.text for p in (last_content or [])])
176+
last_content if isinstance(last_content, str) else "".join([p.text for p in (last_content or [])]) # pyright: ignore[reportAttributeAccessIssue]
178177
)
179178
expected_output_str = ground_truth
180179

@@ -320,31 +319,44 @@ def _execute_python_in_subprocess(code: str, timeout: int) -> Dict[str, Any]:
320319
Returns:
321320
Dictionary with execution results
322321
"""
322+
# Try to import resource module (Unix-only)
323+
try:
324+
import resource as resource_module
325+
except ImportError:
326+
resource_module = None
327+
323328
try:
324329
with tempfile.NamedTemporaryFile(suffix=".py", delete=False) as temp_file:
325330
temp_file_path = temp_file.name
326331

332+
# Build platform-appropriate reliability guard
333+
# The generated code checks for resource availability at runtime
327334
safe_code = (
328335
"import sys\n"
329336
"import os\n"
330-
"import signal\n"
331-
"import resource\n"
332337
"import platform\n\n"
333338
"def _reliability_guard():\n"
334339
" memory_limit = 100 * 1024 * 1024 # 100 MB\n"
335-
" if platform.uname().system != 'Darwin':\n"
336-
" resource.setrlimit(resource.RLIMIT_AS, (memory_limit, memory_limit))\n"
337-
" resource.setrlimit(resource.RLIMIT_DATA, (memory_limit, memory_limit))\n"
338-
" resource.setrlimit(resource.RLIMIT_STACK, (memory_limit, memory_limit))\n"
340+
" try:\n"
341+
" import resource\n"
342+
" if platform.uname().system != 'Darwin':\n"
343+
" if hasattr(resource, 'RLIMIT_AS'):\n"
344+
" resource.setrlimit(resource.RLIMIT_AS, (memory_limit, memory_limit))\n"
345+
" if hasattr(resource, 'RLIMIT_DATA'):\n"
346+
" resource.setrlimit(resource.RLIMIT_DATA, (memory_limit, memory_limit))\n"
347+
" if hasattr(resource, 'RLIMIT_STACK'):\n"
348+
" resource.setrlimit(resource.RLIMIT_STACK, (memory_limit, memory_limit))\n"
349+
" except ImportError:\n"
350+
" pass # resource module not available (e.g., Windows)\n"
339351
" import builtins\n"
340352
" builtins.exit = None\n"
341353
" builtins.quit = None\n"
342354
" os.environ['OMP_NUM_THREADS'] = '1'\n"
343355
" os.system = None\n"
344356
" os.popen = None\n"
345-
" os.execl = None\n"
346-
" os.execve = None\n"
347-
" os.fork = None\n"
357+
" if hasattr(os, 'execl'): os.execl = None\n"
358+
" if hasattr(os, 'execve'): os.execve = None\n"
359+
" if hasattr(os, 'fork'): os.fork = None\n"
348360
" os.remove = None\n"
349361
" os.removedirs = None\n"
350362
" os.rmdir = None\n"
@@ -356,42 +368,87 @@ def _execute_python_in_subprocess(code: str, timeout: int) -> Dict[str, Any]:
356368

357369
temp_file.write(safe_code.encode("utf-8"))
358370

359-
def timeout_handler(signum, frame):
360-
raise TimeoutError(f"Execution timed out after {timeout} seconds")
371+
# Check if we can use Unix-specific features (SIGALRM, preexec_fn with resource limits)
372+
has_sigalrm = hasattr(signal, "SIGALRM")
361373

362-
signal.signal(signal.SIGALRM, timeout_handler)
363-
signal.alarm(timeout)
374+
if has_sigalrm and resource_module is not None:
375+
# Unix: use SIGALRM for timeout and resource limits
376+
def timeout_handler(signum, frame):
377+
raise TimeoutError(f"Execution timed out after {timeout} seconds")
364378

365-
try:
366-
process = subprocess.Popen(
367-
[sys.executable, temp_file_path],
368-
stdout=subprocess.PIPE,
369-
stderr=subprocess.PIPE,
370-
text=True,
371-
preexec_fn=lambda: resource.setrlimit(resource.RLIMIT_CPU, (timeout, timeout + 1)),
372-
)
379+
signal.signal(signal.SIGALRM, timeout_handler)
380+
signal.alarm(timeout)
373381

374-
stdout, stderr = process.communicate()
375-
signal.alarm(0)
382+
try:
383+
preexec = None
384+
if hasattr(resource_module, "RLIMIT_CPU"):
385+
preexec = lambda: resource_module.setrlimit(resource_module.RLIMIT_CPU, (timeout, timeout + 1))
386+
387+
process = subprocess.Popen(
388+
[sys.executable, temp_file_path],
389+
stdout=subprocess.PIPE,
390+
stderr=subprocess.PIPE,
391+
text=True,
392+
preexec_fn=preexec,
393+
)
376394

377-
if process.returncode == 0:
378-
return {
379-
"success": True,
380-
"output": stdout.strip(),
381-
"error": None,
382-
}
383-
else:
384-
return {
385-
"success": False,
386-
"output": None,
387-
"error": stderr.strip(),
388-
}
389-
except TimeoutError as e:
390-
return {"success": False, "output": None, "error": str(e)}
391-
finally:
392-
signal.alarm(0)
393-
if os.path.exists(temp_file_path):
394-
os.unlink(temp_file_path)
395+
stdout, stderr = process.communicate()
396+
signal.alarm(0)
397+
398+
if process.returncode == 0:
399+
return {
400+
"success": True,
401+
"output": stdout.strip(),
402+
"error": None,
403+
}
404+
else:
405+
return {
406+
"success": False,
407+
"output": None,
408+
"error": stderr.strip(),
409+
}
410+
except TimeoutError as e:
411+
return {"success": False, "output": None, "error": str(e)}
412+
finally:
413+
signal.alarm(0)
414+
if os.path.exists(temp_file_path):
415+
os.unlink(temp_file_path)
416+
else:
417+
# Windows or systems without SIGALRM: use subprocess timeout only
418+
try:
419+
process = subprocess.Popen(
420+
[sys.executable, temp_file_path],
421+
stdout=subprocess.PIPE,
422+
stderr=subprocess.PIPE,
423+
text=True,
424+
)
425+
426+
try:
427+
stdout, stderr = process.communicate(timeout=timeout)
428+
except subprocess.TimeoutExpired:
429+
process.kill()
430+
process.communicate()
431+
return {
432+
"success": False,
433+
"output": None,
434+
"error": f"Timeout: execution timed out after {timeout} seconds",
435+
}
436+
437+
if process.returncode == 0:
438+
return {
439+
"success": True,
440+
"output": stdout.strip(),
441+
"error": None,
442+
}
443+
else:
444+
return {
445+
"success": False,
446+
"output": None,
447+
"error": stderr.strip(),
448+
}
449+
finally:
450+
if os.path.exists(temp_file_path):
451+
os.unlink(temp_file_path)
395452
except Exception as e:
396453
error_traceback = traceback.format_exc()
397454
return {
@@ -473,11 +530,7 @@ def _execute_javascript_in_subprocess(code: str, timeout: int) -> Dict[str, Any]
473530

474531
temp_file.write(safe_code.encode("utf-8"))
475532

476-
def timeout_handler(signum, frame):
477-
raise TimeoutError(f"Execution timed out after {timeout} seconds")
478-
479-
signal.signal(signal.SIGALRM, timeout_handler)
480-
signal.alarm(timeout)
533+
has_sigalrm = hasattr(signal, "SIGALRM")
481534

482535
try:
483536
process = subprocess.Popen(
@@ -492,19 +545,30 @@ def timeout_handler(signum, frame):
492545
text=True,
493546
)
494547

548+
# On Unix, we can use SIGALRM as a backup timeout mechanism
549+
if has_sigalrm:
550+
551+
def timeout_handler(signum, frame):
552+
raise TimeoutError(f"Execution timed out after {timeout} seconds")
553+
554+
signal.signal(signal.SIGALRM, timeout_handler)
555+
signal.alarm(timeout)
556+
495557
try:
496558
stdout, stderr = process.communicate(timeout=timeout)
497559
except subprocess.TimeoutExpired:
498560
process.kill()
499561
stdout, stderr = process.communicate()
500-
signal.alarm(0)
562+
if has_sigalrm:
563+
signal.alarm(0)
501564
return {
502565
"success": False,
503566
"output": None,
504567
"error": f"JavaScript execution timed out after {timeout} seconds (subprocess.TimeoutExpired). Output: {stdout.strip()}, Error: {stderr.strip()}",
505568
}
506569

507-
signal.alarm(0)
570+
if has_sigalrm:
571+
signal.alarm(0)
508572

509573
if process.returncode == 0:
510574
return {
@@ -527,7 +591,8 @@ def timeout_handler(signum, frame):
527591
"error": f"JavaScript execution timed out after {timeout} seconds (signal.alarm): {str(e)}",
528592
}
529593
finally:
530-
signal.alarm(0)
594+
if has_sigalrm:
595+
signal.alarm(0)
531596
if os.path.exists(temp_file_path):
532597
os.unlink(temp_file_path)
533598

@@ -941,7 +1006,7 @@ def e2b_code_execution_reward(
9411006

9421007
last_content = messages[-1].content
9431008
response_content = (
944-
last_content if isinstance(last_content, str) else "".join([p.text for p in (last_content or [])])
1009+
last_content if isinstance(last_content, str) else "".join([p.text for p in (last_content or [])]) # pyright: ignore[reportAttributeAccessIssue]
9451010
)
9461011
expected_output_str = ground_truth
9471012

@@ -1549,17 +1614,20 @@ def reliability_guard(maximum_memory_bytes: Optional[int] = None) -> None:
15491614
This function is NOT a security sandbox. Untrusted code should not be
15501615
blindly executed outside of a proper sandbox environment.
15511616
"""
1617+
# Resource limits are only available on Unix systems
15521618
if maximum_memory_bytes is not None:
1553-
if platform.uname().system != "Darwin":
1554-
resource.setrlimit(resource.RLIMIT_AS, (maximum_memory_bytes, maximum_memory_bytes))
1555-
resource.setrlimit(
1556-
resource.RLIMIT_DATA,
1557-
(maximum_memory_bytes, maximum_memory_bytes),
1558-
)
1559-
resource.setrlimit(
1560-
resource.RLIMIT_STACK,
1561-
(maximum_memory_bytes, maximum_memory_bytes),
1562-
)
1619+
try:
1620+
import resource
1621+
1622+
if platform.uname().system != "Darwin":
1623+
if hasattr(resource, "RLIMIT_AS"):
1624+
resource.setrlimit(resource.RLIMIT_AS, (maximum_memory_bytes, maximum_memory_bytes))
1625+
if hasattr(resource, "RLIMIT_DATA"):
1626+
resource.setrlimit(resource.RLIMIT_DATA, (maximum_memory_bytes, maximum_memory_bytes))
1627+
if hasattr(resource, "RLIMIT_STACK"):
1628+
resource.setrlimit(resource.RLIMIT_STACK, (maximum_memory_bytes, maximum_memory_bytes))
1629+
except ImportError:
1630+
pass # resource module not available (e.g., Windows)
15631631

15641632
faulthandler.disable()
15651633

@@ -1576,22 +1644,32 @@ def reliability_guard(maximum_memory_bytes: Optional[int] = None) -> None:
15761644
os.remove = noop # type: ignore
15771645
os.removedirs = noop # type: ignore
15781646
os.rmdir = noop # type: ignore
1579-
os.fchdir = noop # type: ignore
1580-
os.setuid = noop # type: ignore
1581-
os.fork = noop # type: ignore
1582-
os.forkpty = noop # type: ignore
1583-
os.killpg = noop # type: ignore
15841647
os.rename = noop # type: ignore
15851648
os.renames = noop # type: ignore
15861649
os.truncate = noop # type: ignore
15871650
os.replace = noop # type: ignore
15881651
os.unlink = noop # type: ignore
1589-
os.fchmod = noop # type: ignore
1590-
os.fchown = noop # type: ignore
15911652
os.chmod = noop # type: ignore
1592-
os.chown = noop # type: ignore
1593-
os.chroot = noop # type: ignore
15941653

1654+
# Unix-only attributes
1655+
if hasattr(os, "fchdir"):
1656+
os.fchdir = noop # type: ignore
1657+
if hasattr(os, "setuid"):
1658+
os.setuid = noop # type: ignore
1659+
if hasattr(os, "fork"):
1660+
os.fork = noop # type: ignore
1661+
if hasattr(os, "forkpty"):
1662+
os.forkpty = noop # type: ignore
1663+
if hasattr(os, "killpg"):
1664+
os.killpg = noop # type: ignore
1665+
if hasattr(os, "fchmod"):
1666+
os.fchmod = noop # type: ignore
1667+
if hasattr(os, "fchown"):
1668+
os.fchown = noop # type: ignore
1669+
if hasattr(os, "chown"):
1670+
os.chown = noop # type: ignore
1671+
if hasattr(os, "chroot"):
1672+
os.chroot = noop # type: ignore
15951673
if hasattr(os, "lchflags"):
15961674
os.lchflags = noop # type: ignore
15971675
if hasattr(os, "lchmod"):

0 commit comments

Comments
 (0)