# execution_fitness.py
import subprocess
import tempfile
import os
import uuid
import time
class ExecutionFitness:
    """Run generated Python code in a child process and score the outcome.

    Each run writes the code to a private temporary file, executes it with a
    separate interpreter process under a timeout, and records stdout, stderr,
    exit status and elapsed time.

    NOTE: the child process is only bounded by a timeout. Nothing in this
    class restricts filesystem, network or memory use, so it must not be
    treated as a security sandbox for untrusted code.
    """

    # Class-level fallback so subclasses that override ``__init__`` without
    # calling ``super().__init__()`` still have an interpreter to launch.
    python_executable = "python3"

    def __init__(self, timeout=2, python_executable="python3"):
        """Configure the runner.

        Args:
            timeout: Maximum number of seconds a single execution may take.
            python_executable: Interpreter command used to run the code.
                Defaults to ``"python3"`` resolved through ``PATH``.
        """
        self.timeout = timeout
        self.python_executable = python_executable

    # =====================================================
    # SAFE PYTHON EXECUTION
    # =====================================================
    def run_python(self, code, test_input=""):
        """Execute ``code`` as a script and capture its result.

        Args:
            code: Python source text to run.
            test_input: Text fed to the script's stdin. ``None`` is treated
                as empty input.

        Returns:
            A dict with the keys ``stdout`` and ``stderr`` (decoded text),
            ``exit_code``, ``time`` (elapsed seconds) and ``success``
            (``True`` when the exit code is 0). On timeout the dict holds
            ``stderr == "TIMEOUT"``, ``exit_code == -1``,
            ``time == self.timeout`` and ``success == False``.
        """
        # mkstemp creates the file exclusively with owner-only permissions in
        # the platform's temp directory, instead of a guessed path in /tmp.
        fd, file_path = tempfile.mkstemp(prefix="opcode_", suffix=".py")
        try:
            # Python reads source files as UTF-8 by default, so write UTF-8
            # regardless of the locale encoding.
            with os.fdopen(fd, "w", encoding="utf-8") as script:
                script.write(code)

            payload = (test_input or "").encode()

            # perf_counter is monotonic; time.time() can jump with clock
            # adjustments and produce wrong (even negative) durations.
            start = time.perf_counter()
            try:
                result = subprocess.run(
                    [self.python_executable, file_path],
                    input=payload,
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                    timeout=self.timeout,
                )
            except subprocess.TimeoutExpired:
                return {
                    "stdout": "",
                    "stderr": "TIMEOUT",
                    "exit_code": -1,
                    "time": self.timeout,
                    "success": False,
                }
            duration = time.perf_counter() - start

            # The executed code may print arbitrary bytes; replace invalid
            # sequences rather than letting UnicodeDecodeError abort the run.
            return {
                "stdout": result.stdout.decode(errors="replace"),
                "stderr": result.stderr.decode(errors="replace"),
                "exit_code": result.returncode,
                "time": duration,
                "success": result.returncode == 0,
            }
        finally:
            # Always clean up the script, including when writing it failed.
            try:
                os.remove(file_path)
            except FileNotFoundError:
                pass

    # =====================================================
    # FITNESS SCORE
    # =====================================================
    def score(self, execution_results, expected_output=None):
        """Fold a list of execution results into a single fitness score.

        Per result: +2.0 if it succeeded, otherwise -3.0; a further -1.0 if
        anything was written to stderr; and a penalty of 0.3 per second of
        run time.

        Args:
            execution_results: Iterable of dicts as returned by
                ``run_python``.
            expected_output: Accepted for interface compatibility but
                currently not used; stdout is not compared against it.

        Returns:
            The accumulated score as a float (``0.0`` for no results).
        """
        total = 0.0
        for res in execution_results:
            total += 2.0 if res["success"] else -3.0
            if res["stderr"]:
                total -= 1.0
            total -= res["time"] * 0.3
        return total

    def run_with_tests(self, code, tests):
        """Run ``code`` once per entry in ``tests``, using each as stdin.

        Returns:
            A list of ``run_python`` result dicts, in the order of ``tests``.
        """
        return [self.run_python(code, test_input=test) for test in tests]