eval_dsl_accuracy.py
"""
For each task in our benchmark, evaluate the accuracy of the manually-written program on the training examples
"""
from lib.eval.benchmark import read_manual_benchmark
from lib.interpreter.executor import Executor
from lib.utils.csv_utils import read_csv_to_dict


def eval_dsl_accuracy():
    benchmark_folder = 'benchmarks'
    benchmarks = read_csv_to_dict('{}/benchmarks.csv'.format(benchmark_folder))
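    # Assumption (schema not shown in this file): benchmarks.csv holds one row
    # per task, with at least a 'token_mode' column; rows with an empty
    # token_mode are skipped below as not-yet-annotated tasks.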
    executor = Executor()

    correct_count = 0
    total_count = 0
    error_entry = []

    for b_task in benchmarks:
        print("========== eval {} ============".format(b_task))
        if b_task['token_mode'] == '':
            continue

        b_ins = read_manual_benchmark(b_task)
        pos_ex_res, neg_ex_res = b_ins.run_program_on_task(executor, b_ins.program)
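        # Each result list is assumed to hold one truthy/falsy entry per
        # example: the program is correct on a task only if it matches every
        # positive example and no negative example.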
        if all(pos_ex_res) and not any(neg_ex_res):
            correct_count += 1
        else:
            # Collect the failing examples here; detailed error reporting is
            # deferred to evaluate_program.
            error = (b_ins,
                     [b_ins.task.pos_examples[i] if not e else 'PASS'
                      for i, e in enumerate(pos_ex_res)],
                     [b_ins.task.neg_examples[i] if e else 'PASS'
                      for i, e in enumerate(neg_ex_res)])
            error_entry.append(error)
        total_count += 1

    # Guard against division by zero when every task is skipped.
    rate = correct_count / total_count if total_count else 0.0
    print("program correct rate: {}/{}={}".format(correct_count, total_count, rate))
    print()
    print("detailed errors: ")
    for entry in error_entry:
        print(entry)


if __name__ == '__main__':
    eval_dsl_accuracy()
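# Usage sketch (assumes running from the repository root, so that the 'lib'
# package and the relative 'benchmarks' folder both resolve):
#   python eval_dsl_accuracy.py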