Skip to content

Commit 050a87f

Browse files
authored
fix reasoning gym environment (PrimeIntellect-ai#401)
* fix reasoning gym environment * comments
1 parent 168293a commit 050a87f

2 files changed

Lines changed: 4 additions & 3 deletions

File tree

environments/reasoning_gym_env/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
name = "reasoning-gym-env"
33
description = "ReasoningGym suite of programmatically-generated reasoning tasks"
44
tags = ["reasoning-gym", "logic", "puzzles", "math", "train"]
5-
version = "0.1.1"
5+
version = "0.1.2"
66
requires-python = ">=3.11"
77
dependencies = [
88
"verifiers>=0.1.4",

environments/reasoning_gym_env/reasoning_gym_env.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,8 @@ def __init__(
2929
rubric = Rubric(parser=parser)
3030

3131
def check_answer_reward_func(completion, answer, **kwargs) -> float:
32-
entry = self.rg_dataset[answer]
32+
# rg_dataset expects an int index
33+
entry = self.rg_dataset[int(answer)]
3334
response = str(parser.parse_answer(completion)).strip()
3435
reward = self.rg_dataset.score_answer(answer=response, entry=entry)
3536
return reward
@@ -73,7 +74,7 @@ def rg_to_hf(self, rg_dataset: ProceduralDataset) -> Tuple[Dataset, Dataset]:
7374
for i, x in enumerate(rg_dataset):
7475
row = {
7576
"question": x["question"],
76-
"answer": i,
77+
"answer": str(i), # in verifiers, an answer must be a string
7778
"task": x["metadata"]["source_dataset"],
7879
}
7980
if i < self.num_train_examples:

0 commit comments

Comments
 (0)