diff --git a/agentlightning/verl/trainer.py b/agentlightning/verl/trainer.py index 413a0a1cf..2f9cee4de 100644 --- a/agentlightning/verl/trainer.py +++ b/agentlightning/verl/trainer.py @@ -417,10 +417,17 @@ def _train_step(self, batch_dict: dict) -> dict: print(batch.batch.keys()) inputs = self.tokenizer.batch_decode(batch.batch["prompts"], skip_special_tokens=True) outputs = self.tokenizer.batch_decode(batch.batch["responses"], skip_special_tokens=True) + sample_gts = [ + item.non_tensor_batch.get("reward_model", {}).get( + "ground_truth", None + ) + for item in batch + ] scores = batch.batch["token_level_scores"].sum(-1).cpu().tolist() self._dump_generations( inputs=inputs, outputs=outputs, + gts=sample_gts, scores=scores, reward_extra_infos_dict=reward_extra_infos_dict, dump_path=rollout_data_dir,