Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 11 additions & 5 deletions recipe/dapo/dapo_ray_trainer.py
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMO, it would be ideal to create a separate directory called `cispo` instead of adding these modifications to `dapo`.

Original file line number Diff line number Diff line change
Expand Up @@ -251,9 +251,10 @@ def fit(self):
batch = new_batch if batch is None else DataProto.concat([batch, new_batch])

prompt_bsz = self.config.data.train_batch_size
if num_prompt_in_batch < prompt_bsz:
max_num_gen_batches = self.config.algorithm.filter_groups.max_num_gen_batches
if num_prompt_in_batch < prompt_bsz and max_num_gen_batches > 1: # Added by Reasoning360 TWK NOTE: second condition is to account for when we have zero-variance filtering but are not dynamically growing the batch...
print(f"{num_prompt_in_batch=} < {prompt_bsz=}")
max_num_gen_batches = self.config.algorithm.filter_groups.max_num_gen_batches
# max_num_gen_batches = self.config.algorithm.filter_groups.max_num_gen_batches
if max_num_gen_batches <= 0 or num_gen_batches < max_num_gen_batches:
print(f"{num_gen_batches=}. Keep generating...")
progress_bar.update(1)
Expand All @@ -267,9 +268,14 @@ def fit(self):
+ " You could also try set max_num_gen_batches=0 to enable endless trials."
)
else:
# Align the batch
traj_bsz = self.config.data.train_batch_size * self.config.actor_rollout_ref.rollout.n
batch = batch[:traj_bsz]
# Added by Reasoning360, need to account for when our batch is smaller due to zero-variance filtering
if num_prompt_in_batch >= prompt_bsz:
# Align the batch
traj_bsz = self.config.data.train_batch_size * self.config.actor_rollout_ref.rollout.n
batch = batch[:traj_bsz]
else:
# TWK TODO!!!: RESCALE THIS SO THAT THE BATCH*N IS DIVISIBLE BY k_partitions (n_gpus...)
print(f"Final {num_prompt_in_batch=} < {prompt_bsz=} after {num_gen_batches=} generation batches. Proceeding with smaller batch...")

# === Updating ===

Expand Down
2 changes: 1 addition & 1 deletion scripts/tools/serve_llm_as_verifier.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/bin/bash
#SBATCH --job-name=server_llm_as_verifier
#SBATCH --partition=main
#SBATCH --partition=higherprio
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=64
Expand Down
22 changes: 22 additions & 0 deletions scripts/tools/serve_math_llm_as_verifier.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/bin/bash
#SBATCH --job-name=server_math_llm_as_verifier
#SBATCH --partition=higherprio
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=64
#SBATCH --gres=gpu:8
#SBATCH --time=720:00:00
#SBATCH --output=slurm/serve_math_llm_as_verifier_%j.log
#SBATCH --error=slurm/serve_math_llm_as_verifier_%j.log

# Serve openai/gpt-oss-120b via vLLM on this SLURM node so downstream clients
# can use it as an LLM judge for math verification. The judge URL is exported
# and echoed so clients can pick it up from the environment or the job log.

# (1) detect this node's primary IP (first address reported by `hostname -I`)
NODE_IP=$(hostname -I | awk '{print $1}')
echo "Detected NODE_IP = $NODE_IP"

# (2) export judge URL for downstream clients
# NOTE(review): port 8000 is assumed to be vLLM's default serve port — keep
# this in sync with the `vllm serve` command below if a --port flag is added.
export MATH_LLM_JUDGE_URL="http://${NODE_IP}:8000"
echo "MATH_LLM_JUDGE_URL=$MATH_LLM_JUDGE_URL"

# (3) launch the vLLM server bound to that IP, using data-parallel size 8
# with expert parallelism enabled (matches the 8 GPUs requested above)
vllm serve openai/gpt-oss-120b --host "$NODE_IP" --data-parallel-size 8 --enable-expert-parallel
Loading