CompleteTech-LLC-AI-Research · romgenie · Aug 4, 2025
diff --git a/agents/ppo_agent.py b/agents/ppo_agent.py
@@ -135,6 +135,8 @@ def __init__(self, config, device):
 
         # Add learning rate scheduler for linear decay with minimum LR
         num_updates = config.total_timesteps // (config.n_steps * config.n_envs)
+        # Clamp to >= 1: the floor division above is 0 for short runs, and lr_lambda divides by num_updates
+        num_updates = max(num_updates, 1)
         min_lr_ratio = 0.1  # Don't let LR go below 10% of initial
         lr_lambda = lambda update: max(min_lr_ratio, 1.0 - (update / num_updates))
         self.scheduler = torch.optim.lr_scheduler.LambdaLR(self.optimizer, lr_lambda=lr_lambda)