Mulebot
/

dei-model

renpas22 committed on Dec 17, 2025

Commit

fa9e543

1 Parent(s): 5af9eca

Skip .to(device) for quantized models with device_map

Files changed (1) hide show

src/reasoning/rl_trainer.py CHANGED Viewed

@@ -76,9 +76,13 @@ class RLReasoningTrainer:
         self.config = config
         self.device = device
-        # Move models to device
-        self.policy.to(device)
-        self.prm.to(device)
         # Freeze PRM (only train policy)
         for param in self.prm.parameters():

         self.config = config
         self.device = device
+        # Move models to device (skip if already quantized with device_map)
+        if not (hasattr(self.policy, 'hf_device_map') or
+                getattr(self.policy, 'is_quantized', False)):
+            self.policy.to(device)
+        if not (hasattr(self.prm, 'hf_device_map') or
+                getattr(self.prm, 'is_quantized', False)):
+            self.prm.to(device)
         # Freeze PRM (only train policy)
         for param in self.prm.parameters():