fix checkpoints on multi-GPU (#481)
src/axolotl/monkeypatch/relora.py
```diff
@@ -131,7 +131,7 @@ class ReLoRACallback(TrainerCallback):
             and state.global_step % self.relora_steps != 0
         ):
             if self.quantized:
-                if self.last_full_model != checkpoint_folder:
+                if is_main_process() and self.last_full_model != checkpoint_folder:
                     # ensure the latest full parameter save is in the latest checkpoint
                     # folder, so that automatic pruning of checkpoints does not remove it
                     LOG.info(f"moving last full parameter save to {checkpoint_folder}")
```
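For context, the guard matters because on a multi-GPU run every rank executes this callback, so without it each process would race to move the same full-parameter save into the checkpoint folder. Below is a minimal sketch of what an `is_main_process()` helper typically looks like, assuming the usual `torch.distributed` idiom; axolotl imports its own helper, which may differ in detail:

```python
import torch.distributed as dist


def is_main_process() -> bool:
    """Return True on rank 0, or when not running distributed.

    Sketch only: this illustrates the standard torch.distributed
    pattern behind a rank-0 guard, not axolotl's exact helper.
    """
    # If the process group was never initialized, this is a
    # single-process run, which is trivially the main process.
    if not dist.is_available() or not dist.is_initialized():
        return True
    return dist.get_rank() == 0
```

With the guard in place, only rank 0 performs the filesystem move and the other ranks skip it, so the latest full-parameter save lands in the checkpoint folder exactly once instead of once per process.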