boris committed on
Commit
8149924
1 Parent(s): 032f623

feat(train): custom start_preconditioning_step

Browse files
Files changed (1) hide show
  1. tools/train/train.py +5 -1
tools/train/train.py CHANGED
@@ -248,6 +248,10 @@ class TrainingArguments:
248
  default=1024,
249
  metadata={"help": "Chunked size for large layers with Distributed Shampoo."},
250
  )
 
 
 
 
251
  preconditioning_compute_steps: int = field(
252
  default=10, metadata={"help": "Number of steps to update preconditioner."}
253
  )
@@ -608,7 +612,7 @@ def main():
608
  beta2=training_args.beta2,
609
  diagonal_epsilon=1e-10,
610
  matrix_epsilon=1e-8,
611
- start_preconditioning_step=training_args.warmup_steps,
612
  preconditioning_compute_steps=training_args.preconditioning_compute_steps,
613
  statistics_compute_steps=1,
614
  best_effort_shape_interpretation=True,
 
248
  default=1024,
249
  metadata={"help": "Chunked size for large layers with Distributed Shampoo."},
250
  )
251
+ start_preconditioning_step: int = field(
252
+ default=100,
253
+ metadata={"help": "Number of steps before starting to update preconditioner."},
254
+ )
255
  preconditioning_compute_steps: int = field(
256
  default=10, metadata={"help": "Number of steps to update preconditioner."}
257
  )
 
612
  beta2=training_args.beta2,
613
  diagonal_epsilon=1e-10,
614
  matrix_epsilon=1e-8,
615
+ start_preconditioning_step=training_args.start_preconditioning_step,
616
  preconditioning_compute_steps=training_args.preconditioning_compute_steps,
617
  statistics_compute_steps=1,
618
  best_effort_shape_interpretation=True,