use math.ceil instead of round /cc #498
Browse files
src/axolotl/utils/trainer.py
CHANGED
|
@@ -588,7 +588,9 @@ def setup_trainer(cfg, train_dataset, eval_dataset, model, tokenizer, total_num_
|
|
| 588 |
"padding": True, # True/"longest" is the default
|
| 589 |
}
|
| 590 |
if cfg.pad_to_sequence_len:
|
| 591 |
-
data_collator_kwargs["pad_to_multiple_of"] = 64 * round(cfg.sequence_len / 64)
|
|
|
|
|
|
|
| 592 |
else:
|
| 593 |
# A100 is best at 64, while others at 8. Let's use the larger so we don't have to check
|
| 594 |
# https://docs.nvidia.com/deeplearning/performance/dl-performance-matrix-multiplication/index.html
|
|
|
|
| 588 |
"padding": True, # True/"longest" is the default
|
| 589 |
}
|
| 590 |
if cfg.pad_to_sequence_len:
|
| 591 |
+
data_collator_kwargs["pad_to_multiple_of"] = 64 * math.ceil(
|
| 592 |
+
cfg.sequence_len / 64
|
| 593 |
+
)
|
| 594 |
else:
|
| 595 |
# A100 is best at 64, while others at 8. Let's use the larger so we don't have to check
|
| 596 |
# https://docs.nvidia.com/deeplearning/performance/dl-performance-matrix-multiplication/index.html
|