tweak config to work
Browse files
examples/openllama-3b/config.yml
CHANGED
|
@@ -26,17 +26,18 @@ wandb_watch:
|
|
| 26 |
wandb_run_id:
|
| 27 |
wandb_log_model:
|
| 28 |
output_dir: ./openllama-out
|
| 29 |
-
|
| 30 |
-micro_batch_size:
|
| 31 |
num_epochs: 3
|
| 32 |
optimizer: adamw_bnb_8bit
|
| 33 |
torchdistx_path:
|
| 34 |
lr_scheduler: cosine
|
| 35 |
-learning_rate: 0.
|
| 36 |
train_on_inputs: false
|
| 37 |
group_by_length: false
|
|
|
|
| 38 |
bf16: false
|
| 39 |
-fp16:
|
| 40 |
tf32: false
|
| 41 |
gradient_checkpointing: true
|
| 42 |
early_stopping_patience:
|
|
@@ -52,7 +53,7 @@ eval_steps: 50
|
|
| 52 |
save_steps:
|
| 53 |
debug:
|
| 54 |
deepspeed:
|
| 55 |
-weight_decay: 0.
|
| 56 |
fsdp:
|
| 57 |
fsdp_config:
|
| 58 |
special_tokens:
|
|
|
|
| 26 |
wandb_run_id:
|
| 27 |
wandb_log_model:
|
| 28 |
output_dir: ./openllama-out
|
| 29 |
+gradient_accumulation_steps: 1
|
| 30 |
+micro_batch_size: 1
|
| 31 |
num_epochs: 3
|
| 32 |
optimizer: adamw_bnb_8bit
|
| 33 |
torchdistx_path:
|
| 34 |
lr_scheduler: cosine
|
| 35 |
+learning_rate: 0.00001
|
| 36 |
train_on_inputs: false
|
| 37 |
group_by_length: false
|
| 38 |
+float16: true
|
| 39 |
bf16: false
|
| 40 |
+fp16: false
|
| 41 |
tf32: false
|
| 42 |
gradient_checkpointing: true
|
| 43 |
early_stopping_patience:
|
|
|
|
| 53 |
save_steps:
|
| 54 |
debug:
|
| 55 |
deepspeed:
|
| 56 |
+weight_decay: 0.1
|
| 57 |
fsdp:
|
| 58 |
fsdp_config:
|
| 59 |
special_tokens:
|