wandb_version: 1 | |
_wandb: | |
desc: null | |
value: | |
cli_version: 0.12.1 | |
framework: huggingface | |
huggingface_version: 4.10.0.dev0 | |
is_jupyter_run: false | |
is_kaggle_kernel: false | |
python_version: 3.8.11 | |
start_time: 1630329234 | |
t: | |
1: | |
- 1 | |
- 11 | |
4: 3.8.11 | |
5: 0.12.1 | |
6: 4.10.0.dev0 | |
8: | |
- 5 | |
gradient_accumulation_steps: | |
desc: null | |
value: 1 | |
learning_rate: | |
desc: null | |
value: 0.0005 | |
lr_scheduler_type: | |
desc: null | |
value: cosine | |
max_eval_steps: | |
desc: null | |
value: -1 | |
max_train_steps: | |
desc: null | |
value: 150000 | |
num_warmup_steps: | |
desc: null | |
value: 2000 | |
save_checkpoint_steps: | |
desc: null | |
value: 15000 | |
seed: | |
desc: null | |
value: 1 | |
seq_length: | |
desc: null | |
value: 1024 | |
shuffle_buffer: | |
desc: null | |
value: 1000 | |
train_batch_size: | |
desc: null | |
value: 12 | |
valid_batch_size: | |
desc: null | |
value: 12 | |
weight_decay: | |
desc: null | |
value: 0.1 | |