# Run configuration snapshot. Apart from `wandb_version`, every top-level key
# is a mapping with two fields: `desc` (null throughout this file) and `value`.
wandb_version: 1

# Metadata about the run environment, as opposed to the hyperparameters below.
_wandb:
  desc: null
  value:
    cli_version: 0.12.2
    framework: huggingface
    huggingface_version: 4.12.2
    is_jupyter_run: false
    is_kaggle_kernel: false
    python_version: 3.8.11
    start_time: 1636233370
    # NOTE(review): the integer keys under `t` are opaque from this file alone.
    # Keys 4, 5 and 6 repeat python_version, cli_version and
    # huggingface_version from above; the rest are integer codes.
    t:
      1:
        - 1
        - 11
      3:
        - 16
      4: 3.8.11
      5: 0.12.2
      6: 4.12.2
      8:
        - 5

# Distributed / accelerator state.
backend:
  desc: null
  value: nccl
deepspeed_plugin:
  desc: null
  # The string "None", not a YAML null; quoted so the type is unambiguous.
  value: 'None'
device:
  desc: null
  value: cuda:0
distributed_type:
  desc: null
  value: DistributedType.MULTI_GPU
gradient_accumulation_steps:
  desc: null
  value: 1
gradient_checkpointing:
  desc: null
  value: false
initialized:
  desc: null
  # Quoted: loads as the string "True", not a boolean.
  value: 'True'
learning_rate:
  desc: null
  value: 0.0005
local_process_index:
  desc: null
  # Quoted: loads as the string "0", not an integer.
  value: '0'
lr_scheduler_type:
  desc: null
  value: cosine
max_eval_steps:
  desc: null
  value: -1
max_train_steps:
  desc: null
  value: 150000
num_processes:
  desc: null
  # Quoted: loads as the string "16", not an integer.
  value: '16'
num_warmup_steps:
  desc: null
  value: 2000
process_index:
  desc: null
  # Quoted: loads as the string "0", not an integer.
  value: '0'
save_checkpoint_steps:
  desc: null
  value: 15000
seed:
  desc: null
  value: 1
seq_length:
  desc: null
  value: 1024
shuffle_buffer:
  desc: null
  value: 1000
train_batch_size:
  desc: null
  value: 12
use_fp16:
  desc: null
  # Quoted: loads as the string "True", not a boolean.
  value: 'True'
valid_batch_size:
  desc: null
  value: 12
weight_decay:
  desc: null
  value: 0.1