---
# Weights & Biases run configuration snapshot (schema marker: wandb_version).
# Apart from `wandb_version`, every top-level entry is a mapping with two
# fields: `desc` (null for every entry in this file) and `value` (the
# recorded setting).
wandb_version: 1

__cached__setup_devices:
  desc: null
  value: cpu
_n_gpu:
  desc: null
  value: 0
# Client/run metadata: CLI, framework and Python versions, notebook flags.
_wandb:
  desc: null
  value:
    cli_version: 0.10.33
    framework: huggingface
    huggingface_version: 4.9.0.dev0
    is_jupyter_run: false
    is_kaggle_kernel: false
    python_version: 3.8.10
    # NOTE(review): the numeric keys under `t` are presumably wandb-internal
    # codes; their meaning is not visible here -- confirm before editing.
    t:
      1:
        - 1
        - 3
        - 11
      4: 3.8.10
      5: 0.10.33
      6: 4.9.0.dev0
      8:
        - 5
adafactor:
  desc: null
  value: false
adam_beta1:
  desc: null
  value: 0.9
adam_beta2:
  desc: null
  value: 0.98
adam_epsilon:
  desc: null
  value: 1.0e-08
cache_dir:
  desc: null
  value: null
config_name:
  desc: null
  value: ./
dataloader_drop_last:
  desc: null
  value: false
dataloader_num_workers:
  desc: null
  value: 0
dataloader_pin_memory:
  desc: null
  value: true
dataset_config_name:
  desc: null
  value: null
dataset_name:
  desc: null
  value: null
ddp_find_unused_parameters:
  desc: null
  value: null
debug:
  desc: null
  value: []
deepspeed:
  desc: null
  value: null
disable_tqdm:
  desc: null
  value: false
do_eval:
  desc: null
  value: false
do_predict:
  desc: null
  value: false
do_train:
  desc: null
  value: false
dtype:
  desc: null
  value: float32
eval_accumulation_steps:
  desc: null
  value: null
eval_steps:
  desc: null
  value: 6000
evaluation_strategy:
  desc: null
  value: IntervalStrategy.NO
fp16:
  desc: null
  value: false
fp16_backend:
  desc: null
  value: auto
fp16_full_eval:
  desc: null
  value: false
fp16_opt_level:
  desc: null
  value: O1
gradient_accumulation_steps:
  desc: null
  value: 1
greater_is_better:
  desc: null
  value: null
group_by_length:
  desc: null
  value: false
ignore_data_skip:
  desc: null
  value: false
label_names:
  desc: null
  value: null
label_smoothing_factor:
  desc: null
  value: 0.0
learning_rate:
  desc: null
  value: 3.0e-05
length_column_name:
  desc: null
  value: length
line_by_line:
  desc: null
  value: false
load_best_model_at_end:
  desc: null
  value: false
local_rank:
  desc: null
  value: -1
log_level:
  desc: null
  value: -1
log_level_replica:
  desc: null
  value: -1
log_on_each_node:
  desc: null
  value: true
logging_dir:
  desc: null
  value: ./runs/Jul16_09-59-13_t1v-n-f5c06ea1-w-0
logging_first_step:
  desc: null
  value: false
logging_steps:
  desc: null
  value: 50
logging_strategy:
  desc: null
  value: IntervalStrategy.STEPS
lr_scheduler_type:
  desc: null
  value: SchedulerType.LINEAR
max_eval_samples:
  desc: null
  value: 4000
max_grad_norm:
  desc: null
  value: 1.0
max_seq_length:
  desc: null
  value: 4096
max_steps:
  desc: null
  value: -1
metric_for_best_model:
  desc: null
  value: null
mlm_probability:
  desc: null
  value: 0.15
model_name_or_path:
  desc: null
  value: null
model_type:
  desc: null
  value: big_bird
mp_parameters:
  desc: null
  value: ''
no_cuda:
  desc: null
  value: false
num_train_epochs:
  desc: null
  value: 5.0
output_dir:
  desc: null
  value: ./
overwrite_cache:
  desc: null
  value: false
overwrite_output_dir:
  desc: null
  value: true
pad_to_max_length:
  desc: null
  value: false
past_index:
  desc: null
  value: -1
per_device_eval_batch_size:
  desc: null
  value: 1
per_device_train_batch_size:
  desc: null
  value: 1
per_gpu_eval_batch_size:
  desc: null
  value: null
per_gpu_train_batch_size:
  desc: null
  value: null
prediction_loss_only:
  desc: null
  value: false
preprocessing_num_workers:
  desc: null
  value: 96
push_to_hub:
  desc: null
  value: true
push_to_hub_model_id:
  desc: null
  value: ''
push_to_hub_organization:
  desc: null
  value: null
push_to_hub_token:
  desc: null
  value: null
remove_unused_columns:
  desc: null
  value: true
report_to:
  desc: null
  value:
    - tensorboard
    - wandb
resume_from_checkpoint:
  desc: null
  value: ./
run_name:
  desc: null
  value: ./
save_on_each_node:
  desc: null
  value: false
save_steps:
  desc: null
  value: 15000
save_strategy:
  desc: null
  value: IntervalStrategy.STEPS
save_total_limit:
  desc: null
  value: 20
seed:
  desc: null
  value: 42
sharded_ddp:
  desc: null
  value: []
skip_memory_metrics:
  desc: null
  value: true
tokenizer_name:
  desc: null
  value: ./
tpu_metrics_debug:
  desc: null
  value: false
tpu_num_cores:
  desc: null
  value: null
train_ref_file:
  desc: null
  value: null
use_fast_tokenizer:
  desc: null
  value: true
use_legacy_prediction_loop:
  desc: null
  value: false
validation_ref_file:
  desc: null
  value: null
validation_split_percentage:
  desc: null
  value: 5
warmup_ratio:
  desc: null
  value: 0.0
warmup_steps:
  desc: null
  value: 10000
weight_decay:
  desc: null
  value: 0.0095