# Run configuration snapshot (format marker: wandb_version 1).
# Every setting below is stored as a mapping with two keys:
#   desc:  optional description (null for all entries in this file)
#   value: the recorded value of the setting
wandb_version: 1

__cached__setup_devices:
  desc: null
  value: cpu
_n_gpu:
  desc: null
  value: 0
# Client/runtime metadata recorded alongside the settings.
_wandb:
  desc: null
  value:
    cli_version: '0.10.33'
    framework: huggingface
    huggingface_version: '4.9.0.dev0'
    is_jupyter_run: false
    is_kaggle_kernel: false
    python_version: '3.8.10'
    # NOTE(review): the numeric keys below are reconstructed as children of
    # `t` from the flattened text; confirm against a freshly generated file.
    t:
      1:
        - 1
        - 3
        - 11
      4: '3.8.10'
      5: '0.10.33'
      6: '4.9.0.dev0'
      8:
        - 5
adafactor:
  desc: null
  value: false
adam_beta1:
  desc: null
  value: 0.9
adam_beta2:
  desc: null
  value: 0.98
adam_epsilon:
  desc: null
  value: 1.0e-08
block_size:
  desc: null
  value: 512
cache_dir:
  desc: null
  value: null
config_name:
  desc: null
  value: /home/cahya/Work/flax-community/gpt2-medium-indonesian
dataloader_drop_last:
  desc: null
  value: false
dataloader_num_workers:
  desc: null
  value: 64
dataloader_pin_memory:
  desc: null
  value: true
dataset_config_name:
  desc: null
  value: unshuffled_deduplicated_id
dataset_name:
  desc: null
  value: oscar
ddp_find_unused_parameters:
  desc: null
  value: null
debug:
  desc: null
  value: []
deepspeed:
  desc: null
  value: null
disable_tqdm:
  desc: null
  value: false
do_eval:
  desc: null
  value: true
do_predict:
  desc: null
  value: false
do_train:
  desc: null
  value: true
dtype:
  desc: null
  value: float32
eval_accumulation_steps:
  desc: null
  value: null
eval_steps:
  desc: null
  value: 10
evaluation_strategy:
  desc: null
  value: IntervalStrategy.NO
fp16:
  desc: null
  value: false
fp16_backend:
  desc: null
  value: auto
fp16_full_eval:
  desc: null
  value: false
fp16_opt_level:
  desc: null
  value: O1
gradient_accumulation_steps:
  desc: null
  value: 1
greater_is_better:
  desc: null
  value: null
group_by_length:
  desc: null
  value: false
ignore_data_skip:
  desc: null
  value: false
label_names:
  desc: null
  value: null
label_smoothing_factor:
  desc: null
  value: 0.0
learning_rate:
  desc: null
  value: 0.0024
length_column_name:
  desc: null
  value: length
load_best_model_at_end:
  desc: null
  value: false
local_rank:
  desc: null
  value: -1
log_level:
  desc: null
  value: -1
log_level_replica:
  desc: null
  value: -1
log_on_each_node:
  desc: null
  value: true
logging_dir:
  desc: null
  value: /home/cahya/Work/flax-community/gpt2-medium-indonesian/runs/Jul09_14-14-49_t1v-n-528d9406-w-0
logging_first_step:
  desc: null
  value: false
logging_steps:
  desc: null
  value: 500
logging_strategy:
  desc: null
  value: IntervalStrategy.STEPS
lr_scheduler_type:
  desc: null
  value: SchedulerType.LINEAR
max_eval_samples:
  desc: null
  value: 1000
max_grad_norm:
  desc: null
  value: 1.0
max_steps:
  desc: null
  value: -1
max_train_samples:
  desc: null
  value: 10000
metric_for_best_model:
  desc: null
  value: null
model_name_or_path:
  desc: null
  value: null
model_type:
  desc: null
  value: gpt2
mp_parameters:
  desc: null
  value: ''
no_cuda:
  desc: null
  value: false
num_train_epochs:
  desc: null
  value: 20.0
output_dir:
  desc: null
  value: /home/cahya/Work/flax-community/gpt2-medium-indonesian
overwrite_cache:
  desc: null
  value: false
overwrite_output_dir:
  desc: null
  value: true
past_index:
  desc: null
  value: -1
per_device_eval_batch_size:
  desc: null
  value: 24
per_device_train_batch_size:
  desc: null
  value: 24
per_gpu_eval_batch_size:
  desc: null
  value: null
per_gpu_train_batch_size:
  desc: null
  value: null
prediction_loss_only:
  desc: null
  value: false
preprocessing_num_workers:
  desc: null
  value: 64
push_to_hub:
  desc: null
  value: true
push_to_hub_model_id:
  desc: null
  value: gpt2-medium-indonesian
push_to_hub_organization:
  desc: null
  value: null
push_to_hub_token:
  desc: null
  value: null
remove_unused_columns:
  desc: null
  value: true
report_to:
  desc: null
  value:
    - tensorboard
    - wandb
resume_from_checkpoint:
  desc: null
  value: null
run_name:
  desc: null
  value: /home/cahya/Work/flax-community/gpt2-medium-indonesian
save_on_each_node:
  desc: null
  value: false
save_steps:
  desc: null
  value: 10
save_strategy:
  desc: null
  value: IntervalStrategy.STEPS
save_total_limit:
  desc: null
  value: null
seed:
  desc: null
  value: 42
sharded_ddp:
  desc: null
  value: []
skip_memory_metrics:
  desc: null
  value: true
test_log:
  desc: null
  value: 12345
tokenizer_name:
  desc: null
  value: /home/cahya/Work/flax-community/gpt2-medium-indonesian
tpu_metrics_debug:
  desc: null
  value: false
tpu_num_cores:
  desc: null
  value: null
train_file:
  desc: null
  value: null
use_fast_tokenizer:
  desc: null
  value: true
use_legacy_prediction_loop:
  desc: null
  value: false
validation_file:
  desc: null
  value: null
validation_split_percentage:
  desc: null
  value: 5
warmup_ratio:
  desc: null
  value: 0.0
warmup_steps:
  desc: null
  value: 1000
weight_decay:
  desc: null
  value: 0.01