---
# Training-run configuration for the "mt5base-3task-highlight-combined3" run
# (base checkpoint: google/mt5-base). Keys are kept in alphabetical order.
#
# NOTE(review): this looks like a serialized dump of Hugging Face
# `transformers` training arguments extended with project-specific keys
# (mt5_*, *_dataset_list, neptune_*, wandb_*) -- confirm against the loader.
#
# WARNING: the `!!python/object/apply:...` tags below are PyYAML
# language-specific tags. Safe loaders (e.g. `yaml.safe_load`) reject them,
# so this file needs a loader that permits Python object construction.
# Only load it from a trusted source.
_n_gpu: 1
adafactor: false
adam_beta1: 0.9
adam_beta2: 0.999
adam_epsilon: 1.0e-08
cache_dir: null
dataloader_drop_last: false
dataloader_num_workers: 0
dataloader_pin_memory: true
ddp_find_unused_parameters: null
debug: []
deepspeed: null
disable_tqdm: false
do_eval: true
do_predict: false
do_train: true
eval_accumulation_steps: 1
eval_dataset_list:
  - tquad2-valid
  - xquad.tr
eval_steps: 300
# Anchor reused by `logging_strategy` and `save_strategy` further down, so all
# three settings share the same "steps" interval strategy value.
evaluation_strategy: &steps_strategy !!python/object/apply:transformers.trainer_utils.IntervalStrategy
  - steps
fp16: false
fp16_backend: auto
fp16_full_eval: false
fp16_opt_level: O1
freeze_embeddings: false
gradient_accumulation_steps: 8
greater_is_better: null
group_by_length: false
ignore_data_skip: false
label_names: null
label_smoothing_factor: 0
learning_rate: 0.001
length_column_name: length
load_best_model_at_end: false
local_rank: -1
log_level: -1
log_level_replica: -1
log_on_each_node: true
logging_dir: runs/mt5-base/mt5base-3task-highlight-combined3/runs/Dec02_01-35-48_palamut3.yonetim
logging_first_step: false
logging_steps: 500
logging_strategy: *steps_strategy
lr_scheduler_type: !!python/object/apply:transformers.trainer_utils.SchedulerType
  - linear
max_grad_norm: 1.0
max_source_length: 512
max_steps: -1
max_target_length: 64
metric_for_best_model: null
model_name_or_path: google/mt5-base
model_type: mt5
mp_parameters: ''
mt5_qg_format: highlight
mt5_task_list:
  - qa
  - qg
  - ans_ext
neptune_api_token: null
neptune_project: null
neptune_run: null
no_cuda: false
num_train_epochs: 15
output_dir: runs/mt5-base/mt5base-3task-highlight-combined3
overwrite_output_dir: false
past_index: -1
per_device_eval_batch_size: 32
per_device_train_batch_size: 32
per_gpu_eval_batch_size: null
per_gpu_train_batch_size: null
prediction_loss_only: false
prepare_data: true
push_to_hub: false
push_to_hub_model_id: mt5base-3task-highlight-combined3
push_to_hub_organization: null
push_to_hub_token: null
remove_unused_columns: false
report_to:
  - wandb
resume_from_checkpoint: null
run_name: mt5base-3task-highlight-combined3
save_on_each_node: false
save_steps: 500
save_strategy: *steps_strategy
save_total_limit: 1
seed: 42
sharded_ddp: []
skip_memory_metrics: true
tokenizer_path: tokenizers/mt5-base
tpu_metrics_debug: false
tpu_num_cores: null
# NOTE(review): `tquad2-valid` and `xquad.tr` are listed here and also in
# `eval_dataset_list` (and `tquad2-valid` in `valid_dataset_list`). If these
# lists are used as their names suggest, evaluation would run on data the
# model was trained on -- confirm this overlap is intentional.
train_dataset_list:
  - tquad2-train
  - tquad2-valid
  - xquad.tr
train_file_path: data/train_data.pt
use_legacy_prediction_loop: false
valid_dataset_list:
  - tquad2-valid
valid_file_path: data/valid_data.pt
wandb_id: null
wandb_project: turkish-qa-qg
warmup_ratio: 0.0
warmup_steps: 0
weight_decay: 0.0