File size: 2,438 Bytes
d3e3c87
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# Training-run configuration: a flat mapping of argument names to values,
# listed in alphabetical key order.
# NOTE(review): the `!!python/object/apply:` tags used below are
# Python-specific; safe YAML loaders reject them, so this file needs a loader
# that permits Python object construction. Only load it from a trusted source.
_n_gpu: 1
adafactor: false
adam_beta1: 0.9
adam_beta2: 0.999
# Keep the `1.0e-08` spelling (decimal point + signed exponent): some YAML 1.1
# loaders may read shorter exponent forms such as `1e-8` as strings.
adam_epsilon: 1.0e-08
cache_dir: null
dataloader_drop_last: false
dataloader_num_workers: 0
dataloader_pin_memory: true
ddp_find_unused_parameters: null
# Explicit empty list (not null).
debug: []
deepspeed: null
disable_tqdm: false
do_eval: true
do_predict: false
do_train: true
eval_accumulation_steps: 1
# Two entries: `tquad2-valid` (also the only entry of valid_dataset_list)
# plus `xquad.tr`.
eval_dataset_list:
- tquad2-valid
- xquad.tr
eval_steps: 300
# Tagged value: applies transformers.trainer_utils.IntervalStrategy to the
# single argument `steps`. The anchor `&id001` is reused by logging_strategy
# and save_strategy, so all three keys share this one value.
# NOTE(review): presumably eval_steps / logging_steps / save_steps are the
# intervals for these strategies - confirm against the consumer.
evaluation_strategy: &id001 !!python/object/apply:transformers.trainer_utils.IntervalStrategy
- steps
fp16: false
fp16_backend: auto
fp16_full_eval: false
fp16_opt_level: O1
freeze_embeddings: false
gradient_accumulation_steps: 4
greater_is_better: null
group_by_length: false
ignore_data_skip: false
label_names: null
# Written as integer `0` (not `0.0`), so it loads as an int.
label_smoothing_factor: 0
learning_rate: 0.001
length_column_name: length
load_best_model_at_end: false
# NOTE(review): -1 here and in log_level / log_level_replica / max_steps /
# past_index looks like a sentinel value - confirm its meaning per key.
local_rank: -1
log_level: -1
log_level_replica: -1
log_on_each_node: true
logging_dir: null
logging_first_step: false
logging_steps: 500
# Alias of the `&id001` anchor defined on evaluation_strategy.
logging_strategy: *id001
# Tagged value: applies transformers.trainer_utils.SchedulerType to the single
# argument `linear`. Python-specific tag; see the note at the top of the file.
lr_scheduler_type: !!python/object/apply:transformers.trainer_utils.SchedulerType
- linear
max_grad_norm: 1.0
max_source_length: 512
max_steps: -1
max_target_length: 64
metric_for_best_model: null
model_name_or_path: google/mt5-small
model_type: mt5
# Explicit empty string, distinct from the `null` values used elsewhere.
mp_parameters: ''
mt5_qg_format: both
# Three task names; output_dir contains the matching `3task` segment.
mt5_task_list:
- qa
- qg
- ans_ext
neptune_api_token: null
neptune_project: null
neptune_run: null
no_cuda: false
num_train_epochs: 15
# Path segments mirror other keys in this file: `mt5-small`
# (model_name_or_path), `3task` (mt5_task_list), `15ep` (num_train_epochs),
# `both` (mt5_qg_format), `tquad2train` (train_dataset_list).
# NOTE(review): `1e3` presumably stands for learning_rate 0.001 - confirm.
output_dir: runs/mt5-small/3task/adamw-1e3-15ep-both-tquad2train
overwrite_output_dir: false
past_index: -1
per_device_eval_batch_size: 64
per_device_train_batch_size: 64
per_gpu_eval_batch_size: null
per_gpu_train_batch_size: null
prediction_loss_only: false
prepare_data: true
push_to_hub: false
# Same string as the last path segment of output_dir.
push_to_hub_model_id: adamw-1e3-15ep-both-tquad2train
push_to_hub_organization: null
push_to_hub_token: null
remove_unused_columns: false
# Two reporting targets.
# NOTE(review): neptune_api_token, neptune_project, neptune_run and wandb_id
# are all null in this file - presumably supplied elsewhere; verify.
report_to:
- wandb
- neptune
resume_from_checkpoint: null
run_name: turque-mt5small-adamw-1e3-15ep-tquad2train
save_on_each_node: false
save_steps: 500
# Alias of the `&id001` anchor defined on evaluation_strategy.
save_strategy: *id001
save_total_limit: 1
seed: 42
# Explicit empty list (not null).
sharded_ddp: []
skip_memory_metrics: true
tokenizer_path: tokenizers/mt5-small
tpu_metrics_debug: false
tpu_num_cores: null
train_dataset_list:
- tquad2-train
train_file_path: data/train_data.pt
use_legacy_prediction_loop: false
valid_dataset_list:
- tquad2-valid
valid_file_path: data/valid_data.pt
wandb_id: null
wandb_project: turkish-qa-qg
warmup_ratio: 0.0
warmup_steps: 0
weight_decay: 0.0