File size: 2,161 Bytes
9c3de9e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
# Recorded run configuration. code_path below names code/run_mlm_flax.py,
# so this presumably belongs to a run of that script — confirm.
wandb_version: 1

# Run metadata. Uses the same desc/value layout as the settings entries
# that follow it in this file.
_wandb:
  desc: null
  value:
    cli_version: 0.12.9
    code_path: code/run_mlm_flax.py
    framework: huggingface
    huggingface_version: 4.16.0.dev0
    is_jupyter_run: false
    is_kaggle_kernel: false
    python_version: 3.8.10
    # NOTE(review): looks like Unix epoch seconds — confirm.
    start_time: 1642608719
    # Numerically keyed fields. Keys 4, 5 and 6 repeat the values of
    # python_version, cli_version and huggingface_version above. What the
    # integer lists under keys 1 and 8 encode is not visible in this
    # file — TODO confirm against the tool that wrote it.
    t:
      1:
      - 2
      - 3
      - 11
      - 12
      4: 3.8.10
      5: 0.12.9
      6: 4.16.0.dev0
      8:
      - 5
# Run settings, listed alphabetically by key. Every entry is a mapping with
# two fields: `desc` (null for all entries here) and `value` (the recorded
# setting). A null `value` means no value was recorded for that key.
adafactor:
  desc: null
  value: false
adam_beta1:
  desc: null
  value: 0.9
adam_beta2:
  desc: null
  value: 0.98
adam_epsilon:
  desc: null
  value: 1.0e-06
cache_dir:
  desc: null
  value: null
# NOTE(review): config_name, model_name_or_path, tokenizer_name and
# output_dir are all ./ and overwrite_output_dir is true, so inputs and
# outputs presumably share one directory — confirm this is intended.
config_name:
  desc: null
  value: ./
dataset_config_name:
  desc: null
  value: null
# dataset_name is set while train_file and validation_file are null.
dataset_name:
  desc: null
  value: NbAiLab/NCC
do_eval:
  desc: null
  value: true
do_train:
  desc: null
  value: true
dtype:
  desc: null
  value: bfloat16
# eval_steps, logging_steps, save_steps and warmup_steps are all 1000.
eval_steps:
  desc: null
  value: 1000
hub_model_id:
  desc: null
  value: null
hub_token:
  desc: null
  value: null
learning_rate:
  desc: null
  value: 0.0006
line_by_line:
  desc: null
  value: false
logging_steps:
  desc: null
  value: 1000
max_seq_length:
  desc: null
  value: 512
mlm_probability:
  desc: null
  value: 0.15
model_name_or_path:
  desc: null
  value: ./
model_type:
  desc: null
  value: roberta
# Recorded as a float (3.0), not an integer.
num_train_epochs:
  desc: null
  value: 3.0
output_dir:
  desc: null
  value: ./
overwrite_cache:
  desc: null
  value: false
overwrite_output_dir:
  desc: null
  value: true
pad_to_max_length:
  desc: null
  value: true
per_device_eval_batch_size:
  desc: null
  value: 46
per_device_train_batch_size:
  desc: null
  value: 46
preprocessing_num_workers:
  desc: null
  value: null
# NOTE(review): push_to_hub is true while hub_model_id and hub_token are
# both null; presumably the target repository and credentials are
# supplied elsewhere — confirm.
push_to_hub:
  desc: null
  value: true
save_steps:
  desc: null
  value: 1000
seed:
  desc: null
  value: 42
tokenizer_name:
  desc: null
  value: ./
train_file:
  desc: null
  value: null
train_ref_file:
  desc: null
  value: null
use_fast_tokenizer:
  desc: null
  value: true
validation_file:
  desc: null
  value: null
validation_ref_file:
  desc: null
  value: null
# NOTE(review): presumably a percentage (5 => 5%), per the key name — confirm.
validation_split_percentage:
  desc: null
  value: 5
warmup_steps:
  desc: null
  value: 1000
weight_decay:
  desc: null
  value: 0.01