File size: 4,866 Bytes
872c781
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
# Recorded run configuration; the `_wandb` entry below names the client that
# logged it. Every setting in this file is a mapping with a `desc` (null
# throughout) and the recorded `value`.
# NOTE(review): this looks machine-generated — hand edits are presumably
# overwritten on the next run; confirm before relying on these comments.
wandb_version: 1

# Recorded device is `cpu`, and `_n_gpu` right below is 0.
__cached__setup_devices:
  desc: null
  value: cpu
_n_gpu:
  desc: null
  value: 0
# Metadata about the logging client and environment rather than a training
# setting: client version, framework name and version, Python version.
_wandb:
  desc: null
  value:
    cli_version: 0.10.33
    framework: huggingface
    huggingface_version: 4.9.0.dev0
    is_jupyter_run: false
    is_kaggle_kernel: false
    python_version: 3.8.10
    # Numerically keyed table. Entries 4, 5 and 6 repeat python_version,
    # cli_version and huggingface_version from above; the meaning of the
    # integer codes under 1 and 8 is not visible in this file.
    t:
      1:
      - 1
      - 3
      - 11
      4: 3.8.10
      5: 0.10.33
      6: 4.9.0.dev0
      8:
      - 5
# Optimizer-related entries: the adafactor flag is false, followed by the
# recorded adam_* hyperparameters.
adafactor:
  desc: null
  value: false
adam_beta1:
  desc: null
  value: 0.9
adam_beta2:
  desc: null
  value: 0.98
adam_epsilon:
  desc: null
  value: 1.0e-08
# presumably the token length of each training chunk — TODO confirm against
# the training script.
block_size:
  desc: null
  value: 512
cache_dir:
  desc: null
  value: null
# Same path as output_dir, run_name and tokenizer_name elsewhere in this file.
config_name:
  desc: null
  value: /home/cahya/Work/flax-community/gpt2-medium-indonesian
dataloader_drop_last:
  desc: null
  value: false
dataloader_num_workers:
  desc: null
  value: 64
dataloader_pin_memory:
  desc: null
  value: true
# Dataset `oscar` with configuration `unshuffled_deduplicated_id`.
dataset_config_name:
  desc: null
  value: unshuffled_deduplicated_id
dataset_name:
  desc: null
  value: oscar
ddp_find_unused_parameters:
  desc: null
  value: null
# Empty list, not null.
debug:
  desc: null
  value: []
deepspeed:
  desc: null
  value: null
disable_tqdm:
  desc: null
  value: false
# Training and evaluation are flagged on; prediction is off.
do_eval:
  desc: null
  value: true
do_predict:
  desc: null
  value: false
do_train:
  desc: null
  value: true
dtype:
  desc: null
  value: float32
eval_accumulation_steps:
  desc: null
  value: null
eval_steps:
  desc: null
  value: 10
# NOTE(review): do_eval is true and eval_steps is 10, yet the strategy is
# recorded as IntervalStrategy.NO — confirm which of these the training
# script actually honours.
# The value is an enum repr stored as a plain string (as are
# logging_strategy, lr_scheduler_type and save_strategy).
evaluation_strategy:
  desc: null
  value: IntervalStrategy.NO
# Mixed-precision entries; fp16 itself is false in this run.
fp16:
  desc: null
  value: false
fp16_backend:
  desc: null
  value: auto
fp16_full_eval:
  desc: null
  value: false
fp16_opt_level:
  desc: null
  value: O1
gradient_accumulation_steps:
  desc: null
  value: 1
greater_is_better:
  desc: null
  value: null
group_by_length:
  desc: null
  value: false
ignore_data_skip:
  desc: null
  value: false
label_names:
  desc: null
  value: null
label_smoothing_factor:
  desc: null
  value: 0.0
# Recorded learning rate; the scheduler type and warmup entries are stored
# under lr_scheduler_type, warmup_ratio and warmup_steps in this file.
learning_rate:
  desc: null
  value: 0.0024
length_column_name:
  desc: null
  value: length
load_best_model_at_end:
  desc: null
  value: false
# -1 is recorded here and for log_level, log_level_replica, max_steps and
# past_index; presumably an "unset / use default" sentinel — TODO confirm.
local_rank:
  desc: null
  value: -1
log_level:
  desc: null
  value: -1
log_level_replica:
  desc: null
  value: -1
log_on_each_node:
  desc: null
  value: true
# A runs/ subdirectory of the output_dir path; the last path component looks
# like a timestamp plus host name — confirm if it matters.
logging_dir:
  desc: null
  value: /home/cahya/Work/flax-community/gpt2-medium-indonesian/runs/Jul09_14-14-49_t1v-n-528d9406-w-0
logging_first_step:
  desc: null
  value: false
logging_steps:
  desc: null
  value: 500
logging_strategy:
  desc: null
  value: IntervalStrategy.STEPS
lr_scheduler_type:
  desc: null
  value: SchedulerType.LINEAR
# NOTE(review): sample caps are recorded for both evaluation (1000) and
# training (10000, under max_train_samples); presumably a reduced trial run —
# confirm before reusing these settings.
max_eval_samples:
  desc: null
  value: 1000
max_grad_norm:
  desc: null
  value: 1.0
max_steps:
  desc: null
  value: -1
max_train_samples:
  desc: null
  value: 10000
metric_for_best_model:
  desc: null
  value: null
# No model_name_or_path is recorded; model_type is gpt2 and config_name
# points at a local directory. Presumably the model was initialised from that
# config rather than from a checkpoint — TODO confirm.
model_name_or_path:
  desc: null
  value: null
model_type:
  desc: null
  value: gpt2
# Empty string, not null.
mp_parameters:
  desc: null
  value: ''
no_cuda:
  desc: null
  value: false
num_train_epochs:
  desc: null
  value: 20.0
# Same path as config_name, run_name and tokenizer_name; overwrite_output_dir
# below is true.
output_dir:
  desc: null
  value: /home/cahya/Work/flax-community/gpt2-medium-indonesian
overwrite_cache:
  desc: null
  value: false
overwrite_output_dir:
  desc: null
  value: true
past_index:
  desc: null
  value: -1
# Per-device batch sizes are both 24; the per_gpu_* counterparts are null.
per_device_eval_batch_size:
  desc: null
  value: 24
per_device_train_batch_size:
  desc: null
  value: 24
per_gpu_eval_batch_size:
  desc: null
  value: null
per_gpu_train_batch_size:
  desc: null
  value: null
prediction_loss_only:
  desc: null
  value: false
# Same worker count (64) as dataloader_num_workers.
preprocessing_num_workers:
  desc: null
  value: 64
# Hub upload is flagged on with model id `gpt2-medium-indonesian`; the
# organization and token are both null, so no credential is stored here.
push_to_hub:
  desc: null
  value: true
push_to_hub_model_id:
  desc: null
  value: gpt2-medium-indonesian
push_to_hub_organization:
  desc: null
  value: null
push_to_hub_token:
  desc: null
  value: null
remove_unused_columns:
  desc: null
  value: true
# Two reporting targets are recorded: tensorboard and wandb.
report_to:
  desc: null
  value:
  - tensorboard
  - wandb
resume_from_checkpoint:
  desc: null
  value: null
# The run name is the full output_dir path, not a short label.
run_name:
  desc: null
  value: /home/cahya/Work/flax-community/gpt2-medium-indonesian
save_on_each_node:
  desc: null
  value: false
# NOTE(review): save_steps is 10 with save_strategy IntervalStrategy.STEPS
# and save_total_limit null; presumably a checkpoint every 10 steps with none
# pruned — confirm this is intended.
save_steps:
  desc: null
  value: 10
save_strategy:
  desc: null
  value: IntervalStrategy.STEPS
save_total_limit:
  desc: null
  value: null
seed:
  desc: null
  value: 42
# Empty list, not null.
sharded_ddp:
  desc: null
  value: []
skip_memory_metrics:
  desc: null
  value: true
# NOTE(review): looks like a placeholder or test value — confirm whether this
# entry is intentional.
test_log:
  desc: null
  value: 12345
# Same path as config_name, output_dir and run_name.
tokenizer_name:
  desc: null
  value: /home/cahya/Work/flax-community/gpt2-medium-indonesian
tpu_metrics_debug:
  desc: null
  value: false
tpu_num_cores:
  desc: null
  value: null
# train_file and validation_file are both null; dataset_name and
# dataset_config_name are set elsewhere in this file.
train_file:
  desc: null
  value: null
use_fast_tokenizer:
  desc: null
  value: true
use_legacy_prediction_loop:
  desc: null
  value: false
validation_file:
  desc: null
  value: null
validation_split_percentage:
  desc: null
  value: 5
# warmup_ratio is 0.0 while warmup_steps is 1000.
warmup_ratio:
  desc: null
  value: 0.0
warmup_steps:
  desc: null
  value: 1000
weight_decay:
  desc: null
  value: 0.01