tiagoblima committed on
Commit dcb0611 (1 parent: 737a380)

End of training

Files changed (5)
  1. README.md +3 -1
  2. all_results.json +13 -0
  3. eval_results.json +8 -0
  4. train_results.json +8 -0
  5. trainer_state.json +482 -0
README.md CHANGED
@@ -3,6 +3,8 @@ license: apache-2.0
 base_model: google/mt5-base
 tags:
 - generated_from_trainer
+datasets:
+- tiagoblima/preprocessed-du-qg-squadv1_pt
 model-index:
 - name: mt5_base-qg-aas-oficial
   results: []
@@ -13,7 +15,7 @@ should probably proofread and complete it, then remove this comment. -->
 
 # mt5_base-qg-aas-oficial
 
-This model is a fine-tuned version of [google/mt5-base](https://huggingface.co/google/mt5-base) on an unknown dataset.
+This model is a fine-tuned version of [google/mt5-base](https://huggingface.co/google/mt5-base) on the tiagoblima/preprocessed-du-qg-squadv1_pt dataset.
 It achieves the following results on the evaluation set:
 - Loss: 1.5883
 
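For readers who want to try the updated checkpoint, here is a minimal inference sketch using the transformers library. It assumes the model is published under tiagoblima/mt5_base-qg-aas-oficial and that it takes a plain Portuguese context as input; the exact input format used during preprocessing is not documented in this commit, so the example prompt is only an assumption.

```python
# Minimal sketch; the repo id and input format are assumptions, not documented in this commit.
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

model_id = "tiagoblima/mt5_base-qg-aas-oficial"  # assumed Hub id for this checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSeq2SeqLM.from_pretrained(model_id)

# Hypothetical context; the dataset preprocessing may prepend the answer or a task prefix.
context = "O Brasil é o maior país da América do Sul."
inputs = tokenizer(context, return_tensors="pt", truncation=True)
outputs = model.generate(**inputs, max_new_tokens=64, num_beams=4)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```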
all_results.json ADDED
@@ -0,0 +1,13 @@
+{
+    "epoch": 5.0,
+    "eval_loss": 1.5882539749145508,
+    "eval_runtime": 32.2344,
+    "eval_samples": 6327,
+    "eval_samples_per_second": 196.281,
+    "eval_steps_per_second": 3.071,
+    "train_loss": 1.9092482425089694,
+    "train_runtime": 4641.0606,
+    "train_samples": 44334,
+    "train_samples_per_second": 47.763,
+    "train_steps_per_second": 1.493
+}
eval_results.json ADDED
@@ -0,0 +1,8 @@
+{
+    "epoch": 5.0,
+    "eval_loss": 1.5882539749145508,
+    "eval_runtime": 32.2344,
+    "eval_samples": 6327,
+    "eval_samples_per_second": 196.281,
+    "eval_steps_per_second": 3.071
+}
train_results.json ADDED
@@ -0,0 +1,8 @@
+{
+    "epoch": 5.0,
+    "train_loss": 1.9092482425089694,
+    "train_runtime": 4641.0606,
+    "train_samples": 44334,
+    "train_samples_per_second": 47.763,
+    "train_steps_per_second": 1.493
+}
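These numbers also pin down the effective batch size: train_samples_per_second divided by train_steps_per_second comes out near 32, which matches 44334 samples × 5 epochs / 6930 optimizer steps. A small check, assuming the same file layout:

```python
import json

with open("train_results.json") as f:  # path assumed relative to the repository root
    tr = json.load(f)

# Effective batch size = per-device batch size x gradient accumulation x device count.
effective_batch = tr["train_samples_per_second"] / tr["train_steps_per_second"]
print(round(effective_batch))  # ~32, consistent with 44334 samples * 5 epochs / 6930 steps
```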
trainer_state.json ADDED
@@ -0,0 +1,482 @@
+{
+  "best_metric": 1.5882539749145508,
+  "best_model_checkpoint": "/temp/mt5_base-qg-aas-oficial/checkpoint-6930",
+  "epoch": 5.0,
+  "eval_steps": 500,
+  "global_step": 6930,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.07,
+      "learning_rate": 9.855699855699856e-05,
+      "loss": 10.3838,
+      "step": 100
+    },
+    {
+      "epoch": 0.14,
+      "learning_rate": 9.711399711399713e-05,
+      "loss": 3.4535,
+      "step": 200
+    },
+    {
+      "epoch": 0.22,
+      "learning_rate": 9.567099567099568e-05,
+      "loss": 2.5236,
+      "step": 300
+    },
+    {
+      "epoch": 0.29,
+      "learning_rate": 9.422799422799424e-05,
+      "loss": 2.3734,
+      "step": 400
+    },
+    {
+      "epoch": 0.36,
+      "learning_rate": 9.278499278499279e-05,
+      "loss": 2.2729,
+      "step": 500
+    },
+    {
+      "epoch": 0.43,
+      "learning_rate": 9.134199134199136e-05,
+      "loss": 2.2424,
+      "step": 600
+    },
+    {
+      "epoch": 0.51,
+      "learning_rate": 8.98989898989899e-05,
+      "loss": 2.157,
+      "step": 700
+    },
+    {
+      "epoch": 0.58,
+      "learning_rate": 8.845598845598845e-05,
+      "loss": 2.1577,
+      "step": 800
+    },
+    {
+      "epoch": 0.65,
+      "learning_rate": 8.701298701298701e-05,
+      "loss": 2.0695,
+      "step": 900
+    },
+    {
+      "epoch": 0.72,
+      "learning_rate": 8.556998556998557e-05,
+      "loss": 2.0293,
+      "step": 1000
+    },
+    {
+      "epoch": 0.79,
+      "learning_rate": 8.412698412698413e-05,
+      "loss": 2.0455,
+      "step": 1100
+    },
+    {
+      "epoch": 0.87,
+      "learning_rate": 8.268398268398268e-05,
+      "loss": 2.0116,
+      "step": 1200
+    },
+    {
+      "epoch": 0.94,
+      "learning_rate": 8.124098124098124e-05,
+      "loss": 1.9679,
+      "step": 1300
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 1.6873297691345215,
+      "eval_runtime": 32.2612,
+      "eval_samples_per_second": 196.118,
+      "eval_steps_per_second": 3.069,
+      "step": 1386
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 7.97979797979798e-05,
+      "loss": 1.9474,
+      "step": 1400
+    },
+    {
+      "epoch": 1.08,
+      "learning_rate": 7.835497835497836e-05,
+      "loss": 1.8847,
+      "step": 1500
+    },
+    {
+      "epoch": 1.15,
+      "learning_rate": 7.691197691197691e-05,
+      "loss": 1.8713,
+      "step": 1600
+    },
+    {
+      "epoch": 1.23,
+      "learning_rate": 7.546897546897548e-05,
+      "loss": 1.8745,
+      "step": 1700
+    },
+    {
+      "epoch": 1.3,
+      "learning_rate": 7.402597402597404e-05,
+      "loss": 1.8253,
+      "step": 1800
+    },
+    {
+      "epoch": 1.37,
+      "learning_rate": 7.258297258297259e-05,
+      "loss": 1.8577,
+      "step": 1900
+    },
+    {
+      "epoch": 1.44,
+      "learning_rate": 7.113997113997114e-05,
+      "loss": 1.8153,
+      "step": 2000
+    },
+    {
+      "epoch": 1.52,
+      "learning_rate": 6.96969696969697e-05,
+      "loss": 1.8273,
+      "step": 2100
+    },
+    {
+      "epoch": 1.59,
+      "learning_rate": 6.825396825396825e-05,
+      "loss": 1.8135,
+      "step": 2200
+    },
+    {
+      "epoch": 1.66,
+      "learning_rate": 6.681096681096681e-05,
+      "loss": 1.8156,
+      "step": 2300
+    },
+    {
+      "epoch": 1.73,
+      "learning_rate": 6.536796536796536e-05,
+      "loss": 1.8122,
+      "step": 2400
+    },
+    {
+      "epoch": 1.8,
+      "learning_rate": 6.392496392496393e-05,
+      "loss": 1.8016,
+      "step": 2500
+    },
+    {
+      "epoch": 1.88,
+      "learning_rate": 6.248196248196248e-05,
+      "loss": 1.8018,
+      "step": 2600
+    },
+    {
+      "epoch": 1.95,
+      "learning_rate": 6.103896103896104e-05,
+      "loss": 1.7971,
+      "step": 2700
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 1.6266478300094604,
+      "eval_runtime": 32.2462,
+      "eval_samples_per_second": 196.209,
+      "eval_steps_per_second": 3.07,
+      "step": 2772
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 5.959595959595959e-05,
+      "loss": 1.7425,
+      "step": 2800
+    },
+    {
+      "epoch": 2.09,
+      "learning_rate": 5.815295815295816e-05,
+      "loss": 1.7245,
+      "step": 2900
+    },
+    {
+      "epoch": 2.16,
+      "learning_rate": 5.6709956709956715e-05,
+      "loss": 1.6993,
+      "step": 3000
+    },
+    {
+      "epoch": 2.24,
+      "learning_rate": 5.526695526695527e-05,
+      "loss": 1.7235,
+      "step": 3100
+    },
+    {
+      "epoch": 2.31,
+      "learning_rate": 5.382395382395382e-05,
+      "loss": 1.7153,
+      "step": 3200
+    },
+    {
+      "epoch": 2.38,
+      "learning_rate": 5.2380952380952384e-05,
+      "loss": 1.6814,
+      "step": 3300
+    },
+    {
+      "epoch": 2.45,
+      "learning_rate": 5.093795093795094e-05,
+      "loss": 1.6935,
+      "step": 3400
+    },
+    {
+      "epoch": 2.53,
+      "learning_rate": 4.94949494949495e-05,
+      "loss": 1.6631,
+      "step": 3500
+    },
+    {
+      "epoch": 2.6,
+      "learning_rate": 4.8051948051948054e-05,
+      "loss": 1.6955,
+      "step": 3600
+    },
+    {
+      "epoch": 2.67,
+      "learning_rate": 4.6608946608946615e-05,
+      "loss": 1.681,
+      "step": 3700
+    },
+    {
+      "epoch": 2.74,
+      "learning_rate": 4.516594516594517e-05,
+      "loss": 1.6911,
+      "step": 3800
+    },
+    {
+      "epoch": 2.81,
+      "learning_rate": 4.3722943722943724e-05,
+      "loss": 1.6962,
+      "step": 3900
+    },
+    {
+      "epoch": 2.89,
+      "learning_rate": 4.227994227994228e-05,
+      "loss": 1.6871,
+      "step": 4000
+    },
+    {
+      "epoch": 2.96,
+      "learning_rate": 4.083694083694084e-05,
+      "loss": 1.6798,
+      "step": 4100
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 1.5993900299072266,
+      "eval_runtime": 32.2465,
+      "eval_samples_per_second": 196.207,
+      "eval_steps_per_second": 3.07,
+      "step": 4158
+    },
+    {
+      "epoch": 3.03,
+      "learning_rate": 3.939393939393939e-05,
+      "loss": 1.6521,
+      "step": 4200
+    },
+    {
+      "epoch": 3.1,
+      "learning_rate": 3.7950937950937954e-05,
+      "loss": 1.6179,
+      "step": 4300
+    },
+    {
+      "epoch": 3.17,
+      "learning_rate": 3.650793650793651e-05,
+      "loss": 1.6063,
+      "step": 4400
+    },
+    {
+      "epoch": 3.25,
+      "learning_rate": 3.506493506493507e-05,
+      "loss": 1.625,
+      "step": 4500
+    },
+    {
+      "epoch": 3.32,
+      "learning_rate": 3.3621933621933624e-05,
+      "loss": 1.6208,
+      "step": 4600
+    },
+    {
+      "epoch": 3.39,
+      "learning_rate": 3.217893217893218e-05,
+      "loss": 1.6281,
+      "step": 4700
+    },
+    {
+      "epoch": 3.46,
+      "learning_rate": 3.073593073593073e-05,
+      "loss": 1.6049,
+      "step": 4800
+    },
+    {
+      "epoch": 3.54,
+      "learning_rate": 2.9292929292929294e-05,
+      "loss": 1.5794,
+      "step": 4900
+    },
+    {
+      "epoch": 3.61,
+      "learning_rate": 2.7849927849927855e-05,
+      "loss": 1.6161,
+      "step": 5000
+    },
+    {
+      "epoch": 3.68,
+      "learning_rate": 2.640692640692641e-05,
+      "loss": 1.611,
+      "step": 5100
+    },
+    {
+      "epoch": 3.75,
+      "learning_rate": 2.4963924963924963e-05,
+      "loss": 1.6126,
+      "step": 5200
+    },
+    {
+      "epoch": 3.82,
+      "learning_rate": 2.352092352092352e-05,
+      "loss": 1.6125,
+      "step": 5300
+    },
+    {
+      "epoch": 3.9,
+      "learning_rate": 2.207792207792208e-05,
+      "loss": 1.6168,
+      "step": 5400
+    },
+    {
+      "epoch": 3.97,
+      "learning_rate": 2.0634920634920636e-05,
+      "loss": 1.6261,
+      "step": 5500
+    },
+    {
+      "epoch": 4.0,
+      "eval_loss": 1.5885429382324219,
+      "eval_runtime": 32.2346,
+      "eval_samples_per_second": 196.28,
+      "eval_steps_per_second": 3.071,
+      "step": 5544
+    },
+    {
+      "epoch": 4.04,
+      "learning_rate": 1.919191919191919e-05,
+      "loss": 1.572,
+      "step": 5600
+    },
+    {
+      "epoch": 4.11,
+      "learning_rate": 1.7748917748917752e-05,
+      "loss": 1.5608,
+      "step": 5700
+    },
+    {
+      "epoch": 4.18,
+      "learning_rate": 1.630591630591631e-05,
+      "loss": 1.5693,
+      "step": 5800
+    },
+    {
+      "epoch": 4.26,
+      "learning_rate": 1.4862914862914865e-05,
+      "loss": 1.5761,
+      "step": 5900
+    },
+    {
+      "epoch": 4.33,
+      "learning_rate": 1.3419913419913421e-05,
+      "loss": 1.5722,
+      "step": 6000
+    },
+    {
+      "epoch": 4.4,
+      "learning_rate": 1.1976911976911977e-05,
+      "loss": 1.5814,
+      "step": 6100
+    },
+    {
+      "epoch": 4.47,
+      "learning_rate": 1.0533910533910535e-05,
+      "loss": 1.5865,
+      "step": 6200
+    },
+    {
+      "epoch": 4.55,
+      "learning_rate": 9.090909090909091e-06,
+      "loss": 1.5635,
+      "step": 6300
+    },
+    {
+      "epoch": 4.62,
+      "learning_rate": 7.647907647907649e-06,
+      "loss": 1.5789,
+      "step": 6400
+    },
+    {
+      "epoch": 4.69,
+      "learning_rate": 6.204906204906205e-06,
+      "loss": 1.5731,
+      "step": 6500
+    },
+    {
+      "epoch": 4.76,
+      "learning_rate": 4.7619047619047615e-06,
+      "loss": 1.5402,
+      "step": 6600
+    },
+    {
+      "epoch": 4.83,
+      "learning_rate": 3.318903318903319e-06,
+      "loss": 1.5828,
+      "step": 6700
+    },
+    {
+      "epoch": 4.91,
+      "learning_rate": 1.875901875901876e-06,
+      "loss": 1.5648,
+      "step": 6800
+    },
+    {
+      "epoch": 4.98,
+      "learning_rate": 4.329004329004329e-07,
+      "loss": 1.5887,
+      "step": 6900
+    },
+    {
+      "epoch": 5.0,
+      "eval_loss": 1.5882539749145508,
+      "eval_runtime": 32.2402,
+      "eval_samples_per_second": 196.246,
+      "eval_steps_per_second": 3.071,
+      "step": 6930
+    },
+    {
+      "epoch": 5.0,
+      "step": 6930,
+      "total_flos": 2.6579255601659904e+17,
+      "train_loss": 1.9092482425089694,
+      "train_runtime": 4641.0606,
+      "train_samples_per_second": 47.763,
+      "train_steps_per_second": 1.493
+    }
+  ],
+  "logging_steps": 100,
+  "max_steps": 6930,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "total_flos": 2.6579255601659904e+17,
+  "trial_name": null,
+  "trial_params": null
+}
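The log_history above is straightforward to turn into a loss curve. A minimal sketch, assuming trainer_state.json sits in the repository root and matplotlib is available:

```python
import json
import matplotlib.pyplot as plt

with open("trainer_state.json") as f:  # path assumed relative to the repository root
    state = json.load(f)

# Training entries carry "loss"; evaluation entries carry "eval_loss".
train = [(e["step"], e["loss"]) for e in state["log_history"] if "loss" in e]
evals = [(e["step"], e["eval_loss"]) for e in state["log_history"] if "eval_loss" in e]

plt.plot(*zip(*train), label="train loss")
plt.plot(*zip(*evals), marker="o", label="eval loss")
plt.xlabel("step")
plt.ylabel("loss")
plt.legend()
plt.savefig("loss_curve.png")
```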