File size: 13,903 Bytes
1b3e20c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
{
  "best_metric": 0.06875239312648773,
  "best_model_checkpoint": "/data/jcanete/all_results/pos/beto_uncased/epochs_4_bs_32_lr_5e-5/checkpoint-850",
  "epoch": 4.0,
  "global_step": 1792,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.11,
      "eval_accuracy": 0.9416445623342176,
      "eval_f1": 0.9315524877309181,
      "eval_loss": 0.20549777150154114,
      "eval_precision": 0.9304758488761358,
      "eval_recall": 0.9326316209954462,
      "eval_runtime": 2.2237,
      "eval_samples_per_second": 743.792,
      "eval_steps_per_second": 23.384,
      "step": 50
    },
    {
      "epoch": 0.22,
      "eval_accuracy": 0.9580969009166302,
      "eval_f1": 0.9511250424261785,
      "eval_loss": 0.14205431938171387,
      "eval_precision": 0.9507643795154272,
      "eval_recall": 0.9514859790684669,
      "eval_runtime": 2.1719,
      "eval_samples_per_second": 761.545,
      "eval_steps_per_second": 23.942,
      "step": 100
    },
    {
      "epoch": 0.33,
      "eval_accuracy": 0.963922371822852,
      "eval_f1": 0.9588180131287536,
      "eval_loss": 0.12034180760383606,
      "eval_precision": 0.9578433326689256,
      "eval_recall": 0.9597946792362387,
      "eval_runtime": 2.8499,
      "eval_samples_per_second": 580.367,
      "eval_steps_per_second": 18.246,
      "step": 150
    },
    {
      "epoch": 0.45,
      "eval_accuracy": 0.9677668468589464,
      "eval_f1": 0.9631556392700891,
      "eval_loss": 0.10953102260828018,
      "eval_precision": 0.9611918922682156,
      "eval_recall": 0.9651274266996884,
      "eval_runtime": 3.0514,
      "eval_samples_per_second": 542.046,
      "eval_steps_per_second": 17.041,
      "step": 200
    },
    {
      "epoch": 0.56,
      "eval_accuracy": 0.9704865191552228,
      "eval_f1": 0.9662253768243082,
      "eval_loss": 0.1019054427742958,
      "eval_precision": 0.9645330785765465,
      "eval_recall": 0.9679236238715347,
      "eval_runtime": 3.1025,
      "eval_samples_per_second": 533.122,
      "eval_steps_per_second": 16.761,
      "step": 250
    },
    {
      "epoch": 0.67,
      "eval_accuracy": 0.9732061914514992,
      "eval_f1": 0.9694056641404069,
      "eval_loss": 0.09022298455238342,
      "eval_precision": 0.9680156137974986,
      "eval_recall": 0.970799712391148,
      "eval_runtime": 3.1303,
      "eval_samples_per_second": 528.378,
      "eval_steps_per_second": 16.612,
      "step": 300
    },
    {
      "epoch": 0.78,
      "eval_accuracy": 0.973928079777054,
      "eval_f1": 0.9705615354688107,
      "eval_loss": 0.08854629099369049,
      "eval_precision": 0.9695266567015446,
      "eval_recall": 0.9715986258688184,
      "eval_runtime": 3.1649,
      "eval_samples_per_second": 522.602,
      "eval_steps_per_second": 16.43,
      "step": 350
    },
    {
      "epoch": 0.89,
      "eval_accuracy": 0.9757579827418326,
      "eval_f1": 0.9730894722662914,
      "eval_loss": 0.08236456662416458,
      "eval_precision": 0.972264869499332,
      "eval_recall": 0.9739154749540625,
      "eval_runtime": 3.2614,
      "eval_samples_per_second": 507.147,
      "eval_steps_per_second": 15.944,
      "step": 400
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.9756908303394554,
      "eval_f1": 0.9728064139767254,
      "eval_loss": 0.08048809319734573,
      "eval_precision": 0.9714018282118176,
      "eval_recall": 0.9742150675081889,
      "eval_runtime": 3.0696,
      "eval_samples_per_second": 538.832,
      "eval_steps_per_second": 16.94,
      "step": 450
    },
    {
      "epoch": 1.12,
      "learning_rate": 3.607700892857143e-05,
      "loss": 0.1891,
      "step": 500
    },
    {
      "epoch": 1.12,
      "eval_accuracy": 0.976681328274519,
      "eval_f1": 0.9741610570929944,
      "eval_loss": 0.08373625576496124,
      "eval_precision": 0.9728125560180851,
      "eval_recall": 0.9755133019094032,
      "eval_runtime": 3.2499,
      "eval_samples_per_second": 508.931,
      "eval_steps_per_second": 16.0,
      "step": 500
    },
    {
      "epoch": 1.23,
      "eval_accuracy": 0.9778229191149314,
      "eval_f1": 0.975405828768695,
      "eval_loss": 0.08112843334674835,
      "eval_precision": 0.9745210231463944,
      "eval_recall": 0.9762922425501318,
      "eval_runtime": 3.1199,
      "eval_samples_per_second": 530.139,
      "eval_steps_per_second": 16.667,
      "step": 550
    },
    {
      "epoch": 1.34,
      "eval_accuracy": 0.9784944431387033,
      "eval_f1": 0.9759250024932683,
      "eval_loss": 0.07825577259063721,
      "eval_precision": 0.974622524999004,
      "eval_recall": 0.9772309658863945,
      "eval_runtime": 3.0821,
      "eval_samples_per_second": 536.646,
      "eval_steps_per_second": 16.872,
      "step": 600
    },
    {
      "epoch": 1.45,
      "eval_accuracy": 0.9782594097303831,
      "eval_f1": 0.976126314948999,
      "eval_loss": 0.07251805067062378,
      "eval_precision": 0.9755615848062882,
      "eval_recall": 0.976691699288967,
      "eval_runtime": 3.0567,
      "eval_samples_per_second": 541.099,
      "eval_steps_per_second": 17.012,
      "step": 650
    },
    {
      "epoch": 1.56,
      "eval_accuracy": 0.9793506362690125,
      "eval_f1": 0.9770881149585869,
      "eval_loss": 0.07318206131458282,
      "eval_precision": 0.976367117386622,
      "eval_recall": 0.9778101781577055,
      "eval_runtime": 3.123,
      "eval_samples_per_second": 529.611,
      "eval_steps_per_second": 16.65,
      "step": 700
    },
    {
      "epoch": 1.67,
      "eval_accuracy": 0.9792331195648525,
      "eval_f1": 0.9769158636100637,
      "eval_loss": 0.07212899625301361,
      "eval_precision": 0.9758839239446725,
      "eval_recall": 0.9779499880162978,
      "eval_runtime": 3.1071,
      "eval_samples_per_second": 532.328,
      "eval_steps_per_second": 16.736,
      "step": 750
    },
    {
      "epoch": 1.79,
      "eval_accuracy": 0.9795856696773327,
      "eval_f1": 0.9771997366898726,
      "eval_loss": 0.07169780880212784,
      "eval_precision": 0.9759732238913017,
      "eval_recall": 0.9784293361029001,
      "eval_runtime": 3.1916,
      "eval_samples_per_second": 518.234,
      "eval_steps_per_second": 16.293,
      "step": 800
    },
    {
      "epoch": 1.9,
      "eval_accuracy": 0.9804082866064533,
      "eval_f1": 0.9784274595889044,
      "eval_loss": 0.06875239312648773,
      "eval_precision": 0.9776080714627532,
      "eval_recall": 0.9792482224175122,
      "eval_runtime": 3.2127,
      "eval_samples_per_second": 514.836,
      "eval_steps_per_second": 16.186,
      "step": 850
    },
    {
      "epoch": 2.01,
      "eval_accuracy": 0.9791995433636639,
      "eval_f1": 0.977885114366692,
      "eval_loss": 0.07294180244207382,
      "eval_precision": 0.9772219562789214,
      "eval_recall": 0.9785491731245506,
      "eval_runtime": 3.0782,
      "eval_samples_per_second": 537.322,
      "eval_steps_per_second": 16.893,
      "step": 900
    },
    {
      "epoch": 2.12,
      "eval_accuracy": 0.9806097438135849,
      "eval_f1": 0.9782331112086509,
      "eval_loss": 0.0767468586564064,
      "eval_precision": 0.977180325255102,
      "eval_recall": 0.9792881680913957,
      "eval_runtime": 3.1248,
      "eval_samples_per_second": 529.314,
      "eval_steps_per_second": 16.641,
      "step": 950
    },
    {
      "epoch": 2.23,
      "learning_rate": 2.2126116071428573e-05,
      "loss": 0.049,
      "step": 1000
    },
    {
      "epoch": 2.23,
      "eval_accuracy": 0.9794177886713897,
      "eval_f1": 0.9773666247530087,
      "eval_loss": 0.07647667080163956,
      "eval_precision": 0.9766843511907136,
      "eval_recall": 0.9780498522010066,
      "eval_runtime": 3.0978,
      "eval_samples_per_second": 533.936,
      "eval_steps_per_second": 16.786,
      "step": 1000
    },
    {
      "epoch": 2.34,
      "eval_accuracy": 0.97977033878387,
      "eval_f1": 0.9773353018634531,
      "eval_loss": 0.07652360200881958,
      "eval_precision": 0.9762834821428571,
      "eval_recall": 0.9783893904290165,
      "eval_runtime": 3.0494,
      "eval_samples_per_second": 542.399,
      "eval_steps_per_second": 17.052,
      "step": 1050
    },
    {
      "epoch": 2.46,
      "eval_accuracy": 0.9807272605177451,
      "eval_f1": 0.9790142798694754,
      "eval_loss": 0.07351543754339218,
      "eval_precision": 0.9782821131563728,
      "eval_recall": 0.9797475433410562,
      "eval_runtime": 2.1592,
      "eval_samples_per_second": 766.034,
      "eval_steps_per_second": 24.083,
      "step": 1100
    },
    {
      "epoch": 2.57,
      "eval_accuracy": 0.9812812678373569,
      "eval_f1": 0.9792086517548935,
      "eval_loss": 0.07407119125127792,
      "eval_precision": 0.9782522026870789,
      "eval_recall": 0.9801669729168331,
      "eval_runtime": 2.3144,
      "eval_samples_per_second": 714.646,
      "eval_steps_per_second": 22.468,
      "step": 1150
    },
    {
      "epoch": 2.68,
      "eval_accuracy": 0.9805761676123963,
      "eval_f1": 0.9790627120673825,
      "eval_loss": 0.07566250115633011,
      "eval_precision": 0.9783987553853518,
      "eval_recall": 0.9797275705041144,
      "eval_runtime": 2.157,
      "eval_samples_per_second": 766.796,
      "eval_steps_per_second": 24.107,
      "step": 1200
    },
    {
      "epoch": 2.79,
      "eval_accuracy": 0.9812141154349797,
      "eval_f1": 0.9793385543971667,
      "eval_loss": 0.0741068422794342,
      "eval_precision": 0.978372236729324,
      "eval_recall": 0.9803067827754254,
      "eval_runtime": 2.1422,
      "eval_samples_per_second": 772.1,
      "eval_steps_per_second": 24.274,
      "step": 1250
    },
    {
      "epoch": 2.9,
      "eval_accuracy": 0.9809287177248766,
      "eval_f1": 0.9787904786607873,
      "eval_loss": 0.07299875468015671,
      "eval_precision": 0.9777954953159258,
      "eval_recall": 0.9797874890149397,
      "eval_runtime": 2.1492,
      "eval_samples_per_second": 769.58,
      "eval_steps_per_second": 24.195,
      "step": 1300
    },
    {
      "epoch": 3.01,
      "eval_accuracy": 0.9813652083403284,
      "eval_f1": 0.9792876384316073,
      "eval_loss": 0.07347211241722107,
      "eval_precision": 0.9783700809377617,
      "eval_recall": 0.9802069185907166,
      "eval_runtime": 2.1559,
      "eval_samples_per_second": 767.204,
      "eval_steps_per_second": 24.12,
      "step": 1350
    },
    {
      "epoch": 3.12,
      "eval_accuracy": 0.9818856394587516,
      "eval_f1": 0.9799355463098764,
      "eval_loss": 0.07577352970838547,
      "eval_precision": 0.9790466317111585,
      "eval_recall": 0.9808260765359111,
      "eval_runtime": 2.1533,
      "eval_samples_per_second": 768.11,
      "eval_steps_per_second": 24.149,
      "step": 1400
    },
    {
      "epoch": 3.24,
      "eval_accuracy": 0.98168418225162,
      "eval_f1": 0.9799245674602383,
      "eval_loss": 0.07578457146883011,
      "eval_precision": 0.9790844199864418,
      "eval_recall": 0.9807661580250859,
      "eval_runtime": 2.1906,
      "eval_samples_per_second": 755.058,
      "eval_steps_per_second": 23.738,
      "step": 1450
    },
    {
      "epoch": 3.35,
      "learning_rate": 8.175223214285714e-06,
      "loss": 0.0257,
      "step": 1500
    },
    {
      "epoch": 3.35,
      "eval_accuracy": 0.9813316321391398,
      "eval_f1": 0.9794737209743247,
      "eval_loss": 0.07658305764198303,
      "eval_precision": 0.9787217070495563,
      "eval_recall": 0.9802268914276584,
      "eval_runtime": 2.1425,
      "eval_samples_per_second": 771.995,
      "eval_steps_per_second": 24.271,
      "step": 1500
    },
    {
      "epoch": 3.46,
      "eval_accuracy": 0.9814659369438942,
      "eval_f1": 0.9793369052251389,
      "eval_loss": 0.07673583924770355,
      "eval_precision": 0.9784485336629518,
      "eval_recall": 0.9802268914276584,
      "eval_runtime": 2.137,
      "eval_samples_per_second": 773.994,
      "eval_steps_per_second": 24.334,
      "step": 1550
    },
    {
      "epoch": 3.57,
      "eval_accuracy": 0.9814323607427056,
      "eval_f1": 0.9795576307204215,
      "eval_loss": 0.07736379653215408,
      "eval_precision": 0.9786105850692715,
      "eval_recall": 0.980506511144843,
      "eval_runtime": 2.2904,
      "eval_samples_per_second": 722.158,
      "eval_steps_per_second": 22.704,
      "step": 1600
    },
    {
      "epoch": 3.68,
      "eval_accuracy": 0.98168418225162,
      "eval_f1": 0.9797461787125354,
      "eval_loss": 0.07633356750011444,
      "eval_precision": 0.978847687400319,
      "eval_recall": 0.9806463210034353,
      "eval_runtime": 2.1499,
      "eval_samples_per_second": 769.338,
      "eval_steps_per_second": 24.187,
      "step": 1650
    },
    {
      "epoch": 3.79,
      "eval_accuracy": 0.9814995131450828,
      "eval_f1": 0.9798431356896241,
      "eval_loss": 0.07660035789012909,
      "eval_precision": 0.9790810832369481,
      "eval_recall": 0.9806063753295519,
      "eval_runtime": 2.1514,
      "eval_samples_per_second": 768.802,
      "eval_steps_per_second": 24.17,
      "step": 1700
    },
    {
      "epoch": 3.91,
      "eval_accuracy": 0.9819192156599402,
      "eval_f1": 0.9799660780205528,
      "eval_loss": 0.07586053013801575,
      "eval_precision": 0.979047884853076,
      "eval_recall": 0.9808859950467365,
      "eval_runtime": 2.182,
      "eval_samples_per_second": 758.038,
      "eval_steps_per_second": 23.832,
      "step": 1750
    },
    {
      "epoch": 4.0,
      "step": 1792,
      "total_flos": 2508921974905632.0,
      "train_loss": 0.07637897772448403,
      "train_runtime": 8322.2563,
      "train_samples_per_second": 6.876,
      "train_steps_per_second": 0.215
    }
  ],
  "max_steps": 1792,
  "num_train_epochs": 4,
  "total_flos": 2508921974905632.0,
  "trial_name": null,
  "trial_params": null
}