SerdarHelli commited on
Commit
6a33562
·
1 Parent(s): 2a3559e

ThyroidTumorClassification

Browse files
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 149.89,
3
- "eval_f1": 0.9032258064516129,
4
- "eval_loss": 0.45062127709388733,
5
- "eval_runtime": 0.5727,
6
- "eval_samples_per_second": 122.228,
7
- "eval_steps_per_second": 5.238,
8
- "total_flos": 1.0435256966870508e+18,
9
- "train_loss": 0.3680222670237223,
10
- "train_runtime": 877.204,
11
- "train_samples_per_second": 47.366,
12
- "train_steps_per_second": 0.342
13
  }
 
1
  {
2
+ "epoch": 19.73,
3
+ "eval_f1": 0.9312977099236641,
4
+ "eval_loss": 0.5987359285354614,
5
+ "eval_runtime": 0.4648,
6
+ "eval_samples_per_second": 176.425,
7
+ "eval_steps_per_second": 6.455,
8
+ "total_flos": 1.620732679243776e+17,
9
+ "train_loss": 0.8578881025314331,
10
+ "train_runtime": 102.9255,
11
+ "train_samples_per_second": 63.347,
12
+ "train_steps_per_second": 0.389
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 149.89,
3
- "eval_f1": 0.9032258064516129,
4
- "eval_loss": 0.45062127709388733,
5
- "eval_runtime": 0.5727,
6
- "eval_samples_per_second": 122.228,
7
- "eval_steps_per_second": 5.238
8
  }
 
1
  {
2
+ "epoch": 19.73,
3
+ "eval_f1": 0.9312977099236641,
4
+ "eval_loss": 0.5987359285354614,
5
+ "eval_runtime": 0.4648,
6
+ "eval_samples_per_second": 176.425,
7
+ "eval_steps_per_second": 6.455
8
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c1111386620231e1358294477e6b15819e1f4b775f95505cb21a90993de59864
3
  size 111347349
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ce7c3b86954dad0996ccf49be4284947b73504f1521f15cd9a889bf12083e90
3
  size 111347349
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 149.89,
3
- "total_flos": 1.0435256966870508e+18,
4
- "train_loss": 0.3680222670237223,
5
- "train_runtime": 877.204,
6
- "train_samples_per_second": 47.366,
7
- "train_steps_per_second": 0.342
8
  }
 
1
  {
2
+ "epoch": 19.73,
3
+ "total_flos": 1.620732679243776e+17,
4
+ "train_loss": 0.8578881025314331,
5
+ "train_runtime": 102.9255,
6
+ "train_samples_per_second": 63.347,
7
+ "train_steps_per_second": 0.389
8
  }
trainer_state.json CHANGED
@@ -1,1447 +1,211 @@
1
  {
2
- "best_metric": 0.9032258064516129,
3
- "best_model_checkpoint": "Cvt-finetuned-thyroid/checkpoint-218",
4
- "epoch": 149.88888888888889,
5
- "global_step": 300,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.89,
12
- "eval_f1": 0.8474576271186439,
13
- "eval_loss": 0.6517891883850098,
14
- "eval_runtime": 0.5116,
15
- "eval_samples_per_second": 136.828,
16
- "eval_steps_per_second": 5.864,
17
  "step": 2
18
  },
19
  {
20
- "epoch": 1.89,
21
- "eval_f1": 0.8474576271186439,
22
- "eval_loss": 0.6492317318916321,
23
- "eval_runtime": 0.4952,
24
- "eval_samples_per_second": 141.346,
25
- "eval_steps_per_second": 6.058,
26
  "step": 4
27
  },
28
  {
29
- "epoch": 2.89,
30
- "eval_f1": 0.8474576271186439,
31
- "eval_loss": 0.6447046995162964,
32
- "eval_runtime": 0.6963,
33
- "eval_samples_per_second": 100.538,
34
- "eval_steps_per_second": 4.309,
35
  "step": 6
36
  },
37
  {
38
- "epoch": 3.89,
39
- "eval_f1": 0.8333333333333334,
40
- "eval_loss": 0.6384778022766113,
41
- "eval_runtime": 0.5,
42
- "eval_samples_per_second": 140.006,
43
- "eval_steps_per_second": 6.0,
44
  "step": 8
45
  },
46
  {
47
- "epoch": 4.89,
48
- "eval_f1": 0.8524590163934426,
49
- "eval_loss": 0.6307393312454224,
50
- "eval_runtime": 0.5113,
51
- "eval_samples_per_second": 136.904,
52
- "eval_steps_per_second": 5.867,
53
  "step": 10
54
  },
55
  {
56
- "epoch": 5.89,
57
- "eval_f1": 0.8709677419354839,
58
- "eval_loss": 0.621659517288208,
59
- "eval_runtime": 0.5005,
60
- "eval_samples_per_second": 139.863,
61
- "eval_steps_per_second": 5.994,
62
  "step": 12
63
  },
64
  {
65
- "epoch": 6.89,
66
- "eval_f1": 0.8709677419354839,
67
- "eval_loss": 0.6113398671150208,
68
- "eval_runtime": 0.5076,
69
- "eval_samples_per_second": 137.913,
70
- "eval_steps_per_second": 5.911,
71
  "step": 14
72
  },
73
  {
74
- "epoch": 7.89,
75
- "eval_f1": 0.8799999999999999,
76
- "eval_loss": 0.5999341011047363,
77
- "eval_runtime": 0.504,
78
- "eval_samples_per_second": 138.893,
79
- "eval_steps_per_second": 5.953,
80
  "step": 16
81
  },
82
  {
83
- "epoch": 8.89,
84
- "eval_f1": 0.888888888888889,
85
- "eval_loss": 0.587955117225647,
86
- "eval_runtime": 0.5041,
87
- "eval_samples_per_second": 138.872,
88
- "eval_steps_per_second": 5.952,
89
  "step": 18
90
  },
91
  {
92
- "epoch": 9.89,
93
- "eval_f1": 0.888888888888889,
94
- "eval_loss": 0.5761495232582092,
95
- "eval_runtime": 0.5087,
96
- "eval_samples_per_second": 137.618,
97
- "eval_steps_per_second": 5.898,
98
  "step": 20
99
  },
100
  {
101
- "epoch": 10.89,
102
- "eval_f1": 0.888888888888889,
103
- "eval_loss": 0.5640864372253418,
104
- "eval_runtime": 0.5127,
105
- "eval_samples_per_second": 136.532,
106
- "eval_steps_per_second": 5.851,
107
  "step": 22
108
  },
109
  {
110
- "epoch": 11.89,
111
- "eval_f1": 0.888888888888889,
112
- "eval_loss": 0.5520942211151123,
113
- "eval_runtime": 0.5104,
114
- "eval_samples_per_second": 137.159,
115
- "eval_steps_per_second": 5.878,
116
  "step": 24
117
  },
118
  {
119
- "epoch": 12.44,
120
- "learning_rate": 8.333333333333334e-06,
121
- "loss": 0.6685,
122
  "step": 25
123
  },
124
  {
125
- "epoch": 12.89,
126
- "eval_f1": 0.888888888888889,
127
- "eval_loss": 0.5406573414802551,
128
- "eval_runtime": 0.5165,
129
- "eval_samples_per_second": 135.523,
130
- "eval_steps_per_second": 5.808,
131
  "step": 26
132
  },
133
  {
134
- "epoch": 13.89,
135
- "eval_f1": 0.888888888888889,
136
- "eval_loss": 0.5298128128051758,
137
- "eval_runtime": 0.5102,
138
- "eval_samples_per_second": 137.199,
139
- "eval_steps_per_second": 5.88,
140
  "step": 28
141
  },
142
  {
143
- "epoch": 14.89,
144
- "eval_f1": 0.888888888888889,
145
- "eval_loss": 0.5198087096214294,
146
- "eval_runtime": 0.51,
147
- "eval_samples_per_second": 137.243,
148
- "eval_steps_per_second": 5.882,
149
  "step": 30
150
  },
151
  {
152
- "epoch": 15.89,
153
- "eval_f1": 0.888888888888889,
154
- "eval_loss": 0.5109697580337524,
155
- "eval_runtime": 0.5205,
156
- "eval_samples_per_second": 134.496,
157
- "eval_steps_per_second": 5.764,
158
  "step": 32
159
  },
160
  {
161
- "epoch": 16.89,
162
- "eval_f1": 0.888888888888889,
163
- "eval_loss": 0.5045211911201477,
164
- "eval_runtime": 0.514,
165
- "eval_samples_per_second": 136.181,
166
- "eval_steps_per_second": 5.836,
167
  "step": 34
168
  },
169
  {
170
- "epoch": 17.89,
171
- "eval_f1": 0.888888888888889,
172
- "eval_loss": 0.4998997747898102,
173
- "eval_runtime": 0.5294,
174
- "eval_samples_per_second": 132.214,
175
- "eval_steps_per_second": 5.666,
176
  "step": 36
177
  },
178
  {
179
- "epoch": 18.89,
180
- "eval_f1": 0.888888888888889,
181
- "eval_loss": 0.4965072572231293,
182
- "eval_runtime": 0.5087,
183
- "eval_samples_per_second": 137.597,
184
- "eval_steps_per_second": 5.897,
185
  "step": 38
186
  },
187
  {
188
- "epoch": 19.89,
189
- "eval_f1": 0.888888888888889,
190
- "eval_loss": 0.493766188621521,
191
- "eval_runtime": 0.5102,
192
- "eval_samples_per_second": 137.214,
193
- "eval_steps_per_second": 5.881,
194
  "step": 40
195
  },
196
  {
197
- "epoch": 20.89,
198
- "eval_f1": 0.888888888888889,
199
- "eval_loss": 0.49165278673171997,
200
- "eval_runtime": 0.5078,
201
- "eval_samples_per_second": 137.845,
202
- "eval_steps_per_second": 5.908,
203
- "step": 42
204
- },
205
- {
206
- "epoch": 21.89,
207
- "eval_f1": 0.888888888888889,
208
- "eval_loss": 0.4898264706134796,
209
- "eval_runtime": 0.5144,
210
- "eval_samples_per_second": 136.069,
211
- "eval_steps_per_second": 5.832,
212
- "step": 44
213
- },
214
- {
215
- "epoch": 22.89,
216
- "eval_f1": 0.888888888888889,
217
- "eval_loss": 0.4883803129196167,
218
- "eval_runtime": 0.5035,
219
- "eval_samples_per_second": 139.037,
220
- "eval_steps_per_second": 5.959,
221
- "step": 46
222
- },
223
- {
224
- "epoch": 23.89,
225
- "eval_f1": 0.888888888888889,
226
- "eval_loss": 0.4872118830680847,
227
- "eval_runtime": 0.5072,
228
- "eval_samples_per_second": 138.01,
229
- "eval_steps_per_second": 5.915,
230
- "step": 48
231
- },
232
- {
233
- "epoch": 24.89,
234
- "learning_rate": 9.25925925925926e-06,
235
- "loss": 0.5044,
236
- "step": 50
237
- },
238
- {
239
- "epoch": 24.89,
240
- "eval_f1": 0.888888888888889,
241
- "eval_loss": 0.4861142933368683,
242
- "eval_runtime": 0.5085,
243
- "eval_samples_per_second": 137.671,
244
- "eval_steps_per_second": 5.9,
245
- "step": 50
246
- },
247
- {
248
- "epoch": 25.89,
249
- "eval_f1": 0.888888888888889,
250
- "eval_loss": 0.4849891662597656,
251
- "eval_runtime": 0.5374,
252
- "eval_samples_per_second": 130.249,
253
- "eval_steps_per_second": 5.582,
254
- "step": 52
255
- },
256
- {
257
- "epoch": 26.89,
258
- "eval_f1": 0.888888888888889,
259
- "eval_loss": 0.483755499124527,
260
- "eval_runtime": 0.51,
261
- "eval_samples_per_second": 137.255,
262
- "eval_steps_per_second": 5.882,
263
- "step": 54
264
- },
265
- {
266
- "epoch": 27.89,
267
- "eval_f1": 0.888888888888889,
268
- "eval_loss": 0.4827323257923126,
269
- "eval_runtime": 0.5139,
270
- "eval_samples_per_second": 136.21,
271
- "eval_steps_per_second": 5.838,
272
- "step": 56
273
- },
274
- {
275
- "epoch": 28.89,
276
- "eval_f1": 0.888888888888889,
277
- "eval_loss": 0.4820977449417114,
278
- "eval_runtime": 0.5117,
279
- "eval_samples_per_second": 136.794,
280
- "eval_steps_per_second": 5.863,
281
- "step": 58
282
- },
283
- {
284
- "epoch": 29.89,
285
- "eval_f1": 0.888888888888889,
286
- "eval_loss": 0.4817977249622345,
287
- "eval_runtime": 0.5176,
288
- "eval_samples_per_second": 135.231,
289
- "eval_steps_per_second": 5.796,
290
- "step": 60
291
- },
292
- {
293
- "epoch": 30.89,
294
- "eval_f1": 0.888888888888889,
295
- "eval_loss": 0.4813471734523773,
296
- "eval_runtime": 0.5076,
297
- "eval_samples_per_second": 137.901,
298
- "eval_steps_per_second": 5.91,
299
- "step": 62
300
- },
301
- {
302
- "epoch": 31.89,
303
- "eval_f1": 0.888888888888889,
304
- "eval_loss": 0.4812641739845276,
305
- "eval_runtime": 0.5176,
306
- "eval_samples_per_second": 135.242,
307
- "eval_steps_per_second": 5.796,
308
- "step": 64
309
- },
310
- {
311
- "epoch": 32.89,
312
- "eval_f1": 0.888888888888889,
313
- "eval_loss": 0.48070254921913147,
314
- "eval_runtime": 0.5078,
315
- "eval_samples_per_second": 137.854,
316
- "eval_steps_per_second": 5.908,
317
- "step": 66
318
- },
319
- {
320
- "epoch": 33.89,
321
- "eval_f1": 0.888888888888889,
322
- "eval_loss": 0.48028674721717834,
323
- "eval_runtime": 0.5091,
324
- "eval_samples_per_second": 137.488,
325
- "eval_steps_per_second": 5.892,
326
- "step": 68
327
- },
328
- {
329
- "epoch": 34.89,
330
- "eval_f1": 0.888888888888889,
331
- "eval_loss": 0.4801904261112213,
332
- "eval_runtime": 0.5047,
333
- "eval_samples_per_second": 138.694,
334
- "eval_steps_per_second": 5.944,
335
- "step": 70
336
- },
337
- {
338
- "epoch": 35.89,
339
- "eval_f1": 0.888888888888889,
340
- "eval_loss": 0.4799834191799164,
341
- "eval_runtime": 0.5083,
342
- "eval_samples_per_second": 137.704,
343
- "eval_steps_per_second": 5.902,
344
- "step": 72
345
- },
346
- {
347
- "epoch": 36.89,
348
- "eval_f1": 0.888888888888889,
349
- "eval_loss": 0.47955021262168884,
350
- "eval_runtime": 0.5047,
351
- "eval_samples_per_second": 138.688,
352
- "eval_steps_per_second": 5.944,
353
- "step": 74
354
- },
355
- {
356
- "epoch": 37.44,
357
- "learning_rate": 8.333333333333334e-06,
358
- "loss": 0.4434,
359
- "step": 75
360
- },
361
- {
362
- "epoch": 37.89,
363
- "eval_f1": 0.888888888888889,
364
- "eval_loss": 0.47954249382019043,
365
- "eval_runtime": 0.5098,
366
- "eval_samples_per_second": 137.317,
367
- "eval_steps_per_second": 5.885,
368
- "step": 76
369
- },
370
- {
371
- "epoch": 38.89,
372
- "eval_f1": 0.888888888888889,
373
- "eval_loss": 0.47896233201026917,
374
- "eval_runtime": 0.5167,
375
- "eval_samples_per_second": 135.481,
376
- "eval_steps_per_second": 5.806,
377
- "step": 78
378
- },
379
- {
380
- "epoch": 39.89,
381
- "eval_f1": 0.888888888888889,
382
- "eval_loss": 0.4780120849609375,
383
- "eval_runtime": 0.5174,
384
- "eval_samples_per_second": 135.289,
385
- "eval_steps_per_second": 5.798,
386
- "step": 80
387
- },
388
- {
389
- "epoch": 40.89,
390
- "eval_f1": 0.888888888888889,
391
- "eval_loss": 0.4772844612598419,
392
- "eval_runtime": 0.5162,
393
- "eval_samples_per_second": 135.603,
394
- "eval_steps_per_second": 5.812,
395
- "step": 82
396
- },
397
- {
398
- "epoch": 41.89,
399
- "eval_f1": 0.888888888888889,
400
- "eval_loss": 0.47639530897140503,
401
- "eval_runtime": 0.5186,
402
- "eval_samples_per_second": 134.98,
403
- "eval_steps_per_second": 5.785,
404
- "step": 84
405
- },
406
- {
407
- "epoch": 42.89,
408
- "eval_f1": 0.888888888888889,
409
- "eval_loss": 0.47554805874824524,
410
- "eval_runtime": 0.5172,
411
- "eval_samples_per_second": 135.355,
412
- "eval_steps_per_second": 5.801,
413
- "step": 86
414
- },
415
- {
416
- "epoch": 43.89,
417
- "eval_f1": 0.888888888888889,
418
- "eval_loss": 0.4745935797691345,
419
- "eval_runtime": 0.514,
420
- "eval_samples_per_second": 136.19,
421
- "eval_steps_per_second": 5.837,
422
- "step": 88
423
- },
424
- {
425
- "epoch": 44.89,
426
- "eval_f1": 0.888888888888889,
427
- "eval_loss": 0.4736994504928589,
428
- "eval_runtime": 0.5108,
429
- "eval_samples_per_second": 137.03,
430
- "eval_steps_per_second": 5.873,
431
- "step": 90
432
- },
433
- {
434
- "epoch": 45.89,
435
- "eval_f1": 0.888888888888889,
436
- "eval_loss": 0.4725368618965149,
437
- "eval_runtime": 0.5067,
438
- "eval_samples_per_second": 138.137,
439
- "eval_steps_per_second": 5.92,
440
- "step": 92
441
- },
442
- {
443
- "epoch": 46.89,
444
- "eval_f1": 0.888888888888889,
445
- "eval_loss": 0.47122785449028015,
446
- "eval_runtime": 0.5187,
447
- "eval_samples_per_second": 134.95,
448
- "eval_steps_per_second": 5.784,
449
- "step": 94
450
- },
451
- {
452
- "epoch": 47.89,
453
- "eval_f1": 0.888888888888889,
454
- "eval_loss": 0.4697120189666748,
455
- "eval_runtime": 0.5106,
456
- "eval_samples_per_second": 137.089,
457
- "eval_steps_per_second": 5.875,
458
- "step": 96
459
- },
460
- {
461
- "epoch": 48.89,
462
- "eval_f1": 0.888888888888889,
463
- "eval_loss": 0.4686456322669983,
464
- "eval_runtime": 0.5071,
465
- "eval_samples_per_second": 138.045,
466
- "eval_steps_per_second": 5.916,
467
- "step": 98
468
- },
469
- {
470
- "epoch": 49.89,
471
- "learning_rate": 7.4074074074074075e-06,
472
- "loss": 0.4,
473
- "step": 100
474
- },
475
- {
476
- "epoch": 49.89,
477
- "eval_f1": 0.888888888888889,
478
- "eval_loss": 0.46755868196487427,
479
- "eval_runtime": 0.5094,
480
- "eval_samples_per_second": 137.403,
481
- "eval_steps_per_second": 5.889,
482
- "step": 100
483
- },
484
- {
485
- "epoch": 50.89,
486
- "eval_f1": 0.888888888888889,
487
- "eval_loss": 0.4666549265384674,
488
- "eval_runtime": 0.5092,
489
- "eval_samples_per_second": 137.458,
490
- "eval_steps_per_second": 5.891,
491
- "step": 102
492
- },
493
- {
494
- "epoch": 51.89,
495
- "eval_f1": 0.888888888888889,
496
- "eval_loss": 0.46638351678848267,
497
- "eval_runtime": 0.5124,
498
- "eval_samples_per_second": 136.625,
499
- "eval_steps_per_second": 5.855,
500
- "step": 104
501
- },
502
- {
503
- "epoch": 52.89,
504
- "eval_f1": 0.888888888888889,
505
- "eval_loss": 0.4666298031806946,
506
- "eval_runtime": 0.5135,
507
- "eval_samples_per_second": 136.326,
508
- "eval_steps_per_second": 5.843,
509
- "step": 106
510
- },
511
- {
512
- "epoch": 53.89,
513
- "eval_f1": 0.888888888888889,
514
- "eval_loss": 0.46660488843917847,
515
- "eval_runtime": 0.5175,
516
- "eval_samples_per_second": 135.261,
517
- "eval_steps_per_second": 5.797,
518
- "step": 108
519
- },
520
- {
521
- "epoch": 54.89,
522
- "eval_f1": 0.888888888888889,
523
- "eval_loss": 0.4668794870376587,
524
- "eval_runtime": 0.5009,
525
- "eval_samples_per_second": 139.747,
526
- "eval_steps_per_second": 5.989,
527
- "step": 110
528
- },
529
- {
530
- "epoch": 55.89,
531
- "eval_f1": 0.888888888888889,
532
- "eval_loss": 0.46678298711776733,
533
- "eval_runtime": 0.5078,
534
- "eval_samples_per_second": 137.844,
535
- "eval_steps_per_second": 5.908,
536
- "step": 112
537
- },
538
- {
539
- "epoch": 56.89,
540
- "eval_f1": 0.888888888888889,
541
- "eval_loss": 0.46638283133506775,
542
- "eval_runtime": 0.5157,
543
- "eval_samples_per_second": 135.74,
544
- "eval_steps_per_second": 5.817,
545
- "step": 114
546
- },
547
- {
548
- "epoch": 57.89,
549
- "eval_f1": 0.888888888888889,
550
- "eval_loss": 0.46611693501472473,
551
- "eval_runtime": 0.5257,
552
- "eval_samples_per_second": 133.154,
553
- "eval_steps_per_second": 5.707,
554
- "step": 116
555
- },
556
- {
557
- "epoch": 58.89,
558
- "eval_f1": 0.888888888888889,
559
- "eval_loss": 0.4656626582145691,
560
- "eval_runtime": 0.5108,
561
- "eval_samples_per_second": 137.041,
562
- "eval_steps_per_second": 5.873,
563
- "step": 118
564
- },
565
- {
566
- "epoch": 59.89,
567
- "eval_f1": 0.888888888888889,
568
- "eval_loss": 0.4653801918029785,
569
- "eval_runtime": 0.5184,
570
- "eval_samples_per_second": 135.019,
571
- "eval_steps_per_second": 5.787,
572
- "step": 120
573
- },
574
- {
575
- "epoch": 60.89,
576
- "eval_f1": 0.888888888888889,
577
- "eval_loss": 0.46494531631469727,
578
- "eval_runtime": 0.5265,
579
- "eval_samples_per_second": 132.948,
580
- "eval_steps_per_second": 5.698,
581
- "step": 122
582
- },
583
- {
584
- "epoch": 61.89,
585
- "eval_f1": 0.888888888888889,
586
- "eval_loss": 0.4643649458885193,
587
- "eval_runtime": 0.5176,
588
- "eval_samples_per_second": 135.236,
589
- "eval_steps_per_second": 5.796,
590
- "step": 124
591
- },
592
- {
593
- "epoch": 62.44,
594
- "learning_rate": 6.481481481481482e-06,
595
- "loss": 0.3712,
596
- "step": 125
597
- },
598
- {
599
- "epoch": 62.89,
600
- "eval_f1": 0.888888888888889,
601
- "eval_loss": 0.46361738443374634,
602
- "eval_runtime": 0.5177,
603
- "eval_samples_per_second": 135.21,
604
- "eval_steps_per_second": 5.795,
605
- "step": 126
606
- },
607
- {
608
- "epoch": 63.89,
609
- "eval_f1": 0.888888888888889,
610
- "eval_loss": 0.46249672770500183,
611
- "eval_runtime": 0.5106,
612
- "eval_samples_per_second": 137.097,
613
- "eval_steps_per_second": 5.876,
614
- "step": 128
615
- },
616
- {
617
- "epoch": 64.89,
618
- "eval_f1": 0.888888888888889,
619
- "eval_loss": 0.4611242711544037,
620
- "eval_runtime": 0.5134,
621
- "eval_samples_per_second": 136.352,
622
- "eval_steps_per_second": 5.844,
623
- "step": 130
624
- },
625
- {
626
- "epoch": 65.89,
627
- "eval_f1": 0.888888888888889,
628
- "eval_loss": 0.4598376154899597,
629
- "eval_runtime": 0.5045,
630
- "eval_samples_per_second": 138.759,
631
- "eval_steps_per_second": 5.947,
632
- "step": 132
633
- },
634
- {
635
- "epoch": 66.89,
636
- "eval_f1": 0.888888888888889,
637
- "eval_loss": 0.45868563652038574,
638
- "eval_runtime": 0.5061,
639
- "eval_samples_per_second": 138.303,
640
- "eval_steps_per_second": 5.927,
641
- "step": 134
642
- },
643
- {
644
- "epoch": 67.89,
645
- "eval_f1": 0.888888888888889,
646
- "eval_loss": 0.4579010605812073,
647
- "eval_runtime": 0.5154,
648
- "eval_samples_per_second": 135.819,
649
- "eval_steps_per_second": 5.821,
650
- "step": 136
651
- },
652
- {
653
- "epoch": 68.89,
654
- "eval_f1": 0.888888888888889,
655
- "eval_loss": 0.4574340879917145,
656
- "eval_runtime": 0.5107,
657
- "eval_samples_per_second": 137.062,
658
- "eval_steps_per_second": 5.874,
659
- "step": 138
660
- },
661
- {
662
- "epoch": 69.89,
663
- "eval_f1": 0.888888888888889,
664
- "eval_loss": 0.45691609382629395,
665
- "eval_runtime": 0.5186,
666
- "eval_samples_per_second": 134.99,
667
- "eval_steps_per_second": 5.785,
668
- "step": 140
669
- },
670
- {
671
- "epoch": 70.89,
672
- "eval_f1": 0.888888888888889,
673
- "eval_loss": 0.45680859684944153,
674
- "eval_runtime": 0.5147,
675
- "eval_samples_per_second": 136.006,
676
- "eval_steps_per_second": 5.829,
677
- "step": 142
678
- },
679
- {
680
- "epoch": 71.89,
681
- "eval_f1": 0.888888888888889,
682
- "eval_loss": 0.4564048945903778,
683
- "eval_runtime": 0.5157,
684
- "eval_samples_per_second": 135.741,
685
- "eval_steps_per_second": 5.817,
686
- "step": 144
687
- },
688
- {
689
- "epoch": 72.89,
690
- "eval_f1": 0.888888888888889,
691
- "eval_loss": 0.4561113119125366,
692
- "eval_runtime": 0.5039,
693
- "eval_samples_per_second": 138.917,
694
- "eval_steps_per_second": 5.954,
695
- "step": 146
696
- },
697
- {
698
- "epoch": 73.89,
699
- "eval_f1": 0.888888888888889,
700
- "eval_loss": 0.45622721314430237,
701
- "eval_runtime": 0.514,
702
- "eval_samples_per_second": 136.175,
703
- "eval_steps_per_second": 5.836,
704
- "step": 148
705
- },
706
- {
707
- "epoch": 74.89,
708
- "learning_rate": 5.555555555555557e-06,
709
- "loss": 0.3419,
710
- "step": 150
711
- },
712
- {
713
- "epoch": 74.89,
714
- "eval_f1": 0.888888888888889,
715
- "eval_loss": 0.4559585452079773,
716
- "eval_runtime": 0.5107,
717
- "eval_samples_per_second": 137.074,
718
- "eval_steps_per_second": 5.875,
719
- "step": 150
720
- },
721
- {
722
- "epoch": 75.89,
723
- "eval_f1": 0.888888888888889,
724
- "eval_loss": 0.4556769132614136,
725
- "eval_runtime": 0.5134,
726
- "eval_samples_per_second": 136.339,
727
- "eval_steps_per_second": 5.843,
728
- "step": 152
729
- },
730
- {
731
- "epoch": 76.89,
732
- "eval_f1": 0.888888888888889,
733
- "eval_loss": 0.45586857199668884,
734
- "eval_runtime": 0.5119,
735
- "eval_samples_per_second": 136.757,
736
- "eval_steps_per_second": 5.861,
737
- "step": 154
738
- },
739
- {
740
- "epoch": 77.89,
741
- "eval_f1": 0.888888888888889,
742
- "eval_loss": 0.45571208000183105,
743
- "eval_runtime": 0.5116,
744
- "eval_samples_per_second": 136.818,
745
- "eval_steps_per_second": 5.864,
746
- "step": 156
747
- },
748
- {
749
- "epoch": 78.89,
750
- "eval_f1": 0.888888888888889,
751
- "eval_loss": 0.4558698832988739,
752
- "eval_runtime": 0.5082,
753
- "eval_samples_per_second": 137.744,
754
- "eval_steps_per_second": 5.903,
755
- "step": 158
756
- },
757
- {
758
- "epoch": 79.89,
759
- "eval_f1": 0.888888888888889,
760
- "eval_loss": 0.4559585154056549,
761
- "eval_runtime": 0.5127,
762
- "eval_samples_per_second": 136.542,
763
- "eval_steps_per_second": 5.852,
764
- "step": 160
765
- },
766
- {
767
- "epoch": 80.89,
768
- "eval_f1": 0.888888888888889,
769
- "eval_loss": 0.4561022222042084,
770
- "eval_runtime": 0.5136,
771
- "eval_samples_per_second": 136.283,
772
- "eval_steps_per_second": 5.841,
773
- "step": 162
774
- },
775
- {
776
- "epoch": 81.89,
777
- "eval_f1": 0.888888888888889,
778
- "eval_loss": 0.4561418294906616,
779
- "eval_runtime": 0.5185,
780
- "eval_samples_per_second": 135.002,
781
- "eval_steps_per_second": 5.786,
782
- "step": 164
783
- },
784
- {
785
- "epoch": 82.89,
786
- "eval_f1": 0.888888888888889,
787
- "eval_loss": 0.45633062720298767,
788
- "eval_runtime": 0.5583,
789
- "eval_samples_per_second": 125.383,
790
- "eval_steps_per_second": 5.374,
791
- "step": 166
792
- },
793
- {
794
- "epoch": 83.89,
795
- "eval_f1": 0.896,
796
- "eval_loss": 0.45609140396118164,
797
- "eval_runtime": 0.5096,
798
- "eval_samples_per_second": 137.376,
799
- "eval_steps_per_second": 5.888,
800
- "step": 168
801
- },
802
- {
803
- "epoch": 84.89,
804
- "eval_f1": 0.896,
805
- "eval_loss": 0.4557640850543976,
806
- "eval_runtime": 0.5089,
807
- "eval_samples_per_second": 137.544,
808
- "eval_steps_per_second": 5.895,
809
- "step": 170
810
- },
811
- {
812
- "epoch": 85.89,
813
- "eval_f1": 0.896,
814
- "eval_loss": 0.4552680552005768,
815
- "eval_runtime": 0.5124,
816
- "eval_samples_per_second": 136.601,
817
- "eval_steps_per_second": 5.854,
818
- "step": 172
819
- },
820
- {
821
- "epoch": 86.89,
822
- "eval_f1": 0.896,
823
- "eval_loss": 0.45485004782676697,
824
- "eval_runtime": 0.5155,
825
- "eval_samples_per_second": 135.799,
826
- "eval_steps_per_second": 5.82,
827
- "step": 174
828
- },
829
- {
830
- "epoch": 87.44,
831
- "learning_rate": 4.62962962962963e-06,
832
- "loss": 0.3221,
833
- "step": 175
834
- },
835
- {
836
- "epoch": 87.89,
837
- "eval_f1": 0.896,
838
- "eval_loss": 0.45451802015304565,
839
- "eval_runtime": 0.5105,
840
- "eval_samples_per_second": 137.109,
841
- "eval_steps_per_second": 5.876,
842
- "step": 176
843
- },
844
- {
845
- "epoch": 88.89,
846
- "eval_f1": 0.896,
847
- "eval_loss": 0.4541896879673004,
848
- "eval_runtime": 0.5149,
849
- "eval_samples_per_second": 135.937,
850
- "eval_steps_per_second": 5.826,
851
- "step": 178
852
- },
853
- {
854
- "epoch": 89.89,
855
- "eval_f1": 0.896,
856
- "eval_loss": 0.45373955368995667,
857
- "eval_runtime": 0.5183,
858
- "eval_samples_per_second": 135.051,
859
- "eval_steps_per_second": 5.788,
860
- "step": 180
861
- },
862
- {
863
- "epoch": 90.89,
864
- "eval_f1": 0.896,
865
- "eval_loss": 0.4535920023918152,
866
- "eval_runtime": 0.5109,
867
- "eval_samples_per_second": 137.024,
868
- "eval_steps_per_second": 5.872,
869
- "step": 182
870
- },
871
- {
872
- "epoch": 91.89,
873
- "eval_f1": 0.896,
874
- "eval_loss": 0.45348674058914185,
875
- "eval_runtime": 0.5161,
876
- "eval_samples_per_second": 135.628,
877
- "eval_steps_per_second": 5.813,
878
- "step": 184
879
- },
880
- {
881
- "epoch": 92.89,
882
- "eval_f1": 0.896,
883
- "eval_loss": 0.45325374603271484,
884
- "eval_runtime": 0.5128,
885
- "eval_samples_per_second": 136.516,
886
- "eval_steps_per_second": 5.851,
887
- "step": 186
888
- },
889
- {
890
- "epoch": 93.89,
891
- "eval_f1": 0.896,
892
- "eval_loss": 0.4530419111251831,
893
- "eval_runtime": 0.5198,
894
- "eval_samples_per_second": 134.677,
895
- "eval_steps_per_second": 5.772,
896
- "step": 188
897
- },
898
- {
899
- "epoch": 94.89,
900
- "eval_f1": 0.896,
901
- "eval_loss": 0.452932745218277,
902
- "eval_runtime": 0.5123,
903
- "eval_samples_per_second": 136.651,
904
- "eval_steps_per_second": 5.856,
905
- "step": 190
906
- },
907
- {
908
- "epoch": 95.89,
909
- "eval_f1": 0.896,
910
- "eval_loss": 0.45291921496391296,
911
- "eval_runtime": 0.5108,
912
- "eval_samples_per_second": 137.03,
913
- "eval_steps_per_second": 5.873,
914
- "step": 192
915
- },
916
- {
917
- "epoch": 96.89,
918
- "eval_f1": 0.896,
919
- "eval_loss": 0.45280784368515015,
920
- "eval_runtime": 0.5125,
921
- "eval_samples_per_second": 136.577,
922
- "eval_steps_per_second": 5.853,
923
- "step": 194
924
- },
925
- {
926
- "epoch": 97.89,
927
- "eval_f1": 0.896,
928
- "eval_loss": 0.4528333246707916,
929
- "eval_runtime": 0.5077,
930
- "eval_samples_per_second": 137.871,
931
- "eval_steps_per_second": 5.909,
932
- "step": 196
933
- },
934
- {
935
- "epoch": 98.89,
936
- "eval_f1": 0.896,
937
- "eval_loss": 0.45277661085128784,
938
- "eval_runtime": 0.508,
939
- "eval_samples_per_second": 137.803,
940
- "eval_steps_per_second": 5.906,
941
- "step": 198
942
- },
943
- {
944
- "epoch": 99.89,
945
- "learning_rate": 3.7037037037037037e-06,
946
- "loss": 0.297,
947
- "step": 200
948
- },
949
- {
950
- "epoch": 99.89,
951
- "eval_f1": 0.896,
952
- "eval_loss": 0.45276370644569397,
953
- "eval_runtime": 0.5103,
954
- "eval_samples_per_second": 137.186,
955
- "eval_steps_per_second": 5.879,
956
- "step": 200
957
- },
958
- {
959
- "epoch": 100.89,
960
- "eval_f1": 0.896,
961
- "eval_loss": 0.4528166651725769,
962
- "eval_runtime": 0.5087,
963
- "eval_samples_per_second": 137.613,
964
- "eval_steps_per_second": 5.898,
965
- "step": 202
966
- },
967
- {
968
- "epoch": 101.89,
969
- "eval_f1": 0.896,
970
- "eval_loss": 0.45277735590934753,
971
- "eval_runtime": 0.5107,
972
- "eval_samples_per_second": 137.056,
973
- "eval_steps_per_second": 5.874,
974
- "step": 204
975
- },
976
- {
977
- "epoch": 102.89,
978
- "eval_f1": 0.896,
979
- "eval_loss": 0.4523409307003021,
980
- "eval_runtime": 0.519,
981
- "eval_samples_per_second": 134.862,
982
- "eval_steps_per_second": 5.78,
983
- "step": 206
984
- },
985
- {
986
- "epoch": 103.89,
987
- "eval_f1": 0.896,
988
- "eval_loss": 0.4522373080253601,
989
- "eval_runtime": 0.5234,
990
- "eval_samples_per_second": 133.744,
991
- "eval_steps_per_second": 5.732,
992
- "step": 208
993
- },
994
- {
995
- "epoch": 104.89,
996
- "eval_f1": 0.896,
997
- "eval_loss": 0.4519382417201996,
998
- "eval_runtime": 0.5099,
999
- "eval_samples_per_second": 137.277,
1000
- "eval_steps_per_second": 5.883,
1001
- "step": 210
1002
- },
1003
- {
1004
- "epoch": 105.89,
1005
- "eval_f1": 0.896,
1006
- "eval_loss": 0.4515880048274994,
1007
- "eval_runtime": 0.5111,
1008
- "eval_samples_per_second": 136.947,
1009
- "eval_steps_per_second": 5.869,
1010
- "step": 212
1011
- },
1012
- {
1013
- "epoch": 106.89,
1014
- "eval_f1": 0.896,
1015
- "eval_loss": 0.4515409767627716,
1016
- "eval_runtime": 0.5212,
1017
- "eval_samples_per_second": 134.295,
1018
- "eval_steps_per_second": 5.755,
1019
- "step": 214
1020
- },
1021
- {
1022
- "epoch": 107.89,
1023
- "eval_f1": 0.896,
1024
- "eval_loss": 0.45120465755462646,
1025
- "eval_runtime": 0.5234,
1026
- "eval_samples_per_second": 133.737,
1027
- "eval_steps_per_second": 5.732,
1028
- "step": 216
1029
- },
1030
- {
1031
- "epoch": 108.89,
1032
- "eval_f1": 0.9032258064516129,
1033
- "eval_loss": 0.45062127709388733,
1034
- "eval_runtime": 0.5116,
1035
- "eval_samples_per_second": 136.817,
1036
- "eval_steps_per_second": 5.864,
1037
- "step": 218
1038
- },
1039
- {
1040
- "epoch": 109.89,
1041
- "eval_f1": 0.9032258064516129,
1042
- "eval_loss": 0.450234055519104,
1043
- "eval_runtime": 0.5074,
1044
- "eval_samples_per_second": 137.954,
1045
- "eval_steps_per_second": 5.912,
1046
- "step": 220
1047
- },
1048
- {
1049
- "epoch": 110.89,
1050
- "eval_f1": 0.9032258064516129,
1051
- "eval_loss": 0.45027095079421997,
1052
- "eval_runtime": 0.5078,
1053
- "eval_samples_per_second": 137.84,
1054
- "eval_steps_per_second": 5.907,
1055
- "step": 222
1056
- },
1057
- {
1058
- "epoch": 111.89,
1059
- "eval_f1": 0.9032258064516129,
1060
- "eval_loss": 0.4502160847187042,
1061
- "eval_runtime": 0.5143,
1062
- "eval_samples_per_second": 136.105,
1063
- "eval_steps_per_second": 5.833,
1064
- "step": 224
1065
- },
1066
- {
1067
- "epoch": 112.44,
1068
- "learning_rate": 2.7777777777777783e-06,
1069
- "loss": 0.2809,
1070
- "step": 225
1071
- },
1072
- {
1073
- "epoch": 112.89,
1074
- "eval_f1": 0.9032258064516129,
1075
- "eval_loss": 0.45027267932891846,
1076
- "eval_runtime": 0.5062,
1077
- "eval_samples_per_second": 138.288,
1078
- "eval_steps_per_second": 5.927,
1079
- "step": 226
1080
- },
1081
- {
1082
- "epoch": 113.89,
1083
- "eval_f1": 0.9032258064516129,
1084
- "eval_loss": 0.45001256465911865,
1085
- "eval_runtime": 0.5176,
1086
- "eval_samples_per_second": 135.243,
1087
- "eval_steps_per_second": 5.796,
1088
- "step": 228
1089
- },
1090
- {
1091
- "epoch": 114.89,
1092
- "eval_f1": 0.9032258064516129,
1093
- "eval_loss": 0.44984087347984314,
1094
- "eval_runtime": 0.5057,
1095
- "eval_samples_per_second": 138.435,
1096
- "eval_steps_per_second": 5.933,
1097
- "step": 230
1098
- },
1099
- {
1100
- "epoch": 115.89,
1101
- "eval_f1": 0.9032258064516129,
1102
- "eval_loss": 0.4496540129184723,
1103
- "eval_runtime": 0.5087,
1104
- "eval_samples_per_second": 137.605,
1105
- "eval_steps_per_second": 5.897,
1106
- "step": 232
1107
- },
1108
- {
1109
- "epoch": 116.89,
1110
- "eval_f1": 0.9032258064516129,
1111
- "eval_loss": 0.4495893120765686,
1112
- "eval_runtime": 0.5162,
1113
- "eval_samples_per_second": 135.612,
1114
- "eval_steps_per_second": 5.812,
1115
- "step": 234
1116
- },
1117
- {
1118
- "epoch": 117.89,
1119
- "eval_f1": 0.9032258064516129,
1120
- "eval_loss": 0.4495932459831238,
1121
- "eval_runtime": 0.5162,
1122
- "eval_samples_per_second": 135.596,
1123
- "eval_steps_per_second": 5.811,
1124
- "step": 236
1125
- },
1126
- {
1127
- "epoch": 118.89,
1128
- "eval_f1": 0.9032258064516129,
1129
- "eval_loss": 0.4495743215084076,
1130
- "eval_runtime": 0.5143,
1131
- "eval_samples_per_second": 136.098,
1132
- "eval_steps_per_second": 5.833,
1133
- "step": 238
1134
- },
1135
- {
1136
- "epoch": 119.89,
1137
- "eval_f1": 0.9032258064516129,
1138
- "eval_loss": 0.4495793282985687,
1139
- "eval_runtime": 0.5201,
1140
- "eval_samples_per_second": 134.594,
1141
- "eval_steps_per_second": 5.768,
1142
- "step": 240
1143
- },
1144
- {
1145
- "epoch": 120.89,
1146
- "eval_f1": 0.9032258064516129,
1147
- "eval_loss": 0.44972047209739685,
1148
- "eval_runtime": 0.517,
1149
- "eval_samples_per_second": 135.393,
1150
- "eval_steps_per_second": 5.803,
1151
- "step": 242
1152
- },
1153
- {
1154
- "epoch": 121.89,
1155
- "eval_f1": 0.9032258064516129,
1156
- "eval_loss": 0.4496006965637207,
1157
- "eval_runtime": 0.5201,
1158
- "eval_samples_per_second": 134.597,
1159
- "eval_steps_per_second": 5.768,
1160
- "step": 244
1161
- },
1162
- {
1163
- "epoch": 122.89,
1164
- "eval_f1": 0.9032258064516129,
1165
- "eval_loss": 0.4495578706264496,
1166
- "eval_runtime": 0.5087,
1167
- "eval_samples_per_second": 137.594,
1168
- "eval_steps_per_second": 5.897,
1169
- "step": 246
1170
- },
1171
- {
1172
- "epoch": 123.89,
1173
- "eval_f1": 0.9032258064516129,
1174
- "eval_loss": 0.44961830973625183,
1175
- "eval_runtime": 0.5091,
1176
- "eval_samples_per_second": 137.492,
1177
- "eval_steps_per_second": 5.893,
1178
- "step": 248
1179
- },
1180
- {
1181
- "epoch": 124.89,
1182
- "learning_rate": 1.8518518518518519e-06,
1183
- "loss": 0.2717,
1184
- "step": 250
1185
- },
1186
- {
1187
- "epoch": 124.89,
1188
- "eval_f1": 0.9032258064516129,
1189
- "eval_loss": 0.4495493769645691,
1190
- "eval_runtime": 0.5126,
1191
- "eval_samples_per_second": 136.564,
1192
- "eval_steps_per_second": 5.853,
1193
- "step": 250
1194
- },
1195
- {
1196
- "epoch": 125.89,
1197
- "eval_f1": 0.9032258064516129,
1198
- "eval_loss": 0.44953247904777527,
1199
- "eval_runtime": 0.5166,
1200
- "eval_samples_per_second": 135.508,
1201
- "eval_steps_per_second": 5.807,
1202
- "step": 252
1203
- },
1204
- {
1205
- "epoch": 126.89,
1206
- "eval_f1": 0.9032258064516129,
1207
- "eval_loss": 0.44945859909057617,
1208
- "eval_runtime": 0.5087,
1209
- "eval_samples_per_second": 137.598,
1210
- "eval_steps_per_second": 5.897,
1211
- "step": 254
1212
- },
1213
- {
1214
- "epoch": 127.89,
1215
- "eval_f1": 0.9032258064516129,
1216
- "eval_loss": 0.4493277072906494,
1217
- "eval_runtime": 0.5129,
1218
- "eval_samples_per_second": 136.49,
1219
- "eval_steps_per_second": 5.85,
1220
- "step": 256
1221
- },
1222
- {
1223
- "epoch": 128.89,
1224
- "eval_f1": 0.9032258064516129,
1225
- "eval_loss": 0.4492017924785614,
1226
- "eval_runtime": 0.5132,
1227
- "eval_samples_per_second": 136.411,
1228
- "eval_steps_per_second": 5.846,
1229
- "step": 258
1230
- },
1231
- {
1232
- "epoch": 129.89,
1233
- "eval_f1": 0.9032258064516129,
1234
- "eval_loss": 0.44910600781440735,
1235
- "eval_runtime": 0.5158,
1236
- "eval_samples_per_second": 135.724,
1237
- "eval_steps_per_second": 5.817,
1238
- "step": 260
1239
- },
1240
- {
1241
- "epoch": 130.89,
1242
- "eval_f1": 0.9032258064516129,
1243
- "eval_loss": 0.44898271560668945,
1244
- "eval_runtime": 0.5171,
1245
- "eval_samples_per_second": 135.359,
1246
- "eval_steps_per_second": 5.801,
1247
- "step": 262
1248
- },
1249
- {
1250
- "epoch": 131.89,
1251
- "eval_f1": 0.9032258064516129,
1252
- "eval_loss": 0.44882574677467346,
1253
- "eval_runtime": 0.5104,
1254
- "eval_samples_per_second": 137.159,
1255
- "eval_steps_per_second": 5.878,
1256
- "step": 264
1257
- },
1258
- {
1259
- "epoch": 132.89,
1260
- "eval_f1": 0.9032258064516129,
1261
- "eval_loss": 0.44872909784317017,
1262
- "eval_runtime": 0.5186,
1263
- "eval_samples_per_second": 134.967,
1264
- "eval_steps_per_second": 5.784,
1265
- "step": 266
1266
- },
1267
- {
1268
- "epoch": 133.89,
1269
- "eval_f1": 0.9032258064516129,
1270
- "eval_loss": 0.4485660791397095,
1271
- "eval_runtime": 0.511,
1272
- "eval_samples_per_second": 136.99,
1273
- "eval_steps_per_second": 5.871,
1274
- "step": 268
1275
- },
1276
- {
1277
- "epoch": 134.89,
1278
- "eval_f1": 0.9032258064516129,
1279
- "eval_loss": 0.44834819436073303,
1280
- "eval_runtime": 0.5105,
1281
- "eval_samples_per_second": 137.112,
1282
- "eval_steps_per_second": 5.876,
1283
- "step": 270
1284
- },
1285
- {
1286
- "epoch": 135.89,
1287
- "eval_f1": 0.9032258064516129,
1288
- "eval_loss": 0.448197603225708,
1289
- "eval_runtime": 0.5111,
1290
- "eval_samples_per_second": 136.954,
1291
- "eval_steps_per_second": 5.869,
1292
- "step": 272
1293
- },
1294
- {
1295
- "epoch": 136.89,
1296
- "eval_f1": 0.9032258064516129,
1297
- "eval_loss": 0.4480949342250824,
1298
- "eval_runtime": 0.5102,
1299
- "eval_samples_per_second": 137.192,
1300
- "eval_steps_per_second": 5.88,
1301
- "step": 274
1302
- },
1303
- {
1304
- "epoch": 137.44,
1305
- "learning_rate": 9.259259259259259e-07,
1306
- "loss": 0.2597,
1307
- "step": 275
1308
- },
1309
- {
1310
- "epoch": 137.89,
1311
- "eval_f1": 0.9032258064516129,
1312
- "eval_loss": 0.447935551404953,
1313
- "eval_runtime": 0.5123,
1314
- "eval_samples_per_second": 136.645,
1315
- "eval_steps_per_second": 5.856,
1316
- "step": 276
1317
- },
1318
- {
1319
- "epoch": 138.89,
1320
- "eval_f1": 0.9032258064516129,
1321
- "eval_loss": 0.44773900508880615,
1322
- "eval_runtime": 0.5066,
1323
- "eval_samples_per_second": 138.167,
1324
- "eval_steps_per_second": 5.921,
1325
- "step": 278
1326
- },
1327
- {
1328
- "epoch": 139.89,
1329
- "eval_f1": 0.9032258064516129,
1330
- "eval_loss": 0.44755300879478455,
1331
- "eval_runtime": 0.5128,
1332
- "eval_samples_per_second": 136.51,
1333
- "eval_steps_per_second": 5.85,
1334
- "step": 280
1335
- },
1336
- {
1337
- "epoch": 140.89,
1338
- "eval_f1": 0.9032258064516129,
1339
- "eval_loss": 0.44738340377807617,
1340
- "eval_runtime": 0.5088,
1341
- "eval_samples_per_second": 137.582,
1342
- "eval_steps_per_second": 5.896,
1343
- "step": 282
1344
- },
1345
- {
1346
- "epoch": 141.89,
1347
- "eval_f1": 0.9032258064516129,
1348
- "eval_loss": 0.4472770690917969,
1349
- "eval_runtime": 0.5106,
1350
- "eval_samples_per_second": 137.086,
1351
- "eval_steps_per_second": 5.875,
1352
- "step": 284
1353
- },
1354
- {
1355
- "epoch": 142.89,
1356
- "eval_f1": 0.9032258064516129,
1357
- "eval_loss": 0.44719263911247253,
1358
- "eval_runtime": 0.5111,
1359
- "eval_samples_per_second": 136.971,
1360
- "eval_steps_per_second": 5.87,
1361
- "step": 286
1362
- },
1363
- {
1364
- "epoch": 143.89,
1365
- "eval_f1": 0.9032258064516129,
1366
- "eval_loss": 0.44712573289871216,
1367
- "eval_runtime": 0.519,
1368
- "eval_samples_per_second": 134.866,
1369
- "eval_steps_per_second": 5.78,
1370
- "step": 288
1371
- },
1372
- {
1373
- "epoch": 144.89,
1374
- "eval_f1": 0.9032258064516129,
1375
- "eval_loss": 0.44704240560531616,
1376
- "eval_runtime": 0.5125,
1377
- "eval_samples_per_second": 136.589,
1378
- "eval_steps_per_second": 5.854,
1379
- "step": 290
1380
- },
1381
- {
1382
- "epoch": 145.89,
1383
- "eval_f1": 0.9032258064516129,
1384
- "eval_loss": 0.4469843804836273,
1385
- "eval_runtime": 0.5194,
1386
- "eval_samples_per_second": 134.777,
1387
- "eval_steps_per_second": 5.776,
1388
- "step": 292
1389
- },
1390
- {
1391
- "epoch": 146.89,
1392
- "eval_f1": 0.9032258064516129,
1393
- "eval_loss": 0.44694092869758606,
1394
- "eval_runtime": 0.5135,
1395
- "eval_samples_per_second": 136.31,
1396
- "eval_steps_per_second": 5.842,
1397
- "step": 294
1398
- },
1399
- {
1400
- "epoch": 147.89,
1401
- "eval_f1": 0.9032258064516129,
1402
- "eval_loss": 0.4469132423400879,
1403
- "eval_runtime": 0.5162,
1404
- "eval_samples_per_second": 135.603,
1405
- "eval_steps_per_second": 5.812,
1406
- "step": 296
1407
- },
1408
- {
1409
- "epoch": 148.89,
1410
- "eval_f1": 0.9032258064516129,
1411
- "eval_loss": 0.44689762592315674,
1412
- "eval_runtime": 0.514,
1413
- "eval_samples_per_second": 136.176,
1414
- "eval_steps_per_second": 5.836,
1415
- "step": 298
1416
- },
1417
- {
1418
- "epoch": 149.89,
1419
- "learning_rate": 0.0,
1420
- "loss": 0.2556,
1421
- "step": 300
1422
- },
1423
- {
1424
- "epoch": 149.89,
1425
- "eval_f1": 0.9032258064516129,
1426
- "eval_loss": 0.44689178466796875,
1427
- "eval_runtime": 0.5161,
1428
- "eval_samples_per_second": 135.631,
1429
- "eval_steps_per_second": 5.813,
1430
- "step": 300
1431
- },
1432
- {
1433
- "epoch": 149.89,
1434
- "step": 300,
1435
- "total_flos": 1.0435256966870508e+18,
1436
- "train_loss": 0.3680222670237223,
1437
- "train_runtime": 877.204,
1438
- "train_samples_per_second": 47.366,
1439
- "train_steps_per_second": 0.342
1440
  }
1441
  ],
1442
- "max_steps": 300,
1443
- "num_train_epochs": 150,
1444
- "total_flos": 1.0435256966870508e+18,
1445
  "trial_name": null,
1446
  "trial_params": null
1447
  }
 
1
  {
2
+ "best_metric": 0.9312977099236641,
3
+ "best_model_checkpoint": "Cvt-finetuned-thyroid/checkpoint-36",
4
+ "epoch": 19.727272727272727,
5
+ "global_step": 40,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.73,
12
+ "eval_f1": 0.08695652173913043,
13
+ "eval_loss": 0.729124128818512,
14
+ "eval_runtime": 0.5602,
15
+ "eval_samples_per_second": 146.387,
16
+ "eval_steps_per_second": 5.356,
17
  "step": 2
18
  },
19
  {
20
+ "epoch": 1.73,
21
+ "eval_f1": 0.24,
22
+ "eval_loss": 0.715528666973114,
23
+ "eval_runtime": 0.4576,
24
+ "eval_samples_per_second": 179.212,
25
+ "eval_steps_per_second": 6.557,
26
  "step": 4
27
  },
28
  {
29
+ "epoch": 2.73,
30
+ "eval_f1": 0.4651162790697675,
31
+ "eval_loss": 0.6962855458259583,
32
+ "eval_runtime": 0.4553,
33
+ "eval_samples_per_second": 180.089,
34
+ "eval_steps_per_second": 6.589,
35
  "step": 6
36
  },
37
  {
38
+ "epoch": 3.73,
39
+ "eval_f1": 0.6732673267326733,
40
+ "eval_loss": 0.6805053353309631,
41
+ "eval_runtime": 0.456,
42
+ "eval_samples_per_second": 179.84,
43
+ "eval_steps_per_second": 6.58,
44
  "step": 8
45
  },
46
  {
47
+ "epoch": 4.73,
48
+ "eval_f1": 0.8035714285714285,
49
+ "eval_loss": 0.6671938896179199,
50
+ "eval_runtime": 0.4582,
51
+ "eval_samples_per_second": 178.951,
52
+ "eval_steps_per_second": 6.547,
53
  "step": 10
54
  },
55
  {
56
+ "epoch": 5.73,
57
+ "eval_f1": 0.8666666666666666,
58
+ "eval_loss": 0.6562464237213135,
59
+ "eval_runtime": 0.4523,
60
+ "eval_samples_per_second": 181.301,
61
+ "eval_steps_per_second": 6.633,
62
  "step": 12
63
  },
64
  {
65
+ "epoch": 6.73,
66
+ "eval_f1": 0.8688524590163934,
67
+ "eval_loss": 0.6468915343284607,
68
+ "eval_runtime": 0.4563,
69
+ "eval_samples_per_second": 179.688,
70
+ "eval_steps_per_second": 6.574,
71
  "step": 14
72
  },
73
  {
74
+ "epoch": 7.73,
75
+ "eval_f1": 0.9047619047619049,
76
+ "eval_loss": 0.6390055418014526,
77
+ "eval_runtime": 0.4554,
78
+ "eval_samples_per_second": 180.045,
79
+ "eval_steps_per_second": 6.587,
80
  "step": 16
81
  },
82
  {
83
+ "epoch": 8.73,
84
+ "eval_f1": 0.9218749999999999,
85
+ "eval_loss": 0.6321854591369629,
86
+ "eval_runtime": 0.4533,
87
+ "eval_samples_per_second": 180.88,
88
+ "eval_steps_per_second": 6.618,
89
  "step": 18
90
  },
91
  {
92
+ "epoch": 9.73,
93
+ "eval_f1": 0.9218749999999999,
94
+ "eval_loss": 0.6260910034179688,
95
+ "eval_runtime": 0.4585,
96
+ "eval_samples_per_second": 178.853,
97
+ "eval_steps_per_second": 6.543,
98
  "step": 20
99
  },
100
  {
101
+ "epoch": 10.73,
102
+ "eval_f1": 0.9147286821705426,
103
+ "eval_loss": 0.6207503080368042,
104
+ "eval_runtime": 0.4613,
105
+ "eval_samples_per_second": 177.741,
106
+ "eval_steps_per_second": 6.503,
107
  "step": 22
108
  },
109
  {
110
+ "epoch": 11.73,
111
+ "eval_f1": 0.9147286821705426,
112
+ "eval_loss": 0.6160098910331726,
113
+ "eval_runtime": 0.4683,
114
+ "eval_samples_per_second": 175.09,
115
+ "eval_steps_per_second": 6.406,
116
  "step": 24
117
  },
118
  {
119
+ "epoch": 12.36,
120
+ "learning_rate": 4.166666666666667e-06,
121
+ "loss": 0.8937,
122
  "step": 25
123
  },
124
  {
125
+ "epoch": 12.73,
126
+ "eval_f1": 0.9147286821705426,
127
+ "eval_loss": 0.6118525862693787,
128
+ "eval_runtime": 0.4721,
129
+ "eval_samples_per_second": 173.678,
130
+ "eval_steps_per_second": 6.354,
131
  "step": 26
132
  },
133
  {
134
+ "epoch": 13.73,
135
+ "eval_f1": 0.9147286821705426,
136
+ "eval_loss": 0.6081259846687317,
137
+ "eval_runtime": 0.4553,
138
+ "eval_samples_per_second": 180.118,
139
+ "eval_steps_per_second": 6.59,
140
  "step": 28
141
  },
142
  {
143
+ "epoch": 14.73,
144
+ "eval_f1": 0.923076923076923,
145
+ "eval_loss": 0.6047901511192322,
146
+ "eval_runtime": 0.4555,
147
+ "eval_samples_per_second": 180.011,
148
+ "eval_steps_per_second": 6.586,
149
  "step": 30
150
  },
151
  {
152
+ "epoch": 15.73,
153
+ "eval_f1": 0.923076923076923,
154
+ "eval_loss": 0.6022080183029175,
155
+ "eval_runtime": 0.4519,
156
+ "eval_samples_per_second": 181.441,
157
+ "eval_steps_per_second": 6.638,
158
  "step": 32
159
  },
160
  {
161
+ "epoch": 16.73,
162
+ "eval_f1": 0.923076923076923,
163
+ "eval_loss": 0.6001817584037781,
164
+ "eval_runtime": 0.4544,
165
+ "eval_samples_per_second": 180.453,
166
+ "eval_steps_per_second": 6.602,
167
  "step": 34
168
  },
169
  {
170
+ "epoch": 17.73,
171
+ "eval_f1": 0.9312977099236641,
172
+ "eval_loss": 0.5987359285354614,
173
+ "eval_runtime": 0.4559,
174
+ "eval_samples_per_second": 179.858,
175
+ "eval_steps_per_second": 6.58,
176
  "step": 36
177
  },
178
  {
179
+ "epoch": 18.73,
180
+ "eval_f1": 0.9312977099236641,
181
+ "eval_loss": 0.5978376269340515,
182
+ "eval_runtime": 0.4536,
183
+ "eval_samples_per_second": 180.765,
184
+ "eval_steps_per_second": 6.613,
185
  "step": 38
186
  },
187
  {
188
+ "epoch": 19.73,
189
+ "eval_f1": 0.9312977099236641,
190
+ "eval_loss": 0.5974743962287903,
191
+ "eval_runtime": 0.4555,
192
+ "eval_samples_per_second": 180.031,
193
+ "eval_steps_per_second": 6.586,
194
  "step": 40
195
  },
196
  {
197
+ "epoch": 19.73,
198
+ "step": 40,
199
+ "total_flos": 1.620732679243776e+17,
200
+ "train_loss": 0.8578881025314331,
201
+ "train_runtime": 102.9255,
202
+ "train_samples_per_second": 63.347,
203
+ "train_steps_per_second": 0.389
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
204
  }
205
  ],
206
+ "max_steps": 40,
207
+ "num_train_epochs": 20,
208
+ "total_flos": 1.620732679243776e+17,
209
  "trial_name": null,
210
  "trial_params": null
211
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:40d1617033179273eb9bed1677a63638edc820431ce580d577ec90836a4cfcb0
3
  size 3311
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98064b8c1615330c92e0bd8f4b34a86ff482df6adfeb93cfdfa4d377fa744a73
3
  size 3311