Augusto777 commited on
Commit
925876c
1 Parent(s): 4eba0ae

End of training

Browse files
README.md CHANGED
@@ -22,7 +22,7 @@ model-index:
22
  metrics:
23
  - name: Accuracy
24
  type: accuracy
25
- value: 0.7608695652173914
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -32,8 +32,8 @@ should probably proofread and complete it, then remove this comment. -->
32
 
33
  This model is a fine-tuned version of [google/vit-base-patch16-224](https://huggingface.co/google/vit-base-patch16-224) on the imagefolder dataset.
34
  It achieves the following results on the evaluation set:
35
- - Loss: 1.2530
36
- - Accuracy: 0.7609
37
 
38
  ## Model description
39
 
 
22
  metrics:
23
  - name: Accuracy
24
  type: accuracy
25
+ value: 0.8478260869565217
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
32
 
33
  This model is a fine-tuned version of [google/vit-base-patch16-224](https://huggingface.co/google/vit-base-patch16-224) on the imagefolder dataset.
34
  It achieves the following results on the evaluation set:
35
+ - Loss: 0.7770
36
+ - Accuracy: 0.8478
37
 
38
  ## Model description
39
 
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 39.61,
3
- "eval_accuracy": 0.8043478260869565,
4
- "eval_loss": 2.584726139752747e+28,
5
- "eval_runtime": 0.9982,
6
- "eval_samples_per_second": 46.082,
7
- "eval_steps_per_second": 6.011,
8
- "train_loss": 6.009952755433709e+28,
9
- "train_runtime": 674.9098,
10
- "train_samples_per_second": 48.54,
11
- "train_steps_per_second": 3.023
12
  }
 
1
  {
2
  "epoch": 39.61,
3
+ "eval_accuracy": 0.8478260869565217,
4
+ "eval_loss": 0.7770432233810425,
5
+ "eval_runtime": 1.0732,
6
+ "eval_samples_per_second": 42.861,
7
+ "eval_steps_per_second": 5.591,
8
+ "train_loss": 0.18828091986349546,
9
+ "train_runtime": 647.6496,
10
+ "train_samples_per_second": 50.583,
11
+ "train_steps_per_second": 3.15
12
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 39.61,
3
- "eval_accuracy": 0.8043478260869565,
4
- "eval_loss": 2.584726139752747e+28,
5
- "eval_runtime": 0.9982,
6
- "eval_samples_per_second": 46.082,
7
- "eval_steps_per_second": 6.011
8
  }
 
1
  {
2
  "epoch": 39.61,
3
+ "eval_accuracy": 0.8478260869565217,
4
+ "eval_loss": 0.7770432233810425,
5
+ "eval_runtime": 1.0732,
6
+ "eval_samples_per_second": 42.861,
7
+ "eval_steps_per_second": 5.591
8
  }
runs/Jun23_16-08-59_DESKTOP-SKBE9FB/events.out.tfevents.1719181356.DESKTOP-SKBE9FB.16180.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:edf082c17080765a531d7a3d13bcc101e1ec85b83d9bcc549e7a0db55add628f
3
+ size 411
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "epoch": 39.61,
3
- "train_loss": 6.009952755433709e+28,
4
- "train_runtime": 674.9098,
5
- "train_samples_per_second": 48.54,
6
- "train_steps_per_second": 3.023
7
  }
 
1
  {
2
  "epoch": 39.61,
3
+ "train_loss": 0.18828091986349546,
4
+ "train_runtime": 647.6496,
5
+ "train_samples_per_second": 50.583,
6
+ "train_steps_per_second": 3.15
7
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.8043478260869565,
3
- "best_model_checkpoint": "vit-base-patch16-224-ve-U13b-80RX1\\checkpoint-360",
4
  "epoch": 39.61165048543689,
5
  "eval_steps": 500,
6
  "global_step": 2040,
@@ -11,37 +11,37 @@
11
  {
12
  "epoch": 0.19,
13
  "learning_rate": 5.392156862745098e-06,
14
- "loss": 6.687978618025639e+28,
15
  "step": 10
16
  },
17
  {
18
  "epoch": 0.39,
19
  "learning_rate": 1.0784313725490196e-05,
20
- "loss": 5.796248488225585e+28,
21
  "step": 20
22
  },
23
  {
24
  "epoch": 0.58,
25
  "learning_rate": 1.6176470588235296e-05,
26
- "loss": 6.539356300076766e+28,
27
  "step": 30
28
  },
29
  {
30
  "epoch": 0.78,
31
  "learning_rate": 2.156862745098039e-05,
32
- "loss": 5.053138787427811e+28,
33
  "step": 40
34
  },
35
  {
36
  "epoch": 0.97,
37
  "learning_rate": 2.696078431372549e-05,
38
- "loss": 6.242113175336294e+28,
39
  "step": 50
40
  },
41
  {
42
  "epoch": 0.99,
43
- "eval_accuracy": 0.3695652173913043,
44
- "eval_loss": 2.584726139752747e+28,
45
  "eval_runtime": 0.8432,
46
  "eval_samples_per_second": 54.555,
47
  "eval_steps_per_second": 7.116,
@@ -50,1556 +50,1556 @@
50
  {
51
  "epoch": 1.17,
52
  "learning_rate": 3.235294117647059e-05,
53
- "loss": 5.300842021027069e+28,
54
  "step": 60
55
  },
56
  {
57
  "epoch": 1.36,
58
  "learning_rate": 3.774509803921569e-05,
59
- "loss": 6.093491612966058e+28,
60
  "step": 70
61
  },
62
  {
63
  "epoch": 1.55,
64
  "learning_rate": 4.313725490196078e-05,
65
- "loss": 4.90451835842553e+28,
66
  "step": 80
67
  },
68
  {
69
  "epoch": 1.75,
70
  "learning_rate": 4.8529411764705885e-05,
71
- "loss": 7.133844060714986e+28,
72
  "step": 90
73
  },
74
  {
75
  "epoch": 1.94,
76
  "learning_rate": 5.392156862745098e-05,
77
- "loss": 6.836600935974513e+28,
78
  "step": 100
79
  },
80
  {
81
  "epoch": 2.0,
82
- "eval_accuracy": 0.5434782608695652,
83
- "eval_loss": 2.584726139752747e+28,
84
- "eval_runtime": 0.7792,
85
- "eval_samples_per_second": 59.035,
86
- "eval_steps_per_second": 7.7,
87
  "step": 103
88
  },
89
  {
90
  "epoch": 2.14,
91
  "learning_rate": 5.477296181630547e-05,
92
- "loss": 5.152220836446151e+28,
93
  "step": 110
94
  },
95
  {
96
  "epoch": 2.33,
97
  "learning_rate": 5.448916408668731e-05,
98
- "loss": 6.985221742766112e+28,
99
  "step": 120
100
  },
101
  {
102
  "epoch": 2.52,
103
  "learning_rate": 5.4205366357069146e-05,
104
- "loss": 6.3907354932851674e+28,
105
  "step": 130
106
  },
107
  {
108
  "epoch": 2.72,
109
  "learning_rate": 5.392156862745098e-05,
110
- "loss": 5.350383045536239e+28,
111
  "step": 140
112
  },
113
  {
114
  "epoch": 2.91,
115
  "learning_rate": 5.363777089783282e-05,
116
- "loss": 5.64762654806603e+28,
117
  "step": 150
118
  },
119
  {
120
  "epoch": 2.99,
121
- "eval_accuracy": 0.717391304347826,
122
- "eval_loss": 2.584726139752747e+28,
123
- "eval_runtime": 0.8532,
124
- "eval_samples_per_second": 53.915,
125
- "eval_steps_per_second": 7.032,
126
  "step": 154
127
  },
128
  {
129
  "epoch": 3.11,
130
  "learning_rate": 5.3353973168214655e-05,
131
- "loss": 5.944870050595821e+28,
132
  "step": 160
133
  },
134
  {
135
  "epoch": 3.3,
136
  "learning_rate": 5.3070175438596496e-05,
137
- "loss": 6.985223253923386e+28,
138
  "step": 170
139
  },
140
  {
141
  "epoch": 3.5,
142
  "learning_rate": 5.278637770897833e-05,
143
- "loss": 6.242113175336294e+28,
144
  "step": 180
145
  },
146
  {
147
  "epoch": 3.69,
148
  "learning_rate": 5.2502579979360165e-05,
149
- "loss": 5.499004230117157e+28,
150
  "step": 190
151
  },
152
  {
153
  "epoch": 3.88,
154
  "learning_rate": 5.2218782249742006e-05,
155
- "loss": 5.944870050595821e+28,
156
  "step": 200
157
  },
158
  {
159
  "epoch": 4.0,
160
- "eval_accuracy": 0.7391304347826086,
161
- "eval_loss": 2.584726139752747e+28,
162
- "eval_runtime": 0.8142,
163
- "eval_samples_per_second": 56.498,
164
- "eval_steps_per_second": 7.369,
165
  "step": 206
166
  },
167
  {
168
  "epoch": 4.08,
169
  "learning_rate": 5.193498452012384e-05,
170
- "loss": 5.746707463716415e+28,
171
  "step": 210
172
  },
173
  {
174
  "epoch": 4.27,
175
  "learning_rate": 5.165118679050568e-05,
176
- "loss": 6.3907354932851674e+28,
177
  "step": 220
178
  },
179
  {
180
  "epoch": 4.47,
181
  "learning_rate": 5.1367389060887515e-05,
182
- "loss": 6.539355544498129e+28,
183
  "step": 230
184
  },
185
  {
186
  "epoch": 4.66,
187
  "learning_rate": 5.108359133126935e-05,
188
- "loss": 6.3907354932851674e+28,
189
  "step": 240
190
  },
191
  {
192
  "epoch": 4.85,
193
  "learning_rate": 5.079979360165119e-05,
194
- "loss": 5.796248110436267e+28,
195
  "step": 250
196
  },
197
  {
198
  "epoch": 4.99,
199
- "eval_accuracy": 0.7391304347826086,
200
- "eval_loss": 2.584726139752747e+28,
201
- "eval_runtime": 0.8212,
202
- "eval_samples_per_second": 56.017,
203
- "eval_steps_per_second": 7.307,
204
  "step": 257
205
  },
206
  {
207
  "epoch": 5.05,
208
  "learning_rate": 5.0515995872033025e-05,
209
- "loss": 4.161409035417075e+28,
210
  "step": 260
211
  },
212
  {
213
  "epoch": 5.24,
214
  "learning_rate": 5.023219814241486e-05,
215
- "loss": 6.539357811234041e+28,
216
  "step": 270
217
  },
218
  {
219
  "epoch": 5.44,
220
  "learning_rate": 4.99484004127967e-05,
221
- "loss": 6.836600935974513e+28,
222
  "step": 280
223
  },
224
  {
225
  "epoch": 5.63,
226
  "learning_rate": 4.9664602683178534e-05,
227
- "loss": 6.539356300076766e+28,
228
  "step": 290
229
  },
230
  {
231
  "epoch": 5.83,
232
  "learning_rate": 4.9380804953560375e-05,
233
- "loss": 5.647626925855348e+28,
234
  "step": 300
235
  },
236
  {
237
  "epoch": 6.0,
238
- "eval_accuracy": 0.717391304347826,
239
- "eval_loss": 2.584726139752747e+28,
240
- "eval_runtime": 0.7827,
241
- "eval_samples_per_second": 58.772,
242
- "eval_steps_per_second": 7.666,
243
  "step": 309
244
  },
245
  {
246
  "epoch": 6.02,
247
  "learning_rate": 4.909700722394221e-05,
248
- "loss": 4.5577334535972505e+28,
249
  "step": 310
250
  },
251
  {
252
  "epoch": 6.21,
253
  "learning_rate": 4.8813209494324044e-05,
254
- "loss": 5.647626170276711e+28,
255
  "step": 320
256
  },
257
  {
258
  "epoch": 6.41,
259
  "learning_rate": 4.8529411764705885e-05,
260
- "loss": 5.944870050595821e+28,
261
  "step": 330
262
  },
263
  {
264
  "epoch": 6.6,
265
  "learning_rate": 4.824561403508772e-05,
266
- "loss": 6.687978618025639e+28,
267
  "step": 340
268
  },
269
  {
270
  "epoch": 6.8,
271
  "learning_rate": 4.796181630546956e-05,
272
- "loss": 5.944870050595821e+28,
273
  "step": 350
274
  },
275
  {
276
  "epoch": 6.99,
277
  "learning_rate": 4.7678018575851394e-05,
278
- "loss": 6.093491612966058e+28,
279
  "step": 360
280
  },
281
  {
282
  "epoch": 6.99,
283
- "eval_accuracy": 0.8043478260869565,
284
- "eval_loss": 2.584726139752747e+28,
285
- "eval_runtime": 0.7872,
286
- "eval_samples_per_second": 58.435,
287
- "eval_steps_per_second": 7.622,
288
  "step": 360
289
  },
290
  {
291
  "epoch": 7.18,
292
  "learning_rate": 4.739422084623323e-05,
293
- "loss": 6.48981678672487e+28,
294
  "step": 370
295
  },
296
  {
297
  "epoch": 7.38,
298
  "learning_rate": 4.711042311661507e-05,
299
- "loss": 5.053139920795767e+28,
300
  "step": 380
301
  },
302
  {
303
  "epoch": 7.57,
304
  "learning_rate": 4.6826625386996904e-05,
305
- "loss": 6.093492368544695e+28,
306
  "step": 390
307
  },
308
  {
309
  "epoch": 7.77,
310
  "learning_rate": 4.6542827657378745e-05,
311
- "loss": 5.64762654806603e+28,
312
  "step": 400
313
  },
314
  {
315
  "epoch": 7.96,
316
  "learning_rate": 4.625902992776058e-05,
317
- "loss": 6.985221742766112e+28,
318
  "step": 410
319
  },
320
  {
321
  "epoch": 8.0,
322
- "eval_accuracy": 0.717391304347826,
323
- "eval_loss": 2.584726139752747e+28,
324
- "eval_runtime": 0.8117,
325
- "eval_samples_per_second": 56.672,
326
- "eval_steps_per_second": 7.392,
327
  "step": 412
328
  },
329
  {
330
  "epoch": 8.16,
331
  "learning_rate": 4.597523219814241e-05,
332
- "loss": 6.3907347377065295e+28,
333
  "step": 420
334
  },
335
  {
336
  "epoch": 8.35,
337
  "learning_rate": 4.5691434468524254e-05,
338
- "loss": 8.025574946093677e+28,
339
  "step": 430
340
  },
341
  {
342
  "epoch": 8.54,
343
  "learning_rate": 4.540763673890609e-05,
344
- "loss": 4.0127867174682015e+28,
345
  "step": 440
346
  },
347
  {
348
  "epoch": 8.74,
349
  "learning_rate": 4.512383900928793e-05,
350
- "loss": 5.499005363485113e+28,
351
  "step": 450
352
  },
353
  {
354
  "epoch": 8.93,
355
  "learning_rate": 4.4840041279669764e-05,
356
- "loss": 5.94487042838514e+28,
357
  "step": 460
358
  },
359
  {
360
  "epoch": 8.99,
361
- "eval_accuracy": 0.7608695652173914,
362
- "eval_loss": 2.584726139752747e+28,
363
- "eval_runtime": 0.7912,
364
- "eval_samples_per_second": 58.141,
365
- "eval_steps_per_second": 7.584,
366
  "step": 463
367
  },
368
  {
369
  "epoch": 9.13,
370
  "learning_rate": 4.45562435500516e-05,
371
- "loss": 5.300842776605706e+28,
372
  "step": 470
373
  },
374
  {
375
  "epoch": 9.32,
376
  "learning_rate": 4.427244582043344e-05,
377
- "loss": 5.647626925855348e+28,
378
  "step": 480
379
  },
380
  {
381
  "epoch": 9.51,
382
  "learning_rate": 4.398864809081527e-05,
383
- "loss": 6.539357811234041e+28,
384
  "step": 490
385
  },
386
  {
387
  "epoch": 9.71,
388
  "learning_rate": 4.3704850361197114e-05,
389
- "loss": 5.053139920795767e+28,
390
  "step": 500
391
  },
392
  {
393
  "epoch": 9.9,
394
  "learning_rate": 4.342105263157895e-05,
395
- "loss": 8.174196508463913e+28,
396
  "step": 510
397
  },
398
  {
399
  "epoch": 10.0,
400
- "eval_accuracy": 0.7608695652173914,
401
- "eval_loss": 2.584726139752747e+28,
402
- "eval_runtime": 0.8382,
403
- "eval_samples_per_second": 54.88,
404
- "eval_steps_per_second": 7.158,
405
  "step": 515
406
  },
407
  {
408
  "epoch": 10.1,
409
  "learning_rate": 4.313725490196078e-05,
410
- "loss": 5.449463961186624e+28,
411
  "step": 520
412
  },
413
  {
414
  "epoch": 10.29,
415
  "learning_rate": 4.2853457172342624e-05,
416
- "loss": 5.944870050595821e+28,
417
  "step": 530
418
  },
419
  {
420
  "epoch": 10.49,
421
  "learning_rate": 4.2569659442724465e-05,
422
- "loss": 6.687978618025639e+28,
423
  "step": 540
424
  },
425
  {
426
  "epoch": 10.68,
427
  "learning_rate": 4.22858617131063e-05,
428
- "loss": 6.3907347377065295e+28,
429
  "step": 550
430
  },
431
  {
432
  "epoch": 10.87,
433
  "learning_rate": 4.200206398348813e-05,
434
- "loss": 5.796248488225585e+28,
435
  "step": 560
436
  },
437
  {
438
  "epoch": 10.99,
439
- "eval_accuracy": 0.7608695652173914,
440
- "eval_loss": 2.584726139752747e+28,
441
- "eval_runtime": 0.7817,
442
- "eval_samples_per_second": 58.847,
443
- "eval_steps_per_second": 7.676,
444
  "step": 566
445
  },
446
  {
447
  "epoch": 11.07,
448
  "learning_rate": 4.171826625386997e-05,
449
- "loss": 5.5980859013461785e+28,
450
  "step": 570
451
  },
452
  {
453
  "epoch": 11.26,
454
  "learning_rate": 4.143446852425181e-05,
455
- "loss": 6.242113553125612e+28,
456
  "step": 580
457
  },
458
  {
459
  "epoch": 11.46,
460
  "learning_rate": 4.115067079463365e-05,
461
- "loss": 7.4310871854554575e+28,
462
  "step": 590
463
  },
464
  {
465
  "epoch": 11.65,
466
  "learning_rate": 4.0866873065015484e-05,
467
- "loss": 5.350383045536239e+28,
468
  "step": 600
469
  },
470
  {
471
  "epoch": 11.84,
472
  "learning_rate": 4.058307533539732e-05,
473
- "loss": 5.796247354857629e+28,
474
  "step": 610
475
  },
476
  {
477
  "epoch": 12.0,
478
- "eval_accuracy": 0.8043478260869565,
479
- "eval_loss": 2.584726139752747e+28,
480
- "eval_runtime": 0.7862,
481
- "eval_samples_per_second": 58.51,
482
- "eval_steps_per_second": 7.632,
483
  "step": 618
484
  },
485
  {
486
  "epoch": 12.04,
487
  "learning_rate": 4.029927760577915e-05,
488
- "loss": 4.90451835842553e+28,
489
  "step": 620
490
  },
491
  {
492
  "epoch": 12.23,
493
  "learning_rate": 4.001547987616099e-05,
494
- "loss": 5.053139920795767e+28,
495
  "step": 630
496
  },
497
  {
498
  "epoch": 12.43,
499
  "learning_rate": 3.9731682146542834e-05,
500
- "loss": 5.64762654806603e+28,
501
  "step": 640
502
  },
503
  {
504
  "epoch": 12.62,
505
  "learning_rate": 3.944788441692467e-05,
506
- "loss": 7.282465623085222e+28,
507
  "step": 650
508
  },
509
  {
510
  "epoch": 12.82,
511
  "learning_rate": 3.91640866873065e-05,
512
- "loss": 5.796248488225585e+28,
513
  "step": 660
514
  },
515
  {
516
  "epoch": 12.99,
517
- "eval_accuracy": 0.7391304347826086,
518
- "eval_loss": 2.584726139752747e+28,
519
- "eval_runtime": 0.7922,
520
- "eval_samples_per_second": 58.067,
521
- "eval_steps_per_second": 7.574,
522
  "step": 669
523
  },
524
  {
525
  "epoch": 13.01,
526
  "learning_rate": 3.888028895768834e-05,
527
- "loss": 5.647626170276711e+28,
528
  "step": 670
529
  },
530
  {
531
  "epoch": 13.2,
532
  "learning_rate": 3.859649122807018e-05,
533
- "loss": 6.687979373604276e+28,
534
  "step": 680
535
  },
536
  {
537
  "epoch": 13.4,
538
  "learning_rate": 3.831269349845202e-05,
539
- "loss": 6.985221742766112e+28,
540
  "step": 690
541
  },
542
  {
543
  "epoch": 13.59,
544
  "learning_rate": 3.802889576883385e-05,
545
- "loss": 4.458652537946866e+28,
546
  "step": 700
547
  },
548
  {
549
  "epoch": 13.79,
550
  "learning_rate": 3.774509803921569e-05,
551
- "loss": 6.093492368544695e+28,
552
  "step": 710
553
  },
554
  {
555
  "epoch": 13.98,
556
  "learning_rate": 3.746130030959752e-05,
557
- "loss": 6.093491612966058e+28,
558
  "step": 720
559
  },
560
  {
561
  "epoch": 14.0,
562
- "eval_accuracy": 0.8043478260869565,
563
- "eval_loss": 2.584726139752747e+28,
564
- "eval_runtime": 0.7842,
565
- "eval_samples_per_second": 58.659,
566
- "eval_steps_per_second": 7.651,
567
  "step": 721
568
  },
569
  {
570
  "epoch": 14.17,
571
  "learning_rate": 3.7177502579979356e-05,
572
- "loss": 5.449464338975942e+28,
573
  "step": 730
574
  },
575
  {
576
  "epoch": 14.37,
577
  "learning_rate": 3.6893704850361204e-05,
578
- "loss": 6.242113175336294e+28,
579
  "step": 740
580
  },
581
  {
582
  "epoch": 14.56,
583
  "learning_rate": 3.660990712074304e-05,
584
- "loss": 6.242113930914931e+28,
585
  "step": 750
586
  },
587
  {
588
  "epoch": 14.76,
589
  "learning_rate": 3.632610939112487e-05,
590
- "loss": 5.944870050595821e+28,
591
  "step": 760
592
  },
593
  {
594
  "epoch": 14.95,
595
  "learning_rate": 3.6042311661506706e-05,
596
- "loss": 5.796247732646948e+28,
597
  "step": 770
598
  },
599
  {
600
  "epoch": 14.99,
601
- "eval_accuracy": 0.782608695652174,
602
- "eval_loss": 2.584726139752747e+28,
603
- "eval_runtime": 0.8192,
604
- "eval_samples_per_second": 56.153,
605
- "eval_steps_per_second": 7.324,
606
  "step": 772
607
  },
608
  {
609
  "epoch": 15.15,
610
  "learning_rate": 3.575851393188854e-05,
611
- "loss": 7.183384329645518e+28,
612
  "step": 780
613
  },
614
  {
615
  "epoch": 15.34,
616
  "learning_rate": 3.547471620227039e-05,
617
- "loss": 5.944870806174458e+28,
618
  "step": 790
619
  },
620
  {
621
  "epoch": 15.53,
622
  "learning_rate": 3.519091847265222e-05,
623
- "loss": 6.093491612966058e+28,
624
  "step": 800
625
  },
626
  {
627
  "epoch": 15.73,
628
  "learning_rate": 3.490712074303406e-05,
629
- "loss": 5.64762654806603e+28,
630
  "step": 810
631
  },
632
  {
633
  "epoch": 15.92,
634
  "learning_rate": 3.462332301341589e-05,
635
- "loss": 5.0531391652171296e+28,
636
  "step": 820
637
  },
638
  {
639
  "epoch": 16.0,
640
- "eval_accuracy": 0.782608695652174,
641
- "eval_loss": 2.584726139752747e+28,
642
- "eval_runtime": 0.7942,
643
- "eval_samples_per_second": 57.921,
644
- "eval_steps_per_second": 7.555,
645
  "step": 824
646
  },
647
  {
648
  "epoch": 16.12,
649
  "learning_rate": 3.4339525283797725e-05,
650
- "loss": 7.4310871854554575e+28,
651
  "step": 830
652
  },
653
  {
654
  "epoch": 16.31,
655
  "learning_rate": 3.405572755417957e-05,
656
- "loss": 5.64762654806603e+28,
657
  "step": 840
658
  },
659
  {
660
  "epoch": 16.5,
661
  "learning_rate": 3.377192982456141e-05,
662
- "loss": 6.242113930914931e+28,
663
  "step": 850
664
  },
665
  {
666
  "epoch": 16.7,
667
  "learning_rate": 3.348813209494324e-05,
668
- "loss": 5.944870050595821e+28,
669
  "step": 860
670
  },
671
  {
672
  "epoch": 16.89,
673
  "learning_rate": 3.3204334365325076e-05,
674
- "loss": 5.796248110436267e+28,
675
  "step": 870
676
  },
677
  {
678
  "epoch": 16.99,
679
- "eval_accuracy": 0.717391304347826,
680
- "eval_loss": 2.584726139752747e+28,
681
- "eval_runtime": 0.7732,
682
- "eval_samples_per_second": 59.495,
683
- "eval_steps_per_second": 7.76,
684
  "step": 875
685
  },
686
  {
687
  "epoch": 17.09,
688
  "learning_rate": 3.292053663570691e-05,
689
- "loss": 5.300842776605706e+28,
690
  "step": 880
691
  },
692
  {
693
  "epoch": 17.28,
694
  "learning_rate": 3.263673890608876e-05,
695
- "loss": 5.944870050595821e+28,
696
  "step": 890
697
  },
698
  {
699
  "epoch": 17.48,
700
  "learning_rate": 3.235294117647059e-05,
701
- "loss": 7.4310879410340955e+28,
702
  "step": 900
703
  },
704
  {
705
  "epoch": 17.67,
706
  "learning_rate": 3.2069143446852426e-05,
707
- "loss": 5.499004607906475e+28,
708
  "step": 910
709
  },
710
  {
711
  "epoch": 17.86,
712
  "learning_rate": 3.178534571723426e-05,
713
- "loss": 4.755896040476657e+28,
714
  "step": 920
715
  },
716
  {
717
  "epoch": 18.0,
718
- "eval_accuracy": 0.717391304347826,
719
- "eval_loss": 2.584726139752747e+28,
720
- "eval_runtime": 0.8047,
721
- "eval_samples_per_second": 57.165,
722
- "eval_steps_per_second": 7.456,
723
  "step": 927
724
  },
725
  {
726
  "epoch": 18.06,
727
  "learning_rate": 3.1501547987616095e-05,
728
- "loss": 6.935681473835579e+28,
729
  "step": 930
730
  },
731
  {
732
  "epoch": 18.25,
733
  "learning_rate": 3.121775025799794e-05,
734
- "loss": 5.94487042838514e+28,
735
  "step": 940
736
  },
737
  {
738
  "epoch": 18.45,
739
  "learning_rate": 3.093395252837978e-05,
740
- "loss": 5.796247732646948e+28,
741
  "step": 950
742
  },
743
  {
744
  "epoch": 18.64,
745
  "learning_rate": 3.065015479876161e-05,
746
- "loss": 5.944869672806502e+28,
747
  "step": 960
748
  },
749
  {
750
  "epoch": 18.83,
751
  "learning_rate": 3.0366357069143445e-05,
752
- "loss": 6.836600180395876e+28,
753
  "step": 970
754
  },
755
  {
756
  "epoch": 18.99,
757
- "eval_accuracy": 0.782608695652174,
758
- "eval_loss": 2.584726139752747e+28,
759
- "eval_runtime": 0.8292,
760
- "eval_samples_per_second": 55.476,
761
- "eval_steps_per_second": 7.236,
762
  "step": 978
763
  },
764
  {
765
  "epoch": 19.03,
766
  "learning_rate": 3.0082559339525283e-05,
767
- "loss": 5.746708219295052e+28,
768
  "step": 980
769
  },
770
  {
771
  "epoch": 19.22,
772
  "learning_rate": 2.9798761609907124e-05,
773
- "loss": 5.3503834233255575e+28,
774
  "step": 990
775
  },
776
  {
777
  "epoch": 19.42,
778
  "learning_rate": 2.9514963880288958e-05,
779
- "loss": 5.64762654806603e+28,
780
  "step": 1000
781
  },
782
  {
783
  "epoch": 19.61,
784
  "learning_rate": 2.9231166150670796e-05,
785
- "loss": 6.687979373604276e+28,
786
  "step": 1010
787
  },
788
  {
789
  "epoch": 19.81,
790
  "learning_rate": 2.894736842105263e-05,
791
- "loss": 6.093491612966058e+28,
792
  "step": 1020
793
  },
794
  {
795
  "epoch": 20.0,
796
  "learning_rate": 2.8663570691434468e-05,
797
- "loss": 6.043951721824843e+28,
798
  "step": 1030
799
  },
800
  {
801
  "epoch": 20.0,
802
- "eval_accuracy": 0.717391304347826,
803
- "eval_loss": 2.584726139752747e+28,
804
- "eval_runtime": 0.7862,
805
- "eval_samples_per_second": 58.511,
806
- "eval_steps_per_second": 7.632,
807
  "step": 1030
808
  },
809
  {
810
  "epoch": 20.19,
811
  "learning_rate": 2.837977296181631e-05,
812
- "loss": 5.796248488225585e+28,
813
  "step": 1040
814
  },
815
  {
816
  "epoch": 20.39,
817
  "learning_rate": 2.8095975232198143e-05,
818
- "loss": 6.093491990755376e+28,
819
  "step": 1050
820
  },
821
  {
822
  "epoch": 20.58,
823
  "learning_rate": 2.781217750257998e-05,
824
- "loss": 5.3503834233255575e+28,
825
  "step": 1060
826
  },
827
  {
828
  "epoch": 20.78,
829
  "learning_rate": 2.7528379772961815e-05,
830
- "loss": 5.647626925855348e+28,
831
  "step": 1070
832
  },
833
  {
834
  "epoch": 20.97,
835
  "learning_rate": 2.7244582043343656e-05,
836
- "loss": 7.579709503404331e+28,
837
  "step": 1080
838
  },
839
  {
840
  "epoch": 20.99,
841
- "eval_accuracy": 0.7391304347826086,
842
- "eval_loss": 2.584726139752747e+28,
843
- "eval_runtime": 0.8627,
844
- "eval_samples_per_second": 53.321,
845
- "eval_steps_per_second": 6.955,
846
  "step": 1081
847
  },
848
  {
849
  "epoch": 21.17,
850
  "learning_rate": 2.696078431372549e-05,
851
- "loss": 5.944870050595821e+28,
852
  "step": 1090
853
  },
854
  {
855
  "epoch": 21.36,
856
  "learning_rate": 2.6676986584107328e-05,
857
- "loss": 4.904517602846893e+28,
858
  "step": 1100
859
  },
860
  {
861
  "epoch": 21.55,
862
  "learning_rate": 2.6393188854489165e-05,
863
- "loss": 6.242113175336294e+28,
864
  "step": 1110
865
  },
866
  {
867
  "epoch": 21.75,
868
  "learning_rate": 2.6109391124871003e-05,
869
- "loss": 6.242113930914931e+28,
870
  "step": 1120
871
  },
872
  {
873
  "epoch": 21.94,
874
  "learning_rate": 2.582559339525284e-05,
875
- "loss": 5.944870050595821e+28,
876
  "step": 1130
877
  },
878
  {
879
  "epoch": 22.0,
880
- "eval_accuracy": 0.7608695652173914,
881
- "eval_loss": 2.584726139752747e+28,
882
- "eval_runtime": 0.7952,
883
- "eval_samples_per_second": 57.847,
884
- "eval_steps_per_second": 7.545,
885
  "step": 1133
886
  },
887
  {
888
  "epoch": 22.14,
889
  "learning_rate": 2.5541795665634675e-05,
890
- "loss": 6.687978618025639e+28,
891
  "step": 1140
892
  },
893
  {
894
  "epoch": 22.33,
895
  "learning_rate": 2.5257997936016512e-05,
896
- "loss": 5.796248866014904e+28,
897
  "step": 1150
898
  },
899
  {
900
  "epoch": 22.52,
901
  "learning_rate": 2.497420020639835e-05,
902
- "loss": 5.0531391652171296e+28,
903
  "step": 1160
904
  },
905
  {
906
  "epoch": 22.72,
907
  "learning_rate": 2.4690402476780188e-05,
908
- "loss": 5.796248488225585e+28,
909
  "step": 1170
910
  },
911
  {
912
  "epoch": 22.91,
913
  "learning_rate": 2.4406604747162022e-05,
914
- "loss": 6.3907354932851674e+28,
915
  "step": 1180
916
  },
917
  {
918
  "epoch": 22.99,
919
- "eval_accuracy": 0.7608695652173914,
920
- "eval_loss": 2.584726139752747e+28,
921
- "eval_runtime": 0.7682,
922
- "eval_samples_per_second": 59.882,
923
- "eval_steps_per_second": 7.811,
924
  "step": 1184
925
  },
926
  {
927
  "epoch": 23.11,
928
  "learning_rate": 2.412280701754386e-05,
929
- "loss": 5.449464338975942e+28,
930
  "step": 1190
931
  },
932
  {
933
  "epoch": 23.3,
934
  "learning_rate": 2.3839009287925697e-05,
935
- "loss": 5.944869672806502e+28,
936
  "step": 1200
937
  },
938
  {
939
  "epoch": 23.5,
940
  "learning_rate": 2.3555211558307535e-05,
941
- "loss": 5.796247732646948e+28,
942
  "step": 1210
943
  },
944
  {
945
  "epoch": 23.69,
946
  "learning_rate": 2.3271413828689372e-05,
947
- "loss": 5.350383045536239e+28,
948
  "step": 1220
949
  },
950
  {
951
  "epoch": 23.88,
952
  "learning_rate": 2.2987616099071207e-05,
953
- "loss": 7.876953383723441e+28,
954
  "step": 1230
955
  },
956
  {
957
  "epoch": 24.0,
958
- "eval_accuracy": 0.7608695652173914,
959
- "eval_loss": 2.584726139752747e+28,
960
- "eval_runtime": 0.7822,
961
- "eval_samples_per_second": 58.81,
962
- "eval_steps_per_second": 7.671,
963
  "step": 1236
964
  },
965
  {
966
  "epoch": 24.08,
967
  "learning_rate": 2.2703818369453044e-05,
968
- "loss": 6.6384375935164695e+28,
969
  "step": 1240
970
  },
971
  {
972
  "epoch": 24.27,
973
  "learning_rate": 2.2420020639834882e-05,
974
- "loss": 4.755895662687339e+28,
975
  "step": 1250
976
  },
977
  {
978
  "epoch": 24.47,
979
  "learning_rate": 2.213622291021672e-05,
980
- "loss": 5.499004985695794e+28,
981
  "step": 1260
982
  },
983
  {
984
  "epoch": 24.66,
985
  "learning_rate": 2.1852425180598557e-05,
986
- "loss": 5.944869295017184e+28,
987
  "step": 1270
988
  },
989
  {
990
  "epoch": 24.85,
991
  "learning_rate": 2.156862745098039e-05,
992
- "loss": 6.687978618025639e+28,
993
  "step": 1280
994
  },
995
  {
996
  "epoch": 24.99,
997
- "eval_accuracy": 0.8043478260869565,
998
- "eval_loss": 2.584726139752747e+28,
999
- "eval_runtime": 0.7867,
1000
- "eval_samples_per_second": 58.473,
1001
- "eval_steps_per_second": 7.627,
1002
  "step": 1287
1003
  },
1004
  {
1005
  "epoch": 25.05,
1006
  "learning_rate": 2.1284829721362232e-05,
1007
- "loss": 6.8861404493264086e+28,
1008
  "step": 1290
1009
  },
1010
  {
1011
  "epoch": 25.24,
1012
  "learning_rate": 2.1001031991744067e-05,
1013
- "loss": 6.3907354932851674e+28,
1014
  "step": 1300
1015
  },
1016
  {
1017
  "epoch": 25.44,
1018
  "learning_rate": 2.0717234262125904e-05,
1019
- "loss": 5.499004607906475e+28,
1020
  "step": 1310
1021
  },
1022
  {
1023
  "epoch": 25.63,
1024
  "learning_rate": 2.0433436532507742e-05,
1025
- "loss": 6.985221742766112e+28,
1026
  "step": 1320
1027
  },
1028
  {
1029
  "epoch": 25.83,
1030
  "learning_rate": 2.0149638802889576e-05,
1031
- "loss": 5.647626925855348e+28,
1032
  "step": 1330
1033
  },
1034
  {
1035
  "epoch": 26.0,
1036
  "eval_accuracy": 0.7608695652173914,
1037
- "eval_loss": 2.584726139752747e+28,
1038
- "eval_runtime": 0.7822,
1039
- "eval_samples_per_second": 58.81,
1040
- "eval_steps_per_second": 7.671,
1041
  "step": 1339
1042
  },
1043
  {
1044
  "epoch": 26.02,
1045
  "learning_rate": 1.9865841073271417e-05,
1046
- "loss": 5.15222121423547e+28,
1047
  "step": 1340
1048
  },
1049
  {
1050
  "epoch": 26.21,
1051
  "learning_rate": 1.958204334365325e-05,
1052
- "loss": 4.904517602846893e+28,
1053
  "step": 1350
1054
  },
1055
  {
1056
  "epoch": 26.41,
1057
  "learning_rate": 1.929824561403509e-05,
1058
- "loss": 7.133844060714986e+28,
1059
  "step": 1360
1060
  },
1061
  {
1062
  "epoch": 26.6,
1063
  "learning_rate": 1.9014447884416927e-05,
1064
- "loss": 5.944870050595821e+28,
1065
  "step": 1370
1066
  },
1067
  {
1068
  "epoch": 26.8,
1069
  "learning_rate": 1.873065015479876e-05,
1070
- "loss": 5.796248110436267e+28,
1071
  "step": 1380
1072
  },
1073
  {
1074
  "epoch": 26.99,
1075
  "learning_rate": 1.8446852425180602e-05,
1076
- "loss": 6.687978618025639e+28,
1077
  "step": 1390
1078
  },
1079
  {
1080
  "epoch": 26.99,
1081
- "eval_accuracy": 0.7608695652173914,
1082
- "eval_loss": 2.584726139752747e+28,
1083
- "eval_runtime": 0.8347,
1084
- "eval_samples_per_second": 55.11,
1085
- "eval_steps_per_second": 7.188,
1086
  "step": 1390
1087
  },
1088
  {
1089
  "epoch": 27.18,
1090
  "learning_rate": 1.8163054695562436e-05,
1091
- "loss": 6.78706066704398e+28,
1092
  "step": 1400
1093
  },
1094
  {
1095
  "epoch": 27.38,
1096
  "learning_rate": 1.787925696594427e-05,
1097
- "loss": 5.350383045536239e+28,
1098
  "step": 1410
1099
  },
1100
  {
1101
  "epoch": 27.57,
1102
  "learning_rate": 1.759545923632611e-05,
1103
- "loss": 5.944870050595821e+28,
1104
  "step": 1420
1105
  },
1106
  {
1107
  "epoch": 27.77,
1108
  "learning_rate": 1.7311661506707946e-05,
1109
- "loss": 5.647626170276711e+28,
1110
  "step": 1430
1111
  },
1112
  {
1113
  "epoch": 27.96,
1114
  "learning_rate": 1.7027863777089787e-05,
1115
- "loss": 6.093491990755376e+28,
1116
  "step": 1440
1117
  },
1118
  {
1119
  "epoch": 28.0,
1120
  "eval_accuracy": 0.717391304347826,
1121
- "eval_loss": 2.584726139752747e+28,
1122
- "eval_runtime": 0.7802,
1123
- "eval_samples_per_second": 58.961,
1124
- "eval_steps_per_second": 7.691,
1125
  "step": 1442
1126
  },
1127
  {
1128
  "epoch": 28.16,
1129
  "learning_rate": 1.674406604747162e-05,
1130
- "loss": 7.084303036205815e+28,
1131
  "step": 1450
1132
  },
1133
  {
1134
  "epoch": 28.35,
1135
  "learning_rate": 1.6460268317853455e-05,
1136
- "loss": 6.985222498344749e+28,
1137
  "step": 1460
1138
  },
1139
  {
1140
  "epoch": 28.54,
1141
  "learning_rate": 1.6176470588235296e-05,
1142
- "loss": 4.458652160157547e+28,
1143
  "step": 1470
1144
  },
1145
  {
1146
  "epoch": 28.74,
1147
  "learning_rate": 1.589267285861713e-05,
1148
- "loss": 6.093491612966058e+28,
1149
  "step": 1480
1150
  },
1151
  {
1152
  "epoch": 28.93,
1153
  "learning_rate": 1.560887512899897e-05,
1154
- "loss": 5.499004607906475e+28,
1155
  "step": 1490
1156
  },
1157
  {
1158
  "epoch": 28.99,
1159
- "eval_accuracy": 0.717391304347826,
1160
- "eval_loss": 2.584726139752747e+28,
1161
- "eval_runtime": 0.7762,
1162
- "eval_samples_per_second": 59.264,
1163
- "eval_steps_per_second": 7.73,
1164
  "step": 1493
1165
  },
1166
  {
1167
  "epoch": 29.13,
1168
  "learning_rate": 1.5325077399380806e-05,
1169
- "loss": 5.895329403875969e+28,
1170
  "step": 1500
1171
  },
1172
  {
1173
  "epoch": 29.32,
1174
  "learning_rate": 1.5041279669762642e-05,
1175
- "loss": 5.944869672806502e+28,
1176
  "step": 1510
1177
  },
1178
  {
1179
  "epoch": 29.51,
1180
  "learning_rate": 1.4757481940144479e-05,
1181
- "loss": 6.242113930914931e+28,
1182
  "step": 1520
1183
  },
1184
  {
1185
  "epoch": 29.71,
1186
  "learning_rate": 1.4473684210526315e-05,
1187
- "loss": 5.3503838011148765e+28,
1188
  "step": 1530
1189
  },
1190
  {
1191
  "epoch": 29.9,
1192
  "learning_rate": 1.4189886480908154e-05,
1193
- "loss": 6.985223253923386e+28,
1194
  "step": 1540
1195
  },
1196
  {
1197
  "epoch": 30.0,
1198
  "eval_accuracy": 0.782608695652174,
1199
- "eval_loss": 2.584726139752747e+28,
1200
- "eval_runtime": 0.7882,
1201
- "eval_samples_per_second": 58.363,
1202
- "eval_steps_per_second": 7.613,
1203
  "step": 1545
1204
  },
1205
  {
1206
  "epoch": 30.1,
1207
  "learning_rate": 1.390608875128999e-05,
1208
- "loss": 5.449463961186624e+28,
1209
  "step": 1550
1210
  },
1211
  {
1212
  "epoch": 30.29,
1213
  "learning_rate": 1.3622291021671828e-05,
1214
- "loss": 4.755896040476657e+28,
1215
  "step": 1560
1216
  },
1217
  {
1218
  "epoch": 30.49,
1219
  "learning_rate": 1.3338493292053664e-05,
1220
- "loss": 5.796248488225585e+28,
1221
  "step": 1570
1222
  },
1223
  {
1224
  "epoch": 30.68,
1225
  "learning_rate": 1.3054695562435501e-05,
1226
- "loss": 6.836600935974513e+28,
1227
  "step": 1580
1228
  },
1229
  {
1230
  "epoch": 30.87,
1231
  "learning_rate": 1.2770897832817337e-05,
1232
- "loss": 7.133844060714986e+28,
1233
  "step": 1590
1234
  },
1235
  {
1236
  "epoch": 30.99,
1237
  "eval_accuracy": 0.7608695652173914,
1238
- "eval_loss": 2.584726139752747e+28,
1239
- "eval_runtime": 0.8897,
1240
- "eval_samples_per_second": 51.701,
1241
- "eval_steps_per_second": 6.744,
1242
  "step": 1596
1243
  },
1244
  {
1245
  "epoch": 31.07,
1246
  "learning_rate": 1.2487100103199175e-05,
1247
- "loss": 5.499004607906475e+28,
1248
  "step": 1600
1249
  },
1250
  {
1251
  "epoch": 31.26,
1252
  "learning_rate": 1.2203302373581011e-05,
1253
- "loss": 6.3907347377065295e+28,
1254
  "step": 1610
1255
  },
1256
  {
1257
  "epoch": 31.46,
1258
  "learning_rate": 1.1919504643962849e-05,
1259
- "loss": 5.944870050595821e+28,
1260
  "step": 1620
1261
  },
1262
  {
1263
  "epoch": 31.65,
1264
  "learning_rate": 1.1635706914344686e-05,
1265
- "loss": 6.242113930914931e+28,
1266
  "step": 1630
1267
  },
1268
  {
1269
  "epoch": 31.84,
1270
  "learning_rate": 1.1351909184726522e-05,
1271
- "loss": 6.687979373604276e+28,
1272
  "step": 1640
1273
  },
1274
  {
1275
  "epoch": 32.0,
1276
- "eval_accuracy": 0.7608695652173914,
1277
- "eval_loss": 2.584726139752747e+28,
1278
- "eval_runtime": 0.8212,
1279
- "eval_samples_per_second": 56.017,
1280
- "eval_steps_per_second": 7.307,
1281
  "step": 1648
1282
  },
1283
  {
1284
  "epoch": 32.04,
1285
  "learning_rate": 1.106811145510836e-05,
1286
- "loss": 5.449463961186624e+28,
1287
  "step": 1650
1288
  },
1289
  {
1290
  "epoch": 32.23,
1291
  "learning_rate": 1.0784313725490196e-05,
1292
- "loss": 5.499004607906475e+28,
1293
  "step": 1660
1294
  },
1295
  {
1296
  "epoch": 32.43,
1297
  "learning_rate": 1.0500515995872033e-05,
1298
- "loss": 5.944870050595821e+28,
1299
  "step": 1670
1300
  },
1301
  {
1302
  "epoch": 32.62,
1303
  "learning_rate": 1.0216718266253871e-05,
1304
- "loss": 7.7283318213532045e+28,
1305
  "step": 1680
1306
  },
1307
  {
1308
  "epoch": 32.82,
1309
  "learning_rate": 9.932920536635709e-06,
1310
- "loss": 4.458652537946866e+28,
1311
  "step": 1690
1312
  },
1313
  {
1314
  "epoch": 32.99,
1315
- "eval_accuracy": 0.7391304347826086,
1316
- "eval_loss": 2.584726139752747e+28,
1317
- "eval_runtime": 0.8102,
1318
- "eval_samples_per_second": 56.775,
1319
- "eval_steps_per_second": 7.405,
1320
  "step": 1699
1321
  },
1322
  {
1323
  "epoch": 33.01,
1324
  "learning_rate": 9.649122807017545e-06,
1325
- "loss": 5.944870050595821e+28,
1326
  "step": 1700
1327
  },
1328
  {
1329
  "epoch": 33.2,
1330
  "learning_rate": 9.36532507739938e-06,
1331
- "loss": 5.944869295017184e+28,
1332
  "step": 1710
1333
  },
1334
  {
1335
  "epoch": 33.4,
1336
  "learning_rate": 9.081527347781218e-06,
1337
- "loss": 6.539356300076766e+28,
1338
  "step": 1720
1339
  },
1340
  {
1341
  "epoch": 33.59,
1342
  "learning_rate": 8.797729618163056e-06,
1343
- "loss": 6.093491990755376e+28,
1344
  "step": 1730
1345
  },
1346
  {
1347
  "epoch": 33.79,
1348
  "learning_rate": 8.513931888544893e-06,
1349
- "loss": 5.796247732646948e+28,
1350
  "step": 1740
1351
  },
1352
  {
1353
  "epoch": 33.98,
1354
  "learning_rate": 8.230134158926728e-06,
1355
- "loss": 5.944870050595821e+28,
1356
  "step": 1750
1357
  },
1358
  {
1359
  "epoch": 34.0,
1360
- "eval_accuracy": 0.7391304347826086,
1361
- "eval_loss": 2.584726139752747e+28,
1362
- "eval_runtime": 0.8142,
1363
- "eval_samples_per_second": 56.495,
1364
- "eval_steps_per_second": 7.369,
1365
  "step": 1751
1366
  },
1367
  {
1368
  "epoch": 34.17,
1369
  "learning_rate": 7.946336429308565e-06,
1370
- "loss": 5.449464338975942e+28,
1371
  "step": 1760
1372
  },
1373
  {
1374
  "epoch": 34.37,
1375
  "learning_rate": 7.662538699690403e-06,
1376
- "loss": 6.985221742766112e+28,
1377
  "step": 1770
1378
  },
1379
  {
1380
  "epoch": 34.56,
1381
  "learning_rate": 7.3787409700722396e-06,
1382
- "loss": 4.904517225057574e+28,
1383
  "step": 1780
1384
  },
1385
  {
1386
  "epoch": 34.76,
1387
  "learning_rate": 7.094943240454077e-06,
1388
- "loss": 6.539356300076766e+28,
1389
  "step": 1790
1390
  },
1391
  {
1392
  "epoch": 34.95,
1393
  "learning_rate": 6.811145510835914e-06,
1394
- "loss": 6.3907347377065295e+28,
1395
  "step": 1800
1396
  },
1397
  {
1398
  "epoch": 34.99,
1399
- "eval_accuracy": 0.7391304347826086,
1400
- "eval_loss": 2.584726139752747e+28,
1401
- "eval_runtime": 0.7832,
1402
- "eval_samples_per_second": 58.735,
1403
- "eval_steps_per_second": 7.661,
1404
  "step": 1802
1405
  },
1406
  {
1407
  "epoch": 35.15,
1408
  "learning_rate": 6.527347781217751e-06,
1409
- "loss": 5.300842021027069e+28,
1410
  "step": 1810
1411
  },
1412
  {
1413
  "epoch": 35.34,
1414
  "learning_rate": 6.2435500515995875e-06,
1415
- "loss": 5.499004607906475e+28,
1416
  "step": 1820
1417
  },
1418
  {
1419
  "epoch": 35.53,
1420
  "learning_rate": 5.959752321981424e-06,
1421
- "loss": 6.539357055655403e+28,
1422
  "step": 1830
1423
  },
1424
  {
1425
  "epoch": 35.73,
1426
  "learning_rate": 5.675954592363261e-06,
1427
- "loss": 5.94487042838514e+28,
1428
  "step": 1840
1429
  },
1430
  {
1431
  "epoch": 35.92,
1432
  "learning_rate": 5.392156862745098e-06,
1433
- "loss": 7.579709503404331e+28,
1434
  "step": 1850
1435
  },
1436
  {
1437
  "epoch": 36.0,
1438
- "eval_accuracy": 0.7391304347826086,
1439
- "eval_loss": 2.584726139752747e+28,
1440
- "eval_runtime": 0.8372,
1441
- "eval_samples_per_second": 54.945,
1442
- "eval_steps_per_second": 7.167,
1443
  "step": 1854
1444
  },
1445
  {
1446
  "epoch": 36.12,
1447
  "learning_rate": 5.1083591331269355e-06,
1448
- "loss": 4.90451835842553e+28,
1449
  "step": 1860
1450
  },
1451
  {
1452
  "epoch": 36.31,
1453
  "learning_rate": 4.824561403508772e-06,
1454
- "loss": 5.053139920795767e+28,
1455
  "step": 1870
1456
  },
1457
  {
1458
  "epoch": 36.5,
1459
  "learning_rate": 4.540763673890609e-06,
1460
- "loss": 7.72833031019593e+28,
1461
  "step": 1880
1462
  },
1463
  {
1464
  "epoch": 36.7,
1465
  "learning_rate": 4.256965944272447e-06,
1466
- "loss": 4.90451835842553e+28,
1467
  "step": 1890
1468
  },
1469
  {
1470
  "epoch": 36.89,
1471
  "learning_rate": 3.973168214654283e-06,
1472
- "loss": 6.242113553125612e+28,
1473
  "step": 1900
1474
  },
1475
  {
1476
  "epoch": 36.99,
1477
  "eval_accuracy": 0.717391304347826,
1478
- "eval_loss": 2.584726139752747e+28,
1479
- "eval_runtime": 0.8212,
1480
- "eval_samples_per_second": 56.017,
1481
- "eval_steps_per_second": 7.307,
1482
  "step": 1905
1483
  },
1484
  {
1485
  "epoch": 37.09,
1486
  "learning_rate": 3.6893704850361198e-06,
1487
- "loss": 7.232925354154688e+28,
1488
  "step": 1910
1489
  },
1490
  {
1491
  "epoch": 37.28,
1492
  "learning_rate": 3.405572755417957e-06,
1493
- "loss": 7.282466378663859e+28,
1494
  "step": 1920
1495
  },
1496
  {
1497
  "epoch": 37.48,
1498
  "learning_rate": 3.1217750257997938e-06,
1499
- "loss": 5.944870050595821e+28,
1500
  "step": 1930
1501
  },
1502
  {
1503
  "epoch": 37.67,
1504
  "learning_rate": 2.8379772961816305e-06,
1505
- "loss": 5.201761105376684e+28,
1506
  "step": 1940
1507
  },
1508
  {
1509
  "epoch": 37.86,
1510
  "learning_rate": 2.5541795665634677e-06,
1511
- "loss": 5.350383045536239e+28,
1512
  "step": 1950
1513
  },
1514
  {
1515
  "epoch": 38.0,
1516
- "eval_accuracy": 0.717391304347826,
1517
- "eval_loss": 2.584726139752747e+28,
1518
- "eval_runtime": 0.8017,
1519
- "eval_samples_per_second": 57.379,
1520
- "eval_steps_per_second": 7.484,
1521
  "step": 1957
1522
  },
1523
  {
1524
  "epoch": 38.06,
1525
  "learning_rate": 2.2703818369453045e-06,
1526
- "loss": 4.755896040476657e+28,
1527
  "step": 1960
1528
  },
1529
  {
1530
  "epoch": 38.25,
1531
  "learning_rate": 1.9865841073271413e-06,
1532
- "loss": 6.985222498344749e+28,
1533
  "step": 1970
1534
  },
1535
  {
1536
  "epoch": 38.45,
1537
  "learning_rate": 1.7027863777089785e-06,
1538
- "loss": 5.499004607906475e+28,
1539
  "step": 1980
1540
  },
1541
  {
1542
  "epoch": 38.64,
1543
  "learning_rate": 1.4189886480908153e-06,
1544
- "loss": 6.242113930914931e+28,
1545
  "step": 1990
1546
  },
1547
  {
1548
  "epoch": 38.83,
1549
  "learning_rate": 1.1351909184726523e-06,
1550
- "loss": 5.64762654806603e+28,
1551
  "step": 2000
1552
  },
1553
  {
1554
  "epoch": 38.99,
1555
- "eval_accuracy": 0.717391304347826,
1556
- "eval_loss": 2.584726139752747e+28,
1557
- "eval_runtime": 0.7817,
1558
- "eval_samples_per_second": 58.848,
1559
- "eval_steps_per_second": 7.676,
1560
  "step": 2008
1561
  },
1562
  {
1563
  "epoch": 39.03,
1564
  "learning_rate": 8.513931888544892e-07,
1565
- "loss": 5.895329403875969e+28,
1566
  "step": 2010
1567
  },
1568
  {
1569
  "epoch": 39.22,
1570
  "learning_rate": 5.675954592363261e-07,
1571
- "loss": 6.687978618025639e+28,
1572
  "step": 2020
1573
  },
1574
  {
1575
  "epoch": 39.42,
1576
  "learning_rate": 2.8379772961816306e-07,
1577
- "loss": 6.093491612966058e+28,
1578
  "step": 2030
1579
  },
1580
  {
1581
  "epoch": 39.61,
1582
  "learning_rate": 0.0,
1583
- "loss": 5.3503826677469204e+28,
1584
  "step": 2040
1585
  },
1586
  {
1587
  "epoch": 39.61,
1588
- "eval_accuracy": 0.717391304347826,
1589
- "eval_loss": 2.584726139752747e+28,
1590
- "eval_runtime": 0.7909,
1591
- "eval_samples_per_second": 58.159,
1592
- "eval_steps_per_second": 7.586,
1593
  "step": 2040
1594
  },
1595
  {
1596
  "epoch": 39.61,
1597
  "step": 2040,
1598
  "total_flos": 2.5142726714989363e+18,
1599
- "train_loss": 6.009952755433709e+28,
1600
- "train_runtime": 674.9098,
1601
- "train_samples_per_second": 48.54,
1602
- "train_steps_per_second": 3.023
1603
  }
1604
  ],
1605
  "logging_steps": 10,
 
1
  {
2
+ "best_metric": 0.8478260869565217,
3
+ "best_model_checkpoint": "vit-base-patch16-224-ve-U13b-80RX1\\checkpoint-824",
4
  "epoch": 39.61165048543689,
5
  "eval_steps": 500,
6
  "global_step": 2040,
 
11
  {
12
  "epoch": 0.19,
13
  "learning_rate": 5.392156862745098e-06,
14
+ "loss": 1.3862,
15
  "step": 10
16
  },
17
  {
18
  "epoch": 0.39,
19
  "learning_rate": 1.0784313725490196e-05,
20
+ "loss": 1.3853,
21
  "step": 20
22
  },
23
  {
24
  "epoch": 0.58,
25
  "learning_rate": 1.6176470588235296e-05,
26
+ "loss": 1.3787,
27
  "step": 30
28
  },
29
  {
30
  "epoch": 0.78,
31
  "learning_rate": 2.156862745098039e-05,
32
+ "loss": 1.3618,
33
  "step": 40
34
  },
35
  {
36
  "epoch": 0.97,
37
  "learning_rate": 2.696078431372549e-05,
38
+ "loss": 1.3157,
39
  "step": 50
40
  },
41
  {
42
  "epoch": 0.99,
43
+ "eval_accuracy": 0.34782608695652173,
44
+ "eval_loss": 1.2967547178268433,
45
  "eval_runtime": 0.8432,
46
  "eval_samples_per_second": 54.555,
47
  "eval_steps_per_second": 7.116,
 
50
  {
51
  "epoch": 1.17,
52
  "learning_rate": 3.235294117647059e-05,
53
+ "loss": 1.268,
54
  "step": 60
55
  },
56
  {
57
  "epoch": 1.36,
58
  "learning_rate": 3.774509803921569e-05,
59
+ "loss": 1.1883,
60
  "step": 70
61
  },
62
  {
63
  "epoch": 1.55,
64
  "learning_rate": 4.313725490196078e-05,
65
+ "loss": 1.1403,
66
  "step": 80
67
  },
68
  {
69
  "epoch": 1.75,
70
  "learning_rate": 4.8529411764705885e-05,
71
+ "loss": 1.0174,
72
  "step": 90
73
  },
74
  {
75
  "epoch": 1.94,
76
  "learning_rate": 5.392156862745098e-05,
77
+ "loss": 1.0334,
78
  "step": 100
79
  },
80
  {
81
  "epoch": 2.0,
82
+ "eval_accuracy": 0.5217391304347826,
83
+ "eval_loss": 1.0059559345245361,
84
+ "eval_runtime": 0.8107,
85
+ "eval_samples_per_second": 56.741,
86
+ "eval_steps_per_second": 7.401,
87
  "step": 103
88
  },
89
  {
90
  "epoch": 2.14,
91
  "learning_rate": 5.477296181630547e-05,
92
+ "loss": 0.9461,
93
  "step": 110
94
  },
95
  {
96
  "epoch": 2.33,
97
  "learning_rate": 5.448916408668731e-05,
98
+ "loss": 0.9687,
99
  "step": 120
100
  },
101
  {
102
  "epoch": 2.52,
103
  "learning_rate": 5.4205366357069146e-05,
104
+ "loss": 0.8107,
105
  "step": 130
106
  },
107
  {
108
  "epoch": 2.72,
109
  "learning_rate": 5.392156862745098e-05,
110
+ "loss": 0.6984,
111
  "step": 140
112
  },
113
  {
114
  "epoch": 2.91,
115
  "learning_rate": 5.363777089783282e-05,
116
+ "loss": 0.691,
117
  "step": 150
118
  },
119
  {
120
  "epoch": 2.99,
121
+ "eval_accuracy": 0.7608695652173914,
122
+ "eval_loss": 0.75058913230896,
123
+ "eval_runtime": 0.8087,
124
+ "eval_samples_per_second": 56.882,
125
+ "eval_steps_per_second": 7.419,
126
  "step": 154
127
  },
128
  {
129
  "epoch": 3.11,
130
  "learning_rate": 5.3353973168214655e-05,
131
+ "loss": 0.6486,
132
  "step": 160
133
  },
134
  {
135
  "epoch": 3.3,
136
  "learning_rate": 5.3070175438596496e-05,
137
+ "loss": 0.5011,
138
  "step": 170
139
  },
140
  {
141
  "epoch": 3.5,
142
  "learning_rate": 5.278637770897833e-05,
143
+ "loss": 0.5658,
144
  "step": 180
145
  },
146
  {
147
  "epoch": 3.69,
148
  "learning_rate": 5.2502579979360165e-05,
149
+ "loss": 0.6253,
150
  "step": 190
151
  },
152
  {
153
  "epoch": 3.88,
154
  "learning_rate": 5.2218782249742006e-05,
155
+ "loss": 0.5005,
156
  "step": 200
157
  },
158
  {
159
  "epoch": 4.0,
160
+ "eval_accuracy": 0.782608695652174,
161
+ "eval_loss": 0.643278181552887,
162
+ "eval_runtime": 0.8062,
163
+ "eval_samples_per_second": 57.059,
164
+ "eval_steps_per_second": 7.443,
165
  "step": 206
166
  },
167
  {
168
  "epoch": 4.08,
169
  "learning_rate": 5.193498452012384e-05,
170
+ "loss": 0.4162,
171
  "step": 210
172
  },
173
  {
174
  "epoch": 4.27,
175
  "learning_rate": 5.165118679050568e-05,
176
+ "loss": 0.3671,
177
  "step": 220
178
  },
179
  {
180
  "epoch": 4.47,
181
  "learning_rate": 5.1367389060887515e-05,
182
+ "loss": 0.4731,
183
  "step": 230
184
  },
185
  {
186
  "epoch": 4.66,
187
  "learning_rate": 5.108359133126935e-05,
188
+ "loss": 0.4894,
189
  "step": 240
190
  },
191
  {
192
  "epoch": 4.85,
193
  "learning_rate": 5.079979360165119e-05,
194
+ "loss": 0.3478,
195
  "step": 250
196
  },
197
  {
198
  "epoch": 4.99,
199
+ "eval_accuracy": 0.7608695652173914,
200
+ "eval_loss": 0.5674107074737549,
201
+ "eval_runtime": 0.8242,
202
+ "eval_samples_per_second": 55.813,
203
+ "eval_steps_per_second": 7.28,
204
  "step": 257
205
  },
206
  {
207
  "epoch": 5.05,
208
  "learning_rate": 5.0515995872033025e-05,
209
+ "loss": 0.273,
210
  "step": 260
211
  },
212
  {
213
  "epoch": 5.24,
214
  "learning_rate": 5.023219814241486e-05,
215
+ "loss": 0.2627,
216
  "step": 270
217
  },
218
  {
219
  "epoch": 5.44,
220
  "learning_rate": 4.99484004127967e-05,
221
+ "loss": 0.2588,
222
  "step": 280
223
  },
224
  {
225
  "epoch": 5.63,
226
  "learning_rate": 4.9664602683178534e-05,
227
+ "loss": 0.2494,
228
  "step": 290
229
  },
230
  {
231
  "epoch": 5.83,
232
  "learning_rate": 4.9380804953560375e-05,
233
+ "loss": 0.3339,
234
  "step": 300
235
  },
236
  {
237
  "epoch": 6.0,
238
+ "eval_accuracy": 0.7608695652173914,
239
+ "eval_loss": 0.6622583270072937,
240
+ "eval_runtime": 0.7892,
241
+ "eval_samples_per_second": 58.287,
242
+ "eval_steps_per_second": 7.603,
243
  "step": 309
244
  },
245
  {
246
  "epoch": 6.02,
247
  "learning_rate": 4.909700722394221e-05,
248
+ "loss": 0.2956,
249
  "step": 310
250
  },
251
  {
252
  "epoch": 6.21,
253
  "learning_rate": 4.8813209494324044e-05,
254
+ "loss": 0.3412,
255
  "step": 320
256
  },
257
  {
258
  "epoch": 6.41,
259
  "learning_rate": 4.8529411764705885e-05,
260
+ "loss": 0.3145,
261
  "step": 330
262
  },
263
  {
264
  "epoch": 6.6,
265
  "learning_rate": 4.824561403508772e-05,
266
+ "loss": 0.2888,
267
  "step": 340
268
  },
269
  {
270
  "epoch": 6.8,
271
  "learning_rate": 4.796181630546956e-05,
272
+ "loss": 0.2232,
273
  "step": 350
274
  },
275
  {
276
  "epoch": 6.99,
277
  "learning_rate": 4.7678018575851394e-05,
278
+ "loss": 0.2533,
279
  "step": 360
280
  },
281
  {
282
  "epoch": 6.99,
283
+ "eval_accuracy": 0.7391304347826086,
284
+ "eval_loss": 0.6905426979064941,
285
+ "eval_runtime": 0.8007,
286
+ "eval_samples_per_second": 57.451,
287
+ "eval_steps_per_second": 7.494,
288
  "step": 360
289
  },
290
  {
291
  "epoch": 7.18,
292
  "learning_rate": 4.739422084623323e-05,
293
+ "loss": 0.2497,
294
  "step": 370
295
  },
296
  {
297
  "epoch": 7.38,
298
  "learning_rate": 4.711042311661507e-05,
299
+ "loss": 0.3348,
300
  "step": 380
301
  },
302
  {
303
  "epoch": 7.57,
304
  "learning_rate": 4.6826625386996904e-05,
305
+ "loss": 0.2502,
306
  "step": 390
307
  },
308
  {
309
  "epoch": 7.77,
310
  "learning_rate": 4.6542827657378745e-05,
311
+ "loss": 0.2309,
312
  "step": 400
313
  },
314
  {
315
  "epoch": 7.96,
316
  "learning_rate": 4.625902992776058e-05,
317
+ "loss": 0.138,
318
  "step": 410
319
  },
320
  {
321
  "epoch": 8.0,
322
+ "eval_accuracy": 0.782608695652174,
323
+ "eval_loss": 0.7250736355781555,
324
+ "eval_runtime": 0.7887,
325
+ "eval_samples_per_second": 58.325,
326
+ "eval_steps_per_second": 7.608,
327
  "step": 412
328
  },
329
  {
330
  "epoch": 8.16,
331
  "learning_rate": 4.597523219814241e-05,
332
+ "loss": 0.1209,
333
  "step": 420
334
  },
335
  {
336
  "epoch": 8.35,
337
  "learning_rate": 4.5691434468524254e-05,
338
+ "loss": 0.0895,
339
  "step": 430
340
  },
341
  {
342
  "epoch": 8.54,
343
  "learning_rate": 4.540763673890609e-05,
344
+ "loss": 0.1707,
345
  "step": 440
346
  },
347
  {
348
  "epoch": 8.74,
349
  "learning_rate": 4.512383900928793e-05,
350
+ "loss": 0.1935,
351
  "step": 450
352
  },
353
  {
354
  "epoch": 8.93,
355
  "learning_rate": 4.4840041279669764e-05,
356
+ "loss": 0.1289,
357
  "step": 460
358
  },
359
  {
360
  "epoch": 8.99,
361
+ "eval_accuracy": 0.7391304347826086,
362
+ "eval_loss": 0.7467479109764099,
363
+ "eval_runtime": 0.7842,
364
+ "eval_samples_per_second": 58.659,
365
+ "eval_steps_per_second": 7.651,
366
  "step": 463
367
  },
368
  {
369
  "epoch": 9.13,
370
  "learning_rate": 4.45562435500516e-05,
371
+ "loss": 0.0955,
372
  "step": 470
373
  },
374
  {
375
  "epoch": 9.32,
376
  "learning_rate": 4.427244582043344e-05,
377
+ "loss": 0.1515,
378
  "step": 480
379
  },
380
  {
381
  "epoch": 9.51,
382
  "learning_rate": 4.398864809081527e-05,
383
+ "loss": 0.2114,
384
  "step": 490
385
  },
386
  {
387
  "epoch": 9.71,
388
  "learning_rate": 4.3704850361197114e-05,
389
+ "loss": 0.0635,
390
  "step": 500
391
  },
392
  {
393
  "epoch": 9.9,
394
  "learning_rate": 4.342105263157895e-05,
395
+ "loss": 0.152,
396
  "step": 510
397
  },
398
  {
399
  "epoch": 10.0,
400
+ "eval_accuracy": 0.717391304347826,
401
+ "eval_loss": 0.9011275172233582,
402
+ "eval_runtime": 0.8162,
403
+ "eval_samples_per_second": 56.36,
404
+ "eval_steps_per_second": 7.351,
405
  "step": 515
406
  },
407
  {
408
  "epoch": 10.1,
409
  "learning_rate": 4.313725490196078e-05,
410
+ "loss": 0.113,
411
  "step": 520
412
  },
413
  {
414
  "epoch": 10.29,
415
  "learning_rate": 4.2853457172342624e-05,
416
+ "loss": 0.1291,
417
  "step": 530
418
  },
419
  {
420
  "epoch": 10.49,
421
  "learning_rate": 4.2569659442724465e-05,
422
+ "loss": 0.1063,
423
  "step": 540
424
  },
425
  {
426
  "epoch": 10.68,
427
  "learning_rate": 4.22858617131063e-05,
428
+ "loss": 0.1218,
429
  "step": 550
430
  },
431
  {
432
  "epoch": 10.87,
433
  "learning_rate": 4.200206398348813e-05,
434
+ "loss": 0.2609,
435
  "step": 560
436
  },
437
  {
438
  "epoch": 10.99,
439
+ "eval_accuracy": 0.717391304347826,
440
+ "eval_loss": 1.0149883031845093,
441
+ "eval_runtime": 0.8082,
442
+ "eval_samples_per_second": 56.918,
443
+ "eval_steps_per_second": 7.424,
444
  "step": 566
445
  },
446
  {
447
  "epoch": 11.07,
448
  "learning_rate": 4.171826625386997e-05,
449
+ "loss": 0.2021,
450
  "step": 570
451
  },
452
  {
453
  "epoch": 11.26,
454
  "learning_rate": 4.143446852425181e-05,
455
+ "loss": 0.1712,
456
  "step": 580
457
  },
458
  {
459
  "epoch": 11.46,
460
  "learning_rate": 4.115067079463365e-05,
461
+ "loss": 0.0442,
462
  "step": 590
463
  },
464
  {
465
  "epoch": 11.65,
466
  "learning_rate": 4.0866873065015484e-05,
467
+ "loss": 0.1487,
468
  "step": 600
469
  },
470
  {
471
  "epoch": 11.84,
472
  "learning_rate": 4.058307533539732e-05,
473
+ "loss": 0.2202,
474
  "step": 610
475
  },
476
  {
477
  "epoch": 12.0,
478
+ "eval_accuracy": 0.782608695652174,
479
+ "eval_loss": 0.9713221788406372,
480
+ "eval_runtime": 0.7902,
481
+ "eval_samples_per_second": 58.215,
482
+ "eval_steps_per_second": 7.593,
483
  "step": 618
484
  },
485
  {
486
  "epoch": 12.04,
487
  "learning_rate": 4.029927760577915e-05,
488
+ "loss": 0.1409,
489
  "step": 620
490
  },
491
  {
492
  "epoch": 12.23,
493
  "learning_rate": 4.001547987616099e-05,
494
+ "loss": 0.1339,
495
  "step": 630
496
  },
497
  {
498
  "epoch": 12.43,
499
  "learning_rate": 3.9731682146542834e-05,
500
+ "loss": 0.1061,
501
  "step": 640
502
  },
503
  {
504
  "epoch": 12.62,
505
  "learning_rate": 3.944788441692467e-05,
506
+ "loss": 0.0665,
507
  "step": 650
508
  },
509
  {
510
  "epoch": 12.82,
511
  "learning_rate": 3.91640866873065e-05,
512
+ "loss": 0.1083,
513
  "step": 660
514
  },
515
  {
516
  "epoch": 12.99,
517
+ "eval_accuracy": 0.6739130434782609,
518
+ "eval_loss": 1.1106468439102173,
519
+ "eval_runtime": 0.8002,
520
+ "eval_samples_per_second": 57.487,
521
+ "eval_steps_per_second": 7.498,
522
  "step": 669
523
  },
524
  {
525
  "epoch": 13.01,
526
  "learning_rate": 3.888028895768834e-05,
527
+ "loss": 0.0472,
528
  "step": 670
529
  },
530
  {
531
  "epoch": 13.2,
532
  "learning_rate": 3.859649122807018e-05,
533
+ "loss": 0.1036,
534
  "step": 680
535
  },
536
  {
537
  "epoch": 13.4,
538
  "learning_rate": 3.831269349845202e-05,
539
+ "loss": 0.1185,
540
  "step": 690
541
  },
542
  {
543
  "epoch": 13.59,
544
  "learning_rate": 3.802889576883385e-05,
545
+ "loss": 0.0721,
546
  "step": 700
547
  },
548
  {
549
  "epoch": 13.79,
550
  "learning_rate": 3.774509803921569e-05,
551
+ "loss": 0.1805,
552
  "step": 710
553
  },
554
  {
555
  "epoch": 13.98,
556
  "learning_rate": 3.746130030959752e-05,
557
+ "loss": 0.07,
558
  "step": 720
559
  },
560
  {
561
  "epoch": 14.0,
562
+ "eval_accuracy": 0.717391304347826,
563
+ "eval_loss": 1.121076226234436,
564
+ "eval_runtime": 0.8312,
565
+ "eval_samples_per_second": 55.342,
566
+ "eval_steps_per_second": 7.219,
567
  "step": 721
568
  },
569
  {
570
  "epoch": 14.17,
571
  "learning_rate": 3.7177502579979356e-05,
572
+ "loss": 0.0911,
573
  "step": 730
574
  },
575
  {
576
  "epoch": 14.37,
577
  "learning_rate": 3.6893704850361204e-05,
578
+ "loss": 0.1052,
579
  "step": 740
580
  },
581
  {
582
  "epoch": 14.56,
583
  "learning_rate": 3.660990712074304e-05,
584
+ "loss": 0.1289,
585
  "step": 750
586
  },
587
  {
588
  "epoch": 14.76,
589
  "learning_rate": 3.632610939112487e-05,
590
+ "loss": 0.118,
591
  "step": 760
592
  },
593
  {
594
  "epoch": 14.95,
595
  "learning_rate": 3.6042311661506706e-05,
596
+ "loss": 0.0791,
597
  "step": 770
598
  },
599
  {
600
  "epoch": 14.99,
601
+ "eval_accuracy": 0.7608695652173914,
602
+ "eval_loss": 1.1829625368118286,
603
+ "eval_runtime": 0.8107,
604
+ "eval_samples_per_second": 56.742,
605
+ "eval_steps_per_second": 7.401,
606
  "step": 772
607
  },
608
  {
609
  "epoch": 15.15,
610
  "learning_rate": 3.575851393188854e-05,
611
+ "loss": 0.0461,
612
  "step": 780
613
  },
614
  {
615
  "epoch": 15.34,
616
  "learning_rate": 3.547471620227039e-05,
617
+ "loss": 0.2331,
618
  "step": 790
619
  },
620
  {
621
  "epoch": 15.53,
622
  "learning_rate": 3.519091847265222e-05,
623
+ "loss": 0.0504,
624
  "step": 800
625
  },
626
  {
627
  "epoch": 15.73,
628
  "learning_rate": 3.490712074303406e-05,
629
+ "loss": 0.0089,
630
  "step": 810
631
  },
632
  {
633
  "epoch": 15.92,
634
  "learning_rate": 3.462332301341589e-05,
635
+ "loss": 0.0427,
636
  "step": 820
637
  },
638
  {
639
  "epoch": 16.0,
640
+ "eval_accuracy": 0.8478260869565217,
641
+ "eval_loss": 0.7770432233810425,
642
+ "eval_runtime": 0.7982,
643
+ "eval_samples_per_second": 57.631,
644
+ "eval_steps_per_second": 7.517,
645
  "step": 824
646
  },
647
  {
648
  "epoch": 16.12,
649
  "learning_rate": 3.4339525283797725e-05,
650
+ "loss": 0.1128,
651
  "step": 830
652
  },
653
  {
654
  "epoch": 16.31,
655
  "learning_rate": 3.405572755417957e-05,
656
+ "loss": 0.1035,
657
  "step": 840
658
  },
659
  {
660
  "epoch": 16.5,
661
  "learning_rate": 3.377192982456141e-05,
662
+ "loss": 0.0311,
663
  "step": 850
664
  },
665
  {
666
  "epoch": 16.7,
667
  "learning_rate": 3.348813209494324e-05,
668
+ "loss": 0.0639,
669
  "step": 860
670
  },
671
  {
672
  "epoch": 16.89,
673
  "learning_rate": 3.3204334365325076e-05,
674
+ "loss": 0.1219,
675
  "step": 870
676
  },
677
  {
678
  "epoch": 16.99,
679
+ "eval_accuracy": 0.7391304347826086,
680
+ "eval_loss": 1.096178650856018,
681
+ "eval_runtime": 0.7962,
682
+ "eval_samples_per_second": 57.776,
683
+ "eval_steps_per_second": 7.536,
684
  "step": 875
685
  },
686
  {
687
  "epoch": 17.09,
688
  "learning_rate": 3.292053663570691e-05,
689
+ "loss": 0.0854,
690
  "step": 880
691
  },
692
  {
693
  "epoch": 17.28,
694
  "learning_rate": 3.263673890608876e-05,
695
+ "loss": 0.1007,
696
  "step": 890
697
  },
698
  {
699
  "epoch": 17.48,
700
  "learning_rate": 3.235294117647059e-05,
701
+ "loss": 0.0978,
702
  "step": 900
703
  },
704
  {
705
  "epoch": 17.67,
706
  "learning_rate": 3.2069143446852426e-05,
707
+ "loss": 0.125,
708
  "step": 910
709
  },
710
  {
711
  "epoch": 17.86,
712
  "learning_rate": 3.178534571723426e-05,
713
+ "loss": 0.0739,
714
  "step": 920
715
  },
716
  {
717
  "epoch": 18.0,
718
+ "eval_accuracy": 0.7608695652173914,
719
+ "eval_loss": 0.9447097182273865,
720
+ "eval_runtime": 0.8317,
721
+ "eval_samples_per_second": 55.309,
722
+ "eval_steps_per_second": 7.214,
723
  "step": 927
724
  },
725
  {
726
  "epoch": 18.06,
727
  "learning_rate": 3.1501547987616095e-05,
728
+ "loss": 0.0827,
729
  "step": 930
730
  },
731
  {
732
  "epoch": 18.25,
733
  "learning_rate": 3.121775025799794e-05,
734
+ "loss": 0.0488,
735
  "step": 940
736
  },
737
  {
738
  "epoch": 18.45,
739
  "learning_rate": 3.093395252837978e-05,
740
+ "loss": 0.1147,
741
  "step": 950
742
  },
743
  {
744
  "epoch": 18.64,
745
  "learning_rate": 3.065015479876161e-05,
746
+ "loss": 0.0895,
747
  "step": 960
748
  },
749
  {
750
  "epoch": 18.83,
751
  "learning_rate": 3.0366357069143445e-05,
752
+ "loss": 0.1989,
753
  "step": 970
754
  },
755
  {
756
  "epoch": 18.99,
757
+ "eval_accuracy": 0.7391304347826086,
758
+ "eval_loss": 1.1543340682983398,
759
+ "eval_runtime": 0.8057,
760
+ "eval_samples_per_second": 57.094,
761
+ "eval_steps_per_second": 7.447,
762
  "step": 978
763
  },
764
  {
765
  "epoch": 19.03,
766
  "learning_rate": 3.0082559339525283e-05,
767
+ "loss": 0.0619,
768
  "step": 980
769
  },
770
  {
771
  "epoch": 19.22,
772
  "learning_rate": 2.9798761609907124e-05,
773
+ "loss": 0.0551,
774
  "step": 990
775
  },
776
  {
777
  "epoch": 19.42,
778
  "learning_rate": 2.9514963880288958e-05,
779
+ "loss": 0.0815,
780
  "step": 1000
781
  },
782
  {
783
  "epoch": 19.61,
784
  "learning_rate": 2.9231166150670796e-05,
785
+ "loss": 0.1433,
786
  "step": 1010
787
  },
788
  {
789
  "epoch": 19.81,
790
  "learning_rate": 2.894736842105263e-05,
791
+ "loss": 0.0481,
792
  "step": 1020
793
  },
794
  {
795
  "epoch": 20.0,
796
  "learning_rate": 2.8663570691434468e-05,
797
+ "loss": 0.1097,
798
  "step": 1030
799
  },
800
  {
801
  "epoch": 20.0,
802
+ "eval_accuracy": 0.7608695652173914,
803
+ "eval_loss": 1.1795116662979126,
804
+ "eval_runtime": 0.8007,
805
+ "eval_samples_per_second": 57.45,
806
+ "eval_steps_per_second": 7.493,
807
  "step": 1030
808
  },
809
  {
810
  "epoch": 20.19,
811
  "learning_rate": 2.837977296181631e-05,
812
+ "loss": 0.1128,
813
  "step": 1040
814
  },
815
  {
816
  "epoch": 20.39,
817
  "learning_rate": 2.8095975232198143e-05,
818
+ "loss": 0.1016,
819
  "step": 1050
820
  },
821
  {
822
  "epoch": 20.58,
823
  "learning_rate": 2.781217750257998e-05,
824
+ "loss": 0.1064,
825
  "step": 1060
826
  },
827
  {
828
  "epoch": 20.78,
829
  "learning_rate": 2.7528379772961815e-05,
830
+ "loss": 0.1248,
831
  "step": 1070
832
  },
833
  {
834
  "epoch": 20.97,
835
  "learning_rate": 2.7244582043343656e-05,
836
+ "loss": 0.1204,
837
  "step": 1080
838
  },
839
  {
840
  "epoch": 20.99,
841
+ "eval_accuracy": 0.6739130434782609,
842
+ "eval_loss": 1.267902135848999,
843
+ "eval_runtime": 0.7942,
844
+ "eval_samples_per_second": 57.922,
845
+ "eval_steps_per_second": 7.555,
846
  "step": 1081
847
  },
848
  {
849
  "epoch": 21.17,
850
  "learning_rate": 2.696078431372549e-05,
851
+ "loss": 0.0909,
852
  "step": 1090
853
  },
854
  {
855
  "epoch": 21.36,
856
  "learning_rate": 2.6676986584107328e-05,
857
+ "loss": 0.0697,
858
  "step": 1100
859
  },
860
  {
861
  "epoch": 21.55,
862
  "learning_rate": 2.6393188854489165e-05,
863
+ "loss": 0.0691,
864
  "step": 1110
865
  },
866
  {
867
  "epoch": 21.75,
868
  "learning_rate": 2.6109391124871003e-05,
869
+ "loss": 0.0676,
870
  "step": 1120
871
  },
872
  {
873
  "epoch": 21.94,
874
  "learning_rate": 2.582559339525284e-05,
875
+ "loss": 0.0514,
876
  "step": 1130
877
  },
878
  {
879
  "epoch": 22.0,
880
+ "eval_accuracy": 0.717391304347826,
881
+ "eval_loss": 1.0645781755447388,
882
+ "eval_runtime": 0.7947,
883
+ "eval_samples_per_second": 57.884,
884
+ "eval_steps_per_second": 7.55,
885
  "step": 1133
886
  },
887
  {
888
  "epoch": 22.14,
889
  "learning_rate": 2.5541795665634675e-05,
890
+ "loss": 0.0133,
891
  "step": 1140
892
  },
893
  {
894
  "epoch": 22.33,
895
  "learning_rate": 2.5257997936016512e-05,
896
+ "loss": 0.051,
897
  "step": 1150
898
  },
899
  {
900
  "epoch": 22.52,
901
  "learning_rate": 2.497420020639835e-05,
902
+ "loss": 0.1221,
903
  "step": 1160
904
  },
905
  {
906
  "epoch": 22.72,
907
  "learning_rate": 2.4690402476780188e-05,
908
+ "loss": 0.0973,
909
  "step": 1170
910
  },
911
  {
912
  "epoch": 22.91,
913
  "learning_rate": 2.4406604747162022e-05,
914
+ "loss": 0.0612,
915
  "step": 1180
916
  },
917
  {
918
  "epoch": 22.99,
919
+ "eval_accuracy": 0.6956521739130435,
920
+ "eval_loss": 1.1412676572799683,
921
+ "eval_runtime": 0.8302,
922
+ "eval_samples_per_second": 55.409,
923
+ "eval_steps_per_second": 7.227,
924
  "step": 1184
925
  },
926
  {
927
  "epoch": 23.11,
928
  "learning_rate": 2.412280701754386e-05,
929
+ "loss": 0.0519,
930
  "step": 1190
931
  },
932
  {
933
  "epoch": 23.3,
934
  "learning_rate": 2.3839009287925697e-05,
935
+ "loss": 0.0405,
936
  "step": 1200
937
  },
938
  {
939
  "epoch": 23.5,
940
  "learning_rate": 2.3555211558307535e-05,
941
+ "loss": 0.0487,
942
  "step": 1210
943
  },
944
  {
945
  "epoch": 23.69,
946
  "learning_rate": 2.3271413828689372e-05,
947
+ "loss": 0.0784,
948
  "step": 1220
949
  },
950
  {
951
  "epoch": 23.88,
952
  "learning_rate": 2.2987616099071207e-05,
953
+ "loss": 0.0207,
954
  "step": 1230
955
  },
956
  {
957
  "epoch": 24.0,
958
+ "eval_accuracy": 0.782608695652174,
959
+ "eval_loss": 0.8928263187408447,
960
+ "eval_runtime": 0.8367,
961
+ "eval_samples_per_second": 54.978,
962
+ "eval_steps_per_second": 7.171,
963
  "step": 1236
964
  },
965
  {
966
  "epoch": 24.08,
967
  "learning_rate": 2.2703818369453044e-05,
968
+ "loss": 0.0647,
969
  "step": 1240
970
  },
971
  {
972
  "epoch": 24.27,
973
  "learning_rate": 2.2420020639834882e-05,
974
+ "loss": 0.1242,
975
  "step": 1250
976
  },
977
  {
978
  "epoch": 24.47,
979
  "learning_rate": 2.213622291021672e-05,
980
+ "loss": 0.0921,
981
  "step": 1260
982
  },
983
  {
984
  "epoch": 24.66,
985
  "learning_rate": 2.1852425180598557e-05,
986
+ "loss": 0.057,
987
  "step": 1270
988
  },
989
  {
990
  "epoch": 24.85,
991
  "learning_rate": 2.156862745098039e-05,
992
+ "loss": 0.1063,
993
  "step": 1280
994
  },
995
  {
996
  "epoch": 24.99,
997
+ "eval_accuracy": 0.7608695652173914,
998
+ "eval_loss": 1.118552327156067,
999
+ "eval_runtime": 0.7882,
1000
+ "eval_samples_per_second": 58.362,
1001
+ "eval_steps_per_second": 7.612,
1002
  "step": 1287
1003
  },
1004
  {
1005
  "epoch": 25.05,
1006
  "learning_rate": 2.1284829721362232e-05,
1007
+ "loss": 0.0474,
1008
  "step": 1290
1009
  },
1010
  {
1011
  "epoch": 25.24,
1012
  "learning_rate": 2.1001031991744067e-05,
1013
+ "loss": 0.1243,
1014
  "step": 1300
1015
  },
1016
  {
1017
  "epoch": 25.44,
1018
  "learning_rate": 2.0717234262125904e-05,
1019
+ "loss": 0.0265,
1020
  "step": 1310
1021
  },
1022
  {
1023
  "epoch": 25.63,
1024
  "learning_rate": 2.0433436532507742e-05,
1025
+ "loss": 0.1075,
1026
  "step": 1320
1027
  },
1028
  {
1029
  "epoch": 25.83,
1030
  "learning_rate": 2.0149638802889576e-05,
1031
+ "loss": 0.1076,
1032
  "step": 1330
1033
  },
1034
  {
1035
  "epoch": 26.0,
1036
  "eval_accuracy": 0.7608695652173914,
1037
+ "eval_loss": 1.1741024255752563,
1038
+ "eval_runtime": 0.8087,
1039
+ "eval_samples_per_second": 56.882,
1040
+ "eval_steps_per_second": 7.419,
1041
  "step": 1339
1042
  },
1043
  {
1044
  "epoch": 26.02,
1045
  "learning_rate": 1.9865841073271417e-05,
1046
+ "loss": 0.0272,
1047
  "step": 1340
1048
  },
1049
  {
1050
  "epoch": 26.21,
1051
  "learning_rate": 1.958204334365325e-05,
1052
+ "loss": 0.0688,
1053
  "step": 1350
1054
  },
1055
  {
1056
  "epoch": 26.41,
1057
  "learning_rate": 1.929824561403509e-05,
1058
+ "loss": 0.0913,
1059
  "step": 1360
1060
  },
1061
  {
1062
  "epoch": 26.6,
1063
  "learning_rate": 1.9014447884416927e-05,
1064
+ "loss": 0.0374,
1065
  "step": 1370
1066
  },
1067
  {
1068
  "epoch": 26.8,
1069
  "learning_rate": 1.873065015479876e-05,
1070
+ "loss": 0.0891,
1071
  "step": 1380
1072
  },
1073
  {
1074
  "epoch": 26.99,
1075
  "learning_rate": 1.8446852425180602e-05,
1076
+ "loss": 0.0714,
1077
  "step": 1390
1078
  },
1079
  {
1080
  "epoch": 26.99,
1081
+ "eval_accuracy": 0.8043478260869565,
1082
+ "eval_loss": 1.0977226495742798,
1083
+ "eval_runtime": 0.7997,
1084
+ "eval_samples_per_second": 57.522,
1085
+ "eval_steps_per_second": 7.503,
1086
  "step": 1390
1087
  },
1088
  {
1089
  "epoch": 27.18,
1090
  "learning_rate": 1.8163054695562436e-05,
1091
+ "loss": 0.0417,
1092
  "step": 1400
1093
  },
1094
  {
1095
  "epoch": 27.38,
1096
  "learning_rate": 1.787925696594427e-05,
1097
+ "loss": 0.1741,
1098
  "step": 1410
1099
  },
1100
  {
1101
  "epoch": 27.57,
1102
  "learning_rate": 1.759545923632611e-05,
1103
+ "loss": 0.0141,
1104
  "step": 1420
1105
  },
1106
  {
1107
  "epoch": 27.77,
1108
  "learning_rate": 1.7311661506707946e-05,
1109
+ "loss": 0.0031,
1110
  "step": 1430
1111
  },
1112
  {
1113
  "epoch": 27.96,
1114
  "learning_rate": 1.7027863777089787e-05,
1115
+ "loss": 0.062,
1116
  "step": 1440
1117
  },
1118
  {
1119
  "epoch": 28.0,
1120
  "eval_accuracy": 0.717391304347826,
1121
+ "eval_loss": 1.3964738845825195,
1122
+ "eval_runtime": 0.8222,
1123
+ "eval_samples_per_second": 55.949,
1124
+ "eval_steps_per_second": 7.298,
1125
  "step": 1442
1126
  },
1127
  {
1128
  "epoch": 28.16,
1129
  "learning_rate": 1.674406604747162e-05,
1130
+ "loss": 0.1189,
1131
  "step": 1450
1132
  },
1133
  {
1134
  "epoch": 28.35,
1135
  "learning_rate": 1.6460268317853455e-05,
1136
+ "loss": 0.085,
1137
  "step": 1460
1138
  },
1139
  {
1140
  "epoch": 28.54,
1141
  "learning_rate": 1.6176470588235296e-05,
1142
+ "loss": 0.0294,
1143
  "step": 1470
1144
  },
1145
  {
1146
  "epoch": 28.74,
1147
  "learning_rate": 1.589267285861713e-05,
1148
+ "loss": 0.0267,
1149
  "step": 1480
1150
  },
1151
  {
1152
  "epoch": 28.93,
1153
  "learning_rate": 1.560887512899897e-05,
1154
+ "loss": 0.0617,
1155
  "step": 1490
1156
  },
1157
  {
1158
  "epoch": 28.99,
1159
+ "eval_accuracy": 0.7608695652173914,
1160
+ "eval_loss": 1.184866189956665,
1161
+ "eval_runtime": 0.8482,
1162
+ "eval_samples_per_second": 54.233,
1163
+ "eval_steps_per_second": 7.074,
1164
  "step": 1493
1165
  },
1166
  {
1167
  "epoch": 29.13,
1168
  "learning_rate": 1.5325077399380806e-05,
1169
+ "loss": 0.0499,
1170
  "step": 1500
1171
  },
1172
  {
1173
  "epoch": 29.32,
1174
  "learning_rate": 1.5041279669762642e-05,
1175
+ "loss": 0.0568,
1176
  "step": 1510
1177
  },
1178
  {
1179
  "epoch": 29.51,
1180
  "learning_rate": 1.4757481940144479e-05,
1181
+ "loss": 0.0511,
1182
  "step": 1520
1183
  },
1184
  {
1185
  "epoch": 29.71,
1186
  "learning_rate": 1.4473684210526315e-05,
1187
+ "loss": 0.0284,
1188
  "step": 1530
1189
  },
1190
  {
1191
  "epoch": 29.9,
1192
  "learning_rate": 1.4189886480908154e-05,
1193
+ "loss": 0.0536,
1194
  "step": 1540
1195
  },
1196
  {
1197
  "epoch": 30.0,
1198
  "eval_accuracy": 0.782608695652174,
1199
+ "eval_loss": 1.086539387702942,
1200
+ "eval_runtime": 0.7992,
1201
+ "eval_samples_per_second": 57.559,
1202
+ "eval_steps_per_second": 7.508,
1203
  "step": 1545
1204
  },
1205
  {
1206
  "epoch": 30.1,
1207
  "learning_rate": 1.390608875128999e-05,
1208
+ "loss": 0.0039,
1209
  "step": 1550
1210
  },
1211
  {
1212
  "epoch": 30.29,
1213
  "learning_rate": 1.3622291021671828e-05,
1214
+ "loss": 0.038,
1215
  "step": 1560
1216
  },
1217
  {
1218
  "epoch": 30.49,
1219
  "learning_rate": 1.3338493292053664e-05,
1220
+ "loss": 0.1408,
1221
  "step": 1570
1222
  },
1223
  {
1224
  "epoch": 30.68,
1225
  "learning_rate": 1.3054695562435501e-05,
1226
+ "loss": 0.012,
1227
  "step": 1580
1228
  },
1229
  {
1230
  "epoch": 30.87,
1231
  "learning_rate": 1.2770897832817337e-05,
1232
+ "loss": 0.0707,
1233
  "step": 1590
1234
  },
1235
  {
1236
  "epoch": 30.99,
1237
  "eval_accuracy": 0.7608695652173914,
1238
+ "eval_loss": 1.2080574035644531,
1239
+ "eval_runtime": 0.8012,
1240
+ "eval_samples_per_second": 57.415,
1241
+ "eval_steps_per_second": 7.489,
1242
  "step": 1596
1243
  },
1244
  {
1245
  "epoch": 31.07,
1246
  "learning_rate": 1.2487100103199175e-05,
1247
+ "loss": 0.0148,
1248
  "step": 1600
1249
  },
1250
  {
1251
  "epoch": 31.26,
1252
  "learning_rate": 1.2203302373581011e-05,
1253
+ "loss": 0.0309,
1254
  "step": 1610
1255
  },
1256
  {
1257
  "epoch": 31.46,
1258
  "learning_rate": 1.1919504643962849e-05,
1259
+ "loss": 0.0383,
1260
  "step": 1620
1261
  },
1262
  {
1263
  "epoch": 31.65,
1264
  "learning_rate": 1.1635706914344686e-05,
1265
+ "loss": 0.0231,
1266
  "step": 1630
1267
  },
1268
  {
1269
  "epoch": 31.84,
1270
  "learning_rate": 1.1351909184726522e-05,
1271
+ "loss": 0.0967,
1272
  "step": 1640
1273
  },
1274
  {
1275
  "epoch": 32.0,
1276
+ "eval_accuracy": 0.7391304347826086,
1277
+ "eval_loss": 1.330019474029541,
1278
+ "eval_runtime": 0.8382,
1279
+ "eval_samples_per_second": 54.88,
1280
+ "eval_steps_per_second": 7.158,
1281
  "step": 1648
1282
  },
1283
  {
1284
  "epoch": 32.04,
1285
  "learning_rate": 1.106811145510836e-05,
1286
+ "loss": 0.0414,
1287
  "step": 1650
1288
  },
1289
  {
1290
  "epoch": 32.23,
1291
  "learning_rate": 1.0784313725490196e-05,
1292
+ "loss": 0.0196,
1293
  "step": 1660
1294
  },
1295
  {
1296
  "epoch": 32.43,
1297
  "learning_rate": 1.0500515995872033e-05,
1298
+ "loss": 0.0517,
1299
  "step": 1670
1300
  },
1301
  {
1302
  "epoch": 32.62,
1303
  "learning_rate": 1.0216718266253871e-05,
1304
+ "loss": 0.0126,
1305
  "step": 1680
1306
  },
1307
  {
1308
  "epoch": 32.82,
1309
  "learning_rate": 9.932920536635709e-06,
1310
+ "loss": 0.0564,
1311
  "step": 1690
1312
  },
1313
  {
1314
  "epoch": 32.99,
1315
+ "eval_accuracy": 0.782608695652174,
1316
+ "eval_loss": 1.2239521741867065,
1317
+ "eval_runtime": 0.8475,
1318
+ "eval_samples_per_second": 54.275,
1319
+ "eval_steps_per_second": 7.079,
1320
  "step": 1699
1321
  },
1322
  {
1323
  "epoch": 33.01,
1324
  "learning_rate": 9.649122807017545e-06,
1325
+ "loss": 0.1062,
1326
  "step": 1700
1327
  },
1328
  {
1329
  "epoch": 33.2,
1330
  "learning_rate": 9.36532507739938e-06,
1331
+ "loss": 0.0345,
1332
  "step": 1710
1333
  },
1334
  {
1335
  "epoch": 33.4,
1336
  "learning_rate": 9.081527347781218e-06,
1337
+ "loss": 0.0488,
1338
  "step": 1720
1339
  },
1340
  {
1341
  "epoch": 33.59,
1342
  "learning_rate": 8.797729618163056e-06,
1343
+ "loss": 0.0097,
1344
  "step": 1730
1345
  },
1346
  {
1347
  "epoch": 33.79,
1348
  "learning_rate": 8.513931888544893e-06,
1349
+ "loss": 0.0395,
1350
  "step": 1740
1351
  },
1352
  {
1353
  "epoch": 33.98,
1354
  "learning_rate": 8.230134158926728e-06,
1355
+ "loss": 0.0435,
1356
  "step": 1750
1357
  },
1358
  {
1359
  "epoch": 34.0,
1360
+ "eval_accuracy": 0.7608695652173914,
1361
+ "eval_loss": 1.239098072052002,
1362
+ "eval_runtime": 0.7892,
1363
+ "eval_samples_per_second": 58.289,
1364
+ "eval_steps_per_second": 7.603,
1365
  "step": 1751
1366
  },
1367
  {
1368
  "epoch": 34.17,
1369
  "learning_rate": 7.946336429308565e-06,
1370
+ "loss": 0.0378,
1371
  "step": 1760
1372
  },
1373
  {
1374
  "epoch": 34.37,
1375
  "learning_rate": 7.662538699690403e-06,
1376
+ "loss": 0.0318,
1377
  "step": 1770
1378
  },
1379
  {
1380
  "epoch": 34.56,
1381
  "learning_rate": 7.3787409700722396e-06,
1382
+ "loss": 0.0482,
1383
  "step": 1780
1384
  },
1385
  {
1386
  "epoch": 34.76,
1387
  "learning_rate": 7.094943240454077e-06,
1388
+ "loss": 0.01,
1389
  "step": 1790
1390
  },
1391
  {
1392
  "epoch": 34.95,
1393
  "learning_rate": 6.811145510835914e-06,
1394
+ "loss": 0.043,
1395
  "step": 1800
1396
  },
1397
  {
1398
  "epoch": 34.99,
1399
+ "eval_accuracy": 0.7608695652173914,
1400
+ "eval_loss": 1.181269645690918,
1401
+ "eval_runtime": 0.8162,
1402
+ "eval_samples_per_second": 56.36,
1403
+ "eval_steps_per_second": 7.351,
1404
  "step": 1802
1405
  },
1406
  {
1407
  "epoch": 35.15,
1408
  "learning_rate": 6.527347781217751e-06,
1409
+ "loss": 0.0863,
1410
  "step": 1810
1411
  },
1412
  {
1413
  "epoch": 35.34,
1414
  "learning_rate": 6.2435500515995875e-06,
1415
+ "loss": 0.0324,
1416
  "step": 1820
1417
  },
1418
  {
1419
  "epoch": 35.53,
1420
  "learning_rate": 5.959752321981424e-06,
1421
+ "loss": 0.0119,
1422
  "step": 1830
1423
  },
1424
  {
1425
  "epoch": 35.73,
1426
  "learning_rate": 5.675954592363261e-06,
1427
+ "loss": 0.0153,
1428
  "step": 1840
1429
  },
1430
  {
1431
  "epoch": 35.92,
1432
  "learning_rate": 5.392156862745098e-06,
1433
+ "loss": 0.0218,
1434
  "step": 1850
1435
  },
1436
  {
1437
  "epoch": 36.0,
1438
+ "eval_accuracy": 0.782608695652174,
1439
+ "eval_loss": 1.2496311664581299,
1440
+ "eval_runtime": 0.8557,
1441
+ "eval_samples_per_second": 53.757,
1442
+ "eval_steps_per_second": 7.012,
1443
  "step": 1854
1444
  },
1445
  {
1446
  "epoch": 36.12,
1447
  "learning_rate": 5.1083591331269355e-06,
1448
+ "loss": 0.0449,
1449
  "step": 1860
1450
  },
1451
  {
1452
  "epoch": 36.31,
1453
  "learning_rate": 4.824561403508772e-06,
1454
+ "loss": 0.0531,
1455
  "step": 1870
1456
  },
1457
  {
1458
  "epoch": 36.5,
1459
  "learning_rate": 4.540763673890609e-06,
1460
+ "loss": 0.0336,
1461
  "step": 1880
1462
  },
1463
  {
1464
  "epoch": 36.7,
1465
  "learning_rate": 4.256965944272447e-06,
1466
+ "loss": 0.1423,
1467
  "step": 1890
1468
  },
1469
  {
1470
  "epoch": 36.89,
1471
  "learning_rate": 3.973168214654283e-06,
1472
+ "loss": 0.0043,
1473
  "step": 1900
1474
  },
1475
  {
1476
  "epoch": 36.99,
1477
  "eval_accuracy": 0.717391304347826,
1478
+ "eval_loss": 1.2797400951385498,
1479
+ "eval_runtime": 0.7897,
1480
+ "eval_samples_per_second": 58.25,
1481
+ "eval_steps_per_second": 7.598,
1482
  "step": 1905
1483
  },
1484
  {
1485
  "epoch": 37.09,
1486
  "learning_rate": 3.6893704850361198e-06,
1487
+ "loss": 0.0707,
1488
  "step": 1910
1489
  },
1490
  {
1491
  "epoch": 37.28,
1492
  "learning_rate": 3.405572755417957e-06,
1493
+ "loss": 0.0878,
1494
  "step": 1920
1495
  },
1496
  {
1497
  "epoch": 37.48,
1498
  "learning_rate": 3.1217750257997938e-06,
1499
+ "loss": 0.0214,
1500
  "step": 1930
1501
  },
1502
  {
1503
  "epoch": 37.67,
1504
  "learning_rate": 2.8379772961816305e-06,
1505
+ "loss": 0.0337,
1506
  "step": 1940
1507
  },
1508
  {
1509
  "epoch": 37.86,
1510
  "learning_rate": 2.5541795665634677e-06,
1511
+ "loss": 0.0051,
1512
  "step": 1950
1513
  },
1514
  {
1515
  "epoch": 38.0,
1516
+ "eval_accuracy": 0.7391304347826086,
1517
+ "eval_loss": 1.2492655515670776,
1518
+ "eval_runtime": 0.8062,
1519
+ "eval_samples_per_second": 57.059,
1520
+ "eval_steps_per_second": 7.443,
1521
  "step": 1957
1522
  },
1523
  {
1524
  "epoch": 38.06,
1525
  "learning_rate": 2.2703818369453045e-06,
1526
+ "loss": 0.0228,
1527
  "step": 1960
1528
  },
1529
  {
1530
  "epoch": 38.25,
1531
  "learning_rate": 1.9865841073271413e-06,
1532
+ "loss": 0.0322,
1533
  "step": 1970
1534
  },
1535
  {
1536
  "epoch": 38.45,
1537
  "learning_rate": 1.7027863777089785e-06,
1538
+ "loss": 0.0503,
1539
  "step": 1980
1540
  },
1541
  {
1542
  "epoch": 38.64,
1543
  "learning_rate": 1.4189886480908153e-06,
1544
+ "loss": 0.039,
1545
  "step": 1990
1546
  },
1547
  {
1548
  "epoch": 38.83,
1549
  "learning_rate": 1.1351909184726523e-06,
1550
+ "loss": 0.0123,
1551
  "step": 2000
1552
  },
1553
  {
1554
  "epoch": 38.99,
1555
+ "eval_accuracy": 0.7391304347826086,
1556
+ "eval_loss": 1.2537580728530884,
1557
+ "eval_runtime": 0.7792,
1558
+ "eval_samples_per_second": 59.037,
1559
+ "eval_steps_per_second": 7.7,
1560
  "step": 2008
1561
  },
1562
  {
1563
  "epoch": 39.03,
1564
  "learning_rate": 8.513931888544892e-07,
1565
+ "loss": 0.0046,
1566
  "step": 2010
1567
  },
1568
  {
1569
  "epoch": 39.22,
1570
  "learning_rate": 5.675954592363261e-07,
1571
+ "loss": 0.0218,
1572
  "step": 2020
1573
  },
1574
  {
1575
  "epoch": 39.42,
1576
  "learning_rate": 2.8379772961816306e-07,
1577
+ "loss": 0.085,
1578
  "step": 2030
1579
  },
1580
  {
1581
  "epoch": 39.61,
1582
  "learning_rate": 0.0,
1583
+ "loss": 0.0546,
1584
  "step": 2040
1585
  },
1586
  {
1587
  "epoch": 39.61,
1588
+ "eval_accuracy": 0.7608695652173914,
1589
+ "eval_loss": 1.2530285120010376,
1590
+ "eval_runtime": 0.8032,
1591
+ "eval_samples_per_second": 57.272,
1592
+ "eval_steps_per_second": 7.47,
1593
  "step": 2040
1594
  },
1595
  {
1596
  "epoch": 39.61,
1597
  "step": 2040,
1598
  "total_flos": 2.5142726714989363e+18,
1599
+ "train_loss": 0.18828091986349546,
1600
+ "train_runtime": 647.6496,
1601
+ "train_samples_per_second": 50.583,
1602
+ "train_steps_per_second": 3.15
1603
  }
1604
  ],
1605
  "logging_steps": 10,