vumichien commited on
Commit
dc59aac
1 Parent(s): 14c6955

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +8 -8
  2. eval_results.json +4 -4
  3. train_results.json +4 -4
  4. trainer_state.json +360 -360
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 20.0,
3
  "eval_accuracy": 0.9152691968225949,
4
- "eval_loss": 0.3084510564804077,
5
- "eval_runtime": 142.0303,
6
- "eval_samples_per_second": 47.863,
7
- "eval_steps_per_second": 1.5,
8
- "train_loss": 0.8349186476609461,
9
- "train_runtime": 26368.6047,
10
- "train_samples_per_second": 38.754,
11
- "train_steps_per_second": 0.605
12
  }
 
1
  {
2
  "epoch": 20.0,
3
  "eval_accuracy": 0.9152691968225949,
4
+ "eval_loss": 0.30927804112434387,
5
+ "eval_runtime": 150.9277,
6
+ "eval_samples_per_second": 45.041,
7
+ "eval_steps_per_second": 1.411,
8
+ "train_loss": 0.8353394886007285,
9
+ "train_runtime": 26863.0447,
10
+ "train_samples_per_second": 38.04,
11
+ "train_steps_per_second": 0.594
12
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
  "eval_accuracy": 0.9152691968225949,
4
- "eval_loss": 0.3084510564804077,
5
- "eval_runtime": 142.0303,
6
- "eval_samples_per_second": 47.863,
7
- "eval_steps_per_second": 1.5
8
  }
 
1
  {
2
  "epoch": 20.0,
3
  "eval_accuracy": 0.9152691968225949,
4
+ "eval_loss": 0.30927804112434387,
5
+ "eval_runtime": 150.9277,
6
+ "eval_samples_per_second": 45.041,
7
+ "eval_steps_per_second": 1.411
8
  }
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "epoch": 20.0,
3
- "train_loss": 0.8349186476609461,
4
- "train_runtime": 26368.6047,
5
- "train_samples_per_second": 38.754,
6
- "train_steps_per_second": 0.605
7
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "train_loss": 0.8353394886007285,
4
+ "train_runtime": 26863.0447,
5
+ "train_samples_per_second": 38.04,
6
+ "train_steps_per_second": 0.594
7
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_metric": 0.9152691968225949,
3
- "best_model_checkpoint": "trillsson3-ft-keyword-spotting-13/checkpoint-13566",
4
  "epoch": 19.999373825923605,
5
  "global_step": 15960,
6
  "is_hyper_param_search": false,
@@ -10,1145 +10,1145 @@
10
  {
11
  "epoch": 0.13,
12
  "learning_rate": 1.7669172932330825e-05,
13
- "loss": 7.8644,
14
  "step": 100
15
  },
16
  {
17
  "epoch": 0.25,
18
  "learning_rate": 3.646616541353383e-05,
19
- "loss": 6.0388,
20
  "step": 200
21
  },
22
  {
23
  "epoch": 0.38,
24
  "learning_rate": 5.526315789473683e-05,
25
- "loss": 3.8396,
26
  "step": 300
27
  },
28
  {
29
  "epoch": 0.5,
30
  "learning_rate": 7.406015037593984e-05,
31
- "loss": 3.0482,
32
  "step": 400
33
  },
34
  {
35
  "epoch": 0.63,
36
  "learning_rate": 9.285714285714286e-05,
37
- "loss": 2.6758,
38
  "step": 500
39
  },
40
  {
41
  "epoch": 0.75,
42
  "learning_rate": 0.00011165413533834586,
43
- "loss": 2.1056,
44
  "step": 600
45
  },
46
  {
47
  "epoch": 0.88,
48
  "learning_rate": 0.00013045112781954885,
49
- "loss": 1.8064,
50
  "step": 700
51
  },
52
  {
53
  "epoch": 1.0,
54
- "eval_accuracy": 0.6403353927625772,
55
- "eval_loss": 0.9358639717102051,
56
- "eval_runtime": 141.8393,
57
- "eval_samples_per_second": 47.927,
58
- "eval_steps_per_second": 1.502,
59
  "step": 798
60
  },
61
  {
62
  "epoch": 1.0,
63
  "learning_rate": 0.00014924812030075185,
64
- "loss": 1.4335,
65
  "step": 800
66
  },
67
  {
68
  "epoch": 1.13,
69
  "learning_rate": 0.00016804511278195486,
70
- "loss": 1.2967,
71
  "step": 900
72
  },
73
  {
74
  "epoch": 1.25,
75
  "learning_rate": 0.0001868421052631579,
76
- "loss": 1.1709,
77
  "step": 1000
78
  },
79
  {
80
  "epoch": 1.38,
81
  "learning_rate": 0.0002056390977443609,
82
- "loss": 1.0607,
83
  "step": 1100
84
  },
85
  {
86
  "epoch": 1.5,
87
  "learning_rate": 0.0002244360902255639,
88
- "loss": 0.9944,
89
  "step": 1200
90
  },
91
  {
92
  "epoch": 1.63,
93
  "learning_rate": 0.0002432330827067669,
94
- "loss": 0.9323,
95
  "step": 1300
96
  },
97
  {
98
  "epoch": 1.75,
99
  "learning_rate": 0.0002620300751879699,
100
- "loss": 0.8718,
101
  "step": 1400
102
  },
103
  {
104
  "epoch": 1.88,
105
  "learning_rate": 0.0002808270676691729,
106
- "loss": 0.8601,
107
  "step": 1500
108
  },
109
  {
110
  "epoch": 2.0,
111
- "eval_accuracy": 0.8527508090614887,
112
- "eval_loss": 0.4831967353820801,
113
- "eval_runtime": 141.9778,
114
- "eval_samples_per_second": 47.881,
115
- "eval_steps_per_second": 1.5,
116
  "step": 1596
117
  },
118
  {
119
  "epoch": 2.01,
120
  "learning_rate": 0.0002996240601503759,
121
- "loss": 0.836,
122
  "step": 1600
123
  },
124
  {
125
  "epoch": 2.13,
126
  "learning_rate": 0.000297953216374269,
127
- "loss": 0.8312,
128
  "step": 1700
129
  },
130
  {
131
  "epoch": 2.26,
132
  "learning_rate": 0.0002958646616541353,
133
- "loss": 0.8016,
134
  "step": 1800
135
  },
136
  {
137
  "epoch": 2.38,
138
  "learning_rate": 0.0002937761069340017,
139
- "loss": 0.7724,
140
  "step": 1900
141
  },
142
  {
143
  "epoch": 2.51,
144
  "learning_rate": 0.00029168755221386795,
145
- "loss": 0.7621,
146
  "step": 2000
147
  },
148
  {
149
  "epoch": 2.63,
150
  "learning_rate": 0.00028959899749373433,
151
- "loss": 0.7624,
152
  "step": 2100
153
  },
154
  {
155
  "epoch": 2.76,
156
  "learning_rate": 0.00028751044277360065,
157
- "loss": 0.7549,
158
  "step": 2200
159
  },
160
  {
161
  "epoch": 2.88,
162
  "learning_rate": 0.000285421888053467,
163
- "loss": 0.7585,
164
  "step": 2300
165
  },
166
  {
167
  "epoch": 3.0,
168
- "eval_accuracy": 0.8854074727861135,
169
- "eval_loss": 0.39516541361808777,
170
- "eval_runtime": 141.91,
171
- "eval_samples_per_second": 47.904,
172
- "eval_steps_per_second": 1.501,
173
  "step": 2394
174
  },
175
  {
176
  "epoch": 3.01,
177
  "learning_rate": 0.0002833333333333333,
178
- "loss": 0.7495,
179
  "step": 2400
180
  },
181
  {
182
  "epoch": 3.13,
183
  "learning_rate": 0.0002812447786131997,
184
- "loss": 0.7343,
185
  "step": 2500
186
  },
187
  {
188
  "epoch": 3.26,
189
  "learning_rate": 0.00027915622389306595,
190
- "loss": 0.7065,
191
  "step": 2600
192
  },
193
  {
194
  "epoch": 3.38,
195
  "learning_rate": 0.0002770676691729323,
196
- "loss": 0.7109,
197
  "step": 2700
198
  },
199
  {
200
  "epoch": 3.51,
201
  "learning_rate": 0.00027497911445279865,
202
- "loss": 0.687,
203
  "step": 2800
204
  },
205
  {
206
  "epoch": 3.63,
207
  "learning_rate": 0.000272890559732665,
208
- "loss": 0.7325,
209
  "step": 2900
210
  },
211
  {
212
  "epoch": 3.76,
213
  "learning_rate": 0.0002708020050125313,
214
- "loss": 0.7026,
215
  "step": 3000
216
  },
217
  {
218
  "epoch": 3.88,
219
  "learning_rate": 0.0002687134502923976,
220
- "loss": 0.7026,
221
  "step": 3100
222
  },
223
  {
224
  "epoch": 4.0,
225
- "eval_accuracy": 0.9049720506031186,
226
- "eval_loss": 0.36231401562690735,
227
- "eval_runtime": 141.8088,
228
- "eval_samples_per_second": 47.938,
229
- "eval_steps_per_second": 1.502,
230
  "step": 3192
231
  },
232
  {
233
  "epoch": 4.01,
234
  "learning_rate": 0.00026662489557226395,
235
- "loss": 0.73,
236
  "step": 3200
237
  },
238
  {
239
  "epoch": 4.14,
240
  "learning_rate": 0.0002645363408521303,
241
- "loss": 0.6916,
242
  "step": 3300
243
  },
244
  {
245
  "epoch": 4.26,
246
  "learning_rate": 0.00026244778613199665,
247
- "loss": 0.7066,
248
  "step": 3400
249
  },
250
  {
251
  "epoch": 4.39,
252
  "learning_rate": 0.00026035923141186297,
253
- "loss": 0.6909,
254
  "step": 3500
255
  },
256
  {
257
  "epoch": 4.51,
258
  "learning_rate": 0.0002582706766917293,
259
- "loss": 0.7258,
260
  "step": 3600
261
  },
262
  {
263
  "epoch": 4.64,
264
  "learning_rate": 0.0002561821219715956,
265
- "loss": 0.6908,
266
  "step": 3700
267
  },
268
  {
269
  "epoch": 4.76,
270
  "learning_rate": 0.00025409356725146194,
271
- "loss": 0.6977,
272
  "step": 3800
273
  },
274
  {
275
  "epoch": 4.89,
276
  "learning_rate": 0.0002520050125313283,
277
- "loss": 0.6924,
278
  "step": 3900
279
  },
280
  {
281
  "epoch": 5.0,
282
- "eval_accuracy": 0.903501029714622,
283
- "eval_loss": 0.3456359803676605,
284
- "eval_runtime": 141.2792,
285
- "eval_samples_per_second": 48.117,
286
- "eval_steps_per_second": 1.508,
287
  "step": 3990
288
  },
289
  {
290
  "epoch": 5.01,
291
  "learning_rate": 0.00024991645781119465,
292
- "loss": 0.7063,
293
  "step": 4000
294
  },
295
  {
296
  "epoch": 5.14,
297
  "learning_rate": 0.00024782790309106097,
298
- "loss": 0.714,
299
  "step": 4100
300
  },
301
  {
302
  "epoch": 5.26,
303
  "learning_rate": 0.0002457393483709273,
304
- "loss": 0.7052,
305
  "step": 4200
306
  },
307
  {
308
  "epoch": 5.39,
309
  "learning_rate": 0.00024365079365079364,
310
- "loss": 0.6786,
311
  "step": 4300
312
  },
313
  {
314
  "epoch": 5.51,
315
- "learning_rate": 0.00024158312447786128,
316
- "loss": 0.6846,
317
  "step": 4400
318
  },
319
  {
320
  "epoch": 5.64,
321
- "learning_rate": 0.00023949456975772763,
322
- "loss": 0.697,
323
  "step": 4500
324
  },
325
  {
326
  "epoch": 5.76,
327
- "learning_rate": 0.00023740601503759396,
328
- "loss": 0.6739,
329
  "step": 4600
330
  },
331
  {
332
  "epoch": 5.89,
333
- "learning_rate": 0.0002353174603174603,
334
- "loss": 0.6816,
335
  "step": 4700
336
  },
337
  {
338
  "epoch": 6.0,
339
- "eval_accuracy": 0.9005589879376287,
340
- "eval_loss": 0.34049057960510254,
341
- "eval_runtime": 141.6045,
342
- "eval_samples_per_second": 48.007,
343
- "eval_steps_per_second": 1.504,
344
  "step": 4788
345
  },
346
  {
347
  "epoch": 6.02,
348
- "learning_rate": 0.00023322890559732663,
349
- "loss": 0.6973,
350
  "step": 4800
351
  },
352
  {
353
  "epoch": 6.14,
354
- "learning_rate": 0.00023114035087719298,
355
- "loss": 0.6901,
356
  "step": 4900
357
  },
358
  {
359
  "epoch": 6.27,
360
- "learning_rate": 0.00022905179615705928,
361
- "loss": 0.6833,
362
  "step": 5000
363
  },
364
  {
365
  "epoch": 6.39,
366
- "learning_rate": 0.00022696324143692563,
367
- "loss": 0.6515,
368
  "step": 5100
369
  },
370
  {
371
  "epoch": 6.52,
372
- "learning_rate": 0.00022487468671679195,
373
- "loss": 0.7043,
374
  "step": 5200
375
  },
376
  {
377
  "epoch": 6.64,
378
- "learning_rate": 0.0002227861319966583,
379
- "loss": 0.6675,
380
  "step": 5300
381
  },
382
  {
383
  "epoch": 6.77,
384
- "learning_rate": 0.00022069757727652463,
385
- "loss": 0.6833,
386
  "step": 5400
387
  },
388
  {
389
  "epoch": 6.89,
390
- "learning_rate": 0.00021860902255639098,
391
- "loss": 0.6461,
392
  "step": 5500
393
  },
394
  {
395
  "epoch": 7.0,
396
- "eval_accuracy": 0.9004118858487791,
397
- "eval_loss": 0.33838745951652527,
398
- "eval_runtime": 142.5146,
399
- "eval_samples_per_second": 47.7,
400
- "eval_steps_per_second": 1.495,
401
  "step": 5586
402
  },
403
  {
404
  "epoch": 7.02,
405
- "learning_rate": 0.00021652046783625728,
406
- "loss": 0.6867,
407
  "step": 5600
408
  },
409
  {
410
  "epoch": 7.14,
411
  "learning_rate": 0.00021443191311612363,
412
- "loss": 0.6969,
413
  "step": 5700
414
  },
415
  {
416
  "epoch": 7.27,
417
  "learning_rate": 0.00021234335839598995,
418
- "loss": 0.6741,
419
  "step": 5800
420
  },
421
  {
422
  "epoch": 7.39,
423
  "learning_rate": 0.0002102548036758563,
424
- "loss": 0.6698,
425
  "step": 5900
426
  },
427
  {
428
  "epoch": 7.52,
429
  "learning_rate": 0.00020816624895572263,
430
- "loss": 0.6678,
431
  "step": 6000
432
  },
433
  {
434
  "epoch": 7.64,
435
  "learning_rate": 0.00020607769423558895,
436
- "loss": 0.6446,
437
  "step": 6100
438
  },
439
  {
440
  "epoch": 7.77,
441
  "learning_rate": 0.00020398913951545527,
442
- "loss": 0.6887,
443
  "step": 6200
444
  },
445
  {
446
  "epoch": 7.89,
447
  "learning_rate": 0.00020190058479532163,
448
- "loss": 0.6697,
449
  "step": 6300
450
  },
451
  {
452
  "epoch": 8.0,
453
- "eval_accuracy": 0.9045307443365695,
454
- "eval_loss": 0.3271743953227997,
455
- "eval_runtime": 143.1828,
456
- "eval_samples_per_second": 47.478,
457
- "eval_steps_per_second": 1.488,
458
  "step": 6384
459
  },
460
  {
461
  "epoch": 8.02,
462
- "learning_rate": 0.0001998329156223893,
463
- "loss": 0.676,
464
  "step": 6400
465
  },
466
  {
467
  "epoch": 8.15,
468
- "learning_rate": 0.00019774436090225564,
469
- "loss": 0.6687,
470
  "step": 6500
471
  },
472
  {
473
  "epoch": 8.27,
474
- "learning_rate": 0.00019565580618212194,
475
- "loss": 0.6582,
476
  "step": 6600
477
  },
478
  {
479
  "epoch": 8.4,
480
- "learning_rate": 0.0001935672514619883,
481
- "loss": 0.6945,
482
  "step": 6700
483
  },
484
  {
485
  "epoch": 8.52,
486
- "learning_rate": 0.0001914786967418546,
487
- "loss": 0.6876,
488
  "step": 6800
489
  },
490
  {
491
  "epoch": 8.65,
492
- "learning_rate": 0.00018939014202172096,
493
- "loss": 0.6754,
494
  "step": 6900
495
  },
496
  {
497
  "epoch": 8.77,
498
- "learning_rate": 0.0001873015873015873,
499
- "loss": 0.6757,
500
  "step": 7000
501
  },
502
  {
503
  "epoch": 8.9,
504
- "learning_rate": 0.0001852130325814536,
505
- "loss": 0.6575,
506
  "step": 7100
507
  },
508
  {
509
  "epoch": 9.0,
510
- "eval_accuracy": 0.910856134157105,
511
- "eval_loss": 0.3237280249595642,
512
- "eval_runtime": 142.0574,
513
- "eval_samples_per_second": 47.854,
514
- "eval_steps_per_second": 1.499,
515
  "step": 7182
516
  },
517
  {
518
  "epoch": 9.02,
519
- "learning_rate": 0.00018312447786131994,
520
- "loss": 0.6676,
521
  "step": 7200
522
  },
523
  {
524
  "epoch": 9.15,
525
- "learning_rate": 0.00018103592314118629,
526
- "loss": 0.6514,
527
  "step": 7300
528
  },
529
  {
530
  "epoch": 9.27,
531
- "learning_rate": 0.0001789473684210526,
532
- "loss": 0.6948,
533
  "step": 7400
534
  },
535
  {
536
  "epoch": 9.4,
537
- "learning_rate": 0.00017685881370091896,
538
- "loss": 0.6489,
539
  "step": 7500
540
  },
541
  {
542
  "epoch": 9.52,
543
- "learning_rate": 0.00017477025898078526,
544
- "loss": 0.6642,
545
  "step": 7600
546
  },
547
  {
548
  "epoch": 9.65,
549
- "learning_rate": 0.0001726817042606516,
550
- "loss": 0.6609,
551
  "step": 7700
552
  },
553
  {
554
  "epoch": 9.77,
555
- "learning_rate": 0.00017059314954051793,
556
- "loss": 0.666,
557
  "step": 7800
558
  },
559
  {
560
  "epoch": 9.9,
561
- "learning_rate": 0.00016850459482038428,
562
- "loss": 0.6634,
563
  "step": 7900
564
  },
565
  {
566
  "epoch": 10.0,
567
- "eval_accuracy": 0.902618417181524,
568
- "eval_loss": 0.32581621408462524,
569
- "eval_runtime": 143.1323,
570
- "eval_samples_per_second": 47.495,
571
- "eval_steps_per_second": 1.488,
572
  "step": 7980
573
  },
574
  {
575
  "epoch": 10.03,
576
- "learning_rate": 0.0001664160401002506,
577
- "loss": 0.659,
578
  "step": 8000
579
  },
580
  {
581
  "epoch": 10.15,
582
  "learning_rate": 0.00016432748538011696,
583
- "loss": 0.6515,
584
  "step": 8100
585
  },
586
  {
587
  "epoch": 10.28,
588
  "learning_rate": 0.00016223893065998326,
589
- "loss": 0.6513,
590
  "step": 8200
591
  },
592
  {
593
  "epoch": 10.4,
594
  "learning_rate": 0.0001601503759398496,
595
- "loss": 0.6516,
596
  "step": 8300
597
  },
598
  {
599
  "epoch": 10.53,
600
  "learning_rate": 0.00015806182121971593,
601
- "loss": 0.6812,
602
  "step": 8400
603
  },
604
  {
605
  "epoch": 10.65,
606
  "learning_rate": 0.00015597326649958228,
607
- "loss": 0.6584,
608
  "step": 8500
609
  },
610
  {
611
  "epoch": 10.78,
612
  "learning_rate": 0.0001538847117794486,
613
- "loss": 0.6548,
614
  "step": 8600
615
  },
616
  {
617
  "epoch": 10.9,
618
  "learning_rate": 0.00015179615705931496,
619
- "loss": 0.6604,
620
  "step": 8700
621
  },
622
  {
623
  "epoch": 11.0,
624
- "eval_accuracy": 0.9042365401588702,
625
- "eval_loss": 0.3179236054420471,
626
- "eval_runtime": 144.9391,
627
- "eval_samples_per_second": 46.902,
628
- "eval_steps_per_second": 1.47,
629
  "step": 8778
630
  },
631
  {
632
  "epoch": 11.03,
633
  "learning_rate": 0.00014970760233918125,
634
- "loss": 0.7029,
635
  "step": 8800
636
  },
637
  {
638
  "epoch": 11.15,
639
- "learning_rate": 0.00014763993316624894,
640
- "loss": 0.6572,
641
  "step": 8900
642
  },
643
  {
644
  "epoch": 11.28,
645
- "learning_rate": 0.00014555137844611527,
646
- "loss": 0.6593,
647
  "step": 9000
648
  },
649
  {
650
  "epoch": 11.4,
651
- "learning_rate": 0.00014346282372598162,
652
- "loss": 0.6585,
653
  "step": 9100
654
  },
655
  {
656
  "epoch": 11.53,
657
- "learning_rate": 0.00014137426900584794,
658
- "loss": 0.6797,
659
  "step": 9200
660
  },
661
  {
662
  "epoch": 11.65,
663
- "learning_rate": 0.00013928571428571427,
664
- "loss": 0.6646,
665
  "step": 9300
666
  },
667
  {
668
  "epoch": 11.78,
669
- "learning_rate": 0.00013719715956558062,
670
- "loss": 0.6978,
671
  "step": 9400
672
  },
673
  {
674
  "epoch": 11.9,
675
- "learning_rate": 0.00013510860484544694,
676
- "loss": 0.6483,
677
  "step": 9500
678
  },
679
  {
680
  "epoch": 12.0,
681
- "eval_accuracy": 0.9058546631362165,
682
- "eval_loss": 0.3203018307685852,
683
- "eval_runtime": 145.5825,
684
- "eval_samples_per_second": 46.695,
685
- "eval_steps_per_second": 1.463,
686
  "step": 9576
687
  },
688
  {
689
  "epoch": 12.03,
690
- "learning_rate": 0.00013302005012531327,
691
- "loss": 0.6659,
692
  "step": 9600
693
  },
694
  {
695
  "epoch": 12.16,
696
- "learning_rate": 0.00013093149540517962,
697
- "loss": 0.6641,
698
  "step": 9700
699
  },
700
  {
701
  "epoch": 12.28,
702
- "learning_rate": 0.00012884294068504594,
703
- "loss": 0.6587,
704
  "step": 9800
705
  },
706
  {
707
  "epoch": 12.41,
708
- "learning_rate": 0.00012675438596491227,
709
- "loss": 0.6521,
710
  "step": 9900
711
  },
712
  {
713
  "epoch": 12.53,
714
- "learning_rate": 0.00012466583124477862,
715
- "loss": 0.6697,
716
  "step": 10000
717
  },
718
  {
719
  "epoch": 12.66,
720
- "learning_rate": 0.00012257727652464494,
721
- "loss": 0.6604,
722
  "step": 10100
723
  },
724
  {
725
  "epoch": 12.78,
726
- "learning_rate": 0.00012048872180451128,
727
- "loss": 0.6298,
728
  "step": 10200
729
  },
730
  {
731
  "epoch": 12.91,
732
- "learning_rate": 0.0001184001670843776,
733
- "loss": 0.6578,
734
  "step": 10300
735
  },
736
  {
737
  "epoch": 13.0,
738
- "eval_accuracy": 0.9089438070020595,
739
- "eval_loss": 0.3160444498062134,
740
- "eval_runtime": 145.1947,
741
- "eval_samples_per_second": 46.82,
742
- "eval_steps_per_second": 1.467,
743
  "step": 10374
744
  },
745
  {
746
  "epoch": 13.03,
747
- "learning_rate": 0.00011631161236424394,
748
- "loss": 0.6578,
749
  "step": 10400
750
  },
751
  {
752
  "epoch": 13.16,
753
- "learning_rate": 0.00011422305764411028,
754
- "loss": 0.6588,
755
  "step": 10500
756
  },
757
  {
758
  "epoch": 13.28,
759
- "learning_rate": 0.0001121345029239766,
760
  "loss": 0.6701,
761
  "step": 10600
762
  },
763
  {
764
  "epoch": 13.41,
765
  "learning_rate": 0.00011004594820384294,
766
- "loss": 0.6476,
767
  "step": 10700
768
  },
769
  {
770
  "epoch": 13.53,
771
  "learning_rate": 0.00010795739348370928,
772
- "loss": 0.6363,
773
  "step": 10800
774
  },
775
  {
776
  "epoch": 13.66,
777
  "learning_rate": 0.0001058688387635756,
778
- "loss": 0.6569,
779
  "step": 10900
780
  },
781
  {
782
  "epoch": 13.78,
783
  "learning_rate": 0.00010378028404344194,
784
- "loss": 0.6646,
785
  "step": 11000
786
  },
787
  {
788
  "epoch": 13.91,
789
- "learning_rate": 0.0001017126148705096,
790
- "loss": 0.654,
791
  "step": 11100
792
  },
793
  {
794
  "epoch": 14.0,
795
- "eval_accuracy": 0.9090909090909091,
796
- "eval_loss": 0.3138688802719116,
797
- "eval_runtime": 144.1517,
798
- "eval_samples_per_second": 47.159,
799
- "eval_steps_per_second": 1.478,
800
  "step": 11172
801
  },
802
  {
803
  "epoch": 14.04,
804
- "learning_rate": 9.962406015037594e-05,
805
- "loss": 0.6437,
806
  "step": 11200
807
  },
808
  {
809
  "epoch": 14.16,
810
- "learning_rate": 9.753550543024226e-05,
811
- "loss": 0.671,
812
  "step": 11300
813
  },
814
  {
815
  "epoch": 14.29,
816
- "learning_rate": 9.54469507101086e-05,
817
- "loss": 0.6526,
818
  "step": 11400
819
  },
820
  {
821
  "epoch": 14.41,
822
- "learning_rate": 9.335839598997494e-05,
823
- "loss": 0.647,
824
  "step": 11500
825
  },
826
  {
827
  "epoch": 14.54,
828
- "learning_rate": 9.126984126984126e-05,
829
- "loss": 0.6658,
830
  "step": 11600
831
  },
832
  {
833
  "epoch": 14.66,
834
- "learning_rate": 8.91812865497076e-05,
835
- "loss": 0.6508,
836
  "step": 11700
837
  },
838
  {
839
  "epoch": 14.79,
840
- "learning_rate": 8.709273182957394e-05,
841
- "loss": 0.6234,
842
  "step": 11800
843
  },
844
  {
845
  "epoch": 14.91,
846
- "learning_rate": 8.500417710944026e-05,
847
- "loss": 0.6418,
848
  "step": 11900
849
  },
850
  {
851
  "epoch": 15.0,
852
- "eval_accuracy": 0.9124742571344513,
853
- "eval_loss": 0.30906587839126587,
854
- "eval_runtime": 143.6897,
855
- "eval_samples_per_second": 47.31,
856
- "eval_steps_per_second": 1.482,
857
  "step": 11970
858
  },
859
  {
860
  "epoch": 15.04,
861
- "learning_rate": 8.29156223893066e-05,
862
- "loss": 0.6882,
863
  "step": 12000
864
  },
865
  {
866
  "epoch": 15.16,
867
- "learning_rate": 8.082706766917294e-05,
868
- "loss": 0.6246,
869
  "step": 12100
870
  },
871
  {
872
  "epoch": 15.29,
873
- "learning_rate": 7.873851294903926e-05,
874
- "loss": 0.6462,
875
  "step": 12200
876
  },
877
  {
878
  "epoch": 15.41,
879
- "learning_rate": 7.66499582289056e-05,
880
- "loss": 0.6391,
881
  "step": 12300
882
  },
883
  {
884
  "epoch": 15.54,
885
- "learning_rate": 7.456140350877192e-05,
886
- "loss": 0.6373,
887
  "step": 12400
888
  },
889
  {
890
  "epoch": 15.66,
891
- "learning_rate": 7.247284878863826e-05,
892
- "loss": 0.6593,
893
  "step": 12500
894
  },
895
  {
896
  "epoch": 15.79,
897
- "learning_rate": 7.038429406850458e-05,
898
- "loss": 0.6799,
899
  "step": 12600
900
  },
901
  {
902
  "epoch": 15.91,
903
- "learning_rate": 6.829573934837092e-05,
904
- "loss": 0.6394,
905
  "step": 12700
906
  },
907
  {
908
  "epoch": 16.0,
909
- "eval_accuracy": 0.9029126213592233,
910
- "eval_loss": 0.32229486107826233,
911
- "eval_runtime": 143.4982,
912
- "eval_samples_per_second": 47.373,
913
- "eval_steps_per_second": 1.484,
914
  "step": 12768
915
  },
916
  {
917
  "epoch": 16.04,
918
- "learning_rate": 6.620718462823724e-05,
919
- "loss": 0.6501,
920
  "step": 12800
921
  },
922
  {
923
  "epoch": 16.17,
924
- "learning_rate": 6.411862990810358e-05,
925
- "loss": 0.6517,
926
  "step": 12900
927
  },
928
  {
929
  "epoch": 16.29,
930
- "learning_rate": 6.203007518796992e-05,
931
- "loss": 0.6452,
932
  "step": 13000
933
  },
934
  {
935
  "epoch": 16.42,
936
- "learning_rate": 5.994152046783625e-05,
937
- "loss": 0.6472,
938
  "step": 13100
939
  },
940
  {
941
  "epoch": 16.54,
942
- "learning_rate": 5.785296574770258e-05,
943
- "loss": 0.6504,
944
  "step": 13200
945
  },
946
  {
947
  "epoch": 16.67,
948
- "learning_rate": 5.578529657477026e-05,
949
- "loss": 0.6369,
950
  "step": 13300
951
  },
952
  {
953
  "epoch": 16.79,
954
- "learning_rate": 5.369674185463659e-05,
955
- "loss": 0.6703,
956
  "step": 13400
957
  },
958
  {
959
  "epoch": 16.92,
960
- "learning_rate": 5.160818713450292e-05,
961
- "loss": 0.637,
962
  "step": 13500
963
  },
964
  {
965
  "epoch": 17.0,
966
- "eval_accuracy": 0.9152691968225949,
967
- "eval_loss": 0.3084510564804077,
968
- "eval_runtime": 142.2965,
969
- "eval_samples_per_second": 47.773,
970
- "eval_steps_per_second": 1.497,
971
  "step": 13566
972
  },
973
  {
974
  "epoch": 17.04,
975
- "learning_rate": 4.951963241436926e-05,
976
- "loss": 0.6336,
977
  "step": 13600
978
  },
979
  {
980
  "epoch": 17.17,
981
- "learning_rate": 4.743107769423559e-05,
982
- "loss": 0.6343,
983
  "step": 13700
984
  },
985
  {
986
  "epoch": 17.29,
987
- "learning_rate": 4.534252297410192e-05,
988
- "loss": 0.6277,
989
  "step": 13800
990
  },
991
  {
992
  "epoch": 17.42,
993
- "learning_rate": 4.325396825396825e-05,
994
- "loss": 0.6402,
995
  "step": 13900
996
  },
997
  {
998
  "epoch": 17.54,
999
- "learning_rate": 4.1165413533834586e-05,
1000
- "loss": 0.6435,
1001
  "step": 14000
1002
  },
1003
  {
1004
  "epoch": 17.67,
1005
- "learning_rate": 3.907685881370092e-05,
1006
- "loss": 0.6751,
1007
  "step": 14100
1008
  },
1009
  {
1010
  "epoch": 17.79,
1011
- "learning_rate": 3.698830409356725e-05,
1012
- "loss": 0.6788,
1013
  "step": 14200
1014
  },
1015
  {
1016
  "epoch": 17.92,
1017
- "learning_rate": 3.489974937343358e-05,
1018
- "loss": 0.6258,
1019
  "step": 14300
1020
  },
1021
  {
1022
  "epoch": 18.0,
1023
- "eval_accuracy": 0.9068843777581642,
1024
- "eval_loss": 0.31823334097862244,
1025
- "eval_runtime": 144.3331,
1026
- "eval_samples_per_second": 47.099,
1027
- "eval_steps_per_second": 1.476,
1028
  "step": 14364
1029
  },
1030
  {
1031
  "epoch": 18.05,
1032
- "learning_rate": 3.281119465329991e-05,
1033
- "loss": 0.6569,
1034
  "step": 14400
1035
  },
1036
  {
1037
  "epoch": 18.17,
1038
- "learning_rate": 3.072263993316625e-05,
1039
- "loss": 0.636,
1040
  "step": 14500
1041
  },
1042
  {
1043
  "epoch": 18.3,
1044
- "learning_rate": 2.8634085213032577e-05,
1045
- "loss": 0.637,
1046
  "step": 14600
1047
  },
1048
  {
1049
  "epoch": 18.42,
1050
- "learning_rate": 2.654553049289891e-05,
1051
- "loss": 0.6577,
1052
  "step": 14700
1053
  },
1054
  {
1055
  "epoch": 18.55,
1056
- "learning_rate": 2.4456975772765242e-05,
1057
- "loss": 0.6397,
1058
  "step": 14800
1059
  },
1060
  {
1061
  "epoch": 18.67,
1062
- "learning_rate": 2.2368421052631576e-05,
1063
- "loss": 0.6459,
1064
  "step": 14900
1065
  },
1066
  {
1067
  "epoch": 18.8,
1068
- "learning_rate": 2.0279866332497907e-05,
1069
- "loss": 0.6389,
1070
  "step": 15000
1071
  },
1072
  {
1073
  "epoch": 18.92,
1074
- "learning_rate": 1.819131161236424e-05,
1075
- "loss": 0.6438,
1076
  "step": 15100
1077
  },
1078
  {
1079
  "epoch": 19.0,
1080
- "eval_accuracy": 0.9077669902912622,
1081
- "eval_loss": 0.31267043948173523,
1082
- "eval_runtime": 142.4378,
1083
- "eval_samples_per_second": 47.726,
1084
- "eval_steps_per_second": 1.495,
1085
  "step": 15162
1086
  },
1087
  {
1088
  "epoch": 19.05,
1089
- "learning_rate": 1.6102756892230575e-05,
1090
- "loss": 0.6462,
1091
  "step": 15200
1092
  },
1093
  {
1094
  "epoch": 19.17,
1095
- "learning_rate": 1.4014202172096908e-05,
1096
- "loss": 0.6364,
1097
  "step": 15300
1098
  },
1099
  {
1100
  "epoch": 19.3,
1101
- "learning_rate": 1.192564745196324e-05,
1102
- "loss": 0.6412,
1103
  "step": 15400
1104
  },
1105
  {
1106
  "epoch": 19.42,
1107
- "learning_rate": 9.837092731829572e-06,
1108
- "loss": 0.6138,
1109
  "step": 15500
1110
  },
1111
  {
1112
  "epoch": 19.55,
1113
- "learning_rate": 7.748538011695905e-06,
1114
- "loss": 0.6652,
1115
  "step": 15600
1116
  },
1117
  {
1118
  "epoch": 19.67,
1119
- "learning_rate": 5.659983291562238e-06,
1120
- "loss": 0.64,
1121
  "step": 15700
1122
  },
1123
  {
1124
  "epoch": 19.8,
1125
- "learning_rate": 3.571428571428571e-06,
1126
- "loss": 0.6232,
1127
  "step": 15800
1128
  },
1129
  {
1130
  "epoch": 19.92,
1131
- "learning_rate": 1.4828738512949038e-06,
1132
- "loss": 0.6569,
1133
  "step": 15900
1134
  },
1135
  {
1136
  "epoch": 20.0,
1137
- "eval_accuracy": 0.9114445425125037,
1138
- "eval_loss": 0.3101291060447693,
1139
- "eval_runtime": 141.8004,
1140
- "eval_samples_per_second": 47.941,
1141
- "eval_steps_per_second": 1.502,
1142
  "step": 15960
1143
  },
1144
  {
1145
  "epoch": 20.0,
1146
  "step": 15960,
1147
  "total_flos": 0.0,
1148
- "train_loss": 0.8349186476609461,
1149
- "train_runtime": 26368.6047,
1150
- "train_samples_per_second": 38.754,
1151
- "train_steps_per_second": 0.605
1152
  }
1153
  ],
1154
  "max_steps": 15960,
 
1
  {
2
  "best_metric": 0.9152691968225949,
3
+ "best_model_checkpoint": "trillsson3-ft-keyword-spotting-13/checkpoint-11970",
4
  "epoch": 19.999373825923605,
5
  "global_step": 15960,
6
  "is_hyper_param_search": false,
 
10
  {
11
  "epoch": 0.13,
12
  "learning_rate": 1.7669172932330825e-05,
13
+ "loss": 7.8872,
14
  "step": 100
15
  },
16
  {
17
  "epoch": 0.25,
18
  "learning_rate": 3.646616541353383e-05,
19
+ "loss": 6.0261,
20
  "step": 200
21
  },
22
  {
23
  "epoch": 0.38,
24
  "learning_rate": 5.526315789473683e-05,
25
+ "loss": 3.8766,
26
  "step": 300
27
  },
28
  {
29
  "epoch": 0.5,
30
  "learning_rate": 7.406015037593984e-05,
31
+ "loss": 3.1036,
32
  "step": 400
33
  },
34
  {
35
  "epoch": 0.63,
36
  "learning_rate": 9.285714285714286e-05,
37
+ "loss": 2.5949,
38
  "step": 500
39
  },
40
  {
41
  "epoch": 0.75,
42
  "learning_rate": 0.00011165413533834586,
43
+ "loss": 2.158,
44
  "step": 600
45
  },
46
  {
47
  "epoch": 0.88,
48
  "learning_rate": 0.00013045112781954885,
49
+ "loss": 1.7756,
50
  "step": 700
51
  },
52
  {
53
  "epoch": 1.0,
54
+ "eval_accuracy": 0.6395998823183289,
55
+ "eval_loss": 0.9283319711685181,
56
+ "eval_runtime": 153.4542,
57
+ "eval_samples_per_second": 44.3,
58
+ "eval_steps_per_second": 1.388,
59
  "step": 798
60
  },
61
  {
62
  "epoch": 1.0,
63
  "learning_rate": 0.00014924812030075185,
64
+ "loss": 1.4696,
65
  "step": 800
66
  },
67
  {
68
  "epoch": 1.13,
69
  "learning_rate": 0.00016804511278195486,
70
+ "loss": 1.2914,
71
  "step": 900
72
  },
73
  {
74
  "epoch": 1.25,
75
  "learning_rate": 0.0001868421052631579,
76
+ "loss": 1.1715,
77
  "step": 1000
78
  },
79
  {
80
  "epoch": 1.38,
81
  "learning_rate": 0.0002056390977443609,
82
+ "loss": 1.087,
83
  "step": 1100
84
  },
85
  {
86
  "epoch": 1.5,
87
  "learning_rate": 0.0002244360902255639,
88
+ "loss": 0.978,
89
  "step": 1200
90
  },
91
  {
92
  "epoch": 1.63,
93
  "learning_rate": 0.0002432330827067669,
94
+ "loss": 0.92,
95
  "step": 1300
96
  },
97
  {
98
  "epoch": 1.75,
99
  "learning_rate": 0.0002620300751879699,
100
+ "loss": 0.8856,
101
  "step": 1400
102
  },
103
  {
104
  "epoch": 1.88,
105
  "learning_rate": 0.0002808270676691729,
106
+ "loss": 0.8631,
107
  "step": 1500
108
  },
109
  {
110
  "epoch": 2.0,
111
+ "eval_accuracy": 0.8573109738158282,
112
+ "eval_loss": 0.4884476065635681,
113
+ "eval_runtime": 147.3242,
114
+ "eval_samples_per_second": 46.143,
115
+ "eval_steps_per_second": 1.446,
116
  "step": 1596
117
  },
118
  {
119
  "epoch": 2.01,
120
  "learning_rate": 0.0002996240601503759,
121
+ "loss": 0.837,
122
  "step": 1600
123
  },
124
  {
125
  "epoch": 2.13,
126
  "learning_rate": 0.000297953216374269,
127
+ "loss": 0.8428,
128
  "step": 1700
129
  },
130
  {
131
  "epoch": 2.26,
132
  "learning_rate": 0.0002958646616541353,
133
+ "loss": 0.7937,
134
  "step": 1800
135
  },
136
  {
137
  "epoch": 2.38,
138
  "learning_rate": 0.0002937761069340017,
139
+ "loss": 0.7866,
140
  "step": 1900
141
  },
142
  {
143
  "epoch": 2.51,
144
  "learning_rate": 0.00029168755221386795,
145
+ "loss": 0.7648,
146
  "step": 2000
147
  },
148
  {
149
  "epoch": 2.63,
150
  "learning_rate": 0.00028959899749373433,
151
+ "loss": 0.7622,
152
  "step": 2100
153
  },
154
  {
155
  "epoch": 2.76,
156
  "learning_rate": 0.00028751044277360065,
157
+ "loss": 0.7636,
158
  "step": 2200
159
  },
160
  {
161
  "epoch": 2.88,
162
  "learning_rate": 0.000285421888053467,
163
+ "loss": 0.7551,
164
  "step": 2300
165
  },
166
  {
167
  "epoch": 3.0,
168
+ "eval_accuracy": 0.8832009414533687,
169
+ "eval_loss": 0.3967166543006897,
170
+ "eval_runtime": 146.488,
171
+ "eval_samples_per_second": 46.407,
172
+ "eval_steps_per_second": 1.454,
173
  "step": 2394
174
  },
175
  {
176
  "epoch": 3.01,
177
  "learning_rate": 0.0002833333333333333,
178
+ "loss": 0.7545,
179
  "step": 2400
180
  },
181
  {
182
  "epoch": 3.13,
183
  "learning_rate": 0.0002812447786131997,
184
+ "loss": 0.7217,
185
  "step": 2500
186
  },
187
  {
188
  "epoch": 3.26,
189
  "learning_rate": 0.00027915622389306595,
190
+ "loss": 0.7158,
191
  "step": 2600
192
  },
193
  {
194
  "epoch": 3.38,
195
  "learning_rate": 0.0002770676691729323,
196
+ "loss": 0.7164,
197
  "step": 2700
198
  },
199
  {
200
  "epoch": 3.51,
201
  "learning_rate": 0.00027497911445279865,
202
+ "loss": 0.6992,
203
  "step": 2800
204
  },
205
  {
206
  "epoch": 3.63,
207
  "learning_rate": 0.000272890559732665,
208
+ "loss": 0.7291,
209
  "step": 2900
210
  },
211
  {
212
  "epoch": 3.76,
213
  "learning_rate": 0.0002708020050125313,
214
+ "loss": 0.6976,
215
  "step": 3000
216
  },
217
  {
218
  "epoch": 3.88,
219
  "learning_rate": 0.0002687134502923976,
220
+ "loss": 0.6968,
221
  "step": 3100
222
  },
223
  {
224
  "epoch": 4.0,
225
+ "eval_accuracy": 0.8989408649602825,
226
+ "eval_loss": 0.36444538831710815,
227
+ "eval_runtime": 146.8663,
228
+ "eval_samples_per_second": 46.287,
229
+ "eval_steps_per_second": 1.45,
230
  "step": 3192
231
  },
232
  {
233
  "epoch": 4.01,
234
  "learning_rate": 0.00026662489557226395,
235
+ "loss": 0.7219,
236
  "step": 3200
237
  },
238
  {
239
  "epoch": 4.14,
240
  "learning_rate": 0.0002645363408521303,
241
+ "loss": 0.6859,
242
  "step": 3300
243
  },
244
  {
245
  "epoch": 4.26,
246
  "learning_rate": 0.00026244778613199665,
247
+ "loss": 0.7076,
248
  "step": 3400
249
  },
250
  {
251
  "epoch": 4.39,
252
  "learning_rate": 0.00026035923141186297,
253
+ "loss": 0.6935,
254
  "step": 3500
255
  },
256
  {
257
  "epoch": 4.51,
258
  "learning_rate": 0.0002582706766917293,
259
+ "loss": 0.7098,
260
  "step": 3600
261
  },
262
  {
263
  "epoch": 4.64,
264
  "learning_rate": 0.0002561821219715956,
265
+ "loss": 0.6902,
266
  "step": 3700
267
  },
268
  {
269
  "epoch": 4.76,
270
  "learning_rate": 0.00025409356725146194,
271
+ "loss": 0.6824,
272
  "step": 3800
273
  },
274
  {
275
  "epoch": 4.89,
276
  "learning_rate": 0.0002520050125313283,
277
+ "loss": 0.67,
278
  "step": 3900
279
  },
280
  {
281
  "epoch": 5.0,
282
+ "eval_accuracy": 0.9057075610473668,
283
+ "eval_loss": 0.342781126499176,
284
+ "eval_runtime": 148.4976,
285
+ "eval_samples_per_second": 45.779,
286
+ "eval_steps_per_second": 1.434,
287
  "step": 3990
288
  },
289
  {
290
  "epoch": 5.01,
291
  "learning_rate": 0.00024991645781119465,
292
+ "loss": 0.6909,
293
  "step": 4000
294
  },
295
  {
296
  "epoch": 5.14,
297
  "learning_rate": 0.00024782790309106097,
298
+ "loss": 0.7105,
299
  "step": 4100
300
  },
301
  {
302
  "epoch": 5.26,
303
  "learning_rate": 0.0002457393483709273,
304
+ "loss": 0.7022,
305
  "step": 4200
306
  },
307
  {
308
  "epoch": 5.39,
309
  "learning_rate": 0.00024365079365079364,
310
+ "loss": 0.6867,
311
  "step": 4300
312
  },
313
  {
314
  "epoch": 5.51,
315
+ "learning_rate": 0.00024156223893065997,
316
+ "loss": 0.6881,
317
  "step": 4400
318
  },
319
  {
320
  "epoch": 5.64,
321
+ "learning_rate": 0.00023947368421052632,
322
+ "loss": 0.6882,
323
  "step": 4500
324
  },
325
  {
326
  "epoch": 5.76,
327
+ "learning_rate": 0.00023738512949039262,
328
+ "loss": 0.6711,
329
  "step": 4600
330
  },
331
  {
332
  "epoch": 5.89,
333
+ "learning_rate": 0.00023529657477025897,
334
+ "loss": 0.6854,
335
  "step": 4700
336
  },
337
  {
338
  "epoch": 6.0,
339
+ "eval_accuracy": 0.902618417181524,
340
+ "eval_loss": 0.34081852436065674,
341
+ "eval_runtime": 149.584,
342
+ "eval_samples_per_second": 45.446,
343
+ "eval_steps_per_second": 1.424,
344
  "step": 4788
345
  },
346
  {
347
  "epoch": 6.02,
348
+ "learning_rate": 0.0002332080200501253,
349
+ "loss": 0.6815,
350
  "step": 4800
351
  },
352
  {
353
  "epoch": 6.14,
354
+ "learning_rate": 0.00023111946532999164,
355
+ "loss": 0.6863,
356
  "step": 4900
357
  },
358
  {
359
  "epoch": 6.27,
360
+ "learning_rate": 0.00022903091060985797,
361
+ "loss": 0.6843,
362
  "step": 5000
363
  },
364
  {
365
  "epoch": 6.39,
366
+ "learning_rate": 0.0002269423558897243,
367
+ "loss": 0.6677,
368
  "step": 5100
369
  },
370
  {
371
  "epoch": 6.52,
372
+ "learning_rate": 0.00022485380116959061,
373
+ "loss": 0.7096,
374
  "step": 5200
375
  },
376
  {
377
  "epoch": 6.64,
378
+ "learning_rate": 0.00022276524644945696,
379
+ "loss": 0.6534,
380
  "step": 5300
381
  },
382
  {
383
  "epoch": 6.77,
384
+ "learning_rate": 0.0002206766917293233,
385
+ "loss": 0.6773,
386
  "step": 5400
387
  },
388
  {
389
  "epoch": 6.89,
390
+ "learning_rate": 0.00021858813700918964,
391
+ "loss": 0.6701,
392
  "step": 5500
393
  },
394
  {
395
  "epoch": 7.0,
396
+ "eval_accuracy": 0.901294498381877,
397
+ "eval_loss": 0.3358515202999115,
398
+ "eval_runtime": 149.9158,
399
+ "eval_samples_per_second": 45.345,
400
+ "eval_steps_per_second": 1.421,
401
  "step": 5586
402
  },
403
  {
404
  "epoch": 7.02,
405
+ "learning_rate": 0.00021649958228905596,
406
+ "loss": 0.6695,
407
  "step": 5600
408
  },
409
  {
410
  "epoch": 7.14,
411
  "learning_rate": 0.00021443191311612363,
412
+ "loss": 0.7058,
413
  "step": 5700
414
  },
415
  {
416
  "epoch": 7.27,
417
  "learning_rate": 0.00021234335839598995,
418
+ "loss": 0.6577,
419
  "step": 5800
420
  },
421
  {
422
  "epoch": 7.39,
423
  "learning_rate": 0.0002102548036758563,
424
+ "loss": 0.6799,
425
  "step": 5900
426
  },
427
  {
428
  "epoch": 7.52,
429
  "learning_rate": 0.00020816624895572263,
430
+ "loss": 0.6727,
431
  "step": 6000
432
  },
433
  {
434
  "epoch": 7.64,
435
  "learning_rate": 0.00020607769423558895,
436
+ "loss": 0.6397,
437
  "step": 6100
438
  },
439
  {
440
  "epoch": 7.77,
441
  "learning_rate": 0.00020398913951545527,
442
+ "loss": 0.6845,
443
  "step": 6200
444
  },
445
  {
446
  "epoch": 7.89,
447
  "learning_rate": 0.00020190058479532163,
448
+ "loss": 0.6734,
449
  "step": 6300
450
  },
451
  {
452
  "epoch": 8.0,
453
+ "eval_accuracy": 0.9058546631362165,
454
+ "eval_loss": 0.3285204768180847,
455
+ "eval_runtime": 149.6806,
456
+ "eval_samples_per_second": 45.417,
457
+ "eval_steps_per_second": 1.423,
458
  "step": 6384
459
  },
460
  {
461
  "epoch": 8.02,
462
+ "learning_rate": 0.00019981203007518795,
463
+ "loss": 0.6753,
464
  "step": 6400
465
  },
466
  {
467
  "epoch": 8.15,
468
+ "learning_rate": 0.0001977234753550543,
469
+ "loss": 0.6674,
470
  "step": 6500
471
  },
472
  {
473
  "epoch": 8.27,
474
+ "learning_rate": 0.00019563492063492062,
475
+ "loss": 0.6704,
476
  "step": 6600
477
  },
478
  {
479
  "epoch": 8.4,
480
+ "learning_rate": 0.00019354636591478695,
481
+ "loss": 0.686,
482
  "step": 6700
483
  },
484
  {
485
  "epoch": 8.52,
486
+ "learning_rate": 0.00019145781119465327,
487
+ "loss": 0.6792,
488
  "step": 6800
489
  },
490
  {
491
  "epoch": 8.65,
492
+ "learning_rate": 0.00018936925647451962,
493
+ "loss": 0.6698,
494
  "step": 6900
495
  },
496
  {
497
  "epoch": 8.77,
498
+ "learning_rate": 0.00018728070175438595,
499
+ "loss": 0.6755,
500
  "step": 7000
501
  },
502
  {
503
  "epoch": 8.9,
504
+ "learning_rate": 0.0001851921470342523,
505
+ "loss": 0.6581,
506
  "step": 7100
507
  },
508
  {
509
  "epoch": 9.0,
510
+ "eval_accuracy": 0.9095322153574581,
511
+ "eval_loss": 0.31989586353302,
512
+ "eval_runtime": 148.5082,
513
+ "eval_samples_per_second": 45.775,
514
+ "eval_steps_per_second": 1.434,
515
  "step": 7182
516
  },
517
  {
518
  "epoch": 9.02,
519
+ "learning_rate": 0.0001831035923141186,
520
+ "loss": 0.6601,
521
  "step": 7200
522
  },
523
  {
524
  "epoch": 9.15,
525
+ "learning_rate": 0.00018101503759398495,
526
+ "loss": 0.6746,
527
  "step": 7300
528
  },
529
  {
530
  "epoch": 9.27,
531
+ "learning_rate": 0.00017892648287385127,
532
+ "loss": 0.6805,
533
  "step": 7400
534
  },
535
  {
536
  "epoch": 9.4,
537
+ "learning_rate": 0.00017683792815371762,
538
+ "loss": 0.6777,
539
  "step": 7500
540
  },
541
  {
542
  "epoch": 9.52,
543
+ "learning_rate": 0.00017474937343358395,
544
+ "loss": 0.6619,
545
  "step": 7600
546
  },
547
  {
548
  "epoch": 9.65,
549
+ "learning_rate": 0.0001726608187134503,
550
+ "loss": 0.6625,
551
  "step": 7700
552
  },
553
  {
554
  "epoch": 9.77,
555
+ "learning_rate": 0.0001705722639933166,
556
+ "loss": 0.665,
557
  "step": 7800
558
  },
559
  {
560
  "epoch": 9.9,
561
+ "learning_rate": 0.00016848370927318294,
562
+ "loss": 0.6557,
563
  "step": 7900
564
  },
565
  {
566
  "epoch": 10.0,
567
+ "eval_accuracy": 0.8986466607825832,
568
+ "eval_loss": 0.3300594091415405,
569
+ "eval_runtime": 148.7316,
570
+ "eval_samples_per_second": 45.706,
571
+ "eval_steps_per_second": 1.432,
572
  "step": 7980
573
  },
574
  {
575
  "epoch": 10.03,
576
+ "learning_rate": 0.00016639515455304927,
577
+ "loss": 0.6688,
578
  "step": 8000
579
  },
580
  {
581
  "epoch": 10.15,
582
  "learning_rate": 0.00016432748538011696,
583
+ "loss": 0.6642,
584
  "step": 8100
585
  },
586
  {
587
  "epoch": 10.28,
588
  "learning_rate": 0.00016223893065998326,
589
+ "loss": 0.6542,
590
  "step": 8200
591
  },
592
  {
593
  "epoch": 10.4,
594
  "learning_rate": 0.0001601503759398496,
595
+ "loss": 0.6461,
596
  "step": 8300
597
  },
598
  {
599
  "epoch": 10.53,
600
  "learning_rate": 0.00015806182121971593,
601
+ "loss": 0.6721,
602
  "step": 8400
603
  },
604
  {
605
  "epoch": 10.65,
606
  "learning_rate": 0.00015597326649958228,
607
+ "loss": 0.6642,
608
  "step": 8500
609
  },
610
  {
611
  "epoch": 10.78,
612
  "learning_rate": 0.0001538847117794486,
613
+ "loss": 0.6529,
614
  "step": 8600
615
  },
616
  {
617
  "epoch": 10.9,
618
  "learning_rate": 0.00015179615705931496,
619
+ "loss": 0.6768,
620
  "step": 8700
621
  },
622
  {
623
  "epoch": 11.0,
624
+ "eval_accuracy": 0.9046778464254193,
625
+ "eval_loss": 0.31744641065597534,
626
+ "eval_runtime": 148.9285,
627
+ "eval_samples_per_second": 45.646,
628
+ "eval_steps_per_second": 1.43,
629
  "step": 8778
630
  },
631
  {
632
  "epoch": 11.03,
633
  "learning_rate": 0.00014970760233918125,
634
+ "loss": 0.7036,
635
  "step": 8800
636
  },
637
  {
638
  "epoch": 11.15,
639
+ "learning_rate": 0.0001476190476190476,
640
+ "loss": 0.6825,
641
  "step": 8900
642
  },
643
  {
644
  "epoch": 11.28,
645
+ "learning_rate": 0.00014553049289891393,
646
+ "loss": 0.6693,
647
  "step": 9000
648
  },
649
  {
650
  "epoch": 11.4,
651
+ "learning_rate": 0.00014344193817878025,
652
+ "loss": 0.661,
653
  "step": 9100
654
  },
655
  {
656
  "epoch": 11.53,
657
+ "learning_rate": 0.0001413533834586466,
658
+ "loss": 0.6625,
659
  "step": 9200
660
  },
661
  {
662
  "epoch": 11.65,
663
+ "learning_rate": 0.00013926482873851293,
664
+ "loss": 0.6523,
665
  "step": 9300
666
  },
667
  {
668
  "epoch": 11.78,
669
+ "learning_rate": 0.00013717627401837925,
670
+ "loss": 0.6765,
671
  "step": 9400
672
  },
673
  {
674
  "epoch": 11.9,
675
+ "learning_rate": 0.0001350877192982456,
676
+ "loss": 0.6459,
677
  "step": 9500
678
  },
679
  {
680
  "epoch": 12.0,
681
+ "eval_accuracy": 0.903059723448073,
682
+ "eval_loss": 0.3192310631275177,
683
+ "eval_runtime": 149.2235,
684
+ "eval_samples_per_second": 45.556,
685
+ "eval_steps_per_second": 1.427,
686
  "step": 9576
687
  },
688
  {
689
  "epoch": 12.03,
690
+ "learning_rate": 0.00013299916457811193,
691
+ "loss": 0.6518,
692
  "step": 9600
693
  },
694
  {
695
  "epoch": 12.16,
696
+ "learning_rate": 0.00013091060985797825,
697
+ "loss": 0.6644,
698
  "step": 9700
699
  },
700
  {
701
  "epoch": 12.28,
702
+ "learning_rate": 0.0001288220551378446,
703
+ "loss": 0.6565,
704
  "step": 9800
705
  },
706
  {
707
  "epoch": 12.41,
708
+ "learning_rate": 0.00012673350041771093,
709
+ "loss": 0.6647,
710
  "step": 9900
711
  },
712
  {
713
  "epoch": 12.53,
714
+ "learning_rate": 0.00012464494569757725,
715
+ "loss": 0.6651,
716
  "step": 10000
717
  },
718
  {
719
  "epoch": 12.66,
720
+ "learning_rate": 0.0001225563909774436,
721
+ "loss": 0.6697,
722
  "step": 10100
723
  },
724
  {
725
  "epoch": 12.78,
726
+ "learning_rate": 0.00012046783625730992,
727
+ "loss": 0.6559,
728
  "step": 10200
729
  },
730
  {
731
  "epoch": 12.91,
732
+ "learning_rate": 0.00011837928153717626,
733
+ "loss": 0.6607,
734
  "step": 10300
735
  },
736
  {
737
  "epoch": 13.0,
738
+ "eval_accuracy": 0.9065901735804649,
739
+ "eval_loss": 0.3172565996646881,
740
+ "eval_runtime": 149.6048,
741
+ "eval_samples_per_second": 45.44,
742
+ "eval_steps_per_second": 1.424,
743
  "step": 10374
744
  },
745
  {
746
  "epoch": 13.03,
747
+ "learning_rate": 0.00011629072681704259,
748
+ "loss": 0.6435,
749
  "step": 10400
750
  },
751
  {
752
  "epoch": 13.16,
753
+ "learning_rate": 0.00011420217209690892,
754
+ "loss": 0.6727,
755
  "step": 10500
756
  },
757
  {
758
  "epoch": 13.28,
759
+ "learning_rate": 0.00011211361737677526,
760
  "loss": 0.6701,
761
  "step": 10600
762
  },
763
  {
764
  "epoch": 13.41,
765
  "learning_rate": 0.00011004594820384294,
766
+ "loss": 0.6474,
767
  "step": 10700
768
  },
769
  {
770
  "epoch": 13.53,
771
  "learning_rate": 0.00010795739348370928,
772
+ "loss": 0.6307,
773
  "step": 10800
774
  },
775
  {
776
  "epoch": 13.66,
777
  "learning_rate": 0.0001058688387635756,
778
+ "loss": 0.6748,
779
  "step": 10900
780
  },
781
  {
782
  "epoch": 13.78,
783
  "learning_rate": 0.00010378028404344194,
784
+ "loss": 0.6382,
785
  "step": 11000
786
  },
787
  {
788
  "epoch": 13.91,
789
+ "learning_rate": 0.00010169172932330827,
790
+ "loss": 0.656,
791
  "step": 11100
792
  },
793
  {
794
  "epoch": 14.0,
795
+ "eval_accuracy": 0.9093851132686084,
796
+ "eval_loss": 0.3141985833644867,
797
+ "eval_runtime": 152.4058,
798
+ "eval_samples_per_second": 44.605,
799
+ "eval_steps_per_second": 1.398,
800
  "step": 11172
801
  },
802
  {
803
  "epoch": 14.04,
804
+ "learning_rate": 9.96031746031746e-05,
805
+ "loss": 0.6329,
806
  "step": 11200
807
  },
808
  {
809
  "epoch": 14.16,
810
+ "learning_rate": 9.751461988304094e-05,
811
+ "loss": 0.6499,
812
  "step": 11300
813
  },
814
  {
815
  "epoch": 14.29,
816
+ "learning_rate": 9.542606516290727e-05,
817
+ "loss": 0.6651,
818
  "step": 11400
819
  },
820
  {
821
  "epoch": 14.41,
822
+ "learning_rate": 9.33375104427736e-05,
823
+ "loss": 0.6593,
824
  "step": 11500
825
  },
826
  {
827
  "epoch": 14.54,
828
+ "learning_rate": 9.124895572263994e-05,
829
+ "loss": 0.6637,
830
  "step": 11600
831
  },
832
  {
833
  "epoch": 14.66,
834
+ "learning_rate": 8.916040100250626e-05,
835
+ "loss": 0.6527,
836
  "step": 11700
837
  },
838
  {
839
  "epoch": 14.79,
840
+ "learning_rate": 8.70718462823726e-05,
841
+ "loss": 0.6358,
842
  "step": 11800
843
  },
844
  {
845
  "epoch": 14.91,
846
+ "learning_rate": 8.498329156223893e-05,
847
+ "loss": 0.6302,
848
  "step": 11900
849
  },
850
  {
851
  "epoch": 15.0,
852
+ "eval_accuracy": 0.9152691968225949,
853
+ "eval_loss": 0.30927804112434387,
854
+ "eval_runtime": 149.4384,
855
+ "eval_samples_per_second": 45.49,
856
+ "eval_steps_per_second": 1.425,
857
  "step": 11970
858
  },
859
  {
860
  "epoch": 15.04,
861
+ "learning_rate": 8.289473684210526e-05,
862
+ "loss": 0.6972,
863
  "step": 12000
864
  },
865
  {
866
  "epoch": 15.16,
867
+ "learning_rate": 8.08061821219716e-05,
868
+ "loss": 0.6475,
869
  "step": 12100
870
  },
871
  {
872
  "epoch": 15.29,
873
+ "learning_rate": 7.871762740183793e-05,
874
+ "loss": 0.6497,
875
  "step": 12200
876
  },
877
  {
878
  "epoch": 15.41,
879
+ "learning_rate": 7.662907268170426e-05,
880
+ "loss": 0.6496,
881
  "step": 12300
882
  },
883
  {
884
  "epoch": 15.54,
885
+ "learning_rate": 7.454051796157058e-05,
886
+ "loss": 0.6397,
887
  "step": 12400
888
  },
889
  {
890
  "epoch": 15.66,
891
+ "learning_rate": 7.245196324143692e-05,
892
+ "loss": 0.6485,
893
  "step": 12500
894
  },
895
  {
896
  "epoch": 15.79,
897
+ "learning_rate": 7.036340852130326e-05,
898
+ "loss": 0.6785,
899
  "step": 12600
900
  },
901
  {
902
  "epoch": 15.91,
903
+ "learning_rate": 6.827485380116958e-05,
904
+ "loss": 0.636,
905
  "step": 12700
906
  },
907
  {
908
  "epoch": 16.0,
909
+ "eval_accuracy": 0.9043836422477199,
910
+ "eval_loss": 0.3184495270252228,
911
+ "eval_runtime": 150.904,
912
+ "eval_samples_per_second": 45.049,
913
+ "eval_steps_per_second": 1.411,
914
  "step": 12768
915
  },
916
  {
917
  "epoch": 16.04,
918
+ "learning_rate": 6.618629908103592e-05,
919
+ "loss": 0.6455,
920
  "step": 12800
921
  },
922
  {
923
  "epoch": 16.17,
924
+ "learning_rate": 6.409774436090225e-05,
925
+ "loss": 0.629,
926
  "step": 12900
927
  },
928
  {
929
  "epoch": 16.29,
930
+ "learning_rate": 6.200918964076858e-05,
931
+ "loss": 0.6345,
932
  "step": 13000
933
  },
934
  {
935
  "epoch": 16.42,
936
+ "learning_rate": 5.9920634920634916e-05,
937
+ "loss": 0.6492,
938
  "step": 13100
939
  },
940
  {
941
  "epoch": 16.54,
942
+ "learning_rate": 5.783208020050125e-05,
943
+ "loss": 0.6486,
944
  "step": 13200
945
  },
946
  {
947
  "epoch": 16.67,
948
+ "learning_rate": 5.5743525480367584e-05,
949
+ "loss": 0.6422,
950
  "step": 13300
951
  },
952
  {
953
  "epoch": 16.79,
954
+ "learning_rate": 5.367585630743525e-05,
955
+ "loss": 0.6606,
956
  "step": 13400
957
  },
958
  {
959
  "epoch": 16.92,
960
+ "learning_rate": 5.158730158730158e-05,
961
+ "loss": 0.6327,
962
  "step": 13500
963
  },
964
  {
965
  "epoch": 17.0,
966
+ "eval_accuracy": 0.911738746690203,
967
+ "eval_loss": 0.3104247748851776,
968
+ "eval_runtime": 150.9624,
969
+ "eval_samples_per_second": 45.031,
970
+ "eval_steps_per_second": 1.411,
971
  "step": 13566
972
  },
973
  {
974
  "epoch": 17.04,
975
+ "learning_rate": 4.949874686716791e-05,
976
+ "loss": 0.6315,
977
  "step": 13600
978
  },
979
  {
980
  "epoch": 17.17,
981
+ "learning_rate": 4.741019214703425e-05,
982
+ "loss": 0.6487,
983
  "step": 13700
984
  },
985
  {
986
  "epoch": 17.29,
987
+ "learning_rate": 4.532163742690058e-05,
988
+ "loss": 0.6465,
989
  "step": 13800
990
  },
991
  {
992
  "epoch": 17.42,
993
+ "learning_rate": 4.323308270676691e-05,
994
+ "loss": 0.6309,
995
  "step": 13900
996
  },
997
  {
998
  "epoch": 17.54,
999
+ "learning_rate": 4.1144527986633246e-05,
1000
+ "loss": 0.6369,
1001
  "step": 14000
1002
  },
1003
  {
1004
  "epoch": 17.67,
1005
+ "learning_rate": 3.905597326649958e-05,
1006
+ "loss": 0.655,
1007
  "step": 14100
1008
  },
1009
  {
1010
  "epoch": 17.79,
1011
+ "learning_rate": 3.696741854636591e-05,
1012
+ "loss": 0.6558,
1013
  "step": 14200
1014
  },
1015
  {
1016
  "epoch": 17.92,
1017
+ "learning_rate": 3.4878863826232245e-05,
1018
+ "loss": 0.6428,
1019
  "step": 14300
1020
  },
1021
  {
1022
  "epoch": 18.0,
1023
+ "eval_accuracy": 0.9083553986466608,
1024
+ "eval_loss": 0.315799742937088,
1025
+ "eval_runtime": 150.6062,
1026
+ "eval_samples_per_second": 45.138,
1027
+ "eval_steps_per_second": 1.414,
1028
  "step": 14364
1029
  },
1030
  {
1031
  "epoch": 18.05,
1032
+ "learning_rate": 3.2790309106098576e-05,
1033
+ "loss": 0.6466,
1034
  "step": 14400
1035
  },
1036
  {
1037
  "epoch": 18.17,
1038
+ "learning_rate": 3.070175438596491e-05,
1039
+ "loss": 0.6219,
1040
  "step": 14500
1041
  },
1042
  {
1043
  "epoch": 18.3,
1044
+ "learning_rate": 2.861319966583124e-05,
1045
+ "loss": 0.6351,
1046
  "step": 14600
1047
  },
1048
  {
1049
  "epoch": 18.42,
1050
+ "learning_rate": 2.6524644945697575e-05,
1051
+ "loss": 0.6638,
1052
  "step": 14700
1053
  },
1054
  {
1055
  "epoch": 18.55,
1056
+ "learning_rate": 2.4436090225563906e-05,
1057
+ "loss": 0.6414,
1058
  "step": 14800
1059
  },
1060
  {
1061
  "epoch": 18.67,
1062
+ "learning_rate": 2.234753550543024e-05,
1063
+ "loss": 0.6617,
1064
  "step": 14900
1065
  },
1066
  {
1067
  "epoch": 18.8,
1068
+ "learning_rate": 2.025898078529657e-05,
1069
+ "loss": 0.6359,
1070
  "step": 15000
1071
  },
1072
  {
1073
  "epoch": 18.92,
1074
+ "learning_rate": 1.8170426065162904e-05,
1075
+ "loss": 0.6515,
1076
  "step": 15100
1077
  },
1078
  {
1079
  "epoch": 19.0,
1080
+ "eval_accuracy": 0.9096793174463077,
1081
+ "eval_loss": 0.312863290309906,
1082
+ "eval_runtime": 150.6838,
1083
+ "eval_samples_per_second": 45.114,
1084
+ "eval_steps_per_second": 1.414,
1085
  "step": 15162
1086
  },
1087
  {
1088
  "epoch": 19.05,
1089
+ "learning_rate": 1.608187134502924e-05,
1090
+ "loss": 0.6385,
1091
  "step": 15200
1092
  },
1093
  {
1094
  "epoch": 19.17,
1095
+ "learning_rate": 1.3993316624895571e-05,
1096
+ "loss": 0.634,
1097
  "step": 15300
1098
  },
1099
  {
1100
  "epoch": 19.3,
1101
+ "learning_rate": 1.1904761904761903e-05,
1102
+ "loss": 0.6509,
1103
  "step": 15400
1104
  },
1105
  {
1106
  "epoch": 19.42,
1107
+ "learning_rate": 9.816207184628236e-06,
1108
+ "loss": 0.6388,
1109
  "step": 15500
1110
  },
1111
  {
1112
  "epoch": 19.55,
1113
+ "learning_rate": 7.727652464494568e-06,
1114
+ "loss": 0.6493,
1115
  "step": 15600
1116
  },
1117
  {
1118
  "epoch": 19.67,
1119
+ "learning_rate": 5.6390977443609015e-06,
1120
+ "loss": 0.6526,
1121
  "step": 15700
1122
  },
1123
  {
1124
  "epoch": 19.8,
1125
+ "learning_rate": 3.5505430242272343e-06,
1126
+ "loss": 0.6309,
1127
  "step": 15800
1128
  },
1129
  {
1130
  "epoch": 19.92,
1131
+ "learning_rate": 1.4619883040935671e-06,
1132
+ "loss": 0.6441,
1133
  "step": 15900
1134
  },
1135
  {
1136
  "epoch": 20.0,
1137
+ "eval_accuracy": 0.909973521624007,
1138
+ "eval_loss": 0.31152603030204773,
1139
+ "eval_runtime": 152.3497,
1140
+ "eval_samples_per_second": 44.621,
1141
+ "eval_steps_per_second": 1.398,
1142
  "step": 15960
1143
  },
1144
  {
1145
  "epoch": 20.0,
1146
  "step": 15960,
1147
  "total_flos": 0.0,
1148
+ "train_loss": 0.8353394886007285,
1149
+ "train_runtime": 26863.0447,
1150
+ "train_samples_per_second": 38.04,
1151
+ "train_steps_per_second": 0.594
1152
  }
1153
  ],
1154
  "max_steps": 15960,