rajistics commited on
Commit
b715185
1 Parent(s): 6dd4624

🍻 cheers

Browse files
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 4.0,
3
- "eval_accuracy": 0.9521785334750266,
4
- "eval_loss": 0.21390262246131897,
5
- "eval_runtime": 40.27,
6
- "eval_samples_per_second": 23.367,
7
- "eval_steps_per_second": 2.93,
8
  "total_flos": 1.651775717862015e+18,
9
- "train_loss": 0.4871297548483084,
10
- "train_runtime": 1589.6821,
11
- "train_samples_per_second": 13.406,
12
- "train_steps_per_second": 0.838
13
  }
 
1
  {
2
  "epoch": 4.0,
3
+ "eval_accuracy": 0.9543039319872476,
4
+ "eval_loss": 0.19176600873470306,
5
+ "eval_runtime": 33.828,
6
+ "eval_samples_per_second": 27.817,
7
+ "eval_steps_per_second": 3.488,
8
  "total_flos": 1.651775717862015e+18,
9
+ "train_loss": 0.4876434194433081,
10
+ "train_runtime": 1561.5053,
11
+ "train_samples_per_second": 13.648,
12
+ "train_steps_per_second": 0.853
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 4.0,
3
- "eval_accuracy": 0.9521785334750266,
4
- "eval_loss": 0.21390262246131897,
5
- "eval_runtime": 40.27,
6
- "eval_samples_per_second": 23.367,
7
- "eval_steps_per_second": 2.93
8
  }
 
1
  {
2
  "epoch": 4.0,
3
+ "eval_accuracy": 0.9543039319872476,
4
+ "eval_loss": 0.19176600873470306,
5
+ "eval_runtime": 33.828,
6
+ "eval_samples_per_second": 27.817,
7
+ "eval_steps_per_second": 3.488
8
  }
runs/Aug03_21-01-32_cab156e75ca5/events.out.tfevents.1659562835.cab156e75ca5.72.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3430a5c69cc495109663e9714ad902b05733e10604b8b2bae9a0112271bad718
3
+ size 363
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 4.0,
3
  "total_flos": 1.651775717862015e+18,
4
- "train_loss": 0.4871297548483084,
5
- "train_runtime": 1589.6821,
6
- "train_samples_per_second": 13.406,
7
- "train_steps_per_second": 0.838
8
  }
 
1
  {
2
  "epoch": 4.0,
3
  "total_flos": 1.651775717862015e+18,
4
+ "train_loss": 0.4876434194433081,
5
+ "train_runtime": 1561.5053,
6
+ "train_samples_per_second": 13.648,
7
+ "train_steps_per_second": 0.853
8
  }
trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.21390262246131897,
3
  "best_model_checkpoint": "finetuned-indian-food/checkpoint-1300",
4
  "epoch": 4.0,
5
  "global_step": 1332,
@@ -10,926 +10,926 @@
10
  {
11
  "epoch": 0.03,
12
  "learning_rate": 0.0001984984984984985,
13
- "loss": 2.8538,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 0.06,
18
  "learning_rate": 0.00019699699699699701,
19
- "loss": 2.5964,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 0.09,
24
  "learning_rate": 0.0001954954954954955,
25
- "loss": 2.2524,
26
  "step": 30
27
  },
28
  {
29
  "epoch": 0.12,
30
  "learning_rate": 0.00019399399399399402,
31
- "loss": 1.9596,
32
  "step": 40
33
  },
34
  {
35
  "epoch": 0.15,
36
  "learning_rate": 0.0001924924924924925,
37
- "loss": 1.726,
38
  "step": 50
39
  },
40
  {
41
  "epoch": 0.18,
42
  "learning_rate": 0.000190990990990991,
43
- "loss": 1.6152,
44
  "step": 60
45
  },
46
  {
47
  "epoch": 0.21,
48
  "learning_rate": 0.0001894894894894895,
49
- "loss": 1.3396,
50
  "step": 70
51
  },
52
  {
53
  "epoch": 0.24,
54
  "learning_rate": 0.000187987987987988,
55
- "loss": 1.2854,
56
  "step": 80
57
  },
58
  {
59
  "epoch": 0.27,
60
  "learning_rate": 0.0001864864864864865,
61
- "loss": 1.107,
62
  "step": 90
63
  },
64
  {
65
  "epoch": 0.3,
66
  "learning_rate": 0.000184984984984985,
67
- "loss": 1.0846,
68
  "step": 100
69
  },
70
  {
71
  "epoch": 0.3,
72
- "eval_accuracy": 0.8554729011689692,
73
- "eval_loss": 0.9561436772346497,
74
- "eval_runtime": 39.5068,
75
- "eval_samples_per_second": 23.819,
76
- "eval_steps_per_second": 2.987,
77
  "step": 100
78
  },
79
  {
80
  "epoch": 0.33,
81
  "learning_rate": 0.0001834834834834835,
82
- "loss": 0.9812,
83
  "step": 110
84
  },
85
  {
86
  "epoch": 0.36,
87
  "learning_rate": 0.000181981981981982,
88
- "loss": 0.8571,
89
  "step": 120
90
  },
91
  {
92
  "epoch": 0.39,
93
  "learning_rate": 0.0001804804804804805,
94
- "loss": 0.916,
95
  "step": 130
96
  },
97
  {
98
  "epoch": 0.42,
99
  "learning_rate": 0.00017897897897897898,
100
- "loss": 0.9905,
101
  "step": 140
102
  },
103
  {
104
  "epoch": 0.45,
105
  "learning_rate": 0.0001774774774774775,
106
- "loss": 0.7381,
107
  "step": 150
108
  },
109
  {
110
  "epoch": 0.48,
111
  "learning_rate": 0.000175975975975976,
112
- "loss": 0.801,
113
  "step": 160
114
  },
115
  {
116
  "epoch": 0.51,
117
  "learning_rate": 0.0001744744744744745,
118
- "loss": 0.6606,
119
  "step": 170
120
  },
121
  {
122
  "epoch": 0.54,
123
  "learning_rate": 0.000172972972972973,
124
- "loss": 0.7538,
125
  "step": 180
126
  },
127
  {
128
  "epoch": 0.57,
129
  "learning_rate": 0.00017147147147147148,
130
- "loss": 0.6495,
131
  "step": 190
132
  },
133
  {
134
  "epoch": 0.6,
135
  "learning_rate": 0.00016996996996997,
136
- "loss": 0.7894,
137
  "step": 200
138
  },
139
  {
140
  "epoch": 0.6,
141
- "eval_accuracy": 0.8926673751328374,
142
- "eval_loss": 0.587147057056427,
143
- "eval_runtime": 38.4761,
144
- "eval_samples_per_second": 24.457,
145
- "eval_steps_per_second": 3.067,
146
  "step": 200
147
  },
148
  {
149
  "epoch": 0.63,
150
  "learning_rate": 0.00016846846846846846,
151
- "loss": 0.7121,
152
  "step": 210
153
  },
154
  {
155
  "epoch": 0.66,
156
  "learning_rate": 0.00016696696696696697,
157
- "loss": 0.6888,
158
  "step": 220
159
  },
160
  {
161
  "epoch": 0.69,
162
  "learning_rate": 0.00016546546546546546,
163
- "loss": 0.5768,
164
  "step": 230
165
  },
166
  {
167
  "epoch": 0.72,
168
  "learning_rate": 0.00016396396396396395,
169
- "loss": 0.6568,
170
  "step": 240
171
  },
172
  {
173
  "epoch": 0.75,
174
- "learning_rate": 0.00016261261261261262,
175
- "loss": 0.601,
176
  "step": 250
177
  },
178
  {
179
  "epoch": 0.78,
180
- "learning_rate": 0.0001611111111111111,
181
- "loss": 0.5431,
182
  "step": 260
183
  },
184
  {
185
  "epoch": 0.81,
186
- "learning_rate": 0.00015960960960960963,
187
- "loss": 0.6634,
188
  "step": 270
189
  },
190
  {
191
  "epoch": 0.84,
192
- "learning_rate": 0.00015810810810810812,
193
- "loss": 0.8153,
194
  "step": 280
195
  },
196
  {
197
  "epoch": 0.87,
198
- "learning_rate": 0.00015660660660660663,
199
- "loss": 0.9727,
200
  "step": 290
201
  },
202
  {
203
  "epoch": 0.9,
204
- "learning_rate": 0.00015510510510510512,
205
- "loss": 0.6233,
206
  "step": 300
207
  },
208
  {
209
  "epoch": 0.9,
210
- "eval_accuracy": 0.9107332624867163,
211
- "eval_loss": 0.4447122812271118,
212
- "eval_runtime": 38.3227,
213
- "eval_samples_per_second": 24.555,
214
- "eval_steps_per_second": 3.079,
215
  "step": 300
216
  },
217
  {
218
  "epoch": 0.93,
219
- "learning_rate": 0.0001536036036036036,
220
- "loss": 0.578,
221
  "step": 310
222
  },
223
  {
224
  "epoch": 0.96,
225
- "learning_rate": 0.00015210210210210212,
226
- "loss": 0.6366,
227
  "step": 320
228
  },
229
  {
230
  "epoch": 0.99,
231
- "learning_rate": 0.0001506006006006006,
232
- "loss": 0.8622,
233
  "step": 330
234
  },
235
  {
236
  "epoch": 1.02,
237
- "learning_rate": 0.00014909909909909913,
238
- "loss": 0.5957,
239
  "step": 340
240
  },
241
  {
242
  "epoch": 1.05,
243
- "learning_rate": 0.00014759759759759762,
244
- "loss": 0.4458,
245
  "step": 350
246
  },
247
  {
248
  "epoch": 1.08,
249
- "learning_rate": 0.0001460960960960961,
250
- "loss": 0.383,
251
  "step": 360
252
  },
253
  {
254
  "epoch": 1.11,
255
- "learning_rate": 0.00014459459459459462,
256
- "loss": 0.4259,
257
  "step": 370
258
  },
259
  {
260
  "epoch": 1.14,
261
- "learning_rate": 0.00014309309309309308,
262
- "loss": 0.4246,
263
  "step": 380
264
  },
265
  {
266
  "epoch": 1.17,
267
- "learning_rate": 0.0001415915915915916,
268
- "loss": 0.3343,
269
  "step": 390
270
  },
271
  {
272
  "epoch": 1.2,
273
- "learning_rate": 0.0001400900900900901,
274
- "loss": 0.3619,
275
  "step": 400
276
  },
277
  {
278
  "epoch": 1.2,
279
- "eval_accuracy": 0.8937300743889479,
280
- "eval_loss": 0.4354824125766754,
281
- "eval_runtime": 37.6727,
282
- "eval_samples_per_second": 24.978,
283
- "eval_steps_per_second": 3.132,
284
  "step": 400
285
  },
286
  {
287
  "epoch": 1.23,
288
- "learning_rate": 0.00013858858858858858,
289
- "loss": 0.5238,
290
  "step": 410
291
  },
292
  {
293
  "epoch": 1.26,
294
- "learning_rate": 0.0001370870870870871,
295
- "loss": 0.5456,
296
  "step": 420
297
  },
298
  {
299
  "epoch": 1.29,
300
- "learning_rate": 0.00013558558558558558,
301
- "loss": 0.5347,
302
  "step": 430
303
  },
304
  {
305
  "epoch": 1.32,
306
- "learning_rate": 0.0001340840840840841,
307
- "loss": 0.3739,
308
  "step": 440
309
  },
310
  {
311
  "epoch": 1.35,
312
- "learning_rate": 0.00013258258258258258,
313
- "loss": 0.392,
314
  "step": 450
315
  },
316
  {
317
  "epoch": 1.38,
318
- "learning_rate": 0.00013108108108108107,
319
- "loss": 0.5306,
320
  "step": 460
321
  },
322
  {
323
  "epoch": 1.41,
324
- "learning_rate": 0.0001295795795795796,
325
- "loss": 0.4171,
326
  "step": 470
327
  },
328
  {
329
  "epoch": 1.44,
330
- "learning_rate": 0.00012807807807807808,
331
- "loss": 0.3715,
332
  "step": 480
333
  },
334
  {
335
  "epoch": 1.47,
336
- "learning_rate": 0.00012657657657657657,
337
- "loss": 0.5192,
338
  "step": 490
339
  },
340
  {
341
  "epoch": 1.5,
342
- "learning_rate": 0.00012507507507507508,
343
- "loss": 0.34,
344
  "step": 500
345
  },
346
  {
347
  "epoch": 1.5,
348
- "eval_accuracy": 0.9117959617428267,
349
- "eval_loss": 0.3712367117404938,
350
- "eval_runtime": 37.8371,
351
- "eval_samples_per_second": 24.87,
352
- "eval_steps_per_second": 3.119,
353
  "step": 500
354
  },
355
  {
356
  "epoch": 1.53,
357
- "learning_rate": 0.00012357357357357357,
358
- "loss": 0.5209,
359
  "step": 510
360
  },
361
  {
362
  "epoch": 1.56,
363
- "learning_rate": 0.00012207207207207209,
364
- "loss": 0.4896,
365
  "step": 520
366
  },
367
  {
368
  "epoch": 1.59,
369
- "learning_rate": 0.00012057057057057057,
370
- "loss": 0.5666,
371
  "step": 530
372
  },
373
  {
374
  "epoch": 1.62,
375
- "learning_rate": 0.00011906906906906908,
376
- "loss": 0.3288,
377
  "step": 540
378
  },
379
  {
380
  "epoch": 1.65,
381
- "learning_rate": 0.00011756756756756758,
382
- "loss": 0.4315,
383
  "step": 550
384
  },
385
  {
386
  "epoch": 1.68,
387
- "learning_rate": 0.00011606606606606607,
388
- "loss": 0.4185,
389
  "step": 560
390
  },
391
  {
392
  "epoch": 1.71,
393
- "learning_rate": 0.00011456456456456457,
394
- "loss": 0.2794,
395
  "step": 570
396
  },
397
  {
398
  "epoch": 1.74,
399
- "learning_rate": 0.00011306306306306307,
400
- "loss": 0.3125,
401
  "step": 580
402
  },
403
  {
404
  "epoch": 1.77,
405
- "learning_rate": 0.00011156156156156157,
406
- "loss": 0.3229,
407
  "step": 590
408
  },
409
  {
410
  "epoch": 1.8,
411
  "learning_rate": 0.00011006006006006006,
412
- "loss": 0.3413,
413
  "step": 600
414
  },
415
  {
416
  "epoch": 1.8,
417
- "eval_accuracy": 0.8916046758767269,
418
- "eval_loss": 0.40877240896224976,
419
- "eval_runtime": 39.2551,
420
- "eval_samples_per_second": 23.971,
421
- "eval_steps_per_second": 3.006,
422
  "step": 600
423
  },
424
  {
425
  "epoch": 1.83,
426
  "learning_rate": 0.00010855855855855856,
427
- "loss": 0.3967,
428
  "step": 610
429
  },
430
  {
431
  "epoch": 1.86,
432
  "learning_rate": 0.00010705705705705707,
433
- "loss": 0.32,
434
  "step": 620
435
  },
436
  {
437
  "epoch": 1.89,
438
  "learning_rate": 0.00010555555555555557,
439
- "loss": 0.3256,
440
  "step": 630
441
  },
442
  {
443
  "epoch": 1.92,
444
- "learning_rate": 0.0001042042042042042,
445
- "loss": 0.4024,
446
  "step": 640
447
  },
448
  {
449
  "epoch": 1.95,
450
- "learning_rate": 0.0001027027027027027,
451
- "loss": 0.4209,
452
  "step": 650
453
  },
454
  {
455
  "epoch": 1.98,
456
- "learning_rate": 0.0001012012012012012,
457
- "loss": 0.3888,
458
  "step": 660
459
  },
460
  {
461
  "epoch": 2.01,
462
- "learning_rate": 9.96996996996997e-05,
463
- "loss": 0.3793,
464
  "step": 670
465
  },
466
  {
467
  "epoch": 2.04,
468
- "learning_rate": 9.81981981981982e-05,
469
- "loss": 0.3094,
470
  "step": 680
471
  },
472
  {
473
  "epoch": 2.07,
474
- "learning_rate": 9.66966966966967e-05,
475
- "loss": 0.3613,
476
  "step": 690
477
  },
478
  {
479
  "epoch": 2.1,
480
- "learning_rate": 9.51951951951952e-05,
481
- "loss": 0.3619,
482
  "step": 700
483
  },
484
  {
485
  "epoch": 2.1,
486
- "eval_accuracy": 0.9043570669500531,
487
- "eval_loss": 0.37407416105270386,
488
- "eval_runtime": 39.0938,
489
- "eval_samples_per_second": 24.07,
490
- "eval_steps_per_second": 3.018,
491
  "step": 700
492
  },
493
  {
494
  "epoch": 2.13,
495
- "learning_rate": 9.36936936936937e-05,
496
- "loss": 0.3051,
497
  "step": 710
498
  },
499
  {
500
  "epoch": 2.16,
501
- "learning_rate": 9.219219219219219e-05,
502
- "loss": 0.4062,
503
  "step": 720
504
  },
505
  {
506
  "epoch": 2.19,
507
- "learning_rate": 9.069069069069069e-05,
508
- "loss": 0.3811,
509
  "step": 730
510
  },
511
  {
512
  "epoch": 2.22,
513
- "learning_rate": 8.918918918918919e-05,
514
- "loss": 0.3118,
515
  "step": 740
516
  },
517
  {
518
  "epoch": 2.25,
519
- "learning_rate": 8.76876876876877e-05,
520
- "loss": 0.3014,
521
  "step": 750
522
  },
523
  {
524
  "epoch": 2.28,
525
- "learning_rate": 8.61861861861862e-05,
526
- "loss": 0.2404,
527
  "step": 760
528
  },
529
  {
530
  "epoch": 2.31,
531
- "learning_rate": 8.468468468468469e-05,
532
- "loss": 0.3802,
533
  "step": 770
534
  },
535
  {
536
  "epoch": 2.34,
537
- "learning_rate": 8.318318318318319e-05,
538
- "loss": 0.3222,
539
  "step": 780
540
  },
541
  {
542
  "epoch": 2.37,
543
- "learning_rate": 8.168168168168169e-05,
544
- "loss": 0.2459,
545
  "step": 790
546
  },
547
  {
548
  "epoch": 2.4,
549
- "learning_rate": 8.018018018018019e-05,
550
- "loss": 0.2135,
551
  "step": 800
552
  },
553
  {
554
  "epoch": 2.4,
555
- "eval_accuracy": 0.9160467587672688,
556
- "eval_loss": 0.328594833612442,
557
- "eval_runtime": 38.4594,
558
- "eval_samples_per_second": 24.467,
559
- "eval_steps_per_second": 3.068,
560
  "step": 800
561
  },
562
  {
563
  "epoch": 2.43,
564
- "learning_rate": 7.867867867867868e-05,
565
- "loss": 0.2152,
566
  "step": 810
567
  },
568
  {
569
  "epoch": 2.46,
570
- "learning_rate": 7.717717717717718e-05,
571
- "loss": 0.2621,
572
  "step": 820
573
  },
574
  {
575
  "epoch": 2.49,
576
- "learning_rate": 7.567567567567568e-05,
577
- "loss": 0.2118,
578
  "step": 830
579
  },
580
  {
581
  "epoch": 2.52,
582
- "learning_rate": 7.417417417417419e-05,
583
- "loss": 0.2826,
584
  "step": 840
585
  },
586
  {
587
  "epoch": 2.55,
588
- "learning_rate": 7.267267267267268e-05,
589
- "loss": 0.4047,
590
  "step": 850
591
  },
592
  {
593
  "epoch": 2.58,
594
- "learning_rate": 7.117117117117116e-05,
595
- "loss": 0.4956,
596
  "step": 860
597
  },
598
  {
599
  "epoch": 2.61,
600
- "learning_rate": 6.966966966966967e-05,
601
- "loss": 0.2431,
602
  "step": 870
603
  },
604
  {
605
  "epoch": 2.64,
606
- "learning_rate": 6.816816816816817e-05,
607
- "loss": 0.2189,
608
  "step": 880
609
  },
610
  {
611
  "epoch": 2.67,
612
- "learning_rate": 6.666666666666667e-05,
613
- "loss": 0.2455,
614
  "step": 890
615
  },
616
  {
617
  "epoch": 2.7,
618
- "learning_rate": 6.516516516516516e-05,
619
- "loss": 0.2166,
620
  "step": 900
621
  },
622
  {
623
  "epoch": 2.7,
624
- "eval_accuracy": 0.9415515409139213,
625
- "eval_loss": 0.2758049964904785,
626
- "eval_runtime": 38.181,
627
- "eval_samples_per_second": 24.646,
628
- "eval_steps_per_second": 3.091,
629
  "step": 900
630
  },
631
  {
632
  "epoch": 2.73,
633
- "learning_rate": 6.366366366366366e-05,
634
- "loss": 0.2648,
635
  "step": 910
636
  },
637
  {
638
  "epoch": 2.76,
639
- "learning_rate": 6.216216216216216e-05,
640
- "loss": 0.2233,
641
  "step": 920
642
  },
643
  {
644
  "epoch": 2.79,
645
- "learning_rate": 6.0660660660660665e-05,
646
- "loss": 0.2876,
647
  "step": 930
648
  },
649
  {
650
  "epoch": 2.82,
651
- "learning_rate": 5.915915915915916e-05,
652
- "loss": 0.3385,
653
  "step": 940
654
  },
655
  {
656
  "epoch": 2.85,
657
- "learning_rate": 5.765765765765766e-05,
658
- "loss": 0.2823,
659
  "step": 950
660
  },
661
  {
662
  "epoch": 2.88,
663
- "learning_rate": 5.615615615615616e-05,
664
- "loss": 0.135,
665
  "step": 960
666
  },
667
  {
668
  "epoch": 2.91,
669
- "learning_rate": 5.465465465465466e-05,
670
- "loss": 0.289,
671
  "step": 970
672
  },
673
  {
674
  "epoch": 2.94,
675
- "learning_rate": 5.3153153153153155e-05,
676
- "loss": 0.2228,
677
  "step": 980
678
  },
679
  {
680
  "epoch": 2.97,
681
- "learning_rate": 5.165165165165166e-05,
682
- "loss": 0.3221,
683
  "step": 990
684
  },
685
  {
686
  "epoch": 3.0,
687
- "learning_rate": 5.015015015015015e-05,
688
- "loss": 0.1557,
689
  "step": 1000
690
  },
691
  {
692
  "epoch": 3.0,
693
- "eval_accuracy": 0.9330499468650372,
694
- "eval_loss": 0.2679324150085449,
695
- "eval_runtime": 37.7775,
696
- "eval_samples_per_second": 24.909,
697
- "eval_steps_per_second": 3.124,
698
  "step": 1000
699
  },
700
  {
701
  "epoch": 3.03,
702
- "learning_rate": 4.8648648648648654e-05,
703
- "loss": 0.1808,
704
  "step": 1010
705
  },
706
  {
707
  "epoch": 3.06,
708
- "learning_rate": 4.714714714714715e-05,
709
- "loss": 0.2742,
710
  "step": 1020
711
  },
712
  {
713
  "epoch": 3.09,
714
- "learning_rate": 4.5645645645645645e-05,
715
- "loss": 0.147,
716
  "step": 1030
717
  },
718
  {
719
  "epoch": 3.12,
720
- "learning_rate": 4.414414414414415e-05,
721
- "loss": 0.248,
722
  "step": 1040
723
  },
724
  {
725
  "epoch": 3.15,
726
- "learning_rate": 4.264264264264264e-05,
727
- "loss": 0.1214,
728
  "step": 1050
729
  },
730
  {
731
  "epoch": 3.18,
732
- "learning_rate": 4.1141141141141144e-05,
733
- "loss": 0.352,
734
  "step": 1060
735
  },
736
  {
737
  "epoch": 3.21,
738
- "learning_rate": 3.963963963963964e-05,
739
- "loss": 0.2441,
740
  "step": 1070
741
  },
742
  {
743
  "epoch": 3.24,
744
- "learning_rate": 3.813813813813814e-05,
745
- "loss": 0.2205,
746
  "step": 1080
747
  },
748
  {
749
  "epoch": 3.27,
750
- "learning_rate": 3.663663663663664e-05,
751
- "loss": 0.1925,
752
  "step": 1090
753
  },
754
  {
755
  "epoch": 3.3,
756
- "learning_rate": 3.513513513513514e-05,
757
- "loss": 0.1115,
758
  "step": 1100
759
  },
760
  {
761
  "epoch": 3.3,
762
- "eval_accuracy": 0.9362380446333688,
763
- "eval_loss": 0.2528650462627411,
764
- "eval_runtime": 38.8623,
765
- "eval_samples_per_second": 24.214,
766
- "eval_steps_per_second": 3.036,
767
  "step": 1100
768
  },
769
  {
770
  "epoch": 3.33,
771
- "learning_rate": 3.3633633633633635e-05,
772
- "loss": 0.2245,
773
  "step": 1110
774
  },
775
  {
776
  "epoch": 3.36,
777
- "learning_rate": 3.213213213213213e-05,
778
- "loss": 0.2538,
779
  "step": 1120
780
  },
781
  {
782
  "epoch": 3.39,
783
- "learning_rate": 3.063063063063063e-05,
784
- "loss": 0.1725,
785
  "step": 1130
786
  },
787
  {
788
  "epoch": 3.42,
789
- "learning_rate": 2.912912912912913e-05,
790
- "loss": 0.3009,
791
  "step": 1140
792
  },
793
  {
794
  "epoch": 3.45,
795
- "learning_rate": 2.762762762762763e-05,
796
- "loss": 0.3042,
797
  "step": 1150
798
  },
799
  {
800
  "epoch": 3.48,
801
- "learning_rate": 2.6126126126126128e-05,
802
- "loss": 0.1748,
803
  "step": 1160
804
  },
805
  {
806
  "epoch": 3.51,
807
- "learning_rate": 2.4624624624624627e-05,
808
- "loss": 0.1229,
809
  "step": 1170
810
  },
811
  {
812
  "epoch": 3.54,
813
- "learning_rate": 2.3123123123123125e-05,
814
- "loss": 0.2376,
815
  "step": 1180
816
  },
817
  {
818
  "epoch": 3.57,
819
  "learning_rate": 2.1621621621621624e-05,
820
- "loss": 0.2025,
821
  "step": 1190
822
  },
823
  {
824
  "epoch": 3.6,
825
  "learning_rate": 2.012012012012012e-05,
826
- "loss": 0.1571,
827
  "step": 1200
828
  },
829
  {
830
  "epoch": 3.6,
831
- "eval_accuracy": 0.946865037194474,
832
- "eval_loss": 0.23596596717834473,
833
- "eval_runtime": 38.9005,
834
- "eval_samples_per_second": 24.19,
835
- "eval_steps_per_second": 3.033,
836
  "step": 1200
837
  },
838
  {
839
  "epoch": 3.63,
840
  "learning_rate": 1.8618618618618618e-05,
841
- "loss": 0.0491,
842
  "step": 1210
843
  },
844
  {
845
  "epoch": 3.66,
846
  "learning_rate": 1.7117117117117117e-05,
847
- "loss": 0.1921,
848
  "step": 1220
849
  },
850
  {
851
  "epoch": 3.69,
852
  "learning_rate": 1.5615615615615616e-05,
853
- "loss": 0.1733,
854
  "step": 1230
855
  },
856
  {
857
  "epoch": 3.72,
858
  "learning_rate": 1.4114114114114116e-05,
859
- "loss": 0.1822,
860
  "step": 1240
861
  },
862
  {
863
  "epoch": 3.75,
864
  "learning_rate": 1.2612612612612611e-05,
865
- "loss": 0.2661,
866
  "step": 1250
867
  },
868
  {
869
  "epoch": 3.78,
870
  "learning_rate": 1.1111111111111112e-05,
871
- "loss": 0.1566,
872
  "step": 1260
873
  },
874
  {
875
  "epoch": 3.81,
876
  "learning_rate": 9.60960960960961e-06,
877
- "loss": 0.1764,
878
  "step": 1270
879
  },
880
  {
881
  "epoch": 3.84,
882
  "learning_rate": 8.108108108108109e-06,
883
- "loss": 0.1824,
884
  "step": 1280
885
  },
886
  {
887
  "epoch": 3.87,
888
  "learning_rate": 6.606606606606607e-06,
889
- "loss": 0.122,
890
  "step": 1290
891
  },
892
  {
893
  "epoch": 3.9,
894
  "learning_rate": 5.105105105105106e-06,
895
- "loss": 0.1079,
896
  "step": 1300
897
  },
898
  {
899
  "epoch": 3.9,
900
- "eval_accuracy": 0.9521785334750266,
901
- "eval_loss": 0.21390262246131897,
902
- "eval_runtime": 38.2092,
903
- "eval_samples_per_second": 24.628,
904
- "eval_steps_per_second": 3.088,
905
  "step": 1300
906
  },
907
  {
908
  "epoch": 3.93,
909
  "learning_rate": 3.603603603603604e-06,
910
- "loss": 0.1973,
911
  "step": 1310
912
  },
913
  {
914
  "epoch": 3.96,
915
  "learning_rate": 2.102102102102102e-06,
916
- "loss": 0.0976,
917
  "step": 1320
918
  },
919
  {
920
  "epoch": 3.99,
921
  "learning_rate": 6.006006006006006e-07,
922
- "loss": 0.1724,
923
  "step": 1330
924
  },
925
  {
926
  "epoch": 4.0,
927
  "step": 1332,
928
  "total_flos": 1.651775717862015e+18,
929
- "train_loss": 0.4871297548483084,
930
- "train_runtime": 1589.6821,
931
- "train_samples_per_second": 13.406,
932
- "train_steps_per_second": 0.838
933
  }
934
  ],
935
  "max_steps": 1332,
 
1
  {
2
+ "best_metric": 0.19176600873470306,
3
  "best_model_checkpoint": "finetuned-indian-food/checkpoint-1300",
4
  "epoch": 4.0,
5
  "global_step": 1332,
 
10
  {
11
  "epoch": 0.03,
12
  "learning_rate": 0.0001984984984984985,
13
+ "loss": 2.8856,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 0.06,
18
  "learning_rate": 0.00019699699699699701,
19
+ "loss": 2.5941,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 0.09,
24
  "learning_rate": 0.0001954954954954955,
25
+ "loss": 2.1576,
26
  "step": 30
27
  },
28
  {
29
  "epoch": 0.12,
30
  "learning_rate": 0.00019399399399399402,
31
+ "loss": 1.9221,
32
  "step": 40
33
  },
34
  {
35
  "epoch": 0.15,
36
  "learning_rate": 0.0001924924924924925,
37
+ "loss": 1.7671,
38
  "step": 50
39
  },
40
  {
41
  "epoch": 0.18,
42
  "learning_rate": 0.000190990990990991,
43
+ "loss": 1.4461,
44
  "step": 60
45
  },
46
  {
47
  "epoch": 0.21,
48
  "learning_rate": 0.0001894894894894895,
49
+ "loss": 1.3917,
50
  "step": 70
51
  },
52
  {
53
  "epoch": 0.24,
54
  "learning_rate": 0.000187987987987988,
55
+ "loss": 1.3666,
56
  "step": 80
57
  },
58
  {
59
  "epoch": 0.27,
60
  "learning_rate": 0.0001864864864864865,
61
+ "loss": 1.1615,
62
  "step": 90
63
  },
64
  {
65
  "epoch": 0.3,
66
  "learning_rate": 0.000184984984984985,
67
+ "loss": 1.0175,
68
  "step": 100
69
  },
70
  {
71
  "epoch": 0.3,
72
+ "eval_accuracy": 0.8629117959617428,
73
+ "eval_loss": 0.9246562719345093,
74
+ "eval_runtime": 35.8495,
75
+ "eval_samples_per_second": 26.249,
76
+ "eval_steps_per_second": 3.292,
77
  "step": 100
78
  },
79
  {
80
  "epoch": 0.33,
81
  "learning_rate": 0.0001834834834834835,
82
+ "loss": 1.044,
83
  "step": 110
84
  },
85
  {
86
  "epoch": 0.36,
87
  "learning_rate": 0.000181981981981982,
88
+ "loss": 0.9848,
89
  "step": 120
90
  },
91
  {
92
  "epoch": 0.39,
93
  "learning_rate": 0.0001804804804804805,
94
+ "loss": 0.7889,
95
  "step": 130
96
  },
97
  {
98
  "epoch": 0.42,
99
  "learning_rate": 0.00017897897897897898,
100
+ "loss": 0.921,
101
  "step": 140
102
  },
103
  {
104
  "epoch": 0.45,
105
  "learning_rate": 0.0001774774774774775,
106
+ "loss": 0.7709,
107
  "step": 150
108
  },
109
  {
110
  "epoch": 0.48,
111
  "learning_rate": 0.000175975975975976,
112
+ "loss": 0.9096,
113
  "step": 160
114
  },
115
  {
116
  "epoch": 0.51,
117
  "learning_rate": 0.0001744744744744745,
118
+ "loss": 0.7293,
119
  "step": 170
120
  },
121
  {
122
  "epoch": 0.54,
123
  "learning_rate": 0.000172972972972973,
124
+ "loss": 0.6419,
125
  "step": 180
126
  },
127
  {
128
  "epoch": 0.57,
129
  "learning_rate": 0.00017147147147147148,
130
+ "loss": 0.7251,
131
  "step": 190
132
  },
133
  {
134
  "epoch": 0.6,
135
  "learning_rate": 0.00016996996996997,
136
+ "loss": 0.7418,
137
  "step": 200
138
  },
139
  {
140
  "epoch": 0.6,
141
+ "eval_accuracy": 0.8990435706695006,
142
+ "eval_loss": 0.5535812377929688,
143
+ "eval_runtime": 33.962,
144
+ "eval_samples_per_second": 27.707,
145
+ "eval_steps_per_second": 3.474,
146
  "step": 200
147
  },
148
  {
149
  "epoch": 0.63,
150
  "learning_rate": 0.00016846846846846846,
151
+ "loss": 0.7307,
152
  "step": 210
153
  },
154
  {
155
  "epoch": 0.66,
156
  "learning_rate": 0.00016696696696696697,
157
+ "loss": 0.7721,
158
  "step": 220
159
  },
160
  {
161
  "epoch": 0.69,
162
  "learning_rate": 0.00016546546546546546,
163
+ "loss": 0.6073,
164
  "step": 230
165
  },
166
  {
167
  "epoch": 0.72,
168
  "learning_rate": 0.00016396396396396395,
169
+ "loss": 0.6946,
170
  "step": 240
171
  },
172
  {
173
  "epoch": 0.75,
174
+ "learning_rate": 0.00016246246246246247,
175
+ "loss": 0.7824,
176
  "step": 250
177
  },
178
  {
179
  "epoch": 0.78,
180
+ "learning_rate": 0.00016096096096096096,
181
+ "loss": 0.5175,
182
  "step": 260
183
  },
184
  {
185
  "epoch": 0.81,
186
+ "learning_rate": 0.00015945945945945947,
187
+ "loss": 0.7959,
188
  "step": 270
189
  },
190
  {
191
  "epoch": 0.84,
192
+ "learning_rate": 0.00015795795795795796,
193
+ "loss": 0.5273,
194
  "step": 280
195
  },
196
  {
197
  "epoch": 0.87,
198
+ "learning_rate": 0.00015645645645645645,
199
+ "loss": 0.6327,
200
  "step": 290
201
  },
202
  {
203
  "epoch": 0.9,
204
+ "learning_rate": 0.00015495495495495496,
205
+ "loss": 0.6652,
206
  "step": 300
207
  },
208
  {
209
  "epoch": 0.9,
210
+ "eval_accuracy": 0.9181721572794899,
211
+ "eval_loss": 0.4035964906215668,
212
+ "eval_runtime": 33.8609,
213
+ "eval_samples_per_second": 27.79,
214
+ "eval_steps_per_second": 3.485,
215
  "step": 300
216
  },
217
  {
218
  "epoch": 0.93,
219
+ "learning_rate": 0.00015345345345345345,
220
+ "loss": 0.6688,
221
  "step": 310
222
  },
223
  {
224
  "epoch": 0.96,
225
+ "learning_rate": 0.00015195195195195194,
226
+ "loss": 0.5384,
227
  "step": 320
228
  },
229
  {
230
  "epoch": 0.99,
231
+ "learning_rate": 0.00015045045045045046,
232
+ "loss": 0.5232,
233
  "step": 330
234
  },
235
  {
236
  "epoch": 1.02,
237
+ "learning_rate": 0.00014894894894894895,
238
+ "loss": 0.5407,
239
  "step": 340
240
  },
241
  {
242
  "epoch": 1.05,
243
+ "learning_rate": 0.00014744744744744746,
244
+ "loss": 0.512,
245
  "step": 350
246
  },
247
  {
248
  "epoch": 1.08,
249
+ "learning_rate": 0.00014594594594594595,
250
+ "loss": 0.4422,
251
  "step": 360
252
  },
253
  {
254
  "epoch": 1.11,
255
+ "learning_rate": 0.00014444444444444444,
256
+ "loss": 0.6211,
257
  "step": 370
258
  },
259
  {
260
  "epoch": 1.14,
261
+ "learning_rate": 0.00014294294294294295,
262
+ "loss": 0.3153,
263
  "step": 380
264
  },
265
  {
266
  "epoch": 1.17,
267
+ "learning_rate": 0.00014144144144144144,
268
+ "loss": 0.4897,
269
  "step": 390
270
  },
271
  {
272
  "epoch": 1.2,
273
+ "learning_rate": 0.00013993993993993996,
274
+ "loss": 0.5959,
275
  "step": 400
276
  },
277
  {
278
  "epoch": 1.2,
279
+ "eval_accuracy": 0.89798087141339,
280
+ "eval_loss": 0.40219178795814514,
281
+ "eval_runtime": 34.8649,
282
+ "eval_samples_per_second": 26.99,
283
+ "eval_steps_per_second": 3.384,
284
  "step": 400
285
  },
286
  {
287
  "epoch": 1.23,
288
+ "learning_rate": 0.00013843843843843845,
289
+ "loss": 0.4482,
290
  "step": 410
291
  },
292
  {
293
  "epoch": 1.26,
294
+ "learning_rate": 0.00013693693693693693,
295
+ "loss": 0.3453,
296
  "step": 420
297
  },
298
  {
299
  "epoch": 1.29,
300
+ "learning_rate": 0.00013543543543543545,
301
+ "loss": 0.4837,
302
  "step": 430
303
  },
304
  {
305
  "epoch": 1.32,
306
+ "learning_rate": 0.00013393393393393394,
307
+ "loss": 0.403,
308
  "step": 440
309
  },
310
  {
311
  "epoch": 1.35,
312
+ "learning_rate": 0.00013243243243243243,
313
+ "loss": 0.5089,
314
  "step": 450
315
  },
316
  {
317
  "epoch": 1.38,
318
+ "learning_rate": 0.00013093093093093094,
319
+ "loss": 0.4843,
320
  "step": 460
321
  },
322
  {
323
  "epoch": 1.41,
324
+ "learning_rate": 0.00012942942942942943,
325
+ "loss": 0.3155,
326
  "step": 470
327
  },
328
  {
329
  "epoch": 1.44,
330
+ "learning_rate": 0.00012792792792792795,
331
+ "loss": 0.4345,
332
  "step": 480
333
  },
334
  {
335
  "epoch": 1.47,
336
+ "learning_rate": 0.00012642642642642644,
337
+ "loss": 0.4005,
338
  "step": 490
339
  },
340
  {
341
  "epoch": 1.5,
342
+ "learning_rate": 0.00012492492492492492,
343
+ "loss": 0.4478,
344
  "step": 500
345
  },
346
  {
347
  "epoch": 1.5,
348
+ "eval_accuracy": 0.9287991498405951,
349
+ "eval_loss": 0.3246866464614868,
350
+ "eval_runtime": 33.5066,
351
+ "eval_samples_per_second": 28.084,
352
+ "eval_steps_per_second": 3.522,
353
  "step": 500
354
  },
355
  {
356
  "epoch": 1.53,
357
+ "learning_rate": 0.00012342342342342344,
358
+ "loss": 0.4507,
359
  "step": 510
360
  },
361
  {
362
  "epoch": 1.56,
363
+ "learning_rate": 0.00012192192192192193,
364
+ "loss": 0.5392,
365
  "step": 520
366
  },
367
  {
368
  "epoch": 1.59,
369
+ "learning_rate": 0.00012042042042042043,
370
+ "loss": 0.2738,
371
  "step": 530
372
  },
373
  {
374
  "epoch": 1.62,
375
+ "learning_rate": 0.00011891891891891893,
376
+ "loss": 0.566,
377
  "step": 540
378
  },
379
  {
380
  "epoch": 1.65,
381
+ "learning_rate": 0.00011741741741741743,
382
+ "loss": 0.5345,
383
  "step": 550
384
  },
385
  {
386
  "epoch": 1.68,
387
+ "learning_rate": 0.00011591591591591592,
388
+ "loss": 0.443,
389
  "step": 560
390
  },
391
  {
392
  "epoch": 1.71,
393
+ "learning_rate": 0.00011441441441441443,
394
+ "loss": 0.346,
395
  "step": 570
396
  },
397
  {
398
  "epoch": 1.74,
399
+ "learning_rate": 0.00011291291291291293,
400
+ "loss": 0.3985,
401
  "step": 580
402
  },
403
  {
404
  "epoch": 1.77,
405
+ "learning_rate": 0.00011141141141141143,
406
+ "loss": 0.3706,
407
  "step": 590
408
  },
409
  {
410
  "epoch": 1.8,
411
  "learning_rate": 0.00011006006006006006,
412
+ "loss": 0.4717,
413
  "step": 600
414
  },
415
  {
416
  "epoch": 1.8,
417
+ "eval_accuracy": 0.926673751328374,
418
+ "eval_loss": 0.30190205574035645,
419
+ "eval_runtime": 33.6807,
420
+ "eval_samples_per_second": 27.939,
421
+ "eval_steps_per_second": 3.503,
422
  "step": 600
423
  },
424
  {
425
  "epoch": 1.83,
426
  "learning_rate": 0.00010855855855855856,
427
+ "loss": 0.3582,
428
  "step": 610
429
  },
430
  {
431
  "epoch": 1.86,
432
  "learning_rate": 0.00010705705705705707,
433
+ "loss": 0.3629,
434
  "step": 620
435
  },
436
  {
437
  "epoch": 1.89,
438
  "learning_rate": 0.00010555555555555557,
439
+ "loss": 0.3866,
440
  "step": 630
441
  },
442
  {
443
  "epoch": 1.92,
444
+ "learning_rate": 0.00010405405405405406,
445
+ "loss": 0.3155,
446
  "step": 640
447
  },
448
  {
449
  "epoch": 1.95,
450
+ "learning_rate": 0.00010255255255255256,
451
+ "loss": 0.4311,
452
  "step": 650
453
  },
454
  {
455
  "epoch": 1.98,
456
+ "learning_rate": 0.00010105105105105106,
457
+ "loss": 0.2544,
458
  "step": 660
459
  },
460
  {
461
  "epoch": 2.01,
462
+ "learning_rate": 9.954954954954956e-05,
463
+ "loss": 0.3617,
464
  "step": 670
465
  },
466
  {
467
  "epoch": 2.04,
468
+ "learning_rate": 9.804804804804806e-05,
469
+ "loss": 0.3127,
470
  "step": 680
471
  },
472
  {
473
  "epoch": 2.07,
474
+ "learning_rate": 9.654654654654654e-05,
475
+ "loss": 0.2718,
476
  "step": 690
477
  },
478
  {
479
  "epoch": 2.1,
480
+ "learning_rate": 9.504504504504504e-05,
481
+ "loss": 0.34,
482
  "step": 700
483
  },
484
  {
485
  "epoch": 2.1,
486
+ "eval_accuracy": 0.9351753453772582,
487
+ "eval_loss": 0.25940415263175964,
488
+ "eval_runtime": 35.0686,
489
+ "eval_samples_per_second": 26.833,
490
+ "eval_steps_per_second": 3.365,
491
  "step": 700
492
  },
493
  {
494
  "epoch": 2.13,
495
+ "learning_rate": 9.354354354354354e-05,
496
+ "loss": 0.3881,
497
  "step": 710
498
  },
499
  {
500
  "epoch": 2.16,
501
+ "learning_rate": 9.204204204204205e-05,
502
+ "loss": 0.3528,
503
  "step": 720
504
  },
505
  {
506
  "epoch": 2.19,
507
+ "learning_rate": 9.054054054054055e-05,
508
+ "loss": 0.3053,
509
  "step": 730
510
  },
511
  {
512
  "epoch": 2.22,
513
+ "learning_rate": 8.903903903903904e-05,
514
+ "loss": 0.223,
515
  "step": 740
516
  },
517
  {
518
  "epoch": 2.25,
519
+ "learning_rate": 8.753753753753754e-05,
520
+ "loss": 0.2268,
521
  "step": 750
522
  },
523
  {
524
  "epoch": 2.28,
525
+ "learning_rate": 8.603603603603604e-05,
526
+ "loss": 0.4058,
527
  "step": 760
528
  },
529
  {
530
  "epoch": 2.31,
531
+ "learning_rate": 8.453453453453454e-05,
532
+ "loss": 0.3018,
533
  "step": 770
534
  },
535
  {
536
  "epoch": 2.34,
537
+ "learning_rate": 8.303303303303303e-05,
538
+ "loss": 0.2973,
539
  "step": 780
540
  },
541
  {
542
  "epoch": 2.37,
543
+ "learning_rate": 8.153153153153153e-05,
544
+ "loss": 0.2607,
545
  "step": 790
546
  },
547
  {
548
  "epoch": 2.4,
549
+ "learning_rate": 8.003003003003004e-05,
550
+ "loss": 0.3518,
551
  "step": 800
552
  },
553
  {
554
  "epoch": 2.4,
555
+ "eval_accuracy": 0.9351753453772582,
556
+ "eval_loss": 0.2507326304912567,
557
+ "eval_runtime": 34.9575,
558
+ "eval_samples_per_second": 26.918,
559
+ "eval_steps_per_second": 3.376,
560
  "step": 800
561
  },
562
  {
563
  "epoch": 2.43,
564
+ "learning_rate": 7.852852852852854e-05,
565
+ "loss": 0.2251,
566
  "step": 810
567
  },
568
  {
569
  "epoch": 2.46,
570
+ "learning_rate": 7.702702702702703e-05,
571
+ "loss": 0.2747,
572
  "step": 820
573
  },
574
  {
575
  "epoch": 2.49,
576
+ "learning_rate": 7.552552552552553e-05,
577
+ "loss": 0.2653,
578
  "step": 830
579
  },
580
  {
581
  "epoch": 2.52,
582
+ "learning_rate": 7.402402402402403e-05,
583
+ "loss": 0.2361,
584
  "step": 840
585
  },
586
  {
587
  "epoch": 2.55,
588
+ "learning_rate": 7.252252252252253e-05,
589
+ "loss": 0.2548,
590
  "step": 850
591
  },
592
  {
593
  "epoch": 2.58,
594
+ "learning_rate": 7.102102102102103e-05,
595
+ "loss": 0.392,
596
  "step": 860
597
  },
598
  {
599
  "epoch": 2.61,
600
+ "learning_rate": 6.951951951951952e-05,
601
+ "loss": 0.3041,
602
  "step": 870
603
  },
604
  {
605
  "epoch": 2.64,
606
+ "learning_rate": 6.801801801801802e-05,
607
+ "loss": 0.3341,
608
  "step": 880
609
  },
610
  {
611
  "epoch": 2.67,
612
+ "learning_rate": 6.651651651651653e-05,
613
+ "loss": 0.2462,
614
  "step": 890
615
  },
616
  {
617
  "epoch": 2.7,
618
+ "learning_rate": 6.501501501501502e-05,
619
+ "loss": 0.3352,
620
  "step": 900
621
  },
622
  {
623
  "epoch": 2.7,
624
+ "eval_accuracy": 0.9426142401700319,
625
+ "eval_loss": 0.2483620047569275,
626
+ "eval_runtime": 33.9353,
627
+ "eval_samples_per_second": 27.729,
628
+ "eval_steps_per_second": 3.477,
629
  "step": 900
630
  },
631
  {
632
  "epoch": 2.73,
633
+ "learning_rate": 6.351351351351352e-05,
634
+ "loss": 0.2314,
635
  "step": 910
636
  },
637
  {
638
  "epoch": 2.76,
639
+ "learning_rate": 6.2012012012012e-05,
640
+ "loss": 0.2739,
641
  "step": 920
642
  },
643
  {
644
  "epoch": 2.79,
645
+ "learning_rate": 6.051051051051051e-05,
646
+ "loss": 0.2905,
647
  "step": 930
648
  },
649
  {
650
  "epoch": 2.82,
651
+ "learning_rate": 5.900900900900901e-05,
652
+ "loss": 0.2607,
653
  "step": 940
654
  },
655
  {
656
  "epoch": 2.85,
657
+ "learning_rate": 5.7507507507507506e-05,
658
+ "loss": 0.334,
659
  "step": 950
660
  },
661
  {
662
  "epoch": 2.88,
663
+ "learning_rate": 5.600600600600601e-05,
664
+ "loss": 0.3202,
665
  "step": 960
666
  },
667
  {
668
  "epoch": 2.91,
669
+ "learning_rate": 5.45045045045045e-05,
670
+ "loss": 0.3326,
671
  "step": 970
672
  },
673
  {
674
  "epoch": 2.94,
675
+ "learning_rate": 5.3003003003003005e-05,
676
+ "loss": 0.2717,
677
  "step": 980
678
  },
679
  {
680
  "epoch": 2.97,
681
+ "learning_rate": 5.15015015015015e-05,
682
+ "loss": 0.259,
683
  "step": 990
684
  },
685
  {
686
  "epoch": 3.0,
687
+ "learning_rate": 5e-05,
688
+ "loss": 0.2493,
689
  "step": 1000
690
  },
691
  {
692
  "epoch": 3.0,
693
+ "eval_accuracy": 0.9394261424017003,
694
+ "eval_loss": 0.22659793496131897,
695
+ "eval_runtime": 33.5582,
696
+ "eval_samples_per_second": 28.041,
697
+ "eval_steps_per_second": 3.516,
698
  "step": 1000
699
  },
700
  {
701
  "epoch": 3.03,
702
+ "learning_rate": 4.8498498498498504e-05,
703
+ "loss": 0.2026,
704
  "step": 1010
705
  },
706
  {
707
  "epoch": 3.06,
708
+ "learning_rate": 4.6996996996997e-05,
709
+ "loss": 0.189,
710
  "step": 1020
711
  },
712
  {
713
  "epoch": 3.09,
714
+ "learning_rate": 4.54954954954955e-05,
715
+ "loss": 0.1097,
716
  "step": 1030
717
  },
718
  {
719
  "epoch": 3.12,
720
+ "learning_rate": 4.3993993993994e-05,
721
+ "loss": 0.1605,
722
  "step": 1040
723
  },
724
  {
725
  "epoch": 3.15,
726
+ "learning_rate": 4.24924924924925e-05,
727
+ "loss": 0.2681,
728
  "step": 1050
729
  },
730
  {
731
  "epoch": 3.18,
732
+ "learning_rate": 4.099099099099099e-05,
733
+ "loss": 0.2872,
734
  "step": 1060
735
  },
736
  {
737
  "epoch": 3.21,
738
+ "learning_rate": 3.948948948948949e-05,
739
+ "loss": 0.3481,
740
  "step": 1070
741
  },
742
  {
743
  "epoch": 3.24,
744
+ "learning_rate": 3.7987987987987985e-05,
745
+ "loss": 0.1615,
746
  "step": 1080
747
  },
748
  {
749
  "epoch": 3.27,
750
+ "learning_rate": 3.648648648648649e-05,
751
+ "loss": 0.1447,
752
  "step": 1090
753
  },
754
  {
755
  "epoch": 3.3,
756
+ "learning_rate": 3.498498498498499e-05,
757
+ "loss": 0.2034,
758
  "step": 1100
759
  },
760
  {
761
  "epoch": 3.3,
762
+ "eval_accuracy": 0.9479277364505845,
763
+ "eval_loss": 0.20114077627658844,
764
+ "eval_runtime": 34.4276,
765
+ "eval_samples_per_second": 27.333,
766
+ "eval_steps_per_second": 3.427,
767
  "step": 1100
768
  },
769
  {
770
  "epoch": 3.33,
771
+ "learning_rate": 3.3483483483483485e-05,
772
+ "loss": 0.204,
773
  "step": 1110
774
  },
775
  {
776
  "epoch": 3.36,
777
+ "learning_rate": 3.198198198198199e-05,
778
+ "loss": 0.18,
779
  "step": 1120
780
  },
781
  {
782
  "epoch": 3.39,
783
+ "learning_rate": 3.0480480480480482e-05,
784
+ "loss": 0.1657,
785
  "step": 1130
786
  },
787
  {
788
  "epoch": 3.42,
789
+ "learning_rate": 2.897897897897898e-05,
790
+ "loss": 0.1932,
791
  "step": 1140
792
  },
793
  {
794
  "epoch": 3.45,
795
+ "learning_rate": 2.7477477477477483e-05,
796
+ "loss": 0.1524,
797
  "step": 1150
798
  },
799
  {
800
  "epoch": 3.48,
801
+ "learning_rate": 2.5975975975975975e-05,
802
+ "loss": 0.1144,
803
  "step": 1160
804
  },
805
  {
806
  "epoch": 3.51,
807
+ "learning_rate": 2.4474474474474477e-05,
808
+ "loss": 0.1424,
809
  "step": 1170
810
  },
811
  {
812
  "epoch": 3.54,
813
+ "learning_rate": 2.2972972972972976e-05,
814
+ "loss": 0.2142,
815
  "step": 1180
816
  },
817
  {
818
  "epoch": 3.57,
819
  "learning_rate": 2.1621621621621624e-05,
820
+ "loss": 0.1448,
821
  "step": 1190
822
  },
823
  {
824
  "epoch": 3.6,
825
  "learning_rate": 2.012012012012012e-05,
826
+ "loss": 0.1753,
827
  "step": 1200
828
  },
829
  {
830
  "epoch": 3.6,
831
+ "eval_accuracy": 0.944739638682253,
832
+ "eval_loss": 0.20892110466957092,
833
+ "eval_runtime": 33.5408,
834
+ "eval_samples_per_second": 28.055,
835
+ "eval_steps_per_second": 3.518,
836
  "step": 1200
837
  },
838
  {
839
  "epoch": 3.63,
840
  "learning_rate": 1.8618618618618618e-05,
841
+ "loss": 0.2085,
842
  "step": 1210
843
  },
844
  {
845
  "epoch": 3.66,
846
  "learning_rate": 1.7117117117117117e-05,
847
+ "loss": 0.2179,
848
  "step": 1220
849
  },
850
  {
851
  "epoch": 3.69,
852
  "learning_rate": 1.5615615615615616e-05,
853
+ "loss": 0.1755,
854
  "step": 1230
855
  },
856
  {
857
  "epoch": 3.72,
858
  "learning_rate": 1.4114114114114116e-05,
859
+ "loss": 0.1905,
860
  "step": 1240
861
  },
862
  {
863
  "epoch": 3.75,
864
  "learning_rate": 1.2612612612612611e-05,
865
+ "loss": 0.2246,
866
  "step": 1250
867
  },
868
  {
869
  "epoch": 3.78,
870
  "learning_rate": 1.1111111111111112e-05,
871
+ "loss": 0.1495,
872
  "step": 1260
873
  },
874
  {
875
  "epoch": 3.81,
876
  "learning_rate": 9.60960960960961e-06,
877
+ "loss": 0.2038,
878
  "step": 1270
879
  },
880
  {
881
  "epoch": 3.84,
882
  "learning_rate": 8.108108108108109e-06,
883
+ "loss": 0.2163,
884
  "step": 1280
885
  },
886
  {
887
  "epoch": 3.87,
888
  "learning_rate": 6.606606606606607e-06,
889
+ "loss": 0.152,
890
  "step": 1290
891
  },
892
  {
893
  "epoch": 3.9,
894
  "learning_rate": 5.105105105105106e-06,
895
+ "loss": 0.1614,
896
  "step": 1300
897
  },
898
  {
899
  "epoch": 3.9,
900
+ "eval_accuracy": 0.9543039319872476,
901
+ "eval_loss": 0.19176600873470306,
902
+ "eval_runtime": 33.687,
903
+ "eval_samples_per_second": 27.934,
904
+ "eval_steps_per_second": 3.503,
905
  "step": 1300
906
  },
907
  {
908
  "epoch": 3.93,
909
  "learning_rate": 3.603603603603604e-06,
910
+ "loss": 0.1276,
911
  "step": 1310
912
  },
913
  {
914
  "epoch": 3.96,
915
  "learning_rate": 2.102102102102102e-06,
916
+ "loss": 0.2218,
917
  "step": 1320
918
  },
919
  {
920
  "epoch": 3.99,
921
  "learning_rate": 6.006006006006006e-07,
922
+ "loss": 0.1194,
923
  "step": 1330
924
  },
925
  {
926
  "epoch": 4.0,
927
  "step": 1332,
928
  "total_flos": 1.651775717862015e+18,
929
+ "train_loss": 0.4876434194433081,
930
+ "train_runtime": 1561.5053,
931
+ "train_samples_per_second": 13.648,
932
+ "train_steps_per_second": 0.853
933
  }
934
  ],
935
  "max_steps": 1332,