rajistics commited on
Commit
2a7930a
1 Parent(s): 3b64ca2

🍻 cheers

Browse files
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 4.0,
3
- "eval_accuracy": 0.9543039319872476,
4
- "eval_loss": 0.19176600873470306,
5
- "eval_runtime": 33.828,
6
- "eval_samples_per_second": 27.817,
7
- "eval_steps_per_second": 3.488,
8
  "total_flos": 1.651775717862015e+18,
9
- "train_loss": 0.4876434194433081,
10
- "train_runtime": 1561.5053,
11
- "train_samples_per_second": 13.648,
12
- "train_steps_per_second": 0.853
13
  }
 
1
  {
2
  "epoch": 4.0,
3
+ "eval_accuracy": 0.9330499468650372,
4
+ "eval_loss": 0.263217031955719,
5
+ "eval_runtime": 37.3903,
6
+ "eval_samples_per_second": 25.167,
7
+ "eval_steps_per_second": 3.156,
8
  "total_flos": 1.651775717862015e+18,
9
+ "train_loss": 0.4671340096104252,
10
+ "train_runtime": 1617.2249,
11
+ "train_samples_per_second": 13.178,
12
+ "train_steps_per_second": 0.824
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 4.0,
3
- "eval_accuracy": 0.9543039319872476,
4
- "eval_loss": 0.19176600873470306,
5
- "eval_runtime": 33.828,
6
- "eval_samples_per_second": 27.817,
7
- "eval_steps_per_second": 3.488
8
  }
 
1
  {
2
  "epoch": 4.0,
3
+ "eval_accuracy": 0.9330499468650372,
4
+ "eval_loss": 0.263217031955719,
5
+ "eval_runtime": 37.3903,
6
+ "eval_samples_per_second": 25.167,
7
+ "eval_steps_per_second": 3.156
8
  }
runs/Aug04_18-07-39_26434d7bb3db/events.out.tfevents.1659638706.26434d7bb3db.71.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7ca0a601772eb39bfa464c7e6ad56c189320ff6d9de5958c31880d97590da0e
3
+ size 363
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 4.0,
3
  "total_flos": 1.651775717862015e+18,
4
- "train_loss": 0.4876434194433081,
5
- "train_runtime": 1561.5053,
6
- "train_samples_per_second": 13.648,
7
- "train_steps_per_second": 0.853
8
  }
 
1
  {
2
  "epoch": 4.0,
3
  "total_flos": 1.651775717862015e+18,
4
+ "train_loss": 0.4671340096104252,
5
+ "train_runtime": 1617.2249,
6
+ "train_samples_per_second": 13.178,
7
+ "train_steps_per_second": 0.824
8
  }
trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.19176600873470306,
3
  "best_model_checkpoint": "finetuned-indian-food/checkpoint-1300",
4
  "epoch": 4.0,
5
  "global_step": 1332,
@@ -10,926 +10,926 @@
10
  {
11
  "epoch": 0.03,
12
  "learning_rate": 0.0001984984984984985,
13
- "loss": 2.8856,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 0.06,
18
  "learning_rate": 0.00019699699699699701,
19
- "loss": 2.5941,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 0.09,
24
  "learning_rate": 0.0001954954954954955,
25
- "loss": 2.1576,
26
  "step": 30
27
  },
28
  {
29
  "epoch": 0.12,
30
  "learning_rate": 0.00019399399399399402,
31
- "loss": 1.9221,
32
  "step": 40
33
  },
34
  {
35
  "epoch": 0.15,
36
  "learning_rate": 0.0001924924924924925,
37
- "loss": 1.7671,
38
  "step": 50
39
  },
40
  {
41
  "epoch": 0.18,
42
  "learning_rate": 0.000190990990990991,
43
- "loss": 1.4461,
44
  "step": 60
45
  },
46
  {
47
  "epoch": 0.21,
48
  "learning_rate": 0.0001894894894894895,
49
- "loss": 1.3917,
50
  "step": 70
51
  },
52
  {
53
  "epoch": 0.24,
54
  "learning_rate": 0.000187987987987988,
55
- "loss": 1.3666,
56
  "step": 80
57
  },
58
  {
59
  "epoch": 0.27,
60
  "learning_rate": 0.0001864864864864865,
61
- "loss": 1.1615,
62
  "step": 90
63
  },
64
  {
65
  "epoch": 0.3,
66
  "learning_rate": 0.000184984984984985,
67
- "loss": 1.0175,
68
  "step": 100
69
  },
70
  {
71
  "epoch": 0.3,
72
- "eval_accuracy": 0.8629117959617428,
73
- "eval_loss": 0.9246562719345093,
74
- "eval_runtime": 35.8495,
75
- "eval_samples_per_second": 26.249,
76
- "eval_steps_per_second": 3.292,
77
  "step": 100
78
  },
79
  {
80
  "epoch": 0.33,
81
  "learning_rate": 0.0001834834834834835,
82
- "loss": 1.044,
83
  "step": 110
84
  },
85
  {
86
  "epoch": 0.36,
87
  "learning_rate": 0.000181981981981982,
88
- "loss": 0.9848,
89
  "step": 120
90
  },
91
  {
92
  "epoch": 0.39,
93
  "learning_rate": 0.0001804804804804805,
94
- "loss": 0.7889,
95
  "step": 130
96
  },
97
  {
98
  "epoch": 0.42,
99
- "learning_rate": 0.00017897897897897898,
100
- "loss": 0.921,
101
  "step": 140
102
  },
103
  {
104
  "epoch": 0.45,
105
- "learning_rate": 0.0001774774774774775,
106
- "loss": 0.7709,
107
  "step": 150
108
  },
109
  {
110
  "epoch": 0.48,
111
- "learning_rate": 0.000175975975975976,
112
- "loss": 0.9096,
113
  "step": 160
114
  },
115
  {
116
  "epoch": 0.51,
117
- "learning_rate": 0.0001744744744744745,
118
- "loss": 0.7293,
119
  "step": 170
120
  },
121
  {
122
  "epoch": 0.54,
123
- "learning_rate": 0.000172972972972973,
124
- "loss": 0.6419,
125
  "step": 180
126
  },
127
  {
128
  "epoch": 0.57,
129
- "learning_rate": 0.00017147147147147148,
130
- "loss": 0.7251,
131
  "step": 190
132
  },
133
  {
134
  "epoch": 0.6,
135
- "learning_rate": 0.00016996996996997,
136
- "loss": 0.7418,
137
  "step": 200
138
  },
139
  {
140
  "epoch": 0.6,
141
- "eval_accuracy": 0.8990435706695006,
142
- "eval_loss": 0.5535812377929688,
143
- "eval_runtime": 33.962,
144
- "eval_samples_per_second": 27.707,
145
- "eval_steps_per_second": 3.474,
146
  "step": 200
147
  },
148
  {
149
  "epoch": 0.63,
150
- "learning_rate": 0.00016846846846846846,
151
- "loss": 0.7307,
152
  "step": 210
153
  },
154
  {
155
  "epoch": 0.66,
156
- "learning_rate": 0.00016696696696696697,
157
- "loss": 0.7721,
158
  "step": 220
159
  },
160
  {
161
  "epoch": 0.69,
162
- "learning_rate": 0.00016546546546546546,
163
- "loss": 0.6073,
164
  "step": 230
165
  },
166
  {
167
  "epoch": 0.72,
168
- "learning_rate": 0.00016396396396396395,
169
- "loss": 0.6946,
170
  "step": 240
171
  },
172
  {
173
  "epoch": 0.75,
174
- "learning_rate": 0.00016246246246246247,
175
- "loss": 0.7824,
176
  "step": 250
177
  },
178
  {
179
  "epoch": 0.78,
180
- "learning_rate": 0.00016096096096096096,
181
- "loss": 0.5175,
182
  "step": 260
183
  },
184
  {
185
  "epoch": 0.81,
186
- "learning_rate": 0.00015945945945945947,
187
- "loss": 0.7959,
188
  "step": 270
189
  },
190
  {
191
  "epoch": 0.84,
192
- "learning_rate": 0.00015795795795795796,
193
- "loss": 0.5273,
194
  "step": 280
195
  },
196
  {
197
  "epoch": 0.87,
198
- "learning_rate": 0.00015645645645645645,
199
- "loss": 0.6327,
200
  "step": 290
201
  },
202
  {
203
  "epoch": 0.9,
204
- "learning_rate": 0.00015495495495495496,
205
- "loss": 0.6652,
206
  "step": 300
207
  },
208
  {
209
  "epoch": 0.9,
210
- "eval_accuracy": 0.9181721572794899,
211
- "eval_loss": 0.4035964906215668,
212
- "eval_runtime": 33.8609,
213
- "eval_samples_per_second": 27.79,
214
- "eval_steps_per_second": 3.485,
215
  "step": 300
216
  },
217
  {
218
  "epoch": 0.93,
219
- "learning_rate": 0.00015345345345345345,
220
- "loss": 0.6688,
221
  "step": 310
222
  },
223
  {
224
  "epoch": 0.96,
225
- "learning_rate": 0.00015195195195195194,
226
- "loss": 0.5384,
227
  "step": 320
228
  },
229
  {
230
  "epoch": 0.99,
231
- "learning_rate": 0.00015045045045045046,
232
- "loss": 0.5232,
233
  "step": 330
234
  },
235
  {
236
  "epoch": 1.02,
237
- "learning_rate": 0.00014894894894894895,
238
- "loss": 0.5407,
239
  "step": 340
240
  },
241
  {
242
  "epoch": 1.05,
243
- "learning_rate": 0.00014744744744744746,
244
- "loss": 0.512,
245
  "step": 350
246
  },
247
  {
248
  "epoch": 1.08,
249
- "learning_rate": 0.00014594594594594595,
250
- "loss": 0.4422,
251
  "step": 360
252
  },
253
  {
254
  "epoch": 1.11,
255
- "learning_rate": 0.00014444444444444444,
256
- "loss": 0.6211,
257
  "step": 370
258
  },
259
  {
260
  "epoch": 1.14,
261
- "learning_rate": 0.00014294294294294295,
262
- "loss": 0.3153,
263
  "step": 380
264
  },
265
  {
266
  "epoch": 1.17,
267
- "learning_rate": 0.00014144144144144144,
268
- "loss": 0.4897,
269
  "step": 390
270
  },
271
  {
272
  "epoch": 1.2,
273
- "learning_rate": 0.00013993993993993996,
274
- "loss": 0.5959,
275
  "step": 400
276
  },
277
  {
278
  "epoch": 1.2,
279
- "eval_accuracy": 0.89798087141339,
280
- "eval_loss": 0.40219178795814514,
281
- "eval_runtime": 34.8649,
282
- "eval_samples_per_second": 26.99,
283
- "eval_steps_per_second": 3.384,
284
  "step": 400
285
  },
286
  {
287
  "epoch": 1.23,
288
- "learning_rate": 0.00013843843843843845,
289
- "loss": 0.4482,
290
  "step": 410
291
  },
292
  {
293
  "epoch": 1.26,
294
- "learning_rate": 0.00013693693693693693,
295
- "loss": 0.3453,
296
  "step": 420
297
  },
298
  {
299
  "epoch": 1.29,
300
- "learning_rate": 0.00013543543543543545,
301
- "loss": 0.4837,
302
  "step": 430
303
  },
304
  {
305
  "epoch": 1.32,
306
- "learning_rate": 0.00013393393393393394,
307
- "loss": 0.403,
308
  "step": 440
309
  },
310
  {
311
  "epoch": 1.35,
312
- "learning_rate": 0.00013243243243243243,
313
- "loss": 0.5089,
314
  "step": 450
315
  },
316
  {
317
  "epoch": 1.38,
318
- "learning_rate": 0.00013093093093093094,
319
- "loss": 0.4843,
320
  "step": 460
321
  },
322
  {
323
  "epoch": 1.41,
324
- "learning_rate": 0.00012942942942942943,
325
- "loss": 0.3155,
326
  "step": 470
327
  },
328
  {
329
  "epoch": 1.44,
330
- "learning_rate": 0.00012792792792792795,
331
- "loss": 0.4345,
332
  "step": 480
333
  },
334
  {
335
  "epoch": 1.47,
336
- "learning_rate": 0.00012642642642642644,
337
- "loss": 0.4005,
338
  "step": 490
339
  },
340
  {
341
  "epoch": 1.5,
342
- "learning_rate": 0.00012492492492492492,
343
- "loss": 0.4478,
344
  "step": 500
345
  },
346
  {
347
  "epoch": 1.5,
348
- "eval_accuracy": 0.9287991498405951,
349
- "eval_loss": 0.3246866464614868,
350
- "eval_runtime": 33.5066,
351
- "eval_samples_per_second": 28.084,
352
- "eval_steps_per_second": 3.522,
353
  "step": 500
354
  },
355
  {
356
  "epoch": 1.53,
357
- "learning_rate": 0.00012342342342342344,
358
- "loss": 0.4507,
359
  "step": 510
360
  },
361
  {
362
  "epoch": 1.56,
363
- "learning_rate": 0.00012192192192192193,
364
- "loss": 0.5392,
365
  "step": 520
366
  },
367
  {
368
  "epoch": 1.59,
369
- "learning_rate": 0.00012042042042042043,
370
- "loss": 0.2738,
371
  "step": 530
372
  },
373
  {
374
  "epoch": 1.62,
375
- "learning_rate": 0.00011891891891891893,
376
- "loss": 0.566,
377
  "step": 540
378
  },
379
  {
380
  "epoch": 1.65,
381
- "learning_rate": 0.00011741741741741743,
382
- "loss": 0.5345,
383
  "step": 550
384
  },
385
  {
386
  "epoch": 1.68,
387
- "learning_rate": 0.00011591591591591592,
388
- "loss": 0.443,
389
  "step": 560
390
  },
391
  {
392
  "epoch": 1.71,
393
- "learning_rate": 0.00011441441441441443,
394
- "loss": 0.346,
395
  "step": 570
396
  },
397
  {
398
  "epoch": 1.74,
399
- "learning_rate": 0.00011291291291291293,
400
- "loss": 0.3985,
401
  "step": 580
402
  },
403
  {
404
  "epoch": 1.77,
405
- "learning_rate": 0.00011141141141141143,
406
- "loss": 0.3706,
407
  "step": 590
408
  },
409
  {
410
  "epoch": 1.8,
411
  "learning_rate": 0.00011006006006006006,
412
- "loss": 0.4717,
413
  "step": 600
414
  },
415
  {
416
  "epoch": 1.8,
417
- "eval_accuracy": 0.926673751328374,
418
- "eval_loss": 0.30190205574035645,
419
- "eval_runtime": 33.6807,
420
- "eval_samples_per_second": 27.939,
421
- "eval_steps_per_second": 3.503,
422
  "step": 600
423
  },
424
  {
425
  "epoch": 1.83,
426
  "learning_rate": 0.00010855855855855856,
427
- "loss": 0.3582,
428
  "step": 610
429
  },
430
  {
431
  "epoch": 1.86,
432
  "learning_rate": 0.00010705705705705707,
433
- "loss": 0.3629,
434
  "step": 620
435
  },
436
  {
437
  "epoch": 1.89,
438
  "learning_rate": 0.00010555555555555557,
439
- "loss": 0.3866,
440
  "step": 630
441
  },
442
  {
443
  "epoch": 1.92,
444
  "learning_rate": 0.00010405405405405406,
445
- "loss": 0.3155,
446
  "step": 640
447
  },
448
  {
449
  "epoch": 1.95,
450
  "learning_rate": 0.00010255255255255256,
451
- "loss": 0.4311,
452
  "step": 650
453
  },
454
  {
455
  "epoch": 1.98,
456
  "learning_rate": 0.00010105105105105106,
457
- "loss": 0.2544,
458
  "step": 660
459
  },
460
  {
461
  "epoch": 2.01,
462
  "learning_rate": 9.954954954954956e-05,
463
- "loss": 0.3617,
464
  "step": 670
465
  },
466
  {
467
  "epoch": 2.04,
468
  "learning_rate": 9.804804804804806e-05,
469
- "loss": 0.3127,
470
  "step": 680
471
  },
472
  {
473
  "epoch": 2.07,
474
  "learning_rate": 9.654654654654654e-05,
475
- "loss": 0.2718,
476
  "step": 690
477
  },
478
  {
479
  "epoch": 2.1,
480
  "learning_rate": 9.504504504504504e-05,
481
- "loss": 0.34,
482
  "step": 700
483
  },
484
  {
485
  "epoch": 2.1,
486
- "eval_accuracy": 0.9351753453772582,
487
- "eval_loss": 0.25940415263175964,
488
- "eval_runtime": 35.0686,
489
- "eval_samples_per_second": 26.833,
490
- "eval_steps_per_second": 3.365,
491
  "step": 700
492
  },
493
  {
494
  "epoch": 2.13,
495
  "learning_rate": 9.354354354354354e-05,
496
- "loss": 0.3881,
497
  "step": 710
498
  },
499
  {
500
  "epoch": 2.16,
501
  "learning_rate": 9.204204204204205e-05,
502
- "loss": 0.3528,
503
  "step": 720
504
  },
505
  {
506
  "epoch": 2.19,
507
  "learning_rate": 9.054054054054055e-05,
508
- "loss": 0.3053,
509
  "step": 730
510
  },
511
  {
512
  "epoch": 2.22,
513
  "learning_rate": 8.903903903903904e-05,
514
- "loss": 0.223,
515
  "step": 740
516
  },
517
  {
518
  "epoch": 2.25,
519
  "learning_rate": 8.753753753753754e-05,
520
- "loss": 0.2268,
521
  "step": 750
522
  },
523
  {
524
  "epoch": 2.28,
525
  "learning_rate": 8.603603603603604e-05,
526
- "loss": 0.4058,
527
  "step": 760
528
  },
529
  {
530
  "epoch": 2.31,
531
  "learning_rate": 8.453453453453454e-05,
532
- "loss": 0.3018,
533
  "step": 770
534
  },
535
  {
536
  "epoch": 2.34,
537
  "learning_rate": 8.303303303303303e-05,
538
- "loss": 0.2973,
539
  "step": 780
540
  },
541
  {
542
  "epoch": 2.37,
543
  "learning_rate": 8.153153153153153e-05,
544
- "loss": 0.2607,
545
  "step": 790
546
  },
547
  {
548
  "epoch": 2.4,
549
  "learning_rate": 8.003003003003004e-05,
550
- "loss": 0.3518,
551
  "step": 800
552
  },
553
  {
554
  "epoch": 2.4,
555
- "eval_accuracy": 0.9351753453772582,
556
- "eval_loss": 0.2507326304912567,
557
- "eval_runtime": 34.9575,
558
- "eval_samples_per_second": 26.918,
559
- "eval_steps_per_second": 3.376,
560
  "step": 800
561
  },
562
  {
563
  "epoch": 2.43,
564
  "learning_rate": 7.852852852852854e-05,
565
- "loss": 0.2251,
566
  "step": 810
567
  },
568
  {
569
  "epoch": 2.46,
570
  "learning_rate": 7.702702702702703e-05,
571
- "loss": 0.2747,
572
  "step": 820
573
  },
574
  {
575
  "epoch": 2.49,
576
  "learning_rate": 7.552552552552553e-05,
577
- "loss": 0.2653,
578
  "step": 830
579
  },
580
  {
581
  "epoch": 2.52,
582
  "learning_rate": 7.402402402402403e-05,
583
- "loss": 0.2361,
584
  "step": 840
585
  },
586
  {
587
  "epoch": 2.55,
588
  "learning_rate": 7.252252252252253e-05,
589
- "loss": 0.2548,
590
  "step": 850
591
  },
592
  {
593
  "epoch": 2.58,
594
  "learning_rate": 7.102102102102103e-05,
595
- "loss": 0.392,
596
  "step": 860
597
  },
598
  {
599
  "epoch": 2.61,
600
  "learning_rate": 6.951951951951952e-05,
601
- "loss": 0.3041,
602
  "step": 870
603
  },
604
  {
605
  "epoch": 2.64,
606
  "learning_rate": 6.801801801801802e-05,
607
- "loss": 0.3341,
608
  "step": 880
609
  },
610
  {
611
  "epoch": 2.67,
612
  "learning_rate": 6.651651651651653e-05,
613
- "loss": 0.2462,
614
  "step": 890
615
  },
616
  {
617
  "epoch": 2.7,
618
  "learning_rate": 6.501501501501502e-05,
619
- "loss": 0.3352,
620
  "step": 900
621
  },
622
  {
623
  "epoch": 2.7,
624
- "eval_accuracy": 0.9426142401700319,
625
- "eval_loss": 0.2483620047569275,
626
- "eval_runtime": 33.9353,
627
- "eval_samples_per_second": 27.729,
628
- "eval_steps_per_second": 3.477,
629
  "step": 900
630
  },
631
  {
632
  "epoch": 2.73,
633
  "learning_rate": 6.351351351351352e-05,
634
- "loss": 0.2314,
635
  "step": 910
636
  },
637
  {
638
  "epoch": 2.76,
639
  "learning_rate": 6.2012012012012e-05,
640
- "loss": 0.2739,
641
  "step": 920
642
  },
643
  {
644
  "epoch": 2.79,
645
  "learning_rate": 6.051051051051051e-05,
646
- "loss": 0.2905,
647
  "step": 930
648
  },
649
  {
650
  "epoch": 2.82,
651
  "learning_rate": 5.900900900900901e-05,
652
- "loss": 0.2607,
653
  "step": 940
654
  },
655
  {
656
  "epoch": 2.85,
657
  "learning_rate": 5.7507507507507506e-05,
658
- "loss": 0.334,
659
  "step": 950
660
  },
661
  {
662
  "epoch": 2.88,
663
  "learning_rate": 5.600600600600601e-05,
664
- "loss": 0.3202,
665
  "step": 960
666
  },
667
  {
668
  "epoch": 2.91,
669
  "learning_rate": 5.45045045045045e-05,
670
- "loss": 0.3326,
671
  "step": 970
672
  },
673
  {
674
  "epoch": 2.94,
675
  "learning_rate": 5.3003003003003005e-05,
676
- "loss": 0.2717,
677
  "step": 980
678
  },
679
  {
680
  "epoch": 2.97,
681
  "learning_rate": 5.15015015015015e-05,
682
- "loss": 0.259,
683
  "step": 990
684
  },
685
  {
686
  "epoch": 3.0,
687
  "learning_rate": 5e-05,
688
- "loss": 0.2493,
689
  "step": 1000
690
  },
691
  {
692
  "epoch": 3.0,
693
- "eval_accuracy": 0.9394261424017003,
694
- "eval_loss": 0.22659793496131897,
695
- "eval_runtime": 33.5582,
696
- "eval_samples_per_second": 28.041,
697
- "eval_steps_per_second": 3.516,
698
  "step": 1000
699
  },
700
  {
701
  "epoch": 3.03,
702
  "learning_rate": 4.8498498498498504e-05,
703
- "loss": 0.2026,
704
  "step": 1010
705
  },
706
  {
707
  "epoch": 3.06,
708
  "learning_rate": 4.6996996996997e-05,
709
- "loss": 0.189,
710
  "step": 1020
711
  },
712
  {
713
  "epoch": 3.09,
714
  "learning_rate": 4.54954954954955e-05,
715
- "loss": 0.1097,
716
  "step": 1030
717
  },
718
  {
719
  "epoch": 3.12,
720
  "learning_rate": 4.3993993993994e-05,
721
- "loss": 0.1605,
722
  "step": 1040
723
  },
724
  {
725
  "epoch": 3.15,
726
  "learning_rate": 4.24924924924925e-05,
727
- "loss": 0.2681,
728
  "step": 1050
729
  },
730
  {
731
  "epoch": 3.18,
732
  "learning_rate": 4.099099099099099e-05,
733
- "loss": 0.2872,
734
  "step": 1060
735
  },
736
  {
737
  "epoch": 3.21,
738
  "learning_rate": 3.948948948948949e-05,
739
- "loss": 0.3481,
740
  "step": 1070
741
  },
742
  {
743
  "epoch": 3.24,
744
  "learning_rate": 3.7987987987987985e-05,
745
- "loss": 0.1615,
746
  "step": 1080
747
  },
748
  {
749
  "epoch": 3.27,
750
  "learning_rate": 3.648648648648649e-05,
751
- "loss": 0.1447,
752
  "step": 1090
753
  },
754
  {
755
  "epoch": 3.3,
756
  "learning_rate": 3.498498498498499e-05,
757
- "loss": 0.2034,
758
  "step": 1100
759
  },
760
  {
761
  "epoch": 3.3,
762
- "eval_accuracy": 0.9479277364505845,
763
- "eval_loss": 0.20114077627658844,
764
- "eval_runtime": 34.4276,
765
- "eval_samples_per_second": 27.333,
766
- "eval_steps_per_second": 3.427,
767
  "step": 1100
768
  },
769
  {
770
  "epoch": 3.33,
771
  "learning_rate": 3.3483483483483485e-05,
772
- "loss": 0.204,
773
  "step": 1110
774
  },
775
  {
776
  "epoch": 3.36,
777
  "learning_rate": 3.198198198198199e-05,
778
- "loss": 0.18,
779
  "step": 1120
780
  },
781
  {
782
  "epoch": 3.39,
783
  "learning_rate": 3.0480480480480482e-05,
784
- "loss": 0.1657,
785
  "step": 1130
786
  },
787
  {
788
  "epoch": 3.42,
789
  "learning_rate": 2.897897897897898e-05,
790
- "loss": 0.1932,
791
  "step": 1140
792
  },
793
  {
794
  "epoch": 3.45,
795
  "learning_rate": 2.7477477477477483e-05,
796
- "loss": 0.1524,
797
  "step": 1150
798
  },
799
  {
800
  "epoch": 3.48,
801
  "learning_rate": 2.5975975975975975e-05,
802
- "loss": 0.1144,
803
  "step": 1160
804
  },
805
  {
806
  "epoch": 3.51,
807
  "learning_rate": 2.4474474474474477e-05,
808
- "loss": 0.1424,
809
  "step": 1170
810
  },
811
  {
812
  "epoch": 3.54,
813
  "learning_rate": 2.2972972972972976e-05,
814
- "loss": 0.2142,
815
  "step": 1180
816
  },
817
  {
818
  "epoch": 3.57,
819
- "learning_rate": 2.1621621621621624e-05,
820
- "loss": 0.1448,
821
  "step": 1190
822
  },
823
  {
824
  "epoch": 3.6,
825
- "learning_rate": 2.012012012012012e-05,
826
- "loss": 0.1753,
827
  "step": 1200
828
  },
829
  {
830
  "epoch": 3.6,
831
- "eval_accuracy": 0.944739638682253,
832
- "eval_loss": 0.20892110466957092,
833
- "eval_runtime": 33.5408,
834
- "eval_samples_per_second": 28.055,
835
- "eval_steps_per_second": 3.518,
836
  "step": 1200
837
  },
838
  {
839
  "epoch": 3.63,
840
- "learning_rate": 1.8618618618618618e-05,
841
- "loss": 0.2085,
842
  "step": 1210
843
  },
844
  {
845
  "epoch": 3.66,
846
- "learning_rate": 1.7117117117117117e-05,
847
- "loss": 0.2179,
848
  "step": 1220
849
  },
850
  {
851
  "epoch": 3.69,
852
- "learning_rate": 1.5615615615615616e-05,
853
- "loss": 0.1755,
854
  "step": 1230
855
  },
856
  {
857
  "epoch": 3.72,
858
- "learning_rate": 1.4114114114114116e-05,
859
- "loss": 0.1905,
860
  "step": 1240
861
  },
862
  {
863
  "epoch": 3.75,
864
- "learning_rate": 1.2612612612612611e-05,
865
- "loss": 0.2246,
866
  "step": 1250
867
  },
868
  {
869
  "epoch": 3.78,
870
- "learning_rate": 1.1111111111111112e-05,
871
- "loss": 0.1495,
872
  "step": 1260
873
  },
874
  {
875
  "epoch": 3.81,
876
- "learning_rate": 9.60960960960961e-06,
877
- "loss": 0.2038,
878
  "step": 1270
879
  },
880
  {
881
  "epoch": 3.84,
882
- "learning_rate": 8.108108108108109e-06,
883
- "loss": 0.2163,
884
  "step": 1280
885
  },
886
  {
887
  "epoch": 3.87,
888
- "learning_rate": 6.606606606606607e-06,
889
- "loss": 0.152,
890
  "step": 1290
891
  },
892
  {
893
  "epoch": 3.9,
894
- "learning_rate": 5.105105105105106e-06,
895
- "loss": 0.1614,
896
  "step": 1300
897
  },
898
  {
899
  "epoch": 3.9,
900
- "eval_accuracy": 0.9543039319872476,
901
- "eval_loss": 0.19176600873470306,
902
- "eval_runtime": 33.687,
903
- "eval_samples_per_second": 27.934,
904
- "eval_steps_per_second": 3.503,
905
  "step": 1300
906
  },
907
  {
908
  "epoch": 3.93,
909
- "learning_rate": 3.603603603603604e-06,
910
- "loss": 0.1276,
911
  "step": 1310
912
  },
913
  {
914
  "epoch": 3.96,
915
- "learning_rate": 2.102102102102102e-06,
916
- "loss": 0.2218,
917
  "step": 1320
918
  },
919
  {
920
  "epoch": 3.99,
921
- "learning_rate": 6.006006006006006e-07,
922
- "loss": 0.1194,
923
  "step": 1330
924
  },
925
  {
926
  "epoch": 4.0,
927
  "step": 1332,
928
  "total_flos": 1.651775717862015e+18,
929
- "train_loss": 0.4876434194433081,
930
- "train_runtime": 1561.5053,
931
- "train_samples_per_second": 13.648,
932
- "train_steps_per_second": 0.853
933
  }
934
  ],
935
  "max_steps": 1332,
 
1
  {
2
+ "best_metric": 0.263217031955719,
3
  "best_model_checkpoint": "finetuned-indian-food/checkpoint-1300",
4
  "epoch": 4.0,
5
  "global_step": 1332,
 
10
  {
11
  "epoch": 0.03,
12
  "learning_rate": 0.0001984984984984985,
13
+ "loss": 2.8484,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 0.06,
18
  "learning_rate": 0.00019699699699699701,
19
+ "loss": 2.5652,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 0.09,
24
  "learning_rate": 0.0001954954954954955,
25
+ "loss": 2.2041,
26
  "step": 30
27
  },
28
  {
29
  "epoch": 0.12,
30
  "learning_rate": 0.00019399399399399402,
31
+ "loss": 1.8817,
32
  "step": 40
33
  },
34
  {
35
  "epoch": 0.15,
36
  "learning_rate": 0.0001924924924924925,
37
+ "loss": 1.7182,
38
  "step": 50
39
  },
40
  {
41
  "epoch": 0.18,
42
  "learning_rate": 0.000190990990990991,
43
+ "loss": 1.4787,
44
  "step": 60
45
  },
46
  {
47
  "epoch": 0.21,
48
  "learning_rate": 0.0001894894894894895,
49
+ "loss": 1.3547,
50
  "step": 70
51
  },
52
  {
53
  "epoch": 0.24,
54
  "learning_rate": 0.000187987987987988,
55
+ "loss": 1.1844,
56
  "step": 80
57
  },
58
  {
59
  "epoch": 0.27,
60
  "learning_rate": 0.0001864864864864865,
61
+ "loss": 1.1045,
62
  "step": 90
63
  },
64
  {
65
  "epoch": 0.3,
66
  "learning_rate": 0.000184984984984985,
67
+ "loss": 1.1794,
68
  "step": 100
69
  },
70
  {
71
  "epoch": 0.3,
72
+ "eval_accuracy": 0.8565356004250797,
73
+ "eval_loss": 0.9208076596260071,
74
+ "eval_runtime": 37.6614,
75
+ "eval_samples_per_second": 24.986,
76
+ "eval_steps_per_second": 3.133,
77
  "step": 100
78
  },
79
  {
80
  "epoch": 0.33,
81
  "learning_rate": 0.0001834834834834835,
82
+ "loss": 1.0324,
83
  "step": 110
84
  },
85
  {
86
  "epoch": 0.36,
87
  "learning_rate": 0.000181981981981982,
88
+ "loss": 0.9189,
89
  "step": 120
90
  },
91
  {
92
  "epoch": 0.39,
93
  "learning_rate": 0.0001804804804804805,
94
+ "loss": 0.9565,
95
  "step": 130
96
  },
97
  {
98
  "epoch": 0.42,
99
+ "learning_rate": 0.00017912912912912914,
100
+ "loss": 0.7858,
101
  "step": 140
102
  },
103
  {
104
  "epoch": 0.45,
105
+ "learning_rate": 0.00017762762762762763,
106
+ "loss": 0.7527,
107
  "step": 150
108
  },
109
  {
110
  "epoch": 0.48,
111
+ "learning_rate": 0.00017612612612612615,
112
+ "loss": 0.7514,
113
  "step": 160
114
  },
115
  {
116
  "epoch": 0.51,
117
+ "learning_rate": 0.00017462462462462463,
118
+ "loss": 0.8192,
119
  "step": 170
120
  },
121
  {
122
  "epoch": 0.54,
123
+ "learning_rate": 0.00017312312312312312,
124
+ "loss": 0.8094,
125
  "step": 180
126
  },
127
  {
128
  "epoch": 0.57,
129
+ "learning_rate": 0.00017162162162162164,
130
+ "loss": 0.6748,
131
  "step": 190
132
  },
133
  {
134
  "epoch": 0.6,
135
+ "learning_rate": 0.00017012012012012013,
136
+ "loss": 0.6513,
137
  "step": 200
138
  },
139
  {
140
  "epoch": 0.6,
141
+ "eval_accuracy": 0.8841657810839533,
142
+ "eval_loss": 0.540999174118042,
143
+ "eval_runtime": 35.8211,
144
+ "eval_samples_per_second": 26.269,
145
+ "eval_steps_per_second": 3.294,
146
  "step": 200
147
  },
148
  {
149
  "epoch": 0.63,
150
+ "learning_rate": 0.00016861861861861864,
151
+ "loss": 0.6462,
152
  "step": 210
153
  },
154
  {
155
  "epoch": 0.66,
156
+ "learning_rate": 0.00016711711711711713,
157
+ "loss": 0.6141,
158
  "step": 220
159
  },
160
  {
161
  "epoch": 0.69,
162
+ "learning_rate": 0.00016561561561561562,
163
+ "loss": 0.5329,
164
  "step": 230
165
  },
166
  {
167
  "epoch": 0.72,
168
+ "learning_rate": 0.00016411411411411413,
169
+ "loss": 0.7009,
170
  "step": 240
171
  },
172
  {
173
  "epoch": 0.75,
174
+ "learning_rate": 0.00016261261261261262,
175
+ "loss": 0.6468,
176
  "step": 250
177
  },
178
  {
179
  "epoch": 0.78,
180
+ "learning_rate": 0.0001611111111111111,
181
+ "loss": 0.712,
182
  "step": 260
183
  },
184
  {
185
  "epoch": 0.81,
186
+ "learning_rate": 0.00015960960960960963,
187
+ "loss": 0.6749,
188
  "step": 270
189
  },
190
  {
191
  "epoch": 0.84,
192
+ "learning_rate": 0.00015810810810810812,
193
+ "loss": 0.7913,
194
  "step": 280
195
  },
196
  {
197
  "epoch": 0.87,
198
+ "learning_rate": 0.00015660660660660663,
199
+ "loss": 0.6343,
200
  "step": 290
201
  },
202
  {
203
  "epoch": 0.9,
204
+ "learning_rate": 0.00015510510510510512,
205
+ "loss": 0.5904,
206
  "step": 300
207
  },
208
  {
209
  "epoch": 0.9,
210
+ "eval_accuracy": 0.8799149840595112,
211
+ "eval_loss": 0.4978478252887726,
212
+ "eval_runtime": 36.7863,
213
+ "eval_samples_per_second": 25.58,
214
+ "eval_steps_per_second": 3.208,
215
  "step": 300
216
  },
217
  {
218
  "epoch": 0.93,
219
+ "learning_rate": 0.0001536036036036036,
220
+ "loss": 0.5076,
221
  "step": 310
222
  },
223
  {
224
  "epoch": 0.96,
225
+ "learning_rate": 0.00015210210210210212,
226
+ "loss": 0.544,
227
  "step": 320
228
  },
229
  {
230
  "epoch": 0.99,
231
+ "learning_rate": 0.0001506006006006006,
232
+ "loss": 0.4454,
233
  "step": 330
234
  },
235
  {
236
  "epoch": 1.02,
237
+ "learning_rate": 0.00014909909909909913,
238
+ "loss": 0.5019,
239
  "step": 340
240
  },
241
  {
242
  "epoch": 1.05,
243
+ "learning_rate": 0.00014759759759759762,
244
+ "loss": 0.6077,
245
  "step": 350
246
  },
247
  {
248
  "epoch": 1.08,
249
+ "learning_rate": 0.0001460960960960961,
250
+ "loss": 0.3247,
251
  "step": 360
252
  },
253
  {
254
  "epoch": 1.11,
255
+ "learning_rate": 0.00014459459459459462,
256
+ "loss": 0.3434,
257
  "step": 370
258
  },
259
  {
260
  "epoch": 1.14,
261
+ "learning_rate": 0.00014309309309309308,
262
+ "loss": 0.57,
263
  "step": 380
264
  },
265
  {
266
  "epoch": 1.17,
267
+ "learning_rate": 0.0001415915915915916,
268
+ "loss": 0.4769,
269
  "step": 390
270
  },
271
  {
272
  "epoch": 1.2,
273
+ "learning_rate": 0.0001400900900900901,
274
+ "loss": 0.4461,
275
  "step": 400
276
  },
277
  {
278
  "epoch": 1.2,
279
+ "eval_accuracy": 0.9192348565356004,
280
+ "eval_loss": 0.3668522834777832,
281
+ "eval_runtime": 38.3659,
282
+ "eval_samples_per_second": 24.527,
283
+ "eval_steps_per_second": 3.076,
284
  "step": 400
285
  },
286
  {
287
  "epoch": 1.23,
288
+ "learning_rate": 0.00013858858858858858,
289
+ "loss": 0.4357,
290
  "step": 410
291
  },
292
  {
293
  "epoch": 1.26,
294
+ "learning_rate": 0.0001370870870870871,
295
+ "loss": 0.5206,
296
  "step": 420
297
  },
298
  {
299
  "epoch": 1.29,
300
+ "learning_rate": 0.00013558558558558558,
301
+ "loss": 0.3509,
302
  "step": 430
303
  },
304
  {
305
  "epoch": 1.32,
306
+ "learning_rate": 0.0001340840840840841,
307
+ "loss": 0.4684,
308
  "step": 440
309
  },
310
  {
311
  "epoch": 1.35,
312
+ "learning_rate": 0.00013258258258258258,
313
+ "loss": 0.5342,
314
  "step": 450
315
  },
316
  {
317
  "epoch": 1.38,
318
+ "learning_rate": 0.00013108108108108107,
319
+ "loss": 0.5583,
320
  "step": 460
321
  },
322
  {
323
  "epoch": 1.41,
324
+ "learning_rate": 0.0001295795795795796,
325
+ "loss": 0.3255,
326
  "step": 470
327
  },
328
  {
329
  "epoch": 1.44,
330
+ "learning_rate": 0.00012807807807807808,
331
+ "loss": 0.4101,
332
  "step": 480
333
  },
334
  {
335
  "epoch": 1.47,
336
+ "learning_rate": 0.00012657657657657657,
337
+ "loss": 0.3863,
338
  "step": 490
339
  },
340
  {
341
  "epoch": 1.5,
342
+ "learning_rate": 0.00012507507507507508,
343
+ "loss": 0.5633,
344
  "step": 500
345
  },
346
  {
347
  "epoch": 1.5,
348
+ "eval_accuracy": 0.8841657810839533,
349
+ "eval_loss": 0.4339892268180847,
350
+ "eval_runtime": 37.8861,
351
+ "eval_samples_per_second": 24.838,
352
+ "eval_steps_per_second": 3.115,
353
  "step": 500
354
  },
355
  {
356
  "epoch": 1.53,
357
+ "learning_rate": 0.00012357357357357357,
358
+ "loss": 0.4033,
359
  "step": 510
360
  },
361
  {
362
  "epoch": 1.56,
363
+ "learning_rate": 0.00012207207207207209,
364
+ "loss": 0.3891,
365
  "step": 520
366
  },
367
  {
368
  "epoch": 1.59,
369
+ "learning_rate": 0.00012057057057057057,
370
+ "loss": 0.4604,
371
  "step": 530
372
  },
373
  {
374
  "epoch": 1.62,
375
+ "learning_rate": 0.00011906906906906908,
376
+ "loss": 0.3802,
377
  "step": 540
378
  },
379
  {
380
  "epoch": 1.65,
381
+ "learning_rate": 0.00011756756756756758,
382
+ "loss": 0.3528,
383
  "step": 550
384
  },
385
  {
386
  "epoch": 1.68,
387
+ "learning_rate": 0.00011606606606606607,
388
+ "loss": 0.3331,
389
  "step": 560
390
  },
391
  {
392
  "epoch": 1.71,
393
+ "learning_rate": 0.00011456456456456457,
394
+ "loss": 0.4153,
395
  "step": 570
396
  },
397
  {
398
  "epoch": 1.74,
399
+ "learning_rate": 0.00011306306306306307,
400
+ "loss": 0.3143,
401
  "step": 580
402
  },
403
  {
404
  "epoch": 1.77,
405
+ "learning_rate": 0.00011156156156156157,
406
+ "loss": 0.5106,
407
  "step": 590
408
  },
409
  {
410
  "epoch": 1.8,
411
  "learning_rate": 0.00011006006006006006,
412
+ "loss": 0.2489,
413
  "step": 600
414
  },
415
  {
416
  "epoch": 1.8,
417
+ "eval_accuracy": 0.9171094580233794,
418
+ "eval_loss": 0.3354736864566803,
419
+ "eval_runtime": 38.0141,
420
+ "eval_samples_per_second": 24.754,
421
+ "eval_steps_per_second": 3.104,
422
  "step": 600
423
  },
424
  {
425
  "epoch": 1.83,
426
  "learning_rate": 0.00010855855855855856,
427
+ "loss": 0.2632,
428
  "step": 610
429
  },
430
  {
431
  "epoch": 1.86,
432
  "learning_rate": 0.00010705705705705707,
433
+ "loss": 0.2744,
434
  "step": 620
435
  },
436
  {
437
  "epoch": 1.89,
438
  "learning_rate": 0.00010555555555555557,
439
+ "loss": 0.4888,
440
  "step": 630
441
  },
442
  {
443
  "epoch": 1.92,
444
  "learning_rate": 0.00010405405405405406,
445
+ "loss": 0.3897,
446
  "step": 640
447
  },
448
  {
449
  "epoch": 1.95,
450
  "learning_rate": 0.00010255255255255256,
451
+ "loss": 0.4025,
452
  "step": 650
453
  },
454
  {
455
  "epoch": 1.98,
456
  "learning_rate": 0.00010105105105105106,
457
+ "loss": 0.3853,
458
  "step": 660
459
  },
460
  {
461
  "epoch": 2.01,
462
  "learning_rate": 9.954954954954956e-05,
463
+ "loss": 0.4504,
464
  "step": 670
465
  },
466
  {
467
  "epoch": 2.04,
468
  "learning_rate": 9.804804804804806e-05,
469
+ "loss": 0.3721,
470
  "step": 680
471
  },
472
  {
473
  "epoch": 2.07,
474
  "learning_rate": 9.654654654654654e-05,
475
+ "loss": 0.2667,
476
  "step": 690
477
  },
478
  {
479
  "epoch": 2.1,
480
  "learning_rate": 9.504504504504504e-05,
481
+ "loss": 0.3171,
482
  "step": 700
483
  },
484
  {
485
  "epoch": 2.1,
486
+ "eval_accuracy": 0.9192348565356004,
487
+ "eval_loss": 0.3286140263080597,
488
+ "eval_runtime": 39.0994,
489
+ "eval_samples_per_second": 24.067,
490
+ "eval_steps_per_second": 3.018,
491
  "step": 700
492
  },
493
  {
494
  "epoch": 2.13,
495
  "learning_rate": 9.354354354354354e-05,
496
+ "loss": 0.1323,
497
  "step": 710
498
  },
499
  {
500
  "epoch": 2.16,
501
  "learning_rate": 9.204204204204205e-05,
502
+ "loss": 0.2416,
503
  "step": 720
504
  },
505
  {
506
  "epoch": 2.19,
507
  "learning_rate": 9.054054054054055e-05,
508
+ "loss": 0.2462,
509
  "step": 730
510
  },
511
  {
512
  "epoch": 2.22,
513
  "learning_rate": 8.903903903903904e-05,
514
+ "loss": 0.2489,
515
  "step": 740
516
  },
517
  {
518
  "epoch": 2.25,
519
  "learning_rate": 8.753753753753754e-05,
520
+ "loss": 0.2155,
521
  "step": 750
522
  },
523
  {
524
  "epoch": 2.28,
525
  "learning_rate": 8.603603603603604e-05,
526
+ "loss": 0.3265,
527
  "step": 760
528
  },
529
  {
530
  "epoch": 2.31,
531
  "learning_rate": 8.453453453453454e-05,
532
+ "loss": 0.4114,
533
  "step": 770
534
  },
535
  {
536
  "epoch": 2.34,
537
  "learning_rate": 8.303303303303303e-05,
538
+ "loss": 0.2201,
539
  "step": 780
540
  },
541
  {
542
  "epoch": 2.37,
543
  "learning_rate": 8.153153153153153e-05,
544
+ "loss": 0.2655,
545
  "step": 790
546
  },
547
  {
548
  "epoch": 2.4,
549
  "learning_rate": 8.003003003003004e-05,
550
+ "loss": 0.3785,
551
  "step": 800
552
  },
553
  {
554
  "epoch": 2.4,
555
+ "eval_accuracy": 0.9171094580233794,
556
+ "eval_loss": 0.32322293519973755,
557
+ "eval_runtime": 38.4173,
558
+ "eval_samples_per_second": 24.494,
559
+ "eval_steps_per_second": 3.072,
560
  "step": 800
561
  },
562
  {
563
  "epoch": 2.43,
564
  "learning_rate": 7.852852852852854e-05,
565
+ "loss": 0.188,
566
  "step": 810
567
  },
568
  {
569
  "epoch": 2.46,
570
  "learning_rate": 7.702702702702703e-05,
571
+ "loss": 0.2789,
572
  "step": 820
573
  },
574
  {
575
  "epoch": 2.49,
576
  "learning_rate": 7.552552552552553e-05,
577
+ "loss": 0.2386,
578
  "step": 830
579
  },
580
  {
581
  "epoch": 2.52,
582
  "learning_rate": 7.402402402402403e-05,
583
+ "loss": 0.3068,
584
  "step": 840
585
  },
586
  {
587
  "epoch": 2.55,
588
  "learning_rate": 7.252252252252253e-05,
589
+ "loss": 0.2085,
590
  "step": 850
591
  },
592
  {
593
  "epoch": 2.58,
594
  "learning_rate": 7.102102102102103e-05,
595
+ "loss": 0.2076,
596
  "step": 860
597
  },
598
  {
599
  "epoch": 2.61,
600
  "learning_rate": 6.951951951951952e-05,
601
+ "loss": 0.1623,
602
  "step": 870
603
  },
604
  {
605
  "epoch": 2.64,
606
  "learning_rate": 6.801801801801802e-05,
607
+ "loss": 0.3782,
608
  "step": 880
609
  },
610
  {
611
  "epoch": 2.67,
612
  "learning_rate": 6.651651651651653e-05,
613
+ "loss": 0.1853,
614
  "step": 890
615
  },
616
  {
617
  "epoch": 2.7,
618
  "learning_rate": 6.501501501501502e-05,
619
+ "loss": 0.2278,
620
  "step": 900
621
  },
622
  {
623
  "epoch": 2.7,
624
+ "eval_accuracy": 0.9192348565356004,
625
+ "eval_loss": 0.33382856845855713,
626
+ "eval_runtime": 38.8106,
627
+ "eval_samples_per_second": 24.246,
628
+ "eval_steps_per_second": 3.04,
629
  "step": 900
630
  },
631
  {
632
  "epoch": 2.73,
633
  "learning_rate": 6.351351351351352e-05,
634
+ "loss": 0.2452,
635
  "step": 910
636
  },
637
  {
638
  "epoch": 2.76,
639
  "learning_rate": 6.2012012012012e-05,
640
+ "loss": 0.2648,
641
  "step": 920
642
  },
643
  {
644
  "epoch": 2.79,
645
  "learning_rate": 6.051051051051051e-05,
646
+ "loss": 0.3084,
647
  "step": 930
648
  },
649
  {
650
  "epoch": 2.82,
651
  "learning_rate": 5.900900900900901e-05,
652
+ "loss": 0.3583,
653
  "step": 940
654
  },
655
  {
656
  "epoch": 2.85,
657
  "learning_rate": 5.7507507507507506e-05,
658
+ "loss": 0.2682,
659
  "step": 950
660
  },
661
  {
662
  "epoch": 2.88,
663
  "learning_rate": 5.600600600600601e-05,
664
+ "loss": 0.2135,
665
  "step": 960
666
  },
667
  {
668
  "epoch": 2.91,
669
  "learning_rate": 5.45045045045045e-05,
670
+ "loss": 0.2129,
671
  "step": 970
672
  },
673
  {
674
  "epoch": 2.94,
675
  "learning_rate": 5.3003003003003005e-05,
676
+ "loss": 0.3823,
677
  "step": 980
678
  },
679
  {
680
  "epoch": 2.97,
681
  "learning_rate": 5.15015015015015e-05,
682
+ "loss": 0.2763,
683
  "step": 990
684
  },
685
  {
686
  "epoch": 3.0,
687
  "learning_rate": 5e-05,
688
+ "loss": 0.0894,
689
  "step": 1000
690
  },
691
  {
692
  "epoch": 3.0,
693
+ "eval_accuracy": 0.924548352816153,
694
+ "eval_loss": 0.2870355546474457,
695
+ "eval_runtime": 38.3272,
696
+ "eval_samples_per_second": 24.552,
697
+ "eval_steps_per_second": 3.079,
698
  "step": 1000
699
  },
700
  {
701
  "epoch": 3.03,
702
  "learning_rate": 4.8498498498498504e-05,
703
+ "loss": 0.1427,
704
  "step": 1010
705
  },
706
  {
707
  "epoch": 3.06,
708
  "learning_rate": 4.6996996996997e-05,
709
+ "loss": 0.3126,
710
  "step": 1020
711
  },
712
  {
713
  "epoch": 3.09,
714
  "learning_rate": 4.54954954954955e-05,
715
+ "loss": 0.1022,
716
  "step": 1030
717
  },
718
  {
719
  "epoch": 3.12,
720
  "learning_rate": 4.3993993993994e-05,
721
+ "loss": 0.2393,
722
  "step": 1040
723
  },
724
  {
725
  "epoch": 3.15,
726
  "learning_rate": 4.24924924924925e-05,
727
+ "loss": 0.2127,
728
  "step": 1050
729
  },
730
  {
731
  "epoch": 3.18,
732
  "learning_rate": 4.099099099099099e-05,
733
+ "loss": 0.0907,
734
  "step": 1060
735
  },
736
  {
737
  "epoch": 3.21,
738
  "learning_rate": 3.948948948948949e-05,
739
+ "loss": 0.0853,
740
  "step": 1070
741
  },
742
  {
743
  "epoch": 3.24,
744
  "learning_rate": 3.7987987987987985e-05,
745
+ "loss": 0.1306,
746
  "step": 1080
747
  },
748
  {
749
  "epoch": 3.27,
750
  "learning_rate": 3.648648648648649e-05,
751
+ "loss": 0.1956,
752
  "step": 1090
753
  },
754
  {
755
  "epoch": 3.3,
756
  "learning_rate": 3.498498498498499e-05,
757
+ "loss": 0.2092,
758
  "step": 1100
759
  },
760
  {
761
  "epoch": 3.3,
762
+ "eval_accuracy": 0.9287991498405951,
763
+ "eval_loss": 0.2884349524974823,
764
+ "eval_runtime": 38.6836,
765
+ "eval_samples_per_second": 24.326,
766
+ "eval_steps_per_second": 3.05,
767
  "step": 1100
768
  },
769
  {
770
  "epoch": 3.33,
771
  "learning_rate": 3.3483483483483485e-05,
772
+ "loss": 0.1815,
773
  "step": 1110
774
  },
775
  {
776
  "epoch": 3.36,
777
  "learning_rate": 3.198198198198199e-05,
778
+ "loss": 0.0951,
779
  "step": 1120
780
  },
781
  {
782
  "epoch": 3.39,
783
  "learning_rate": 3.0480480480480482e-05,
784
+ "loss": 0.1645,
785
  "step": 1130
786
  },
787
  {
788
  "epoch": 3.42,
789
  "learning_rate": 2.897897897897898e-05,
790
+ "loss": 0.2262,
791
  "step": 1140
792
  },
793
  {
794
  "epoch": 3.45,
795
  "learning_rate": 2.7477477477477483e-05,
796
+ "loss": 0.2508,
797
  "step": 1150
798
  },
799
  {
800
  "epoch": 3.48,
801
  "learning_rate": 2.5975975975975975e-05,
802
+ "loss": 0.2116,
803
  "step": 1160
804
  },
805
  {
806
  "epoch": 3.51,
807
  "learning_rate": 2.4474474474474477e-05,
808
+ "loss": 0.1312,
809
  "step": 1170
810
  },
811
  {
812
  "epoch": 3.54,
813
  "learning_rate": 2.2972972972972976e-05,
814
+ "loss": 0.1955,
815
  "step": 1180
816
  },
817
  {
818
  "epoch": 3.57,
819
+ "learning_rate": 2.147147147147147e-05,
820
+ "loss": 0.1116,
821
  "step": 1190
822
  },
823
  {
824
  "epoch": 3.6,
825
+ "learning_rate": 1.996996996996997e-05,
826
+ "loss": 0.1466,
827
  "step": 1200
828
  },
829
  {
830
  "epoch": 3.6,
831
+ "eval_accuracy": 0.9319872476089267,
832
+ "eval_loss": 0.2672683000564575,
833
+ "eval_runtime": 38.9895,
834
+ "eval_samples_per_second": 24.135,
835
+ "eval_steps_per_second": 3.026,
836
  "step": 1200
837
  },
838
  {
839
  "epoch": 3.63,
840
+ "learning_rate": 1.846846846846847e-05,
841
+ "loss": 0.1497,
842
  "step": 1210
843
  },
844
  {
845
  "epoch": 3.66,
846
+ "learning_rate": 1.6966966966966967e-05,
847
+ "loss": 0.0741,
848
  "step": 1220
849
  },
850
  {
851
  "epoch": 3.69,
852
+ "learning_rate": 1.5465465465465466e-05,
853
+ "loss": 0.2916,
854
  "step": 1230
855
  },
856
  {
857
  "epoch": 3.72,
858
+ "learning_rate": 1.3963963963963963e-05,
859
+ "loss": 0.1485,
860
  "step": 1240
861
  },
862
  {
863
  "epoch": 3.75,
864
+ "learning_rate": 1.2462462462462463e-05,
865
+ "loss": 0.1156,
866
  "step": 1250
867
  },
868
  {
869
  "epoch": 3.78,
870
+ "learning_rate": 1.0960960960960962e-05,
871
+ "loss": 0.0887,
872
  "step": 1260
873
  },
874
  {
875
  "epoch": 3.81,
876
+ "learning_rate": 9.45945945945946e-06,
877
+ "loss": 0.153,
878
  "step": 1270
879
  },
880
  {
881
  "epoch": 3.84,
882
+ "learning_rate": 7.957957957957958e-06,
883
+ "loss": 0.2049,
884
  "step": 1280
885
  },
886
  {
887
  "epoch": 3.87,
888
+ "learning_rate": 6.456456456456457e-06,
889
+ "loss": 0.099,
890
  "step": 1290
891
  },
892
  {
893
  "epoch": 3.9,
894
+ "learning_rate": 4.954954954954955e-06,
895
+ "loss": 0.1789,
896
  "step": 1300
897
  },
898
  {
899
  "epoch": 3.9,
900
+ "eval_accuracy": 0.9330499468650372,
901
+ "eval_loss": 0.263217031955719,
902
+ "eval_runtime": 37.6874,
903
+ "eval_samples_per_second": 24.969,
904
+ "eval_steps_per_second": 3.131,
905
  "step": 1300
906
  },
907
  {
908
  "epoch": 3.93,
909
+ "learning_rate": 3.4534534534534537e-06,
910
+ "loss": 0.2831,
911
  "step": 1310
912
  },
913
  {
914
  "epoch": 3.96,
915
+ "learning_rate": 1.951951951951952e-06,
916
+ "loss": 0.2103,
917
  "step": 1320
918
  },
919
  {
920
  "epoch": 3.99,
921
+ "learning_rate": 4.504504504504505e-07,
922
+ "loss": 0.1034,
923
  "step": 1330
924
  },
925
  {
926
  "epoch": 4.0,
927
  "step": 1332,
928
  "total_flos": 1.651775717862015e+18,
929
+ "train_loss": 0.4671340096104252,
930
+ "train_runtime": 1617.2249,
931
+ "train_samples_per_second": 13.178,
932
+ "train_steps_per_second": 0.824
933
  }
934
  ],
935
  "max_steps": 1332,