Raihan004 commited on
Commit
ac4f92f
1 Parent(s): d607ad5

🍻 cheers

Browse files
README.md CHANGED
@@ -2,6 +2,7 @@
2
  license: apache-2.0
3
  base_model: google/vit-base-patch16-224-in21k
4
  tags:
 
5
  - generated_from_trainer
6
  metrics:
7
  - accuracy
@@ -15,10 +16,10 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # Action_all_10_class
17
 
18
- This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 0.3598
21
- - Accuracy: 0.9017
22
 
23
  ## Model description
24
 
 
2
  license: apache-2.0
3
  base_model: google/vit-base-patch16-224-in21k
4
  tags:
5
+ - image-classification
6
  - generated_from_trainer
7
  metrics:
8
  - accuracy
 
16
 
17
  # Action_all_10_class
18
 
19
+ This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on the Action_small_dataset dataset.
20
  It achieves the following results on the evaluation set:
21
+ - Loss: 0.3587
22
+ - Accuracy: 0.8946
23
 
24
  ## Model description
25
 
all_results.json CHANGED
@@ -1,8 +1,13 @@
1
  {
2
  "epoch": 5.0,
 
 
 
 
 
3
  "total_flos": 1.539101261655982e+18,
4
- "train_loss": 0.6643937945844658,
5
- "train_runtime": 748.7697,
6
- "train_samples_per_second": 26.524,
7
- "train_steps_per_second": 1.663
8
  }
 
1
  {
2
  "epoch": 5.0,
3
+ "eval_accuracy": 0.8945868945868946,
4
+ "eval_loss": 0.3586599826812744,
5
+ "eval_runtime": 12.4844,
6
+ "eval_samples_per_second": 56.23,
7
+ "eval_steps_per_second": 7.049,
8
  "total_flos": 1.539101261655982e+18,
9
+ "train_loss": 0.6706935805968013,
10
+ "train_runtime": 749.324,
11
+ "train_samples_per_second": 26.504,
12
+ "train_steps_per_second": 1.661
13
  }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 5.0,
3
+ "eval_accuracy": 0.8945868945868946,
4
+ "eval_loss": 0.3586599826812744,
5
+ "eval_runtime": 12.4844,
6
+ "eval_samples_per_second": 56.23,
7
+ "eval_steps_per_second": 7.049
8
+ }
runs/Nov06_15-48-14_dfb0c3665f64/events.out.tfevents.1699286522.dfb0c3665f64.18450.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb34ed9597a7a8ee3a53f1be04b579f3923496da01a7c2b12e565483683c97cf
3
+ size 411
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 5.0,
3
  "total_flos": 1.539101261655982e+18,
4
- "train_loss": 0.6643937945844658,
5
- "train_runtime": 748.7697,
6
- "train_samples_per_second": 26.524,
7
- "train_steps_per_second": 1.663
8
  }
 
1
  {
2
  "epoch": 5.0,
3
  "total_flos": 1.539101261655982e+18,
4
+ "train_loss": 0.6706935805968013,
5
+ "train_runtime": 749.324,
6
+ "train_samples_per_second": 26.504,
7
+ "train_steps_per_second": 1.661
8
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.39916983246803284,
3
- "best_model_checkpoint": "Action_all_10_class/checkpoint-1200",
4
  "epoch": 5.0,
5
  "eval_steps": 100,
6
  "global_step": 1245,
@@ -11,863 +11,863 @@
11
  {
12
  "epoch": 0.04,
13
  "learning_rate": 0.00019839357429718877,
14
- "loss": 2.2294,
15
  "step": 10
16
  },
17
  {
18
  "epoch": 0.08,
19
  "learning_rate": 0.00019678714859437752,
20
- "loss": 2.1036,
21
  "step": 20
22
  },
23
  {
24
  "epoch": 0.12,
25
  "learning_rate": 0.00019518072289156628,
26
- "loss": 1.9019,
27
  "step": 30
28
  },
29
  {
30
  "epoch": 0.16,
31
  "learning_rate": 0.00019357429718875504,
32
- "loss": 1.6396,
33
  "step": 40
34
  },
35
  {
36
  "epoch": 0.2,
37
  "learning_rate": 0.00019196787148594377,
38
- "loss": 1.5942,
39
  "step": 50
40
  },
41
  {
42
  "epoch": 0.24,
43
  "learning_rate": 0.00019036144578313252,
44
- "loss": 1.3722,
45
  "step": 60
46
  },
47
  {
48
  "epoch": 0.28,
49
  "learning_rate": 0.00018875502008032128,
50
- "loss": 1.2927,
51
  "step": 70
52
  },
53
  {
54
  "epoch": 0.32,
55
  "learning_rate": 0.00018714859437751004,
56
- "loss": 1.2947,
57
  "step": 80
58
  },
59
  {
60
  "epoch": 0.36,
61
  "learning_rate": 0.0001855421686746988,
62
- "loss": 1.2353,
63
  "step": 90
64
  },
65
  {
66
  "epoch": 0.4,
67
  "learning_rate": 0.00018393574297188755,
68
- "loss": 1.1348,
69
  "step": 100
70
  },
71
  {
72
  "epoch": 0.4,
73
- "eval_accuracy": 0.698005698005698,
74
- "eval_loss": 1.0964313745498657,
75
- "eval_runtime": 13.3411,
76
- "eval_samples_per_second": 52.619,
77
- "eval_steps_per_second": 6.596,
78
  "step": 100
79
  },
80
  {
81
  "epoch": 0.44,
82
  "learning_rate": 0.0001823293172690763,
83
- "loss": 1.0305,
84
  "step": 110
85
  },
86
  {
87
  "epoch": 0.48,
88
  "learning_rate": 0.00018072289156626507,
89
- "loss": 1.0352,
90
  "step": 120
91
  },
92
  {
93
  "epoch": 0.52,
94
  "learning_rate": 0.00017911646586345382,
95
- "loss": 1.0957,
96
  "step": 130
97
  },
98
  {
99
  "epoch": 0.56,
100
  "learning_rate": 0.00017751004016064258,
101
- "loss": 1.1747,
102
  "step": 140
103
  },
104
  {
105
  "epoch": 0.6,
106
  "learning_rate": 0.00017590361445783134,
107
- "loss": 1.0863,
108
  "step": 150
109
  },
110
  {
111
  "epoch": 0.64,
112
  "learning_rate": 0.0001742971887550201,
113
- "loss": 1.0802,
114
  "step": 160
115
  },
116
  {
117
  "epoch": 0.68,
118
  "learning_rate": 0.00017269076305220885,
119
- "loss": 1.0912,
120
  "step": 170
121
  },
122
  {
123
  "epoch": 0.72,
124
  "learning_rate": 0.0001710843373493976,
125
- "loss": 1.0056,
126
  "step": 180
127
  },
128
  {
129
  "epoch": 0.76,
130
  "learning_rate": 0.00016947791164658636,
131
- "loss": 0.9641,
132
  "step": 190
133
  },
134
  {
135
  "epoch": 0.8,
136
  "learning_rate": 0.00016787148594377512,
137
- "loss": 0.9992,
138
  "step": 200
139
  },
140
  {
141
  "epoch": 0.8,
142
- "eval_accuracy": 0.7948717948717948,
143
- "eval_loss": 0.7362096309661865,
144
- "eval_runtime": 13.2093,
145
- "eval_samples_per_second": 53.145,
146
- "eval_steps_per_second": 6.662,
147
  "step": 200
148
  },
149
  {
150
  "epoch": 0.84,
151
  "learning_rate": 0.00016626506024096388,
152
- "loss": 0.9851,
153
  "step": 210
154
  },
155
  {
156
  "epoch": 0.88,
157
  "learning_rate": 0.00016465863453815263,
158
- "loss": 0.886,
159
  "step": 220
160
  },
161
  {
162
  "epoch": 0.92,
163
  "learning_rate": 0.0001630522088353414,
164
- "loss": 0.9604,
165
  "step": 230
166
  },
167
  {
168
  "epoch": 0.96,
169
  "learning_rate": 0.00016144578313253015,
170
- "loss": 0.7707,
171
  "step": 240
172
  },
173
  {
174
  "epoch": 1.0,
175
- "learning_rate": 0.00016,
176
- "loss": 0.9153,
177
  "step": 250
178
  },
179
  {
180
  "epoch": 1.04,
181
- "learning_rate": 0.00015839357429718874,
182
- "loss": 0.7581,
183
  "step": 260
184
  },
185
  {
186
  "epoch": 1.08,
187
- "learning_rate": 0.0001567871485943775,
188
- "loss": 0.6622,
189
  "step": 270
190
  },
191
  {
192
  "epoch": 1.12,
193
- "learning_rate": 0.00015518072289156626,
194
- "loss": 0.7251,
195
  "step": 280
196
  },
197
  {
198
  "epoch": 1.16,
199
- "learning_rate": 0.00015357429718875501,
200
- "loss": 0.7955,
201
  "step": 290
202
  },
203
  {
204
  "epoch": 1.2,
205
- "learning_rate": 0.00015196787148594377,
206
- "loss": 0.8314,
207
  "step": 300
208
  },
209
  {
210
  "epoch": 1.2,
211
- "eval_accuracy": 0.8205128205128205,
212
- "eval_loss": 0.6410418748855591,
213
- "eval_runtime": 12.6655,
214
- "eval_samples_per_second": 55.426,
215
- "eval_steps_per_second": 6.948,
216
  "step": 300
217
  },
218
  {
219
  "epoch": 1.24,
220
- "learning_rate": 0.00015036144578313253,
221
- "loss": 0.845,
222
  "step": 310
223
  },
224
  {
225
  "epoch": 1.29,
226
- "learning_rate": 0.00014875502008032128,
227
- "loss": 0.7132,
228
  "step": 320
229
  },
230
  {
231
  "epoch": 1.33,
232
- "learning_rate": 0.00014714859437751004,
233
- "loss": 0.7161,
234
  "step": 330
235
  },
236
  {
237
  "epoch": 1.37,
238
- "learning_rate": 0.0001455421686746988,
239
- "loss": 0.7773,
240
  "step": 340
241
  },
242
  {
243
  "epoch": 1.41,
244
- "learning_rate": 0.00014393574297188756,
245
- "loss": 0.7676,
246
  "step": 350
247
  },
248
  {
249
  "epoch": 1.45,
250
- "learning_rate": 0.0001423293172690763,
251
- "loss": 0.8516,
252
  "step": 360
253
  },
254
  {
255
  "epoch": 1.49,
256
- "learning_rate": 0.00014072289156626507,
257
- "loss": 0.6597,
258
  "step": 370
259
  },
260
  {
261
  "epoch": 1.53,
262
- "learning_rate": 0.00013911646586345383,
263
- "loss": 0.6998,
264
  "step": 380
265
  },
266
  {
267
  "epoch": 1.57,
268
- "learning_rate": 0.00013751004016064258,
269
- "loss": 0.8191,
270
  "step": 390
271
  },
272
  {
273
  "epoch": 1.61,
274
- "learning_rate": 0.00013590361445783134,
275
- "loss": 0.7359,
276
  "step": 400
277
  },
278
  {
279
  "epoch": 1.61,
280
- "eval_accuracy": 0.8247863247863247,
281
- "eval_loss": 0.5803518891334534,
282
- "eval_runtime": 12.2034,
283
- "eval_samples_per_second": 57.525,
284
- "eval_steps_per_second": 7.211,
285
  "step": 400
286
  },
287
  {
288
  "epoch": 1.65,
289
- "learning_rate": 0.0001342971887550201,
290
- "loss": 0.7354,
291
  "step": 410
292
  },
293
  {
294
  "epoch": 1.69,
295
- "learning_rate": 0.00013269076305220885,
296
- "loss": 0.7507,
297
  "step": 420
298
  },
299
  {
300
  "epoch": 1.73,
301
- "learning_rate": 0.0001310843373493976,
302
- "loss": 0.8088,
303
  "step": 430
304
  },
305
  {
306
  "epoch": 1.77,
307
- "learning_rate": 0.00012947791164658637,
308
- "loss": 0.7279,
309
  "step": 440
310
  },
311
  {
312
  "epoch": 1.81,
313
- "learning_rate": 0.00012787148594377512,
314
- "loss": 0.7254,
315
  "step": 450
316
  },
317
  {
318
  "epoch": 1.85,
319
- "learning_rate": 0.00012626506024096385,
320
- "loss": 0.6707,
321
  "step": 460
322
  },
323
  {
324
  "epoch": 1.89,
325
- "learning_rate": 0.0001246586345381526,
326
- "loss": 0.6482,
327
  "step": 470
328
  },
329
  {
330
  "epoch": 1.93,
331
- "learning_rate": 0.00012305220883534137,
332
- "loss": 0.7238,
333
  "step": 480
334
  },
335
  {
336
  "epoch": 1.97,
337
- "learning_rate": 0.00012144578313253012,
338
- "loss": 0.6626,
339
  "step": 490
340
  },
341
  {
342
  "epoch": 2.01,
343
- "learning_rate": 0.00011983935742971888,
344
- "loss": 0.776,
345
  "step": 500
346
  },
347
  {
348
  "epoch": 2.01,
349
- "eval_accuracy": 0.8376068376068376,
350
- "eval_loss": 0.5489934086799622,
351
- "eval_runtime": 12.4349,
352
- "eval_samples_per_second": 56.454,
353
- "eval_steps_per_second": 7.077,
354
  "step": 500
355
  },
356
  {
357
  "epoch": 2.05,
358
- "learning_rate": 0.00011823293172690764,
359
- "loss": 0.8069,
360
  "step": 510
361
  },
362
  {
363
  "epoch": 2.09,
364
- "learning_rate": 0.0001166265060240964,
365
- "loss": 0.5448,
366
  "step": 520
367
  },
368
  {
369
  "epoch": 2.13,
370
- "learning_rate": 0.00011502008032128515,
371
- "loss": 0.7318,
372
  "step": 530
373
  },
374
  {
375
  "epoch": 2.17,
376
- "learning_rate": 0.00011341365461847391,
377
- "loss": 0.5083,
378
  "step": 540
379
  },
380
  {
381
  "epoch": 2.21,
382
- "learning_rate": 0.00011180722891566267,
383
- "loss": 0.6493,
384
  "step": 550
385
  },
386
  {
387
  "epoch": 2.25,
388
- "learning_rate": 0.0001102008032128514,
389
- "loss": 0.4928,
390
  "step": 560
391
  },
392
  {
393
  "epoch": 2.29,
394
- "learning_rate": 0.00010859437751004015,
395
- "loss": 0.5395,
396
  "step": 570
397
  },
398
  {
399
  "epoch": 2.33,
400
- "learning_rate": 0.00010698795180722891,
401
- "loss": 0.5588,
402
  "step": 580
403
  },
404
  {
405
  "epoch": 2.37,
406
- "learning_rate": 0.00010538152610441767,
407
- "loss": 0.5892,
408
  "step": 590
409
  },
410
  {
411
  "epoch": 2.41,
412
- "learning_rate": 0.00010377510040160642,
413
- "loss": 0.614,
414
  "step": 600
415
  },
416
  {
417
  "epoch": 2.41,
418
- "eval_accuracy": 0.8504273504273504,
419
- "eval_loss": 0.5006864070892334,
420
- "eval_runtime": 12.2103,
421
- "eval_samples_per_second": 57.493,
422
- "eval_steps_per_second": 7.207,
423
  "step": 600
424
  },
425
  {
426
  "epoch": 2.45,
427
- "learning_rate": 0.00010216867469879518,
428
- "loss": 0.7658,
429
  "step": 610
430
  },
431
  {
432
  "epoch": 2.49,
433
- "learning_rate": 0.00010056224899598394,
434
- "loss": 0.5152,
435
  "step": 620
436
  },
437
  {
438
  "epoch": 2.53,
439
- "learning_rate": 9.89558232931727e-05,
440
- "loss": 0.4941,
441
  "step": 630
442
  },
443
  {
444
  "epoch": 2.57,
445
- "learning_rate": 9.734939759036145e-05,
446
- "loss": 0.6831,
447
  "step": 640
448
  },
449
  {
450
  "epoch": 2.61,
451
- "learning_rate": 9.574297188755021e-05,
452
- "loss": 0.5971,
453
  "step": 650
454
  },
455
  {
456
  "epoch": 2.65,
457
- "learning_rate": 9.413654618473896e-05,
458
- "loss": 0.5842,
459
  "step": 660
460
  },
461
  {
462
  "epoch": 2.69,
463
- "learning_rate": 9.253012048192772e-05,
464
- "loss": 0.5976,
465
  "step": 670
466
  },
467
  {
468
  "epoch": 2.73,
469
- "learning_rate": 9.092369477911648e-05,
470
- "loss": 0.5051,
471
  "step": 680
472
  },
473
  {
474
  "epoch": 2.77,
475
- "learning_rate": 8.931726907630522e-05,
476
- "loss": 0.4737,
477
  "step": 690
478
  },
479
  {
480
  "epoch": 2.81,
481
- "learning_rate": 8.771084337349398e-05,
482
- "loss": 0.5484,
483
  "step": 700
484
  },
485
  {
486
  "epoch": 2.81,
487
- "eval_accuracy": 0.8333333333333334,
488
- "eval_loss": 0.5322346091270447,
489
- "eval_runtime": 12.3228,
490
- "eval_samples_per_second": 56.968,
491
- "eval_steps_per_second": 7.141,
492
  "step": 700
493
  },
494
  {
495
  "epoch": 2.85,
496
- "learning_rate": 8.610441767068274e-05,
497
- "loss": 0.4436,
498
  "step": 710
499
  },
500
  {
501
  "epoch": 2.89,
502
- "learning_rate": 8.449799196787149e-05,
503
- "loss": 0.6452,
504
  "step": 720
505
  },
506
  {
507
  "epoch": 2.93,
508
- "learning_rate": 8.289156626506025e-05,
509
- "loss": 0.5724,
510
  "step": 730
511
  },
512
  {
513
  "epoch": 2.97,
514
- "learning_rate": 8.128514056224899e-05,
515
- "loss": 0.3933,
516
  "step": 740
517
  },
518
  {
519
  "epoch": 3.01,
520
- "learning_rate": 7.967871485943775e-05,
521
- "loss": 0.5753,
522
  "step": 750
523
  },
524
  {
525
  "epoch": 3.05,
526
- "learning_rate": 7.80722891566265e-05,
527
- "loss": 0.4426,
528
  "step": 760
529
  },
530
  {
531
  "epoch": 3.09,
532
- "learning_rate": 7.646586345381526e-05,
533
- "loss": 0.5442,
534
  "step": 770
535
  },
536
  {
537
  "epoch": 3.13,
538
- "learning_rate": 7.485943775100402e-05,
539
- "loss": 0.4839,
540
  "step": 780
541
  },
542
  {
543
  "epoch": 3.17,
544
- "learning_rate": 7.325301204819278e-05,
545
- "loss": 0.3711,
546
  "step": 790
547
  },
548
  {
549
  "epoch": 3.21,
550
- "learning_rate": 7.164658634538153e-05,
551
- "loss": 0.3844,
552
  "step": 800
553
  },
554
  {
555
  "epoch": 3.21,
556
- "eval_accuracy": 0.8518518518518519,
557
- "eval_loss": 0.5012323260307312,
558
- "eval_runtime": 12.9006,
559
- "eval_samples_per_second": 54.416,
560
- "eval_steps_per_second": 6.821,
561
  "step": 800
562
  },
563
  {
564
  "epoch": 3.25,
565
- "learning_rate": 7.004016064257029e-05,
566
- "loss": 0.513,
567
  "step": 810
568
  },
569
  {
570
  "epoch": 3.29,
571
- "learning_rate": 6.843373493975905e-05,
572
- "loss": 0.3505,
573
  "step": 820
574
  },
575
  {
576
  "epoch": 3.33,
577
- "learning_rate": 6.68273092369478e-05,
578
- "loss": 0.4022,
579
  "step": 830
580
  },
581
  {
582
  "epoch": 3.37,
583
- "learning_rate": 6.522088353413655e-05,
584
- "loss": 0.3574,
585
  "step": 840
586
  },
587
  {
588
  "epoch": 3.41,
589
- "learning_rate": 6.36144578313253e-05,
590
- "loss": 0.5703,
591
  "step": 850
592
  },
593
  {
594
  "epoch": 3.45,
595
- "learning_rate": 6.200803212851406e-05,
596
- "loss": 0.4585,
597
  "step": 860
598
  },
599
  {
600
  "epoch": 3.49,
601
- "learning_rate": 6.040160642570282e-05,
602
- "loss": 0.3114,
603
  "step": 870
604
  },
605
  {
606
  "epoch": 3.53,
607
- "learning_rate": 5.8795180722891576e-05,
608
- "loss": 0.3508,
609
  "step": 880
610
  },
611
  {
612
  "epoch": 3.57,
613
- "learning_rate": 5.718875502008032e-05,
614
- "loss": 0.3614,
615
  "step": 890
616
  },
617
  {
618
  "epoch": 3.61,
619
- "learning_rate": 5.5582329317269076e-05,
620
- "loss": 0.5681,
621
  "step": 900
622
  },
623
  {
624
  "epoch": 3.61,
625
- "eval_accuracy": 0.8589743589743589,
626
- "eval_loss": 0.49228861927986145,
627
- "eval_runtime": 13.1999,
628
- "eval_samples_per_second": 53.182,
629
- "eval_steps_per_second": 6.667,
630
  "step": 900
631
  },
632
  {
633
  "epoch": 3.65,
634
- "learning_rate": 5.397590361445783e-05,
635
- "loss": 0.5074,
636
  "step": 910
637
  },
638
  {
639
  "epoch": 3.69,
640
- "learning_rate": 5.236947791164659e-05,
641
- "loss": 0.3371,
642
  "step": 920
643
  },
644
  {
645
  "epoch": 3.73,
646
- "learning_rate": 5.076305220883535e-05,
647
- "loss": 0.5226,
648
  "step": 930
649
  },
650
  {
651
  "epoch": 3.78,
652
  "learning_rate": 4.9156626506024104e-05,
653
- "loss": 0.4462,
654
  "step": 940
655
  },
656
  {
657
  "epoch": 3.82,
658
  "learning_rate": 4.7550200803212854e-05,
659
- "loss": 0.4781,
660
  "step": 950
661
  },
662
  {
663
  "epoch": 3.86,
664
  "learning_rate": 4.594377510040161e-05,
665
- "loss": 0.4001,
666
  "step": 960
667
  },
668
  {
669
  "epoch": 3.9,
670
  "learning_rate": 4.433734939759036e-05,
671
- "loss": 0.4788,
672
  "step": 970
673
  },
674
  {
675
  "epoch": 3.94,
676
  "learning_rate": 4.273092369477912e-05,
677
- "loss": 0.4561,
678
  "step": 980
679
  },
680
  {
681
  "epoch": 3.98,
682
  "learning_rate": 4.1124497991967875e-05,
683
- "loss": 0.2563,
684
  "step": 990
685
  },
686
  {
687
  "epoch": 4.02,
688
  "learning_rate": 3.9518072289156625e-05,
689
- "loss": 0.4315,
690
  "step": 1000
691
  },
692
  {
693
  "epoch": 4.02,
694
- "eval_accuracy": 0.8774928774928775,
695
- "eval_loss": 0.42575880885124207,
696
- "eval_runtime": 13.0968,
697
- "eval_samples_per_second": 53.601,
698
- "eval_steps_per_second": 6.719,
699
  "step": 1000
700
  },
701
  {
702
  "epoch": 4.06,
703
  "learning_rate": 3.791164658634538e-05,
704
- "loss": 0.2898,
705
  "step": 1010
706
  },
707
  {
708
  "epoch": 4.1,
709
  "learning_rate": 3.630522088353414e-05,
710
- "loss": 0.3803,
711
  "step": 1020
712
  },
713
  {
714
  "epoch": 4.14,
715
  "learning_rate": 3.4698795180722896e-05,
716
- "loss": 0.2827,
717
  "step": 1030
718
  },
719
  {
720
  "epoch": 4.18,
721
  "learning_rate": 3.309236947791165e-05,
722
- "loss": 0.3382,
723
  "step": 1040
724
  },
725
  {
726
  "epoch": 4.22,
727
  "learning_rate": 3.14859437751004e-05,
728
- "loss": 0.3808,
729
  "step": 1050
730
  },
731
  {
732
  "epoch": 4.26,
733
  "learning_rate": 2.987951807228916e-05,
734
- "loss": 0.2551,
735
  "step": 1060
736
  },
737
  {
738
  "epoch": 4.3,
739
  "learning_rate": 2.827309236947791e-05,
740
- "loss": 0.3011,
741
  "step": 1070
742
  },
743
  {
744
  "epoch": 4.34,
745
  "learning_rate": 2.6666666666666667e-05,
746
- "loss": 0.4091,
747
  "step": 1080
748
  },
749
  {
750
  "epoch": 4.38,
751
  "learning_rate": 2.5060240963855423e-05,
752
- "loss": 0.286,
753
  "step": 1090
754
  },
755
  {
756
  "epoch": 4.42,
757
  "learning_rate": 2.345381526104418e-05,
758
- "loss": 0.3804,
759
  "step": 1100
760
  },
761
  {
762
  "epoch": 4.42,
763
- "eval_accuracy": 0.8675213675213675,
764
- "eval_loss": 0.4324430227279663,
765
- "eval_runtime": 12.1257,
766
- "eval_samples_per_second": 57.894,
767
- "eval_steps_per_second": 7.257,
768
  "step": 1100
769
  },
770
  {
771
  "epoch": 4.46,
772
  "learning_rate": 2.1847389558232934e-05,
773
- "loss": 0.2672,
774
  "step": 1110
775
  },
776
  {
777
  "epoch": 4.5,
778
  "learning_rate": 2.0240963855421687e-05,
779
- "loss": 0.3661,
780
  "step": 1120
781
  },
782
  {
783
  "epoch": 4.54,
784
  "learning_rate": 1.863453815261044e-05,
785
- "loss": 0.3126,
786
  "step": 1130
787
  },
788
  {
789
  "epoch": 4.58,
790
  "learning_rate": 1.7028112449799198e-05,
791
- "loss": 0.3455,
792
  "step": 1140
793
  },
794
  {
795
  "epoch": 4.62,
796
  "learning_rate": 1.5421686746987955e-05,
797
- "loss": 0.3604,
798
  "step": 1150
799
  },
800
  {
801
  "epoch": 4.66,
802
  "learning_rate": 1.3815261044176708e-05,
803
- "loss": 0.4628,
804
  "step": 1160
805
  },
806
  {
807
  "epoch": 4.7,
808
  "learning_rate": 1.2208835341365463e-05,
809
- "loss": 0.4074,
810
  "step": 1170
811
  },
812
  {
813
  "epoch": 4.74,
814
  "learning_rate": 1.0602409638554217e-05,
815
- "loss": 0.2512,
816
  "step": 1180
817
  },
818
  {
819
  "epoch": 4.78,
820
  "learning_rate": 8.995983935742972e-06,
821
- "loss": 0.2974,
822
  "step": 1190
823
  },
824
  {
825
  "epoch": 4.82,
826
  "learning_rate": 7.389558232931727e-06,
827
- "loss": 0.2887,
828
  "step": 1200
829
  },
830
  {
831
  "epoch": 4.82,
832
- "eval_accuracy": 0.8803418803418803,
833
- "eval_loss": 0.39916983246803284,
834
- "eval_runtime": 12.2383,
835
- "eval_samples_per_second": 57.361,
836
- "eval_steps_per_second": 7.191,
837
  "step": 1200
838
  },
839
  {
840
  "epoch": 4.86,
841
  "learning_rate": 5.783132530120483e-06,
842
- "loss": 0.3073,
843
  "step": 1210
844
  },
845
  {
846
  "epoch": 4.9,
847
  "learning_rate": 4.176706827309238e-06,
848
- "loss": 0.2825,
849
  "step": 1220
850
  },
851
  {
852
  "epoch": 4.94,
853
  "learning_rate": 2.570281124497992e-06,
854
- "loss": 0.288,
855
  "step": 1230
856
  },
857
  {
858
  "epoch": 4.98,
859
  "learning_rate": 9.638554216867472e-07,
860
- "loss": 0.3198,
861
  "step": 1240
862
  },
863
  {
864
  "epoch": 5.0,
865
  "step": 1245,
866
  "total_flos": 1.539101261655982e+18,
867
- "train_loss": 0.6643937945844658,
868
- "train_runtime": 748.7697,
869
- "train_samples_per_second": 26.524,
870
- "train_steps_per_second": 1.663
871
  }
872
  ],
873
  "logging_steps": 10,
 
1
  {
2
+ "best_metric": 0.3586599826812744,
3
+ "best_model_checkpoint": "Action_all_10_class/checkpoint-1000",
4
  "epoch": 5.0,
5
  "eval_steps": 100,
6
  "global_step": 1245,
 
11
  {
12
  "epoch": 0.04,
13
  "learning_rate": 0.00019839357429718877,
14
+ "loss": 2.1986,
15
  "step": 10
16
  },
17
  {
18
  "epoch": 0.08,
19
  "learning_rate": 0.00019678714859437752,
20
+ "loss": 2.0053,
21
  "step": 20
22
  },
23
  {
24
  "epoch": 0.12,
25
  "learning_rate": 0.00019518072289156628,
26
+ "loss": 1.8025,
27
  "step": 30
28
  },
29
  {
30
  "epoch": 0.16,
31
  "learning_rate": 0.00019357429718875504,
32
+ "loss": 1.577,
33
  "step": 40
34
  },
35
  {
36
  "epoch": 0.2,
37
  "learning_rate": 0.00019196787148594377,
38
+ "loss": 1.5746,
39
  "step": 50
40
  },
41
  {
42
  "epoch": 0.24,
43
  "learning_rate": 0.00019036144578313252,
44
+ "loss": 1.4402,
45
  "step": 60
46
  },
47
  {
48
  "epoch": 0.28,
49
  "learning_rate": 0.00018875502008032128,
50
+ "loss": 1.3624,
51
  "step": 70
52
  },
53
  {
54
  "epoch": 0.32,
55
  "learning_rate": 0.00018714859437751004,
56
+ "loss": 1.2849,
57
  "step": 80
58
  },
59
  {
60
  "epoch": 0.36,
61
  "learning_rate": 0.0001855421686746988,
62
+ "loss": 1.2064,
63
  "step": 90
64
  },
65
  {
66
  "epoch": 0.4,
67
  "learning_rate": 0.00018393574297188755,
68
+ "loss": 1.1524,
69
  "step": 100
70
  },
71
  {
72
  "epoch": 0.4,
73
+ "eval_accuracy": 0.6951566951566952,
74
+ "eval_loss": 1.0612293481826782,
75
+ "eval_runtime": 13.0271,
76
+ "eval_samples_per_second": 53.888,
77
+ "eval_steps_per_second": 6.755,
78
  "step": 100
79
  },
80
  {
81
  "epoch": 0.44,
82
  "learning_rate": 0.0001823293172690763,
83
+ "loss": 1.085,
84
  "step": 110
85
  },
86
  {
87
  "epoch": 0.48,
88
  "learning_rate": 0.00018072289156626507,
89
+ "loss": 0.9779,
90
  "step": 120
91
  },
92
  {
93
  "epoch": 0.52,
94
  "learning_rate": 0.00017911646586345382,
95
+ "loss": 1.0527,
96
  "step": 130
97
  },
98
  {
99
  "epoch": 0.56,
100
  "learning_rate": 0.00017751004016064258,
101
+ "loss": 1.2653,
102
  "step": 140
103
  },
104
  {
105
  "epoch": 0.6,
106
  "learning_rate": 0.00017590361445783134,
107
+ "loss": 1.0224,
108
  "step": 150
109
  },
110
  {
111
  "epoch": 0.64,
112
  "learning_rate": 0.0001742971887550201,
113
+ "loss": 1.0313,
114
  "step": 160
115
  },
116
  {
117
  "epoch": 0.68,
118
  "learning_rate": 0.00017269076305220885,
119
+ "loss": 1.1361,
120
  "step": 170
121
  },
122
  {
123
  "epoch": 0.72,
124
  "learning_rate": 0.0001710843373493976,
125
+ "loss": 1.0008,
126
  "step": 180
127
  },
128
  {
129
  "epoch": 0.76,
130
  "learning_rate": 0.00016947791164658636,
131
+ "loss": 1.0386,
132
  "step": 190
133
  },
134
  {
135
  "epoch": 0.8,
136
  "learning_rate": 0.00016787148594377512,
137
+ "loss": 0.9818,
138
  "step": 200
139
  },
140
  {
141
  "epoch": 0.8,
142
+ "eval_accuracy": 0.7877492877492878,
143
+ "eval_loss": 0.7399429082870483,
144
+ "eval_runtime": 13.2513,
145
+ "eval_samples_per_second": 52.976,
146
+ "eval_steps_per_second": 6.641,
147
  "step": 200
148
  },
149
  {
150
  "epoch": 0.84,
151
  "learning_rate": 0.00016626506024096388,
152
+ "loss": 1.058,
153
  "step": 210
154
  },
155
  {
156
  "epoch": 0.88,
157
  "learning_rate": 0.00016465863453815263,
158
+ "loss": 0.9421,
159
  "step": 220
160
  },
161
  {
162
  "epoch": 0.92,
163
  "learning_rate": 0.0001630522088353414,
164
+ "loss": 0.9055,
165
  "step": 230
166
  },
167
  {
168
  "epoch": 0.96,
169
  "learning_rate": 0.00016144578313253015,
170
+ "loss": 0.7195,
171
  "step": 240
172
  },
173
  {
174
  "epoch": 1.0,
175
+ "learning_rate": 0.00015983935742971888,
176
+ "loss": 0.9547,
177
  "step": 250
178
  },
179
  {
180
  "epoch": 1.04,
181
+ "learning_rate": 0.00015823293172690763,
182
+ "loss": 0.7786,
183
  "step": 260
184
  },
185
  {
186
  "epoch": 1.08,
187
+ "learning_rate": 0.0001566265060240964,
188
+ "loss": 0.7334,
189
  "step": 270
190
  },
191
  {
192
  "epoch": 1.12,
193
+ "learning_rate": 0.00015502008032128515,
194
+ "loss": 0.8422,
195
  "step": 280
196
  },
197
  {
198
  "epoch": 1.16,
199
+ "learning_rate": 0.0001534136546184739,
200
+ "loss": 0.7638,
201
  "step": 290
202
  },
203
  {
204
  "epoch": 1.2,
205
+ "learning_rate": 0.00015180722891566266,
206
+ "loss": 0.844,
207
  "step": 300
208
  },
209
  {
210
  "epoch": 1.2,
211
+ "eval_accuracy": 0.7692307692307693,
212
+ "eval_loss": 0.7196186780929565,
213
+ "eval_runtime": 12.9868,
214
+ "eval_samples_per_second": 54.055,
215
+ "eval_steps_per_second": 6.776,
216
  "step": 300
217
  },
218
  {
219
  "epoch": 1.24,
220
+ "learning_rate": 0.00015020080321285142,
221
+ "loss": 0.812,
222
  "step": 310
223
  },
224
  {
225
  "epoch": 1.29,
226
+ "learning_rate": 0.00014859437751004018,
227
+ "loss": 0.801,
228
  "step": 320
229
  },
230
  {
231
  "epoch": 1.33,
232
+ "learning_rate": 0.00014698795180722893,
233
+ "loss": 0.72,
234
  "step": 330
235
  },
236
  {
237
  "epoch": 1.37,
238
+ "learning_rate": 0.0001453815261044177,
239
+ "loss": 0.7235,
240
  "step": 340
241
  },
242
  {
243
  "epoch": 1.41,
244
+ "learning_rate": 0.00014377510040160642,
245
+ "loss": 0.7429,
246
  "step": 350
247
  },
248
  {
249
  "epoch": 1.45,
250
+ "learning_rate": 0.00014216867469879518,
251
+ "loss": 0.8045,
252
  "step": 360
253
  },
254
  {
255
  "epoch": 1.49,
256
+ "learning_rate": 0.00014056224899598393,
257
+ "loss": 0.6534,
258
  "step": 370
259
  },
260
  {
261
  "epoch": 1.53,
262
+ "learning_rate": 0.0001389558232931727,
263
+ "loss": 0.6554,
264
  "step": 380
265
  },
266
  {
267
  "epoch": 1.57,
268
+ "learning_rate": 0.00013734939759036145,
269
+ "loss": 0.7605,
270
  "step": 390
271
  },
272
  {
273
  "epoch": 1.61,
274
+ "learning_rate": 0.0001357429718875502,
275
+ "loss": 0.7992,
276
  "step": 400
277
  },
278
  {
279
  "epoch": 1.61,
280
+ "eval_accuracy": 0.8376068376068376,
281
+ "eval_loss": 0.5383489727973938,
282
+ "eval_runtime": 13.2408,
283
+ "eval_samples_per_second": 53.018,
284
+ "eval_steps_per_second": 6.646,
285
  "step": 400
286
  },
287
  {
288
  "epoch": 1.65,
289
+ "learning_rate": 0.00013413654618473896,
290
+ "loss": 0.8029,
291
  "step": 410
292
  },
293
  {
294
  "epoch": 1.69,
295
+ "learning_rate": 0.00013253012048192772,
296
+ "loss": 0.7173,
297
  "step": 420
298
  },
299
  {
300
  "epoch": 1.73,
301
+ "learning_rate": 0.00013092369477911648,
302
+ "loss": 0.7658,
303
  "step": 430
304
  },
305
  {
306
  "epoch": 1.77,
307
+ "learning_rate": 0.00012931726907630523,
308
+ "loss": 0.6767,
309
  "step": 440
310
  },
311
  {
312
  "epoch": 1.81,
313
+ "learning_rate": 0.00012771084337349396,
314
+ "loss": 0.7436,
315
  "step": 450
316
  },
317
  {
318
  "epoch": 1.85,
319
+ "learning_rate": 0.00012610441767068272,
320
+ "loss": 0.679,
321
  "step": 460
322
  },
323
  {
324
  "epoch": 1.89,
325
+ "learning_rate": 0.00012449799196787148,
326
+ "loss": 0.6192,
327
  "step": 470
328
  },
329
  {
330
  "epoch": 1.93,
331
+ "learning_rate": 0.00012289156626506023,
332
+ "loss": 0.7447,
333
  "step": 480
334
  },
335
  {
336
  "epoch": 1.97,
337
+ "learning_rate": 0.000121285140562249,
338
+ "loss": 0.7223,
339
  "step": 490
340
  },
341
  {
342
  "epoch": 2.01,
343
+ "learning_rate": 0.00011967871485943776,
344
+ "loss": 0.7203,
345
  "step": 500
346
  },
347
  {
348
  "epoch": 2.01,
349
+ "eval_accuracy": 0.8390313390313391,
350
+ "eval_loss": 0.5222235321998596,
351
+ "eval_runtime": 12.8642,
352
+ "eval_samples_per_second": 54.57,
353
+ "eval_steps_per_second": 6.841,
354
  "step": 500
355
  },
356
  {
357
  "epoch": 2.05,
358
+ "learning_rate": 0.00011807228915662652,
359
+ "loss": 0.7294,
360
  "step": 510
361
  },
362
  {
363
  "epoch": 2.09,
364
+ "learning_rate": 0.00011646586345381527,
365
+ "loss": 0.5726,
366
  "step": 520
367
  },
368
  {
369
  "epoch": 2.13,
370
+ "learning_rate": 0.00011485943775100403,
371
+ "loss": 0.6862,
372
  "step": 530
373
  },
374
  {
375
  "epoch": 2.17,
376
+ "learning_rate": 0.00011325301204819279,
377
+ "loss": 0.5806,
378
  "step": 540
379
  },
380
  {
381
  "epoch": 2.21,
382
+ "learning_rate": 0.00011164658634538152,
383
+ "loss": 0.7621,
384
  "step": 550
385
  },
386
  {
387
  "epoch": 2.25,
388
+ "learning_rate": 0.00011004016064257027,
389
+ "loss": 0.4579,
390
  "step": 560
391
  },
392
  {
393
  "epoch": 2.29,
394
+ "learning_rate": 0.00010843373493975903,
395
+ "loss": 0.5173,
396
  "step": 570
397
  },
398
  {
399
  "epoch": 2.33,
400
+ "learning_rate": 0.00010682730923694779,
401
+ "loss": 0.5604,
402
  "step": 580
403
  },
404
  {
405
  "epoch": 2.37,
406
+ "learning_rate": 0.00010522088353413654,
407
+ "loss": 0.5459,
408
  "step": 590
409
  },
410
  {
411
  "epoch": 2.41,
412
+ "learning_rate": 0.0001036144578313253,
413
+ "loss": 0.5527,
414
  "step": 600
415
  },
416
  {
417
  "epoch": 2.41,
418
+ "eval_accuracy": 0.8376068376068376,
419
+ "eval_loss": 0.5394238233566284,
420
+ "eval_runtime": 12.6495,
421
+ "eval_samples_per_second": 55.496,
422
+ "eval_steps_per_second": 6.957,
423
  "step": 600
424
  },
425
  {
426
  "epoch": 2.45,
427
+ "learning_rate": 0.00010200803212851406,
428
+ "loss": 0.7029,
429
  "step": 610
430
  },
431
  {
432
  "epoch": 2.49,
433
+ "learning_rate": 0.00010040160642570282,
434
+ "loss": 0.5672,
435
  "step": 620
436
  },
437
  {
438
  "epoch": 2.53,
439
+ "learning_rate": 9.879518072289157e-05,
440
+ "loss": 0.5614,
441
  "step": 630
442
  },
443
  {
444
  "epoch": 2.57,
445
+ "learning_rate": 9.718875502008033e-05,
446
+ "loss": 0.6897,
447
  "step": 640
448
  },
449
  {
450
  "epoch": 2.61,
451
+ "learning_rate": 9.558232931726909e-05,
452
+ "loss": 0.5952,
453
  "step": 650
454
  },
455
  {
456
  "epoch": 2.65,
457
+ "learning_rate": 9.397590361445784e-05,
458
+ "loss": 0.6168,
459
  "step": 660
460
  },
461
  {
462
  "epoch": 2.69,
463
+ "learning_rate": 9.23694779116466e-05,
464
+ "loss": 0.5881,
465
  "step": 670
466
  },
467
  {
468
  "epoch": 2.73,
469
+ "learning_rate": 9.076305220883534e-05,
470
+ "loss": 0.464,
471
  "step": 680
472
  },
473
  {
474
  "epoch": 2.77,
475
+ "learning_rate": 8.91566265060241e-05,
476
+ "loss": 0.541,
477
  "step": 690
478
  },
479
  {
480
  "epoch": 2.81,
481
+ "learning_rate": 8.755020080321286e-05,
482
+ "loss": 0.5682,
483
  "step": 700
484
  },
485
  {
486
  "epoch": 2.81,
487
+ "eval_accuracy": 0.8461538461538461,
488
+ "eval_loss": 0.4943247437477112,
489
+ "eval_runtime": 12.6964,
490
+ "eval_samples_per_second": 55.291,
491
+ "eval_steps_per_second": 6.931,
492
  "step": 700
493
  },
494
  {
495
  "epoch": 2.85,
496
+ "learning_rate": 8.594377510040161e-05,
497
+ "loss": 0.4844,
498
  "step": 710
499
  },
500
  {
501
  "epoch": 2.89,
502
+ "learning_rate": 8.433734939759037e-05,
503
+ "loss": 0.6745,
504
  "step": 720
505
  },
506
  {
507
  "epoch": 2.93,
508
+ "learning_rate": 8.273092369477911e-05,
509
+ "loss": 0.5597,
510
  "step": 730
511
  },
512
  {
513
  "epoch": 2.97,
514
+ "learning_rate": 8.112449799196787e-05,
515
+ "loss": 0.4626,
516
  "step": 740
517
  },
518
  {
519
  "epoch": 3.01,
520
+ "learning_rate": 7.951807228915663e-05,
521
+ "loss": 0.6571,
522
  "step": 750
523
  },
524
  {
525
  "epoch": 3.05,
526
+ "learning_rate": 7.791164658634539e-05,
527
+ "loss": 0.4811,
528
  "step": 760
529
  },
530
  {
531
  "epoch": 3.09,
532
+ "learning_rate": 7.630522088353414e-05,
533
+ "loss": 0.5323,
534
  "step": 770
535
  },
536
  {
537
  "epoch": 3.13,
538
+ "learning_rate": 7.469879518072289e-05,
539
+ "loss": 0.5492,
540
  "step": 780
541
  },
542
  {
543
  "epoch": 3.17,
544
+ "learning_rate": 7.309236947791164e-05,
545
+ "loss": 0.4278,
546
  "step": 790
547
  },
548
  {
549
  "epoch": 3.21,
550
+ "learning_rate": 7.14859437751004e-05,
551
+ "loss": 0.4238,
552
  "step": 800
553
  },
554
  {
555
  "epoch": 3.21,
556
+ "eval_accuracy": 0.8675213675213675,
557
+ "eval_loss": 0.4441128373146057,
558
+ "eval_runtime": 12.9024,
559
+ "eval_samples_per_second": 54.409,
560
+ "eval_steps_per_second": 6.82,
561
  "step": 800
562
  },
563
  {
564
  "epoch": 3.25,
565
+ "learning_rate": 6.987951807228917e-05,
566
+ "loss": 0.5329,
567
  "step": 810
568
  },
569
  {
570
  "epoch": 3.29,
571
+ "learning_rate": 6.827309236947793e-05,
572
+ "loss": 0.3775,
573
  "step": 820
574
  },
575
  {
576
  "epoch": 3.33,
577
+ "learning_rate": 6.666666666666667e-05,
578
+ "loss": 0.4408,
579
  "step": 830
580
  },
581
  {
582
  "epoch": 3.37,
583
+ "learning_rate": 6.506024096385543e-05,
584
+ "loss": 0.3515,
585
  "step": 840
586
  },
587
  {
588
  "epoch": 3.41,
589
+ "learning_rate": 6.345381526104418e-05,
590
+ "loss": 0.5296,
591
  "step": 850
592
  },
593
  {
594
  "epoch": 3.45,
595
+ "learning_rate": 6.184738955823294e-05,
596
+ "loss": 0.4531,
597
  "step": 860
598
  },
599
  {
600
  "epoch": 3.49,
601
+ "learning_rate": 6.02409638554217e-05,
602
+ "loss": 0.3878,
603
  "step": 870
604
  },
605
  {
606
  "epoch": 3.53,
607
+ "learning_rate": 5.863453815261044e-05,
608
+ "loss": 0.4233,
609
  "step": 880
610
  },
611
  {
612
  "epoch": 3.57,
613
+ "learning_rate": 5.70281124497992e-05,
614
+ "loss": 0.3732,
615
  "step": 890
616
  },
617
  {
618
  "epoch": 3.61,
619
+ "learning_rate": 5.5421686746987955e-05,
620
+ "loss": 0.4684,
621
  "step": 900
622
  },
623
  {
624
  "epoch": 3.61,
625
+ "eval_accuracy": 0.8846153846153846,
626
+ "eval_loss": 0.41125282645225525,
627
+ "eval_runtime": 13.1688,
628
+ "eval_samples_per_second": 53.308,
629
+ "eval_steps_per_second": 6.682,
630
  "step": 900
631
  },
632
  {
633
  "epoch": 3.65,
634
+ "learning_rate": 5.381526104417671e-05,
635
+ "loss": 0.4347,
636
  "step": 910
637
  },
638
  {
639
  "epoch": 3.69,
640
+ "learning_rate": 5.220883534136547e-05,
641
+ "loss": 0.3793,
642
  "step": 920
643
  },
644
  {
645
  "epoch": 3.73,
646
+ "learning_rate": 5.060240963855422e-05,
647
+ "loss": 0.4689,
648
  "step": 930
649
  },
650
  {
651
  "epoch": 3.78,
652
  "learning_rate": 4.9156626506024104e-05,
653
+ "loss": 0.4459,
654
  "step": 940
655
  },
656
  {
657
  "epoch": 3.82,
658
  "learning_rate": 4.7550200803212854e-05,
659
+ "loss": 0.4529,
660
  "step": 950
661
  },
662
  {
663
  "epoch": 3.86,
664
  "learning_rate": 4.594377510040161e-05,
665
+ "loss": 0.3795,
666
  "step": 960
667
  },
668
  {
669
  "epoch": 3.9,
670
  "learning_rate": 4.433734939759036e-05,
671
+ "loss": 0.4545,
672
  "step": 970
673
  },
674
  {
675
  "epoch": 3.94,
676
  "learning_rate": 4.273092369477912e-05,
677
+ "loss": 0.4298,
678
  "step": 980
679
  },
680
  {
681
  "epoch": 3.98,
682
  "learning_rate": 4.1124497991967875e-05,
683
+ "loss": 0.2978,
684
  "step": 990
685
  },
686
  {
687
  "epoch": 4.02,
688
  "learning_rate": 3.9518072289156625e-05,
689
+ "loss": 0.4239,
690
  "step": 1000
691
  },
692
  {
693
  "epoch": 4.02,
694
+ "eval_accuracy": 0.8945868945868946,
695
+ "eval_loss": 0.3586599826812744,
696
+ "eval_runtime": 12.9226,
697
+ "eval_samples_per_second": 54.323,
698
+ "eval_steps_per_second": 6.81,
699
  "step": 1000
700
  },
701
  {
702
  "epoch": 4.06,
703
  "learning_rate": 3.791164658634538e-05,
704
+ "loss": 0.2987,
705
  "step": 1010
706
  },
707
  {
708
  "epoch": 4.1,
709
  "learning_rate": 3.630522088353414e-05,
710
+ "loss": 0.3289,
711
  "step": 1020
712
  },
713
  {
714
  "epoch": 4.14,
715
  "learning_rate": 3.4698795180722896e-05,
716
+ "loss": 0.3527,
717
  "step": 1030
718
  },
719
  {
720
  "epoch": 4.18,
721
  "learning_rate": 3.309236947791165e-05,
722
+ "loss": 0.3239,
723
  "step": 1040
724
  },
725
  {
726
  "epoch": 4.22,
727
  "learning_rate": 3.14859437751004e-05,
728
+ "loss": 0.4095,
729
  "step": 1050
730
  },
731
  {
732
  "epoch": 4.26,
733
  "learning_rate": 2.987951807228916e-05,
734
+ "loss": 0.2874,
735
  "step": 1060
736
  },
737
  {
738
  "epoch": 4.3,
739
  "learning_rate": 2.827309236947791e-05,
740
+ "loss": 0.3045,
741
  "step": 1070
742
  },
743
  {
744
  "epoch": 4.34,
745
  "learning_rate": 2.6666666666666667e-05,
746
+ "loss": 0.3476,
747
  "step": 1080
748
  },
749
  {
750
  "epoch": 4.38,
751
  "learning_rate": 2.5060240963855423e-05,
752
+ "loss": 0.3654,
753
  "step": 1090
754
  },
755
  {
756
  "epoch": 4.42,
757
  "learning_rate": 2.345381526104418e-05,
758
+ "loss": 0.4083,
759
  "step": 1100
760
  },
761
  {
762
  "epoch": 4.42,
763
+ "eval_accuracy": 0.8831908831908832,
764
+ "eval_loss": 0.3767581582069397,
765
+ "eval_runtime": 12.931,
766
+ "eval_samples_per_second": 54.288,
767
+ "eval_steps_per_second": 6.805,
768
  "step": 1100
769
  },
770
  {
771
  "epoch": 4.46,
772
  "learning_rate": 2.1847389558232934e-05,
773
+ "loss": 0.3791,
774
  "step": 1110
775
  },
776
  {
777
  "epoch": 4.5,
778
  "learning_rate": 2.0240963855421687e-05,
779
+ "loss": 0.4041,
780
  "step": 1120
781
  },
782
  {
783
  "epoch": 4.54,
784
  "learning_rate": 1.863453815261044e-05,
785
+ "loss": 0.3001,
786
  "step": 1130
787
  },
788
  {
789
  "epoch": 4.58,
790
  "learning_rate": 1.7028112449799198e-05,
791
+ "loss": 0.4239,
792
  "step": 1140
793
  },
794
  {
795
  "epoch": 4.62,
796
  "learning_rate": 1.5421686746987955e-05,
797
+ "loss": 0.3491,
798
  "step": 1150
799
  },
800
  {
801
  "epoch": 4.66,
802
  "learning_rate": 1.3815261044176708e-05,
803
+ "loss": 0.4658,
804
  "step": 1160
805
  },
806
  {
807
  "epoch": 4.7,
808
  "learning_rate": 1.2208835341365463e-05,
809
+ "loss": 0.4182,
810
  "step": 1170
811
  },
812
  {
813
  "epoch": 4.74,
814
  "learning_rate": 1.0602409638554217e-05,
815
+ "loss": 0.2625,
816
  "step": 1180
817
  },
818
  {
819
  "epoch": 4.78,
820
  "learning_rate": 8.995983935742972e-06,
821
+ "loss": 0.3003,
822
  "step": 1190
823
  },
824
  {
825
  "epoch": 4.82,
826
  "learning_rate": 7.389558232931727e-06,
827
+ "loss": 0.3541,
828
  "step": 1200
829
  },
830
  {
831
  "epoch": 4.82,
832
+ "eval_accuracy": 0.9017094017094017,
833
+ "eval_loss": 0.35980701446533203,
834
+ "eval_runtime": 13.1716,
835
+ "eval_samples_per_second": 53.297,
836
+ "eval_steps_per_second": 6.681,
837
  "step": 1200
838
  },
839
  {
840
  "epoch": 4.86,
841
  "learning_rate": 5.783132530120483e-06,
842
+ "loss": 0.3299,
843
  "step": 1210
844
  },
845
  {
846
  "epoch": 4.9,
847
  "learning_rate": 4.176706827309238e-06,
848
+ "loss": 0.2998,
849
  "step": 1220
850
  },
851
  {
852
  "epoch": 4.94,
853
  "learning_rate": 2.570281124497992e-06,
854
+ "loss": 0.3439,
855
  "step": 1230
856
  },
857
  {
858
  "epoch": 4.98,
859
  "learning_rate": 9.638554216867472e-07,
860
+ "loss": 0.308,
861
  "step": 1240
862
  },
863
  {
864
  "epoch": 5.0,
865
  "step": 1245,
866
  "total_flos": 1.539101261655982e+18,
867
+ "train_loss": 0.6706935805968013,
868
+ "train_runtime": 749.324,
869
+ "train_samples_per_second": 26.504,
870
+ "train_steps_per_second": 1.661
871
  }
872
  ],
873
  "logging_steps": 10,