younggi commited on
Commit
754414a
·
1 Parent(s): 961d9a9

Training in progress, epoch 0

Browse files
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 3.25,
3
- "eval_accuracy": 0.896774193548387,
4
- "eval_loss": 0.43612316250801086,
5
- "eval_runtime": 34.2635,
6
- "eval_samples_per_second": 4.524,
7
- "eval_steps_per_second": 4.524
8
  }
 
1
  {
2
  "epoch": 3.25,
3
+ "eval_accuracy": 0.8387096774193549,
4
+ "eval_loss": 0.4286610186100006,
5
+ "eval_runtime": 12.6984,
6
+ "eval_samples_per_second": 12.206,
7
+ "eval_steps_per_second": 3.071
8
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bfaa06778e83200561c5fb73d27f3fb7456aba29d72864e8ffe2040908d8f41e
3
- size 345004642
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0297de5a668e3575a74144d278c911fb0b63d22e20f7c4ecf55dbeb6da3791a
3
+ size 345004566
test_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 3.25,
3
- "eval_accuracy": 0.896774193548387,
4
- "eval_loss": 0.43612316250801086,
5
- "eval_runtime": 34.2635,
6
- "eval_samples_per_second": 4.524,
7
- "eval_steps_per_second": 4.524
8
  }
 
1
  {
2
  "epoch": 3.25,
3
+ "eval_accuracy": 0.8387096774193549,
4
+ "eval_loss": 0.4286610186100006,
5
+ "eval_runtime": 12.6984,
6
+ "eval_samples_per_second": 12.206,
7
+ "eval_steps_per_second": 3.071
8
  }
trainer_state.json CHANGED
@@ -1,797 +1,257 @@
1
  {
2
- "best_metric": 0.9428571428571428,
3
- "best_model_checkpoint": "videomae-base-finetuned-ucf101-subset/checkpoint-1200",
4
  "epoch": 3.25,
5
- "global_step": 1200,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
- {
11
- "epoch": 0.01,
12
- "learning_rate": 4.166666666666667e-06,
13
- "loss": 2.4409,
14
- "step": 10
15
- },
16
- {
17
- "epoch": 0.02,
18
- "learning_rate": 8.333333333333334e-06,
19
- "loss": 2.2714,
20
- "step": 20
21
- },
22
- {
23
- "epoch": 0.03,
24
- "learning_rate": 1.25e-05,
25
- "loss": 2.4193,
26
- "step": 30
27
- },
28
  {
29
  "epoch": 0.03,
30
  "learning_rate": 1.6666666666666667e-05,
31
- "loss": 2.2694,
32
- "step": 40
33
- },
34
- {
35
- "epoch": 0.04,
36
- "learning_rate": 2.0833333333333336e-05,
37
- "loss": 2.413,
38
- "step": 50
39
- },
40
- {
41
- "epoch": 0.05,
42
- "learning_rate": 2.5e-05,
43
- "loss": 2.2647,
44
- "step": 60
45
- },
46
- {
47
- "epoch": 0.06,
48
- "learning_rate": 2.916666666666667e-05,
49
- "loss": 2.3393,
50
- "step": 70
51
  },
52
  {
53
  "epoch": 0.07,
54
  "learning_rate": 3.3333333333333335e-05,
55
- "loss": 2.2992,
56
- "step": 80
57
- },
58
- {
59
- "epoch": 0.07,
60
- "learning_rate": 3.7500000000000003e-05,
61
- "loss": 1.9369,
62
- "step": 90
63
- },
64
- {
65
- "epoch": 0.08,
66
- "learning_rate": 4.166666666666667e-05,
67
- "loss": 2.0877,
68
- "step": 100
69
- },
70
- {
71
- "epoch": 0.09,
72
- "learning_rate": 4.5833333333333334e-05,
73
- "loss": 2.3769,
74
- "step": 110
75
  },
76
  {
77
  "epoch": 0.1,
78
  "learning_rate": 5e-05,
79
- "loss": 2.3081,
80
- "step": 120
81
- },
82
- {
83
- "epoch": 0.11,
84
- "learning_rate": 4.9537037037037035e-05,
85
- "loss": 2.3568,
86
- "step": 130
87
- },
88
- {
89
- "epoch": 0.12,
90
- "learning_rate": 4.9074074074074075e-05,
91
- "loss": 2.1998,
92
- "step": 140
93
- },
94
- {
95
- "epoch": 0.12,
96
- "learning_rate": 4.8611111111111115e-05,
97
- "loss": 1.6882,
98
- "step": 150
99
  },
100
  {
101
  "epoch": 0.13,
102
  "learning_rate": 4.814814814814815e-05,
103
- "loss": 1.6895,
104
- "step": 160
105
- },
106
- {
107
- "epoch": 0.14,
108
- "learning_rate": 4.768518518518519e-05,
109
- "loss": 1.8284,
110
- "step": 170
111
- },
112
- {
113
- "epoch": 0.15,
114
- "learning_rate": 4.722222222222222e-05,
115
- "loss": 1.9579,
116
- "step": 180
117
- },
118
- {
119
- "epoch": 0.16,
120
- "learning_rate": 4.675925925925926e-05,
121
- "loss": 2.2044,
122
- "step": 190
123
  },
124
  {
125
  "epoch": 0.17,
126
  "learning_rate": 4.62962962962963e-05,
127
- "loss": 1.6571,
128
- "step": 200
129
- },
130
- {
131
- "epoch": 0.17,
132
- "learning_rate": 4.5833333333333334e-05,
133
- "loss": 2.4752,
134
- "step": 210
135
- },
136
- {
137
- "epoch": 0.18,
138
- "learning_rate": 4.5370370370370374e-05,
139
- "loss": 1.7249,
140
- "step": 220
141
- },
142
- {
143
- "epoch": 0.19,
144
- "learning_rate": 4.490740740740741e-05,
145
- "loss": 1.7212,
146
- "step": 230
147
  },
148
  {
149
  "epoch": 0.2,
150
  "learning_rate": 4.4444444444444447e-05,
151
- "loss": 2.1472,
152
- "step": 240
153
- },
154
- {
155
- "epoch": 0.21,
156
- "learning_rate": 4.3981481481481486e-05,
157
- "loss": 1.7036,
158
- "step": 250
159
- },
160
- {
161
- "epoch": 0.22,
162
- "learning_rate": 4.351851851851852e-05,
163
- "loss": 1.9239,
164
- "step": 260
165
- },
166
- {
167
- "epoch": 0.23,
168
- "learning_rate": 4.305555555555556e-05,
169
- "loss": 1.5625,
170
- "step": 270
171
  },
172
  {
173
  "epoch": 0.23,
174
  "learning_rate": 4.259259259259259e-05,
175
- "loss": 1.5056,
176
- "step": 280
177
- },
178
- {
179
- "epoch": 0.24,
180
- "learning_rate": 4.212962962962963e-05,
181
- "loss": 1.2661,
182
- "step": 290
183
- },
184
- {
185
- "epoch": 0.25,
186
- "learning_rate": 4.166666666666667e-05,
187
- "loss": 1.9142,
188
- "step": 300
189
  },
190
  {
191
  "epoch": 0.25,
192
- "eval_accuracy": 0.2714285714285714,
193
- "eval_loss": 1.962735652923584,
194
- "eval_runtime": 15.8959,
195
- "eval_samples_per_second": 4.404,
196
- "eval_steps_per_second": 4.404,
197
- "step": 300
198
- },
199
- {
200
- "epoch": 1.01,
201
- "learning_rate": 4.1203703703703705e-05,
202
- "loss": 1.0597,
203
- "step": 310
204
  },
205
  {
206
  "epoch": 1.02,
207
  "learning_rate": 4.074074074074074e-05,
208
- "loss": 1.2077,
209
- "step": 320
210
- },
211
- {
212
- "epoch": 1.02,
213
- "learning_rate": 4.027777777777778e-05,
214
- "loss": 1.66,
215
- "step": 330
216
- },
217
- {
218
- "epoch": 1.03,
219
- "learning_rate": 3.981481481481482e-05,
220
- "loss": 1.2875,
221
- "step": 340
222
- },
223
- {
224
- "epoch": 1.04,
225
- "learning_rate": 3.935185185185186e-05,
226
- "loss": 0.6674,
227
- "step": 350
228
  },
229
  {
230
  "epoch": 1.05,
231
  "learning_rate": 3.888888888888889e-05,
232
- "loss": 1.2272,
233
- "step": 360
234
- },
235
- {
236
- "epoch": 1.06,
237
- "learning_rate": 3.8425925925925924e-05,
238
- "loss": 0.9939,
239
- "step": 370
240
- },
241
- {
242
- "epoch": 1.07,
243
- "learning_rate": 3.7962962962962964e-05,
244
- "loss": 1.4937,
245
- "step": 380
246
- },
247
- {
248
- "epoch": 1.07,
249
- "learning_rate": 3.7500000000000003e-05,
250
- "loss": 1.7014,
251
- "step": 390
252
  },
253
  {
254
  "epoch": 1.08,
255
  "learning_rate": 3.7037037037037037e-05,
256
- "loss": 1.3917,
257
- "step": 400
258
- },
259
- {
260
- "epoch": 1.09,
261
- "learning_rate": 3.6574074074074076e-05,
262
- "loss": 0.7352,
263
- "step": 410
264
- },
265
- {
266
- "epoch": 1.1,
267
- "learning_rate": 3.611111111111111e-05,
268
- "loss": 1.0035,
269
- "step": 420
270
- },
271
- {
272
- "epoch": 1.11,
273
- "learning_rate": 3.564814814814815e-05,
274
- "loss": 0.897,
275
- "step": 430
276
  },
277
  {
278
  "epoch": 1.12,
279
  "learning_rate": 3.518518518518519e-05,
280
- "loss": 0.7653,
281
- "step": 440
282
- },
283
- {
284
- "epoch": 1.12,
285
- "learning_rate": 3.472222222222222e-05,
286
- "loss": 1.3736,
287
- "step": 450
288
- },
289
- {
290
- "epoch": 1.13,
291
- "learning_rate": 3.425925925925926e-05,
292
- "loss": 0.9104,
293
- "step": 460
294
- },
295
- {
296
- "epoch": 1.14,
297
- "learning_rate": 3.3796296296296295e-05,
298
- "loss": 0.8664,
299
- "step": 470
300
  },
301
  {
302
  "epoch": 1.15,
303
  "learning_rate": 3.3333333333333335e-05,
304
- "loss": 1.5456,
305
- "step": 480
306
- },
307
- {
308
- "epoch": 1.16,
309
- "learning_rate": 3.2870370370370375e-05,
310
- "loss": 1.3923,
311
- "step": 490
312
- },
313
- {
314
- "epoch": 1.17,
315
- "learning_rate": 3.240740740740741e-05,
316
- "loss": 0.9565,
317
- "step": 500
318
- },
319
- {
320
- "epoch": 1.18,
321
- "learning_rate": 3.194444444444444e-05,
322
- "loss": 0.5909,
323
- "step": 510
324
  },
325
  {
326
  "epoch": 1.18,
327
  "learning_rate": 3.148148148148148e-05,
328
- "loss": 0.7574,
329
- "step": 520
330
- },
331
- {
332
- "epoch": 1.19,
333
- "learning_rate": 3.101851851851852e-05,
334
- "loss": 1.2527,
335
- "step": 530
336
- },
337
- {
338
- "epoch": 1.2,
339
- "learning_rate": 3.055555555555556e-05,
340
- "loss": 1.1361,
341
- "step": 540
342
- },
343
- {
344
- "epoch": 1.21,
345
- "learning_rate": 3.0092592592592593e-05,
346
- "loss": 0.2199,
347
- "step": 550
348
  },
349
  {
350
  "epoch": 1.22,
351
  "learning_rate": 2.962962962962963e-05,
352
- "loss": 0.1439,
353
- "step": 560
354
- },
355
- {
356
- "epoch": 1.23,
357
- "learning_rate": 2.916666666666667e-05,
358
- "loss": 1.9032,
359
- "step": 570
360
- },
361
- {
362
- "epoch": 1.23,
363
- "learning_rate": 2.8703703703703706e-05,
364
- "loss": 1.7757,
365
- "step": 580
366
- },
367
- {
368
- "epoch": 1.24,
369
- "learning_rate": 2.824074074074074e-05,
370
- "loss": 1.257,
371
- "step": 590
372
  },
373
  {
374
  "epoch": 1.25,
375
  "learning_rate": 2.777777777777778e-05,
376
- "loss": 1.5385,
377
- "step": 600
378
  },
379
  {
380
  "epoch": 1.25,
381
- "eval_accuracy": 0.6857142857142857,
382
- "eval_loss": 1.001339077949524,
383
- "eval_runtime": 16.8768,
384
- "eval_samples_per_second": 4.148,
385
- "eval_steps_per_second": 4.148,
386
- "step": 600
387
- },
388
- {
389
- "epoch": 2.01,
390
- "learning_rate": 2.7314814814814816e-05,
391
- "loss": 0.5868,
392
- "step": 610
393
- },
394
- {
395
- "epoch": 2.02,
396
- "learning_rate": 2.6851851851851855e-05,
397
- "loss": 0.3039,
398
- "step": 620
399
- },
400
- {
401
- "epoch": 2.02,
402
- "learning_rate": 2.6388888888888892e-05,
403
- "loss": 0.0372,
404
- "step": 630
405
  },
406
  {
407
  "epoch": 2.03,
408
  "learning_rate": 2.5925925925925925e-05,
409
- "loss": 0.6779,
410
- "step": 640
411
- },
412
- {
413
- "epoch": 2.04,
414
- "learning_rate": 2.5462962962962965e-05,
415
- "loss": 0.4467,
416
- "step": 650
417
- },
418
- {
419
- "epoch": 2.05,
420
- "learning_rate": 2.5e-05,
421
- "loss": 0.6143,
422
- "step": 660
423
- },
424
- {
425
- "epoch": 2.06,
426
- "learning_rate": 2.4537037037037038e-05,
427
- "loss": 0.2268,
428
- "step": 670
429
  },
430
  {
431
  "epoch": 2.07,
432
  "learning_rate": 2.4074074074074074e-05,
433
- "loss": 0.3228,
434
- "step": 680
435
- },
436
- {
437
- "epoch": 2.08,
438
- "learning_rate": 2.361111111111111e-05,
439
- "loss": 0.315,
440
- "step": 690
441
- },
442
- {
443
- "epoch": 2.08,
444
- "learning_rate": 2.314814814814815e-05,
445
- "loss": 0.2197,
446
- "step": 700
447
- },
448
- {
449
- "epoch": 2.09,
450
- "learning_rate": 2.2685185185185187e-05,
451
- "loss": 0.0526,
452
- "step": 710
453
  },
454
  {
455
  "epoch": 2.1,
456
  "learning_rate": 2.2222222222222223e-05,
457
- "loss": 0.523,
458
- "step": 720
459
- },
460
- {
461
- "epoch": 2.11,
462
- "learning_rate": 2.175925925925926e-05,
463
- "loss": 0.8258,
464
- "step": 730
465
- },
466
- {
467
- "epoch": 2.12,
468
- "learning_rate": 2.1296296296296296e-05,
469
- "loss": 0.0145,
470
- "step": 740
471
- },
472
- {
473
- "epoch": 2.12,
474
- "learning_rate": 2.0833333333333336e-05,
475
- "loss": 0.4996,
476
- "step": 750
477
  },
478
  {
479
  "epoch": 2.13,
480
  "learning_rate": 2.037037037037037e-05,
481
- "loss": 0.0515,
482
- "step": 760
483
- },
484
- {
485
- "epoch": 2.14,
486
- "learning_rate": 1.990740740740741e-05,
487
- "loss": 0.2202,
488
- "step": 770
489
- },
490
- {
491
- "epoch": 2.15,
492
- "learning_rate": 1.9444444444444445e-05,
493
- "loss": 0.3831,
494
- "step": 780
495
- },
496
- {
497
- "epoch": 2.16,
498
- "learning_rate": 1.8981481481481482e-05,
499
- "loss": 0.5722,
500
- "step": 790
501
  },
502
  {
503
  "epoch": 2.17,
504
  "learning_rate": 1.8518518518518518e-05,
505
- "loss": 0.3105,
506
- "step": 800
507
- },
508
- {
509
- "epoch": 2.17,
510
- "learning_rate": 1.8055555555555555e-05,
511
- "loss": 0.0518,
512
- "step": 810
513
- },
514
- {
515
- "epoch": 2.18,
516
- "learning_rate": 1.7592592592592595e-05,
517
- "loss": 0.0928,
518
- "step": 820
519
- },
520
- {
521
- "epoch": 2.19,
522
- "learning_rate": 1.712962962962963e-05,
523
- "loss": 0.3528,
524
- "step": 830
525
  },
526
  {
527
  "epoch": 2.2,
528
  "learning_rate": 1.6666666666666667e-05,
529
- "loss": 0.1551,
530
- "step": 840
531
- },
532
- {
533
- "epoch": 2.21,
534
- "learning_rate": 1.6203703703703704e-05,
535
- "loss": 0.5542,
536
- "step": 850
537
- },
538
- {
539
- "epoch": 2.22,
540
- "learning_rate": 1.574074074074074e-05,
541
- "loss": 0.6221,
542
- "step": 860
543
- },
544
- {
545
- "epoch": 2.23,
546
- "learning_rate": 1.527777777777778e-05,
547
- "loss": 0.0083,
548
- "step": 870
549
  },
550
  {
551
  "epoch": 2.23,
552
  "learning_rate": 1.4814814814814815e-05,
553
- "loss": 0.1625,
554
- "step": 880
555
- },
556
- {
557
- "epoch": 2.24,
558
- "learning_rate": 1.4351851851851853e-05,
559
- "loss": 0.2397,
560
- "step": 890
561
- },
562
- {
563
- "epoch": 2.25,
564
- "learning_rate": 1.388888888888889e-05,
565
- "loss": 0.1054,
566
- "step": 900
567
  },
568
  {
569
  "epoch": 2.25,
570
- "eval_accuracy": 0.7142857142857143,
571
- "eval_loss": 0.9923623204231262,
572
- "eval_runtime": 16.913,
573
- "eval_samples_per_second": 4.139,
574
- "eval_steps_per_second": 4.139,
575
- "step": 900
576
- },
577
- {
578
- "epoch": 3.01,
579
- "learning_rate": 1.3425925925925928e-05,
580
- "loss": 0.0559,
581
- "step": 910
582
  },
583
  {
584
  "epoch": 3.02,
585
  "learning_rate": 1.2962962962962962e-05,
586
- "loss": 0.0097,
587
- "step": 920
588
- },
589
- {
590
- "epoch": 3.02,
591
- "learning_rate": 1.25e-05,
592
- "loss": 0.0173,
593
- "step": 930
594
- },
595
- {
596
- "epoch": 3.03,
597
- "learning_rate": 1.2037037037037037e-05,
598
- "loss": 0.017,
599
- "step": 940
600
- },
601
- {
602
- "epoch": 3.04,
603
- "learning_rate": 1.1574074074074075e-05,
604
- "loss": 0.1396,
605
- "step": 950
606
  },
607
  {
608
  "epoch": 3.05,
609
  "learning_rate": 1.1111111111111112e-05,
610
- "loss": 0.0058,
611
- "step": 960
612
- },
613
- {
614
- "epoch": 3.06,
615
- "learning_rate": 1.0648148148148148e-05,
616
- "loss": 0.0069,
617
- "step": 970
618
- },
619
- {
620
- "epoch": 3.07,
621
- "learning_rate": 1.0185185185185185e-05,
622
- "loss": 0.0354,
623
- "step": 980
624
- },
625
- {
626
- "epoch": 3.08,
627
- "learning_rate": 9.722222222222223e-06,
628
- "loss": 0.0286,
629
- "step": 990
630
  },
631
  {
632
  "epoch": 3.08,
633
  "learning_rate": 9.259259259259259e-06,
634
- "loss": 0.2338,
635
- "step": 1000
636
- },
637
- {
638
- "epoch": 3.09,
639
- "learning_rate": 8.796296296296297e-06,
640
- "loss": 0.3499,
641
- "step": 1010
642
- },
643
- {
644
- "epoch": 3.1,
645
- "learning_rate": 8.333333333333334e-06,
646
- "loss": 0.0078,
647
- "step": 1020
648
- },
649
- {
650
- "epoch": 3.11,
651
- "learning_rate": 7.87037037037037e-06,
652
- "loss": 0.1362,
653
- "step": 1030
654
  },
655
  {
656
  "epoch": 3.12,
657
  "learning_rate": 7.4074074074074075e-06,
658
- "loss": 0.0039,
659
- "step": 1040
660
- },
661
- {
662
- "epoch": 3.12,
663
- "learning_rate": 6.944444444444445e-06,
664
- "loss": 0.0396,
665
- "step": 1050
666
- },
667
- {
668
- "epoch": 3.13,
669
- "learning_rate": 6.481481481481481e-06,
670
- "loss": 0.0284,
671
- "step": 1060
672
- },
673
- {
674
- "epoch": 3.14,
675
- "learning_rate": 6.0185185185185185e-06,
676
- "loss": 0.4627,
677
- "step": 1070
678
  },
679
  {
680
  "epoch": 3.15,
681
  "learning_rate": 5.555555555555556e-06,
682
- "loss": 0.231,
683
- "step": 1080
684
- },
685
- {
686
- "epoch": 3.16,
687
- "learning_rate": 5.092592592592592e-06,
688
- "loss": 0.3487,
689
- "step": 1090
690
- },
691
- {
692
- "epoch": 3.17,
693
- "learning_rate": 4.6296296296296296e-06,
694
- "loss": 0.0043,
695
- "step": 1100
696
- },
697
- {
698
- "epoch": 3.17,
699
- "learning_rate": 4.166666666666667e-06,
700
- "loss": 0.0374,
701
- "step": 1110
702
  },
703
  {
704
  "epoch": 3.18,
705
  "learning_rate": 3.7037037037037037e-06,
706
- "loss": 0.7036,
707
- "step": 1120
708
- },
709
- {
710
- "epoch": 3.19,
711
- "learning_rate": 3.2407407407407406e-06,
712
- "loss": 0.0039,
713
- "step": 1130
714
- },
715
- {
716
- "epoch": 3.2,
717
- "learning_rate": 2.777777777777778e-06,
718
- "loss": 0.3257,
719
- "step": 1140
720
- },
721
- {
722
- "epoch": 3.21,
723
- "learning_rate": 2.3148148148148148e-06,
724
- "loss": 0.0092,
725
- "step": 1150
726
  },
727
  {
728
  "epoch": 3.22,
729
  "learning_rate": 1.8518518518518519e-06,
730
- "loss": 0.0107,
731
- "step": 1160
732
- },
733
- {
734
- "epoch": 3.23,
735
- "learning_rate": 1.388888888888889e-06,
736
- "loss": 0.1857,
737
- "step": 1170
738
- },
739
- {
740
- "epoch": 3.23,
741
- "learning_rate": 9.259259259259259e-07,
742
- "loss": 0.1795,
743
- "step": 1180
744
- },
745
- {
746
- "epoch": 3.24,
747
- "learning_rate": 4.6296296296296297e-07,
748
- "loss": 0.0069,
749
- "step": 1190
750
  },
751
  {
752
  "epoch": 3.25,
753
  "learning_rate": 0.0,
754
- "loss": 1.3117,
755
- "step": 1200
756
  },
757
  {
758
  "epoch": 3.25,
759
- "eval_accuracy": 0.9428571428571428,
760
- "eval_loss": 0.2302306592464447,
761
- "eval_runtime": 16.1163,
762
- "eval_samples_per_second": 4.343,
763
- "eval_steps_per_second": 4.343,
764
- "step": 1200
765
  },
766
  {
767
  "epoch": 3.25,
768
- "step": 1200,
769
  "total_flos": 1.495384188125184e+18,
770
- "train_loss": 0.9095784844768544,
771
- "train_runtime": 733.7164,
772
- "train_samples_per_second": 1.636,
773
- "train_steps_per_second": 1.636
774
  },
775
  {
776
  "epoch": 3.25,
777
- "eval_accuracy": 0.896774193548387,
778
- "eval_loss": 0.43612316250801086,
779
- "eval_runtime": 36.0305,
780
- "eval_samples_per_second": 4.302,
781
- "eval_steps_per_second": 4.302,
782
- "step": 1200
783
  },
784
  {
785
  "epoch": 3.25,
786
- "eval_accuracy": 0.896774193548387,
787
- "eval_loss": 0.43612316250801086,
788
- "eval_runtime": 34.2635,
789
- "eval_samples_per_second": 4.524,
790
- "eval_steps_per_second": 4.524,
791
- "step": 1200
792
  }
793
  ],
794
- "max_steps": 1200,
795
  "num_train_epochs": 9223372036854775807,
796
  "total_flos": 1.495384188125184e+18,
797
  "trial_name": null,
 
1
  {
2
+ "best_metric": 0.9285714285714286,
3
+ "best_model_checkpoint": "videomae-base-finetuned-ucf101-subset/checkpoint-225",
4
  "epoch": 3.25,
5
+ "global_step": 300,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  {
11
  "epoch": 0.03,
12
  "learning_rate": 1.6666666666666667e-05,
13
+ "loss": 2.3532,
14
+ "step": 10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  },
16
  {
17
  "epoch": 0.07,
18
  "learning_rate": 3.3333333333333335e-05,
19
+ "loss": 2.3027,
20
+ "step": 20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  },
22
  {
23
  "epoch": 0.1,
24
  "learning_rate": 5e-05,
25
+ "loss": 2.2167,
26
+ "step": 30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  },
28
  {
29
  "epoch": 0.13,
30
  "learning_rate": 4.814814814814815e-05,
31
+ "loss": 2.019,
32
+ "step": 40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  },
34
  {
35
  "epoch": 0.17,
36
  "learning_rate": 4.62962962962963e-05,
37
+ "loss": 2.0514,
38
+ "step": 50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  },
40
  {
41
  "epoch": 0.2,
42
  "learning_rate": 4.4444444444444447e-05,
43
+ "loss": 1.7294,
44
+ "step": 60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  },
46
  {
47
  "epoch": 0.23,
48
  "learning_rate": 4.259259259259259e-05,
49
+ "loss": 1.6613,
50
+ "step": 70
 
 
 
 
 
 
 
 
 
 
 
 
51
  },
52
  {
53
  "epoch": 0.25,
54
+ "eval_accuracy": 0.5285714285714286,
55
+ "eval_loss": 1.3506253957748413,
56
+ "eval_runtime": 6.0005,
57
+ "eval_samples_per_second": 11.666,
58
+ "eval_steps_per_second": 3.0,
59
+ "step": 75
 
 
 
 
 
 
60
  },
61
  {
62
  "epoch": 1.02,
63
  "learning_rate": 4.074074074074074e-05,
64
+ "loss": 1.2301,
65
+ "step": 80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  },
67
  {
68
  "epoch": 1.05,
69
  "learning_rate": 3.888888888888889e-05,
70
+ "loss": 0.9771,
71
+ "step": 90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  },
73
  {
74
  "epoch": 1.08,
75
  "learning_rate": 3.7037037037037037e-05,
76
+ "loss": 1.0313,
77
+ "step": 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  },
79
  {
80
  "epoch": 1.12,
81
  "learning_rate": 3.518518518518519e-05,
82
+ "loss": 0.6111,
83
+ "step": 110
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  },
85
  {
86
  "epoch": 1.15,
87
  "learning_rate": 3.3333333333333335e-05,
88
+ "loss": 1.0471,
89
+ "step": 120
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  },
91
  {
92
  "epoch": 1.18,
93
  "learning_rate": 3.148148148148148e-05,
94
+ "loss": 0.8325,
95
+ "step": 130
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
  },
97
  {
98
  "epoch": 1.22,
99
  "learning_rate": 2.962962962962963e-05,
100
+ "loss": 0.8005,
101
+ "step": 140
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  },
103
  {
104
  "epoch": 1.25,
105
  "learning_rate": 2.777777777777778e-05,
106
+ "loss": 0.4175,
107
+ "step": 150
108
  },
109
  {
110
  "epoch": 1.25,
111
+ "eval_accuracy": 0.7285714285714285,
112
+ "eval_loss": 0.5572353601455688,
113
+ "eval_runtime": 5.7245,
114
+ "eval_samples_per_second": 12.228,
115
+ "eval_steps_per_second": 3.144,
116
+ "step": 150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  },
118
  {
119
  "epoch": 2.03,
120
  "learning_rate": 2.5925925925925925e-05,
121
+ "loss": 0.5881,
122
+ "step": 160
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  },
124
  {
125
  "epoch": 2.07,
126
  "learning_rate": 2.4074074074074074e-05,
127
+ "loss": 0.402,
128
+ "step": 170
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
  },
130
  {
131
  "epoch": 2.1,
132
  "learning_rate": 2.2222222222222223e-05,
133
+ "loss": 0.2738,
134
+ "step": 180
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
  },
136
  {
137
  "epoch": 2.13,
138
  "learning_rate": 2.037037037037037e-05,
139
+ "loss": 0.3433,
140
+ "step": 190
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
  },
142
  {
143
  "epoch": 2.17,
144
  "learning_rate": 1.8518518518518518e-05,
145
+ "loss": 0.4863,
146
+ "step": 200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  },
148
  {
149
  "epoch": 2.2,
150
  "learning_rate": 1.6666666666666667e-05,
151
+ "loss": 0.2916,
152
+ "step": 210
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
  },
154
  {
155
  "epoch": 2.23,
156
  "learning_rate": 1.4814814814814815e-05,
157
+ "loss": 0.3592,
158
+ "step": 220
 
 
 
 
 
 
 
 
 
 
 
 
159
  },
160
  {
161
  "epoch": 2.25,
162
+ "eval_accuracy": 0.9285714285714286,
163
+ "eval_loss": 0.24635276198387146,
164
+ "eval_runtime": 5.6321,
165
+ "eval_samples_per_second": 12.429,
166
+ "eval_steps_per_second": 3.196,
167
+ "step": 225
 
 
 
 
 
 
168
  },
169
  {
170
  "epoch": 3.02,
171
  "learning_rate": 1.2962962962962962e-05,
172
+ "loss": 0.2532,
173
+ "step": 230
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
  },
175
  {
176
  "epoch": 3.05,
177
  "learning_rate": 1.1111111111111112e-05,
178
+ "loss": 0.3399,
179
+ "step": 240
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
  },
181
  {
182
  "epoch": 3.08,
183
  "learning_rate": 9.259259259259259e-06,
184
+ "loss": 0.1234,
185
+ "step": 250
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
186
  },
187
  {
188
  "epoch": 3.12,
189
  "learning_rate": 7.4074074074074075e-06,
190
+ "loss": 0.2255,
191
+ "step": 260
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
192
  },
193
  {
194
  "epoch": 3.15,
195
  "learning_rate": 5.555555555555556e-06,
196
+ "loss": 0.2442,
197
+ "step": 270
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
198
  },
199
  {
200
  "epoch": 3.18,
201
  "learning_rate": 3.7037037037037037e-06,
202
+ "loss": 0.087,
203
+ "step": 280
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
204
  },
205
  {
206
  "epoch": 3.22,
207
  "learning_rate": 1.8518518518518519e-06,
208
+ "loss": 0.1592,
209
+ "step": 290
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
210
  },
211
  {
212
  "epoch": 3.25,
213
  "learning_rate": 0.0,
214
+ "loss": 0.1935,
215
+ "step": 300
216
  },
217
  {
218
  "epoch": 3.25,
219
+ "eval_accuracy": 0.9,
220
+ "eval_loss": 0.260299414396286,
221
+ "eval_runtime": 6.0335,
222
+ "eval_samples_per_second": 11.602,
223
+ "eval_steps_per_second": 2.983,
224
+ "step": 300
225
  },
226
  {
227
  "epoch": 3.25,
228
+ "step": 300,
229
  "total_flos": 1.495384188125184e+18,
230
+ "train_loss": 0.8550392069419225,
231
+ "train_runtime": 221.509,
232
+ "train_samples_per_second": 5.417,
233
+ "train_steps_per_second": 1.354
234
  },
235
  {
236
  "epoch": 3.25,
237
+ "eval_accuracy": 0.8387096774193549,
238
+ "eval_loss": 0.4286609888076782,
239
+ "eval_runtime": 13.9577,
240
+ "eval_samples_per_second": 11.105,
241
+ "eval_steps_per_second": 2.794,
242
+ "step": 300
243
  },
244
  {
245
  "epoch": 3.25,
246
+ "eval_accuracy": 0.8387096774193549,
247
+ "eval_loss": 0.4286610186100006,
248
+ "eval_runtime": 12.6984,
249
+ "eval_samples_per_second": 12.206,
250
+ "eval_steps_per_second": 3.071,
251
+ "step": 300
252
  }
253
  ],
254
+ "max_steps": 300,
255
  "num_train_epochs": 9223372036854775807,
256
  "total_flos": 1.495384188125184e+18,
257
  "trial_name": null,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:688df4976d927071681df19224c731e42bd5ca3036b442e3bd2dadf8599ccc1d
3
  size 3439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db98e461593c9f94e624b2505bcf86db00ab67fbc2dbba21490949385c2ccf65
3
  size 3439