Julia0408 committed on
Commit
047afc0
1 Parent(s): 6c42a9e

End of training

Browse files
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 7.12,
3
- "eval_accuracy": 0.9548387096774194,
4
- "eval_loss": 0.16578371822834015,
5
- "eval_runtime": 38.5267,
6
- "eval_samples_per_second": 4.023,
7
- "eval_steps_per_second": 1.012
8
  }
 
1
  {
2
+ "epoch": 7.11,
3
+ "eval_accuracy": 0.9032258064516129,
4
+ "eval_loss": 0.2707708775997162,
5
+ "eval_runtime": 38.8958,
6
+ "eval_samples_per_second": 3.985,
7
+ "eval_steps_per_second": 0.514
8
  }
runs/May24_19-16-49_52f550a25be1/events.out.tfevents.1684957116.52f550a25be1.31.5 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ab60d87bcc8706b13f49c5f5fb137c759f12fd676bf832ec89de14fbb7e38503
3
- size 363
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1029911e349a2f8020a7210839a2da8a434a876b33c6a2927a03c11174a6766
3
+ size 686
test_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 7.12,
3
- "eval_accuracy": 0.9548387096774194,
4
- "eval_loss": 0.16578371822834015,
5
- "eval_runtime": 38.5267,
6
- "eval_samples_per_second": 4.023,
7
- "eval_steps_per_second": 1.012
8
  }
 
1
  {
2
+ "epoch": 7.11,
3
+ "eval_accuracy": 0.9032258064516129,
4
+ "eval_loss": 0.2707708775997162,
5
+ "eval_runtime": 38.8958,
6
+ "eval_samples_per_second": 3.985,
7
+ "eval_steps_per_second": 0.514
8
  }
trainer_state.json CHANGED
@@ -1,475 +1,295 @@
1
  {
2
- "best_metric": 0.9857142857142858,
3
- "best_model_checkpoint": "videomae-base-finetuned-ucf101-subset/checkpoint-525",
4
- "epoch": 7.125,
5
- "global_step": 600,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
- {
11
- "epoch": 0.02,
12
- "learning_rate": 8.333333333333334e-06,
13
- "loss": 2.4273,
14
- "step": 10
15
- },
16
  {
17
  "epoch": 0.03,
18
  "learning_rate": 1.6666666666666667e-05,
19
- "loss": 2.3496,
20
- "step": 20
21
- },
22
- {
23
- "epoch": 0.05,
24
- "learning_rate": 2.5e-05,
25
- "loss": 2.291,
26
- "step": 30
27
  },
28
  {
29
  "epoch": 0.07,
30
  "learning_rate": 3.3333333333333335e-05,
31
- "loss": 2.193,
32
- "step": 40
33
- },
34
- {
35
- "epoch": 0.08,
36
- "learning_rate": 4.166666666666667e-05,
37
- "loss": 2.2776,
38
- "step": 50
39
  },
40
  {
41
  "epoch": 0.1,
42
  "learning_rate": 5e-05,
43
- "loss": 2.0892,
44
- "step": 60
45
- },
46
- {
47
- "epoch": 0.12,
48
- "learning_rate": 4.9074074074074075e-05,
49
- "loss": 2.0934,
50
- "step": 70
51
  },
52
  {
53
- "epoch": 0.12,
54
- "eval_accuracy": 0.5428571428571428,
55
- "eval_loss": 1.7595646381378174,
56
- "eval_runtime": 17.4275,
57
- "eval_samples_per_second": 4.017,
58
- "eval_steps_per_second": 1.033,
59
- "step": 75
60
  },
61
  {
62
  "epoch": 1.01,
63
  "learning_rate": 4.814814814814815e-05,
64
- "loss": 1.6538,
65
- "step": 80
66
- },
67
- {
68
- "epoch": 1.02,
69
- "learning_rate": 4.722222222222222e-05,
70
- "loss": 1.3968,
71
- "step": 90
72
  },
73
  {
74
  "epoch": 1.04,
75
  "learning_rate": 4.62962962962963e-05,
76
- "loss": 1.3605,
77
- "step": 100
78
- },
79
- {
80
- "epoch": 1.06,
81
- "learning_rate": 4.5370370370370374e-05,
82
- "loss": 1.0257,
83
- "step": 110
84
  },
85
  {
86
  "epoch": 1.07,
87
  "learning_rate": 4.4444444444444447e-05,
88
- "loss": 1.0096,
89
- "step": 120
90
- },
91
- {
92
- "epoch": 1.09,
93
- "learning_rate": 4.351851851851852e-05,
94
- "loss": 0.7081,
95
- "step": 130
96
  },
97
  {
98
  "epoch": 1.11,
99
  "learning_rate": 4.259259259259259e-05,
100
- "loss": 0.6225,
101
- "step": 140
102
- },
103
- {
104
- "epoch": 1.12,
105
- "learning_rate": 4.166666666666667e-05,
106
- "loss": 0.9556,
107
- "step": 150
108
  },
109
  {
110
- "epoch": 1.12,
111
- "eval_accuracy": 0.6428571428571429,
112
- "eval_loss": 1.1258111000061035,
113
- "eval_runtime": 17.2592,
114
- "eval_samples_per_second": 4.056,
115
- "eval_steps_per_second": 1.043,
116
- "step": 150
117
  },
118
  {
119
- "epoch": 2.02,
120
  "learning_rate": 4.074074074074074e-05,
121
- "loss": 0.6893,
122
- "step": 160
123
- },
124
- {
125
- "epoch": 2.03,
126
- "learning_rate": 3.981481481481482e-05,
127
- "loss": 0.5757,
128
- "step": 170
129
  },
130
  {
131
  "epoch": 2.05,
132
  "learning_rate": 3.888888888888889e-05,
133
- "loss": 0.4064,
134
- "step": 180
135
- },
136
- {
137
- "epoch": 2.07,
138
- "learning_rate": 3.7962962962962964e-05,
139
- "loss": 0.5554,
140
- "step": 190
141
  },
142
  {
143
  "epoch": 2.08,
144
  "learning_rate": 3.7037037037037037e-05,
145
- "loss": 0.3856,
146
- "step": 200
147
- },
148
- {
149
- "epoch": 2.1,
150
- "learning_rate": 3.611111111111111e-05,
151
- "loss": 0.3292,
152
- "step": 210
153
  },
154
  {
155
- "epoch": 2.12,
156
  "learning_rate": 3.518518518518519e-05,
157
- "loss": 0.4154,
158
- "step": 220
159
- },
160
- {
161
- "epoch": 2.12,
162
- "eval_accuracy": 0.8571428571428571,
163
- "eval_loss": 0.4632211923599243,
164
- "eval_runtime": 14.7059,
165
- "eval_samples_per_second": 4.76,
166
- "eval_steps_per_second": 1.224,
167
- "step": 225
168
  },
169
  {
170
- "epoch": 3.01,
171
- "learning_rate": 3.425925925925926e-05,
172
- "loss": 0.3805,
173
- "step": 230
 
 
 
174
  },
175
  {
176
  "epoch": 3.02,
177
  "learning_rate": 3.3333333333333335e-05,
178
- "loss": 0.1538,
179
- "step": 240
180
- },
181
- {
182
- "epoch": 3.04,
183
- "learning_rate": 3.240740740740741e-05,
184
- "loss": 0.1232,
185
- "step": 250
186
  },
187
  {
188
- "epoch": 3.06,
189
  "learning_rate": 3.148148148148148e-05,
190
- "loss": 0.2196,
191
- "step": 260
192
- },
193
- {
194
- "epoch": 3.08,
195
- "learning_rate": 3.055555555555556e-05,
196
- "loss": 0.3353,
197
- "step": 270
198
  },
199
  {
200
  "epoch": 3.09,
201
  "learning_rate": 2.962962962962963e-05,
202
- "loss": 0.2452,
203
- "step": 280
204
- },
205
- {
206
- "epoch": 3.11,
207
- "learning_rate": 2.8703703703703706e-05,
208
- "loss": 0.1609,
209
- "step": 290
210
  },
211
  {
212
  "epoch": 3.12,
213
  "learning_rate": 2.777777777777778e-05,
214
- "loss": 0.4116,
215
- "step": 300
216
- },
217
- {
218
- "epoch": 3.12,
219
- "eval_accuracy": 0.8571428571428571,
220
- "eval_loss": 0.5566033124923706,
221
- "eval_runtime": 16.6659,
222
- "eval_samples_per_second": 4.2,
223
- "eval_steps_per_second": 1.08,
224
- "step": 300
225
  },
226
  {
227
- "epoch": 4.02,
228
- "learning_rate": 2.6851851851851855e-05,
229
- "loss": 0.2113,
230
- "step": 310
 
 
 
231
  },
232
  {
233
  "epoch": 4.03,
234
  "learning_rate": 2.5925925925925925e-05,
235
- "loss": 0.3154,
236
- "step": 320
237
- },
238
- {
239
- "epoch": 4.05,
240
- "learning_rate": 2.5e-05,
241
- "loss": 0.0308,
242
- "step": 330
243
  },
244
  {
245
- "epoch": 4.07,
246
  "learning_rate": 2.4074074074074074e-05,
247
- "loss": 0.026,
248
- "step": 340
249
- },
250
- {
251
- "epoch": 4.08,
252
- "learning_rate": 2.314814814814815e-05,
253
- "loss": 0.0414,
254
- "step": 350
255
  },
256
  {
257
- "epoch": 4.1,
258
  "learning_rate": 2.2222222222222223e-05,
259
- "loss": 0.0476,
260
- "step": 360
261
- },
262
- {
263
- "epoch": 4.12,
264
- "learning_rate": 2.1296296296296296e-05,
265
- "loss": 0.021,
266
- "step": 370
267
- },
268
- {
269
- "epoch": 4.12,
270
- "eval_accuracy": 0.9714285714285714,
271
- "eval_loss": 0.17257067561149597,
272
- "eval_runtime": 17.1058,
273
- "eval_samples_per_second": 4.092,
274
- "eval_steps_per_second": 1.052,
275
- "step": 375
276
  },
277
  {
278
- "epoch": 5.01,
279
  "learning_rate": 2.037037037037037e-05,
280
- "loss": 0.189,
281
- "step": 380
282
  },
283
  {
284
- "epoch": 5.03,
285
- "learning_rate": 1.9444444444444445e-05,
286
- "loss": 0.2123,
287
- "step": 390
 
 
 
288
  },
289
  {
290
- "epoch": 5.04,
291
  "learning_rate": 1.8518518518518518e-05,
292
- "loss": 0.0096,
293
- "step": 400
294
- },
295
- {
296
- "epoch": 5.06,
297
- "learning_rate": 1.7592592592592595e-05,
298
- "loss": 0.0385,
299
- "step": 410
300
  },
301
  {
302
- "epoch": 5.08,
303
  "learning_rate": 1.6666666666666667e-05,
304
- "loss": 0.1287,
305
- "step": 420
306
- },
307
- {
308
- "epoch": 5.09,
309
- "learning_rate": 1.574074074074074e-05,
310
- "loss": 0.015,
311
- "step": 430
312
  },
313
  {
314
- "epoch": 5.11,
315
  "learning_rate": 1.4814814814814815e-05,
316
- "loss": 0.0473,
317
- "step": 440
318
- },
319
- {
320
- "epoch": 5.12,
321
- "learning_rate": 1.388888888888889e-05,
322
- "loss": 0.0084,
323
- "step": 450
324
  },
325
  {
326
- "epoch": 5.12,
327
- "eval_accuracy": 0.9714285714285714,
328
- "eval_loss": 0.10275492072105408,
329
- "eval_runtime": 17.0163,
330
- "eval_samples_per_second": 4.114,
331
- "eval_steps_per_second": 1.058,
332
- "step": 450
333
  },
334
  {
335
- "epoch": 6.02,
336
  "learning_rate": 1.2962962962962962e-05,
337
- "loss": 0.108,
338
- "step": 460
339
- },
340
- {
341
- "epoch": 6.03,
342
- "learning_rate": 1.2037037037037037e-05,
343
- "loss": 0.0063,
344
- "step": 470
345
  },
346
  {
347
- "epoch": 6.05,
348
  "learning_rate": 1.1111111111111112e-05,
349
- "loss": 0.0816,
350
- "step": 480
351
  },
352
  {
353
  "epoch": 6.07,
354
- "learning_rate": 1.0185185185185185e-05,
355
- "loss": 0.1057,
356
- "step": 490
357
- },
358
- {
359
- "epoch": 6.08,
360
  "learning_rate": 9.259259259259259e-06,
361
- "loss": 0.0057,
362
- "step": 500
363
- },
364
- {
365
- "epoch": 6.1,
366
- "learning_rate": 8.333333333333334e-06,
367
- "loss": 0.006,
368
- "step": 510
369
  },
370
  {
371
- "epoch": 6.12,
372
  "learning_rate": 7.4074074074074075e-06,
373
- "loss": 0.0056,
374
- "step": 520
375
  },
376
  {
377
- "epoch": 6.12,
378
- "eval_accuracy": 0.9857142857142858,
379
- "eval_loss": 0.035175032913684845,
380
- "eval_runtime": 16.5696,
381
- "eval_samples_per_second": 4.225,
382
- "eval_steps_per_second": 1.086,
383
- "step": 525
384
  },
385
  {
386
  "epoch": 7.01,
387
- "learning_rate": 6.481481481481481e-06,
388
- "loss": 0.0052,
389
- "step": 530
390
  },
391
  {
392
- "epoch": 7.03,
393
- "learning_rate": 5.555555555555556e-06,
394
- "loss": 0.0054,
395
- "step": 540
396
  },
397
  {
398
- "epoch": 7.04,
399
- "learning_rate": 4.6296296296296296e-06,
400
- "loss": 0.0049,
401
- "step": 550
402
  },
403
  {
404
- "epoch": 7.06,
405
- "learning_rate": 3.7037037037037037e-06,
406
- "loss": 0.0369,
407
- "step": 560
408
  },
409
  {
410
- "epoch": 7.08,
411
- "learning_rate": 2.777777777777778e-06,
412
- "loss": 0.139,
413
- "step": 570
 
 
 
414
  },
415
  {
416
- "epoch": 7.09,
417
- "learning_rate": 1.8518518518518519e-06,
418
- "loss": 0.0054,
419
- "step": 580
 
 
 
420
  },
421
  {
422
  "epoch": 7.11,
423
- "learning_rate": 9.259259259259259e-07,
424
- "loss": 0.008,
425
- "step": 590
 
 
 
426
  },
427
  {
428
- "epoch": 7.12,
429
- "learning_rate": 0.0,
430
- "loss": 0.0051,
431
- "step": 600
432
- },
433
- {
434
- "epoch": 7.12,
435
- "eval_accuracy": 0.9714285714285714,
436
- "eval_loss": 0.04347994923591614,
437
- "eval_runtime": 16.6367,
438
- "eval_samples_per_second": 4.208,
439
- "eval_steps_per_second": 1.082,
440
- "step": 600
441
- },
442
- {
443
- "epoch": 7.12,
444
- "step": 600,
445
- "total_flos": 2.990768376250368e+18,
446
- "train_loss": 0.5285309781382481,
447
- "train_runtime": 1415.9208,
448
- "train_samples_per_second": 1.695,
449
- "train_steps_per_second": 0.424
450
- },
451
- {
452
- "epoch": 7.12,
453
- "eval_accuracy": 0.9548387096774194,
454
- "eval_loss": 0.16580888628959656,
455
- "eval_runtime": 38.7083,
456
- "eval_samples_per_second": 4.004,
457
- "eval_steps_per_second": 1.008,
458
- "step": 600
459
- },
460
- {
461
- "epoch": 7.12,
462
- "eval_accuracy": 0.9548387096774194,
463
- "eval_loss": 0.16578371822834015,
464
- "eval_runtime": 38.5267,
465
- "eval_samples_per_second": 4.023,
466
- "eval_steps_per_second": 1.012,
467
- "step": 600
468
  }
469
  ],
470
- "max_steps": 600,
471
  "num_train_epochs": 9223372036854775807,
472
- "total_flos": 2.990768376250368e+18,
473
  "trial_name": null,
474
  "trial_params": null
475
  }
 
1
  {
2
+ "best_metric": 0.9428571428571428,
3
+ "best_model_checkpoint": "videomae-base-finetuned-ucf101-subset/checkpoint-228",
4
+ "epoch": 7.113333333333333,
5
+ "global_step": 300,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
 
 
 
 
 
 
10
  {
11
  "epoch": 0.03,
12
  "learning_rate": 1.6666666666666667e-05,
13
+ "loss": 0.6438,
14
+ "step": 10
 
 
 
 
 
 
15
  },
16
  {
17
  "epoch": 0.07,
18
  "learning_rate": 3.3333333333333335e-05,
19
+ "loss": 0.6929,
20
+ "step": 20
 
 
 
 
 
 
21
  },
22
  {
23
  "epoch": 0.1,
24
  "learning_rate": 5e-05,
25
+ "loss": 0.7999,
26
+ "step": 30
 
 
 
 
 
 
27
  },
28
  {
29
+ "epoch": 0.13,
30
+ "eval_accuracy": 0.7142857142857143,
31
+ "eval_loss": 0.7967767715454102,
32
+ "eval_runtime": 16.581,
33
+ "eval_samples_per_second": 4.222,
34
+ "eval_steps_per_second": 0.543,
35
+ "step": 38
36
  },
37
  {
38
  "epoch": 1.01,
39
  "learning_rate": 4.814814814814815e-05,
40
+ "loss": 0.5279,
41
+ "step": 40
 
 
 
 
 
 
42
  },
43
  {
44
  "epoch": 1.04,
45
  "learning_rate": 4.62962962962963e-05,
46
+ "loss": 0.4039,
47
+ "step": 50
 
 
 
 
 
 
48
  },
49
  {
50
  "epoch": 1.07,
51
  "learning_rate": 4.4444444444444447e-05,
52
+ "loss": 0.3795,
53
+ "step": 60
 
 
 
 
 
 
54
  },
55
  {
56
  "epoch": 1.11,
57
  "learning_rate": 4.259259259259259e-05,
58
+ "loss": 0.3574,
59
+ "step": 70
 
 
 
 
 
 
60
  },
61
  {
62
+ "epoch": 1.13,
63
+ "eval_accuracy": 0.7571428571428571,
64
+ "eval_loss": 0.6632290482521057,
65
+ "eval_runtime": 16.8693,
66
+ "eval_samples_per_second": 4.15,
67
+ "eval_steps_per_second": 0.534,
68
+ "step": 76
69
  },
70
  {
71
+ "epoch": 2.01,
72
  "learning_rate": 4.074074074074074e-05,
73
+ "loss": 0.3809,
74
+ "step": 80
 
 
 
 
 
 
75
  },
76
  {
77
  "epoch": 2.05,
78
  "learning_rate": 3.888888888888889e-05,
79
+ "loss": 0.3093,
80
+ "step": 90
 
 
 
 
 
 
81
  },
82
  {
83
  "epoch": 2.08,
84
  "learning_rate": 3.7037037037037037e-05,
85
+ "loss": 0.2716,
86
+ "step": 100
 
 
 
 
 
 
87
  },
88
  {
89
+ "epoch": 2.11,
90
  "learning_rate": 3.518518518518519e-05,
91
+ "loss": 0.1651,
92
+ "step": 110
 
 
 
 
 
 
 
 
 
93
  },
94
  {
95
+ "epoch": 2.13,
96
+ "eval_accuracy": 0.8428571428571429,
97
+ "eval_loss": 0.3925662040710449,
98
+ "eval_runtime": 13.1445,
99
+ "eval_samples_per_second": 5.325,
100
+ "eval_steps_per_second": 0.685,
101
+ "step": 114
102
  },
103
  {
104
  "epoch": 3.02,
105
  "learning_rate": 3.3333333333333335e-05,
106
+ "loss": 0.1444,
107
+ "step": 120
 
 
 
 
 
 
108
  },
109
  {
110
+ "epoch": 3.05,
111
  "learning_rate": 3.148148148148148e-05,
112
+ "loss": 0.2528,
113
+ "step": 130
 
 
 
 
 
 
114
  },
115
  {
116
  "epoch": 3.09,
117
  "learning_rate": 2.962962962962963e-05,
118
+ "loss": 0.0722,
119
+ "step": 140
 
 
 
 
 
 
120
  },
121
  {
122
  "epoch": 3.12,
123
  "learning_rate": 2.777777777777778e-05,
124
+ "loss": 0.0843,
125
+ "step": 150
 
 
 
 
 
 
 
 
 
126
  },
127
  {
128
+ "epoch": 3.13,
129
+ "eval_accuracy": 0.8714285714285714,
130
+ "eval_loss": 0.375051349401474,
131
+ "eval_runtime": 12.7881,
132
+ "eval_samples_per_second": 5.474,
133
+ "eval_steps_per_second": 0.704,
134
+ "step": 152
135
  },
136
  {
137
  "epoch": 4.03,
138
  "learning_rate": 2.5925925925925925e-05,
139
+ "loss": 0.1844,
140
+ "step": 160
 
 
 
 
 
 
141
  },
142
  {
143
+ "epoch": 4.06,
144
  "learning_rate": 2.4074074074074074e-05,
145
+ "loss": 0.103,
146
+ "step": 170
 
 
 
 
 
 
147
  },
148
  {
149
+ "epoch": 4.09,
150
  "learning_rate": 2.2222222222222223e-05,
151
+ "loss": 0.1064,
152
+ "step": 180
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
  },
154
  {
155
+ "epoch": 4.13,
156
  "learning_rate": 2.037037037037037e-05,
157
+ "loss": 0.1116,
158
+ "step": 190
159
  },
160
  {
161
+ "epoch": 4.13,
162
+ "eval_accuracy": 0.9285714285714286,
163
+ "eval_loss": 0.32321324944496155,
164
+ "eval_runtime": 16.6139,
165
+ "eval_samples_per_second": 4.213,
166
+ "eval_steps_per_second": 0.542,
167
+ "step": 190
168
  },
169
  {
170
+ "epoch": 5.03,
171
  "learning_rate": 1.8518518518518518e-05,
172
+ "loss": 0.1736,
173
+ "step": 200
 
 
 
 
 
 
174
  },
175
  {
176
+ "epoch": 5.07,
177
  "learning_rate": 1.6666666666666667e-05,
178
+ "loss": 0.087,
179
+ "step": 210
 
 
 
 
 
 
180
  },
181
  {
182
+ "epoch": 5.1,
183
  "learning_rate": 1.4814814814814815e-05,
184
+ "loss": 0.0123,
185
+ "step": 220
 
 
 
 
 
 
186
  },
187
  {
188
+ "epoch": 5.13,
189
+ "eval_accuracy": 0.9428571428571428,
190
+ "eval_loss": 0.14982175827026367,
191
+ "eval_runtime": 17.2395,
192
+ "eval_samples_per_second": 4.06,
193
+ "eval_steps_per_second": 0.522,
194
+ "step": 228
195
  },
196
  {
197
+ "epoch": 6.01,
198
  "learning_rate": 1.2962962962962962e-05,
199
+ "loss": 0.0702,
200
+ "step": 230
 
 
 
 
 
 
201
  },
202
  {
203
+ "epoch": 6.04,
204
  "learning_rate": 1.1111111111111112e-05,
205
+ "loss": 0.0106,
206
+ "step": 240
207
  },
208
  {
209
  "epoch": 6.07,
 
 
 
 
 
 
210
  "learning_rate": 9.259259259259259e-06,
211
+ "loss": 0.0152,
212
+ "step": 250
 
 
 
 
 
 
213
  },
214
  {
215
+ "epoch": 6.11,
216
  "learning_rate": 7.4074074074074075e-06,
217
+ "loss": 0.0188,
218
+ "step": 260
219
  },
220
  {
221
+ "epoch": 6.13,
222
+ "eval_accuracy": 0.9,
223
+ "eval_loss": 0.42830967903137207,
224
+ "eval_runtime": 17.0875,
225
+ "eval_samples_per_second": 4.097,
226
+ "eval_steps_per_second": 0.527,
227
+ "step": 266
228
  },
229
  {
230
  "epoch": 7.01,
231
+ "learning_rate": 5.555555555555556e-06,
232
+ "loss": 0.0077,
233
+ "step": 270
234
  },
235
  {
236
+ "epoch": 7.05,
237
+ "learning_rate": 3.7037037037037037e-06,
238
+ "loss": 0.0079,
239
+ "step": 280
240
  },
241
  {
242
+ "epoch": 7.08,
243
+ "learning_rate": 1.8518518518518519e-06,
244
+ "loss": 0.0092,
245
+ "step": 290
246
  },
247
  {
248
+ "epoch": 7.11,
249
+ "learning_rate": 0.0,
250
+ "loss": 0.0146,
251
+ "step": 300
252
  },
253
  {
254
+ "epoch": 7.11,
255
+ "eval_accuracy": 0.9,
256
+ "eval_loss": 0.4196590185165405,
257
+ "eval_runtime": 16.8156,
258
+ "eval_samples_per_second": 4.163,
259
+ "eval_steps_per_second": 0.535,
260
+ "step": 300
261
  },
262
  {
263
+ "epoch": 7.11,
264
+ "step": 300,
265
+ "total_flos": 2.955876078527447e+18,
266
+ "train_loss": 0.21394788240393003,
267
+ "train_runtime": 1232.6567,
268
+ "train_samples_per_second": 1.947,
269
+ "train_steps_per_second": 0.243
270
  },
271
  {
272
  "epoch": 7.11,
273
+ "eval_accuracy": 0.9032258064516129,
274
+ "eval_loss": 0.2630390524864197,
275
+ "eval_runtime": 38.8311,
276
+ "eval_samples_per_second": 3.992,
277
+ "eval_steps_per_second": 0.515,
278
+ "step": 300
279
  },
280
  {
281
+ "epoch": 7.11,
282
+ "eval_accuracy": 0.9032258064516129,
283
+ "eval_loss": 0.2707708775997162,
284
+ "eval_runtime": 38.8958,
285
+ "eval_samples_per_second": 3.985,
286
+ "eval_steps_per_second": 0.514,
287
+ "step": 300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
288
  }
289
  ],
290
+ "max_steps": 300,
291
  "num_train_epochs": 9223372036854775807,
292
+ "total_flos": 2.955876078527447e+18,
293
  "trial_name": null,
294
  "trial_params": null
295
  }