Augusto777 committed on
Commit
509071a
1 Parent(s): 0053395

End of training

Browse files
README.md CHANGED
@@ -17,7 +17,7 @@ should probably proofread and complete it, then remove this comment. -->
17
 
18
  This model is a fine-tuned version of [google/vit-base-patch16-224](https://huggingface.co/google/vit-base-patch16-224) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 0.0110
21
  - Accuracy: 1.0
22
 
23
  ## Model description
 
17
 
18
  This model is a fine-tuned version of [google/vit-base-patch16-224](https://huggingface.co/google/vit-base-patch16-224) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
+ - Loss: 0.0213
21
  - Accuracy: 1.0
22
 
23
  ## Model description
all_results.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 40.0,
3
+ "eval_accuracy": 1.0,
4
+ "eval_loss": 0.021263618022203445,
5
+ "eval_runtime": 1.3658,
6
+ "eval_samples_per_second": 72.485,
7
+ "eval_steps_per_second": 2.929,
8
+ "total_flos": 2.7494650758139085e+18,
9
+ "train_loss": 0.2073148890797581,
10
+ "train_runtime": 1492.3317,
11
+ "train_samples_per_second": 23.775,
12
+ "train_steps_per_second": 0.188
13
+ }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 40.0,
3
+ "eval_accuracy": 1.0,
4
+ "eval_loss": 0.021263618022203445,
5
+ "eval_runtime": 1.3658,
6
+ "eval_samples_per_second": 72.485,
7
+ "eval_steps_per_second": 2.929
8
+ }
runs/Mar11_00-37-16_89e2f3456c0c/events.out.tfevents.1710118958.89e2f3456c0c.2146.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24841351ae2575516e908f008d1bff94ea851e2e32c4bcb641dadcde731696af
3
+ size 411
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 40.0,
3
+ "total_flos": 2.7494650758139085e+18,
4
+ "train_loss": 0.2073148890797581,
5
+ "train_runtime": 1492.3317,
6
+ "train_samples_per_second": 23.775,
7
+ "train_steps_per_second": 0.188
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,551 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 1.0,
3
+ "best_model_checkpoint": "vit-base-patch16-224-dmae-va-U3-40A/checkpoint-119",
4
+ "epoch": 40.0,
5
+ "eval_steps": 500,
6
+ "global_step": 280,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "eval_accuracy": 0.37373737373737376,
14
+ "eval_loss": 1.329177737236023,
15
+ "eval_runtime": 1.2424,
16
+ "eval_samples_per_second": 79.683,
17
+ "eval_steps_per_second": 3.22,
18
+ "step": 7
19
+ },
20
+ {
21
+ "epoch": 1.71,
22
+ "grad_norm": 3.085270404815674,
23
+ "learning_rate": 2.1428571428571428e-05,
24
+ "loss": 1.3407,
25
+ "step": 12
26
+ },
27
+ {
28
+ "epoch": 2.0,
29
+ "eval_accuracy": 0.5151515151515151,
30
+ "eval_loss": 1.107913851737976,
31
+ "eval_runtime": 1.2457,
32
+ "eval_samples_per_second": 79.472,
33
+ "eval_steps_per_second": 3.211,
34
+ "step": 14
35
+ },
36
+ {
37
+ "epoch": 3.0,
38
+ "eval_accuracy": 0.6262626262626263,
39
+ "eval_loss": 0.8917983770370483,
40
+ "eval_runtime": 1.2574,
41
+ "eval_samples_per_second": 78.737,
42
+ "eval_steps_per_second": 3.181,
43
+ "step": 21
44
+ },
45
+ {
46
+ "epoch": 3.43,
47
+ "grad_norm": 3.550651788711548,
48
+ "learning_rate": 4.2857142857142856e-05,
49
+ "loss": 0.9919,
50
+ "step": 24
51
+ },
52
+ {
53
+ "epoch": 4.0,
54
+ "eval_accuracy": 0.7878787878787878,
55
+ "eval_loss": 0.6446935534477234,
56
+ "eval_runtime": 1.2679,
57
+ "eval_samples_per_second": 78.083,
58
+ "eval_steps_per_second": 3.155,
59
+ "step": 28
60
+ },
61
+ {
62
+ "epoch": 5.0,
63
+ "eval_accuracy": 0.8282828282828283,
64
+ "eval_loss": 0.45023515820503235,
65
+ "eval_runtime": 1.4064,
66
+ "eval_samples_per_second": 70.393,
67
+ "eval_steps_per_second": 2.844,
68
+ "step": 35
69
+ },
70
+ {
71
+ "epoch": 5.14,
72
+ "grad_norm": 3.4338462352752686,
73
+ "learning_rate": 4.841269841269841e-05,
74
+ "loss": 0.5761,
75
+ "step": 36
76
+ },
77
+ {
78
+ "epoch": 6.0,
79
+ "eval_accuracy": 0.9191919191919192,
80
+ "eval_loss": 0.27204275131225586,
81
+ "eval_runtime": 1.4909,
82
+ "eval_samples_per_second": 66.403,
83
+ "eval_steps_per_second": 2.683,
84
+ "step": 42
85
+ },
86
+ {
87
+ "epoch": 6.86,
88
+ "grad_norm": 1.6468698978424072,
89
+ "learning_rate": 4.603174603174603e-05,
90
+ "loss": 0.3111,
91
+ "step": 48
92
+ },
93
+ {
94
+ "epoch": 7.0,
95
+ "eval_accuracy": 0.9292929292929293,
96
+ "eval_loss": 0.23024092614650726,
97
+ "eval_runtime": 1.3066,
98
+ "eval_samples_per_second": 75.766,
99
+ "eval_steps_per_second": 3.061,
100
+ "step": 49
101
+ },
102
+ {
103
+ "epoch": 8.0,
104
+ "eval_accuracy": 0.9494949494949495,
105
+ "eval_loss": 0.16501076519489288,
106
+ "eval_runtime": 1.3125,
107
+ "eval_samples_per_second": 75.428,
108
+ "eval_steps_per_second": 3.048,
109
+ "step": 56
110
+ },
111
+ {
112
+ "epoch": 8.57,
113
+ "grad_norm": 1.7793350219726562,
114
+ "learning_rate": 4.3650793650793655e-05,
115
+ "loss": 0.204,
116
+ "step": 60
117
+ },
118
+ {
119
+ "epoch": 9.0,
120
+ "eval_accuracy": 0.9494949494949495,
121
+ "eval_loss": 0.1503186672925949,
122
+ "eval_runtime": 1.316,
123
+ "eval_samples_per_second": 75.227,
124
+ "eval_steps_per_second": 3.039,
125
+ "step": 63
126
+ },
127
+ {
128
+ "epoch": 10.0,
129
+ "eval_accuracy": 0.9797979797979798,
130
+ "eval_loss": 0.08136877417564392,
131
+ "eval_runtime": 1.3015,
132
+ "eval_samples_per_second": 76.065,
133
+ "eval_steps_per_second": 3.073,
134
+ "step": 70
135
+ },
136
+ {
137
+ "epoch": 10.29,
138
+ "grad_norm": 1.3909555673599243,
139
+ "learning_rate": 4.126984126984127e-05,
140
+ "loss": 0.1518,
141
+ "step": 72
142
+ },
143
+ {
144
+ "epoch": 11.0,
145
+ "eval_accuracy": 0.9797979797979798,
146
+ "eval_loss": 0.06037978082895279,
147
+ "eval_runtime": 1.2829,
148
+ "eval_samples_per_second": 77.171,
149
+ "eval_steps_per_second": 3.118,
150
+ "step": 77
151
+ },
152
+ {
153
+ "epoch": 12.0,
154
+ "grad_norm": 1.9186725616455078,
155
+ "learning_rate": 3.888888888888889e-05,
156
+ "loss": 0.1272,
157
+ "step": 84
158
+ },
159
+ {
160
+ "epoch": 12.0,
161
+ "eval_accuracy": 0.9494949494949495,
162
+ "eval_loss": 0.1265028864145279,
163
+ "eval_runtime": 1.2863,
164
+ "eval_samples_per_second": 76.965,
165
+ "eval_steps_per_second": 3.11,
166
+ "step": 84
167
+ },
168
+ {
169
+ "epoch": 13.0,
170
+ "eval_accuracy": 0.9797979797979798,
171
+ "eval_loss": 0.05176503211259842,
172
+ "eval_runtime": 1.3002,
173
+ "eval_samples_per_second": 76.143,
174
+ "eval_steps_per_second": 3.076,
175
+ "step": 91
176
+ },
177
+ {
178
+ "epoch": 13.71,
179
+ "grad_norm": 1.4530500173568726,
180
+ "learning_rate": 3.650793650793651e-05,
181
+ "loss": 0.1379,
182
+ "step": 96
183
+ },
184
+ {
185
+ "epoch": 14.0,
186
+ "eval_accuracy": 0.98989898989899,
187
+ "eval_loss": 0.044787079095840454,
188
+ "eval_runtime": 1.303,
189
+ "eval_samples_per_second": 75.979,
190
+ "eval_steps_per_second": 3.07,
191
+ "step": 98
192
+ },
193
+ {
194
+ "epoch": 15.0,
195
+ "eval_accuracy": 0.98989898989899,
196
+ "eval_loss": 0.03611420467495918,
197
+ "eval_runtime": 1.309,
198
+ "eval_samples_per_second": 75.628,
199
+ "eval_steps_per_second": 3.056,
200
+ "step": 105
201
+ },
202
+ {
203
+ "epoch": 15.43,
204
+ "grad_norm": 1.3324140310287476,
205
+ "learning_rate": 3.412698412698413e-05,
206
+ "loss": 0.092,
207
+ "step": 108
208
+ },
209
+ {
210
+ "epoch": 16.0,
211
+ "eval_accuracy": 0.98989898989899,
212
+ "eval_loss": 0.032215822488069534,
213
+ "eval_runtime": 1.3002,
214
+ "eval_samples_per_second": 76.141,
215
+ "eval_steps_per_second": 3.076,
216
+ "step": 112
217
+ },
218
+ {
219
+ "epoch": 17.0,
220
+ "eval_accuracy": 1.0,
221
+ "eval_loss": 0.021263618022203445,
222
+ "eval_runtime": 1.307,
223
+ "eval_samples_per_second": 75.748,
224
+ "eval_steps_per_second": 3.061,
225
+ "step": 119
226
+ },
227
+ {
228
+ "epoch": 17.14,
229
+ "grad_norm": 2.2976629734039307,
230
+ "learning_rate": 3.1746031746031745e-05,
231
+ "loss": 0.0762,
232
+ "step": 120
233
+ },
234
+ {
235
+ "epoch": 18.0,
236
+ "eval_accuracy": 0.98989898989899,
237
+ "eval_loss": 0.04688708856701851,
238
+ "eval_runtime": 1.315,
239
+ "eval_samples_per_second": 75.288,
240
+ "eval_steps_per_second": 3.042,
241
+ "step": 126
242
+ },
243
+ {
244
+ "epoch": 18.86,
245
+ "grad_norm": 2.308321714401245,
246
+ "learning_rate": 2.9365079365079366e-05,
247
+ "loss": 0.0954,
248
+ "step": 132
249
+ },
250
+ {
251
+ "epoch": 19.0,
252
+ "eval_accuracy": 0.98989898989899,
253
+ "eval_loss": 0.061472997069358826,
254
+ "eval_runtime": 1.3031,
255
+ "eval_samples_per_second": 75.971,
256
+ "eval_steps_per_second": 3.07,
257
+ "step": 133
258
+ },
259
+ {
260
+ "epoch": 20.0,
261
+ "eval_accuracy": 0.98989898989899,
262
+ "eval_loss": 0.03133073449134827,
263
+ "eval_runtime": 1.2993,
264
+ "eval_samples_per_second": 76.194,
265
+ "eval_steps_per_second": 3.079,
266
+ "step": 140
267
+ },
268
+ {
269
+ "epoch": 20.57,
270
+ "grad_norm": 1.274895191192627,
271
+ "learning_rate": 2.6984126984126984e-05,
272
+ "loss": 0.0795,
273
+ "step": 144
274
+ },
275
+ {
276
+ "epoch": 21.0,
277
+ "eval_accuracy": 0.98989898989899,
278
+ "eval_loss": 0.0380566343665123,
279
+ "eval_runtime": 1.3136,
280
+ "eval_samples_per_second": 75.368,
281
+ "eval_steps_per_second": 3.045,
282
+ "step": 147
283
+ },
284
+ {
285
+ "epoch": 22.0,
286
+ "eval_accuracy": 1.0,
287
+ "eval_loss": 0.013774486258625984,
288
+ "eval_runtime": 1.309,
289
+ "eval_samples_per_second": 75.628,
290
+ "eval_steps_per_second": 3.056,
291
+ "step": 154
292
+ },
293
+ {
294
+ "epoch": 22.29,
295
+ "grad_norm": 1.425993800163269,
296
+ "learning_rate": 2.4603174603174602e-05,
297
+ "loss": 0.077,
298
+ "step": 156
299
+ },
300
+ {
301
+ "epoch": 23.0,
302
+ "eval_accuracy": 1.0,
303
+ "eval_loss": 0.01703532226383686,
304
+ "eval_runtime": 1.3122,
305
+ "eval_samples_per_second": 75.446,
306
+ "eval_steps_per_second": 3.048,
307
+ "step": 161
308
+ },
309
+ {
310
+ "epoch": 24.0,
311
+ "grad_norm": 0.986053466796875,
312
+ "learning_rate": 2.2222222222222223e-05,
313
+ "loss": 0.0675,
314
+ "step": 168
315
+ },
316
+ {
317
+ "epoch": 24.0,
318
+ "eval_accuracy": 1.0,
319
+ "eval_loss": 0.010675261728465557,
320
+ "eval_runtime": 1.3159,
321
+ "eval_samples_per_second": 75.231,
322
+ "eval_steps_per_second": 3.04,
323
+ "step": 168
324
+ },
325
+ {
326
+ "epoch": 25.0,
327
+ "eval_accuracy": 0.98989898989899,
328
+ "eval_loss": 0.019328022375702858,
329
+ "eval_runtime": 1.3069,
330
+ "eval_samples_per_second": 75.752,
331
+ "eval_steps_per_second": 3.061,
332
+ "step": 175
333
+ },
334
+ {
335
+ "epoch": 25.71,
336
+ "grad_norm": 2.7184066772460938,
337
+ "learning_rate": 1.984126984126984e-05,
338
+ "loss": 0.0659,
339
+ "step": 180
340
+ },
341
+ {
342
+ "epoch": 26.0,
343
+ "eval_accuracy": 0.98989898989899,
344
+ "eval_loss": 0.025486120954155922,
345
+ "eval_runtime": 1.3084,
346
+ "eval_samples_per_second": 75.667,
347
+ "eval_steps_per_second": 3.057,
348
+ "step": 182
349
+ },
350
+ {
351
+ "epoch": 27.0,
352
+ "eval_accuracy": 0.98989898989899,
353
+ "eval_loss": 0.020136240869760513,
354
+ "eval_runtime": 1.3096,
355
+ "eval_samples_per_second": 75.596,
356
+ "eval_steps_per_second": 3.054,
357
+ "step": 189
358
+ },
359
+ {
360
+ "epoch": 27.43,
361
+ "grad_norm": 2.4670774936676025,
362
+ "learning_rate": 1.746031746031746e-05,
363
+ "loss": 0.0758,
364
+ "step": 192
365
+ },
366
+ {
367
+ "epoch": 28.0,
368
+ "eval_accuracy": 0.98989898989899,
369
+ "eval_loss": 0.03251149132847786,
370
+ "eval_runtime": 1.2895,
371
+ "eval_samples_per_second": 76.775,
372
+ "eval_steps_per_second": 3.102,
373
+ "step": 196
374
+ },
375
+ {
376
+ "epoch": 29.0,
377
+ "eval_accuracy": 1.0,
378
+ "eval_loss": 0.011012612842023373,
379
+ "eval_runtime": 1.3105,
380
+ "eval_samples_per_second": 75.542,
381
+ "eval_steps_per_second": 3.052,
382
+ "step": 203
383
+ },
384
+ {
385
+ "epoch": 29.14,
386
+ "grad_norm": 1.395484209060669,
387
+ "learning_rate": 1.5079365079365079e-05,
388
+ "loss": 0.0589,
389
+ "step": 204
390
+ },
391
+ {
392
+ "epoch": 30.0,
393
+ "eval_accuracy": 1.0,
394
+ "eval_loss": 0.0159281175583601,
395
+ "eval_runtime": 1.3316,
396
+ "eval_samples_per_second": 74.346,
397
+ "eval_steps_per_second": 3.004,
398
+ "step": 210
399
+ },
400
+ {
401
+ "epoch": 30.86,
402
+ "grad_norm": 1.4966486692428589,
403
+ "learning_rate": 1.2698412698412699e-05,
404
+ "loss": 0.0521,
405
+ "step": 216
406
+ },
407
+ {
408
+ "epoch": 31.0,
409
+ "eval_accuracy": 0.98989898989899,
410
+ "eval_loss": 0.031857237219810486,
411
+ "eval_runtime": 1.2919,
412
+ "eval_samples_per_second": 76.629,
413
+ "eval_steps_per_second": 3.096,
414
+ "step": 217
415
+ },
416
+ {
417
+ "epoch": 32.0,
418
+ "eval_accuracy": 0.9797979797979798,
419
+ "eval_loss": 0.029438314959406853,
420
+ "eval_runtime": 1.3011,
421
+ "eval_samples_per_second": 76.091,
422
+ "eval_steps_per_second": 3.074,
423
+ "step": 224
424
+ },
425
+ {
426
+ "epoch": 32.57,
427
+ "grad_norm": 1.3882635831832886,
428
+ "learning_rate": 1.0317460317460318e-05,
429
+ "loss": 0.0618,
430
+ "step": 228
431
+ },
432
+ {
433
+ "epoch": 33.0,
434
+ "eval_accuracy": 0.9797979797979798,
435
+ "eval_loss": 0.039191748946905136,
436
+ "eval_runtime": 1.3063,
437
+ "eval_samples_per_second": 75.788,
438
+ "eval_steps_per_second": 3.062,
439
+ "step": 231
440
+ },
441
+ {
442
+ "epoch": 34.0,
443
+ "eval_accuracy": 0.98989898989899,
444
+ "eval_loss": 0.026867415755987167,
445
+ "eval_runtime": 1.2944,
446
+ "eval_samples_per_second": 76.481,
447
+ "eval_steps_per_second": 3.09,
448
+ "step": 238
449
+ },
450
+ {
451
+ "epoch": 34.29,
452
+ "grad_norm": 1.203762412071228,
453
+ "learning_rate": 7.936507936507936e-06,
454
+ "loss": 0.0422,
455
+ "step": 240
456
+ },
457
+ {
458
+ "epoch": 35.0,
459
+ "eval_accuracy": 0.98989898989899,
460
+ "eval_loss": 0.021003253757953644,
461
+ "eval_runtime": 1.3003,
462
+ "eval_samples_per_second": 76.137,
463
+ "eval_steps_per_second": 3.076,
464
+ "step": 245
465
+ },
466
+ {
467
+ "epoch": 36.0,
468
+ "grad_norm": 1.9063193798065186,
469
+ "learning_rate": 5.555555555555556e-06,
470
+ "loss": 0.0551,
471
+ "step": 252
472
+ },
473
+ {
474
+ "epoch": 36.0,
475
+ "eval_accuracy": 0.98989898989899,
476
+ "eval_loss": 0.01777764968574047,
477
+ "eval_runtime": 1.456,
478
+ "eval_samples_per_second": 67.993,
479
+ "eval_steps_per_second": 2.747,
480
+ "step": 252
481
+ },
482
+ {
483
+ "epoch": 37.0,
484
+ "eval_accuracy": 0.98989898989899,
485
+ "eval_loss": 0.01593274623155594,
486
+ "eval_runtime": 1.3158,
487
+ "eval_samples_per_second": 75.242,
488
+ "eval_steps_per_second": 3.04,
489
+ "step": 259
490
+ },
491
+ {
492
+ "epoch": 37.71,
493
+ "grad_norm": 1.4439316987991333,
494
+ "learning_rate": 3.1746031746031746e-06,
495
+ "loss": 0.0518,
496
+ "step": 264
497
+ },
498
+ {
499
+ "epoch": 38.0,
500
+ "eval_accuracy": 0.98989898989899,
501
+ "eval_loss": 0.012379774823784828,
502
+ "eval_runtime": 1.3454,
503
+ "eval_samples_per_second": 73.583,
504
+ "eval_steps_per_second": 2.973,
505
+ "step": 266
506
+ },
507
+ {
508
+ "epoch": 39.0,
509
+ "eval_accuracy": 1.0,
510
+ "eval_loss": 0.011175006628036499,
511
+ "eval_runtime": 1.4492,
512
+ "eval_samples_per_second": 68.315,
513
+ "eval_steps_per_second": 2.76,
514
+ "step": 273
515
+ },
516
+ {
517
+ "epoch": 39.43,
518
+ "grad_norm": 1.1414995193481445,
519
+ "learning_rate": 7.936507936507937e-07,
520
+ "loss": 0.0313,
521
+ "step": 276
522
+ },
523
+ {
524
+ "epoch": 40.0,
525
+ "eval_accuracy": 1.0,
526
+ "eval_loss": 0.01096320990473032,
527
+ "eval_runtime": 1.3095,
528
+ "eval_samples_per_second": 75.6,
529
+ "eval_steps_per_second": 3.055,
530
+ "step": 280
531
+ },
532
+ {
533
+ "epoch": 40.0,
534
+ "step": 280,
535
+ "total_flos": 2.7494650758139085e+18,
536
+ "train_loss": 0.2073148890797581,
537
+ "train_runtime": 1492.3317,
538
+ "train_samples_per_second": 23.775,
539
+ "train_steps_per_second": 0.188
540
+ }
541
+ ],
542
+ "logging_steps": 12,
543
+ "max_steps": 280,
544
+ "num_input_tokens_seen": 0,
545
+ "num_train_epochs": 40,
546
+ "save_steps": 500,
547
+ "total_flos": 2.7494650758139085e+18,
548
+ "train_batch_size": 32,
549
+ "trial_name": null,
550
+ "trial_params": null
551
+ }