davanstrien HF staff committed on
Commit
55577f4
·
1 Parent(s): c521604

End of training

Browse files
all_results.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 40.0,
3
+ "eval_f1": 0.9746835443037974,
4
+ "eval_loss": 0.09489229321479797,
5
+ "eval_runtime": 9.9057,
6
+ "eval_samples_per_second": 7.975,
7
+ "eval_steps_per_second": 0.101,
8
+ "train_loss": 0.17715629466942379,
9
+ "train_runtime": 2286.0002,
10
+ "train_samples_per_second": 7.804,
11
+ "train_steps_per_second": 0.122
12
+ }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 40.0,
3
+ "eval_f1": 0.9746835443037974,
4
+ "eval_loss": 0.09489229321479797,
5
+ "eval_runtime": 9.9057,
6
+ "eval_samples_per_second": 7.975,
7
+ "eval_steps_per_second": 0.101
8
+ }
runs/Dec06_20-52-20_49793c51f922/events.out.tfevents.1670362580.49793c51f922.5072.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f01eef5558cd0e98c6c43403e4fdfd2fb9145fb54600e368e5933be3484ec3ce
3
+ size 357
train_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 40.0,
3
+ "train_loss": 0.17715629466942379,
4
+ "train_runtime": 2286.0002,
5
+ "train_samples_per_second": 7.804,
6
+ "train_steps_per_second": 0.122
7
+ }
trainer_state.json ADDED
@@ -0,0 +1,553 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.09489229321479797,
3
+ "best_model_checkpoint": "/vit-base-patch32-224-in21k/checkpoint-273",
4
+ "epoch": 40.0,
5
+ "global_step": 280,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 1.0,
12
+ "eval_f1": 0.8607594936708861,
13
+ "eval_loss": 0.45760300755500793,
14
+ "eval_runtime": 9.802,
15
+ "eval_samples_per_second": 8.06,
16
+ "eval_steps_per_second": 0.102,
17
+ "step": 7
18
+ },
19
+ {
20
+ "epoch": 1.43,
21
+ "learning_rate": 1.928571428571429e-05,
22
+ "loss": 0.5021,
23
+ "step": 10
24
+ },
25
+ {
26
+ "epoch": 2.0,
27
+ "eval_f1": 0.8607594936708861,
28
+ "eval_loss": 0.39525437355041504,
29
+ "eval_runtime": 9.734,
30
+ "eval_samples_per_second": 8.116,
31
+ "eval_steps_per_second": 0.103,
32
+ "step": 14
33
+ },
34
+ {
35
+ "epoch": 2.86,
36
+ "learning_rate": 1.8571428571428575e-05,
37
+ "loss": 0.3595,
38
+ "step": 20
39
+ },
40
+ {
41
+ "epoch": 3.0,
42
+ "eval_f1": 0.8607594936708861,
43
+ "eval_loss": 0.38087165355682373,
44
+ "eval_runtime": 9.8033,
45
+ "eval_samples_per_second": 8.059,
46
+ "eval_steps_per_second": 0.102,
47
+ "step": 21
48
+ },
49
+ {
50
+ "epoch": 4.0,
51
+ "eval_f1": 0.8607594936708861,
52
+ "eval_loss": 0.32862257957458496,
53
+ "eval_runtime": 9.7791,
54
+ "eval_samples_per_second": 8.078,
55
+ "eval_steps_per_second": 0.102,
56
+ "step": 28
57
+ },
58
+ {
59
+ "epoch": 4.29,
60
+ "learning_rate": 1.785714285714286e-05,
61
+ "loss": 0.3009,
62
+ "step": 30
63
+ },
64
+ {
65
+ "epoch": 5.0,
66
+ "eval_f1": 0.8607594936708861,
67
+ "eval_loss": 0.29453742504119873,
68
+ "eval_runtime": 9.9491,
69
+ "eval_samples_per_second": 7.94,
70
+ "eval_steps_per_second": 0.101,
71
+ "step": 35
72
+ },
73
+ {
74
+ "epoch": 5.71,
75
+ "learning_rate": 1.7142857142857142e-05,
76
+ "loss": 0.2843,
77
+ "step": 40
78
+ },
79
+ {
80
+ "epoch": 6.0,
81
+ "eval_f1": 0.8607594936708861,
82
+ "eval_loss": 0.35278087854385376,
83
+ "eval_runtime": 9.8405,
84
+ "eval_samples_per_second": 8.028,
85
+ "eval_steps_per_second": 0.102,
86
+ "step": 42
87
+ },
88
+ {
89
+ "epoch": 7.0,
90
+ "eval_f1": 0.8607594936708861,
91
+ "eval_loss": 0.23452825844287872,
92
+ "eval_runtime": 9.7839,
93
+ "eval_samples_per_second": 8.074,
94
+ "eval_steps_per_second": 0.102,
95
+ "step": 49
96
+ },
97
+ {
98
+ "epoch": 7.14,
99
+ "learning_rate": 1.642857142857143e-05,
100
+ "loss": 0.266,
101
+ "step": 50
102
+ },
103
+ {
104
+ "epoch": 8.0,
105
+ "eval_f1": 0.8607594936708861,
106
+ "eval_loss": 0.24986670911312103,
107
+ "eval_runtime": 9.8907,
108
+ "eval_samples_per_second": 7.987,
109
+ "eval_steps_per_second": 0.101,
110
+ "step": 56
111
+ },
112
+ {
113
+ "epoch": 8.57,
114
+ "learning_rate": 1.5714285714285715e-05,
115
+ "loss": 0.222,
116
+ "step": 60
117
+ },
118
+ {
119
+ "epoch": 9.0,
120
+ "eval_f1": 0.8607594936708861,
121
+ "eval_loss": 0.25441065430641174,
122
+ "eval_runtime": 9.8794,
123
+ "eval_samples_per_second": 7.996,
124
+ "eval_steps_per_second": 0.101,
125
+ "step": 63
126
+ },
127
+ {
128
+ "epoch": 10.0,
129
+ "learning_rate": 1.5000000000000002e-05,
130
+ "loss": 0.2018,
131
+ "step": 70
132
+ },
133
+ {
134
+ "epoch": 10.0,
135
+ "eval_f1": 0.8607594936708861,
136
+ "eval_loss": 0.19540712237358093,
137
+ "eval_runtime": 10.3358,
138
+ "eval_samples_per_second": 7.643,
139
+ "eval_steps_per_second": 0.097,
140
+ "step": 70
141
+ },
142
+ {
143
+ "epoch": 11.0,
144
+ "eval_f1": 0.8607594936708861,
145
+ "eval_loss": 0.23508581519126892,
146
+ "eval_runtime": 9.7761,
147
+ "eval_samples_per_second": 8.081,
148
+ "eval_steps_per_second": 0.102,
149
+ "step": 77
150
+ },
151
+ {
152
+ "epoch": 11.43,
153
+ "learning_rate": 1.4285714285714287e-05,
154
+ "loss": 0.1948,
155
+ "step": 80
156
+ },
157
+ {
158
+ "epoch": 12.0,
159
+ "eval_f1": 0.8607594936708861,
160
+ "eval_loss": 0.17053687572479248,
161
+ "eval_runtime": 9.7433,
162
+ "eval_samples_per_second": 8.108,
163
+ "eval_steps_per_second": 0.103,
164
+ "step": 84
165
+ },
166
+ {
167
+ "epoch": 12.86,
168
+ "learning_rate": 1.3571428571428574e-05,
169
+ "loss": 0.2053,
170
+ "step": 90
171
+ },
172
+ {
173
+ "epoch": 13.0,
174
+ "eval_f1": 0.8734177215189873,
175
+ "eval_loss": 0.16247014701366425,
176
+ "eval_runtime": 9.7199,
177
+ "eval_samples_per_second": 8.128,
178
+ "eval_steps_per_second": 0.103,
179
+ "step": 91
180
+ },
181
+ {
182
+ "epoch": 14.0,
183
+ "eval_f1": 0.9367088607594937,
184
+ "eval_loss": 0.17189449071884155,
185
+ "eval_runtime": 9.7734,
186
+ "eval_samples_per_second": 8.083,
187
+ "eval_steps_per_second": 0.102,
188
+ "step": 98
189
+ },
190
+ {
191
+ "epoch": 14.29,
192
+ "learning_rate": 1.2857142857142859e-05,
193
+ "loss": 0.1729,
194
+ "step": 100
195
+ },
196
+ {
197
+ "epoch": 15.0,
198
+ "eval_f1": 0.9367088607594937,
199
+ "eval_loss": 0.1488722860813141,
200
+ "eval_runtime": 9.7192,
201
+ "eval_samples_per_second": 8.128,
202
+ "eval_steps_per_second": 0.103,
203
+ "step": 105
204
+ },
205
+ {
206
+ "epoch": 15.71,
207
+ "learning_rate": 1.2142857142857142e-05,
208
+ "loss": 0.1535,
209
+ "step": 110
210
+ },
211
+ {
212
+ "epoch": 16.0,
213
+ "eval_f1": 0.9493670886075949,
214
+ "eval_loss": 0.14498455822467804,
215
+ "eval_runtime": 9.7322,
216
+ "eval_samples_per_second": 8.117,
217
+ "eval_steps_per_second": 0.103,
218
+ "step": 112
219
+ },
220
+ {
221
+ "epoch": 17.0,
222
+ "eval_f1": 0.9493670886075949,
223
+ "eval_loss": 0.1749649941921234,
224
+ "eval_runtime": 9.718,
225
+ "eval_samples_per_second": 8.129,
226
+ "eval_steps_per_second": 0.103,
227
+ "step": 119
228
+ },
229
+ {
230
+ "epoch": 17.14,
231
+ "learning_rate": 1.1428571428571429e-05,
232
+ "loss": 0.1492,
233
+ "step": 120
234
+ },
235
+ {
236
+ "epoch": 18.0,
237
+ "eval_f1": 0.9493670886075949,
238
+ "eval_loss": 0.15143541991710663,
239
+ "eval_runtime": 9.7273,
240
+ "eval_samples_per_second": 8.121,
241
+ "eval_steps_per_second": 0.103,
242
+ "step": 126
243
+ },
244
+ {
245
+ "epoch": 18.57,
246
+ "learning_rate": 1.0714285714285714e-05,
247
+ "loss": 0.1349,
248
+ "step": 130
249
+ },
250
+ {
251
+ "epoch": 19.0,
252
+ "eval_f1": 0.9620253164556962,
253
+ "eval_loss": 0.1304464191198349,
254
+ "eval_runtime": 9.7442,
255
+ "eval_samples_per_second": 8.107,
256
+ "eval_steps_per_second": 0.103,
257
+ "step": 133
258
+ },
259
+ {
260
+ "epoch": 20.0,
261
+ "learning_rate": 1e-05,
262
+ "loss": 0.1538,
263
+ "step": 140
264
+ },
265
+ {
266
+ "epoch": 20.0,
267
+ "eval_f1": 0.9620253164556962,
268
+ "eval_loss": 0.12909165024757385,
269
+ "eval_runtime": 9.7157,
270
+ "eval_samples_per_second": 8.131,
271
+ "eval_steps_per_second": 0.103,
272
+ "step": 140
273
+ },
274
+ {
275
+ "epoch": 21.0,
276
+ "eval_f1": 0.9620253164556962,
277
+ "eval_loss": 0.13061794638633728,
278
+ "eval_runtime": 9.7515,
279
+ "eval_samples_per_second": 8.101,
280
+ "eval_steps_per_second": 0.103,
281
+ "step": 147
282
+ },
283
+ {
284
+ "epoch": 21.43,
285
+ "learning_rate": 9.285714285714288e-06,
286
+ "loss": 0.1357,
287
+ "step": 150
288
+ },
289
+ {
290
+ "epoch": 22.0,
291
+ "eval_f1": 0.9620253164556962,
292
+ "eval_loss": 0.12830054759979248,
293
+ "eval_runtime": 9.7367,
294
+ "eval_samples_per_second": 8.114,
295
+ "eval_steps_per_second": 0.103,
296
+ "step": 154
297
+ },
298
+ {
299
+ "epoch": 22.86,
300
+ "learning_rate": 8.571428571428571e-06,
301
+ "loss": 0.147,
302
+ "step": 160
303
+ },
304
+ {
305
+ "epoch": 23.0,
306
+ "eval_f1": 0.9493670886075949,
307
+ "eval_loss": 0.12891362607479095,
308
+ "eval_runtime": 9.7565,
309
+ "eval_samples_per_second": 8.097,
310
+ "eval_steps_per_second": 0.102,
311
+ "step": 161
312
+ },
313
+ {
314
+ "epoch": 24.0,
315
+ "eval_f1": 0.9746835443037974,
316
+ "eval_loss": 0.1338558942079544,
317
+ "eval_runtime": 9.7379,
318
+ "eval_samples_per_second": 8.113,
319
+ "eval_steps_per_second": 0.103,
320
+ "step": 168
321
+ },
322
+ {
323
+ "epoch": 24.29,
324
+ "learning_rate": 7.857142857142858e-06,
325
+ "loss": 0.1388,
326
+ "step": 170
327
+ },
328
+ {
329
+ "epoch": 25.0,
330
+ "eval_f1": 0.9493670886075949,
331
+ "eval_loss": 0.12436553090810776,
332
+ "eval_runtime": 9.7614,
333
+ "eval_samples_per_second": 8.093,
334
+ "eval_steps_per_second": 0.102,
335
+ "step": 175
336
+ },
337
+ {
338
+ "epoch": 25.71,
339
+ "learning_rate": 7.1428571428571436e-06,
340
+ "loss": 0.1192,
341
+ "step": 180
342
+ },
343
+ {
344
+ "epoch": 26.0,
345
+ "eval_f1": 0.9746835443037974,
346
+ "eval_loss": 0.11165592074394226,
347
+ "eval_runtime": 9.7177,
348
+ "eval_samples_per_second": 8.129,
349
+ "eval_steps_per_second": 0.103,
350
+ "step": 182
351
+ },
352
+ {
353
+ "epoch": 27.0,
354
+ "eval_f1": 0.9873417721518988,
355
+ "eval_loss": 0.11046960204839706,
356
+ "eval_runtime": 9.7417,
357
+ "eval_samples_per_second": 8.109,
358
+ "eval_steps_per_second": 0.103,
359
+ "step": 189
360
+ },
361
+ {
362
+ "epoch": 27.14,
363
+ "learning_rate": 6.4285714285714295e-06,
364
+ "loss": 0.112,
365
+ "step": 190
366
+ },
367
+ {
368
+ "epoch": 28.0,
369
+ "eval_f1": 0.9746835443037974,
370
+ "eval_loss": 0.1078834980726242,
371
+ "eval_runtime": 9.7562,
372
+ "eval_samples_per_second": 8.097,
373
+ "eval_steps_per_second": 0.102,
374
+ "step": 196
375
+ },
376
+ {
377
+ "epoch": 28.57,
378
+ "learning_rate": 5.7142857142857145e-06,
379
+ "loss": 0.1215,
380
+ "step": 200
381
+ },
382
+ {
383
+ "epoch": 29.0,
384
+ "eval_f1": 0.9620253164556962,
385
+ "eval_loss": 0.11511888355016708,
386
+ "eval_runtime": 9.7361,
387
+ "eval_samples_per_second": 8.114,
388
+ "eval_steps_per_second": 0.103,
389
+ "step": 203
390
+ },
391
+ {
392
+ "epoch": 30.0,
393
+ "learning_rate": 5e-06,
394
+ "loss": 0.1139,
395
+ "step": 210
396
+ },
397
+ {
398
+ "epoch": 30.0,
399
+ "eval_f1": 0.9873417721518988,
400
+ "eval_loss": 0.10075395554304123,
401
+ "eval_runtime": 9.7351,
402
+ "eval_samples_per_second": 8.115,
403
+ "eval_steps_per_second": 0.103,
404
+ "step": 210
405
+ },
406
+ {
407
+ "epoch": 31.0,
408
+ "eval_f1": 0.9746835443037974,
409
+ "eval_loss": 0.10330603271722794,
410
+ "eval_runtime": 9.7699,
411
+ "eval_samples_per_second": 8.086,
412
+ "eval_steps_per_second": 0.102,
413
+ "step": 217
414
+ },
415
+ {
416
+ "epoch": 31.43,
417
+ "learning_rate": 4.2857142857142855e-06,
418
+ "loss": 0.1164,
419
+ "step": 220
420
+ },
421
+ {
422
+ "epoch": 32.0,
423
+ "eval_f1": 0.9873417721518988,
424
+ "eval_loss": 0.0984945222735405,
425
+ "eval_runtime": 9.7232,
426
+ "eval_samples_per_second": 8.125,
427
+ "eval_steps_per_second": 0.103,
428
+ "step": 224
429
+ },
430
+ {
431
+ "epoch": 32.86,
432
+ "learning_rate": 3.5714285714285718e-06,
433
+ "loss": 0.1192,
434
+ "step": 230
435
+ },
436
+ {
437
+ "epoch": 33.0,
438
+ "eval_f1": 0.9873417721518988,
439
+ "eval_loss": 0.09554588049650192,
440
+ "eval_runtime": 9.7164,
441
+ "eval_samples_per_second": 8.131,
442
+ "eval_steps_per_second": 0.103,
443
+ "step": 231
444
+ },
445
+ {
446
+ "epoch": 34.0,
447
+ "eval_f1": 0.9620253164556962,
448
+ "eval_loss": 0.10772588849067688,
449
+ "eval_runtime": 9.7297,
450
+ "eval_samples_per_second": 8.119,
451
+ "eval_steps_per_second": 0.103,
452
+ "step": 238
453
+ },
454
+ {
455
+ "epoch": 34.29,
456
+ "learning_rate": 2.8571428571428573e-06,
457
+ "loss": 0.1132,
458
+ "step": 240
459
+ },
460
+ {
461
+ "epoch": 35.0,
462
+ "eval_f1": 0.9620253164556962,
463
+ "eval_loss": 0.11074268072843552,
464
+ "eval_runtime": 9.7358,
465
+ "eval_samples_per_second": 8.114,
466
+ "eval_steps_per_second": 0.103,
467
+ "step": 245
468
+ },
469
+ {
470
+ "epoch": 35.71,
471
+ "learning_rate": 2.1428571428571427e-06,
472
+ "loss": 0.1021,
473
+ "step": 250
474
+ },
475
+ {
476
+ "epoch": 36.0,
477
+ "eval_f1": 0.9873417721518988,
478
+ "eval_loss": 0.09575933963060379,
479
+ "eval_runtime": 9.7306,
480
+ "eval_samples_per_second": 8.119,
481
+ "eval_steps_per_second": 0.103,
482
+ "step": 252
483
+ },
484
+ {
485
+ "epoch": 37.0,
486
+ "eval_f1": 0.9873417721518988,
487
+ "eval_loss": 0.09573517739772797,
488
+ "eval_runtime": 9.7256,
489
+ "eval_samples_per_second": 8.123,
490
+ "eval_steps_per_second": 0.103,
491
+ "step": 259
492
+ },
493
+ {
494
+ "epoch": 37.14,
495
+ "learning_rate": 1.4285714285714286e-06,
496
+ "loss": 0.0945,
497
+ "step": 260
498
+ },
499
+ {
500
+ "epoch": 38.0,
501
+ "eval_f1": 0.9746835443037974,
502
+ "eval_loss": 0.09509044885635376,
503
+ "eval_runtime": 9.7255,
504
+ "eval_samples_per_second": 8.123,
505
+ "eval_steps_per_second": 0.103,
506
+ "step": 266
507
+ },
508
+ {
509
+ "epoch": 38.57,
510
+ "learning_rate": 7.142857142857143e-07,
511
+ "loss": 0.1244,
512
+ "step": 270
513
+ },
514
+ {
515
+ "epoch": 39.0,
516
+ "eval_f1": 0.9746835443037974,
517
+ "eval_loss": 0.09489229321479797,
518
+ "eval_runtime": 9.7321,
519
+ "eval_samples_per_second": 8.117,
520
+ "eval_steps_per_second": 0.103,
521
+ "step": 273
522
+ },
523
+ {
524
+ "epoch": 40.0,
525
+ "learning_rate": 0.0,
526
+ "loss": 0.1012,
527
+ "step": 280
528
+ },
529
+ {
530
+ "epoch": 40.0,
531
+ "eval_f1": 0.9873417721518988,
532
+ "eval_loss": 0.09554900228977203,
533
+ "eval_runtime": 9.7342,
534
+ "eval_samples_per_second": 8.116,
535
+ "eval_steps_per_second": 0.103,
536
+ "step": 280
537
+ },
538
+ {
539
+ "epoch": 40.0,
540
+ "step": 280,
541
+ "total_flos": 1.4091487038849024e+18,
542
+ "train_loss": 0.17715629466942379,
543
+ "train_runtime": 2286.0002,
544
+ "train_samples_per_second": 7.804,
545
+ "train_steps_per_second": 0.122
546
+ }
547
+ ],
548
+ "max_steps": 280,
549
+ "num_train_epochs": 40,
550
+ "total_flos": 1.4091487038849024e+18,
551
+ "trial_name": null,
552
+ "trial_params": null
553
+ }