Prot10 committed on
Commit
142d0be
1 Parent(s): 417616d

End of training

Browse files
Files changed (5) hide show
  1. README.md +2 -2
  2. all_results.json +13 -0
  3. eval_results.json +8 -0
  4. train_results.json +8 -0
  5. trainer_state.json +460 -0
README.md CHANGED
@@ -17,8 +17,8 @@ should probably proofread and complete it, then remove this comment. -->
17
 
18
  This model is a fine-tuned version of [facebook/convnextv2-base-1k-224](https://huggingface.co/facebook/convnextv2-base-1k-224) on the None dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 1.4531
21
- - Accuracy: 0.4336
22
 
23
  ## Model description
24
 
 
17
 
18
  This model is a fine-tuned version of [facebook/convnextv2-base-1k-224](https://huggingface.co/facebook/convnextv2-base-1k-224) on the None dataset.
19
  It achieves the following results on the evaluation set:
20
+ - Loss: 1.4479
21
+ - Accuracy: 0.4382
22
 
23
  ## Model description
24
 
all_results.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 27.91,
3
+ "eval_accuracy": 0.4382284382284382,
4
+ "eval_loss": 1.447946548461914,
5
+ "eval_runtime": 8.404,
6
+ "eval_samples_per_second": 51.047,
7
+ "eval_steps_per_second": 1.666,
8
+ "total_flos": 3.007291871298355e+18,
9
+ "train_loss": 1.1542485936482747,
10
+ "train_runtime": 2419.37,
11
+ "train_samples_per_second": 16.864,
12
+ "train_steps_per_second": 0.124
13
+ }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 27.91,
3
+ "eval_accuracy": 0.4382284382284382,
4
+ "eval_loss": 1.447946548461914,
5
+ "eval_runtime": 8.404,
6
+ "eval_samples_per_second": 51.047,
7
+ "eval_steps_per_second": 1.666
8
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 27.91,
3
+ "total_flos": 3.007291871298355e+18,
4
+ "train_loss": 1.1542485936482747,
5
+ "train_runtime": 2419.37,
6
+ "train_samples_per_second": 16.864,
7
+ "train_steps_per_second": 0.124
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,460 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.4382284382284382,
3
+ "best_model_checkpoint": "convnextv2-base-1k-224-for-pre_evaluation/checkpoint-268",
4
+ "epoch": 27.906976744186046,
5
+ "eval_steps": 500,
6
+ "global_step": 300,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.93,
13
+ "learning_rate": 1.6666666666666667e-05,
14
+ "loss": 1.5952,
15
+ "step": 10
16
+ },
17
+ {
18
+ "epoch": 0.93,
19
+ "eval_accuracy": 0.29603729603729606,
20
+ "eval_loss": 1.5510554313659668,
21
+ "eval_runtime": 8.4324,
22
+ "eval_samples_per_second": 50.875,
23
+ "eval_steps_per_second": 1.66,
24
+ "step": 10
25
+ },
26
+ {
27
+ "epoch": 1.86,
28
+ "learning_rate": 3.3333333333333335e-05,
29
+ "loss": 1.5238,
30
+ "step": 20
31
+ },
32
+ {
33
+ "epoch": 1.95,
34
+ "eval_accuracy": 0.34265734265734266,
35
+ "eval_loss": 1.5091006755828857,
36
+ "eval_runtime": 8.9811,
37
+ "eval_samples_per_second": 47.767,
38
+ "eval_steps_per_second": 1.559,
39
+ "step": 21
40
+ },
41
+ {
42
+ "epoch": 2.79,
43
+ "learning_rate": 5e-05,
44
+ "loss": 1.4881,
45
+ "step": 30
46
+ },
47
+ {
48
+ "epoch": 2.98,
49
+ "eval_accuracy": 0.34498834498834496,
50
+ "eval_loss": 1.4853538274765015,
51
+ "eval_runtime": 8.6991,
52
+ "eval_samples_per_second": 49.315,
53
+ "eval_steps_per_second": 1.609,
54
+ "step": 32
55
+ },
56
+ {
57
+ "epoch": 3.72,
58
+ "learning_rate": 4.814814814814815e-05,
59
+ "loss": 1.4708,
60
+ "step": 40
61
+ },
62
+ {
63
+ "epoch": 4.0,
64
+ "eval_accuracy": 0.3473193473193473,
65
+ "eval_loss": 1.4616328477859497,
66
+ "eval_runtime": 8.3701,
67
+ "eval_samples_per_second": 51.254,
68
+ "eval_steps_per_second": 1.673,
69
+ "step": 43
70
+ },
71
+ {
72
+ "epoch": 4.65,
73
+ "learning_rate": 4.62962962962963e-05,
74
+ "loss": 1.4361,
75
+ "step": 50
76
+ },
77
+ {
78
+ "epoch": 4.93,
79
+ "eval_accuracy": 0.34498834498834496,
80
+ "eval_loss": 1.4416619539260864,
81
+ "eval_runtime": 8.7032,
82
+ "eval_samples_per_second": 49.292,
83
+ "eval_steps_per_second": 1.609,
84
+ "step": 53
85
+ },
86
+ {
87
+ "epoch": 5.58,
88
+ "learning_rate": 4.4444444444444447e-05,
89
+ "loss": 1.3764,
90
+ "step": 60
91
+ },
92
+ {
93
+ "epoch": 5.95,
94
+ "eval_accuracy": 0.3752913752913753,
95
+ "eval_loss": 1.4134629964828491,
96
+ "eval_runtime": 8.9398,
97
+ "eval_samples_per_second": 47.988,
98
+ "eval_steps_per_second": 1.566,
99
+ "step": 64
100
+ },
101
+ {
102
+ "epoch": 6.51,
103
+ "learning_rate": 4.259259259259259e-05,
104
+ "loss": 1.3333,
105
+ "step": 70
106
+ },
107
+ {
108
+ "epoch": 6.98,
109
+ "eval_accuracy": 0.3986013986013986,
110
+ "eval_loss": 1.3822472095489502,
111
+ "eval_runtime": 8.7499,
112
+ "eval_samples_per_second": 49.029,
113
+ "eval_steps_per_second": 1.6,
114
+ "step": 75
115
+ },
116
+ {
117
+ "epoch": 7.44,
118
+ "learning_rate": 4.074074074074074e-05,
119
+ "loss": 1.3296,
120
+ "step": 80
121
+ },
122
+ {
123
+ "epoch": 8.0,
124
+ "eval_accuracy": 0.36363636363636365,
125
+ "eval_loss": 1.4111592769622803,
126
+ "eval_runtime": 8.8496,
127
+ "eval_samples_per_second": 48.477,
128
+ "eval_steps_per_second": 1.582,
129
+ "step": 86
130
+ },
131
+ {
132
+ "epoch": 8.37,
133
+ "learning_rate": 3.888888888888889e-05,
134
+ "loss": 1.2798,
135
+ "step": 90
136
+ },
137
+ {
138
+ "epoch": 8.93,
139
+ "eval_accuracy": 0.38927738927738925,
140
+ "eval_loss": 1.4038037061691284,
141
+ "eval_runtime": 8.5853,
142
+ "eval_samples_per_second": 49.969,
143
+ "eval_steps_per_second": 1.631,
144
+ "step": 96
145
+ },
146
+ {
147
+ "epoch": 9.3,
148
+ "learning_rate": 3.7037037037037037e-05,
149
+ "loss": 1.3129,
150
+ "step": 100
151
+ },
152
+ {
153
+ "epoch": 9.95,
154
+ "eval_accuracy": 0.3776223776223776,
155
+ "eval_loss": 1.424072265625,
156
+ "eval_runtime": 8.6578,
157
+ "eval_samples_per_second": 49.551,
158
+ "eval_steps_per_second": 1.617,
159
+ "step": 107
160
+ },
161
+ {
162
+ "epoch": 10.23,
163
+ "learning_rate": 3.518518518518519e-05,
164
+ "loss": 1.3014,
165
+ "step": 110
166
+ },
167
+ {
168
+ "epoch": 10.98,
169
+ "eval_accuracy": 0.38927738927738925,
170
+ "eval_loss": 1.356952428817749,
171
+ "eval_runtime": 8.5422,
172
+ "eval_samples_per_second": 50.221,
173
+ "eval_steps_per_second": 1.639,
174
+ "step": 118
175
+ },
176
+ {
177
+ "epoch": 11.16,
178
+ "learning_rate": 3.3333333333333335e-05,
179
+ "loss": 1.2332,
180
+ "step": 120
181
+ },
182
+ {
183
+ "epoch": 12.0,
184
+ "eval_accuracy": 0.38927738927738925,
185
+ "eval_loss": 1.4072706699371338,
186
+ "eval_runtime": 8.3607,
187
+ "eval_samples_per_second": 51.312,
188
+ "eval_steps_per_second": 1.675,
189
+ "step": 129
190
+ },
191
+ {
192
+ "epoch": 12.09,
193
+ "learning_rate": 3.148148148148148e-05,
194
+ "loss": 1.212,
195
+ "step": 130
196
+ },
197
+ {
198
+ "epoch": 12.93,
199
+ "eval_accuracy": 0.40326340326340326,
200
+ "eval_loss": 1.376956820487976,
201
+ "eval_runtime": 8.3675,
202
+ "eval_samples_per_second": 51.27,
203
+ "eval_steps_per_second": 1.673,
204
+ "step": 139
205
+ },
206
+ {
207
+ "epoch": 13.02,
208
+ "learning_rate": 2.962962962962963e-05,
209
+ "loss": 1.1844,
210
+ "step": 140
211
+ },
212
+ {
213
+ "epoch": 13.95,
214
+ "learning_rate": 2.777777777777778e-05,
215
+ "loss": 1.1763,
216
+ "step": 150
217
+ },
218
+ {
219
+ "epoch": 13.95,
220
+ "eval_accuracy": 0.3962703962703963,
221
+ "eval_loss": 1.3891488313674927,
222
+ "eval_runtime": 8.5717,
223
+ "eval_samples_per_second": 50.049,
224
+ "eval_steps_per_second": 1.633,
225
+ "step": 150
226
+ },
227
+ {
228
+ "epoch": 14.88,
229
+ "learning_rate": 2.5925925925925925e-05,
230
+ "loss": 1.124,
231
+ "step": 160
232
+ },
233
+ {
234
+ "epoch": 14.98,
235
+ "eval_accuracy": 0.4125874125874126,
236
+ "eval_loss": 1.3915237188339233,
237
+ "eval_runtime": 8.5638,
238
+ "eval_samples_per_second": 50.095,
239
+ "eval_steps_per_second": 1.635,
240
+ "step": 161
241
+ },
242
+ {
243
+ "epoch": 15.81,
244
+ "learning_rate": 2.4074074074074074e-05,
245
+ "loss": 1.0963,
246
+ "step": 170
247
+ },
248
+ {
249
+ "epoch": 16.0,
250
+ "eval_accuracy": 0.4149184149184149,
251
+ "eval_loss": 1.4098657369613647,
252
+ "eval_runtime": 8.8116,
253
+ "eval_samples_per_second": 48.686,
254
+ "eval_steps_per_second": 1.589,
255
+ "step": 172
256
+ },
257
+ {
258
+ "epoch": 16.74,
259
+ "learning_rate": 2.2222222222222223e-05,
260
+ "loss": 1.0547,
261
+ "step": 180
262
+ },
263
+ {
264
+ "epoch": 16.93,
265
+ "eval_accuracy": 0.40326340326340326,
266
+ "eval_loss": 1.4206278324127197,
267
+ "eval_runtime": 8.7717,
268
+ "eval_samples_per_second": 48.907,
269
+ "eval_steps_per_second": 1.596,
270
+ "step": 182
271
+ },
272
+ {
273
+ "epoch": 17.67,
274
+ "learning_rate": 2.037037037037037e-05,
275
+ "loss": 1.0631,
276
+ "step": 190
277
+ },
278
+ {
279
+ "epoch": 17.95,
280
+ "eval_accuracy": 0.4195804195804196,
281
+ "eval_loss": 1.4040827751159668,
282
+ "eval_runtime": 8.3983,
283
+ "eval_samples_per_second": 51.082,
284
+ "eval_steps_per_second": 1.667,
285
+ "step": 193
286
+ },
287
+ {
288
+ "epoch": 18.6,
289
+ "learning_rate": 1.8518518518518518e-05,
290
+ "loss": 0.9911,
291
+ "step": 200
292
+ },
293
+ {
294
+ "epoch": 18.98,
295
+ "eval_accuracy": 0.4149184149184149,
296
+ "eval_loss": 1.4271957874298096,
297
+ "eval_runtime": 8.2919,
298
+ "eval_samples_per_second": 51.737,
299
+ "eval_steps_per_second": 1.688,
300
+ "step": 204
301
+ },
302
+ {
303
+ "epoch": 19.53,
304
+ "learning_rate": 1.6666666666666667e-05,
305
+ "loss": 1.005,
306
+ "step": 210
307
+ },
308
+ {
309
+ "epoch": 20.0,
310
+ "eval_accuracy": 0.4219114219114219,
311
+ "eval_loss": 1.42105233669281,
312
+ "eval_runtime": 8.2769,
313
+ "eval_samples_per_second": 51.831,
314
+ "eval_steps_per_second": 1.691,
315
+ "step": 215
316
+ },
317
+ {
318
+ "epoch": 20.47,
319
+ "learning_rate": 1.4814814814814815e-05,
320
+ "loss": 0.9663,
321
+ "step": 220
322
+ },
323
+ {
324
+ "epoch": 20.93,
325
+ "eval_accuracy": 0.40093240093240096,
326
+ "eval_loss": 1.466171145439148,
327
+ "eval_runtime": 9.4718,
328
+ "eval_samples_per_second": 45.292,
329
+ "eval_steps_per_second": 1.478,
330
+ "step": 225
331
+ },
332
+ {
333
+ "epoch": 21.4,
334
+ "learning_rate": 1.2962962962962962e-05,
335
+ "loss": 0.9533,
336
+ "step": 230
337
+ },
338
+ {
339
+ "epoch": 21.95,
340
+ "eval_accuracy": 0.43356643356643354,
341
+ "eval_loss": 1.428614616394043,
342
+ "eval_runtime": 8.343,
343
+ "eval_samples_per_second": 51.42,
344
+ "eval_steps_per_second": 1.678,
345
+ "step": 236
346
+ },
347
+ {
348
+ "epoch": 22.33,
349
+ "learning_rate": 1.1111111111111112e-05,
350
+ "loss": 0.9506,
351
+ "step": 240
352
+ },
353
+ {
354
+ "epoch": 22.98,
355
+ "eval_accuracy": 0.43123543123543123,
356
+ "eval_loss": 1.413465976715088,
357
+ "eval_runtime": 8.7694,
358
+ "eval_samples_per_second": 48.92,
359
+ "eval_steps_per_second": 1.596,
360
+ "step": 247
361
+ },
362
+ {
363
+ "epoch": 23.26,
364
+ "learning_rate": 9.259259259259259e-06,
365
+ "loss": 0.8973,
366
+ "step": 250
367
+ },
368
+ {
369
+ "epoch": 24.0,
370
+ "eval_accuracy": 0.42657342657342656,
371
+ "eval_loss": 1.442847728729248,
372
+ "eval_runtime": 8.7464,
373
+ "eval_samples_per_second": 49.049,
374
+ "eval_steps_per_second": 1.601,
375
+ "step": 258
376
+ },
377
+ {
378
+ "epoch": 24.19,
379
+ "learning_rate": 7.4074074074074075e-06,
380
+ "loss": 0.8807,
381
+ "step": 260
382
+ },
383
+ {
384
+ "epoch": 24.93,
385
+ "eval_accuracy": 0.4382284382284382,
386
+ "eval_loss": 1.447946548461914,
387
+ "eval_runtime": 8.3339,
388
+ "eval_samples_per_second": 51.476,
389
+ "eval_steps_per_second": 1.68,
390
+ "step": 268
391
+ },
392
+ {
393
+ "epoch": 25.12,
394
+ "learning_rate": 5.555555555555556e-06,
395
+ "loss": 0.8731,
396
+ "step": 270
397
+ },
398
+ {
399
+ "epoch": 25.95,
400
+ "eval_accuracy": 0.4289044289044289,
401
+ "eval_loss": 1.4429428577423096,
402
+ "eval_runtime": 8.7998,
403
+ "eval_samples_per_second": 48.751,
404
+ "eval_steps_per_second": 1.591,
405
+ "step": 279
406
+ },
407
+ {
408
+ "epoch": 26.05,
409
+ "learning_rate": 3.7037037037037037e-06,
410
+ "loss": 0.8366,
411
+ "step": 280
412
+ },
413
+ {
414
+ "epoch": 26.98,
415
+ "learning_rate": 1.8518518518518519e-06,
416
+ "loss": 0.8472,
417
+ "step": 290
418
+ },
419
+ {
420
+ "epoch": 26.98,
421
+ "eval_accuracy": 0.43123543123543123,
422
+ "eval_loss": 1.4461231231689453,
423
+ "eval_runtime": 8.7766,
424
+ "eval_samples_per_second": 48.88,
425
+ "eval_steps_per_second": 1.595,
426
+ "step": 290
427
+ },
428
+ {
429
+ "epoch": 27.91,
430
+ "learning_rate": 0.0,
431
+ "loss": 0.8348,
432
+ "step": 300
433
+ },
434
+ {
435
+ "epoch": 27.91,
436
+ "eval_accuracy": 0.43356643356643354,
437
+ "eval_loss": 1.453087568283081,
438
+ "eval_runtime": 8.3523,
439
+ "eval_samples_per_second": 51.363,
440
+ "eval_steps_per_second": 1.676,
441
+ "step": 300
442
+ },
443
+ {
444
+ "epoch": 27.91,
445
+ "step": 300,
446
+ "total_flos": 3.007291871298355e+18,
447
+ "train_loss": 1.1542485936482747,
448
+ "train_runtime": 2419.37,
449
+ "train_samples_per_second": 16.864,
450
+ "train_steps_per_second": 0.124
451
+ }
452
+ ],
453
+ "logging_steps": 10,
454
+ "max_steps": 300,
455
+ "num_train_epochs": 30,
456
+ "save_steps": 500,
457
+ "total_flos": 3.007291871298355e+18,
458
+ "trial_name": null,
459
+ "trial_params": null
460
+ }