Augusto777 committed on
Commit
cf7506a
1 Parent(s): 0605b70

End of training

Browse files
README.md CHANGED
@@ -16,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  This model is a fine-tuned version of [MBZUAI/swiftformer-xs](https://huggingface.co/MBZUAI/swiftformer-xs) on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
- - Loss: 0.6736
20
  - Accuracy: 0.7523
21
 
22
  ## Model description
 
16
 
17
  This model is a fine-tuned version of [MBZUAI/swiftformer-xs](https://huggingface.co/MBZUAI/swiftformer-xs) on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
+ - Loss: 0.7007
20
  - Accuracy: 0.7523
21
 
22
  ## Model description
all_results.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 36.13,
3
+ "eval_accuracy": 0.7522935779816514,
4
+ "eval_loss": 0.7007379531860352,
5
+ "eval_runtime": 0.4794,
6
+ "eval_samples_per_second": 227.386,
7
+ "eval_steps_per_second": 8.344,
8
+ "total_flos": 9.686412043576934e+16,
9
+ "train_loss": 0.878055340903146,
10
+ "train_runtime": 251.6613,
11
+ "train_samples_per_second": 155.447,
12
+ "train_steps_per_second": 1.113
13
+ }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 36.13,
3
+ "eval_accuracy": 0.7522935779816514,
4
+ "eval_loss": 0.7007379531860352,
5
+ "eval_runtime": 0.4794,
6
+ "eval_samples_per_second": 227.386,
7
+ "eval_steps_per_second": 8.344
8
+ }
runs/Feb01_15-59-58_56aa26a8524f/events.out.tfevents.1706803463.56aa26a8524f.1432.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9791ea8015279dfc50ca507b8a6956f014088eb63f1d09d21ca529bd782e201e
3
+ size 411
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 36.13,
3
+ "total_flos": 9.686412043576934e+16,
4
+ "train_loss": 0.878055340903146,
5
+ "train_runtime": 251.6613,
6
+ "train_samples_per_second": 155.447,
7
+ "train_steps_per_second": 1.113
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,529 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.7522935779816514,
3
+ "best_model_checkpoint": "swiftformer-xs-dmae-va-U-40/checkpoint-224",
4
+ "epoch": 36.12903225806452,
5
+ "eval_steps": 500,
6
+ "global_step": 280,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.9,
13
+ "eval_accuracy": 0.29357798165137616,
14
+ "eval_loss": 1.3578405380249023,
15
+ "eval_runtime": 0.4941,
16
+ "eval_samples_per_second": 220.605,
17
+ "eval_steps_per_second": 8.096,
18
+ "step": 7
19
+ },
20
+ {
21
+ "epoch": 1.29,
22
+ "learning_rate": 1.785714285714286e-05,
23
+ "loss": 1.3702,
24
+ "step": 10
25
+ },
26
+ {
27
+ "epoch": 1.94,
28
+ "eval_accuracy": 0.29357798165137616,
29
+ "eval_loss": 1.3703261613845825,
30
+ "eval_runtime": 0.5833,
31
+ "eval_samples_per_second": 186.883,
32
+ "eval_steps_per_second": 6.858,
33
+ "step": 15
34
+ },
35
+ {
36
+ "epoch": 2.58,
37
+ "learning_rate": 3.571428571428572e-05,
38
+ "loss": 1.3497,
39
+ "step": 20
40
+ },
41
+ {
42
+ "epoch": 2.97,
43
+ "eval_accuracy": 0.3394495412844037,
44
+ "eval_loss": 1.3361120223999023,
45
+ "eval_runtime": 0.4226,
46
+ "eval_samples_per_second": 257.899,
47
+ "eval_steps_per_second": 9.464,
48
+ "step": 23
49
+ },
50
+ {
51
+ "epoch": 3.87,
52
+ "learning_rate": 4.960317460317461e-05,
53
+ "loss": 1.3004,
54
+ "step": 30
55
+ },
56
+ {
57
+ "epoch": 4.0,
58
+ "eval_accuracy": 0.3669724770642202,
59
+ "eval_loss": 1.2852015495300293,
60
+ "eval_runtime": 0.4154,
61
+ "eval_samples_per_second": 262.426,
62
+ "eval_steps_per_second": 9.63,
63
+ "step": 31
64
+ },
65
+ {
66
+ "epoch": 4.9,
67
+ "eval_accuracy": 0.43119266055045874,
68
+ "eval_loss": 1.231681227684021,
69
+ "eval_runtime": 0.4157,
70
+ "eval_samples_per_second": 262.204,
71
+ "eval_steps_per_second": 9.622,
72
+ "step": 38
73
+ },
74
+ {
75
+ "epoch": 5.16,
76
+ "learning_rate": 4.761904761904762e-05,
77
+ "loss": 1.2248,
78
+ "step": 40
79
+ },
80
+ {
81
+ "epoch": 5.94,
82
+ "eval_accuracy": 0.45871559633027525,
83
+ "eval_loss": 1.1785550117492676,
84
+ "eval_runtime": 0.4211,
85
+ "eval_samples_per_second": 258.867,
86
+ "eval_steps_per_second": 9.5,
87
+ "step": 46
88
+ },
89
+ {
90
+ "epoch": 6.45,
91
+ "learning_rate": 4.563492063492064e-05,
92
+ "loss": 1.1485,
93
+ "step": 50
94
+ },
95
+ {
96
+ "epoch": 6.97,
97
+ "eval_accuracy": 0.5045871559633027,
98
+ "eval_loss": 1.123984694480896,
99
+ "eval_runtime": 0.4143,
100
+ "eval_samples_per_second": 263.124,
101
+ "eval_steps_per_second": 9.656,
102
+ "step": 54
103
+ },
104
+ {
105
+ "epoch": 7.74,
106
+ "learning_rate": 4.3650793650793655e-05,
107
+ "loss": 1.0759,
108
+ "step": 60
109
+ },
110
+ {
111
+ "epoch": 8.0,
112
+ "eval_accuracy": 0.5504587155963303,
113
+ "eval_loss": 1.0727368593215942,
114
+ "eval_runtime": 0.4109,
115
+ "eval_samples_per_second": 265.292,
116
+ "eval_steps_per_second": 9.735,
117
+ "step": 62
118
+ },
119
+ {
120
+ "epoch": 8.9,
121
+ "eval_accuracy": 0.5596330275229358,
122
+ "eval_loss": 1.0403714179992676,
123
+ "eval_runtime": 0.5431,
124
+ "eval_samples_per_second": 200.69,
125
+ "eval_steps_per_second": 7.365,
126
+ "step": 69
127
+ },
128
+ {
129
+ "epoch": 9.03,
130
+ "learning_rate": 4.166666666666667e-05,
131
+ "loss": 1.0244,
132
+ "step": 70
133
+ },
134
+ {
135
+ "epoch": 9.94,
136
+ "eval_accuracy": 0.6238532110091743,
137
+ "eval_loss": 0.974201500415802,
138
+ "eval_runtime": 0.4104,
139
+ "eval_samples_per_second": 265.623,
140
+ "eval_steps_per_second": 9.748,
141
+ "step": 77
142
+ },
143
+ {
144
+ "epoch": 10.32,
145
+ "learning_rate": 3.968253968253968e-05,
146
+ "loss": 0.9782,
147
+ "step": 80
148
+ },
149
+ {
150
+ "epoch": 10.97,
151
+ "eval_accuracy": 0.6422018348623854,
152
+ "eval_loss": 0.9374117851257324,
153
+ "eval_runtime": 0.6713,
154
+ "eval_samples_per_second": 162.361,
155
+ "eval_steps_per_second": 5.958,
156
+ "step": 85
157
+ },
158
+ {
159
+ "epoch": 11.61,
160
+ "learning_rate": 3.76984126984127e-05,
161
+ "loss": 0.9359,
162
+ "step": 90
163
+ },
164
+ {
165
+ "epoch": 12.0,
166
+ "eval_accuracy": 0.6788990825688074,
167
+ "eval_loss": 0.9196614027023315,
168
+ "eval_runtime": 0.4175,
169
+ "eval_samples_per_second": 261.061,
170
+ "eval_steps_per_second": 9.58,
171
+ "step": 93
172
+ },
173
+ {
174
+ "epoch": 12.9,
175
+ "learning_rate": 3.571428571428572e-05,
176
+ "loss": 0.9051,
177
+ "step": 100
178
+ },
179
+ {
180
+ "epoch": 12.9,
181
+ "eval_accuracy": 0.6880733944954128,
182
+ "eval_loss": 0.8753331899642944,
183
+ "eval_runtime": 0.41,
184
+ "eval_samples_per_second": 265.867,
185
+ "eval_steps_per_second": 9.757,
186
+ "step": 100
187
+ },
188
+ {
189
+ "epoch": 13.94,
190
+ "eval_accuracy": 0.6972477064220184,
191
+ "eval_loss": 0.8679403066635132,
192
+ "eval_runtime": 0.4077,
193
+ "eval_samples_per_second": 267.322,
194
+ "eval_steps_per_second": 9.81,
195
+ "step": 108
196
+ },
197
+ {
198
+ "epoch": 14.19,
199
+ "learning_rate": 3.3730158730158734e-05,
200
+ "loss": 0.8652,
201
+ "step": 110
202
+ },
203
+ {
204
+ "epoch": 14.97,
205
+ "eval_accuracy": 0.7155963302752294,
206
+ "eval_loss": 0.8316473364830017,
207
+ "eval_runtime": 0.4256,
208
+ "eval_samples_per_second": 256.117,
209
+ "eval_steps_per_second": 9.399,
210
+ "step": 116
211
+ },
212
+ {
213
+ "epoch": 15.48,
214
+ "learning_rate": 3.1746031746031745e-05,
215
+ "loss": 0.8336,
216
+ "step": 120
217
+ },
218
+ {
219
+ "epoch": 16.0,
220
+ "eval_accuracy": 0.6972477064220184,
221
+ "eval_loss": 0.8222222924232483,
222
+ "eval_runtime": 0.4301,
223
+ "eval_samples_per_second": 253.434,
224
+ "eval_steps_per_second": 9.3,
225
+ "step": 124
226
+ },
227
+ {
228
+ "epoch": 16.77,
229
+ "learning_rate": 2.9761904761904762e-05,
230
+ "loss": 0.8177,
231
+ "step": 130
232
+ },
233
+ {
234
+ "epoch": 16.9,
235
+ "eval_accuracy": 0.6972477064220184,
236
+ "eval_loss": 0.8177938461303711,
237
+ "eval_runtime": 0.4098,
238
+ "eval_samples_per_second": 266.011,
239
+ "eval_steps_per_second": 9.762,
240
+ "step": 131
241
+ },
242
+ {
243
+ "epoch": 17.94,
244
+ "eval_accuracy": 0.7339449541284404,
245
+ "eval_loss": 0.7817714810371399,
246
+ "eval_runtime": 0.5687,
247
+ "eval_samples_per_second": 191.682,
248
+ "eval_steps_per_second": 7.034,
249
+ "step": 139
250
+ },
251
+ {
252
+ "epoch": 18.06,
253
+ "learning_rate": 2.777777777777778e-05,
254
+ "loss": 0.8077,
255
+ "step": 140
256
+ },
257
+ {
258
+ "epoch": 18.97,
259
+ "eval_accuracy": 0.7339449541284404,
260
+ "eval_loss": 0.7627159357070923,
261
+ "eval_runtime": 0.4134,
262
+ "eval_samples_per_second": 263.676,
263
+ "eval_steps_per_second": 9.676,
264
+ "step": 147
265
+ },
266
+ {
267
+ "epoch": 19.35,
268
+ "learning_rate": 2.5793650793650796e-05,
269
+ "loss": 0.7796,
270
+ "step": 150
271
+ },
272
+ {
273
+ "epoch": 20.0,
274
+ "eval_accuracy": 0.7339449541284404,
275
+ "eval_loss": 0.7478492259979248,
276
+ "eval_runtime": 0.4144,
277
+ "eval_samples_per_second": 263.034,
278
+ "eval_steps_per_second": 9.653,
279
+ "step": 155
280
+ },
281
+ {
282
+ "epoch": 20.65,
283
+ "learning_rate": 2.380952380952381e-05,
284
+ "loss": 0.7673,
285
+ "step": 160
286
+ },
287
+ {
288
+ "epoch": 20.9,
289
+ "eval_accuracy": 0.7431192660550459,
290
+ "eval_loss": 0.7414626479148865,
291
+ "eval_runtime": 0.408,
292
+ "eval_samples_per_second": 267.182,
293
+ "eval_steps_per_second": 9.805,
294
+ "step": 162
295
+ },
296
+ {
297
+ "epoch": 21.94,
298
+ "learning_rate": 2.1825396825396827e-05,
299
+ "loss": 0.7445,
300
+ "step": 170
301
+ },
302
+ {
303
+ "epoch": 21.94,
304
+ "eval_accuracy": 0.7155963302752294,
305
+ "eval_loss": 0.7413556575775146,
306
+ "eval_runtime": 0.4043,
307
+ "eval_samples_per_second": 269.576,
308
+ "eval_steps_per_second": 9.893,
309
+ "step": 170
310
+ },
311
+ {
312
+ "epoch": 22.97,
313
+ "eval_accuracy": 0.7155963302752294,
314
+ "eval_loss": 0.7375438809394836,
315
+ "eval_runtime": 0.4062,
316
+ "eval_samples_per_second": 268.371,
317
+ "eval_steps_per_second": 9.848,
318
+ "step": 178
319
+ },
320
+ {
321
+ "epoch": 23.23,
322
+ "learning_rate": 1.984126984126984e-05,
323
+ "loss": 0.7413,
324
+ "step": 180
325
+ },
326
+ {
327
+ "epoch": 24.0,
328
+ "eval_accuracy": 0.7155963302752294,
329
+ "eval_loss": 0.7353999018669128,
330
+ "eval_runtime": 0.4171,
331
+ "eval_samples_per_second": 261.311,
332
+ "eval_steps_per_second": 9.589,
333
+ "step": 186
334
+ },
335
+ {
336
+ "epoch": 24.52,
337
+ "learning_rate": 1.785714285714286e-05,
338
+ "loss": 0.739,
339
+ "step": 190
340
+ },
341
+ {
342
+ "epoch": 24.9,
343
+ "eval_accuracy": 0.7431192660550459,
344
+ "eval_loss": 0.71100252866745,
345
+ "eval_runtime": 0.4123,
346
+ "eval_samples_per_second": 264.367,
347
+ "eval_steps_per_second": 9.702,
348
+ "step": 193
349
+ },
350
+ {
351
+ "epoch": 25.81,
352
+ "learning_rate": 1.5873015873015872e-05,
353
+ "loss": 0.6992,
354
+ "step": 200
355
+ },
356
+ {
357
+ "epoch": 25.94,
358
+ "eval_accuracy": 0.7339449541284404,
359
+ "eval_loss": 0.7120506763458252,
360
+ "eval_runtime": 0.4281,
361
+ "eval_samples_per_second": 254.6,
362
+ "eval_steps_per_second": 9.343,
363
+ "step": 201
364
+ },
365
+ {
366
+ "epoch": 26.97,
367
+ "eval_accuracy": 0.7431192660550459,
368
+ "eval_loss": 0.7044178247451782,
369
+ "eval_runtime": 0.5666,
370
+ "eval_samples_per_second": 192.368,
371
+ "eval_steps_per_second": 7.059,
372
+ "step": 209
373
+ },
374
+ {
375
+ "epoch": 27.1,
376
+ "learning_rate": 1.388888888888889e-05,
377
+ "loss": 0.7111,
378
+ "step": 210
379
+ },
380
+ {
381
+ "epoch": 28.0,
382
+ "eval_accuracy": 0.7339449541284404,
383
+ "eval_loss": 0.6947001218795776,
384
+ "eval_runtime": 0.4105,
385
+ "eval_samples_per_second": 265.532,
386
+ "eval_steps_per_second": 9.744,
387
+ "step": 217
388
+ },
389
+ {
390
+ "epoch": 28.39,
391
+ "learning_rate": 1.1904761904761905e-05,
392
+ "loss": 0.7013,
393
+ "step": 220
394
+ },
395
+ {
396
+ "epoch": 28.9,
397
+ "eval_accuracy": 0.7522935779816514,
398
+ "eval_loss": 0.7007379531860352,
399
+ "eval_runtime": 0.4106,
400
+ "eval_samples_per_second": 265.441,
401
+ "eval_steps_per_second": 9.741,
402
+ "step": 224
403
+ },
404
+ {
405
+ "epoch": 29.68,
406
+ "learning_rate": 9.92063492063492e-06,
407
+ "loss": 0.712,
408
+ "step": 230
409
+ },
410
+ {
411
+ "epoch": 29.94,
412
+ "eval_accuracy": 0.7431192660550459,
413
+ "eval_loss": 0.6792589426040649,
414
+ "eval_runtime": 0.4139,
415
+ "eval_samples_per_second": 263.354,
416
+ "eval_steps_per_second": 9.664,
417
+ "step": 232
418
+ },
419
+ {
420
+ "epoch": 30.97,
421
+ "learning_rate": 7.936507936507936e-06,
422
+ "loss": 0.671,
423
+ "step": 240
424
+ },
425
+ {
426
+ "epoch": 30.97,
427
+ "eval_accuracy": 0.7431192660550459,
428
+ "eval_loss": 0.6808269619941711,
429
+ "eval_runtime": 0.408,
430
+ "eval_samples_per_second": 267.189,
431
+ "eval_steps_per_second": 9.805,
432
+ "step": 240
433
+ },
434
+ {
435
+ "epoch": 32.0,
436
+ "eval_accuracy": 0.7339449541284404,
437
+ "eval_loss": 0.6820599436759949,
438
+ "eval_runtime": 0.4244,
439
+ "eval_samples_per_second": 256.807,
440
+ "eval_steps_per_second": 9.424,
441
+ "step": 248
442
+ },
443
+ {
444
+ "epoch": 32.26,
445
+ "learning_rate": 5.9523809523809525e-06,
446
+ "loss": 0.6862,
447
+ "step": 250
448
+ },
449
+ {
450
+ "epoch": 32.9,
451
+ "eval_accuracy": 0.7339449541284404,
452
+ "eval_loss": 0.6705361008644104,
453
+ "eval_runtime": 0.4159,
454
+ "eval_samples_per_second": 262.074,
455
+ "eval_steps_per_second": 9.617,
456
+ "step": 255
457
+ },
458
+ {
459
+ "epoch": 33.55,
460
+ "learning_rate": 3.968253968253968e-06,
461
+ "loss": 0.6606,
462
+ "step": 260
463
+ },
464
+ {
465
+ "epoch": 33.94,
466
+ "eval_accuracy": 0.7431192660550459,
467
+ "eval_loss": 0.6783888339996338,
468
+ "eval_runtime": 0.4197,
469
+ "eval_samples_per_second": 259.739,
470
+ "eval_steps_per_second": 9.532,
471
+ "step": 263
472
+ },
473
+ {
474
+ "epoch": 34.84,
475
+ "learning_rate": 1.984126984126984e-06,
476
+ "loss": 0.6667,
477
+ "step": 270
478
+ },
479
+ {
480
+ "epoch": 34.97,
481
+ "eval_accuracy": 0.7522935779816514,
482
+ "eval_loss": 0.6764441132545471,
483
+ "eval_runtime": 0.4166,
484
+ "eval_samples_per_second": 261.642,
485
+ "eval_steps_per_second": 9.602,
486
+ "step": 271
487
+ },
488
+ {
489
+ "epoch": 36.0,
490
+ "eval_accuracy": 0.7522935779816514,
491
+ "eval_loss": 0.6716886758804321,
492
+ "eval_runtime": 0.5848,
493
+ "eval_samples_per_second": 186.395,
494
+ "eval_steps_per_second": 6.84,
495
+ "step": 279
496
+ },
497
+ {
498
+ "epoch": 36.13,
499
+ "learning_rate": 0.0,
500
+ "loss": 0.6687,
501
+ "step": 280
502
+ },
503
+ {
504
+ "epoch": 36.13,
505
+ "eval_accuracy": 0.7522935779816514,
506
+ "eval_loss": 0.6736045479774475,
507
+ "eval_runtime": 0.4181,
508
+ "eval_samples_per_second": 260.729,
509
+ "eval_steps_per_second": 9.568,
510
+ "step": 280
511
+ },
512
+ {
513
+ "epoch": 36.13,
514
+ "step": 280,
515
+ "total_flos": 9.686412043576934e+16,
516
+ "train_loss": 0.878055340903146,
517
+ "train_runtime": 251.6613,
518
+ "train_samples_per_second": 155.447,
519
+ "train_steps_per_second": 1.113
520
+ }
521
+ ],
522
+ "logging_steps": 10,
523
+ "max_steps": 280,
524
+ "num_train_epochs": 40,
525
+ "save_steps": 500,
526
+ "total_flos": 9.686412043576934e+16,
527
+ "trial_name": null,
528
+ "trial_params": null
529
+ }