ahmedALM1221 committed on
Commit
54d11d6
1 Parent(s): ff97657

End of training

Browse files
all_results.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 30.0,
3
+ "eval_accuracy": 1.0,
4
+ "eval_loss": 0.007612039800733328,
5
+ "eval_runtime": 11.0324,
6
+ "eval_samples_per_second": 79.765,
7
+ "eval_steps_per_second": 4.985,
8
+ "total_flos": 2.4304114274567455e+19,
9
+ "train_loss": 0.19108782262513133,
10
+ "train_runtime": 4786.2205,
11
+ "train_samples_per_second": 22.051,
12
+ "train_steps_per_second": 0.345
13
+ }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 30.0,
3
+ "eval_accuracy": 1.0,
4
+ "eval_loss": 0.007612039800733328,
5
+ "eval_runtime": 11.0324,
6
+ "eval_samples_per_second": 79.765,
7
+ "eval_steps_per_second": 4.985
8
+ }
runs/Jul04_20-03-48_2df985b450cf/events.out.tfevents.1688506871.2df985b450cf.27022.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dfdedd241bc0b6659f02f6cb33a3d97ab1b5f4c35669b5e72c120f85b058edc0
3
+ size 411
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 30.0,
3
+ "total_flos": 2.4304114274567455e+19,
4
+ "train_loss": 0.19108782262513133,
5
+ "train_runtime": 4786.2205,
6
+ "train_samples_per_second": 22.051,
7
+ "train_steps_per_second": 0.345
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,787 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 1.0,
3
+ "best_model_checkpoint": "swinv2-large-patch4-window12to16-192to256-22kto1k-ft-finetuned-eurosat-50/checkpoint-495",
4
+ "epoch": 30.0,
5
+ "global_step": 1650,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.36,
12
+ "learning_rate": 7.228915662650602e-06,
13
+ "loss": 1.9251,
14
+ "step": 20
15
+ },
16
+ {
17
+ "epoch": 0.73,
18
+ "learning_rate": 1.4457831325301205e-05,
19
+ "loss": 1.5952,
20
+ "step": 40
21
+ },
22
+ {
23
+ "epoch": 1.0,
24
+ "eval_accuracy": 0.6693181818181818,
25
+ "eval_loss": 0.848971426486969,
26
+ "eval_runtime": 10.9926,
27
+ "eval_samples_per_second": 80.054,
28
+ "eval_steps_per_second": 5.003,
29
+ "step": 55
30
+ },
31
+ {
32
+ "epoch": 1.09,
33
+ "learning_rate": 2.168674698795181e-05,
34
+ "loss": 1.1596,
35
+ "step": 60
36
+ },
37
+ {
38
+ "epoch": 1.45,
39
+ "learning_rate": 2.891566265060241e-05,
40
+ "loss": 0.8796,
41
+ "step": 80
42
+ },
43
+ {
44
+ "epoch": 1.82,
45
+ "learning_rate": 2.967453733248245e-05,
46
+ "loss": 0.7582,
47
+ "step": 100
48
+ },
49
+ {
50
+ "epoch": 2.0,
51
+ "eval_accuracy": 0.8386363636363636,
52
+ "eval_loss": 0.45608994364738464,
53
+ "eval_runtime": 11.0458,
54
+ "eval_samples_per_second": 79.668,
55
+ "eval_steps_per_second": 4.979,
56
+ "step": 110
57
+ },
58
+ {
59
+ "epoch": 2.18,
60
+ "learning_rate": 2.929164007657945e-05,
61
+ "loss": 0.5899,
62
+ "step": 120
63
+ },
64
+ {
65
+ "epoch": 2.55,
66
+ "learning_rate": 2.8908742820676455e-05,
67
+ "loss": 0.4953,
68
+ "step": 140
69
+ },
70
+ {
71
+ "epoch": 2.91,
72
+ "learning_rate": 2.852584556477345e-05,
73
+ "loss": 0.4359,
74
+ "step": 160
75
+ },
76
+ {
77
+ "epoch": 3.0,
78
+ "eval_accuracy": 0.9227272727272727,
79
+ "eval_loss": 0.24076080322265625,
80
+ "eval_runtime": 10.9212,
81
+ "eval_samples_per_second": 80.577,
82
+ "eval_steps_per_second": 5.036,
83
+ "step": 165
84
+ },
85
+ {
86
+ "epoch": 3.27,
87
+ "learning_rate": 2.8142948308870455e-05,
88
+ "loss": 0.4,
89
+ "step": 180
90
+ },
91
+ {
92
+ "epoch": 3.64,
93
+ "learning_rate": 2.7760051052967455e-05,
94
+ "loss": 0.361,
95
+ "step": 200
96
+ },
97
+ {
98
+ "epoch": 4.0,
99
+ "learning_rate": 2.7377153797064455e-05,
100
+ "loss": 0.318,
101
+ "step": 220
102
+ },
103
+ {
104
+ "epoch": 4.0,
105
+ "eval_accuracy": 0.9568181818181818,
106
+ "eval_loss": 0.12942154705524445,
107
+ "eval_runtime": 11.0819,
108
+ "eval_samples_per_second": 79.409,
109
+ "eval_steps_per_second": 4.963,
110
+ "step": 220
111
+ },
112
+ {
113
+ "epoch": 4.36,
114
+ "learning_rate": 2.6994256541161455e-05,
115
+ "loss": 0.2481,
116
+ "step": 240
117
+ },
118
+ {
119
+ "epoch": 4.73,
120
+ "learning_rate": 2.6611359285258455e-05,
121
+ "loss": 0.2414,
122
+ "step": 260
123
+ },
124
+ {
125
+ "epoch": 5.0,
126
+ "eval_accuracy": 0.990909090909091,
127
+ "eval_loss": 0.03464468568563461,
128
+ "eval_runtime": 11.2055,
129
+ "eval_samples_per_second": 78.533,
130
+ "eval_steps_per_second": 4.908,
131
+ "step": 275
132
+ },
133
+ {
134
+ "epoch": 5.09,
135
+ "learning_rate": 2.622846202935546e-05,
136
+ "loss": 0.2313,
137
+ "step": 280
138
+ },
139
+ {
140
+ "epoch": 5.45,
141
+ "learning_rate": 2.5845564773452456e-05,
142
+ "loss": 0.2074,
143
+ "step": 300
144
+ },
145
+ {
146
+ "epoch": 5.82,
147
+ "learning_rate": 2.546266751754946e-05,
148
+ "loss": 0.1888,
149
+ "step": 320
150
+ },
151
+ {
152
+ "epoch": 6.0,
153
+ "eval_accuracy": 0.9863636363636363,
154
+ "eval_loss": 0.04186202958226204,
155
+ "eval_runtime": 11.0353,
156
+ "eval_samples_per_second": 79.744,
157
+ "eval_steps_per_second": 4.984,
158
+ "step": 330
159
+ },
160
+ {
161
+ "epoch": 6.18,
162
+ "learning_rate": 2.507977026164646e-05,
163
+ "loss": 0.2057,
164
+ "step": 340
165
+ },
166
+ {
167
+ "epoch": 6.55,
168
+ "learning_rate": 2.469687300574346e-05,
169
+ "loss": 0.1693,
170
+ "step": 360
171
+ },
172
+ {
173
+ "epoch": 6.91,
174
+ "learning_rate": 2.431397574984046e-05,
175
+ "loss": 0.1717,
176
+ "step": 380
177
+ },
178
+ {
179
+ "epoch": 7.0,
180
+ "eval_accuracy": 0.9943181818181818,
181
+ "eval_loss": 0.023813609033823013,
182
+ "eval_runtime": 11.0829,
183
+ "eval_samples_per_second": 79.402,
184
+ "eval_steps_per_second": 4.963,
185
+ "step": 385
186
+ },
187
+ {
188
+ "epoch": 7.27,
189
+ "learning_rate": 2.393107849393746e-05,
190
+ "loss": 0.1614,
191
+ "step": 400
192
+ },
193
+ {
194
+ "epoch": 7.64,
195
+ "learning_rate": 2.3548181238034463e-05,
196
+ "loss": 0.1525,
197
+ "step": 420
198
+ },
199
+ {
200
+ "epoch": 8.0,
201
+ "learning_rate": 2.316528398213146e-05,
202
+ "loss": 0.1785,
203
+ "step": 440
204
+ },
205
+ {
206
+ "epoch": 8.0,
207
+ "eval_accuracy": 0.9943181818181818,
208
+ "eval_loss": 0.023027343675494194,
209
+ "eval_runtime": 11.2031,
210
+ "eval_samples_per_second": 78.55,
211
+ "eval_steps_per_second": 4.909,
212
+ "step": 440
213
+ },
214
+ {
215
+ "epoch": 8.36,
216
+ "learning_rate": 2.2782386726228464e-05,
217
+ "loss": 0.1256,
218
+ "step": 460
219
+ },
220
+ {
221
+ "epoch": 8.73,
222
+ "learning_rate": 2.2399489470325464e-05,
223
+ "loss": 0.1654,
224
+ "step": 480
225
+ },
226
+ {
227
+ "epoch": 9.0,
228
+ "eval_accuracy": 1.0,
229
+ "eval_loss": 0.007612039800733328,
230
+ "eval_runtime": 11.1318,
231
+ "eval_samples_per_second": 79.053,
232
+ "eval_steps_per_second": 4.941,
233
+ "step": 495
234
+ },
235
+ {
236
+ "epoch": 9.09,
237
+ "learning_rate": 2.2016592214422464e-05,
238
+ "loss": 0.1437,
239
+ "step": 500
240
+ },
241
+ {
242
+ "epoch": 9.45,
243
+ "learning_rate": 2.1633694958519464e-05,
244
+ "loss": 0.1363,
245
+ "step": 520
246
+ },
247
+ {
248
+ "epoch": 9.82,
249
+ "learning_rate": 2.1250797702616464e-05,
250
+ "loss": 0.1322,
251
+ "step": 540
252
+ },
253
+ {
254
+ "epoch": 10.0,
255
+ "eval_accuracy": 1.0,
256
+ "eval_loss": 0.004579578526318073,
257
+ "eval_runtime": 11.0656,
258
+ "eval_samples_per_second": 79.526,
259
+ "eval_steps_per_second": 4.97,
260
+ "step": 550
261
+ },
262
+ {
263
+ "epoch": 10.18,
264
+ "learning_rate": 2.0867900446713468e-05,
265
+ "loss": 0.1242,
266
+ "step": 560
267
+ },
268
+ {
269
+ "epoch": 10.55,
270
+ "learning_rate": 2.0485003190810464e-05,
271
+ "loss": 0.1121,
272
+ "step": 580
273
+ },
274
+ {
275
+ "epoch": 10.91,
276
+ "learning_rate": 2.0102105934907468e-05,
277
+ "loss": 0.1123,
278
+ "step": 600
279
+ },
280
+ {
281
+ "epoch": 11.0,
282
+ "eval_accuracy": 1.0,
283
+ "eval_loss": 0.003508554305881262,
284
+ "eval_runtime": 10.9833,
285
+ "eval_samples_per_second": 80.122,
286
+ "eval_steps_per_second": 5.008,
287
+ "step": 605
288
+ },
289
+ {
290
+ "epoch": 11.27,
291
+ "learning_rate": 1.9719208679004468e-05,
292
+ "loss": 0.1192,
293
+ "step": 620
294
+ },
295
+ {
296
+ "epoch": 11.64,
297
+ "learning_rate": 1.9336311423101468e-05,
298
+ "loss": 0.1179,
299
+ "step": 640
300
+ },
301
+ {
302
+ "epoch": 12.0,
303
+ "learning_rate": 1.8953414167198468e-05,
304
+ "loss": 0.0953,
305
+ "step": 660
306
+ },
307
+ {
308
+ "epoch": 12.0,
309
+ "eval_accuracy": 1.0,
310
+ "eval_loss": 0.002522848779335618,
311
+ "eval_runtime": 11.0556,
312
+ "eval_samples_per_second": 79.598,
313
+ "eval_steps_per_second": 4.975,
314
+ "step": 660
315
+ },
316
+ {
317
+ "epoch": 12.36,
318
+ "learning_rate": 1.8570516911295472e-05,
319
+ "loss": 0.1302,
320
+ "step": 680
321
+ },
322
+ {
323
+ "epoch": 12.73,
324
+ "learning_rate": 1.818761965539247e-05,
325
+ "loss": 0.0864,
326
+ "step": 700
327
+ },
328
+ {
329
+ "epoch": 13.0,
330
+ "eval_accuracy": 1.0,
331
+ "eval_loss": 0.0033287114929407835,
332
+ "eval_runtime": 11.1101,
333
+ "eval_samples_per_second": 79.207,
334
+ "eval_steps_per_second": 4.95,
335
+ "step": 715
336
+ },
337
+ {
338
+ "epoch": 13.09,
339
+ "learning_rate": 1.780472239948947e-05,
340
+ "loss": 0.1334,
341
+ "step": 720
342
+ },
343
+ {
344
+ "epoch": 13.45,
345
+ "learning_rate": 1.7421825143586472e-05,
346
+ "loss": 0.0769,
347
+ "step": 740
348
+ },
349
+ {
350
+ "epoch": 13.82,
351
+ "learning_rate": 1.7038927887683472e-05,
352
+ "loss": 0.0984,
353
+ "step": 760
354
+ },
355
+ {
356
+ "epoch": 14.0,
357
+ "eval_accuracy": 0.9988636363636364,
358
+ "eval_loss": 0.003271339228376746,
359
+ "eval_runtime": 11.0383,
360
+ "eval_samples_per_second": 79.723,
361
+ "eval_steps_per_second": 4.983,
362
+ "step": 770
363
+ },
364
+ {
365
+ "epoch": 14.18,
366
+ "learning_rate": 1.6656030631780472e-05,
367
+ "loss": 0.0932,
368
+ "step": 780
369
+ },
370
+ {
371
+ "epoch": 14.55,
372
+ "learning_rate": 1.6273133375877472e-05,
373
+ "loss": 0.079,
374
+ "step": 800
375
+ },
376
+ {
377
+ "epoch": 14.91,
378
+ "learning_rate": 1.5890236119974476e-05,
379
+ "loss": 0.0952,
380
+ "step": 820
381
+ },
382
+ {
383
+ "epoch": 15.0,
384
+ "eval_accuracy": 1.0,
385
+ "eval_loss": 0.001456564525142312,
386
+ "eval_runtime": 10.9322,
387
+ "eval_samples_per_second": 80.496,
388
+ "eval_steps_per_second": 5.031,
389
+ "step": 825
390
+ },
391
+ {
392
+ "epoch": 15.27,
393
+ "learning_rate": 1.5507338864071473e-05,
394
+ "loss": 0.087,
395
+ "step": 840
396
+ },
397
+ {
398
+ "epoch": 15.64,
399
+ "learning_rate": 1.5124441608168476e-05,
400
+ "loss": 0.0879,
401
+ "step": 860
402
+ },
403
+ {
404
+ "epoch": 16.0,
405
+ "learning_rate": 1.4741544352265476e-05,
406
+ "loss": 0.0678,
407
+ "step": 880
408
+ },
409
+ {
410
+ "epoch": 16.0,
411
+ "eval_accuracy": 1.0,
412
+ "eval_loss": 0.0022302898578345776,
413
+ "eval_runtime": 11.0758,
414
+ "eval_samples_per_second": 79.453,
415
+ "eval_steps_per_second": 4.966,
416
+ "step": 880
417
+ },
418
+ {
419
+ "epoch": 16.36,
420
+ "learning_rate": 1.4358647096362477e-05,
421
+ "loss": 0.0687,
422
+ "step": 900
423
+ },
424
+ {
425
+ "epoch": 16.73,
426
+ "learning_rate": 1.3975749840459477e-05,
427
+ "loss": 0.0592,
428
+ "step": 920
429
+ },
430
+ {
431
+ "epoch": 17.0,
432
+ "eval_accuracy": 1.0,
433
+ "eval_loss": 0.0012722605606541038,
434
+ "eval_runtime": 10.904,
435
+ "eval_samples_per_second": 80.704,
436
+ "eval_steps_per_second": 5.044,
437
+ "step": 935
438
+ },
439
+ {
440
+ "epoch": 17.09,
441
+ "learning_rate": 1.3592852584556478e-05,
442
+ "loss": 0.0701,
443
+ "step": 940
444
+ },
445
+ {
446
+ "epoch": 17.45,
447
+ "learning_rate": 1.3209955328653479e-05,
448
+ "loss": 0.0952,
449
+ "step": 960
450
+ },
451
+ {
452
+ "epoch": 17.82,
453
+ "learning_rate": 1.2827058072750479e-05,
454
+ "loss": 0.0729,
455
+ "step": 980
456
+ },
457
+ {
458
+ "epoch": 18.0,
459
+ "eval_accuracy": 0.9988636363636364,
460
+ "eval_loss": 0.00370142818428576,
461
+ "eval_runtime": 11.1436,
462
+ "eval_samples_per_second": 78.969,
463
+ "eval_steps_per_second": 4.936,
464
+ "step": 990
465
+ },
466
+ {
467
+ "epoch": 18.18,
468
+ "learning_rate": 1.2444160816847479e-05,
469
+ "loss": 0.0841,
470
+ "step": 1000
471
+ },
472
+ {
473
+ "epoch": 18.55,
474
+ "learning_rate": 1.206126356094448e-05,
475
+ "loss": 0.0837,
476
+ "step": 1020
477
+ },
478
+ {
479
+ "epoch": 18.91,
480
+ "learning_rate": 1.167836630504148e-05,
481
+ "loss": 0.0672,
482
+ "step": 1040
483
+ },
484
+ {
485
+ "epoch": 19.0,
486
+ "eval_accuracy": 0.9988636363636364,
487
+ "eval_loss": 0.004141129087656736,
488
+ "eval_runtime": 11.0993,
489
+ "eval_samples_per_second": 79.284,
490
+ "eval_steps_per_second": 4.955,
491
+ "step": 1045
492
+ },
493
+ {
494
+ "epoch": 19.27,
495
+ "learning_rate": 1.1295469049138481e-05,
496
+ "loss": 0.0572,
497
+ "step": 1060
498
+ },
499
+ {
500
+ "epoch": 19.64,
501
+ "learning_rate": 1.0912571793235483e-05,
502
+ "loss": 0.0577,
503
+ "step": 1080
504
+ },
505
+ {
506
+ "epoch": 20.0,
507
+ "learning_rate": 1.0529674537332483e-05,
508
+ "loss": 0.0615,
509
+ "step": 1100
510
+ },
511
+ {
512
+ "epoch": 20.0,
513
+ "eval_accuracy": 1.0,
514
+ "eval_loss": 0.001032730215229094,
515
+ "eval_runtime": 11.0623,
516
+ "eval_samples_per_second": 79.55,
517
+ "eval_steps_per_second": 4.972,
518
+ "step": 1100
519
+ },
520
+ {
521
+ "epoch": 20.36,
522
+ "learning_rate": 1.0146777281429485e-05,
523
+ "loss": 0.0546,
524
+ "step": 1120
525
+ },
526
+ {
527
+ "epoch": 20.73,
528
+ "learning_rate": 9.763880025526483e-06,
529
+ "loss": 0.058,
530
+ "step": 1140
531
+ },
532
+ {
533
+ "epoch": 21.0,
534
+ "eval_accuracy": 1.0,
535
+ "eval_loss": 0.0008914543432183564,
536
+ "eval_runtime": 10.9202,
537
+ "eval_samples_per_second": 80.585,
538
+ "eval_steps_per_second": 5.037,
539
+ "step": 1155
540
+ },
541
+ {
542
+ "epoch": 21.09,
543
+ "learning_rate": 9.380982769623483e-06,
544
+ "loss": 0.0497,
545
+ "step": 1160
546
+ },
547
+ {
548
+ "epoch": 21.45,
549
+ "learning_rate": 8.998085513720485e-06,
550
+ "loss": 0.0538,
551
+ "step": 1180
552
+ },
553
+ {
554
+ "epoch": 21.82,
555
+ "learning_rate": 8.615188257817485e-06,
556
+ "loss": 0.0571,
557
+ "step": 1200
558
+ },
559
+ {
560
+ "epoch": 22.0,
561
+ "eval_accuracy": 0.9988636363636364,
562
+ "eval_loss": 0.002123360289260745,
563
+ "eval_runtime": 10.9798,
564
+ "eval_samples_per_second": 80.147,
565
+ "eval_steps_per_second": 5.009,
566
+ "step": 1210
567
+ },
568
+ {
569
+ "epoch": 22.18,
570
+ "learning_rate": 8.232291001914487e-06,
571
+ "loss": 0.0636,
572
+ "step": 1220
573
+ },
574
+ {
575
+ "epoch": 22.55,
576
+ "learning_rate": 7.849393746011487e-06,
577
+ "loss": 0.0365,
578
+ "step": 1240
579
+ },
580
+ {
581
+ "epoch": 22.91,
582
+ "learning_rate": 7.466496490108488e-06,
583
+ "loss": 0.0755,
584
+ "step": 1260
585
+ },
586
+ {
587
+ "epoch": 23.0,
588
+ "eval_accuracy": 0.9988636363636364,
589
+ "eval_loss": 0.002230195328593254,
590
+ "eval_runtime": 11.0651,
591
+ "eval_samples_per_second": 79.529,
592
+ "eval_steps_per_second": 4.971,
593
+ "step": 1265
594
+ },
595
+ {
596
+ "epoch": 23.27,
597
+ "learning_rate": 7.083599234205488e-06,
598
+ "loss": 0.0763,
599
+ "step": 1280
600
+ },
601
+ {
602
+ "epoch": 23.64,
603
+ "learning_rate": 6.700701978302489e-06,
604
+ "loss": 0.0547,
605
+ "step": 1300
606
+ },
607
+ {
608
+ "epoch": 24.0,
609
+ "learning_rate": 6.317804722399489e-06,
610
+ "loss": 0.0688,
611
+ "step": 1320
612
+ },
613
+ {
614
+ "epoch": 24.0,
615
+ "eval_accuracy": 0.9988636363636364,
616
+ "eval_loss": 0.0025376665871590376,
617
+ "eval_runtime": 10.9568,
618
+ "eval_samples_per_second": 80.315,
619
+ "eval_steps_per_second": 5.02,
620
+ "step": 1320
621
+ },
622
+ {
623
+ "epoch": 24.36,
624
+ "learning_rate": 5.93490746649649e-06,
625
+ "loss": 0.0726,
626
+ "step": 1340
627
+ },
628
+ {
629
+ "epoch": 24.73,
630
+ "learning_rate": 5.5520102105934905e-06,
631
+ "loss": 0.0417,
632
+ "step": 1360
633
+ },
634
+ {
635
+ "epoch": 25.0,
636
+ "eval_accuracy": 1.0,
637
+ "eval_loss": 0.00029166368767619133,
638
+ "eval_runtime": 11.0108,
639
+ "eval_samples_per_second": 79.922,
640
+ "eval_steps_per_second": 4.995,
641
+ "step": 1375
642
+ },
643
+ {
644
+ "epoch": 25.09,
645
+ "learning_rate": 5.1691129546904915e-06,
646
+ "loss": 0.0472,
647
+ "step": 1380
648
+ },
649
+ {
650
+ "epoch": 25.45,
651
+ "learning_rate": 4.7862156987874925e-06,
652
+ "loss": 0.0565,
653
+ "step": 1400
654
+ },
655
+ {
656
+ "epoch": 25.82,
657
+ "learning_rate": 4.4033184428844934e-06,
658
+ "loss": 0.0589,
659
+ "step": 1420
660
+ },
661
+ {
662
+ "epoch": 26.0,
663
+ "eval_accuracy": 1.0,
664
+ "eval_loss": 0.0006550709367729723,
665
+ "eval_runtime": 10.9639,
666
+ "eval_samples_per_second": 80.264,
667
+ "eval_steps_per_second": 5.016,
668
+ "step": 1430
669
+ },
670
+ {
671
+ "epoch": 26.18,
672
+ "learning_rate": 4.020421186981493e-06,
673
+ "loss": 0.0452,
674
+ "step": 1440
675
+ },
676
+ {
677
+ "epoch": 26.55,
678
+ "learning_rate": 3.6375239310784937e-06,
679
+ "loss": 0.0587,
680
+ "step": 1460
681
+ },
682
+ {
683
+ "epoch": 26.91,
684
+ "learning_rate": 3.2546266751754947e-06,
685
+ "loss": 0.0563,
686
+ "step": 1480
687
+ },
688
+ {
689
+ "epoch": 27.0,
690
+ "eval_accuracy": 1.0,
691
+ "eval_loss": 0.0006738207302987576,
692
+ "eval_runtime": 10.975,
693
+ "eval_samples_per_second": 80.182,
694
+ "eval_steps_per_second": 5.011,
695
+ "step": 1485
696
+ },
697
+ {
698
+ "epoch": 27.27,
699
+ "learning_rate": 2.8717294192724956e-06,
700
+ "loss": 0.0441,
701
+ "step": 1500
702
+ },
703
+ {
704
+ "epoch": 27.64,
705
+ "learning_rate": 2.4888321633694958e-06,
706
+ "loss": 0.0401,
707
+ "step": 1520
708
+ },
709
+ {
710
+ "epoch": 28.0,
711
+ "learning_rate": 2.1059349074664967e-06,
712
+ "loss": 0.0603,
713
+ "step": 1540
714
+ },
715
+ {
716
+ "epoch": 28.0,
717
+ "eval_accuracy": 0.9988636363636364,
718
+ "eval_loss": 0.0009842341532930732,
719
+ "eval_runtime": 10.951,
720
+ "eval_samples_per_second": 80.358,
721
+ "eval_steps_per_second": 5.022,
722
+ "step": 1540
723
+ },
724
+ {
725
+ "epoch": 28.36,
726
+ "learning_rate": 1.723037651563497e-06,
727
+ "loss": 0.0417,
728
+ "step": 1560
729
+ },
730
+ {
731
+ "epoch": 28.73,
732
+ "learning_rate": 1.3401403956604976e-06,
733
+ "loss": 0.0469,
734
+ "step": 1580
735
+ },
736
+ {
737
+ "epoch": 29.0,
738
+ "eval_accuracy": 1.0,
739
+ "eval_loss": 0.0004589582094922662,
740
+ "eval_runtime": 10.9725,
741
+ "eval_samples_per_second": 80.2,
742
+ "eval_steps_per_second": 5.013,
743
+ "step": 1595
744
+ },
745
+ {
746
+ "epoch": 29.09,
747
+ "learning_rate": 9.572431397574984e-07,
748
+ "loss": 0.0518,
749
+ "step": 1600
750
+ },
751
+ {
752
+ "epoch": 29.45,
753
+ "learning_rate": 5.743458838544991e-07,
754
+ "loss": 0.0446,
755
+ "step": 1620
756
+ },
757
+ {
758
+ "epoch": 29.82,
759
+ "learning_rate": 1.914486279514997e-07,
760
+ "loss": 0.0525,
761
+ "step": 1640
762
+ },
763
+ {
764
+ "epoch": 30.0,
765
+ "eval_accuracy": 1.0,
766
+ "eval_loss": 0.00037947672535665333,
767
+ "eval_runtime": 10.9402,
768
+ "eval_samples_per_second": 80.438,
769
+ "eval_steps_per_second": 5.027,
770
+ "step": 1650
771
+ },
772
+ {
773
+ "epoch": 30.0,
774
+ "step": 1650,
775
+ "total_flos": 2.4304114274567455e+19,
776
+ "train_loss": 0.19108782262513133,
777
+ "train_runtime": 4786.2205,
778
+ "train_samples_per_second": 22.051,
779
+ "train_steps_per_second": 0.345
780
+ }
781
+ ],
782
+ "max_steps": 1650,
783
+ "num_train_epochs": 30,
784
+ "total_flos": 2.4304114274567455e+19,
785
+ "trial_name": null,
786
+ "trial_params": null
787
+ }