Floriankidev commited on
Commit
e2d2f04
1 Parent(s): 608d311

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +12 -12
  2. eval_results.json +7 -7
  3. train_results.json +7 -7
  4. trainer_state.json +850 -657
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
- {
2
- "epoch": 2.997333333333333,
3
- "eval_accuracy": 0.714,
4
- "eval_loss": 0.9282281398773193,
5
- "eval_runtime": 57.5051,
6
- "eval_samples_per_second": 69.559,
7
- "eval_steps_per_second": 2.174,
8
- "total_flos": 2.6829109150955274e+18,
9
- "train_loss": 1.3250810132756352,
10
- "train_runtime": 2481.2668,
11
- "train_samples_per_second": 43.521,
12
- "train_steps_per_second": 0.34
13
  }
 
1
+ {
2
+ "epoch": 4.982857142857143,
3
+ "eval_accuracy": 0.7499688084840923,
4
+ "eval_loss": 0.8480741381645203,
5
+ "eval_runtime": 72.5483,
6
+ "eval_samples_per_second": 110.478,
7
+ "eval_steps_per_second": 3.46,
8
+ "total_flos": 3.468170115610067e+18,
9
+ "train_loss": 1.2575330436776537,
10
+ "train_runtime": 2631.6311,
11
+ "train_samples_per_second": 53.178,
12
+ "train_steps_per_second": 0.414
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
- {
2
- "epoch": 2.997333333333333,
3
- "eval_accuracy": 0.714,
4
- "eval_loss": 0.9282281398773193,
5
- "eval_runtime": 57.5051,
6
- "eval_samples_per_second": 69.559,
7
- "eval_steps_per_second": 2.174
8
  }
 
1
+ {
2
+ "epoch": 4.982857142857143,
3
+ "eval_accuracy": 0.7499688084840923,
4
+ "eval_loss": 0.8480741381645203,
5
+ "eval_runtime": 72.5483,
6
+ "eval_samples_per_second": 110.478,
7
+ "eval_steps_per_second": 3.46
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
- {
2
- "epoch": 2.997333333333333,
3
- "total_flos": 2.6829109150955274e+18,
4
- "train_loss": 1.3250810132756352,
5
- "train_runtime": 2481.2668,
6
- "train_samples_per_second": 43.521,
7
- "train_steps_per_second": 0.34
8
  }
 
1
+ {
2
+ "epoch": 4.982857142857143,
3
+ "total_flos": 3.468170115610067e+18,
4
+ "train_loss": 1.2575330436776537,
5
+ "train_runtime": 2631.6311,
6
+ "train_samples_per_second": 53.178,
7
+ "train_steps_per_second": 0.414
8
  }
trainer_state.json CHANGED
@@ -1,657 +1,850 @@
1
- {
2
- "best_metric": 0.714,
3
- "best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-eurosat/checkpoint-843",
4
- "epoch": 2.997333333333333,
5
- "eval_steps": 500,
6
- "global_step": 843,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.035555555555555556,
13
- "grad_norm": 7.188250541687012,
14
- "learning_rate": 5.882352941176471e-06,
15
- "loss": 2.7793,
16
- "step": 10
17
- },
18
- {
19
- "epoch": 0.07111111111111111,
20
- "grad_norm": 15.252310752868652,
21
- "learning_rate": 1.1764705882352942e-05,
22
- "loss": 2.7235,
23
- "step": 20
24
- },
25
- {
26
- "epoch": 0.10666666666666667,
27
- "grad_norm": 20.026681900024414,
28
- "learning_rate": 1.7647058823529414e-05,
29
- "loss": 2.6203,
30
- "step": 30
31
- },
32
- {
33
- "epoch": 0.14222222222222222,
34
- "grad_norm": 12.86706829071045,
35
- "learning_rate": 2.3529411764705884e-05,
36
- "loss": 2.4325,
37
- "step": 40
38
- },
39
- {
40
- "epoch": 0.17777777777777778,
41
- "grad_norm": 22.171051025390625,
42
- "learning_rate": 2.9411764705882354e-05,
43
- "loss": 2.1824,
44
- "step": 50
45
- },
46
- {
47
- "epoch": 0.21333333333333335,
48
- "grad_norm": 16.04732894897461,
49
- "learning_rate": 3.529411764705883e-05,
50
- "loss": 1.977,
51
- "step": 60
52
- },
53
- {
54
- "epoch": 0.24888888888888888,
55
- "grad_norm": 11.828266143798828,
56
- "learning_rate": 4.11764705882353e-05,
57
- "loss": 1.8274,
58
- "step": 70
59
- },
60
- {
61
- "epoch": 0.28444444444444444,
62
- "grad_norm": 21.65501594543457,
63
- "learning_rate": 4.705882352941177e-05,
64
- "loss": 1.7634,
65
- "step": 80
66
- },
67
- {
68
- "epoch": 0.32,
69
- "grad_norm": 9.372465133666992,
70
- "learning_rate": 4.967018469656992e-05,
71
- "loss": 1.7128,
72
- "step": 90
73
- },
74
- {
75
- "epoch": 0.35555555555555557,
76
- "grad_norm": 8.170567512512207,
77
- "learning_rate": 4.901055408970976e-05,
78
- "loss": 1.5845,
79
- "step": 100
80
- },
81
- {
82
- "epoch": 0.39111111111111113,
83
- "grad_norm": 8.884113311767578,
84
- "learning_rate": 4.835092348284961e-05,
85
- "loss": 1.5652,
86
- "step": 110
87
- },
88
- {
89
- "epoch": 0.4266666666666667,
90
- "grad_norm": 9.709837913513184,
91
- "learning_rate": 4.7691292875989446e-05,
92
- "loss": 1.6439,
93
- "step": 120
94
- },
95
- {
96
- "epoch": 0.4622222222222222,
97
- "grad_norm": 12.577466011047363,
98
- "learning_rate": 4.703166226912929e-05,
99
- "loss": 1.6118,
100
- "step": 130
101
- },
102
- {
103
- "epoch": 0.49777777777777776,
104
- "grad_norm": 8.617718696594238,
105
- "learning_rate": 4.6372031662269136e-05,
106
- "loss": 1.4722,
107
- "step": 140
108
- },
109
- {
110
- "epoch": 0.5333333333333333,
111
- "grad_norm": 11.635614395141602,
112
- "learning_rate": 4.5712401055408974e-05,
113
- "loss": 1.4748,
114
- "step": 150
115
- },
116
- {
117
- "epoch": 0.5688888888888889,
118
- "grad_norm": 10.609350204467773,
119
- "learning_rate": 4.505277044854881e-05,
120
- "loss": 1.4546,
121
- "step": 160
122
- },
123
- {
124
- "epoch": 0.6044444444444445,
125
- "grad_norm": 9.702072143554688,
126
- "learning_rate": 4.439313984168866e-05,
127
- "loss": 1.4868,
128
- "step": 170
129
- },
130
- {
131
- "epoch": 0.64,
132
- "grad_norm": 8.165207862854004,
133
- "learning_rate": 4.3733509234828496e-05,
134
- "loss": 1.4197,
135
- "step": 180
136
- },
137
- {
138
- "epoch": 0.6755555555555556,
139
- "grad_norm": 8.345643043518066,
140
- "learning_rate": 4.307387862796834e-05,
141
- "loss": 1.4298,
142
- "step": 190
143
- },
144
- {
145
- "epoch": 0.7111111111111111,
146
- "grad_norm": 9.1146821975708,
147
- "learning_rate": 4.2414248021108186e-05,
148
- "loss": 1.437,
149
- "step": 200
150
- },
151
- {
152
- "epoch": 0.7466666666666667,
153
- "grad_norm": 10.016292572021484,
154
- "learning_rate": 4.1754617414248024e-05,
155
- "loss": 1.3961,
156
- "step": 210
157
- },
158
- {
159
- "epoch": 0.7822222222222223,
160
- "grad_norm": 6.980156421661377,
161
- "learning_rate": 4.109498680738786e-05,
162
- "loss": 1.3571,
163
- "step": 220
164
- },
165
- {
166
- "epoch": 0.8177777777777778,
167
- "grad_norm": 9.158329010009766,
168
- "learning_rate": 4.043535620052771e-05,
169
- "loss": 1.3691,
170
- "step": 230
171
- },
172
- {
173
- "epoch": 0.8533333333333334,
174
- "grad_norm": 8.437979698181152,
175
- "learning_rate": 3.9775725593667545e-05,
176
- "loss": 1.2722,
177
- "step": 240
178
- },
179
- {
180
- "epoch": 0.8888888888888888,
181
- "grad_norm": 7.31761360168457,
182
- "learning_rate": 3.911609498680739e-05,
183
- "loss": 1.3382,
184
- "step": 250
185
- },
186
- {
187
- "epoch": 0.9244444444444444,
188
- "grad_norm": 10.811269760131836,
189
- "learning_rate": 3.8456464379947235e-05,
190
- "loss": 1.3005,
191
- "step": 260
192
- },
193
- {
194
- "epoch": 0.96,
195
- "grad_norm": 7.826028823852539,
196
- "learning_rate": 3.779683377308707e-05,
197
- "loss": 1.2971,
198
- "step": 270
199
- },
200
- {
201
- "epoch": 0.9955555555555555,
202
- "grad_norm": 8.89599609375,
203
- "learning_rate": 3.713720316622691e-05,
204
- "loss": 1.3142,
205
- "step": 280
206
- },
207
- {
208
- "epoch": 0.9991111111111111,
209
- "eval_accuracy": 0.629,
210
- "eval_loss": 1.176965594291687,
211
- "eval_runtime": 61.7496,
212
- "eval_samples_per_second": 64.778,
213
- "eval_steps_per_second": 2.024,
214
- "step": 281
215
- },
216
- {
217
- "epoch": 1.031111111111111,
218
- "grad_norm": 8.035099029541016,
219
- "learning_rate": 3.6477572559366756e-05,
220
- "loss": 1.2879,
221
- "step": 290
222
- },
223
- {
224
- "epoch": 1.0666666666666667,
225
- "grad_norm": 10.644525527954102,
226
- "learning_rate": 3.58179419525066e-05,
227
- "loss": 1.2803,
228
- "step": 300
229
- },
230
- {
231
- "epoch": 1.1022222222222222,
232
- "grad_norm": 12.751725196838379,
233
- "learning_rate": 3.515831134564644e-05,
234
- "loss": 1.2765,
235
- "step": 310
236
- },
237
- {
238
- "epoch": 1.1377777777777778,
239
- "grad_norm": 9.897214889526367,
240
- "learning_rate": 3.4498680738786285e-05,
241
- "loss": 1.234,
242
- "step": 320
243
- },
244
- {
245
- "epoch": 1.1733333333333333,
246
- "grad_norm": 6.898780822753906,
247
- "learning_rate": 3.383905013192612e-05,
248
- "loss": 1.2247,
249
- "step": 330
250
- },
251
- {
252
- "epoch": 1.208888888888889,
253
- "grad_norm": 9.826775550842285,
254
- "learning_rate": 3.317941952506596e-05,
255
- "loss": 1.302,
256
- "step": 340
257
- },
258
- {
259
- "epoch": 1.2444444444444445,
260
- "grad_norm": 7.03531551361084,
261
- "learning_rate": 3.2519788918205806e-05,
262
- "loss": 1.2956,
263
- "step": 350
264
- },
265
- {
266
- "epoch": 1.28,
267
- "grad_norm": 7.564468860626221,
268
- "learning_rate": 3.186015831134565e-05,
269
- "loss": 1.2087,
270
- "step": 360
271
- },
272
- {
273
- "epoch": 1.3155555555555556,
274
- "grad_norm": 9.157870292663574,
275
- "learning_rate": 3.120052770448549e-05,
276
- "loss": 1.2794,
277
- "step": 370
278
- },
279
- {
280
- "epoch": 1.3511111111111112,
281
- "grad_norm": 6.955115795135498,
282
- "learning_rate": 3.0540897097625334e-05,
283
- "loss": 1.2916,
284
- "step": 380
285
- },
286
- {
287
- "epoch": 1.3866666666666667,
288
- "grad_norm": 8.055458068847656,
289
- "learning_rate": 2.9881266490765176e-05,
290
- "loss": 1.2643,
291
- "step": 390
292
- },
293
- {
294
- "epoch": 1.4222222222222223,
295
- "grad_norm": 8.577414512634277,
296
- "learning_rate": 2.9221635883905014e-05,
297
- "loss": 1.2435,
298
- "step": 400
299
- },
300
- {
301
- "epoch": 1.4577777777777778,
302
- "grad_norm": 8.334266662597656,
303
- "learning_rate": 2.8562005277044855e-05,
304
- "loss": 1.1744,
305
- "step": 410
306
- },
307
- {
308
- "epoch": 1.4933333333333334,
309
- "grad_norm": 7.725725173950195,
310
- "learning_rate": 2.79023746701847e-05,
311
- "loss": 1.1909,
312
- "step": 420
313
- },
314
- {
315
- "epoch": 1.528888888888889,
316
- "grad_norm": 10.035604476928711,
317
- "learning_rate": 2.724274406332454e-05,
318
- "loss": 1.1839,
319
- "step": 430
320
- },
321
- {
322
- "epoch": 1.5644444444444443,
323
- "grad_norm": 8.928838729858398,
324
- "learning_rate": 2.658311345646438e-05,
325
- "loss": 1.2524,
326
- "step": 440
327
- },
328
- {
329
- "epoch": 1.6,
330
- "grad_norm": 10.327030181884766,
331
- "learning_rate": 2.5923482849604225e-05,
332
- "loss": 1.2254,
333
- "step": 450
334
- },
335
- {
336
- "epoch": 1.6355555555555554,
337
- "grad_norm": 7.943435192108154,
338
- "learning_rate": 2.5263852242744063e-05,
339
- "loss": 1.1067,
340
- "step": 460
341
- },
342
- {
343
- "epoch": 1.6711111111111112,
344
- "grad_norm": 9.097575187683105,
345
- "learning_rate": 2.4604221635883905e-05,
346
- "loss": 1.1412,
347
- "step": 470
348
- },
349
- {
350
- "epoch": 1.7066666666666666,
351
- "grad_norm": 8.329163551330566,
352
- "learning_rate": 2.3944591029023746e-05,
353
- "loss": 1.1277,
354
- "step": 480
355
- },
356
- {
357
- "epoch": 1.7422222222222223,
358
- "grad_norm": 9.431710243225098,
359
- "learning_rate": 2.328496042216359e-05,
360
- "loss": 1.1697,
361
- "step": 490
362
- },
363
- {
364
- "epoch": 1.7777777777777777,
365
- "grad_norm": 7.484973907470703,
366
- "learning_rate": 2.262532981530343e-05,
367
- "loss": 1.1522,
368
- "step": 500
369
- },
370
- {
371
- "epoch": 1.8133333333333335,
372
- "grad_norm": 8.600388526916504,
373
- "learning_rate": 2.196569920844327e-05,
374
- "loss": 1.1673,
375
- "step": 510
376
- },
377
- {
378
- "epoch": 1.8488888888888888,
379
- "grad_norm": 7.894708156585693,
380
- "learning_rate": 2.1306068601583116e-05,
381
- "loss": 1.1206,
382
- "step": 520
383
- },
384
- {
385
- "epoch": 1.8844444444444446,
386
- "grad_norm": 9.636763572692871,
387
- "learning_rate": 2.0646437994722954e-05,
388
- "loss": 1.2176,
389
- "step": 530
390
- },
391
- {
392
- "epoch": 1.92,
393
- "grad_norm": 9.740330696105957,
394
- "learning_rate": 1.9986807387862796e-05,
395
- "loss": 1.2078,
396
- "step": 540
397
- },
398
- {
399
- "epoch": 1.9555555555555557,
400
- "grad_norm": 7.4491353034973145,
401
- "learning_rate": 1.932717678100264e-05,
402
- "loss": 1.1472,
403
- "step": 550
404
- },
405
- {
406
- "epoch": 1.991111111111111,
407
- "grad_norm": 10.187119483947754,
408
- "learning_rate": 1.866754617414248e-05,
409
- "loss": 1.0761,
410
- "step": 560
411
- },
412
- {
413
- "epoch": 1.9982222222222221,
414
- "eval_accuracy": 0.69825,
415
- "eval_loss": 1.0089702606201172,
416
- "eval_runtime": 60.6333,
417
- "eval_samples_per_second": 65.97,
418
- "eval_steps_per_second": 2.062,
419
- "step": 562
420
- },
421
- {
422
- "epoch": 2.026666666666667,
423
- "grad_norm": 10.728060722351074,
424
- "learning_rate": 1.8007915567282324e-05,
425
- "loss": 1.1074,
426
- "step": 570
427
- },
428
- {
429
- "epoch": 2.062222222222222,
430
- "grad_norm": 6.846221923828125,
431
- "learning_rate": 1.7348284960422166e-05,
432
- "loss": 1.1202,
433
- "step": 580
434
- },
435
- {
436
- "epoch": 2.097777777777778,
437
- "grad_norm": 7.134582996368408,
438
- "learning_rate": 1.6688654353562007e-05,
439
- "loss": 1.1013,
440
- "step": 590
441
- },
442
- {
443
- "epoch": 2.1333333333333333,
444
- "grad_norm": 6.4924397468566895,
445
- "learning_rate": 1.602902374670185e-05,
446
- "loss": 1.1429,
447
- "step": 600
448
- },
449
- {
450
- "epoch": 2.168888888888889,
451
- "grad_norm": 7.079750061035156,
452
- "learning_rate": 1.536939313984169e-05,
453
- "loss": 1.1282,
454
- "step": 610
455
- },
456
- {
457
- "epoch": 2.2044444444444444,
458
- "grad_norm": 11.857382774353027,
459
- "learning_rate": 1.470976253298153e-05,
460
- "loss": 1.0689,
461
- "step": 620
462
- },
463
- {
464
- "epoch": 2.24,
465
- "grad_norm": 7.431779861450195,
466
- "learning_rate": 1.4050131926121373e-05,
467
- "loss": 1.1116,
468
- "step": 630
469
- },
470
- {
471
- "epoch": 2.2755555555555556,
472
- "grad_norm": 8.456690788269043,
473
- "learning_rate": 1.3390501319261215e-05,
474
- "loss": 1.0721,
475
- "step": 640
476
- },
477
- {
478
- "epoch": 2.311111111111111,
479
- "grad_norm": 10.243916511535645,
480
- "learning_rate": 1.2730870712401055e-05,
481
- "loss": 1.1046,
482
- "step": 650
483
- },
484
- {
485
- "epoch": 2.3466666666666667,
486
- "grad_norm": 9.12217903137207,
487
- "learning_rate": 1.2071240105540896e-05,
488
- "loss": 1.1231,
489
- "step": 660
490
- },
491
- {
492
- "epoch": 2.3822222222222225,
493
- "grad_norm": 6.960771560668945,
494
- "learning_rate": 1.141160949868074e-05,
495
- "loss": 1.0449,
496
- "step": 670
497
- },
498
- {
499
- "epoch": 2.417777777777778,
500
- "grad_norm": 6.853394031524658,
501
- "learning_rate": 1.0751978891820581e-05,
502
- "loss": 1.0166,
503
- "step": 680
504
- },
505
- {
506
- "epoch": 2.453333333333333,
507
- "grad_norm": 6.347965717315674,
508
- "learning_rate": 1.0092348284960421e-05,
509
- "loss": 1.0753,
510
- "step": 690
511
- },
512
- {
513
- "epoch": 2.488888888888889,
514
- "grad_norm": 7.328911304473877,
515
- "learning_rate": 9.432717678100264e-06,
516
- "loss": 1.0528,
517
- "step": 700
518
- },
519
- {
520
- "epoch": 2.5244444444444447,
521
- "grad_norm": 7.785583019256592,
522
- "learning_rate": 8.773087071240106e-06,
523
- "loss": 1.1255,
524
- "step": 710
525
- },
526
- {
527
- "epoch": 2.56,
528
- "grad_norm": 8.781428337097168,
529
- "learning_rate": 8.113456464379948e-06,
530
- "loss": 1.0989,
531
- "step": 720
532
- },
533
- {
534
- "epoch": 2.5955555555555554,
535
- "grad_norm": 6.994375228881836,
536
- "learning_rate": 7.453825857519789e-06,
537
- "loss": 1.0473,
538
- "step": 730
539
- },
540
- {
541
- "epoch": 2.631111111111111,
542
- "grad_norm": 8.248795509338379,
543
- "learning_rate": 6.794195250659631e-06,
544
- "loss": 1.0968,
545
- "step": 740
546
- },
547
- {
548
- "epoch": 2.6666666666666665,
549
- "grad_norm": 7.137526035308838,
550
- "learning_rate": 6.134564643799472e-06,
551
- "loss": 1.0421,
552
- "step": 750
553
- },
554
- {
555
- "epoch": 2.7022222222222223,
556
- "grad_norm": 10.059906959533691,
557
- "learning_rate": 5.474934036939315e-06,
558
- "loss": 1.0794,
559
- "step": 760
560
- },
561
- {
562
- "epoch": 2.7377777777777776,
563
- "grad_norm": 7.585984230041504,
564
- "learning_rate": 4.8153034300791555e-06,
565
- "loss": 1.0677,
566
- "step": 770
567
- },
568
- {
569
- "epoch": 2.7733333333333334,
570
- "grad_norm": 7.5168137550354,
571
- "learning_rate": 4.155672823218998e-06,
572
- "loss": 0.9899,
573
- "step": 780
574
- },
575
- {
576
- "epoch": 2.8088888888888888,
577
- "grad_norm": 8.242053985595703,
578
- "learning_rate": 3.496042216358839e-06,
579
- "loss": 1.1027,
580
- "step": 790
581
- },
582
- {
583
- "epoch": 2.8444444444444446,
584
- "grad_norm": 7.243769645690918,
585
- "learning_rate": 2.836411609498681e-06,
586
- "loss": 1.1244,
587
- "step": 800
588
- },
589
- {
590
- "epoch": 2.88,
591
- "grad_norm": 9.468960762023926,
592
- "learning_rate": 2.1767810026385226e-06,
593
- "loss": 1.076,
594
- "step": 810
595
- },
596
- {
597
- "epoch": 2.9155555555555557,
598
- "grad_norm": 9.269329071044922,
599
- "learning_rate": 1.5171503957783642e-06,
600
- "loss": 1.0397,
601
- "step": 820
602
- },
603
- {
604
- "epoch": 2.951111111111111,
605
- "grad_norm": 8.011164665222168,
606
- "learning_rate": 8.575197889182058e-07,
607
- "loss": 1.0792,
608
- "step": 830
609
- },
610
- {
611
- "epoch": 2.986666666666667,
612
- "grad_norm": 7.399364471435547,
613
- "learning_rate": 1.9788918205804752e-07,
614
- "loss": 1.0439,
615
- "step": 840
616
- },
617
- {
618
- "epoch": 2.997333333333333,
619
- "eval_accuracy": 0.714,
620
- "eval_loss": 0.9282281398773193,
621
- "eval_runtime": 56.4403,
622
- "eval_samples_per_second": 70.871,
623
- "eval_steps_per_second": 2.215,
624
- "step": 843
625
- },
626
- {
627
- "epoch": 2.997333333333333,
628
- "step": 843,
629
- "total_flos": 2.6829109150955274e+18,
630
- "train_loss": 1.3250810132756352,
631
- "train_runtime": 2481.2668,
632
- "train_samples_per_second": 43.521,
633
- "train_steps_per_second": 0.34
634
- }
635
- ],
636
- "logging_steps": 10,
637
- "max_steps": 843,
638
- "num_input_tokens_seen": 0,
639
- "num_train_epochs": 3,
640
- "save_steps": 500,
641
- "stateful_callbacks": {
642
- "TrainerControl": {
643
- "args": {
644
- "should_epoch_stop": false,
645
- "should_evaluate": false,
646
- "should_log": false,
647
- "should_save": true,
648
- "should_training_stop": true
649
- },
650
- "attributes": {}
651
- }
652
- },
653
- "total_flos": 2.6829109150955274e+18,
654
- "train_batch_size": 32,
655
- "trial_name": null,
656
- "trial_params": null
657
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.7499688084840923,
3
+ "best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-eurosat\\checkpoint-1090",
4
+ "epoch": 4.982857142857143,
5
+ "eval_steps": 500,
6
+ "global_step": 1090,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.045714285714285714,
13
+ "grad_norm": 6.963473320007324,
14
+ "learning_rate": 4.587155963302753e-06,
15
+ "loss": 2.8055,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.09142857142857143,
20
+ "grad_norm": 11.695841789245605,
21
+ "learning_rate": 9.174311926605506e-06,
22
+ "loss": 2.7626,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.13714285714285715,
27
+ "grad_norm": 10.859987258911133,
28
+ "learning_rate": 1.3761467889908258e-05,
29
+ "loss": 2.6863,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 0.18285714285714286,
34
+ "grad_norm": 14.530256271362305,
35
+ "learning_rate": 1.834862385321101e-05,
36
+ "loss": 2.5932,
37
+ "step": 40
38
+ },
39
+ {
40
+ "epoch": 0.22857142857142856,
41
+ "grad_norm": 22.234731674194336,
42
+ "learning_rate": 2.2935779816513765e-05,
43
+ "loss": 2.4006,
44
+ "step": 50
45
+ },
46
+ {
47
+ "epoch": 0.2742857142857143,
48
+ "grad_norm": 21.946706771850586,
49
+ "learning_rate": 2.7522935779816515e-05,
50
+ "loss": 2.1545,
51
+ "step": 60
52
+ },
53
+ {
54
+ "epoch": 0.32,
55
+ "grad_norm": 31.88918685913086,
56
+ "learning_rate": 3.211009174311927e-05,
57
+ "loss": 1.9868,
58
+ "step": 70
59
+ },
60
+ {
61
+ "epoch": 0.3657142857142857,
62
+ "grad_norm": 17.8656063079834,
63
+ "learning_rate": 3.669724770642202e-05,
64
+ "loss": 1.8264,
65
+ "step": 80
66
+ },
67
+ {
68
+ "epoch": 0.4114285714285714,
69
+ "grad_norm": 15.480560302734375,
70
+ "learning_rate": 4.1284403669724776e-05,
71
+ "loss": 1.8301,
72
+ "step": 90
73
+ },
74
+ {
75
+ "epoch": 0.45714285714285713,
76
+ "grad_norm": 13.637527465820312,
77
+ "learning_rate": 4.587155963302753e-05,
78
+ "loss": 1.7708,
79
+ "step": 100
80
+ },
81
+ {
82
+ "epoch": 0.5028571428571429,
83
+ "grad_norm": 16.25617218017578,
84
+ "learning_rate": 4.994903160040775e-05,
85
+ "loss": 1.7133,
86
+ "step": 110
87
+ },
88
+ {
89
+ "epoch": 0.5485714285714286,
90
+ "grad_norm": 10.56847095489502,
91
+ "learning_rate": 4.943934760448522e-05,
92
+ "loss": 1.5611,
93
+ "step": 120
94
+ },
95
+ {
96
+ "epoch": 0.5942857142857143,
97
+ "grad_norm": 16.05897331237793,
98
+ "learning_rate": 4.892966360856269e-05,
99
+ "loss": 1.6387,
100
+ "step": 130
101
+ },
102
+ {
103
+ "epoch": 0.64,
104
+ "grad_norm": 17.286190032958984,
105
+ "learning_rate": 4.8419979612640164e-05,
106
+ "loss": 1.519,
107
+ "step": 140
108
+ },
109
+ {
110
+ "epoch": 0.6857142857142857,
111
+ "grad_norm": 13.946508407592773,
112
+ "learning_rate": 4.7910295616717635e-05,
113
+ "loss": 1.4708,
114
+ "step": 150
115
+ },
116
+ {
117
+ "epoch": 0.7314285714285714,
118
+ "grad_norm": 10.632002830505371,
119
+ "learning_rate": 4.740061162079511e-05,
120
+ "loss": 1.5119,
121
+ "step": 160
122
+ },
123
+ {
124
+ "epoch": 0.7771428571428571,
125
+ "grad_norm": 11.511331558227539,
126
+ "learning_rate": 4.6890927624872586e-05,
127
+ "loss": 1.4476,
128
+ "step": 170
129
+ },
130
+ {
131
+ "epoch": 0.8228571428571428,
132
+ "grad_norm": 9.265079498291016,
133
+ "learning_rate": 4.638124362895006e-05,
134
+ "loss": 1.4864,
135
+ "step": 180
136
+ },
137
+ {
138
+ "epoch": 0.8685714285714285,
139
+ "grad_norm": 12.077649116516113,
140
+ "learning_rate": 4.587155963302753e-05,
141
+ "loss": 1.4542,
142
+ "step": 190
143
+ },
144
+ {
145
+ "epoch": 0.9142857142857143,
146
+ "grad_norm": 8.974185943603516,
147
+ "learning_rate": 4.5361875637104995e-05,
148
+ "loss": 1.4505,
149
+ "step": 200
150
+ },
151
+ {
152
+ "epoch": 0.96,
153
+ "grad_norm": 7.196322441101074,
154
+ "learning_rate": 4.4852191641182466e-05,
155
+ "loss": 1.4736,
156
+ "step": 210
157
+ },
158
+ {
159
+ "epoch": 0.9965714285714286,
160
+ "eval_accuracy": 0.6177167810355584,
161
+ "eval_loss": 1.2365009784698486,
162
+ "eval_runtime": 74.6739,
163
+ "eval_samples_per_second": 107.333,
164
+ "eval_steps_per_second": 3.361,
165
+ "step": 218
166
+ },
167
+ {
168
+ "epoch": 1.0057142857142858,
169
+ "grad_norm": 8.575163841247559,
170
+ "learning_rate": 4.434250764525994e-05,
171
+ "loss": 1.2932,
172
+ "step": 220
173
+ },
174
+ {
175
+ "epoch": 1.0514285714285714,
176
+ "grad_norm": 9.491937637329102,
177
+ "learning_rate": 4.383282364933741e-05,
178
+ "loss": 1.3322,
179
+ "step": 230
180
+ },
181
+ {
182
+ "epoch": 1.0971428571428572,
183
+ "grad_norm": 7.442806720733643,
184
+ "learning_rate": 4.332313965341488e-05,
185
+ "loss": 1.3634,
186
+ "step": 240
187
+ },
188
+ {
189
+ "epoch": 1.1428571428571428,
190
+ "grad_norm": 11.594585418701172,
191
+ "learning_rate": 4.281345565749236e-05,
192
+ "loss": 1.3838,
193
+ "step": 250
194
+ },
195
+ {
196
+ "epoch": 1.1885714285714286,
197
+ "grad_norm": 9.72411823272705,
198
+ "learning_rate": 4.230377166156983e-05,
199
+ "loss": 1.3695,
200
+ "step": 260
201
+ },
202
+ {
203
+ "epoch": 1.2342857142857142,
204
+ "grad_norm": 8.765079498291016,
205
+ "learning_rate": 4.1794087665647304e-05,
206
+ "loss": 1.241,
207
+ "step": 270
208
+ },
209
+ {
210
+ "epoch": 1.28,
211
+ "grad_norm": 7.949846267700195,
212
+ "learning_rate": 4.1284403669724776e-05,
213
+ "loss": 1.3099,
214
+ "step": 280
215
+ },
216
+ {
217
+ "epoch": 1.3257142857142856,
218
+ "grad_norm": 7.360624313354492,
219
+ "learning_rate": 4.077471967380224e-05,
220
+ "loss": 1.3464,
221
+ "step": 290
222
+ },
223
+ {
224
+ "epoch": 1.3714285714285714,
225
+ "grad_norm": 9.947721481323242,
226
+ "learning_rate": 4.026503567787971e-05,
227
+ "loss": 1.269,
228
+ "step": 300
229
+ },
230
+ {
231
+ "epoch": 1.4171428571428573,
232
+ "grad_norm": 7.578132629394531,
233
+ "learning_rate": 3.9755351681957185e-05,
234
+ "loss": 1.2697,
235
+ "step": 310
236
+ },
237
+ {
238
+ "epoch": 1.4628571428571429,
239
+ "grad_norm": 7.614137172698975,
240
+ "learning_rate": 3.9245667686034656e-05,
241
+ "loss": 1.2017,
242
+ "step": 320
243
+ },
244
+ {
245
+ "epoch": 1.5085714285714285,
246
+ "grad_norm": 6.3952107429504395,
247
+ "learning_rate": 3.8735983690112135e-05,
248
+ "loss": 1.3131,
249
+ "step": 330
250
+ },
251
+ {
252
+ "epoch": 1.5542857142857143,
253
+ "grad_norm": 11.049560546875,
254
+ "learning_rate": 3.822629969418961e-05,
255
+ "loss": 1.2547,
256
+ "step": 340
257
+ },
258
+ {
259
+ "epoch": 1.6,
260
+ "grad_norm": 7.0264129638671875,
261
+ "learning_rate": 3.771661569826708e-05,
262
+ "loss": 1.2384,
263
+ "step": 350
264
+ },
265
+ {
266
+ "epoch": 1.6457142857142857,
267
+ "grad_norm": 8.316610336303711,
268
+ "learning_rate": 3.720693170234455e-05,
269
+ "loss": 1.2679,
270
+ "step": 360
271
+ },
272
+ {
273
+ "epoch": 1.6914285714285713,
274
+ "grad_norm": 7.154911041259766,
275
+ "learning_rate": 3.669724770642202e-05,
276
+ "loss": 1.3185,
277
+ "step": 370
278
+ },
279
+ {
280
+ "epoch": 1.737142857142857,
281
+ "grad_norm": 7.319551944732666,
282
+ "learning_rate": 3.6187563710499494e-05,
283
+ "loss": 1.1742,
284
+ "step": 380
285
+ },
286
+ {
287
+ "epoch": 1.782857142857143,
288
+ "grad_norm": 7.423411846160889,
289
+ "learning_rate": 3.567787971457696e-05,
290
+ "loss": 1.1981,
291
+ "step": 390
292
+ },
293
+ {
294
+ "epoch": 1.8285714285714287,
295
+ "grad_norm": 9.163586616516113,
296
+ "learning_rate": 3.516819571865443e-05,
297
+ "loss": 1.2371,
298
+ "step": 400
299
+ },
300
+ {
301
+ "epoch": 1.8742857142857143,
302
+ "grad_norm": 7.521251678466797,
303
+ "learning_rate": 3.465851172273191e-05,
304
+ "loss": 1.2717,
305
+ "step": 410
306
+ },
307
+ {
308
+ "epoch": 1.92,
309
+ "grad_norm": 10.229044914245605,
310
+ "learning_rate": 3.414882772680938e-05,
311
+ "loss": 1.206,
312
+ "step": 420
313
+ },
314
+ {
315
+ "epoch": 1.9657142857142857,
316
+ "grad_norm": 7.633674621582031,
317
+ "learning_rate": 3.363914373088685e-05,
318
+ "loss": 1.2161,
319
+ "step": 430
320
+ },
321
+ {
322
+ "epoch": 1.9977142857142858,
323
+ "eval_accuracy": 0.6915782907049283,
324
+ "eval_loss": 1.0157994031906128,
325
+ "eval_runtime": 74.9081,
326
+ "eval_samples_per_second": 106.998,
327
+ "eval_steps_per_second": 3.351,
328
+ "step": 437
329
+ },
330
+ {
331
+ "epoch": 2.0114285714285716,
332
+ "grad_norm": 8.755953788757324,
333
+ "learning_rate": 3.3129459734964325e-05,
334
+ "loss": 1.1613,
335
+ "step": 440
336
+ },
337
+ {
338
+ "epoch": 2.057142857142857,
339
+ "grad_norm": 7.305307865142822,
340
+ "learning_rate": 3.26197757390418e-05,
341
+ "loss": 1.0986,
342
+ "step": 450
343
+ },
344
+ {
345
+ "epoch": 2.1028571428571428,
346
+ "grad_norm": 8.31972885131836,
347
+ "learning_rate": 3.211009174311927e-05,
348
+ "loss": 1.1722,
349
+ "step": 460
350
+ },
351
+ {
352
+ "epoch": 2.1485714285714286,
353
+ "grad_norm": 8.749483108520508,
354
+ "learning_rate": 3.160040774719674e-05,
355
+ "loss": 1.1203,
356
+ "step": 470
357
+ },
358
+ {
359
+ "epoch": 2.1942857142857144,
360
+ "grad_norm": 6.05934476852417,
361
+ "learning_rate": 3.1090723751274206e-05,
362
+ "loss": 1.1225,
363
+ "step": 480
364
+ },
365
+ {
366
+ "epoch": 2.24,
367
+ "grad_norm": 10.040249824523926,
368
+ "learning_rate": 3.0581039755351684e-05,
369
+ "loss": 1.2017,
370
+ "step": 490
371
+ },
372
+ {
373
+ "epoch": 2.2857142857142856,
374
+ "grad_norm": 8.599287986755371,
375
+ "learning_rate": 3.0071355759429153e-05,
376
+ "loss": 1.1669,
377
+ "step": 500
378
+ },
379
+ {
380
+ "epoch": 2.3314285714285714,
381
+ "grad_norm": 7.941746711730957,
382
+ "learning_rate": 2.9561671763506628e-05,
383
+ "loss": 1.1189,
384
+ "step": 510
385
+ },
386
+ {
387
+ "epoch": 2.3771428571428572,
388
+ "grad_norm": 13.228888511657715,
389
+ "learning_rate": 2.90519877675841e-05,
390
+ "loss": 1.1882,
391
+ "step": 520
392
+ },
393
+ {
394
+ "epoch": 2.422857142857143,
395
+ "grad_norm": 7.503291130065918,
396
+ "learning_rate": 2.854230377166157e-05,
397
+ "loss": 1.185,
398
+ "step": 530
399
+ },
400
+ {
401
+ "epoch": 2.4685714285714284,
402
+ "grad_norm": 7.043280124664307,
403
+ "learning_rate": 2.8032619775739043e-05,
404
+ "loss": 1.1577,
405
+ "step": 540
406
+ },
407
+ {
408
+ "epoch": 2.5142857142857142,
409
+ "grad_norm": 8.937540054321289,
410
+ "learning_rate": 2.7522935779816515e-05,
411
+ "loss": 1.1539,
412
+ "step": 550
413
+ },
414
+ {
415
+ "epoch": 2.56,
416
+ "grad_norm": 8.076375961303711,
417
+ "learning_rate": 2.701325178389399e-05,
418
+ "loss": 1.1124,
419
+ "step": 560
420
+ },
421
+ {
422
+ "epoch": 2.605714285714286,
423
+ "grad_norm": 7.4971442222595215,
424
+ "learning_rate": 2.6503567787971462e-05,
425
+ "loss": 1.0799,
426
+ "step": 570
427
+ },
428
+ {
429
+ "epoch": 2.6514285714285712,
430
+ "grad_norm": 9.443109512329102,
431
+ "learning_rate": 2.5993883792048927e-05,
432
+ "loss": 1.0234,
433
+ "step": 580
434
+ },
435
+ {
436
+ "epoch": 2.697142857142857,
437
+ "grad_norm": 6.811278820037842,
438
+ "learning_rate": 2.5484199796126402e-05,
439
+ "loss": 1.0423,
440
+ "step": 590
441
+ },
442
+ {
443
+ "epoch": 2.742857142857143,
444
+ "grad_norm": 8.011152267456055,
445
+ "learning_rate": 2.4974515800203874e-05,
446
+ "loss": 1.1336,
447
+ "step": 600
448
+ },
449
+ {
450
+ "epoch": 2.7885714285714287,
451
+ "grad_norm": 6.383072853088379,
452
+ "learning_rate": 2.4464831804281346e-05,
453
+ "loss": 1.1203,
454
+ "step": 610
455
+ },
456
+ {
457
+ "epoch": 2.8342857142857145,
458
+ "grad_norm": 9.429741859436035,
459
+ "learning_rate": 2.3955147808358818e-05,
460
+ "loss": 1.091,
461
+ "step": 620
462
+ },
463
+ {
464
+ "epoch": 2.88,
465
+ "grad_norm": 6.606307506561279,
466
+ "learning_rate": 2.3445463812436293e-05,
467
+ "loss": 1.058,
468
+ "step": 630
469
+ },
470
+ {
471
+ "epoch": 2.9257142857142857,
472
+ "grad_norm": 8.834084510803223,
473
+ "learning_rate": 2.2935779816513765e-05,
474
+ "loss": 1.1266,
475
+ "step": 640
476
+ },
477
+ {
478
+ "epoch": 2.9714285714285715,
479
+ "grad_norm": 7.674890518188477,
480
+ "learning_rate": 2.2426095820591233e-05,
481
+ "loss": 1.0807,
482
+ "step": 650
483
+ },
484
+ {
485
+ "epoch": 2.998857142857143,
486
+ "eval_accuracy": 0.727386150966937,
487
+ "eval_loss": 0.9072983264923096,
488
+ "eval_runtime": 74.6334,
489
+ "eval_samples_per_second": 107.392,
490
+ "eval_steps_per_second": 3.363,
491
+ "step": 656
492
+ },
493
+ {
494
+ "epoch": 3.0171428571428573,
495
+ "grad_norm": 7.3469624519348145,
496
+ "learning_rate": 2.1916411824668705e-05,
497
+ "loss": 1.0863,
498
+ "step": 660
499
+ },
500
+ {
501
+ "epoch": 3.0628571428571427,
502
+ "grad_norm": 7.052463531494141,
503
+ "learning_rate": 2.140672782874618e-05,
504
+ "loss": 1.0188,
505
+ "step": 670
506
+ },
507
+ {
508
+ "epoch": 3.1085714285714285,
509
+ "grad_norm": 7.651565074920654,
510
+ "learning_rate": 2.0897043832823652e-05,
511
+ "loss": 0.9874,
512
+ "step": 680
513
+ },
514
+ {
515
+ "epoch": 3.1542857142857144,
516
+ "grad_norm": 11.283343315124512,
517
+ "learning_rate": 2.038735983690112e-05,
518
+ "loss": 1.0876,
519
+ "step": 690
520
+ },
521
+ {
522
+ "epoch": 3.2,
523
+ "grad_norm": 7.787779331207275,
524
+ "learning_rate": 1.9877675840978592e-05,
525
+ "loss": 1.0479,
526
+ "step": 700
527
+ },
528
+ {
529
+ "epoch": 3.2457142857142856,
530
+ "grad_norm": 8.140477180480957,
531
+ "learning_rate": 1.9367991845056068e-05,
532
+ "loss": 1.009,
533
+ "step": 710
534
+ },
535
+ {
536
+ "epoch": 3.2914285714285714,
537
+ "grad_norm": 8.705915451049805,
538
+ "learning_rate": 1.885830784913354e-05,
539
+ "loss": 1.056,
540
+ "step": 720
541
+ },
542
+ {
543
+ "epoch": 3.337142857142857,
544
+ "grad_norm": 7.134729385375977,
545
+ "learning_rate": 1.834862385321101e-05,
546
+ "loss": 1.0378,
547
+ "step": 730
548
+ },
549
+ {
550
+ "epoch": 3.382857142857143,
551
+ "grad_norm": 7.588448524475098,
552
+ "learning_rate": 1.783893985728848e-05,
553
+ "loss": 1.0508,
554
+ "step": 740
555
+ },
556
+ {
557
+ "epoch": 3.4285714285714284,
558
+ "grad_norm": 7.663401126861572,
559
+ "learning_rate": 1.7329255861365955e-05,
560
+ "loss": 1.0207,
561
+ "step": 750
562
+ },
563
+ {
564
+ "epoch": 3.474285714285714,
565
+ "grad_norm": 6.939538955688477,
566
+ "learning_rate": 1.6819571865443427e-05,
567
+ "loss": 1.1108,
568
+ "step": 760
569
+ },
570
+ {
571
+ "epoch": 3.52,
572
+ "grad_norm": 7.630512714385986,
573
+ "learning_rate": 1.63098878695209e-05,
574
+ "loss": 0.9975,
575
+ "step": 770
576
+ },
577
+ {
578
+ "epoch": 3.565714285714286,
579
+ "grad_norm": 9.263322830200195,
580
+ "learning_rate": 1.580020387359837e-05,
581
+ "loss": 1.0372,
582
+ "step": 780
583
+ },
584
+ {
585
+ "epoch": 3.611428571428571,
586
+ "grad_norm": 8.593291282653809,
587
+ "learning_rate": 1.5290519877675842e-05,
588
+ "loss": 1.0863,
589
+ "step": 790
590
+ },
591
+ {
592
+ "epoch": 3.657142857142857,
593
+ "grad_norm": 7.475892543792725,
594
+ "learning_rate": 1.4780835881753314e-05,
595
+ "loss": 1.0021,
596
+ "step": 800
597
+ },
598
+ {
599
+ "epoch": 3.702857142857143,
600
+ "grad_norm": 9.492719650268555,
601
+ "learning_rate": 1.4271151885830786e-05,
602
+ "loss": 1.0572,
603
+ "step": 810
604
+ },
605
+ {
606
+ "epoch": 3.7485714285714287,
607
+ "grad_norm": 11.467538833618164,
608
+ "learning_rate": 1.3761467889908258e-05,
609
+ "loss": 1.0216,
610
+ "step": 820
611
+ },
612
+ {
613
+ "epoch": 3.7942857142857145,
614
+ "grad_norm": 7.794005870819092,
615
+ "learning_rate": 1.3251783893985731e-05,
616
+ "loss": 1.0205,
617
+ "step": 830
618
+ },
619
+ {
620
+ "epoch": 3.84,
621
+ "grad_norm": 6.822214603424072,
622
+ "learning_rate": 1.2742099898063201e-05,
623
+ "loss": 1.0281,
624
+ "step": 840
625
+ },
626
+ {
627
+ "epoch": 3.8857142857142857,
628
+ "grad_norm": 6.747819423675537,
629
+ "learning_rate": 1.2232415902140673e-05,
630
+ "loss": 1.0237,
631
+ "step": 850
632
+ },
633
+ {
634
+ "epoch": 3.9314285714285715,
635
+ "grad_norm": 7.018404006958008,
636
+ "learning_rate": 1.1722731906218146e-05,
637
+ "loss": 1.0446,
638
+ "step": 860
639
+ },
640
+ {
641
+ "epoch": 3.977142857142857,
642
+ "grad_norm": 7.553677082061768,
643
+ "learning_rate": 1.1213047910295617e-05,
644
+ "loss": 0.9977,
645
+ "step": 870
646
+ },
647
+ {
648
+ "epoch": 4.0,
649
+ "eval_accuracy": 0.7456019962570181,
650
+ "eval_loss": 0.8551267385482788,
651
+ "eval_runtime": 74.4356,
652
+ "eval_samples_per_second": 107.677,
653
+ "eval_steps_per_second": 3.372,
654
+ "step": 875
655
+ },
656
+ {
657
+ "epoch": 4.022857142857143,
658
+ "grad_norm": 8.310098648071289,
659
+ "learning_rate": 1.070336391437309e-05,
660
+ "loss": 1.0626,
661
+ "step": 880
662
+ },
663
+ {
664
+ "epoch": 4.0685714285714285,
665
+ "grad_norm": 7.971031665802002,
666
+ "learning_rate": 1.019367991845056e-05,
667
+ "loss": 1.0294,
668
+ "step": 890
669
+ },
670
+ {
671
+ "epoch": 4.114285714285714,
672
+ "grad_norm": 8.742574691772461,
673
+ "learning_rate": 9.683995922528034e-06,
674
+ "loss": 1.0401,
675
+ "step": 900
676
+ },
677
+ {
678
+ "epoch": 4.16,
679
+ "grad_norm": 7.5797553062438965,
680
+ "learning_rate": 9.174311926605506e-06,
681
+ "loss": 1.0194,
682
+ "step": 910
683
+ },
684
+ {
685
+ "epoch": 4.2057142857142855,
686
+ "grad_norm": 10.30631160736084,
687
+ "learning_rate": 8.664627930682977e-06,
688
+ "loss": 1.0334,
689
+ "step": 920
690
+ },
691
+ {
692
+ "epoch": 4.251428571428572,
693
+ "grad_norm": 8.128324508666992,
694
+ "learning_rate": 8.15494393476045e-06,
695
+ "loss": 1.0027,
696
+ "step": 930
697
+ },
698
+ {
699
+ "epoch": 4.297142857142857,
700
+ "grad_norm": 7.413013935089111,
701
+ "learning_rate": 7.645259938837921e-06,
702
+ "loss": 0.9981,
703
+ "step": 940
704
+ },
705
+ {
706
+ "epoch": 4.3428571428571425,
707
+ "grad_norm": 7.2544426918029785,
708
+ "learning_rate": 7.135575942915393e-06,
709
+ "loss": 0.9954,
710
+ "step": 950
711
+ },
712
+ {
713
+ "epoch": 4.388571428571429,
714
+ "grad_norm": 6.764856815338135,
715
+ "learning_rate": 6.6258919469928655e-06,
716
+ "loss": 0.9712,
717
+ "step": 960
718
+ },
719
+ {
720
+ "epoch": 4.434285714285714,
721
+ "grad_norm": 7.096611022949219,
722
+ "learning_rate": 6.1162079510703365e-06,
723
+ "loss": 0.9646,
724
+ "step": 970
725
+ },
726
+ {
727
+ "epoch": 4.48,
728
+ "grad_norm": 7.685801982879639,
729
+ "learning_rate": 5.606523955147808e-06,
730
+ "loss": 1.0362,
731
+ "step": 980
732
+ },
733
+ {
734
+ "epoch": 4.525714285714286,
735
+ "grad_norm": 7.416136741638184,
736
+ "learning_rate": 5.09683995922528e-06,
737
+ "loss": 1.0596,
738
+ "step": 990
739
+ },
740
+ {
741
+ "epoch": 4.571428571428571,
742
+ "grad_norm": 7.128796100616455,
743
+ "learning_rate": 4.587155963302753e-06,
744
+ "loss": 0.9479,
745
+ "step": 1000
746
+ },
747
+ {
748
+ "epoch": 4.617142857142857,
749
+ "grad_norm": 6.152943134307861,
750
+ "learning_rate": 4.077471967380225e-06,
751
+ "loss": 0.9503,
752
+ "step": 1010
753
+ },
754
+ {
755
+ "epoch": 4.662857142857143,
756
+ "grad_norm": 8.12887191772461,
757
+ "learning_rate": 3.5677879714576964e-06,
758
+ "loss": 1.0071,
759
+ "step": 1020
760
+ },
761
+ {
762
+ "epoch": 4.708571428571428,
763
+ "grad_norm": 5.702174186706543,
764
+ "learning_rate": 3.0581039755351682e-06,
765
+ "loss": 1.0149,
766
+ "step": 1030
767
+ },
768
+ {
769
+ "epoch": 4.7542857142857144,
770
+ "grad_norm": 6.921694278717041,
771
+ "learning_rate": 2.54841997961264e-06,
772
+ "loss": 0.981,
773
+ "step": 1040
774
+ },
775
+ {
776
+ "epoch": 4.8,
777
+ "grad_norm": 9.122147560119629,
778
+ "learning_rate": 2.0387359836901123e-06,
779
+ "loss": 0.9939,
780
+ "step": 1050
781
+ },
782
+ {
783
+ "epoch": 4.845714285714286,
784
+ "grad_norm": 8.022086143493652,
785
+ "learning_rate": 1.5290519877675841e-06,
786
+ "loss": 0.8968,
787
+ "step": 1060
788
+ },
789
+ {
790
+ "epoch": 4.8914285714285715,
791
+ "grad_norm": 7.508928298950195,
792
+ "learning_rate": 1.0193679918450562e-06,
793
+ "loss": 0.9698,
794
+ "step": 1070
795
+ },
796
+ {
797
+ "epoch": 4.937142857142857,
798
+ "grad_norm": 9.233248710632324,
799
+ "learning_rate": 5.096839959225281e-07,
800
+ "loss": 0.9584,
801
+ "step": 1080
802
+ },
803
+ {
804
+ "epoch": 4.982857142857143,
805
+ "grad_norm": 7.491491794586182,
806
+ "learning_rate": 0.0,
807
+ "loss": 0.9737,
808
+ "step": 1090
809
+ },
810
+ {
811
+ "epoch": 4.982857142857143,
812
+ "eval_accuracy": 0.7499688084840923,
813
+ "eval_loss": 0.8480741381645203,
814
+ "eval_runtime": 71.0957,
815
+ "eval_samples_per_second": 112.735,
816
+ "eval_steps_per_second": 3.53,
817
+ "step": 1090
818
+ },
819
+ {
820
+ "epoch": 4.982857142857143,
821
+ "step": 1090,
822
+ "total_flos": 3.468170115610067e+18,
823
+ "train_loss": 1.2575330436776537,
824
+ "train_runtime": 2631.6311,
825
+ "train_samples_per_second": 53.178,
826
+ "train_steps_per_second": 0.414
827
+ }
828
+ ],
829
+ "logging_steps": 10,
830
+ "max_steps": 1090,
831
+ "num_input_tokens_seen": 0,
832
+ "num_train_epochs": 5,
833
+ "save_steps": 500,
834
+ "stateful_callbacks": {
835
+ "TrainerControl": {
836
+ "args": {
837
+ "should_epoch_stop": false,
838
+ "should_evaluate": false,
839
+ "should_log": false,
840
+ "should_save": true,
841
+ "should_training_stop": true
842
+ },
843
+ "attributes": {}
844
+ }
845
+ },
846
+ "total_flos": 3.468170115610067e+18,
847
+ "train_batch_size": 32,
848
+ "trial_name": null,
849
+ "trial_params": null
850
+ }