selmamalak committed on
Commit
8e1117c
1 Parent(s): 125ef8a

End of training

Browse files
Files changed (5) hide show
  1. README.md +5 -5
  2. all_results.json +16 -0
  3. eval_results.json +11 -0
  4. train_results.json +8 -0
  5. trainer_state.json +591 -0
README.md CHANGED
@@ -21,11 +21,11 @@ should probably proofread and complete it, then remove this comment. -->
21
 
22
  This model is a fine-tuned version of [microsoft/swin-large-patch4-window7-224-in22k](https://huggingface.co/microsoft/swin-large-patch4-window7-224-in22k) on an unknown dataset.
23
  It achieves the following results on the evaluation set:
24
- - Loss: 0.1064
25
- - Accuracy: 0.9622
26
- - Precision: 0.9377
27
- - Recall: 0.9723
28
- - F1: 0.9530
29
 
30
  ## Model description
31
 
 
21
 
22
  This model is a fine-tuned version of [microsoft/swin-large-patch4-window7-224-in22k](https://huggingface.co/microsoft/swin-large-patch4-window7-224-in22k) on an unknown dataset.
23
  It achieves the following results on the evaluation set:
24
+ - Loss: 0.1159
25
+ - Accuracy: 0.9588
26
+ - Precision: 0.9599
27
+ - Recall: 0.9401
28
+ - F1: 0.9492
29
 
30
  ## Model description
31
 
all_results.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 9.88,
3
+ "eval_accuracy": 0.9587628865979382,
4
+ "eval_f1": 0.9492478744277306,
5
+ "eval_loss": 0.1159210205078125,
6
+ "eval_precision": 0.9598554797270173,
7
+ "eval_recall": 0.9400691509796388,
8
+ "eval_runtime": 9.465,
9
+ "eval_samples_per_second": 61.49,
10
+ "eval_steps_per_second": 3.909,
11
+ "total_flos": 7.138406630605308e+18,
12
+ "train_loss": 0.19363035304205758,
13
+ "train_runtime": 1305.2945,
14
+ "train_samples_per_second": 31.234,
15
+ "train_steps_per_second": 0.483
16
+ }
eval_results.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 9.88,
3
+ "eval_accuracy": 0.9587628865979382,
4
+ "eval_f1": 0.9492478744277306,
5
+ "eval_loss": 0.1159210205078125,
6
+ "eval_precision": 0.9598554797270173,
7
+ "eval_recall": 0.9400691509796388,
8
+ "eval_runtime": 9.465,
9
+ "eval_samples_per_second": 61.49,
10
+ "eval_steps_per_second": 3.909
11
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 9.88,
3
+ "total_flos": 7.138406630605308e+18,
4
+ "train_loss": 0.19363035304205758,
5
+ "train_runtime": 1305.2945,
6
+ "train_samples_per_second": 31.234,
7
+ "train_steps_per_second": 0.483
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,591 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.9793991416309012,
3
+ "best_model_checkpoint": "swin-large-patch4-window7-224-in22k-finetuned-lora-medmnistv2/checkpoint-255",
4
+ "epoch": 9.882352941176471,
5
+ "eval_steps": 500,
6
+ "global_step": 630,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.16,
13
+ "grad_norm": 0.9944203495979309,
14
+ "learning_rate": 0.004920634920634921,
15
+ "loss": 0.6411,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.31,
20
+ "grad_norm": 1.986366868019104,
21
+ "learning_rate": 0.004841269841269842,
22
+ "loss": 0.3966,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.47,
27
+ "grad_norm": 1.2478023767471313,
28
+ "learning_rate": 0.0047619047619047615,
29
+ "loss": 0.2835,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 0.63,
34
+ "grad_norm": 0.9372655749320984,
35
+ "learning_rate": 0.004682539682539683,
36
+ "loss": 0.2194,
37
+ "step": 40
38
+ },
39
+ {
40
+ "epoch": 0.78,
41
+ "grad_norm": 0.834052562713623,
42
+ "learning_rate": 0.004603174603174603,
43
+ "loss": 0.4268,
44
+ "step": 50
45
+ },
46
+ {
47
+ "epoch": 0.94,
48
+ "grad_norm": 0.8572702407836914,
49
+ "learning_rate": 0.004523809523809524,
50
+ "loss": 0.3305,
51
+ "step": 60
52
+ },
53
+ {
54
+ "epoch": 0.99,
55
+ "eval_accuracy": 0.936480686695279,
56
+ "eval_f1": 0.9119407558733402,
57
+ "eval_loss": 0.16002865135669708,
58
+ "eval_precision": 0.9478312410980476,
59
+ "eval_recall": 0.886801607677731,
60
+ "eval_runtime": 18.303,
61
+ "eval_samples_per_second": 63.651,
62
+ "eval_steps_per_second": 3.988,
63
+ "step": 63
64
+ },
65
+ {
66
+ "epoch": 1.1,
67
+ "grad_norm": 1.092854619026184,
68
+ "learning_rate": 0.0044444444444444444,
69
+ "loss": 0.2775,
70
+ "step": 70
71
+ },
72
+ {
73
+ "epoch": 1.25,
74
+ "grad_norm": 1.632826328277588,
75
+ "learning_rate": 0.004365079365079365,
76
+ "loss": 0.2432,
77
+ "step": 80
78
+ },
79
+ {
80
+ "epoch": 1.41,
81
+ "grad_norm": 0.9421872496604919,
82
+ "learning_rate": 0.004285714285714286,
83
+ "loss": 0.2564,
84
+ "step": 90
85
+ },
86
+ {
87
+ "epoch": 1.57,
88
+ "grad_norm": 3.025440216064453,
89
+ "learning_rate": 0.004206349206349207,
90
+ "loss": 0.2726,
91
+ "step": 100
92
+ },
93
+ {
94
+ "epoch": 1.73,
95
+ "grad_norm": 0.8582188487052917,
96
+ "learning_rate": 0.0041269841269841265,
97
+ "loss": 0.2958,
98
+ "step": 110
99
+ },
100
+ {
101
+ "epoch": 1.88,
102
+ "grad_norm": 1.093877911567688,
103
+ "learning_rate": 0.004047619047619048,
104
+ "loss": 0.2335,
105
+ "step": 120
106
+ },
107
+ {
108
+ "epoch": 1.99,
109
+ "eval_accuracy": 0.9313304721030042,
110
+ "eval_f1": 0.9166446055107573,
111
+ "eval_loss": 0.15517598390579224,
112
+ "eval_precision": 0.8967545322648443,
113
+ "eval_recall": 0.9471582920716426,
114
+ "eval_runtime": 18.7183,
115
+ "eval_samples_per_second": 62.239,
116
+ "eval_steps_per_second": 3.9,
117
+ "step": 127
118
+ },
119
+ {
120
+ "epoch": 2.04,
121
+ "grad_norm": 1.588196873664856,
122
+ "learning_rate": 0.003968253968253968,
123
+ "loss": 0.1984,
124
+ "step": 130
125
+ },
126
+ {
127
+ "epoch": 2.2,
128
+ "grad_norm": 0.8592113256454468,
129
+ "learning_rate": 0.003896825396825397,
130
+ "loss": 0.2741,
131
+ "step": 140
132
+ },
133
+ {
134
+ "epoch": 2.35,
135
+ "grad_norm": 1.1290347576141357,
136
+ "learning_rate": 0.003817460317460317,
137
+ "loss": 0.2253,
138
+ "step": 150
139
+ },
140
+ {
141
+ "epoch": 2.51,
142
+ "grad_norm": 0.8879281878471375,
143
+ "learning_rate": 0.0037380952380952383,
144
+ "loss": 0.2726,
145
+ "step": 160
146
+ },
147
+ {
148
+ "epoch": 2.67,
149
+ "grad_norm": 0.8666319847106934,
150
+ "learning_rate": 0.0036587301587301586,
151
+ "loss": 0.2226,
152
+ "step": 170
153
+ },
154
+ {
155
+ "epoch": 2.82,
156
+ "grad_norm": 1.2298210859298706,
157
+ "learning_rate": 0.0035793650793650793,
158
+ "loss": 0.1967,
159
+ "step": 180
160
+ },
161
+ {
162
+ "epoch": 2.98,
163
+ "grad_norm": 0.7905568480491638,
164
+ "learning_rate": 0.0034999999999999996,
165
+ "loss": 0.1977,
166
+ "step": 190
167
+ },
168
+ {
169
+ "epoch": 3.0,
170
+ "eval_accuracy": 0.9733905579399141,
171
+ "eval_f1": 0.9659005824707372,
172
+ "eval_loss": 0.08546662330627441,
173
+ "eval_precision": 0.9607843137254902,
174
+ "eval_recall": 0.9713575096277278,
175
+ "eval_runtime": 18.7005,
176
+ "eval_samples_per_second": 62.298,
177
+ "eval_steps_per_second": 3.904,
178
+ "step": 191
179
+ },
180
+ {
181
+ "epoch": 3.14,
182
+ "grad_norm": 0.9821394681930542,
183
+ "learning_rate": 0.003420634920634921,
184
+ "loss": 0.2158,
185
+ "step": 200
186
+ },
187
+ {
188
+ "epoch": 3.29,
189
+ "grad_norm": 0.6182402968406677,
190
+ "learning_rate": 0.003341269841269841,
191
+ "loss": 0.1528,
192
+ "step": 210
193
+ },
194
+ {
195
+ "epoch": 3.45,
196
+ "grad_norm": 0.6537771821022034,
197
+ "learning_rate": 0.003261904761904762,
198
+ "loss": 0.2038,
199
+ "step": 220
200
+ },
201
+ {
202
+ "epoch": 3.61,
203
+ "grad_norm": 1.0976545810699463,
204
+ "learning_rate": 0.0031825396825396826,
205
+ "loss": 0.217,
206
+ "step": 230
207
+ },
208
+ {
209
+ "epoch": 3.76,
210
+ "grad_norm": 0.581605076789856,
211
+ "learning_rate": 0.0031031746031746034,
212
+ "loss": 0.2044,
213
+ "step": 240
214
+ },
215
+ {
216
+ "epoch": 3.92,
217
+ "grad_norm": 0.8219888806343079,
218
+ "learning_rate": 0.0030238095238095237,
219
+ "loss": 0.1746,
220
+ "step": 250
221
+ },
222
+ {
223
+ "epoch": 4.0,
224
+ "eval_accuracy": 0.9793991416309012,
225
+ "eval_f1": 0.972945356587167,
226
+ "eval_loss": 0.0870010182261467,
227
+ "eval_precision": 0.9794167490467448,
228
+ "eval_recall": 0.9669104162846139,
229
+ "eval_runtime": 18.7359,
230
+ "eval_samples_per_second": 62.18,
231
+ "eval_steps_per_second": 3.896,
232
+ "step": 255
233
+ },
234
+ {
235
+ "epoch": 4.08,
236
+ "grad_norm": 0.45729899406433105,
237
+ "learning_rate": 0.0029444444444444444,
238
+ "loss": 0.1785,
239
+ "step": 260
240
+ },
241
+ {
242
+ "epoch": 4.24,
243
+ "grad_norm": 0.9842467904090881,
244
+ "learning_rate": 0.002865079365079365,
245
+ "loss": 0.1507,
246
+ "step": 270
247
+ },
248
+ {
249
+ "epoch": 4.39,
250
+ "grad_norm": 2.326084852218628,
251
+ "learning_rate": 0.002785714285714286,
252
+ "loss": 0.2188,
253
+ "step": 280
254
+ },
255
+ {
256
+ "epoch": 4.55,
257
+ "grad_norm": 0.7227071523666382,
258
+ "learning_rate": 0.002706349206349206,
259
+ "loss": 0.1853,
260
+ "step": 290
261
+ },
262
+ {
263
+ "epoch": 4.71,
264
+ "grad_norm": 1.1356016397476196,
265
+ "learning_rate": 0.002626984126984127,
266
+ "loss": 0.1671,
267
+ "step": 300
268
+ },
269
+ {
270
+ "epoch": 4.86,
271
+ "grad_norm": 0.6211657524108887,
272
+ "learning_rate": 0.0025476190476190477,
273
+ "loss": 0.1797,
274
+ "step": 310
275
+ },
276
+ {
277
+ "epoch": 4.99,
278
+ "eval_accuracy": 0.9699570815450643,
279
+ "eval_f1": 0.961658208434536,
280
+ "eval_loss": 0.0828637108206749,
281
+ "eval_precision": 0.95490488153731,
282
+ "eval_recall": 0.969034629256067,
283
+ "eval_runtime": 18.2407,
284
+ "eval_samples_per_second": 63.868,
285
+ "eval_steps_per_second": 4.002,
286
+ "step": 318
287
+ },
288
+ {
289
+ "epoch": 5.02,
290
+ "grad_norm": 0.3646533787250519,
291
+ "learning_rate": 0.0024682539682539684,
292
+ "loss": 0.1636,
293
+ "step": 320
294
+ },
295
+ {
296
+ "epoch": 5.18,
297
+ "grad_norm": 0.5121908187866211,
298
+ "learning_rate": 0.002388888888888889,
299
+ "loss": 0.1463,
300
+ "step": 330
301
+ },
302
+ {
303
+ "epoch": 5.33,
304
+ "grad_norm": 0.34603381156921387,
305
+ "learning_rate": 0.0023095238095238095,
306
+ "loss": 0.1284,
307
+ "step": 340
308
+ },
309
+ {
310
+ "epoch": 5.49,
311
+ "grad_norm": 1.5245040655136108,
312
+ "learning_rate": 0.0022301587301587302,
313
+ "loss": 0.1307,
314
+ "step": 350
315
+ },
316
+ {
317
+ "epoch": 5.65,
318
+ "grad_norm": 1.0758203268051147,
319
+ "learning_rate": 0.002150793650793651,
320
+ "loss": 0.1912,
321
+ "step": 360
322
+ },
323
+ {
324
+ "epoch": 5.8,
325
+ "grad_norm": 0.9437044858932495,
326
+ "learning_rate": 0.0020714285714285717,
327
+ "loss": 0.1942,
328
+ "step": 370
329
+ },
330
+ {
331
+ "epoch": 5.96,
332
+ "grad_norm": 0.6630730628967285,
333
+ "learning_rate": 0.001992063492063492,
334
+ "loss": 0.1436,
335
+ "step": 380
336
+ },
337
+ {
338
+ "epoch": 5.99,
339
+ "eval_accuracy": 0.9708154506437768,
340
+ "eval_f1": 0.9627915802446869,
341
+ "eval_loss": 0.0797128826379776,
342
+ "eval_precision": 0.9556120562130177,
343
+ "eval_recall": 0.9706793660981723,
344
+ "eval_runtime": 18.3826,
345
+ "eval_samples_per_second": 63.375,
346
+ "eval_steps_per_second": 3.971,
347
+ "step": 382
348
+ },
349
+ {
350
+ "epoch": 6.12,
351
+ "grad_norm": 0.5699043869972229,
352
+ "learning_rate": 0.0019126984126984126,
353
+ "loss": 0.1704,
354
+ "step": 390
355
+ },
356
+ {
357
+ "epoch": 6.27,
358
+ "grad_norm": 0.6287882328033447,
359
+ "learning_rate": 0.0018333333333333333,
360
+ "loss": 0.1034,
361
+ "step": 400
362
+ },
363
+ {
364
+ "epoch": 6.43,
365
+ "grad_norm": 0.4099302589893341,
366
+ "learning_rate": 0.0017539682539682538,
367
+ "loss": 0.1633,
368
+ "step": 410
369
+ },
370
+ {
371
+ "epoch": 6.59,
372
+ "grad_norm": 0.24668000638484955,
373
+ "learning_rate": 0.0016746031746031746,
374
+ "loss": 0.1477,
375
+ "step": 420
376
+ },
377
+ {
378
+ "epoch": 6.75,
379
+ "grad_norm": 0.6959215998649597,
380
+ "learning_rate": 0.001595238095238095,
381
+ "loss": 0.1578,
382
+ "step": 430
383
+ },
384
+ {
385
+ "epoch": 6.9,
386
+ "grad_norm": 0.4429934620857239,
387
+ "learning_rate": 0.0015158730158730158,
388
+ "loss": 0.1632,
389
+ "step": 440
390
+ },
391
+ {
392
+ "epoch": 7.0,
393
+ "eval_accuracy": 0.9699570815450643,
394
+ "eval_f1": 0.9621164611060785,
395
+ "eval_loss": 0.08162170648574829,
396
+ "eval_precision": 0.9507741239032943,
397
+ "eval_recall": 0.9754187297512072,
398
+ "eval_runtime": 18.5501,
399
+ "eval_samples_per_second": 62.803,
400
+ "eval_steps_per_second": 3.935,
401
+ "step": 446
402
+ },
403
+ {
404
+ "epoch": 7.06,
405
+ "grad_norm": 0.3358498513698578,
406
+ "learning_rate": 0.0014365079365079364,
407
+ "loss": 0.1628,
408
+ "step": 450
409
+ },
410
+ {
411
+ "epoch": 7.22,
412
+ "grad_norm": 1.001493215560913,
413
+ "learning_rate": 0.0013571428571428571,
414
+ "loss": 0.1495,
415
+ "step": 460
416
+ },
417
+ {
418
+ "epoch": 7.37,
419
+ "grad_norm": 1.0969635248184204,
420
+ "learning_rate": 0.0012777777777777776,
421
+ "loss": 0.1972,
422
+ "step": 470
423
+ },
424
+ {
425
+ "epoch": 7.53,
426
+ "grad_norm": 0.4329684376716614,
427
+ "learning_rate": 0.0011984126984126984,
428
+ "loss": 0.1538,
429
+ "step": 480
430
+ },
431
+ {
432
+ "epoch": 7.69,
433
+ "grad_norm": 0.426087886095047,
434
+ "learning_rate": 0.0011190476190476191,
435
+ "loss": 0.1291,
436
+ "step": 490
437
+ },
438
+ {
439
+ "epoch": 7.84,
440
+ "grad_norm": 0.24835826456546783,
441
+ "learning_rate": 0.0010396825396825396,
442
+ "loss": 0.1301,
443
+ "step": 500
444
+ },
445
+ {
446
+ "epoch": 8.0,
447
+ "grad_norm": 0.3314363956451416,
448
+ "learning_rate": 0.0009603174603174604,
449
+ "loss": 0.1125,
450
+ "step": 510
451
+ },
452
+ {
453
+ "epoch": 8.0,
454
+ "eval_accuracy": 0.9613733905579399,
455
+ "eval_f1": 0.9519463889441405,
456
+ "eval_loss": 0.10071194916963577,
457
+ "eval_precision": 0.9364616472251679,
458
+ "eval_recall": 0.9717395623204352,
459
+ "eval_runtime": 18.4952,
460
+ "eval_samples_per_second": 62.989,
461
+ "eval_steps_per_second": 3.947,
462
+ "step": 510
463
+ },
464
+ {
465
+ "epoch": 8.16,
466
+ "grad_norm": 0.4465714693069458,
467
+ "learning_rate": 0.000880952380952381,
468
+ "loss": 0.1279,
469
+ "step": 520
470
+ },
471
+ {
472
+ "epoch": 8.31,
473
+ "grad_norm": 0.7344756126403809,
474
+ "learning_rate": 0.0008015873015873017,
475
+ "loss": 0.1408,
476
+ "step": 530
477
+ },
478
+ {
479
+ "epoch": 8.47,
480
+ "grad_norm": 2.2782132625579834,
481
+ "learning_rate": 0.0007222222222222222,
482
+ "loss": 0.134,
483
+ "step": 540
484
+ },
485
+ {
486
+ "epoch": 8.63,
487
+ "grad_norm": 0.8408999443054199,
488
+ "learning_rate": 0.0006428571428571428,
489
+ "loss": 0.1193,
490
+ "step": 550
491
+ },
492
+ {
493
+ "epoch": 8.78,
494
+ "grad_norm": 0.4357975721359253,
495
+ "learning_rate": 0.0005634920634920636,
496
+ "loss": 0.114,
497
+ "step": 560
498
+ },
499
+ {
500
+ "epoch": 8.94,
501
+ "grad_norm": 0.6790524125099182,
502
+ "learning_rate": 0.00048412698412698415,
503
+ "loss": 0.1076,
504
+ "step": 570
505
+ },
506
+ {
507
+ "epoch": 8.99,
508
+ "eval_accuracy": 0.9690987124463519,
509
+ "eval_f1": 0.9612241124260354,
510
+ "eval_loss": 0.09004171937704086,
511
+ "eval_precision": 0.9481984892871503,
512
+ "eval_recall": 0.9769660431566722,
513
+ "eval_runtime": 18.3674,
514
+ "eval_samples_per_second": 63.427,
515
+ "eval_steps_per_second": 3.974,
516
+ "step": 573
517
+ },
518
+ {
519
+ "epoch": 9.1,
520
+ "grad_norm": 0.38498663902282715,
521
+ "learning_rate": 0.0004047619047619048,
522
+ "loss": 0.1174,
523
+ "step": 580
524
+ },
525
+ {
526
+ "epoch": 9.25,
527
+ "grad_norm": 0.7055544853210449,
528
+ "learning_rate": 0.0003253968253968254,
529
+ "loss": 0.0951,
530
+ "step": 590
531
+ },
532
+ {
533
+ "epoch": 9.41,
534
+ "grad_norm": 0.8157325387001038,
535
+ "learning_rate": 0.00024603174603174605,
536
+ "loss": 0.1397,
537
+ "step": 600
538
+ },
539
+ {
540
+ "epoch": 9.57,
541
+ "grad_norm": 0.6511685252189636,
542
+ "learning_rate": 0.00016666666666666666,
543
+ "loss": 0.1296,
544
+ "step": 610
545
+ },
546
+ {
547
+ "epoch": 9.73,
548
+ "grad_norm": 0.5405935049057007,
549
+ "learning_rate": 8.730158730158731e-05,
550
+ "loss": 0.1028,
551
+ "step": 620
552
+ },
553
+ {
554
+ "epoch": 9.88,
555
+ "grad_norm": 0.472501665353775,
556
+ "learning_rate": 7.936507936507936e-06,
557
+ "loss": 0.1188,
558
+ "step": 630
559
+ },
560
+ {
561
+ "epoch": 9.88,
562
+ "eval_accuracy": 0.9622317596566523,
563
+ "eval_f1": 0.9529697156530292,
564
+ "eval_loss": 0.10635051876306534,
565
+ "eval_precision": 0.9377354615755036,
566
+ "eval_recall": 0.9723202824133504,
567
+ "eval_runtime": 18.4162,
568
+ "eval_samples_per_second": 63.26,
569
+ "eval_steps_per_second": 3.964,
570
+ "step": 630
571
+ },
572
+ {
573
+ "epoch": 9.88,
574
+ "step": 630,
575
+ "total_flos": 7.138406630605308e+18,
576
+ "train_loss": 0.19363035304205758,
577
+ "train_runtime": 1305.2945,
578
+ "train_samples_per_second": 31.234,
579
+ "train_steps_per_second": 0.483
580
+ }
581
+ ],
582
+ "logging_steps": 10,
583
+ "max_steps": 630,
584
+ "num_input_tokens_seen": 0,
585
+ "num_train_epochs": 10,
586
+ "save_steps": 500,
587
+ "total_flos": 7.138406630605308e+18,
588
+ "train_batch_size": 16,
589
+ "trial_name": null,
590
+ "trial_params": null
591
+ }