nguyenkhoa commited on
Commit
1890f3d
·
verified ·
1 Parent(s): 326c33b

End of training

Browse files
Files changed (3) hide show
  1. all_results.json +5 -5
  2. train_results.json +5 -5
  3. trainer_state.json +225 -355
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 5.0,
3
- "total_flos": 1.9922182705152e+18,
4
- "train_loss": 0.11038042506926758,
5
- "train_runtime": 7101.5946,
6
- "train_samples_per_second": 56.325,
7
- "train_steps_per_second": 0.111
8
  }
 
1
  {
2
  "epoch": 5.0,
3
+ "total_flos": 3.78521471397888e+18,
4
+ "train_loss": 0.055960290434986654,
5
+ "train_runtime": 18290.5686,
6
+ "train_samples_per_second": 41.551,
7
+ "train_steps_per_second": 0.054
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 5.0,
3
- "total_flos": 1.9922182705152e+18,
4
- "train_loss": 0.11038042506926758,
5
- "train_runtime": 7101.5946,
6
- "train_samples_per_second": 56.325,
7
- "train_steps_per_second": 0.111
8
  }
 
1
  {
2
  "epoch": 5.0,
3
+ "total_flos": 3.78521471397888e+18,
4
+ "train_loss": 0.055960290434986654,
5
+ "train_runtime": 18290.5686,
6
+ "train_samples_per_second": 41.551,
7
+ "train_steps_per_second": 0.054
8
  }
trainer_state.json CHANGED
@@ -1,443 +1,313 @@
1
  {
2
- "best_metric": 0.05071697756648064,
3
- "best_model_checkpoint": "./dinov2_Liveness_detection_v2.2.1/checkpoint-512",
4
  "epoch": 5.0,
5
- "eval_steps": 128,
6
- "global_step": 785,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.10191082802547771,
13
- "grad_norm": 3.0143625736236572,
14
- "learning_rate": 4.923566878980892e-05,
15
- "loss": 1.8573,
16
- "step": 16
17
- },
18
- {
19
- "epoch": 0.20382165605095542,
20
- "grad_norm": 13.854424476623535,
21
- "learning_rate": 4.821656050955414e-05,
22
- "loss": 0.4389,
23
- "step": 32
24
- },
25
- {
26
- "epoch": 0.3057324840764331,
27
- "grad_norm": 57.04545593261719,
28
- "learning_rate": 4.7197452229299366e-05,
29
- "loss": 0.3386,
30
- "step": 48
31
- },
32
- {
33
- "epoch": 0.40764331210191085,
34
- "grad_norm": 26.68746566772461,
35
- "learning_rate": 4.617834394904459e-05,
36
- "loss": 0.3205,
37
  "step": 64
38
  },
39
  {
40
- "epoch": 0.5095541401273885,
41
- "grad_norm": 28.5941219329834,
42
- "learning_rate": 4.5159235668789814e-05,
43
- "loss": 0.2216,
44
- "step": 80
45
- },
46
- {
47
- "epoch": 0.6114649681528662,
48
- "grad_norm": 7.417737007141113,
49
- "learning_rate": 4.414012738853504e-05,
50
- "loss": 0.1841,
51
- "step": 96
52
- },
53
- {
54
- "epoch": 0.7133757961783439,
55
- "grad_norm": 15.305898666381836,
56
- "learning_rate": 4.312101910828026e-05,
57
- "loss": 0.2161,
58
- "step": 112
59
  },
60
  {
61
- "epoch": 0.8152866242038217,
62
- "grad_norm": 65.58370208740234,
63
- "learning_rate": 4.210191082802548e-05,
64
- "loss": 0.183,
65
  "step": 128
66
  },
67
  {
68
- "epoch": 0.8152866242038217,
69
- "eval_accuracy": 0.9016,
70
- "eval_f1": 0.9039152443229508,
71
- "eval_loss": 0.24732650816440582,
72
- "eval_precision": 0.9122675566006839,
73
- "eval_recall": 0.9016,
74
- "eval_runtime": 285.4094,
75
- "eval_samples_per_second": 70.075,
76
- "eval_steps_per_second": 8.759,
77
  "step": 128
78
  },
79
  {
80
- "epoch": 0.9171974522292994,
81
- "grad_norm": 13.938675880432129,
82
- "learning_rate": 4.10828025477707e-05,
83
- "loss": 0.1348,
84
- "step": 144
85
- },
86
- {
87
- "epoch": 1.019108280254777,
88
- "grad_norm": 7.33504056930542,
89
- "learning_rate": 4.0063694267515926e-05,
90
- "loss": 0.1039,
91
- "step": 160
92
- },
93
- {
94
- "epoch": 1.1210191082802548,
95
- "grad_norm": 29.384906768798828,
96
- "learning_rate": 3.904458598726115e-05,
97
- "loss": 0.1084,
98
- "step": 176
99
- },
100
- {
101
- "epoch": 1.2229299363057324,
102
- "grad_norm": 14.105592727661133,
103
- "learning_rate": 3.8025477707006374e-05,
104
- "loss": 0.0855,
105
  "step": 192
106
  },
107
  {
108
- "epoch": 1.3248407643312101,
109
- "grad_norm": 4.494396209716797,
110
- "learning_rate": 3.700636942675159e-05,
111
- "loss": 0.1292,
112
- "step": 208
113
- },
114
- {
115
- "epoch": 1.426751592356688,
116
- "grad_norm": 15.541342735290527,
117
- "learning_rate": 3.5987261146496815e-05,
118
- "loss": 0.1187,
119
- "step": 224
120
- },
121
- {
122
- "epoch": 1.5286624203821657,
123
- "grad_norm": 37.96527862548828,
124
- "learning_rate": 3.496815286624204e-05,
125
- "loss": 0.0875,
126
- "step": 240
127
  },
128
  {
129
- "epoch": 1.6305732484076434,
130
- "grad_norm": 8.300914764404297,
131
- "learning_rate": 3.394904458598726e-05,
132
- "loss": 0.1022,
133
  "step": 256
134
  },
135
  {
136
- "epoch": 1.6305732484076434,
137
- "eval_accuracy": 0.9729,
138
- "eval_f1": 0.9727317036154979,
139
- "eval_loss": 0.07502331584692001,
140
- "eval_precision": 0.9737473833915566,
141
- "eval_recall": 0.9729,
142
- "eval_runtime": 271.7966,
143
- "eval_samples_per_second": 73.584,
144
- "eval_steps_per_second": 9.198,
145
  "step": 256
146
  },
147
  {
148
- "epoch": 1.732484076433121,
149
- "grad_norm": 5.855906009674072,
150
- "learning_rate": 3.2929936305732486e-05,
151
- "loss": 0.0579,
152
- "step": 272
153
- },
154
- {
155
- "epoch": 1.8343949044585988,
156
- "grad_norm": 6.417220115661621,
157
- "learning_rate": 3.191082802547771e-05,
158
- "loss": 0.0656,
159
- "step": 288
160
- },
161
- {
162
- "epoch": 1.9363057324840764,
163
- "grad_norm": 5.988188743591309,
164
- "learning_rate": 3.089171974522293e-05,
165
- "loss": 0.0536,
166
- "step": 304
167
- },
168
- {
169
- "epoch": 2.038216560509554,
170
- "grad_norm": 11.449820518493652,
171
- "learning_rate": 2.9872611464968155e-05,
172
- "loss": 0.05,
173
  "step": 320
174
  },
175
  {
176
- "epoch": 2.140127388535032,
177
- "grad_norm": 8.527064323425293,
178
- "learning_rate": 2.8853503184713375e-05,
179
- "loss": 0.0399,
180
- "step": 336
181
- },
182
- {
183
- "epoch": 2.2420382165605095,
184
- "grad_norm": 5.836983680725098,
185
- "learning_rate": 2.78343949044586e-05,
186
- "loss": 0.0518,
187
- "step": 352
188
- },
189
- {
190
- "epoch": 2.343949044585987,
191
- "grad_norm": 26.618896484375,
192
- "learning_rate": 2.6815286624203823e-05,
193
- "loss": 0.0372,
194
- "step": 368
195
  },
196
  {
197
- "epoch": 2.445859872611465,
198
- "grad_norm": 8.455528259277344,
199
- "learning_rate": 2.5796178343949047e-05,
200
- "loss": 0.0432,
201
  "step": 384
202
  },
203
  {
204
- "epoch": 2.445859872611465,
205
- "eval_accuracy": 0.98205,
206
- "eval_f1": 0.9819928803546507,
207
- "eval_loss": 0.05750665441155434,
208
- "eval_precision": 0.9823073850700162,
209
- "eval_recall": 0.98205,
210
- "eval_runtime": 276.5019,
211
- "eval_samples_per_second": 72.332,
212
- "eval_steps_per_second": 9.042,
213
  "step": 384
214
  },
215
  {
216
- "epoch": 2.5477707006369426,
217
- "grad_norm": 15.16251277923584,
218
- "learning_rate": 2.477707006369427e-05,
219
- "loss": 0.0592,
220
- "step": 400
221
- },
222
- {
223
- "epoch": 2.6496815286624202,
224
- "grad_norm": 5.268926620483398,
225
- "learning_rate": 2.375796178343949e-05,
226
- "loss": 0.0323,
227
- "step": 416
228
- },
229
- {
230
- "epoch": 2.7515923566878984,
231
- "grad_norm": 5.213953018188477,
232
- "learning_rate": 2.2738853503184715e-05,
233
- "loss": 0.0247,
234
- "step": 432
235
- },
236
- {
237
- "epoch": 2.853503184713376,
238
- "grad_norm": 4.638315677642822,
239
- "learning_rate": 2.171974522292994e-05,
240
- "loss": 0.0306,
241
  "step": 448
242
  },
243
  {
244
- "epoch": 2.9554140127388537,
245
- "grad_norm": 6.414985179901123,
246
- "learning_rate": 2.0700636942675162e-05,
247
- "loss": 0.0302,
248
- "step": 464
249
- },
250
- {
251
- "epoch": 3.0573248407643314,
252
- "grad_norm": 4.792835235595703,
253
- "learning_rate": 1.9681528662420383e-05,
254
- "loss": 0.0239,
255
- "step": 480
256
- },
257
- {
258
- "epoch": 3.159235668789809,
259
- "grad_norm": 3.82145357131958,
260
- "learning_rate": 1.8662420382165603e-05,
261
- "loss": 0.016,
262
- "step": 496
263
  },
264
  {
265
- "epoch": 3.261146496815287,
266
- "grad_norm": 6.392252445220947,
267
- "learning_rate": 1.7643312101910827e-05,
268
- "loss": 0.0247,
269
  "step": 512
270
  },
271
  {
272
- "epoch": 3.261146496815287,
273
- "eval_accuracy": 0.9832,
274
- "eval_f1": 0.9831797846161753,
275
- "eval_loss": 0.05071697756648064,
276
- "eval_precision": 0.983261693079801,
277
- "eval_recall": 0.9832,
278
- "eval_runtime": 271.679,
279
- "eval_samples_per_second": 73.616,
280
- "eval_steps_per_second": 9.202,
281
  "step": 512
282
  },
283
  {
284
- "epoch": 3.3630573248407645,
285
- "grad_norm": 7.162614345550537,
286
- "learning_rate": 1.662420382165605e-05,
287
- "loss": 0.0135,
288
- "step": 528
289
- },
290
- {
291
- "epoch": 3.464968152866242,
292
- "grad_norm": 3.0641708374023438,
293
- "learning_rate": 1.5605095541401275e-05,
294
- "loss": 0.0172,
295
- "step": 544
296
- },
297
- {
298
- "epoch": 3.56687898089172,
299
- "grad_norm": 11.316327095031738,
300
- "learning_rate": 1.4585987261146497e-05,
301
- "loss": 0.0129,
302
- "step": 560
303
- },
304
- {
305
- "epoch": 3.6687898089171975,
306
- "grad_norm": 9.987749099731445,
307
- "learning_rate": 1.356687898089172e-05,
308
- "loss": 0.0087,
309
  "step": 576
310
  },
311
  {
312
- "epoch": 3.770700636942675,
313
- "grad_norm": 1.408227562904358,
314
- "learning_rate": 1.2547770700636943e-05,
315
- "loss": 0.0145,
316
- "step": 592
317
- },
318
- {
319
- "epoch": 3.872611464968153,
320
- "grad_norm": 4.08975076675415,
321
- "learning_rate": 1.1528662420382167e-05,
322
- "loss": 0.0169,
323
- "step": 608
324
- },
325
- {
326
- "epoch": 3.9745222929936306,
327
- "grad_norm": 11.745902061462402,
328
- "learning_rate": 1.0509554140127389e-05,
329
- "loss": 0.0122,
330
- "step": 624
331
  },
332
  {
333
- "epoch": 4.076433121019108,
334
- "grad_norm": 1.0680924654006958,
335
- "learning_rate": 9.490445859872613e-06,
336
- "loss": 0.0115,
337
  "step": 640
338
  },
339
  {
340
- "epoch": 4.076433121019108,
341
- "eval_accuracy": 0.98645,
342
- "eval_f1": 0.9864145632656558,
343
- "eval_loss": 0.053590141236782074,
344
- "eval_precision": 0.9866191209109798,
345
- "eval_recall": 0.98645,
346
- "eval_runtime": 270.7038,
347
- "eval_samples_per_second": 73.881,
348
- "eval_steps_per_second": 9.235,
349
  "step": 640
350
  },
351
  {
352
- "epoch": 4.178343949044586,
353
- "grad_norm": 7.269558429718018,
354
- "learning_rate": 8.471337579617835e-06,
355
- "loss": 0.0092,
356
- "step": 656
357
  },
358
  {
359
- "epoch": 4.280254777070064,
360
- "grad_norm": 4.04400110244751,
361
- "learning_rate": 7.452229299363057e-06,
362
- "loss": 0.0065,
363
- "step": 672
 
 
 
 
 
364
  },
365
  {
366
- "epoch": 4.382165605095541,
367
- "grad_norm": 0.2636276185512543,
368
- "learning_rate": 6.433121019108281e-06,
369
- "loss": 0.0028,
370
- "step": 688
371
  },
372
  {
373
- "epoch": 4.484076433121019,
374
- "grad_norm": 11.116532325744629,
375
- "learning_rate": 5.414012738853504e-06,
376
- "loss": 0.0033,
377
- "step": 704
 
 
 
 
 
378
  },
379
  {
380
- "epoch": 4.585987261146497,
381
- "grad_norm": 0.07053454965353012,
382
- "learning_rate": 4.394904458598726e-06,
383
- "loss": 0.0023,
384
- "step": 720
385
  },
386
  {
387
- "epoch": 4.687898089171974,
388
- "grad_norm": 2.811331033706665,
389
- "learning_rate": 3.375796178343949e-06,
390
- "loss": 0.0057,
391
- "step": 736
 
 
 
 
 
392
  },
393
  {
394
- "epoch": 4.789808917197452,
395
- "grad_norm": 0.1817609667778015,
396
- "learning_rate": 2.3566878980891724e-06,
397
- "loss": 0.0043,
398
- "step": 752
399
  },
400
  {
401
- "epoch": 4.89171974522293,
402
- "grad_norm": 0.5368472933769226,
403
- "learning_rate": 1.337579617834395e-06,
404
- "loss": 0.002,
405
- "step": 768
 
 
 
 
 
406
  },
407
  {
408
- "epoch": 4.89171974522293,
409
- "eval_accuracy": 0.98685,
410
- "eval_f1": 0.9868193085416297,
411
- "eval_loss": 0.06709764152765274,
412
- "eval_precision": 0.9869900769633103,
413
- "eval_recall": 0.98685,
414
- "eval_runtime": 275.4886,
415
- "eval_samples_per_second": 72.598,
416
- "eval_steps_per_second": 9.075,
417
- "step": 768
418
  },
419
  {
420
- "epoch": 4.993630573248407,
421
- "grad_norm": 2.216209888458252,
422
- "learning_rate": 3.1847133757961787e-07,
423
- "loss": 0.0012,
424
- "step": 784
 
 
 
 
 
425
  },
426
  {
427
  "epoch": 5.0,
428
- "step": 785,
429
- "total_flos": 1.9922182705152e+18,
430
- "train_loss": 0.11038042506926758,
431
- "train_runtime": 7101.5946,
432
- "train_samples_per_second": 56.325,
433
- "train_steps_per_second": 0.111
434
  }
435
  ],
436
- "logging_steps": 16,
437
- "max_steps": 785,
438
  "num_input_tokens_seen": 0,
439
  "num_train_epochs": 5,
440
- "save_steps": 128,
441
  "stateful_callbacks": {
442
  "TrainerControl": {
443
  "args": {
@@ -450,8 +320,8 @@
450
  "attributes": {}
451
  }
452
  },
453
- "total_flos": 1.9922182705152e+18,
454
- "train_batch_size": 512,
455
  "trial_name": null,
456
  "trial_params": null
457
  }
 
1
  {
2
+ "best_metric": 0.030054476112127304,
3
+ "best_model_checkpoint": "./dinov2_Liveness_detection_v2.2.1/checkpoint-960",
4
  "epoch": 5.0,
5
+ "eval_steps": 64,
6
+ "global_step": 990,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.32323232323232326,
13
+ "grad_norm": 5.1993489265441895,
14
+ "learning_rate": 4.696969696969697e-05,
15
+ "loss": 0.4052,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  "step": 64
17
  },
18
  {
19
+ "epoch": 0.32323232323232326,
20
+ "eval_accuracy": 0.9712097686781231,
21
+ "eval_f1": 0.9713372966580719,
22
+ "eval_loss": 0.07746175676584244,
23
+ "eval_precision": 0.9712492471516934,
24
+ "eval_recall": 0.9712097686781231,
25
+ "eval_runtime": 513.7785,
26
+ "eval_samples_per_second": 73.962,
27
+ "eval_steps_per_second": 9.245,
28
+ "step": 64
 
 
 
 
 
 
 
 
 
29
  },
30
  {
31
+ "epoch": 0.6464646464646465,
32
+ "grad_norm": 4.368253231048584,
33
+ "learning_rate": 4.3737373737373736e-05,
34
+ "loss": 0.0784,
35
  "step": 128
36
  },
37
  {
38
+ "epoch": 0.6464646464646465,
39
+ "eval_accuracy": 0.9802889549724992,
40
+ "eval_f1": 0.980265299265069,
41
+ "eval_loss": 0.054525312036275864,
42
+ "eval_precision": 0.9803559157542755,
43
+ "eval_recall": 0.9802889549724992,
44
+ "eval_runtime": 493.8537,
45
+ "eval_samples_per_second": 76.946,
46
+ "eval_steps_per_second": 9.618,
47
  "step": 128
48
  },
49
  {
50
+ "epoch": 0.9696969696969697,
51
+ "grad_norm": 6.4717583656311035,
52
+ "learning_rate": 4.0505050505050506e-05,
53
+ "loss": 0.0639,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  "step": 192
55
  },
56
  {
57
+ "epoch": 0.9696969696969697,
58
+ "eval_accuracy": 0.9771046606489644,
59
+ "eval_f1": 0.9772131122819654,
60
+ "eval_loss": 0.0614825114607811,
61
+ "eval_precision": 0.9772293201056353,
62
+ "eval_recall": 0.9771046606489644,
63
+ "eval_runtime": 494.1919,
64
+ "eval_samples_per_second": 76.893,
65
+ "eval_steps_per_second": 9.612,
66
+ "step": 192
 
 
 
 
 
 
 
 
 
67
  },
68
  {
69
+ "epoch": 1.2929292929292928,
70
+ "grad_norm": 5.717361927032471,
71
+ "learning_rate": 3.7272727272727276e-05,
72
+ "loss": 0.0479,
73
  "step": 256
74
  },
75
  {
76
+ "epoch": 1.2929292929292928,
77
+ "eval_accuracy": 0.9794994605121188,
78
+ "eval_f1": 0.9793994612943837,
79
+ "eval_loss": 0.057158615440130234,
80
+ "eval_precision": 0.9800037878399945,
81
+ "eval_recall": 0.9794994605121188,
82
+ "eval_runtime": 451.5225,
83
+ "eval_samples_per_second": 84.16,
84
+ "eval_steps_per_second": 10.52,
85
  "step": 256
86
  },
87
  {
88
+ "epoch": 1.6161616161616161,
89
+ "grad_norm": 7.240455627441406,
90
+ "learning_rate": 3.4040404040404045e-05,
91
+ "loss": 0.0439,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  "step": 320
93
  },
94
  {
95
+ "epoch": 1.6161616161616161,
96
+ "eval_accuracy": 0.984394326166478,
97
+ "eval_f1": 0.984415214874111,
98
+ "eval_loss": 0.042214084416627884,
99
+ "eval_precision": 0.9843782986693216,
100
+ "eval_recall": 0.984394326166478,
101
+ "eval_runtime": 528.4934,
102
+ "eval_samples_per_second": 71.903,
103
+ "eval_steps_per_second": 8.988,
104
+ "step": 320
 
 
 
 
 
 
 
 
 
105
  },
106
  {
107
+ "epoch": 1.9393939393939394,
108
+ "grad_norm": 3.2924530506134033,
109
+ "learning_rate": 3.080808080808081e-05,
110
+ "loss": 0.0392,
111
  "step": 384
112
  },
113
  {
114
+ "epoch": 1.9393939393939394,
115
+ "eval_accuracy": 0.9802626384904866,
116
+ "eval_f1": 0.9801401584145893,
117
+ "eval_loss": 0.05642416328191757,
118
+ "eval_precision": 0.980989991407714,
119
+ "eval_recall": 0.9802626384904866,
120
+ "eval_runtime": 541.8272,
121
+ "eval_samples_per_second": 70.133,
122
+ "eval_steps_per_second": 8.767,
123
  "step": 384
124
  },
125
  {
126
+ "epoch": 2.2626262626262625,
127
+ "grad_norm": 5.49556827545166,
128
+ "learning_rate": 2.7575757575757578e-05,
129
+ "loss": 0.0374,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
  "step": 448
131
  },
132
  {
133
+ "epoch": 2.2626262626262625,
134
+ "eval_accuracy": 0.9837100976341483,
135
+ "eval_f1": 0.9837088988305677,
136
+ "eval_loss": 0.04635660722851753,
137
+ "eval_precision": 0.983712570988981,
138
+ "eval_recall": 0.9837100976341483,
139
+ "eval_runtime": 523.4635,
140
+ "eval_samples_per_second": 72.593,
141
+ "eval_steps_per_second": 9.074,
142
+ "step": 448
 
 
 
 
 
 
 
 
 
143
  },
144
  {
145
+ "epoch": 2.5858585858585856,
146
+ "grad_norm": 7.576784133911133,
147
+ "learning_rate": 2.4343434343434344e-05,
148
+ "loss": 0.0273,
149
  "step": 512
150
  },
151
  {
152
+ "epoch": 2.5858585858585856,
153
+ "eval_accuracy": 0.9861048974973026,
154
+ "eval_f1": 0.9861281800402698,
155
+ "eval_loss": 0.03779837116599083,
156
+ "eval_precision": 0.9860990328572948,
157
+ "eval_recall": 0.9861048974973026,
158
+ "eval_runtime": 526.8186,
159
+ "eval_samples_per_second": 72.131,
160
+ "eval_steps_per_second": 9.016,
161
  "step": 512
162
  },
163
  {
164
+ "epoch": 2.909090909090909,
165
+ "grad_norm": 8.414101600646973,
166
+ "learning_rate": 2.111111111111111e-05,
167
+ "loss": 0.0271,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
  "step": 576
169
  },
170
  {
171
+ "epoch": 2.909090909090909,
172
+ "eval_accuracy": 0.9883417984683808,
173
+ "eval_f1": 0.9883335392334167,
174
+ "eval_loss": 0.03357573598623276,
175
+ "eval_precision": 0.988365298180521,
176
+ "eval_recall": 0.9883417984683808,
177
+ "eval_runtime": 528.7169,
178
+ "eval_samples_per_second": 71.872,
179
+ "eval_steps_per_second": 8.984,
180
+ "step": 576
 
 
 
 
 
 
 
 
 
181
  },
182
  {
183
+ "epoch": 3.2323232323232323,
184
+ "grad_norm": 4.41958475112915,
185
+ "learning_rate": 1.787878787878788e-05,
186
+ "loss": 0.021,
187
  "step": 640
188
  },
189
  {
190
+ "epoch": 3.2323232323232323,
191
+ "eval_accuracy": 0.985894365641201,
192
+ "eval_f1": 0.9859268508760648,
193
+ "eval_loss": 0.04180537536740303,
194
+ "eval_precision": 0.9859071918052413,
195
+ "eval_recall": 0.985894365641201,
196
+ "eval_runtime": 523.9915,
197
+ "eval_samples_per_second": 72.52,
198
+ "eval_steps_per_second": 9.065,
199
  "step": 640
200
  },
201
  {
202
+ "epoch": 3.5555555555555554,
203
+ "grad_norm": 10.126025199890137,
204
+ "learning_rate": 1.4646464646464647e-05,
205
+ "loss": 0.019,
206
+ "step": 704
207
  },
208
  {
209
+ "epoch": 3.5555555555555554,
210
+ "eval_accuracy": 0.9848417063606937,
211
+ "eval_f1": 0.9848982600089184,
212
+ "eval_loss": 0.04540397599339485,
213
+ "eval_precision": 0.9849383014159727,
214
+ "eval_recall": 0.9848417063606937,
215
+ "eval_runtime": 525.5117,
216
+ "eval_samples_per_second": 72.31,
217
+ "eval_steps_per_second": 9.039,
218
+ "step": 704
219
  },
220
  {
221
+ "epoch": 3.878787878787879,
222
+ "grad_norm": 3.757537603378296,
223
+ "learning_rate": 1.1414141414141415e-05,
224
+ "loss": 0.0177,
225
+ "step": 768
226
  },
227
  {
228
+ "epoch": 3.878787878787879,
229
+ "eval_accuracy": 0.9883154819863681,
230
+ "eval_f1": 0.9883223194841958,
231
+ "eval_loss": 0.03590450435876846,
232
+ "eval_precision": 0.9883066841617396,
233
+ "eval_recall": 0.9883154819863681,
234
+ "eval_runtime": 522.9339,
235
+ "eval_samples_per_second": 72.667,
236
+ "eval_steps_per_second": 9.083,
237
+ "step": 768
238
  },
239
  {
240
+ "epoch": 4.202020202020202,
241
+ "grad_norm": 4.959455966949463,
242
+ "learning_rate": 8.181818181818183e-06,
243
+ "loss": 0.0134,
244
+ "step": 832
245
  },
246
  {
247
+ "epoch": 4.202020202020202,
248
+ "eval_accuracy": 0.9874470380799495,
249
+ "eval_f1": 0.987400014046709,
250
+ "eval_loss": 0.04097573831677437,
251
+ "eval_precision": 0.9877259518894549,
252
+ "eval_recall": 0.9874470380799495,
253
+ "eval_runtime": 521.4997,
254
+ "eval_samples_per_second": 72.867,
255
+ "eval_steps_per_second": 9.108,
256
+ "step": 832
257
  },
258
  {
259
+ "epoch": 4.525252525252525,
260
+ "grad_norm": 3.8119397163391113,
261
+ "learning_rate": 4.949494949494949e-06,
262
+ "loss": 0.0102,
263
+ "step": 896
264
  },
265
  {
266
+ "epoch": 4.525252525252525,
267
+ "eval_accuracy": 0.9909997631516619,
268
+ "eval_f1": 0.9910005190250708,
269
+ "eval_loss": 0.031363558024168015,
270
+ "eval_precision": 0.9909983512838277,
271
+ "eval_recall": 0.9909997631516619,
272
+ "eval_runtime": 523.3329,
273
+ "eval_samples_per_second": 72.612,
274
+ "eval_steps_per_second": 9.076,
275
+ "step": 896
276
  },
277
  {
278
+ "epoch": 4.848484848484849,
279
+ "grad_norm": 3.6070823669433594,
280
+ "learning_rate": 1.7171717171717171e-06,
281
+ "loss": 0.0103,
282
+ "step": 960
 
 
 
 
 
283
  },
284
  {
285
+ "epoch": 4.848484848484849,
286
+ "eval_accuracy": 0.9909997631516619,
287
+ "eval_f1": 0.9909972985219008,
288
+ "eval_loss": 0.030054476112127304,
289
+ "eval_precision": 0.9910057473707247,
290
+ "eval_recall": 0.9909997631516619,
291
+ "eval_runtime": 534.5024,
292
+ "eval_samples_per_second": 71.094,
293
+ "eval_steps_per_second": 8.887,
294
+ "step": 960
295
  },
296
  {
297
  "epoch": 5.0,
298
+ "step": 990,
299
+ "total_flos": 3.78521471397888e+18,
300
+ "train_loss": 0.055960290434986654,
301
+ "train_runtime": 18290.5686,
302
+ "train_samples_per_second": 41.551,
303
+ "train_steps_per_second": 0.054
304
  }
305
  ],
306
+ "logging_steps": 64,
307
+ "max_steps": 990,
308
  "num_input_tokens_seen": 0,
309
  "num_train_epochs": 5,
310
+ "save_steps": 64,
311
  "stateful_callbacks": {
312
  "TrainerControl": {
313
  "args": {
 
320
  "attributes": {}
321
  }
322
  },
323
+ "total_flos": 3.78521471397888e+18,
324
+ "train_batch_size": 768,
325
  "trial_name": null,
326
  "trial_params": null
327
  }