synergyai-jaeung commited on
Commit
aa7fae6
1 Parent(s): 3633e3f

Training in progress, epoch 1

Browse files
all_results.json CHANGED
@@ -1,17 +1,17 @@
1
  {
2
- "epoch": 100.0,
3
- "eval_AUC": 0.5354853273137697,
4
- "eval_F1": 0.5271698859516118,
5
- "eval_Precision": 0.8450433108758422,
6
- "eval_Recall": 0.9909706546275395,
7
- "eval_accuracy": 0.8407163053722903,
8
- "eval_loss": 0.42085567116737366,
9
- "eval_runtime": 11.8095,
10
- "eval_samples_per_second": 89.843,
11
- "eval_steps_per_second": 5.673,
12
- "total_flos": 3.2879851193471386e+19,
13
- "train_loss": 0.05622970362024654,
14
- "train_runtime": 7456.0224,
15
- "train_samples_per_second": 56.907,
16
- "train_steps_per_second": 3.568
17
  }
 
1
  {
2
+ "epoch": 12.0,
3
+ "eval_AUC": 0.9545454545454545,
4
+ "eval_F1": 0.9735863095238095,
5
+ "eval_Precision": 0.9896373056994818,
6
+ "eval_Recall": 1.0,
7
+ "eval_accuracy": 0.9906103286384976,
8
+ "eval_loss": 0.048169028013944626,
9
+ "eval_runtime": 4.3355,
10
+ "eval_samples_per_second": 49.129,
11
+ "eval_steps_per_second": 3.229,
12
+ "total_flos": 7.894883901934633e+17,
13
+ "train_loss": 0.09885871769101531,
14
+ "train_runtime": 224.7245,
15
+ "train_samples_per_second": 45.336,
16
+ "train_steps_per_second": 2.884
17
  }
eval_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "epoch": 100.0,
3
- "eval_AUC": 0.5354853273137697,
4
- "eval_F1": 0.5271698859516118,
5
- "eval_Precision": 0.8450433108758422,
6
- "eval_Recall": 0.9909706546275395,
7
- "eval_accuracy": 0.8407163053722903,
8
- "eval_loss": 0.42085567116737366,
9
- "eval_runtime": 11.8095,
10
- "eval_samples_per_second": 89.843,
11
- "eval_steps_per_second": 5.673
12
  }
 
1
  {
2
+ "epoch": 12.0,
3
+ "eval_AUC": 0.9545454545454545,
4
+ "eval_F1": 0.9735863095238095,
5
+ "eval_Precision": 0.9896373056994818,
6
+ "eval_Recall": 1.0,
7
+ "eval_accuracy": 0.9906103286384976,
8
+ "eval_loss": 0.048169028013944626,
9
+ "eval_runtime": 4.3355,
10
+ "eval_samples_per_second": 49.129,
11
+ "eval_steps_per_second": 3.229
12
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:07ad263c40de08805f0ed7a7346413962954d48906476b582b3387e7d1c08190
3
  size 343223968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af75b5d80d0a947cbd9ccf5fce3cf17bcc53c8b31f24138f4c52358b89462ed7
3
  size 343223968
runs/May28_13-21-32_RTX3090/events.out.tfevents.1716870364.RTX3090.312878.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea0c02ec4c231738dccbe81706e8bdee921bd1a6353b4dcfb38784072d734295
3
+ size 607
runs/May28_14-57-40_RTX3090/events.out.tfevents.1716875862.RTX3090.403009.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7ff672a01754c2181d5bc35fdf6a89a182590e6a44bc0294462fdc674b2a348
3
+ size 5634
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 100.0,
3
- "total_flos": 3.2879851193471386e+19,
4
- "train_loss": 0.05622970362024654,
5
- "train_runtime": 7456.0224,
6
- "train_samples_per_second": 56.907,
7
- "train_steps_per_second": 3.568
8
  }
 
1
  {
2
+ "epoch": 12.0,
3
+ "total_flos": 7.894883901934633e+17,
4
+ "train_loss": 0.09885871769101531,
5
+ "train_runtime": 224.7245,
6
+ "train_samples_per_second": 45.336,
7
+ "train_steps_per_second": 2.884
8
  }
trainer_state.json CHANGED
@@ -1,1705 +1,197 @@
1
  {
2
- "best_metric": 0.42085567116737366,
3
- "best_model_checkpoint": "google/vit-base-patch16-224-in21k_covid_19_ct_scans/checkpoint-2394",
4
- "epoch": 100.0,
5
  "eval_steps": 500,
6
- "global_step": 26600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.0037593984962406013,
13
- "grad_norm": 3.3277029991149902,
14
- "learning_rate": 0.00019999248120300753,
15
- "loss": 0.768,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 1.0,
20
  "eval_AUC": 0.5,
21
- "eval_F1": 0.45505906522855677,
22
- "eval_Precision": 0.8350612629594723,
23
  "eval_Recall": 1.0,
24
- "eval_accuracy": 0.8350612629594723,
25
- "eval_loss": 0.45460373163223267,
26
- "eval_runtime": 23.809,
27
- "eval_samples_per_second": 44.563,
28
- "eval_steps_per_second": 2.814,
29
- "step": 266
30
- },
31
- {
32
- "epoch": 1.8796992481203008,
33
- "grad_norm": 0.9518311023712158,
34
- "learning_rate": 0.0001962406015037594,
35
- "loss": 0.4516,
36
- "step": 500
37
  },
38
  {
39
  "epoch": 2.0,
40
  "eval_AUC": 0.5,
41
- "eval_F1": 0.45505906522855677,
42
- "eval_Precision": 0.8350612629594723,
43
  "eval_Recall": 1.0,
44
- "eval_accuracy": 0.8350612629594723,
45
- "eval_loss": 0.449796199798584,
46
- "eval_runtime": 12.1544,
47
- "eval_samples_per_second": 87.293,
48
- "eval_steps_per_second": 5.512,
49
- "step": 532
50
  },
51
  {
52
  "epoch": 3.0,
53
- "eval_AUC": 0.5,
54
- "eval_F1": 0.45505906522855677,
55
- "eval_Precision": 0.8350612629594723,
56
  "eval_Recall": 1.0,
57
- "eval_accuracy": 0.8350612629594723,
58
- "eval_loss": 0.4491786062717438,
59
- "eval_runtime": 12.1586,
60
- "eval_samples_per_second": 87.263,
61
- "eval_steps_per_second": 5.51,
62
- "step": 798
63
- },
64
- {
65
- "epoch": 3.7593984962406015,
66
- "grad_norm": 0.6577199101448059,
67
- "learning_rate": 0.0001924812030075188,
68
- "loss": 0.4521,
69
- "step": 1000
70
  },
71
  {
72
  "epoch": 4.0,
73
- "eval_AUC": 0.5,
74
- "eval_F1": 0.45505906522855677,
75
- "eval_Precision": 0.8350612629594723,
76
  "eval_Recall": 1.0,
77
- "eval_accuracy": 0.8350612629594723,
78
- "eval_loss": 0.44855841994285583,
79
- "eval_runtime": 12.3395,
80
- "eval_samples_per_second": 85.984,
81
- "eval_steps_per_second": 5.43,
82
- "step": 1064
83
  },
84
  {
85
  "epoch": 5.0,
86
- "eval_AUC": 0.5,
87
- "eval_F1": 0.45505906522855677,
88
- "eval_Precision": 0.8350612629594723,
89
  "eval_Recall": 1.0,
90
- "eval_accuracy": 0.8350612629594723,
91
- "eval_loss": 0.44568774104118347,
92
- "eval_runtime": 12.3116,
93
- "eval_samples_per_second": 86.179,
94
- "eval_steps_per_second": 5.442,
95
- "step": 1330
96
- },
97
- {
98
- "epoch": 5.639097744360902,
99
- "grad_norm": 0.03062070906162262,
100
- "learning_rate": 0.00018872180451127822,
101
- "loss": 0.4415,
102
- "step": 1500
103
  },
104
  {
105
  "epoch": 6.0,
106
- "eval_AUC": 0.5,
107
- "eval_F1": 0.45505906522855677,
108
- "eval_Precision": 0.8350612629594723,
109
- "eval_Recall": 1.0,
110
- "eval_accuracy": 0.8350612629594723,
111
- "eval_loss": 0.4421917796134949,
112
- "eval_runtime": 11.8255,
113
- "eval_samples_per_second": 89.721,
114
- "eval_steps_per_second": 5.666,
115
- "step": 1596
116
  },
117
  {
118
  "epoch": 7.0,
119
- "eval_AUC": 0.5,
120
- "eval_F1": 0.45505906522855677,
121
- "eval_Precision": 0.8350612629594723,
122
- "eval_Recall": 1.0,
123
- "eval_accuracy": 0.8350612629594723,
124
- "eval_loss": 0.42494845390319824,
125
- "eval_runtime": 11.7622,
126
- "eval_samples_per_second": 90.204,
127
- "eval_steps_per_second": 5.696,
128
- "step": 1862
129
- },
130
- {
131
- "epoch": 7.518796992481203,
132
- "grad_norm": 0.4491100311279297,
133
- "learning_rate": 0.0001849624060150376,
134
- "loss": 0.4344,
135
- "step": 2000
136
  },
137
  {
138
  "epoch": 8.0,
139
- "eval_AUC": 0.5183424701709126,
140
- "eval_F1": 0.4965715075876916,
141
- "eval_Precision": 0.8401913875598086,
142
- "eval_Recall": 0.9909706546275395,
143
- "eval_accuracy": 0.8350612629594723,
144
- "eval_loss": 0.4643925130367279,
145
- "eval_runtime": 12.3543,
146
- "eval_samples_per_second": 85.881,
147
- "eval_steps_per_second": 5.423,
148
- "step": 2128
149
  },
150
  {
151
  "epoch": 9.0,
152
- "eval_AUC": 0.5354853273137697,
153
- "eval_F1": 0.5271698859516118,
154
- "eval_Precision": 0.8450433108758422,
155
- "eval_Recall": 0.9909706546275395,
156
- "eval_accuracy": 0.8407163053722903,
157
- "eval_loss": 0.42085567116737366,
158
- "eval_runtime": 12.5156,
159
- "eval_samples_per_second": 84.774,
160
- "eval_steps_per_second": 5.353,
161
- "step": 2394
162
- },
163
- {
164
- "epoch": 9.398496240601503,
165
- "grad_norm": 0.23315227031707764,
166
- "learning_rate": 0.000181203007518797,
167
- "loss": 0.3848,
168
- "step": 2500
169
  },
170
  {
171
  "epoch": 10.0,
172
- "eval_AUC": 0.6642373427926476,
173
- "eval_F1": 0.6571900744677727,
174
- "eval_Precision": 0.8904267589388697,
175
- "eval_Recall": 0.871331828442438,
176
- "eval_accuracy": 0.8030160226201697,
177
- "eval_loss": 0.4335523247718811,
178
- "eval_runtime": 12.0639,
179
- "eval_samples_per_second": 87.948,
180
- "eval_steps_per_second": 5.554,
181
- "step": 2660
182
  },
183
  {
184
  "epoch": 11.0,
185
- "eval_AUC": 0.6386617220251531,
186
- "eval_F1": 0.6595258665704602,
187
- "eval_Precision": 0.8777660695468915,
188
- "eval_Recall": 0.9401805869074492,
189
- "eval_accuracy": 0.8407163053722903,
190
- "eval_loss": 0.43072912096977234,
191
- "eval_runtime": 11.6338,
192
- "eval_samples_per_second": 91.2,
193
- "eval_steps_per_second": 5.759,
194
- "step": 2926
195
- },
196
- {
197
- "epoch": 11.278195488721805,
198
- "grad_norm": 0.8828286528587341,
199
- "learning_rate": 0.0001774436090225564,
200
- "loss": 0.2882,
201
- "step": 3000
202
  },
203
  {
204
  "epoch": 12.0,
205
- "eval_AUC": 0.7007449209932279,
206
- "eval_F1": 0.6913138583881986,
207
- "eval_Precision": 0.9028901734104047,
208
- "eval_Recall": 0.881489841986456,
209
- "eval_accuracy": 0.82186616399623,
210
- "eval_loss": 0.5094270706176758,
211
- "eval_runtime": 12.013,
212
- "eval_samples_per_second": 88.321,
213
- "eval_steps_per_second": 5.577,
214
- "step": 3192
215
- },
216
- {
217
- "epoch": 13.0,
218
- "eval_AUC": 0.6362624959690422,
219
- "eval_F1": 0.6636744500641045,
220
- "eval_Precision": 0.8761609907120743,
221
- "eval_Recall": 0.9582392776523702,
222
- "eval_accuracy": 0.8520263901979265,
223
- "eval_loss": 0.46198517084121704,
224
- "eval_runtime": 11.833,
225
- "eval_samples_per_second": 89.665,
226
- "eval_steps_per_second": 5.662,
227
- "step": 3458
228
- },
229
- {
230
- "epoch": 13.157894736842104,
231
- "grad_norm": 0.10690835863351822,
232
- "learning_rate": 0.0001736842105263158,
233
- "loss": 0.1654,
234
- "step": 3500
235
- },
236
- {
237
- "epoch": 14.0,
238
- "eval_AUC": 0.7246952595936795,
239
- "eval_F1": 0.7141794985075913,
240
- "eval_Precision": 0.9109826589595376,
241
- "eval_Recall": 0.8893905191873589,
242
- "eval_accuracy": 0.8350612629594723,
243
- "eval_loss": 0.58914715051651,
244
- "eval_runtime": 12.2166,
245
- "eval_samples_per_second": 86.849,
246
- "eval_steps_per_second": 5.484,
247
- "step": 3724
248
- },
249
- {
250
- "epoch": 15.0,
251
- "eval_AUC": 0.6827894227668494,
252
- "eval_F1": 0.6940221645449677,
253
- "eval_Precision": 0.893640350877193,
254
- "eval_Recall": 0.9198645598194131,
255
- "eval_accuracy": 0.8416588124410933,
256
- "eval_loss": 0.5601742267608643,
257
- "eval_runtime": 12.0344,
258
- "eval_samples_per_second": 88.164,
259
- "eval_steps_per_second": 5.567,
260
- "step": 3990
261
- },
262
- {
263
- "epoch": 15.037593984962406,
264
- "grad_norm": 0.1679229587316513,
265
- "learning_rate": 0.0001699248120300752,
266
- "loss": 0.0868,
267
- "step": 4000
268
- },
269
- {
270
- "epoch": 16.0,
271
- "eval_AUC": 0.6785198323121573,
272
- "eval_F1": 0.7114280962304207,
273
- "eval_Precision": 0.8894681960375391,
274
- "eval_Recall": 0.9627539503386005,
275
- "eval_accuracy": 0.8689915174363808,
276
- "eval_loss": 0.5927982926368713,
277
- "eval_runtime": 11.7748,
278
- "eval_samples_per_second": 90.107,
279
- "eval_steps_per_second": 5.69,
280
- "step": 4256
281
- },
282
- {
283
- "epoch": 16.917293233082706,
284
- "grad_norm": 0.02159872278571129,
285
- "learning_rate": 0.00016616541353383458,
286
- "loss": 0.045,
287
- "step": 4500
288
- },
289
- {
290
- "epoch": 17.0,
291
- "eval_AUC": 0.7072331505965818,
292
- "eval_F1": 0.7268478980719824,
293
- "eval_Precision": 0.9005405405405406,
294
- "eval_Recall": 0.9401805869074492,
295
- "eval_accuracy": 0.8633364750235627,
296
- "eval_loss": 0.6153913140296936,
297
- "eval_runtime": 11.9361,
298
- "eval_samples_per_second": 88.89,
299
- "eval_steps_per_second": 5.613,
300
- "step": 4522
301
- },
302
- {
303
- "epoch": 18.0,
304
- "eval_AUC": 0.7169332473395679,
305
- "eval_F1": 0.7369669924918544,
306
- "eval_Precision": 0.9036796536796536,
307
- "eval_Recall": 0.9424379232505643,
308
- "eval_accuracy": 0.8680490103675778,
309
- "eval_loss": 0.6357868909835815,
310
- "eval_runtime": 11.728,
311
- "eval_samples_per_second": 90.467,
312
- "eval_steps_per_second": 5.713,
313
- "step": 4788
314
- },
315
- {
316
- "epoch": 18.796992481203006,
317
- "grad_norm": 0.00539048295468092,
318
- "learning_rate": 0.00016240601503759398,
319
- "loss": 0.021,
320
- "step": 5000
321
- },
322
- {
323
- "epoch": 19.0,
324
- "eval_AUC": 0.7422960335375686,
325
- "eval_F1": 0.737905217953103,
326
- "eval_Precision": 0.9157175398633257,
327
- "eval_Recall": 0.90744920993228,
328
- "eval_accuracy": 0.8529688972667295,
329
- "eval_loss": 0.8246906399726868,
330
- "eval_runtime": 11.7691,
331
- "eval_samples_per_second": 90.151,
332
- "eval_steps_per_second": 5.693,
333
- "step": 5054
334
- },
335
- {
336
- "epoch": 20.0,
337
- "eval_AUC": 0.7228603676233473,
338
- "eval_F1": 0.7228603676233474,
339
- "eval_Precision": 0.9085778781038375,
340
- "eval_Recall": 0.9085778781038375,
341
- "eval_accuracy": 0.8473138548539114,
342
- "eval_loss": 0.9930059909820557,
343
- "eval_runtime": 11.643,
344
- "eval_samples_per_second": 91.128,
345
- "eval_steps_per_second": 5.755,
346
- "step": 5320
347
- },
348
- {
349
- "epoch": 20.67669172932331,
350
- "grad_norm": 1.4996395111083984,
351
- "learning_rate": 0.0001586466165413534,
352
- "loss": 0.0136,
353
- "step": 5500
354
- },
355
- {
356
- "epoch": 21.0,
357
- "eval_AUC": 0.7037762012254112,
358
- "eval_F1": 0.7262231926161842,
359
- "eval_Precision": 0.8990332975295381,
360
- "eval_Recall": 0.9446952595936795,
361
- "eval_accuracy": 0.8652214891611687,
362
- "eval_loss": 0.5600523352622986,
363
- "eval_runtime": 11.6192,
364
- "eval_samples_per_second": 91.315,
365
- "eval_steps_per_second": 5.766,
366
- "step": 5586
367
- },
368
- {
369
- "epoch": 22.0,
370
- "eval_AUC": 0.6561560786842955,
371
- "eval_F1": 0.6934912580385852,
372
- "eval_Precision": 0.8816326530612245,
373
- "eval_Recall": 0.9751693002257337,
374
- "eval_accuracy": 0.8699340245051838,
375
- "eval_loss": 0.64747554063797,
376
- "eval_runtime": 11.5363,
377
- "eval_samples_per_second": 91.971,
378
- "eval_steps_per_second": 5.808,
379
- "step": 5852
380
- },
381
- {
382
- "epoch": 22.55639097744361,
383
- "grad_norm": 0.029281923547387123,
384
- "learning_rate": 0.0001548872180451128,
385
- "loss": 0.0464,
386
- "step": 6000
387
- },
388
- {
389
- "epoch": 23.0,
390
- "eval_AUC": 0.7170396646243147,
391
- "eval_F1": 0.7272862554112554,
392
- "eval_Precision": 0.9050772626931567,
393
- "eval_Recall": 0.9255079006772009,
394
- "eval_accuracy": 0.8567389255419415,
395
- "eval_loss": 0.5766553282737732,
396
- "eval_runtime": 11.6613,
397
- "eval_samples_per_second": 90.984,
398
- "eval_steps_per_second": 5.745,
399
- "step": 6118
400
- },
401
- {
402
- "epoch": 24.0,
403
- "eval_AUC": 0.7451886488229603,
404
- "eval_F1": 0.736944199717763,
405
- "eval_Precision": 0.9173363949483353,
406
- "eval_Recall": 0.9018058690744921,
407
- "eval_accuracy": 0.8501413760603205,
408
- "eval_loss": 0.7393656373023987,
409
- "eval_runtime": 11.8414,
410
- "eval_samples_per_second": 89.601,
411
- "eval_steps_per_second": 5.658,
412
- "step": 6384
413
- },
414
- {
415
- "epoch": 24.43609022556391,
416
- "grad_norm": 0.0072451187297701836,
417
- "learning_rate": 0.00015112781954887218,
418
- "loss": 0.0438,
419
- "step": 6500
420
- },
421
- {
422
- "epoch": 25.0,
423
- "eval_AUC": 0.6412705578845533,
424
- "eval_F1": 0.6781337216357238,
425
- "eval_Precision": 0.8767676767676768,
426
- "eval_Recall": 0.9796839729119639,
427
- "eval_accuracy": 0.8680490103675778,
428
- "eval_loss": 0.762208878993988,
429
- "eval_runtime": 11.8755,
430
- "eval_samples_per_second": 89.344,
431
- "eval_steps_per_second": 5.642,
432
- "step": 6650
433
- },
434
- {
435
- "epoch": 26.0,
436
- "eval_AUC": 0.7167913576265721,
437
- "eval_F1": 0.7509163334545014,
438
- "eval_Precision": 0.9018987341772152,
439
- "eval_Recall": 0.9650112866817155,
440
- "eval_accuracy": 0.883129123468426,
441
- "eval_loss": 0.7616934180259705,
442
- "eval_runtime": 11.673,
443
- "eval_samples_per_second": 90.893,
444
- "eval_steps_per_second": 5.74,
445
- "step": 6916
446
- },
447
- {
448
- "epoch": 26.31578947368421,
449
- "grad_norm": 0.005205586086958647,
450
- "learning_rate": 0.00014736842105263158,
451
- "loss": 0.0126,
452
- "step": 7000
453
- },
454
- {
455
- "epoch": 27.0,
456
- "eval_AUC": 0.7227184779103515,
457
- "eval_F1": 0.7354277398991624,
458
- "eval_Precision": 0.9065934065934066,
459
- "eval_Recall": 0.9311512415349887,
460
- "eval_accuracy": 0.8623939679547596,
461
- "eval_loss": 0.8840720653533936,
462
- "eval_runtime": 12.2613,
463
- "eval_samples_per_second": 86.533,
464
- "eval_steps_per_second": 5.464,
465
- "step": 7182
466
- },
467
- {
468
- "epoch": 28.0,
469
- "eval_AUC": 0.7300193485972267,
470
- "eval_F1": 0.7543650900476486,
471
- "eval_Precision": 0.907427341227126,
472
- "eval_Recall": 0.9514672686230248,
473
- "eval_accuracy": 0.8784165881244109,
474
- "eval_loss": 0.7538221478462219,
475
- "eval_runtime": 11.7032,
476
- "eval_samples_per_second": 90.659,
477
- "eval_steps_per_second": 5.725,
478
- "step": 7448
479
- },
480
- {
481
- "epoch": 28.195488721804512,
482
- "grad_norm": 0.03350173309445381,
483
- "learning_rate": 0.000143609022556391,
484
- "loss": 0.016,
485
- "step": 7500
486
- },
487
- {
488
- "epoch": 29.0,
489
- "eval_AUC": 0.6320638503708481,
490
- "eval_F1": 0.6709177157453019,
491
- "eval_Precision": 0.8735059760956175,
492
- "eval_Recall": 0.989841986455982,
493
- "eval_accuracy": 0.8718190386427899,
494
- "eval_loss": 0.7105740308761597,
495
- "eval_runtime": 11.7867,
496
- "eval_samples_per_second": 90.017,
497
- "eval_steps_per_second": 5.684,
498
- "step": 7714
499
- },
500
- {
501
- "epoch": 30.0,
502
- "eval_AUC": 0.689348597226701,
503
- "eval_F1": 0.72511658580244,
504
- "eval_Precision": 0.8927083333333333,
505
- "eval_Recall": 0.9672686230248307,
506
- "eval_accuracy": 0.8755890669180019,
507
- "eval_loss": 0.6111597418785095,
508
- "eval_runtime": 11.7504,
509
- "eval_samples_per_second": 90.295,
510
- "eval_steps_per_second": 5.702,
511
- "step": 7980
512
- },
513
- {
514
- "epoch": 30.075187969924812,
515
- "grad_norm": 0.013551408424973488,
516
- "learning_rate": 0.0001398496240601504,
517
- "loss": 0.0384,
518
- "step": 8000
519
- },
520
- {
521
- "epoch": 31.0,
522
- "eval_AUC": 0.6887487907126733,
523
- "eval_F1": 0.7271465907527794,
524
- "eval_Precision": 0.8922279792746114,
525
- "eval_Recall": 0.9717832957110609,
526
- "eval_accuracy": 0.8784165881244109,
527
- "eval_loss": 0.5990052223205566,
528
- "eval_runtime": 11.854,
529
- "eval_samples_per_second": 89.506,
530
- "eval_steps_per_second": 5.652,
531
- "step": 8246
532
- },
533
- {
534
- "epoch": 31.954887218045112,
535
- "grad_norm": 0.10003461688756943,
536
- "learning_rate": 0.0001360902255639098,
537
- "loss": 0.0276,
538
- "step": 8500
539
- },
540
- {
541
- "epoch": 32.0,
542
- "eval_AUC": 0.699577555627217,
543
- "eval_F1": 0.741136709063275,
544
- "eval_Precision": 0.8954451345755694,
545
- "eval_Recall": 0.9762979683972912,
546
- "eval_accuracy": 0.8850141376060321,
547
- "eval_loss": 0.6617034673690796,
548
- "eval_runtime": 11.7353,
549
- "eval_samples_per_second": 90.411,
550
- "eval_steps_per_second": 5.709,
551
- "step": 8512
552
- },
553
- {
554
- "epoch": 33.0,
555
- "eval_AUC": 0.7190132215414382,
556
- "eval_F1": 0.7599135442188549,
557
- "eval_Precision": 0.9018789144050104,
558
- "eval_Recall": 0.9751693002257337,
559
- "eval_accuracy": 0.8906691800188501,
560
- "eval_loss": 0.7068904042243958,
561
- "eval_runtime": 11.6304,
562
- "eval_samples_per_second": 91.226,
563
- "eval_steps_per_second": 5.761,
564
- "step": 8778
565
- },
566
- {
567
- "epoch": 33.83458646616541,
568
- "grad_norm": 0.00604345602914691,
569
- "learning_rate": 0.00013233082706766918,
570
- "loss": 0.0109,
571
- "step": 9000
572
- },
573
- {
574
- "epoch": 34.0,
575
- "eval_AUC": 0.6566849403418252,
576
- "eval_F1": 0.6974271887335782,
577
- "eval_Precision": 0.8814589665653495,
578
- "eval_Recall": 0.981941309255079,
579
- "eval_accuracy": 0.8746465598491989,
580
- "eval_loss": 0.8042259812355042,
581
- "eval_runtime": 11.739,
582
- "eval_samples_per_second": 90.382,
583
- "eval_steps_per_second": 5.707,
584
- "step": 9044
585
- },
586
- {
587
- "epoch": 35.0,
588
- "eval_AUC": 0.6961560786842954,
589
- "eval_F1": 0.7368930485561156,
590
- "eval_Precision": 0.8944099378881988,
591
- "eval_Recall": 0.9751693002257337,
592
- "eval_accuracy": 0.883129123468426,
593
- "eval_loss": 0.7705923914909363,
594
- "eval_runtime": 11.6824,
595
- "eval_samples_per_second": 90.821,
596
- "eval_steps_per_second": 5.735,
597
- "step": 9310
598
- },
599
- {
600
- "epoch": 35.714285714285715,
601
- "grad_norm": 0.0020399852655828,
602
- "learning_rate": 0.00012857142857142858,
603
- "loss": 0.0028,
604
- "step": 9500
605
- },
606
- {
607
- "epoch": 36.0,
608
- "eval_AUC": 0.712170267655595,
609
- "eval_F1": 0.7516347009160568,
610
- "eval_Precision": 0.8997912317327766,
611
- "eval_Recall": 0.9729119638826185,
612
- "eval_accuracy": 0.8868991517436381,
613
- "eval_loss": 0.8394030928611755,
614
- "eval_runtime": 11.8855,
615
- "eval_samples_per_second": 89.268,
616
- "eval_steps_per_second": 5.637,
617
- "step": 9576
618
- },
619
- {
620
- "epoch": 37.0,
621
- "eval_AUC": 0.7087487907126733,
622
- "eval_F1": 0.7474952792646576,
623
- "eval_Precision": 0.8987473903966597,
624
- "eval_Recall": 0.9717832957110609,
625
- "eval_accuracy": 0.8850141376060321,
626
- "eval_loss": 0.8953573107719421,
627
- "eval_runtime": 11.8186,
628
- "eval_samples_per_second": 89.774,
629
- "eval_steps_per_second": 5.669,
630
- "step": 9842
631
- },
632
- {
633
- "epoch": 37.59398496240601,
634
- "grad_norm": 0.0010929929558187723,
635
- "learning_rate": 0.00012481203007518797,
636
- "loss": 0.0076,
637
- "step": 10000
638
- },
639
- {
640
- "epoch": 38.0,
641
- "eval_AUC": 0.7087487907126733,
642
- "eval_F1": 0.7474952792646576,
643
- "eval_Precision": 0.8987473903966597,
644
- "eval_Recall": 0.9717832957110609,
645
- "eval_accuracy": 0.8850141376060321,
646
- "eval_loss": 0.9388997554779053,
647
- "eval_runtime": 11.6255,
648
- "eval_samples_per_second": 91.265,
649
- "eval_steps_per_second": 5.763,
650
- "step": 10108
651
- },
652
- {
653
- "epoch": 39.0,
654
- "eval_AUC": 0.7087487907126733,
655
- "eval_F1": 0.7474952792646576,
656
- "eval_Precision": 0.8987473903966597,
657
- "eval_Recall": 0.9717832957110609,
658
- "eval_accuracy": 0.8850141376060321,
659
- "eval_loss": 0.9697290062904358,
660
- "eval_runtime": 11.5778,
661
- "eval_samples_per_second": 91.641,
662
- "eval_steps_per_second": 5.787,
663
- "step": 10374
664
- },
665
- {
666
- "epoch": 39.473684210526315,
667
- "grad_norm": 0.0006237945053726435,
668
- "learning_rate": 0.00012105263157894738,
669
- "loss": 0.0001,
670
- "step": 10500
671
- },
672
- {
673
- "epoch": 40.0,
674
- "eval_AUC": 0.7087487907126733,
675
- "eval_F1": 0.7474952792646576,
676
- "eval_Precision": 0.8987473903966597,
677
- "eval_Recall": 0.9717832957110609,
678
- "eval_accuracy": 0.8850141376060321,
679
- "eval_loss": 0.9953697323799133,
680
- "eval_runtime": 11.7777,
681
- "eval_samples_per_second": 90.086,
682
- "eval_steps_per_second": 5.689,
683
- "step": 10640
684
- },
685
- {
686
- "epoch": 41.0,
687
- "eval_AUC": 0.7087487907126733,
688
- "eval_F1": 0.7474952792646576,
689
- "eval_Precision": 0.8987473903966597,
690
- "eval_Recall": 0.9717832957110609,
691
- "eval_accuracy": 0.8850141376060321,
692
- "eval_loss": 1.0168683528900146,
693
- "eval_runtime": 11.7874,
694
- "eval_samples_per_second": 90.011,
695
- "eval_steps_per_second": 5.684,
696
- "step": 10906
697
- },
698
- {
699
- "epoch": 41.35338345864662,
700
- "grad_norm": 0.000347771099768579,
701
- "learning_rate": 0.00011729323308270677,
702
- "loss": 0.0,
703
- "step": 11000
704
- },
705
- {
706
- "epoch": 42.0,
707
- "eval_AUC": 0.7093131247984521,
708
- "eval_F1": 0.7487971197401504,
709
- "eval_Precision": 0.8988529718456726,
710
- "eval_Recall": 0.9729119638826185,
711
- "eval_accuracy": 0.885956644674835,
712
- "eval_loss": 1.038093090057373,
713
- "eval_runtime": 11.6656,
714
- "eval_samples_per_second": 90.951,
715
- "eval_steps_per_second": 5.743,
716
- "step": 11172
717
- },
718
- {
719
- "epoch": 43.0,
720
- "eval_AUC": 0.7093131247984521,
721
- "eval_F1": 0.7487971197401504,
722
- "eval_Precision": 0.8988529718456726,
723
- "eval_Recall": 0.9729119638826185,
724
- "eval_accuracy": 0.885956644674835,
725
- "eval_loss": 1.0582064390182495,
726
- "eval_runtime": 11.7863,
727
- "eval_samples_per_second": 90.019,
728
- "eval_steps_per_second": 5.685,
729
- "step": 11438
730
- },
731
- {
732
- "epoch": 43.233082706766915,
733
- "grad_norm": 0.00026405107928439975,
734
- "learning_rate": 0.00011353383458646618,
735
- "loss": 0.0,
736
- "step": 11500
737
- },
738
- {
739
- "epoch": 44.0,
740
- "eval_AUC": 0.7093131247984521,
741
- "eval_F1": 0.7487971197401504,
742
- "eval_Precision": 0.8988529718456726,
743
- "eval_Recall": 0.9729119638826185,
744
- "eval_accuracy": 0.885956644674835,
745
- "eval_loss": 1.0762717723846436,
746
- "eval_runtime": 11.6351,
747
- "eval_samples_per_second": 91.189,
748
- "eval_steps_per_second": 5.758,
749
- "step": 11704
750
- },
751
- {
752
- "epoch": 45.0,
753
- "eval_AUC": 0.7093131247984521,
754
- "eval_F1": 0.7487971197401504,
755
- "eval_Precision": 0.8988529718456726,
756
- "eval_Recall": 0.9729119638826185,
757
- "eval_accuracy": 0.885956644674835,
758
- "eval_loss": 1.0936743021011353,
759
- "eval_runtime": 11.9085,
760
- "eval_samples_per_second": 89.096,
761
- "eval_steps_per_second": 5.626,
762
- "step": 11970
763
- },
764
- {
765
- "epoch": 45.11278195488722,
766
- "grad_norm": 0.00022154749603942037,
767
- "learning_rate": 0.00010977443609022557,
768
- "loss": 0.0,
769
- "step": 12000
770
- },
771
- {
772
- "epoch": 46.0,
773
- "eval_AUC": 0.7150274105127379,
774
- "eval_F1": 0.7544540322094451,
775
- "eval_Precision": 0.9007314524555904,
776
- "eval_Recall": 0.9729119638826185,
777
- "eval_accuracy": 0.8878416588124411,
778
- "eval_loss": 1.1094835996627808,
779
- "eval_runtime": 11.7724,
780
- "eval_samples_per_second": 90.126,
781
- "eval_steps_per_second": 5.691,
782
- "step": 12236
783
- },
784
- {
785
- "epoch": 46.99248120300752,
786
- "grad_norm": 0.00019688473548740149,
787
- "learning_rate": 0.00010601503759398497,
788
- "loss": 0.0,
789
- "step": 12500
790
- },
791
- {
792
- "epoch": 47.0,
793
- "eval_AUC": 0.7150274105127379,
794
- "eval_F1": 0.7544540322094451,
795
- "eval_Precision": 0.9007314524555904,
796
- "eval_Recall": 0.9729119638826185,
797
- "eval_accuracy": 0.8878416588124411,
798
- "eval_loss": 1.1262503862380981,
799
- "eval_runtime": 11.5453,
800
- "eval_samples_per_second": 91.899,
801
- "eval_steps_per_second": 5.803,
802
- "step": 12502
803
- },
804
- {
805
- "epoch": 48.0,
806
- "eval_AUC": 0.7150274105127379,
807
- "eval_F1": 0.7544540322094451,
808
- "eval_Precision": 0.9007314524555904,
809
- "eval_Recall": 0.9729119638826185,
810
- "eval_accuracy": 0.8878416588124411,
811
- "eval_loss": 1.1426563262939453,
812
- "eval_runtime": 11.6837,
813
- "eval_samples_per_second": 90.81,
814
- "eval_steps_per_second": 5.734,
815
- "step": 12768
816
- },
817
- {
818
- "epoch": 48.87218045112782,
819
- "grad_norm": 0.0001134950143750757,
820
- "learning_rate": 0.00010225563909774436,
821
- "loss": 0.0,
822
- "step": 13000
823
- },
824
- {
825
- "epoch": 49.0,
826
- "eval_AUC": 0.7150274105127379,
827
- "eval_F1": 0.7544540322094451,
828
- "eval_Precision": 0.9007314524555904,
829
- "eval_Recall": 0.9729119638826185,
830
- "eval_accuracy": 0.8878416588124411,
831
- "eval_loss": 1.1587177515029907,
832
- "eval_runtime": 11.7191,
833
- "eval_samples_per_second": 90.536,
834
- "eval_steps_per_second": 5.717,
835
- "step": 13034
836
- },
837
- {
838
- "epoch": 50.0,
839
- "eval_AUC": 0.7150274105127379,
840
- "eval_F1": 0.7544540322094451,
841
- "eval_Precision": 0.9007314524555904,
842
- "eval_Recall": 0.9729119638826185,
843
- "eval_accuracy": 0.8878416588124411,
844
- "eval_loss": 1.174465537071228,
845
- "eval_runtime": 11.8222,
846
- "eval_samples_per_second": 89.747,
847
- "eval_steps_per_second": 5.667,
848
- "step": 13300
849
- },
850
- {
851
- "epoch": 50.75187969924812,
852
- "grad_norm": 9.584094368619844e-05,
853
- "learning_rate": 9.849624060150377e-05,
854
- "loss": 0.0,
855
- "step": 13500
856
- },
857
- {
858
- "epoch": 51.0,
859
- "eval_AUC": 0.7150274105127379,
860
- "eval_F1": 0.7544540322094451,
861
- "eval_Precision": 0.9007314524555904,
862
- "eval_Recall": 0.9729119638826185,
863
- "eval_accuracy": 0.8878416588124411,
864
- "eval_loss": 1.1900520324707031,
865
- "eval_runtime": 11.7601,
866
- "eval_samples_per_second": 90.22,
867
- "eval_steps_per_second": 5.697,
868
- "step": 13566
869
- },
870
- {
871
- "epoch": 52.0,
872
- "eval_AUC": 0.7178845533698807,
873
- "eval_F1": 0.7572553125484722,
874
- "eval_Precision": 0.9016736401673641,
875
- "eval_Recall": 0.9729119638826185,
876
- "eval_accuracy": 0.8887841658812441,
877
- "eval_loss": 1.2051938772201538,
878
- "eval_runtime": 11.9347,
879
- "eval_samples_per_second": 88.901,
880
- "eval_steps_per_second": 5.614,
881
- "step": 13832
882
- },
883
- {
884
- "epoch": 52.63157894736842,
885
- "grad_norm": 7.240776903927326e-05,
886
- "learning_rate": 9.473684210526316e-05,
887
- "loss": 0.0,
888
- "step": 14000
889
- },
890
- {
891
- "epoch": 53.0,
892
- "eval_AUC": 0.7178845533698807,
893
- "eval_F1": 0.7572553125484722,
894
- "eval_Precision": 0.9016736401673641,
895
- "eval_Recall": 0.9729119638826185,
896
- "eval_accuracy": 0.8887841658812441,
897
- "eval_loss": 1.2201390266418457,
898
- "eval_runtime": 11.8013,
899
- "eval_samples_per_second": 89.905,
900
- "eval_steps_per_second": 5.677,
901
- "step": 14098
902
- },
903
- {
904
- "epoch": 54.0,
905
- "eval_AUC": 0.7178845533698807,
906
- "eval_F1": 0.7572553125484722,
907
- "eval_Precision": 0.9016736401673641,
908
- "eval_Recall": 0.9729119638826185,
909
- "eval_accuracy": 0.8887841658812441,
910
- "eval_loss": 1.2349706888198853,
911
- "eval_runtime": 11.8152,
912
- "eval_samples_per_second": 89.8,
913
- "eval_steps_per_second": 5.671,
914
- "step": 14364
915
- },
916
- {
917
- "epoch": 54.51127819548872,
918
- "grad_norm": 4.7142420953605324e-05,
919
- "learning_rate": 9.097744360902256e-05,
920
- "loss": 0.0,
921
- "step": 14500
922
- },
923
- {
924
- "epoch": 55.0,
925
- "eval_AUC": 0.7178845533698807,
926
- "eval_F1": 0.7572553125484722,
927
- "eval_Precision": 0.9016736401673641,
928
- "eval_Recall": 0.9729119638826185,
929
- "eval_accuracy": 0.8887841658812441,
930
- "eval_loss": 1.249691367149353,
931
- "eval_runtime": 11.9642,
932
- "eval_samples_per_second": 88.682,
933
- "eval_steps_per_second": 5.6,
934
- "step": 14630
935
- },
936
- {
937
- "epoch": 56.0,
938
- "eval_AUC": 0.7178845533698807,
939
- "eval_F1": 0.7572553125484722,
940
- "eval_Precision": 0.9016736401673641,
941
- "eval_Recall": 0.9729119638826185,
942
- "eval_accuracy": 0.8887841658812441,
943
- "eval_loss": 1.2640849351882935,
944
- "eval_runtime": 11.9363,
945
- "eval_samples_per_second": 88.889,
946
- "eval_steps_per_second": 5.613,
947
- "step": 14896
948
- },
949
- {
950
- "epoch": 56.390977443609025,
951
- "grad_norm": 4.406652442412451e-05,
952
- "learning_rate": 8.721804511278195e-05,
953
- "loss": 0.0,
954
- "step": 15000
955
- },
956
- {
957
- "epoch": 57.0,
958
- "eval_AUC": 0.7178845533698807,
959
- "eval_F1": 0.7572553125484722,
960
- "eval_Precision": 0.9016736401673641,
961
- "eval_Recall": 0.9729119638826185,
962
- "eval_accuracy": 0.8887841658812441,
963
- "eval_loss": 1.2785232067108154,
964
- "eval_runtime": 11.9798,
965
- "eval_samples_per_second": 88.566,
966
- "eval_steps_per_second": 5.593,
967
- "step": 15162
968
- },
969
- {
970
- "epoch": 58.0,
971
- "eval_AUC": 0.7178845533698807,
972
- "eval_F1": 0.7572553125484722,
973
- "eval_Precision": 0.9016736401673641,
974
- "eval_Recall": 0.9729119638826185,
975
- "eval_accuracy": 0.8887841658812441,
976
- "eval_loss": 1.2925220727920532,
977
- "eval_runtime": 11.9038,
978
- "eval_samples_per_second": 89.131,
979
- "eval_steps_per_second": 5.628,
980
- "step": 15428
981
- },
982
- {
983
- "epoch": 58.27067669172932,
984
- "grad_norm": 2.6122717827092856e-05,
985
- "learning_rate": 8.345864661654136e-05,
986
- "loss": 0.0,
987
- "step": 15500
988
- },
989
- {
990
- "epoch": 59.0,
991
- "eval_AUC": 0.7178845533698807,
992
- "eval_F1": 0.7572553125484722,
993
- "eval_Precision": 0.9016736401673641,
994
- "eval_Recall": 0.9729119638826185,
995
- "eval_accuracy": 0.8887841658812441,
996
- "eval_loss": 1.3067699670791626,
997
- "eval_runtime": 11.8267,
998
- "eval_samples_per_second": 89.713,
999
- "eval_steps_per_second": 5.665,
1000
- "step": 15694
1001
- },
1002
- {
1003
- "epoch": 60.0,
1004
- "eval_AUC": 0.7178845533698807,
1005
- "eval_F1": 0.7572553125484722,
1006
- "eval_Precision": 0.9016736401673641,
1007
- "eval_Recall": 0.9729119638826185,
1008
- "eval_accuracy": 0.8887841658812441,
1009
- "eval_loss": 1.3207120895385742,
1010
- "eval_runtime": 11.6077,
1011
- "eval_samples_per_second": 91.405,
1012
- "eval_steps_per_second": 5.772,
1013
- "step": 15960
1014
- },
1015
- {
1016
- "epoch": 60.150375939849624,
1017
- "grad_norm": 2.532277903810609e-05,
1018
- "learning_rate": 7.969924812030075e-05,
1019
- "loss": 0.0,
1020
- "step": 16000
1021
- },
1022
- {
1023
- "epoch": 61.0,
1024
- "eval_AUC": 0.7178845533698807,
1025
- "eval_F1": 0.7572553125484722,
1026
- "eval_Precision": 0.9016736401673641,
1027
- "eval_Recall": 0.9729119638826185,
1028
- "eval_accuracy": 0.8887841658812441,
1029
- "eval_loss": 1.3345941305160522,
1030
- "eval_runtime": 11.8443,
1031
- "eval_samples_per_second": 89.579,
1032
- "eval_steps_per_second": 5.657,
1033
- "step": 16226
1034
- },
1035
- {
1036
- "epoch": 62.0,
1037
- "eval_AUC": 0.7178845533698807,
1038
- "eval_F1": 0.7572553125484722,
1039
- "eval_Precision": 0.9016736401673641,
1040
- "eval_Recall": 0.9729119638826185,
1041
- "eval_accuracy": 0.8887841658812441,
1042
- "eval_loss": 1.3484621047973633,
1043
- "eval_runtime": 11.6458,
1044
- "eval_samples_per_second": 91.106,
1045
- "eval_steps_per_second": 5.753,
1046
- "step": 16492
1047
- },
1048
- {
1049
- "epoch": 62.03007518796993,
1050
- "grad_norm": 1.7661703168414533e-05,
1051
- "learning_rate": 7.593984962406016e-05,
1052
- "loss": 0.0,
1053
- "step": 16500
1054
- },
1055
- {
1056
- "epoch": 63.0,
1057
- "eval_AUC": 0.7178845533698807,
1058
- "eval_F1": 0.7572553125484722,
1059
- "eval_Precision": 0.9016736401673641,
1060
- "eval_Recall": 0.9729119638826185,
1061
- "eval_accuracy": 0.8887841658812441,
1062
- "eval_loss": 1.3622149229049683,
1063
- "eval_runtime": 11.7437,
1064
- "eval_samples_per_second": 90.346,
1065
- "eval_steps_per_second": 5.705,
1066
- "step": 16758
1067
- },
1068
- {
1069
- "epoch": 63.909774436090224,
1070
- "grad_norm": 1.633859210414812e-05,
1071
- "learning_rate": 7.218045112781955e-05,
1072
- "loss": 0.0,
1073
- "step": 17000
1074
- },
1075
- {
1076
- "epoch": 64.0,
1077
- "eval_AUC": 0.7178845533698807,
1078
- "eval_F1": 0.7572553125484722,
1079
- "eval_Precision": 0.9016736401673641,
1080
- "eval_Recall": 0.9729119638826185,
1081
- "eval_accuracy": 0.8887841658812441,
1082
- "eval_loss": 1.3757728338241577,
1083
- "eval_runtime": 11.6035,
1084
- "eval_samples_per_second": 91.438,
1085
- "eval_steps_per_second": 5.774,
1086
- "step": 17024
1087
- },
1088
- {
1089
- "epoch": 65.0,
1090
- "eval_AUC": 0.7178845533698807,
1091
- "eval_F1": 0.7572553125484722,
1092
- "eval_Precision": 0.9016736401673641,
1093
- "eval_Recall": 0.9729119638826185,
1094
- "eval_accuracy": 0.8887841658812441,
1095
- "eval_loss": 1.3893355131149292,
1096
- "eval_runtime": 11.8424,
1097
- "eval_samples_per_second": 89.593,
1098
- "eval_steps_per_second": 5.658,
1099
- "step": 17290
1100
- },
1101
- {
1102
- "epoch": 65.78947368421052,
1103
- "grad_norm": 1.2574956599564757e-05,
1104
- "learning_rate": 6.842105263157895e-05,
1105
- "loss": 0.0,
1106
- "step": 17500
1107
- },
1108
- {
1109
- "epoch": 66.0,
1110
- "eval_AUC": 0.7178845533698807,
1111
- "eval_F1": 0.7572553125484722,
1112
- "eval_Precision": 0.9016736401673641,
1113
- "eval_Recall": 0.9729119638826185,
1114
- "eval_accuracy": 0.8887841658812441,
1115
- "eval_loss": 1.40289306640625,
1116
- "eval_runtime": 11.9699,
1117
- "eval_samples_per_second": 88.639,
1118
- "eval_steps_per_second": 5.597,
1119
- "step": 17556
1120
- },
1121
- {
1122
- "epoch": 67.0,
1123
- "eval_AUC": 0.7178845533698807,
1124
- "eval_F1": 0.7572553125484722,
1125
- "eval_Precision": 0.9016736401673641,
1126
- "eval_Recall": 0.9729119638826185,
1127
- "eval_accuracy": 0.8887841658812441,
1128
- "eval_loss": 1.4165505170822144,
1129
- "eval_runtime": 11.6917,
1130
- "eval_samples_per_second": 90.748,
1131
- "eval_steps_per_second": 5.731,
1132
- "step": 17822
1133
- },
1134
- {
1135
- "epoch": 67.66917293233082,
1136
- "grad_norm": 1.0964651664835401e-05,
1137
- "learning_rate": 6.466165413533834e-05,
1138
- "loss": 0.0,
1139
- "step": 18000
1140
- },
1141
- {
1142
- "epoch": 68.0,
1143
- "eval_AUC": 0.7178845533698807,
1144
- "eval_F1": 0.7572553125484722,
1145
- "eval_Precision": 0.9016736401673641,
1146
- "eval_Recall": 0.9729119638826185,
1147
- "eval_accuracy": 0.8887841658812441,
1148
- "eval_loss": 1.4297924041748047,
1149
- "eval_runtime": 11.881,
1150
- "eval_samples_per_second": 89.302,
1151
- "eval_steps_per_second": 5.639,
1152
- "step": 18088
1153
- },
1154
- {
1155
- "epoch": 69.0,
1156
- "eval_AUC": 0.7178845533698807,
1157
- "eval_F1": 0.7572553125484722,
1158
- "eval_Precision": 0.9016736401673641,
1159
- "eval_Recall": 0.9729119638826185,
1160
- "eval_accuracy": 0.8887841658812441,
1161
- "eval_loss": 1.4431047439575195,
1162
- "eval_runtime": 11.6298,
1163
- "eval_samples_per_second": 91.231,
1164
- "eval_steps_per_second": 5.761,
1165
- "step": 18354
1166
- },
1167
- {
1168
- "epoch": 69.54887218045113,
1169
- "grad_norm": 8.276247172034346e-06,
1170
- "learning_rate": 6.090225563909775e-05,
1171
- "loss": 0.0,
1172
- "step": 18500
1173
- },
1174
- {
1175
- "epoch": 70.0,
1176
- "eval_AUC": 0.7178845533698807,
1177
- "eval_F1": 0.7572553125484722,
1178
- "eval_Precision": 0.9016736401673641,
1179
- "eval_Recall": 0.9729119638826185,
1180
- "eval_accuracy": 0.8887841658812441,
1181
- "eval_loss": 1.4565781354904175,
1182
- "eval_runtime": 11.6654,
1183
- "eval_samples_per_second": 90.952,
1184
- "eval_steps_per_second": 5.743,
1185
- "step": 18620
1186
- },
1187
- {
1188
- "epoch": 71.0,
1189
- "eval_AUC": 0.7178845533698807,
1190
- "eval_F1": 0.7572553125484722,
1191
- "eval_Precision": 0.9016736401673641,
1192
- "eval_Recall": 0.9729119638826185,
1193
- "eval_accuracy": 0.8887841658812441,
1194
- "eval_loss": 1.4694792032241821,
1195
- "eval_runtime": 12.0384,
1196
- "eval_samples_per_second": 88.134,
1197
- "eval_steps_per_second": 5.566,
1198
- "step": 18886
1199
- },
1200
- {
1201
- "epoch": 71.42857142857143,
1202
- "grad_norm": 7.255929176608333e-06,
1203
- "learning_rate": 5.714285714285714e-05,
1204
- "loss": 0.0,
1205
- "step": 19000
1206
- },
1207
- {
1208
- "epoch": 72.0,
1209
- "eval_AUC": 0.7178845533698807,
1210
- "eval_F1": 0.7572553125484722,
1211
- "eval_Precision": 0.9016736401673641,
1212
- "eval_Recall": 0.9729119638826185,
1213
- "eval_accuracy": 0.8887841658812441,
1214
- "eval_loss": 1.482446551322937,
1215
- "eval_runtime": 11.6854,
1216
- "eval_samples_per_second": 90.797,
1217
- "eval_steps_per_second": 5.734,
1218
- "step": 19152
1219
- },
1220
- {
1221
- "epoch": 73.0,
1222
- "eval_AUC": 0.7178845533698807,
1223
- "eval_F1": 0.7572553125484722,
1224
- "eval_Precision": 0.9016736401673641,
1225
- "eval_Recall": 0.9729119638826185,
1226
- "eval_accuracy": 0.8887841658812441,
1227
- "eval_loss": 1.4949710369110107,
1228
- "eval_runtime": 11.7291,
1229
- "eval_samples_per_second": 90.459,
1230
- "eval_steps_per_second": 5.712,
1231
- "step": 19418
1232
- },
1233
- {
1234
- "epoch": 73.30827067669173,
1235
- "grad_norm": 5.73582974539022e-06,
1236
- "learning_rate": 5.338345864661655e-05,
1237
- "loss": 0.0,
1238
- "step": 19500
1239
- },
1240
- {
1241
- "epoch": 74.0,
1242
- "eval_AUC": 0.7178845533698807,
1243
- "eval_F1": 0.7572553125484722,
1244
- "eval_Precision": 0.9016736401673641,
1245
- "eval_Recall": 0.9729119638826185,
1246
- "eval_accuracy": 0.8887841658812441,
1247
- "eval_loss": 1.50760817527771,
1248
- "eval_runtime": 11.6937,
1249
- "eval_samples_per_second": 90.732,
1250
- "eval_steps_per_second": 5.73,
1251
- "step": 19684
1252
- },
1253
- {
1254
- "epoch": 75.0,
1255
- "eval_AUC": 0.7178845533698807,
1256
- "eval_F1": 0.7572553125484722,
1257
- "eval_Precision": 0.9016736401673641,
1258
- "eval_Recall": 0.9729119638826185,
1259
- "eval_accuracy": 0.8887841658812441,
1260
- "eval_loss": 1.5201044082641602,
1261
- "eval_runtime": 11.779,
1262
- "eval_samples_per_second": 90.075,
1263
- "eval_steps_per_second": 5.688,
1264
- "step": 19950
1265
- },
1266
- {
1267
- "epoch": 75.18796992481202,
1268
- "grad_norm": 3.381761189302779e-06,
1269
- "learning_rate": 4.9624060150375936e-05,
1270
- "loss": 0.0,
1271
- "step": 20000
1272
- },
1273
- {
1274
- "epoch": 76.0,
1275
- "eval_AUC": 0.7178845533698807,
1276
- "eval_F1": 0.7572553125484722,
1277
- "eval_Precision": 0.9016736401673641,
1278
- "eval_Recall": 0.9729119638826185,
1279
- "eval_accuracy": 0.8887841658812441,
1280
- "eval_loss": 1.5320940017700195,
1281
- "eval_runtime": 11.7044,
1282
- "eval_samples_per_second": 90.65,
1283
- "eval_steps_per_second": 5.724,
1284
- "step": 20216
1285
- },
1286
- {
1287
- "epoch": 77.0,
1288
- "eval_AUC": 0.7178845533698807,
1289
- "eval_F1": 0.7572553125484722,
1290
- "eval_Precision": 0.9016736401673641,
1291
- "eval_Recall": 0.9729119638826185,
1292
- "eval_accuracy": 0.8887841658812441,
1293
- "eval_loss": 1.5440773963928223,
1294
- "eval_runtime": 11.8766,
1295
- "eval_samples_per_second": 89.335,
1296
- "eval_steps_per_second": 5.641,
1297
- "step": 20482
1298
- },
1299
- {
1300
- "epoch": 77.06766917293233,
1301
- "grad_norm": 4.261892627255293e-06,
1302
- "learning_rate": 4.586466165413534e-05,
1303
- "loss": 0.0,
1304
- "step": 20500
1305
- },
1306
- {
1307
- "epoch": 78.0,
1308
- "eval_AUC": 0.7178845533698807,
1309
- "eval_F1": 0.7572553125484722,
1310
- "eval_Precision": 0.9016736401673641,
1311
- "eval_Recall": 0.9729119638826185,
1312
- "eval_accuracy": 0.8887841658812441,
1313
- "eval_loss": 1.5564316511154175,
1314
- "eval_runtime": 11.9626,
1315
- "eval_samples_per_second": 88.693,
1316
- "eval_steps_per_second": 5.601,
1317
- "step": 20748
1318
- },
1319
- {
1320
- "epoch": 78.94736842105263,
1321
- "grad_norm": 2.5668264242995065e-06,
1322
- "learning_rate": 4.210526315789474e-05,
1323
- "loss": 0.0,
1324
- "step": 21000
1325
- },
1326
- {
1327
- "epoch": 79.0,
1328
- "eval_AUC": 0.7178845533698807,
1329
- "eval_F1": 0.7572553125484722,
1330
- "eval_Precision": 0.9016736401673641,
1331
- "eval_Recall": 0.9729119638826185,
1332
- "eval_accuracy": 0.8887841658812441,
1333
- "eval_loss": 1.5691113471984863,
1334
- "eval_runtime": 11.9711,
1335
- "eval_samples_per_second": 88.63,
1336
- "eval_steps_per_second": 5.597,
1337
- "step": 21014
1338
- },
1339
- {
1340
- "epoch": 80.0,
1341
- "eval_AUC": 0.7178845533698807,
1342
- "eval_F1": 0.7572553125484722,
1343
- "eval_Precision": 0.9016736401673641,
1344
- "eval_Recall": 0.9729119638826185,
1345
- "eval_accuracy": 0.8887841658812441,
1346
- "eval_loss": 1.5799812078475952,
1347
- "eval_runtime": 11.8905,
1348
- "eval_samples_per_second": 89.231,
1349
- "eval_steps_per_second": 5.635,
1350
- "step": 21280
1351
- },
1352
- {
1353
- "epoch": 80.82706766917293,
1354
- "grad_norm": 1.7882749716591206e-06,
1355
- "learning_rate": 3.834586466165413e-05,
1356
- "loss": 0.0,
1357
- "step": 21500
1358
- },
1359
- {
1360
- "epoch": 81.0,
1361
- "eval_AUC": 0.7178845533698807,
1362
- "eval_F1": 0.7572553125484722,
1363
- "eval_Precision": 0.9016736401673641,
1364
- "eval_Recall": 0.9729119638826185,
1365
- "eval_accuracy": 0.8887841658812441,
1366
- "eval_loss": 1.5909699201583862,
1367
- "eval_runtime": 11.8739,
1368
- "eval_samples_per_second": 89.355,
1369
- "eval_steps_per_second": 5.643,
1370
- "step": 21546
1371
- },
1372
- {
1373
- "epoch": 82.0,
1374
- "eval_AUC": 0.7178845533698807,
1375
- "eval_F1": 0.7572553125484722,
1376
- "eval_Precision": 0.9016736401673641,
1377
- "eval_Recall": 0.9729119638826185,
1378
- "eval_accuracy": 0.8887841658812441,
1379
- "eval_loss": 1.6020997762680054,
1380
- "eval_runtime": 11.7593,
1381
- "eval_samples_per_second": 90.226,
1382
- "eval_steps_per_second": 5.698,
1383
- "step": 21812
1384
- },
1385
- {
1386
- "epoch": 82.70676691729324,
1387
- "grad_norm": 3.307637371108285e-06,
1388
- "learning_rate": 3.458646616541353e-05,
1389
- "loss": 0.0,
1390
- "step": 22000
1391
- },
1392
- {
1393
- "epoch": 83.0,
1394
- "eval_AUC": 0.7178845533698807,
1395
- "eval_F1": 0.7572553125484722,
1396
- "eval_Precision": 0.9016736401673641,
1397
- "eval_Recall": 0.9729119638826185,
1398
- "eval_accuracy": 0.8887841658812441,
1399
- "eval_loss": 1.6133201122283936,
1400
- "eval_runtime": 11.7049,
1401
- "eval_samples_per_second": 90.645,
1402
- "eval_steps_per_second": 5.724,
1403
- "step": 22078
1404
- },
1405
- {
1406
- "epoch": 84.0,
1407
- "eval_AUC": 0.7178845533698807,
1408
- "eval_F1": 0.7572553125484722,
1409
- "eval_Precision": 0.9016736401673641,
1410
- "eval_Recall": 0.9729119638826185,
1411
- "eval_accuracy": 0.8887841658812441,
1412
- "eval_loss": 1.6243833303451538,
1413
- "eval_runtime": 11.7522,
1414
- "eval_samples_per_second": 90.281,
1415
- "eval_steps_per_second": 5.701,
1416
- "step": 22344
1417
- },
1418
- {
1419
- "epoch": 84.58646616541354,
1420
- "grad_norm": 2.0808365661650896e-06,
1421
- "learning_rate": 3.082706766917293e-05,
1422
- "loss": 0.0,
1423
- "step": 22500
1424
- },
1425
- {
1426
- "epoch": 85.0,
1427
- "eval_AUC": 0.7178845533698807,
1428
- "eval_F1": 0.7572553125484722,
1429
- "eval_Precision": 0.9016736401673641,
1430
- "eval_Recall": 0.9729119638826185,
1431
- "eval_accuracy": 0.8887841658812441,
1432
- "eval_loss": 1.6356879472732544,
1433
- "eval_runtime": 11.8494,
1434
- "eval_samples_per_second": 89.54,
1435
- "eval_steps_per_second": 5.654,
1436
- "step": 22610
1437
- },
1438
- {
1439
- "epoch": 86.0,
1440
- "eval_AUC": 0.7178845533698807,
1441
- "eval_F1": 0.7572553125484722,
1442
- "eval_Precision": 0.9016736401673641,
1443
- "eval_Recall": 0.9729119638826185,
1444
- "eval_accuracy": 0.8887841658812441,
1445
- "eval_loss": 1.646845817565918,
1446
- "eval_runtime": 11.9457,
1447
- "eval_samples_per_second": 88.819,
1448
- "eval_steps_per_second": 5.609,
1449
- "step": 22876
1450
- },
1451
- {
1452
- "epoch": 86.46616541353383,
1453
- "grad_norm": 1.6075608755272697e-06,
1454
- "learning_rate": 2.706766917293233e-05,
1455
- "loss": 0.0,
1456
- "step": 23000
1457
- },
1458
- {
1459
- "epoch": 87.0,
1460
- "eval_AUC": 0.7178845533698807,
1461
- "eval_F1": 0.7572553125484722,
1462
- "eval_Precision": 0.9016736401673641,
1463
- "eval_Recall": 0.9729119638826185,
1464
- "eval_accuracy": 0.8887841658812441,
1465
- "eval_loss": 1.6580337285995483,
1466
- "eval_runtime": 11.988,
1467
- "eval_samples_per_second": 88.505,
1468
- "eval_steps_per_second": 5.589,
1469
- "step": 23142
1470
- },
1471
- {
1472
- "epoch": 88.0,
1473
- "eval_AUC": 0.7178845533698807,
1474
- "eval_F1": 0.7572553125484722,
1475
- "eval_Precision": 0.9016736401673641,
1476
- "eval_Recall": 0.9729119638826185,
1477
- "eval_accuracy": 0.8887841658812441,
1478
- "eval_loss": 1.6693716049194336,
1479
- "eval_runtime": 11.563,
1480
- "eval_samples_per_second": 91.758,
1481
- "eval_steps_per_second": 5.794,
1482
- "step": 23408
1483
- },
1484
- {
1485
- "epoch": 88.34586466165413,
1486
- "grad_norm": 1.4785607618250651e-06,
1487
- "learning_rate": 2.3308270676691728e-05,
1488
- "loss": 0.0,
1489
- "step": 23500
1490
- },
1491
- {
1492
- "epoch": 89.0,
1493
- "eval_AUC": 0.7178845533698807,
1494
- "eval_F1": 0.7572553125484722,
1495
- "eval_Precision": 0.9016736401673641,
1496
- "eval_Recall": 0.9729119638826185,
1497
- "eval_accuracy": 0.8887841658812441,
1498
- "eval_loss": 1.6805604696273804,
1499
- "eval_runtime": 11.7292,
1500
- "eval_samples_per_second": 90.458,
1501
- "eval_steps_per_second": 5.712,
1502
- "step": 23674
1503
- },
1504
- {
1505
- "epoch": 90.0,
1506
- "eval_AUC": 0.7178845533698807,
1507
- "eval_F1": 0.7572553125484722,
1508
- "eval_Precision": 0.9016736401673641,
1509
- "eval_Recall": 0.9729119638826185,
1510
- "eval_accuracy": 0.8887841658812441,
1511
- "eval_loss": 1.6876078844070435,
1512
- "eval_runtime": 11.6884,
1513
- "eval_samples_per_second": 90.774,
1514
- "eval_steps_per_second": 5.732,
1515
- "step": 23940
1516
- },
1517
- {
1518
- "epoch": 90.22556390977444,
1519
- "grad_norm": 7.791019811520528e-07,
1520
- "learning_rate": 1.954887218045113e-05,
1521
- "loss": 0.0,
1522
- "step": 24000
1523
- },
1524
- {
1525
- "epoch": 91.0,
1526
- "eval_AUC": 0.7178845533698807,
1527
- "eval_F1": 0.7572553125484722,
1528
- "eval_Precision": 0.9016736401673641,
1529
- "eval_Recall": 0.9729119638826185,
1530
- "eval_accuracy": 0.8887841658812441,
1531
- "eval_loss": 1.6937507390975952,
1532
- "eval_runtime": 11.7263,
1533
- "eval_samples_per_second": 90.481,
1534
- "eval_steps_per_second": 5.714,
1535
- "step": 24206
1536
- },
1537
- {
1538
- "epoch": 92.0,
1539
- "eval_AUC": 0.7178845533698807,
1540
- "eval_F1": 0.7572553125484722,
1541
- "eval_Precision": 0.9016736401673641,
1542
- "eval_Recall": 0.9729119638826185,
1543
- "eval_accuracy": 0.8887841658812441,
1544
- "eval_loss": 1.6996102333068848,
1545
- "eval_runtime": 11.8252,
1546
- "eval_samples_per_second": 89.723,
1547
- "eval_steps_per_second": 5.666,
1548
- "step": 24472
1549
- },
1550
- {
1551
- "epoch": 92.10526315789474,
1552
- "grad_norm": 8.590963034293964e-07,
1553
- "learning_rate": 1.5789473684210526e-05,
1554
- "loss": 0.0,
1555
- "step": 24500
1556
- },
1557
- {
1558
- "epoch": 93.0,
1559
- "eval_AUC": 0.7178845533698807,
1560
- "eval_F1": 0.7572553125484722,
1561
- "eval_Precision": 0.9016736401673641,
1562
- "eval_Recall": 0.9729119638826185,
1563
- "eval_accuracy": 0.8887841658812441,
1564
- "eval_loss": 1.705134630203247,
1565
- "eval_runtime": 12.0159,
1566
- "eval_samples_per_second": 88.3,
1567
- "eval_steps_per_second": 5.576,
1568
- "step": 24738
1569
- },
1570
- {
1571
- "epoch": 93.98496240601504,
1572
- "grad_norm": 1.100646727536514e-06,
1573
- "learning_rate": 1.2030075187969925e-05,
1574
- "loss": 0.0,
1575
- "step": 25000
1576
- },
1577
- {
1578
- "epoch": 94.0,
1579
- "eval_AUC": 0.7178845533698807,
1580
- "eval_F1": 0.7572553125484722,
1581
- "eval_Precision": 0.9016736401673641,
1582
- "eval_Recall": 0.9729119638826185,
1583
- "eval_accuracy": 0.8887841658812441,
1584
- "eval_loss": 1.7103519439697266,
1585
- "eval_runtime": 11.7931,
1586
- "eval_samples_per_second": 89.968,
1587
- "eval_steps_per_second": 5.681,
1588
- "step": 25004
1589
- },
1590
- {
1591
- "epoch": 95.0,
1592
- "eval_AUC": 0.7178845533698807,
1593
- "eval_F1": 0.7572553125484722,
1594
- "eval_Precision": 0.9016736401673641,
1595
- "eval_Recall": 0.9729119638826185,
1596
- "eval_accuracy": 0.8887841658812441,
1597
- "eval_loss": 1.715171456336975,
1598
- "eval_runtime": 11.6391,
1599
- "eval_samples_per_second": 91.159,
1600
- "eval_steps_per_second": 5.756,
1601
- "step": 25270
1602
- },
1603
- {
1604
- "epoch": 95.86466165413533,
1605
- "grad_norm": 5.422148774414381e-07,
1606
- "learning_rate": 8.270676691729324e-06,
1607
- "loss": 0.0,
1608
- "step": 25500
1609
- },
1610
- {
1611
- "epoch": 96.0,
1612
- "eval_AUC": 0.7178845533698807,
1613
- "eval_F1": 0.7572553125484722,
1614
- "eval_Precision": 0.9016736401673641,
1615
- "eval_Recall": 0.9729119638826185,
1616
- "eval_accuracy": 0.8887841658812441,
1617
- "eval_loss": 1.7194596529006958,
1618
- "eval_runtime": 11.8706,
1619
- "eval_samples_per_second": 89.38,
1620
- "eval_steps_per_second": 5.644,
1621
- "step": 25536
1622
- },
1623
- {
1624
- "epoch": 97.0,
1625
- "eval_AUC": 0.7178845533698807,
1626
- "eval_F1": 0.7572553125484722,
1627
- "eval_Precision": 0.9016736401673641,
1628
- "eval_Recall": 0.9729119638826185,
1629
- "eval_accuracy": 0.8887841658812441,
1630
- "eval_loss": 1.723157286643982,
1631
- "eval_runtime": 12.0004,
1632
- "eval_samples_per_second": 88.414,
1633
- "eval_steps_per_second": 5.583,
1634
- "step": 25802
1635
- },
1636
- {
1637
- "epoch": 97.74436090225564,
1638
- "grad_norm": 8.843226737553778e-07,
1639
- "learning_rate": 4.511278195488722e-06,
1640
- "loss": 0.0,
1641
- "step": 26000
1642
- },
1643
- {
1644
- "epoch": 98.0,
1645
- "eval_AUC": 0.7178845533698807,
1646
- "eval_F1": 0.7572553125484722,
1647
- "eval_Precision": 0.9016736401673641,
1648
- "eval_Recall": 0.9729119638826185,
1649
- "eval_accuracy": 0.8887841658812441,
1650
- "eval_loss": 1.7260410785675049,
1651
- "eval_runtime": 11.8627,
1652
- "eval_samples_per_second": 89.44,
1653
- "eval_steps_per_second": 5.648,
1654
- "step": 26068
1655
- },
1656
- {
1657
- "epoch": 99.0,
1658
- "eval_AUC": 0.7178845533698807,
1659
- "eval_F1": 0.7572553125484722,
1660
- "eval_Precision": 0.9016736401673641,
1661
- "eval_Recall": 0.9729119638826185,
1662
- "eval_accuracy": 0.8887841658812441,
1663
- "eval_loss": 1.7279813289642334,
1664
- "eval_runtime": 12.0306,
1665
- "eval_samples_per_second": 88.192,
1666
- "eval_steps_per_second": 5.569,
1667
- "step": 26334
1668
- },
1669
- {
1670
- "epoch": 99.62406015037594,
1671
- "grad_norm": 6.334667546070705e-07,
1672
- "learning_rate": 7.518796992481203e-07,
1673
- "loss": 0.0,
1674
- "step": 26500
1675
- },
1676
- {
1677
- "epoch": 100.0,
1678
- "eval_AUC": 0.7178845533698807,
1679
- "eval_F1": 0.7572553125484722,
1680
- "eval_Precision": 0.9016736401673641,
1681
- "eval_Recall": 0.9729119638826185,
1682
- "eval_accuracy": 0.8887841658812441,
1683
- "eval_loss": 1.7286875247955322,
1684
- "eval_runtime": 11.7137,
1685
- "eval_samples_per_second": 90.578,
1686
- "eval_steps_per_second": 5.72,
1687
- "step": 26600
1688
  },
1689
  {
1690
- "epoch": 100.0,
1691
- "step": 26600,
1692
- "total_flos": 3.2879851193471386e+19,
1693
- "train_loss": 0.05622970362024654,
1694
- "train_runtime": 7456.0224,
1695
- "train_samples_per_second": 56.907,
1696
- "train_steps_per_second": 3.568
1697
  }
1698
  ],
1699
  "logging_steps": 500,
1700
- "max_steps": 26600,
1701
  "num_input_tokens_seen": 0,
1702
- "num_train_epochs": 100,
1703
  "save_steps": 500,
1704
  "stateful_callbacks": {
1705
  "TrainerControl": {
@@ -1713,7 +205,7 @@
1713
  "attributes": {}
1714
  }
1715
  },
1716
- "total_flos": 3.2879851193471386e+19,
1717
  "train_batch_size": 16,
1718
  "trial_name": null,
1719
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.048169028013944626,
3
+ "best_model_checkpoint": "google/vit-base-patch16-224-in21k_covid_19_ct_scans/checkpoint-432",
4
+ "epoch": 12.0,
5
  "eval_steps": 500,
6
+ "global_step": 648,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.018518518518518517,
13
+ "grad_norm": 3.8348426818847656,
14
+ "learning_rate": 0.00019969135802469138,
15
+ "loss": 0.6804,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 1.0,
20
  "eval_AUC": 0.5,
21
+ "eval_F1": 0.47277227722772275,
22
+ "eval_Precision": 0.8967136150234741,
23
  "eval_Recall": 1.0,
24
+ "eval_accuracy": 0.8967136150234741,
25
+ "eval_loss": 0.38209885358810425,
26
+ "eval_runtime": 4.2504,
27
+ "eval_samples_per_second": 50.113,
28
+ "eval_steps_per_second": 3.294,
29
+ "step": 54
 
 
 
 
 
 
 
30
  },
31
  {
32
  "epoch": 2.0,
33
  "eval_AUC": 0.5,
34
+ "eval_F1": 0.47277227722772275,
35
+ "eval_Precision": 0.8967136150234741,
36
  "eval_Recall": 1.0,
37
+ "eval_accuracy": 0.8967136150234741,
38
+ "eval_loss": 0.413409024477005,
39
+ "eval_runtime": 4.3554,
40
+ "eval_samples_per_second": 48.904,
41
+ "eval_steps_per_second": 3.214,
42
+ "step": 108
43
  },
44
  {
45
  "epoch": 3.0,
46
+ "eval_AUC": 0.5454545454545454,
47
+ "eval_F1": 0.5584577114427861,
48
+ "eval_Precision": 0.9052132701421801,
49
  "eval_Recall": 1.0,
50
+ "eval_accuracy": 0.9061032863849765,
51
+ "eval_loss": 0.27077189087867737,
52
+ "eval_runtime": 4.2214,
53
+ "eval_samples_per_second": 50.458,
54
+ "eval_steps_per_second": 3.316,
55
+ "step": 162
 
 
 
 
 
 
 
56
  },
57
  {
58
  "epoch": 4.0,
59
+ "eval_AUC": 0.7272727272727273,
60
+ "eval_F1": 0.797271573604061,
61
+ "eval_Precision": 0.9408866995073891,
62
  "eval_Recall": 1.0,
63
+ "eval_accuracy": 0.9436619718309859,
64
+ "eval_loss": 0.2405475229024887,
65
+ "eval_runtime": 4.1493,
66
+ "eval_samples_per_second": 51.335,
67
+ "eval_steps_per_second": 3.374,
68
+ "step": 216
69
  },
70
  {
71
  "epoch": 5.0,
72
+ "eval_AUC": 0.7272727272727273,
73
+ "eval_F1": 0.797271573604061,
74
+ "eval_Precision": 0.9408866995073891,
75
  "eval_Recall": 1.0,
76
+ "eval_accuracy": 0.9436619718309859,
77
+ "eval_loss": 0.21931496262550354,
78
+ "eval_runtime": 4.3053,
79
+ "eval_samples_per_second": 49.474,
80
+ "eval_steps_per_second": 3.252,
81
+ "step": 270
 
 
 
 
 
 
 
82
  },
83
  {
84
  "epoch": 6.0,
85
+ "eval_AUC": 0.9309852451213707,
86
+ "eval_F1": 0.8774901960784314,
87
+ "eval_Precision": 0.9891304347826086,
88
+ "eval_Recall": 0.9528795811518325,
89
+ "eval_accuracy": 0.9483568075117371,
90
+ "eval_loss": 0.1719074547290802,
91
+ "eval_runtime": 4.1609,
92
+ "eval_samples_per_second": 51.191,
93
+ "eval_steps_per_second": 3.365,
94
+ "step": 324
95
  },
96
  {
97
  "epoch": 7.0,
98
+ "eval_AUC": 0.9519276534983341,
99
+ "eval_F1": 0.9611998299836055,
100
+ "eval_Precision": 0.9895833333333334,
101
+ "eval_Recall": 0.9947643979057592,
102
+ "eval_accuracy": 0.9859154929577465,
103
+ "eval_loss": 0.05248570814728737,
104
+ "eval_runtime": 4.1623,
105
+ "eval_samples_per_second": 51.174,
106
+ "eval_steps_per_second": 3.364,
107
+ "step": 378
 
 
 
 
 
 
 
108
  },
109
  {
110
  "epoch": 8.0,
111
+ "eval_AUC": 0.9545454545454545,
112
+ "eval_F1": 0.9735863095238095,
113
+ "eval_Precision": 0.9896373056994818,
114
+ "eval_Recall": 1.0,
115
+ "eval_accuracy": 0.9906103286384976,
116
+ "eval_loss": 0.048169028013944626,
117
+ "eval_runtime": 4.2766,
118
+ "eval_samples_per_second": 49.806,
119
+ "eval_steps_per_second": 3.274,
120
+ "step": 432
121
  },
122
  {
123
  "epoch": 9.0,
124
+ "eval_AUC": 0.8863636363636364,
125
+ "eval_F1": 0.9294374875770225,
126
+ "eval_Precision": 0.9744897959183674,
127
+ "eval_Recall": 1.0,
128
+ "eval_accuracy": 0.9765258215962441,
129
+ "eval_loss": 0.09067811071872711,
130
+ "eval_runtime": 4.2163,
131
+ "eval_samples_per_second": 50.519,
132
+ "eval_steps_per_second": 3.32,
133
+ "step": 486
134
+ },
135
+ {
136
+ "epoch": 9.25925925925926,
137
+ "grad_norm": 0.03957719728350639,
138
+ "learning_rate": 4.567901234567901e-05,
139
+ "loss": 0.1258,
140
+ "step": 500
141
  },
142
  {
143
  "epoch": 10.0,
144
+ "eval_AUC": 0.8863636363636364,
145
+ "eval_F1": 0.9294374875770225,
146
+ "eval_Precision": 0.9744897959183674,
147
+ "eval_Recall": 1.0,
148
+ "eval_accuracy": 0.9765258215962441,
149
+ "eval_loss": 0.10092553496360779,
150
+ "eval_runtime": 4.2027,
151
+ "eval_samples_per_second": 50.682,
152
+ "eval_steps_per_second": 3.331,
153
+ "step": 540
154
  },
155
  {
156
  "epoch": 11.0,
157
+ "eval_AUC": 0.8863636363636364,
158
+ "eval_F1": 0.9294374875770225,
159
+ "eval_Precision": 0.9744897959183674,
160
+ "eval_Recall": 1.0,
161
+ "eval_accuracy": 0.9765258215962441,
162
+ "eval_loss": 0.10514290630817413,
163
+ "eval_runtime": 4.2216,
164
+ "eval_samples_per_second": 50.454,
165
+ "eval_steps_per_second": 3.316,
166
+ "step": 594
 
 
 
 
 
 
 
167
  },
168
  {
169
  "epoch": 12.0,
170
+ "eval_AUC": 0.8863636363636364,
171
+ "eval_F1": 0.9294374875770225,
172
+ "eval_Precision": 0.9744897959183674,
173
+ "eval_Recall": 1.0,
174
+ "eval_accuracy": 0.9765258215962441,
175
+ "eval_loss": 0.10642894357442856,
176
+ "eval_runtime": 4.2244,
177
+ "eval_samples_per_second": 50.421,
178
+ "eval_steps_per_second": 3.314,
179
+ "step": 648
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
  },
181
  {
182
+ "epoch": 12.0,
183
+ "step": 648,
184
+ "total_flos": 7.894883901934633e+17,
185
+ "train_loss": 0.09885871769101531,
186
+ "train_runtime": 224.7245,
187
+ "train_samples_per_second": 45.336,
188
+ "train_steps_per_second": 2.884
189
  }
190
  ],
191
  "logging_steps": 500,
192
+ "max_steps": 648,
193
  "num_input_tokens_seen": 0,
194
+ "num_train_epochs": 12,
195
  "save_steps": 500,
196
  "stateful_callbacks": {
197
  "TrainerControl": {
 
205
  "attributes": {}
206
  }
207
  },
208
+ "total_flos": 7.894883901934633e+17,
209
  "train_batch_size": 16,
210
  "trial_name": null,
211
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ad48c236dcde4ba2188172ae41a74d97210cdacc82902d5be6d65420ec659fc2
3
  size 4731
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:357df7b508e81337a045bc900ffa166dacc0301c45e85247db7b83a80952fefb
3
  size 4731