davanstrien HF staff committed on
Commit
132b16f
1 Parent(s): 35fbe73

End of training

Browse files
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "epoch": 10.0,
3
- "eval_f1": 0.8607594936708861,
4
- "eval_loss": 0.26272934675216675,
5
- "eval_runtime": 10.1059,
6
- "eval_samples_per_second": 7.817,
7
  "eval_steps_per_second": 0.099,
8
- "train_loss": 0.3480381011962891,
9
- "train_runtime": 655.7401,
10
- "train_samples_per_second": 6.801,
11
- "train_steps_per_second": 0.107
12
  }
 
1
  {
2
+ "epoch": 30.0,
3
+ "eval_f1": 0.9620253164556962,
4
+ "eval_loss": 0.12829144299030304,
5
+ "eval_runtime": 10.0743,
6
+ "eval_samples_per_second": 7.842,
7
  "eval_steps_per_second": 0.099,
8
+ "train_loss": 0.20286732230867657,
9
+ "train_runtime": 1939.2485,
10
+ "train_samples_per_second": 6.9,
11
+ "train_steps_per_second": 0.108
12
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 10.0,
3
- "eval_f1": 0.8607594936708861,
4
- "eval_loss": 0.26272934675216675,
5
- "eval_runtime": 10.1059,
6
- "eval_samples_per_second": 7.817,
7
  "eval_steps_per_second": 0.099
8
  }
 
1
  {
2
+ "epoch": 30.0,
3
+ "eval_f1": 0.9620253164556962,
4
+ "eval_loss": 0.12829144299030304,
5
+ "eval_runtime": 10.0743,
6
+ "eval_samples_per_second": 7.842,
7
  "eval_steps_per_second": 0.099
8
  }
runs/Dec06_19-59-00_49793c51f922/events.out.tfevents.1670359112.49793c51f922.371.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e397a2f826749209e647af9193c29c05a4fa40d7fa884bf7a7ee4a24d3164913
3
+ size 357
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 10.0,
3
- "train_loss": 0.3480381011962891,
4
- "train_runtime": 655.7401,
5
- "train_samples_per_second": 6.801,
6
- "train_steps_per_second": 0.107
7
  }
 
1
  {
2
+ "epoch": 30.0,
3
+ "train_loss": 0.20286732230867657,
4
+ "train_runtime": 1939.2485,
5
+ "train_samples_per_second": 6.9,
6
+ "train_steps_per_second": 0.108
7
  }
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 0.26272934675216675,
3
- "best_model_checkpoint": "/leicester_binary_convnext_small6/checkpoint-70",
4
- "epoch": 10.0,
5
- "global_step": 70,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -10,148 +10,412 @@
10
  {
11
  "epoch": 1.0,
12
  "eval_f1": 0.8607594936708861,
13
- "eval_loss": 0.5187062621116638,
14
- "eval_runtime": 10.04,
15
- "eval_samples_per_second": 7.869,
16
  "eval_steps_per_second": 0.1,
17
  "step": 7
18
  },
19
  {
20
  "epoch": 1.43,
21
- "learning_rate": 1.7142857142857142e-05,
22
- "loss": 0.5904,
23
  "step": 10
24
  },
25
  {
26
  "epoch": 2.0,
27
  "eval_f1": 0.8607594936708861,
28
- "eval_loss": 0.4272507429122925,
29
- "eval_runtime": 10.0147,
30
- "eval_samples_per_second": 7.888,
31
- "eval_steps_per_second": 0.1,
32
  "step": 14
33
  },
34
  {
35
  "epoch": 2.86,
36
- "learning_rate": 1.4285714285714287e-05,
37
- "loss": 0.3981,
38
  "step": 20
39
  },
40
  {
41
  "epoch": 3.0,
42
  "eval_f1": 0.8607594936708861,
43
- "eval_loss": 0.411451131105423,
44
- "eval_runtime": 10.0801,
45
- "eval_samples_per_second": 7.837,
46
- "eval_steps_per_second": 0.099,
47
  "step": 21
48
  },
49
  {
50
  "epoch": 4.0,
51
  "eval_f1": 0.8607594936708861,
52
- "eval_loss": 0.40291550755500793,
53
- "eval_runtime": 10.1912,
54
- "eval_samples_per_second": 7.752,
55
- "eval_steps_per_second": 0.098,
56
  "step": 28
57
  },
58
  {
59
  "epoch": 4.29,
60
- "learning_rate": 1.1428571428571429e-05,
61
- "loss": 0.3285,
62
  "step": 30
63
  },
64
  {
65
  "epoch": 5.0,
66
  "eval_f1": 0.8607594936708861,
67
- "eval_loss": 0.3401729464530945,
68
- "eval_runtime": 10.0131,
69
- "eval_samples_per_second": 7.89,
70
- "eval_steps_per_second": 0.1,
71
  "step": 35
72
  },
73
  {
74
  "epoch": 5.71,
75
- "learning_rate": 8.571428571428571e-06,
76
- "loss": 0.308,
77
  "step": 40
78
  },
79
  {
80
  "epoch": 6.0,
81
  "eval_f1": 0.8607594936708861,
82
- "eval_loss": 0.31382453441619873,
83
- "eval_runtime": 10.0365,
84
- "eval_samples_per_second": 7.871,
85
  "eval_steps_per_second": 0.1,
86
  "step": 42
87
  },
88
  {
89
  "epoch": 7.0,
90
  "eval_f1": 0.8607594936708861,
91
- "eval_loss": 0.29120802879333496,
92
- "eval_runtime": 10.1307,
93
- "eval_samples_per_second": 7.798,
94
- "eval_steps_per_second": 0.099,
95
  "step": 49
96
  },
97
  {
98
  "epoch": 7.14,
99
- "learning_rate": 5.7142857142857145e-06,
100
- "loss": 0.2952,
101
  "step": 50
102
  },
103
  {
104
  "epoch": 8.0,
105
  "eval_f1": 0.8607594936708861,
106
- "eval_loss": 0.2751685678958893,
107
- "eval_runtime": 9.9617,
108
- "eval_samples_per_second": 7.93,
109
- "eval_steps_per_second": 0.1,
110
  "step": 56
111
  },
112
  {
113
  "epoch": 8.57,
114
- "learning_rate": 2.8571428571428573e-06,
115
- "loss": 0.2593,
116
  "step": 60
117
  },
118
  {
119
  "epoch": 9.0,
120
  "eval_f1": 0.8607594936708861,
121
- "eval_loss": 0.2657069265842438,
122
- "eval_runtime": 10.0549,
123
- "eval_samples_per_second": 7.857,
124
  "eval_steps_per_second": 0.099,
125
  "step": 63
126
  },
127
  {
128
  "epoch": 10.0,
129
- "learning_rate": 0.0,
130
- "loss": 0.2568,
131
  "step": 70
132
  },
133
  {
134
  "epoch": 10.0,
135
  "eval_f1": 0.8607594936708861,
136
- "eval_loss": 0.26272934675216675,
137
- "eval_runtime": 10.516,
138
- "eval_samples_per_second": 7.512,
139
- "eval_steps_per_second": 0.095,
140
  "step": 70
141
  },
142
  {
143
- "epoch": 10.0,
144
- "step": 70,
145
- "total_flos": 1.9921607202889728e+17,
146
- "train_loss": 0.3480381011962891,
147
- "train_runtime": 655.7401,
148
- "train_samples_per_second": 6.801,
149
- "train_steps_per_second": 0.107
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  }
151
  ],
152
- "max_steps": 70,
153
- "num_train_epochs": 10,
154
- "total_flos": 1.9921607202889728e+17,
155
  "trial_name": null,
156
  "trial_params": null
157
  }
 
1
  {
2
+ "best_metric": 0.12829144299030304,
3
+ "best_model_checkpoint": "/leicester_binary_convnext_small/checkpoint-182",
4
+ "epoch": 30.0,
5
+ "global_step": 210,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
10
  {
11
  "epoch": 1.0,
12
  "eval_f1": 0.8607594936708861,
13
+ "eval_loss": 0.5143362283706665,
14
+ "eval_runtime": 10.0208,
15
+ "eval_samples_per_second": 7.884,
16
  "eval_steps_per_second": 0.1,
17
  "step": 7
18
  },
19
  {
20
  "epoch": 1.43,
21
+ "learning_rate": 1.904761904761905e-05,
22
+ "loss": 0.5872,
23
  "step": 10
24
  },
25
  {
26
  "epoch": 2.0,
27
  "eval_f1": 0.8607594936708861,
28
+ "eval_loss": 0.42147475481033325,
29
+ "eval_runtime": 10.225,
30
+ "eval_samples_per_second": 7.726,
31
+ "eval_steps_per_second": 0.098,
32
  "step": 14
33
  },
34
  {
35
  "epoch": 2.86,
36
+ "learning_rate": 1.8095238095238097e-05,
37
+ "loss": 0.3903,
38
  "step": 20
39
  },
40
  {
41
  "epoch": 3.0,
42
  "eval_f1": 0.8607594936708861,
43
+ "eval_loss": 0.4127243757247925,
44
+ "eval_runtime": 9.9737,
45
+ "eval_samples_per_second": 7.921,
46
+ "eval_steps_per_second": 0.1,
47
  "step": 21
48
  },
49
  {
50
  "epoch": 4.0,
51
  "eval_f1": 0.8607594936708861,
52
+ "eval_loss": 0.36054694652557373,
53
+ "eval_runtime": 9.9338,
54
+ "eval_samples_per_second": 7.953,
55
+ "eval_steps_per_second": 0.101,
56
  "step": 28
57
  },
58
  {
59
  "epoch": 4.29,
60
+ "learning_rate": 1.7142857142857142e-05,
61
+ "loss": 0.3163,
62
  "step": 30
63
  },
64
  {
65
  "epoch": 5.0,
66
  "eval_f1": 0.8607594936708861,
67
+ "eval_loss": 0.3152060806751251,
68
+ "eval_runtime": 10.1128,
69
+ "eval_samples_per_second": 7.812,
70
+ "eval_steps_per_second": 0.099,
71
  "step": 35
72
  },
73
  {
74
  "epoch": 5.71,
75
+ "learning_rate": 1.6190476190476193e-05,
76
+ "loss": 0.2942,
77
  "step": 40
78
  },
79
  {
80
  "epoch": 6.0,
81
  "eval_f1": 0.8607594936708861,
82
+ "eval_loss": 0.2942211925983429,
83
+ "eval_runtime": 10.0302,
84
+ "eval_samples_per_second": 7.876,
85
  "eval_steps_per_second": 0.1,
86
  "step": 42
87
  },
88
  {
89
  "epoch": 7.0,
90
  "eval_f1": 0.8607594936708861,
91
+ "eval_loss": 0.2668735980987549,
92
+ "eval_runtime": 9.9387,
93
+ "eval_samples_per_second": 7.949,
94
+ "eval_steps_per_second": 0.101,
95
  "step": 49
96
  },
97
  {
98
  "epoch": 7.14,
99
+ "learning_rate": 1.523809523809524e-05,
100
+ "loss": 0.2755,
101
  "step": 50
102
  },
103
  {
104
  "epoch": 8.0,
105
  "eval_f1": 0.8607594936708861,
106
+ "eval_loss": 0.23157073557376862,
107
+ "eval_runtime": 10.0692,
108
+ "eval_samples_per_second": 7.846,
109
+ "eval_steps_per_second": 0.099,
110
  "step": 56
111
  },
112
  {
113
  "epoch": 8.57,
114
+ "learning_rate": 1.4285714285714287e-05,
115
+ "loss": 0.2281,
116
  "step": 60
117
  },
118
  {
119
  "epoch": 9.0,
120
  "eval_f1": 0.8607594936708861,
121
+ "eval_loss": 0.2104099839925766,
122
+ "eval_runtime": 10.0928,
123
+ "eval_samples_per_second": 7.827,
124
  "eval_steps_per_second": 0.099,
125
  "step": 63
126
  },
127
  {
128
  "epoch": 10.0,
129
+ "learning_rate": 1.3333333333333333e-05,
130
+ "loss": 0.2076,
131
  "step": 70
132
  },
133
  {
134
  "epoch": 10.0,
135
  "eval_f1": 0.8607594936708861,
136
+ "eval_loss": 0.1937984824180603,
137
+ "eval_runtime": 9.9756,
138
+ "eval_samples_per_second": 7.919,
139
+ "eval_steps_per_second": 0.1,
140
  "step": 70
141
  },
142
  {
143
+ "epoch": 11.0,
144
+ "eval_f1": 0.8607594936708861,
145
+ "eval_loss": 0.1803407520055771,
146
+ "eval_runtime": 10.0131,
147
+ "eval_samples_per_second": 7.89,
148
+ "eval_steps_per_second": 0.1,
149
+ "step": 77
150
+ },
151
+ {
152
+ "epoch": 11.43,
153
+ "learning_rate": 1.2380952380952383e-05,
154
+ "loss": 0.1832,
155
+ "step": 80
156
+ },
157
+ {
158
+ "epoch": 12.0,
159
+ "eval_f1": 0.8607594936708861,
160
+ "eval_loss": 0.17044714093208313,
161
+ "eval_runtime": 10.0514,
162
+ "eval_samples_per_second": 7.86,
163
+ "eval_steps_per_second": 0.099,
164
+ "step": 84
165
+ },
166
+ {
167
+ "epoch": 12.86,
168
+ "learning_rate": 1.1523809523809524e-05,
169
+ "loss": 0.1758,
170
+ "step": 90
171
+ },
172
+ {
173
+ "epoch": 13.0,
174
+ "eval_f1": 0.8607594936708861,
175
+ "eval_loss": 0.16500937938690186,
176
+ "eval_runtime": 9.8999,
177
+ "eval_samples_per_second": 7.98,
178
+ "eval_steps_per_second": 0.101,
179
+ "step": 91
180
+ },
181
+ {
182
+ "epoch": 14.0,
183
+ "eval_f1": 0.8607594936708861,
184
+ "eval_loss": 0.17136065661907196,
185
+ "eval_runtime": 10.3084,
186
+ "eval_samples_per_second": 7.664,
187
+ "eval_steps_per_second": 0.097,
188
+ "step": 98
189
+ },
190
+ {
191
+ "epoch": 14.29,
192
+ "learning_rate": 1.0571428571428572e-05,
193
+ "loss": 0.167,
194
+ "step": 100
195
+ },
196
+ {
197
+ "epoch": 15.0,
198
+ "eval_f1": 0.8607594936708861,
199
+ "eval_loss": 0.1574719250202179,
200
+ "eval_runtime": 10.3458,
201
+ "eval_samples_per_second": 7.636,
202
+ "eval_steps_per_second": 0.097,
203
+ "step": 105
204
+ },
205
+ {
206
+ "epoch": 15.71,
207
+ "learning_rate": 9.61904761904762e-06,
208
+ "loss": 0.1519,
209
+ "step": 110
210
+ },
211
+ {
212
+ "epoch": 16.0,
213
+ "eval_f1": 0.8607594936708861,
214
+ "eval_loss": 0.154911071062088,
215
+ "eval_runtime": 9.9419,
216
+ "eval_samples_per_second": 7.946,
217
+ "eval_steps_per_second": 0.101,
218
+ "step": 112
219
+ },
220
+ {
221
+ "epoch": 17.0,
222
+ "eval_f1": 0.8607594936708861,
223
+ "eval_loss": 0.17046359181404114,
224
+ "eval_runtime": 10.1139,
225
+ "eval_samples_per_second": 7.811,
226
+ "eval_steps_per_second": 0.099,
227
+ "step": 119
228
+ },
229
+ {
230
+ "epoch": 17.14,
231
+ "learning_rate": 8.666666666666668e-06,
232
+ "loss": 0.1422,
233
+ "step": 120
234
+ },
235
+ {
236
+ "epoch": 18.0,
237
+ "eval_f1": 0.8607594936708861,
238
+ "eval_loss": 0.14778320491313934,
239
+ "eval_runtime": 10.4334,
240
+ "eval_samples_per_second": 7.572,
241
+ "eval_steps_per_second": 0.096,
242
+ "step": 126
243
+ },
244
+ {
245
+ "epoch": 18.57,
246
+ "learning_rate": 7.714285714285716e-06,
247
+ "loss": 0.1444,
248
+ "step": 130
249
+ },
250
+ {
251
+ "epoch": 19.0,
252
+ "eval_f1": 0.8607594936708861,
253
+ "eval_loss": 0.14368951320648193,
254
+ "eval_runtime": 9.9263,
255
+ "eval_samples_per_second": 7.959,
256
+ "eval_steps_per_second": 0.101,
257
+ "step": 133
258
+ },
259
+ {
260
+ "epoch": 20.0,
261
+ "learning_rate": 6.761904761904763e-06,
262
+ "loss": 0.1396,
263
+ "step": 140
264
+ },
265
+ {
266
+ "epoch": 20.0,
267
+ "eval_f1": 0.8607594936708861,
268
+ "eval_loss": 0.13980019092559814,
269
+ "eval_runtime": 10.0444,
270
+ "eval_samples_per_second": 7.865,
271
+ "eval_steps_per_second": 0.1,
272
+ "step": 140
273
+ },
274
+ {
275
+ "epoch": 21.0,
276
+ "eval_f1": 0.8607594936708861,
277
+ "eval_loss": 0.13507133722305298,
278
+ "eval_runtime": 9.9798,
279
+ "eval_samples_per_second": 7.916,
280
+ "eval_steps_per_second": 0.1,
281
+ "step": 147
282
+ },
283
+ {
284
+ "epoch": 21.43,
285
+ "learning_rate": 5.8095238095238106e-06,
286
+ "loss": 0.1293,
287
+ "step": 150
288
+ },
289
+ {
290
+ "epoch": 22.0,
291
+ "eval_f1": 0.8987341772151899,
292
+ "eval_loss": 0.1370120793581009,
293
+ "eval_runtime": 9.9423,
294
+ "eval_samples_per_second": 7.946,
295
+ "eval_steps_per_second": 0.101,
296
+ "step": 154
297
+ },
298
+ {
299
+ "epoch": 22.86,
300
+ "learning_rate": 4.857142857142858e-06,
301
+ "loss": 0.1361,
302
+ "step": 160
303
+ },
304
+ {
305
+ "epoch": 23.0,
306
+ "eval_f1": 0.8987341772151899,
307
+ "eval_loss": 0.13351036608219147,
308
+ "eval_runtime": 9.9968,
309
+ "eval_samples_per_second": 7.903,
310
+ "eval_steps_per_second": 0.1,
311
+ "step": 161
312
+ },
313
+ {
314
+ "epoch": 24.0,
315
+ "eval_f1": 0.9367088607594937,
316
+ "eval_loss": 0.13105367124080658,
317
+ "eval_runtime": 10.0843,
318
+ "eval_samples_per_second": 7.834,
319
+ "eval_steps_per_second": 0.099,
320
+ "step": 168
321
+ },
322
+ {
323
+ "epoch": 24.29,
324
+ "learning_rate": 3.9047619047619055e-06,
325
+ "loss": 0.1246,
326
+ "step": 170
327
+ },
328
+ {
329
+ "epoch": 25.0,
330
+ "eval_f1": 0.9620253164556962,
331
+ "eval_loss": 0.12886276841163635,
332
+ "eval_runtime": 9.9441,
333
+ "eval_samples_per_second": 7.944,
334
+ "eval_steps_per_second": 0.101,
335
+ "step": 175
336
+ },
337
+ {
338
+ "epoch": 25.71,
339
+ "learning_rate": 2.9523809523809525e-06,
340
+ "loss": 0.1211,
341
+ "step": 180
342
+ },
343
+ {
344
+ "epoch": 26.0,
345
+ "eval_f1": 0.9620253164556962,
346
+ "eval_loss": 0.12829144299030304,
347
+ "eval_runtime": 10.0697,
348
+ "eval_samples_per_second": 7.845,
349
+ "eval_steps_per_second": 0.099,
350
+ "step": 182
351
+ },
352
+ {
353
+ "epoch": 27.0,
354
+ "eval_f1": 0.9620253164556962,
355
+ "eval_loss": 0.12941910326480865,
356
+ "eval_runtime": 10.0409,
357
+ "eval_samples_per_second": 7.868,
358
+ "eval_steps_per_second": 0.1,
359
+ "step": 189
360
+ },
361
+ {
362
+ "epoch": 27.14,
363
+ "learning_rate": 2.0000000000000003e-06,
364
+ "loss": 0.1182,
365
+ "step": 190
366
+ },
367
+ {
368
+ "epoch": 28.0,
369
+ "eval_f1": 0.9620253164556962,
370
+ "eval_loss": 0.13063742220401764,
371
+ "eval_runtime": 9.9814,
372
+ "eval_samples_per_second": 7.915,
373
+ "eval_steps_per_second": 0.1,
374
+ "step": 196
375
+ },
376
+ {
377
+ "epoch": 28.57,
378
+ "learning_rate": 1.0476190476190478e-06,
379
+ "loss": 0.1172,
380
+ "step": 200
381
+ },
382
+ {
383
+ "epoch": 29.0,
384
+ "eval_f1": 0.9620253164556962,
385
+ "eval_loss": 0.13123980164527893,
386
+ "eval_runtime": 9.9869,
387
+ "eval_samples_per_second": 7.91,
388
+ "eval_steps_per_second": 0.1,
389
+ "step": 203
390
+ },
391
+ {
392
+ "epoch": 30.0,
393
+ "learning_rate": 9.523809523809525e-08,
394
+ "loss": 0.1102,
395
+ "step": 210
396
+ },
397
+ {
398
+ "epoch": 30.0,
399
+ "eval_f1": 0.9620253164556962,
400
+ "eval_loss": 0.1317748874425888,
401
+ "eval_runtime": 9.9991,
402
+ "eval_samples_per_second": 7.901,
403
+ "eval_steps_per_second": 0.1,
404
+ "step": 210
405
+ },
406
+ {
407
+ "epoch": 30.0,
408
+ "step": 210,
409
+ "total_flos": 5.976482160866918e+17,
410
+ "train_loss": 0.20286732230867657,
411
+ "train_runtime": 1939.2485,
412
+ "train_samples_per_second": 6.9,
413
+ "train_steps_per_second": 0.108
414
  }
415
  ],
416
+ "max_steps": 210,
417
+ "num_train_epochs": 30,
418
+ "total_flos": 5.976482160866918e+17,
419
  "trial_name": null,
420
  "trial_params": null
421
  }