heavoc commited on
Commit
cee22fb
1 Parent(s): a5a1295

End of training

Browse files
Files changed (3) hide show
  1. all_results.json +11 -0
  2. test_results.json +11 -0
  3. trainer_state.json +1574 -0
all_results.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 79.01,
3
+ "eval_accuracy": 0.6697312588401697,
4
+ "eval_f1": 0.635434606603408,
5
+ "eval_loss": 1.1357100009918213,
6
+ "eval_precision": 0.642912037609069,
7
+ "eval_recall": 0.6697312588401697,
8
+ "eval_runtime": 860.1584,
9
+ "eval_samples_per_second": 4.932,
10
+ "eval_steps_per_second": 0.617
11
+ }
test_results.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 79.01,
3
+ "eval_accuracy": 0.6697312588401697,
4
+ "eval_f1": 0.635434606603408,
5
+ "eval_loss": 1.1357100009918213,
6
+ "eval_precision": 0.642912037609069,
7
+ "eval_recall": 0.6697312588401697,
8
+ "eval_runtime": 860.1584,
9
+ "eval_samples_per_second": 4.932,
10
+ "eval_steps_per_second": 0.617
11
+ }
trainer_state.json ADDED
@@ -0,0 +1,1574 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.6680810938236681,
3
+ "best_model_checkpoint": "videomae-base-finetuned-chickenbehaviour-2/checkpoint-23865",
4
+ "epoch": 79.01218956303049,
5
+ "eval_steps": 500,
6
+ "global_step": 127240,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.01,
13
+ "grad_norm": 34.09493637084961,
14
+ "learning_rate": 6.251964790946243e-06,
15
+ "loss": 2.035,
16
+ "step": 1591
17
+ },
18
+ {
19
+ "epoch": 0.01,
20
+ "eval_accuracy": 0.5115511551155115,
21
+ "eval_f1": 0.4139359299241216,
22
+ "eval_loss": 1.6961450576782227,
23
+ "eval_precision": 0.426389188260245,
24
+ "eval_recall": 0.5115511551155115,
25
+ "eval_runtime": 443.2234,
26
+ "eval_samples_per_second": 9.571,
27
+ "eval_steps_per_second": 1.198,
28
+ "step": 1591
29
+ },
30
+ {
31
+ "epoch": 1.01,
32
+ "grad_norm": 25.231983184814453,
33
+ "learning_rate": 1.2503929581892487e-05,
34
+ "loss": 1.5431,
35
+ "step": 3182
36
+ },
37
+ {
38
+ "epoch": 1.01,
39
+ "eval_accuracy": 0.5898161244695899,
40
+ "eval_f1": 0.5166622256451852,
41
+ "eval_loss": 1.439492106437683,
42
+ "eval_precision": 0.5240058255527255,
43
+ "eval_recall": 0.5898161244695899,
44
+ "eval_runtime": 437.564,
45
+ "eval_samples_per_second": 9.695,
46
+ "eval_steps_per_second": 1.214,
47
+ "step": 3182
48
+ },
49
+ {
50
+ "epoch": 2.01,
51
+ "grad_norm": 24.721105575561523,
52
+ "learning_rate": 1.875589437283873e-05,
53
+ "loss": 1.4118,
54
+ "step": 4773
55
+ },
56
+ {
57
+ "epoch": 2.01,
58
+ "eval_accuracy": 0.6051390853371051,
59
+ "eval_f1": 0.5535236961754626,
60
+ "eval_loss": 1.363150715827942,
61
+ "eval_precision": 0.5552721379615939,
62
+ "eval_recall": 0.6051390853371051,
63
+ "eval_runtime": 683.9412,
64
+ "eval_samples_per_second": 6.202,
65
+ "eval_steps_per_second": 0.776,
66
+ "step": 4773
67
+ },
68
+ {
69
+ "epoch": 3.01,
70
+ "grad_norm": 7.513462066650391,
71
+ "learning_rate": 2.5007859163784973e-05,
72
+ "loss": 1.3413,
73
+ "step": 6364
74
+ },
75
+ {
76
+ "epoch": 3.01,
77
+ "eval_accuracy": 0.6020744931636021,
78
+ "eval_f1": 0.5384080508073906,
79
+ "eval_loss": 1.331170678138733,
80
+ "eval_precision": 0.551611875314686,
81
+ "eval_recall": 0.6020744931636021,
82
+ "eval_runtime": 680.73,
83
+ "eval_samples_per_second": 6.232,
84
+ "eval_steps_per_second": 0.78,
85
+ "step": 6364
86
+ },
87
+ {
88
+ "epoch": 4.01,
89
+ "grad_norm": 23.583574295043945,
90
+ "learning_rate": 3.125982395473121e-05,
91
+ "loss": 1.2969,
92
+ "step": 7955
93
+ },
94
+ {
95
+ "epoch": 4.01,
96
+ "eval_accuracy": 0.6211692597831212,
97
+ "eval_f1": 0.5663241746963936,
98
+ "eval_loss": 1.273905873298645,
99
+ "eval_precision": 0.6121530259266542,
100
+ "eval_recall": 0.6211692597831212,
101
+ "eval_runtime": 663.3464,
102
+ "eval_samples_per_second": 6.395,
103
+ "eval_steps_per_second": 0.8,
104
+ "step": 7955
105
+ },
106
+ {
107
+ "epoch": 5.01,
108
+ "grad_norm": 12.484142303466797,
109
+ "learning_rate": 3.751178874567746e-05,
110
+ "loss": 1.2636,
111
+ "step": 9546
112
+ },
113
+ {
114
+ "epoch": 5.01,
115
+ "eval_accuracy": 0.6058462989156058,
116
+ "eval_f1": 0.5429975108084805,
117
+ "eval_loss": 1.3211930990219116,
118
+ "eval_precision": 0.6186956973637323,
119
+ "eval_recall": 0.6058462989156058,
120
+ "eval_runtime": 661.8451,
121
+ "eval_samples_per_second": 6.409,
122
+ "eval_steps_per_second": 0.802,
123
+ "step": 9546
124
+ },
125
+ {
126
+ "epoch": 6.01,
127
+ "grad_norm": 6.579521179199219,
128
+ "learning_rate": 4.3763753536623704e-05,
129
+ "loss": 1.2231,
130
+ "step": 11137
131
+ },
132
+ {
133
+ "epoch": 6.01,
134
+ "eval_accuracy": 0.6242338519566243,
135
+ "eval_f1": 0.5747264230461347,
136
+ "eval_loss": 1.2543139457702637,
137
+ "eval_precision": 0.6460983768240532,
138
+ "eval_recall": 0.6242338519566243,
139
+ "eval_runtime": 669.9717,
140
+ "eval_samples_per_second": 6.332,
141
+ "eval_steps_per_second": 0.793,
142
+ "step": 11137
143
+ },
144
+ {
145
+ "epoch": 7.01,
146
+ "grad_norm": 12.255209922790527,
147
+ "learning_rate": 4.99982535191589e-05,
148
+ "loss": 1.1989,
149
+ "step": 12728
150
+ },
151
+ {
152
+ "epoch": 7.01,
153
+ "eval_accuracy": 0.6404997642621405,
154
+ "eval_f1": 0.5869125503826516,
155
+ "eval_loss": 1.2377874851226807,
156
+ "eval_precision": 0.6356351683189131,
157
+ "eval_recall": 0.6404997642621405,
158
+ "eval_runtime": 678.5987,
159
+ "eval_samples_per_second": 6.251,
160
+ "eval_steps_per_second": 0.782,
161
+ "step": 12728
162
+ },
163
+ {
164
+ "epoch": 8.01,
165
+ "grad_norm": 9.07247257232666,
166
+ "learning_rate": 4.9303590764609316e-05,
167
+ "loss": 1.1566,
168
+ "step": 14319
169
+ },
170
+ {
171
+ "epoch": 8.01,
172
+ "eval_accuracy": 0.6527581329561527,
173
+ "eval_f1": 0.594207871765373,
174
+ "eval_loss": 1.2123682498931885,
175
+ "eval_precision": 0.6198764093734253,
176
+ "eval_recall": 0.6527581329561527,
177
+ "eval_runtime": 667.9014,
178
+ "eval_samples_per_second": 6.351,
179
+ "eval_steps_per_second": 0.795,
180
+ "step": 14319
181
+ },
182
+ {
183
+ "epoch": 9.01,
184
+ "grad_norm": 13.849576950073242,
185
+ "learning_rate": 4.8608928010059734e-05,
186
+ "loss": 1.1145,
187
+ "step": 15910
188
+ },
189
+ {
190
+ "epoch": 9.01,
191
+ "eval_accuracy": 0.6475719000471476,
192
+ "eval_f1": 0.605227949854272,
193
+ "eval_loss": 1.1802877187728882,
194
+ "eval_precision": 0.6340925434803425,
195
+ "eval_recall": 0.6475719000471476,
196
+ "eval_runtime": 672.4937,
197
+ "eval_samples_per_second": 6.308,
198
+ "eval_steps_per_second": 0.79,
199
+ "step": 15910
200
+ },
201
+ {
202
+ "epoch": 10.01,
203
+ "grad_norm": 26.01058006286621,
204
+ "learning_rate": 4.791426525551015e-05,
205
+ "loss": 1.0567,
206
+ "step": 17501
207
+ },
208
+ {
209
+ "epoch": 10.01,
210
+ "eval_accuracy": 0.6265912305516266,
211
+ "eval_f1": 0.5969458291906395,
212
+ "eval_loss": 1.2576700448989868,
213
+ "eval_precision": 0.6278990782548687,
214
+ "eval_recall": 0.6265912305516266,
215
+ "eval_runtime": 662.684,
216
+ "eval_samples_per_second": 6.401,
217
+ "eval_steps_per_second": 0.801,
218
+ "step": 17501
219
+ },
220
+ {
221
+ "epoch": 11.01,
222
+ "grad_norm": 4.436341285705566,
223
+ "learning_rate": 4.721960250096057e-05,
224
+ "loss": 1.0172,
225
+ "step": 19092
226
+ },
227
+ {
228
+ "epoch": 11.01,
229
+ "eval_accuracy": 0.657001414427157,
230
+ "eval_f1": 0.6082570131553916,
231
+ "eval_loss": 1.1961308717727661,
232
+ "eval_precision": 0.6369039125738234,
233
+ "eval_recall": 0.657001414427157,
234
+ "eval_runtime": 753.1481,
235
+ "eval_samples_per_second": 5.632,
236
+ "eval_steps_per_second": 0.705,
237
+ "step": 19092
238
+ },
239
+ {
240
+ "epoch": 12.01,
241
+ "grad_norm": 12.711715698242188,
242
+ "learning_rate": 4.652493974641098e-05,
243
+ "loss": 0.9817,
244
+ "step": 20683
245
+ },
246
+ {
247
+ "epoch": 12.01,
248
+ "eval_accuracy": 0.6619519094766619,
249
+ "eval_f1": 0.6048958777519221,
250
+ "eval_loss": 1.2287497520446777,
251
+ "eval_precision": 0.6498679093091749,
252
+ "eval_recall": 0.6619519094766619,
253
+ "eval_runtime": 765.9815,
254
+ "eval_samples_per_second": 5.538,
255
+ "eval_steps_per_second": 0.693,
256
+ "step": 20683
257
+ },
258
+ {
259
+ "epoch": 13.01,
260
+ "grad_norm": 6.408486366271973,
261
+ "learning_rate": 4.583027699186141e-05,
262
+ "loss": 0.9279,
263
+ "step": 22274
264
+ },
265
+ {
266
+ "epoch": 13.01,
267
+ "eval_accuracy": 0.6548797736916548,
268
+ "eval_f1": 0.6213080237690763,
269
+ "eval_loss": 1.2358391284942627,
270
+ "eval_precision": 0.6503596413080538,
271
+ "eval_recall": 0.6548797736916548,
272
+ "eval_runtime": 754.2864,
273
+ "eval_samples_per_second": 5.624,
274
+ "eval_steps_per_second": 0.704,
275
+ "step": 22274
276
+ },
277
+ {
278
+ "epoch": 14.01,
279
+ "grad_norm": 14.318334579467773,
280
+ "learning_rate": 4.513561423731182e-05,
281
+ "loss": 0.8913,
282
+ "step": 23865
283
+ },
284
+ {
285
+ "epoch": 14.01,
286
+ "eval_accuracy": 0.6680810938236681,
287
+ "eval_f1": 0.6307923814434653,
288
+ "eval_loss": 1.181541919708252,
289
+ "eval_precision": 0.6325361512013947,
290
+ "eval_recall": 0.6680810938236681,
291
+ "eval_runtime": 766.2903,
292
+ "eval_samples_per_second": 5.536,
293
+ "eval_steps_per_second": 0.693,
294
+ "step": 23865
295
+ },
296
+ {
297
+ "epoch": 15.01,
298
+ "grad_norm": 14.112875938415527,
299
+ "learning_rate": 4.444095148276224e-05,
300
+ "loss": 0.8559,
301
+ "step": 25456
302
+ },
303
+ {
304
+ "epoch": 15.01,
305
+ "eval_accuracy": 0.639085337105139,
306
+ "eval_f1": 0.6036680470772597,
307
+ "eval_loss": 1.3211859464645386,
308
+ "eval_precision": 0.639173575618385,
309
+ "eval_recall": 0.639085337105139,
310
+ "eval_runtime": 741.2024,
311
+ "eval_samples_per_second": 5.723,
312
+ "eval_steps_per_second": 0.716,
313
+ "step": 25456
314
+ },
315
+ {
316
+ "epoch": 16.01,
317
+ "grad_norm": 19.837888717651367,
318
+ "learning_rate": 4.3746288728212655e-05,
319
+ "loss": 0.8083,
320
+ "step": 27047
321
+ },
322
+ {
323
+ "epoch": 16.01,
324
+ "eval_accuracy": 0.6230551626591231,
325
+ "eval_f1": 0.6006019601279614,
326
+ "eval_loss": 1.307276725769043,
327
+ "eval_precision": 0.6250962347990193,
328
+ "eval_recall": 0.6230551626591231,
329
+ "eval_runtime": 755.5211,
330
+ "eval_samples_per_second": 5.615,
331
+ "eval_steps_per_second": 0.703,
332
+ "step": 27047
333
+ },
334
+ {
335
+ "epoch": 17.01,
336
+ "grad_norm": 7.695098876953125,
337
+ "learning_rate": 4.305162597366307e-05,
338
+ "loss": 0.7662,
339
+ "step": 28638
340
+ },
341
+ {
342
+ "epoch": 17.01,
343
+ "eval_accuracy": 0.6461574728901461,
344
+ "eval_f1": 0.6214078287802024,
345
+ "eval_loss": 1.2981834411621094,
346
+ "eval_precision": 0.625200272023183,
347
+ "eval_recall": 0.6461574728901461,
348
+ "eval_runtime": 760.7104,
349
+ "eval_samples_per_second": 5.576,
350
+ "eval_steps_per_second": 0.698,
351
+ "step": 28638
352
+ },
353
+ {
354
+ "epoch": 18.01,
355
+ "grad_norm": 17.791141510009766,
356
+ "learning_rate": 4.2356963219113485e-05,
357
+ "loss": 0.7363,
358
+ "step": 30229
359
+ },
360
+ {
361
+ "epoch": 18.01,
362
+ "eval_accuracy": 0.6574728901461575,
363
+ "eval_f1": 0.6264016299127368,
364
+ "eval_loss": 1.3019486665725708,
365
+ "eval_precision": 0.6427931632515602,
366
+ "eval_recall": 0.6574728901461575,
367
+ "eval_runtime": 732.8997,
368
+ "eval_samples_per_second": 5.788,
369
+ "eval_steps_per_second": 0.725,
370
+ "step": 30229
371
+ },
372
+ {
373
+ "epoch": 19.01,
374
+ "grad_norm": 12.370431900024414,
375
+ "learning_rate": 4.166230046456391e-05,
376
+ "loss": 0.6787,
377
+ "step": 31820
378
+ },
379
+ {
380
+ "epoch": 19.01,
381
+ "eval_accuracy": 0.6511079679396511,
382
+ "eval_f1": 0.6229627569285623,
383
+ "eval_loss": 1.3867465257644653,
384
+ "eval_precision": 0.6368072307821887,
385
+ "eval_recall": 0.6511079679396511,
386
+ "eval_runtime": 662.5188,
387
+ "eval_samples_per_second": 6.403,
388
+ "eval_steps_per_second": 0.801,
389
+ "step": 31820
390
+ },
391
+ {
392
+ "epoch": 20.01,
393
+ "grad_norm": 10.867647171020508,
394
+ "learning_rate": 4.096763771001432e-05,
395
+ "loss": 0.6433,
396
+ "step": 33411
397
+ },
398
+ {
399
+ "epoch": 20.01,
400
+ "eval_accuracy": 0.6364922206506365,
401
+ "eval_f1": 0.6138991311356594,
402
+ "eval_loss": 1.4018532037734985,
403
+ "eval_precision": 0.6374756941286471,
404
+ "eval_recall": 0.6364922206506365,
405
+ "eval_runtime": 667.8552,
406
+ "eval_samples_per_second": 6.352,
407
+ "eval_steps_per_second": 0.795,
408
+ "step": 33411
409
+ },
410
+ {
411
+ "epoch": 21.01,
412
+ "grad_norm": 23.982513427734375,
413
+ "learning_rate": 4.0272974955464746e-05,
414
+ "loss": 0.5969,
415
+ "step": 35002
416
+ },
417
+ {
418
+ "epoch": 21.01,
419
+ "eval_accuracy": 0.6341348420556341,
420
+ "eval_f1": 0.6104193509325564,
421
+ "eval_loss": 1.4419147968292236,
422
+ "eval_precision": 0.6211641893240438,
423
+ "eval_recall": 0.6341348420556341,
424
+ "eval_runtime": 657.2327,
425
+ "eval_samples_per_second": 6.454,
426
+ "eval_steps_per_second": 0.808,
427
+ "step": 35002
428
+ },
429
+ {
430
+ "epoch": 22.01,
431
+ "grad_norm": 4.0233635902404785,
432
+ "learning_rate": 3.957831220091516e-05,
433
+ "loss": 0.563,
434
+ "step": 36593
435
+ },
436
+ {
437
+ "epoch": 22.01,
438
+ "eval_accuracy": 0.6508722300801508,
439
+ "eval_f1": 0.6170283959147944,
440
+ "eval_loss": 1.4777988195419312,
441
+ "eval_precision": 0.6292668904937919,
442
+ "eval_recall": 0.6508722300801508,
443
+ "eval_runtime": 657.9566,
444
+ "eval_samples_per_second": 6.447,
445
+ "eval_steps_per_second": 0.807,
446
+ "step": 36593
447
+ },
448
+ {
449
+ "epoch": 23.01,
450
+ "grad_norm": 23.503877639770508,
451
+ "learning_rate": 3.8883649446365576e-05,
452
+ "loss": 0.5252,
453
+ "step": 38184
454
+ },
455
+ {
456
+ "epoch": 23.01,
457
+ "eval_accuracy": 0.6433286185761433,
458
+ "eval_f1": 0.6213976507987007,
459
+ "eval_loss": 1.486406683921814,
460
+ "eval_precision": 0.6316274985409115,
461
+ "eval_recall": 0.6433286185761433,
462
+ "eval_runtime": 652.8988,
463
+ "eval_samples_per_second": 6.497,
464
+ "eval_steps_per_second": 0.813,
465
+ "step": 38184
466
+ },
467
+ {
468
+ "epoch": 24.01,
469
+ "grad_norm": 29.642290115356445,
470
+ "learning_rate": 3.8188986691815994e-05,
471
+ "loss": 0.5,
472
+ "step": 39775
473
+ },
474
+ {
475
+ "epoch": 24.01,
476
+ "eval_accuracy": 0.6232909005186232,
477
+ "eval_f1": 0.6023496407577721,
478
+ "eval_loss": 1.6704081296920776,
479
+ "eval_precision": 0.6273005697774543,
480
+ "eval_recall": 0.6232909005186232,
481
+ "eval_runtime": 651.691,
482
+ "eval_samples_per_second": 6.509,
483
+ "eval_steps_per_second": 0.815,
484
+ "step": 39775
485
+ },
486
+ {
487
+ "epoch": 25.01,
488
+ "grad_norm": 0.25407856702804565,
489
+ "learning_rate": 3.749432393726641e-05,
490
+ "loss": 0.4622,
491
+ "step": 41366
492
+ },
493
+ {
494
+ "epoch": 25.01,
495
+ "eval_accuracy": 0.6487505893446488,
496
+ "eval_f1": 0.6119499708020253,
497
+ "eval_loss": 1.665787935256958,
498
+ "eval_precision": 0.6259817520615986,
499
+ "eval_recall": 0.6487505893446488,
500
+ "eval_runtime": 664.0356,
501
+ "eval_samples_per_second": 6.388,
502
+ "eval_steps_per_second": 0.8,
503
+ "step": 41366
504
+ },
505
+ {
506
+ "epoch": 26.01,
507
+ "grad_norm": 12.53039836883545,
508
+ "learning_rate": 3.6799661182716824e-05,
509
+ "loss": 0.4292,
510
+ "step": 42957
511
+ },
512
+ {
513
+ "epoch": 26.01,
514
+ "eval_accuracy": 0.6494578029231495,
515
+ "eval_f1": 0.6242771027666587,
516
+ "eval_loss": 1.6428192853927612,
517
+ "eval_precision": 0.6286829818492489,
518
+ "eval_recall": 0.6494578029231495,
519
+ "eval_runtime": 673.3255,
520
+ "eval_samples_per_second": 6.3,
521
+ "eval_steps_per_second": 0.789,
522
+ "step": 42957
523
+ },
524
+ {
525
+ "epoch": 27.01,
526
+ "grad_norm": 2.96919846534729,
527
+ "learning_rate": 3.610499842816725e-05,
528
+ "loss": 0.4044,
529
+ "step": 44548
530
+ },
531
+ {
532
+ "epoch": 27.01,
533
+ "eval_accuracy": 0.6586515794436586,
534
+ "eval_f1": 0.6387100670675616,
535
+ "eval_loss": 1.6702899932861328,
536
+ "eval_precision": 0.6310773161399348,
537
+ "eval_recall": 0.6586515794436586,
538
+ "eval_runtime": 657.3579,
539
+ "eval_samples_per_second": 6.453,
540
+ "eval_steps_per_second": 0.808,
541
+ "step": 44548
542
+ },
543
+ {
544
+ "epoch": 28.01,
545
+ "grad_norm": 2.961900472640991,
546
+ "learning_rate": 3.541033567361766e-05,
547
+ "loss": 0.3952,
548
+ "step": 46139
549
+ },
550
+ {
551
+ "epoch": 28.01,
552
+ "eval_accuracy": 0.632956152758133,
553
+ "eval_f1": 0.6123356581191824,
554
+ "eval_loss": 1.7576137781143188,
555
+ "eval_precision": 0.6170827944289541,
556
+ "eval_recall": 0.632956152758133,
557
+ "eval_runtime": 659.0162,
558
+ "eval_samples_per_second": 6.437,
559
+ "eval_steps_per_second": 0.806,
560
+ "step": 46139
561
+ },
562
+ {
563
+ "epoch": 29.01,
564
+ "grad_norm": 17.30776596069336,
565
+ "learning_rate": 3.471567291906808e-05,
566
+ "loss": 0.3681,
567
+ "step": 47730
568
+ },
569
+ {
570
+ "epoch": 29.01,
571
+ "eval_accuracy": 0.6553512494106554,
572
+ "eval_f1": 0.6231268362993411,
573
+ "eval_loss": 1.9031915664672852,
574
+ "eval_precision": 0.6349163976683712,
575
+ "eval_recall": 0.6553512494106554,
576
+ "eval_runtime": 657.4951,
577
+ "eval_samples_per_second": 6.452,
578
+ "eval_steps_per_second": 0.808,
579
+ "step": 47730
580
+ },
581
+ {
582
+ "epoch": 30.01,
583
+ "grad_norm": 15.424605369567871,
584
+ "learning_rate": 3.40210101645185e-05,
585
+ "loss": 0.3541,
586
+ "step": 49321
587
+ },
588
+ {
589
+ "epoch": 30.01,
590
+ "eval_accuracy": 0.6445073078736445,
591
+ "eval_f1": 0.6207211544517277,
592
+ "eval_loss": 1.9507993459701538,
593
+ "eval_precision": 0.632026480012122,
594
+ "eval_recall": 0.6445073078736445,
595
+ "eval_runtime": 655.6116,
596
+ "eval_samples_per_second": 6.47,
597
+ "eval_steps_per_second": 0.81,
598
+ "step": 49321
599
+ },
600
+ {
601
+ "epoch": 31.01,
602
+ "grad_norm": 0.31878066062927246,
603
+ "learning_rate": 3.3326347409968915e-05,
604
+ "loss": 0.322,
605
+ "step": 50912
606
+ },
607
+ {
608
+ "epoch": 31.01,
609
+ "eval_accuracy": 0.6225836869401226,
610
+ "eval_f1": 0.6098707323593034,
611
+ "eval_loss": 2.1316964626312256,
612
+ "eval_precision": 0.6276797078299916,
613
+ "eval_recall": 0.6225836869401226,
614
+ "eval_runtime": 655.8263,
615
+ "eval_samples_per_second": 6.468,
616
+ "eval_steps_per_second": 0.81,
617
+ "step": 50912
618
+ },
619
+ {
620
+ "epoch": 32.01,
621
+ "grad_norm": 1.1452162265777588,
622
+ "learning_rate": 3.263168465541933e-05,
623
+ "loss": 0.3239,
624
+ "step": 52503
625
+ },
626
+ {
627
+ "epoch": 32.01,
628
+ "eval_accuracy": 0.6508722300801508,
629
+ "eval_f1": 0.6327759879930807,
630
+ "eval_loss": 1.9785257577896118,
631
+ "eval_precision": 0.6320825441180629,
632
+ "eval_recall": 0.6508722300801508,
633
+ "eval_runtime": 656.8912,
634
+ "eval_samples_per_second": 6.458,
635
+ "eval_steps_per_second": 0.808,
636
+ "step": 52503
637
+ },
638
+ {
639
+ "epoch": 33.01,
640
+ "grad_norm": 18.911890029907227,
641
+ "learning_rate": 3.193702190086975e-05,
642
+ "loss": 0.301,
643
+ "step": 54094
644
+ },
645
+ {
646
+ "epoch": 33.01,
647
+ "eval_accuracy": 0.6435643564356436,
648
+ "eval_f1": 0.6097216315179437,
649
+ "eval_loss": 2.2050163745880127,
650
+ "eval_precision": 0.6258806132730886,
651
+ "eval_recall": 0.6435643564356436,
652
+ "eval_runtime": 659.2017,
653
+ "eval_samples_per_second": 6.435,
654
+ "eval_steps_per_second": 0.806,
655
+ "step": 54094
656
+ },
657
+ {
658
+ "epoch": 34.01,
659
+ "grad_norm": 0.37402623891830444,
660
+ "learning_rate": 3.124235914632016e-05,
661
+ "loss": 0.28,
662
+ "step": 55685
663
+ },
664
+ {
665
+ "epoch": 34.01,
666
+ "eval_accuracy": 0.6320132013201321,
667
+ "eval_f1": 0.6173670056289928,
668
+ "eval_loss": 2.2267725467681885,
669
+ "eval_precision": 0.6318691154473476,
670
+ "eval_recall": 0.6320132013201321,
671
+ "eval_runtime": 655.5706,
672
+ "eval_samples_per_second": 6.471,
673
+ "eval_steps_per_second": 0.81,
674
+ "step": 55685
675
+ },
676
+ {
677
+ "epoch": 35.01,
678
+ "grad_norm": 0.24996362626552582,
679
+ "learning_rate": 3.054769639177059e-05,
680
+ "loss": 0.2742,
681
+ "step": 57276
682
+ },
683
+ {
684
+ "epoch": 35.01,
685
+ "eval_accuracy": 0.641914191419142,
686
+ "eval_f1": 0.6158490856542709,
687
+ "eval_loss": 2.3538448810577393,
688
+ "eval_precision": 0.6239469548156946,
689
+ "eval_recall": 0.641914191419142,
690
+ "eval_runtime": 658.9065,
691
+ "eval_samples_per_second": 6.438,
692
+ "eval_steps_per_second": 0.806,
693
+ "step": 57276
694
+ },
695
+ {
696
+ "epoch": 36.01,
697
+ "grad_norm": 40.534271240234375,
698
+ "learning_rate": 2.9853033637221e-05,
699
+ "loss": 0.2433,
700
+ "step": 58867
701
+ },
702
+ {
703
+ "epoch": 36.01,
704
+ "eval_accuracy": 0.6478076379066479,
705
+ "eval_f1": 0.6184054664095173,
706
+ "eval_loss": 2.3947157859802246,
707
+ "eval_precision": 0.6237019229335129,
708
+ "eval_recall": 0.6478076379066479,
709
+ "eval_runtime": 658.8157,
710
+ "eval_samples_per_second": 6.439,
711
+ "eval_steps_per_second": 0.806,
712
+ "step": 58867
713
+ },
714
+ {
715
+ "epoch": 37.01,
716
+ "grad_norm": 0.18938298523426056,
717
+ "learning_rate": 2.915837088267142e-05,
718
+ "loss": 0.2677,
719
+ "step": 60458
720
+ },
721
+ {
722
+ "epoch": 37.01,
723
+ "eval_accuracy": 0.6454502593116455,
724
+ "eval_f1": 0.6234092398599342,
725
+ "eval_loss": 2.400697708129883,
726
+ "eval_precision": 0.6284538940001326,
727
+ "eval_recall": 0.6454502593116455,
728
+ "eval_runtime": 672.9538,
729
+ "eval_samples_per_second": 6.304,
730
+ "eval_steps_per_second": 0.789,
731
+ "step": 60458
732
+ },
733
+ {
734
+ "epoch": 38.01,
735
+ "grad_norm": 0.05054619163274765,
736
+ "learning_rate": 2.8463708128121836e-05,
737
+ "loss": 0.2316,
738
+ "step": 62049
739
+ },
740
+ {
741
+ "epoch": 38.01,
742
+ "eval_accuracy": 0.6296558227251297,
743
+ "eval_f1": 0.6119646490374726,
744
+ "eval_loss": 2.5197205543518066,
745
+ "eval_precision": 0.6245868321675843,
746
+ "eval_recall": 0.6296558227251297,
747
+ "eval_runtime": 677.5713,
748
+ "eval_samples_per_second": 6.261,
749
+ "eval_steps_per_second": 0.784,
750
+ "step": 62049
751
+ },
752
+ {
753
+ "epoch": 39.01,
754
+ "grad_norm": 0.9518815875053406,
755
+ "learning_rate": 2.7769045373572254e-05,
756
+ "loss": 0.2229,
757
+ "step": 63640
758
+ },
759
+ {
760
+ "epoch": 39.01,
761
+ "eval_accuracy": 0.6506364922206507,
762
+ "eval_f1": 0.6235476374240025,
763
+ "eval_loss": 2.547842264175415,
764
+ "eval_precision": 0.6321911444043747,
765
+ "eval_recall": 0.6506364922206507,
766
+ "eval_runtime": 680.5865,
767
+ "eval_samples_per_second": 6.233,
768
+ "eval_steps_per_second": 0.78,
769
+ "step": 63640
770
+ },
771
+ {
772
+ "epoch": 40.01,
773
+ "grad_norm": 0.030849022790789604,
774
+ "learning_rate": 2.707438261902267e-05,
775
+ "loss": 0.215,
776
+ "step": 65231
777
+ },
778
+ {
779
+ "epoch": 40.01,
780
+ "eval_accuracy": 0.6445073078736445,
781
+ "eval_f1": 0.6209448574869749,
782
+ "eval_loss": 2.516798734664917,
783
+ "eval_precision": 0.6454990917874304,
784
+ "eval_recall": 0.6445073078736445,
785
+ "eval_runtime": 663.9051,
786
+ "eval_samples_per_second": 6.389,
787
+ "eval_steps_per_second": 0.8,
788
+ "step": 65231
789
+ },
790
+ {
791
+ "epoch": 41.01,
792
+ "grad_norm": 0.38587260246276855,
793
+ "learning_rate": 2.637971986447309e-05,
794
+ "loss": 0.2032,
795
+ "step": 66822
796
+ },
797
+ {
798
+ "epoch": 41.01,
799
+ "eval_accuracy": 0.6442715700141443,
800
+ "eval_f1": 0.6161042310233847,
801
+ "eval_loss": 2.6606993675231934,
802
+ "eval_precision": 0.6304245945338769,
803
+ "eval_recall": 0.6442715700141443,
804
+ "eval_runtime": 670.0548,
805
+ "eval_samples_per_second": 6.331,
806
+ "eval_steps_per_second": 0.792,
807
+ "step": 66822
808
+ },
809
+ {
810
+ "epoch": 42.01,
811
+ "grad_norm": 52.90748977661133,
812
+ "learning_rate": 2.5685057109923506e-05,
813
+ "loss": 0.1957,
814
+ "step": 68413
815
+ },
816
+ {
817
+ "epoch": 42.01,
818
+ "eval_accuracy": 0.6218764733616219,
819
+ "eval_f1": 0.6058915424282973,
820
+ "eval_loss": 2.6433801651000977,
821
+ "eval_precision": 0.6206059185377255,
822
+ "eval_recall": 0.6218764733616219,
823
+ "eval_runtime": 666.3612,
824
+ "eval_samples_per_second": 6.366,
825
+ "eval_steps_per_second": 0.797,
826
+ "step": 68413
827
+ },
828
+ {
829
+ "epoch": 43.01,
830
+ "grad_norm": 0.5584044456481934,
831
+ "learning_rate": 2.499039435537392e-05,
832
+ "loss": 0.1839,
833
+ "step": 70004
834
+ },
835
+ {
836
+ "epoch": 43.01,
837
+ "eval_accuracy": 0.648043375766148,
838
+ "eval_f1": 0.6202014579788635,
839
+ "eval_loss": 2.637795925140381,
840
+ "eval_precision": 0.6181521426299653,
841
+ "eval_recall": 0.648043375766148,
842
+ "eval_runtime": 669.3865,
843
+ "eval_samples_per_second": 6.337,
844
+ "eval_steps_per_second": 0.793,
845
+ "step": 70004
846
+ },
847
+ {
848
+ "epoch": 44.01,
849
+ "grad_norm": 0.02387963980436325,
850
+ "learning_rate": 2.429573160082434e-05,
851
+ "loss": 0.1672,
852
+ "step": 71595
853
+ },
854
+ {
855
+ "epoch": 44.01,
856
+ "eval_accuracy": 0.632956152758133,
857
+ "eval_f1": 0.6095258779451574,
858
+ "eval_loss": 2.8354904651641846,
859
+ "eval_precision": 0.6175432549732301,
860
+ "eval_recall": 0.632956152758133,
861
+ "eval_runtime": 658.5104,
862
+ "eval_samples_per_second": 6.442,
863
+ "eval_steps_per_second": 0.806,
864
+ "step": 71595
865
+ },
866
+ {
867
+ "epoch": 45.01,
868
+ "grad_norm": 0.04022861644625664,
869
+ "learning_rate": 2.3601068846274757e-05,
870
+ "loss": 0.1554,
871
+ "step": 73186
872
+ },
873
+ {
874
+ "epoch": 45.01,
875
+ "eval_accuracy": 0.6296558227251297,
876
+ "eval_f1": 0.609015505263741,
877
+ "eval_loss": 2.8833281993865967,
878
+ "eval_precision": 0.6179849508221367,
879
+ "eval_recall": 0.6296558227251297,
880
+ "eval_runtime": 666.4351,
881
+ "eval_samples_per_second": 6.365,
882
+ "eval_steps_per_second": 0.797,
883
+ "step": 73186
884
+ },
885
+ {
886
+ "epoch": 46.01,
887
+ "grad_norm": 3.8721201419830322,
888
+ "learning_rate": 2.2906406091725175e-05,
889
+ "loss": 0.1525,
890
+ "step": 74777
891
+ },
892
+ {
893
+ "epoch": 46.01,
894
+ "eval_accuracy": 0.6499292786421499,
895
+ "eval_f1": 0.6246638391623945,
896
+ "eval_loss": 2.8732240200042725,
897
+ "eval_precision": 0.6212359780695843,
898
+ "eval_recall": 0.6499292786421499,
899
+ "eval_runtime": 672.5613,
900
+ "eval_samples_per_second": 6.307,
901
+ "eval_steps_per_second": 0.79,
902
+ "step": 74777
903
+ },
904
+ {
905
+ "epoch": 47.01,
906
+ "grad_norm": 0.009451803751289845,
907
+ "learning_rate": 2.221174333717559e-05,
908
+ "loss": 0.1443,
909
+ "step": 76368
910
+ },
911
+ {
912
+ "epoch": 47.01,
913
+ "eval_accuracy": 0.6513437057991514,
914
+ "eval_f1": 0.6297491645477588,
915
+ "eval_loss": 2.7935521602630615,
916
+ "eval_precision": 0.6239725322581114,
917
+ "eval_recall": 0.6513437057991514,
918
+ "eval_runtime": 673.447,
919
+ "eval_samples_per_second": 6.299,
920
+ "eval_steps_per_second": 0.788,
921
+ "step": 76368
922
+ },
923
+ {
924
+ "epoch": 48.01,
925
+ "grad_norm": 0.0011284707579761744,
926
+ "learning_rate": 2.151708058262601e-05,
927
+ "loss": 0.1361,
928
+ "step": 77959
929
+ },
930
+ {
931
+ "epoch": 48.01,
932
+ "eval_accuracy": 0.6442715700141443,
933
+ "eval_f1": 0.6229754386330917,
934
+ "eval_loss": 2.8814539909362793,
935
+ "eval_precision": 0.6187358231984663,
936
+ "eval_recall": 0.6442715700141443,
937
+ "eval_runtime": 674.3209,
938
+ "eval_samples_per_second": 6.291,
939
+ "eval_steps_per_second": 0.787,
940
+ "step": 77959
941
+ },
942
+ {
943
+ "epoch": 49.01,
944
+ "grad_norm": 102.50770568847656,
945
+ "learning_rate": 2.0822417828076427e-05,
946
+ "loss": 0.1351,
947
+ "step": 79550
948
+ },
949
+ {
950
+ "epoch": 49.01,
951
+ "eval_accuracy": 0.6428571428571429,
952
+ "eval_f1": 0.6174587269654099,
953
+ "eval_loss": 3.070270299911499,
954
+ "eval_precision": 0.6243702935522684,
955
+ "eval_recall": 0.6428571428571429,
956
+ "eval_runtime": 669.5589,
957
+ "eval_samples_per_second": 6.336,
958
+ "eval_steps_per_second": 0.793,
959
+ "step": 79550
960
+ },
961
+ {
962
+ "epoch": 50.01,
963
+ "grad_norm": 8.653002738952637,
964
+ "learning_rate": 2.0127755073526845e-05,
965
+ "loss": 0.1196,
966
+ "step": 81141
967
+ },
968
+ {
969
+ "epoch": 50.01,
970
+ "eval_accuracy": 0.6423856671381424,
971
+ "eval_f1": 0.6190424793456692,
972
+ "eval_loss": 3.027528762817383,
973
+ "eval_precision": 0.625005878732158,
974
+ "eval_recall": 0.6423856671381424,
975
+ "eval_runtime": 677.2082,
976
+ "eval_samples_per_second": 6.264,
977
+ "eval_steps_per_second": 0.784,
978
+ "step": 81141
979
+ },
980
+ {
981
+ "epoch": 51.01,
982
+ "grad_norm": 62.25484848022461,
983
+ "learning_rate": 1.943309231897726e-05,
984
+ "loss": 0.111,
985
+ "step": 82732
986
+ },
987
+ {
988
+ "epoch": 51.01,
989
+ "eval_accuracy": 0.641914191419142,
990
+ "eval_f1": 0.618920231303522,
991
+ "eval_loss": 3.1254563331604004,
992
+ "eval_precision": 0.6280670220001587,
993
+ "eval_recall": 0.641914191419142,
994
+ "eval_runtime": 651.0502,
995
+ "eval_samples_per_second": 6.516,
996
+ "eval_steps_per_second": 0.816,
997
+ "step": 82732
998
+ },
999
+ {
1000
+ "epoch": 52.01,
1001
+ "grad_norm": 18.133270263671875,
1002
+ "learning_rate": 1.8738429564427678e-05,
1003
+ "loss": 0.1119,
1004
+ "step": 84323
1005
+ },
1006
+ {
1007
+ "epoch": 52.01,
1008
+ "eval_accuracy": 0.6471004243281471,
1009
+ "eval_f1": 0.6214547873156199,
1010
+ "eval_loss": 3.1854350566864014,
1011
+ "eval_precision": 0.629875200916356,
1012
+ "eval_recall": 0.6471004243281471,
1013
+ "eval_runtime": 653.5,
1014
+ "eval_samples_per_second": 6.491,
1015
+ "eval_steps_per_second": 0.813,
1016
+ "step": 84323
1017
+ },
1018
+ {
1019
+ "epoch": 53.01,
1020
+ "grad_norm": 19.144214630126953,
1021
+ "learning_rate": 1.8043766809878096e-05,
1022
+ "loss": 0.1069,
1023
+ "step": 85914
1024
+ },
1025
+ {
1026
+ "epoch": 53.01,
1027
+ "eval_accuracy": 0.6383781235266384,
1028
+ "eval_f1": 0.6195011414926834,
1029
+ "eval_loss": 3.2136049270629883,
1030
+ "eval_precision": 0.6251413165179668,
1031
+ "eval_recall": 0.6383781235266384,
1032
+ "eval_runtime": 653.7257,
1033
+ "eval_samples_per_second": 6.489,
1034
+ "eval_steps_per_second": 0.812,
1035
+ "step": 85914
1036
+ },
1037
+ {
1038
+ "epoch": 54.01,
1039
+ "grad_norm": 0.010962074622511864,
1040
+ "learning_rate": 1.7349104055328515e-05,
1041
+ "loss": 0.093,
1042
+ "step": 87505
1043
+ },
1044
+ {
1045
+ "epoch": 54.01,
1046
+ "eval_accuracy": 0.6506364922206507,
1047
+ "eval_f1": 0.6154588049677499,
1048
+ "eval_loss": 3.3124778270721436,
1049
+ "eval_precision": 0.614483037031078,
1050
+ "eval_recall": 0.6506364922206507,
1051
+ "eval_runtime": 653.1839,
1052
+ "eval_samples_per_second": 6.494,
1053
+ "eval_steps_per_second": 0.813,
1054
+ "step": 87505
1055
+ },
1056
+ {
1057
+ "epoch": 55.01,
1058
+ "grad_norm": 4.032207489013672,
1059
+ "learning_rate": 1.665444130077893e-05,
1060
+ "loss": 0.0901,
1061
+ "step": 89096
1062
+ },
1063
+ {
1064
+ "epoch": 55.01,
1065
+ "eval_accuracy": 0.6383781235266384,
1066
+ "eval_f1": 0.6217233442126047,
1067
+ "eval_loss": 3.3028151988983154,
1068
+ "eval_precision": 0.6277366002975752,
1069
+ "eval_recall": 0.6383781235266384,
1070
+ "eval_runtime": 651.3473,
1071
+ "eval_samples_per_second": 6.513,
1072
+ "eval_steps_per_second": 0.815,
1073
+ "step": 89096
1074
+ },
1075
+ {
1076
+ "epoch": 56.01,
1077
+ "grad_norm": 0.039573218673467636,
1078
+ "learning_rate": 1.5959778546229348e-05,
1079
+ "loss": 0.0776,
1080
+ "step": 90687
1081
+ },
1082
+ {
1083
+ "epoch": 56.01,
1084
+ "eval_accuracy": 0.6487505893446488,
1085
+ "eval_f1": 0.6297653212708532,
1086
+ "eval_loss": 3.3314833641052246,
1087
+ "eval_precision": 0.6272429266150353,
1088
+ "eval_recall": 0.6487505893446488,
1089
+ "eval_runtime": 649.3376,
1090
+ "eval_samples_per_second": 6.533,
1091
+ "eval_steps_per_second": 0.818,
1092
+ "step": 90687
1093
+ },
1094
+ {
1095
+ "epoch": 57.01,
1096
+ "grad_norm": 0.0005812808522023261,
1097
+ "learning_rate": 1.5265115791679766e-05,
1098
+ "loss": 0.0837,
1099
+ "step": 92278
1100
+ },
1101
+ {
1102
+ "epoch": 57.01,
1103
+ "eval_accuracy": 0.6558227251296558,
1104
+ "eval_f1": 0.6242411319775614,
1105
+ "eval_loss": 3.438481092453003,
1106
+ "eval_precision": 0.6374388645077231,
1107
+ "eval_recall": 0.6558227251296558,
1108
+ "eval_runtime": 658.6971,
1109
+ "eval_samples_per_second": 6.44,
1110
+ "eval_steps_per_second": 0.806,
1111
+ "step": 92278
1112
+ },
1113
+ {
1114
+ "epoch": 58.01,
1115
+ "grad_norm": 0.04262514412403107,
1116
+ "learning_rate": 1.4570453037130182e-05,
1117
+ "loss": 0.0701,
1118
+ "step": 93869
1119
+ },
1120
+ {
1121
+ "epoch": 58.01,
1122
+ "eval_accuracy": 0.644035832154644,
1123
+ "eval_f1": 0.6285778416417378,
1124
+ "eval_loss": 3.3799736499786377,
1125
+ "eval_precision": 0.6320916006510833,
1126
+ "eval_recall": 0.644035832154644,
1127
+ "eval_runtime": 669.2439,
1128
+ "eval_samples_per_second": 6.338,
1129
+ "eval_steps_per_second": 0.793,
1130
+ "step": 93869
1131
+ },
1132
+ {
1133
+ "epoch": 59.01,
1134
+ "grad_norm": 0.001757206628099084,
1135
+ "learning_rate": 1.38757902825806e-05,
1136
+ "loss": 0.0682,
1137
+ "step": 95460
1138
+ },
1139
+ {
1140
+ "epoch": 59.01,
1141
+ "eval_accuracy": 0.6541725601131542,
1142
+ "eval_f1": 0.6262011153983429,
1143
+ "eval_loss": 3.4473154544830322,
1144
+ "eval_precision": 0.6343707774148433,
1145
+ "eval_recall": 0.6541725601131542,
1146
+ "eval_runtime": 660.2778,
1147
+ "eval_samples_per_second": 6.425,
1148
+ "eval_steps_per_second": 0.804,
1149
+ "step": 95460
1150
+ },
1151
+ {
1152
+ "epoch": 60.01,
1153
+ "grad_norm": 0.08907134085893631,
1154
+ "learning_rate": 1.3181127528031017e-05,
1155
+ "loss": 0.0763,
1156
+ "step": 97051
1157
+ },
1158
+ {
1159
+ "epoch": 60.01,
1160
+ "eval_accuracy": 0.6315417256011315,
1161
+ "eval_f1": 0.6148352414634933,
1162
+ "eval_loss": 3.450514316558838,
1163
+ "eval_precision": 0.6148635373339948,
1164
+ "eval_recall": 0.6315417256011315,
1165
+ "eval_runtime": 666.8615,
1166
+ "eval_samples_per_second": 6.361,
1167
+ "eval_steps_per_second": 0.796,
1168
+ "step": 97051
1169
+ },
1170
+ {
1171
+ "epoch": 61.01,
1172
+ "grad_norm": 0.0005003924597986042,
1173
+ "learning_rate": 1.2486464773481436e-05,
1174
+ "loss": 0.0629,
1175
+ "step": 98642
1176
+ },
1177
+ {
1178
+ "epoch": 61.01,
1179
+ "eval_accuracy": 0.6504007543611504,
1180
+ "eval_f1": 0.6253235814801552,
1181
+ "eval_loss": 3.440239906311035,
1182
+ "eval_precision": 0.6233462363989224,
1183
+ "eval_recall": 0.6504007543611504,
1184
+ "eval_runtime": 653.614,
1185
+ "eval_samples_per_second": 6.49,
1186
+ "eval_steps_per_second": 0.812,
1187
+ "step": 98642
1188
+ },
1189
+ {
1190
+ "epoch": 62.01,
1191
+ "grad_norm": 0.06604283303022385,
1192
+ "learning_rate": 1.1791802018931852e-05,
1193
+ "loss": 0.0552,
1194
+ "step": 100233
1195
+ },
1196
+ {
1197
+ "epoch": 62.01,
1198
+ "eval_accuracy": 0.6537010843941538,
1199
+ "eval_f1": 0.6314537569564033,
1200
+ "eval_loss": 3.4401602745056152,
1201
+ "eval_precision": 0.6324239134320033,
1202
+ "eval_recall": 0.6537010843941538,
1203
+ "eval_runtime": 663.5087,
1204
+ "eval_samples_per_second": 6.393,
1205
+ "eval_steps_per_second": 0.8,
1206
+ "step": 100233
1207
+ },
1208
+ {
1209
+ "epoch": 63.01,
1210
+ "grad_norm": 0.00693962536752224,
1211
+ "learning_rate": 1.109713926438227e-05,
1212
+ "loss": 0.0463,
1213
+ "step": 101824
1214
+ },
1215
+ {
1216
+ "epoch": 63.01,
1217
+ "eval_accuracy": 0.6466289486091467,
1218
+ "eval_f1": 0.6216674773439607,
1219
+ "eval_loss": 3.529994010925293,
1220
+ "eval_precision": 0.6217012879573426,
1221
+ "eval_recall": 0.6466289486091467,
1222
+ "eval_runtime": 686.7487,
1223
+ "eval_samples_per_second": 6.177,
1224
+ "eval_steps_per_second": 0.773,
1225
+ "step": 101824
1226
+ },
1227
+ {
1228
+ "epoch": 64.01,
1229
+ "grad_norm": 0.0005720761837437749,
1230
+ "learning_rate": 1.0402476509832687e-05,
1231
+ "loss": 0.0471,
1232
+ "step": 103415
1233
+ },
1234
+ {
1235
+ "epoch": 64.01,
1236
+ "eval_accuracy": 0.6511079679396511,
1237
+ "eval_f1": 0.6222590555700158,
1238
+ "eval_loss": 3.6793229579925537,
1239
+ "eval_precision": 0.6346230225230813,
1240
+ "eval_recall": 0.6511079679396511,
1241
+ "eval_runtime": 669.336,
1242
+ "eval_samples_per_second": 6.338,
1243
+ "eval_steps_per_second": 0.793,
1244
+ "step": 103415
1245
+ },
1246
+ {
1247
+ "epoch": 65.01,
1248
+ "grad_norm": 0.0010839367751032114,
1249
+ "learning_rate": 9.707813755283105e-06,
1250
+ "loss": 0.0448,
1251
+ "step": 105006
1252
+ },
1253
+ {
1254
+ "epoch": 65.01,
1255
+ "eval_accuracy": 0.6449787835926449,
1256
+ "eval_f1": 0.6169893577308092,
1257
+ "eval_loss": 3.685042142868042,
1258
+ "eval_precision": 0.6265011843748146,
1259
+ "eval_recall": 0.6449787835926449,
1260
+ "eval_runtime": 670.2367,
1261
+ "eval_samples_per_second": 6.329,
1262
+ "eval_steps_per_second": 0.792,
1263
+ "step": 105006
1264
+ },
1265
+ {
1266
+ "epoch": 66.01,
1267
+ "grad_norm": 0.03215405344963074,
1268
+ "learning_rate": 9.013151000733522e-06,
1269
+ "loss": 0.0362,
1270
+ "step": 106597
1271
+ },
1272
+ {
1273
+ "epoch": 66.01,
1274
+ "eval_accuracy": 0.6482791136256483,
1275
+ "eval_f1": 0.624213528202086,
1276
+ "eval_loss": 3.658543348312378,
1277
+ "eval_precision": 0.6265115566919579,
1278
+ "eval_recall": 0.6482791136256483,
1279
+ "eval_runtime": 655.7488,
1280
+ "eval_samples_per_second": 6.469,
1281
+ "eval_steps_per_second": 0.81,
1282
+ "step": 106597
1283
+ },
1284
+ {
1285
+ "epoch": 67.01,
1286
+ "grad_norm": 0.00014659887528978288,
1287
+ "learning_rate": 8.31848824618394e-06,
1288
+ "loss": 0.0419,
1289
+ "step": 108188
1290
+ },
1291
+ {
1292
+ "epoch": 67.01,
1293
+ "eval_accuracy": 0.6343705799151343,
1294
+ "eval_f1": 0.6168968855551049,
1295
+ "eval_loss": 3.6284878253936768,
1296
+ "eval_precision": 0.6192216504465796,
1297
+ "eval_recall": 0.6343705799151343,
1298
+ "eval_runtime": 667.7711,
1299
+ "eval_samples_per_second": 6.352,
1300
+ "eval_steps_per_second": 0.795,
1301
+ "step": 108188
1302
+ },
1303
+ {
1304
+ "epoch": 68.01,
1305
+ "grad_norm": 0.0006045685149729252,
1306
+ "learning_rate": 7.6238254916343565e-06,
1307
+ "loss": 0.0309,
1308
+ "step": 109779
1309
+ },
1310
+ {
1311
+ "epoch": 68.01,
1312
+ "eval_accuracy": 0.648986327204149,
1313
+ "eval_f1": 0.6269316812634602,
1314
+ "eval_loss": 3.665743112564087,
1315
+ "eval_precision": 0.6264133629902356,
1316
+ "eval_recall": 0.648986327204149,
1317
+ "eval_runtime": 670.5591,
1318
+ "eval_samples_per_second": 6.326,
1319
+ "eval_steps_per_second": 0.792,
1320
+ "step": 109779
1321
+ },
1322
+ {
1323
+ "epoch": 69.01,
1324
+ "grad_norm": 0.0056765577755868435,
1325
+ "learning_rate": 6.929162737084774e-06,
1326
+ "loss": 0.0312,
1327
+ "step": 111370
1328
+ },
1329
+ {
1330
+ "epoch": 69.01,
1331
+ "eval_accuracy": 0.6416784535596417,
1332
+ "eval_f1": 0.6205254067422245,
1333
+ "eval_loss": 3.7122817039489746,
1334
+ "eval_precision": 0.6239256088005212,
1335
+ "eval_recall": 0.6416784535596417,
1336
+ "eval_runtime": 661.8739,
1337
+ "eval_samples_per_second": 6.409,
1338
+ "eval_steps_per_second": 0.802,
1339
+ "step": 111370
1340
+ },
1341
+ {
1342
+ "epoch": 70.01,
1343
+ "grad_norm": 0.0024060504510998726,
1344
+ "learning_rate": 6.234499982535192e-06,
1345
+ "loss": 0.0315,
1346
+ "step": 112961
1347
+ },
1348
+ {
1349
+ "epoch": 70.01,
1350
+ "eval_accuracy": 0.648986327204149,
1351
+ "eval_f1": 0.6189187635444573,
1352
+ "eval_loss": 3.753802537918091,
1353
+ "eval_precision": 0.6224224422132874,
1354
+ "eval_recall": 0.648986327204149,
1355
+ "eval_runtime": 664.7996,
1356
+ "eval_samples_per_second": 6.381,
1357
+ "eval_steps_per_second": 0.799,
1358
+ "step": 112961
1359
+ },
1360
+ {
1361
+ "epoch": 71.01,
1362
+ "grad_norm": 0.047281160950660706,
1363
+ "learning_rate": 5.53983722798561e-06,
1364
+ "loss": 0.0294,
1365
+ "step": 114552
1366
+ },
1367
+ {
1368
+ "epoch": 71.01,
1369
+ "eval_accuracy": 0.6482791136256483,
1370
+ "eval_f1": 0.6236630466746416,
1371
+ "eval_loss": 3.706387519836426,
1372
+ "eval_precision": 0.6234001348683471,
1373
+ "eval_recall": 0.6482791136256483,
1374
+ "eval_runtime": 677.1081,
1375
+ "eval_samples_per_second": 6.265,
1376
+ "eval_steps_per_second": 0.784,
1377
+ "step": 114552
1378
+ },
1379
+ {
1380
+ "epoch": 72.01,
1381
+ "grad_norm": 0.00014872274186927825,
1382
+ "learning_rate": 4.845174473436027e-06,
1383
+ "loss": 0.0282,
1384
+ "step": 116143
1385
+ },
1386
+ {
1387
+ "epoch": 72.01,
1388
+ "eval_accuracy": 0.6428571428571429,
1389
+ "eval_f1": 0.6192426747671007,
1390
+ "eval_loss": 3.7945356369018555,
1391
+ "eval_precision": 0.624713412639683,
1392
+ "eval_recall": 0.6428571428571429,
1393
+ "eval_runtime": 677.029,
1394
+ "eval_samples_per_second": 6.266,
1395
+ "eval_steps_per_second": 0.784,
1396
+ "step": 116143
1397
+ },
1398
+ {
1399
+ "epoch": 73.01,
1400
+ "grad_norm": 0.0006783913122490048,
1401
+ "learning_rate": 4.150511718886444e-06,
1402
+ "loss": 0.0275,
1403
+ "step": 117734
1404
+ },
1405
+ {
1406
+ "epoch": 73.01,
1407
+ "eval_accuracy": 0.6527581329561527,
1408
+ "eval_f1": 0.6271859964586763,
1409
+ "eval_loss": 3.7549855709075928,
1410
+ "eval_precision": 0.6297463780952456,
1411
+ "eval_recall": 0.6527581329561527,
1412
+ "eval_runtime": 694.5725,
1413
+ "eval_samples_per_second": 6.107,
1414
+ "eval_steps_per_second": 0.764,
1415
+ "step": 117734
1416
+ },
1417
+ {
1418
+ "epoch": 74.01,
1419
+ "grad_norm": 0.10241026431322098,
1420
+ "learning_rate": 3.4558489643368614e-06,
1421
+ "loss": 0.0319,
1422
+ "step": 119325
1423
+ },
1424
+ {
1425
+ "epoch": 74.01,
1426
+ "eval_accuracy": 0.6508722300801508,
1427
+ "eval_f1": 0.6233733427088337,
1428
+ "eval_loss": 3.740715980529785,
1429
+ "eval_precision": 0.628877991091339,
1430
+ "eval_recall": 0.6508722300801508,
1431
+ "eval_runtime": 654.6009,
1432
+ "eval_samples_per_second": 6.48,
1433
+ "eval_steps_per_second": 0.811,
1434
+ "step": 119325
1435
+ },
1436
+ {
1437
+ "epoch": 75.01,
1438
+ "grad_norm": 0.00016077565669547766,
1439
+ "learning_rate": 2.7611862097872788e-06,
1440
+ "loss": 0.021,
1441
+ "step": 120916
1442
+ },
1443
+ {
1444
+ "epoch": 75.01,
1445
+ "eval_accuracy": 0.6532296086751532,
1446
+ "eval_f1": 0.6269729664014984,
1447
+ "eval_loss": 3.75272536277771,
1448
+ "eval_precision": 0.6290218177795819,
1449
+ "eval_recall": 0.6532296086751532,
1450
+ "eval_runtime": 658.8706,
1451
+ "eval_samples_per_second": 6.438,
1452
+ "eval_steps_per_second": 0.806,
1453
+ "step": 120916
1454
+ },
1455
+ {
1456
+ "epoch": 76.01,
1457
+ "grad_norm": 0.0008236940484493971,
1458
+ "learning_rate": 2.066523455237696e-06,
1459
+ "loss": 0.0159,
1460
+ "step": 122507
1461
+ },
1462
+ {
1463
+ "epoch": 76.01,
1464
+ "eval_accuracy": 0.6515794436586516,
1465
+ "eval_f1": 0.6242553810002985,
1466
+ "eval_loss": 3.7779977321624756,
1467
+ "eval_precision": 0.6240515257248215,
1468
+ "eval_recall": 0.6515794436586516,
1469
+ "eval_runtime": 660.4492,
1470
+ "eval_samples_per_second": 6.423,
1471
+ "eval_steps_per_second": 0.804,
1472
+ "step": 122507
1473
+ },
1474
+ {
1475
+ "epoch": 77.01,
1476
+ "grad_norm": 0.004408856853842735,
1477
+ "learning_rate": 1.3718607006881136e-06,
1478
+ "loss": 0.0133,
1479
+ "step": 124098
1480
+ },
1481
+ {
1482
+ "epoch": 77.01,
1483
+ "eval_accuracy": 0.6499292786421499,
1484
+ "eval_f1": 0.624040880732074,
1485
+ "eval_loss": 3.7923333644866943,
1486
+ "eval_precision": 0.6271592552161068,
1487
+ "eval_recall": 0.6499292786421499,
1488
+ "eval_runtime": 666.8007,
1489
+ "eval_samples_per_second": 6.362,
1490
+ "eval_steps_per_second": 0.796,
1491
+ "step": 124098
1492
+ },
1493
+ {
1494
+ "epoch": 78.01,
1495
+ "grad_norm": 0.0014601564034819603,
1496
+ "learning_rate": 6.771979461385309e-07,
1497
+ "loss": 0.0125,
1498
+ "step": 125689
1499
+ },
1500
+ {
1501
+ "epoch": 78.01,
1502
+ "eval_accuracy": 0.6504007543611504,
1503
+ "eval_f1": 0.6216504935099332,
1504
+ "eval_loss": 3.8070006370544434,
1505
+ "eval_precision": 0.6262856082095383,
1506
+ "eval_recall": 0.6504007543611504,
1507
+ "eval_runtime": 667.8654,
1508
+ "eval_samples_per_second": 6.352,
1509
+ "eval_steps_per_second": 0.795,
1510
+ "step": 125689
1511
+ },
1512
+ {
1513
+ "epoch": 79.01,
1514
+ "grad_norm": 0.0005495115183293819,
1515
+ "learning_rate": 0.0,
1516
+ "loss": 0.0132,
1517
+ "step": 127240
1518
+ },
1519
+ {
1520
+ "epoch": 79.01,
1521
+ "eval_accuracy": 0.6506364922206507,
1522
+ "eval_f1": 0.6225468142229464,
1523
+ "eval_loss": 3.796358108520508,
1524
+ "eval_precision": 0.6264178866401664,
1525
+ "eval_recall": 0.6506364922206507,
1526
+ "eval_runtime": 698.0179,
1527
+ "eval_samples_per_second": 6.077,
1528
+ "eval_steps_per_second": 0.761,
1529
+ "step": 127240
1530
+ },
1531
+ {
1532
+ "epoch": 79.01,
1533
+ "step": 127240,
1534
+ "total_flos": 1.2682038581932563e+21,
1535
+ "train_loss": 0.41676221998635044,
1536
+ "train_runtime": 266875.9954,
1537
+ "train_samples_per_second": 3.814,
1538
+ "train_steps_per_second": 0.477
1539
+ },
1540
+ {
1541
+ "epoch": 79.01,
1542
+ "eval_accuracy": 0.6697312588401697,
1543
+ "eval_f1": 0.635434606603408,
1544
+ "eval_loss": 1.1357100009918213,
1545
+ "eval_precision": 0.642912037609069,
1546
+ "eval_recall": 0.6697312588401697,
1547
+ "eval_runtime": 859.5426,
1548
+ "eval_samples_per_second": 4.935,
1549
+ "eval_steps_per_second": 0.618,
1550
+ "step": 127240
1551
+ },
1552
+ {
1553
+ "epoch": 79.01,
1554
+ "eval_accuracy": 0.6697312588401697,
1555
+ "eval_f1": 0.635434606603408,
1556
+ "eval_loss": 1.1357100009918213,
1557
+ "eval_precision": 0.642912037609069,
1558
+ "eval_recall": 0.6697312588401697,
1559
+ "eval_runtime": 860.1584,
1560
+ "eval_samples_per_second": 4.932,
1561
+ "eval_steps_per_second": 0.617,
1562
+ "step": 127240
1563
+ }
1564
+ ],
1565
+ "logging_steps": 10,
1566
+ "max_steps": 127240,
1567
+ "num_input_tokens_seen": 0,
1568
+ "num_train_epochs": 9223372036854775807,
1569
+ "save_steps": 500,
1570
+ "total_flos": 1.2682038581932563e+21,
1571
+ "train_batch_size": 8,
1572
+ "trial_name": null,
1573
+ "trial_params": null
1574
+ }