File size: 10,347 Bytes
db0e4bd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
{
  "best_metric": 0.959878879636639,
  "best_model_checkpoint": "trained_models/CoNLL/checkpoint-10000",
  "epoch": 0.7121492664862555,
  "global_step": 10000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.04,
      "learning_rate": 4.821962683378436e-05,
      "loss": 0.166,
      "step": 500
    },
    {
      "epoch": 0.04,
      "eval_accuracy_score": 0.9851641291226977,
      "eval_f1": 0.9190357439733999,
      "eval_loss": 0.08048456907272339,
      "eval_precision": 0.9080157687253614,
      "eval_recall": 0.9303264893975093,
      "eval_runtime": 27.4476,
      "eval_samples_per_second": 118.517,
      "eval_steps_per_second": 14.828,
      "step": 500
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.643925366756873e-05,
      "loss": 0.0586,
      "step": 1000
    },
    {
      "epoch": 0.07,
      "eval_accuracy_score": 0.9874810170943499,
      "eval_f1": 0.9253980161707094,
      "eval_loss": 0.061091016978025436,
      "eval_precision": 0.9167630057803469,
      "eval_recall": 0.9341972399865365,
      "eval_runtime": 26.4887,
      "eval_samples_per_second": 122.807,
      "eval_steps_per_second": 15.365,
      "step": 1000
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.465888050135308e-05,
      "loss": 0.0549,
      "step": 1500
    },
    {
      "epoch": 0.11,
      "eval_accuracy_score": 0.9892138156613839,
      "eval_f1": 0.9333000665335994,
      "eval_loss": 0.051654551178216934,
      "eval_precision": 0.9225583689575797,
      "eval_recall": 0.9442948502187816,
      "eval_runtime": 27.064,
      "eval_samples_per_second": 120.197,
      "eval_steps_per_second": 15.038,
      "step": 1500
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.287850733513745e-05,
      "loss": 0.0408,
      "step": 2000
    },
    {
      "epoch": 0.14,
      "eval_accuracy_score": 0.9887660137845099,
      "eval_f1": 0.9369867605161722,
      "eval_loss": 0.05154793709516525,
      "eval_precision": 0.9330774365821095,
      "eval_recall": 0.9409289801413665,
      "eval_runtime": 34.9374,
      "eval_samples_per_second": 93.109,
      "eval_steps_per_second": 11.649,
      "step": 2000
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.109813416892181e-05,
      "loss": 0.0313,
      "step": 2500
    },
    {
      "epoch": 0.18,
      "eval_accuracy_score": 0.9901094194151319,
      "eval_f1": 0.9437919463087249,
      "eval_loss": 0.049433596432209015,
      "eval_precision": 0.9409501505520241,
      "eval_recall": 0.946650959272972,
      "eval_runtime": 34.757,
      "eval_samples_per_second": 93.593,
      "eval_steps_per_second": 11.71,
      "step": 2500
    },
    {
      "epoch": 0.21,
      "learning_rate": 3.9317761002706174e-05,
      "loss": 0.0296,
      "step": 3000
    },
    {
      "epoch": 0.21,
      "eval_accuracy_score": 0.9906740391729294,
      "eval_f1": 0.9458756398422422,
      "eval_loss": 0.053381938487291336,
      "eval_precision": 0.9432635983263599,
      "eval_recall": 0.9485021878155503,
      "eval_runtime": 36.167,
      "eval_samples_per_second": 89.944,
      "eval_steps_per_second": 11.253,
      "step": 3000
    },
    {
      "epoch": 0.25,
      "learning_rate": 3.7537387836490526e-05,
      "loss": 0.0293,
      "step": 3500
    },
    {
      "epoch": 0.25,
      "eval_accuracy_score": 0.9910634321093416,
      "eval_f1": 0.9481096487551345,
      "eval_loss": 0.042416807264089584,
      "eval_precision": 0.9445465174544847,
      "eval_recall": 0.9516997643890945,
      "eval_runtime": 34.7402,
      "eval_samples_per_second": 93.638,
      "eval_steps_per_second": 11.716,
      "step": 3500
    },
    {
      "epoch": 0.28,
      "learning_rate": 3.575701467027489e-05,
      "loss": 0.0226,
      "step": 4000
    },
    {
      "epoch": 0.28,
      "eval_accuracy_score": 0.9903041158833379,
      "eval_f1": 0.9465661641541039,
      "eval_loss": 0.04618750512599945,
      "eval_precision": 0.9421473824608203,
      "eval_recall": 0.9510265903736116,
      "eval_runtime": 28.4523,
      "eval_samples_per_second": 114.332,
      "eval_steps_per_second": 14.305,
      "step": 4000
    },
    {
      "epoch": 0.32,
      "learning_rate": 3.397664150405925e-05,
      "loss": 0.0207,
      "step": 4500
    },
    {
      "epoch": 0.32,
      "eval_accuracy_score": 0.9913944161052919,
      "eval_f1": 0.9520862948407058,
      "eval_loss": 0.04386541247367859,
      "eval_precision": 0.9461525677247797,
      "eval_recall": 0.9580949175361831,
      "eval_runtime": 25.9577,
      "eval_samples_per_second": 125.319,
      "eval_steps_per_second": 15.679,
      "step": 4500
    },
    {
      "epoch": 0.36,
      "learning_rate": 3.219626833784362e-05,
      "loss": 0.0166,
      "step": 5000
    },
    {
      "epoch": 0.36,
      "eval_accuracy_score": 0.9914528250457537,
      "eval_f1": 0.9523649781952366,
      "eval_loss": 0.044772420078516006,
      "eval_precision": 0.949180875961217,
      "eval_recall": 0.9555705149781218,
      "eval_runtime": 28.1693,
      "eval_samples_per_second": 115.48,
      "eval_steps_per_second": 14.448,
      "step": 5000
    },
    {
      "epoch": 0.39,
      "learning_rate": 3.041589517162797e-05,
      "loss": 0.0159,
      "step": 5500
    },
    {
      "epoch": 0.39,
      "eval_accuracy_score": 0.9914333553989331,
      "eval_f1": 0.9543924604510265,
      "eval_loss": 0.046620924025774,
      "eval_precision": 0.9543924604510265,
      "eval_recall": 0.9543924604510265,
      "eval_runtime": 27.5426,
      "eval_samples_per_second": 118.108,
      "eval_steps_per_second": 14.777,
      "step": 5500
    },
    {
      "epoch": 0.43,
      "learning_rate": 2.8635522005412335e-05,
      "loss": 0.0124,
      "step": 6000
    },
    {
      "epoch": 0.43,
      "eval_accuracy_score": 0.9917254001012422,
      "eval_f1": 0.9533227185116904,
      "eval_loss": 0.04490247741341591,
      "eval_precision": 0.9494241362043064,
      "eval_recall": 0.9572534500168294,
      "eval_runtime": 22.1337,
      "eval_samples_per_second": 146.971,
      "eval_steps_per_second": 18.388,
      "step": 6000
    },
    {
      "epoch": 0.46,
      "learning_rate": 2.6855148839196698e-05,
      "loss": 0.0104,
      "step": 6500
    },
    {
      "epoch": 0.46,
      "eval_accuracy_score": 0.9911607803434446,
      "eval_f1": 0.9488426702448842,
      "eval_loss": 0.04854311794042587,
      "eval_precision": 0.9456703443664326,
      "eval_recall": 0.9520363513968361,
      "eval_runtime": 22.2143,
      "eval_samples_per_second": 146.437,
      "eval_steps_per_second": 18.322,
      "step": 6500
    },
    {
      "epoch": 0.5,
      "learning_rate": 2.507477567298106e-05,
      "loss": 0.0116,
      "step": 7000
    },
    {
      "epoch": 0.5,
      "eval_accuracy_score": 0.9924652466804252,
      "eval_f1": 0.958029655692385,
      "eval_loss": 0.04380907490849495,
      "eval_precision": 0.9537948290241868,
      "eval_recall": 0.9623022551329519,
      "eval_runtime": 22.1806,
      "eval_samples_per_second": 146.659,
      "eval_steps_per_second": 18.349,
      "step": 7000
    },
    {
      "epoch": 0.53,
      "learning_rate": 2.329440250676542e-05,
      "loss": 0.008,
      "step": 7500
    },
    {
      "epoch": 0.53,
      "eval_accuracy_score": 0.9924652466804252,
      "eval_f1": 0.9584347972121924,
      "eval_loss": 0.046966083347797394,
      "eval_precision": 0.9564270152505446,
      "eval_recall": 0.9604510265903736,
      "eval_runtime": 22.1907,
      "eval_samples_per_second": 146.593,
      "eval_steps_per_second": 18.341,
      "step": 7500
    },
    {
      "epoch": 0.57,
      "learning_rate": 2.151402934054978e-05,
      "loss": 0.0106,
      "step": 8000
    },
    {
      "epoch": 0.57,
      "eval_accuracy_score": 0.9920758537440131,
      "eval_f1": 0.9552964857837792,
      "eval_loss": 0.04950818791985512,
      "eval_precision": 0.9521819093797024,
      "eval_recall": 0.9584315045439246,
      "eval_runtime": 22.1428,
      "eval_samples_per_second": 146.91,
      "eval_steps_per_second": 18.381,
      "step": 8000
    },
    {
      "epoch": 0.61,
      "learning_rate": 1.9733656174334143e-05,
      "loss": 0.0089,
      "step": 8500
    },
    {
      "epoch": 0.61,
      "eval_accuracy_score": 0.9921732019781161,
      "eval_f1": 0.9558589496607757,
      "eval_loss": 0.04179555922746658,
      "eval_precision": 0.9514757378689345,
      "eval_recall": 0.9602827330865029,
      "eval_runtime": 22.1266,
      "eval_samples_per_second": 147.018,
      "eval_steps_per_second": 18.394,
      "step": 8500
    },
    {
      "epoch": 0.64,
      "learning_rate": 1.7953283008118502e-05,
      "loss": 0.0067,
      "step": 9000
    },
    {
      "epoch": 0.64,
      "eval_accuracy_score": 0.9919979751567306,
      "eval_f1": 0.9558872861455887,
      "eval_loss": 0.049222081899642944,
      "eval_precision": 0.9526914075560013,
      "eval_recall": 0.9591046785594076,
      "eval_runtime": 22.1688,
      "eval_samples_per_second": 146.738,
      "eval_steps_per_second": 18.359,
      "step": 9000
    },
    {
      "epoch": 0.68,
      "learning_rate": 1.6172909841902865e-05,
      "loss": 0.0054,
      "step": 9500
    },
    {
      "epoch": 0.68,
      "eval_accuracy_score": 0.9919979751567306,
      "eval_f1": 0.9572004028197382,
      "eval_loss": 0.0546395517885685,
      "eval_precision": 0.9546367592902578,
      "eval_recall": 0.9597778525748906,
      "eval_runtime": 23.9981,
      "eval_samples_per_second": 135.552,
      "eval_steps_per_second": 16.96,
      "step": 9500
    },
    {
      "epoch": 0.71,
      "learning_rate": 1.4392536675687223e-05,
      "loss": 0.0055,
      "step": 10000
    },
    {
      "epoch": 0.71,
      "eval_accuracy_score": 0.9924068377399634,
      "eval_f1": 0.959878879636639,
      "eval_loss": 0.05072605982422829,
      "eval_precision": 0.9594753657306205,
      "eval_recall": 0.9602827330865029,
      "eval_runtime": 22.125,
      "eval_samples_per_second": 147.028,
      "eval_steps_per_second": 18.396,
      "step": 10000
    }
  ],
  "max_steps": 14042,
  "num_train_epochs": 1,
  "total_flos": 5226265866240000.0,
  "trial_name": null,
  "trial_params": null
}