emilys commited on
Commit
95064b2
1 Parent(s): 423522f

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +17 -0
  2. eval_results.json +12 -0
  3. train_results.json +8 -0
  4. trainer_state.json +661 -0
all_results.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "eval_accuracy": 0.9862154900510105,
4
+ "eval_f1": 0.9181643400484828,
5
+ "eval_loss": 0.0572221502661705,
6
+ "eval_precision": 0.9121408403919614,
7
+ "eval_recall": 0.9242679232581622,
8
+ "eval_runtime": 20.7303,
9
+ "eval_samples": 3250,
10
+ "eval_samples_per_second": 156.775,
11
+ "eval_steps_per_second": 4.92,
12
+ "train_loss": 0.07309725869308391,
13
+ "train_runtime": 1743.2547,
14
+ "train_samples": 14041,
15
+ "train_samples_per_second": 24.163,
16
+ "train_steps_per_second": 0.755
17
+ }
eval_results.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "eval_accuracy": 0.9862154900510105,
4
+ "eval_f1": 0.9181643400484828,
5
+ "eval_loss": 0.0572221502661705,
6
+ "eval_precision": 0.9121408403919614,
7
+ "eval_recall": 0.9242679232581622,
8
+ "eval_runtime": 20.7303,
9
+ "eval_samples": 3250,
10
+ "eval_samples_per_second": 156.775,
11
+ "eval_steps_per_second": 4.92
12
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "train_loss": 0.07309725869308391,
4
+ "train_runtime": 1743.2547,
5
+ "train_samples": 14041,
6
+ "train_samples_per_second": 24.163,
7
+ "train_steps_per_second": 0.755
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,661 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.9181643400484828,
3
+ "best_model_checkpoint": "hmBERT-CoNLL-cp3/checkpoint-1000",
4
+ "epoch": 3.0,
5
+ "global_step": 1317,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.06,
12
+ "eval_accuracy": 0.9007242708617266,
13
+ "eval_f1": 0.3684910996506405,
14
+ "eval_loss": 0.4115295708179474,
15
+ "eval_precision": 0.36430921052631576,
16
+ "eval_recall": 0.37277011107371255,
17
+ "eval_runtime": 21.3063,
18
+ "eval_samples_per_second": 152.537,
19
+ "eval_steps_per_second": 4.787,
20
+ "step": 25
21
+ },
22
+ {
23
+ "epoch": 0.11,
24
+ "eval_accuracy": 0.9460301390132783,
25
+ "eval_f1": 0.6640782981476988,
26
+ "eval_loss": 0.22425369918346405,
27
+ "eval_precision": 0.6393085189222862,
28
+ "eval_recall": 0.6908448333894311,
29
+ "eval_runtime": 21.4481,
30
+ "eval_samples_per_second": 151.529,
31
+ "eval_steps_per_second": 4.756,
32
+ "step": 50
33
+ },
34
+ {
35
+ "epoch": 0.17,
36
+ "eval_accuracy": 0.958023441454772,
37
+ "eval_f1": 0.7474880579805633,
38
+ "eval_loss": 0.16170601546764374,
39
+ "eval_precision": 0.7319354838709677,
40
+ "eval_recall": 0.7637159205654662,
41
+ "eval_runtime": 21.6197,
42
+ "eval_samples_per_second": 150.326,
43
+ "eval_steps_per_second": 4.718,
44
+ "step": 75
45
+ },
46
+ {
47
+ "epoch": 0.23,
48
+ "eval_accuracy": 0.9585101826252872,
49
+ "eval_f1": 0.7455232462625266,
50
+ "eval_loss": 0.15437664091587067,
51
+ "eval_precision": 0.7281771501925546,
52
+ "eval_recall": 0.7637159205654662,
53
+ "eval_runtime": 21.768,
54
+ "eval_samples_per_second": 149.302,
55
+ "eval_steps_per_second": 4.686,
56
+ "step": 100
57
+ },
58
+ {
59
+ "epoch": 0.28,
60
+ "eval_accuracy": 0.964409485611931,
61
+ "eval_f1": 0.784738041002278,
62
+ "eval_loss": 0.13407838344573975,
63
+ "eval_precision": 0.7595275590551182,
64
+ "eval_recall": 0.81167956916863,
65
+ "eval_runtime": 21.8222,
66
+ "eval_samples_per_second": 148.931,
67
+ "eval_steps_per_second": 4.674,
68
+ "step": 125
69
+ },
70
+ {
71
+ "epoch": 0.34,
72
+ "eval_accuracy": 0.9692963669639033,
73
+ "eval_f1": 0.811351977494622,
74
+ "eval_loss": 0.12208627909421921,
75
+ "eval_precision": 0.7980143229166666,
76
+ "eval_recall": 0.8251430494782901,
77
+ "eval_runtime": 21.2673,
78
+ "eval_samples_per_second": 152.817,
79
+ "eval_steps_per_second": 4.796,
80
+ "step": 150
81
+ },
82
+ {
83
+ "epoch": 0.4,
84
+ "eval_accuracy": 0.9718663603442234,
85
+ "eval_f1": 0.8151923709306149,
86
+ "eval_loss": 0.10127394646406174,
87
+ "eval_precision": 0.796849887495982,
88
+ "eval_recall": 0.8343991921911814,
89
+ "eval_runtime": 21.1083,
90
+ "eval_samples_per_second": 153.968,
91
+ "eval_steps_per_second": 4.832,
92
+ "step": 175
93
+ },
94
+ {
95
+ "epoch": 0.46,
96
+ "eval_accuracy": 0.9731513570343834,
97
+ "eval_f1": 0.8333472419260619,
98
+ "eval_loss": 0.10758877545595169,
99
+ "eval_precision": 0.8265187882800861,
100
+ "eval_recall": 0.8402894648266577,
101
+ "eval_runtime": 20.8538,
102
+ "eval_samples_per_second": 155.847,
103
+ "eval_steps_per_second": 4.891,
104
+ "step": 200
105
+ },
106
+ {
107
+ "epoch": 0.51,
108
+ "eval_accuracy": 0.9762859701725011,
109
+ "eval_f1": 0.8543123543123543,
110
+ "eval_loss": 0.08832722157239914,
111
+ "eval_precision": 0.8453047775947282,
112
+ "eval_recall": 0.8635139683608213,
113
+ "eval_runtime": 21.5789,
114
+ "eval_samples_per_second": 150.61,
115
+ "eval_steps_per_second": 4.727,
116
+ "step": 225
117
+ },
118
+ {
119
+ "epoch": 0.57,
120
+ "eval_accuracy": 0.9762665005256804,
121
+ "eval_f1": 0.8535063638632394,
122
+ "eval_loss": 0.0972680076956749,
123
+ "eval_precision": 0.8438887974995888,
124
+ "eval_recall": 0.8633456748569506,
125
+ "eval_runtime": 21.3825,
126
+ "eval_samples_per_second": 151.993,
127
+ "eval_steps_per_second": 4.77,
128
+ "step": 250
129
+ },
130
+ {
131
+ "epoch": 0.63,
132
+ "eval_accuracy": 0.9765390755811689,
133
+ "eval_f1": 0.8575239683201333,
134
+ "eval_loss": 0.0882844626903534,
135
+ "eval_precision": 0.8496613249628283,
136
+ "eval_recall": 0.8655334904072702,
137
+ "eval_runtime": 21.4561,
138
+ "eval_samples_per_second": 151.472,
139
+ "eval_steps_per_second": 4.754,
140
+ "step": 275
141
+ },
142
+ {
143
+ "epoch": 0.68,
144
+ "eval_accuracy": 0.9766364238152719,
145
+ "eval_f1": 0.8551207327227309,
146
+ "eval_loss": 0.08785738795995712,
147
+ "eval_precision": 0.8462425840474621,
148
+ "eval_recall": 0.8641871423763042,
149
+ "eval_runtime": 21.5171,
150
+ "eval_samples_per_second": 151.043,
151
+ "eval_steps_per_second": 4.74,
152
+ "step": 300
153
+ },
154
+ {
155
+ "epoch": 0.74,
156
+ "eval_accuracy": 0.9786612670846151,
157
+ "eval_f1": 0.8711310264708322,
158
+ "eval_loss": 0.07805178314447403,
159
+ "eval_precision": 0.8592240955966607,
160
+ "eval_recall": 0.8833726018175698,
161
+ "eval_runtime": 21.5676,
162
+ "eval_samples_per_second": 150.689,
163
+ "eval_steps_per_second": 4.729,
164
+ "step": 325
165
+ },
166
+ {
167
+ "epoch": 0.8,
168
+ "eval_accuracy": 0.9803161870643666,
169
+ "eval_f1": 0.8810828765985717,
170
+ "eval_loss": 0.07252655178308487,
171
+ "eval_precision": 0.8696721311475409,
172
+ "eval_recall": 0.8927970380343319,
173
+ "eval_runtime": 21.8654,
174
+ "eval_samples_per_second": 148.637,
175
+ "eval_steps_per_second": 4.665,
176
+ "step": 350
177
+ },
178
+ {
179
+ "epoch": 0.85,
180
+ "eval_accuracy": 0.9807250496475994,
181
+ "eval_f1": 0.881333443900821,
182
+ "eval_loss": 0.07547631114721298,
183
+ "eval_precision": 0.8687264999182606,
184
+ "eval_recall": 0.8943116795691687,
185
+ "eval_runtime": 21.5439,
186
+ "eval_samples_per_second": 150.855,
187
+ "eval_steps_per_second": 4.735,
188
+ "step": 375
189
+ },
190
+ {
191
+ "epoch": 0.91,
192
+ "eval_accuracy": 0.9822242124527861,
193
+ "eval_f1": 0.8890735355213959,
194
+ "eval_loss": 0.06660941988229752,
195
+ "eval_precision": 0.8780567864762843,
196
+ "eval_recall": 0.9003702457085156,
197
+ "eval_runtime": 21.2792,
198
+ "eval_samples_per_second": 152.731,
199
+ "eval_steps_per_second": 4.793,
200
+ "step": 400
201
+ },
202
+ {
203
+ "epoch": 0.97,
204
+ "eval_accuracy": 0.982282621393248,
205
+ "eval_f1": 0.8935885647412856,
206
+ "eval_loss": 0.06582659482955933,
207
+ "eval_precision": 0.8877262913137353,
208
+ "eval_recall": 0.8995287781891619,
209
+ "eval_runtime": 21.0258,
210
+ "eval_samples_per_second": 154.572,
211
+ "eval_steps_per_second": 4.851,
212
+ "step": 425
213
+ },
214
+ {
215
+ "epoch": 1.03,
216
+ "eval_accuracy": 0.983664966317511,
217
+ "eval_f1": 0.8993299832495812,
218
+ "eval_loss": 0.0645442008972168,
219
+ "eval_precision": 0.89513171057019,
220
+ "eval_recall": 0.9035678222820599,
221
+ "eval_runtime": 21.8717,
222
+ "eval_samples_per_second": 148.594,
223
+ "eval_steps_per_second": 4.664,
224
+ "step": 450
225
+ },
226
+ {
227
+ "epoch": 1.08,
228
+ "eval_accuracy": 0.9831003465597135,
229
+ "eval_f1": 0.8950920756603616,
230
+ "eval_loss": 0.06972972303628922,
231
+ "eval_precision": 0.8864499092259449,
232
+ "eval_recall": 0.9039044092898014,
233
+ "eval_runtime": 22.0283,
234
+ "eval_samples_per_second": 147.537,
235
+ "eval_steps_per_second": 4.63,
236
+ "step": 475
237
+ },
238
+ {
239
+ "epoch": 1.14,
240
+ "learning_rate": 3.1017463933181475e-05,
241
+ "loss": 0.1392,
242
+ "step": 500
243
+ },
244
+ {
245
+ "epoch": 1.14,
246
+ "eval_accuracy": 0.9824189089209922,
247
+ "eval_f1": 0.8908151358559759,
248
+ "eval_loss": 0.06883182376623154,
249
+ "eval_precision": 0.8824306472919419,
250
+ "eval_recall": 0.8993604846852912,
251
+ "eval_runtime": 21.1436,
252
+ "eval_samples_per_second": 153.711,
253
+ "eval_steps_per_second": 4.824,
254
+ "step": 500
255
+ },
256
+ {
257
+ "epoch": 1.2,
258
+ "eval_accuracy": 0.9827498929169425,
259
+ "eval_f1": 0.8999163179916319,
260
+ "eval_loss": 0.06805375963449478,
261
+ "eval_precision": 0.8949733688415447,
262
+ "eval_recall": 0.9049141703130259,
263
+ "eval_runtime": 21.618,
264
+ "eval_samples_per_second": 150.337,
265
+ "eval_steps_per_second": 4.718,
266
+ "step": 525
267
+ },
268
+ {
269
+ "epoch": 1.25,
270
+ "eval_accuracy": 0.9823215606868891,
271
+ "eval_f1": 0.8915259903058667,
272
+ "eval_loss": 0.06759364902973175,
273
+ "eval_precision": 0.8854581673306773,
274
+ "eval_recall": 0.8976775496465836,
275
+ "eval_runtime": 21.6174,
276
+ "eval_samples_per_second": 150.342,
277
+ "eval_steps_per_second": 4.718,
278
+ "step": 550
279
+ },
280
+ {
281
+ "epoch": 1.31,
282
+ "eval_accuracy": 0.9841906467816673,
283
+ "eval_f1": 0.9013520280420632,
284
+ "eval_loss": 0.06176742911338806,
285
+ "eval_precision": 0.8940397350993378,
286
+ "eval_recall": 0.9087849209020532,
287
+ "eval_runtime": 21.5727,
288
+ "eval_samples_per_second": 150.653,
289
+ "eval_steps_per_second": 4.728,
290
+ "step": 575
291
+ },
292
+ {
293
+ "epoch": 1.37,
294
+ "eval_accuracy": 0.984015419960282,
295
+ "eval_f1": 0.9010107760421018,
296
+ "eval_loss": 0.0644294023513794,
297
+ "eval_precision": 0.8945098689666612,
298
+ "eval_recall": 0.9076068663749579,
299
+ "eval_runtime": 22.0692,
300
+ "eval_samples_per_second": 147.264,
301
+ "eval_steps_per_second": 4.622,
302
+ "step": 600
303
+ },
304
+ {
305
+ "epoch": 1.42,
306
+ "eval_accuracy": 0.9837233752579728,
307
+ "eval_f1": 0.9010347129506008,
308
+ "eval_loss": 0.0641368106007576,
309
+ "eval_precision": 0.8935782853359815,
310
+ "eval_recall": 0.9086166273981824,
311
+ "eval_runtime": 21.6306,
312
+ "eval_samples_per_second": 150.25,
313
+ "eval_steps_per_second": 4.716,
314
+ "step": 625
315
+ },
316
+ {
317
+ "epoch": 1.48,
318
+ "eval_accuracy": 0.9845800397180795,
319
+ "eval_f1": 0.9043724966622163,
320
+ "eval_loss": 0.06189671903848648,
321
+ "eval_precision": 0.8968884475339292,
322
+ "eval_recall": 0.9119824974755975,
323
+ "eval_runtime": 20.932,
324
+ "eval_samples_per_second": 155.265,
325
+ "eval_steps_per_second": 4.873,
326
+ "step": 650
327
+ },
328
+ {
329
+ "epoch": 1.54,
330
+ "eval_accuracy": 0.9848136754799268,
331
+ "eval_f1": 0.9074897257401661,
332
+ "eval_loss": 0.06076198071241379,
333
+ "eval_precision": 0.9045310148804547,
334
+ "eval_recall": 0.9104678559407607,
335
+ "eval_runtime": 21.5144,
336
+ "eval_samples_per_second": 151.062,
337
+ "eval_steps_per_second": 4.741,
338
+ "step": 675
339
+ },
340
+ {
341
+ "epoch": 1.59,
342
+ "eval_accuracy": 0.9851057201822359,
343
+ "eval_f1": 0.9090604869070525,
344
+ "eval_loss": 0.06238234415650368,
345
+ "eval_precision": 0.903842954583264,
346
+ "eval_recall": 0.9143386065297879,
347
+ "eval_runtime": 21.3845,
348
+ "eval_samples_per_second": 151.979,
349
+ "eval_steps_per_second": 4.77,
350
+ "step": 700
351
+ },
352
+ {
353
+ "epoch": 1.65,
354
+ "eval_accuracy": 0.9852420077099802,
355
+ "eval_f1": 0.9115851108322878,
356
+ "eval_loss": 0.05955210700631142,
357
+ "eval_precision": 0.9062032263429237,
358
+ "eval_recall": 0.9170313025917199,
359
+ "eval_runtime": 21.1164,
360
+ "eval_samples_per_second": 153.909,
361
+ "eval_steps_per_second": 4.83,
362
+ "step": 725
363
+ },
364
+ {
365
+ "epoch": 1.71,
366
+ "eval_accuracy": 0.9848331451267474,
367
+ "eval_f1": 0.9068602904356534,
368
+ "eval_loss": 0.0579909048974514,
369
+ "eval_precision": 0.8995033112582781,
370
+ "eval_recall": 0.9143386065297879,
371
+ "eval_runtime": 21.1108,
372
+ "eval_samples_per_second": 153.95,
373
+ "eval_steps_per_second": 4.832,
374
+ "step": 750
375
+ },
376
+ {
377
+ "epoch": 1.77,
378
+ "eval_accuracy": 0.9857676881741365,
379
+ "eval_f1": 0.9126685087498954,
380
+ "eval_loss": 0.05822019651532173,
381
+ "eval_precision": 0.9081819696717214,
382
+ "eval_recall": 0.9171995960955908,
383
+ "eval_runtime": 22.2525,
384
+ "eval_samples_per_second": 146.051,
385
+ "eval_steps_per_second": 4.584,
386
+ "step": 775
387
+ },
388
+ {
389
+ "epoch": 1.82,
390
+ "eval_accuracy": 0.9852420077099802,
391
+ "eval_f1": 0.9100617386951443,
392
+ "eval_loss": 0.0588374100625515,
393
+ "eval_precision": 0.9023825281270682,
394
+ "eval_recall": 0.9178727701110737,
395
+ "eval_runtime": 21.7438,
396
+ "eval_samples_per_second": 149.468,
397
+ "eval_steps_per_second": 4.691,
398
+ "step": 800
399
+ },
400
+ {
401
+ "epoch": 1.88,
402
+ "eval_accuracy": 0.9855535220591098,
403
+ "eval_f1": 0.911860174781523,
404
+ "eval_loss": 0.0592406764626503,
405
+ "eval_precision": 0.9020253581425984,
406
+ "eval_recall": 0.9219118142039717,
407
+ "eval_runtime": 21.1485,
408
+ "eval_samples_per_second": 153.675,
409
+ "eval_steps_per_second": 4.823,
410
+ "step": 825
411
+ },
412
+ {
413
+ "epoch": 1.94,
414
+ "eval_accuracy": 0.9852225380631595,
415
+ "eval_f1": 0.911764705882353,
416
+ "eval_loss": 0.06002892181277275,
417
+ "eval_precision": 0.9054098904746101,
418
+ "eval_recall": 0.9182093571188152,
419
+ "eval_runtime": 22.0668,
420
+ "eval_samples_per_second": 147.28,
421
+ "eval_steps_per_second": 4.622,
422
+ "step": 850
423
+ },
424
+ {
425
+ "epoch": 1.99,
426
+ "eval_accuracy": 0.9860792025232662,
427
+ "eval_f1": 0.913464751085867,
428
+ "eval_loss": 0.05682874843478203,
429
+ "eval_precision": 0.9067993366500829,
430
+ "eval_recall": 0.9202288791652642,
431
+ "eval_runtime": 20.9827,
432
+ "eval_samples_per_second": 154.89,
433
+ "eval_steps_per_second": 4.861,
434
+ "step": 875
435
+ },
436
+ {
437
+ "epoch": 2.05,
438
+ "eval_accuracy": 0.9860792025232662,
439
+ "eval_f1": 0.9171483622350675,
440
+ "eval_loss": 0.0570731945335865,
441
+ "eval_precision": 0.9130942452043369,
442
+ "eval_recall": 0.9212386401884888,
443
+ "eval_runtime": 21.3364,
444
+ "eval_samples_per_second": 152.322,
445
+ "eval_steps_per_second": 4.781,
446
+ "step": 900
447
+ },
448
+ {
449
+ "epoch": 2.11,
450
+ "eval_accuracy": 0.9857676881741365,
451
+ "eval_f1": 0.9156969443281708,
452
+ "eval_loss": 0.0577477402985096,
453
+ "eval_precision": 0.9110444777611194,
454
+ "eval_recall": 0.920397172669135,
455
+ "eval_runtime": 21.9855,
456
+ "eval_samples_per_second": 147.825,
457
+ "eval_steps_per_second": 4.639,
458
+ "step": 925
459
+ },
460
+ {
461
+ "epoch": 2.16,
462
+ "eval_accuracy": 0.986040263229625,
463
+ "eval_f1": 0.9184714441006773,
464
+ "eval_loss": 0.06048833578824997,
465
+ "eval_precision": 0.912747216220708,
466
+ "eval_recall": 0.9242679232581622,
467
+ "eval_runtime": 21.8831,
468
+ "eval_samples_per_second": 148.516,
469
+ "eval_steps_per_second": 4.661,
470
+ "step": 950
471
+ },
472
+ {
473
+ "epoch": 2.22,
474
+ "eval_accuracy": 0.986682761574705,
475
+ "eval_f1": 0.9166318254034618,
476
+ "eval_loss": 0.05745207890868187,
477
+ "eval_precision": 0.9109190626558086,
478
+ "eval_recall": 0.922416694715584,
479
+ "eval_runtime": 21.8724,
480
+ "eval_samples_per_second": 148.589,
481
+ "eval_steps_per_second": 4.663,
482
+ "step": 975
483
+ },
484
+ {
485
+ "epoch": 2.28,
486
+ "learning_rate": 1.2034927866362947e-05,
487
+ "loss": 0.0392,
488
+ "step": 1000
489
+ },
490
+ {
491
+ "epoch": 2.28,
492
+ "eval_accuracy": 0.9862154900510105,
493
+ "eval_f1": 0.9181643400484828,
494
+ "eval_loss": 0.0572221502661705,
495
+ "eval_precision": 0.9121408403919614,
496
+ "eval_recall": 0.9242679232581622,
497
+ "eval_runtime": 21.5878,
498
+ "eval_samples_per_second": 150.548,
499
+ "eval_steps_per_second": 4.725,
500
+ "step": 1000
501
+ },
502
+ {
503
+ "epoch": 2.33,
504
+ "eval_accuracy": 0.9869748062770142,
505
+ "eval_f1": 0.9211694730669348,
506
+ "eval_loss": 0.056650131940841675,
507
+ "eval_precision": 0.9170975813177648,
508
+ "eval_recall": 0.9252776842813868,
509
+ "eval_runtime": 21.5295,
510
+ "eval_samples_per_second": 150.956,
511
+ "eval_steps_per_second": 4.738,
512
+ "step": 1025
513
+ },
514
+ {
515
+ "epoch": 2.39,
516
+ "eval_accuracy": 0.9870916241579378,
517
+ "eval_f1": 0.9243514644351465,
518
+ "eval_loss": 0.0570232979953289,
519
+ "eval_precision": 0.9192743009320905,
520
+ "eval_recall": 0.9294850218781555,
521
+ "eval_runtime": 21.5481,
522
+ "eval_samples_per_second": 150.825,
523
+ "eval_steps_per_second": 4.734,
524
+ "step": 1050
525
+ },
526
+ {
527
+ "epoch": 2.45,
528
+ "eval_accuracy": 0.986682761574705,
529
+ "eval_f1": 0.9215079829474212,
530
+ "eval_loss": 0.05837239325046539,
531
+ "eval_precision": 0.9154625477495433,
532
+ "eval_recall": 0.9276337933355773,
533
+ "eval_runtime": 21.2288,
534
+ "eval_samples_per_second": 153.094,
535
+ "eval_steps_per_second": 4.805,
536
+ "step": 1075
537
+ },
538
+ {
539
+ "epoch": 2.51,
540
+ "eval_accuracy": 0.9867022312215257,
541
+ "eval_f1": 0.9226653289858708,
542
+ "eval_loss": 0.059072330594062805,
543
+ "eval_precision": 0.9167635819903639,
544
+ "eval_recall": 0.9286435543588017,
545
+ "eval_runtime": 22.3224,
546
+ "eval_samples_per_second": 145.594,
547
+ "eval_steps_per_second": 4.569,
548
+ "step": 1100
549
+ },
550
+ {
551
+ "epoch": 2.56,
552
+ "eval_accuracy": 0.9874031385070675,
553
+ "eval_f1": 0.9246323529411764,
554
+ "eval_loss": 0.057747144252061844,
555
+ "eval_precision": 0.9181878526385662,
556
+ "eval_recall": 0.931167956916863,
557
+ "eval_runtime": 21.9739,
558
+ "eval_samples_per_second": 147.903,
559
+ "eval_steps_per_second": 4.642,
560
+ "step": 1125
561
+ },
562
+ {
563
+ "epoch": 2.62,
564
+ "eval_accuracy": 0.987033215217476,
565
+ "eval_f1": 0.9233344492802142,
566
+ "eval_loss": 0.057037804275751114,
567
+ "eval_precision": 0.9184149184149184,
568
+ "eval_recall": 0.9283069673510602,
569
+ "eval_runtime": 21.3838,
570
+ "eval_samples_per_second": 151.984,
571
+ "eval_steps_per_second": 4.77,
572
+ "step": 1150
573
+ },
574
+ {
575
+ "epoch": 2.68,
576
+ "eval_accuracy": 0.987227911685682,
577
+ "eval_f1": 0.9244541119384256,
578
+ "eval_loss": 0.05629764869809151,
579
+ "eval_precision": 0.9191482282482116,
580
+ "eval_recall": 0.929821608885897,
581
+ "eval_runtime": 21.8946,
582
+ "eval_samples_per_second": 148.438,
583
+ "eval_steps_per_second": 4.659,
584
+ "step": 1175
585
+ },
586
+ {
587
+ "epoch": 2.73,
588
+ "eval_accuracy": 0.9871500330983995,
589
+ "eval_f1": 0.9246449456975774,
590
+ "eval_loss": 0.05648628994822502,
591
+ "eval_precision": 0.918049104180491,
592
+ "eval_recall": 0.9313362504207338,
593
+ "eval_runtime": 21.2748,
594
+ "eval_samples_per_second": 152.763,
595
+ "eval_steps_per_second": 4.794,
596
+ "step": 1200
597
+ },
598
+ {
599
+ "epoch": 2.79,
600
+ "eval_accuracy": 0.9873447295666057,
601
+ "eval_f1": 0.9243767776476494,
602
+ "eval_loss": 0.055928729474544525,
603
+ "eval_precision": 0.9189953426480373,
604
+ "eval_recall": 0.929821608885897,
605
+ "eval_runtime": 21.5883,
606
+ "eval_samples_per_second": 150.545,
607
+ "eval_steps_per_second": 4.725,
608
+ "step": 1225
609
+ },
610
+ {
611
+ "epoch": 2.85,
612
+ "eval_accuracy": 0.9873057902729645,
613
+ "eval_f1": 0.9238748536054878,
614
+ "eval_loss": 0.05620228126645088,
615
+ "eval_precision": 0.9184963406520292,
616
+ "eval_recall": 0.9293167283742848,
617
+ "eval_runtime": 21.3558,
618
+ "eval_samples_per_second": 152.184,
619
+ "eval_steps_per_second": 4.776,
620
+ "step": 1250
621
+ },
622
+ {
623
+ "epoch": 2.9,
624
+ "eval_accuracy": 0.9871500330983995,
625
+ "eval_f1": 0.922961104140527,
626
+ "eval_loss": 0.05639491230249405,
627
+ "eval_precision": 0.9175120572093797,
628
+ "eval_recall": 0.928475260854931,
629
+ "eval_runtime": 21.3856,
630
+ "eval_samples_per_second": 151.972,
631
+ "eval_steps_per_second": 4.77,
632
+ "step": 1275
633
+ },
634
+ {
635
+ "epoch": 2.96,
636
+ "eval_accuracy": 0.9873057902729645,
637
+ "eval_f1": 0.9237330657300552,
638
+ "eval_loss": 0.05629625543951988,
639
+ "eval_precision": 0.9180518617021277,
640
+ "eval_recall": 0.9294850218781555,
641
+ "eval_runtime": 21.6582,
642
+ "eval_samples_per_second": 150.059,
643
+ "eval_steps_per_second": 4.71,
644
+ "step": 1300
645
+ },
646
+ {
647
+ "epoch": 3.0,
648
+ "step": 1317,
649
+ "total_flos": 1368370122693444.0,
650
+ "train_loss": 0.07309725869308391,
651
+ "train_runtime": 1743.2547,
652
+ "train_samples_per_second": 24.163,
653
+ "train_steps_per_second": 0.755
654
+ }
655
+ ],
656
+ "max_steps": 1317,
657
+ "num_train_epochs": 3,
658
+ "total_flos": 1368370122693444.0,
659
+ "trial_name": null,
660
+ "trial_params": null
661
+ }