apwic commited on
Commit
bcb8207
1 Parent(s): b9770ac

End of training

Browse files
README.md CHANGED
@@ -1,4 +1,6 @@
1
  ---
 
 
2
  license: mit
3
  base_model: indolem/indobert-base-uncased
4
  tags:
 
1
  ---
2
+ language:
3
+ - id
4
  license: mit
5
  base_model: indolem/indobert-base-uncased
6
  tags:
all_results.json CHANGED
@@ -1,25 +1,25 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_accuracy": 0.9612441617807811,
4
- "eval_f1": 0.829950630828305,
5
- "eval_loss": 0.16526208817958832,
6
- "eval_precision": 0.785974025974026,
7
- "eval_recall": 0.8791400348634515,
8
- "eval_runtime": 4.5952,
9
  "eval_samples": 935,
10
- "eval_samples_per_second": 203.475,
11
- "eval_steps_per_second": 3.264,
12
- "predict_accuracy": 0.9910021718895439,
13
- "predict_f1": 0.9460332103321033,
14
- "predict_loss": 0.03333849087357521,
15
- "predict_precision": 0.928054298642534,
16
- "predict_recall": 0.964722483537159,
17
- "predict_runtime": 10.6661,
18
- "predict_samples_per_second": 219.668,
19
- "predict_steps_per_second": 3.469,
20
- "train_loss": 0.0814560384461374,
21
- "train_runtime": 1318.3663,
22
  "train_samples": 8437,
23
- "train_samples_per_second": 127.992,
24
- "train_steps_per_second": 8.01
25
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_accuracy": 0.9608466660041737,
4
+ "eval_f1": 0.8321289605336298,
5
+ "eval_loss": 0.17778323590755463,
6
+ "eval_precision": 0.7975492807671817,
7
+ "eval_recall": 0.8698431144683324,
8
+ "eval_runtime": 4.5207,
9
  "eval_samples": 935,
10
+ "eval_samples_per_second": 206.828,
11
+ "eval_steps_per_second": 3.318,
12
+ "predict_accuracy": 0.9915645361464475,
13
+ "predict_f1": 0.9511687109465402,
14
+ "predict_loss": 0.03257888928055763,
15
+ "predict_precision": 0.9362186788154897,
16
+ "predict_recall": 0.9666039510818438,
17
+ "predict_runtime": 10.6853,
18
+ "predict_samples_per_second": 219.273,
19
+ "predict_steps_per_second": 3.463,
20
+ "train_loss": 0.08150525255636736,
21
+ "train_runtime": 1343.1683,
22
  "train_samples": 8437,
23
+ "train_samples_per_second": 125.628,
24
+ "train_steps_per_second": 7.862
25
  }
eval_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_accuracy": 0.9612441617807811,
4
- "eval_f1": 0.829950630828305,
5
- "eval_loss": 0.16526208817958832,
6
- "eval_precision": 0.785974025974026,
7
- "eval_recall": 0.8791400348634515,
8
- "eval_runtime": 4.5952,
9
  "eval_samples": 935,
10
- "eval_samples_per_second": 203.475,
11
- "eval_steps_per_second": 3.264
12
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_accuracy": 0.9608466660041737,
4
+ "eval_f1": 0.8321289605336298,
5
+ "eval_loss": 0.17778323590755463,
6
+ "eval_precision": 0.7975492807671817,
7
+ "eval_recall": 0.8698431144683324,
8
+ "eval_runtime": 4.5207,
9
  "eval_samples": 935,
10
+ "eval_samples_per_second": 206.828,
11
+ "eval_steps_per_second": 3.318
12
  }
predict_results.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "predict_accuracy": 0.9910021718895439,
3
- "predict_f1": 0.9460332103321033,
4
- "predict_loss": 0.03333849087357521,
5
- "predict_precision": 0.928054298642534,
6
- "predict_recall": 0.964722483537159,
7
- "predict_runtime": 10.6661,
8
- "predict_samples_per_second": 219.668,
9
- "predict_steps_per_second": 3.469
10
  }
 
1
  {
2
+ "predict_accuracy": 0.9915645361464475,
3
+ "predict_f1": 0.9511687109465402,
4
+ "predict_loss": 0.03257888928055763,
5
+ "predict_precision": 0.9362186788154897,
6
+ "predict_recall": 0.9666039510818438,
7
+ "predict_runtime": 10.6853,
8
+ "predict_samples_per_second": 219.273,
9
+ "predict_steps_per_second": 3.463
10
  }
predictions.txt CHANGED
The diff for this file is too large to render. See raw diff
 
runs/May25_09-08-33_indolem-petl-vm/events.out.tfevents.1716629479.indolem-petl-vm.2062959.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b27faa5d19209d3c836a2af64fee64dd387facb449754983301f62eb77c23368
3
+ size 560
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
- "train_loss": 0.0814560384461374,
4
- "train_runtime": 1318.3663,
5
  "train_samples": 8437,
6
- "train_samples_per_second": 127.992,
7
- "train_steps_per_second": 8.01
8
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "train_loss": 0.08150525255636736,
4
+ "train_runtime": 1343.1683,
5
  "train_samples": 8437,
6
+ "train_samples_per_second": 125.628,
7
+ "train_steps_per_second": 7.862
8
  }
trainer_state.json CHANGED
@@ -10,392 +10,392 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 0.8100232481956482,
14
  "learning_rate": 4.75e-05,
15
- "loss": 0.4348,
16
  "step": 528
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_accuracy": 0.950561462784458,
21
- "eval_f1": 0.7816627816627817,
22
- "eval_loss": 0.15243592858314514,
23
- "eval_precision": 0.7051401869158879,
24
- "eval_recall": 0.8768158047646717,
25
- "eval_runtime": 4.5709,
26
- "eval_samples_per_second": 204.556,
27
- "eval_steps_per_second": 3.282,
28
  "step": 528
29
  },
30
  {
31
  "epoch": 2.0,
32
- "grad_norm": 0.6027082800865173,
33
  "learning_rate": 4.5e-05,
34
- "loss": 0.1432,
35
  "step": 1056
36
  },
37
  {
38
  "epoch": 2.0,
39
- "eval_accuracy": 0.957467951903011,
40
- "eval_f1": 0.7962466487935658,
41
- "eval_loss": 0.1272890865802765,
42
- "eval_precision": 0.7391737182677949,
43
- "eval_recall": 0.862870424171993,
44
- "eval_runtime": 4.7255,
45
- "eval_samples_per_second": 197.861,
46
- "eval_steps_per_second": 3.174,
47
  "step": 1056
48
  },
49
  {
50
  "epoch": 3.0,
51
- "grad_norm": 1.0035598278045654,
52
  "learning_rate": 4.25e-05,
53
- "loss": 0.1149,
54
  "step": 1584
55
  },
56
  {
57
  "epoch": 3.0,
58
- "eval_accuracy": 0.9554804730199742,
59
- "eval_f1": 0.8087049973132725,
60
- "eval_loss": 0.13823477923870087,
61
- "eval_precision": 0.7521239380309845,
62
- "eval_recall": 0.8744915746658919,
63
- "eval_runtime": 4.5819,
64
- "eval_samples_per_second": 204.062,
65
- "eval_steps_per_second": 3.274,
66
  "step": 1584
67
  },
68
  {
69
  "epoch": 4.0,
70
- "grad_norm": 1.8790736198425293,
71
  "learning_rate": 4e-05,
72
- "loss": 0.0978,
73
  "step": 2112
74
  },
75
  {
76
  "epoch": 4.0,
77
- "eval_accuracy": 0.9576666997913147,
78
- "eval_f1": 0.8132894014277869,
79
- "eval_loss": 0.13224001228809357,
80
- "eval_precision": 0.7709526288391463,
81
- "eval_recall": 0.8605461940732132,
82
- "eval_runtime": 4.596,
83
- "eval_samples_per_second": 203.437,
84
- "eval_steps_per_second": 3.264,
85
  "step": 2112
86
  },
87
  {
88
  "epoch": 5.0,
89
- "grad_norm": 0.9501886367797852,
90
  "learning_rate": 3.7500000000000003e-05,
91
- "loss": 0.0873,
92
  "step": 2640
93
  },
94
  {
95
  "epoch": 5.0,
96
- "eval_accuracy": 0.9578654476796185,
97
- "eval_f1": 0.8121964382083108,
98
- "eval_loss": 0.13743577897548676,
99
- "eval_precision": 0.7581863979848866,
100
- "eval_recall": 0.8744915746658919,
101
- "eval_runtime": 4.6679,
102
- "eval_samples_per_second": 200.305,
103
- "eval_steps_per_second": 3.213,
104
  "step": 2640
105
  },
106
  {
107
  "epoch": 6.0,
108
- "grad_norm": 1.0721073150634766,
109
  "learning_rate": 3.5e-05,
110
- "loss": 0.0781,
111
  "step": 3168
112
  },
113
  {
114
  "epoch": 6.0,
115
- "eval_accuracy": 0.963231640663818,
116
- "eval_f1": 0.8332400671516508,
117
- "eval_loss": 0.13460490107536316,
118
- "eval_precision": 0.8035617916891528,
119
- "eval_recall": 0.8651946542707728,
120
- "eval_runtime": 4.5754,
121
- "eval_samples_per_second": 204.353,
122
- "eval_steps_per_second": 3.278,
123
  "step": 3168
124
  },
125
  {
126
  "epoch": 7.0,
127
- "grad_norm": 0.6200407147407532,
128
  "learning_rate": 3.2500000000000004e-05,
129
- "loss": 0.0709,
130
  "step": 3696
131
  },
132
  {
133
  "epoch": 7.0,
134
- "eval_accuracy": 0.96064791811587,
135
- "eval_f1": 0.829950630828305,
136
- "eval_loss": 0.14596135914325714,
137
- "eval_precision": 0.785974025974026,
138
- "eval_recall": 0.8791400348634515,
139
- "eval_runtime": 4.6372,
140
- "eval_samples_per_second": 201.629,
141
- "eval_steps_per_second": 3.235,
142
  "step": 3696
143
  },
144
  {
145
  "epoch": 8.0,
146
- "grad_norm": 0.3982696235179901,
147
  "learning_rate": 3e-05,
148
- "loss": 0.0656,
149
  "step": 4224
150
  },
151
  {
152
  "epoch": 8.0,
153
- "eval_accuracy": 0.9578654476796185,
154
- "eval_f1": 0.8218040951853902,
155
- "eval_loss": 0.14514297246932983,
156
- "eval_precision": 0.7844690966719493,
157
- "eval_recall": 0.862870424171993,
158
- "eval_runtime": 4.6002,
159
- "eval_samples_per_second": 203.254,
160
- "eval_steps_per_second": 3.261,
161
  "step": 4224
162
  },
163
  {
164
  "epoch": 9.0,
165
- "grad_norm": 1.1954327821731567,
166
  "learning_rate": 2.7500000000000004e-05,
167
- "loss": 0.0606,
168
  "step": 4752
169
  },
170
  {
171
  "epoch": 9.0,
172
- "eval_accuracy": 0.9580641955679221,
173
- "eval_f1": 0.8185816382627817,
174
- "eval_loss": 0.14650680124759674,
175
- "eval_precision": 0.7767344809598331,
176
- "eval_recall": 0.8651946542707728,
177
- "eval_runtime": 4.5627,
178
- "eval_samples_per_second": 204.924,
179
- "eval_steps_per_second": 3.288,
180
  "step": 4752
181
  },
182
  {
183
  "epoch": 10.0,
184
- "grad_norm": 1.46392822265625,
185
  "learning_rate": 2.5e-05,
186
- "loss": 0.0563,
187
  "step": 5280
188
  },
189
  {
190
  "epoch": 10.0,
191
- "eval_accuracy": 0.9590579350094405,
192
- "eval_f1": 0.8308621636463481,
193
- "eval_loss": 0.15200696885585785,
194
- "eval_precision": 0.7876106194690266,
195
- "eval_recall": 0.8791400348634515,
196
- "eval_runtime": 4.5886,
197
- "eval_samples_per_second": 203.765,
198
- "eval_steps_per_second": 3.269,
199
  "step": 5280
200
  },
201
  {
202
  "epoch": 11.0,
203
- "grad_norm": 1.8282573223114014,
204
  "learning_rate": 2.25e-05,
205
- "loss": 0.0523,
206
  "step": 5808
207
  },
208
  {
209
  "epoch": 11.0,
210
- "eval_accuracy": 0.9566729603497963,
211
- "eval_f1": 0.8245231607629426,
212
- "eval_loss": 0.15866349637508392,
213
- "eval_precision": 0.7762955361723961,
214
- "eval_recall": 0.8791400348634515,
215
- "eval_runtime": 4.6167,
216
- "eval_samples_per_second": 202.525,
217
- "eval_steps_per_second": 3.249,
218
  "step": 5808
219
  },
220
  {
221
  "epoch": 12.0,
222
- "grad_norm": 0.6378235816955566,
223
  "learning_rate": 2e-05,
224
- "loss": 0.0504,
225
  "step": 6336
226
  },
227
  {
228
  "epoch": 12.0,
229
- "eval_accuracy": 0.9592566828977442,
230
- "eval_f1": 0.8310626702997276,
231
- "eval_loss": 0.15401104092597961,
232
- "eval_precision": 0.7824525397639815,
233
- "eval_recall": 0.8861127251597908,
234
- "eval_runtime": 5.0455,
235
- "eval_samples_per_second": 185.315,
236
- "eval_steps_per_second": 2.973,
237
  "step": 6336
238
  },
239
  {
240
  "epoch": 13.0,
241
- "grad_norm": 0.47105222940444946,
242
  "learning_rate": 1.75e-05,
243
- "loss": 0.0465,
244
  "step": 6864
245
  },
246
  {
247
  "epoch": 13.0,
248
- "eval_accuracy": 0.9608466660041737,
249
- "eval_f1": 0.831404958677686,
250
- "eval_loss": 0.15168695151805878,
251
- "eval_precision": 0.7904662126767942,
252
- "eval_recall": 0.8768158047646717,
253
- "eval_runtime": 4.5874,
254
- "eval_samples_per_second": 203.819,
255
- "eval_steps_per_second": 3.27,
256
  "step": 6864
257
  },
258
  {
259
  "epoch": 14.0,
260
- "grad_norm": 0.6378626227378845,
261
  "learning_rate": 1.5e-05,
262
- "loss": 0.0435,
263
  "step": 7392
264
  },
265
  {
266
  "epoch": 14.0,
267
- "eval_accuracy": 0.9622379012222995,
268
- "eval_f1": 0.8302828618968386,
269
- "eval_loss": 0.1564669907093048,
270
- "eval_precision": 0.7941644562334218,
271
- "eval_recall": 0.8698431144683324,
272
- "eval_runtime": 4.6735,
273
- "eval_samples_per_second": 200.064,
274
- "eval_steps_per_second": 3.21,
275
  "step": 7392
276
  },
277
  {
278
  "epoch": 15.0,
279
- "grad_norm": 0.05927232280373573,
280
  "learning_rate": 1.25e-05,
281
- "loss": 0.0411,
282
  "step": 7920
283
  },
284
  {
285
  "epoch": 15.0,
286
- "eval_accuracy": 0.9624366491106032,
287
- "eval_f1": 0.8267629094947252,
288
- "eval_loss": 0.15700677037239075,
289
- "eval_precision": 0.7916002126528442,
290
- "eval_recall": 0.8651946542707728,
291
- "eval_runtime": 4.585,
292
- "eval_samples_per_second": 203.925,
293
- "eval_steps_per_second": 3.272,
294
  "step": 7920
295
  },
296
  {
297
  "epoch": 16.0,
298
- "grad_norm": 2.510366916656494,
299
  "learning_rate": 1e-05,
300
- "loss": 0.04,
301
  "step": 8448
302
  },
303
  {
304
  "epoch": 16.0,
305
- "eval_accuracy": 0.9618404054456922,
306
- "eval_f1": 0.8308621636463481,
307
- "eval_loss": 0.16134144365787506,
308
- "eval_precision": 0.7876106194690266,
309
  "eval_recall": 0.8791400348634515,
310
- "eval_runtime": 4.598,
311
- "eval_samples_per_second": 203.348,
312
- "eval_steps_per_second": 3.262,
313
  "step": 8448
314
  },
315
  {
316
  "epoch": 17.0,
317
- "grad_norm": 0.10409926623106003,
318
  "learning_rate": 7.5e-06,
319
- "loss": 0.0385,
320
  "step": 8976
321
  },
322
  {
323
  "epoch": 17.0,
324
- "eval_accuracy": 0.9594554307860479,
325
- "eval_f1": 0.8274552360282147,
326
- "eval_loss": 0.1708444207906723,
327
- "eval_precision": 0.7760814249363868,
328
- "eval_recall": 0.8861127251597908,
329
- "eval_runtime": 4.6748,
330
- "eval_samples_per_second": 200.008,
331
- "eval_steps_per_second": 3.209,
332
  "step": 8976
333
  },
334
  {
335
  "epoch": 18.0,
336
- "grad_norm": 1.0784730911254883,
337
  "learning_rate": 5e-06,
338
- "loss": 0.037,
339
  "step": 9504
340
  },
341
  {
342
  "epoch": 18.0,
343
- "eval_accuracy": 0.9620391533339958,
344
- "eval_f1": 0.831578947368421,
345
- "eval_loss": 0.1626226305961609,
346
- "eval_precision": 0.7946003176283748,
347
- "eval_recall": 0.8721673445671121,
348
- "eval_runtime": 5.2776,
349
- "eval_samples_per_second": 177.163,
350
- "eval_steps_per_second": 2.842,
351
  "step": 9504
352
  },
353
  {
354
  "epoch": 19.0,
355
- "grad_norm": 0.881395697593689,
356
  "learning_rate": 2.5e-06,
357
- "loss": 0.035,
358
  "step": 10032
359
  },
360
  {
361
  "epoch": 19.0,
362
- "eval_accuracy": 0.9622379012222995,
363
- "eval_f1": 0.8297401879491432,
364
- "eval_loss": 0.1642664223909378,
365
- "eval_precision": 0.7912493410648392,
366
- "eval_recall": 0.8721673445671121,
367
- "eval_runtime": 4.5963,
368
- "eval_samples_per_second": 203.426,
369
- "eval_steps_per_second": 3.264,
370
  "step": 10032
371
  },
372
  {
373
  "epoch": 20.0,
374
- "grad_norm": 0.892469048500061,
375
  "learning_rate": 0.0,
376
- "loss": 0.0352,
377
  "step": 10560
378
  },
379
  {
380
  "epoch": 20.0,
381
- "eval_accuracy": 0.9612441617807811,
382
- "eval_f1": 0.829950630828305,
383
- "eval_loss": 0.16526208817958832,
384
- "eval_precision": 0.785974025974026,
385
- "eval_recall": 0.8791400348634515,
386
- "eval_runtime": 4.5732,
387
- "eval_samples_per_second": 204.45,
388
- "eval_steps_per_second": 3.28,
389
  "step": 10560
390
  },
391
  {
392
  "epoch": 20.0,
393
  "step": 10560,
394
  "total_flos": 5062583230111038.0,
395
- "train_loss": 0.0814560384461374,
396
- "train_runtime": 1318.3663,
397
- "train_samples_per_second": 127.992,
398
- "train_steps_per_second": 8.01
399
  }
400
  ],
401
  "logging_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 0.8086087107658386,
14
  "learning_rate": 4.75e-05,
15
+ "loss": 0.4434,
16
  "step": 528
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_accuracy": 0.9455927655768658,
21
+ "eval_f1": 0.7605633802816901,
22
+ "eval_loss": 0.16302905976772308,
23
+ "eval_precision": 0.679945054945055,
24
+ "eval_recall": 0.862870424171993,
25
+ "eval_runtime": 4.5797,
26
+ "eval_samples_per_second": 204.163,
27
+ "eval_steps_per_second": 3.275,
28
  "step": 528
29
  },
30
  {
31
  "epoch": 2.0,
32
+ "grad_norm": 0.7114465832710266,
33
  "learning_rate": 4.5e-05,
34
+ "loss": 0.1462,
35
  "step": 1056
36
  },
37
  {
38
  "epoch": 2.0,
39
+ "eval_accuracy": 0.9566729603497963,
40
+ "eval_f1": 0.8073836276083467,
41
+ "eval_loss": 0.12938551604747772,
42
+ "eval_precision": 0.7481408031730292,
43
+ "eval_recall": 0.8768158047646717,
44
+ "eval_runtime": 4.6537,
45
+ "eval_samples_per_second": 200.915,
46
+ "eval_steps_per_second": 3.223,
47
  "step": 1056
48
  },
49
  {
50
  "epoch": 3.0,
51
+ "grad_norm": 1.316323161125183,
52
  "learning_rate": 4.25e-05,
53
+ "loss": 0.1183,
54
  "step": 1584
55
  },
56
  {
57
  "epoch": 3.0,
58
+ "eval_accuracy": 0.9568717082381,
59
+ "eval_f1": 0.8116639914392724,
60
+ "eval_loss": 0.13784636557102203,
61
+ "eval_precision": 0.7521070897372335,
62
+ "eval_recall": 0.8814642649622313,
63
+ "eval_runtime": 4.903,
64
+ "eval_samples_per_second": 190.701,
65
+ "eval_steps_per_second": 3.059,
66
  "step": 1584
67
  },
68
  {
69
  "epoch": 4.0,
70
+ "grad_norm": 1.4418916702270508,
71
  "learning_rate": 4e-05,
72
+ "loss": 0.1012,
73
  "step": 2112
74
  },
75
  {
76
  "epoch": 4.0,
77
+ "eval_accuracy": 0.9596541786743515,
78
+ "eval_f1": 0.8231144872490505,
79
+ "eval_loss": 0.1358918398618698,
80
+ "eval_precision": 0.7720101781170483,
81
+ "eval_recall": 0.8814642649622313,
82
+ "eval_runtime": 4.7436,
83
+ "eval_samples_per_second": 197.109,
84
+ "eval_steps_per_second": 3.162,
85
  "step": 2112
86
  },
87
  {
88
  "epoch": 5.0,
89
+ "grad_norm": 1.0950379371643066,
90
  "learning_rate": 3.7500000000000003e-05,
91
+ "loss": 0.0884,
92
  "step": 2640
93
  },
94
  {
95
  "epoch": 5.0,
96
+ "eval_accuracy": 0.9622379012222995,
97
+ "eval_f1": 0.8348926802421575,
98
+ "eval_loss": 0.12661471962928772,
99
+ "eval_precision": 0.7929952953476216,
100
+ "eval_recall": 0.8814642649622313,
101
+ "eval_runtime": 4.6673,
102
+ "eval_samples_per_second": 200.331,
103
+ "eval_steps_per_second": 3.214,
104
  "step": 2640
105
  },
106
  {
107
  "epoch": 6.0,
108
+ "grad_norm": 1.270456314086914,
109
  "learning_rate": 3.5e-05,
110
+ "loss": 0.0793,
111
  "step": 3168
112
  },
113
  {
114
  "epoch": 6.0,
115
+ "eval_accuracy": 0.9610454138924774,
116
+ "eval_f1": 0.8404432132963989,
117
+ "eval_loss": 0.1408655196428299,
118
+ "eval_precision": 0.803070407623081,
119
+ "eval_recall": 0.8814642649622313,
120
+ "eval_runtime": 4.6946,
121
+ "eval_samples_per_second": 199.164,
122
+ "eval_steps_per_second": 3.195,
123
  "step": 3168
124
  },
125
  {
126
  "epoch": 7.0,
127
+ "grad_norm": 1.6711246967315674,
128
  "learning_rate": 3.2500000000000004e-05,
129
+ "loss": 0.072,
130
  "step": 3696
131
  },
132
  {
133
  "epoch": 7.0,
134
+ "eval_accuracy": 0.9588591871211368,
135
+ "eval_f1": 0.8222222222222223,
136
+ "eval_loss": 0.1545909196138382,
137
+ "eval_precision": 0.7704418486541391,
138
+ "eval_recall": 0.8814642649622313,
139
+ "eval_runtime": 4.6637,
140
+ "eval_samples_per_second": 200.485,
141
+ "eval_steps_per_second": 3.216,
142
  "step": 3696
143
  },
144
  {
145
  "epoch": 8.0,
146
+ "grad_norm": 0.5584707260131836,
147
  "learning_rate": 3e-05,
148
+ "loss": 0.067,
149
  "step": 4224
150
  },
151
  {
152
  "epoch": 8.0,
153
+ "eval_accuracy": 0.9608466660041737,
154
+ "eval_f1": 0.8334258745141587,
155
+ "eval_loss": 0.14326535165309906,
156
+ "eval_precision": 0.797979797979798,
157
+ "eval_recall": 0.8721673445671121,
158
+ "eval_runtime": 4.6647,
159
+ "eval_samples_per_second": 200.444,
160
+ "eval_steps_per_second": 3.216,
161
  "step": 4224
162
  },
163
  {
164
  "epoch": 9.0,
165
+ "grad_norm": 0.5288811326026917,
166
  "learning_rate": 2.7500000000000004e-05,
167
+ "loss": 0.0607,
168
  "step": 4752
169
  },
170
  {
171
  "epoch": 9.0,
172
+ "eval_accuracy": 0.9598529265626553,
173
+ "eval_f1": 0.8312328767123287,
174
+ "eval_loss": 0.1468406319618225,
175
+ "eval_precision": 0.7864178330741317,
176
+ "eval_recall": 0.8814642649622313,
177
+ "eval_runtime": 4.6583,
178
+ "eval_samples_per_second": 200.719,
179
+ "eval_steps_per_second": 3.22,
180
  "step": 4752
181
  },
182
  {
183
  "epoch": 10.0,
184
+ "grad_norm": 1.041274070739746,
185
  "learning_rate": 2.5e-05,
186
+ "loss": 0.0562,
187
  "step": 5280
188
  },
189
  {
190
  "epoch": 10.0,
191
+ "eval_accuracy": 0.9612441617807811,
192
+ "eval_f1": 0.8267029972752045,
193
+ "eval_loss": 0.14973483979701996,
194
+ "eval_precision": 0.7783478707029245,
195
+ "eval_recall": 0.8814642649622313,
196
+ "eval_runtime": 4.6532,
197
+ "eval_samples_per_second": 200.937,
198
+ "eval_steps_per_second": 3.224,
199
  "step": 5280
200
  },
201
  {
202
  "epoch": 11.0,
203
+ "grad_norm": 1.7796343564987183,
204
  "learning_rate": 2.25e-05,
205
+ "loss": 0.0506,
206
  "step": 5808
207
  },
208
  {
209
  "epoch": 11.0,
210
+ "eval_accuracy": 0.9594554307860479,
211
+ "eval_f1": 0.8332413031474324,
212
+ "eval_loss": 0.1600087583065033,
213
+ "eval_precision": 0.793792740662809,
214
+ "eval_recall": 0.8768158047646717,
215
+ "eval_runtime": 4.6756,
216
+ "eval_samples_per_second": 199.975,
217
+ "eval_steps_per_second": 3.208,
218
  "step": 5808
219
  },
220
  {
221
  "epoch": 12.0,
222
+ "grad_norm": 1.6123548746109009,
223
  "learning_rate": 2e-05,
224
+ "loss": 0.0483,
225
  "step": 6336
226
  },
227
  {
228
  "epoch": 12.0,
229
+ "eval_accuracy": 0.9608466660041737,
230
+ "eval_f1": 0.8328721638074155,
231
+ "eval_loss": 0.15964019298553467,
232
+ "eval_precision": 0.7950343370311674,
233
+ "eval_recall": 0.8744915746658919,
234
+ "eval_runtime": 4.6719,
235
+ "eval_samples_per_second": 200.134,
236
+ "eval_steps_per_second": 3.211,
237
  "step": 6336
238
  },
239
  {
240
  "epoch": 13.0,
241
+ "grad_norm": 0.9525193572044373,
242
  "learning_rate": 1.75e-05,
243
+ "loss": 0.0443,
244
  "step": 6864
245
  },
246
  {
247
  "epoch": 13.0,
248
+ "eval_accuracy": 0.96064791811587,
249
+ "eval_f1": 0.8237547892720306,
250
+ "eval_loss": 0.15955425798892975,
251
+ "eval_precision": 0.7785825142265907,
252
+ "eval_recall": 0.8744915746658919,
253
+ "eval_runtime": 4.6803,
254
+ "eval_samples_per_second": 199.774,
255
+ "eval_steps_per_second": 3.205,
256
  "step": 6864
257
  },
258
  {
259
  "epoch": 14.0,
260
+ "grad_norm": 0.5356388688087463,
261
  "learning_rate": 1.5e-05,
262
+ "loss": 0.0421,
263
  "step": 7392
264
  },
265
  {
266
  "epoch": 14.0,
267
+ "eval_accuracy": 0.9612441617807811,
268
+ "eval_f1": 0.8350857775318208,
269
+ "eval_loss": 0.16503094136714935,
270
+ "eval_precision": 0.7971473851030111,
271
+ "eval_recall": 0.8768158047646717,
272
+ "eval_runtime": 4.671,
273
+ "eval_samples_per_second": 200.172,
274
+ "eval_steps_per_second": 3.211,
275
  "step": 7392
276
  },
277
  {
278
  "epoch": 15.0,
279
+ "grad_norm": 0.6785407662391663,
280
  "learning_rate": 1.25e-05,
281
+ "loss": 0.0395,
282
  "step": 7920
283
  },
284
  {
285
  "epoch": 15.0,
286
+ "eval_accuracy": 0.9602504223392626,
287
+ "eval_f1": 0.8284449363586054,
288
+ "eval_loss": 0.16934077441692352,
289
+ "eval_precision": 0.7908082408874801,
290
+ "eval_recall": 0.8698431144683324,
291
+ "eval_runtime": 4.6607,
292
+ "eval_samples_per_second": 200.612,
293
+ "eval_steps_per_second": 3.218,
294
  "step": 7920
295
  },
296
  {
297
  "epoch": 16.0,
298
+ "grad_norm": 1.843337893486023,
299
  "learning_rate": 1e-05,
300
+ "loss": 0.0375,
301
  "step": 8448
302
  },
303
  {
304
  "epoch": 16.0,
305
+ "eval_accuracy": 0.9594554307860479,
306
+ "eval_f1": 0.8336088154269972,
307
+ "eval_loss": 0.17250221967697144,
308
+ "eval_precision": 0.7925615505500262,
309
  "eval_recall": 0.8791400348634515,
310
+ "eval_runtime": 4.6872,
311
+ "eval_samples_per_second": 199.478,
312
+ "eval_steps_per_second": 3.2,
313
  "step": 8448
314
  },
315
  {
316
  "epoch": 17.0,
317
+ "grad_norm": 0.6304071545600891,
318
  "learning_rate": 7.5e-06,
319
+ "loss": 0.0358,
320
  "step": 8976
321
  },
322
  {
323
  "epoch": 17.0,
324
+ "eval_accuracy": 0.9612441617807811,
325
+ "eval_f1": 0.8321289605336298,
326
+ "eval_loss": 0.17892615497112274,
327
+ "eval_precision": 0.7975492807671817,
328
+ "eval_recall": 0.8698431144683324,
329
+ "eval_runtime": 4.6497,
330
+ "eval_samples_per_second": 201.086,
331
+ "eval_steps_per_second": 3.226,
332
  "step": 8976
333
  },
334
  {
335
  "epoch": 18.0,
336
+ "grad_norm": 1.1330559253692627,
337
  "learning_rate": 5e-06,
338
+ "loss": 0.0339,
339
  "step": 9504
340
  },
341
  {
342
  "epoch": 18.0,
343
+ "eval_accuracy": 0.960051674450959,
344
+ "eval_f1": 0.8225895316804408,
345
+ "eval_loss": 0.17817425727844238,
346
+ "eval_precision": 0.7820848611838659,
347
+ "eval_recall": 0.8675188843695526,
348
+ "eval_runtime": 4.6927,
349
+ "eval_samples_per_second": 199.247,
350
+ "eval_steps_per_second": 3.196,
351
  "step": 9504
352
  },
353
  {
354
  "epoch": 19.0,
355
+ "grad_norm": 0.08901867270469666,
356
  "learning_rate": 2.5e-06,
357
+ "loss": 0.0327,
358
  "step": 10032
359
  },
360
  {
361
  "epoch": 19.0,
362
+ "eval_accuracy": 0.9620391533339958,
363
+ "eval_f1": 0.833983286908078,
364
+ "eval_loss": 0.17433172464370728,
365
+ "eval_precision": 0.8009630818619583,
366
+ "eval_recall": 0.8698431144683324,
367
+ "eval_runtime": 4.6614,
368
+ "eval_samples_per_second": 200.583,
369
+ "eval_steps_per_second": 3.218,
370
  "step": 10032
371
  },
372
  {
373
  "epoch": 20.0,
374
+ "grad_norm": 1.3878382444381714,
375
  "learning_rate": 0.0,
376
+ "loss": 0.0327,
377
  "step": 10560
378
  },
379
  {
380
  "epoch": 20.0,
381
+ "eval_accuracy": 0.9608466660041737,
382
+ "eval_f1": 0.8321289605336298,
383
+ "eval_loss": 0.17778323590755463,
384
+ "eval_precision": 0.7975492807671817,
385
+ "eval_recall": 0.8698431144683324,
386
+ "eval_runtime": 4.9618,
387
+ "eval_samples_per_second": 188.439,
388
+ "eval_steps_per_second": 3.023,
389
  "step": 10560
390
  },
391
  {
392
  "epoch": 20.0,
393
  "step": 10560,
394
  "total_flos": 5062583230111038.0,
395
+ "train_loss": 0.08150525255636736,
396
+ "train_runtime": 1343.1683,
397
+ "train_samples_per_second": 125.628,
398
+ "train_steps_per_second": 7.862
399
  }
400
  ],
401
  "logging_steps": 500,