apwic committed on
Commit
9391d95
1 Parent(s): 083b214

End of training

Browse files
README.md CHANGED
@@ -1,4 +1,6 @@
1
  ---
 
 
2
  license: mit
3
  base_model: indolem/indobert-base-uncased
4
  tags:
 
1
  ---
2
+ language:
3
+ - id
4
  license: mit
5
  base_model: indolem/indobert-base-uncased
6
  tags:
all_results.json CHANGED
@@ -1,25 +1,25 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_accuracy": 0.96064791811587,
4
- "eval_f1": 0.8354500276090558,
5
- "eval_loss": 0.16905945539474487,
6
- "eval_precision": 0.7958968963703315,
7
  "eval_recall": 0.8791400348634515,
8
- "eval_runtime": 4.4133,
9
  "eval_samples": 935,
10
- "eval_samples_per_second": 211.862,
11
- "eval_steps_per_second": 3.399,
12
- "predict_accuracy": 0.9909633881476885,
13
- "predict_f1": 0.9484726224783863,
14
- "predict_loss": 0.03368888050317764,
15
- "predict_precision": 0.9301379154420076,
16
- "predict_recall": 0.9675446848541862,
17
- "predict_runtime": 10.5734,
18
- "predict_samples_per_second": 221.594,
19
- "predict_steps_per_second": 3.499,
20
- "train_loss": 0.08383050831881436,
21
- "train_runtime": 1267.5015,
22
  "train_samples": 8437,
23
- "train_samples_per_second": 133.128,
24
- "train_steps_per_second": 8.331
25
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_accuracy": 0.9612441617807811,
4
+ "eval_f1": 0.829950630828305,
5
+ "eval_loss": 0.16526208817958832,
6
+ "eval_precision": 0.785974025974026,
7
  "eval_recall": 0.8791400348634515,
8
+ "eval_runtime": 4.5952,
9
  "eval_samples": 935,
10
+ "eval_samples_per_second": 203.475,
11
+ "eval_steps_per_second": 3.264,
12
+ "predict_accuracy": 0.9910021718895439,
13
+ "predict_f1": 0.9460332103321033,
14
+ "predict_loss": 0.03333849087357521,
15
+ "predict_precision": 0.928054298642534,
16
+ "predict_recall": 0.964722483537159,
17
+ "predict_runtime": 10.6661,
18
+ "predict_samples_per_second": 219.668,
19
+ "predict_steps_per_second": 3.469,
20
+ "train_loss": 0.0814560384461374,
21
+ "train_runtime": 1318.3663,
22
  "train_samples": 8437,
23
+ "train_samples_per_second": 127.992,
24
+ "train_steps_per_second": 8.01
25
  }
eval_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_accuracy": 0.96064791811587,
4
- "eval_f1": 0.8354500276090558,
5
- "eval_loss": 0.16905945539474487,
6
- "eval_precision": 0.7958968963703315,
7
  "eval_recall": 0.8791400348634515,
8
- "eval_runtime": 4.4133,
9
  "eval_samples": 935,
10
- "eval_samples_per_second": 211.862,
11
- "eval_steps_per_second": 3.399
12
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_accuracy": 0.9612441617807811,
4
+ "eval_f1": 0.829950630828305,
5
+ "eval_loss": 0.16526208817958832,
6
+ "eval_precision": 0.785974025974026,
7
  "eval_recall": 0.8791400348634515,
8
+ "eval_runtime": 4.5952,
9
  "eval_samples": 935,
10
+ "eval_samples_per_second": 203.475,
11
+ "eval_steps_per_second": 3.264
12
  }
predict_results.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "predict_accuracy": 0.9909633881476885,
3
- "predict_f1": 0.9484726224783863,
4
- "predict_loss": 0.03368888050317764,
5
- "predict_precision": 0.9301379154420076,
6
- "predict_recall": 0.9675446848541862,
7
- "predict_runtime": 10.5734,
8
- "predict_samples_per_second": 221.594,
9
- "predict_steps_per_second": 3.499
10
  }
 
1
  {
2
+ "predict_accuracy": 0.9910021718895439,
3
+ "predict_f1": 0.9460332103321033,
4
+ "predict_loss": 0.03333849087357521,
5
+ "predict_precision": 0.928054298642534,
6
+ "predict_recall": 0.964722483537159,
7
+ "predict_runtime": 10.6661,
8
+ "predict_samples_per_second": 219.668,
9
+ "predict_steps_per_second": 3.469
10
  }
predictions.txt CHANGED
The diff for this file is too large to render. See raw diff
 
runs/May25_08-45-45_indolem-petl-vm/events.out.tfevents.1716628086.indolem-petl-vm.2046665.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ace42df963bbd1d29a68716e0990e32b2a9b319924fb6c779bc0f7e3b50394c2
3
+ size 560
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
- "train_loss": 0.08383050831881436,
4
- "train_runtime": 1267.5015,
5
  "train_samples": 8437,
6
- "train_samples_per_second": 133.128,
7
- "train_steps_per_second": 8.331
8
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "train_loss": 0.0814560384461374,
4
+ "train_runtime": 1318.3663,
5
  "train_samples": 8437,
6
+ "train_samples_per_second": 127.992,
7
+ "train_steps_per_second": 8.01
8
  }
trainer_state.json CHANGED
@@ -10,392 +10,392 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 1.0725470781326294,
14
  "learning_rate": 4.75e-05,
15
- "loss": 0.4425,
16
  "step": 528
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_accuracy": 0.9451455828281824,
21
- "eval_f1": 0.7537506466632177,
22
- "eval_loss": 0.163856640458107,
23
- "eval_precision": 0.6792540792540792,
24
- "eval_recall": 0.8466008134805346,
25
- "eval_runtime": 4.0498,
26
- "eval_samples_per_second": 230.874,
27
- "eval_steps_per_second": 3.704,
28
  "step": 528
29
  },
30
  {
31
  "epoch": 2.0,
32
- "grad_norm": 0.5166040658950806,
33
  "learning_rate": 4.5e-05,
34
- "loss": 0.1477,
35
  "step": 1056
36
  },
37
  {
38
  "epoch": 2.0,
39
- "eval_accuracy": 0.9591076219815164,
40
- "eval_f1": 0.81545528235612,
41
- "eval_loss": 0.12932351231575012,
42
- "eval_precision": 0.7621212121212121,
43
- "eval_recall": 0.8768158047646717,
44
- "eval_runtime": 4.3607,
45
- "eval_samples_per_second": 214.417,
46
- "eval_steps_per_second": 3.44,
47
  "step": 1056
48
  },
49
  {
50
  "epoch": 3.0,
51
- "grad_norm": 1.065200686454773,
52
  "learning_rate": 4.25e-05,
53
- "loss": 0.1181,
54
  "step": 1584
55
  },
56
  {
57
  "epoch": 3.0,
58
- "eval_accuracy": 0.9586604392328332,
59
- "eval_f1": 0.8241286863270778,
60
- "eval_loss": 0.13230960071086884,
61
- "eval_precision": 0.7650572424091587,
62
- "eval_recall": 0.8930854154561302,
63
- "eval_runtime": 4.4799,
64
- "eval_samples_per_second": 208.711,
65
- "eval_steps_per_second": 3.348,
66
  "step": 1584
67
  },
68
  {
69
  "epoch": 4.0,
70
- "grad_norm": 1.7599948644638062,
71
  "learning_rate": 4e-05,
72
- "loss": 0.1024,
73
  "step": 2112
74
  },
75
  {
76
  "epoch": 4.0,
77
- "eval_accuracy": 0.9584616913445294,
78
- "eval_f1": 0.8290968090859925,
79
- "eval_loss": 0.13213977217674255,
80
- "eval_precision": 0.7754172989377845,
81
- "eval_recall": 0.8907611853573504,
82
- "eval_runtime": 4.7508,
83
- "eval_samples_per_second": 196.81,
84
- "eval_steps_per_second": 3.157,
85
  "step": 2112
86
  },
87
  {
88
  "epoch": 5.0,
89
- "grad_norm": 1.1072996854782104,
90
  "learning_rate": 3.7500000000000003e-05,
91
- "loss": 0.0897,
92
  "step": 2640
93
  },
94
  {
95
  "epoch": 5.0,
96
- "eval_accuracy": 0.963231640663818,
97
- "eval_f1": 0.8354500276090558,
98
- "eval_loss": 0.1244335025548935,
99
- "eval_precision": 0.7958968963703315,
100
- "eval_recall": 0.8791400348634515,
101
- "eval_runtime": 4.5426,
102
- "eval_samples_per_second": 205.827,
103
- "eval_steps_per_second": 3.302,
104
  "step": 2640
105
  },
106
  {
107
  "epoch": 6.0,
108
- "grad_norm": 1.0734171867370605,
109
  "learning_rate": 3.5e-05,
110
- "loss": 0.0819,
111
  "step": 3168
112
  },
113
  {
114
  "epoch": 6.0,
115
- "eval_accuracy": 0.9630328927755143,
116
- "eval_f1": 0.8455598455598455,
117
- "eval_loss": 0.13085626065731049,
118
- "eval_precision": 0.8047244094488188,
119
- "eval_recall": 0.8907611853573504,
120
- "eval_runtime": 4.5169,
121
- "eval_samples_per_second": 207.0,
122
- "eval_steps_per_second": 3.321,
123
  "step": 3168
124
  },
125
  {
126
  "epoch": 7.0,
127
- "grad_norm": 1.2100149393081665,
128
  "learning_rate": 3.2500000000000004e-05,
129
- "loss": 0.0745,
130
  "step": 3696
131
  },
132
  {
133
  "epoch": 7.0,
134
- "eval_accuracy": 0.9610454138924774,
135
- "eval_f1": 0.824793388429752,
136
- "eval_loss": 0.13755032420158386,
137
- "eval_precision": 0.7841801990570979,
138
- "eval_recall": 0.8698431144683324,
139
- "eval_runtime": 4.549,
140
- "eval_samples_per_second": 205.542,
141
- "eval_steps_per_second": 3.297,
142
  "step": 3696
143
  },
144
  {
145
  "epoch": 8.0,
146
- "grad_norm": 0.23592530190944672,
147
  "learning_rate": 3e-05,
148
- "loss": 0.0675,
149
  "step": 4224
150
  },
151
  {
152
  "epoch": 8.0,
153
- "eval_accuracy": 0.9576666997913147,
154
- "eval_f1": 0.8197706171490988,
155
- "eval_loss": 0.1482686847448349,
156
- "eval_precision": 0.7733127253992788,
157
- "eval_recall": 0.8721673445671121,
158
- "eval_runtime": 4.5189,
159
- "eval_samples_per_second": 206.909,
160
- "eval_steps_per_second": 3.319,
161
  "step": 4224
162
  },
163
  {
164
  "epoch": 9.0,
165
- "grad_norm": 0.7292873859405518,
166
  "learning_rate": 2.7500000000000004e-05,
167
- "loss": 0.0628,
168
  "step": 4752
169
  },
170
  {
171
  "epoch": 9.0,
172
- "eval_accuracy": 0.9620391533339958,
173
- "eval_f1": 0.831951354339414,
174
- "eval_loss": 0.1377291977405548,
175
- "eval_precision": 0.7933579335793358,
176
- "eval_recall": 0.8744915746658919,
177
- "eval_runtime": 4.5138,
178
- "eval_samples_per_second": 207.145,
179
- "eval_steps_per_second": 3.323,
180
  "step": 4752
181
  },
182
  {
183
  "epoch": 10.0,
184
- "grad_norm": 1.6225199699401855,
185
  "learning_rate": 2.5e-05,
186
- "loss": 0.0579,
187
  "step": 5280
188
  },
189
  {
190
  "epoch": 10.0,
191
- "eval_accuracy": 0.9608466660041737,
192
- "eval_f1": 0.837094111172262,
193
- "eval_loss": 0.14934156835079193,
194
- "eval_precision": 0.7950862519602718,
195
- "eval_recall": 0.8837884950610111,
196
- "eval_runtime": 4.4996,
197
- "eval_samples_per_second": 207.794,
198
- "eval_steps_per_second": 3.334,
199
  "step": 5280
200
  },
201
  {
202
  "epoch": 11.0,
203
- "grad_norm": 1.5616343021392822,
204
  "learning_rate": 2.25e-05,
205
- "loss": 0.0547,
206
  "step": 5808
207
  },
208
  {
209
  "epoch": 11.0,
210
- "eval_accuracy": 0.9622379012222995,
211
- "eval_f1": 0.841845140032949,
212
- "eval_loss": 0.144600972533226,
213
- "eval_precision": 0.7980218636127017,
214
- "eval_recall": 0.8907611853573504,
215
- "eval_runtime": 4.5205,
216
- "eval_samples_per_second": 206.834,
217
- "eval_steps_per_second": 3.318,
218
  "step": 5808
219
  },
220
  {
221
  "epoch": 12.0,
222
- "grad_norm": 2.8776261806488037,
223
  "learning_rate": 2e-05,
224
- "loss": 0.0505,
225
  "step": 6336
226
  },
227
  {
228
  "epoch": 12.0,
229
- "eval_accuracy": 0.9610454138924774,
230
- "eval_f1": 0.8361737218251787,
231
- "eval_loss": 0.15445734560489655,
232
- "eval_precision": 0.7934272300469484,
233
- "eval_recall": 0.8837884950610111,
234
- "eval_runtime": 4.5955,
235
- "eval_samples_per_second": 203.459,
236
- "eval_steps_per_second": 3.264,
237
  "step": 6336
238
  },
239
  {
240
  "epoch": 13.0,
241
- "grad_norm": 0.46279987692832947,
242
  "learning_rate": 1.75e-05,
243
- "loss": 0.0487,
244
  "step": 6864
245
  },
246
  {
247
  "epoch": 13.0,
248
  "eval_accuracy": 0.9608466660041737,
249
- "eval_f1": 0.8358126721763085,
250
- "eval_loss": 0.15944679081439972,
251
- "eval_precision": 0.7946568884232582,
252
- "eval_recall": 0.8814642649622313,
253
- "eval_runtime": 4.594,
254
- "eval_samples_per_second": 203.525,
255
- "eval_steps_per_second": 3.265,
256
  "step": 6864
257
  },
258
  {
259
  "epoch": 14.0,
260
- "grad_norm": 0.2790946066379547,
261
  "learning_rate": 1.5e-05,
262
- "loss": 0.0454,
263
  "step": 7392
264
  },
265
  {
266
  "epoch": 14.0,
267
- "eval_accuracy": 0.9612441617807811,
268
- "eval_f1": 0.8416062465142219,
269
- "eval_loss": 0.15950244665145874,
270
- "eval_precision": 0.8091152815013405,
271
- "eval_recall": 0.8768158047646717,
272
- "eval_runtime": 4.496,
273
- "eval_samples_per_second": 207.964,
274
- "eval_steps_per_second": 3.336,
275
  "step": 7392
276
  },
277
  {
278
  "epoch": 15.0,
279
- "grad_norm": 0.2504570782184601,
280
  "learning_rate": 1.25e-05,
281
- "loss": 0.0429,
282
  "step": 7920
283
  },
284
  {
285
  "epoch": 15.0,
286
- "eval_accuracy": 0.9608466660041737,
287
- "eval_f1": 0.8365758754863813,
288
- "eval_loss": 0.16204151511192322,
289
- "eval_precision": 0.8018114011720832,
290
- "eval_recall": 0.8744915746658919,
291
- "eval_runtime": 4.51,
292
- "eval_samples_per_second": 207.319,
293
- "eval_steps_per_second": 3.326,
294
  "step": 7920
295
  },
296
  {
297
  "epoch": 16.0,
298
- "grad_norm": 1.7614328861236572,
299
  "learning_rate": 1e-05,
300
- "loss": 0.0407,
301
  "step": 8448
302
  },
303
  {
304
  "epoch": 16.0,
305
- "eval_accuracy": 0.963231640663818,
306
- "eval_f1": 0.8454697053918844,
307
- "eval_loss": 0.15911279618740082,
308
- "eval_precision": 0.810335641981886,
309
- "eval_recall": 0.8837884950610111,
310
- "eval_runtime": 4.6043,
311
- "eval_samples_per_second": 203.07,
312
- "eval_steps_per_second": 3.258,
313
  "step": 8448
314
  },
315
  {
316
  "epoch": 17.0,
317
- "grad_norm": 0.08284368366003036,
318
  "learning_rate": 7.5e-06,
319
- "loss": 0.0397,
320
  "step": 8976
321
  },
322
  {
323
  "epoch": 17.0,
324
- "eval_accuracy": 0.96064791811587,
325
- "eval_f1": 0.83601108033241,
326
- "eval_loss": 0.1620124876499176,
327
- "eval_precision": 0.7988353626257279,
328
- "eval_recall": 0.8768158047646717,
329
- "eval_runtime": 4.5145,
330
- "eval_samples_per_second": 207.113,
331
- "eval_steps_per_second": 3.323,
332
  "step": 8976
333
  },
334
  {
335
  "epoch": 18.0,
336
- "grad_norm": 1.4568191766738892,
337
  "learning_rate": 5e-06,
338
- "loss": 0.0375,
339
  "step": 9504
340
  },
341
  {
342
  "epoch": 18.0,
343
- "eval_accuracy": 0.9592566828977442,
344
- "eval_f1": 0.8303229337712096,
345
- "eval_loss": 0.16869747638702393,
346
- "eval_precision": 0.784790481117434,
347
- "eval_recall": 0.8814642649622313,
348
- "eval_runtime": 4.5316,
349
- "eval_samples_per_second": 206.33,
350
- "eval_steps_per_second": 3.31,
351
  "step": 9504
352
  },
353
  {
354
  "epoch": 19.0,
355
- "grad_norm": 1.1597135066986084,
356
  "learning_rate": 2.5e-06,
357
- "loss": 0.0361,
358
  "step": 10032
359
  },
360
  {
361
  "epoch": 19.0,
362
- "eval_accuracy": 0.9614429096690847,
363
- "eval_f1": 0.8378678511937813,
364
- "eval_loss": 0.1673024296760559,
365
- "eval_precision": 0.8022328548644339,
366
- "eval_recall": 0.8768158047646717,
367
- "eval_runtime": 4.6304,
368
- "eval_samples_per_second": 201.924,
369
- "eval_steps_per_second": 3.239,
370
  "step": 10032
371
  },
372
  {
373
  "epoch": 20.0,
374
- "grad_norm": 0.9693964719772339,
375
  "learning_rate": 0.0,
376
- "loss": 0.0356,
377
  "step": 10560
378
  },
379
  {
380
  "epoch": 20.0,
381
- "eval_accuracy": 0.96064791811587,
382
- "eval_f1": 0.8354500276090558,
383
- "eval_loss": 0.16905945539474487,
384
- "eval_precision": 0.7958968963703315,
385
  "eval_recall": 0.8791400348634515,
386
- "eval_runtime": 4.6757,
387
- "eval_samples_per_second": 199.97,
388
- "eval_steps_per_second": 3.208,
389
  "step": 10560
390
  },
391
  {
392
  "epoch": 20.0,
393
  "step": 10560,
394
  "total_flos": 5062583230111038.0,
395
- "train_loss": 0.08383050831881436,
396
- "train_runtime": 1267.5015,
397
- "train_samples_per_second": 133.128,
398
- "train_steps_per_second": 8.331
399
  }
400
  ],
401
  "logging_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 0.8100232481956482,
14
  "learning_rate": 4.75e-05,
15
+ "loss": 0.4348,
16
  "step": 528
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_accuracy": 0.950561462784458,
21
+ "eval_f1": 0.7816627816627817,
22
+ "eval_loss": 0.15243592858314514,
23
+ "eval_precision": 0.7051401869158879,
24
+ "eval_recall": 0.8768158047646717,
25
+ "eval_runtime": 4.5709,
26
+ "eval_samples_per_second": 204.556,
27
+ "eval_steps_per_second": 3.282,
28
  "step": 528
29
  },
30
  {
31
  "epoch": 2.0,
32
+ "grad_norm": 0.6027082800865173,
33
  "learning_rate": 4.5e-05,
34
+ "loss": 0.1432,
35
  "step": 1056
36
  },
37
  {
38
  "epoch": 2.0,
39
+ "eval_accuracy": 0.957467951903011,
40
+ "eval_f1": 0.7962466487935658,
41
+ "eval_loss": 0.1272890865802765,
42
+ "eval_precision": 0.7391737182677949,
43
+ "eval_recall": 0.862870424171993,
44
+ "eval_runtime": 4.7255,
45
+ "eval_samples_per_second": 197.861,
46
+ "eval_steps_per_second": 3.174,
47
  "step": 1056
48
  },
49
  {
50
  "epoch": 3.0,
51
+ "grad_norm": 1.0035598278045654,
52
  "learning_rate": 4.25e-05,
53
+ "loss": 0.1149,
54
  "step": 1584
55
  },
56
  {
57
  "epoch": 3.0,
58
+ "eval_accuracy": 0.9554804730199742,
59
+ "eval_f1": 0.8087049973132725,
60
+ "eval_loss": 0.13823477923870087,
61
+ "eval_precision": 0.7521239380309845,
62
+ "eval_recall": 0.8744915746658919,
63
+ "eval_runtime": 4.5819,
64
+ "eval_samples_per_second": 204.062,
65
+ "eval_steps_per_second": 3.274,
66
  "step": 1584
67
  },
68
  {
69
  "epoch": 4.0,
70
+ "grad_norm": 1.8790736198425293,
71
  "learning_rate": 4e-05,
72
+ "loss": 0.0978,
73
  "step": 2112
74
  },
75
  {
76
  "epoch": 4.0,
77
+ "eval_accuracy": 0.9576666997913147,
78
+ "eval_f1": 0.8132894014277869,
79
+ "eval_loss": 0.13224001228809357,
80
+ "eval_precision": 0.7709526288391463,
81
+ "eval_recall": 0.8605461940732132,
82
+ "eval_runtime": 4.596,
83
+ "eval_samples_per_second": 203.437,
84
+ "eval_steps_per_second": 3.264,
85
  "step": 2112
86
  },
87
  {
88
  "epoch": 5.0,
89
+ "grad_norm": 0.9501886367797852,
90
  "learning_rate": 3.7500000000000003e-05,
91
+ "loss": 0.0873,
92
  "step": 2640
93
  },
94
  {
95
  "epoch": 5.0,
96
+ "eval_accuracy": 0.9578654476796185,
97
+ "eval_f1": 0.8121964382083108,
98
+ "eval_loss": 0.13743577897548676,
99
+ "eval_precision": 0.7581863979848866,
100
+ "eval_recall": 0.8744915746658919,
101
+ "eval_runtime": 4.6679,
102
+ "eval_samples_per_second": 200.305,
103
+ "eval_steps_per_second": 3.213,
104
  "step": 2640
105
  },
106
  {
107
  "epoch": 6.0,
108
+ "grad_norm": 1.0721073150634766,
109
  "learning_rate": 3.5e-05,
110
+ "loss": 0.0781,
111
  "step": 3168
112
  },
113
  {
114
  "epoch": 6.0,
115
+ "eval_accuracy": 0.963231640663818,
116
+ "eval_f1": 0.8332400671516508,
117
+ "eval_loss": 0.13460490107536316,
118
+ "eval_precision": 0.8035617916891528,
119
+ "eval_recall": 0.8651946542707728,
120
+ "eval_runtime": 4.5754,
121
+ "eval_samples_per_second": 204.353,
122
+ "eval_steps_per_second": 3.278,
123
  "step": 3168
124
  },
125
  {
126
  "epoch": 7.0,
127
+ "grad_norm": 0.6200407147407532,
128
  "learning_rate": 3.2500000000000004e-05,
129
+ "loss": 0.0709,
130
  "step": 3696
131
  },
132
  {
133
  "epoch": 7.0,
134
+ "eval_accuracy": 0.96064791811587,
135
+ "eval_f1": 0.829950630828305,
136
+ "eval_loss": 0.14596135914325714,
137
+ "eval_precision": 0.785974025974026,
138
+ "eval_recall": 0.8791400348634515,
139
+ "eval_runtime": 4.6372,
140
+ "eval_samples_per_second": 201.629,
141
+ "eval_steps_per_second": 3.235,
142
  "step": 3696
143
  },
144
  {
145
  "epoch": 8.0,
146
+ "grad_norm": 0.3982696235179901,
147
  "learning_rate": 3e-05,
148
+ "loss": 0.0656,
149
  "step": 4224
150
  },
151
  {
152
  "epoch": 8.0,
153
+ "eval_accuracy": 0.9578654476796185,
154
+ "eval_f1": 0.8218040951853902,
155
+ "eval_loss": 0.14514297246932983,
156
+ "eval_precision": 0.7844690966719493,
157
+ "eval_recall": 0.862870424171993,
158
+ "eval_runtime": 4.6002,
159
+ "eval_samples_per_second": 203.254,
160
+ "eval_steps_per_second": 3.261,
161
  "step": 4224
162
  },
163
  {
164
  "epoch": 9.0,
165
+ "grad_norm": 1.1954327821731567,
166
  "learning_rate": 2.7500000000000004e-05,
167
+ "loss": 0.0606,
168
  "step": 4752
169
  },
170
  {
171
  "epoch": 9.0,
172
+ "eval_accuracy": 0.9580641955679221,
173
+ "eval_f1": 0.8185816382627817,
174
+ "eval_loss": 0.14650680124759674,
175
+ "eval_precision": 0.7767344809598331,
176
+ "eval_recall": 0.8651946542707728,
177
+ "eval_runtime": 4.5627,
178
+ "eval_samples_per_second": 204.924,
179
+ "eval_steps_per_second": 3.288,
180
  "step": 4752
181
  },
182
  {
183
  "epoch": 10.0,
184
+ "grad_norm": 1.46392822265625,
185
  "learning_rate": 2.5e-05,
186
+ "loss": 0.0563,
187
  "step": 5280
188
  },
189
  {
190
  "epoch": 10.0,
191
+ "eval_accuracy": 0.9590579350094405,
192
+ "eval_f1": 0.8308621636463481,
193
+ "eval_loss": 0.15200696885585785,
194
+ "eval_precision": 0.7876106194690266,
195
+ "eval_recall": 0.8791400348634515,
196
+ "eval_runtime": 4.5886,
197
+ "eval_samples_per_second": 203.765,
198
+ "eval_steps_per_second": 3.269,
199
  "step": 5280
200
  },
201
  {
202
  "epoch": 11.0,
203
+ "grad_norm": 1.8282573223114014,
204
  "learning_rate": 2.25e-05,
205
+ "loss": 0.0523,
206
  "step": 5808
207
  },
208
  {
209
  "epoch": 11.0,
210
+ "eval_accuracy": 0.9566729603497963,
211
+ "eval_f1": 0.8245231607629426,
212
+ "eval_loss": 0.15866349637508392,
213
+ "eval_precision": 0.7762955361723961,
214
+ "eval_recall": 0.8791400348634515,
215
+ "eval_runtime": 4.6167,
216
+ "eval_samples_per_second": 202.525,
217
+ "eval_steps_per_second": 3.249,
218
  "step": 5808
219
  },
220
  {
221
  "epoch": 12.0,
222
+ "grad_norm": 0.6378235816955566,
223
  "learning_rate": 2e-05,
224
+ "loss": 0.0504,
225
  "step": 6336
226
  },
227
  {
228
  "epoch": 12.0,
229
+ "eval_accuracy": 0.9592566828977442,
230
+ "eval_f1": 0.8310626702997276,
231
+ "eval_loss": 0.15401104092597961,
232
+ "eval_precision": 0.7824525397639815,
233
+ "eval_recall": 0.8861127251597908,
234
+ "eval_runtime": 5.0455,
235
+ "eval_samples_per_second": 185.315,
236
+ "eval_steps_per_second": 2.973,
237
  "step": 6336
238
  },
239
  {
240
  "epoch": 13.0,
241
+ "grad_norm": 0.47105222940444946,
242
  "learning_rate": 1.75e-05,
243
+ "loss": 0.0465,
244
  "step": 6864
245
  },
246
  {
247
  "epoch": 13.0,
248
  "eval_accuracy": 0.9608466660041737,
249
+ "eval_f1": 0.831404958677686,
250
+ "eval_loss": 0.15168695151805878,
251
+ "eval_precision": 0.7904662126767942,
252
+ "eval_recall": 0.8768158047646717,
253
+ "eval_runtime": 4.5874,
254
+ "eval_samples_per_second": 203.819,
255
+ "eval_steps_per_second": 3.27,
256
  "step": 6864
257
  },
258
  {
259
  "epoch": 14.0,
260
+ "grad_norm": 0.6378626227378845,
261
  "learning_rate": 1.5e-05,
262
+ "loss": 0.0435,
263
  "step": 7392
264
  },
265
  {
266
  "epoch": 14.0,
267
+ "eval_accuracy": 0.9622379012222995,
268
+ "eval_f1": 0.8302828618968386,
269
+ "eval_loss": 0.1564669907093048,
270
+ "eval_precision": 0.7941644562334218,
271
+ "eval_recall": 0.8698431144683324,
272
+ "eval_runtime": 4.6735,
273
+ "eval_samples_per_second": 200.064,
274
+ "eval_steps_per_second": 3.21,
275
  "step": 7392
276
  },
277
  {
278
  "epoch": 15.0,
279
+ "grad_norm": 0.05927232280373573,
280
  "learning_rate": 1.25e-05,
281
+ "loss": 0.0411,
282
  "step": 7920
283
  },
284
  {
285
  "epoch": 15.0,
286
+ "eval_accuracy": 0.9624366491106032,
287
+ "eval_f1": 0.8267629094947252,
288
+ "eval_loss": 0.15700677037239075,
289
+ "eval_precision": 0.7916002126528442,
290
+ "eval_recall": 0.8651946542707728,
291
+ "eval_runtime": 4.585,
292
+ "eval_samples_per_second": 203.925,
293
+ "eval_steps_per_second": 3.272,
294
  "step": 7920
295
  },
296
  {
297
  "epoch": 16.0,
298
+ "grad_norm": 2.510366916656494,
299
  "learning_rate": 1e-05,
300
+ "loss": 0.04,
301
  "step": 8448
302
  },
303
  {
304
  "epoch": 16.0,
305
+ "eval_accuracy": 0.9618404054456922,
306
+ "eval_f1": 0.8308621636463481,
307
+ "eval_loss": 0.16134144365787506,
308
+ "eval_precision": 0.7876106194690266,
309
+ "eval_recall": 0.8791400348634515,
310
+ "eval_runtime": 4.598,
311
+ "eval_samples_per_second": 203.348,
312
+ "eval_steps_per_second": 3.262,
313
  "step": 8448
314
  },
315
  {
316
  "epoch": 17.0,
317
+ "grad_norm": 0.10409926623106003,
318
  "learning_rate": 7.5e-06,
319
+ "loss": 0.0385,
320
  "step": 8976
321
  },
322
  {
323
  "epoch": 17.0,
324
+ "eval_accuracy": 0.9594554307860479,
325
+ "eval_f1": 0.8274552360282147,
326
+ "eval_loss": 0.1708444207906723,
327
+ "eval_precision": 0.7760814249363868,
328
+ "eval_recall": 0.8861127251597908,
329
+ "eval_runtime": 4.6748,
330
+ "eval_samples_per_second": 200.008,
331
+ "eval_steps_per_second": 3.209,
332
  "step": 8976
333
  },
334
  {
335
  "epoch": 18.0,
336
+ "grad_norm": 1.0784730911254883,
337
  "learning_rate": 5e-06,
338
+ "loss": 0.037,
339
  "step": 9504
340
  },
341
  {
342
  "epoch": 18.0,
343
+ "eval_accuracy": 0.9620391533339958,
344
+ "eval_f1": 0.831578947368421,
345
+ "eval_loss": 0.1626226305961609,
346
+ "eval_precision": 0.7946003176283748,
347
+ "eval_recall": 0.8721673445671121,
348
+ "eval_runtime": 5.2776,
349
+ "eval_samples_per_second": 177.163,
350
+ "eval_steps_per_second": 2.842,
351
  "step": 9504
352
  },
353
  {
354
  "epoch": 19.0,
355
+ "grad_norm": 0.881395697593689,
356
  "learning_rate": 2.5e-06,
357
+ "loss": 0.035,
358
  "step": 10032
359
  },
360
  {
361
  "epoch": 19.0,
362
+ "eval_accuracy": 0.9622379012222995,
363
+ "eval_f1": 0.8297401879491432,
364
+ "eval_loss": 0.1642664223909378,
365
+ "eval_precision": 0.7912493410648392,
366
+ "eval_recall": 0.8721673445671121,
367
+ "eval_runtime": 4.5963,
368
+ "eval_samples_per_second": 203.426,
369
+ "eval_steps_per_second": 3.264,
370
  "step": 10032
371
  },
372
  {
373
  "epoch": 20.0,
374
+ "grad_norm": 0.892469048500061,
375
  "learning_rate": 0.0,
376
+ "loss": 0.0352,
377
  "step": 10560
378
  },
379
  {
380
  "epoch": 20.0,
381
+ "eval_accuracy": 0.9612441617807811,
382
+ "eval_f1": 0.829950630828305,
383
+ "eval_loss": 0.16526208817958832,
384
+ "eval_precision": 0.785974025974026,
385
  "eval_recall": 0.8791400348634515,
386
+ "eval_runtime": 4.5732,
387
+ "eval_samples_per_second": 204.45,
388
+ "eval_steps_per_second": 3.28,
389
  "step": 10560
390
  },
391
  {
392
  "epoch": 20.0,
393
  "step": 10560,
394
  "total_flos": 5062583230111038.0,
395
+ "train_loss": 0.0814560384461374,
396
+ "train_runtime": 1318.3663,
397
+ "train_samples_per_second": 127.992,
398
+ "train_steps_per_second": 8.01
399
  }
400
  ],
401
  "logging_steps": 500,