apwic commited on
Commit
3f0dcfd
1 Parent(s): 556bf37

End of training

Browse files
README.md CHANGED
@@ -1,4 +1,6 @@
1
  ---
 
 
2
  license: mit
3
  base_model: indolem/indobert-base-uncased
4
  tags:
 
1
  ---
2
+ language:
3
+ - id
4
  license: mit
5
  base_model: indolem/indobert-base-uncased
6
  tags:
all_results.json CHANGED
@@ -1,21 +1,21 @@
1
  {
2
  "accuracy": 0.9090009891196835,
3
  "epoch": 20.0,
4
- "eval_accuracy": 0.9022556390977443,
5
- "eval_f1": 0.8811928811928812,
6
- "eval_loss": 0.29284632205963135,
7
- "eval_precision": 0.8842105263157894,
8
- "eval_recall": 0.878341516639389,
9
- "eval_runtime": 5.4499,
10
  "eval_samples": 399,
11
- "eval_samples_per_second": 73.212,
12
- "eval_steps_per_second": 9.174,
13
- "f1": 0.8907735522904062,
14
- "precision": 0.8900210970464135,
15
- "recall": 0.8915373175070833,
16
- "train_loss": 0.2130153269064231,
17
- "train_runtime": 2293.0977,
18
  "train_samples": 3638,
19
- "train_samples_per_second": 31.73,
20
- "train_steps_per_second": 1.064
21
  }
 
1
  {
2
  "accuracy": 0.9090009891196835,
3
  "epoch": 20.0,
4
+ "eval_accuracy": 0.8872180451127819,
5
+ "eval_f1": 0.8662440310793597,
6
+ "eval_loss": 0.3288058042526245,
7
+ "eval_precision": 0.8606158357771261,
8
+ "eval_recall": 0.872704128023277,
9
+ "eval_runtime": 2.0252,
10
  "eval_samples": 399,
11
+ "eval_samples_per_second": 197.016,
12
+ "eval_steps_per_second": 24.689,
13
+ "f1": 0.8918102806521253,
14
+ "precision": 0.8875983617873084,
15
+ "recall": 0.8964202678916009,
16
+ "train_loss": 0.2203434850348801,
17
+ "train_runtime": 746.002,
18
  "train_samples": 3638,
19
+ "train_samples_per_second": 97.533,
20
+ "train_steps_per_second": 3.271
21
  }
eval_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_accuracy": 0.9022556390977443,
4
- "eval_f1": 0.8811928811928812,
5
- "eval_loss": 0.29284632205963135,
6
- "eval_precision": 0.8842105263157894,
7
- "eval_recall": 0.878341516639389,
8
- "eval_runtime": 5.4499,
9
  "eval_samples": 399,
10
- "eval_samples_per_second": 73.212,
11
- "eval_steps_per_second": 9.174
12
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_accuracy": 0.8872180451127819,
4
+ "eval_f1": 0.8662440310793597,
5
+ "eval_loss": 0.3288058042526245,
6
+ "eval_precision": 0.8606158357771261,
7
+ "eval_recall": 0.872704128023277,
8
+ "eval_runtime": 2.0252,
9
  "eval_samples": 399,
10
+ "eval_samples_per_second": 197.016,
11
+ "eval_steps_per_second": 24.689
12
  }
predict_results.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "accuracy": 0.9090009891196835,
3
- "f1": 0.8907735522904062,
4
- "precision": 0.8900210970464135,
5
- "recall": 0.8915373175070833
6
  }
 
1
  {
2
  "accuracy": 0.9090009891196835,
3
+ "f1": 0.8918102806521253,
4
+ "precision": 0.8875983617873084,
5
+ "recall": 0.8964202678916009
6
  }
predict_results.txt CHANGED
@@ -21,7 +21,7 @@ index prediction
21
  19 1
22
  20 1
23
  21 1
24
- 22 0
25
  23 1
26
  24 1
27
  25 0
@@ -40,12 +40,12 @@ index prediction
40
  38 1
41
  39 1
42
  40 1
43
- 41 0
44
  42 1
45
  43 1
46
  44 1
47
  45 0
48
- 46 0
49
  47 1
50
  48 1
51
  49 1
@@ -67,7 +67,7 @@ index prediction
67
  65 1
68
  66 1
69
  67 0
70
- 68 1
71
  69 1
72
  70 1
73
  71 1
@@ -75,7 +75,7 @@ index prediction
75
  73 1
76
  74 1
77
  75 0
78
- 76 0
79
  77 1
80
  78 1
81
  79 1
@@ -104,9 +104,9 @@ index prediction
104
  102 0
105
  103 1
106
  104 1
107
- 105 0
108
  106 1
109
- 107 1
110
  108 1
111
  109 1
112
  110 1
@@ -166,7 +166,7 @@ index prediction
166
  164 1
167
  165 1
168
  166 1
169
- 167 0
170
  168 1
171
  169 1
172
  170 0
@@ -180,18 +180,18 @@ index prediction
180
  178 1
181
  179 0
182
  180 1
183
- 181 0
184
  182 1
185
  183 1
186
  184 1
187
- 185 1
188
  186 1
189
  187 1
190
  188 1
191
  189 1
192
  190 1
193
  191 1
194
- 192 0
195
  193 1
196
  194 1
197
  195 1
@@ -228,7 +228,7 @@ index prediction
228
  226 1
229
  227 1
230
  228 1
231
- 229 0
232
  230 1
233
  231 1
234
  232 1
@@ -236,7 +236,7 @@ index prediction
236
  234 1
237
  235 0
238
  236 1
239
- 237 0
240
  238 1
241
  239 1
242
  240 1
@@ -247,21 +247,21 @@ index prediction
247
  245 1
248
  246 1
249
  247 1
250
- 248 0
251
  249 1
252
  250 1
253
  251 1
254
  252 1
255
- 253 0
256
  254 1
257
  255 1
258
  256 1
259
- 257 1
260
  258 1
261
  259 1
262
  260 1
263
  261 1
264
- 262 1
265
  263 1
266
  264 1
267
  265 1
@@ -277,13 +277,13 @@ index prediction
277
  275 0
278
  276 0
279
  277 1
280
- 278 1
281
  279 1
282
  280 1
283
  281 0
284
  282 1
285
  283 1
286
- 284 1
287
  285 1
288
  286 1
289
  287 1
@@ -322,13 +322,13 @@ index prediction
322
  320 0
323
  321 0
324
  322 0
325
- 323 1
326
  324 1
327
  325 0
328
  326 0
329
  327 0
330
  328 0
331
- 329 0
332
  330 0
333
  331 0
334
  332 0
@@ -346,7 +346,7 @@ index prediction
346
  344 0
347
  345 0
348
  346 0
349
- 347 0
350
  348 0
351
  349 0
352
  350 0
@@ -382,7 +382,7 @@ index prediction
382
  380 0
383
  381 0
384
  382 1
385
- 383 0
386
  384 0
387
  385 0
388
  386 0
@@ -470,7 +470,7 @@ index prediction
470
  468 0
471
  469 0
472
  470 0
473
- 471 0
474
  472 0
475
  473 0
476
  474 0
@@ -490,19 +490,19 @@ index prediction
490
  488 0
491
  489 0
492
  490 0
493
- 491 1
494
  492 0
495
  493 0
496
  494 0
497
  495 0
498
  496 0
499
  497 0
500
- 498 1
501
  499 0
502
  500 0
503
  501 0
504
  502 0
505
- 503 1
506
  504 0
507
  505 0
508
  506 0
@@ -587,11 +587,11 @@ index prediction
587
  585 0
588
  586 0
589
  587 0
590
- 588 0
591
  589 0
592
  590 0
593
  591 1
594
- 592 0
595
  593 0
596
  594 0
597
  595 0
@@ -642,7 +642,7 @@ index prediction
642
  640 0
643
  641 0
644
  642 0
645
- 643 0
646
  644 0
647
  645 0
648
  646 0
@@ -678,7 +678,7 @@ index prediction
678
  676 0
679
  677 0
680
  678 0
681
- 679 0
682
  680 0
683
  681 0
684
  682 0
@@ -953,7 +953,7 @@ index prediction
953
  951 0
954
  952 0
955
  953 0
956
- 954 0
957
  955 0
958
  956 0
959
  957 0
 
21
  19 1
22
  20 1
23
  21 1
24
+ 22 1
25
  23 1
26
  24 1
27
  25 0
 
40
  38 1
41
  39 1
42
  40 1
43
+ 41 1
44
  42 1
45
  43 1
46
  44 1
47
  45 0
48
+ 46 1
49
  47 1
50
  48 1
51
  49 1
 
67
  65 1
68
  66 1
69
  67 0
70
+ 68 0
71
  69 1
72
  70 1
73
  71 1
 
75
  73 1
76
  74 1
77
  75 0
78
+ 76 1
79
  77 1
80
  78 1
81
  79 1
 
104
  102 0
105
  103 1
106
  104 1
107
+ 105 1
108
  106 1
109
+ 107 0
110
  108 1
111
  109 1
112
  110 1
 
166
  164 1
167
  165 1
168
  166 1
169
+ 167 1
170
  168 1
171
  169 1
172
  170 0
 
180
  178 1
181
  179 0
182
  180 1
183
+ 181 1
184
  182 1
185
  183 1
186
  184 1
187
+ 185 0
188
  186 1
189
  187 1
190
  188 1
191
  189 1
192
  190 1
193
  191 1
194
+ 192 1
195
  193 1
196
  194 1
197
  195 1
 
228
  226 1
229
  227 1
230
  228 1
231
+ 229 1
232
  230 1
233
  231 1
234
  232 1
 
236
  234 1
237
  235 0
238
  236 1
239
+ 237 1
240
  238 1
241
  239 1
242
  240 1
 
247
  245 1
248
  246 1
249
  247 1
250
+ 248 1
251
  249 1
252
  250 1
253
  251 1
254
  252 1
255
+ 253 1
256
  254 1
257
  255 1
258
  256 1
259
+ 257 0
260
  258 1
261
  259 1
262
  260 1
263
  261 1
264
+ 262 0
265
  263 1
266
  264 1
267
  265 1
 
277
  275 0
278
  276 0
279
  277 1
280
+ 278 0
281
  279 1
282
  280 1
283
  281 0
284
  282 1
285
  283 1
286
+ 284 0
287
  285 1
288
  286 1
289
  287 1
 
322
  320 0
323
  321 0
324
  322 0
325
+ 323 0
326
  324 1
327
  325 0
328
  326 0
329
  327 0
330
  328 0
331
+ 329 1
332
  330 0
333
  331 0
334
  332 0
 
346
  344 0
347
  345 0
348
  346 0
349
+ 347 1
350
  348 0
351
  349 0
352
  350 0
 
382
  380 0
383
  381 0
384
  382 1
385
+ 383 1
386
  384 0
387
  385 0
388
  386 0
 
470
  468 0
471
  469 0
472
  470 0
473
+ 471 1
474
  472 0
475
  473 0
476
  474 0
 
490
  488 0
491
  489 0
492
  490 0
493
+ 491 0
494
  492 0
495
  493 0
496
  494 0
497
  495 0
498
  496 0
499
  497 0
500
+ 498 0
501
  499 0
502
  500 0
503
  501 0
504
  502 0
505
+ 503 0
506
  504 0
507
  505 0
508
  506 0
 
587
  585 0
588
  586 0
589
  587 0
590
+ 588 1
591
  589 0
592
  590 0
593
  591 1
594
+ 592 1
595
  593 0
596
  594 0
597
  595 0
 
642
  640 0
643
  641 0
644
  642 0
645
+ 643 1
646
  644 0
647
  645 0
648
  646 0
 
678
  676 0
679
  677 0
680
  678 0
681
+ 679 1
682
  680 0
683
  681 0
684
  682 0
 
953
  951 0
954
  952 0
955
  953 0
956
+ 954 1
957
  955 0
958
  956 0
959
  957 0
runs/Jun03_15-48-01_a358b85c7679/events.out.tfevents.1717430443.a358b85c7679.183185.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25b94d9bcf029836ebd4c3bcf6a7b0e59fc0deaf499f3fd73103deebfdc1809a
3
+ size 560
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
- "train_loss": 0.2130153269064231,
4
- "train_runtime": 2293.0977,
5
  "train_samples": 3638,
6
- "train_samples_per_second": 31.73,
7
- "train_steps_per_second": 1.064
8
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "train_loss": 0.2203434850348801,
4
+ "train_runtime": 746.002,
5
  "train_samples": 3638,
6
+ "train_samples_per_second": 97.533,
7
+ "train_steps_per_second": 3.271
8
  }
trainer_state.json CHANGED
@@ -10,392 +10,392 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 4.303664207458496,
14
  "learning_rate": 4.75e-05,
15
- "loss": 0.5535,
16
  "step": 122
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_accuracy": 0.7293233082706767,
21
- "eval_f1": 0.6372727272727272,
22
- "eval_loss": 0.49923330545425415,
23
- "eval_precision": 0.6645702306079665,
24
- "eval_recall": 0.6284779050736498,
25
- "eval_runtime": 5.6404,
26
- "eval_samples_per_second": 70.74,
27
- "eval_steps_per_second": 8.865,
28
  "step": 122
29
  },
30
  {
31
  "epoch": 2.0,
32
- "grad_norm": 4.080423831939697,
33
  "learning_rate": 4.5e-05,
34
- "loss": 0.444,
35
  "step": 244
36
  },
37
  {
38
  "epoch": 2.0,
39
- "eval_accuracy": 0.8170426065162907,
40
- "eval_f1": 0.7960536910871955,
41
- "eval_loss": 0.4052737355232239,
42
- "eval_precision": 0.7846938775510204,
43
- "eval_recall": 0.8255591925804692,
44
- "eval_runtime": 5.5824,
45
- "eval_samples_per_second": 71.475,
46
- "eval_steps_per_second": 8.957,
47
  "step": 244
48
  },
49
  {
50
  "epoch": 3.0,
51
- "grad_norm": 4.110426425933838,
52
  "learning_rate": 4.25e-05,
53
- "loss": 0.3464,
54
  "step": 366
55
  },
56
  {
57
  "epoch": 3.0,
58
- "eval_accuracy": 0.8421052631578947,
59
- "eval_f1": 0.7905197629940748,
60
- "eval_loss": 0.3424628674983978,
61
- "eval_precision": 0.8345238095238094,
62
- "eval_recall": 0.7682760501909438,
63
- "eval_runtime": 5.558,
64
- "eval_samples_per_second": 71.789,
65
- "eval_steps_per_second": 8.996,
66
  "step": 366
67
  },
68
  {
69
  "epoch": 4.0,
70
- "grad_norm": 1.6437464952468872,
71
  "learning_rate": 4e-05,
72
- "loss": 0.2852,
73
  "step": 488
74
  },
75
  {
76
  "epoch": 4.0,
77
  "eval_accuracy": 0.8721804511278195,
78
- "eval_f1": 0.8469505178365937,
79
- "eval_loss": 0.3135569393634796,
80
- "eval_precision": 0.844489247311828,
81
- "eval_recall": 0.8495635570103655,
82
- "eval_runtime": 5.5547,
83
- "eval_samples_per_second": 71.83,
84
- "eval_steps_per_second": 9.001,
85
  "step": 488
86
  },
87
  {
88
  "epoch": 5.0,
89
- "grad_norm": 3.6121034622192383,
90
  "learning_rate": 3.7500000000000003e-05,
91
- "loss": 0.2608,
92
  "step": 610
93
  },
94
  {
95
  "epoch": 5.0,
96
- "eval_accuracy": 0.8721804511278195,
97
- "eval_f1": 0.8469505178365937,
98
- "eval_loss": 0.3060314953327179,
99
- "eval_precision": 0.844489247311828,
100
- "eval_recall": 0.8495635570103655,
101
- "eval_runtime": 5.5535,
102
- "eval_samples_per_second": 71.846,
103
- "eval_steps_per_second": 9.003,
104
  "step": 610
105
  },
106
  {
107
  "epoch": 6.0,
108
- "grad_norm": 2.0154571533203125,
109
  "learning_rate": 3.5e-05,
110
- "loss": 0.2415,
111
  "step": 732
112
  },
113
  {
114
  "epoch": 6.0,
115
- "eval_accuracy": 0.8646616541353384,
116
- "eval_f1": 0.8447157518450185,
117
- "eval_loss": 0.3100413978099823,
118
- "eval_precision": 0.8325401217487549,
119
- "eval_recall": 0.864248045099109,
120
- "eval_runtime": 5.6089,
121
- "eval_samples_per_second": 71.137,
122
- "eval_steps_per_second": 8.914,
123
  "step": 732
124
  },
125
  {
126
  "epoch": 7.0,
127
- "grad_norm": 0.5025161504745483,
128
  "learning_rate": 3.2500000000000004e-05,
129
- "loss": 0.2329,
130
  "step": 854
131
  },
132
  {
133
  "epoch": 7.0,
134
- "eval_accuracy": 0.8847117794486216,
135
- "eval_f1": 0.8642214594306682,
136
- "eval_loss": 0.28597915172576904,
137
- "eval_precision": 0.8566755442334414,
138
- "eval_recall": 0.8734315330060011,
139
- "eval_runtime": 5.5584,
140
- "eval_samples_per_second": 71.783,
141
- "eval_steps_per_second": 8.995,
142
  "step": 854
143
  },
144
  {
145
  "epoch": 8.0,
146
- "grad_norm": 2.4278862476348877,
147
  "learning_rate": 3e-05,
148
- "loss": 0.199,
149
  "step": 976
150
  },
151
  {
152
  "epoch": 8.0,
153
- "eval_accuracy": 0.8872180451127819,
154
- "eval_f1": 0.8622036668943447,
155
- "eval_loss": 0.2878971993923187,
156
- "eval_precision": 0.8671602787456446,
157
- "eval_recall": 0.8577014002545917,
158
- "eval_runtime": 5.5998,
159
- "eval_samples_per_second": 71.253,
160
- "eval_steps_per_second": 8.929,
161
  "step": 976
162
  },
163
  {
164
  "epoch": 9.0,
165
- "grad_norm": 8.504670143127441,
166
  "learning_rate": 2.7500000000000004e-05,
167
- "loss": 0.1939,
168
  "step": 1098
169
  },
170
  {
171
  "epoch": 9.0,
172
- "eval_accuracy": 0.8897243107769424,
173
- "eval_f1": 0.8676337535436396,
174
- "eval_loss": 0.28258949518203735,
175
- "eval_precision": 0.8658613445378152,
176
- "eval_recall": 0.8694762684124386,
177
- "eval_runtime": 5.5681,
178
- "eval_samples_per_second": 71.658,
179
- "eval_steps_per_second": 8.98,
180
  "step": 1098
181
  },
182
  {
183
  "epoch": 10.0,
184
- "grad_norm": 2.462061882019043,
185
  "learning_rate": 2.5e-05,
186
- "loss": 0.1806,
187
  "step": 1220
188
  },
189
  {
190
  "epoch": 10.0,
191
- "eval_accuracy": 0.8796992481203008,
192
- "eval_f1": 0.8439374185136896,
193
- "eval_loss": 0.2981988787651062,
194
- "eval_precision": 0.8794955044955045,
195
- "eval_recall": 0.822376795781051,
196
- "eval_runtime": 5.6592,
197
- "eval_samples_per_second": 70.505,
198
- "eval_steps_per_second": 8.835,
199
  "step": 1220
200
  },
201
  {
202
  "epoch": 11.0,
203
- "grad_norm": 1.0077548027038574,
204
  "learning_rate": 2.25e-05,
205
- "loss": 0.1674,
206
  "step": 1342
207
  },
208
  {
209
  "epoch": 11.0,
210
- "eval_accuracy": 0.8947368421052632,
211
- "eval_f1": 0.8730223677032187,
212
- "eval_loss": 0.2734816372394562,
213
- "eval_precision": 0.8730223677032187,
214
- "eval_recall": 0.8730223677032187,
215
- "eval_runtime": 5.6293,
216
- "eval_samples_per_second": 70.879,
217
- "eval_steps_per_second": 8.882,
218
  "step": 1342
219
  },
220
  {
221
  "epoch": 12.0,
222
- "grad_norm": 3.9673709869384766,
223
  "learning_rate": 2e-05,
224
- "loss": 0.1553,
225
  "step": 1464
226
  },
227
  {
228
  "epoch": 12.0,
229
- "eval_accuracy": 0.8947368421052632,
230
- "eval_f1": 0.8717238211879976,
231
- "eval_loss": 0.2753015458583832,
232
- "eval_precision": 0.8757194133300328,
233
- "eval_recall": 0.8680214584469903,
234
- "eval_runtime": 5.5504,
235
- "eval_samples_per_second": 71.887,
236
- "eval_steps_per_second": 9.008,
237
  "step": 1464
238
  },
239
  {
240
  "epoch": 13.0,
241
- "grad_norm": 3.968949794769287,
242
  "learning_rate": 1.75e-05,
243
- "loss": 0.1431,
244
  "step": 1586
245
  },
246
  {
247
  "epoch": 13.0,
248
- "eval_accuracy": 0.8922305764411027,
249
- "eval_f1": 0.8661961395983623,
250
- "eval_loss": 0.2937251627445221,
251
- "eval_precision": 0.8784532165625604,
252
- "eval_recall": 0.8562465902891435,
253
- "eval_runtime": 5.5697,
254
- "eval_samples_per_second": 71.638,
255
- "eval_steps_per_second": 8.977,
256
  "step": 1586
257
  },
258
  {
259
  "epoch": 14.0,
260
- "grad_norm": 8.566404342651367,
261
  "learning_rate": 1.5e-05,
262
- "loss": 0.1417,
263
  "step": 1708
264
  },
265
  {
266
  "epoch": 14.0,
267
- "eval_accuracy": 0.9072681704260651,
268
- "eval_f1": 0.8910359080340997,
269
- "eval_loss": 0.29110613465309143,
270
- "eval_precision": 0.8822647601476015,
271
- "eval_recall": 0.9018912529550827,
272
- "eval_runtime": 5.5933,
273
- "eval_samples_per_second": 71.335,
274
- "eval_steps_per_second": 8.939,
275
  "step": 1708
276
  },
277
  {
278
  "epoch": 15.0,
279
- "grad_norm": 0.1758796125650406,
280
  "learning_rate": 1.25e-05,
281
- "loss": 0.1236,
282
  "step": 1830
283
  },
284
  {
285
  "epoch": 15.0,
286
- "eval_accuracy": 0.9022556390977443,
287
- "eval_f1": 0.8817957385392532,
288
- "eval_loss": 0.2955999970436096,
289
- "eval_precision": 0.8827677592299257,
290
- "eval_recall": 0.8808419712675032,
291
- "eval_runtime": 5.5574,
292
- "eval_samples_per_second": 71.797,
293
- "eval_steps_per_second": 8.997,
294
  "step": 1830
295
  },
296
  {
297
  "epoch": 16.0,
298
- "grad_norm": 0.7015694975852966,
299
  "learning_rate": 1e-05,
300
- "loss": 0.1304,
301
  "step": 1952
302
  },
303
  {
304
  "epoch": 16.0,
305
- "eval_accuracy": 0.9022556390977443,
306
- "eval_f1": 0.884617951284618,
307
- "eval_loss": 0.3010990023612976,
308
- "eval_precision": 0.8772893772893773,
309
- "eval_recall": 0.8933442444080741,
310
- "eval_runtime": 5.5644,
311
- "eval_samples_per_second": 71.705,
312
- "eval_steps_per_second": 8.986,
313
  "step": 1952
314
  },
315
  {
316
  "epoch": 17.0,
317
- "grad_norm": 0.19915825128555298,
318
  "learning_rate": 7.5e-06,
319
- "loss": 0.1164,
320
  "step": 2074
321
  },
322
  {
323
  "epoch": 17.0,
324
- "eval_accuracy": 0.899749373433584,
325
- "eval_f1": 0.879667048676036,
326
- "eval_loss": 0.29428762197494507,
327
- "eval_precision": 0.8778361344537815,
328
- "eval_recall": 0.8815693762502272,
329
- "eval_runtime": 5.5462,
330
- "eval_samples_per_second": 71.942,
331
- "eval_steps_per_second": 9.015,
332
  "step": 2074
333
  },
334
  {
335
  "epoch": 18.0,
336
- "grad_norm": 9.03445816040039,
337
  "learning_rate": 5e-06,
338
- "loss": 0.1144,
339
  "step": 2196
340
  },
341
  {
342
  "epoch": 18.0,
343
- "eval_accuracy": 0.8972431077694235,
344
- "eval_f1": 0.8775533117267087,
345
- "eval_loss": 0.2937219738960266,
346
- "eval_precision": 0.873246730188791,
347
- "eval_recall": 0.8822967812329514,
348
- "eval_runtime": 5.5516,
349
- "eval_samples_per_second": 71.872,
350
- "eval_steps_per_second": 9.006,
351
  "step": 2196
352
  },
353
  {
354
  "epoch": 19.0,
355
- "grad_norm": 5.45957612991333,
356
  "learning_rate": 2.5e-06,
357
- "loss": 0.1198,
358
  "step": 2318
359
  },
360
  {
361
  "epoch": 19.0,
362
- "eval_accuracy": 0.8972431077694235,
363
- "eval_f1": 0.8737897035111135,
364
- "eval_loss": 0.29848915338516235,
365
- "eval_precision": 0.8812047813777917,
366
- "eval_recall": 0.8672940534642661,
367
- "eval_runtime": 5.5952,
368
- "eval_samples_per_second": 71.31,
369
- "eval_steps_per_second": 8.936,
370
  "step": 2318
371
  },
372
  {
373
  "epoch": 20.0,
374
- "grad_norm": 3.5000033378601074,
375
  "learning_rate": 0.0,
376
- "loss": 0.1104,
377
  "step": 2440
378
  },
379
  {
380
  "epoch": 20.0,
381
- "eval_accuracy": 0.9022556390977443,
382
- "eval_f1": 0.8811928811928812,
383
- "eval_loss": 0.29284632205963135,
384
- "eval_precision": 0.8842105263157894,
385
- "eval_recall": 0.878341516639389,
386
- "eval_runtime": 5.5669,
387
- "eval_samples_per_second": 71.674,
388
- "eval_steps_per_second": 8.982,
389
  "step": 2440
390
  },
391
  {
392
  "epoch": 20.0,
393
  "step": 2440,
394
  "total_flos": 8551203605328000.0,
395
- "train_loss": 0.2130153269064231,
396
- "train_runtime": 2293.0977,
397
- "train_samples_per_second": 31.73,
398
- "train_steps_per_second": 1.064
399
  }
400
  ],
401
  "logging_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 6.6789751052856445,
14
  "learning_rate": 4.75e-05,
15
+ "loss": 0.5528,
16
  "step": 122
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_accuracy": 0.7167919799498746,
21
+ "eval_f1": 0.6444270944235455,
22
+ "eval_loss": 0.5050782561302185,
23
+ "eval_precision": 0.6520598138245197,
24
+ "eval_recall": 0.639616293871613,
25
+ "eval_runtime": 1.9903,
26
+ "eval_samples_per_second": 200.468,
27
+ "eval_steps_per_second": 25.121,
28
  "step": 122
29
  },
30
  {
31
  "epoch": 2.0,
32
+ "grad_norm": 4.4306488037109375,
33
  "learning_rate": 4.5e-05,
34
+ "loss": 0.4431,
35
  "step": 244
36
  },
37
  {
38
  "epoch": 2.0,
39
+ "eval_accuracy": 0.7969924812030075,
40
+ "eval_f1": 0.7789473684210526,
41
+ "eval_loss": 0.4333195090293884,
42
+ "eval_precision": 0.7711038961038961,
43
+ "eval_recall": 0.8188761593016912,
44
+ "eval_runtime": 1.9992,
45
+ "eval_samples_per_second": 199.58,
46
+ "eval_steps_per_second": 25.01,
47
  "step": 244
48
  },
49
  {
50
  "epoch": 3.0,
51
+ "grad_norm": 3.0677573680877686,
52
  "learning_rate": 4.25e-05,
53
+ "loss": 0.3491,
54
  "step": 366
55
  },
56
  {
57
  "epoch": 3.0,
58
+ "eval_accuracy": 0.8671679197994987,
59
+ "eval_f1": 0.8292627788498026,
60
+ "eval_loss": 0.32689258456230164,
61
+ "eval_precision": 0.8579176839902337,
62
+ "eval_recall": 0.8110110929259866,
63
+ "eval_runtime": 1.9988,
64
+ "eval_samples_per_second": 199.624,
65
+ "eval_steps_per_second": 25.015,
66
  "step": 366
67
  },
68
  {
69
  "epoch": 4.0,
70
+ "grad_norm": 2.5607471466064453,
71
  "learning_rate": 4e-05,
72
+ "loss": 0.2886,
73
  "step": 488
74
  },
75
  {
76
  "epoch": 4.0,
77
  "eval_accuracy": 0.8721804511278195,
78
+ "eval_f1": 0.8430067043674827,
79
+ "eval_loss": 0.2994827926158905,
80
+ "eval_precision": 0.8498269896193772,
81
+ "eval_recall": 0.8370612838697945,
82
+ "eval_runtime": 2.0374,
83
+ "eval_samples_per_second": 195.834,
84
+ "eval_steps_per_second": 24.541,
85
  "step": 488
86
  },
87
  {
88
  "epoch": 5.0,
89
+ "grad_norm": 2.971632957458496,
90
  "learning_rate": 3.7500000000000003e-05,
91
+ "loss": 0.2671,
92
  "step": 610
93
  },
94
  {
95
  "epoch": 5.0,
96
+ "eval_accuracy": 0.8696741854636592,
97
+ "eval_f1": 0.842789598108747,
98
+ "eval_loss": 0.29669511318206787,
99
+ "eval_precision": 0.842789598108747,
100
+ "eval_recall": 0.842789598108747,
101
+ "eval_runtime": 2.0319,
102
+ "eval_samples_per_second": 196.364,
103
+ "eval_steps_per_second": 24.607,
104
  "step": 610
105
  },
106
  {
107
  "epoch": 6.0,
108
+ "grad_norm": 4.395784854888916,
109
  "learning_rate": 3.5e-05,
110
+ "loss": 0.2481,
111
  "step": 732
112
  },
113
  {
114
  "epoch": 6.0,
115
+ "eval_accuracy": 0.8546365914786967,
116
+ "eval_f1": 0.8364661654135338,
117
+ "eval_loss": 0.33456894755363464,
118
+ "eval_precision": 0.8229932885906039,
119
+ "eval_recall": 0.8646572104018913,
120
+ "eval_runtime": 2.0674,
121
+ "eval_samples_per_second": 192.992,
122
+ "eval_steps_per_second": 24.184,
123
  "step": 732
124
  },
125
  {
126
  "epoch": 7.0,
127
+ "grad_norm": 0.19698984920978546,
128
  "learning_rate": 3.2500000000000004e-05,
129
+ "loss": 0.2339,
130
  "step": 854
131
  },
132
  {
133
  "epoch": 7.0,
134
+ "eval_accuracy": 0.8671679197994987,
135
+ "eval_f1": 0.8424651921601347,
136
+ "eval_loss": 0.2965049147605896,
137
+ "eval_precision": 0.8372140762463343,
138
+ "eval_recall": 0.8485179123476996,
139
+ "eval_runtime": 2.0387,
140
+ "eval_samples_per_second": 195.713,
141
+ "eval_steps_per_second": 24.525,
142
  "step": 854
143
  },
144
  {
145
  "epoch": 8.0,
146
+ "grad_norm": 1.805320143699646,
147
  "learning_rate": 3e-05,
148
+ "loss": 0.217,
149
  "step": 976
150
  },
151
  {
152
  "epoch": 8.0,
153
+ "eval_accuracy": 0.8771929824561403,
154
+ "eval_f1": 0.8483536940081443,
155
+ "eval_loss": 0.29290771484375,
156
+ "eval_precision": 0.8575792287132493,
157
+ "eval_recall": 0.8406073831605747,
158
+ "eval_runtime": 2.0519,
159
+ "eval_samples_per_second": 194.45,
160
+ "eval_steps_per_second": 24.367,
161
  "step": 976
162
  },
163
  {
164
  "epoch": 9.0,
165
+ "grad_norm": 8.52855396270752,
166
  "learning_rate": 2.7500000000000004e-05,
167
+ "loss": 0.1984,
168
  "step": 1098
169
  },
170
  {
171
  "epoch": 9.0,
172
+ "eval_accuracy": 0.8796992481203008,
173
+ "eval_f1": 0.8533986527862829,
174
+ "eval_loss": 0.2778124511241913,
175
+ "eval_precision": 0.8572003218020917,
176
+ "eval_recall": 0.8498817966903074,
177
+ "eval_runtime": 2.0595,
178
+ "eval_samples_per_second": 193.734,
179
+ "eval_steps_per_second": 24.277,
180
  "step": 1098
181
  },
182
  {
183
  "epoch": 10.0,
184
+ "grad_norm": 4.634001731872559,
185
  "learning_rate": 2.5e-05,
186
+ "loss": 0.1937,
187
  "step": 1220
188
  },
189
  {
190
  "epoch": 10.0,
191
+ "eval_accuracy": 0.8771929824561403,
192
+ "eval_f1": 0.8458135188208294,
193
+ "eval_loss": 0.29049694538116455,
194
+ "eval_precision": 0.8627946127946129,
195
+ "eval_recall": 0.8331060192762321,
196
+ "eval_runtime": 2.0594,
197
+ "eval_samples_per_second": 193.746,
198
+ "eval_steps_per_second": 24.279,
199
  "step": 1220
200
  },
201
  {
202
  "epoch": 11.0,
203
+ "grad_norm": 4.496700763702393,
204
  "learning_rate": 2.25e-05,
205
+ "loss": 0.1734,
206
  "step": 1342
207
  },
208
  {
209
  "epoch": 11.0,
210
+ "eval_accuracy": 0.8872180451127819,
211
+ "eval_f1": 0.8649563392675828,
212
+ "eval_loss": 0.2918776869773865,
213
+ "eval_precision": 0.8623655913978494,
214
+ "eval_recall": 0.8677032187670486,
215
+ "eval_runtime": 2.0871,
216
+ "eval_samples_per_second": 191.171,
217
+ "eval_steps_per_second": 23.956,
218
  "step": 1342
219
  },
220
  {
221
  "epoch": 12.0,
222
+ "grad_norm": 5.332368850708008,
223
  "learning_rate": 2e-05,
224
+ "loss": 0.1561,
225
  "step": 1464
226
  },
227
  {
228
  "epoch": 12.0,
229
+ "eval_accuracy": 0.8796992481203008,
230
+ "eval_f1": 0.8556004584112431,
231
+ "eval_loss": 0.31261545419692993,
232
+ "eval_precision": 0.8538865546218487,
233
+ "eval_recall": 0.85738316057465,
234
+ "eval_runtime": 2.0745,
235
+ "eval_samples_per_second": 192.331,
236
+ "eval_steps_per_second": 24.102,
237
  "step": 1464
238
  },
239
  {
240
  "epoch": 13.0,
241
+ "grad_norm": 0.6808285713195801,
242
  "learning_rate": 1.75e-05,
243
+ "loss": 0.1579,
244
  "step": 1586
245
  },
246
  {
247
  "epoch": 13.0,
248
+ "eval_accuracy": 0.8822055137844611,
249
+ "eval_f1": 0.8560793854229822,
250
+ "eval_loss": 0.3164937198162079,
251
+ "eval_precision": 0.8609538327526132,
252
+ "eval_recall": 0.8516548463356974,
253
+ "eval_runtime": 2.07,
254
+ "eval_samples_per_second": 192.753,
255
+ "eval_steps_per_second": 24.155,
256
  "step": 1586
257
  },
258
  {
259
  "epoch": 14.0,
260
+ "grad_norm": 11.75323486328125,
261
  "learning_rate": 1.5e-05,
262
+ "loss": 0.1459,
263
  "step": 1708
264
  },
265
  {
266
  "epoch": 14.0,
267
+ "eval_accuracy": 0.8972431077694235,
268
+ "eval_f1": 0.8787009231453675,
269
+ "eval_loss": 0.3108453154563904,
270
+ "eval_precision": 0.8714896214896215,
271
+ "eval_recall": 0.8872976904891798,
272
+ "eval_runtime": 2.0849,
273
+ "eval_samples_per_second": 191.373,
274
+ "eval_steps_per_second": 23.982,
275
  "step": 1708
276
  },
277
  {
278
  "epoch": 15.0,
279
+ "grad_norm": 0.21581852436065674,
280
  "learning_rate": 1.25e-05,
281
+ "loss": 0.1399,
282
  "step": 1830
283
  },
284
  {
285
  "epoch": 15.0,
286
+ "eval_accuracy": 0.8922305764411027,
287
+ "eval_f1": 0.8727838950061173,
288
+ "eval_loss": 0.3192116618156433,
289
+ "eval_precision": 0.8656898656898657,
290
+ "eval_recall": 0.8812511365702855,
291
+ "eval_runtime": 2.0655,
292
+ "eval_samples_per_second": 193.177,
293
+ "eval_steps_per_second": 24.208,
294
  "step": 1830
295
  },
296
  {
297
  "epoch": 16.0,
298
+ "grad_norm": 1.523335337638855,
299
  "learning_rate": 1e-05,
300
+ "loss": 0.1377,
301
  "step": 1952
302
  },
303
  {
304
  "epoch": 16.0,
305
+ "eval_accuracy": 0.8847117794486216,
306
+ "eval_f1": 0.8642214594306682,
307
+ "eval_loss": 0.32000479102134705,
308
+ "eval_precision": 0.8566755442334414,
309
+ "eval_recall": 0.8734315330060011,
310
+ "eval_runtime": 2.0706,
311
+ "eval_samples_per_second": 192.702,
312
+ "eval_steps_per_second": 24.148,
313
  "step": 1952
314
  },
315
  {
316
  "epoch": 17.0,
317
+ "grad_norm": 9.250269889831543,
318
  "learning_rate": 7.5e-06,
319
+ "loss": 0.1259,
320
  "step": 2074
321
  },
322
  {
323
  "epoch": 17.0,
324
+ "eval_accuracy": 0.8847117794486216,
325
+ "eval_f1": 0.8622899159663866,
326
+ "eval_loss": 0.3303412199020386,
327
+ "eval_precision": 0.8589244307033712,
328
+ "eval_recall": 0.8659301691216585,
329
+ "eval_runtime": 2.084,
330
+ "eval_samples_per_second": 191.461,
331
+ "eval_steps_per_second": 23.993,
332
  "step": 2074
333
  },
334
  {
335
  "epoch": 18.0,
336
+ "grad_norm": 7.081236362457275,
337
  "learning_rate": 5e-06,
338
+ "loss": 0.1293,
339
  "step": 2196
340
  },
341
  {
342
  "epoch": 18.0,
343
+ "eval_accuracy": 0.8922305764411027,
344
+ "eval_f1": 0.8727838950061173,
345
+ "eval_loss": 0.3265065848827362,
346
+ "eval_precision": 0.8656898656898657,
347
+ "eval_recall": 0.8812511365702855,
348
+ "eval_runtime": 2.0665,
349
+ "eval_samples_per_second": 193.079,
350
+ "eval_steps_per_second": 24.195,
351
  "step": 2196
352
  },
353
  {
354
  "epoch": 19.0,
355
+ "grad_norm": 1.5093868970870972,
356
  "learning_rate": 2.5e-06,
357
+ "loss": 0.1187,
358
  "step": 2318
359
  },
360
  {
361
  "epoch": 19.0,
362
+ "eval_accuracy": 0.8847117794486216,
363
+ "eval_f1": 0.8622899159663866,
364
+ "eval_loss": 0.33049851655960083,
365
+ "eval_precision": 0.8589244307033712,
366
+ "eval_recall": 0.8659301691216585,
367
+ "eval_runtime": 2.0389,
368
+ "eval_samples_per_second": 195.69,
369
+ "eval_steps_per_second": 24.523,
370
  "step": 2318
371
  },
372
  {
373
  "epoch": 20.0,
374
+ "grad_norm": 0.4667631983757019,
375
  "learning_rate": 0.0,
376
+ "loss": 0.1302,
377
  "step": 2440
378
  },
379
  {
380
  "epoch": 20.0,
381
+ "eval_accuracy": 0.8872180451127819,
382
+ "eval_f1": 0.8662440310793597,
383
+ "eval_loss": 0.3288058042526245,
384
+ "eval_precision": 0.8606158357771261,
385
+ "eval_recall": 0.872704128023277,
386
+ "eval_runtime": 2.0647,
387
+ "eval_samples_per_second": 193.253,
388
+ "eval_steps_per_second": 24.217,
389
  "step": 2440
390
  },
391
  {
392
  "epoch": 20.0,
393
  "step": 2440,
394
  "total_flos": 8551203605328000.0,
395
+ "train_loss": 0.2203434850348801,
396
+ "train_runtime": 746.002,
397
+ "train_samples_per_second": 97.533,
398
+ "train_steps_per_second": 3.271
399
  }
400
  ],
401
  "logging_steps": 500,