apwic commited on
Commit
469d980
1 Parent(s): 9164175

End of training

Browse files
README.md CHANGED
@@ -1,4 +1,6 @@
1
  ---
 
 
2
  license: mit
3
  base_model: indolem/indobert-base-uncased
4
  tags:
 
1
  ---
2
+ language:
3
+ - id
4
  license: mit
5
  base_model: indolem/indobert-base-uncased
6
  tags:
all_results.json CHANGED
@@ -1,21 +1,21 @@
1
  {
2
- "accuracy": 0.9060336300692384,
3
  "epoch": 20.0,
4
- "eval_accuracy": 0.8822055137844611,
5
- "eval_f1": 0.858259325044405,
6
- "eval_loss": 0.29368409514427185,
7
- "eval_precision": 0.8573798178418481,
8
- "eval_recall": 0.8591562102200401,
9
- "eval_runtime": 5.0799,
10
  "eval_samples": 399,
11
- "eval_samples_per_second": 78.545,
12
- "eval_steps_per_second": 9.843,
13
- "f1": 0.8877529196862761,
14
- "precision": 0.8852319695351321,
15
- "recall": 0.8904101207677175,
16
- "train_loss": 0.23575165701694176,
17
- "train_runtime": 1927.8211,
18
  "train_samples": 3638,
19
- "train_samples_per_second": 37.742,
20
- "train_steps_per_second": 1.266
21
  }
 
1
  {
2
+ "accuracy": 0.9050445103857567,
3
  "epoch": 20.0,
4
+ "eval_accuracy": 0.8872180451127819,
5
+ "eval_f1": 0.8649563392675828,
6
+ "eval_loss": 0.32958927750587463,
7
+ "eval_precision": 0.8623655913978494,
8
+ "eval_recall": 0.8677032187670486,
9
+ "eval_runtime": 1.8306,
10
  "eval_samples": 399,
11
+ "eval_samples_per_second": 217.959,
12
+ "eval_steps_per_second": 27.313,
13
+ "f1": 0.887526074259491,
14
+ "precision": 0.8821336208866023,
15
+ "recall": 0.8936152188032418,
16
+ "train_loss": 0.24024685015443895,
17
+ "train_runtime": 634.5102,
18
  "train_samples": 3638,
19
+ "train_samples_per_second": 114.671,
20
+ "train_steps_per_second": 3.845
21
  }
eval_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_accuracy": 0.8822055137844611,
4
- "eval_f1": 0.858259325044405,
5
- "eval_loss": 0.29368409514427185,
6
- "eval_precision": 0.8573798178418481,
7
- "eval_recall": 0.8591562102200401,
8
- "eval_runtime": 5.0799,
9
  "eval_samples": 399,
10
- "eval_samples_per_second": 78.545,
11
- "eval_steps_per_second": 9.843
12
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_accuracy": 0.8872180451127819,
4
+ "eval_f1": 0.8649563392675828,
5
+ "eval_loss": 0.32958927750587463,
6
+ "eval_precision": 0.8623655913978494,
7
+ "eval_recall": 0.8677032187670486,
8
+ "eval_runtime": 1.8306,
9
  "eval_samples": 399,
10
+ "eval_samples_per_second": 217.959,
11
+ "eval_steps_per_second": 27.313
12
  }
predict_results.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "accuracy": 0.9060336300692384,
3
- "f1": 0.8877529196862761,
4
- "precision": 0.8852319695351321,
5
- "recall": 0.8904101207677175
6
  }
 
1
  {
2
+ "accuracy": 0.9050445103857567,
3
+ "f1": 0.887526074259491,
4
+ "precision": 0.8821336208866023,
5
+ "recall": 0.8936152188032418
6
  }
predict_results.txt CHANGED
@@ -17,14 +17,14 @@ index prediction
17
  15 1
18
  16 1
19
  17 1
20
- 18 1
21
  19 1
22
  20 1
23
  21 1
24
- 22 1
25
  23 1
26
  24 1
27
- 25 0
28
  26 0
29
  27 1
30
  28 1
@@ -35,12 +35,12 @@ index prediction
35
  33 1
36
  34 1
37
  35 1
38
- 36 0
39
  37 1
40
  38 1
41
  39 1
42
  40 1
43
- 41 1
44
  42 1
45
  43 1
46
  44 1
@@ -56,14 +56,14 @@ index prediction
56
  54 1
57
  55 1
58
  56 1
59
- 57 0
60
  58 1
61
  59 1
62
  60 0
63
  61 1
64
  62 1
65
  63 1
66
- 64 1
67
  65 1
68
  66 1
69
  67 0
@@ -75,13 +75,13 @@ index prediction
75
  73 1
76
  74 1
77
  75 0
78
- 76 0
79
  77 1
80
  78 1
81
  79 1
82
  80 1
83
  81 1
84
- 82 0
85
  83 0
86
  84 1
87
  85 1
@@ -89,7 +89,7 @@ index prediction
89
  87 1
90
  88 1
91
  89 1
92
- 90 1
93
  91 1
94
  92 1
95
  93 1
@@ -101,12 +101,12 @@ index prediction
101
  99 1
102
  100 1
103
  101 1
104
- 102 0
105
  103 1
106
  104 1
107
  105 1
108
  106 1
109
- 107 1
110
  108 1
111
  109 1
112
  110 1
@@ -119,8 +119,8 @@ index prediction
119
  117 1
120
  118 1
121
  119 1
122
- 120 1
123
- 121 0
124
  122 1
125
  123 1
126
  124 0
@@ -180,7 +180,7 @@ index prediction
180
  178 1
181
  179 0
182
  180 1
183
- 181 1
184
  182 1
185
  183 1
186
  184 1
@@ -220,7 +220,7 @@ index prediction
220
  218 1
221
  219 1
222
  220 1
223
- 221 0
224
  222 1
225
  223 1
226
  224 0
@@ -234,11 +234,11 @@ index prediction
234
  232 1
235
  233 0
236
  234 1
237
- 235 0
238
  236 1
239
  237 0
240
  238 1
241
- 239 0
242
  240 1
243
  241 1
244
  242 1
@@ -252,11 +252,11 @@ index prediction
252
  250 1
253
  251 1
254
  252 1
255
- 253 1
256
- 254 0
257
  255 1
258
  256 1
259
- 257 0
260
  258 1
261
  259 1
262
  260 1
@@ -273,7 +273,7 @@ index prediction
273
  271 1
274
  272 0
275
  273 1
276
- 274 0
277
  275 1
278
  276 0
279
  277 1
@@ -322,7 +322,7 @@ index prediction
322
  320 0
323
  321 0
324
  322 0
325
- 323 0
326
  324 1
327
  325 0
328
  326 0
@@ -367,7 +367,7 @@ index prediction
367
  365 0
368
  366 0
369
  367 0
370
- 368 0
371
  369 0
372
  370 0
373
  371 0
@@ -470,7 +470,7 @@ index prediction
470
  468 0
471
  469 0
472
  470 0
473
- 471 1
474
  472 0
475
  473 0
476
  474 0
@@ -479,7 +479,7 @@ index prediction
479
  477 0
480
  478 0
481
  479 0
482
- 480 0
483
  481 0
484
  482 0
485
  483 0
@@ -490,7 +490,7 @@ index prediction
490
  488 0
491
  489 0
492
  490 0
493
- 491 0
494
  492 0
495
  493 0
496
  494 0
@@ -535,7 +535,7 @@ index prediction
535
  533 0
536
  534 0
537
  535 0
538
- 536 1
539
  537 0
540
  538 0
541
  539 0
@@ -546,7 +546,7 @@ index prediction
546
  544 0
547
  545 0
548
  546 0
549
- 547 1
550
  548 0
551
  549 0
552
  550 0
@@ -587,7 +587,7 @@ index prediction
587
  585 0
588
  586 0
589
  587 0
590
- 588 0
591
  589 0
592
  590 0
593
  591 1
@@ -736,7 +736,7 @@ index prediction
736
  734 1
737
  735 0
738
  736 0
739
- 737 0
740
  738 0
741
  739 0
742
  740 0
@@ -783,7 +783,7 @@ index prediction
783
  781 0
784
  782 0
785
  783 0
786
- 784 1
787
  785 0
788
  786 1
789
  787 0
@@ -849,12 +849,12 @@ index prediction
849
  847 0
850
  848 0
851
  849 0
852
- 850 0
853
  851 1
854
  852 0
855
  853 0
856
  854 1
857
- 855 0
858
  856 0
859
  857 0
860
  858 0
@@ -981,7 +981,7 @@ index prediction
981
  979 0
982
  980 0
983
  981 0
984
- 982 0
985
  983 0
986
  984 0
987
  985 0
 
17
  15 1
18
  16 1
19
  17 1
20
+ 18 0
21
  19 1
22
  20 1
23
  21 1
24
+ 22 0
25
  23 1
26
  24 1
27
+ 25 1
28
  26 0
29
  27 1
30
  28 1
 
35
  33 1
36
  34 1
37
  35 1
38
+ 36 1
39
  37 1
40
  38 1
41
  39 1
42
  40 1
43
+ 41 0
44
  42 1
45
  43 1
46
  44 1
 
56
  54 1
57
  55 1
58
  56 1
59
+ 57 1
60
  58 1
61
  59 1
62
  60 0
63
  61 1
64
  62 1
65
  63 1
66
+ 64 0
67
  65 1
68
  66 1
69
  67 0
 
75
  73 1
76
  74 1
77
  75 0
78
+ 76 1
79
  77 1
80
  78 1
81
  79 1
82
  80 1
83
  81 1
84
+ 82 1
85
  83 0
86
  84 1
87
  85 1
 
89
  87 1
90
  88 1
91
  89 1
92
+ 90 0
93
  91 1
94
  92 1
95
  93 1
 
101
  99 1
102
  100 1
103
  101 1
104
+ 102 1
105
  103 1
106
  104 1
107
  105 1
108
  106 1
109
+ 107 0
110
  108 1
111
  109 1
112
  110 1
 
119
  117 1
120
  118 1
121
  119 1
122
+ 120 0
123
+ 121 1
124
  122 1
125
  123 1
126
  124 0
 
180
  178 1
181
  179 0
182
  180 1
183
+ 181 0
184
  182 1
185
  183 1
186
  184 1
 
220
  218 1
221
  219 1
222
  220 1
223
+ 221 1
224
  222 1
225
  223 1
226
  224 0
 
234
  232 1
235
  233 0
236
  234 1
237
+ 235 1
238
  236 1
239
  237 0
240
  238 1
241
+ 239 1
242
  240 1
243
  241 1
244
  242 1
 
252
  250 1
253
  251 1
254
  252 1
255
+ 253 0
256
+ 254 1
257
  255 1
258
  256 1
259
+ 257 1
260
  258 1
261
  259 1
262
  260 1
 
273
  271 1
274
  272 0
275
  273 1
276
+ 274 1
277
  275 1
278
  276 0
279
  277 1
 
322
  320 0
323
  321 0
324
  322 0
325
+ 323 1
326
  324 1
327
  325 0
328
  326 0
 
367
  365 0
368
  366 0
369
  367 0
370
+ 368 1
371
  369 0
372
  370 0
373
  371 0
 
470
  468 0
471
  469 0
472
  470 0
473
+ 471 0
474
  472 0
475
  473 0
476
  474 0
 
479
  477 0
480
  478 0
481
  479 0
482
+ 480 1
483
  481 0
484
  482 0
485
  483 0
 
490
  488 0
491
  489 0
492
  490 0
493
+ 491 1
494
  492 0
495
  493 0
496
  494 0
 
535
  533 0
536
  534 0
537
  535 0
538
+ 536 0
539
  537 0
540
  538 0
541
  539 0
 
546
  544 0
547
  545 0
548
  546 0
549
+ 547 0
550
  548 0
551
  549 0
552
  550 0
 
587
  585 0
588
  586 0
589
  587 0
590
+ 588 1
591
  589 0
592
  590 0
593
  591 1
 
736
  734 1
737
  735 0
738
  736 0
739
+ 737 1
740
  738 0
741
  739 0
742
  740 0
 
783
  781 0
784
  782 0
785
  783 0
786
+ 784 0
787
  785 0
788
  786 1
789
  787 0
 
849
  847 0
850
  848 0
851
  849 0
852
+ 850 1
853
  851 1
854
  852 0
855
  853 0
856
  854 1
857
+ 855 1
858
  856 0
859
  857 0
860
  858 0
 
981
  979 0
982
  980 0
983
  981 0
984
+ 982 1
985
  983 0
986
  984 0
987
  985 0
runs/Jun03_13-33-12_a358b85c7679/events.out.tfevents.1717422242.a358b85c7679.111292.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76d26b13dd873ce32c43089413cbbbfb0aab64e95fb28a8008500efeef50415f
3
+ size 560
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
- "train_loss": 0.23575165701694176,
4
- "train_runtime": 1927.8211,
5
  "train_samples": 3638,
6
- "train_samples_per_second": 37.742,
7
- "train_steps_per_second": 1.266
8
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "train_loss": 0.24024685015443895,
4
+ "train_runtime": 634.5102,
5
  "train_samples": 3638,
6
+ "train_samples_per_second": 114.671,
7
+ "train_steps_per_second": 3.845
8
  }
trainer_state.json CHANGED
@@ -10,392 +10,392 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 4.66318416595459,
14
  "learning_rate": 4.75e-05,
15
- "loss": 0.5467,
16
  "step": 122
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_accuracy": 0.7443609022556391,
21
- "eval_f1": 0.6574242424242425,
22
- "eval_loss": 0.499358206987381,
23
- "eval_precision": 0.6878057302585605,
24
- "eval_recall": 0.6466175668303328,
25
- "eval_runtime": 5.0995,
26
- "eval_samples_per_second": 78.243,
27
- "eval_steps_per_second": 9.805,
28
  "step": 122
29
  },
30
  {
31
  "epoch": 2.0,
32
- "grad_norm": 3.530482530593872,
33
  "learning_rate": 4.5e-05,
34
- "loss": 0.4498,
35
  "step": 244
36
  },
37
  {
38
  "epoch": 2.0,
39
- "eval_accuracy": 0.7969924812030075,
40
- "eval_f1": 0.7646051989482654,
41
- "eval_loss": 0.4013344645500183,
42
- "eval_precision": 0.7567868206139117,
43
- "eval_recall": 0.7763684306237497,
44
- "eval_runtime": 5.1487,
45
- "eval_samples_per_second": 77.495,
46
- "eval_steps_per_second": 9.711,
47
  "step": 244
48
  },
49
  {
50
  "epoch": 3.0,
51
- "grad_norm": 2.9630391597747803,
52
  "learning_rate": 4.25e-05,
53
- "loss": 0.3643,
54
  "step": 366
55
  },
56
  {
57
  "epoch": 3.0,
58
- "eval_accuracy": 0.8295739348370927,
59
- "eval_f1": 0.7684508773127603,
60
- "eval_loss": 0.36033105850219727,
61
- "eval_precision": 0.8252399774138905,
62
- "eval_recall": 0.7444080741953083,
63
- "eval_runtime": 5.0787,
64
- "eval_samples_per_second": 78.564,
65
- "eval_steps_per_second": 9.845,
66
  "step": 366
67
  },
68
  {
69
  "epoch": 4.0,
70
- "grad_norm": 1.813259243965149,
71
  "learning_rate": 4e-05,
72
- "loss": 0.3131,
73
  "step": 488
74
  },
75
  {
76
  "epoch": 4.0,
77
- "eval_accuracy": 0.8546365914786967,
78
- "eval_f1": 0.8159125620465827,
79
- "eval_loss": 0.31337350606918335,
80
- "eval_precision": 0.8356565656565657,
81
- "eval_recall": 0.8021458446990362,
82
- "eval_runtime": 5.0532,
83
- "eval_samples_per_second": 78.96,
84
- "eval_steps_per_second": 9.895,
85
  "step": 488
86
  },
87
  {
88
  "epoch": 5.0,
89
- "grad_norm": 3.7617347240448,
90
  "learning_rate": 3.7500000000000003e-05,
91
  "loss": 0.2812,
92
  "step": 610
93
  },
94
  {
95
  "epoch": 5.0,
96
- "eval_accuracy": 0.8671679197994987,
97
- "eval_f1": 0.8350789627607721,
98
- "eval_loss": 0.30869799852371216,
99
- "eval_precision": 0.8463358876939919,
100
- "eval_recall": 0.8260138206946717,
101
- "eval_runtime": 5.1714,
102
- "eval_samples_per_second": 77.155,
103
- "eval_steps_per_second": 9.669,
104
  "step": 610
105
  },
106
  {
107
  "epoch": 6.0,
108
- "grad_norm": 1.2499818801879883,
109
  "learning_rate": 3.5e-05,
110
- "loss": 0.2579,
111
  "step": 732
112
  },
113
  {
114
  "epoch": 6.0,
115
- "eval_accuracy": 0.87468671679198,
116
- "eval_f1": 0.8537390029325513,
117
- "eval_loss": 0.30366042256355286,
118
- "eval_precision": 0.8442805058676086,
119
- "eval_recall": 0.8663393344244408,
120
- "eval_runtime": 5.0754,
121
- "eval_samples_per_second": 78.614,
122
- "eval_steps_per_second": 9.851,
123
  "step": 732
124
  },
125
  {
126
  "epoch": 7.0,
127
- "grad_norm": 1.6943317651748657,
128
  "learning_rate": 3.2500000000000004e-05,
129
- "loss": 0.242,
130
  "step": 854
131
  },
132
  {
133
  "epoch": 7.0,
134
  "eval_accuracy": 0.8771929824561403,
135
- "eval_f1": 0.8522278069611882,
136
- "eval_loss": 0.2868788540363312,
137
- "eval_precision": 0.8513631702756499,
138
- "eval_recall": 0.8531096563011457,
139
- "eval_runtime": 5.0844,
140
- "eval_samples_per_second": 78.476,
141
- "eval_steps_per_second": 9.834,
142
  "step": 854
143
  },
144
  {
145
  "epoch": 8.0,
146
- "grad_norm": 4.454681873321533,
147
  "learning_rate": 3e-05,
148
- "loss": 0.2238,
149
  "step": 976
150
  },
151
  {
152
  "epoch": 8.0,
153
- "eval_accuracy": 0.8621553884711779,
154
- "eval_f1": 0.8238834717707957,
155
- "eval_loss": 0.3086492121219635,
156
- "eval_precision": 0.8487520627062706,
157
- "eval_recall": 0.8074649936352064,
158
- "eval_runtime": 5.076,
159
- "eval_samples_per_second": 78.605,
160
- "eval_steps_per_second": 9.85,
161
  "step": 976
162
  },
163
  {
164
  "epoch": 9.0,
165
- "grad_norm": 6.322339057922363,
166
  "learning_rate": 2.7500000000000004e-05,
167
- "loss": 0.2134,
168
  "step": 1098
169
  },
170
  {
171
  "epoch": 9.0,
172
  "eval_accuracy": 0.8696741854636592,
173
- "eval_f1": 0.8368354828562441,
174
- "eval_loss": 0.29160723090171814,
175
- "eval_precision": 0.8520237470480189,
176
- "eval_recall": 0.8252864157119476,
177
- "eval_runtime": 5.0668,
178
- "eval_samples_per_second": 78.748,
179
- "eval_steps_per_second": 9.868,
180
  "step": 1098
181
  },
182
  {
183
  "epoch": 10.0,
184
- "grad_norm": 4.7913737297058105,
185
  "learning_rate": 2.5e-05,
186
- "loss": 0.2014,
187
  "step": 1220
188
  },
189
  {
190
  "epoch": 10.0,
191
- "eval_accuracy": 0.8696741854636592,
192
- "eval_f1": 0.8339841249519908,
193
- "eval_loss": 0.3077145218849182,
194
- "eval_precision": 0.8579231241892538,
195
- "eval_recall": 0.8177850518276051,
196
- "eval_runtime": 5.1374,
197
- "eval_samples_per_second": 77.665,
198
- "eval_steps_per_second": 9.733,
199
  "step": 1220
200
  },
201
  {
202
  "epoch": 11.0,
203
- "grad_norm": 0.7732164859771729,
204
  "learning_rate": 2.25e-05,
205
- "loss": 0.1918,
206
  "step": 1342
207
  },
208
  {
209
  "epoch": 11.0,
210
- "eval_accuracy": 0.8671679197994987,
211
- "eval_f1": 0.8409485773595975,
212
- "eval_loss": 0.29099568724632263,
213
- "eval_precision": 0.8385304659498208,
214
- "eval_recall": 0.8435170030914712,
215
- "eval_runtime": 5.0596,
216
- "eval_samples_per_second": 78.861,
217
- "eval_steps_per_second": 9.882,
218
  "step": 1342
219
  },
220
  {
221
  "epoch": 12.0,
222
- "grad_norm": 15.641918182373047,
223
  "learning_rate": 2e-05,
224
- "loss": 0.1764,
225
  "step": 1464
226
  },
227
  {
228
  "epoch": 12.0,
229
- "eval_accuracy": 0.8796992481203008,
230
- "eval_f1": 0.8563025210084034,
231
- "eval_loss": 0.28647324442863464,
232
- "eval_precision": 0.8529936381473334,
233
- "eval_recall": 0.8598836152027641,
234
- "eval_runtime": 5.0747,
235
- "eval_samples_per_second": 78.626,
236
- "eval_steps_per_second": 9.853,
237
  "step": 1464
238
  },
239
  {
240
  "epoch": 13.0,
241
- "grad_norm": 3.4587786197662354,
242
  "learning_rate": 1.75e-05,
243
- "loss": 0.1771,
244
  "step": 1586
245
  },
246
  {
247
  "epoch": 13.0,
248
- "eval_accuracy": 0.8696741854636592,
249
- "eval_f1": 0.8368354828562441,
250
- "eval_loss": 0.3067673444747925,
251
- "eval_precision": 0.8520237470480189,
252
- "eval_recall": 0.8252864157119476,
253
- "eval_runtime": 5.0713,
254
- "eval_samples_per_second": 78.679,
255
- "eval_steps_per_second": 9.859,
256
  "step": 1586
257
  },
258
  {
259
  "epoch": 14.0,
260
- "grad_norm": 7.602293968200684,
261
  "learning_rate": 1.5e-05,
262
- "loss": 0.1708,
263
  "step": 1708
264
  },
265
  {
266
  "epoch": 14.0,
267
- "eval_accuracy": 0.8872180451127819,
268
- "eval_f1": 0.8680720368560659,
269
- "eval_loss": 0.29624542593955994,
270
- "eval_precision": 0.8587217615098657,
271
- "eval_recall": 0.8802054919076197,
272
- "eval_runtime": 5.0603,
273
- "eval_samples_per_second": 78.849,
274
- "eval_steps_per_second": 9.881,
275
  "step": 1708
276
  },
277
  {
278
  "epoch": 15.0,
279
- "grad_norm": 0.1862931102514267,
280
  "learning_rate": 1.25e-05,
281
- "loss": 0.1585,
282
  "step": 1830
283
  },
284
  {
285
  "epoch": 15.0,
286
- "eval_accuracy": 0.8872180451127819,
287
- "eval_f1": 0.8636104675452922,
288
- "eval_loss": 0.2889001667499542,
289
- "eval_precision": 0.8645363713902765,
290
- "eval_recall": 0.8627023095108202,
291
- "eval_runtime": 5.1374,
292
- "eval_samples_per_second": 77.666,
293
- "eval_steps_per_second": 9.733,
294
  "step": 1830
295
  },
296
  {
297
  "epoch": 16.0,
298
- "grad_norm": 0.2601371705532074,
299
  "learning_rate": 1e-05,
300
- "loss": 0.1602,
301
  "step": 1952
302
  },
303
  {
304
  "epoch": 16.0,
305
  "eval_accuracy": 0.8822055137844611,
306
- "eval_f1": 0.8609498387276164,
307
- "eval_loss": 0.2941306233406067,
308
- "eval_precision": 0.8540903540903542,
309
- "eval_recall": 0.8691580287324968,
310
- "eval_runtime": 5.0692,
311
- "eval_samples_per_second": 78.711,
312
- "eval_steps_per_second": 9.863,
313
  "step": 1952
314
  },
315
  {
316
  "epoch": 17.0,
317
- "grad_norm": 2.504516839981079,
318
  "learning_rate": 7.5e-06,
319
- "loss": 0.1481,
320
  "step": 2074
321
  },
322
  {
323
  "epoch": 17.0,
324
- "eval_accuracy": 0.8847117794486216,
325
- "eval_f1": 0.8595070422535211,
326
- "eval_loss": 0.29707595705986023,
327
- "eval_precision": 0.8633733523114054,
328
- "eval_recall": 0.8559283506092017,
329
- "eval_runtime": 5.0692,
330
- "eval_samples_per_second": 78.711,
331
- "eval_steps_per_second": 9.864,
332
  "step": 2074
333
  },
334
  {
335
  "epoch": 18.0,
336
- "grad_norm": 4.364478588104248,
337
  "learning_rate": 5e-06,
338
- "loss": 0.1536,
339
  "step": 2196
340
  },
341
  {
342
  "epoch": 18.0,
343
- "eval_accuracy": 0.8847117794486216,
344
- "eval_f1": 0.8602260265626904,
345
- "eval_loss": 0.29374733567237854,
346
- "eval_precision": 0.8620943049601959,
347
- "eval_recall": 0.8584288052373159,
348
- "eval_runtime": 5.0687,
349
- "eval_samples_per_second": 78.718,
350
- "eval_steps_per_second": 9.864,
351
  "step": 2196
352
  },
353
  {
354
  "epoch": 19.0,
355
- "grad_norm": 2.8036956787109375,
356
  "learning_rate": 2.5e-06,
357
- "loss": 0.147,
358
  "step": 2318
359
  },
360
  {
361
  "epoch": 19.0,
362
- "eval_accuracy": 0.8822055137844611,
363
- "eval_f1": 0.8568221901555235,
364
- "eval_loss": 0.2946490943431854,
365
- "eval_precision": 0.8596491228070176,
366
- "eval_recall": 0.8541553009638116,
367
- "eval_runtime": 5.0871,
368
- "eval_samples_per_second": 78.434,
369
- "eval_steps_per_second": 9.829,
370
  "step": 2318
371
  },
372
  {
373
  "epoch": 20.0,
374
- "grad_norm": 0.6669352054595947,
375
  "learning_rate": 0.0,
376
- "loss": 0.1379,
377
  "step": 2440
378
  },
379
  {
380
  "epoch": 20.0,
381
- "eval_accuracy": 0.8822055137844611,
382
- "eval_f1": 0.858259325044405,
383
- "eval_loss": 0.29368409514427185,
384
- "eval_precision": 0.8573798178418481,
385
- "eval_recall": 0.8591562102200401,
386
- "eval_runtime": 5.1066,
387
- "eval_samples_per_second": 78.134,
388
- "eval_steps_per_second": 9.791,
389
  "step": 2440
390
  },
391
  {
392
  "epoch": 20.0,
393
  "step": 2440,
394
  "total_flos": 8444128359504000.0,
395
- "train_loss": 0.23575165701694176,
396
- "train_runtime": 1927.8211,
397
- "train_samples_per_second": 37.742,
398
- "train_steps_per_second": 1.266
399
  }
400
  ],
401
  "logging_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 4.0522332191467285,
14
  "learning_rate": 4.75e-05,
15
+ "loss": 0.5524,
16
  "step": 122
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_accuracy": 0.7167919799498746,
21
+ "eval_f1": 0.5962731806907421,
22
+ "eval_loss": 0.5142865777015686,
23
+ "eval_precision": 0.6417444029850746,
24
+ "eval_recall": 0.5921076559374432,
25
+ "eval_runtime": 1.7987,
26
+ "eval_samples_per_second": 221.826,
27
+ "eval_steps_per_second": 27.798,
28
  "step": 122
29
  },
30
  {
31
  "epoch": 2.0,
32
+ "grad_norm": 3.862292528152466,
33
  "learning_rate": 4.5e-05,
34
+ "loss": 0.468,
35
  "step": 244
36
  },
37
  {
38
  "epoch": 2.0,
39
+ "eval_accuracy": 0.7919799498746867,
40
+ "eval_f1": 0.7577413479052824,
41
+ "eval_loss": 0.427168607711792,
42
+ "eval_precision": 0.7507235274089207,
43
+ "eval_recall": 0.7678214220767412,
44
+ "eval_runtime": 1.8042,
45
+ "eval_samples_per_second": 221.147,
46
+ "eval_steps_per_second": 27.713,
47
  "step": 244
48
  },
49
  {
50
  "epoch": 3.0,
51
+ "grad_norm": 2.0498220920562744,
52
  "learning_rate": 4.25e-05,
53
+ "loss": 0.3759,
54
  "step": 366
55
  },
56
  {
57
  "epoch": 3.0,
58
+ "eval_accuracy": 0.8345864661654135,
59
+ "eval_f1": 0.7840625819994752,
60
+ "eval_loss": 0.3480204641819,
61
+ "eval_precision": 0.8174519753533889,
62
+ "eval_recall": 0.7654573558828878,
63
+ "eval_runtime": 1.8451,
64
+ "eval_samples_per_second": 216.245,
65
+ "eval_steps_per_second": 27.098,
66
  "step": 366
67
  },
68
  {
69
  "epoch": 4.0,
70
+ "grad_norm": 2.6740951538085938,
71
  "learning_rate": 4e-05,
72
+ "loss": 0.3116,
73
  "step": 488
74
  },
75
  {
76
  "epoch": 4.0,
77
+ "eval_accuracy": 0.8646616541353384,
78
+ "eval_f1": 0.8315033783783784,
79
+ "eval_loss": 0.308014452457428,
80
+ "eval_precision": 0.8438775510204082,
81
+ "eval_recall": 0.8217403164211674,
82
+ "eval_runtime": 1.8424,
83
+ "eval_samples_per_second": 216.565,
84
+ "eval_steps_per_second": 27.138,
85
  "step": 488
86
  },
87
  {
88
  "epoch": 5.0,
89
+ "grad_norm": 3.3190958499908447,
90
  "learning_rate": 3.7500000000000003e-05,
91
  "loss": 0.2812,
92
  "step": 610
93
  },
94
  {
95
  "epoch": 5.0,
96
+ "eval_accuracy": 0.8696741854636592,
97
+ "eval_f1": 0.8368354828562441,
98
+ "eval_loss": 0.3000350892543793,
99
+ "eval_precision": 0.8520237470480189,
100
+ "eval_recall": 0.8252864157119476,
101
+ "eval_runtime": 1.8448,
102
+ "eval_samples_per_second": 216.28,
103
+ "eval_steps_per_second": 27.103,
104
  "step": 610
105
  },
106
  {
107
  "epoch": 6.0,
108
+ "grad_norm": 6.869318962097168,
109
  "learning_rate": 3.5e-05,
110
+ "loss": 0.2692,
111
  "step": 732
112
  },
113
  {
114
  "epoch": 6.0,
115
+ "eval_accuracy": 0.8771929824561403,
116
+ "eval_f1": 0.8563451067988272,
117
+ "eval_loss": 0.2969658374786377,
118
+ "eval_precision": 0.8473119816985988,
119
+ "eval_recall": 0.8681123840698308,
120
+ "eval_runtime": 1.8412,
121
+ "eval_samples_per_second": 216.704,
122
+ "eval_steps_per_second": 27.156,
123
  "step": 732
124
  },
125
  {
126
  "epoch": 7.0,
127
+ "grad_norm": 2.0313923358917236,
128
  "learning_rate": 3.2500000000000004e-05,
129
+ "loss": 0.2603,
130
  "step": 854
131
  },
132
  {
133
  "epoch": 7.0,
134
  "eval_accuracy": 0.8771929824561403,
135
+ "eval_f1": 0.8543546116197471,
136
+ "eval_loss": 0.29286739230155945,
137
+ "eval_precision": 0.8489149560117302,
138
+ "eval_recall": 0.8606110201854882,
139
+ "eval_runtime": 1.8427,
140
+ "eval_samples_per_second": 216.531,
141
+ "eval_steps_per_second": 27.134,
142
  "step": 854
143
  },
144
  {
145
  "epoch": 8.0,
146
+ "grad_norm": 2.5121915340423584,
147
  "learning_rate": 3e-05,
148
+ "loss": 0.231,
149
  "step": 976
150
  },
151
  {
152
  "epoch": 8.0,
153
+ "eval_accuracy": 0.8596491228070176,
154
+ "eval_f1": 0.819047619047619,
155
+ "eval_loss": 0.3082924783229828,
156
+ "eval_precision": 0.848619017499473,
157
+ "eval_recall": 0.8006910347335879,
158
+ "eval_runtime": 1.8502,
159
+ "eval_samples_per_second": 215.65,
160
+ "eval_steps_per_second": 27.024,
161
  "step": 976
162
  },
163
  {
164
  "epoch": 9.0,
165
+ "grad_norm": 6.5755615234375,
166
  "learning_rate": 2.7500000000000004e-05,
167
+ "loss": 0.2278,
168
  "step": 1098
169
  },
170
  {
171
  "epoch": 9.0,
172
  "eval_accuracy": 0.8696741854636592,
173
+ "eval_f1": 0.842789598108747,
174
+ "eval_loss": 0.29386115074157715,
175
+ "eval_precision": 0.842789598108747,
176
+ "eval_recall": 0.842789598108747,
177
+ "eval_runtime": 1.8446,
178
+ "eval_samples_per_second": 216.311,
179
+ "eval_steps_per_second": 27.107,
180
  "step": 1098
181
  },
182
  {
183
  "epoch": 10.0,
184
+ "grad_norm": 2.957425117492676,
185
  "learning_rate": 2.5e-05,
186
+ "loss": 0.2117,
187
  "step": 1220
188
  },
189
  {
190
  "epoch": 10.0,
191
+ "eval_accuracy": 0.87468671679198,
192
+ "eval_f1": 0.8403693509153758,
193
+ "eval_loss": 0.32400935888290405,
194
+ "eval_precision": 0.8647333925035843,
195
+ "eval_recall": 0.8238316057464994,
196
+ "eval_runtime": 1.8502,
197
+ "eval_samples_per_second": 215.651,
198
+ "eval_steps_per_second": 27.024,
199
  "step": 1220
200
  },
201
  {
202
  "epoch": 11.0,
203
+ "grad_norm": 0.6018930673599243,
204
  "learning_rate": 2.25e-05,
205
+ "loss": 0.2014,
206
  "step": 1342
207
  },
208
  {
209
  "epoch": 11.0,
210
+ "eval_accuracy": 0.8796992481203008,
211
+ "eval_f1": 0.8533986527862829,
212
+ "eval_loss": 0.2902269959449768,
213
+ "eval_precision": 0.8572003218020917,
214
+ "eval_recall": 0.8498817966903074,
215
+ "eval_runtime": 1.8555,
216
+ "eval_samples_per_second": 215.034,
217
+ "eval_steps_per_second": 26.947,
218
  "step": 1342
219
  },
220
  {
221
  "epoch": 12.0,
222
+ "grad_norm": 5.241063594818115,
223
  "learning_rate": 2e-05,
224
+ "loss": 0.1869,
225
  "step": 1464
226
  },
227
  {
228
  "epoch": 12.0,
229
+ "eval_accuracy": 0.8947368421052632,
230
+ "eval_f1": 0.8748655913978494,
231
+ "eval_loss": 0.27595847845077515,
232
+ "eval_precision": 0.86983032873807,
233
+ "eval_recall": 0.8805237315875614,
234
+ "eval_runtime": 1.849,
235
+ "eval_samples_per_second": 215.789,
236
+ "eval_steps_per_second": 27.041,
237
  "step": 1464
238
  },
239
  {
240
  "epoch": 13.0,
241
+ "grad_norm": 1.832000970840454,
242
  "learning_rate": 1.75e-05,
243
+ "loss": 0.1685,
244
  "step": 1586
245
  },
246
  {
247
  "epoch": 13.0,
248
+ "eval_accuracy": 0.8822055137844611,
249
+ "eval_f1": 0.8560793854229822,
250
+ "eval_loss": 0.3015528619289398,
251
+ "eval_precision": 0.8609538327526132,
252
+ "eval_recall": 0.8516548463356974,
253
+ "eval_runtime": 1.8472,
254
+ "eval_samples_per_second": 216.004,
255
+ "eval_steps_per_second": 27.068,
256
  "step": 1586
257
  },
258
  {
259
  "epoch": 14.0,
260
+ "grad_norm": 4.6754584312438965,
261
  "learning_rate": 1.5e-05,
262
+ "loss": 0.1703,
263
  "step": 1708
264
  },
265
  {
266
  "epoch": 14.0,
267
+ "eval_accuracy": 0.8897243107769424,
268
+ "eval_f1": 0.8695225637671682,
269
+ "eval_loss": 0.30271315574645996,
270
+ "eval_precision": 0.8631532846715328,
271
+ "eval_recall": 0.8769776322967813,
272
+ "eval_runtime": 1.8487,
273
+ "eval_samples_per_second": 215.823,
274
+ "eval_steps_per_second": 27.045,
275
  "step": 1708
276
  },
277
  {
278
  "epoch": 15.0,
279
+ "grad_norm": 0.3340052366256714,
280
  "learning_rate": 1.25e-05,
281
+ "loss": 0.1617,
282
  "step": 1830
283
  },
284
  {
285
  "epoch": 15.0,
286
+ "eval_accuracy": 0.8897243107769424,
287
+ "eval_f1": 0.8695225637671682,
288
+ "eval_loss": 0.30203503370285034,
289
+ "eval_precision": 0.8631532846715328,
290
+ "eval_recall": 0.8769776322967813,
291
+ "eval_runtime": 1.8491,
292
+ "eval_samples_per_second": 215.78,
293
+ "eval_steps_per_second": 27.04,
294
  "step": 1830
295
  },
296
  {
297
  "epoch": 16.0,
298
+ "grad_norm": 0.8184535503387451,
299
  "learning_rate": 1e-05,
300
+ "loss": 0.1524,
301
  "step": 1952
302
  },
303
  {
304
  "epoch": 16.0,
305
  "eval_accuracy": 0.8822055137844611,
306
+ "eval_f1": 0.8622085718274466,
307
+ "eval_loss": 0.31774768233299255,
308
+ "eval_precision": 0.8530168716042322,
309
+ "eval_recall": 0.8741589379887251,
310
+ "eval_runtime": 1.8434,
311
+ "eval_samples_per_second": 216.442,
312
+ "eval_steps_per_second": 27.123,
313
  "step": 1952
314
  },
315
  {
316
  "epoch": 17.0,
317
+ "grad_norm": 1.1876635551452637,
318
  "learning_rate": 7.5e-06,
319
+ "loss": 0.1356,
320
  "step": 2074
321
  },
322
  {
323
  "epoch": 17.0,
324
+ "eval_accuracy": 0.8897243107769424,
325
+ "eval_f1": 0.8682773109243698,
326
+ "eval_loss": 0.3291165828704834,
327
+ "eval_precision": 0.864855223259409,
328
+ "eval_recall": 0.8719767230405528,
329
+ "eval_runtime": 1.8462,
330
+ "eval_samples_per_second": 216.117,
331
+ "eval_steps_per_second": 27.082,
332
  "step": 2074
333
  },
334
  {
335
  "epoch": 18.0,
336
+ "grad_norm": 4.472387313842773,
337
  "learning_rate": 5e-06,
338
+ "loss": 0.1474,
339
  "step": 2196
340
  },
341
  {
342
  "epoch": 18.0,
343
+ "eval_accuracy": 0.8897243107769424,
344
+ "eval_f1": 0.8682773109243698,
345
+ "eval_loss": 0.3268108665943146,
346
+ "eval_precision": 0.864855223259409,
347
+ "eval_recall": 0.8719767230405528,
348
+ "eval_runtime": 1.8536,
349
+ "eval_samples_per_second": 215.257,
350
+ "eval_steps_per_second": 26.975,
351
  "step": 2196
352
  },
353
  {
354
  "epoch": 19.0,
355
+ "grad_norm": 6.7281928062438965,
356
  "learning_rate": 2.5e-06,
357
+ "loss": 0.145,
358
  "step": 2318
359
  },
360
  {
361
  "epoch": 19.0,
362
+ "eval_accuracy": 0.8872180451127819,
363
+ "eval_f1": 0.8656072933585827,
364
+ "eval_loss": 0.3314986526966095,
365
+ "eval_precision": 0.8614399005740664,
366
+ "eval_recall": 0.8702036733951628,
367
+ "eval_runtime": 1.8468,
368
+ "eval_samples_per_second": 216.052,
369
+ "eval_steps_per_second": 27.074,
370
  "step": 2318
371
  },
372
  {
373
  "epoch": 20.0,
374
+ "grad_norm": 1.0261917114257812,
375
  "learning_rate": 0.0,
376
+ "loss": 0.1466,
377
  "step": 2440
378
  },
379
  {
380
  "epoch": 20.0,
381
+ "eval_accuracy": 0.8872180451127819,
382
+ "eval_f1": 0.8649563392675828,
383
+ "eval_loss": 0.32958927750587463,
384
+ "eval_precision": 0.8623655913978494,
385
+ "eval_recall": 0.8677032187670486,
386
+ "eval_runtime": 1.8152,
387
+ "eval_samples_per_second": 219.809,
388
+ "eval_steps_per_second": 27.545,
389
  "step": 2440
390
  },
391
  {
392
  "epoch": 20.0,
393
  "step": 2440,
394
  "total_flos": 8444128359504000.0,
395
+ "train_loss": 0.24024685015443895,
396
+ "train_runtime": 634.5102,
397
+ "train_samples_per_second": 114.671,
398
+ "train_steps_per_second": 3.845
399
  }
400
  ],
401
  "logging_steps": 500,