apwic commited on
Commit
a104f4a
1 Parent(s): 617500b

End of training

Browse files
README.md CHANGED
@@ -1,4 +1,6 @@
1
  ---
 
 
2
  license: mit
3
  base_model: indolem/indobert-base-uncased
4
  tags:
 
1
  ---
2
+ language:
3
+ - id
4
  license: mit
5
  base_model: indolem/indobert-base-uncased
6
  tags:
all_results.json CHANGED
@@ -1,21 +1,21 @@
1
  {
2
- "accuracy": 0.9050445103857567,
3
  "epoch": 20.0,
4
- "eval_accuracy": 0.8822055137844611,
5
- "eval_f1": 0.858259325044405,
6
- "eval_loss": 0.2988053262233734,
7
- "eval_precision": 0.8573798178418481,
8
- "eval_recall": 0.8591562102200401,
9
- "eval_runtime": 5.0411,
10
  "eval_samples": 399,
11
- "eval_samples_per_second": 79.149,
12
- "eval_steps_per_second": 9.918,
13
- "f1": 0.8853524853524853,
14
- "precision": 0.886901203996243,
15
- "recall": 0.8838493180342065,
16
- "train_loss": 0.2301350734272941,
17
- "train_runtime": 1951.0131,
18
  "train_samples": 3638,
19
- "train_samples_per_second": 37.293,
20
- "train_steps_per_second": 1.251
21
  }
 
1
  {
2
+ "accuracy": 0.9099901088031652,
3
  "epoch": 20.0,
4
+ "eval_accuracy": 0.899749373433584,
5
+ "eval_f1": 0.8784574144023395,
6
+ "eval_loss": 0.29978305101394653,
7
+ "eval_precision": 0.8804194733619106,
8
+ "eval_recall": 0.8765684669939988,
9
+ "eval_runtime": 1.8044,
10
  "eval_samples": 399,
11
+ "eval_samples_per_second": 221.128,
12
+ "eval_steps_per_second": 27.71,
13
+ "f1": 0.8918557700784624,
14
+ "precision": 0.8914757994814175,
15
+ "recall": 0.892238579779173,
16
+ "train_loss": 0.2259816083751741,
17
+ "train_runtime": 638.5228,
18
  "train_samples": 3638,
19
+ "train_samples_per_second": 113.951,
20
+ "train_steps_per_second": 3.821
21
  }
eval_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_accuracy": 0.8822055137844611,
4
- "eval_f1": 0.858259325044405,
5
- "eval_loss": 0.2988053262233734,
6
- "eval_precision": 0.8573798178418481,
7
- "eval_recall": 0.8591562102200401,
8
- "eval_runtime": 5.0411,
9
  "eval_samples": 399,
10
- "eval_samples_per_second": 79.149,
11
- "eval_steps_per_second": 9.918
12
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_accuracy": 0.899749373433584,
4
+ "eval_f1": 0.8784574144023395,
5
+ "eval_loss": 0.29978305101394653,
6
+ "eval_precision": 0.8804194733619106,
7
+ "eval_recall": 0.8765684669939988,
8
+ "eval_runtime": 1.8044,
9
  "eval_samples": 399,
10
+ "eval_samples_per_second": 221.128,
11
+ "eval_steps_per_second": 27.71
12
  }
predict_results.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "accuracy": 0.9050445103857567,
3
- "f1": 0.8853524853524853,
4
- "precision": 0.886901203996243,
5
- "recall": 0.8838493180342065
6
  }
 
1
  {
2
+ "accuracy": 0.9099901088031652,
3
+ "f1": 0.8918557700784624,
4
+ "precision": 0.8914757994814175,
5
+ "recall": 0.892238579779173
6
  }
predict_results.txt CHANGED
@@ -5,14 +5,14 @@ index prediction
5
  3 1
6
  4 0
7
  5 1
8
- 6 1
9
  7 1
10
  8 1
11
  9 1
12
  10 1
13
  11 1
14
  12 1
15
- 13 0
16
  14 1
17
  15 1
18
  16 0
@@ -106,14 +106,14 @@ index prediction
106
  104 1
107
  105 1
108
  106 1
109
- 107 1
110
  108 1
111
  109 1
112
  110 0
113
  111 1
114
  112 0
115
- 113 0
116
- 114 0
117
  115 1
118
  116 1
119
  117 1
@@ -130,7 +130,7 @@ index prediction
130
  128 1
131
  129 1
132
  130 1
133
- 131 0
134
  132 1
135
  133 1
136
  134 1
@@ -139,20 +139,20 @@ index prediction
139
  137 1
140
  138 1
141
  139 1
142
- 140 0
143
  141 1
144
  142 1
145
- 143 0
146
  144 1
147
  145 1
148
  146 1
149
  147 1
150
  148 1
151
  149 1
152
- 150 1
153
  151 1
154
  152 1
155
- 153 0
156
  154 1
157
  155 1
158
  156 1
@@ -164,7 +164,7 @@ index prediction
164
  162 1
165
  163 1
166
  164 0
167
- 165 1
168
  166 1
169
  167 1
170
  168 1
@@ -176,7 +176,7 @@ index prediction
176
  174 1
177
  175 1
178
  176 1
179
- 177 1
180
  178 1
181
  179 1
182
  180 1
@@ -184,7 +184,7 @@ index prediction
184
  182 1
185
  183 1
186
  184 1
187
- 185 0
188
  186 1
189
  187 1
190
  188 1
@@ -202,23 +202,23 @@ index prediction
202
  200 1
203
  201 1
204
  202 1
205
- 203 0
206
  204 0
207
  205 1
208
  206 1
209
- 207 1
210
  208 1
211
  209 1
212
  210 1
213
  211 1
214
  212 1
215
  213 1
216
- 214 0
217
  215 0
218
  216 1
219
  217 0
220
  218 1
221
- 219 1
222
  220 1
223
  221 1
224
  222 1
@@ -274,19 +274,19 @@ index prediction
274
  272 1
275
  273 1
276
  274 1
277
- 275 0
278
  276 1
279
  277 1
280
  278 1
281
  279 1
282
- 280 1
283
- 281 0
284
  282 1
285
  283 1
286
  284 1
287
  285 1
288
  286 0
289
- 287 0
290
  288 1
291
  289 1
292
  290 1
@@ -440,7 +440,7 @@ index prediction
440
  438 0
441
  439 0
442
  440 0
443
- 441 1
444
  442 0
445
  443 0
446
  444 0
@@ -477,7 +477,7 @@ index prediction
477
  475 0
478
  476 0
479
  477 0
480
- 478 1
481
  479 0
482
  480 0
483
  481 0
@@ -485,7 +485,7 @@ index prediction
485
  483 0
486
  484 0
487
  485 0
488
- 486 1
489
  487 0
490
  488 0
491
  489 0
@@ -530,7 +530,7 @@ index prediction
530
  528 0
531
  529 0
532
  530 0
533
- 531 0
534
  532 0
535
  533 0
536
  534 0
@@ -538,7 +538,7 @@ index prediction
538
  536 0
539
  537 0
540
  538 0
541
- 539 1
542
  540 0
543
  541 0
544
  542 0
@@ -551,7 +551,7 @@ index prediction
551
  549 0
552
  550 0
553
  551 0
554
- 552 1
555
  553 0
556
  554 0
557
  555 0
@@ -561,7 +561,7 @@ index prediction
561
  559 0
562
  560 0
563
  561 0
564
- 562 0
565
  563 0
566
  564 0
567
  565 0
@@ -580,7 +580,7 @@ index prediction
580
  578 0
581
  579 0
582
  580 0
583
- 581 0
584
  582 0
585
  583 0
586
  584 0
@@ -610,7 +610,7 @@ index prediction
610
  608 0
611
  609 0
612
  610 0
613
- 611 1
614
  612 0
615
  613 0
616
  614 0
@@ -663,8 +663,8 @@ index prediction
663
  661 0
664
  662 0
665
  663 1
666
- 664 0
667
- 665 1
668
  666 0
669
  667 0
670
  668 0
@@ -685,7 +685,7 @@ index prediction
685
  683 0
686
  684 0
687
  685 0
688
- 686 1
689
  687 0
690
  688 0
691
  689 0
@@ -859,7 +859,7 @@ index prediction
859
  857 0
860
  858 0
861
  859 0
862
- 860 0
863
  861 0
864
  862 0
865
  863 0
@@ -869,7 +869,7 @@ index prediction
869
  867 1
870
  868 0
871
  869 0
872
- 870 0
873
  871 0
874
  872 0
875
  873 0
@@ -924,7 +924,7 @@ index prediction
924
  922 0
925
  923 0
926
  924 0
927
- 925 0
928
  926 0
929
  927 0
930
  928 0
@@ -951,7 +951,7 @@ index prediction
951
  949 0
952
  950 0
953
  951 0
954
- 952 0
955
  953 1
956
  954 0
957
  955 0
@@ -973,18 +973,18 @@ index prediction
973
  971 0
974
  972 0
975
  973 0
976
- 974 1
977
  975 0
978
- 976 1
979
  977 0
980
  978 0
981
  979 0
982
  980 1
983
- 981 0
984
  982 0
985
  983 0
986
  984 0
987
- 985 0
988
  986 1
989
  987 0
990
  988 0
 
5
  3 1
6
  4 0
7
  5 1
8
+ 6 0
9
  7 1
10
  8 1
11
  9 1
12
  10 1
13
  11 1
14
  12 1
15
+ 13 1
16
  14 1
17
  15 1
18
  16 0
 
106
  104 1
107
  105 1
108
  106 1
109
+ 107 0
110
  108 1
111
  109 1
112
  110 0
113
  111 1
114
  112 0
115
+ 113 1
116
+ 114 1
117
  115 1
118
  116 1
119
  117 1
 
130
  128 1
131
  129 1
132
  130 1
133
+ 131 1
134
  132 1
135
  133 1
136
  134 1
 
139
  137 1
140
  138 1
141
  139 1
142
+ 140 1
143
  141 1
144
  142 1
145
+ 143 1
146
  144 1
147
  145 1
148
  146 1
149
  147 1
150
  148 1
151
  149 1
152
+ 150 0
153
  151 1
154
  152 1
155
+ 153 1
156
  154 1
157
  155 1
158
  156 1
 
164
  162 1
165
  163 1
166
  164 0
167
+ 165 0
168
  166 1
169
  167 1
170
  168 1
 
176
  174 1
177
  175 1
178
  176 1
179
+ 177 0
180
  178 1
181
  179 1
182
  180 1
 
184
  182 1
185
  183 1
186
  184 1
187
+ 185 1
188
  186 1
189
  187 1
190
  188 1
 
202
  200 1
203
  201 1
204
  202 1
205
+ 203 1
206
  204 0
207
  205 1
208
  206 1
209
+ 207 0
210
  208 1
211
  209 1
212
  210 1
213
  211 1
214
  212 1
215
  213 1
216
+ 214 1
217
  215 0
218
  216 1
219
  217 0
220
  218 1
221
+ 219 0
222
  220 1
223
  221 1
224
  222 1
 
274
  272 1
275
  273 1
276
  274 1
277
+ 275 1
278
  276 1
279
  277 1
280
  278 1
281
  279 1
282
+ 280 0
283
+ 281 1
284
  282 1
285
  283 1
286
  284 1
287
  285 1
288
  286 0
289
+ 287 1
290
  288 1
291
  289 1
292
  290 1
 
440
  438 0
441
  439 0
442
  440 0
443
+ 441 0
444
  442 0
445
  443 0
446
  444 0
 
477
  475 0
478
  476 0
479
  477 0
480
+ 478 0
481
  479 0
482
  480 0
483
  481 0
 
485
  483 0
486
  484 0
487
  485 0
488
+ 486 0
489
  487 0
490
  488 0
491
  489 0
 
530
  528 0
531
  529 0
532
  530 0
533
+ 531 1
534
  532 0
535
  533 0
536
  534 0
 
538
  536 0
539
  537 0
540
  538 0
541
+ 539 0
542
  540 0
543
  541 0
544
  542 0
 
551
  549 0
552
  550 0
553
  551 0
554
+ 552 0
555
  553 0
556
  554 0
557
  555 0
 
561
  559 0
562
  560 0
563
  561 0
564
+ 562 1
565
  563 0
566
  564 0
567
  565 0
 
580
  578 0
581
  579 0
582
  580 0
583
+ 581 1
584
  582 0
585
  583 0
586
  584 0
 
610
  608 0
611
  609 0
612
  610 0
613
+ 611 0
614
  612 0
615
  613 0
616
  614 0
 
663
  661 0
664
  662 0
665
  663 1
666
+ 664 1
667
+ 665 0
668
  666 0
669
  667 0
670
  668 0
 
685
  683 0
686
  684 0
687
  685 0
688
+ 686 0
689
  687 0
690
  688 0
691
  689 0
 
859
  857 0
860
  858 0
861
  859 0
862
+ 860 1
863
  861 0
864
  862 0
865
  863 0
 
869
  867 1
870
  868 0
871
  869 0
872
+ 870 1
873
  871 0
874
  872 0
875
  873 0
 
924
  922 0
925
  923 0
926
  924 0
927
+ 925 1
928
  926 0
929
  927 0
930
  928 0
 
951
  949 0
952
  950 0
953
  951 0
954
+ 952 1
955
  953 1
956
  954 0
957
  955 0
 
973
  971 0
974
  972 0
975
  973 0
976
+ 974 0
977
  975 0
978
+ 976 0
979
  977 0
980
  978 0
981
  979 0
982
  980 1
983
+ 981 1
984
  982 0
985
  983 0
986
  984 0
987
+ 985 1
988
  986 1
989
  987 0
990
  988 0
runs/Jun03_14-17-29_a358b85c7679/events.out.tfevents.1717424904.a358b85c7679.134923.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21e250776b51f52f2d8b87824014b4a4e9233bca4e2fe412fc47b85531bbd611
3
+ size 560
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
- "train_loss": 0.2301350734272941,
4
- "train_runtime": 1951.0131,
5
  "train_samples": 3638,
6
- "train_samples_per_second": 37.293,
7
- "train_steps_per_second": 1.251
8
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "train_loss": 0.2259816083751741,
4
+ "train_runtime": 638.5228,
5
  "train_samples": 3638,
6
+ "train_samples_per_second": 113.951,
7
+ "train_steps_per_second": 3.821
8
  }
trainer_state.json CHANGED
@@ -10,392 +10,392 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 4.0894904136657715,
14
  "learning_rate": 4.75e-05,
15
- "loss": 0.541,
16
  "step": 122
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_accuracy": 0.7293233082706767,
21
- "eval_f1": 0.6396989966555184,
22
- "eval_loss": 0.4985284209251404,
23
- "eval_precision": 0.664766661583041,
24
- "eval_recall": 0.630978359701764,
25
- "eval_runtime": 5.2157,
26
- "eval_samples_per_second": 76.5,
27
- "eval_steps_per_second": 9.586,
28
  "step": 122
29
  },
30
  {
31
  "epoch": 2.0,
32
- "grad_norm": 5.95181941986084,
33
  "learning_rate": 4.5e-05,
34
- "loss": 0.4477,
35
  "step": 244
36
  },
37
  {
38
  "epoch": 2.0,
39
- "eval_accuracy": 0.7644110275689223,
40
- "eval_f1": 0.7461557203963398,
41
- "eval_loss": 0.44652456045150757,
42
- "eval_precision": 0.7426785714285714,
43
- "eval_recall": 0.7883251500272777,
44
- "eval_runtime": 5.1119,
45
- "eval_samples_per_second": 78.053,
46
- "eval_steps_per_second": 9.781,
47
  "step": 244
48
  },
49
  {
50
  "epoch": 3.0,
51
- "grad_norm": 3.869917154312134,
52
  "learning_rate": 4.25e-05,
53
- "loss": 0.347,
54
  "step": 366
55
  },
56
  {
57
  "epoch": 3.0,
58
- "eval_accuracy": 0.8646616541353384,
59
- "eval_f1": 0.8255102040816327,
60
- "eval_loss": 0.323697566986084,
61
- "eval_precision": 0.8556293485135991,
62
- "eval_recall": 0.8067375886524822,
63
- "eval_runtime": 5.1398,
64
- "eval_samples_per_second": 77.63,
65
- "eval_steps_per_second": 9.728,
66
  "step": 366
67
  },
68
  {
69
  "epoch": 4.0,
70
- "grad_norm": 3.4031054973602295,
71
  "learning_rate": 4e-05,
72
- "loss": 0.3005,
73
  "step": 488
74
  },
75
  {
76
  "epoch": 4.0,
77
- "eval_accuracy": 0.8922305764411027,
78
- "eval_f1": 0.8683279483657071,
79
- "eval_loss": 0.2991793751716614,
80
- "eval_precision": 0.873366724738676,
81
- "eval_recall": 0.863747954173486,
82
- "eval_runtime": 5.1616,
83
- "eval_samples_per_second": 77.302,
84
- "eval_steps_per_second": 9.687,
85
  "step": 488
86
  },
87
  {
88
  "epoch": 5.0,
89
- "grad_norm": 3.774945020675659,
90
  "learning_rate": 3.7500000000000003e-05,
91
- "loss": 0.281,
92
  "step": 610
93
  },
94
  {
95
  "epoch": 5.0,
96
- "eval_accuracy": 0.8646616541353384,
97
- "eval_f1": 0.8342105263157895,
98
- "eval_loss": 0.2868594527244568,
99
- "eval_precision": 0.8398085585585586,
100
- "eval_recall": 0.82924168030551,
101
- "eval_runtime": 5.1483,
102
- "eval_samples_per_second": 77.502,
103
- "eval_steps_per_second": 9.712,
104
  "step": 610
105
  },
106
  {
107
  "epoch": 6.0,
108
- "grad_norm": 2.3688924312591553,
109
  "learning_rate": 3.5e-05,
110
- "loss": 0.2419,
111
  "step": 732
112
  },
113
  {
114
  "epoch": 6.0,
115
  "eval_accuracy": 0.87468671679198,
116
- "eval_f1": 0.8537390029325513,
117
- "eval_loss": 0.29453349113464355,
118
- "eval_precision": 0.8442805058676086,
119
- "eval_recall": 0.8663393344244408,
120
- "eval_runtime": 5.0969,
121
- "eval_samples_per_second": 78.284,
122
- "eval_steps_per_second": 9.81,
123
  "step": 732
124
  },
125
  {
126
  "epoch": 7.0,
127
- "grad_norm": 0.5997357964515686,
128
  "learning_rate": 3.2500000000000004e-05,
129
- "loss": 0.2394,
130
  "step": 854
131
  },
132
  {
133
  "epoch": 7.0,
134
- "eval_accuracy": 0.8771929824561403,
135
- "eval_f1": 0.8529524583135901,
136
- "eval_loss": 0.2835337221622467,
137
- "eval_precision": 0.8504480286738352,
138
- "eval_recall": 0.8556101109292599,
139
- "eval_runtime": 5.0928,
140
- "eval_samples_per_second": 78.346,
141
- "eval_steps_per_second": 9.818,
142
  "step": 854
143
  },
144
  {
145
  "epoch": 8.0,
146
- "grad_norm": 2.8720760345458984,
147
  "learning_rate": 3e-05,
148
- "loss": 0.2192,
149
  "step": 976
150
  },
151
  {
152
  "epoch": 8.0,
153
- "eval_accuracy": 0.8771929824561403,
154
- "eval_f1": 0.850729517396184,
155
- "eval_loss": 0.28028008341789246,
156
- "eval_precision": 0.8535087719298247,
157
- "eval_recall": 0.8481087470449173,
158
- "eval_runtime": 5.1964,
159
- "eval_samples_per_second": 76.784,
160
- "eval_steps_per_second": 9.622,
161
  "step": 976
162
  },
163
  {
164
  "epoch": 9.0,
165
- "grad_norm": 5.954423904418945,
166
  "learning_rate": 2.7500000000000004e-05,
167
- "loss": 0.2144,
168
  "step": 1098
169
  },
170
  {
171
  "epoch": 9.0,
172
- "eval_accuracy": 0.87468671679198,
173
- "eval_f1": 0.8480717680029244,
174
- "eval_loss": 0.28611448407173157,
175
- "eval_precision": 0.8498775260257195,
176
- "eval_recall": 0.8463356973995272,
177
- "eval_runtime": 5.1448,
178
- "eval_samples_per_second": 77.554,
179
- "eval_steps_per_second": 9.719,
180
  "step": 1098
181
  },
182
  {
183
  "epoch": 10.0,
184
- "grad_norm": 4.511594772338867,
185
  "learning_rate": 2.5e-05,
186
- "loss": 0.2056,
187
  "step": 1220
188
  },
189
  {
190
  "epoch": 10.0,
191
  "eval_accuracy": 0.8922305764411027,
192
- "eval_f1": 0.8696722245432793,
193
- "eval_loss": 0.27244648337364197,
194
- "eval_precision": 0.8706135006701596,
195
- "eval_recall": 0.8687488634297145,
196
- "eval_runtime": 5.104,
197
- "eval_samples_per_second": 78.174,
198
- "eval_steps_per_second": 9.796,
199
  "step": 1220
200
  },
201
  {
202
  "epoch": 11.0,
203
- "grad_norm": 2.713789463043213,
204
  "learning_rate": 2.25e-05,
205
- "loss": 0.1822,
206
  "step": 1342
207
  },
208
  {
209
  "epoch": 11.0,
210
  "eval_accuracy": 0.8872180451127819,
211
- "eval_f1": 0.8662440310793597,
212
- "eval_loss": 0.28133633732795715,
213
- "eval_precision": 0.8606158357771261,
214
- "eval_recall": 0.872704128023277,
215
- "eval_runtime": 5.1442,
216
- "eval_samples_per_second": 77.563,
217
- "eval_steps_per_second": 9.72,
218
  "step": 1342
219
  },
220
  {
221
  "epoch": 12.0,
222
- "grad_norm": 6.766155242919922,
223
  "learning_rate": 2e-05,
224
- "loss": 0.1817,
225
  "step": 1464
226
  },
227
  {
228
  "epoch": 12.0,
229
- "eval_accuracy": 0.8872180451127819,
230
- "eval_f1": 0.8584001703456596,
231
- "eval_loss": 0.2900215685367584,
232
- "eval_precision": 0.8759655377302435,
233
- "eval_recall": 0.8451991271140207,
234
- "eval_runtime": 5.1374,
235
- "eval_samples_per_second": 77.665,
236
- "eval_steps_per_second": 9.733,
237
  "step": 1464
238
  },
239
  {
240
  "epoch": 13.0,
241
- "grad_norm": 0.9034644365310669,
242
  "learning_rate": 1.75e-05,
243
- "loss": 0.1621,
244
  "step": 1586
245
  },
246
  {
247
  "epoch": 13.0,
248
- "eval_accuracy": 0.8947368421052632,
249
- "eval_f1": 0.8710526315789473,
250
- "eval_loss": 0.29263192415237427,
251
- "eval_precision": 0.8772522522522522,
252
- "eval_recall": 0.8655210038188761,
253
- "eval_runtime": 5.1149,
254
- "eval_samples_per_second": 78.008,
255
- "eval_steps_per_second": 9.775,
256
  "step": 1586
257
  },
258
  {
259
  "epoch": 14.0,
260
- "grad_norm": 6.05497932434082,
261
  "learning_rate": 1.5e-05,
262
- "loss": 0.1577,
263
  "step": 1708
264
  },
265
  {
266
  "epoch": 14.0,
267
- "eval_accuracy": 0.8922305764411027,
268
- "eval_f1": 0.8709582797445791,
269
- "eval_loss": 0.29044803977012634,
270
- "eval_precision": 0.8683243727598566,
271
- "eval_recall": 0.8737497726859429,
272
- "eval_runtime": 5.096,
273
- "eval_samples_per_second": 78.297,
274
- "eval_steps_per_second": 9.812,
275
  "step": 1708
276
  },
277
  {
278
  "epoch": 15.0,
279
- "grad_norm": 3.013443946838379,
280
  "learning_rate": 1.25e-05,
281
- "loss": 0.1612,
282
  "step": 1830
283
  },
284
  {
285
  "epoch": 15.0,
286
- "eval_accuracy": 0.8847117794486216,
287
- "eval_f1": 0.8587719298245614,
288
- "eval_loss": 0.2996305227279663,
289
- "eval_precision": 0.864771021021021,
290
- "eval_recall": 0.8534278959810875,
291
- "eval_runtime": 5.1189,
292
- "eval_samples_per_second": 77.946,
293
- "eval_steps_per_second": 9.768,
294
  "step": 1830
295
  },
296
  {
297
  "epoch": 16.0,
298
- "grad_norm": 1.775810718536377,
299
  "learning_rate": 1e-05,
300
- "loss": 0.1496,
301
  "step": 1952
302
  },
303
  {
304
  "epoch": 16.0,
305
- "eval_accuracy": 0.8872180451127819,
306
- "eval_f1": 0.8649563392675828,
307
- "eval_loss": 0.29704856872558594,
308
- "eval_precision": 0.8623655913978494,
309
- "eval_recall": 0.8677032187670486,
310
- "eval_runtime": 5.2102,
311
- "eval_samples_per_second": 76.58,
312
- "eval_steps_per_second": 9.596,
313
  "step": 1952
314
  },
315
  {
316
  "epoch": 17.0,
317
- "grad_norm": 0.5526378154754639,
318
  "learning_rate": 7.5e-06,
319
- "loss": 0.149,
320
  "step": 2074
321
  },
322
  {
323
  "epoch": 17.0,
324
- "eval_accuracy": 0.8822055137844611,
325
- "eval_f1": 0.858259325044405,
326
- "eval_loss": 0.29482966661453247,
327
- "eval_precision": 0.8573798178418481,
328
- "eval_recall": 0.8591562102200401,
329
- "eval_runtime": 5.1218,
330
- "eval_samples_per_second": 77.902,
331
- "eval_steps_per_second": 9.762,
332
  "step": 2074
333
  },
334
  {
335
  "epoch": 18.0,
336
- "grad_norm": 4.444484233856201,
337
  "learning_rate": 5e-06,
338
- "loss": 0.1424,
339
  "step": 2196
340
  },
341
  {
342
  "epoch": 18.0,
343
- "eval_accuracy": 0.8847117794486216,
344
- "eval_f1": 0.8609292598654301,
345
- "eval_loss": 0.29769569635391235,
346
- "eval_precision": 0.8609292598654301,
347
- "eval_recall": 0.8609292598654301,
348
- "eval_runtime": 5.0934,
349
- "eval_samples_per_second": 78.336,
350
- "eval_steps_per_second": 9.817,
351
  "step": 2196
352
  },
353
  {
354
  "epoch": 19.0,
355
- "grad_norm": 3.0727450847625732,
356
  "learning_rate": 2.5e-06,
357
- "loss": 0.1383,
358
  "step": 2318
359
  },
360
  {
361
  "epoch": 19.0,
362
- "eval_accuracy": 0.8847117794486216,
363
- "eval_f1": 0.8602260265626904,
364
- "eval_loss": 0.2990491986274719,
365
- "eval_precision": 0.8620943049601959,
366
- "eval_recall": 0.8584288052373159,
367
- "eval_runtime": 5.1176,
368
- "eval_samples_per_second": 77.966,
369
- "eval_steps_per_second": 9.77,
370
  "step": 2318
371
  },
372
  {
373
  "epoch": 20.0,
374
- "grad_norm": 4.4248151779174805,
375
  "learning_rate": 0.0,
376
- "loss": 0.1407,
377
  "step": 2440
378
  },
379
  {
380
  "epoch": 20.0,
381
- "eval_accuracy": 0.8822055137844611,
382
- "eval_f1": 0.858259325044405,
383
- "eval_loss": 0.2988053262233734,
384
- "eval_precision": 0.8573798178418481,
385
- "eval_recall": 0.8591562102200401,
386
- "eval_runtime": 5.0823,
387
- "eval_samples_per_second": 78.508,
388
- "eval_steps_per_second": 9.838,
389
  "step": 2440
390
  },
391
  {
392
  "epoch": 20.0,
393
  "step": 2440,
394
  "total_flos": 8444128359504000.0,
395
- "train_loss": 0.2301350734272941,
396
- "train_runtime": 1951.0131,
397
- "train_samples_per_second": 37.293,
398
- "train_steps_per_second": 1.251
399
  }
400
  ],
401
  "logging_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 4.803397178649902,
14
  "learning_rate": 4.75e-05,
15
+ "loss": 0.5411,
16
  "step": 122
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_accuracy": 0.7368421052631579,
21
+ "eval_f1": 0.6508662716567915,
22
+ "eval_loss": 0.49393221735954285,
23
+ "eval_precision": 0.6761904761904762,
24
+ "eval_recall": 0.6412984178941625,
25
+ "eval_runtime": 1.7881,
26
+ "eval_samples_per_second": 223.142,
27
+ "eval_steps_per_second": 27.963,
28
  "step": 122
29
  },
30
  {
31
  "epoch": 2.0,
32
+ "grad_norm": 3.498361587524414,
33
  "learning_rate": 4.5e-05,
34
+ "loss": 0.4231,
35
  "step": 244
36
  },
37
  {
38
  "epoch": 2.0,
39
+ "eval_accuracy": 0.8245614035087719,
40
+ "eval_f1": 0.7995262704565029,
41
+ "eval_loss": 0.3851858377456665,
42
+ "eval_precision": 0.7887596899224806,
43
+ "eval_recall": 0.8183760683760684,
44
+ "eval_runtime": 1.7893,
45
+ "eval_samples_per_second": 222.988,
46
+ "eval_steps_per_second": 27.943,
47
  "step": 244
48
  },
49
  {
50
  "epoch": 3.0,
51
+ "grad_norm": 3.0573930740356445,
52
  "learning_rate": 4.25e-05,
53
+ "loss": 0.3331,
54
  "step": 366
55
  },
56
  {
57
  "epoch": 3.0,
58
+ "eval_accuracy": 0.8471177944862155,
59
+ "eval_f1": 0.8080535642463387,
60
+ "eval_loss": 0.33130019903182983,
61
+ "eval_precision": 0.8232818379877204,
62
+ "eval_recall": 0.796826695762866,
63
+ "eval_runtime": 1.7976,
64
+ "eval_samples_per_second": 221.96,
65
+ "eval_steps_per_second": 27.815,
66
  "step": 366
67
  },
68
  {
69
  "epoch": 4.0,
70
+ "grad_norm": 3.250720500946045,
71
  "learning_rate": 4e-05,
72
+ "loss": 0.2924,
73
  "step": 488
74
  },
75
  {
76
  "epoch": 4.0,
77
+ "eval_accuracy": 0.8822055137844611,
78
+ "eval_f1": 0.8560793854229822,
79
+ "eval_loss": 0.30566585063934326,
80
+ "eval_precision": 0.8609538327526132,
81
+ "eval_recall": 0.8516548463356974,
82
+ "eval_runtime": 1.7957,
83
+ "eval_samples_per_second": 222.195,
84
+ "eval_steps_per_second": 27.844,
85
  "step": 488
86
  },
87
  {
88
  "epoch": 5.0,
89
+ "grad_norm": 2.8374593257904053,
90
  "learning_rate": 3.7500000000000003e-05,
91
+ "loss": 0.2705,
92
  "step": 610
93
  },
94
  {
95
  "epoch": 5.0,
96
+ "eval_accuracy": 0.87468671679198,
97
+ "eval_f1": 0.8422176526415692,
98
+ "eval_loss": 0.3068975508213043,
99
+ "eval_precision": 0.8604724566416373,
100
+ "eval_recall": 0.8288325150027278,
101
+ "eval_runtime": 1.7956,
102
+ "eval_samples_per_second": 222.215,
103
+ "eval_steps_per_second": 27.846,
104
  "step": 610
105
  },
106
  {
107
  "epoch": 6.0,
108
+ "grad_norm": 4.305149078369141,
109
  "learning_rate": 3.5e-05,
110
+ "loss": 0.2461,
111
  "step": 732
112
  },
113
  {
114
  "epoch": 6.0,
115
  "eval_accuracy": 0.87468671679198,
116
+ "eval_f1": 0.8562182887453875,
117
+ "eval_loss": 0.31193241477012634,
118
+ "eval_precision": 0.8435805201992252,
119
+ "eval_recall": 0.8763411529368976,
120
+ "eval_runtime": 1.7944,
121
+ "eval_samples_per_second": 222.356,
122
+ "eval_steps_per_second": 27.864,
123
  "step": 732
124
  },
125
  {
126
  "epoch": 7.0,
127
+ "grad_norm": 0.4272942841053009,
128
  "learning_rate": 3.2500000000000004e-05,
129
+ "loss": 0.2313,
130
  "step": 854
131
  },
132
  {
133
  "epoch": 7.0,
134
+ "eval_accuracy": 0.8872180451127819,
135
+ "eval_f1": 0.8662440310793597,
136
+ "eval_loss": 0.28799474239349365,
137
+ "eval_precision": 0.8606158357771261,
138
+ "eval_recall": 0.872704128023277,
139
+ "eval_runtime": 1.8006,
140
+ "eval_samples_per_second": 221.595,
141
+ "eval_steps_per_second": 27.769,
142
  "step": 854
143
  },
144
  {
145
  "epoch": 8.0,
146
+ "grad_norm": 2.898254871368408,
147
  "learning_rate": 3e-05,
148
+ "loss": 0.2183,
149
  "step": 976
150
  },
151
  {
152
  "epoch": 8.0,
153
+ "eval_accuracy": 0.8922305764411027,
154
+ "eval_f1": 0.8676331036823873,
155
+ "eval_loss": 0.27734559774398804,
156
+ "eval_precision": 0.8749292230261088,
157
+ "eval_recall": 0.8612474995453718,
158
+ "eval_runtime": 1.7976,
159
+ "eval_samples_per_second": 221.957,
160
+ "eval_steps_per_second": 27.814,
161
  "step": 976
162
  },
163
  {
164
  "epoch": 9.0,
165
+ "grad_norm": 6.726850509643555,
166
  "learning_rate": 2.7500000000000004e-05,
167
+ "loss": 0.2093,
168
  "step": 1098
169
  },
170
  {
171
  "epoch": 9.0,
172
+ "eval_accuracy": 0.8847117794486216,
173
+ "eval_f1": 0.8587719298245614,
174
+ "eval_loss": 0.28041473031044006,
175
+ "eval_precision": 0.864771021021021,
176
+ "eval_recall": 0.8534278959810875,
177
+ "eval_runtime": 1.7999,
178
+ "eval_samples_per_second": 221.684,
179
+ "eval_steps_per_second": 27.78,
180
  "step": 1098
181
  },
182
  {
183
  "epoch": 10.0,
184
+ "grad_norm": 2.7861063480377197,
185
  "learning_rate": 2.5e-05,
186
+ "loss": 0.1986,
187
  "step": 1220
188
  },
189
  {
190
  "epoch": 10.0,
191
  "eval_accuracy": 0.8922305764411027,
192
+ "eval_f1": 0.8654532336864889,
193
+ "eval_loss": 0.28901827335357666,
194
+ "eval_precision": 0.8804269882659713,
195
+ "eval_recall": 0.8537461356610292,
196
+ "eval_runtime": 1.7942,
197
+ "eval_samples_per_second": 222.384,
198
+ "eval_steps_per_second": 27.868,
199
  "step": 1220
200
  },
201
  {
202
  "epoch": 11.0,
203
+ "grad_norm": 1.18681001663208,
204
  "learning_rate": 2.25e-05,
205
+ "loss": 0.1881,
206
  "step": 1342
207
  },
208
  {
209
  "epoch": 11.0,
210
  "eval_accuracy": 0.8872180451127819,
211
+ "eval_f1": 0.8629148629148629,
212
+ "eval_loss": 0.29107582569122314,
213
+ "eval_precision": 0.8657894736842104,
214
+ "eval_recall": 0.860201854882706,
215
+ "eval_runtime": 1.8131,
216
+ "eval_samples_per_second": 220.068,
217
+ "eval_steps_per_second": 27.577,
218
  "step": 1342
219
  },
220
  {
221
  "epoch": 12.0,
222
+ "grad_norm": 3.137617588043213,
223
  "learning_rate": 2e-05,
224
+ "loss": 0.1802,
225
  "step": 1464
226
  },
227
  {
228
  "epoch": 12.0,
229
+ "eval_accuracy": 0.8822055137844611,
230
+ "eval_f1": 0.8568221901555235,
231
+ "eval_loss": 0.28662246465682983,
232
+ "eval_precision": 0.8596491228070176,
233
+ "eval_recall": 0.8541553009638116,
234
+ "eval_runtime": 1.8223,
235
+ "eval_samples_per_second": 218.951,
236
+ "eval_steps_per_second": 27.437,
237
  "step": 1464
238
  },
239
  {
240
  "epoch": 13.0,
241
+ "grad_norm": 0.8551347851753235,
242
  "learning_rate": 1.75e-05,
243
+ "loss": 0.169,
244
  "step": 1586
245
  },
246
  {
247
  "epoch": 13.0,
248
+ "eval_accuracy": 0.8847117794486216,
249
+ "eval_f1": 0.8564658408408408,
250
+ "eval_loss": 0.2963576018810272,
251
+ "eval_precision": 0.8697278911564625,
252
+ "eval_recall": 0.8459265320967448,
253
+ "eval_runtime": 1.8327,
254
+ "eval_samples_per_second": 217.71,
255
+ "eval_steps_per_second": 27.282,
256
  "step": 1586
257
  },
258
  {
259
  "epoch": 14.0,
260
+ "grad_norm": 5.66387414932251,
261
  "learning_rate": 1.5e-05,
262
+ "loss": 0.1709,
263
  "step": 1708
264
  },
265
  {
266
  "epoch": 14.0,
267
+ "eval_accuracy": 0.8872180451127819,
268
+ "eval_f1": 0.8629148629148629,
269
+ "eval_loss": 0.29438090324401855,
270
+ "eval_precision": 0.8657894736842104,
271
+ "eval_recall": 0.860201854882706,
272
+ "eval_runtime": 1.8299,
273
+ "eval_samples_per_second": 218.043,
274
+ "eval_steps_per_second": 27.324,
275
  "step": 1708
276
  },
277
  {
278
  "epoch": 15.0,
279
+ "grad_norm": 3.363886833190918,
280
  "learning_rate": 1.25e-05,
281
+ "loss": 0.1492,
282
  "step": 1830
283
  },
284
  {
285
  "epoch": 15.0,
286
+ "eval_accuracy": 0.8872180451127819,
287
+ "eval_f1": 0.8636104675452922,
288
+ "eval_loss": 0.28655046224594116,
289
+ "eval_precision": 0.8645363713902765,
290
+ "eval_recall": 0.8627023095108202,
291
+ "eval_runtime": 1.8286,
292
+ "eval_samples_per_second": 218.201,
293
+ "eval_steps_per_second": 27.344,
294
  "step": 1830
295
  },
296
  {
297
  "epoch": 16.0,
298
+ "grad_norm": 0.6481318473815918,
299
  "learning_rate": 1e-05,
300
+ "loss": 0.1493,
301
  "step": 1952
302
  },
303
  {
304
  "epoch": 16.0,
305
+ "eval_accuracy": 0.8947368421052632,
306
+ "eval_f1": 0.8742647058823529,
307
+ "eval_loss": 0.2950553596019745,
308
+ "eval_precision": 0.8707860158154468,
309
+ "eval_recall": 0.8780232769594472,
310
+ "eval_runtime": 1.8286,
311
+ "eval_samples_per_second": 218.205,
312
+ "eval_steps_per_second": 27.344,
313
  "step": 1952
314
  },
315
  {
316
  "epoch": 17.0,
317
+ "grad_norm": 0.7666211724281311,
318
  "learning_rate": 7.5e-06,
319
+ "loss": 0.1425,
320
  "step": 2074
321
  },
322
  {
323
  "epoch": 17.0,
324
+ "eval_accuracy": 0.8947368421052632,
325
+ "eval_f1": 0.8710526315789473,
326
+ "eval_loss": 0.304831326007843,
327
+ "eval_precision": 0.8772522522522522,
328
+ "eval_recall": 0.8655210038188761,
329
+ "eval_runtime": 1.7983,
330
+ "eval_samples_per_second": 221.876,
331
+ "eval_steps_per_second": 27.804,
332
  "step": 2074
333
  },
334
  {
335
  "epoch": 18.0,
336
+ "grad_norm": 3.819899797439575,
337
  "learning_rate": 5e-06,
338
+ "loss": 0.1375,
339
  "step": 2196
340
  },
341
  {
342
  "epoch": 18.0,
343
+ "eval_accuracy": 0.899749373433584,
344
+ "eval_f1": 0.8790689216221131,
345
+ "eval_loss": 0.298705130815506,
346
+ "eval_precision": 0.8790689216221131,
347
+ "eval_recall": 0.8790689216221131,
348
+ "eval_runtime": 1.8038,
349
+ "eval_samples_per_second": 221.2,
350
+ "eval_steps_per_second": 27.719,
351
  "step": 2196
352
  },
353
  {
354
  "epoch": 19.0,
355
+ "grad_norm": 1.7430284023284912,
356
  "learning_rate": 2.5e-06,
357
+ "loss": 0.1326,
358
  "step": 2318
359
  },
360
  {
361
  "epoch": 19.0,
362
+ "eval_accuracy": 0.899749373433584,
363
+ "eval_f1": 0.8778322106552358,
364
+ "eval_loss": 0.30734923481941223,
365
+ "eval_precision": 0.8818924438393465,
366
+ "eval_recall": 0.8740680123658847,
367
+ "eval_runtime": 1.8064,
368
+ "eval_samples_per_second": 220.886,
369
+ "eval_steps_per_second": 27.68,
370
  "step": 2318
371
  },
372
  {
373
  "epoch": 20.0,
374
+ "grad_norm": 3.931983709335327,
375
  "learning_rate": 0.0,
376
+ "loss": 0.1365,
377
  "step": 2440
378
  },
379
  {
380
  "epoch": 20.0,
381
+ "eval_accuracy": 0.899749373433584,
382
+ "eval_f1": 0.8784574144023395,
383
+ "eval_loss": 0.29978305101394653,
384
+ "eval_precision": 0.8804194733619106,
385
+ "eval_recall": 0.8765684669939988,
386
+ "eval_runtime": 1.8111,
387
+ "eval_samples_per_second": 220.313,
388
+ "eval_steps_per_second": 27.608,
389
  "step": 2440
390
  },
391
  {
392
  "epoch": 20.0,
393
  "step": 2440,
394
  "total_flos": 8444128359504000.0,
395
+ "train_loss": 0.2259816083751741,
396
+ "train_runtime": 638.5228,
397
+ "train_samples_per_second": 113.951,
398
+ "train_steps_per_second": 3.821
399
  }
400
  ],
401
  "logging_steps": 500,