apwic commited on
Commit
e02e88e
1 Parent(s): 72f7343

End of training

Browse files
README.md CHANGED
@@ -1,4 +1,6 @@
1
  ---
 
 
2
  license: mit
3
  base_model: indolem/indobert-base-uncased
4
  tags:
 
1
  ---
2
+ language:
3
+ - id
4
  license: mit
5
  base_model: indolem/indobert-base-uncased
6
  tags:
all_results.json CHANGED
@@ -1,21 +1,21 @@
1
  {
2
- "accuracy": 0.9000989119683481,
3
  "epoch": 20.0,
4
- "eval_accuracy": 0.9022556390977443,
5
- "eval_f1": 0.884617951284618,
6
- "eval_loss": 0.2810901701450348,
7
- "eval_precision": 0.8772893772893773,
8
- "eval_recall": 0.8933442444080741,
9
- "eval_runtime": 5.4917,
10
  "eval_samples": 399,
11
- "eval_samples_per_second": 72.655,
12
- "eval_steps_per_second": 9.105,
13
- "f1": 0.881994210133999,
14
- "precision": 0.875823871126101,
15
- "recall": 0.8891323173658895,
16
- "train_loss": 0.21111928674041247,
17
- "train_runtime": 2276.8554,
18
  "train_samples": 3638,
19
- "train_samples_per_second": 31.956,
20
- "train_steps_per_second": 1.072
21
  }
 
1
  {
2
+ "accuracy": 0.8991097922848664,
3
  "epoch": 20.0,
4
+ "eval_accuracy": 0.8922305764411027,
5
+ "eval_f1": 0.8703223612108386,
6
+ "eval_loss": 0.3013243079185486,
7
+ "eval_precision": 0.8694131129742446,
8
+ "eval_recall": 0.8712493180578287,
9
+ "eval_runtime": 2.0053,
10
  "eval_samples": 399,
11
+ "eval_samples_per_second": 198.975,
12
+ "eval_steps_per_second": 24.934,
13
+ "f1": 0.8802744624529791,
14
+ "precision": 0.8755777484318257,
15
+ "recall": 0.8855012848630891,
16
+ "train_loss": 0.2159292881606055,
17
+ "train_runtime": 745.0279,
18
  "train_samples": 3638,
19
+ "train_samples_per_second": 97.661,
20
+ "train_steps_per_second": 3.275
21
  }
eval_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_accuracy": 0.9022556390977443,
4
- "eval_f1": 0.884617951284618,
5
- "eval_loss": 0.2810901701450348,
6
- "eval_precision": 0.8772893772893773,
7
- "eval_recall": 0.8933442444080741,
8
- "eval_runtime": 5.4917,
9
  "eval_samples": 399,
10
- "eval_samples_per_second": 72.655,
11
- "eval_steps_per_second": 9.105
12
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_accuracy": 0.8922305764411027,
4
+ "eval_f1": 0.8703223612108386,
5
+ "eval_loss": 0.3013243079185486,
6
+ "eval_precision": 0.8694131129742446,
7
+ "eval_recall": 0.8712493180578287,
8
+ "eval_runtime": 2.0053,
9
  "eval_samples": 399,
10
+ "eval_samples_per_second": 198.975,
11
+ "eval_steps_per_second": 24.934
12
  }
predict_results.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "accuracy": 0.9000989119683481,
3
- "f1": 0.881994210133999,
4
- "precision": 0.875823871126101,
5
- "recall": 0.8891323173658895
6
  }
 
1
  {
2
+ "accuracy": 0.8991097922848664,
3
+ "f1": 0.8802744624529791,
4
+ "precision": 0.8755777484318257,
5
+ "recall": 0.8855012848630891
6
  }
predict_results.txt CHANGED
@@ -12,7 +12,7 @@ index prediction
12
  10 1
13
  11 1
14
  12 1
15
- 13 1
16
  14 1
17
  15 0
18
  16 1
@@ -36,7 +36,7 @@ index prediction
36
  34 0
37
  35 1
38
  36 1
39
- 37 0
40
  38 1
41
  39 0
42
  40 1
@@ -44,7 +44,7 @@ index prediction
44
  42 1
45
  43 0
46
  44 0
47
- 45 0
48
  46 1
49
  47 1
50
  48 1
@@ -80,7 +80,7 @@ index prediction
80
  78 1
81
  79 1
82
  80 1
83
- 81 0
84
  82 1
85
  83 1
86
  84 1
@@ -163,7 +163,7 @@ index prediction
163
  161 1
164
  162 1
165
  163 1
166
- 164 1
167
  165 0
168
  166 1
169
  167 1
@@ -173,7 +173,7 @@ index prediction
173
  171 1
174
  172 1
175
  173 0
176
- 174 1
177
  175 1
178
  176 1
179
  177 0
@@ -206,7 +206,7 @@ index prediction
206
  204 1
207
  205 0
208
  206 1
209
- 207 1
210
  208 1
211
  209 1
212
  210 1
@@ -229,7 +229,7 @@ index prediction
229
  227 0
230
  228 1
231
  229 0
232
- 230 1
233
  231 1
234
  232 1
235
  233 1
@@ -273,7 +273,7 @@ index prediction
273
  271 1
274
  272 1
275
  273 1
276
- 274 1
277
  275 1
278
  276 1
279
  277 1
@@ -315,7 +315,7 @@ index prediction
315
  313 0
316
  314 1
317
  315 0
318
- 316 1
319
  317 0
320
  318 1
321
  319 0
@@ -345,7 +345,7 @@ index prediction
345
  343 0
346
  344 0
347
  345 0
348
- 346 0
349
  347 0
350
  348 0
351
  349 0
@@ -400,7 +400,7 @@ index prediction
400
  398 1
401
  399 0
402
  400 0
403
- 401 1
404
  402 1
405
  403 0
406
  404 0
@@ -470,7 +470,7 @@ index prediction
470
  468 0
471
  469 0
472
  470 0
473
- 471 0
474
  472 0
475
  473 0
476
  474 0
@@ -486,7 +486,7 @@ index prediction
486
  484 0
487
  485 0
488
  486 0
489
- 487 0
490
  488 0
491
  489 0
492
  490 0
@@ -502,7 +502,7 @@ index prediction
502
  500 0
503
  501 0
504
  502 0
505
- 503 1
506
  504 0
507
  505 0
508
  506 0
@@ -523,7 +523,7 @@ index prediction
523
  521 0
524
  522 0
525
  523 0
526
- 524 0
527
  525 0
528
  526 0
529
  527 0
@@ -559,7 +559,7 @@ index prediction
559
  557 0
560
  558 0
561
  559 0
562
- 560 1
563
  561 0
564
  562 0
565
  563 0
@@ -609,7 +609,7 @@ index prediction
609
  607 0
610
  608 1
611
  609 0
612
- 610 1
613
  611 1
614
  612 0
615
  613 0
@@ -662,7 +662,7 @@ index prediction
662
  660 0
663
  661 0
664
  662 0
665
- 663 0
666
  664 0
667
  665 0
668
  666 0
@@ -726,7 +726,7 @@ index prediction
726
  724 0
727
  725 0
728
  726 0
729
- 727 1
730
  728 1
731
  729 0
732
  730 0
@@ -755,7 +755,7 @@ index prediction
755
  753 0
756
  754 0
757
  755 0
758
- 756 1
759
  757 0
760
  758 0
761
  759 0
@@ -848,7 +848,7 @@ index prediction
848
  846 0
849
  847 0
850
  848 0
851
- 849 1
852
  850 0
853
  851 0
854
  852 0
@@ -858,7 +858,7 @@ index prediction
858
  856 0
859
  857 0
860
  858 0
861
- 859 0
862
  860 0
863
  861 0
864
  862 0
@@ -867,7 +867,7 @@ index prediction
867
  865 0
868
  866 0
869
  867 0
870
- 868 1
871
  869 0
872
  870 0
873
  871 0
@@ -890,7 +890,7 @@ index prediction
890
  888 0
891
  889 0
892
  890 0
893
- 891 1
894
  892 0
895
  893 0
896
  894 0
@@ -905,13 +905,13 @@ index prediction
905
  903 0
906
  904 0
907
  905 0
908
- 906 1
909
  907 1
910
  908 0
911
  909 0
912
  910 0
913
  911 0
914
- 912 0
915
  913 0
916
  914 0
917
  915 0
@@ -954,7 +954,7 @@ index prediction
954
  952 0
955
  953 0
956
  954 0
957
- 955 0
958
  956 0
959
  957 1
960
  958 0
@@ -984,7 +984,7 @@ index prediction
984
  982 0
985
  983 0
986
  984 0
987
- 985 0
988
  986 1
989
  987 0
990
  988 0
 
12
  10 1
13
  11 1
14
  12 1
15
+ 13 0
16
  14 1
17
  15 0
18
  16 1
 
36
  34 0
37
  35 1
38
  36 1
39
+ 37 1
40
  38 1
41
  39 0
42
  40 1
 
44
  42 1
45
  43 0
46
  44 0
47
+ 45 1
48
  46 1
49
  47 1
50
  48 1
 
80
  78 1
81
  79 1
82
  80 1
83
+ 81 1
84
  82 1
85
  83 1
86
  84 1
 
163
  161 1
164
  162 1
165
  163 1
166
+ 164 0
167
  165 0
168
  166 1
169
  167 1
 
173
  171 1
174
  172 1
175
  173 0
176
+ 174 0
177
  175 1
178
  176 1
179
  177 0
 
206
  204 1
207
  205 0
208
  206 1
209
+ 207 0
210
  208 1
211
  209 1
212
  210 1
 
229
  227 0
230
  228 1
231
  229 0
232
+ 230 0
233
  231 1
234
  232 1
235
  233 1
 
273
  271 1
274
  272 1
275
  273 1
276
+ 274 0
277
  275 1
278
  276 1
279
  277 1
 
315
  313 0
316
  314 1
317
  315 0
318
+ 316 0
319
  317 0
320
  318 1
321
  319 0
 
345
  343 0
346
  344 0
347
  345 0
348
+ 346 1
349
  347 0
350
  348 0
351
  349 0
 
400
  398 1
401
  399 0
402
  400 0
403
+ 401 0
404
  402 1
405
  403 0
406
  404 0
 
470
  468 0
471
  469 0
472
  470 0
473
+ 471 1
474
  472 0
475
  473 0
476
  474 0
 
486
  484 0
487
  485 0
488
  486 0
489
+ 487 1
490
  488 0
491
  489 0
492
  490 0
 
502
  500 0
503
  501 0
504
  502 0
505
+ 503 0
506
  504 0
507
  505 0
508
  506 0
 
523
  521 0
524
  522 0
525
  523 0
526
+ 524 1
527
  525 0
528
  526 0
529
  527 0
 
559
  557 0
560
  558 0
561
  559 0
562
+ 560 0
563
  561 0
564
  562 0
565
  563 0
 
609
  607 0
610
  608 1
611
  609 0
612
+ 610 0
613
  611 1
614
  612 0
615
  613 0
 
662
  660 0
663
  661 0
664
  662 0
665
+ 663 1
666
  664 0
667
  665 0
668
  666 0
 
726
  724 0
727
  725 0
728
  726 0
729
+ 727 0
730
  728 1
731
  729 0
732
  730 0
 
755
  753 0
756
  754 0
757
  755 0
758
+ 756 0
759
  757 0
760
  758 0
761
  759 0
 
848
  846 0
849
  847 0
850
  848 0
851
+ 849 0
852
  850 0
853
  851 0
854
  852 0
 
858
  856 0
859
  857 0
860
  858 0
861
+ 859 1
862
  860 0
863
  861 0
864
  862 0
 
867
  865 0
868
  866 0
869
  867 0
870
+ 868 0
871
  869 0
872
  870 0
873
  871 0
 
890
  888 0
891
  889 0
892
  890 0
893
+ 891 0
894
  892 0
895
  893 0
896
  894 0
 
905
  903 0
906
  904 0
907
  905 0
908
+ 906 0
909
  907 1
910
  908 0
911
  909 0
912
  910 0
913
  911 0
914
+ 912 1
915
  913 0
916
  914 0
917
  915 0
 
954
  952 0
955
  953 0
956
  954 0
957
+ 955 1
958
  956 0
959
  957 1
960
  958 0
 
984
  982 0
985
  983 0
986
  984 0
987
+ 985 1
988
  986 1
989
  987 0
990
  988 0
runs/Jun03_15-35-02_a358b85c7679/events.out.tfevents.1717429663.a358b85c7679.176319.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64b97d0aeb9a761ac51c9b5a1eb0e7c6d525fdf7f65fd7b686b2580bee14a17d
3
+ size 560
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
- "train_loss": 0.21111928674041247,
4
- "train_runtime": 2276.8554,
5
  "train_samples": 3638,
6
- "train_samples_per_second": 31.956,
7
- "train_steps_per_second": 1.072
8
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "train_loss": 0.2159292881606055,
4
+ "train_runtime": 745.0279,
5
  "train_samples": 3638,
6
+ "train_samples_per_second": 97.661,
7
+ "train_steps_per_second": 3.275
8
  }
trainer_state.json CHANGED
@@ -10,392 +10,392 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 4.3501811027526855,
14
  "learning_rate": 4.75e-05,
15
- "loss": 0.5459,
16
  "step": 122
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_accuracy": 0.7468671679197995,
21
- "eval_f1": 0.6572777139941319,
22
- "eval_loss": 0.46394461393356323,
23
- "eval_precision": 0.6922477833692786,
24
- "eval_recall": 0.6458901618476087,
25
- "eval_runtime": 5.2607,
26
- "eval_samples_per_second": 75.846,
27
- "eval_steps_per_second": 9.504,
28
  "step": 122
29
  },
30
  {
31
  "epoch": 2.0,
32
- "grad_norm": 3.848093271255493,
33
  "learning_rate": 4.5e-05,
34
- "loss": 0.4335,
35
  "step": 244
36
  },
37
  {
38
  "epoch": 2.0,
39
  "eval_accuracy": 0.7844611528822055,
40
- "eval_f1": 0.7634169884169884,
41
- "eval_loss": 0.41080254316329956,
42
- "eval_precision": 0.7551731309140064,
43
- "eval_recall": 0.7975086379341698,
44
- "eval_runtime": 5.5475,
45
- "eval_samples_per_second": 71.924,
46
- "eval_steps_per_second": 9.013,
47
  "step": 244
48
  },
49
  {
50
  "epoch": 3.0,
51
- "grad_norm": 4.800747394561768,
52
  "learning_rate": 4.25e-05,
53
- "loss": 0.3375,
54
  "step": 366
55
  },
56
  {
57
  "epoch": 3.0,
58
- "eval_accuracy": 0.8596491228070176,
59
- "eval_f1": 0.8271551457392166,
60
- "eval_loss": 0.3282613754272461,
61
- "eval_precision": 0.8347358430876305,
62
- "eval_recall": 0.8206946717585015,
63
- "eval_runtime": 5.6194,
64
- "eval_samples_per_second": 71.004,
65
- "eval_steps_per_second": 8.898,
66
  "step": 366
67
  },
68
  {
69
  "epoch": 4.0,
70
- "grad_norm": 2.052788019180298,
71
  "learning_rate": 4e-05,
72
- "loss": 0.2801,
73
  "step": 488
74
  },
75
  {
76
  "epoch": 4.0,
77
- "eval_accuracy": 0.8596491228070176,
78
- "eval_f1": 0.8347043853938569,
79
- "eval_loss": 0.32024893164634705,
80
- "eval_precision": 0.8277993283927745,
81
- "eval_recall": 0.8431987634115294,
82
- "eval_runtime": 5.5699,
83
- "eval_samples_per_second": 71.636,
84
- "eval_steps_per_second": 8.977,
85
  "step": 488
86
  },
87
  {
88
  "epoch": 5.0,
89
- "grad_norm": 0.35927698016166687,
90
  "learning_rate": 3.7500000000000003e-05,
91
- "loss": 0.2572,
92
  "step": 610
93
  },
94
  {
95
  "epoch": 5.0,
96
- "eval_accuracy": 0.87468671679198,
97
- "eval_f1": 0.8550061050061051,
98
- "eval_loss": 0.31085968017578125,
99
- "eval_precision": 0.8437710437710437,
100
- "eval_recall": 0.8713402436806692,
101
- "eval_runtime": 5.607,
102
- "eval_samples_per_second": 71.161,
103
- "eval_steps_per_second": 8.917,
104
  "step": 610
105
  },
106
  {
107
  "epoch": 6.0,
108
- "grad_norm": 1.9262036085128784,
109
  "learning_rate": 3.5e-05,
110
- "loss": 0.2339,
111
  "step": 732
112
  },
113
  {
114
  "epoch": 6.0,
115
- "eval_accuracy": 0.8671679197994987,
116
- "eval_f1": 0.8472754847795472,
117
- "eval_loss": 0.3074397146701813,
118
- "eval_precision": 0.8352906879893387,
119
- "eval_recall": 0.8660210947444991,
120
- "eval_runtime": 5.5884,
121
- "eval_samples_per_second": 71.398,
122
- "eval_steps_per_second": 8.947,
123
  "step": 732
124
  },
125
  {
126
  "epoch": 7.0,
127
- "grad_norm": 0.7879945635795593,
128
  "learning_rate": 3.2500000000000004e-05,
129
- "loss": 0.2249,
130
  "step": 854
131
  },
132
  {
133
  "epoch": 7.0,
134
- "eval_accuracy": 0.8671679197994987,
135
- "eval_f1": 0.8472754847795472,
136
- "eval_loss": 0.2915370464324951,
137
- "eval_precision": 0.8352906879893387,
138
- "eval_recall": 0.8660210947444991,
139
- "eval_runtime": 5.5808,
140
- "eval_samples_per_second": 71.495,
141
- "eval_steps_per_second": 8.959,
142
  "step": 854
143
  },
144
  {
145
  "epoch": 8.0,
146
- "grad_norm": 2.2202210426330566,
147
  "learning_rate": 3e-05,
148
- "loss": 0.193,
149
  "step": 976
150
  },
151
  {
152
  "epoch": 8.0,
153
- "eval_accuracy": 0.8972431077694235,
154
- "eval_f1": 0.8751002084335417,
155
- "eval_loss": 0.25404733419418335,
156
- "eval_precision": 0.8780701754385964,
157
- "eval_recall": 0.8722949627204946,
158
- "eval_runtime": 5.6126,
159
- "eval_samples_per_second": 71.09,
160
- "eval_steps_per_second": 8.909,
161
  "step": 976
162
  },
163
  {
164
  "epoch": 9.0,
165
- "grad_norm": 8.847825050354004,
166
  "learning_rate": 2.7500000000000004e-05,
167
- "loss": 0.1899,
168
  "step": 1098
169
  },
170
  {
171
  "epoch": 9.0,
172
- "eval_accuracy": 0.8822055137844611,
173
- "eval_f1": 0.8628173897776901,
174
- "eval_loss": 0.26359453797340393,
175
- "eval_precision": 0.8526132107592781,
176
- "eval_recall": 0.8766593926168395,
177
- "eval_runtime": 5.6712,
178
- "eval_samples_per_second": 70.355,
179
- "eval_steps_per_second": 8.816,
180
  "step": 1098
181
  },
182
  {
183
  "epoch": 10.0,
184
- "grad_norm": 1.7180150747299194,
185
  "learning_rate": 2.5e-05,
186
- "loss": 0.1801,
187
  "step": 1220
188
  },
189
  {
190
  "epoch": 10.0,
191
- "eval_accuracy": 0.9072681704260651,
192
- "eval_f1": 0.8900228699985846,
193
- "eval_loss": 0.23711469769477844,
194
- "eval_precision": 0.8840175953079179,
195
- "eval_recall": 0.8968903436988543,
196
- "eval_runtime": 5.6014,
197
- "eval_samples_per_second": 71.232,
198
- "eval_steps_per_second": 8.926,
199
  "step": 1220
200
  },
201
  {
202
  "epoch": 11.0,
203
- "grad_norm": 0.6990593671798706,
204
  "learning_rate": 2.25e-05,
205
- "loss": 0.157,
206
  "step": 1342
207
  },
208
  {
209
  "epoch": 11.0,
210
- "eval_accuracy": 0.899749373433584,
211
- "eval_f1": 0.882467302933899,
212
- "eval_loss": 0.2566535174846649,
213
- "eval_precision": 0.8732988802756245,
214
- "eval_recall": 0.8940716493907983,
215
- "eval_runtime": 5.5662,
216
- "eval_samples_per_second": 71.683,
217
- "eval_steps_per_second": 8.983,
218
  "step": 1342
219
  },
220
  {
221
  "epoch": 12.0,
222
- "grad_norm": 3.7353861331939697,
223
  "learning_rate": 2e-05,
224
- "loss": 0.1553,
225
  "step": 1464
226
  },
227
  {
228
  "epoch": 12.0,
229
- "eval_accuracy": 0.8972431077694235,
230
- "eval_f1": 0.8792560061999484,
231
- "eval_loss": 0.25933051109313965,
232
- "eval_precision": 0.8707622232472325,
233
- "eval_recall": 0.889798145117294,
234
- "eval_runtime": 5.5701,
235
- "eval_samples_per_second": 71.632,
236
- "eval_steps_per_second": 8.976,
237
  "step": 1464
238
  },
239
  {
240
  "epoch": 13.0,
241
- "grad_norm": 1.2111871242523193,
242
  "learning_rate": 1.75e-05,
243
- "loss": 0.1381,
244
  "step": 1586
245
  },
246
  {
247
  "epoch": 13.0,
248
- "eval_accuracy": 0.9172932330827067,
249
- "eval_f1": 0.8999810095332144,
250
- "eval_loss": 0.24901245534420013,
251
- "eval_precision": 0.9009991470695747,
252
- "eval_recall": 0.8989816330241862,
253
- "eval_runtime": 5.6002,
254
- "eval_samples_per_second": 71.248,
255
- "eval_steps_per_second": 8.928,
256
  "step": 1586
257
  },
258
  {
259
  "epoch": 14.0,
260
- "grad_norm": 9.943241119384766,
261
  "learning_rate": 1.5e-05,
262
- "loss": 0.1476,
263
  "step": 1708
264
  },
265
  {
266
  "epoch": 14.0,
267
- "eval_accuracy": 0.899749373433584,
268
- "eval_f1": 0.881931703852755,
269
- "eval_loss": 0.27014100551605225,
270
- "eval_precision": 0.8740012737378415,
271
- "eval_recall": 0.8915711947626841,
272
- "eval_runtime": 5.5922,
273
- "eval_samples_per_second": 71.349,
274
- "eval_steps_per_second": 8.941,
275
  "step": 1708
276
  },
277
  {
278
  "epoch": 15.0,
279
- "grad_norm": 0.20924903452396393,
280
  "learning_rate": 1.25e-05,
281
- "loss": 0.1447,
282
  "step": 1830
283
  },
284
  {
285
  "epoch": 15.0,
286
- "eval_accuracy": 0.9122807017543859,
287
- "eval_f1": 0.8959675797283908,
288
- "eval_loss": 0.2611282765865326,
289
- "eval_precision": 0.8898680351906159,
290
- "eval_recall": 0.9029368976177486,
291
- "eval_runtime": 5.5841,
292
- "eval_samples_per_second": 71.453,
293
- "eval_steps_per_second": 8.954,
294
  "step": 1830
295
  },
296
  {
297
  "epoch": 16.0,
298
- "grad_norm": 0.40175938606262207,
299
  "learning_rate": 1e-05,
300
- "loss": 0.1336,
301
  "step": 1952
302
  },
303
  {
304
  "epoch": 16.0,
305
- "eval_accuracy": 0.899749373433584,
306
- "eval_f1": 0.884004884004884,
307
- "eval_loss": 0.31003570556640625,
308
- "eval_precision": 0.8717592592592593,
309
- "eval_recall": 0.9015730132751409,
310
- "eval_runtime": 5.5685,
311
- "eval_samples_per_second": 71.653,
312
- "eval_steps_per_second": 8.979,
313
  "step": 1952
314
  },
315
  {
316
  "epoch": 17.0,
317
- "grad_norm": 7.287301540374756,
318
  "learning_rate": 7.5e-06,
319
- "loss": 0.1192,
320
  "step": 2074
321
  },
322
  {
323
  "epoch": 17.0,
324
- "eval_accuracy": 0.8972431077694235,
325
- "eval_f1": 0.8803300634230913,
326
- "eval_loss": 0.29346275329589844,
327
- "eval_precision": 0.8695948246510044,
328
- "eval_recall": 0.8947990543735225,
329
- "eval_runtime": 5.5988,
330
- "eval_samples_per_second": 71.266,
331
- "eval_steps_per_second": 8.931,
332
  "step": 2074
333
  },
334
  {
335
  "epoch": 18.0,
336
- "grad_norm": 6.602230072021484,
337
  "learning_rate": 5e-06,
338
- "loss": 0.1247,
339
  "step": 2196
340
  },
341
  {
342
  "epoch": 18.0,
343
- "eval_accuracy": 0.9022556390977443,
344
- "eval_f1": 0.885145957117024,
345
- "eval_loss": 0.28685662150382996,
346
- "eval_precision": 0.876513491697417,
347
- "eval_recall": 0.8958446990361884,
348
- "eval_runtime": 5.5796,
349
- "eval_samples_per_second": 71.51,
350
- "eval_steps_per_second": 8.961,
351
  "step": 2196
352
  },
353
  {
354
  "epoch": 19.0,
355
- "grad_norm": 7.744582653045654,
356
  "learning_rate": 2.5e-06,
357
- "loss": 0.117,
358
  "step": 2318
359
  },
360
  {
361
  "epoch": 19.0,
362
- "eval_accuracy": 0.9022556390977443,
363
- "eval_f1": 0.884617951284618,
364
- "eval_loss": 0.2761416733264923,
365
- "eval_precision": 0.8772893772893773,
366
- "eval_recall": 0.8933442444080741,
367
- "eval_runtime": 5.559,
368
- "eval_samples_per_second": 71.775,
369
- "eval_steps_per_second": 8.994,
370
  "step": 2318
371
  },
372
  {
373
  "epoch": 20.0,
374
- "grad_norm": 0.1699504405260086,
375
  "learning_rate": 0.0,
376
- "loss": 0.1092,
377
  "step": 2440
378
  },
379
  {
380
  "epoch": 20.0,
381
- "eval_accuracy": 0.9022556390977443,
382
- "eval_f1": 0.884617951284618,
383
- "eval_loss": 0.2810901701450348,
384
- "eval_precision": 0.8772893772893773,
385
- "eval_recall": 0.8933442444080741,
386
- "eval_runtime": 5.6476,
387
- "eval_samples_per_second": 70.649,
388
- "eval_steps_per_second": 8.853,
389
  "step": 2440
390
  },
391
  {
392
  "epoch": 20.0,
393
  "step": 2440,
394
  "total_flos": 8551203605328000.0,
395
- "train_loss": 0.21111928674041247,
396
- "train_runtime": 2276.8554,
397
- "train_samples_per_second": 31.956,
398
- "train_steps_per_second": 1.072
399
  }
400
  ],
401
  "logging_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 4.798983097076416,
14
  "learning_rate": 4.75e-05,
15
+ "loss": 0.5538,
16
  "step": 122
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_accuracy": 0.7192982456140351,
21
+ "eval_f1": 0.6358539765319426,
22
+ "eval_loss": 0.4789246916770935,
23
+ "eval_precision": 0.6517232767232768,
24
+ "eval_recall": 0.628887070376432,
25
+ "eval_runtime": 1.9849,
26
+ "eval_samples_per_second": 201.02,
27
+ "eval_steps_per_second": 25.19,
28
  "step": 122
29
  },
30
  {
31
  "epoch": 2.0,
32
+ "grad_norm": 4.214308261871338,
33
  "learning_rate": 4.5e-05,
34
+ "loss": 0.4356,
35
  "step": 244
36
  },
37
  {
38
  "epoch": 2.0,
39
  "eval_accuracy": 0.7844611528822055,
40
+ "eval_f1": 0.7610046247283668,
41
+ "eval_loss": 0.40878012776374817,
42
+ "eval_precision": 0.751842396167816,
43
+ "eval_recall": 0.7900072740498272,
44
+ "eval_runtime": 1.9915,
45
+ "eval_samples_per_second": 200.347,
46
+ "eval_steps_per_second": 25.106,
47
  "step": 244
48
  },
49
  {
50
  "epoch": 3.0,
51
+ "grad_norm": 4.247888088226318,
52
  "learning_rate": 4.25e-05,
53
+ "loss": 0.3417,
54
  "step": 366
55
  },
56
  {
57
  "epoch": 3.0,
58
+ "eval_accuracy": 0.8571428571428571,
59
+ "eval_f1": 0.820640215771169,
60
+ "eval_loss": 0.3368583023548126,
61
+ "eval_precision": 0.8364527629233511,
62
+ "eval_recall": 0.8089198036006546,
63
+ "eval_runtime": 2.0317,
64
+ "eval_samples_per_second": 196.389,
65
+ "eval_steps_per_second": 24.61,
66
  "step": 366
67
  },
68
  {
69
  "epoch": 4.0,
70
+ "grad_norm": 2.456321954727173,
71
  "learning_rate": 4e-05,
72
+ "loss": 0.2904,
73
  "step": 488
74
  },
75
  {
76
  "epoch": 4.0,
77
+ "eval_accuracy": 0.8671679197994987,
78
+ "eval_f1": 0.8377065410088949,
79
+ "eval_loss": 0.32671821117401123,
80
+ "eval_precision": 0.8423344947735192,
81
+ "eval_recall": 0.8335151845790143,
82
+ "eval_runtime": 2.0365,
83
+ "eval_samples_per_second": 195.924,
84
+ "eval_steps_per_second": 24.552,
85
  "step": 488
86
  },
87
  {
88
  "epoch": 5.0,
89
+ "grad_norm": 0.4103873074054718,
90
  "learning_rate": 3.7500000000000003e-05,
91
+ "loss": 0.263,
92
  "step": 610
93
  },
94
  {
95
  "epoch": 5.0,
96
+ "eval_accuracy": 0.8671679197994987,
97
+ "eval_f1": 0.8453047161322887,
98
+ "eval_loss": 0.3209967315196991,
99
+ "eval_precision": 0.835631596867552,
100
+ "eval_recall": 0.8585197308601564,
101
+ "eval_runtime": 2.0233,
102
+ "eval_samples_per_second": 197.202,
103
+ "eval_steps_per_second": 24.712,
104
  "step": 610
105
  },
106
  {
107
  "epoch": 6.0,
108
+ "grad_norm": 0.9892892837524414,
109
  "learning_rate": 3.5e-05,
110
+ "loss": 0.2463,
111
  "step": 732
112
  },
113
  {
114
  "epoch": 6.0,
115
+ "eval_accuracy": 0.8421052631578947,
116
+ "eval_f1": 0.8220326383234805,
117
+ "eval_loss": 0.3550713360309601,
118
+ "eval_precision": 0.8093437062560569,
119
+ "eval_recall": 0.8482905982905984,
120
+ "eval_runtime": 2.0535,
121
+ "eval_samples_per_second": 194.306,
122
+ "eval_steps_per_second": 24.349,
123
  "step": 732
124
  },
125
  {
126
  "epoch": 7.0,
127
+ "grad_norm": 0.5059836506843567,
128
  "learning_rate": 3.2500000000000004e-05,
129
+ "loss": 0.2303,
130
  "step": 854
131
  },
132
  {
133
  "epoch": 7.0,
134
+ "eval_accuracy": 0.8721804511278195,
135
+ "eval_f1": 0.8524204953403198,
136
+ "eval_loss": 0.30282893776893616,
137
+ "eval_precision": 0.8409052784611943,
138
+ "eval_recall": 0.8695671940352792,
139
+ "eval_runtime": 2.0509,
140
+ "eval_samples_per_second": 194.546,
141
+ "eval_steps_per_second": 24.379,
142
  "step": 854
143
  },
144
  {
145
  "epoch": 8.0,
146
+ "grad_norm": 4.26680326461792,
147
  "learning_rate": 3e-05,
148
+ "loss": 0.2208,
149
  "step": 976
150
  },
151
  {
152
  "epoch": 8.0,
153
+ "eval_accuracy": 0.8897243107769424,
154
+ "eval_f1": 0.8656154317207594,
155
+ "eval_loss": 0.2672863006591797,
156
+ "eval_precision": 0.869546382820719,
157
+ "eval_recall": 0.861974904528096,
158
+ "eval_runtime": 2.0377,
159
+ "eval_samples_per_second": 195.809,
160
+ "eval_steps_per_second": 24.538,
161
  "step": 976
162
  },
163
  {
164
  "epoch": 9.0,
165
+ "grad_norm": 7.01179313659668,
166
  "learning_rate": 2.7500000000000004e-05,
167
+ "loss": 0.1994,
168
  "step": 1098
169
  },
170
  {
171
  "epoch": 9.0,
172
+ "eval_accuracy": 0.8897243107769424,
173
+ "eval_f1": 0.8682773109243698,
174
+ "eval_loss": 0.2715359330177307,
175
+ "eval_precision": 0.864855223259409,
176
+ "eval_recall": 0.8719767230405528,
177
+ "eval_runtime": 2.0475,
178
+ "eval_samples_per_second": 194.876,
179
+ "eval_steps_per_second": 24.421,
180
  "step": 1098
181
  },
182
  {
183
  "epoch": 10.0,
184
+ "grad_norm": 1.0255930423736572,
185
  "learning_rate": 2.5e-05,
186
+ "loss": 0.1836,
187
  "step": 1220
188
  },
189
  {
190
  "epoch": 10.0,
191
+ "eval_accuracy": 0.9097744360902256,
192
+ "eval_f1": 0.8882839721254355,
193
+ "eval_loss": 0.2594870328903198,
194
+ "eval_precision": 0.8998687748047625,
195
+ "eval_recall": 0.8786597563193308,
196
+ "eval_runtime": 2.0498,
197
+ "eval_samples_per_second": 194.658,
198
+ "eval_steps_per_second": 24.393,
199
  "step": 1220
200
  },
201
  {
202
  "epoch": 11.0,
203
+ "grad_norm": 7.171597957611084,
204
  "learning_rate": 2.25e-05,
205
+ "loss": 0.1706,
206
  "step": 1342
207
  },
208
  {
209
  "epoch": 11.0,
210
+ "eval_accuracy": 0.8922305764411027,
211
+ "eval_f1": 0.8733660552828726,
212
+ "eval_loss": 0.2833162844181061,
213
+ "eval_precision": 0.8650109547970479,
214
+ "eval_recall": 0.8837515911983997,
215
+ "eval_runtime": 2.0562,
216
+ "eval_samples_per_second": 194.044,
217
+ "eval_steps_per_second": 24.316,
218
  "step": 1342
219
  },
220
  {
221
  "epoch": 12.0,
222
+ "grad_norm": 8.375700950622559,
223
  "learning_rate": 2e-05,
224
+ "loss": 0.1623,
225
  "step": 1464
226
  },
227
  {
228
  "epoch": 12.0,
229
+ "eval_accuracy": 0.8872180451127819,
230
+ "eval_f1": 0.8668668668668669,
231
+ "eval_loss": 0.2992556393146515,
232
+ "eval_precision": 0.8598901098901099,
233
+ "eval_recall": 0.8752045826513912,
234
+ "eval_runtime": 2.0606,
235
+ "eval_samples_per_second": 193.629,
236
+ "eval_steps_per_second": 24.264,
237
  "step": 1464
238
  },
239
  {
240
  "epoch": 13.0,
241
+ "grad_norm": 1.5983966588974,
242
  "learning_rate": 1.75e-05,
243
+ "loss": 0.1478,
244
  "step": 1586
245
  },
246
  {
247
  "epoch": 13.0,
248
+ "eval_accuracy": 0.8972431077694235,
249
+ "eval_f1": 0.8724195749658803,
250
+ "eval_loss": 0.28637126088142395,
251
+ "eval_precision": 0.8848766823362741,
252
+ "eval_recall": 0.8622931442080378,
253
+ "eval_runtime": 2.0479,
254
+ "eval_samples_per_second": 194.834,
255
+ "eval_steps_per_second": 24.415,
256
  "step": 1586
257
  },
258
  {
259
  "epoch": 14.0,
260
+ "grad_norm": 7.667685031890869,
261
  "learning_rate": 1.5e-05,
262
+ "loss": 0.1467,
263
  "step": 1708
264
  },
265
  {
266
  "epoch": 14.0,
267
+ "eval_accuracy": 0.8972431077694235,
268
+ "eval_f1": 0.8763538792940554,
269
+ "eval_loss": 0.28052201867103577,
270
+ "eval_precision": 0.8754297605404427,
271
+ "eval_recall": 0.877295871976723,
272
+ "eval_runtime": 2.0543,
273
+ "eval_samples_per_second": 194.227,
274
+ "eval_steps_per_second": 24.339,
275
  "step": 1708
276
  },
277
  {
278
  "epoch": 15.0,
279
+ "grad_norm": 0.43923285603523254,
280
  "learning_rate": 1.25e-05,
281
+ "loss": 0.132,
282
  "step": 1830
283
  },
284
  {
285
  "epoch": 15.0,
286
+ "eval_accuracy": 0.899749373433584,
287
+ "eval_f1": 0.8813841488792438,
288
+ "eval_loss": 0.28690698742866516,
289
+ "eval_precision": 0.8748029197080291,
290
+ "eval_recall": 0.8890707401345699,
291
+ "eval_runtime": 2.0519,
292
+ "eval_samples_per_second": 194.451,
293
+ "eval_steps_per_second": 24.367,
294
  "step": 1830
295
  },
296
  {
297
  "epoch": 16.0,
298
+ "grad_norm": 5.0243120193481445,
299
  "learning_rate": 1e-05,
300
+ "loss": 0.125,
301
  "step": 1952
302
  },
303
  {
304
  "epoch": 16.0,
305
+ "eval_accuracy": 0.8972431077694235,
306
+ "eval_f1": 0.8781334505389722,
307
+ "eval_loss": 0.30517804622650146,
308
+ "eval_precision": 0.872316715542522,
309
+ "eval_recall": 0.8847972358610656,
310
+ "eval_runtime": 2.055,
311
+ "eval_samples_per_second": 194.159,
312
+ "eval_steps_per_second": 24.331,
313
  "step": 1952
314
  },
315
  {
316
  "epoch": 17.0,
317
+ "grad_norm": 4.027547359466553,
318
  "learning_rate": 7.5e-06,
319
+ "loss": 0.1183,
320
  "step": 2074
321
  },
322
  {
323
  "epoch": 17.0,
324
+ "eval_accuracy": 0.8897243107769424,
325
+ "eval_f1": 0.8682773109243698,
326
+ "eval_loss": 0.2968387007713318,
327
+ "eval_precision": 0.864855223259409,
328
+ "eval_recall": 0.8719767230405528,
329
+ "eval_runtime": 2.0605,
330
+ "eval_samples_per_second": 193.639,
331
+ "eval_steps_per_second": 24.266,
332
  "step": 2074
333
  },
334
  {
335
  "epoch": 18.0,
336
+ "grad_norm": 8.528005599975586,
337
  "learning_rate": 5e-06,
338
+ "loss": 0.1185,
339
  "step": 2196
340
  },
341
  {
342
  "epoch": 18.0,
343
+ "eval_accuracy": 0.8922305764411027,
344
+ "eval_f1": 0.8715803025426456,
345
+ "eval_loss": 0.30326613783836365,
346
+ "eval_precision": 0.8673433153814287,
347
+ "eval_recall": 0.8762502273140571,
348
+ "eval_runtime": 2.0521,
349
+ "eval_samples_per_second": 194.435,
350
+ "eval_steps_per_second": 24.365,
351
  "step": 2196
352
  },
353
  {
354
  "epoch": 19.0,
355
+ "grad_norm": 1.2970513105392456,
356
  "learning_rate": 2.5e-06,
357
+ "loss": 0.1132,
358
  "step": 2318
359
  },
360
  {
361
  "epoch": 19.0,
362
+ "eval_accuracy": 0.8897243107769424,
363
+ "eval_f1": 0.8689068100358424,
364
+ "eval_loss": 0.30629363656044006,
365
+ "eval_precision": 0.863953693884765,
366
+ "eval_recall": 0.8744771776686671,
367
+ "eval_runtime": 2.0602,
368
+ "eval_samples_per_second": 193.667,
369
+ "eval_steps_per_second": 24.269,
370
  "step": 2318
371
  },
372
  {
373
  "epoch": 20.0,
374
+ "grad_norm": 0.4699600636959076,
375
  "learning_rate": 0.0,
376
+ "loss": 0.1195,
377
  "step": 2440
378
  },
379
  {
380
  "epoch": 20.0,
381
+ "eval_accuracy": 0.8922305764411027,
382
+ "eval_f1": 0.8703223612108386,
383
+ "eval_loss": 0.3013243079185486,
384
+ "eval_precision": 0.8694131129742446,
385
+ "eval_recall": 0.8712493180578287,
386
+ "eval_runtime": 2.0489,
387
+ "eval_samples_per_second": 194.739,
388
+ "eval_steps_per_second": 24.403,
389
  "step": 2440
390
  },
391
  {
392
  "epoch": 20.0,
393
  "step": 2440,
394
  "total_flos": 8551203605328000.0,
395
+ "train_loss": 0.2159292881606055,
396
+ "train_runtime": 745.0279,
397
+ "train_samples_per_second": 97.661,
398
+ "train_steps_per_second": 3.275
399
  }
400
  ],
401
  "logging_steps": 500,