apwic commited on
Commit
bd83858
1 Parent(s): 38f8304

End of training

Browse files
README.md CHANGED
@@ -1,4 +1,6 @@
1
  ---
 
 
2
  license: mit
3
  base_model: indolem/indobert-base-uncased
4
  tags:
 
1
  ---
2
+ language:
3
+ - id
4
  license: mit
5
  base_model: indolem/indobert-base-uncased
6
  tags:
all_results.json CHANGED
@@ -1,21 +1,21 @@
1
  {
2
- "accuracy": 0.8941641938674579,
3
  "epoch": 20.0,
4
- "eval_accuracy": 0.9022556390977443,
5
- "eval_f1": 0.8829621606985718,
6
- "eval_loss": 0.2765507400035858,
7
- "eval_precision": 0.8802419354838709,
8
- "eval_recall": 0.8858428805237315,
9
- "eval_runtime": 5.0092,
10
  "eval_samples": 399,
11
- "eval_samples_per_second": 79.653,
12
- "eval_steps_per_second": 9.982,
13
- "f1": 0.8742883638556821,
14
- "precision": 0.8699655489205329,
15
- "recall": 0.8790652032719297,
16
- "train_loss": 0.23107414167435442,
17
- "train_runtime": 1909.2167,
18
  "train_samples": 3638,
19
- "train_samples_per_second": 38.11,
20
- "train_steps_per_second": 1.278
21
  }
 
1
  {
2
+ "accuracy": 0.8961424332344213,
3
  "epoch": 20.0,
4
+ "eval_accuracy": 0.8972431077694235,
5
+ "eval_f1": 0.8781334505389722,
6
+ "eval_loss": 0.2797949016094208,
7
+ "eval_precision": 0.872316715542522,
8
+ "eval_recall": 0.8847972358610656,
9
+ "eval_runtime": 1.8194,
10
  "eval_samples": 399,
11
+ "eval_samples_per_second": 219.307,
12
+ "eval_steps_per_second": 27.482,
13
+ "f1": 0.8761730346384224,
14
+ "precision": 0.8730752146875925,
15
+ "recall": 0.8794911377392057,
16
+ "train_loss": 0.24180538537072355,
17
+ "train_runtime": 620.8783,
18
  "train_samples": 3638,
19
+ "train_samples_per_second": 117.189,
20
+ "train_steps_per_second": 3.93
21
  }
eval_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_accuracy": 0.9022556390977443,
4
- "eval_f1": 0.8829621606985718,
5
- "eval_loss": 0.2765507400035858,
6
- "eval_precision": 0.8802419354838709,
7
- "eval_recall": 0.8858428805237315,
8
- "eval_runtime": 5.0092,
9
  "eval_samples": 399,
10
- "eval_samples_per_second": 79.653,
11
- "eval_steps_per_second": 9.982
12
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_accuracy": 0.8972431077694235,
4
+ "eval_f1": 0.8781334505389722,
5
+ "eval_loss": 0.2797949016094208,
6
+ "eval_precision": 0.872316715542522,
7
+ "eval_recall": 0.8847972358610656,
8
+ "eval_runtime": 1.8194,
9
  "eval_samples": 399,
10
+ "eval_samples_per_second": 219.307,
11
+ "eval_steps_per_second": 27.482
12
  }
predict_results.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "accuracy": 0.8941641938674579,
3
- "f1": 0.8742883638556821,
4
- "precision": 0.8699655489205329,
5
- "recall": 0.8790652032719297
6
  }
 
1
  {
2
+ "accuracy": 0.8961424332344213,
3
+ "f1": 0.8761730346384224,
4
+ "precision": 0.8730752146875925,
5
+ "recall": 0.8794911377392057
6
  }
predict_results.txt CHANGED
@@ -1,6 +1,6 @@
1
  index prediction
2
  0 1
3
- 1 0
4
  2 0
5
  3 1
6
  4 0
@@ -12,7 +12,7 @@ index prediction
12
  10 1
13
  11 1
14
  12 1
15
- 13 0
16
  14 1
17
  15 0
18
  16 1
@@ -58,7 +58,7 @@ index prediction
58
  56 1
59
  57 0
60
  58 1
61
- 59 0
62
  60 1
63
  61 1
64
  62 1
@@ -70,17 +70,17 @@ index prediction
70
  68 1
71
  69 1
72
  70 1
73
- 71 0
74
  72 1
75
  73 1
76
  74 1
77
  75 1
78
  76 1
79
- 77 1
80
  78 1
81
  79 0
82
  80 1
83
- 81 0
84
  82 1
85
  83 1
86
  84 1
@@ -91,7 +91,7 @@ index prediction
91
  89 1
92
  90 1
93
  91 1
94
- 92 0
95
  93 1
96
  94 1
97
  95 1
@@ -117,7 +117,7 @@ index prediction
117
  115 1
118
  116 1
119
  117 1
120
- 118 1
121
  119 1
122
  120 1
123
  121 1
@@ -163,7 +163,7 @@ index prediction
163
  161 1
164
  162 1
165
  163 1
166
- 164 1
167
  165 0
168
  166 1
169
  167 1
@@ -171,7 +171,7 @@ index prediction
171
  169 1
172
  170 1
173
  171 1
174
- 172 1
175
  173 0
176
  174 1
177
  175 1
@@ -209,7 +209,7 @@ index prediction
209
  207 1
210
  208 1
211
  209 1
212
- 210 0
213
  211 1
214
  212 1
215
  213 1
@@ -227,7 +227,7 @@ index prediction
227
  225 1
228
  226 0
229
  227 0
230
- 228 1
231
  229 0
232
  230 0
233
  231 1
@@ -248,7 +248,7 @@ index prediction
248
  246 1
249
  247 1
250
  248 1
251
- 249 1
252
  250 0
253
  251 1
254
  252 1
@@ -264,7 +264,7 @@ index prediction
264
  262 1
265
  263 1
266
  264 1
267
- 265 1
268
  266 1
269
  267 1
270
  268 1
@@ -272,7 +272,7 @@ index prediction
272
  270 1
273
  271 1
274
  272 1
275
- 273 1
276
  274 1
277
  275 1
278
  276 1
@@ -313,9 +313,9 @@ index prediction
313
  311 0
314
  312 0
315
  313 0
316
- 314 1
317
  315 0
318
- 316 1
319
  317 0
320
  318 1
321
  319 0
@@ -343,9 +343,9 @@ index prediction
343
  341 0
344
  342 0
345
  343 0
346
- 344 1
347
  345 0
348
- 346 1
349
  347 0
350
  348 0
351
  349 0
@@ -363,7 +363,7 @@ index prediction
363
  361 0
364
  362 0
365
  363 0
366
- 364 0
367
  365 0
368
  366 1
369
  367 0
@@ -400,7 +400,7 @@ index prediction
400
  398 1
401
  399 0
402
  400 0
403
- 401 0
404
  402 1
405
  403 0
406
  404 0
@@ -419,8 +419,8 @@ index prediction
419
  417 0
420
  418 0
421
  419 0
422
- 420 1
423
- 421 1
424
  422 0
425
  423 0
426
  424 0
@@ -486,7 +486,7 @@ index prediction
486
  484 0
487
  485 0
488
  486 0
489
- 487 0
490
  488 0
491
  489 0
492
  490 0
@@ -517,7 +517,7 @@ index prediction
517
  515 0
518
  516 0
519
  517 0
520
- 518 0
521
  519 0
522
  520 0
523
  521 0
@@ -537,11 +537,11 @@ index prediction
537
  535 0
538
  536 0
539
  537 0
540
- 538 1
541
  539 0
542
  540 0
543
  541 0
544
- 542 0
545
  543 0
546
  544 0
547
  545 0
@@ -625,10 +625,10 @@ index prediction
625
  623 0
626
  624 0
627
  625 0
628
- 626 1
629
  627 0
630
  628 0
631
- 629 1
632
  630 0
633
  631 0
634
  632 0
@@ -726,8 +726,8 @@ index prediction
726
  724 0
727
  725 0
728
  726 0
729
- 727 1
730
- 728 1
731
  729 0
732
  730 0
733
  731 0
@@ -773,7 +773,7 @@ index prediction
773
  771 0
774
  772 0
775
  773 0
776
- 774 0
777
  775 0
778
  776 0
779
  777 1
@@ -794,12 +794,12 @@ index prediction
794
  792 0
795
  793 0
796
  794 0
797
- 795 1
798
  796 0
799
  797 0
800
  798 0
801
  799 0
802
- 800 1
803
  801 0
804
  802 0
805
  803 0
@@ -831,7 +831,7 @@ index prediction
831
  829 0
832
  830 0
833
  831 0
834
- 832 0
835
  833 1
836
  834 0
837
  835 0
@@ -847,7 +847,7 @@ index prediction
847
  845 0
848
  846 0
849
  847 0
850
- 848 0
851
  849 0
852
  850 0
853
  851 0
@@ -949,7 +949,7 @@ index prediction
949
  947 0
950
  948 0
951
  949 0
952
- 950 0
953
  951 0
954
  952 0
955
  953 0
@@ -968,7 +968,7 @@ index prediction
968
  966 0
969
  967 0
970
  968 0
971
- 969 0
972
  970 0
973
  971 0
974
  972 0
 
1
  index prediction
2
  0 1
3
+ 1 1
4
  2 0
5
  3 1
6
  4 0
 
12
  10 1
13
  11 1
14
  12 1
15
+ 13 1
16
  14 1
17
  15 0
18
  16 1
 
58
  56 1
59
  57 0
60
  58 1
61
+ 59 1
62
  60 1
63
  61 1
64
  62 1
 
70
  68 1
71
  69 1
72
  70 1
73
+ 71 1
74
  72 1
75
  73 1
76
  74 1
77
  75 1
78
  76 1
79
+ 77 0
80
  78 1
81
  79 0
82
  80 1
83
+ 81 1
84
  82 1
85
  83 1
86
  84 1
 
91
  89 1
92
  90 1
93
  91 1
94
+ 92 1
95
  93 1
96
  94 1
97
  95 1
 
117
  115 1
118
  116 1
119
  117 1
120
+ 118 0
121
  119 1
122
  120 1
123
  121 1
 
163
  161 1
164
  162 1
165
  163 1
166
+ 164 0
167
  165 0
168
  166 1
169
  167 1
 
171
  169 1
172
  170 1
173
  171 1
174
+ 172 0
175
  173 0
176
  174 1
177
  175 1
 
209
  207 1
210
  208 1
211
  209 1
212
+ 210 1
213
  211 1
214
  212 1
215
  213 1
 
227
  225 1
228
  226 0
229
  227 0
230
+ 228 0
231
  229 0
232
  230 0
233
  231 1
 
248
  246 1
249
  247 1
250
  248 1
251
+ 249 0
252
  250 0
253
  251 1
254
  252 1
 
264
  262 1
265
  263 1
266
  264 1
267
+ 265 0
268
  266 1
269
  267 1
270
  268 1
 
272
  270 1
273
  271 1
274
  272 1
275
+ 273 0
276
  274 1
277
  275 1
278
  276 1
 
313
  311 0
314
  312 0
315
  313 0
316
+ 314 0
317
  315 0
318
+ 316 0
319
  317 0
320
  318 1
321
  319 0
 
343
  341 0
344
  342 0
345
  343 0
346
+ 344 0
347
  345 0
348
+ 346 0
349
  347 0
350
  348 0
351
  349 0
 
363
  361 0
364
  362 0
365
  363 0
366
+ 364 1
367
  365 0
368
  366 1
369
  367 0
 
400
  398 1
401
  399 0
402
  400 0
403
+ 401 1
404
  402 1
405
  403 0
406
  404 0
 
419
  417 0
420
  418 0
421
  419 0
422
+ 420 0
423
+ 421 0
424
  422 0
425
  423 0
426
  424 0
 
486
  484 0
487
  485 0
488
  486 0
489
+ 487 1
490
  488 0
491
  489 0
492
  490 0
 
517
  515 0
518
  516 0
519
  517 0
520
+ 518 1
521
  519 0
522
  520 0
523
  521 0
 
537
  535 0
538
  536 0
539
  537 0
540
+ 538 0
541
  539 0
542
  540 0
543
  541 0
544
+ 542 1
545
  543 0
546
  544 0
547
  545 0
 
625
  623 0
626
  624 0
627
  625 0
628
+ 626 0
629
  627 0
630
  628 0
631
+ 629 0
632
  630 0
633
  631 0
634
  632 0
 
726
  724 0
727
  725 0
728
  726 0
729
+ 727 0
730
+ 728 0
731
  729 0
732
  730 0
733
  731 0
 
773
  771 0
774
  772 0
775
  773 0
776
+ 774 1
777
  775 0
778
  776 0
779
  777 1
 
794
  792 0
795
  793 0
796
  794 0
797
+ 795 0
798
  796 0
799
  797 0
800
  798 0
801
  799 0
802
+ 800 0
803
  801 0
804
  802 0
805
  803 0
 
831
  829 0
832
  830 0
833
  831 0
834
+ 832 1
835
  833 1
836
  834 0
837
  835 0
 
847
  845 0
848
  846 0
849
  847 0
850
+ 848 1
851
  849 0
852
  850 0
853
  851 0
 
949
  947 0
950
  948 0
951
  949 0
952
+ 950 1
953
  951 0
954
  952 0
955
  953 0
 
968
  966 0
969
  967 0
970
  968 0
971
+ 969 1
972
  970 0
973
  971 0
974
  972 0
runs/Jun03_12-49-09_a358b85c7679/events.out.tfevents.1717419586.a358b85c7679.87787.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ecd50dd70d947628baa78aee72e620ed1aefb36a63b5c224b663cffb13395e7
3
+ size 560
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
- "train_loss": 0.23107414167435442,
4
- "train_runtime": 1909.2167,
5
  "train_samples": 3638,
6
- "train_samples_per_second": 38.11,
7
- "train_steps_per_second": 1.278
8
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "train_loss": 0.24180538537072355,
4
+ "train_runtime": 620.8783,
5
  "train_samples": 3638,
6
+ "train_samples_per_second": 117.189,
7
+ "train_steps_per_second": 3.93
8
  }
trainer_state.json CHANGED
@@ -10,392 +10,392 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 4.234379768371582,
14
  "learning_rate": 4.75e-05,
15
- "loss": 0.5459,
16
  "step": 122
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_accuracy": 0.7393483709273183,
21
- "eval_f1": 0.6458660476548099,
22
- "eval_loss": 0.47629594802856445,
23
- "eval_precision": 0.6804065499717673,
24
- "eval_recall": 0.63557010365521,
25
- "eval_runtime": 4.8279,
26
- "eval_samples_per_second": 82.644,
27
- "eval_steps_per_second": 10.356,
28
  "step": 122
29
  },
30
  {
31
  "epoch": 2.0,
32
- "grad_norm": 4.299708843231201,
33
  "learning_rate": 4.5e-05,
34
- "loss": 0.4528,
35
  "step": 244
36
  },
37
  {
38
  "epoch": 2.0,
39
- "eval_accuracy": 0.7844611528822055,
40
- "eval_f1": 0.7677594888732471,
41
- "eval_loss": 0.43055105209350586,
42
- "eval_precision": 0.7630357142857143,
43
- "eval_recall": 0.8125113657028551,
44
- "eval_runtime": 5.0594,
45
- "eval_samples_per_second": 78.863,
46
- "eval_steps_per_second": 9.883,
47
  "step": 244
48
  },
49
  {
50
  "epoch": 3.0,
51
- "grad_norm": 3.729501962661743,
52
  "learning_rate": 4.25e-05,
53
- "loss": 0.3653,
54
  "step": 366
55
  },
56
  {
57
  "epoch": 3.0,
58
  "eval_accuracy": 0.8621553884711779,
59
- "eval_f1": 0.8217317487266553,
60
- "eval_loss": 0.3334697186946869,
61
- "eval_precision": 0.853331681065005,
62
- "eval_recall": 0.802464084378978,
63
- "eval_runtime": 5.0455,
64
- "eval_samples_per_second": 79.08,
65
- "eval_steps_per_second": 9.91,
66
  "step": 366
67
  },
68
  {
69
  "epoch": 4.0,
70
- "grad_norm": 2.0170559883117676,
71
  "learning_rate": 4e-05,
72
- "loss": 0.2987,
73
  "step": 488
74
  },
75
  {
76
  "epoch": 4.0,
77
- "eval_accuracy": 0.8546365914786967,
78
- "eval_f1": 0.8246499363520641,
79
- "eval_loss": 0.335675448179245,
80
- "eval_precision": 0.8246499363520641,
81
- "eval_recall": 0.8246499363520641,
82
- "eval_runtime": 5.2896,
83
- "eval_samples_per_second": 75.431,
84
- "eval_steps_per_second": 9.452,
85
  "step": 488
86
  },
87
  {
88
  "epoch": 5.0,
89
- "grad_norm": 0.7316147089004517,
90
  "learning_rate": 3.7500000000000003e-05,
91
- "loss": 0.2746,
92
  "step": 610
93
  },
94
  {
95
  "epoch": 5.0,
96
- "eval_accuracy": 0.8546365914786967,
97
- "eval_f1": 0.8338931955211025,
98
- "eval_loss": 0.3400849401950836,
99
- "eval_precision": 0.8216641926439057,
100
- "eval_recall": 0.8546553918894344,
101
- "eval_runtime": 5.081,
102
- "eval_samples_per_second": 78.527,
103
- "eval_steps_per_second": 9.841,
104
  "step": 610
105
  },
106
  {
107
  "epoch": 6.0,
108
- "grad_norm": 1.9782907962799072,
109
  "learning_rate": 3.5e-05,
110
- "loss": 0.2477,
111
  "step": 732
112
  },
113
  {
114
  "epoch": 6.0,
115
- "eval_accuracy": 0.849624060150376,
116
- "eval_f1": 0.8308270676691729,
117
- "eval_loss": 0.33233991265296936,
118
- "eval_precision": 0.8176375838926174,
119
- "eval_recall": 0.858610656482997,
120
- "eval_runtime": 5.0582,
121
- "eval_samples_per_second": 78.882,
122
- "eval_steps_per_second": 9.885,
123
  "step": 732
124
  },
125
  {
126
  "epoch": 7.0,
127
- "grad_norm": 1.8530523777008057,
128
  "learning_rate": 3.2500000000000004e-05,
129
- "loss": 0.24,
130
  "step": 854
131
  },
132
  {
133
  "epoch": 7.0,
134
  "eval_accuracy": 0.8646616541353384,
135
- "eval_f1": 0.8447157518450185,
136
- "eval_loss": 0.3171246647834778,
137
- "eval_precision": 0.8325401217487549,
138
- "eval_recall": 0.864248045099109,
139
- "eval_runtime": 5.0489,
140
- "eval_samples_per_second": 79.028,
141
- "eval_steps_per_second": 9.903,
142
  "step": 854
143
  },
144
  {
145
  "epoch": 8.0,
146
- "grad_norm": 3.7897450923919678,
147
  "learning_rate": 3e-05,
148
- "loss": 0.2069,
149
  "step": 976
150
  },
151
  {
152
  "epoch": 8.0,
153
  "eval_accuracy": 0.8922305764411027,
154
- "eval_f1": 0.8683279483657071,
155
- "eval_loss": 0.27698931097984314,
156
- "eval_precision": 0.873366724738676,
157
- "eval_recall": 0.863747954173486,
158
- "eval_runtime": 5.0659,
159
- "eval_samples_per_second": 78.762,
160
- "eval_steps_per_second": 9.87,
161
  "step": 976
162
  },
163
  {
164
  "epoch": 9.0,
165
- "grad_norm": 5.382436275482178,
166
  "learning_rate": 2.7500000000000004e-05,
167
- "loss": 0.2197,
168
  "step": 1098
169
  },
170
  {
171
  "epoch": 9.0,
172
- "eval_accuracy": 0.8671679197994987,
173
- "eval_f1": 0.8491192020377148,
174
- "eval_loss": 0.3091033399105072,
175
- "eval_precision": 0.8356209150326797,
176
- "eval_recall": 0.8735224586288416,
177
- "eval_runtime": 5.2322,
178
- "eval_samples_per_second": 76.258,
179
- "eval_steps_per_second": 9.556,
180
  "step": 1098
181
  },
182
  {
183
  "epoch": 10.0,
184
- "grad_norm": 3.9839751720428467,
185
  "learning_rate": 2.5e-05,
186
- "loss": 0.2005,
187
  "step": 1220
188
  },
189
  {
190
  "epoch": 10.0,
191
- "eval_accuracy": 0.9022556390977443,
192
- "eval_f1": 0.8811928811928812,
193
- "eval_loss": 0.2552439570426941,
194
- "eval_precision": 0.8842105263157894,
195
- "eval_recall": 0.878341516639389,
196
- "eval_runtime": 5.0419,
197
- "eval_samples_per_second": 79.136,
198
- "eval_steps_per_second": 9.917,
199
  "step": 1220
200
  },
201
  {
202
  "epoch": 11.0,
203
- "grad_norm": 0.8527041673660278,
204
  "learning_rate": 2.25e-05,
205
- "loss": 0.1867,
206
  "step": 1342
207
  },
208
  {
209
  "epoch": 11.0,
210
- "eval_accuracy": 0.9047619047619048,
211
- "eval_f1": 0.8867831541218638,
212
- "eval_loss": 0.2726534903049469,
213
- "eval_precision": 0.88158359844468,
214
- "eval_recall": 0.8926168394253501,
215
- "eval_runtime": 5.0607,
216
- "eval_samples_per_second": 78.843,
217
- "eval_steps_per_second": 9.88,
218
  "step": 1342
219
  },
220
  {
221
  "epoch": 12.0,
222
- "grad_norm": 2.9348244667053223,
223
  "learning_rate": 2e-05,
224
- "loss": 0.1722,
225
  "step": 1464
226
  },
227
  {
228
  "epoch": 12.0,
229
- "eval_accuracy": 0.8922305764411027,
230
- "eval_f1": 0.8727838950061173,
231
- "eval_loss": 0.27391675114631653,
232
- "eval_precision": 0.8656898656898657,
233
- "eval_recall": 0.8812511365702855,
234
- "eval_runtime": 5.0491,
235
- "eval_samples_per_second": 79.024,
236
- "eval_steps_per_second": 9.903,
237
  "step": 1464
238
  },
239
  {
240
  "epoch": 13.0,
241
- "grad_norm": 1.8993942737579346,
242
  "learning_rate": 1.75e-05,
243
- "loss": 0.161,
244
  "step": 1586
245
  },
246
  {
247
  "epoch": 13.0,
248
- "eval_accuracy": 0.899749373433584,
249
- "eval_f1": 0.8765393898137261,
250
- "eval_loss": 0.2714451551437378,
251
- "eval_precision": 0.8852261942423283,
252
- "eval_recall": 0.8690671031096563,
253
- "eval_runtime": 5.0752,
254
- "eval_samples_per_second": 78.618,
255
- "eval_steps_per_second": 9.852,
256
  "step": 1586
257
  },
258
  {
259
  "epoch": 14.0,
260
- "grad_norm": 5.6604695320129395,
261
  "learning_rate": 1.5e-05,
262
- "loss": 0.1684,
263
  "step": 1708
264
  },
265
  {
266
  "epoch": 14.0,
267
- "eval_accuracy": 0.8972431077694235,
268
- "eval_f1": 0.8781334505389722,
269
- "eval_loss": 0.27736207842826843,
270
- "eval_precision": 0.872316715542522,
271
- "eval_recall": 0.8847972358610656,
272
- "eval_runtime": 5.281,
273
- "eval_samples_per_second": 75.554,
274
- "eval_steps_per_second": 9.468,
275
  "step": 1708
276
  },
277
  {
278
  "epoch": 15.0,
279
- "grad_norm": 0.4771318733692169,
280
  "learning_rate": 1.25e-05,
281
- "loss": 0.1548,
282
  "step": 1830
283
  },
284
  {
285
  "epoch": 15.0,
286
- "eval_accuracy": 0.899749373433584,
287
- "eval_f1": 0.8802521008403361,
288
- "eval_loss": 0.27421677112579346,
289
- "eval_precision": 0.8767168083714847,
290
- "eval_recall": 0.8840698308783415,
291
- "eval_runtime": 5.0853,
292
- "eval_samples_per_second": 78.462,
293
- "eval_steps_per_second": 9.832,
294
  "step": 1830
295
  },
296
  {
297
  "epoch": 16.0,
298
- "grad_norm": 0.8061890006065369,
299
  "learning_rate": 1e-05,
300
- "loss": 0.1526,
301
  "step": 1952
302
  },
303
  {
304
  "epoch": 16.0,
305
- "eval_accuracy": 0.8872180451127819,
306
- "eval_f1": 0.8703282417939551,
307
- "eval_loss": 0.29698580503463745,
308
- "eval_precision": 0.8574462768615692,
309
- "eval_recall": 0.8902073104200764,
310
- "eval_runtime": 5.0518,
311
- "eval_samples_per_second": 78.982,
312
- "eval_steps_per_second": 9.898,
313
  "step": 1952
314
  },
315
  {
316
  "epoch": 17.0,
317
- "grad_norm": 2.3830015659332275,
318
  "learning_rate": 7.5e-06,
319
- "loss": 0.1467,
320
  "step": 2074
321
  },
322
  {
323
  "epoch": 17.0,
324
- "eval_accuracy": 0.8897243107769424,
325
- "eval_f1": 0.8707140332272888,
326
- "eval_loss": 0.2729242146015167,
327
- "eval_precision": 0.8618432385874246,
328
- "eval_recall": 0.8819785415530097,
329
- "eval_runtime": 5.0506,
330
- "eval_samples_per_second": 79.001,
331
- "eval_steps_per_second": 9.9,
332
  "step": 2074
333
  },
334
  {
335
  "epoch": 18.0,
336
- "grad_norm": 4.170547962188721,
337
  "learning_rate": 5e-06,
338
- "loss": 0.1484,
339
  "step": 2196
340
  },
341
  {
342
  "epoch": 18.0,
343
- "eval_accuracy": 0.8972431077694235,
344
- "eval_f1": 0.8781334505389722,
345
- "eval_loss": 0.27389636635780334,
346
- "eval_precision": 0.872316715542522,
347
- "eval_recall": 0.8847972358610656,
348
- "eval_runtime": 5.0897,
349
- "eval_samples_per_second": 78.393,
350
- "eval_steps_per_second": 9.824,
351
  "step": 2196
352
  },
353
  {
354
  "epoch": 19.0,
355
- "grad_norm": 3.1922006607055664,
356
  "learning_rate": 2.5e-06,
357
- "loss": 0.1434,
358
  "step": 2318
359
  },
360
  {
361
  "epoch": 19.0,
362
- "eval_accuracy": 0.899749373433584,
363
- "eval_f1": 0.879667048676036,
364
- "eval_loss": 0.27286645770072937,
365
- "eval_precision": 0.8778361344537815,
366
- "eval_recall": 0.8815693762502272,
367
- "eval_runtime": 5.3802,
368
- "eval_samples_per_second": 74.161,
369
- "eval_steps_per_second": 9.293,
370
  "step": 2318
371
  },
372
  {
373
  "epoch": 20.0,
374
- "grad_norm": 1.1650974750518799,
375
  "learning_rate": 0.0,
376
- "loss": 0.1354,
377
  "step": 2440
378
  },
379
  {
380
  "epoch": 20.0,
381
- "eval_accuracy": 0.9022556390977443,
382
- "eval_f1": 0.8829621606985718,
383
- "eval_loss": 0.2765507400035858,
384
- "eval_precision": 0.8802419354838709,
385
- "eval_recall": 0.8858428805237315,
386
- "eval_runtime": 5.0601,
387
- "eval_samples_per_second": 78.853,
388
- "eval_steps_per_second": 9.881,
389
  "step": 2440
390
  },
391
  {
392
  "epoch": 20.0,
393
  "step": 2440,
394
  "total_flos": 8444128359504000.0,
395
- "train_loss": 0.23107414167435442,
396
- "train_runtime": 1909.2167,
397
- "train_samples_per_second": 38.11,
398
- "train_steps_per_second": 1.278
399
  }
400
  ],
401
  "logging_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 4.232791900634766,
14
  "learning_rate": 4.75e-05,
15
+ "loss": 0.5568,
16
  "step": 122
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_accuracy": 0.7243107769423559,
21
+ "eval_f1": 0.6144317942230656,
22
+ "eval_loss": 0.48217353224754333,
23
+ "eval_precision": 0.65566534914361,
24
+ "eval_recall": 0.6074286233860702,
25
+ "eval_runtime": 1.8124,
26
+ "eval_samples_per_second": 220.155,
27
+ "eval_steps_per_second": 27.588,
28
  "step": 122
29
  },
30
  {
31
  "epoch": 2.0,
32
+ "grad_norm": 3.6621267795562744,
33
  "learning_rate": 4.5e-05,
34
+ "loss": 0.4661,
35
  "step": 244
36
  },
37
  {
38
  "epoch": 2.0,
39
+ "eval_accuracy": 0.7543859649122807,
40
+ "eval_f1": 0.7304054054054054,
41
+ "eval_loss": 0.44529902935028076,
42
+ "eval_precision": 0.7240563585317666,
43
+ "eval_recall": 0.7612293144208038,
44
+ "eval_runtime": 1.7908,
45
+ "eval_samples_per_second": 222.81,
46
+ "eval_steps_per_second": 27.921,
47
  "step": 244
48
  },
49
  {
50
  "epoch": 3.0,
51
+ "grad_norm": 3.223342180252075,
52
  "learning_rate": 4.25e-05,
53
+ "loss": 0.3875,
54
  "step": 366
55
  },
56
  {
57
  "epoch": 3.0,
58
  "eval_accuracy": 0.8621553884711779,
59
+ "eval_f1": 0.8238834717707957,
60
+ "eval_loss": 0.3446956276893616,
61
+ "eval_precision": 0.8487520627062706,
62
+ "eval_recall": 0.8074649936352064,
63
+ "eval_runtime": 1.7891,
64
+ "eval_samples_per_second": 223.02,
65
+ "eval_steps_per_second": 27.947,
66
  "step": 366
67
  },
68
  {
69
  "epoch": 4.0,
70
+ "grad_norm": 3.7967684268951416,
71
  "learning_rate": 4e-05,
72
+ "loss": 0.318,
73
  "step": 488
74
  },
75
  {
76
  "epoch": 4.0,
77
+ "eval_accuracy": 0.849624060150376,
78
+ "eval_f1": 0.8267427049559981,
79
+ "eval_loss": 0.34423500299453735,
80
+ "eval_precision": 0.8158466596088483,
81
+ "eval_recall": 0.8436079287143117,
82
+ "eval_runtime": 1.786,
83
+ "eval_samples_per_second": 223.404,
84
+ "eval_steps_per_second": 27.996,
85
  "step": 488
86
  },
87
  {
88
  "epoch": 5.0,
89
+ "grad_norm": 0.4756224453449249,
90
  "learning_rate": 3.7500000000000003e-05,
91
+ "loss": 0.2855,
92
  "step": 610
93
  },
94
  {
95
  "epoch": 5.0,
96
+ "eval_accuracy": 0.849624060150376,
97
+ "eval_f1": 0.826007326007326,
98
+ "eval_loss": 0.3348763883113861,
99
+ "eval_precision": 0.8157828282828283,
100
+ "eval_recall": 0.8411074740861975,
101
+ "eval_runtime": 1.7834,
102
+ "eval_samples_per_second": 223.733,
103
+ "eval_steps_per_second": 28.037,
104
  "step": 610
105
  },
106
  {
107
  "epoch": 6.0,
108
+ "grad_norm": 1.1933702230453491,
109
  "learning_rate": 3.5e-05,
110
+ "loss": 0.2638,
111
  "step": 732
112
  },
113
  {
114
  "epoch": 6.0,
115
+ "eval_accuracy": 0.8370927318295739,
116
+ "eval_f1": 0.8177454831659652,
117
+ "eval_loss": 0.35479673743247986,
118
+ "eval_precision": 0.805161943319838,
119
+ "eval_recall": 0.8472449536279323,
120
+ "eval_runtime": 1.7837,
121
+ "eval_samples_per_second": 223.688,
122
+ "eval_steps_per_second": 28.031,
123
  "step": 732
124
  },
125
  {
126
  "epoch": 7.0,
127
+ "grad_norm": 0.3131154775619507,
128
  "learning_rate": 3.2500000000000004e-05,
129
+ "loss": 0.2397,
130
  "step": 854
131
  },
132
  {
133
  "epoch": 7.0,
134
  "eval_accuracy": 0.8646616541353384,
135
+ "eval_f1": 0.8434065934065934,
136
+ "eval_loss": 0.3253527581691742,
137
+ "eval_precision": 0.8325757575757575,
138
+ "eval_recall": 0.8592471358428806,
139
+ "eval_runtime": 1.7938,
140
+ "eval_samples_per_second": 222.438,
141
+ "eval_steps_per_second": 27.874,
142
  "step": 854
143
  },
144
  {
145
  "epoch": 8.0,
146
+ "grad_norm": 5.750446319580078,
147
  "learning_rate": 3e-05,
148
+ "loss": 0.2428,
149
  "step": 976
150
  },
151
  {
152
  "epoch": 8.0,
153
  "eval_accuracy": 0.8922305764411027,
154
+ "eval_f1": 0.8654532336864889,
155
+ "eval_loss": 0.2798740565776825,
156
+ "eval_precision": 0.8804269882659713,
157
+ "eval_recall": 0.8537461356610292,
158
+ "eval_runtime": 1.7849,
159
+ "eval_samples_per_second": 223.546,
160
+ "eval_steps_per_second": 28.013,
161
  "step": 976
162
  },
163
  {
164
  "epoch": 9.0,
165
+ "grad_norm": 5.672217845916748,
166
  "learning_rate": 2.7500000000000004e-05,
167
+ "loss": 0.2229,
168
  "step": 1098
169
  },
170
  {
171
  "epoch": 9.0,
172
+ "eval_accuracy": 0.8721804511278195,
173
+ "eval_f1": 0.8484099018899409,
174
+ "eval_loss": 0.29030779004096985,
175
+ "eval_precision": 0.8430645161290322,
176
+ "eval_recall": 0.8545644662665939,
177
+ "eval_runtime": 1.7837,
178
+ "eval_samples_per_second": 223.689,
179
+ "eval_steps_per_second": 28.031,
180
  "step": 1098
181
  },
182
  {
183
  "epoch": 10.0,
184
+ "grad_norm": 2.1360011100769043,
185
  "learning_rate": 2.5e-05,
186
+ "loss": 0.2144,
187
  "step": 1220
188
  },
189
  {
190
  "epoch": 10.0,
191
+ "eval_accuracy": 0.8972431077694235,
192
+ "eval_f1": 0.8769602202215754,
193
+ "eval_loss": 0.2583388686180115,
194
+ "eval_precision": 0.8742831541218639,
195
+ "eval_recall": 0.8797963266048372,
196
+ "eval_runtime": 1.7944,
197
+ "eval_samples_per_second": 222.362,
198
+ "eval_steps_per_second": 27.865,
199
  "step": 1220
200
  },
201
  {
202
  "epoch": 11.0,
203
+ "grad_norm": 4.677872657775879,
204
  "learning_rate": 2.25e-05,
205
+ "loss": 0.1967,
206
  "step": 1342
207
  },
208
  {
209
  "epoch": 11.0,
210
+ "eval_accuracy": 0.8822055137844611,
211
+ "eval_f1": 0.8622085718274466,
212
+ "eval_loss": 0.27431806921958923,
213
+ "eval_precision": 0.8530168716042322,
214
+ "eval_recall": 0.8741589379887251,
215
+ "eval_runtime": 1.7823,
216
+ "eval_samples_per_second": 223.874,
217
+ "eval_steps_per_second": 28.054,
218
  "step": 1342
219
  },
220
  {
221
  "epoch": 12.0,
222
+ "grad_norm": 2.564518451690674,
223
  "learning_rate": 2e-05,
224
+ "loss": 0.1855,
225
  "step": 1464
226
  },
227
  {
228
  "epoch": 12.0,
229
+ "eval_accuracy": 0.8771929824561403,
230
+ "eval_f1": 0.8563451067988272,
231
+ "eval_loss": 0.29132312536239624,
232
+ "eval_precision": 0.8473119816985988,
233
+ "eval_recall": 0.8681123840698308,
234
+ "eval_runtime": 1.7831,
235
+ "eval_samples_per_second": 223.773,
236
+ "eval_steps_per_second": 28.042,
237
  "step": 1464
238
  },
239
  {
240
  "epoch": 13.0,
241
+ "grad_norm": 2.314499855041504,
242
  "learning_rate": 1.75e-05,
243
+ "loss": 0.1761,
244
  "step": 1586
245
  },
246
  {
247
  "epoch": 13.0,
248
+ "eval_accuracy": 0.9022556390977443,
249
+ "eval_f1": 0.8786430103333984,
250
+ "eval_loss": 0.26596176624298096,
251
+ "eval_precision": 0.8913001481099878,
252
+ "eval_recall": 0.8683396981269322,
253
+ "eval_runtime": 1.7906,
254
+ "eval_samples_per_second": 222.829,
255
+ "eval_steps_per_second": 27.923,
256
  "step": 1586
257
  },
258
  {
259
  "epoch": 14.0,
260
+ "grad_norm": 7.584296226501465,
261
  "learning_rate": 1.5e-05,
262
+ "loss": 0.1733,
263
  "step": 1708
264
  },
265
  {
266
  "epoch": 14.0,
267
+ "eval_accuracy": 0.8822055137844611,
268
+ "eval_f1": 0.8622085718274466,
269
+ "eval_loss": 0.28683483600616455,
270
+ "eval_precision": 0.8530168716042322,
271
+ "eval_recall": 0.8741589379887251,
272
+ "eval_runtime": 1.7857,
273
+ "eval_samples_per_second": 223.438,
274
+ "eval_steps_per_second": 28.0,
275
  "step": 1708
276
  },
277
  {
278
  "epoch": 15.0,
279
+ "grad_norm": 1.435178518295288,
280
  "learning_rate": 1.25e-05,
281
+ "loss": 0.1582,
282
  "step": 1830
283
  },
284
  {
285
  "epoch": 15.0,
286
+ "eval_accuracy": 0.8847117794486216,
287
+ "eval_f1": 0.8648373983739837,
288
+ "eval_loss": 0.28010857105255127,
289
+ "eval_precision": 0.8561154177433248,
290
+ "eval_recall": 0.8759319876341153,
291
+ "eval_runtime": 1.788,
292
+ "eval_samples_per_second": 223.159,
293
+ "eval_steps_per_second": 27.965,
294
  "step": 1830
295
  },
296
  {
297
  "epoch": 16.0,
298
+ "grad_norm": 1.7755215167999268,
299
  "learning_rate": 1e-05,
300
+ "loss": 0.1537,
301
  "step": 1952
302
  },
303
  {
304
  "epoch": 16.0,
305
+ "eval_accuracy": 0.87468671679198,
306
+ "eval_f1": 0.8550061050061051,
307
+ "eval_loss": 0.30731528997421265,
308
+ "eval_precision": 0.8437710437710437,
309
+ "eval_recall": 0.8713402436806692,
310
+ "eval_runtime": 1.7883,
311
+ "eval_samples_per_second": 223.118,
312
+ "eval_steps_per_second": 27.96,
313
  "step": 1952
314
  },
315
  {
316
  "epoch": 17.0,
317
+ "grad_norm": 3.1951849460601807,
318
  "learning_rate": 7.5e-06,
319
+ "loss": 0.1537,
320
  "step": 2074
321
  },
322
  {
323
  "epoch": 17.0,
324
+ "eval_accuracy": 0.8972431077694235,
325
+ "eval_f1": 0.8781334505389722,
326
+ "eval_loss": 0.2702249586582184,
327
+ "eval_precision": 0.872316715542522,
328
+ "eval_recall": 0.8847972358610656,
329
+ "eval_runtime": 1.7845,
330
+ "eval_samples_per_second": 223.597,
331
+ "eval_steps_per_second": 28.02,
332
  "step": 2074
333
  },
334
  {
335
  "epoch": 18.0,
336
+ "grad_norm": 3.639573097229004,
337
  "learning_rate": 5e-06,
338
+ "loss": 0.1461,
339
  "step": 2196
340
  },
341
  {
342
  "epoch": 18.0,
343
+ "eval_accuracy": 0.8947368421052632,
344
+ "eval_f1": 0.8760282890453928,
345
+ "eval_loss": 0.29228049516677856,
346
+ "eval_precision": 0.8682260305697083,
347
+ "eval_recall": 0.8855246408437898,
348
+ "eval_runtime": 1.8077,
349
+ "eval_samples_per_second": 220.726,
350
+ "eval_steps_per_second": 27.66,
351
  "step": 2196
352
  },
353
  {
354
  "epoch": 19.0,
355
+ "grad_norm": 3.177137613296509,
356
  "learning_rate": 2.5e-06,
357
+ "loss": 0.1449,
358
  "step": 2318
359
  },
360
  {
361
  "epoch": 19.0,
362
+ "eval_accuracy": 0.8947368421052632,
363
+ "eval_f1": 0.8754533563232059,
364
+ "eval_loss": 0.27906104922294617,
365
+ "eval_precision": 0.8689781021897811,
366
+ "eval_recall": 0.8830241862156756,
367
+ "eval_runtime": 1.7931,
368
+ "eval_samples_per_second": 222.52,
369
+ "eval_steps_per_second": 27.885,
370
  "step": 2318
371
  },
372
  {
373
  "epoch": 20.0,
374
+ "grad_norm": 0.6333515048027039,
375
  "learning_rate": 0.0,
376
+ "loss": 0.1502,
377
  "step": 2440
378
  },
379
  {
380
  "epoch": 20.0,
381
+ "eval_accuracy": 0.8972431077694235,
382
+ "eval_f1": 0.8781334505389722,
383
+ "eval_loss": 0.2797949016094208,
384
+ "eval_precision": 0.872316715542522,
385
+ "eval_recall": 0.8847972358610656,
386
+ "eval_runtime": 1.7912,
387
+ "eval_samples_per_second": 222.75,
388
+ "eval_steps_per_second": 27.914,
389
  "step": 2440
390
  },
391
  {
392
  "epoch": 20.0,
393
  "step": 2440,
394
  "total_flos": 8444128359504000.0,
395
+ "train_loss": 0.24180538537072355,
396
+ "train_runtime": 620.8783,
397
+ "train_samples_per_second": 117.189,
398
+ "train_steps_per_second": 3.93
399
  }
400
  ],
401
  "logging_steps": 500,