apwic commited on
Commit
bde61b1
1 Parent(s): 7f7f816

End of training

Browse files
README.md CHANGED
@@ -1,4 +1,6 @@
1
  ---
 
 
2
  license: mit
3
  base_model: indolem/indobert-base-uncased
4
  tags:
 
1
  ---
2
+ language:
3
+ - id
4
  license: mit
5
  base_model: indolem/indobert-base-uncased
6
  tags:
all_results.json CHANGED
@@ -1,21 +1,21 @@
1
  {
2
- "accuracy": 0.9173306772908366,
3
  "epoch": 20.0,
4
- "eval_accuracy": 0.8847117794486216,
5
- "eval_f1": 0.8609292598654301,
6
- "eval_loss": 0.2950780689716339,
7
- "eval_precision": 0.8609292598654301,
8
- "eval_recall": 0.8609292598654301,
9
- "eval_runtime": 4.9799,
10
  "eval_samples": 399,
11
- "eval_samples_per_second": 80.122,
12
- "eval_steps_per_second": 10.04,
13
- "f1": 0.8996936595347718,
14
- "precision": 0.9017567207878627,
15
- "recall": 0.8977052792948165,
16
- "train_loss": 0.2363556080177182,
17
- "train_runtime": 1909.167,
18
  "train_samples": 3645,
19
- "train_samples_per_second": 38.184,
20
- "train_steps_per_second": 1.278
21
  }
 
1
  {
2
+ "accuracy": 0.9183266932270916,
3
  "epoch": 20.0,
4
+ "eval_accuracy": 0.8822055137844611,
5
+ "eval_f1": 0.855319904024935,
6
+ "eval_loss": 0.3026413023471832,
7
+ "eval_precision": 0.862378106322743,
8
+ "eval_recall": 0.8491543917075832,
9
+ "eval_runtime": 1.8078,
10
  "eval_samples": 399,
11
+ "eval_samples_per_second": 220.713,
12
+ "eval_steps_per_second": 27.658,
13
+ "f1": 0.9005988602337487,
14
+ "precision": 0.9039776379609248,
15
+ "recall": 0.897413049726933,
16
+ "train_loss": 0.24652244458433056,
17
+ "train_runtime": 623.7969,
18
  "train_samples": 3645,
19
+ "train_samples_per_second": 116.865,
20
+ "train_steps_per_second": 3.912
21
  }
eval_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_accuracy": 0.8847117794486216,
4
- "eval_f1": 0.8609292598654301,
5
- "eval_loss": 0.2950780689716339,
6
- "eval_precision": 0.8609292598654301,
7
- "eval_recall": 0.8609292598654301,
8
- "eval_runtime": 4.9799,
9
  "eval_samples": 399,
10
- "eval_samples_per_second": 80.122,
11
- "eval_steps_per_second": 10.04
12
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_accuracy": 0.8822055137844611,
4
+ "eval_f1": 0.855319904024935,
5
+ "eval_loss": 0.3026413023471832,
6
+ "eval_precision": 0.862378106322743,
7
+ "eval_recall": 0.8491543917075832,
8
+ "eval_runtime": 1.8078,
9
  "eval_samples": 399,
10
+ "eval_samples_per_second": 220.713,
11
+ "eval_steps_per_second": 27.658
12
  }
predict_results.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "accuracy": 0.9173306772908366,
3
- "f1": 0.8996936595347718,
4
- "precision": 0.9017567207878627,
5
- "recall": 0.8977052792948165
6
  }
 
1
  {
2
+ "accuracy": 0.9183266932270916,
3
+ "f1": 0.9005988602337487,
4
+ "precision": 0.9039776379609248,
5
+ "recall": 0.897413049726933
6
  }
predict_results.txt CHANGED
@@ -13,8 +13,8 @@ index prediction
13
  11 1
14
  12 0
15
  13 1
16
- 14 1
17
- 15 1
18
  16 0
19
  17 1
20
  18 1
@@ -37,16 +37,16 @@ index prediction
37
  35 1
38
  36 1
39
  37 1
40
- 38 1
41
  39 0
42
  40 0
43
  41 1
44
- 42 1
45
  43 1
46
  44 1
47
  45 1
48
  46 1
49
- 47 1
50
  48 0
51
  49 1
52
  50 1
@@ -57,7 +57,7 @@ index prediction
57
  55 1
58
  56 1
59
  57 1
60
- 58 0
61
  59 1
62
  60 1
63
  61 1
@@ -85,7 +85,7 @@ index prediction
85
  83 1
86
  84 1
87
  85 1
88
- 86 0
89
  87 1
90
  88 1
91
  89 1
@@ -113,14 +113,14 @@ index prediction
113
  111 1
114
  112 1
115
  113 1
116
- 114 1
117
  115 1
118
  116 1
119
  117 1
120
  118 1
121
  119 1
122
  120 1
123
- 121 0
124
  122 1
125
  123 1
126
  124 1
@@ -152,11 +152,11 @@ index prediction
152
  150 1
153
  151 1
154
  152 1
155
- 153 1
156
  154 1
157
  155 1
158
  156 1
159
- 157 1
160
  158 1
161
  159 1
162
  160 1
@@ -192,7 +192,7 @@ index prediction
192
  190 0
193
  191 1
194
  192 1
195
- 193 0
196
  194 1
197
  195 1
198
  196 1
@@ -207,7 +207,7 @@ index prediction
207
  205 1
208
  206 1
209
  207 0
210
- 208 1
211
  209 1
212
  210 1
213
  211 1
@@ -220,11 +220,11 @@ index prediction
220
  218 1
221
  219 1
222
  220 1
223
- 221 1
224
  222 1
225
  223 1
226
  224 1
227
- 225 0
228
  226 1
229
  227 1
230
  228 1
@@ -245,7 +245,7 @@ index prediction
245
  243 1
246
  244 1
247
  245 1
248
- 246 0
249
  247 1
250
  248 1
251
  249 1
@@ -258,7 +258,7 @@ index prediction
258
  256 1
259
  257 1
260
  258 1
261
- 259 0
262
  260 0
263
  261 1
264
  262 1
@@ -271,7 +271,7 @@ index prediction
271
  269 0
272
  270 1
273
  271 1
274
- 272 0
275
  273 1
276
  274 1
277
  275 1
@@ -288,7 +288,7 @@ index prediction
288
  286 1
289
  287 0
290
  288 1
291
- 289 0
292
  290 1
293
  291 0
294
  292 1
@@ -300,7 +300,7 @@ index prediction
300
  298 0
301
  299 0
302
  300 0
303
- 301 1
304
  302 0
305
  303 0
306
  304 0
@@ -356,10 +356,10 @@ index prediction
356
  354 0
357
  355 0
358
  356 0
359
- 357 0
360
  358 0
361
  359 0
362
- 360 1
363
  361 0
364
  362 0
365
  363 0
@@ -396,7 +396,7 @@ index prediction
396
  394 0
397
  395 0
398
  396 1
399
- 397 1
400
  398 0
401
  399 0
402
  400 0
@@ -423,7 +423,7 @@ index prediction
423
  421 0
424
  422 0
425
  423 0
426
- 424 0
427
  425 0
428
  426 0
429
  427 0
@@ -436,7 +436,7 @@ index prediction
436
  434 0
437
  435 0
438
  436 0
439
- 437 1
440
  438 0
441
  439 0
442
  440 0
@@ -536,7 +536,7 @@ index prediction
536
  534 0
537
  535 0
538
  536 0
539
- 537 0
540
  538 0
541
  539 0
542
  540 0
@@ -558,7 +558,7 @@ index prediction
558
  556 0
559
  557 0
560
  558 0
561
- 559 1
562
  560 0
563
  561 0
564
  562 0
@@ -610,7 +610,7 @@ index prediction
610
  608 0
611
  609 0
612
  610 0
613
- 611 0
614
  612 0
615
  613 0
616
  614 0
@@ -626,7 +626,7 @@ index prediction
626
  624 0
627
  625 0
628
  626 0
629
- 627 1
630
  628 0
631
  629 0
632
  630 0
@@ -656,7 +656,7 @@ index prediction
656
  654 0
657
  655 0
658
  656 0
659
- 657 1
660
  658 1
661
  659 0
662
  660 0
@@ -716,7 +716,7 @@ index prediction
716
  714 0
717
  715 0
718
  716 0
719
- 717 1
720
  718 0
721
  719 0
722
  720 0
@@ -753,7 +753,7 @@ index prediction
753
  751 0
754
  752 0
755
  753 0
756
- 754 1
757
  755 0
758
  756 0
759
  757 0
@@ -762,7 +762,7 @@ index prediction
762
  760 0
763
  761 0
764
  762 0
765
- 763 0
766
  764 0
767
  765 0
768
  766 0
@@ -772,13 +772,13 @@ index prediction
772
  770 0
773
  771 0
774
  772 0
775
- 773 1
776
- 774 0
777
  775 0
778
  776 0
779
  777 0
780
  778 0
781
- 779 1
782
  780 0
783
  781 0
784
  782 0
@@ -792,11 +792,11 @@ index prediction
792
  790 0
793
  791 0
794
  792 0
795
- 793 0
796
  794 0
797
  795 0
798
  796 0
799
- 797 0
800
  798 1
801
  799 0
802
  800 0
@@ -851,7 +851,7 @@ index prediction
851
  849 0
852
  850 0
853
  851 0
854
- 852 0
855
  853 0
856
  854 0
857
  855 0
 
13
  11 1
14
  12 0
15
  13 1
16
+ 14 0
17
+ 15 0
18
  16 0
19
  17 1
20
  18 1
 
37
  35 1
38
  36 1
39
  37 1
40
+ 38 0
41
  39 0
42
  40 0
43
  41 1
44
+ 42 0
45
  43 1
46
  44 1
47
  45 1
48
  46 1
49
+ 47 0
50
  48 0
51
  49 1
52
  50 1
 
57
  55 1
58
  56 1
59
  57 1
60
+ 58 1
61
  59 1
62
  60 1
63
  61 1
 
85
  83 1
86
  84 1
87
  85 1
88
+ 86 1
89
  87 1
90
  88 1
91
  89 1
 
113
  111 1
114
  112 1
115
  113 1
116
+ 114 0
117
  115 1
118
  116 1
119
  117 1
120
  118 1
121
  119 1
122
  120 1
123
+ 121 1
124
  122 1
125
  123 1
126
  124 1
 
152
  150 1
153
  151 1
154
  152 1
155
+ 153 0
156
  154 1
157
  155 1
158
  156 1
159
+ 157 0
160
  158 1
161
  159 1
162
  160 1
 
192
  190 0
193
  191 1
194
  192 1
195
+ 193 1
196
  194 1
197
  195 1
198
  196 1
 
207
  205 1
208
  206 1
209
  207 0
210
+ 208 0
211
  209 1
212
  210 1
213
  211 1
 
220
  218 1
221
  219 1
222
  220 1
223
+ 221 0
224
  222 1
225
  223 1
226
  224 1
227
+ 225 1
228
  226 1
229
  227 1
230
  228 1
 
245
  243 1
246
  244 1
247
  245 1
248
+ 246 1
249
  247 1
250
  248 1
251
  249 1
 
258
  256 1
259
  257 1
260
  258 1
261
+ 259 1
262
  260 0
263
  261 1
264
  262 1
 
271
  269 0
272
  270 1
273
  271 1
274
+ 272 1
275
  273 1
276
  274 1
277
  275 1
 
288
  286 1
289
  287 0
290
  288 1
291
+ 289 1
292
  290 1
293
  291 0
294
  292 1
 
300
  298 0
301
  299 0
302
  300 0
303
+ 301 0
304
  302 0
305
  303 0
306
  304 0
 
356
  354 0
357
  355 0
358
  356 0
359
+ 357 1
360
  358 0
361
  359 0
362
+ 360 0
363
  361 0
364
  362 0
365
  363 0
 
396
  394 0
397
  395 0
398
  396 1
399
+ 397 0
400
  398 0
401
  399 0
402
  400 0
 
423
  421 0
424
  422 0
425
  423 0
426
+ 424 1
427
  425 0
428
  426 0
429
  427 0
 
436
  434 0
437
  435 0
438
  436 0
439
+ 437 0
440
  438 0
441
  439 0
442
  440 0
 
536
  534 0
537
  535 0
538
  536 0
539
+ 537 1
540
  538 0
541
  539 0
542
  540 0
 
558
  556 0
559
  557 0
560
  558 0
561
+ 559 0
562
  560 0
563
  561 0
564
  562 0
 
610
  608 0
611
  609 0
612
  610 0
613
+ 611 1
614
  612 0
615
  613 0
616
  614 0
 
626
  624 0
627
  625 0
628
  626 0
629
+ 627 0
630
  628 0
631
  629 0
632
  630 0
 
656
  654 0
657
  655 0
658
  656 0
659
+ 657 0
660
  658 1
661
  659 0
662
  660 0
 
716
  714 0
717
  715 0
718
  716 0
719
+ 717 0
720
  718 0
721
  719 0
722
  720 0
 
753
  751 0
754
  752 0
755
  753 0
756
+ 754 0
757
  755 0
758
  756 0
759
  757 0
 
762
  760 0
763
  761 0
764
  762 0
765
+ 763 1
766
  764 0
767
  765 0
768
  766 0
 
772
  770 0
773
  771 0
774
  772 0
775
+ 773 0
776
+ 774 1
777
  775 0
778
  776 0
779
  777 0
780
  778 0
781
+ 779 0
782
  780 0
783
  781 0
784
  782 0
 
792
  790 0
793
  791 0
794
  792 0
795
+ 793 1
796
  794 0
797
  795 0
798
  796 0
799
+ 797 1
800
  798 1
801
  799 0
802
  800 0
 
851
  849 0
852
  850 0
853
  851 0
854
+ 852 1
855
  853 0
856
  854 0
857
  855 0
runs/Jun03_15-01-46_a358b85c7679/events.out.tfevents.1717427545.a358b85c7679.158543.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9106848a02fa4c3a2a8c888b728e37234caabffdf8b01b4ebb53776969a294fa
3
+ size 560
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
- "train_loss": 0.2363556080177182,
4
- "train_runtime": 1909.167,
5
  "train_samples": 3645,
6
- "train_samples_per_second": 38.184,
7
- "train_steps_per_second": 1.278
8
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "train_loss": 0.24652244458433056,
4
+ "train_runtime": 623.7969,
5
  "train_samples": 3645,
6
+ "train_samples_per_second": 116.865,
7
+ "train_steps_per_second": 3.912
8
  }
trainer_state.json CHANGED
@@ -10,392 +10,392 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 2.953822374343872,
14
  "learning_rate": 4.75e-05,
15
- "loss": 0.5448,
16
  "step": 122
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_accuracy": 0.7243107769423559,
21
- "eval_f1": 0.6567661411411412,
22
- "eval_loss": 0.5046952366828918,
23
- "eval_precision": 0.6629251700680272,
24
- "eval_recall": 0.6524368066921259,
25
- "eval_runtime": 5.0686,
26
- "eval_samples_per_second": 78.72,
27
- "eval_steps_per_second": 9.865,
28
  "step": 122
29
  },
30
  {
31
  "epoch": 2.0,
32
- "grad_norm": 2.6171164512634277,
33
  "learning_rate": 4.5e-05,
34
- "loss": 0.4527,
35
  "step": 244
36
  },
37
  {
38
  "epoch": 2.0,
39
- "eval_accuracy": 0.7944862155388471,
40
- "eval_f1": 0.775152564736929,
41
- "eval_loss": 0.4319760501384735,
42
- "eval_precision": 0.7667055167055168,
43
- "eval_recall": 0.8121022004000726,
44
- "eval_runtime": 5.0454,
45
- "eval_samples_per_second": 79.082,
46
- "eval_steps_per_second": 9.91,
47
  "step": 244
48
  },
49
  {
50
  "epoch": 3.0,
51
- "grad_norm": 4.439111709594727,
52
  "learning_rate": 4.25e-05,
53
- "loss": 0.3603,
54
  "step": 366
55
  },
56
  {
57
  "epoch": 3.0,
58
- "eval_accuracy": 0.8471177944862155,
59
- "eval_f1": 0.7984931903796002,
60
- "eval_loss": 0.33696243166923523,
61
- "eval_precision": 0.8393082695594026,
62
- "eval_recall": 0.7768230587379523,
63
- "eval_runtime": 5.0496,
64
- "eval_samples_per_second": 79.016,
65
- "eval_steps_per_second": 9.902,
66
  "step": 366
67
  },
68
  {
69
  "epoch": 4.0,
70
- "grad_norm": 0.9325271248817444,
71
  "learning_rate": 4e-05,
72
- "loss": 0.3081,
73
  "step": 488
74
  },
75
  {
76
  "epoch": 4.0,
77
- "eval_accuracy": 0.8721804511278195,
78
- "eval_f1": 0.8461962888779714,
79
- "eval_loss": 0.29947736859321594,
80
- "eval_precision": 0.8453465227094517,
81
- "eval_recall": 0.8470631023822512,
82
- "eval_runtime": 5.0527,
83
- "eval_samples_per_second": 78.967,
84
- "eval_steps_per_second": 9.896,
85
  "step": 488
86
  },
87
  {
88
  "epoch": 5.0,
89
- "grad_norm": 1.1945856809616089,
90
  "learning_rate": 3.7500000000000003e-05,
91
- "loss": 0.2793,
92
  "step": 610
93
  },
94
  {
95
  "epoch": 5.0,
96
- "eval_accuracy": 0.87468671679198,
97
- "eval_f1": 0.8456742372671576,
98
- "eval_loss": 0.3008015751838684,
99
- "eval_precision": 0.8536697247706422,
100
- "eval_recall": 0.8388343335151845,
101
- "eval_runtime": 5.0466,
102
- "eval_samples_per_second": 79.063,
103
- "eval_steps_per_second": 9.908,
104
  "step": 610
105
  },
106
  {
107
  "epoch": 6.0,
108
- "grad_norm": 3.8788158893585205,
109
  "learning_rate": 3.5e-05,
110
- "loss": 0.2526,
111
  "step": 732
112
  },
113
  {
114
  "epoch": 6.0,
115
  "eval_accuracy": 0.8696741854636592,
116
- "eval_f1": 0.8411818738518064,
117
- "eval_loss": 0.2986568510532379,
118
- "eval_precision": 0.8448542607834644,
119
- "eval_recall": 0.8377886888525186,
120
- "eval_runtime": 5.0571,
121
- "eval_samples_per_second": 78.899,
122
- "eval_steps_per_second": 9.887,
123
  "step": 732
124
  },
125
  {
126
  "epoch": 7.0,
127
- "grad_norm": 1.2110929489135742,
128
  "learning_rate": 3.2500000000000004e-05,
129
- "loss": 0.2478,
130
  "step": 854
131
  },
132
  {
133
  "epoch": 7.0,
134
- "eval_accuracy": 0.8771929824561403,
135
- "eval_f1": 0.846679266293906,
136
- "eval_loss": 0.3030013144016266,
137
- "eval_precision": 0.8609191655801824,
138
- "eval_recall": 0.8356064739043463,
139
- "eval_runtime": 5.0491,
140
- "eval_samples_per_second": 79.023,
141
- "eval_steps_per_second": 9.903,
142
  "step": 854
143
  },
144
  {
145
  "epoch": 8.0,
146
- "grad_norm": 3.5065371990203857,
147
  "learning_rate": 3e-05,
148
- "loss": 0.2337,
149
  "step": 976
150
  },
151
  {
152
  "epoch": 8.0,
153
- "eval_accuracy": 0.8671679197994987,
154
- "eval_f1": 0.8350789627607721,
155
- "eval_loss": 0.2974022924900055,
156
- "eval_precision": 0.8463358876939919,
157
- "eval_recall": 0.8260138206946717,
158
- "eval_runtime": 5.0637,
159
- "eval_samples_per_second": 78.796,
160
- "eval_steps_per_second": 9.874,
161
  "step": 976
162
  },
163
  {
164
  "epoch": 9.0,
165
- "grad_norm": 4.127802848815918,
166
  "learning_rate": 2.7500000000000004e-05,
167
- "loss": 0.217,
168
  "step": 1098
169
  },
170
  {
171
  "epoch": 9.0,
172
- "eval_accuracy": 0.8721804511278195,
173
- "eval_f1": 0.8395201930584144,
174
- "eval_loss": 0.2773597240447998,
175
- "eval_precision": 0.8562091503267973,
176
- "eval_recall": 0.8270594653573378,
177
- "eval_runtime": 5.0754,
178
- "eval_samples_per_second": 78.615,
179
- "eval_steps_per_second": 9.851,
180
  "step": 1098
181
  },
182
  {
183
  "epoch": 10.0,
184
- "grad_norm": 2.408611297607422,
185
  "learning_rate": 2.5e-05,
186
- "loss": 0.1966,
187
  "step": 1220
188
  },
189
  {
190
  "epoch": 10.0,
191
- "eval_accuracy": 0.8696741854636592,
192
- "eval_f1": 0.844327731092437,
193
- "eval_loss": 0.2846048176288605,
194
- "eval_precision": 0.8411320530352577,
195
- "eval_recall": 0.8477905073649754,
196
- "eval_runtime": 5.0388,
197
- "eval_samples_per_second": 79.185,
198
- "eval_steps_per_second": 9.923,
199
  "step": 1220
200
  },
201
  {
202
  "epoch": 11.0,
203
- "grad_norm": 2.3639743328094482,
204
  "learning_rate": 2.25e-05,
205
- "loss": 0.199,
206
  "step": 1342
207
  },
208
  {
209
  "epoch": 11.0,
210
- "eval_accuracy": 0.8822055137844611,
211
- "eval_f1": 0.8545433391506689,
212
- "eval_loss": 0.2909916341304779,
213
- "eval_precision": 0.8639270714012982,
214
- "eval_recall": 0.846653937079469,
215
- "eval_runtime": 5.0638,
216
- "eval_samples_per_second": 78.794,
217
- "eval_steps_per_second": 9.874,
218
  "step": 1342
219
  },
220
  {
221
  "epoch": 12.0,
222
- "grad_norm": 3.4640684127807617,
223
  "learning_rate": 2e-05,
224
- "loss": 0.187,
225
  "step": 1464
226
  },
227
  {
228
  "epoch": 12.0,
229
- "eval_accuracy": 0.8771929824561403,
230
- "eval_f1": 0.846679266293906,
231
- "eval_loss": 0.2870851755142212,
232
- "eval_precision": 0.8609191655801824,
233
- "eval_recall": 0.8356064739043463,
234
- "eval_runtime": 5.0487,
235
- "eval_samples_per_second": 79.031,
236
- "eval_steps_per_second": 9.904,
237
  "step": 1464
238
  },
239
  {
240
  "epoch": 13.0,
241
- "grad_norm": 2.2104856967926025,
242
  "learning_rate": 1.75e-05,
243
- "loss": 0.1812,
244
  "step": 1586
245
  },
246
  {
247
  "epoch": 13.0,
248
- "eval_accuracy": 0.8796992481203008,
249
- "eval_f1": 0.8526315789473684,
250
- "eval_loss": 0.2812826931476593,
251
- "eval_precision": 0.8585304054054055,
252
- "eval_recall": 0.8473813420621932,
253
- "eval_runtime": 5.0484,
254
- "eval_samples_per_second": 79.034,
255
- "eval_steps_per_second": 9.904,
256
  "step": 1586
257
  },
258
  {
259
  "epoch": 14.0,
260
- "grad_norm": 3.044590950012207,
261
  "learning_rate": 1.5e-05,
262
- "loss": 0.1633,
263
  "step": 1708
264
  },
265
  {
266
  "epoch": 14.0,
267
- "eval_accuracy": 0.8822055137844611,
268
- "eval_f1": 0.8596342841745197,
269
- "eval_loss": 0.2956988215446472,
270
- "eval_precision": 0.8555364857667042,
271
- "eval_recall": 0.8641571194762684,
272
- "eval_runtime": 5.0481,
273
- "eval_samples_per_second": 79.04,
274
- "eval_steps_per_second": 9.905,
275
  "step": 1708
276
  },
277
  {
278
  "epoch": 15.0,
279
- "grad_norm": 0.9169403314590454,
280
  "learning_rate": 1.25e-05,
281
- "loss": 0.1607,
282
  "step": 1830
283
  },
284
  {
285
  "epoch": 15.0,
286
- "eval_accuracy": 0.8922305764411027,
287
- "eval_f1": 0.8696722245432793,
288
- "eval_loss": 0.28752827644348145,
289
- "eval_precision": 0.8706135006701596,
290
- "eval_recall": 0.8687488634297145,
291
- "eval_runtime": 5.0457,
292
- "eval_samples_per_second": 79.078,
293
- "eval_steps_per_second": 9.91,
294
  "step": 1830
295
  },
296
  {
297
  "epoch": 16.0,
298
- "grad_norm": 1.9245001077651978,
299
  "learning_rate": 1e-05,
300
- "loss": 0.1584,
301
  "step": 1952
302
  },
303
  {
304
  "epoch": 16.0,
305
  "eval_accuracy": 0.8822055137844611,
306
- "eval_f1": 0.8560793854229822,
307
- "eval_loss": 0.2859440743923187,
308
- "eval_precision": 0.8609538327526132,
309
- "eval_recall": 0.8516548463356974,
310
- "eval_runtime": 5.086,
311
- "eval_samples_per_second": 78.451,
312
- "eval_steps_per_second": 9.831,
313
  "step": 1952
314
  },
315
  {
316
  "epoch": 17.0,
317
- "grad_norm": 3.737988233566284,
318
  "learning_rate": 7.5e-06,
319
- "loss": 0.1535,
320
  "step": 2074
321
  },
322
  {
323
  "epoch": 17.0,
324
- "eval_accuracy": 0.8847117794486216,
325
- "eval_f1": 0.8609292598654301,
326
- "eval_loss": 0.29243767261505127,
327
- "eval_precision": 0.8609292598654301,
328
- "eval_recall": 0.8609292598654301,
329
- "eval_runtime": 5.0479,
330
- "eval_samples_per_second": 79.042,
331
- "eval_steps_per_second": 9.905,
332
  "step": 2074
333
  },
334
  {
335
  "epoch": 18.0,
336
- "grad_norm": 3.7625505924224854,
337
  "learning_rate": 5e-06,
338
- "loss": 0.1432,
339
  "step": 2196
340
  },
341
  {
342
  "epoch": 18.0,
343
- "eval_accuracy": 0.8847117794486216,
344
- "eval_f1": 0.8616171059774413,
345
- "eval_loss": 0.29657039046287537,
346
- "eval_precision": 0.859873949579832,
347
- "eval_recall": 0.8634297144935443,
348
- "eval_runtime": 5.0616,
349
- "eval_samples_per_second": 78.829,
350
- "eval_steps_per_second": 9.878,
351
  "step": 2196
352
  },
353
  {
354
  "epoch": 19.0,
355
- "grad_norm": 0.5611210465431213,
356
  "learning_rate": 2.5e-06,
357
- "loss": 0.1466,
358
  "step": 2318
359
  },
360
  {
361
  "epoch": 19.0,
362
- "eval_accuracy": 0.8822055137844611,
363
- "eval_f1": 0.8568221901555235,
364
- "eval_loss": 0.29467570781707764,
365
- "eval_precision": 0.8596491228070176,
366
- "eval_recall": 0.8541553009638116,
367
- "eval_runtime": 5.0458,
368
- "eval_samples_per_second": 79.075,
369
- "eval_steps_per_second": 9.909,
370
  "step": 2318
371
  },
372
  {
373
  "epoch": 20.0,
374
- "grad_norm": 7.14449405670166,
375
  "learning_rate": 0.0,
376
- "loss": 0.1411,
377
  "step": 2440
378
  },
379
  {
380
  "epoch": 20.0,
381
- "eval_accuracy": 0.8847117794486216,
382
- "eval_f1": 0.8609292598654301,
383
- "eval_loss": 0.2950780689716339,
384
- "eval_precision": 0.8609292598654301,
385
- "eval_recall": 0.8609292598654301,
386
- "eval_runtime": 5.0549,
387
- "eval_samples_per_second": 78.933,
388
- "eval_steps_per_second": 9.891,
389
  "step": 2440
390
  },
391
  {
392
  "epoch": 20.0,
393
  "step": 2440,
394
  "total_flos": 8460375995160000.0,
395
- "train_loss": 0.2363556080177182,
396
- "train_runtime": 1909.167,
397
- "train_samples_per_second": 38.184,
398
- "train_steps_per_second": 1.278
399
  }
400
  ],
401
  "logging_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 3.928847551345825,
14
  "learning_rate": 4.75e-05,
15
+ "loss": 0.5514,
16
  "step": 122
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_accuracy": 0.7218045112781954,
21
+ "eval_f1": 0.6545993371027491,
22
+ "eval_loss": 0.5084339380264282,
23
+ "eval_precision": 0.6600553802562947,
24
+ "eval_recall": 0.6506637570467357,
25
+ "eval_runtime": 1.7877,
26
+ "eval_samples_per_second": 223.186,
27
+ "eval_steps_per_second": 27.968,
28
  "step": 122
29
  },
30
  {
31
  "epoch": 2.0,
32
+ "grad_norm": 3.589020013809204,
33
  "learning_rate": 4.5e-05,
34
+ "loss": 0.4753,
35
  "step": 244
36
  },
37
  {
38
  "epoch": 2.0,
39
+ "eval_accuracy": 0.8170426065162907,
40
+ "eval_f1": 0.7662453352594198,
41
+ "eval_loss": 0.40073099732398987,
42
+ "eval_precision": 0.7870255775577557,
43
+ "eval_recall": 0.7530460083651573,
44
+ "eval_runtime": 1.8277,
45
+ "eval_samples_per_second": 218.302,
46
+ "eval_steps_per_second": 27.356,
47
  "step": 244
48
  },
49
  {
50
  "epoch": 3.0,
51
+ "grad_norm": 7.217240810394287,
52
  "learning_rate": 4.25e-05,
53
+ "loss": 0.3834,
54
  "step": 366
55
  },
56
  {
57
  "epoch": 3.0,
58
+ "eval_accuracy": 0.8395989974937343,
59
+ "eval_f1": 0.7805280528052805,
60
+ "eval_loss": 0.35415172576904297,
61
+ "eval_precision": 0.8448765432098766,
62
+ "eval_recall": 0.7540007274049827,
63
+ "eval_runtime": 1.8275,
64
+ "eval_samples_per_second": 218.333,
65
+ "eval_steps_per_second": 27.36,
66
  "step": 366
67
  },
68
  {
69
  "epoch": 4.0,
70
+ "grad_norm": 1.0342143774032593,
71
  "learning_rate": 4e-05,
72
+ "loss": 0.3188,
73
  "step": 488
74
  },
75
  {
76
  "epoch": 4.0,
77
+ "eval_accuracy": 0.8621553884711779,
78
+ "eval_f1": 0.8333016825553572,
79
+ "eval_loss": 0.32140952348709106,
80
+ "eval_precision": 0.8341507249908615,
81
+ "eval_recall": 0.8324695399163484,
82
+ "eval_runtime": 1.8265,
83
+ "eval_samples_per_second": 218.445,
84
+ "eval_steps_per_second": 27.374,
85
  "step": 488
86
  },
87
  {
88
  "epoch": 5.0,
89
+ "grad_norm": 1.17711341381073,
90
  "learning_rate": 3.7500000000000003e-05,
91
+ "loss": 0.2981,
92
  "step": 610
93
  },
94
  {
95
  "epoch": 5.0,
96
+ "eval_accuracy": 0.8822055137844611,
97
+ "eval_f1": 0.855319904024935,
98
+ "eval_loss": 0.2984267771244049,
99
+ "eval_precision": 0.862378106322743,
100
+ "eval_recall": 0.8491543917075832,
101
+ "eval_runtime": 1.8288,
102
+ "eval_samples_per_second": 218.18,
103
+ "eval_steps_per_second": 27.341,
104
  "step": 610
105
  },
106
  {
107
  "epoch": 6.0,
108
+ "grad_norm": 4.153679370880127,
109
  "learning_rate": 3.5e-05,
110
+ "loss": 0.2835,
111
  "step": 732
112
  },
113
  {
114
  "epoch": 6.0,
115
  "eval_accuracy": 0.8696741854636592,
116
+ "eval_f1": 0.8368354828562441,
117
+ "eval_loss": 0.28104665875434875,
118
+ "eval_precision": 0.8520237470480189,
119
+ "eval_recall": 0.8252864157119476,
120
+ "eval_runtime": 1.8275,
121
+ "eval_samples_per_second": 218.335,
122
+ "eval_steps_per_second": 27.36,
123
  "step": 732
124
  },
125
  {
126
  "epoch": 7.0,
127
+ "grad_norm": 1.0657362937927246,
128
  "learning_rate": 3.2500000000000004e-05,
129
+ "loss": 0.2517,
130
  "step": 854
131
  },
132
  {
133
  "epoch": 7.0,
134
+ "eval_accuracy": 0.8872180451127819,
135
+ "eval_f1": 0.8622036668943447,
136
+ "eval_loss": 0.28660351037979126,
137
+ "eval_precision": 0.8671602787456446,
138
+ "eval_recall": 0.8577014002545917,
139
+ "eval_runtime": 1.7903,
140
+ "eval_samples_per_second": 222.862,
141
+ "eval_steps_per_second": 27.928,
142
  "step": 854
143
  },
144
  {
145
  "epoch": 8.0,
146
+ "grad_norm": 3.21960711479187,
147
  "learning_rate": 3e-05,
148
+ "loss": 0.2374,
149
  "step": 976
150
  },
151
  {
152
  "epoch": 8.0,
153
+ "eval_accuracy": 0.8796992481203008,
154
+ "eval_f1": 0.8485289465359063,
155
+ "eval_loss": 0.2996794879436493,
156
+ "eval_precision": 0.8671008040401356,
157
+ "eval_recall": 0.8348790689216221,
158
+ "eval_runtime": 1.7958,
159
+ "eval_samples_per_second": 222.185,
160
+ "eval_steps_per_second": 27.843,
161
  "step": 976
162
  },
163
  {
164
  "epoch": 9.0,
165
+ "grad_norm": 4.313467025756836,
166
  "learning_rate": 2.7500000000000004e-05,
167
+ "loss": 0.2293,
168
  "step": 1098
169
  },
170
  {
171
  "epoch": 9.0,
172
+ "eval_accuracy": 0.8796992481203008,
173
+ "eval_f1": 0.8518472677764712,
174
+ "eval_loss": 0.29089975357055664,
175
+ "eval_precision": 0.8599810186649794,
176
+ "eval_recall": 0.844880887434079,
177
+ "eval_runtime": 1.7949,
178
+ "eval_samples_per_second": 222.299,
179
+ "eval_steps_per_second": 27.857,
180
  "step": 1098
181
  },
182
  {
183
  "epoch": 10.0,
184
+ "grad_norm": 2.5997090339660645,
185
  "learning_rate": 2.5e-05,
186
+ "loss": 0.2091,
187
  "step": 1220
188
  },
189
  {
190
  "epoch": 10.0,
191
+ "eval_accuracy": 0.8822055137844611,
192
+ "eval_f1": 0.8589543987905864,
193
+ "eval_loss": 0.29276683926582336,
194
+ "eval_precision": 0.8564068100358423,
195
+ "eval_recall": 0.8616566648481543,
196
+ "eval_runtime": 1.7944,
197
+ "eval_samples_per_second": 222.357,
198
+ "eval_steps_per_second": 27.864,
199
  "step": 1220
200
  },
201
  {
202
  "epoch": 11.0,
203
+ "grad_norm": 3.3290369510650635,
204
  "learning_rate": 2.25e-05,
205
+ "loss": 0.198,
206
  "step": 1342
207
  },
208
  {
209
  "epoch": 11.0,
210
+ "eval_accuracy": 0.8796992481203008,
211
+ "eval_f1": 0.8569892473118279,
212
+ "eval_loss": 0.28468698263168335,
213
+ "eval_precision": 0.8522004241781549,
214
+ "eval_recall": 0.8623840698308783,
215
+ "eval_runtime": 1.8002,
216
+ "eval_samples_per_second": 221.645,
217
+ "eval_steps_per_second": 27.775,
218
  "step": 1342
219
  },
220
  {
221
  "epoch": 12.0,
222
+ "grad_norm": 5.568458557128906,
223
  "learning_rate": 2e-05,
224
+ "loss": 0.1906,
225
  "step": 1464
226
  },
227
  {
228
  "epoch": 12.0,
229
+ "eval_accuracy": 0.87468671679198,
230
+ "eval_f1": 0.843111041207927,
231
+ "eval_loss": 0.31200090050697327,
232
+ "eval_precision": 0.8585673051692468,
233
+ "eval_recall": 0.831332969630842,
234
+ "eval_runtime": 1.7936,
235
+ "eval_samples_per_second": 222.458,
236
+ "eval_steps_per_second": 27.877,
237
  "step": 1464
238
  },
239
  {
240
  "epoch": 13.0,
241
+ "grad_norm": 1.5475130081176758,
242
  "learning_rate": 1.75e-05,
243
+ "loss": 0.1818,
244
  "step": 1586
245
  },
246
  {
247
  "epoch": 13.0,
248
+ "eval_accuracy": 0.8771929824561403,
249
+ "eval_f1": 0.850729517396184,
250
+ "eval_loss": 0.2906310558319092,
251
+ "eval_precision": 0.8535087719298247,
252
+ "eval_recall": 0.8481087470449173,
253
+ "eval_runtime": 1.795,
254
+ "eval_samples_per_second": 222.279,
255
+ "eval_steps_per_second": 27.854,
256
  "step": 1586
257
  },
258
  {
259
  "epoch": 14.0,
260
+ "grad_norm": 5.56436014175415,
261
  "learning_rate": 1.5e-05,
262
+ "loss": 0.1756,
263
  "step": 1708
264
  },
265
  {
266
  "epoch": 14.0,
267
+ "eval_accuracy": 0.8771929824561403,
268
+ "eval_f1": 0.8514869535493182,
269
+ "eval_loss": 0.28101998567581177,
270
+ "eval_precision": 0.8523821128305106,
271
+ "eval_recall": 0.8506092016730314,
272
+ "eval_runtime": 1.7948,
273
+ "eval_samples_per_second": 222.311,
274
+ "eval_steps_per_second": 27.859,
275
  "step": 1708
276
  },
277
  {
278
  "epoch": 15.0,
279
+ "grad_norm": 1.7677656412124634,
280
  "learning_rate": 1.25e-05,
281
+ "loss": 0.174,
282
  "step": 1830
283
  },
284
  {
285
  "epoch": 15.0,
286
+ "eval_accuracy": 0.8847117794486216,
287
+ "eval_f1": 0.8595070422535211,
288
+ "eval_loss": 0.2828814685344696,
289
+ "eval_precision": 0.8633733523114054,
290
+ "eval_recall": 0.8559283506092017,
291
+ "eval_runtime": 1.7987,
292
+ "eval_samples_per_second": 221.825,
293
+ "eval_steps_per_second": 27.798,
294
  "step": 1830
295
  },
296
  {
297
  "epoch": 16.0,
298
+ "grad_norm": 2.1199889183044434,
299
  "learning_rate": 1e-05,
300
+ "loss": 0.1705,
301
  "step": 1952
302
  },
303
  {
304
  "epoch": 16.0,
305
  "eval_accuracy": 0.8822055137844611,
306
+ "eval_f1": 0.855319904024935,
307
+ "eval_loss": 0.292202889919281,
308
+ "eval_precision": 0.862378106322743,
309
+ "eval_recall": 0.8491543917075832,
310
+ "eval_runtime": 1.7972,
311
+ "eval_samples_per_second": 222.011,
312
+ "eval_steps_per_second": 27.821,
313
  "step": 1952
314
  },
315
  {
316
  "epoch": 17.0,
317
+ "grad_norm": 2.521127223968506,
318
  "learning_rate": 7.5e-06,
319
+ "loss": 0.1509,
320
  "step": 2074
321
  },
322
  {
323
  "epoch": 17.0,
324
+ "eval_accuracy": 0.8822055137844611,
325
+ "eval_f1": 0.8568221901555235,
326
+ "eval_loss": 0.2991296947002411,
327
+ "eval_precision": 0.8596491228070176,
328
+ "eval_recall": 0.8541553009638116,
329
+ "eval_runtime": 1.7965,
330
+ "eval_samples_per_second": 222.104,
331
+ "eval_steps_per_second": 27.833,
332
  "step": 2074
333
  },
334
  {
335
  "epoch": 18.0,
336
+ "grad_norm": 3.4539577960968018,
337
  "learning_rate": 5e-06,
338
+ "loss": 0.1549,
339
  "step": 2196
340
  },
341
  {
342
  "epoch": 18.0,
343
+ "eval_accuracy": 0.8822055137844611,
344
+ "eval_f1": 0.855319904024935,
345
+ "eval_loss": 0.3000391125679016,
346
+ "eval_precision": 0.862378106322743,
347
+ "eval_recall": 0.8491543917075832,
348
+ "eval_runtime": 1.7991,
349
+ "eval_samples_per_second": 221.772,
350
+ "eval_steps_per_second": 27.791,
351
  "step": 2196
352
  },
353
  {
354
  "epoch": 19.0,
355
+ "grad_norm": 1.3508776426315308,
356
  "learning_rate": 2.5e-06,
357
+ "loss": 0.1469,
358
  "step": 2318
359
  },
360
  {
361
  "epoch": 19.0,
362
+ "eval_accuracy": 0.8847117794486216,
363
+ "eval_f1": 0.8609292598654301,
364
+ "eval_loss": 0.2942558228969574,
365
+ "eval_precision": 0.8609292598654301,
366
+ "eval_recall": 0.8609292598654301,
367
+ "eval_runtime": 1.8067,
368
+ "eval_samples_per_second": 220.841,
369
+ "eval_steps_per_second": 27.674,
370
  "step": 2318
371
  },
372
  {
373
  "epoch": 20.0,
374
+ "grad_norm": 3.1654489040374756,
375
  "learning_rate": 0.0,
376
+ "loss": 0.1493,
377
  "step": 2440
378
  },
379
  {
380
  "epoch": 20.0,
381
+ "eval_accuracy": 0.8822055137844611,
382
+ "eval_f1": 0.855319904024935,
383
+ "eval_loss": 0.3026413023471832,
384
+ "eval_precision": 0.862378106322743,
385
+ "eval_recall": 0.8491543917075832,
386
+ "eval_runtime": 1.7968,
387
+ "eval_samples_per_second": 222.067,
388
+ "eval_steps_per_second": 27.828,
389
  "step": 2440
390
  },
391
  {
392
  "epoch": 20.0,
393
  "step": 2440,
394
  "total_flos": 8460375995160000.0,
395
+ "train_loss": 0.24652244458433056,
396
+ "train_runtime": 623.7969,
397
+ "train_samples_per_second": 116.865,
398
+ "train_steps_per_second": 3.912
399
  }
400
  ],
401
  "logging_steps": 500,