apwic commited on
Commit
7c83ae7
1 Parent(s): 99e24bb

End of training

Browse files
README.md CHANGED
@@ -1,4 +1,6 @@
1
  ---
 
 
2
  license: mit
3
  base_model: indolem/indobert-base-uncased
4
  tags:
 
1
  ---
2
+ language:
3
+ - id
4
  license: mit
5
  base_model: indolem/indobert-base-uncased
6
  tags:
all_results.json CHANGED
@@ -1,21 +1,21 @@
1
  {
2
  "accuracy": 0.9203187250996016,
3
  "epoch": 20.0,
4
- "eval_accuracy": 0.8822055137844611,
5
- "eval_f1": 0.858259325044405,
6
- "eval_loss": 0.30716511607170105,
7
- "eval_precision": 0.8573798178418481,
8
- "eval_recall": 0.8591562102200401,
9
- "eval_runtime": 5.0211,
10
  "eval_samples": 399,
11
- "eval_samples_per_second": 79.464,
12
- "eval_steps_per_second": 9.958,
13
- "f1": 0.9039946451196481,
14
- "precision": 0.9032008703618873,
15
- "recall": 0.9048002299511353,
16
- "train_loss": 0.22629446436147221,
17
- "train_runtime": 1956.0503,
18
  "train_samples": 3645,
19
- "train_samples_per_second": 37.269,
20
- "train_steps_per_second": 1.247
21
  }
 
1
  {
2
  "accuracy": 0.9203187250996016,
3
  "epoch": 20.0,
4
+ "eval_accuracy": 0.8796992481203008,
5
+ "eval_f1": 0.8563025210084034,
6
+ "eval_loss": 0.31680676341056824,
7
+ "eval_precision": 0.8529936381473334,
8
+ "eval_recall": 0.8598836152027641,
9
+ "eval_runtime": 1.8096,
10
  "eval_samples": 399,
11
+ "eval_samples_per_second": 220.495,
12
+ "eval_steps_per_second": 27.631,
13
+ "f1": 0.9045572941422515,
14
+ "precision": 0.901511292239769,
15
+ "recall": 0.9077895947111239,
16
+ "train_loss": 0.2318932650519199,
17
+ "train_runtime": 643.2373,
18
  "train_samples": 3645,
19
+ "train_samples_per_second": 113.333,
20
+ "train_steps_per_second": 3.793
21
  }
eval_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_accuracy": 0.8822055137844611,
4
- "eval_f1": 0.858259325044405,
5
- "eval_loss": 0.30716511607170105,
6
- "eval_precision": 0.8573798178418481,
7
- "eval_recall": 0.8591562102200401,
8
- "eval_runtime": 5.0211,
9
  "eval_samples": 399,
10
- "eval_samples_per_second": 79.464,
11
- "eval_steps_per_second": 9.958
12
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_accuracy": 0.8796992481203008,
4
+ "eval_f1": 0.8563025210084034,
5
+ "eval_loss": 0.31680676341056824,
6
+ "eval_precision": 0.8529936381473334,
7
+ "eval_recall": 0.8598836152027641,
8
+ "eval_runtime": 1.8096,
9
  "eval_samples": 399,
10
+ "eval_samples_per_second": 220.495,
11
+ "eval_steps_per_second": 27.631
12
  }
predict_results.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "accuracy": 0.9203187250996016,
3
- "f1": 0.9039946451196481,
4
- "precision": 0.9032008703618873,
5
- "recall": 0.9048002299511353
6
  }
 
1
  {
2
  "accuracy": 0.9203187250996016,
3
+ "f1": 0.9045572941422515,
4
+ "precision": 0.901511292239769,
5
+ "recall": 0.9077895947111239
6
  }
predict_results.txt CHANGED
@@ -18,7 +18,7 @@ index prediction
18
  16 1
19
  17 1
20
  18 1
21
- 19 0
22
  20 1
23
  21 0
24
  22 1
@@ -39,9 +39,9 @@ index prediction
39
  37 1
40
  38 0
41
  39 1
42
- 40 0
43
  41 1
44
- 42 1
45
  43 1
46
  44 1
47
  45 1
@@ -55,7 +55,7 @@ index prediction
55
  53 1
56
  54 1
57
  55 1
58
- 56 1
59
  57 1
60
  58 1
61
  59 1
@@ -70,17 +70,17 @@ index prediction
70
  68 0
71
  69 1
72
  70 1
73
- 71 1
74
  72 1
75
  73 1
76
  74 1
77
- 75 1
78
  76 1
79
  77 1
80
  78 0
81
  79 1
82
  80 1
83
- 81 1
84
  82 1
85
  83 1
86
  84 1
@@ -99,7 +99,7 @@ index prediction
99
  97 1
100
  98 1
101
  99 0
102
- 100 0
103
  101 1
104
  102 1
105
  103 1
@@ -113,7 +113,7 @@ index prediction
113
  111 1
114
  112 1
115
  113 1
116
- 114 0
117
  115 1
118
  116 1
119
  117 1
@@ -169,7 +169,7 @@ index prediction
169
  167 1
170
  168 1
171
  169 1
172
- 170 0
173
  171 1
174
  172 1
175
  173 1
@@ -215,7 +215,7 @@ index prediction
215
  213 1
216
  214 1
217
  215 1
218
- 216 0
219
  217 1
220
  218 1
221
  219 1
@@ -258,8 +258,8 @@ index prediction
258
  256 1
259
  257 1
260
  258 1
261
- 259 0
262
- 260 1
263
  261 1
264
  262 1
265
  263 1
@@ -286,11 +286,11 @@ index prediction
286
  284 1
287
  285 1
288
  286 1
289
- 287 0
290
  288 1
291
  289 0
292
  290 1
293
- 291 0
294
  292 1
295
  293 1
296
  294 0
@@ -359,7 +359,7 @@ index prediction
359
  357 0
360
  358 0
361
  359 0
362
- 360 0
363
  361 0
364
  362 0
365
  363 0
@@ -372,8 +372,8 @@ index prediction
372
  370 0
373
  371 0
374
  372 0
375
- 373 0
376
- 374 0
377
  375 0
378
  376 0
379
  377 0
@@ -383,7 +383,7 @@ index prediction
383
  381 0
384
  382 0
385
  383 0
386
- 384 1
387
  385 1
388
  386 0
389
  387 0
@@ -418,7 +418,7 @@ index prediction
418
  416 0
419
  417 0
420
  418 0
421
- 419 1
422
  420 0
423
  421 0
424
  422 0
@@ -436,7 +436,7 @@ index prediction
436
  434 0
437
  435 0
438
  436 0
439
- 437 0
440
  438 0
441
  439 0
442
  440 0
@@ -485,7 +485,7 @@ index prediction
485
  483 0
486
  484 0
487
  485 0
488
- 486 1
489
  487 0
490
  488 0
491
  489 0
@@ -536,7 +536,7 @@ index prediction
536
  534 0
537
  535 0
538
  536 0
539
- 537 1
540
  538 0
541
  539 0
542
  540 0
@@ -569,7 +569,7 @@ index prediction
569
  567 0
570
  568 0
571
  569 0
572
- 570 1
573
  571 0
574
  572 0
575
  573 0
@@ -585,13 +585,13 @@ index prediction
585
  583 0
586
  584 0
587
  585 0
588
- 586 0
589
  587 0
590
  588 0
591
  589 0
592
  590 0
593
  591 0
594
- 592 0
595
  593 0
596
  594 0
597
  595 0
@@ -626,7 +626,7 @@ index prediction
626
  624 0
627
  625 0
628
  626 0
629
- 627 0
630
  628 0
631
  629 0
632
  630 0
@@ -656,7 +656,7 @@ index prediction
656
  654 0
657
  655 0
658
  656 0
659
- 657 1
660
  658 1
661
  659 0
662
  660 0
@@ -672,7 +672,7 @@ index prediction
672
  670 0
673
  671 0
674
  672 0
675
- 673 0
676
  674 0
677
  675 0
678
  676 0
@@ -681,7 +681,7 @@ index prediction
681
  679 0
682
  680 0
683
  681 0
684
- 682 1
685
  683 0
686
  684 0
687
  685 0
@@ -716,9 +716,9 @@ index prediction
716
  714 0
717
  715 0
718
  716 0
719
- 717 0
720
  718 0
721
- 719 0
722
  720 0
723
  721 0
724
  722 0
@@ -739,7 +739,7 @@ index prediction
739
  737 0
740
  738 0
741
  739 0
742
- 740 0
743
  741 0
744
  742 0
745
  743 0
@@ -753,7 +753,7 @@ index prediction
753
  751 0
754
  752 0
755
  753 0
756
- 754 0
757
  755 0
758
  756 0
759
  757 0
@@ -773,7 +773,7 @@ index prediction
773
  771 0
774
  772 0
775
  773 0
776
- 774 1
777
  775 0
778
  776 0
779
  777 0
@@ -811,7 +811,7 @@ index prediction
811
  809 0
812
  810 0
813
  811 0
814
- 812 1
815
  813 0
816
  814 0
817
  815 0
@@ -824,7 +824,7 @@ index prediction
824
  822 0
825
  823 0
826
  824 0
827
- 825 0
828
  826 0
829
  827 0
830
  828 0
@@ -851,7 +851,7 @@ index prediction
851
  849 0
852
  850 0
853
  851 0
854
- 852 1
855
  853 0
856
  854 0
857
  855 0
 
18
  16 1
19
  17 1
20
  18 1
21
+ 19 1
22
  20 1
23
  21 0
24
  22 1
 
39
  37 1
40
  38 0
41
  39 1
42
+ 40 1
43
  41 1
44
+ 42 0
45
  43 1
46
  44 1
47
  45 1
 
55
  53 1
56
  54 1
57
  55 1
58
+ 56 0
59
  57 1
60
  58 1
61
  59 1
 
70
  68 0
71
  69 1
72
  70 1
73
+ 71 0
74
  72 1
75
  73 1
76
  74 1
77
+ 75 0
78
  76 1
79
  77 1
80
  78 0
81
  79 1
82
  80 1
83
+ 81 0
84
  82 1
85
  83 1
86
  84 1
 
99
  97 1
100
  98 1
101
  99 0
102
+ 100 1
103
  101 1
104
  102 1
105
  103 1
 
113
  111 1
114
  112 1
115
  113 1
116
+ 114 1
117
  115 1
118
  116 1
119
  117 1
 
169
  167 1
170
  168 1
171
  169 1
172
+ 170 1
173
  171 1
174
  172 1
175
  173 1
 
215
  213 1
216
  214 1
217
  215 1
218
+ 216 1
219
  217 1
220
  218 1
221
  219 1
 
258
  256 1
259
  257 1
260
  258 1
261
+ 259 1
262
+ 260 0
263
  261 1
264
  262 1
265
  263 1
 
286
  284 1
287
  285 1
288
  286 1
289
+ 287 1
290
  288 1
291
  289 0
292
  290 1
293
+ 291 1
294
  292 1
295
  293 1
296
  294 0
 
359
  357 0
360
  358 0
361
  359 0
362
+ 360 1
363
  361 0
364
  362 0
365
  363 0
 
372
  370 0
373
  371 0
374
  372 0
375
+ 373 1
376
+ 374 1
377
  375 0
378
  376 0
379
  377 0
 
383
  381 0
384
  382 0
385
  383 0
386
+ 384 0
387
  385 1
388
  386 0
389
  387 0
 
418
  416 0
419
  417 0
420
  418 0
421
+ 419 0
422
  420 0
423
  421 0
424
  422 0
 
436
  434 0
437
  435 0
438
  436 0
439
+ 437 1
440
  438 0
441
  439 0
442
  440 0
 
485
  483 0
486
  484 0
487
  485 0
488
+ 486 0
489
  487 0
490
  488 0
491
  489 0
 
536
  534 0
537
  535 0
538
  536 0
539
+ 537 0
540
  538 0
541
  539 0
542
  540 0
 
569
  567 0
570
  568 0
571
  569 0
572
+ 570 0
573
  571 0
574
  572 0
575
  573 0
 
585
  583 0
586
  584 0
587
  585 0
588
+ 586 1
589
  587 0
590
  588 0
591
  589 0
592
  590 0
593
  591 0
594
+ 592 1
595
  593 0
596
  594 0
597
  595 0
 
626
  624 0
627
  625 0
628
  626 0
629
+ 627 1
630
  628 0
631
  629 0
632
  630 0
 
656
  654 0
657
  655 0
658
  656 0
659
+ 657 0
660
  658 1
661
  659 0
662
  660 0
 
672
  670 0
673
  671 0
674
  672 0
675
+ 673 1
676
  674 0
677
  675 0
678
  676 0
 
681
  679 0
682
  680 0
683
  681 0
684
+ 682 0
685
  683 0
686
  684 0
687
  685 0
 
716
  714 0
717
  715 0
718
  716 0
719
+ 717 1
720
  718 0
721
+ 719 1
722
  720 0
723
  721 0
724
  722 0
 
739
  737 0
740
  738 0
741
  739 0
742
+ 740 1
743
  741 0
744
  742 0
745
  743 0
 
753
  751 0
754
  752 0
755
  753 0
756
+ 754 1
757
  755 0
758
  756 0
759
  757 0
 
773
  771 0
774
  772 0
775
  773 0
776
+ 774 0
777
  775 0
778
  776 0
779
  777 0
 
811
  809 0
812
  810 0
813
  811 0
814
+ 812 0
815
  813 0
816
  814 0
817
  815 0
 
824
  822 0
825
  823 0
826
  824 0
827
+ 825 1
828
  826 0
829
  827 0
830
  828 0
 
851
  849 0
852
  850 0
853
  851 0
854
+ 852 0
855
  853 0
856
  854 0
857
  855 0
runs/Jun03_15-23-46_a358b85c7679/events.out.tfevents.1717428885.a358b85c7679.170314.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:230b1beb8a7077d9c3359fe928ab04829edae8c1bf07d6d7eb58b5ad4d3c671e
3
+ size 560
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
- "train_loss": 0.22629446436147221,
4
- "train_runtime": 1956.0503,
5
  "train_samples": 3645,
6
- "train_samples_per_second": 37.269,
7
- "train_steps_per_second": 1.247
8
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "train_loss": 0.2318932650519199,
4
+ "train_runtime": 643.2373,
5
  "train_samples": 3645,
6
+ "train_samples_per_second": 113.333,
7
+ "train_steps_per_second": 3.793
8
  }
trainer_state.json CHANGED
@@ -10,392 +10,392 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 3.569611072540283,
14
  "learning_rate": 4.75e-05,
15
- "loss": 0.5438,
16
  "step": 122
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_accuracy": 0.7218045112781954,
21
- "eval_f1": 0.6545993371027491,
22
- "eval_loss": 0.49882158637046814,
23
- "eval_precision": 0.6600553802562947,
24
- "eval_recall": 0.6506637570467357,
25
- "eval_runtime": 5.2428,
26
- "eval_samples_per_second": 76.104,
27
- "eval_steps_per_second": 9.537,
28
  "step": 122
29
  },
30
  {
31
  "epoch": 2.0,
32
- "grad_norm": 3.2452878952026367,
33
  "learning_rate": 4.5e-05,
34
- "loss": 0.4428,
35
  "step": 244
36
  },
37
  {
38
  "epoch": 2.0,
39
- "eval_accuracy": 0.8446115288220551,
40
- "eval_f1": 0.8161454307628278,
41
- "eval_loss": 0.3788329064846039,
42
- "eval_precision": 0.8107299270072992,
43
- "eval_recall": 0.8225586470267321,
44
- "eval_runtime": 5.1661,
45
- "eval_samples_per_second": 77.234,
46
- "eval_steps_per_second": 9.679,
47
  "step": 244
48
  },
49
  {
50
  "epoch": 3.0,
51
- "grad_norm": 3.797173500061035,
52
  "learning_rate": 4.25e-05,
53
- "loss": 0.3441,
54
  "step": 366
55
  },
56
  {
57
  "epoch": 3.0,
58
- "eval_accuracy": 0.8596491228070176,
59
- "eval_f1": 0.8179269882659713,
60
- "eval_loss": 0.3289283514022827,
61
- "eval_precision": 0.8510239760239761,
62
- "eval_recall": 0.7981905801054737,
63
- "eval_runtime": 5.1072,
64
- "eval_samples_per_second": 78.125,
65
- "eval_steps_per_second": 9.79,
66
  "step": 366
67
  },
68
  {
69
  "epoch": 4.0,
70
- "grad_norm": 1.9981327056884766,
71
  "learning_rate": 4e-05,
72
- "loss": 0.2986,
73
  "step": 488
74
  },
75
  {
76
  "epoch": 4.0,
77
- "eval_accuracy": 0.8796992481203008,
78
- "eval_f1": 0.8533986527862829,
79
- "eval_loss": 0.28838610649108887,
80
- "eval_precision": 0.8572003218020917,
81
- "eval_recall": 0.8498817966903074,
82
- "eval_runtime": 5.1209,
83
- "eval_samples_per_second": 77.916,
84
- "eval_steps_per_second": 9.764,
85
  "step": 488
86
  },
87
  {
88
  "epoch": 5.0,
89
- "grad_norm": 1.763756513595581,
90
  "learning_rate": 3.7500000000000003e-05,
91
- "loss": 0.2667,
92
  "step": 610
93
  },
94
  {
95
  "epoch": 5.0,
96
- "eval_accuracy": 0.8771929824561403,
97
- "eval_f1": 0.850729517396184,
98
- "eval_loss": 0.26981133222579956,
99
- "eval_precision": 0.8535087719298247,
100
- "eval_recall": 0.8481087470449173,
101
- "eval_runtime": 5.1232,
102
- "eval_samples_per_second": 77.881,
103
- "eval_steps_per_second": 9.759,
104
  "step": 610
105
  },
106
  {
107
  "epoch": 6.0,
108
- "grad_norm": 2.7370102405548096,
109
  "learning_rate": 3.5e-05,
110
- "loss": 0.2524,
111
  "step": 732
112
  },
113
  {
114
  "epoch": 6.0,
115
- "eval_accuracy": 0.8847117794486216,
116
- "eval_f1": 0.8609292598654301,
117
- "eval_loss": 0.27233511209487915,
118
- "eval_precision": 0.8609292598654301,
119
- "eval_recall": 0.8609292598654301,
120
- "eval_runtime": 5.1586,
121
- "eval_samples_per_second": 77.347,
122
- "eval_steps_per_second": 9.693,
123
  "step": 732
124
  },
125
  {
126
  "epoch": 7.0,
127
- "grad_norm": 1.2413272857666016,
128
  "learning_rate": 3.2500000000000004e-05,
129
- "loss": 0.2343,
130
  "step": 854
131
  },
132
  {
133
  "epoch": 7.0,
134
- "eval_accuracy": 0.8646616541353384,
135
- "eval_f1": 0.8265664241097301,
136
- "eval_loss": 0.3179858326911926,
137
- "eval_precision": 0.8532894736842105,
138
- "eval_recall": 0.8092380432805966,
139
- "eval_runtime": 5.1498,
140
- "eval_samples_per_second": 77.479,
141
- "eval_steps_per_second": 9.709,
142
  "step": 854
143
  },
144
  {
145
  "epoch": 8.0,
146
- "grad_norm": 3.469871997833252,
147
  "learning_rate": 3e-05,
148
- "loss": 0.2212,
149
  "step": 976
150
  },
151
  {
152
  "epoch": 8.0,
153
- "eval_accuracy": 0.8822055137844611,
154
- "eval_f1": 0.852937255424767,
155
- "eval_loss": 0.29489362239837646,
156
- "eval_precision": 0.8674217731421121,
157
- "eval_recall": 0.8416530278232406,
158
- "eval_runtime": 5.155,
159
- "eval_samples_per_second": 77.4,
160
- "eval_steps_per_second": 9.699,
161
  "step": 976
162
  },
163
  {
164
  "epoch": 9.0,
165
- "grad_norm": 4.085997581481934,
166
  "learning_rate": 2.7500000000000004e-05,
167
- "loss": 0.2142,
168
  "step": 1098
169
  },
170
  {
171
  "epoch": 9.0,
172
  "eval_accuracy": 0.8847117794486216,
173
- "eval_f1": 0.8564658408408408,
174
- "eval_loss": 0.2828481197357178,
175
- "eval_precision": 0.8697278911564625,
176
- "eval_recall": 0.8459265320967448,
177
- "eval_runtime": 5.1873,
178
- "eval_samples_per_second": 76.919,
179
- "eval_steps_per_second": 9.639,
180
  "step": 1098
181
  },
182
  {
183
  "epoch": 10.0,
184
- "grad_norm": 3.2397539615631104,
185
  "learning_rate": 2.5e-05,
186
- "loss": 0.1958,
187
  "step": 1220
188
  },
189
  {
190
  "epoch": 10.0,
191
- "eval_accuracy": 0.8696741854636592,
192
- "eval_f1": 0.8457993935430168,
193
- "eval_loss": 0.28871509432792664,
194
- "eval_precision": 0.8398540145985401,
195
- "eval_recall": 0.8527914166212038,
196
- "eval_runtime": 5.1373,
197
- "eval_samples_per_second": 77.667,
198
- "eval_steps_per_second": 9.733,
199
  "step": 1220
200
  },
201
  {
202
  "epoch": 11.0,
203
- "grad_norm": 2.4002835750579834,
204
  "learning_rate": 2.25e-05,
205
- "loss": 0.1855,
206
  "step": 1342
207
  },
208
  {
209
  "epoch": 11.0,
210
- "eval_accuracy": 0.8822055137844611,
211
- "eval_f1": 0.8602993213495533,
212
- "eval_loss": 0.2867955267429352,
213
- "eval_precision": 0.8547653958944281,
214
- "eval_recall": 0.8666575741043827,
215
- "eval_runtime": 5.1317,
216
- "eval_samples_per_second": 77.752,
217
- "eval_steps_per_second": 9.743,
218
  "step": 1342
219
  },
220
  {
221
  "epoch": 12.0,
222
- "grad_norm": 1.633034348487854,
223
  "learning_rate": 2e-05,
224
- "loss": 0.1742,
225
  "step": 1464
226
  },
227
  {
228
  "epoch": 12.0,
229
- "eval_accuracy": 0.87468671679198,
230
- "eval_f1": 0.8448388501742161,
231
- "eval_loss": 0.29811105132102966,
232
- "eval_precision": 0.8551721930610677,
233
- "eval_recall": 0.8363338788870704,
234
- "eval_runtime": 5.1494,
235
- "eval_samples_per_second": 77.484,
236
- "eval_steps_per_second": 9.71,
237
  "step": 1464
238
  },
239
  {
240
  "epoch": 13.0,
241
- "grad_norm": 2.338294506072998,
242
  "learning_rate": 1.75e-05,
243
- "loss": 0.1601,
244
  "step": 1586
245
  },
246
  {
247
  "epoch": 13.0,
248
  "eval_accuracy": 0.8796992481203008,
249
- "eval_f1": 0.8556004584112431,
250
- "eval_loss": 0.29304954409599304,
251
- "eval_precision": 0.8538865546218487,
252
- "eval_recall": 0.85738316057465,
253
- "eval_runtime": 5.0982,
254
- "eval_samples_per_second": 78.263,
255
- "eval_steps_per_second": 9.807,
256
  "step": 1586
257
  },
258
  {
259
  "epoch": 14.0,
260
- "grad_norm": 1.3382197618484497,
261
  "learning_rate": 1.5e-05,
262
- "loss": 0.1602,
263
  "step": 1708
264
  },
265
  {
266
  "epoch": 14.0,
267
- "eval_accuracy": 0.8796992481203008,
268
- "eval_f1": 0.8589607635206786,
269
- "eval_loss": 0.29793980717658997,
270
- "eval_precision": 0.8503875968992248,
271
- "eval_recall": 0.8698854337152209,
272
- "eval_runtime": 5.1508,
273
- "eval_samples_per_second": 77.464,
274
- "eval_steps_per_second": 9.707,
275
  "step": 1708
276
  },
277
  {
278
  "epoch": 15.0,
279
- "grad_norm": 0.5800030827522278,
280
  "learning_rate": 1.25e-05,
281
- "loss": 0.1497,
282
  "step": 1830
283
  },
284
  {
285
  "epoch": 15.0,
286
- "eval_accuracy": 0.8872180451127819,
287
- "eval_f1": 0.8662440310793597,
288
- "eval_loss": 0.29690659046173096,
289
- "eval_precision": 0.8606158357771261,
290
- "eval_recall": 0.872704128023277,
291
- "eval_runtime": 5.1461,
292
- "eval_samples_per_second": 77.534,
293
- "eval_steps_per_second": 9.716,
294
  "step": 1830
295
  },
296
  {
297
  "epoch": 16.0,
298
- "grad_norm": 6.755856037139893,
299
  "learning_rate": 1e-05,
300
- "loss": 0.1447,
301
  "step": 1952
302
  },
303
  {
304
  "epoch": 16.0,
305
- "eval_accuracy": 0.8847117794486216,
306
- "eval_f1": 0.8616171059774413,
307
- "eval_loss": 0.29627636075019836,
308
- "eval_precision": 0.859873949579832,
309
- "eval_recall": 0.8634297144935443,
310
- "eval_runtime": 5.1639,
311
- "eval_samples_per_second": 77.267,
312
- "eval_steps_per_second": 9.683,
313
  "step": 1952
314
  },
315
  {
316
  "epoch": 17.0,
317
- "grad_norm": 2.1538662910461426,
318
  "learning_rate": 7.5e-06,
319
- "loss": 0.1394,
320
  "step": 2074
321
  },
322
  {
323
  "epoch": 17.0,
324
- "eval_accuracy": 0.8822055137844611,
325
- "eval_f1": 0.8589543987905864,
326
- "eval_loss": 0.30176377296447754,
327
- "eval_precision": 0.8564068100358423,
328
- "eval_recall": 0.8616566648481543,
329
- "eval_runtime": 5.17,
330
- "eval_samples_per_second": 77.176,
331
- "eval_steps_per_second": 9.671,
332
  "step": 2074
333
  },
334
  {
335
  "epoch": 18.0,
336
- "grad_norm": 2.54630708694458,
337
  "learning_rate": 5e-06,
338
- "loss": 0.1333,
339
  "step": 2196
340
  },
341
  {
342
  "epoch": 18.0,
343
- "eval_accuracy": 0.8822055137844611,
344
- "eval_f1": 0.858259325044405,
345
- "eval_loss": 0.30650317668914795,
346
- "eval_precision": 0.8573798178418481,
347
- "eval_recall": 0.8591562102200401,
348
- "eval_runtime": 5.128,
349
- "eval_samples_per_second": 77.807,
350
- "eval_steps_per_second": 9.75,
351
  "step": 2196
352
  },
353
  {
354
  "epoch": 19.0,
355
- "grad_norm": 0.7696042656898499,
356
  "learning_rate": 2.5e-06,
357
- "loss": 0.1406,
358
  "step": 2318
359
  },
360
  {
361
  "epoch": 19.0,
362
- "eval_accuracy": 0.8822055137844611,
363
- "eval_f1": 0.858259325044405,
364
- "eval_loss": 0.30623340606689453,
365
- "eval_precision": 0.8573798178418481,
366
- "eval_recall": 0.8591562102200401,
367
- "eval_runtime": 5.1122,
368
- "eval_samples_per_second": 78.049,
369
- "eval_steps_per_second": 9.781,
370
  "step": 2318
371
  },
372
  {
373
  "epoch": 20.0,
374
- "grad_norm": 3.025254487991333,
375
  "learning_rate": 0.0,
376
- "loss": 0.1243,
377
  "step": 2440
378
  },
379
  {
380
  "epoch": 20.0,
381
- "eval_accuracy": 0.8822055137844611,
382
- "eval_f1": 0.858259325044405,
383
- "eval_loss": 0.30716511607170105,
384
- "eval_precision": 0.8573798178418481,
385
- "eval_recall": 0.8591562102200401,
386
- "eval_runtime": 5.0928,
387
- "eval_samples_per_second": 78.347,
388
- "eval_steps_per_second": 9.818,
389
  "step": 2440
390
  },
391
  {
392
  "epoch": 20.0,
393
  "step": 2440,
394
  "total_flos": 8460375995160000.0,
395
- "train_loss": 0.22629446436147221,
396
- "train_runtime": 1956.0503,
397
- "train_samples_per_second": 37.269,
398
- "train_steps_per_second": 1.247
399
  }
400
  ],
401
  "logging_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 3.639183521270752,
14
  "learning_rate": 4.75e-05,
15
+ "loss": 0.5413,
16
  "step": 122
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_accuracy": 0.7243107769423559,
21
+ "eval_f1": 0.6445578231292517,
22
+ "eval_loss": 0.5006802678108215,
23
+ "eval_precision": 0.6593400801180687,
24
+ "eval_recall": 0.6374340789234406,
25
+ "eval_runtime": 1.8003,
26
+ "eval_samples_per_second": 221.628,
27
+ "eval_steps_per_second": 27.773,
28
  "step": 122
29
  },
30
  {
31
  "epoch": 2.0,
32
+ "grad_norm": 3.4983344078063965,
33
  "learning_rate": 4.5e-05,
34
+ "loss": 0.4584,
35
  "step": 244
36
  },
37
  {
38
  "epoch": 2.0,
39
+ "eval_accuracy": 0.8295739348370927,
40
+ "eval_f1": 0.7760942760942761,
41
+ "eval_loss": 0.3855762183666229,
42
+ "eval_precision": 0.8122789566755084,
43
+ "eval_recall": 0.7569103473358793,
44
+ "eval_runtime": 1.8373,
45
+ "eval_samples_per_second": 217.166,
46
+ "eval_steps_per_second": 27.214,
47
  "step": 244
48
  },
49
  {
50
  "epoch": 3.0,
51
+ "grad_norm": 5.97755765914917,
52
  "learning_rate": 4.25e-05,
53
+ "loss": 0.3559,
54
  "step": 366
55
  },
56
  {
57
  "epoch": 3.0,
58
+ "eval_accuracy": 0.8571428571428571,
59
+ "eval_f1": 0.8079089841803424,
60
+ "eval_loss": 0.3407064378261566,
61
+ "eval_precision": 0.8638322884012539,
62
+ "eval_recall": 0.7814148026913984,
63
+ "eval_runtime": 1.8397,
64
+ "eval_samples_per_second": 216.877,
65
+ "eval_steps_per_second": 27.178,
66
  "step": 366
67
  },
68
  {
69
  "epoch": 4.0,
70
+ "grad_norm": 2.0363476276397705,
71
  "learning_rate": 4e-05,
72
+ "loss": 0.2961,
73
  "step": 488
74
  },
75
  {
76
  "epoch": 4.0,
77
+ "eval_accuracy": 0.8696741854636592,
78
+ "eval_f1": 0.8419946387230413,
79
+ "eval_loss": 0.3088829517364502,
80
+ "eval_precision": 0.8437691365584814,
81
+ "eval_recall": 0.8402891434806329,
82
+ "eval_runtime": 1.8466,
83
+ "eval_samples_per_second": 216.069,
84
+ "eval_steps_per_second": 27.076,
85
  "step": 488
86
  },
87
  {
88
  "epoch": 5.0,
89
+ "grad_norm": 2.3318538665771484,
90
  "learning_rate": 3.7500000000000003e-05,
91
+ "loss": 0.276,
92
  "step": 610
93
  },
94
  {
95
  "epoch": 5.0,
96
+ "eval_accuracy": 0.8621553884711779,
97
+ "eval_f1": 0.8365204824303285,
98
+ "eval_loss": 0.29173794388771057,
99
+ "eval_precision": 0.8313636363636363,
100
+ "eval_recall": 0.8424713584288053,
101
+ "eval_runtime": 1.8414,
102
+ "eval_samples_per_second": 216.677,
103
+ "eval_steps_per_second": 27.153,
104
  "step": 610
105
  },
106
  {
107
  "epoch": 6.0,
108
+ "grad_norm": 2.3067033290863037,
109
  "learning_rate": 3.5e-05,
110
+ "loss": 0.2555,
111
  "step": 732
112
  },
113
  {
114
  "epoch": 6.0,
115
+ "eval_accuracy": 0.8696741854636592,
116
+ "eval_f1": 0.842789598108747,
117
+ "eval_loss": 0.29054704308509827,
118
+ "eval_precision": 0.842789598108747,
119
+ "eval_recall": 0.842789598108747,
120
+ "eval_runtime": 1.847,
121
+ "eval_samples_per_second": 216.025,
122
+ "eval_steps_per_second": 27.071,
123
  "step": 732
124
  },
125
  {
126
  "epoch": 7.0,
127
+ "grad_norm": 1.5437530279159546,
128
  "learning_rate": 3.2500000000000004e-05,
129
+ "loss": 0.2427,
130
  "step": 854
131
  },
132
  {
133
  "epoch": 7.0,
134
+ "eval_accuracy": 0.8771929824561403,
135
+ "eval_f1": 0.84402249790578,
136
+ "eval_loss": 0.30313801765441895,
137
+ "eval_precision": 0.8669909824394875,
138
+ "eval_recall": 0.8281051100200036,
139
+ "eval_runtime": 1.8479,
140
+ "eval_samples_per_second": 215.922,
141
+ "eval_steps_per_second": 27.058,
142
  "step": 854
143
  },
144
  {
145
  "epoch": 8.0,
146
+ "grad_norm": 3.033709764480591,
147
  "learning_rate": 3e-05,
148
+ "loss": 0.2219,
149
  "step": 976
150
  },
151
  {
152
  "epoch": 8.0,
153
+ "eval_accuracy": 0.8771929824561403,
154
+ "eval_f1": 0.8522278069611882,
155
+ "eval_loss": 0.2907596528530121,
156
+ "eval_precision": 0.8513631702756499,
157
+ "eval_recall": 0.8531096563011457,
158
+ "eval_runtime": 1.8468,
159
+ "eval_samples_per_second": 216.053,
160
+ "eval_steps_per_second": 27.074,
161
  "step": 976
162
  },
163
  {
164
  "epoch": 9.0,
165
+ "grad_norm": 2.670888900756836,
166
  "learning_rate": 2.7500000000000004e-05,
167
+ "loss": 0.2158,
168
  "step": 1098
169
  },
170
  {
171
  "epoch": 9.0,
172
  "eval_accuracy": 0.8847117794486216,
173
+ "eval_f1": 0.8539996181748759,
174
+ "eval_loss": 0.3083769381046295,
175
+ "eval_precision": 0.8759595959595959,
176
+ "eval_recall": 0.8384251682124022,
177
+ "eval_runtime": 1.856,
178
+ "eval_samples_per_second": 214.979,
179
+ "eval_steps_per_second": 26.94,
180
  "step": 1098
181
  },
182
  {
183
  "epoch": 10.0,
184
+ "grad_norm": 3.6058239936828613,
185
  "learning_rate": 2.5e-05,
186
+ "loss": 0.2,
187
  "step": 1220
188
  },
189
  {
190
  "epoch": 10.0,
191
+ "eval_accuracy": 0.87468671679198,
192
+ "eval_f1": 0.8517301860990547,
193
+ "eval_loss": 0.29381993412971497,
194
+ "eval_precision": 0.8456788321167883,
195
+ "eval_recall": 0.8588379705400981,
196
+ "eval_runtime": 1.8468,
197
+ "eval_samples_per_second": 216.045,
198
+ "eval_steps_per_second": 27.073,
199
  "step": 1220
200
  },
201
  {
202
  "epoch": 11.0,
203
+ "grad_norm": 3.181007146835327,
204
  "learning_rate": 2.25e-05,
205
+ "loss": 0.1885,
206
  "step": 1342
207
  },
208
  {
209
  "epoch": 11.0,
210
+ "eval_accuracy": 0.8771929824561403,
211
+ "eval_f1": 0.8514869535493182,
212
+ "eval_loss": 0.2976568639278412,
213
+ "eval_precision": 0.8523821128305106,
214
+ "eval_recall": 0.8506092016730314,
215
+ "eval_runtime": 1.8451,
216
+ "eval_samples_per_second": 216.253,
217
+ "eval_steps_per_second": 27.099,
218
  "step": 1342
219
  },
220
  {
221
  "epoch": 12.0,
222
+ "grad_norm": 3.1322133541107178,
223
  "learning_rate": 2e-05,
224
+ "loss": 0.183,
225
  "step": 1464
226
  },
227
  {
228
  "epoch": 12.0,
229
+ "eval_accuracy": 0.8847117794486216,
230
+ "eval_f1": 0.8556621579112929,
231
+ "eval_loss": 0.30698344111442566,
232
+ "eval_precision": 0.871654421411703,
233
+ "eval_recall": 0.8434260774686306,
234
+ "eval_runtime": 1.8533,
235
+ "eval_samples_per_second": 215.295,
236
+ "eval_steps_per_second": 26.979,
237
  "step": 1464
238
  },
239
  {
240
  "epoch": 13.0,
241
+ "grad_norm": 1.6321003437042236,
242
  "learning_rate": 1.75e-05,
243
+ "loss": 0.1752,
244
  "step": 1586
245
  },
246
  {
247
  "epoch": 13.0,
248
  "eval_accuracy": 0.8796992481203008,
249
+ "eval_f1": 0.8569892473118279,
250
+ "eval_loss": 0.29585033655166626,
251
+ "eval_precision": 0.8522004241781549,
252
+ "eval_recall": 0.8623840698308783,
253
+ "eval_runtime": 1.846,
254
+ "eval_samples_per_second": 216.146,
255
+ "eval_steps_per_second": 27.086,
256
  "step": 1586
257
  },
258
  {
259
  "epoch": 14.0,
260
+ "grad_norm": 3.546229600906372,
261
  "learning_rate": 1.5e-05,
262
+ "loss": 0.1558,
263
  "step": 1708
264
  },
265
  {
266
  "epoch": 14.0,
267
+ "eval_accuracy": 0.87468671679198,
268
+ "eval_f1": 0.8530841286673736,
269
+ "eval_loss": 0.3040328025817871,
270
+ "eval_precision": 0.8446597760551249,
271
+ "eval_recall": 0.8638388797963266,
272
+ "eval_runtime": 1.8508,
273
+ "eval_samples_per_second": 215.586,
274
+ "eval_steps_per_second": 27.016,
275
  "step": 1708
276
  },
277
  {
278
  "epoch": 15.0,
279
+ "grad_norm": 0.5455009937286377,
280
  "learning_rate": 1.25e-05,
281
+ "loss": 0.1538,
282
  "step": 1830
283
  },
284
  {
285
  "epoch": 15.0,
286
+ "eval_accuracy": 0.8721804511278195,
287
+ "eval_f1": 0.8484099018899409,
288
+ "eval_loss": 0.30823931097984314,
289
+ "eval_precision": 0.8430645161290322,
290
+ "eval_recall": 0.8545644662665939,
291
+ "eval_runtime": 1.848,
292
+ "eval_samples_per_second": 215.904,
293
+ "eval_steps_per_second": 27.056,
294
  "step": 1830
295
  },
296
  {
297
  "epoch": 16.0,
298
+ "grad_norm": 2.4319658279418945,
299
  "learning_rate": 1e-05,
300
+ "loss": 0.152,
301
  "step": 1952
302
  },
303
  {
304
  "epoch": 16.0,
305
+ "eval_accuracy": 0.8771929824561403,
306
+ "eval_f1": 0.8483536940081443,
307
+ "eval_loss": 0.30997762084007263,
308
+ "eval_precision": 0.8575792287132493,
309
+ "eval_recall": 0.8406073831605747,
310
+ "eval_runtime": 1.8455,
311
+ "eval_samples_per_second": 216.202,
312
+ "eval_steps_per_second": 27.093,
313
  "step": 1952
314
  },
315
  {
316
  "epoch": 17.0,
317
+ "grad_norm": 3.420119285583496,
318
  "learning_rate": 7.5e-06,
319
+ "loss": 0.1436,
320
  "step": 2074
321
  },
322
  {
323
  "epoch": 17.0,
324
+ "eval_accuracy": 0.87468671679198,
325
+ "eval_f1": 0.8510304659498208,
326
+ "eval_loss": 0.31050172448158264,
327
+ "eval_precision": 0.8463237893248498,
328
+ "eval_recall": 0.8563375159119839,
329
+ "eval_runtime": 1.8478,
330
+ "eval_samples_per_second": 215.931,
331
+ "eval_steps_per_second": 27.059,
332
  "step": 2074
333
  },
334
  {
335
  "epoch": 18.0,
336
+ "grad_norm": 0.7008257508277893,
337
  "learning_rate": 5e-06,
338
+ "loss": 0.1426,
339
  "step": 2196
340
  },
341
  {
342
  "epoch": 18.0,
343
+ "eval_accuracy": 0.87468671679198,
344
+ "eval_f1": 0.8503151260504201,
345
+ "eval_loss": 0.3118613362312317,
346
+ "eval_precision": 0.8470628455912955,
347
+ "eval_recall": 0.8538370612838698,
348
+ "eval_runtime": 1.8481,
349
+ "eval_samples_per_second": 215.903,
350
+ "eval_steps_per_second": 27.056,
351
  "step": 2196
352
  },
353
  {
354
  "epoch": 19.0,
355
+ "grad_norm": 2.8680572509765625,
356
  "learning_rate": 2.5e-06,
357
+ "loss": 0.1398,
358
  "step": 2318
359
  },
360
  {
361
  "epoch": 19.0,
362
+ "eval_accuracy": 0.8796992481203008,
363
+ "eval_f1": 0.8569892473118279,
364
+ "eval_loss": 0.316354900598526,
365
+ "eval_precision": 0.8522004241781549,
366
+ "eval_recall": 0.8623840698308783,
367
+ "eval_runtime": 1.8465,
368
+ "eval_samples_per_second": 216.08,
369
+ "eval_steps_per_second": 27.078,
370
  "step": 2318
371
  },
372
  {
373
  "epoch": 20.0,
374
+ "grad_norm": 3.442784547805786,
375
  "learning_rate": 0.0,
376
+ "loss": 0.14,
377
  "step": 2440
378
  },
379
  {
380
  "epoch": 20.0,
381
+ "eval_accuracy": 0.8796992481203008,
382
+ "eval_f1": 0.8563025210084034,
383
+ "eval_loss": 0.31680676341056824,
384
+ "eval_precision": 0.8529936381473334,
385
+ "eval_recall": 0.8598836152027641,
386
+ "eval_runtime": 1.8511,
387
+ "eval_samples_per_second": 215.549,
388
+ "eval_steps_per_second": 27.011,
389
  "step": 2440
390
  },
391
  {
392
  "epoch": 20.0,
393
  "step": 2440,
394
  "total_flos": 8460375995160000.0,
395
+ "train_loss": 0.2318932650519199,
396
+ "train_runtime": 643.2373,
397
+ "train_samples_per_second": 113.333,
398
+ "train_steps_per_second": 3.793
399
  }
400
  ],
401
  "logging_steps": 500,