apwic commited on
Commit
abd6558
1 Parent(s): f5ff9c6

End of training

Browse files
README.md CHANGED
@@ -1,4 +1,6 @@
1
  ---
 
 
2
  license: mit
3
  base_model: indolem/indobert-base-uncased
4
  tags:
 
1
  ---
2
+ language:
3
+ - id
4
  license: mit
5
  base_model: indolem/indobert-base-uncased
6
  tags:
all_results.json CHANGED
@@ -2,20 +2,20 @@
2
  "accuracy": 0.9149357072205737,
3
  "epoch": 20.0,
4
  "eval_accuracy": 0.9022556390977443,
5
- "eval_f1": 0.8793019197207679,
6
- "eval_loss": 0.7049560546875,
7
- "eval_precision": 0.8893184421534936,
8
- "eval_recall": 0.8708401527550463,
9
- "eval_runtime": 4.7519,
10
  "eval_samples": 399,
11
- "eval_samples_per_second": 83.966,
12
- "eval_steps_per_second": 10.522,
13
- "f1": 0.8976980712934288,
14
- "precision": 0.8976980712934288,
15
- "recall": 0.8976980712934288,
16
- "train_loss": 0.05485985055565834,
17
- "train_runtime": 2706.9642,
18
  "train_samples": 3638,
19
- "train_samples_per_second": 26.879,
20
- "train_steps_per_second": 0.901
21
  }
 
2
  "accuracy": 0.9149357072205737,
3
  "epoch": 20.0,
4
  "eval_accuracy": 0.9022556390977443,
5
+ "eval_f1": 0.8799463033398397,
6
+ "eval_loss": 0.803022563457489,
7
+ "eval_precision": 0.8874803397294746,
8
+ "eval_recall": 0.8733406073831607,
9
+ "eval_runtime": 1.6501,
10
  "eval_samples": 399,
11
+ "eval_samples_per_second": 241.808,
12
+ "eval_steps_per_second": 30.302,
13
+ "f1": 0.8960423740972787,
14
+ "precision": 0.9029638385430347,
15
+ "recall": 0.8898853506782006,
16
+ "train_loss": 0.05501617935226589,
17
+ "train_runtime": 867.5592,
18
  "train_samples": 3638,
19
+ "train_samples_per_second": 83.867,
20
+ "train_steps_per_second": 2.812
21
  }
eval_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 20.0,
3
  "eval_accuracy": 0.9022556390977443,
4
- "eval_f1": 0.8793019197207679,
5
- "eval_loss": 0.7049560546875,
6
- "eval_precision": 0.8893184421534936,
7
- "eval_recall": 0.8708401527550463,
8
- "eval_runtime": 4.7519,
9
  "eval_samples": 399,
10
- "eval_samples_per_second": 83.966,
11
- "eval_steps_per_second": 10.522
12
  }
 
1
  {
2
  "epoch": 20.0,
3
  "eval_accuracy": 0.9022556390977443,
4
+ "eval_f1": 0.8799463033398397,
5
+ "eval_loss": 0.803022563457489,
6
+ "eval_precision": 0.8874803397294746,
7
+ "eval_recall": 0.8733406073831607,
8
+ "eval_runtime": 1.6501,
9
  "eval_samples": 399,
10
+ "eval_samples_per_second": 241.808,
11
+ "eval_steps_per_second": 30.302
12
  }
predict_results.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "accuracy": 0.9149357072205737,
3
- "f1": 0.8976980712934288,
4
- "precision": 0.8976980712934288,
5
- "recall": 0.8976980712934288
6
  }
 
1
  {
2
  "accuracy": 0.9149357072205737,
3
+ "f1": 0.8960423740972787,
4
+ "precision": 0.9029638385430347,
5
+ "recall": 0.8898853506782006
6
  }
predict_results.txt CHANGED
@@ -12,19 +12,19 @@ index prediction
12
  10 1
13
  11 1
14
  12 1
15
- 13 0
16
  14 1
17
  15 1
18
- 16 0
19
  17 1
20
  18 0
21
  19 1
22
  20 1
23
- 21 1
24
  22 1
25
  23 1
26
  24 1
27
- 25 1
28
  26 1
29
  27 1
30
  28 1
@@ -60,8 +60,8 @@ index prediction
60
  58 1
61
  59 1
62
  60 1
63
- 61 1
64
- 62 1
65
  63 1
66
  64 1
67
  65 1
@@ -90,7 +90,7 @@ index prediction
90
  88 1
91
  89 1
92
  90 0
93
- 91 1
94
  92 1
95
  93 1
96
  94 1
@@ -105,7 +105,7 @@ index prediction
105
  103 1
106
  104 1
107
  105 0
108
- 106 1
109
  107 1
110
  108 1
111
  109 0
@@ -127,9 +127,9 @@ index prediction
127
  125 1
128
  126 1
129
  127 1
130
- 128 1
131
- 129 1
132
- 130 1
133
  131 1
134
  132 1
135
  133 1
@@ -138,8 +138,8 @@ index prediction
138
  136 1
139
  137 1
140
  138 1
141
- 139 0
142
- 140 0
143
  141 1
144
  142 1
145
  143 1
@@ -167,7 +167,7 @@ index prediction
167
  165 1
168
  166 1
169
  167 1
170
- 168 1
171
  169 1
172
  170 1
173
  171 1
@@ -186,13 +186,13 @@ index prediction
186
  184 1
187
  185 1
188
  186 1
189
- 187 0
190
  188 1
191
  189 0
192
- 190 1
193
  191 1
194
  192 1
195
- 193 1
196
  194 1
197
  195 1
198
  196 1
@@ -217,7 +217,7 @@ index prediction
217
  215 1
218
  216 1
219
  217 0
220
- 218 1
221
  219 1
222
  220 1
223
  221 1
@@ -247,7 +247,7 @@ index prediction
247
  245 1
248
  246 1
249
  247 1
250
- 248 0
251
  249 1
252
  250 1
253
  251 1
@@ -270,27 +270,27 @@ index prediction
270
  268 1
271
  269 1
272
  270 1
273
- 271 0
274
  272 1
275
  273 1
276
  274 1
277
  275 1
278
- 276 1
279
  277 1
280
  278 1
281
  279 1
282
- 280 1
283
  281 1
284
  282 0
285
  283 1
286
- 284 1
287
  285 1
288
  286 1
289
  287 1
290
  288 1
291
  289 0
292
  290 1
293
- 291 0
294
  292 1
295
  293 1
296
  294 1
@@ -349,14 +349,14 @@ index prediction
349
  347 0
350
  348 0
351
  349 0
352
- 350 0
353
  351 0
354
  352 0
355
  353 0
356
  354 0
357
  355 0
358
  356 0
359
- 357 0
360
  358 0
361
  359 0
362
  360 0
@@ -365,7 +365,7 @@ index prediction
365
  363 0
366
  364 0
367
  365 0
368
- 366 1
369
  367 0
370
  368 0
371
  369 0
@@ -377,7 +377,7 @@ index prediction
377
  375 0
378
  376 0
379
  377 0
380
- 378 1
381
  379 0
382
  380 0
383
  381 0
@@ -407,7 +407,7 @@ index prediction
407
  405 0
408
  406 0
409
  407 0
410
- 408 1
411
  409 0
412
  410 0
413
  411 0
@@ -419,7 +419,7 @@ index prediction
419
  417 0
420
  418 0
421
  419 0
422
- 420 0
423
  421 0
424
  422 0
425
  423 0
@@ -441,7 +441,7 @@ index prediction
441
  439 0
442
  440 0
443
  441 0
444
- 442 1
445
  443 0
446
  444 0
447
  445 0
@@ -463,7 +463,7 @@ index prediction
463
  461 0
464
  462 0
465
  463 0
466
- 464 1
467
  465 0
468
  466 0
469
  467 0
@@ -472,7 +472,7 @@ index prediction
472
  470 0
473
  471 0
474
  472 0
475
- 473 1
476
  474 1
477
  475 0
478
  476 1
@@ -590,13 +590,13 @@ index prediction
590
  588 0
591
  589 0
592
  590 0
593
- 591 0
594
  592 0
595
  593 0
596
  594 0
597
  595 0
598
  596 0
599
- 597 0
600
  598 0
601
  599 0
602
  600 0
@@ -653,7 +653,7 @@ index prediction
653
  651 0
654
  652 0
655
  653 0
656
- 654 0
657
  655 0
658
  656 0
659
  657 0
@@ -665,7 +665,7 @@ index prediction
665
  663 0
666
  664 0
667
  665 0
668
- 666 1
669
  667 0
670
  668 0
671
  669 0
@@ -700,7 +700,7 @@ index prediction
700
  698 0
701
  699 0
702
  700 0
703
- 701 1
704
  702 0
705
  703 0
706
  704 0
@@ -776,7 +776,7 @@ index prediction
776
  774 0
777
  775 0
778
  776 0
779
- 777 1
780
  778 0
781
  779 0
782
  780 0
@@ -787,7 +787,7 @@ index prediction
787
  785 0
788
  786 0
789
  787 0
790
- 788 1
791
  789 0
792
  790 0
793
  791 0
@@ -797,11 +797,11 @@ index prediction
797
  795 1
798
  796 0
799
  797 0
800
- 798 1
801
  799 0
802
  800 0
803
  801 0
804
- 802 1
805
  803 0
806
  804 0
807
  805 0
@@ -827,7 +827,7 @@ index prediction
827
  825 0
828
  826 0
829
  827 0
830
- 828 1
831
  829 0
832
  830 0
833
  831 1
@@ -874,7 +874,7 @@ index prediction
874
  872 0
875
  873 0
876
  874 0
877
- 875 0
878
  876 0
879
  877 0
880
  878 0
@@ -885,7 +885,7 @@ index prediction
885
  883 0
886
  884 0
887
  885 0
888
- 886 0
889
  887 0
890
  888 0
891
  889 0
@@ -911,7 +911,7 @@ index prediction
911
  909 0
912
  910 0
913
  911 0
914
- 912 1
915
  913 1
916
  914 0
917
  915 0
@@ -940,7 +940,7 @@ index prediction
940
  938 0
941
  939 0
942
  940 0
943
- 941 1
944
  942 0
945
  943 0
946
  944 0
@@ -1003,7 +1003,7 @@ index prediction
1003
  1001 0
1004
  1002 0
1005
  1003 0
1006
- 1004 1
1007
  1005 0
1008
  1006 0
1009
  1007 0
 
12
  10 1
13
  11 1
14
  12 1
15
+ 13 1
16
  14 1
17
  15 1
18
+ 16 1
19
  17 1
20
  18 0
21
  19 1
22
  20 1
23
+ 21 0
24
  22 1
25
  23 1
26
  24 1
27
+ 25 0
28
  26 1
29
  27 1
30
  28 1
 
60
  58 1
61
  59 1
62
  60 1
63
+ 61 0
64
+ 62 0
65
  63 1
66
  64 1
67
  65 1
 
90
  88 1
91
  89 1
92
  90 0
93
+ 91 0
94
  92 1
95
  93 1
96
  94 1
 
105
  103 1
106
  104 1
107
  105 0
108
+ 106 0
109
  107 1
110
  108 1
111
  109 0
 
127
  125 1
128
  126 1
129
  127 1
130
+ 128 0
131
+ 129 0
132
+ 130 0
133
  131 1
134
  132 1
135
  133 1
 
138
  136 1
139
  137 1
140
  138 1
141
+ 139 1
142
+ 140 1
143
  141 1
144
  142 1
145
  143 1
 
167
  165 1
168
  166 1
169
  167 1
170
+ 168 0
171
  169 1
172
  170 1
173
  171 1
 
186
  184 1
187
  185 1
188
  186 1
189
+ 187 1
190
  188 1
191
  189 0
192
+ 190 0
193
  191 1
194
  192 1
195
+ 193 0
196
  194 1
197
  195 1
198
  196 1
 
217
  215 1
218
  216 1
219
  217 0
220
+ 218 0
221
  219 1
222
  220 1
223
  221 1
 
247
  245 1
248
  246 1
249
  247 1
250
+ 248 1
251
  249 1
252
  250 1
253
  251 1
 
270
  268 1
271
  269 1
272
  270 1
273
+ 271 1
274
  272 1
275
  273 1
276
  274 1
277
  275 1
278
+ 276 0
279
  277 1
280
  278 1
281
  279 1
282
+ 280 0
283
  281 1
284
  282 0
285
  283 1
286
+ 284 0
287
  285 1
288
  286 1
289
  287 1
290
  288 1
291
  289 0
292
  290 1
293
+ 291 1
294
  292 1
295
  293 1
296
  294 1
 
349
  347 0
350
  348 0
351
  349 0
352
+ 350 1
353
  351 0
354
  352 0
355
  353 0
356
  354 0
357
  355 0
358
  356 0
359
+ 357 1
360
  358 0
361
  359 0
362
  360 0
 
365
  363 0
366
  364 0
367
  365 0
368
+ 366 0
369
  367 0
370
  368 0
371
  369 0
 
377
  375 0
378
  376 0
379
  377 0
380
+ 378 0
381
  379 0
382
  380 0
383
  381 0
 
407
  405 0
408
  406 0
409
  407 0
410
+ 408 0
411
  409 0
412
  410 0
413
  411 0
 
419
  417 0
420
  418 0
421
  419 0
422
+ 420 1
423
  421 0
424
  422 0
425
  423 0
 
441
  439 0
442
  440 0
443
  441 0
444
+ 442 0
445
  443 0
446
  444 0
447
  445 0
 
463
  461 0
464
  462 0
465
  463 0
466
+ 464 0
467
  465 0
468
  466 0
469
  467 0
 
472
  470 0
473
  471 0
474
  472 0
475
+ 473 0
476
  474 1
477
  475 0
478
  476 1
 
590
  588 0
591
  589 0
592
  590 0
593
+ 591 1
594
  592 0
595
  593 0
596
  594 0
597
  595 0
598
  596 0
599
+ 597 1
600
  598 0
601
  599 0
602
  600 0
 
653
  651 0
654
  652 0
655
  653 0
656
+ 654 1
657
  655 0
658
  656 0
659
  657 0
 
665
  663 0
666
  664 0
667
  665 0
668
+ 666 0
669
  667 0
670
  668 0
671
  669 0
 
700
  698 0
701
  699 0
702
  700 0
703
+ 701 0
704
  702 0
705
  703 0
706
  704 0
 
776
  774 0
777
  775 0
778
  776 0
779
+ 777 0
780
  778 0
781
  779 0
782
  780 0
 
787
  785 0
788
  786 0
789
  787 0
790
+ 788 0
791
  789 0
792
  790 0
793
  791 0
 
797
  795 1
798
  796 0
799
  797 0
800
+ 798 0
801
  799 0
802
  800 0
803
  801 0
804
+ 802 0
805
  803 0
806
  804 0
807
  805 0
 
827
  825 0
828
  826 0
829
  827 0
830
+ 828 0
831
  829 0
832
  830 0
833
  831 1
 
874
  872 0
875
  873 0
876
  874 0
877
+ 875 1
878
  876 0
879
  877 0
880
  878 0
 
885
  883 0
886
  884 0
887
  885 0
888
+ 886 1
889
  887 0
890
  888 0
891
  889 0
 
911
  909 0
912
  910 0
913
  911 0
914
+ 912 0
915
  913 1
916
  914 0
917
  915 0
 
940
  938 0
941
  939 0
942
  940 0
943
+ 941 0
944
  942 0
945
  943 0
946
  944 0
 
1003
  1001 0
1004
  1002 0
1005
  1003 0
1006
+ 1004 0
1007
  1005 0
1008
  1006 0
1009
  1007 0
runs/Jun03_10-28-14_a358b85c7679/events.out.tfevents.1717411391.a358b85c7679.30286.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7c81ad822ad450bc8ed62964362077ff8695d1b3a2d53cddc0f6d031b25630c
3
+ size 560
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
- "train_loss": 0.05485985055565834,
4
- "train_runtime": 2706.9642,
5
  "train_samples": 3638,
6
- "train_samples_per_second": 26.879,
7
- "train_steps_per_second": 0.901
8
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "train_loss": 0.05501617935226589,
4
+ "train_runtime": 867.5592,
5
  "train_samples": 3638,
6
+ "train_samples_per_second": 83.867,
7
+ "train_steps_per_second": 2.812
8
  }
trainer_state.json CHANGED
@@ -10,392 +10,392 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 10.745368957519531,
14
  "learning_rate": 4.75e-05,
15
- "loss": 0.3971,
16
  "step": 122
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_accuracy": 0.8822055137844611,
21
- "eval_f1": 0.851257564821498,
22
- "eval_loss": 0.27043774724006653,
23
- "eval_precision": 0.8714882943143812,
24
- "eval_recall": 0.8366521185670122,
25
- "eval_runtime": 4.9491,
26
- "eval_samples_per_second": 80.621,
27
- "eval_steps_per_second": 10.103,
28
  "step": 122
29
  },
30
  {
31
  "epoch": 2.0,
32
- "grad_norm": 7.182548999786377,
33
  "learning_rate": 4.5e-05,
34
- "loss": 0.214,
35
  "step": 244
36
  },
37
  {
38
  "epoch": 2.0,
39
- "eval_accuracy": 0.9197994987468672,
40
- "eval_f1": 0.9032551372976905,
41
- "eval_loss": 0.23814059793949127,
42
- "eval_precision": 0.9032551372976905,
43
- "eval_recall": 0.9032551372976905,
44
- "eval_runtime": 4.9784,
45
- "eval_samples_per_second": 80.146,
46
- "eval_steps_per_second": 10.043,
47
  "step": 244
48
  },
49
  {
50
  "epoch": 3.0,
51
- "grad_norm": 93.81620025634766,
52
  "learning_rate": 4.25e-05,
53
- "loss": 0.1301,
54
  "step": 366
55
  },
56
  {
57
  "epoch": 3.0,
58
  "eval_accuracy": 0.8796992481203008,
59
- "eval_f1": 0.8439374185136896,
60
- "eval_loss": 0.38320210576057434,
61
- "eval_precision": 0.8794955044955045,
62
- "eval_recall": 0.822376795781051,
63
- "eval_runtime": 5.0811,
64
- "eval_samples_per_second": 78.526,
65
- "eval_steps_per_second": 9.84,
66
  "step": 366
67
  },
68
  {
69
  "epoch": 4.0,
70
- "grad_norm": 0.22968685626983643,
71
  "learning_rate": 4e-05,
72
- "loss": 0.0904,
73
  "step": 488
74
  },
75
  {
76
  "epoch": 4.0,
77
- "eval_accuracy": 0.8947368421052632,
78
- "eval_f1": 0.8782051282051282,
79
- "eval_loss": 0.3873072564601898,
80
- "eval_precision": 0.8661616161616161,
81
- "eval_recall": 0.8955264593562466,
82
- "eval_runtime": 4.9496,
83
- "eval_samples_per_second": 80.613,
84
- "eval_steps_per_second": 10.102,
85
  "step": 488
86
  },
87
  {
88
  "epoch": 5.0,
89
- "grad_norm": 0.057639382779598236,
90
  "learning_rate": 3.7500000000000003e-05,
91
- "loss": 0.0587,
92
  "step": 610
93
  },
94
  {
95
  "epoch": 5.0,
96
- "eval_accuracy": 0.9147869674185464,
97
- "eval_f1": 0.8956140350877193,
98
- "eval_loss": 0.4033690392971039,
99
- "eval_precision": 0.9022147147147147,
100
- "eval_recall": 0.8897072194944535,
101
- "eval_runtime": 4.9468,
102
- "eval_samples_per_second": 80.659,
103
- "eval_steps_per_second": 10.108,
104
  "step": 610
105
  },
106
  {
107
  "epoch": 6.0,
108
- "grad_norm": 0.12450123578310013,
109
  "learning_rate": 3.5e-05,
110
- "loss": 0.0496,
111
  "step": 732
112
  },
113
  {
114
  "epoch": 6.0,
115
- "eval_accuracy": 0.8922305764411027,
116
- "eval_f1": 0.8755702215614461,
117
- "eval_loss": 0.5248555541038513,
118
- "eval_precision": 0.8632157235517781,
119
- "eval_recall": 0.8937534097108566,
120
- "eval_runtime": 4.9627,
121
- "eval_samples_per_second": 80.399,
122
- "eval_steps_per_second": 10.075,
123
  "step": 732
124
  },
125
  {
126
  "epoch": 7.0,
127
- "grad_norm": 0.029339170083403587,
128
  "learning_rate": 3.2500000000000004e-05,
129
- "loss": 0.0362,
130
  "step": 854
131
  },
132
  {
133
  "epoch": 7.0,
134
  "eval_accuracy": 0.9072681704260651,
135
- "eval_f1": 0.8842272010790718,
136
- "eval_loss": 0.533008873462677,
137
- "eval_precision": 0.8999348109517601,
138
- "eval_recall": 0.8718857974177123,
139
- "eval_runtime": 4.956,
140
- "eval_samples_per_second": 80.508,
141
- "eval_steps_per_second": 10.089,
142
  "step": 854
143
  },
144
  {
145
  "epoch": 8.0,
146
- "grad_norm": 0.012251710519194603,
147
  "learning_rate": 3e-05,
148
- "loss": 0.0223,
149
  "step": 976
150
  },
151
  {
152
  "epoch": 8.0,
153
- "eval_accuracy": 0.8972431077694235,
154
- "eval_f1": 0.8769602202215754,
155
- "eval_loss": 0.6168325543403625,
156
- "eval_precision": 0.8742831541218639,
157
- "eval_recall": 0.8797963266048372,
158
- "eval_runtime": 4.9435,
159
- "eval_samples_per_second": 80.712,
160
- "eval_steps_per_second": 10.114,
161
  "step": 976
162
  },
163
  {
164
  "epoch": 9.0,
165
- "grad_norm": 0.027291299775242805,
166
  "learning_rate": 2.7500000000000004e-05,
167
- "loss": 0.0191,
168
  "step": 1098
169
  },
170
  {
171
  "epoch": 9.0,
172
- "eval_accuracy": 0.8947368421052632,
173
- "eval_f1": 0.8674628282189181,
174
- "eval_loss": 0.7103565335273743,
175
- "eval_precision": 0.8869858462356303,
176
- "eval_recall": 0.8530187306783051,
177
- "eval_runtime": 4.9624,
178
- "eval_samples_per_second": 80.405,
179
- "eval_steps_per_second": 10.076,
180
  "step": 1098
181
  },
182
  {
183
  "epoch": 10.0,
184
- "grad_norm": 0.26321402192115784,
185
  "learning_rate": 2.5e-05,
186
- "loss": 0.0147,
187
  "step": 1220
188
  },
189
  {
190
  "epoch": 10.0,
191
  "eval_accuracy": 0.8972431077694235,
192
- "eval_f1": 0.8775533117267087,
193
- "eval_loss": 0.6125138998031616,
194
- "eval_precision": 0.873246730188791,
195
- "eval_recall": 0.8822967812329514,
196
- "eval_runtime": 4.95,
197
- "eval_samples_per_second": 80.606,
198
- "eval_steps_per_second": 10.101,
199
  "step": 1220
200
  },
201
  {
202
  "epoch": 11.0,
203
- "grad_norm": 0.00528654595836997,
204
  "learning_rate": 2.25e-05,
205
- "loss": 0.0121,
206
  "step": 1342
207
  },
208
  {
209
  "epoch": 11.0,
210
- "eval_accuracy": 0.9047619047619048,
211
- "eval_f1": 0.888343937787204,
212
- "eval_loss": 0.6882844567298889,
213
- "eval_precision": 0.8790267011197244,
214
- "eval_recall": 0.9001182033096926,
215
- "eval_runtime": 4.9822,
216
- "eval_samples_per_second": 80.085,
217
- "eval_steps_per_second": 10.036,
218
  "step": 1342
219
  },
220
  {
221
  "epoch": 12.0,
222
- "grad_norm": 0.007998102344572544,
223
  "learning_rate": 2e-05,
224
- "loss": 0.0118,
225
  "step": 1464
226
  },
227
  {
228
  "epoch": 12.0,
229
  "eval_accuracy": 0.8972431077694235,
230
- "eval_f1": 0.8781334505389722,
231
- "eval_loss": 0.6760030388832092,
232
- "eval_precision": 0.872316715542522,
233
- "eval_recall": 0.8847972358610656,
234
- "eval_runtime": 4.9668,
235
- "eval_samples_per_second": 80.334,
236
- "eval_steps_per_second": 10.067,
237
  "step": 1464
238
  },
239
  {
240
  "epoch": 13.0,
241
- "grad_norm": 0.002160990610718727,
242
  "learning_rate": 1.75e-05,
243
- "loss": 0.0034,
244
  "step": 1586
245
  },
246
  {
247
  "epoch": 13.0,
248
- "eval_accuracy": 0.9047619047619048,
249
- "eval_f1": 0.8851154755410074,
250
- "eval_loss": 0.7163070440292358,
251
- "eval_precision": 0.8851154755410074,
252
- "eval_recall": 0.8851154755410074,
253
- "eval_runtime": 4.9658,
254
- "eval_samples_per_second": 80.349,
255
- "eval_steps_per_second": 10.069,
256
  "step": 1586
257
  },
258
  {
259
  "epoch": 14.0,
260
- "grad_norm": 0.09897086024284363,
261
  "learning_rate": 1.5e-05,
262
- "loss": 0.0064,
263
  "step": 1708
264
  },
265
  {
266
  "epoch": 14.0,
267
  "eval_accuracy": 0.9072681704260651,
268
  "eval_f1": 0.8884169154604891,
269
- "eval_loss": 0.7180221080780029,
270
  "eval_precision": 0.8874630556728391,
271
  "eval_recall": 0.8893889798145117,
272
- "eval_runtime": 4.9417,
273
- "eval_samples_per_second": 80.741,
274
- "eval_steps_per_second": 10.118,
275
  "step": 1708
276
  },
277
  {
278
  "epoch": 15.0,
279
- "grad_norm": 0.004853605292737484,
280
  "learning_rate": 1.25e-05,
281
- "loss": 0.008,
282
  "step": 1830
283
  },
284
  {
285
  "epoch": 15.0,
286
- "eval_accuracy": 0.9047619047619048,
287
- "eval_f1": 0.8807643913180245,
288
- "eval_loss": 0.6914934515953064,
289
- "eval_precision": 0.8978286538966151,
290
- "eval_recall": 0.8676122931442081,
291
- "eval_runtime": 4.9615,
292
- "eval_samples_per_second": 80.42,
293
- "eval_steps_per_second": 10.078,
294
  "step": 1830
295
  },
296
  {
297
  "epoch": 16.0,
298
- "grad_norm": 0.0016086915275081992,
299
  "learning_rate": 1e-05,
300
- "loss": 0.0052,
301
  "step": 1952
302
  },
303
  {
304
  "epoch": 16.0,
305
- "eval_accuracy": 0.9072681704260651,
306
- "eval_f1": 0.8884169154604891,
307
- "eval_loss": 0.6778000593185425,
308
- "eval_precision": 0.8874630556728391,
309
- "eval_recall": 0.8893889798145117,
310
- "eval_runtime": 4.9627,
311
- "eval_samples_per_second": 80.4,
312
- "eval_steps_per_second": 10.075,
313
  "step": 1952
314
  },
315
  {
316
  "epoch": 17.0,
317
- "grad_norm": 0.0021244632080197334,
318
  "learning_rate": 7.5e-06,
319
- "loss": 0.0066,
320
  "step": 2074
321
  },
322
  {
323
  "epoch": 17.0,
324
- "eval_accuracy": 0.9072681704260651,
325
- "eval_f1": 0.8884169154604891,
326
- "eval_loss": 0.6992811560630798,
327
- "eval_precision": 0.8874630556728391,
328
- "eval_recall": 0.8893889798145117,
329
- "eval_runtime": 4.9649,
330
- "eval_samples_per_second": 80.364,
331
- "eval_steps_per_second": 10.071,
332
  "step": 2074
333
  },
334
  {
335
  "epoch": 18.0,
336
- "grad_norm": 0.0017945035360753536,
337
  "learning_rate": 5e-06,
338
- "loss": 0.0053,
339
  "step": 2196
340
  },
341
  {
342
  "epoch": 18.0,
343
- "eval_accuracy": 0.9047619047619048,
344
- "eval_f1": 0.8814283033033032,
345
- "eval_loss": 0.6966382265090942,
346
- "eval_precision": 0.895578231292517,
347
- "eval_recall": 0.8701127477723223,
348
- "eval_runtime": 4.9631,
349
- "eval_samples_per_second": 80.393,
350
- "eval_steps_per_second": 10.074,
351
  "step": 2196
352
  },
353
  {
354
  "epoch": 19.0,
355
- "grad_norm": 0.0012101498432457447,
356
  "learning_rate": 2.5e-06,
357
- "loss": 0.0022,
358
  "step": 2318
359
  },
360
  {
361
  "epoch": 19.0,
362
- "eval_accuracy": 0.899749373433584,
363
- "eval_f1": 0.8765393898137261,
364
- "eval_loss": 0.7112175822257996,
365
- "eval_precision": 0.8852261942423283,
366
- "eval_recall": 0.8690671031096563,
367
- "eval_runtime": 4.9613,
368
- "eval_samples_per_second": 80.422,
369
- "eval_steps_per_second": 10.078,
370
  "step": 2318
371
  },
372
  {
373
  "epoch": 20.0,
374
- "grad_norm": 0.009784560650587082,
375
  "learning_rate": 0.0,
376
- "loss": 0.0042,
377
  "step": 2440
378
  },
379
  {
380
  "epoch": 20.0,
381
  "eval_accuracy": 0.9022556390977443,
382
- "eval_f1": 0.8793019197207679,
383
- "eval_loss": 0.7049560546875,
384
- "eval_precision": 0.8893184421534936,
385
- "eval_recall": 0.8708401527550463,
386
- "eval_runtime": 4.9243,
387
- "eval_samples_per_second": 81.027,
388
- "eval_steps_per_second": 10.154,
389
  "step": 2440
390
  },
391
  {
392
  "epoch": 20.0,
393
  "step": 2440,
394
  "total_flos": 7584162436176000.0,
395
- "train_loss": 0.05485985055565834,
396
- "train_runtime": 2706.9642,
397
- "train_samples_per_second": 26.879,
398
- "train_steps_per_second": 0.901
399
  }
400
  ],
401
  "logging_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 8.300418853759766,
14
  "learning_rate": 4.75e-05,
15
+ "loss": 0.4092,
16
  "step": 122
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_accuracy": 0.8521303258145363,
21
+ "eval_f1": 0.7892045810686176,
22
+ "eval_loss": 0.3456897437572479,
23
+ "eval_precision": 0.8929804104477612,
24
+ "eval_recall": 0.7553646117475905,
25
+ "eval_runtime": 1.6637,
26
+ "eval_samples_per_second": 239.829,
27
+ "eval_steps_per_second": 30.054,
28
  "step": 122
29
  },
30
  {
31
  "epoch": 2.0,
32
+ "grad_norm": 9.286004066467285,
33
  "learning_rate": 4.5e-05,
34
+ "loss": 0.2282,
35
  "step": 244
36
  },
37
  {
38
  "epoch": 2.0,
39
+ "eval_accuracy": 0.8922305764411027,
40
+ "eval_f1": 0.8676331036823873,
41
+ "eval_loss": 0.258427232503891,
42
+ "eval_precision": 0.8749292230261088,
43
+ "eval_recall": 0.8612474995453718,
44
+ "eval_runtime": 1.6592,
45
+ "eval_samples_per_second": 240.478,
46
+ "eval_steps_per_second": 30.135,
47
  "step": 244
48
  },
49
  {
50
  "epoch": 3.0,
51
+ "grad_norm": 13.956122398376465,
52
  "learning_rate": 4.25e-05,
53
+ "loss": 0.138,
54
  "step": 366
55
  },
56
  {
57
  "epoch": 3.0,
58
  "eval_accuracy": 0.8796992481203008,
59
+ "eval_f1": 0.8429546050905274,
60
+ "eval_loss": 0.44172462821006775,
61
+ "eval_precision": 0.882529902138456,
62
+ "eval_recall": 0.8198763411529368,
63
+ "eval_runtime": 1.6567,
64
+ "eval_samples_per_second": 240.84,
65
+ "eval_steps_per_second": 30.18,
66
  "step": 366
67
  },
68
  {
69
  "epoch": 4.0,
70
+ "grad_norm": 0.09012622386217117,
71
  "learning_rate": 4e-05,
72
+ "loss": 0.0837,
73
  "step": 488
74
  },
75
  {
76
  "epoch": 4.0,
77
+ "eval_accuracy": 0.9022556390977443,
78
+ "eval_f1": 0.8793019197207679,
79
+ "eval_loss": 0.4037090241909027,
80
+ "eval_precision": 0.8893184421534936,
81
+ "eval_recall": 0.8708401527550463,
82
+ "eval_runtime": 1.6737,
83
+ "eval_samples_per_second": 238.388,
84
+ "eval_steps_per_second": 29.873,
85
  "step": 488
86
  },
87
  {
88
  "epoch": 5.0,
89
+ "grad_norm": 5.347772121429443,
90
  "learning_rate": 3.7500000000000003e-05,
91
+ "loss": 0.0426,
92
  "step": 610
93
  },
94
  {
95
  "epoch": 5.0,
96
+ "eval_accuracy": 0.9047619047619048,
97
+ "eval_f1": 0.8873149414352814,
98
+ "eval_loss": 0.5462044477462769,
99
+ "eval_precision": 0.8806277372262774,
100
+ "eval_recall": 0.8951172940534643,
101
+ "eval_runtime": 1.6751,
102
+ "eval_samples_per_second": 238.188,
103
+ "eval_steps_per_second": 29.848,
104
  "step": 610
105
  },
106
  {
107
  "epoch": 6.0,
108
+ "grad_norm": 0.011684279888868332,
109
  "learning_rate": 3.5e-05,
110
+ "loss": 0.0502,
111
  "step": 732
112
  },
113
  {
114
  "epoch": 6.0,
115
+ "eval_accuracy": 0.8897243107769424,
116
+ "eval_f1": 0.8707140332272888,
117
+ "eval_loss": 0.5626452565193176,
118
+ "eval_precision": 0.8618432385874246,
119
+ "eval_recall": 0.8819785415530097,
120
+ "eval_runtime": 1.6815,
121
+ "eval_samples_per_second": 237.294,
122
+ "eval_steps_per_second": 29.736,
123
  "step": 732
124
  },
125
  {
126
  "epoch": 7.0,
127
+ "grad_norm": 0.015737071633338928,
128
  "learning_rate": 3.2500000000000004e-05,
129
+ "loss": 0.0242,
130
  "step": 854
131
  },
132
  {
133
  "epoch": 7.0,
134
  "eval_accuracy": 0.9072681704260651,
135
+ "eval_f1": 0.8848664457009163,
136
+ "eval_loss": 0.6241247653961182,
137
+ "eval_precision": 0.8977236138837015,
138
+ "eval_recall": 0.8743862520458265,
139
+ "eval_runtime": 1.6765,
140
+ "eval_samples_per_second": 238.002,
141
+ "eval_steps_per_second": 29.825,
142
  "step": 854
143
  },
144
  {
145
  "epoch": 8.0,
146
+ "grad_norm": 0.004997015465050936,
147
  "learning_rate": 3e-05,
148
+ "loss": 0.0217,
149
  "step": 976
150
  },
151
  {
152
  "epoch": 8.0,
153
+ "eval_accuracy": 0.8872180451127819,
154
+ "eval_f1": 0.8692251105268142,
155
+ "eval_loss": 0.7096332907676697,
156
+ "eval_precision": 0.8579132638693325,
157
+ "eval_recall": 0.885206401163848,
158
+ "eval_runtime": 1.6742,
159
+ "eval_samples_per_second": 238.324,
160
+ "eval_steps_per_second": 29.865,
161
  "step": 976
162
  },
163
  {
164
  "epoch": 9.0,
165
+ "grad_norm": 0.004219838418066502,
166
  "learning_rate": 2.7500000000000004e-05,
167
+ "loss": 0.0229,
168
  "step": 1098
169
  },
170
  {
171
  "epoch": 9.0,
172
+ "eval_accuracy": 0.9122807017543859,
173
+ "eval_f1": 0.8954723392788977,
174
+ "eval_loss": 0.611499547958374,
175
+ "eval_precision": 0.8909569746108776,
176
+ "eval_recall": 0.9004364429896345,
177
+ "eval_runtime": 1.66,
178
+ "eval_samples_per_second": 240.365,
179
+ "eval_steps_per_second": 30.121,
180
  "step": 1098
181
  },
182
  {
183
  "epoch": 10.0,
184
+ "grad_norm": 0.006534805987030268,
185
  "learning_rate": 2.5e-05,
186
+ "loss": 0.0109,
187
  "step": 1220
188
  },
189
  {
190
  "epoch": 10.0,
191
  "eval_accuracy": 0.8972431077694235,
192
+ "eval_f1": 0.8744522298370696,
193
+ "eval_loss": 0.7575166821479797,
194
+ "eval_precision": 0.8795731707317074,
195
+ "eval_recall": 0.8697945080923805,
196
+ "eval_runtime": 1.6781,
197
+ "eval_samples_per_second": 237.769,
198
+ "eval_steps_per_second": 29.796,
199
  "step": 1220
200
  },
201
  {
202
  "epoch": 11.0,
203
+ "grad_norm": 0.00184684619307518,
204
  "learning_rate": 2.25e-05,
205
+ "loss": 0.0068,
206
  "step": 1342
207
  },
208
  {
209
  "epoch": 11.0,
210
+ "eval_accuracy": 0.9072681704260651,
211
+ "eval_f1": 0.8861029031685659,
212
+ "eval_loss": 0.7536790370941162,
213
+ "eval_precision": 0.8937558980811576,
214
+ "eval_recall": 0.879387161302055,
215
+ "eval_runtime": 1.6764,
216
+ "eval_samples_per_second": 238.006,
217
+ "eval_steps_per_second": 29.825,
218
  "step": 1342
219
  },
220
  {
221
  "epoch": 12.0,
222
+ "grad_norm": 0.01189060416072607,
223
  "learning_rate": 2e-05,
224
+ "loss": 0.0131,
225
  "step": 1464
226
  },
227
  {
228
  "epoch": 12.0,
229
  "eval_accuracy": 0.8972431077694235,
230
+ "eval_f1": 0.8775533117267087,
231
+ "eval_loss": 0.7247006297111511,
232
+ "eval_precision": 0.873246730188791,
233
+ "eval_recall": 0.8822967812329514,
234
+ "eval_runtime": 1.6752,
235
+ "eval_samples_per_second": 238.178,
236
+ "eval_steps_per_second": 29.847,
237
  "step": 1464
238
  },
239
  {
240
  "epoch": 13.0,
241
+ "grad_norm": 0.0015333497431129217,
242
  "learning_rate": 1.75e-05,
243
+ "loss": 0.0101,
244
  "step": 1586
245
  },
246
  {
247
  "epoch": 13.0,
248
+ "eval_accuracy": 0.8972431077694235,
249
+ "eval_f1": 0.8763538792940554,
250
+ "eval_loss": 0.7927835583686829,
251
+ "eval_precision": 0.8754297605404427,
252
+ "eval_recall": 0.877295871976723,
253
+ "eval_runtime": 1.6839,
254
+ "eval_samples_per_second": 236.944,
255
+ "eval_steps_per_second": 29.692,
256
  "step": 1586
257
  },
258
  {
259
  "epoch": 14.0,
260
+ "grad_norm": 0.0037907068617641926,
261
  "learning_rate": 1.5e-05,
262
+ "loss": 0.0061,
263
  "step": 1708
264
  },
265
  {
266
  "epoch": 14.0,
267
  "eval_accuracy": 0.9072681704260651,
268
  "eval_f1": 0.8884169154604891,
269
+ "eval_loss": 0.784883975982666,
270
  "eval_precision": 0.8874630556728391,
271
  "eval_recall": 0.8893889798145117,
272
+ "eval_runtime": 1.6745,
273
+ "eval_samples_per_second": 238.284,
274
+ "eval_steps_per_second": 29.86,
275
  "step": 1708
276
  },
277
  {
278
  "epoch": 15.0,
279
+ "grad_norm": 0.0031544596422463655,
280
  "learning_rate": 1.25e-05,
281
+ "loss": 0.0135,
282
  "step": 1830
283
  },
284
  {
285
  "epoch": 15.0,
286
+ "eval_accuracy": 0.8972431077694235,
287
+ "eval_f1": 0.8731122745782431,
288
+ "eval_loss": 0.781574010848999,
289
+ "eval_precision": 0.8829705994654449,
290
+ "eval_recall": 0.864793598836152,
291
+ "eval_runtime": 1.6721,
292
+ "eval_samples_per_second": 238.624,
293
+ "eval_steps_per_second": 29.903,
294
  "step": 1830
295
  },
296
  {
297
  "epoch": 16.0,
298
+ "grad_norm": 0.0015803646529093385,
299
  "learning_rate": 1e-05,
300
+ "loss": 0.0081,
301
  "step": 1952
302
  },
303
  {
304
  "epoch": 16.0,
305
+ "eval_accuracy": 0.8972431077694235,
306
+ "eval_f1": 0.8757339815412664,
307
+ "eval_loss": 0.7727145552635193,
308
+ "eval_precision": 0.8766906299500427,
309
+ "eval_recall": 0.8747954173486088,
310
+ "eval_runtime": 1.6771,
311
+ "eval_samples_per_second": 237.909,
312
+ "eval_steps_per_second": 29.813,
313
  "step": 1952
314
  },
315
  {
316
  "epoch": 17.0,
317
+ "grad_norm": 0.00223415601067245,
318
  "learning_rate": 7.5e-06,
319
+ "loss": 0.0027,
320
  "step": 2074
321
  },
322
  {
323
  "epoch": 17.0,
324
+ "eval_accuracy": 0.8972431077694235,
325
+ "eval_f1": 0.8763538792940554,
326
+ "eval_loss": 0.81281578540802,
327
+ "eval_precision": 0.8754297605404427,
328
+ "eval_recall": 0.877295871976723,
329
+ "eval_runtime": 1.6614,
330
+ "eval_samples_per_second": 240.156,
331
+ "eval_steps_per_second": 30.095,
332
  "step": 2074
333
  },
334
  {
335
  "epoch": 18.0,
336
+ "grad_norm": 0.0023393542505800724,
337
  "learning_rate": 5e-06,
338
+ "loss": 0.0041,
339
  "step": 2196
340
  },
341
  {
342
  "epoch": 18.0,
343
+ "eval_accuracy": 0.9022556390977443,
344
+ "eval_f1": 0.8817957385392532,
345
+ "eval_loss": 0.8081415891647339,
346
+ "eval_precision": 0.8827677592299257,
347
+ "eval_recall": 0.8808419712675032,
348
+ "eval_runtime": 1.6587,
349
+ "eval_samples_per_second": 240.553,
350
+ "eval_steps_per_second": 30.145,
351
  "step": 2196
352
  },
353
  {
354
  "epoch": 19.0,
355
+ "grad_norm": 0.0010473760776221752,
356
  "learning_rate": 2.5e-06,
357
+ "loss": 0.0018,
358
  "step": 2318
359
  },
360
  {
361
  "epoch": 19.0,
362
+ "eval_accuracy": 0.9022556390977443,
363
+ "eval_f1": 0.8793019197207679,
364
+ "eval_loss": 0.8038576245307922,
365
+ "eval_precision": 0.8893184421534936,
366
+ "eval_recall": 0.8708401527550463,
367
+ "eval_runtime": 1.6566,
368
+ "eval_samples_per_second": 240.849,
369
+ "eval_steps_per_second": 30.182,
370
  "step": 2318
371
  },
372
  {
373
  "epoch": 20.0,
374
+ "grad_norm": 0.0015798051608726382,
375
  "learning_rate": 0.0,
376
+ "loss": 0.0025,
377
  "step": 2440
378
  },
379
  {
380
  "epoch": 20.0,
381
  "eval_accuracy": 0.9022556390977443,
382
+ "eval_f1": 0.8799463033398397,
383
+ "eval_loss": 0.803022563457489,
384
+ "eval_precision": 0.8874803397294746,
385
+ "eval_recall": 0.8733406073831607,
386
+ "eval_runtime": 1.6611,
387
+ "eval_samples_per_second": 240.198,
388
+ "eval_steps_per_second": 30.1,
389
  "step": 2440
390
  },
391
  {
392
  "epoch": 20.0,
393
  "step": 2440,
394
  "total_flos": 7584162436176000.0,
395
+ "train_loss": 0.05501617935226589,
396
+ "train_runtime": 867.5592,
397
+ "train_samples_per_second": 83.867,
398
+ "train_steps_per_second": 2.812
399
  }
400
  ],
401
  "logging_steps": 500,