apwic commited on
Commit
eb7e226
1 Parent(s): a3921bb

End of training

Browse files
README.md CHANGED
@@ -1,4 +1,6 @@
1
  ---
 
 
2
  license: mit
3
  base_model: indolem/indobert-base-uncased
4
  tags:
 
1
  ---
2
+ language:
3
+ - id
4
  license: mit
5
  base_model: indolem/indobert-base-uncased
6
  tags:
all_results.json CHANGED
@@ -1,21 +1,21 @@
1
  {
2
- "accuracy": 0.9109792284866469,
3
  "epoch": 20.0,
4
- "eval_accuracy": 0.8972431077694235,
5
- "eval_f1": 0.8744522298370696,
6
- "eval_loss": 0.7891051769256592,
7
- "eval_precision": 0.8795731707317074,
8
- "eval_recall": 0.8697945080923805,
9
- "eval_runtime": 4.7492,
10
  "eval_samples": 399,
11
- "eval_samples_per_second": 84.015,
12
- "eval_steps_per_second": 10.528,
13
- "f1": 0.8941622310727313,
14
- "precision": 0.8899245317748838,
15
- "recall": 0.8987993825126839,
16
- "train_loss": 0.05962105130807298,
17
- "train_runtime": 2691.1784,
18
  "train_samples": 3638,
19
- "train_samples_per_second": 27.036,
20
- "train_steps_per_second": 0.907
21
  }
 
1
  {
2
+ "accuracy": 0.9119683481701286,
3
  "epoch": 20.0,
4
+ "eval_accuracy": 0.9022556390977443,
5
+ "eval_f1": 0.8799463033398397,
6
+ "eval_loss": 0.790817379951477,
7
+ "eval_precision": 0.8874803397294746,
8
+ "eval_recall": 0.8733406073831607,
9
+ "eval_runtime": 1.6569,
10
  "eval_samples": 399,
11
+ "eval_samples_per_second": 240.805,
12
+ "eval_steps_per_second": 30.176,
13
+ "f1": 0.8952398693685564,
14
+ "precision": 0.8913160733549084,
15
+ "recall": 0.8995006447847737,
16
+ "train_loss": 0.0588726386183598,
17
+ "train_runtime": 864.0501,
18
  "train_samples": 3638,
19
+ "train_samples_per_second": 84.208,
20
+ "train_steps_per_second": 2.824
21
  }
eval_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_accuracy": 0.8972431077694235,
4
- "eval_f1": 0.8744522298370696,
5
- "eval_loss": 0.7891051769256592,
6
- "eval_precision": 0.8795731707317074,
7
- "eval_recall": 0.8697945080923805,
8
- "eval_runtime": 4.7492,
9
  "eval_samples": 399,
10
- "eval_samples_per_second": 84.015,
11
- "eval_steps_per_second": 10.528
12
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_accuracy": 0.9022556390977443,
4
+ "eval_f1": 0.8799463033398397,
5
+ "eval_loss": 0.790817379951477,
6
+ "eval_precision": 0.8874803397294746,
7
+ "eval_recall": 0.8733406073831607,
8
+ "eval_runtime": 1.6569,
9
  "eval_samples": 399,
10
+ "eval_samples_per_second": 240.805,
11
+ "eval_steps_per_second": 30.176
12
  }
predict_results.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "accuracy": 0.9109792284866469,
3
- "f1": 0.8941622310727313,
4
- "precision": 0.8899245317748838,
5
- "recall": 0.8987993825126839
6
  }
 
1
  {
2
+ "accuracy": 0.9119683481701286,
3
+ "f1": 0.8952398693685564,
4
+ "precision": 0.8913160733549084,
5
+ "recall": 0.8995006447847737
6
  }
predict_results.txt CHANGED
@@ -3,7 +3,7 @@ index prediction
3
  1 1
4
  2 1
5
  3 1
6
- 4 1
7
  5 0
8
  6 1
9
  7 1
@@ -20,12 +20,12 @@ index prediction
20
  18 1
21
  19 1
22
  20 1
23
- 21 0
24
- 22 1
25
  23 1
26
  24 1
27
- 25 0
28
- 26 0
29
  27 1
30
  28 1
31
  29 1
@@ -35,7 +35,7 @@ index prediction
35
  33 1
36
  34 1
37
  35 1
38
- 36 0
39
  37 1
40
  38 1
41
  39 1
@@ -50,13 +50,13 @@ index prediction
50
  48 1
51
  49 1
52
  50 1
53
- 51 1
54
  52 1
55
  53 1
56
  54 0
57
  55 1
58
  56 1
59
- 57 1
60
  58 1
61
  59 1
62
  60 0
@@ -66,7 +66,7 @@ index prediction
66
  64 1
67
  65 1
68
  66 1
69
- 67 0
70
  68 0
71
  69 1
72
  70 1
@@ -86,7 +86,7 @@ index prediction
86
  84 1
87
  85 1
88
  86 1
89
- 87 0
90
  88 1
91
  89 1
92
  90 0
@@ -109,17 +109,17 @@ index prediction
109
  107 1
110
  108 1
111
  109 1
112
- 110 1
113
  111 1
114
  112 1
115
  113 1
116
  114 1
117
- 115 0
118
  116 1
119
  117 1
120
  118 1
121
  119 1
122
- 120 0
123
  121 0
124
  122 1
125
  123 1
@@ -130,7 +130,7 @@ index prediction
130
  128 1
131
  129 1
132
  130 0
133
- 131 1
134
  132 1
135
  133 1
136
  134 1
@@ -148,9 +148,9 @@ index prediction
148
  146 1
149
  147 1
150
  148 1
151
- 149 1
152
  150 1
153
- 151 0
154
  152 1
155
  153 1
156
  154 1
@@ -180,7 +180,7 @@ index prediction
180
  178 1
181
  179 1
182
  180 1
183
- 181 1
184
  182 1
185
  183 1
186
  184 1
@@ -239,7 +239,7 @@ index prediction
239
  237 0
240
  238 1
241
  239 1
242
- 240 0
243
  241 1
244
  242 1
245
  243 1
@@ -253,16 +253,16 @@ index prediction
253
  251 1
254
  252 1
255
  253 1
256
- 254 0
257
  255 1
258
- 256 1
259
- 257 0
260
- 258 1
261
  259 1
262
- 260 1
263
  261 1
264
  262 1
265
- 263 0
266
  264 1
267
  265 1
268
  266 1
@@ -285,10 +285,10 @@ index prediction
285
  283 1
286
  284 1
287
  285 1
288
- 286 1
289
  287 1
290
  288 1
291
- 289 1
292
  290 1
293
  291 1
294
  292 1
@@ -322,7 +322,7 @@ index prediction
322
  320 0
323
  321 0
324
  322 0
325
- 323 0
326
  324 0
327
  325 0
328
  326 0
@@ -339,9 +339,9 @@ index prediction
339
  337 0
340
  338 1
341
  339 0
342
- 340 0
343
  341 0
344
- 342 1
345
  343 0
346
  344 0
347
  345 0
@@ -368,7 +368,7 @@ index prediction
368
  366 0
369
  367 0
370
  368 0
371
- 369 0
372
  370 0
373
  371 0
374
  372 0
@@ -434,7 +434,7 @@ index prediction
434
  432 0
435
  433 0
436
  434 1
437
- 435 1
438
  436 0
439
  437 0
440
  438 0
@@ -456,7 +456,7 @@ index prediction
456
  454 0
457
  455 1
458
  456 0
459
- 457 0
460
  458 0
461
  459 0
462
  460 0
@@ -486,7 +486,7 @@ index prediction
486
  484 0
487
  485 0
488
  486 0
489
- 487 0
490
  488 0
491
  489 0
492
  490 0
@@ -497,7 +497,7 @@ index prediction
497
  495 0
498
  496 0
499
  497 0
500
- 498 0
501
  499 0
502
  500 0
503
  501 0
@@ -508,7 +508,7 @@ index prediction
508
  506 0
509
  507 0
510
  508 1
511
- 509 0
512
  510 0
513
  511 0
514
  512 0
@@ -521,8 +521,8 @@ index prediction
521
  519 0
522
  520 0
523
  521 0
524
- 522 0
525
- 523 1
526
  524 0
527
  525 0
528
  526 0
@@ -580,7 +580,7 @@ index prediction
580
  578 0
581
  579 1
582
  580 0
583
- 581 1
584
  582 1
585
  583 0
586
  584 0
@@ -590,7 +590,7 @@ index prediction
590
  588 1
591
  589 0
592
  590 0
593
- 591 1
594
  592 0
595
  593 0
596
  594 0
@@ -605,7 +605,7 @@ index prediction
605
  603 0
606
  604 1
607
  605 0
608
- 606 1
609
  607 0
610
  608 0
611
  609 0
@@ -664,7 +664,7 @@ index prediction
664
  662 0
665
  663 0
666
  664 0
667
- 665 1
668
  666 0
669
  667 1
670
  668 0
@@ -709,7 +709,7 @@ index prediction
709
  707 0
710
  708 0
711
  709 0
712
- 710 0
713
  711 0
714
  712 1
715
  713 0
@@ -723,7 +723,7 @@ index prediction
723
  721 0
724
  722 0
725
  723 0
726
- 724 1
727
  725 0
728
  726 0
729
  727 0
@@ -736,7 +736,7 @@ index prediction
736
  734 1
737
  735 0
738
  736 0
739
- 737 0
740
  738 0
741
  739 0
742
  740 0
@@ -812,7 +812,7 @@ index prediction
812
  810 0
813
  811 0
814
  812 1
815
- 813 1
816
  814 0
817
  815 0
818
  816 0
@@ -838,19 +838,19 @@ index prediction
838
  836 0
839
  837 0
840
  838 0
841
- 839 1
842
  840 0
843
  841 0
844
  842 0
845
  843 0
846
  844 0
847
  845 0
848
- 846 0
849
  847 0
850
  848 0
851
  849 0
852
- 850 0
853
- 851 0
854
  852 1
855
  853 0
856
  854 1
@@ -862,7 +862,7 @@ index prediction
862
  860 0
863
  861 0
864
  862 0
865
- 863 1
866
  864 0
867
  865 1
868
  866 0
@@ -892,13 +892,13 @@ index prediction
892
  890 0
893
  891 0
894
  892 0
895
- 893 0
896
  894 0
897
  895 0
898
  896 1
899
  897 0
900
  898 0
901
- 899 1
902
  900 0
903
  901 0
904
  902 0
@@ -916,7 +916,7 @@ index prediction
916
  914 0
917
  915 0
918
  916 0
919
- 917 1
920
  918 0
921
  919 0
922
  920 0
@@ -953,7 +953,7 @@ index prediction
953
  951 0
954
  952 0
955
  953 0
956
- 954 1
957
  955 0
958
  956 0
959
  957 0
@@ -981,7 +981,7 @@ index prediction
981
  979 0
982
  980 0
983
  981 0
984
- 982 1
985
  983 0
986
  984 0
987
  985 0
 
3
  1 1
4
  2 1
5
  3 1
6
+ 4 0
7
  5 0
8
  6 1
9
  7 1
 
20
  18 1
21
  19 1
22
  20 1
23
+ 21 1
24
+ 22 0
25
  23 1
26
  24 1
27
+ 25 1
28
+ 26 1
29
  27 1
30
  28 1
31
  29 1
 
35
  33 1
36
  34 1
37
  35 1
38
+ 36 1
39
  37 1
40
  38 1
41
  39 1
 
50
  48 1
51
  49 1
52
  50 1
53
+ 51 0
54
  52 1
55
  53 1
56
  54 0
57
  55 1
58
  56 1
59
+ 57 0
60
  58 1
61
  59 1
62
  60 0
 
66
  64 1
67
  65 1
68
  66 1
69
+ 67 1
70
  68 0
71
  69 1
72
  70 1
 
86
  84 1
87
  85 1
88
  86 1
89
+ 87 1
90
  88 1
91
  89 1
92
  90 0
 
109
  107 1
110
  108 1
111
  109 1
112
+ 110 0
113
  111 1
114
  112 1
115
  113 1
116
  114 1
117
+ 115 1
118
  116 1
119
  117 1
120
  118 1
121
  119 1
122
+ 120 1
123
  121 0
124
  122 1
125
  123 1
 
130
  128 1
131
  129 1
132
  130 0
133
+ 131 0
134
  132 1
135
  133 1
136
  134 1
 
148
  146 1
149
  147 1
150
  148 1
151
+ 149 0
152
  150 1
153
+ 151 1
154
  152 1
155
  153 1
156
  154 1
 
180
  178 1
181
  179 1
182
  180 1
183
+ 181 0
184
  182 1
185
  183 1
186
  184 1
 
239
  237 0
240
  238 1
241
  239 1
242
+ 240 1
243
  241 1
244
  242 1
245
  243 1
 
253
  251 1
254
  252 1
255
  253 1
256
+ 254 1
257
  255 1
258
+ 256 0
259
+ 257 1
260
+ 258 0
261
  259 1
262
+ 260 0
263
  261 1
264
  262 1
265
+ 263 1
266
  264 1
267
  265 1
268
  266 1
 
285
  283 1
286
  284 1
287
  285 1
288
+ 286 0
289
  287 1
290
  288 1
291
+ 289 0
292
  290 1
293
  291 1
294
  292 1
 
322
  320 0
323
  321 0
324
  322 0
325
+ 323 1
326
  324 0
327
  325 0
328
  326 0
 
339
  337 0
340
  338 1
341
  339 0
342
+ 340 1
343
  341 0
344
+ 342 0
345
  343 0
346
  344 0
347
  345 0
 
368
  366 0
369
  367 0
370
  368 0
371
+ 369 1
372
  370 0
373
  371 0
374
  372 0
 
434
  432 0
435
  433 0
436
  434 1
437
+ 435 0
438
  436 0
439
  437 0
440
  438 0
 
456
  454 0
457
  455 1
458
  456 0
459
+ 457 1
460
  458 0
461
  459 0
462
  460 0
 
486
  484 0
487
  485 0
488
  486 0
489
+ 487 1
490
  488 0
491
  489 0
492
  490 0
 
497
  495 0
498
  496 0
499
  497 0
500
+ 498 1
501
  499 0
502
  500 0
503
  501 0
 
508
  506 0
509
  507 0
510
  508 1
511
+ 509 1
512
  510 0
513
  511 0
514
  512 0
 
521
  519 0
522
  520 0
523
  521 0
524
+ 522 1
525
+ 523 0
526
  524 0
527
  525 0
528
  526 0
 
580
  578 0
581
  579 1
582
  580 0
583
+ 581 0
584
  582 1
585
  583 0
586
  584 0
 
590
  588 1
591
  589 0
592
  590 0
593
+ 591 0
594
  592 0
595
  593 0
596
  594 0
 
605
  603 0
606
  604 1
607
  605 0
608
+ 606 0
609
  607 0
610
  608 0
611
  609 0
 
664
  662 0
665
  663 0
666
  664 0
667
+ 665 0
668
  666 0
669
  667 1
670
  668 0
 
709
  707 0
710
  708 0
711
  709 0
712
+ 710 1
713
  711 0
714
  712 1
715
  713 0
 
723
  721 0
724
  722 0
725
  723 0
726
+ 724 0
727
  725 0
728
  726 0
729
  727 0
 
736
  734 1
737
  735 0
738
  736 0
739
+ 737 1
740
  738 0
741
  739 0
742
  740 0
 
812
  810 0
813
  811 0
814
  812 1
815
+ 813 0
816
  814 0
817
  815 0
818
  816 0
 
838
  836 0
839
  837 0
840
  838 0
841
+ 839 0
842
  840 0
843
  841 0
844
  842 0
845
  843 0
846
  844 0
847
  845 0
848
+ 846 1
849
  847 0
850
  848 0
851
  849 0
852
+ 850 1
853
+ 851 1
854
  852 1
855
  853 0
856
  854 1
 
862
  860 0
863
  861 0
864
  862 0
865
+ 863 0
866
  864 0
867
  865 1
868
  866 0
 
892
  890 0
893
  891 0
894
  892 0
895
+ 893 1
896
  894 0
897
  895 0
898
  896 1
899
  897 0
900
  898 0
901
+ 899 0
902
  900 0
903
  901 0
904
  902 0
 
916
  914 0
917
  915 0
918
  916 0
919
+ 917 0
920
  918 0
921
  919 0
922
  920 0
 
953
  951 0
954
  952 0
955
  953 0
956
+ 954 0
957
  955 0
958
  956 0
959
  957 0
 
981
  979 0
982
  980 0
983
  981 0
984
+ 982 0
985
  983 0
986
  984 0
987
  985 0
runs/Jun03_09-57-46_a358b85c7679/events.out.tfevents.1717409561.a358b85c7679.18986.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d48601fe52b0be1678a007861adb9d43e2e9a7da888092d086e7a47c15375e71
3
+ size 560
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
- "train_loss": 0.05962105130807298,
4
- "train_runtime": 2691.1784,
5
  "train_samples": 3638,
6
- "train_samples_per_second": 27.036,
7
- "train_steps_per_second": 0.907
8
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "train_loss": 0.0588726386183598,
4
+ "train_runtime": 864.0501,
5
  "train_samples": 3638,
6
+ "train_samples_per_second": 84.208,
7
+ "train_steps_per_second": 2.824
8
  }
trainer_state.json CHANGED
@@ -10,392 +10,392 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 21.05661964416504,
14
  "learning_rate": 4.75e-05,
15
- "loss": 0.411,
16
  "step": 122
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_accuracy": 0.8721804511278195,
21
- "eval_f1": 0.8446368446368446,
22
- "eval_loss": 0.275076687335968,
23
- "eval_precision": 0.8473684210526315,
24
- "eval_recall": 0.8420621931260229,
25
- "eval_runtime": 4.9569,
26
- "eval_samples_per_second": 80.493,
27
- "eval_steps_per_second": 10.087,
28
  "step": 122
29
  },
30
  {
31
  "epoch": 2.0,
32
- "grad_norm": 21.74665069580078,
33
  "learning_rate": 4.5e-05,
34
- "loss": 0.2264,
35
  "step": 244
36
  },
37
  {
38
  "epoch": 2.0,
39
- "eval_accuracy": 0.8872180451127819,
40
- "eval_f1": 0.8718936621074937,
41
- "eval_loss": 0.3036659359931946,
42
- "eval_precision": 0.8573529411764707,
43
- "eval_recall": 0.897708674304419,
44
- "eval_runtime": 4.9408,
45
- "eval_samples_per_second": 80.756,
46
- "eval_steps_per_second": 10.12,
47
  "step": 244
48
  },
49
  {
50
  "epoch": 3.0,
51
- "grad_norm": 2.0689539909362793,
52
  "learning_rate": 4.25e-05,
53
- "loss": 0.1467,
54
  "step": 366
55
  },
56
  {
57
  "epoch": 3.0,
58
- "eval_accuracy": 0.8771929824561403,
59
- "eval_f1": 0.8582079268956014,
60
- "eval_loss": 0.3442274332046509,
61
- "eval_precision": 0.8464828897338403,
62
- "eval_recall": 0.8756137479541735,
63
- "eval_runtime": 4.9579,
64
- "eval_samples_per_second": 80.478,
65
- "eval_steps_per_second": 10.085,
66
  "step": 366
67
  },
68
  {
69
  "epoch": 4.0,
70
- "grad_norm": 1.8201183080673218,
71
  "learning_rate": 4e-05,
72
- "loss": 0.0961,
73
  "step": 488
74
  },
75
  {
76
  "epoch": 4.0,
77
- "eval_accuracy": 0.899749373433584,
78
- "eval_f1": 0.8778322106552358,
79
- "eval_loss": 0.3736657500267029,
80
- "eval_precision": 0.8818924438393465,
81
- "eval_recall": 0.8740680123658847,
82
- "eval_runtime": 4.9699,
83
- "eval_samples_per_second": 80.283,
84
- "eval_steps_per_second": 10.061,
85
  "step": 488
86
  },
87
  {
88
  "epoch": 5.0,
89
- "grad_norm": 0.0896231159567833,
90
  "learning_rate": 3.7500000000000003e-05,
91
- "loss": 0.0726,
92
  "step": 610
93
  },
94
  {
95
  "epoch": 5.0,
96
- "eval_accuracy": 0.899749373433584,
97
- "eval_f1": 0.8771929824561404,
98
- "eval_loss": 0.43064403533935547,
99
- "eval_precision": 0.8834928678678678,
100
- "eval_recall": 0.8715675577377705,
101
- "eval_runtime": 4.9329,
102
- "eval_samples_per_second": 80.886,
103
- "eval_steps_per_second": 10.136,
104
  "step": 610
105
  },
106
  {
107
  "epoch": 6.0,
108
- "grad_norm": 53.643978118896484,
109
  "learning_rate": 3.5e-05,
110
- "loss": 0.0514,
111
  "step": 732
112
  },
113
  {
114
  "epoch": 6.0,
115
- "eval_accuracy": 0.8847117794486216,
116
- "eval_f1": 0.8677208256457565,
117
- "eval_loss": 0.6448621153831482,
118
- "eval_precision": 0.8546209186496956,
119
- "eval_recall": 0.8884342607746863,
120
- "eval_runtime": 4.9504,
121
- "eval_samples_per_second": 80.6,
122
- "eval_steps_per_second": 10.1,
123
  "step": 732
124
  },
125
  {
126
  "epoch": 7.0,
127
- "grad_norm": 0.03521590679883957,
128
  "learning_rate": 3.2500000000000004e-05,
129
- "loss": 0.0532,
130
  "step": 854
131
  },
132
  {
133
  "epoch": 7.0,
134
  "eval_accuracy": 0.8972431077694235,
135
- "eval_f1": 0.8763538792940554,
136
- "eval_loss": 0.5595228672027588,
137
- "eval_precision": 0.8754297605404427,
138
- "eval_recall": 0.877295871976723,
139
- "eval_runtime": 4.952,
140
- "eval_samples_per_second": 80.574,
141
- "eval_steps_per_second": 10.097,
142
  "step": 854
143
  },
144
  {
145
  "epoch": 8.0,
146
- "grad_norm": 0.004897149745374918,
147
  "learning_rate": 3e-05,
148
- "loss": 0.0274,
149
  "step": 976
150
  },
151
  {
152
  "epoch": 8.0,
153
- "eval_accuracy": 0.8872180451127819,
154
- "eval_f1": 0.8614765038536611,
155
- "eval_loss": 0.6727907657623291,
156
- "eval_precision": 0.8686536646744258,
157
- "eval_recall": 0.8552009456264775,
158
- "eval_runtime": 4.9909,
159
- "eval_samples_per_second": 79.945,
160
- "eval_steps_per_second": 10.018,
161
  "step": 976
162
  },
163
  {
164
  "epoch": 9.0,
165
- "grad_norm": 0.02297130785882473,
166
  "learning_rate": 2.7500000000000004e-05,
167
- "loss": 0.0186,
168
  "step": 1098
169
  },
170
  {
171
  "epoch": 9.0,
172
- "eval_accuracy": 0.9072681704260651,
173
- "eval_f1": 0.8848664457009163,
174
- "eval_loss": 0.6217536926269531,
175
- "eval_precision": 0.8977236138837015,
176
- "eval_recall": 0.8743862520458265,
177
- "eval_runtime": 4.9504,
178
- "eval_samples_per_second": 80.6,
179
- "eval_steps_per_second": 10.1,
180
  "step": 1098
181
  },
182
  {
183
  "epoch": 10.0,
184
- "grad_norm": 0.018355082720518112,
185
  "learning_rate": 2.5e-05,
186
- "loss": 0.0121,
187
  "step": 1220
188
  },
189
  {
190
  "epoch": 10.0,
191
- "eval_accuracy": 0.8922305764411027,
192
- "eval_f1": 0.8669226294357184,
193
- "eval_loss": 0.6576113104820251,
194
- "eval_precision": 0.8766227567773959,
195
- "eval_recall": 0.8587470449172576,
196
- "eval_runtime": 4.9651,
197
- "eval_samples_per_second": 80.361,
198
- "eval_steps_per_second": 10.07,
199
  "step": 1220
200
  },
201
  {
202
  "epoch": 11.0,
203
- "grad_norm": 0.0026700079906731844,
204
  "learning_rate": 2.25e-05,
205
- "loss": 0.0244,
206
  "step": 1342
207
  },
208
  {
209
  "epoch": 11.0,
210
- "eval_accuracy": 0.8972431077694235,
211
- "eval_f1": 0.8694882125334078,
212
- "eval_loss": 0.7506579160690308,
213
- "eval_precision": 0.8940436639772188,
214
- "eval_recall": 0.8522913256955811,
215
- "eval_runtime": 4.9691,
216
- "eval_samples_per_second": 80.296,
217
- "eval_steps_per_second": 10.062,
218
  "step": 1342
219
  },
220
  {
221
  "epoch": 12.0,
222
- "grad_norm": 0.002809323836117983,
223
  "learning_rate": 2e-05,
224
- "loss": 0.0062,
225
  "step": 1464
226
  },
227
  {
228
  "epoch": 12.0,
229
- "eval_accuracy": 0.8972431077694235,
230
- "eval_f1": 0.8724195749658803,
231
- "eval_loss": 0.6859227418899536,
232
- "eval_precision": 0.8848766823362741,
233
- "eval_recall": 0.8622931442080378,
234
- "eval_runtime": 4.9578,
235
- "eval_samples_per_second": 80.48,
236
- "eval_steps_per_second": 10.085,
237
  "step": 1464
238
  },
239
  {
240
  "epoch": 13.0,
241
- "grad_norm": 0.004050145391374826,
242
  "learning_rate": 1.75e-05,
243
- "loss": 0.0099,
244
  "step": 1586
245
  },
246
  {
247
  "epoch": 13.0,
248
- "eval_accuracy": 0.9072681704260651,
249
- "eval_f1": 0.8872855539522206,
250
- "eval_loss": 0.6514401435852051,
251
- "eval_precision": 0.8903508771929824,
252
- "eval_recall": 0.8843880705582834,
253
- "eval_runtime": 4.9373,
254
- "eval_samples_per_second": 80.813,
255
- "eval_steps_per_second": 10.127,
256
  "step": 1586
257
  },
258
  {
259
  "epoch": 14.0,
260
- "grad_norm": 0.0045247310772538185,
261
  "learning_rate": 1.5e-05,
262
- "loss": 0.0087,
263
  "step": 1708
264
  },
265
  {
266
  "epoch": 14.0,
267
- "eval_accuracy": 0.899749373433584,
268
- "eval_f1": 0.8765393898137261,
269
- "eval_loss": 0.7604307532310486,
270
- "eval_precision": 0.8852261942423283,
271
- "eval_recall": 0.8690671031096563,
272
- "eval_runtime": 4.9312,
273
- "eval_samples_per_second": 80.913,
274
- "eval_steps_per_second": 10.14,
275
  "step": 1708
276
  },
277
  {
278
  "epoch": 15.0,
279
- "grad_norm": 0.002084016567096114,
280
  "learning_rate": 1.25e-05,
281
- "loss": 0.0056,
282
  "step": 1830
283
  },
284
  {
285
  "epoch": 15.0,
286
- "eval_accuracy": 0.9022556390977443,
287
- "eval_f1": 0.8799463033398397,
288
- "eval_loss": 0.7281515598297119,
289
- "eval_precision": 0.8874803397294746,
290
- "eval_recall": 0.8733406073831607,
291
- "eval_runtime": 4.9315,
292
- "eval_samples_per_second": 80.909,
293
- "eval_steps_per_second": 10.139,
294
  "step": 1830
295
  },
296
  {
297
  "epoch": 16.0,
298
- "grad_norm": 0.0016144708497449756,
299
  "learning_rate": 1e-05,
300
- "loss": 0.0063,
301
  "step": 1952
302
  },
303
  {
304
  "epoch": 16.0,
305
- "eval_accuracy": 0.9122807017543859,
306
- "eval_f1": 0.89337822671156,
307
- "eval_loss": 0.6987277269363403,
308
- "eval_precision": 0.8964912280701754,
309
- "eval_recall": 0.8904346244771777,
310
- "eval_runtime": 4.9746,
311
- "eval_samples_per_second": 80.207,
312
- "eval_steps_per_second": 10.051,
313
  "step": 1952
314
  },
315
  {
316
  "epoch": 17.0,
317
- "grad_norm": 0.0015528218355029821,
318
  "learning_rate": 7.5e-06,
319
- "loss": 0.0071,
320
  "step": 2074
321
  },
322
  {
323
  "epoch": 17.0,
324
- "eval_accuracy": 0.9047619047619048,
325
- "eval_f1": 0.8833333333333333,
326
- "eval_loss": 0.7402216792106628,
327
- "eval_precision": 0.8897334834834836,
328
- "eval_recall": 0.8776141116566649,
329
- "eval_runtime": 4.9544,
330
- "eval_samples_per_second": 80.535,
331
- "eval_steps_per_second": 10.092,
332
  "step": 2074
333
  },
334
  {
335
  "epoch": 18.0,
336
- "grad_norm": 0.0025696582160890102,
337
  "learning_rate": 5e-06,
338
- "loss": 0.0023,
339
  "step": 2196
340
  },
341
  {
342
  "epoch": 18.0,
343
- "eval_accuracy": 0.8922305764411027,
344
- "eval_f1": 0.8690075356742023,
345
- "eval_loss": 0.7846499085426331,
346
- "eval_precision": 0.8719298245614036,
347
- "eval_recall": 0.8662484088016003,
348
- "eval_runtime": 4.9898,
349
- "eval_samples_per_second": 79.962,
350
- "eval_steps_per_second": 10.02,
351
  "step": 2196
352
  },
353
  {
354
  "epoch": 19.0,
355
- "grad_norm": 0.0012161381309852004,
356
  "learning_rate": 2.5e-06,
357
- "loss": 0.0043,
358
  "step": 2318
359
  },
360
  {
361
  "epoch": 19.0,
362
- "eval_accuracy": 0.8922305764411027,
363
- "eval_f1": 0.8690075356742023,
364
- "eval_loss": 0.7948206067085266,
365
- "eval_precision": 0.8719298245614036,
366
- "eval_recall": 0.8662484088016003,
367
- "eval_runtime": 4.9737,
368
- "eval_samples_per_second": 80.222,
369
- "eval_steps_per_second": 10.053,
370
  "step": 2318
371
  },
372
  {
373
  "epoch": 20.0,
374
- "grad_norm": 0.0017919199308380485,
375
  "learning_rate": 0.0,
376
- "loss": 0.0021,
377
  "step": 2440
378
  },
379
  {
380
  "epoch": 20.0,
381
- "eval_accuracy": 0.8972431077694235,
382
- "eval_f1": 0.8744522298370696,
383
- "eval_loss": 0.7891051769256592,
384
- "eval_precision": 0.8795731707317074,
385
- "eval_recall": 0.8697945080923805,
386
- "eval_runtime": 4.9669,
387
- "eval_samples_per_second": 80.331,
388
- "eval_steps_per_second": 10.067,
389
  "step": 2440
390
  },
391
  {
392
  "epoch": 20.0,
393
  "step": 2440,
394
  "total_flos": 7584162436176000.0,
395
- "train_loss": 0.05962105130807298,
396
- "train_runtime": 2691.1784,
397
- "train_samples_per_second": 27.036,
398
- "train_steps_per_second": 0.907
399
  }
400
  ],
401
  "logging_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 20.136756896972656,
14
  "learning_rate": 4.75e-05,
15
+ "loss": 0.3889,
16
  "step": 122
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_accuracy": 0.8045112781954887,
21
+ "eval_f1": 0.7109554944646705,
22
+ "eval_loss": 0.4199941158294678,
23
+ "eval_precision": 0.8255285412262157,
24
+ "eval_recall": 0.6866703036915802,
25
+ "eval_runtime": 1.6394,
26
+ "eval_samples_per_second": 243.375,
27
+ "eval_steps_per_second": 30.498,
28
  "step": 122
29
  },
30
  {
31
  "epoch": 2.0,
32
+ "grad_norm": 24.683944702148438,
33
  "learning_rate": 4.5e-05,
34
+ "loss": 0.2335,
35
  "step": 244
36
  },
37
  {
38
  "epoch": 2.0,
39
+ "eval_accuracy": 0.8922305764411027,
40
+ "eval_f1": 0.8739355018846853,
41
+ "eval_loss": 0.3136064410209656,
42
+ "eval_precision": 0.864426651415499,
43
+ "eval_recall": 0.886252045826514,
44
+ "eval_runtime": 1.6497,
45
+ "eval_samples_per_second": 241.866,
46
+ "eval_steps_per_second": 30.309,
47
  "step": 244
48
  },
49
  {
50
  "epoch": 3.0,
51
+ "grad_norm": 66.46725463867188,
52
  "learning_rate": 4.25e-05,
53
+ "loss": 0.1411,
54
  "step": 366
55
  },
56
  {
57
  "epoch": 3.0,
58
+ "eval_accuracy": 0.8972431077694235,
59
+ "eval_f1": 0.8751002084335417,
60
+ "eval_loss": 0.35689812898635864,
61
+ "eval_precision": 0.8780701754385964,
62
+ "eval_recall": 0.8722949627204946,
63
+ "eval_runtime": 1.6606,
64
+ "eval_samples_per_second": 240.275,
65
+ "eval_steps_per_second": 30.11,
66
  "step": 366
67
  },
68
  {
69
  "epoch": 4.0,
70
+ "grad_norm": 42.06414031982422,
71
  "learning_rate": 4e-05,
72
+ "loss": 0.1078,
73
  "step": 488
74
  },
75
  {
76
  "epoch": 4.0,
77
+ "eval_accuracy": 0.9147869674185464,
78
+ "eval_f1": 0.8991765265473572,
79
+ "eval_loss": 0.35370269417762756,
80
+ "eval_precision": 0.8922773722627737,
81
+ "eval_recall": 0.9072104018912529,
82
+ "eval_runtime": 1.653,
83
+ "eval_samples_per_second": 241.378,
84
+ "eval_steps_per_second": 30.248,
85
  "step": 488
86
  },
87
  {
88
  "epoch": 5.0,
89
+ "grad_norm": 88.54315185546875,
90
  "learning_rate": 3.7500000000000003e-05,
91
+ "loss": 0.0822,
92
  "step": 610
93
  },
94
  {
95
  "epoch": 5.0,
96
+ "eval_accuracy": 0.8796992481203008,
97
+ "eval_f1": 0.8439374185136896,
98
+ "eval_loss": 0.5069139003753662,
99
+ "eval_precision": 0.8794955044955045,
100
+ "eval_recall": 0.822376795781051,
101
+ "eval_runtime": 1.6524,
102
+ "eval_samples_per_second": 241.466,
103
+ "eval_steps_per_second": 30.259,
104
  "step": 610
105
  },
106
  {
107
  "epoch": 6.0,
108
+ "grad_norm": 114.8245849609375,
109
  "learning_rate": 3.5e-05,
110
+ "loss": 0.0529,
111
  "step": 732
112
  },
113
  {
114
  "epoch": 6.0,
115
+ "eval_accuracy": 0.9072681704260651,
116
+ "eval_f1": 0.888964101175568,
117
+ "eval_loss": 0.42624175548553467,
118
+ "eval_precision": 0.8862007168458781,
119
+ "eval_recall": 0.8918894344426259,
120
+ "eval_runtime": 1.6561,
121
+ "eval_samples_per_second": 240.934,
122
+ "eval_steps_per_second": 30.192,
123
  "step": 732
124
  },
125
  {
126
  "epoch": 7.0,
127
+ "grad_norm": 0.022069375962018967,
128
  "learning_rate": 3.2500000000000004e-05,
129
+ "loss": 0.0365,
130
  "step": 854
131
  },
132
  {
133
  "epoch": 7.0,
134
  "eval_accuracy": 0.8972431077694235,
135
+ "eval_f1": 0.8769602202215754,
136
+ "eval_loss": 0.5586097836494446,
137
+ "eval_precision": 0.8742831541218639,
138
+ "eval_recall": 0.8797963266048372,
139
+ "eval_runtime": 1.6532,
140
+ "eval_samples_per_second": 241.352,
141
+ "eval_steps_per_second": 30.245,
142
  "step": 854
143
  },
144
  {
145
  "epoch": 8.0,
146
+ "grad_norm": 0.0406961552798748,
147
  "learning_rate": 3e-05,
148
+ "loss": 0.033,
149
  "step": 976
150
  },
151
  {
152
  "epoch": 8.0,
153
+ "eval_accuracy": 0.8947368421052632,
154
+ "eval_f1": 0.8674628282189181,
155
+ "eval_loss": 0.5012311935424805,
156
+ "eval_precision": 0.8869858462356303,
157
+ "eval_recall": 0.8530187306783051,
158
+ "eval_runtime": 1.6551,
159
+ "eval_samples_per_second": 241.075,
160
+ "eval_steps_per_second": 30.21,
161
  "step": 976
162
  },
163
  {
164
  "epoch": 9.0,
165
+ "grad_norm": 0.6461573243141174,
166
  "learning_rate": 2.7500000000000004e-05,
167
+ "loss": 0.0248,
168
  "step": 1098
169
  },
170
  {
171
  "epoch": 9.0,
172
+ "eval_accuracy": 0.8922305764411027,
173
+ "eval_f1": 0.8631217838765008,
174
+ "eval_loss": 0.583283007144928,
175
+ "eval_precision": 0.8872804935927859,
176
+ "eval_recall": 0.8462447717766868,
177
+ "eval_runtime": 1.6572,
178
+ "eval_samples_per_second": 240.772,
179
+ "eval_steps_per_second": 30.172,
180
  "step": 1098
181
  },
182
  {
183
  "epoch": 10.0,
184
+ "grad_norm": 0.12847253680229187,
185
  "learning_rate": 2.5e-05,
186
+ "loss": 0.0123,
187
  "step": 1220
188
  },
189
  {
190
  "epoch": 10.0,
191
+ "eval_accuracy": 0.9022556390977443,
192
+ "eval_f1": 0.8805765113084321,
193
+ "eval_loss": 0.6610547304153442,
194
+ "eval_precision": 0.8857796167247387,
195
+ "eval_recall": 0.8758410620112748,
196
+ "eval_runtime": 1.6505,
197
+ "eval_samples_per_second": 241.744,
198
+ "eval_steps_per_second": 30.294,
199
  "step": 1220
200
  },
201
  {
202
  "epoch": 11.0,
203
+ "grad_norm": 0.003805552376434207,
204
  "learning_rate": 2.25e-05,
205
+ "loss": 0.0088,
206
  "step": 1342
207
  },
208
  {
209
  "epoch": 11.0,
210
+ "eval_accuracy": 0.8947368421052632,
211
+ "eval_f1": 0.8682132746146587,
212
+ "eval_loss": 0.6935672760009766,
213
+ "eval_precision": 0.884741537654159,
214
+ "eval_recall": 0.8555191853064193,
215
+ "eval_runtime": 1.6547,
216
+ "eval_samples_per_second": 241.138,
217
+ "eval_steps_per_second": 30.218,
218
  "step": 1342
219
  },
220
  {
221
  "epoch": 12.0,
222
+ "grad_norm": 0.0037182692904025316,
223
  "learning_rate": 2e-05,
224
+ "loss": 0.0074,
225
  "step": 1464
226
  },
227
  {
228
  "epoch": 12.0,
229
+ "eval_accuracy": 0.9022556390977443,
230
+ "eval_f1": 0.8805765113084321,
231
+ "eval_loss": 0.6789939403533936,
232
+ "eval_precision": 0.8857796167247387,
233
+ "eval_recall": 0.8758410620112748,
234
+ "eval_runtime": 1.6567,
235
+ "eval_samples_per_second": 240.838,
236
+ "eval_steps_per_second": 30.18,
237
  "step": 1464
238
  },
239
  {
240
  "epoch": 13.0,
241
+ "grad_norm": 0.0025616472121328115,
242
  "learning_rate": 1.75e-05,
243
+ "loss": 0.0141,
244
  "step": 1586
245
  },
246
  {
247
  "epoch": 13.0,
248
+ "eval_accuracy": 0.8972431077694235,
249
+ "eval_f1": 0.8731122745782431,
250
+ "eval_loss": 0.6981470584869385,
251
+ "eval_precision": 0.8829705994654449,
252
+ "eval_recall": 0.864793598836152,
253
+ "eval_runtime": 1.6639,
254
+ "eval_samples_per_second": 239.794,
255
+ "eval_steps_per_second": 30.049,
256
  "step": 1586
257
  },
258
  {
259
  "epoch": 14.0,
260
+ "grad_norm": 0.006673410069197416,
261
  "learning_rate": 1.5e-05,
262
+ "loss": 0.0034,
263
  "step": 1708
264
  },
265
  {
266
  "epoch": 14.0,
267
+ "eval_accuracy": 0.8972431077694235,
268
+ "eval_f1": 0.8751002084335417,
269
+ "eval_loss": 0.7144644856452942,
270
+ "eval_precision": 0.8780701754385964,
271
+ "eval_recall": 0.8722949627204946,
272
+ "eval_runtime": 1.6531,
273
+ "eval_samples_per_second": 241.366,
274
+ "eval_steps_per_second": 30.246,
275
  "step": 1708
276
  },
277
  {
278
  "epoch": 15.0,
279
+ "grad_norm": 0.0030696168541908264,
280
  "learning_rate": 1.25e-05,
281
+ "loss": 0.0059,
282
  "step": 1830
283
  },
284
  {
285
  "epoch": 15.0,
286
+ "eval_accuracy": 0.899749373433584,
287
+ "eval_f1": 0.8758710801393728,
288
+ "eval_loss": 0.7303631901741028,
289
+ "eval_precision": 0.8870983228779925,
290
+ "eval_recall": 0.8665666484815421,
291
+ "eval_runtime": 1.6541,
292
+ "eval_samples_per_second": 241.215,
293
+ "eval_steps_per_second": 30.227,
294
  "step": 1830
295
  },
296
  {
297
  "epoch": 16.0,
298
+ "grad_norm": 0.0017388605047017336,
299
  "learning_rate": 1e-05,
300
+ "loss": 0.0056,
301
  "step": 1952
302
  },
303
  {
304
  "epoch": 16.0,
305
+ "eval_accuracy": 0.899749373433584,
306
+ "eval_f1": 0.879667048676036,
307
+ "eval_loss": 0.7517656683921814,
308
+ "eval_precision": 0.8778361344537815,
309
+ "eval_recall": 0.8815693762502272,
310
+ "eval_runtime": 1.6536,
311
+ "eval_samples_per_second": 241.288,
312
+ "eval_steps_per_second": 30.237,
313
  "step": 1952
314
  },
315
  {
316
  "epoch": 17.0,
317
+ "grad_norm": 0.002333118114620447,
318
  "learning_rate": 7.5e-06,
319
+ "loss": 0.0039,
320
  "step": 2074
321
  },
322
  {
323
  "epoch": 17.0,
324
+ "eval_accuracy": 0.9022556390977443,
325
+ "eval_f1": 0.8793019197207679,
326
+ "eval_loss": 0.7390431761741638,
327
+ "eval_precision": 0.8893184421534936,
328
+ "eval_recall": 0.8708401527550463,
329
+ "eval_runtime": 1.655,
330
+ "eval_samples_per_second": 241.08,
331
+ "eval_steps_per_second": 30.211,
332
  "step": 2074
333
  },
334
  {
335
  "epoch": 18.0,
336
+ "grad_norm": 0.0018157872837036848,
337
  "learning_rate": 5e-06,
338
+ "loss": 0.004,
339
  "step": 2196
340
  },
341
  {
342
  "epoch": 18.0,
343
+ "eval_accuracy": 0.9022556390977443,
344
+ "eval_f1": 0.8799463033398397,
345
+ "eval_loss": 0.764133095741272,
346
+ "eval_precision": 0.8874803397294746,
347
+ "eval_recall": 0.8733406073831607,
348
+ "eval_runtime": 1.6667,
349
+ "eval_samples_per_second": 239.389,
350
+ "eval_steps_per_second": 29.999,
351
  "step": 2196
352
  },
353
  {
354
  "epoch": 19.0,
355
+ "grad_norm": 0.0015570241957902908,
356
  "learning_rate": 2.5e-06,
357
+ "loss": 0.007,
358
  "step": 2318
359
  },
360
  {
361
  "epoch": 19.0,
362
+ "eval_accuracy": 0.9022556390977443,
363
+ "eval_f1": 0.8799463033398397,
364
+ "eval_loss": 0.7847548723220825,
365
+ "eval_precision": 0.8874803397294746,
366
+ "eval_recall": 0.8733406073831607,
367
+ "eval_runtime": 1.664,
368
+ "eval_samples_per_second": 239.788,
369
+ "eval_steps_per_second": 30.049,
370
  "step": 2318
371
  },
372
  {
373
  "epoch": 20.0,
374
+ "grad_norm": 0.002853752113878727,
375
  "learning_rate": 0.0,
376
+ "loss": 0.0042,
377
  "step": 2440
378
  },
379
  {
380
  "epoch": 20.0,
381
+ "eval_accuracy": 0.9022556390977443,
382
+ "eval_f1": 0.8799463033398397,
383
+ "eval_loss": 0.790817379951477,
384
+ "eval_precision": 0.8874803397294746,
385
+ "eval_recall": 0.8733406073831607,
386
+ "eval_runtime": 1.6678,
387
+ "eval_samples_per_second": 239.236,
388
+ "eval_steps_per_second": 29.979,
389
  "step": 2440
390
  },
391
  {
392
  "epoch": 20.0,
393
  "step": 2440,
394
  "total_flos": 7584162436176000.0,
395
+ "train_loss": 0.0588726386183598,
396
+ "train_runtime": 864.0501,
397
+ "train_samples_per_second": 84.208,
398
+ "train_steps_per_second": 2.824
399
  }
400
  ],
401
  "logging_steps": 500,