apwic commited on
Commit
7ec98c8
1 Parent(s): f9b7a51

End of training

Browse files
README.md CHANGED
@@ -1,4 +1,6 @@
1
  ---
 
 
2
  license: mit
3
  base_model: indolem/indobert-base-uncased
4
  tags:
 
1
  ---
2
+ language:
3
+ - id
4
  license: mit
5
  base_model: indolem/indobert-base-uncased
6
  tags:
all_results.json CHANGED
@@ -1,21 +1,21 @@
1
  {
2
- "accuracy": 0.9000989119683481,
3
  "epoch": 20.0,
4
  "eval_accuracy": 0.9022556390977443,
5
- "eval_f1": 0.884617951284618,
6
- "eval_loss": 0.2810901701450348,
7
- "eval_precision": 0.8772893772893773,
8
- "eval_recall": 0.8933442444080741,
9
- "eval_runtime": 5.7274,
10
  "eval_samples": 399,
11
- "eval_samples_per_second": 69.665,
12
- "eval_steps_per_second": 8.73,
13
- "f1": 0.881994210133999,
14
- "precision": 0.875823871126101,
15
- "recall": 0.8891323173658895,
16
- "train_loss": 0.21111928674041247,
17
- "train_runtime": 2283.5364,
18
  "train_samples": 3638,
19
- "train_samples_per_second": 31.863,
20
- "train_steps_per_second": 1.069
21
  }
 
1
  {
2
+ "accuracy": 0.9090009891196835,
3
  "epoch": 20.0,
4
  "eval_accuracy": 0.9022556390977443,
5
+ "eval_f1": 0.8811928811928812,
6
+ "eval_loss": 0.29284632205963135,
7
+ "eval_precision": 0.8842105263157894,
8
+ "eval_recall": 0.878341516639389,
9
+ "eval_runtime": 5.5622,
10
  "eval_samples": 399,
11
+ "eval_samples_per_second": 71.734,
12
+ "eval_steps_per_second": 8.989,
13
+ "f1": 0.8907735522904062,
14
+ "precision": 0.8900210970464135,
15
+ "recall": 0.8915373175070833,
16
+ "train_loss": 0.2130153269064231,
17
+ "train_runtime": 2280.4037,
18
  "train_samples": 3638,
19
+ "train_samples_per_second": 31.907,
20
+ "train_steps_per_second": 1.07
21
  }
eval_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 20.0,
3
  "eval_accuracy": 0.9022556390977443,
4
- "eval_f1": 0.884617951284618,
5
- "eval_loss": 0.2810901701450348,
6
- "eval_precision": 0.8772893772893773,
7
- "eval_recall": 0.8933442444080741,
8
- "eval_runtime": 5.7274,
9
  "eval_samples": 399,
10
- "eval_samples_per_second": 69.665,
11
- "eval_steps_per_second": 8.73
12
  }
 
1
  {
2
  "epoch": 20.0,
3
  "eval_accuracy": 0.9022556390977443,
4
+ "eval_f1": 0.8811928811928812,
5
+ "eval_loss": 0.29284632205963135,
6
+ "eval_precision": 0.8842105263157894,
7
+ "eval_recall": 0.878341516639389,
8
+ "eval_runtime": 5.5622,
9
  "eval_samples": 399,
10
+ "eval_samples_per_second": 71.734,
11
+ "eval_steps_per_second": 8.989
12
  }
predict_results.txt CHANGED
@@ -1,10 +1,10 @@
1
  index prediction
2
  0 1
3
- 1 0
4
  2 1
5
  3 1
6
- 4 0
7
- 5 1
8
  6 1
9
  7 1
10
  8 0
@@ -14,7 +14,7 @@ index prediction
14
  12 1
15
  13 1
16
  14 1
17
- 15 0
18
  16 1
19
  17 1
20
  18 1
@@ -24,49 +24,49 @@ index prediction
24
  22 0
25
  23 1
26
  24 1
27
- 25 1
28
- 26 1
29
  27 1
30
  28 1
31
  29 1
32
  30 1
33
  31 1
34
- 32 1
35
  33 1
36
- 34 0
37
  35 1
38
  36 1
39
- 37 0
40
  38 1
41
- 39 0
42
  40 1
43
- 41 1
44
  42 1
45
- 43 0
46
- 44 0
47
  45 0
48
- 46 1
49
  47 1
50
  48 1
51
- 49 0
52
  50 1
53
- 51 1
54
- 52 0
55
  53 1
56
  54 1
57
  55 1
58
  56 1
59
- 57 0
60
  58 1
61
  59 1
62
- 60 1
63
  61 1
64
  62 1
65
  63 1
66
- 64 1
67
  65 1
68
  66 1
69
- 67 1
70
  68 1
71
  69 1
72
  70 1
@@ -74,37 +74,37 @@ index prediction
74
  72 1
75
  73 1
76
  74 1
77
- 75 1
78
- 76 1
79
  77 1
80
  78 1
81
  79 1
82
  80 1
83
- 81 0
84
- 82 1
85
- 83 1
86
  84 1
87
- 85 0
88
  86 1
89
  87 1
90
  88 1
91
  89 1
92
- 90 1
93
  91 1
94
  92 1
95
  93 1
96
  94 1
97
  95 1
98
  96 1
99
- 97 0
100
  98 1
101
- 99 0
102
  100 1
103
- 101 0
104
- 102 1
105
  103 1
106
  104 1
107
- 105 1
108
  106 1
109
  107 1
110
  108 1
@@ -114,28 +114,28 @@ index prediction
114
  112 1
115
  113 1
116
  114 1
117
- 115 1
118
  116 1
119
  117 1
120
- 118 0
121
  119 1
122
- 120 1
123
- 121 1
124
  122 1
125
  123 1
126
- 124 1
127
  125 1
128
  126 1
129
- 127 1
130
  128 1
131
  129 1
132
- 130 1
133
- 131 0
134
  132 1
135
  133 1
136
  134 1
137
  135 1
138
- 136 0
139
  137 1
140
  138 1
141
  139 1
@@ -150,11 +150,11 @@ index prediction
150
  148 1
151
  149 1
152
  150 1
153
- 151 1
154
  152 1
155
  153 1
156
  154 1
157
- 155 0
158
  156 1
159
  157 1
160
  158 1
@@ -164,23 +164,23 @@ index prediction
164
  162 1
165
  163 1
166
  164 1
167
- 165 0
168
  166 1
169
- 167 1
170
- 168 0
171
  169 1
172
- 170 1
173
  171 1
174
  172 1
175
- 173 0
176
- 174 1
177
  175 1
178
  176 1
179
- 177 0
180
  178 1
181
- 179 1
182
  180 1
183
- 181 1
184
  182 1
185
  183 1
186
  184 1
@@ -191,20 +191,20 @@ index prediction
191
  189 1
192
  190 1
193
  191 1
194
- 192 1
195
  193 1
196
  194 1
197
  195 1
198
  196 1
199
  197 1
200
  198 1
201
- 199 0
202
  200 1
203
  201 1
204
  202 1
205
  203 1
206
  204 1
207
- 205 0
208
  206 1
209
  207 1
210
  208 1
@@ -213,20 +213,20 @@ index prediction
213
  211 1
214
  212 1
215
  213 1
216
- 214 0
217
- 215 1
218
- 216 0
219
  217 1
220
- 218 0
221
  219 1
222
- 220 0
223
  221 1
224
  222 1
225
  223 1
226
  224 0
227
  225 1
228
- 226 0
229
- 227 0
230
  228 1
231
  229 0
232
  230 1
@@ -234,9 +234,9 @@ index prediction
234
  232 1
235
  233 1
236
  234 1
237
- 235 1
238
  236 1
239
- 237 1
240
  238 1
241
  239 1
242
  240 1
@@ -245,14 +245,14 @@ index prediction
245
  243 1
246
  244 1
247
  245 1
248
- 246 0
249
  247 1
250
- 248 1
251
  249 1
252
- 250 0
253
  251 1
254
  252 1
255
- 253 1
256
  254 1
257
  255 1
258
  256 1
@@ -267,20 +267,20 @@ index prediction
267
  265 1
268
  266 1
269
  267 1
270
- 268 1
271
  269 1
272
  270 1
273
  271 1
274
- 272 1
275
  273 1
276
  274 1
277
- 275 1
278
- 276 1
279
  277 1
280
  278 1
281
  279 1
282
- 280 0
283
- 281 1
284
  282 1
285
  283 1
286
  284 1
@@ -288,7 +288,7 @@ index prediction
288
  286 1
289
  287 1
290
  288 1
291
- 289 0
292
  290 1
293
  291 1
294
  292 1
@@ -301,33 +301,33 @@ index prediction
301
  299 0
302
  300 0
303
  301 0
304
- 302 1
305
  303 0
306
  304 0
307
- 305 1
308
  306 0
309
  307 0
310
  308 0
311
- 309 0
312
  310 0
313
  311 0
314
  312 0
315
  313 0
316
- 314 1
317
  315 0
318
- 316 1
319
  317 0
320
- 318 1
321
  319 0
322
  320 0
323
  321 0
324
  322 0
325
- 323 0
326
- 324 0
327
  325 0
328
  326 0
329
  327 0
330
- 328 1
331
  329 0
332
  330 0
333
  331 0
@@ -337,11 +337,11 @@ index prediction
337
  335 0
338
  336 0
339
  337 0
340
- 338 0
341
  339 0
342
- 340 0
343
  341 0
344
- 342 0
345
  343 0
346
  344 0
347
  345 0
@@ -350,7 +350,7 @@ index prediction
350
  348 0
351
  349 0
352
  350 0
353
- 351 1
354
  352 0
355
  353 0
356
  354 0
@@ -365,7 +365,7 @@ index prediction
365
  363 0
366
  364 0
367
  365 0
368
- 366 1
369
  367 0
370
  368 0
371
  369 0
@@ -381,7 +381,7 @@ index prediction
381
  379 0
382
  380 0
383
  381 0
384
- 382 0
385
  383 0
386
  384 0
387
  385 0
@@ -400,8 +400,8 @@ index prediction
400
  398 1
401
  399 0
402
  400 0
403
- 401 1
404
- 402 1
405
  403 0
406
  404 0
407
  405 0
@@ -419,26 +419,26 @@ index prediction
419
  417 0
420
  418 0
421
  419 0
422
- 420 1
423
  421 0
424
  422 0
425
  423 0
426
  424 0
427
- 425 1
428
  426 0
429
  427 0
430
  428 0
431
- 429 0
432
  430 0
433
  431 0
434
  432 0
435
  433 0
436
- 434 0
437
- 435 0
438
  436 0
439
  437 0
440
  438 0
441
- 439 1
442
  440 0
443
  441 0
444
  442 0
@@ -446,7 +446,7 @@ index prediction
446
  444 0
447
  445 0
448
  446 0
449
- 447 1
450
  448 0
451
  449 0
452
  450 0
@@ -454,15 +454,15 @@ index prediction
454
  452 0
455
  453 0
456
  454 0
457
- 455 0
458
  456 0
459
- 457 1
460
  458 0
461
  459 0
462
  460 0
463
  461 0
464
  462 0
465
- 463 0
466
  464 0
467
  465 0
468
  466 0
@@ -486,18 +486,18 @@ index prediction
486
  484 0
487
  485 0
488
  486 0
489
- 487 0
490
  488 0
491
  489 0
492
  490 0
493
- 491 0
494
  492 0
495
  493 0
496
  494 0
497
  495 0
498
  496 0
499
  497 0
500
- 498 0
501
  499 0
502
  500 0
503
  501 0
@@ -510,7 +510,7 @@ index prediction
510
  508 0
511
  509 0
512
  510 0
513
- 511 1
514
  512 0
515
  513 0
516
  514 0
@@ -521,14 +521,14 @@ index prediction
521
  519 0
522
  520 0
523
  521 0
524
- 522 0
525
- 523 0
526
  524 0
527
  525 0
528
  526 0
529
  527 0
530
  528 0
531
- 529 1
532
  530 0
533
  531 0
534
  532 0
@@ -559,7 +559,7 @@ index prediction
559
  557 0
560
  558 0
561
  559 0
562
- 560 1
563
  561 0
564
  562 0
565
  563 0
@@ -578,10 +578,10 @@ index prediction
578
  576 0
579
  577 0
580
  578 0
581
- 579 0
582
  580 0
583
- 581 0
584
- 582 0
585
  583 0
586
  584 0
587
  585 0
@@ -590,12 +590,12 @@ index prediction
590
  588 0
591
  589 0
592
  590 0
593
- 591 0
594
  592 0
595
  593 0
596
  594 0
597
  595 0
598
- 596 1
599
  597 0
600
  598 0
601
  599 0
@@ -607,10 +607,10 @@ index prediction
607
  605 0
608
  606 0
609
  607 0
610
- 608 1
611
  609 0
612
- 610 1
613
- 611 1
614
  612 0
615
  613 0
616
  614 0
@@ -620,7 +620,7 @@ index prediction
620
  618 0
621
  619 0
622
  620 0
623
- 621 1
624
  622 0
625
  623 0
626
  624 0
@@ -632,13 +632,13 @@ index prediction
632
  630 0
633
  631 0
634
  632 0
635
- 633 1
636
  634 0
637
- 635 1
638
  636 0
639
  637 0
640
  638 0
641
- 639 1
642
  640 0
643
  641 0
644
  642 0
@@ -651,12 +651,12 @@ index prediction
651
  649 0
652
  650 0
653
  651 0
654
- 652 1
655
  653 0
656
  654 0
657
  655 0
658
  656 0
659
- 657 1
660
  658 0
661
  659 0
662
  660 0
@@ -664,15 +664,15 @@ index prediction
664
  662 0
665
  663 0
666
  664 0
667
- 665 0
668
  666 0
669
- 667 0
670
  668 0
671
- 669 1
672
  670 0
673
  671 0
674
  672 0
675
- 673 0
676
  674 0
677
  675 0
678
  676 0
@@ -687,7 +687,7 @@ index prediction
687
  685 0
688
  686 0
689
  687 0
690
- 688 0
691
  689 0
692
  690 0
693
  691 0
@@ -709,12 +709,12 @@ index prediction
709
  707 0
710
  708 0
711
  709 0
712
- 710 0
713
  711 0
714
- 712 0
715
  713 0
716
  714 0
717
- 715 0
718
  716 0
719
  717 0
720
  718 0
@@ -726,24 +726,24 @@ index prediction
726
  724 0
727
  725 0
728
  726 0
729
- 727 1
730
- 728 1
731
  729 0
732
  730 0
733
  731 0
734
  732 0
735
- 733 1
736
- 734 0
737
  735 0
738
  736 0
739
- 737 0
740
  738 0
741
  739 0
742
  740 0
743
  741 0
744
  742 0
745
  743 0
746
- 744 0
747
  745 0
748
  746 0
749
  747 0
@@ -755,28 +755,28 @@ index prediction
755
  753 0
756
  754 0
757
  755 0
758
- 756 1
759
  757 0
760
  758 0
761
  759 0
762
  760 0
763
  761 0
764
- 762 0
765
  763 0
766
  764 0
767
- 765 1
768
  766 0
769
  767 0
770
  768 0
771
  769 0
772
- 770 1
773
  771 0
774
  772 0
775
  773 0
776
- 774 0
777
  775 0
778
  776 0
779
- 777 1
780
  778 0
781
  779 0
782
  780 0
@@ -785,7 +785,7 @@ index prediction
785
  783 0
786
  784 0
787
  785 0
788
- 786 0
789
  787 0
790
  788 0
791
  789 0
@@ -813,12 +813,12 @@ index prediction
813
  811 0
814
  812 0
815
  813 0
816
- 814 1
817
  815 0
818
  816 0
819
  817 0
820
  818 0
821
- 819 1
822
  820 0
823
  821 0
824
  822 0
@@ -831,8 +831,8 @@ index prediction
831
  829 0
832
  830 0
833
  831 0
834
- 832 1
835
- 833 1
836
  834 0
837
  835 0
838
  836 0
@@ -848,15 +848,15 @@ index prediction
848
  846 0
849
  847 0
850
  848 0
851
- 849 1
852
  850 0
853
- 851 0
854
  852 0
855
  853 0
856
- 854 0
857
  855 0
858
  856 0
859
- 857 0
860
  858 0
861
  859 0
862
  860 0
@@ -864,10 +864,10 @@ index prediction
864
  862 0
865
  863 0
866
  864 0
867
- 865 0
868
  866 0
869
  867 0
870
- 868 1
871
  869 0
872
  870 0
873
  871 0
@@ -882,7 +882,7 @@ index prediction
882
  880 0
883
  881 0
884
  882 0
885
- 883 1
886
  884 0
887
  885 0
888
  886 0
@@ -890,13 +890,13 @@ index prediction
890
  888 0
891
  889 0
892
  890 0
893
- 891 1
894
  892 0
895
- 893 0
896
  894 0
897
  895 0
898
- 896 0
899
- 897 1
900
  898 0
901
  899 0
902
  900 0
@@ -905,8 +905,8 @@ index prediction
905
  903 0
906
  904 0
907
  905 0
908
- 906 1
909
- 907 1
910
  908 0
911
  909 0
912
  910 0
@@ -915,7 +915,7 @@ index prediction
915
  913 0
916
  914 0
917
  915 0
918
- 916 1
919
  917 0
920
  918 0
921
  919 0
@@ -927,36 +927,36 @@ index prediction
927
  925 0
928
  926 0
929
  927 0
930
- 928 1
931
  929 0
932
  930 0
933
  931 0
934
  932 0
935
  933 0
936
  934 0
937
- 935 0
938
- 936 0
939
  937 0
940
  938 0
941
  939 0
942
  940 0
943
  941 0
944
  942 0
945
- 943 1
946
  944 0
947
  945 0
948
  946 0
949
  947 0
950
  948 0
951
  949 0
952
- 950 1
953
  951 0
954
  952 0
955
  953 0
956
  954 0
957
  955 0
958
  956 0
959
- 957 1
960
  958 0
961
  959 0
962
  960 0
@@ -968,7 +968,7 @@ index prediction
968
  966 0
969
  967 0
970
  968 0
971
- 969 1
972
  970 0
973
  971 0
974
  972 0
@@ -979,26 +979,26 @@ index prediction
979
  977 0
980
  978 0
981
  979 0
982
- 980 1
983
  981 0
984
  982 0
985
  983 0
986
  984 0
987
  985 0
988
- 986 1
989
  987 0
990
  988 0
991
  989 0
992
  990 0
993
- 991 1
994
  992 0
995
- 993 0
996
  994 0
997
- 995 0
998
  996 0
999
  997 0
1000
  998 0
1001
- 999 1
1002
  1000 0
1003
  1001 0
1004
  1002 0
 
1
  index prediction
2
  0 1
3
+ 1 1
4
  2 1
5
  3 1
6
+ 4 1
7
+ 5 0
8
  6 1
9
  7 1
10
  8 0
 
14
  12 1
15
  13 1
16
  14 1
17
+ 15 1
18
  16 1
19
  17 1
20
  18 1
 
24
  22 0
25
  23 1
26
  24 1
27
+ 25 0
28
+ 26 0
29
  27 1
30
  28 1
31
  29 1
32
  30 1
33
  31 1
34
+ 32 0
35
  33 1
36
+ 34 1
37
  35 1
38
  36 1
39
+ 37 1
40
  38 1
41
+ 39 1
42
  40 1
43
+ 41 0
44
  42 1
45
+ 43 1
46
+ 44 1
47
  45 0
48
+ 46 0
49
  47 1
50
  48 1
51
+ 49 1
52
  50 1
53
+ 51 0
54
+ 52 1
55
  53 1
56
  54 1
57
  55 1
58
  56 1
59
+ 57 1
60
  58 1
61
  59 1
62
+ 60 0
63
  61 1
64
  62 1
65
  63 1
66
+ 64 0
67
  65 1
68
  66 1
69
+ 67 0
70
  68 1
71
  69 1
72
  70 1
 
74
  72 1
75
  73 1
76
  74 1
77
+ 75 0
78
+ 76 0
79
  77 1
80
  78 1
81
  79 1
82
  80 1
83
+ 81 1
84
+ 82 0
85
+ 83 0
86
  84 1
87
+ 85 1
88
  86 1
89
  87 1
90
  88 1
91
  89 1
92
+ 90 0
93
  91 1
94
  92 1
95
  93 1
96
  94 1
97
  95 1
98
  96 1
99
+ 97 1
100
  98 1
101
+ 99 1
102
  100 1
103
+ 101 1
104
+ 102 0
105
  103 1
106
  104 1
107
+ 105 0
108
  106 1
109
  107 1
110
  108 1
 
114
  112 1
115
  113 1
116
  114 1
117
+ 115 0
118
  116 1
119
  117 1
120
+ 118 1
121
  119 1
122
+ 120 0
123
+ 121 0
124
  122 1
125
  123 1
126
+ 124 0
127
  125 1
128
  126 1
129
+ 127 0
130
  128 1
131
  129 1
132
+ 130 0
133
+ 131 1
134
  132 1
135
  133 1
136
  134 1
137
  135 1
138
+ 136 1
139
  137 1
140
  138 1
141
  139 1
 
150
  148 1
151
  149 1
152
  150 1
153
+ 151 0
154
  152 1
155
  153 1
156
  154 1
157
+ 155 1
158
  156 1
159
  157 1
160
  158 1
 
164
  162 1
165
  163 1
166
  164 1
167
+ 165 1
168
  166 1
169
+ 167 0
170
+ 168 1
171
  169 1
172
+ 170 0
173
  171 1
174
  172 1
175
+ 173 1
176
+ 174 0
177
  175 1
178
  176 1
179
+ 177 1
180
  178 1
181
+ 179 0
182
  180 1
183
+ 181 0
184
  182 1
185
  183 1
186
  184 1
 
191
  189 1
192
  190 1
193
  191 1
194
+ 192 0
195
  193 1
196
  194 1
197
  195 1
198
  196 1
199
  197 1
200
  198 1
201
+ 199 1
202
  200 1
203
  201 1
204
  202 1
205
  203 1
206
  204 1
207
+ 205 1
208
  206 1
209
  207 1
210
  208 1
 
213
  211 1
214
  212 1
215
  213 1
216
+ 214 1
217
+ 215 0
218
+ 216 1
219
  217 1
220
+ 218 1
221
  219 1
222
+ 220 1
223
  221 1
224
  222 1
225
  223 1
226
  224 0
227
  225 1
228
+ 226 1
229
+ 227 1
230
  228 1
231
  229 0
232
  230 1
 
234
  232 1
235
  233 1
236
  234 1
237
+ 235 0
238
  236 1
239
+ 237 0
240
  238 1
241
  239 1
242
  240 1
 
245
  243 1
246
  244 1
247
  245 1
248
+ 246 1
249
  247 1
250
+ 248 0
251
  249 1
252
+ 250 1
253
  251 1
254
  252 1
255
+ 253 0
256
  254 1
257
  255 1
258
  256 1
 
267
  265 1
268
  266 1
269
  267 1
270
+ 268 0
271
  269 1
272
  270 1
273
  271 1
274
+ 272 0
275
  273 1
276
  274 1
277
+ 275 0
278
+ 276 0
279
  277 1
280
  278 1
281
  279 1
282
+ 280 1
283
+ 281 0
284
  282 1
285
  283 1
286
  284 1
 
288
  286 1
289
  287 1
290
  288 1
291
+ 289 1
292
  290 1
293
  291 1
294
  292 1
 
301
  299 0
302
  300 0
303
  301 0
304
+ 302 0
305
  303 0
306
  304 0
307
+ 305 0
308
  306 0
309
  307 0
310
  308 0
311
+ 309 1
312
  310 0
313
  311 0
314
  312 0
315
  313 0
316
+ 314 0
317
  315 0
318
+ 316 0
319
  317 0
320
+ 318 0
321
  319 0
322
  320 0
323
  321 0
324
  322 0
325
+ 323 1
326
+ 324 1
327
  325 0
328
  326 0
329
  327 0
330
+ 328 0
331
  329 0
332
  330 0
333
  331 0
 
337
  335 0
338
  336 0
339
  337 0
340
+ 338 1
341
  339 0
342
+ 340 1
343
  341 0
344
+ 342 1
345
  343 0
346
  344 0
347
  345 0
 
350
  348 0
351
  349 0
352
  350 0
353
+ 351 0
354
  352 0
355
  353 0
356
  354 0
 
365
  363 0
366
  364 0
367
  365 0
368
+ 366 0
369
  367 0
370
  368 0
371
  369 0
 
381
  379 0
382
  380 0
383
  381 0
384
+ 382 1
385
  383 0
386
  384 0
387
  385 0
 
400
  398 1
401
  399 0
402
  400 0
403
+ 401 0
404
+ 402 0
405
  403 0
406
  404 0
407
  405 0
 
419
  417 0
420
  418 0
421
  419 0
422
+ 420 0
423
  421 0
424
  422 0
425
  423 0
426
  424 0
427
+ 425 0
428
  426 0
429
  427 0
430
  428 0
431
+ 429 1
432
  430 0
433
  431 0
434
  432 0
435
  433 0
436
+ 434 1
437
+ 435 1
438
  436 0
439
  437 0
440
  438 0
441
+ 439 0
442
  440 0
443
  441 0
444
  442 0
 
446
  444 0
447
  445 0
448
  446 0
449
+ 447 0
450
  448 0
451
  449 0
452
  450 0
 
454
  452 0
455
  453 0
456
  454 0
457
+ 455 1
458
  456 0
459
+ 457 0
460
  458 0
461
  459 0
462
  460 0
463
  461 0
464
  462 0
465
+ 463 1
466
  464 0
467
  465 0
468
  466 0
 
486
  484 0
487
  485 0
488
  486 0
489
+ 487 1
490
  488 0
491
  489 0
492
  490 0
493
+ 491 1
494
  492 0
495
  493 0
496
  494 0
497
  495 0
498
  496 0
499
  497 0
500
+ 498 1
501
  499 0
502
  500 0
503
  501 0
 
510
  508 0
511
  509 0
512
  510 0
513
+ 511 0
514
  512 0
515
  513 0
516
  514 0
 
521
  519 0
522
  520 0
523
  521 0
524
+ 522 1
525
+ 523 1
526
  524 0
527
  525 0
528
  526 0
529
  527 0
530
  528 0
531
+ 529 0
532
  530 0
533
  531 0
534
  532 0
 
559
  557 0
560
  558 0
561
  559 0
562
+ 560 0
563
  561 0
564
  562 0
565
  563 0
 
578
  576 0
579
  577 0
580
  578 0
581
+ 579 1
582
  580 0
583
+ 581 1
584
+ 582 1
585
  583 0
586
  584 0
587
  585 0
 
590
  588 0
591
  589 0
592
  590 0
593
+ 591 1
594
  592 0
595
  593 0
596
  594 0
597
  595 0
598
+ 596 0
599
  597 0
600
  598 0
601
  599 0
 
607
  605 0
608
  606 0
609
  607 0
610
+ 608 0
611
  609 0
612
+ 610 0
613
+ 611 0
614
  612 0
615
  613 0
616
  614 0
 
620
  618 0
621
  619 0
622
  620 0
623
+ 621 0
624
  622 0
625
  623 0
626
  624 0
 
632
  630 0
633
  631 0
634
  632 0
635
+ 633 0
636
  634 0
637
+ 635 0
638
  636 0
639
  637 0
640
  638 0
641
+ 639 0
642
  640 0
643
  641 0
644
  642 0
 
651
  649 0
652
  650 0
653
  651 0
654
+ 652 0
655
  653 0
656
  654 0
657
  655 0
658
  656 0
659
+ 657 0
660
  658 0
661
  659 0
662
  660 0
 
664
  662 0
665
  663 0
666
  664 0
667
+ 665 1
668
  666 0
669
+ 667 1
670
  668 0
671
+ 669 0
672
  670 0
673
  671 0
674
  672 0
675
+ 673 1
676
  674 0
677
  675 0
678
  676 0
 
687
  685 0
688
  686 0
689
  687 0
690
+ 688 1
691
  689 0
692
  690 0
693
  691 0
 
709
  707 0
710
  708 0
711
  709 0
712
+ 710 1
713
  711 0
714
+ 712 1
715
  713 0
716
  714 0
717
+ 715 1
718
  716 0
719
  717 0
720
  718 0
 
726
  724 0
727
  725 0
728
  726 0
729
+ 727 0
730
+ 728 0
731
  729 0
732
  730 0
733
  731 0
734
  732 0
735
+ 733 0
736
+ 734 1
737
  735 0
738
  736 0
739
+ 737 1
740
  738 0
741
  739 0
742
  740 0
743
  741 0
744
  742 0
745
  743 0
746
+ 744 1
747
  745 0
748
  746 0
749
  747 0
 
755
  753 0
756
  754 0
757
  755 0
758
+ 756 0
759
  757 0
760
  758 0
761
  759 0
762
  760 0
763
  761 0
764
+ 762 1
765
  763 0
766
  764 0
767
+ 765 0
768
  766 0
769
  767 0
770
  768 0
771
  769 0
772
+ 770 0
773
  771 0
774
  772 0
775
  773 0
776
+ 774 1
777
  775 0
778
  776 0
779
+ 777 0
780
  778 0
781
  779 0
782
  780 0
 
785
  783 0
786
  784 0
787
  785 0
788
+ 786 1
789
  787 0
790
  788 0
791
  789 0
 
813
  811 0
814
  812 0
815
  813 0
816
+ 814 0
817
  815 0
818
  816 0
819
  817 0
820
  818 0
821
+ 819 0
822
  820 0
823
  821 0
824
  822 0
 
831
  829 0
832
  830 0
833
  831 0
834
+ 832 0
835
+ 833 0
836
  834 0
837
  835 0
838
  836 0
 
848
  846 0
849
  847 0
850
  848 0
851
+ 849 0
852
  850 0
853
+ 851 1
854
  852 0
855
  853 0
856
+ 854 1
857
  855 0
858
  856 0
859
+ 857 1
860
  858 0
861
  859 0
862
  860 0
 
864
  862 0
865
  863 0
866
  864 0
867
+ 865 1
868
  866 0
869
  867 0
870
+ 868 0
871
  869 0
872
  870 0
873
  871 0
 
882
  880 0
883
  881 0
884
  882 0
885
+ 883 0
886
  884 0
887
  885 0
888
  886 0
 
890
  888 0
891
  889 0
892
  890 0
893
+ 891 0
894
  892 0
895
+ 893 1
896
  894 0
897
  895 0
898
+ 896 1
899
+ 897 0
900
  898 0
901
  899 0
902
  900 0
 
905
  903 0
906
  904 0
907
  905 0
908
+ 906 0
909
+ 907 0
910
  908 0
911
  909 0
912
  910 0
 
915
  913 0
916
  914 0
917
  915 0
918
+ 916 0
919
  917 0
920
  918 0
921
  919 0
 
927
  925 0
928
  926 0
929
  927 0
930
+ 928 0
931
  929 0
932
  930 0
933
  931 0
934
  932 0
935
  933 0
936
  934 0
937
+ 935 1
938
+ 936 1
939
  937 0
940
  938 0
941
  939 0
942
  940 0
943
  941 0
944
  942 0
945
+ 943 0
946
  944 0
947
  945 0
948
  946 0
949
  947 0
950
  948 0
951
  949 0
952
+ 950 0
953
  951 0
954
  952 0
955
  953 0
956
  954 0
957
  955 0
958
  956 0
959
+ 957 0
960
  958 0
961
  959 0
962
  960 0
 
968
  966 0
969
  967 0
970
  968 0
971
+ 969 0
972
  970 0
973
  971 0
974
  972 0
 
979
  977 0
980
  978 0
981
  979 0
982
+ 980 0
983
  981 0
984
  982 0
985
  983 0
986
  984 0
987
  985 0
988
+ 986 0
989
  987 0
990
  988 0
991
  989 0
992
  990 0
993
+ 991 0
994
  992 0
995
+ 993 1
996
  994 0
997
+ 995 1
998
  996 0
999
  997 0
1000
  998 0
1001
+ 999 0
1002
  1000 0
1003
  1001 0
1004
  1002 0
runs/May20_05-56-39_indolem-petl-vm/events.out.tfevents.1716186904.indolem-petl-vm.2737008.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9636c835e603941ce911ab7e6300870d7602cc9d7dbe5130e7d4590b71f43a6a
3
+ size 560
test_results.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "accuracy": 0.9000989119683481,
3
- "f1": 0.881994210133999,
4
- "precision": 0.875823871126101,
5
- "recall": 0.8891323173658895
6
  }
 
1
  {
2
+ "accuracy": 0.9090009891196835,
3
+ "f1": 0.8907735522904062,
4
+ "precision": 0.8900210970464135,
5
+ "recall": 0.8915373175070833
6
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
- "train_loss": 0.21111928674041247,
4
- "train_runtime": 2283.5364,
5
  "train_samples": 3638,
6
- "train_samples_per_second": 31.863,
7
- "train_steps_per_second": 1.069
8
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "train_loss": 0.2130153269064231,
4
+ "train_runtime": 2280.4037,
5
  "train_samples": 3638,
6
+ "train_samples_per_second": 31.907,
7
+ "train_steps_per_second": 1.07
8
  }
trainer_state.json CHANGED
@@ -10,392 +10,392 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 4.3501811027526855,
14
  "learning_rate": 4.75e-05,
15
- "loss": 0.5459,
16
  "step": 122
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_accuracy": 0.7468671679197995,
21
- "eval_f1": 0.6572777139941319,
22
- "eval_loss": 0.46394461393356323,
23
- "eval_precision": 0.6922477833692786,
24
- "eval_recall": 0.6458901618476087,
25
- "eval_runtime": 5.3143,
26
- "eval_samples_per_second": 75.081,
27
- "eval_steps_per_second": 9.409,
28
  "step": 122
29
  },
30
  {
31
  "epoch": 2.0,
32
- "grad_norm": 3.848093271255493,
33
  "learning_rate": 4.5e-05,
34
- "loss": 0.4335,
35
  "step": 244
36
  },
37
  {
38
  "epoch": 2.0,
39
- "eval_accuracy": 0.7844611528822055,
40
- "eval_f1": 0.7634169884169884,
41
- "eval_loss": 0.41080254316329956,
42
- "eval_precision": 0.7551731309140064,
43
- "eval_recall": 0.7975086379341698,
44
- "eval_runtime": 5.5615,
45
- "eval_samples_per_second": 71.743,
46
- "eval_steps_per_second": 8.99,
47
  "step": 244
48
  },
49
  {
50
  "epoch": 3.0,
51
- "grad_norm": 4.800747394561768,
52
  "learning_rate": 4.25e-05,
53
- "loss": 0.3375,
54
  "step": 366
55
  },
56
  {
57
  "epoch": 3.0,
58
- "eval_accuracy": 0.8596491228070176,
59
- "eval_f1": 0.8271551457392166,
60
- "eval_loss": 0.3282613754272461,
61
- "eval_precision": 0.8347358430876305,
62
- "eval_recall": 0.8206946717585015,
63
- "eval_runtime": 5.5806,
64
- "eval_samples_per_second": 71.498,
65
- "eval_steps_per_second": 8.96,
66
  "step": 366
67
  },
68
  {
69
  "epoch": 4.0,
70
- "grad_norm": 2.052788019180298,
71
  "learning_rate": 4e-05,
72
- "loss": 0.2801,
73
  "step": 488
74
  },
75
  {
76
  "epoch": 4.0,
77
- "eval_accuracy": 0.8596491228070176,
78
- "eval_f1": 0.8347043853938569,
79
- "eval_loss": 0.32024893164634705,
80
- "eval_precision": 0.8277993283927745,
81
- "eval_recall": 0.8431987634115294,
82
- "eval_runtime": 5.6891,
83
- "eval_samples_per_second": 70.134,
84
- "eval_steps_per_second": 8.789,
85
  "step": 488
86
  },
87
  {
88
  "epoch": 5.0,
89
- "grad_norm": 0.35927698016166687,
90
  "learning_rate": 3.7500000000000003e-05,
91
- "loss": 0.2572,
92
  "step": 610
93
  },
94
  {
95
  "epoch": 5.0,
96
- "eval_accuracy": 0.87468671679198,
97
- "eval_f1": 0.8550061050061051,
98
- "eval_loss": 0.31085968017578125,
99
- "eval_precision": 0.8437710437710437,
100
- "eval_recall": 0.8713402436806692,
101
- "eval_runtime": 5.6502,
102
- "eval_samples_per_second": 70.617,
103
- "eval_steps_per_second": 8.849,
104
  "step": 610
105
  },
106
  {
107
  "epoch": 6.0,
108
- "grad_norm": 1.9262036085128784,
109
  "learning_rate": 3.5e-05,
110
- "loss": 0.2339,
111
  "step": 732
112
  },
113
  {
114
  "epoch": 6.0,
115
- "eval_accuracy": 0.8671679197994987,
116
- "eval_f1": 0.8472754847795472,
117
- "eval_loss": 0.3074397146701813,
118
- "eval_precision": 0.8352906879893387,
119
- "eval_recall": 0.8660210947444991,
120
- "eval_runtime": 5.5676,
121
- "eval_samples_per_second": 71.664,
122
- "eval_steps_per_second": 8.98,
123
  "step": 732
124
  },
125
  {
126
  "epoch": 7.0,
127
- "grad_norm": 0.7879945635795593,
128
  "learning_rate": 3.2500000000000004e-05,
129
- "loss": 0.2249,
130
  "step": 854
131
  },
132
  {
133
  "epoch": 7.0,
134
- "eval_accuracy": 0.8671679197994987,
135
- "eval_f1": 0.8472754847795472,
136
- "eval_loss": 0.2915370464324951,
137
- "eval_precision": 0.8352906879893387,
138
- "eval_recall": 0.8660210947444991,
139
- "eval_runtime": 5.5784,
140
- "eval_samples_per_second": 71.526,
141
- "eval_steps_per_second": 8.963,
142
  "step": 854
143
  },
144
  {
145
  "epoch": 8.0,
146
- "grad_norm": 2.2202210426330566,
147
  "learning_rate": 3e-05,
148
- "loss": 0.193,
149
  "step": 976
150
  },
151
  {
152
  "epoch": 8.0,
153
- "eval_accuracy": 0.8972431077694235,
154
- "eval_f1": 0.8751002084335417,
155
- "eval_loss": 0.25404733419418335,
156
- "eval_precision": 0.8780701754385964,
157
- "eval_recall": 0.8722949627204946,
158
- "eval_runtime": 5.5919,
159
- "eval_samples_per_second": 71.353,
160
- "eval_steps_per_second": 8.942,
161
  "step": 976
162
  },
163
  {
164
  "epoch": 9.0,
165
- "grad_norm": 8.847825050354004,
166
  "learning_rate": 2.7500000000000004e-05,
167
- "loss": 0.1899,
168
  "step": 1098
169
  },
170
  {
171
  "epoch": 9.0,
172
- "eval_accuracy": 0.8822055137844611,
173
- "eval_f1": 0.8628173897776901,
174
- "eval_loss": 0.26359453797340393,
175
- "eval_precision": 0.8526132107592781,
176
- "eval_recall": 0.8766593926168395,
177
- "eval_runtime": 5.5857,
178
- "eval_samples_per_second": 71.432,
179
- "eval_steps_per_second": 8.951,
180
  "step": 1098
181
  },
182
  {
183
  "epoch": 10.0,
184
- "grad_norm": 1.7180150747299194,
185
  "learning_rate": 2.5e-05,
186
- "loss": 0.1801,
187
  "step": 1220
188
  },
189
  {
190
  "epoch": 10.0,
191
- "eval_accuracy": 0.9072681704260651,
192
- "eval_f1": 0.8900228699985846,
193
- "eval_loss": 0.23711469769477844,
194
- "eval_precision": 0.8840175953079179,
195
- "eval_recall": 0.8968903436988543,
196
- "eval_runtime": 5.6079,
197
- "eval_samples_per_second": 71.15,
198
- "eval_steps_per_second": 8.916,
199
  "step": 1220
200
  },
201
  {
202
  "epoch": 11.0,
203
- "grad_norm": 0.6990593671798706,
204
  "learning_rate": 2.25e-05,
205
- "loss": 0.157,
206
  "step": 1342
207
  },
208
  {
209
  "epoch": 11.0,
210
- "eval_accuracy": 0.899749373433584,
211
- "eval_f1": 0.882467302933899,
212
- "eval_loss": 0.2566535174846649,
213
- "eval_precision": 0.8732988802756245,
214
- "eval_recall": 0.8940716493907983,
215
- "eval_runtime": 5.5787,
216
- "eval_samples_per_second": 71.522,
217
- "eval_steps_per_second": 8.963,
218
  "step": 1342
219
  },
220
  {
221
  "epoch": 12.0,
222
- "grad_norm": 3.7353861331939697,
223
  "learning_rate": 2e-05,
224
  "loss": 0.1553,
225
  "step": 1464
226
  },
227
  {
228
  "epoch": 12.0,
229
- "eval_accuracy": 0.8972431077694235,
230
- "eval_f1": 0.8792560061999484,
231
- "eval_loss": 0.25933051109313965,
232
- "eval_precision": 0.8707622232472325,
233
- "eval_recall": 0.889798145117294,
234
- "eval_runtime": 5.6004,
235
- "eval_samples_per_second": 71.245,
236
- "eval_steps_per_second": 8.928,
237
  "step": 1464
238
  },
239
  {
240
  "epoch": 13.0,
241
- "grad_norm": 1.2111871242523193,
242
  "learning_rate": 1.75e-05,
243
- "loss": 0.1381,
244
  "step": 1586
245
  },
246
  {
247
  "epoch": 13.0,
248
- "eval_accuracy": 0.9172932330827067,
249
- "eval_f1": 0.8999810095332144,
250
- "eval_loss": 0.24901245534420013,
251
- "eval_precision": 0.9009991470695747,
252
- "eval_recall": 0.8989816330241862,
253
- "eval_runtime": 5.6194,
254
- "eval_samples_per_second": 71.004,
255
- "eval_steps_per_second": 8.898,
256
  "step": 1586
257
  },
258
  {
259
  "epoch": 14.0,
260
- "grad_norm": 9.943241119384766,
261
  "learning_rate": 1.5e-05,
262
- "loss": 0.1476,
263
  "step": 1708
264
  },
265
  {
266
  "epoch": 14.0,
267
- "eval_accuracy": 0.899749373433584,
268
- "eval_f1": 0.881931703852755,
269
- "eval_loss": 0.27014100551605225,
270
- "eval_precision": 0.8740012737378415,
271
- "eval_recall": 0.8915711947626841,
272
- "eval_runtime": 5.611,
273
- "eval_samples_per_second": 71.11,
274
- "eval_steps_per_second": 8.911,
275
  "step": 1708
276
  },
277
  {
278
  "epoch": 15.0,
279
- "grad_norm": 0.20924903452396393,
280
  "learning_rate": 1.25e-05,
281
- "loss": 0.1447,
282
  "step": 1830
283
  },
284
  {
285
  "epoch": 15.0,
286
- "eval_accuracy": 0.9122807017543859,
287
- "eval_f1": 0.8959675797283908,
288
- "eval_loss": 0.2611282765865326,
289
- "eval_precision": 0.8898680351906159,
290
- "eval_recall": 0.9029368976177486,
291
- "eval_runtime": 5.6134,
292
- "eval_samples_per_second": 71.08,
293
- "eval_steps_per_second": 8.907,
294
  "step": 1830
295
  },
296
  {
297
  "epoch": 16.0,
298
- "grad_norm": 0.40175938606262207,
299
  "learning_rate": 1e-05,
300
- "loss": 0.1336,
301
  "step": 1952
302
  },
303
  {
304
  "epoch": 16.0,
305
- "eval_accuracy": 0.899749373433584,
306
- "eval_f1": 0.884004884004884,
307
- "eval_loss": 0.31003570556640625,
308
- "eval_precision": 0.8717592592592593,
309
- "eval_recall": 0.9015730132751409,
310
- "eval_runtime": 5.6634,
311
- "eval_samples_per_second": 70.453,
312
- "eval_steps_per_second": 8.829,
313
  "step": 1952
314
  },
315
  {
316
  "epoch": 17.0,
317
- "grad_norm": 7.287301540374756,
318
  "learning_rate": 7.5e-06,
319
- "loss": 0.1192,
320
  "step": 2074
321
  },
322
  {
323
  "epoch": 17.0,
324
- "eval_accuracy": 0.8972431077694235,
325
- "eval_f1": 0.8803300634230913,
326
- "eval_loss": 0.29346275329589844,
327
- "eval_precision": 0.8695948246510044,
328
- "eval_recall": 0.8947990543735225,
329
- "eval_runtime": 5.5767,
330
- "eval_samples_per_second": 71.548,
331
- "eval_steps_per_second": 8.966,
332
  "step": 2074
333
  },
334
  {
335
  "epoch": 18.0,
336
- "grad_norm": 6.602230072021484,
337
  "learning_rate": 5e-06,
338
- "loss": 0.1247,
339
  "step": 2196
340
  },
341
  {
342
  "epoch": 18.0,
343
- "eval_accuracy": 0.9022556390977443,
344
- "eval_f1": 0.885145957117024,
345
- "eval_loss": 0.28685662150382996,
346
- "eval_precision": 0.876513491697417,
347
- "eval_recall": 0.8958446990361884,
348
- "eval_runtime": 5.5755,
349
- "eval_samples_per_second": 71.563,
350
- "eval_steps_per_second": 8.968,
351
  "step": 2196
352
  },
353
  {
354
  "epoch": 19.0,
355
- "grad_norm": 7.744582653045654,
356
  "learning_rate": 2.5e-06,
357
- "loss": 0.117,
358
  "step": 2318
359
  },
360
  {
361
  "epoch": 19.0,
362
- "eval_accuracy": 0.9022556390977443,
363
- "eval_f1": 0.884617951284618,
364
- "eval_loss": 0.2761416733264923,
365
- "eval_precision": 0.8772893772893773,
366
- "eval_recall": 0.8933442444080741,
367
- "eval_runtime": 5.5884,
368
- "eval_samples_per_second": 71.398,
369
- "eval_steps_per_second": 8.947,
370
  "step": 2318
371
  },
372
  {
373
  "epoch": 20.0,
374
- "grad_norm": 0.1699504405260086,
375
  "learning_rate": 0.0,
376
- "loss": 0.1092,
377
  "step": 2440
378
  },
379
  {
380
  "epoch": 20.0,
381
  "eval_accuracy": 0.9022556390977443,
382
- "eval_f1": 0.884617951284618,
383
- "eval_loss": 0.2810901701450348,
384
- "eval_precision": 0.8772893772893773,
385
- "eval_recall": 0.8933442444080741,
386
- "eval_runtime": 5.588,
387
- "eval_samples_per_second": 71.403,
388
- "eval_steps_per_second": 8.948,
389
  "step": 2440
390
  },
391
  {
392
  "epoch": 20.0,
393
  "step": 2440,
394
  "total_flos": 8551203605328000.0,
395
- "train_loss": 0.21111928674041247,
396
- "train_runtime": 2283.5364,
397
- "train_samples_per_second": 31.863,
398
- "train_steps_per_second": 1.069
399
  }
400
  ],
401
  "logging_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 4.303664207458496,
14
  "learning_rate": 4.75e-05,
15
+ "loss": 0.5535,
16
  "step": 122
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_accuracy": 0.7293233082706767,
21
+ "eval_f1": 0.6372727272727272,
22
+ "eval_loss": 0.49923330545425415,
23
+ "eval_precision": 0.6645702306079665,
24
+ "eval_recall": 0.6284779050736498,
25
+ "eval_runtime": 5.2819,
26
+ "eval_samples_per_second": 75.541,
27
+ "eval_steps_per_second": 9.466,
28
  "step": 122
29
  },
30
  {
31
  "epoch": 2.0,
32
+ "grad_norm": 4.080423831939697,
33
  "learning_rate": 4.5e-05,
34
+ "loss": 0.444,
35
  "step": 244
36
  },
37
  {
38
  "epoch": 2.0,
39
+ "eval_accuracy": 0.8170426065162907,
40
+ "eval_f1": 0.7960536910871955,
41
+ "eval_loss": 0.4052737355232239,
42
+ "eval_precision": 0.7846938775510204,
43
+ "eval_recall": 0.8255591925804692,
44
+ "eval_runtime": 5.5544,
45
+ "eval_samples_per_second": 71.835,
46
+ "eval_steps_per_second": 9.002,
47
  "step": 244
48
  },
49
  {
50
  "epoch": 3.0,
51
+ "grad_norm": 4.110426425933838,
52
  "learning_rate": 4.25e-05,
53
+ "loss": 0.3464,
54
  "step": 366
55
  },
56
  {
57
  "epoch": 3.0,
58
+ "eval_accuracy": 0.8421052631578947,
59
+ "eval_f1": 0.7905197629940748,
60
+ "eval_loss": 0.3424628674983978,
61
+ "eval_precision": 0.8345238095238094,
62
+ "eval_recall": 0.7682760501909438,
63
+ "eval_runtime": 5.6127,
64
+ "eval_samples_per_second": 71.089,
65
+ "eval_steps_per_second": 8.908,
66
  "step": 366
67
  },
68
  {
69
  "epoch": 4.0,
70
+ "grad_norm": 1.6437464952468872,
71
  "learning_rate": 4e-05,
72
+ "loss": 0.2852,
73
  "step": 488
74
  },
75
  {
76
  "epoch": 4.0,
77
+ "eval_accuracy": 0.8721804511278195,
78
+ "eval_f1": 0.8469505178365937,
79
+ "eval_loss": 0.3135569393634796,
80
+ "eval_precision": 0.844489247311828,
81
+ "eval_recall": 0.8495635570103655,
82
+ "eval_runtime": 5.5998,
83
+ "eval_samples_per_second": 71.253,
84
+ "eval_steps_per_second": 8.929,
85
  "step": 488
86
  },
87
  {
88
  "epoch": 5.0,
89
+ "grad_norm": 3.6121034622192383,
90
  "learning_rate": 3.7500000000000003e-05,
91
+ "loss": 0.2608,
92
  "step": 610
93
  },
94
  {
95
  "epoch": 5.0,
96
+ "eval_accuracy": 0.8721804511278195,
97
+ "eval_f1": 0.8469505178365937,
98
+ "eval_loss": 0.3060314953327179,
99
+ "eval_precision": 0.844489247311828,
100
+ "eval_recall": 0.8495635570103655,
101
+ "eval_runtime": 5.6686,
102
+ "eval_samples_per_second": 70.388,
103
+ "eval_steps_per_second": 8.821,
104
  "step": 610
105
  },
106
  {
107
  "epoch": 6.0,
108
+ "grad_norm": 2.0154571533203125,
109
  "learning_rate": 3.5e-05,
110
+ "loss": 0.2415,
111
  "step": 732
112
  },
113
  {
114
  "epoch": 6.0,
115
+ "eval_accuracy": 0.8646616541353384,
116
+ "eval_f1": 0.8447157518450185,
117
+ "eval_loss": 0.3100413978099823,
118
+ "eval_precision": 0.8325401217487549,
119
+ "eval_recall": 0.864248045099109,
120
+ "eval_runtime": 5.5541,
121
+ "eval_samples_per_second": 71.838,
122
+ "eval_steps_per_second": 9.002,
123
  "step": 732
124
  },
125
  {
126
  "epoch": 7.0,
127
+ "grad_norm": 0.5025161504745483,
128
  "learning_rate": 3.2500000000000004e-05,
129
+ "loss": 0.2329,
130
  "step": 854
131
  },
132
  {
133
  "epoch": 7.0,
134
+ "eval_accuracy": 0.8847117794486216,
135
+ "eval_f1": 0.8642214594306682,
136
+ "eval_loss": 0.28597915172576904,
137
+ "eval_precision": 0.8566755442334414,
138
+ "eval_recall": 0.8734315330060011,
139
+ "eval_runtime": 5.5629,
140
+ "eval_samples_per_second": 71.725,
141
+ "eval_steps_per_second": 8.988,
142
  "step": 854
143
  },
144
  {
145
  "epoch": 8.0,
146
+ "grad_norm": 2.4278862476348877,
147
  "learning_rate": 3e-05,
148
+ "loss": 0.199,
149
  "step": 976
150
  },
151
  {
152
  "epoch": 8.0,
153
+ "eval_accuracy": 0.8872180451127819,
154
+ "eval_f1": 0.8622036668943447,
155
+ "eval_loss": 0.2878971993923187,
156
+ "eval_precision": 0.8671602787456446,
157
+ "eval_recall": 0.8577014002545917,
158
+ "eval_runtime": 5.6077,
159
+ "eval_samples_per_second": 71.152,
160
+ "eval_steps_per_second": 8.916,
161
  "step": 976
162
  },
163
  {
164
  "epoch": 9.0,
165
+ "grad_norm": 8.504670143127441,
166
  "learning_rate": 2.7500000000000004e-05,
167
+ "loss": 0.1939,
168
  "step": 1098
169
  },
170
  {
171
  "epoch": 9.0,
172
+ "eval_accuracy": 0.8897243107769424,
173
+ "eval_f1": 0.8676337535436396,
174
+ "eval_loss": 0.28258949518203735,
175
+ "eval_precision": 0.8658613445378152,
176
+ "eval_recall": 0.8694762684124386,
177
+ "eval_runtime": 5.5938,
178
+ "eval_samples_per_second": 71.329,
179
+ "eval_steps_per_second": 8.939,
180
  "step": 1098
181
  },
182
  {
183
  "epoch": 10.0,
184
+ "grad_norm": 2.462061882019043,
185
  "learning_rate": 2.5e-05,
186
+ "loss": 0.1806,
187
  "step": 1220
188
  },
189
  {
190
  "epoch": 10.0,
191
+ "eval_accuracy": 0.8796992481203008,
192
+ "eval_f1": 0.8439374185136896,
193
+ "eval_loss": 0.2981988787651062,
194
+ "eval_precision": 0.8794955044955045,
195
+ "eval_recall": 0.822376795781051,
196
+ "eval_runtime": 5.5576,
197
+ "eval_samples_per_second": 71.793,
198
+ "eval_steps_per_second": 8.997,
199
  "step": 1220
200
  },
201
  {
202
  "epoch": 11.0,
203
+ "grad_norm": 1.0077548027038574,
204
  "learning_rate": 2.25e-05,
205
+ "loss": 0.1674,
206
  "step": 1342
207
  },
208
  {
209
  "epoch": 11.0,
210
+ "eval_accuracy": 0.8947368421052632,
211
+ "eval_f1": 0.8730223677032187,
212
+ "eval_loss": 0.2734816372394562,
213
+ "eval_precision": 0.8730223677032187,
214
+ "eval_recall": 0.8730223677032187,
215
+ "eval_runtime": 5.5924,
216
+ "eval_samples_per_second": 71.346,
217
+ "eval_steps_per_second": 8.941,
218
  "step": 1342
219
  },
220
  {
221
  "epoch": 12.0,
222
+ "grad_norm": 3.9673709869384766,
223
  "learning_rate": 2e-05,
224
  "loss": 0.1553,
225
  "step": 1464
226
  },
227
  {
228
  "epoch": 12.0,
229
+ "eval_accuracy": 0.8947368421052632,
230
+ "eval_f1": 0.8717238211879976,
231
+ "eval_loss": 0.2753015458583832,
232
+ "eval_precision": 0.8757194133300328,
233
+ "eval_recall": 0.8680214584469903,
234
+ "eval_runtime": 5.5661,
235
+ "eval_samples_per_second": 71.684,
236
+ "eval_steps_per_second": 8.983,
237
  "step": 1464
238
  },
239
  {
240
  "epoch": 13.0,
241
+ "grad_norm": 3.968949794769287,
242
  "learning_rate": 1.75e-05,
243
+ "loss": 0.1431,
244
  "step": 1586
245
  },
246
  {
247
  "epoch": 13.0,
248
+ "eval_accuracy": 0.8922305764411027,
249
+ "eval_f1": 0.8661961395983623,
250
+ "eval_loss": 0.2937251627445221,
251
+ "eval_precision": 0.8784532165625604,
252
+ "eval_recall": 0.8562465902891435,
253
+ "eval_runtime": 5.5699,
254
+ "eval_samples_per_second": 71.635,
255
+ "eval_steps_per_second": 8.977,
256
  "step": 1586
257
  },
258
  {
259
  "epoch": 14.0,
260
+ "grad_norm": 8.566404342651367,
261
  "learning_rate": 1.5e-05,
262
+ "loss": 0.1417,
263
  "step": 1708
264
  },
265
  {
266
  "epoch": 14.0,
267
+ "eval_accuracy": 0.9072681704260651,
268
+ "eval_f1": 0.8910359080340997,
269
+ "eval_loss": 0.29110613465309143,
270
+ "eval_precision": 0.8822647601476015,
271
+ "eval_recall": 0.9018912529550827,
272
+ "eval_runtime": 5.5858,
273
+ "eval_samples_per_second": 71.432,
274
+ "eval_steps_per_second": 8.951,
275
  "step": 1708
276
  },
277
  {
278
  "epoch": 15.0,
279
+ "grad_norm": 0.1758796125650406,
280
  "learning_rate": 1.25e-05,
281
+ "loss": 0.1236,
282
  "step": 1830
283
  },
284
  {
285
  "epoch": 15.0,
286
+ "eval_accuracy": 0.9022556390977443,
287
+ "eval_f1": 0.8817957385392532,
288
+ "eval_loss": 0.2955999970436096,
289
+ "eval_precision": 0.8827677592299257,
290
+ "eval_recall": 0.8808419712675032,
291
+ "eval_runtime": 5.6052,
292
+ "eval_samples_per_second": 71.184,
293
+ "eval_steps_per_second": 8.92,
294
  "step": 1830
295
  },
296
  {
297
  "epoch": 16.0,
298
+ "grad_norm": 0.7015694975852966,
299
  "learning_rate": 1e-05,
300
+ "loss": 0.1304,
301
  "step": 1952
302
  },
303
  {
304
  "epoch": 16.0,
305
+ "eval_accuracy": 0.9022556390977443,
306
+ "eval_f1": 0.884617951284618,
307
+ "eval_loss": 0.3010990023612976,
308
+ "eval_precision": 0.8772893772893773,
309
+ "eval_recall": 0.8933442444080741,
310
+ "eval_runtime": 5.6368,
311
+ "eval_samples_per_second": 70.785,
312
+ "eval_steps_per_second": 8.87,
313
  "step": 1952
314
  },
315
  {
316
  "epoch": 17.0,
317
+ "grad_norm": 0.19915825128555298,
318
  "learning_rate": 7.5e-06,
319
+ "loss": 0.1164,
320
  "step": 2074
321
  },
322
  {
323
  "epoch": 17.0,
324
+ "eval_accuracy": 0.899749373433584,
325
+ "eval_f1": 0.879667048676036,
326
+ "eval_loss": 0.29428762197494507,
327
+ "eval_precision": 0.8778361344537815,
328
+ "eval_recall": 0.8815693762502272,
329
+ "eval_runtime": 5.5793,
330
+ "eval_samples_per_second": 71.514,
331
+ "eval_steps_per_second": 8.962,
332
  "step": 2074
333
  },
334
  {
335
  "epoch": 18.0,
336
+ "grad_norm": 9.03445816040039,
337
  "learning_rate": 5e-06,
338
+ "loss": 0.1144,
339
  "step": 2196
340
  },
341
  {
342
  "epoch": 18.0,
343
+ "eval_accuracy": 0.8972431077694235,
344
+ "eval_f1": 0.8775533117267087,
345
+ "eval_loss": 0.2937219738960266,
346
+ "eval_precision": 0.873246730188791,
347
+ "eval_recall": 0.8822967812329514,
348
+ "eval_runtime": 5.5648,
349
+ "eval_samples_per_second": 71.7,
350
+ "eval_steps_per_second": 8.985,
351
  "step": 2196
352
  },
353
  {
354
  "epoch": 19.0,
355
+ "grad_norm": 5.45957612991333,
356
  "learning_rate": 2.5e-06,
357
+ "loss": 0.1198,
358
  "step": 2318
359
  },
360
  {
361
  "epoch": 19.0,
362
+ "eval_accuracy": 0.8972431077694235,
363
+ "eval_f1": 0.8737897035111135,
364
+ "eval_loss": 0.29848915338516235,
365
+ "eval_precision": 0.8812047813777917,
366
+ "eval_recall": 0.8672940534642661,
367
+ "eval_runtime": 5.5642,
368
+ "eval_samples_per_second": 71.709,
369
+ "eval_steps_per_second": 8.986,
370
  "step": 2318
371
  },
372
  {
373
  "epoch": 20.0,
374
+ "grad_norm": 3.5000033378601074,
375
  "learning_rate": 0.0,
376
+ "loss": 0.1104,
377
  "step": 2440
378
  },
379
  {
380
  "epoch": 20.0,
381
  "eval_accuracy": 0.9022556390977443,
382
+ "eval_f1": 0.8811928811928812,
383
+ "eval_loss": 0.29284632205963135,
384
+ "eval_precision": 0.8842105263157894,
385
+ "eval_recall": 0.878341516639389,
386
+ "eval_runtime": 5.5868,
387
+ "eval_samples_per_second": 71.418,
388
+ "eval_steps_per_second": 8.95,
389
  "step": 2440
390
  },
391
  {
392
  "epoch": 20.0,
393
  "step": 2440,
394
  "total_flos": 8551203605328000.0,
395
+ "train_loss": 0.2130153269064231,
396
+ "train_runtime": 2280.4037,
397
+ "train_samples_per_second": 31.907,
398
+ "train_steps_per_second": 1.07
399
  }
400
  ],
401
  "logging_steps": 500,