Commit b507968 by apwic (1 parent: b323d45)

End of training

README.md CHANGED
@@ -1,4 +1,6 @@
 ---
+language:
+- id
 license: mit
 base_model: indolem/indobert-base-uncased
 tags:
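The front-matter change only adds language metadata (Indonesian); the checkpoint itself remains a sequence-classification fine-tune of indolem/indobert-base-uncased. As a minimal sketch of how such a checkpoint could be loaded with Hugging Face Transformers (the repository id below is a placeholder, not taken from this commit):

from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Hypothetical repository id; substitute the actual model repo from this commit.
repo_id = "apwic/your-finetuned-model"

tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForSequenceClassification.from_pretrained(repo_id)

# Indonesian input, matching the "language: id" metadata added above.
inputs = tokenizer("contoh kalimat dalam bahasa Indonesia", return_tensors="pt")
logits = model(**inputs).logits
predicted_class = logits.argmax(dim=-1).item()  # 0 or 1, as in predict_results.txt
print(predicted_class)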
all_results.json CHANGED
@@ -1,21 +1,21 @@
 {
-    "accuracy": 0.8100890207715133,
+    "accuracy": 0.8308605341246291,
     "epoch": 20.0,
-    "eval_accuracy": 0.8145363408521303,
-    "eval_f1": 0.7762775050009092,
-    "eval_loss": 0.40420523285865784,
-    "eval_precision": 0.7762775050009092,
-    "eval_recall": 0.7762775050009092,
-    "eval_runtime": 4.9659,
+    "eval_accuracy": 0.7869674185463659,
+    "eval_f1": 0.7325336550973572,
+    "eval_loss": 0.445127934217453,
+    "eval_precision": 0.7442562883739354,
+    "eval_recall": 0.7242680487361338,
+    "eval_runtime": 4.9605,
     "eval_samples": 399,
-    "eval_samples_per_second": 80.349,
-    "eval_steps_per_second": 10.069,
-    "f1": 0.7742127596218269,
-    "precision": 0.7712898624085408,
-    "recall": 0.7774645368374483,
-    "train_loss": 0.45743675075593543,
-    "train_runtime": 1951.2471,
+    "eval_samples_per_second": 80.435,
+    "eval_steps_per_second": 10.08,
+    "f1": 0.7908926081061303,
+    "precision": 0.800266779694824,
+    "recall": 0.7834017338592016,
+    "train_loss": 0.4626342241881324,
+    "train_runtime": 1944.0727,
     "train_samples": 3638,
-    "train_samples_per_second": 37.289,
-    "train_steps_per_second": 1.25
+    "train_samples_per_second": 37.427,
+    "train_steps_per_second": 1.255
 }
eval_results.json CHANGED
@@ -1,12 +1,12 @@
 {
     "epoch": 20.0,
-    "eval_accuracy": 0.8145363408521303,
-    "eval_f1": 0.7762775050009092,
-    "eval_loss": 0.40420523285865784,
-    "eval_precision": 0.7762775050009092,
-    "eval_recall": 0.7762775050009092,
-    "eval_runtime": 4.9659,
+    "eval_accuracy": 0.7869674185463659,
+    "eval_f1": 0.7325336550973572,
+    "eval_loss": 0.445127934217453,
+    "eval_precision": 0.7442562883739354,
+    "eval_recall": 0.7242680487361338,
+    "eval_runtime": 4.9605,
     "eval_samples": 399,
-    "eval_samples_per_second": 80.349,
-    "eval_steps_per_second": 10.069
+    "eval_samples_per_second": 80.435,
+    "eval_steps_per_second": 10.08
 }
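These evaluation throughput figures are internally consistent: 399 samples / 4.9605 s ≈ 80.4 samples per second, and 10.08 steps per second over the same runtime corresponds to roughly 50 evaluation batches, i.e. an evaluation batch size of about 8.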
predict_results.txt CHANGED
@@ -1,352 +1,352 @@ … @@ -1009,4 +1009,4 @@
 index prediction
 [Per-example diff omitted: predict_results.txt lists a binary (0 or 1) prediction for each of the 1,011 test examples (indices 0-1010), and this commit flips the predicted label for a subset of those indices. The aggregate effect of these changes is reflected in test_results.json below.]
runs/May20_05-06-29_indolem-petl-vm/events.out.tfevents.1716183553.indolem-petl-vm.2721960.1 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:74626c29b311ab2f7febd8de0e13fa74e4a103a1080886bc8397ab30398e6574
+size 560
test_results.json CHANGED
@@ -1,6 +1,6 @@
 {
-    "accuracy": 0.8100890207715133,
-    "f1": 0.7742127596218269,
-    "precision": 0.7712898624085408,
-    "recall": 0.7774645368374483
+    "accuracy": 0.8308605341246291,
+    "f1": 0.7908926081061303,
+    "precision": 0.800266779694824,
+    "recall": 0.7834017338592016
 }
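Metrics of this kind can be recomputed offline from a predictions file like predict_results.txt. The sketch below is illustrative only: it assumes a hypothetical gold-label file (gold_labels.txt, one label per line, in the same index order) and uses macro averaging, which may differ from the averaging used to produce the numbers above.

import pandas as pd
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

# predict_results.txt: whitespace-separated columns "index" and "prediction" (0/1).
preds = pd.read_csv("predict_results.txt", sep=r"\s+")["prediction"]
# gold_labels.txt is hypothetical: one gold label per line, same order as the indices.
gold = pd.read_csv("gold_labels.txt", header=None)[0]

acc = accuracy_score(gold, preds)
prec, rec, f1, _ = precision_recall_fscore_support(gold, preds, average="macro")
print({"accuracy": acc, "precision": prec, "recall": rec, "f1": f1})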
train_results.json CHANGED
@@ -1,8 +1,8 @@
 {
     "epoch": 20.0,
-    "train_loss": 0.45743675075593543,
-    "train_runtime": 1951.2471,
+    "train_loss": 0.4626342241881324,
+    "train_runtime": 1944.0727,
     "train_samples": 3638,
-    "train_samples_per_second": 37.289,
-    "train_steps_per_second": 1.25
+    "train_samples_per_second": 37.427,
+    "train_steps_per_second": 1.255
 }
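As a sanity check, these figures line up with 20 epochs over 3,638 training examples: 3,638 × 20 / 1,944.07 s ≈ 37.4 samples per second, and the 2,440 optimizer steps recorded in trainer_state.json give 2,440 / 1,944.07 s ≈ 1.26 steps per second (about 122 steps per epoch).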
trainer_state.json CHANGED
@@ -10,392 +10,392 @@
 [Per-epoch log_history diff omitted: the commit replaces the previous run's logged values (grad_norm, learning_rate, loss, and eval metrics for epochs 1-20) with those of the new run, summarized below. The final-epoch entries match all_results.json, eval_results.json, and train_results.json above.]

 New run, per-epoch summary (values rounded to 4 decimals):
 epoch  train_loss  eval_loss  eval_accuracy  eval_f1
 1      0.5653      0.5246     0.7218         0.6333
 2      0.5167      0.5215     0.7293         0.6855
 3      0.4984      0.4975     0.7444         0.6916
 4      0.4765      0.4854     0.7419         0.6619
 5      0.4797      0.4852     0.7719         0.7320
 6      0.4668      0.4738     0.7669         0.7195
 7      0.4622      0.4769     0.7719         0.7295
 8      0.4621      0.4625     0.7494         0.6686
 9      0.4561      0.4609     0.7769         0.7199
 10     0.4519      0.4608     0.7669         0.6822
 11     0.4413      0.4544     0.7694         0.7080
 12     0.4449      0.4569     0.7845         0.7413
 13     0.4506      0.4527     0.7644         0.6821
 14     0.4446      0.4488     0.7794         0.7121
 15     0.4426      0.4491     0.7870         0.7355
 16     0.4409      0.4465     0.7719         0.7068
 17     0.4348      0.4474     0.7870         0.7355
 18     0.4478      0.4460     0.7845         0.7302
 19     0.4382      0.4448     0.7870         0.7310
 20     0.4313      0.4451     0.7870         0.7325
 Final: train_loss 0.4626, train_runtime 1944.07 s, 37.427 samples/s, 1.255 steps/s (2,440 steps, total_flos 7588990440528000.0); logging_steps: 500.