apwic commited on
Commit
33e5cf0
1 Parent(s): ac9207e

End of training

Browse files
README.md CHANGED
@@ -1,4 +1,6 @@
1
  ---
 
 
2
  license: mit
3
  base_model: indolem/indobert-base-uncased
4
  tags:
 
1
  ---
2
+ language:
3
+ - id
4
  license: mit
5
  base_model: indolem/indobert-base-uncased
6
  tags:
all_results.json CHANGED
@@ -1,21 +1,21 @@
1
  {
2
- "accuracy": 0.8951533135509396,
3
  "epoch": 20.0,
4
- "eval_accuracy": 0.8771929824561403,
5
- "eval_f1": 0.8475258334958082,
6
- "eval_loss": 0.29292526841163635,
7
- "eval_precision": 0.8591828192414193,
8
- "eval_recall": 0.8381069285324605,
9
- "eval_runtime": 5.0231,
10
  "eval_samples": 399,
11
- "eval_samples_per_second": 79.432,
12
- "eval_steps_per_second": 9.954,
13
- "f1": 0.8729043918822759,
14
- "precision": 0.8759888086469941,
15
- "recall": 0.8700005647749842,
16
- "train_loss": 0.32195694876498865,
17
- "train_runtime": 1936.2334,
18
  "train_samples": 3638,
19
- "train_samples_per_second": 37.578,
20
- "train_steps_per_second": 1.26
21
  }
 
1
  {
2
+ "accuracy": 0.8941641938674579,
3
  "epoch": 20.0,
4
+ "eval_accuracy": 0.8847117794486216,
5
+ "eval_f1": 0.8587719298245614,
6
+ "eval_loss": 0.2786270081996918,
7
+ "eval_precision": 0.864771021021021,
8
+ "eval_recall": 0.8534278959810875,
9
+ "eval_runtime": 5.0263,
10
  "eval_samples": 399,
11
+ "eval_samples_per_second": 79.383,
12
+ "eval_steps_per_second": 9.948,
13
+ "f1": 0.872841399982368,
14
+ "precision": 0.8724798955319228,
15
+ "recall": 0.8732056628105085,
16
+ "train_loss": 0.30967485005738304,
17
+ "train_runtime": 1934.4038,
18
  "train_samples": 3638,
19
+ "train_samples_per_second": 37.614,
20
+ "train_steps_per_second": 1.261
21
  }
eval_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_accuracy": 0.8771929824561403,
4
- "eval_f1": 0.8475258334958082,
5
- "eval_loss": 0.29292526841163635,
6
- "eval_precision": 0.8591828192414193,
7
- "eval_recall": 0.8381069285324605,
8
- "eval_runtime": 5.0231,
9
  "eval_samples": 399,
10
- "eval_samples_per_second": 79.432,
11
- "eval_steps_per_second": 9.954
12
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_accuracy": 0.8847117794486216,
4
+ "eval_f1": 0.8587719298245614,
5
+ "eval_loss": 0.2786270081996918,
6
+ "eval_precision": 0.864771021021021,
7
+ "eval_recall": 0.8534278959810875,
8
+ "eval_runtime": 5.0263,
9
  "eval_samples": 399,
10
+ "eval_samples_per_second": 79.383,
11
+ "eval_steps_per_second": 9.948
12
  }
predict_results.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "accuracy": 0.8951533135509396,
3
- "f1": 0.8729043918822759,
4
- "precision": 0.8759888086469941,
5
- "recall": 0.8700005647749842
6
  }
 
1
  {
2
+ "accuracy": 0.8941641938674579,
3
+ "f1": 0.872841399982368,
4
+ "precision": 0.8724798955319228,
5
+ "recall": 0.8732056628105085
6
  }
predict_results.txt CHANGED
@@ -2,61 +2,61 @@ index prediction
2
  0 1
3
  1 1
4
  2 1
5
- 3 1
6
- 4 0
7
- 5 0
8
  6 0
9
  7 1
10
- 8 1
11
  9 1
12
- 10 0
13
  11 1
14
  12 1
15
  13 0
16
- 14 1
17
  15 1
18
- 16 0
19
  17 1
20
- 18 0
21
- 19 0
22
  20 1
23
  21 1
24
  22 1
25
  23 1
26
  24 1
27
- 25 1
28
  26 1
29
  27 1
30
  28 1
31
  29 1
32
- 30 0
33
  31 0
34
  32 1
35
  33 1
36
  34 1
37
  35 1
38
- 36 0
39
  37 1
40
  38 1
41
- 39 0
42
- 40 0
43
  41 1
44
  42 1
45
  43 1
46
- 44 0
47
  45 1
48
  46 0
49
- 47 1
50
  48 1
51
  49 1
52
  50 1
53
  51 1
54
- 52 1
55
- 53 1
56
  54 1
57
  55 1
58
  56 1
59
- 57 0
60
  58 1
61
  59 1
62
  60 1
@@ -64,37 +64,37 @@ index prediction
64
  62 1
65
  63 1
66
  64 1
67
- 65 1
68
  66 1
69
  67 1
70
  68 1
71
- 69 0
72
  70 1
73
  71 1
74
  72 1
75
- 73 0
76
  74 1
77
- 75 1
78
- 76 1
79
- 77 0
80
  78 1
81
- 79 1
82
  80 1
83
  81 1
84
  82 1
85
- 83 1
86
- 84 0
87
  85 1
88
  86 1
89
- 87 0
90
- 88 1
91
  89 1
92
- 90 1
93
- 91 0
94
  92 1
95
  93 1
96
  94 1
97
- 95 1
98
  96 1
99
  97 1
100
  98 1
@@ -103,15 +103,15 @@ index prediction
103
  101 1
104
  102 1
105
  103 1
106
- 104 0
107
  105 0
108
  106 1
109
- 107 0
110
  108 1
111
- 109 1
112
- 110 0
113
  111 1
114
- 112 0
115
  113 1
116
  114 1
117
  115 1
@@ -122,13 +122,13 @@ index prediction
122
  120 1
123
  121 1
124
  122 1
125
- 123 0
126
  124 1
127
- 125 0
128
  126 1
129
  127 1
130
- 128 0
131
- 129 1
132
  130 1
133
  131 1
134
  132 1
@@ -139,86 +139,86 @@ index prediction
139
  137 1
140
  138 1
141
  139 1
142
- 140 1
143
  141 1
144
- 142 0
145
  143 1
146
  144 1
147
- 145 1
148
  146 1
149
  147 1
150
- 148 1
151
  149 1
152
  150 1
153
  151 1
154
- 152 0
155
  153 1
156
- 154 1
157
  155 1
158
- 156 1
159
  157 1
160
- 158 1
161
- 159 1
162
  160 1
163
- 161 1
164
  162 1
165
  163 1
166
- 164 0
167
  165 1
168
- 166 1
169
- 167 1
170
  168 1
171
- 169 1
172
  170 1
173
  171 1
174
  172 1
175
- 173 0
176
  174 1
177
  175 1
178
- 176 0
179
- 177 0
180
  178 1
181
  179 1
182
  180 0
183
- 181 0
184
  182 1
185
  183 1
186
  184 1
187
  185 1
188
  186 1
189
- 187 1
190
  188 1
191
- 189 1
192
- 190 1
193
  191 1
194
  192 1
195
  193 1
196
  194 1
197
  195 1
198
  196 1
199
- 197 0
200
  198 1
201
  199 1
202
- 200 0
203
  201 1
204
  202 1
205
- 203 0
206
  204 1
207
  205 1
208
- 206 1
209
  207 1
210
  208 1
211
  209 1
212
  210 1
213
  211 1
214
- 212 1
215
  213 1
216
- 214 1
217
  215 0
218
  216 1
219
  217 0
220
- 218 0
221
- 219 0
222
  220 1
223
  221 1
224
  222 1
@@ -226,21 +226,21 @@ index prediction
226
  224 1
227
  225 1
228
  226 1
229
- 227 1
230
  228 1
231
  229 1
232
- 230 1
233
  231 1
234
  232 1
235
- 233 1
236
- 234 0
237
- 235 0
238
- 236 0
239
  237 1
240
- 238 0
241
- 239 0
242
  240 1
243
- 241 1
244
  242 1
245
  243 1
246
  244 1
@@ -251,7 +251,7 @@ index prediction
251
  249 1
252
  250 1
253
  251 1
254
- 252 1
255
  253 1
256
  254 1
257
  255 1
@@ -259,50 +259,50 @@ index prediction
259
  257 1
260
  258 1
261
  259 1
262
- 260 0
263
  261 1
264
  262 1
265
  263 1
266
  264 1
267
- 265 0
268
- 266 0
269
  267 1
270
  268 1
271
  269 1
272
  270 1
273
- 271 1
274
  272 1
275
  273 1
276
  274 1
277
- 275 0
278
  276 1
279
  277 1
280
  278 1
281
  279 1
282
- 280 0
283
- 281 1
284
- 282 1
285
  283 1
286
  284 1
287
  285 1
288
- 286 0
289
- 287 1
290
  288 1
291
- 289 1
292
  290 1
293
  291 1
294
  292 1
295
  293 1
296
  294 1
297
  295 1
298
- 296 0
299
- 297 1
300
- 298 1
301
  299 0
302
  300 0
303
  301 0
304
  302 0
305
- 303 0
306
  304 0
307
  305 0
308
  306 0
@@ -311,8 +311,8 @@ index prediction
311
  309 0
312
  310 0
313
  311 0
314
- 312 0
315
- 313 0
316
  314 0
317
  315 0
318
  316 0
@@ -325,27 +325,27 @@ index prediction
325
  323 0
326
  324 0
327
  325 0
328
- 326 0
329
  327 0
330
  328 0
331
  329 0
332
  330 0
333
  331 0
334
- 332 0
335
  333 0
336
  334 0
337
  335 0
338
  336 0
339
  337 0
340
  338 0
341
- 339 0
342
  340 0
343
  341 0
344
  342 0
345
  343 0
346
  344 0
347
- 345 1
348
- 346 0
349
  347 0
350
  348 0
351
  349 0
@@ -365,30 +365,30 @@ index prediction
365
  363 0
366
  364 0
367
  365 0
368
- 366 1
369
  367 0
370
  368 0
371
  369 0
372
  370 0
373
  371 0
374
- 372 0
375
  373 0
376
  374 0
377
  375 0
378
  376 0
379
  377 0
380
  378 0
381
- 379 1
382
  380 0
383
  381 0
384
  382 0
385
  383 0
386
- 384 0
387
  385 0
388
- 386 0
389
  387 0
390
  388 0
391
- 389 1
392
  390 0
393
  391 0
394
  392 0
@@ -400,7 +400,7 @@ index prediction
400
  398 0
401
  399 0
402
  400 0
403
- 401 1
404
  402 0
405
  403 0
406
  404 0
@@ -408,21 +408,21 @@ index prediction
408
  406 0
409
  407 0
410
  408 0
411
- 409 1
412
- 410 1
413
  411 0
414
  412 0
415
  413 0
416
  414 0
417
  415 0
418
  416 0
419
- 417 1
420
  418 0
421
  419 0
422
  420 0
423
  421 0
424
  422 0
425
- 423 1
426
  424 0
427
  425 0
428
  426 0
@@ -434,13 +434,13 @@ index prediction
434
  432 0
435
  433 0
436
  434 0
437
- 435 0
438
  436 0
439
  437 0
440
  438 0
441
  439 0
442
- 440 0
443
- 441 1
444
  442 0
445
  443 0
446
  444 0
@@ -451,9 +451,9 @@ index prediction
451
  449 0
452
  450 0
453
  451 0
454
- 452 1
455
  453 0
456
- 454 1
457
  455 0
458
  456 0
459
  457 0
@@ -463,7 +463,7 @@ index prediction
463
  461 0
464
  462 0
465
  463 0
466
- 464 0
467
  465 0
468
  466 0
469
  467 0
@@ -471,25 +471,25 @@ index prediction
471
  469 0
472
  470 0
473
  471 0
474
- 472 0
475
  473 0
476
- 474 0
477
  475 0
478
- 476 0
479
  477 0
480
  478 0
481
  479 0
482
- 480 0
483
  481 0
484
  482 0
485
  483 0
486
  484 0
487
  485 0
488
- 486 0
489
  487 0
490
  488 0
491
  489 0
492
- 490 0
493
  491 0
494
  492 0
495
  493 0
@@ -497,7 +497,7 @@ index prediction
497
  495 0
498
  496 0
499
  497 0
500
- 498 1
501
  499 0
502
  500 0
503
  501 0
@@ -507,7 +507,7 @@ index prediction
507
  505 0
508
  506 0
509
  507 0
510
- 508 1
511
  509 0
512
  510 0
513
  511 0
@@ -522,7 +522,7 @@ index prediction
522
  520 0
523
  521 0
524
  522 0
525
- 523 0
526
  524 0
527
  525 0
528
  526 0
@@ -532,13 +532,13 @@ index prediction
532
  530 0
533
  531 0
534
  532 0
535
- 533 1
536
- 534 0
537
  535 0
538
  536 0
539
  537 0
540
  538 0
541
- 539 0
542
  540 0
543
  541 0
544
  542 0
@@ -561,12 +561,12 @@ index prediction
561
  559 0
562
  560 0
563
  561 0
564
- 562 1
565
  563 0
566
  564 0
567
- 565 0
568
  566 0
569
- 567 0
570
  568 0
571
  569 0
572
  570 0
@@ -596,13 +596,13 @@ index prediction
596
  594 0
597
  595 0
598
  596 0
599
- 597 0
600
  598 0
601
  599 0
602
  600 0
603
  601 0
604
  602 0
605
- 603 1
606
  604 0
607
  605 1
608
  606 0
@@ -622,9 +622,9 @@ index prediction
622
  620 0
623
  621 0
624
  622 0
625
- 623 1
626
  624 0
627
- 625 1
628
  626 0
629
  627 0
630
  628 0
@@ -642,7 +642,7 @@ index prediction
642
  640 0
643
  641 0
644
  642 0
645
- 643 1
646
  644 0
647
  645 0
648
  646 0
@@ -662,8 +662,8 @@ index prediction
662
  660 0
663
  661 0
664
  662 0
665
- 663 1
666
- 664 1
667
  665 0
668
  666 0
669
  667 0
@@ -672,7 +672,7 @@ index prediction
672
  670 0
673
  671 0
674
  672 0
675
- 673 0
676
  674 0
677
  675 0
678
  676 0
@@ -688,11 +688,11 @@ index prediction
688
  686 0
689
  687 0
690
  688 0
691
- 689 0
692
  690 0
693
  691 0
694
  692 0
695
- 693 0
696
  694 0
697
  695 0
698
  696 0
@@ -700,12 +700,12 @@ index prediction
700
  698 0
701
  699 0
702
  700 0
703
- 701 0
704
  702 0
705
  703 0
706
  704 0
707
  705 0
708
- 706 1
709
  707 0
710
  708 0
711
  709 0
@@ -720,14 +720,14 @@ index prediction
720
  718 0
721
  719 0
722
  720 0
723
- 721 1
724
  722 0
725
  723 0
726
  724 0
727
- 725 1
728
  726 0
729
  727 0
730
- 728 0
731
  729 0
732
  730 0
733
  731 0
@@ -746,7 +746,7 @@ index prediction
746
  744 0
747
  745 0
748
  746 0
749
- 747 0
750
  748 0
751
  749 0
752
  750 0
@@ -754,7 +754,7 @@ index prediction
754
  752 0
755
  753 0
756
  754 0
757
- 755 1
758
  756 0
759
  757 0
760
  758 0
@@ -779,10 +779,10 @@ index prediction
779
  777 0
780
  778 0
781
  779 0
782
- 780 1
783
  781 0
784
  782 0
785
- 783 1
786
  784 0
787
  785 0
788
  786 0
@@ -797,19 +797,19 @@ index prediction
797
  795 0
798
  796 0
799
  797 0
800
- 798 0
801
- 799 0
802
  800 0
803
  801 0
804
- 802 0
805
  803 0
806
  804 0
807
  805 0
808
  806 0
809
  807 0
810
  808 0
811
- 809 0
812
- 810 0
813
  811 0
814
  812 0
815
  813 0
@@ -825,23 +825,23 @@ index prediction
825
  823 0
826
  824 0
827
  825 0
828
- 826 0
829
  827 0
830
  828 0
831
  829 0
832
  830 0
833
- 831 0
834
- 832 0
835
  833 0
836
  834 0
837
  835 0
838
  836 0
839
  837 0
840
- 838 0
841
  839 0
842
  840 0
843
  841 0
844
- 842 1
845
  843 0
846
  844 0
847
  845 0
@@ -859,19 +859,19 @@ index prediction
859
  857 0
860
  858 0
861
  859 0
862
- 860 1
863
  861 0
864
  862 0
865
  863 0
866
  864 0
867
  865 0
868
  866 0
869
- 867 1
870
  868 0
871
  869 0
872
  870 0
873
  871 0
874
- 872 1
875
  873 0
876
  874 0
877
  875 0
@@ -889,16 +889,16 @@ index prediction
889
  887 0
890
  888 0
891
  889 0
892
- 890 1
893
  891 0
894
  892 0
895
- 893 1
896
  894 0
897
  895 0
898
  896 0
899
  897 0
900
  898 0
901
- 899 1
902
  900 0
903
  901 0
904
  902 0
@@ -909,9 +909,9 @@ index prediction
909
  907 0
910
  908 0
911
  909 0
912
- 910 1
913
  911 0
914
- 912 0
915
  913 0
916
  914 0
917
  915 0
@@ -920,39 +920,39 @@ index prediction
920
  918 0
921
  919 0
922
  920 0
923
- 921 0
924
  922 0
925
  923 0
926
- 924 0
927
- 925 1
928
- 926 0
929
- 927 0
930
  928 0
931
- 929 0
932
  930 0
933
  931 0
934
  932 0
935
- 933 1
936
  934 0
937
  935 0
938
  936 0
939
- 937 1
940
  938 0
941
  939 0
942
  940 0
943
- 941 0
944
  942 0
945
  943 0
946
  944 0
947
- 945 0
948
  946 0
949
  947 0
950
  948 0
951
  949 0
952
  950 0
953
  951 0
954
- 952 1
955
- 953 1
956
  954 0
957
  955 0
958
  956 0
@@ -960,31 +960,31 @@ index prediction
960
  958 0
961
  959 0
962
  960 0
963
- 961 0
964
- 962 1
965
  963 0
966
  964 0
967
  965 0
968
- 966 1
969
  967 0
970
  968 0
971
- 969 1
972
  970 0
973
  971 0
974
  972 0
975
  973 0
976
- 974 0
977
  975 0
978
- 976 1
979
- 977 0
980
  978 0
981
  979 0
982
- 980 1
983
  981 0
984
  982 0
985
  983 0
986
  984 0
987
- 985 1
988
  986 0
989
  987 0
990
  988 0
@@ -998,7 +998,7 @@ index prediction
998
  996 0
999
  997 0
1000
  998 0
1001
- 999 0
1002
  1000 0
1003
  1001 0
1004
  1002 0
@@ -1008,5 +1008,5 @@ index prediction
1008
  1006 0
1009
  1007 0
1010
  1008 0
1011
- 1009 0
1012
  1010 0
 
2
  0 1
3
  1 1
4
  2 1
5
+ 3 0
6
+ 4 1
7
+ 5 1
8
  6 0
9
  7 1
10
+ 8 0
11
  9 1
12
+ 10 1
13
  11 1
14
  12 1
15
  13 0
16
+ 14 0
17
  15 1
18
+ 16 1
19
  17 1
20
+ 18 1
21
+ 19 1
22
  20 1
23
  21 1
24
  22 1
25
  23 1
26
  24 1
27
+ 25 0
28
  26 1
29
  27 1
30
  28 1
31
  29 1
32
+ 30 1
33
  31 0
34
  32 1
35
  33 1
36
  34 1
37
  35 1
38
+ 36 1
39
  37 1
40
  38 1
41
+ 39 1
42
+ 40 1
43
  41 1
44
  42 1
45
  43 1
46
+ 44 1
47
  45 1
48
  46 0
49
+ 47 0
50
  48 1
51
  49 1
52
  50 1
53
  51 1
54
+ 52 0
55
+ 53 0
56
  54 1
57
  55 1
58
  56 1
59
+ 57 1
60
  58 1
61
  59 1
62
  60 1
 
64
  62 1
65
  63 1
66
  64 1
67
+ 65 0
68
  66 1
69
  67 1
70
  68 1
71
+ 69 1
72
  70 1
73
  71 1
74
  72 1
75
+ 73 1
76
  74 1
77
+ 75 0
78
+ 76 0
79
+ 77 1
80
  78 1
81
+ 79 0
82
  80 1
83
  81 1
84
  82 1
85
+ 83 0
86
+ 84 1
87
  85 1
88
  86 1
89
+ 87 1
90
+ 88 0
91
  89 1
92
+ 90 0
93
+ 91 1
94
  92 1
95
  93 1
96
  94 1
97
+ 95 0
98
  96 1
99
  97 1
100
  98 1
 
103
  101 1
104
  102 1
105
  103 1
106
+ 104 1
107
  105 0
108
  106 1
109
+ 107 1
110
  108 1
111
+ 109 0
112
+ 110 1
113
  111 1
114
+ 112 1
115
  113 1
116
  114 1
117
  115 1
 
122
  120 1
123
  121 1
124
  122 1
125
+ 123 1
126
  124 1
127
+ 125 1
128
  126 1
129
  127 1
130
+ 128 1
131
+ 129 0
132
  130 1
133
  131 1
134
  132 1
 
139
  137 1
140
  138 1
141
  139 1
142
+ 140 0
143
  141 1
144
+ 142 1
145
  143 1
146
  144 1
147
+ 145 0
148
  146 1
149
  147 1
150
+ 148 0
151
  149 1
152
  150 1
153
  151 1
154
+ 152 1
155
  153 1
156
+ 154 0
157
  155 1
158
+ 156 0
159
  157 1
160
+ 158 0
161
+ 159 0
162
  160 1
163
+ 161 0
164
  162 1
165
  163 1
166
+ 164 1
167
  165 1
168
+ 166 0
169
+ 167 0
170
  168 1
171
+ 169 0
172
  170 1
173
  171 1
174
  172 1
175
+ 173 1
176
  174 1
177
  175 1
178
+ 176 1
179
+ 177 1
180
  178 1
181
  179 1
182
  180 0
183
+ 181 1
184
  182 1
185
  183 1
186
  184 1
187
  185 1
188
  186 1
189
+ 187 0
190
  188 1
191
+ 189 0
192
+ 190 0
193
  191 1
194
  192 1
195
  193 1
196
  194 1
197
  195 1
198
  196 1
199
+ 197 1
200
  198 1
201
  199 1
202
+ 200 1
203
  201 1
204
  202 1
205
+ 203 1
206
  204 1
207
  205 1
208
+ 206 0
209
  207 1
210
  208 1
211
  209 1
212
  210 1
213
  211 1
214
+ 212 0
215
  213 1
216
+ 214 0
217
  215 0
218
  216 1
219
  217 0
220
+ 218 1
221
+ 219 1
222
  220 1
223
  221 1
224
  222 1
 
226
  224 1
227
  225 1
228
  226 1
229
+ 227 0
230
  228 1
231
  229 1
232
+ 230 0
233
  231 1
234
  232 1
235
+ 233 0
236
+ 234 1
237
+ 235 1
238
+ 236 1
239
  237 1
240
+ 238 1
241
+ 239 1
242
  240 1
243
+ 241 0
244
  242 1
245
  243 1
246
  244 1
 
251
  249 1
252
  250 1
253
  251 1
254
+ 252 0
255
  253 1
256
  254 1
257
  255 1
 
259
  257 1
260
  258 1
261
  259 1
262
+ 260 1
263
  261 1
264
  262 1
265
  263 1
266
  264 1
267
+ 265 1
268
+ 266 1
269
  267 1
270
  268 1
271
  269 1
272
  270 1
273
+ 271 0
274
  272 1
275
  273 1
276
  274 1
277
+ 275 1
278
  276 1
279
  277 1
280
  278 1
281
  279 1
282
+ 280 1
283
+ 281 0
284
+ 282 0
285
  283 1
286
  284 1
287
  285 1
288
+ 286 1
289
+ 287 0
290
  288 1
291
+ 289 0
292
  290 1
293
  291 1
294
  292 1
295
  293 1
296
  294 1
297
  295 1
298
+ 296 1
299
+ 297 0
300
+ 298 0
301
  299 0
302
  300 0
303
  301 0
304
  302 0
305
+ 303 1
306
  304 0
307
  305 0
308
  306 0
 
311
  309 0
312
  310 0
313
  311 0
314
+ 312 1
315
+ 313 1
316
  314 0
317
  315 0
318
  316 0
 
325
  323 0
326
  324 0
327
  325 0
328
+ 326 1
329
  327 0
330
  328 0
331
  329 0
332
  330 0
333
  331 0
334
+ 332 1
335
  333 0
336
  334 0
337
  335 0
338
  336 0
339
  337 0
340
  338 0
341
+ 339 1
342
  340 0
343
  341 0
344
  342 0
345
  343 0
346
  344 0
347
+ 345 0
348
+ 346 1
349
  347 0
350
  348 0
351
  349 0
 
365
  363 0
366
  364 0
367
  365 0
368
+ 366 0
369
  367 0
370
  368 0
371
  369 0
372
  370 0
373
  371 0
374
+ 372 1
375
  373 0
376
  374 0
377
  375 0
378
  376 0
379
  377 0
380
  378 0
381
+ 379 0
382
  380 0
383
  381 0
384
  382 0
385
  383 0
386
+ 384 1
387
  385 0
388
+ 386 1
389
  387 0
390
  388 0
391
+ 389 0
392
  390 0
393
  391 0
394
  392 0
 
400
  398 0
401
  399 0
402
  400 0
403
+ 401 0
404
  402 0
405
  403 0
406
  404 0
 
408
  406 0
409
  407 0
410
  408 0
411
+ 409 0
412
+ 410 0
413
  411 0
414
  412 0
415
  413 0
416
  414 0
417
  415 0
418
  416 0
419
+ 417 0
420
  418 0
421
  419 0
422
  420 0
423
  421 0
424
  422 0
425
+ 423 0
426
  424 0
427
  425 0
428
  426 0
 
434
  432 0
435
  433 0
436
  434 0
437
+ 435 1
438
  436 0
439
  437 0
440
  438 0
441
  439 0
442
+ 440 1
443
+ 441 0
444
  442 0
445
  443 0
446
  444 0
 
451
  449 0
452
  450 0
453
  451 0
454
+ 452 0
455
  453 0
456
+ 454 0
457
  455 0
458
  456 0
459
  457 0
 
463
  461 0
464
  462 0
465
  463 0
466
+ 464 1
467
  465 0
468
  466 0
469
  467 0
 
471
  469 0
472
  470 0
473
  471 0
474
+ 472 1
475
  473 0
476
+ 474 1
477
  475 0
478
+ 476 1
479
  477 0
480
  478 0
481
  479 0
482
+ 480 1
483
  481 0
484
  482 0
485
  483 0
486
  484 0
487
  485 0
488
+ 486 1
489
  487 0
490
  488 0
491
  489 0
492
+ 490 1
493
  491 0
494
  492 0
495
  493 0
 
497
  495 0
498
  496 0
499
  497 0
500
+ 498 0
501
  499 0
502
  500 0
503
  501 0
 
507
  505 0
508
  506 0
509
  507 0
510
+ 508 0
511
  509 0
512
  510 0
513
  511 0
 
522
  520 0
523
  521 0
524
  522 0
525
+ 523 1
526
  524 0
527
  525 0
528
  526 0
 
532
  530 0
533
  531 0
534
  532 0
535
+ 533 0
536
+ 534 1
537
  535 0
538
  536 0
539
  537 0
540
  538 0
541
+ 539 1
542
  540 0
543
  541 0
544
  542 0
 
561
  559 0
562
  560 0
563
  561 0
564
+ 562 0
565
  563 0
566
  564 0
567
+ 565 1
568
  566 0
569
+ 567 1
570
  568 0
571
  569 0
572
  570 0
 
596
  594 0
597
  595 0
598
  596 0
599
+ 597 1
600
  598 0
601
  599 0
602
  600 0
603
  601 0
604
  602 0
605
+ 603 0
606
  604 0
607
  605 1
608
  606 0
 
622
  620 0
623
  621 0
624
  622 0
625
+ 623 0
626
  624 0
627
+ 625 0
628
  626 0
629
  627 0
630
  628 0
 
642
  640 0
643
  641 0
644
  642 0
645
+ 643 0
646
  644 0
647
  645 0
648
  646 0
 
662
  660 0
663
  661 0
664
  662 0
665
+ 663 0
666
+ 664 0
667
  665 0
668
  666 0
669
  667 0
 
672
  670 0
673
  671 0
674
  672 0
675
+ 673 1
676
  674 0
677
  675 0
678
  676 0
 
688
  686 0
689
  687 0
690
  688 0
691
+ 689 1
692
  690 0
693
  691 0
694
  692 0
695
+ 693 1
696
  694 0
697
  695 0
698
  696 0
 
700
  698 0
701
  699 0
702
  700 0
703
+ 701 1
704
  702 0
705
  703 0
706
  704 0
707
  705 0
708
+ 706 0
709
  707 0
710
  708 0
711
  709 0
 
720
  718 0
721
  719 0
722
  720 0
723
+ 721 0
724
  722 0
725
  723 0
726
  724 0
727
+ 725 0
728
  726 0
729
  727 0
730
+ 728 1
731
  729 0
732
  730 0
733
  731 0
 
746
  744 0
747
  745 0
748
  746 0
749
+ 747 1
750
  748 0
751
  749 0
752
  750 0
 
754
  752 0
755
  753 0
756
  754 0
757
+ 755 0
758
  756 0
759
  757 0
760
  758 0
 
779
  777 0
780
  778 0
781
  779 0
782
+ 780 0
783
  781 0
784
  782 0
785
+ 783 0
786
  784 0
787
  785 0
788
  786 0
 
797
  795 0
798
  796 0
799
  797 0
800
+ 798 1
801
+ 799 1
802
  800 0
803
  801 0
804
+ 802 1
805
  803 0
806
  804 0
807
  805 0
808
  806 0
809
  807 0
810
  808 0
811
+ 809 1
812
+ 810 1
813
  811 0
814
  812 0
815
  813 0
 
825
  823 0
826
  824 0
827
  825 0
828
+ 826 1
829
  827 0
830
  828 0
831
  829 0
832
  830 0
833
+ 831 1
834
+ 832 1
835
  833 0
836
  834 0
837
  835 0
838
  836 0
839
  837 0
840
+ 838 1
841
  839 0
842
  840 0
843
  841 0
844
+ 842 0
845
  843 0
846
  844 0
847
  845 0
 
859
  857 0
860
  858 0
861
  859 0
862
+ 860 0
863
  861 0
864
  862 0
865
  863 0
866
  864 0
867
  865 0
868
  866 0
869
+ 867 0
870
  868 0
871
  869 0
872
  870 0
873
  871 0
874
+ 872 0
875
  873 0
876
  874 0
877
  875 0
 
889
  887 0
890
  888 0
891
  889 0
892
+ 890 0
893
  891 0
894
  892 0
895
+ 893 0
896
  894 0
897
  895 0
898
  896 0
899
  897 0
900
  898 0
901
+ 899 0
902
  900 0
903
  901 0
904
  902 0
 
909
  907 0
910
  908 0
911
  909 0
912
+ 910 0
913
  911 0
914
+ 912 1
915
  913 0
916
  914 0
917
  915 0
 
920
  918 0
921
  919 0
922
  920 0
923
+ 921 1
924
  922 0
925
  923 0
926
+ 924 1
927
+ 925 0
928
+ 926 1
929
+ 927 1
930
  928 0
931
+ 929 1
932
  930 0
933
  931 0
934
  932 0
935
+ 933 0
936
  934 0
937
  935 0
938
  936 0
939
+ 937 0
940
  938 0
941
  939 0
942
  940 0
943
+ 941 1
944
  942 0
945
  943 0
946
  944 0
947
+ 945 1
948
  946 0
949
  947 0
950
  948 0
951
  949 0
952
  950 0
953
  951 0
954
+ 952 0
955
+ 953 0
956
  954 0
957
  955 0
958
  956 0
 
960
  958 0
961
  959 0
962
  960 0
963
+ 961 1
964
+ 962 0
965
  963 0
966
  964 0
967
  965 0
968
+ 966 0
969
  967 0
970
  968 0
971
+ 969 0
972
  970 0
973
  971 0
974
  972 0
975
  973 0
976
+ 974 1
977
  975 0
978
+ 976 0
979
+ 977 1
980
  978 0
981
  979 0
982
+ 980 0
983
  981 0
984
  982 0
985
  983 0
986
  984 0
987
+ 985 0
988
  986 0
989
  987 0
990
  988 0
 
998
  996 0
999
  997 0
1000
  998 0
1001
+ 999 1
1002
  1000 0
1003
  1001 0
1004
  1002 0
 
1008
  1006 0
1009
  1007 0
1010
  1008 0
1011
+ 1009 1
1012
  1010 0
runs/May26_23-05-07_indolem-petl-vm/events.out.tfevents.1716766661.indolem-petl-vm.3153905.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16e78608395111700e37d5488b56b49e9846a08f2ecf80d0b4962ce418dd3c97
3
+ size 560
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
- "train_loss": 0.32195694876498865,
4
- "train_runtime": 1936.2334,
5
  "train_samples": 3638,
6
- "train_samples_per_second": 37.578,
7
- "train_steps_per_second": 1.26
8
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "train_loss": 0.30967485005738304,
4
+ "train_runtime": 1934.4038,
5
  "train_samples": 3638,
6
+ "train_samples_per_second": 37.614,
7
+ "train_steps_per_second": 1.261
8
  }
trainer_state.json CHANGED
@@ -10,392 +10,392 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 4.912528038024902,
14
  "learning_rate": 4.75e-05,
15
- "loss": 0.5594,
16
  "step": 122
17
  },
18
  {
19
  "epoch": 1.0,
20
  "eval_accuracy": 0.7268170426065163,
21
- "eval_f1": 0.6375659391484787,
22
- "eval_loss": 0.5211195945739746,
23
- "eval_precision": 0.6611111111111112,
24
- "eval_recall": 0.6292053100563739,
25
- "eval_runtime": 5.0726,
26
- "eval_samples_per_second": 78.658,
27
- "eval_steps_per_second": 9.857,
28
  "step": 122
29
  },
30
  {
31
  "epoch": 2.0,
32
- "grad_norm": 4.199721813201904,
33
  "learning_rate": 4.5e-05,
34
- "loss": 0.5065,
35
  "step": 244
36
  },
37
  {
38
  "epoch": 2.0,
39
- "eval_accuracy": 0.7418546365914787,
40
- "eval_f1": 0.6993657690872781,
41
- "eval_loss": 0.5046526789665222,
42
- "eval_precision": 0.6941181477698332,
43
- "eval_recall": 0.7073558828877977,
44
- "eval_runtime": 5.0545,
45
- "eval_samples_per_second": 78.94,
46
- "eval_steps_per_second": 9.892,
47
  "step": 244
48
  },
49
  {
50
  "epoch": 3.0,
51
- "grad_norm": 4.364508628845215,
52
  "learning_rate": 4.25e-05,
53
- "loss": 0.4577,
54
  "step": 366
55
  },
56
  {
57
  "epoch": 3.0,
58
- "eval_accuracy": 0.7944862155388471,
59
- "eval_f1": 0.7482456140350877,
60
- "eval_loss": 0.43356266617774963,
61
- "eval_precision": 0.75243993993994,
62
- "eval_recall": 0.7445899254409893,
63
- "eval_runtime": 5.0567,
64
- "eval_samples_per_second": 78.906,
65
- "eval_steps_per_second": 9.888,
66
  "step": 366
67
  },
68
  {
69
  "epoch": 4.0,
70
- "grad_norm": 3.2636358737945557,
71
  "learning_rate": 4e-05,
72
- "loss": 0.3996,
73
  "step": 488
74
  },
75
  {
76
  "epoch": 4.0,
77
- "eval_accuracy": 0.8320802005012531,
78
- "eval_f1": 0.7814806640672896,
79
- "eval_loss": 0.3853450417518616,
80
- "eval_precision": 0.8128371089536138,
81
- "eval_recall": 0.7636843062374977,
82
- "eval_runtime": 5.064,
83
- "eval_samples_per_second": 78.792,
84
- "eval_steps_per_second": 9.874,
85
  "step": 488
86
  },
87
  {
88
  "epoch": 5.0,
89
- "grad_norm": 4.141108989715576,
90
  "learning_rate": 3.7500000000000003e-05,
91
- "loss": 0.3611,
92
  "step": 610
93
  },
94
  {
95
  "epoch": 5.0,
96
- "eval_accuracy": 0.849624060150376,
97
- "eval_f1": 0.8167483159828537,
98
- "eval_loss": 0.3553084433078766,
99
- "eval_precision": 0.8201621387462095,
100
- "eval_recall": 0.8136024731769412,
101
- "eval_runtime": 5.0819,
102
- "eval_samples_per_second": 78.515,
103
- "eval_steps_per_second": 9.839,
104
  "step": 610
105
  },
106
  {
107
  "epoch": 6.0,
108
- "grad_norm": 6.039860725402832,
109
  "learning_rate": 3.5e-05,
110
- "loss": 0.3333,
111
  "step": 732
112
  },
113
  {
114
  "epoch": 6.0,
115
- "eval_accuracy": 0.849624060150376,
116
- "eval_f1": 0.8167483159828537,
117
- "eval_loss": 0.3422495126724243,
118
- "eval_precision": 0.8201621387462095,
119
- "eval_recall": 0.8136024731769412,
120
- "eval_runtime": 5.0614,
121
- "eval_samples_per_second": 78.832,
122
- "eval_steps_per_second": 9.879,
123
  "step": 732
124
  },
125
  {
126
  "epoch": 7.0,
127
- "grad_norm": 3.7580935955047607,
128
  "learning_rate": 3.2500000000000004e-05,
129
- "loss": 0.3085,
130
  "step": 854
131
  },
132
  {
133
  "epoch": 7.0,
134
- "eval_accuracy": 0.8596491228070176,
135
- "eval_f1": 0.8262195121951219,
136
- "eval_loss": 0.3352087438106537,
137
- "eval_precision": 0.8360165151709128,
138
- "eval_recall": 0.8181942171303873,
139
- "eval_runtime": 5.0504,
140
- "eval_samples_per_second": 79.004,
141
- "eval_steps_per_second": 9.9,
142
  "step": 854
143
  },
144
  {
145
  "epoch": 8.0,
146
- "grad_norm": 6.729907989501953,
147
  "learning_rate": 3e-05,
148
- "loss": 0.3104,
149
  "step": 976
150
  },
151
  {
152
  "epoch": 8.0,
153
- "eval_accuracy": 0.8621553884711779,
154
- "eval_f1": 0.8279052989013229,
155
- "eval_loss": 0.32583731412887573,
156
- "eval_precision": 0.8414113428943938,
157
- "eval_recall": 0.8174668121476631,
158
- "eval_runtime": 5.0514,
159
- "eval_samples_per_second": 78.987,
160
- "eval_steps_per_second": 9.898,
161
  "step": 976
162
  },
163
  {
164
  "epoch": 9.0,
165
- "grad_norm": 8.78210735321045,
166
  "learning_rate": 2.7500000000000004e-05,
167
- "loss": 0.3015,
168
  "step": 1098
169
  },
170
  {
171
  "epoch": 9.0,
172
- "eval_accuracy": 0.8571428571428571,
173
- "eval_f1": 0.8263588263588264,
174
- "eval_loss": 0.3172130286693573,
175
- "eval_precision": 0.8289473684210527,
176
- "eval_recall": 0.8239225313693399,
177
- "eval_runtime": 5.0606,
178
- "eval_samples_per_second": 78.844,
179
- "eval_steps_per_second": 9.88,
180
  "step": 1098
181
  },
182
  {
183
  "epoch": 10.0,
184
- "grad_norm": 5.539211750030518,
185
  "learning_rate": 2.5e-05,
186
- "loss": 0.2856,
187
  "step": 1220
188
  },
189
  {
190
  "epoch": 10.0,
191
- "eval_accuracy": 0.8596491228070176,
192
- "eval_f1": 0.8222604047346316,
193
- "eval_loss": 0.3109637498855591,
194
- "eval_precision": 0.8423737373737374,
195
- "eval_recall": 0.8081923986179305,
196
- "eval_runtime": 5.0538,
197
- "eval_samples_per_second": 78.95,
198
- "eval_steps_per_second": 9.894,
199
  "step": 1220
200
  },
201
  {
202
  "epoch": 11.0,
203
- "grad_norm": 3.2519192695617676,
204
  "learning_rate": 2.25e-05,
205
- "loss": 0.276,
206
  "step": 1342
207
  },
208
  {
209
  "epoch": 11.0,
210
- "eval_accuracy": 0.8671679197994987,
211
- "eval_f1": 0.8302877091609486,
212
- "eval_loss": 0.30823540687561035,
213
- "eval_precision": 0.8556105610561056,
214
- "eval_recall": 0.8135115475541008,
215
- "eval_runtime": 5.0588,
216
- "eval_samples_per_second": 78.873,
217
- "eval_steps_per_second": 9.884,
218
  "step": 1342
219
  },
220
  {
221
  "epoch": 12.0,
222
- "grad_norm": 10.9083833694458,
223
  "learning_rate": 2e-05,
224
- "loss": 0.2772,
225
  "step": 1464
226
  },
227
  {
228
  "epoch": 12.0,
229
- "eval_accuracy": 0.8671679197994987,
230
- "eval_f1": 0.8401647707947546,
231
- "eval_loss": 0.3042956292629242,
232
- "eval_precision": 0.8393298751432535,
233
- "eval_recall": 0.8410165484633569,
234
- "eval_runtime": 5.0676,
235
- "eval_samples_per_second": 78.735,
236
- "eval_steps_per_second": 9.867,
237
  "step": 1464
238
  },
239
  {
240
  "epoch": 13.0,
241
- "grad_norm": 2.2276933193206787,
242
  "learning_rate": 1.75e-05,
243
- "loss": 0.2719,
244
  "step": 1586
245
  },
246
  {
247
  "epoch": 13.0,
248
- "eval_accuracy": 0.8721804511278195,
249
- "eval_f1": 0.8385986341680085,
250
- "eval_loss": 0.3041522204875946,
251
- "eval_precision": 0.8581438127090301,
252
- "eval_recall": 0.8245590107292236,
253
- "eval_runtime": 5.0652,
254
- "eval_samples_per_second": 78.772,
255
- "eval_steps_per_second": 9.871,
256
  "step": 1586
257
  },
258
  {
259
  "epoch": 14.0,
260
- "grad_norm": 7.6559953689575195,
261
  "learning_rate": 1.5e-05,
262
- "loss": 0.2595,
263
  "step": 1708
264
  },
265
  {
266
  "epoch": 14.0,
267
- "eval_accuracy": 0.87468671679198,
268
- "eval_f1": 0.8456742372671576,
269
- "eval_loss": 0.2962762117385864,
270
- "eval_precision": 0.8536697247706422,
271
- "eval_recall": 0.8388343335151845,
272
- "eval_runtime": 5.0567,
273
- "eval_samples_per_second": 78.905,
274
- "eval_steps_per_second": 9.888,
275
  "step": 1708
276
  },
277
  {
278
  "epoch": 15.0,
279
- "grad_norm": 1.5493590831756592,
280
  "learning_rate": 1.25e-05,
281
- "loss": 0.2621,
282
  "step": 1830
283
  },
284
  {
285
  "epoch": 15.0,
286
- "eval_accuracy": 0.8771929824561403,
287
- "eval_f1": 0.8514869535493182,
288
- "eval_loss": 0.2936766743659973,
289
- "eval_precision": 0.8523821128305106,
290
- "eval_recall": 0.8506092016730314,
291
- "eval_runtime": 5.0512,
292
- "eval_samples_per_second": 78.991,
293
- "eval_steps_per_second": 9.899,
294
  "step": 1830
295
  },
296
  {
297
  "epoch": 16.0,
298
- "grad_norm": 2.2587976455688477,
299
  "learning_rate": 1e-05,
300
- "loss": 0.2653,
301
  "step": 1952
302
  },
303
  {
304
  "epoch": 16.0,
305
- "eval_accuracy": 0.8796992481203008,
306
- "eval_f1": 0.8533986527862829,
307
- "eval_loss": 0.2934819459915161,
308
- "eval_precision": 0.8572003218020917,
309
- "eval_recall": 0.8498817966903074,
310
- "eval_runtime": 5.0676,
311
- "eval_samples_per_second": 78.736,
312
- "eval_steps_per_second": 9.867,
313
  "step": 1952
314
  },
315
  {
316
  "epoch": 17.0,
317
- "grad_norm": 1.1797109842300415,
318
  "learning_rate": 7.5e-06,
319
- "loss": 0.2496,
320
  "step": 2074
321
  },
322
  {
323
  "epoch": 17.0,
324
- "eval_accuracy": 0.8822055137844611,
325
- "eval_f1": 0.852937255424767,
326
- "eval_loss": 0.2959519028663635,
327
- "eval_precision": 0.8674217731421121,
328
- "eval_recall": 0.8416530278232406,
329
- "eval_runtime": 5.0512,
330
- "eval_samples_per_second": 78.991,
331
- "eval_steps_per_second": 9.899,
332
  "step": 2074
333
  },
334
  {
335
  "epoch": 18.0,
336
- "grad_norm": 4.01472806930542,
337
  "learning_rate": 5e-06,
338
- "loss": 0.2579,
339
  "step": 2196
340
  },
341
  {
342
  "epoch": 18.0,
343
- "eval_accuracy": 0.8721804511278195,
344
- "eval_f1": 0.8421640488656195,
345
- "eval_loss": 0.29394522309303284,
346
- "eval_precision": 0.8512313860252005,
347
- "eval_recall": 0.8345608292416803,
348
- "eval_runtime": 5.1326,
349
- "eval_samples_per_second": 77.739,
350
- "eval_steps_per_second": 9.742,
351
  "step": 2196
352
  },
353
  {
354
  "epoch": 19.0,
355
- "grad_norm": 4.343449115753174,
356
  "learning_rate": 2.5e-06,
357
- "loss": 0.2533,
358
  "step": 2318
359
  },
360
  {
361
  "epoch": 19.0,
362
- "eval_accuracy": 0.8771929824561403,
363
- "eval_f1": 0.8475258334958082,
364
- "eval_loss": 0.29310622811317444,
365
- "eval_precision": 0.8591828192414193,
366
- "eval_recall": 0.8381069285324605,
367
- "eval_runtime": 5.06,
368
- "eval_samples_per_second": 78.853,
369
- "eval_steps_per_second": 9.881,
370
  "step": 2318
371
  },
372
  {
373
  "epoch": 20.0,
374
- "grad_norm": 3.7858877182006836,
375
  "learning_rate": 0.0,
376
- "loss": 0.2429,
377
  "step": 2440
378
  },
379
  {
380
  "epoch": 20.0,
381
- "eval_accuracy": 0.8771929824561403,
382
- "eval_f1": 0.8475258334958082,
383
- "eval_loss": 0.29292526841163635,
384
- "eval_precision": 0.8591828192414193,
385
- "eval_recall": 0.8381069285324605,
386
- "eval_runtime": 5.0884,
387
- "eval_samples_per_second": 78.414,
388
- "eval_steps_per_second": 9.826,
389
  "step": 2440
390
  },
391
  {
392
  "epoch": 20.0,
393
  "step": 2440,
394
  "total_flos": 7609911792720000.0,
395
- "train_loss": 0.32195694876498865,
396
- "train_runtime": 1936.2334,
397
- "train_samples_per_second": 37.578,
398
- "train_steps_per_second": 1.26
399
  }
400
  ],
401
  "logging_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 5.93964147567749,
14
  "learning_rate": 4.75e-05,
15
+ "loss": 0.5623,
16
  "step": 122
17
  },
18
  {
19
  "epoch": 1.0,
20
  "eval_accuracy": 0.7268170426065163,
21
+ "eval_f1": 0.6301313943104988,
22
+ "eval_loss": 0.5216777324676514,
23
+ "eval_precision": 0.6603762281332375,
24
+ "eval_recall": 0.6217039461720313,
25
+ "eval_runtime": 5.0877,
26
+ "eval_samples_per_second": 78.424,
27
+ "eval_steps_per_second": 9.828,
28
  "step": 122
29
  },
30
  {
31
  "epoch": 2.0,
32
+ "grad_norm": 3.8485560417175293,
33
  "learning_rate": 4.5e-05,
34
+ "loss": 0.5061,
35
  "step": 244
36
  },
37
  {
38
  "epoch": 2.0,
39
+ "eval_accuracy": 0.7568922305764411,
40
+ "eval_f1": 0.7089058868656783,
41
+ "eval_loss": 0.48978015780448914,
42
+ "eval_precision": 0.7074372759856631,
43
+ "eval_recall": 0.7104928168757956,
44
+ "eval_runtime": 5.0629,
45
+ "eval_samples_per_second": 78.809,
46
+ "eval_steps_per_second": 9.876,
47
  "step": 244
48
  },
49
  {
50
  "epoch": 3.0,
51
+ "grad_norm": 5.005080699920654,
52
  "learning_rate": 4.25e-05,
53
+ "loss": 0.4443,
54
  "step": 366
55
  },
56
  {
57
  "epoch": 3.0,
58
+ "eval_accuracy": 0.8120300751879699,
59
+ "eval_f1": 0.7678883071553229,
60
+ "eval_loss": 0.40850991010665894,
61
+ "eval_precision": 0.7750572737686139,
62
+ "eval_recall": 0.7620021822149482,
63
+ "eval_runtime": 5.144,
64
+ "eval_samples_per_second": 77.566,
65
+ "eval_steps_per_second": 9.72,
66
  "step": 366
67
  },
68
  {
69
  "epoch": 4.0,
70
+ "grad_norm": 2.8029582500457764,
71
  "learning_rate": 4e-05,
72
+ "loss": 0.3805,
73
  "step": 488
74
  },
75
  {
76
  "epoch": 4.0,
77
+ "eval_accuracy": 0.8245614035087719,
78
+ "eval_f1": 0.7751786979200206,
79
+ "eval_loss": 0.3672122061252594,
80
+ "eval_precision": 0.7980263157894737,
81
+ "eval_recall": 0.7608656119294417,
82
+ "eval_runtime": 5.0838,
83
+ "eval_samples_per_second": 78.485,
84
+ "eval_steps_per_second": 9.835,
85
  "step": 488
86
  },
87
  {
88
  "epoch": 5.0,
89
+ "grad_norm": 3.751908540725708,
90
  "learning_rate": 3.7500000000000003e-05,
91
+ "loss": 0.3488,
92
  "step": 610
93
  },
94
  {
95
  "epoch": 5.0,
96
+ "eval_accuracy": 0.8521303258145363,
97
+ "eval_f1": 0.8229427559286084,
98
+ "eval_loss": 0.35348454117774963,
99
+ "eval_precision": 0.8206541218637993,
100
+ "eval_recall": 0.8253773413347881,
101
+ "eval_runtime": 5.0536,
102
+ "eval_samples_per_second": 78.953,
103
+ "eval_steps_per_second": 9.894,
104
  "step": 610
105
  },
106
  {
107
  "epoch": 6.0,
108
+ "grad_norm": 4.316796779632568,
109
  "learning_rate": 3.5e-05,
110
+ "loss": 0.3156,
111
  "step": 732
112
  },
113
  {
114
  "epoch": 6.0,
115
+ "eval_accuracy": 0.8571428571428571,
116
+ "eval_f1": 0.8254579780661698,
117
+ "eval_loss": 0.33368828892707825,
118
+ "eval_precision": 0.8299216027874565,
119
+ "eval_recall": 0.8214220767412257,
120
+ "eval_runtime": 5.0533,
121
+ "eval_samples_per_second": 78.959,
122
+ "eval_steps_per_second": 9.895,
123
  "step": 732
124
  },
125
  {
126
  "epoch": 7.0,
127
+ "grad_norm": 18.979951858520508,
128
  "learning_rate": 3.2500000000000004e-05,
129
+ "loss": 0.3055,
130
  "step": 854
131
  },
132
  {
133
  "epoch": 7.0,
134
+ "eval_accuracy": 0.8621553884711779,
135
+ "eval_f1": 0.8297847585805701,
136
+ "eval_loss": 0.3217175304889679,
137
+ "eval_precision": 0.8385357006491028,
138
+ "eval_recall": 0.8224677214038916,
139
+ "eval_runtime": 5.0744,
140
+ "eval_samples_per_second": 78.63,
141
+ "eval_steps_per_second": 9.853,
142
  "step": 854
143
  },
144
  {
145
  "epoch": 8.0,
146
+ "grad_norm": 5.143575191497803,
147
  "learning_rate": 3e-05,
148
+ "loss": 0.2995,
149
  "step": 976
150
  },
151
  {
152
  "epoch": 8.0,
153
+ "eval_accuracy": 0.8596491228070176,
154
+ "eval_f1": 0.8271551457392166,
155
+ "eval_loss": 0.31452828645706177,
156
+ "eval_precision": 0.8347358430876305,
157
+ "eval_recall": 0.8206946717585015,
158
+ "eval_runtime": 5.0883,
159
+ "eval_samples_per_second": 78.414,
160
+ "eval_steps_per_second": 9.826,
161
  "step": 976
162
  },
163
  {
164
  "epoch": 9.0,
165
+ "grad_norm": 6.147844314575195,
166
  "learning_rate": 2.7500000000000004e-05,
167
+ "loss": 0.2825,
168
  "step": 1098
169
  },
170
  {
171
  "epoch": 9.0,
172
+ "eval_accuracy": 0.8671679197994987,
173
+ "eval_f1": 0.8393634395533442,
174
+ "eval_loss": 0.3090471923351288,
175
+ "eval_precision": 0.8402278542707444,
176
+ "eval_recall": 0.8385160938352427,
177
+ "eval_runtime": 5.0782,
178
+ "eval_samples_per_second": 78.572,
179
+ "eval_steps_per_second": 9.846,
180
  "step": 1098
181
  },
182
  {
183
  "epoch": 10.0,
184
+ "grad_norm": 5.50480842590332,
185
  "learning_rate": 2.5e-05,
186
+ "loss": 0.272,
187
  "step": 1220
188
  },
189
  {
190
  "epoch": 10.0,
191
+ "eval_accuracy": 0.8721804511278195,
192
+ "eval_f1": 0.8461962888779714,
193
+ "eval_loss": 0.29923897981643677,
194
+ "eval_precision": 0.8453465227094517,
195
+ "eval_recall": 0.8470631023822512,
196
+ "eval_runtime": 5.0745,
197
+ "eval_samples_per_second": 78.629,
198
+ "eval_steps_per_second": 9.853,
199
  "step": 1220
200
  },
201
  {
202
  "epoch": 11.0,
203
+ "grad_norm": 6.085771083831787,
204
  "learning_rate": 2.25e-05,
205
+ "loss": 0.2626,
206
  "step": 1342
207
  },
208
  {
209
  "epoch": 11.0,
210
+ "eval_accuracy": 0.87468671679198,
211
+ "eval_f1": 0.8439846096096095,
212
+ "eval_loss": 0.30075788497924805,
213
+ "eval_precision": 0.8568027210884354,
214
+ "eval_recall": 0.8338334242589562,
215
+ "eval_runtime": 5.0607,
216
+ "eval_samples_per_second": 78.843,
217
+ "eval_steps_per_second": 9.88,
218
  "step": 1342
219
  },
220
  {
221
  "epoch": 12.0,
222
+ "grad_norm": 10.957924842834473,
223
  "learning_rate": 2e-05,
224
+ "loss": 0.2641,
225
  "step": 1464
226
  },
227
  {
228
  "epoch": 12.0,
229
+ "eval_accuracy": 0.87468671679198,
230
+ "eval_f1": 0.8488361520276414,
231
+ "eval_loss": 0.2949255406856537,
232
+ "eval_precision": 0.8488361520276414,
233
+ "eval_recall": 0.8488361520276414,
234
+ "eval_runtime": 5.0742,
235
+ "eval_samples_per_second": 78.634,
236
+ "eval_steps_per_second": 9.854,
237
  "step": 1464
238
  },
239
  {
240
  "epoch": 13.0,
241
+ "grad_norm": 3.114020347595215,
242
  "learning_rate": 1.75e-05,
243
+ "loss": 0.257,
244
  "step": 1586
245
  },
246
  {
247
  "epoch": 13.0,
248
+ "eval_accuracy": 0.8771929824561403,
249
+ "eval_f1": 0.8475258334958082,
250
+ "eval_loss": 0.28850188851356506,
251
+ "eval_precision": 0.8591828192414193,
252
+ "eval_recall": 0.8381069285324605,
253
+ "eval_runtime": 5.0549,
254
+ "eval_samples_per_second": 78.934,
255
+ "eval_steps_per_second": 9.891,
256
  "step": 1586
257
  },
258
  {
259
  "epoch": 14.0,
260
+ "grad_norm": 4.347434043884277,
261
  "learning_rate": 1.5e-05,
262
+ "loss": 0.2473,
263
  "step": 1708
264
  },
265
  {
266
  "epoch": 14.0,
267
+ "eval_accuracy": 0.8822055137844611,
268
+ "eval_f1": 0.8568221901555235,
269
+ "eval_loss": 0.2826312482357025,
270
+ "eval_precision": 0.8596491228070176,
271
+ "eval_recall": 0.8541553009638116,
272
+ "eval_runtime": 5.0506,
273
+ "eval_samples_per_second": 79.001,
274
+ "eval_steps_per_second": 9.9,
275
  "step": 1708
276
  },
277
  {
278
  "epoch": 15.0,
279
+ "grad_norm": 1.6674721240997314,
280
  "learning_rate": 1.25e-05,
281
+ "loss": 0.2456,
282
  "step": 1830
283
  },
284
  {
285
  "epoch": 15.0,
286
+ "eval_accuracy": 0.8847117794486216,
287
+ "eval_f1": 0.8609292598654301,
288
+ "eval_loss": 0.2825632393360138,
289
+ "eval_precision": 0.8609292598654301,
290
+ "eval_recall": 0.8609292598654301,
291
+ "eval_runtime": 5.071,
292
+ "eval_samples_per_second": 78.683,
293
+ "eval_steps_per_second": 9.86,
294
  "step": 1830
295
  },
296
  {
297
  "epoch": 16.0,
298
+ "grad_norm": 5.605799674987793,
299
  "learning_rate": 1e-05,
300
+ "loss": 0.2477,
301
  "step": 1952
302
  },
303
  {
304
  "epoch": 16.0,
305
+ "eval_accuracy": 0.8847117794486216,
306
+ "eval_f1": 0.8602260265626904,
307
+ "eval_loss": 0.2795054018497467,
308
+ "eval_precision": 0.8620943049601959,
309
+ "eval_recall": 0.8584288052373159,
310
+ "eval_runtime": 5.063,
311
+ "eval_samples_per_second": 78.807,
312
+ "eval_steps_per_second": 9.876,
313
  "step": 1952
314
  },
315
  {
316
  "epoch": 17.0,
317
+ "grad_norm": 1.4996304512023926,
318
  "learning_rate": 7.5e-06,
319
+ "loss": 0.2426,
320
  "step": 2074
321
  },
322
  {
323
  "epoch": 17.0,
324
+ "eval_accuracy": 0.8796992481203008,
325
+ "eval_f1": 0.8526315789473684,
326
+ "eval_loss": 0.2793760895729065,
327
+ "eval_precision": 0.8585304054054055,
328
+ "eval_recall": 0.8473813420621932,
329
+ "eval_runtime": 5.0574,
330
+ "eval_samples_per_second": 78.894,
331
+ "eval_steps_per_second": 9.886,
332
  "step": 2074
333
  },
334
  {
335
  "epoch": 18.0,
336
+ "grad_norm": 3.353811264038086,
337
  "learning_rate": 5e-06,
338
+ "loss": 0.2359,
339
  "step": 2196
340
  },
341
  {
342
  "epoch": 18.0,
343
+ "eval_accuracy": 0.8872180451127819,
344
+ "eval_f1": 0.8629148629148629,
345
+ "eval_loss": 0.27959930896759033,
346
+ "eval_precision": 0.8657894736842104,
347
+ "eval_recall": 0.860201854882706,
348
+ "eval_runtime": 5.0719,
349
+ "eval_samples_per_second": 78.668,
350
+ "eval_steps_per_second": 9.858,
351
  "step": 2196
352
  },
353
  {
354
  "epoch": 19.0,
355
+ "grad_norm": 6.069816589355469,
356
  "learning_rate": 2.5e-06,
357
+ "loss": 0.2417,
358
  "step": 2318
359
  },
360
  {
361
  "epoch": 19.0,
362
+ "eval_accuracy": 0.8847117794486216,
363
+ "eval_f1": 0.8587719298245614,
364
+ "eval_loss": 0.27870801091194153,
365
+ "eval_precision": 0.864771021021021,
366
+ "eval_recall": 0.8534278959810875,
367
+ "eval_runtime": 5.0523,
368
+ "eval_samples_per_second": 78.973,
369
+ "eval_steps_per_second": 9.896,
370
  "step": 2318
371
  },
372
  {
373
  "epoch": 20.0,
374
+ "grad_norm": 4.868233680725098,
375
  "learning_rate": 0.0,
376
+ "loss": 0.2319,
377
  "step": 2440
378
  },
379
  {
380
  "epoch": 20.0,
381
+ "eval_accuracy": 0.8847117794486216,
382
+ "eval_f1": 0.8587719298245614,
383
+ "eval_loss": 0.2786270081996918,
384
+ "eval_precision": 0.864771021021021,
385
+ "eval_recall": 0.8534278959810875,
386
+ "eval_runtime": 5.0608,
387
+ "eval_samples_per_second": 78.841,
388
+ "eval_steps_per_second": 9.88,
389
  "step": 2440
390
  },
391
  {
392
  "epoch": 20.0,
393
  "step": 2440,
394
  "total_flos": 7609911792720000.0,
395
+ "train_loss": 0.30967485005738304,
396
+ "train_runtime": 1934.4038,
397
+ "train_samples_per_second": 37.614,
398
+ "train_steps_per_second": 1.261
399
  }
400
  ],
401
  "logging_steps": 500,