apwic committed
Commit 99f9f74
1 Parent(s): a4132c1

End of training

README.md CHANGED
@@ -1,4 +1,6 @@
 ---
+language:
+- id
 license: mit
 base_model: indolem/indobert-base-uncased
 tags:
all_results.json CHANGED
@@ -1,21 +1,21 @@
 {
-    "accuracy": 0.8941641938674579,
+    "accuracy": 0.9023904382470119,
     "epoch": 20.0,
-    "eval_accuracy": 0.8847117794486216,
-    "eval_f1": 0.8587719298245614,
-    "eval_loss": 0.2786270081996918,
-    "eval_precision": 0.864771021021021,
-    "eval_recall": 0.8534278959810875,
-    "eval_runtime": 5.0263,
+    "eval_accuracy": 0.8771929824561403,
+    "eval_f1": 0.850729517396184,
+    "eval_loss": 0.29076284170150757,
+    "eval_precision": 0.8535087719298247,
+    "eval_recall": 0.8481087470449173,
+    "eval_runtime": 5.0376,
     "eval_samples": 399,
-    "eval_samples_per_second": 79.383,
-    "eval_steps_per_second": 9.948,
-    "f1": 0.872841399982368,
-    "precision": 0.8724798955319228,
-    "recall": 0.8732056628105085,
-    "train_loss": 0.30967485005738304,
-    "train_runtime": 1934.4038,
-    "train_samples": 3638,
-    "train_samples_per_second": 37.614,
-    "train_steps_per_second": 1.261
+    "eval_samples_per_second": 79.205,
+    "eval_steps_per_second": 9.925,
+    "f1": 0.8812035158891143,
+    "precision": 0.8844205933342424,
+    "recall": 0.8781738047331609,
+    "train_loss": 0.32445813476062213,
+    "train_runtime": 1939.7236,
+    "train_samples": 3645,
+    "train_samples_per_second": 37.583,
+    "train_steps_per_second": 1.258
 }
eval_results.json CHANGED
@@ -1,12 +1,12 @@
 {
     "epoch": 20.0,
-    "eval_accuracy": 0.8847117794486216,
-    "eval_f1": 0.8587719298245614,
-    "eval_loss": 0.2786270081996918,
-    "eval_precision": 0.864771021021021,
-    "eval_recall": 0.8534278959810875,
-    "eval_runtime": 5.0263,
+    "eval_accuracy": 0.8771929824561403,
+    "eval_f1": 0.850729517396184,
+    "eval_loss": 0.29076284170150757,
+    "eval_precision": 0.8535087719298247,
+    "eval_recall": 0.8481087470449173,
+    "eval_runtime": 5.0376,
     "eval_samples": 399,
-    "eval_samples_per_second": 79.383,
-    "eval_steps_per_second": 9.948
+    "eval_samples_per_second": 79.205,
+    "eval_steps_per_second": 9.925
 }
predict_results.json CHANGED
@@ -1,6 +1,6 @@
 {
-    "accuracy": 0.8941641938674579,
-    "f1": 0.872841399982368,
-    "precision": 0.8724798955319228,
-    "recall": 0.8732056628105085
+    "accuracy": 0.9023904382470119,
+    "f1": 0.8812035158891143,
+    "precision": 0.8844205933342424,
+    "recall": 0.8781738047331609
 }
predict_results.txt CHANGED
@@ -2,60 +2,60 @@ index prediction
2
  0 1
3
  1 1
4
  2 1
5
- 3 0
6
  4 1
7
- 5 1
8
  6 0
9
  7 1
10
- 8 0
11
  9 1
12
  10 1
13
  11 1
14
- 12 1
15
- 13 0
16
- 14 0
17
  15 1
18
  16 1
19
- 17 1
20
  18 1
21
  19 1
22
  20 1
23
- 21 1
24
  22 1
25
  23 1
26
  24 1
27
- 25 0
28
  26 1
29
  27 1
30
  28 1
31
- 29 1
32
- 30 1
33
- 31 0
34
  32 1
35
  33 1
36
  34 1
37
  35 1
38
  36 1
39
  37 1
40
- 38 1
41
- 39 1
42
  40 1
43
  41 1
44
- 42 1
45
  43 1
46
  44 1
47
  45 1
48
- 46 0
49
  47 0
50
- 48 1
51
  49 1
52
  50 1
53
- 51 1
54
- 52 0
55
- 53 0
56
  54 1
57
  55 1
58
- 56 1
59
  57 1
60
  58 1
61
  59 1
@@ -64,41 +64,41 @@ index prediction
64
  62 1
65
  63 1
66
  64 1
67
- 65 0
68
  66 1
69
  67 1
70
- 68 1
71
  69 1
72
  70 1
73
  71 1
74
  72 1
75
  73 1
76
  74 1
77
- 75 0
78
- 76 0
79
  77 1
80
- 78 1
81
- 79 0
82
  80 1
83
- 81 1
84
  82 1
85
- 83 0
86
  84 1
87
  85 1
88
  86 1
89
  87 1
90
- 88 0
91
  89 1
92
- 90 0
93
  91 1
94
- 92 1
95
  93 1
96
  94 1
97
- 95 0
98
  96 1
99
  97 1
100
  98 1
101
- 99 1
102
  100 1
103
  101 1
104
  102 1
@@ -108,7 +108,7 @@ index prediction
108
  106 1
109
  107 1
110
  108 1
111
- 109 0
112
  110 1
113
  111 1
114
  112 1
@@ -127,49 +127,49 @@ index prediction
127
  125 1
128
  126 1
129
  127 1
130
- 128 1
131
- 129 0
132
  130 1
133
  131 1
134
- 132 1
135
  133 1
136
- 134 1
137
  135 1
138
  136 1
139
  137 1
140
  138 1
141
  139 1
142
- 140 0
143
  141 1
144
  142 1
145
- 143 1
146
  144 1
147
- 145 0
148
  146 1
149
  147 1
150
- 148 0
151
  149 1
152
  150 1
153
  151 1
154
  152 1
155
- 153 1
156
- 154 0
157
  155 1
158
- 156 0
159
  157 1
160
- 158 0
161
- 159 0
162
  160 1
163
- 161 0
164
  162 1
165
  163 1
166
  164 1
167
  165 1
168
- 166 0
169
- 167 0
170
  168 1
171
  169 0
172
- 170 1
173
  171 1
174
  172 1
175
  173 1
@@ -179,65 +179,65 @@ index prediction
179
  177 1
180
  178 1
181
  179 1
182
- 180 0
183
  181 1
184
  182 1
185
  183 1
186
- 184 1
187
- 185 1
188
  186 1
189
  187 0
190
  188 1
191
- 189 0
192
- 190 0
193
  191 1
194
  192 1
195
  193 1
196
  194 1
197
  195 1
198
  196 1
199
- 197 1
200
  198 1
201
  199 1
202
  200 1
203
  201 1
204
  202 1
205
- 203 1
206
  204 1
207
  205 1
208
- 206 0
209
- 207 1
210
- 208 1
211
  209 1
212
  210 1
213
  211 1
214
- 212 0
215
- 213 1
216
- 214 0
217
- 215 0
218
- 216 1
219
- 217 0
220
  218 1
221
  219 1
222
  220 1
223
- 221 1
224
  222 1
225
  223 1
226
- 224 1
227
- 225 1
228
  226 1
229
  227 0
230
  228 1
231
  229 1
232
- 230 0
233
  231 1
234
  232 1
235
- 233 0
236
  234 1
237
  235 1
238
  236 1
239
  237 1
240
- 238 1
241
  239 1
242
  240 1
243
  241 0
@@ -245,64 +245,64 @@ index prediction
245
  243 1
246
  244 1
247
  245 1
248
- 246 1
249
  247 1
250
  248 1
251
  249 1
252
  250 1
253
  251 1
254
- 252 0
255
  253 1
256
- 254 1
257
  255 1
258
  256 1
259
  257 1
260
  258 1
261
  259 1
262
- 260 1
263
  261 1
264
- 262 1
265
- 263 1
266
  264 1
267
  265 1
268
  266 1
269
  267 1
270
- 268 1
271
- 269 1
272
  270 1
273
- 271 0
274
  272 1
275
  273 1
276
  274 1
277
  275 1
278
  276 1
279
- 277 1
280
  278 1
281
- 279 1
282
  280 1
283
  281 0
284
- 282 0
285
  283 1
286
  284 1
287
  285 1
288
  286 1
289
- 287 0
290
  288 1
291
- 289 0
292
  290 1
293
- 291 1
294
  292 1
295
  293 1
296
- 294 1
297
- 295 1
298
- 296 1
299
  297 0
300
  298 0
301
  299 0
302
  300 0
303
  301 0
304
  302 0
305
- 303 1
306
  304 0
307
  305 0
308
  306 0
@@ -312,7 +312,7 @@ index prediction
312
  310 0
313
  311 0
314
  312 1
315
- 313 1
316
  314 0
317
  315 0
318
  316 0
@@ -325,27 +325,27 @@ index prediction
325
  323 0
326
  324 0
327
  325 0
328
- 326 1
329
  327 0
330
  328 0
331
  329 0
332
  330 0
333
  331 0
334
- 332 1
335
  333 0
336
  334 0
337
  335 0
338
  336 0
339
  337 0
340
  338 0
341
- 339 1
342
  340 0
343
  341 0
344
  342 0
345
  343 0
346
  344 0
347
  345 0
348
- 346 1
349
  347 0
350
  348 0
351
  349 0
@@ -359,19 +359,19 @@ index prediction
359
  357 0
360
  358 0
361
  359 0
362
- 360 0
363
  361 0
364
  362 0
365
  363 0
366
  364 0
367
- 365 0
368
  366 0
369
  367 0
370
  368 0
371
  369 0
372
  370 0
373
  371 0
374
- 372 1
375
  373 0
376
  374 0
377
  375 0
@@ -383,9 +383,9 @@ index prediction
383
  381 0
384
  382 0
385
  383 0
386
- 384 1
387
- 385 0
388
- 386 1
389
  387 0
390
  388 0
391
  389 0
@@ -395,8 +395,8 @@ index prediction
395
  393 0
396
  394 0
397
  395 0
398
- 396 0
399
- 397 0
400
  398 0
401
  399 0
402
  400 0
@@ -423,23 +423,23 @@ index prediction
423
  421 0
424
  422 0
425
  423 0
426
- 424 0
427
  425 0
428
  426 0
429
  427 0
430
  428 0
431
  429 0
432
- 430 0
433
  431 0
434
  432 0
435
  433 0
436
  434 0
437
- 435 1
438
  436 0
439
  437 0
440
  438 0
441
  439 0
442
- 440 1
443
  441 0
444
  442 0
445
  443 0
@@ -451,7 +451,7 @@ index prediction
451
  449 0
452
  450 0
453
  451 0
454
- 452 0
455
  453 0
456
  454 0
457
  455 0
@@ -467,19 +467,19 @@ index prediction
467
  465 0
468
  466 0
469
  467 0
470
- 468 0
471
  469 0
472
  470 0
473
  471 0
474
- 472 1
475
  473 0
476
- 474 1
477
  475 0
478
- 476 1
479
  477 0
480
  478 0
481
  479 0
482
- 480 1
483
  481 0
484
  482 0
485
  483 0
@@ -489,7 +489,7 @@ index prediction
489
  487 0
490
  488 0
491
  489 0
492
- 490 1
493
  491 0
494
  492 0
495
  493 0
@@ -498,11 +498,11 @@ index prediction
498
  496 0
499
  497 0
500
  498 0
501
- 499 0
502
- 500 0
503
  501 0
504
  502 0
505
- 503 0
506
  504 0
507
  505 0
508
  506 0
@@ -522,7 +522,7 @@ index prediction
522
  520 0
523
  521 0
524
  522 0
525
- 523 1
526
  524 0
527
  525 0
528
  526 0
@@ -531,14 +531,14 @@ index prediction
531
  529 0
532
  530 0
533
  531 0
534
- 532 0
535
  533 0
536
- 534 1
537
  535 0
538
  536 0
539
  537 0
540
  538 0
541
- 539 1
542
  540 0
543
  541 0
544
  542 0
@@ -546,7 +546,7 @@ index prediction
546
  544 0
547
  545 0
548
  546 0
549
- 547 0
550
  548 0
551
  549 0
552
  550 0
@@ -564,9 +564,9 @@ index prediction
564
  562 0
565
  563 0
566
  564 0
567
- 565 1
568
  566 0
569
- 567 1
570
  568 0
571
  569 0
572
  570 0
@@ -574,18 +574,18 @@ index prediction
574
  572 0
575
  573 0
576
  574 0
577
- 575 0
578
  576 0
579
  577 0
580
  578 0
581
  579 0
582
  580 0
583
  581 0
584
- 582 0
585
  583 0
586
  584 0
587
  585 0
588
- 586 0
589
  587 0
590
  588 0
591
  589 0
@@ -596,7 +596,7 @@ index prediction
596
  594 0
597
  595 0
598
  596 0
599
- 597 1
600
  598 0
601
  599 0
602
  600 0
@@ -604,13 +604,13 @@ index prediction
604
  602 0
605
  603 0
606
  604 0
607
- 605 1
608
  606 0
609
  607 0
610
  608 0
611
  609 0
612
  610 0
613
- 611 0
614
  612 0
615
  613 0
616
  614 0
@@ -656,12 +656,12 @@ index prediction
656
  654 0
657
  655 0
658
  656 0
659
- 657 0
660
- 658 0
661
  659 0
662
  660 0
663
  661 0
664
- 662 0
665
  663 0
666
  664 0
667
  665 0
@@ -672,7 +672,7 @@ index prediction
672
  670 0
673
  671 0
674
  672 0
675
- 673 1
676
  674 0
677
  675 0
678
  676 0
@@ -681,18 +681,18 @@ index prediction
681
  679 0
682
  680 0
683
  681 0
684
- 682 0
685
  683 0
686
  684 0
687
  685 0
688
  686 0
689
  687 0
690
  688 0
691
- 689 1
692
  690 0
693
  691 0
694
  692 0
695
- 693 1
696
  694 0
697
  695 0
698
  696 0
@@ -700,7 +700,7 @@ index prediction
700
  698 0
701
  699 0
702
  700 0
703
- 701 1
704
  702 0
705
  703 0
706
  704 0
@@ -727,9 +727,9 @@ index prediction
727
  725 0
728
  726 0
729
  727 0
730
- 728 1
731
- 729 0
732
- 730 0
733
  731 0
734
  732 0
735
  733 0
@@ -746,7 +746,7 @@ index prediction
746
  744 0
747
  745 0
748
  746 0
749
- 747 1
750
  748 0
751
  749 0
752
  750 0
@@ -766,7 +766,7 @@ index prediction
766
  764 0
767
  765 0
768
  766 0
769
- 767 0
770
  768 0
771
  769 0
772
  770 0
@@ -790,33 +790,33 @@ index prediction
790
  788 0
791
  789 0
792
  790 0
793
- 791 0
794
  792 0
795
- 793 0
796
  794 0
797
  795 0
798
  796 0
799
- 797 0
800
  798 1
801
- 799 1
802
  800 0
803
  801 0
804
- 802 1
805
  803 0
806
  804 0
807
  805 0
808
  806 0
809
  807 0
810
  808 0
811
- 809 1
812
- 810 1
813
  811 0
814
  812 0
815
  813 0
816
  814 0
817
  815 0
818
  816 0
819
- 817 0
820
  818 0
821
  819 0
822
  820 0
@@ -825,19 +825,19 @@ index prediction
825
  823 0
826
  824 0
827
  825 0
828
- 826 1
829
  827 0
830
  828 0
831
- 829 0
832
  830 0
833
- 831 1
834
- 832 1
835
  833 0
836
  834 0
837
- 835 0
838
  836 0
839
  837 0
840
- 838 1
841
  839 0
842
  840 0
843
  841 0
@@ -893,7 +893,7 @@ index prediction
893
  891 0
894
  892 0
895
  893 0
896
- 894 0
897
  895 0
898
  896 0
899
  897 0
@@ -905,13 +905,13 @@ index prediction
905
  903 0
906
  904 0
907
  905 0
908
- 906 0
909
  907 0
910
  908 0
911
  909 0
912
  910 0
913
  911 0
914
- 912 1
915
  913 0
916
  914 0
917
  915 0
@@ -920,15 +920,15 @@ index prediction
920
  918 0
921
  919 0
922
  920 0
923
- 921 1
924
- 922 0
925
  923 0
926
- 924 1
927
  925 0
928
- 926 1
929
- 927 1
930
  928 0
931
- 929 1
932
  930 0
933
  931 0
934
  932 0
@@ -938,13 +938,13 @@ index prediction
938
  936 0
939
  937 0
940
  938 0
941
- 939 0
942
  940 0
943
- 941 1
944
  942 0
945
  943 0
946
  944 0
947
- 945 1
948
  946 0
949
  947 0
950
  948 0
@@ -956,11 +956,11 @@ index prediction
956
  954 0
957
  955 0
958
  956 0
959
- 957 0
960
  958 0
961
  959 0
962
  960 0
963
- 961 1
964
  962 0
965
  963 0
966
  964 0
@@ -973,12 +973,12 @@ index prediction
973
  971 0
974
  972 0
975
  973 0
976
- 974 1
977
  975 0
978
  976 0
979
- 977 1
980
  978 0
981
- 979 0
982
  980 0
983
  981 0
984
  982 0
@@ -988,25 +988,18 @@ index prediction
988
  986 0
989
  987 0
990
  988 0
991
- 989 0
992
  990 0
993
  991 0
994
- 992 0
995
  993 0
996
  994 0
997
  995 0
998
  996 0
999
  997 0
1000
  998 0
1001
- 999 1
1002
  1000 0
1003
- 1001 0
1004
  1002 0
1005
- 1003 0
1006
- 1004 0
1007
- 1005 0
1008
- 1006 0
1009
- 1007 0
1010
- 1008 0
1011
- 1009 1
1012
- 1010 0
 
2
  0 1
3
  1 1
4
  2 1
5
+ 3 1
6
  4 1
7
+ 5 0
8
  6 0
9
  7 1
10
+ 8 1
11
  9 1
12
  10 1
13
  11 1
14
+ 12 0
15
+ 13 1
16
+ 14 1
17
  15 1
18
  16 1
19
+ 17 0
20
  18 1
21
  19 1
22
  20 1
23
+ 21 0
24
  22 1
25
  23 1
26
  24 1
27
+ 25 1
28
  26 1
29
  27 1
30
  28 1
31
+ 29 0
32
+ 30 0
33
+ 31 1
34
  32 1
35
  33 1
36
  34 1
37
  35 1
38
  36 1
39
  37 1
40
+ 38 0
41
+ 39 0
42
  40 1
43
  41 1
44
+ 42 0
45
  43 1
46
  44 1
47
  45 1
48
+ 46 1
49
  47 0
50
+ 48 0
51
  49 1
52
  50 1
53
+ 51 0
54
+ 52 1
55
+ 53 1
56
  54 1
57
  55 1
58
+ 56 0
59
  57 1
60
  58 1
61
  59 1
 
64
  62 1
65
  63 1
66
  64 1
67
+ 65 1
68
  66 1
69
  67 1
70
+ 68 0
71
  69 1
72
  70 1
73
  71 1
74
  72 1
75
  73 1
76
  74 1
77
+ 75 1
78
+ 76 1
79
  77 1
80
+ 78 0
81
+ 79 1
82
  80 1
83
+ 81 0
84
  82 1
85
+ 83 1
86
  84 1
87
  85 1
88
  86 1
89
  87 1
90
+ 88 1
91
  89 1
92
+ 90 1
93
  91 1
94
+ 92 0
95
  93 1
96
  94 1
97
+ 95 1
98
  96 1
99
  97 1
100
  98 1
101
+ 99 0
102
  100 1
103
  101 1
104
  102 1
 
108
  106 1
109
  107 1
110
  108 1
111
+ 109 1
112
  110 1
113
  111 1
114
  112 1
 
127
  125 1
128
  126 1
129
  127 1
130
+ 128 0
131
+ 129 1
132
  130 1
133
  131 1
134
+ 132 0
135
  133 1
136
+ 134 0
137
  135 1
138
  136 1
139
  137 1
140
  138 1
141
  139 1
142
+ 140 1
143
  141 1
144
  142 1
145
+ 143 0
146
  144 1
147
+ 145 1
148
  146 1
149
  147 1
150
+ 148 1
151
  149 1
152
  150 1
153
  151 1
154
  152 1
155
+ 153 0
156
+ 154 1
157
  155 1
158
+ 156 1
159
  157 1
160
+ 158 1
161
+ 159 1
162
  160 1
163
+ 161 1
164
  162 1
165
  163 1
166
  164 1
167
  165 1
168
+ 166 1
169
+ 167 1
170
  168 1
171
  169 0
172
+ 170 0
173
  171 1
174
  172 1
175
  173 1
 
179
  177 1
180
  178 1
181
  179 1
182
+ 180 1
183
  181 1
184
  182 1
185
  183 1
186
+ 184 0
187
+ 185 0
188
  186 1
189
  187 0
190
  188 1
191
+ 189 1
192
+ 190 1
193
  191 1
194
  192 1
195
  193 1
196
  194 1
197
  195 1
198
  196 1
199
+ 197 0
200
  198 1
201
  199 1
202
  200 1
203
  201 1
204
  202 1
205
+ 203 0
206
  204 1
207
  205 1
208
+ 206 1
209
+ 207 0
210
+ 208 0
211
  209 1
212
  210 1
213
  211 1
214
+ 212 1
215
+ 213 0
216
+ 214 1
217
+ 215 1
218
+ 216 0
219
+ 217 1
220
  218 1
221
  219 1
222
  220 1
223
+ 221 0
224
  222 1
225
  223 1
226
+ 224 0
227
+ 225 0
228
  226 1
229
  227 0
230
  228 1
231
  229 1
232
+ 230 1
233
  231 1
234
  232 1
235
+ 233 1
236
  234 1
237
  235 1
238
  236 1
239
  237 1
240
+ 238 0
241
  239 1
242
  240 1
243
  241 0
 
245
  243 1
246
  244 1
247
  245 1
248
+ 246 0
249
  247 1
250
  248 1
251
  249 1
252
  250 1
253
  251 1
254
+ 252 1
255
  253 1
256
+ 254 0
257
  255 1
258
  256 1
259
  257 1
260
  258 1
261
  259 1
262
+ 260 0
263
  261 1
264
+ 262 0
265
+ 263 0
266
  264 1
267
  265 1
268
  266 1
269
  267 1
270
+ 268 0
271
+ 269 0
272
  270 1
273
+ 271 1
274
  272 1
275
  273 1
276
  274 1
277
  275 1
278
  276 1
279
+ 277 0
280
  278 1
281
+ 279 0
282
  280 1
283
  281 0
284
+ 282 1
285
  283 1
286
  284 1
287
  285 1
288
  286 1
289
+ 287 1
290
  288 1
291
+ 289 1
292
  290 1
293
+ 291 0
294
  292 1
295
  293 1
296
+ 294 0
297
+ 295 0
298
+ 296 0
299
  297 0
300
  298 0
301
  299 0
302
  300 0
303
  301 0
304
  302 0
305
+ 303 0
306
  304 0
307
  305 0
308
  306 0
 
312
  310 0
313
  311 0
314
  312 1
315
+ 313 0
316
  314 0
317
  315 0
318
  316 0
 
325
  323 0
326
  324 0
327
  325 0
328
+ 326 0
329
  327 0
330
  328 0
331
  329 0
332
  330 0
333
  331 0
334
+ 332 0
335
  333 0
336
  334 0
337
  335 0
338
  336 0
339
  337 0
340
  338 0
341
+ 339 0
342
  340 0
343
  341 0
344
  342 0
345
  343 0
346
  344 0
347
  345 0
348
+ 346 0
349
  347 0
350
  348 0
351
  349 0
 
359
  357 0
360
  358 0
361
  359 0
362
+ 360 1
363
  361 0
364
  362 0
365
  363 0
366
  364 0
367
+ 365 1
368
  366 0
369
  367 0
370
  368 0
371
  369 0
372
  370 0
373
  371 0
374
+ 372 0
375
  373 0
376
  374 0
377
  375 0
 
383
  381 0
384
  382 0
385
  383 0
386
+ 384 0
387
+ 385 1
388
+ 386 0
389
  387 0
390
  388 0
391
  389 0
 
395
  393 0
396
  394 0
397
  395 0
398
+ 396 1
399
+ 397 1
400
  398 0
401
  399 0
402
  400 0
 
423
  421 0
424
  422 0
425
  423 0
426
+ 424 1
427
  425 0
428
  426 0
429
  427 0
430
  428 0
431
  429 0
432
+ 430 1
433
  431 0
434
  432 0
435
  433 0
436
  434 0
437
+ 435 0
438
  436 0
439
  437 0
440
  438 0
441
  439 0
442
+ 440 0
443
  441 0
444
  442 0
445
  443 0
 
451
  449 0
452
  450 0
453
  451 0
454
+ 452 1
455
  453 0
456
  454 0
457
  455 0
 
467
  465 0
468
  466 0
469
  467 0
470
+ 468 1
471
  469 0
472
  470 0
473
  471 0
474
+ 472 0
475
  473 0
476
+ 474 0
477
  475 0
478
+ 476 0
479
  477 0
480
  478 0
481
  479 0
482
+ 480 0
483
  481 0
484
  482 0
485
  483 0
 
489
  487 0
490
  488 0
491
  489 0
492
+ 490 0
493
  491 0
494
  492 0
495
  493 0
 
498
  496 0
499
  497 0
500
  498 0
501
+ 499 1
502
+ 500 1
503
  501 0
504
  502 0
505
+ 503 1
506
  504 0
507
  505 0
508
  506 0
 
522
  520 0
523
  521 0
524
  522 0
525
+ 523 0
526
  524 0
527
  525 0
528
  526 0
 
531
  529 0
532
  530 0
533
  531 0
534
+ 532 1
535
  533 0
536
+ 534 0
537
  535 0
538
  536 0
539
  537 0
540
  538 0
541
+ 539 0
542
  540 0
543
  541 0
544
  542 0
 
546
  544 0
547
  545 0
548
  546 0
549
+ 547 1
550
  548 0
551
  549 0
552
  550 0
 
564
  562 0
565
  563 0
566
  564 0
567
+ 565 0
568
  566 0
569
+ 567 0
570
  568 0
571
  569 0
572
  570 0
 
574
  572 0
575
  573 0
576
  574 0
577
+ 575 1
578
  576 0
579
  577 0
580
  578 0
581
  579 0
582
  580 0
583
  581 0
584
+ 582 1
585
  583 0
586
  584 0
587
  585 0
588
+ 586 1
589
  587 0
590
  588 0
591
  589 0
 
596
  594 0
597
  595 0
598
  596 0
599
+ 597 0
600
  598 0
601
  599 0
602
  600 0
 
604
  602 0
605
  603 0
606
  604 0
607
+ 605 0
608
  606 0
609
  607 0
610
  608 0
611
  609 0
612
  610 0
613
+ 611 1
614
  612 0
615
  613 0
616
  614 0
 
656
  654 0
657
  655 0
658
  656 0
659
+ 657 1
660
+ 658 1
661
  659 0
662
  660 0
663
  661 0
664
+ 662 1
665
  663 0
666
  664 0
667
  665 0
 
672
  670 0
673
  671 0
674
  672 0
675
+ 673 0
676
  674 0
677
  675 0
678
  676 0
 
681
  679 0
682
  680 0
683
  681 0
684
+ 682 1
685
  683 0
686
  684 0
687
  685 0
688
  686 0
689
  687 0
690
  688 0
691
+ 689 0
692
  690 0
693
  691 0
694
  692 0
695
+ 693 0
696
  694 0
697
  695 0
698
  696 0
 
700
  698 0
701
  699 0
702
  700 0
703
+ 701 0
704
  702 0
705
  703 0
706
  704 0
 
727
  725 0
728
  726 0
729
  727 0
730
+ 728 0
731
+ 729 1
732
+ 730 1
733
  731 0
734
  732 0
735
  733 0
 
746
  744 0
747
  745 0
748
  746 0
749
+ 747 0
750
  748 0
751
  749 0
752
  750 0
 
766
  764 0
767
  765 0
768
  766 0
769
+ 767 1
770
  768 0
771
  769 0
772
  770 0
 
790
  788 0
791
  789 0
792
  790 0
793
+ 791 1
794
  792 0
795
+ 793 1
796
  794 0
797
  795 0
798
  796 0
799
+ 797 1
800
  798 1
801
+ 799 0
802
  800 0
803
  801 0
804
+ 802 0
805
  803 0
806
  804 0
807
  805 0
808
  806 0
809
  807 0
810
  808 0
811
+ 809 0
812
+ 810 0
813
  811 0
814
  812 0
815
  813 0
816
  814 0
817
  815 0
818
  816 0
819
+ 817 1
820
  818 0
821
  819 0
822
  820 0
 
825
  823 0
826
  824 0
827
  825 0
828
+ 826 0
829
  827 0
830
  828 0
831
+ 829 1
832
  830 0
833
+ 831 0
834
+ 832 0
835
  833 0
836
  834 0
837
+ 835 1
838
  836 0
839
  837 0
840
+ 838 0
841
  839 0
842
  840 0
843
  841 0
 
893
  891 0
894
  892 0
895
  893 0
896
+ 894 1
897
  895 0
898
  896 0
899
  897 0
 
905
  903 0
906
  904 0
907
  905 0
908
+ 906 1
909
  907 0
910
  908 0
911
  909 0
912
  910 0
913
  911 0
914
+ 912 0
915
  913 0
916
  914 0
917
  915 0
 
920
  918 0
921
  919 0
922
  920 0
923
+ 921 0
924
+ 922 1
925
  923 0
926
+ 924 0
927
  925 0
928
+ 926 0
929
+ 927 0
930
  928 0
931
+ 929 0
932
  930 0
933
  931 0
934
  932 0
 
938
  936 0
939
  937 0
940
  938 0
941
+ 939 1
942
  940 0
943
+ 941 0
944
  942 0
945
  943 0
946
  944 0
947
+ 945 0
948
  946 0
949
  947 0
950
  948 0
 
956
  954 0
957
  955 0
958
  956 0
959
+ 957 1
960
  958 0
961
  959 0
962
  960 0
963
+ 961 0
964
  962 0
965
  963 0
966
  964 0
 
973
  971 0
974
  972 0
975
  973 0
976
+ 974 0
977
  975 0
978
  976 0
979
+ 977 0
980
  978 0
981
+ 979 1
982
  980 0
983
  981 0
984
  982 0
 
988
  986 0
989
  987 0
990
  988 0
991
+ 989 1
992
  990 0
993
  991 0
994
+ 992 1
995
  993 0
996
  994 0
997
  995 0
998
  996 0
999
  997 0
1000
  998 0
1001
+ 999 0
1002
  1000 0
1003
+ 1001 1
1004
  1002 0
1005
+ 1003 1
runs/May27_00-11-16_indolem-petl-vm/events.out.tfevents.1716770635.indolem-petl-vm.3191687.1 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:78c7640c7371cf49078cfb32411c7e83a09df491ca05ec168b44f41968494294
+size 560
train_results.json CHANGED
@@ -1,8 +1,8 @@
 {
     "epoch": 20.0,
-    "train_loss": 0.30967485005738304,
-    "train_runtime": 1934.4038,
-    "train_samples": 3638,
-    "train_samples_per_second": 37.614,
-    "train_steps_per_second": 1.261
+    "train_loss": 0.32445813476062213,
+    "train_runtime": 1939.7236,
+    "train_samples": 3645,
+    "train_samples_per_second": 37.583,
+    "train_steps_per_second": 1.258
 }
trainer_state.json CHANGED
@@ -10,392 +10,392 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 5.93964147567749,
14
  "learning_rate": 4.75e-05,
15
- "loss": 0.5623,
16
  "step": 122
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_accuracy": 0.7268170426065163,
21
- "eval_f1": 0.6301313943104988,
22
- "eval_loss": 0.5216777324676514,
23
- "eval_precision": 0.6603762281332375,
24
- "eval_recall": 0.6217039461720313,
25
- "eval_runtime": 5.0877,
26
- "eval_samples_per_second": 78.424,
27
- "eval_steps_per_second": 9.828,
28
  "step": 122
29
  },
30
  {
31
  "epoch": 2.0,
32
- "grad_norm": 3.8485560417175293,
33
  "learning_rate": 4.5e-05,
34
- "loss": 0.5061,
35
  "step": 244
36
  },
37
  {
38
  "epoch": 2.0,
39
- "eval_accuracy": 0.7568922305764411,
40
- "eval_f1": 0.7089058868656783,
41
- "eval_loss": 0.48978015780448914,
42
- "eval_precision": 0.7074372759856631,
43
- "eval_recall": 0.7104928168757956,
44
- "eval_runtime": 5.0629,
45
- "eval_samples_per_second": 78.809,
46
- "eval_steps_per_second": 9.876,
47
  "step": 244
48
  },
49
  {
50
  "epoch": 3.0,
51
- "grad_norm": 5.005080699920654,
52
  "learning_rate": 4.25e-05,
53
- "loss": 0.4443,
54
  "step": 366
55
  },
56
  {
57
  "epoch": 3.0,
58
- "eval_accuracy": 0.8120300751879699,
59
- "eval_f1": 0.7678883071553229,
60
- "eval_loss": 0.40850991010665894,
61
- "eval_precision": 0.7750572737686139,
62
- "eval_recall": 0.7620021822149482,
63
- "eval_runtime": 5.144,
64
- "eval_samples_per_second": 77.566,
65
- "eval_steps_per_second": 9.72,
66
  "step": 366
67
  },
68
  {
69
  "epoch": 4.0,
70
- "grad_norm": 2.8029582500457764,
71
  "learning_rate": 4e-05,
72
- "loss": 0.3805,
73
  "step": 488
74
  },
75
  {
76
  "epoch": 4.0,
77
  "eval_accuracy": 0.8245614035087719,
78
- "eval_f1": 0.7751786979200206,
79
- "eval_loss": 0.3672122061252594,
80
- "eval_precision": 0.7980263157894737,
81
- "eval_recall": 0.7608656119294417,
82
- "eval_runtime": 5.0838,
83
- "eval_samples_per_second": 78.485,
84
- "eval_steps_per_second": 9.835,
85
  "step": 488
86
  },
87
  {
88
  "epoch": 5.0,
89
- "grad_norm": 3.751908540725708,
90
  "learning_rate": 3.7500000000000003e-05,
91
- "loss": 0.3488,
92
  "step": 610
93
  },
94
  {
95
  "epoch": 5.0,
96
- "eval_accuracy": 0.8521303258145363,
97
- "eval_f1": 0.8229427559286084,
98
- "eval_loss": 0.35348454117774963,
99
- "eval_precision": 0.8206541218637993,
100
- "eval_recall": 0.8253773413347881,
101
- "eval_runtime": 5.0536,
102
- "eval_samples_per_second": 78.953,
103
- "eval_steps_per_second": 9.894,
104
  "step": 610
105
  },
106
  {
107
  "epoch": 6.0,
108
- "grad_norm": 4.316796779632568,
109
  "learning_rate": 3.5e-05,
110
- "loss": 0.3156,
111
  "step": 732
112
  },
113
  {
114
  "epoch": 6.0,
115
- "eval_accuracy": 0.8571428571428571,
116
- "eval_f1": 0.8254579780661698,
117
- "eval_loss": 0.33368828892707825,
118
- "eval_precision": 0.8299216027874565,
119
- "eval_recall": 0.8214220767412257,
120
- "eval_runtime": 5.0533,
121
- "eval_samples_per_second": 78.959,
122
- "eval_steps_per_second": 9.895,
123
  "step": 732
124
  },
125
  {
126
  "epoch": 7.0,
127
- "grad_norm": 18.979951858520508,
128
  "learning_rate": 3.2500000000000004e-05,
129
- "loss": 0.3055,
130
  "step": 854
131
  },
132
  {
133
  "epoch": 7.0,
134
- "eval_accuracy": 0.8621553884711779,
135
- "eval_f1": 0.8297847585805701,
136
- "eval_loss": 0.3217175304889679,
137
- "eval_precision": 0.8385357006491028,
138
- "eval_recall": 0.8224677214038916,
139
- "eval_runtime": 5.0744,
140
- "eval_samples_per_second": 78.63,
141
- "eval_steps_per_second": 9.853,
142
  "step": 854
143
  },
144
  {
145
  "epoch": 8.0,
146
- "grad_norm": 5.143575191497803,
147
  "learning_rate": 3e-05,
148
- "loss": 0.2995,
149
  "step": 976
150
  },
151
  {
152
  "epoch": 8.0,
153
- "eval_accuracy": 0.8596491228070176,
154
- "eval_f1": 0.8271551457392166,
155
- "eval_loss": 0.31452828645706177,
156
- "eval_precision": 0.8347358430876305,
157
- "eval_recall": 0.8206946717585015,
158
- "eval_runtime": 5.0883,
159
- "eval_samples_per_second": 78.414,
160
- "eval_steps_per_second": 9.826,
161
  "step": 976
162
  },
163
  {
164
  "epoch": 9.0,
165
- "grad_norm": 6.147844314575195,
166
  "learning_rate": 2.7500000000000004e-05,
167
- "loss": 0.2825,
168
  "step": 1098
169
  },
170
  {
171
  "epoch": 9.0,
172
- "eval_accuracy": 0.8671679197994987,
173
- "eval_f1": 0.8393634395533442,
174
- "eval_loss": 0.3090471923351288,
175
- "eval_precision": 0.8402278542707444,
176
- "eval_recall": 0.8385160938352427,
177
- "eval_runtime": 5.0782,
178
- "eval_samples_per_second": 78.572,
179
- "eval_steps_per_second": 9.846,
180
  "step": 1098
181
  },
182
  {
183
  "epoch": 10.0,
184
- "grad_norm": 5.50480842590332,
185
  "learning_rate": 2.5e-05,
186
- "loss": 0.272,
187
  "step": 1220
188
  },
189
  {
190
  "epoch": 10.0,
191
- "eval_accuracy": 0.8721804511278195,
192
- "eval_f1": 0.8461962888779714,
193
- "eval_loss": 0.29923897981643677,
194
- "eval_precision": 0.8453465227094517,
195
- "eval_recall": 0.8470631023822512,
196
- "eval_runtime": 5.0745,
197
- "eval_samples_per_second": 78.629,
198
- "eval_steps_per_second": 9.853,
199
  "step": 1220
200
  },
201
  {
202
  "epoch": 11.0,
203
- "grad_norm": 6.085771083831787,
204
  "learning_rate": 2.25e-05,
205
- "loss": 0.2626,
206
  "step": 1342
207
  },
208
  {
209
  "epoch": 11.0,
210
- "eval_accuracy": 0.87468671679198,
211
- "eval_f1": 0.8439846096096095,
212
- "eval_loss": 0.30075788497924805,
213
- "eval_precision": 0.8568027210884354,
214
- "eval_recall": 0.8338334242589562,
215
- "eval_runtime": 5.0607,
216
- "eval_samples_per_second": 78.843,
217
- "eval_steps_per_second": 9.88,
218
  "step": 1342
219
  },
220
  {
221
  "epoch": 12.0,
222
- "grad_norm": 10.957924842834473,
223
  "learning_rate": 2e-05,
224
- "loss": 0.2641,
225
  "step": 1464
226
  },
227
  {
228
  "epoch": 12.0,
229
- "eval_accuracy": 0.87468671679198,
230
- "eval_f1": 0.8488361520276414,
231
- "eval_loss": 0.2949255406856537,
232
- "eval_precision": 0.8488361520276414,
233
- "eval_recall": 0.8488361520276414,
234
- "eval_runtime": 5.0742,
235
- "eval_samples_per_second": 78.634,
236
- "eval_steps_per_second": 9.854,
237
  "step": 1464
238
  },
239
  {
240
  "epoch": 13.0,
241
- "grad_norm": 3.114020347595215,
242
  "learning_rate": 1.75e-05,
243
- "loss": 0.257,
244
  "step": 1586
245
  },
246
  {
247
  "epoch": 13.0,
248
- "eval_accuracy": 0.8771929824561403,
249
- "eval_f1": 0.8475258334958082,
250
- "eval_loss": 0.28850188851356506,
251
- "eval_precision": 0.8591828192414193,
252
- "eval_recall": 0.8381069285324605,
253
- "eval_runtime": 5.0549,
254
- "eval_samples_per_second": 78.934,
255
- "eval_steps_per_second": 9.891,
256
  "step": 1586
257
  },
258
  {
259
  "epoch": 14.0,
260
- "grad_norm": 4.347434043884277,
261
  "learning_rate": 1.5e-05,
262
- "loss": 0.2473,
263
  "step": 1708
264
  },
265
  {
266
  "epoch": 14.0,
267
- "eval_accuracy": 0.8822055137844611,
268
- "eval_f1": 0.8568221901555235,
269
- "eval_loss": 0.2826312482357025,
270
- "eval_precision": 0.8596491228070176,
271
- "eval_recall": 0.8541553009638116,
272
- "eval_runtime": 5.0506,
273
- "eval_samples_per_second": 79.001,
274
- "eval_steps_per_second": 9.9,
275
  "step": 1708
276
  },
277
  {
278
  "epoch": 15.0,
279
- "grad_norm": 1.6674721240997314,
280
  "learning_rate": 1.25e-05,
281
- "loss": 0.2456,
282
  "step": 1830
283
  },
284
  {
285
  "epoch": 15.0,
286
- "eval_accuracy": 0.8847117794486216,
287
- "eval_f1": 0.8609292598654301,
288
- "eval_loss": 0.2825632393360138,
289
- "eval_precision": 0.8609292598654301,
290
- "eval_recall": 0.8609292598654301,
291
- "eval_runtime": 5.071,
292
- "eval_samples_per_second": 78.683,
293
- "eval_steps_per_second": 9.86,
294
  "step": 1830
295
  },
296
  {
297
  "epoch": 16.0,
298
- "grad_norm": 5.605799674987793,
299
  "learning_rate": 1e-05,
300
- "loss": 0.2477,
301
  "step": 1952
302
  },
303
  {
304
  "epoch": 16.0,
305
- "eval_accuracy": 0.8847117794486216,
306
- "eval_f1": 0.8602260265626904,
307
- "eval_loss": 0.2795054018497467,
308
- "eval_precision": 0.8620943049601959,
309
- "eval_recall": 0.8584288052373159,
310
- "eval_runtime": 5.063,
311
- "eval_samples_per_second": 78.807,
312
- "eval_steps_per_second": 9.876,
313
  "step": 1952
314
  },
315
  {
316
  "epoch": 17.0,
317
- "grad_norm": 1.4996304512023926,
318
  "learning_rate": 7.5e-06,
319
- "loss": 0.2426,
320
  "step": 2074
321
  },
322
  {
323
  "epoch": 17.0,
324
- "eval_accuracy": 0.8796992481203008,
325
- "eval_f1": 0.8526315789473684,
326
- "eval_loss": 0.2793760895729065,
327
- "eval_precision": 0.8585304054054055,
328
- "eval_recall": 0.8473813420621932,
329
- "eval_runtime": 5.0574,
330
- "eval_samples_per_second": 78.894,
331
- "eval_steps_per_second": 9.886,
332
  "step": 2074
333
  },
334
  {
335
  "epoch": 18.0,
336
- "grad_norm": 3.353811264038086,
337
  "learning_rate": 5e-06,
338
- "loss": 0.2359,
339
  "step": 2196
340
  },
341
  {
342
  "epoch": 18.0,
343
- "eval_accuracy": 0.8872180451127819,
344
- "eval_f1": 0.8629148629148629,
345
- "eval_loss": 0.27959930896759033,
346
- "eval_precision": 0.8657894736842104,
347
- "eval_recall": 0.860201854882706,
348
- "eval_runtime": 5.0719,
349
- "eval_samples_per_second": 78.668,
350
- "eval_steps_per_second": 9.858,
351
  "step": 2196
352
  },
353
  {
354
  "epoch": 19.0,
355
- "grad_norm": 6.069816589355469,
356
  "learning_rate": 2.5e-06,
357
- "loss": 0.2417,
358
  "step": 2318
359
  },
360
  {
361
  "epoch": 19.0,
362
- "eval_accuracy": 0.8847117794486216,
363
- "eval_f1": 0.8587719298245614,
364
- "eval_loss": 0.27870801091194153,
365
- "eval_precision": 0.864771021021021,
366
- "eval_recall": 0.8534278959810875,
367
- "eval_runtime": 5.0523,
368
- "eval_samples_per_second": 78.973,
369
- "eval_steps_per_second": 9.896,
370
  "step": 2318
371
  },
372
  {
373
  "epoch": 20.0,
374
- "grad_norm": 4.868233680725098,
375
  "learning_rate": 0.0,
376
- "loss": 0.2319,
377
  "step": 2440
378
  },
379
  {
380
  "epoch": 20.0,
381
- "eval_accuracy": 0.8847117794486216,
382
- "eval_f1": 0.8587719298245614,
383
- "eval_loss": 0.2786270081996918,
384
- "eval_precision": 0.864771021021021,
385
- "eval_recall": 0.8534278959810875,
386
- "eval_runtime": 5.0608,
387
- "eval_samples_per_second": 78.841,
388
- "eval_steps_per_second": 9.88,
389
  "step": 2440
390
  },
391
  {
392
  "epoch": 20.0,
393
  "step": 2440,
394
- "total_flos": 7609911792720000.0,
395
- "train_loss": 0.30967485005738304,
396
- "train_runtime": 1934.4038,
397
- "train_samples_per_second": 37.614,
398
- "train_steps_per_second": 1.261
399
  }
400
  ],
401
  "logging_steps": 500,
@@ -403,7 +403,7 @@
403
  "num_input_tokens_seen": 0,
404
  "num_train_epochs": 20,
405
  "save_steps": 500,
406
- "total_flos": 7609911792720000.0,
407
  "train_batch_size": 30,
408
  "trial_name": null,
409
  "trial_params": null
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 3.6051695346832275,
14
  "learning_rate": 4.75e-05,
15
+ "loss": 0.556,
16
  "step": 122
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_accuracy": 0.7167919799498746,
21
+ "eval_f1": 0.6641239002659476,
22
+ "eval_loss": 0.5324748158454895,
23
+ "eval_precision": 0.6617008797653958,
24
+ "eval_recall": 0.6671212947808693,
25
+ "eval_runtime": 5.0818,
26
+ "eval_samples_per_second": 78.516,
27
+ "eval_steps_per_second": 9.839,
28
  "step": 122
29
  },
30
  {
31
  "epoch": 2.0,
32
+ "grad_norm": 3.362328290939331,
33
  "learning_rate": 4.5e-05,
34
+ "loss": 0.5103,
35
  "step": 244
36
  },
37
  {
38
  "epoch": 2.0,
39
+ "eval_accuracy": 0.7719298245614035,
40
+ "eval_f1": 0.6523624874335775,
41
+ "eval_loss": 0.4822019934654236,
42
+ "eval_precision": 0.7714565527065527,
43
+ "eval_recall": 0.6386161120203673,
44
+ "eval_runtime": 5.0545,
45
+ "eval_samples_per_second": 78.94,
46
+ "eval_steps_per_second": 9.892,
47
  "step": 244
48
  },
49
  {
50
  "epoch": 3.0,
51
+ "grad_norm": 4.656192779541016,
52
  "learning_rate": 4.25e-05,
53
+ "loss": 0.4637,
54
  "step": 366
55
  },
56
  {
57
  "epoch": 3.0,
58
+ "eval_accuracy": 0.8045112781954887,
59
+ "eval_f1": 0.7479591836734694,
60
+ "eval_loss": 0.42453086376190186,
61
+ "eval_precision": 0.771505376344086,
62
+ "eval_recall": 0.7341789416257501,
63
+ "eval_runtime": 5.075,
64
+ "eval_samples_per_second": 78.62,
65
+ "eval_steps_per_second": 9.852,
66
  "step": 366
67
  },
68
  {
69
  "epoch": 4.0,
70
+ "grad_norm": 1.4060696363449097,
71
  "learning_rate": 4e-05,
72
+ "loss": 0.4173,
73
  "step": 488
74
  },
75
  {
76
  "epoch": 4.0,
77
  "eval_accuracy": 0.8245614035087719,
78
+ "eval_f1": 0.7873004752040941,
79
+ "eval_loss": 0.38980478048324585,
80
+ "eval_precision": 0.7887936313533375,
81
+ "eval_recall": 0.7858701582105838,
82
+ "eval_runtime": 5.064,
83
+ "eval_samples_per_second": 78.791,
84
+ "eval_steps_per_second": 9.874,
85
  "step": 488
86
  },
87
  {
88
  "epoch": 5.0,
89
+ "grad_norm": 3.650151491165161,
90
  "learning_rate": 3.7500000000000003e-05,
91
+ "loss": 0.3674,
92
  "step": 610
93
  },
94
  {
95
  "epoch": 5.0,
96
+ "eval_accuracy": 0.8370927318295739,
97
+ "eval_f1": 0.7999105055663995,
98
+ "eval_loss": 0.3571353554725647,
99
+ "eval_precision": 0.8058980811575966,
100
+ "eval_recall": 0.794735406437534,
101
+ "eval_runtime": 5.0507,
102
+ "eval_samples_per_second": 78.999,
103
+ "eval_steps_per_second": 9.9,
104
  "step": 610
105
  },
106
  {
107
  "epoch": 6.0,
108
+ "grad_norm": 2.5915608406066895,
109
  "learning_rate": 3.5e-05,
110
+ "loss": 0.3484,
111
  "step": 732
112
  },
113
  {
114
  "epoch": 6.0,
115
+ "eval_accuracy": 0.8370927318295739,
116
+ "eval_f1": 0.8029928975654221,
117
+ "eval_loss": 0.3431943356990814,
118
+ "eval_precision": 0.8037650785914463,
119
+ "eval_recall": 0.8022367703218767,
120
+ "eval_runtime": 5.135,
121
+ "eval_samples_per_second": 77.703,
122
+ "eval_steps_per_second": 9.737,
123
  "step": 732
124
  },
125
  {
126
  "epoch": 7.0,
127
+ "grad_norm": 0.9446077346801758,
128
  "learning_rate": 3.2500000000000004e-05,
129
+ "loss": 0.3247,
130
  "step": 854
131
  },
132
  {
133
  "epoch": 7.0,
134
+ "eval_accuracy": 0.8521303258145363,
135
+ "eval_f1": 0.816408656658218,
136
+ "eval_loss": 0.3298611342906952,
137
+ "eval_precision": 0.8270654903728508,
138
+ "eval_recall": 0.8078741589379888,
139
+ "eval_runtime": 5.0753,
140
+ "eval_samples_per_second": 78.617,
141
+ "eval_steps_per_second": 9.852,
142
  "step": 854
143
  },
144
  {
145
  "epoch": 8.0,
146
+ "grad_norm": 9.143532752990723,
147
  "learning_rate": 3e-05,
148
+ "loss": 0.3102,
149
  "step": 976
150
  },
151
  {
152
  "epoch": 8.0,
153
+ "eval_accuracy": 0.8621553884711779,
154
+ "eval_f1": 0.8228198648441347,
155
+ "eval_loss": 0.3259894549846649,
156
+ "eval_precision": 0.8509591907917684,
157
+ "eval_recall": 0.8049645390070922,
158
+ "eval_runtime": 5.087,
159
+ "eval_samples_per_second": 78.435,
160
+ "eval_steps_per_second": 9.829,
161
  "step": 976
162
  },
163
  {
164
  "epoch": 9.0,
165
+ "grad_norm": 4.818455696105957,
166
  "learning_rate": 2.7500000000000004e-05,
167
+ "loss": 0.2991,
168
  "step": 1098
169
  },
170
  {
171
  "epoch": 9.0,
172
+ "eval_accuracy": 0.8571428571428571,
173
+ "eval_f1": 0.821647309770462,
174
+ "eval_loss": 0.31378358602523804,
175
+ "eval_precision": 0.8349087353324641,
176
+ "eval_recall": 0.8114202582287688,
177
+ "eval_runtime": 5.0565,
178
+ "eval_samples_per_second": 78.909,
179
+ "eval_steps_per_second": 9.888,
180
  "step": 1098
181
  },
182
  {
183
  "epoch": 10.0,
184
+ "grad_norm": 3.8728206157684326,
185
  "learning_rate": 2.5e-05,
186
+ "loss": 0.29,
187
  "step": 1220
188
  },
189
  {
190
  "epoch": 10.0,
191
+ "eval_accuracy": 0.8546365914786967,
192
+ "eval_f1": 0.8180088078011953,
193
+ "eval_loss": 0.31225934624671936,
194
+ "eval_precision": 0.8323930726843348,
195
+ "eval_recall": 0.8071467539552646,
196
+ "eval_runtime": 5.0562,
197
+ "eval_samples_per_second": 78.913,
198
+ "eval_steps_per_second": 9.889,
199
  "step": 1220
200
  },
201
  {
202
  "epoch": 11.0,
203
+ "grad_norm": 2.6045541763305664,
204
  "learning_rate": 2.25e-05,
205
+ "loss": 0.2778,
206
  "step": 1342
207
  },
208
  {
209
  "epoch": 11.0,
210
+ "eval_accuracy": 0.8671679197994987,
211
+ "eval_f1": 0.8377065410088949,
212
+ "eval_loss": 0.30650559067726135,
213
+ "eval_precision": 0.8423344947735192,
214
+ "eval_recall": 0.8335151845790143,
215
+ "eval_runtime": 5.0839,
216
+ "eval_samples_per_second": 78.483,
217
+ "eval_steps_per_second": 9.835,
218
  "step": 1342
219
  },
220
  {
221
  "epoch": 12.0,
222
+ "grad_norm": 1.5327140092849731,
223
  "learning_rate": 2e-05,
224
+ "loss": 0.2702,
225
  "step": 1464
226
  },
227
  {
228
  "epoch": 12.0,
229
+ "eval_accuracy": 0.8571428571428571,
230
+ "eval_f1": 0.821647309770462,
231
+ "eval_loss": 0.3005804717540741,
232
+ "eval_precision": 0.8349087353324641,
233
+ "eval_recall": 0.8114202582287688,
234
+ "eval_runtime": 5.0646,
235
+ "eval_samples_per_second": 78.782,
236
+ "eval_steps_per_second": 9.872,
237
  "step": 1464
238
  },
239
  {
240
  "epoch": 13.0,
241
+ "grad_norm": 5.446022987365723,
242
  "learning_rate": 1.75e-05,
243
+ "loss": 0.2664,
244
  "step": 1586
245
  },
246
  {
247
  "epoch": 13.0,
248
+ "eval_accuracy": 0.8596491228070176,
249
+ "eval_f1": 0.8298403801632752,
250
+ "eval_loss": 0.29961732029914856,
251
+ "eval_precision": 0.8315523576240049,
252
+ "eval_recall": 0.8281960356428442,
253
+ "eval_runtime": 5.0899,
254
+ "eval_samples_per_second": 78.39,
255
+ "eval_steps_per_second": 9.823,
256
  "step": 1586
257
  },
258
  {
259
  "epoch": 14.0,
260
+ "grad_norm": 2.8527348041534424,
261
  "learning_rate": 1.5e-05,
262
+ "loss": 0.264,
263
  "step": 1708
264
  },
265
  {
266
  "epoch": 14.0,
267
+ "eval_accuracy": 0.8721804511278195,
268
+ "eval_f1": 0.8476882658063937,
269
+ "eval_loss": 0.2987360656261444,
270
+ "eval_precision": 0.8437296561519796,
271
+ "eval_recall": 0.8520640116384797,
272
+ "eval_runtime": 5.0668,
273
+ "eval_samples_per_second": 78.748,
274
+ "eval_steps_per_second": 9.868,
275
  "step": 1708
276
  },
277
  {
278
  "epoch": 15.0,
279
+ "grad_norm": 2.7352912425994873,
280
  "learning_rate": 1.25e-05,
281
+ "loss": 0.254,
282
  "step": 1830
283
  },
284
  {
285
  "epoch": 15.0,
286
+ "eval_accuracy": 0.8771929824561403,
287
+ "eval_f1": 0.8522278069611882,
288
+ "eval_loss": 0.2951277792453766,
289
+ "eval_precision": 0.8513631702756499,
290
+ "eval_recall": 0.8531096563011457,
291
+ "eval_runtime": 5.222,
292
+ "eval_samples_per_second": 76.408,
293
+ "eval_steps_per_second": 9.575,
294
  "step": 1830
295
  },
296
  {
297
  "epoch": 16.0,
298
+ "grad_norm": 2.442108154296875,
299
  "learning_rate": 1e-05,
300
+ "loss": 0.2571,
301
  "step": 1952
302
  },
303
  {
304
  "epoch": 16.0,
305
+ "eval_accuracy": 0.8671679197994987,
306
+ "eval_f1": 0.8350789627607721,
307
+ "eval_loss": 0.2944652736186981,
308
+ "eval_precision": 0.8463358876939919,
309
+ "eval_recall": 0.8260138206946717,
310
+ "eval_runtime": 5.0611,
311
+ "eval_samples_per_second": 78.837,
312
+ "eval_steps_per_second": 9.879,
313
  "step": 1952
314
  },
315
  {
316
  "epoch": 17.0,
317
+ "grad_norm": 3.852628231048584,
318
  "learning_rate": 7.5e-06,
319
+ "loss": 0.2511,
320
  "step": 2074
321
  },
322
  {
323
  "epoch": 17.0,
324
+ "eval_accuracy": 0.8721804511278195,
325
+ "eval_f1": 0.8454251965513313,
326
+ "eval_loss": 0.2917979061603546,
327
+ "eval_precision": 0.8463049835506276,
328
+ "eval_recall": 0.8445626477541371,
329
+ "eval_runtime": 5.0682,
330
+ "eval_samples_per_second": 78.727,
331
+ "eval_steps_per_second": 9.866,
332
  "step": 2074
333
  },
334
  {
335
  "epoch": 18.0,
336
+ "grad_norm": 5.464624881744385,
337
  "learning_rate": 5e-06,
338
+ "loss": 0.2574,
339
  "step": 2196
340
  },
341
  {
342
  "epoch": 18.0,
343
+ "eval_accuracy": 0.87468671679198,
344
+ "eval_f1": 0.8472902633190447,
345
+ "eval_loss": 0.29094478487968445,
346
+ "eval_precision": 0.8510272912927781,
347
+ "eval_recall": 0.8438352427714131,
348
+ "eval_runtime": 5.1003,
349
+ "eval_samples_per_second": 78.231,
350
+ "eval_steps_per_second": 9.803,
351
  "step": 2196
352
  },
353
  {
354
  "epoch": 19.0,
355
+ "grad_norm": 2.1257989406585693,
356
  "learning_rate": 2.5e-06,
357
+ "loss": 0.2508,
358
  "step": 2318
359
  },
360
  {
361
  "epoch": 19.0,
362
+ "eval_accuracy": 0.8771929824561403,
363
+ "eval_f1": 0.850729517396184,
364
+ "eval_loss": 0.29074448347091675,
365
+ "eval_precision": 0.8535087719298247,
366
+ "eval_recall": 0.8481087470449173,
367
+ "eval_runtime": 5.1136,
368
+ "eval_samples_per_second": 78.027,
369
+ "eval_steps_per_second": 9.778,
370
  "step": 2318
371
  },
372
  {
373
  "epoch": 20.0,
374
+ "grad_norm": 3.8716917037963867,
375
  "learning_rate": 0.0,
376
+ "loss": 0.2536,
377
  "step": 2440
378
  },
379
  {
380
  "epoch": 20.0,
381
+ "eval_accuracy": 0.8771929824561403,
382
+ "eval_f1": 0.850729517396184,
383
+ "eval_loss": 0.29076284170150757,
384
+ "eval_precision": 0.8535087719298247,
385
+ "eval_recall": 0.8481087470449173,
386
+ "eval_runtime": 5.0627,
387
+ "eval_samples_per_second": 78.811,
388
+ "eval_steps_per_second": 9.876,
389
  "step": 2440
390
  },
391
  {
392
  "epoch": 20.0,
393
  "step": 2440,
394
+ "total_flos": 7624554283800000.0,
395
+ "train_loss": 0.32445813476062213,
396
+ "train_runtime": 1939.7236,
397
+ "train_samples_per_second": 37.583,
398
+ "train_steps_per_second": 1.258
399
  }
400
  ],
401
  "logging_steps": 500,
 
403
  "num_input_tokens_seen": 0,
404
  "num_train_epochs": 20,
405
  "save_steps": 500,
406
+ "total_flos": 7624554283800000.0,
407
  "train_batch_size": 30,
408
  "trial_name": null,
409
  "trial_params": null