{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 6378, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 8.77742946708464e-07, "loss": 1.1515, "step": 10 }, { "epoch": 0.01, "learning_rate": 2.1316614420062697e-06, "loss": 1.0942, "step": 20 }, { "epoch": 0.01, "learning_rate": 3.3855799373040753e-06, "loss": 0.825, "step": 30 }, { "epoch": 0.02, "learning_rate": 4.639498432601881e-06, "loss": 0.5304, "step": 40 }, { "epoch": 0.02, "learning_rate": 5.8934169278996865e-06, "loss": 0.3473, "step": 50 }, { "epoch": 0.03, "learning_rate": 7.147335423197492e-06, "loss": 0.2823, "step": 60 }, { "epoch": 0.03, "learning_rate": 8.4012539184953e-06, "loss": 0.2224, "step": 70 }, { "epoch": 0.04, "learning_rate": 9.655172413793105e-06, "loss": 0.1987, "step": 80 }, { "epoch": 0.04, "learning_rate": 1.0909090909090909e-05, "loss": 0.1516, "step": 90 }, { "epoch": 0.05, "learning_rate": 1.2163009404388715e-05, "loss": 0.1241, "step": 100 }, { "epoch": 0.05, "learning_rate": 1.341692789968652e-05, "loss": 0.1199, "step": 110 }, { "epoch": 0.06, "learning_rate": 1.4670846394984329e-05, "loss": 0.1113, "step": 120 }, { "epoch": 0.06, "learning_rate": 1.5924764890282133e-05, "loss": 0.105, "step": 130 }, { "epoch": 0.07, "learning_rate": 1.717868338557994e-05, "loss": 0.0879, "step": 140 }, { "epoch": 0.07, "learning_rate": 1.8432601880877744e-05, "loss": 0.105, "step": 150 }, { "epoch": 0.08, "learning_rate": 1.968652037617555e-05, "loss": 0.1043, "step": 160 }, { "epoch": 0.08, "learning_rate": 2.0940438871473355e-05, "loss": 0.1107, "step": 170 }, { "epoch": 0.08, "learning_rate": 2.2194357366771163e-05, "loss": 0.1098, "step": 180 }, { "epoch": 0.09, "learning_rate": 2.3448275862068967e-05, "loss": 0.0981, "step": 190 }, { "epoch": 0.09, "learning_rate": 2.4702194357366774e-05, "loss": 0.0919, "step": 200 }, { "epoch": 0.1, "learning_rate": 2.595611285266458e-05, "loss": 0.0782, "step": 210 }, { "epoch": 0.1, "learning_rate": 2.7210031347962385e-05, "loss": 0.0843, "step": 220 }, { "epoch": 0.11, "learning_rate": 2.8463949843260192e-05, "loss": 0.1103, "step": 230 }, { "epoch": 0.11, "learning_rate": 2.9717868338557996e-05, "loss": 0.0937, "step": 240 }, { "epoch": 0.12, "learning_rate": 3.097178683385581e-05, "loss": 0.092, "step": 250 }, { "epoch": 0.12, "learning_rate": 3.222570532915361e-05, "loss": 0.0871, "step": 260 }, { "epoch": 0.13, "learning_rate": 3.3479623824451415e-05, "loss": 0.0908, "step": 270 }, { "epoch": 0.13, "learning_rate": 3.4733542319749215e-05, "loss": 0.0849, "step": 280 }, { "epoch": 0.14, "learning_rate": 3.598746081504703e-05, "loss": 0.0972, "step": 290 }, { "epoch": 0.14, "learning_rate": 3.724137931034483e-05, "loss": 0.0819, "step": 300 }, { "epoch": 0.15, "learning_rate": 3.849529780564264e-05, "loss": 0.085, "step": 310 }, { "epoch": 0.15, "learning_rate": 3.974921630094044e-05, "loss": 0.0736, "step": 320 }, { "epoch": 0.16, "learning_rate": 4.1003134796238245e-05, "loss": 0.0918, "step": 330 }, { "epoch": 0.16, "learning_rate": 4.225705329153606e-05, "loss": 0.087, "step": 340 }, { "epoch": 0.16, "learning_rate": 4.351097178683386e-05, "loss": 0.0958, "step": 350 }, { "epoch": 0.17, "learning_rate": 4.476489028213166e-05, "loss": 0.1006, "step": 360 }, { "epoch": 0.17, "learning_rate": 4.601880877742947e-05, "loss": 0.0849, "step": 370 }, { "epoch": 0.18, "learning_rate": 4.727272727272728e-05, "loss": 0.0772, "step": 380 }, { "epoch": 0.18, "learning_rate": 4.852664576802508e-05, "loss": 0.0852, "step": 390 }, { "epoch": 0.19, "learning_rate": 4.978056426332288e-05, "loss": 0.0865, "step": 400 }, { "epoch": 0.19, "learning_rate": 5.10344827586207e-05, "loss": 0.0936, "step": 410 }, { "epoch": 0.2, "learning_rate": 5.2288401253918504e-05, "loss": 0.1028, "step": 420 }, { "epoch": 0.2, "learning_rate": 5.3542319749216304e-05, "loss": 0.0849, "step": 430 }, { "epoch": 0.21, "learning_rate": 5.4796238244514105e-05, "loss": 0.0935, "step": 440 }, { "epoch": 0.21, "learning_rate": 5.605015673981192e-05, "loss": 0.0838, "step": 450 }, { "epoch": 0.22, "learning_rate": 5.7304075235109726e-05, "loss": 0.0851, "step": 460 }, { "epoch": 0.22, "learning_rate": 5.855799373040753e-05, "loss": 0.0916, "step": 470 }, { "epoch": 0.23, "learning_rate": 5.9811912225705334e-05, "loss": 0.0926, "step": 480 }, { "epoch": 0.23, "learning_rate": 6.106583072100315e-05, "loss": 0.0742, "step": 490 }, { "epoch": 0.24, "learning_rate": 6.231974921630095e-05, "loss": 0.0882, "step": 500 }, { "epoch": 0.24, "learning_rate": 6.357366771159875e-05, "loss": 0.0789, "step": 510 }, { "epoch": 0.24, "learning_rate": 6.482758620689655e-05, "loss": 0.0847, "step": 520 }, { "epoch": 0.25, "learning_rate": 6.608150470219436e-05, "loss": 0.0764, "step": 530 }, { "epoch": 0.25, "learning_rate": 6.733542319749216e-05, "loss": 0.0963, "step": 540 }, { "epoch": 0.26, "learning_rate": 6.858934169278998e-05, "loss": 0.0695, "step": 550 }, { "epoch": 0.26, "learning_rate": 6.984326018808778e-05, "loss": 0.0693, "step": 560 }, { "epoch": 0.27, "learning_rate": 7.109717868338559e-05, "loss": 0.0908, "step": 570 }, { "epoch": 0.27, "learning_rate": 7.23510971786834e-05, "loss": 0.0779, "step": 580 }, { "epoch": 0.28, "learning_rate": 7.36050156739812e-05, "loss": 0.0825, "step": 590 }, { "epoch": 0.28, "learning_rate": 7.485893416927901e-05, "loss": 0.0813, "step": 600 }, { "epoch": 0.29, "learning_rate": 7.611285266457681e-05, "loss": 0.072, "step": 610 }, { "epoch": 0.29, "learning_rate": 7.736677115987461e-05, "loss": 0.0813, "step": 620 }, { "epoch": 0.3, "learning_rate": 7.862068965517242e-05, "loss": 0.0906, "step": 630 }, { "epoch": 0.3, "learning_rate": 7.987460815047022e-05, "loss": 0.0868, "step": 640 }, { "epoch": 0.31, "learning_rate": 7.987456445993032e-05, "loss": 0.0675, "step": 650 }, { "epoch": 0.31, "learning_rate": 7.973519163763066e-05, "loss": 0.0942, "step": 660 }, { "epoch": 0.32, "learning_rate": 7.959581881533101e-05, "loss": 0.0671, "step": 670 }, { "epoch": 0.32, "learning_rate": 7.945644599303136e-05, "loss": 0.0841, "step": 680 }, { "epoch": 0.32, "learning_rate": 7.931707317073171e-05, "loss": 0.0778, "step": 690 }, { "epoch": 0.33, "learning_rate": 7.917770034843206e-05, "loss": 0.0899, "step": 700 }, { "epoch": 0.33, "learning_rate": 7.90383275261324e-05, "loss": 0.0925, "step": 710 }, { "epoch": 0.34, "learning_rate": 7.889895470383276e-05, "loss": 0.0822, "step": 720 }, { "epoch": 0.34, "learning_rate": 7.87595818815331e-05, "loss": 0.0837, "step": 730 }, { "epoch": 0.35, "learning_rate": 7.862020905923346e-05, "loss": 0.077, "step": 740 }, { "epoch": 0.35, "learning_rate": 7.84808362369338e-05, "loss": 0.0705, "step": 750 }, { "epoch": 0.36, "learning_rate": 7.834146341463415e-05, "loss": 0.0849, "step": 760 }, { "epoch": 0.36, "learning_rate": 7.82020905923345e-05, "loss": 0.0792, "step": 770 }, { "epoch": 0.37, "learning_rate": 7.806271777003485e-05, "loss": 0.0786, "step": 780 }, { "epoch": 0.37, "learning_rate": 7.79233449477352e-05, "loss": 0.0862, "step": 790 }, { "epoch": 0.38, "learning_rate": 7.778397212543555e-05, "loss": 0.0828, "step": 800 }, { "epoch": 0.38, "learning_rate": 7.76445993031359e-05, "loss": 0.0918, "step": 810 }, { "epoch": 0.39, "learning_rate": 7.750522648083624e-05, "loss": 0.0749, "step": 820 }, { "epoch": 0.39, "learning_rate": 7.736585365853659e-05, "loss": 0.083, "step": 830 }, { "epoch": 0.4, "learning_rate": 7.722648083623694e-05, "loss": 0.0666, "step": 840 }, { "epoch": 0.4, "learning_rate": 7.708710801393729e-05, "loss": 0.0811, "step": 850 }, { "epoch": 0.4, "learning_rate": 7.694773519163764e-05, "loss": 0.0877, "step": 860 }, { "epoch": 0.41, "learning_rate": 7.680836236933799e-05, "loss": 0.0825, "step": 870 }, { "epoch": 0.41, "learning_rate": 7.666898954703834e-05, "loss": 0.0759, "step": 880 }, { "epoch": 0.42, "learning_rate": 7.652961672473867e-05, "loss": 0.0723, "step": 890 }, { "epoch": 0.42, "learning_rate": 7.639024390243902e-05, "loss": 0.0763, "step": 900 }, { "epoch": 0.43, "learning_rate": 7.625087108013937e-05, "loss": 0.0716, "step": 910 }, { "epoch": 0.43, "learning_rate": 7.611149825783972e-05, "loss": 0.0698, "step": 920 }, { "epoch": 0.44, "learning_rate": 7.597212543554007e-05, "loss": 0.0873, "step": 930 }, { "epoch": 0.44, "learning_rate": 7.583275261324042e-05, "loss": 0.0701, "step": 940 }, { "epoch": 0.45, "learning_rate": 7.569337979094077e-05, "loss": 0.0775, "step": 950 }, { "epoch": 0.45, "learning_rate": 7.555400696864112e-05, "loss": 0.0705, "step": 960 }, { "epoch": 0.46, "learning_rate": 7.541463414634147e-05, "loss": 0.0792, "step": 970 }, { "epoch": 0.46, "learning_rate": 7.527526132404182e-05, "loss": 0.0695, "step": 980 }, { "epoch": 0.47, "learning_rate": 7.513588850174217e-05, "loss": 0.0934, "step": 990 }, { "epoch": 0.47, "learning_rate": 7.499651567944252e-05, "loss": 0.0903, "step": 1000 }, { "epoch": 0.48, "learning_rate": 7.485714285714287e-05, "loss": 0.0863, "step": 1010 }, { "epoch": 0.48, "learning_rate": 7.471777003484322e-05, "loss": 0.0801, "step": 1020 }, { "epoch": 0.48, "learning_rate": 7.457839721254357e-05, "loss": 0.0744, "step": 1030 }, { "epoch": 0.49, "learning_rate": 7.443902439024392e-05, "loss": 0.075, "step": 1040 }, { "epoch": 0.49, "learning_rate": 7.429965156794425e-05, "loss": 0.0711, "step": 1050 }, { "epoch": 0.5, "learning_rate": 7.41602787456446e-05, "loss": 0.0904, "step": 1060 }, { "epoch": 0.5, "learning_rate": 7.402090592334495e-05, "loss": 0.0771, "step": 1070 }, { "epoch": 0.51, "learning_rate": 7.38815331010453e-05, "loss": 0.0707, "step": 1080 }, { "epoch": 0.51, "learning_rate": 7.374216027874565e-05, "loss": 0.0655, "step": 1090 }, { "epoch": 0.52, "learning_rate": 7.3602787456446e-05, "loss": 0.077, "step": 1100 }, { "epoch": 0.52, "learning_rate": 7.346341463414635e-05, "loss": 0.0775, "step": 1110 }, { "epoch": 0.53, "learning_rate": 7.33240418118467e-05, "loss": 0.0825, "step": 1120 }, { "epoch": 0.53, "learning_rate": 7.318466898954704e-05, "loss": 0.0704, "step": 1130 }, { "epoch": 0.54, "learning_rate": 7.304529616724739e-05, "loss": 0.0614, "step": 1140 }, { "epoch": 0.54, "learning_rate": 7.290592334494774e-05, "loss": 0.0685, "step": 1150 }, { "epoch": 0.55, "learning_rate": 7.276655052264809e-05, "loss": 0.0766, "step": 1160 }, { "epoch": 0.55, "learning_rate": 7.262717770034844e-05, "loss": 0.0746, "step": 1170 }, { "epoch": 0.56, "learning_rate": 7.248780487804878e-05, "loss": 0.0723, "step": 1180 }, { "epoch": 0.56, "learning_rate": 7.234843205574913e-05, "loss": 0.0824, "step": 1190 }, { "epoch": 0.56, "learning_rate": 7.220905923344948e-05, "loss": 0.0708, "step": 1200 }, { "epoch": 0.57, "learning_rate": 7.206968641114983e-05, "loss": 0.0745, "step": 1210 }, { "epoch": 0.57, "learning_rate": 7.193031358885018e-05, "loss": 0.0743, "step": 1220 }, { "epoch": 0.58, "learning_rate": 7.179094076655053e-05, "loss": 0.0792, "step": 1230 }, { "epoch": 0.58, "learning_rate": 7.165156794425088e-05, "loss": 0.0729, "step": 1240 }, { "epoch": 0.59, "learning_rate": 7.151219512195123e-05, "loss": 0.0822, "step": 1250 }, { "epoch": 0.59, "learning_rate": 7.137282229965158e-05, "loss": 0.0766, "step": 1260 }, { "epoch": 0.6, "learning_rate": 7.123344947735193e-05, "loss": 0.07, "step": 1270 }, { "epoch": 0.6, "learning_rate": 7.109407665505227e-05, "loss": 0.0818, "step": 1280 }, { "epoch": 0.61, "learning_rate": 7.095470383275262e-05, "loss": 0.0748, "step": 1290 }, { "epoch": 0.61, "learning_rate": 7.081533101045297e-05, "loss": 0.0727, "step": 1300 }, { "epoch": 0.62, "learning_rate": 7.067595818815332e-05, "loss": 0.0798, "step": 1310 }, { "epoch": 0.62, "learning_rate": 7.053658536585367e-05, "loss": 0.0736, "step": 1320 }, { "epoch": 0.63, "learning_rate": 7.039721254355402e-05, "loss": 0.065, "step": 1330 }, { "epoch": 0.63, "learning_rate": 7.025783972125437e-05, "loss": 0.0699, "step": 1340 }, { "epoch": 0.63, "learning_rate": 7.011846689895471e-05, "loss": 0.066, "step": 1350 }, { "epoch": 0.64, "learning_rate": 6.997909407665505e-05, "loss": 0.0703, "step": 1360 }, { "epoch": 0.64, "learning_rate": 6.98397212543554e-05, "loss": 0.0801, "step": 1370 }, { "epoch": 0.65, "learning_rate": 6.970034843205575e-05, "loss": 0.0749, "step": 1380 }, { "epoch": 0.65, "learning_rate": 6.95609756097561e-05, "loss": 0.0658, "step": 1390 }, { "epoch": 0.66, "learning_rate": 6.942160278745645e-05, "loss": 0.0607, "step": 1400 }, { "epoch": 0.66, "learning_rate": 6.92822299651568e-05, "loss": 0.07, "step": 1410 }, { "epoch": 0.67, "learning_rate": 6.914285714285715e-05, "loss": 0.0747, "step": 1420 }, { "epoch": 0.67, "learning_rate": 6.90034843205575e-05, "loss": 0.0731, "step": 1430 }, { "epoch": 0.68, "learning_rate": 6.886411149825785e-05, "loss": 0.0703, "step": 1440 }, { "epoch": 0.68, "learning_rate": 6.87247386759582e-05, "loss": 0.0634, "step": 1450 }, { "epoch": 0.69, "learning_rate": 6.858536585365855e-05, "loss": 0.0748, "step": 1460 }, { "epoch": 0.69, "learning_rate": 6.84459930313589e-05, "loss": 0.0797, "step": 1470 }, { "epoch": 0.7, "learning_rate": 6.830662020905925e-05, "loss": 0.0899, "step": 1480 }, { "epoch": 0.7, "learning_rate": 6.81672473867596e-05, "loss": 0.0904, "step": 1490 }, { "epoch": 0.71, "learning_rate": 6.802787456445995e-05, "loss": 0.0723, "step": 1500 }, { "epoch": 0.71, "learning_rate": 6.788850174216028e-05, "loss": 0.0623, "step": 1510 }, { "epoch": 0.71, "learning_rate": 6.774912891986063e-05, "loss": 0.0698, "step": 1520 }, { "epoch": 0.72, "learning_rate": 6.760975609756098e-05, "loss": 0.0739, "step": 1530 }, { "epoch": 0.72, "learning_rate": 6.747038327526133e-05, "loss": 0.0655, "step": 1540 }, { "epoch": 0.73, "learning_rate": 6.733101045296168e-05, "loss": 0.0869, "step": 1550 }, { "epoch": 0.73, "learning_rate": 6.719163763066203e-05, "loss": 0.0708, "step": 1560 }, { "epoch": 0.74, "learning_rate": 6.705226480836238e-05, "loss": 0.0661, "step": 1570 }, { "epoch": 0.74, "learning_rate": 6.691289198606273e-05, "loss": 0.0795, "step": 1580 }, { "epoch": 0.75, "learning_rate": 6.677351916376307e-05, "loss": 0.0694, "step": 1590 }, { "epoch": 0.75, "learning_rate": 6.663414634146341e-05, "loss": 0.0923, "step": 1600 }, { "epoch": 0.76, "learning_rate": 6.649477351916376e-05, "loss": 0.0765, "step": 1610 }, { "epoch": 0.76, "learning_rate": 6.635540069686411e-05, "loss": 0.0703, "step": 1620 }, { "epoch": 0.77, "learning_rate": 6.621602787456446e-05, "loss": 0.0757, "step": 1630 }, { "epoch": 0.77, "learning_rate": 6.607665505226481e-05, "loss": 0.0761, "step": 1640 }, { "epoch": 0.78, "learning_rate": 6.593728222996516e-05, "loss": 0.0721, "step": 1650 }, { "epoch": 0.78, "learning_rate": 6.579790940766551e-05, "loss": 0.077, "step": 1660 }, { "epoch": 0.79, "learning_rate": 6.565853658536585e-05, "loss": 0.0598, "step": 1670 }, { "epoch": 0.79, "learning_rate": 6.55191637630662e-05, "loss": 0.0727, "step": 1680 }, { "epoch": 0.79, "learning_rate": 6.537979094076655e-05, "loss": 0.0827, "step": 1690 }, { "epoch": 0.8, "learning_rate": 6.52404181184669e-05, "loss": 0.0725, "step": 1700 }, { "epoch": 0.8, "learning_rate": 6.510104529616725e-05, "loss": 0.0749, "step": 1710 }, { "epoch": 0.81, "learning_rate": 6.49616724738676e-05, "loss": 0.0773, "step": 1720 }, { "epoch": 0.81, "learning_rate": 6.482229965156795e-05, "loss": 0.081, "step": 1730 }, { "epoch": 0.82, "learning_rate": 6.46829268292683e-05, "loss": 0.0824, "step": 1740 }, { "epoch": 0.82, "learning_rate": 6.454355400696865e-05, "loss": 0.0662, "step": 1750 }, { "epoch": 0.83, "learning_rate": 6.4404181184669e-05, "loss": 0.0548, "step": 1760 }, { "epoch": 0.83, "learning_rate": 6.426480836236935e-05, "loss": 0.0847, "step": 1770 }, { "epoch": 0.84, "learning_rate": 6.41254355400697e-05, "loss": 0.0758, "step": 1780 }, { "epoch": 0.84, "learning_rate": 6.398606271777004e-05, "loss": 0.0636, "step": 1790 }, { "epoch": 0.85, "learning_rate": 6.38466898954704e-05, "loss": 0.0615, "step": 1800 }, { "epoch": 0.85, "learning_rate": 6.370731707317074e-05, "loss": 0.0672, "step": 1810 }, { "epoch": 0.86, "learning_rate": 6.356794425087109e-05, "loss": 0.0858, "step": 1820 }, { "epoch": 0.86, "learning_rate": 6.342857142857143e-05, "loss": 0.0699, "step": 1830 }, { "epoch": 0.87, "learning_rate": 6.328919860627178e-05, "loss": 0.0608, "step": 1840 }, { "epoch": 0.87, "learning_rate": 6.314982578397213e-05, "loss": 0.0749, "step": 1850 }, { "epoch": 0.87, "learning_rate": 6.301045296167248e-05, "loss": 0.083, "step": 1860 }, { "epoch": 0.88, "learning_rate": 6.287108013937283e-05, "loss": 0.0763, "step": 1870 }, { "epoch": 0.88, "learning_rate": 6.273170731707318e-05, "loss": 0.0634, "step": 1880 }, { "epoch": 0.89, "learning_rate": 6.259233449477353e-05, "loss": 0.0595, "step": 1890 }, { "epoch": 0.89, "learning_rate": 6.245296167247386e-05, "loss": 0.0555, "step": 1900 }, { "epoch": 0.9, "learning_rate": 6.231358885017421e-05, "loss": 0.0607, "step": 1910 }, { "epoch": 0.9, "learning_rate": 6.217421602787456e-05, "loss": 0.078, "step": 1920 }, { "epoch": 0.91, "learning_rate": 6.203484320557491e-05, "loss": 0.0894, "step": 1930 }, { "epoch": 0.91, "learning_rate": 6.189547038327526e-05, "loss": 0.0672, "step": 1940 }, { "epoch": 0.92, "learning_rate": 6.175609756097561e-05, "loss": 0.067, "step": 1950 }, { "epoch": 0.92, "learning_rate": 6.161672473867596e-05, "loss": 0.0733, "step": 1960 }, { "epoch": 0.93, "learning_rate": 6.147735191637631e-05, "loss": 0.0634, "step": 1970 }, { "epoch": 0.93, "learning_rate": 6.133797909407666e-05, "loss": 0.0795, "step": 1980 }, { "epoch": 0.94, "learning_rate": 6.119860627177701e-05, "loss": 0.0757, "step": 1990 }, { "epoch": 0.94, "learning_rate": 6.105923344947736e-05, "loss": 0.0769, "step": 2000 }, { "epoch": 0.95, "learning_rate": 6.09198606271777e-05, "loss": 0.0834, "step": 2010 }, { "epoch": 0.95, "learning_rate": 6.078048780487805e-05, "loss": 0.0655, "step": 2020 }, { "epoch": 0.95, "learning_rate": 6.06411149825784e-05, "loss": 0.0669, "step": 2030 }, { "epoch": 0.96, "learning_rate": 6.050174216027875e-05, "loss": 0.0723, "step": 2040 }, { "epoch": 0.96, "learning_rate": 6.03623693379791e-05, "loss": 0.0748, "step": 2050 }, { "epoch": 0.97, "learning_rate": 6.0222996515679443e-05, "loss": 0.077, "step": 2060 }, { "epoch": 0.97, "learning_rate": 6.008362369337979e-05, "loss": 0.0669, "step": 2070 }, { "epoch": 0.98, "learning_rate": 5.994425087108014e-05, "loss": 0.0818, "step": 2080 }, { "epoch": 0.98, "learning_rate": 5.980487804878049e-05, "loss": 0.0667, "step": 2090 }, { "epoch": 0.99, "learning_rate": 5.966550522648084e-05, "loss": 0.0674, "step": 2100 }, { "epoch": 0.99, "learning_rate": 5.952613240418119e-05, "loss": 0.0902, "step": 2110 }, { "epoch": 1.0, "learning_rate": 5.938675958188154e-05, "loss": 0.0752, "step": 2120 }, { "epoch": 1.0, "eval_accuracy": 0.975717042417269, "eval_f1": 0.7687279607809409, "eval_loss": 0.0664275586605072, "eval_precision": 0.7416196481911715, "eval_recall": 0.7978932333511873, "eval_runtime": 16.3951, "eval_samples_per_second": 863.673, "eval_steps_per_second": 107.959, "step": 2126 }, { "epoch": 1.0, "learning_rate": 5.924738675958189e-05, "loss": 0.0914, "step": 2130 }, { "epoch": 1.01, "learning_rate": 5.9108013937282234e-05, "loss": 0.0682, "step": 2140 }, { "epoch": 1.01, "learning_rate": 5.896864111498258e-05, "loss": 0.0591, "step": 2150 }, { "epoch": 1.02, "learning_rate": 5.882926829268293e-05, "loss": 0.0622, "step": 2160 }, { "epoch": 1.02, "learning_rate": 5.868989547038328e-05, "loss": 0.0692, "step": 2170 }, { "epoch": 1.03, "learning_rate": 5.855052264808363e-05, "loss": 0.054, "step": 2180 }, { "epoch": 1.03, "learning_rate": 5.841114982578398e-05, "loss": 0.0494, "step": 2190 }, { "epoch": 1.03, "learning_rate": 5.827177700348433e-05, "loss": 0.0648, "step": 2200 }, { "epoch": 1.04, "learning_rate": 5.813240418118467e-05, "loss": 0.0639, "step": 2210 }, { "epoch": 1.04, "learning_rate": 5.799303135888502e-05, "loss": 0.0595, "step": 2220 }, { "epoch": 1.05, "learning_rate": 5.785365853658537e-05, "loss": 0.0548, "step": 2230 }, { "epoch": 1.05, "learning_rate": 5.7714285714285716e-05, "loss": 0.0542, "step": 2240 }, { "epoch": 1.06, "learning_rate": 5.7574912891986066e-05, "loss": 0.0527, "step": 2250 }, { "epoch": 1.06, "learning_rate": 5.7435540069686416e-05, "loss": 0.0477, "step": 2260 }, { "epoch": 1.07, "learning_rate": 5.7296167247386765e-05, "loss": 0.0699, "step": 2270 }, { "epoch": 1.07, "learning_rate": 5.7156794425087115e-05, "loss": 0.0546, "step": 2280 }, { "epoch": 1.08, "learning_rate": 5.701742160278746e-05, "loss": 0.0543, "step": 2290 }, { "epoch": 1.08, "learning_rate": 5.687804878048781e-05, "loss": 0.052, "step": 2300 }, { "epoch": 1.09, "learning_rate": 5.673867595818816e-05, "loss": 0.0614, "step": 2310 }, { "epoch": 1.09, "learning_rate": 5.659930313588851e-05, "loss": 0.0692, "step": 2320 }, { "epoch": 1.1, "learning_rate": 5.6459930313588856e-05, "loss": 0.058, "step": 2330 }, { "epoch": 1.1, "learning_rate": 5.6320557491289206e-05, "loss": 0.064, "step": 2340 }, { "epoch": 1.11, "learning_rate": 5.6181184668989555e-05, "loss": 0.062, "step": 2350 }, { "epoch": 1.11, "learning_rate": 5.6041811846689905e-05, "loss": 0.0627, "step": 2360 }, { "epoch": 1.11, "learning_rate": 5.590243902439025e-05, "loss": 0.0613, "step": 2370 }, { "epoch": 1.12, "learning_rate": 5.57630662020906e-05, "loss": 0.0653, "step": 2380 }, { "epoch": 1.12, "learning_rate": 5.562369337979095e-05, "loss": 0.0592, "step": 2390 }, { "epoch": 1.13, "learning_rate": 5.54843205574913e-05, "loss": 0.0563, "step": 2400 }, { "epoch": 1.13, "learning_rate": 5.5344947735191646e-05, "loss": 0.0516, "step": 2410 }, { "epoch": 1.14, "learning_rate": 5.520557491289199e-05, "loss": 0.0619, "step": 2420 }, { "epoch": 1.14, "learning_rate": 5.506620209059234e-05, "loss": 0.0698, "step": 2430 }, { "epoch": 1.15, "learning_rate": 5.492682926829269e-05, "loss": 0.0541, "step": 2440 }, { "epoch": 1.15, "learning_rate": 5.478745644599303e-05, "loss": 0.0459, "step": 2450 }, { "epoch": 1.16, "learning_rate": 5.464808362369338e-05, "loss": 0.0544, "step": 2460 }, { "epoch": 1.16, "learning_rate": 5.450871080139373e-05, "loss": 0.0533, "step": 2470 }, { "epoch": 1.17, "learning_rate": 5.436933797909408e-05, "loss": 0.0597, "step": 2480 }, { "epoch": 1.17, "learning_rate": 5.422996515679443e-05, "loss": 0.0553, "step": 2490 }, { "epoch": 1.18, "learning_rate": 5.409059233449478e-05, "loss": 0.0583, "step": 2500 }, { "epoch": 1.18, "learning_rate": 5.395121951219513e-05, "loss": 0.0552, "step": 2510 }, { "epoch": 1.19, "learning_rate": 5.381184668989547e-05, "loss": 0.0582, "step": 2520 }, { "epoch": 1.19, "learning_rate": 5.367247386759582e-05, "loss": 0.059, "step": 2530 }, { "epoch": 1.19, "learning_rate": 5.353310104529617e-05, "loss": 0.0657, "step": 2540 }, { "epoch": 1.2, "learning_rate": 5.339372822299652e-05, "loss": 0.0563, "step": 2550 }, { "epoch": 1.2, "learning_rate": 5.325435540069687e-05, "loss": 0.0568, "step": 2560 }, { "epoch": 1.21, "learning_rate": 5.311498257839722e-05, "loss": 0.067, "step": 2570 }, { "epoch": 1.21, "learning_rate": 5.297560975609757e-05, "loss": 0.064, "step": 2580 }, { "epoch": 1.22, "learning_rate": 5.283623693379792e-05, "loss": 0.0546, "step": 2590 }, { "epoch": 1.22, "learning_rate": 5.2696864111498255e-05, "loss": 0.0575, "step": 2600 }, { "epoch": 1.23, "learning_rate": 5.2557491289198605e-05, "loss": 0.0497, "step": 2610 }, { "epoch": 1.23, "learning_rate": 5.2418118466898955e-05, "loss": 0.0616, "step": 2620 }, { "epoch": 1.24, "learning_rate": 5.2278745644599304e-05, "loss": 0.0498, "step": 2630 }, { "epoch": 1.24, "learning_rate": 5.2139372822299654e-05, "loss": 0.0622, "step": 2640 }, { "epoch": 1.25, "learning_rate": 5.2000000000000004e-05, "loss": 0.0542, "step": 2650 }, { "epoch": 1.25, "learning_rate": 5.186062717770035e-05, "loss": 0.0625, "step": 2660 }, { "epoch": 1.26, "learning_rate": 5.17212543554007e-05, "loss": 0.0494, "step": 2670 }, { "epoch": 1.26, "learning_rate": 5.1581881533101046e-05, "loss": 0.0511, "step": 2680 }, { "epoch": 1.27, "learning_rate": 5.1442508710801395e-05, "loss": 0.0562, "step": 2690 }, { "epoch": 1.27, "learning_rate": 5.1303135888501745e-05, "loss": 0.067, "step": 2700 }, { "epoch": 1.27, "learning_rate": 5.1163763066202095e-05, "loss": 0.0528, "step": 2710 }, { "epoch": 1.28, "learning_rate": 5.1024390243902444e-05, "loss": 0.0677, "step": 2720 }, { "epoch": 1.28, "learning_rate": 5.0885017421602794e-05, "loss": 0.0668, "step": 2730 }, { "epoch": 1.29, "learning_rate": 5.0745644599303143e-05, "loss": 0.0677, "step": 2740 }, { "epoch": 1.29, "learning_rate": 5.060627177700349e-05, "loss": 0.0446, "step": 2750 }, { "epoch": 1.3, "learning_rate": 5.0466898954703836e-05, "loss": 0.0474, "step": 2760 }, { "epoch": 1.3, "learning_rate": 5.0327526132404186e-05, "loss": 0.0634, "step": 2770 }, { "epoch": 1.31, "learning_rate": 5.0188153310104535e-05, "loss": 0.0588, "step": 2780 }, { "epoch": 1.31, "learning_rate": 5.0048780487804885e-05, "loss": 0.0587, "step": 2790 }, { "epoch": 1.32, "learning_rate": 4.9909407665505234e-05, "loss": 0.0653, "step": 2800 }, { "epoch": 1.32, "learning_rate": 4.9770034843205584e-05, "loss": 0.057, "step": 2810 }, { "epoch": 1.33, "learning_rate": 4.9630662020905934e-05, "loss": 0.0584, "step": 2820 }, { "epoch": 1.33, "learning_rate": 4.949128919860627e-05, "loss": 0.0485, "step": 2830 }, { "epoch": 1.34, "learning_rate": 4.935191637630662e-05, "loss": 0.0595, "step": 2840 }, { "epoch": 1.34, "learning_rate": 4.921254355400697e-05, "loss": 0.0595, "step": 2850 }, { "epoch": 1.35, "learning_rate": 4.907317073170732e-05, "loss": 0.0536, "step": 2860 }, { "epoch": 1.35, "learning_rate": 4.893379790940767e-05, "loss": 0.0674, "step": 2870 }, { "epoch": 1.35, "learning_rate": 4.879442508710802e-05, "loss": 0.061, "step": 2880 }, { "epoch": 1.36, "learning_rate": 4.865505226480837e-05, "loss": 0.0528, "step": 2890 }, { "epoch": 1.36, "learning_rate": 4.851567944250872e-05, "loss": 0.0531, "step": 2900 }, { "epoch": 1.37, "learning_rate": 4.837630662020906e-05, "loss": 0.0558, "step": 2910 }, { "epoch": 1.37, "learning_rate": 4.823693379790941e-05, "loss": 0.0708, "step": 2920 }, { "epoch": 1.38, "learning_rate": 4.809756097560976e-05, "loss": 0.0595, "step": 2930 }, { "epoch": 1.38, "learning_rate": 4.795818815331011e-05, "loss": 0.0486, "step": 2940 }, { "epoch": 1.39, "learning_rate": 4.781881533101046e-05, "loss": 0.0644, "step": 2950 }, { "epoch": 1.39, "learning_rate": 4.767944250871081e-05, "loss": 0.0525, "step": 2960 }, { "epoch": 1.4, "learning_rate": 4.754006968641116e-05, "loss": 0.0704, "step": 2970 }, { "epoch": 1.4, "learning_rate": 4.740069686411151e-05, "loss": 0.0612, "step": 2980 }, { "epoch": 1.41, "learning_rate": 4.726132404181185e-05, "loss": 0.061, "step": 2990 }, { "epoch": 1.41, "learning_rate": 4.71219512195122e-05, "loss": 0.0616, "step": 3000 }, { "epoch": 1.42, "learning_rate": 4.698257839721254e-05, "loss": 0.0682, "step": 3010 }, { "epoch": 1.42, "learning_rate": 4.684320557491289e-05, "loss": 0.0509, "step": 3020 }, { "epoch": 1.43, "learning_rate": 4.670383275261324e-05, "loss": 0.0587, "step": 3030 }, { "epoch": 1.43, "learning_rate": 4.656445993031359e-05, "loss": 0.0562, "step": 3040 }, { "epoch": 1.43, "learning_rate": 4.642508710801394e-05, "loss": 0.0606, "step": 3050 }, { "epoch": 1.44, "learning_rate": 4.628571428571429e-05, "loss": 0.0576, "step": 3060 }, { "epoch": 1.44, "learning_rate": 4.6146341463414634e-05, "loss": 0.0461, "step": 3070 }, { "epoch": 1.45, "learning_rate": 4.600696864111498e-05, "loss": 0.0582, "step": 3080 }, { "epoch": 1.45, "learning_rate": 4.586759581881533e-05, "loss": 0.0583, "step": 3090 }, { "epoch": 1.46, "learning_rate": 4.572822299651568e-05, "loss": 0.054, "step": 3100 }, { "epoch": 1.46, "learning_rate": 4.558885017421603e-05, "loss": 0.0495, "step": 3110 }, { "epoch": 1.47, "learning_rate": 4.544947735191638e-05, "loss": 0.0565, "step": 3120 }, { "epoch": 1.47, "learning_rate": 4.531010452961673e-05, "loss": 0.0439, "step": 3130 }, { "epoch": 1.48, "learning_rate": 4.5170731707317074e-05, "loss": 0.055, "step": 3140 }, { "epoch": 1.48, "learning_rate": 4.5031358885017424e-05, "loss": 0.0626, "step": 3150 }, { "epoch": 1.49, "learning_rate": 4.4891986062717773e-05, "loss": 0.0628, "step": 3160 }, { "epoch": 1.49, "learning_rate": 4.475261324041812e-05, "loss": 0.0627, "step": 3170 }, { "epoch": 1.5, "learning_rate": 4.461324041811847e-05, "loss": 0.0544, "step": 3180 }, { "epoch": 1.5, "learning_rate": 4.447386759581882e-05, "loss": 0.0464, "step": 3190 }, { "epoch": 1.51, "learning_rate": 4.433449477351917e-05, "loss": 0.0494, "step": 3200 }, { "epoch": 1.51, "learning_rate": 4.419512195121952e-05, "loss": 0.052, "step": 3210 }, { "epoch": 1.51, "learning_rate": 4.405574912891986e-05, "loss": 0.0553, "step": 3220 }, { "epoch": 1.52, "learning_rate": 4.391637630662021e-05, "loss": 0.0565, "step": 3230 }, { "epoch": 1.52, "learning_rate": 4.377700348432056e-05, "loss": 0.0576, "step": 3240 }, { "epoch": 1.53, "learning_rate": 4.3637630662020907e-05, "loss": 0.0613, "step": 3250 }, { "epoch": 1.53, "learning_rate": 4.3498257839721256e-05, "loss": 0.0601, "step": 3260 }, { "epoch": 1.54, "learning_rate": 4.3358885017421606e-05, "loss": 0.0566, "step": 3270 }, { "epoch": 1.54, "learning_rate": 4.3219512195121955e-05, "loss": 0.0551, "step": 3280 }, { "epoch": 1.55, "learning_rate": 4.3080139372822305e-05, "loss": 0.0478, "step": 3290 }, { "epoch": 1.55, "learning_rate": 4.294076655052265e-05, "loss": 0.0561, "step": 3300 }, { "epoch": 1.56, "learning_rate": 4.2801393728223e-05, "loss": 0.0577, "step": 3310 }, { "epoch": 1.56, "learning_rate": 4.266202090592335e-05, "loss": 0.0533, "step": 3320 }, { "epoch": 1.57, "learning_rate": 4.25226480836237e-05, "loss": 0.0603, "step": 3330 }, { "epoch": 1.57, "learning_rate": 4.2383275261324046e-05, "loss": 0.0533, "step": 3340 }, { "epoch": 1.58, "learning_rate": 4.2243902439024396e-05, "loss": 0.0718, "step": 3350 }, { "epoch": 1.58, "learning_rate": 4.2104529616724746e-05, "loss": 0.0557, "step": 3360 }, { "epoch": 1.59, "learning_rate": 4.1965156794425095e-05, "loss": 0.0656, "step": 3370 }, { "epoch": 1.59, "learning_rate": 4.182578397212544e-05, "loss": 0.0608, "step": 3380 }, { "epoch": 1.59, "learning_rate": 4.168641114982579e-05, "loss": 0.0588, "step": 3390 }, { "epoch": 1.6, "learning_rate": 4.154703832752614e-05, "loss": 0.0556, "step": 3400 }, { "epoch": 1.6, "learning_rate": 4.140766550522649e-05, "loss": 0.0615, "step": 3410 }, { "epoch": 1.61, "learning_rate": 4.1268292682926837e-05, "loss": 0.0524, "step": 3420 }, { "epoch": 1.61, "learning_rate": 4.1128919860627186e-05, "loss": 0.0688, "step": 3430 }, { "epoch": 1.62, "learning_rate": 4.0989547038327536e-05, "loss": 0.053, "step": 3440 }, { "epoch": 1.62, "learning_rate": 4.085017421602787e-05, "loss": 0.0657, "step": 3450 }, { "epoch": 1.63, "learning_rate": 4.071080139372822e-05, "loss": 0.0641, "step": 3460 }, { "epoch": 1.63, "learning_rate": 4.057142857142857e-05, "loss": 0.0628, "step": 3470 }, { "epoch": 1.64, "learning_rate": 4.043205574912892e-05, "loss": 0.0599, "step": 3480 }, { "epoch": 1.64, "learning_rate": 4.029268292682927e-05, "loss": 0.0512, "step": 3490 }, { "epoch": 1.65, "learning_rate": 4.015331010452962e-05, "loss": 0.0552, "step": 3500 }, { "epoch": 1.65, "learning_rate": 4.001393728222997e-05, "loss": 0.0572, "step": 3510 }, { "epoch": 1.66, "learning_rate": 3.987456445993032e-05, "loss": 0.063, "step": 3520 }, { "epoch": 1.66, "learning_rate": 3.973519163763067e-05, "loss": 0.0634, "step": 3530 }, { "epoch": 1.67, "learning_rate": 3.959581881533102e-05, "loss": 0.061, "step": 3540 }, { "epoch": 1.67, "learning_rate": 3.945644599303136e-05, "loss": 0.0543, "step": 3550 }, { "epoch": 1.67, "learning_rate": 3.931707317073171e-05, "loss": 0.0698, "step": 3560 }, { "epoch": 1.68, "learning_rate": 3.917770034843206e-05, "loss": 0.0638, "step": 3570 }, { "epoch": 1.68, "learning_rate": 3.903832752613241e-05, "loss": 0.0584, "step": 3580 }, { "epoch": 1.69, "learning_rate": 3.889895470383275e-05, "loss": 0.0532, "step": 3590 }, { "epoch": 1.69, "learning_rate": 3.87595818815331e-05, "loss": 0.0547, "step": 3600 }, { "epoch": 1.7, "learning_rate": 3.862020905923345e-05, "loss": 0.0624, "step": 3610 }, { "epoch": 1.7, "learning_rate": 3.84808362369338e-05, "loss": 0.0597, "step": 3620 }, { "epoch": 1.71, "learning_rate": 3.8341463414634145e-05, "loss": 0.0636, "step": 3630 }, { "epoch": 1.71, "learning_rate": 3.8202090592334494e-05, "loss": 0.0603, "step": 3640 }, { "epoch": 1.72, "learning_rate": 3.8062717770034844e-05, "loss": 0.0618, "step": 3650 }, { "epoch": 1.72, "learning_rate": 3.7923344947735194e-05, "loss": 0.059, "step": 3660 }, { "epoch": 1.73, "learning_rate": 3.778397212543554e-05, "loss": 0.0674, "step": 3670 }, { "epoch": 1.73, "learning_rate": 3.764459930313589e-05, "loss": 0.0628, "step": 3680 }, { "epoch": 1.74, "learning_rate": 3.750522648083624e-05, "loss": 0.0623, "step": 3690 }, { "epoch": 1.74, "learning_rate": 3.736585365853659e-05, "loss": 0.0545, "step": 3700 }, { "epoch": 1.75, "learning_rate": 3.7226480836236935e-05, "loss": 0.0559, "step": 3710 }, { "epoch": 1.75, "learning_rate": 3.7087108013937285e-05, "loss": 0.0563, "step": 3720 }, { "epoch": 1.75, "learning_rate": 3.6947735191637634e-05, "loss": 0.0634, "step": 3730 }, { "epoch": 1.76, "learning_rate": 3.680836236933798e-05, "loss": 0.0624, "step": 3740 }, { "epoch": 1.76, "learning_rate": 3.666898954703833e-05, "loss": 0.0485, "step": 3750 }, { "epoch": 1.77, "learning_rate": 3.6529616724738676e-05, "loss": 0.0564, "step": 3760 }, { "epoch": 1.77, "learning_rate": 3.6390243902439026e-05, "loss": 0.0605, "step": 3770 }, { "epoch": 1.78, "learning_rate": 3.6250871080139376e-05, "loss": 0.0603, "step": 3780 }, { "epoch": 1.78, "learning_rate": 3.6111498257839725e-05, "loss": 0.0635, "step": 3790 }, { "epoch": 1.79, "learning_rate": 3.5972125435540075e-05, "loss": 0.056, "step": 3800 }, { "epoch": 1.79, "learning_rate": 3.5832752613240425e-05, "loss": 0.0578, "step": 3810 }, { "epoch": 1.8, "learning_rate": 3.569337979094077e-05, "loss": 0.0574, "step": 3820 }, { "epoch": 1.8, "learning_rate": 3.555400696864112e-05, "loss": 0.0649, "step": 3830 }, { "epoch": 1.81, "learning_rate": 3.541463414634147e-05, "loss": 0.0566, "step": 3840 }, { "epoch": 1.81, "learning_rate": 3.5275261324041816e-05, "loss": 0.0562, "step": 3850 }, { "epoch": 1.82, "learning_rate": 3.513588850174216e-05, "loss": 0.0475, "step": 3860 }, { "epoch": 1.82, "learning_rate": 3.499651567944251e-05, "loss": 0.0559, "step": 3870 }, { "epoch": 1.83, "learning_rate": 3.485714285714286e-05, "loss": 0.0487, "step": 3880 }, { "epoch": 1.83, "learning_rate": 3.471777003484321e-05, "loss": 0.065, "step": 3890 }, { "epoch": 1.83, "learning_rate": 3.457839721254356e-05, "loss": 0.0655, "step": 3900 }, { "epoch": 1.84, "learning_rate": 3.443902439024391e-05, "loss": 0.056, "step": 3910 }, { "epoch": 1.84, "learning_rate": 3.429965156794426e-05, "loss": 0.0547, "step": 3920 }, { "epoch": 1.85, "learning_rate": 3.4160278745644606e-05, "loss": 0.0573, "step": 3930 }, { "epoch": 1.85, "learning_rate": 3.402090592334495e-05, "loss": 0.0618, "step": 3940 }, { "epoch": 1.86, "learning_rate": 3.38815331010453e-05, "loss": 0.0564, "step": 3950 }, { "epoch": 1.86, "learning_rate": 3.374216027874565e-05, "loss": 0.0547, "step": 3960 }, { "epoch": 1.87, "learning_rate": 3.3602787456446e-05, "loss": 0.0644, "step": 3970 }, { "epoch": 1.87, "learning_rate": 3.346341463414634e-05, "loss": 0.0564, "step": 3980 }, { "epoch": 1.88, "learning_rate": 3.332404181184669e-05, "loss": 0.0633, "step": 3990 }, { "epoch": 1.88, "learning_rate": 3.318466898954704e-05, "loss": 0.0574, "step": 4000 }, { "epoch": 1.89, "learning_rate": 3.304529616724739e-05, "loss": 0.0547, "step": 4010 }, { "epoch": 1.89, "learning_rate": 3.290592334494774e-05, "loss": 0.0665, "step": 4020 }, { "epoch": 1.9, "learning_rate": 3.276655052264809e-05, "loss": 0.0545, "step": 4030 }, { "epoch": 1.9, "learning_rate": 3.262717770034844e-05, "loss": 0.0692, "step": 4040 }, { "epoch": 1.9, "learning_rate": 3.248780487804879e-05, "loss": 0.0648, "step": 4050 }, { "epoch": 1.91, "learning_rate": 3.234843205574913e-05, "loss": 0.0542, "step": 4060 }, { "epoch": 1.91, "learning_rate": 3.220905923344948e-05, "loss": 0.0562, "step": 4070 }, { "epoch": 1.92, "learning_rate": 3.206968641114983e-05, "loss": 0.0531, "step": 4080 }, { "epoch": 1.92, "learning_rate": 3.193031358885017e-05, "loss": 0.0615, "step": 4090 }, { "epoch": 1.93, "learning_rate": 3.179094076655052e-05, "loss": 0.0484, "step": 4100 }, { "epoch": 1.93, "learning_rate": 3.165156794425087e-05, "loss": 0.0492, "step": 4110 }, { "epoch": 1.94, "learning_rate": 3.151219512195122e-05, "loss": 0.0594, "step": 4120 }, { "epoch": 1.94, "learning_rate": 3.137282229965157e-05, "loss": 0.0489, "step": 4130 }, { "epoch": 1.95, "learning_rate": 3.123344947735192e-05, "loss": 0.0619, "step": 4140 }, { "epoch": 1.95, "learning_rate": 3.1094076655052264e-05, "loss": 0.0491, "step": 4150 }, { "epoch": 1.96, "learning_rate": 3.0954703832752614e-05, "loss": 0.0627, "step": 4160 }, { "epoch": 1.96, "learning_rate": 3.0815331010452964e-05, "loss": 0.0503, "step": 4170 }, { "epoch": 1.97, "learning_rate": 3.067595818815331e-05, "loss": 0.0495, "step": 4180 }, { "epoch": 1.97, "learning_rate": 3.053658536585366e-05, "loss": 0.064, "step": 4190 }, { "epoch": 1.98, "learning_rate": 3.0397212543554012e-05, "loss": 0.0634, "step": 4200 }, { "epoch": 1.98, "learning_rate": 3.0257839721254355e-05, "loss": 0.0582, "step": 4210 }, { "epoch": 1.98, "learning_rate": 3.0118466898954705e-05, "loss": 0.0654, "step": 4220 }, { "epoch": 1.99, "learning_rate": 2.9979094076655055e-05, "loss": 0.0524, "step": 4230 }, { "epoch": 1.99, "learning_rate": 2.9839721254355404e-05, "loss": 0.0613, "step": 4240 }, { "epoch": 2.0, "learning_rate": 2.970034843205575e-05, "loss": 0.0484, "step": 4250 }, { "epoch": 2.0, "eval_accuracy": 0.9767567988685532, "eval_f1": 0.7813079163357163, "eval_loss": 0.06523581594228745, "eval_precision": 0.7725130890052356, "eval_recall": 0.7903053026245314, "eval_runtime": 16.6261, "eval_samples_per_second": 851.671, "eval_steps_per_second": 106.459, "step": 4252 }, { "epoch": 2.0, "learning_rate": 2.95609756097561e-05, "loss": 0.0381, "step": 4260 }, { "epoch": 2.01, "learning_rate": 2.942160278745645e-05, "loss": 0.0487, "step": 4270 }, { "epoch": 2.01, "learning_rate": 2.92822299651568e-05, "loss": 0.046, "step": 4280 }, { "epoch": 2.02, "learning_rate": 2.9142857142857146e-05, "loss": 0.0403, "step": 4290 }, { "epoch": 2.02, "learning_rate": 2.9003484320557492e-05, "loss": 0.0403, "step": 4300 }, { "epoch": 2.03, "learning_rate": 2.886411149825784e-05, "loss": 0.04, "step": 4310 }, { "epoch": 2.03, "learning_rate": 2.872473867595819e-05, "loss": 0.0481, "step": 4320 }, { "epoch": 2.04, "learning_rate": 2.8585365853658537e-05, "loss": 0.0436, "step": 4330 }, { "epoch": 2.04, "learning_rate": 2.8445993031358887e-05, "loss": 0.0363, "step": 4340 }, { "epoch": 2.05, "learning_rate": 2.8306620209059237e-05, "loss": 0.0476, "step": 4350 }, { "epoch": 2.05, "learning_rate": 2.8167247386759586e-05, "loss": 0.0398, "step": 4360 }, { "epoch": 2.06, "learning_rate": 2.8027874564459932e-05, "loss": 0.0443, "step": 4370 }, { "epoch": 2.06, "learning_rate": 2.7888501742160282e-05, "loss": 0.0451, "step": 4380 }, { "epoch": 2.06, "learning_rate": 2.774912891986063e-05, "loss": 0.0411, "step": 4390 }, { "epoch": 2.07, "learning_rate": 2.7609756097560974e-05, "loss": 0.0524, "step": 4400 }, { "epoch": 2.07, "learning_rate": 2.7470383275261324e-05, "loss": 0.0449, "step": 4410 }, { "epoch": 2.08, "learning_rate": 2.7331010452961674e-05, "loss": 0.0405, "step": 4420 }, { "epoch": 2.08, "learning_rate": 2.7191637630662023e-05, "loss": 0.0502, "step": 4430 }, { "epoch": 2.09, "learning_rate": 2.705226480836237e-05, "loss": 0.0444, "step": 4440 }, { "epoch": 2.09, "learning_rate": 2.691289198606272e-05, "loss": 0.0371, "step": 4450 }, { "epoch": 2.1, "learning_rate": 2.677351916376307e-05, "loss": 0.0485, "step": 4460 }, { "epoch": 2.1, "learning_rate": 2.663414634146342e-05, "loss": 0.0444, "step": 4470 }, { "epoch": 2.11, "learning_rate": 2.6494773519163765e-05, "loss": 0.0392, "step": 4480 }, { "epoch": 2.11, "learning_rate": 2.6355400696864114e-05, "loss": 0.0461, "step": 4490 }, { "epoch": 2.12, "learning_rate": 2.6216027874564464e-05, "loss": 0.0459, "step": 4500 }, { "epoch": 2.12, "learning_rate": 2.6076655052264814e-05, "loss": 0.0537, "step": 4510 }, { "epoch": 2.13, "learning_rate": 2.5937282229965156e-05, "loss": 0.0409, "step": 4520 }, { "epoch": 2.13, "learning_rate": 2.5797909407665506e-05, "loss": 0.0446, "step": 4530 }, { "epoch": 2.14, "learning_rate": 2.5658536585365856e-05, "loss": 0.0512, "step": 4540 }, { "epoch": 2.14, "learning_rate": 2.5519163763066205e-05, "loss": 0.0475, "step": 4550 }, { "epoch": 2.14, "learning_rate": 2.537979094076655e-05, "loss": 0.045, "step": 4560 }, { "epoch": 2.15, "learning_rate": 2.52404181184669e-05, "loss": 0.0443, "step": 4570 }, { "epoch": 2.15, "learning_rate": 2.510104529616725e-05, "loss": 0.0409, "step": 4580 }, { "epoch": 2.16, "learning_rate": 2.49616724738676e-05, "loss": 0.0511, "step": 4590 }, { "epoch": 2.16, "learning_rate": 2.4822299651567943e-05, "loss": 0.0429, "step": 4600 }, { "epoch": 2.17, "learning_rate": 2.4682926829268293e-05, "loss": 0.0407, "step": 4610 }, { "epoch": 2.17, "learning_rate": 2.4543554006968642e-05, "loss": 0.0422, "step": 4620 }, { "epoch": 2.18, "learning_rate": 2.4404181184668992e-05, "loss": 0.0381, "step": 4630 }, { "epoch": 2.18, "learning_rate": 2.426480836236934e-05, "loss": 0.0382, "step": 4640 }, { "epoch": 2.19, "learning_rate": 2.4125435540069688e-05, "loss": 0.044, "step": 4650 }, { "epoch": 2.19, "learning_rate": 2.3986062717770038e-05, "loss": 0.0462, "step": 4660 }, { "epoch": 2.2, "learning_rate": 2.3846689895470387e-05, "loss": 0.0453, "step": 4670 }, { "epoch": 2.2, "learning_rate": 2.3707317073170733e-05, "loss": 0.0449, "step": 4680 }, { "epoch": 2.21, "learning_rate": 2.3567944250871083e-05, "loss": 0.0404, "step": 4690 }, { "epoch": 2.21, "learning_rate": 2.3428571428571433e-05, "loss": 0.0375, "step": 4700 }, { "epoch": 2.22, "learning_rate": 2.3289198606271776e-05, "loss": 0.0422, "step": 4710 }, { "epoch": 2.22, "learning_rate": 2.3149825783972125e-05, "loss": 0.0448, "step": 4720 }, { "epoch": 2.22, "learning_rate": 2.3010452961672475e-05, "loss": 0.0368, "step": 4730 }, { "epoch": 2.23, "learning_rate": 2.2871080139372824e-05, "loss": 0.0477, "step": 4740 }, { "epoch": 2.23, "learning_rate": 2.273170731707317e-05, "loss": 0.0418, "step": 4750 }, { "epoch": 2.24, "learning_rate": 2.259233449477352e-05, "loss": 0.0372, "step": 4760 }, { "epoch": 2.24, "learning_rate": 2.245296167247387e-05, "loss": 0.0428, "step": 4770 }, { "epoch": 2.25, "learning_rate": 2.231358885017422e-05, "loss": 0.0353, "step": 4780 }, { "epoch": 2.25, "learning_rate": 2.2174216027874566e-05, "loss": 0.046, "step": 4790 }, { "epoch": 2.26, "learning_rate": 2.2034843205574915e-05, "loss": 0.042, "step": 4800 }, { "epoch": 2.26, "learning_rate": 2.1895470383275265e-05, "loss": 0.0436, "step": 4810 }, { "epoch": 2.27, "learning_rate": 2.1756097560975615e-05, "loss": 0.0398, "step": 4820 }, { "epoch": 2.27, "learning_rate": 2.1616724738675958e-05, "loss": 0.0396, "step": 4830 }, { "epoch": 2.28, "learning_rate": 2.1477351916376307e-05, "loss": 0.0474, "step": 4840 }, { "epoch": 2.28, "learning_rate": 2.1337979094076657e-05, "loss": 0.0405, "step": 4850 }, { "epoch": 2.29, "learning_rate": 2.1198606271777006e-05, "loss": 0.0473, "step": 4860 }, { "epoch": 2.29, "learning_rate": 2.1059233449477353e-05, "loss": 0.044, "step": 4870 }, { "epoch": 2.3, "learning_rate": 2.0919860627177702e-05, "loss": 0.0388, "step": 4880 }, { "epoch": 2.3, "learning_rate": 2.0780487804878052e-05, "loss": 0.0403, "step": 4890 }, { "epoch": 2.3, "learning_rate": 2.06411149825784e-05, "loss": 0.041, "step": 4900 }, { "epoch": 2.31, "learning_rate": 2.0501742160278744e-05, "loss": 0.0429, "step": 4910 }, { "epoch": 2.31, "learning_rate": 2.0362369337979094e-05, "loss": 0.0363, "step": 4920 }, { "epoch": 2.32, "learning_rate": 2.0222996515679444e-05, "loss": 0.0414, "step": 4930 }, { "epoch": 2.32, "learning_rate": 2.0083623693379793e-05, "loss": 0.0515, "step": 4940 }, { "epoch": 2.33, "learning_rate": 1.9944250871080143e-05, "loss": 0.0447, "step": 4950 }, { "epoch": 2.33, "learning_rate": 1.980487804878049e-05, "loss": 0.0545, "step": 4960 }, { "epoch": 2.34, "learning_rate": 1.966550522648084e-05, "loss": 0.0416, "step": 4970 }, { "epoch": 2.34, "learning_rate": 1.9526132404181185e-05, "loss": 0.0387, "step": 4980 }, { "epoch": 2.35, "learning_rate": 1.9386759581881535e-05, "loss": 0.0332, "step": 4990 }, { "epoch": 2.35, "learning_rate": 1.9247386759581884e-05, "loss": 0.0409, "step": 5000 }, { "epoch": 2.36, "learning_rate": 1.910801393728223e-05, "loss": 0.0371, "step": 5010 }, { "epoch": 2.36, "learning_rate": 1.896864111498258e-05, "loss": 0.0448, "step": 5020 }, { "epoch": 2.37, "learning_rate": 1.8829268292682926e-05, "loss": 0.0517, "step": 5030 }, { "epoch": 2.37, "learning_rate": 1.8689895470383276e-05, "loss": 0.0436, "step": 5040 }, { "epoch": 2.38, "learning_rate": 1.8550522648083626e-05, "loss": 0.0394, "step": 5050 }, { "epoch": 2.38, "learning_rate": 1.8411149825783975e-05, "loss": 0.046, "step": 5060 }, { "epoch": 2.38, "learning_rate": 1.827177700348432e-05, "loss": 0.0414, "step": 5070 }, { "epoch": 2.39, "learning_rate": 1.813240418118467e-05, "loss": 0.0338, "step": 5080 }, { "epoch": 2.39, "learning_rate": 1.7993031358885017e-05, "loss": 0.046, "step": 5090 }, { "epoch": 2.4, "learning_rate": 1.7853658536585367e-05, "loss": 0.0481, "step": 5100 }, { "epoch": 2.4, "learning_rate": 1.7714285714285717e-05, "loss": 0.0428, "step": 5110 }, { "epoch": 2.41, "learning_rate": 1.7574912891986066e-05, "loss": 0.0365, "step": 5120 }, { "epoch": 2.41, "learning_rate": 1.7435540069686412e-05, "loss": 0.046, "step": 5130 }, { "epoch": 2.42, "learning_rate": 1.7296167247386762e-05, "loss": 0.0357, "step": 5140 }, { "epoch": 2.42, "learning_rate": 1.7156794425087108e-05, "loss": 0.0387, "step": 5150 }, { "epoch": 2.43, "learning_rate": 1.7017421602787458e-05, "loss": 0.0364, "step": 5160 }, { "epoch": 2.43, "learning_rate": 1.6878048780487804e-05, "loss": 0.041, "step": 5170 }, { "epoch": 2.44, "learning_rate": 1.6738675958188154e-05, "loss": 0.0431, "step": 5180 }, { "epoch": 2.44, "learning_rate": 1.6599303135888503e-05, "loss": 0.0456, "step": 5190 }, { "epoch": 2.45, "learning_rate": 1.6459930313588853e-05, "loss": 0.0446, "step": 5200 }, { "epoch": 2.45, "learning_rate": 1.63205574912892e-05, "loss": 0.0349, "step": 5210 }, { "epoch": 2.46, "learning_rate": 1.618118466898955e-05, "loss": 0.0405, "step": 5220 }, { "epoch": 2.46, "learning_rate": 1.6041811846689895e-05, "loss": 0.0447, "step": 5230 }, { "epoch": 2.46, "learning_rate": 1.5902439024390245e-05, "loss": 0.0359, "step": 5240 }, { "epoch": 2.47, "learning_rate": 1.5763066202090594e-05, "loss": 0.0391, "step": 5250 }, { "epoch": 2.47, "learning_rate": 1.5623693379790944e-05, "loss": 0.0412, "step": 5260 }, { "epoch": 2.48, "learning_rate": 1.548432055749129e-05, "loss": 0.0384, "step": 5270 }, { "epoch": 2.48, "learning_rate": 1.534494773519164e-05, "loss": 0.0417, "step": 5280 }, { "epoch": 2.49, "learning_rate": 1.5205574912891988e-05, "loss": 0.0442, "step": 5290 }, { "epoch": 2.49, "learning_rate": 1.5066202090592337e-05, "loss": 0.0462, "step": 5300 }, { "epoch": 2.5, "learning_rate": 1.4926829268292684e-05, "loss": 0.0357, "step": 5310 }, { "epoch": 2.5, "learning_rate": 1.4787456445993033e-05, "loss": 0.0431, "step": 5320 }, { "epoch": 2.51, "learning_rate": 1.4648083623693381e-05, "loss": 0.0464, "step": 5330 }, { "epoch": 2.51, "learning_rate": 1.4508710801393729e-05, "loss": 0.0324, "step": 5340 }, { "epoch": 2.52, "learning_rate": 1.4369337979094079e-05, "loss": 0.046, "step": 5350 }, { "epoch": 2.52, "learning_rate": 1.4229965156794425e-05, "loss": 0.0494, "step": 5360 }, { "epoch": 2.53, "learning_rate": 1.4090592334494775e-05, "loss": 0.0406, "step": 5370 }, { "epoch": 2.53, "learning_rate": 1.3951219512195122e-05, "loss": 0.0474, "step": 5380 }, { "epoch": 2.54, "learning_rate": 1.3811846689895472e-05, "loss": 0.0434, "step": 5390 }, { "epoch": 2.54, "learning_rate": 1.3672473867595818e-05, "loss": 0.0502, "step": 5400 }, { "epoch": 2.54, "learning_rate": 1.3533101045296168e-05, "loss": 0.0461, "step": 5410 }, { "epoch": 2.55, "learning_rate": 1.3393728222996516e-05, "loss": 0.0414, "step": 5420 }, { "epoch": 2.55, "learning_rate": 1.3254355400696866e-05, "loss": 0.0386, "step": 5430 }, { "epoch": 2.56, "learning_rate": 1.3114982578397213e-05, "loss": 0.0377, "step": 5440 }, { "epoch": 2.56, "learning_rate": 1.2975609756097563e-05, "loss": 0.0501, "step": 5450 }, { "epoch": 2.57, "learning_rate": 1.283623693379791e-05, "loss": 0.0433, "step": 5460 }, { "epoch": 2.57, "learning_rate": 1.2696864111498259e-05, "loss": 0.0405, "step": 5470 }, { "epoch": 2.58, "learning_rate": 1.2557491289198607e-05, "loss": 0.044, "step": 5480 }, { "epoch": 2.58, "learning_rate": 1.2418118466898957e-05, "loss": 0.0463, "step": 5490 }, { "epoch": 2.59, "learning_rate": 1.2278745644599304e-05, "loss": 0.0442, "step": 5500 }, { "epoch": 2.59, "learning_rate": 1.2139372822299652e-05, "loss": 0.0422, "step": 5510 }, { "epoch": 2.6, "learning_rate": 1.2e-05, "loss": 0.0366, "step": 5520 }, { "epoch": 2.6, "learning_rate": 1.186062717770035e-05, "loss": 0.0283, "step": 5530 }, { "epoch": 2.61, "learning_rate": 1.1721254355400698e-05, "loss": 0.034, "step": 5540 }, { "epoch": 2.61, "learning_rate": 1.1581881533101047e-05, "loss": 0.0441, "step": 5550 }, { "epoch": 2.62, "learning_rate": 1.1442508710801394e-05, "loss": 0.0364, "step": 5560 }, { "epoch": 2.62, "learning_rate": 1.1303135888501743e-05, "loss": 0.0535, "step": 5570 }, { "epoch": 2.62, "learning_rate": 1.1163763066202091e-05, "loss": 0.0502, "step": 5580 }, { "epoch": 2.63, "learning_rate": 1.1024390243902441e-05, "loss": 0.0441, "step": 5590 }, { "epoch": 2.63, "learning_rate": 1.0885017421602789e-05, "loss": 0.0359, "step": 5600 }, { "epoch": 2.64, "learning_rate": 1.0745644599303138e-05, "loss": 0.0343, "step": 5610 }, { "epoch": 2.64, "learning_rate": 1.0606271777003485e-05, "loss": 0.0461, "step": 5620 }, { "epoch": 2.65, "learning_rate": 1.0466898954703834e-05, "loss": 0.0396, "step": 5630 }, { "epoch": 2.65, "learning_rate": 1.0327526132404182e-05, "loss": 0.0348, "step": 5640 }, { "epoch": 2.66, "learning_rate": 1.018815331010453e-05, "loss": 0.0428, "step": 5650 }, { "epoch": 2.66, "learning_rate": 1.0048780487804878e-05, "loss": 0.0394, "step": 5660 }, { "epoch": 2.67, "learning_rate": 9.909407665505228e-06, "loss": 0.0348, "step": 5670 }, { "epoch": 2.67, "learning_rate": 9.770034843205576e-06, "loss": 0.0484, "step": 5680 }, { "epoch": 2.68, "learning_rate": 9.630662020905924e-06, "loss": 0.0452, "step": 5690 }, { "epoch": 2.68, "learning_rate": 9.491289198606273e-06, "loss": 0.0365, "step": 5700 }, { "epoch": 2.69, "learning_rate": 9.351916376306621e-06, "loss": 0.0365, "step": 5710 }, { "epoch": 2.69, "learning_rate": 9.212543554006969e-06, "loss": 0.0368, "step": 5720 }, { "epoch": 2.7, "learning_rate": 9.073170731707319e-06, "loss": 0.051, "step": 5730 }, { "epoch": 2.7, "learning_rate": 8.933797909407667e-06, "loss": 0.0454, "step": 5740 }, { "epoch": 2.7, "learning_rate": 8.794425087108015e-06, "loss": 0.0429, "step": 5750 }, { "epoch": 2.71, "learning_rate": 8.655052264808364e-06, "loss": 0.0384, "step": 5760 }, { "epoch": 2.71, "learning_rate": 8.515679442508712e-06, "loss": 0.0391, "step": 5770 }, { "epoch": 2.72, "learning_rate": 8.37630662020906e-06, "loss": 0.0351, "step": 5780 }, { "epoch": 2.72, "learning_rate": 8.23693379790941e-06, "loss": 0.05, "step": 5790 }, { "epoch": 2.73, "learning_rate": 8.097560975609758e-06, "loss": 0.0361, "step": 5800 }, { "epoch": 2.73, "learning_rate": 7.958188153310104e-06, "loss": 0.0365, "step": 5810 }, { "epoch": 2.74, "learning_rate": 7.818815331010453e-06, "loss": 0.0414, "step": 5820 }, { "epoch": 2.74, "learning_rate": 7.679442508710801e-06, "loss": 0.0513, "step": 5830 }, { "epoch": 2.75, "learning_rate": 7.54006968641115e-06, "loss": 0.0406, "step": 5840 }, { "epoch": 2.75, "learning_rate": 7.400696864111498e-06, "loss": 0.0449, "step": 5850 }, { "epoch": 2.76, "learning_rate": 7.261324041811847e-06, "loss": 0.0413, "step": 5860 }, { "epoch": 2.76, "learning_rate": 7.121951219512196e-06, "loss": 0.0389, "step": 5870 }, { "epoch": 2.77, "learning_rate": 6.982578397212544e-06, "loss": 0.0362, "step": 5880 }, { "epoch": 2.77, "learning_rate": 6.843205574912892e-06, "loss": 0.0418, "step": 5890 }, { "epoch": 2.78, "learning_rate": 6.703832752613241e-06, "loss": 0.0365, "step": 5900 }, { "epoch": 2.78, "learning_rate": 6.564459930313589e-06, "loss": 0.0377, "step": 5910 }, { "epoch": 2.78, "learning_rate": 6.425087108013938e-06, "loss": 0.0476, "step": 5920 }, { "epoch": 2.79, "learning_rate": 6.285714285714286e-06, "loss": 0.0453, "step": 5930 }, { "epoch": 2.79, "learning_rate": 6.1463414634146346e-06, "loss": 0.0412, "step": 5940 }, { "epoch": 2.8, "learning_rate": 6.006968641114983e-06, "loss": 0.0493, "step": 5950 }, { "epoch": 2.8, "learning_rate": 5.867595818815331e-06, "loss": 0.0429, "step": 5960 }, { "epoch": 2.81, "learning_rate": 5.72822299651568e-06, "loss": 0.0406, "step": 5970 }, { "epoch": 2.81, "learning_rate": 5.588850174216028e-06, "loss": 0.0423, "step": 5980 }, { "epoch": 2.82, "learning_rate": 5.449477351916377e-06, "loss": 0.0353, "step": 5990 }, { "epoch": 2.82, "learning_rate": 5.3101045296167255e-06, "loss": 0.0362, "step": 6000 }, { "epoch": 2.83, "learning_rate": 5.1707317073170735e-06, "loss": 0.0478, "step": 6010 }, { "epoch": 2.83, "learning_rate": 5.031358885017422e-06, "loss": 0.0431, "step": 6020 }, { "epoch": 2.84, "learning_rate": 4.891986062717771e-06, "loss": 0.0354, "step": 6030 }, { "epoch": 2.84, "learning_rate": 4.752613240418119e-06, "loss": 0.0414, "step": 6040 }, { "epoch": 2.85, "learning_rate": 4.613240418118467e-06, "loss": 0.0397, "step": 6050 }, { "epoch": 2.85, "learning_rate": 4.473867595818816e-06, "loss": 0.0351, "step": 6060 }, { "epoch": 2.86, "learning_rate": 4.334494773519164e-06, "loss": 0.0331, "step": 6070 }, { "epoch": 2.86, "learning_rate": 4.195121951219512e-06, "loss": 0.0423, "step": 6080 }, { "epoch": 2.86, "learning_rate": 4.055749128919861e-06, "loss": 0.042, "step": 6090 }, { "epoch": 2.87, "learning_rate": 3.916376306620209e-06, "loss": 0.0285, "step": 6100 }, { "epoch": 2.87, "learning_rate": 3.777003484320558e-06, "loss": 0.0447, "step": 6110 }, { "epoch": 2.88, "learning_rate": 3.6376306620209062e-06, "loss": 0.0389, "step": 6120 }, { "epoch": 2.88, "learning_rate": 3.4982578397212546e-06, "loss": 0.0395, "step": 6130 }, { "epoch": 2.89, "learning_rate": 3.358885017421603e-06, "loss": 0.0364, "step": 6140 }, { "epoch": 2.89, "learning_rate": 3.2195121951219517e-06, "loss": 0.054, "step": 6150 }, { "epoch": 2.9, "learning_rate": 3.0801393728223e-06, "loss": 0.0442, "step": 6160 }, { "epoch": 2.9, "learning_rate": 2.9407665505226484e-06, "loss": 0.0413, "step": 6170 }, { "epoch": 2.91, "learning_rate": 2.8013937282229968e-06, "loss": 0.0406, "step": 6180 }, { "epoch": 2.91, "learning_rate": 2.6620209059233455e-06, "loss": 0.047, "step": 6190 }, { "epoch": 2.92, "learning_rate": 2.522648083623694e-06, "loss": 0.0382, "step": 6200 }, { "epoch": 2.92, "learning_rate": 2.383275261324042e-06, "loss": 0.043, "step": 6210 }, { "epoch": 2.93, "learning_rate": 2.2439024390243906e-06, "loss": 0.0435, "step": 6220 }, { "epoch": 2.93, "learning_rate": 2.104529616724739e-06, "loss": 0.0436, "step": 6230 }, { "epoch": 2.94, "learning_rate": 1.9651567944250873e-06, "loss": 0.0472, "step": 6240 }, { "epoch": 2.94, "learning_rate": 1.8257839721254357e-06, "loss": 0.0368, "step": 6250 }, { "epoch": 2.94, "learning_rate": 1.686411149825784e-06, "loss": 0.038, "step": 6260 }, { "epoch": 2.95, "learning_rate": 1.5470383275261324e-06, "loss": 0.0437, "step": 6270 }, { "epoch": 2.95, "learning_rate": 1.407665505226481e-06, "loss": 0.0353, "step": 6280 }, { "epoch": 2.96, "learning_rate": 1.2682926829268293e-06, "loss": 0.044, "step": 6290 }, { "epoch": 2.96, "learning_rate": 1.1289198606271779e-06, "loss": 0.0358, "step": 6300 }, { "epoch": 2.97, "learning_rate": 9.895470383275262e-07, "loss": 0.0378, "step": 6310 }, { "epoch": 2.97, "learning_rate": 8.501742160278746e-07, "loss": 0.0346, "step": 6320 }, { "epoch": 2.98, "learning_rate": 7.10801393728223e-07, "loss": 0.0456, "step": 6330 }, { "epoch": 2.98, "learning_rate": 5.714285714285715e-07, "loss": 0.0494, "step": 6340 }, { "epoch": 2.99, "learning_rate": 4.320557491289199e-07, "loss": 0.0406, "step": 6350 }, { "epoch": 2.99, "learning_rate": 2.926829268292683e-07, "loss": 0.0386, "step": 6360 }, { "epoch": 3.0, "learning_rate": 1.5331010452961674e-07, "loss": 0.0415, "step": 6370 }, { "epoch": 3.0, "eval_accuracy": 0.9769126125154315, "eval_f1": 0.7849694196330357, "eval_loss": 0.06933891773223877, "eval_precision": 0.7739696312364425, "eval_recall": 0.7962863774326013, "eval_runtime": 16.4386, "eval_samples_per_second": 861.385, "eval_steps_per_second": 107.673, "step": 6378 }, { "epoch": 3.0, "step": 6378, "total_flos": 1.2518895383371872e+16, "train_loss": 0.06623676680927928, "train_runtime": 577.8703, "train_samples_per_second": 529.624, "train_steps_per_second": 11.037 } ], "logging_steps": 10, "max_steps": 6378, "num_train_epochs": 3, "save_steps": 500, "total_flos": 1.2518895383371872e+16, "trial_name": null, "trial_params": null }