{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.8889547373879547, "global_step": 3000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.9999729068921297e-05, "loss": 1.8898, "step": 10 }, { "epoch": 0.01, "learning_rate": 4.9998916281557476e-05, "loss": 1.7273, "step": 20 }, { "epoch": 0.01, "learning_rate": 4.999756165552527e-05, "loss": 1.6799, "step": 30 }, { "epoch": 0.01, "learning_rate": 4.999566522018553e-05, "loss": 1.6431, "step": 40 }, { "epoch": 0.01, "learning_rate": 4.999322701664249e-05, "loss": 1.6153, "step": 50 }, { "epoch": 0.02, "learning_rate": 4.9990247097742984e-05, "loss": 1.5933, "step": 60 }, { "epoch": 0.02, "learning_rate": 4.9986725528075205e-05, "loss": 1.5913, "step": 70 }, { "epoch": 0.02, "learning_rate": 4.998266238396737e-05, "loss": 1.5434, "step": 80 }, { "epoch": 0.03, "learning_rate": 4.997805775348605e-05, "loss": 1.5304, "step": 90 }, { "epoch": 0.03, "learning_rate": 4.997291173643424e-05, "loss": 1.5531, "step": 100 }, { "epoch": 0.03, "learning_rate": 4.996722444434921e-05, "loss": 1.5446, "step": 110 }, { "epoch": 0.04, "learning_rate": 4.99609960005001e-05, "loss": 1.5352, "step": 120 }, { "epoch": 0.04, "learning_rate": 4.995422653988524e-05, "loss": 1.5303, "step": 130 }, { "epoch": 0.04, "learning_rate": 4.994691620922919e-05, "loss": 1.5449, "step": 140 }, { "epoch": 0.04, "learning_rate": 4.993906516697964e-05, "loss": 1.5114, "step": 150 }, { "epoch": 0.05, "learning_rate": 4.9930673583303865e-05, "loss": 1.5043, "step": 160 }, { "epoch": 0.05, "learning_rate": 4.992174164008515e-05, "loss": 1.5476, "step": 170 }, { "epoch": 0.05, "learning_rate": 4.991226953091877e-05, "loss": 1.5107, "step": 180 }, { "epoch": 0.06, "learning_rate": 4.9902257461107824e-05, "loss": 1.5104, "step": 190 }, { "epoch": 0.06, "learning_rate": 4.9891705647658795e-05, "loss": 1.5298, "step": 200 }, { "epoch": 0.06, "learning_rate": 4.988061431927681e-05, "loss": 1.4907, "step": 210 }, { "epoch": 0.07, "learning_rate": 4.986898371636071e-05, "loss": 1.5127, "step": 220 }, { "epoch": 0.07, "learning_rate": 4.985681409099784e-05, "loss": 1.5037, "step": 230 }, { "epoch": 0.07, "learning_rate": 4.984410570695858e-05, "loss": 1.5029, "step": 240 }, { "epoch": 0.07, "learning_rate": 4.983085883969063e-05, "loss": 1.4725, "step": 250 }, { "epoch": 0.08, "learning_rate": 4.981707377631303e-05, "loss": 1.5148, "step": 260 }, { "epoch": 0.08, "learning_rate": 4.9802750815609936e-05, "loss": 1.4993, "step": 270 }, { "epoch": 0.08, "learning_rate": 4.978789026802419e-05, "loss": 1.5006, "step": 280 }, { "epoch": 0.09, "learning_rate": 4.9772492455650494e-05, "loss": 1.4885, "step": 290 }, { "epoch": 0.09, "learning_rate": 4.975655771222855e-05, "loss": 1.4898, "step": 300 }, { "epoch": 0.09, "learning_rate": 4.9740086383135706e-05, "loss": 1.4906, "step": 310 }, { "epoch": 0.09, "learning_rate": 4.97230788253796e-05, "loss": 1.4796, "step": 320 }, { "epoch": 0.1, "learning_rate": 4.970553540759028e-05, "loss": 1.4861, "step": 330 }, { "epoch": 0.1, "learning_rate": 4.968745651001231e-05, "loss": 1.4827, "step": 340 }, { "epoch": 0.1, "learning_rate": 4.9668842524496526e-05, "loss": 1.4884, "step": 350 }, { "epoch": 0.11, "learning_rate": 4.964969385449149e-05, "loss": 1.4873, "step": 360 }, { "epoch": 0.11, "learning_rate": 4.96300109150348e-05, "loss": 1.4848, "step": 370 }, { "epoch": 0.11, "learning_rate": 4.960979413274404e-05, "loss": 1.4881, "step": 380 }, { "epoch": 0.12, "learning_rate": 4.9589043945807594e-05, "loss": 1.4618, "step": 390 }, { "epoch": 0.12, "learning_rate": 4.9567760803975105e-05, "loss": 1.4858, "step": 400 }, { "epoch": 0.12, "learning_rate": 4.954594516854773e-05, "loss": 1.4777, "step": 410 }, { "epoch": 0.12, "learning_rate": 4.952359751236817e-05, "loss": 1.4828, "step": 420 }, { "epoch": 0.13, "learning_rate": 4.950071831981038e-05, "loss": 1.4571, "step": 430 }, { "epoch": 0.13, "learning_rate": 4.9477308086769117e-05, "loss": 1.4724, "step": 440 }, { "epoch": 0.13, "learning_rate": 4.945336732064915e-05, "loss": 1.4771, "step": 450 }, { "epoch": 0.14, "learning_rate": 4.9428896540354294e-05, "loss": 1.4604, "step": 460 }, { "epoch": 0.14, "learning_rate": 4.940389627627613e-05, "loss": 1.4815, "step": 470 }, { "epoch": 0.14, "learning_rate": 4.937836707028255e-05, "loss": 1.4859, "step": 480 }, { "epoch": 0.15, "learning_rate": 4.935230947570597e-05, "loss": 1.4715, "step": 490 }, { "epoch": 0.15, "learning_rate": 4.932572405733137e-05, "loss": 1.4759, "step": 500 }, { "epoch": 0.15, "learning_rate": 4.929861139138404e-05, "loss": 1.4678, "step": 510 }, { "epoch": 0.15, "learning_rate": 4.9270972065517083e-05, "loss": 1.4754, "step": 520 }, { "epoch": 0.16, "learning_rate": 4.924280667879869e-05, "loss": 1.462, "step": 530 }, { "epoch": 0.16, "learning_rate": 4.921411584169915e-05, "loss": 1.4704, "step": 540 }, { "epoch": 0.16, "learning_rate": 4.918490017607761e-05, "loss": 1.4661, "step": 550 }, { "epoch": 0.17, "learning_rate": 4.915516031516863e-05, "loss": 1.471, "step": 560 }, { "epoch": 0.17, "learning_rate": 4.912489690356841e-05, "loss": 1.451, "step": 570 }, { "epoch": 0.17, "learning_rate": 4.909411059722084e-05, "loss": 1.4411, "step": 580 }, { "epoch": 0.17, "learning_rate": 4.9062802063403316e-05, "loss": 1.456, "step": 590 }, { "epoch": 0.18, "learning_rate": 4.90309719807122e-05, "loss": 1.4678, "step": 600 }, { "epoch": 0.18, "learning_rate": 4.8998621039048205e-05, "loss": 1.479, "step": 610 }, { "epoch": 0.18, "learning_rate": 4.896574993960136e-05, "loss": 1.4471, "step": 620 }, { "epoch": 0.19, "learning_rate": 4.893235939483587e-05, "loss": 1.453, "step": 630 }, { "epoch": 0.19, "learning_rate": 4.8898450128474626e-05, "loss": 1.4696, "step": 640 }, { "epoch": 0.19, "learning_rate": 4.886402287548357e-05, "loss": 1.4526, "step": 650 }, { "epoch": 0.2, "learning_rate": 4.8829078382055725e-05, "loss": 1.4429, "step": 660 }, { "epoch": 0.2, "learning_rate": 4.8793617405595025e-05, "loss": 1.4491, "step": 670 }, { "epoch": 0.2, "learning_rate": 4.8757640714699924e-05, "loss": 1.4411, "step": 680 }, { "epoch": 0.2, "learning_rate": 4.872114908914671e-05, "loss": 1.4543, "step": 690 }, { "epoch": 0.21, "learning_rate": 4.8684143319872636e-05, "loss": 1.4556, "step": 700 }, { "epoch": 0.21, "learning_rate": 4.864662420895873e-05, "loss": 1.4506, "step": 710 }, { "epoch": 0.21, "learning_rate": 4.860859256961244e-05, "loss": 1.4671, "step": 720 }, { "epoch": 0.22, "learning_rate": 4.857004922615002e-05, "loss": 1.4469, "step": 730 }, { "epoch": 0.22, "learning_rate": 4.8530995013978645e-05, "loss": 1.4554, "step": 740 }, { "epoch": 0.22, "learning_rate": 4.84914307795783e-05, "loss": 1.4671, "step": 750 }, { "epoch": 0.23, "learning_rate": 4.845135738048343e-05, "loss": 1.445, "step": 760 }, { "epoch": 0.23, "learning_rate": 4.841077568526439e-05, "loss": 1.4469, "step": 770 }, { "epoch": 0.23, "learning_rate": 4.836968657350857e-05, "loss": 1.4677, "step": 780 }, { "epoch": 0.23, "learning_rate": 4.832809093580135e-05, "loss": 1.4653, "step": 790 }, { "epoch": 0.24, "learning_rate": 4.8285989673706826e-05, "loss": 1.4342, "step": 800 }, { "epoch": 0.24, "learning_rate": 4.824338369974822e-05, "loss": 1.458, "step": 810 }, { "epoch": 0.24, "learning_rate": 4.8200273937388126e-05, "loss": 1.4541, "step": 820 }, { "epoch": 0.25, "learning_rate": 4.81566613210085e-05, "loss": 1.4324, "step": 830 }, { "epoch": 0.25, "learning_rate": 4.81125467958904e-05, "loss": 1.4405, "step": 840 }, { "epoch": 0.25, "learning_rate": 4.80679313181935e-05, "loss": 1.4408, "step": 850 }, { "epoch": 0.25, "learning_rate": 4.8022815854935356e-05, "loss": 1.4395, "step": 860 }, { "epoch": 0.26, "learning_rate": 4.797720138397045e-05, "loss": 1.4359, "step": 870 }, { "epoch": 0.26, "learning_rate": 4.793108889396902e-05, "loss": 1.442, "step": 880 }, { "epoch": 0.26, "learning_rate": 4.7884479384395594e-05, "loss": 1.4566, "step": 890 }, { "epoch": 0.27, "learning_rate": 4.7837373865487345e-05, "loss": 1.4257, "step": 900 }, { "epoch": 0.27, "learning_rate": 4.77897733582322e-05, "loss": 1.4755, "step": 910 }, { "epoch": 0.27, "learning_rate": 4.774167889434671e-05, "loss": 1.4476, "step": 920 }, { "epoch": 0.28, "learning_rate": 4.769309151625366e-05, "loss": 1.4531, "step": 930 }, { "epoch": 0.28, "learning_rate": 4.7644012277059516e-05, "loss": 1.447, "step": 940 }, { "epoch": 0.28, "learning_rate": 4.7594442240531574e-05, "loss": 1.4201, "step": 950 }, { "epoch": 0.28, "learning_rate": 4.754438248107491e-05, "loss": 1.4323, "step": 960 }, { "epoch": 0.29, "learning_rate": 4.7493834083709104e-05, "loss": 1.4432, "step": 970 }, { "epoch": 0.29, "learning_rate": 4.7442798144044695e-05, "loss": 1.4339, "step": 980 }, { "epoch": 0.29, "learning_rate": 4.739127576825945e-05, "loss": 1.4477, "step": 990 }, { "epoch": 0.3, "learning_rate": 4.733926807307441e-05, "loss": 1.4242, "step": 1000 }, { "epoch": 0.3, "learning_rate": 4.728677618572965e-05, "loss": 1.4341, "step": 1010 }, { "epoch": 0.3, "learning_rate": 4.723380124395985e-05, "loss": 1.4526, "step": 1020 }, { "epoch": 0.31, "learning_rate": 4.7180344395969675e-05, "loss": 1.4402, "step": 1030 }, { "epoch": 0.31, "learning_rate": 4.712640680040884e-05, "loss": 1.4257, "step": 1040 }, { "epoch": 0.31, "learning_rate": 4.707198962634701e-05, "loss": 1.4232, "step": 1050 }, { "epoch": 0.31, "learning_rate": 4.70170940532485e-05, "loss": 1.4485, "step": 1060 }, { "epoch": 0.32, "learning_rate": 4.6961721270946635e-05, "loss": 1.456, "step": 1070 }, { "epoch": 0.32, "learning_rate": 4.690587247961804e-05, "loss": 1.4555, "step": 1080 }, { "epoch": 0.32, "learning_rate": 4.684954888975657e-05, "loss": 1.4376, "step": 1090 }, { "epoch": 0.33, "learning_rate": 4.6792751722147104e-05, "loss": 1.4353, "step": 1100 }, { "epoch": 0.33, "learning_rate": 4.6735482207839074e-05, "loss": 1.4226, "step": 1110 }, { "epoch": 0.33, "learning_rate": 4.6677741588119784e-05, "loss": 1.4315, "step": 1120 }, { "epoch": 0.33, "learning_rate": 4.66195311144875e-05, "loss": 1.4303, "step": 1130 }, { "epoch": 0.34, "learning_rate": 4.6560852048624345e-05, "loss": 1.4288, "step": 1140 }, { "epoch": 0.34, "learning_rate": 4.650170566236892e-05, "loss": 1.4539, "step": 1150 }, { "epoch": 0.34, "learning_rate": 4.6442093237688756e-05, "loss": 1.4527, "step": 1160 }, { "epoch": 0.35, "learning_rate": 4.6382016066652556e-05, "loss": 1.4406, "step": 1170 }, { "epoch": 0.35, "learning_rate": 4.632147545140212e-05, "loss": 1.4233, "step": 1180 }, { "epoch": 0.35, "learning_rate": 4.626047270412419e-05, "loss": 1.426, "step": 1190 }, { "epoch": 0.36, "learning_rate": 4.619900914702198e-05, "loss": 1.4577, "step": 1200 }, { "epoch": 0.36, "learning_rate": 4.613708611228652e-05, "loss": 1.4313, "step": 1210 }, { "epoch": 0.36, "learning_rate": 4.607470494206776e-05, "loss": 1.4129, "step": 1220 }, { "epoch": 0.36, "learning_rate": 4.601186698844554e-05, "loss": 1.4368, "step": 1230 }, { "epoch": 0.37, "learning_rate": 4.594857361340021e-05, "loss": 1.4342, "step": 1240 }, { "epoch": 0.37, "learning_rate": 4.588482618878316e-05, "loss": 1.4438, "step": 1250 }, { "epoch": 0.37, "learning_rate": 4.582062609628709e-05, "loss": 1.4263, "step": 1260 }, { "epoch": 0.38, "learning_rate": 4.575597472741601e-05, "loss": 1.4379, "step": 1270 }, { "epoch": 0.38, "learning_rate": 4.569087348345512e-05, "loss": 1.4221, "step": 1280 }, { "epoch": 0.38, "learning_rate": 4.562532377544046e-05, "loss": 1.4414, "step": 1290 }, { "epoch": 0.39, "learning_rate": 4.5559327024128265e-05, "loss": 1.4395, "step": 1300 }, { "epoch": 0.39, "learning_rate": 4.549288465996421e-05, "loss": 1.4278, "step": 1310 }, { "epoch": 0.39, "learning_rate": 4.542599812305243e-05, "loss": 1.4344, "step": 1320 }, { "epoch": 0.39, "learning_rate": 4.535866886312423e-05, "loss": 1.4352, "step": 1330 }, { "epoch": 0.4, "learning_rate": 4.529089833950675e-05, "loss": 1.4133, "step": 1340 }, { "epoch": 0.4, "learning_rate": 4.5222688021091266e-05, "loss": 1.4506, "step": 1350 }, { "epoch": 0.4, "learning_rate": 4.5154039386301385e-05, "loss": 1.4295, "step": 1360 }, { "epoch": 0.41, "learning_rate": 4.5084953923061016e-05, "loss": 1.4389, "step": 1370 }, { "epoch": 0.41, "learning_rate": 4.5015433128762065e-05, "loss": 1.4247, "step": 1380 }, { "epoch": 0.41, "learning_rate": 4.494547851023205e-05, "loss": 1.4347, "step": 1390 }, { "epoch": 0.41, "learning_rate": 4.487509158370139e-05, "loss": 1.4133, "step": 1400 }, { "epoch": 0.42, "learning_rate": 4.480427387477056e-05, "loss": 1.4296, "step": 1410 }, { "epoch": 0.42, "learning_rate": 4.473302691837702e-05, "loss": 1.4353, "step": 1420 }, { "epoch": 0.42, "learning_rate": 4.466135225876194e-05, "loss": 1.4377, "step": 1430 }, { "epoch": 0.43, "learning_rate": 4.458925144943676e-05, "loss": 1.4168, "step": 1440 }, { "epoch": 0.43, "learning_rate": 4.451672605314948e-05, "loss": 1.4334, "step": 1450 }, { "epoch": 0.43, "learning_rate": 4.444377764185082e-05, "loss": 1.44, "step": 1460 }, { "epoch": 0.44, "learning_rate": 4.43704077966601e-05, "loss": 1.4375, "step": 1470 }, { "epoch": 0.44, "learning_rate": 4.4296618107831036e-05, "loss": 1.447, "step": 1480 }, { "epoch": 0.44, "learning_rate": 4.422241017471722e-05, "loss": 1.4151, "step": 1490 }, { "epoch": 0.44, "learning_rate": 4.414778560573749e-05, "loss": 1.4388, "step": 1500 }, { "epoch": 0.45, "learning_rate": 4.4072746018341036e-05, "loss": 1.4228, "step": 1510 }, { "epoch": 0.45, "learning_rate": 4.399729303897238e-05, "loss": 1.4104, "step": 1520 }, { "epoch": 0.45, "learning_rate": 4.392142830303608e-05, "loss": 1.4441, "step": 1530 }, { "epoch": 0.46, "learning_rate": 4.384515345486131e-05, "loss": 1.4282, "step": 1540 }, { "epoch": 0.46, "learning_rate": 4.376847014766623e-05, "loss": 1.4271, "step": 1550 }, { "epoch": 0.46, "learning_rate": 4.369138004352212e-05, "loss": 1.4223, "step": 1560 }, { "epoch": 0.47, "learning_rate": 4.3613884813317406e-05, "loss": 1.425, "step": 1570 }, { "epoch": 0.47, "learning_rate": 4.3535986136721377e-05, "loss": 1.4392, "step": 1580 }, { "epoch": 0.47, "learning_rate": 4.3457685702147834e-05, "loss": 1.4097, "step": 1590 }, { "epoch": 0.47, "learning_rate": 4.3378985206718484e-05, "loss": 1.4405, "step": 1600 }, { "epoch": 0.48, "learning_rate": 4.329988635622611e-05, "loss": 1.4311, "step": 1610 }, { "epoch": 0.48, "learning_rate": 4.322039086509769e-05, "loss": 1.4358, "step": 1620 }, { "epoch": 0.48, "learning_rate": 4.3140500456357145e-05, "loss": 1.4114, "step": 1630 }, { "epoch": 0.49, "learning_rate": 4.306021686158805e-05, "loss": 1.4165, "step": 1640 }, { "epoch": 0.49, "learning_rate": 4.297954182089609e-05, "loss": 1.4309, "step": 1650 }, { "epoch": 0.49, "learning_rate": 4.289847708287129e-05, "loss": 1.4215, "step": 1660 }, { "epoch": 0.49, "learning_rate": 4.2817024404550246e-05, "loss": 1.4124, "step": 1670 }, { "epoch": 0.5, "learning_rate": 4.2735185551377895e-05, "loss": 1.4001, "step": 1680 }, { "epoch": 0.5, "learning_rate": 4.265296229716935e-05, "loss": 1.4302, "step": 1690 }, { "epoch": 0.5, "learning_rate": 4.25703564240714e-05, "loss": 1.4211, "step": 1700 }, { "epoch": 0.51, "learning_rate": 4.2487369722523906e-05, "loss": 1.4423, "step": 1710 }, { "epoch": 0.51, "learning_rate": 4.240400399122101e-05, "loss": 1.4299, "step": 1720 }, { "epoch": 0.51, "learning_rate": 4.232026103707209e-05, "loss": 1.4214, "step": 1730 }, { "epoch": 0.52, "learning_rate": 4.223614267516268e-05, "loss": 1.4348, "step": 1740 }, { "epoch": 0.52, "learning_rate": 4.215165072871505e-05, "loss": 1.4315, "step": 1750 }, { "epoch": 0.52, "learning_rate": 4.206678702904874e-05, "loss": 1.4098, "step": 1760 }, { "epoch": 0.52, "learning_rate": 4.198155341554084e-05, "loss": 1.4242, "step": 1770 }, { "epoch": 0.53, "learning_rate": 4.1895951735586145e-05, "loss": 1.4272, "step": 1780 }, { "epoch": 0.53, "learning_rate": 4.1809983844557085e-05, "loss": 1.4452, "step": 1790 }, { "epoch": 0.53, "learning_rate": 4.172365160576355e-05, "loss": 1.431, "step": 1800 }, { "epoch": 0.54, "learning_rate": 4.163695689041245e-05, "loss": 1.4389, "step": 1810 }, { "epoch": 0.54, "learning_rate": 4.154990157756722e-05, "loss": 1.413, "step": 1820 }, { "epoch": 0.54, "learning_rate": 4.1462487554107036e-05, "loss": 1.3893, "step": 1830 }, { "epoch": 0.55, "learning_rate": 4.137471671468596e-05, "loss": 1.4052, "step": 1840 }, { "epoch": 0.55, "learning_rate": 4.128659096169183e-05, "loss": 1.4173, "step": 1850 }, { "epoch": 0.55, "learning_rate": 4.1198112205205096e-05, "loss": 1.4012, "step": 1860 }, { "epoch": 0.55, "learning_rate": 4.110928236295734e-05, "loss": 1.4119, "step": 1870 }, { "epoch": 0.56, "learning_rate": 4.102010336028975e-05, "loss": 1.4111, "step": 1880 }, { "epoch": 0.56, "learning_rate": 4.0930577130111424e-05, "loss": 1.4156, "step": 1890 }, { "epoch": 0.56, "learning_rate": 4.084070561285739e-05, "loss": 1.4419, "step": 1900 }, { "epoch": 0.57, "learning_rate": 4.0750490756446624e-05, "loss": 1.4121, "step": 1910 }, { "epoch": 0.57, "learning_rate": 4.0659934516239795e-05, "loss": 1.4204, "step": 1920 }, { "epoch": 0.57, "learning_rate": 4.056903885499689e-05, "loss": 1.4032, "step": 1930 }, { "epoch": 0.57, "learning_rate": 4.047780574283466e-05, "loss": 1.4207, "step": 1940 }, { "epoch": 0.58, "learning_rate": 4.038623715718397e-05, "loss": 1.4095, "step": 1950 }, { "epoch": 0.58, "learning_rate": 4.029433508274686e-05, "loss": 1.4228, "step": 1960 }, { "epoch": 0.58, "learning_rate": 4.0202101511453586e-05, "loss": 1.4141, "step": 1970 }, { "epoch": 0.59, "learning_rate": 4.010953844241943e-05, "loss": 1.4323, "step": 1980 }, { "epoch": 0.59, "learning_rate": 4.001664788190135e-05, "loss": 1.4087, "step": 1990 }, { "epoch": 0.59, "learning_rate": 3.992343184325453e-05, "loss": 1.4186, "step": 2000 }, { "epoch": 0.6, "learning_rate": 3.982989234688873e-05, "loss": 1.4264, "step": 2010 }, { "epoch": 0.6, "learning_rate": 3.973603142022448e-05, "loss": 1.4417, "step": 2020 }, { "epoch": 0.6, "learning_rate": 3.964185109764915e-05, "loss": 1.4075, "step": 2030 }, { "epoch": 0.6, "learning_rate": 3.954735342047285e-05, "loss": 1.4143, "step": 2040 }, { "epoch": 0.61, "learning_rate": 3.945254043688419e-05, "loss": 1.4176, "step": 2050 }, { "epoch": 0.61, "learning_rate": 3.935741420190587e-05, "loss": 1.4214, "step": 2060 }, { "epoch": 0.61, "learning_rate": 3.926197677735018e-05, "loss": 1.4256, "step": 2070 }, { "epoch": 0.62, "learning_rate": 3.9166230231774276e-05, "loss": 1.4075, "step": 2080 }, { "epoch": 0.62, "learning_rate": 3.9070176640435335e-05, "loss": 1.3887, "step": 2090 }, { "epoch": 0.62, "learning_rate": 3.897381808524562e-05, "loss": 1.4225, "step": 2100 }, { "epoch": 0.63, "learning_rate": 3.887715665472729e-05, "loss": 1.4114, "step": 2110 }, { "epoch": 0.63, "learning_rate": 3.8780194443967226e-05, "loss": 1.4316, "step": 2120 }, { "epoch": 0.63, "learning_rate": 3.8682933554571524e-05, "loss": 1.4168, "step": 2130 }, { "epoch": 0.63, "learning_rate": 3.858537609461999e-05, "loss": 1.4237, "step": 2140 }, { "epoch": 0.64, "learning_rate": 3.8487524178620464e-05, "loss": 1.4373, "step": 2150 }, { "epoch": 0.64, "learning_rate": 3.838937992746295e-05, "loss": 1.4089, "step": 2160 }, { "epoch": 0.64, "learning_rate": 3.8290945468373684e-05, "loss": 1.4319, "step": 2170 }, { "epoch": 0.65, "learning_rate": 3.8192222934869e-05, "loss": 1.4035, "step": 2180 }, { "epoch": 0.65, "learning_rate": 3.809321446670909e-05, "loss": 1.4161, "step": 2190 }, { "epoch": 0.65, "learning_rate": 3.799392220985164e-05, "loss": 1.4136, "step": 2200 }, { "epoch": 0.65, "learning_rate": 3.789434831640533e-05, "loss": 1.4188, "step": 2210 }, { "epoch": 0.66, "learning_rate": 3.779449494458312e-05, "loss": 1.4203, "step": 2220 }, { "epoch": 0.66, "learning_rate": 3.769436425865557e-05, "loss": 1.4263, "step": 2230 }, { "epoch": 0.66, "learning_rate": 3.759395842890384e-05, "loss": 1.4295, "step": 2240 }, { "epoch": 0.67, "learning_rate": 3.749327963157274e-05, "loss": 1.4144, "step": 2250 }, { "epoch": 0.67, "learning_rate": 3.739233004882346e-05, "loss": 1.4162, "step": 2260 }, { "epoch": 0.67, "learning_rate": 3.729111186868635e-05, "loss": 1.4099, "step": 2270 }, { "epoch": 0.68, "learning_rate": 3.718962728501348e-05, "loss": 1.3878, "step": 2280 }, { "epoch": 0.68, "learning_rate": 3.708787849743106e-05, "loss": 1.4399, "step": 2290 }, { "epoch": 0.68, "learning_rate": 3.69858677112918e-05, "loss": 1.4249, "step": 2300 }, { "epoch": 0.68, "learning_rate": 3.688359713762707e-05, "loss": 1.3925, "step": 2310 }, { "epoch": 0.69, "learning_rate": 3.6781068993099034e-05, "loss": 1.4036, "step": 2320 }, { "epoch": 0.69, "learning_rate": 3.667828549995255e-05, "loss": 1.3986, "step": 2330 }, { "epoch": 0.69, "learning_rate": 3.657524888596703e-05, "loss": 1.4298, "step": 2340 }, { "epoch": 0.7, "learning_rate": 3.6471961384408155e-05, "loss": 1.4016, "step": 2350 }, { "epoch": 0.7, "learning_rate": 3.636842523397945e-05, "loss": 1.3992, "step": 2360 }, { "epoch": 0.7, "learning_rate": 3.626464267877381e-05, "loss": 1.4441, "step": 2370 }, { "epoch": 0.71, "learning_rate": 3.616061596822478e-05, "loss": 1.3967, "step": 2380 }, { "epoch": 0.71, "learning_rate": 3.6056347357057893e-05, "loss": 1.4252, "step": 2390 }, { "epoch": 0.71, "learning_rate": 3.595183910524173e-05, "loss": 1.4209, "step": 2400 }, { "epoch": 0.71, "learning_rate": 3.5847093477938956e-05, "loss": 1.4133, "step": 2410 }, { "epoch": 0.72, "learning_rate": 3.5742112745457235e-05, "loss": 1.4313, "step": 2420 }, { "epoch": 0.72, "learning_rate": 3.563689918320002e-05, "loss": 1.4275, "step": 2430 }, { "epoch": 0.72, "learning_rate": 3.5531455071617226e-05, "loss": 1.421, "step": 2440 }, { "epoch": 0.73, "learning_rate": 3.542578269615579e-05, "loss": 1.4402, "step": 2450 }, { "epoch": 0.73, "learning_rate": 3.5319884347210186e-05, "loss": 1.4176, "step": 2460 }, { "epoch": 0.73, "learning_rate": 3.521376232007271e-05, "loss": 1.4117, "step": 2470 }, { "epoch": 0.73, "learning_rate": 3.5107418914883794e-05, "loss": 1.41, "step": 2480 }, { "epoch": 0.74, "learning_rate": 3.500085643658211e-05, "loss": 1.4313, "step": 2490 }, { "epoch": 0.74, "learning_rate": 3.489407719485464e-05, "loss": 1.4035, "step": 2500 }, { "epoch": 0.74, "learning_rate": 3.4787083504086605e-05, "loss": 1.4057, "step": 2510 }, { "epoch": 0.75, "learning_rate": 3.467987768331127e-05, "loss": 1.4125, "step": 2520 }, { "epoch": 0.75, "learning_rate": 3.457246205615974e-05, "loss": 1.4056, "step": 2530 }, { "epoch": 0.75, "learning_rate": 3.446483895081054e-05, "loss": 1.4082, "step": 2540 }, { "epoch": 0.76, "learning_rate": 3.4357010699939215e-05, "loss": 1.3915, "step": 2550 }, { "epoch": 0.76, "learning_rate": 3.424897964066769e-05, "loss": 1.4012, "step": 2560 }, { "epoch": 0.76, "learning_rate": 3.4140748114513685e-05, "loss": 1.4251, "step": 2570 }, { "epoch": 0.76, "learning_rate": 3.403231846733994e-05, "loss": 1.4013, "step": 2580 }, { "epoch": 0.77, "learning_rate": 3.392369304930334e-05, "loss": 1.4076, "step": 2590 }, { "epoch": 0.77, "learning_rate": 3.3814874214804034e-05, "loss": 1.3978, "step": 2600 }, { "epoch": 0.77, "learning_rate": 3.3705864322434354e-05, "loss": 1.408, "step": 2610 }, { "epoch": 0.78, "learning_rate": 3.359666573492772e-05, "loss": 1.3888, "step": 2620 }, { "epoch": 0.78, "learning_rate": 3.3487280819107415e-05, "loss": 1.4052, "step": 2630 }, { "epoch": 0.78, "learning_rate": 3.33777119458353e-05, "loss": 1.4286, "step": 2640 }, { "epoch": 0.79, "learning_rate": 3.326796148996042e-05, "loss": 1.4241, "step": 2650 }, { "epoch": 0.79, "learning_rate": 3.315803183026753e-05, "loss": 1.4049, "step": 2660 }, { "epoch": 0.79, "learning_rate": 3.304792534942553e-05, "loss": 1.3826, "step": 2670 }, { "epoch": 0.79, "learning_rate": 3.293764443393582e-05, "loss": 1.413, "step": 2680 }, { "epoch": 0.8, "learning_rate": 3.2827191474080605e-05, "loss": 1.4161, "step": 2690 }, { "epoch": 0.8, "learning_rate": 3.2716568863871044e-05, "loss": 1.382, "step": 2700 }, { "epoch": 0.8, "learning_rate": 3.260577900099539e-05, "loss": 1.381, "step": 2710 }, { "epoch": 0.81, "learning_rate": 3.2494824286767e-05, "loss": 1.396, "step": 2720 }, { "epoch": 0.81, "learning_rate": 3.2383707126072315e-05, "loss": 1.3923, "step": 2730 }, { "epoch": 0.81, "learning_rate": 3.2272429927318707e-05, "loss": 1.4044, "step": 2740 }, { "epoch": 0.81, "learning_rate": 3.21609951023823e-05, "loss": 1.4073, "step": 2750 }, { "epoch": 0.82, "learning_rate": 3.204940506655568e-05, "loss": 1.4178, "step": 2760 }, { "epoch": 0.82, "learning_rate": 3.1937662238495544e-05, "loss": 1.4179, "step": 2770 }, { "epoch": 0.82, "learning_rate": 3.1825769040170285e-05, "loss": 1.4003, "step": 2780 }, { "epoch": 0.83, "learning_rate": 3.1713727896807505e-05, "loss": 1.4176, "step": 2790 }, { "epoch": 0.83, "learning_rate": 3.160154123684143e-05, "loss": 1.4179, "step": 2800 }, { "epoch": 0.83, "learning_rate": 3.1489211491860276e-05, "loss": 1.4098, "step": 2810 }, { "epoch": 0.84, "learning_rate": 3.1376741096553576e-05, "loss": 1.4087, "step": 2820 }, { "epoch": 0.84, "learning_rate": 3.126413248865935e-05, "loss": 1.3971, "step": 2830 }, { "epoch": 0.84, "learning_rate": 3.115138810891134e-05, "loss": 1.3915, "step": 2840 }, { "epoch": 0.84, "learning_rate": 3.103851040098607e-05, "loss": 1.4041, "step": 2850 }, { "epoch": 0.85, "learning_rate": 3.0925501811449855e-05, "loss": 1.4129, "step": 2860 }, { "epoch": 0.85, "learning_rate": 3.081236478970583e-05, "loss": 1.3948, "step": 2870 }, { "epoch": 0.85, "learning_rate": 3.069910178794082e-05, "loss": 1.4116, "step": 2880 }, { "epoch": 0.86, "learning_rate": 3.0585715261072206e-05, "loss": 1.4029, "step": 2890 }, { "epoch": 0.86, "learning_rate": 3.04722076666947e-05, "loss": 1.399, "step": 2900 }, { "epoch": 0.86, "learning_rate": 3.0358581465027125e-05, "loss": 1.4061, "step": 2910 }, { "epoch": 0.87, "learning_rate": 3.024483911885901e-05, "loss": 1.4152, "step": 2920 }, { "epoch": 0.87, "learning_rate": 3.013098309349729e-05, "loss": 1.4257, "step": 2930 }, { "epoch": 0.87, "learning_rate": 3.0017015856712814e-05, "loss": 1.417, "step": 2940 }, { "epoch": 0.87, "learning_rate": 2.9902939878686915e-05, "loss": 1.3952, "step": 2950 }, { "epoch": 0.88, "learning_rate": 2.978875763195779e-05, "loss": 1.4252, "step": 2960 }, { "epoch": 0.88, "learning_rate": 2.9674471591367005e-05, "loss": 1.3982, "step": 2970 }, { "epoch": 0.88, "learning_rate": 2.9560084234005765e-05, "loss": 1.3948, "step": 2980 }, { "epoch": 0.89, "learning_rate": 2.944559803916128e-05, "loss": 1.4127, "step": 2990 }, { "epoch": 0.89, "learning_rate": 2.9331015488263024e-05, "loss": 1.4239, "step": 3000 } ], "max_steps": 6748, "num_train_epochs": 2, "total_flos": 5.192765691303821e+18, "trial_name": null, "trial_params": null }