{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.5926364915919697, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.9999729068921297e-05, "loss": 1.8898, "step": 10 }, { "epoch": 0.01, "learning_rate": 4.9998916281557476e-05, "loss": 1.7273, "step": 20 }, { "epoch": 0.01, "learning_rate": 4.999756165552527e-05, "loss": 1.6799, "step": 30 }, { "epoch": 0.01, "learning_rate": 4.999566522018553e-05, "loss": 1.6431, "step": 40 }, { "epoch": 0.01, "learning_rate": 4.999322701664249e-05, "loss": 1.6153, "step": 50 }, { "epoch": 0.02, "learning_rate": 4.9990247097742984e-05, "loss": 1.5933, "step": 60 }, { "epoch": 0.02, "learning_rate": 4.9986725528075205e-05, "loss": 1.5913, "step": 70 }, { "epoch": 0.02, "learning_rate": 4.998266238396737e-05, "loss": 1.5434, "step": 80 }, { "epoch": 0.03, "learning_rate": 4.997805775348605e-05, "loss": 1.5304, "step": 90 }, { "epoch": 0.03, "learning_rate": 4.997291173643424e-05, "loss": 1.5531, "step": 100 }, { "epoch": 0.03, "learning_rate": 4.996722444434921e-05, "loss": 1.5446, "step": 110 }, { "epoch": 0.04, "learning_rate": 4.99609960005001e-05, "loss": 1.5352, "step": 120 }, { "epoch": 0.04, "learning_rate": 4.995422653988524e-05, "loss": 1.5303, "step": 130 }, { "epoch": 0.04, "learning_rate": 4.994691620922919e-05, "loss": 1.5449, "step": 140 }, { "epoch": 0.04, "learning_rate": 4.993906516697964e-05, "loss": 1.5114, "step": 150 }, { "epoch": 0.05, "learning_rate": 4.9930673583303865e-05, "loss": 1.5043, "step": 160 }, { "epoch": 0.05, "learning_rate": 4.992174164008515e-05, "loss": 1.5476, "step": 170 }, { "epoch": 0.05, "learning_rate": 4.991226953091877e-05, "loss": 1.5107, "step": 180 }, { "epoch": 0.06, "learning_rate": 4.9902257461107824e-05, "loss": 1.5104, "step": 190 }, { "epoch": 0.06, "learning_rate": 4.9891705647658795e-05, "loss": 1.5298, "step": 200 }, { "epoch": 0.06, "learning_rate": 4.988061431927681e-05, "loss": 1.4907, "step": 210 }, { "epoch": 0.07, "learning_rate": 4.986898371636071e-05, "loss": 1.5127, "step": 220 }, { "epoch": 0.07, "learning_rate": 4.985681409099784e-05, "loss": 1.5037, "step": 230 }, { "epoch": 0.07, "learning_rate": 4.984410570695858e-05, "loss": 1.5029, "step": 240 }, { "epoch": 0.07, "learning_rate": 4.983085883969063e-05, "loss": 1.4725, "step": 250 }, { "epoch": 0.08, "learning_rate": 4.981707377631303e-05, "loss": 1.5148, "step": 260 }, { "epoch": 0.08, "learning_rate": 4.9802750815609936e-05, "loss": 1.4993, "step": 270 }, { "epoch": 0.08, "learning_rate": 4.978789026802419e-05, "loss": 1.5006, "step": 280 }, { "epoch": 0.09, "learning_rate": 4.9772492455650494e-05, "loss": 1.4885, "step": 290 }, { "epoch": 0.09, "learning_rate": 4.975655771222855e-05, "loss": 1.4898, "step": 300 }, { "epoch": 0.09, "learning_rate": 4.9740086383135706e-05, "loss": 1.4906, "step": 310 }, { "epoch": 0.09, "learning_rate": 4.97230788253796e-05, "loss": 1.4796, "step": 320 }, { "epoch": 0.1, "learning_rate": 4.970553540759028e-05, "loss": 1.4861, "step": 330 }, { "epoch": 0.1, "learning_rate": 4.968745651001231e-05, "loss": 1.4827, "step": 340 }, { "epoch": 0.1, "learning_rate": 4.9668842524496526e-05, "loss": 1.4884, "step": 350 }, { "epoch": 0.11, "learning_rate": 4.964969385449149e-05, "loss": 1.4873, "step": 360 }, { "epoch": 0.11, "learning_rate": 4.96300109150348e-05, "loss": 1.4848, "step": 370 }, { "epoch": 0.11, "learning_rate": 4.960979413274404e-05, "loss": 1.4881, "step": 380 }, { "epoch": 0.12, "learning_rate": 4.9589043945807594e-05, "loss": 1.4618, "step": 390 }, { "epoch": 0.12, "learning_rate": 4.9567760803975105e-05, "loss": 1.4858, "step": 400 }, { "epoch": 0.12, "learning_rate": 4.954594516854773e-05, "loss": 1.4777, "step": 410 }, { "epoch": 0.12, "learning_rate": 4.952359751236817e-05, "loss": 1.4828, "step": 420 }, { "epoch": 0.13, "learning_rate": 4.950071831981038e-05, "loss": 1.4571, "step": 430 }, { "epoch": 0.13, "learning_rate": 4.9477308086769117e-05, "loss": 1.4724, "step": 440 }, { "epoch": 0.13, "learning_rate": 4.945336732064915e-05, "loss": 1.4771, "step": 450 }, { "epoch": 0.14, "learning_rate": 4.9428896540354294e-05, "loss": 1.4604, "step": 460 }, { "epoch": 0.14, "learning_rate": 4.940389627627613e-05, "loss": 1.4815, "step": 470 }, { "epoch": 0.14, "learning_rate": 4.937836707028255e-05, "loss": 1.4859, "step": 480 }, { "epoch": 0.15, "learning_rate": 4.935230947570597e-05, "loss": 1.4715, "step": 490 }, { "epoch": 0.15, "learning_rate": 4.932572405733137e-05, "loss": 1.4759, "step": 500 }, { "epoch": 0.15, "learning_rate": 4.929861139138404e-05, "loss": 1.4678, "step": 510 }, { "epoch": 0.15, "learning_rate": 4.9270972065517083e-05, "loss": 1.4754, "step": 520 }, { "epoch": 0.16, "learning_rate": 4.924280667879869e-05, "loss": 1.462, "step": 530 }, { "epoch": 0.16, "learning_rate": 4.921411584169915e-05, "loss": 1.4704, "step": 540 }, { "epoch": 0.16, "learning_rate": 4.918490017607761e-05, "loss": 1.4661, "step": 550 }, { "epoch": 0.17, "learning_rate": 4.915516031516863e-05, "loss": 1.471, "step": 560 }, { "epoch": 0.17, "learning_rate": 4.912489690356841e-05, "loss": 1.451, "step": 570 }, { "epoch": 0.17, "learning_rate": 4.909411059722084e-05, "loss": 1.4411, "step": 580 }, { "epoch": 0.17, "learning_rate": 4.9062802063403316e-05, "loss": 1.456, "step": 590 }, { "epoch": 0.18, "learning_rate": 4.90309719807122e-05, "loss": 1.4678, "step": 600 }, { "epoch": 0.18, "learning_rate": 4.8998621039048205e-05, "loss": 1.479, "step": 610 }, { "epoch": 0.18, "learning_rate": 4.896574993960136e-05, "loss": 1.4471, "step": 620 }, { "epoch": 0.19, "learning_rate": 4.893235939483587e-05, "loss": 1.453, "step": 630 }, { "epoch": 0.19, "learning_rate": 4.8898450128474626e-05, "loss": 1.4696, "step": 640 }, { "epoch": 0.19, "learning_rate": 4.886402287548357e-05, "loss": 1.4526, "step": 650 }, { "epoch": 0.2, "learning_rate": 4.8829078382055725e-05, "loss": 1.4429, "step": 660 }, { "epoch": 0.2, "learning_rate": 4.8793617405595025e-05, "loss": 1.4491, "step": 670 }, { "epoch": 0.2, "learning_rate": 4.8757640714699924e-05, "loss": 1.4411, "step": 680 }, { "epoch": 0.2, "learning_rate": 4.872114908914671e-05, "loss": 1.4543, "step": 690 }, { "epoch": 0.21, "learning_rate": 4.8684143319872636e-05, "loss": 1.4556, "step": 700 }, { "epoch": 0.21, "learning_rate": 4.864662420895873e-05, "loss": 1.4506, "step": 710 }, { "epoch": 0.21, "learning_rate": 4.860859256961244e-05, "loss": 1.4671, "step": 720 }, { "epoch": 0.22, "learning_rate": 4.857004922615002e-05, "loss": 1.4469, "step": 730 }, { "epoch": 0.22, "learning_rate": 4.8530995013978645e-05, "loss": 1.4554, "step": 740 }, { "epoch": 0.22, "learning_rate": 4.84914307795783e-05, "loss": 1.4671, "step": 750 }, { "epoch": 0.23, "learning_rate": 4.845135738048343e-05, "loss": 1.445, "step": 760 }, { "epoch": 0.23, "learning_rate": 4.841077568526439e-05, "loss": 1.4469, "step": 770 }, { "epoch": 0.23, "learning_rate": 4.836968657350857e-05, "loss": 1.4677, "step": 780 }, { "epoch": 0.23, "learning_rate": 4.832809093580135e-05, "loss": 1.4653, "step": 790 }, { "epoch": 0.24, "learning_rate": 4.8285989673706826e-05, "loss": 1.4342, "step": 800 }, { "epoch": 0.24, "learning_rate": 4.824338369974822e-05, "loss": 1.458, "step": 810 }, { "epoch": 0.24, "learning_rate": 4.8200273937388126e-05, "loss": 1.4541, "step": 820 }, { "epoch": 0.25, "learning_rate": 4.81566613210085e-05, "loss": 1.4324, "step": 830 }, { "epoch": 0.25, "learning_rate": 4.81125467958904e-05, "loss": 1.4405, "step": 840 }, { "epoch": 0.25, "learning_rate": 4.80679313181935e-05, "loss": 1.4408, "step": 850 }, { "epoch": 0.25, "learning_rate": 4.8022815854935356e-05, "loss": 1.4395, "step": 860 }, { "epoch": 0.26, "learning_rate": 4.797720138397045e-05, "loss": 1.4359, "step": 870 }, { "epoch": 0.26, "learning_rate": 4.793108889396902e-05, "loss": 1.442, "step": 880 }, { "epoch": 0.26, "learning_rate": 4.7884479384395594e-05, "loss": 1.4566, "step": 890 }, { "epoch": 0.27, "learning_rate": 4.7837373865487345e-05, "loss": 1.4257, "step": 900 }, { "epoch": 0.27, "learning_rate": 4.77897733582322e-05, "loss": 1.4755, "step": 910 }, { "epoch": 0.27, "learning_rate": 4.774167889434671e-05, "loss": 1.4476, "step": 920 }, { "epoch": 0.28, "learning_rate": 4.769309151625366e-05, "loss": 1.4531, "step": 930 }, { "epoch": 0.28, "learning_rate": 4.7644012277059516e-05, "loss": 1.447, "step": 940 }, { "epoch": 0.28, "learning_rate": 4.7594442240531574e-05, "loss": 1.4201, "step": 950 }, { "epoch": 0.28, "learning_rate": 4.754438248107491e-05, "loss": 1.4323, "step": 960 }, { "epoch": 0.29, "learning_rate": 4.7493834083709104e-05, "loss": 1.4432, "step": 970 }, { "epoch": 0.29, "learning_rate": 4.7442798144044695e-05, "loss": 1.4339, "step": 980 }, { "epoch": 0.29, "learning_rate": 4.739127576825945e-05, "loss": 1.4477, "step": 990 }, { "epoch": 0.3, "learning_rate": 4.733926807307441e-05, "loss": 1.4242, "step": 1000 }, { "epoch": 0.3, "learning_rate": 4.728677618572965e-05, "loss": 1.4341, "step": 1010 }, { "epoch": 0.3, "learning_rate": 4.723380124395985e-05, "loss": 1.4526, "step": 1020 }, { "epoch": 0.31, "learning_rate": 4.7180344395969675e-05, "loss": 1.4402, "step": 1030 }, { "epoch": 0.31, "learning_rate": 4.712640680040884e-05, "loss": 1.4257, "step": 1040 }, { "epoch": 0.31, "learning_rate": 4.707198962634701e-05, "loss": 1.4232, "step": 1050 }, { "epoch": 0.31, "learning_rate": 4.70170940532485e-05, "loss": 1.4485, "step": 1060 }, { "epoch": 0.32, "learning_rate": 4.6961721270946635e-05, "loss": 1.456, "step": 1070 }, { "epoch": 0.32, "learning_rate": 4.690587247961804e-05, "loss": 1.4555, "step": 1080 }, { "epoch": 0.32, "learning_rate": 4.684954888975657e-05, "loss": 1.4376, "step": 1090 }, { "epoch": 0.33, "learning_rate": 4.6792751722147104e-05, "loss": 1.4353, "step": 1100 }, { "epoch": 0.33, "learning_rate": 4.6735482207839074e-05, "loss": 1.4226, "step": 1110 }, { "epoch": 0.33, "learning_rate": 4.6677741588119784e-05, "loss": 1.4315, "step": 1120 }, { "epoch": 0.33, "learning_rate": 4.66195311144875e-05, "loss": 1.4303, "step": 1130 }, { "epoch": 0.34, "learning_rate": 4.6560852048624345e-05, "loss": 1.4288, "step": 1140 }, { "epoch": 0.34, "learning_rate": 4.650170566236892e-05, "loss": 1.4539, "step": 1150 }, { "epoch": 0.34, "learning_rate": 4.6442093237688756e-05, "loss": 1.4527, "step": 1160 }, { "epoch": 0.35, "learning_rate": 4.6382016066652556e-05, "loss": 1.4406, "step": 1170 }, { "epoch": 0.35, "learning_rate": 4.632147545140212e-05, "loss": 1.4233, "step": 1180 }, { "epoch": 0.35, "learning_rate": 4.626047270412419e-05, "loss": 1.426, "step": 1190 }, { "epoch": 0.36, "learning_rate": 4.619900914702198e-05, "loss": 1.4577, "step": 1200 }, { "epoch": 0.36, "learning_rate": 4.613708611228652e-05, "loss": 1.4313, "step": 1210 }, { "epoch": 0.36, "learning_rate": 4.607470494206776e-05, "loss": 1.4129, "step": 1220 }, { "epoch": 0.36, "learning_rate": 4.601186698844554e-05, "loss": 1.4368, "step": 1230 }, { "epoch": 0.37, "learning_rate": 4.594857361340021e-05, "loss": 1.4342, "step": 1240 }, { "epoch": 0.37, "learning_rate": 4.588482618878316e-05, "loss": 1.4438, "step": 1250 }, { "epoch": 0.37, "learning_rate": 4.582062609628709e-05, "loss": 1.4263, "step": 1260 }, { "epoch": 0.38, "learning_rate": 4.575597472741601e-05, "loss": 1.4379, "step": 1270 }, { "epoch": 0.38, "learning_rate": 4.569087348345512e-05, "loss": 1.4221, "step": 1280 }, { "epoch": 0.38, "learning_rate": 4.562532377544046e-05, "loss": 1.4414, "step": 1290 }, { "epoch": 0.39, "learning_rate": 4.5559327024128265e-05, "loss": 1.4395, "step": 1300 }, { "epoch": 0.39, "learning_rate": 4.549288465996421e-05, "loss": 1.4278, "step": 1310 }, { "epoch": 0.39, "learning_rate": 4.542599812305243e-05, "loss": 1.4344, "step": 1320 }, { "epoch": 0.39, "learning_rate": 4.535866886312423e-05, "loss": 1.4352, "step": 1330 }, { "epoch": 0.4, "learning_rate": 4.529089833950675e-05, "loss": 1.4133, "step": 1340 }, { "epoch": 0.4, "learning_rate": 4.5222688021091266e-05, "loss": 1.4506, "step": 1350 }, { "epoch": 0.4, "learning_rate": 4.5154039386301385e-05, "loss": 1.4295, "step": 1360 }, { "epoch": 0.41, "learning_rate": 4.5084953923061016e-05, "loss": 1.4389, "step": 1370 }, { "epoch": 0.41, "learning_rate": 4.5015433128762065e-05, "loss": 1.4247, "step": 1380 }, { "epoch": 0.41, "learning_rate": 4.494547851023205e-05, "loss": 1.4347, "step": 1390 }, { "epoch": 0.41, "learning_rate": 4.487509158370139e-05, "loss": 1.4133, "step": 1400 }, { "epoch": 0.42, "learning_rate": 4.480427387477056e-05, "loss": 1.4296, "step": 1410 }, { "epoch": 0.42, "learning_rate": 4.473302691837702e-05, "loss": 1.4353, "step": 1420 }, { "epoch": 0.42, "learning_rate": 4.466135225876194e-05, "loss": 1.4377, "step": 1430 }, { "epoch": 0.43, "learning_rate": 4.458925144943676e-05, "loss": 1.4168, "step": 1440 }, { "epoch": 0.43, "learning_rate": 4.451672605314948e-05, "loss": 1.4334, "step": 1450 }, { "epoch": 0.43, "learning_rate": 4.444377764185082e-05, "loss": 1.44, "step": 1460 }, { "epoch": 0.44, "learning_rate": 4.43704077966601e-05, "loss": 1.4375, "step": 1470 }, { "epoch": 0.44, "learning_rate": 4.4296618107831036e-05, "loss": 1.447, "step": 1480 }, { "epoch": 0.44, "learning_rate": 4.422241017471722e-05, "loss": 1.4151, "step": 1490 }, { "epoch": 0.44, "learning_rate": 4.414778560573749e-05, "loss": 1.4388, "step": 1500 }, { "epoch": 0.45, "learning_rate": 4.4072746018341036e-05, "loss": 1.4228, "step": 1510 }, { "epoch": 0.45, "learning_rate": 4.399729303897238e-05, "loss": 1.4104, "step": 1520 }, { "epoch": 0.45, "learning_rate": 4.392142830303608e-05, "loss": 1.4441, "step": 1530 }, { "epoch": 0.46, "learning_rate": 4.384515345486131e-05, "loss": 1.4282, "step": 1540 }, { "epoch": 0.46, "learning_rate": 4.376847014766623e-05, "loss": 1.4271, "step": 1550 }, { "epoch": 0.46, "learning_rate": 4.369138004352212e-05, "loss": 1.4223, "step": 1560 }, { "epoch": 0.47, "learning_rate": 4.3613884813317406e-05, "loss": 1.425, "step": 1570 }, { "epoch": 0.47, "learning_rate": 4.3535986136721377e-05, "loss": 1.4392, "step": 1580 }, { "epoch": 0.47, "learning_rate": 4.3457685702147834e-05, "loss": 1.4097, "step": 1590 }, { "epoch": 0.47, "learning_rate": 4.3378985206718484e-05, "loss": 1.4405, "step": 1600 }, { "epoch": 0.48, "learning_rate": 4.329988635622611e-05, "loss": 1.4311, "step": 1610 }, { "epoch": 0.48, "learning_rate": 4.322039086509769e-05, "loss": 1.4358, "step": 1620 }, { "epoch": 0.48, "learning_rate": 4.3140500456357145e-05, "loss": 1.4114, "step": 1630 }, { "epoch": 0.49, "learning_rate": 4.306021686158805e-05, "loss": 1.4165, "step": 1640 }, { "epoch": 0.49, "learning_rate": 4.297954182089609e-05, "loss": 1.4309, "step": 1650 }, { "epoch": 0.49, "learning_rate": 4.289847708287129e-05, "loss": 1.4215, "step": 1660 }, { "epoch": 0.49, "learning_rate": 4.2817024404550246e-05, "loss": 1.4124, "step": 1670 }, { "epoch": 0.5, "learning_rate": 4.2735185551377895e-05, "loss": 1.4001, "step": 1680 }, { "epoch": 0.5, "learning_rate": 4.265296229716935e-05, "loss": 1.4302, "step": 1690 }, { "epoch": 0.5, "learning_rate": 4.25703564240714e-05, "loss": 1.4211, "step": 1700 }, { "epoch": 0.51, "learning_rate": 4.2487369722523906e-05, "loss": 1.4423, "step": 1710 }, { "epoch": 0.51, "learning_rate": 4.240400399122101e-05, "loss": 1.4299, "step": 1720 }, { "epoch": 0.51, "learning_rate": 4.232026103707209e-05, "loss": 1.4214, "step": 1730 }, { "epoch": 0.52, "learning_rate": 4.223614267516268e-05, "loss": 1.4348, "step": 1740 }, { "epoch": 0.52, "learning_rate": 4.215165072871505e-05, "loss": 1.4315, "step": 1750 }, { "epoch": 0.52, "learning_rate": 4.206678702904874e-05, "loss": 1.4098, "step": 1760 }, { "epoch": 0.52, "learning_rate": 4.198155341554084e-05, "loss": 1.4242, "step": 1770 }, { "epoch": 0.53, "learning_rate": 4.1895951735586145e-05, "loss": 1.4272, "step": 1780 }, { "epoch": 0.53, "learning_rate": 4.1809983844557085e-05, "loss": 1.4452, "step": 1790 }, { "epoch": 0.53, "learning_rate": 4.172365160576355e-05, "loss": 1.431, "step": 1800 }, { "epoch": 0.54, "learning_rate": 4.163695689041245e-05, "loss": 1.4389, "step": 1810 }, { "epoch": 0.54, "learning_rate": 4.154990157756722e-05, "loss": 1.413, "step": 1820 }, { "epoch": 0.54, "learning_rate": 4.1462487554107036e-05, "loss": 1.3893, "step": 1830 }, { "epoch": 0.55, "learning_rate": 4.137471671468596e-05, "loss": 1.4052, "step": 1840 }, { "epoch": 0.55, "learning_rate": 4.128659096169183e-05, "loss": 1.4173, "step": 1850 }, { "epoch": 0.55, "learning_rate": 4.1198112205205096e-05, "loss": 1.4012, "step": 1860 }, { "epoch": 0.55, "learning_rate": 4.110928236295734e-05, "loss": 1.4119, "step": 1870 }, { "epoch": 0.56, "learning_rate": 4.102010336028975e-05, "loss": 1.4111, "step": 1880 }, { "epoch": 0.56, "learning_rate": 4.0930577130111424e-05, "loss": 1.4156, "step": 1890 }, { "epoch": 0.56, "learning_rate": 4.084070561285739e-05, "loss": 1.4419, "step": 1900 }, { "epoch": 0.57, "learning_rate": 4.0750490756446624e-05, "loss": 1.4121, "step": 1910 }, { "epoch": 0.57, "learning_rate": 4.0659934516239795e-05, "loss": 1.4204, "step": 1920 }, { "epoch": 0.57, "learning_rate": 4.056903885499689e-05, "loss": 1.4032, "step": 1930 }, { "epoch": 0.57, "learning_rate": 4.047780574283466e-05, "loss": 1.4207, "step": 1940 }, { "epoch": 0.58, "learning_rate": 4.038623715718397e-05, "loss": 1.4095, "step": 1950 }, { "epoch": 0.58, "learning_rate": 4.029433508274686e-05, "loss": 1.4228, "step": 1960 }, { "epoch": 0.58, "learning_rate": 4.0202101511453586e-05, "loss": 1.4141, "step": 1970 }, { "epoch": 0.59, "learning_rate": 4.010953844241943e-05, "loss": 1.4323, "step": 1980 }, { "epoch": 0.59, "learning_rate": 4.001664788190135e-05, "loss": 1.4087, "step": 1990 }, { "epoch": 0.59, "learning_rate": 3.992343184325453e-05, "loss": 1.4186, "step": 2000 } ], "max_steps": 6748, "num_train_epochs": 2, "total_flos": 3.456686675214729e+18, "trial_name": null, "trial_params": null }