{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.999555522631306, "global_step": 6748, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.9999729068921297e-05, "loss": 1.8898, "step": 10 }, { "epoch": 0.01, "learning_rate": 4.9998916281557476e-05, "loss": 1.7273, "step": 20 }, { "epoch": 0.01, "learning_rate": 4.999756165552527e-05, "loss": 1.6799, "step": 30 }, { "epoch": 0.01, "learning_rate": 4.999566522018553e-05, "loss": 1.6431, "step": 40 }, { "epoch": 0.01, "learning_rate": 4.999322701664249e-05, "loss": 1.6153, "step": 50 }, { "epoch": 0.02, "learning_rate": 4.9990247097742984e-05, "loss": 1.5933, "step": 60 }, { "epoch": 0.02, "learning_rate": 4.9986725528075205e-05, "loss": 1.5913, "step": 70 }, { "epoch": 0.02, "learning_rate": 4.998266238396737e-05, "loss": 1.5434, "step": 80 }, { "epoch": 0.03, "learning_rate": 4.997805775348605e-05, "loss": 1.5304, "step": 90 }, { "epoch": 0.03, "learning_rate": 4.997291173643424e-05, "loss": 1.5531, "step": 100 }, { "epoch": 0.03, "learning_rate": 4.996722444434921e-05, "loss": 1.5446, "step": 110 }, { "epoch": 0.04, "learning_rate": 4.99609960005001e-05, "loss": 1.5352, "step": 120 }, { "epoch": 0.04, "learning_rate": 4.995422653988524e-05, "loss": 1.5303, "step": 130 }, { "epoch": 0.04, "learning_rate": 4.994691620922919e-05, "loss": 1.5449, "step": 140 }, { "epoch": 0.04, "learning_rate": 4.993906516697964e-05, "loss": 1.5114, "step": 150 }, { "epoch": 0.05, "learning_rate": 4.9930673583303865e-05, "loss": 1.5043, "step": 160 }, { "epoch": 0.05, "learning_rate": 4.992174164008515e-05, "loss": 1.5476, "step": 170 }, { "epoch": 0.05, "learning_rate": 4.991226953091877e-05, "loss": 1.5107, "step": 180 }, { "epoch": 0.06, "learning_rate": 4.9902257461107824e-05, "loss": 1.5104, "step": 190 }, { "epoch": 0.06, "learning_rate": 4.9891705647658795e-05, "loss": 1.5298, "step": 200 }, { "epoch": 0.06, "learning_rate": 4.988061431927681e-05, "loss": 1.4907, "step": 210 }, { "epoch": 0.07, "learning_rate": 4.986898371636071e-05, "loss": 1.5127, "step": 220 }, { "epoch": 0.07, "learning_rate": 4.985681409099784e-05, "loss": 1.5037, "step": 230 }, { "epoch": 0.07, "learning_rate": 4.984410570695858e-05, "loss": 1.5029, "step": 240 }, { "epoch": 0.07, "learning_rate": 4.983085883969063e-05, "loss": 1.4725, "step": 250 }, { "epoch": 0.08, "learning_rate": 4.981707377631303e-05, "loss": 1.5148, "step": 260 }, { "epoch": 0.08, "learning_rate": 4.9802750815609936e-05, "loss": 1.4993, "step": 270 }, { "epoch": 0.08, "learning_rate": 4.978789026802419e-05, "loss": 1.5006, "step": 280 }, { "epoch": 0.09, "learning_rate": 4.9772492455650494e-05, "loss": 1.4885, "step": 290 }, { "epoch": 0.09, "learning_rate": 4.975655771222855e-05, "loss": 1.4898, "step": 300 }, { "epoch": 0.09, "learning_rate": 4.9740086383135706e-05, "loss": 1.4906, "step": 310 }, { "epoch": 0.09, "learning_rate": 4.97230788253796e-05, "loss": 1.4796, "step": 320 }, { "epoch": 0.1, "learning_rate": 4.970553540759028e-05, "loss": 1.4861, "step": 330 }, { "epoch": 0.1, "learning_rate": 4.968745651001231e-05, "loss": 1.4827, "step": 340 }, { "epoch": 0.1, "learning_rate": 4.9668842524496526e-05, "loss": 1.4884, "step": 350 }, { "epoch": 0.11, "learning_rate": 4.964969385449149e-05, "loss": 1.4873, "step": 360 }, { "epoch": 0.11, "learning_rate": 4.96300109150348e-05, "loss": 1.4848, "step": 370 }, { "epoch": 0.11, "learning_rate": 4.960979413274404e-05, "loss": 1.4881, "step": 380 }, { "epoch": 0.12, "learning_rate": 4.9589043945807594e-05, "loss": 1.4618, "step": 390 }, { "epoch": 0.12, "learning_rate": 4.9567760803975105e-05, "loss": 1.4858, "step": 400 }, { "epoch": 0.12, "learning_rate": 4.954594516854773e-05, "loss": 1.4777, "step": 410 }, { "epoch": 0.12, "learning_rate": 4.952359751236817e-05, "loss": 1.4828, "step": 420 }, { "epoch": 0.13, "learning_rate": 4.950071831981038e-05, "loss": 1.4571, "step": 430 }, { "epoch": 0.13, "learning_rate": 4.9477308086769117e-05, "loss": 1.4724, "step": 440 }, { "epoch": 0.13, "learning_rate": 4.945336732064915e-05, "loss": 1.4771, "step": 450 }, { "epoch": 0.14, "learning_rate": 4.9428896540354294e-05, "loss": 1.4604, "step": 460 }, { "epoch": 0.14, "learning_rate": 4.940389627627613e-05, "loss": 1.4815, "step": 470 }, { "epoch": 0.14, "learning_rate": 4.937836707028255e-05, "loss": 1.4859, "step": 480 }, { "epoch": 0.15, "learning_rate": 4.935230947570597e-05, "loss": 1.4715, "step": 490 }, { "epoch": 0.15, "learning_rate": 4.932572405733137e-05, "loss": 1.4759, "step": 500 }, { "epoch": 0.15, "learning_rate": 4.929861139138404e-05, "loss": 1.4678, "step": 510 }, { "epoch": 0.15, "learning_rate": 4.9270972065517083e-05, "loss": 1.4754, "step": 520 }, { "epoch": 0.16, "learning_rate": 4.924280667879869e-05, "loss": 1.462, "step": 530 }, { "epoch": 0.16, "learning_rate": 4.921411584169915e-05, "loss": 1.4704, "step": 540 }, { "epoch": 0.16, "learning_rate": 4.918490017607761e-05, "loss": 1.4661, "step": 550 }, { "epoch": 0.17, "learning_rate": 4.915516031516863e-05, "loss": 1.471, "step": 560 }, { "epoch": 0.17, "learning_rate": 4.912489690356841e-05, "loss": 1.451, "step": 570 }, { "epoch": 0.17, "learning_rate": 4.909411059722084e-05, "loss": 1.4411, "step": 580 }, { "epoch": 0.17, "learning_rate": 4.9062802063403316e-05, "loss": 1.456, "step": 590 }, { "epoch": 0.18, "learning_rate": 4.90309719807122e-05, "loss": 1.4678, "step": 600 }, { "epoch": 0.18, "learning_rate": 4.8998621039048205e-05, "loss": 1.479, "step": 610 }, { "epoch": 0.18, "learning_rate": 4.896574993960136e-05, "loss": 1.4471, "step": 620 }, { "epoch": 0.19, "learning_rate": 4.893235939483587e-05, "loss": 1.453, "step": 630 }, { "epoch": 0.19, "learning_rate": 4.8898450128474626e-05, "loss": 1.4696, "step": 640 }, { "epoch": 0.19, "learning_rate": 4.886402287548357e-05, "loss": 1.4526, "step": 650 }, { "epoch": 0.2, "learning_rate": 4.8829078382055725e-05, "loss": 1.4429, "step": 660 }, { "epoch": 0.2, "learning_rate": 4.8793617405595025e-05, "loss": 1.4491, "step": 670 }, { "epoch": 0.2, "learning_rate": 4.8757640714699924e-05, "loss": 1.4411, "step": 680 }, { "epoch": 0.2, "learning_rate": 4.872114908914671e-05, "loss": 1.4543, "step": 690 }, { "epoch": 0.21, "learning_rate": 4.8684143319872636e-05, "loss": 1.4556, "step": 700 }, { "epoch": 0.21, "learning_rate": 4.864662420895873e-05, "loss": 1.4506, "step": 710 }, { "epoch": 0.21, "learning_rate": 4.860859256961244e-05, "loss": 1.4671, "step": 720 }, { "epoch": 0.22, "learning_rate": 4.857004922615002e-05, "loss": 1.4469, "step": 730 }, { "epoch": 0.22, "learning_rate": 4.8530995013978645e-05, "loss": 1.4554, "step": 740 }, { "epoch": 0.22, "learning_rate": 4.84914307795783e-05, "loss": 1.4671, "step": 750 }, { "epoch": 0.23, "learning_rate": 4.845135738048343e-05, "loss": 1.445, "step": 760 }, { "epoch": 0.23, "learning_rate": 4.841077568526439e-05, "loss": 1.4469, "step": 770 }, { "epoch": 0.23, "learning_rate": 4.836968657350857e-05, "loss": 1.4677, "step": 780 }, { "epoch": 0.23, "learning_rate": 4.832809093580135e-05, "loss": 1.4653, "step": 790 }, { "epoch": 0.24, "learning_rate": 4.8285989673706826e-05, "loss": 1.4342, "step": 800 }, { "epoch": 0.24, "learning_rate": 4.824338369974822e-05, "loss": 1.458, "step": 810 }, { "epoch": 0.24, "learning_rate": 4.8200273937388126e-05, "loss": 1.4541, "step": 820 }, { "epoch": 0.25, "learning_rate": 4.81566613210085e-05, "loss": 1.4324, "step": 830 }, { "epoch": 0.25, "learning_rate": 4.81125467958904e-05, "loss": 1.4405, "step": 840 }, { "epoch": 0.25, "learning_rate": 4.80679313181935e-05, "loss": 1.4408, "step": 850 }, { "epoch": 0.25, "learning_rate": 4.8022815854935356e-05, "loss": 1.4395, "step": 860 }, { "epoch": 0.26, "learning_rate": 4.797720138397045e-05, "loss": 1.4359, "step": 870 }, { "epoch": 0.26, "learning_rate": 4.793108889396902e-05, "loss": 1.442, "step": 880 }, { "epoch": 0.26, "learning_rate": 4.7884479384395594e-05, "loss": 1.4566, "step": 890 }, { "epoch": 0.27, "learning_rate": 4.7837373865487345e-05, "loss": 1.4257, "step": 900 }, { "epoch": 0.27, "learning_rate": 4.77897733582322e-05, "loss": 1.4755, "step": 910 }, { "epoch": 0.27, "learning_rate": 4.774167889434671e-05, "loss": 1.4476, "step": 920 }, { "epoch": 0.28, "learning_rate": 4.769309151625366e-05, "loss": 1.4531, "step": 930 }, { "epoch": 0.28, "learning_rate": 4.7644012277059516e-05, "loss": 1.447, "step": 940 }, { "epoch": 0.28, "learning_rate": 4.7594442240531574e-05, "loss": 1.4201, "step": 950 }, { "epoch": 0.28, "learning_rate": 4.754438248107491e-05, "loss": 1.4323, "step": 960 }, { "epoch": 0.29, "learning_rate": 4.7493834083709104e-05, "loss": 1.4432, "step": 970 }, { "epoch": 0.29, "learning_rate": 4.7442798144044695e-05, "loss": 1.4339, "step": 980 }, { "epoch": 0.29, "learning_rate": 4.739127576825945e-05, "loss": 1.4477, "step": 990 }, { "epoch": 0.3, "learning_rate": 4.733926807307441e-05, "loss": 1.4242, "step": 1000 }, { "epoch": 0.3, "learning_rate": 4.728677618572965e-05, "loss": 1.4341, "step": 1010 }, { "epoch": 0.3, "learning_rate": 4.723380124395985e-05, "loss": 1.4526, "step": 1020 }, { "epoch": 0.31, "learning_rate": 4.7180344395969675e-05, "loss": 1.4402, "step": 1030 }, { "epoch": 0.31, "learning_rate": 4.712640680040884e-05, "loss": 1.4257, "step": 1040 }, { "epoch": 0.31, "learning_rate": 4.707198962634701e-05, "loss": 1.4232, "step": 1050 }, { "epoch": 0.31, "learning_rate": 4.70170940532485e-05, "loss": 1.4485, "step": 1060 }, { "epoch": 0.32, "learning_rate": 4.6961721270946635e-05, "loss": 1.456, "step": 1070 }, { "epoch": 0.32, "learning_rate": 4.690587247961804e-05, "loss": 1.4555, "step": 1080 }, { "epoch": 0.32, "learning_rate": 4.684954888975657e-05, "loss": 1.4376, "step": 1090 }, { "epoch": 0.33, "learning_rate": 4.6792751722147104e-05, "loss": 1.4353, "step": 1100 }, { "epoch": 0.33, "learning_rate": 4.6735482207839074e-05, "loss": 1.4226, "step": 1110 }, { "epoch": 0.33, "learning_rate": 4.6677741588119784e-05, "loss": 1.4315, "step": 1120 }, { "epoch": 0.33, "learning_rate": 4.66195311144875e-05, "loss": 1.4303, "step": 1130 }, { "epoch": 0.34, "learning_rate": 4.6560852048624345e-05, "loss": 1.4288, "step": 1140 }, { "epoch": 0.34, "learning_rate": 4.650170566236892e-05, "loss": 1.4539, "step": 1150 }, { "epoch": 0.34, "learning_rate": 4.6442093237688756e-05, "loss": 1.4527, "step": 1160 }, { "epoch": 0.35, "learning_rate": 4.6382016066652556e-05, "loss": 1.4406, "step": 1170 }, { "epoch": 0.35, "learning_rate": 4.632147545140212e-05, "loss": 1.4233, "step": 1180 }, { "epoch": 0.35, "learning_rate": 4.626047270412419e-05, "loss": 1.426, "step": 1190 }, { "epoch": 0.36, "learning_rate": 4.619900914702198e-05, "loss": 1.4577, "step": 1200 }, { "epoch": 0.36, "learning_rate": 4.613708611228652e-05, "loss": 1.4313, "step": 1210 }, { "epoch": 0.36, "learning_rate": 4.607470494206776e-05, "loss": 1.4129, "step": 1220 }, { "epoch": 0.36, "learning_rate": 4.601186698844554e-05, "loss": 1.4368, "step": 1230 }, { "epoch": 0.37, "learning_rate": 4.594857361340021e-05, "loss": 1.4342, "step": 1240 }, { "epoch": 0.37, "learning_rate": 4.588482618878316e-05, "loss": 1.4438, "step": 1250 }, { "epoch": 0.37, "learning_rate": 4.582062609628709e-05, "loss": 1.4263, "step": 1260 }, { "epoch": 0.38, "learning_rate": 4.575597472741601e-05, "loss": 1.4379, "step": 1270 }, { "epoch": 0.38, "learning_rate": 4.569087348345512e-05, "loss": 1.4221, "step": 1280 }, { "epoch": 0.38, "learning_rate": 4.562532377544046e-05, "loss": 1.4414, "step": 1290 }, { "epoch": 0.39, "learning_rate": 4.5559327024128265e-05, "loss": 1.4395, "step": 1300 }, { "epoch": 0.39, "learning_rate": 4.549288465996421e-05, "loss": 1.4278, "step": 1310 }, { "epoch": 0.39, "learning_rate": 4.542599812305243e-05, "loss": 1.4344, "step": 1320 }, { "epoch": 0.39, "learning_rate": 4.535866886312423e-05, "loss": 1.4352, "step": 1330 }, { "epoch": 0.4, "learning_rate": 4.529089833950675e-05, "loss": 1.4133, "step": 1340 }, { "epoch": 0.4, "learning_rate": 4.5222688021091266e-05, "loss": 1.4506, "step": 1350 }, { "epoch": 0.4, "learning_rate": 4.5154039386301385e-05, "loss": 1.4295, "step": 1360 }, { "epoch": 0.41, "learning_rate": 4.5084953923061016e-05, "loss": 1.4389, "step": 1370 }, { "epoch": 0.41, "learning_rate": 4.5015433128762065e-05, "loss": 1.4247, "step": 1380 }, { "epoch": 0.41, "learning_rate": 4.494547851023205e-05, "loss": 1.4347, "step": 1390 }, { "epoch": 0.41, "learning_rate": 4.487509158370139e-05, "loss": 1.4133, "step": 1400 }, { "epoch": 0.42, "learning_rate": 4.480427387477056e-05, "loss": 1.4296, "step": 1410 }, { "epoch": 0.42, "learning_rate": 4.473302691837702e-05, "loss": 1.4353, "step": 1420 }, { "epoch": 0.42, "learning_rate": 4.466135225876194e-05, "loss": 1.4377, "step": 1430 }, { "epoch": 0.43, "learning_rate": 4.458925144943676e-05, "loss": 1.4168, "step": 1440 }, { "epoch": 0.43, "learning_rate": 4.451672605314948e-05, "loss": 1.4334, "step": 1450 }, { "epoch": 0.43, "learning_rate": 4.444377764185082e-05, "loss": 1.44, "step": 1460 }, { "epoch": 0.44, "learning_rate": 4.43704077966601e-05, "loss": 1.4375, "step": 1470 }, { "epoch": 0.44, "learning_rate": 4.4296618107831036e-05, "loss": 1.447, "step": 1480 }, { "epoch": 0.44, "learning_rate": 4.422241017471722e-05, "loss": 1.4151, "step": 1490 }, { "epoch": 0.44, "learning_rate": 4.414778560573749e-05, "loss": 1.4388, "step": 1500 }, { "epoch": 0.45, "learning_rate": 4.4072746018341036e-05, "loss": 1.4228, "step": 1510 }, { "epoch": 0.45, "learning_rate": 4.399729303897238e-05, "loss": 1.4104, "step": 1520 }, { "epoch": 0.45, "learning_rate": 4.392142830303608e-05, "loss": 1.4441, "step": 1530 }, { "epoch": 0.46, "learning_rate": 4.384515345486131e-05, "loss": 1.4282, "step": 1540 }, { "epoch": 0.46, "learning_rate": 4.376847014766623e-05, "loss": 1.4271, "step": 1550 }, { "epoch": 0.46, "learning_rate": 4.369138004352212e-05, "loss": 1.4223, "step": 1560 }, { "epoch": 0.47, "learning_rate": 4.3613884813317406e-05, "loss": 1.425, "step": 1570 }, { "epoch": 0.47, "learning_rate": 4.3535986136721377e-05, "loss": 1.4392, "step": 1580 }, { "epoch": 0.47, "learning_rate": 4.3457685702147834e-05, "loss": 1.4097, "step": 1590 }, { "epoch": 0.47, "learning_rate": 4.3378985206718484e-05, "loss": 1.4405, "step": 1600 }, { "epoch": 0.48, "learning_rate": 4.329988635622611e-05, "loss": 1.4311, "step": 1610 }, { "epoch": 0.48, "learning_rate": 4.322039086509769e-05, "loss": 1.4358, "step": 1620 }, { "epoch": 0.48, "learning_rate": 4.3140500456357145e-05, "loss": 1.4114, "step": 1630 }, { "epoch": 0.49, "learning_rate": 4.306021686158805e-05, "loss": 1.4165, "step": 1640 }, { "epoch": 0.49, "learning_rate": 4.297954182089609e-05, "loss": 1.4309, "step": 1650 }, { "epoch": 0.49, "learning_rate": 4.289847708287129e-05, "loss": 1.4215, "step": 1660 }, { "epoch": 0.49, "learning_rate": 4.2817024404550246e-05, "loss": 1.4124, "step": 1670 }, { "epoch": 0.5, "learning_rate": 4.2735185551377895e-05, "loss": 1.4001, "step": 1680 }, { "epoch": 0.5, "learning_rate": 4.265296229716935e-05, "loss": 1.4302, "step": 1690 }, { "epoch": 0.5, "learning_rate": 4.25703564240714e-05, "loss": 1.4211, "step": 1700 }, { "epoch": 0.51, "learning_rate": 4.2487369722523906e-05, "loss": 1.4423, "step": 1710 }, { "epoch": 0.51, "learning_rate": 4.240400399122101e-05, "loss": 1.4299, "step": 1720 }, { "epoch": 0.51, "learning_rate": 4.232026103707209e-05, "loss": 1.4214, "step": 1730 }, { "epoch": 0.52, "learning_rate": 4.223614267516268e-05, "loss": 1.4348, "step": 1740 }, { "epoch": 0.52, "learning_rate": 4.215165072871505e-05, "loss": 1.4315, "step": 1750 }, { "epoch": 0.52, "learning_rate": 4.206678702904874e-05, "loss": 1.4098, "step": 1760 }, { "epoch": 0.52, "learning_rate": 4.198155341554084e-05, "loss": 1.4242, "step": 1770 }, { "epoch": 0.53, "learning_rate": 4.1895951735586145e-05, "loss": 1.4272, "step": 1780 }, { "epoch": 0.53, "learning_rate": 4.1809983844557085e-05, "loss": 1.4452, "step": 1790 }, { "epoch": 0.53, "learning_rate": 4.172365160576355e-05, "loss": 1.431, "step": 1800 }, { "epoch": 0.54, "learning_rate": 4.163695689041245e-05, "loss": 1.4389, "step": 1810 }, { "epoch": 0.54, "learning_rate": 4.154990157756722e-05, "loss": 1.413, "step": 1820 }, { "epoch": 0.54, "learning_rate": 4.1462487554107036e-05, "loss": 1.3893, "step": 1830 }, { "epoch": 0.55, "learning_rate": 4.137471671468596e-05, "loss": 1.4052, "step": 1840 }, { "epoch": 0.55, "learning_rate": 4.128659096169183e-05, "loss": 1.4173, "step": 1850 }, { "epoch": 0.55, "learning_rate": 4.1198112205205096e-05, "loss": 1.4012, "step": 1860 }, { "epoch": 0.55, "learning_rate": 4.110928236295734e-05, "loss": 1.4119, "step": 1870 }, { "epoch": 0.56, "learning_rate": 4.102010336028975e-05, "loss": 1.4111, "step": 1880 }, { "epoch": 0.56, "learning_rate": 4.0930577130111424e-05, "loss": 1.4156, "step": 1890 }, { "epoch": 0.56, "learning_rate": 4.084070561285739e-05, "loss": 1.4419, "step": 1900 }, { "epoch": 0.57, "learning_rate": 4.0750490756446624e-05, "loss": 1.4121, "step": 1910 }, { "epoch": 0.57, "learning_rate": 4.0659934516239795e-05, "loss": 1.4204, "step": 1920 }, { "epoch": 0.57, "learning_rate": 4.056903885499689e-05, "loss": 1.4032, "step": 1930 }, { "epoch": 0.57, "learning_rate": 4.047780574283466e-05, "loss": 1.4207, "step": 1940 }, { "epoch": 0.58, "learning_rate": 4.038623715718397e-05, "loss": 1.4095, "step": 1950 }, { "epoch": 0.58, "learning_rate": 4.029433508274686e-05, "loss": 1.4228, "step": 1960 }, { "epoch": 0.58, "learning_rate": 4.0202101511453586e-05, "loss": 1.4141, "step": 1970 }, { "epoch": 0.59, "learning_rate": 4.010953844241943e-05, "loss": 1.4323, "step": 1980 }, { "epoch": 0.59, "learning_rate": 4.001664788190135e-05, "loss": 1.4087, "step": 1990 }, { "epoch": 0.59, "learning_rate": 3.992343184325453e-05, "loss": 1.4186, "step": 2000 }, { "epoch": 0.6, "learning_rate": 3.982989234688873e-05, "loss": 1.4264, "step": 2010 }, { "epoch": 0.6, "learning_rate": 3.973603142022448e-05, "loss": 1.4417, "step": 2020 }, { "epoch": 0.6, "learning_rate": 3.964185109764915e-05, "loss": 1.4075, "step": 2030 }, { "epoch": 0.6, "learning_rate": 3.954735342047285e-05, "loss": 1.4143, "step": 2040 }, { "epoch": 0.61, "learning_rate": 3.945254043688419e-05, "loss": 1.4176, "step": 2050 }, { "epoch": 0.61, "learning_rate": 3.935741420190587e-05, "loss": 1.4214, "step": 2060 }, { "epoch": 0.61, "learning_rate": 3.926197677735018e-05, "loss": 1.4256, "step": 2070 }, { "epoch": 0.62, "learning_rate": 3.9166230231774276e-05, "loss": 1.4075, "step": 2080 }, { "epoch": 0.62, "learning_rate": 3.9070176640435335e-05, "loss": 1.3887, "step": 2090 }, { "epoch": 0.62, "learning_rate": 3.897381808524562e-05, "loss": 1.4225, "step": 2100 }, { "epoch": 0.63, "learning_rate": 3.887715665472729e-05, "loss": 1.4114, "step": 2110 }, { "epoch": 0.63, "learning_rate": 3.8780194443967226e-05, "loss": 1.4316, "step": 2120 }, { "epoch": 0.63, "learning_rate": 3.8682933554571524e-05, "loss": 1.4168, "step": 2130 }, { "epoch": 0.63, "learning_rate": 3.858537609461999e-05, "loss": 1.4237, "step": 2140 }, { "epoch": 0.64, "learning_rate": 3.8487524178620464e-05, "loss": 1.4373, "step": 2150 }, { "epoch": 0.64, "learning_rate": 3.838937992746295e-05, "loss": 1.4089, "step": 2160 }, { "epoch": 0.64, "learning_rate": 3.8290945468373684e-05, "loss": 1.4319, "step": 2170 }, { "epoch": 0.65, "learning_rate": 3.8192222934869e-05, "loss": 1.4035, "step": 2180 }, { "epoch": 0.65, "learning_rate": 3.809321446670909e-05, "loss": 1.4161, "step": 2190 }, { "epoch": 0.65, "learning_rate": 3.799392220985164e-05, "loss": 1.4136, "step": 2200 }, { "epoch": 0.65, "learning_rate": 3.789434831640533e-05, "loss": 1.4188, "step": 2210 }, { "epoch": 0.66, "learning_rate": 3.779449494458312e-05, "loss": 1.4203, "step": 2220 }, { "epoch": 0.66, "learning_rate": 3.769436425865557e-05, "loss": 1.4263, "step": 2230 }, { "epoch": 0.66, "learning_rate": 3.759395842890384e-05, "loss": 1.4295, "step": 2240 }, { "epoch": 0.67, "learning_rate": 3.749327963157274e-05, "loss": 1.4144, "step": 2250 }, { "epoch": 0.67, "learning_rate": 3.739233004882346e-05, "loss": 1.4162, "step": 2260 }, { "epoch": 0.67, "learning_rate": 3.729111186868635e-05, "loss": 1.4099, "step": 2270 }, { "epoch": 0.68, "learning_rate": 3.718962728501348e-05, "loss": 1.3878, "step": 2280 }, { "epoch": 0.68, "learning_rate": 3.708787849743106e-05, "loss": 1.4399, "step": 2290 }, { "epoch": 0.68, "learning_rate": 3.69858677112918e-05, "loss": 1.4249, "step": 2300 }, { "epoch": 0.68, "learning_rate": 3.688359713762707e-05, "loss": 1.3925, "step": 2310 }, { "epoch": 0.69, "learning_rate": 3.6781068993099034e-05, "loss": 1.4036, "step": 2320 }, { "epoch": 0.69, "learning_rate": 3.667828549995255e-05, "loss": 1.3986, "step": 2330 }, { "epoch": 0.69, "learning_rate": 3.657524888596703e-05, "loss": 1.4298, "step": 2340 }, { "epoch": 0.7, "learning_rate": 3.6471961384408155e-05, "loss": 1.4016, "step": 2350 }, { "epoch": 0.7, "learning_rate": 3.636842523397945e-05, "loss": 1.3992, "step": 2360 }, { "epoch": 0.7, "learning_rate": 3.626464267877381e-05, "loss": 1.4441, "step": 2370 }, { "epoch": 0.71, "learning_rate": 3.616061596822478e-05, "loss": 1.3967, "step": 2380 }, { "epoch": 0.71, "learning_rate": 3.6056347357057893e-05, "loss": 1.4252, "step": 2390 }, { "epoch": 0.71, "learning_rate": 3.595183910524173e-05, "loss": 1.4209, "step": 2400 }, { "epoch": 0.71, "learning_rate": 3.5847093477938956e-05, "loss": 1.4133, "step": 2410 }, { "epoch": 0.72, "learning_rate": 3.5742112745457235e-05, "loss": 1.4313, "step": 2420 }, { "epoch": 0.72, "learning_rate": 3.563689918320002e-05, "loss": 1.4275, "step": 2430 }, { "epoch": 0.72, "learning_rate": 3.5531455071617226e-05, "loss": 1.421, "step": 2440 }, { "epoch": 0.73, "learning_rate": 3.542578269615579e-05, "loss": 1.4402, "step": 2450 }, { "epoch": 0.73, "learning_rate": 3.5319884347210186e-05, "loss": 1.4176, "step": 2460 }, { "epoch": 0.73, "learning_rate": 3.521376232007271e-05, "loss": 1.4117, "step": 2470 }, { "epoch": 0.73, "learning_rate": 3.5107418914883794e-05, "loss": 1.41, "step": 2480 }, { "epoch": 0.74, "learning_rate": 3.500085643658211e-05, "loss": 1.4313, "step": 2490 }, { "epoch": 0.74, "learning_rate": 3.489407719485464e-05, "loss": 1.4035, "step": 2500 }, { "epoch": 0.74, "learning_rate": 3.4787083504086605e-05, "loss": 1.4057, "step": 2510 }, { "epoch": 0.75, "learning_rate": 3.467987768331127e-05, "loss": 1.4125, "step": 2520 }, { "epoch": 0.75, "learning_rate": 3.457246205615974e-05, "loss": 1.4056, "step": 2530 }, { "epoch": 0.75, "learning_rate": 3.446483895081054e-05, "loss": 1.4082, "step": 2540 }, { "epoch": 0.76, "learning_rate": 3.4357010699939215e-05, "loss": 1.3915, "step": 2550 }, { "epoch": 0.76, "learning_rate": 3.424897964066769e-05, "loss": 1.4012, "step": 2560 }, { "epoch": 0.76, "learning_rate": 3.4140748114513685e-05, "loss": 1.4251, "step": 2570 }, { "epoch": 0.76, "learning_rate": 3.403231846733994e-05, "loss": 1.4013, "step": 2580 }, { "epoch": 0.77, "learning_rate": 3.392369304930334e-05, "loss": 1.4076, "step": 2590 }, { "epoch": 0.77, "learning_rate": 3.3814874214804034e-05, "loss": 1.3978, "step": 2600 }, { "epoch": 0.77, "learning_rate": 3.3705864322434354e-05, "loss": 1.408, "step": 2610 }, { "epoch": 0.78, "learning_rate": 3.359666573492772e-05, "loss": 1.3888, "step": 2620 }, { "epoch": 0.78, "learning_rate": 3.3487280819107415e-05, "loss": 1.4052, "step": 2630 }, { "epoch": 0.78, "learning_rate": 3.33777119458353e-05, "loss": 1.4286, "step": 2640 }, { "epoch": 0.79, "learning_rate": 3.326796148996042e-05, "loss": 1.4241, "step": 2650 }, { "epoch": 0.79, "learning_rate": 3.315803183026753e-05, "loss": 1.4049, "step": 2660 }, { "epoch": 0.79, "learning_rate": 3.304792534942553e-05, "loss": 1.3826, "step": 2670 }, { "epoch": 0.79, "learning_rate": 3.293764443393582e-05, "loss": 1.413, "step": 2680 }, { "epoch": 0.8, "learning_rate": 3.2827191474080605e-05, "loss": 1.4161, "step": 2690 }, { "epoch": 0.8, "learning_rate": 3.2716568863871044e-05, "loss": 1.382, "step": 2700 }, { "epoch": 0.8, "learning_rate": 3.260577900099539e-05, "loss": 1.381, "step": 2710 }, { "epoch": 0.81, "learning_rate": 3.2494824286767e-05, "loss": 1.396, "step": 2720 }, { "epoch": 0.81, "learning_rate": 3.2383707126072315e-05, "loss": 1.3923, "step": 2730 }, { "epoch": 0.81, "learning_rate": 3.2272429927318707e-05, "loss": 1.4044, "step": 2740 }, { "epoch": 0.81, "learning_rate": 3.21609951023823e-05, "loss": 1.4073, "step": 2750 }, { "epoch": 0.82, "learning_rate": 3.204940506655568e-05, "loss": 1.4178, "step": 2760 }, { "epoch": 0.82, "learning_rate": 3.1937662238495544e-05, "loss": 1.4179, "step": 2770 }, { "epoch": 0.82, "learning_rate": 3.1825769040170285e-05, "loss": 1.4003, "step": 2780 }, { "epoch": 0.83, "learning_rate": 3.1713727896807505e-05, "loss": 1.4176, "step": 2790 }, { "epoch": 0.83, "learning_rate": 3.160154123684143e-05, "loss": 1.4179, "step": 2800 }, { "epoch": 0.83, "learning_rate": 3.1489211491860276e-05, "loss": 1.4098, "step": 2810 }, { "epoch": 0.84, "learning_rate": 3.1376741096553576e-05, "loss": 1.4087, "step": 2820 }, { "epoch": 0.84, "learning_rate": 3.126413248865935e-05, "loss": 1.3971, "step": 2830 }, { "epoch": 0.84, "learning_rate": 3.115138810891134e-05, "loss": 1.3915, "step": 2840 }, { "epoch": 0.84, "learning_rate": 3.103851040098607e-05, "loss": 1.4041, "step": 2850 }, { "epoch": 0.85, "learning_rate": 3.0925501811449855e-05, "loss": 1.4129, "step": 2860 }, { "epoch": 0.85, "learning_rate": 3.081236478970583e-05, "loss": 1.3948, "step": 2870 }, { "epoch": 0.85, "learning_rate": 3.069910178794082e-05, "loss": 1.4116, "step": 2880 }, { "epoch": 0.86, "learning_rate": 3.0585715261072206e-05, "loss": 1.4029, "step": 2890 }, { "epoch": 0.86, "learning_rate": 3.04722076666947e-05, "loss": 1.399, "step": 2900 }, { "epoch": 0.86, "learning_rate": 3.0358581465027125e-05, "loss": 1.4061, "step": 2910 }, { "epoch": 0.87, "learning_rate": 3.024483911885901e-05, "loss": 1.4152, "step": 2920 }, { "epoch": 0.87, "learning_rate": 3.013098309349729e-05, "loss": 1.4257, "step": 2930 }, { "epoch": 0.87, "learning_rate": 3.0017015856712814e-05, "loss": 1.417, "step": 2940 }, { "epoch": 0.87, "learning_rate": 2.9902939878686915e-05, "loss": 1.3952, "step": 2950 }, { "epoch": 0.88, "learning_rate": 2.978875763195779e-05, "loss": 1.4252, "step": 2960 }, { "epoch": 0.88, "learning_rate": 2.9674471591367005e-05, "loss": 1.3982, "step": 2970 }, { "epoch": 0.88, "learning_rate": 2.9560084234005765e-05, "loss": 1.3948, "step": 2980 }, { "epoch": 0.89, "learning_rate": 2.944559803916128e-05, "loss": 1.4127, "step": 2990 }, { "epoch": 0.89, "learning_rate": 2.9331015488263024e-05, "loss": 1.4239, "step": 3000 }, { "epoch": 0.89, "learning_rate": 2.9216339064828914e-05, "loss": 1.3889, "step": 3010 }, { "epoch": 0.89, "learning_rate": 2.910157125441152e-05, "loss": 1.403, "step": 3020 }, { "epoch": 0.9, "learning_rate": 2.898671454454418e-05, "loss": 1.4106, "step": 3030 }, { "epoch": 0.9, "learning_rate": 2.8871771424687078e-05, "loss": 1.4123, "step": 3040 }, { "epoch": 0.9, "learning_rate": 2.8756744386173284e-05, "loss": 1.4137, "step": 3050 }, { "epoch": 0.91, "learning_rate": 2.8641635922154774e-05, "loss": 1.4009, "step": 3060 }, { "epoch": 0.91, "learning_rate": 2.8526448527548372e-05, "loss": 1.4159, "step": 3070 }, { "epoch": 0.91, "learning_rate": 2.8411184698981684e-05, "loss": 1.4071, "step": 3080 }, { "epoch": 0.92, "learning_rate": 2.829584693473899e-05, "loss": 1.41, "step": 3090 }, { "epoch": 0.92, "learning_rate": 2.8180437734707064e-05, "loss": 1.4038, "step": 3100 }, { "epoch": 0.92, "learning_rate": 2.8064959600321043e-05, "loss": 1.4069, "step": 3110 }, { "epoch": 0.92, "learning_rate": 2.7949415034510163e-05, "loss": 1.4096, "step": 3120 }, { "epoch": 0.93, "learning_rate": 2.7833806541643544e-05, "loss": 1.3821, "step": 3130 }, { "epoch": 0.93, "learning_rate": 2.7718136627475865e-05, "loss": 1.3886, "step": 3140 }, { "epoch": 0.93, "learning_rate": 2.76024077990931e-05, "loss": 1.3923, "step": 3150 }, { "epoch": 0.94, "learning_rate": 2.748662256485816e-05, "loss": 1.4072, "step": 3160 }, { "epoch": 0.94, "learning_rate": 2.7370783434356512e-05, "loss": 1.4126, "step": 3170 }, { "epoch": 0.94, "learning_rate": 2.7254892918341802e-05, "loss": 1.4238, "step": 3180 }, { "epoch": 0.95, "learning_rate": 2.713895352868144e-05, "loss": 1.4183, "step": 3190 }, { "epoch": 0.95, "learning_rate": 2.702296777830212e-05, "loss": 1.4056, "step": 3200 }, { "epoch": 0.95, "learning_rate": 2.6906938181135423e-05, "loss": 1.4096, "step": 3210 }, { "epoch": 0.95, "learning_rate": 2.6790867252063247e-05, "loss": 1.4018, "step": 3220 }, { "epoch": 0.96, "learning_rate": 2.6674757506863357e-05, "loss": 1.3922, "step": 3230 }, { "epoch": 0.96, "learning_rate": 2.655861146215483e-05, "loss": 1.4054, "step": 3240 }, { "epoch": 0.96, "learning_rate": 2.6442431635343528e-05, "loss": 1.3914, "step": 3250 }, { "epoch": 0.97, "learning_rate": 2.6326220544567514e-05, "loss": 1.3851, "step": 3260 }, { "epoch": 0.97, "learning_rate": 2.620998070864248e-05, "loss": 1.4102, "step": 3270 }, { "epoch": 0.97, "learning_rate": 2.6093714647007156e-05, "loss": 1.4069, "step": 3280 }, { "epoch": 0.97, "learning_rate": 2.5977424879668705e-05, "loss": 1.3919, "step": 3290 }, { "epoch": 0.98, "learning_rate": 2.5861113927148096e-05, "loss": 1.4073, "step": 3300 }, { "epoch": 0.98, "learning_rate": 2.5744784310425467e-05, "loss": 1.4025, "step": 3310 }, { "epoch": 0.98, "learning_rate": 2.562843855088551e-05, "loss": 1.3805, "step": 3320 }, { "epoch": 0.99, "learning_rate": 2.5512079170262793e-05, "loss": 1.4032, "step": 3330 }, { "epoch": 0.99, "learning_rate": 2.5395708690587117e-05, "loss": 1.4232, "step": 3340 }, { "epoch": 0.99, "learning_rate": 2.527932963412885e-05, "loss": 1.3897, "step": 3350 }, { "epoch": 1.0, "learning_rate": 2.5162944523344256e-05, "loss": 1.4008, "step": 3360 }, { "epoch": 1.0, "learning_rate": 2.5046555880820826e-05, "loss": 1.3936, "step": 3370 }, { "epoch": 1.0, "learning_rate": 2.4930166229222597e-05, "loss": 1.394, "step": 3380 }, { "epoch": 1.0, "learning_rate": 2.481377809123547e-05, "loss": 1.3903, "step": 3390 }, { "epoch": 1.01, "learning_rate": 2.469739398951256e-05, "loss": 1.3869, "step": 3400 }, { "epoch": 1.01, "learning_rate": 2.458101644661947e-05, "loss": 1.429, "step": 3410 }, { "epoch": 1.01, "learning_rate": 2.4464647984979667e-05, "loss": 1.3987, "step": 3420 }, { "epoch": 1.02, "learning_rate": 2.4348291126819783e-05, "loss": 1.38, "step": 3430 }, { "epoch": 1.02, "learning_rate": 2.4231948394114936e-05, "loss": 1.3906, "step": 3440 }, { "epoch": 1.02, "learning_rate": 2.4115622308534096e-05, "loss": 1.3931, "step": 3450 }, { "epoch": 1.03, "learning_rate": 2.399931539138541e-05, "loss": 1.4135, "step": 3460 }, { "epoch": 1.03, "learning_rate": 2.388303016356156e-05, "loss": 1.3952, "step": 3470 }, { "epoch": 1.03, "learning_rate": 2.3766769145485125e-05, "loss": 1.3972, "step": 3480 }, { "epoch": 1.03, "learning_rate": 2.3650534857053943e-05, "loss": 1.3937, "step": 3490 }, { "epoch": 1.04, "learning_rate": 2.3534329817586513e-05, "loss": 1.3936, "step": 3500 }, { "epoch": 1.04, "learning_rate": 2.3418156545767365e-05, "loss": 1.397, "step": 3510 }, { "epoch": 1.04, "learning_rate": 2.3302017559592494e-05, "loss": 1.3849, "step": 3520 }, { "epoch": 1.05, "learning_rate": 2.318591537631476e-05, "loss": 1.4118, "step": 3530 }, { "epoch": 1.05, "learning_rate": 2.3069852512389335e-05, "loss": 1.414, "step": 3540 }, { "epoch": 1.05, "learning_rate": 2.2953831483419184e-05, "loss": 1.4088, "step": 3550 }, { "epoch": 1.05, "learning_rate": 2.2837854804100504e-05, "loss": 1.3773, "step": 3560 }, { "epoch": 1.06, "learning_rate": 2.272192498816825e-05, "loss": 1.3977, "step": 3570 }, { "epoch": 1.06, "learning_rate": 2.260604454834162e-05, "loss": 1.3591, "step": 3580 }, { "epoch": 1.06, "learning_rate": 2.2490215996269617e-05, "loss": 1.4023, "step": 3590 }, { "epoch": 1.07, "learning_rate": 2.237444184247661e-05, "loss": 1.3873, "step": 3600 }, { "epoch": 1.07, "learning_rate": 2.2258724596307915e-05, "loss": 1.3826, "step": 3610 }, { "epoch": 1.07, "learning_rate": 2.214306676587539e-05, "loss": 1.3732, "step": 3620 }, { "epoch": 1.08, "learning_rate": 2.2027470858003098e-05, "loss": 1.3988, "step": 3630 }, { "epoch": 1.08, "learning_rate": 2.1911939378172956e-05, "loss": 1.4036, "step": 3640 }, { "epoch": 1.08, "learning_rate": 2.1796474830470447e-05, "loss": 1.4236, "step": 3650 }, { "epoch": 1.08, "learning_rate": 2.1681079717530328e-05, "loss": 1.4032, "step": 3660 }, { "epoch": 1.09, "learning_rate": 2.156575654048239e-05, "loss": 1.39, "step": 3670 }, { "epoch": 1.09, "learning_rate": 2.145050779889725e-05, "loss": 1.3757, "step": 3680 }, { "epoch": 1.09, "learning_rate": 2.1335335990732186e-05, "loss": 1.3934, "step": 3690 }, { "epoch": 1.1, "learning_rate": 2.1220243612276964e-05, "loss": 1.3979, "step": 3700 }, { "epoch": 1.1, "learning_rate": 2.110523315809978e-05, "loss": 1.4181, "step": 3710 }, { "epoch": 1.1, "learning_rate": 2.0990307120993134e-05, "loss": 1.406, "step": 3720 }, { "epoch": 1.11, "learning_rate": 2.0875467991919854e-05, "loss": 1.4036, "step": 3730 }, { "epoch": 1.11, "learning_rate": 2.076071825995906e-05, "loss": 1.4095, "step": 3740 }, { "epoch": 1.11, "learning_rate": 2.0646060412252246e-05, "loss": 1.4048, "step": 3750 }, { "epoch": 1.11, "learning_rate": 2.0531496933949363e-05, "loss": 1.3874, "step": 3760 }, { "epoch": 1.12, "learning_rate": 2.0417030308154953e-05, "loss": 1.3793, "step": 3770 }, { "epoch": 1.12, "learning_rate": 2.0302663015874322e-05, "loss": 1.4152, "step": 3780 }, { "epoch": 1.12, "learning_rate": 2.0188397535959785e-05, "loss": 1.3738, "step": 3790 }, { "epoch": 1.13, "learning_rate": 2.007423634505692e-05, "loss": 1.4033, "step": 3800 }, { "epoch": 1.13, "learning_rate": 1.9960181917550897e-05, "loss": 1.3753, "step": 3810 }, { "epoch": 1.13, "learning_rate": 1.9846236725512835e-05, "loss": 1.3791, "step": 3820 }, { "epoch": 1.13, "learning_rate": 1.973240323864624e-05, "loss": 1.3837, "step": 3830 }, { "epoch": 1.14, "learning_rate": 1.9618683924233467e-05, "loss": 1.3945, "step": 3840 }, { "epoch": 1.14, "learning_rate": 1.9505081247082237e-05, "loss": 1.3804, "step": 3850 }, { "epoch": 1.14, "learning_rate": 1.9391597669472213e-05, "loss": 1.3964, "step": 3860 }, { "epoch": 1.15, "learning_rate": 1.927823565110165e-05, "loss": 1.3983, "step": 3870 }, { "epoch": 1.15, "learning_rate": 1.9164997649034058e-05, "loss": 1.4169, "step": 3880 }, { "epoch": 1.15, "learning_rate": 1.9051886117644963e-05, "loss": 1.4101, "step": 3890 }, { "epoch": 1.16, "learning_rate": 1.89389035085687e-05, "loss": 1.3823, "step": 3900 }, { "epoch": 1.16, "learning_rate": 1.8826052270645276e-05, "loss": 1.3827, "step": 3910 }, { "epoch": 1.16, "learning_rate": 1.8713334849867315e-05, "loss": 1.4035, "step": 3920 }, { "epoch": 1.16, "learning_rate": 1.8600753689327e-05, "loss": 1.4081, "step": 3930 }, { "epoch": 1.17, "learning_rate": 1.8488311229163152e-05, "loss": 1.3919, "step": 3940 }, { "epoch": 1.17, "learning_rate": 1.8376009906508338e-05, "loss": 1.3854, "step": 3950 }, { "epoch": 1.17, "learning_rate": 1.826385215543603e-05, "loss": 1.3924, "step": 3960 }, { "epoch": 1.18, "learning_rate": 1.8151840406907873e-05, "loss": 1.3851, "step": 3970 }, { "epoch": 1.18, "learning_rate": 1.8039977088720972e-05, "loss": 1.3707, "step": 3980 }, { "epoch": 1.18, "learning_rate": 1.7928264625455282e-05, "loss": 1.3998, "step": 3990 }, { "epoch": 1.19, "learning_rate": 1.7816705438421064e-05, "loss": 1.3931, "step": 4000 }, { "epoch": 1.19, "learning_rate": 1.7705301945606384e-05, "loss": 1.3976, "step": 4010 }, { "epoch": 1.19, "learning_rate": 1.7594056561624716e-05, "loss": 1.3785, "step": 4020 }, { "epoch": 1.19, "learning_rate": 1.748297169766262e-05, "loss": 1.3845, "step": 4030 }, { "epoch": 1.2, "learning_rate": 1.7372049761427457e-05, "loss": 1.3926, "step": 4040 }, { "epoch": 1.2, "learning_rate": 1.7261293157095204e-05, "loss": 1.4075, "step": 4050 }, { "epoch": 1.2, "learning_rate": 1.7150704285258375e-05, "loss": 1.3938, "step": 4060 }, { "epoch": 1.21, "learning_rate": 1.7040285542873945e-05, "loss": 1.3884, "step": 4070 }, { "epoch": 1.21, "learning_rate": 1.6930039323211448e-05, "loss": 1.4066, "step": 4080 }, { "epoch": 1.21, "learning_rate": 1.6819968015801048e-05, "loss": 1.3992, "step": 4090 }, { "epoch": 1.21, "learning_rate": 1.6710074006381797e-05, "loss": 1.4066, "step": 4100 }, { "epoch": 1.22, "learning_rate": 1.6600359676849892e-05, "loss": 1.4076, "step": 4110 }, { "epoch": 1.22, "learning_rate": 1.6490827405207062e-05, "loss": 1.4078, "step": 4120 }, { "epoch": 1.22, "learning_rate": 1.638147956550904e-05, "loss": 1.4026, "step": 4130 }, { "epoch": 1.23, "learning_rate": 1.627231852781407e-05, "loss": 1.3861, "step": 4140 }, { "epoch": 1.23, "learning_rate": 1.6163346658131567e-05, "loss": 1.3915, "step": 4150 }, { "epoch": 1.23, "learning_rate": 1.6054566318370832e-05, "loss": 1.3828, "step": 4160 }, { "epoch": 1.24, "learning_rate": 1.5945979866289844e-05, "loss": 1.3952, "step": 4170 }, { "epoch": 1.24, "learning_rate": 1.583758965544417e-05, "loss": 1.3892, "step": 4180 }, { "epoch": 1.24, "learning_rate": 1.5729398035135957e-05, "loss": 1.3973, "step": 4190 }, { "epoch": 1.24, "learning_rate": 1.5621407350362986e-05, "loss": 1.4225, "step": 4200 }, { "epoch": 1.25, "learning_rate": 1.5513619941767886e-05, "loss": 1.3948, "step": 4210 }, { "epoch": 1.25, "learning_rate": 1.540603814558736e-05, "loss": 1.4074, "step": 4220 }, { "epoch": 1.25, "learning_rate": 1.5298664293601574e-05, "loss": 1.3965, "step": 4230 }, { "epoch": 1.26, "learning_rate": 1.5191500713083615e-05, "loss": 1.3743, "step": 4240 }, { "epoch": 1.26, "learning_rate": 1.508454972674904e-05, "loss": 1.384, "step": 4250 }, { "epoch": 1.26, "learning_rate": 1.4977813652705535e-05, "loss": 1.4018, "step": 4260 }, { "epoch": 1.27, "learning_rate": 1.4871294804402675e-05, "loss": 1.3904, "step": 4270 }, { "epoch": 1.27, "learning_rate": 1.4764995490581779e-05, "loss": 1.3981, "step": 4280 }, { "epoch": 1.27, "learning_rate": 1.465891801522587e-05, "loss": 1.4144, "step": 4290 }, { "epoch": 1.27, "learning_rate": 1.4553064677509731e-05, "loss": 1.4172, "step": 4300 }, { "epoch": 1.28, "learning_rate": 1.4447437771750078e-05, "loss": 1.3873, "step": 4310 }, { "epoch": 1.28, "learning_rate": 1.4342039587355832e-05, "loss": 1.3983, "step": 4320 }, { "epoch": 1.28, "learning_rate": 1.423687240877849e-05, "loss": 1.4007, "step": 4330 }, { "epoch": 1.29, "learning_rate": 1.4131938515462639e-05, "loss": 1.4088, "step": 4340 }, { "epoch": 1.29, "learning_rate": 1.4027240181796508e-05, "loss": 1.3941, "step": 4350 }, { "epoch": 1.29, "learning_rate": 1.3922779677062689e-05, "loss": 1.3975, "step": 4360 }, { "epoch": 1.29, "learning_rate": 1.3818559265388964e-05, "loss": 1.3842, "step": 4370 }, { "epoch": 1.3, "learning_rate": 1.3714581205699214e-05, "loss": 1.4011, "step": 4380 }, { "epoch": 1.3, "learning_rate": 1.3610847751664473e-05, "loss": 1.3881, "step": 4390 }, { "epoch": 1.3, "learning_rate": 1.3507361151654067e-05, "loss": 1.4028, "step": 4400 }, { "epoch": 1.31, "learning_rate": 1.340412364868689e-05, "loss": 1.3973, "step": 4410 }, { "epoch": 1.31, "learning_rate": 1.3301137480382786e-05, "loss": 1.445, "step": 4420 }, { "epoch": 1.31, "learning_rate": 1.3198404878914044e-05, "loss": 1.3957, "step": 4430 }, { "epoch": 1.32, "learning_rate": 1.3095928070957037e-05, "loss": 1.395, "step": 4440 }, { "epoch": 1.32, "learning_rate": 1.2993709277643922e-05, "loss": 1.4157, "step": 4450 }, { "epoch": 1.32, "learning_rate": 1.2891750714514545e-05, "loss": 1.4074, "step": 4460 }, { "epoch": 1.32, "learning_rate": 1.2790054591468381e-05, "loss": 1.3988, "step": 4470 }, { "epoch": 1.33, "learning_rate": 1.2688623112716652e-05, "loss": 1.3914, "step": 4480 }, { "epoch": 1.33, "learning_rate": 1.2587458476734559e-05, "loss": 1.3864, "step": 4490 }, { "epoch": 1.33, "learning_rate": 1.248656287621362e-05, "loss": 1.3934, "step": 4500 }, { "epoch": 1.34, "learning_rate": 1.2385938498014138e-05, "loss": 1.3893, "step": 4510 }, { "epoch": 1.34, "learning_rate": 1.2285587523117825e-05, "loss": 1.3991, "step": 4520 }, { "epoch": 1.34, "learning_rate": 1.2185512126580512e-05, "loss": 1.376, "step": 4530 }, { "epoch": 1.35, "learning_rate": 1.2085714477484997e-05, "loss": 1.3799, "step": 4540 }, { "epoch": 1.35, "learning_rate": 1.1986196738894078e-05, "loss": 1.3738, "step": 4550 }, { "epoch": 1.35, "learning_rate": 1.188696106780361e-05, "loss": 1.3754, "step": 4560 }, { "epoch": 1.35, "learning_rate": 1.178800961509578e-05, "loss": 1.4006, "step": 4570 }, { "epoch": 1.36, "learning_rate": 1.1689344525492497e-05, "loss": 1.4012, "step": 4580 }, { "epoch": 1.36, "learning_rate": 1.1590967937508895e-05, "loss": 1.3973, "step": 4590 }, { "epoch": 1.36, "learning_rate": 1.149288198340698e-05, "loss": 1.3737, "step": 4600 }, { "epoch": 1.37, "learning_rate": 1.1395088789149419e-05, "loss": 1.3998, "step": 4610 }, { "epoch": 1.37, "learning_rate": 1.1297590474353464e-05, "loss": 1.4053, "step": 4620 }, { "epoch": 1.37, "learning_rate": 1.1200389152245003e-05, "loss": 1.4038, "step": 4630 }, { "epoch": 1.37, "learning_rate": 1.1103486929612759e-05, "loss": 1.3968, "step": 4640 }, { "epoch": 1.38, "learning_rate": 1.1006885906762626e-05, "loss": 1.4037, "step": 4650 }, { "epoch": 1.38, "learning_rate": 1.0910588177472153e-05, "loss": 1.3901, "step": 4660 }, { "epoch": 1.38, "learning_rate": 1.0814595828945154e-05, "loss": 1.379, "step": 4670 }, { "epoch": 1.39, "learning_rate": 1.0718910941766478e-05, "loss": 1.3808, "step": 4680 }, { "epoch": 1.39, "learning_rate": 1.0623535589856887e-05, "loss": 1.4105, "step": 4690 }, { "epoch": 1.39, "learning_rate": 1.0528471840428142e-05, "loss": 1.3756, "step": 4700 }, { "epoch": 1.4, "learning_rate": 1.0433721753938182e-05, "loss": 1.3708, "step": 4710 }, { "epoch": 1.4, "learning_rate": 1.0339287384046462e-05, "loss": 1.3924, "step": 4720 }, { "epoch": 1.4, "learning_rate": 1.024517077756943e-05, "loss": 1.3854, "step": 4730 }, { "epoch": 1.4, "learning_rate": 1.0151373974436184e-05, "loss": 1.3908, "step": 4740 }, { "epoch": 1.41, "learning_rate": 1.0057899007644245e-05, "loss": 1.3953, "step": 4750 }, { "epoch": 1.41, "learning_rate": 9.964747903215513e-06, "loss": 1.3933, "step": 4760 }, { "epoch": 1.41, "learning_rate": 9.871922680152318e-06, "loss": 1.3854, "step": 4770 }, { "epoch": 1.42, "learning_rate": 9.779425350393685e-06, "loss": 1.4026, "step": 4780 }, { "epoch": 1.42, "learning_rate": 9.687257918771719e-06, "loss": 1.3958, "step": 4790 }, { "epoch": 1.42, "learning_rate": 9.595422382968156e-06, "loss": 1.3777, "step": 4800 }, { "epoch": 1.43, "learning_rate": 9.503920733471052e-06, "loss": 1.3835, "step": 4810 }, { "epoch": 1.43, "learning_rate": 9.412754953531663e-06, "loss": 1.3768, "step": 4820 }, { "epoch": 1.43, "learning_rate": 9.321927019121435e-06, "loss": 1.3846, "step": 4830 }, { "epoch": 1.43, "learning_rate": 9.231438898889184e-06, "loss": 1.3878, "step": 4840 }, { "epoch": 1.44, "learning_rate": 9.141292554118435e-06, "loss": 1.38, "step": 4850 }, { "epoch": 1.44, "learning_rate": 9.051489938684903e-06, "loss": 1.3841, "step": 4860 }, { "epoch": 1.44, "learning_rate": 8.962032999014144e-06, "loss": 1.4122, "step": 4870 }, { "epoch": 1.45, "learning_rate": 8.87292367403937e-06, "loss": 1.3839, "step": 4880 }, { "epoch": 1.45, "learning_rate": 8.784163895159428e-06, "loss": 1.3932, "step": 4890 }, { "epoch": 1.45, "learning_rate": 8.695755586196924e-06, "loss": 1.4012, "step": 4900 }, { "epoch": 1.45, "learning_rate": 8.607700663356543e-06, "loss": 1.3931, "step": 4910 }, { "epoch": 1.46, "learning_rate": 8.520001035183503e-06, "loss": 1.4003, "step": 4920 }, { "epoch": 1.46, "learning_rate": 8.432658602522193e-06, "loss": 1.4064, "step": 4930 }, { "epoch": 1.46, "learning_rate": 8.345675258474969e-06, "loss": 1.383, "step": 4940 }, { "epoch": 1.47, "learning_rate": 8.259052888361132e-06, "loss": 1.4147, "step": 4950 }, { "epoch": 1.47, "learning_rate": 8.172793369676052e-06, "loss": 1.4064, "step": 4960 }, { "epoch": 1.47, "learning_rate": 8.086898572050494e-06, "loss": 1.3894, "step": 4970 }, { "epoch": 1.48, "learning_rate": 8.00137035721007e-06, "loss": 1.3928, "step": 4980 }, { "epoch": 1.48, "learning_rate": 7.916210578934896e-06, "loss": 1.4049, "step": 4990 }, { "epoch": 1.48, "learning_rate": 7.831421083019422e-06, "loss": 1.402, "step": 5000 }, { "epoch": 1.48, "learning_rate": 7.747003707232415e-06, "loss": 1.4144, "step": 5010 }, { "epoch": 1.49, "learning_rate": 7.66296028127713e-06, "loss": 1.3884, "step": 5020 }, { "epoch": 1.49, "learning_rate": 7.579292626751647e-06, "loss": 1.4116, "step": 5030 }, { "epoch": 1.49, "learning_rate": 7.4960025571094025e-06, "loss": 1.3828, "step": 5040 }, { "epoch": 1.5, "learning_rate": 7.413091877619868e-06, "loss": 1.3821, "step": 5050 }, { "epoch": 1.5, "learning_rate": 7.330562385329429e-06, "loss": 1.4068, "step": 5060 }, { "epoch": 1.5, "learning_rate": 7.248415869022434e-06, "loss": 1.3842, "step": 5070 }, { "epoch": 1.51, "learning_rate": 7.16665410918243e-06, "loss": 1.3937, "step": 5080 }, { "epoch": 1.51, "learning_rate": 7.085278877953558e-06, "loss": 1.4077, "step": 5090 }, { "epoch": 1.51, "learning_rate": 7.004291939102148e-06, "loss": 1.3989, "step": 5100 }, { "epoch": 1.51, "learning_rate": 6.923695047978502e-06, "loss": 1.3727, "step": 5110 }, { "epoch": 1.52, "learning_rate": 6.843489951478829e-06, "loss": 1.3842, "step": 5120 }, { "epoch": 1.52, "learning_rate": 6.763678388007394e-06, "loss": 1.3662, "step": 5130 }, { "epoch": 1.52, "learning_rate": 6.684262087438839e-06, "loss": 1.4092, "step": 5140 }, { "epoch": 1.53, "learning_rate": 6.605242771080686e-06, "loss": 1.399, "step": 5150 }, { "epoch": 1.53, "learning_rate": 6.526622151636011e-06, "loss": 1.3931, "step": 5160 }, { "epoch": 1.53, "learning_rate": 6.448401933166351e-06, "loss": 1.3824, "step": 5170 }, { "epoch": 1.53, "learning_rate": 6.370583811054778e-06, "loss": 1.3764, "step": 5180 }, { "epoch": 1.54, "learning_rate": 6.293169471969104e-06, "loss": 1.3835, "step": 5190 }, { "epoch": 1.54, "learning_rate": 6.216160593825363e-06, "loss": 1.382, "step": 5200 }, { "epoch": 1.54, "learning_rate": 6.1395588457514226e-06, "loss": 1.3983, "step": 5210 }, { "epoch": 1.55, "learning_rate": 6.063365888050829e-06, "loss": 1.3709, "step": 5220 }, { "epoch": 1.55, "learning_rate": 5.987583372166794e-06, "loss": 1.4037, "step": 5230 }, { "epoch": 1.55, "learning_rate": 5.912212940646422e-06, "loss": 1.3955, "step": 5240 }, { "epoch": 1.56, "learning_rate": 5.8372562271051e-06, "loss": 1.384, "step": 5250 }, { "epoch": 1.56, "learning_rate": 5.762714856191087e-06, "loss": 1.3772, "step": 5260 }, { "epoch": 1.56, "learning_rate": 5.688590443550304e-06, "loss": 1.3818, "step": 5270 }, { "epoch": 1.56, "learning_rate": 5.61488459579132e-06, "loss": 1.4039, "step": 5280 }, { "epoch": 1.57, "learning_rate": 5.541598910450518e-06, "loss": 1.3935, "step": 5290 }, { "epoch": 1.57, "learning_rate": 5.4687349759574845e-06, "loss": 1.402, "step": 5300 }, { "epoch": 1.57, "learning_rate": 5.396294371600569e-06, "loss": 1.3774, "step": 5310 }, { "epoch": 1.58, "learning_rate": 5.3242786674926545e-06, "loss": 1.3936, "step": 5320 }, { "epoch": 1.58, "learning_rate": 5.252689424537139e-06, "loss": 1.3914, "step": 5330 }, { "epoch": 1.58, "learning_rate": 5.181528194394081e-06, "loss": 1.3931, "step": 5340 }, { "epoch": 1.59, "learning_rate": 5.11079651944659e-06, "loss": 1.3854, "step": 5350 }, { "epoch": 1.59, "learning_rate": 5.040495932767386e-06, "loss": 1.4101, "step": 5360 }, { "epoch": 1.59, "learning_rate": 4.970627958085574e-06, "loss": 1.3929, "step": 5370 }, { "epoch": 1.59, "learning_rate": 4.901194109753607e-06, "loss": 1.3826, "step": 5380 }, { "epoch": 1.6, "learning_rate": 4.832195892714489e-06, "loss": 1.3974, "step": 5390 }, { "epoch": 1.6, "learning_rate": 4.763634802469124e-06, "loss": 1.3988, "step": 5400 }, { "epoch": 1.6, "learning_rate": 4.6955123250439245e-06, "loss": 1.3875, "step": 5410 }, { "epoch": 1.61, "learning_rate": 4.6278299369585916e-06, "loss": 1.3881, "step": 5420 }, { "epoch": 1.61, "learning_rate": 4.560589105194121e-06, "loss": 1.3924, "step": 5430 }, { "epoch": 1.61, "learning_rate": 4.493791287160998e-06, "loss": 1.4058, "step": 5440 }, { "epoch": 1.61, "learning_rate": 4.4274379306676164e-06, "loss": 1.3946, "step": 5450 }, { "epoch": 1.62, "learning_rate": 4.361530473888889e-06, "loss": 1.4045, "step": 5460 }, { "epoch": 1.62, "learning_rate": 4.296070345335085e-06, "loss": 1.3817, "step": 5470 }, { "epoch": 1.62, "learning_rate": 4.231058963820867e-06, "loss": 1.3989, "step": 5480 }, { "epoch": 1.63, "learning_rate": 4.166497738434527e-06, "loss": 1.4004, "step": 5490 }, { "epoch": 1.63, "learning_rate": 4.102388068507465e-06, "loss": 1.3905, "step": 5500 }, { "epoch": 1.63, "learning_rate": 4.03873134358384e-06, "loss": 1.3731, "step": 5510 }, { "epoch": 1.64, "learning_rate": 3.9755289433904694e-06, "loss": 1.383, "step": 5520 }, { "epoch": 1.64, "learning_rate": 3.912782237806903e-06, "loss": 1.3853, "step": 5530 }, { "epoch": 1.64, "learning_rate": 3.850492586835755e-06, "loss": 1.4039, "step": 5540 }, { "epoch": 1.64, "learning_rate": 3.788661340573213e-06, "loss": 1.3703, "step": 5550 }, { "epoch": 1.65, "learning_rate": 3.7272898391797734e-06, "loss": 1.388, "step": 5560 }, { "epoch": 1.65, "learning_rate": 3.6663794128512038e-06, "loss": 1.3785, "step": 5570 }, { "epoch": 1.65, "learning_rate": 3.6059313817897065e-06, "loss": 1.3901, "step": 5580 }, { "epoch": 1.66, "learning_rate": 3.5459470561753e-06, "loss": 1.3894, "step": 5590 }, { "epoch": 1.66, "learning_rate": 3.4864277361374264e-06, "loss": 1.3799, "step": 5600 }, { "epoch": 1.66, "learning_rate": 3.4273747117267774e-06, "loss": 1.3798, "step": 5610 }, { "epoch": 1.67, "learning_rate": 3.3687892628873175e-06, "loss": 1.3948, "step": 5620 }, { "epoch": 1.67, "learning_rate": 3.310672659428557e-06, "loss": 1.4071, "step": 5630 }, { "epoch": 1.67, "learning_rate": 3.2530261609980183e-06, "loss": 1.3993, "step": 5640 }, { "epoch": 1.67, "learning_rate": 3.195851017053944e-06, "loss": 1.3885, "step": 5650 }, { "epoch": 1.68, "learning_rate": 3.1391484668382073e-06, "loss": 1.3919, "step": 5660 }, { "epoch": 1.68, "learning_rate": 3.0829197393494548e-06, "loss": 1.3965, "step": 5670 }, { "epoch": 1.68, "learning_rate": 3.0271660533164714e-06, "loss": 1.4135, "step": 5680 }, { "epoch": 1.69, "learning_rate": 2.9718886171717613e-06, "loss": 1.3923, "step": 5690 }, { "epoch": 1.69, "learning_rate": 2.9170886290253552e-06, "loss": 1.3663, "step": 5700 }, { "epoch": 1.69, "learning_rate": 2.8627672766388448e-06, "loss": 1.3772, "step": 5710 }, { "epoch": 1.69, "learning_rate": 2.8089257373996424e-06, "loss": 1.373, "step": 5720 }, { "epoch": 1.7, "learning_rate": 2.755565178295447e-06, "loss": 1.3858, "step": 5730 }, { "epoch": 1.7, "learning_rate": 2.7026867558889694e-06, "loss": 1.3996, "step": 5740 }, { "epoch": 1.7, "learning_rate": 2.6502916162928463e-06, "loss": 1.4194, "step": 5750 }, { "epoch": 1.71, "learning_rate": 2.5983808951448196e-06, "loss": 1.3895, "step": 5760 }, { "epoch": 1.71, "learning_rate": 2.5469557175830993e-06, "loss": 1.376, "step": 5770 }, { "epoch": 1.71, "learning_rate": 2.496017198221995e-06, "loss": 1.3901, "step": 5780 }, { "epoch": 1.72, "learning_rate": 2.445566441127742e-06, "loss": 1.3782, "step": 5790 }, { "epoch": 1.72, "learning_rate": 2.3956045397945826e-06, "loss": 1.3723, "step": 5800 }, { "epoch": 1.72, "learning_rate": 2.3461325771210683e-06, "loss": 1.3705, "step": 5810 }, { "epoch": 1.72, "learning_rate": 2.297151625386576e-06, "loss": 1.4018, "step": 5820 }, { "epoch": 1.73, "learning_rate": 2.2486627462280724e-06, "loss": 1.3811, "step": 5830 }, { "epoch": 1.73, "learning_rate": 2.200666990617098e-06, "loss": 1.3894, "step": 5840 }, { "epoch": 1.73, "learning_rate": 2.153165398837009e-06, "loss": 1.3931, "step": 5850 }, { "epoch": 1.74, "learning_rate": 2.1061590004603978e-06, "loss": 1.3775, "step": 5860 }, { "epoch": 1.74, "learning_rate": 2.059648814326806e-06, "loss": 1.3897, "step": 5870 }, { "epoch": 1.74, "learning_rate": 2.013635848520626e-06, "loss": 1.3919, "step": 5880 }, { "epoch": 1.75, "learning_rate": 1.9681211003492543e-06, "loss": 1.4191, "step": 5890 }, { "epoch": 1.75, "learning_rate": 1.923105556321475e-06, "loss": 1.4027, "step": 5900 }, { "epoch": 1.75, "learning_rate": 1.8785901921260784e-06, "loss": 1.3767, "step": 5910 }, { "epoch": 1.75, "learning_rate": 1.8345759726107193e-06, "loss": 1.3801, "step": 5920 }, { "epoch": 1.76, "learning_rate": 1.7910638517609962e-06, "loss": 1.3881, "step": 5930 }, { "epoch": 1.76, "learning_rate": 1.748054772679772e-06, "loss": 1.3915, "step": 5940 }, { "epoch": 1.76, "learning_rate": 1.705549667566747e-06, "loss": 1.403, "step": 5950 }, { "epoch": 1.77, "learning_rate": 1.6635494576982353e-06, "loss": 1.3882, "step": 5960 }, { "epoch": 1.77, "learning_rate": 1.6220550534072094e-06, "loss": 1.3902, "step": 5970 }, { "epoch": 1.77, "learning_rate": 1.5810673540635702e-06, "loss": 1.3928, "step": 5980 }, { "epoch": 1.77, "learning_rate": 1.540587248054645e-06, "loss": 1.3819, "step": 5990 }, { "epoch": 1.78, "learning_rate": 1.5006156127659348e-06, "loss": 1.3948, "step": 6000 }, { "epoch": 1.78, "learning_rate": 1.4611533145621026e-06, "loss": 1.3914, "step": 6010 }, { "epoch": 1.78, "learning_rate": 1.422201208768187e-06, "loss": 1.3713, "step": 6020 }, { "epoch": 1.79, "learning_rate": 1.3837601396510745e-06, "loss": 1.3922, "step": 6030 }, { "epoch": 1.79, "learning_rate": 1.345830940401191e-06, "loss": 1.3775, "step": 6040 }, { "epoch": 1.79, "learning_rate": 1.3084144331144439e-06, "loss": 1.3974, "step": 6050 }, { "epoch": 1.8, "learning_rate": 1.2715114287744079e-06, "loss": 1.3859, "step": 6060 }, { "epoch": 1.8, "learning_rate": 1.2351227272347444e-06, "loss": 1.3795, "step": 6070 }, { "epoch": 1.8, "learning_rate": 1.19924911720187e-06, "loss": 1.376, "step": 6080 }, { "epoch": 1.8, "learning_rate": 1.1638913762178489e-06, "loss": 1.3892, "step": 6090 }, { "epoch": 1.81, "learning_rate": 1.1290502706435584e-06, "loss": 1.3866, "step": 6100 }, { "epoch": 1.81, "learning_rate": 1.0947265556420588e-06, "loss": 1.3911, "step": 6110 }, { "epoch": 1.81, "learning_rate": 1.060920975162244e-06, "loss": 1.3535, "step": 6120 }, { "epoch": 1.82, "learning_rate": 1.0276342619227024e-06, "loss": 1.3815, "step": 6130 }, { "epoch": 1.82, "learning_rate": 9.948671373958468e-07, "loss": 1.3989, "step": 6140 }, { "epoch": 1.82, "learning_rate": 9.626203117922672e-07, "loss": 1.3974, "step": 6150 }, { "epoch": 1.83, "learning_rate": 9.308944840453415e-07, "loss": 1.3717, "step": 6160 }, { "epoch": 1.83, "learning_rate": 8.996903417960917e-07, "loss": 1.3797, "step": 6170 }, { "epoch": 1.83, "learning_rate": 8.690085613782706e-07, "loss": 1.3846, "step": 6180 }, { "epoch": 1.83, "learning_rate": 8.388498078037044e-07, "loss": 1.379, "step": 6190 }, { "epoch": 1.84, "learning_rate": 8.092147347478873e-07, "loss": 1.408, "step": 6200 }, { "epoch": 1.84, "learning_rate": 7.801039845358044e-07, "loss": 1.3776, "step": 6210 }, { "epoch": 1.84, "learning_rate": 7.515181881280115e-07, "loss": 1.397, "step": 6220 }, { "epoch": 1.85, "learning_rate": 7.234579651069578e-07, "loss": 1.3768, "step": 6230 }, { "epoch": 1.85, "learning_rate": 6.959239236635662e-07, "loss": 1.3807, "step": 6240 }, { "epoch": 1.85, "learning_rate": 6.68916660584043e-07, "loss": 1.3947, "step": 6250 }, { "epoch": 1.85, "learning_rate": 6.424367612369364e-07, "loss": 1.3785, "step": 6260 }, { "epoch": 1.86, "learning_rate": 6.164847995604656e-07, "loss": 1.3828, "step": 6270 }, { "epoch": 1.86, "learning_rate": 5.910613380500696e-07, "loss": 1.3765, "step": 6280 }, { "epoch": 1.86, "learning_rate": 5.66166927746209e-07, "loss": 1.3994, "step": 6290 }, { "epoch": 1.87, "learning_rate": 5.418021082224472e-07, "loss": 1.4081, "step": 6300 }, { "epoch": 1.87, "learning_rate": 5.179674075737273e-07, "loss": 1.3885, "step": 6310 }, { "epoch": 1.87, "learning_rate": 4.946633424049413e-07, "loss": 1.3748, "step": 6320 }, { "epoch": 1.88, "learning_rate": 4.7189041781973144e-07, "loss": 1.3988, "step": 6330 }, { "epoch": 1.88, "learning_rate": 4.4964912740954045e-07, "loss": 1.3815, "step": 6340 }, { "epoch": 1.88, "learning_rate": 4.2793995324290903e-07, "loss": 1.3736, "step": 6350 }, { "epoch": 1.88, "learning_rate": 4.067633658550396e-07, "loss": 1.3847, "step": 6360 }, { "epoch": 1.89, "learning_rate": 3.861198242375852e-07, "loss": 1.3907, "step": 6370 }, { "epoch": 1.89, "learning_rate": 3.660097758287018e-07, "loss": 1.3914, "step": 6380 }, { "epoch": 1.89, "learning_rate": 3.464336565033588e-07, "loss": 1.3861, "step": 6390 }, { "epoch": 1.9, "learning_rate": 3.273918905638912e-07, "loss": 1.3937, "step": 6400 }, { "epoch": 1.9, "learning_rate": 3.088848907307873e-07, "loss": 1.3783, "step": 6410 }, { "epoch": 1.9, "learning_rate": 2.909130581337655e-07, "loss": 1.4057, "step": 6420 }, { "epoch": 1.91, "learning_rate": 2.7347678230306427e-07, "loss": 1.3672, "step": 6430 }, { "epoch": 1.91, "learning_rate": 2.5657644116100497e-07, "loss": 1.3742, "step": 6440 }, { "epoch": 1.91, "learning_rate": 2.4021240101379793e-07, "loss": 1.3964, "step": 6450 }, { "epoch": 1.91, "learning_rate": 2.243850165436129e-07, "loss": 1.4025, "step": 6460 }, { "epoch": 1.92, "learning_rate": 2.0909463080087123e-07, "loss": 1.386, "step": 6470 }, { "epoch": 1.92, "learning_rate": 1.9434157519683248e-07, "loss": 1.3875, "step": 6480 }, { "epoch": 1.92, "learning_rate": 1.8012616949640015e-07, "loss": 1.3945, "step": 6490 }, { "epoch": 1.93, "learning_rate": 1.6644872181118543e-07, "loss": 1.3725, "step": 6500 }, { "epoch": 1.93, "learning_rate": 1.533095285928432e-07, "loss": 1.3732, "step": 6510 }, { "epoch": 1.93, "learning_rate": 1.4070887462663552e-07, "loss": 1.3791, "step": 6520 }, { "epoch": 1.93, "learning_rate": 1.2864703302526703e-07, "loss": 1.3922, "step": 6530 }, { "epoch": 1.94, "learning_rate": 1.171242652229565e-07, "loss": 1.3737, "step": 6540 }, { "epoch": 1.94, "learning_rate": 1.0614082096977729e-07, "loss": 1.3879, "step": 6550 }, { "epoch": 1.94, "learning_rate": 9.569693832623961e-08, "loss": 1.3962, "step": 6560 }, { "epoch": 1.95, "learning_rate": 8.579284365813623e-08, "loss": 1.4029, "step": 6570 }, { "epoch": 1.95, "learning_rate": 7.642875163162977e-08, "loss": 1.3903, "step": 6580 }, { "epoch": 1.95, "learning_rate": 6.760486520860643e-08, "loss": 1.3986, "step": 6590 }, { "epoch": 1.96, "learning_rate": 5.932137564227391e-08, "loss": 1.3624, "step": 6600 }, { "epoch": 1.96, "learning_rate": 5.157846247300646e-08, "loss": 1.3732, "step": 6610 }, { "epoch": 1.96, "learning_rate": 4.43762935244757e-08, "loss": 1.3838, "step": 6620 }, { "epoch": 1.96, "learning_rate": 3.7715024899989684e-08, "loss": 1.3711, "step": 6630 }, { "epoch": 1.97, "learning_rate": 3.159480097912615e-08, "loss": 1.3974, "step": 6640 }, { "epoch": 1.97, "learning_rate": 2.6015754414593363e-08, "loss": 1.3918, "step": 6650 }, { "epoch": 1.97, "learning_rate": 2.097800612936296e-08, "loss": 1.3933, "step": 6660 }, { "epoch": 1.98, "learning_rate": 1.6481665314035944e-08, "loss": 1.3815, "step": 6670 }, { "epoch": 1.98, "learning_rate": 1.2526829424494569e-08, "loss": 1.3671, "step": 6680 }, { "epoch": 1.98, "learning_rate": 9.113584179770707e-09, "loss": 1.4008, "step": 6690 }, { "epoch": 1.99, "learning_rate": 6.242003560205656e-09, "loss": 1.3876, "step": 6700 }, { "epoch": 1.99, "learning_rate": 3.9121498058292124e-09, "loss": 1.3722, "step": 6710 }, { "epoch": 1.99, "learning_rate": 2.124073415030181e-09, "loss": 1.3823, "step": 6720 }, { "epoch": 1.99, "learning_rate": 8.778131434433734e-10, "loss": 1.3815, "step": 6730 }, { "epoch": 2.0, "learning_rate": 1.7339600311971903e-10, "loss": 1.3889, "step": 6740 }, { "epoch": 2.0, "step": 6748, "total_flos": 1.1679618660044898e+19, "train_loss": 1.4147593358881598, "train_runtime": 86303.1376, "train_samples_per_second": 7.507, "train_steps_per_second": 0.078 } ], "max_steps": 6748, "num_train_epochs": 2, "total_flos": 1.1679618660044898e+19, "trial_name": null, "trial_params": null }