{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 1090, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.8348623853211012e-07, "loss": 1.6972, "step": 1 }, { "epoch": 0.0, "learning_rate": 9.174311926605506e-07, "loss": 1.6464, "step": 5 }, { "epoch": 0.01, "learning_rate": 1.8348623853211011e-06, "loss": 1.3918, "step": 10 }, { "epoch": 0.01, "learning_rate": 2.7522935779816517e-06, "loss": 1.2141, "step": 15 }, { "epoch": 0.02, "learning_rate": 3.6697247706422022e-06, "loss": 1.1699, "step": 20 }, { "epoch": 0.02, "learning_rate": 4.587155963302753e-06, "loss": 1.1172, "step": 25 }, { "epoch": 0.03, "learning_rate": 5.504587155963303e-06, "loss": 1.0704, "step": 30 }, { "epoch": 0.03, "learning_rate": 6.422018348623854e-06, "loss": 1.0575, "step": 35 }, { "epoch": 0.04, "learning_rate": 7.3394495412844045e-06, "loss": 1.052, "step": 40 }, { "epoch": 0.04, "learning_rate": 8.256880733944956e-06, "loss": 1.0469, "step": 45 }, { "epoch": 0.05, "learning_rate": 9.174311926605506e-06, "loss": 1.0309, "step": 50 }, { "epoch": 0.05, "learning_rate": 1.0091743119266055e-05, "loss": 1.0092, "step": 55 }, { "epoch": 0.06, "learning_rate": 1.1009174311926607e-05, "loss": 1.0362, "step": 60 }, { "epoch": 0.06, "learning_rate": 1.1926605504587156e-05, "loss": 1.0082, "step": 65 }, { "epoch": 0.06, "learning_rate": 1.2844036697247708e-05, "loss": 1.0004, "step": 70 }, { "epoch": 0.07, "learning_rate": 1.3761467889908258e-05, "loss": 1.0004, "step": 75 }, { "epoch": 0.07, "learning_rate": 1.4678899082568809e-05, "loss": 0.992, "step": 80 }, { "epoch": 0.08, "learning_rate": 1.559633027522936e-05, "loss": 1.0151, "step": 85 }, { "epoch": 0.08, "learning_rate": 1.6513761467889912e-05, "loss": 0.9973, "step": 90 }, { "epoch": 0.09, "learning_rate": 1.743119266055046e-05, "loss": 1.0153, "step": 95 }, { "epoch": 0.09, "learning_rate": 1.834862385321101e-05, "loss": 1.0163, "step": 100 }, { "epoch": 0.1, "learning_rate": 1.9266055045871563e-05, "loss": 0.9873, "step": 105 }, { "epoch": 0.1, "learning_rate": 1.999994872196626e-05, "loss": 1.0055, "step": 110 }, { "epoch": 0.11, "learning_rate": 1.9998154046002822e-05, "loss": 1.014, "step": 115 }, { "epoch": 0.11, "learning_rate": 1.999379599421534e-05, "loss": 1.0058, "step": 120 }, { "epoch": 0.11, "learning_rate": 1.9986875683942535e-05, "loss": 1.02, "step": 125 }, { "epoch": 0.12, "learning_rate": 1.9977394889447526e-05, "loss": 1.0237, "step": 130 }, { "epoch": 0.12, "learning_rate": 1.9965356041462954e-05, "loss": 1.0304, "step": 135 }, { "epoch": 0.13, "learning_rate": 1.9950762226567783e-05, "loss": 1.0283, "step": 140 }, { "epoch": 0.13, "learning_rate": 1.9933617186395917e-05, "loss": 1.0046, "step": 145 }, { "epoch": 0.14, "learning_rate": 1.9913925316676946e-05, "loss": 1.003, "step": 150 }, { "epoch": 0.14, "learning_rate": 1.9891691666109112e-05, "loss": 1.023, "step": 155 }, { "epoch": 0.15, "learning_rate": 1.9866921935064907e-05, "loss": 1.014, "step": 160 }, { "epoch": 0.15, "learning_rate": 1.9839622474129595e-05, "loss": 1.017, "step": 165 }, { "epoch": 0.16, "learning_rate": 1.9809800282473014e-05, "loss": 0.9986, "step": 170 }, { "epoch": 0.16, "learning_rate": 1.977746300605507e-05, "loss": 1.0227, "step": 175 }, { "epoch": 0.17, "learning_rate": 1.9742618935665478e-05, "loss": 1.0091, "step": 180 }, { "epoch": 0.17, "learning_rate": 1.9705277004798072e-05, "loss": 0.9988, "step": 185 }, { "epoch": 0.17, "learning_rate": 1.9665446787360444e-05, "loss": 1.0117, "step": 190 }, { "epoch": 0.18, "learning_rate": 1.9623138495219292e-05, "loss": 1.0089, "step": 195 }, { "epoch": 0.18, "learning_rate": 1.957836297558229e-05, "loss": 1.0276, "step": 200 }, { "epoch": 0.19, "learning_rate": 1.9531131708217005e-05, "loss": 0.9893, "step": 205 }, { "epoch": 0.19, "learning_rate": 1.948145680250766e-05, "loss": 1.0117, "step": 210 }, { "epoch": 0.2, "learning_rate": 1.9429350994350483e-05, "loss": 0.9932, "step": 215 }, { "epoch": 0.2, "learning_rate": 1.93748276428884e-05, "loss": 1.0232, "step": 220 }, { "epoch": 0.21, "learning_rate": 1.931790072708596e-05, "loss": 1.0191, "step": 225 }, { "epoch": 0.21, "learning_rate": 1.9258584842145342e-05, "loss": 1.002, "step": 230 }, { "epoch": 0.22, "learning_rate": 1.9196895195764363e-05, "loss": 0.9959, "step": 235 }, { "epoch": 0.22, "learning_rate": 1.913284760423745e-05, "loss": 1.0212, "step": 240 }, { "epoch": 0.22, "learning_rate": 1.9066458488400586e-05, "loss": 1.0255, "step": 245 }, { "epoch": 0.23, "learning_rate": 1.8997744869421248e-05, "loss": 1.0037, "step": 250 }, { "epoch": 0.23, "learning_rate": 1.8926724364434447e-05, "loss": 0.9952, "step": 255 }, { "epoch": 0.24, "learning_rate": 1.8853415182025953e-05, "loss": 1.0087, "step": 260 }, { "epoch": 0.24, "learning_rate": 1.8777836117563894e-05, "loss": 1.0198, "step": 265 }, { "epoch": 0.25, "learning_rate": 1.8700006548379898e-05, "loss": 0.9921, "step": 270 }, { "epoch": 0.25, "learning_rate": 1.861994642880105e-05, "loss": 1.0037, "step": 275 }, { "epoch": 0.26, "learning_rate": 1.8537676285033886e-05, "loss": 1.0064, "step": 280 }, { "epoch": 0.26, "learning_rate": 1.845321720990181e-05, "loss": 1.0098, "step": 285 }, { "epoch": 0.27, "learning_rate": 1.8366590857437182e-05, "loss": 1.0154, "step": 290 }, { "epoch": 0.27, "learning_rate": 1.8277819437329577e-05, "loss": 0.9931, "step": 295 }, { "epoch": 0.28, "learning_rate": 1.8186925709231534e-05, "loss": 0.9875, "step": 300 }, { "epoch": 0.28, "learning_rate": 1.809393297692334e-05, "loss": 0.9892, "step": 305 }, { "epoch": 0.28, "learning_rate": 1.799886508233829e-05, "loss": 0.9865, "step": 310 }, { "epoch": 0.29, "learning_rate": 1.790174639944997e-05, "loss": 0.9894, "step": 315 }, { "epoch": 0.29, "learning_rate": 1.780260182802314e-05, "loss": 0.9981, "step": 320 }, { "epoch": 0.3, "learning_rate": 1.7701456787229805e-05, "loss": 0.9876, "step": 325 }, { "epoch": 0.3, "learning_rate": 1.7598337209132142e-05, "loss": 1.0101, "step": 330 }, { "epoch": 0.31, "learning_rate": 1.7493269532033882e-05, "loss": 0.994, "step": 335 }, { "epoch": 0.31, "learning_rate": 1.738628069370195e-05, "loss": 0.9979, "step": 340 }, { "epoch": 0.32, "learning_rate": 1.7277398124460022e-05, "loss": 1.0069, "step": 345 }, { "epoch": 0.32, "learning_rate": 1.71666497401558e-05, "loss": 0.9926, "step": 350 }, { "epoch": 0.33, "learning_rate": 1.7054063935003813e-05, "loss": 0.9974, "step": 355 }, { "epoch": 0.33, "learning_rate": 1.6939669574305565e-05, "loss": 1.007, "step": 360 }, { "epoch": 0.33, "learning_rate": 1.6823495987048922e-05, "loss": 0.9704, "step": 365 }, { "epoch": 0.34, "learning_rate": 1.6705572958388576e-05, "loss": 0.9991, "step": 370 }, { "epoch": 0.34, "learning_rate": 1.6585930722009602e-05, "loss": 0.9951, "step": 375 }, { "epoch": 0.35, "learning_rate": 1.6464599952375998e-05, "loss": 0.997, "step": 380 }, { "epoch": 0.35, "learning_rate": 1.63416117568662e-05, "loss": 0.9813, "step": 385 }, { "epoch": 0.36, "learning_rate": 1.621699766779763e-05, "loss": 1.0091, "step": 390 }, { "epoch": 0.36, "learning_rate": 1.6090789634342278e-05, "loss": 1.0028, "step": 395 }, { "epoch": 0.37, "learning_rate": 1.5963020014335437e-05, "loss": 0.9934, "step": 400 }, { "epoch": 0.37, "learning_rate": 1.583372156597961e-05, "loss": 0.9753, "step": 405 }, { "epoch": 0.38, "learning_rate": 1.570292743944583e-05, "loss": 1.009, "step": 410 }, { "epoch": 0.38, "learning_rate": 1.557067116837444e-05, "loss": 0.9679, "step": 415 }, { "epoch": 0.39, "learning_rate": 1.5436986661277578e-05, "loss": 0.9958, "step": 420 }, { "epoch": 0.39, "learning_rate": 1.530190819284555e-05, "loss": 1.0129, "step": 425 }, { "epoch": 0.39, "learning_rate": 1.5165470395159314e-05, "loss": 0.9882, "step": 430 }, { "epoch": 0.4, "learning_rate": 1.5027708248811331e-05, "loss": 0.9889, "step": 435 }, { "epoch": 0.4, "learning_rate": 1.4888657073937077e-05, "loss": 0.9789, "step": 440 }, { "epoch": 0.41, "learning_rate": 1.4748352521159492e-05, "loss": 0.9768, "step": 445 }, { "epoch": 0.41, "learning_rate": 1.4606830562448692e-05, "loss": 0.9878, "step": 450 }, { "epoch": 0.42, "learning_rate": 1.4464127481899312e-05, "loss": 1.0, "step": 455 }, { "epoch": 0.42, "learning_rate": 1.4320279866427798e-05, "loss": 0.979, "step": 460 }, { "epoch": 0.43, "learning_rate": 1.4175324596392075e-05, "loss": 0.9831, "step": 465 }, { "epoch": 0.43, "learning_rate": 1.402929883613599e-05, "loss": 0.9766, "step": 470 }, { "epoch": 0.44, "learning_rate": 1.3882240024460928e-05, "loss": 0.9862, "step": 475 }, { "epoch": 0.44, "learning_rate": 1.3734185865027061e-05, "loss": 0.984, "step": 480 }, { "epoch": 0.44, "learning_rate": 1.358517431668672e-05, "loss": 0.9834, "step": 485 }, { "epoch": 0.45, "learning_rate": 1.3435243583752294e-05, "loss": 0.9753, "step": 490 }, { "epoch": 0.45, "learning_rate": 1.3284432106201233e-05, "loss": 0.9799, "step": 495 }, { "epoch": 0.46, "learning_rate": 1.313277854982062e-05, "loss": 0.9583, "step": 500 }, { "epoch": 0.46, "learning_rate": 1.2980321796293838e-05, "loss": 0.9887, "step": 505 }, { "epoch": 0.47, "learning_rate": 1.2827100933231904e-05, "loss": 0.9697, "step": 510 }, { "epoch": 0.47, "learning_rate": 1.2673155244151985e-05, "loss": 0.9796, "step": 515 }, { "epoch": 0.48, "learning_rate": 1.2518524198405699e-05, "loss": 0.9662, "step": 520 }, { "epoch": 0.48, "learning_rate": 1.2363247441059775e-05, "loss": 0.9711, "step": 525 }, { "epoch": 0.49, "learning_rate": 1.2207364782731657e-05, "loss": 0.9799, "step": 530 }, { "epoch": 0.49, "learning_rate": 1.2050916189382646e-05, "loss": 0.9637, "step": 535 }, { "epoch": 0.5, "learning_rate": 1.189394177207125e-05, "loss": 0.9827, "step": 540 }, { "epoch": 0.5, "learning_rate": 1.1736481776669307e-05, "loss": 0.9821, "step": 545 }, { "epoch": 0.5, "learning_rate": 1.1578576573543541e-05, "loss": 0.9614, "step": 550 }, { "epoch": 0.51, "learning_rate": 1.1420266647205232e-05, "loss": 0.9688, "step": 555 }, { "epoch": 0.51, "learning_rate": 1.1261592585930576e-05, "loss": 0.9879, "step": 560 }, { "epoch": 0.52, "learning_rate": 1.1102595071354471e-05, "loss": 0.9449, "step": 565 }, { "epoch": 0.52, "learning_rate": 1.0943314868040365e-05, "loss": 0.9586, "step": 570 }, { "epoch": 0.53, "learning_rate": 1.0783792813028828e-05, "loss": 0.9777, "step": 575 }, { "epoch": 0.53, "learning_rate": 1.0624069805367558e-05, "loss": 0.9405, "step": 580 }, { "epoch": 0.54, "learning_rate": 1.0464186795625481e-05, "loss": 0.9637, "step": 585 }, { "epoch": 0.54, "learning_rate": 1.0304184775393642e-05, "loss": 0.9574, "step": 590 }, { "epoch": 0.55, "learning_rate": 1.0144104766775574e-05, "loss": 0.9699, "step": 595 }, { "epoch": 0.55, "learning_rate": 9.983987811869863e-06, "loss": 0.9671, "step": 600 }, { "epoch": 0.56, "learning_rate": 9.823874962247565e-06, "loss": 0.9656, "step": 605 }, { "epoch": 0.56, "learning_rate": 9.663807268427197e-06, "loss": 0.9515, "step": 610 }, { "epoch": 0.56, "learning_rate": 9.503825769350016e-06, "loss": 0.9738, "step": 615 }, { "epoch": 0.57, "learning_rate": 9.343971481858246e-06, "loss": 0.9491, "step": 620 }, { "epoch": 0.57, "learning_rate": 9.184285390178978e-06, "loss": 0.964, "step": 625 }, { "epoch": 0.58, "learning_rate": 9.024808435416435e-06, "loss": 0.9433, "step": 630 }, { "epoch": 0.58, "learning_rate": 8.865581505055292e-06, "loss": 0.9508, "step": 635 }, { "epoch": 0.59, "learning_rate": 8.706645422477739e-06, "loss": 0.9619, "step": 640 }, { "epoch": 0.59, "learning_rate": 8.548040936496989e-06, "loss": 0.9462, "step": 645 }, { "epoch": 0.6, "learning_rate": 8.389808710909881e-06, "loss": 0.9751, "step": 650 }, { "epoch": 0.6, "learning_rate": 8.231989314071318e-06, "loss": 0.9619, "step": 655 }, { "epoch": 0.61, "learning_rate": 8.07462320849313e-06, "loss": 0.9613, "step": 660 }, { "epoch": 0.61, "learning_rate": 7.917750740470116e-06, "loss": 0.9713, "step": 665 }, { "epoch": 0.61, "learning_rate": 7.761412129735853e-06, "loss": 0.9636, "step": 670 }, { "epoch": 0.62, "learning_rate": 7.605647459150961e-06, "loss": 0.9491, "step": 675 }, { "epoch": 0.62, "learning_rate": 7.4504966644264775e-06, "loss": 0.9493, "step": 680 }, { "epoch": 0.63, "learning_rate": 7.295999523884921e-06, "loss": 0.9434, "step": 685 }, { "epoch": 0.63, "learning_rate": 7.142195648261747e-06, "loss": 0.9653, "step": 690 }, { "epoch": 0.64, "learning_rate": 6.989124470549746e-06, "loss": 0.9607, "step": 695 }, { "epoch": 0.64, "learning_rate": 6.83682523588902e-06, "loss": 0.9462, "step": 700 }, { "epoch": 0.65, "learning_rate": 6.685336991505122e-06, "loss": 0.958, "step": 705 }, { "epoch": 0.65, "learning_rate": 6.5346985766979384e-06, "loss": 0.9505, "step": 710 }, { "epoch": 0.66, "learning_rate": 6.384948612883872e-06, "loss": 0.9479, "step": 715 }, { "epoch": 0.66, "learning_rate": 6.2361254936939e-06, "loss": 0.9394, "step": 720 }, { "epoch": 0.67, "learning_rate": 6.0882673751300235e-06, "loss": 0.9506, "step": 725 }, { "epoch": 0.67, "learning_rate": 5.941412165782645e-06, "loss": 0.9491, "step": 730 }, { "epoch": 0.67, "learning_rate": 5.79559751711138e-06, "loss": 0.9355, "step": 735 }, { "epoch": 0.68, "learning_rate": 5.650860813791786e-06, "loss": 0.9451, "step": 740 }, { "epoch": 0.68, "learning_rate": 5.507239164130501e-06, "loss": 0.9533, "step": 745 }, { "epoch": 0.69, "learning_rate": 5.364769390551225e-06, "loss": 0.9515, "step": 750 }, { "epoch": 0.69, "learning_rate": 5.223488020154028e-06, "loss": 0.9487, "step": 755 }, { "epoch": 0.7, "learning_rate": 5.083431275350312e-06, "loss": 0.9399, "step": 760 }, { "epoch": 0.7, "learning_rate": 4.9446350645759885e-06, "loss": 0.9471, "step": 765 }, { "epoch": 0.71, "learning_rate": 4.807134973085036e-06, "loss": 0.9443, "step": 770 }, { "epoch": 0.71, "learning_rate": 4.670966253826027e-06, "loss": 0.9518, "step": 775 }, { "epoch": 0.72, "learning_rate": 4.53616381840377e-06, "loss": 0.934, "step": 780 }, { "epoch": 0.72, "learning_rate": 4.402762228128531e-06, "loss": 0.9413, "step": 785 }, { "epoch": 0.72, "learning_rate": 4.270795685155001e-06, "loss": 0.9488, "step": 790 }, { "epoch": 0.73, "learning_rate": 4.140298023713416e-06, "loss": 0.9291, "step": 795 }, { "epoch": 0.73, "learning_rate": 4.0113027014349374e-06, "loss": 0.9442, "step": 800 }, { "epoch": 0.74, "learning_rate": 3.883842790773647e-06, "loss": 0.9326, "step": 805 }, { "epoch": 0.74, "learning_rate": 3.757950970527249e-06, "loss": 0.9369, "step": 810 }, { "epoch": 0.75, "learning_rate": 3.633659517458736e-06, "loss": 0.9314, "step": 815 }, { "epoch": 0.75, "learning_rate": 3.511000298021098e-06, "loss": 0.9502, "step": 820 }, { "epoch": 0.76, "learning_rate": 3.39000476018726e-06, "loss": 0.9445, "step": 825 }, { "epoch": 0.76, "learning_rate": 3.2707039253872796e-06, "loss": 0.9357, "step": 830 }, { "epoch": 0.77, "learning_rate": 3.153128380554941e-06, "loss": 0.9423, "step": 835 }, { "epoch": 0.77, "learning_rate": 3.037308270285709e-06, "loss": 0.9206, "step": 840 }, { "epoch": 0.78, "learning_rate": 2.923273289108115e-06, "loss": 0.9466, "step": 845 }, { "epoch": 0.78, "learning_rate": 2.8110526738705345e-06, "loss": 0.9378, "step": 850 }, { "epoch": 0.78, "learning_rate": 2.700675196245288e-06, "loss": 0.9469, "step": 855 }, { "epoch": 0.79, "learning_rate": 2.592169155352031e-06, "loss": 0.9159, "step": 860 }, { "epoch": 0.79, "learning_rate": 2.485562370502279e-06, "loss": 0.9286, "step": 865 }, { "epoch": 0.8, "learning_rate": 2.3808821740669608e-06, "loss": 0.9296, "step": 870 }, { "epoch": 0.8, "learning_rate": 2.2781554044688015e-06, "loss": 0.9405, "step": 875 }, { "epoch": 0.81, "learning_rate": 2.1774083993013715e-06, "loss": 0.9364, "step": 880 }, { "epoch": 0.81, "learning_rate": 2.0786669885765044e-06, "loss": 0.9349, "step": 885 }, { "epoch": 0.82, "learning_rate": 1.981956488101898e-06, "loss": 0.9247, "step": 890 }, { "epoch": 0.82, "learning_rate": 1.8873016929904942e-06, "loss": 0.9339, "step": 895 }, { "epoch": 0.83, "learning_rate": 1.7947268713034128e-06, "loss": 0.9484, "step": 900 }, { "epoch": 0.83, "learning_rate": 1.704255757827963e-06, "loss": 0.9355, "step": 905 }, { "epoch": 0.83, "learning_rate": 1.6159115479924259e-06, "loss": 0.9242, "step": 910 }, { "epoch": 0.84, "learning_rate": 1.529716891919074e-06, "loss": 0.9215, "step": 915 }, { "epoch": 0.84, "learning_rate": 1.4456938886170413e-06, "loss": 0.9383, "step": 920 }, { "epoch": 0.85, "learning_rate": 1.3638640803164516e-06, "loss": 0.925, "step": 925 }, { "epoch": 0.85, "learning_rate": 1.2842484469453365e-06, "loss": 0.916, "step": 930 }, { "epoch": 0.86, "learning_rate": 1.2068674007506787e-06, "loss": 0.9289, "step": 935 }, { "epoch": 0.86, "learning_rate": 1.1317407810650372e-06, "loss": 0.9217, "step": 940 }, { "epoch": 0.87, "learning_rate": 1.0588878492200261e-06, "loss": 0.9345, "step": 945 }, { "epoch": 0.87, "learning_rate": 9.883272836080116e-07, "loss": 0.9297, "step": 950 }, { "epoch": 0.88, "learning_rate": 9.200771748932513e-07, "loss": 0.942, "step": 955 }, { "epoch": 0.88, "learning_rate": 8.541550213737171e-07, "loss": 0.9243, "step": 960 }, { "epoch": 0.89, "learning_rate": 7.905777244947954e-07, "loss": 0.9195, "step": 965 }, { "epoch": 0.89, "learning_rate": 7.293615845160196e-07, "loss": 0.9132, "step": 970 }, { "epoch": 0.89, "learning_rate": 6.705222963319191e-07, "loss": 0.9282, "step": 975 }, { "epoch": 0.9, "learning_rate": 6.140749454480932e-07, "loss": 0.9386, "step": 980 }, { "epoch": 0.9, "learning_rate": 5.600340041135133e-07, "loss": 0.9263, "step": 985 }, { "epoch": 0.91, "learning_rate": 5.0841332761005e-07, "loss": 0.9281, "step": 990 }, { "epoch": 0.91, "learning_rate": 4.592261507001994e-07, "loss": 0.9252, "step": 995 }, { "epoch": 0.92, "learning_rate": 4.124850842338779e-07, "loss": 0.9253, "step": 1000 }, { "epoch": 0.92, "learning_rate": 3.6820211191520127e-07, "loss": 0.9236, "step": 1005 }, { "epoch": 0.93, "learning_rate": 3.263885872300343e-07, "loss": 0.9091, "step": 1010 }, { "epoch": 0.93, "learning_rate": 2.870552305351382e-07, "loss": 0.9294, "step": 1015 }, { "epoch": 0.94, "learning_rate": 2.5021212630962246e-07, "loss": 0.933, "step": 1020 }, { "epoch": 0.94, "learning_rate": 2.158687205694443e-07, "loss": 0.9217, "step": 1025 }, { "epoch": 0.94, "learning_rate": 1.840338184455881e-07, "loss": 0.9146, "step": 1030 }, { "epoch": 0.95, "learning_rate": 1.5471558192656776e-07, "loss": 0.9233, "step": 1035 }, { "epoch": 0.95, "learning_rate": 1.279215277658097e-07, "loss": 0.9128, "step": 1040 }, { "epoch": 0.96, "learning_rate": 1.0365852555447642e-07, "loss": 0.9408, "step": 1045 }, { "epoch": 0.96, "learning_rate": 8.19327959602012e-08, "loss": 0.9371, "step": 1050 }, { "epoch": 0.97, "learning_rate": 6.274990913221035e-08, "loss": 0.9205, "step": 1055 }, { "epoch": 0.97, "learning_rate": 4.6114783273213395e-08, "loss": 0.9216, "step": 1060 }, { "epoch": 0.98, "learning_rate": 3.203168337845508e-08, "loss": 0.9128, "step": 1065 }, { "epoch": 0.98, "learning_rate": 2.05042201422323e-08, "loss": 0.9273, "step": 1070 }, { "epoch": 0.99, "learning_rate": 1.1535349032167908e-08, "loss": 0.9248, "step": 1075 }, { "epoch": 0.99, "learning_rate": 5.127369531473525e-09, "loss": 0.9462, "step": 1080 }, { "epoch": 1.0, "learning_rate": 1.2819245493955746e-09, "loss": 0.9164, "step": 1085 }, { "epoch": 1.0, "learning_rate": 0.0, "loss": 0.9082, "step": 1090 }, { "epoch": 1.0, "eval_loss": 0.9357138276100159, "eval_runtime": 338.0357, "eval_samples_per_second": 45.649, "eval_steps_per_second": 0.716, "step": 1090 }, { "epoch": 1.0, "step": 1090, "total_flos": 456447649382400.0, "train_loss": 0.07625311886498687, "train_runtime": 1283.1445, "train_samples_per_second": 108.66, "train_steps_per_second": 0.849 } ], "logging_steps": 5, "max_steps": 1090, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 456447649382400.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }