{ "best_metric": null, "best_model_checkpoint": null, "epoch": 28.5, "global_step": 2786844, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1, "learning_rate": 0.020000000000000004, "loss": 5.3687, "step": 10000 }, { "epoch": 0.2, "learning_rate": 0.04000000000000001, "loss": 4.0531, "step": 20000 }, { "epoch": 0.31, "learning_rate": 0.04993567245443037, "loss": 3.8149, "step": 30000 }, { "epoch": 0.41, "learning_rate": 0.0498070173632911, "loss": 3.6549, "step": 40000 }, { "epoch": 0.51, "learning_rate": 0.04967836227215183, "loss": 3.5544, "step": 50000 }, { "epoch": 0.61, "learning_rate": 0.049549707181012564, "loss": 3.4935, "step": 60000 }, { "epoch": 0.72, "learning_rate": 0.0494210520898733, "loss": 3.447, "step": 70000 }, { "epoch": 0.82, "learning_rate": 0.049292396998734035, "loss": 3.4134, "step": 80000 }, { "epoch": 0.92, "learning_rate": 0.04916374190759477, "loss": 3.3861, "step": 90000 }, { "epoch": 1.02, "learning_rate": 0.049035086816455506, "loss": 3.3523, "step": 100000 }, { "epoch": 1.12, "learning_rate": 0.04890643172531624, "loss": 3.3063, "step": 110000 }, { "epoch": 1.23, "learning_rate": 0.04877777663417697, "loss": 3.2983, "step": 120000 }, { "epoch": 1.33, "learning_rate": 0.0486491215430377, "loss": 3.2874, "step": 130000 }, { "epoch": 1.43, "learning_rate": 0.04852046645189844, "loss": 3.2785, "step": 140000 }, { "epoch": 1.53, "learning_rate": 0.04839181136075917, "loss": 3.2698, "step": 150000 }, { "epoch": 1.64, "learning_rate": 0.048263156269619904, "loss": 3.2599, "step": 160000 }, { "epoch": 1.74, "learning_rate": 0.04813450117848064, "loss": 3.2503, "step": 170000 }, { "epoch": 1.84, "learning_rate": 0.048005846087341375, "loss": 3.2432, "step": 180000 }, { "epoch": 1.94, "learning_rate": 0.04787719099620211, "loss": 3.2347, "step": 190000 }, { "epoch": 2.05, "learning_rate": 0.04774853590506284, "loss": 3.1812, "step": 200000 }, { "epoch": 2.15, "learning_rate": 0.04761988081392357, "loss": 3.1865, "step": 210000 }, { "epoch": 2.25, "learning_rate": 0.0474912257227843, "loss": 3.1873, "step": 220000 }, { "epoch": 2.35, "learning_rate": 0.047362570631645035, "loss": 3.1842, "step": 230000 }, { "epoch": 2.45, "learning_rate": 0.047233915540505766, "loss": 3.1824, "step": 240000 }, { "epoch": 2.56, "learning_rate": 0.047105260449366505, "loss": 3.1806, "step": 250000 }, { "epoch": 2.66, "learning_rate": 0.04697660535822724, "loss": 3.177, "step": 260000 }, { "epoch": 2.76, "learning_rate": 0.04684795026708797, "loss": 3.1741, "step": 270000 }, { "epoch": 2.86, "learning_rate": 0.04671929517594871, "loss": 3.1709, "step": 280000 }, { "epoch": 2.97, "learning_rate": 0.04659064008480944, "loss": 3.1682, "step": 290000 }, { "epoch": 3.07, "learning_rate": 0.04646198499367017, "loss": 3.1382, "step": 300000 }, { "epoch": 3.17, "learning_rate": 0.046333329902530904, "loss": 3.1283, "step": 310000 }, { "epoch": 3.27, "learning_rate": 0.04620467481139164, "loss": 3.1318, "step": 320000 }, { "epoch": 3.37, "learning_rate": 0.046076019720252374, "loss": 3.1319, "step": 330000 }, { "epoch": 3.48, "learning_rate": 0.045947364629113106, "loss": 3.1335, "step": 340000 }, { "epoch": 3.58, "learning_rate": 0.04581870953797384, "loss": 3.1326, "step": 350000 }, { "epoch": 3.68, "learning_rate": 0.04569005444683458, "loss": 3.1306, "step": 360000 }, { "epoch": 3.78, "learning_rate": 0.04556139935569531, "loss": 3.1289, "step": 370000 }, { "epoch": 3.89, "learning_rate": 0.04543274426455604, "loss": 3.1275, "step": 380000 }, { "epoch": 3.99, "learning_rate": 0.04530408917341677, "loss": 3.1259, "step": 390000 }, { "epoch": 4.09, "learning_rate": 0.045175434082277505, "loss": 3.0882, "step": 400000 }, { "epoch": 4.19, "learning_rate": 0.04504677899113824, "loss": 3.094, "step": 410000 }, { "epoch": 4.3, "learning_rate": 0.04491812389999897, "loss": 3.0958, "step": 420000 }, { "epoch": 4.4, "learning_rate": 0.04478946880885971, "loss": 3.0976, "step": 430000 }, { "epoch": 4.5, "learning_rate": 0.04466081371772044, "loss": 3.0996, "step": 440000 }, { "epoch": 4.6, "learning_rate": 0.04453215862658117, "loss": 3.099, "step": 450000 }, { "epoch": 4.7, "learning_rate": 0.04440350353544191, "loss": 3.1, "step": 460000 }, { "epoch": 4.81, "learning_rate": 0.04427484844430264, "loss": 3.0993, "step": 470000 }, { "epoch": 4.91, "learning_rate": 0.044146193353163374, "loss": 3.0985, "step": 480000 }, { "epoch": 5.01, "learning_rate": 0.044017538262024106, "loss": 3.094, "step": 490000 }, { "epoch": 5.11, "learning_rate": 0.043888883170884845, "loss": 3.0646, "step": 500000 }, { "epoch": 5.22, "learning_rate": 0.043760228079745576, "loss": 3.0694, "step": 510000 }, { "epoch": 5.32, "learning_rate": 0.04363157298860631, "loss": 3.0712, "step": 520000 }, { "epoch": 5.42, "learning_rate": 0.04350291789746704, "loss": 3.0736, "step": 530000 }, { "epoch": 5.52, "learning_rate": 0.04337426280632778, "loss": 3.0752, "step": 540000 }, { "epoch": 5.62, "learning_rate": 0.04324560771518851, "loss": 3.0759, "step": 550000 }, { "epoch": 5.73, "learning_rate": 0.04311695262404924, "loss": 3.0762, "step": 560000 }, { "epoch": 5.83, "learning_rate": 0.042988297532909975, "loss": 3.0756, "step": 570000 }, { "epoch": 5.93, "learning_rate": 0.04285964244177071, "loss": 3.076, "step": 580000 }, { "epoch": 6.03, "learning_rate": 0.04273098735063144, "loss": 3.0412, "step": 590000 }, { "epoch": 6.14, "learning_rate": 0.04260233225949217, "loss": 3.0464, "step": 600000 }, { "epoch": 6.24, "learning_rate": 0.04247367716835291, "loss": 3.05, "step": 610000 }, { "epoch": 6.34, "learning_rate": 0.04234502207721364, "loss": 3.0539, "step": 620000 }, { "epoch": 6.44, "learning_rate": 0.04221636698607437, "loss": 3.0554, "step": 630000 }, { "epoch": 6.55, "learning_rate": 0.042087711894935105, "loss": 3.0558, "step": 640000 }, { "epoch": 6.65, "learning_rate": 0.041959056803795844, "loss": 3.057, "step": 650000 }, { "epoch": 6.75, "learning_rate": 0.041830401712656576, "loss": 3.0584, "step": 660000 }, { "epoch": 6.85, "learning_rate": 0.04170174662151731, "loss": 3.0585, "step": 670000 }, { "epoch": 6.95, "learning_rate": 0.04157309153037805, "loss": 3.0593, "step": 680000 }, { "epoch": 7.06, "learning_rate": 0.04144443643923878, "loss": 3.0408, "step": 690000 }, { "epoch": 7.16, "learning_rate": 0.04131578134809951, "loss": 3.0325, "step": 700000 }, { "epoch": 7.26, "learning_rate": 0.04118712625696024, "loss": 3.035, "step": 710000 }, { "epoch": 7.36, "learning_rate": 0.04105847116582098, "loss": 3.0373, "step": 720000 }, { "epoch": 7.47, "learning_rate": 0.04092981607468171, "loss": 3.0405, "step": 730000 }, { "epoch": 7.57, "learning_rate": 0.040801160983542445, "loss": 3.0403, "step": 740000 }, { "epoch": 7.67, "learning_rate": 0.04067250589240318, "loss": 3.0431, "step": 750000 }, { "epoch": 7.77, "learning_rate": 0.04054385080126391, "loss": 3.0444, "step": 760000 }, { "epoch": 7.87, "learning_rate": 0.04041519571012464, "loss": 3.0445, "step": 770000 }, { "epoch": 7.98, "learning_rate": 0.04028654061898537, "loss": 3.0452, "step": 780000 }, { "epoch": 8.08, "learning_rate": 0.04015788552784611, "loss": 3.0217, "step": 790000 }, { "epoch": 8.18, "learning_rate": 0.04002923043670684, "loss": 3.02, "step": 800000 }, { "epoch": 8.28, "learning_rate": 0.039900575345567575, "loss": 3.0233, "step": 810000 }, { "epoch": 8.39, "learning_rate": 0.03977192025442831, "loss": 3.0259, "step": 820000 }, { "epoch": 8.49, "learning_rate": 0.039643265163289046, "loss": 3.0271, "step": 830000 }, { "epoch": 8.59, "learning_rate": 0.03951461007214978, "loss": 3.0121, "step": 840000 }, { "epoch": 8.69, "learning_rate": 0.03938595498101051, "loss": 3.0161, "step": 850000 }, { "epoch": 8.79, "learning_rate": 0.03925729988987125, "loss": 3.0195, "step": 860000 }, { "epoch": 8.9, "learning_rate": 0.03912864479873198, "loss": 3.021, "step": 870000 }, { "epoch": 9.0, "learning_rate": 0.03899998970759271, "loss": 3.0228, "step": 880000 }, { "epoch": 9.1, "learning_rate": 0.038871334616453444, "loss": 3.0073, "step": 890000 }, { "epoch": 9.2, "learning_rate": 0.03874267952531418, "loss": 3.0114, "step": 900000 }, { "epoch": 9.31, "learning_rate": 0.038614024434174915, "loss": 3.0151, "step": 910000 }, { "epoch": 9.41, "learning_rate": 0.03848536934303565, "loss": 3.0175, "step": 920000 }, { "epoch": 9.51, "learning_rate": 0.03835671425189638, "loss": 3.0193, "step": 930000 }, { "epoch": 9.61, "learning_rate": 0.03822805916075711, "loss": 3.0185, "step": 940000 }, { "epoch": 9.72, "learning_rate": 0.03809940406961784, "loss": 3.0228, "step": 950000 }, { "epoch": 9.82, "learning_rate": 0.037970748978478575, "loss": 3.0226, "step": 960000 }, { "epoch": 9.92, "learning_rate": 0.03784209388733931, "loss": 3.0227, "step": 970000 }, { "epoch": 10.02, "learning_rate": 0.037713438796200045, "loss": 3.017, "step": 980000 }, { "epoch": 10.12, "learning_rate": 0.03758478370506078, "loss": 2.9992, "step": 990000 }, { "epoch": 10.23, "learning_rate": 0.03745612861392151, "loss": 3.0007, "step": 1000000 }, { "epoch": 10.33, "learning_rate": 0.03732747352278225, "loss": 3.0047, "step": 1010000 }, { "epoch": 10.43, "learning_rate": 0.03719881843164298, "loss": 3.0075, "step": 1020000 }, { "epoch": 10.53, "learning_rate": 0.03707016334050371, "loss": 3.0095, "step": 1030000 }, { "epoch": 10.64, "learning_rate": 0.03694150824936445, "loss": 3.0101, "step": 1040000 }, { "epoch": 10.74, "learning_rate": 0.03681285315822518, "loss": 3.0123, "step": 1050000 }, { "epoch": 10.84, "learning_rate": 0.036684198067085914, "loss": 3.013, "step": 1060000 }, { "epoch": 10.94, "learning_rate": 0.036555542975946646, "loss": 3.0121, "step": 1070000 }, { "epoch": 11.04, "learning_rate": 0.03642688788480738, "loss": 2.9892, "step": 1080000 }, { "epoch": 11.15, "learning_rate": 0.03629823279366811, "loss": 2.9908, "step": 1090000 }, { "epoch": 11.25, "learning_rate": 0.03616957770252884, "loss": 2.9941, "step": 1100000 }, { "epoch": 11.35, "learning_rate": 0.036040922611389574, "loss": 2.9976, "step": 1110000 }, { "epoch": 11.45, "learning_rate": 0.03591226752025031, "loss": 2.9992, "step": 1120000 }, { "epoch": 11.56, "learning_rate": 0.035783612429111045, "loss": 3.0007, "step": 1130000 }, { "epoch": 11.66, "learning_rate": 0.03565495733797178, "loss": 3.0021, "step": 1140000 }, { "epoch": 11.76, "learning_rate": 0.035526302246832515, "loss": 3.0032, "step": 1150000 }, { "epoch": 11.86, "learning_rate": 0.03539764715569325, "loss": 3.0057, "step": 1160000 }, { "epoch": 11.97, "learning_rate": 0.03526899206455398, "loss": 3.0053, "step": 1170000 }, { "epoch": 12.07, "learning_rate": 0.03514033697341471, "loss": 2.9898, "step": 1180000 }, { "epoch": 12.17, "learning_rate": 0.03501168188227545, "loss": 2.9848, "step": 1190000 }, { "epoch": 12.27, "learning_rate": 0.03488302679113618, "loss": 2.9871, "step": 1200000 }, { "epoch": 12.37, "learning_rate": 0.034754371699996914, "loss": 2.9903, "step": 1210000 }, { "epoch": 12.48, "learning_rate": 0.03462571660885765, "loss": 2.9918, "step": 1220000 }, { "epoch": 12.58, "learning_rate": 0.034497061517718385, "loss": 2.9948, "step": 1230000 }, { "epoch": 12.68, "learning_rate": 0.034368406426579116, "loss": 2.9955, "step": 1240000 }, { "epoch": 12.78, "learning_rate": 0.03423975133543985, "loss": 2.9971, "step": 1250000 }, { "epoch": 12.89, "learning_rate": 0.03411109624430058, "loss": 2.9978, "step": 1260000 }, { "epoch": 12.99, "learning_rate": 0.03398244115316131, "loss": 2.9985, "step": 1270000 }, { "epoch": 13.09, "learning_rate": 0.033853786062022044, "loss": 2.9789, "step": 1280000 }, { "epoch": 13.19, "learning_rate": 0.033725130970882776, "loss": 2.9795, "step": 1290000 }, { "epoch": 13.29, "learning_rate": 0.033596475879743515, "loss": 2.9835, "step": 1300000 }, { "epoch": 13.4, "learning_rate": 0.03346782078860425, "loss": 2.9829, "step": 1310000 }, { "epoch": 13.5, "learning_rate": 0.03333916569746498, "loss": 2.9869, "step": 1320000 }, { "epoch": 13.6, "learning_rate": 0.03321051060632572, "loss": 2.9755, "step": 1330000 }, { "epoch": 13.7, "learning_rate": 0.03308185551518645, "loss": 2.978, "step": 1340000 }, { "epoch": 13.81, "learning_rate": 0.03295320042404718, "loss": 2.9811, "step": 1350000 }, { "epoch": 13.91, "learning_rate": 0.03282454533290791, "loss": 2.9824, "step": 1360000 }, { "epoch": 14.01, "learning_rate": 0.03269589024176865, "loss": 2.9832, "step": 1370000 }, { "epoch": 14.11, "learning_rate": 0.032567235150629384, "loss": 2.9726, "step": 1380000 }, { "epoch": 14.22, "learning_rate": 0.032438580059490116, "loss": 2.9762, "step": 1390000 }, { "epoch": 14.32, "learning_rate": 0.03230992496835085, "loss": 2.9786, "step": 1400000 }, { "epoch": 14.42, "learning_rate": 0.03218126987721159, "loss": 2.9804, "step": 1410000 }, { "epoch": 14.52, "learning_rate": 0.03205261478607232, "loss": 2.9821, "step": 1420000 }, { "epoch": 14.62, "learning_rate": 0.03192395969493305, "loss": 2.9825, "step": 1430000 }, { "epoch": 14.73, "learning_rate": 0.03179530460379378, "loss": 2.985, "step": 1440000 }, { "epoch": 14.83, "learning_rate": 0.031666649512654514, "loss": 2.9851, "step": 1450000 }, { "epoch": 14.93, "learning_rate": 0.031537994421515246, "loss": 2.9859, "step": 1460000 }, { "epoch": 15.03, "learning_rate": 0.03140933933037598, "loss": 2.9795, "step": 1470000 }, { "epoch": 15.14, "learning_rate": 0.03128068423923672, "loss": 2.9681, "step": 1480000 }, { "epoch": 15.24, "learning_rate": 0.03115202914809745, "loss": 2.9707, "step": 1490000 }, { "epoch": 15.34, "learning_rate": 0.03102337405695818, "loss": 2.9727, "step": 1500000 }, { "epoch": 15.44, "learning_rate": 0.03089471896581892, "loss": 2.9747, "step": 1510000 }, { "epoch": 15.54, "learning_rate": 0.03076606387467965, "loss": 2.9769, "step": 1520000 }, { "epoch": 15.65, "learning_rate": 0.030637408783540383, "loss": 2.9778, "step": 1530000 }, { "epoch": 15.75, "learning_rate": 0.030508753692401115, "loss": 2.9788, "step": 1540000 }, { "epoch": 15.85, "learning_rate": 0.030380098601261854, "loss": 2.9789, "step": 1550000 }, { "epoch": 15.95, "learning_rate": 0.030251443510122586, "loss": 2.9807, "step": 1560000 }, { "epoch": 16.06, "learning_rate": 0.030122788418983318, "loss": 2.9619, "step": 1570000 }, { "epoch": 16.16, "learning_rate": 0.02999413332784405, "loss": 2.9638, "step": 1580000 }, { "epoch": 16.26, "learning_rate": 0.029865478236704785, "loss": 2.9654, "step": 1590000 }, { "epoch": 16.36, "learning_rate": 0.029736823145565517, "loss": 2.9679, "step": 1600000 }, { "epoch": 16.46, "learning_rate": 0.02960816805442625, "loss": 2.9704, "step": 1610000 }, { "epoch": 16.57, "learning_rate": 0.029479512963286988, "loss": 2.9726, "step": 1620000 }, { "epoch": 16.67, "learning_rate": 0.02935085787214772, "loss": 2.9725, "step": 1630000 }, { "epoch": 16.77, "learning_rate": 0.02922220278100845, "loss": 2.9738, "step": 1640000 }, { "epoch": 16.87, "learning_rate": 0.029093547689869183, "loss": 2.9747, "step": 1650000 }, { "epoch": 16.98, "learning_rate": 0.02896489259872992, "loss": 2.9763, "step": 1660000 }, { "epoch": 17.08, "learning_rate": 0.02883623750759065, "loss": 2.9617, "step": 1670000 }, { "epoch": 17.18, "learning_rate": 0.028707582416451383, "loss": 2.9618, "step": 1680000 }, { "epoch": 17.28, "learning_rate": 0.028578927325312115, "loss": 2.9612, "step": 1690000 }, { "epoch": 17.39, "learning_rate": 0.028450272234172853, "loss": 2.9632, "step": 1700000 }, { "epoch": 17.49, "learning_rate": 0.028321617143033585, "loss": 2.9655, "step": 1710000 }, { "epoch": 17.59, "learning_rate": 0.028192962051894317, "loss": 2.9672, "step": 1720000 }, { "epoch": 17.69, "learning_rate": 0.028064306960755056, "loss": 2.9691, "step": 1730000 }, { "epoch": 17.79, "learning_rate": 0.027935651869615788, "loss": 2.9698, "step": 1740000 }, { "epoch": 17.9, "learning_rate": 0.02780699677847652, "loss": 2.9702, "step": 1750000 }, { "epoch": 18.0, "learning_rate": 0.027678341687337252, "loss": 2.9719, "step": 1760000 }, { "epoch": 18.1, "learning_rate": 0.027549686596197987, "loss": 2.9546, "step": 1770000 }, { "epoch": 18.2, "learning_rate": 0.02742103150505872, "loss": 2.9567, "step": 1780000 }, { "epoch": 18.31, "learning_rate": 0.02729237641391945, "loss": 2.9586, "step": 1790000 }, { "epoch": 18.41, "learning_rate": 0.02716372132278019, "loss": 2.9606, "step": 1800000 }, { "epoch": 18.51, "learning_rate": 0.02703506623164092, "loss": 2.9506, "step": 1810000 }, { "epoch": 18.61, "learning_rate": 0.026906411140501654, "loss": 2.9518, "step": 1820000 }, { "epoch": 18.71, "learning_rate": 0.026777756049362385, "loss": 2.9575, "step": 1830000 }, { "epoch": 18.82, "learning_rate": 0.02664910095822312, "loss": 2.9584, "step": 1840000 }, { "epoch": 18.92, "learning_rate": 0.026520445867083853, "loss": 2.9594, "step": 1850000 }, { "epoch": 19.02, "learning_rate": 0.026391790775944585, "loss": 2.9578, "step": 1860000 }, { "epoch": 19.12, "learning_rate": 0.026263135684805317, "loss": 2.9535, "step": 1870000 }, { "epoch": 19.23, "learning_rate": 0.026134480593666055, "loss": 2.9552, "step": 1880000 }, { "epoch": 19.33, "learning_rate": 0.026005825502526787, "loss": 2.9568, "step": 1890000 }, { "epoch": 19.43, "learning_rate": 0.02587717041138752, "loss": 2.9583, "step": 1900000 }, { "epoch": 19.53, "learning_rate": 0.025748515320248258, "loss": 2.9596, "step": 1910000 }, { "epoch": 19.64, "learning_rate": 0.02561986022910899, "loss": 2.9599, "step": 1920000 }, { "epoch": 19.74, "learning_rate": 0.025491205137969722, "loss": 2.9615, "step": 1930000 }, { "epoch": 19.84, "learning_rate": 0.025362550046830454, "loss": 2.9622, "step": 1940000 }, { "epoch": 19.94, "learning_rate": 0.02523389495569119, "loss": 2.9637, "step": 1950000 }, { "epoch": 20.04, "learning_rate": 0.02510523986455192, "loss": 2.9559, "step": 1960000 }, { "epoch": 20.15, "learning_rate": 0.024976584773412653, "loss": 2.9496, "step": 1970000 }, { "epoch": 20.25, "learning_rate": 0.02484792968227339, "loss": 2.9516, "step": 1980000 }, { "epoch": 20.35, "learning_rate": 0.024719274591134124, "loss": 2.9522, "step": 1990000 }, { "epoch": 20.45, "learning_rate": 0.024590619499994856, "loss": 2.9538, "step": 2000000 }, { "epoch": 20.56, "learning_rate": 0.02446196440885559, "loss": 2.9552, "step": 2010000 }, { "epoch": 20.66, "learning_rate": 0.024333309317716323, "loss": 2.957, "step": 2020000 }, { "epoch": 20.76, "learning_rate": 0.024204654226577055, "loss": 2.9572, "step": 2030000 }, { "epoch": 20.86, "learning_rate": 0.024075999135437787, "loss": 2.9586, "step": 2040000 }, { "epoch": 20.96, "learning_rate": 0.023947344044298522, "loss": 2.9609, "step": 2050000 }, { "epoch": 21.07, "learning_rate": 0.023818688953159254, "loss": 2.9429, "step": 2060000 }, { "epoch": 21.17, "learning_rate": 0.02369003386201999, "loss": 2.947, "step": 2070000 }, { "epoch": 21.27, "learning_rate": 0.023561378770880725, "loss": 2.9488, "step": 2080000 }, { "epoch": 21.37, "learning_rate": 0.023432723679741457, "loss": 2.9491, "step": 2090000 }, { "epoch": 21.48, "learning_rate": 0.023304068588602192, "loss": 2.9514, "step": 2100000 }, { "epoch": 21.58, "learning_rate": 0.023175413497462924, "loss": 2.9513, "step": 2110000 }, { "epoch": 21.68, "learning_rate": 0.023046758406323656, "loss": 2.9537, "step": 2120000 }, { "epoch": 21.78, "learning_rate": 0.022918103315184388, "loss": 2.9548, "step": 2130000 }, { "epoch": 21.88, "learning_rate": 0.022789448224045123, "loss": 2.9573, "step": 2140000 }, { "epoch": 21.99, "learning_rate": 0.022660793132905855, "loss": 2.9565, "step": 2150000 }, { "epoch": 22.09, "learning_rate": 0.02253213804176659, "loss": 2.9432, "step": 2160000 }, { "epoch": 22.19, "learning_rate": 0.022403482950627322, "loss": 2.9445, "step": 2170000 }, { "epoch": 22.29, "learning_rate": 0.022274827859488058, "loss": 2.9465, "step": 2180000 }, { "epoch": 22.4, "learning_rate": 0.022146172768348793, "loss": 2.9478, "step": 2190000 }, { "epoch": 22.5, "learning_rate": 0.022017517677209525, "loss": 2.9474, "step": 2200000 }, { "epoch": 22.6, "learning_rate": 0.021888862586070257, "loss": 2.9497, "step": 2210000 }, { "epoch": 22.7, "learning_rate": 0.02176020749493099, "loss": 2.9507, "step": 2220000 }, { "epoch": 22.81, "learning_rate": 0.021631552403791724, "loss": 2.9518, "step": 2230000 }, { "epoch": 22.91, "learning_rate": 0.021502897312652456, "loss": 2.9523, "step": 2240000 }, { "epoch": 23.01, "learning_rate": 0.02137424222151319, "loss": 2.951, "step": 2250000 }, { "epoch": 23.11, "learning_rate": 0.021245587130373923, "loss": 2.9394, "step": 2260000 }, { "epoch": 23.21, "learning_rate": 0.02111693203923466, "loss": 2.9426, "step": 2270000 }, { "epoch": 23.32, "learning_rate": 0.020988276948095394, "loss": 2.9425, "step": 2280000 }, { "epoch": 23.42, "learning_rate": 0.020859621856956126, "loss": 2.9449, "step": 2290000 }, { "epoch": 23.52, "learning_rate": 0.020730966765816858, "loss": 2.939, "step": 2300000 }, { "epoch": 23.62, "learning_rate": 0.02060231167467759, "loss": 2.939, "step": 2310000 }, { "epoch": 23.73, "learning_rate": 0.020473656583538325, "loss": 2.9414, "step": 2320000 }, { "epoch": 23.83, "learning_rate": 0.020345001492399057, "loss": 2.9433, "step": 2330000 }, { "epoch": 23.93, "learning_rate": 0.020216346401259792, "loss": 2.9436, "step": 2340000 }, { "epoch": 24.03, "learning_rate": 0.020087691310120524, "loss": 2.9421, "step": 2350000 }, { "epoch": 24.13, "learning_rate": 0.01995903621898126, "loss": 2.9385, "step": 2360000 }, { "epoch": 24.24, "learning_rate": 0.019830381127841995, "loss": 2.9413, "step": 2370000 }, { "epoch": 24.34, "learning_rate": 0.019701726036702727, "loss": 2.9426, "step": 2380000 }, { "epoch": 24.44, "learning_rate": 0.01957307094556346, "loss": 2.9423, "step": 2390000 }, { "epoch": 24.54, "learning_rate": 0.01944441585442419, "loss": 2.9442, "step": 2400000 }, { "epoch": 24.65, "learning_rate": 0.019315760763284926, "loss": 2.9457, "step": 2410000 }, { "epoch": 24.75, "learning_rate": 0.019187105672145658, "loss": 2.9455, "step": 2420000 }, { "epoch": 24.85, "learning_rate": 0.019058450581006393, "loss": 2.947, "step": 2430000 }, { "epoch": 24.95, "learning_rate": 0.018929795489867125, "loss": 2.9464, "step": 2440000 }, { "epoch": 25.06, "learning_rate": 0.01880114039872786, "loss": 2.9411, "step": 2450000 }, { "epoch": 25.16, "learning_rate": 0.018672485307588593, "loss": 2.9362, "step": 2460000 }, { "epoch": 25.26, "learning_rate": 0.018543830216449324, "loss": 2.9373, "step": 2470000 }, { "epoch": 25.36, "learning_rate": 0.01841517512531006, "loss": 2.9395, "step": 2480000 }, { "epoch": 25.46, "learning_rate": 0.018286520034170792, "loss": 2.9404, "step": 2490000 }, { "epoch": 25.57, "learning_rate": 0.018157864943031527, "loss": 2.9412, "step": 2500000 }, { "epoch": 25.67, "learning_rate": 0.01802920985189226, "loss": 2.9425, "step": 2510000 }, { "epoch": 25.77, "learning_rate": 0.017900554760752994, "loss": 2.943, "step": 2520000 }, { "epoch": 25.87, "learning_rate": 0.017771899669613726, "loss": 2.9446, "step": 2530000 }, { "epoch": 25.98, "learning_rate": 0.01764324457847446, "loss": 2.945, "step": 2540000 }, { "epoch": 26.08, "learning_rate": 0.017514589487335194, "loss": 2.9327, "step": 2550000 }, { "epoch": 26.18, "learning_rate": 0.017385934396195925, "loss": 2.9341, "step": 2560000 }, { "epoch": 26.28, "learning_rate": 0.01725727930505666, "loss": 2.9366, "step": 2570000 }, { "epoch": 26.38, "learning_rate": 0.017128624213917393, "loss": 2.9376, "step": 2580000 }, { "epoch": 26.49, "learning_rate": 0.016999969122778128, "loss": 2.9373, "step": 2590000 }, { "epoch": 26.59, "learning_rate": 0.01687131403163886, "loss": 2.9388, "step": 2600000 }, { "epoch": 26.69, "learning_rate": 0.016742658940499595, "loss": 2.9404, "step": 2610000 }, { "epoch": 26.79, "learning_rate": 0.016614003849360327, "loss": 2.9411, "step": 2620000 }, { "epoch": 26.9, "learning_rate": 0.016485348758221063, "loss": 2.943, "step": 2630000 }, { "epoch": 27.0, "learning_rate": 0.016356693667081795, "loss": 2.9421, "step": 2640000 }, { "epoch": 27.1, "learning_rate": 0.016228038575942526, "loss": 2.9313, "step": 2650000 }, { "epoch": 27.2, "learning_rate": 0.016099383484803262, "loss": 2.9337, "step": 2660000 }, { "epoch": 27.31, "learning_rate": 0.015970728393663994, "loss": 2.9341, "step": 2670000 }, { "epoch": 27.41, "learning_rate": 0.01584207330252473, "loss": 2.9353, "step": 2680000 }, { "epoch": 27.51, "learning_rate": 0.01571341821138546, "loss": 2.9359, "step": 2690000 }, { "epoch": 27.61, "learning_rate": 0.015584763120246196, "loss": 2.9363, "step": 2700000 }, { "epoch": 27.71, "learning_rate": 0.015456108029106928, "loss": 2.9387, "step": 2710000 }, { "epoch": 27.82, "learning_rate": 0.015327452937967662, "loss": 2.9388, "step": 2720000 }, { "epoch": 27.92, "learning_rate": 0.015198797846828394, "loss": 2.9399, "step": 2730000 }, { "epoch": 28.02, "learning_rate": 0.01507014275568913, "loss": 2.9384, "step": 2740000 }, { "epoch": 28.12, "learning_rate": 0.014941487664549863, "loss": 2.9305, "step": 2750000 }, { "epoch": 28.23, "learning_rate": 0.014812832573410595, "loss": 2.9325, "step": 2760000 }, { "epoch": 28.33, "learning_rate": 0.01468417748227133, "loss": 2.9332, "step": 2770000 }, { "epoch": 28.43, "learning_rate": 0.014555522391132062, "loss": 2.9339, "step": 2780000 } ], "max_steps": 3911360, "num_train_epochs": 40, "total_flos": 1.8137583487197538e+20, "trial_name": null, "trial_params": null }