{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9549530730091467, "global_step": 320000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.9925394291171165e-05, "loss": 8.8395, "step": 500 }, { "epoch": 0.0, "learning_rate": 4.985078858234232e-05, "loss": 8.3374, "step": 1000 }, { "epoch": 0.0, "learning_rate": 4.9776182873513485e-05, "loss": 8.1726, "step": 1500 }, { "epoch": 0.01, "learning_rate": 4.970157716468464e-05, "loss": 8.0495, "step": 2000 }, { "epoch": 0.01, "learning_rate": 4.9626971455855804e-05, "loss": 7.9605, "step": 2500 }, { "epoch": 0.01, "learning_rate": 4.955236574702696e-05, "loss": 7.9116, "step": 3000 }, { "epoch": 0.01, "learning_rate": 4.947776003819812e-05, "loss": 7.8568, "step": 3500 }, { "epoch": 0.01, "learning_rate": 4.9403154329369286e-05, "loss": 7.8353, "step": 4000 }, { "epoch": 0.01, "learning_rate": 4.932854862054044e-05, "loss": 7.7883, "step": 4500 }, { "epoch": 0.01, "learning_rate": 4.925394291171161e-05, "loss": 7.7753, "step": 5000 }, { "epoch": 0.02, "learning_rate": 4.917933720288277e-05, "loss": 7.7461, "step": 5500 }, { "epoch": 0.02, "learning_rate": 4.910473149405393e-05, "loss": 7.7202, "step": 6000 }, { "epoch": 0.02, "learning_rate": 4.903012578522509e-05, "loss": 7.7102, "step": 6500 }, { "epoch": 0.02, "learning_rate": 4.895552007639625e-05, "loss": 7.6642, "step": 7000 }, { "epoch": 0.02, "learning_rate": 4.888091436756741e-05, "loss": 7.6294, "step": 7500 }, { "epoch": 0.02, "learning_rate": 4.880630865873857e-05, "loss": 7.6012, "step": 8000 }, { "epoch": 0.03, "learning_rate": 4.873170294990973e-05, "loss": 7.5369, "step": 8500 }, { "epoch": 0.03, "learning_rate": 4.865709724108089e-05, "loss": 7.4688, "step": 9000 }, { "epoch": 0.03, "learning_rate": 4.858249153225205e-05, "loss": 7.3744, "step": 9500 }, { "epoch": 0.03, "learning_rate": 4.850788582342321e-05, "loss": 7.2764, "step": 10000 }, { "epoch": 0.03, "learning_rate": 4.843328011459437e-05, "loss": 7.1243, "step": 10500 }, { "epoch": 0.03, "learning_rate": 4.835867440576553e-05, "loss": 7.0236, "step": 11000 }, { "epoch": 0.03, "learning_rate": 4.828406869693669e-05, "loss": 6.8878, "step": 11500 }, { "epoch": 0.04, "learning_rate": 4.820946298810785e-05, "loss": 6.795, "step": 12000 }, { "epoch": 0.04, "learning_rate": 4.813485727927901e-05, "loss": 6.694, "step": 12500 }, { "epoch": 0.04, "learning_rate": 4.806025157045017e-05, "loss": 6.6033, "step": 13000 }, { "epoch": 0.04, "learning_rate": 4.798564586162133e-05, "loss": 6.492, "step": 13500 }, { "epoch": 0.04, "learning_rate": 4.791104015279249e-05, "loss": 6.4154, "step": 14000 }, { "epoch": 0.04, "learning_rate": 4.7836434443963654e-05, "loss": 6.3564, "step": 14500 }, { "epoch": 0.04, "learning_rate": 4.776182873513482e-05, "loss": 6.2773, "step": 15000 }, { "epoch": 0.05, "learning_rate": 4.768722302630598e-05, "loss": 6.2294, "step": 15500 }, { "epoch": 0.05, "learning_rate": 4.7612617317477136e-05, "loss": 6.1543, "step": 16000 }, { "epoch": 0.05, "learning_rate": 4.75380116086483e-05, "loss": 6.1043, "step": 16500 }, { "epoch": 0.05, "learning_rate": 4.7463405899819455e-05, "loss": 6.0266, "step": 17000 }, { "epoch": 0.05, "learning_rate": 4.738880019099062e-05, "loss": 5.9925, "step": 17500 }, { "epoch": 0.05, "learning_rate": 4.731419448216178e-05, "loss": 5.955, "step": 18000 }, { "epoch": 0.06, "learning_rate": 4.723958877333294e-05, "loss": 5.8802, "step": 18500 }, { "epoch": 0.06, "learning_rate": 4.71649830645041e-05, "loss": 5.8472, "step": 19000 }, { "epoch": 0.06, "learning_rate": 4.7090377355675256e-05, "loss": 5.7843, "step": 19500 }, { "epoch": 0.06, "learning_rate": 4.701577164684642e-05, "loss": 5.7739, "step": 20000 }, { "epoch": 0.06, "learning_rate": 4.6941165938017575e-05, "loss": 5.6922, "step": 20500 }, { "epoch": 0.06, "learning_rate": 4.686656022918874e-05, "loss": 5.6593, "step": 21000 }, { "epoch": 0.06, "learning_rate": 4.67919545203599e-05, "loss": 5.6313, "step": 21500 }, { "epoch": 0.07, "learning_rate": 4.671734881153106e-05, "loss": 5.593, "step": 22000 }, { "epoch": 0.07, "learning_rate": 4.664274310270222e-05, "loss": 5.5471, "step": 22500 }, { "epoch": 0.07, "learning_rate": 4.656813739387338e-05, "loss": 5.49, "step": 23000 }, { "epoch": 0.07, "learning_rate": 4.6493531685044546e-05, "loss": 5.4655, "step": 23500 }, { "epoch": 0.07, "learning_rate": 4.64189259762157e-05, "loss": 5.4349, "step": 24000 }, { "epoch": 0.07, "learning_rate": 4.6344320267386866e-05, "loss": 5.3876, "step": 24500 }, { "epoch": 0.07, "learning_rate": 4.626971455855802e-05, "loss": 5.378, "step": 25000 }, { "epoch": 0.08, "learning_rate": 4.6195108849729185e-05, "loss": 5.3231, "step": 25500 }, { "epoch": 0.08, "learning_rate": 4.612050314090035e-05, "loss": 5.2968, "step": 26000 }, { "epoch": 0.08, "learning_rate": 4.6045897432071504e-05, "loss": 5.2649, "step": 26500 }, { "epoch": 0.08, "learning_rate": 4.597129172324267e-05, "loss": 5.2329, "step": 27000 }, { "epoch": 0.08, "learning_rate": 4.589668601441382e-05, "loss": 5.2015, "step": 27500 }, { "epoch": 0.08, "learning_rate": 4.5822080305584986e-05, "loss": 5.1858, "step": 28000 }, { "epoch": 0.09, "learning_rate": 4.574747459675615e-05, "loss": 5.1435, "step": 28500 }, { "epoch": 0.09, "learning_rate": 4.5672868887927305e-05, "loss": 5.1239, "step": 29000 }, { "epoch": 0.09, "learning_rate": 4.559826317909847e-05, "loss": 5.0947, "step": 29500 }, { "epoch": 0.09, "learning_rate": 4.5523657470269624e-05, "loss": 5.0852, "step": 30000 }, { "epoch": 0.09, "learning_rate": 4.544905176144079e-05, "loss": 5.0373, "step": 30500 }, { "epoch": 0.09, "learning_rate": 4.5374446052611943e-05, "loss": 5.0134, "step": 31000 }, { "epoch": 0.09, "learning_rate": 4.5299840343783106e-05, "loss": 5.0094, "step": 31500 }, { "epoch": 0.1, "learning_rate": 4.522523463495427e-05, "loss": 4.9855, "step": 32000 }, { "epoch": 0.1, "learning_rate": 4.5150628926125426e-05, "loss": 4.943, "step": 32500 }, { "epoch": 0.1, "learning_rate": 4.5076023217296595e-05, "loss": 4.9091, "step": 33000 }, { "epoch": 0.1, "learning_rate": 4.500141750846775e-05, "loss": 4.8988, "step": 33500 }, { "epoch": 0.1, "learning_rate": 4.4926811799638914e-05, "loss": 4.8703, "step": 34000 }, { "epoch": 0.1, "learning_rate": 4.485220609081007e-05, "loss": 4.8395, "step": 34500 }, { "epoch": 0.1, "learning_rate": 4.4777600381981234e-05, "loss": 4.8175, "step": 35000 }, { "epoch": 0.11, "learning_rate": 4.470299467315239e-05, "loss": 4.8181, "step": 35500 }, { "epoch": 0.11, "learning_rate": 4.462838896432355e-05, "loss": 4.8026, "step": 36000 }, { "epoch": 0.11, "learning_rate": 4.4553783255494716e-05, "loss": 4.7798, "step": 36500 }, { "epoch": 0.11, "learning_rate": 4.447917754666587e-05, "loss": 4.741, "step": 37000 }, { "epoch": 0.11, "learning_rate": 4.4404571837837035e-05, "loss": 4.7337, "step": 37500 }, { "epoch": 0.11, "learning_rate": 4.432996612900819e-05, "loss": 4.7278, "step": 38000 }, { "epoch": 0.11, "learning_rate": 4.4255360420179354e-05, "loss": 4.7057, "step": 38500 }, { "epoch": 0.12, "learning_rate": 4.418075471135052e-05, "loss": 4.6702, "step": 39000 }, { "epoch": 0.12, "learning_rate": 4.410614900252167e-05, "loss": 4.6704, "step": 39500 }, { "epoch": 0.12, "learning_rate": 4.4031543293692836e-05, "loss": 4.647, "step": 40000 }, { "epoch": 0.12, "learning_rate": 4.395693758486399e-05, "loss": 4.6325, "step": 40500 }, { "epoch": 0.12, "learning_rate": 4.3882331876035155e-05, "loss": 4.6142, "step": 41000 }, { "epoch": 0.12, "learning_rate": 4.380772616720631e-05, "loss": 4.5979, "step": 41500 }, { "epoch": 0.13, "learning_rate": 4.3733120458377474e-05, "loss": 4.5934, "step": 42000 }, { "epoch": 0.13, "learning_rate": 4.365851474954864e-05, "loss": 4.558, "step": 42500 }, { "epoch": 0.13, "learning_rate": 4.35839090407198e-05, "loss": 4.5537, "step": 43000 }, { "epoch": 0.13, "learning_rate": 4.350930333189096e-05, "loss": 4.5202, "step": 43500 }, { "epoch": 0.13, "learning_rate": 4.343469762306212e-05, "loss": 4.5238, "step": 44000 }, { "epoch": 0.13, "learning_rate": 4.336009191423328e-05, "loss": 4.5052, "step": 44500 }, { "epoch": 0.13, "learning_rate": 4.328548620540444e-05, "loss": 4.4972, "step": 45000 }, { "epoch": 0.14, "learning_rate": 4.32108804965756e-05, "loss": 4.4751, "step": 45500 }, { "epoch": 0.14, "learning_rate": 4.313627478774676e-05, "loss": 4.4441, "step": 46000 }, { "epoch": 0.14, "learning_rate": 4.306166907891792e-05, "loss": 4.4482, "step": 46500 }, { "epoch": 0.14, "learning_rate": 4.2987063370089084e-05, "loss": 4.4365, "step": 47000 }, { "epoch": 0.14, "learning_rate": 4.291245766126024e-05, "loss": 4.4211, "step": 47500 }, { "epoch": 0.14, "learning_rate": 4.28378519524314e-05, "loss": 4.4135, "step": 48000 }, { "epoch": 0.14, "learning_rate": 4.276324624360256e-05, "loss": 4.3697, "step": 48500 }, { "epoch": 0.15, "learning_rate": 4.268864053477372e-05, "loss": 4.4008, "step": 49000 }, { "epoch": 0.15, "learning_rate": 4.2614034825944885e-05, "loss": 4.3786, "step": 49500 }, { "epoch": 0.15, "learning_rate": 4.253942911711604e-05, "loss": 4.3466, "step": 50000 }, { "epoch": 0.15, "learning_rate": 4.2464823408287204e-05, "loss": 4.3375, "step": 50500 }, { "epoch": 0.15, "learning_rate": 4.239021769945836e-05, "loss": 4.3199, "step": 51000 }, { "epoch": 0.15, "learning_rate": 4.231561199062952e-05, "loss": 4.3171, "step": 51500 }, { "epoch": 0.16, "learning_rate": 4.2241006281800686e-05, "loss": 4.3012, "step": 52000 }, { "epoch": 0.16, "learning_rate": 4.216640057297185e-05, "loss": 4.273, "step": 52500 }, { "epoch": 0.16, "learning_rate": 4.2091794864143005e-05, "loss": 4.287, "step": 53000 }, { "epoch": 0.16, "learning_rate": 4.201718915531417e-05, "loss": 4.2666, "step": 53500 }, { "epoch": 0.16, "learning_rate": 4.194258344648533e-05, "loss": 4.2536, "step": 54000 }, { "epoch": 0.16, "learning_rate": 4.186797773765649e-05, "loss": 4.2345, "step": 54500 }, { "epoch": 0.16, "learning_rate": 4.179337202882765e-05, "loss": 4.2495, "step": 55000 }, { "epoch": 0.17, "learning_rate": 4.1718766319998807e-05, "loss": 4.2232, "step": 55500 }, { "epoch": 0.17, "learning_rate": 4.164416061116997e-05, "loss": 4.219, "step": 56000 }, { "epoch": 0.17, "learning_rate": 4.1569554902341126e-05, "loss": 4.203, "step": 56500 }, { "epoch": 0.17, "learning_rate": 4.149494919351229e-05, "loss": 4.208, "step": 57000 }, { "epoch": 0.17, "learning_rate": 4.142034348468345e-05, "loss": 4.1815, "step": 57500 }, { "epoch": 0.17, "learning_rate": 4.134573777585461e-05, "loss": 4.1828, "step": 58000 }, { "epoch": 0.17, "learning_rate": 4.127113206702577e-05, "loss": 4.1539, "step": 58500 }, { "epoch": 0.18, "learning_rate": 4.119652635819693e-05, "loss": 4.1365, "step": 59000 }, { "epoch": 0.18, "learning_rate": 4.112192064936809e-05, "loss": 4.1362, "step": 59500 }, { "epoch": 0.18, "learning_rate": 4.104731494053925e-05, "loss": 4.1366, "step": 60000 }, { "epoch": 0.18, "learning_rate": 4.097270923171041e-05, "loss": 4.1265, "step": 60500 }, { "epoch": 0.18, "learning_rate": 4.089810352288157e-05, "loss": 4.1251, "step": 61000 }, { "epoch": 0.18, "learning_rate": 4.0823497814052735e-05, "loss": 4.1033, "step": 61500 }, { "epoch": 0.19, "learning_rate": 4.07488921052239e-05, "loss": 4.0914, "step": 62000 }, { "epoch": 0.19, "learning_rate": 4.0674286396395054e-05, "loss": 4.064, "step": 62500 }, { "epoch": 0.19, "learning_rate": 4.059968068756622e-05, "loss": 4.0919, "step": 63000 }, { "epoch": 0.19, "learning_rate": 4.052507497873737e-05, "loss": 4.0719, "step": 63500 }, { "epoch": 0.19, "learning_rate": 4.0450469269908536e-05, "loss": 4.0527, "step": 64000 }, { "epoch": 0.19, "learning_rate": 4.03758635610797e-05, "loss": 4.0483, "step": 64500 }, { "epoch": 0.19, "learning_rate": 4.0301257852250855e-05, "loss": 4.0287, "step": 65000 }, { "epoch": 0.2, "learning_rate": 4.022665214342202e-05, "loss": 4.0354, "step": 65500 }, { "epoch": 0.2, "learning_rate": 4.0152046434593175e-05, "loss": 4.0273, "step": 66000 }, { "epoch": 0.2, "learning_rate": 4.007744072576434e-05, "loss": 4.0207, "step": 66500 }, { "epoch": 0.2, "learning_rate": 4.0002835016935494e-05, "loss": 4.0063, "step": 67000 }, { "epoch": 0.2, "learning_rate": 3.992822930810666e-05, "loss": 3.9946, "step": 67500 }, { "epoch": 0.2, "learning_rate": 3.985362359927782e-05, "loss": 3.9905, "step": 68000 }, { "epoch": 0.2, "learning_rate": 3.9779017890448976e-05, "loss": 3.9737, "step": 68500 }, { "epoch": 0.21, "learning_rate": 3.970441218162014e-05, "loss": 3.965, "step": 69000 }, { "epoch": 0.21, "learning_rate": 3.9629806472791295e-05, "loss": 3.9595, "step": 69500 }, { "epoch": 0.21, "learning_rate": 3.955520076396246e-05, "loss": 3.9496, "step": 70000 }, { "epoch": 0.21, "learning_rate": 3.948059505513362e-05, "loss": 3.9545, "step": 70500 }, { "epoch": 0.21, "learning_rate": 3.9405989346304784e-05, "loss": 3.9179, "step": 71000 }, { "epoch": 0.21, "learning_rate": 3.933138363747595e-05, "loss": 3.9365, "step": 71500 }, { "epoch": 0.21, "learning_rate": 3.92567779286471e-05, "loss": 3.9161, "step": 72000 }, { "epoch": 0.22, "learning_rate": 3.9182172219818266e-05, "loss": 3.8996, "step": 72500 }, { "epoch": 0.22, "learning_rate": 3.910756651098942e-05, "loss": 3.8962, "step": 73000 }, { "epoch": 0.22, "learning_rate": 3.9032960802160585e-05, "loss": 3.8996, "step": 73500 }, { "epoch": 0.22, "learning_rate": 3.895835509333174e-05, "loss": 3.8986, "step": 74000 }, { "epoch": 0.22, "learning_rate": 3.8883749384502904e-05, "loss": 3.8726, "step": 74500 }, { "epoch": 0.22, "learning_rate": 3.880914367567407e-05, "loss": 3.8836, "step": 75000 }, { "epoch": 0.23, "learning_rate": 3.8734537966845223e-05, "loss": 3.8637, "step": 75500 }, { "epoch": 0.23, "learning_rate": 3.8659932258016386e-05, "loss": 3.8579, "step": 76000 }, { "epoch": 0.23, "learning_rate": 3.858532654918754e-05, "loss": 3.8529, "step": 76500 }, { "epoch": 0.23, "learning_rate": 3.8510720840358706e-05, "loss": 3.8406, "step": 77000 }, { "epoch": 0.23, "learning_rate": 3.843611513152986e-05, "loss": 3.8452, "step": 77500 }, { "epoch": 0.23, "learning_rate": 3.8361509422701025e-05, "loss": 3.8343, "step": 78000 }, { "epoch": 0.23, "learning_rate": 3.828690371387219e-05, "loss": 3.8337, "step": 78500 }, { "epoch": 0.24, "learning_rate": 3.8212298005043344e-05, "loss": 3.82, "step": 79000 }, { "epoch": 0.24, "learning_rate": 3.813769229621451e-05, "loss": 3.8208, "step": 79500 }, { "epoch": 0.24, "learning_rate": 3.806308658738567e-05, "loss": 3.8178, "step": 80000 }, { "epoch": 0.24, "learning_rate": 3.798848087855683e-05, "loss": 3.7932, "step": 80500 }, { "epoch": 0.24, "learning_rate": 3.791387516972799e-05, "loss": 3.7966, "step": 81000 }, { "epoch": 0.24, "learning_rate": 3.783926946089915e-05, "loss": 3.7977, "step": 81500 }, { "epoch": 0.24, "learning_rate": 3.7764663752070315e-05, "loss": 3.7823, "step": 82000 }, { "epoch": 0.25, "learning_rate": 3.769005804324147e-05, "loss": 3.7802, "step": 82500 }, { "epoch": 0.25, "learning_rate": 3.7615452334412634e-05, "loss": 3.7686, "step": 83000 }, { "epoch": 0.25, "learning_rate": 3.754084662558379e-05, "loss": 3.7709, "step": 83500 }, { "epoch": 0.25, "learning_rate": 3.746624091675495e-05, "loss": 3.7671, "step": 84000 }, { "epoch": 0.25, "learning_rate": 3.739163520792611e-05, "loss": 3.7559, "step": 84500 }, { "epoch": 0.25, "learning_rate": 3.731702949909727e-05, "loss": 3.7743, "step": 85000 }, { "epoch": 0.26, "learning_rate": 3.7242423790268435e-05, "loss": 3.7467, "step": 85500 }, { "epoch": 0.26, "learning_rate": 3.716781808143959e-05, "loss": 3.745, "step": 86000 }, { "epoch": 0.26, "learning_rate": 3.7093212372610754e-05, "loss": 3.7379, "step": 86500 }, { "epoch": 0.26, "learning_rate": 3.701860666378191e-05, "loss": 3.7105, "step": 87000 }, { "epoch": 0.26, "learning_rate": 3.6944000954953074e-05, "loss": 3.7341, "step": 87500 }, { "epoch": 0.26, "learning_rate": 3.6869395246124237e-05, "loss": 3.7121, "step": 88000 }, { "epoch": 0.26, "learning_rate": 3.679478953729539e-05, "loss": 3.7154, "step": 88500 }, { "epoch": 0.27, "learning_rate": 3.6720183828466556e-05, "loss": 3.6959, "step": 89000 }, { "epoch": 0.27, "learning_rate": 3.664557811963772e-05, "loss": 3.6943, "step": 89500 }, { "epoch": 0.27, "learning_rate": 3.657097241080888e-05, "loss": 3.6909, "step": 90000 }, { "epoch": 0.27, "learning_rate": 3.649636670198004e-05, "loss": 3.6937, "step": 90500 }, { "epoch": 0.27, "learning_rate": 3.64217609931512e-05, "loss": 3.6994, "step": 91000 }, { "epoch": 0.27, "learning_rate": 3.634715528432236e-05, "loss": 3.6758, "step": 91500 }, { "epoch": 0.27, "learning_rate": 3.627254957549352e-05, "loss": 3.6812, "step": 92000 }, { "epoch": 0.28, "learning_rate": 3.619794386666468e-05, "loss": 3.6782, "step": 92500 }, { "epoch": 0.28, "learning_rate": 3.612333815783584e-05, "loss": 3.6732, "step": 93000 }, { "epoch": 0.28, "learning_rate": 3.6048732449007e-05, "loss": 3.6677, "step": 93500 }, { "epoch": 0.28, "learning_rate": 3.597412674017816e-05, "loss": 3.6845, "step": 94000 }, { "epoch": 0.28, "learning_rate": 3.589952103134932e-05, "loss": 3.6512, "step": 94500 }, { "epoch": 0.28, "learning_rate": 3.582491532252048e-05, "loss": 3.6636, "step": 95000 }, { "epoch": 0.28, "learning_rate": 3.575030961369164e-05, "loss": 3.6355, "step": 95500 }, { "epoch": 0.29, "learning_rate": 3.56757039048628e-05, "loss": 3.6325, "step": 96000 }, { "epoch": 0.29, "learning_rate": 3.560109819603396e-05, "loss": 3.6498, "step": 96500 }, { "epoch": 0.29, "learning_rate": 3.552649248720512e-05, "loss": 3.6384, "step": 97000 }, { "epoch": 0.29, "learning_rate": 3.545188677837628e-05, "loss": 3.6325, "step": 97500 }, { "epoch": 0.29, "learning_rate": 3.537728106954744e-05, "loss": 3.6313, "step": 98000 }, { "epoch": 0.29, "learning_rate": 3.5302675360718605e-05, "loss": 3.6097, "step": 98500 }, { "epoch": 0.3, "learning_rate": 3.522806965188977e-05, "loss": 3.6196, "step": 99000 }, { "epoch": 0.3, "learning_rate": 3.5153463943060924e-05, "loss": 3.6109, "step": 99500 }, { "epoch": 0.3, "learning_rate": 3.507885823423209e-05, "loss": 3.6011, "step": 100000 }, { "epoch": 0.3, "learning_rate": 3.500425252540325e-05, "loss": 3.6124, "step": 100500 }, { "epoch": 0.3, "learning_rate": 3.4929646816574406e-05, "loss": 3.5833, "step": 101000 }, { "epoch": 0.3, "learning_rate": 3.485504110774557e-05, "loss": 3.6067, "step": 101500 }, { "epoch": 0.3, "learning_rate": 3.4780435398916725e-05, "loss": 3.5863, "step": 102000 }, { "epoch": 0.31, "learning_rate": 3.470582969008789e-05, "loss": 3.6031, "step": 102500 }, { "epoch": 0.31, "learning_rate": 3.463122398125905e-05, "loss": 3.571, "step": 103000 }, { "epoch": 0.31, "learning_rate": 3.455661827243021e-05, "loss": 3.5863, "step": 103500 }, { "epoch": 0.31, "learning_rate": 3.448201256360137e-05, "loss": 3.5817, "step": 104000 }, { "epoch": 0.31, "learning_rate": 3.4407406854772526e-05, "loss": 3.5754, "step": 104500 }, { "epoch": 0.31, "learning_rate": 3.433280114594369e-05, "loss": 3.5687, "step": 105000 }, { "epoch": 0.31, "learning_rate": 3.4258195437114845e-05, "loss": 3.5607, "step": 105500 }, { "epoch": 0.32, "learning_rate": 3.418358972828601e-05, "loss": 3.5635, "step": 106000 }, { "epoch": 0.32, "learning_rate": 3.410898401945717e-05, "loss": 3.5597, "step": 106500 }, { "epoch": 0.32, "learning_rate": 3.403437831062833e-05, "loss": 3.5514, "step": 107000 }, { "epoch": 0.32, "learning_rate": 3.395977260179949e-05, "loss": 3.5589, "step": 107500 }, { "epoch": 0.32, "learning_rate": 3.388516689297065e-05, "loss": 3.5548, "step": 108000 }, { "epoch": 0.32, "learning_rate": 3.3810561184141816e-05, "loss": 3.5499, "step": 108500 }, { "epoch": 0.33, "learning_rate": 3.373595547531297e-05, "loss": 3.5236, "step": 109000 }, { "epoch": 0.33, "learning_rate": 3.3661349766484135e-05, "loss": 3.5433, "step": 109500 }, { "epoch": 0.33, "learning_rate": 3.358674405765529e-05, "loss": 3.5355, "step": 110000 }, { "epoch": 0.33, "learning_rate": 3.3512138348826455e-05, "loss": 3.5246, "step": 110500 }, { "epoch": 0.33, "learning_rate": 3.343753263999762e-05, "loss": 3.5352, "step": 111000 }, { "epoch": 0.33, "learning_rate": 3.3362926931168774e-05, "loss": 3.5268, "step": 111500 }, { "epoch": 0.33, "learning_rate": 3.328832122233994e-05, "loss": 3.5209, "step": 112000 }, { "epoch": 0.34, "learning_rate": 3.321371551351109e-05, "loss": 3.4999, "step": 112500 }, { "epoch": 0.34, "learning_rate": 3.3139109804682256e-05, "loss": 3.5197, "step": 113000 }, { "epoch": 0.34, "learning_rate": 3.306450409585342e-05, "loss": 3.5116, "step": 113500 }, { "epoch": 0.34, "learning_rate": 3.2989898387024575e-05, "loss": 3.5009, "step": 114000 }, { "epoch": 0.34, "learning_rate": 3.291529267819574e-05, "loss": 3.5102, "step": 114500 }, { "epoch": 0.34, "learning_rate": 3.2840686969366894e-05, "loss": 3.5053, "step": 115000 }, { "epoch": 0.34, "learning_rate": 3.276608126053806e-05, "loss": 3.4856, "step": 115500 }, { "epoch": 0.35, "learning_rate": 3.269147555170921e-05, "loss": 3.4952, "step": 116000 }, { "epoch": 0.35, "learning_rate": 3.2616869842880376e-05, "loss": 3.4888, "step": 116500 }, { "epoch": 0.35, "learning_rate": 3.254226413405154e-05, "loss": 3.493, "step": 117000 }, { "epoch": 0.35, "learning_rate": 3.24676584252227e-05, "loss": 3.4838, "step": 117500 }, { "epoch": 0.35, "learning_rate": 3.2393052716393865e-05, "loss": 3.4716, "step": 118000 }, { "epoch": 0.35, "learning_rate": 3.231844700756502e-05, "loss": 3.4816, "step": 118500 }, { "epoch": 0.36, "learning_rate": 3.2243841298736184e-05, "loss": 3.4613, "step": 119000 }, { "epoch": 0.36, "learning_rate": 3.216923558990734e-05, "loss": 3.4446, "step": 119500 }, { "epoch": 0.36, "learning_rate": 3.2094629881078503e-05, "loss": 3.4699, "step": 120000 }, { "epoch": 0.36, "learning_rate": 3.202002417224966e-05, "loss": 3.4482, "step": 120500 }, { "epoch": 0.36, "learning_rate": 3.194541846342082e-05, "loss": 3.464, "step": 121000 }, { "epoch": 0.36, "learning_rate": 3.1870812754591986e-05, "loss": 3.4557, "step": 121500 }, { "epoch": 0.36, "learning_rate": 3.179620704576314e-05, "loss": 3.4628, "step": 122000 }, { "epoch": 0.37, "learning_rate": 3.1721601336934305e-05, "loss": 3.4543, "step": 122500 }, { "epoch": 0.37, "learning_rate": 3.164699562810546e-05, "loss": 3.4459, "step": 123000 }, { "epoch": 0.37, "learning_rate": 3.1572389919276624e-05, "loss": 3.4314, "step": 123500 }, { "epoch": 0.37, "learning_rate": 3.149778421044779e-05, "loss": 3.4374, "step": 124000 }, { "epoch": 0.37, "learning_rate": 3.142317850161894e-05, "loss": 3.4492, "step": 124500 }, { "epoch": 0.37, "learning_rate": 3.1348572792790106e-05, "loss": 3.4237, "step": 125000 }, { "epoch": 0.37, "learning_rate": 3.127396708396126e-05, "loss": 3.4221, "step": 125500 }, { "epoch": 0.38, "learning_rate": 3.1199361375132425e-05, "loss": 3.4291, "step": 126000 }, { "epoch": 0.38, "learning_rate": 3.112475566630358e-05, "loss": 3.4286, "step": 126500 }, { "epoch": 0.38, "learning_rate": 3.105014995747475e-05, "loss": 3.4244, "step": 127000 }, { "epoch": 0.38, "learning_rate": 3.097554424864591e-05, "loss": 3.4044, "step": 127500 }, { "epoch": 0.38, "learning_rate": 3.090093853981707e-05, "loss": 3.414, "step": 128000 }, { "epoch": 0.38, "learning_rate": 3.082633283098823e-05, "loss": 3.3993, "step": 128500 }, { "epoch": 0.38, "learning_rate": 3.075172712215939e-05, "loss": 3.3988, "step": 129000 }, { "epoch": 0.39, "learning_rate": 3.067712141333055e-05, "loss": 3.4115, "step": 129500 }, { "epoch": 0.39, "learning_rate": 3.060251570450171e-05, "loss": 3.4092, "step": 130000 }, { "epoch": 0.39, "learning_rate": 3.052790999567287e-05, "loss": 3.4079, "step": 130500 }, { "epoch": 0.39, "learning_rate": 3.045330428684403e-05, "loss": 3.4052, "step": 131000 }, { "epoch": 0.39, "learning_rate": 3.037869857801519e-05, "loss": 3.4042, "step": 131500 }, { "epoch": 0.39, "learning_rate": 3.030409286918635e-05, "loss": 3.4048, "step": 132000 }, { "epoch": 0.4, "learning_rate": 3.022948716035751e-05, "loss": 3.394, "step": 132500 }, { "epoch": 0.4, "learning_rate": 3.0154881451528673e-05, "loss": 3.3992, "step": 133000 }, { "epoch": 0.4, "learning_rate": 3.0080275742699832e-05, "loss": 3.3902, "step": 133500 }, { "epoch": 0.4, "learning_rate": 3.0005670033870992e-05, "loss": 3.3865, "step": 134000 }, { "epoch": 0.4, "learning_rate": 2.993106432504215e-05, "loss": 3.3717, "step": 134500 }, { "epoch": 0.4, "learning_rate": 2.985645861621331e-05, "loss": 3.3644, "step": 135000 }, { "epoch": 0.4, "learning_rate": 2.978185290738447e-05, "loss": 3.3742, "step": 135500 }, { "epoch": 0.41, "learning_rate": 2.9707247198555634e-05, "loss": 3.3768, "step": 136000 }, { "epoch": 0.41, "learning_rate": 2.9632641489726797e-05, "loss": 3.3777, "step": 136500 }, { "epoch": 0.41, "learning_rate": 2.955803578089796e-05, "loss": 3.3829, "step": 137000 }, { "epoch": 0.41, "learning_rate": 2.948343007206912e-05, "loss": 3.3844, "step": 137500 }, { "epoch": 0.41, "learning_rate": 2.940882436324028e-05, "loss": 3.3808, "step": 138000 }, { "epoch": 0.41, "learning_rate": 2.9334218654411438e-05, "loss": 3.383, "step": 138500 }, { "epoch": 0.41, "learning_rate": 2.9259612945582598e-05, "loss": 3.3472, "step": 139000 }, { "epoch": 0.42, "learning_rate": 2.9185007236753757e-05, "loss": 3.3513, "step": 139500 }, { "epoch": 0.42, "learning_rate": 2.911040152792492e-05, "loss": 3.3708, "step": 140000 }, { "epoch": 0.42, "learning_rate": 2.903579581909608e-05, "loss": 3.3629, "step": 140500 }, { "epoch": 0.42, "learning_rate": 2.896119011026724e-05, "loss": 3.347, "step": 141000 }, { "epoch": 0.42, "learning_rate": 2.88865844014384e-05, "loss": 3.3549, "step": 141500 }, { "epoch": 0.42, "learning_rate": 2.881197869260956e-05, "loss": 3.3466, "step": 142000 }, { "epoch": 0.43, "learning_rate": 2.8737372983780718e-05, "loss": 3.3347, "step": 142500 }, { "epoch": 0.43, "learning_rate": 2.8662767274951878e-05, "loss": 3.3401, "step": 143000 }, { "epoch": 0.43, "learning_rate": 2.858816156612304e-05, "loss": 3.3469, "step": 143500 }, { "epoch": 0.43, "learning_rate": 2.85135558572942e-05, "loss": 3.338, "step": 144000 }, { "epoch": 0.43, "learning_rate": 2.843895014846536e-05, "loss": 3.3338, "step": 144500 }, { "epoch": 0.43, "learning_rate": 2.836434443963652e-05, "loss": 3.3499, "step": 145000 }, { "epoch": 0.43, "learning_rate": 2.828973873080768e-05, "loss": 3.3313, "step": 145500 }, { "epoch": 0.44, "learning_rate": 2.8215133021978845e-05, "loss": 3.3263, "step": 146000 }, { "epoch": 0.44, "learning_rate": 2.8140527313150005e-05, "loss": 3.3386, "step": 146500 }, { "epoch": 0.44, "learning_rate": 2.8065921604321165e-05, "loss": 3.3229, "step": 147000 }, { "epoch": 0.44, "learning_rate": 2.7991315895492327e-05, "loss": 3.3186, "step": 147500 }, { "epoch": 0.44, "learning_rate": 2.7916710186663487e-05, "loss": 3.3272, "step": 148000 }, { "epoch": 0.44, "learning_rate": 2.7842104477834647e-05, "loss": 3.3052, "step": 148500 }, { "epoch": 0.44, "learning_rate": 2.7767498769005806e-05, "loss": 3.321, "step": 149000 }, { "epoch": 0.45, "learning_rate": 2.7692893060176966e-05, "loss": 3.3188, "step": 149500 }, { "epoch": 0.45, "learning_rate": 2.7618287351348125e-05, "loss": 3.3118, "step": 150000 }, { "epoch": 0.45, "learning_rate": 2.7543681642519288e-05, "loss": 3.3176, "step": 150500 }, { "epoch": 0.45, "learning_rate": 2.7469075933690448e-05, "loss": 3.3039, "step": 151000 }, { "epoch": 0.45, "learning_rate": 2.7394470224861607e-05, "loss": 3.2896, "step": 151500 }, { "epoch": 0.45, "learning_rate": 2.7319864516032767e-05, "loss": 3.2964, "step": 152000 }, { "epoch": 0.46, "learning_rate": 2.7245258807203927e-05, "loss": 3.2906, "step": 152500 }, { "epoch": 0.46, "learning_rate": 2.7170653098375086e-05, "loss": 3.2886, "step": 153000 }, { "epoch": 0.46, "learning_rate": 2.709604738954625e-05, "loss": 3.2987, "step": 153500 }, { "epoch": 0.46, "learning_rate": 2.702144168071741e-05, "loss": 3.272, "step": 154000 }, { "epoch": 0.46, "learning_rate": 2.6946835971888568e-05, "loss": 3.2735, "step": 154500 }, { "epoch": 0.46, "learning_rate": 2.6872230263059735e-05, "loss": 3.2632, "step": 155000 }, { "epoch": 0.46, "learning_rate": 2.6797624554230894e-05, "loss": 3.2933, "step": 155500 }, { "epoch": 0.47, "learning_rate": 2.6723018845402054e-05, "loss": 3.2911, "step": 156000 }, { "epoch": 0.47, "learning_rate": 2.6648413136573213e-05, "loss": 3.3155, "step": 156500 }, { "epoch": 0.47, "learning_rate": 2.6573807427744373e-05, "loss": 3.2821, "step": 157000 }, { "epoch": 0.47, "learning_rate": 2.6499201718915532e-05, "loss": 3.287, "step": 157500 }, { "epoch": 0.47, "learning_rate": 2.6424596010086695e-05, "loss": 3.2701, "step": 158000 }, { "epoch": 0.47, "learning_rate": 2.6349990301257855e-05, "loss": 3.2554, "step": 158500 }, { "epoch": 0.47, "learning_rate": 2.6275384592429015e-05, "loss": 3.2938, "step": 159000 }, { "epoch": 0.48, "learning_rate": 2.6200778883600174e-05, "loss": 3.2812, "step": 159500 }, { "epoch": 0.48, "learning_rate": 2.6126173174771334e-05, "loss": 3.2739, "step": 160000 }, { "epoch": 0.48, "learning_rate": 2.6051567465942493e-05, "loss": 3.2512, "step": 160500 }, { "epoch": 0.48, "learning_rate": 2.5976961757113656e-05, "loss": 3.2567, "step": 161000 }, { "epoch": 0.48, "learning_rate": 2.5902356048284816e-05, "loss": 3.2687, "step": 161500 }, { "epoch": 0.48, "learning_rate": 2.5827750339455975e-05, "loss": 3.2876, "step": 162000 }, { "epoch": 0.48, "learning_rate": 2.5753144630627135e-05, "loss": 3.2741, "step": 162500 }, { "epoch": 0.49, "learning_rate": 2.5678538921798295e-05, "loss": 3.2556, "step": 163000 }, { "epoch": 0.49, "learning_rate": 2.5603933212969454e-05, "loss": 3.2568, "step": 163500 }, { "epoch": 0.49, "learning_rate": 2.5529327504140617e-05, "loss": 3.2432, "step": 164000 }, { "epoch": 0.49, "learning_rate": 2.545472179531178e-05, "loss": 3.2467, "step": 164500 }, { "epoch": 0.49, "learning_rate": 2.538011608648294e-05, "loss": 3.2575, "step": 165000 }, { "epoch": 0.49, "learning_rate": 2.5305510377654103e-05, "loss": 3.248, "step": 165500 }, { "epoch": 0.5, "learning_rate": 2.5230904668825262e-05, "loss": 3.2438, "step": 166000 }, { "epoch": 0.5, "learning_rate": 2.5156298959996422e-05, "loss": 3.2516, "step": 166500 }, { "epoch": 0.5, "learning_rate": 2.508169325116758e-05, "loss": 3.2483, "step": 167000 }, { "epoch": 0.5, "learning_rate": 2.500708754233874e-05, "loss": 3.2313, "step": 167500 }, { "epoch": 0.5, "learning_rate": 2.49324818335099e-05, "loss": 3.2331, "step": 168000 }, { "epoch": 0.5, "learning_rate": 2.4857876124681063e-05, "loss": 3.2462, "step": 168500 }, { "epoch": 0.5, "learning_rate": 2.4783270415852223e-05, "loss": 3.2219, "step": 169000 }, { "epoch": 0.51, "learning_rate": 2.4708664707023383e-05, "loss": 3.2406, "step": 169500 }, { "epoch": 0.51, "learning_rate": 2.4634058998194542e-05, "loss": 3.2086, "step": 170000 }, { "epoch": 0.51, "learning_rate": 2.4559453289365702e-05, "loss": 3.2183, "step": 170500 }, { "epoch": 0.51, "learning_rate": 2.448484758053686e-05, "loss": 3.222, "step": 171000 }, { "epoch": 0.51, "learning_rate": 2.4410241871708024e-05, "loss": 3.2144, "step": 171500 }, { "epoch": 0.51, "learning_rate": 2.4335636162879187e-05, "loss": 3.2212, "step": 172000 }, { "epoch": 0.51, "learning_rate": 2.4261030454050347e-05, "loss": 3.2072, "step": 172500 }, { "epoch": 0.52, "learning_rate": 2.4186424745221506e-05, "loss": 3.2082, "step": 173000 }, { "epoch": 0.52, "learning_rate": 2.4111819036392666e-05, "loss": 3.2211, "step": 173500 }, { "epoch": 0.52, "learning_rate": 2.4037213327563826e-05, "loss": 3.1991, "step": 174000 }, { "epoch": 0.52, "learning_rate": 2.3962607618734985e-05, "loss": 3.222, "step": 174500 }, { "epoch": 0.52, "learning_rate": 2.3888001909906148e-05, "loss": 3.1956, "step": 175000 }, { "epoch": 0.52, "learning_rate": 2.3813396201077308e-05, "loss": 3.2101, "step": 175500 }, { "epoch": 0.53, "learning_rate": 2.3738790492248467e-05, "loss": 3.197, "step": 176000 }, { "epoch": 0.53, "learning_rate": 2.366418478341963e-05, "loss": 3.2057, "step": 176500 }, { "epoch": 0.53, "learning_rate": 2.358957907459079e-05, "loss": 3.1931, "step": 177000 }, { "epoch": 0.53, "learning_rate": 2.351497336576195e-05, "loss": 3.2, "step": 177500 }, { "epoch": 0.53, "learning_rate": 2.344036765693311e-05, "loss": 3.1849, "step": 178000 }, { "epoch": 0.53, "learning_rate": 2.336576194810427e-05, "loss": 3.2024, "step": 178500 }, { "epoch": 0.53, "learning_rate": 2.329115623927543e-05, "loss": 3.1923, "step": 179000 }, { "epoch": 0.54, "learning_rate": 2.321655053044659e-05, "loss": 3.1988, "step": 179500 }, { "epoch": 0.54, "learning_rate": 2.314194482161775e-05, "loss": 3.1788, "step": 180000 }, { "epoch": 0.54, "learning_rate": 2.306733911278891e-05, "loss": 3.1927, "step": 180500 }, { "epoch": 0.54, "learning_rate": 2.2992733403960073e-05, "loss": 3.1854, "step": 181000 }, { "epoch": 0.54, "learning_rate": 2.2918127695131233e-05, "loss": 3.2029, "step": 181500 }, { "epoch": 0.54, "learning_rate": 2.2843521986302392e-05, "loss": 3.1958, "step": 182000 }, { "epoch": 0.54, "learning_rate": 2.2768916277473555e-05, "loss": 3.1791, "step": 182500 }, { "epoch": 0.55, "learning_rate": 2.2694310568644715e-05, "loss": 3.1791, "step": 183000 }, { "epoch": 0.55, "learning_rate": 2.2619704859815874e-05, "loss": 3.1921, "step": 183500 }, { "epoch": 0.55, "learning_rate": 2.2545099150987034e-05, "loss": 3.193, "step": 184000 }, { "epoch": 0.55, "learning_rate": 2.2470493442158194e-05, "loss": 3.2009, "step": 184500 }, { "epoch": 0.55, "learning_rate": 2.2395887733329353e-05, "loss": 3.1873, "step": 185000 }, { "epoch": 0.55, "learning_rate": 2.2321282024500516e-05, "loss": 3.1703, "step": 185500 }, { "epoch": 0.56, "learning_rate": 2.2246676315671676e-05, "loss": 3.1822, "step": 186000 }, { "epoch": 0.56, "learning_rate": 2.217207060684284e-05, "loss": 3.1651, "step": 186500 }, { "epoch": 0.56, "learning_rate": 2.2097464898013998e-05, "loss": 3.161, "step": 187000 }, { "epoch": 0.56, "learning_rate": 2.2022859189185158e-05, "loss": 3.1794, "step": 187500 }, { "epoch": 0.56, "learning_rate": 2.1948253480356317e-05, "loss": 3.166, "step": 188000 }, { "epoch": 0.56, "learning_rate": 2.1873647771527477e-05, "loss": 3.1738, "step": 188500 }, { "epoch": 0.56, "learning_rate": 2.1799042062698636e-05, "loss": 3.1563, "step": 189000 }, { "epoch": 0.57, "learning_rate": 2.17244363538698e-05, "loss": 3.1549, "step": 189500 }, { "epoch": 0.57, "learning_rate": 2.164983064504096e-05, "loss": 3.152, "step": 190000 }, { "epoch": 0.57, "learning_rate": 2.1575224936212122e-05, "loss": 3.1475, "step": 190500 }, { "epoch": 0.57, "learning_rate": 2.150061922738328e-05, "loss": 3.1561, "step": 191000 }, { "epoch": 0.57, "learning_rate": 2.142601351855444e-05, "loss": 3.1452, "step": 191500 }, { "epoch": 0.57, "learning_rate": 2.13514078097256e-05, "loss": 3.1559, "step": 192000 }, { "epoch": 0.57, "learning_rate": 2.127680210089676e-05, "loss": 3.1523, "step": 192500 }, { "epoch": 0.58, "learning_rate": 2.1202196392067923e-05, "loss": 3.1547, "step": 193000 }, { "epoch": 0.58, "learning_rate": 2.1127590683239083e-05, "loss": 3.1415, "step": 193500 }, { "epoch": 0.58, "learning_rate": 2.1052984974410242e-05, "loss": 3.1577, "step": 194000 }, { "epoch": 0.58, "learning_rate": 2.0978379265581402e-05, "loss": 3.1476, "step": 194500 }, { "epoch": 0.58, "learning_rate": 2.0903773556752565e-05, "loss": 3.1473, "step": 195000 }, { "epoch": 0.58, "learning_rate": 2.0829167847923725e-05, "loss": 3.1553, "step": 195500 }, { "epoch": 0.58, "learning_rate": 2.0754562139094884e-05, "loss": 3.1435, "step": 196000 }, { "epoch": 0.59, "learning_rate": 2.0679956430266047e-05, "loss": 3.1384, "step": 196500 }, { "epoch": 0.59, "learning_rate": 2.0605350721437207e-05, "loss": 3.1253, "step": 197000 }, { "epoch": 0.59, "learning_rate": 2.0530745012608366e-05, "loss": 3.1279, "step": 197500 }, { "epoch": 0.59, "learning_rate": 2.0456139303779526e-05, "loss": 3.1418, "step": 198000 }, { "epoch": 0.59, "learning_rate": 2.0381533594950685e-05, "loss": 3.1448, "step": 198500 }, { "epoch": 0.59, "learning_rate": 2.0306927886121845e-05, "loss": 3.1398, "step": 199000 }, { "epoch": 0.6, "learning_rate": 2.0232322177293004e-05, "loss": 3.1293, "step": 199500 }, { "epoch": 0.6, "learning_rate": 2.0157716468464167e-05, "loss": 3.1252, "step": 200000 }, { "epoch": 0.6, "learning_rate": 2.008311075963533e-05, "loss": 3.1121, "step": 200500 }, { "epoch": 0.6, "learning_rate": 2.000850505080649e-05, "loss": 3.1314, "step": 201000 }, { "epoch": 0.6, "learning_rate": 1.993389934197765e-05, "loss": 3.1345, "step": 201500 }, { "epoch": 0.6, "learning_rate": 1.985929363314881e-05, "loss": 3.1291, "step": 202000 }, { "epoch": 0.6, "learning_rate": 1.978468792431997e-05, "loss": 3.1316, "step": 202500 }, { "epoch": 0.61, "learning_rate": 1.9710082215491128e-05, "loss": 3.1365, "step": 203000 }, { "epoch": 0.61, "learning_rate": 1.963547650666229e-05, "loss": 3.1363, "step": 203500 }, { "epoch": 0.61, "learning_rate": 1.956087079783345e-05, "loss": 3.1304, "step": 204000 }, { "epoch": 0.61, "learning_rate": 1.9486265089004614e-05, "loss": 3.1117, "step": 204500 }, { "epoch": 0.61, "learning_rate": 1.9411659380175773e-05, "loss": 3.116, "step": 205000 }, { "epoch": 0.61, "learning_rate": 1.9337053671346933e-05, "loss": 3.1004, "step": 205500 }, { "epoch": 0.61, "learning_rate": 1.9262447962518092e-05, "loss": 3.1145, "step": 206000 }, { "epoch": 0.62, "learning_rate": 1.9187842253689252e-05, "loss": 3.1189, "step": 206500 }, { "epoch": 0.62, "learning_rate": 1.9113236544860415e-05, "loss": 3.1107, "step": 207000 }, { "epoch": 0.62, "learning_rate": 1.9038630836031575e-05, "loss": 3.1045, "step": 207500 }, { "epoch": 0.62, "learning_rate": 1.8964025127202734e-05, "loss": 3.1394, "step": 208000 }, { "epoch": 0.62, "learning_rate": 1.8889419418373894e-05, "loss": 3.1042, "step": 208500 }, { "epoch": 0.62, "learning_rate": 1.8814813709545057e-05, "loss": 3.1121, "step": 209000 }, { "epoch": 0.63, "learning_rate": 1.8740208000716216e-05, "loss": 3.1009, "step": 209500 }, { "epoch": 0.63, "learning_rate": 1.8665602291887376e-05, "loss": 3.1056, "step": 210000 }, { "epoch": 0.63, "learning_rate": 1.8590996583058535e-05, "loss": 3.1007, "step": 210500 }, { "epoch": 0.63, "learning_rate": 1.85163908742297e-05, "loss": 3.0885, "step": 211000 }, { "epoch": 0.63, "learning_rate": 1.8441785165400858e-05, "loss": 3.0931, "step": 211500 }, { "epoch": 0.63, "learning_rate": 1.8367179456572018e-05, "loss": 3.0962, "step": 212000 }, { "epoch": 0.63, "learning_rate": 1.8292573747743177e-05, "loss": 3.0888, "step": 212500 }, { "epoch": 0.64, "learning_rate": 1.8217968038914337e-05, "loss": 3.0904, "step": 213000 }, { "epoch": 0.64, "learning_rate": 1.8143362330085496e-05, "loss": 3.1059, "step": 213500 }, { "epoch": 0.64, "learning_rate": 1.806875662125666e-05, "loss": 3.1071, "step": 214000 }, { "epoch": 0.64, "learning_rate": 1.7994150912427822e-05, "loss": 3.0838, "step": 214500 }, { "epoch": 0.64, "learning_rate": 1.7919545203598982e-05, "loss": 3.1019, "step": 215000 }, { "epoch": 0.64, "learning_rate": 1.784493949477014e-05, "loss": 3.0909, "step": 215500 }, { "epoch": 0.64, "learning_rate": 1.77703337859413e-05, "loss": 3.0948, "step": 216000 }, { "epoch": 0.65, "learning_rate": 1.769572807711246e-05, "loss": 3.0653, "step": 216500 }, { "epoch": 0.65, "learning_rate": 1.762112236828362e-05, "loss": 3.1038, "step": 217000 }, { "epoch": 0.65, "learning_rate": 1.7546516659454783e-05, "loss": 3.0913, "step": 217500 }, { "epoch": 0.65, "learning_rate": 1.7471910950625943e-05, "loss": 3.0811, "step": 218000 }, { "epoch": 0.65, "learning_rate": 1.7397305241797106e-05, "loss": 3.0839, "step": 218500 }, { "epoch": 0.65, "learning_rate": 1.7322699532968265e-05, "loss": 3.0852, "step": 219000 }, { "epoch": 0.66, "learning_rate": 1.7248093824139425e-05, "loss": 3.0573, "step": 219500 }, { "epoch": 0.66, "learning_rate": 1.7173488115310584e-05, "loss": 3.0985, "step": 220000 }, { "epoch": 0.66, "learning_rate": 1.7098882406481744e-05, "loss": 3.0628, "step": 220500 }, { "epoch": 0.66, "learning_rate": 1.7024276697652903e-05, "loss": 3.076, "step": 221000 }, { "epoch": 0.66, "learning_rate": 1.6949670988824066e-05, "loss": 3.0765, "step": 221500 }, { "epoch": 0.66, "learning_rate": 1.6875065279995226e-05, "loss": 3.0835, "step": 222000 }, { "epoch": 0.66, "learning_rate": 1.6800459571166386e-05, "loss": 3.0633, "step": 222500 }, { "epoch": 0.67, "learning_rate": 1.672585386233755e-05, "loss": 3.0676, "step": 223000 }, { "epoch": 0.67, "learning_rate": 1.6651248153508708e-05, "loss": 3.0719, "step": 223500 }, { "epoch": 0.67, "learning_rate": 1.6576642444679868e-05, "loss": 3.0795, "step": 224000 }, { "epoch": 0.67, "learning_rate": 1.6502036735851027e-05, "loss": 3.0631, "step": 224500 }, { "epoch": 0.67, "learning_rate": 1.642743102702219e-05, "loss": 3.0544, "step": 225000 }, { "epoch": 0.67, "learning_rate": 1.635282531819335e-05, "loss": 3.0803, "step": 225500 }, { "epoch": 0.67, "learning_rate": 1.627821960936451e-05, "loss": 3.0572, "step": 226000 }, { "epoch": 0.68, "learning_rate": 1.620361390053567e-05, "loss": 3.0625, "step": 226500 }, { "epoch": 0.68, "learning_rate": 1.612900819170683e-05, "loss": 3.076, "step": 227000 }, { "epoch": 0.68, "learning_rate": 1.6054402482877988e-05, "loss": 3.059, "step": 227500 }, { "epoch": 0.68, "learning_rate": 1.597979677404915e-05, "loss": 3.0651, "step": 228000 }, { "epoch": 0.68, "learning_rate": 1.5905191065220314e-05, "loss": 3.0331, "step": 228500 }, { "epoch": 0.68, "learning_rate": 1.5830585356391474e-05, "loss": 3.073, "step": 229000 }, { "epoch": 0.68, "learning_rate": 1.5755979647562633e-05, "loss": 3.0653, "step": 229500 }, { "epoch": 0.69, "learning_rate": 1.5681373938733793e-05, "loss": 3.0541, "step": 230000 }, { "epoch": 0.69, "learning_rate": 1.5606768229904952e-05, "loss": 3.0459, "step": 230500 }, { "epoch": 0.69, "learning_rate": 1.5532162521076112e-05, "loss": 3.0559, "step": 231000 }, { "epoch": 0.69, "learning_rate": 1.545755681224727e-05, "loss": 3.0597, "step": 231500 }, { "epoch": 0.69, "learning_rate": 1.5382951103418434e-05, "loss": 3.0524, "step": 232000 }, { "epoch": 0.69, "learning_rate": 1.5308345394589597e-05, "loss": 3.0592, "step": 232500 }, { "epoch": 0.7, "learning_rate": 1.5233739685760757e-05, "loss": 3.0682, "step": 233000 }, { "epoch": 0.7, "learning_rate": 1.5159133976931917e-05, "loss": 3.0332, "step": 233500 }, { "epoch": 0.7, "learning_rate": 1.5084528268103076e-05, "loss": 3.0509, "step": 234000 }, { "epoch": 0.7, "learning_rate": 1.5009922559274237e-05, "loss": 3.0395, "step": 234500 }, { "epoch": 0.7, "learning_rate": 1.4935316850445397e-05, "loss": 3.063, "step": 235000 }, { "epoch": 0.7, "learning_rate": 1.4860711141616556e-05, "loss": 3.0484, "step": 235500 }, { "epoch": 0.7, "learning_rate": 1.4786105432787716e-05, "loss": 3.0505, "step": 236000 }, { "epoch": 0.71, "learning_rate": 1.4711499723958877e-05, "loss": 3.0375, "step": 236500 }, { "epoch": 0.71, "learning_rate": 1.4636894015130037e-05, "loss": 3.0443, "step": 237000 }, { "epoch": 0.71, "learning_rate": 1.45622883063012e-05, "loss": 3.0334, "step": 237500 }, { "epoch": 0.71, "learning_rate": 1.448768259747236e-05, "loss": 3.0349, "step": 238000 }, { "epoch": 0.71, "learning_rate": 1.441307688864352e-05, "loss": 3.0277, "step": 238500 }, { "epoch": 0.71, "learning_rate": 1.433847117981468e-05, "loss": 3.0345, "step": 239000 }, { "epoch": 0.71, "learning_rate": 1.426386547098584e-05, "loss": 3.0494, "step": 239500 }, { "epoch": 0.72, "learning_rate": 1.4189259762157001e-05, "loss": 3.0363, "step": 240000 }, { "epoch": 0.72, "learning_rate": 1.411465405332816e-05, "loss": 3.0398, "step": 240500 }, { "epoch": 0.72, "learning_rate": 1.404004834449932e-05, "loss": 3.0372, "step": 241000 }, { "epoch": 0.72, "learning_rate": 1.3965442635670482e-05, "loss": 3.0286, "step": 241500 }, { "epoch": 0.72, "learning_rate": 1.3890836926841645e-05, "loss": 3.034, "step": 242000 }, { "epoch": 0.72, "learning_rate": 1.3816231218012804e-05, "loss": 3.0132, "step": 242500 }, { "epoch": 0.73, "learning_rate": 1.3741625509183964e-05, "loss": 3.0232, "step": 243000 }, { "epoch": 0.73, "learning_rate": 1.3667019800355125e-05, "loss": 3.0399, "step": 243500 }, { "epoch": 0.73, "learning_rate": 1.3592414091526284e-05, "loss": 3.029, "step": 244000 }, { "epoch": 0.73, "learning_rate": 1.3517808382697444e-05, "loss": 3.0386, "step": 244500 }, { "epoch": 0.73, "learning_rate": 1.3443202673868605e-05, "loss": 3.0186, "step": 245000 }, { "epoch": 0.73, "learning_rate": 1.3368596965039765e-05, "loss": 3.0282, "step": 245500 }, { "epoch": 0.73, "learning_rate": 1.3293991256210924e-05, "loss": 3.0252, "step": 246000 }, { "epoch": 0.74, "learning_rate": 1.3219385547382087e-05, "loss": 3.0175, "step": 246500 }, { "epoch": 0.74, "learning_rate": 1.3144779838553247e-05, "loss": 3.0221, "step": 247000 }, { "epoch": 0.74, "learning_rate": 1.3070174129724408e-05, "loss": 3.0187, "step": 247500 }, { "epoch": 0.74, "learning_rate": 1.2995568420895568e-05, "loss": 3.0149, "step": 248000 }, { "epoch": 0.74, "learning_rate": 1.2920962712066727e-05, "loss": 3.021, "step": 248500 }, { "epoch": 0.74, "learning_rate": 1.2846357003237889e-05, "loss": 3.0241, "step": 249000 }, { "epoch": 0.74, "learning_rate": 1.2771751294409048e-05, "loss": 3.005, "step": 249500 }, { "epoch": 0.75, "learning_rate": 1.2697145585580208e-05, "loss": 3.0068, "step": 250000 }, { "epoch": 0.75, "learning_rate": 1.2622539876751369e-05, "loss": 3.0047, "step": 250500 }, { "epoch": 0.75, "learning_rate": 1.2547934167922529e-05, "loss": 3.0196, "step": 251000 }, { "epoch": 0.75, "learning_rate": 1.247332845909369e-05, "loss": 3.0043, "step": 251500 }, { "epoch": 0.75, "learning_rate": 1.239872275026485e-05, "loss": 3.0094, "step": 252000 }, { "epoch": 0.75, "learning_rate": 1.2324117041436013e-05, "loss": 3.0014, "step": 252500 }, { "epoch": 0.76, "learning_rate": 1.2249511332607172e-05, "loss": 2.9893, "step": 253000 }, { "epoch": 0.76, "learning_rate": 1.2174905623778332e-05, "loss": 2.9995, "step": 253500 }, { "epoch": 0.76, "learning_rate": 1.2100299914949493e-05, "loss": 3.0127, "step": 254000 }, { "epoch": 0.76, "learning_rate": 1.2025694206120652e-05, "loss": 3.0093, "step": 254500 }, { "epoch": 0.76, "learning_rate": 1.1951088497291814e-05, "loss": 3.002, "step": 255000 }, { "epoch": 0.76, "learning_rate": 1.1876482788462973e-05, "loss": 3.0212, "step": 255500 }, { "epoch": 0.76, "learning_rate": 1.1801877079634135e-05, "loss": 3.0069, "step": 256000 }, { "epoch": 0.77, "learning_rate": 1.1727271370805294e-05, "loss": 3.0155, "step": 256500 }, { "epoch": 0.77, "learning_rate": 1.1652665661976454e-05, "loss": 2.9904, "step": 257000 }, { "epoch": 0.77, "learning_rate": 1.1578059953147615e-05, "loss": 2.9839, "step": 257500 }, { "epoch": 0.77, "learning_rate": 1.1503454244318776e-05, "loss": 3.0073, "step": 258000 }, { "epoch": 0.77, "learning_rate": 1.1428848535489936e-05, "loss": 2.9917, "step": 258500 }, { "epoch": 0.77, "learning_rate": 1.1354242826661095e-05, "loss": 3.0195, "step": 259000 }, { "epoch": 0.77, "learning_rate": 1.1279637117832258e-05, "loss": 3.0205, "step": 259500 }, { "epoch": 0.78, "learning_rate": 1.1205031409003418e-05, "loss": 2.9868, "step": 260000 }, { "epoch": 0.78, "learning_rate": 1.1130425700174578e-05, "loss": 2.9808, "step": 260500 }, { "epoch": 0.78, "learning_rate": 1.1055819991345739e-05, "loss": 3.0043, "step": 261000 }, { "epoch": 0.78, "learning_rate": 1.0981214282516898e-05, "loss": 3.0056, "step": 261500 }, { "epoch": 0.78, "learning_rate": 1.090660857368806e-05, "loss": 2.995, "step": 262000 }, { "epoch": 0.78, "learning_rate": 1.083200286485922e-05, "loss": 2.997, "step": 262500 }, { "epoch": 0.78, "learning_rate": 1.075739715603038e-05, "loss": 2.9913, "step": 263000 }, { "epoch": 0.79, "learning_rate": 1.068279144720154e-05, "loss": 2.9802, "step": 263500 }, { "epoch": 0.79, "learning_rate": 1.06081857383727e-05, "loss": 2.9788, "step": 264000 }, { "epoch": 0.79, "learning_rate": 1.0533580029543861e-05, "loss": 2.9925, "step": 264500 }, { "epoch": 0.79, "learning_rate": 1.0458974320715022e-05, "loss": 3.0028, "step": 265000 }, { "epoch": 0.79, "learning_rate": 1.0384368611886182e-05, "loss": 2.9988, "step": 265500 }, { "epoch": 0.79, "learning_rate": 1.0309762903057341e-05, "loss": 2.9944, "step": 266000 }, { "epoch": 0.8, "learning_rate": 1.0235157194228504e-05, "loss": 2.9768, "step": 266500 }, { "epoch": 0.8, "learning_rate": 1.0160551485399664e-05, "loss": 2.9891, "step": 267000 }, { "epoch": 0.8, "learning_rate": 1.0085945776570823e-05, "loss": 2.9815, "step": 267500 }, { "epoch": 0.8, "learning_rate": 1.0011340067741983e-05, "loss": 2.9839, "step": 268000 }, { "epoch": 0.8, "learning_rate": 9.936734358913144e-06, "loss": 2.9852, "step": 268500 }, { "epoch": 0.8, "learning_rate": 9.862128650084306e-06, "loss": 2.9759, "step": 269000 }, { "epoch": 0.8, "learning_rate": 9.787522941255465e-06, "loss": 2.985, "step": 269500 }, { "epoch": 0.81, "learning_rate": 9.712917232426626e-06, "loss": 2.9627, "step": 270000 }, { "epoch": 0.81, "learning_rate": 9.638311523597786e-06, "loss": 2.9925, "step": 270500 }, { "epoch": 0.81, "learning_rate": 9.563705814768946e-06, "loss": 2.9701, "step": 271000 }, { "epoch": 0.81, "learning_rate": 9.489100105940107e-06, "loss": 2.9856, "step": 271500 }, { "epoch": 0.81, "learning_rate": 9.414494397111268e-06, "loss": 2.9915, "step": 272000 }, { "epoch": 0.81, "learning_rate": 9.339888688282428e-06, "loss": 2.9746, "step": 272500 }, { "epoch": 0.81, "learning_rate": 9.265282979453587e-06, "loss": 2.9878, "step": 273000 }, { "epoch": 0.82, "learning_rate": 9.190677270624748e-06, "loss": 2.9855, "step": 273500 }, { "epoch": 0.82, "learning_rate": 9.11607156179591e-06, "loss": 2.96, "step": 274000 }, { "epoch": 0.82, "learning_rate": 9.04146585296707e-06, "loss": 2.9613, "step": 274500 }, { "epoch": 0.82, "learning_rate": 8.966860144138229e-06, "loss": 2.9575, "step": 275000 }, { "epoch": 0.82, "learning_rate": 8.89225443530939e-06, "loss": 2.9729, "step": 275500 }, { "epoch": 0.82, "learning_rate": 8.817648726480551e-06, "loss": 2.9773, "step": 276000 }, { "epoch": 0.83, "learning_rate": 8.743043017651711e-06, "loss": 2.9643, "step": 276500 }, { "epoch": 0.83, "learning_rate": 8.668437308822872e-06, "loss": 2.9676, "step": 277000 }, { "epoch": 0.83, "learning_rate": 8.593831599994032e-06, "loss": 2.953, "step": 277500 }, { "epoch": 0.83, "learning_rate": 8.519225891165191e-06, "loss": 2.9523, "step": 278000 }, { "epoch": 0.83, "learning_rate": 8.444620182336353e-06, "loss": 2.9833, "step": 278500 }, { "epoch": 0.83, "learning_rate": 8.370014473507514e-06, "loss": 2.971, "step": 279000 }, { "epoch": 0.83, "learning_rate": 8.295408764678674e-06, "loss": 2.9665, "step": 279500 }, { "epoch": 0.84, "learning_rate": 8.220803055849833e-06, "loss": 2.9677, "step": 280000 }, { "epoch": 0.84, "learning_rate": 8.146197347020994e-06, "loss": 2.9644, "step": 280500 }, { "epoch": 0.84, "learning_rate": 8.071591638192156e-06, "loss": 2.9662, "step": 281000 }, { "epoch": 0.84, "learning_rate": 7.996985929363315e-06, "loss": 2.9606, "step": 281500 }, { "epoch": 0.84, "learning_rate": 7.922380220534475e-06, "loss": 2.9538, "step": 282000 }, { "epoch": 0.84, "learning_rate": 7.847774511705636e-06, "loss": 2.956, "step": 282500 }, { "epoch": 0.84, "learning_rate": 7.773168802876797e-06, "loss": 2.9607, "step": 283000 }, { "epoch": 0.85, "learning_rate": 7.698563094047957e-06, "loss": 2.9597, "step": 283500 }, { "epoch": 0.85, "learning_rate": 7.623957385219117e-06, "loss": 2.959, "step": 284000 }, { "epoch": 0.85, "learning_rate": 7.549351676390278e-06, "loss": 2.9556, "step": 284500 }, { "epoch": 0.85, "learning_rate": 7.474745967561437e-06, "loss": 2.956, "step": 285000 }, { "epoch": 0.85, "learning_rate": 7.4001402587325994e-06, "loss": 2.9547, "step": 285500 }, { "epoch": 0.85, "learning_rate": 7.325534549903759e-06, "loss": 2.9661, "step": 286000 }, { "epoch": 0.85, "learning_rate": 7.2509288410749194e-06, "loss": 2.9524, "step": 286500 }, { "epoch": 0.86, "learning_rate": 7.176323132246079e-06, "loss": 2.9557, "step": 287000 }, { "epoch": 0.86, "learning_rate": 7.1017174234172394e-06, "loss": 2.9675, "step": 287500 }, { "epoch": 0.86, "learning_rate": 7.027111714588401e-06, "loss": 2.9587, "step": 288000 }, { "epoch": 0.86, "learning_rate": 6.952506005759561e-06, "loss": 2.9318, "step": 288500 }, { "epoch": 0.86, "learning_rate": 6.8779002969307216e-06, "loss": 2.9583, "step": 289000 }, { "epoch": 0.86, "learning_rate": 6.803294588101881e-06, "loss": 2.9556, "step": 289500 }, { "epoch": 0.87, "learning_rate": 6.728688879273043e-06, "loss": 2.9436, "step": 290000 }, { "epoch": 0.87, "learning_rate": 6.654083170444203e-06, "loss": 2.954, "step": 290500 }, { "epoch": 0.87, "learning_rate": 6.579477461615363e-06, "loss": 2.9691, "step": 291000 }, { "epoch": 0.87, "learning_rate": 6.504871752786524e-06, "loss": 2.9443, "step": 291500 }, { "epoch": 0.87, "learning_rate": 6.430266043957683e-06, "loss": 2.9236, "step": 292000 }, { "epoch": 0.87, "learning_rate": 6.3556603351288445e-06, "loss": 2.9331, "step": 292500 }, { "epoch": 0.87, "learning_rate": 6.281054626300005e-06, "loss": 2.9342, "step": 293000 }, { "epoch": 0.88, "learning_rate": 6.206448917471165e-06, "loss": 2.9598, "step": 293500 }, { "epoch": 0.88, "learning_rate": 6.131843208642326e-06, "loss": 2.9562, "step": 294000 }, { "epoch": 0.88, "learning_rate": 6.057237499813486e-06, "loss": 2.9264, "step": 294500 }, { "epoch": 0.88, "learning_rate": 5.982631790984647e-06, "loss": 2.9349, "step": 295000 }, { "epoch": 0.88, "learning_rate": 5.908026082155807e-06, "loss": 2.9384, "step": 295500 }, { "epoch": 0.88, "learning_rate": 5.8334203733269674e-06, "loss": 2.9622, "step": 296000 }, { "epoch": 0.88, "learning_rate": 5.758814664498128e-06, "loss": 2.9589, "step": 296500 }, { "epoch": 0.89, "learning_rate": 5.684208955669288e-06, "loss": 2.9395, "step": 297000 }, { "epoch": 0.89, "learning_rate": 5.609603246840448e-06, "loss": 2.9342, "step": 297500 }, { "epoch": 0.89, "learning_rate": 5.534997538011609e-06, "loss": 2.9262, "step": 298000 }, { "epoch": 0.89, "learning_rate": 5.460391829182769e-06, "loss": 2.9391, "step": 298500 }, { "epoch": 0.89, "learning_rate": 5.38578612035393e-06, "loss": 2.9451, "step": 299000 }, { "epoch": 0.89, "learning_rate": 5.31118041152509e-06, "loss": 2.9182, "step": 299500 }, { "epoch": 0.9, "learning_rate": 5.236574702696251e-06, "loss": 2.9519, "step": 300000 }, { "epoch": 0.9, "learning_rate": 5.161968993867411e-06, "loss": 2.9299, "step": 300500 }, { "epoch": 0.9, "learning_rate": 5.087363285038571e-06, "loss": 2.9176, "step": 301000 }, { "epoch": 0.9, "learning_rate": 5.012757576209732e-06, "loss": 2.9499, "step": 301500 }, { "epoch": 0.9, "learning_rate": 4.938151867380892e-06, "loss": 2.9258, "step": 302000 }, { "epoch": 0.9, "learning_rate": 4.863546158552053e-06, "loss": 2.9396, "step": 302500 }, { "epoch": 0.9, "learning_rate": 4.788940449723213e-06, "loss": 2.941, "step": 303000 }, { "epoch": 0.91, "learning_rate": 4.714334740894374e-06, "loss": 2.9308, "step": 303500 }, { "epoch": 0.91, "learning_rate": 4.639729032065534e-06, "loss": 2.9213, "step": 304000 }, { "epoch": 0.91, "learning_rate": 4.565123323236694e-06, "loss": 2.9097, "step": 304500 }, { "epoch": 0.91, "learning_rate": 4.490517614407855e-06, "loss": 2.9218, "step": 305000 }, { "epoch": 0.91, "learning_rate": 4.415911905579015e-06, "loss": 2.9276, "step": 305500 }, { "epoch": 0.91, "learning_rate": 4.341306196750176e-06, "loss": 2.9296, "step": 306000 }, { "epoch": 0.91, "learning_rate": 4.2667004879213354e-06, "loss": 2.9332, "step": 306500 }, { "epoch": 0.92, "learning_rate": 4.192094779092497e-06, "loss": 2.9266, "step": 307000 }, { "epoch": 0.92, "learning_rate": 4.117489070263657e-06, "loss": 2.9239, "step": 307500 }, { "epoch": 0.92, "learning_rate": 4.042883361434817e-06, "loss": 2.9279, "step": 308000 }, { "epoch": 0.92, "learning_rate": 3.968277652605978e-06, "loss": 2.9062, "step": 308500 }, { "epoch": 0.92, "learning_rate": 3.8936719437771375e-06, "loss": 2.9261, "step": 309000 }, { "epoch": 0.92, "learning_rate": 3.819066234948299e-06, "loss": 2.9265, "step": 309500 }, { "epoch": 0.93, "learning_rate": 3.744460526119459e-06, "loss": 2.9235, "step": 310000 }, { "epoch": 0.93, "learning_rate": 3.6698548172906197e-06, "loss": 2.9121, "step": 310500 }, { "epoch": 0.93, "learning_rate": 3.5952491084617797e-06, "loss": 2.9207, "step": 311000 }, { "epoch": 0.93, "learning_rate": 3.5206433996329397e-06, "loss": 2.9282, "step": 311500 }, { "epoch": 0.93, "learning_rate": 3.4460376908041005e-06, "loss": 2.9205, "step": 312000 }, { "epoch": 0.93, "learning_rate": 3.371431981975261e-06, "loss": 2.9228, "step": 312500 }, { "epoch": 0.93, "learning_rate": 3.2968262731464213e-06, "loss": 2.9271, "step": 313000 }, { "epoch": 0.94, "learning_rate": 3.2222205643175818e-06, "loss": 2.9108, "step": 313500 }, { "epoch": 0.94, "learning_rate": 3.1476148554887426e-06, "loss": 2.9248, "step": 314000 }, { "epoch": 0.94, "learning_rate": 3.0730091466599026e-06, "loss": 2.9111, "step": 314500 }, { "epoch": 0.94, "learning_rate": 2.998403437831063e-06, "loss": 2.9206, "step": 315000 }, { "epoch": 0.94, "learning_rate": 2.9237977290022234e-06, "loss": 2.9175, "step": 315500 }, { "epoch": 0.94, "learning_rate": 2.8491920201733834e-06, "loss": 2.9326, "step": 316000 }, { "epoch": 0.94, "learning_rate": 2.7745863113445443e-06, "loss": 2.9216, "step": 316500 }, { "epoch": 0.95, "learning_rate": 2.6999806025157047e-06, "loss": 2.9272, "step": 317000 }, { "epoch": 0.95, "learning_rate": 2.625374893686865e-06, "loss": 2.903, "step": 317500 }, { "epoch": 0.95, "learning_rate": 2.5507691848580256e-06, "loss": 2.8962, "step": 318000 }, { "epoch": 0.95, "learning_rate": 2.476163476029186e-06, "loss": 2.9097, "step": 318500 }, { "epoch": 0.95, "learning_rate": 2.4015577672003464e-06, "loss": 2.9369, "step": 319000 }, { "epoch": 0.95, "learning_rate": 2.3269520583715064e-06, "loss": 2.9132, "step": 319500 }, { "epoch": 0.95, "learning_rate": 2.252346349542667e-06, "loss": 2.9185, "step": 320000 } ], "max_steps": 335095, "num_train_epochs": 1, "total_flos": 2.1086164784256e+16, "trial_name": null, "trial_params": null }