{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.28888591681155545, "global_step": 108000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.9973251303998932e-05, "loss": 3.5144, "step": 500 }, { "epoch": 0.0, "learning_rate": 1.9946502607997863e-05, "loss": 2.2536, "step": 1000 }, { "epoch": 0.0, "learning_rate": 1.991975391199679e-05, "loss": 2.0076, "step": 1500 }, { "epoch": 0.01, "learning_rate": 1.9893005215995724e-05, "loss": 1.77, "step": 2000 }, { "epoch": 0.01, "learning_rate": 1.986625651999465e-05, "loss": 1.6057, "step": 2500 }, { "epoch": 0.01, "learning_rate": 1.983950782399358e-05, "loss": 1.4921, "step": 3000 }, { "epoch": 0.01, "learning_rate": 1.981275912799251e-05, "loss": 1.4723, "step": 3500 }, { "epoch": 0.01, "learning_rate": 1.9786010431991442e-05, "loss": 1.3909, "step": 4000 }, { "epoch": 0.01, "learning_rate": 1.9759261735990373e-05, "loss": 1.3073, "step": 4500 }, { "epoch": 0.01, "learning_rate": 1.9732513039989303e-05, "loss": 1.1996, "step": 5000 }, { "epoch": 0.01, "learning_rate": 1.970576434398823e-05, "loss": 1.1979, "step": 5500 }, { "epoch": 0.02, "learning_rate": 1.9679015647987164e-05, "loss": 1.1541, "step": 6000 }, { "epoch": 0.02, "learning_rate": 1.965226695198609e-05, "loss": 1.1183, "step": 6500 }, { "epoch": 0.02, "learning_rate": 1.962551825598502e-05, "loss": 1.0842, "step": 7000 }, { "epoch": 0.02, "learning_rate": 1.9598769559983952e-05, "loss": 1.0402, "step": 7500 }, { "epoch": 0.02, "learning_rate": 1.9572020863982883e-05, "loss": 1.0074, "step": 8000 }, { "epoch": 0.02, "learning_rate": 1.9545272167981813e-05, "loss": 1.0338, "step": 8500 }, { "epoch": 0.02, "learning_rate": 1.951852347198074e-05, "loss": 0.9924, "step": 9000 }, { "epoch": 0.03, "learning_rate": 1.9491774775979674e-05, "loss": 0.9631, "step": 9500 }, { "epoch": 0.03, "learning_rate": 1.94650260799786e-05, "loss": 0.9394, "step": 10000 }, { "epoch": 0.03, "learning_rate": 1.943827738397753e-05, "loss": 0.9232, "step": 10500 }, { "epoch": 0.03, "learning_rate": 1.9411528687976462e-05, "loss": 0.8787, "step": 11000 }, { "epoch": 0.03, "learning_rate": 1.9384779991975393e-05, "loss": 0.8995, "step": 11500 }, { "epoch": 0.03, "learning_rate": 1.9358031295974323e-05, "loss": 0.8935, "step": 12000 }, { "epoch": 0.03, "learning_rate": 1.9331282599973254e-05, "loss": 0.8847, "step": 12500 }, { "epoch": 0.03, "learning_rate": 1.9304533903972184e-05, "loss": 0.8572, "step": 13000 }, { "epoch": 0.04, "learning_rate": 1.9277785207971115e-05, "loss": 0.8075, "step": 13500 }, { "epoch": 0.04, "learning_rate": 1.925103651197004e-05, "loss": 0.8435, "step": 14000 }, { "epoch": 0.04, "learning_rate": 1.9224287815968975e-05, "loss": 0.8183, "step": 14500 }, { "epoch": 0.04, "learning_rate": 1.9197539119967903e-05, "loss": 0.8101, "step": 15000 }, { "epoch": 0.04, "learning_rate": 1.9170790423966833e-05, "loss": 0.7885, "step": 15500 }, { "epoch": 0.04, "learning_rate": 1.9144041727965764e-05, "loss": 0.7753, "step": 16000 }, { "epoch": 0.04, "learning_rate": 1.9117293031964694e-05, "loss": 0.797, "step": 16500 }, { "epoch": 0.05, "learning_rate": 1.9090544335963624e-05, "loss": 0.784, "step": 17000 }, { "epoch": 0.05, "learning_rate": 1.906379563996255e-05, "loss": 0.7849, "step": 17500 }, { "epoch": 0.05, "learning_rate": 1.9037046943961482e-05, "loss": 0.7358, "step": 18000 }, { "epoch": 0.05, "learning_rate": 1.9010298247960413e-05, "loss": 0.7414, "step": 18500 }, { "epoch": 0.05, "learning_rate": 1.8983549551959343e-05, "loss": 0.7606, "step": 19000 }, { "epoch": 0.05, "learning_rate": 1.8956800855958274e-05, "loss": 0.7404, "step": 19500 }, { "epoch": 0.05, "learning_rate": 1.8930052159957204e-05, "loss": 0.7261, "step": 20000 }, { "epoch": 0.05, "learning_rate": 1.8903303463956134e-05, "loss": 0.7436, "step": 20500 }, { "epoch": 0.06, "learning_rate": 1.8876554767955065e-05, "loss": 0.7183, "step": 21000 }, { "epoch": 0.06, "learning_rate": 1.8849806071953992e-05, "loss": 0.7043, "step": 21500 }, { "epoch": 0.06, "learning_rate": 1.8823057375952926e-05, "loss": 0.7424, "step": 22000 }, { "epoch": 0.06, "learning_rate": 1.8796308679951853e-05, "loss": 0.7286, "step": 22500 }, { "epoch": 0.06, "learning_rate": 1.8769559983950784e-05, "loss": 0.7385, "step": 23000 }, { "epoch": 0.06, "learning_rate": 1.8742811287949714e-05, "loss": 0.7158, "step": 23500 }, { "epoch": 0.06, "learning_rate": 1.8716062591948644e-05, "loss": 0.7042, "step": 24000 }, { "epoch": 0.07, "learning_rate": 1.8689313895947575e-05, "loss": 0.6892, "step": 24500 }, { "epoch": 0.07, "learning_rate": 1.8662565199946505e-05, "loss": 0.729, "step": 25000 }, { "epoch": 0.07, "learning_rate": 1.8635816503945433e-05, "loss": 0.6994, "step": 25500 }, { "epoch": 0.07, "learning_rate": 1.8609067807944363e-05, "loss": 0.6768, "step": 26000 }, { "epoch": 0.07, "learning_rate": 1.8582319111943294e-05, "loss": 0.6858, "step": 26500 }, { "epoch": 0.07, "learning_rate": 1.8555570415942224e-05, "loss": 0.693, "step": 27000 }, { "epoch": 0.07, "learning_rate": 1.8528821719941154e-05, "loss": 0.6863, "step": 27500 }, { "epoch": 0.07, "learning_rate": 1.8502073023940085e-05, "loss": 0.6703, "step": 28000 }, { "epoch": 0.08, "learning_rate": 1.8475324327939015e-05, "loss": 0.6785, "step": 28500 }, { "epoch": 0.08, "learning_rate": 1.8448575631937943e-05, "loss": 0.6847, "step": 29000 }, { "epoch": 0.08, "learning_rate": 1.8421826935936876e-05, "loss": 0.6583, "step": 29500 }, { "epoch": 0.08, "learning_rate": 1.8395078239935803e-05, "loss": 0.6659, "step": 30000 }, { "epoch": 0.08, "learning_rate": 1.8368329543934734e-05, "loss": 0.7055, "step": 30500 }, { "epoch": 0.08, "learning_rate": 1.8341580847933664e-05, "loss": 0.6751, "step": 31000 }, { "epoch": 0.08, "learning_rate": 1.8314832151932595e-05, "loss": 0.6628, "step": 31500 }, { "epoch": 0.09, "learning_rate": 1.8288083455931525e-05, "loss": 0.6568, "step": 32000 }, { "epoch": 0.09, "learning_rate": 1.8261334759930456e-05, "loss": 0.694, "step": 32500 }, { "epoch": 0.09, "learning_rate": 1.8234586063929386e-05, "loss": 0.663, "step": 33000 }, { "epoch": 0.09, "learning_rate": 1.8207837367928317e-05, "loss": 0.6523, "step": 33500 }, { "epoch": 0.09, "learning_rate": 1.8181088671927244e-05, "loss": 0.6485, "step": 34000 }, { "epoch": 0.09, "learning_rate": 1.8154339975926174e-05, "loss": 0.6472, "step": 34500 }, { "epoch": 0.09, "learning_rate": 1.8127591279925105e-05, "loss": 0.6857, "step": 35000 }, { "epoch": 0.09, "learning_rate": 1.8100842583924035e-05, "loss": 0.6471, "step": 35500 }, { "epoch": 0.1, "learning_rate": 1.8074093887922966e-05, "loss": 0.6687, "step": 36000 }, { "epoch": 0.1, "learning_rate": 1.8047345191921893e-05, "loss": 0.6438, "step": 36500 }, { "epoch": 0.1, "learning_rate": 1.8020596495920827e-05, "loss": 0.6579, "step": 37000 }, { "epoch": 0.1, "learning_rate": 1.7993847799919754e-05, "loss": 0.657, "step": 37500 }, { "epoch": 0.1, "learning_rate": 1.7967099103918684e-05, "loss": 0.6459, "step": 38000 }, { "epoch": 0.1, "learning_rate": 1.7940350407917615e-05, "loss": 0.644, "step": 38500 }, { "epoch": 0.1, "learning_rate": 1.7913601711916545e-05, "loss": 0.6419, "step": 39000 }, { "epoch": 0.11, "learning_rate": 1.7886853015915476e-05, "loss": 0.6392, "step": 39500 }, { "epoch": 0.11, "learning_rate": 1.7860104319914406e-05, "loss": 0.6446, "step": 40000 }, { "epoch": 0.11, "learning_rate": 1.7833355623913337e-05, "loss": 0.6546, "step": 40500 }, { "epoch": 0.11, "learning_rate": 1.7806606927912267e-05, "loss": 0.6212, "step": 41000 }, { "epoch": 0.11, "learning_rate": 1.7779858231911194e-05, "loss": 0.6387, "step": 41500 }, { "epoch": 0.11, "learning_rate": 1.7753109535910128e-05, "loss": 0.6368, "step": 42000 }, { "epoch": 0.11, "learning_rate": 1.7726360839909055e-05, "loss": 0.6526, "step": 42500 }, { "epoch": 0.12, "learning_rate": 1.7699612143907986e-05, "loss": 0.6342, "step": 43000 }, { "epoch": 0.12, "learning_rate": 1.7672863447906916e-05, "loss": 0.6234, "step": 43500 }, { "epoch": 0.12, "learning_rate": 1.7646114751905843e-05, "loss": 0.6028, "step": 44000 }, { "epoch": 0.12, "learning_rate": 1.7619366055904777e-05, "loss": 0.6556, "step": 44500 }, { "epoch": 0.12, "learning_rate": 1.7592617359903704e-05, "loss": 0.625, "step": 45000 }, { "epoch": 0.12, "learning_rate": 1.7565868663902635e-05, "loss": 0.6198, "step": 45500 }, { "epoch": 0.12, "learning_rate": 1.7539119967901565e-05, "loss": 0.6538, "step": 46000 }, { "epoch": 0.12, "learning_rate": 1.7512371271900496e-05, "loss": 0.6265, "step": 46500 }, { "epoch": 0.13, "learning_rate": 1.7485622575899426e-05, "loss": 0.6375, "step": 47000 }, { "epoch": 0.13, "learning_rate": 1.7458873879898357e-05, "loss": 0.6325, "step": 47500 }, { "epoch": 0.13, "learning_rate": 1.7432125183897287e-05, "loss": 0.6258, "step": 48000 }, { "epoch": 0.13, "learning_rate": 1.7405376487896218e-05, "loss": 0.6235, "step": 48500 }, { "epoch": 0.13, "learning_rate": 1.7378627791895145e-05, "loss": 0.6386, "step": 49000 }, { "epoch": 0.13, "learning_rate": 1.735187909589408e-05, "loss": 0.6243, "step": 49500 }, { "epoch": 0.13, "learning_rate": 1.7325130399893006e-05, "loss": 0.6471, "step": 50000 }, { "epoch": 0.14, "learning_rate": 1.7298381703891936e-05, "loss": 0.6396, "step": 50500 }, { "epoch": 0.14, "learning_rate": 1.7271633007890867e-05, "loss": 0.6164, "step": 51000 }, { "epoch": 0.14, "learning_rate": 1.7244884311889797e-05, "loss": 0.5981, "step": 51500 }, { "epoch": 0.14, "learning_rate": 1.7218135615888728e-05, "loss": 0.6505, "step": 52000 }, { "epoch": 0.14, "learning_rate": 1.7191386919887655e-05, "loss": 0.651, "step": 52500 }, { "epoch": 0.14, "learning_rate": 1.716463822388659e-05, "loss": 0.629, "step": 53000 }, { "epoch": 0.14, "learning_rate": 1.7137889527885516e-05, "loss": 0.6393, "step": 53500 }, { "epoch": 0.14, "learning_rate": 1.7111140831884446e-05, "loss": 0.6168, "step": 54000 }, { "epoch": 0.15, "learning_rate": 1.7084392135883377e-05, "loss": 0.6253, "step": 54500 }, { "epoch": 0.15, "learning_rate": 1.7057643439882307e-05, "loss": 0.625, "step": 55000 }, { "epoch": 0.15, "learning_rate": 1.7030894743881238e-05, "loss": 0.6131, "step": 55500 }, { "epoch": 0.15, "learning_rate": 1.7004146047880168e-05, "loss": 0.6018, "step": 56000 }, { "epoch": 0.15, "learning_rate": 1.6977397351879095e-05, "loss": 0.6389, "step": 56500 }, { "epoch": 0.15, "learning_rate": 1.695064865587803e-05, "loss": 0.6068, "step": 57000 }, { "epoch": 0.15, "learning_rate": 1.6923899959876956e-05, "loss": 0.6238, "step": 57500 }, { "epoch": 0.16, "learning_rate": 1.6897151263875887e-05, "loss": 0.6017, "step": 58000 }, { "epoch": 0.16, "learning_rate": 1.6870402567874817e-05, "loss": 0.6052, "step": 58500 }, { "epoch": 0.16, "learning_rate": 1.6843653871873748e-05, "loss": 0.6021, "step": 59000 }, { "epoch": 0.16, "learning_rate": 1.6816905175872678e-05, "loss": 0.6168, "step": 59500 }, { "epoch": 0.16, "learning_rate": 1.6790156479871605e-05, "loss": 0.6086, "step": 60000 }, { "epoch": 0.16, "learning_rate": 1.676340778387054e-05, "loss": 0.5905, "step": 60500 }, { "epoch": 0.16, "learning_rate": 1.6736659087869466e-05, "loss": 0.6268, "step": 61000 }, { "epoch": 0.16, "learning_rate": 1.6709910391868397e-05, "loss": 0.626, "step": 61500 }, { "epoch": 0.17, "learning_rate": 1.6683161695867327e-05, "loss": 0.631, "step": 62000 }, { "epoch": 0.17, "learning_rate": 1.6656412999866258e-05, "loss": 0.5936, "step": 62500 }, { "epoch": 0.17, "learning_rate": 1.6629664303865188e-05, "loss": 0.6137, "step": 63000 }, { "epoch": 0.17, "learning_rate": 1.660291560786412e-05, "loss": 0.6185, "step": 63500 }, { "epoch": 0.17, "learning_rate": 1.6576166911863046e-05, "loss": 0.6009, "step": 64000 }, { "epoch": 0.17, "learning_rate": 1.654941821586198e-05, "loss": 0.6038, "step": 64500 }, { "epoch": 0.17, "learning_rate": 1.6522669519860907e-05, "loss": 0.6081, "step": 65000 }, { "epoch": 0.18, "learning_rate": 1.6495920823859837e-05, "loss": 0.6032, "step": 65500 }, { "epoch": 0.18, "learning_rate": 1.6469172127858768e-05, "loss": 0.6166, "step": 66000 }, { "epoch": 0.18, "learning_rate": 1.6442423431857698e-05, "loss": 0.6125, "step": 66500 }, { "epoch": 0.18, "learning_rate": 1.641567473585663e-05, "loss": 0.5948, "step": 67000 }, { "epoch": 0.18, "learning_rate": 1.638892603985556e-05, "loss": 0.5957, "step": 67500 }, { "epoch": 0.18, "learning_rate": 1.636217734385449e-05, "loss": 0.5965, "step": 68000 }, { "epoch": 0.18, "learning_rate": 1.6335428647853417e-05, "loss": 0.5997, "step": 68500 }, { "epoch": 0.18, "learning_rate": 1.6308679951852347e-05, "loss": 0.5736, "step": 69000 }, { "epoch": 0.19, "learning_rate": 1.6281931255851278e-05, "loss": 0.6116, "step": 69500 }, { "epoch": 0.19, "learning_rate": 1.6255182559850208e-05, "loss": 0.6081, "step": 70000 }, { "epoch": 0.19, "learning_rate": 1.622843386384914e-05, "loss": 0.5891, "step": 70500 }, { "epoch": 0.19, "learning_rate": 1.620168516784807e-05, "loss": 0.5942, "step": 71000 }, { "epoch": 0.19, "learning_rate": 1.6174936471847e-05, "loss": 0.6041, "step": 71500 }, { "epoch": 0.19, "learning_rate": 1.614818777584593e-05, "loss": 0.5901, "step": 72000 }, { "epoch": 0.19, "learning_rate": 1.6121439079844857e-05, "loss": 0.5926, "step": 72500 }, { "epoch": 0.2, "learning_rate": 1.609469038384379e-05, "loss": 0.6103, "step": 73000 }, { "epoch": 0.2, "learning_rate": 1.6067941687842718e-05, "loss": 0.6269, "step": 73500 }, { "epoch": 0.2, "learning_rate": 1.604119299184165e-05, "loss": 0.5939, "step": 74000 }, { "epoch": 0.2, "learning_rate": 1.601444429584058e-05, "loss": 0.6014, "step": 74500 }, { "epoch": 0.2, "learning_rate": 1.598769559983951e-05, "loss": 0.5945, "step": 75000 }, { "epoch": 0.2, "learning_rate": 1.596094690383844e-05, "loss": 0.6079, "step": 75500 }, { "epoch": 0.2, "learning_rate": 1.593419820783737e-05, "loss": 0.5995, "step": 76000 }, { "epoch": 0.2, "learning_rate": 1.5907449511836298e-05, "loss": 0.5761, "step": 76500 }, { "epoch": 0.21, "learning_rate": 1.5880700815835228e-05, "loss": 0.6073, "step": 77000 }, { "epoch": 0.21, "learning_rate": 1.585395211983416e-05, "loss": 0.6005, "step": 77500 }, { "epoch": 0.21, "learning_rate": 1.582720342383309e-05, "loss": 0.6059, "step": 78000 }, { "epoch": 0.21, "learning_rate": 1.580045472783202e-05, "loss": 0.5678, "step": 78500 }, { "epoch": 0.21, "learning_rate": 1.577370603183095e-05, "loss": 0.6119, "step": 79000 }, { "epoch": 0.21, "learning_rate": 1.574695733582988e-05, "loss": 0.5892, "step": 79500 }, { "epoch": 0.21, "learning_rate": 1.5720208639828808e-05, "loss": 0.5889, "step": 80000 }, { "epoch": 0.22, "learning_rate": 1.569345994382774e-05, "loss": 0.5922, "step": 80500 }, { "epoch": 0.22, "learning_rate": 1.566671124782667e-05, "loss": 0.582, "step": 81000 }, { "epoch": 0.22, "learning_rate": 1.56399625518256e-05, "loss": 0.5695, "step": 81500 }, { "epoch": 0.22, "learning_rate": 1.561321385582453e-05, "loss": 0.5804, "step": 82000 }, { "epoch": 0.22, "learning_rate": 1.558646515982346e-05, "loss": 0.5809, "step": 82500 }, { "epoch": 0.22, "learning_rate": 1.555971646382239e-05, "loss": 0.5828, "step": 83000 }, { "epoch": 0.22, "learning_rate": 1.553296776782132e-05, "loss": 0.5754, "step": 83500 }, { "epoch": 0.22, "learning_rate": 1.5506219071820248e-05, "loss": 0.5772, "step": 84000 }, { "epoch": 0.23, "learning_rate": 1.5479470375819182e-05, "loss": 0.5668, "step": 84500 }, { "epoch": 0.23, "learning_rate": 1.545272167981811e-05, "loss": 0.5886, "step": 85000 }, { "epoch": 0.23, "learning_rate": 1.542597298381704e-05, "loss": 0.5698, "step": 85500 }, { "epoch": 0.23, "learning_rate": 1.539922428781597e-05, "loss": 0.5891, "step": 86000 }, { "epoch": 0.23, "learning_rate": 1.53724755918149e-05, "loss": 0.5818, "step": 86500 }, { "epoch": 0.23, "learning_rate": 1.534572689581383e-05, "loss": 0.58, "step": 87000 }, { "epoch": 0.23, "learning_rate": 1.5318978199812758e-05, "loss": 0.5969, "step": 87500 }, { "epoch": 0.24, "learning_rate": 1.5292229503811692e-05, "loss": 0.579, "step": 88000 }, { "epoch": 0.24, "learning_rate": 1.526548080781062e-05, "loss": 0.5803, "step": 88500 }, { "epoch": 0.24, "learning_rate": 1.5238732111809551e-05, "loss": 0.5756, "step": 89000 }, { "epoch": 0.24, "learning_rate": 1.521198341580848e-05, "loss": 0.5794, "step": 89500 }, { "epoch": 0.24, "learning_rate": 1.518523471980741e-05, "loss": 0.5718, "step": 90000 }, { "epoch": 0.24, "learning_rate": 1.5158486023806341e-05, "loss": 0.5849, "step": 90500 }, { "epoch": 0.24, "learning_rate": 1.5131737327805272e-05, "loss": 0.5704, "step": 91000 }, { "epoch": 0.24, "learning_rate": 1.51049886318042e-05, "loss": 0.5817, "step": 91500 }, { "epoch": 0.25, "learning_rate": 1.5078239935803133e-05, "loss": 0.5716, "step": 92000 }, { "epoch": 0.25, "learning_rate": 1.5051491239802061e-05, "loss": 0.5957, "step": 92500 }, { "epoch": 0.25, "learning_rate": 1.5024742543800992e-05, "loss": 0.5686, "step": 93000 }, { "epoch": 0.25, "learning_rate": 1.499799384779992e-05, "loss": 0.5764, "step": 93500 }, { "epoch": 0.25, "learning_rate": 1.497124515179885e-05, "loss": 0.5837, "step": 94000 }, { "epoch": 0.25, "learning_rate": 1.4944496455797782e-05, "loss": 0.5668, "step": 94500 }, { "epoch": 0.25, "learning_rate": 1.491774775979671e-05, "loss": 0.579, "step": 95000 }, { "epoch": 0.26, "learning_rate": 1.489099906379564e-05, "loss": 0.5738, "step": 95500 }, { "epoch": 0.26, "learning_rate": 1.486425036779457e-05, "loss": 0.5675, "step": 96000 }, { "epoch": 0.26, "learning_rate": 1.4837501671793502e-05, "loss": 0.5844, "step": 96500 }, { "epoch": 0.26, "learning_rate": 1.481075297579243e-05, "loss": 0.5907, "step": 97000 }, { "epoch": 0.26, "learning_rate": 1.4784004279791361e-05, "loss": 0.5837, "step": 97500 }, { "epoch": 0.26, "learning_rate": 1.4757255583790292e-05, "loss": 0.5634, "step": 98000 }, { "epoch": 0.26, "learning_rate": 1.4730506887789222e-05, "loss": 0.5743, "step": 98500 }, { "epoch": 0.26, "learning_rate": 1.470375819178815e-05, "loss": 0.5821, "step": 99000 }, { "epoch": 0.27, "learning_rate": 1.4677009495787083e-05, "loss": 0.5779, "step": 99500 }, { "epoch": 0.27, "learning_rate": 1.4650260799786012e-05, "loss": 0.569, "step": 100000 }, { "epoch": 0.27, "learning_rate": 1.4623512103784942e-05, "loss": 0.5907, "step": 100500 }, { "epoch": 0.27, "learning_rate": 1.4596763407783871e-05, "loss": 0.5851, "step": 101000 }, { "epoch": 0.27, "learning_rate": 1.4570014711782803e-05, "loss": 0.5933, "step": 101500 }, { "epoch": 0.27, "learning_rate": 1.4543266015781732e-05, "loss": 0.5823, "step": 102000 }, { "epoch": 0.27, "learning_rate": 1.451651731978066e-05, "loss": 0.5701, "step": 102500 }, { "epoch": 0.28, "learning_rate": 1.4489768623779591e-05, "loss": 0.5872, "step": 103000 }, { "epoch": 0.28, "learning_rate": 1.4463019927778522e-05, "loss": 0.5675, "step": 103500 }, { "epoch": 0.28, "learning_rate": 1.4436271231777452e-05, "loss": 0.5736, "step": 104000 }, { "epoch": 0.28, "learning_rate": 1.4409522535776381e-05, "loss": 0.5574, "step": 104500 }, { "epoch": 0.28, "learning_rate": 1.4382773839775313e-05, "loss": 0.5631, "step": 105000 }, { "epoch": 0.28, "learning_rate": 1.4356025143774242e-05, "loss": 0.5662, "step": 105500 }, { "epoch": 0.28, "learning_rate": 1.4329276447773172e-05, "loss": 0.5702, "step": 106000 }, { "epoch": 0.28, "learning_rate": 1.4302527751772101e-05, "loss": 0.5507, "step": 106500 }, { "epoch": 0.29, "learning_rate": 1.4275779055771033e-05, "loss": 0.5718, "step": 107000 }, { "epoch": 0.29, "learning_rate": 1.4249030359769962e-05, "loss": 0.5545, "step": 107500 }, { "epoch": 0.29, "learning_rate": 1.4222281663768893e-05, "loss": 0.5699, "step": 108000 } ], "max_steps": 373850, "num_train_epochs": 1, "total_flos": 4.308854213121552e+16, "trial_name": null, "trial_params": null }