{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.793966261045491,
  "global_step": 187816,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "learning_rate": 1.9950413059216727e-05,
      "loss": 14.0627,
      "step": 500
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.9900826118433453e-05,
      "loss": 6.3799,
      "step": 1000
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.9851239177650176e-05,
      "loss": 5.31,
      "step": 1500
    },
    {
      "epoch": 0.03,
      "learning_rate": 1.9801652236866898e-05,
      "loss": 4.9781,
      "step": 2000
    },
    {
      "epoch": 0.04,
      "learning_rate": 1.9752065296083624e-05,
      "loss": 4.7392,
      "step": 2500
    },
    {
      "epoch": 0.04,
      "learning_rate": 1.970247835530035e-05,
      "loss": 4.5779,
      "step": 3000
    },
    {
      "epoch": 0.05,
      "learning_rate": 1.9652891414517075e-05,
      "loss": 4.4691,
      "step": 3500
    },
    {
      "epoch": 0.06,
      "learning_rate": 1.96033044737338e-05,
      "loss": 4.3745,
      "step": 4000
    },
    {
      "epoch": 0.07,
      "learning_rate": 1.9553717532950524e-05,
      "loss": 4.2883,
      "step": 4500
    },
    {
      "epoch": 0.07,
      "learning_rate": 1.9504130592167246e-05,
      "loss": 4.2342,
      "step": 5000
    },
    {
      "epoch": 0.08,
      "learning_rate": 1.9454543651383972e-05,
      "loss": 4.1614,
      "step": 5500
    },
    {
      "epoch": 0.09,
      "learning_rate": 1.9404956710600698e-05,
      "loss": 4.1279,
      "step": 6000
    },
    {
      "epoch": 0.1,
      "learning_rate": 1.9355369769817423e-05,
      "loss": 4.0802,
      "step": 6500
    },
    {
      "epoch": 0.1,
      "learning_rate": 1.930578282903415e-05,
      "loss": 4.0298,
      "step": 7000
    },
    {
      "epoch": 0.11,
      "learning_rate": 1.925619588825087e-05,
      "loss": 3.9697,
      "step": 7500
    },
    {
      "epoch": 0.12,
      "learning_rate": 1.9206608947467594e-05,
      "loss": 3.9584,
      "step": 8000
    },
    {
      "epoch": 0.13,
      "learning_rate": 1.915702200668432e-05,
      "loss": 3.9196,
      "step": 8500
    },
    {
      "epoch": 0.13,
      "learning_rate": 1.9107435065901046e-05,
      "loss": 3.9081,
      "step": 9000
    },
    {
      "epoch": 0.14,
      "learning_rate": 1.905784812511777e-05,
      "loss": 3.8419,
      "step": 9500
    },
    {
      "epoch": 0.15,
      "learning_rate": 1.9008261184334497e-05,
      "loss": 3.8363,
      "step": 10000
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.895867424355122e-05,
      "loss": 3.8047,
      "step": 10500
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.8909087302767945e-05,
      "loss": 3.7728,
      "step": 11000
    },
    {
      "epoch": 0.17,
      "learning_rate": 1.8859500361984668e-05,
      "loss": 3.7731,
      "step": 11500
    },
    {
      "epoch": 0.18,
      "learning_rate": 1.8809913421201393e-05,
      "loss": 3.7408,
      "step": 12000
    },
    {
      "epoch": 0.19,
      "learning_rate": 1.876032648041812e-05,
      "loss": 3.7027,
      "step": 12500
    },
    {
      "epoch": 0.19,
      "learning_rate": 1.8710739539634845e-05,
      "loss": 3.6865,
      "step": 13000
    },
    {
      "epoch": 0.2,
      "learning_rate": 1.8661152598851567e-05,
      "loss": 3.6456,
      "step": 13500
    },
    {
      "epoch": 0.21,
      "learning_rate": 1.8611565658068293e-05,
      "loss": 3.6539,
      "step": 14000
    },
    {
      "epoch": 0.22,
      "learning_rate": 1.8561978717285016e-05,
      "loss": 3.6222,
      "step": 14500
    },
    {
      "epoch": 0.22,
      "learning_rate": 1.851239177650174e-05,
      "loss": 3.6127,
      "step": 15000
    },
    {
      "epoch": 0.23,
      "learning_rate": 1.8462804835718467e-05,
      "loss": 3.6133,
      "step": 15500
    },
    {
      "epoch": 0.24,
      "learning_rate": 1.8413217894935193e-05,
      "loss": 3.5863,
      "step": 16000
    },
    {
      "epoch": 0.25,
      "learning_rate": 1.8363630954151915e-05,
      "loss": 3.5669,
      "step": 16500
    },
    {
      "epoch": 0.25,
      "learning_rate": 1.831404401336864e-05,
      "loss": 3.5518,
      "step": 17000
    },
    {
      "epoch": 0.26,
      "learning_rate": 1.8264457072585367e-05,
      "loss": 3.5368,
      "step": 17500
    },
    {
      "epoch": 0.27,
      "learning_rate": 1.821487013180209e-05,
      "loss": 3.5294,
      "step": 18000
    },
    {
      "epoch": 0.28,
      "learning_rate": 1.8165283191018815e-05,
      "loss": 3.5097,
      "step": 18500
    },
    {
      "epoch": 0.28,
      "learning_rate": 1.811569625023554e-05,
      "loss": 3.5198,
      "step": 19000
    },
    {
      "epoch": 0.29,
      "learning_rate": 1.8066109309452263e-05,
      "loss": 3.4702,
      "step": 19500
    },
    {
      "epoch": 0.3,
      "learning_rate": 1.801652236866899e-05,
      "loss": 3.485,
      "step": 20000
    },
    {
      "epoch": 0.3,
      "learning_rate": 1.7966935427885715e-05,
      "loss": 3.4853,
      "step": 20500
    },
    {
      "epoch": 0.31,
      "learning_rate": 1.7917348487102437e-05,
      "loss": 3.4395,
      "step": 21000
    },
    {
      "epoch": 0.32,
      "learning_rate": 1.7867761546319163e-05,
      "loss": 3.4515,
      "step": 21500
    },
    {
      "epoch": 0.33,
      "learning_rate": 1.781817460553589e-05,
      "loss": 3.4307,
      "step": 22000
    },
    {
      "epoch": 0.33,
      "learning_rate": 1.776858766475261e-05,
      "loss": 3.4343,
      "step": 22500
    },
    {
      "epoch": 0.34,
      "learning_rate": 1.7719000723969337e-05,
      "loss": 3.4053,
      "step": 23000
    },
    {
      "epoch": 0.35,
      "learning_rate": 1.7669413783186063e-05,
      "loss": 3.4008,
      "step": 23500
    },
    {
      "epoch": 0.36,
      "learning_rate": 1.7619826842402785e-05,
      "loss": 3.3951,
      "step": 24000
    },
    {
      "epoch": 0.36,
      "learning_rate": 1.757023990161951e-05,
      "loss": 3.3871,
      "step": 24500
    },
    {
      "epoch": 0.37,
      "learning_rate": 1.7520652960836234e-05,
      "loss": 3.3822,
      "step": 25000
    },
    {
      "epoch": 0.38,
      "learning_rate": 1.747106602005296e-05,
      "loss": 3.3816,
      "step": 25500
    },
    {
      "epoch": 0.39,
      "learning_rate": 1.7421479079269685e-05,
      "loss": 3.3759,
      "step": 26000
    },
    {
      "epoch": 0.39,
      "learning_rate": 1.737189213848641e-05,
      "loss": 3.3624,
      "step": 26500
    },
    {
      "epoch": 0.4,
      "learning_rate": 1.7322305197703137e-05,
      "loss": 3.3535,
      "step": 27000
    },
    {
      "epoch": 0.41,
      "learning_rate": 1.727271825691986e-05,
      "loss": 3.3366,
      "step": 27500
    },
    {
      "epoch": 0.42,
      "learning_rate": 1.722313131613658e-05,
      "loss": 3.3245,
      "step": 28000
    },
    {
      "epoch": 0.42,
      "learning_rate": 1.7173544375353307e-05,
      "loss": 3.3575,
      "step": 28500
    },
    {
      "epoch": 0.43,
      "learning_rate": 1.7123957434570033e-05,
      "loss": 3.3133,
      "step": 29000
    },
    {
      "epoch": 0.44,
      "learning_rate": 1.707437049378676e-05,
      "loss": 3.3124,
      "step": 29500
    },
    {
      "epoch": 0.45,
      "learning_rate": 1.7024783553003485e-05,
      "loss": 3.3295,
      "step": 30000
    },
    {
      "epoch": 0.45,
      "learning_rate": 1.6975196612220207e-05,
      "loss": 3.3192,
      "step": 30500
    },
    {
      "epoch": 0.46,
      "learning_rate": 1.692560967143693e-05,
      "loss": 3.3241,
      "step": 31000
    },
    {
      "epoch": 0.47,
      "learning_rate": 1.6876022730653655e-05,
      "loss": 3.2989,
      "step": 31500
    },
    {
      "epoch": 0.48,
      "learning_rate": 1.682643578987038e-05,
      "loss": 3.2956,
      "step": 32000
    },
    {
      "epoch": 0.48,
      "learning_rate": 1.6776848849087107e-05,
      "loss": 3.2889,
      "step": 32500
    },
    {
      "epoch": 0.49,
      "learning_rate": 1.6727261908303833e-05,
      "loss": 3.2934,
      "step": 33000
    },
    {
      "epoch": 0.5,
      "learning_rate": 1.6677674967520555e-05,
      "loss": 3.2642,
      "step": 33500
    },
    {
      "epoch": 0.51,
      "learning_rate": 1.6628088026737277e-05,
      "loss": 3.2513,
      "step": 34000
    },
    {
      "epoch": 0.51,
      "learning_rate": 1.6578501085954003e-05,
      "loss": 3.2584,
      "step": 34500
    },
    {
      "epoch": 0.52,
      "learning_rate": 1.652891414517073e-05,
      "loss": 3.2576,
      "step": 35000
    },
    {
      "epoch": 0.53,
      "learning_rate": 1.6479327204387455e-05,
      "loss": 3.2532,
      "step": 35500
    },
    {
      "epoch": 0.54,
      "learning_rate": 1.642974026360418e-05,
      "loss": 3.2349,
      "step": 36000
    },
    {
      "epoch": 0.54,
      "learning_rate": 1.6380153322820903e-05,
      "loss": 3.2349,
      "step": 36500
    },
    {
      "epoch": 0.55,
      "learning_rate": 1.6330566382037625e-05,
      "loss": 3.2158,
      "step": 37000
    },
    {
      "epoch": 0.56,
      "learning_rate": 1.628097944125435e-05,
      "loss": 3.2309,
      "step": 37500
    },
    {
      "epoch": 0.57,
      "learning_rate": 1.6231392500471077e-05,
      "loss": 3.2227,
      "step": 38000
    },
    {
      "epoch": 0.57,
      "learning_rate": 1.6181805559687803e-05,
      "loss": 3.2134,
      "step": 38500
    },
    {
      "epoch": 0.58,
      "learning_rate": 1.613221861890453e-05,
      "loss": 3.2206,
      "step": 39000
    },
    {
      "epoch": 0.59,
      "learning_rate": 1.608263167812125e-05,
      "loss": 3.2002,
      "step": 39500
    },
    {
      "epoch": 0.6,
      "learning_rate": 1.6033044737337973e-05,
      "loss": 3.1988,
      "step": 40000
    },
    {
      "epoch": 0.6,
      "learning_rate": 1.59834577965547e-05,
      "loss": 3.2081,
      "step": 40500
    },
    {
      "epoch": 0.61,
      "learning_rate": 1.5933870855771425e-05,
      "loss": 3.1891,
      "step": 41000
    },
    {
      "epoch": 0.62,
      "learning_rate": 1.588428391498815e-05,
      "loss": 3.2007,
      "step": 41500
    },
    {
      "epoch": 0.62,
      "learning_rate": 1.5834696974204877e-05,
      "loss": 3.1948,
      "step": 42000
    },
    {
      "epoch": 0.63,
      "learning_rate": 1.57851100334216e-05,
      "loss": 3.1673,
      "step": 42500
    },
    {
      "epoch": 0.64,
      "learning_rate": 1.5735523092638325e-05,
      "loss": 3.158,
      "step": 43000
    },
    {
      "epoch": 0.65,
      "learning_rate": 1.5685936151855047e-05,
      "loss": 3.1561,
      "step": 43500
    },
    {
      "epoch": 0.65,
      "learning_rate": 1.5636349211071773e-05,
      "loss": 3.1734,
      "step": 44000
    },
    {
      "epoch": 0.66,
      "learning_rate": 1.55867622702885e-05,
      "loss": 3.1401,
      "step": 44500
    },
    {
      "epoch": 0.67,
      "learning_rate": 1.5537175329505225e-05,
      "loss": 3.1463,
      "step": 45000
    },
    {
      "epoch": 0.68,
      "learning_rate": 1.5487588388721947e-05,
      "loss": 3.1431,
      "step": 45500
    },
    {
      "epoch": 0.68,
      "learning_rate": 1.5438001447938673e-05,
      "loss": 3.1316,
      "step": 46000
    },
    {
      "epoch": 0.69,
      "learning_rate": 1.5388414507155395e-05,
      "loss": 3.1606,
      "step": 46500
    },
    {
      "epoch": 0.7,
      "learning_rate": 1.533882756637212e-05,
      "loss": 3.1362,
      "step": 47000
    },
    {
      "epoch": 0.71,
      "learning_rate": 1.5289240625588847e-05,
      "loss": 3.1335,
      "step": 47500
    },
    {
      "epoch": 0.71,
      "learning_rate": 1.523965368480557e-05,
      "loss": 3.149,
      "step": 48000
    },
    {
      "epoch": 0.72,
      "learning_rate": 1.5190066744022297e-05,
      "loss": 3.1293,
      "step": 48500
    },
    {
      "epoch": 0.73,
      "learning_rate": 1.514047980323902e-05,
      "loss": 3.1286,
      "step": 49000
    },
    {
      "epoch": 0.74,
      "learning_rate": 1.5090892862455743e-05,
      "loss": 3.1196,
      "step": 49500
    },
    {
      "epoch": 0.74,
      "learning_rate": 1.5041305921672469e-05,
      "loss": 3.1238,
      "step": 50000
    },
    {
      "epoch": 0.75,
      "learning_rate": 1.4991718980889195e-05,
      "loss": 3.1033,
      "step": 50500
    },
    {
      "epoch": 0.76,
      "learning_rate": 1.4942132040105919e-05,
      "loss": 3.1112,
      "step": 51000
    },
    {
      "epoch": 0.77,
      "learning_rate": 1.4892545099322645e-05,
      "loss": 3.0936,
      "step": 51500
    },
    {
      "epoch": 0.77,
      "learning_rate": 1.4842958158539369e-05,
      "loss": 3.107,
      "step": 52000
    },
    {
      "epoch": 0.78,
      "learning_rate": 1.4793371217756094e-05,
      "loss": 3.1063,
      "step": 52500
    },
    {
      "epoch": 0.79,
      "learning_rate": 1.4743784276972817e-05,
      "loss": 3.0639,
      "step": 53000
    },
    {
      "epoch": 0.8,
      "learning_rate": 1.4694197336189543e-05,
      "loss": 3.1028,
      "step": 53500
    },
    {
      "epoch": 0.8,
      "learning_rate": 1.4644610395406267e-05,
      "loss": 3.0821,
      "step": 54000
    },
    {
      "epoch": 0.81,
      "learning_rate": 1.4595023454622992e-05,
      "loss": 3.0596,
      "step": 54500
    },
    {
      "epoch": 0.82,
      "learning_rate": 1.4545436513839717e-05,
      "loss": 3.0787,
      "step": 55000
    },
    {
      "epoch": 0.83,
      "learning_rate": 1.4495849573056442e-05,
      "loss": 3.0755,
      "step": 55500
    },
    {
      "epoch": 0.83,
      "learning_rate": 1.4446262632273165e-05,
      "loss": 3.066,
      "step": 56000
    },
    {
      "epoch": 0.84,
      "learning_rate": 1.439667569148989e-05,
      "loss": 3.0695,
      "step": 56500
    },
    {
      "epoch": 0.85,
      "learning_rate": 1.4347088750706615e-05,
      "loss": 3.059,
      "step": 57000
    },
    {
      "epoch": 0.86,
      "learning_rate": 1.429750180992334e-05,
      "loss": 3.0628,
      "step": 57500
    },
    {
      "epoch": 0.86,
      "learning_rate": 1.4247914869140065e-05,
      "loss": 3.0733,
      "step": 58000
    },
    {
      "epoch": 0.87,
      "learning_rate": 1.419832792835679e-05,
      "loss": 3.0591,
      "step": 58500
    },
    {
      "epoch": 0.88,
      "learning_rate": 1.4148740987573514e-05,
      "loss": 3.0468,
      "step": 59000
    },
    {
      "epoch": 0.89,
      "learning_rate": 1.4099154046790237e-05,
      "loss": 3.0265,
      "step": 59500
    },
    {
      "epoch": 0.89,
      "learning_rate": 1.4049567106006963e-05,
      "loss": 3.0282,
      "step": 60000
    },
    {
      "epoch": 0.9,
      "learning_rate": 1.3999980165223688e-05,
      "loss": 3.0222,
      "step": 60500
    },
    {
      "epoch": 0.91,
      "learning_rate": 1.3950393224440413e-05,
      "loss": 3.0275,
      "step": 61000
    },
    {
      "epoch": 0.91,
      "learning_rate": 1.3900806283657138e-05,
      "loss": 3.0277,
      "step": 61500
    },
    {
      "epoch": 0.92,
      "learning_rate": 1.3851219342873862e-05,
      "loss": 3.0551,
      "step": 62000
    },
    {
      "epoch": 0.93,
      "learning_rate": 1.3801632402090585e-05,
      "loss": 3.0205,
      "step": 62500
    },
    {
      "epoch": 0.94,
      "learning_rate": 1.375204546130731e-05,
      "loss": 3.023,
      "step": 63000
    },
    {
      "epoch": 0.94,
      "learning_rate": 1.3702458520524036e-05,
      "loss": 3.0244,
      "step": 63500
    },
    {
      "epoch": 0.95,
      "learning_rate": 1.365287157974076e-05,
      "loss": 3.0116,
      "step": 64000
    },
    {
      "epoch": 0.96,
      "learning_rate": 1.3603284638957486e-05,
      "loss": 3.0141,
      "step": 64500
    },
    {
      "epoch": 0.97,
      "learning_rate": 1.355369769817421e-05,
      "loss": 3.0284,
      "step": 65000
    },
    {
      "epoch": 0.97,
      "learning_rate": 1.3504110757390933e-05,
      "loss": 3.0236,
      "step": 65500
    },
    {
      "epoch": 0.98,
      "learning_rate": 1.3454523816607659e-05,
      "loss": 3.013,
      "step": 66000
    },
    {
      "epoch": 0.99,
      "learning_rate": 1.3404936875824384e-05,
      "loss": 3.0027,
      "step": 66500
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.3355349935041108e-05,
      "loss": 3.0155,
      "step": 67000
    },
    {
      "epoch": 1.0,
      "eval_bleu": 11.298551127218651,
      "eval_loss": 2.3749005794525146,
      "eval_runtime": 4929.9601,
      "eval_samples_per_second": 8.201,
      "eval_steps_per_second": 0.513,
      "step": 67222
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.3305762994257834e-05,
      "loss": 3.0195,
      "step": 67500
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.3256176053474558e-05,
      "loss": 2.9924,
      "step": 68000
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.3206589112691284e-05,
      "loss": 2.997,
      "step": 68500
    },
    {
      "epoch": 1.03,
      "learning_rate": 1.3157002171908007e-05,
      "loss": 2.9694,
      "step": 69000
    },
    {
      "epoch": 1.03,
      "learning_rate": 1.3107415231124732e-05,
      "loss": 2.9804,
      "step": 69500
    },
    {
      "epoch": 1.04,
      "learning_rate": 1.3057828290341456e-05,
      "loss": 2.9879,
      "step": 70000
    },
    {
      "epoch": 1.05,
      "learning_rate": 1.3008241349558182e-05,
      "loss": 2.9919,
      "step": 70500
    },
    {
      "epoch": 1.06,
      "learning_rate": 1.2958654408774906e-05,
      "loss": 2.9875,
      "step": 71000
    },
    {
      "epoch": 1.06,
      "learning_rate": 1.2909067467991632e-05,
      "loss": 2.9912,
      "step": 71500
    },
    {
      "epoch": 1.07,
      "learning_rate": 1.2859480527208354e-05,
      "loss": 2.974,
      "step": 72000
    },
    {
      "epoch": 1.08,
      "learning_rate": 1.280989358642508e-05,
      "loss": 2.9581,
      "step": 72500
    },
    {
      "epoch": 1.09,
      "learning_rate": 1.2760306645641804e-05,
      "loss": 2.975,
      "step": 73000
    },
    {
      "epoch": 1.09,
      "learning_rate": 1.271071970485853e-05,
      "loss": 2.9737,
      "step": 73500
    },
    {
      "epoch": 1.1,
      "learning_rate": 1.2661132764075254e-05,
      "loss": 2.9722,
      "step": 74000
    },
    {
      "epoch": 1.11,
      "learning_rate": 1.261154582329198e-05,
      "loss": 2.9727,
      "step": 74500
    },
    {
      "epoch": 1.12,
      "learning_rate": 1.2561958882508702e-05,
      "loss": 2.9618,
      "step": 75000
    },
    {
      "epoch": 1.12,
      "learning_rate": 1.2512371941725428e-05,
      "loss": 2.9554,
      "step": 75500
    },
    {
      "epoch": 1.13,
      "learning_rate": 1.2462785000942152e-05,
      "loss": 2.961,
      "step": 76000
    },
    {
      "epoch": 1.14,
      "learning_rate": 1.2413198060158878e-05,
      "loss": 2.9627,
      "step": 76500
    },
    {
      "epoch": 1.15,
      "learning_rate": 1.2363611119375602e-05,
      "loss": 2.9896,
      "step": 77000
    },
    {
      "epoch": 1.15,
      "learning_rate": 1.2314024178592328e-05,
      "loss": 2.9433,
      "step": 77500
    },
    {
      "epoch": 1.16,
      "learning_rate": 1.2264437237809052e-05,
      "loss": 2.9329,
      "step": 78000
    },
    {
      "epoch": 1.17,
      "learning_rate": 1.2214850297025776e-05,
      "loss": 2.9552,
      "step": 78500
    },
    {
      "epoch": 1.18,
      "learning_rate": 1.21652633562425e-05,
      "loss": 2.9382,
      "step": 79000
    },
    {
      "epoch": 1.18,
      "learning_rate": 1.2115676415459226e-05,
      "loss": 2.9629,
      "step": 79500
    },
    {
      "epoch": 1.19,
      "learning_rate": 1.206608947467595e-05,
      "loss": 2.9555,
      "step": 80000
    },
    {
      "epoch": 1.2,
      "learning_rate": 1.2016502533892676e-05,
      "loss": 2.9364,
      "step": 80500
    },
    {
      "epoch": 1.2,
      "learning_rate": 1.19669155931094e-05,
      "loss": 2.9296,
      "step": 81000
    },
    {
      "epoch": 1.21,
      "learning_rate": 1.1917328652326124e-05,
      "loss": 2.9483,
      "step": 81500
    },
    {
      "epoch": 1.22,
      "learning_rate": 1.1867741711542848e-05,
      "loss": 2.9605,
      "step": 82000
    },
    {
      "epoch": 1.23,
      "learning_rate": 1.1818154770759574e-05,
      "loss": 2.928,
      "step": 82500
    },
    {
      "epoch": 1.23,
      "learning_rate": 1.1768567829976298e-05,
      "loss": 2.9216,
      "step": 83000
    },
    {
      "epoch": 1.24,
      "learning_rate": 1.1718980889193024e-05,
      "loss": 2.9402,
      "step": 83500
    },
    {
      "epoch": 1.25,
      "learning_rate": 1.1669393948409748e-05,
      "loss": 2.9311,
      "step": 84000
    },
    {
      "epoch": 1.26,
      "learning_rate": 1.1619807007626474e-05,
      "loss": 2.9537,
      "step": 84500
    },
    {
      "epoch": 1.26,
      "learning_rate": 1.1570220066843196e-05,
      "loss": 2.919,
      "step": 85000
    },
    {
      "epoch": 1.27,
      "learning_rate": 1.1520633126059922e-05,
      "loss": 2.918,
      "step": 85500
    },
    {
      "epoch": 1.28,
      "learning_rate": 1.1471046185276646e-05,
      "loss": 2.9339,
      "step": 86000
    },
    {
      "epoch": 1.29,
      "learning_rate": 1.1421459244493372e-05,
      "loss": 2.9071,
      "step": 86500
    },
    {
      "epoch": 1.29,
      "learning_rate": 1.1371872303710096e-05,
      "loss": 2.9397,
      "step": 87000
    },
    {
      "epoch": 1.3,
      "learning_rate": 1.1322285362926822e-05,
      "loss": 2.9225,
      "step": 87500
    },
    {
      "epoch": 1.31,
      "learning_rate": 1.1272698422143544e-05,
      "loss": 2.9248,
      "step": 88000
    },
    {
      "epoch": 1.32,
      "learning_rate": 1.122311148136027e-05,
      "loss": 2.9132,
      "step": 88500
    },
    {
      "epoch": 1.32,
      "learning_rate": 1.1173524540576994e-05,
      "loss": 2.8945,
      "step": 89000
    },
    {
      "epoch": 1.33,
      "learning_rate": 1.112393759979372e-05,
      "loss": 2.905,
      "step": 89500
    },
    {
      "epoch": 1.34,
      "learning_rate": 1.1074350659010444e-05,
      "loss": 2.9256,
      "step": 90000
    },
    {
      "epoch": 1.35,
      "learning_rate": 1.102476371822717e-05,
      "loss": 2.9089,
      "step": 90500
    },
    {
      "epoch": 1.35,
      "learning_rate": 1.0975176777443892e-05,
      "loss": 2.9104,
      "step": 91000
    },
    {
      "epoch": 1.36,
      "learning_rate": 1.0925589836660618e-05,
      "loss": 2.9226,
      "step": 91500
    },
    {
      "epoch": 1.37,
      "learning_rate": 1.0876002895877342e-05,
      "loss": 2.902,
      "step": 92000
    },
    {
      "epoch": 1.38,
      "learning_rate": 1.0826415955094068e-05,
      "loss": 2.8831,
      "step": 92500
    },
    {
      "epoch": 1.38,
      "learning_rate": 1.0776829014310792e-05,
      "loss": 2.906,
      "step": 93000
    },
    {
      "epoch": 1.39,
      "learning_rate": 1.0727242073527518e-05,
      "loss": 2.906,
      "step": 93500
    },
    {
      "epoch": 1.4,
      "learning_rate": 1.0677655132744242e-05,
      "loss": 2.8901,
      "step": 94000
    },
    {
      "epoch": 1.41,
      "learning_rate": 1.0628068191960966e-05,
      "loss": 2.9063,
      "step": 94500
    },
    {
      "epoch": 1.41,
      "learning_rate": 1.057848125117769e-05,
      "loss": 2.8765,
      "step": 95000
    },
    {
      "epoch": 1.42,
      "learning_rate": 1.0528894310394416e-05,
      "loss": 2.9022,
      "step": 95500
    },
    {
      "epoch": 1.43,
      "learning_rate": 1.047930736961114e-05,
      "loss": 2.8906,
      "step": 96000
    },
    {
      "epoch": 1.44,
      "learning_rate": 1.0429720428827866e-05,
      "loss": 2.8627,
      "step": 96500
    },
    {
      "epoch": 1.44,
      "learning_rate": 1.038013348804459e-05,
      "loss": 2.8789,
      "step": 97000
    },
    {
      "epoch": 1.45,
      "learning_rate": 1.0330546547261314e-05,
      "loss": 2.8782,
      "step": 97500
    },
    {
      "epoch": 1.46,
      "learning_rate": 1.0280959606478038e-05,
      "loss": 2.8706,
      "step": 98000
    },
    {
      "epoch": 1.47,
      "learning_rate": 1.0231372665694764e-05,
      "loss": 2.8434,
      "step": 98500
    },
    {
      "epoch": 1.47,
      "learning_rate": 1.0181785724911488e-05,
      "loss": 2.8851,
      "step": 99000
    },
    {
      "epoch": 1.48,
      "learning_rate": 1.0132198784128214e-05,
      "loss": 2.8806,
      "step": 99500
    },
    {
      "epoch": 1.49,
      "learning_rate": 1.0082611843344938e-05,
      "loss": 2.8695,
      "step": 100000
    },
    {
      "epoch": 1.5,
      "learning_rate": 1.0033024902561664e-05,
      "loss": 2.8775,
      "step": 100500
    },
    {
      "epoch": 1.5,
      "learning_rate": 9.983437961778388e-06,
      "loss": 2.8717,
      "step": 101000
    },
    {
      "epoch": 1.51,
      "learning_rate": 9.933851020995112e-06,
      "loss": 2.8616,
      "step": 101500
    },
    {
      "epoch": 1.52,
      "learning_rate": 9.884264080211836e-06,
      "loss": 2.8656,
      "step": 102000
    },
    {
      "epoch": 1.52,
      "learning_rate": 9.834677139428562e-06,
      "loss": 2.8867,
      "step": 102500
    },
    {
      "epoch": 1.53,
      "learning_rate": 9.785090198645286e-06,
      "loss": 2.8491,
      "step": 103000
    },
    {
      "epoch": 1.54,
      "learning_rate": 9.73550325786201e-06,
      "loss": 2.8716,
      "step": 103500
    },
    {
      "epoch": 1.55,
      "learning_rate": 9.685916317078736e-06,
      "loss": 2.8743,
      "step": 104000
    },
    {
      "epoch": 1.55,
      "learning_rate": 9.63632937629546e-06,
      "loss": 2.8503,
      "step": 104500
    },
    {
      "epoch": 1.56,
      "learning_rate": 9.586742435512184e-06,
      "loss": 2.8625,
      "step": 105000
    },
    {
      "epoch": 1.57,
      "learning_rate": 9.53715549472891e-06,
      "loss": 2.8237,
      "step": 105500
    },
    {
      "epoch": 1.58,
      "learning_rate": 9.487568553945634e-06,
      "loss": 2.8619,
      "step": 106000
    },
    {
      "epoch": 1.58,
      "learning_rate": 9.437981613162358e-06,
      "loss": 2.8629,
      "step": 106500
    },
    {
      "epoch": 1.59,
      "learning_rate": 9.388394672379084e-06,
      "loss": 2.8441,
      "step": 107000
    },
    {
      "epoch": 1.6,
      "learning_rate": 9.338807731595808e-06,
      "loss": 2.8569,
      "step": 107500
    },
    {
      "epoch": 1.61,
      "learning_rate": 9.289220790812532e-06,
      "loss": 2.8511,
      "step": 108000
    },
    {
      "epoch": 1.61,
      "learning_rate": 9.239633850029258e-06,
      "loss": 2.8701,
      "step": 108500
    },
    {
      "epoch": 1.62,
      "learning_rate": 9.190046909245982e-06,
      "loss": 2.8572,
      "step": 109000
    },
    {
      "epoch": 1.63,
      "learning_rate": 9.140459968462706e-06,
      "loss": 2.8673,
      "step": 109500
    },
    {
      "epoch": 1.64,
      "learning_rate": 9.090873027679432e-06,
      "loss": 2.8621,
      "step": 110000
    },
    {
      "epoch": 1.64,
      "learning_rate": 9.041286086896156e-06,
      "loss": 2.8592,
      "step": 110500
    },
    {
      "epoch": 1.65,
      "learning_rate": 8.99169914611288e-06,
      "loss": 2.8582,
      "step": 111000
    },
    {
      "epoch": 1.66,
      "learning_rate": 8.942112205329606e-06,
      "loss": 2.8666,
      "step": 111500
    },
    {
      "epoch": 1.67,
      "learning_rate": 8.89252526454633e-06,
      "loss": 2.8588,
      "step": 112000
    },
    {
      "epoch": 1.67,
      "learning_rate": 8.842938323763054e-06,
      "loss": 2.8475,
      "step": 112500
    },
    {
      "epoch": 1.68,
      "learning_rate": 8.79335138297978e-06,
      "loss": 2.8357,
      "step": 113000
    },
    {
      "epoch": 1.69,
      "learning_rate": 8.743764442196504e-06,
      "loss": 2.8608,
      "step": 113500
    },
    {
      "epoch": 1.7,
      "learning_rate": 8.69417750141323e-06,
      "loss": 2.8532,
      "step": 114000
    },
    {
      "epoch": 1.7,
      "learning_rate": 8.644590560629953e-06,
      "loss": 2.8545,
      "step": 114500
    },
    {
      "epoch": 1.71,
      "learning_rate": 8.595003619846678e-06,
      "loss": 2.8277,
      "step": 115000
    },
    {
      "epoch": 1.72,
      "learning_rate": 8.545416679063403e-06,
      "loss": 2.8509,
      "step": 115500
    },
    {
      "epoch": 1.73,
      "learning_rate": 8.495829738280127e-06,
      "loss": 2.8413,
      "step": 116000
    },
    {
      "epoch": 1.73,
      "learning_rate": 8.446242797496852e-06,
      "loss": 2.838,
      "step": 116500
    },
    {
      "epoch": 1.74,
      "learning_rate": 8.396655856713577e-06,
      "loss": 2.8543,
      "step": 117000
    },
    {
      "epoch": 1.75,
      "learning_rate": 8.347068915930301e-06,
      "loss": 2.8347,
      "step": 117500
    },
    {
      "epoch": 1.76,
      "learning_rate": 8.297481975147026e-06,
      "loss": 2.8669,
      "step": 118000
    },
    {
      "epoch": 1.76,
      "learning_rate": 8.247895034363751e-06,
      "loss": 2.8228,
      "step": 118500
    },
    {
      "epoch": 1.77,
      "learning_rate": 8.198308093580475e-06,
      "loss": 2.8385,
      "step": 119000
    },
    {
      "epoch": 1.78,
      "learning_rate": 8.1487211527972e-06,
      "loss": 2.8257,
      "step": 119500
    },
    {
      "epoch": 1.79,
      "learning_rate": 8.099134212013925e-06,
      "loss": 2.8362,
      "step": 120000
    },
    {
      "epoch": 1.79,
      "learning_rate": 8.04954727123065e-06,
      "loss": 2.8319,
      "step": 120500
    },
    {
      "epoch": 1.8,
      "learning_rate": 7.999960330447374e-06,
      "loss": 2.8356,
      "step": 121000
    },
    {
      "epoch": 1.81,
      "learning_rate": 7.9503733896641e-06,
      "loss": 2.8199,
      "step": 121500
    },
    {
      "epoch": 1.81,
      "learning_rate": 7.900786448880823e-06,
      "loss": 2.8039,
      "step": 122000
    },
    {
      "epoch": 1.82,
      "learning_rate": 7.851199508097548e-06,
      "loss": 2.832,
      "step": 122500
    },
    {
      "epoch": 1.83,
      "learning_rate": 7.801612567314273e-06,
      "loss": 2.8125,
      "step": 123000
    },
    {
      "epoch": 1.84,
      "learning_rate": 7.752025626530997e-06,
      "loss": 2.8005,
      "step": 123500
    },
    {
      "epoch": 1.84,
      "learning_rate": 7.702438685747721e-06,
      "loss": 2.8402,
      "step": 124000
    },
    {
      "epoch": 1.85,
      "learning_rate": 7.652851744964447e-06,
      "loss": 2.8186,
      "step": 124500
    },
    {
      "epoch": 1.86,
      "learning_rate": 7.603264804181172e-06,
      "loss": 2.8296,
      "step": 125000
    },
    {
      "epoch": 1.87,
      "learning_rate": 7.5536778633978955e-06,
      "loss": 2.8193,
      "step": 125500
    },
    {
      "epoch": 1.87,
      "learning_rate": 7.50409092261462e-06,
      "loss": 2.8093,
      "step": 126000
    },
    {
      "epoch": 1.88,
      "learning_rate": 7.454503981831346e-06,
      "loss": 2.8383,
      "step": 126500
    },
    {
      "epoch": 1.89,
      "learning_rate": 7.4049170410480695e-06,
      "loss": 2.821,
      "step": 127000
    },
    {
      "epoch": 1.9,
      "learning_rate": 7.355330100264794e-06,
      "loss": 2.7976,
      "step": 127500
    },
    {
      "epoch": 1.9,
      "learning_rate": 7.30574315948152e-06,
      "loss": 2.8183,
      "step": 128000
    },
    {
      "epoch": 1.91,
      "learning_rate": 7.2561562186982434e-06,
      "loss": 2.8089,
      "step": 128500
    },
    {
      "epoch": 1.92,
      "learning_rate": 7.206569277914968e-06,
      "loss": 2.818,
      "step": 129000
    },
    {
      "epoch": 1.93,
      "learning_rate": 7.156982337131694e-06,
      "loss": 2.8052,
      "step": 129500
    },
    {
      "epoch": 1.93,
      "learning_rate": 7.107395396348419e-06,
      "loss": 2.8183,
      "step": 130000
    },
    {
      "epoch": 1.94,
      "learning_rate": 7.057808455565142e-06,
      "loss": 2.8098,
      "step": 130500
    },
    {
      "epoch": 1.95,
      "learning_rate": 7.008221514781868e-06,
      "loss": 2.8155,
      "step": 131000
    },
    {
      "epoch": 1.96,
      "learning_rate": 6.958634573998593e-06,
      "loss": 2.8074,
      "step": 131500
    },
    {
      "epoch": 1.96,
      "learning_rate": 6.909047633215316e-06,
      "loss": 2.7913,
      "step": 132000
    },
    {
      "epoch": 1.97,
      "learning_rate": 6.859460692432042e-06,
      "loss": 2.8122,
      "step": 132500
    },
    {
      "epoch": 1.98,
      "learning_rate": 6.809873751648767e-06,
      "loss": 2.8327,
      "step": 133000
    },
    {
      "epoch": 1.99,
      "learning_rate": 6.76028681086549e-06,
      "loss": 2.7897,
      "step": 133500
    },
    {
      "epoch": 1.99,
      "learning_rate": 6.710699870082215e-06,
      "loss": 2.7777,
      "step": 134000
    },
    {
      "epoch": 2.0,
      "eval_bleu": 13.585366050482984,
      "eval_loss": 2.2518081665039062,
      "eval_runtime": 4182.4693,
      "eval_samples_per_second": 9.667,
      "eval_steps_per_second": 0.604,
      "step": 134444
    },
    {
      "epoch": 2.0,
      "learning_rate": 6.661112929298941e-06,
      "loss": 2.7994,
      "step": 134500
    },
    {
      "epoch": 2.01,
      "learning_rate": 6.611525988515664e-06,
      "loss": 2.8167,
      "step": 135000
    },
    {
      "epoch": 2.02,
      "learning_rate": 6.561939047732389e-06,
      "loss": 2.8123,
      "step": 135500
    },
    {
      "epoch": 2.02,
      "learning_rate": 6.512352106949115e-06,
      "loss": 2.7844,
      "step": 136000
    },
    {
      "epoch": 2.03,
      "learning_rate": 6.462765166165838e-06,
      "loss": 2.7956,
      "step": 136500
    },
    {
      "epoch": 2.04,
      "learning_rate": 6.413178225382563e-06,
      "loss": 2.7968,
      "step": 137000
    },
    {
      "epoch": 2.05,
      "learning_rate": 6.363591284599289e-06,
      "loss": 2.7916,
      "step": 137500
    },
    {
      "epoch": 2.05,
      "learning_rate": 6.314004343816014e-06,
      "loss": 2.7958,
      "step": 138000
    },
    {
      "epoch": 2.06,
      "learning_rate": 6.264417403032737e-06,
      "loss": 2.7855,
      "step": 138500
    },
    {
      "epoch": 2.07,
      "learning_rate": 6.214830462249463e-06,
      "loss": 2.7876,
      "step": 139000
    },
    {
      "epoch": 2.08,
      "learning_rate": 6.165243521466188e-06,
      "loss": 2.7724,
      "step": 139500
    },
    {
      "epoch": 2.08,
      "learning_rate": 6.115656580682911e-06,
      "loss": 2.8021,
      "step": 140000
    },
    {
      "epoch": 2.09,
      "learning_rate": 6.066069639899637e-06,
      "loss": 2.8024,
      "step": 140500
    },
    {
      "epoch": 2.1,
      "learning_rate": 6.016482699116362e-06,
      "loss": 2.7891,
      "step": 141000
    },
    {
      "epoch": 2.1,
      "learning_rate": 5.966895758333085e-06,
      "loss": 2.7592,
      "step": 141500
    },
    {
      "epoch": 2.11,
      "learning_rate": 5.917308817549811e-06,
      "loss": 2.7895,
      "step": 142000
    },
    {
      "epoch": 2.12,
      "learning_rate": 5.867721876766536e-06,
      "loss": 2.8106,
      "step": 142500
    },
    {
      "epoch": 2.13,
      "learning_rate": 5.818134935983259e-06,
      "loss": 2.7985,
      "step": 143000
    },
    {
      "epoch": 2.13,
      "learning_rate": 5.768547995199985e-06,
      "loss": 2.8137,
      "step": 143500
    },
    {
      "epoch": 2.14,
      "learning_rate": 5.71896105441671e-06,
      "loss": 2.7824,
      "step": 144000
    },
    {
      "epoch": 2.15,
      "learning_rate": 5.669374113633433e-06,
      "loss": 2.7878,
      "step": 144500
    },
    {
      "epoch": 2.16,
      "learning_rate": 5.619787172850158e-06,
      "loss": 2.7861,
      "step": 145000
    },
    {
      "epoch": 2.16,
      "learning_rate": 5.570200232066884e-06,
      "loss": 2.7886,
      "step": 145500
    },
    {
      "epoch": 2.17,
      "learning_rate": 5.520613291283607e-06,
      "loss": 2.7798,
      "step": 146000
    },
    {
      "epoch": 2.18,
      "learning_rate": 5.471026350500332e-06,
      "loss": 2.8015,
      "step": 146500
    },
    {
      "epoch": 2.19,
      "learning_rate": 5.421439409717058e-06,
      "loss": 2.8013,
      "step": 147000
    },
    {
      "epoch": 2.19,
      "learning_rate": 5.371852468933783e-06,
      "loss": 2.7567,
      "step": 147500
    },
    {
      "epoch": 2.2,
      "learning_rate": 5.322265528150506e-06,
      "loss": 2.8004,
      "step": 148000
    },
    {
      "epoch": 2.21,
      "learning_rate": 5.272678587367232e-06,
      "loss": 2.7817,
      "step": 148500
    },
    {
      "epoch": 2.22,
      "learning_rate": 5.223091646583957e-06,
      "loss": 2.7674,
      "step": 149000
    },
    {
      "epoch": 2.22,
      "learning_rate": 5.17350470580068e-06,
      "loss": 2.7882,
      "step": 149500
    },
    {
      "epoch": 2.23,
      "learning_rate": 5.123917765017406e-06,
      "loss": 2.7695,
      "step": 150000
    },
    {
      "epoch": 2.24,
      "learning_rate": 5.074330824234131e-06,
      "loss": 2.7708,
      "step": 150500
    },
    {
      "epoch": 2.25,
      "learning_rate": 5.024743883450854e-06,
      "loss": 2.7791,
      "step": 151000
    },
    {
      "epoch": 2.25,
      "learning_rate": 4.97515694266758e-06,
      "loss": 2.7998,
      "step": 151500
    },
    {
      "epoch": 2.26,
      "learning_rate": 4.925570001884304e-06,
      "loss": 2.7546,
      "step": 152000
    },
    {
      "epoch": 2.27,
      "learning_rate": 4.875983061101029e-06,
      "loss": 2.7576,
      "step": 152500
    },
    {
      "epoch": 2.28,
      "learning_rate": 4.826396120317754e-06,
      "loss": 2.7917,
      "step": 153000
    },
    {
      "epoch": 2.28,
      "learning_rate": 4.776809179534479e-06,
      "loss": 2.7828,
      "step": 153500
    },
    {
      "epoch": 2.29,
      "learning_rate": 4.727222238751203e-06,
      "loss": 2.7884,
      "step": 154000
    },
    {
      "epoch": 2.3,
      "learning_rate": 4.677635297967928e-06,
      "loss": 2.7807,
      "step": 154500
    },
    {
      "epoch": 2.31,
      "learning_rate": 4.628048357184653e-06,
      "loss": 2.7652,
      "step": 155000
    },
    {
      "epoch": 2.31,
      "learning_rate": 4.578461416401377e-06,
      "loss": 2.7918,
      "step": 155500
    },
    {
      "epoch": 2.32,
      "learning_rate": 4.528874475618102e-06,
      "loss": 2.7771,
      "step": 156000
    },
    {
      "epoch": 2.33,
      "learning_rate": 4.479287534834827e-06,
      "loss": 2.7561,
      "step": 156500
    },
    {
      "epoch": 2.34,
      "learning_rate": 4.429700594051551e-06,
      "loss": 2.7789,
      "step": 157000
    },
    {
      "epoch": 2.34,
      "learning_rate": 4.380113653268276e-06,
      "loss": 2.7629,
      "step": 157500
    },
    {
      "epoch": 2.35,
      "learning_rate": 4.330526712485001e-06,
      "loss": 2.7672,
      "step": 158000
    },
    {
      "epoch": 2.36,
      "learning_rate": 4.280939771701725e-06,
      "loss": 2.7643,
      "step": 158500
    },
    {
      "epoch": 2.37,
      "learning_rate": 4.23135283091845e-06,
      "loss": 2.7836,
      "step": 159000
    },
    {
      "epoch": 2.37,
      "learning_rate": 4.181765890135175e-06,
      "loss": 2.7742,
      "step": 159500
    },
    {
      "epoch": 2.38,
      "learning_rate": 4.132178949351899e-06,
      "loss": 2.7504,
      "step": 160000
    },
    {
      "epoch": 2.39,
      "learning_rate": 4.082592008568624e-06,
      "loss": 2.7738,
      "step": 160500
    },
    {
      "epoch": 2.4,
      "learning_rate": 4.033005067785349e-06,
      "loss": 2.7741,
      "step": 161000
    },
    {
      "epoch": 2.4,
      "learning_rate": 3.983418127002074e-06,
      "loss": 2.768,
      "step": 161500
    },
    {
      "epoch": 2.41,
      "learning_rate": 3.933831186218798e-06,
      "loss": 2.7874,
      "step": 162000
    },
    {
      "epoch": 2.42,
      "learning_rate": 3.884244245435523e-06,
      "loss": 2.7733,
      "step": 162500
    },
    {
      "epoch": 2.42,
      "learning_rate": 3.834657304652248e-06,
      "loss": 2.7677,
      "step": 163000
    },
    {
      "epoch": 2.43,
      "learning_rate": 3.7850703638689717e-06,
      "loss": 2.7601,
      "step": 163500
    },
    {
      "epoch": 2.44,
      "learning_rate": 3.735483423085696e-06,
      "loss": 2.7832,
      "step": 164000
    },
    {
      "epoch": 2.45,
      "learning_rate": 3.685896482302421e-06,
      "loss": 2.7704,
      "step": 164500
    },
    {
      "epoch": 2.45,
      "learning_rate": 3.6363095415191457e-06,
      "loss": 2.7698,
      "step": 165000
    },
    {
      "epoch": 2.46,
      "learning_rate": 3.5867226007358706e-06,
      "loss": 2.7724,
      "step": 165500
    },
    {
      "epoch": 2.47,
      "learning_rate": 3.537135659952595e-06,
      "loss": 2.7647,
      "step": 166000
    },
    {
      "epoch": 2.48,
      "learning_rate": 3.4875487191693196e-06,
      "loss": 2.7719,
      "step": 166500
    },
    {
      "epoch": 2.48,
      "learning_rate": 3.4379617783860446e-06,
      "loss": 2.7578,
      "step": 167000
    },
    {
      "epoch": 2.49,
      "learning_rate": 3.388374837602769e-06,
      "loss": 2.7478,
      "step": 167500
    },
    {
      "epoch": 2.5,
      "learning_rate": 3.3387878968194936e-06,
      "loss": 2.7747,
      "step": 168000
    },
    {
      "epoch": 2.51,
      "learning_rate": 3.2892009560362186e-06,
      "loss": 2.7533,
      "step": 168500
    },
    {
      "epoch": 2.51,
      "learning_rate": 3.239614015252943e-06,
      "loss": 2.7488,
      "step": 169000
    },
    {
      "epoch": 2.52,
      "learning_rate": 3.190027074469668e-06,
      "loss": 2.7577,
      "step": 169500
    },
    {
      "epoch": 2.53,
      "learning_rate": 3.1404401336863925e-06,
      "loss": 2.7646,
      "step": 170000
    },
    {
      "epoch": 2.54,
      "learning_rate": 3.090853192903117e-06,
      "loss": 2.7556,
      "step": 170500
    },
    {
      "epoch": 2.54,
      "learning_rate": 3.041266252119842e-06,
      "loss": 2.7578,
      "step": 171000
    },
    {
      "epoch": 2.55,
      "learning_rate": 2.9916793113365665e-06,
      "loss": 2.7428,
      "step": 171500
    },
    {
      "epoch": 2.56,
      "learning_rate": 2.942092370553291e-06,
      "loss": 2.7487,
      "step": 172000
    },
    {
      "epoch": 2.57,
      "learning_rate": 2.892505429770016e-06,
      "loss": 2.7457,
      "step": 172500
    },
    {
      "epoch": 2.57,
      "learning_rate": 2.8429184889867405e-06,
      "loss": 2.7366,
      "step": 173000
    },
    {
      "epoch": 2.58,
      "learning_rate": 2.7933315482034655e-06,
      "loss": 2.7497,
      "step": 173500
    },
    {
      "epoch": 2.59,
      "learning_rate": 2.74374460742019e-06,
      "loss": 2.7467,
      "step": 174000
    },
    {
      "epoch": 2.6,
      "learning_rate": 2.6941576666369145e-06,
      "loss": 2.7633,
      "step": 174500
    },
    {
      "epoch": 2.6,
      "learning_rate": 2.6445707258536394e-06,
      "loss": 2.7681,
      "step": 175000
    },
    {
      "epoch": 2.61,
      "learning_rate": 2.594983785070364e-06,
      "loss": 2.7552,
      "step": 175500
    },
    {
      "epoch": 2.62,
      "learning_rate": 2.5453968442870885e-06,
      "loss": 2.7539,
      "step": 176000
    },
    {
      "epoch": 2.63,
      "learning_rate": 2.4958099035038134e-06,
      "loss": 2.7393,
      "step": 176500
    },
    {
      "epoch": 2.63,
      "learning_rate": 2.446222962720538e-06,
      "loss": 2.7727,
      "step": 177000
    },
    {
      "epoch": 2.64,
      "learning_rate": 2.396636021937263e-06,
      "loss": 2.736,
      "step": 177500
    },
    {
      "epoch": 2.65,
      "learning_rate": 2.3470490811539874e-06,
      "loss": 2.7546,
      "step": 178000
    },
    {
      "epoch": 2.66,
      "learning_rate": 2.297462140370712e-06,
      "loss": 2.7601,
      "step": 178500
    },
    {
      "epoch": 2.66,
      "learning_rate": 2.247875199587437e-06,
      "loss": 2.7456,
      "step": 179000
    },
    {
      "epoch": 2.67,
      "learning_rate": 2.1982882588041614e-06,
      "loss": 2.76,
      "step": 179500
    },
    {
      "epoch": 2.68,
      "learning_rate": 2.1487013180208863e-06,
      "loss": 2.7396,
      "step": 180000
    },
    {
      "epoch": 2.69,
      "learning_rate": 2.099114377237611e-06,
      "loss": 2.761,
      "step": 180500
    },
    {
      "epoch": 2.69,
      "learning_rate": 2.0495274364543354e-06,
      "loss": 2.7603,
      "step": 181000
    },
    {
      "epoch": 2.7,
      "learning_rate": 1.9999404956710603e-06,
      "loss": 2.7614,
      "step": 181500
    },
    {
      "epoch": 2.71,
      "learning_rate": 1.950353554887785e-06,
      "loss": 2.7638,
      "step": 182000
    },
    {
      "epoch": 2.71,
      "learning_rate": 1.9007666141045096e-06,
      "loss": 2.7806,
      "step": 182500
    },
    {
      "epoch": 2.72,
      "learning_rate": 1.8511796733212343e-06,
      "loss": 2.7561,
      "step": 183000
    },
    {
      "epoch": 2.73,
      "learning_rate": 1.801592732537959e-06,
      "loss": 2.7473,
      "step": 183500
    },
    {
      "epoch": 2.74,
      "learning_rate": 1.7520057917546838e-06,
      "loss": 2.7405,
      "step": 184000
    },
    {
      "epoch": 2.74,
      "learning_rate": 1.7024188509714083e-06,
      "loss": 2.742,
      "step": 184500
    },
    {
      "epoch": 2.75,
      "learning_rate": 1.652831910188133e-06,
      "loss": 2.7387,
      "step": 185000
    },
    {
      "epoch": 2.76,
      "learning_rate": 1.6032449694048577e-06,
      "loss": 2.7681,
      "step": 185500
    },
    {
      "epoch": 2.77,
      "learning_rate": 1.5536580286215825e-06,
      "loss": 2.7599,
      "step": 186000
    },
    {
      "epoch": 2.77,
      "learning_rate": 1.504071087838307e-06,
      "loss": 2.7497,
      "step": 186500
    },
    {
      "epoch": 2.78,
      "learning_rate": 1.4544841470550317e-06,
      "loss": 2.7417,
      "step": 187000
    },
    {
      "epoch": 2.79,
      "learning_rate": 1.4048972062717565e-06,
      "loss": 2.739,
      "step": 187500
    }
  ],
  "max_steps": 201666,
  "num_train_epochs": 3,
  "total_flos": 2.2090056412387738e+17,
  "trial_name": null,
  "trial_params": null
}