HealthTeam's picture
Training in progress, step 187816
58148d9
raw
history blame
46.8 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.793966261045491,
"global_step": 187816,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 1.9950413059216727e-05,
"loss": 14.0627,
"step": 500
},
{
"epoch": 0.01,
"learning_rate": 1.9900826118433453e-05,
"loss": 6.3799,
"step": 1000
},
{
"epoch": 0.02,
"learning_rate": 1.9851239177650176e-05,
"loss": 5.31,
"step": 1500
},
{
"epoch": 0.03,
"learning_rate": 1.9801652236866898e-05,
"loss": 4.9781,
"step": 2000
},
{
"epoch": 0.04,
"learning_rate": 1.9752065296083624e-05,
"loss": 4.7392,
"step": 2500
},
{
"epoch": 0.04,
"learning_rate": 1.970247835530035e-05,
"loss": 4.5779,
"step": 3000
},
{
"epoch": 0.05,
"learning_rate": 1.9652891414517075e-05,
"loss": 4.4691,
"step": 3500
},
{
"epoch": 0.06,
"learning_rate": 1.96033044737338e-05,
"loss": 4.3745,
"step": 4000
},
{
"epoch": 0.07,
"learning_rate": 1.9553717532950524e-05,
"loss": 4.2883,
"step": 4500
},
{
"epoch": 0.07,
"learning_rate": 1.9504130592167246e-05,
"loss": 4.2342,
"step": 5000
},
{
"epoch": 0.08,
"learning_rate": 1.9454543651383972e-05,
"loss": 4.1614,
"step": 5500
},
{
"epoch": 0.09,
"learning_rate": 1.9404956710600698e-05,
"loss": 4.1279,
"step": 6000
},
{
"epoch": 0.1,
"learning_rate": 1.9355369769817423e-05,
"loss": 4.0802,
"step": 6500
},
{
"epoch": 0.1,
"learning_rate": 1.930578282903415e-05,
"loss": 4.0298,
"step": 7000
},
{
"epoch": 0.11,
"learning_rate": 1.925619588825087e-05,
"loss": 3.9697,
"step": 7500
},
{
"epoch": 0.12,
"learning_rate": 1.9206608947467594e-05,
"loss": 3.9584,
"step": 8000
},
{
"epoch": 0.13,
"learning_rate": 1.915702200668432e-05,
"loss": 3.9196,
"step": 8500
},
{
"epoch": 0.13,
"learning_rate": 1.9107435065901046e-05,
"loss": 3.9081,
"step": 9000
},
{
"epoch": 0.14,
"learning_rate": 1.905784812511777e-05,
"loss": 3.8419,
"step": 9500
},
{
"epoch": 0.15,
"learning_rate": 1.9008261184334497e-05,
"loss": 3.8363,
"step": 10000
},
{
"epoch": 0.16,
"learning_rate": 1.895867424355122e-05,
"loss": 3.8047,
"step": 10500
},
{
"epoch": 0.16,
"learning_rate": 1.8909087302767945e-05,
"loss": 3.7728,
"step": 11000
},
{
"epoch": 0.17,
"learning_rate": 1.8859500361984668e-05,
"loss": 3.7731,
"step": 11500
},
{
"epoch": 0.18,
"learning_rate": 1.8809913421201393e-05,
"loss": 3.7408,
"step": 12000
},
{
"epoch": 0.19,
"learning_rate": 1.876032648041812e-05,
"loss": 3.7027,
"step": 12500
},
{
"epoch": 0.19,
"learning_rate": 1.8710739539634845e-05,
"loss": 3.6865,
"step": 13000
},
{
"epoch": 0.2,
"learning_rate": 1.8661152598851567e-05,
"loss": 3.6456,
"step": 13500
},
{
"epoch": 0.21,
"learning_rate": 1.8611565658068293e-05,
"loss": 3.6539,
"step": 14000
},
{
"epoch": 0.22,
"learning_rate": 1.8561978717285016e-05,
"loss": 3.6222,
"step": 14500
},
{
"epoch": 0.22,
"learning_rate": 1.851239177650174e-05,
"loss": 3.6127,
"step": 15000
},
{
"epoch": 0.23,
"learning_rate": 1.8462804835718467e-05,
"loss": 3.6133,
"step": 15500
},
{
"epoch": 0.24,
"learning_rate": 1.8413217894935193e-05,
"loss": 3.5863,
"step": 16000
},
{
"epoch": 0.25,
"learning_rate": 1.8363630954151915e-05,
"loss": 3.5669,
"step": 16500
},
{
"epoch": 0.25,
"learning_rate": 1.831404401336864e-05,
"loss": 3.5518,
"step": 17000
},
{
"epoch": 0.26,
"learning_rate": 1.8264457072585367e-05,
"loss": 3.5368,
"step": 17500
},
{
"epoch": 0.27,
"learning_rate": 1.821487013180209e-05,
"loss": 3.5294,
"step": 18000
},
{
"epoch": 0.28,
"learning_rate": 1.8165283191018815e-05,
"loss": 3.5097,
"step": 18500
},
{
"epoch": 0.28,
"learning_rate": 1.811569625023554e-05,
"loss": 3.5198,
"step": 19000
},
{
"epoch": 0.29,
"learning_rate": 1.8066109309452263e-05,
"loss": 3.4702,
"step": 19500
},
{
"epoch": 0.3,
"learning_rate": 1.801652236866899e-05,
"loss": 3.485,
"step": 20000
},
{
"epoch": 0.3,
"learning_rate": 1.7966935427885715e-05,
"loss": 3.4853,
"step": 20500
},
{
"epoch": 0.31,
"learning_rate": 1.7917348487102437e-05,
"loss": 3.4395,
"step": 21000
},
{
"epoch": 0.32,
"learning_rate": 1.7867761546319163e-05,
"loss": 3.4515,
"step": 21500
},
{
"epoch": 0.33,
"learning_rate": 1.781817460553589e-05,
"loss": 3.4307,
"step": 22000
},
{
"epoch": 0.33,
"learning_rate": 1.776858766475261e-05,
"loss": 3.4343,
"step": 22500
},
{
"epoch": 0.34,
"learning_rate": 1.7719000723969337e-05,
"loss": 3.4053,
"step": 23000
},
{
"epoch": 0.35,
"learning_rate": 1.7669413783186063e-05,
"loss": 3.4008,
"step": 23500
},
{
"epoch": 0.36,
"learning_rate": 1.7619826842402785e-05,
"loss": 3.3951,
"step": 24000
},
{
"epoch": 0.36,
"learning_rate": 1.757023990161951e-05,
"loss": 3.3871,
"step": 24500
},
{
"epoch": 0.37,
"learning_rate": 1.7520652960836234e-05,
"loss": 3.3822,
"step": 25000
},
{
"epoch": 0.38,
"learning_rate": 1.747106602005296e-05,
"loss": 3.3816,
"step": 25500
},
{
"epoch": 0.39,
"learning_rate": 1.7421479079269685e-05,
"loss": 3.3759,
"step": 26000
},
{
"epoch": 0.39,
"learning_rate": 1.737189213848641e-05,
"loss": 3.3624,
"step": 26500
},
{
"epoch": 0.4,
"learning_rate": 1.7322305197703137e-05,
"loss": 3.3535,
"step": 27000
},
{
"epoch": 0.41,
"learning_rate": 1.727271825691986e-05,
"loss": 3.3366,
"step": 27500
},
{
"epoch": 0.42,
"learning_rate": 1.722313131613658e-05,
"loss": 3.3245,
"step": 28000
},
{
"epoch": 0.42,
"learning_rate": 1.7173544375353307e-05,
"loss": 3.3575,
"step": 28500
},
{
"epoch": 0.43,
"learning_rate": 1.7123957434570033e-05,
"loss": 3.3133,
"step": 29000
},
{
"epoch": 0.44,
"learning_rate": 1.707437049378676e-05,
"loss": 3.3124,
"step": 29500
},
{
"epoch": 0.45,
"learning_rate": 1.7024783553003485e-05,
"loss": 3.3295,
"step": 30000
},
{
"epoch": 0.45,
"learning_rate": 1.6975196612220207e-05,
"loss": 3.3192,
"step": 30500
},
{
"epoch": 0.46,
"learning_rate": 1.692560967143693e-05,
"loss": 3.3241,
"step": 31000
},
{
"epoch": 0.47,
"learning_rate": 1.6876022730653655e-05,
"loss": 3.2989,
"step": 31500
},
{
"epoch": 0.48,
"learning_rate": 1.682643578987038e-05,
"loss": 3.2956,
"step": 32000
},
{
"epoch": 0.48,
"learning_rate": 1.6776848849087107e-05,
"loss": 3.2889,
"step": 32500
},
{
"epoch": 0.49,
"learning_rate": 1.6727261908303833e-05,
"loss": 3.2934,
"step": 33000
},
{
"epoch": 0.5,
"learning_rate": 1.6677674967520555e-05,
"loss": 3.2642,
"step": 33500
},
{
"epoch": 0.51,
"learning_rate": 1.6628088026737277e-05,
"loss": 3.2513,
"step": 34000
},
{
"epoch": 0.51,
"learning_rate": 1.6578501085954003e-05,
"loss": 3.2584,
"step": 34500
},
{
"epoch": 0.52,
"learning_rate": 1.652891414517073e-05,
"loss": 3.2576,
"step": 35000
},
{
"epoch": 0.53,
"learning_rate": 1.6479327204387455e-05,
"loss": 3.2532,
"step": 35500
},
{
"epoch": 0.54,
"learning_rate": 1.642974026360418e-05,
"loss": 3.2349,
"step": 36000
},
{
"epoch": 0.54,
"learning_rate": 1.6380153322820903e-05,
"loss": 3.2349,
"step": 36500
},
{
"epoch": 0.55,
"learning_rate": 1.6330566382037625e-05,
"loss": 3.2158,
"step": 37000
},
{
"epoch": 0.56,
"learning_rate": 1.628097944125435e-05,
"loss": 3.2309,
"step": 37500
},
{
"epoch": 0.57,
"learning_rate": 1.6231392500471077e-05,
"loss": 3.2227,
"step": 38000
},
{
"epoch": 0.57,
"learning_rate": 1.6181805559687803e-05,
"loss": 3.2134,
"step": 38500
},
{
"epoch": 0.58,
"learning_rate": 1.613221861890453e-05,
"loss": 3.2206,
"step": 39000
},
{
"epoch": 0.59,
"learning_rate": 1.608263167812125e-05,
"loss": 3.2002,
"step": 39500
},
{
"epoch": 0.6,
"learning_rate": 1.6033044737337973e-05,
"loss": 3.1988,
"step": 40000
},
{
"epoch": 0.6,
"learning_rate": 1.59834577965547e-05,
"loss": 3.2081,
"step": 40500
},
{
"epoch": 0.61,
"learning_rate": 1.5933870855771425e-05,
"loss": 3.1891,
"step": 41000
},
{
"epoch": 0.62,
"learning_rate": 1.588428391498815e-05,
"loss": 3.2007,
"step": 41500
},
{
"epoch": 0.62,
"learning_rate": 1.5834696974204877e-05,
"loss": 3.1948,
"step": 42000
},
{
"epoch": 0.63,
"learning_rate": 1.57851100334216e-05,
"loss": 3.1673,
"step": 42500
},
{
"epoch": 0.64,
"learning_rate": 1.5735523092638325e-05,
"loss": 3.158,
"step": 43000
},
{
"epoch": 0.65,
"learning_rate": 1.5685936151855047e-05,
"loss": 3.1561,
"step": 43500
},
{
"epoch": 0.65,
"learning_rate": 1.5636349211071773e-05,
"loss": 3.1734,
"step": 44000
},
{
"epoch": 0.66,
"learning_rate": 1.55867622702885e-05,
"loss": 3.1401,
"step": 44500
},
{
"epoch": 0.67,
"learning_rate": 1.5537175329505225e-05,
"loss": 3.1463,
"step": 45000
},
{
"epoch": 0.68,
"learning_rate": 1.5487588388721947e-05,
"loss": 3.1431,
"step": 45500
},
{
"epoch": 0.68,
"learning_rate": 1.5438001447938673e-05,
"loss": 3.1316,
"step": 46000
},
{
"epoch": 0.69,
"learning_rate": 1.5388414507155395e-05,
"loss": 3.1606,
"step": 46500
},
{
"epoch": 0.7,
"learning_rate": 1.533882756637212e-05,
"loss": 3.1362,
"step": 47000
},
{
"epoch": 0.71,
"learning_rate": 1.5289240625588847e-05,
"loss": 3.1335,
"step": 47500
},
{
"epoch": 0.71,
"learning_rate": 1.523965368480557e-05,
"loss": 3.149,
"step": 48000
},
{
"epoch": 0.72,
"learning_rate": 1.5190066744022297e-05,
"loss": 3.1293,
"step": 48500
},
{
"epoch": 0.73,
"learning_rate": 1.514047980323902e-05,
"loss": 3.1286,
"step": 49000
},
{
"epoch": 0.74,
"learning_rate": 1.5090892862455743e-05,
"loss": 3.1196,
"step": 49500
},
{
"epoch": 0.74,
"learning_rate": 1.5041305921672469e-05,
"loss": 3.1238,
"step": 50000
},
{
"epoch": 0.75,
"learning_rate": 1.4991718980889195e-05,
"loss": 3.1033,
"step": 50500
},
{
"epoch": 0.76,
"learning_rate": 1.4942132040105919e-05,
"loss": 3.1112,
"step": 51000
},
{
"epoch": 0.77,
"learning_rate": 1.4892545099322645e-05,
"loss": 3.0936,
"step": 51500
},
{
"epoch": 0.77,
"learning_rate": 1.4842958158539369e-05,
"loss": 3.107,
"step": 52000
},
{
"epoch": 0.78,
"learning_rate": 1.4793371217756094e-05,
"loss": 3.1063,
"step": 52500
},
{
"epoch": 0.79,
"learning_rate": 1.4743784276972817e-05,
"loss": 3.0639,
"step": 53000
},
{
"epoch": 0.8,
"learning_rate": 1.4694197336189543e-05,
"loss": 3.1028,
"step": 53500
},
{
"epoch": 0.8,
"learning_rate": 1.4644610395406267e-05,
"loss": 3.0821,
"step": 54000
},
{
"epoch": 0.81,
"learning_rate": 1.4595023454622992e-05,
"loss": 3.0596,
"step": 54500
},
{
"epoch": 0.82,
"learning_rate": 1.4545436513839717e-05,
"loss": 3.0787,
"step": 55000
},
{
"epoch": 0.83,
"learning_rate": 1.4495849573056442e-05,
"loss": 3.0755,
"step": 55500
},
{
"epoch": 0.83,
"learning_rate": 1.4446262632273165e-05,
"loss": 3.066,
"step": 56000
},
{
"epoch": 0.84,
"learning_rate": 1.439667569148989e-05,
"loss": 3.0695,
"step": 56500
},
{
"epoch": 0.85,
"learning_rate": 1.4347088750706615e-05,
"loss": 3.059,
"step": 57000
},
{
"epoch": 0.86,
"learning_rate": 1.429750180992334e-05,
"loss": 3.0628,
"step": 57500
},
{
"epoch": 0.86,
"learning_rate": 1.4247914869140065e-05,
"loss": 3.0733,
"step": 58000
},
{
"epoch": 0.87,
"learning_rate": 1.419832792835679e-05,
"loss": 3.0591,
"step": 58500
},
{
"epoch": 0.88,
"learning_rate": 1.4148740987573514e-05,
"loss": 3.0468,
"step": 59000
},
{
"epoch": 0.89,
"learning_rate": 1.4099154046790237e-05,
"loss": 3.0265,
"step": 59500
},
{
"epoch": 0.89,
"learning_rate": 1.4049567106006963e-05,
"loss": 3.0282,
"step": 60000
},
{
"epoch": 0.9,
"learning_rate": 1.3999980165223688e-05,
"loss": 3.0222,
"step": 60500
},
{
"epoch": 0.91,
"learning_rate": 1.3950393224440413e-05,
"loss": 3.0275,
"step": 61000
},
{
"epoch": 0.91,
"learning_rate": 1.3900806283657138e-05,
"loss": 3.0277,
"step": 61500
},
{
"epoch": 0.92,
"learning_rate": 1.3851219342873862e-05,
"loss": 3.0551,
"step": 62000
},
{
"epoch": 0.93,
"learning_rate": 1.3801632402090585e-05,
"loss": 3.0205,
"step": 62500
},
{
"epoch": 0.94,
"learning_rate": 1.375204546130731e-05,
"loss": 3.023,
"step": 63000
},
{
"epoch": 0.94,
"learning_rate": 1.3702458520524036e-05,
"loss": 3.0244,
"step": 63500
},
{
"epoch": 0.95,
"learning_rate": 1.365287157974076e-05,
"loss": 3.0116,
"step": 64000
},
{
"epoch": 0.96,
"learning_rate": 1.3603284638957486e-05,
"loss": 3.0141,
"step": 64500
},
{
"epoch": 0.97,
"learning_rate": 1.355369769817421e-05,
"loss": 3.0284,
"step": 65000
},
{
"epoch": 0.97,
"learning_rate": 1.3504110757390933e-05,
"loss": 3.0236,
"step": 65500
},
{
"epoch": 0.98,
"learning_rate": 1.3454523816607659e-05,
"loss": 3.013,
"step": 66000
},
{
"epoch": 0.99,
"learning_rate": 1.3404936875824384e-05,
"loss": 3.0027,
"step": 66500
},
{
"epoch": 1.0,
"learning_rate": 1.3355349935041108e-05,
"loss": 3.0155,
"step": 67000
},
{
"epoch": 1.0,
"eval_bleu": 11.298551127218651,
"eval_loss": 2.3749005794525146,
"eval_runtime": 4929.9601,
"eval_samples_per_second": 8.201,
"eval_steps_per_second": 0.513,
"step": 67222
},
{
"epoch": 1.0,
"learning_rate": 1.3305762994257834e-05,
"loss": 3.0195,
"step": 67500
},
{
"epoch": 1.01,
"learning_rate": 1.3256176053474558e-05,
"loss": 2.9924,
"step": 68000
},
{
"epoch": 1.02,
"learning_rate": 1.3206589112691284e-05,
"loss": 2.997,
"step": 68500
},
{
"epoch": 1.03,
"learning_rate": 1.3157002171908007e-05,
"loss": 2.9694,
"step": 69000
},
{
"epoch": 1.03,
"learning_rate": 1.3107415231124732e-05,
"loss": 2.9804,
"step": 69500
},
{
"epoch": 1.04,
"learning_rate": 1.3057828290341456e-05,
"loss": 2.9879,
"step": 70000
},
{
"epoch": 1.05,
"learning_rate": 1.3008241349558182e-05,
"loss": 2.9919,
"step": 70500
},
{
"epoch": 1.06,
"learning_rate": 1.2958654408774906e-05,
"loss": 2.9875,
"step": 71000
},
{
"epoch": 1.06,
"learning_rate": 1.2909067467991632e-05,
"loss": 2.9912,
"step": 71500
},
{
"epoch": 1.07,
"learning_rate": 1.2859480527208354e-05,
"loss": 2.974,
"step": 72000
},
{
"epoch": 1.08,
"learning_rate": 1.280989358642508e-05,
"loss": 2.9581,
"step": 72500
},
{
"epoch": 1.09,
"learning_rate": 1.2760306645641804e-05,
"loss": 2.975,
"step": 73000
},
{
"epoch": 1.09,
"learning_rate": 1.271071970485853e-05,
"loss": 2.9737,
"step": 73500
},
{
"epoch": 1.1,
"learning_rate": 1.2661132764075254e-05,
"loss": 2.9722,
"step": 74000
},
{
"epoch": 1.11,
"learning_rate": 1.261154582329198e-05,
"loss": 2.9727,
"step": 74500
},
{
"epoch": 1.12,
"learning_rate": 1.2561958882508702e-05,
"loss": 2.9618,
"step": 75000
},
{
"epoch": 1.12,
"learning_rate": 1.2512371941725428e-05,
"loss": 2.9554,
"step": 75500
},
{
"epoch": 1.13,
"learning_rate": 1.2462785000942152e-05,
"loss": 2.961,
"step": 76000
},
{
"epoch": 1.14,
"learning_rate": 1.2413198060158878e-05,
"loss": 2.9627,
"step": 76500
},
{
"epoch": 1.15,
"learning_rate": 1.2363611119375602e-05,
"loss": 2.9896,
"step": 77000
},
{
"epoch": 1.15,
"learning_rate": 1.2314024178592328e-05,
"loss": 2.9433,
"step": 77500
},
{
"epoch": 1.16,
"learning_rate": 1.2264437237809052e-05,
"loss": 2.9329,
"step": 78000
},
{
"epoch": 1.17,
"learning_rate": 1.2214850297025776e-05,
"loss": 2.9552,
"step": 78500
},
{
"epoch": 1.18,
"learning_rate": 1.21652633562425e-05,
"loss": 2.9382,
"step": 79000
},
{
"epoch": 1.18,
"learning_rate": 1.2115676415459226e-05,
"loss": 2.9629,
"step": 79500
},
{
"epoch": 1.19,
"learning_rate": 1.206608947467595e-05,
"loss": 2.9555,
"step": 80000
},
{
"epoch": 1.2,
"learning_rate": 1.2016502533892676e-05,
"loss": 2.9364,
"step": 80500
},
{
"epoch": 1.2,
"learning_rate": 1.19669155931094e-05,
"loss": 2.9296,
"step": 81000
},
{
"epoch": 1.21,
"learning_rate": 1.1917328652326124e-05,
"loss": 2.9483,
"step": 81500
},
{
"epoch": 1.22,
"learning_rate": 1.1867741711542848e-05,
"loss": 2.9605,
"step": 82000
},
{
"epoch": 1.23,
"learning_rate": 1.1818154770759574e-05,
"loss": 2.928,
"step": 82500
},
{
"epoch": 1.23,
"learning_rate": 1.1768567829976298e-05,
"loss": 2.9216,
"step": 83000
},
{
"epoch": 1.24,
"learning_rate": 1.1718980889193024e-05,
"loss": 2.9402,
"step": 83500
},
{
"epoch": 1.25,
"learning_rate": 1.1669393948409748e-05,
"loss": 2.9311,
"step": 84000
},
{
"epoch": 1.26,
"learning_rate": 1.1619807007626474e-05,
"loss": 2.9537,
"step": 84500
},
{
"epoch": 1.26,
"learning_rate": 1.1570220066843196e-05,
"loss": 2.919,
"step": 85000
},
{
"epoch": 1.27,
"learning_rate": 1.1520633126059922e-05,
"loss": 2.918,
"step": 85500
},
{
"epoch": 1.28,
"learning_rate": 1.1471046185276646e-05,
"loss": 2.9339,
"step": 86000
},
{
"epoch": 1.29,
"learning_rate": 1.1421459244493372e-05,
"loss": 2.9071,
"step": 86500
},
{
"epoch": 1.29,
"learning_rate": 1.1371872303710096e-05,
"loss": 2.9397,
"step": 87000
},
{
"epoch": 1.3,
"learning_rate": 1.1322285362926822e-05,
"loss": 2.9225,
"step": 87500
},
{
"epoch": 1.31,
"learning_rate": 1.1272698422143544e-05,
"loss": 2.9248,
"step": 88000
},
{
"epoch": 1.32,
"learning_rate": 1.122311148136027e-05,
"loss": 2.9132,
"step": 88500
},
{
"epoch": 1.32,
"learning_rate": 1.1173524540576994e-05,
"loss": 2.8945,
"step": 89000
},
{
"epoch": 1.33,
"learning_rate": 1.112393759979372e-05,
"loss": 2.905,
"step": 89500
},
{
"epoch": 1.34,
"learning_rate": 1.1074350659010444e-05,
"loss": 2.9256,
"step": 90000
},
{
"epoch": 1.35,
"learning_rate": 1.102476371822717e-05,
"loss": 2.9089,
"step": 90500
},
{
"epoch": 1.35,
"learning_rate": 1.0975176777443892e-05,
"loss": 2.9104,
"step": 91000
},
{
"epoch": 1.36,
"learning_rate": 1.0925589836660618e-05,
"loss": 2.9226,
"step": 91500
},
{
"epoch": 1.37,
"learning_rate": 1.0876002895877342e-05,
"loss": 2.902,
"step": 92000
},
{
"epoch": 1.38,
"learning_rate": 1.0826415955094068e-05,
"loss": 2.8831,
"step": 92500
},
{
"epoch": 1.38,
"learning_rate": 1.0776829014310792e-05,
"loss": 2.906,
"step": 93000
},
{
"epoch": 1.39,
"learning_rate": 1.0727242073527518e-05,
"loss": 2.906,
"step": 93500
},
{
"epoch": 1.4,
"learning_rate": 1.0677655132744242e-05,
"loss": 2.8901,
"step": 94000
},
{
"epoch": 1.41,
"learning_rate": 1.0628068191960966e-05,
"loss": 2.9063,
"step": 94500
},
{
"epoch": 1.41,
"learning_rate": 1.057848125117769e-05,
"loss": 2.8765,
"step": 95000
},
{
"epoch": 1.42,
"learning_rate": 1.0528894310394416e-05,
"loss": 2.9022,
"step": 95500
},
{
"epoch": 1.43,
"learning_rate": 1.047930736961114e-05,
"loss": 2.8906,
"step": 96000
},
{
"epoch": 1.44,
"learning_rate": 1.0429720428827866e-05,
"loss": 2.8627,
"step": 96500
},
{
"epoch": 1.44,
"learning_rate": 1.038013348804459e-05,
"loss": 2.8789,
"step": 97000
},
{
"epoch": 1.45,
"learning_rate": 1.0330546547261314e-05,
"loss": 2.8782,
"step": 97500
},
{
"epoch": 1.46,
"learning_rate": 1.0280959606478038e-05,
"loss": 2.8706,
"step": 98000
},
{
"epoch": 1.47,
"learning_rate": 1.0231372665694764e-05,
"loss": 2.8434,
"step": 98500
},
{
"epoch": 1.47,
"learning_rate": 1.0181785724911488e-05,
"loss": 2.8851,
"step": 99000
},
{
"epoch": 1.48,
"learning_rate": 1.0132198784128214e-05,
"loss": 2.8806,
"step": 99500
},
{
"epoch": 1.49,
"learning_rate": 1.0082611843344938e-05,
"loss": 2.8695,
"step": 100000
},
{
"epoch": 1.5,
"learning_rate": 1.0033024902561664e-05,
"loss": 2.8775,
"step": 100500
},
{
"epoch": 1.5,
"learning_rate": 9.983437961778388e-06,
"loss": 2.8717,
"step": 101000
},
{
"epoch": 1.51,
"learning_rate": 9.933851020995112e-06,
"loss": 2.8616,
"step": 101500
},
{
"epoch": 1.52,
"learning_rate": 9.884264080211836e-06,
"loss": 2.8656,
"step": 102000
},
{
"epoch": 1.52,
"learning_rate": 9.834677139428562e-06,
"loss": 2.8867,
"step": 102500
},
{
"epoch": 1.53,
"learning_rate": 9.785090198645286e-06,
"loss": 2.8491,
"step": 103000
},
{
"epoch": 1.54,
"learning_rate": 9.73550325786201e-06,
"loss": 2.8716,
"step": 103500
},
{
"epoch": 1.55,
"learning_rate": 9.685916317078736e-06,
"loss": 2.8743,
"step": 104000
},
{
"epoch": 1.55,
"learning_rate": 9.63632937629546e-06,
"loss": 2.8503,
"step": 104500
},
{
"epoch": 1.56,
"learning_rate": 9.586742435512184e-06,
"loss": 2.8625,
"step": 105000
},
{
"epoch": 1.57,
"learning_rate": 9.53715549472891e-06,
"loss": 2.8237,
"step": 105500
},
{
"epoch": 1.58,
"learning_rate": 9.487568553945634e-06,
"loss": 2.8619,
"step": 106000
},
{
"epoch": 1.58,
"learning_rate": 9.437981613162358e-06,
"loss": 2.8629,
"step": 106500
},
{
"epoch": 1.59,
"learning_rate": 9.388394672379084e-06,
"loss": 2.8441,
"step": 107000
},
{
"epoch": 1.6,
"learning_rate": 9.338807731595808e-06,
"loss": 2.8569,
"step": 107500
},
{
"epoch": 1.61,
"learning_rate": 9.289220790812532e-06,
"loss": 2.8511,
"step": 108000
},
{
"epoch": 1.61,
"learning_rate": 9.239633850029258e-06,
"loss": 2.8701,
"step": 108500
},
{
"epoch": 1.62,
"learning_rate": 9.190046909245982e-06,
"loss": 2.8572,
"step": 109000
},
{
"epoch": 1.63,
"learning_rate": 9.140459968462706e-06,
"loss": 2.8673,
"step": 109500
},
{
"epoch": 1.64,
"learning_rate": 9.090873027679432e-06,
"loss": 2.8621,
"step": 110000
},
{
"epoch": 1.64,
"learning_rate": 9.041286086896156e-06,
"loss": 2.8592,
"step": 110500
},
{
"epoch": 1.65,
"learning_rate": 8.99169914611288e-06,
"loss": 2.8582,
"step": 111000
},
{
"epoch": 1.66,
"learning_rate": 8.942112205329606e-06,
"loss": 2.8666,
"step": 111500
},
{
"epoch": 1.67,
"learning_rate": 8.89252526454633e-06,
"loss": 2.8588,
"step": 112000
},
{
"epoch": 1.67,
"learning_rate": 8.842938323763054e-06,
"loss": 2.8475,
"step": 112500
},
{
"epoch": 1.68,
"learning_rate": 8.79335138297978e-06,
"loss": 2.8357,
"step": 113000
},
{
"epoch": 1.69,
"learning_rate": 8.743764442196504e-06,
"loss": 2.8608,
"step": 113500
},
{
"epoch": 1.7,
"learning_rate": 8.69417750141323e-06,
"loss": 2.8532,
"step": 114000
},
{
"epoch": 1.7,
"learning_rate": 8.644590560629953e-06,
"loss": 2.8545,
"step": 114500
},
{
"epoch": 1.71,
"learning_rate": 8.595003619846678e-06,
"loss": 2.8277,
"step": 115000
},
{
"epoch": 1.72,
"learning_rate": 8.545416679063403e-06,
"loss": 2.8509,
"step": 115500
},
{
"epoch": 1.73,
"learning_rate": 8.495829738280127e-06,
"loss": 2.8413,
"step": 116000
},
{
"epoch": 1.73,
"learning_rate": 8.446242797496852e-06,
"loss": 2.838,
"step": 116500
},
{
"epoch": 1.74,
"learning_rate": 8.396655856713577e-06,
"loss": 2.8543,
"step": 117000
},
{
"epoch": 1.75,
"learning_rate": 8.347068915930301e-06,
"loss": 2.8347,
"step": 117500
},
{
"epoch": 1.76,
"learning_rate": 8.297481975147026e-06,
"loss": 2.8669,
"step": 118000
},
{
"epoch": 1.76,
"learning_rate": 8.247895034363751e-06,
"loss": 2.8228,
"step": 118500
},
{
"epoch": 1.77,
"learning_rate": 8.198308093580475e-06,
"loss": 2.8385,
"step": 119000
},
{
"epoch": 1.78,
"learning_rate": 8.1487211527972e-06,
"loss": 2.8257,
"step": 119500
},
{
"epoch": 1.79,
"learning_rate": 8.099134212013925e-06,
"loss": 2.8362,
"step": 120000
},
{
"epoch": 1.79,
"learning_rate": 8.04954727123065e-06,
"loss": 2.8319,
"step": 120500
},
{
"epoch": 1.8,
"learning_rate": 7.999960330447374e-06,
"loss": 2.8356,
"step": 121000
},
{
"epoch": 1.81,
"learning_rate": 7.9503733896641e-06,
"loss": 2.8199,
"step": 121500
},
{
"epoch": 1.81,
"learning_rate": 7.900786448880823e-06,
"loss": 2.8039,
"step": 122000
},
{
"epoch": 1.82,
"learning_rate": 7.851199508097548e-06,
"loss": 2.832,
"step": 122500
},
{
"epoch": 1.83,
"learning_rate": 7.801612567314273e-06,
"loss": 2.8125,
"step": 123000
},
{
"epoch": 1.84,
"learning_rate": 7.752025626530997e-06,
"loss": 2.8005,
"step": 123500
},
{
"epoch": 1.84,
"learning_rate": 7.702438685747721e-06,
"loss": 2.8402,
"step": 124000
},
{
"epoch": 1.85,
"learning_rate": 7.652851744964447e-06,
"loss": 2.8186,
"step": 124500
},
{
"epoch": 1.86,
"learning_rate": 7.603264804181172e-06,
"loss": 2.8296,
"step": 125000
},
{
"epoch": 1.87,
"learning_rate": 7.5536778633978955e-06,
"loss": 2.8193,
"step": 125500
},
{
"epoch": 1.87,
"learning_rate": 7.50409092261462e-06,
"loss": 2.8093,
"step": 126000
},
{
"epoch": 1.88,
"learning_rate": 7.454503981831346e-06,
"loss": 2.8383,
"step": 126500
},
{
"epoch": 1.89,
"learning_rate": 7.4049170410480695e-06,
"loss": 2.821,
"step": 127000
},
{
"epoch": 1.9,
"learning_rate": 7.355330100264794e-06,
"loss": 2.7976,
"step": 127500
},
{
"epoch": 1.9,
"learning_rate": 7.30574315948152e-06,
"loss": 2.8183,
"step": 128000
},
{
"epoch": 1.91,
"learning_rate": 7.2561562186982434e-06,
"loss": 2.8089,
"step": 128500
},
{
"epoch": 1.92,
"learning_rate": 7.206569277914968e-06,
"loss": 2.818,
"step": 129000
},
{
"epoch": 1.93,
"learning_rate": 7.156982337131694e-06,
"loss": 2.8052,
"step": 129500
},
{
"epoch": 1.93,
"learning_rate": 7.107395396348419e-06,
"loss": 2.8183,
"step": 130000
},
{
"epoch": 1.94,
"learning_rate": 7.057808455565142e-06,
"loss": 2.8098,
"step": 130500
},
{
"epoch": 1.95,
"learning_rate": 7.008221514781868e-06,
"loss": 2.8155,
"step": 131000
},
{
"epoch": 1.96,
"learning_rate": 6.958634573998593e-06,
"loss": 2.8074,
"step": 131500
},
{
"epoch": 1.96,
"learning_rate": 6.909047633215316e-06,
"loss": 2.7913,
"step": 132000
},
{
"epoch": 1.97,
"learning_rate": 6.859460692432042e-06,
"loss": 2.8122,
"step": 132500
},
{
"epoch": 1.98,
"learning_rate": 6.809873751648767e-06,
"loss": 2.8327,
"step": 133000
},
{
"epoch": 1.99,
"learning_rate": 6.76028681086549e-06,
"loss": 2.7897,
"step": 133500
},
{
"epoch": 1.99,
"learning_rate": 6.710699870082215e-06,
"loss": 2.7777,
"step": 134000
},
{
"epoch": 2.0,
"eval_bleu": 13.585366050482984,
"eval_loss": 2.2518081665039062,
"eval_runtime": 4182.4693,
"eval_samples_per_second": 9.667,
"eval_steps_per_second": 0.604,
"step": 134444
},
{
"epoch": 2.0,
"learning_rate": 6.661112929298941e-06,
"loss": 2.7994,
"step": 134500
},
{
"epoch": 2.01,
"learning_rate": 6.611525988515664e-06,
"loss": 2.8167,
"step": 135000
},
{
"epoch": 2.02,
"learning_rate": 6.561939047732389e-06,
"loss": 2.8123,
"step": 135500
},
{
"epoch": 2.02,
"learning_rate": 6.512352106949115e-06,
"loss": 2.7844,
"step": 136000
},
{
"epoch": 2.03,
"learning_rate": 6.462765166165838e-06,
"loss": 2.7956,
"step": 136500
},
{
"epoch": 2.04,
"learning_rate": 6.413178225382563e-06,
"loss": 2.7968,
"step": 137000
},
{
"epoch": 2.05,
"learning_rate": 6.363591284599289e-06,
"loss": 2.7916,
"step": 137500
},
{
"epoch": 2.05,
"learning_rate": 6.314004343816014e-06,
"loss": 2.7958,
"step": 138000
},
{
"epoch": 2.06,
"learning_rate": 6.264417403032737e-06,
"loss": 2.7855,
"step": 138500
},
{
"epoch": 2.07,
"learning_rate": 6.214830462249463e-06,
"loss": 2.7876,
"step": 139000
},
{
"epoch": 2.08,
"learning_rate": 6.165243521466188e-06,
"loss": 2.7724,
"step": 139500
},
{
"epoch": 2.08,
"learning_rate": 6.115656580682911e-06,
"loss": 2.8021,
"step": 140000
},
{
"epoch": 2.09,
"learning_rate": 6.066069639899637e-06,
"loss": 2.8024,
"step": 140500
},
{
"epoch": 2.1,
"learning_rate": 6.016482699116362e-06,
"loss": 2.7891,
"step": 141000
},
{
"epoch": 2.1,
"learning_rate": 5.966895758333085e-06,
"loss": 2.7592,
"step": 141500
},
{
"epoch": 2.11,
"learning_rate": 5.917308817549811e-06,
"loss": 2.7895,
"step": 142000
},
{
"epoch": 2.12,
"learning_rate": 5.867721876766536e-06,
"loss": 2.8106,
"step": 142500
},
{
"epoch": 2.13,
"learning_rate": 5.818134935983259e-06,
"loss": 2.7985,
"step": 143000
},
{
"epoch": 2.13,
"learning_rate": 5.768547995199985e-06,
"loss": 2.8137,
"step": 143500
},
{
"epoch": 2.14,
"learning_rate": 5.71896105441671e-06,
"loss": 2.7824,
"step": 144000
},
{
"epoch": 2.15,
"learning_rate": 5.669374113633433e-06,
"loss": 2.7878,
"step": 144500
},
{
"epoch": 2.16,
"learning_rate": 5.619787172850158e-06,
"loss": 2.7861,
"step": 145000
},
{
"epoch": 2.16,
"learning_rate": 5.570200232066884e-06,
"loss": 2.7886,
"step": 145500
},
{
"epoch": 2.17,
"learning_rate": 5.520613291283607e-06,
"loss": 2.7798,
"step": 146000
},
{
"epoch": 2.18,
"learning_rate": 5.471026350500332e-06,
"loss": 2.8015,
"step": 146500
},
{
"epoch": 2.19,
"learning_rate": 5.421439409717058e-06,
"loss": 2.8013,
"step": 147000
},
{
"epoch": 2.19,
"learning_rate": 5.371852468933783e-06,
"loss": 2.7567,
"step": 147500
},
{
"epoch": 2.2,
"learning_rate": 5.322265528150506e-06,
"loss": 2.8004,
"step": 148000
},
{
"epoch": 2.21,
"learning_rate": 5.272678587367232e-06,
"loss": 2.7817,
"step": 148500
},
{
"epoch": 2.22,
"learning_rate": 5.223091646583957e-06,
"loss": 2.7674,
"step": 149000
},
{
"epoch": 2.22,
"learning_rate": 5.17350470580068e-06,
"loss": 2.7882,
"step": 149500
},
{
"epoch": 2.23,
"learning_rate": 5.123917765017406e-06,
"loss": 2.7695,
"step": 150000
},
{
"epoch": 2.24,
"learning_rate": 5.074330824234131e-06,
"loss": 2.7708,
"step": 150500
},
{
"epoch": 2.25,
"learning_rate": 5.024743883450854e-06,
"loss": 2.7791,
"step": 151000
},
{
"epoch": 2.25,
"learning_rate": 4.97515694266758e-06,
"loss": 2.7998,
"step": 151500
},
{
"epoch": 2.26,
"learning_rate": 4.925570001884304e-06,
"loss": 2.7546,
"step": 152000
},
{
"epoch": 2.27,
"learning_rate": 4.875983061101029e-06,
"loss": 2.7576,
"step": 152500
},
{
"epoch": 2.28,
"learning_rate": 4.826396120317754e-06,
"loss": 2.7917,
"step": 153000
},
{
"epoch": 2.28,
"learning_rate": 4.776809179534479e-06,
"loss": 2.7828,
"step": 153500
},
{
"epoch": 2.29,
"learning_rate": 4.727222238751203e-06,
"loss": 2.7884,
"step": 154000
},
{
"epoch": 2.3,
"learning_rate": 4.677635297967928e-06,
"loss": 2.7807,
"step": 154500
},
{
"epoch": 2.31,
"learning_rate": 4.628048357184653e-06,
"loss": 2.7652,
"step": 155000
},
{
"epoch": 2.31,
"learning_rate": 4.578461416401377e-06,
"loss": 2.7918,
"step": 155500
},
{
"epoch": 2.32,
"learning_rate": 4.528874475618102e-06,
"loss": 2.7771,
"step": 156000
},
{
"epoch": 2.33,
"learning_rate": 4.479287534834827e-06,
"loss": 2.7561,
"step": 156500
},
{
"epoch": 2.34,
"learning_rate": 4.429700594051551e-06,
"loss": 2.7789,
"step": 157000
},
{
"epoch": 2.34,
"learning_rate": 4.380113653268276e-06,
"loss": 2.7629,
"step": 157500
},
{
"epoch": 2.35,
"learning_rate": 4.330526712485001e-06,
"loss": 2.7672,
"step": 158000
},
{
"epoch": 2.36,
"learning_rate": 4.280939771701725e-06,
"loss": 2.7643,
"step": 158500
},
{
"epoch": 2.37,
"learning_rate": 4.23135283091845e-06,
"loss": 2.7836,
"step": 159000
},
{
"epoch": 2.37,
"learning_rate": 4.181765890135175e-06,
"loss": 2.7742,
"step": 159500
},
{
"epoch": 2.38,
"learning_rate": 4.132178949351899e-06,
"loss": 2.7504,
"step": 160000
},
{
"epoch": 2.39,
"learning_rate": 4.082592008568624e-06,
"loss": 2.7738,
"step": 160500
},
{
"epoch": 2.4,
"learning_rate": 4.033005067785349e-06,
"loss": 2.7741,
"step": 161000
},
{
"epoch": 2.4,
"learning_rate": 3.983418127002074e-06,
"loss": 2.768,
"step": 161500
},
{
"epoch": 2.41,
"learning_rate": 3.933831186218798e-06,
"loss": 2.7874,
"step": 162000
},
{
"epoch": 2.42,
"learning_rate": 3.884244245435523e-06,
"loss": 2.7733,
"step": 162500
},
{
"epoch": 2.42,
"learning_rate": 3.834657304652248e-06,
"loss": 2.7677,
"step": 163000
},
{
"epoch": 2.43,
"learning_rate": 3.7850703638689717e-06,
"loss": 2.7601,
"step": 163500
},
{
"epoch": 2.44,
"learning_rate": 3.735483423085696e-06,
"loss": 2.7832,
"step": 164000
},
{
"epoch": 2.45,
"learning_rate": 3.685896482302421e-06,
"loss": 2.7704,
"step": 164500
},
{
"epoch": 2.45,
"learning_rate": 3.6363095415191457e-06,
"loss": 2.7698,
"step": 165000
},
{
"epoch": 2.46,
"learning_rate": 3.5867226007358706e-06,
"loss": 2.7724,
"step": 165500
},
{
"epoch": 2.47,
"learning_rate": 3.537135659952595e-06,
"loss": 2.7647,
"step": 166000
},
{
"epoch": 2.48,
"learning_rate": 3.4875487191693196e-06,
"loss": 2.7719,
"step": 166500
},
{
"epoch": 2.48,
"learning_rate": 3.4379617783860446e-06,
"loss": 2.7578,
"step": 167000
},
{
"epoch": 2.49,
"learning_rate": 3.388374837602769e-06,
"loss": 2.7478,
"step": 167500
},
{
"epoch": 2.5,
"learning_rate": 3.3387878968194936e-06,
"loss": 2.7747,
"step": 168000
},
{
"epoch": 2.51,
"learning_rate": 3.2892009560362186e-06,
"loss": 2.7533,
"step": 168500
},
{
"epoch": 2.51,
"learning_rate": 3.239614015252943e-06,
"loss": 2.7488,
"step": 169000
},
{
"epoch": 2.52,
"learning_rate": 3.190027074469668e-06,
"loss": 2.7577,
"step": 169500
},
{
"epoch": 2.53,
"learning_rate": 3.1404401336863925e-06,
"loss": 2.7646,
"step": 170000
},
{
"epoch": 2.54,
"learning_rate": 3.090853192903117e-06,
"loss": 2.7556,
"step": 170500
},
{
"epoch": 2.54,
"learning_rate": 3.041266252119842e-06,
"loss": 2.7578,
"step": 171000
},
{
"epoch": 2.55,
"learning_rate": 2.9916793113365665e-06,
"loss": 2.7428,
"step": 171500
},
{
"epoch": 2.56,
"learning_rate": 2.942092370553291e-06,
"loss": 2.7487,
"step": 172000
},
{
"epoch": 2.57,
"learning_rate": 2.892505429770016e-06,
"loss": 2.7457,
"step": 172500
},
{
"epoch": 2.57,
"learning_rate": 2.8429184889867405e-06,
"loss": 2.7366,
"step": 173000
},
{
"epoch": 2.58,
"learning_rate": 2.7933315482034655e-06,
"loss": 2.7497,
"step": 173500
},
{
"epoch": 2.59,
"learning_rate": 2.74374460742019e-06,
"loss": 2.7467,
"step": 174000
},
{
"epoch": 2.6,
"learning_rate": 2.6941576666369145e-06,
"loss": 2.7633,
"step": 174500
},
{
"epoch": 2.6,
"learning_rate": 2.6445707258536394e-06,
"loss": 2.7681,
"step": 175000
},
{
"epoch": 2.61,
"learning_rate": 2.594983785070364e-06,
"loss": 2.7552,
"step": 175500
},
{
"epoch": 2.62,
"learning_rate": 2.5453968442870885e-06,
"loss": 2.7539,
"step": 176000
},
{
"epoch": 2.63,
"learning_rate": 2.4958099035038134e-06,
"loss": 2.7393,
"step": 176500
},
{
"epoch": 2.63,
"learning_rate": 2.446222962720538e-06,
"loss": 2.7727,
"step": 177000
},
{
"epoch": 2.64,
"learning_rate": 2.396636021937263e-06,
"loss": 2.736,
"step": 177500
},
{
"epoch": 2.65,
"learning_rate": 2.3470490811539874e-06,
"loss": 2.7546,
"step": 178000
},
{
"epoch": 2.66,
"learning_rate": 2.297462140370712e-06,
"loss": 2.7601,
"step": 178500
},
{
"epoch": 2.66,
"learning_rate": 2.247875199587437e-06,
"loss": 2.7456,
"step": 179000
},
{
"epoch": 2.67,
"learning_rate": 2.1982882588041614e-06,
"loss": 2.76,
"step": 179500
},
{
"epoch": 2.68,
"learning_rate": 2.1487013180208863e-06,
"loss": 2.7396,
"step": 180000
},
{
"epoch": 2.69,
"learning_rate": 2.099114377237611e-06,
"loss": 2.761,
"step": 180500
},
{
"epoch": 2.69,
"learning_rate": 2.0495274364543354e-06,
"loss": 2.7603,
"step": 181000
},
{
"epoch": 2.7,
"learning_rate": 1.9999404956710603e-06,
"loss": 2.7614,
"step": 181500
},
{
"epoch": 2.71,
"learning_rate": 1.950353554887785e-06,
"loss": 2.7638,
"step": 182000
},
{
"epoch": 2.71,
"learning_rate": 1.9007666141045096e-06,
"loss": 2.7806,
"step": 182500
},
{
"epoch": 2.72,
"learning_rate": 1.8511796733212343e-06,
"loss": 2.7561,
"step": 183000
},
{
"epoch": 2.73,
"learning_rate": 1.801592732537959e-06,
"loss": 2.7473,
"step": 183500
},
{
"epoch": 2.74,
"learning_rate": 1.7520057917546838e-06,
"loss": 2.7405,
"step": 184000
},
{
"epoch": 2.74,
"learning_rate": 1.7024188509714083e-06,
"loss": 2.742,
"step": 184500
},
{
"epoch": 2.75,
"learning_rate": 1.652831910188133e-06,
"loss": 2.7387,
"step": 185000
},
{
"epoch": 2.76,
"learning_rate": 1.6032449694048577e-06,
"loss": 2.7681,
"step": 185500
},
{
"epoch": 2.77,
"learning_rate": 1.5536580286215825e-06,
"loss": 2.7599,
"step": 186000
},
{
"epoch": 2.77,
"learning_rate": 1.504071087838307e-06,
"loss": 2.7497,
"step": 186500
},
{
"epoch": 2.78,
"learning_rate": 1.4544841470550317e-06,
"loss": 2.7417,
"step": 187000
},
{
"epoch": 2.79,
"learning_rate": 1.4048972062717565e-06,
"loss": 2.739,
"step": 187500
}
],
"max_steps": 201666,
"num_train_epochs": 3,
"total_flos": 2.2090056412387738e+17,
"trial_name": null,
"trial_params": null
}