t2p-t5-large-commonvoice / trainer_state.json
cecilemacaire's picture
Upload 12 files
915bfcb verified
{
"best_metric": 0.16174831986427307,
"best_model_checkpoint": "checkpoints_commonvoice/checkpoint-543873",
"epoch": 54.0,
"global_step": 889974,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03,
"learning_rate": 1.9984922031430132e-05,
"loss": 2.3359,
"step": 500
},
{
"epoch": 0.06,
"learning_rate": 1.996975304896548e-05,
"loss": 1.5264,
"step": 1000
},
{
"epoch": 0.09,
"learning_rate": 1.995458406650082e-05,
"loss": 1.3191,
"step": 1500
},
{
"epoch": 0.12,
"learning_rate": 1.9939415084036163e-05,
"loss": 1.1985,
"step": 2000
},
{
"epoch": 0.15,
"learning_rate": 1.9924246101571506e-05,
"loss": 1.098,
"step": 2500
},
{
"epoch": 0.18,
"learning_rate": 1.9909077119106852e-05,
"loss": 1.0314,
"step": 3000
},
{
"epoch": 0.21,
"learning_rate": 1.9893908136642194e-05,
"loss": 0.9714,
"step": 3500
},
{
"epoch": 0.24,
"learning_rate": 1.987873915417754e-05,
"loss": 0.9334,
"step": 4000
},
{
"epoch": 0.27,
"learning_rate": 1.9863570171712883e-05,
"loss": 0.8831,
"step": 4500
},
{
"epoch": 0.3,
"learning_rate": 1.984840118924823e-05,
"loss": 0.8505,
"step": 5000
},
{
"epoch": 0.33,
"learning_rate": 1.983323220678357e-05,
"loss": 0.8127,
"step": 5500
},
{
"epoch": 0.36,
"learning_rate": 1.9818063224318917e-05,
"loss": 0.786,
"step": 6000
},
{
"epoch": 0.39,
"learning_rate": 1.980289424185426e-05,
"loss": 0.7604,
"step": 6500
},
{
"epoch": 0.42,
"learning_rate": 1.9787725259389602e-05,
"loss": 0.7358,
"step": 7000
},
{
"epoch": 0.46,
"learning_rate": 1.9772556276924944e-05,
"loss": 0.7159,
"step": 7500
},
{
"epoch": 0.49,
"learning_rate": 1.9757387294460287e-05,
"loss": 0.6955,
"step": 8000
},
{
"epoch": 0.52,
"learning_rate": 1.9742218311995633e-05,
"loss": 0.6626,
"step": 8500
},
{
"epoch": 0.55,
"learning_rate": 1.9727049329530975e-05,
"loss": 0.6589,
"step": 9000
},
{
"epoch": 0.58,
"learning_rate": 1.971188034706632e-05,
"loss": 0.6474,
"step": 9500
},
{
"epoch": 0.61,
"learning_rate": 1.9696711364601664e-05,
"loss": 0.6252,
"step": 10000
},
{
"epoch": 0.64,
"learning_rate": 1.9681572720101938e-05,
"loss": 0.6126,
"step": 10500
},
{
"epoch": 0.67,
"learning_rate": 1.966640373763728e-05,
"loss": 0.6078,
"step": 11000
},
{
"epoch": 0.7,
"learning_rate": 1.9651234755172626e-05,
"loss": 0.5942,
"step": 11500
},
{
"epoch": 0.73,
"learning_rate": 1.9636096110672897e-05,
"loss": 0.5797,
"step": 12000
},
{
"epoch": 0.76,
"learning_rate": 1.962092712820824e-05,
"loss": 0.5718,
"step": 12500
},
{
"epoch": 0.79,
"learning_rate": 1.9605758145743585e-05,
"loss": 0.5652,
"step": 13000
},
{
"epoch": 0.82,
"learning_rate": 1.9590589163278928e-05,
"loss": 0.5503,
"step": 13500
},
{
"epoch": 0.85,
"learning_rate": 1.9575450518779202e-05,
"loss": 0.5468,
"step": 14000
},
{
"epoch": 0.88,
"learning_rate": 1.9560311874279473e-05,
"loss": 0.5312,
"step": 14500
},
{
"epoch": 0.91,
"learning_rate": 1.954514289181482e-05,
"loss": 0.5217,
"step": 15000
},
{
"epoch": 0.94,
"learning_rate": 1.952997390935016e-05,
"loss": 0.5175,
"step": 15500
},
{
"epoch": 0.97,
"learning_rate": 1.9514804926885507e-05,
"loss": 0.5108,
"step": 16000
},
{
"epoch": 1.0,
"eval_bleu": 69.1957,
"eval_gen_len": 13.9687,
"eval_loss": 0.39875420928001404,
"eval_runtime": 174.0832,
"eval_samples_per_second": 92.622,
"eval_steps_per_second": 2.895,
"step": 16481
},
{
"epoch": 1.0,
"learning_rate": 1.949963594442085e-05,
"loss": 0.5057,
"step": 16500
},
{
"epoch": 1.03,
"learning_rate": 1.9484466961956195e-05,
"loss": 0.4922,
"step": 17000
},
{
"epoch": 1.06,
"learning_rate": 1.9469297979491538e-05,
"loss": 0.4873,
"step": 17500
},
{
"epoch": 1.09,
"learning_rate": 1.945412899702688e-05,
"loss": 0.485,
"step": 18000
},
{
"epoch": 1.12,
"learning_rate": 1.9438960014562226e-05,
"loss": 0.4729,
"step": 18500
},
{
"epoch": 1.15,
"learning_rate": 1.942379103209757e-05,
"loss": 0.4682,
"step": 19000
},
{
"epoch": 1.18,
"learning_rate": 1.940862204963291e-05,
"loss": 0.456,
"step": 19500
},
{
"epoch": 1.21,
"learning_rate": 1.9393453067168254e-05,
"loss": 0.4587,
"step": 20000
},
{
"epoch": 1.24,
"learning_rate": 1.9378314422668528e-05,
"loss": 0.4527,
"step": 20500
},
{
"epoch": 1.27,
"learning_rate": 1.936314544020387e-05,
"loss": 0.4474,
"step": 21000
},
{
"epoch": 1.3,
"learning_rate": 1.9348006795704145e-05,
"loss": 0.4398,
"step": 21500
},
{
"epoch": 1.33,
"learning_rate": 1.933283781323949e-05,
"loss": 0.4378,
"step": 22000
},
{
"epoch": 1.37,
"learning_rate": 1.9317668830774833e-05,
"loss": 0.4355,
"step": 22500
},
{
"epoch": 1.4,
"learning_rate": 1.9302530186275107e-05,
"loss": 0.4331,
"step": 23000
},
{
"epoch": 1.43,
"learning_rate": 1.928736120381045e-05,
"loss": 0.4294,
"step": 23500
},
{
"epoch": 1.46,
"learning_rate": 1.9272192221345792e-05,
"loss": 0.4286,
"step": 24000
},
{
"epoch": 1.49,
"learning_rate": 1.9257023238881138e-05,
"loss": 0.4174,
"step": 24500
},
{
"epoch": 1.52,
"learning_rate": 1.924185425641648e-05,
"loss": 0.4158,
"step": 25000
},
{
"epoch": 1.55,
"learning_rate": 1.9226685273951823e-05,
"loss": 0.4142,
"step": 25500
},
{
"epoch": 1.58,
"learning_rate": 1.921151629148717e-05,
"loss": 0.4067,
"step": 26000
},
{
"epoch": 1.61,
"learning_rate": 1.919634730902251e-05,
"loss": 0.4071,
"step": 26500
},
{
"epoch": 1.64,
"learning_rate": 1.9181178326557857e-05,
"loss": 0.4024,
"step": 27000
},
{
"epoch": 1.67,
"learning_rate": 1.91660093440932e-05,
"loss": 0.3952,
"step": 27500
},
{
"epoch": 1.7,
"learning_rate": 1.9150840361628546e-05,
"loss": 0.3987,
"step": 28000
},
{
"epoch": 1.73,
"learning_rate": 1.9135671379163888e-05,
"loss": 0.3952,
"step": 28500
},
{
"epoch": 1.76,
"learning_rate": 1.912056307262909e-05,
"loss": 0.3901,
"step": 29000
},
{
"epoch": 1.79,
"learning_rate": 1.9105394090164433e-05,
"loss": 0.3846,
"step": 29500
},
{
"epoch": 1.82,
"learning_rate": 1.909022510769978e-05,
"loss": 0.384,
"step": 30000
},
{
"epoch": 1.85,
"learning_rate": 1.907505612523512e-05,
"loss": 0.3851,
"step": 30500
},
{
"epoch": 1.88,
"learning_rate": 1.9059887142770464e-05,
"loss": 0.3849,
"step": 31000
},
{
"epoch": 1.91,
"learning_rate": 1.904471816030581e-05,
"loss": 0.3804,
"step": 31500
},
{
"epoch": 1.94,
"learning_rate": 1.9029549177841152e-05,
"loss": 0.3719,
"step": 32000
},
{
"epoch": 1.97,
"learning_rate": 1.9014380195376495e-05,
"loss": 0.3739,
"step": 32500
},
{
"epoch": 2.0,
"eval_bleu": 74.3843,
"eval_gen_len": 13.9751,
"eval_loss": 0.3009156882762909,
"eval_runtime": 172.1504,
"eval_samples_per_second": 93.662,
"eval_steps_per_second": 2.928,
"step": 32962
},
{
"epoch": 2.0,
"learning_rate": 1.8999211212911837e-05,
"loss": 0.3771,
"step": 33000
},
{
"epoch": 2.03,
"learning_rate": 1.8984042230447183e-05,
"loss": 0.3596,
"step": 33500
},
{
"epoch": 2.06,
"learning_rate": 1.8968873247982526e-05,
"loss": 0.3572,
"step": 34000
},
{
"epoch": 2.09,
"learning_rate": 1.8953704265517872e-05,
"loss": 0.357,
"step": 34500
},
{
"epoch": 2.12,
"learning_rate": 1.8938595958983074e-05,
"loss": 0.3532,
"step": 35000
},
{
"epoch": 2.15,
"learning_rate": 1.8923426976518417e-05,
"loss": 0.3548,
"step": 35500
},
{
"epoch": 2.18,
"learning_rate": 1.890825799405376e-05,
"loss": 0.3508,
"step": 36000
},
{
"epoch": 2.21,
"learning_rate": 1.8893119349554033e-05,
"loss": 0.3457,
"step": 36500
},
{
"epoch": 2.25,
"learning_rate": 1.8877950367089376e-05,
"loss": 0.3467,
"step": 37000
},
{
"epoch": 2.28,
"learning_rate": 1.886278138462472e-05,
"loss": 0.3457,
"step": 37500
},
{
"epoch": 2.31,
"learning_rate": 1.8847612402160064e-05,
"loss": 0.3488,
"step": 38000
},
{
"epoch": 2.34,
"learning_rate": 1.8832443419695407e-05,
"loss": 0.3419,
"step": 38500
},
{
"epoch": 2.37,
"learning_rate": 1.8817274437230753e-05,
"loss": 0.344,
"step": 39000
},
{
"epoch": 2.4,
"learning_rate": 1.8802135792731023e-05,
"loss": 0.3382,
"step": 39500
},
{
"epoch": 2.43,
"learning_rate": 1.878696681026637e-05,
"loss": 0.3381,
"step": 40000
},
{
"epoch": 2.46,
"learning_rate": 1.8771797827801712e-05,
"loss": 0.3378,
"step": 40500
},
{
"epoch": 2.49,
"learning_rate": 1.8756628845337058e-05,
"loss": 0.337,
"step": 41000
},
{
"epoch": 2.52,
"learning_rate": 1.8741520538802257e-05,
"loss": 0.3354,
"step": 41500
},
{
"epoch": 2.55,
"learning_rate": 1.8726351556337603e-05,
"loss": 0.3339,
"step": 42000
},
{
"epoch": 2.58,
"learning_rate": 1.8711212911837874e-05,
"loss": 0.3324,
"step": 42500
},
{
"epoch": 2.61,
"learning_rate": 1.869604392937322e-05,
"loss": 0.3233,
"step": 43000
},
{
"epoch": 2.64,
"learning_rate": 1.8680874946908562e-05,
"loss": 0.3307,
"step": 43500
},
{
"epoch": 2.67,
"learning_rate": 1.8665736302408836e-05,
"loss": 0.3245,
"step": 44000
},
{
"epoch": 2.7,
"learning_rate": 1.865056731994418e-05,
"loss": 0.3249,
"step": 44500
},
{
"epoch": 2.73,
"learning_rate": 1.8635398337479525e-05,
"loss": 0.3181,
"step": 45000
},
{
"epoch": 2.76,
"learning_rate": 1.8620229355014867e-05,
"loss": 0.3238,
"step": 45500
},
{
"epoch": 2.79,
"learning_rate": 1.860506037255021e-05,
"loss": 0.3256,
"step": 46000
},
{
"epoch": 2.82,
"learning_rate": 1.8589891390085555e-05,
"loss": 0.3203,
"step": 46500
},
{
"epoch": 2.85,
"learning_rate": 1.8574722407620898e-05,
"loss": 0.3174,
"step": 47000
},
{
"epoch": 2.88,
"learning_rate": 1.8559553425156244e-05,
"loss": 0.3185,
"step": 47500
},
{
"epoch": 2.91,
"learning_rate": 1.8544384442691586e-05,
"loss": 0.3181,
"step": 48000
},
{
"epoch": 2.94,
"learning_rate": 1.852921546022693e-05,
"loss": 0.3172,
"step": 48500
},
{
"epoch": 2.97,
"learning_rate": 1.8514046477762275e-05,
"loss": 0.3179,
"step": 49000
},
{
"epoch": 3.0,
"eval_bleu": 76.4066,
"eval_gen_len": 13.9632,
"eval_loss": 0.26052698493003845,
"eval_runtime": 171.9366,
"eval_samples_per_second": 93.779,
"eval_steps_per_second": 2.931,
"step": 49443
},
{
"epoch": 3.0,
"learning_rate": 1.8498877495297617e-05,
"loss": 0.3118,
"step": 49500
},
{
"epoch": 3.03,
"learning_rate": 1.848370851283296e-05,
"loss": 0.2996,
"step": 50000
},
{
"epoch": 3.06,
"learning_rate": 1.8468539530368302e-05,
"loss": 0.3008,
"step": 50500
},
{
"epoch": 3.09,
"learning_rate": 1.8453370547903648e-05,
"loss": 0.3075,
"step": 51000
},
{
"epoch": 3.12,
"learning_rate": 1.843820156543899e-05,
"loss": 0.2995,
"step": 51500
},
{
"epoch": 3.16,
"learning_rate": 1.8423032582974336e-05,
"loss": 0.2985,
"step": 52000
},
{
"epoch": 3.19,
"learning_rate": 1.840786360050968e-05,
"loss": 0.2996,
"step": 52500
},
{
"epoch": 3.22,
"learning_rate": 1.8392694618045025e-05,
"loss": 0.2993,
"step": 53000
},
{
"epoch": 3.25,
"learning_rate": 1.8377525635580367e-05,
"loss": 0.3027,
"step": 53500
},
{
"epoch": 3.28,
"learning_rate": 1.8362356653115713e-05,
"loss": 0.2922,
"step": 54000
},
{
"epoch": 3.31,
"learning_rate": 1.8347187670651056e-05,
"loss": 0.2934,
"step": 54500
},
{
"epoch": 3.34,
"learning_rate": 1.8332018688186398e-05,
"loss": 0.297,
"step": 55000
},
{
"epoch": 3.37,
"learning_rate": 1.8316880043686672e-05,
"loss": 0.2923,
"step": 55500
},
{
"epoch": 3.4,
"learning_rate": 1.8301741399186943e-05,
"loss": 0.2942,
"step": 56000
},
{
"epoch": 3.43,
"learning_rate": 1.828657241672229e-05,
"loss": 0.2946,
"step": 56500
},
{
"epoch": 3.46,
"learning_rate": 1.827140343425763e-05,
"loss": 0.2859,
"step": 57000
},
{
"epoch": 3.49,
"learning_rate": 1.8256234451792977e-05,
"loss": 0.2873,
"step": 57500
},
{
"epoch": 3.52,
"learning_rate": 1.824106546932832e-05,
"loss": 0.2934,
"step": 58000
},
{
"epoch": 3.55,
"learning_rate": 1.8225896486863662e-05,
"loss": 0.2912,
"step": 58500
},
{
"epoch": 3.58,
"learning_rate": 1.8210727504399005e-05,
"loss": 0.2886,
"step": 59000
},
{
"epoch": 3.61,
"learning_rate": 1.819555852193435e-05,
"loss": 0.2859,
"step": 59500
},
{
"epoch": 3.64,
"learning_rate": 1.8180389539469693e-05,
"loss": 0.2891,
"step": 60000
},
{
"epoch": 3.67,
"learning_rate": 1.8165220557005036e-05,
"loss": 0.2834,
"step": 60500
},
{
"epoch": 3.7,
"learning_rate": 1.815005157454038e-05,
"loss": 0.2842,
"step": 61000
},
{
"epoch": 3.73,
"learning_rate": 1.8134882592075724e-05,
"loss": 0.2812,
"step": 61500
},
{
"epoch": 3.76,
"learning_rate": 1.8119743947575998e-05,
"loss": 0.2834,
"step": 62000
},
{
"epoch": 3.79,
"learning_rate": 1.810457496511134e-05,
"loss": 0.286,
"step": 62500
},
{
"epoch": 3.82,
"learning_rate": 1.8089405982646687e-05,
"loss": 0.2779,
"step": 63000
},
{
"epoch": 3.85,
"learning_rate": 1.807423700018203e-05,
"loss": 0.2847,
"step": 63500
},
{
"epoch": 3.88,
"learning_rate": 1.8059068017717375e-05,
"loss": 0.2834,
"step": 64000
},
{
"epoch": 3.91,
"learning_rate": 1.8043929373217646e-05,
"loss": 0.2792,
"step": 64500
},
{
"epoch": 3.94,
"learning_rate": 1.802879072871792e-05,
"loss": 0.2834,
"step": 65000
},
{
"epoch": 3.97,
"learning_rate": 1.8013621746253263e-05,
"loss": 0.2795,
"step": 65500
},
{
"epoch": 4.0,
"eval_bleu": 77.5034,
"eval_gen_len": 14.0025,
"eval_loss": 0.23620256781578064,
"eval_runtime": 168.2,
"eval_samples_per_second": 95.862,
"eval_steps_per_second": 2.996,
"step": 65924
},
{
"epoch": 4.0,
"learning_rate": 1.799845276378861e-05,
"loss": 0.2778,
"step": 66000
},
{
"epoch": 4.03,
"learning_rate": 1.798328378132395e-05,
"loss": 0.2714,
"step": 66500
},
{
"epoch": 4.07,
"learning_rate": 1.7968145136824225e-05,
"loss": 0.2727,
"step": 67000
},
{
"epoch": 4.1,
"learning_rate": 1.7952976154359568e-05,
"loss": 0.2677,
"step": 67500
},
{
"epoch": 4.13,
"learning_rate": 1.793780717189491e-05,
"loss": 0.2651,
"step": 68000
},
{
"epoch": 4.16,
"learning_rate": 1.7922638189430256e-05,
"loss": 0.2666,
"step": 68500
},
{
"epoch": 4.19,
"learning_rate": 1.79074692069656e-05,
"loss": 0.2655,
"step": 69000
},
{
"epoch": 4.22,
"learning_rate": 1.789230022450094e-05,
"loss": 0.2649,
"step": 69500
},
{
"epoch": 4.25,
"learning_rate": 1.7877131242036287e-05,
"loss": 0.2613,
"step": 70000
},
{
"epoch": 4.28,
"learning_rate": 1.786196225957163e-05,
"loss": 0.2708,
"step": 70500
},
{
"epoch": 4.31,
"learning_rate": 1.7846823615071904e-05,
"loss": 0.266,
"step": 71000
},
{
"epoch": 4.34,
"learning_rate": 1.7831654632607246e-05,
"loss": 0.2631,
"step": 71500
},
{
"epoch": 4.37,
"learning_rate": 1.781648565014259e-05,
"loss": 0.262,
"step": 72000
},
{
"epoch": 4.4,
"learning_rate": 1.780131666767793e-05,
"loss": 0.2614,
"step": 72500
},
{
"epoch": 4.43,
"learning_rate": 1.7786147685213277e-05,
"loss": 0.2607,
"step": 73000
},
{
"epoch": 4.46,
"learning_rate": 1.7771009040713548e-05,
"loss": 0.2646,
"step": 73500
},
{
"epoch": 4.49,
"learning_rate": 1.7755840058248894e-05,
"loss": 0.2619,
"step": 74000
},
{
"epoch": 4.52,
"learning_rate": 1.7740731751714096e-05,
"loss": 0.2572,
"step": 74500
},
{
"epoch": 4.55,
"learning_rate": 1.772559310721437e-05,
"loss": 0.2618,
"step": 75000
},
{
"epoch": 4.58,
"learning_rate": 1.7710424124749713e-05,
"loss": 0.2596,
"step": 75500
},
{
"epoch": 4.61,
"learning_rate": 1.769525514228506e-05,
"loss": 0.2612,
"step": 76000
},
{
"epoch": 4.64,
"learning_rate": 1.76800861598204e-05,
"loss": 0.2555,
"step": 76500
},
{
"epoch": 4.67,
"learning_rate": 1.7664917177355744e-05,
"loss": 0.2597,
"step": 77000
},
{
"epoch": 4.7,
"learning_rate": 1.7649748194891086e-05,
"loss": 0.2563,
"step": 77500
},
{
"epoch": 4.73,
"learning_rate": 1.7634579212426432e-05,
"loss": 0.2587,
"step": 78000
},
{
"epoch": 4.76,
"learning_rate": 1.7619440567926703e-05,
"loss": 0.2564,
"step": 78500
},
{
"epoch": 4.79,
"learning_rate": 1.760427158546205e-05,
"loss": 0.2589,
"step": 79000
},
{
"epoch": 4.82,
"learning_rate": 1.758910260299739e-05,
"loss": 0.2564,
"step": 79500
},
{
"epoch": 4.85,
"learning_rate": 1.7573933620532737e-05,
"loss": 0.2586,
"step": 80000
},
{
"epoch": 4.88,
"learning_rate": 1.755876463806808e-05,
"loss": 0.2519,
"step": 80500
},
{
"epoch": 4.91,
"learning_rate": 1.7543595655603422e-05,
"loss": 0.2539,
"step": 81000
},
{
"epoch": 4.95,
"learning_rate": 1.7528426673138768e-05,
"loss": 0.2547,
"step": 81500
},
{
"epoch": 4.98,
"learning_rate": 1.751325769067411e-05,
"loss": 0.2543,
"step": 82000
},
{
"epoch": 5.0,
"eval_bleu": 78.2577,
"eval_gen_len": 14.0135,
"eval_loss": 0.21999071538448334,
"eval_runtime": 172.5496,
"eval_samples_per_second": 93.446,
"eval_steps_per_second": 2.921,
"step": 82405
},
{
"epoch": 5.01,
"learning_rate": 1.7498088708209456e-05,
"loss": 0.2506,
"step": 82500
},
{
"epoch": 5.04,
"learning_rate": 1.74829197257448e-05,
"loss": 0.2437,
"step": 83000
},
{
"epoch": 5.07,
"learning_rate": 1.746775074328014e-05,
"loss": 0.2438,
"step": 83500
},
{
"epoch": 5.1,
"learning_rate": 1.7452581760815487e-05,
"loss": 0.2487,
"step": 84000
},
{
"epoch": 5.13,
"learning_rate": 1.743741277835083e-05,
"loss": 0.2456,
"step": 84500
},
{
"epoch": 5.16,
"learning_rate": 1.7422274133851104e-05,
"loss": 0.2423,
"step": 85000
},
{
"epoch": 5.19,
"learning_rate": 1.7407105151386446e-05,
"loss": 0.2457,
"step": 85500
},
{
"epoch": 5.22,
"learning_rate": 1.739193616892179e-05,
"loss": 0.2449,
"step": 86000
},
{
"epoch": 5.25,
"learning_rate": 1.737676718645713e-05,
"loss": 0.2429,
"step": 86500
},
{
"epoch": 5.28,
"learning_rate": 1.7361598203992477e-05,
"loss": 0.2453,
"step": 87000
},
{
"epoch": 5.31,
"learning_rate": 1.734642922152782e-05,
"loss": 0.2453,
"step": 87500
},
{
"epoch": 5.34,
"learning_rate": 1.7331290577028094e-05,
"loss": 0.247,
"step": 88000
},
{
"epoch": 5.37,
"learning_rate": 1.7316121594563436e-05,
"loss": 0.2429,
"step": 88500
},
{
"epoch": 5.4,
"learning_rate": 1.7300952612098782e-05,
"loss": 0.2361,
"step": 89000
},
{
"epoch": 5.43,
"learning_rate": 1.7285783629634125e-05,
"loss": 0.2405,
"step": 89500
},
{
"epoch": 5.46,
"learning_rate": 1.727061464716947e-05,
"loss": 0.2454,
"step": 90000
},
{
"epoch": 5.49,
"learning_rate": 1.7255445664704813e-05,
"loss": 0.2409,
"step": 90500
},
{
"epoch": 5.52,
"learning_rate": 1.7240276682240156e-05,
"loss": 0.2417,
"step": 91000
},
{
"epoch": 5.55,
"learning_rate": 1.72251076997755e-05,
"loss": 0.24,
"step": 91500
},
{
"epoch": 5.58,
"learning_rate": 1.7209969055275772e-05,
"loss": 0.2356,
"step": 92000
},
{
"epoch": 5.61,
"learning_rate": 1.7194800072811118e-05,
"loss": 0.2387,
"step": 92500
},
{
"epoch": 5.64,
"learning_rate": 1.717963109034646e-05,
"loss": 0.2354,
"step": 93000
},
{
"epoch": 5.67,
"learning_rate": 1.7164462107881807e-05,
"loss": 0.2392,
"step": 93500
},
{
"epoch": 5.7,
"learning_rate": 1.714929312541715e-05,
"loss": 0.2404,
"step": 94000
},
{
"epoch": 5.73,
"learning_rate": 1.7134154480917423e-05,
"loss": 0.2354,
"step": 94500
},
{
"epoch": 5.76,
"learning_rate": 1.7118985498452766e-05,
"loss": 0.2372,
"step": 95000
},
{
"epoch": 5.79,
"learning_rate": 1.710381651598811e-05,
"loss": 0.2343,
"step": 95500
},
{
"epoch": 5.82,
"learning_rate": 1.7088647533523454e-05,
"loss": 0.2383,
"step": 96000
},
{
"epoch": 5.86,
"learning_rate": 1.7073508889023725e-05,
"loss": 0.2363,
"step": 96500
},
{
"epoch": 5.89,
"learning_rate": 1.7058370244524e-05,
"loss": 0.2352,
"step": 97000
},
{
"epoch": 5.92,
"learning_rate": 1.7043201262059345e-05,
"loss": 0.2367,
"step": 97500
},
{
"epoch": 5.95,
"learning_rate": 1.7028032279594688e-05,
"loss": 0.2327,
"step": 98000
},
{
"epoch": 5.98,
"learning_rate": 1.7012893635094962e-05,
"loss": 0.2341,
"step": 98500
},
{
"epoch": 6.0,
"eval_bleu": 78.7501,
"eval_gen_len": 14.013,
"eval_loss": 0.2089208960533142,
"eval_runtime": 172.3801,
"eval_samples_per_second": 93.537,
"eval_steps_per_second": 2.924,
"step": 98886
},
{
"epoch": 6.01,
"learning_rate": 1.6997724652630304e-05,
"loss": 0.2272,
"step": 99000
},
{
"epoch": 6.04,
"learning_rate": 1.6982555670165647e-05,
"loss": 0.2315,
"step": 99500
},
{
"epoch": 6.07,
"learning_rate": 1.696738668770099e-05,
"loss": 0.2238,
"step": 100000
},
{
"epoch": 6.1,
"learning_rate": 1.6952217705236335e-05,
"loss": 0.2268,
"step": 100500
},
{
"epoch": 6.13,
"learning_rate": 1.6937048722771678e-05,
"loss": 0.2264,
"step": 101000
},
{
"epoch": 6.16,
"learning_rate": 1.692187974030702e-05,
"loss": 0.2255,
"step": 101500
},
{
"epoch": 6.19,
"learning_rate": 1.6906710757842366e-05,
"loss": 0.2262,
"step": 102000
},
{
"epoch": 6.22,
"learning_rate": 1.689154177537771e-05,
"loss": 0.2279,
"step": 102500
},
{
"epoch": 6.25,
"learning_rate": 1.6876372792913054e-05,
"loss": 0.2279,
"step": 103000
},
{
"epoch": 6.28,
"learning_rate": 1.6861203810448397e-05,
"loss": 0.2272,
"step": 103500
},
{
"epoch": 6.31,
"learning_rate": 1.684603482798374e-05,
"loss": 0.2235,
"step": 104000
},
{
"epoch": 6.34,
"learning_rate": 1.6830896183484014e-05,
"loss": 0.227,
"step": 104500
},
{
"epoch": 6.37,
"learning_rate": 1.6815727201019356e-05,
"loss": 0.2232,
"step": 105000
},
{
"epoch": 6.4,
"learning_rate": 1.6800558218554702e-05,
"loss": 0.2255,
"step": 105500
},
{
"epoch": 6.43,
"learning_rate": 1.6785389236090044e-05,
"loss": 0.2255,
"step": 106000
},
{
"epoch": 6.46,
"learning_rate": 1.6770280929555247e-05,
"loss": 0.2282,
"step": 106500
},
{
"epoch": 6.49,
"learning_rate": 1.675511194709059e-05,
"loss": 0.2219,
"step": 107000
},
{
"epoch": 6.52,
"learning_rate": 1.6739942964625935e-05,
"loss": 0.2246,
"step": 107500
},
{
"epoch": 6.55,
"learning_rate": 1.6724773982161278e-05,
"loss": 0.2238,
"step": 108000
},
{
"epoch": 6.58,
"learning_rate": 1.6709604999696624e-05,
"loss": 0.2246,
"step": 108500
},
{
"epoch": 6.61,
"learning_rate": 1.6694466355196895e-05,
"loss": 0.2237,
"step": 109000
},
{
"epoch": 6.64,
"learning_rate": 1.667929737273224e-05,
"loss": 0.2241,
"step": 109500
},
{
"epoch": 6.67,
"learning_rate": 1.666415872823251e-05,
"loss": 0.2259,
"step": 110000
},
{
"epoch": 6.7,
"learning_rate": 1.6648989745767857e-05,
"loss": 0.2225,
"step": 110500
},
{
"epoch": 6.74,
"learning_rate": 1.66338207633032e-05,
"loss": 0.2229,
"step": 111000
},
{
"epoch": 6.77,
"learning_rate": 1.6618651780838542e-05,
"loss": 0.2234,
"step": 111500
},
{
"epoch": 6.8,
"learning_rate": 1.6603482798373888e-05,
"loss": 0.2211,
"step": 112000
},
{
"epoch": 6.83,
"learning_rate": 1.658831381590923e-05,
"loss": 0.2203,
"step": 112500
},
{
"epoch": 6.86,
"learning_rate": 1.6573144833444573e-05,
"loss": 0.2195,
"step": 113000
},
{
"epoch": 6.89,
"learning_rate": 1.6557975850979915e-05,
"loss": 0.2219,
"step": 113500
},
{
"epoch": 6.92,
"learning_rate": 1.654280686851526e-05,
"loss": 0.22,
"step": 114000
},
{
"epoch": 6.95,
"learning_rate": 1.6527668224015536e-05,
"loss": 0.2217,
"step": 114500
},
{
"epoch": 6.98,
"learning_rate": 1.6512499241550878e-05,
"loss": 0.2182,
"step": 115000
},
{
"epoch": 7.0,
"eval_bleu": 79.0915,
"eval_gen_len": 14.0505,
"eval_loss": 0.1989297866821289,
"eval_runtime": 170.8481,
"eval_samples_per_second": 94.376,
"eval_steps_per_second": 2.95,
"step": 115367
},
{
"epoch": 7.01,
"learning_rate": 1.649733025908622e-05,
"loss": 0.2219,
"step": 115500
},
{
"epoch": 7.04,
"learning_rate": 1.6482161276621566e-05,
"loss": 0.2165,
"step": 116000
},
{
"epoch": 7.07,
"learning_rate": 1.6467022632121837e-05,
"loss": 0.214,
"step": 116500
},
{
"epoch": 7.1,
"learning_rate": 1.645185364965718e-05,
"loss": 0.2143,
"step": 117000
},
{
"epoch": 7.13,
"learning_rate": 1.6436684667192526e-05,
"loss": 0.2193,
"step": 117500
},
{
"epoch": 7.16,
"learning_rate": 1.6421515684727868e-05,
"loss": 0.2139,
"step": 118000
},
{
"epoch": 7.19,
"learning_rate": 1.6406346702263214e-05,
"loss": 0.2176,
"step": 118500
},
{
"epoch": 7.22,
"learning_rate": 1.6391177719798556e-05,
"loss": 0.2162,
"step": 119000
},
{
"epoch": 7.25,
"learning_rate": 1.6376008737333902e-05,
"loss": 0.2114,
"step": 119500
},
{
"epoch": 7.28,
"learning_rate": 1.6360839754869245e-05,
"loss": 0.2172,
"step": 120000
},
{
"epoch": 7.31,
"learning_rate": 1.634567077240459e-05,
"loss": 0.2127,
"step": 120500
},
{
"epoch": 7.34,
"learning_rate": 1.6330501789939933e-05,
"loss": 0.2134,
"step": 121000
},
{
"epoch": 7.37,
"learning_rate": 1.6315332807475276e-05,
"loss": 0.2131,
"step": 121500
},
{
"epoch": 7.4,
"learning_rate": 1.6300163825010618e-05,
"loss": 0.2072,
"step": 122000
},
{
"epoch": 7.43,
"learning_rate": 1.6284994842545964e-05,
"loss": 0.2125,
"step": 122500
},
{
"epoch": 7.46,
"learning_rate": 1.6269856198046238e-05,
"loss": 0.2117,
"step": 123000
},
{
"epoch": 7.49,
"learning_rate": 1.625468721558158e-05,
"loss": 0.21,
"step": 123500
},
{
"epoch": 7.52,
"learning_rate": 1.6239518233116923e-05,
"loss": 0.2144,
"step": 124000
},
{
"epoch": 7.55,
"learning_rate": 1.6224379588617197e-05,
"loss": 0.2103,
"step": 124500
},
{
"epoch": 7.58,
"learning_rate": 1.620921060615254e-05,
"loss": 0.2081,
"step": 125000
},
{
"epoch": 7.61,
"learning_rate": 1.6194071961652814e-05,
"loss": 0.2102,
"step": 125500
},
{
"epoch": 7.65,
"learning_rate": 1.6178902979188157e-05,
"loss": 0.2059,
"step": 126000
},
{
"epoch": 7.68,
"learning_rate": 1.616376433468843e-05,
"loss": 0.2104,
"step": 126500
},
{
"epoch": 7.71,
"learning_rate": 1.6148595352223773e-05,
"loss": 0.2104,
"step": 127000
},
{
"epoch": 7.74,
"learning_rate": 1.613342636975912e-05,
"loss": 0.2068,
"step": 127500
},
{
"epoch": 7.77,
"learning_rate": 1.6118257387294462e-05,
"loss": 0.2107,
"step": 128000
},
{
"epoch": 7.8,
"learning_rate": 1.6103088404829804e-05,
"loss": 0.2086,
"step": 128500
},
{
"epoch": 7.83,
"learning_rate": 1.6087919422365147e-05,
"loss": 0.2097,
"step": 129000
},
{
"epoch": 7.86,
"learning_rate": 1.6072811115830353e-05,
"loss": 0.2097,
"step": 129500
},
{
"epoch": 7.89,
"learning_rate": 1.6057642133365695e-05,
"loss": 0.2083,
"step": 130000
},
{
"epoch": 7.92,
"learning_rate": 1.6042473150901038e-05,
"loss": 0.2117,
"step": 130500
},
{
"epoch": 7.95,
"learning_rate": 1.602730416843638e-05,
"loss": 0.2084,
"step": 131000
},
{
"epoch": 7.98,
"learning_rate": 1.6012135185971726e-05,
"loss": 0.2083,
"step": 131500
},
{
"epoch": 8.0,
"eval_bleu": 79.6569,
"eval_gen_len": 14.0215,
"eval_loss": 0.19143153727054596,
"eval_runtime": 170.1846,
"eval_samples_per_second": 94.744,
"eval_steps_per_second": 2.961,
"step": 131848
},
{
"epoch": 8.01,
"learning_rate": 1.599696620350707e-05,
"loss": 0.2077,
"step": 132000
},
{
"epoch": 8.04,
"learning_rate": 1.5981797221042414e-05,
"loss": 0.2018,
"step": 132500
},
{
"epoch": 8.07,
"learning_rate": 1.5966628238577757e-05,
"loss": 0.204,
"step": 133000
},
{
"epoch": 8.1,
"learning_rate": 1.5951459256113103e-05,
"loss": 0.2021,
"step": 133500
},
{
"epoch": 8.13,
"learning_rate": 1.5936290273648445e-05,
"loss": 0.2026,
"step": 134000
},
{
"epoch": 8.16,
"learning_rate": 1.592112129118379e-05,
"loss": 0.2012,
"step": 134500
},
{
"epoch": 8.19,
"learning_rate": 1.5905952308719134e-05,
"loss": 0.2002,
"step": 135000
},
{
"epoch": 8.22,
"learning_rate": 1.5890783326254476e-05,
"loss": 0.1998,
"step": 135500
},
{
"epoch": 8.25,
"learning_rate": 1.5875614343789822e-05,
"loss": 0.2048,
"step": 136000
},
{
"epoch": 8.28,
"learning_rate": 1.5860445361325164e-05,
"loss": 0.2008,
"step": 136500
},
{
"epoch": 8.31,
"learning_rate": 1.5845276378860507e-05,
"loss": 0.2055,
"step": 137000
},
{
"epoch": 8.34,
"learning_rate": 1.583010739639585e-05,
"loss": 0.2053,
"step": 137500
},
{
"epoch": 8.37,
"learning_rate": 1.5814938413931195e-05,
"loss": 0.2024,
"step": 138000
},
{
"epoch": 8.4,
"learning_rate": 1.5799769431466538e-05,
"loss": 0.2022,
"step": 138500
},
{
"epoch": 8.43,
"learning_rate": 1.578460044900188e-05,
"loss": 0.2003,
"step": 139000
},
{
"epoch": 8.46,
"learning_rate": 1.5769431466537226e-05,
"loss": 0.2016,
"step": 139500
},
{
"epoch": 8.49,
"learning_rate": 1.575426248407257e-05,
"loss": 0.2011,
"step": 140000
},
{
"epoch": 8.52,
"learning_rate": 1.5739123839572843e-05,
"loss": 0.2025,
"step": 140500
},
{
"epoch": 8.56,
"learning_rate": 1.5723954857108185e-05,
"loss": 0.1995,
"step": 141000
},
{
"epoch": 8.59,
"learning_rate": 1.570878587464353e-05,
"loss": 0.2035,
"step": 141500
},
{
"epoch": 8.62,
"learning_rate": 1.5693616892178874e-05,
"loss": 0.2002,
"step": 142000
},
{
"epoch": 8.65,
"learning_rate": 1.5678478247679148e-05,
"loss": 0.2016,
"step": 142500
},
{
"epoch": 8.68,
"learning_rate": 1.566330926521449e-05,
"loss": 0.1985,
"step": 143000
},
{
"epoch": 8.71,
"learning_rate": 1.5648170620714765e-05,
"loss": 0.2004,
"step": 143500
},
{
"epoch": 8.74,
"learning_rate": 1.5633001638250107e-05,
"loss": 0.1992,
"step": 144000
},
{
"epoch": 8.77,
"learning_rate": 1.561786299375038e-05,
"loss": 0.2037,
"step": 144500
},
{
"epoch": 8.8,
"learning_rate": 1.5602694011285724e-05,
"loss": 0.203,
"step": 145000
},
{
"epoch": 8.83,
"learning_rate": 1.558752502882107e-05,
"loss": 0.202,
"step": 145500
},
{
"epoch": 8.86,
"learning_rate": 1.5572356046356412e-05,
"loss": 0.198,
"step": 146000
},
{
"epoch": 8.89,
"learning_rate": 1.5557187063891755e-05,
"loss": 0.1992,
"step": 146500
},
{
"epoch": 8.92,
"learning_rate": 1.55420180814271e-05,
"loss": 0.1988,
"step": 147000
},
{
"epoch": 8.95,
"learning_rate": 1.5526849098962443e-05,
"loss": 0.1992,
"step": 147500
},
{
"epoch": 8.98,
"learning_rate": 1.5511680116497786e-05,
"loss": 0.1986,
"step": 148000
},
{
"epoch": 9.0,
"eval_bleu": 79.9315,
"eval_gen_len": 14.0219,
"eval_loss": 0.18681135773658752,
"eval_runtime": 171.5642,
"eval_samples_per_second": 93.982,
"eval_steps_per_second": 2.938,
"step": 148329
},
{
"epoch": 9.01,
"learning_rate": 1.549651113403313e-05,
"loss": 0.1942,
"step": 148500
},
{
"epoch": 9.04,
"learning_rate": 1.5481342151568474e-05,
"loss": 0.195,
"step": 149000
},
{
"epoch": 9.07,
"learning_rate": 1.5466173169103816e-05,
"loss": 0.1933,
"step": 149500
},
{
"epoch": 9.1,
"learning_rate": 1.5451004186639162e-05,
"loss": 0.1949,
"step": 150000
},
{
"epoch": 9.13,
"learning_rate": 1.5435865542139433e-05,
"loss": 0.1963,
"step": 150500
},
{
"epoch": 9.16,
"learning_rate": 1.542069655967478e-05,
"loss": 0.1926,
"step": 151000
},
{
"epoch": 9.19,
"learning_rate": 1.540552757721012e-05,
"loss": 0.1897,
"step": 151500
},
{
"epoch": 9.22,
"learning_rate": 1.5390358594745464e-05,
"loss": 0.1931,
"step": 152000
},
{
"epoch": 9.25,
"learning_rate": 1.537518961228081e-05,
"loss": 0.1907,
"step": 152500
},
{
"epoch": 9.28,
"learning_rate": 1.5360020629816152e-05,
"loss": 0.1919,
"step": 153000
},
{
"epoch": 9.31,
"learning_rate": 1.5344851647351498e-05,
"loss": 0.1925,
"step": 153500
},
{
"epoch": 9.34,
"learning_rate": 1.532968266488684e-05,
"loss": 0.1918,
"step": 154000
},
{
"epoch": 9.37,
"learning_rate": 1.5314513682422187e-05,
"loss": 0.1933,
"step": 154500
},
{
"epoch": 9.4,
"learning_rate": 1.5299375037922457e-05,
"loss": 0.1944,
"step": 155000
},
{
"epoch": 9.44,
"learning_rate": 1.5284206055457803e-05,
"loss": 0.1929,
"step": 155500
},
{
"epoch": 9.47,
"learning_rate": 1.5269037072993146e-05,
"loss": 0.1932,
"step": 156000
},
{
"epoch": 9.5,
"learning_rate": 1.5253868090528488e-05,
"loss": 0.1929,
"step": 156500
},
{
"epoch": 9.53,
"learning_rate": 1.523872944602876e-05,
"loss": 0.1916,
"step": 157000
},
{
"epoch": 9.56,
"learning_rate": 1.5223560463564105e-05,
"loss": 0.1938,
"step": 157500
},
{
"epoch": 9.59,
"learning_rate": 1.5208391481099449e-05,
"loss": 0.1891,
"step": 158000
},
{
"epoch": 9.62,
"learning_rate": 1.5193222498634792e-05,
"loss": 0.1875,
"step": 158500
},
{
"epoch": 9.65,
"learning_rate": 1.5178083854135066e-05,
"loss": 0.192,
"step": 159000
},
{
"epoch": 9.68,
"learning_rate": 1.5162945209635338e-05,
"loss": 0.1919,
"step": 159500
},
{
"epoch": 9.71,
"learning_rate": 1.5147776227170683e-05,
"loss": 0.1924,
"step": 160000
},
{
"epoch": 9.74,
"learning_rate": 1.5132607244706025e-05,
"loss": 0.1947,
"step": 160500
},
{
"epoch": 9.77,
"learning_rate": 1.5117438262241371e-05,
"loss": 0.1895,
"step": 161000
},
{
"epoch": 9.8,
"learning_rate": 1.5102269279776713e-05,
"loss": 0.1886,
"step": 161500
},
{
"epoch": 9.83,
"learning_rate": 1.5087100297312058e-05,
"loss": 0.1948,
"step": 162000
},
{
"epoch": 9.86,
"learning_rate": 1.507196165281233e-05,
"loss": 0.1947,
"step": 162500
},
{
"epoch": 9.89,
"learning_rate": 1.5056792670347676e-05,
"loss": 0.1904,
"step": 163000
},
{
"epoch": 9.92,
"learning_rate": 1.5041623687883018e-05,
"loss": 0.1895,
"step": 163500
},
{
"epoch": 9.95,
"learning_rate": 1.5026454705418361e-05,
"loss": 0.1907,
"step": 164000
},
{
"epoch": 9.98,
"learning_rate": 1.5011285722953705e-05,
"loss": 0.1956,
"step": 164500
},
{
"epoch": 10.0,
"eval_bleu": 80.0539,
"eval_gen_len": 14.0305,
"eval_loss": 0.18285295367240906,
"eval_runtime": 168.5447,
"eval_samples_per_second": 95.666,
"eval_steps_per_second": 2.99,
"step": 164810
},
{
"epoch": 10.01,
"learning_rate": 1.4996116740489048e-05,
"loss": 0.1904,
"step": 165000
},
{
"epoch": 10.04,
"learning_rate": 1.4980947758024394e-05,
"loss": 0.185,
"step": 165500
},
{
"epoch": 10.07,
"learning_rate": 1.4965778775559736e-05,
"loss": 0.1841,
"step": 166000
},
{
"epoch": 10.1,
"learning_rate": 1.4950609793095082e-05,
"loss": 0.1853,
"step": 166500
},
{
"epoch": 10.13,
"learning_rate": 1.4935440810630424e-05,
"loss": 0.1851,
"step": 167000
},
{
"epoch": 10.16,
"learning_rate": 1.4920302166130699e-05,
"loss": 0.1842,
"step": 167500
},
{
"epoch": 10.19,
"learning_rate": 1.4905133183666041e-05,
"loss": 0.1846,
"step": 168000
},
{
"epoch": 10.22,
"learning_rate": 1.4889964201201385e-05,
"loss": 0.1852,
"step": 168500
},
{
"epoch": 10.25,
"learning_rate": 1.4874795218736728e-05,
"loss": 0.1871,
"step": 169000
},
{
"epoch": 10.28,
"learning_rate": 1.4859656574237002e-05,
"loss": 0.1875,
"step": 169500
},
{
"epoch": 10.31,
"learning_rate": 1.4844487591772344e-05,
"loss": 0.1857,
"step": 170000
},
{
"epoch": 10.35,
"learning_rate": 1.4829318609307689e-05,
"loss": 0.1856,
"step": 170500
},
{
"epoch": 10.38,
"learning_rate": 1.4814149626843033e-05,
"loss": 0.1867,
"step": 171000
},
{
"epoch": 10.41,
"learning_rate": 1.4799010982343305e-05,
"loss": 0.1858,
"step": 171500
},
{
"epoch": 10.44,
"learning_rate": 1.478384199987865e-05,
"loss": 0.1872,
"step": 172000
},
{
"epoch": 10.47,
"learning_rate": 1.4768673017413992e-05,
"loss": 0.1825,
"step": 172500
},
{
"epoch": 10.5,
"learning_rate": 1.4753504034949338e-05,
"loss": 0.1839,
"step": 173000
},
{
"epoch": 10.53,
"learning_rate": 1.473833505248468e-05,
"loss": 0.1862,
"step": 173500
},
{
"epoch": 10.56,
"learning_rate": 1.4723166070020025e-05,
"loss": 0.186,
"step": 174000
},
{
"epoch": 10.59,
"learning_rate": 1.4707997087555369e-05,
"loss": 0.1856,
"step": 174500
},
{
"epoch": 10.62,
"learning_rate": 1.4692828105090711e-05,
"loss": 0.1851,
"step": 175000
},
{
"epoch": 10.65,
"learning_rate": 1.4677659122626055e-05,
"loss": 0.1869,
"step": 175500
},
{
"epoch": 10.68,
"learning_rate": 1.4662520478126328e-05,
"loss": 0.1843,
"step": 176000
},
{
"epoch": 10.71,
"learning_rate": 1.4647351495661672e-05,
"loss": 0.1811,
"step": 176500
},
{
"epoch": 10.74,
"learning_rate": 1.4632182513197015e-05,
"loss": 0.1865,
"step": 177000
},
{
"epoch": 10.77,
"learning_rate": 1.461701353073236e-05,
"loss": 0.1843,
"step": 177500
},
{
"epoch": 10.8,
"learning_rate": 1.4601844548267703e-05,
"loss": 0.1836,
"step": 178000
},
{
"epoch": 10.83,
"learning_rate": 1.4586736241732906e-05,
"loss": 0.1832,
"step": 178500
},
{
"epoch": 10.86,
"learning_rate": 1.457156725926825e-05,
"loss": 0.182,
"step": 179000
},
{
"epoch": 10.89,
"learning_rate": 1.4556398276803594e-05,
"loss": 0.1841,
"step": 179500
},
{
"epoch": 10.92,
"learning_rate": 1.4541229294338936e-05,
"loss": 0.1866,
"step": 180000
},
{
"epoch": 10.95,
"learning_rate": 1.4526060311874282e-05,
"loss": 0.1851,
"step": 180500
},
{
"epoch": 10.98,
"learning_rate": 1.4510891329409625e-05,
"loss": 0.1826,
"step": 181000
},
{
"epoch": 11.0,
"eval_bleu": 80.3158,
"eval_gen_len": 14.0452,
"eval_loss": 0.17846250534057617,
"eval_runtime": 168.9854,
"eval_samples_per_second": 95.417,
"eval_steps_per_second": 2.983,
"step": 181291
},
{
"epoch": 11.01,
"learning_rate": 1.4495722346944967e-05,
"loss": 0.1829,
"step": 181500
},
{
"epoch": 11.04,
"learning_rate": 1.4480553364480311e-05,
"loss": 0.1748,
"step": 182000
},
{
"epoch": 11.07,
"learning_rate": 1.4465414719980584e-05,
"loss": 0.1809,
"step": 182500
},
{
"epoch": 11.1,
"learning_rate": 1.4450245737515928e-05,
"loss": 0.1768,
"step": 183000
},
{
"epoch": 11.13,
"learning_rate": 1.4435076755051272e-05,
"loss": 0.1795,
"step": 183500
},
{
"epoch": 11.16,
"learning_rate": 1.4419907772586617e-05,
"loss": 0.1767,
"step": 184000
},
{
"epoch": 11.19,
"learning_rate": 1.4404738790121959e-05,
"loss": 0.1777,
"step": 184500
},
{
"epoch": 11.23,
"learning_rate": 1.4389569807657305e-05,
"loss": 0.1764,
"step": 185000
},
{
"epoch": 11.26,
"learning_rate": 1.4374431163157576e-05,
"loss": 0.1796,
"step": 185500
},
{
"epoch": 11.29,
"learning_rate": 1.4359262180692922e-05,
"loss": 0.1804,
"step": 186000
},
{
"epoch": 11.32,
"learning_rate": 1.4344093198228264e-05,
"loss": 0.1801,
"step": 186500
},
{
"epoch": 11.35,
"learning_rate": 1.4328924215763608e-05,
"loss": 0.1775,
"step": 187000
},
{
"epoch": 11.38,
"learning_rate": 1.431375523329895e-05,
"loss": 0.1775,
"step": 187500
},
{
"epoch": 11.41,
"learning_rate": 1.4298616588799223e-05,
"loss": 0.1789,
"step": 188000
},
{
"epoch": 11.44,
"learning_rate": 1.4283447606334569e-05,
"loss": 0.1802,
"step": 188500
},
{
"epoch": 11.47,
"learning_rate": 1.4268278623869912e-05,
"loss": 0.1791,
"step": 189000
},
{
"epoch": 11.5,
"learning_rate": 1.4253109641405256e-05,
"loss": 0.1786,
"step": 189500
},
{
"epoch": 11.53,
"learning_rate": 1.4237970996905528e-05,
"loss": 0.1833,
"step": 190000
},
{
"epoch": 11.56,
"learning_rate": 1.4222832352405803e-05,
"loss": 0.1777,
"step": 190500
},
{
"epoch": 11.59,
"learning_rate": 1.4207693707906073e-05,
"loss": 0.1786,
"step": 191000
},
{
"epoch": 11.62,
"learning_rate": 1.419252472544142e-05,
"loss": 0.1769,
"step": 191500
},
{
"epoch": 11.65,
"learning_rate": 1.4177355742976762e-05,
"loss": 0.1775,
"step": 192000
},
{
"epoch": 11.68,
"learning_rate": 1.4162186760512106e-05,
"loss": 0.1827,
"step": 192500
},
{
"epoch": 11.71,
"learning_rate": 1.414701777804745e-05,
"loss": 0.178,
"step": 193000
},
{
"epoch": 11.74,
"learning_rate": 1.4131879133547724e-05,
"loss": 0.1771,
"step": 193500
},
{
"epoch": 11.77,
"learning_rate": 1.4116710151083067e-05,
"loss": 0.1781,
"step": 194000
},
{
"epoch": 11.8,
"learning_rate": 1.4101541168618411e-05,
"loss": 0.1774,
"step": 194500
},
{
"epoch": 11.83,
"learning_rate": 1.4086372186153753e-05,
"loss": 0.1763,
"step": 195000
},
{
"epoch": 11.86,
"learning_rate": 1.4071203203689096e-05,
"loss": 0.1768,
"step": 195500
},
{
"epoch": 11.89,
"learning_rate": 1.4056034221224442e-05,
"loss": 0.1793,
"step": 196000
},
{
"epoch": 11.92,
"learning_rate": 1.4040865238759784e-05,
"loss": 0.1809,
"step": 196500
},
{
"epoch": 11.95,
"learning_rate": 1.4025696256295129e-05,
"loss": 0.1797,
"step": 197000
},
{
"epoch": 11.98,
"learning_rate": 1.4010527273830473e-05,
"loss": 0.1787,
"step": 197500
},
{
"epoch": 12.0,
"eval_bleu": 80.3265,
"eval_gen_len": 14.0303,
"eval_loss": 0.17605634033679962,
"eval_runtime": 172.6821,
"eval_samples_per_second": 93.374,
"eval_steps_per_second": 2.919,
"step": 197772
},
{
"epoch": 12.01,
"learning_rate": 1.3995358291365817e-05,
"loss": 0.1764,
"step": 198000
},
{
"epoch": 12.04,
"learning_rate": 1.398018930890116e-05,
"loss": 0.1735,
"step": 198500
},
{
"epoch": 12.07,
"learning_rate": 1.3965020326436505e-05,
"loss": 0.1738,
"step": 199000
},
{
"epoch": 12.1,
"learning_rate": 1.3949881681936776e-05,
"loss": 0.1731,
"step": 199500
},
{
"epoch": 12.14,
"learning_rate": 1.3934712699472122e-05,
"loss": 0.1759,
"step": 200000
},
{
"epoch": 12.17,
"learning_rate": 1.3919543717007464e-05,
"loss": 0.1726,
"step": 200500
},
{
"epoch": 12.2,
"learning_rate": 1.3904374734542807e-05,
"loss": 0.1771,
"step": 201000
},
{
"epoch": 12.23,
"learning_rate": 1.3889205752078153e-05,
"loss": 0.1766,
"step": 201500
},
{
"epoch": 12.26,
"learning_rate": 1.3874067107578424e-05,
"loss": 0.1695,
"step": 202000
},
{
"epoch": 12.29,
"learning_rate": 1.385889812511377e-05,
"loss": 0.1714,
"step": 202500
},
{
"epoch": 12.32,
"learning_rate": 1.384378981857897e-05,
"loss": 0.1727,
"step": 203000
},
{
"epoch": 12.35,
"learning_rate": 1.3828620836114315e-05,
"loss": 0.1725,
"step": 203500
},
{
"epoch": 12.38,
"learning_rate": 1.3813451853649657e-05,
"loss": 0.1722,
"step": 204000
},
{
"epoch": 12.41,
"learning_rate": 1.3798282871185003e-05,
"loss": 0.179,
"step": 204500
},
{
"epoch": 12.44,
"learning_rate": 1.3783113888720345e-05,
"loss": 0.1732,
"step": 205000
},
{
"epoch": 12.47,
"learning_rate": 1.376794490625569e-05,
"loss": 0.1722,
"step": 205500
},
{
"epoch": 12.5,
"learning_rate": 1.3752775923791034e-05,
"loss": 0.1723,
"step": 206000
},
{
"epoch": 12.53,
"learning_rate": 1.3737606941326378e-05,
"loss": 0.1716,
"step": 206500
},
{
"epoch": 12.56,
"learning_rate": 1.372243795886172e-05,
"loss": 0.1738,
"step": 207000
},
{
"epoch": 12.59,
"learning_rate": 1.3707299314361995e-05,
"loss": 0.1732,
"step": 207500
},
{
"epoch": 12.62,
"learning_rate": 1.3692130331897337e-05,
"loss": 0.1736,
"step": 208000
},
{
"epoch": 12.65,
"learning_rate": 1.367699168739761e-05,
"loss": 0.1719,
"step": 208500
},
{
"epoch": 12.68,
"learning_rate": 1.3661822704932954e-05,
"loss": 0.1717,
"step": 209000
},
{
"epoch": 12.71,
"learning_rate": 1.3646653722468296e-05,
"loss": 0.1704,
"step": 209500
},
{
"epoch": 12.74,
"learning_rate": 1.3631484740003642e-05,
"loss": 0.174,
"step": 210000
},
{
"epoch": 12.77,
"learning_rate": 1.3616315757538985e-05,
"loss": 0.1718,
"step": 210500
},
{
"epoch": 12.8,
"learning_rate": 1.360114677507433e-05,
"loss": 0.1726,
"step": 211000
},
{
"epoch": 12.83,
"learning_rate": 1.3585977792609673e-05,
"loss": 0.1691,
"step": 211500
},
{
"epoch": 12.86,
"learning_rate": 1.3570808810145017e-05,
"loss": 0.1705,
"step": 212000
},
{
"epoch": 12.89,
"learning_rate": 1.355563982768036e-05,
"loss": 0.1715,
"step": 212500
},
{
"epoch": 12.92,
"learning_rate": 1.3540470845215702e-05,
"loss": 0.1712,
"step": 213000
},
{
"epoch": 12.95,
"learning_rate": 1.3525301862751048e-05,
"loss": 0.1731,
"step": 213500
},
{
"epoch": 12.98,
"learning_rate": 1.351013288028639e-05,
"loss": 0.1725,
"step": 214000
},
{
"epoch": 13.0,
"eval_bleu": 80.3976,
"eval_gen_len": 14.0256,
"eval_loss": 0.1731211394071579,
"eval_runtime": 168.6298,
"eval_samples_per_second": 95.618,
"eval_steps_per_second": 2.989,
"step": 214253
},
{
"epoch": 13.01,
"learning_rate": 1.3494994235786665e-05,
"loss": 0.1726,
"step": 214500
},
{
"epoch": 13.05,
"learning_rate": 1.3479855591286937e-05,
"loss": 0.169,
"step": 215000
},
{
"epoch": 13.08,
"learning_rate": 1.3464686608822282e-05,
"loss": 0.1673,
"step": 215500
},
{
"epoch": 13.11,
"learning_rate": 1.3449517626357624e-05,
"loss": 0.1696,
"step": 216000
},
{
"epoch": 13.14,
"learning_rate": 1.3434378981857898e-05,
"loss": 0.1696,
"step": 216500
},
{
"epoch": 13.17,
"learning_rate": 1.341920999939324e-05,
"loss": 0.1679,
"step": 217000
},
{
"epoch": 13.2,
"learning_rate": 1.3404041016928587e-05,
"loss": 0.1695,
"step": 217500
},
{
"epoch": 13.23,
"learning_rate": 1.3388872034463929e-05,
"loss": 0.166,
"step": 218000
},
{
"epoch": 13.26,
"learning_rate": 1.3373703051999273e-05,
"loss": 0.1677,
"step": 218500
},
{
"epoch": 13.29,
"learning_rate": 1.3358534069534617e-05,
"loss": 0.1636,
"step": 219000
},
{
"epoch": 13.32,
"learning_rate": 1.334336508706996e-05,
"loss": 0.1637,
"step": 219500
},
{
"epoch": 13.35,
"learning_rate": 1.3328196104605304e-05,
"loss": 0.1665,
"step": 220000
},
{
"epoch": 13.38,
"learning_rate": 1.3313027122140647e-05,
"loss": 0.1657,
"step": 220500
},
{
"epoch": 13.41,
"learning_rate": 1.3297858139675992e-05,
"loss": 0.168,
"step": 221000
},
{
"epoch": 13.44,
"learning_rate": 1.3282689157211335e-05,
"loss": 0.1676,
"step": 221500
},
{
"epoch": 13.47,
"learning_rate": 1.326752017474668e-05,
"loss": 0.169,
"step": 222000
},
{
"epoch": 13.5,
"learning_rate": 1.3252351192282022e-05,
"loss": 0.1677,
"step": 222500
},
{
"epoch": 13.53,
"learning_rate": 1.3237212547782296e-05,
"loss": 0.1676,
"step": 223000
},
{
"epoch": 13.56,
"learning_rate": 1.322204356531764e-05,
"loss": 0.1667,
"step": 223500
},
{
"epoch": 13.59,
"learning_rate": 1.3206874582852984e-05,
"loss": 0.1663,
"step": 224000
},
{
"epoch": 13.62,
"learning_rate": 1.3191705600388327e-05,
"loss": 0.1659,
"step": 224500
},
{
"epoch": 13.65,
"learning_rate": 1.3176566955888601e-05,
"loss": 0.1672,
"step": 225000
},
{
"epoch": 13.68,
"learning_rate": 1.3161397973423943e-05,
"loss": 0.1683,
"step": 225500
},
{
"epoch": 13.71,
"learning_rate": 1.3146228990959286e-05,
"loss": 0.1681,
"step": 226000
},
{
"epoch": 13.74,
"learning_rate": 1.3131060008494632e-05,
"loss": 0.1659,
"step": 226500
},
{
"epoch": 13.77,
"learning_rate": 1.3115921363994903e-05,
"loss": 0.1681,
"step": 227000
},
{
"epoch": 13.8,
"learning_rate": 1.3100752381530249e-05,
"loss": 0.1703,
"step": 227500
},
{
"epoch": 13.83,
"learning_rate": 1.3085583399065591e-05,
"loss": 0.1679,
"step": 228000
},
{
"epoch": 13.86,
"learning_rate": 1.3070414416600937e-05,
"loss": 0.167,
"step": 228500
},
{
"epoch": 13.89,
"learning_rate": 1.3055275772101208e-05,
"loss": 0.1656,
"step": 229000
},
{
"epoch": 13.93,
"learning_rate": 1.3040137127601482e-05,
"loss": 0.172,
"step": 229500
},
{
"epoch": 13.96,
"learning_rate": 1.3024968145136824e-05,
"loss": 0.1678,
"step": 230000
},
{
"epoch": 13.99,
"learning_rate": 1.300979916267217e-05,
"loss": 0.1718,
"step": 230500
},
{
"epoch": 14.0,
"eval_bleu": 80.4785,
"eval_gen_len": 14.0177,
"eval_loss": 0.1709802895784378,
"eval_runtime": 169.4766,
"eval_samples_per_second": 95.14,
"eval_steps_per_second": 2.974,
"step": 230734
},
{
"epoch": 14.02,
"learning_rate": 1.2994630180207513e-05,
"loss": 0.1667,
"step": 231000
},
{
"epoch": 14.05,
"learning_rate": 1.2979461197742857e-05,
"loss": 0.1626,
"step": 231500
},
{
"epoch": 14.08,
"learning_rate": 1.29642922152782e-05,
"loss": 0.1616,
"step": 232000
},
{
"epoch": 14.11,
"learning_rate": 1.2949123232813544e-05,
"loss": 0.163,
"step": 232500
},
{
"epoch": 14.14,
"learning_rate": 1.2933954250348888e-05,
"loss": 0.161,
"step": 233000
},
{
"epoch": 14.17,
"learning_rate": 1.291881560584916e-05,
"loss": 0.1664,
"step": 233500
},
{
"epoch": 14.2,
"learning_rate": 1.2903646623384505e-05,
"loss": 0.1609,
"step": 234000
},
{
"epoch": 14.23,
"learning_rate": 1.2888477640919847e-05,
"loss": 0.1619,
"step": 234500
},
{
"epoch": 14.26,
"learning_rate": 1.2873308658455193e-05,
"loss": 0.1632,
"step": 235000
},
{
"epoch": 14.29,
"learning_rate": 1.2858139675990535e-05,
"loss": 0.1654,
"step": 235500
},
{
"epoch": 14.32,
"learning_rate": 1.284300103149081e-05,
"loss": 0.1608,
"step": 236000
},
{
"epoch": 14.35,
"learning_rate": 1.2827832049026152e-05,
"loss": 0.1629,
"step": 236500
},
{
"epoch": 14.38,
"learning_rate": 1.2812663066561496e-05,
"loss": 0.1618,
"step": 237000
},
{
"epoch": 14.41,
"learning_rate": 1.279749408409684e-05,
"loss": 0.1618,
"step": 237500
},
{
"epoch": 14.44,
"learning_rate": 1.2782355439597115e-05,
"loss": 0.1651,
"step": 238000
},
{
"epoch": 14.47,
"learning_rate": 1.2767186457132457e-05,
"loss": 0.1639,
"step": 238500
},
{
"epoch": 14.5,
"learning_rate": 1.27520174746678e-05,
"loss": 0.1627,
"step": 239000
},
{
"epoch": 14.53,
"learning_rate": 1.2736848492203144e-05,
"loss": 0.161,
"step": 239500
},
{
"epoch": 14.56,
"learning_rate": 1.2721709847703416e-05,
"loss": 0.1646,
"step": 240000
},
{
"epoch": 14.59,
"learning_rate": 1.270654086523876e-05,
"loss": 0.1646,
"step": 240500
},
{
"epoch": 14.62,
"learning_rate": 1.2691371882774105e-05,
"loss": 0.1619,
"step": 241000
},
{
"epoch": 14.65,
"learning_rate": 1.2676233238274377e-05,
"loss": 0.1633,
"step": 241500
},
{
"epoch": 14.68,
"learning_rate": 1.2661064255809721e-05,
"loss": 0.1668,
"step": 242000
},
{
"epoch": 14.71,
"learning_rate": 1.2645895273345066e-05,
"loss": 0.1645,
"step": 242500
},
{
"epoch": 14.74,
"learning_rate": 1.2630726290880408e-05,
"loss": 0.1634,
"step": 243000
},
{
"epoch": 14.77,
"learning_rate": 1.2615557308415754e-05,
"loss": 0.1641,
"step": 243500
},
{
"epoch": 14.8,
"learning_rate": 1.2600388325951096e-05,
"loss": 0.1626,
"step": 244000
},
{
"epoch": 14.84,
"learning_rate": 1.2585219343486439e-05,
"loss": 0.1599,
"step": 244500
},
{
"epoch": 14.87,
"learning_rate": 1.2570050361021783e-05,
"loss": 0.162,
"step": 245000
},
{
"epoch": 14.9,
"learning_rate": 1.2554881378557127e-05,
"loss": 0.1651,
"step": 245500
},
{
"epoch": 14.93,
"learning_rate": 1.2539712396092471e-05,
"loss": 0.1628,
"step": 246000
},
{
"epoch": 14.96,
"learning_rate": 1.2524543413627814e-05,
"loss": 0.1633,
"step": 246500
},
{
"epoch": 14.99,
"learning_rate": 1.2509404769128088e-05,
"loss": 0.1635,
"step": 247000
},
{
"epoch": 15.0,
"eval_bleu": 80.6536,
"eval_gen_len": 14.0499,
"eval_loss": 0.16988149285316467,
"eval_runtime": 171.1611,
"eval_samples_per_second": 94.204,
"eval_steps_per_second": 2.945,
"step": 247215
},
{
"epoch": 15.02,
"learning_rate": 1.249423578666343e-05,
"loss": 0.161,
"step": 247500
},
{
"epoch": 15.05,
"learning_rate": 1.2479066804198777e-05,
"loss": 0.158,
"step": 248000
},
{
"epoch": 15.08,
"learning_rate": 1.2463897821734119e-05,
"loss": 0.1592,
"step": 248500
},
{
"epoch": 15.11,
"learning_rate": 1.2448728839269463e-05,
"loss": 0.1589,
"step": 249000
},
{
"epoch": 15.14,
"learning_rate": 1.2433590194769736e-05,
"loss": 0.1584,
"step": 249500
},
{
"epoch": 15.17,
"learning_rate": 1.241842121230508e-05,
"loss": 0.1577,
"step": 250000
},
{
"epoch": 15.2,
"learning_rate": 1.2403252229840424e-05,
"loss": 0.1568,
"step": 250500
},
{
"epoch": 15.23,
"learning_rate": 1.2388083247375767e-05,
"loss": 0.1579,
"step": 251000
},
{
"epoch": 15.26,
"learning_rate": 1.237291426491111e-05,
"loss": 0.1568,
"step": 251500
},
{
"epoch": 15.29,
"learning_rate": 1.2357775620411383e-05,
"loss": 0.1591,
"step": 252000
},
{
"epoch": 15.32,
"learning_rate": 1.2342636975911658e-05,
"loss": 0.1615,
"step": 252500
},
{
"epoch": 15.35,
"learning_rate": 1.2327467993447e-05,
"loss": 0.1582,
"step": 253000
},
{
"epoch": 15.38,
"learning_rate": 1.2312299010982344e-05,
"loss": 0.1609,
"step": 253500
},
{
"epoch": 15.41,
"learning_rate": 1.2297130028517688e-05,
"loss": 0.1601,
"step": 254000
},
{
"epoch": 15.44,
"learning_rate": 1.2281961046053033e-05,
"loss": 0.1594,
"step": 254500
},
{
"epoch": 15.47,
"learning_rate": 1.2266792063588375e-05,
"loss": 0.1569,
"step": 255000
},
{
"epoch": 15.5,
"learning_rate": 1.2251623081123721e-05,
"loss": 0.1602,
"step": 255500
},
{
"epoch": 15.53,
"learning_rate": 1.2236454098659063e-05,
"loss": 0.1618,
"step": 256000
},
{
"epoch": 15.56,
"learning_rate": 1.2221285116194406e-05,
"loss": 0.1566,
"step": 256500
},
{
"epoch": 15.59,
"learning_rate": 1.220614647169468e-05,
"loss": 0.1584,
"step": 257000
},
{
"epoch": 15.62,
"learning_rate": 1.2190977489230023e-05,
"loss": 0.1573,
"step": 257500
},
{
"epoch": 15.65,
"learning_rate": 1.2175808506765367e-05,
"loss": 0.1591,
"step": 258000
},
{
"epoch": 15.68,
"learning_rate": 1.2160639524300711e-05,
"loss": 0.1569,
"step": 258500
},
{
"epoch": 15.72,
"learning_rate": 1.2145470541836055e-05,
"loss": 0.1584,
"step": 259000
},
{
"epoch": 15.75,
"learning_rate": 1.2130331897336328e-05,
"loss": 0.1621,
"step": 259500
},
{
"epoch": 15.78,
"learning_rate": 1.2115162914871672e-05,
"loss": 0.1607,
"step": 260000
},
{
"epoch": 15.81,
"learning_rate": 1.2099993932407014e-05,
"loss": 0.1606,
"step": 260500
},
{
"epoch": 15.84,
"learning_rate": 1.208482494994236e-05,
"loss": 0.1587,
"step": 261000
},
{
"epoch": 15.87,
"learning_rate": 1.2069655967477703e-05,
"loss": 0.1588,
"step": 261500
},
{
"epoch": 15.9,
"learning_rate": 1.2054517322977977e-05,
"loss": 0.1574,
"step": 262000
},
{
"epoch": 15.93,
"learning_rate": 1.203934834051332e-05,
"loss": 0.1639,
"step": 262500
},
{
"epoch": 15.96,
"learning_rate": 1.2024209696013594e-05,
"loss": 0.1579,
"step": 263000
},
{
"epoch": 15.99,
"learning_rate": 1.2009040713548936e-05,
"loss": 0.1593,
"step": 263500
},
{
"epoch": 16.0,
"eval_bleu": 80.6166,
"eval_gen_len": 14.0258,
"eval_loss": 0.16955877840518951,
"eval_runtime": 170.3679,
"eval_samples_per_second": 94.642,
"eval_steps_per_second": 2.958,
"step": 263696
},
{
"epoch": 16.02,
"learning_rate": 1.1993871731084279e-05,
"loss": 0.1546,
"step": 264000
},
{
"epoch": 16.05,
"learning_rate": 1.1978702748619624e-05,
"loss": 0.1561,
"step": 264500
},
{
"epoch": 16.08,
"learning_rate": 1.1963533766154967e-05,
"loss": 0.1527,
"step": 265000
},
{
"epoch": 16.11,
"learning_rate": 1.1948364783690311e-05,
"loss": 0.1549,
"step": 265500
},
{
"epoch": 16.14,
"learning_rate": 1.1933195801225654e-05,
"loss": 0.1568,
"step": 266000
},
{
"epoch": 16.17,
"learning_rate": 1.1918026818761e-05,
"loss": 0.1508,
"step": 266500
},
{
"epoch": 16.2,
"learning_rate": 1.190288817426127e-05,
"loss": 0.1554,
"step": 267000
},
{
"epoch": 16.23,
"learning_rate": 1.1887719191796616e-05,
"loss": 0.1554,
"step": 267500
},
{
"epoch": 16.26,
"learning_rate": 1.1872580547296889e-05,
"loss": 0.1558,
"step": 268000
},
{
"epoch": 16.29,
"learning_rate": 1.1857411564832233e-05,
"loss": 0.1548,
"step": 268500
},
{
"epoch": 16.32,
"learning_rate": 1.1842242582367575e-05,
"loss": 0.1567,
"step": 269000
},
{
"epoch": 16.35,
"learning_rate": 1.1827073599902918e-05,
"loss": 0.1536,
"step": 269500
},
{
"epoch": 16.38,
"learning_rate": 1.1811904617438264e-05,
"loss": 0.1529,
"step": 270000
},
{
"epoch": 16.41,
"learning_rate": 1.1796765972938535e-05,
"loss": 0.1546,
"step": 270500
},
{
"epoch": 16.44,
"learning_rate": 1.178159699047388e-05,
"loss": 0.1538,
"step": 271000
},
{
"epoch": 16.47,
"learning_rate": 1.1766428008009223e-05,
"loss": 0.1579,
"step": 271500
},
{
"epoch": 16.5,
"learning_rate": 1.1751259025544567e-05,
"loss": 0.1526,
"step": 272000
},
{
"epoch": 16.53,
"learning_rate": 1.1736090043079911e-05,
"loss": 0.1516,
"step": 272500
},
{
"epoch": 16.56,
"learning_rate": 1.1720921060615256e-05,
"loss": 0.1541,
"step": 273000
},
{
"epoch": 16.59,
"learning_rate": 1.1705752078150598e-05,
"loss": 0.1539,
"step": 273500
},
{
"epoch": 16.63,
"learning_rate": 1.1690583095685944e-05,
"loss": 0.1567,
"step": 274000
},
{
"epoch": 16.66,
"learning_rate": 1.1675444451186215e-05,
"loss": 0.1529,
"step": 274500
},
{
"epoch": 16.69,
"learning_rate": 1.1660275468721557e-05,
"loss": 0.1569,
"step": 275000
},
{
"epoch": 16.72,
"learning_rate": 1.1645106486256903e-05,
"loss": 0.1554,
"step": 275500
},
{
"epoch": 16.75,
"learning_rate": 1.1629937503792246e-05,
"loss": 0.1555,
"step": 276000
},
{
"epoch": 16.78,
"learning_rate": 1.1614768521327591e-05,
"loss": 0.1583,
"step": 276500
},
{
"epoch": 16.81,
"learning_rate": 1.1599599538862934e-05,
"loss": 0.1585,
"step": 277000
},
{
"epoch": 16.84,
"learning_rate": 1.1584430556398278e-05,
"loss": 0.1558,
"step": 277500
},
{
"epoch": 16.87,
"learning_rate": 1.156926157393362e-05,
"loss": 0.1564,
"step": 278000
},
{
"epoch": 16.9,
"learning_rate": 1.1554092591468967e-05,
"loss": 0.156,
"step": 278500
},
{
"epoch": 16.93,
"learning_rate": 1.1538953946969237e-05,
"loss": 0.1573,
"step": 279000
},
{
"epoch": 16.96,
"learning_rate": 1.1523784964504583e-05,
"loss": 0.1524,
"step": 279500
},
{
"epoch": 16.99,
"learning_rate": 1.1508615982039926e-05,
"loss": 0.1561,
"step": 280000
},
{
"epoch": 17.0,
"eval_bleu": 80.7199,
"eval_gen_len": 14.0692,
"eval_loss": 0.16820241510868073,
"eval_runtime": 169.5628,
"eval_samples_per_second": 95.092,
"eval_steps_per_second": 2.972,
"step": 280177
},
{
"epoch": 17.02,
"learning_rate": 1.1493446999575268e-05,
"loss": 0.1497,
"step": 280500
},
{
"epoch": 17.05,
"learning_rate": 1.1478278017110614e-05,
"loss": 0.15,
"step": 281000
},
{
"epoch": 17.08,
"learning_rate": 1.1463139372610885e-05,
"loss": 0.1535,
"step": 281500
},
{
"epoch": 17.11,
"learning_rate": 1.1448000728111159e-05,
"loss": 0.1501,
"step": 282000
},
{
"epoch": 17.14,
"learning_rate": 1.1432831745646502e-05,
"loss": 0.1513,
"step": 282500
},
{
"epoch": 17.17,
"learning_rate": 1.1417662763181847e-05,
"loss": 0.1535,
"step": 283000
},
{
"epoch": 17.2,
"learning_rate": 1.140249378071719e-05,
"loss": 0.1525,
"step": 283500
},
{
"epoch": 17.23,
"learning_rate": 1.1387324798252534e-05,
"loss": 0.151,
"step": 284000
},
{
"epoch": 17.26,
"learning_rate": 1.1372155815787878e-05,
"loss": 0.1497,
"step": 284500
},
{
"epoch": 17.29,
"learning_rate": 1.1356986833323223e-05,
"loss": 0.1517,
"step": 285000
},
{
"epoch": 17.32,
"learning_rate": 1.1341817850858565e-05,
"loss": 0.1525,
"step": 285500
},
{
"epoch": 17.35,
"learning_rate": 1.1326648868393908e-05,
"loss": 0.1548,
"step": 286000
},
{
"epoch": 17.38,
"learning_rate": 1.1311479885929253e-05,
"loss": 0.1516,
"step": 286500
},
{
"epoch": 17.41,
"learning_rate": 1.1296310903464596e-05,
"loss": 0.1546,
"step": 287000
},
{
"epoch": 17.44,
"learning_rate": 1.128114192099994e-05,
"loss": 0.1492,
"step": 287500
},
{
"epoch": 17.47,
"learning_rate": 1.1266003276500213e-05,
"loss": 0.1506,
"step": 288000
},
{
"epoch": 17.51,
"learning_rate": 1.1250864632000487e-05,
"loss": 0.1509,
"step": 288500
},
{
"epoch": 17.54,
"learning_rate": 1.123569564953583e-05,
"loss": 0.1521,
"step": 289000
},
{
"epoch": 17.57,
"learning_rate": 1.1220526667071175e-05,
"loss": 0.1528,
"step": 289500
},
{
"epoch": 17.6,
"learning_rate": 1.1205357684606518e-05,
"loss": 0.1543,
"step": 290000
},
{
"epoch": 17.63,
"learning_rate": 1.1190188702141862e-05,
"loss": 0.1492,
"step": 290500
},
{
"epoch": 17.66,
"learning_rate": 1.1175050057642134e-05,
"loss": 0.1526,
"step": 291000
},
{
"epoch": 17.69,
"learning_rate": 1.1159881075177479e-05,
"loss": 0.1516,
"step": 291500
},
{
"epoch": 17.72,
"learning_rate": 1.1144712092712821e-05,
"loss": 0.15,
"step": 292000
},
{
"epoch": 17.75,
"learning_rate": 1.1129543110248164e-05,
"loss": 0.151,
"step": 292500
},
{
"epoch": 17.78,
"learning_rate": 1.1114404465748438e-05,
"loss": 0.1508,
"step": 293000
},
{
"epoch": 17.81,
"learning_rate": 1.1099235483283782e-05,
"loss": 0.1501,
"step": 293500
},
{
"epoch": 17.84,
"learning_rate": 1.1084066500819126e-05,
"loss": 0.1517,
"step": 294000
},
{
"epoch": 17.87,
"learning_rate": 1.1068897518354469e-05,
"loss": 0.1499,
"step": 294500
},
{
"epoch": 17.9,
"learning_rate": 1.1053728535889814e-05,
"loss": 0.1504,
"step": 295000
},
{
"epoch": 17.93,
"learning_rate": 1.1038589891390085e-05,
"loss": 0.1514,
"step": 295500
},
{
"epoch": 17.96,
"learning_rate": 1.1023420908925431e-05,
"loss": 0.1493,
"step": 296000
},
{
"epoch": 17.99,
"learning_rate": 1.1008251926460774e-05,
"loss": 0.1532,
"step": 296500
},
{
"epoch": 18.0,
"eval_bleu": 80.7365,
"eval_gen_len": 14.0628,
"eval_loss": 0.16694927215576172,
"eval_runtime": 170.5372,
"eval_samples_per_second": 94.548,
"eval_steps_per_second": 2.955,
"step": 296658
},
{
"epoch": 18.02,
"learning_rate": 1.0993113281961048e-05,
"loss": 0.1487,
"step": 297000
},
{
"epoch": 18.05,
"learning_rate": 1.097794429949639e-05,
"loss": 0.1455,
"step": 297500
},
{
"epoch": 18.08,
"learning_rate": 1.0962775317031735e-05,
"loss": 0.1449,
"step": 298000
},
{
"epoch": 18.11,
"learning_rate": 1.0947636672532007e-05,
"loss": 0.1498,
"step": 298500
},
{
"epoch": 18.14,
"learning_rate": 1.0932467690067353e-05,
"loss": 0.1475,
"step": 299000
},
{
"epoch": 18.17,
"learning_rate": 1.0917298707602695e-05,
"loss": 0.1477,
"step": 299500
},
{
"epoch": 18.2,
"learning_rate": 1.0902129725138038e-05,
"loss": 0.1488,
"step": 300000
},
{
"epoch": 18.23,
"learning_rate": 1.0886960742673382e-05,
"loss": 0.1471,
"step": 300500
},
{
"epoch": 18.26,
"learning_rate": 1.0871822098173655e-05,
"loss": 0.1503,
"step": 301000
},
{
"epoch": 18.29,
"learning_rate": 1.0856653115708999e-05,
"loss": 0.15,
"step": 301500
},
{
"epoch": 18.32,
"learning_rate": 1.0841484133244341e-05,
"loss": 0.1477,
"step": 302000
},
{
"epoch": 18.35,
"learning_rate": 1.0826315150779687e-05,
"loss": 0.1491,
"step": 302500
},
{
"epoch": 18.38,
"learning_rate": 1.081114616831503e-05,
"loss": 0.149,
"step": 303000
},
{
"epoch": 18.42,
"learning_rate": 1.0795977185850376e-05,
"loss": 0.1472,
"step": 303500
},
{
"epoch": 18.45,
"learning_rate": 1.0780808203385718e-05,
"loss": 0.1467,
"step": 304000
},
{
"epoch": 18.48,
"learning_rate": 1.0765669558885992e-05,
"loss": 0.1496,
"step": 304500
},
{
"epoch": 18.51,
"learning_rate": 1.0750500576421335e-05,
"loss": 0.148,
"step": 305000
},
{
"epoch": 18.54,
"learning_rate": 1.0735331593956679e-05,
"loss": 0.1456,
"step": 305500
},
{
"epoch": 18.57,
"learning_rate": 1.0720192949456951e-05,
"loss": 0.149,
"step": 306000
},
{
"epoch": 18.6,
"learning_rate": 1.0705023966992294e-05,
"loss": 0.1485,
"step": 306500
},
{
"epoch": 18.63,
"learning_rate": 1.0689854984527638e-05,
"loss": 0.1478,
"step": 307000
},
{
"epoch": 18.66,
"learning_rate": 1.0674686002062982e-05,
"loss": 0.1473,
"step": 307500
},
{
"epoch": 18.69,
"learning_rate": 1.0659517019598326e-05,
"loss": 0.1484,
"step": 308000
},
{
"epoch": 18.72,
"learning_rate": 1.0644348037133669e-05,
"loss": 0.149,
"step": 308500
},
{
"epoch": 18.75,
"learning_rate": 1.0629179054669015e-05,
"loss": 0.1484,
"step": 309000
},
{
"epoch": 18.78,
"learning_rate": 1.0614010072204357e-05,
"loss": 0.1463,
"step": 309500
},
{
"epoch": 18.81,
"learning_rate": 1.0598841089739702e-05,
"loss": 0.1482,
"step": 310000
},
{
"epoch": 18.84,
"learning_rate": 1.0583672107275044e-05,
"loss": 0.1466,
"step": 310500
},
{
"epoch": 18.87,
"learning_rate": 1.0568503124810388e-05,
"loss": 0.148,
"step": 311000
},
{
"epoch": 18.9,
"learning_rate": 1.0553364480310662e-05,
"loss": 0.1492,
"step": 311500
},
{
"epoch": 18.93,
"learning_rate": 1.0538195497846005e-05,
"loss": 0.1491,
"step": 312000
},
{
"epoch": 18.96,
"learning_rate": 1.0523026515381349e-05,
"loss": 0.1485,
"step": 312500
},
{
"epoch": 18.99,
"learning_rate": 1.0507857532916692e-05,
"loss": 0.15,
"step": 313000
},
{
"epoch": 19.0,
"eval_bleu": 80.8654,
"eval_gen_len": 14.0824,
"eval_loss": 0.16597765684127808,
"eval_runtime": 172.4859,
"eval_samples_per_second": 93.48,
"eval_steps_per_second": 2.922,
"step": 313139
},
{
"epoch": 19.02,
"learning_rate": 1.0492688550452037e-05,
"loss": 0.1465,
"step": 313500
},
{
"epoch": 19.05,
"learning_rate": 1.0477549905952308e-05,
"loss": 0.1446,
"step": 314000
},
{
"epoch": 19.08,
"learning_rate": 1.0462380923487654e-05,
"loss": 0.1455,
"step": 314500
},
{
"epoch": 19.11,
"learning_rate": 1.0447211941022997e-05,
"loss": 0.1444,
"step": 315000
},
{
"epoch": 19.14,
"learning_rate": 1.043204295855834e-05,
"loss": 0.147,
"step": 315500
},
{
"epoch": 19.17,
"learning_rate": 1.0416873976093685e-05,
"loss": 0.1457,
"step": 316000
},
{
"epoch": 19.2,
"learning_rate": 1.040170499362903e-05,
"loss": 0.1434,
"step": 316500
},
{
"epoch": 19.23,
"learning_rate": 1.0386536011164372e-05,
"loss": 0.1455,
"step": 317000
},
{
"epoch": 19.26,
"learning_rate": 1.0371367028699714e-05,
"loss": 0.1456,
"step": 317500
},
{
"epoch": 19.29,
"learning_rate": 1.035619804623506e-05,
"loss": 0.1443,
"step": 318000
},
{
"epoch": 19.33,
"learning_rate": 1.0341029063770403e-05,
"loss": 0.1452,
"step": 318500
},
{
"epoch": 19.36,
"learning_rate": 1.0325860081305747e-05,
"loss": 0.1417,
"step": 319000
},
{
"epoch": 19.39,
"learning_rate": 1.0310691098841091e-05,
"loss": 0.1423,
"step": 319500
},
{
"epoch": 19.42,
"learning_rate": 1.0295522116376435e-05,
"loss": 0.1466,
"step": 320000
},
{
"epoch": 19.45,
"learning_rate": 1.0280353133911778e-05,
"loss": 0.149,
"step": 320500
},
{
"epoch": 19.48,
"learning_rate": 1.026518415144712e-05,
"loss": 0.1459,
"step": 321000
},
{
"epoch": 19.51,
"learning_rate": 1.0250045506947394e-05,
"loss": 0.1427,
"step": 321500
},
{
"epoch": 19.54,
"learning_rate": 1.0234876524482737e-05,
"loss": 0.1446,
"step": 322000
},
{
"epoch": 19.57,
"learning_rate": 1.0219707542018083e-05,
"loss": 0.1439,
"step": 322500
},
{
"epoch": 19.6,
"learning_rate": 1.0204538559553425e-05,
"loss": 0.1456,
"step": 323000
},
{
"epoch": 19.63,
"learning_rate": 1.01893999150537e-05,
"loss": 0.1463,
"step": 323500
},
{
"epoch": 19.66,
"learning_rate": 1.0174230932589042e-05,
"loss": 0.1454,
"step": 324000
},
{
"epoch": 19.69,
"learning_rate": 1.0159061950124388e-05,
"loss": 0.1439,
"step": 324500
},
{
"epoch": 19.72,
"learning_rate": 1.014389296765973e-05,
"loss": 0.1457,
"step": 325000
},
{
"epoch": 19.75,
"learning_rate": 1.0128723985195074e-05,
"loss": 0.1487,
"step": 325500
},
{
"epoch": 19.78,
"learning_rate": 1.0113585340695347e-05,
"loss": 0.1442,
"step": 326000
},
{
"epoch": 19.81,
"learning_rate": 1.0098416358230691e-05,
"loss": 0.1444,
"step": 326500
},
{
"epoch": 19.84,
"learning_rate": 1.0083277713730964e-05,
"loss": 0.1442,
"step": 327000
},
{
"epoch": 19.87,
"learning_rate": 1.0068108731266308e-05,
"loss": 0.1452,
"step": 327500
},
{
"epoch": 19.9,
"learning_rate": 1.005293974880165e-05,
"loss": 0.1451,
"step": 328000
},
{
"epoch": 19.93,
"learning_rate": 1.0037770766336994e-05,
"loss": 0.1439,
"step": 328500
},
{
"epoch": 19.96,
"learning_rate": 1.0022601783872339e-05,
"loss": 0.1444,
"step": 329000
},
{
"epoch": 19.99,
"learning_rate": 1.0007432801407681e-05,
"loss": 0.1479,
"step": 329500
},
{
"epoch": 20.0,
"eval_bleu": 80.8523,
"eval_gen_len": 14.0607,
"eval_loss": 0.16498535871505737,
"eval_runtime": 169.9695,
"eval_samples_per_second": 94.864,
"eval_steps_per_second": 2.965,
"step": 329620
},
{
"epoch": 20.02,
"learning_rate": 9.992263818943027e-06,
"loss": 0.1415,
"step": 330000
},
{
"epoch": 20.05,
"learning_rate": 9.97709483647837e-06,
"loss": 0.1391,
"step": 330500
},
{
"epoch": 20.08,
"learning_rate": 9.961925854013714e-06,
"loss": 0.1411,
"step": 331000
},
{
"epoch": 20.11,
"learning_rate": 9.946756871549056e-06,
"loss": 0.1435,
"step": 331500
},
{
"epoch": 20.14,
"learning_rate": 9.93161822704933e-06,
"loss": 0.1422,
"step": 332000
},
{
"epoch": 20.17,
"learning_rate": 9.916449244584675e-06,
"loss": 0.1434,
"step": 332500
},
{
"epoch": 20.21,
"learning_rate": 9.901280262120017e-06,
"loss": 0.1407,
"step": 333000
},
{
"epoch": 20.24,
"learning_rate": 9.886111279655361e-06,
"loss": 0.1434,
"step": 333500
},
{
"epoch": 20.27,
"learning_rate": 9.870942297190705e-06,
"loss": 0.1433,
"step": 334000
},
{
"epoch": 20.3,
"learning_rate": 9.855803652690978e-06,
"loss": 0.1432,
"step": 334500
},
{
"epoch": 20.33,
"learning_rate": 9.840634670226322e-06,
"loss": 0.145,
"step": 335000
},
{
"epoch": 20.36,
"learning_rate": 9.825465687761666e-06,
"loss": 0.1427,
"step": 335500
},
{
"epoch": 20.39,
"learning_rate": 9.81029670529701e-06,
"loss": 0.1454,
"step": 336000
},
{
"epoch": 20.42,
"learning_rate": 9.795158060797283e-06,
"loss": 0.1467,
"step": 336500
},
{
"epoch": 20.45,
"learning_rate": 9.780019416297556e-06,
"loss": 0.1409,
"step": 337000
},
{
"epoch": 20.48,
"learning_rate": 9.7648504338329e-06,
"loss": 0.1432,
"step": 337500
},
{
"epoch": 20.51,
"learning_rate": 9.749681451368242e-06,
"loss": 0.1411,
"step": 338000
},
{
"epoch": 20.54,
"learning_rate": 9.734512468903586e-06,
"loss": 0.1394,
"step": 338500
},
{
"epoch": 20.57,
"learning_rate": 9.71934348643893e-06,
"loss": 0.1431,
"step": 339000
},
{
"epoch": 20.6,
"learning_rate": 9.704174503974275e-06,
"loss": 0.1451,
"step": 339500
},
{
"epoch": 20.63,
"learning_rate": 9.689005521509617e-06,
"loss": 0.1431,
"step": 340000
},
{
"epoch": 20.66,
"learning_rate": 9.673836539044961e-06,
"loss": 0.1452,
"step": 340500
},
{
"epoch": 20.69,
"learning_rate": 9.658667556580306e-06,
"loss": 0.1417,
"step": 341000
},
{
"epoch": 20.72,
"learning_rate": 9.64349857411565e-06,
"loss": 0.1422,
"step": 341500
},
{
"epoch": 20.75,
"learning_rate": 9.628359929615922e-06,
"loss": 0.1398,
"step": 342000
},
{
"epoch": 20.78,
"learning_rate": 9.613190947151267e-06,
"loss": 0.1409,
"step": 342500
},
{
"epoch": 20.81,
"learning_rate": 9.598021964686609e-06,
"loss": 0.1432,
"step": 343000
},
{
"epoch": 20.84,
"learning_rate": 9.582852982221953e-06,
"loss": 0.1418,
"step": 343500
},
{
"epoch": 20.87,
"learning_rate": 9.567714337722226e-06,
"loss": 0.1435,
"step": 344000
},
{
"epoch": 20.9,
"learning_rate": 9.55254535525757e-06,
"loss": 0.142,
"step": 344500
},
{
"epoch": 20.93,
"learning_rate": 9.537376372792914e-06,
"loss": 0.1402,
"step": 345000
},
{
"epoch": 20.96,
"learning_rate": 9.522207390328258e-06,
"loss": 0.1449,
"step": 345500
},
{
"epoch": 20.99,
"learning_rate": 9.5070384078636e-06,
"loss": 0.1404,
"step": 346000
},
{
"epoch": 21.0,
"eval_bleu": 80.9578,
"eval_gen_len": 14.0655,
"eval_loss": 0.164962500333786,
"eval_runtime": 171.5734,
"eval_samples_per_second": 93.977,
"eval_steps_per_second": 2.938,
"step": 346101
},
{
"epoch": 21.02,
"learning_rate": 9.491899763363875e-06,
"loss": 0.1392,
"step": 346500
},
{
"epoch": 21.05,
"learning_rate": 9.476730780899217e-06,
"loss": 0.1375,
"step": 347000
},
{
"epoch": 21.08,
"learning_rate": 9.461561798434562e-06,
"loss": 0.142,
"step": 347500
},
{
"epoch": 21.12,
"learning_rate": 9.446392815969906e-06,
"loss": 0.139,
"step": 348000
},
{
"epoch": 21.15,
"learning_rate": 9.43122383350525e-06,
"loss": 0.1405,
"step": 348500
},
{
"epoch": 21.18,
"learning_rate": 9.416054851040593e-06,
"loss": 0.1399,
"step": 349000
},
{
"epoch": 21.21,
"learning_rate": 9.400885868575937e-06,
"loss": 0.1388,
"step": 349500
},
{
"epoch": 21.24,
"learning_rate": 9.38574722407621e-06,
"loss": 0.1403,
"step": 350000
},
{
"epoch": 21.27,
"learning_rate": 9.370578241611553e-06,
"loss": 0.1415,
"step": 350500
},
{
"epoch": 21.3,
"learning_rate": 9.355409259146898e-06,
"loss": 0.1387,
"step": 351000
},
{
"epoch": 21.33,
"learning_rate": 9.34024027668224e-06,
"loss": 0.1384,
"step": 351500
},
{
"epoch": 21.36,
"learning_rate": 9.325101632182514e-06,
"loss": 0.1396,
"step": 352000
},
{
"epoch": 21.39,
"learning_rate": 9.309932649717858e-06,
"loss": 0.1417,
"step": 352500
},
{
"epoch": 21.42,
"learning_rate": 9.294763667253201e-06,
"loss": 0.1388,
"step": 353000
},
{
"epoch": 21.45,
"learning_rate": 9.279594684788545e-06,
"loss": 0.139,
"step": 353500
},
{
"epoch": 21.48,
"learning_rate": 9.26442570232389e-06,
"loss": 0.1398,
"step": 354000
},
{
"epoch": 21.51,
"learning_rate": 9.249256719859234e-06,
"loss": 0.1402,
"step": 354500
},
{
"epoch": 21.54,
"learning_rate": 9.234087737394576e-06,
"loss": 0.1402,
"step": 355000
},
{
"epoch": 21.57,
"learning_rate": 9.21891875492992e-06,
"loss": 0.1401,
"step": 355500
},
{
"epoch": 21.6,
"learning_rate": 9.203780110430193e-06,
"loss": 0.1391,
"step": 356000
},
{
"epoch": 21.63,
"learning_rate": 9.188641465930465e-06,
"loss": 0.1394,
"step": 356500
},
{
"epoch": 21.66,
"learning_rate": 9.17347248346581e-06,
"loss": 0.1382,
"step": 357000
},
{
"epoch": 21.69,
"learning_rate": 9.158303501001154e-06,
"loss": 0.1388,
"step": 357500
},
{
"epoch": 21.72,
"learning_rate": 9.143134518536498e-06,
"loss": 0.1379,
"step": 358000
},
{
"epoch": 21.75,
"learning_rate": 9.12799587403677e-06,
"loss": 0.1417,
"step": 358500
},
{
"epoch": 21.78,
"learning_rate": 9.112826891572114e-06,
"loss": 0.1428,
"step": 359000
},
{
"epoch": 21.81,
"learning_rate": 9.097657909107459e-06,
"loss": 0.1406,
"step": 359500
},
{
"epoch": 21.84,
"learning_rate": 9.082488926642801e-06,
"loss": 0.1398,
"step": 360000
},
{
"epoch": 21.87,
"learning_rate": 9.067319944178145e-06,
"loss": 0.1396,
"step": 360500
},
{
"epoch": 21.9,
"learning_rate": 9.052181299678418e-06,
"loss": 0.1405,
"step": 361000
},
{
"epoch": 21.93,
"learning_rate": 9.037012317213762e-06,
"loss": 0.1384,
"step": 361500
},
{
"epoch": 21.96,
"learning_rate": 9.021843334749105e-06,
"loss": 0.1411,
"step": 362000
},
{
"epoch": 22.0,
"learning_rate": 9.006674352284449e-06,
"loss": 0.1428,
"step": 362500
},
{
"epoch": 22.0,
"eval_bleu": 80.9035,
"eval_gen_len": 14.1041,
"eval_loss": 0.16399070620536804,
"eval_runtime": 169.4187,
"eval_samples_per_second": 95.173,
"eval_steps_per_second": 2.975,
"step": 362582
},
{
"epoch": 22.03,
"learning_rate": 8.991505369819793e-06,
"loss": 0.1366,
"step": 363000
},
{
"epoch": 22.06,
"learning_rate": 8.976336387355137e-06,
"loss": 0.138,
"step": 363500
},
{
"epoch": 22.09,
"learning_rate": 8.96119774285541e-06,
"loss": 0.1386,
"step": 364000
},
{
"epoch": 22.12,
"learning_rate": 8.946028760390754e-06,
"loss": 0.1391,
"step": 364500
},
{
"epoch": 22.15,
"learning_rate": 8.930859777926098e-06,
"loss": 0.1354,
"step": 365000
},
{
"epoch": 22.18,
"learning_rate": 8.915690795461442e-06,
"loss": 0.1372,
"step": 365500
},
{
"epoch": 22.21,
"learning_rate": 8.900521812996785e-06,
"loss": 0.1361,
"step": 366000
},
{
"epoch": 22.24,
"learning_rate": 8.885352830532129e-06,
"loss": 0.1341,
"step": 366500
},
{
"epoch": 22.27,
"learning_rate": 8.870183848067473e-06,
"loss": 0.1345,
"step": 367000
},
{
"epoch": 22.3,
"learning_rate": 8.855014865602816e-06,
"loss": 0.1382,
"step": 367500
},
{
"epoch": 22.33,
"learning_rate": 8.839876221103088e-06,
"loss": 0.1363,
"step": 368000
},
{
"epoch": 22.36,
"learning_rate": 8.824707238638432e-06,
"loss": 0.1369,
"step": 368500
},
{
"epoch": 22.39,
"learning_rate": 8.809538256173776e-06,
"loss": 0.1381,
"step": 369000
},
{
"epoch": 22.42,
"learning_rate": 8.79436927370912e-06,
"loss": 0.1346,
"step": 369500
},
{
"epoch": 22.45,
"learning_rate": 8.779200291244465e-06,
"loss": 0.1378,
"step": 370000
},
{
"epoch": 22.48,
"learning_rate": 8.764031308779807e-06,
"loss": 0.1393,
"step": 370500
},
{
"epoch": 22.51,
"learning_rate": 8.748862326315151e-06,
"loss": 0.1396,
"step": 371000
},
{
"epoch": 22.54,
"learning_rate": 8.733693343850496e-06,
"loss": 0.1395,
"step": 371500
},
{
"epoch": 22.57,
"learning_rate": 8.718554699350768e-06,
"loss": 0.1349,
"step": 372000
},
{
"epoch": 22.6,
"learning_rate": 8.703385716886112e-06,
"loss": 0.1376,
"step": 372500
},
{
"epoch": 22.63,
"learning_rate": 8.688216734421455e-06,
"loss": 0.1389,
"step": 373000
},
{
"epoch": 22.66,
"learning_rate": 8.673078089921729e-06,
"loss": 0.1368,
"step": 373500
},
{
"epoch": 22.69,
"learning_rate": 8.657909107457072e-06,
"loss": 0.1379,
"step": 374000
},
{
"epoch": 22.72,
"learning_rate": 8.642740124992416e-06,
"loss": 0.1348,
"step": 374500
},
{
"epoch": 22.75,
"learning_rate": 8.62757114252776e-06,
"loss": 0.136,
"step": 375000
},
{
"epoch": 22.78,
"learning_rate": 8.612402160063104e-06,
"loss": 0.1374,
"step": 375500
},
{
"epoch": 22.81,
"learning_rate": 8.597233177598448e-06,
"loss": 0.1396,
"step": 376000
},
{
"epoch": 22.84,
"learning_rate": 8.58206419513379e-06,
"loss": 0.1383,
"step": 376500
},
{
"epoch": 22.87,
"learning_rate": 8.566925550634065e-06,
"loss": 0.1387,
"step": 377000
},
{
"epoch": 22.91,
"learning_rate": 8.551756568169407e-06,
"loss": 0.1387,
"step": 377500
},
{
"epoch": 22.94,
"learning_rate": 8.536587585704752e-06,
"loss": 0.1391,
"step": 378000
},
{
"epoch": 22.97,
"learning_rate": 8.521418603240096e-06,
"loss": 0.1402,
"step": 378500
},
{
"epoch": 23.0,
"learning_rate": 8.506249620775438e-06,
"loss": 0.1378,
"step": 379000
},
{
"epoch": 23.0,
"eval_bleu": 80.9901,
"eval_gen_len": 14.0593,
"eval_loss": 0.16328243911266327,
"eval_runtime": 168.8208,
"eval_samples_per_second": 95.51,
"eval_steps_per_second": 2.985,
"step": 379063
},
{
"epoch": 23.03,
"learning_rate": 8.491080638310782e-06,
"loss": 0.1356,
"step": 379500
},
{
"epoch": 23.06,
"learning_rate": 8.475911655846127e-06,
"loss": 0.1338,
"step": 380000
},
{
"epoch": 23.09,
"learning_rate": 8.4607730113464e-06,
"loss": 0.136,
"step": 380500
},
{
"epoch": 23.12,
"learning_rate": 8.445634366846672e-06,
"loss": 0.1362,
"step": 381000
},
{
"epoch": 23.15,
"learning_rate": 8.430465384382016e-06,
"loss": 0.1355,
"step": 381500
},
{
"epoch": 23.18,
"learning_rate": 8.41529640191736e-06,
"loss": 0.1359,
"step": 382000
},
{
"epoch": 23.21,
"learning_rate": 8.400127419452704e-06,
"loss": 0.1318,
"step": 382500
},
{
"epoch": 23.24,
"learning_rate": 8.384958436988048e-06,
"loss": 0.1335,
"step": 383000
},
{
"epoch": 23.27,
"learning_rate": 8.369789454523391e-06,
"loss": 0.1354,
"step": 383500
},
{
"epoch": 23.3,
"learning_rate": 8.354620472058735e-06,
"loss": 0.1351,
"step": 384000
},
{
"epoch": 23.33,
"learning_rate": 8.33945148959408e-06,
"loss": 0.1377,
"step": 384500
},
{
"epoch": 23.36,
"learning_rate": 8.324312845094352e-06,
"loss": 0.1367,
"step": 385000
},
{
"epoch": 23.39,
"learning_rate": 8.309143862629694e-06,
"loss": 0.1356,
"step": 385500
},
{
"epoch": 23.42,
"learning_rate": 8.293974880165039e-06,
"loss": 0.1352,
"step": 386000
},
{
"epoch": 23.45,
"learning_rate": 8.278805897700383e-06,
"loss": 0.1365,
"step": 386500
},
{
"epoch": 23.48,
"learning_rate": 8.263636915235727e-06,
"loss": 0.136,
"step": 387000
},
{
"epoch": 23.51,
"learning_rate": 8.248467932771071e-06,
"loss": 0.1374,
"step": 387500
},
{
"epoch": 23.54,
"learning_rate": 8.233329288271344e-06,
"loss": 0.1346,
"step": 388000
},
{
"epoch": 23.57,
"learning_rate": 8.218160305806688e-06,
"loss": 0.1364,
"step": 388500
},
{
"epoch": 23.6,
"learning_rate": 8.20299132334203e-06,
"loss": 0.1356,
"step": 389000
},
{
"epoch": 23.63,
"learning_rate": 8.187822340877374e-06,
"loss": 0.1349,
"step": 389500
},
{
"epoch": 23.66,
"learning_rate": 8.172653358412719e-06,
"loss": 0.1357,
"step": 390000
},
{
"epoch": 23.69,
"learning_rate": 8.157514713912991e-06,
"loss": 0.136,
"step": 390500
},
{
"epoch": 23.72,
"learning_rate": 8.142345731448335e-06,
"loss": 0.1334,
"step": 391000
},
{
"epoch": 23.75,
"learning_rate": 8.127207086948608e-06,
"loss": 0.1347,
"step": 391500
},
{
"epoch": 23.78,
"learning_rate": 8.112038104483952e-06,
"loss": 0.1359,
"step": 392000
},
{
"epoch": 23.82,
"learning_rate": 8.096869122019295e-06,
"loss": 0.1347,
"step": 392500
},
{
"epoch": 23.85,
"learning_rate": 8.081700139554639e-06,
"loss": 0.1378,
"step": 393000
},
{
"epoch": 23.88,
"learning_rate": 8.066531157089983e-06,
"loss": 0.1332,
"step": 393500
},
{
"epoch": 23.91,
"learning_rate": 8.051362174625327e-06,
"loss": 0.1352,
"step": 394000
},
{
"epoch": 23.94,
"learning_rate": 8.036193192160671e-06,
"loss": 0.1326,
"step": 394500
},
{
"epoch": 23.97,
"learning_rate": 8.021024209696014e-06,
"loss": 0.1347,
"step": 395000
},
{
"epoch": 24.0,
"learning_rate": 8.005855227231358e-06,
"loss": 0.1359,
"step": 395500
},
{
"epoch": 24.0,
"eval_bleu": 81.0343,
"eval_gen_len": 14.0796,
"eval_loss": 0.16283883154392242,
"eval_runtime": 168.3093,
"eval_samples_per_second": 95.8,
"eval_steps_per_second": 2.994,
"step": 395544
},
{
"epoch": 24.03,
"learning_rate": 7.990716582731632e-06,
"loss": 0.1295,
"step": 396000
},
{
"epoch": 24.06,
"learning_rate": 7.975547600266975e-06,
"loss": 0.1321,
"step": 396500
},
{
"epoch": 24.09,
"learning_rate": 7.960378617802319e-06,
"loss": 0.1309,
"step": 397000
},
{
"epoch": 24.12,
"learning_rate": 7.945209635337661e-06,
"loss": 0.1327,
"step": 397500
},
{
"epoch": 24.15,
"learning_rate": 7.930070990837936e-06,
"loss": 0.1325,
"step": 398000
},
{
"epoch": 24.18,
"learning_rate": 7.914902008373278e-06,
"loss": 0.1309,
"step": 398500
},
{
"epoch": 24.21,
"learning_rate": 7.899733025908622e-06,
"loss": 0.1318,
"step": 399000
},
{
"epoch": 24.24,
"learning_rate": 7.884564043443966e-06,
"loss": 0.1297,
"step": 399500
},
{
"epoch": 24.27,
"learning_rate": 7.86939506097931e-06,
"loss": 0.1364,
"step": 400000
},
{
"epoch": 24.3,
"learning_rate": 7.854226078514655e-06,
"loss": 0.1321,
"step": 400500
},
{
"epoch": 24.33,
"learning_rate": 7.839057096049997e-06,
"loss": 0.136,
"step": 401000
},
{
"epoch": 24.36,
"learning_rate": 7.823888113585341e-06,
"loss": 0.1332,
"step": 401500
},
{
"epoch": 24.39,
"learning_rate": 7.808719131120686e-06,
"loss": 0.1324,
"step": 402000
},
{
"epoch": 24.42,
"learning_rate": 7.793550148656028e-06,
"loss": 0.1347,
"step": 402500
},
{
"epoch": 24.45,
"learning_rate": 7.778381166191372e-06,
"loss": 0.1343,
"step": 403000
},
{
"epoch": 24.48,
"learning_rate": 7.763212183726716e-06,
"loss": 0.1366,
"step": 403500
},
{
"epoch": 24.51,
"learning_rate": 7.748073539226989e-06,
"loss": 0.1346,
"step": 404000
},
{
"epoch": 24.54,
"learning_rate": 7.732934894727261e-06,
"loss": 0.1348,
"step": 404500
},
{
"epoch": 24.57,
"learning_rate": 7.717765912262606e-06,
"loss": 0.1361,
"step": 405000
},
{
"epoch": 24.6,
"learning_rate": 7.70259692979795e-06,
"loss": 0.1354,
"step": 405500
},
{
"epoch": 24.63,
"learning_rate": 7.687427947333294e-06,
"loss": 0.1327,
"step": 406000
},
{
"epoch": 24.66,
"learning_rate": 7.672289302833567e-06,
"loss": 0.1342,
"step": 406500
},
{
"epoch": 24.7,
"learning_rate": 7.65712032036891e-06,
"loss": 0.1314,
"step": 407000
},
{
"epoch": 24.73,
"learning_rate": 7.641951337904255e-06,
"loss": 0.1329,
"step": 407500
},
{
"epoch": 24.76,
"learning_rate": 7.626782355439598e-06,
"loss": 0.1357,
"step": 408000
},
{
"epoch": 24.79,
"learning_rate": 7.611613372974942e-06,
"loss": 0.1345,
"step": 408500
},
{
"epoch": 24.82,
"learning_rate": 7.596444390510285e-06,
"loss": 0.1316,
"step": 409000
},
{
"epoch": 24.85,
"learning_rate": 7.581275408045628e-06,
"loss": 0.1354,
"step": 409500
},
{
"epoch": 24.88,
"learning_rate": 7.5661064255809725e-06,
"loss": 0.1365,
"step": 410000
},
{
"epoch": 24.91,
"learning_rate": 7.550937443116317e-06,
"loss": 0.1334,
"step": 410500
},
{
"epoch": 24.94,
"learning_rate": 7.53576846065166e-06,
"loss": 0.134,
"step": 411000
},
{
"epoch": 24.97,
"learning_rate": 7.520629816151933e-06,
"loss": 0.1316,
"step": 411500
},
{
"epoch": 25.0,
"learning_rate": 7.505460833687277e-06,
"loss": 0.131,
"step": 412000
},
{
"epoch": 25.0,
"eval_bleu": 81.0024,
"eval_gen_len": 14.078,
"eval_loss": 0.16371172666549683,
"eval_runtime": 168.3117,
"eval_samples_per_second": 95.798,
"eval_steps_per_second": 2.994,
"step": 412025
},
{
"epoch": 25.03,
"learning_rate": 7.490291851222621e-06,
"loss": 0.1291,
"step": 412500
},
{
"epoch": 25.06,
"learning_rate": 7.475122868757965e-06,
"loss": 0.1311,
"step": 413000
},
{
"epoch": 25.09,
"learning_rate": 7.459953886293308e-06,
"loss": 0.1326,
"step": 413500
},
{
"epoch": 25.12,
"learning_rate": 7.444815241793582e-06,
"loss": 0.1298,
"step": 414000
},
{
"epoch": 25.15,
"learning_rate": 7.429646259328925e-06,
"loss": 0.1309,
"step": 414500
},
{
"epoch": 25.18,
"learning_rate": 7.4144772768642684e-06,
"loss": 0.1319,
"step": 415000
},
{
"epoch": 25.21,
"learning_rate": 7.399308294399612e-06,
"loss": 0.1289,
"step": 415500
},
{
"epoch": 25.24,
"learning_rate": 7.384139311934956e-06,
"loss": 0.1313,
"step": 416000
},
{
"epoch": 25.27,
"learning_rate": 7.3690006674352285e-06,
"loss": 0.1304,
"step": 416500
},
{
"epoch": 25.3,
"learning_rate": 7.353831684970573e-06,
"loss": 0.1367,
"step": 417000
},
{
"epoch": 25.33,
"learning_rate": 7.338662702505917e-06,
"loss": 0.1327,
"step": 417500
},
{
"epoch": 25.36,
"learning_rate": 7.32349372004126e-06,
"loss": 0.1341,
"step": 418000
},
{
"epoch": 25.39,
"learning_rate": 7.3083550755415335e-06,
"loss": 0.1299,
"step": 418500
},
{
"epoch": 25.42,
"learning_rate": 7.293186093076877e-06,
"loss": 0.1322,
"step": 419000
},
{
"epoch": 25.45,
"learning_rate": 7.278017110612221e-06,
"loss": 0.1302,
"step": 419500
},
{
"epoch": 25.48,
"learning_rate": 7.262848128147565e-06,
"loss": 0.1285,
"step": 420000
},
{
"epoch": 25.51,
"learning_rate": 7.247679145682908e-06,
"loss": 0.1315,
"step": 420500
},
{
"epoch": 25.54,
"learning_rate": 7.232510163218251e-06,
"loss": 0.1329,
"step": 421000
},
{
"epoch": 25.57,
"learning_rate": 7.217341180753595e-06,
"loss": 0.1322,
"step": 421500
},
{
"epoch": 25.61,
"learning_rate": 7.2021721982889395e-06,
"loss": 0.1278,
"step": 422000
},
{
"epoch": 25.64,
"learning_rate": 7.187033553789212e-06,
"loss": 0.1288,
"step": 422500
},
{
"epoch": 25.67,
"learning_rate": 7.171864571324556e-06,
"loss": 0.1339,
"step": 423000
},
{
"epoch": 25.7,
"learning_rate": 7.156725926824829e-06,
"loss": 0.1319,
"step": 423500
},
{
"epoch": 25.73,
"learning_rate": 7.141556944360173e-06,
"loss": 0.1299,
"step": 424000
},
{
"epoch": 25.76,
"learning_rate": 7.126387961895517e-06,
"loss": 0.1321,
"step": 424500
},
{
"epoch": 25.79,
"learning_rate": 7.11121897943086e-06,
"loss": 0.131,
"step": 425000
},
{
"epoch": 25.82,
"learning_rate": 7.096080334931134e-06,
"loss": 0.1315,
"step": 425500
},
{
"epoch": 25.85,
"learning_rate": 7.080911352466477e-06,
"loss": 0.1305,
"step": 426000
},
{
"epoch": 25.88,
"learning_rate": 7.065742370001821e-06,
"loss": 0.1303,
"step": 426500
},
{
"epoch": 25.91,
"learning_rate": 7.0505733875371654e-06,
"loss": 0.1327,
"step": 427000
},
{
"epoch": 25.94,
"learning_rate": 7.035404405072508e-06,
"loss": 0.1303,
"step": 427500
},
{
"epoch": 25.97,
"learning_rate": 7.0202657605727804e-06,
"loss": 0.1306,
"step": 428000
},
{
"epoch": 26.0,
"learning_rate": 7.005096778108125e-06,
"loss": 0.1325,
"step": 428500
},
{
"epoch": 26.0,
"eval_bleu": 81.0978,
"eval_gen_len": 14.0985,
"eval_loss": 0.16260042786598206,
"eval_runtime": 168.279,
"eval_samples_per_second": 95.817,
"eval_steps_per_second": 2.995,
"step": 428506
},
{
"epoch": 26.03,
"learning_rate": 6.989927795643469e-06,
"loss": 0.1257,
"step": 429000
},
{
"epoch": 26.06,
"learning_rate": 6.974758813178812e-06,
"loss": 0.1238,
"step": 429500
},
{
"epoch": 26.09,
"learning_rate": 6.959589830714156e-06,
"loss": 0.1316,
"step": 430000
},
{
"epoch": 26.12,
"learning_rate": 6.944451186214429e-06,
"loss": 0.1319,
"step": 430500
},
{
"epoch": 26.15,
"learning_rate": 6.929282203749773e-06,
"loss": 0.1305,
"step": 431000
},
{
"epoch": 26.18,
"learning_rate": 6.914113221285117e-06,
"loss": 0.1303,
"step": 431500
},
{
"epoch": 26.21,
"learning_rate": 6.8989442388204606e-06,
"loss": 0.1297,
"step": 432000
},
{
"epoch": 26.24,
"learning_rate": 6.883835932285663e-06,
"loss": 0.1284,
"step": 432500
},
{
"epoch": 26.27,
"learning_rate": 6.8686669498210064e-06,
"loss": 0.132,
"step": 433000
},
{
"epoch": 26.3,
"learning_rate": 6.853497967356351e-06,
"loss": 0.1277,
"step": 433500
},
{
"epoch": 26.33,
"learning_rate": 6.838328984891695e-06,
"loss": 0.1302,
"step": 434000
},
{
"epoch": 26.36,
"learning_rate": 6.823160002427037e-06,
"loss": 0.1287,
"step": 434500
},
{
"epoch": 26.39,
"learning_rate": 6.8079910199623815e-06,
"loss": 0.1277,
"step": 435000
},
{
"epoch": 26.42,
"learning_rate": 6.792822037497725e-06,
"loss": 0.128,
"step": 435500
},
{
"epoch": 26.45,
"learning_rate": 6.777653055033069e-06,
"loss": 0.1283,
"step": 436000
},
{
"epoch": 26.49,
"learning_rate": 6.762484072568412e-06,
"loss": 0.1314,
"step": 436500
},
{
"epoch": 26.52,
"learning_rate": 6.7473150901037565e-06,
"loss": 0.1307,
"step": 437000
},
{
"epoch": 26.55,
"learning_rate": 6.73217644560403e-06,
"loss": 0.1316,
"step": 437500
},
{
"epoch": 26.58,
"learning_rate": 6.717007463139373e-06,
"loss": 0.1291,
"step": 438000
},
{
"epoch": 26.61,
"learning_rate": 6.701838480674717e-06,
"loss": 0.1313,
"step": 438500
},
{
"epoch": 26.64,
"learning_rate": 6.686669498210061e-06,
"loss": 0.1282,
"step": 439000
},
{
"epoch": 26.67,
"learning_rate": 6.671530853710334e-06,
"loss": 0.1304,
"step": 439500
},
{
"epoch": 26.7,
"learning_rate": 6.656361871245678e-06,
"loss": 0.1272,
"step": 440000
},
{
"epoch": 26.73,
"learning_rate": 6.641192888781021e-06,
"loss": 0.1255,
"step": 440500
},
{
"epoch": 26.76,
"learning_rate": 6.626023906316364e-06,
"loss": 0.1308,
"step": 441000
},
{
"epoch": 26.79,
"learning_rate": 6.610854923851708e-06,
"loss": 0.1294,
"step": 441500
},
{
"epoch": 26.82,
"learning_rate": 6.5956859413870525e-06,
"loss": 0.1313,
"step": 442000
},
{
"epoch": 26.85,
"learning_rate": 6.580516958922396e-06,
"loss": 0.1313,
"step": 442500
},
{
"epoch": 26.88,
"learning_rate": 6.56534797645774e-06,
"loss": 0.1279,
"step": 443000
},
{
"epoch": 26.91,
"learning_rate": 6.550178993993083e-06,
"loss": 0.1289,
"step": 443500
},
{
"epoch": 26.94,
"learning_rate": 6.535070687458286e-06,
"loss": 0.1302,
"step": 444000
},
{
"epoch": 26.97,
"learning_rate": 6.51990170499363e-06,
"loss": 0.1308,
"step": 444500
},
{
"epoch": 27.0,
"eval_bleu": 81.1159,
"eval_gen_len": 14.0921,
"eval_loss": 0.16248230636119843,
"eval_runtime": 170.0823,
"eval_samples_per_second": 94.801,
"eval_steps_per_second": 2.963,
"step": 444987
},
{
"epoch": 27.0,
"learning_rate": 6.504732722528973e-06,
"loss": 0.1315,
"step": 445000
},
{
"epoch": 27.03,
"learning_rate": 1.3244781870032159e-05,
"loss": 0.1289,
"step": 445500
},
{
"epoch": 27.06,
"learning_rate": 1.3237197378799832e-05,
"loss": 0.127,
"step": 446000
},
{
"epoch": 27.09,
"learning_rate": 1.3229612887567503e-05,
"loss": 0.1293,
"step": 446500
},
{
"epoch": 27.12,
"learning_rate": 1.3222028396335176e-05,
"loss": 0.13,
"step": 447000
},
{
"epoch": 27.15,
"learning_rate": 1.3214443905102847e-05,
"loss": 0.132,
"step": 447500
},
{
"epoch": 27.18,
"learning_rate": 1.3206859413870518e-05,
"loss": 0.1298,
"step": 448000
},
{
"epoch": 27.21,
"learning_rate": 1.319930526060312e-05,
"loss": 0.1287,
"step": 448500
},
{
"epoch": 27.24,
"learning_rate": 1.3191720769370793e-05,
"loss": 0.131,
"step": 449000
},
{
"epoch": 27.27,
"learning_rate": 1.3184136278138464e-05,
"loss": 0.1299,
"step": 449500
},
{
"epoch": 27.3,
"learning_rate": 1.3176551786906135e-05,
"loss": 0.1285,
"step": 450000
},
{
"epoch": 27.33,
"learning_rate": 1.3168967295673806e-05,
"loss": 0.1283,
"step": 450500
},
{
"epoch": 27.36,
"learning_rate": 1.3161382804441478e-05,
"loss": 0.1282,
"step": 451000
},
{
"epoch": 27.4,
"learning_rate": 1.3153798313209152e-05,
"loss": 0.1314,
"step": 451500
},
{
"epoch": 27.43,
"learning_rate": 1.3146213821976823e-05,
"loss": 0.1305,
"step": 452000
},
{
"epoch": 27.46,
"learning_rate": 1.3138629330744495e-05,
"loss": 0.1315,
"step": 452500
},
{
"epoch": 27.49,
"learning_rate": 1.3131044839512166e-05,
"loss": 0.1289,
"step": 453000
},
{
"epoch": 27.52,
"learning_rate": 1.3123460348279839e-05,
"loss": 0.1283,
"step": 453500
},
{
"epoch": 27.55,
"learning_rate": 1.311587585704751e-05,
"loss": 0.1302,
"step": 454000
},
{
"epoch": 27.58,
"learning_rate": 1.3108306534797647e-05,
"loss": 0.1299,
"step": 454500
},
{
"epoch": 27.61,
"learning_rate": 1.3100722043565318e-05,
"loss": 0.1259,
"step": 455000
},
{
"epoch": 27.64,
"learning_rate": 1.3093152721315456e-05,
"loss": 0.1309,
"step": 455500
},
{
"epoch": 27.67,
"learning_rate": 1.3085568230083129e-05,
"loss": 0.1307,
"step": 456000
},
{
"epoch": 27.7,
"learning_rate": 1.30779837388508e-05,
"loss": 0.1298,
"step": 456500
},
{
"epoch": 27.73,
"learning_rate": 1.3070399247618471e-05,
"loss": 0.1303,
"step": 457000
},
{
"epoch": 27.76,
"learning_rate": 1.3062814756386142e-05,
"loss": 0.1308,
"step": 457500
},
{
"epoch": 27.79,
"learning_rate": 1.3055230265153815e-05,
"loss": 0.1297,
"step": 458000
},
{
"epoch": 27.82,
"learning_rate": 1.3047645773921486e-05,
"loss": 0.1306,
"step": 458500
},
{
"epoch": 27.85,
"learning_rate": 1.3040076451671624e-05,
"loss": 0.1315,
"step": 459000
},
{
"epoch": 27.88,
"learning_rate": 1.3032491960439295e-05,
"loss": 0.1326,
"step": 459500
},
{
"epoch": 27.91,
"learning_rate": 1.3024907469206966e-05,
"loss": 0.1303,
"step": 460000
},
{
"epoch": 27.94,
"learning_rate": 1.3017322977974637e-05,
"loss": 0.1307,
"step": 460500
},
{
"epoch": 27.97,
"learning_rate": 1.3009738486742309e-05,
"loss": 0.1278,
"step": 461000
},
{
"epoch": 28.0,
"eval_bleu": 81.1163,
"eval_gen_len": 14.0657,
"eval_loss": 0.16175536811351776,
"eval_runtime": 180.1879,
"eval_samples_per_second": 89.484,
"eval_steps_per_second": 2.797,
"step": 461468
},
{
"epoch": 28.0,
"learning_rate": 1.3002153995509983e-05,
"loss": 0.1301,
"step": 461500
},
{
"epoch": 28.03,
"learning_rate": 1.2994569504277654e-05,
"loss": 0.128,
"step": 462000
},
{
"epoch": 28.06,
"learning_rate": 1.2986985013045326e-05,
"loss": 0.1277,
"step": 462500
},
{
"epoch": 28.09,
"learning_rate": 1.2979415690795463e-05,
"loss": 0.1279,
"step": 463000
},
{
"epoch": 28.12,
"learning_rate": 1.29718463685456e-05,
"loss": 0.1274,
"step": 463500
},
{
"epoch": 28.15,
"learning_rate": 1.2964261877313271e-05,
"loss": 0.1287,
"step": 464000
},
{
"epoch": 28.18,
"learning_rate": 1.2956677386080942e-05,
"loss": 0.124,
"step": 464500
},
{
"epoch": 28.21,
"learning_rate": 1.2949092894848614e-05,
"loss": 0.1281,
"step": 465000
},
{
"epoch": 28.24,
"learning_rate": 1.2941508403616288e-05,
"loss": 0.1265,
"step": 465500
},
{
"epoch": 28.27,
"learning_rate": 1.293392391238396e-05,
"loss": 0.1277,
"step": 466000
},
{
"epoch": 28.31,
"learning_rate": 1.292633942115163e-05,
"loss": 0.1294,
"step": 466500
},
{
"epoch": 28.34,
"learning_rate": 1.2918754929919302e-05,
"loss": 0.1259,
"step": 467000
},
{
"epoch": 28.37,
"learning_rate": 1.2911185607669439e-05,
"loss": 0.1259,
"step": 467500
},
{
"epoch": 28.4,
"learning_rate": 1.290360111643711e-05,
"loss": 0.1273,
"step": 468000
},
{
"epoch": 28.43,
"learning_rate": 1.2896016625204782e-05,
"loss": 0.1297,
"step": 468500
},
{
"epoch": 28.46,
"learning_rate": 1.2888432133972455e-05,
"loss": 0.1271,
"step": 469000
},
{
"epoch": 28.49,
"learning_rate": 1.2880847642740126e-05,
"loss": 0.126,
"step": 469500
},
{
"epoch": 28.52,
"learning_rate": 1.2873263151507797e-05,
"loss": 0.1317,
"step": 470000
},
{
"epoch": 28.55,
"learning_rate": 1.2865678660275468e-05,
"loss": 0.1267,
"step": 470500
},
{
"epoch": 28.58,
"learning_rate": 1.2858109338025605e-05,
"loss": 0.1259,
"step": 471000
},
{
"epoch": 28.61,
"learning_rate": 1.2850524846793278e-05,
"loss": 0.1288,
"step": 471500
},
{
"epoch": 28.64,
"learning_rate": 1.2842940355560951e-05,
"loss": 0.1264,
"step": 472000
},
{
"epoch": 28.67,
"learning_rate": 1.2835355864328622e-05,
"loss": 0.1278,
"step": 472500
},
{
"epoch": 28.7,
"learning_rate": 1.2827771373096294e-05,
"loss": 0.1289,
"step": 473000
},
{
"epoch": 28.73,
"learning_rate": 1.282020205084643e-05,
"loss": 0.128,
"step": 473500
},
{
"epoch": 28.76,
"learning_rate": 1.2812617559614102e-05,
"loss": 0.1254,
"step": 474000
},
{
"epoch": 28.79,
"learning_rate": 1.2805033068381773e-05,
"loss": 0.1273,
"step": 474500
},
{
"epoch": 28.82,
"learning_rate": 1.2797448577149445e-05,
"loss": 0.1245,
"step": 475000
},
{
"epoch": 28.85,
"learning_rate": 1.278986408591712e-05,
"loss": 0.1278,
"step": 475500
},
{
"epoch": 28.88,
"learning_rate": 1.278227959468479e-05,
"loss": 0.1289,
"step": 476000
},
{
"epoch": 28.91,
"learning_rate": 1.2774710272434928e-05,
"loss": 0.1273,
"step": 476500
},
{
"epoch": 28.94,
"learning_rate": 1.2767125781202599e-05,
"loss": 0.1283,
"step": 477000
},
{
"epoch": 28.97,
"learning_rate": 1.275954128997027e-05,
"loss": 0.1317,
"step": 477500
},
{
"epoch": 29.0,
"eval_bleu": 81.0592,
"eval_gen_len": 14.0958,
"eval_loss": 0.163426473736763,
"eval_runtime": 179.0747,
"eval_samples_per_second": 90.041,
"eval_steps_per_second": 2.814,
"step": 477949
},
{
"epoch": 29.0,
"learning_rate": 1.2751956798737941e-05,
"loss": 0.1268,
"step": 478000
},
{
"epoch": 29.03,
"learning_rate": 1.2744372307505613e-05,
"loss": 0.1252,
"step": 478500
},
{
"epoch": 29.06,
"learning_rate": 1.2736787816273285e-05,
"loss": 0.1219,
"step": 479000
},
{
"epoch": 29.09,
"learning_rate": 1.2729203325040957e-05,
"loss": 0.1253,
"step": 479500
},
{
"epoch": 29.12,
"learning_rate": 1.2721634002791094e-05,
"loss": 0.1198,
"step": 480000
},
{
"epoch": 29.15,
"learning_rate": 1.2714049511558765e-05,
"loss": 0.124,
"step": 480500
},
{
"epoch": 29.19,
"learning_rate": 1.2706480189308902e-05,
"loss": 0.1242,
"step": 481000
},
{
"epoch": 29.22,
"learning_rate": 1.2698895698076575e-05,
"loss": 0.1247,
"step": 481500
},
{
"epoch": 29.25,
"learning_rate": 1.2691311206844246e-05,
"loss": 0.1254,
"step": 482000
},
{
"epoch": 29.28,
"learning_rate": 1.2683741884594383e-05,
"loss": 0.1258,
"step": 482500
},
{
"epoch": 29.31,
"learning_rate": 1.2676157393362055e-05,
"loss": 0.1269,
"step": 483000
},
{
"epoch": 29.34,
"learning_rate": 1.2668572902129726e-05,
"loss": 0.1234,
"step": 483500
},
{
"epoch": 29.37,
"learning_rate": 1.2660988410897399e-05,
"loss": 0.1262,
"step": 484000
},
{
"epoch": 29.4,
"learning_rate": 1.265340391966507e-05,
"loss": 0.1248,
"step": 484500
},
{
"epoch": 29.43,
"learning_rate": 1.2645819428432741e-05,
"loss": 0.1247,
"step": 485000
},
{
"epoch": 29.46,
"learning_rate": 1.2638234937200413e-05,
"loss": 0.127,
"step": 485500
},
{
"epoch": 29.49,
"learning_rate": 1.2630650445968084e-05,
"loss": 0.1259,
"step": 486000
},
{
"epoch": 29.52,
"learning_rate": 1.2623065954735759e-05,
"loss": 0.1265,
"step": 486500
},
{
"epoch": 29.55,
"learning_rate": 1.2615496632485892e-05,
"loss": 0.1249,
"step": 487000
},
{
"epoch": 29.58,
"learning_rate": 1.2607912141253567e-05,
"loss": 0.1268,
"step": 487500
},
{
"epoch": 29.61,
"learning_rate": 1.2600327650021238e-05,
"loss": 0.1249,
"step": 488000
},
{
"epoch": 29.64,
"learning_rate": 1.259274315878891e-05,
"loss": 0.1256,
"step": 488500
},
{
"epoch": 29.67,
"learning_rate": 1.258515866755658e-05,
"loss": 0.1258,
"step": 489000
},
{
"epoch": 29.7,
"learning_rate": 1.2577574176324254e-05,
"loss": 0.1274,
"step": 489500
},
{
"epoch": 29.73,
"learning_rate": 1.2569989685091926e-05,
"loss": 0.1253,
"step": 490000
},
{
"epoch": 29.76,
"learning_rate": 1.2562405193859598e-05,
"loss": 0.1274,
"step": 490500
},
{
"epoch": 29.79,
"learning_rate": 1.2554820702627269e-05,
"loss": 0.1259,
"step": 491000
},
{
"epoch": 29.82,
"learning_rate": 1.254723621139494e-05,
"loss": 0.1251,
"step": 491500
},
{
"epoch": 29.85,
"learning_rate": 1.2539666889145077e-05,
"loss": 0.1261,
"step": 492000
},
{
"epoch": 29.88,
"learning_rate": 1.2532082397912749e-05,
"loss": 0.1282,
"step": 492500
},
{
"epoch": 29.91,
"learning_rate": 1.2524497906680421e-05,
"loss": 0.126,
"step": 493000
},
{
"epoch": 29.94,
"learning_rate": 1.2516913415448093e-05,
"loss": 0.1266,
"step": 493500
},
{
"epoch": 29.97,
"learning_rate": 1.2509328924215764e-05,
"loss": 0.1266,
"step": 494000
},
{
"epoch": 30.0,
"eval_bleu": 81.1592,
"eval_gen_len": 14.0861,
"eval_loss": 0.16239352524280548,
"eval_runtime": 179.0019,
"eval_samples_per_second": 90.077,
"eval_steps_per_second": 2.816,
"step": 494430
},
{
"epoch": 30.0,
"learning_rate": 1.2501759601965901e-05,
"loss": 0.1257,
"step": 494500
},
{
"epoch": 30.03,
"learning_rate": 1.2494175110733572e-05,
"loss": 0.1209,
"step": 495000
},
{
"epoch": 30.06,
"learning_rate": 1.2486590619501244e-05,
"loss": 0.1227,
"step": 495500
},
{
"epoch": 30.1,
"learning_rate": 1.2479006128268915e-05,
"loss": 0.1234,
"step": 496000
},
{
"epoch": 30.13,
"learning_rate": 1.247142163703659e-05,
"loss": 0.1218,
"step": 496500
},
{
"epoch": 30.16,
"learning_rate": 1.2463852314786725e-05,
"loss": 0.1228,
"step": 497000
},
{
"epoch": 30.19,
"learning_rate": 1.2456267823554398e-05,
"loss": 0.1226,
"step": 497500
},
{
"epoch": 30.22,
"learning_rate": 1.2448683332322069e-05,
"loss": 0.122,
"step": 498000
},
{
"epoch": 30.25,
"learning_rate": 1.244109884108974e-05,
"loss": 0.1225,
"step": 498500
},
{
"epoch": 30.28,
"learning_rate": 1.2433514349857412e-05,
"loss": 0.1223,
"step": 499000
},
{
"epoch": 30.31,
"learning_rate": 1.2425945027607549e-05,
"loss": 0.1232,
"step": 499500
},
{
"epoch": 30.34,
"learning_rate": 1.241836053637522e-05,
"loss": 0.1228,
"step": 500000
},
{
"epoch": 30.37,
"learning_rate": 1.2410776045142895e-05,
"loss": 0.1199,
"step": 500500
},
{
"epoch": 30.4,
"learning_rate": 1.2403191553910566e-05,
"loss": 0.1235,
"step": 501000
},
{
"epoch": 30.43,
"learning_rate": 1.2395607062678237e-05,
"loss": 0.1229,
"step": 501500
},
{
"epoch": 30.46,
"learning_rate": 1.2388037740428374e-05,
"loss": 0.1222,
"step": 502000
},
{
"epoch": 30.49,
"learning_rate": 1.2380453249196045e-05,
"loss": 0.121,
"step": 502500
},
{
"epoch": 30.52,
"learning_rate": 1.2372868757963717e-05,
"loss": 0.1235,
"step": 503000
},
{
"epoch": 30.55,
"learning_rate": 1.2365284266731388e-05,
"loss": 0.1218,
"step": 503500
},
{
"epoch": 30.58,
"learning_rate": 1.235769977549906e-05,
"loss": 0.1248,
"step": 504000
},
{
"epoch": 30.61,
"learning_rate": 1.2350115284266732e-05,
"loss": 0.1224,
"step": 504500
},
{
"epoch": 30.64,
"learning_rate": 1.2342530793034403e-05,
"loss": 0.1252,
"step": 505000
},
{
"epoch": 30.67,
"learning_rate": 1.233496147078454e-05,
"loss": 0.122,
"step": 505500
},
{
"epoch": 30.7,
"learning_rate": 1.2327376979552212e-05,
"loss": 0.1234,
"step": 506000
},
{
"epoch": 30.73,
"learning_rate": 1.2319792488319885e-05,
"loss": 0.1215,
"step": 506500
},
{
"epoch": 30.76,
"learning_rate": 1.2312207997087558e-05,
"loss": 0.1224,
"step": 507000
},
{
"epoch": 30.79,
"learning_rate": 1.2304623505855229e-05,
"loss": 0.1232,
"step": 507500
},
{
"epoch": 30.82,
"learning_rate": 1.22970390146229e-05,
"loss": 0.1266,
"step": 508000
},
{
"epoch": 30.85,
"learning_rate": 1.2289454523390571e-05,
"loss": 0.1274,
"step": 508500
},
{
"epoch": 30.88,
"learning_rate": 1.2281870032158243e-05,
"loss": 0.1242,
"step": 509000
},
{
"epoch": 30.91,
"learning_rate": 1.227430070990838e-05,
"loss": 0.1242,
"step": 509500
},
{
"epoch": 30.94,
"learning_rate": 1.2266716218676051e-05,
"loss": 0.1241,
"step": 510000
},
{
"epoch": 30.98,
"learning_rate": 1.2259131727443725e-05,
"loss": 0.1241,
"step": 510500
},
{
"epoch": 31.0,
"eval_bleu": 81.1058,
"eval_gen_len": 14.1131,
"eval_loss": 0.16349713504314423,
"eval_runtime": 179.2239,
"eval_samples_per_second": 89.966,
"eval_steps_per_second": 2.812,
"step": 510911
},
{
"epoch": 31.01,
"learning_rate": 1.2251547236211397e-05,
"loss": 0.1198,
"step": 511000
},
{
"epoch": 31.04,
"learning_rate": 1.2243962744979068e-05,
"loss": 0.1203,
"step": 511500
},
{
"epoch": 31.07,
"learning_rate": 1.223637825374674e-05,
"loss": 0.1183,
"step": 512000
},
{
"epoch": 31.1,
"learning_rate": 1.2228808931496876e-05,
"loss": 0.119,
"step": 512500
},
{
"epoch": 31.13,
"learning_rate": 1.2221224440264548e-05,
"loss": 0.1191,
"step": 513000
},
{
"epoch": 31.16,
"learning_rate": 1.2213655118014685e-05,
"loss": 0.12,
"step": 513500
},
{
"epoch": 31.19,
"learning_rate": 1.2206070626782356e-05,
"loss": 0.1192,
"step": 514000
},
{
"epoch": 31.22,
"learning_rate": 1.2198486135550027e-05,
"loss": 0.1182,
"step": 514500
},
{
"epoch": 31.25,
"learning_rate": 1.21909016443177e-05,
"loss": 0.1197,
"step": 515000
},
{
"epoch": 31.28,
"learning_rate": 1.2183317153085373e-05,
"loss": 0.1194,
"step": 515500
},
{
"epoch": 31.31,
"learning_rate": 1.2175732661853044e-05,
"loss": 0.1199,
"step": 516000
},
{
"epoch": 31.34,
"learning_rate": 1.2168148170620716e-05,
"loss": 0.1215,
"step": 516500
},
{
"epoch": 31.37,
"learning_rate": 1.2160563679388388e-05,
"loss": 0.1226,
"step": 517000
},
{
"epoch": 31.4,
"learning_rate": 1.215297918815606e-05,
"loss": 0.1214,
"step": 517500
},
{
"epoch": 31.43,
"learning_rate": 1.2145409865906197e-05,
"loss": 0.1203,
"step": 518000
},
{
"epoch": 31.46,
"learning_rate": 1.2137825374673868e-05,
"loss": 0.1202,
"step": 518500
},
{
"epoch": 31.49,
"learning_rate": 1.213024088344154e-05,
"loss": 0.1209,
"step": 519000
},
{
"epoch": 31.52,
"learning_rate": 1.212265639220921e-05,
"loss": 0.1223,
"step": 519500
},
{
"epoch": 31.55,
"learning_rate": 1.2115071900976882e-05,
"loss": 0.1221,
"step": 520000
},
{
"epoch": 31.58,
"learning_rate": 1.2107487409744556e-05,
"loss": 0.1204,
"step": 520500
},
{
"epoch": 31.61,
"learning_rate": 1.2099902918512228e-05,
"loss": 0.1206,
"step": 521000
},
{
"epoch": 31.64,
"learning_rate": 1.2092318427279899e-05,
"loss": 0.1194,
"step": 521500
},
{
"epoch": 31.67,
"learning_rate": 1.2084749105030036e-05,
"loss": 0.1212,
"step": 522000
},
{
"epoch": 31.7,
"learning_rate": 1.2077164613797707e-05,
"loss": 0.1207,
"step": 522500
},
{
"epoch": 31.73,
"learning_rate": 1.2069580122565379e-05,
"loss": 0.1173,
"step": 523000
},
{
"epoch": 31.76,
"learning_rate": 1.2062010800315516e-05,
"loss": 0.1198,
"step": 523500
},
{
"epoch": 31.79,
"learning_rate": 1.2054426309083187e-05,
"loss": 0.1219,
"step": 524000
},
{
"epoch": 31.82,
"learning_rate": 1.204684181785086e-05,
"loss": 0.122,
"step": 524500
},
{
"epoch": 31.85,
"learning_rate": 1.2039257326618533e-05,
"loss": 0.1208,
"step": 525000
},
{
"epoch": 31.89,
"learning_rate": 1.2031672835386204e-05,
"loss": 0.1227,
"step": 525500
},
{
"epoch": 31.92,
"learning_rate": 1.2024088344153875e-05,
"loss": 0.1218,
"step": 526000
},
{
"epoch": 31.95,
"learning_rate": 1.2016503852921546e-05,
"loss": 0.1238,
"step": 526500
},
{
"epoch": 31.98,
"learning_rate": 1.2008934530671684e-05,
"loss": 0.1233,
"step": 527000
},
{
"epoch": 32.0,
"eval_bleu": 81.231,
"eval_gen_len": 14.1063,
"eval_loss": 0.162692591547966,
"eval_runtime": 178.4186,
"eval_samples_per_second": 90.372,
"eval_steps_per_second": 2.825,
"step": 527392
},
{
"epoch": 32.01,
"learning_rate": 1.2001350039439355e-05,
"loss": 0.1201,
"step": 527500
},
{
"epoch": 32.04,
"learning_rate": 1.1993765548207028e-05,
"loss": 0.1185,
"step": 528000
},
{
"epoch": 32.07,
"learning_rate": 1.1986181056974699e-05,
"loss": 0.1199,
"step": 528500
},
{
"epoch": 32.1,
"learning_rate": 1.197859656574237e-05,
"loss": 0.1172,
"step": 529000
},
{
"epoch": 32.13,
"learning_rate": 1.1971012074510042e-05,
"loss": 0.117,
"step": 529500
},
{
"epoch": 32.16,
"learning_rate": 1.1963427583277716e-05,
"loss": 0.121,
"step": 530000
},
{
"epoch": 32.19,
"learning_rate": 1.1955843092045387e-05,
"loss": 0.1139,
"step": 530500
},
{
"epoch": 32.22,
"learning_rate": 1.1948273769795523e-05,
"loss": 0.1178,
"step": 531000
},
{
"epoch": 32.25,
"learning_rate": 1.1940689278563196e-05,
"loss": 0.118,
"step": 531500
},
{
"epoch": 32.28,
"learning_rate": 1.1933104787330867e-05,
"loss": 0.1152,
"step": 532000
},
{
"epoch": 32.31,
"learning_rate": 1.1925520296098538e-05,
"loss": 0.1178,
"step": 532500
},
{
"epoch": 32.34,
"learning_rate": 1.191793580486621e-05,
"loss": 0.119,
"step": 533000
},
{
"epoch": 32.37,
"learning_rate": 1.1910366482616347e-05,
"loss": 0.1164,
"step": 533500
},
{
"epoch": 32.4,
"learning_rate": 1.1902781991384018e-05,
"loss": 0.1202,
"step": 534000
},
{
"epoch": 32.43,
"learning_rate": 1.1895197500151692e-05,
"loss": 0.1187,
"step": 534500
},
{
"epoch": 32.46,
"learning_rate": 1.1887613008919364e-05,
"loss": 0.1176,
"step": 535000
},
{
"epoch": 32.49,
"learning_rate": 1.1880028517687035e-05,
"loss": 0.1178,
"step": 535500
},
{
"epoch": 32.52,
"learning_rate": 1.1872444026454706e-05,
"loss": 0.1192,
"step": 536000
},
{
"epoch": 32.55,
"learning_rate": 1.1864874704204843e-05,
"loss": 0.1172,
"step": 536500
},
{
"epoch": 32.58,
"learning_rate": 1.1857290212972515e-05,
"loss": 0.1203,
"step": 537000
},
{
"epoch": 32.61,
"learning_rate": 1.1849705721740186e-05,
"loss": 0.1174,
"step": 537500
},
{
"epoch": 32.64,
"learning_rate": 1.1842121230507859e-05,
"loss": 0.1171,
"step": 538000
},
{
"epoch": 32.67,
"learning_rate": 1.183453673927553e-05,
"loss": 0.1174,
"step": 538500
},
{
"epoch": 32.7,
"learning_rate": 1.1826967417025667e-05,
"loss": 0.1196,
"step": 539000
},
{
"epoch": 32.73,
"learning_rate": 1.1819382925793338e-05,
"loss": 0.1188,
"step": 539500
},
{
"epoch": 32.77,
"learning_rate": 1.181179843456101e-05,
"loss": 0.1184,
"step": 540000
},
{
"epoch": 32.8,
"learning_rate": 1.1804213943328683e-05,
"loss": 0.1192,
"step": 540500
},
{
"epoch": 32.83,
"learning_rate": 1.1796629452096355e-05,
"loss": 0.118,
"step": 541000
},
{
"epoch": 32.86,
"learning_rate": 1.1789044960864027e-05,
"loss": 0.1175,
"step": 541500
},
{
"epoch": 32.89,
"learning_rate": 1.1781475638614164e-05,
"loss": 0.1195,
"step": 542000
},
{
"epoch": 32.92,
"learning_rate": 1.1773891147381835e-05,
"loss": 0.1181,
"step": 542500
},
{
"epoch": 32.95,
"learning_rate": 1.1766306656149506e-05,
"loss": 0.1188,
"step": 543000
},
{
"epoch": 32.98,
"learning_rate": 1.1758737333899643e-05,
"loss": 0.1206,
"step": 543500
},
{
"epoch": 33.0,
"eval_bleu": 81.2442,
"eval_gen_len": 14.0953,
"eval_loss": 0.16174831986427307,
"eval_runtime": 179.0701,
"eval_samples_per_second": 90.043,
"eval_steps_per_second": 2.815,
"step": 543873
},
{
"epoch": 33.01,
"learning_rate": 1.1751152842667315e-05,
"loss": 0.119,
"step": 544000
},
{
"epoch": 33.04,
"learning_rate": 1.1743568351434986e-05,
"loss": 0.1132,
"step": 544500
},
{
"epoch": 33.07,
"learning_rate": 1.1735983860202657e-05,
"loss": 0.1148,
"step": 545000
},
{
"epoch": 33.1,
"learning_rate": 1.1728399368970332e-05,
"loss": 0.1156,
"step": 545500
},
{
"epoch": 33.13,
"learning_rate": 1.1720814877738003e-05,
"loss": 0.1142,
"step": 546000
},
{
"epoch": 33.16,
"learning_rate": 1.171324555548814e-05,
"loss": 0.1159,
"step": 546500
},
{
"epoch": 33.19,
"learning_rate": 1.1705661064255811e-05,
"loss": 0.1154,
"step": 547000
},
{
"epoch": 33.22,
"learning_rate": 1.1698076573023483e-05,
"loss": 0.1159,
"step": 547500
},
{
"epoch": 33.25,
"learning_rate": 1.1690492081791154e-05,
"loss": 0.1155,
"step": 548000
},
{
"epoch": 33.28,
"learning_rate": 1.1682907590558825e-05,
"loss": 0.115,
"step": 548500
},
{
"epoch": 33.31,
"learning_rate": 1.1675338268308962e-05,
"loss": 0.116,
"step": 549000
},
{
"epoch": 33.34,
"learning_rate": 1.1667753777076633e-05,
"loss": 0.117,
"step": 549500
},
{
"epoch": 33.37,
"learning_rate": 1.166018445482677e-05,
"loss": 0.1171,
"step": 550000
},
{
"epoch": 33.4,
"learning_rate": 1.1652599963594442e-05,
"loss": 0.1201,
"step": 550500
},
{
"epoch": 33.43,
"learning_rate": 1.1645015472362116e-05,
"loss": 0.1169,
"step": 551000
},
{
"epoch": 33.46,
"learning_rate": 1.1637430981129788e-05,
"loss": 0.1157,
"step": 551500
},
{
"epoch": 33.49,
"learning_rate": 1.1629846489897459e-05,
"loss": 0.1159,
"step": 552000
},
{
"epoch": 33.52,
"learning_rate": 1.162226199866513e-05,
"loss": 0.1144,
"step": 552500
},
{
"epoch": 33.55,
"learning_rate": 1.1614677507432803e-05,
"loss": 0.1186,
"step": 553000
},
{
"epoch": 33.58,
"learning_rate": 1.1607093016200474e-05,
"loss": 0.1142,
"step": 553500
},
{
"epoch": 33.61,
"learning_rate": 1.1599508524968146e-05,
"loss": 0.1179,
"step": 554000
},
{
"epoch": 33.64,
"learning_rate": 1.1591924033735817e-05,
"loss": 0.119,
"step": 554500
},
{
"epoch": 33.68,
"learning_rate": 1.1584339542503488e-05,
"loss": 0.1173,
"step": 555000
},
{
"epoch": 33.71,
"learning_rate": 1.1576755051271163e-05,
"loss": 0.1205,
"step": 555500
},
{
"epoch": 33.74,
"learning_rate": 1.1569170560038834e-05,
"loss": 0.1139,
"step": 556000
},
{
"epoch": 33.77,
"learning_rate": 1.1561601237788971e-05,
"loss": 0.119,
"step": 556500
},
{
"epoch": 33.8,
"learning_rate": 1.1554016746556642e-05,
"loss": 0.1156,
"step": 557000
},
{
"epoch": 33.83,
"learning_rate": 1.1546432255324314e-05,
"loss": 0.1152,
"step": 557500
},
{
"epoch": 33.86,
"learning_rate": 1.1538847764091985e-05,
"loss": 0.1155,
"step": 558000
},
{
"epoch": 33.89,
"learning_rate": 1.1531263272859658e-05,
"loss": 0.1154,
"step": 558500
},
{
"epoch": 33.92,
"learning_rate": 1.152367878162733e-05,
"loss": 0.1155,
"step": 559000
},
{
"epoch": 33.95,
"learning_rate": 1.1516094290395002e-05,
"loss": 0.117,
"step": 559500
},
{
"epoch": 33.98,
"learning_rate": 1.1508509799162673e-05,
"loss": 0.1184,
"step": 560000
},
{
"epoch": 34.0,
"eval_bleu": 81.2715,
"eval_gen_len": 14.1176,
"eval_loss": 0.16308197379112244,
"eval_runtime": 178.7834,
"eval_samples_per_second": 90.187,
"eval_steps_per_second": 2.819,
"step": 560354
},
{
"epoch": 34.01,
"learning_rate": 1.1500955645895276e-05,
"loss": 0.1148,
"step": 560500
},
{
"epoch": 34.04,
"learning_rate": 1.149338632364541e-05,
"loss": 0.1118,
"step": 561000
},
{
"epoch": 34.07,
"learning_rate": 1.1485801832413084e-05,
"loss": 0.1116,
"step": 561500
},
{
"epoch": 34.1,
"learning_rate": 1.1478217341180756e-05,
"loss": 0.1146,
"step": 562000
},
{
"epoch": 34.13,
"learning_rate": 1.1470632849948427e-05,
"loss": 0.113,
"step": 562500
},
{
"epoch": 34.16,
"learning_rate": 1.1463048358716098e-05,
"loss": 0.1133,
"step": 563000
},
{
"epoch": 34.19,
"learning_rate": 1.145546386748377e-05,
"loss": 0.1118,
"step": 563500
},
{
"epoch": 34.22,
"learning_rate": 1.1447879376251442e-05,
"loss": 0.1151,
"step": 564000
},
{
"epoch": 34.25,
"learning_rate": 1.1440294885019114e-05,
"loss": 0.1117,
"step": 564500
},
{
"epoch": 34.28,
"learning_rate": 1.1432710393786785e-05,
"loss": 0.1139,
"step": 565000
},
{
"epoch": 34.31,
"learning_rate": 1.1425141071536922e-05,
"loss": 0.1156,
"step": 565500
},
{
"epoch": 34.34,
"learning_rate": 1.1417556580304593e-05,
"loss": 0.1155,
"step": 566000
},
{
"epoch": 34.37,
"learning_rate": 1.1409972089072266e-05,
"loss": 0.1167,
"step": 566500
},
{
"epoch": 34.4,
"learning_rate": 1.1402402766822402e-05,
"loss": 0.1116,
"step": 567000
},
{
"epoch": 34.43,
"learning_rate": 1.1394818275590075e-05,
"loss": 0.1132,
"step": 567500
},
{
"epoch": 34.46,
"learning_rate": 1.1387233784357746e-05,
"loss": 0.1146,
"step": 568000
},
{
"epoch": 34.49,
"learning_rate": 1.1379649293125419e-05,
"loss": 0.1144,
"step": 568500
},
{
"epoch": 34.52,
"learning_rate": 1.137206480189309e-05,
"loss": 0.1155,
"step": 569000
},
{
"epoch": 34.55,
"learning_rate": 1.1364480310660761e-05,
"loss": 0.1173,
"step": 569500
},
{
"epoch": 34.59,
"learning_rate": 1.1356910988410898e-05,
"loss": 0.112,
"step": 570000
},
{
"epoch": 34.62,
"learning_rate": 1.134932649717857e-05,
"loss": 0.114,
"step": 570500
},
{
"epoch": 34.65,
"learning_rate": 1.134174200594624e-05,
"loss": 0.1137,
"step": 571000
},
{
"epoch": 34.68,
"learning_rate": 1.1334157514713915e-05,
"loss": 0.1157,
"step": 571500
},
{
"epoch": 34.71,
"learning_rate": 1.1326573023481587e-05,
"loss": 0.1154,
"step": 572000
},
{
"epoch": 34.74,
"learning_rate": 1.1318988532249258e-05,
"loss": 0.1176,
"step": 572500
},
{
"epoch": 34.77,
"learning_rate": 1.1311419209999395e-05,
"loss": 0.1134,
"step": 573000
},
{
"epoch": 34.8,
"learning_rate": 1.1303834718767066e-05,
"loss": 0.1145,
"step": 573500
},
{
"epoch": 34.83,
"learning_rate": 1.1296250227534738e-05,
"loss": 0.1155,
"step": 574000
},
{
"epoch": 34.86,
"learning_rate": 1.1288665736302409e-05,
"loss": 0.1136,
"step": 574500
},
{
"epoch": 34.89,
"learning_rate": 1.1281081245070082e-05,
"loss": 0.1147,
"step": 575000
},
{
"epoch": 34.92,
"learning_rate": 1.1273496753837753e-05,
"loss": 0.1145,
"step": 575500
},
{
"epoch": 34.95,
"learning_rate": 1.1265912262605426e-05,
"loss": 0.1143,
"step": 576000
},
{
"epoch": 34.98,
"learning_rate": 1.1258327771373097e-05,
"loss": 0.1148,
"step": 576500
},
{
"epoch": 35.0,
"eval_bleu": 81.1501,
"eval_gen_len": 14.0923,
"eval_loss": 0.16266627609729767,
"eval_runtime": 177.3751,
"eval_samples_per_second": 90.903,
"eval_steps_per_second": 2.841,
"step": 576835
},
{
"epoch": 35.01,
"learning_rate": 1.125074328014077e-05,
"loss": 0.1122,
"step": 577000
},
{
"epoch": 35.04,
"learning_rate": 1.1243158788908441e-05,
"loss": 0.1116,
"step": 577500
},
{
"epoch": 35.07,
"learning_rate": 1.1235589466658578e-05,
"loss": 0.1094,
"step": 578000
},
{
"epoch": 35.1,
"learning_rate": 1.122800497542625e-05,
"loss": 0.1106,
"step": 578500
},
{
"epoch": 35.13,
"learning_rate": 1.1220435653176387e-05,
"loss": 0.111,
"step": 579000
},
{
"epoch": 35.16,
"learning_rate": 1.1212851161944058e-05,
"loss": 0.1108,
"step": 579500
},
{
"epoch": 35.19,
"learning_rate": 1.120526667071173e-05,
"loss": 0.1088,
"step": 580000
},
{
"epoch": 35.22,
"learning_rate": 1.11976821794794e-05,
"loss": 0.1136,
"step": 580500
},
{
"epoch": 35.25,
"learning_rate": 1.1190097688247072e-05,
"loss": 0.1108,
"step": 581000
},
{
"epoch": 35.28,
"learning_rate": 1.1182513197014746e-05,
"loss": 0.1117,
"step": 581500
},
{
"epoch": 35.31,
"learning_rate": 1.1174928705782418e-05,
"loss": 0.1126,
"step": 582000
},
{
"epoch": 35.34,
"learning_rate": 1.1167344214550089e-05,
"loss": 0.1119,
"step": 582500
},
{
"epoch": 35.37,
"learning_rate": 1.115975972331776e-05,
"loss": 0.1165,
"step": 583000
},
{
"epoch": 35.4,
"learning_rate": 1.1152205570050363e-05,
"loss": 0.1123,
"step": 583500
},
{
"epoch": 35.43,
"learning_rate": 1.1144621078818034e-05,
"loss": 0.1094,
"step": 584000
},
{
"epoch": 35.47,
"learning_rate": 1.1137036587585706e-05,
"loss": 0.1129,
"step": 584500
},
{
"epoch": 35.5,
"learning_rate": 1.1129452096353377e-05,
"loss": 0.1117,
"step": 585000
},
{
"epoch": 35.53,
"learning_rate": 1.1121867605121048e-05,
"loss": 0.1128,
"step": 585500
},
{
"epoch": 35.56,
"learning_rate": 1.1114283113888723e-05,
"loss": 0.1143,
"step": 586000
},
{
"epoch": 35.59,
"learning_rate": 1.1106698622656394e-05,
"loss": 0.111,
"step": 586500
},
{
"epoch": 35.62,
"learning_rate": 1.1099114131424065e-05,
"loss": 0.1139,
"step": 587000
},
{
"epoch": 35.65,
"learning_rate": 1.1091529640191736e-05,
"loss": 0.1128,
"step": 587500
},
{
"epoch": 35.68,
"learning_rate": 1.108394514895941e-05,
"loss": 0.1109,
"step": 588000
},
{
"epoch": 35.71,
"learning_rate": 1.107636065772708e-05,
"loss": 0.1126,
"step": 588500
},
{
"epoch": 35.74,
"learning_rate": 1.1068791335477218e-05,
"loss": 0.1118,
"step": 589000
},
{
"epoch": 35.77,
"learning_rate": 1.1061206844244889e-05,
"loss": 0.1145,
"step": 589500
},
{
"epoch": 35.8,
"learning_rate": 1.105362235301256e-05,
"loss": 0.1114,
"step": 590000
},
{
"epoch": 35.83,
"learning_rate": 1.1046037861780231e-05,
"loss": 0.1086,
"step": 590500
},
{
"epoch": 35.86,
"learning_rate": 1.1038453370547903e-05,
"loss": 0.1159,
"step": 591000
},
{
"epoch": 35.89,
"learning_rate": 1.1030868879315577e-05,
"loss": 0.114,
"step": 591500
},
{
"epoch": 35.92,
"learning_rate": 1.1023284388083249e-05,
"loss": 0.1137,
"step": 592000
},
{
"epoch": 35.95,
"learning_rate": 1.1015715065833386e-05,
"loss": 0.11,
"step": 592500
},
{
"epoch": 35.98,
"learning_rate": 1.1008130574601057e-05,
"loss": 0.1132,
"step": 593000
},
{
"epoch": 36.0,
"eval_bleu": 81.3267,
"eval_gen_len": 14.11,
"eval_loss": 0.16219820082187653,
"eval_runtime": 176.9165,
"eval_samples_per_second": 91.139,
"eval_steps_per_second": 2.849,
"step": 593316
},
{
"epoch": 36.01,
"learning_rate": 1.1000546083368728e-05,
"loss": 0.1134,
"step": 593500
},
{
"epoch": 36.04,
"learning_rate": 1.09929615921364e-05,
"loss": 0.1088,
"step": 594000
},
{
"epoch": 36.07,
"learning_rate": 1.0985377100904074e-05,
"loss": 0.1086,
"step": 594500
},
{
"epoch": 36.1,
"learning_rate": 1.0977792609671745e-05,
"loss": 0.1081,
"step": 595000
},
{
"epoch": 36.13,
"learning_rate": 1.0970223287421882e-05,
"loss": 0.11,
"step": 595500
},
{
"epoch": 36.16,
"learning_rate": 1.0962638796189554e-05,
"loss": 0.1076,
"step": 596000
},
{
"epoch": 36.19,
"learning_rate": 1.0955054304957225e-05,
"loss": 0.1124,
"step": 596500
},
{
"epoch": 36.22,
"learning_rate": 1.0947469813724896e-05,
"loss": 0.1113,
"step": 597000
},
{
"epoch": 36.25,
"learning_rate": 1.0939885322492567e-05,
"loss": 0.11,
"step": 597500
},
{
"epoch": 36.28,
"learning_rate": 1.093230083126024e-05,
"loss": 0.1116,
"step": 598000
},
{
"epoch": 36.31,
"learning_rate": 1.0924716340027912e-05,
"loss": 0.1089,
"step": 598500
},
{
"epoch": 36.34,
"learning_rate": 1.0917147017778049e-05,
"loss": 0.1083,
"step": 599000
},
{
"epoch": 36.38,
"learning_rate": 1.090956252654572e-05,
"loss": 0.1105,
"step": 599500
},
{
"epoch": 36.41,
"learning_rate": 1.0901978035313391e-05,
"loss": 0.1121,
"step": 600000
},
{
"epoch": 36.44,
"learning_rate": 1.0894393544081064e-05,
"loss": 0.1101,
"step": 600500
},
{
"epoch": 36.47,
"learning_rate": 1.0886809052848737e-05,
"loss": 0.1082,
"step": 601000
},
{
"epoch": 36.5,
"learning_rate": 1.0879224561616408e-05,
"loss": 0.1109,
"step": 601500
},
{
"epoch": 36.53,
"learning_rate": 1.087164007038408e-05,
"loss": 0.1074,
"step": 602000
},
{
"epoch": 36.56,
"learning_rate": 1.086405557915175e-05,
"loss": 0.1113,
"step": 602500
},
{
"epoch": 36.59,
"learning_rate": 1.0856486256901888e-05,
"loss": 0.1108,
"step": 603000
},
{
"epoch": 36.62,
"learning_rate": 1.084890176566956e-05,
"loss": 0.1108,
"step": 603500
},
{
"epoch": 36.65,
"learning_rate": 1.084131727443723e-05,
"loss": 0.1084,
"step": 604000
},
{
"epoch": 36.68,
"learning_rate": 1.0833732783204905e-05,
"loss": 0.1094,
"step": 604500
},
{
"epoch": 36.71,
"learning_rate": 1.0826163460955039e-05,
"loss": 0.1086,
"step": 605000
},
{
"epoch": 36.74,
"learning_rate": 1.0818594138705176e-05,
"loss": 0.1101,
"step": 605500
},
{
"epoch": 36.77,
"learning_rate": 1.0811009647472847e-05,
"loss": 0.1124,
"step": 606000
},
{
"epoch": 36.8,
"learning_rate": 1.0803425156240522e-05,
"loss": 0.1128,
"step": 606500
},
{
"epoch": 36.83,
"learning_rate": 1.0795840665008193e-05,
"loss": 0.1106,
"step": 607000
},
{
"epoch": 36.86,
"learning_rate": 1.0788256173775864e-05,
"loss": 0.1103,
"step": 607500
},
{
"epoch": 36.89,
"learning_rate": 1.0780671682543535e-05,
"loss": 0.1125,
"step": 608000
},
{
"epoch": 36.92,
"learning_rate": 1.0773087191311207e-05,
"loss": 0.1079,
"step": 608500
},
{
"epoch": 36.95,
"learning_rate": 1.076550270007888e-05,
"loss": 0.1143,
"step": 609000
},
{
"epoch": 36.98,
"learning_rate": 1.0757948546811481e-05,
"loss": 0.1133,
"step": 609500
},
{
"epoch": 37.0,
"eval_bleu": 81.3249,
"eval_gen_len": 14.0937,
"eval_loss": 0.16307313740253448,
"eval_runtime": 177.1538,
"eval_samples_per_second": 91.017,
"eval_steps_per_second": 2.845,
"step": 609797
},
{
"epoch": 37.01,
"learning_rate": 1.0750364055579152e-05,
"loss": 0.1098,
"step": 610000
},
{
"epoch": 37.04,
"learning_rate": 1.0742779564346823e-05,
"loss": 0.1089,
"step": 610500
},
{
"epoch": 37.07,
"learning_rate": 1.0735195073114496e-05,
"loss": 0.1049,
"step": 611000
},
{
"epoch": 37.1,
"learning_rate": 1.072761058188217e-05,
"loss": 0.1083,
"step": 611500
},
{
"epoch": 37.13,
"learning_rate": 1.072002609064984e-05,
"loss": 0.108,
"step": 612000
},
{
"epoch": 37.16,
"learning_rate": 1.0712441599417512e-05,
"loss": 0.1036,
"step": 612500
},
{
"epoch": 37.19,
"learning_rate": 1.0704857108185185e-05,
"loss": 0.1112,
"step": 613000
},
{
"epoch": 37.22,
"learning_rate": 1.069728778593532e-05,
"loss": 0.107,
"step": 613500
},
{
"epoch": 37.26,
"learning_rate": 1.0689703294702993e-05,
"loss": 0.1078,
"step": 614000
},
{
"epoch": 37.29,
"learning_rate": 1.0682149141435594e-05,
"loss": 0.1092,
"step": 614500
},
{
"epoch": 37.32,
"learning_rate": 1.0674564650203266e-05,
"loss": 0.1054,
"step": 615000
},
{
"epoch": 37.35,
"learning_rate": 1.0666980158970937e-05,
"loss": 0.1072,
"step": 615500
},
{
"epoch": 37.38,
"learning_rate": 1.0659395667738608e-05,
"loss": 0.1114,
"step": 616000
},
{
"epoch": 37.41,
"learning_rate": 1.0651811176506281e-05,
"loss": 0.1099,
"step": 616500
},
{
"epoch": 37.44,
"learning_rate": 1.0644226685273952e-05,
"loss": 0.1072,
"step": 617000
},
{
"epoch": 37.47,
"learning_rate": 1.0636642194041624e-05,
"loss": 0.1109,
"step": 617500
},
{
"epoch": 37.5,
"learning_rate": 1.0629057702809295e-05,
"loss": 0.1071,
"step": 618000
},
{
"epoch": 37.53,
"learning_rate": 1.062147321157697e-05,
"loss": 0.111,
"step": 618500
},
{
"epoch": 37.56,
"learning_rate": 1.061388872034464e-05,
"loss": 0.1088,
"step": 619000
},
{
"epoch": 37.59,
"learning_rate": 1.0606304229112312e-05,
"loss": 0.1097,
"step": 619500
},
{
"epoch": 37.62,
"learning_rate": 1.0598719737879983e-05,
"loss": 0.1075,
"step": 620000
},
{
"epoch": 37.65,
"learning_rate": 1.059115041563012e-05,
"loss": 0.1083,
"step": 620500
},
{
"epoch": 37.68,
"learning_rate": 1.0583565924397791e-05,
"loss": 0.1095,
"step": 621000
},
{
"epoch": 37.71,
"learning_rate": 1.0575996602147929e-05,
"loss": 0.1106,
"step": 621500
},
{
"epoch": 37.74,
"learning_rate": 1.05684121109156e-05,
"loss": 0.1096,
"step": 622000
},
{
"epoch": 37.77,
"learning_rate": 1.0560827619683271e-05,
"loss": 0.1089,
"step": 622500
},
{
"epoch": 37.8,
"learning_rate": 1.0553243128450946e-05,
"loss": 0.1091,
"step": 623000
},
{
"epoch": 37.83,
"learning_rate": 1.0545658637218617e-05,
"loss": 0.1117,
"step": 623500
},
{
"epoch": 37.86,
"learning_rate": 1.0538089314968754e-05,
"loss": 0.1096,
"step": 624000
},
{
"epoch": 37.89,
"learning_rate": 1.0530504823736425e-05,
"loss": 0.108,
"step": 624500
},
{
"epoch": 37.92,
"learning_rate": 1.0522920332504097e-05,
"loss": 0.1094,
"step": 625000
},
{
"epoch": 37.95,
"learning_rate": 1.0515335841271768e-05,
"loss": 0.1094,
"step": 625500
},
{
"epoch": 37.98,
"learning_rate": 1.050775135003944e-05,
"loss": 0.1083,
"step": 626000
},
{
"epoch": 38.0,
"eval_bleu": 81.212,
"eval_gen_len": 14.1108,
"eval_loss": 0.16392718255519867,
"eval_runtime": 179.1355,
"eval_samples_per_second": 90.01,
"eval_steps_per_second": 2.814,
"step": 626278
},
{
"epoch": 38.01,
"learning_rate": 1.0500166858807112e-05,
"loss": 0.1056,
"step": 626500
},
{
"epoch": 38.04,
"learning_rate": 1.0492597536557249e-05,
"loss": 0.1035,
"step": 627000
},
{
"epoch": 38.07,
"learning_rate": 1.048501304532492e-05,
"loss": 0.1055,
"step": 627500
},
{
"epoch": 38.1,
"learning_rate": 1.0477428554092592e-05,
"loss": 0.1067,
"step": 628000
},
{
"epoch": 38.13,
"learning_rate": 1.0469844062860265e-05,
"loss": 0.1059,
"step": 628500
},
{
"epoch": 38.17,
"learning_rate": 1.0462259571627936e-05,
"loss": 0.1047,
"step": 629000
},
{
"epoch": 38.2,
"learning_rate": 1.0454690249378073e-05,
"loss": 0.1067,
"step": 629500
},
{
"epoch": 38.23,
"learning_rate": 1.0447105758145744e-05,
"loss": 0.1056,
"step": 630000
},
{
"epoch": 38.26,
"learning_rate": 1.0439521266913417e-05,
"loss": 0.1079,
"step": 630500
},
{
"epoch": 38.29,
"learning_rate": 1.0431936775681088e-05,
"loss": 0.1036,
"step": 631000
},
{
"epoch": 38.32,
"learning_rate": 1.0424367453431225e-05,
"loss": 0.1068,
"step": 631500
},
{
"epoch": 38.35,
"learning_rate": 1.0416782962198897e-05,
"loss": 0.1068,
"step": 632000
},
{
"epoch": 38.38,
"learning_rate": 1.0409198470966568e-05,
"loss": 0.1078,
"step": 632500
},
{
"epoch": 38.41,
"learning_rate": 1.0401613979734239e-05,
"loss": 0.1053,
"step": 633000
},
{
"epoch": 38.44,
"learning_rate": 1.039402948850191e-05,
"loss": 0.1061,
"step": 633500
},
{
"epoch": 38.47,
"learning_rate": 1.0386460166252047e-05,
"loss": 0.1077,
"step": 634000
},
{
"epoch": 38.5,
"learning_rate": 1.0378875675019719e-05,
"loss": 0.1079,
"step": 634500
},
{
"epoch": 38.53,
"learning_rate": 1.0371291183787393e-05,
"loss": 0.1095,
"step": 635000
},
{
"epoch": 38.56,
"learning_rate": 1.0363706692555065e-05,
"loss": 0.1091,
"step": 635500
},
{
"epoch": 38.59,
"learning_rate": 1.0356122201322736e-05,
"loss": 0.1058,
"step": 636000
},
{
"epoch": 38.62,
"learning_rate": 1.0348537710090407e-05,
"loss": 0.109,
"step": 636500
},
{
"epoch": 38.65,
"learning_rate": 1.0340968387840544e-05,
"loss": 0.1074,
"step": 637000
},
{
"epoch": 38.68,
"learning_rate": 1.0333383896608215e-05,
"loss": 0.1058,
"step": 637500
},
{
"epoch": 38.71,
"learning_rate": 1.0325799405375888e-05,
"loss": 0.1058,
"step": 638000
},
{
"epoch": 38.74,
"learning_rate": 1.0318214914143561e-05,
"loss": 0.1089,
"step": 638500
},
{
"epoch": 38.77,
"learning_rate": 1.0310645591893698e-05,
"loss": 0.1079,
"step": 639000
},
{
"epoch": 38.8,
"learning_rate": 1.030306110066137e-05,
"loss": 0.1065,
"step": 639500
},
{
"epoch": 38.83,
"learning_rate": 1.0295476609429041e-05,
"loss": 0.1056,
"step": 640000
},
{
"epoch": 38.86,
"learning_rate": 1.0287892118196712e-05,
"loss": 0.1068,
"step": 640500
},
{
"epoch": 38.89,
"learning_rate": 1.028032279594685e-05,
"loss": 0.1071,
"step": 641000
},
{
"epoch": 38.92,
"learning_rate": 1.027273830471452e-05,
"loss": 0.1077,
"step": 641500
},
{
"epoch": 38.95,
"learning_rate": 1.0265153813482192e-05,
"loss": 0.1078,
"step": 642000
},
{
"epoch": 38.98,
"learning_rate": 1.0257569322249865e-05,
"loss": 0.1067,
"step": 642500
},
{
"epoch": 39.0,
"eval_bleu": 81.2594,
"eval_gen_len": 14.1177,
"eval_loss": 0.16316094994544983,
"eval_runtime": 176.9924,
"eval_samples_per_second": 91.1,
"eval_steps_per_second": 2.848,
"step": 642759
},
{
"epoch": 39.01,
"learning_rate": 1.0249984831017536e-05,
"loss": 0.1061,
"step": 643000
},
{
"epoch": 39.04,
"learning_rate": 1.0242400339785207e-05,
"loss": 0.1052,
"step": 643500
},
{
"epoch": 39.08,
"learning_rate": 1.0234831017535344e-05,
"loss": 0.1055,
"step": 644000
},
{
"epoch": 39.11,
"learning_rate": 1.0227246526303016e-05,
"loss": 0.1047,
"step": 644500
},
{
"epoch": 39.14,
"learning_rate": 1.0219662035070688e-05,
"loss": 0.1047,
"step": 645000
},
{
"epoch": 39.17,
"learning_rate": 1.0212077543838361e-05,
"loss": 0.1046,
"step": 645500
},
{
"epoch": 39.2,
"learning_rate": 1.0204493052606033e-05,
"loss": 0.1053,
"step": 646000
},
{
"epoch": 39.23,
"learning_rate": 1.0196908561373704e-05,
"loss": 0.1046,
"step": 646500
},
{
"epoch": 39.26,
"learning_rate": 1.0189339239123841e-05,
"loss": 0.1035,
"step": 647000
},
{
"epoch": 39.29,
"learning_rate": 1.0181754747891512e-05,
"loss": 0.1025,
"step": 647500
},
{
"epoch": 39.32,
"learning_rate": 1.0174170256659184e-05,
"loss": 0.1069,
"step": 648000
},
{
"epoch": 39.35,
"learning_rate": 1.0166585765426855e-05,
"loss": 0.1038,
"step": 648500
},
{
"epoch": 39.38,
"learning_rate": 1.015900127419453e-05,
"loss": 0.1044,
"step": 649000
},
{
"epoch": 39.41,
"learning_rate": 1.01514167829622e-05,
"loss": 0.1055,
"step": 649500
},
{
"epoch": 39.44,
"learning_rate": 1.0143832291729872e-05,
"loss": 0.1039,
"step": 650000
},
{
"epoch": 39.47,
"learning_rate": 1.0136262969480009e-05,
"loss": 0.1057,
"step": 650500
},
{
"epoch": 39.5,
"learning_rate": 1.012867847824768e-05,
"loss": 0.1059,
"step": 651000
},
{
"epoch": 39.53,
"learning_rate": 1.0121093987015351e-05,
"loss": 0.1053,
"step": 651500
},
{
"epoch": 39.56,
"learning_rate": 1.0113509495783023e-05,
"loss": 0.1038,
"step": 652000
},
{
"epoch": 39.59,
"learning_rate": 1.010594017353316e-05,
"loss": 0.1049,
"step": 652500
},
{
"epoch": 39.62,
"learning_rate": 1.0098355682300831e-05,
"loss": 0.1046,
"step": 653000
},
{
"epoch": 39.65,
"learning_rate": 1.0090771191068504e-05,
"loss": 0.1033,
"step": 653500
},
{
"epoch": 39.68,
"learning_rate": 1.0083186699836175e-05,
"loss": 0.1063,
"step": 654000
},
{
"epoch": 39.71,
"learning_rate": 1.0075602208603848e-05,
"loss": 0.1053,
"step": 654500
},
{
"epoch": 39.74,
"learning_rate": 1.0068032886353985e-05,
"loss": 0.1048,
"step": 655000
},
{
"epoch": 39.77,
"learning_rate": 1.0060448395121657e-05,
"loss": 0.1078,
"step": 655500
},
{
"epoch": 39.8,
"learning_rate": 1.0052863903889328e-05,
"loss": 0.1056,
"step": 656000
},
{
"epoch": 39.83,
"learning_rate": 1.0045279412657e-05,
"loss": 0.1046,
"step": 656500
},
{
"epoch": 39.86,
"learning_rate": 1.0037694921424672e-05,
"loss": 0.1048,
"step": 657000
},
{
"epoch": 39.89,
"learning_rate": 1.0030110430192343e-05,
"loss": 0.1053,
"step": 657500
},
{
"epoch": 39.92,
"learning_rate": 1.002254110794248e-05,
"loss": 0.105,
"step": 658000
},
{
"epoch": 39.96,
"learning_rate": 1.0014956616710152e-05,
"loss": 0.1042,
"step": 658500
},
{
"epoch": 39.99,
"learning_rate": 1.0007372125477823e-05,
"loss": 0.1033,
"step": 659000
},
{
"epoch": 40.0,
"eval_bleu": 81.214,
"eval_gen_len": 14.1179,
"eval_loss": 0.16460604965686798,
"eval_runtime": 177.2705,
"eval_samples_per_second": 90.957,
"eval_steps_per_second": 2.843,
"step": 659240
},
{
"epoch": 40.02,
"learning_rate": 9.999787634245496e-06,
"loss": 0.1036,
"step": 659500
},
{
"epoch": 40.05,
"learning_rate": 9.992218311995631e-06,
"loss": 0.0997,
"step": 660000
},
{
"epoch": 40.08,
"learning_rate": 9.984633820763304e-06,
"loss": 0.1017,
"step": 660500
},
{
"epoch": 40.11,
"learning_rate": 9.977049329530975e-06,
"loss": 0.1038,
"step": 661000
},
{
"epoch": 40.14,
"learning_rate": 9.969464838298648e-06,
"loss": 0.1001,
"step": 661500
},
{
"epoch": 40.17,
"learning_rate": 9.96188034706632e-06,
"loss": 0.1035,
"step": 662000
},
{
"epoch": 40.2,
"learning_rate": 9.954295855833992e-06,
"loss": 0.1037,
"step": 662500
},
{
"epoch": 40.23,
"learning_rate": 9.946711364601664e-06,
"loss": 0.102,
"step": 663000
},
{
"epoch": 40.26,
"learning_rate": 9.9391420423518e-06,
"loss": 0.1026,
"step": 663500
},
{
"epoch": 40.29,
"learning_rate": 9.931557551119472e-06,
"loss": 0.1033,
"step": 664000
},
{
"epoch": 40.32,
"learning_rate": 9.923973059887143e-06,
"loss": 0.1034,
"step": 664500
},
{
"epoch": 40.35,
"learning_rate": 9.916388568654816e-06,
"loss": 0.1009,
"step": 665000
},
{
"epoch": 40.38,
"learning_rate": 9.908804077422488e-06,
"loss": 0.1039,
"step": 665500
},
{
"epoch": 40.41,
"learning_rate": 9.901234755172625e-06,
"loss": 0.1046,
"step": 666000
},
{
"epoch": 40.44,
"learning_rate": 9.893650263940296e-06,
"loss": 0.1043,
"step": 666500
},
{
"epoch": 40.47,
"learning_rate": 9.886065772707967e-06,
"loss": 0.1007,
"step": 667000
},
{
"epoch": 40.5,
"learning_rate": 9.87848128147564e-06,
"loss": 0.1035,
"step": 667500
},
{
"epoch": 40.53,
"learning_rate": 9.870911959225775e-06,
"loss": 0.104,
"step": 668000
},
{
"epoch": 40.56,
"learning_rate": 9.863327467993448e-06,
"loss": 0.104,
"step": 668500
},
{
"epoch": 40.59,
"learning_rate": 9.85574297676112e-06,
"loss": 0.1037,
"step": 669000
},
{
"epoch": 40.62,
"learning_rate": 9.848158485528791e-06,
"loss": 0.1018,
"step": 669500
},
{
"epoch": 40.65,
"learning_rate": 9.840573994296462e-06,
"loss": 0.1027,
"step": 670000
},
{
"epoch": 40.68,
"learning_rate": 9.832989503064135e-06,
"loss": 0.1008,
"step": 670500
},
{
"epoch": 40.71,
"learning_rate": 9.825405011831806e-06,
"loss": 0.1043,
"step": 671000
},
{
"epoch": 40.74,
"learning_rate": 9.81782052059948e-06,
"loss": 0.1028,
"step": 671500
},
{
"epoch": 40.77,
"learning_rate": 9.810251198349615e-06,
"loss": 0.1049,
"step": 672000
},
{
"epoch": 40.8,
"learning_rate": 9.802681876099752e-06,
"loss": 0.1061,
"step": 672500
},
{
"epoch": 40.83,
"learning_rate": 9.795097384867423e-06,
"loss": 0.1042,
"step": 673000
},
{
"epoch": 40.87,
"learning_rate": 9.787512893635096e-06,
"loss": 0.1055,
"step": 673500
},
{
"epoch": 40.9,
"learning_rate": 9.779928402402767e-06,
"loss": 0.1029,
"step": 674000
},
{
"epoch": 40.93,
"learning_rate": 9.772359080152904e-06,
"loss": 0.1039,
"step": 674500
},
{
"epoch": 40.96,
"learning_rate": 9.764774588920576e-06,
"loss": 0.1074,
"step": 675000
},
{
"epoch": 40.99,
"learning_rate": 9.757190097688248e-06,
"loss": 0.1039,
"step": 675500
},
{
"epoch": 41.0,
"eval_bleu": 81.2844,
"eval_gen_len": 14.0989,
"eval_loss": 0.16462182998657227,
"eval_runtime": 177.1935,
"eval_samples_per_second": 90.997,
"eval_steps_per_second": 2.844,
"step": 675721
},
{
"epoch": 41.02,
"learning_rate": 9.74960560645592e-06,
"loss": 0.1037,
"step": 676000
},
{
"epoch": 41.05,
"learning_rate": 9.742021115223591e-06,
"loss": 0.0975,
"step": 676500
},
{
"epoch": 41.08,
"learning_rate": 9.734436623991264e-06,
"loss": 0.0995,
"step": 677000
},
{
"epoch": 41.11,
"learning_rate": 9.726852132758935e-06,
"loss": 0.1018,
"step": 677500
},
{
"epoch": 41.14,
"learning_rate": 9.719267641526608e-06,
"loss": 0.1007,
"step": 678000
},
{
"epoch": 41.17,
"learning_rate": 9.711698319276744e-06,
"loss": 0.1005,
"step": 678500
},
{
"epoch": 41.2,
"learning_rate": 9.70412899702688e-06,
"loss": 0.1025,
"step": 679000
},
{
"epoch": 41.23,
"learning_rate": 9.696544505794552e-06,
"loss": 0.1027,
"step": 679500
},
{
"epoch": 41.26,
"learning_rate": 9.688960014562225e-06,
"loss": 0.1008,
"step": 680000
},
{
"epoch": 41.29,
"learning_rate": 9.681375523329896e-06,
"loss": 0.1006,
"step": 680500
},
{
"epoch": 41.32,
"learning_rate": 9.673791032097567e-06,
"loss": 0.1012,
"step": 681000
},
{
"epoch": 41.35,
"learning_rate": 9.666221709847704e-06,
"loss": 0.1013,
"step": 681500
},
{
"epoch": 41.38,
"learning_rate": 9.658637218615377e-06,
"loss": 0.1035,
"step": 682000
},
{
"epoch": 41.41,
"learning_rate": 9.651052727383049e-06,
"loss": 0.1018,
"step": 682500
},
{
"epoch": 41.44,
"learning_rate": 9.64346823615072e-06,
"loss": 0.1001,
"step": 683000
},
{
"epoch": 41.47,
"learning_rate": 9.635898913900857e-06,
"loss": 0.1018,
"step": 683500
},
{
"epoch": 41.5,
"learning_rate": 9.628314422668528e-06,
"loss": 0.1005,
"step": 684000
},
{
"epoch": 41.53,
"learning_rate": 9.6207299314362e-06,
"loss": 0.1013,
"step": 684500
},
{
"epoch": 41.56,
"learning_rate": 9.613145440203872e-06,
"loss": 0.1012,
"step": 685000
},
{
"epoch": 41.59,
"learning_rate": 9.605560948971544e-06,
"loss": 0.1009,
"step": 685500
},
{
"epoch": 41.62,
"learning_rate": 9.59799162672168e-06,
"loss": 0.104,
"step": 686000
},
{
"epoch": 41.65,
"learning_rate": 9.590407135489352e-06,
"loss": 0.1023,
"step": 686500
},
{
"epoch": 41.68,
"learning_rate": 9.582822644257023e-06,
"loss": 0.1012,
"step": 687000
},
{
"epoch": 41.71,
"learning_rate": 9.575238153024696e-06,
"loss": 0.1013,
"step": 687500
},
{
"epoch": 41.75,
"learning_rate": 9.567653661792367e-06,
"loss": 0.1018,
"step": 688000
},
{
"epoch": 41.78,
"learning_rate": 9.56006917056004e-06,
"loss": 0.1026,
"step": 688500
},
{
"epoch": 41.81,
"learning_rate": 9.552484679327712e-06,
"loss": 0.103,
"step": 689000
},
{
"epoch": 41.84,
"learning_rate": 9.544900188095383e-06,
"loss": 0.1029,
"step": 689500
},
{
"epoch": 41.87,
"learning_rate": 9.53733086584552e-06,
"loss": 0.103,
"step": 690000
},
{
"epoch": 41.9,
"learning_rate": 9.529746374613191e-06,
"loss": 0.1027,
"step": 690500
},
{
"epoch": 41.93,
"learning_rate": 9.522161883380864e-06,
"loss": 0.0986,
"step": 691000
},
{
"epoch": 41.96,
"learning_rate": 9.514592561131e-06,
"loss": 0.1012,
"step": 691500
},
{
"epoch": 41.99,
"learning_rate": 9.507008069898672e-06,
"loss": 0.106,
"step": 692000
},
{
"epoch": 42.0,
"eval_bleu": 81.2701,
"eval_gen_len": 14.0995,
"eval_loss": 0.16560077667236328,
"eval_runtime": 176.936,
"eval_samples_per_second": 91.129,
"eval_steps_per_second": 2.848,
"step": 692202
},
{
"epoch": 42.02,
"learning_rate": 9.499423578666344e-06,
"loss": 0.1025,
"step": 692500
},
{
"epoch": 42.05,
"learning_rate": 9.491839087434017e-06,
"loss": 0.0978,
"step": 693000
},
{
"epoch": 42.08,
"learning_rate": 9.484254596201688e-06,
"loss": 0.0994,
"step": 693500
},
{
"epoch": 42.11,
"learning_rate": 9.476685273951825e-06,
"loss": 0.1013,
"step": 694000
},
{
"epoch": 42.14,
"learning_rate": 9.469100782719496e-06,
"loss": 0.1,
"step": 694500
},
{
"epoch": 42.17,
"learning_rate": 9.461516291487167e-06,
"loss": 0.0976,
"step": 695000
},
{
"epoch": 42.2,
"learning_rate": 9.45393180025484e-06,
"loss": 0.1008,
"step": 695500
},
{
"epoch": 42.23,
"learning_rate": 9.446347309022512e-06,
"loss": 0.0973,
"step": 696000
},
{
"epoch": 42.26,
"learning_rate": 9.438762817790183e-06,
"loss": 0.099,
"step": 696500
},
{
"epoch": 42.29,
"learning_rate": 9.431178326557854e-06,
"loss": 0.0995,
"step": 697000
},
{
"epoch": 42.32,
"learning_rate": 9.423609004307991e-06,
"loss": 0.1004,
"step": 697500
},
{
"epoch": 42.35,
"learning_rate": 9.416024513075663e-06,
"loss": 0.1004,
"step": 698000
},
{
"epoch": 42.38,
"learning_rate": 9.408440021843335e-06,
"loss": 0.1006,
"step": 698500
},
{
"epoch": 42.41,
"learning_rate": 9.400855530611007e-06,
"loss": 0.1003,
"step": 699000
},
{
"epoch": 42.44,
"learning_rate": 9.39327103937868e-06,
"loss": 0.1005,
"step": 699500
},
{
"epoch": 42.47,
"learning_rate": 9.385686548146351e-06,
"loss": 0.1009,
"step": 700000
},
{
"epoch": 42.5,
"learning_rate": 9.378117225896488e-06,
"loss": 0.1005,
"step": 700500
},
{
"epoch": 42.53,
"learning_rate": 9.37053273466416e-06,
"loss": 0.1015,
"step": 701000
},
{
"epoch": 42.56,
"learning_rate": 9.36294824343183e-06,
"loss": 0.1017,
"step": 701500
},
{
"epoch": 42.59,
"learning_rate": 9.355363752199503e-06,
"loss": 0.1009,
"step": 702000
},
{
"epoch": 42.62,
"learning_rate": 9.347779260967175e-06,
"loss": 0.0989,
"step": 702500
},
{
"epoch": 42.66,
"learning_rate": 9.340194769734848e-06,
"loss": 0.1006,
"step": 703000
},
{
"epoch": 42.69,
"learning_rate": 9.332625447484983e-06,
"loss": 0.1004,
"step": 703500
},
{
"epoch": 42.72,
"learning_rate": 9.325040956252656e-06,
"loss": 0.0999,
"step": 704000
},
{
"epoch": 42.75,
"learning_rate": 9.317456465020327e-06,
"loss": 0.1012,
"step": 704500
},
{
"epoch": 42.78,
"learning_rate": 9.309887142770464e-06,
"loss": 0.0996,
"step": 705000
},
{
"epoch": 42.81,
"learning_rate": 9.302302651538136e-06,
"loss": 0.0981,
"step": 705500
},
{
"epoch": 42.84,
"learning_rate": 9.294718160305808e-06,
"loss": 0.1001,
"step": 706000
},
{
"epoch": 42.87,
"learning_rate": 9.28713366907348e-06,
"loss": 0.0996,
"step": 706500
},
{
"epoch": 42.9,
"learning_rate": 9.279549177841151e-06,
"loss": 0.0994,
"step": 707000
},
{
"epoch": 42.93,
"learning_rate": 9.271964686608824e-06,
"loss": 0.0998,
"step": 707500
},
{
"epoch": 42.96,
"learning_rate": 9.264380195376495e-06,
"loss": 0.1009,
"step": 708000
},
{
"epoch": 42.99,
"learning_rate": 9.256795704144166e-06,
"loss": 0.1016,
"step": 708500
},
{
"epoch": 43.0,
"eval_bleu": 81.3319,
"eval_gen_len": 14.1486,
"eval_loss": 0.16639186441898346,
"eval_runtime": 176.8975,
"eval_samples_per_second": 91.149,
"eval_steps_per_second": 2.849,
"step": 708683
},
{
"epoch": 43.02,
"learning_rate": 9.249211212911838e-06,
"loss": 0.0985,
"step": 709000
},
{
"epoch": 43.05,
"learning_rate": 9.24162672167951e-06,
"loss": 0.097,
"step": 709500
},
{
"epoch": 43.08,
"learning_rate": 9.234042230447182e-06,
"loss": 0.0971,
"step": 710000
},
{
"epoch": 43.11,
"learning_rate": 9.226457739214855e-06,
"loss": 0.0939,
"step": 710500
},
{
"epoch": 43.14,
"learning_rate": 9.21888841696499e-06,
"loss": 0.0995,
"step": 711000
},
{
"epoch": 43.17,
"learning_rate": 9.211303925732663e-06,
"loss": 0.0955,
"step": 711500
},
{
"epoch": 43.2,
"learning_rate": 9.203719434500334e-06,
"loss": 0.0976,
"step": 712000
},
{
"epoch": 43.23,
"learning_rate": 9.196134943268006e-06,
"loss": 0.0969,
"step": 712500
},
{
"epoch": 43.26,
"learning_rate": 9.188565621018143e-06,
"loss": 0.098,
"step": 713000
},
{
"epoch": 43.29,
"learning_rate": 9.180981129785814e-06,
"loss": 0.0982,
"step": 713500
},
{
"epoch": 43.32,
"learning_rate": 9.173396638553487e-06,
"loss": 0.0977,
"step": 714000
},
{
"epoch": 43.35,
"learning_rate": 9.165812147321158e-06,
"loss": 0.098,
"step": 714500
},
{
"epoch": 43.38,
"learning_rate": 9.158242825071295e-06,
"loss": 0.0975,
"step": 715000
},
{
"epoch": 43.41,
"learning_rate": 9.15067350282143e-06,
"loss": 0.0994,
"step": 715500
},
{
"epoch": 43.44,
"learning_rate": 9.143089011589104e-06,
"loss": 0.0996,
"step": 716000
},
{
"epoch": 43.47,
"learning_rate": 9.135504520356775e-06,
"loss": 0.0983,
"step": 716500
},
{
"epoch": 43.5,
"learning_rate": 9.127920029124448e-06,
"loss": 0.0997,
"step": 717000
},
{
"epoch": 43.53,
"learning_rate": 9.120335537892119e-06,
"loss": 0.0983,
"step": 717500
},
{
"epoch": 43.57,
"learning_rate": 9.112766215642256e-06,
"loss": 0.099,
"step": 718000
},
{
"epoch": 43.6,
"learning_rate": 9.105196893392392e-06,
"loss": 0.0987,
"step": 718500
},
{
"epoch": 43.63,
"learning_rate": 9.097612402160064e-06,
"loss": 0.0992,
"step": 719000
},
{
"epoch": 43.66,
"learning_rate": 9.090027910927736e-06,
"loss": 0.0989,
"step": 719500
},
{
"epoch": 43.69,
"learning_rate": 9.082443419695409e-06,
"loss": 0.1004,
"step": 720000
},
{
"epoch": 43.72,
"learning_rate": 9.07485892846308e-06,
"loss": 0.0998,
"step": 720500
},
{
"epoch": 43.75,
"learning_rate": 9.067274437230751e-06,
"loss": 0.1005,
"step": 721000
},
{
"epoch": 43.78,
"learning_rate": 9.059689945998424e-06,
"loss": 0.0978,
"step": 721500
},
{
"epoch": 43.81,
"learning_rate": 9.052105454766095e-06,
"loss": 0.0998,
"step": 722000
},
{
"epoch": 43.84,
"learning_rate": 9.044536132516232e-06,
"loss": 0.0995,
"step": 722500
},
{
"epoch": 43.87,
"learning_rate": 9.036951641283904e-06,
"loss": 0.1005,
"step": 723000
},
{
"epoch": 43.9,
"learning_rate": 9.029367150051575e-06,
"loss": 0.1,
"step": 723500
},
{
"epoch": 43.93,
"learning_rate": 9.021782658819246e-06,
"loss": 0.0979,
"step": 724000
},
{
"epoch": 43.96,
"learning_rate": 9.014198167586919e-06,
"loss": 0.0997,
"step": 724500
},
{
"epoch": 43.99,
"learning_rate": 9.00661367635459e-06,
"loss": 0.0992,
"step": 725000
},
{
"epoch": 44.0,
"eval_bleu": 81.3473,
"eval_gen_len": 14.1242,
"eval_loss": 0.1655581146478653,
"eval_runtime": 178.9799,
"eval_samples_per_second": 90.088,
"eval_steps_per_second": 2.816,
"step": 725164
},
{
"epoch": 44.02,
"learning_rate": 8.999029185122262e-06,
"loss": 0.0972,
"step": 725500
},
{
"epoch": 44.05,
"learning_rate": 8.991444693889935e-06,
"loss": 0.0954,
"step": 726000
},
{
"epoch": 44.08,
"learning_rate": 8.98387537164007e-06,
"loss": 0.0954,
"step": 726500
},
{
"epoch": 44.11,
"learning_rate": 8.976290880407743e-06,
"loss": 0.0961,
"step": 727000
},
{
"epoch": 44.14,
"learning_rate": 8.968706389175414e-06,
"loss": 0.0941,
"step": 727500
},
{
"epoch": 44.17,
"learning_rate": 8.961121897943087e-06,
"loss": 0.0952,
"step": 728000
},
{
"epoch": 44.2,
"learning_rate": 8.953552575693223e-06,
"loss": 0.0993,
"step": 728500
},
{
"epoch": 44.23,
"learning_rate": 8.945968084460895e-06,
"loss": 0.0966,
"step": 729000
},
{
"epoch": 44.26,
"learning_rate": 8.938383593228567e-06,
"loss": 0.0966,
"step": 729500
},
{
"epoch": 44.29,
"learning_rate": 8.93079910199624e-06,
"loss": 0.0968,
"step": 730000
},
{
"epoch": 44.32,
"learning_rate": 8.923229779746375e-06,
"loss": 0.0987,
"step": 730500
},
{
"epoch": 44.35,
"learning_rate": 8.915660457496512e-06,
"loss": 0.0982,
"step": 731000
},
{
"epoch": 44.38,
"learning_rate": 8.908091135246648e-06,
"loss": 0.0954,
"step": 731500
},
{
"epoch": 44.41,
"learning_rate": 8.90050664401432e-06,
"loss": 0.0973,
"step": 732000
},
{
"epoch": 44.45,
"learning_rate": 8.892922152781992e-06,
"loss": 0.0968,
"step": 732500
},
{
"epoch": 44.48,
"learning_rate": 8.885337661549665e-06,
"loss": 0.0966,
"step": 733000
},
{
"epoch": 44.51,
"learning_rate": 8.877753170317336e-06,
"loss": 0.0962,
"step": 733500
},
{
"epoch": 44.54,
"learning_rate": 8.870168679085007e-06,
"loss": 0.0946,
"step": 734000
},
{
"epoch": 44.57,
"learning_rate": 8.86258418785268e-06,
"loss": 0.1004,
"step": 734500
},
{
"epoch": 44.6,
"learning_rate": 8.854999696620351e-06,
"loss": 0.0983,
"step": 735000
},
{
"epoch": 44.63,
"learning_rate": 8.847415205388024e-06,
"loss": 0.0979,
"step": 735500
},
{
"epoch": 44.66,
"learning_rate": 8.839830714155696e-06,
"loss": 0.0976,
"step": 736000
},
{
"epoch": 44.69,
"learning_rate": 8.832246222923367e-06,
"loss": 0.0984,
"step": 736500
},
{
"epoch": 44.72,
"learning_rate": 8.824661731691038e-06,
"loss": 0.099,
"step": 737000
},
{
"epoch": 44.75,
"learning_rate": 8.817092409441175e-06,
"loss": 0.0983,
"step": 737500
},
{
"epoch": 44.78,
"learning_rate": 8.809507918208846e-06,
"loss": 0.0969,
"step": 738000
},
{
"epoch": 44.81,
"learning_rate": 8.80192342697652e-06,
"loss": 0.0979,
"step": 738500
},
{
"epoch": 44.84,
"learning_rate": 8.794354104726656e-06,
"loss": 0.0963,
"step": 739000
},
{
"epoch": 44.87,
"learning_rate": 8.786769613494328e-06,
"loss": 0.098,
"step": 739500
},
{
"epoch": 44.9,
"learning_rate": 8.779185122261999e-06,
"loss": 0.0979,
"step": 740000
},
{
"epoch": 44.93,
"learning_rate": 8.77160063102967e-06,
"loss": 0.0995,
"step": 740500
},
{
"epoch": 44.96,
"learning_rate": 8.764016139797343e-06,
"loss": 0.0956,
"step": 741000
},
{
"epoch": 44.99,
"learning_rate": 8.756446817547479e-06,
"loss": 0.1002,
"step": 741500
},
{
"epoch": 45.0,
"eval_bleu": 81.3575,
"eval_gen_len": 14.1571,
"eval_loss": 0.16723676025867462,
"eval_runtime": 178.3486,
"eval_samples_per_second": 90.407,
"eval_steps_per_second": 2.826,
"step": 741645
},
{
"epoch": 45.02,
"learning_rate": 8.748862326315151e-06,
"loss": 0.0968,
"step": 742000
},
{
"epoch": 45.05,
"learning_rate": 8.741277835082823e-06,
"loss": 0.0942,
"step": 742500
},
{
"epoch": 45.08,
"learning_rate": 8.733693343850496e-06,
"loss": 0.0955,
"step": 743000
},
{
"epoch": 45.11,
"learning_rate": 8.726108852618167e-06,
"loss": 0.0939,
"step": 743500
},
{
"epoch": 45.14,
"learning_rate": 8.71852436138584e-06,
"loss": 0.0937,
"step": 744000
},
{
"epoch": 45.17,
"learning_rate": 8.710939870153511e-06,
"loss": 0.0963,
"step": 744500
},
{
"epoch": 45.2,
"learning_rate": 8.703355378921182e-06,
"loss": 0.0956,
"step": 745000
},
{
"epoch": 45.23,
"learning_rate": 8.69578605667132e-06,
"loss": 0.097,
"step": 745500
},
{
"epoch": 45.26,
"learning_rate": 8.688216734421455e-06,
"loss": 0.0942,
"step": 746000
},
{
"epoch": 45.29,
"learning_rate": 8.680632243189128e-06,
"loss": 0.0945,
"step": 746500
},
{
"epoch": 45.32,
"learning_rate": 8.673047751956799e-06,
"loss": 0.0945,
"step": 747000
},
{
"epoch": 45.36,
"learning_rate": 8.665463260724472e-06,
"loss": 0.0956,
"step": 747500
},
{
"epoch": 45.39,
"learning_rate": 8.657878769492143e-06,
"loss": 0.0974,
"step": 748000
},
{
"epoch": 45.42,
"learning_rate": 8.65030944724228e-06,
"loss": 0.0963,
"step": 748500
},
{
"epoch": 45.45,
"learning_rate": 8.642724956009952e-06,
"loss": 0.0948,
"step": 749000
},
{
"epoch": 45.48,
"learning_rate": 8.635140464777624e-06,
"loss": 0.0976,
"step": 749500
},
{
"epoch": 45.51,
"learning_rate": 8.627555973545296e-06,
"loss": 0.0956,
"step": 750000
},
{
"epoch": 45.54,
"learning_rate": 8.619971482312967e-06,
"loss": 0.0947,
"step": 750500
},
{
"epoch": 45.57,
"learning_rate": 8.612386991080638e-06,
"loss": 0.097,
"step": 751000
},
{
"epoch": 45.6,
"learning_rate": 8.604802499848311e-06,
"loss": 0.0932,
"step": 751500
},
{
"epoch": 45.63,
"learning_rate": 8.597218008615982e-06,
"loss": 0.094,
"step": 752000
},
{
"epoch": 45.66,
"learning_rate": 8.589663855348584e-06,
"loss": 0.0953,
"step": 752500
},
{
"epoch": 45.69,
"learning_rate": 8.582079364116257e-06,
"loss": 0.0957,
"step": 753000
},
{
"epoch": 45.72,
"learning_rate": 8.574510041866392e-06,
"loss": 0.0981,
"step": 753500
},
{
"epoch": 45.75,
"learning_rate": 8.566925550634065e-06,
"loss": 0.0956,
"step": 754000
},
{
"epoch": 45.78,
"learning_rate": 8.559341059401736e-06,
"loss": 0.0955,
"step": 754500
},
{
"epoch": 45.81,
"learning_rate": 8.551756568169407e-06,
"loss": 0.1007,
"step": 755000
},
{
"epoch": 45.84,
"learning_rate": 8.544172076937079e-06,
"loss": 0.0996,
"step": 755500
},
{
"epoch": 45.87,
"learning_rate": 8.536587585704752e-06,
"loss": 0.0957,
"step": 756000
},
{
"epoch": 45.9,
"learning_rate": 8.529003094472423e-06,
"loss": 0.0977,
"step": 756500
},
{
"epoch": 45.93,
"learning_rate": 8.521418603240096e-06,
"loss": 0.0952,
"step": 757000
},
{
"epoch": 45.96,
"learning_rate": 8.513834112007767e-06,
"loss": 0.0966,
"step": 757500
},
{
"epoch": 45.99,
"learning_rate": 8.506249620775438e-06,
"loss": 0.0957,
"step": 758000
},
{
"epoch": 46.0,
"eval_bleu": 81.2624,
"eval_gen_len": 14.1363,
"eval_loss": 0.16791266202926636,
"eval_runtime": 178.4651,
"eval_samples_per_second": 90.348,
"eval_steps_per_second": 2.824,
"step": 758126
},
{
"epoch": 46.02,
"learning_rate": 8.498680298525575e-06,
"loss": 0.0937,
"step": 758500
},
{
"epoch": 46.05,
"learning_rate": 8.491095807293247e-06,
"loss": 0.0933,
"step": 759000
},
{
"epoch": 46.08,
"learning_rate": 8.48351131606092e-06,
"loss": 0.0949,
"step": 759500
},
{
"epoch": 46.11,
"learning_rate": 8.47592682482859e-06,
"loss": 0.094,
"step": 760000
},
{
"epoch": 46.14,
"learning_rate": 8.468357502578728e-06,
"loss": 0.0919,
"step": 760500
},
{
"epoch": 46.17,
"learning_rate": 8.4607730113464e-06,
"loss": 0.0934,
"step": 761000
},
{
"epoch": 46.2,
"learning_rate": 8.453188520114072e-06,
"loss": 0.0947,
"step": 761500
},
{
"epoch": 46.24,
"learning_rate": 8.445604028881743e-06,
"loss": 0.0951,
"step": 762000
},
{
"epoch": 46.27,
"learning_rate": 8.438019537649416e-06,
"loss": 0.094,
"step": 762500
},
{
"epoch": 46.3,
"learning_rate": 8.430450215399552e-06,
"loss": 0.0931,
"step": 763000
},
{
"epoch": 46.33,
"learning_rate": 8.422865724167225e-06,
"loss": 0.0947,
"step": 763500
},
{
"epoch": 46.36,
"learning_rate": 8.415281232934896e-06,
"loss": 0.094,
"step": 764000
},
{
"epoch": 46.39,
"learning_rate": 8.407696741702567e-06,
"loss": 0.0927,
"step": 764500
},
{
"epoch": 46.42,
"learning_rate": 8.40011225047024e-06,
"loss": 0.0945,
"step": 765000
},
{
"epoch": 46.45,
"learning_rate": 8.392527759237911e-06,
"loss": 0.0951,
"step": 765500
},
{
"epoch": 46.48,
"learning_rate": 8.384958436988048e-06,
"loss": 0.0933,
"step": 766000
},
{
"epoch": 46.51,
"learning_rate": 8.37737394575572e-06,
"loss": 0.0941,
"step": 766500
},
{
"epoch": 46.54,
"learning_rate": 8.369804623505857e-06,
"loss": 0.0952,
"step": 767000
},
{
"epoch": 46.57,
"learning_rate": 8.362220132273528e-06,
"loss": 0.093,
"step": 767500
},
{
"epoch": 46.6,
"learning_rate": 8.3546356410412e-06,
"loss": 0.0954,
"step": 768000
},
{
"epoch": 46.63,
"learning_rate": 8.34705114980887e-06,
"loss": 0.0955,
"step": 768500
},
{
"epoch": 46.66,
"learning_rate": 8.339466658576543e-06,
"loss": 0.0932,
"step": 769000
},
{
"epoch": 46.69,
"learning_rate": 8.331882167344215e-06,
"loss": 0.0934,
"step": 769500
},
{
"epoch": 46.72,
"learning_rate": 8.324297676111886e-06,
"loss": 0.0939,
"step": 770000
},
{
"epoch": 46.75,
"learning_rate": 8.316713184879559e-06,
"loss": 0.0941,
"step": 770500
},
{
"epoch": 46.78,
"learning_rate": 8.30912869364723e-06,
"loss": 0.0966,
"step": 771000
},
{
"epoch": 46.81,
"learning_rate": 8.301559371397367e-06,
"loss": 0.0963,
"step": 771500
},
{
"epoch": 46.84,
"learning_rate": 8.293974880165039e-06,
"loss": 0.0924,
"step": 772000
},
{
"epoch": 46.87,
"learning_rate": 8.286390388932711e-06,
"loss": 0.0957,
"step": 772500
},
{
"epoch": 46.9,
"learning_rate": 8.278805897700383e-06,
"loss": 0.0963,
"step": 773000
},
{
"epoch": 46.93,
"learning_rate": 8.271221406468056e-06,
"loss": 0.0948,
"step": 773500
},
{
"epoch": 46.96,
"learning_rate": 8.263636915235727e-06,
"loss": 0.0978,
"step": 774000
},
{
"epoch": 46.99,
"learning_rate": 8.256067592985864e-06,
"loss": 0.0963,
"step": 774500
},
{
"epoch": 47.0,
"eval_bleu": 81.339,
"eval_gen_len": 14.1472,
"eval_loss": 0.16813451051712036,
"eval_runtime": 178.5484,
"eval_samples_per_second": 90.306,
"eval_steps_per_second": 2.823,
"step": 774607
},
{
"epoch": 47.02,
"learning_rate": 8.248483101753535e-06,
"loss": 0.0917,
"step": 775000
},
{
"epoch": 47.05,
"learning_rate": 8.240898610521208e-06,
"loss": 0.0903,
"step": 775500
},
{
"epoch": 47.08,
"learning_rate": 8.23331411928888e-06,
"loss": 0.094,
"step": 776000
},
{
"epoch": 47.11,
"learning_rate": 8.225744797039017e-06,
"loss": 0.0927,
"step": 776500
},
{
"epoch": 47.15,
"learning_rate": 8.218160305806688e-06,
"loss": 0.0906,
"step": 777000
},
{
"epoch": 47.18,
"learning_rate": 8.210575814574359e-06,
"loss": 0.0925,
"step": 777500
},
{
"epoch": 47.21,
"learning_rate": 8.20299132334203e-06,
"loss": 0.0925,
"step": 778000
},
{
"epoch": 47.24,
"learning_rate": 8.195406832109703e-06,
"loss": 0.0932,
"step": 778500
},
{
"epoch": 47.27,
"learning_rate": 8.187822340877374e-06,
"loss": 0.0928,
"step": 779000
},
{
"epoch": 47.3,
"learning_rate": 8.180237849645046e-06,
"loss": 0.0941,
"step": 779500
},
{
"epoch": 47.33,
"learning_rate": 8.172668527395183e-06,
"loss": 0.0902,
"step": 780000
},
{
"epoch": 47.36,
"learning_rate": 8.165084036162854e-06,
"loss": 0.0922,
"step": 780500
},
{
"epoch": 47.39,
"learning_rate": 8.157514713912991e-06,
"loss": 0.0929,
"step": 781000
},
{
"epoch": 47.42,
"learning_rate": 8.149930222680662e-06,
"loss": 0.0939,
"step": 781500
},
{
"epoch": 47.45,
"learning_rate": 8.142345731448335e-06,
"loss": 0.0942,
"step": 782000
},
{
"epoch": 47.48,
"learning_rate": 8.134761240216007e-06,
"loss": 0.093,
"step": 782500
},
{
"epoch": 47.51,
"learning_rate": 8.127176748983678e-06,
"loss": 0.0916,
"step": 783000
},
{
"epoch": 47.54,
"learning_rate": 8.11959225775135e-06,
"loss": 0.0931,
"step": 783500
},
{
"epoch": 47.57,
"learning_rate": 8.112007766519022e-06,
"loss": 0.0923,
"step": 784000
},
{
"epoch": 47.6,
"learning_rate": 8.104423275286695e-06,
"loss": 0.0928,
"step": 784500
},
{
"epoch": 47.63,
"learning_rate": 8.096838784054366e-06,
"loss": 0.0933,
"step": 785000
},
{
"epoch": 47.66,
"learning_rate": 8.089254292822039e-06,
"loss": 0.0935,
"step": 785500
},
{
"epoch": 47.69,
"learning_rate": 8.08166980158971e-06,
"loss": 0.0922,
"step": 786000
},
{
"epoch": 47.72,
"learning_rate": 8.074085310357382e-06,
"loss": 0.0941,
"step": 786500
},
{
"epoch": 47.75,
"learning_rate": 8.066515988107519e-06,
"loss": 0.0933,
"step": 787000
},
{
"epoch": 47.78,
"learning_rate": 8.05893149687519e-06,
"loss": 0.0939,
"step": 787500
},
{
"epoch": 47.81,
"learning_rate": 8.051347005642863e-06,
"loss": 0.093,
"step": 788000
},
{
"epoch": 47.84,
"learning_rate": 8.043762514410534e-06,
"loss": 0.095,
"step": 788500
},
{
"epoch": 47.87,
"learning_rate": 8.036193192160671e-06,
"loss": 0.0941,
"step": 789000
},
{
"epoch": 47.9,
"learning_rate": 8.028608700928342e-06,
"loss": 0.0957,
"step": 789500
},
{
"epoch": 47.93,
"learning_rate": 8.02103937867848e-06,
"loss": 0.0935,
"step": 790000
},
{
"epoch": 47.96,
"learning_rate": 8.01345488744615e-06,
"loss": 0.0941,
"step": 790500
},
{
"epoch": 47.99,
"learning_rate": 8.005885565196288e-06,
"loss": 0.0929,
"step": 791000
},
{
"epoch": 48.0,
"eval_bleu": 81.3094,
"eval_gen_len": 14.1392,
"eval_loss": 0.16808539628982544,
"eval_runtime": 178.2856,
"eval_samples_per_second": 90.439,
"eval_steps_per_second": 2.827,
"step": 791088
},
{
"epoch": 48.02,
"learning_rate": 7.99830107396396e-06,
"loss": 0.0911,
"step": 791500
},
{
"epoch": 48.06,
"learning_rate": 7.990716582731632e-06,
"loss": 0.0914,
"step": 792000
},
{
"epoch": 48.09,
"learning_rate": 7.983132091499303e-06,
"loss": 0.0919,
"step": 792500
},
{
"epoch": 48.12,
"learning_rate": 7.975547600266975e-06,
"loss": 0.0893,
"step": 793000
},
{
"epoch": 48.15,
"learning_rate": 7.967963109034646e-06,
"loss": 0.089,
"step": 793500
},
{
"epoch": 48.18,
"learning_rate": 7.960378617802319e-06,
"loss": 0.0903,
"step": 794000
},
{
"epoch": 48.21,
"learning_rate": 7.95279412656999e-06,
"loss": 0.0927,
"step": 794500
},
{
"epoch": 48.24,
"learning_rate": 7.945209635337661e-06,
"loss": 0.0933,
"step": 795000
},
{
"epoch": 48.27,
"learning_rate": 7.937640313087798e-06,
"loss": 0.0919,
"step": 795500
},
{
"epoch": 48.3,
"learning_rate": 7.93005582185547e-06,
"loss": 0.0903,
"step": 796000
},
{
"epoch": 48.33,
"learning_rate": 7.922471330623143e-06,
"loss": 0.0928,
"step": 796500
},
{
"epoch": 48.36,
"learning_rate": 7.914886839390814e-06,
"loss": 0.0925,
"step": 797000
},
{
"epoch": 48.39,
"learning_rate": 7.907302348158487e-06,
"loss": 0.0916,
"step": 797500
},
{
"epoch": 48.42,
"learning_rate": 7.899717856926158e-06,
"loss": 0.0943,
"step": 798000
},
{
"epoch": 48.45,
"learning_rate": 7.892148534676295e-06,
"loss": 0.0897,
"step": 798500
},
{
"epoch": 48.48,
"learning_rate": 7.884564043443966e-06,
"loss": 0.0908,
"step": 799000
},
{
"epoch": 48.51,
"learning_rate": 7.87697955221164e-06,
"loss": 0.0933,
"step": 799500
},
{
"epoch": 48.54,
"learning_rate": 7.86939506097931e-06,
"loss": 0.0925,
"step": 800000
},
{
"epoch": 48.57,
"learning_rate": 7.861810569746982e-06,
"loss": 0.0923,
"step": 800500
},
{
"epoch": 48.6,
"learning_rate": 7.854241247497119e-06,
"loss": 0.0923,
"step": 801000
},
{
"epoch": 48.63,
"learning_rate": 7.84665675626479e-06,
"loss": 0.091,
"step": 801500
},
{
"epoch": 48.66,
"learning_rate": 7.839072265032463e-06,
"loss": 0.0929,
"step": 802000
},
{
"epoch": 48.69,
"learning_rate": 7.831487773800134e-06,
"loss": 0.0954,
"step": 802500
},
{
"epoch": 48.72,
"learning_rate": 7.823918451550271e-06,
"loss": 0.092,
"step": 803000
},
{
"epoch": 48.75,
"learning_rate": 7.816333960317943e-06,
"loss": 0.092,
"step": 803500
},
{
"epoch": 48.78,
"learning_rate": 7.808749469085614e-06,
"loss": 0.0893,
"step": 804000
},
{
"epoch": 48.81,
"learning_rate": 7.801164977853287e-06,
"loss": 0.0913,
"step": 804500
},
{
"epoch": 48.84,
"learning_rate": 7.793580486620958e-06,
"loss": 0.0918,
"step": 805000
},
{
"epoch": 48.87,
"learning_rate": 7.786011164371095e-06,
"loss": 0.093,
"step": 805500
},
{
"epoch": 48.9,
"learning_rate": 7.778426673138766e-06,
"loss": 0.0937,
"step": 806000
},
{
"epoch": 48.94,
"learning_rate": 7.770842181906438e-06,
"loss": 0.0904,
"step": 806500
},
{
"epoch": 48.97,
"learning_rate": 7.763257690674109e-06,
"loss": 0.0962,
"step": 807000
},
{
"epoch": 49.0,
"learning_rate": 7.755673199441782e-06,
"loss": 0.0916,
"step": 807500
},
{
"epoch": 49.0,
"eval_bleu": 81.294,
"eval_gen_len": 14.1299,
"eval_loss": 0.16902963817119598,
"eval_runtime": 178.1975,
"eval_samples_per_second": 90.484,
"eval_steps_per_second": 2.828,
"step": 807569
},
{
"epoch": 49.03,
"learning_rate": 7.748103877191917e-06,
"loss": 0.0899,
"step": 808000
},
{
"epoch": 49.06,
"learning_rate": 7.74051938595959e-06,
"loss": 0.0887,
"step": 808500
},
{
"epoch": 49.09,
"learning_rate": 7.732934894727261e-06,
"loss": 0.0899,
"step": 809000
},
{
"epoch": 49.12,
"learning_rate": 7.725350403494934e-06,
"loss": 0.0889,
"step": 809500
},
{
"epoch": 49.15,
"learning_rate": 7.71778108124507e-06,
"loss": 0.0909,
"step": 810000
},
{
"epoch": 49.18,
"learning_rate": 7.710196590012743e-06,
"loss": 0.0898,
"step": 810500
},
{
"epoch": 49.21,
"learning_rate": 7.702612098780414e-06,
"loss": 0.0903,
"step": 811000
},
{
"epoch": 49.24,
"learning_rate": 7.695027607548087e-06,
"loss": 0.0886,
"step": 811500
},
{
"epoch": 49.27,
"learning_rate": 7.687443116315758e-06,
"loss": 0.0909,
"step": 812000
},
{
"epoch": 49.3,
"learning_rate": 7.67985862508343e-06,
"loss": 0.0901,
"step": 812500
},
{
"epoch": 49.33,
"learning_rate": 7.672289302833567e-06,
"loss": 0.0901,
"step": 813000
},
{
"epoch": 49.36,
"learning_rate": 7.664704811601238e-06,
"loss": 0.0913,
"step": 813500
},
{
"epoch": 49.39,
"learning_rate": 7.65712032036891e-06,
"loss": 0.0912,
"step": 814000
},
{
"epoch": 49.42,
"learning_rate": 7.649535829136582e-06,
"loss": 0.0903,
"step": 814500
},
{
"epoch": 49.45,
"learning_rate": 7.641951337904255e-06,
"loss": 0.0917,
"step": 815000
},
{
"epoch": 49.48,
"learning_rate": 7.634366846671926e-06,
"loss": 0.0896,
"step": 815500
},
{
"epoch": 49.51,
"learning_rate": 7.626782355439598e-06,
"loss": 0.0912,
"step": 816000
},
{
"epoch": 49.54,
"learning_rate": 7.6191978642072695e-06,
"loss": 0.089,
"step": 816500
},
{
"epoch": 49.57,
"learning_rate": 7.611628541957407e-06,
"loss": 0.0919,
"step": 817000
},
{
"epoch": 49.6,
"learning_rate": 7.604044050725078e-06,
"loss": 0.092,
"step": 817500
},
{
"epoch": 49.63,
"learning_rate": 7.596459559492751e-06,
"loss": 0.0916,
"step": 818000
},
{
"epoch": 49.66,
"learning_rate": 7.588875068260422e-06,
"loss": 0.0882,
"step": 818500
},
{
"epoch": 49.69,
"learning_rate": 7.5813057460105575e-06,
"loss": 0.0918,
"step": 819000
},
{
"epoch": 49.72,
"learning_rate": 7.57372125477823e-06,
"loss": 0.0904,
"step": 819500
},
{
"epoch": 49.75,
"learning_rate": 7.566136763545902e-06,
"loss": 0.0916,
"step": 820000
},
{
"epoch": 49.78,
"learning_rate": 7.558567441296039e-06,
"loss": 0.0902,
"step": 820500
},
{
"epoch": 49.81,
"learning_rate": 7.55098295006371e-06,
"loss": 0.0918,
"step": 821000
},
{
"epoch": 49.85,
"learning_rate": 7.543398458831382e-06,
"loss": 0.0892,
"step": 821500
},
{
"epoch": 49.88,
"learning_rate": 7.535813967599054e-06,
"loss": 0.0914,
"step": 822000
},
{
"epoch": 49.91,
"learning_rate": 7.528229476366726e-06,
"loss": 0.0906,
"step": 822500
},
{
"epoch": 49.94,
"learning_rate": 7.5206601541168625e-06,
"loss": 0.0915,
"step": 823000
},
{
"epoch": 49.97,
"learning_rate": 7.513075662884535e-06,
"loss": 0.0919,
"step": 823500
},
{
"epoch": 50.0,
"learning_rate": 7.505491171652206e-06,
"loss": 0.0918,
"step": 824000
},
{
"epoch": 50.0,
"eval_bleu": 81.2345,
"eval_gen_len": 14.1368,
"eval_loss": 0.16936491429805756,
"eval_runtime": 178.0893,
"eval_samples_per_second": 90.539,
"eval_steps_per_second": 2.83,
"step": 824050
},
{
"epoch": 50.03,
"learning_rate": 7.497906680419879e-06,
"loss": 0.0893,
"step": 824500
},
{
"epoch": 50.06,
"learning_rate": 7.49032218918755e-06,
"loss": 0.0881,
"step": 825000
},
{
"epoch": 50.09,
"learning_rate": 7.482737697955221e-06,
"loss": 0.0873,
"step": 825500
},
{
"epoch": 50.12,
"learning_rate": 7.475168375705358e-06,
"loss": 0.0892,
"step": 826000
},
{
"epoch": 50.15,
"learning_rate": 7.46758388447303e-06,
"loss": 0.0892,
"step": 826500
},
{
"epoch": 50.18,
"learning_rate": 7.459999393240703e-06,
"loss": 0.0907,
"step": 827000
},
{
"epoch": 50.21,
"learning_rate": 7.452414902008374e-06,
"loss": 0.0877,
"step": 827500
},
{
"epoch": 50.24,
"learning_rate": 7.444830410776046e-06,
"loss": 0.0893,
"step": 828000
},
{
"epoch": 50.27,
"learning_rate": 7.437245919543717e-06,
"loss": 0.0901,
"step": 828500
},
{
"epoch": 50.3,
"learning_rate": 7.42966142831139e-06,
"loss": 0.09,
"step": 829000
},
{
"epoch": 50.33,
"learning_rate": 7.422076937079061e-06,
"loss": 0.0872,
"step": 829500
},
{
"epoch": 50.36,
"learning_rate": 7.414522783811663e-06,
"loss": 0.0892,
"step": 830000
},
{
"epoch": 50.39,
"learning_rate": 7.406938292579334e-06,
"loss": 0.0883,
"step": 830500
},
{
"epoch": 50.42,
"learning_rate": 7.399353801347007e-06,
"loss": 0.0896,
"step": 831000
},
{
"epoch": 50.45,
"learning_rate": 7.391769310114678e-06,
"loss": 0.0871,
"step": 831500
},
{
"epoch": 50.48,
"learning_rate": 7.384184818882349e-06,
"loss": 0.0913,
"step": 832000
},
{
"epoch": 50.51,
"learning_rate": 7.376600327650022e-06,
"loss": 0.0898,
"step": 832500
},
{
"epoch": 50.54,
"learning_rate": 7.3690158364176935e-06,
"loss": 0.0899,
"step": 833000
},
{
"epoch": 50.57,
"learning_rate": 7.3614313451853656e-06,
"loss": 0.0902,
"step": 833500
},
{
"epoch": 50.6,
"learning_rate": 7.353862022935502e-06,
"loss": 0.0916,
"step": 834000
},
{
"epoch": 50.63,
"learning_rate": 7.346277531703174e-06,
"loss": 0.0918,
"step": 834500
},
{
"epoch": 50.66,
"learning_rate": 7.338693040470846e-06,
"loss": 0.0907,
"step": 835000
},
{
"epoch": 50.69,
"learning_rate": 7.331108549238518e-06,
"loss": 0.0891,
"step": 835500
},
{
"epoch": 50.73,
"learning_rate": 7.323524058006189e-06,
"loss": 0.0898,
"step": 836000
},
{
"epoch": 50.76,
"learning_rate": 7.315939566773861e-06,
"loss": 0.0908,
"step": 836500
},
{
"epoch": 50.79,
"learning_rate": 7.3083550755415335e-06,
"loss": 0.0904,
"step": 837000
},
{
"epoch": 50.82,
"learning_rate": 7.300770584309205e-06,
"loss": 0.0899,
"step": 837500
},
{
"epoch": 50.85,
"learning_rate": 7.293186093076877e-06,
"loss": 0.0902,
"step": 838000
},
{
"epoch": 50.88,
"learning_rate": 7.285601601844549e-06,
"loss": 0.0899,
"step": 838500
},
{
"epoch": 50.91,
"learning_rate": 7.278032279594685e-06,
"loss": 0.0891,
"step": 839000
},
{
"epoch": 50.94,
"learning_rate": 7.270447788362357e-06,
"loss": 0.0913,
"step": 839500
},
{
"epoch": 50.97,
"learning_rate": 7.262863297130029e-06,
"loss": 0.0903,
"step": 840000
},
{
"epoch": 51.0,
"learning_rate": 7.255278805897701e-06,
"loss": 0.0898,
"step": 840500
},
{
"epoch": 51.0,
"eval_bleu": 81.3532,
"eval_gen_len": 14.1407,
"eval_loss": 0.1701522320508957,
"eval_runtime": 178.4826,
"eval_samples_per_second": 90.339,
"eval_steps_per_second": 2.824,
"step": 840531
},
{
"epoch": 51.03,
"learning_rate": 7.247694314665374e-06,
"loss": 0.0886,
"step": 841000
},
{
"epoch": 51.06,
"learning_rate": 7.240109823433045e-06,
"loss": 0.088,
"step": 841500
},
{
"epoch": 51.09,
"learning_rate": 7.232540501183182e-06,
"loss": 0.089,
"step": 842000
},
{
"epoch": 51.12,
"learning_rate": 7.224956009950853e-06,
"loss": 0.0868,
"step": 842500
},
{
"epoch": 51.15,
"learning_rate": 7.2173715187185244e-06,
"loss": 0.0882,
"step": 843000
},
{
"epoch": 51.18,
"learning_rate": 7.209787027486197e-06,
"loss": 0.0886,
"step": 843500
},
{
"epoch": 51.21,
"learning_rate": 7.202202536253869e-06,
"loss": 0.0867,
"step": 844000
},
{
"epoch": 51.24,
"learning_rate": 7.194618045021541e-06,
"loss": 0.0873,
"step": 844500
},
{
"epoch": 51.27,
"learning_rate": 7.187033553789212e-06,
"loss": 0.0886,
"step": 845000
},
{
"epoch": 51.3,
"learning_rate": 7.179449062556885e-06,
"loss": 0.0872,
"step": 845500
},
{
"epoch": 51.33,
"learning_rate": 7.17187974030702e-06,
"loss": 0.0874,
"step": 846000
},
{
"epoch": 51.36,
"learning_rate": 7.164295249074693e-06,
"loss": 0.0881,
"step": 846500
},
{
"epoch": 51.39,
"learning_rate": 7.1567107578423645e-06,
"loss": 0.0867,
"step": 847000
},
{
"epoch": 51.42,
"learning_rate": 7.149126266610036e-06,
"loss": 0.0882,
"step": 847500
},
{
"epoch": 51.45,
"learning_rate": 7.141572113342638e-06,
"loss": 0.0897,
"step": 848000
},
{
"epoch": 51.48,
"learning_rate": 7.13398762211031e-06,
"loss": 0.0875,
"step": 848500
},
{
"epoch": 51.51,
"learning_rate": 7.126403130877981e-06,
"loss": 0.0874,
"step": 849000
},
{
"epoch": 51.54,
"learning_rate": 7.1188186396456524e-06,
"loss": 0.0894,
"step": 849500
},
{
"epoch": 51.57,
"learning_rate": 7.111234148413325e-06,
"loss": 0.0878,
"step": 850000
},
{
"epoch": 51.6,
"learning_rate": 7.103649657180997e-06,
"loss": 0.0886,
"step": 850500
},
{
"epoch": 51.64,
"learning_rate": 7.096065165948669e-06,
"loss": 0.09,
"step": 851000
},
{
"epoch": 51.67,
"learning_rate": 7.08848067471634e-06,
"loss": 0.0897,
"step": 851500
},
{
"epoch": 51.7,
"learning_rate": 7.080926521448942e-06,
"loss": 0.0896,
"step": 852000
},
{
"epoch": 51.73,
"learning_rate": 7.073342030216613e-06,
"loss": 0.0877,
"step": 852500
},
{
"epoch": 51.76,
"learning_rate": 7.065757538984286e-06,
"loss": 0.089,
"step": 853000
},
{
"epoch": 51.79,
"learning_rate": 7.0581730477519575e-06,
"loss": 0.0893,
"step": 853500
},
{
"epoch": 51.82,
"learning_rate": 7.05058855651963e-06,
"loss": 0.0897,
"step": 854000
},
{
"epoch": 51.85,
"learning_rate": 7.043004065287301e-06,
"loss": 0.0872,
"step": 854500
},
{
"epoch": 51.88,
"learning_rate": 7.035419574054972e-06,
"loss": 0.0877,
"step": 855000
},
{
"epoch": 51.91,
"learning_rate": 7.027835082822645e-06,
"loss": 0.0901,
"step": 855500
},
{
"epoch": 51.94,
"learning_rate": 7.0202657605727804e-06,
"loss": 0.0878,
"step": 856000
},
{
"epoch": 51.97,
"learning_rate": 7.012681269340453e-06,
"loss": 0.0901,
"step": 856500
},
{
"epoch": 52.0,
"learning_rate": 7.005096778108125e-06,
"loss": 0.0889,
"step": 857000
},
{
"epoch": 52.0,
"eval_bleu": 81.286,
"eval_gen_len": 14.1467,
"eval_loss": 0.17058435082435608,
"eval_runtime": 178.2603,
"eval_samples_per_second": 90.452,
"eval_steps_per_second": 2.827,
"step": 857012
},
{
"epoch": 52.03,
"learning_rate": 6.9975122868757976e-06,
"loss": 0.0847,
"step": 857500
},
{
"epoch": 52.06,
"learning_rate": 6.989942964625933e-06,
"loss": 0.0872,
"step": 858000
},
{
"epoch": 52.09,
"learning_rate": 6.982358473393606e-06,
"loss": 0.0868,
"step": 858500
},
{
"epoch": 52.12,
"learning_rate": 6.974773982161277e-06,
"loss": 0.0842,
"step": 859000
},
{
"epoch": 52.15,
"learning_rate": 6.967204659911414e-06,
"loss": 0.0865,
"step": 859500
},
{
"epoch": 52.18,
"learning_rate": 6.9596201686790855e-06,
"loss": 0.0862,
"step": 860000
},
{
"epoch": 52.21,
"learning_rate": 6.952035677446758e-06,
"loss": 0.0874,
"step": 860500
},
{
"epoch": 52.24,
"learning_rate": 6.944451186214429e-06,
"loss": 0.0897,
"step": 861000
},
{
"epoch": 52.27,
"learning_rate": 6.936866694982101e-06,
"loss": 0.0854,
"step": 861500
},
{
"epoch": 52.3,
"learning_rate": 6.929282203749773e-06,
"loss": 0.0866,
"step": 862000
},
{
"epoch": 52.33,
"learning_rate": 6.921697712517444e-06,
"loss": 0.0878,
"step": 862500
},
{
"epoch": 52.36,
"learning_rate": 6.914128390267581e-06,
"loss": 0.0867,
"step": 863000
},
{
"epoch": 52.39,
"learning_rate": 6.906543899035253e-06,
"loss": 0.0877,
"step": 863500
},
{
"epoch": 52.42,
"learning_rate": 6.8989594078029256e-06,
"loss": 0.0879,
"step": 864000
},
{
"epoch": 52.45,
"learning_rate": 6.891374916570597e-06,
"loss": 0.0893,
"step": 864500
},
{
"epoch": 52.48,
"learning_rate": 6.883790425338269e-06,
"loss": 0.0879,
"step": 865000
},
{
"epoch": 52.52,
"learning_rate": 6.876205934105941e-06,
"loss": 0.0851,
"step": 865500
},
{
"epoch": 52.55,
"learning_rate": 6.868636611856077e-06,
"loss": 0.0883,
"step": 866000
},
{
"epoch": 52.58,
"learning_rate": 6.861052120623749e-06,
"loss": 0.089,
"step": 866500
},
{
"epoch": 52.61,
"learning_rate": 6.8534676293914214e-06,
"loss": 0.0892,
"step": 867000
},
{
"epoch": 52.64,
"learning_rate": 6.845883138159093e-06,
"loss": 0.0878,
"step": 867500
},
{
"epoch": 52.67,
"learning_rate": 6.838298646926764e-06,
"loss": 0.0846,
"step": 868000
},
{
"epoch": 52.7,
"learning_rate": 6.830714155694437e-06,
"loss": 0.0863,
"step": 868500
},
{
"epoch": 52.73,
"learning_rate": 6.823129664462108e-06,
"loss": 0.0883,
"step": 869000
},
{
"epoch": 52.76,
"learning_rate": 6.81554517322978e-06,
"loss": 0.0879,
"step": 869500
},
{
"epoch": 52.79,
"learning_rate": 6.8079758509799165e-06,
"loss": 0.0882,
"step": 870000
},
{
"epoch": 52.82,
"learning_rate": 6.800391359747589e-06,
"loss": 0.0866,
"step": 870500
},
{
"epoch": 52.85,
"learning_rate": 6.792806868515261e-06,
"loss": 0.0883,
"step": 871000
},
{
"epoch": 52.88,
"learning_rate": 6.785222377282933e-06,
"loss": 0.0874,
"step": 871500
},
{
"epoch": 52.91,
"learning_rate": 6.777653055033069e-06,
"loss": 0.0852,
"step": 872000
},
{
"epoch": 52.94,
"learning_rate": 6.770068563800741e-06,
"loss": 0.0867,
"step": 872500
},
{
"epoch": 52.97,
"learning_rate": 6.762484072568412e-06,
"loss": 0.0876,
"step": 873000
},
{
"epoch": 53.0,
"eval_bleu": 81.2699,
"eval_gen_len": 14.1363,
"eval_loss": 0.1710740476846695,
"eval_runtime": 178.2258,
"eval_samples_per_second": 90.47,
"eval_steps_per_second": 2.828,
"step": 873493
},
{
"epoch": 53.0,
"learning_rate": 6.7548995813360844e-06,
"loss": 0.0885,
"step": 873500
},
{
"epoch": 53.03,
"learning_rate": 6.747330259086221e-06,
"loss": 0.0851,
"step": 874000
},
{
"epoch": 53.06,
"learning_rate": 6.739745767853893e-06,
"loss": 0.0841,
"step": 874500
},
{
"epoch": 53.09,
"learning_rate": 6.732161276621565e-06,
"loss": 0.0838,
"step": 875000
},
{
"epoch": 53.12,
"learning_rate": 6.724576785389236e-06,
"loss": 0.0861,
"step": 875500
},
{
"epoch": 53.15,
"learning_rate": 6.717007463139373e-06,
"loss": 0.0841,
"step": 876000
},
{
"epoch": 53.18,
"learning_rate": 6.7094229719070445e-06,
"loss": 0.0842,
"step": 876500
},
{
"epoch": 53.21,
"learning_rate": 6.701838480674717e-06,
"loss": 0.0843,
"step": 877000
},
{
"epoch": 53.24,
"learning_rate": 6.694253989442389e-06,
"loss": 0.0868,
"step": 877500
},
{
"epoch": 53.27,
"learning_rate": 6.686669498210061e-06,
"loss": 0.0848,
"step": 878000
},
{
"epoch": 53.3,
"learning_rate": 6.679085006977733e-06,
"loss": 0.0865,
"step": 878500
},
{
"epoch": 53.33,
"learning_rate": 6.671500515745404e-06,
"loss": 0.0859,
"step": 879000
},
{
"epoch": 53.36,
"learning_rate": 6.663916024513076e-06,
"loss": 0.0873,
"step": 879500
},
{
"epoch": 53.39,
"learning_rate": 6.656361871245678e-06,
"loss": 0.087,
"step": 880000
},
{
"epoch": 53.43,
"learning_rate": 6.6487773800133495e-06,
"loss": 0.0869,
"step": 880500
},
{
"epoch": 53.46,
"learning_rate": 6.641192888781021e-06,
"loss": 0.0862,
"step": 881000
},
{
"epoch": 53.49,
"learning_rate": 6.633608397548693e-06,
"loss": 0.0869,
"step": 881500
},
{
"epoch": 53.52,
"learning_rate": 6.626023906316364e-06,
"loss": 0.0854,
"step": 882000
},
{
"epoch": 53.55,
"learning_rate": 6.618439415084037e-06,
"loss": 0.0861,
"step": 882500
},
{
"epoch": 53.58,
"learning_rate": 6.610854923851708e-06,
"loss": 0.0872,
"step": 883000
},
{
"epoch": 53.61,
"learning_rate": 6.603270432619381e-06,
"loss": 0.0857,
"step": 883500
},
{
"epoch": 53.64,
"learning_rate": 6.595701110369517e-06,
"loss": 0.0866,
"step": 884000
},
{
"epoch": 53.67,
"learning_rate": 6.58811661913719e-06,
"loss": 0.0871,
"step": 884500
},
{
"epoch": 53.7,
"learning_rate": 6.580532127904861e-06,
"loss": 0.0858,
"step": 885000
},
{
"epoch": 53.73,
"learning_rate": 6.572947636672532e-06,
"loss": 0.0857,
"step": 885500
},
{
"epoch": 53.76,
"learning_rate": 6.565378314422669e-06,
"loss": 0.0877,
"step": 886000
},
{
"epoch": 53.79,
"learning_rate": 6.5577938231903404e-06,
"loss": 0.0879,
"step": 886500
},
{
"epoch": 53.82,
"learning_rate": 6.5502093319580125e-06,
"loss": 0.0882,
"step": 887000
},
{
"epoch": 53.85,
"learning_rate": 6.542640009708149e-06,
"loss": 0.0853,
"step": 887500
},
{
"epoch": 53.88,
"learning_rate": 6.535055518475822e-06,
"loss": 0.0861,
"step": 888000
},
{
"epoch": 53.91,
"learning_rate": 6.527471027243493e-06,
"loss": 0.0875,
"step": 888500
},
{
"epoch": 53.94,
"learning_rate": 6.519886536011165e-06,
"loss": 0.0883,
"step": 889000
},
{
"epoch": 53.97,
"learning_rate": 6.512302044778836e-06,
"loss": 0.0864,
"step": 889500
},
{
"epoch": 54.0,
"eval_bleu": 81.2685,
"eval_gen_len": 14.1365,
"eval_loss": 0.1724194884300232,
"eval_runtime": 178.1859,
"eval_samples_per_second": 90.49,
"eval_steps_per_second": 2.829,
"step": 889974
}
],
"max_steps": 1318480,
"num_train_epochs": 80,
"total_flos": 1.2180001336815514e+18,
"trial_name": null,
"trial_params": null
}