byt5-base-es_maz / trainer_state.json
mekjr1's picture
End of training
cc5a6f3
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 100.0,
"global_step": 39300,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_bleu": 0.0473,
"eval_gen_len": 19.0,
"eval_loss": 1.034566044807434,
"eval_runtime": 35.414,
"eval_samples_per_second": 22.166,
"eval_steps_per_second": 1.412,
"step": 393
},
{
"epoch": 1.27,
"learning_rate": 4.936386768447838e-05,
"loss": 1.5209,
"step": 500
},
{
"epoch": 2.0,
"eval_bleu": 0.1413,
"eval_gen_len": 19.0,
"eval_loss": 0.8939195275306702,
"eval_runtime": 35.4148,
"eval_samples_per_second": 22.166,
"eval_steps_per_second": 1.412,
"step": 786
},
{
"epoch": 2.54,
"learning_rate": 4.8727735368956744e-05,
"loss": 1.0258,
"step": 1000
},
{
"epoch": 3.0,
"eval_bleu": 0.1641,
"eval_gen_len": 19.0,
"eval_loss": 0.8333808779716492,
"eval_runtime": 35.2354,
"eval_samples_per_second": 22.279,
"eval_steps_per_second": 1.419,
"step": 1179
},
{
"epoch": 3.82,
"learning_rate": 4.809160305343512e-05,
"loss": 0.9177,
"step": 1500
},
{
"epoch": 4.0,
"eval_bleu": 0.1729,
"eval_gen_len": 19.0,
"eval_loss": 0.7867220044136047,
"eval_runtime": 35.207,
"eval_samples_per_second": 22.297,
"eval_steps_per_second": 1.42,
"step": 1572
},
{
"epoch": 5.0,
"eval_bleu": 0.1742,
"eval_gen_len": 19.0,
"eval_loss": 0.7542837858200073,
"eval_runtime": 35.3809,
"eval_samples_per_second": 22.187,
"eval_steps_per_second": 1.413,
"step": 1965
},
{
"epoch": 5.09,
"learning_rate": 4.7455470737913485e-05,
"loss": 0.8482,
"step": 2000
},
{
"epoch": 6.0,
"eval_bleu": 0.1692,
"eval_gen_len": 19.0,
"eval_loss": 0.7316721677780151,
"eval_runtime": 35.4956,
"eval_samples_per_second": 22.115,
"eval_steps_per_second": 1.409,
"step": 2358
},
{
"epoch": 6.36,
"learning_rate": 4.681933842239186e-05,
"loss": 0.7957,
"step": 2500
},
{
"epoch": 7.0,
"eval_bleu": 0.1742,
"eval_gen_len": 19.0,
"eval_loss": 0.7106090188026428,
"eval_runtime": 35.1034,
"eval_samples_per_second": 22.362,
"eval_steps_per_second": 1.424,
"step": 2751
},
{
"epoch": 7.63,
"learning_rate": 4.618320610687023e-05,
"loss": 0.7557,
"step": 3000
},
{
"epoch": 8.0,
"eval_bleu": 0.216,
"eval_gen_len": 19.0,
"eval_loss": 0.6849327087402344,
"eval_runtime": 35.4066,
"eval_samples_per_second": 22.171,
"eval_steps_per_second": 1.412,
"step": 3144
},
{
"epoch": 8.91,
"learning_rate": 4.5547073791348604e-05,
"loss": 0.7204,
"step": 3500
},
{
"epoch": 9.0,
"eval_bleu": 0.189,
"eval_gen_len": 19.0,
"eval_loss": 0.6731250882148743,
"eval_runtime": 35.3153,
"eval_samples_per_second": 22.228,
"eval_steps_per_second": 1.416,
"step": 3537
},
{
"epoch": 10.0,
"eval_bleu": 0.2063,
"eval_gen_len": 19.0,
"eval_loss": 0.6562144160270691,
"eval_runtime": 35.358,
"eval_samples_per_second": 22.201,
"eval_steps_per_second": 1.414,
"step": 3930
},
{
"epoch": 10.18,
"learning_rate": 4.4910941475826975e-05,
"loss": 0.6901,
"step": 4000
},
{
"epoch": 11.0,
"eval_bleu": 0.2025,
"eval_gen_len": 19.0,
"eval_loss": 0.6510306596755981,
"eval_runtime": 35.1746,
"eval_samples_per_second": 22.317,
"eval_steps_per_second": 1.421,
"step": 4323
},
{
"epoch": 11.45,
"learning_rate": 4.4274809160305345e-05,
"loss": 0.6635,
"step": 4500
},
{
"epoch": 12.0,
"eval_bleu": 0.2266,
"eval_gen_len": 19.0,
"eval_loss": 0.6423279047012329,
"eval_runtime": 34.9781,
"eval_samples_per_second": 22.443,
"eval_steps_per_second": 1.429,
"step": 4716
},
{
"epoch": 12.72,
"learning_rate": 4.3638676844783716e-05,
"loss": 0.6346,
"step": 5000
},
{
"epoch": 13.0,
"eval_bleu": 0.2229,
"eval_gen_len": 19.0,
"eval_loss": 0.6330356597900391,
"eval_runtime": 35.0795,
"eval_samples_per_second": 22.378,
"eval_steps_per_second": 1.425,
"step": 5109
},
{
"epoch": 13.99,
"learning_rate": 4.300254452926209e-05,
"loss": 0.6132,
"step": 5500
},
{
"epoch": 14.0,
"eval_bleu": 0.2195,
"eval_gen_len": 19.0,
"eval_loss": 0.6257367134094238,
"eval_runtime": 34.9799,
"eval_samples_per_second": 22.441,
"eval_steps_per_second": 1.429,
"step": 5502
},
{
"epoch": 15.0,
"eval_bleu": 0.2344,
"eval_gen_len": 19.0,
"eval_loss": 0.6191786527633667,
"eval_runtime": 35.2808,
"eval_samples_per_second": 22.25,
"eval_steps_per_second": 1.417,
"step": 5895
},
{
"epoch": 15.27,
"learning_rate": 4.236641221374046e-05,
"loss": 0.5885,
"step": 6000
},
{
"epoch": 16.0,
"eval_bleu": 0.2424,
"eval_gen_len": 19.0,
"eval_loss": 0.6103959083557129,
"eval_runtime": 35.1057,
"eval_samples_per_second": 22.361,
"eval_steps_per_second": 1.424,
"step": 6288
},
{
"epoch": 16.54,
"learning_rate": 4.1730279898218835e-05,
"loss": 0.5682,
"step": 6500
},
{
"epoch": 17.0,
"eval_bleu": 0.2536,
"eval_gen_len": 19.0,
"eval_loss": 0.6047710180282593,
"eval_runtime": 35.079,
"eval_samples_per_second": 22.378,
"eval_steps_per_second": 1.425,
"step": 6681
},
{
"epoch": 17.81,
"learning_rate": 4.10941475826972e-05,
"loss": 0.5452,
"step": 7000
},
{
"epoch": 18.0,
"eval_bleu": 0.2541,
"eval_gen_len": 19.0,
"eval_loss": 0.6057348847389221,
"eval_runtime": 35.3873,
"eval_samples_per_second": 22.183,
"eval_steps_per_second": 1.413,
"step": 7074
},
{
"epoch": 19.0,
"eval_bleu": 0.2526,
"eval_gen_len": 19.0,
"eval_loss": 0.6047215461730957,
"eval_runtime": 35.004,
"eval_samples_per_second": 22.426,
"eval_steps_per_second": 1.428,
"step": 7467
},
{
"epoch": 19.08,
"learning_rate": 4.0458015267175576e-05,
"loss": 0.5294,
"step": 7500
},
{
"epoch": 20.0,
"eval_bleu": 0.2644,
"eval_gen_len": 19.0,
"eval_loss": 0.6065888404846191,
"eval_runtime": 35.2375,
"eval_samples_per_second": 22.277,
"eval_steps_per_second": 1.419,
"step": 7860
},
{
"epoch": 20.36,
"learning_rate": 3.982188295165395e-05,
"loss": 0.5072,
"step": 8000
},
{
"epoch": 21.0,
"eval_bleu": 0.2666,
"eval_gen_len": 19.0,
"eval_loss": 0.6080436706542969,
"eval_runtime": 35.085,
"eval_samples_per_second": 22.374,
"eval_steps_per_second": 1.425,
"step": 8253
},
{
"epoch": 21.63,
"learning_rate": 3.918575063613232e-05,
"loss": 0.4921,
"step": 8500
},
{
"epoch": 22.0,
"eval_bleu": 0.2499,
"eval_gen_len": 19.0,
"eval_loss": 0.6091626882553101,
"eval_runtime": 35.0913,
"eval_samples_per_second": 22.37,
"eval_steps_per_second": 1.425,
"step": 8646
},
{
"epoch": 22.9,
"learning_rate": 3.854961832061069e-05,
"loss": 0.4753,
"step": 9000
},
{
"epoch": 23.0,
"eval_bleu": 0.2719,
"eval_gen_len": 19.0,
"eval_loss": 0.6132063269615173,
"eval_runtime": 35.0981,
"eval_samples_per_second": 22.366,
"eval_steps_per_second": 1.425,
"step": 9039
},
{
"epoch": 24.0,
"eval_bleu": 0.2724,
"eval_gen_len": 19.0,
"eval_loss": 0.6088494658470154,
"eval_runtime": 35.1025,
"eval_samples_per_second": 22.363,
"eval_steps_per_second": 1.424,
"step": 9432
},
{
"epoch": 24.17,
"learning_rate": 3.791348600508906e-05,
"loss": 0.4597,
"step": 9500
},
{
"epoch": 25.0,
"eval_bleu": 0.2683,
"eval_gen_len": 19.0,
"eval_loss": 0.6127578020095825,
"eval_runtime": 35.0847,
"eval_samples_per_second": 22.374,
"eval_steps_per_second": 1.425,
"step": 9825
},
{
"epoch": 25.45,
"learning_rate": 3.727735368956743e-05,
"loss": 0.4443,
"step": 10000
},
{
"epoch": 26.0,
"eval_bleu": 0.2856,
"eval_gen_len": 19.0,
"eval_loss": 0.6183450818061829,
"eval_runtime": 35.0496,
"eval_samples_per_second": 22.397,
"eval_steps_per_second": 1.427,
"step": 10218
},
{
"epoch": 26.72,
"learning_rate": 3.66412213740458e-05,
"loss": 0.4301,
"step": 10500
},
{
"epoch": 27.0,
"eval_bleu": 0.3006,
"eval_gen_len": 19.0,
"eval_loss": 0.6246443390846252,
"eval_runtime": 35.1774,
"eval_samples_per_second": 22.315,
"eval_steps_per_second": 1.421,
"step": 10611
},
{
"epoch": 27.99,
"learning_rate": 3.600508905852418e-05,
"loss": 0.418,
"step": 11000
},
{
"epoch": 28.0,
"eval_bleu": 0.2788,
"eval_gen_len": 19.0,
"eval_loss": 0.6312348246574402,
"eval_runtime": 35.3395,
"eval_samples_per_second": 22.213,
"eval_steps_per_second": 1.415,
"step": 11004
},
{
"epoch": 29.0,
"eval_bleu": 0.2843,
"eval_gen_len": 19.0,
"eval_loss": 0.6295350790023804,
"eval_runtime": 35.2861,
"eval_samples_per_second": 22.247,
"eval_steps_per_second": 1.417,
"step": 11397
},
{
"epoch": 29.26,
"learning_rate": 3.536895674300255e-05,
"loss": 0.4002,
"step": 11500
},
{
"epoch": 30.0,
"eval_bleu": 0.2982,
"eval_gen_len": 19.0,
"eval_loss": 0.6349673271179199,
"eval_runtime": 35.0212,
"eval_samples_per_second": 22.415,
"eval_steps_per_second": 1.428,
"step": 11790
},
{
"epoch": 30.53,
"learning_rate": 3.473282442748092e-05,
"loss": 0.3913,
"step": 12000
},
{
"epoch": 31.0,
"eval_bleu": 0.2822,
"eval_gen_len": 19.0,
"eval_loss": 0.6441249847412109,
"eval_runtime": 35.0154,
"eval_samples_per_second": 22.419,
"eval_steps_per_second": 1.428,
"step": 12183
},
{
"epoch": 31.81,
"learning_rate": 3.409669211195929e-05,
"loss": 0.3755,
"step": 12500
},
{
"epoch": 32.0,
"eval_bleu": 0.3215,
"eval_gen_len": 19.0,
"eval_loss": 0.6430069804191589,
"eval_runtime": 35.1225,
"eval_samples_per_second": 22.35,
"eval_steps_per_second": 1.424,
"step": 12576
},
{
"epoch": 33.0,
"eval_bleu": 0.3024,
"eval_gen_len": 19.0,
"eval_loss": 0.6485504508018494,
"eval_runtime": 35.0519,
"eval_samples_per_second": 22.395,
"eval_steps_per_second": 1.426,
"step": 12969
},
{
"epoch": 33.08,
"learning_rate": 3.346055979643766e-05,
"loss": 0.3673,
"step": 13000
},
{
"epoch": 34.0,
"eval_bleu": 0.2985,
"eval_gen_len": 19.0,
"eval_loss": 0.6527048945426941,
"eval_runtime": 34.9946,
"eval_samples_per_second": 22.432,
"eval_steps_per_second": 1.429,
"step": 13362
},
{
"epoch": 34.35,
"learning_rate": 3.282442748091603e-05,
"loss": 0.352,
"step": 13500
},
{
"epoch": 35.0,
"eval_bleu": 0.31,
"eval_gen_len": 19.0,
"eval_loss": 0.6660041213035583,
"eval_runtime": 34.9679,
"eval_samples_per_second": 22.449,
"eval_steps_per_second": 1.43,
"step": 13755
},
{
"epoch": 35.62,
"learning_rate": 3.21882951653944e-05,
"loss": 0.3408,
"step": 14000
},
{
"epoch": 36.0,
"eval_bleu": 0.288,
"eval_gen_len": 19.0,
"eval_loss": 0.6737257838249207,
"eval_runtime": 35.0785,
"eval_samples_per_second": 22.378,
"eval_steps_per_second": 1.425,
"step": 14148
},
{
"epoch": 36.9,
"learning_rate": 3.155216284987277e-05,
"loss": 0.3307,
"step": 14500
},
{
"epoch": 37.0,
"eval_bleu": 0.2995,
"eval_gen_len": 19.0,
"eval_loss": 0.6772977113723755,
"eval_runtime": 35.0227,
"eval_samples_per_second": 22.414,
"eval_steps_per_second": 1.428,
"step": 14541
},
{
"epoch": 38.0,
"eval_bleu": 0.29,
"eval_gen_len": 19.0,
"eval_loss": 0.6903324723243713,
"eval_runtime": 35.1225,
"eval_samples_per_second": 22.35,
"eval_steps_per_second": 1.424,
"step": 14934
},
{
"epoch": 38.17,
"learning_rate": 3.091603053435115e-05,
"loss": 0.3182,
"step": 15000
},
{
"epoch": 39.0,
"eval_bleu": 0.2848,
"eval_gen_len": 19.0,
"eval_loss": 0.7059388160705566,
"eval_runtime": 34.9533,
"eval_samples_per_second": 22.459,
"eval_steps_per_second": 1.43,
"step": 15327
},
{
"epoch": 39.44,
"learning_rate": 3.0279898218829518e-05,
"loss": 0.3077,
"step": 15500
},
{
"epoch": 40.0,
"eval_bleu": 0.2878,
"eval_gen_len": 19.0,
"eval_loss": 0.6985650658607483,
"eval_runtime": 35.0758,
"eval_samples_per_second": 22.38,
"eval_steps_per_second": 1.425,
"step": 15720
},
{
"epoch": 40.71,
"learning_rate": 2.9643765903307892e-05,
"loss": 0.298,
"step": 16000
},
{
"epoch": 41.0,
"eval_bleu": 0.2859,
"eval_gen_len": 19.0,
"eval_loss": 0.7053300142288208,
"eval_runtime": 35.1562,
"eval_samples_per_second": 22.329,
"eval_steps_per_second": 1.422,
"step": 16113
},
{
"epoch": 41.98,
"learning_rate": 2.900763358778626e-05,
"loss": 0.29,
"step": 16500
},
{
"epoch": 42.0,
"eval_bleu": 0.2871,
"eval_gen_len": 19.0,
"eval_loss": 0.719767689704895,
"eval_runtime": 35.1382,
"eval_samples_per_second": 22.34,
"eval_steps_per_second": 1.423,
"step": 16506
},
{
"epoch": 43.0,
"eval_bleu": 0.2813,
"eval_gen_len": 19.0,
"eval_loss": 0.7274527549743652,
"eval_runtime": 35.0854,
"eval_samples_per_second": 22.374,
"eval_steps_per_second": 1.425,
"step": 16899
},
{
"epoch": 43.26,
"learning_rate": 2.8371501272264633e-05,
"loss": 0.2787,
"step": 17000
},
{
"epoch": 44.0,
"eval_bleu": 0.2972,
"eval_gen_len": 19.0,
"eval_loss": 0.7369562983512878,
"eval_runtime": 35.1464,
"eval_samples_per_second": 22.335,
"eval_steps_per_second": 1.423,
"step": 17292
},
{
"epoch": 44.53,
"learning_rate": 2.7735368956743e-05,
"loss": 0.268,
"step": 17500
},
{
"epoch": 45.0,
"eval_bleu": 0.26,
"eval_gen_len": 19.0,
"eval_loss": 0.7426473498344421,
"eval_runtime": 35.1116,
"eval_samples_per_second": 22.357,
"eval_steps_per_second": 1.424,
"step": 17685
},
{
"epoch": 45.8,
"learning_rate": 2.7099236641221375e-05,
"loss": 0.2638,
"step": 18000
},
{
"epoch": 46.0,
"eval_bleu": 0.2846,
"eval_gen_len": 19.0,
"eval_loss": 0.7529018521308899,
"eval_runtime": 35.1433,
"eval_samples_per_second": 22.337,
"eval_steps_per_second": 1.423,
"step": 18078
},
{
"epoch": 47.0,
"eval_bleu": 0.2898,
"eval_gen_len": 19.0,
"eval_loss": 0.7602871060371399,
"eval_runtime": 35.1079,
"eval_samples_per_second": 22.36,
"eval_steps_per_second": 1.424,
"step": 18471
},
{
"epoch": 47.07,
"learning_rate": 2.6463104325699745e-05,
"loss": 0.253,
"step": 18500
},
{
"epoch": 48.0,
"eval_bleu": 0.277,
"eval_gen_len": 19.0,
"eval_loss": 0.7711123824119568,
"eval_runtime": 35.1881,
"eval_samples_per_second": 22.309,
"eval_steps_per_second": 1.421,
"step": 18864
},
{
"epoch": 48.35,
"learning_rate": 2.582697201017812e-05,
"loss": 0.244,
"step": 19000
},
{
"epoch": 49.0,
"eval_bleu": 0.3005,
"eval_gen_len": 19.0,
"eval_loss": 0.7778590321540833,
"eval_runtime": 35.1873,
"eval_samples_per_second": 22.309,
"eval_steps_per_second": 1.421,
"step": 19257
},
{
"epoch": 49.62,
"learning_rate": 2.5190839694656487e-05,
"loss": 0.2368,
"step": 19500
},
{
"epoch": 50.0,
"eval_bleu": 0.2931,
"eval_gen_len": 19.0,
"eval_loss": 0.7815132737159729,
"eval_runtime": 35.108,
"eval_samples_per_second": 22.36,
"eval_steps_per_second": 1.424,
"step": 19650
},
{
"epoch": 50.89,
"learning_rate": 2.455470737913486e-05,
"loss": 0.2301,
"step": 20000
},
{
"epoch": 51.0,
"eval_bleu": 0.2998,
"eval_gen_len": 19.0,
"eval_loss": 0.8020169734954834,
"eval_runtime": 35.1735,
"eval_samples_per_second": 22.318,
"eval_steps_per_second": 1.422,
"step": 20043
},
{
"epoch": 52.0,
"eval_bleu": 0.2806,
"eval_gen_len": 19.0,
"eval_loss": 0.8051058053970337,
"eval_runtime": 35.0481,
"eval_samples_per_second": 22.398,
"eval_steps_per_second": 1.427,
"step": 20436
},
{
"epoch": 52.16,
"learning_rate": 2.391857506361323e-05,
"loss": 0.2217,
"step": 20500
},
{
"epoch": 53.0,
"eval_bleu": 0.294,
"eval_gen_len": 19.0,
"eval_loss": 0.811935544013977,
"eval_runtime": 35.104,
"eval_samples_per_second": 22.362,
"eval_steps_per_second": 1.424,
"step": 20829
},
{
"epoch": 53.44,
"learning_rate": 2.3282442748091605e-05,
"loss": 0.2158,
"step": 21000
},
{
"epoch": 54.0,
"eval_bleu": 0.2921,
"eval_gen_len": 19.0,
"eval_loss": 0.8288211226463318,
"eval_runtime": 35.1084,
"eval_samples_per_second": 22.359,
"eval_steps_per_second": 1.424,
"step": 21222
},
{
"epoch": 54.71,
"learning_rate": 2.2646310432569976e-05,
"loss": 0.2079,
"step": 21500
},
{
"epoch": 55.0,
"eval_bleu": 0.2954,
"eval_gen_len": 19.0,
"eval_loss": 0.8340888619422913,
"eval_runtime": 35.1775,
"eval_samples_per_second": 22.315,
"eval_steps_per_second": 1.421,
"step": 21615
},
{
"epoch": 55.98,
"learning_rate": 2.2010178117048347e-05,
"loss": 0.2027,
"step": 22000
},
{
"epoch": 56.0,
"eval_bleu": 0.2884,
"eval_gen_len": 19.0,
"eval_loss": 0.8364927768707275,
"eval_runtime": 35.072,
"eval_samples_per_second": 22.383,
"eval_steps_per_second": 1.426,
"step": 22008
},
{
"epoch": 57.0,
"eval_bleu": 0.2995,
"eval_gen_len": 19.0,
"eval_loss": 0.8441442251205444,
"eval_runtime": 35.1154,
"eval_samples_per_second": 22.355,
"eval_steps_per_second": 1.424,
"step": 22401
},
{
"epoch": 57.25,
"learning_rate": 2.1374045801526718e-05,
"loss": 0.1954,
"step": 22500
},
{
"epoch": 58.0,
"eval_bleu": 0.3115,
"eval_gen_len": 19.0,
"eval_loss": 0.8488335609436035,
"eval_runtime": 34.9312,
"eval_samples_per_second": 22.473,
"eval_steps_per_second": 1.431,
"step": 22794
},
{
"epoch": 58.52,
"learning_rate": 2.0737913486005088e-05,
"loss": 0.1918,
"step": 23000
},
{
"epoch": 59.0,
"eval_bleu": 0.3085,
"eval_gen_len": 19.0,
"eval_loss": 0.8709967732429504,
"eval_runtime": 35.0738,
"eval_samples_per_second": 22.381,
"eval_steps_per_second": 1.426,
"step": 23187
},
{
"epoch": 59.8,
"learning_rate": 2.0101781170483462e-05,
"loss": 0.1857,
"step": 23500
},
{
"epoch": 60.0,
"eval_bleu": 0.2932,
"eval_gen_len": 19.0,
"eval_loss": 0.8718471527099609,
"eval_runtime": 35.1809,
"eval_samples_per_second": 22.313,
"eval_steps_per_second": 1.421,
"step": 23580
},
{
"epoch": 61.0,
"eval_bleu": 0.2923,
"eval_gen_len": 19.0,
"eval_loss": 0.8777070045471191,
"eval_runtime": 35.0923,
"eval_samples_per_second": 22.37,
"eval_steps_per_second": 1.425,
"step": 23973
},
{
"epoch": 61.07,
"learning_rate": 1.9465648854961833e-05,
"loss": 0.1796,
"step": 24000
},
{
"epoch": 62.0,
"eval_bleu": 0.3038,
"eval_gen_len": 19.0,
"eval_loss": 0.8831952214241028,
"eval_runtime": 35.1476,
"eval_samples_per_second": 22.334,
"eval_steps_per_second": 1.423,
"step": 24366
},
{
"epoch": 62.34,
"learning_rate": 1.8829516539440204e-05,
"loss": 0.1753,
"step": 24500
},
{
"epoch": 63.0,
"eval_bleu": 0.3063,
"eval_gen_len": 19.0,
"eval_loss": 0.8996883630752563,
"eval_runtime": 35.1466,
"eval_samples_per_second": 22.335,
"eval_steps_per_second": 1.423,
"step": 24759
},
{
"epoch": 63.61,
"learning_rate": 1.8193384223918574e-05,
"loss": 0.1703,
"step": 25000
},
{
"epoch": 64.0,
"eval_bleu": 0.3047,
"eval_gen_len": 19.0,
"eval_loss": 0.9198061227798462,
"eval_runtime": 35.8655,
"eval_samples_per_second": 21.887,
"eval_steps_per_second": 1.394,
"step": 25152
},
{
"epoch": 64.89,
"learning_rate": 1.7557251908396945e-05,
"loss": 0.1661,
"step": 25500
},
{
"epoch": 65.0,
"eval_bleu": 0.3159,
"eval_gen_len": 19.0,
"eval_loss": 0.9193503856658936,
"eval_runtime": 35.1603,
"eval_samples_per_second": 22.326,
"eval_steps_per_second": 1.422,
"step": 25545
},
{
"epoch": 66.0,
"eval_bleu": 0.2962,
"eval_gen_len": 19.0,
"eval_loss": 0.9242938756942749,
"eval_runtime": 35.073,
"eval_samples_per_second": 22.382,
"eval_steps_per_second": 1.426,
"step": 25938
},
{
"epoch": 66.16,
"learning_rate": 1.692111959287532e-05,
"loss": 0.1606,
"step": 26000
},
{
"epoch": 67.0,
"eval_bleu": 0.3065,
"eval_gen_len": 19.0,
"eval_loss": 0.9375536441802979,
"eval_runtime": 34.9169,
"eval_samples_per_second": 22.482,
"eval_steps_per_second": 1.432,
"step": 26331
},
{
"epoch": 67.43,
"learning_rate": 1.628498727735369e-05,
"loss": 0.1582,
"step": 26500
},
{
"epoch": 68.0,
"eval_bleu": 0.3002,
"eval_gen_len": 19.0,
"eval_loss": 0.9338624477386475,
"eval_runtime": 35.1084,
"eval_samples_per_second": 22.359,
"eval_steps_per_second": 1.424,
"step": 26724
},
{
"epoch": 68.7,
"learning_rate": 1.5648854961832064e-05,
"loss": 0.1533,
"step": 27000
},
{
"epoch": 69.0,
"eval_bleu": 0.3096,
"eval_gen_len": 19.0,
"eval_loss": 0.9420493841171265,
"eval_runtime": 35.122,
"eval_samples_per_second": 22.351,
"eval_steps_per_second": 1.424,
"step": 27117
},
{
"epoch": 69.97,
"learning_rate": 1.5012722646310435e-05,
"loss": 0.1503,
"step": 27500
},
{
"epoch": 70.0,
"eval_bleu": 0.2919,
"eval_gen_len": 19.0,
"eval_loss": 0.9522092342376709,
"eval_runtime": 35.4031,
"eval_samples_per_second": 22.173,
"eval_steps_per_second": 1.412,
"step": 27510
},
{
"epoch": 71.0,
"eval_bleu": 0.3085,
"eval_gen_len": 19.0,
"eval_loss": 0.9620120525360107,
"eval_runtime": 35.926,
"eval_samples_per_second": 21.85,
"eval_steps_per_second": 1.392,
"step": 27903
},
{
"epoch": 71.25,
"learning_rate": 1.4376590330788805e-05,
"loss": 0.1469,
"step": 28000
},
{
"epoch": 72.0,
"eval_bleu": 0.2946,
"eval_gen_len": 19.0,
"eval_loss": 0.9673256874084473,
"eval_runtime": 35.209,
"eval_samples_per_second": 22.295,
"eval_steps_per_second": 1.42,
"step": 28296
},
{
"epoch": 72.52,
"learning_rate": 1.3740458015267178e-05,
"loss": 0.1416,
"step": 28500
},
{
"epoch": 73.0,
"eval_bleu": 0.3019,
"eval_gen_len": 19.0,
"eval_loss": 0.9706256985664368,
"eval_runtime": 35.1594,
"eval_samples_per_second": 22.327,
"eval_steps_per_second": 1.422,
"step": 28689
},
{
"epoch": 73.79,
"learning_rate": 1.3104325699745548e-05,
"loss": 0.1401,
"step": 29000
},
{
"epoch": 74.0,
"eval_bleu": 0.3103,
"eval_gen_len": 19.0,
"eval_loss": 0.9876586198806763,
"eval_runtime": 35.0824,
"eval_samples_per_second": 22.376,
"eval_steps_per_second": 1.425,
"step": 29082
},
{
"epoch": 75.0,
"eval_bleu": 0.2903,
"eval_gen_len": 19.0,
"eval_loss": 0.9860377311706543,
"eval_runtime": 35.2841,
"eval_samples_per_second": 22.248,
"eval_steps_per_second": 1.417,
"step": 29475
},
{
"epoch": 75.06,
"learning_rate": 1.2468193384223919e-05,
"loss": 0.1376,
"step": 29500
},
{
"epoch": 76.0,
"eval_bleu": 0.2855,
"eval_gen_len": 19.0,
"eval_loss": 1.0073317289352417,
"eval_runtime": 35.1549,
"eval_samples_per_second": 22.33,
"eval_steps_per_second": 1.422,
"step": 29868
},
{
"epoch": 76.34,
"learning_rate": 1.1832061068702292e-05,
"loss": 0.1341,
"step": 30000
},
{
"epoch": 77.0,
"eval_bleu": 0.2927,
"eval_gen_len": 19.0,
"eval_loss": 1.0067394971847534,
"eval_runtime": 35.7641,
"eval_samples_per_second": 21.949,
"eval_steps_per_second": 1.398,
"step": 30261
},
{
"epoch": 77.61,
"learning_rate": 1.1195928753180662e-05,
"loss": 0.1307,
"step": 30500
},
{
"epoch": 78.0,
"eval_bleu": 0.3,
"eval_gen_len": 19.0,
"eval_loss": 1.006367564201355,
"eval_runtime": 35.3184,
"eval_samples_per_second": 22.226,
"eval_steps_per_second": 1.416,
"step": 30654
},
{
"epoch": 78.88,
"learning_rate": 1.0559796437659033e-05,
"loss": 0.1296,
"step": 31000
},
{
"epoch": 79.0,
"eval_bleu": 0.2886,
"eval_gen_len": 19.0,
"eval_loss": 1.022121787071228,
"eval_runtime": 35.3415,
"eval_samples_per_second": 22.212,
"eval_steps_per_second": 1.415,
"step": 31047
},
{
"epoch": 80.0,
"eval_bleu": 0.297,
"eval_gen_len": 19.0,
"eval_loss": 1.021680235862732,
"eval_runtime": 35.3179,
"eval_samples_per_second": 22.227,
"eval_steps_per_second": 1.416,
"step": 31440
},
{
"epoch": 80.15,
"learning_rate": 9.923664122137405e-06,
"loss": 0.126,
"step": 31500
},
{
"epoch": 81.0,
"eval_bleu": 0.2919,
"eval_gen_len": 19.0,
"eval_loss": 1.027828335762024,
"eval_runtime": 35.4035,
"eval_samples_per_second": 22.173,
"eval_steps_per_second": 1.412,
"step": 31833
},
{
"epoch": 81.42,
"learning_rate": 9.287531806615776e-06,
"loss": 0.1238,
"step": 32000
},
{
"epoch": 82.0,
"eval_bleu": 0.2951,
"eval_gen_len": 19.0,
"eval_loss": 1.03290855884552,
"eval_runtime": 35.2364,
"eval_samples_per_second": 22.278,
"eval_steps_per_second": 1.419,
"step": 32226
},
{
"epoch": 82.7,
"learning_rate": 8.651399491094148e-06,
"loss": 0.1214,
"step": 32500
},
{
"epoch": 83.0,
"eval_bleu": 0.3043,
"eval_gen_len": 19.0,
"eval_loss": 1.0350806713104248,
"eval_runtime": 35.1544,
"eval_samples_per_second": 22.33,
"eval_steps_per_second": 1.422,
"step": 32619
},
{
"epoch": 83.97,
"learning_rate": 8.015267175572519e-06,
"loss": 0.1206,
"step": 33000
},
{
"epoch": 84.0,
"eval_bleu": 0.2964,
"eval_gen_len": 19.0,
"eval_loss": 1.04984450340271,
"eval_runtime": 35.3457,
"eval_samples_per_second": 22.209,
"eval_steps_per_second": 1.415,
"step": 33012
},
{
"epoch": 85.0,
"eval_bleu": 0.2971,
"eval_gen_len": 19.0,
"eval_loss": 1.0432653427124023,
"eval_runtime": 35.2727,
"eval_samples_per_second": 22.255,
"eval_steps_per_second": 1.418,
"step": 33405
},
{
"epoch": 85.24,
"learning_rate": 7.379134860050891e-06,
"loss": 0.1186,
"step": 33500
},
{
"epoch": 86.0,
"eval_bleu": 0.2964,
"eval_gen_len": 19.0,
"eval_loss": 1.05250883102417,
"eval_runtime": 35.1615,
"eval_samples_per_second": 22.326,
"eval_steps_per_second": 1.422,
"step": 33798
},
{
"epoch": 86.51,
"learning_rate": 6.743002544529263e-06,
"loss": 0.116,
"step": 34000
},
{
"epoch": 87.0,
"eval_bleu": 0.2943,
"eval_gen_len": 19.0,
"eval_loss": 1.0547308921813965,
"eval_runtime": 35.1782,
"eval_samples_per_second": 22.315,
"eval_steps_per_second": 1.421,
"step": 34191
},
{
"epoch": 87.79,
"learning_rate": 6.106870229007634e-06,
"loss": 0.116,
"step": 34500
},
{
"epoch": 88.0,
"eval_bleu": 0.2876,
"eval_gen_len": 19.0,
"eval_loss": 1.0584968328475952,
"eval_runtime": 35.1038,
"eval_samples_per_second": 22.362,
"eval_steps_per_second": 1.424,
"step": 34584
},
{
"epoch": 89.0,
"eval_bleu": 0.2904,
"eval_gen_len": 19.0,
"eval_loss": 1.0630890130996704,
"eval_runtime": 35.1555,
"eval_samples_per_second": 22.329,
"eval_steps_per_second": 1.422,
"step": 34977
},
{
"epoch": 89.06,
"learning_rate": 5.470737913486006e-06,
"loss": 0.1131,
"step": 35000
},
{
"epoch": 90.0,
"eval_bleu": 0.2859,
"eval_gen_len": 19.0,
"eval_loss": 1.0678483247756958,
"eval_runtime": 35.1133,
"eval_samples_per_second": 22.356,
"eval_steps_per_second": 1.424,
"step": 35370
},
{
"epoch": 90.33,
"learning_rate": 4.834605597964377e-06,
"loss": 0.1124,
"step": 35500
},
{
"epoch": 91.0,
"eval_bleu": 0.3027,
"eval_gen_len": 19.0,
"eval_loss": 1.0763660669326782,
"eval_runtime": 35.192,
"eval_samples_per_second": 22.306,
"eval_steps_per_second": 1.421,
"step": 35763
},
{
"epoch": 91.6,
"learning_rate": 4.198473282442748e-06,
"loss": 0.1109,
"step": 36000
},
{
"epoch": 92.0,
"eval_bleu": 0.3037,
"eval_gen_len": 19.0,
"eval_loss": 1.0759409666061401,
"eval_runtime": 35.1267,
"eval_samples_per_second": 22.348,
"eval_steps_per_second": 1.423,
"step": 36156
},
{
"epoch": 92.88,
"learning_rate": 3.56234096692112e-06,
"loss": 0.1097,
"step": 36500
},
{
"epoch": 93.0,
"eval_bleu": 0.2962,
"eval_gen_len": 19.0,
"eval_loss": 1.073776364326477,
"eval_runtime": 35.2856,
"eval_samples_per_second": 22.247,
"eval_steps_per_second": 1.417,
"step": 36549
},
{
"epoch": 94.0,
"eval_bleu": 0.2966,
"eval_gen_len": 19.0,
"eval_loss": 1.0854607820510864,
"eval_runtime": 35.1676,
"eval_samples_per_second": 22.322,
"eval_steps_per_second": 1.422,
"step": 36942
},
{
"epoch": 94.15,
"learning_rate": 2.9262086513994914e-06,
"loss": 0.1093,
"step": 37000
},
{
"epoch": 95.0,
"eval_bleu": 0.2968,
"eval_gen_len": 19.0,
"eval_loss": 1.090211033821106,
"eval_runtime": 35.1673,
"eval_samples_per_second": 22.322,
"eval_steps_per_second": 1.422,
"step": 37335
},
{
"epoch": 95.42,
"learning_rate": 2.2900763358778625e-06,
"loss": 0.1082,
"step": 37500
},
{
"epoch": 96.0,
"eval_bleu": 0.2958,
"eval_gen_len": 19.0,
"eval_loss": 1.0858522653579712,
"eval_runtime": 35.1286,
"eval_samples_per_second": 22.346,
"eval_steps_per_second": 1.423,
"step": 37728
},
{
"epoch": 96.69,
"learning_rate": 1.653944020356234e-06,
"loss": 0.1073,
"step": 38000
},
{
"epoch": 97.0,
"eval_bleu": 0.3023,
"eval_gen_len": 19.0,
"eval_loss": 1.0867019891738892,
"eval_runtime": 35.1622,
"eval_samples_per_second": 22.325,
"eval_steps_per_second": 1.422,
"step": 38121
},
{
"epoch": 97.96,
"learning_rate": 1.0178117048346056e-06,
"loss": 0.1063,
"step": 38500
},
{
"epoch": 98.0,
"eval_bleu": 0.3004,
"eval_gen_len": 19.0,
"eval_loss": 1.090205430984497,
"eval_runtime": 35.3454,
"eval_samples_per_second": 22.209,
"eval_steps_per_second": 1.415,
"step": 38514
},
{
"epoch": 99.0,
"eval_bleu": 0.3018,
"eval_gen_len": 19.0,
"eval_loss": 1.0909733772277832,
"eval_runtime": 35.2503,
"eval_samples_per_second": 22.269,
"eval_steps_per_second": 1.418,
"step": 38907
},
{
"epoch": 99.24,
"learning_rate": 3.816793893129771e-07,
"loss": 0.1065,
"step": 39000
},
{
"epoch": 100.0,
"eval_bleu": 0.3021,
"eval_gen_len": 19.0,
"eval_loss": 1.0916588306427002,
"eval_runtime": 35.2577,
"eval_samples_per_second": 22.265,
"eval_steps_per_second": 1.418,
"step": 39300
},
{
"epoch": 100.0,
"step": 39300,
"total_flos": 2.232877359218688e+17,
"train_loss": 0.3279189860123108,
"train_runtime": 20714.0027,
"train_samples_per_second": 30.313,
"train_steps_per_second": 1.897
}
],
"max_steps": 39300,
"num_train_epochs": 100,
"total_flos": 2.232877359218688e+17,
"trial_name": null,
"trial_params": null
}