byt5-base-es_hch / trainer_state.json
mekjr1's picture
End of training
c2b59ec
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 100.0,
"global_step": 39800,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_bleu": 0.1048,
"eval_gen_len": 19.0,
"eval_loss": 1.1654638051986694,
"eval_runtime": 35.8,
"eval_samples_per_second": 22.207,
"eval_steps_per_second": 1.397,
"step": 398
},
{
"epoch": 1.26,
"learning_rate": 4.937185929648241e-05,
"loss": 1.5993,
"step": 500
},
{
"epoch": 2.0,
"eval_bleu": 0.0762,
"eval_gen_len": 19.0,
"eval_loss": 1.0294471979141235,
"eval_runtime": 35.6457,
"eval_samples_per_second": 22.303,
"eval_steps_per_second": 1.403,
"step": 796
},
{
"epoch": 2.51,
"learning_rate": 4.874371859296483e-05,
"loss": 1.1714,
"step": 1000
},
{
"epoch": 3.0,
"eval_bleu": 0.0863,
"eval_gen_len": 19.0,
"eval_loss": 0.9574553370475769,
"eval_runtime": 35.6866,
"eval_samples_per_second": 22.277,
"eval_steps_per_second": 1.401,
"step": 1194
},
{
"epoch": 3.77,
"learning_rate": 4.8115577889447235e-05,
"loss": 1.0539,
"step": 1500
},
{
"epoch": 4.0,
"eval_bleu": 0.0769,
"eval_gen_len": 19.0,
"eval_loss": 0.9043193459510803,
"eval_runtime": 35.4817,
"eval_samples_per_second": 22.406,
"eval_steps_per_second": 1.409,
"step": 1592
},
{
"epoch": 5.0,
"eval_bleu": 0.0792,
"eval_gen_len": 19.0,
"eval_loss": 0.8518753051757812,
"eval_runtime": 35.6246,
"eval_samples_per_second": 22.316,
"eval_steps_per_second": 1.404,
"step": 1990
},
{
"epoch": 5.03,
"learning_rate": 4.748743718592965e-05,
"loss": 0.9762,
"step": 2000
},
{
"epoch": 6.0,
"eval_bleu": 0.0563,
"eval_gen_len": 19.0,
"eval_loss": 0.8147059679031372,
"eval_runtime": 35.5873,
"eval_samples_per_second": 22.339,
"eval_steps_per_second": 1.405,
"step": 2388
},
{
"epoch": 6.28,
"learning_rate": 4.685929648241206e-05,
"loss": 0.9072,
"step": 2500
},
{
"epoch": 7.0,
"eval_bleu": 0.0856,
"eval_gen_len": 19.0,
"eval_loss": 0.7833035588264465,
"eval_runtime": 35.6208,
"eval_samples_per_second": 22.318,
"eval_steps_per_second": 1.404,
"step": 2786
},
{
"epoch": 7.54,
"learning_rate": 4.6231155778894475e-05,
"loss": 0.8502,
"step": 3000
},
{
"epoch": 8.0,
"eval_bleu": 0.091,
"eval_gen_len": 19.0,
"eval_loss": 0.7526289820671082,
"eval_runtime": 35.6502,
"eval_samples_per_second": 22.3,
"eval_steps_per_second": 1.403,
"step": 3184
},
{
"epoch": 8.79,
"learning_rate": 4.5603015075376884e-05,
"loss": 0.8081,
"step": 3500
},
{
"epoch": 9.0,
"eval_bleu": 0.1344,
"eval_gen_len": 19.0,
"eval_loss": 0.7389398813247681,
"eval_runtime": 35.7611,
"eval_samples_per_second": 22.231,
"eval_steps_per_second": 1.398,
"step": 3582
},
{
"epoch": 10.0,
"eval_bleu": 0.1271,
"eval_gen_len": 19.0,
"eval_loss": 0.718666672706604,
"eval_runtime": 35.7669,
"eval_samples_per_second": 22.227,
"eval_steps_per_second": 1.398,
"step": 3980
},
{
"epoch": 10.05,
"learning_rate": 4.49748743718593e-05,
"loss": 0.7683,
"step": 4000
},
{
"epoch": 11.0,
"eval_bleu": 0.1299,
"eval_gen_len": 19.0,
"eval_loss": 0.7038307189941406,
"eval_runtime": 35.6501,
"eval_samples_per_second": 22.3,
"eval_steps_per_second": 1.403,
"step": 4378
},
{
"epoch": 11.31,
"learning_rate": 4.434673366834171e-05,
"loss": 0.7318,
"step": 4500
},
{
"epoch": 12.0,
"eval_bleu": 0.1213,
"eval_gen_len": 19.0,
"eval_loss": 0.6900615692138672,
"eval_runtime": 35.6533,
"eval_samples_per_second": 22.298,
"eval_steps_per_second": 1.402,
"step": 4776
},
{
"epoch": 12.56,
"learning_rate": 4.3718592964824124e-05,
"loss": 0.6998,
"step": 5000
},
{
"epoch": 13.0,
"eval_bleu": 0.1583,
"eval_gen_len": 19.0,
"eval_loss": 0.6753336787223816,
"eval_runtime": 35.6402,
"eval_samples_per_second": 22.306,
"eval_steps_per_second": 1.403,
"step": 5174
},
{
"epoch": 13.82,
"learning_rate": 4.309045226130653e-05,
"loss": 0.6683,
"step": 5500
},
{
"epoch": 14.0,
"eval_bleu": 0.145,
"eval_gen_len": 19.0,
"eval_loss": 0.6631056070327759,
"eval_runtime": 35.7643,
"eval_samples_per_second": 22.229,
"eval_steps_per_second": 1.398,
"step": 5572
},
{
"epoch": 15.0,
"eval_bleu": 0.1516,
"eval_gen_len": 19.0,
"eval_loss": 0.6530159711837769,
"eval_runtime": 35.7246,
"eval_samples_per_second": 22.254,
"eval_steps_per_second": 1.4,
"step": 5970
},
{
"epoch": 15.08,
"learning_rate": 4.246231155778895e-05,
"loss": 0.6406,
"step": 6000
},
{
"epoch": 16.0,
"eval_bleu": 0.1599,
"eval_gen_len": 19.0,
"eval_loss": 0.6454012393951416,
"eval_runtime": 35.7142,
"eval_samples_per_second": 22.26,
"eval_steps_per_second": 1.4,
"step": 6368
},
{
"epoch": 16.33,
"learning_rate": 4.183417085427136e-05,
"loss": 0.6128,
"step": 6500
},
{
"epoch": 17.0,
"eval_bleu": 0.1478,
"eval_gen_len": 19.0,
"eval_loss": 0.6383265256881714,
"eval_runtime": 35.664,
"eval_samples_per_second": 22.291,
"eval_steps_per_second": 1.402,
"step": 6766
},
{
"epoch": 17.59,
"learning_rate": 4.120603015075377e-05,
"loss": 0.5911,
"step": 7000
},
{
"epoch": 18.0,
"eval_bleu": 0.1571,
"eval_gen_len": 19.0,
"eval_loss": 0.6369075179100037,
"eval_runtime": 35.6591,
"eval_samples_per_second": 22.294,
"eval_steps_per_second": 1.402,
"step": 7164
},
{
"epoch": 18.84,
"learning_rate": 4.057788944723618e-05,
"loss": 0.5721,
"step": 7500
},
{
"epoch": 19.0,
"eval_bleu": 0.1668,
"eval_gen_len": 19.0,
"eval_loss": 0.6339399814605713,
"eval_runtime": 35.6693,
"eval_samples_per_second": 22.288,
"eval_steps_per_second": 1.402,
"step": 7562
},
{
"epoch": 20.0,
"eval_bleu": 0.1611,
"eval_gen_len": 19.0,
"eval_loss": 0.6295469403266907,
"eval_runtime": 35.5912,
"eval_samples_per_second": 22.337,
"eval_steps_per_second": 1.405,
"step": 7960
},
{
"epoch": 20.1,
"learning_rate": 3.9949748743718597e-05,
"loss": 0.547,
"step": 8000
},
{
"epoch": 21.0,
"eval_bleu": 0.1722,
"eval_gen_len": 19.0,
"eval_loss": 0.6267198324203491,
"eval_runtime": 35.8366,
"eval_samples_per_second": 22.184,
"eval_steps_per_second": 1.395,
"step": 8358
},
{
"epoch": 21.36,
"learning_rate": 3.9321608040201005e-05,
"loss": 0.529,
"step": 8500
},
{
"epoch": 22.0,
"eval_bleu": 0.1656,
"eval_gen_len": 19.0,
"eval_loss": 0.6275119781494141,
"eval_runtime": 35.667,
"eval_samples_per_second": 22.289,
"eval_steps_per_second": 1.402,
"step": 8756
},
{
"epoch": 22.61,
"learning_rate": 3.869346733668342e-05,
"loss": 0.5115,
"step": 9000
},
{
"epoch": 23.0,
"eval_bleu": 0.1684,
"eval_gen_len": 19.0,
"eval_loss": 0.6284548044204712,
"eval_runtime": 35.6512,
"eval_samples_per_second": 22.299,
"eval_steps_per_second": 1.402,
"step": 9154
},
{
"epoch": 23.87,
"learning_rate": 3.806532663316583e-05,
"loss": 0.4934,
"step": 9500
},
{
"epoch": 24.0,
"eval_bleu": 0.1696,
"eval_gen_len": 19.0,
"eval_loss": 0.6268807053565979,
"eval_runtime": 35.7633,
"eval_samples_per_second": 22.229,
"eval_steps_per_second": 1.398,
"step": 9552
},
{
"epoch": 25.0,
"eval_bleu": 0.182,
"eval_gen_len": 19.0,
"eval_loss": 0.6358157992362976,
"eval_runtime": 36.0997,
"eval_samples_per_second": 22.022,
"eval_steps_per_second": 1.385,
"step": 9950
},
{
"epoch": 25.13,
"learning_rate": 3.7437185929648245e-05,
"loss": 0.4773,
"step": 10000
},
{
"epoch": 26.0,
"eval_bleu": 0.1699,
"eval_gen_len": 19.0,
"eval_loss": 0.6337732672691345,
"eval_runtime": 35.9321,
"eval_samples_per_second": 22.125,
"eval_steps_per_second": 1.392,
"step": 10348
},
{
"epoch": 26.38,
"learning_rate": 3.6809045226130654e-05,
"loss": 0.4591,
"step": 10500
},
{
"epoch": 27.0,
"eval_bleu": 0.1855,
"eval_gen_len": 19.0,
"eval_loss": 0.6358336806297302,
"eval_runtime": 35.7576,
"eval_samples_per_second": 22.233,
"eval_steps_per_second": 1.398,
"step": 10746
},
{
"epoch": 27.64,
"learning_rate": 3.618090452261307e-05,
"loss": 0.4449,
"step": 11000
},
{
"epoch": 28.0,
"eval_bleu": 0.1759,
"eval_gen_len": 19.0,
"eval_loss": 0.6440271735191345,
"eval_runtime": 35.6217,
"eval_samples_per_second": 22.318,
"eval_steps_per_second": 1.404,
"step": 11144
},
{
"epoch": 28.89,
"learning_rate": 3.555276381909548e-05,
"loss": 0.4285,
"step": 11500
},
{
"epoch": 29.0,
"eval_bleu": 0.1786,
"eval_gen_len": 19.0,
"eval_loss": 0.6438360810279846,
"eval_runtime": 35.5837,
"eval_samples_per_second": 22.342,
"eval_steps_per_second": 1.405,
"step": 11542
},
{
"epoch": 30.0,
"eval_bleu": 0.1874,
"eval_gen_len": 19.0,
"eval_loss": 0.647448718547821,
"eval_runtime": 35.5495,
"eval_samples_per_second": 22.363,
"eval_steps_per_second": 1.406,
"step": 11940
},
{
"epoch": 30.15,
"learning_rate": 3.4924623115577894e-05,
"loss": 0.4137,
"step": 12000
},
{
"epoch": 31.0,
"eval_bleu": 0.1968,
"eval_gen_len": 19.0,
"eval_loss": 0.651654839515686,
"eval_runtime": 35.6719,
"eval_samples_per_second": 22.286,
"eval_steps_per_second": 1.402,
"step": 12338
},
{
"epoch": 31.41,
"learning_rate": 3.42964824120603e-05,
"loss": 0.4012,
"step": 12500
},
{
"epoch": 32.0,
"eval_bleu": 0.1735,
"eval_gen_len": 19.0,
"eval_loss": 0.6562197804450989,
"eval_runtime": 35.627,
"eval_samples_per_second": 22.315,
"eval_steps_per_second": 1.403,
"step": 12736
},
{
"epoch": 32.66,
"learning_rate": 3.366834170854272e-05,
"loss": 0.3858,
"step": 13000
},
{
"epoch": 33.0,
"eval_bleu": 0.18,
"eval_gen_len": 19.0,
"eval_loss": 0.6581218838691711,
"eval_runtime": 35.6835,
"eval_samples_per_second": 22.279,
"eval_steps_per_second": 1.401,
"step": 13134
},
{
"epoch": 33.92,
"learning_rate": 3.3040201005025127e-05,
"loss": 0.3753,
"step": 13500
},
{
"epoch": 34.0,
"eval_bleu": 0.1837,
"eval_gen_len": 19.0,
"eval_loss": 0.6713840961456299,
"eval_runtime": 36.1204,
"eval_samples_per_second": 22.01,
"eval_steps_per_second": 1.384,
"step": 13532
},
{
"epoch": 35.0,
"eval_bleu": 0.177,
"eval_gen_len": 19.0,
"eval_loss": 0.6749709844589233,
"eval_runtime": 35.6201,
"eval_samples_per_second": 22.319,
"eval_steps_per_second": 1.404,
"step": 13930
},
{
"epoch": 35.18,
"learning_rate": 3.241206030150754e-05,
"loss": 0.3613,
"step": 14000
},
{
"epoch": 36.0,
"eval_bleu": 0.177,
"eval_gen_len": 19.0,
"eval_loss": 0.6772740483283997,
"eval_runtime": 35.673,
"eval_samples_per_second": 22.286,
"eval_steps_per_second": 1.402,
"step": 14328
},
{
"epoch": 36.43,
"learning_rate": 3.178391959798995e-05,
"loss": 0.3493,
"step": 14500
},
{
"epoch": 37.0,
"eval_bleu": 0.1859,
"eval_gen_len": 19.0,
"eval_loss": 0.6915194392204285,
"eval_runtime": 36.1411,
"eval_samples_per_second": 21.997,
"eval_steps_per_second": 1.383,
"step": 14726
},
{
"epoch": 37.69,
"learning_rate": 3.1155778894472366e-05,
"loss": 0.339,
"step": 15000
},
{
"epoch": 38.0,
"eval_bleu": 0.1756,
"eval_gen_len": 19.0,
"eval_loss": 0.7031569480895996,
"eval_runtime": 36.1897,
"eval_samples_per_second": 21.968,
"eval_steps_per_second": 1.382,
"step": 15124
},
{
"epoch": 38.94,
"learning_rate": 3.0527638190954775e-05,
"loss": 0.3263,
"step": 15500
},
{
"epoch": 39.0,
"eval_bleu": 0.1844,
"eval_gen_len": 19.0,
"eval_loss": 0.7003222703933716,
"eval_runtime": 36.2516,
"eval_samples_per_second": 21.93,
"eval_steps_per_second": 1.379,
"step": 15522
},
{
"epoch": 40.0,
"eval_bleu": 0.1795,
"eval_gen_len": 19.0,
"eval_loss": 0.7169303297996521,
"eval_runtime": 36.3049,
"eval_samples_per_second": 21.898,
"eval_steps_per_second": 1.377,
"step": 15920
},
{
"epoch": 40.2,
"learning_rate": 2.989949748743719e-05,
"loss": 0.3153,
"step": 16000
},
{
"epoch": 41.0,
"eval_bleu": 0.1903,
"eval_gen_len": 19.0,
"eval_loss": 0.7180814743041992,
"eval_runtime": 36.2151,
"eval_samples_per_second": 21.952,
"eval_steps_per_second": 1.381,
"step": 16318
},
{
"epoch": 41.46,
"learning_rate": 2.9271356783919603e-05,
"loss": 0.3047,
"step": 16500
},
{
"epoch": 42.0,
"eval_bleu": 0.1864,
"eval_gen_len": 19.0,
"eval_loss": 0.7283141613006592,
"eval_runtime": 36.2969,
"eval_samples_per_second": 21.903,
"eval_steps_per_second": 1.378,
"step": 16716
},
{
"epoch": 42.71,
"learning_rate": 2.8643216080402015e-05,
"loss": 0.2933,
"step": 17000
},
{
"epoch": 43.0,
"eval_bleu": 0.188,
"eval_gen_len": 19.0,
"eval_loss": 0.7462304830551147,
"eval_runtime": 35.9176,
"eval_samples_per_second": 22.134,
"eval_steps_per_second": 1.392,
"step": 17114
},
{
"epoch": 43.97,
"learning_rate": 2.8015075376884427e-05,
"loss": 0.2888,
"step": 17500
},
{
"epoch": 44.0,
"eval_bleu": 0.1841,
"eval_gen_len": 19.0,
"eval_loss": 0.7420201301574707,
"eval_runtime": 36.2045,
"eval_samples_per_second": 21.959,
"eval_steps_per_second": 1.381,
"step": 17512
},
{
"epoch": 45.0,
"eval_bleu": 0.1748,
"eval_gen_len": 19.0,
"eval_loss": 0.7574421167373657,
"eval_runtime": 36.2479,
"eval_samples_per_second": 21.932,
"eval_steps_per_second": 1.379,
"step": 17910
},
{
"epoch": 45.23,
"learning_rate": 2.738693467336684e-05,
"loss": 0.2762,
"step": 18000
},
{
"epoch": 46.0,
"eval_bleu": 0.1747,
"eval_gen_len": 19.0,
"eval_loss": 0.7617235779762268,
"eval_runtime": 36.159,
"eval_samples_per_second": 21.986,
"eval_steps_per_second": 1.383,
"step": 18308
},
{
"epoch": 46.48,
"learning_rate": 2.6758793969849248e-05,
"loss": 0.2671,
"step": 18500
},
{
"epoch": 47.0,
"eval_bleu": 0.1743,
"eval_gen_len": 19.0,
"eval_loss": 0.7677585482597351,
"eval_runtime": 36.1812,
"eval_samples_per_second": 21.973,
"eval_steps_per_second": 1.382,
"step": 18706
},
{
"epoch": 47.74,
"learning_rate": 2.613065326633166e-05,
"loss": 0.2585,
"step": 19000
},
{
"epoch": 48.0,
"eval_bleu": 0.1902,
"eval_gen_len": 19.0,
"eval_loss": 0.7697047591209412,
"eval_runtime": 36.1036,
"eval_samples_per_second": 22.02,
"eval_steps_per_second": 1.385,
"step": 19104
},
{
"epoch": 48.99,
"learning_rate": 2.5502512562814072e-05,
"loss": 0.252,
"step": 19500
},
{
"epoch": 49.0,
"eval_bleu": 0.208,
"eval_gen_len": 19.0,
"eval_loss": 0.7865097522735596,
"eval_runtime": 36.1641,
"eval_samples_per_second": 21.983,
"eval_steps_per_second": 1.383,
"step": 19502
},
{
"epoch": 50.0,
"eval_bleu": 0.1777,
"eval_gen_len": 19.0,
"eval_loss": 0.8058604001998901,
"eval_runtime": 36.0936,
"eval_samples_per_second": 22.026,
"eval_steps_per_second": 1.385,
"step": 19900
},
{
"epoch": 50.25,
"learning_rate": 2.4874371859296484e-05,
"loss": 0.2411,
"step": 20000
},
{
"epoch": 51.0,
"eval_bleu": 0.212,
"eval_gen_len": 19.0,
"eval_loss": 0.7906444072723389,
"eval_runtime": 36.7349,
"eval_samples_per_second": 21.642,
"eval_steps_per_second": 1.361,
"step": 20298
},
{
"epoch": 51.51,
"learning_rate": 2.4246231155778896e-05,
"loss": 0.2358,
"step": 20500
},
{
"epoch": 52.0,
"eval_bleu": 0.1778,
"eval_gen_len": 19.0,
"eval_loss": 0.8143441081047058,
"eval_runtime": 36.1702,
"eval_samples_per_second": 21.979,
"eval_steps_per_second": 1.382,
"step": 20696
},
{
"epoch": 52.76,
"learning_rate": 2.361809045226131e-05,
"loss": 0.2273,
"step": 21000
},
{
"epoch": 53.0,
"eval_bleu": 0.218,
"eval_gen_len": 19.0,
"eval_loss": 0.8184289932250977,
"eval_runtime": 36.341,
"eval_samples_per_second": 21.876,
"eval_steps_per_second": 1.376,
"step": 21094
},
{
"epoch": 54.0,
"eval_bleu": 0.2243,
"eval_gen_len": 19.0,
"eval_loss": 0.8261227607727051,
"eval_runtime": 36.1608,
"eval_samples_per_second": 21.985,
"eval_steps_per_second": 1.383,
"step": 21492
},
{
"epoch": 54.02,
"learning_rate": 2.298994974874372e-05,
"loss": 0.223,
"step": 21500
},
{
"epoch": 55.0,
"eval_bleu": 0.2196,
"eval_gen_len": 19.0,
"eval_loss": 0.8429352641105652,
"eval_runtime": 35.7014,
"eval_samples_per_second": 22.268,
"eval_steps_per_second": 1.401,
"step": 21890
},
{
"epoch": 55.28,
"learning_rate": 2.2361809045226133e-05,
"loss": 0.2131,
"step": 22000
},
{
"epoch": 56.0,
"eval_bleu": 0.2402,
"eval_gen_len": 19.0,
"eval_loss": 0.847459077835083,
"eval_runtime": 35.7383,
"eval_samples_per_second": 22.245,
"eval_steps_per_second": 1.399,
"step": 22288
},
{
"epoch": 56.53,
"learning_rate": 2.1733668341708545e-05,
"loss": 0.2083,
"step": 22500
},
{
"epoch": 57.0,
"eval_bleu": 0.2163,
"eval_gen_len": 19.0,
"eval_loss": 0.8617640733718872,
"eval_runtime": 35.56,
"eval_samples_per_second": 22.357,
"eval_steps_per_second": 1.406,
"step": 22686
},
{
"epoch": 57.79,
"learning_rate": 2.1105527638190957e-05,
"loss": 0.202,
"step": 23000
},
{
"epoch": 58.0,
"eval_bleu": 0.2164,
"eval_gen_len": 19.0,
"eval_loss": 0.8572230339050293,
"eval_runtime": 35.5968,
"eval_samples_per_second": 22.333,
"eval_steps_per_second": 1.405,
"step": 23084
},
{
"epoch": 59.0,
"eval_bleu": 0.217,
"eval_gen_len": 19.0,
"eval_loss": 0.873598575592041,
"eval_runtime": 35.5807,
"eval_samples_per_second": 22.344,
"eval_steps_per_second": 1.405,
"step": 23482
},
{
"epoch": 59.05,
"learning_rate": 2.047738693467337e-05,
"loss": 0.1968,
"step": 23500
},
{
"epoch": 60.0,
"eval_bleu": 0.2166,
"eval_gen_len": 19.0,
"eval_loss": 0.8894439339637756,
"eval_runtime": 35.6815,
"eval_samples_per_second": 22.28,
"eval_steps_per_second": 1.401,
"step": 23880
},
{
"epoch": 60.3,
"learning_rate": 1.984924623115578e-05,
"loss": 0.1904,
"step": 24000
},
{
"epoch": 61.0,
"eval_bleu": 0.2241,
"eval_gen_len": 19.0,
"eval_loss": 0.8927697539329529,
"eval_runtime": 35.5709,
"eval_samples_per_second": 22.35,
"eval_steps_per_second": 1.406,
"step": 24278
},
{
"epoch": 61.56,
"learning_rate": 1.9221105527638193e-05,
"loss": 0.1847,
"step": 24500
},
{
"epoch": 62.0,
"eval_bleu": 0.2219,
"eval_gen_len": 19.0,
"eval_loss": 0.9057827591896057,
"eval_runtime": 35.9775,
"eval_samples_per_second": 22.097,
"eval_steps_per_second": 1.39,
"step": 24676
},
{
"epoch": 62.81,
"learning_rate": 1.8592964824120602e-05,
"loss": 0.1803,
"step": 25000
},
{
"epoch": 63.0,
"eval_bleu": 0.2336,
"eval_gen_len": 19.0,
"eval_loss": 0.9056702852249146,
"eval_runtime": 35.7167,
"eval_samples_per_second": 22.259,
"eval_steps_per_second": 1.4,
"step": 25074
},
{
"epoch": 64.0,
"eval_bleu": 0.2156,
"eval_gen_len": 19.0,
"eval_loss": 0.9173711538314819,
"eval_runtime": 35.7315,
"eval_samples_per_second": 22.249,
"eval_steps_per_second": 1.399,
"step": 25472
},
{
"epoch": 64.07,
"learning_rate": 1.7964824120603014e-05,
"loss": 0.1758,
"step": 25500
},
{
"epoch": 65.0,
"eval_bleu": 0.1951,
"eval_gen_len": 19.0,
"eval_loss": 0.922991156578064,
"eval_runtime": 35.7331,
"eval_samples_per_second": 22.248,
"eval_steps_per_second": 1.399,
"step": 25870
},
{
"epoch": 65.33,
"learning_rate": 1.7336683417085427e-05,
"loss": 0.1701,
"step": 26000
},
{
"epoch": 66.0,
"eval_bleu": 0.2249,
"eval_gen_len": 19.0,
"eval_loss": 0.9349916577339172,
"eval_runtime": 35.6317,
"eval_samples_per_second": 22.312,
"eval_steps_per_second": 1.403,
"step": 26268
},
{
"epoch": 66.58,
"learning_rate": 1.670854271356784e-05,
"loss": 0.1673,
"step": 26500
},
{
"epoch": 67.0,
"eval_bleu": 0.2224,
"eval_gen_len": 19.0,
"eval_loss": 0.9416642189025879,
"eval_runtime": 35.7511,
"eval_samples_per_second": 22.237,
"eval_steps_per_second": 1.399,
"step": 26666
},
{
"epoch": 67.84,
"learning_rate": 1.608040201005025e-05,
"loss": 0.1614,
"step": 27000
},
{
"epoch": 68.0,
"eval_bleu": 0.2161,
"eval_gen_len": 19.0,
"eval_loss": 0.9508859515190125,
"eval_runtime": 35.6367,
"eval_samples_per_second": 22.308,
"eval_steps_per_second": 1.403,
"step": 27064
},
{
"epoch": 69.0,
"eval_bleu": 0.2183,
"eval_gen_len": 19.0,
"eval_loss": 0.9652993083000183,
"eval_runtime": 35.6263,
"eval_samples_per_second": 22.315,
"eval_steps_per_second": 1.403,
"step": 27462
},
{
"epoch": 69.1,
"learning_rate": 1.5452261306532663e-05,
"loss": 0.1578,
"step": 27500
},
{
"epoch": 70.0,
"eval_bleu": 0.2113,
"eval_gen_len": 19.0,
"eval_loss": 0.9633088111877441,
"eval_runtime": 35.7594,
"eval_samples_per_second": 22.232,
"eval_steps_per_second": 1.398,
"step": 27860
},
{
"epoch": 70.35,
"learning_rate": 1.4824120603015077e-05,
"loss": 0.1536,
"step": 28000
},
{
"epoch": 71.0,
"eval_bleu": 0.2177,
"eval_gen_len": 19.0,
"eval_loss": 0.9783052802085876,
"eval_runtime": 35.5411,
"eval_samples_per_second": 22.368,
"eval_steps_per_second": 1.407,
"step": 28258
},
{
"epoch": 71.61,
"learning_rate": 1.4195979899497489e-05,
"loss": 0.1513,
"step": 28500
},
{
"epoch": 72.0,
"eval_bleu": 0.2179,
"eval_gen_len": 19.0,
"eval_loss": 0.9754663109779358,
"eval_runtime": 35.6862,
"eval_samples_per_second": 22.278,
"eval_steps_per_second": 1.401,
"step": 28656
},
{
"epoch": 72.86,
"learning_rate": 1.3567839195979901e-05,
"loss": 0.147,
"step": 29000
},
{
"epoch": 73.0,
"eval_bleu": 0.2273,
"eval_gen_len": 19.0,
"eval_loss": 0.9910703897476196,
"eval_runtime": 35.9105,
"eval_samples_per_second": 22.138,
"eval_steps_per_second": 1.392,
"step": 29054
},
{
"epoch": 74.0,
"eval_bleu": 0.2157,
"eval_gen_len": 19.0,
"eval_loss": 0.9854773283004761,
"eval_runtime": 35.6352,
"eval_samples_per_second": 22.309,
"eval_steps_per_second": 1.403,
"step": 29452
},
{
"epoch": 74.12,
"learning_rate": 1.2939698492462313e-05,
"loss": 0.1443,
"step": 29500
},
{
"epoch": 75.0,
"eval_bleu": 0.2169,
"eval_gen_len": 19.0,
"eval_loss": 0.9998270273208618,
"eval_runtime": 35.64,
"eval_samples_per_second": 22.306,
"eval_steps_per_second": 1.403,
"step": 29850
},
{
"epoch": 75.38,
"learning_rate": 1.2311557788944725e-05,
"loss": 0.1401,
"step": 30000
},
{
"epoch": 76.0,
"eval_bleu": 0.2124,
"eval_gen_len": 19.0,
"eval_loss": 1.0127789974212646,
"eval_runtime": 35.7323,
"eval_samples_per_second": 22.249,
"eval_steps_per_second": 1.399,
"step": 30248
},
{
"epoch": 76.63,
"learning_rate": 1.1683417085427137e-05,
"loss": 0.1377,
"step": 30500
},
{
"epoch": 77.0,
"eval_bleu": 0.2159,
"eval_gen_len": 19.0,
"eval_loss": 1.0114222764968872,
"eval_runtime": 35.7601,
"eval_samples_per_second": 22.231,
"eval_steps_per_second": 1.398,
"step": 30646
},
{
"epoch": 77.89,
"learning_rate": 1.105527638190955e-05,
"loss": 0.1342,
"step": 31000
},
{
"epoch": 78.0,
"eval_bleu": 0.2152,
"eval_gen_len": 19.0,
"eval_loss": 1.0248533487319946,
"eval_runtime": 35.6808,
"eval_samples_per_second": 22.281,
"eval_steps_per_second": 1.401,
"step": 31044
},
{
"epoch": 79.0,
"eval_bleu": 0.2233,
"eval_gen_len": 19.0,
"eval_loss": 1.0258084535598755,
"eval_runtime": 35.6984,
"eval_samples_per_second": 22.27,
"eval_steps_per_second": 1.401,
"step": 31442
},
{
"epoch": 79.15,
"learning_rate": 1.042713567839196e-05,
"loss": 0.1336,
"step": 31500
},
{
"epoch": 80.0,
"eval_bleu": 0.2194,
"eval_gen_len": 19.0,
"eval_loss": 1.0308655500411987,
"eval_runtime": 35.6369,
"eval_samples_per_second": 22.308,
"eval_steps_per_second": 1.403,
"step": 31840
},
{
"epoch": 80.4,
"learning_rate": 9.798994974874372e-06,
"loss": 0.1307,
"step": 32000
},
{
"epoch": 81.0,
"eval_bleu": 0.2122,
"eval_gen_len": 19.0,
"eval_loss": 1.032060146331787,
"eval_runtime": 35.5991,
"eval_samples_per_second": 22.332,
"eval_steps_per_second": 1.405,
"step": 32238
},
{
"epoch": 81.66,
"learning_rate": 9.170854271356784e-06,
"loss": 0.1277,
"step": 32500
},
{
"epoch": 82.0,
"eval_bleu": 0.2191,
"eval_gen_len": 19.0,
"eval_loss": 1.034020185470581,
"eval_runtime": 35.662,
"eval_samples_per_second": 22.293,
"eval_steps_per_second": 1.402,
"step": 32636
},
{
"epoch": 82.91,
"learning_rate": 8.542713567839196e-06,
"loss": 0.1262,
"step": 33000
},
{
"epoch": 83.0,
"eval_bleu": 0.2123,
"eval_gen_len": 19.0,
"eval_loss": 1.0493375062942505,
"eval_runtime": 35.6012,
"eval_samples_per_second": 22.331,
"eval_steps_per_second": 1.404,
"step": 33034
},
{
"epoch": 84.0,
"eval_bleu": 0.2273,
"eval_gen_len": 19.0,
"eval_loss": 1.0544501543045044,
"eval_runtime": 35.7115,
"eval_samples_per_second": 22.262,
"eval_steps_per_second": 1.4,
"step": 33432
},
{
"epoch": 84.17,
"learning_rate": 7.914572864321608e-06,
"loss": 0.1233,
"step": 33500
},
{
"epoch": 85.0,
"eval_bleu": 0.2184,
"eval_gen_len": 19.0,
"eval_loss": 1.0550196170806885,
"eval_runtime": 35.6365,
"eval_samples_per_second": 22.309,
"eval_steps_per_second": 1.403,
"step": 33830
},
{
"epoch": 85.43,
"learning_rate": 7.28643216080402e-06,
"loss": 0.1233,
"step": 34000
},
{
"epoch": 86.0,
"eval_bleu": 0.2241,
"eval_gen_len": 19.0,
"eval_loss": 1.0545953512191772,
"eval_runtime": 35.7457,
"eval_samples_per_second": 22.24,
"eval_steps_per_second": 1.399,
"step": 34228
},
{
"epoch": 86.68,
"learning_rate": 6.658291457286432e-06,
"loss": 0.1205,
"step": 34500
},
{
"epoch": 87.0,
"eval_bleu": 0.2246,
"eval_gen_len": 19.0,
"eval_loss": 1.0695993900299072,
"eval_runtime": 35.6623,
"eval_samples_per_second": 22.292,
"eval_steps_per_second": 1.402,
"step": 34626
},
{
"epoch": 87.94,
"learning_rate": 6.030150753768844e-06,
"loss": 0.1189,
"step": 35000
},
{
"epoch": 88.0,
"eval_bleu": 0.2237,
"eval_gen_len": 19.0,
"eval_loss": 1.0730416774749756,
"eval_runtime": 35.8819,
"eval_samples_per_second": 22.156,
"eval_steps_per_second": 1.393,
"step": 35024
},
{
"epoch": 89.0,
"eval_bleu": 0.2308,
"eval_gen_len": 19.0,
"eval_loss": 1.068780541419983,
"eval_runtime": 35.7114,
"eval_samples_per_second": 22.262,
"eval_steps_per_second": 1.4,
"step": 35422
},
{
"epoch": 89.2,
"learning_rate": 5.402010050251256e-06,
"loss": 0.1173,
"step": 35500
},
{
"epoch": 90.0,
"eval_bleu": 0.2267,
"eval_gen_len": 19.0,
"eval_loss": 1.0783226490020752,
"eval_runtime": 35.7523,
"eval_samples_per_second": 22.236,
"eval_steps_per_second": 1.399,
"step": 35820
},
{
"epoch": 90.45,
"learning_rate": 4.773869346733668e-06,
"loss": 0.1154,
"step": 36000
},
{
"epoch": 91.0,
"eval_bleu": 0.2262,
"eval_gen_len": 19.0,
"eval_loss": 1.0766741037368774,
"eval_runtime": 35.7535,
"eval_samples_per_second": 22.236,
"eval_steps_per_second": 1.398,
"step": 36218
},
{
"epoch": 91.71,
"learning_rate": 4.1457286432160804e-06,
"loss": 0.115,
"step": 36500
},
{
"epoch": 92.0,
"eval_bleu": 0.2214,
"eval_gen_len": 19.0,
"eval_loss": 1.0834720134735107,
"eval_runtime": 35.7567,
"eval_samples_per_second": 22.234,
"eval_steps_per_second": 1.398,
"step": 36616
},
{
"epoch": 92.96,
"learning_rate": 3.5175879396984926e-06,
"loss": 0.1136,
"step": 37000
},
{
"epoch": 93.0,
"eval_bleu": 0.2284,
"eval_gen_len": 19.0,
"eval_loss": 1.0788373947143555,
"eval_runtime": 35.8397,
"eval_samples_per_second": 22.182,
"eval_steps_per_second": 1.395,
"step": 37014
},
{
"epoch": 94.0,
"eval_bleu": 0.2269,
"eval_gen_len": 19.0,
"eval_loss": 1.0876238346099854,
"eval_runtime": 35.7552,
"eval_samples_per_second": 22.235,
"eval_steps_per_second": 1.398,
"step": 37412
},
{
"epoch": 94.22,
"learning_rate": 2.8894472361809047e-06,
"loss": 0.1126,
"step": 37500
},
{
"epoch": 95.0,
"eval_bleu": 0.2212,
"eval_gen_len": 19.0,
"eval_loss": 1.0935641527175903,
"eval_runtime": 35.7207,
"eval_samples_per_second": 22.256,
"eval_steps_per_second": 1.4,
"step": 37810
},
{
"epoch": 95.48,
"learning_rate": 2.261306532663317e-06,
"loss": 0.1118,
"step": 38000
},
{
"epoch": 96.0,
"eval_bleu": 0.2207,
"eval_gen_len": 19.0,
"eval_loss": 1.0917831659317017,
"eval_runtime": 35.5884,
"eval_samples_per_second": 22.339,
"eval_steps_per_second": 1.405,
"step": 38208
},
{
"epoch": 96.73,
"learning_rate": 1.6331658291457288e-06,
"loss": 0.111,
"step": 38500
},
{
"epoch": 97.0,
"eval_bleu": 0.2217,
"eval_gen_len": 19.0,
"eval_loss": 1.0943822860717773,
"eval_runtime": 35.7423,
"eval_samples_per_second": 22.243,
"eval_steps_per_second": 1.399,
"step": 38606
},
{
"epoch": 97.99,
"learning_rate": 1.0050251256281407e-06,
"loss": 0.1106,
"step": 39000
},
{
"epoch": 98.0,
"eval_bleu": 0.2203,
"eval_gen_len": 19.0,
"eval_loss": 1.0962368249893188,
"eval_runtime": 35.6215,
"eval_samples_per_second": 22.318,
"eval_steps_per_second": 1.404,
"step": 39004
},
{
"epoch": 99.0,
"eval_bleu": 0.2182,
"eval_gen_len": 19.0,
"eval_loss": 1.0994266271591187,
"eval_runtime": 35.6909,
"eval_samples_per_second": 22.275,
"eval_steps_per_second": 1.401,
"step": 39402
},
{
"epoch": 99.25,
"learning_rate": 3.7688442211055275e-07,
"loss": 0.1088,
"step": 39500
},
{
"epoch": 100.0,
"eval_bleu": 0.2193,
"eval_gen_len": 19.0,
"eval_loss": 1.099919319152832,
"eval_runtime": 35.8327,
"eval_samples_per_second": 22.186,
"eval_steps_per_second": 1.395,
"step": 39800
},
{
"epoch": 100.0,
"step": 39800,
"total_flos": 2.262748628238336e+17,
"train_loss": 0.35148891755683936,
"train_runtime": 21066.5572,
"train_samples_per_second": 30.204,
"train_steps_per_second": 1.889
}
],
"max_steps": 39800,
"num_train_epochs": 100,
"total_flos": 2.262748628238336e+17,
"trial_name": null,
"trial_params": null
}