{ "best_metric": null, "best_model_checkpoint": null, "epoch": 100.0, "global_step": 39800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_bleu": 0.1048, "eval_gen_len": 19.0, "eval_loss": 1.1654638051986694, "eval_runtime": 35.8, "eval_samples_per_second": 22.207, "eval_steps_per_second": 1.397, "step": 398 }, { "epoch": 1.26, "learning_rate": 4.937185929648241e-05, "loss": 1.5993, "step": 500 }, { "epoch": 2.0, "eval_bleu": 0.0762, "eval_gen_len": 19.0, "eval_loss": 1.0294471979141235, "eval_runtime": 35.6457, "eval_samples_per_second": 22.303, "eval_steps_per_second": 1.403, "step": 796 }, { "epoch": 2.51, "learning_rate": 4.874371859296483e-05, "loss": 1.1714, "step": 1000 }, { "epoch": 3.0, "eval_bleu": 0.0863, "eval_gen_len": 19.0, "eval_loss": 0.9574553370475769, "eval_runtime": 35.6866, "eval_samples_per_second": 22.277, "eval_steps_per_second": 1.401, "step": 1194 }, { "epoch": 3.77, "learning_rate": 4.8115577889447235e-05, "loss": 1.0539, "step": 1500 }, { "epoch": 4.0, "eval_bleu": 0.0769, "eval_gen_len": 19.0, "eval_loss": 0.9043193459510803, "eval_runtime": 35.4817, "eval_samples_per_second": 22.406, "eval_steps_per_second": 1.409, "step": 1592 }, { "epoch": 5.0, "eval_bleu": 0.0792, "eval_gen_len": 19.0, "eval_loss": 0.8518753051757812, "eval_runtime": 35.6246, "eval_samples_per_second": 22.316, "eval_steps_per_second": 1.404, "step": 1990 }, { "epoch": 5.03, "learning_rate": 4.748743718592965e-05, "loss": 0.9762, "step": 2000 }, { "epoch": 6.0, "eval_bleu": 0.0563, "eval_gen_len": 19.0, "eval_loss": 0.8147059679031372, "eval_runtime": 35.5873, "eval_samples_per_second": 22.339, "eval_steps_per_second": 1.405, "step": 2388 }, { "epoch": 6.28, "learning_rate": 4.685929648241206e-05, "loss": 0.9072, "step": 2500 }, { "epoch": 7.0, "eval_bleu": 0.0856, "eval_gen_len": 19.0, "eval_loss": 0.7833035588264465, "eval_runtime": 35.6208, "eval_samples_per_second": 22.318, "eval_steps_per_second": 1.404, "step": 2786 }, { "epoch": 7.54, "learning_rate": 4.6231155778894475e-05, "loss": 0.8502, "step": 3000 }, { "epoch": 8.0, "eval_bleu": 0.091, "eval_gen_len": 19.0, "eval_loss": 0.7526289820671082, "eval_runtime": 35.6502, "eval_samples_per_second": 22.3, "eval_steps_per_second": 1.403, "step": 3184 }, { "epoch": 8.79, "learning_rate": 4.5603015075376884e-05, "loss": 0.8081, "step": 3500 }, { "epoch": 9.0, "eval_bleu": 0.1344, "eval_gen_len": 19.0, "eval_loss": 0.7389398813247681, "eval_runtime": 35.7611, "eval_samples_per_second": 22.231, "eval_steps_per_second": 1.398, "step": 3582 }, { "epoch": 10.0, "eval_bleu": 0.1271, "eval_gen_len": 19.0, "eval_loss": 0.718666672706604, "eval_runtime": 35.7669, "eval_samples_per_second": 22.227, "eval_steps_per_second": 1.398, "step": 3980 }, { "epoch": 10.05, "learning_rate": 4.49748743718593e-05, "loss": 0.7683, "step": 4000 }, { "epoch": 11.0, "eval_bleu": 0.1299, "eval_gen_len": 19.0, "eval_loss": 0.7038307189941406, "eval_runtime": 35.6501, "eval_samples_per_second": 22.3, "eval_steps_per_second": 1.403, "step": 4378 }, { "epoch": 11.31, "learning_rate": 4.434673366834171e-05, "loss": 0.7318, "step": 4500 }, { "epoch": 12.0, "eval_bleu": 0.1213, "eval_gen_len": 19.0, "eval_loss": 0.6900615692138672, "eval_runtime": 35.6533, "eval_samples_per_second": 22.298, "eval_steps_per_second": 1.402, "step": 4776 }, { "epoch": 12.56, "learning_rate": 4.3718592964824124e-05, "loss": 0.6998, "step": 5000 }, { "epoch": 13.0, "eval_bleu": 0.1583, "eval_gen_len": 19.0, "eval_loss": 0.6753336787223816, "eval_runtime": 35.6402, "eval_samples_per_second": 22.306, "eval_steps_per_second": 1.403, "step": 5174 }, { "epoch": 13.82, "learning_rate": 4.309045226130653e-05, "loss": 0.6683, "step": 5500 }, { "epoch": 14.0, "eval_bleu": 0.145, "eval_gen_len": 19.0, "eval_loss": 0.6631056070327759, "eval_runtime": 35.7643, "eval_samples_per_second": 22.229, "eval_steps_per_second": 1.398, "step": 5572 }, { "epoch": 15.0, "eval_bleu": 0.1516, "eval_gen_len": 19.0, "eval_loss": 0.6530159711837769, "eval_runtime": 35.7246, "eval_samples_per_second": 22.254, "eval_steps_per_second": 1.4, "step": 5970 }, { "epoch": 15.08, "learning_rate": 4.246231155778895e-05, "loss": 0.6406, "step": 6000 }, { "epoch": 16.0, "eval_bleu": 0.1599, "eval_gen_len": 19.0, "eval_loss": 0.6454012393951416, "eval_runtime": 35.7142, "eval_samples_per_second": 22.26, "eval_steps_per_second": 1.4, "step": 6368 }, { "epoch": 16.33, "learning_rate": 4.183417085427136e-05, "loss": 0.6128, "step": 6500 }, { "epoch": 17.0, "eval_bleu": 0.1478, "eval_gen_len": 19.0, "eval_loss": 0.6383265256881714, "eval_runtime": 35.664, "eval_samples_per_second": 22.291, "eval_steps_per_second": 1.402, "step": 6766 }, { "epoch": 17.59, "learning_rate": 4.120603015075377e-05, "loss": 0.5911, "step": 7000 }, { "epoch": 18.0, "eval_bleu": 0.1571, "eval_gen_len": 19.0, "eval_loss": 0.6369075179100037, "eval_runtime": 35.6591, "eval_samples_per_second": 22.294, "eval_steps_per_second": 1.402, "step": 7164 }, { "epoch": 18.84, "learning_rate": 4.057788944723618e-05, "loss": 0.5721, "step": 7500 }, { "epoch": 19.0, "eval_bleu": 0.1668, "eval_gen_len": 19.0, "eval_loss": 0.6339399814605713, "eval_runtime": 35.6693, "eval_samples_per_second": 22.288, "eval_steps_per_second": 1.402, "step": 7562 }, { "epoch": 20.0, "eval_bleu": 0.1611, "eval_gen_len": 19.0, "eval_loss": 0.6295469403266907, "eval_runtime": 35.5912, "eval_samples_per_second": 22.337, "eval_steps_per_second": 1.405, "step": 7960 }, { "epoch": 20.1, "learning_rate": 3.9949748743718597e-05, "loss": 0.547, "step": 8000 }, { "epoch": 21.0, "eval_bleu": 0.1722, "eval_gen_len": 19.0, "eval_loss": 0.6267198324203491, "eval_runtime": 35.8366, "eval_samples_per_second": 22.184, "eval_steps_per_second": 1.395, "step": 8358 }, { "epoch": 21.36, "learning_rate": 3.9321608040201005e-05, "loss": 0.529, "step": 8500 }, { "epoch": 22.0, "eval_bleu": 0.1656, "eval_gen_len": 19.0, "eval_loss": 0.6275119781494141, "eval_runtime": 35.667, "eval_samples_per_second": 22.289, "eval_steps_per_second": 1.402, "step": 8756 }, { "epoch": 22.61, "learning_rate": 3.869346733668342e-05, "loss": 0.5115, "step": 9000 }, { "epoch": 23.0, "eval_bleu": 0.1684, "eval_gen_len": 19.0, "eval_loss": 0.6284548044204712, "eval_runtime": 35.6512, "eval_samples_per_second": 22.299, "eval_steps_per_second": 1.402, "step": 9154 }, { "epoch": 23.87, "learning_rate": 3.806532663316583e-05, "loss": 0.4934, "step": 9500 }, { "epoch": 24.0, "eval_bleu": 0.1696, "eval_gen_len": 19.0, "eval_loss": 0.6268807053565979, "eval_runtime": 35.7633, "eval_samples_per_second": 22.229, "eval_steps_per_second": 1.398, "step": 9552 }, { "epoch": 25.0, "eval_bleu": 0.182, "eval_gen_len": 19.0, "eval_loss": 0.6358157992362976, "eval_runtime": 36.0997, "eval_samples_per_second": 22.022, "eval_steps_per_second": 1.385, "step": 9950 }, { "epoch": 25.13, "learning_rate": 3.7437185929648245e-05, "loss": 0.4773, "step": 10000 }, { "epoch": 26.0, "eval_bleu": 0.1699, "eval_gen_len": 19.0, "eval_loss": 0.6337732672691345, "eval_runtime": 35.9321, "eval_samples_per_second": 22.125, "eval_steps_per_second": 1.392, "step": 10348 }, { "epoch": 26.38, "learning_rate": 3.6809045226130654e-05, "loss": 0.4591, "step": 10500 }, { "epoch": 27.0, "eval_bleu": 0.1855, "eval_gen_len": 19.0, "eval_loss": 0.6358336806297302, "eval_runtime": 35.7576, "eval_samples_per_second": 22.233, "eval_steps_per_second": 1.398, "step": 10746 }, { "epoch": 27.64, "learning_rate": 3.618090452261307e-05, "loss": 0.4449, "step": 11000 }, { "epoch": 28.0, "eval_bleu": 0.1759, "eval_gen_len": 19.0, "eval_loss": 0.6440271735191345, "eval_runtime": 35.6217, "eval_samples_per_second": 22.318, "eval_steps_per_second": 1.404, "step": 11144 }, { "epoch": 28.89, "learning_rate": 3.555276381909548e-05, "loss": 0.4285, "step": 11500 }, { "epoch": 29.0, "eval_bleu": 0.1786, "eval_gen_len": 19.0, "eval_loss": 0.6438360810279846, "eval_runtime": 35.5837, "eval_samples_per_second": 22.342, "eval_steps_per_second": 1.405, "step": 11542 }, { "epoch": 30.0, "eval_bleu": 0.1874, "eval_gen_len": 19.0, "eval_loss": 0.647448718547821, "eval_runtime": 35.5495, "eval_samples_per_second": 22.363, "eval_steps_per_second": 1.406, "step": 11940 }, { "epoch": 30.15, "learning_rate": 3.4924623115577894e-05, "loss": 0.4137, "step": 12000 }, { "epoch": 31.0, "eval_bleu": 0.1968, "eval_gen_len": 19.0, "eval_loss": 0.651654839515686, "eval_runtime": 35.6719, "eval_samples_per_second": 22.286, "eval_steps_per_second": 1.402, "step": 12338 }, { "epoch": 31.41, "learning_rate": 3.42964824120603e-05, "loss": 0.4012, "step": 12500 }, { "epoch": 32.0, "eval_bleu": 0.1735, "eval_gen_len": 19.0, "eval_loss": 0.6562197804450989, "eval_runtime": 35.627, "eval_samples_per_second": 22.315, "eval_steps_per_second": 1.403, "step": 12736 }, { "epoch": 32.66, "learning_rate": 3.366834170854272e-05, "loss": 0.3858, "step": 13000 }, { "epoch": 33.0, "eval_bleu": 0.18, "eval_gen_len": 19.0, "eval_loss": 0.6581218838691711, "eval_runtime": 35.6835, "eval_samples_per_second": 22.279, "eval_steps_per_second": 1.401, "step": 13134 }, { "epoch": 33.92, "learning_rate": 3.3040201005025127e-05, "loss": 0.3753, "step": 13500 }, { "epoch": 34.0, "eval_bleu": 0.1837, "eval_gen_len": 19.0, "eval_loss": 0.6713840961456299, "eval_runtime": 36.1204, "eval_samples_per_second": 22.01, "eval_steps_per_second": 1.384, "step": 13532 }, { "epoch": 35.0, "eval_bleu": 0.177, "eval_gen_len": 19.0, "eval_loss": 0.6749709844589233, "eval_runtime": 35.6201, "eval_samples_per_second": 22.319, "eval_steps_per_second": 1.404, "step": 13930 }, { "epoch": 35.18, "learning_rate": 3.241206030150754e-05, "loss": 0.3613, "step": 14000 }, { "epoch": 36.0, "eval_bleu": 0.177, "eval_gen_len": 19.0, "eval_loss": 0.6772740483283997, "eval_runtime": 35.673, "eval_samples_per_second": 22.286, "eval_steps_per_second": 1.402, "step": 14328 }, { "epoch": 36.43, "learning_rate": 3.178391959798995e-05, "loss": 0.3493, "step": 14500 }, { "epoch": 37.0, "eval_bleu": 0.1859, "eval_gen_len": 19.0, "eval_loss": 0.6915194392204285, "eval_runtime": 36.1411, "eval_samples_per_second": 21.997, "eval_steps_per_second": 1.383, "step": 14726 }, { "epoch": 37.69, "learning_rate": 3.1155778894472366e-05, "loss": 0.339, "step": 15000 }, { "epoch": 38.0, "eval_bleu": 0.1756, "eval_gen_len": 19.0, "eval_loss": 0.7031569480895996, "eval_runtime": 36.1897, "eval_samples_per_second": 21.968, "eval_steps_per_second": 1.382, "step": 15124 }, { "epoch": 38.94, "learning_rate": 3.0527638190954775e-05, "loss": 0.3263, "step": 15500 }, { "epoch": 39.0, "eval_bleu": 0.1844, "eval_gen_len": 19.0, "eval_loss": 0.7003222703933716, "eval_runtime": 36.2516, "eval_samples_per_second": 21.93, "eval_steps_per_second": 1.379, "step": 15522 }, { "epoch": 40.0, "eval_bleu": 0.1795, "eval_gen_len": 19.0, "eval_loss": 0.7169303297996521, "eval_runtime": 36.3049, "eval_samples_per_second": 21.898, "eval_steps_per_second": 1.377, "step": 15920 }, { "epoch": 40.2, "learning_rate": 2.989949748743719e-05, "loss": 0.3153, "step": 16000 }, { "epoch": 41.0, "eval_bleu": 0.1903, "eval_gen_len": 19.0, "eval_loss": 0.7180814743041992, "eval_runtime": 36.2151, "eval_samples_per_second": 21.952, "eval_steps_per_second": 1.381, "step": 16318 }, { "epoch": 41.46, "learning_rate": 2.9271356783919603e-05, "loss": 0.3047, "step": 16500 }, { "epoch": 42.0, "eval_bleu": 0.1864, "eval_gen_len": 19.0, "eval_loss": 0.7283141613006592, "eval_runtime": 36.2969, "eval_samples_per_second": 21.903, "eval_steps_per_second": 1.378, "step": 16716 }, { "epoch": 42.71, "learning_rate": 2.8643216080402015e-05, "loss": 0.2933, "step": 17000 }, { "epoch": 43.0, "eval_bleu": 0.188, "eval_gen_len": 19.0, "eval_loss": 0.7462304830551147, "eval_runtime": 35.9176, "eval_samples_per_second": 22.134, "eval_steps_per_second": 1.392, "step": 17114 }, { "epoch": 43.97, "learning_rate": 2.8015075376884427e-05, "loss": 0.2888, "step": 17500 }, { "epoch": 44.0, "eval_bleu": 0.1841, "eval_gen_len": 19.0, "eval_loss": 0.7420201301574707, "eval_runtime": 36.2045, "eval_samples_per_second": 21.959, "eval_steps_per_second": 1.381, "step": 17512 }, { "epoch": 45.0, "eval_bleu": 0.1748, "eval_gen_len": 19.0, "eval_loss": 0.7574421167373657, "eval_runtime": 36.2479, "eval_samples_per_second": 21.932, "eval_steps_per_second": 1.379, "step": 17910 }, { "epoch": 45.23, "learning_rate": 2.738693467336684e-05, "loss": 0.2762, "step": 18000 }, { "epoch": 46.0, "eval_bleu": 0.1747, "eval_gen_len": 19.0, "eval_loss": 0.7617235779762268, "eval_runtime": 36.159, "eval_samples_per_second": 21.986, "eval_steps_per_second": 1.383, "step": 18308 }, { "epoch": 46.48, "learning_rate": 2.6758793969849248e-05, "loss": 0.2671, "step": 18500 }, { "epoch": 47.0, "eval_bleu": 0.1743, "eval_gen_len": 19.0, "eval_loss": 0.7677585482597351, "eval_runtime": 36.1812, "eval_samples_per_second": 21.973, "eval_steps_per_second": 1.382, "step": 18706 }, { "epoch": 47.74, "learning_rate": 2.613065326633166e-05, "loss": 0.2585, "step": 19000 }, { "epoch": 48.0, "eval_bleu": 0.1902, "eval_gen_len": 19.0, "eval_loss": 0.7697047591209412, "eval_runtime": 36.1036, "eval_samples_per_second": 22.02, "eval_steps_per_second": 1.385, "step": 19104 }, { "epoch": 48.99, "learning_rate": 2.5502512562814072e-05, "loss": 0.252, "step": 19500 }, { "epoch": 49.0, "eval_bleu": 0.208, "eval_gen_len": 19.0, "eval_loss": 0.7865097522735596, "eval_runtime": 36.1641, "eval_samples_per_second": 21.983, "eval_steps_per_second": 1.383, "step": 19502 }, { "epoch": 50.0, "eval_bleu": 0.1777, "eval_gen_len": 19.0, "eval_loss": 0.8058604001998901, "eval_runtime": 36.0936, "eval_samples_per_second": 22.026, "eval_steps_per_second": 1.385, "step": 19900 }, { "epoch": 50.25, "learning_rate": 2.4874371859296484e-05, "loss": 0.2411, "step": 20000 }, { "epoch": 51.0, "eval_bleu": 0.212, "eval_gen_len": 19.0, "eval_loss": 0.7906444072723389, "eval_runtime": 36.7349, "eval_samples_per_second": 21.642, "eval_steps_per_second": 1.361, "step": 20298 }, { "epoch": 51.51, "learning_rate": 2.4246231155778896e-05, "loss": 0.2358, "step": 20500 }, { "epoch": 52.0, "eval_bleu": 0.1778, "eval_gen_len": 19.0, "eval_loss": 0.8143441081047058, "eval_runtime": 36.1702, "eval_samples_per_second": 21.979, "eval_steps_per_second": 1.382, "step": 20696 }, { "epoch": 52.76, "learning_rate": 2.361809045226131e-05, "loss": 0.2273, "step": 21000 }, { "epoch": 53.0, "eval_bleu": 0.218, "eval_gen_len": 19.0, "eval_loss": 0.8184289932250977, "eval_runtime": 36.341, "eval_samples_per_second": 21.876, "eval_steps_per_second": 1.376, "step": 21094 }, { "epoch": 54.0, "eval_bleu": 0.2243, "eval_gen_len": 19.0, "eval_loss": 0.8261227607727051, "eval_runtime": 36.1608, "eval_samples_per_second": 21.985, "eval_steps_per_second": 1.383, "step": 21492 }, { "epoch": 54.02, "learning_rate": 2.298994974874372e-05, "loss": 0.223, "step": 21500 }, { "epoch": 55.0, "eval_bleu": 0.2196, "eval_gen_len": 19.0, "eval_loss": 0.8429352641105652, "eval_runtime": 35.7014, "eval_samples_per_second": 22.268, "eval_steps_per_second": 1.401, "step": 21890 }, { "epoch": 55.28, "learning_rate": 2.2361809045226133e-05, "loss": 0.2131, "step": 22000 }, { "epoch": 56.0, "eval_bleu": 0.2402, "eval_gen_len": 19.0, "eval_loss": 0.847459077835083, "eval_runtime": 35.7383, "eval_samples_per_second": 22.245, "eval_steps_per_second": 1.399, "step": 22288 }, { "epoch": 56.53, "learning_rate": 2.1733668341708545e-05, "loss": 0.2083, "step": 22500 }, { "epoch": 57.0, "eval_bleu": 0.2163, "eval_gen_len": 19.0, "eval_loss": 0.8617640733718872, "eval_runtime": 35.56, "eval_samples_per_second": 22.357, "eval_steps_per_second": 1.406, "step": 22686 }, { "epoch": 57.79, "learning_rate": 2.1105527638190957e-05, "loss": 0.202, "step": 23000 }, { "epoch": 58.0, "eval_bleu": 0.2164, "eval_gen_len": 19.0, "eval_loss": 0.8572230339050293, "eval_runtime": 35.5968, "eval_samples_per_second": 22.333, "eval_steps_per_second": 1.405, "step": 23084 }, { "epoch": 59.0, "eval_bleu": 0.217, "eval_gen_len": 19.0, "eval_loss": 0.873598575592041, "eval_runtime": 35.5807, "eval_samples_per_second": 22.344, "eval_steps_per_second": 1.405, "step": 23482 }, { "epoch": 59.05, "learning_rate": 2.047738693467337e-05, "loss": 0.1968, "step": 23500 }, { "epoch": 60.0, "eval_bleu": 0.2166, "eval_gen_len": 19.0, "eval_loss": 0.8894439339637756, "eval_runtime": 35.6815, "eval_samples_per_second": 22.28, "eval_steps_per_second": 1.401, "step": 23880 }, { "epoch": 60.3, "learning_rate": 1.984924623115578e-05, "loss": 0.1904, "step": 24000 }, { "epoch": 61.0, "eval_bleu": 0.2241, "eval_gen_len": 19.0, "eval_loss": 0.8927697539329529, "eval_runtime": 35.5709, "eval_samples_per_second": 22.35, "eval_steps_per_second": 1.406, "step": 24278 }, { "epoch": 61.56, "learning_rate": 1.9221105527638193e-05, "loss": 0.1847, "step": 24500 }, { "epoch": 62.0, "eval_bleu": 0.2219, "eval_gen_len": 19.0, "eval_loss": 0.9057827591896057, "eval_runtime": 35.9775, "eval_samples_per_second": 22.097, "eval_steps_per_second": 1.39, "step": 24676 }, { "epoch": 62.81, "learning_rate": 1.8592964824120602e-05, "loss": 0.1803, "step": 25000 }, { "epoch": 63.0, "eval_bleu": 0.2336, "eval_gen_len": 19.0, "eval_loss": 0.9056702852249146, "eval_runtime": 35.7167, "eval_samples_per_second": 22.259, "eval_steps_per_second": 1.4, "step": 25074 }, { "epoch": 64.0, "eval_bleu": 0.2156, "eval_gen_len": 19.0, "eval_loss": 0.9173711538314819, "eval_runtime": 35.7315, "eval_samples_per_second": 22.249, "eval_steps_per_second": 1.399, "step": 25472 }, { "epoch": 64.07, "learning_rate": 1.7964824120603014e-05, "loss": 0.1758, "step": 25500 }, { "epoch": 65.0, "eval_bleu": 0.1951, "eval_gen_len": 19.0, "eval_loss": 0.922991156578064, "eval_runtime": 35.7331, "eval_samples_per_second": 22.248, "eval_steps_per_second": 1.399, "step": 25870 }, { "epoch": 65.33, "learning_rate": 1.7336683417085427e-05, "loss": 0.1701, "step": 26000 }, { "epoch": 66.0, "eval_bleu": 0.2249, "eval_gen_len": 19.0, "eval_loss": 0.9349916577339172, "eval_runtime": 35.6317, "eval_samples_per_second": 22.312, "eval_steps_per_second": 1.403, "step": 26268 }, { "epoch": 66.58, "learning_rate": 1.670854271356784e-05, "loss": 0.1673, "step": 26500 }, { "epoch": 67.0, "eval_bleu": 0.2224, "eval_gen_len": 19.0, "eval_loss": 0.9416642189025879, "eval_runtime": 35.7511, "eval_samples_per_second": 22.237, "eval_steps_per_second": 1.399, "step": 26666 }, { "epoch": 67.84, "learning_rate": 1.608040201005025e-05, "loss": 0.1614, "step": 27000 }, { "epoch": 68.0, "eval_bleu": 0.2161, "eval_gen_len": 19.0, "eval_loss": 0.9508859515190125, "eval_runtime": 35.6367, "eval_samples_per_second": 22.308, "eval_steps_per_second": 1.403, "step": 27064 }, { "epoch": 69.0, "eval_bleu": 0.2183, "eval_gen_len": 19.0, "eval_loss": 0.9652993083000183, "eval_runtime": 35.6263, "eval_samples_per_second": 22.315, "eval_steps_per_second": 1.403, "step": 27462 }, { "epoch": 69.1, "learning_rate": 1.5452261306532663e-05, "loss": 0.1578, "step": 27500 }, { "epoch": 70.0, "eval_bleu": 0.2113, "eval_gen_len": 19.0, "eval_loss": 0.9633088111877441, "eval_runtime": 35.7594, "eval_samples_per_second": 22.232, "eval_steps_per_second": 1.398, "step": 27860 }, { "epoch": 70.35, "learning_rate": 1.4824120603015077e-05, "loss": 0.1536, "step": 28000 }, { "epoch": 71.0, "eval_bleu": 0.2177, "eval_gen_len": 19.0, "eval_loss": 0.9783052802085876, "eval_runtime": 35.5411, "eval_samples_per_second": 22.368, "eval_steps_per_second": 1.407, "step": 28258 }, { "epoch": 71.61, "learning_rate": 1.4195979899497489e-05, "loss": 0.1513, "step": 28500 }, { "epoch": 72.0, "eval_bleu": 0.2179, "eval_gen_len": 19.0, "eval_loss": 0.9754663109779358, "eval_runtime": 35.6862, "eval_samples_per_second": 22.278, "eval_steps_per_second": 1.401, "step": 28656 }, { "epoch": 72.86, "learning_rate": 1.3567839195979901e-05, "loss": 0.147, "step": 29000 }, { "epoch": 73.0, "eval_bleu": 0.2273, "eval_gen_len": 19.0, "eval_loss": 0.9910703897476196, "eval_runtime": 35.9105, "eval_samples_per_second": 22.138, "eval_steps_per_second": 1.392, "step": 29054 }, { "epoch": 74.0, "eval_bleu": 0.2157, "eval_gen_len": 19.0, "eval_loss": 0.9854773283004761, "eval_runtime": 35.6352, "eval_samples_per_second": 22.309, "eval_steps_per_second": 1.403, "step": 29452 }, { "epoch": 74.12, "learning_rate": 1.2939698492462313e-05, "loss": 0.1443, "step": 29500 }, { "epoch": 75.0, "eval_bleu": 0.2169, "eval_gen_len": 19.0, "eval_loss": 0.9998270273208618, "eval_runtime": 35.64, "eval_samples_per_second": 22.306, "eval_steps_per_second": 1.403, "step": 29850 }, { "epoch": 75.38, "learning_rate": 1.2311557788944725e-05, "loss": 0.1401, "step": 30000 }, { "epoch": 76.0, "eval_bleu": 0.2124, "eval_gen_len": 19.0, "eval_loss": 1.0127789974212646, "eval_runtime": 35.7323, "eval_samples_per_second": 22.249, "eval_steps_per_second": 1.399, "step": 30248 }, { "epoch": 76.63, "learning_rate": 1.1683417085427137e-05, "loss": 0.1377, "step": 30500 }, { "epoch": 77.0, "eval_bleu": 0.2159, "eval_gen_len": 19.0, "eval_loss": 1.0114222764968872, "eval_runtime": 35.7601, "eval_samples_per_second": 22.231, "eval_steps_per_second": 1.398, "step": 30646 }, { "epoch": 77.89, "learning_rate": 1.105527638190955e-05, "loss": 0.1342, "step": 31000 }, { "epoch": 78.0, "eval_bleu": 0.2152, "eval_gen_len": 19.0, "eval_loss": 1.0248533487319946, "eval_runtime": 35.6808, "eval_samples_per_second": 22.281, "eval_steps_per_second": 1.401, "step": 31044 }, { "epoch": 79.0, "eval_bleu": 0.2233, "eval_gen_len": 19.0, "eval_loss": 1.0258084535598755, "eval_runtime": 35.6984, "eval_samples_per_second": 22.27, "eval_steps_per_second": 1.401, "step": 31442 }, { "epoch": 79.15, "learning_rate": 1.042713567839196e-05, "loss": 0.1336, "step": 31500 }, { "epoch": 80.0, "eval_bleu": 0.2194, "eval_gen_len": 19.0, "eval_loss": 1.0308655500411987, "eval_runtime": 35.6369, "eval_samples_per_second": 22.308, "eval_steps_per_second": 1.403, "step": 31840 }, { "epoch": 80.4, "learning_rate": 9.798994974874372e-06, "loss": 0.1307, "step": 32000 }, { "epoch": 81.0, "eval_bleu": 0.2122, "eval_gen_len": 19.0, "eval_loss": 1.032060146331787, "eval_runtime": 35.5991, "eval_samples_per_second": 22.332, "eval_steps_per_second": 1.405, "step": 32238 }, { "epoch": 81.66, "learning_rate": 9.170854271356784e-06, "loss": 0.1277, "step": 32500 }, { "epoch": 82.0, "eval_bleu": 0.2191, "eval_gen_len": 19.0, "eval_loss": 1.034020185470581, "eval_runtime": 35.662, "eval_samples_per_second": 22.293, "eval_steps_per_second": 1.402, "step": 32636 }, { "epoch": 82.91, "learning_rate": 8.542713567839196e-06, "loss": 0.1262, "step": 33000 }, { "epoch": 83.0, "eval_bleu": 0.2123, "eval_gen_len": 19.0, "eval_loss": 1.0493375062942505, "eval_runtime": 35.6012, "eval_samples_per_second": 22.331, "eval_steps_per_second": 1.404, "step": 33034 }, { "epoch": 84.0, "eval_bleu": 0.2273, "eval_gen_len": 19.0, "eval_loss": 1.0544501543045044, "eval_runtime": 35.7115, "eval_samples_per_second": 22.262, "eval_steps_per_second": 1.4, "step": 33432 }, { "epoch": 84.17, "learning_rate": 7.914572864321608e-06, "loss": 0.1233, "step": 33500 }, { "epoch": 85.0, "eval_bleu": 0.2184, "eval_gen_len": 19.0, "eval_loss": 1.0550196170806885, "eval_runtime": 35.6365, "eval_samples_per_second": 22.309, "eval_steps_per_second": 1.403, "step": 33830 }, { "epoch": 85.43, "learning_rate": 7.28643216080402e-06, "loss": 0.1233, "step": 34000 }, { "epoch": 86.0, "eval_bleu": 0.2241, "eval_gen_len": 19.0, "eval_loss": 1.0545953512191772, "eval_runtime": 35.7457, "eval_samples_per_second": 22.24, "eval_steps_per_second": 1.399, "step": 34228 }, { "epoch": 86.68, "learning_rate": 6.658291457286432e-06, "loss": 0.1205, "step": 34500 }, { "epoch": 87.0, "eval_bleu": 0.2246, "eval_gen_len": 19.0, "eval_loss": 1.0695993900299072, "eval_runtime": 35.6623, "eval_samples_per_second": 22.292, "eval_steps_per_second": 1.402, "step": 34626 }, { "epoch": 87.94, "learning_rate": 6.030150753768844e-06, "loss": 0.1189, "step": 35000 }, { "epoch": 88.0, "eval_bleu": 0.2237, "eval_gen_len": 19.0, "eval_loss": 1.0730416774749756, "eval_runtime": 35.8819, "eval_samples_per_second": 22.156, "eval_steps_per_second": 1.393, "step": 35024 }, { "epoch": 89.0, "eval_bleu": 0.2308, "eval_gen_len": 19.0, "eval_loss": 1.068780541419983, "eval_runtime": 35.7114, "eval_samples_per_second": 22.262, "eval_steps_per_second": 1.4, "step": 35422 }, { "epoch": 89.2, "learning_rate": 5.402010050251256e-06, "loss": 0.1173, "step": 35500 }, { "epoch": 90.0, "eval_bleu": 0.2267, "eval_gen_len": 19.0, "eval_loss": 1.0783226490020752, "eval_runtime": 35.7523, "eval_samples_per_second": 22.236, "eval_steps_per_second": 1.399, "step": 35820 }, { "epoch": 90.45, "learning_rate": 4.773869346733668e-06, "loss": 0.1154, "step": 36000 }, { "epoch": 91.0, "eval_bleu": 0.2262, "eval_gen_len": 19.0, "eval_loss": 1.0766741037368774, "eval_runtime": 35.7535, "eval_samples_per_second": 22.236, "eval_steps_per_second": 1.398, "step": 36218 }, { "epoch": 91.71, "learning_rate": 4.1457286432160804e-06, "loss": 0.115, "step": 36500 }, { "epoch": 92.0, "eval_bleu": 0.2214, "eval_gen_len": 19.0, "eval_loss": 1.0834720134735107, "eval_runtime": 35.7567, "eval_samples_per_second": 22.234, "eval_steps_per_second": 1.398, "step": 36616 }, { "epoch": 92.96, "learning_rate": 3.5175879396984926e-06, "loss": 0.1136, "step": 37000 }, { "epoch": 93.0, "eval_bleu": 0.2284, "eval_gen_len": 19.0, "eval_loss": 1.0788373947143555, "eval_runtime": 35.8397, "eval_samples_per_second": 22.182, "eval_steps_per_second": 1.395, "step": 37014 }, { "epoch": 94.0, "eval_bleu": 0.2269, "eval_gen_len": 19.0, "eval_loss": 1.0876238346099854, "eval_runtime": 35.7552, "eval_samples_per_second": 22.235, "eval_steps_per_second": 1.398, "step": 37412 }, { "epoch": 94.22, "learning_rate": 2.8894472361809047e-06, "loss": 0.1126, "step": 37500 }, { "epoch": 95.0, "eval_bleu": 0.2212, "eval_gen_len": 19.0, "eval_loss": 1.0935641527175903, "eval_runtime": 35.7207, "eval_samples_per_second": 22.256, "eval_steps_per_second": 1.4, "step": 37810 }, { "epoch": 95.48, "learning_rate": 2.261306532663317e-06, "loss": 0.1118, "step": 38000 }, { "epoch": 96.0, "eval_bleu": 0.2207, "eval_gen_len": 19.0, "eval_loss": 1.0917831659317017, "eval_runtime": 35.5884, "eval_samples_per_second": 22.339, "eval_steps_per_second": 1.405, "step": 38208 }, { "epoch": 96.73, "learning_rate": 1.6331658291457288e-06, "loss": 0.111, "step": 38500 }, { "epoch": 97.0, "eval_bleu": 0.2217, "eval_gen_len": 19.0, "eval_loss": 1.0943822860717773, "eval_runtime": 35.7423, "eval_samples_per_second": 22.243, "eval_steps_per_second": 1.399, "step": 38606 }, { "epoch": 97.99, "learning_rate": 1.0050251256281407e-06, "loss": 0.1106, "step": 39000 }, { "epoch": 98.0, "eval_bleu": 0.2203, "eval_gen_len": 19.0, "eval_loss": 1.0962368249893188, "eval_runtime": 35.6215, "eval_samples_per_second": 22.318, "eval_steps_per_second": 1.404, "step": 39004 }, { "epoch": 99.0, "eval_bleu": 0.2182, "eval_gen_len": 19.0, "eval_loss": 1.0994266271591187, "eval_runtime": 35.6909, "eval_samples_per_second": 22.275, "eval_steps_per_second": 1.401, "step": 39402 }, { "epoch": 99.25, "learning_rate": 3.7688442211055275e-07, "loss": 0.1088, "step": 39500 }, { "epoch": 100.0, "eval_bleu": 0.2193, "eval_gen_len": 19.0, "eval_loss": 1.099919319152832, "eval_runtime": 35.8327, "eval_samples_per_second": 22.186, "eval_steps_per_second": 1.395, "step": 39800 }, { "epoch": 100.0, "step": 39800, "total_flos": 2.262748628238336e+17, "train_loss": 0.35148891755683936, "train_runtime": 21066.5572, "train_samples_per_second": 30.204, "train_steps_per_second": 1.889 } ], "max_steps": 39800, "num_train_epochs": 100, "total_flos": 2.262748628238336e+17, "trial_name": null, "trial_params": null }