{ "best_metric": null, "best_model_checkpoint": null, "epoch": 100.0, "global_step": 39800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_bleu": 0.0907, "eval_gen_len": 19.0, "eval_loss": 0.9955226182937622, "eval_runtime": 38.0064, "eval_samples_per_second": 20.891, "eval_steps_per_second": 1.316, "step": 398 }, { "epoch": 1.26, "learning_rate": 4.937185929648241e-05, "loss": 1.4196, "step": 500 }, { "epoch": 2.0, "eval_bleu": 0.5548, "eval_gen_len": 19.0, "eval_loss": 0.8669910430908203, "eval_runtime": 36.5925, "eval_samples_per_second": 21.698, "eval_steps_per_second": 1.366, "step": 796 }, { "epoch": 2.51, "learning_rate": 4.874371859296483e-05, "loss": 0.9762, "step": 1000 }, { "epoch": 3.0, "eval_bleu": 0.2508, "eval_gen_len": 19.0, "eval_loss": 0.8083329796791077, "eval_runtime": 36.4935, "eval_samples_per_second": 21.757, "eval_steps_per_second": 1.37, "step": 1194 }, { "epoch": 3.77, "learning_rate": 4.8115577889447235e-05, "loss": 0.8703, "step": 1500 }, { "epoch": 4.0, "eval_bleu": 0.5692, "eval_gen_len": 19.0, "eval_loss": 0.7637783288955688, "eval_runtime": 36.123, "eval_samples_per_second": 21.98, "eval_steps_per_second": 1.384, "step": 1592 }, { "epoch": 5.0, "eval_bleu": 0.3461, "eval_gen_len": 19.0, "eval_loss": 0.7334607243537903, "eval_runtime": 36.0793, "eval_samples_per_second": 22.007, "eval_steps_per_second": 1.386, "step": 1990 }, { "epoch": 5.03, "learning_rate": 4.748743718592965e-05, "loss": 0.8098, "step": 2000 }, { "epoch": 6.0, "eval_bleu": 0.399, "eval_gen_len": 19.0, "eval_loss": 0.7078682780265808, "eval_runtime": 36.1012, "eval_samples_per_second": 21.994, "eval_steps_per_second": 1.385, "step": 2388 }, { "epoch": 6.28, "learning_rate": 4.685929648241206e-05, "loss": 0.7592, "step": 2500 }, { "epoch": 7.0, "eval_bleu": 0.3376, "eval_gen_len": 19.0, "eval_loss": 0.6845763325691223, "eval_runtime": 35.9042, "eval_samples_per_second": 22.114, "eval_steps_per_second": 1.393, "step": 2786 }, { "epoch": 7.54, "learning_rate": 4.6231155778894475e-05, "loss": 0.7167, "step": 3000 }, { "epoch": 8.0, "eval_bleu": 0.4617, "eval_gen_len": 19.0, "eval_loss": 0.6675065755844116, "eval_runtime": 35.9616, "eval_samples_per_second": 22.079, "eval_steps_per_second": 1.39, "step": 3184 }, { "epoch": 8.79, "learning_rate": 4.5603015075376884e-05, "loss": 0.6881, "step": 3500 }, { "epoch": 9.0, "eval_bleu": 0.438, "eval_gen_len": 19.0, "eval_loss": 0.6496185660362244, "eval_runtime": 36.0502, "eval_samples_per_second": 22.025, "eval_steps_per_second": 1.387, "step": 3582 }, { "epoch": 10.0, "eval_bleu": 0.4397, "eval_gen_len": 19.0, "eval_loss": 0.6296620965003967, "eval_runtime": 36.04, "eval_samples_per_second": 22.031, "eval_steps_per_second": 1.387, "step": 3980 }, { "epoch": 10.05, "learning_rate": 4.49748743718593e-05, "loss": 0.6543, "step": 4000 }, { "epoch": 11.0, "eval_bleu": 0.4078, "eval_gen_len": 19.0, "eval_loss": 0.6143925189971924, "eval_runtime": 36.2369, "eval_samples_per_second": 21.911, "eval_steps_per_second": 1.38, "step": 4378 }, { "epoch": 11.31, "learning_rate": 4.434673366834171e-05, "loss": 0.6245, "step": 4500 }, { "epoch": 12.0, "eval_bleu": 0.3468, "eval_gen_len": 19.0, "eval_loss": 0.6090631484985352, "eval_runtime": 36.2496, "eval_samples_per_second": 21.904, "eval_steps_per_second": 1.379, "step": 4776 }, { "epoch": 12.56, "learning_rate": 4.3718592964824124e-05, "loss": 0.5959, "step": 5000 }, { "epoch": 13.0, "eval_bleu": 0.433, "eval_gen_len": 19.0, "eval_loss": 0.603904664516449, "eval_runtime": 36.0829, "eval_samples_per_second": 22.005, "eval_steps_per_second": 1.386, "step": 5174 }, { "epoch": 13.82, "learning_rate": 4.309045226130653e-05, "loss": 0.5766, "step": 5500 }, { "epoch": 14.0, "eval_bleu": 0.4332, "eval_gen_len": 19.0, "eval_loss": 0.5970507264137268, "eval_runtime": 36.1622, "eval_samples_per_second": 21.957, "eval_steps_per_second": 1.383, "step": 5572 }, { "epoch": 15.0, "eval_bleu": 0.4291, "eval_gen_len": 19.0, "eval_loss": 0.5931375622749329, "eval_runtime": 36.0907, "eval_samples_per_second": 22.0, "eval_steps_per_second": 1.385, "step": 5970 }, { "epoch": 15.08, "learning_rate": 4.246231155778895e-05, "loss": 0.5541, "step": 6000 }, { "epoch": 16.0, "eval_bleu": 0.4504, "eval_gen_len": 19.0, "eval_loss": 0.5876542329788208, "eval_runtime": 36.3841, "eval_samples_per_second": 21.823, "eval_steps_per_second": 1.374, "step": 6368 }, { "epoch": 16.33, "learning_rate": 4.183417085427136e-05, "loss": 0.5331, "step": 6500 }, { "epoch": 17.0, "eval_bleu": 0.4359, "eval_gen_len": 19.0, "eval_loss": 0.5872967839241028, "eval_runtime": 36.0616, "eval_samples_per_second": 22.018, "eval_steps_per_second": 1.387, "step": 6766 }, { "epoch": 17.59, "learning_rate": 4.120603015075377e-05, "loss": 0.5169, "step": 7000 }, { "epoch": 18.0, "eval_bleu": 0.419, "eval_gen_len": 19.0, "eval_loss": 0.5864170789718628, "eval_runtime": 36.0624, "eval_samples_per_second": 22.017, "eval_steps_per_second": 1.386, "step": 7164 }, { "epoch": 18.84, "learning_rate": 4.057788944723618e-05, "loss": 0.4991, "step": 7500 }, { "epoch": 19.0, "eval_bleu": 0.4191, "eval_gen_len": 19.0, "eval_loss": 0.5880436897277832, "eval_runtime": 36.0649, "eval_samples_per_second": 22.016, "eval_steps_per_second": 1.386, "step": 7562 }, { "epoch": 20.0, "eval_bleu": 0.4535, "eval_gen_len": 19.0, "eval_loss": 0.5844656825065613, "eval_runtime": 36.1564, "eval_samples_per_second": 21.96, "eval_steps_per_second": 1.383, "step": 7960 }, { "epoch": 20.1, "learning_rate": 3.9949748743718597e-05, "loss": 0.4827, "step": 8000 }, { "epoch": 21.0, "eval_bleu": 0.4614, "eval_gen_len": 19.0, "eval_loss": 0.588854968547821, "eval_runtime": 36.034, "eval_samples_per_second": 22.035, "eval_steps_per_second": 1.388, "step": 8358 }, { "epoch": 21.36, "learning_rate": 3.9321608040201005e-05, "loss": 0.4646, "step": 8500 }, { "epoch": 22.0, "eval_bleu": 0.4075, "eval_gen_len": 19.0, "eval_loss": 0.5893688201904297, "eval_runtime": 36.2919, "eval_samples_per_second": 21.878, "eval_steps_per_second": 1.378, "step": 8756 }, { "epoch": 22.61, "learning_rate": 3.869346733668342e-05, "loss": 0.4523, "step": 9000 }, { "epoch": 23.0, "eval_bleu": 0.4399, "eval_gen_len": 19.0, "eval_loss": 0.5904817581176758, "eval_runtime": 36.1578, "eval_samples_per_second": 21.959, "eval_steps_per_second": 1.383, "step": 9154 }, { "epoch": 23.87, "learning_rate": 3.806532663316583e-05, "loss": 0.437, "step": 9500 }, { "epoch": 24.0, "eval_bleu": 0.4369, "eval_gen_len": 19.0, "eval_loss": 0.5984958410263062, "eval_runtime": 36.0725, "eval_samples_per_second": 22.011, "eval_steps_per_second": 1.386, "step": 9552 }, { "epoch": 25.0, "eval_bleu": 0.4056, "eval_gen_len": 19.0, "eval_loss": 0.5959633588790894, "eval_runtime": 35.9929, "eval_samples_per_second": 22.06, "eval_steps_per_second": 1.389, "step": 9950 }, { "epoch": 25.13, "learning_rate": 3.7437185929648245e-05, "loss": 0.4229, "step": 10000 }, { "epoch": 26.0, "eval_bleu": 0.4252, "eval_gen_len": 19.0, "eval_loss": 0.5961562395095825, "eval_runtime": 36.1884, "eval_samples_per_second": 21.941, "eval_steps_per_second": 1.382, "step": 10348 }, { "epoch": 26.38, "learning_rate": 3.6809045226130654e-05, "loss": 0.4091, "step": 10500 }, { "epoch": 27.0, "eval_bleu": 0.4713, "eval_gen_len": 19.0, "eval_loss": 0.6049270629882812, "eval_runtime": 36.1335, "eval_samples_per_second": 21.974, "eval_steps_per_second": 1.384, "step": 10746 }, { "epoch": 27.64, "learning_rate": 3.618090452261307e-05, "loss": 0.3965, "step": 11000 }, { "epoch": 28.0, "eval_bleu": 0.4242, "eval_gen_len": 19.0, "eval_loss": 0.6118378639221191, "eval_runtime": 36.0809, "eval_samples_per_second": 22.006, "eval_steps_per_second": 1.386, "step": 11144 }, { "epoch": 28.89, "learning_rate": 3.555276381909548e-05, "loss": 0.3842, "step": 11500 }, { "epoch": 29.0, "eval_bleu": 0.3924, "eval_gen_len": 19.0, "eval_loss": 0.6169702410697937, "eval_runtime": 35.9704, "eval_samples_per_second": 22.074, "eval_steps_per_second": 1.39, "step": 11542 }, { "epoch": 30.0, "eval_bleu": 0.3984, "eval_gen_len": 19.0, "eval_loss": 0.6114440560340881, "eval_runtime": 35.9519, "eval_samples_per_second": 22.085, "eval_steps_per_second": 1.391, "step": 11940 }, { "epoch": 30.15, "learning_rate": 3.4924623115577894e-05, "loss": 0.3718, "step": 12000 }, { "epoch": 31.0, "eval_bleu": 0.4186, "eval_gen_len": 19.0, "eval_loss": 0.6303505301475525, "eval_runtime": 36.0633, "eval_samples_per_second": 22.017, "eval_steps_per_second": 1.386, "step": 12338 }, { "epoch": 31.41, "learning_rate": 3.42964824120603e-05, "loss": 0.3585, "step": 12500 }, { "epoch": 32.0, "eval_bleu": 0.3846, "eval_gen_len": 19.0, "eval_loss": 0.6364237070083618, "eval_runtime": 36.1959, "eval_samples_per_second": 21.936, "eval_steps_per_second": 1.381, "step": 12736 }, { "epoch": 32.66, "learning_rate": 3.366834170854272e-05, "loss": 0.3473, "step": 13000 }, { "epoch": 33.0, "eval_bleu": 0.4058, "eval_gen_len": 19.0, "eval_loss": 0.6325276494026184, "eval_runtime": 36.1326, "eval_samples_per_second": 21.975, "eval_steps_per_second": 1.384, "step": 13134 }, { "epoch": 33.92, "learning_rate": 3.3040201005025127e-05, "loss": 0.3377, "step": 13500 }, { "epoch": 34.0, "eval_bleu": 0.3669, "eval_gen_len": 19.0, "eval_loss": 0.6433637142181396, "eval_runtime": 36.0557, "eval_samples_per_second": 22.021, "eval_steps_per_second": 1.387, "step": 13532 }, { "epoch": 35.0, "eval_bleu": 0.396, "eval_gen_len": 19.0, "eval_loss": 0.6559302806854248, "eval_runtime": 36.0966, "eval_samples_per_second": 21.997, "eval_steps_per_second": 1.385, "step": 13930 }, { "epoch": 35.18, "learning_rate": 3.241206030150754e-05, "loss": 0.3258, "step": 14000 }, { "epoch": 36.0, "eval_bleu": 0.4449, "eval_gen_len": 19.0, "eval_loss": 0.6613517999649048, "eval_runtime": 36.0456, "eval_samples_per_second": 22.028, "eval_steps_per_second": 1.387, "step": 14328 }, { "epoch": 36.43, "learning_rate": 3.178391959798995e-05, "loss": 0.3144, "step": 14500 }, { "epoch": 37.0, "eval_bleu": 0.3988, "eval_gen_len": 19.0, "eval_loss": 0.6619016528129578, "eval_runtime": 36.1443, "eval_samples_per_second": 21.967, "eval_steps_per_second": 1.383, "step": 14726 }, { "epoch": 37.69, "learning_rate": 3.1155778894472366e-05, "loss": 0.3062, "step": 15000 }, { "epoch": 38.0, "eval_bleu": 0.4133, "eval_gen_len": 19.0, "eval_loss": 0.681207537651062, "eval_runtime": 36.0276, "eval_samples_per_second": 22.039, "eval_steps_per_second": 1.388, "step": 15124 }, { "epoch": 38.94, "learning_rate": 3.0527638190954775e-05, "loss": 0.2976, "step": 15500 }, { "epoch": 39.0, "eval_bleu": 0.4102, "eval_gen_len": 19.0, "eval_loss": 0.679516077041626, "eval_runtime": 35.9945, "eval_samples_per_second": 22.059, "eval_steps_per_second": 1.389, "step": 15522 }, { "epoch": 40.0, "eval_bleu": 0.3953, "eval_gen_len": 19.0, "eval_loss": 0.679844856262207, "eval_runtime": 36.0821, "eval_samples_per_second": 22.005, "eval_steps_per_second": 1.386, "step": 15920 }, { "epoch": 40.2, "learning_rate": 2.989949748743719e-05, "loss": 0.2883, "step": 16000 }, { "epoch": 41.0, "eval_bleu": 0.3846, "eval_gen_len": 19.0, "eval_loss": 0.7088426947593689, "eval_runtime": 36.412, "eval_samples_per_second": 21.806, "eval_steps_per_second": 1.373, "step": 16318 }, { "epoch": 41.46, "learning_rate": 2.9271356783919603e-05, "loss": 0.2791, "step": 16500 }, { "epoch": 42.0, "eval_bleu": 0.3701, "eval_gen_len": 19.0, "eval_loss": 0.7109950184822083, "eval_runtime": 36.0424, "eval_samples_per_second": 22.03, "eval_steps_per_second": 1.387, "step": 16716 }, { "epoch": 42.71, "learning_rate": 2.8643216080402015e-05, "loss": 0.2701, "step": 17000 }, { "epoch": 43.0, "eval_bleu": 0.3985, "eval_gen_len": 19.0, "eval_loss": 0.7159502506256104, "eval_runtime": 36.0448, "eval_samples_per_second": 22.028, "eval_steps_per_second": 1.387, "step": 17114 }, { "epoch": 43.97, "learning_rate": 2.8015075376884427e-05, "loss": 0.2619, "step": 17500 }, { "epoch": 44.0, "eval_bleu": 0.3654, "eval_gen_len": 19.0, "eval_loss": 0.7150431275367737, "eval_runtime": 36.1383, "eval_samples_per_second": 21.971, "eval_steps_per_second": 1.384, "step": 17512 }, { "epoch": 45.0, "eval_bleu": 0.394, "eval_gen_len": 19.0, "eval_loss": 0.7196872234344482, "eval_runtime": 36.0028, "eval_samples_per_second": 22.054, "eval_steps_per_second": 1.389, "step": 17910 }, { "epoch": 45.23, "learning_rate": 2.738693467336684e-05, "loss": 0.2527, "step": 18000 }, { "epoch": 46.0, "eval_bleu": 0.4033, "eval_gen_len": 19.0, "eval_loss": 0.7386982440948486, "eval_runtime": 36.1228, "eval_samples_per_second": 21.981, "eval_steps_per_second": 1.384, "step": 18308 }, { "epoch": 46.48, "learning_rate": 2.6758793969849248e-05, "loss": 0.2444, "step": 18500 }, { "epoch": 47.0, "eval_bleu": 0.389, "eval_gen_len": 19.0, "eval_loss": 0.7438174486160278, "eval_runtime": 36.1308, "eval_samples_per_second": 21.976, "eval_steps_per_second": 1.384, "step": 18706 }, { "epoch": 47.74, "learning_rate": 2.613065326633166e-05, "loss": 0.239, "step": 19000 }, { "epoch": 48.0, "eval_bleu": 0.3948, "eval_gen_len": 19.0, "eval_loss": 0.75967937707901, "eval_runtime": 36.1293, "eval_samples_per_second": 21.977, "eval_steps_per_second": 1.384, "step": 19104 }, { "epoch": 48.99, "learning_rate": 2.5502512562814072e-05, "loss": 0.2303, "step": 19500 }, { "epoch": 49.0, "eval_bleu": 0.3976, "eval_gen_len": 19.0, "eval_loss": 0.7644997835159302, "eval_runtime": 35.9794, "eval_samples_per_second": 22.068, "eval_steps_per_second": 1.39, "step": 19502 }, { "epoch": 50.0, "eval_bleu": 0.385, "eval_gen_len": 19.0, "eval_loss": 0.7786132097244263, "eval_runtime": 36.1835, "eval_samples_per_second": 21.944, "eval_steps_per_second": 1.382, "step": 19900 }, { "epoch": 50.25, "learning_rate": 2.4874371859296484e-05, "loss": 0.2212, "step": 20000 }, { "epoch": 51.0, "eval_bleu": 0.3948, "eval_gen_len": 19.0, "eval_loss": 0.7699110507965088, "eval_runtime": 36.1227, "eval_samples_per_second": 21.981, "eval_steps_per_second": 1.384, "step": 20298 }, { "epoch": 51.51, "learning_rate": 2.4246231155778896e-05, "loss": 0.2157, "step": 20500 }, { "epoch": 52.0, "eval_bleu": 0.4265, "eval_gen_len": 19.0, "eval_loss": 0.7902213931083679, "eval_runtime": 36.0895, "eval_samples_per_second": 22.001, "eval_steps_per_second": 1.385, "step": 20696 }, { "epoch": 52.76, "learning_rate": 2.361809045226131e-05, "loss": 0.2108, "step": 21000 }, { "epoch": 53.0, "eval_bleu": 0.3924, "eval_gen_len": 19.0, "eval_loss": 0.7906249761581421, "eval_runtime": 35.9244, "eval_samples_per_second": 22.102, "eval_steps_per_second": 1.392, "step": 21094 }, { "epoch": 54.0, "eval_bleu": 0.3849, "eval_gen_len": 19.0, "eval_loss": 0.8097977042198181, "eval_runtime": 36.3421, "eval_samples_per_second": 21.848, "eval_steps_per_second": 1.376, "step": 21492 }, { "epoch": 54.02, "learning_rate": 2.298994974874372e-05, "loss": 0.2041, "step": 21500 }, { "epoch": 55.0, "eval_bleu": 0.3888, "eval_gen_len": 19.0, "eval_loss": 0.8166631460189819, "eval_runtime": 36.1502, "eval_samples_per_second": 21.964, "eval_steps_per_second": 1.383, "step": 21890 }, { "epoch": 55.28, "learning_rate": 2.2361809045226133e-05, "loss": 0.1959, "step": 22000 }, { "epoch": 56.0, "eval_bleu": 0.4139, "eval_gen_len": 19.0, "eval_loss": 0.8316635489463806, "eval_runtime": 36.2244, "eval_samples_per_second": 21.919, "eval_steps_per_second": 1.38, "step": 22288 }, { "epoch": 56.53, "learning_rate": 2.1733668341708545e-05, "loss": 0.1899, "step": 22500 }, { "epoch": 57.0, "eval_bleu": 0.4136, "eval_gen_len": 19.0, "eval_loss": 0.8344787359237671, "eval_runtime": 36.367, "eval_samples_per_second": 21.833, "eval_steps_per_second": 1.375, "step": 22686 }, { "epoch": 57.79, "learning_rate": 2.1105527638190957e-05, "loss": 0.1868, "step": 23000 }, { "epoch": 58.0, "eval_bleu": 0.4093, "eval_gen_len": 19.0, "eval_loss": 0.848408043384552, "eval_runtime": 36.1717, "eval_samples_per_second": 21.951, "eval_steps_per_second": 1.382, "step": 23084 }, { "epoch": 59.0, "eval_bleu": 0.4013, "eval_gen_len": 19.0, "eval_loss": 0.8663375973701477, "eval_runtime": 36.2951, "eval_samples_per_second": 21.876, "eval_steps_per_second": 1.378, "step": 23482 }, { "epoch": 59.05, "learning_rate": 2.047738693467337e-05, "loss": 0.1815, "step": 23500 }, { "epoch": 60.0, "eval_bleu": 0.3858, "eval_gen_len": 19.0, "eval_loss": 0.8709424734115601, "eval_runtime": 36.2881, "eval_samples_per_second": 21.88, "eval_steps_per_second": 1.378, "step": 23880 }, { "epoch": 60.3, "learning_rate": 1.984924623115578e-05, "loss": 0.1744, "step": 24000 }, { "epoch": 61.0, "eval_bleu": 0.3716, "eval_gen_len": 19.0, "eval_loss": 0.8845000863075256, "eval_runtime": 36.3319, "eval_samples_per_second": 21.854, "eval_steps_per_second": 1.376, "step": 24278 }, { "epoch": 61.56, "learning_rate": 1.9221105527638193e-05, "loss": 0.1709, "step": 24500 }, { "epoch": 62.0, "eval_bleu": 0.3781, "eval_gen_len": 19.0, "eval_loss": 0.878656268119812, "eval_runtime": 36.3634, "eval_samples_per_second": 21.835, "eval_steps_per_second": 1.375, "step": 24676 }, { "epoch": 62.81, "learning_rate": 1.8592964824120602e-05, "loss": 0.1659, "step": 25000 }, { "epoch": 63.0, "eval_bleu": 0.3642, "eval_gen_len": 19.0, "eval_loss": 0.8844317197799683, "eval_runtime": 36.2676, "eval_samples_per_second": 21.893, "eval_steps_per_second": 1.379, "step": 25074 }, { "epoch": 64.0, "eval_bleu": 0.3818, "eval_gen_len": 19.0, "eval_loss": 0.9034352898597717, "eval_runtime": 36.4466, "eval_samples_per_second": 21.785, "eval_steps_per_second": 1.372, "step": 25472 }, { "epoch": 64.07, "learning_rate": 1.7964824120603014e-05, "loss": 0.1625, "step": 25500 }, { "epoch": 65.0, "eval_bleu": 0.3522, "eval_gen_len": 19.0, "eval_loss": 0.9116728901863098, "eval_runtime": 36.294, "eval_samples_per_second": 21.877, "eval_steps_per_second": 1.378, "step": 25870 }, { "epoch": 65.33, "learning_rate": 1.7336683417085427e-05, "loss": 0.1568, "step": 26000 }, { "epoch": 66.0, "eval_bleu": 0.3892, "eval_gen_len": 19.0, "eval_loss": 0.905901312828064, "eval_runtime": 36.6662, "eval_samples_per_second": 21.655, "eval_steps_per_second": 1.364, "step": 26268 }, { "epoch": 66.58, "learning_rate": 1.670854271356784e-05, "loss": 0.1539, "step": 26500 }, { "epoch": 67.0, "eval_bleu": 0.398, "eval_gen_len": 19.0, "eval_loss": 0.9159536957740784, "eval_runtime": 36.246, "eval_samples_per_second": 21.906, "eval_steps_per_second": 1.379, "step": 26666 }, { "epoch": 67.84, "learning_rate": 1.608040201005025e-05, "loss": 0.1501, "step": 27000 }, { "epoch": 68.0, "eval_bleu": 0.3831, "eval_gen_len": 19.0, "eval_loss": 0.9332515001296997, "eval_runtime": 36.3025, "eval_samples_per_second": 21.872, "eval_steps_per_second": 1.377, "step": 27064 }, { "epoch": 69.0, "eval_bleu": 0.4036, "eval_gen_len": 19.0, "eval_loss": 0.9350541234016418, "eval_runtime": 36.3012, "eval_samples_per_second": 21.873, "eval_steps_per_second": 1.377, "step": 27462 }, { "epoch": 69.1, "learning_rate": 1.5452261306532663e-05, "loss": 0.1461, "step": 27500 }, { "epoch": 70.0, "eval_bleu": 0.3727, "eval_gen_len": 19.0, "eval_loss": 0.9484046697616577, "eval_runtime": 36.3461, "eval_samples_per_second": 21.846, "eval_steps_per_second": 1.376, "step": 27860 }, { "epoch": 70.35, "learning_rate": 1.4824120603015077e-05, "loss": 0.1413, "step": 28000 }, { "epoch": 71.0, "eval_bleu": 0.3638, "eval_gen_len": 19.0, "eval_loss": 0.9522455930709839, "eval_runtime": 36.2785, "eval_samples_per_second": 21.886, "eval_steps_per_second": 1.378, "step": 28258 }, { "epoch": 71.61, "learning_rate": 1.4195979899497489e-05, "loss": 0.1405, "step": 28500 }, { "epoch": 72.0, "eval_bleu": 0.3501, "eval_gen_len": 19.0, "eval_loss": 0.97245192527771, "eval_runtime": 36.3096, "eval_samples_per_second": 21.868, "eval_steps_per_second": 1.377, "step": 28656 }, { "epoch": 72.86, "learning_rate": 1.3567839195979901e-05, "loss": 0.1365, "step": 29000 }, { "epoch": 73.0, "eval_bleu": 0.372, "eval_gen_len": 19.0, "eval_loss": 0.9698021411895752, "eval_runtime": 36.0935, "eval_samples_per_second": 21.998, "eval_steps_per_second": 1.385, "step": 29054 }, { "epoch": 74.0, "eval_bleu": 0.3727, "eval_gen_len": 19.0, "eval_loss": 0.9703381657600403, "eval_runtime": 36.1692, "eval_samples_per_second": 21.952, "eval_steps_per_second": 1.382, "step": 29452 }, { "epoch": 74.12, "learning_rate": 1.2939698492462313e-05, "loss": 0.1328, "step": 29500 }, { "epoch": 75.0, "eval_bleu": 0.3834, "eval_gen_len": 19.0, "eval_loss": 0.9797690510749817, "eval_runtime": 36.3947, "eval_samples_per_second": 21.816, "eval_steps_per_second": 1.374, "step": 29850 }, { "epoch": 75.38, "learning_rate": 1.2311557788944725e-05, "loss": 0.1298, "step": 30000 }, { "epoch": 76.0, "eval_bleu": 0.4008, "eval_gen_len": 19.0, "eval_loss": 0.9849729537963867, "eval_runtime": 36.4039, "eval_samples_per_second": 21.811, "eval_steps_per_second": 1.373, "step": 30248 }, { "epoch": 76.63, "learning_rate": 1.1683417085427137e-05, "loss": 0.1283, "step": 30500 }, { "epoch": 77.0, "eval_bleu": 0.3815, "eval_gen_len": 19.0, "eval_loss": 0.9988215565681458, "eval_runtime": 36.385, "eval_samples_per_second": 21.822, "eval_steps_per_second": 1.374, "step": 30646 }, { "epoch": 77.89, "learning_rate": 1.105527638190955e-05, "loss": 0.1247, "step": 31000 }, { "epoch": 78.0, "eval_bleu": 0.3621, "eval_gen_len": 19.0, "eval_loss": 0.9895645380020142, "eval_runtime": 36.1282, "eval_samples_per_second": 21.977, "eval_steps_per_second": 1.384, "step": 31044 }, { "epoch": 79.0, "eval_bleu": 0.3761, "eval_gen_len": 19.0, "eval_loss": 1.0034637451171875, "eval_runtime": 36.0695, "eval_samples_per_second": 22.013, "eval_steps_per_second": 1.386, "step": 31442 }, { "epoch": 79.15, "learning_rate": 1.042713567839196e-05, "loss": 0.1222, "step": 31500 }, { "epoch": 80.0, "eval_bleu": 0.3729, "eval_gen_len": 19.0, "eval_loss": 1.0222601890563965, "eval_runtime": 36.0319, "eval_samples_per_second": 22.036, "eval_steps_per_second": 1.388, "step": 31840 }, { "epoch": 80.4, "learning_rate": 9.798994974874372e-06, "loss": 0.1195, "step": 32000 }, { "epoch": 81.0, "eval_bleu": 0.3866, "eval_gen_len": 19.0, "eval_loss": 1.0171366930007935, "eval_runtime": 36.1451, "eval_samples_per_second": 21.967, "eval_steps_per_second": 1.383, "step": 32238 }, { "epoch": 81.66, "learning_rate": 9.170854271356784e-06, "loss": 0.1189, "step": 32500 }, { "epoch": 82.0, "eval_bleu": 0.3698, "eval_gen_len": 19.0, "eval_loss": 1.0247294902801514, "eval_runtime": 36.1151, "eval_samples_per_second": 21.985, "eval_steps_per_second": 1.384, "step": 32636 }, { "epoch": 82.91, "learning_rate": 8.542713567839196e-06, "loss": 0.1175, "step": 33000 }, { "epoch": 83.0, "eval_bleu": 0.3657, "eval_gen_len": 19.0, "eval_loss": 1.0150783061981201, "eval_runtime": 36.212, "eval_samples_per_second": 21.926, "eval_steps_per_second": 1.381, "step": 33034 }, { "epoch": 84.0, "eval_bleu": 0.3786, "eval_gen_len": 19.0, "eval_loss": 1.0387886762619019, "eval_runtime": 36.1399, "eval_samples_per_second": 21.97, "eval_steps_per_second": 1.384, "step": 33432 }, { "epoch": 84.17, "learning_rate": 7.914572864321608e-06, "loss": 0.1146, "step": 33500 }, { "epoch": 85.0, "eval_bleu": 0.3737, "eval_gen_len": 19.0, "eval_loss": 1.0412553548812866, "eval_runtime": 36.1355, "eval_samples_per_second": 21.973, "eval_steps_per_second": 1.384, "step": 33830 }, { "epoch": 85.43, "learning_rate": 7.28643216080402e-06, "loss": 0.1124, "step": 34000 }, { "epoch": 86.0, "eval_bleu": 0.3803, "eval_gen_len": 19.0, "eval_loss": 1.0402462482452393, "eval_runtime": 36.3349, "eval_samples_per_second": 21.852, "eval_steps_per_second": 1.376, "step": 34228 }, { "epoch": 86.68, "learning_rate": 6.658291457286432e-06, "loss": 0.1125, "step": 34500 }, { "epoch": 87.0, "eval_bleu": 0.3746, "eval_gen_len": 19.0, "eval_loss": 1.0519098043441772, "eval_runtime": 36.2307, "eval_samples_per_second": 21.915, "eval_steps_per_second": 1.38, "step": 34626 }, { "epoch": 87.94, "learning_rate": 6.030150753768844e-06, "loss": 0.1102, "step": 35000 }, { "epoch": 88.0, "eval_bleu": 0.3863, "eval_gen_len": 19.0, "eval_loss": 1.054201364517212, "eval_runtime": 36.1231, "eval_samples_per_second": 21.98, "eval_steps_per_second": 1.384, "step": 35024 }, { "epoch": 89.0, "eval_bleu": 0.3839, "eval_gen_len": 19.0, "eval_loss": 1.0626344680786133, "eval_runtime": 36.1414, "eval_samples_per_second": 21.969, "eval_steps_per_second": 1.383, "step": 35422 }, { "epoch": 89.2, "learning_rate": 5.402010050251256e-06, "loss": 0.1075, "step": 35500 }, { "epoch": 90.0, "eval_bleu": 0.3615, "eval_gen_len": 19.0, "eval_loss": 1.060184359550476, "eval_runtime": 36.2676, "eval_samples_per_second": 21.893, "eval_steps_per_second": 1.379, "step": 35820 }, { "epoch": 90.45, "learning_rate": 4.773869346733668e-06, "loss": 0.1069, "step": 36000 }, { "epoch": 91.0, "eval_bleu": 0.3692, "eval_gen_len": 19.0, "eval_loss": 1.0700898170471191, "eval_runtime": 36.428, "eval_samples_per_second": 21.796, "eval_steps_per_second": 1.373, "step": 36218 }, { "epoch": 91.71, "learning_rate": 4.1457286432160804e-06, "loss": 0.1062, "step": 36500 }, { "epoch": 92.0, "eval_bleu": 0.3719, "eval_gen_len": 19.0, "eval_loss": 1.069925308227539, "eval_runtime": 36.0404, "eval_samples_per_second": 22.031, "eval_steps_per_second": 1.387, "step": 36616 }, { "epoch": 92.96, "learning_rate": 3.5175879396984926e-06, "loss": 0.1051, "step": 37000 }, { "epoch": 93.0, "eval_bleu": 0.3667, "eval_gen_len": 19.0, "eval_loss": 1.0732213258743286, "eval_runtime": 36.1316, "eval_samples_per_second": 21.975, "eval_steps_per_second": 1.384, "step": 37014 }, { "epoch": 94.0, "eval_bleu": 0.3701, "eval_gen_len": 19.0, "eval_loss": 1.0749058723449707, "eval_runtime": 36.2293, "eval_samples_per_second": 21.916, "eval_steps_per_second": 1.38, "step": 37412 }, { "epoch": 94.22, "learning_rate": 2.8894472361809047e-06, "loss": 0.1041, "step": 37500 }, { "epoch": 95.0, "eval_bleu": 0.3744, "eval_gen_len": 19.0, "eval_loss": 1.0795801877975464, "eval_runtime": 36.2285, "eval_samples_per_second": 21.916, "eval_steps_per_second": 1.38, "step": 37810 }, { "epoch": 95.48, "learning_rate": 2.261306532663317e-06, "loss": 0.1034, "step": 38000 }, { "epoch": 96.0, "eval_bleu": 0.3771, "eval_gen_len": 19.0, "eval_loss": 1.0823085308074951, "eval_runtime": 36.0893, "eval_samples_per_second": 22.001, "eval_steps_per_second": 1.385, "step": 38208 }, { "epoch": 96.73, "learning_rate": 1.6331658291457288e-06, "loss": 0.1031, "step": 38500 }, { "epoch": 97.0, "eval_bleu": 0.3775, "eval_gen_len": 19.0, "eval_loss": 1.0796533823013306, "eval_runtime": 36.264, "eval_samples_per_second": 21.895, "eval_steps_per_second": 1.379, "step": 38606 }, { "epoch": 97.99, "learning_rate": 1.0050251256281407e-06, "loss": 0.1015, "step": 39000 }, { "epoch": 98.0, "eval_bleu": 0.3822, "eval_gen_len": 19.0, "eval_loss": 1.0842037200927734, "eval_runtime": 36.1692, "eval_samples_per_second": 21.952, "eval_steps_per_second": 1.382, "step": 39004 }, { "epoch": 99.0, "eval_bleu": 0.3839, "eval_gen_len": 19.0, "eval_loss": 1.0859404802322388, "eval_runtime": 36.1094, "eval_samples_per_second": 21.989, "eval_steps_per_second": 1.385, "step": 39402 }, { "epoch": 99.25, "learning_rate": 3.7688442211055275e-07, "loss": 0.1007, "step": 39500 }, { "epoch": 100.0, "eval_bleu": 0.3829, "eval_gen_len": 19.0, "eval_loss": 1.0861998796463013, "eval_runtime": 36.126, "eval_samples_per_second": 21.979, "eval_steps_per_second": 1.384, "step": 39800 }, { "epoch": 100.0, "step": 39800, "total_flos": 2.2595375365462426e+17, "train_loss": 0.31051562630351465, "train_runtime": 21104.6673, "train_samples_per_second": 30.107, "train_steps_per_second": 1.886 } ], "max_steps": 39800, "num_train_epochs": 100, "total_flos": 2.2595375365462426e+17, "trial_name": null, "trial_params": null }