|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 100.0,
  "global_step": 39800,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "eval_bleu": 0.0907,
      "eval_gen_len": 19.0,
      "eval_loss": 0.9955226182937622,
      "eval_runtime": 38.0064,
      "eval_samples_per_second": 20.891,
      "eval_steps_per_second": 1.316,
      "step": 398
    },
    {
      "epoch": 1.26,
      "learning_rate": 4.937185929648241e-05,
      "loss": 1.4196,
      "step": 500
    },
    {
      "epoch": 2.0,
      "eval_bleu": 0.5548,
      "eval_gen_len": 19.0,
      "eval_loss": 0.8669910430908203,
      "eval_runtime": 36.5925,
      "eval_samples_per_second": 21.698,
      "eval_steps_per_second": 1.366,
      "step": 796
    },
    {
      "epoch": 2.51,
      "learning_rate": 4.874371859296483e-05,
      "loss": 0.9762,
      "step": 1000
    },
    {
      "epoch": 3.0,
      "eval_bleu": 0.2508,
      "eval_gen_len": 19.0,
      "eval_loss": 0.8083329796791077,
      "eval_runtime": 36.4935,
      "eval_samples_per_second": 21.757,
      "eval_steps_per_second": 1.37,
      "step": 1194
    },
    {
      "epoch": 3.77,
      "learning_rate": 4.8115577889447235e-05,
      "loss": 0.8703,
      "step": 1500
    },
    {
      "epoch": 4.0,
      "eval_bleu": 0.5692,
      "eval_gen_len": 19.0,
      "eval_loss": 0.7637783288955688,
      "eval_runtime": 36.123,
      "eval_samples_per_second": 21.98,
      "eval_steps_per_second": 1.384,
      "step": 1592
    },
    {
      "epoch": 5.0,
      "eval_bleu": 0.3461,
      "eval_gen_len": 19.0,
      "eval_loss": 0.7334607243537903,
      "eval_runtime": 36.0793,
      "eval_samples_per_second": 22.007,
      "eval_steps_per_second": 1.386,
      "step": 1990
    },
    {
      "epoch": 5.03,
      "learning_rate": 4.748743718592965e-05,
      "loss": 0.8098,
      "step": 2000
    },
    {
      "epoch": 6.0,
      "eval_bleu": 0.399,
      "eval_gen_len": 19.0,
      "eval_loss": 0.7078682780265808,
      "eval_runtime": 36.1012,
      "eval_samples_per_second": 21.994,
      "eval_steps_per_second": 1.385,
      "step": 2388
    },
    {
      "epoch": 6.28,
      "learning_rate": 4.685929648241206e-05,
      "loss": 0.7592,
      "step": 2500
    },
    {
      "epoch": 7.0,
      "eval_bleu": 0.3376,
      "eval_gen_len": 19.0,
      "eval_loss": 0.6845763325691223,
      "eval_runtime": 35.9042,
      "eval_samples_per_second": 22.114,
      "eval_steps_per_second": 1.393,
      "step": 2786
    },
    {
      "epoch": 7.54,
      "learning_rate": 4.6231155778894475e-05,
      "loss": 0.7167,
      "step": 3000
    },
    {
      "epoch": 8.0,
      "eval_bleu": 0.4617,
      "eval_gen_len": 19.0,
      "eval_loss": 0.6675065755844116,
      "eval_runtime": 35.9616,
      "eval_samples_per_second": 22.079,
      "eval_steps_per_second": 1.39,
      "step": 3184
    },
    {
      "epoch": 8.79,
      "learning_rate": 4.5603015075376884e-05,
      "loss": 0.6881,
      "step": 3500
    },
    {
      "epoch": 9.0,
      "eval_bleu": 0.438,
      "eval_gen_len": 19.0,
      "eval_loss": 0.6496185660362244,
      "eval_runtime": 36.0502,
      "eval_samples_per_second": 22.025,
      "eval_steps_per_second": 1.387,
      "step": 3582
    },
    {
      "epoch": 10.0,
      "eval_bleu": 0.4397,
      "eval_gen_len": 19.0,
      "eval_loss": 0.6296620965003967,
      "eval_runtime": 36.04,
      "eval_samples_per_second": 22.031,
      "eval_steps_per_second": 1.387,
      "step": 3980
    },
    {
      "epoch": 10.05,
      "learning_rate": 4.49748743718593e-05,
      "loss": 0.6543,
      "step": 4000
    },
    {
      "epoch": 11.0,
      "eval_bleu": 0.4078,
      "eval_gen_len": 19.0,
      "eval_loss": 0.6143925189971924,
      "eval_runtime": 36.2369,
      "eval_samples_per_second": 21.911,
      "eval_steps_per_second": 1.38,
      "step": 4378
    },
    {
      "epoch": 11.31,
      "learning_rate": 4.434673366834171e-05,
      "loss": 0.6245,
      "step": 4500
    },
    {
      "epoch": 12.0,
      "eval_bleu": 0.3468,
      "eval_gen_len": 19.0,
      "eval_loss": 0.6090631484985352,
      "eval_runtime": 36.2496,
      "eval_samples_per_second": 21.904,
      "eval_steps_per_second": 1.379,
      "step": 4776
    },
    {
      "epoch": 12.56,
      "learning_rate": 4.3718592964824124e-05,
      "loss": 0.5959,
      "step": 5000
    },
    {
      "epoch": 13.0,
      "eval_bleu": 0.433,
      "eval_gen_len": 19.0,
      "eval_loss": 0.603904664516449,
      "eval_runtime": 36.0829,
      "eval_samples_per_second": 22.005,
      "eval_steps_per_second": 1.386,
      "step": 5174
    },
    {
      "epoch": 13.82,
      "learning_rate": 4.309045226130653e-05,
      "loss": 0.5766,
      "step": 5500
    },
    {
      "epoch": 14.0,
      "eval_bleu": 0.4332,
      "eval_gen_len": 19.0,
      "eval_loss": 0.5970507264137268,
      "eval_runtime": 36.1622,
      "eval_samples_per_second": 21.957,
      "eval_steps_per_second": 1.383,
      "step": 5572
    },
    {
      "epoch": 15.0,
      "eval_bleu": 0.4291,
      "eval_gen_len": 19.0,
      "eval_loss": 0.5931375622749329,
      "eval_runtime": 36.0907,
      "eval_samples_per_second": 22.0,
      "eval_steps_per_second": 1.385,
      "step": 5970
    },
    {
      "epoch": 15.08,
      "learning_rate": 4.246231155778895e-05,
      "loss": 0.5541,
      "step": 6000
    },
    {
      "epoch": 16.0,
      "eval_bleu": 0.4504,
      "eval_gen_len": 19.0,
      "eval_loss": 0.5876542329788208,
      "eval_runtime": 36.3841,
      "eval_samples_per_second": 21.823,
      "eval_steps_per_second": 1.374,
      "step": 6368
    },
    {
      "epoch": 16.33,
      "learning_rate": 4.183417085427136e-05,
      "loss": 0.5331,
      "step": 6500
    },
    {
      "epoch": 17.0,
      "eval_bleu": 0.4359,
      "eval_gen_len": 19.0,
      "eval_loss": 0.5872967839241028,
      "eval_runtime": 36.0616,
      "eval_samples_per_second": 22.018,
      "eval_steps_per_second": 1.387,
      "step": 6766
    },
    {
      "epoch": 17.59,
      "learning_rate": 4.120603015075377e-05,
      "loss": 0.5169,
      "step": 7000
    },
    {
      "epoch": 18.0,
      "eval_bleu": 0.419,
      "eval_gen_len": 19.0,
      "eval_loss": 0.5864170789718628,
      "eval_runtime": 36.0624,
      "eval_samples_per_second": 22.017,
      "eval_steps_per_second": 1.386,
      "step": 7164
    },
    {
      "epoch": 18.84,
      "learning_rate": 4.057788944723618e-05,
      "loss": 0.4991,
      "step": 7500
    },
    {
      "epoch": 19.0,
      "eval_bleu": 0.4191,
      "eval_gen_len": 19.0,
      "eval_loss": 0.5880436897277832,
      "eval_runtime": 36.0649,
      "eval_samples_per_second": 22.016,
      "eval_steps_per_second": 1.386,
      "step": 7562
    },
    {
      "epoch": 20.0,
      "eval_bleu": 0.4535,
      "eval_gen_len": 19.0,
      "eval_loss": 0.5844656825065613,
      "eval_runtime": 36.1564,
      "eval_samples_per_second": 21.96,
      "eval_steps_per_second": 1.383,
      "step": 7960
    },
    {
      "epoch": 20.1,
      "learning_rate": 3.9949748743718597e-05,
      "loss": 0.4827,
      "step": 8000
    },
    {
      "epoch": 21.0,
      "eval_bleu": 0.4614,
      "eval_gen_len": 19.0,
      "eval_loss": 0.588854968547821,
      "eval_runtime": 36.034,
      "eval_samples_per_second": 22.035,
      "eval_steps_per_second": 1.388,
      "step": 8358
    },
    {
      "epoch": 21.36,
      "learning_rate": 3.9321608040201005e-05,
      "loss": 0.4646,
      "step": 8500
    },
    {
      "epoch": 22.0,
      "eval_bleu": 0.4075,
      "eval_gen_len": 19.0,
      "eval_loss": 0.5893688201904297,
      "eval_runtime": 36.2919,
      "eval_samples_per_second": 21.878,
      "eval_steps_per_second": 1.378,
      "step": 8756
    },
    {
      "epoch": 22.61,
      "learning_rate": 3.869346733668342e-05,
      "loss": 0.4523,
      "step": 9000
    },
    {
      "epoch": 23.0,
      "eval_bleu": 0.4399,
      "eval_gen_len": 19.0,
      "eval_loss": 0.5904817581176758,
      "eval_runtime": 36.1578,
      "eval_samples_per_second": 21.959,
      "eval_steps_per_second": 1.383,
      "step": 9154
    },
    {
      "epoch": 23.87,
      "learning_rate": 3.806532663316583e-05,
      "loss": 0.437,
      "step": 9500
    },
    {
      "epoch": 24.0,
      "eval_bleu": 0.4369,
      "eval_gen_len": 19.0,
      "eval_loss": 0.5984958410263062,
      "eval_runtime": 36.0725,
      "eval_samples_per_second": 22.011,
      "eval_steps_per_second": 1.386,
      "step": 9552
    },
    {
      "epoch": 25.0,
      "eval_bleu": 0.4056,
      "eval_gen_len": 19.0,
      "eval_loss": 0.5959633588790894,
      "eval_runtime": 35.9929,
      "eval_samples_per_second": 22.06,
      "eval_steps_per_second": 1.389,
      "step": 9950
    },
    {
      "epoch": 25.13,
      "learning_rate": 3.7437185929648245e-05,
      "loss": 0.4229,
      "step": 10000
    },
    {
      "epoch": 26.0,
      "eval_bleu": 0.4252,
      "eval_gen_len": 19.0,
      "eval_loss": 0.5961562395095825,
      "eval_runtime": 36.1884,
      "eval_samples_per_second": 21.941,
      "eval_steps_per_second": 1.382,
      "step": 10348
    },
    {
      "epoch": 26.38,
      "learning_rate": 3.6809045226130654e-05,
      "loss": 0.4091,
      "step": 10500
    },
    {
      "epoch": 27.0,
      "eval_bleu": 0.4713,
      "eval_gen_len": 19.0,
      "eval_loss": 0.6049270629882812,
      "eval_runtime": 36.1335,
      "eval_samples_per_second": 21.974,
      "eval_steps_per_second": 1.384,
      "step": 10746
    },
    {
      "epoch": 27.64,
      "learning_rate": 3.618090452261307e-05,
      "loss": 0.3965,
      "step": 11000
    },
    {
      "epoch": 28.0,
      "eval_bleu": 0.4242,
      "eval_gen_len": 19.0,
      "eval_loss": 0.6118378639221191,
      "eval_runtime": 36.0809,
      "eval_samples_per_second": 22.006,
      "eval_steps_per_second": 1.386,
      "step": 11144
    },
    {
      "epoch": 28.89,
      "learning_rate": 3.555276381909548e-05,
      "loss": 0.3842,
      "step": 11500
    },
    {
      "epoch": 29.0,
      "eval_bleu": 0.3924,
      "eval_gen_len": 19.0,
      "eval_loss": 0.6169702410697937,
      "eval_runtime": 35.9704,
      "eval_samples_per_second": 22.074,
      "eval_steps_per_second": 1.39,
      "step": 11542
    },
    {
      "epoch": 30.0,
      "eval_bleu": 0.3984,
      "eval_gen_len": 19.0,
      "eval_loss": 0.6114440560340881,
      "eval_runtime": 35.9519,
      "eval_samples_per_second": 22.085,
      "eval_steps_per_second": 1.391,
      "step": 11940
    },
    {
      "epoch": 30.15,
      "learning_rate": 3.4924623115577894e-05,
      "loss": 0.3718,
      "step": 12000
    },
    {
      "epoch": 31.0,
      "eval_bleu": 0.4186,
      "eval_gen_len": 19.0,
      "eval_loss": 0.6303505301475525,
      "eval_runtime": 36.0633,
      "eval_samples_per_second": 22.017,
      "eval_steps_per_second": 1.386,
      "step": 12338
    },
    {
      "epoch": 31.41,
      "learning_rate": 3.42964824120603e-05,
      "loss": 0.3585,
      "step": 12500
    },
    {
      "epoch": 32.0,
      "eval_bleu": 0.3846,
      "eval_gen_len": 19.0,
      "eval_loss": 0.6364237070083618,
      "eval_runtime": 36.1959,
      "eval_samples_per_second": 21.936,
      "eval_steps_per_second": 1.381,
      "step": 12736
    },
    {
      "epoch": 32.66,
      "learning_rate": 3.366834170854272e-05,
      "loss": 0.3473,
      "step": 13000
    },
    {
      "epoch": 33.0,
      "eval_bleu": 0.4058,
      "eval_gen_len": 19.0,
      "eval_loss": 0.6325276494026184,
      "eval_runtime": 36.1326,
      "eval_samples_per_second": 21.975,
      "eval_steps_per_second": 1.384,
      "step": 13134
    },
    {
      "epoch": 33.92,
      "learning_rate": 3.3040201005025127e-05,
      "loss": 0.3377,
      "step": 13500
    },
    {
      "epoch": 34.0,
      "eval_bleu": 0.3669,
      "eval_gen_len": 19.0,
      "eval_loss": 0.6433637142181396,
      "eval_runtime": 36.0557,
      "eval_samples_per_second": 22.021,
      "eval_steps_per_second": 1.387,
      "step": 13532
    },
    {
      "epoch": 35.0,
      "eval_bleu": 0.396,
      "eval_gen_len": 19.0,
      "eval_loss": 0.6559302806854248,
      "eval_runtime": 36.0966,
      "eval_samples_per_second": 21.997,
      "eval_steps_per_second": 1.385,
      "step": 13930
    },
    {
      "epoch": 35.18,
      "learning_rate": 3.241206030150754e-05,
      "loss": 0.3258,
      "step": 14000
    },
    {
      "epoch": 36.0,
      "eval_bleu": 0.4449,
      "eval_gen_len": 19.0,
      "eval_loss": 0.6613517999649048,
      "eval_runtime": 36.0456,
      "eval_samples_per_second": 22.028,
      "eval_steps_per_second": 1.387,
      "step": 14328
    },
    {
      "epoch": 36.43,
      "learning_rate": 3.178391959798995e-05,
      "loss": 0.3144,
      "step": 14500
    },
    {
      "epoch": 37.0,
      "eval_bleu": 0.3988,
      "eval_gen_len": 19.0,
      "eval_loss": 0.6619016528129578,
      "eval_runtime": 36.1443,
      "eval_samples_per_second": 21.967,
      "eval_steps_per_second": 1.383,
      "step": 14726
    },
    {
      "epoch": 37.69,
      "learning_rate": 3.1155778894472366e-05,
      "loss": 0.3062,
      "step": 15000
    },
    {
      "epoch": 38.0,
      "eval_bleu": 0.4133,
      "eval_gen_len": 19.0,
      "eval_loss": 0.681207537651062,
      "eval_runtime": 36.0276,
      "eval_samples_per_second": 22.039,
      "eval_steps_per_second": 1.388,
      "step": 15124
    },
    {
      "epoch": 38.94,
      "learning_rate": 3.0527638190954775e-05,
      "loss": 0.2976,
      "step": 15500
    },
    {
      "epoch": 39.0,
      "eval_bleu": 0.4102,
      "eval_gen_len": 19.0,
      "eval_loss": 0.679516077041626,
      "eval_runtime": 35.9945,
      "eval_samples_per_second": 22.059,
      "eval_steps_per_second": 1.389,
      "step": 15522
    },
    {
      "epoch": 40.0,
      "eval_bleu": 0.3953,
      "eval_gen_len": 19.0,
      "eval_loss": 0.679844856262207,
      "eval_runtime": 36.0821,
      "eval_samples_per_second": 22.005,
      "eval_steps_per_second": 1.386,
      "step": 15920
    },
    {
      "epoch": 40.2,
      "learning_rate": 2.989949748743719e-05,
      "loss": 0.2883,
      "step": 16000
    },
    {
      "epoch": 41.0,
      "eval_bleu": 0.3846,
      "eval_gen_len": 19.0,
      "eval_loss": 0.7088426947593689,
      "eval_runtime": 36.412,
      "eval_samples_per_second": 21.806,
      "eval_steps_per_second": 1.373,
      "step": 16318
    },
    {
      "epoch": 41.46,
      "learning_rate": 2.9271356783919603e-05,
      "loss": 0.2791,
      "step": 16500
    },
    {
      "epoch": 42.0,
      "eval_bleu": 0.3701,
      "eval_gen_len": 19.0,
      "eval_loss": 0.7109950184822083,
      "eval_runtime": 36.0424,
      "eval_samples_per_second": 22.03,
      "eval_steps_per_second": 1.387,
      "step": 16716
    },
    {
      "epoch": 42.71,
      "learning_rate": 2.8643216080402015e-05,
      "loss": 0.2701,
      "step": 17000
    },
    {
      "epoch": 43.0,
      "eval_bleu": 0.3985,
      "eval_gen_len": 19.0,
      "eval_loss": 0.7159502506256104,
      "eval_runtime": 36.0448,
      "eval_samples_per_second": 22.028,
      "eval_steps_per_second": 1.387,
      "step": 17114
    },
    {
      "epoch": 43.97,
      "learning_rate": 2.8015075376884427e-05,
      "loss": 0.2619,
      "step": 17500
    },
    {
      "epoch": 44.0,
      "eval_bleu": 0.3654,
      "eval_gen_len": 19.0,
      "eval_loss": 0.7150431275367737,
      "eval_runtime": 36.1383,
      "eval_samples_per_second": 21.971,
      "eval_steps_per_second": 1.384,
      "step": 17512
    },
    {
      "epoch": 45.0,
      "eval_bleu": 0.394,
      "eval_gen_len": 19.0,
      "eval_loss": 0.7196872234344482,
      "eval_runtime": 36.0028,
      "eval_samples_per_second": 22.054,
      "eval_steps_per_second": 1.389,
      "step": 17910
    },
    {
      "epoch": 45.23,
      "learning_rate": 2.738693467336684e-05,
      "loss": 0.2527,
      "step": 18000
    },
    {
      "epoch": 46.0,
      "eval_bleu": 0.4033,
      "eval_gen_len": 19.0,
      "eval_loss": 0.7386982440948486,
      "eval_runtime": 36.1228,
      "eval_samples_per_second": 21.981,
      "eval_steps_per_second": 1.384,
      "step": 18308
    },
    {
      "epoch": 46.48,
      "learning_rate": 2.6758793969849248e-05,
      "loss": 0.2444,
      "step": 18500
    },
    {
      "epoch": 47.0,
      "eval_bleu": 0.389,
      "eval_gen_len": 19.0,
      "eval_loss": 0.7438174486160278,
      "eval_runtime": 36.1308,
      "eval_samples_per_second": 21.976,
      "eval_steps_per_second": 1.384,
      "step": 18706
    },
    {
      "epoch": 47.74,
      "learning_rate": 2.613065326633166e-05,
      "loss": 0.239,
      "step": 19000
    },
    {
      "epoch": 48.0,
      "eval_bleu": 0.3948,
      "eval_gen_len": 19.0,
      "eval_loss": 0.75967937707901,
      "eval_runtime": 36.1293,
      "eval_samples_per_second": 21.977,
      "eval_steps_per_second": 1.384,
      "step": 19104
    },
    {
      "epoch": 48.99,
      "learning_rate": 2.5502512562814072e-05,
      "loss": 0.2303,
      "step": 19500
    },
    {
      "epoch": 49.0,
      "eval_bleu": 0.3976,
      "eval_gen_len": 19.0,
      "eval_loss": 0.7644997835159302,
      "eval_runtime": 35.9794,
      "eval_samples_per_second": 22.068,
      "eval_steps_per_second": 1.39,
      "step": 19502
    },
    {
      "epoch": 50.0,
      "eval_bleu": 0.385,
      "eval_gen_len": 19.0,
      "eval_loss": 0.7786132097244263,
      "eval_runtime": 36.1835,
      "eval_samples_per_second": 21.944,
      "eval_steps_per_second": 1.382,
      "step": 19900
    },
    {
      "epoch": 50.25,
      "learning_rate": 2.4874371859296484e-05,
      "loss": 0.2212,
      "step": 20000
    },
    {
      "epoch": 51.0,
      "eval_bleu": 0.3948,
      "eval_gen_len": 19.0,
      "eval_loss": 0.7699110507965088,
      "eval_runtime": 36.1227,
      "eval_samples_per_second": 21.981,
      "eval_steps_per_second": 1.384,
      "step": 20298
    },
    {
      "epoch": 51.51,
      "learning_rate": 2.4246231155778896e-05,
      "loss": 0.2157,
      "step": 20500
    },
    {
      "epoch": 52.0,
      "eval_bleu": 0.4265,
      "eval_gen_len": 19.0,
      "eval_loss": 0.7902213931083679,
      "eval_runtime": 36.0895,
      "eval_samples_per_second": 22.001,
      "eval_steps_per_second": 1.385,
      "step": 20696
    },
    {
      "epoch": 52.76,
      "learning_rate": 2.361809045226131e-05,
      "loss": 0.2108,
      "step": 21000
    },
    {
      "epoch": 53.0,
      "eval_bleu": 0.3924,
      "eval_gen_len": 19.0,
      "eval_loss": 0.7906249761581421,
      "eval_runtime": 35.9244,
      "eval_samples_per_second": 22.102,
      "eval_steps_per_second": 1.392,
      "step": 21094
    },
    {
      "epoch": 54.0,
      "eval_bleu": 0.3849,
      "eval_gen_len": 19.0,
      "eval_loss": 0.8097977042198181,
      "eval_runtime": 36.3421,
      "eval_samples_per_second": 21.848,
      "eval_steps_per_second": 1.376,
      "step": 21492
    },
    {
      "epoch": 54.02,
      "learning_rate": 2.298994974874372e-05,
      "loss": 0.2041,
      "step": 21500
    },
    {
      "epoch": 55.0,
      "eval_bleu": 0.3888,
      "eval_gen_len": 19.0,
      "eval_loss": 0.8166631460189819,
      "eval_runtime": 36.1502,
      "eval_samples_per_second": 21.964,
      "eval_steps_per_second": 1.383,
      "step": 21890
    },
    {
      "epoch": 55.28,
      "learning_rate": 2.2361809045226133e-05,
      "loss": 0.1959,
      "step": 22000
    },
    {
      "epoch": 56.0,
      "eval_bleu": 0.4139,
      "eval_gen_len": 19.0,
      "eval_loss": 0.8316635489463806,
      "eval_runtime": 36.2244,
      "eval_samples_per_second": 21.919,
      "eval_steps_per_second": 1.38,
      "step": 22288
    },
    {
      "epoch": 56.53,
      "learning_rate": 2.1733668341708545e-05,
      "loss": 0.1899,
      "step": 22500
    },
    {
      "epoch": 57.0,
      "eval_bleu": 0.4136,
      "eval_gen_len": 19.0,
      "eval_loss": 0.8344787359237671,
      "eval_runtime": 36.367,
      "eval_samples_per_second": 21.833,
      "eval_steps_per_second": 1.375,
      "step": 22686
    },
    {
      "epoch": 57.79,
      "learning_rate": 2.1105527638190957e-05,
      "loss": 0.1868,
      "step": 23000
    },
    {
      "epoch": 58.0,
      "eval_bleu": 0.4093,
      "eval_gen_len": 19.0,
      "eval_loss": 0.848408043384552,
      "eval_runtime": 36.1717,
      "eval_samples_per_second": 21.951,
      "eval_steps_per_second": 1.382,
      "step": 23084
    },
    {
      "epoch": 59.0,
      "eval_bleu": 0.4013,
      "eval_gen_len": 19.0,
      "eval_loss": 0.8663375973701477,
      "eval_runtime": 36.2951,
      "eval_samples_per_second": 21.876,
      "eval_steps_per_second": 1.378,
      "step": 23482
    },
    {
      "epoch": 59.05,
      "learning_rate": 2.047738693467337e-05,
      "loss": 0.1815,
      "step": 23500
    },
    {
      "epoch": 60.0,
      "eval_bleu": 0.3858,
      "eval_gen_len": 19.0,
      "eval_loss": 0.8709424734115601,
      "eval_runtime": 36.2881,
      "eval_samples_per_second": 21.88,
      "eval_steps_per_second": 1.378,
      "step": 23880
    },
    {
      "epoch": 60.3,
      "learning_rate": 1.984924623115578e-05,
      "loss": 0.1744,
      "step": 24000
    },
    {
      "epoch": 61.0,
      "eval_bleu": 0.3716,
      "eval_gen_len": 19.0,
      "eval_loss": 0.8845000863075256,
      "eval_runtime": 36.3319,
      "eval_samples_per_second": 21.854,
      "eval_steps_per_second": 1.376,
      "step": 24278
    },
    {
      "epoch": 61.56,
      "learning_rate": 1.9221105527638193e-05,
      "loss": 0.1709,
      "step": 24500
    },
    {
      "epoch": 62.0,
      "eval_bleu": 0.3781,
      "eval_gen_len": 19.0,
      "eval_loss": 0.878656268119812,
      "eval_runtime": 36.3634,
      "eval_samples_per_second": 21.835,
      "eval_steps_per_second": 1.375,
      "step": 24676
    },
    {
      "epoch": 62.81,
      "learning_rate": 1.8592964824120602e-05,
      "loss": 0.1659,
      "step": 25000
    },
    {
      "epoch": 63.0,
      "eval_bleu": 0.3642,
      "eval_gen_len": 19.0,
      "eval_loss": 0.8844317197799683,
      "eval_runtime": 36.2676,
      "eval_samples_per_second": 21.893,
      "eval_steps_per_second": 1.379,
      "step": 25074
    },
    {
      "epoch": 64.0,
      "eval_bleu": 0.3818,
      "eval_gen_len": 19.0,
      "eval_loss": 0.9034352898597717,
      "eval_runtime": 36.4466,
      "eval_samples_per_second": 21.785,
      "eval_steps_per_second": 1.372,
      "step": 25472
    },
    {
      "epoch": 64.07,
      "learning_rate": 1.7964824120603014e-05,
      "loss": 0.1625,
      "step": 25500
    },
    {
      "epoch": 65.0,
      "eval_bleu": 0.3522,
      "eval_gen_len": 19.0,
      "eval_loss": 0.9116728901863098,
      "eval_runtime": 36.294,
      "eval_samples_per_second": 21.877,
      "eval_steps_per_second": 1.378,
      "step": 25870
    },
    {
      "epoch": 65.33,
      "learning_rate": 1.7336683417085427e-05,
      "loss": 0.1568,
      "step": 26000
    },
    {
      "epoch": 66.0,
      "eval_bleu": 0.3892,
      "eval_gen_len": 19.0,
      "eval_loss": 0.905901312828064,
      "eval_runtime": 36.6662,
      "eval_samples_per_second": 21.655,
      "eval_steps_per_second": 1.364,
      "step": 26268
    },
    {
      "epoch": 66.58,
      "learning_rate": 1.670854271356784e-05,
      "loss": 0.1539,
      "step": 26500
    },
    {
      "epoch": 67.0,
      "eval_bleu": 0.398,
      "eval_gen_len": 19.0,
      "eval_loss": 0.9159536957740784,
      "eval_runtime": 36.246,
      "eval_samples_per_second": 21.906,
      "eval_steps_per_second": 1.379,
      "step": 26666
    },
    {
      "epoch": 67.84,
      "learning_rate": 1.608040201005025e-05,
      "loss": 0.1501,
      "step": 27000
    },
    {
      "epoch": 68.0,
      "eval_bleu": 0.3831,
      "eval_gen_len": 19.0,
      "eval_loss": 0.9332515001296997,
      "eval_runtime": 36.3025,
      "eval_samples_per_second": 21.872,
      "eval_steps_per_second": 1.377,
      "step": 27064
    },
    {
      "epoch": 69.0,
      "eval_bleu": 0.4036,
      "eval_gen_len": 19.0,
      "eval_loss": 0.9350541234016418,
      "eval_runtime": 36.3012,
      "eval_samples_per_second": 21.873,
      "eval_steps_per_second": 1.377,
      "step": 27462
    },
    {
      "epoch": 69.1,
      "learning_rate": 1.5452261306532663e-05,
      "loss": 0.1461,
      "step": 27500
    },
    {
      "epoch": 70.0,
      "eval_bleu": 0.3727,
      "eval_gen_len": 19.0,
      "eval_loss": 0.9484046697616577,
      "eval_runtime": 36.3461,
      "eval_samples_per_second": 21.846,
      "eval_steps_per_second": 1.376,
      "step": 27860
    },
    {
      "epoch": 70.35,
      "learning_rate": 1.4824120603015077e-05,
      "loss": 0.1413,
      "step": 28000
    },
    {
      "epoch": 71.0,
      "eval_bleu": 0.3638,
      "eval_gen_len": 19.0,
      "eval_loss": 0.9522455930709839,
      "eval_runtime": 36.2785,
      "eval_samples_per_second": 21.886,
      "eval_steps_per_second": 1.378,
      "step": 28258
    },
    {
      "epoch": 71.61,
      "learning_rate": 1.4195979899497489e-05,
      "loss": 0.1405,
      "step": 28500
    },
    {
      "epoch": 72.0,
      "eval_bleu": 0.3501,
      "eval_gen_len": 19.0,
      "eval_loss": 0.97245192527771,
      "eval_runtime": 36.3096,
      "eval_samples_per_second": 21.868,
      "eval_steps_per_second": 1.377,
      "step": 28656
    },
    {
      "epoch": 72.86,
      "learning_rate": 1.3567839195979901e-05,
      "loss": 0.1365,
      "step": 29000
    },
    {
      "epoch": 73.0,
      "eval_bleu": 0.372,
      "eval_gen_len": 19.0,
      "eval_loss": 0.9698021411895752,
      "eval_runtime": 36.0935,
      "eval_samples_per_second": 21.998,
      "eval_steps_per_second": 1.385,
      "step": 29054
    },
    {
      "epoch": 74.0,
      "eval_bleu": 0.3727,
      "eval_gen_len": 19.0,
      "eval_loss": 0.9703381657600403,
      "eval_runtime": 36.1692,
      "eval_samples_per_second": 21.952,
      "eval_steps_per_second": 1.382,
      "step": 29452
    },
    {
      "epoch": 74.12,
      "learning_rate": 1.2939698492462313e-05,
      "loss": 0.1328,
      "step": 29500
    },
    {
      "epoch": 75.0,
      "eval_bleu": 0.3834,
      "eval_gen_len": 19.0,
      "eval_loss": 0.9797690510749817,
      "eval_runtime": 36.3947,
      "eval_samples_per_second": 21.816,
      "eval_steps_per_second": 1.374,
      "step": 29850
    },
    {
      "epoch": 75.38,
      "learning_rate": 1.2311557788944725e-05,
      "loss": 0.1298,
      "step": 30000
    },
    {
      "epoch": 76.0,
      "eval_bleu": 0.4008,
      "eval_gen_len": 19.0,
      "eval_loss": 0.9849729537963867,
      "eval_runtime": 36.4039,
      "eval_samples_per_second": 21.811,
      "eval_steps_per_second": 1.373,
      "step": 30248
    },
    {
      "epoch": 76.63,
      "learning_rate": 1.1683417085427137e-05,
      "loss": 0.1283,
      "step": 30500
    },
    {
      "epoch": 77.0,
      "eval_bleu": 0.3815,
      "eval_gen_len": 19.0,
      "eval_loss": 0.9988215565681458,
      "eval_runtime": 36.385,
      "eval_samples_per_second": 21.822,
      "eval_steps_per_second": 1.374,
      "step": 30646
    },
    {
      "epoch": 77.89,
      "learning_rate": 1.105527638190955e-05,
      "loss": 0.1247,
      "step": 31000
    },
    {
      "epoch": 78.0,
      "eval_bleu": 0.3621,
      "eval_gen_len": 19.0,
      "eval_loss": 0.9895645380020142,
      "eval_runtime": 36.1282,
      "eval_samples_per_second": 21.977,
      "eval_steps_per_second": 1.384,
      "step": 31044
    },
    {
      "epoch": 79.0,
      "eval_bleu": 0.3761,
      "eval_gen_len": 19.0,
      "eval_loss": 1.0034637451171875,
      "eval_runtime": 36.0695,
      "eval_samples_per_second": 22.013,
      "eval_steps_per_second": 1.386,
      "step": 31442
    },
    {
      "epoch": 79.15,
      "learning_rate": 1.042713567839196e-05,
      "loss": 0.1222,
      "step": 31500
    },
    {
      "epoch": 80.0,
      "eval_bleu": 0.3729,
      "eval_gen_len": 19.0,
      "eval_loss": 1.0222601890563965,
      "eval_runtime": 36.0319,
      "eval_samples_per_second": 22.036,
      "eval_steps_per_second": 1.388,
      "step": 31840
    },
    {
      "epoch": 80.4,
      "learning_rate": 9.798994974874372e-06,
      "loss": 0.1195,
      "step": 32000
    },
    {
      "epoch": 81.0,
      "eval_bleu": 0.3866,
      "eval_gen_len": 19.0,
      "eval_loss": 1.0171366930007935,
      "eval_runtime": 36.1451,
      "eval_samples_per_second": 21.967,
      "eval_steps_per_second": 1.383,
      "step": 32238
    },
    {
      "epoch": 81.66,
      "learning_rate": 9.170854271356784e-06,
      "loss": 0.1189,
      "step": 32500
    },
    {
      "epoch": 82.0,
      "eval_bleu": 0.3698,
      "eval_gen_len": 19.0,
      "eval_loss": 1.0247294902801514,
      "eval_runtime": 36.1151,
      "eval_samples_per_second": 21.985,
      "eval_steps_per_second": 1.384,
      "step": 32636
    },
    {
      "epoch": 82.91,
      "learning_rate": 8.542713567839196e-06,
      "loss": 0.1175,
      "step": 33000
    },
    {
      "epoch": 83.0,
      "eval_bleu": 0.3657,
      "eval_gen_len": 19.0,
      "eval_loss": 1.0150783061981201,
      "eval_runtime": 36.212,
      "eval_samples_per_second": 21.926,
      "eval_steps_per_second": 1.381,
      "step": 33034
    },
    {
      "epoch": 84.0,
      "eval_bleu": 0.3786,
      "eval_gen_len": 19.0,
      "eval_loss": 1.0387886762619019,
      "eval_runtime": 36.1399,
      "eval_samples_per_second": 21.97,
      "eval_steps_per_second": 1.384,
      "step": 33432
    },
    {
      "epoch": 84.17,
      "learning_rate": 7.914572864321608e-06,
      "loss": 0.1146,
      "step": 33500
    },
    {
      "epoch": 85.0,
      "eval_bleu": 0.3737,
      "eval_gen_len": 19.0,
      "eval_loss": 1.0412553548812866,
      "eval_runtime": 36.1355,
      "eval_samples_per_second": 21.973,
      "eval_steps_per_second": 1.384,
      "step": 33830
    },
    {
      "epoch": 85.43,
      "learning_rate": 7.28643216080402e-06,
      "loss": 0.1124,
      "step": 34000
    },
    {
      "epoch": 86.0,
      "eval_bleu": 0.3803,
      "eval_gen_len": 19.0,
      "eval_loss": 1.0402462482452393,
      "eval_runtime": 36.3349,
      "eval_samples_per_second": 21.852,
      "eval_steps_per_second": 1.376,
      "step": 34228
    },
    {
      "epoch": 86.68,
      "learning_rate": 6.658291457286432e-06,
      "loss": 0.1125,
      "step": 34500
    },
    {
      "epoch": 87.0,
      "eval_bleu": 0.3746,
      "eval_gen_len": 19.0,
      "eval_loss": 1.0519098043441772,
      "eval_runtime": 36.2307,
      "eval_samples_per_second": 21.915,
      "eval_steps_per_second": 1.38,
      "step": 34626
    },
    {
      "epoch": 87.94,
      "learning_rate": 6.030150753768844e-06,
      "loss": 0.1102,
      "step": 35000
    },
    {
      "epoch": 88.0,
      "eval_bleu": 0.3863,
      "eval_gen_len": 19.0,
      "eval_loss": 1.054201364517212,
      "eval_runtime": 36.1231,
      "eval_samples_per_second": 21.98,
      "eval_steps_per_second": 1.384,
      "step": 35024
    },
    {
      "epoch": 89.0,
      "eval_bleu": 0.3839,
      "eval_gen_len": 19.0,
      "eval_loss": 1.0626344680786133,
      "eval_runtime": 36.1414,
      "eval_samples_per_second": 21.969,
      "eval_steps_per_second": 1.383,
      "step": 35422
    },
    {
      "epoch": 89.2,
      "learning_rate": 5.402010050251256e-06,
      "loss": 0.1075,
      "step": 35500
    },
    {
      "epoch": 90.0,
      "eval_bleu": 0.3615,
      "eval_gen_len": 19.0,
      "eval_loss": 1.060184359550476,
      "eval_runtime": 36.2676,
      "eval_samples_per_second": 21.893,
      "eval_steps_per_second": 1.379,
      "step": 35820
    },
    {
      "epoch": 90.45,
      "learning_rate": 4.773869346733668e-06,
      "loss": 0.1069,
      "step": 36000
    },
    {
      "epoch": 91.0,
      "eval_bleu": 0.3692,
      "eval_gen_len": 19.0,
      "eval_loss": 1.0700898170471191,
      "eval_runtime": 36.428,
      "eval_samples_per_second": 21.796,
      "eval_steps_per_second": 1.373,
      "step": 36218
    },
    {
      "epoch": 91.71,
      "learning_rate": 4.1457286432160804e-06,
      "loss": 0.1062,
      "step": 36500
    },
    {
      "epoch": 92.0,
      "eval_bleu": 0.3719,
      "eval_gen_len": 19.0,
      "eval_loss": 1.069925308227539,
      "eval_runtime": 36.0404,
      "eval_samples_per_second": 22.031,
      "eval_steps_per_second": 1.387,
      "step": 36616
    },
    {
      "epoch": 92.96,
      "learning_rate": 3.5175879396984926e-06,
      "loss": 0.1051,
      "step": 37000
    },
    {
      "epoch": 93.0,
      "eval_bleu": 0.3667,
      "eval_gen_len": 19.0,
      "eval_loss": 1.0732213258743286,
      "eval_runtime": 36.1316,
      "eval_samples_per_second": 21.975,
      "eval_steps_per_second": 1.384,
      "step": 37014
    },
    {
      "epoch": 94.0,
      "eval_bleu": 0.3701,
      "eval_gen_len": 19.0,
      "eval_loss": 1.0749058723449707,
      "eval_runtime": 36.2293,
      "eval_samples_per_second": 21.916,
      "eval_steps_per_second": 1.38,
      "step": 37412
    },
    {
      "epoch": 94.22,
      "learning_rate": 2.8894472361809047e-06,
      "loss": 0.1041,
      "step": 37500
    },
    {
      "epoch": 95.0,
      "eval_bleu": 0.3744,
      "eval_gen_len": 19.0,
      "eval_loss": 1.0795801877975464,
      "eval_runtime": 36.2285,
      "eval_samples_per_second": 21.916,
      "eval_steps_per_second": 1.38,
      "step": 37810
    },
    {
      "epoch": 95.48,
      "learning_rate": 2.261306532663317e-06,
      "loss": 0.1034,
      "step": 38000
    },
    {
      "epoch": 96.0,
      "eval_bleu": 0.3771,
      "eval_gen_len": 19.0,
      "eval_loss": 1.0823085308074951,
      "eval_runtime": 36.0893,
      "eval_samples_per_second": 22.001,
      "eval_steps_per_second": 1.385,
      "step": 38208
    },
    {
      "epoch": 96.73,
      "learning_rate": 1.6331658291457288e-06,
      "loss": 0.1031,
      "step": 38500
    },
    {
      "epoch": 97.0,
      "eval_bleu": 0.3775,
      "eval_gen_len": 19.0,
      "eval_loss": 1.0796533823013306,
      "eval_runtime": 36.264,
      "eval_samples_per_second": 21.895,
      "eval_steps_per_second": 1.379,
      "step": 38606
    },
    {
      "epoch": 97.99,
      "learning_rate": 1.0050251256281407e-06,
      "loss": 0.1015,
      "step": 39000
    },
    {
      "epoch": 98.0,
      "eval_bleu": 0.3822,
      "eval_gen_len": 19.0,
      "eval_loss": 1.0842037200927734,
      "eval_runtime": 36.1692,
      "eval_samples_per_second": 21.952,
      "eval_steps_per_second": 1.382,
      "step": 39004
    },
    {
      "epoch": 99.0,
      "eval_bleu": 0.3839,
      "eval_gen_len": 19.0,
      "eval_loss": 1.0859404802322388,
      "eval_runtime": 36.1094,
      "eval_samples_per_second": 21.989,
      "eval_steps_per_second": 1.385,
      "step": 39402
    },
    {
      "epoch": 99.25,
      "learning_rate": 3.7688442211055275e-07,
      "loss": 0.1007,
      "step": 39500
    },
    {
      "epoch": 100.0,
      "eval_bleu": 0.3829,
      "eval_gen_len": 19.0,
      "eval_loss": 1.0861998796463013,
      "eval_runtime": 36.126,
      "eval_samples_per_second": 21.979,
      "eval_steps_per_second": 1.384,
      "step": 39800
    },
    {
      "epoch": 100.0,
      "step": 39800,
      "total_flos": 2.2595375365462426e+17,
      "train_loss": 0.31051562630351465,
      "train_runtime": 21104.6673,
      "train_samples_per_second": 30.107,
      "train_steps_per_second": 1.886
    }
  ],
  "max_steps": 39800,
  "num_train_epochs": 100,
  "total_flos": 2.2595375365462426e+17,
  "trial_name": null,
  "trial_params": null
}
|
|