|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 19.3929173693086,
  "global_step": 11500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.84,
      "learning_rate": 9.578414839797639e-06,
      "loss": 0.3136,
      "step": 500
    },
    {
      "epoch": 1.0,
      "eval_bleu": 17.1673,
      "eval_gen_len": 43.5832,
      "eval_loss": 2.7004430294036865,
      "eval_runtime": 263.1603,
      "eval_samples_per_second": 3.792,
      "eval_steps_per_second": 0.475,
      "step": 593
    },
    {
      "epoch": 1.69,
      "learning_rate": 9.156829679595279e-06,
      "loss": 0.2962,
      "step": 1000
    },
    {
      "epoch": 2.0,
      "eval_bleu": 17.2012,
      "eval_gen_len": 43.6693,
      "eval_loss": 2.719139814376831,
      "eval_runtime": 263.247,
      "eval_samples_per_second": 3.791,
      "eval_steps_per_second": 0.475,
      "step": 1186
    },
    {
      "epoch": 2.53,
      "learning_rate": 8.735244519392918e-06,
      "loss": 0.2927,
      "step": 1500
    },
    {
      "epoch": 3.0,
      "eval_bleu": 17.2291,
      "eval_gen_len": 43.482,
      "eval_loss": 2.7412936687469482,
      "eval_runtime": 260.7955,
      "eval_samples_per_second": 3.827,
      "eval_steps_per_second": 0.479,
      "step": 1779
    },
    {
      "epoch": 3.37,
      "learning_rate": 8.313659359190556e-06,
      "loss": 0.2677,
      "step": 2000
    },
    {
      "epoch": 4.0,
      "eval_bleu": 17.135,
      "eval_gen_len": 43.5862,
      "eval_loss": 2.7617862224578857,
      "eval_runtime": 261.1749,
      "eval_samples_per_second": 3.821,
      "eval_steps_per_second": 0.479,
      "step": 2372
    },
    {
      "epoch": 4.22,
      "learning_rate": 7.892074198988196e-06,
      "loss": 0.2591,
      "step": 2500
    },
    {
      "epoch": 5.0,
      "eval_bleu": 17.5543,
      "eval_gen_len": 43.5922,
      "eval_loss": 2.7780115604400635,
      "eval_runtime": 262.647,
      "eval_samples_per_second": 3.8,
      "eval_steps_per_second": 0.476,
      "step": 2965
    },
    {
      "epoch": 5.06,
      "learning_rate": 7.470489038785835e-06,
      "loss": 0.2473,
      "step": 3000
    },
    {
      "epoch": 5.9,
      "learning_rate": 7.048903878583474e-06,
      "loss": 0.2282,
      "step": 3500
    },
    {
      "epoch": 6.0,
      "eval_bleu": 17.226,
      "eval_gen_len": 43.6703,
      "eval_loss": 2.794311761856079,
      "eval_runtime": 263.8826,
      "eval_samples_per_second": 3.782,
      "eval_steps_per_second": 0.474,
      "step": 3558
    },
    {
      "epoch": 6.75,
      "learning_rate": 6.6273187183811136e-06,
      "loss": 0.2244,
      "step": 4000
    },
    {
      "epoch": 7.0,
      "eval_bleu": 17.615,
      "eval_gen_len": 43.6934,
      "eval_loss": 2.808680295944214,
      "eval_runtime": 264.0527,
      "eval_samples_per_second": 3.78,
      "eval_steps_per_second": 0.473,
      "step": 4151
    },
    {
      "epoch": 7.59,
      "learning_rate": 6.2057335581787524e-06,
      "loss": 0.2196,
      "step": 4500
    },
    {
      "epoch": 8.0,
      "eval_bleu": 17.3227,
      "eval_gen_len": 43.7715,
      "eval_loss": 2.825133800506592,
      "eval_runtime": 281.974,
      "eval_samples_per_second": 3.539,
      "eval_steps_per_second": 0.443,
      "step": 4744
    },
    {
      "epoch": 8.43,
      "learning_rate": 5.784148397976391e-06,
      "loss": 0.2101,
      "step": 5000
    },
    {
      "epoch": 9.0,
      "eval_bleu": 17.5072,
      "eval_gen_len": 43.7084,
      "eval_loss": 2.834676742553711,
      "eval_runtime": 263.2231,
      "eval_samples_per_second": 3.791,
      "eval_steps_per_second": 0.475,
      "step": 5337
    },
    {
      "epoch": 9.27,
      "learning_rate": 5.362563237774031e-06,
      "loss": 0.2077,
      "step": 5500
    },
    {
      "epoch": 10.0,
      "eval_bleu": 17.5712,
      "eval_gen_len": 43.8597,
      "eval_loss": 2.842376708984375,
      "eval_runtime": 270.4686,
      "eval_samples_per_second": 3.69,
      "eval_steps_per_second": 0.462,
      "step": 5930
    },
    {
      "epoch": 10.12,
      "learning_rate": 4.94097807757167e-06,
      "loss": 0.2034,
      "step": 6000
    },
    {
      "epoch": 10.96,
      "learning_rate": 4.519392917369309e-06,
      "loss": 0.1968,
      "step": 6500
    },
    {
      "epoch": 11.0,
      "eval_bleu": 17.6007,
      "eval_gen_len": 43.6994,
      "eval_loss": 2.851884365081787,
      "eval_runtime": 261.9767,
      "eval_samples_per_second": 3.809,
      "eval_steps_per_second": 0.477,
      "step": 6523
    },
    {
      "epoch": 11.8,
      "learning_rate": 4.097807757166948e-06,
      "loss": 0.1902,
      "step": 7000
    },
    {
      "epoch": 12.0,
      "eval_bleu": 17.6333,
      "eval_gen_len": 43.6924,
      "eval_loss": 2.8614132404327393,
      "eval_runtime": 263.2972,
      "eval_samples_per_second": 3.79,
      "eval_steps_per_second": 0.475,
      "step": 7116
    },
    {
      "epoch": 12.65,
      "learning_rate": 3.676222596964587e-06,
      "loss": 0.198,
      "step": 7500
    },
    {
      "epoch": 13.0,
      "eval_bleu": 17.6153,
      "eval_gen_len": 43.7034,
      "eval_loss": 2.865877866744995,
      "eval_runtime": 261.219,
      "eval_samples_per_second": 3.821,
      "eval_steps_per_second": 0.479,
      "step": 7709
    },
    {
      "epoch": 13.49,
      "learning_rate": 3.2546374367622263e-06,
      "loss": 0.1861,
      "step": 8000
    },
    {
      "epoch": 14.0,
      "eval_bleu": 17.5959,
      "eval_gen_len": 43.7154,
      "eval_loss": 2.873347043991089,
      "eval_runtime": 260.1505,
      "eval_samples_per_second": 3.836,
      "eval_steps_per_second": 0.48,
      "step": 8302
    },
    {
      "epoch": 14.33,
      "learning_rate": 2.8330522765598656e-06,
      "loss": 0.1956,
      "step": 8500
    },
    {
      "epoch": 15.0,
      "eval_bleu": 17.6169,
      "eval_gen_len": 43.7164,
      "eval_loss": 2.876323938369751,
      "eval_runtime": 261.1714,
      "eval_samples_per_second": 3.821,
      "eval_steps_per_second": 0.479,
      "step": 8895
    },
    {
      "epoch": 15.18,
      "learning_rate": 2.4114671163575045e-06,
      "loss": 0.1924,
      "step": 9000
    },
    {
      "epoch": 16.0,
      "eval_bleu": 17.5443,
      "eval_gen_len": 43.7194,
      "eval_loss": 2.880269765853882,
      "eval_runtime": 261.8101,
      "eval_samples_per_second": 3.812,
      "eval_steps_per_second": 0.477,
      "step": 9488
    },
    {
      "epoch": 16.02,
      "learning_rate": 1.9898819561551434e-06,
      "loss": 0.1946,
      "step": 9500
    },
    {
      "epoch": 16.86,
      "learning_rate": 1.5682967959527825e-06,
      "loss": 0.1946,
      "step": 10000
    },
    {
      "epoch": 17.0,
      "eval_bleu": 17.577,
      "eval_gen_len": 43.6142,
      "eval_loss": 2.8834807872772217,
      "eval_runtime": 259.2401,
      "eval_samples_per_second": 3.85,
      "eval_steps_per_second": 0.482,
      "step": 10081
    },
    {
      "epoch": 17.71,
      "learning_rate": 1.1467116357504218e-06,
      "loss": 0.1987,
      "step": 10500
    },
    {
      "epoch": 18.0,
      "eval_bleu": 17.5677,
      "eval_gen_len": 43.6623,
      "eval_loss": 2.8818464279174805,
      "eval_runtime": 269.8962,
      "eval_samples_per_second": 3.698,
      "eval_steps_per_second": 0.463,
      "step": 10674
    },
    {
      "epoch": 18.55,
      "learning_rate": 7.251264755480608e-07,
      "loss": 0.2011,
      "step": 11000
    },
    {
      "epoch": 19.0,
      "eval_bleu": 17.6118,
      "eval_gen_len": 43.7395,
      "eval_loss": 2.882765531539917,
      "eval_runtime": 265.3175,
      "eval_samples_per_second": 3.762,
      "eval_steps_per_second": 0.471,
      "step": 11267
    },
    {
      "epoch": 19.39,
      "learning_rate": 3.0354131534569986e-07,
      "loss": 0.2049,
      "step": 11500
    }
  ],
  "max_steps": 11860,
  "num_train_epochs": 20,
  "total_flos": 1623592378957824.0,
  "trial_name": null,
  "trial_params": null
}
|
|