{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "global_step": 3200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_bleu": 0.060808744840238495, "eval_loss": 2.7135698795318604, "eval_runtime": 315.8806, "eval_samples_per_second": 3.603, "eval_steps_per_second": 0.114, "step": 160 }, { "epoch": 2.0, "eval_bleu": 0.23820890608280518, "eval_loss": 1.7017812728881836, "eval_runtime": 199.1113, "eval_samples_per_second": 5.715, "eval_steps_per_second": 0.181, "step": 320 }, { "epoch": 3.0, "eval_bleu": 0.2333157930860921, "eval_loss": 1.7553855180740356, "eval_runtime": 183.4244, "eval_samples_per_second": 6.204, "eval_steps_per_second": 0.196, "step": 480 }, { "epoch": 3.12, "learning_rate": 1.69125e-05, "loss": 2.1899, "step": 500 }, { "epoch": 4.0, "eval_bleu": 0.24403815580286167, "eval_loss": 1.729722499847412, "eval_runtime": 199.443, "eval_samples_per_second": 5.706, "eval_steps_per_second": 0.181, "step": 640 }, { "epoch": 5.0, "eval_bleu": 0.24248370965297772, "eval_loss": 1.7367736101150513, "eval_runtime": 195.0886, "eval_samples_per_second": 5.833, "eval_steps_per_second": 0.185, "step": 800 }, { "epoch": 6.0, "eval_bleu": 0.24708175633139415, "eval_loss": 1.7663674354553223, "eval_runtime": 190.8193, "eval_samples_per_second": 5.964, "eval_steps_per_second": 0.189, "step": 960 }, { "epoch": 6.25, "learning_rate": 1.37875e-05, "loss": 1.1629, "step": 1000 }, { "epoch": 7.0, "eval_bleu": 0.24734088993827677, "eval_loss": 1.8130639791488647, "eval_runtime": 185.235, "eval_samples_per_second": 6.144, "eval_steps_per_second": 0.194, "step": 1120 }, { "epoch": 8.0, "eval_bleu": 0.25035574867022287, "eval_loss": 1.860228419303894, "eval_runtime": 191.0903, "eval_samples_per_second": 5.955, "eval_steps_per_second": 0.188, "step": 1280 }, { "epoch": 9.0, "eval_bleu": 0.2550337582147487, "eval_loss": 1.9042994976043701, "eval_runtime": 192.163, "eval_samples_per_second": 5.922, "eval_steps_per_second": 0.187, "step": 1440 }, { "epoch": 9.38, "learning_rate": 1.06625e-05, "loss": 0.807, "step": 1500 }, { "epoch": 10.0, "eval_bleu": 0.255168022317063, "eval_loss": 1.9512995481491089, "eval_runtime": 193.6447, "eval_samples_per_second": 5.877, "eval_steps_per_second": 0.186, "step": 1600 }, { "epoch": 11.0, "eval_bleu": 0.2583105775089707, "eval_loss": 2.0014865398406982, "eval_runtime": 194.2032, "eval_samples_per_second": 5.86, "eval_steps_per_second": 0.185, "step": 1760 }, { "epoch": 12.0, "eval_bleu": 0.2611697071205468, "eval_loss": 2.036052942276001, "eval_runtime": 190.4985, "eval_samples_per_second": 5.974, "eval_steps_per_second": 0.189, "step": 1920 }, { "epoch": 12.5, "learning_rate": 7.537500000000001e-06, "loss": 0.5977, "step": 2000 }, { "epoch": 13.0, "eval_bleu": 0.2624028465673082, "eval_loss": 2.0794923305511475, "eval_runtime": 192.5774, "eval_samples_per_second": 5.909, "eval_steps_per_second": 0.187, "step": 2080 }, { "epoch": 14.0, "eval_bleu": 0.26034097889106955, "eval_loss": 2.1036157608032227, "eval_runtime": 198.097, "eval_samples_per_second": 5.745, "eval_steps_per_second": 0.182, "step": 2240 }, { "epoch": 15.0, "eval_bleu": 0.264538215714405, "eval_loss": 2.1185383796691895, "eval_runtime": 189.6413, "eval_samples_per_second": 6.001, "eval_steps_per_second": 0.19, "step": 2400 }, { "epoch": 15.62, "learning_rate": 4.4125000000000005e-06, "loss": 0.4697, "step": 2500 }, { "epoch": 16.0, "eval_bleu": 0.2666872542669057, "eval_loss": 2.1361563205718994, "eval_runtime": 189.4756, "eval_samples_per_second": 6.006, "eval_steps_per_second": 0.19, "step": 2560 }, { "epoch": 17.0, "eval_bleu": 0.2652516887552325, "eval_loss": 2.162111520767212, "eval_runtime": 193.0939, "eval_samples_per_second": 5.894, "eval_steps_per_second": 0.186, "step": 2720 }, { "epoch": 18.0, "eval_bleu": 0.2673360550776601, "eval_loss": 2.163081169128418, "eval_runtime": 190.9327, "eval_samples_per_second": 5.96, "eval_steps_per_second": 0.189, "step": 2880 }, { "epoch": 18.75, "learning_rate": 1.2875000000000002e-06, "loss": 0.4032, "step": 3000 }, { "epoch": 19.0, "eval_bleu": 0.2660614156233256, "eval_loss": 2.1683239936828613, "eval_runtime": 190.2616, "eval_samples_per_second": 5.981, "eval_steps_per_second": 0.189, "step": 3040 }, { "epoch": 20.0, "eval_bleu": 0.2674984140410235, "eval_loss": 2.173663377761841, "eval_runtime": 189.5283, "eval_samples_per_second": 6.004, "eval_steps_per_second": 0.19, "step": 3200 } ], "max_steps": 3200, "num_train_epochs": 20, "total_flos": 5.530638338624717e+16, "trial_name": null, "trial_params": null }