{ "best_metric": null, "best_model_checkpoint": null, "epoch": 19.3929173693086, "global_step": 11500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.84, "learning_rate": 9.578414839797639e-06, "loss": 0.3136, "step": 500 }, { "epoch": 1.0, "eval_bleu": 17.1673, "eval_gen_len": 43.5832, "eval_loss": 2.7004430294036865, "eval_runtime": 263.1603, "eval_samples_per_second": 3.792, "eval_steps_per_second": 0.475, "step": 593 }, { "epoch": 1.69, "learning_rate": 9.156829679595279e-06, "loss": 0.2962, "step": 1000 }, { "epoch": 2.0, "eval_bleu": 17.2012, "eval_gen_len": 43.6693, "eval_loss": 2.719139814376831, "eval_runtime": 263.247, "eval_samples_per_second": 3.791, "eval_steps_per_second": 0.475, "step": 1186 }, { "epoch": 2.53, "learning_rate": 8.735244519392918e-06, "loss": 0.2927, "step": 1500 }, { "epoch": 3.0, "eval_bleu": 17.2291, "eval_gen_len": 43.482, "eval_loss": 2.7412936687469482, "eval_runtime": 260.7955, "eval_samples_per_second": 3.827, "eval_steps_per_second": 0.479, "step": 1779 }, { "epoch": 3.37, "learning_rate": 8.313659359190556e-06, "loss": 0.2677, "step": 2000 }, { "epoch": 4.0, "eval_bleu": 17.135, "eval_gen_len": 43.5862, "eval_loss": 2.7617862224578857, "eval_runtime": 261.1749, "eval_samples_per_second": 3.821, "eval_steps_per_second": 0.479, "step": 2372 }, { "epoch": 4.22, "learning_rate": 7.892074198988196e-06, "loss": 0.2591, "step": 2500 }, { "epoch": 5.0, "eval_bleu": 17.5543, "eval_gen_len": 43.5922, "eval_loss": 2.7780115604400635, "eval_runtime": 262.647, "eval_samples_per_second": 3.8, "eval_steps_per_second": 0.476, "step": 2965 }, { "epoch": 5.06, "learning_rate": 7.470489038785835e-06, "loss": 0.2473, "step": 3000 }, { "epoch": 5.9, "learning_rate": 7.048903878583474e-06, "loss": 0.2282, "step": 3500 }, { "epoch": 6.0, "eval_bleu": 17.226, "eval_gen_len": 43.6703, "eval_loss": 2.794311761856079, "eval_runtime": 263.8826, "eval_samples_per_second": 3.782, "eval_steps_per_second": 0.474, "step": 3558 }, { "epoch": 6.75, "learning_rate": 6.6273187183811136e-06, "loss": 0.2244, "step": 4000 }, { "epoch": 7.0, "eval_bleu": 17.615, "eval_gen_len": 43.6934, "eval_loss": 2.808680295944214, "eval_runtime": 264.0527, "eval_samples_per_second": 3.78, "eval_steps_per_second": 0.473, "step": 4151 }, { "epoch": 7.59, "learning_rate": 6.2057335581787524e-06, "loss": 0.2196, "step": 4500 }, { "epoch": 8.0, "eval_bleu": 17.3227, "eval_gen_len": 43.7715, "eval_loss": 2.825133800506592, "eval_runtime": 281.974, "eval_samples_per_second": 3.539, "eval_steps_per_second": 0.443, "step": 4744 }, { "epoch": 8.43, "learning_rate": 5.784148397976391e-06, "loss": 0.2101, "step": 5000 }, { "epoch": 9.0, "eval_bleu": 17.5072, "eval_gen_len": 43.7084, "eval_loss": 2.834676742553711, "eval_runtime": 263.2231, "eval_samples_per_second": 3.791, "eval_steps_per_second": 0.475, "step": 5337 }, { "epoch": 9.27, "learning_rate": 5.362563237774031e-06, "loss": 0.2077, "step": 5500 }, { "epoch": 10.0, "eval_bleu": 17.5712, "eval_gen_len": 43.8597, "eval_loss": 2.842376708984375, "eval_runtime": 270.4686, "eval_samples_per_second": 3.69, "eval_steps_per_second": 0.462, "step": 5930 }, { "epoch": 10.12, "learning_rate": 4.94097807757167e-06, "loss": 0.2034, "step": 6000 }, { "epoch": 10.96, "learning_rate": 4.519392917369309e-06, "loss": 0.1968, "step": 6500 }, { "epoch": 11.0, "eval_bleu": 17.6007, "eval_gen_len": 43.6994, "eval_loss": 2.851884365081787, "eval_runtime": 261.9767, "eval_samples_per_second": 3.809, "eval_steps_per_second": 0.477, "step": 6523 }, { "epoch": 11.8, "learning_rate": 4.097807757166948e-06, "loss": 0.1902, "step": 7000 }, { "epoch": 12.0, "eval_bleu": 17.6333, "eval_gen_len": 43.6924, "eval_loss": 2.8614132404327393, "eval_runtime": 263.2972, "eval_samples_per_second": 3.79, "eval_steps_per_second": 0.475, "step": 7116 }, { "epoch": 12.65, "learning_rate": 3.676222596964587e-06, "loss": 0.198, "step": 7500 }, { "epoch": 13.0, "eval_bleu": 17.6153, "eval_gen_len": 43.7034, "eval_loss": 2.865877866744995, "eval_runtime": 261.219, "eval_samples_per_second": 3.821, "eval_steps_per_second": 0.479, "step": 7709 }, { "epoch": 13.49, "learning_rate": 3.2546374367622263e-06, "loss": 0.1861, "step": 8000 }, { "epoch": 14.0, "eval_bleu": 17.5959, "eval_gen_len": 43.7154, "eval_loss": 2.873347043991089, "eval_runtime": 260.1505, "eval_samples_per_second": 3.836, "eval_steps_per_second": 0.48, "step": 8302 }, { "epoch": 14.33, "learning_rate": 2.8330522765598656e-06, "loss": 0.1956, "step": 8500 }, { "epoch": 15.0, "eval_bleu": 17.6169, "eval_gen_len": 43.7164, "eval_loss": 2.876323938369751, "eval_runtime": 261.1714, "eval_samples_per_second": 3.821, "eval_steps_per_second": 0.479, "step": 8895 }, { "epoch": 15.18, "learning_rate": 2.4114671163575045e-06, "loss": 0.1924, "step": 9000 }, { "epoch": 16.0, "eval_bleu": 17.5443, "eval_gen_len": 43.7194, "eval_loss": 2.880269765853882, "eval_runtime": 261.8101, "eval_samples_per_second": 3.812, "eval_steps_per_second": 0.477, "step": 9488 }, { "epoch": 16.02, "learning_rate": 1.9898819561551434e-06, "loss": 0.1946, "step": 9500 }, { "epoch": 16.86, "learning_rate": 1.5682967959527825e-06, "loss": 0.1946, "step": 10000 }, { "epoch": 17.0, "eval_bleu": 17.577, "eval_gen_len": 43.6142, "eval_loss": 2.8834807872772217, "eval_runtime": 259.2401, "eval_samples_per_second": 3.85, "eval_steps_per_second": 0.482, "step": 10081 }, { "epoch": 17.71, "learning_rate": 1.1467116357504218e-06, "loss": 0.1987, "step": 10500 }, { "epoch": 18.0, "eval_bleu": 17.5677, "eval_gen_len": 43.6623, "eval_loss": 2.8818464279174805, "eval_runtime": 269.8962, "eval_samples_per_second": 3.698, "eval_steps_per_second": 0.463, "step": 10674 }, { "epoch": 18.55, "learning_rate": 7.251264755480608e-07, "loss": 0.2011, "step": 11000 }, { "epoch": 19.0, "eval_bleu": 17.6118, "eval_gen_len": 43.7395, "eval_loss": 2.882765531539917, "eval_runtime": 265.3175, "eval_samples_per_second": 3.762, "eval_steps_per_second": 0.471, "step": 11267 }, { "epoch": 19.39, "learning_rate": 3.0354131534569986e-07, "loss": 0.2049, "step": 11500 } ], "max_steps": 11860, "num_train_epochs": 20, "total_flos": 1623592378957824.0, "trial_name": null, "trial_params": null }