{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9984168865435357, "global_step": 946, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.21, "learning_rate": 3.58139534883721e-05, "loss": 2.303, "step": 100 }, { "epoch": 0.42, "learning_rate": 3.158562367864693e-05, "loss": 1.8006, "step": 200 }, { "epoch": 0.63, "learning_rate": 2.739957716701903e-05, "loss": 1.7525, "step": 300 }, { "epoch": 0.84, "learning_rate": 2.317124735729387e-05, "loss": 1.6911, "step": 400 }, { "epoch": 1.06, "learning_rate": 1.8942917547568712e-05, "loss": 1.6314, "step": 500 }, { "epoch": 1.27, "learning_rate": 1.4714587737843554e-05, "loss": 1.5195, "step": 600 }, { "epoch": 1.48, "learning_rate": 1.0486257928118395e-05, "loss": 1.5048, "step": 700 }, { "epoch": 1.69, "learning_rate": 6.257928118393235e-06, "loss": 1.4919, "step": 800 }, { "epoch": 1.9, "learning_rate": 2.0295983086680763e-06, "loss": 1.5073, "step": 900 }, { "epoch": 2.0, "step": 946, "total_flos": 1.834379077681152e+16, "train_loss": 1.6799574886265567, "train_runtime": 1313.2029, "train_samples_per_second": 46.174, "train_steps_per_second": 0.72 }, { "epoch": 2.0, "eval_bleu": 12.619547537319557, "eval_loss": 1.4679548740386963, "eval_runtime": 33.7947, "eval_samples_per_second": 29.59, "eval_steps_per_second": 1.864, "step": 946 } ], "max_steps": 946, "num_train_epochs": 2, "total_flos": 1.834379077681152e+16, "trial_name": null, "trial_params": null }