{ "best_metric": 5.264073371887207, "best_model_checkpoint": "./coco_outputs/checkpoint-32", "epoch": 10.0, "global_step": 40, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_loss": 5.533133506774902, "eval_runtime": 0.5515, "eval_samples_per_second": 14.505, "eval_steps_per_second": 1.813, "step": 4 }, { "epoch": 2.0, "eval_loss": 5.527740478515625, "eval_runtime": 0.5553, "eval_samples_per_second": 14.406, "eval_steps_per_second": 1.801, "step": 8 }, { "epoch": 2.5, "learning_rate": 1.5000000000000002e-05, "loss": 5.4377, "step": 10 }, { "epoch": 3.0, "eval_loss": 5.444962024688721, "eval_runtime": 0.5512, "eval_samples_per_second": 14.513, "eval_steps_per_second": 1.814, "step": 12 }, { "epoch": 4.0, "eval_loss": 5.395962715148926, "eval_runtime": 0.5633, "eval_samples_per_second": 14.203, "eval_steps_per_second": 1.775, "step": 16 }, { "epoch": 5.0, "learning_rate": 1e-05, "loss": 5.1582, "step": 20 }, { "epoch": 5.0, "eval_loss": 5.334904670715332, "eval_runtime": 0.5523, "eval_samples_per_second": 14.484, "eval_steps_per_second": 1.81, "step": 20 }, { "epoch": 6.0, "eval_loss": 5.314438819885254, "eval_runtime": 0.552, "eval_samples_per_second": 14.493, "eval_steps_per_second": 1.812, "step": 24 }, { "epoch": 7.0, "eval_loss": 5.273761749267578, "eval_runtime": 0.5505, "eval_samples_per_second": 14.531, "eval_steps_per_second": 1.816, "step": 28 }, { "epoch": 7.5, "learning_rate": 5e-06, "loss": 5.0556, "step": 30 }, { "epoch": 8.0, "eval_loss": 5.264073371887207, "eval_runtime": 0.5581, "eval_samples_per_second": 14.334, "eval_steps_per_second": 1.792, "step": 32 }, { "epoch": 9.0, "eval_loss": 5.284775733947754, "eval_runtime": 0.5423, "eval_samples_per_second": 14.752, "eval_steps_per_second": 1.844, "step": 36 }, { "epoch": 10.0, "learning_rate": 0.0, "loss": 4.9784, "step": 40 }, { "epoch": 10.0, "eval_loss": 5.279171943664551, "eval_runtime": 0.5633, "eval_samples_per_second": 14.202, "eval_steps_per_second": 1.775, "step": 40 }, { "epoch": 10.0, "step": 40, "total_flos": 1.530004230144e+17, "train_loss": 5.157479572296142, "train_runtime": 182.2038, "train_samples_per_second": 1.756, "train_steps_per_second": 0.22 } ], "max_steps": 40, "num_train_epochs": 10, "total_flos": 1.530004230144e+17, "trial_name": null, "trial_params": null }