{ "best_metric": 0.8666666746139526, "best_model_checkpoint": "/content/best_model/checkpoint-3000", "epoch": 20.0, "global_step": 7020, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.42, "learning_rate": 1.8575498575498575e-05, "loss": 0.5673, "step": 500 }, { "epoch": 1.42, "eval_accuracy": 0.8437275886535645, "eval_loss": 0.43583494424819946, "eval_runtime": 13.3841, "eval_samples_per_second": 104.228, "eval_steps_per_second": 3.287, "step": 500 }, { "epoch": 2.85, "learning_rate": 1.7150997150997152e-05, "loss": 0.2898, "step": 1000 }, { "epoch": 2.85, "eval_accuracy": 0.8523297309875488, "eval_loss": 0.4844760596752167, "eval_runtime": 13.3926, "eval_samples_per_second": 104.162, "eval_steps_per_second": 3.285, "step": 1000 }, { "epoch": 4.27, "learning_rate": 1.5726495726495726e-05, "loss": 0.1669, "step": 1500 }, { "epoch": 4.27, "eval_accuracy": 0.8573476672172546, "eval_loss": 0.6232547760009766, "eval_runtime": 13.3901, "eval_samples_per_second": 104.181, "eval_steps_per_second": 3.286, "step": 1500 }, { "epoch": 5.7, "learning_rate": 1.4301994301994305e-05, "loss": 0.1087, "step": 2000 }, { "epoch": 5.7, "eval_accuracy": 0.8573476672172546, "eval_loss": 0.7262628078460693, "eval_runtime": 13.3968, "eval_samples_per_second": 104.129, "eval_steps_per_second": 3.284, "step": 2000 }, { "epoch": 7.12, "learning_rate": 1.2877492877492879e-05, "loss": 0.0728, "step": 2500 }, { "epoch": 7.12, "eval_accuracy": 0.8637992739677429, "eval_loss": 0.8840554356575012, "eval_runtime": 13.3781, "eval_samples_per_second": 104.275, "eval_steps_per_second": 3.289, "step": 2500 }, { "epoch": 8.55, "learning_rate": 1.1452991452991454e-05, "loss": 0.0512, "step": 3000 }, { "epoch": 8.55, "eval_accuracy": 0.8666666746139526, "eval_loss": 0.9500740170478821, "eval_runtime": 13.4339, "eval_samples_per_second": 103.842, "eval_steps_per_second": 3.275, "step": 3000 }, { "epoch": 9.97, "learning_rate": 1.002849002849003e-05, "loss": 0.0372, "step": 3500 }, { "epoch": 9.97, "eval_accuracy": 0.856630802154541, "eval_loss": 1.0440385341644287, "eval_runtime": 13.391, "eval_samples_per_second": 104.175, "eval_steps_per_second": 3.286, "step": 3500 }, { "epoch": 11.4, "learning_rate": 8.603988603988605e-06, "loss": 0.0262, "step": 4000 }, { "epoch": 11.4, "eval_accuracy": 0.8609318733215332, "eval_loss": 1.0770107507705688, "eval_runtime": 13.3894, "eval_samples_per_second": 104.187, "eval_steps_per_second": 3.286, "step": 4000 }, { "epoch": 12.82, "learning_rate": 7.17948717948718e-06, "loss": 0.0243, "step": 4500 }, { "epoch": 12.82, "eval_accuracy": 0.8616487383842468, "eval_loss": 1.0931395292282104, "eval_runtime": 13.3865, "eval_samples_per_second": 104.209, "eval_steps_per_second": 3.287, "step": 4500 }, { "epoch": 14.25, "learning_rate": 5.7549857549857555e-06, "loss": 0.023, "step": 5000 }, { "epoch": 14.25, "eval_accuracy": 0.8630824089050293, "eval_loss": 1.1087766885757446, "eval_runtime": 13.3932, "eval_samples_per_second": 104.157, "eval_steps_per_second": 3.285, "step": 5000 }, { "epoch": 15.67, "learning_rate": 4.330484330484331e-06, "loss": 0.0163, "step": 5500 }, { "epoch": 15.67, "eval_accuracy": 0.8580645322799683, "eval_loss": 1.1263514757156372, "eval_runtime": 13.3902, "eval_samples_per_second": 104.18, "eval_steps_per_second": 3.286, "step": 5500 }, { "epoch": 17.09, "learning_rate": 2.9059829059829063e-06, "loss": 0.0111, "step": 6000 }, { "epoch": 17.09, "eval_accuracy": 0.8616487383842468, "eval_loss": 1.154114842414856, "eval_runtime": 13.3822, "eval_samples_per_second": 104.243, "eval_steps_per_second": 3.288, "step": 6000 }, { "epoch": 18.52, "learning_rate": 1.4814814814814815e-06, "loss": 0.0098, "step": 6500 }, { "epoch": 18.52, "eval_accuracy": 0.8630824089050293, "eval_loss": 1.1541603803634644, "eval_runtime": 13.3814, "eval_samples_per_second": 104.25, "eval_steps_per_second": 3.288, "step": 6500 }, { "epoch": 19.94, "learning_rate": 5.6980056980056986e-08, "loss": 0.0074, "step": 7000 }, { "epoch": 19.94, "eval_accuracy": 0.8637992739677429, "eval_loss": 1.1652880907058716, "eval_runtime": 13.3866, "eval_samples_per_second": 104.209, "eval_steps_per_second": 3.287, "step": 7000 }, { "epoch": 20.0, "step": 7020, "total_flos": 3.4632303608448e+16, "train_loss": 0.10057472327884626, "train_runtime": 6733.6679, "train_samples_per_second": 33.361, "train_steps_per_second": 1.043 } ], "max_steps": 7020, "num_train_epochs": 20, "total_flos": 3.4632303608448e+16, "trial_name": null, "trial_params": null }