{ "best_metric": 0.6528732180595398, "best_model_checkpoint": "output/checkpoint-200", "epoch": 0.32086633911561213, "eval_steps": 50, "global_step": 400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.3333333333333332e-06, "loss": 0.785, "step": 1 }, { "epoch": 0.04, "learning_rate": 5.9999999999999995e-05, "loss": 0.6547, "step": 50 }, { "epoch": 0.04, "eval_accuracy": 0.6626865671641791, "eval_loss": 0.8300915956497192, "eval_runtime": 61.5689, "eval_samples_per_second": 5.441, "eval_steps_per_second": 1.364, "step": 50 }, { "epoch": 0.08, "learning_rate": 0.00012666666666666666, "loss": 0.6654, "step": 100 }, { "epoch": 0.08, "eval_accuracy": 0.6895522388059702, "eval_loss": 0.6663276553153992, "eval_runtime": 56.4633, "eval_samples_per_second": 5.933, "eval_steps_per_second": 1.488, "step": 100 }, { "epoch": 0.12, "learning_rate": 0.00019333333333333333, "loss": 0.6491, "step": 150 }, { "epoch": 0.12, "eval_accuracy": 0.6537313432835821, "eval_loss": 0.9172552824020386, "eval_runtime": 56.4705, "eval_samples_per_second": 5.932, "eval_steps_per_second": 1.488, "step": 150 }, { "epoch": 0.16, "learning_rate": 0.00026, "loss": 0.641, "step": 200 }, { "epoch": 0.16, "eval_accuracy": 0.6865671641791045, "eval_loss": 0.6528732180595398, "eval_runtime": 56.4485, "eval_samples_per_second": 5.935, "eval_steps_per_second": 1.488, "step": 200 }, { "epoch": 0.2, "learning_rate": 0.0002941234084231146, "loss": 0.6521, "step": 250 }, { "epoch": 0.2, "eval_accuracy": 0.6835820895522388, "eval_loss": 0.8197052478790283, "eval_runtime": 56.4577, "eval_samples_per_second": 5.934, "eval_steps_per_second": 1.488, "step": 250 }, { "epoch": 0.24, "learning_rate": 0.00027943192948090106, "loss": 0.638, "step": 300 }, { "epoch": 0.24, "eval_accuracy": 0.6835820895522388, "eval_loss": 0.6961421370506287, "eval_runtime": 56.4506, "eval_samples_per_second": 5.934, "eval_steps_per_second": 1.488, "step": 300 }, { "epoch": 0.28, "learning_rate": 0.00026474045053868754, "loss": 0.6857, "step": 350 }, { "epoch": 0.28, "eval_accuracy": 0.6835820895522388, "eval_loss": 0.7876304388046265, "eval_runtime": 56.4651, "eval_samples_per_second": 5.933, "eval_steps_per_second": 1.488, "step": 350 }, { "epoch": 0.32, "learning_rate": 0.000250048971596474, "loss": 0.6469, "step": 400 }, { "epoch": 0.32, "eval_accuracy": 0.6626865671641791, "eval_loss": 0.7487305402755737, "eval_runtime": 56.4489, "eval_samples_per_second": 5.935, "eval_steps_per_second": 1.488, "step": 400 } ], "logging_steps": 50, "max_steps": 1246, "num_train_epochs": 1, "save_steps": 100, "total_flos": 1.4970050098348032e+17, "trial_name": null, "trial_params": null }