{ "best_metric": 0.47385331988334656, "best_model_checkpoint": "output/checkpoint-150", "epoch": 0.20527558246946526, "eval_steps": 50, "global_step": 250, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 0.0, "loss": 0.8903, "step": 1 }, { "epoch": 0.04, "learning_rate": 1.2222222222222224e-05, "loss": 0.7189, "step": 50 }, { "epoch": 0.04, "eval_accuracy": 0.8228571428571428, "eval_loss": 0.5415178537368774, "eval_runtime": 28.5756, "eval_samples_per_second": 6.124, "eval_steps_per_second": 1.54, "step": 50 }, { "epoch": 0.08, "learning_rate": 1.8658536585365855e-05, "loss": 0.5723, "step": 100 }, { "epoch": 0.08, "eval_accuracy": 0.8, "eval_loss": 0.49520450830459595, "eval_runtime": 28.516, "eval_samples_per_second": 6.137, "eval_steps_per_second": 1.543, "step": 100 }, { "epoch": 0.12, "learning_rate": 1.5609756097560978e-05, "loss": 0.5122, "step": 150 }, { "epoch": 0.12, "eval_accuracy": 0.7885714285714286, "eval_loss": 0.47385331988334656, "eval_runtime": 28.2004, "eval_samples_per_second": 6.206, "eval_steps_per_second": 1.56, "step": 150 }, { "epoch": 0.16, "learning_rate": 1.2560975609756098e-05, "loss": 0.4831, "step": 200 }, { "epoch": 0.16, "eval_accuracy": 0.7942857142857143, "eval_loss": 0.49147841334342957, "eval_runtime": 28.6363, "eval_samples_per_second": 6.111, "eval_steps_per_second": 1.537, "step": 200 }, { "epoch": 0.21, "learning_rate": 9.51219512195122e-06, "loss": 0.5009, "step": 250 }, { "epoch": 0.21, "eval_accuracy": 0.8, "eval_loss": 0.501244068145752, "eval_runtime": 27.6943, "eval_samples_per_second": 6.319, "eval_steps_per_second": 1.589, "step": 250 } ], "logging_steps": 50, "max_steps": 400, "num_train_epochs": 1, "save_steps": 50, "total_flos": 9.260539919806464e+16, "trial_name": null, "trial_params": null }