{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "global_step": 14, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14, "learning_rate": 7e-05, "loss": 2.6914, "step": 1 }, { "epoch": 0.14, "eval_accuracy": 0.44772838299951145, "eval_loss": 2.689453125, "eval_runtime": 2.0744, "eval_samples_per_second": 5.785, "eval_steps_per_second": 0.964, "step": 1 }, { "epoch": 0.29, "learning_rate": 7e-05, "loss": 2.6897, "step": 2 }, { "epoch": 0.29, "eval_accuracy": 0.44772838299951145, "eval_loss": 2.689453125, "eval_runtime": 1.2329, "eval_samples_per_second": 9.733, "eval_steps_per_second": 1.622, "step": 2 }, { "epoch": 0.43, "learning_rate": 6.5e-05, "loss": 2.668, "step": 3 }, { "epoch": 0.43, "eval_accuracy": 0.44031916625956685, "eval_loss": 2.703125, "eval_runtime": 1.3532, "eval_samples_per_second": 8.868, "eval_steps_per_second": 1.478, "step": 3 }, { "epoch": 0.57, "learning_rate": 5.9999999999999995e-05, "loss": 2.7434, "step": 4 }, { "epoch": 0.57, "eval_accuracy": 0.45330565054551375, "eval_loss": 2.591796875, "eval_runtime": 1.1891, "eval_samples_per_second": 10.092, "eval_steps_per_second": 1.682, "step": 4 }, { "epoch": 0.71, "learning_rate": 5.4999999999999995e-05, "loss": 2.6265, "step": 5 }, { "epoch": 0.71, "eval_accuracy": 0.4617733268197362, "eval_loss": 2.541015625, "eval_runtime": 1.3466, "eval_samples_per_second": 8.912, "eval_steps_per_second": 1.485, "step": 5 }, { "epoch": 0.86, "learning_rate": 4.9999999999999996e-05, "loss": 2.5259, "step": 6 }, { "epoch": 0.86, "eval_accuracy": 0.46405308581664223, "eval_loss": 2.515625, "eval_runtime": 1.3043, "eval_samples_per_second": 9.2, "eval_steps_per_second": 1.533, "step": 6 }, { "epoch": 1.0, "learning_rate": 4.4999999999999996e-05, "loss": 2.5566, "step": 7 }, { "epoch": 1.0, "eval_accuracy": 0.46665852467024915, "eval_loss": 2.490234375, "eval_runtime": 1.3073, "eval_samples_per_second": 9.179, "eval_steps_per_second": 1.53, "step": 7 }, { "epoch": 1.14, "learning_rate": 3.9999999999999996e-05, "loss": 2.2317, "step": 8 }, { "epoch": 1.14, "eval_accuracy": 0.4706888128969223, "eval_loss": 2.4765625, "eval_runtime": 2.2111, "eval_samples_per_second": 5.427, "eval_steps_per_second": 0.905, "step": 8 }, { "epoch": 1.29, "learning_rate": 3.5e-05, "loss": 2.2397, "step": 9 }, { "epoch": 1.29, "eval_accuracy": 0.4705259729685719, "eval_loss": 2.47265625, "eval_runtime": 1.2075, "eval_samples_per_second": 9.938, "eval_steps_per_second": 1.656, "step": 9 }, { "epoch": 1.43, "learning_rate": 2.9999999999999997e-05, "loss": 2.0162, "step": 10 }, { "epoch": 1.43, "eval_accuracy": 0.4689789936492428, "eval_loss": 2.4765625, "eval_runtime": 1.6528, "eval_samples_per_second": 7.26, "eval_steps_per_second": 1.21, "step": 10 }, { "epoch": 1.57, "learning_rate": 2.4999999999999998e-05, "loss": 2.0537, "step": 11 }, { "epoch": 1.57, "eval_accuracy": 0.4706888128969223, "eval_loss": 2.48046875, "eval_runtime": 1.6588, "eval_samples_per_second": 7.234, "eval_steps_per_second": 1.206, "step": 11 }, { "epoch": 1.71, "learning_rate": 1.9999999999999998e-05, "loss": 2.1432, "step": 12 }, { "epoch": 1.71, "eval_accuracy": 0.4714215925744993, "eval_loss": 2.470703125, "eval_runtime": 2.2121, "eval_samples_per_second": 5.425, "eval_steps_per_second": 0.904, "step": 12 }, { "epoch": 1.86, "learning_rate": 1.4999999999999999e-05, "loss": 2.0822, "step": 13 }, { "epoch": 1.86, "eval_accuracy": 0.47235792216251427, "eval_loss": 2.45703125, "eval_runtime": 1.2064, "eval_samples_per_second": 9.947, "eval_steps_per_second": 1.658, "step": 13 }, { "epoch": 2.0, "learning_rate": 9.999999999999999e-06, "loss": 1.9056, "step": 14 }, { "epoch": 2.0, "eval_accuracy": 0.47410845139228136, "eval_loss": 2.451171875, "eval_runtime": 2.1018, "eval_samples_per_second": 5.709, "eval_steps_per_second": 0.952, "step": 14 }, { "epoch": 2.0, "step": 14, "total_flos": 821815148544.0, "train_loss": 2.3695591517857144, "train_runtime": 69.0598, "train_samples_per_second": 1.448, "train_steps_per_second": 0.203 } ], "max_steps": 14, "num_train_epochs": 2, "total_flos": 821815148544.0, "trial_name": null, "trial_params": null }