{ "best_metric": 0.2832432985305786, "best_model_checkpoint": "/content/best_model/checkpoint-65000", "epoch": 2.0, "global_step": 69122, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14, "learning_rate": 4.6385376580538756e-05, "loss": 0.3587, "step": 5000 }, { "epoch": 0.14, "eval_accuracy": 0.883381724357605, "eval_loss": 0.313699334859848, "eval_runtime": 130.2101, "eval_samples_per_second": 222.195, "eval_steps_per_second": 3.479, "step": 5000 }, { "epoch": 0.29, "learning_rate": 4.2770029802378405e-05, "loss": 0.3318, "step": 10000 }, { "epoch": 0.29, "eval_accuracy": 0.8830706477165222, "eval_loss": 0.31002119183540344, "eval_runtime": 130.4095, "eval_samples_per_second": 221.855, "eval_steps_per_second": 3.474, "step": 10000 }, { "epoch": 0.43, "learning_rate": 3.915395966551894e-05, "loss": 0.3286, "step": 15000 }, { "epoch": 0.43, "eval_accuracy": 0.8863887786865234, "eval_loss": 0.303303599357605, "eval_runtime": 130.6696, "eval_samples_per_second": 221.413, "eval_steps_per_second": 3.467, "step": 15000 }, { "epoch": 0.58, "learning_rate": 3.5537889528659474e-05, "loss": 0.3236, "step": 20000 }, { "epoch": 0.58, "eval_accuracy": 0.8862159252166748, "eval_loss": 0.3037019670009613, "eval_runtime": 130.8546, "eval_samples_per_second": 221.1, "eval_steps_per_second": 3.462, "step": 20000 }, { "epoch": 0.72, "learning_rate": 3.1922542750499116e-05, "loss": 0.3182, "step": 25000 }, { "epoch": 0.72, "eval_accuracy": 0.8876330852508545, "eval_loss": 0.2938973009586334, "eval_runtime": 130.8776, "eval_samples_per_second": 221.061, "eval_steps_per_second": 3.461, "step": 25000 }, { "epoch": 0.87, "learning_rate": 2.830647261363965e-05, "loss": 0.3129, "step": 30000 }, { "epoch": 0.87, "eval_accuracy": 0.8885317444801331, "eval_loss": 0.2910305857658386, "eval_runtime": 131.4925, "eval_samples_per_second": 220.028, "eval_steps_per_second": 3.445, "step": 30000 }, { "epoch": 1.01, "learning_rate": 2.469184919417841e-05, "loss": 0.3078, "step": 35000 }, { "epoch": 1.01, "eval_accuracy": 0.8887391090393066, "eval_loss": 0.29142218828201294, "eval_runtime": 131.659, "eval_samples_per_second": 219.75, "eval_steps_per_second": 3.441, "step": 35000 }, { "epoch": 1.16, "learning_rate": 2.107794913341628e-05, "loss": 0.2791, "step": 40000 }, { "epoch": 1.16, "eval_accuracy": 0.8873911499977112, "eval_loss": 0.2975434362888336, "eval_runtime": 132.2605, "eval_samples_per_second": 218.75, "eval_steps_per_second": 3.425, "step": 40000 }, { "epoch": 1.3, "learning_rate": 1.7462602355255925e-05, "loss": 0.2723, "step": 45000 }, { "epoch": 1.3, "eval_accuracy": 0.8906055688858032, "eval_loss": 0.29133233428001404, "eval_runtime": 132.58, "eval_samples_per_second": 218.223, "eval_steps_per_second": 3.417, "step": 45000 }, { "epoch": 1.45, "learning_rate": 1.384580885969735e-05, "loss": 0.2724, "step": 50000 }, { "epoch": 1.45, "eval_accuracy": 0.8903636336326599, "eval_loss": 0.2879472076892853, "eval_runtime": 132.7015, "eval_samples_per_second": 218.023, "eval_steps_per_second": 3.414, "step": 50000 }, { "epoch": 1.59, "learning_rate": 1.0229738722837882e-05, "loss": 0.27, "step": 55000 }, { "epoch": 1.59, "eval_accuracy": 0.8910548686981201, "eval_loss": 0.28737255930900574, "eval_runtime": 132.683, "eval_samples_per_second": 218.054, "eval_steps_per_second": 3.414, "step": 55000 }, { "epoch": 1.74, "learning_rate": 6.615115303376639e-06, "loss": 0.2681, "step": 60000 }, { "epoch": 1.74, "eval_accuracy": 0.8927831053733826, "eval_loss": 0.28480109572410583, "eval_runtime": 132.2639, "eval_samples_per_second": 218.744, "eval_steps_per_second": 3.425, "step": 60000 }, { "epoch": 1.88, "learning_rate": 2.9997685252162845e-06, "loss": 0.2672, "step": 65000 }, { "epoch": 1.88, "eval_accuracy": 0.893439769744873, "eval_loss": 0.2832432985305786, "eval_runtime": 132.917, "eval_samples_per_second": 217.67, "eval_steps_per_second": 3.408, "step": 65000 }, { "epoch": 2.0, "step": 69122, "total_flos": 2.955371035238809e+17, "train_loss": 0.29886255871073064, "train_runtime": 56655.4623, "train_samples_per_second": 78.081, "train_steps_per_second": 1.22 } ], "max_steps": 69122, "num_train_epochs": 2, "total_flos": 2.955371035238809e+17, "trial_name": null, "trial_params": null }