{ "best_metric": null, "best_model_checkpoint": null, "epoch": 19.996539792387544, "global_step": 3600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.8460490703582764, "eval_loss": 0.5365713834762573, "eval_runtime": 8.0333, "eval_samples_per_second": 91.369, "eval_steps_per_second": 45.685, "step": 180 }, { "epoch": 2.0, "eval_accuracy": 0.8092643022537231, "eval_loss": 0.5189609527587891, "eval_runtime": 8.0124, "eval_samples_per_second": 91.608, "eval_steps_per_second": 45.804, "step": 360 }, { "epoch": 2.78, "learning_rate": 4.309722222222222e-05, "loss": 0.4021, "step": 500 }, { "epoch": 3.0, "eval_accuracy": 0.8283378481864929, "eval_loss": 0.6708077788352966, "eval_runtime": 8.0551, "eval_samples_per_second": 91.122, "eval_steps_per_second": 45.561, "step": 540 }, { "epoch": 4.0, "eval_accuracy": 0.8542234301567078, "eval_loss": 0.516476571559906, "eval_runtime": 8.0611, "eval_samples_per_second": 91.054, "eval_steps_per_second": 45.527, "step": 720 }, { "epoch": 5.0, "eval_accuracy": 0.8188011050224304, "eval_loss": 0.6029361486434937, "eval_runtime": 8.0681, "eval_samples_per_second": 90.975, "eval_steps_per_second": 45.488, "step": 900 }, { "epoch": 5.55, "learning_rate": 3.6166666666666674e-05, "loss": 0.2576, "step": 1000 }, { "epoch": 6.0, "eval_accuracy": 0.8487738370895386, "eval_loss": 0.6060934066772461, "eval_runtime": 8.0522, "eval_samples_per_second": 91.155, "eval_steps_per_second": 45.578, "step": 1080 }, { "epoch": 7.0, "eval_accuracy": 0.8514986634254456, "eval_loss": 0.748849630355835, "eval_runtime": 8.0692, "eval_samples_per_second": 90.963, "eval_steps_per_second": 45.481, "step": 1260 }, { "epoch": 8.0, "eval_accuracy": 0.8651226162910461, "eval_loss": 0.6119422912597656, "eval_runtime": 8.0635, "eval_samples_per_second": 91.028, "eval_steps_per_second": 45.514, "step": 1440 }, { "epoch": 8.33, "learning_rate": 2.9236111111111115e-05, "loss": 0.1738, "step": 1500 }, { "epoch": 9.0, "eval_accuracy": 0.8542234301567078, "eval_loss": 0.6864181160926819, "eval_runtime": 8.212, "eval_samples_per_second": 89.382, "eval_steps_per_second": 44.691, "step": 1620 }, { "epoch": 10.0, "eval_accuracy": 0.8446866273880005, "eval_loss": 0.7817405462265015, "eval_runtime": 8.0215, "eval_samples_per_second": 91.505, "eval_steps_per_second": 45.752, "step": 1800 }, { "epoch": 11.0, "eval_accuracy": 0.8514986634254456, "eval_loss": 0.6188392043113708, "eval_runtime": 8.0857, "eval_samples_per_second": 90.777, "eval_steps_per_second": 45.389, "step": 1980 }, { "epoch": 11.11, "learning_rate": 2.2305555555555556e-05, "loss": 0.1303, "step": 2000 }, { "epoch": 12.0, "eval_accuracy": 0.8569482564926147, "eval_loss": 0.5936163663864136, "eval_runtime": 8.0618, "eval_samples_per_second": 91.047, "eval_steps_per_second": 45.523, "step": 2160 }, { "epoch": 13.0, "eval_accuracy": 0.859673023223877, "eval_loss": 0.6109394431114197, "eval_runtime": 8.0512, "eval_samples_per_second": 91.167, "eval_steps_per_second": 45.583, "step": 2340 }, { "epoch": 13.89, "learning_rate": 1.5375e-05, "loss": 0.1226, "step": 2500 }, { "epoch": 14.0, "eval_accuracy": 0.8501362204551697, "eval_loss": 0.7600889205932617, "eval_runtime": 8.0176, "eval_samples_per_second": 91.549, "eval_steps_per_second": 45.774, "step": 2520 }, { "epoch": 15.0, "eval_accuracy": 0.8501362204551697, "eval_loss": 0.6596993803977966, "eval_runtime": 8.0565, "eval_samples_per_second": 91.107, "eval_steps_per_second": 45.553, "step": 2700 }, { "epoch": 16.0, "eval_accuracy": 0.8460490703582764, "eval_loss": 0.712175190448761, "eval_runtime": 8.0456, "eval_samples_per_second": 91.23, "eval_steps_per_second": 45.615, "step": 2880 }, { "epoch": 16.66, "learning_rate": 8.430555555555556e-06, "loss": 0.1261, "step": 3000 }, { "epoch": 17.0, "eval_accuracy": 0.8514986634254456, "eval_loss": 0.7294248938560486, "eval_runtime": 8.0369, "eval_samples_per_second": 91.329, "eval_steps_per_second": 45.664, "step": 3060 }, { "epoch": 18.0, "eval_accuracy": 0.863760232925415, "eval_loss": 0.6875426173210144, "eval_runtime": 8.0764, "eval_samples_per_second": 90.882, "eval_steps_per_second": 45.441, "step": 3240 }, { "epoch": 19.0, "eval_accuracy": 0.8664849996566772, "eval_loss": 0.6823601126670837, "eval_runtime": 8.0997, "eval_samples_per_second": 90.62, "eval_steps_per_second": 45.31, "step": 3420 }, { "epoch": 19.44, "learning_rate": 1.4861111111111113e-06, "loss": 0.1044, "step": 3500 }, { "epoch": 20.0, "eval_accuracy": 0.8623978495597839, "eval_loss": 0.6752045750617981, "eval_runtime": 8.0488, "eval_samples_per_second": 91.193, "eval_steps_per_second": 45.597, "step": 3600 }, { "epoch": 20.0, "step": 3600, "total_flos": 1.3459697175036672e+16, "train_loss": 0.18581116994222005, "train_runtime": 2514.2374, "train_samples_per_second": 22.981, "train_steps_per_second": 1.432 } ], "max_steps": 3600, "num_train_epochs": 20, "total_flos": 1.3459697175036672e+16, "trial_name": null, "trial_params": null }