{ "best_metric": 0.28330111503601074, "best_model_checkpoint": "/content/best_model/checkpoint-65000", "epoch": 2.0, "global_step": 69122, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14, "learning_rate": 4.638320650444142e-05, "loss": 0.3604, "step": 5000 }, { "epoch": 0.14, "eval_accuracy": 0.8821028470993042, "eval_loss": 0.3162487745285034, "eval_runtime": 142.0782, "eval_samples_per_second": 203.634, "eval_steps_per_second": 3.188, "step": 5000 }, { "epoch": 0.29, "learning_rate": 4.276641300888285e-05, "loss": 0.3326, "step": 10000 }, { "epoch": 0.29, "eval_accuracy": 0.8842803835868835, "eval_loss": 0.311200350522995, "eval_runtime": 142.1706, "eval_samples_per_second": 203.502, "eval_steps_per_second": 3.186, "step": 10000 }, { "epoch": 0.43, "learning_rate": 3.914961951332427e-05, "loss": 0.3293, "step": 15000 }, { "epoch": 0.43, "eval_accuracy": 0.8869763612747192, "eval_loss": 0.3043750524520874, "eval_runtime": 142.2145, "eval_samples_per_second": 203.439, "eval_steps_per_second": 3.185, "step": 15000 }, { "epoch": 0.58, "learning_rate": 3.5532826017765694e-05, "loss": 0.3246, "step": 20000 }, { "epoch": 0.58, "eval_accuracy": 0.8871491551399231, "eval_loss": 0.30401167273521423, "eval_runtime": 142.2635, "eval_samples_per_second": 203.369, "eval_steps_per_second": 3.184, "step": 20000 }, { "epoch": 0.72, "learning_rate": 3.191603252220711e-05, "loss": 0.32, "step": 25000 }, { "epoch": 0.72, "eval_accuracy": 0.8887736797332764, "eval_loss": 0.29694026708602905, "eval_runtime": 142.2955, "eval_samples_per_second": 203.323, "eval_steps_per_second": 3.184, "step": 25000 }, { "epoch": 0.87, "learning_rate": 2.829923902664854e-05, "loss": 0.3143, "step": 30000 }, { "epoch": 0.87, "eval_accuracy": 0.8903290629386902, "eval_loss": 0.2928813695907593, "eval_runtime": 142.3659, "eval_samples_per_second": 203.223, "eval_steps_per_second": 3.182, "step": 30000 }, { "epoch": 1.01, "learning_rate": 2.468244553108996e-05, "loss": 0.3095, "step": 35000 }, { "epoch": 1.01, "eval_accuracy": 0.8899142742156982, "eval_loss": 0.29173970222473145, "eval_runtime": 142.5262, "eval_samples_per_second": 202.994, "eval_steps_per_second": 3.178, "step": 35000 }, { "epoch": 1.16, "learning_rate": 2.106565203553138e-05, "loss": 0.2844, "step": 40000 }, { "epoch": 1.16, "eval_accuracy": 0.8886008858680725, "eval_loss": 0.29569417238235474, "eval_runtime": 142.5524, "eval_samples_per_second": 202.957, "eval_steps_per_second": 3.178, "step": 40000 }, { "epoch": 1.3, "learning_rate": 1.7448858539972804e-05, "loss": 0.2778, "step": 45000 }, { "epoch": 1.3, "eval_accuracy": 0.890640139579773, "eval_loss": 0.2942551076412201, "eval_runtime": 142.6301, "eval_samples_per_second": 202.846, "eval_steps_per_second": 3.176, "step": 45000 }, { "epoch": 1.45, "learning_rate": 1.3832065044414225e-05, "loss": 0.2779, "step": 50000 }, { "epoch": 1.45, "eval_accuracy": 0.8934743404388428, "eval_loss": 0.28896576166152954, "eval_runtime": 142.7675, "eval_samples_per_second": 202.651, "eval_steps_per_second": 3.173, "step": 50000 }, { "epoch": 1.59, "learning_rate": 1.0215271548855646e-05, "loss": 0.2752, "step": 55000 }, { "epoch": 1.59, "eval_accuracy": 0.891884446144104, "eval_loss": 0.28808724880218506, "eval_runtime": 142.5778, "eval_samples_per_second": 202.921, "eval_steps_per_second": 3.177, "step": 55000 }, { "epoch": 1.74, "learning_rate": 6.598478053297069e-06, "loss": 0.2736, "step": 60000 }, { "epoch": 1.74, "eval_accuracy": 0.8943729996681213, "eval_loss": 0.28354716300964355, "eval_runtime": 142.8817, "eval_samples_per_second": 202.489, "eval_steps_per_second": 3.17, "step": 60000 }, { "epoch": 1.88, "learning_rate": 2.9816845577384916e-06, "loss": 0.2725, "step": 65000 }, { "epoch": 1.88, "eval_accuracy": 0.8941656351089478, "eval_loss": 0.28330111503601074, "eval_runtime": 142.8534, "eval_samples_per_second": 202.529, "eval_steps_per_second": 3.171, "step": 65000 }, { "epoch": 2.0, "step": 69122, "total_flos": 3.410043502198626e+17, "train_loss": 0.30218412260715205, "train_runtime": 63905.8286, "train_samples_per_second": 69.223, "train_steps_per_second": 1.082 } ], "max_steps": 69122, "num_train_epochs": 2, "total_flos": 3.410043502198626e+17, "trial_name": null, "trial_params": null }