{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 4.5641188621521, "learning_rate": 4.75e-05, "loss": 0.5535, "step": 122 }, { "epoch": 1.0, "eval_accuracy": 0.7243107769423559, "eval_f1": 0.6401043033324587, "eval_loss": 0.5041041374206543, "eval_precision": 0.6583725987676694, "eval_recall": 0.6324331696672122, "eval_runtime": 1.8214, "eval_samples_per_second": 219.06, "eval_steps_per_second": 27.451, "step": 122 }, { "epoch": 2.0, "grad_norm": 3.286947011947632, "learning_rate": 4.5e-05, "loss": 0.4636, "step": 244 }, { "epoch": 2.0, "eval_accuracy": 0.7669172932330827, "eval_f1": 0.7331142070096449, "eval_loss": 0.4692240059375763, "eval_precision": 0.7252895752895754, "eval_recall": 0.7475904709947263, "eval_runtime": 1.8328, "eval_samples_per_second": 217.703, "eval_steps_per_second": 27.281, "step": 244 }, { "epoch": 3.0, "grad_norm": 3.800495147705078, "learning_rate": 4.25e-05, "loss": 0.4023, "step": 366 }, { "epoch": 3.0, "eval_accuracy": 0.8370927318295739, "eval_f1": 0.7809488416091623, "eval_loss": 0.36048197746276855, "eval_precision": 0.832562695924765, "eval_recall": 0.7572285870158211, "eval_runtime": 1.8261, "eval_samples_per_second": 218.495, "eval_steps_per_second": 27.38, "step": 366 }, { "epoch": 4.0, "grad_norm": 4.0396647453308105, "learning_rate": 4e-05, "loss": 0.3202, "step": 488 }, { "epoch": 4.0, "eval_accuracy": 0.8546365914786967, "eval_f1": 0.8159125620465827, "eval_loss": 0.3256481885910034, "eval_precision": 0.8356565656565657, "eval_recall": 0.8021458446990362, "eval_runtime": 1.8317, "eval_samples_per_second": 217.825, "eval_steps_per_second": 27.296, "step": 488 }, { "epoch": 5.0, "grad_norm": 1.8746237754821777, "learning_rate": 3.7500000000000003e-05, "loss": 0.2919, "step": 610 }, { "epoch": 5.0, "eval_accuracy": 0.8771929824561403, "eval_f1": 0.8475258334958082, "eval_loss": 0.3067488968372345, "eval_precision": 0.8591828192414193, "eval_recall": 0.8381069285324605, "eval_runtime": 1.8315, "eval_samples_per_second": 217.857, "eval_steps_per_second": 27.3, "step": 610 }, { "epoch": 6.0, "grad_norm": 3.942033290863037, "learning_rate": 3.5e-05, "loss": 0.2657, "step": 732 }, { "epoch": 6.0, "eval_accuracy": 0.8521303258145363, "eval_f1": 0.8320383569853806, "eval_loss": 0.3400041460990906, "eval_precision": 0.8193218954248366, "eval_recall": 0.8553827968721586, "eval_runtime": 1.8306, "eval_samples_per_second": 217.958, "eval_steps_per_second": 27.313, "step": 732 }, { "epoch": 7.0, "grad_norm": 0.39800548553466797, "learning_rate": 3.2500000000000004e-05, "loss": 0.2559, "step": 854 }, { "epoch": 7.0, "eval_accuracy": 0.87468671679198, "eval_f1": 0.8524146298159436, "eval_loss": 0.2993007302284241, "eval_precision": 0.8451250578971746, "eval_recall": 0.8613384251682124, "eval_runtime": 1.8316, "eval_samples_per_second": 217.847, "eval_steps_per_second": 27.299, "step": 854 }, { "epoch": 8.0, "grad_norm": 7.434815406799316, "learning_rate": 3e-05, "loss": 0.2369, "step": 976 }, { "epoch": 8.0, "eval_accuracy": 0.8872180451127819, "eval_f1": 0.8584001703456596, "eval_loss": 0.30177775025367737, "eval_precision": 0.8759655377302435, "eval_recall": 0.8451991271140207, "eval_runtime": 1.8332, "eval_samples_per_second": 217.658, "eval_steps_per_second": 27.275, "step": 976 }, { "epoch": 9.0, "grad_norm": 6.123136043548584, "learning_rate": 2.7500000000000004e-05, "loss": 0.2178, "step": 1098 }, { "epoch": 9.0, "eval_accuracy": 0.8847117794486216, "eval_f1": 0.8595070422535211, "eval_loss": 0.29259544610977173, "eval_precision": 0.8633733523114054, "eval_recall": 0.8559283506092017, "eval_runtime": 1.8408, "eval_samples_per_second": 216.759, "eval_steps_per_second": 27.163, "step": 1098 }, { "epoch": 10.0, "grad_norm": 3.8655648231506348, "learning_rate": 2.5e-05, "loss": 0.2118, "step": 1220 }, { "epoch": 10.0, "eval_accuracy": 0.8872180451127819, "eval_f1": 0.8622036668943447, "eval_loss": 0.29553094506263733, "eval_precision": 0.8671602787456446, "eval_recall": 0.8577014002545917, "eval_runtime": 1.8333, "eval_samples_per_second": 217.636, "eval_steps_per_second": 27.273, "step": 1220 }, { "epoch": 11.0, "grad_norm": 3.4139134883880615, "learning_rate": 2.25e-05, "loss": 0.2034, "step": 1342 }, { "epoch": 11.0, "eval_accuracy": 0.8847117794486216, "eval_f1": 0.8572517421602788, "eval_loss": 0.2934134602546692, "eval_precision": 0.8679426449878376, "eval_recall": 0.8484269867248591, "eval_runtime": 1.8401, "eval_samples_per_second": 216.837, "eval_steps_per_second": 27.173, "step": 1342 }, { "epoch": 12.0, "grad_norm": 5.207653045654297, "learning_rate": 2e-05, "loss": 0.1856, "step": 1464 }, { "epoch": 12.0, "eval_accuracy": 0.8796992481203008, "eval_f1": 0.8533986527862829, "eval_loss": 0.297758549451828, "eval_precision": 0.8572003218020917, "eval_recall": 0.8498817966903074, "eval_runtime": 1.8319, "eval_samples_per_second": 217.802, "eval_steps_per_second": 27.293, "step": 1464 }, { "epoch": 13.0, "grad_norm": 1.1223020553588867, "learning_rate": 1.75e-05, "loss": 0.1775, "step": 1586 }, { "epoch": 13.0, "eval_accuracy": 0.8796992481203008, "eval_f1": 0.8493865995596099, "eval_loss": 0.3038978576660156, "eval_precision": 0.8651108632904749, "eval_recall": 0.8373795235497363, "eval_runtime": 1.8374, "eval_samples_per_second": 217.149, "eval_steps_per_second": 27.212, "step": 1586 }, { "epoch": 14.0, "grad_norm": 7.688318252563477, "learning_rate": 1.5e-05, "loss": 0.1719, "step": 1708 }, { "epoch": 14.0, "eval_accuracy": 0.8872180451127819, "eval_f1": 0.8622036668943447, "eval_loss": 0.3036334812641144, "eval_precision": 0.8671602787456446, "eval_recall": 0.8577014002545917, "eval_runtime": 1.8354, "eval_samples_per_second": 217.392, "eval_steps_per_second": 27.242, "step": 1708 }, { "epoch": 15.0, "grad_norm": 1.8339260816574097, "learning_rate": 1.25e-05, "loss": 0.1621, "step": 1830 }, { "epoch": 15.0, "eval_accuracy": 0.8822055137844611, "eval_f1": 0.8596342841745197, "eval_loss": 0.299029141664505, "eval_precision": 0.8555364857667042, "eval_recall": 0.8641571194762684, "eval_runtime": 1.8346, "eval_samples_per_second": 217.487, "eval_steps_per_second": 27.254, "step": 1830 }, { "epoch": 16.0, "grad_norm": 1.4192665815353394, "learning_rate": 1e-05, "loss": 0.1535, "step": 1952 }, { "epoch": 16.0, "eval_accuracy": 0.8847117794486216, "eval_f1": 0.8616171059774413, "eval_loss": 0.3039585053920746, "eval_precision": 0.859873949579832, "eval_recall": 0.8634297144935443, "eval_runtime": 1.8339, "eval_samples_per_second": 217.57, "eval_steps_per_second": 27.264, "step": 1952 }, { "epoch": 17.0, "grad_norm": 1.2236837148666382, "learning_rate": 7.5e-06, "loss": 0.1504, "step": 2074 }, { "epoch": 17.0, "eval_accuracy": 0.8796992481203008, "eval_f1": 0.8510452961672474, "eval_loss": 0.31895172595977783, "eval_precision": 0.8615574190244527, "eval_recall": 0.8423804328059648, "eval_runtime": 1.8322, "eval_samples_per_second": 217.776, "eval_steps_per_second": 27.29, "step": 2074 }, { "epoch": 18.0, "grad_norm": 1.8861066102981567, "learning_rate": 5e-06, "loss": 0.1459, "step": 2196 }, { "epoch": 18.0, "eval_accuracy": 0.8771929824561403, "eval_f1": 0.8522278069611882, "eval_loss": 0.31010520458221436, "eval_precision": 0.8513631702756499, "eval_recall": 0.8531096563011457, "eval_runtime": 1.8303, "eval_samples_per_second": 217.993, "eval_steps_per_second": 27.317, "step": 2196 }, { "epoch": 19.0, "grad_norm": 2.291304349899292, "learning_rate": 2.5e-06, "loss": 0.1444, "step": 2318 }, { "epoch": 19.0, "eval_accuracy": 0.8822055137844611, "eval_f1": 0.855319904024935, "eval_loss": 0.3119203448295593, "eval_precision": 0.862378106322743, "eval_recall": 0.8491543917075832, "eval_runtime": 1.8361, "eval_samples_per_second": 217.305, "eval_steps_per_second": 27.231, "step": 2318 }, { "epoch": 20.0, "grad_norm": 1.763808250427246, "learning_rate": 0.0, "loss": 0.1384, "step": 2440 }, { "epoch": 20.0, "eval_accuracy": 0.8847117794486216, "eval_f1": 0.8609292598654301, "eval_loss": 0.3090469241142273, "eval_precision": 0.8609292598654301, "eval_recall": 0.8609292598654301, "eval_runtime": 1.8381, "eval_samples_per_second": 217.076, "eval_steps_per_second": 27.203, "step": 2440 }, { "epoch": 20.0, "step": 2440, "total_flos": 8444128359504000.0, "train_loss": 0.242632052937492, "train_runtime": 627.3825, "train_samples_per_second": 115.974, "train_steps_per_second": 3.889 } ], "logging_steps": 500, "max_steps": 2440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 8444128359504000.0, "train_batch_size": 30, "trial_name": null, "trial_params": null }