{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 21.05661964416504, "learning_rate": 4.75e-05, "loss": 0.411, "step": 122 }, { "epoch": 1.0, "eval_accuracy": 0.8721804511278195, "eval_f1": 0.8446368446368446, "eval_loss": 0.275076687335968, "eval_precision": 0.8473684210526315, "eval_recall": 0.8420621931260229, "eval_runtime": 4.712, "eval_samples_per_second": 84.677, "eval_steps_per_second": 10.611, "step": 122 }, { "epoch": 2.0, "grad_norm": 21.74665069580078, "learning_rate": 4.5e-05, "loss": 0.2264, "step": 244 }, { "epoch": 2.0, "eval_accuracy": 0.8872180451127819, "eval_f1": 0.8718936621074937, "eval_loss": 0.3036659359931946, "eval_precision": 0.8573529411764707, "eval_recall": 0.897708674304419, "eval_runtime": 4.9592, "eval_samples_per_second": 80.457, "eval_steps_per_second": 10.082, "step": 244 }, { "epoch": 3.0, "grad_norm": 2.0689539909362793, "learning_rate": 4.25e-05, "loss": 0.1467, "step": 366 }, { "epoch": 3.0, "eval_accuracy": 0.8771929824561403, "eval_f1": 0.8582079268956014, "eval_loss": 0.3442274332046509, "eval_precision": 0.8464828897338403, "eval_recall": 0.8756137479541735, "eval_runtime": 4.9511, "eval_samples_per_second": 80.588, "eval_steps_per_second": 10.099, "step": 366 }, { "epoch": 4.0, "grad_norm": 1.8201183080673218, "learning_rate": 4e-05, "loss": 0.0961, "step": 488 }, { "epoch": 4.0, "eval_accuracy": 0.899749373433584, "eval_f1": 0.8778322106552358, "eval_loss": 0.3736657500267029, "eval_precision": 0.8818924438393465, "eval_recall": 0.8740680123658847, "eval_runtime": 4.9528, "eval_samples_per_second": 80.56, "eval_steps_per_second": 10.095, "step": 488 }, { "epoch": 5.0, "grad_norm": 0.0896231159567833, "learning_rate": 3.7500000000000003e-05, "loss": 0.0726, "step": 610 }, { "epoch": 5.0, "eval_accuracy": 0.899749373433584, "eval_f1": 0.8771929824561404, "eval_loss": 0.43064403533935547, "eval_precision": 0.8834928678678678, "eval_recall": 0.8715675577377705, "eval_runtime": 4.9624, "eval_samples_per_second": 80.405, "eval_steps_per_second": 10.076, "step": 610 }, { "epoch": 6.0, "grad_norm": 53.643978118896484, "learning_rate": 3.5e-05, "loss": 0.0514, "step": 732 }, { "epoch": 6.0, "eval_accuracy": 0.8847117794486216, "eval_f1": 0.8677208256457565, "eval_loss": 0.6448621153831482, "eval_precision": 0.8546209186496956, "eval_recall": 0.8884342607746863, "eval_runtime": 4.9531, "eval_samples_per_second": 80.556, "eval_steps_per_second": 10.095, "step": 732 }, { "epoch": 7.0, "grad_norm": 0.03521590679883957, "learning_rate": 3.2500000000000004e-05, "loss": 0.0532, "step": 854 }, { "epoch": 7.0, "eval_accuracy": 0.8972431077694235, "eval_f1": 0.8763538792940554, "eval_loss": 0.5595228672027588, "eval_precision": 0.8754297605404427, "eval_recall": 0.877295871976723, "eval_runtime": 4.9653, "eval_samples_per_second": 80.357, "eval_steps_per_second": 10.07, "step": 854 }, { "epoch": 8.0, "grad_norm": 0.004897149745374918, "learning_rate": 3e-05, "loss": 0.0274, "step": 976 }, { "epoch": 8.0, "eval_accuracy": 0.8872180451127819, "eval_f1": 0.8614765038536611, "eval_loss": 0.6727907657623291, "eval_precision": 0.8686536646744258, "eval_recall": 0.8552009456264775, "eval_runtime": 4.9483, "eval_samples_per_second": 80.635, "eval_steps_per_second": 10.105, "step": 976 }, { "epoch": 9.0, "grad_norm": 0.02297130785882473, "learning_rate": 2.7500000000000004e-05, "loss": 0.0186, "step": 1098 }, { "epoch": 9.0, "eval_accuracy": 0.9072681704260651, "eval_f1": 0.8848664457009163, "eval_loss": 0.6217536926269531, "eval_precision": 0.8977236138837015, "eval_recall": 0.8743862520458265, "eval_runtime": 4.9609, "eval_samples_per_second": 80.43, "eval_steps_per_second": 10.079, "step": 1098 }, { "epoch": 10.0, "grad_norm": 0.018355082720518112, "learning_rate": 2.5e-05, "loss": 0.0121, "step": 1220 }, { "epoch": 10.0, "eval_accuracy": 0.8922305764411027, "eval_f1": 0.8669226294357184, "eval_loss": 0.6576113104820251, "eval_precision": 0.8766227567773959, "eval_recall": 0.8587470449172576, "eval_runtime": 4.9593, "eval_samples_per_second": 80.455, "eval_steps_per_second": 10.082, "step": 1220 }, { "epoch": 11.0, "grad_norm": 0.0026700079906731844, "learning_rate": 2.25e-05, "loss": 0.0244, "step": 1342 }, { "epoch": 11.0, "eval_accuracy": 0.8972431077694235, "eval_f1": 0.8694882125334078, "eval_loss": 0.7506579160690308, "eval_precision": 0.8940436639772188, "eval_recall": 0.8522913256955811, "eval_runtime": 4.9497, "eval_samples_per_second": 80.611, "eval_steps_per_second": 10.102, "step": 1342 }, { "epoch": 12.0, "grad_norm": 0.002809323836117983, "learning_rate": 2e-05, "loss": 0.0062, "step": 1464 }, { "epoch": 12.0, "eval_accuracy": 0.8972431077694235, "eval_f1": 0.8724195749658803, "eval_loss": 0.6859227418899536, "eval_precision": 0.8848766823362741, "eval_recall": 0.8622931442080378, "eval_runtime": 4.9227, "eval_samples_per_second": 81.053, "eval_steps_per_second": 10.157, "step": 1464 }, { "epoch": 13.0, "grad_norm": 0.004050145391374826, "learning_rate": 1.75e-05, "loss": 0.0099, "step": 1586 }, { "epoch": 13.0, "eval_accuracy": 0.9072681704260651, "eval_f1": 0.8872855539522206, "eval_loss": 0.6514401435852051, "eval_precision": 0.8903508771929824, "eval_recall": 0.8843880705582834, "eval_runtime": 4.9442, "eval_samples_per_second": 80.7, "eval_steps_per_second": 10.113, "step": 1586 }, { "epoch": 14.0, "grad_norm": 0.0045247310772538185, "learning_rate": 1.5e-05, "loss": 0.0087, "step": 1708 }, { "epoch": 14.0, "eval_accuracy": 0.899749373433584, "eval_f1": 0.8765393898137261, "eval_loss": 0.7604307532310486, "eval_precision": 0.8852261942423283, "eval_recall": 0.8690671031096563, "eval_runtime": 4.9491, "eval_samples_per_second": 80.62, "eval_steps_per_second": 10.103, "step": 1708 }, { "epoch": 15.0, "grad_norm": 0.002084016567096114, "learning_rate": 1.25e-05, "loss": 0.0056, "step": 1830 }, { "epoch": 15.0, "eval_accuracy": 0.9022556390977443, "eval_f1": 0.8799463033398397, "eval_loss": 0.7281515598297119, "eval_precision": 0.8874803397294746, "eval_recall": 0.8733406073831607, "eval_runtime": 4.9475, "eval_samples_per_second": 80.646, "eval_steps_per_second": 10.106, "step": 1830 }, { "epoch": 16.0, "grad_norm": 0.0016144708497449756, "learning_rate": 1e-05, "loss": 0.0063, "step": 1952 }, { "epoch": 16.0, "eval_accuracy": 0.9122807017543859, "eval_f1": 0.89337822671156, "eval_loss": 0.6987277269363403, "eval_precision": 0.8964912280701754, "eval_recall": 0.8904346244771777, "eval_runtime": 4.9546, "eval_samples_per_second": 80.531, "eval_steps_per_second": 10.092, "step": 1952 }, { "epoch": 17.0, "grad_norm": 0.0015528218355029821, "learning_rate": 7.5e-06, "loss": 0.0071, "step": 2074 }, { "epoch": 17.0, "eval_accuracy": 0.9047619047619048, "eval_f1": 0.8833333333333333, "eval_loss": 0.7402216792106628, "eval_precision": 0.8897334834834836, "eval_recall": 0.8776141116566649, "eval_runtime": 4.977, "eval_samples_per_second": 80.17, "eval_steps_per_second": 10.046, "step": 2074 }, { "epoch": 18.0, "grad_norm": 0.0025696582160890102, "learning_rate": 5e-06, "loss": 0.0023, "step": 2196 }, { "epoch": 18.0, "eval_accuracy": 0.8922305764411027, "eval_f1": 0.8690075356742023, "eval_loss": 0.7846499085426331, "eval_precision": 0.8719298245614036, "eval_recall": 0.8662484088016003, "eval_runtime": 4.9435, "eval_samples_per_second": 80.712, "eval_steps_per_second": 10.114, "step": 2196 }, { "epoch": 19.0, "grad_norm": 0.0012161381309852004, "learning_rate": 2.5e-06, "loss": 0.0043, "step": 2318 }, { "epoch": 19.0, "eval_accuracy": 0.8922305764411027, "eval_f1": 0.8690075356742023, "eval_loss": 0.7948206067085266, "eval_precision": 0.8719298245614036, "eval_recall": 0.8662484088016003, "eval_runtime": 4.9539, "eval_samples_per_second": 80.542, "eval_steps_per_second": 10.093, "step": 2318 }, { "epoch": 20.0, "grad_norm": 0.0017919199308380485, "learning_rate": 0.0, "loss": 0.0021, "step": 2440 }, { "epoch": 20.0, "eval_accuracy": 0.8972431077694235, "eval_f1": 0.8744522298370696, "eval_loss": 0.7891051769256592, "eval_precision": 0.8795731707317074, "eval_recall": 0.8697945080923805, "eval_runtime": 4.9391, "eval_samples_per_second": 80.783, "eval_steps_per_second": 10.123, "step": 2440 }, { "epoch": 20.0, "step": 2440, "total_flos": 7584162436176000.0, "train_loss": 0.05962105130807298, "train_runtime": 2714.936, "train_samples_per_second": 26.8, "train_steps_per_second": 0.899 } ], "logging_steps": 500, "max_steps": 2440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 7584162436176000.0, "train_batch_size": 30, "trial_name": null, "trial_params": null }