|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 21.05661964416504, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.411, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8721804511278195, |
|
"eval_f1": 0.8446368446368446, |
|
"eval_loss": 0.275076687335968, |
|
"eval_precision": 0.8473684210526315, |
|
"eval_recall": 0.8420621931260229, |
|
"eval_runtime": 4.712, |
|
"eval_samples_per_second": 84.677, |
|
"eval_steps_per_second": 10.611, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 21.74665069580078, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.2264, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8872180451127819, |
|
"eval_f1": 0.8718936621074937, |
|
"eval_loss": 0.3036659359931946, |
|
"eval_precision": 0.8573529411764707, |
|
"eval_recall": 0.897708674304419, |
|
"eval_runtime": 4.9592, |
|
"eval_samples_per_second": 80.457, |
|
"eval_steps_per_second": 10.082, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 2.0689539909362793, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.1467, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8771929824561403, |
|
"eval_f1": 0.8582079268956014, |
|
"eval_loss": 0.3442274332046509, |
|
"eval_precision": 0.8464828897338403, |
|
"eval_recall": 0.8756137479541735, |
|
"eval_runtime": 4.9511, |
|
"eval_samples_per_second": 80.588, |
|
"eval_steps_per_second": 10.099, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 1.8201183080673218, |
|
"learning_rate": 4e-05, |
|
"loss": 0.0961, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.899749373433584, |
|
"eval_f1": 0.8778322106552358, |
|
"eval_loss": 0.3736657500267029, |
|
"eval_precision": 0.8818924438393465, |
|
"eval_recall": 0.8740680123658847, |
|
"eval_runtime": 4.9528, |
|
"eval_samples_per_second": 80.56, |
|
"eval_steps_per_second": 10.095, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 0.0896231159567833, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.0726, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.899749373433584, |
|
"eval_f1": 0.8771929824561404, |
|
"eval_loss": 0.43064403533935547, |
|
"eval_precision": 0.8834928678678678, |
|
"eval_recall": 0.8715675577377705, |
|
"eval_runtime": 4.9624, |
|
"eval_samples_per_second": 80.405, |
|
"eval_steps_per_second": 10.076, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 53.643978118896484, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.0514, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8847117794486216, |
|
"eval_f1": 0.8677208256457565, |
|
"eval_loss": 0.6448621153831482, |
|
"eval_precision": 0.8546209186496956, |
|
"eval_recall": 0.8884342607746863, |
|
"eval_runtime": 4.9531, |
|
"eval_samples_per_second": 80.556, |
|
"eval_steps_per_second": 10.095, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 0.03521590679883957, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.0532, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8972431077694235, |
|
"eval_f1": 0.8763538792940554, |
|
"eval_loss": 0.5595228672027588, |
|
"eval_precision": 0.8754297605404427, |
|
"eval_recall": 0.877295871976723, |
|
"eval_runtime": 4.9653, |
|
"eval_samples_per_second": 80.357, |
|
"eval_steps_per_second": 10.07, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 0.004897149745374918, |
|
"learning_rate": 3e-05, |
|
"loss": 0.0274, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8872180451127819, |
|
"eval_f1": 0.8614765038536611, |
|
"eval_loss": 0.6727907657623291, |
|
"eval_precision": 0.8686536646744258, |
|
"eval_recall": 0.8552009456264775, |
|
"eval_runtime": 4.9483, |
|
"eval_samples_per_second": 80.635, |
|
"eval_steps_per_second": 10.105, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 0.02297130785882473, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.0186, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.9072681704260651, |
|
"eval_f1": 0.8848664457009163, |
|
"eval_loss": 0.6217536926269531, |
|
"eval_precision": 0.8977236138837015, |
|
"eval_recall": 0.8743862520458265, |
|
"eval_runtime": 4.9609, |
|
"eval_samples_per_second": 80.43, |
|
"eval_steps_per_second": 10.079, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 0.018355082720518112, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.0121, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8922305764411027, |
|
"eval_f1": 0.8669226294357184, |
|
"eval_loss": 0.6576113104820251, |
|
"eval_precision": 0.8766227567773959, |
|
"eval_recall": 0.8587470449172576, |
|
"eval_runtime": 4.9593, |
|
"eval_samples_per_second": 80.455, |
|
"eval_steps_per_second": 10.082, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 0.0026700079906731844, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.0244, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8972431077694235, |
|
"eval_f1": 0.8694882125334078, |
|
"eval_loss": 0.7506579160690308, |
|
"eval_precision": 0.8940436639772188, |
|
"eval_recall": 0.8522913256955811, |
|
"eval_runtime": 4.9497, |
|
"eval_samples_per_second": 80.611, |
|
"eval_steps_per_second": 10.102, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 0.002809323836117983, |
|
"learning_rate": 2e-05, |
|
"loss": 0.0062, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8972431077694235, |
|
"eval_f1": 0.8724195749658803, |
|
"eval_loss": 0.6859227418899536, |
|
"eval_precision": 0.8848766823362741, |
|
"eval_recall": 0.8622931442080378, |
|
"eval_runtime": 4.9227, |
|
"eval_samples_per_second": 81.053, |
|
"eval_steps_per_second": 10.157, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 0.004050145391374826, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.0099, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.9072681704260651, |
|
"eval_f1": 0.8872855539522206, |
|
"eval_loss": 0.6514401435852051, |
|
"eval_precision": 0.8903508771929824, |
|
"eval_recall": 0.8843880705582834, |
|
"eval_runtime": 4.9442, |
|
"eval_samples_per_second": 80.7, |
|
"eval_steps_per_second": 10.113, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 0.0045247310772538185, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.0087, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.899749373433584, |
|
"eval_f1": 0.8765393898137261, |
|
"eval_loss": 0.7604307532310486, |
|
"eval_precision": 0.8852261942423283, |
|
"eval_recall": 0.8690671031096563, |
|
"eval_runtime": 4.9491, |
|
"eval_samples_per_second": 80.62, |
|
"eval_steps_per_second": 10.103, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 0.002084016567096114, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.0056, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.9022556390977443, |
|
"eval_f1": 0.8799463033398397, |
|
"eval_loss": 0.7281515598297119, |
|
"eval_precision": 0.8874803397294746, |
|
"eval_recall": 0.8733406073831607, |
|
"eval_runtime": 4.9475, |
|
"eval_samples_per_second": 80.646, |
|
"eval_steps_per_second": 10.106, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 0.0016144708497449756, |
|
"learning_rate": 1e-05, |
|
"loss": 0.0063, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.9122807017543859, |
|
"eval_f1": 0.89337822671156, |
|
"eval_loss": 0.6987277269363403, |
|
"eval_precision": 0.8964912280701754, |
|
"eval_recall": 0.8904346244771777, |
|
"eval_runtime": 4.9546, |
|
"eval_samples_per_second": 80.531, |
|
"eval_steps_per_second": 10.092, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 0.0015528218355029821, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.0071, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.9047619047619048, |
|
"eval_f1": 0.8833333333333333, |
|
"eval_loss": 0.7402216792106628, |
|
"eval_precision": 0.8897334834834836, |
|
"eval_recall": 0.8776141116566649, |
|
"eval_runtime": 4.977, |
|
"eval_samples_per_second": 80.17, |
|
"eval_steps_per_second": 10.046, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 0.0025696582160890102, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0023, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8922305764411027, |
|
"eval_f1": 0.8690075356742023, |
|
"eval_loss": 0.7846499085426331, |
|
"eval_precision": 0.8719298245614036, |
|
"eval_recall": 0.8662484088016003, |
|
"eval_runtime": 4.9435, |
|
"eval_samples_per_second": 80.712, |
|
"eval_steps_per_second": 10.114, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 0.0012161381309852004, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.0043, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.8922305764411027, |
|
"eval_f1": 0.8690075356742023, |
|
"eval_loss": 0.7948206067085266, |
|
"eval_precision": 0.8719298245614036, |
|
"eval_recall": 0.8662484088016003, |
|
"eval_runtime": 4.9539, |
|
"eval_samples_per_second": 80.542, |
|
"eval_steps_per_second": 10.093, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 0.0017919199308380485, |
|
"learning_rate": 0.0, |
|
"loss": 0.0021, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8972431077694235, |
|
"eval_f1": 0.8744522298370696, |
|
"eval_loss": 0.7891051769256592, |
|
"eval_precision": 0.8795731707317074, |
|
"eval_recall": 0.8697945080923805, |
|
"eval_runtime": 4.9391, |
|
"eval_samples_per_second": 80.783, |
|
"eval_steps_per_second": 10.123, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2440, |
|
"total_flos": 7584162436176000.0, |
|
"train_loss": 0.05962105130807298, |
|
"train_runtime": 2714.936, |
|
"train_samples_per_second": 26.8, |
|
"train_steps_per_second": 0.899 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 7584162436176000.0, |
|
"train_batch_size": 30, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|