|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 20.136756896972656, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.3889, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8045112781954887, |
|
"eval_f1": 0.7109554944646705, |
|
"eval_loss": 0.4199941158294678, |
|
"eval_precision": 0.8255285412262157, |
|
"eval_recall": 0.6866703036915802, |
|
"eval_runtime": 1.6394, |
|
"eval_samples_per_second": 243.375, |
|
"eval_steps_per_second": 30.498, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 24.683944702148438, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.2335, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8922305764411027, |
|
"eval_f1": 0.8739355018846853, |
|
"eval_loss": 0.3136064410209656, |
|
"eval_precision": 0.864426651415499, |
|
"eval_recall": 0.886252045826514, |
|
"eval_runtime": 1.6497, |
|
"eval_samples_per_second": 241.866, |
|
"eval_steps_per_second": 30.309, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 66.46725463867188, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.1411, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8972431077694235, |
|
"eval_f1": 0.8751002084335417, |
|
"eval_loss": 0.35689812898635864, |
|
"eval_precision": 0.8780701754385964, |
|
"eval_recall": 0.8722949627204946, |
|
"eval_runtime": 1.6606, |
|
"eval_samples_per_second": 240.275, |
|
"eval_steps_per_second": 30.11, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 42.06414031982422, |
|
"learning_rate": 4e-05, |
|
"loss": 0.1078, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9147869674185464, |
|
"eval_f1": 0.8991765265473572, |
|
"eval_loss": 0.35370269417762756, |
|
"eval_precision": 0.8922773722627737, |
|
"eval_recall": 0.9072104018912529, |
|
"eval_runtime": 1.653, |
|
"eval_samples_per_second": 241.378, |
|
"eval_steps_per_second": 30.248, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 88.54315185546875, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.0822, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8796992481203008, |
|
"eval_f1": 0.8439374185136896, |
|
"eval_loss": 0.5069139003753662, |
|
"eval_precision": 0.8794955044955045, |
|
"eval_recall": 0.822376795781051, |
|
"eval_runtime": 1.6524, |
|
"eval_samples_per_second": 241.466, |
|
"eval_steps_per_second": 30.259, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 114.8245849609375, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.0529, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9072681704260651, |
|
"eval_f1": 0.888964101175568, |
|
"eval_loss": 0.42624175548553467, |
|
"eval_precision": 0.8862007168458781, |
|
"eval_recall": 0.8918894344426259, |
|
"eval_runtime": 1.6561, |
|
"eval_samples_per_second": 240.934, |
|
"eval_steps_per_second": 30.192, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 0.022069375962018967, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.0365, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8972431077694235, |
|
"eval_f1": 0.8769602202215754, |
|
"eval_loss": 0.5586097836494446, |
|
"eval_precision": 0.8742831541218639, |
|
"eval_recall": 0.8797963266048372, |
|
"eval_runtime": 1.6532, |
|
"eval_samples_per_second": 241.352, |
|
"eval_steps_per_second": 30.245, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 0.0406961552798748, |
|
"learning_rate": 3e-05, |
|
"loss": 0.033, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8947368421052632, |
|
"eval_f1": 0.8674628282189181, |
|
"eval_loss": 0.5012311935424805, |
|
"eval_precision": 0.8869858462356303, |
|
"eval_recall": 0.8530187306783051, |
|
"eval_runtime": 1.6551, |
|
"eval_samples_per_second": 241.075, |
|
"eval_steps_per_second": 30.21, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 0.6461573243141174, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.0248, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8922305764411027, |
|
"eval_f1": 0.8631217838765008, |
|
"eval_loss": 0.583283007144928, |
|
"eval_precision": 0.8872804935927859, |
|
"eval_recall": 0.8462447717766868, |
|
"eval_runtime": 1.6572, |
|
"eval_samples_per_second": 240.772, |
|
"eval_steps_per_second": 30.172, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 0.12847253680229187, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.0123, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9022556390977443, |
|
"eval_f1": 0.8805765113084321, |
|
"eval_loss": 0.6610547304153442, |
|
"eval_precision": 0.8857796167247387, |
|
"eval_recall": 0.8758410620112748, |
|
"eval_runtime": 1.6505, |
|
"eval_samples_per_second": 241.744, |
|
"eval_steps_per_second": 30.294, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 0.003805552376434207, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.0088, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8947368421052632, |
|
"eval_f1": 0.8682132746146587, |
|
"eval_loss": 0.6935672760009766, |
|
"eval_precision": 0.884741537654159, |
|
"eval_recall": 0.8555191853064193, |
|
"eval_runtime": 1.6547, |
|
"eval_samples_per_second": 241.138, |
|
"eval_steps_per_second": 30.218, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 0.0037182692904025316, |
|
"learning_rate": 2e-05, |
|
"loss": 0.0074, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.9022556390977443, |
|
"eval_f1": 0.8805765113084321, |
|
"eval_loss": 0.6789939403533936, |
|
"eval_precision": 0.8857796167247387, |
|
"eval_recall": 0.8758410620112748, |
|
"eval_runtime": 1.6567, |
|
"eval_samples_per_second": 240.838, |
|
"eval_steps_per_second": 30.18, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 0.0025616472121328115, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.0141, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8972431077694235, |
|
"eval_f1": 0.8731122745782431, |
|
"eval_loss": 0.6981470584869385, |
|
"eval_precision": 0.8829705994654449, |
|
"eval_recall": 0.864793598836152, |
|
"eval_runtime": 1.6639, |
|
"eval_samples_per_second": 239.794, |
|
"eval_steps_per_second": 30.049, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 0.006673410069197416, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.0034, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8972431077694235, |
|
"eval_f1": 0.8751002084335417, |
|
"eval_loss": 0.7144644856452942, |
|
"eval_precision": 0.8780701754385964, |
|
"eval_recall": 0.8722949627204946, |
|
"eval_runtime": 1.6531, |
|
"eval_samples_per_second": 241.366, |
|
"eval_steps_per_second": 30.246, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 0.0030696168541908264, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.0059, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.899749373433584, |
|
"eval_f1": 0.8758710801393728, |
|
"eval_loss": 0.7303631901741028, |
|
"eval_precision": 0.8870983228779925, |
|
"eval_recall": 0.8665666484815421, |
|
"eval_runtime": 1.6541, |
|
"eval_samples_per_second": 241.215, |
|
"eval_steps_per_second": 30.227, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 0.0017388605047017336, |
|
"learning_rate": 1e-05, |
|
"loss": 0.0056, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.899749373433584, |
|
"eval_f1": 0.879667048676036, |
|
"eval_loss": 0.7517656683921814, |
|
"eval_precision": 0.8778361344537815, |
|
"eval_recall": 0.8815693762502272, |
|
"eval_runtime": 1.6536, |
|
"eval_samples_per_second": 241.288, |
|
"eval_steps_per_second": 30.237, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 0.002333118114620447, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.0039, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.9022556390977443, |
|
"eval_f1": 0.8793019197207679, |
|
"eval_loss": 0.7390431761741638, |
|
"eval_precision": 0.8893184421534936, |
|
"eval_recall": 0.8708401527550463, |
|
"eval_runtime": 1.655, |
|
"eval_samples_per_second": 241.08, |
|
"eval_steps_per_second": 30.211, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 0.0018157872837036848, |
|
"learning_rate": 5e-06, |
|
"loss": 0.004, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.9022556390977443, |
|
"eval_f1": 0.8799463033398397, |
|
"eval_loss": 0.764133095741272, |
|
"eval_precision": 0.8874803397294746, |
|
"eval_recall": 0.8733406073831607, |
|
"eval_runtime": 1.6667, |
|
"eval_samples_per_second": 239.389, |
|
"eval_steps_per_second": 29.999, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 0.0015570241957902908, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.007, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.9022556390977443, |
|
"eval_f1": 0.8799463033398397, |
|
"eval_loss": 0.7847548723220825, |
|
"eval_precision": 0.8874803397294746, |
|
"eval_recall": 0.8733406073831607, |
|
"eval_runtime": 1.664, |
|
"eval_samples_per_second": 239.788, |
|
"eval_steps_per_second": 30.049, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 0.002853752113878727, |
|
"learning_rate": 0.0, |
|
"loss": 0.0042, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.9022556390977443, |
|
"eval_f1": 0.8799463033398397, |
|
"eval_loss": 0.790817379951477, |
|
"eval_precision": 0.8874803397294746, |
|
"eval_recall": 0.8733406073831607, |
|
"eval_runtime": 1.6678, |
|
"eval_samples_per_second": 239.236, |
|
"eval_steps_per_second": 29.979, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2440, |
|
"total_flos": 7584162436176000.0, |
|
"train_loss": 0.0588726386183598, |
|
"train_runtime": 864.0501, |
|
"train_samples_per_second": 84.208, |
|
"train_steps_per_second": 2.824 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 7584162436176000.0, |
|
"train_batch_size": 30, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|