|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 3.6051695346832275, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.556, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7167919799498746, |
|
"eval_f1": 0.6641239002659476, |
|
"eval_loss": 0.5324748158454895, |
|
"eval_precision": 0.6617008797653958, |
|
"eval_recall": 0.6671212947808693, |
|
"eval_runtime": 5.0818, |
|
"eval_samples_per_second": 78.516, |
|
"eval_steps_per_second": 9.839, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 3.362328290939331, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.5103, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7719298245614035, |
|
"eval_f1": 0.6523624874335775, |
|
"eval_loss": 0.4822019934654236, |
|
"eval_precision": 0.7714565527065527, |
|
"eval_recall": 0.6386161120203673, |
|
"eval_runtime": 5.0545, |
|
"eval_samples_per_second": 78.94, |
|
"eval_steps_per_second": 9.892, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 4.656192779541016, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.4637, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8045112781954887, |
|
"eval_f1": 0.7479591836734694, |
|
"eval_loss": 0.42453086376190186, |
|
"eval_precision": 0.771505376344086, |
|
"eval_recall": 0.7341789416257501, |
|
"eval_runtime": 5.075, |
|
"eval_samples_per_second": 78.62, |
|
"eval_steps_per_second": 9.852, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 1.4060696363449097, |
|
"learning_rate": 4e-05, |
|
"loss": 0.4173, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8245614035087719, |
|
"eval_f1": 0.7873004752040941, |
|
"eval_loss": 0.38980478048324585, |
|
"eval_precision": 0.7887936313533375, |
|
"eval_recall": 0.7858701582105838, |
|
"eval_runtime": 5.064, |
|
"eval_samples_per_second": 78.791, |
|
"eval_steps_per_second": 9.874, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 3.650151491165161, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.3674, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8370927318295739, |
|
"eval_f1": 0.7999105055663995, |
|
"eval_loss": 0.3571353554725647, |
|
"eval_precision": 0.8058980811575966, |
|
"eval_recall": 0.794735406437534, |
|
"eval_runtime": 5.0507, |
|
"eval_samples_per_second": 78.999, |
|
"eval_steps_per_second": 9.9, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 2.5915608406066895, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.3484, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8370927318295739, |
|
"eval_f1": 0.8029928975654221, |
|
"eval_loss": 0.3431943356990814, |
|
"eval_precision": 0.8037650785914463, |
|
"eval_recall": 0.8022367703218767, |
|
"eval_runtime": 5.135, |
|
"eval_samples_per_second": 77.703, |
|
"eval_steps_per_second": 9.737, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 0.9446077346801758, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.3247, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8521303258145363, |
|
"eval_f1": 0.816408656658218, |
|
"eval_loss": 0.3298611342906952, |
|
"eval_precision": 0.8270654903728508, |
|
"eval_recall": 0.8078741589379888, |
|
"eval_runtime": 5.0753, |
|
"eval_samples_per_second": 78.617, |
|
"eval_steps_per_second": 9.852, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 9.143532752990723, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3102, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8621553884711779, |
|
"eval_f1": 0.8228198648441347, |
|
"eval_loss": 0.3259894549846649, |
|
"eval_precision": 0.8509591907917684, |
|
"eval_recall": 0.8049645390070922, |
|
"eval_runtime": 5.087, |
|
"eval_samples_per_second": 78.435, |
|
"eval_steps_per_second": 9.829, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 4.818455696105957, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.2991, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8571428571428571, |
|
"eval_f1": 0.821647309770462, |
|
"eval_loss": 0.31378358602523804, |
|
"eval_precision": 0.8349087353324641, |
|
"eval_recall": 0.8114202582287688, |
|
"eval_runtime": 5.0565, |
|
"eval_samples_per_second": 78.909, |
|
"eval_steps_per_second": 9.888, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 3.8728206157684326, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.29, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8546365914786967, |
|
"eval_f1": 0.8180088078011953, |
|
"eval_loss": 0.31225934624671936, |
|
"eval_precision": 0.8323930726843348, |
|
"eval_recall": 0.8071467539552646, |
|
"eval_runtime": 5.0562, |
|
"eval_samples_per_second": 78.913, |
|
"eval_steps_per_second": 9.889, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 2.6045541763305664, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.2778, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8671679197994987, |
|
"eval_f1": 0.8377065410088949, |
|
"eval_loss": 0.30650559067726135, |
|
"eval_precision": 0.8423344947735192, |
|
"eval_recall": 0.8335151845790143, |
|
"eval_runtime": 5.0839, |
|
"eval_samples_per_second": 78.483, |
|
"eval_steps_per_second": 9.835, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 1.5327140092849731, |
|
"learning_rate": 2e-05, |
|
"loss": 0.2702, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8571428571428571, |
|
"eval_f1": 0.821647309770462, |
|
"eval_loss": 0.3005804717540741, |
|
"eval_precision": 0.8349087353324641, |
|
"eval_recall": 0.8114202582287688, |
|
"eval_runtime": 5.0646, |
|
"eval_samples_per_second": 78.782, |
|
"eval_steps_per_second": 9.872, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 5.446022987365723, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.2664, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8596491228070176, |
|
"eval_f1": 0.8298403801632752, |
|
"eval_loss": 0.29961732029914856, |
|
"eval_precision": 0.8315523576240049, |
|
"eval_recall": 0.8281960356428442, |
|
"eval_runtime": 5.0899, |
|
"eval_samples_per_second": 78.39, |
|
"eval_steps_per_second": 9.823, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 2.8527348041534424, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.264, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8721804511278195, |
|
"eval_f1": 0.8476882658063937, |
|
"eval_loss": 0.2987360656261444, |
|
"eval_precision": 0.8437296561519796, |
|
"eval_recall": 0.8520640116384797, |
|
"eval_runtime": 5.0668, |
|
"eval_samples_per_second": 78.748, |
|
"eval_steps_per_second": 9.868, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 2.7352912425994873, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.254, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.8771929824561403, |
|
"eval_f1": 0.8522278069611882, |
|
"eval_loss": 0.2951277792453766, |
|
"eval_precision": 0.8513631702756499, |
|
"eval_recall": 0.8531096563011457, |
|
"eval_runtime": 5.222, |
|
"eval_samples_per_second": 76.408, |
|
"eval_steps_per_second": 9.575, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 2.442108154296875, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2571, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8671679197994987, |
|
"eval_f1": 0.8350789627607721, |
|
"eval_loss": 0.2944652736186981, |
|
"eval_precision": 0.8463358876939919, |
|
"eval_recall": 0.8260138206946717, |
|
"eval_runtime": 5.0611, |
|
"eval_samples_per_second": 78.837, |
|
"eval_steps_per_second": 9.879, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 3.852628231048584, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.2511, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.8721804511278195, |
|
"eval_f1": 0.8454251965513313, |
|
"eval_loss": 0.2917979061603546, |
|
"eval_precision": 0.8463049835506276, |
|
"eval_recall": 0.8445626477541371, |
|
"eval_runtime": 5.0682, |
|
"eval_samples_per_second": 78.727, |
|
"eval_steps_per_second": 9.866, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 5.464624881744385, |
|
"learning_rate": 5e-06, |
|
"loss": 0.2574, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.87468671679198, |
|
"eval_f1": 0.8472902633190447, |
|
"eval_loss": 0.29094478487968445, |
|
"eval_precision": 0.8510272912927781, |
|
"eval_recall": 0.8438352427714131, |
|
"eval_runtime": 5.1003, |
|
"eval_samples_per_second": 78.231, |
|
"eval_steps_per_second": 9.803, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 2.1257989406585693, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.2508, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.8771929824561403, |
|
"eval_f1": 0.850729517396184, |
|
"eval_loss": 0.29074448347091675, |
|
"eval_precision": 0.8535087719298247, |
|
"eval_recall": 0.8481087470449173, |
|
"eval_runtime": 5.1136, |
|
"eval_samples_per_second": 78.027, |
|
"eval_steps_per_second": 9.778, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 3.8716917037963867, |
|
"learning_rate": 0.0, |
|
"loss": 0.2536, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8771929824561403, |
|
"eval_f1": 0.850729517396184, |
|
"eval_loss": 0.29076284170150757, |
|
"eval_precision": 0.8535087719298247, |
|
"eval_recall": 0.8481087470449173, |
|
"eval_runtime": 5.0627, |
|
"eval_samples_per_second": 78.811, |
|
"eval_steps_per_second": 9.876, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2440, |
|
"total_flos": 7624554283800000.0, |
|
"train_loss": 0.32445813476062213, |
|
"train_runtime": 1939.7236, |
|
"train_samples_per_second": 37.583, |
|
"train_steps_per_second": 1.258 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 7624554283800000.0, |
|
"train_batch_size": 30, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|