{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 3.6051695346832275, "learning_rate": 4.75e-05, "loss": 0.556, "step": 122 }, { "epoch": 1.0, "eval_accuracy": 0.7167919799498746, "eval_f1": 0.6641239002659476, "eval_loss": 0.5324748158454895, "eval_precision": 0.6617008797653958, "eval_recall": 0.6671212947808693, "eval_runtime": 5.0818, "eval_samples_per_second": 78.516, "eval_steps_per_second": 9.839, "step": 122 }, { "epoch": 2.0, "grad_norm": 3.362328290939331, "learning_rate": 4.5e-05, "loss": 0.5103, "step": 244 }, { "epoch": 2.0, "eval_accuracy": 0.7719298245614035, "eval_f1": 0.6523624874335775, "eval_loss": 0.4822019934654236, "eval_precision": 0.7714565527065527, "eval_recall": 0.6386161120203673, "eval_runtime": 5.0545, "eval_samples_per_second": 78.94, "eval_steps_per_second": 9.892, "step": 244 }, { "epoch": 3.0, "grad_norm": 4.656192779541016, "learning_rate": 4.25e-05, "loss": 0.4637, "step": 366 }, { "epoch": 3.0, "eval_accuracy": 0.8045112781954887, "eval_f1": 0.7479591836734694, "eval_loss": 0.42453086376190186, "eval_precision": 0.771505376344086, "eval_recall": 0.7341789416257501, "eval_runtime": 5.075, "eval_samples_per_second": 78.62, "eval_steps_per_second": 9.852, "step": 366 }, { "epoch": 4.0, "grad_norm": 1.4060696363449097, "learning_rate": 4e-05, "loss": 0.4173, "step": 488 }, { "epoch": 4.0, "eval_accuracy": 0.8245614035087719, "eval_f1": 0.7873004752040941, "eval_loss": 0.38980478048324585, "eval_precision": 0.7887936313533375, "eval_recall": 0.7858701582105838, "eval_runtime": 5.064, "eval_samples_per_second": 78.791, "eval_steps_per_second": 9.874, "step": 488 }, { "epoch": 5.0, "grad_norm": 3.650151491165161, "learning_rate": 3.7500000000000003e-05, "loss": 0.3674, "step": 610 }, { "epoch": 5.0, "eval_accuracy": 0.8370927318295739, "eval_f1": 0.7999105055663995, "eval_loss": 0.3571353554725647, "eval_precision": 0.8058980811575966, "eval_recall": 0.794735406437534, "eval_runtime": 5.0507, "eval_samples_per_second": 78.999, "eval_steps_per_second": 9.9, "step": 610 }, { "epoch": 6.0, "grad_norm": 2.5915608406066895, "learning_rate": 3.5e-05, "loss": 0.3484, "step": 732 }, { "epoch": 6.0, "eval_accuracy": 0.8370927318295739, "eval_f1": 0.8029928975654221, "eval_loss": 0.3431943356990814, "eval_precision": 0.8037650785914463, "eval_recall": 0.8022367703218767, "eval_runtime": 5.135, "eval_samples_per_second": 77.703, "eval_steps_per_second": 9.737, "step": 732 }, { "epoch": 7.0, "grad_norm": 0.9446077346801758, "learning_rate": 3.2500000000000004e-05, "loss": 0.3247, "step": 854 }, { "epoch": 7.0, "eval_accuracy": 0.8521303258145363, "eval_f1": 0.816408656658218, "eval_loss": 0.3298611342906952, "eval_precision": 0.8270654903728508, "eval_recall": 0.8078741589379888, "eval_runtime": 5.0753, "eval_samples_per_second": 78.617, "eval_steps_per_second": 9.852, "step": 854 }, { "epoch": 8.0, "grad_norm": 9.143532752990723, "learning_rate": 3e-05, "loss": 0.3102, "step": 976 }, { "epoch": 8.0, "eval_accuracy": 0.8621553884711779, "eval_f1": 0.8228198648441347, "eval_loss": 0.3259894549846649, "eval_precision": 0.8509591907917684, "eval_recall": 0.8049645390070922, "eval_runtime": 5.087, "eval_samples_per_second": 78.435, "eval_steps_per_second": 9.829, "step": 976 }, { "epoch": 9.0, "grad_norm": 4.818455696105957, "learning_rate": 2.7500000000000004e-05, "loss": 0.2991, "step": 1098 }, { "epoch": 9.0, "eval_accuracy": 0.8571428571428571, "eval_f1": 0.821647309770462, "eval_loss": 0.31378358602523804, "eval_precision": 0.8349087353324641, "eval_recall": 0.8114202582287688, "eval_runtime": 5.0565, "eval_samples_per_second": 78.909, "eval_steps_per_second": 9.888, "step": 1098 }, { "epoch": 10.0, "grad_norm": 3.8728206157684326, "learning_rate": 2.5e-05, "loss": 0.29, "step": 1220 }, { "epoch": 10.0, "eval_accuracy": 0.8546365914786967, "eval_f1": 0.8180088078011953, "eval_loss": 0.31225934624671936, "eval_precision": 0.8323930726843348, "eval_recall": 0.8071467539552646, "eval_runtime": 5.0562, "eval_samples_per_second": 78.913, "eval_steps_per_second": 9.889, "step": 1220 }, { "epoch": 11.0, "grad_norm": 2.6045541763305664, "learning_rate": 2.25e-05, "loss": 0.2778, "step": 1342 }, { "epoch": 11.0, "eval_accuracy": 0.8671679197994987, "eval_f1": 0.8377065410088949, "eval_loss": 0.30650559067726135, "eval_precision": 0.8423344947735192, "eval_recall": 0.8335151845790143, "eval_runtime": 5.0839, "eval_samples_per_second": 78.483, "eval_steps_per_second": 9.835, "step": 1342 }, { "epoch": 12.0, "grad_norm": 1.5327140092849731, "learning_rate": 2e-05, "loss": 0.2702, "step": 1464 }, { "epoch": 12.0, "eval_accuracy": 0.8571428571428571, "eval_f1": 0.821647309770462, "eval_loss": 0.3005804717540741, "eval_precision": 0.8349087353324641, "eval_recall": 0.8114202582287688, "eval_runtime": 5.0646, "eval_samples_per_second": 78.782, "eval_steps_per_second": 9.872, "step": 1464 }, { "epoch": 13.0, "grad_norm": 5.446022987365723, "learning_rate": 1.75e-05, "loss": 0.2664, "step": 1586 }, { "epoch": 13.0, "eval_accuracy": 0.8596491228070176, "eval_f1": 0.8298403801632752, "eval_loss": 0.29961732029914856, "eval_precision": 0.8315523576240049, "eval_recall": 0.8281960356428442, "eval_runtime": 5.0899, "eval_samples_per_second": 78.39, "eval_steps_per_second": 9.823, "step": 1586 }, { "epoch": 14.0, "grad_norm": 2.8527348041534424, "learning_rate": 1.5e-05, "loss": 0.264, "step": 1708 }, { "epoch": 14.0, "eval_accuracy": 0.8721804511278195, "eval_f1": 0.8476882658063937, "eval_loss": 0.2987360656261444, "eval_precision": 0.8437296561519796, "eval_recall": 0.8520640116384797, "eval_runtime": 5.0668, "eval_samples_per_second": 78.748, "eval_steps_per_second": 9.868, "step": 1708 }, { "epoch": 15.0, "grad_norm": 2.7352912425994873, "learning_rate": 1.25e-05, "loss": 0.254, "step": 1830 }, { "epoch": 15.0, "eval_accuracy": 0.8771929824561403, "eval_f1": 0.8522278069611882, "eval_loss": 0.2951277792453766, "eval_precision": 0.8513631702756499, "eval_recall": 0.8531096563011457, "eval_runtime": 5.222, "eval_samples_per_second": 76.408, "eval_steps_per_second": 9.575, "step": 1830 }, { "epoch": 16.0, "grad_norm": 2.442108154296875, "learning_rate": 1e-05, "loss": 0.2571, "step": 1952 }, { "epoch": 16.0, "eval_accuracy": 0.8671679197994987, "eval_f1": 0.8350789627607721, "eval_loss": 0.2944652736186981, "eval_precision": 0.8463358876939919, "eval_recall": 0.8260138206946717, "eval_runtime": 5.0611, "eval_samples_per_second": 78.837, "eval_steps_per_second": 9.879, "step": 1952 }, { "epoch": 17.0, "grad_norm": 3.852628231048584, "learning_rate": 7.5e-06, "loss": 0.2511, "step": 2074 }, { "epoch": 17.0, "eval_accuracy": 0.8721804511278195, "eval_f1": 0.8454251965513313, "eval_loss": 0.2917979061603546, "eval_precision": 0.8463049835506276, "eval_recall": 0.8445626477541371, "eval_runtime": 5.0682, "eval_samples_per_second": 78.727, "eval_steps_per_second": 9.866, "step": 2074 }, { "epoch": 18.0, "grad_norm": 5.464624881744385, "learning_rate": 5e-06, "loss": 0.2574, "step": 2196 }, { "epoch": 18.0, "eval_accuracy": 0.87468671679198, "eval_f1": 0.8472902633190447, "eval_loss": 0.29094478487968445, "eval_precision": 0.8510272912927781, "eval_recall": 0.8438352427714131, "eval_runtime": 5.1003, "eval_samples_per_second": 78.231, "eval_steps_per_second": 9.803, "step": 2196 }, { "epoch": 19.0, "grad_norm": 2.1257989406585693, "learning_rate": 2.5e-06, "loss": 0.2508, "step": 2318 }, { "epoch": 19.0, "eval_accuracy": 0.8771929824561403, "eval_f1": 0.850729517396184, "eval_loss": 0.29074448347091675, "eval_precision": 0.8535087719298247, "eval_recall": 0.8481087470449173, "eval_runtime": 5.1136, "eval_samples_per_second": 78.027, "eval_steps_per_second": 9.778, "step": 2318 }, { "epoch": 20.0, "grad_norm": 3.8716917037963867, "learning_rate": 0.0, "loss": 0.2536, "step": 2440 }, { "epoch": 20.0, "eval_accuracy": 0.8771929824561403, "eval_f1": 0.850729517396184, "eval_loss": 0.29076284170150757, "eval_precision": 0.8535087719298247, "eval_recall": 0.8481087470449173, "eval_runtime": 5.0627, "eval_samples_per_second": 78.811, "eval_steps_per_second": 9.876, "step": 2440 }, { "epoch": 20.0, "step": 2440, "total_flos": 7624554283800000.0, "train_loss": 0.32445813476062213, "train_runtime": 1939.7236, "train_samples_per_second": 37.583, "train_steps_per_second": 1.258 } ], "logging_steps": 500, "max_steps": 2440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 7624554283800000.0, "train_batch_size": 30, "trial_name": null, "trial_params": null }