{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 20.136756896972656, "learning_rate": 4.75e-05, "loss": 0.3889, "step": 122 }, { "epoch": 1.0, "eval_accuracy": 0.8045112781954887, "eval_f1": 0.7109554944646705, "eval_loss": 0.4199941158294678, "eval_precision": 0.8255285412262157, "eval_recall": 0.6866703036915802, "eval_runtime": 1.6394, "eval_samples_per_second": 243.375, "eval_steps_per_second": 30.498, "step": 122 }, { "epoch": 2.0, "grad_norm": 24.683944702148438, "learning_rate": 4.5e-05, "loss": 0.2335, "step": 244 }, { "epoch": 2.0, "eval_accuracy": 0.8922305764411027, "eval_f1": 0.8739355018846853, "eval_loss": 0.3136064410209656, "eval_precision": 0.864426651415499, "eval_recall": 0.886252045826514, "eval_runtime": 1.6497, "eval_samples_per_second": 241.866, "eval_steps_per_second": 30.309, "step": 244 }, { "epoch": 3.0, "grad_norm": 66.46725463867188, "learning_rate": 4.25e-05, "loss": 0.1411, "step": 366 }, { "epoch": 3.0, "eval_accuracy": 0.8972431077694235, "eval_f1": 0.8751002084335417, "eval_loss": 0.35689812898635864, "eval_precision": 0.8780701754385964, "eval_recall": 0.8722949627204946, "eval_runtime": 1.6606, "eval_samples_per_second": 240.275, "eval_steps_per_second": 30.11, "step": 366 }, { "epoch": 4.0, "grad_norm": 42.06414031982422, "learning_rate": 4e-05, "loss": 0.1078, "step": 488 }, { "epoch": 4.0, "eval_accuracy": 0.9147869674185464, "eval_f1": 0.8991765265473572, "eval_loss": 0.35370269417762756, "eval_precision": 0.8922773722627737, "eval_recall": 0.9072104018912529, "eval_runtime": 1.653, "eval_samples_per_second": 241.378, "eval_steps_per_second": 30.248, "step": 488 }, { "epoch": 5.0, "grad_norm": 88.54315185546875, "learning_rate": 3.7500000000000003e-05, "loss": 0.0822, "step": 610 }, { "epoch": 5.0, "eval_accuracy": 0.8796992481203008, "eval_f1": 0.8439374185136896, "eval_loss": 0.5069139003753662, "eval_precision": 0.8794955044955045, "eval_recall": 0.822376795781051, "eval_runtime": 1.6524, "eval_samples_per_second": 241.466, "eval_steps_per_second": 30.259, "step": 610 }, { "epoch": 6.0, "grad_norm": 114.8245849609375, "learning_rate": 3.5e-05, "loss": 0.0529, "step": 732 }, { "epoch": 6.0, "eval_accuracy": 0.9072681704260651, "eval_f1": 0.888964101175568, "eval_loss": 0.42624175548553467, "eval_precision": 0.8862007168458781, "eval_recall": 0.8918894344426259, "eval_runtime": 1.6561, "eval_samples_per_second": 240.934, "eval_steps_per_second": 30.192, "step": 732 }, { "epoch": 7.0, "grad_norm": 0.022069375962018967, "learning_rate": 3.2500000000000004e-05, "loss": 0.0365, "step": 854 }, { "epoch": 7.0, "eval_accuracy": 0.8972431077694235, "eval_f1": 0.8769602202215754, "eval_loss": 0.5586097836494446, "eval_precision": 0.8742831541218639, "eval_recall": 0.8797963266048372, "eval_runtime": 1.6532, "eval_samples_per_second": 241.352, "eval_steps_per_second": 30.245, "step": 854 }, { "epoch": 8.0, "grad_norm": 0.0406961552798748, "learning_rate": 3e-05, "loss": 0.033, "step": 976 }, { "epoch": 8.0, "eval_accuracy": 0.8947368421052632, "eval_f1": 0.8674628282189181, "eval_loss": 0.5012311935424805, "eval_precision": 0.8869858462356303, "eval_recall": 0.8530187306783051, "eval_runtime": 1.6551, "eval_samples_per_second": 241.075, "eval_steps_per_second": 30.21, "step": 976 }, { "epoch": 9.0, "grad_norm": 0.6461573243141174, "learning_rate": 2.7500000000000004e-05, "loss": 0.0248, "step": 1098 }, { "epoch": 9.0, "eval_accuracy": 0.8922305764411027, "eval_f1": 0.8631217838765008, "eval_loss": 0.583283007144928, "eval_precision": 0.8872804935927859, "eval_recall": 0.8462447717766868, "eval_runtime": 1.6572, "eval_samples_per_second": 240.772, "eval_steps_per_second": 30.172, "step": 1098 }, { "epoch": 10.0, "grad_norm": 0.12847253680229187, "learning_rate": 2.5e-05, "loss": 0.0123, "step": 1220 }, { "epoch": 10.0, "eval_accuracy": 0.9022556390977443, "eval_f1": 0.8805765113084321, "eval_loss": 0.6610547304153442, "eval_precision": 0.8857796167247387, "eval_recall": 0.8758410620112748, "eval_runtime": 1.6505, "eval_samples_per_second": 241.744, "eval_steps_per_second": 30.294, "step": 1220 }, { "epoch": 11.0, "grad_norm": 0.003805552376434207, "learning_rate": 2.25e-05, "loss": 0.0088, "step": 1342 }, { "epoch": 11.0, "eval_accuracy": 0.8947368421052632, "eval_f1": 0.8682132746146587, "eval_loss": 0.6935672760009766, "eval_precision": 0.884741537654159, "eval_recall": 0.8555191853064193, "eval_runtime": 1.6547, "eval_samples_per_second": 241.138, "eval_steps_per_second": 30.218, "step": 1342 }, { "epoch": 12.0, "grad_norm": 0.0037182692904025316, "learning_rate": 2e-05, "loss": 0.0074, "step": 1464 }, { "epoch": 12.0, "eval_accuracy": 0.9022556390977443, "eval_f1": 0.8805765113084321, "eval_loss": 0.6789939403533936, "eval_precision": 0.8857796167247387, "eval_recall": 0.8758410620112748, "eval_runtime": 1.6567, "eval_samples_per_second": 240.838, "eval_steps_per_second": 30.18, "step": 1464 }, { "epoch": 13.0, "grad_norm": 0.0025616472121328115, "learning_rate": 1.75e-05, "loss": 0.0141, "step": 1586 }, { "epoch": 13.0, "eval_accuracy": 0.8972431077694235, "eval_f1": 0.8731122745782431, "eval_loss": 0.6981470584869385, "eval_precision": 0.8829705994654449, "eval_recall": 0.864793598836152, "eval_runtime": 1.6639, "eval_samples_per_second": 239.794, "eval_steps_per_second": 30.049, "step": 1586 }, { "epoch": 14.0, "grad_norm": 0.006673410069197416, "learning_rate": 1.5e-05, "loss": 0.0034, "step": 1708 }, { "epoch": 14.0, "eval_accuracy": 0.8972431077694235, "eval_f1": 0.8751002084335417, "eval_loss": 0.7144644856452942, "eval_precision": 0.8780701754385964, "eval_recall": 0.8722949627204946, "eval_runtime": 1.6531, "eval_samples_per_second": 241.366, "eval_steps_per_second": 30.246, "step": 1708 }, { "epoch": 15.0, "grad_norm": 0.0030696168541908264, "learning_rate": 1.25e-05, "loss": 0.0059, "step": 1830 }, { "epoch": 15.0, "eval_accuracy": 0.899749373433584, "eval_f1": 0.8758710801393728, "eval_loss": 0.7303631901741028, "eval_precision": 0.8870983228779925, "eval_recall": 0.8665666484815421, "eval_runtime": 1.6541, "eval_samples_per_second": 241.215, "eval_steps_per_second": 30.227, "step": 1830 }, { "epoch": 16.0, "grad_norm": 0.0017388605047017336, "learning_rate": 1e-05, "loss": 0.0056, "step": 1952 }, { "epoch": 16.0, "eval_accuracy": 0.899749373433584, "eval_f1": 0.879667048676036, "eval_loss": 0.7517656683921814, "eval_precision": 0.8778361344537815, "eval_recall": 0.8815693762502272, "eval_runtime": 1.6536, "eval_samples_per_second": 241.288, "eval_steps_per_second": 30.237, "step": 1952 }, { "epoch": 17.0, "grad_norm": 0.002333118114620447, "learning_rate": 7.5e-06, "loss": 0.0039, "step": 2074 }, { "epoch": 17.0, "eval_accuracy": 0.9022556390977443, "eval_f1": 0.8793019197207679, "eval_loss": 0.7390431761741638, "eval_precision": 0.8893184421534936, "eval_recall": 0.8708401527550463, "eval_runtime": 1.655, "eval_samples_per_second": 241.08, "eval_steps_per_second": 30.211, "step": 2074 }, { "epoch": 18.0, "grad_norm": 0.0018157872837036848, "learning_rate": 5e-06, "loss": 0.004, "step": 2196 }, { "epoch": 18.0, "eval_accuracy": 0.9022556390977443, "eval_f1": 0.8799463033398397, "eval_loss": 0.764133095741272, "eval_precision": 0.8874803397294746, "eval_recall": 0.8733406073831607, "eval_runtime": 1.6667, "eval_samples_per_second": 239.389, "eval_steps_per_second": 29.999, "step": 2196 }, { "epoch": 19.0, "grad_norm": 0.0015570241957902908, "learning_rate": 2.5e-06, "loss": 0.007, "step": 2318 }, { "epoch": 19.0, "eval_accuracy": 0.9022556390977443, "eval_f1": 0.8799463033398397, "eval_loss": 0.7847548723220825, "eval_precision": 0.8874803397294746, "eval_recall": 0.8733406073831607, "eval_runtime": 1.664, "eval_samples_per_second": 239.788, "eval_steps_per_second": 30.049, "step": 2318 }, { "epoch": 20.0, "grad_norm": 0.002853752113878727, "learning_rate": 0.0, "loss": 0.0042, "step": 2440 }, { "epoch": 20.0, "eval_accuracy": 0.9022556390977443, "eval_f1": 0.8799463033398397, "eval_loss": 0.790817379951477, "eval_precision": 0.8874803397294746, "eval_recall": 0.8733406073831607, "eval_runtime": 1.6678, "eval_samples_per_second": 239.236, "eval_steps_per_second": 29.979, "step": 2440 }, { "epoch": 20.0, "step": 2440, "total_flos": 7584162436176000.0, "train_loss": 0.0588726386183598, "train_runtime": 864.0501, "train_samples_per_second": 84.208, "train_steps_per_second": 2.824 } ], "logging_steps": 500, "max_steps": 2440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 7584162436176000.0, "train_batch_size": 30, "trial_name": null, "trial_params": null }