{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 10560, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 1.2754160165786743, "learning_rate": 4.75e-05, "loss": 0.7665, "step": 528 }, { "epoch": 1.0, "eval_accuracy": 0.8710623074629832, "eval_f1": 0.1887287024901704, "eval_loss": 0.42895129323005676, "eval_precision": 0.38028169014084506, "eval_recall": 0.12550842533410808, "eval_runtime": 4.4537, "eval_samples_per_second": 209.94, "eval_steps_per_second": 3.368, "step": 528 }, { "epoch": 2.0, "grad_norm": 0.9667028784751892, "learning_rate": 4.5e-05, "loss": 0.336, "step": 1056 }, { "epoch": 2.0, "eval_accuracy": 0.9335188313624168, "eval_f1": 0.6881609491875161, "eval_loss": 0.21774514019489288, "eval_precision": 0.6187384044526901, "eval_recall": 0.7751307379430563, "eval_runtime": 4.576, "eval_samples_per_second": 204.325, "eval_steps_per_second": 3.278, "step": 1056 }, { "epoch": 3.0, "grad_norm": 1.0499272346496582, "learning_rate": 4.25e-05, "loss": 0.2067, "step": 1584 }, { "epoch": 3.0, "eval_accuracy": 0.9410215641458809, "eval_f1": 0.7261015202267457, "eval_loss": 0.17432348430156708, "eval_precision": 0.6523148148148148, "eval_recall": 0.8187100522951772, "eval_runtime": 4.5415, "eval_samples_per_second": 205.881, "eval_steps_per_second": 3.303, "step": 1584 }, { "epoch": 4.0, "grad_norm": 1.841440200805664, "learning_rate": 4e-05, "loss": 0.1734, "step": 2112 }, { "epoch": 4.0, "eval_accuracy": 0.9499652191195469, "eval_f1": 0.7669569807337029, "eval_loss": 0.15249943733215332, "eval_precision": 0.7026112185686654, "eval_recall": 0.8442765833817548, "eval_runtime": 4.5362, "eval_samples_per_second": 206.121, "eval_steps_per_second": 3.307, "step": 2112 }, { "epoch": 5.0, "grad_norm": 1.0053447484970093, "learning_rate": 3.7500000000000003e-05, "loss": 0.1557, "step": 2640 }, { "epoch": 5.0, "eval_accuracy": 0.9523501937791911, "eval_f1": 0.7757479481069631, "eval_loss": 0.14416351914405823, "eval_precision": 0.7125486381322957, "eval_recall": 0.8512492736780941, "eval_runtime": 4.4753, "eval_samples_per_second": 208.924, "eval_steps_per_second": 3.352, "step": 2640 }, { "epoch": 6.0, "grad_norm": 1.2348506450653076, "learning_rate": 3.5e-05, "loss": 0.146, "step": 3168 }, { "epoch": 6.0, "eval_accuracy": 0.9519526980025838, "eval_f1": 0.7780979827089337, "eval_loss": 0.14452077448368073, "eval_precision": 0.7084923664122137, "eval_recall": 0.862870424171993, "eval_runtime": 4.5269, "eval_samples_per_second": 206.545, "eval_steps_per_second": 3.314, "step": 3168 }, { "epoch": 7.0, "grad_norm": 2.4902212619781494, "learning_rate": 3.2500000000000004e-05, "loss": 0.1397, "step": 3696 }, { "epoch": 7.0, "eval_accuracy": 0.9525489416674948, "eval_f1": 0.7873728150273936, "eval_loss": 0.14437128603458405, "eval_precision": 0.7144886363636364, "eval_recall": 0.8768158047646717, "eval_runtime": 4.4995, "eval_samples_per_second": 207.8, "eval_steps_per_second": 3.334, "step": 3696 }, { "epoch": 8.0, "grad_norm": 0.4951108992099762, "learning_rate": 3e-05, "loss": 0.1338, "step": 4224 }, { "epoch": 8.0, "eval_accuracy": 0.9545364205505317, "eval_f1": 0.7905745300503045, "eval_loss": 0.1385926455259323, "eval_precision": 0.7261673151750972, "eval_recall": 0.8675188843695526, "eval_runtime": 4.5384, "eval_samples_per_second": 206.018, "eval_steps_per_second": 3.305, "step": 4224 }, { "epoch": 9.0, "grad_norm": 1.3395795822143555, "learning_rate": 2.7500000000000004e-05, "loss": 0.1277, "step": 4752 }, { "epoch": 9.0, "eval_accuracy": 0.9561264036569611, "eval_f1": 0.7964601769911505, "eval_loss": 0.13648679852485657, "eval_precision": 0.7395418326693227, "eval_recall": 0.862870424171993, "eval_runtime": 4.5184, "eval_samples_per_second": 206.93, "eval_steps_per_second": 3.32, "step": 4752 }, { "epoch": 10.0, "grad_norm": 2.1327812671661377, "learning_rate": 2.5e-05, "loss": 0.1255, "step": 5280 }, { "epoch": 10.0, "eval_accuracy": 0.9562754645731889, "eval_f1": 0.7936932121859969, "eval_loss": 0.1332310438156128, "eval_precision": 0.7347847600197922, "eval_recall": 0.862870424171993, "eval_runtime": 4.4981, "eval_samples_per_second": 207.864, "eval_steps_per_second": 3.335, "step": 5280 }, { "epoch": 11.0, "grad_norm": 11.329936027526855, "learning_rate": 2.25e-05, "loss": 0.1215, "step": 5808 }, { "epoch": 11.0, "eval_accuracy": 0.9557289078803538, "eval_f1": 0.7884564469155414, "eval_loss": 0.1330229640007019, "eval_precision": 0.7242217898832685, "eval_recall": 0.8651946542707728, "eval_runtime": 4.518, "eval_samples_per_second": 206.949, "eval_steps_per_second": 3.32, "step": 5808 }, { "epoch": 12.0, "grad_norm": 1.500403642654419, "learning_rate": 2e-05, "loss": 0.1189, "step": 6336 }, { "epoch": 12.0, "eval_accuracy": 0.9561264036569611, "eval_f1": 0.794345158708989, "eval_loss": 0.13398447632789612, "eval_precision": 0.7342209072978304, "eval_recall": 0.8651946542707728, "eval_runtime": 4.5331, "eval_samples_per_second": 206.26, "eval_steps_per_second": 3.309, "step": 6336 }, { "epoch": 13.0, "grad_norm": 1.2570128440856934, "learning_rate": 1.75e-05, "loss": 0.1179, "step": 6864 }, { "epoch": 13.0, "eval_accuracy": 0.9571201430984796, "eval_f1": 0.7973009446693656, "eval_loss": 0.12948381900787354, "eval_precision": 0.7444556451612904, "eval_recall": 0.8582219639744335, "eval_runtime": 4.4918, "eval_samples_per_second": 208.158, "eval_steps_per_second": 3.339, "step": 6864 }, { "epoch": 14.0, "grad_norm": 0.9663475751876831, "learning_rate": 1.5e-05, "loss": 0.114, "step": 7392 }, { "epoch": 14.0, "eval_accuracy": 0.9578654476796185, "eval_f1": 0.8013955984970478, "eval_loss": 0.1294805407524109, "eval_precision": 0.7446384039900249, "eval_recall": 0.8675188843695526, "eval_runtime": 4.5251, "eval_samples_per_second": 206.625, "eval_steps_per_second": 3.315, "step": 7392 }, { "epoch": 15.0, "grad_norm": 1.1781103610992432, "learning_rate": 1.25e-05, "loss": 0.1128, "step": 7920 }, { "epoch": 15.0, "eval_accuracy": 0.9571201430984796, "eval_f1": 0.7960438385458433, "eval_loss": 0.13167551159858704, "eval_precision": 0.7371287128712871, "eval_recall": 0.8651946542707728, "eval_runtime": 4.5381, "eval_samples_per_second": 206.032, "eval_steps_per_second": 3.305, "step": 7920 }, { "epoch": 16.0, "grad_norm": 1.9357908964157104, "learning_rate": 1e-05, "loss": 0.1115, "step": 8448 }, { "epoch": 16.0, "eval_accuracy": 0.957517638875087, "eval_f1": 0.7973297730307075, "eval_loss": 0.12998828291893005, "eval_precision": 0.7376482213438735, "eval_recall": 0.8675188843695526, "eval_runtime": 4.5484, "eval_samples_per_second": 205.566, "eval_steps_per_second": 3.298, "step": 8448 }, { "epoch": 17.0, "grad_norm": 1.123679280281067, "learning_rate": 7.5e-06, "loss": 0.1109, "step": 8976 }, { "epoch": 17.0, "eval_accuracy": 0.9577163867633907, "eval_f1": 0.7951935914552738, "eval_loss": 0.13070179522037506, "eval_precision": 0.7356719367588933, "eval_recall": 0.8651946542707728, "eval_runtime": 4.5117, "eval_samples_per_second": 207.237, "eval_steps_per_second": 3.325, "step": 8976 }, { "epoch": 18.0, "grad_norm": 1.571700096130371, "learning_rate": 5e-06, "loss": 0.1097, "step": 9504 }, { "epoch": 18.0, "eval_accuracy": 0.957517638875087, "eval_f1": 0.7968959058067969, "eval_loss": 0.1318700611591339, "eval_precision": 0.7385912698412699, "eval_recall": 0.8651946542707728, "eval_runtime": 4.5223, "eval_samples_per_second": 206.752, "eval_steps_per_second": 3.317, "step": 9504 }, { "epoch": 19.0, "grad_norm": 2.028625726699829, "learning_rate": 2.5e-06, "loss": 0.1086, "step": 10032 }, { "epoch": 19.0, "eval_accuracy": 0.9573188909867832, "eval_f1": 0.7943148297130599, "eval_loss": 0.12963609397411346, "eval_precision": 0.7375498007968128, "eval_recall": 0.8605461940732132, "eval_runtime": 4.9197, "eval_samples_per_second": 190.052, "eval_steps_per_second": 3.049, "step": 10032 }, { "epoch": 20.0, "grad_norm": 1.7686336040496826, "learning_rate": 0.0, "loss": 0.1094, "step": 10560 }, { "epoch": 20.0, "eval_accuracy": 0.9573188909867832, "eval_f1": 0.7943148297130599, "eval_loss": 0.13019020855426788, "eval_precision": 0.7375498007968128, "eval_recall": 0.8605461940732132, "eval_runtime": 4.5659, "eval_samples_per_second": 204.778, "eval_steps_per_second": 3.285, "step": 10560 }, { "epoch": 20.0, "step": 10560, "total_flos": 4545096690358590.0, "train_loss": 0.17230618686387034, "train_runtime": 1232.2084, "train_samples_per_second": 136.941, "train_steps_per_second": 8.57 } ], "logging_steps": 500, "max_steps": 10560, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 4545096690358590.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }