{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 4.234379768371582, "learning_rate": 4.75e-05, "loss": 0.5459, "step": 122 }, { "epoch": 1.0, "eval_accuracy": 0.7393483709273183, "eval_f1": 0.6458660476548099, "eval_loss": 0.47629594802856445, "eval_precision": 0.6804065499717673, "eval_recall": 0.63557010365521, "eval_runtime": 4.7443, "eval_samples_per_second": 84.101, "eval_steps_per_second": 10.539, "step": 122 }, { "epoch": 2.0, "grad_norm": 4.299708843231201, "learning_rate": 4.5e-05, "loss": 0.4528, "step": 244 }, { "epoch": 2.0, "eval_accuracy": 0.7844611528822055, "eval_f1": 0.7677594888732471, "eval_loss": 0.43055105209350586, "eval_precision": 0.7630357142857143, "eval_recall": 0.8125113657028551, "eval_runtime": 5.0592, "eval_samples_per_second": 78.866, "eval_steps_per_second": 9.883, "step": 244 }, { "epoch": 3.0, "grad_norm": 3.729501962661743, "learning_rate": 4.25e-05, "loss": 0.3653, "step": 366 }, { "epoch": 3.0, "eval_accuracy": 0.8621553884711779, "eval_f1": 0.8217317487266553, "eval_loss": 0.3334697186946869, "eval_precision": 0.853331681065005, "eval_recall": 0.802464084378978, "eval_runtime": 5.0519, "eval_samples_per_second": 78.98, "eval_steps_per_second": 9.897, "step": 366 }, { "epoch": 4.0, "grad_norm": 2.0170559883117676, "learning_rate": 4e-05, "loss": 0.2987, "step": 488 }, { "epoch": 4.0, "eval_accuracy": 0.8546365914786967, "eval_f1": 0.8246499363520641, "eval_loss": 0.335675448179245, "eval_precision": 0.8246499363520641, "eval_recall": 0.8246499363520641, "eval_runtime": 5.0598, "eval_samples_per_second": 78.858, "eval_steps_per_second": 9.882, "step": 488 }, { "epoch": 5.0, "grad_norm": 0.7316147089004517, "learning_rate": 3.7500000000000003e-05, "loss": 0.2746, "step": 610 }, { "epoch": 5.0, "eval_accuracy": 0.8546365914786967, "eval_f1": 0.8338931955211025, "eval_loss": 0.3400849401950836, "eval_precision": 0.8216641926439057, "eval_recall": 0.8546553918894344, "eval_runtime": 5.0556, "eval_samples_per_second": 78.923, "eval_steps_per_second": 9.89, "step": 610 }, { "epoch": 6.0, "grad_norm": 1.9782907962799072, "learning_rate": 3.5e-05, "loss": 0.2477, "step": 732 }, { "epoch": 6.0, "eval_accuracy": 0.849624060150376, "eval_f1": 0.8308270676691729, "eval_loss": 0.33233991265296936, "eval_precision": 0.8176375838926174, "eval_recall": 0.858610656482997, "eval_runtime": 5.0483, "eval_samples_per_second": 79.037, "eval_steps_per_second": 9.904, "step": 732 }, { "epoch": 7.0, "grad_norm": 1.8530523777008057, "learning_rate": 3.2500000000000004e-05, "loss": 0.24, "step": 854 }, { "epoch": 7.0, "eval_accuracy": 0.8646616541353384, "eval_f1": 0.8447157518450185, "eval_loss": 0.3171246647834778, "eval_precision": 0.8325401217487549, "eval_recall": 0.864248045099109, "eval_runtime": 5.2354, "eval_samples_per_second": 76.211, "eval_steps_per_second": 9.55, "step": 854 }, { "epoch": 8.0, "grad_norm": 3.7897450923919678, "learning_rate": 3e-05, "loss": 0.2069, "step": 976 }, { "epoch": 8.0, "eval_accuracy": 0.8922305764411027, "eval_f1": 0.8683279483657071, "eval_loss": 0.27698931097984314, "eval_precision": 0.873366724738676, "eval_recall": 0.863747954173486, "eval_runtime": 5.0519, "eval_samples_per_second": 78.98, "eval_steps_per_second": 9.897, "step": 976 }, { "epoch": 9.0, "grad_norm": 5.382436275482178, "learning_rate": 2.7500000000000004e-05, "loss": 0.2197, "step": 1098 }, { "epoch": 9.0, "eval_accuracy": 0.8671679197994987, "eval_f1": 0.8491192020377148, "eval_loss": 0.3091033399105072, "eval_precision": 0.8356209150326797, "eval_recall": 0.8735224586288416, "eval_runtime": 5.0573, "eval_samples_per_second": 78.896, "eval_steps_per_second": 9.887, "step": 1098 }, { "epoch": 10.0, "grad_norm": 3.9839751720428467, "learning_rate": 2.5e-05, "loss": 0.2005, "step": 1220 }, { "epoch": 10.0, "eval_accuracy": 0.9022556390977443, "eval_f1": 0.8811928811928812, "eval_loss": 0.2552439570426941, "eval_precision": 0.8842105263157894, "eval_recall": 0.878341516639389, "eval_runtime": 5.0478, "eval_samples_per_second": 79.044, "eval_steps_per_second": 9.905, "step": 1220 }, { "epoch": 11.0, "grad_norm": 0.8527041673660278, "learning_rate": 2.25e-05, "loss": 0.1867, "step": 1342 }, { "epoch": 11.0, "eval_accuracy": 0.9047619047619048, "eval_f1": 0.8867831541218638, "eval_loss": 0.2726534903049469, "eval_precision": 0.88158359844468, "eval_recall": 0.8926168394253501, "eval_runtime": 5.0594, "eval_samples_per_second": 78.863, "eval_steps_per_second": 9.883, "step": 1342 }, { "epoch": 12.0, "grad_norm": 2.9348244667053223, "learning_rate": 2e-05, "loss": 0.1722, "step": 1464 }, { "epoch": 12.0, "eval_accuracy": 0.8922305764411027, "eval_f1": 0.8727838950061173, "eval_loss": 0.27391675114631653, "eval_precision": 0.8656898656898657, "eval_recall": 0.8812511365702855, "eval_runtime": 5.0516, "eval_samples_per_second": 78.984, "eval_steps_per_second": 9.898, "step": 1464 }, { "epoch": 13.0, "grad_norm": 1.8993942737579346, "learning_rate": 1.75e-05, "loss": 0.161, "step": 1586 }, { "epoch": 13.0, "eval_accuracy": 0.899749373433584, "eval_f1": 0.8765393898137261, "eval_loss": 0.2714451551437378, "eval_precision": 0.8852261942423283, "eval_recall": 0.8690671031096563, "eval_runtime": 5.0831, "eval_samples_per_second": 78.495, "eval_steps_per_second": 9.836, "step": 1586 }, { "epoch": 14.0, "grad_norm": 5.6604695320129395, "learning_rate": 1.5e-05, "loss": 0.1684, "step": 1708 }, { "epoch": 14.0, "eval_accuracy": 0.8972431077694235, "eval_f1": 0.8781334505389722, "eval_loss": 0.27736207842826843, "eval_precision": 0.872316715542522, "eval_recall": 0.8847972358610656, "eval_runtime": 5.0631, "eval_samples_per_second": 78.805, "eval_steps_per_second": 9.875, "step": 1708 }, { "epoch": 15.0, "grad_norm": 0.4771318733692169, "learning_rate": 1.25e-05, "loss": 0.1548, "step": 1830 }, { "epoch": 15.0, "eval_accuracy": 0.899749373433584, "eval_f1": 0.8802521008403361, "eval_loss": 0.27421677112579346, "eval_precision": 0.8767168083714847, "eval_recall": 0.8840698308783415, "eval_runtime": 5.0573, "eval_samples_per_second": 78.896, "eval_steps_per_second": 9.887, "step": 1830 }, { "epoch": 16.0, "grad_norm": 0.8061890006065369, "learning_rate": 1e-05, "loss": 0.1526, "step": 1952 }, { "epoch": 16.0, "eval_accuracy": 0.8872180451127819, "eval_f1": 0.8703282417939551, "eval_loss": 0.29698580503463745, "eval_precision": 0.8574462768615692, "eval_recall": 0.8902073104200764, "eval_runtime": 5.0603, "eval_samples_per_second": 78.848, "eval_steps_per_second": 9.881, "step": 1952 }, { "epoch": 17.0, "grad_norm": 2.3830015659332275, "learning_rate": 7.5e-06, "loss": 0.1467, "step": 2074 }, { "epoch": 17.0, "eval_accuracy": 0.8897243107769424, "eval_f1": 0.8707140332272888, "eval_loss": 0.2729242146015167, "eval_precision": 0.8618432385874246, "eval_recall": 0.8819785415530097, "eval_runtime": 5.0444, "eval_samples_per_second": 79.097, "eval_steps_per_second": 9.912, "step": 2074 }, { "epoch": 18.0, "grad_norm": 4.170547962188721, "learning_rate": 5e-06, "loss": 0.1484, "step": 2196 }, { "epoch": 18.0, "eval_accuracy": 0.8972431077694235, "eval_f1": 0.8781334505389722, "eval_loss": 0.27389636635780334, "eval_precision": 0.872316715542522, "eval_recall": 0.8847972358610656, "eval_runtime": 5.0888, "eval_samples_per_second": 78.407, "eval_steps_per_second": 9.825, "step": 2196 }, { "epoch": 19.0, "grad_norm": 3.1922006607055664, "learning_rate": 2.5e-06, "loss": 0.1434, "step": 2318 }, { "epoch": 19.0, "eval_accuracy": 0.899749373433584, "eval_f1": 0.879667048676036, "eval_loss": 0.27286645770072937, "eval_precision": 0.8778361344537815, "eval_recall": 0.8815693762502272, "eval_runtime": 5.0573, "eval_samples_per_second": 78.895, "eval_steps_per_second": 9.887, "step": 2318 }, { "epoch": 20.0, "grad_norm": 1.1650974750518799, "learning_rate": 0.0, "loss": 0.1354, "step": 2440 }, { "epoch": 20.0, "eval_accuracy": 0.9022556390977443, "eval_f1": 0.8829621606985718, "eval_loss": 0.2765507400035858, "eval_precision": 0.8802419354838709, "eval_recall": 0.8858428805237315, "eval_runtime": 5.0471, "eval_samples_per_second": 79.055, "eval_steps_per_second": 9.907, "step": 2440 }, { "epoch": 20.0, "step": 2440, "total_flos": 8444128359504000.0, "train_loss": 0.23107414167435442, "train_runtime": 1893.6576, "train_samples_per_second": 38.423, "train_steps_per_second": 1.289 } ], "logging_steps": 500, "max_steps": 2440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 8444128359504000.0, "train_batch_size": 30, "trial_name": null, "trial_params": null }