{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 10560, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 1.3427704572677612, "learning_rate": 4.75e-05, "loss": 0.7018, "step": 528 }, { "epoch": 1.0, "eval_accuracy": 0.9115075027327835, "eval_f1": 0.5138413685847589, "eval_loss": 0.33528366684913635, "eval_precision": 0.5528781793842035, "eval_recall": 0.4799535153980244, "eval_runtime": 4.4519, "eval_samples_per_second": 210.021, "eval_steps_per_second": 3.369, "step": 528 }, { "epoch": 2.0, "grad_norm": 1.260859727859497, "learning_rate": 4.5e-05, "loss": 0.2639, "step": 1056 }, { "epoch": 2.0, "eval_accuracy": 0.9412203120341847, "eval_f1": 0.7251732101616628, "eval_loss": 0.1912250965833664, "eval_precision": 0.6493566176470589, "eval_recall": 0.821034282393957, "eval_runtime": 4.5644, "eval_samples_per_second": 204.846, "eval_steps_per_second": 3.286, "step": 1056 }, { "epoch": 3.0, "grad_norm": 2.1796255111694336, "learning_rate": 4.25e-05, "loss": 0.1862, "step": 1584 }, { "epoch": 3.0, "eval_accuracy": 0.9465865050183842, "eval_f1": 0.7531402204562931, "eval_loss": 0.1671685427427292, "eval_precision": 0.6738532110091743, "eval_recall": 0.8535735037768739, "eval_runtime": 4.5056, "eval_samples_per_second": 207.52, "eval_steps_per_second": 3.329, "step": 1584 }, { "epoch": 4.0, "grad_norm": 2.1285958290100098, "learning_rate": 4e-05, "loss": 0.1612, "step": 2112 }, { "epoch": 4.0, "eval_accuracy": 0.9539401768856206, "eval_f1": 0.7823765020026703, "eval_loss": 0.14461010694503784, "eval_precision": 0.7238142292490118, "eval_recall": 0.8512492736780941, "eval_runtime": 4.5298, "eval_samples_per_second": 206.41, "eval_steps_per_second": 3.311, "step": 2112 }, { "epoch": 5.0, "grad_norm": 1.1812392473220825, "learning_rate": 3.7500000000000003e-05, "loss": 0.1439, "step": 2640 }, { "epoch": 5.0, "eval_accuracy": 0.9545364205505317, "eval_f1": 0.7862656374767102, "eval_loss": 0.13903872668743134, "eval_precision": 0.7254420432220039, "eval_recall": 0.8582219639744335, "eval_runtime": 4.6125, "eval_samples_per_second": 202.712, "eval_steps_per_second": 3.252, "step": 2640 }, { "epoch": 6.0, "grad_norm": 1.9920209646224976, "learning_rate": 3.5e-05, "loss": 0.1358, "step": 3168 }, { "epoch": 6.0, "eval_accuracy": 0.9551326642154427, "eval_f1": 0.7892923403127484, "eval_loss": 0.1392282098531723, "eval_precision": 0.7256335282651072, "eval_recall": 0.8651946542707728, "eval_runtime": 4.8436, "eval_samples_per_second": 193.037, "eval_steps_per_second": 3.097, "step": 3168 }, { "epoch": 7.0, "grad_norm": 2.6399483680725098, "learning_rate": 3.2500000000000004e-05, "loss": 0.129, "step": 3696 }, { "epoch": 7.0, "eval_accuracy": 0.9561264036569611, "eval_f1": 0.7918540068764879, "eval_loss": 0.13837336003780365, "eval_precision": 0.7266990291262136, "eval_recall": 0.8698431144683324, "eval_runtime": 4.5425, "eval_samples_per_second": 205.834, "eval_steps_per_second": 3.302, "step": 3696 }, { "epoch": 8.0, "grad_norm": 0.901687741279602, "learning_rate": 3e-05, "loss": 0.1228, "step": 4224 }, { "epoch": 8.0, "eval_accuracy": 0.957517638875087, "eval_f1": 0.7969124301304232, "eval_loss": 0.13390584290027618, "eval_precision": 0.7352652259332023, "eval_recall": 0.8698431144683324, "eval_runtime": 4.5586, "eval_samples_per_second": 205.105, "eval_steps_per_second": 3.29, "step": 4224 }, { "epoch": 9.0, "grad_norm": 1.3437026739120483, "learning_rate": 2.7500000000000004e-05, "loss": 0.1168, "step": 4752 }, { "epoch": 9.0, "eval_accuracy": 0.9577163867633907, "eval_f1": 0.7960010807889759, "eval_loss": 0.13209262490272522, "eval_precision": 0.7439393939393939, "eval_recall": 0.8558977338756537, "eval_runtime": 4.5415, "eval_samples_per_second": 205.88, "eval_steps_per_second": 3.303, "step": 4752 }, { "epoch": 10.0, "grad_norm": 3.851469039916992, "learning_rate": 2.5e-05, "loss": 0.1146, "step": 5280 }, { "epoch": 10.0, "eval_accuracy": 0.958113882539998, "eval_f1": 0.7973009446693656, "eval_loss": 0.12995323538780212, "eval_precision": 0.7444556451612904, "eval_recall": 0.8582219639744335, "eval_runtime": 4.5491, "eval_samples_per_second": 205.534, "eval_steps_per_second": 3.297, "step": 5280 }, { "epoch": 11.0, "grad_norm": 2.413081645965576, "learning_rate": 2.25e-05, "loss": 0.1105, "step": 5808 }, { "epoch": 11.0, "eval_accuracy": 0.9571201430984796, "eval_f1": 0.7947830715996806, "eval_loss": 0.13270916044712067, "eval_precision": 0.7333005893909627, "eval_recall": 0.8675188843695526, "eval_runtime": 4.536, "eval_samples_per_second": 206.127, "eval_steps_per_second": 3.307, "step": 5808 }, { "epoch": 12.0, "grad_norm": 1.1331512928009033, "learning_rate": 2e-05, "loss": 0.1083, "step": 6336 }, { "epoch": 12.0, "eval_accuracy": 0.9569213952101759, "eval_f1": 0.794345158708989, "eval_loss": 0.1333465278148651, "eval_precision": 0.7342209072978304, "eval_recall": 0.8651946542707728, "eval_runtime": 4.5053, "eval_samples_per_second": 207.533, "eval_steps_per_second": 3.329, "step": 6336 }, { "epoch": 13.0, "grad_norm": 1.8731575012207031, "learning_rate": 1.75e-05, "loss": 0.106, "step": 6864 }, { "epoch": 13.0, "eval_accuracy": 0.9591076219815164, "eval_f1": 0.7998916869753587, "eval_loss": 0.12651574611663818, "eval_precision": 0.7489858012170385, "eval_recall": 0.8582219639744335, "eval_runtime": 4.4726, "eval_samples_per_second": 209.052, "eval_steps_per_second": 3.354, "step": 6864 }, { "epoch": 14.0, "grad_norm": 0.8700233697891235, "learning_rate": 1.5e-05, "loss": 0.1032, "step": 7392 }, { "epoch": 14.0, "eval_accuracy": 0.9589088740932128, "eval_f1": 0.7973009446693656, "eval_loss": 0.12690864503383636, "eval_precision": 0.7444556451612904, "eval_recall": 0.8582219639744335, "eval_runtime": 4.5513, "eval_samples_per_second": 205.436, "eval_steps_per_second": 3.296, "step": 7392 }, { "epoch": 15.0, "grad_norm": 1.2827842235565186, "learning_rate": 1.25e-05, "loss": 0.1023, "step": 7920 }, { "epoch": 15.0, "eval_accuracy": 0.9585113783166054, "eval_f1": 0.7998922704012928, "eval_loss": 0.12912563979625702, "eval_precision": 0.7454819277108434, "eval_recall": 0.862870424171993, "eval_runtime": 4.5324, "eval_samples_per_second": 206.293, "eval_steps_per_second": 3.31, "step": 7920 }, { "epoch": 16.0, "grad_norm": 1.694359302520752, "learning_rate": 1e-05, "loss": 0.1014, "step": 8448 }, { "epoch": 16.0, "eval_accuracy": 0.957517638875087, "eval_f1": 0.7947269303201507, "eval_loss": 0.12707427144050598, "eval_precision": 0.7399799599198397, "eval_recall": 0.8582219639744335, "eval_runtime": 4.562, "eval_samples_per_second": 204.955, "eval_steps_per_second": 3.288, "step": 8448 }, { "epoch": 17.0, "grad_norm": 1.139172911643982, "learning_rate": 7.5e-06, "loss": 0.1002, "step": 8976 }, { "epoch": 17.0, "eval_accuracy": 0.9589088740932128, "eval_f1": 0.8041789445486203, "eval_loss": 0.12810933589935303, "eval_precision": 0.7460238568588469, "eval_recall": 0.8721673445671121, "eval_runtime": 4.5254, "eval_samples_per_second": 206.614, "eval_steps_per_second": 3.315, "step": 8976 }, { "epoch": 18.0, "grad_norm": 1.6876777410507202, "learning_rate": 5e-06, "loss": 0.0986, "step": 9504 }, { "epoch": 18.0, "eval_accuracy": 0.9573188909867832, "eval_f1": 0.8016021361815754, "eval_loss": 0.13038571178913116, "eval_precision": 0.741600790513834, "eval_recall": 0.8721673445671121, "eval_runtime": 4.5245, "eval_samples_per_second": 206.653, "eval_steps_per_second": 3.315, "step": 9504 }, { "epoch": 19.0, "grad_norm": 0.5088372230529785, "learning_rate": 2.5e-06, "loss": 0.0978, "step": 10032 }, { "epoch": 19.0, "eval_accuracy": 0.9589088740932128, "eval_f1": 0.8046473925965956, "eval_loss": 0.1270894557237625, "eval_precision": 0.752020202020202, "eval_recall": 0.8651946542707728, "eval_runtime": 4.5248, "eval_samples_per_second": 206.64, "eval_steps_per_second": 3.315, "step": 10032 }, { "epoch": 20.0, "grad_norm": 2.988950490951538, "learning_rate": 0.0, "loss": 0.0984, "step": 10560 }, { "epoch": 20.0, "eval_accuracy": 0.9579151346516943, "eval_f1": 0.8007549204637368, "eval_loss": 0.12812790274620056, "eval_precision": 0.7469818913480886, "eval_recall": 0.862870424171993, "eval_runtime": 4.5277, "eval_samples_per_second": 206.507, "eval_steps_per_second": 3.313, "step": 10560 }, { "epoch": 20.0, "step": 10560, "total_flos": 4552961808488766.0, "train_loss": 0.1551312410470211, "train_runtime": 1231.1625, "train_samples_per_second": 137.057, "train_steps_per_second": 8.577 } ], "logging_steps": 500, "max_steps": 10560, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 4552961808488766.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }