{ "best_metric": 0.8226299694189603, "best_model_checkpoint": "checkpoints/boolq-roberta/checkpoint-5605", "epoch": 19.0, "global_step": 5605, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.6217125382262997, "eval_loss": 1.2398264408111572, "eval_runtime": 8.8922, "eval_samples_per_second": 367.737, "eval_steps_per_second": 45.995, "step": 295 }, { "best_epoch": 0, "best_eval_accuracy": 0.6217125382262997, "epoch": 1.0, "step": 295 }, { "epoch": 1.69, "learning_rate": 0.006406779661016949, "loss": 0.8402, "step": 500 }, { "epoch": 2.0, "eval_accuracy": 0.6422018348623854, "eval_loss": 0.6416757702827454, "eval_runtime": 8.9446, "eval_samples_per_second": 365.585, "eval_steps_per_second": 45.726, "step": 590 }, { "best_epoch": 1, "best_eval_accuracy": 0.6422018348623854, "epoch": 2.0, "step": 590 }, { "epoch": 3.0, "eval_accuracy": 0.7186544342507645, "eval_loss": 0.5749825239181519, "eval_runtime": 8.9497, "eval_samples_per_second": 365.375, "eval_steps_per_second": 45.7, "step": 885 }, { "best_epoch": 2, "best_eval_accuracy": 0.7186544342507645, "epoch": 3.0, "step": 885 }, { "epoch": 3.39, "learning_rate": 0.005813559322033898, "loss": 0.6921, "step": 1000 }, { "epoch": 4.0, "eval_accuracy": 0.6801223241590214, "eval_loss": 0.6661111116409302, "eval_runtime": 8.9486, "eval_samples_per_second": 365.422, "eval_steps_per_second": 45.706, "step": 1180 }, { "best_epoch": 2, "best_eval_accuracy": 0.7186544342507645, "epoch": 4.0, "step": 1180 }, { "epoch": 5.0, "eval_accuracy": 0.7642201834862385, "eval_loss": 0.4996618628501892, "eval_runtime": 8.9587, "eval_samples_per_second": 365.007, "eval_steps_per_second": 45.654, "step": 1475 }, { "best_epoch": 4, "best_eval_accuracy": 0.7642201834862385, "epoch": 5.0, "step": 1475 }, { "epoch": 5.08, "learning_rate": 0.005220338983050848, "loss": 0.5652, "step": 1500 }, { "epoch": 6.0, "eval_accuracy": 0.7896024464831805, "eval_loss": 0.5553064346313477, "eval_runtime": 8.9376, "eval_samples_per_second": 365.869, "eval_steps_per_second": 45.762, "step": 1770 }, { "best_epoch": 5, "best_eval_accuracy": 0.7896024464831805, "epoch": 6.0, "step": 1770 }, { "epoch": 6.78, "learning_rate": 0.004627118644067797, "loss": 0.516, "step": 2000 }, { "epoch": 7.0, "eval_accuracy": 0.8107033639143731, "eval_loss": 0.43402931094169617, "eval_runtime": 8.9391, "eval_samples_per_second": 365.811, "eval_steps_per_second": 45.754, "step": 2065 }, { "best_epoch": 6, "best_eval_accuracy": 0.8107033639143731, "epoch": 7.0, "step": 2065 }, { "epoch": 8.0, "eval_accuracy": 0.7577981651376147, "eval_loss": 0.5680531859397888, "eval_runtime": 8.9453, "eval_samples_per_second": 365.553, "eval_steps_per_second": 45.722, "step": 2360 }, { "best_epoch": 6, "best_eval_accuracy": 0.8107033639143731, "epoch": 8.0, "step": 2360 }, { "epoch": 8.47, "learning_rate": 0.004033898305084746, "loss": 0.4934, "step": 2500 }, { "epoch": 9.0, "eval_accuracy": 0.7944954128440367, "eval_loss": 0.4493260383605957, "eval_runtime": 8.9507, "eval_samples_per_second": 365.334, "eval_steps_per_second": 45.695, "step": 2655 }, { "best_epoch": 6, "best_eval_accuracy": 0.8107033639143731, "epoch": 9.0, "step": 2655 }, { "epoch": 10.0, "eval_accuracy": 0.8048929663608563, "eval_loss": 0.4496236741542816, "eval_runtime": 8.9349, "eval_samples_per_second": 365.983, "eval_steps_per_second": 45.776, "step": 2950 }, { "best_epoch": 6, "best_eval_accuracy": 0.8107033639143731, "epoch": 10.0, "step": 2950 }, { "epoch": 10.17, "learning_rate": 0.0034406779661016952, "loss": 0.4653, "step": 3000 }, { "epoch": 11.0, "eval_accuracy": 0.8140672782874617, "eval_loss": 0.435769259929657, "eval_runtime": 8.9447, "eval_samples_per_second": 365.58, "eval_steps_per_second": 45.725, "step": 3245 }, { "best_epoch": 10, "best_eval_accuracy": 0.8140672782874617, "epoch": 11.0, "step": 3245 }, { "epoch": 11.86, "learning_rate": 0.002847457627118644, "loss": 0.4084, "step": 3500 }, { "epoch": 12.0, "eval_accuracy": 0.8180428134556575, "eval_loss": 0.41644611954689026, "eval_runtime": 8.9366, "eval_samples_per_second": 365.912, "eval_steps_per_second": 45.767, "step": 3540 }, { "best_epoch": 11, "best_eval_accuracy": 0.8180428134556575, "epoch": 12.0, "step": 3540 }, { "epoch": 13.0, "eval_accuracy": 0.8103975535168195, "eval_loss": 0.452688992023468, "eval_runtime": 8.9757, "eval_samples_per_second": 364.318, "eval_steps_per_second": 45.568, "step": 3835 }, { "best_epoch": 11, "best_eval_accuracy": 0.8180428134556575, "epoch": 13.0, "step": 3835 }, { "epoch": 13.56, "learning_rate": 0.002254237288135593, "loss": 0.3903, "step": 4000 }, { "epoch": 14.0, "eval_accuracy": 0.818348623853211, "eval_loss": 0.44870346784591675, "eval_runtime": 8.9507, "eval_samples_per_second": 365.335, "eval_steps_per_second": 45.695, "step": 4130 }, { "best_epoch": 13, "best_eval_accuracy": 0.818348623853211, "epoch": 14.0, "step": 4130 }, { "epoch": 15.0, "eval_accuracy": 0.8155963302752294, "eval_loss": 0.4322122633457184, "eval_runtime": 8.9612, "eval_samples_per_second": 364.908, "eval_steps_per_second": 45.641, "step": 4425 }, { "best_epoch": 13, "best_eval_accuracy": 0.818348623853211, "epoch": 15.0, "step": 4425 }, { "epoch": 15.25, "learning_rate": 0.0016610169491525426, "loss": 0.3761, "step": 4500 }, { "epoch": 16.0, "eval_accuracy": 0.8211009174311926, "eval_loss": 0.4511786997318268, "eval_runtime": 8.9443, "eval_samples_per_second": 365.595, "eval_steps_per_second": 45.727, "step": 4720 }, { "best_epoch": 15, "best_eval_accuracy": 0.8211009174311926, "epoch": 16.0, "step": 4720 }, { "epoch": 16.95, "learning_rate": 0.0010677966101694915, "loss": 0.3515, "step": 5000 }, { "epoch": 17.0, "eval_accuracy": 0.8122324159021407, "eval_loss": 0.4329771399497986, "eval_runtime": 8.9372, "eval_samples_per_second": 365.885, "eval_steps_per_second": 45.764, "step": 5015 }, { "best_epoch": 15, "best_eval_accuracy": 0.8211009174311926, "epoch": 17.0, "step": 5015 }, { "epoch": 18.0, "eval_accuracy": 0.8171253822629969, "eval_loss": 0.4531969130039215, "eval_runtime": 8.9346, "eval_samples_per_second": 365.994, "eval_steps_per_second": 45.777, "step": 5310 }, { "best_epoch": 15, "best_eval_accuracy": 0.8211009174311926, "epoch": 18.0, "step": 5310 }, { "epoch": 18.64, "learning_rate": 0.0004745762711864407, "loss": 0.3487, "step": 5500 }, { "epoch": 19.0, "eval_accuracy": 0.8226299694189603, "eval_loss": 0.45660164952278137, "eval_runtime": 8.9479, "eval_samples_per_second": 365.448, "eval_steps_per_second": 45.709, "step": 5605 }, { "best_epoch": 18, "best_eval_accuracy": 0.8226299694189603, "epoch": 19.0, "step": 5605 } ], "max_steps": 5900, "num_train_epochs": 20, "total_flos": 4.173025556994202e+16, "trial_name": null, "trial_params": null }