|
{ |
|
"best_metric": 0.8226299694189603, |
|
"best_model_checkpoint": "checkpoints/boolq-roberta/checkpoint-5605", |
|
"epoch": 19.0, |
|
"global_step": 5605, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6217125382262997, |
|
"eval_loss": 1.2398264408111572, |
|
"eval_runtime": 8.8922, |
|
"eval_samples_per_second": 367.737, |
|
"eval_steps_per_second": 45.995, |
|
"step": 295 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.6217125382262997, |
|
"epoch": 1.0, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.006406779661016949, |
|
"loss": 0.8402, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6422018348623854, |
|
"eval_loss": 0.6416757702827454, |
|
"eval_runtime": 8.9446, |
|
"eval_samples_per_second": 365.585, |
|
"eval_steps_per_second": 45.726, |
|
"step": 590 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.6422018348623854, |
|
"epoch": 2.0, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7186544342507645, |
|
"eval_loss": 0.5749825239181519, |
|
"eval_runtime": 8.9497, |
|
"eval_samples_per_second": 365.375, |
|
"eval_steps_per_second": 45.7, |
|
"step": 885 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.7186544342507645, |
|
"epoch": 3.0, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 0.005813559322033898, |
|
"loss": 0.6921, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.6801223241590214, |
|
"eval_loss": 0.6661111116409302, |
|
"eval_runtime": 8.9486, |
|
"eval_samples_per_second": 365.422, |
|
"eval_steps_per_second": 45.706, |
|
"step": 1180 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.7186544342507645, |
|
"epoch": 4.0, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7642201834862385, |
|
"eval_loss": 0.4996618628501892, |
|
"eval_runtime": 8.9587, |
|
"eval_samples_per_second": 365.007, |
|
"eval_steps_per_second": 45.654, |
|
"step": 1475 |
|
}, |
|
{ |
|
"best_epoch": 4, |
|
"best_eval_accuracy": 0.7642201834862385, |
|
"epoch": 5.0, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"learning_rate": 0.005220338983050848, |
|
"loss": 0.5652, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7896024464831805, |
|
"eval_loss": 0.5553064346313477, |
|
"eval_runtime": 8.9376, |
|
"eval_samples_per_second": 365.869, |
|
"eval_steps_per_second": 45.762, |
|
"step": 1770 |
|
}, |
|
{ |
|
"best_epoch": 5, |
|
"best_eval_accuracy": 0.7896024464831805, |
|
"epoch": 6.0, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"learning_rate": 0.004627118644067797, |
|
"loss": 0.516, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8107033639143731, |
|
"eval_loss": 0.43402931094169617, |
|
"eval_runtime": 8.9391, |
|
"eval_samples_per_second": 365.811, |
|
"eval_steps_per_second": 45.754, |
|
"step": 2065 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.8107033639143731, |
|
"epoch": 7.0, |
|
"step": 2065 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7577981651376147, |
|
"eval_loss": 0.5680531859397888, |
|
"eval_runtime": 8.9453, |
|
"eval_samples_per_second": 365.553, |
|
"eval_steps_per_second": 45.722, |
|
"step": 2360 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.8107033639143731, |
|
"epoch": 8.0, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 8.47, |
|
"learning_rate": 0.004033898305084746, |
|
"loss": 0.4934, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.7944954128440367, |
|
"eval_loss": 0.4493260383605957, |
|
"eval_runtime": 8.9507, |
|
"eval_samples_per_second": 365.334, |
|
"eval_steps_per_second": 45.695, |
|
"step": 2655 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.8107033639143731, |
|
"epoch": 9.0, |
|
"step": 2655 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8048929663608563, |
|
"eval_loss": 0.4496236741542816, |
|
"eval_runtime": 8.9349, |
|
"eval_samples_per_second": 365.983, |
|
"eval_steps_per_second": 45.776, |
|
"step": 2950 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.8107033639143731, |
|
"epoch": 10.0, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 10.17, |
|
"learning_rate": 0.0034406779661016952, |
|
"loss": 0.4653, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8140672782874617, |
|
"eval_loss": 0.435769259929657, |
|
"eval_runtime": 8.9447, |
|
"eval_samples_per_second": 365.58, |
|
"eval_steps_per_second": 45.725, |
|
"step": 3245 |
|
}, |
|
{ |
|
"best_epoch": 10, |
|
"best_eval_accuracy": 0.8140672782874617, |
|
"epoch": 11.0, |
|
"step": 3245 |
|
}, |
|
{ |
|
"epoch": 11.86, |
|
"learning_rate": 0.002847457627118644, |
|
"loss": 0.4084, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8180428134556575, |
|
"eval_loss": 0.41644611954689026, |
|
"eval_runtime": 8.9366, |
|
"eval_samples_per_second": 365.912, |
|
"eval_steps_per_second": 45.767, |
|
"step": 3540 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.8180428134556575, |
|
"epoch": 12.0, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8103975535168195, |
|
"eval_loss": 0.452688992023468, |
|
"eval_runtime": 8.9757, |
|
"eval_samples_per_second": 364.318, |
|
"eval_steps_per_second": 45.568, |
|
"step": 3835 |
|
}, |
|
{ |
|
"best_epoch": 11, |
|
"best_eval_accuracy": 0.8180428134556575, |
|
"epoch": 13.0, |
|
"step": 3835 |
|
}, |
|
{ |
|
"epoch": 13.56, |
|
"learning_rate": 0.002254237288135593, |
|
"loss": 0.3903, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.818348623853211, |
|
"eval_loss": 0.44870346784591675, |
|
"eval_runtime": 8.9507, |
|
"eval_samples_per_second": 365.335, |
|
"eval_steps_per_second": 45.695, |
|
"step": 4130 |
|
}, |
|
{ |
|
"best_epoch": 13, |
|
"best_eval_accuracy": 0.818348623853211, |
|
"epoch": 14.0, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.8155963302752294, |
|
"eval_loss": 0.4322122633457184, |
|
"eval_runtime": 8.9612, |
|
"eval_samples_per_second": 364.908, |
|
"eval_steps_per_second": 45.641, |
|
"step": 4425 |
|
}, |
|
{ |
|
"best_epoch": 13, |
|
"best_eval_accuracy": 0.818348623853211, |
|
"epoch": 15.0, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 15.25, |
|
"learning_rate": 0.0016610169491525426, |
|
"loss": 0.3761, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8211009174311926, |
|
"eval_loss": 0.4511786997318268, |
|
"eval_runtime": 8.9443, |
|
"eval_samples_per_second": 365.595, |
|
"eval_steps_per_second": 45.727, |
|
"step": 4720 |
|
}, |
|
{ |
|
"best_epoch": 15, |
|
"best_eval_accuracy": 0.8211009174311926, |
|
"epoch": 16.0, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 16.95, |
|
"learning_rate": 0.0010677966101694915, |
|
"loss": 0.3515, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.8122324159021407, |
|
"eval_loss": 0.4329771399497986, |
|
"eval_runtime": 8.9372, |
|
"eval_samples_per_second": 365.885, |
|
"eval_steps_per_second": 45.764, |
|
"step": 5015 |
|
}, |
|
{ |
|
"best_epoch": 15, |
|
"best_eval_accuracy": 0.8211009174311926, |
|
"epoch": 17.0, |
|
"step": 5015 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8171253822629969, |
|
"eval_loss": 0.4531969130039215, |
|
"eval_runtime": 8.9346, |
|
"eval_samples_per_second": 365.994, |
|
"eval_steps_per_second": 45.777, |
|
"step": 5310 |
|
}, |
|
{ |
|
"best_epoch": 15, |
|
"best_eval_accuracy": 0.8211009174311926, |
|
"epoch": 18.0, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 18.64, |
|
"learning_rate": 0.0004745762711864407, |
|
"loss": 0.3487, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.8226299694189603, |
|
"eval_loss": 0.45660164952278137, |
|
"eval_runtime": 8.9479, |
|
"eval_samples_per_second": 365.448, |
|
"eval_steps_per_second": 45.709, |
|
"step": 5605 |
|
}, |
|
{ |
|
"best_epoch": 18, |
|
"best_eval_accuracy": 0.8226299694189603, |
|
"epoch": 19.0, |
|
"step": 5605 |
|
} |
|
], |
|
"max_steps": 5900, |
|
"num_train_epochs": 20, |
|
"total_flos": 4.173025556994202e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|