|
{ |
|
"best_metric": 0.7163974642753601, |
|
"best_model_checkpoint": "final_models/glue_models/structroberta_s2_final//finetune/mnli/checkpoint-6400", |
|
"epoch": 3.879907621247113, |
|
"global_step": 8400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.18, |
|
"eval_accuracy": 0.5423651337623596, |
|
"eval_loss": 0.9394116997718811, |
|
"eval_runtime": 12.766, |
|
"eval_samples_per_second": 514.022, |
|
"eval_steps_per_second": 64.312, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.884526558891455e-05, |
|
"loss": 0.9831, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_accuracy": 0.6263334155082703, |
|
"eval_loss": 0.8243611454963684, |
|
"eval_runtime": 12.7598, |
|
"eval_samples_per_second": 514.27, |
|
"eval_steps_per_second": 64.343, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.7690531177829104e-05, |
|
"loss": 0.8453, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_accuracy": 0.6548308730125427, |
|
"eval_loss": 0.7803521156311035, |
|
"eval_runtime": 12.7994, |
|
"eval_samples_per_second": 512.682, |
|
"eval_steps_per_second": 64.144, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.653579676674365e-05, |
|
"loss": 0.792, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_accuracy": 0.6679366230964661, |
|
"eval_loss": 0.7586068511009216, |
|
"eval_runtime": 12.7729, |
|
"eval_samples_per_second": 513.744, |
|
"eval_steps_per_second": 64.277, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.53810623556582e-05, |
|
"loss": 0.762, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_accuracy": 0.6755562424659729, |
|
"eval_loss": 0.7486647963523865, |
|
"eval_runtime": 12.7741, |
|
"eval_samples_per_second": 513.696, |
|
"eval_steps_per_second": 64.271, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"eval_accuracy": 0.6807375550270081, |
|
"eval_loss": 0.7358043789863586, |
|
"eval_runtime": 12.787, |
|
"eval_samples_per_second": 513.179, |
|
"eval_steps_per_second": 64.206, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 4.422632794457275e-05, |
|
"loss": 0.7076, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"eval_accuracy": 0.6892715692520142, |
|
"eval_loss": 0.739337146282196, |
|
"eval_runtime": 12.7687, |
|
"eval_samples_per_second": 513.914, |
|
"eval_steps_per_second": 64.298, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 4.30715935334873e-05, |
|
"loss": 0.67, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"eval_accuracy": 0.6915574669837952, |
|
"eval_loss": 0.7226890921592712, |
|
"eval_runtime": 12.7849, |
|
"eval_samples_per_second": 513.26, |
|
"eval_steps_per_second": 64.216, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 4.1916859122401844e-05, |
|
"loss": 0.6666, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"eval_accuracy": 0.7013105750083923, |
|
"eval_loss": 0.7045397162437439, |
|
"eval_runtime": 12.7756, |
|
"eval_samples_per_second": 513.634, |
|
"eval_steps_per_second": 64.263, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 4.07621247113164e-05, |
|
"loss": 0.6618, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"eval_accuracy": 0.7007009983062744, |
|
"eval_loss": 0.7101899981498718, |
|
"eval_runtime": 12.7944, |
|
"eval_samples_per_second": 512.881, |
|
"eval_steps_per_second": 64.169, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"eval_accuracy": 0.7025297284126282, |
|
"eval_loss": 0.7343056201934814, |
|
"eval_runtime": 12.7896, |
|
"eval_samples_per_second": 513.073, |
|
"eval_steps_per_second": 64.193, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 3.960739030023095e-05, |
|
"loss": 0.6262, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"eval_accuracy": 0.7081682682037354, |
|
"eval_loss": 0.7158433794975281, |
|
"eval_runtime": 12.7759, |
|
"eval_samples_per_second": 513.625, |
|
"eval_steps_per_second": 64.262, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 3.84526558891455e-05, |
|
"loss": 0.5627, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_accuracy": 0.7121304273605347, |
|
"eval_loss": 0.7221695184707642, |
|
"eval_runtime": 12.7709, |
|
"eval_samples_per_second": 513.824, |
|
"eval_steps_per_second": 64.287, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 3.729792147806005e-05, |
|
"loss": 0.5606, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"eval_accuracy": 0.7043583989143372, |
|
"eval_loss": 0.7297407388687134, |
|
"eval_runtime": 12.7744, |
|
"eval_samples_per_second": 513.683, |
|
"eval_steps_per_second": 64.269, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 3.61431870669746e-05, |
|
"loss": 0.5698, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"eval_accuracy": 0.7063395380973816, |
|
"eval_loss": 0.7177144885063171, |
|
"eval_runtime": 12.7828, |
|
"eval_samples_per_second": 513.345, |
|
"eval_steps_per_second": 64.227, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"eval_accuracy": 0.7163974642753601, |
|
"eval_loss": 0.7138766646385193, |
|
"eval_runtime": 12.7766, |
|
"eval_samples_per_second": 513.596, |
|
"eval_steps_per_second": 64.258, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 3.498845265588915e-05, |
|
"loss": 0.5694, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"eval_accuracy": 0.7109113335609436, |
|
"eval_loss": 0.7803912162780762, |
|
"eval_runtime": 12.7706, |
|
"eval_samples_per_second": 513.836, |
|
"eval_steps_per_second": 64.288, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 3.38337182448037e-05, |
|
"loss": 0.4589, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"eval_accuracy": 0.7136543989181519, |
|
"eval_loss": 0.7615983486175537, |
|
"eval_runtime": 12.7834, |
|
"eval_samples_per_second": 513.321, |
|
"eval_steps_per_second": 64.224, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 3.2678983833718243e-05, |
|
"loss": 0.4606, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"eval_accuracy": 0.7084730267524719, |
|
"eval_loss": 0.7663705945014954, |
|
"eval_runtime": 12.7769, |
|
"eval_samples_per_second": 513.583, |
|
"eval_steps_per_second": 64.257, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 3.1524249422632794e-05, |
|
"loss": 0.4687, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"eval_accuracy": 0.7130448222160339, |
|
"eval_loss": 0.7735299468040466, |
|
"eval_runtime": 12.7981, |
|
"eval_samples_per_second": 512.732, |
|
"eval_steps_per_second": 64.15, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"eval_accuracy": 0.7145687341690063, |
|
"eval_loss": 0.7440381050109863, |
|
"eval_runtime": 12.7849, |
|
"eval_samples_per_second": 513.26, |
|
"eval_steps_per_second": 64.216, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"step": 8400, |
|
"total_flos": 8.411222207808e+16, |
|
"train_loss": 0.639952149164109, |
|
"train_runtime": 4648.7272, |
|
"train_samples_per_second": 558.82, |
|
"train_steps_per_second": 4.657 |
|
} |
|
], |
|
"max_steps": 21650, |
|
"num_train_epochs": 10, |
|
"total_flos": 8.411222207808e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|