{
    "best_metric": 0.9861495731704059,
    "best_model_checkpoint": "/home3/s5431786/nlp-final-project/results/roberta-large-e-snli-classification-nli_explanation-base-b16/checkpoint-7200",
    "epoch": 0.2213420316868593,
    "global_step": 7600,
    "is_hyper_param_search": false,
    "is_local_process_zero": true,
    "is_world_process_zero": true,
    "log_history": [
        {
            "epoch": 0.01,
            "learning_rate": 7.765482430596001e-07,
            "loss": 1.0726,
            "step": 400
        },
        {
            "epoch": 0.01,
            "eval_accuracy": 0.7982117455801666,
            "eval_f1": 0.7974911320075254,
            "eval_loss": 0.8326770067214966,
            "eval_runtime": 12.8669,
            "eval_samples_per_second": 764.91,
            "eval_steps_per_second": 47.875,
            "step": 400
        },
        {
            "epoch": 0.02,
            "learning_rate": 1.5530964861192002e-06,
            "loss": 0.3233,
            "step": 800
        },
        {
            "epoch": 0.02,
            "eval_accuracy": 0.9642349116033326,
            "eval_f1": 0.9642718853707861,
            "eval_loss": 0.14329373836517334,
            "eval_runtime": 12.933,
            "eval_samples_per_second": 760.998,
            "eval_steps_per_second": 47.63,
            "step": 800
        },
        {
            "epoch": 0.03,
            "learning_rate": 2.3296447291788007e-06,
            "loss": 0.173,
            "step": 1200
        },
        {
            "epoch": 0.03,
            "eval_accuracy": 0.9762243446453973,
            "eval_f1": 0.9761597449883214,
            "eval_loss": 0.1054357960820198,
            "eval_runtime": 12.9969,
            "eval_samples_per_second": 757.258,
            "eval_steps_per_second": 47.396,
            "step": 1200
        },
        {
            "epoch": 0.05,
            "learning_rate": 3.1061929722384003e-06,
            "loss": 0.1452,
            "step": 1600
        },
        {
            "epoch": 0.05,
            "eval_accuracy": 0.9799837431416378,
            "eval_f1": 0.9799316333368723,
            "eval_loss": 0.09648650884628296,
            "eval_runtime": 13.0065,
            "eval_samples_per_second": 756.7,
            "eval_steps_per_second": 47.361,
            "step": 1600
        },
        {
            "epoch": 0.06,
            "learning_rate": 3.882741215298001e-06,
            "loss": 0.1449,
            "step": 2000
        },
        {
            "epoch": 0.06,
            "eval_accuracy": 0.9813046128835603,
            "eval_f1": 0.9812341372941731,
            "eval_loss": 0.09331633150577545,
            "eval_runtime": 12.921,
            "eval_samples_per_second": 761.706,
            "eval_steps_per_second": 47.674,
            "step": 2000
        },
        {
            "epoch": 0.07,
            "learning_rate": 4.659289458357601e-06,
            "loss": 0.1303,
            "step": 2400
        },
        {
            "epoch": 0.07,
            "eval_accuracy": 0.9815078236130867,
            "eval_f1": 0.9814520761782233,
            "eval_loss": 0.10890379548072815,
            "eval_runtime": 12.8683,
            "eval_samples_per_second": 764.824,
            "eval_steps_per_second": 47.87,
            "step": 2400
        },
        {
            "epoch": 0.08,
            "learning_rate": 5.435837701417202e-06,
            "loss": 0.1372,
            "step": 2800
        },
        {
            "epoch": 0.08,
            "eval_accuracy": 0.9832351148140622,
            "eval_f1": 0.983181371856266,
            "eval_loss": 0.07784133404493332,
            "eval_runtime": 12.8322,
            "eval_samples_per_second": 766.977,
            "eval_steps_per_second": 48.004,
            "step": 2800
        },
        {
            "epoch": 0.09,
            "learning_rate": 6.212385944476801e-06,
            "loss": 0.1229,
            "step": 3200
        },
        {
            "epoch": 0.09,
            "eval_accuracy": 0.9820158504369031,
            "eval_f1": 0.9819483437268689,
            "eval_loss": 0.10309657454490662,
            "eval_runtime": 13.2533,
            "eval_samples_per_second": 742.608,
            "eval_steps_per_second": 46.479,
            "step": 3200
        },
        {
            "epoch": 0.1,
            "learning_rate": 6.988934187536401e-06,
            "loss": 0.1291,
            "step": 3600
        },
        {
            "epoch": 0.1,
            "eval_accuracy": 0.9842511684616948,
            "eval_f1": 0.9842093647131546,
            "eval_loss": 0.08313809335231781,
            "eval_runtime": 13.0051,
            "eval_samples_per_second": 756.778,
            "eval_steps_per_second": 47.366,
            "step": 3600
        },
        {
            "epoch": 0.12,
            "learning_rate": 7.765482430596002e-06,
            "loss": 0.1049,
            "step": 4000
        },
        {
            "epoch": 0.12,
            "eval_accuracy": 0.9824222718959561,
            "eval_f1": 0.9823847600112674,
            "eval_loss": 0.0907953679561615,
            "eval_runtime": 12.984,
            "eval_samples_per_second": 758.01,
            "eval_steps_per_second": 47.443,
            "step": 4000
        },
        {
            "epoch": 0.13,
            "learning_rate": 8.542030673655602e-06,
            "loss": 0.1243,
            "step": 4400
        },
        {
            "epoch": 0.13,
            "eval_accuracy": 0.9823206665311929,
            "eval_f1": 0.9822362137371193,
            "eval_loss": 0.0925893783569336,
            "eval_runtime": 12.9285,
            "eval_samples_per_second": 761.262,
            "eval_steps_per_second": 47.647,
            "step": 4400
        },
        {
            "epoch": 0.14,
            "learning_rate": 9.318578916715203e-06,
            "loss": 0.1291,
            "step": 4800
        },
        {
            "epoch": 0.14,
            "eval_accuracy": 0.9855720382036172,
            "eval_f1": 0.9855364167633462,
            "eval_loss": 0.08309133350849152,
            "eval_runtime": 12.8482,
            "eval_samples_per_second": 766.023,
            "eval_steps_per_second": 47.945,
            "step": 4800
        },
        {
            "epoch": 0.15,
            "learning_rate": 9.994992693419992e-06,
            "loss": 0.1148,
            "step": 5200
        },
        {
            "epoch": 0.15,
            "eval_accuracy": 0.983946352367405,
            "eval_f1": 0.9839146445196306,
            "eval_loss": 0.08415436744689941,
            "eval_runtime": 12.8411,
            "eval_samples_per_second": 766.443,
            "eval_steps_per_second": 47.971,
            "step": 5200
        },
        {
            "epoch": 0.16,
            "learning_rate": 9.954116721338281e-06,
            "loss": 0.1256,
            "step": 5600
        },
        {
            "epoch": 0.16,
            "eval_accuracy": 0.9836415362731152,
            "eval_f1": 0.9836219614739408,
            "eval_loss": 0.084382563829422,
            "eval_runtime": 13.2723,
            "eval_samples_per_second": 741.546,
            "eval_steps_per_second": 46.413,
            "step": 5600
        },
        {
            "epoch": 0.17,
            "learning_rate": 9.91324074925657e-06,
            "loss": 0.1207,
            "step": 6000
        },
        {
            "epoch": 0.17,
            "eval_accuracy": 0.9829302987197724,
            "eval_f1": 0.9829157443588743,
            "eval_loss": 0.0957166850566864,
            "eval_runtime": 12.9479,
            "eval_samples_per_second": 760.125,
            "eval_steps_per_second": 47.575,
            "step": 6000
        },
        {
            "epoch": 0.19,
            "learning_rate": 9.872364777174857e-06,
            "loss": 0.1162,
            "step": 6400
        },
        {
            "epoch": 0.19,
            "eval_accuracy": 0.9859784596626702,
            "eval_f1": 0.9859267481206228,
            "eval_loss": 0.0752706453204155,
            "eval_runtime": 13.005,
            "eval_samples_per_second": 756.786,
            "eval_steps_per_second": 47.366,
            "step": 6400
        },
        {
            "epoch": 0.2,
            "learning_rate": 9.831488805093147e-06,
            "loss": 0.095,
            "step": 6800
        },
        {
            "epoch": 0.2,
            "eval_accuracy": 0.9844543791912213,
            "eval_f1": 0.9844184876885574,
            "eval_loss": 0.09373413771390915,
            "eval_runtime": 13.0171,
            "eval_samples_per_second": 756.082,
            "eval_steps_per_second": 47.322,
            "step": 6800
        },
        {
            "epoch": 0.21,
            "learning_rate": 9.790612833011435e-06,
            "loss": 0.1069,
            "step": 7200
        },
        {
            "epoch": 0.21,
            "eval_accuracy": 0.9861816703921967,
            "eval_f1": 0.9861495731704059,
            "eval_loss": 0.07330357283353806,
            "eval_runtime": 12.8964,
            "eval_samples_per_second": 763.158,
            "eval_steps_per_second": 47.765,
            "step": 7200
        },
        {
            "epoch": 0.22,
            "learning_rate": 9.749736860929725e-06,
            "loss": 0.1054,
            "step": 7600
        },
        {
            "epoch": 0.22,
            "eval_accuracy": 0.9853688274740906,
            "eval_f1": 0.9853406697769191,
            "eval_loss": 0.07271973788738251,
            "eval_runtime": 12.8137,
            "eval_samples_per_second": 768.085,
            "eval_steps_per_second": 48.074,
            "step": 7600
        }
    ],
    "max_steps": 103008,
    "num_train_epochs": 3,
    "total_flos": 1.4913660628723872e+16,
    "trial_name": null,
    "trial_params": null
}