|
{ |
|
"best_metric": 0.7073715562174238, |
|
"best_model_checkpoint": "final_models/glue_models/structroberta_s2_50ep//finetune/qnli/checkpoint-1800", |
|
"epoch": 5.531295487627365, |
|
"global_step": 3800, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.29, |
|
"eval_accuracy": 0.6146106719970703, |
|
"eval_f1": 0.6827511703276917, |
|
"eval_loss": 0.656548261642456, |
|
"eval_mcc": 0.22199360087926548, |
|
"eval_runtime": 4.8836, |
|
"eval_samples_per_second": 468.098, |
|
"eval_steps_per_second": 58.563, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_accuracy": 0.6132983565330505, |
|
"eval_f1": 0.6691616766467064, |
|
"eval_loss": 0.6548586487770081, |
|
"eval_mcc": 0.21809949429968853, |
|
"eval_runtime": 4.9086, |
|
"eval_samples_per_second": 465.711, |
|
"eval_steps_per_second": 58.265, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.636098981077147e-05, |
|
"loss": 0.6749, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_accuracy": 0.6124234199523926, |
|
"eval_f1": 0.6869257950530036, |
|
"eval_loss": 0.6537702679634094, |
|
"eval_mcc": 0.2187527800424191, |
|
"eval_runtime": 4.9089, |
|
"eval_samples_per_second": 465.689, |
|
"eval_steps_per_second": 58.262, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"eval_accuracy": 0.6338582634925842, |
|
"eval_f1": 0.686399400524541, |
|
"eval_loss": 0.6308510303497314, |
|
"eval_mcc": 0.26091967633126006, |
|
"eval_runtime": 4.8817, |
|
"eval_samples_per_second": 468.284, |
|
"eval_steps_per_second": 58.587, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 4.272197962154294e-05, |
|
"loss": 0.6288, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"eval_accuracy": 0.6176728010177612, |
|
"eval_f1": 0.5810162991371045, |
|
"eval_loss": 0.6444646120071411, |
|
"eval_mcc": 0.2582529609636007, |
|
"eval_runtime": 4.8723, |
|
"eval_samples_per_second": 469.182, |
|
"eval_steps_per_second": 58.699, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_accuracy": 0.6452318429946899, |
|
"eval_f1": 0.6884364195159431, |
|
"eval_loss": 0.6248251795768738, |
|
"eval_mcc": 0.2841162003572998, |
|
"eval_runtime": 4.856, |
|
"eval_samples_per_second": 470.754, |
|
"eval_steps_per_second": 58.896, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"eval_accuracy": 0.6443569660186768, |
|
"eval_f1": 0.6553624417125902, |
|
"eval_loss": 0.6547794938087463, |
|
"eval_mcc": 0.28923332045259653, |
|
"eval_runtime": 4.8425, |
|
"eval_samples_per_second": 472.075, |
|
"eval_steps_per_second": 59.061, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 3.9082969432314415e-05, |
|
"loss": 0.5829, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"eval_accuracy": 0.6557305455207825, |
|
"eval_f1": 0.67890656874745, |
|
"eval_loss": 0.6519333124160767, |
|
"eval_mcc": 0.307966407656791, |
|
"eval_runtime": 4.8379, |
|
"eval_samples_per_second": 472.517, |
|
"eval_steps_per_second": 59.116, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"eval_accuracy": 0.6561679840087891, |
|
"eval_f1": 0.7073715562174238, |
|
"eval_loss": 0.6741649508476257, |
|
"eval_mcc": 0.30779028305110706, |
|
"eval_runtime": 4.8516, |
|
"eval_samples_per_second": 471.182, |
|
"eval_steps_per_second": 58.949, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 3.544395924308588e-05, |
|
"loss": 0.5121, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"eval_accuracy": 0.6640419960021973, |
|
"eval_f1": 0.7034749034749035, |
|
"eval_loss": 0.7019718885421753, |
|
"eval_mcc": 0.32264171773943, |
|
"eval_runtime": 4.8827, |
|
"eval_samples_per_second": 468.184, |
|
"eval_steps_per_second": 58.574, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"eval_accuracy": 0.6771653294563293, |
|
"eval_f1": 0.670829616413916, |
|
"eval_loss": 0.6962025165557861, |
|
"eval_mcc": 0.36331729204615415, |
|
"eval_runtime": 4.857, |
|
"eval_samples_per_second": 470.665, |
|
"eval_steps_per_second": 58.885, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"eval_accuracy": 0.6692913174629211, |
|
"eval_f1": 0.6730103806228372, |
|
"eval_loss": 0.7017433643341064, |
|
"eval_mcc": 0.3420737265934342, |
|
"eval_runtime": 4.8442, |
|
"eval_samples_per_second": 471.908, |
|
"eval_steps_per_second": 59.04, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 3.1804949053857355e-05, |
|
"loss": 0.4192, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"eval_accuracy": 0.6675415635108948, |
|
"eval_f1": 0.6712802768166091, |
|
"eval_loss": 0.6771557331085205, |
|
"eval_mcc": 0.3385643570211769, |
|
"eval_runtime": 4.8693, |
|
"eval_samples_per_second": 469.476, |
|
"eval_steps_per_second": 58.736, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"eval_accuracy": 0.6701662540435791, |
|
"eval_f1": 0.6986410871302957, |
|
"eval_loss": 0.9257397055625916, |
|
"eval_mcc": 0.33573292641261127, |
|
"eval_runtime": 4.8915, |
|
"eval_samples_per_second": 467.337, |
|
"eval_steps_per_second": 58.468, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"learning_rate": 2.816593886462882e-05, |
|
"loss": 0.3517, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"eval_accuracy": 0.6706036925315857, |
|
"eval_f1": 0.6541111621497473, |
|
"eval_loss": 0.8104151487350464, |
|
"eval_mcc": 0.35637801084068055, |
|
"eval_runtime": 4.8987, |
|
"eval_samples_per_second": 466.65, |
|
"eval_steps_per_second": 58.382, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"eval_accuracy": 0.6679790019989014, |
|
"eval_f1": 0.6597938144329897, |
|
"eval_loss": 0.8328930139541626, |
|
"eval_mcc": 0.3457806676963092, |
|
"eval_runtime": 4.8964, |
|
"eval_samples_per_second": 466.871, |
|
"eval_steps_per_second": 58.41, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"eval_accuracy": 0.6815398335456848, |
|
"eval_f1": 0.6946308724832215, |
|
"eval_loss": 0.7547760009765625, |
|
"eval_mcc": 0.362460160804572, |
|
"eval_runtime": 4.8988, |
|
"eval_samples_per_second": 466.642, |
|
"eval_steps_per_second": 58.381, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"learning_rate": 2.452692867540029e-05, |
|
"loss": 0.2998, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"eval_accuracy": 0.6863517165184021, |
|
"eval_f1": 0.6789072995969547, |
|
"eval_loss": 0.933471143245697, |
|
"eval_mcc": 0.38264274350198313, |
|
"eval_runtime": 4.8881, |
|
"eval_samples_per_second": 467.669, |
|
"eval_steps_per_second": 58.51, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 5.53, |
|
"eval_accuracy": 0.6771653294563293, |
|
"eval_f1": 0.6883445945945946, |
|
"eval_loss": 0.9999232292175293, |
|
"eval_mcc": 0.3544971598234377, |
|
"eval_runtime": 4.8652, |
|
"eval_samples_per_second": 469.869, |
|
"eval_steps_per_second": 58.785, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 5.53, |
|
"step": 3800, |
|
"total_flos": 2.22547390664832e+16, |
|
"train_loss": 0.473507473594264, |
|
"train_runtime": 1395.5937, |
|
"train_samples_per_second": 314.683, |
|
"train_steps_per_second": 4.923 |
|
} |
|
], |
|
"max_steps": 6870, |
|
"num_train_epochs": 10, |
|
"total_flos": 2.22547390664832e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|