{ "best_metric": 0.7073715562174238, "best_model_checkpoint": "final_models/glue_models/structroberta_s2_50ep//finetune/qnli/checkpoint-1800", "epoch": 5.531295487627365, "global_step": 3800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.29, "eval_accuracy": 0.6146106719970703, "eval_f1": 0.6827511703276917, "eval_loss": 0.656548261642456, "eval_mcc": 0.22199360087926548, "eval_runtime": 4.8836, "eval_samples_per_second": 468.098, "eval_steps_per_second": 58.563, "step": 200 }, { "epoch": 0.58, "eval_accuracy": 0.6132983565330505, "eval_f1": 0.6691616766467064, "eval_loss": 0.6548586487770081, "eval_mcc": 0.21809949429968853, "eval_runtime": 4.9086, "eval_samples_per_second": 465.711, "eval_steps_per_second": 58.265, "step": 400 }, { "epoch": 0.73, "learning_rate": 4.636098981077147e-05, "loss": 0.6749, "step": 500 }, { "epoch": 0.87, "eval_accuracy": 0.6124234199523926, "eval_f1": 0.6869257950530036, "eval_loss": 0.6537702679634094, "eval_mcc": 0.2187527800424191, "eval_runtime": 4.9089, "eval_samples_per_second": 465.689, "eval_steps_per_second": 58.262, "step": 600 }, { "epoch": 1.16, "eval_accuracy": 0.6338582634925842, "eval_f1": 0.686399400524541, "eval_loss": 0.6308510303497314, "eval_mcc": 0.26091967633126006, "eval_runtime": 4.8817, "eval_samples_per_second": 468.284, "eval_steps_per_second": 58.587, "step": 800 }, { "epoch": 1.46, "learning_rate": 4.272197962154294e-05, "loss": 0.6288, "step": 1000 }, { "epoch": 1.46, "eval_accuracy": 0.6176728010177612, "eval_f1": 0.5810162991371045, "eval_loss": 0.6444646120071411, "eval_mcc": 0.2582529609636007, "eval_runtime": 4.8723, "eval_samples_per_second": 469.182, "eval_steps_per_second": 58.699, "step": 1000 }, { "epoch": 1.75, "eval_accuracy": 0.6452318429946899, "eval_f1": 0.6884364195159431, "eval_loss": 0.6248251795768738, "eval_mcc": 0.2841162003572998, "eval_runtime": 4.856, "eval_samples_per_second": 470.754, "eval_steps_per_second": 58.896, "step": 1200 }, { "epoch": 2.04, "eval_accuracy": 0.6443569660186768, "eval_f1": 0.6553624417125902, "eval_loss": 0.6547794938087463, "eval_mcc": 0.28923332045259653, "eval_runtime": 4.8425, "eval_samples_per_second": 472.075, "eval_steps_per_second": 59.061, "step": 1400 }, { "epoch": 2.18, "learning_rate": 3.9082969432314415e-05, "loss": 0.5829, "step": 1500 }, { "epoch": 2.33, "eval_accuracy": 0.6557305455207825, "eval_f1": 0.67890656874745, "eval_loss": 0.6519333124160767, "eval_mcc": 0.307966407656791, "eval_runtime": 4.8379, "eval_samples_per_second": 472.517, "eval_steps_per_second": 59.116, "step": 1600 }, { "epoch": 2.62, "eval_accuracy": 0.6561679840087891, "eval_f1": 0.7073715562174238, "eval_loss": 0.6741649508476257, "eval_mcc": 0.30779028305110706, "eval_runtime": 4.8516, "eval_samples_per_second": 471.182, "eval_steps_per_second": 58.949, "step": 1800 }, { "epoch": 2.91, "learning_rate": 3.544395924308588e-05, "loss": 0.5121, "step": 2000 }, { "epoch": 2.91, "eval_accuracy": 0.6640419960021973, "eval_f1": 0.7034749034749035, "eval_loss": 0.7019718885421753, "eval_mcc": 0.32264171773943, "eval_runtime": 4.8827, "eval_samples_per_second": 468.184, "eval_steps_per_second": 58.574, "step": 2000 }, { "epoch": 3.2, "eval_accuracy": 0.6771653294563293, "eval_f1": 0.670829616413916, "eval_loss": 0.6962025165557861, "eval_mcc": 0.36331729204615415, "eval_runtime": 4.857, "eval_samples_per_second": 470.665, "eval_steps_per_second": 58.885, "step": 2200 }, { "epoch": 3.49, "eval_accuracy": 0.6692913174629211, "eval_f1": 0.6730103806228372, "eval_loss": 0.7017433643341064, "eval_mcc": 0.3420737265934342, "eval_runtime": 4.8442, "eval_samples_per_second": 471.908, "eval_steps_per_second": 59.04, "step": 2400 }, { "epoch": 3.64, "learning_rate": 3.1804949053857355e-05, "loss": 0.4192, "step": 2500 }, { "epoch": 3.78, "eval_accuracy": 0.6675415635108948, "eval_f1": 0.6712802768166091, "eval_loss": 0.6771557331085205, "eval_mcc": 0.3385643570211769, "eval_runtime": 4.8693, "eval_samples_per_second": 469.476, "eval_steps_per_second": 58.736, "step": 2600 }, { "epoch": 4.08, "eval_accuracy": 0.6701662540435791, "eval_f1": 0.6986410871302957, "eval_loss": 0.9257397055625916, "eval_mcc": 0.33573292641261127, "eval_runtime": 4.8915, "eval_samples_per_second": 467.337, "eval_steps_per_second": 58.468, "step": 2800 }, { "epoch": 4.37, "learning_rate": 2.816593886462882e-05, "loss": 0.3517, "step": 3000 }, { "epoch": 4.37, "eval_accuracy": 0.6706036925315857, "eval_f1": 0.6541111621497473, "eval_loss": 0.8104151487350464, "eval_mcc": 0.35637801084068055, "eval_runtime": 4.8987, "eval_samples_per_second": 466.65, "eval_steps_per_second": 58.382, "step": 3000 }, { "epoch": 4.66, "eval_accuracy": 0.6679790019989014, "eval_f1": 0.6597938144329897, "eval_loss": 0.8328930139541626, "eval_mcc": 0.3457806676963092, "eval_runtime": 4.8964, "eval_samples_per_second": 466.871, "eval_steps_per_second": 58.41, "step": 3200 }, { "epoch": 4.95, "eval_accuracy": 0.6815398335456848, "eval_f1": 0.6946308724832215, "eval_loss": 0.7547760009765625, "eval_mcc": 0.362460160804572, "eval_runtime": 4.8988, "eval_samples_per_second": 466.642, "eval_steps_per_second": 58.381, "step": 3400 }, { "epoch": 5.09, "learning_rate": 2.452692867540029e-05, "loss": 0.2998, "step": 3500 }, { "epoch": 5.24, "eval_accuracy": 0.6863517165184021, "eval_f1": 0.6789072995969547, "eval_loss": 0.933471143245697, "eval_mcc": 0.38264274350198313, "eval_runtime": 4.8881, "eval_samples_per_second": 467.669, "eval_steps_per_second": 58.51, "step": 3600 }, { "epoch": 5.53, "eval_accuracy": 0.6771653294563293, "eval_f1": 0.6883445945945946, "eval_loss": 0.9999232292175293, "eval_mcc": 0.3544971598234377, "eval_runtime": 4.8652, "eval_samples_per_second": 469.869, "eval_steps_per_second": 58.785, "step": 3800 }, { "epoch": 5.53, "step": 3800, "total_flos": 2.22547390664832e+16, "train_loss": 0.473507473594264, "train_runtime": 1395.5937, "train_samples_per_second": 314.683, "train_steps_per_second": 4.923 } ], "max_steps": 6870, "num_train_epochs": 10, "total_flos": 2.22547390664832e+16, "trial_name": null, "trial_params": null }