{ "best_metric": 0.7163974642753601, "best_model_checkpoint": "final_models/glue_models/structroberta_s2_final//finetune/mnli/checkpoint-6400", "epoch": 3.879907621247113, "global_step": 8400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.18, "eval_accuracy": 0.5423651337623596, "eval_loss": 0.9394116997718811, "eval_runtime": 12.766, "eval_samples_per_second": 514.022, "eval_steps_per_second": 64.312, "step": 400 }, { "epoch": 0.23, "learning_rate": 4.884526558891455e-05, "loss": 0.9831, "step": 500 }, { "epoch": 0.37, "eval_accuracy": 0.6263334155082703, "eval_loss": 0.8243611454963684, "eval_runtime": 12.7598, "eval_samples_per_second": 514.27, "eval_steps_per_second": 64.343, "step": 800 }, { "epoch": 0.46, "learning_rate": 4.7690531177829104e-05, "loss": 0.8453, "step": 1000 }, { "epoch": 0.55, "eval_accuracy": 0.6548308730125427, "eval_loss": 0.7803521156311035, "eval_runtime": 12.7994, "eval_samples_per_second": 512.682, "eval_steps_per_second": 64.144, "step": 1200 }, { "epoch": 0.69, "learning_rate": 4.653579676674365e-05, "loss": 0.792, "step": 1500 }, { "epoch": 0.74, "eval_accuracy": 0.6679366230964661, "eval_loss": 0.7586068511009216, "eval_runtime": 12.7729, "eval_samples_per_second": 513.744, "eval_steps_per_second": 64.277, "step": 1600 }, { "epoch": 0.92, "learning_rate": 4.53810623556582e-05, "loss": 0.762, "step": 2000 }, { "epoch": 0.92, "eval_accuracy": 0.6755562424659729, "eval_loss": 0.7486647963523865, "eval_runtime": 12.7741, "eval_samples_per_second": 513.696, "eval_steps_per_second": 64.271, "step": 2000 }, { "epoch": 1.11, "eval_accuracy": 0.6807375550270081, "eval_loss": 0.7358043789863586, "eval_runtime": 12.787, "eval_samples_per_second": 513.179, "eval_steps_per_second": 64.206, "step": 2400 }, { "epoch": 1.15, "learning_rate": 4.422632794457275e-05, "loss": 0.7076, "step": 2500 }, { "epoch": 1.29, "eval_accuracy": 0.6892715692520142, "eval_loss": 0.739337146282196, "eval_runtime": 12.7687, "eval_samples_per_second": 513.914, "eval_steps_per_second": 64.298, "step": 2800 }, { "epoch": 1.39, "learning_rate": 4.30715935334873e-05, "loss": 0.67, "step": 3000 }, { "epoch": 1.48, "eval_accuracy": 0.6915574669837952, "eval_loss": 0.7226890921592712, "eval_runtime": 12.7849, "eval_samples_per_second": 513.26, "eval_steps_per_second": 64.216, "step": 3200 }, { "epoch": 1.62, "learning_rate": 4.1916859122401844e-05, "loss": 0.6666, "step": 3500 }, { "epoch": 1.66, "eval_accuracy": 0.7013105750083923, "eval_loss": 0.7045397162437439, "eval_runtime": 12.7756, "eval_samples_per_second": 513.634, "eval_steps_per_second": 64.263, "step": 3600 }, { "epoch": 1.85, "learning_rate": 4.07621247113164e-05, "loss": 0.6618, "step": 4000 }, { "epoch": 1.85, "eval_accuracy": 0.7007009983062744, "eval_loss": 0.7101899981498718, "eval_runtime": 12.7944, "eval_samples_per_second": 512.881, "eval_steps_per_second": 64.169, "step": 4000 }, { "epoch": 2.03, "eval_accuracy": 0.7025297284126282, "eval_loss": 0.7343056201934814, "eval_runtime": 12.7896, "eval_samples_per_second": 513.073, "eval_steps_per_second": 64.193, "step": 4400 }, { "epoch": 2.08, "learning_rate": 3.960739030023095e-05, "loss": 0.6262, "step": 4500 }, { "epoch": 2.22, "eval_accuracy": 0.7081682682037354, "eval_loss": 0.7158433794975281, "eval_runtime": 12.7759, "eval_samples_per_second": 513.625, "eval_steps_per_second": 64.262, "step": 4800 }, { "epoch": 2.31, "learning_rate": 3.84526558891455e-05, "loss": 0.5627, "step": 5000 }, { "epoch": 2.4, "eval_accuracy": 0.7121304273605347, "eval_loss": 0.7221695184707642, "eval_runtime": 12.7709, "eval_samples_per_second": 513.824, "eval_steps_per_second": 64.287, "step": 5200 }, { "epoch": 2.54, "learning_rate": 3.729792147806005e-05, "loss": 0.5606, "step": 5500 }, { "epoch": 2.59, "eval_accuracy": 0.7043583989143372, "eval_loss": 0.7297407388687134, "eval_runtime": 12.7744, "eval_samples_per_second": 513.683, "eval_steps_per_second": 64.269, "step": 5600 }, { "epoch": 2.77, "learning_rate": 3.61431870669746e-05, "loss": 0.5698, "step": 6000 }, { "epoch": 2.77, "eval_accuracy": 0.7063395380973816, "eval_loss": 0.7177144885063171, "eval_runtime": 12.7828, "eval_samples_per_second": 513.345, "eval_steps_per_second": 64.227, "step": 6000 }, { "epoch": 2.96, "eval_accuracy": 0.7163974642753601, "eval_loss": 0.7138766646385193, "eval_runtime": 12.7766, "eval_samples_per_second": 513.596, "eval_steps_per_second": 64.258, "step": 6400 }, { "epoch": 3.0, "learning_rate": 3.498845265588915e-05, "loss": 0.5694, "step": 6500 }, { "epoch": 3.14, "eval_accuracy": 0.7109113335609436, "eval_loss": 0.7803912162780762, "eval_runtime": 12.7706, "eval_samples_per_second": 513.836, "eval_steps_per_second": 64.288, "step": 6800 }, { "epoch": 3.23, "learning_rate": 3.38337182448037e-05, "loss": 0.4589, "step": 7000 }, { "epoch": 3.33, "eval_accuracy": 0.7136543989181519, "eval_loss": 0.7615983486175537, "eval_runtime": 12.7834, "eval_samples_per_second": 513.321, "eval_steps_per_second": 64.224, "step": 7200 }, { "epoch": 3.46, "learning_rate": 3.2678983833718243e-05, "loss": 0.4606, "step": 7500 }, { "epoch": 3.51, "eval_accuracy": 0.7084730267524719, "eval_loss": 0.7663705945014954, "eval_runtime": 12.7769, "eval_samples_per_second": 513.583, "eval_steps_per_second": 64.257, "step": 7600 }, { "epoch": 3.7, "learning_rate": 3.1524249422632794e-05, "loss": 0.4687, "step": 8000 }, { "epoch": 3.7, "eval_accuracy": 0.7130448222160339, "eval_loss": 0.7735299468040466, "eval_runtime": 12.7981, "eval_samples_per_second": 512.732, "eval_steps_per_second": 64.15, "step": 8000 }, { "epoch": 3.88, "eval_accuracy": 0.7145687341690063, "eval_loss": 0.7440381050109863, "eval_runtime": 12.7849, "eval_samples_per_second": 513.26, "eval_steps_per_second": 64.216, "step": 8400 }, { "epoch": 3.88, "step": 8400, "total_flos": 8.411222207808e+16, "train_loss": 0.639952149164109, "train_runtime": 4648.7272, "train_samples_per_second": 558.82, "train_steps_per_second": 4.657 } ], "max_steps": 21650, "num_train_epochs": 10, "total_flos": 8.411222207808e+16, "trial_name": null, "trial_params": null }