{ "best_metric": 0.727402925491333, "best_model_checkpoint": "final_models/structroberta_sx2_final/finetune/mnli-mm/checkpoint-7600", "epoch": 3.695150115473441, "global_step": 8000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.18, "eval_accuracy": 0.5369191765785217, "eval_loss": 0.9326773285865784, "eval_runtime": 13.2147, "eval_samples_per_second": 475.532, "eval_steps_per_second": 59.479, "step": 400 }, { "epoch": 0.23, "learning_rate": 4.884526558891455e-05, "loss": 1.0108, "step": 500 }, { "epoch": 0.37, "eval_accuracy": 0.6338319778442383, "eval_loss": 0.8264617919921875, "eval_runtime": 13.1634, "eval_samples_per_second": 477.383, "eval_steps_per_second": 59.711, "step": 800 }, { "epoch": 0.46, "learning_rate": 4.7690531177829104e-05, "loss": 0.8755, "step": 1000 }, { "epoch": 0.55, "eval_accuracy": 0.6682049632072449, "eval_loss": 0.7684645652770996, "eval_runtime": 13.1416, "eval_samples_per_second": 478.177, "eval_steps_per_second": 59.81, "step": 1200 }, { "epoch": 0.69, "learning_rate": 4.653579676674365e-05, "loss": 0.8192, "step": 1500 }, { "epoch": 0.74, "eval_accuracy": 0.6807765960693359, "eval_loss": 0.7440627813339233, "eval_runtime": 13.1521, "eval_samples_per_second": 477.793, "eval_steps_per_second": 59.762, "step": 1600 }, { "epoch": 0.92, "learning_rate": 4.53810623556582e-05, "loss": 0.7826, "step": 2000 }, { "epoch": 0.92, "eval_accuracy": 0.6958943605422974, "eval_loss": 0.7209344506263733, "eval_runtime": 13.2002, "eval_samples_per_second": 476.052, "eval_steps_per_second": 59.544, "step": 2000 }, { "epoch": 1.11, "eval_accuracy": 0.6958943605422974, "eval_loss": 0.7135202288627625, "eval_runtime": 13.1858, "eval_samples_per_second": 476.574, "eval_steps_per_second": 59.61, "step": 2400 }, { "epoch": 1.15, "learning_rate": 4.422632794457275e-05, "loss": 0.7308, "step": 2500 }, { "epoch": 1.29, "eval_accuracy": 0.7016231417655945, "eval_loss": 0.7067370414733887, "eval_runtime": 13.1691, "eval_samples_per_second": 477.178, "eval_steps_per_second": 59.685, "step": 2800 }, { "epoch": 1.39, "learning_rate": 4.30715935334873e-05, "loss": 0.6949, "step": 3000 }, { "epoch": 1.48, "eval_accuracy": 0.7043284773826599, "eval_loss": 0.6976953148841858, "eval_runtime": 13.1658, "eval_samples_per_second": 477.297, "eval_steps_per_second": 59.7, "step": 3200 }, { "epoch": 1.62, "learning_rate": 4.1916859122401844e-05, "loss": 0.6886, "step": 3500 }, { "epoch": 1.66, "eval_accuracy": 0.7098981738090515, "eval_loss": 0.6742778420448303, "eval_runtime": 13.1582, "eval_samples_per_second": 477.573, "eval_steps_per_second": 59.735, "step": 3600 }, { "epoch": 1.85, "learning_rate": 4.07621247113164e-05, "loss": 0.6823, "step": 4000 }, { "epoch": 1.85, "eval_accuracy": 0.7223106026649475, "eval_loss": 0.6749057173728943, "eval_runtime": 13.1591, "eval_samples_per_second": 477.539, "eval_steps_per_second": 59.73, "step": 4000 }, { "epoch": 2.03, "eval_accuracy": 0.7162635326385498, "eval_loss": 0.6849502921104431, "eval_runtime": 13.1655, "eval_samples_per_second": 477.306, "eval_steps_per_second": 59.701, "step": 4400 }, { "epoch": 2.08, "learning_rate": 3.960739030023095e-05, "loss": 0.6472, "step": 4500 }, { "epoch": 2.22, "eval_accuracy": 0.7194462418556213, "eval_loss": 0.6848563551902771, "eval_runtime": 13.2438, "eval_samples_per_second": 474.485, "eval_steps_per_second": 59.348, "step": 4800 }, { "epoch": 2.31, "learning_rate": 3.84526558891455e-05, "loss": 0.593, "step": 5000 }, { "epoch": 2.4, "eval_accuracy": 0.7170591950416565, "eval_loss": 0.6907213926315308, "eval_runtime": 13.1712, "eval_samples_per_second": 477.102, "eval_steps_per_second": 59.676, "step": 5200 }, { "epoch": 2.54, "learning_rate": 3.729792147806005e-05, "loss": 0.5886, "step": 5500 }, { "epoch": 2.59, "eval_accuracy": 0.7205601334571838, "eval_loss": 0.6818374991416931, "eval_runtime": 13.1911, "eval_samples_per_second": 476.38, "eval_steps_per_second": 59.585, "step": 5600 }, { "epoch": 2.77, "learning_rate": 3.61431870669746e-05, "loss": 0.5957, "step": 6000 }, { "epoch": 2.77, "eval_accuracy": 0.7272437810897827, "eval_loss": 0.6741925477981567, "eval_runtime": 13.1605, "eval_samples_per_second": 477.488, "eval_steps_per_second": 59.724, "step": 6000 }, { "epoch": 2.96, "eval_accuracy": 0.7243793606758118, "eval_loss": 0.6836484670639038, "eval_runtime": 13.1511, "eval_samples_per_second": 477.829, "eval_steps_per_second": 59.767, "step": 6400 }, { "epoch": 3.0, "learning_rate": 3.498845265588915e-05, "loss": 0.5926, "step": 6500 }, { "epoch": 3.14, "eval_accuracy": 0.7196053266525269, "eval_loss": 0.718159019947052, "eval_runtime": 13.1531, "eval_samples_per_second": 477.757, "eval_steps_per_second": 59.758, "step": 6800 }, { "epoch": 3.23, "learning_rate": 3.38337182448037e-05, "loss": 0.4959, "step": 7000 }, { "epoch": 3.33, "eval_accuracy": 0.7237428426742554, "eval_loss": 0.7056543827056885, "eval_runtime": 13.1632, "eval_samples_per_second": 477.393, "eval_steps_per_second": 59.712, "step": 7200 }, { "epoch": 3.46, "learning_rate": 3.2678983833718243e-05, "loss": 0.4987, "step": 7500 }, { "epoch": 3.51, "eval_accuracy": 0.727402925491333, "eval_loss": 0.7022201418876648, "eval_runtime": 13.1685, "eval_samples_per_second": 477.2, "eval_steps_per_second": 59.688, "step": 7600 }, { "epoch": 3.7, "learning_rate": 3.1524249422632794e-05, "loss": 0.5086, "step": 8000 }, { "epoch": 3.7, "eval_accuracy": 0.7262889742851257, "eval_loss": 0.7084487676620483, "eval_runtime": 13.2394, "eval_samples_per_second": 474.643, "eval_steps_per_second": 59.368, "step": 8000 }, { "epoch": 3.7, "step": 8000, "total_flos": 8.793486080672256e+16, "train_loss": 0.6753087520599366, "train_runtime": 4657.837, "train_samples_per_second": 557.727, "train_steps_per_second": 4.648 } ], "max_steps": 21650, "num_train_epochs": 10, "total_flos": 8.793486080672256e+16, "trial_name": null, "trial_params": null }