{ "best_metric": 70.84639498432603, "best_model_checkpoint": "outputs/bitfit/t5-base/superglue-wic/checkpoint-1700", "epoch": 20.0, "global_step": 3400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.59, "eval_accuracy": 64.57680250783699, "eval_average_metrics": 64.57680250783699, "eval_loss": 0.2697466313838959, "eval_runtime": 1.3478, "eval_samples_per_second": 236.682, "step": 100 }, { "epoch": 1.18, "eval_accuracy": 65.51724137931035, "eval_average_metrics": 65.51724137931035, "eval_loss": 0.3369597792625427, "eval_runtime": 1.5122, "eval_samples_per_second": 210.954, "step": 200 }, { "epoch": 1.76, "eval_accuracy": 67.39811912225704, "eval_average_metrics": 67.39811912225704, "eval_loss": 0.29254111647605896, "eval_runtime": 1.6714, "eval_samples_per_second": 190.86, "step": 300 }, { "epoch": 2.35, "eval_accuracy": 65.51724137931035, "eval_average_metrics": 65.51724137931035, "eval_loss": 0.30159813165664673, "eval_runtime": 1.399, "eval_samples_per_second": 228.019, "step": 400 }, { "epoch": 2.94, "learning_rate": 0.0002558823529411764, "loss": 0.2549, "step": 500 }, { "epoch": 2.94, "eval_accuracy": 69.90595611285266, "eval_average_metrics": 69.90595611285266, "eval_loss": 0.30272728204727173, "eval_runtime": 1.7009, "eval_samples_per_second": 187.551, "step": 500 }, { "epoch": 3.53, "eval_accuracy": 67.39811912225704, "eval_average_metrics": 67.39811912225704, "eval_loss": 0.3448639512062073, "eval_runtime": 1.6869, "eval_samples_per_second": 189.1, "step": 600 }, { "epoch": 4.12, "eval_accuracy": 69.27899686520375, "eval_average_metrics": 69.27899686520375, "eval_loss": 0.37638065218925476, "eval_runtime": 1.3564, "eval_samples_per_second": 235.188, "step": 700 }, { "epoch": 4.71, "eval_accuracy": 68.02507836990596, "eval_average_metrics": 68.02507836990596, "eval_loss": 0.3278528153896332, "eval_runtime": 1.436, "eval_samples_per_second": 222.137, "step": 800 }, { "epoch": 5.29, "eval_accuracy": 65.8307210031348, "eval_average_metrics": 65.8307210031348, "eval_loss": 0.37711796164512634, "eval_runtime": 1.7367, "eval_samples_per_second": 183.683, "step": 900 }, { "epoch": 5.88, "learning_rate": 0.00021176470588235295, "loss": 0.1531, "step": 1000 }, { "epoch": 5.88, "eval_accuracy": 68.96551724137932, "eval_average_metrics": 68.96551724137932, "eval_loss": 0.37187883257865906, "eval_runtime": 1.5128, "eval_samples_per_second": 210.871, "step": 1000 }, { "epoch": 6.47, "eval_accuracy": 66.14420062695925, "eval_average_metrics": 66.14420062695925, "eval_loss": 0.4265880882740021, "eval_runtime": 1.6439, "eval_samples_per_second": 194.053, "step": 1100 }, { "epoch": 7.06, "eval_accuracy": 67.0846394984326, "eval_average_metrics": 67.0846394984326, "eval_loss": 0.4190385937690735, "eval_runtime": 1.694, "eval_samples_per_second": 188.316, "step": 1200 }, { "epoch": 7.65, "eval_accuracy": 67.7115987460815, "eval_average_metrics": 67.7115987460815, "eval_loss": 0.4279927611351013, "eval_runtime": 1.6705, "eval_samples_per_second": 190.965, "step": 1300 }, { "epoch": 8.24, "eval_accuracy": 68.96551724137932, "eval_average_metrics": 68.96551724137932, "eval_loss": 0.4631330370903015, "eval_runtime": 1.6505, "eval_samples_per_second": 193.272, "step": 1400 }, { "epoch": 8.82, "learning_rate": 0.0001676470588235294, "loss": 0.0995, "step": 1500 }, { "epoch": 8.82, "eval_accuracy": 67.7115987460815, "eval_average_metrics": 67.7115987460815, "eval_loss": 0.46376463770866394, "eval_runtime": 1.4266, "eval_samples_per_second": 223.612, "step": 1500 }, { "epoch": 9.41, "eval_accuracy": 68.96551724137932, "eval_average_metrics": 68.96551724137932, "eval_loss": 0.509791374206543, "eval_runtime": 1.5279, "eval_samples_per_second": 208.785, "step": 1600 }, { "epoch": 10.0, "eval_accuracy": 70.84639498432603, "eval_average_metrics": 70.84639498432603, "eval_loss": 0.4690648913383484, "eval_runtime": 1.3636, "eval_samples_per_second": 233.934, "step": 1700 }, { "epoch": 10.59, "eval_accuracy": 69.27899686520375, "eval_average_metrics": 69.27899686520375, "eval_loss": 0.5513712763786316, "eval_runtime": 1.6761, "eval_samples_per_second": 190.323, "step": 1800 }, { "epoch": 11.18, "eval_accuracy": 68.65203761755487, "eval_average_metrics": 68.65203761755487, "eval_loss": 0.5640075206756592, "eval_runtime": 1.498, "eval_samples_per_second": 212.948, "step": 1900 }, { "epoch": 11.76, "learning_rate": 0.00012352941176470587, "loss": 0.0627, "step": 2000 }, { "epoch": 11.76, "eval_accuracy": 68.3385579937304, "eval_average_metrics": 68.3385579937304, "eval_loss": 0.5400956273078918, "eval_runtime": 1.7505, "eval_samples_per_second": 182.237, "step": 2000 }, { "epoch": 12.35, "eval_accuracy": 66.77115987460816, "eval_average_metrics": 66.77115987460816, "eval_loss": 0.6046501994132996, "eval_runtime": 1.6979, "eval_samples_per_second": 187.882, "step": 2100 }, { "epoch": 12.94, "eval_accuracy": 70.53291536050156, "eval_average_metrics": 70.53291536050156, "eval_loss": 0.6463834047317505, "eval_runtime": 1.3771, "eval_samples_per_second": 231.652, "step": 2200 }, { "epoch": 13.53, "eval_accuracy": 69.27899686520375, "eval_average_metrics": 69.27899686520375, "eval_loss": 0.6423152089118958, "eval_runtime": 1.6672, "eval_samples_per_second": 191.343, "step": 2300 }, { "epoch": 14.12, "eval_accuracy": 68.96551724137932, "eval_average_metrics": 68.96551724137932, "eval_loss": 0.6988397240638733, "eval_runtime": 1.6913, "eval_samples_per_second": 188.613, "step": 2400 }, { "epoch": 14.71, "learning_rate": 7.941176470588235e-05, "loss": 0.037, "step": 2500 }, { "epoch": 14.71, "eval_accuracy": 68.3385579937304, "eval_average_metrics": 68.3385579937304, "eval_loss": 0.7036728858947754, "eval_runtime": 1.7114, "eval_samples_per_second": 186.402, "step": 2500 }, { "epoch": 15.29, "eval_accuracy": 68.65203761755487, "eval_average_metrics": 68.65203761755487, "eval_loss": 0.6903794407844543, "eval_runtime": 1.6091, "eval_samples_per_second": 198.248, "step": 2600 }, { "epoch": 15.88, "eval_accuracy": 68.96551724137932, "eval_average_metrics": 68.96551724137932, "eval_loss": 0.7062700390815735, "eval_runtime": 1.7338, "eval_samples_per_second": 183.989, "step": 2700 }, { "epoch": 16.47, "eval_accuracy": 70.21943573667711, "eval_average_metrics": 70.21943573667711, "eval_loss": 0.7178221940994263, "eval_runtime": 1.3196, "eval_samples_per_second": 241.74, "step": 2800 }, { "epoch": 17.06, "eval_accuracy": 68.96551724137932, "eval_average_metrics": 68.96551724137932, "eval_loss": 0.7901595234870911, "eval_runtime": 1.6693, "eval_samples_per_second": 191.103, "step": 2900 }, { "epoch": 17.65, "learning_rate": 3.529411764705882e-05, "loss": 0.0252, "step": 3000 }, { "epoch": 17.65, "eval_accuracy": 69.27899686520375, "eval_average_metrics": 69.27899686520375, "eval_loss": 0.8428929448127747, "eval_runtime": 1.6724, "eval_samples_per_second": 190.74, "step": 3000 }, { "epoch": 18.24, "eval_accuracy": 68.96551724137932, "eval_average_metrics": 68.96551724137932, "eval_loss": 0.824032187461853, "eval_runtime": 1.1559, "eval_samples_per_second": 275.979, "step": 3100 }, { "epoch": 18.82, "eval_accuracy": 69.27899686520375, "eval_average_metrics": 69.27899686520375, "eval_loss": 0.8500473499298096, "eval_runtime": 1.7687, "eval_samples_per_second": 180.362, "step": 3200 }, { "epoch": 19.41, "eval_accuracy": 68.3385579937304, "eval_average_metrics": 68.3385579937304, "eval_loss": 0.8623896241188049, "eval_runtime": 1.7067, "eval_samples_per_second": 186.905, "step": 3300 }, { "epoch": 20.0, "eval_accuracy": 68.3385579937304, "eval_average_metrics": 68.3385579937304, "eval_loss": 0.8675826787948608, "eval_runtime": 1.7047, "eval_samples_per_second": 187.128, "step": 3400 }, { "epoch": 20.0, "step": 3400, "total_flos": 7227776409587712.0, "train_loss": 0.0949506963000578, "train_runtime": 1454.9221, "train_samples_per_second": 74.616, "train_steps_per_second": 2.337 } ], "max_steps": 3400, "num_train_epochs": 20, "total_flos": 7227776409587712.0, "trial_name": null, "trial_params": null }