{ "best_metric": 0.20898549258708954, "best_model_checkpoint": "deberta-v3-base-funetuned-qqa/checkpoint-2842", "epoch": 2.999736170961217, "global_step": 4263, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.18, "learning_rate": 1.912033779028853e-05, "loss": 0.3671, "step": 250 }, { "epoch": 0.35, "learning_rate": 1.824067558057706e-05, "loss": 0.2758, "step": 500 }, { "epoch": 0.53, "learning_rate": 1.736101337086559e-05, "loss": 0.2616, "step": 750 }, { "epoch": 0.7, "learning_rate": 1.6481351161154117e-05, "loss": 0.2469, "step": 1000 }, { "epoch": 0.88, "learning_rate": 1.5601688951442647e-05, "loss": 0.235, "step": 1250 }, { "epoch": 1.0, "eval_accuracy": 0.90576171875, "eval_binary_crossentropy_loss": 9.43606185913086, "eval_loss": 0.22764553129673004, "eval_runtime": 148.7776, "eval_samples_per_second": 271.748, "eval_steps_per_second": 16.985, "step": 1421 }, { "epoch": 1.06, "learning_rate": 1.4722026741731177e-05, "loss": 0.2228, "step": 1500 }, { "epoch": 1.23, "learning_rate": 1.3842364532019705e-05, "loss": 0.1956, "step": 1750 }, { "epoch": 1.41, "learning_rate": 1.2962702322308235e-05, "loss": 0.1994, "step": 2000 }, { "epoch": 1.58, "learning_rate": 1.2083040112596764e-05, "loss": 0.1957, "step": 2250 }, { "epoch": 1.76, "learning_rate": 1.1203377902885292e-05, "loss": 0.1893, "step": 2500 }, { "epoch": 1.94, "learning_rate": 1.0323715693173822e-05, "loss": 0.1908, "step": 2750 }, { "epoch": 2.0, "eval_accuracy": 0.91357421875, "eval_binary_crossentropy_loss": 8.624783515930176, "eval_loss": 0.20898549258708954, "eval_runtime": 148.8373, "eval_samples_per_second": 271.639, "eval_steps_per_second": 16.978, "step": 2842 }, { "epoch": 2.11, "learning_rate": 9.44405348346235e-06, "loss": 0.1706, "step": 3000 }, { "epoch": 2.29, "learning_rate": 8.56439127375088e-06, "loss": 0.1574, "step": 3250 }, { "epoch": 2.46, "learning_rate": 7.68472906403941e-06, "loss": 0.1573, "step": 3500 }, { "epoch": 2.64, "learning_rate": 6.808585503166785e-06, "loss": 0.1619, "step": 3750 }, { "epoch": 2.81, "learning_rate": 5.928923293455313e-06, "loss": 0.1574, "step": 4000 }, { "epoch": 2.99, "learning_rate": 5.049261083743843e-06, "loss": 0.1575, "step": 4250 }, { "epoch": 3.0, "eval_accuracy": 0.9169921875, "eval_binary_crossentropy_loss": 8.323027610778809, "eval_loss": 0.20993904769420624, "eval_runtime": 148.6942, "eval_samples_per_second": 271.9, "eval_steps_per_second": 16.995, "step": 4263 } ], "max_steps": 5684, "num_train_epochs": 4, "total_flos": 7.180021641700454e+16, "trial_name": null, "trial_params": null }