{ "best_metric": 0.6239899396896362, "best_model_checkpoint": "add_BERT_no_pretrain_mrpc/checkpoint-406", "epoch": 19.0, "global_step": 551, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 0.0004910344827586207, "loss": 1.154, "step": 29 }, { "epoch": 1.0, "eval_accuracy": 0.6838235294117647, "eval_combined_score": 0.7480253018237863, "eval_f1": 0.8122270742358079, "eval_loss": 0.6856198906898499, "eval_runtime": 0.6445, "eval_samples_per_second": 633.061, "eval_steps_per_second": 6.206, "step": 29 }, { "epoch": 2.0, "learning_rate": 0.0004810344827586207, "loss": 0.6781, "step": 58 }, { "epoch": 2.0, "eval_accuracy": 0.6838235294117647, "eval_combined_score": 0.7480253018237863, "eval_f1": 0.8122270742358079, "eval_loss": 0.6609018445014954, "eval_runtime": 0.634, "eval_samples_per_second": 643.583, "eval_steps_per_second": 6.31, "step": 58 }, { "epoch": 3.0, "learning_rate": 0.00047103448275862073, "loss": 0.6458, "step": 87 }, { "epoch": 3.0, "eval_accuracy": 0.6838235294117647, "eval_combined_score": 0.7480253018237863, "eval_f1": 0.8122270742358079, "eval_loss": 0.634788990020752, "eval_runtime": 0.6427, "eval_samples_per_second": 634.811, "eval_steps_per_second": 6.224, "step": 87 }, { "epoch": 4.0, "learning_rate": 0.0004610344827586207, "loss": 0.6395, "step": 116 }, { "epoch": 4.0, "eval_accuracy": 0.31862745098039214, "eval_combined_score": 0.16288515406162463, "eval_f1": 0.0071428571428571435, "eval_loss": 19.635393142700195, "eval_runtime": 0.6439, "eval_samples_per_second": 633.614, "eval_steps_per_second": 6.212, "step": 116 }, { "epoch": 5.0, "learning_rate": 0.0004510344827586207, "loss": 1.1486, "step": 145 }, { "epoch": 5.0, "eval_accuracy": 0.6838235294117647, "eval_combined_score": 0.7480253018237863, "eval_f1": 0.8122270742358079, "eval_loss": 0.6657487750053406, "eval_runtime": 0.6436, "eval_samples_per_second": 633.968, "eval_steps_per_second": 6.215, "step": 145 }, { "epoch": 6.0, "learning_rate": 0.00044103448275862066, "loss": 0.6446, "step": 174 }, { "epoch": 6.0, "eval_accuracy": 0.6838235294117647, "eval_combined_score": 0.7480253018237863, "eval_f1": 0.8122270742358079, "eval_loss": 0.627680778503418, "eval_runtime": 0.6427, "eval_samples_per_second": 634.78, "eval_steps_per_second": 6.223, "step": 174 }, { "epoch": 7.0, "learning_rate": 0.0004310344827586207, "loss": 0.644, "step": 203 }, { "epoch": 7.0, "eval_accuracy": 0.6838235294117647, "eval_combined_score": 0.7480253018237863, "eval_f1": 0.8122270742358079, "eval_loss": 0.6241939663887024, "eval_runtime": 0.6436, "eval_samples_per_second": 633.911, "eval_steps_per_second": 6.215, "step": 203 }, { "epoch": 8.0, "learning_rate": 0.0004210344827586207, "loss": 0.6337, "step": 232 }, { "epoch": 8.0, "eval_accuracy": 0.6838235294117647, "eval_combined_score": 0.7480253018237863, "eval_f1": 0.8122270742358079, "eval_loss": 0.6242448687553406, "eval_runtime": 0.6458, "eval_samples_per_second": 631.822, "eval_steps_per_second": 6.194, "step": 232 }, { "epoch": 9.0, "learning_rate": 0.0004110344827586207, "loss": 0.6388, "step": 261 }, { "epoch": 9.0, "eval_accuracy": 0.6838235294117647, "eval_combined_score": 0.7480253018237863, "eval_f1": 0.8122270742358079, "eval_loss": 0.6252579092979431, "eval_runtime": 0.6457, "eval_samples_per_second": 631.91, "eval_steps_per_second": 6.195, "step": 261 }, { "epoch": 10.0, "learning_rate": 0.0004010344827586207, "loss": 0.634, "step": 290 }, { "epoch": 10.0, "eval_accuracy": 0.6838235294117647, "eval_combined_score": 0.7480253018237863, "eval_f1": 0.8122270742358079, "eval_loss": 0.6241927742958069, "eval_runtime": 0.6462, "eval_samples_per_second": 631.361, "eval_steps_per_second": 6.19, "step": 290 }, { "epoch": 11.0, "learning_rate": 0.0003910344827586207, "loss": 0.6346, "step": 319 }, { "epoch": 11.0, "eval_accuracy": 0.6838235294117647, "eval_combined_score": 0.7480253018237863, "eval_f1": 0.8122270742358079, "eval_loss": 0.6264073848724365, "eval_runtime": 0.6465, "eval_samples_per_second": 631.046, "eval_steps_per_second": 6.187, "step": 319 }, { "epoch": 12.0, "learning_rate": 0.00038103448275862066, "loss": 0.6338, "step": 348 }, { "epoch": 12.0, "eval_accuracy": 0.6838235294117647, "eval_combined_score": 0.7480253018237863, "eval_f1": 0.8122270742358079, "eval_loss": 0.6273019909858704, "eval_runtime": 0.6464, "eval_samples_per_second": 631.183, "eval_steps_per_second": 6.188, "step": 348 }, { "epoch": 13.0, "learning_rate": 0.0003710344827586207, "loss": 0.6343, "step": 377 }, { "epoch": 13.0, "eval_accuracy": 0.6838235294117647, "eval_combined_score": 0.7480253018237863, "eval_f1": 0.8122270742358079, "eval_loss": 0.6262201070785522, "eval_runtime": 0.6462, "eval_samples_per_second": 631.42, "eval_steps_per_second": 6.19, "step": 377 }, { "epoch": 14.0, "learning_rate": 0.0003610344827586207, "loss": 0.6339, "step": 406 }, { "epoch": 14.0, "eval_accuracy": 0.6838235294117647, "eval_combined_score": 0.7480253018237863, "eval_f1": 0.8122270742358079, "eval_loss": 0.6239899396896362, "eval_runtime": 0.6481, "eval_samples_per_second": 629.552, "eval_steps_per_second": 6.172, "step": 406 }, { "epoch": 15.0, "learning_rate": 0.0003510344827586207, "loss": 0.635, "step": 435 }, { "epoch": 15.0, "eval_accuracy": 0.6838235294117647, "eval_combined_score": 0.7480253018237863, "eval_f1": 0.8122270742358079, "eval_loss": 0.6244243383407593, "eval_runtime": 0.6472, "eval_samples_per_second": 630.443, "eval_steps_per_second": 6.181, "step": 435 }, { "epoch": 16.0, "learning_rate": 0.0003410344827586207, "loss": 0.6331, "step": 464 }, { "epoch": 16.0, "eval_accuracy": 0.6838235294117647, "eval_combined_score": 0.7480253018237863, "eval_f1": 0.8122270742358079, "eval_loss": 0.6240312457084656, "eval_runtime": 0.6454, "eval_samples_per_second": 632.149, "eval_steps_per_second": 6.198, "step": 464 }, { "epoch": 17.0, "learning_rate": 0.0003310344827586207, "loss": 0.6328, "step": 493 }, { "epoch": 17.0, "eval_accuracy": 0.6838235294117647, "eval_combined_score": 0.7480253018237863, "eval_f1": 0.8122270742358079, "eval_loss": 0.6266641020774841, "eval_runtime": 0.646, "eval_samples_per_second": 631.583, "eval_steps_per_second": 6.192, "step": 493 }, { "epoch": 18.0, "learning_rate": 0.00032103448275862067, "loss": 0.6338, "step": 522 }, { "epoch": 18.0, "eval_accuracy": 0.6838235294117647, "eval_combined_score": 0.7480253018237863, "eval_f1": 0.8122270742358079, "eval_loss": 0.6256989240646362, "eval_runtime": 0.6466, "eval_samples_per_second": 630.959, "eval_steps_per_second": 6.186, "step": 522 }, { "epoch": 19.0, "learning_rate": 0.0003110344827586207, "loss": 0.6321, "step": 551 }, { "epoch": 19.0, "eval_accuracy": 0.6838235294117647, "eval_combined_score": 0.7480253018237863, "eval_f1": 0.8122270742358079, "eval_loss": 0.623990535736084, "eval_runtime": 0.6488, "eval_samples_per_second": 628.807, "eval_steps_per_second": 6.165, "step": 551 }, { "epoch": 19.0, "step": 551, "total_flos": 9953687707320320.0, "train_loss": 0.6928667667340453, "train_runtime": 412.0299, "train_samples_per_second": 445.113, "train_steps_per_second": 3.519 } ], "max_steps": 1450, "num_train_epochs": 50, "total_flos": 9953687707320320.0, "trial_name": null, "trial_params": null }