{ "best_metric": 0.34253132343292236, "best_model_checkpoint": "sa_BERT_48_qqp/checkpoint-11373", "epoch": 8.0, "global_step": 30328, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 3.9200000000000004e-05, "loss": 0.4679, "step": 3791 }, { "epoch": 1.0, "eval_accuracy": 0.8222359633935197, "eval_combined_score": 0.796364668588026, "eval_f1": 0.7704933737825324, "eval_loss": 0.3794594705104828, "eval_runtime": 85.3071, "eval_samples_per_second": 473.935, "eval_steps_per_second": 4.947, "step": 3791 }, { "epoch": 2.0, "learning_rate": 3.8400000000000005e-05, "loss": 0.3469, "step": 7582 }, { "epoch": 2.0, "eval_accuracy": 0.8446697996537225, "eval_combined_score": 0.8204728353358471, "eval_f1": 0.7962758710179718, "eval_loss": 0.35802027583122253, "eval_runtime": 85.9214, "eval_samples_per_second": 470.546, "eval_steps_per_second": 4.911, "step": 7582 }, { "epoch": 3.0, "learning_rate": 3.76e-05, "loss": 0.2868, "step": 11373 }, { "epoch": 3.0, "eval_accuracy": 0.8510017313875835, "eval_combined_score": 0.8253212608245042, "eval_f1": 0.799640790261425, "eval_loss": 0.34253132343292236, "eval_runtime": 85.8338, "eval_samples_per_second": 471.026, "eval_steps_per_second": 4.916, "step": 11373 }, { "epoch": 4.0, "learning_rate": 3.680000000000001e-05, "loss": 0.2372, "step": 15164 }, { "epoch": 4.0, "eval_accuracy": 0.8560969577046748, "eval_combined_score": 0.8354877050200858, "eval_f1": 0.8148784523354969, "eval_loss": 0.37059277296066284, "eval_runtime": 85.9472, "eval_samples_per_second": 470.405, "eval_steps_per_second": 4.91, "step": 15164 }, { "epoch": 5.0, "learning_rate": 3.6e-05, "loss": 0.1938, "step": 18955 }, { "epoch": 5.0, "eval_accuracy": 0.8624783576552065, "eval_combined_score": 0.8410731175925228, "eval_f1": 0.819667877529839, "eval_loss": 0.36789774894714355, "eval_runtime": 85.3085, "eval_samples_per_second": 473.927, "eval_steps_per_second": 4.947, "step": 18955 }, { "epoch": 6.0, "learning_rate": 3.52e-05, "loss": 0.1567, "step": 22746 }, { "epoch": 6.0, "eval_accuracy": 0.8639376700469948, "eval_combined_score": 0.8426697862913135, "eval_f1": 0.8214019025356321, "eval_loss": 0.42459815740585327, "eval_runtime": 85.3092, "eval_samples_per_second": 473.923, "eval_steps_per_second": 4.947, "step": 22746 }, { "epoch": 7.0, "learning_rate": 3.44e-05, "loss": 0.1294, "step": 26537 }, { "epoch": 7.0, "eval_accuracy": 0.8585456344298789, "eval_combined_score": 0.8387393361321469, "eval_f1": 0.8189330378344151, "eval_loss": 0.40472903847694397, "eval_runtime": 85.2488, "eval_samples_per_second": 474.259, "eval_steps_per_second": 4.95, "step": 26537 }, { "epoch": 8.0, "learning_rate": 3.3600000000000004e-05, "loss": 0.1059, "step": 30328 }, { "epoch": 8.0, "eval_accuracy": 0.8579025476131585, "eval_combined_score": 0.8380177209875599, "eval_f1": 0.8181328943619613, "eval_loss": 0.5063081383705139, "eval_runtime": 85.299, "eval_samples_per_second": 473.98, "eval_steps_per_second": 4.947, "step": 30328 }, { "epoch": 8.0, "step": 30328, "total_flos": 3.8292763616005325e+17, "train_loss": 0.24057756895378754, "train_runtime": 19188.7559, "train_samples_per_second": 948.071, "train_steps_per_second": 9.878 } ], "max_steps": 189550, "num_train_epochs": 50, "total_flos": 3.8292763616005325e+17, "trial_name": null, "trial_params": null }