{ "best_metric": 0.7581227436823105, "best_model_checkpoint": "./fine-tune/roberta-base/rte/checkpoint-780", "epoch": 8.0, "global_step": 1248, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.7072076797485352, "eval_runtime": 0.4902, "eval_samples_per_second": 565.102, "eval_steps_per_second": 71.403, "step": 156 }, { "epoch": 2.0, "eval_accuracy": 0.5270758122743683, "eval_loss": 0.6958328485488892, "eval_runtime": 0.689, "eval_samples_per_second": 402.039, "eval_steps_per_second": 50.799, "step": 312 }, { "epoch": 3.0, "eval_accuracy": 0.6462093862815884, "eval_loss": 0.6193148493766785, "eval_runtime": 0.5121, "eval_samples_per_second": 540.927, "eval_steps_per_second": 68.348, "step": 468 }, { "epoch": 3.21, "learning_rate": 1.446111869031378e-05, "loss": 0.6759, "step": 500 }, { "epoch": 4.0, "eval_accuracy": 0.7075812274368231, "eval_loss": 0.6046352386474609, "eval_runtime": 0.5274, "eval_samples_per_second": 525.205, "eval_steps_per_second": 66.362, "step": 624 }, { "epoch": 5.0, "eval_accuracy": 0.7581227436823105, "eval_loss": 0.6365415453910828, "eval_runtime": 0.681, "eval_samples_per_second": 406.732, "eval_steps_per_second": 51.392, "step": 780 }, { "epoch": 6.0, "eval_accuracy": 0.7545126353790613, "eval_loss": 0.897487223148346, "eval_runtime": 0.6584, "eval_samples_per_second": 420.704, "eval_steps_per_second": 53.157, "step": 936 }, { "epoch": 6.41, "learning_rate": 7.639836289222374e-06, "loss": 0.3194, "step": 1000 }, { "epoch": 7.0, "eval_accuracy": 0.7581227436823105, "eval_loss": 1.2031357288360596, "eval_runtime": 0.5817, "eval_samples_per_second": 476.173, "eval_steps_per_second": 60.166, "step": 1092 }, { "epoch": 8.0, "eval_accuracy": 0.7581227436823105, "eval_loss": 1.2942094802856445, "eval_runtime": 0.6818, "eval_samples_per_second": 406.254, "eval_steps_per_second": 51.332, "step": 1248 }, { "epoch": 8.0, "step": 1248, "total_flos": 1310293055692800.0, "train_loss": 0.42912168074876833, "train_runtime": 173.8183, "train_samples_per_second": 143.253, "train_steps_per_second": 8.975 } ], "max_steps": 1560, "num_train_epochs": 10, "total_flos": 1310293055692800.0, "trial_name": null, "trial_params": null }