{ "best_metric": 0.8666666666666667, "best_model_checkpoint": "./outputs/xlm-roberta-large-best-model/xnli_16_0.1_0.00001_04-05-23_12-26/checkpoint-73632", "epoch": 4.0, "global_step": 98176, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 7.978761445522026e-06, "loss": 0.548, "step": 24544 }, { "epoch": 1.0, "eval_accuracy": 0.8526104417670682, "eval_loss": 0.4045264422893524, "eval_runtime": 85.4254, "eval_samples_per_second": 29.148, "eval_steps_per_second": 3.652, "step": 24544 }, { "epoch": 2.0, "learning_rate": 5.319174297014683e-06, "loss": 0.3739, "step": 49088 }, { "epoch": 2.0, "eval_accuracy": 0.8538152610441767, "eval_loss": 0.3931969106197357, "eval_runtime": 85.4189, "eval_samples_per_second": 29.15, "eval_steps_per_second": 3.653, "step": 49088 }, { "epoch": 3.0, "learning_rate": 2.6595871485073417e-06, "loss": 0.2739, "step": 73632 }, { "epoch": 3.0, "eval_accuracy": 0.8666666666666667, "eval_loss": 0.457296758890152, "eval_runtime": 85.1723, "eval_samples_per_second": 29.235, "eval_steps_per_second": 3.663, "step": 73632 }, { "epoch": 4.0, "learning_rate": 0.0, "loss": 0.2075, "step": 98176 }, { "epoch": 4.0, "eval_accuracy": 0.8626506024096385, "eval_loss": 0.5882326364517212, "eval_runtime": 85.1965, "eval_samples_per_second": 29.227, "eval_steps_per_second": 3.662, "step": 98176 }, { "epoch": 4.0, "step": 98176, "total_flos": 1.4638901871267594e+18, "train_loss": 0.3508187235567374, "train_runtime": 165856.5757, "train_samples_per_second": 9.471, "train_steps_per_second": 0.592 } ], "max_steps": 98176, "num_train_epochs": 4, "total_flos": 1.4638901871267594e+18, "trial_name": null, "trial_params": null }