{ "best_metric": 0.6942489743232727, "best_model_checkpoint": "add_BERT_no_pretrain_rte/checkpoint-20", "epoch": 6.0, "global_step": 120, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 3.9200000000000004e-05, "loss": 0.7731, "step": 20 }, { "epoch": 1.0, "eval_accuracy": 0.5270758122743683, "eval_loss": 0.6942489743232727, "eval_runtime": 0.5221, "eval_samples_per_second": 530.559, "eval_steps_per_second": 5.746, "step": 20 }, { "epoch": 2.0, "learning_rate": 3.8400000000000005e-05, "loss": 0.709, "step": 40 }, { "epoch": 2.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.71894770860672, "eval_runtime": 0.5216, "eval_samples_per_second": 531.047, "eval_steps_per_second": 5.751, "step": 40 }, { "epoch": 3.0, "learning_rate": 3.76e-05, "loss": 0.7188, "step": 60 }, { "epoch": 3.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.6948172450065613, "eval_runtime": 0.5214, "eval_samples_per_second": 531.271, "eval_steps_per_second": 5.754, "step": 60 }, { "epoch": 4.0, "learning_rate": 3.680000000000001e-05, "loss": 0.7007, "step": 80 }, { "epoch": 4.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.6980084180831909, "eval_runtime": 0.5256, "eval_samples_per_second": 527.056, "eval_steps_per_second": 5.708, "step": 80 }, { "epoch": 5.0, "learning_rate": 3.6e-05, "loss": 0.7048, "step": 100 }, { "epoch": 5.0, "eval_accuracy": 0.5270758122743683, "eval_loss": 0.7017844319343567, "eval_runtime": 0.5223, "eval_samples_per_second": 530.325, "eval_steps_per_second": 5.744, "step": 100 }, { "epoch": 6.0, "learning_rate": 3.52e-05, "loss": 0.7065, "step": 120 }, { "epoch": 6.0, "eval_accuracy": 0.4729241877256318, "eval_loss": 0.726894199848175, "eval_runtime": 0.5221, "eval_samples_per_second": 530.593, "eval_steps_per_second": 5.746, "step": 120 }, { "epoch": 6.0, "step": 120, "total_flos": 2133790084300800.0, "train_loss": 0.7188253561655681, "train_runtime": 120.124, "train_samples_per_second": 1036.429, "train_steps_per_second": 8.325 } ], "max_steps": 1000, "num_train_epochs": 50, "total_flos": 2133790084300800.0, "trial_name": null, "trial_params": null }