{ "best_metric": 0.8354203935599285, "best_model_checkpoint": "outputs/bert-base-uncased_/checkpoint-565", "epoch": 5.0, "eval_steps": 500, "global_step": 565, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.7477638640429338, "eval_loss": 0.705581784248352, "eval_runtime": 3.7666, "eval_samples_per_second": 148.408, "eval_steps_per_second": 4.779, "step": 113 }, { "epoch": 2.0, "eval_accuracy": 0.7692307692307693, "eval_loss": 0.561244547367096, "eval_runtime": 3.9311, "eval_samples_per_second": 142.2, "eval_steps_per_second": 4.579, "step": 226 }, { "epoch": 3.0, "eval_accuracy": 0.8032200357781754, "eval_loss": 0.48897784948349, "eval_runtime": 3.8551, "eval_samples_per_second": 145.003, "eval_steps_per_second": 4.669, "step": 339 }, { "epoch": 4.0, "eval_accuracy": 0.815742397137746, "eval_loss": 0.5680442452430725, "eval_runtime": 3.8496, "eval_samples_per_second": 145.21, "eval_steps_per_second": 4.676, "step": 452 }, { "epoch": 4.424778761061947, "grad_norm": 4.835990905761719, "learning_rate": 5.752212389380531e-06, "loss": 0.4786, "step": 500 }, { "epoch": 5.0, "eval_accuracy": 0.8354203935599285, "eval_loss": 0.5574982166290283, "eval_runtime": 3.7884, "eval_samples_per_second": 147.554, "eval_steps_per_second": 4.751, "step": 565 }, { "epoch": 5.0, "step": 565, "total_flos": 1184668163320320.0, "train_loss": 0.43879880651963493, "train_runtime": 447.1428, "train_samples_per_second": 40.278, "train_steps_per_second": 1.264 } ], "logging_steps": 500, "max_steps": 565, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1184668163320320.0, "train_batch_size": 32, "trial_name": null, "trial_params": null }