{ "best_metric": 0.9361986026141007, "best_model_checkpoint": "distilbert-base-uncased-finetuned-swiss_re_assignment-bodypart/run-7/checkpoint-3128", "epoch": 4.0, "eval_steps": 500, "global_step": 3128, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.64, "grad_norm": 3.043917655944824, "learning_rate": 1.1523885830392892e-05, "loss": 1.301, "step": 500 }, { "epoch": 1.0, "eval_accuracy": 0.8952649216735153, "eval_loss": 0.44891732931137085, "eval_runtime": 4.2374, "eval_samples_per_second": 184.781, "eval_steps_per_second": 3.068, "step": 782 }, { "epoch": 1.28, "grad_norm": 46.61455154418945, "learning_rate": 9.83416650042326e-06, "loss": 0.4247, "step": 1000 }, { "epoch": 1.92, "grad_norm": 36.788780212402344, "learning_rate": 8.144447170453627e-06, "loss": 0.2417, "step": 1500 }, { "epoch": 2.0, "eval_accuracy": 0.9069869583316635, "eval_loss": 0.41448912024497986, "eval_runtime": 5.7752, "eval_samples_per_second": 135.58, "eval_steps_per_second": 2.251, "step": 1564 }, { "epoch": 2.56, "grad_norm": 0.9252055287361145, "learning_rate": 6.454727840483996e-06, "loss": 0.2104, "step": 2000 }, { "epoch": 3.0, "eval_accuracy": 0.9338136133834791, "eval_loss": 0.3658573627471924, "eval_runtime": 4.3869, "eval_samples_per_second": 178.488, "eval_steps_per_second": 2.963, "step": 2346 }, { "epoch": 3.2, "grad_norm": 0.9745433926582336, "learning_rate": 4.765008510514363e-06, "loss": 0.171, "step": 2500 }, { "epoch": 3.84, "grad_norm": 0.03158050402998924, "learning_rate": 3.075289180544731e-06, "loss": 0.1593, "step": 3000 }, { "epoch": 4.0, "eval_accuracy": 0.9361986026141007, "eval_loss": 0.3598783314228058, "eval_runtime": 4.2759, "eval_samples_per_second": 183.12, "eval_steps_per_second": 3.04, "step": 3128 } ], "logging_steps": 500, "max_steps": 3910, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "total_flos": 140988338859072.0, "train_batch_size": 4, "trial_name": null, "trial_params": { "learning_rate": 1.3213605160362524e-05, "num_train_epochs": 5, "per_device_train_batch_size": 4, "seed": 10 } }