{ "best_metric": 0.029607130214571953, "best_model_checkpoint": "model/checkpoint-6500", "epoch": 2.138157894736842, "eval_steps": 500, "global_step": 6500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.16, "learning_rate": 4.7944078947368425e-05, "loss": 0.1494, "step": 500 }, { "epoch": 0.16, "eval_accuracy": 0.9837171052631579, "eval_loss": 0.0790267065167427, "eval_runtime": 113.0978, "eval_samples_per_second": 53.759, "eval_steps_per_second": 6.72, "step": 500 }, { "epoch": 0.33, "learning_rate": 4.588815789473684e-05, "loss": 0.1072, "step": 1000 }, { "epoch": 0.33, "eval_accuracy": 0.9824013157894737, "eval_loss": 0.06455110013484955, "eval_runtime": 113.235, "eval_samples_per_second": 53.694, "eval_steps_per_second": 6.712, "step": 1000 }, { "epoch": 0.49, "learning_rate": 4.383223684210527e-05, "loss": 0.0765, "step": 1500 }, { "epoch": 0.49, "eval_accuracy": 0.9899671052631579, "eval_loss": 0.04877911135554314, "eval_runtime": 113.1524, "eval_samples_per_second": 53.733, "eval_steps_per_second": 6.717, "step": 1500 }, { "epoch": 0.66, "learning_rate": 4.177631578947369e-05, "loss": 0.0742, "step": 2000 }, { "epoch": 0.66, "eval_accuracy": 0.9886513157894737, "eval_loss": 0.054583221673965454, "eval_runtime": 113.2769, "eval_samples_per_second": 53.674, "eval_steps_per_second": 6.709, "step": 2000 }, { "epoch": 0.82, "learning_rate": 3.972039473684211e-05, "loss": 0.0748, "step": 2500 }, { "epoch": 0.82, "eval_accuracy": 0.990625, "eval_loss": 0.04383059963583946, "eval_runtime": 113.3147, "eval_samples_per_second": 53.656, "eval_steps_per_second": 6.707, "step": 2500 }, { "epoch": 0.99, "learning_rate": 3.7664473684210526e-05, "loss": 0.0437, "step": 3000 }, { "epoch": 0.99, "eval_accuracy": 0.9901315789473685, "eval_loss": 0.05416030064225197, "eval_runtime": 113.1123, "eval_samples_per_second": 53.752, "eval_steps_per_second": 6.719, "step": 3000 }, { "epoch": 1.15, "learning_rate": 3.560855263157895e-05, "loss": 0.0134, "step": 3500 }, { "epoch": 1.15, "eval_accuracy": 0.990953947368421, "eval_loss": 0.05749928951263428, "eval_runtime": 113.4787, "eval_samples_per_second": 53.578, "eval_steps_per_second": 6.697, "step": 3500 }, { "epoch": 1.32, "learning_rate": 3.355263157894737e-05, "loss": 0.0277, "step": 4000 }, { "epoch": 1.32, "eval_accuracy": 0.9904605263157895, "eval_loss": 0.06314379721879959, "eval_runtime": 113.4629, "eval_samples_per_second": 53.586, "eval_steps_per_second": 6.698, "step": 4000 }, { "epoch": 1.48, "learning_rate": 3.1496710526315794e-05, "loss": 0.0231, "step": 4500 }, { "epoch": 1.48, "eval_accuracy": 0.9912828947368421, "eval_loss": 0.04834901914000511, "eval_runtime": 113.5144, "eval_samples_per_second": 53.561, "eval_steps_per_second": 6.695, "step": 4500 }, { "epoch": 1.64, "learning_rate": 2.944078947368421e-05, "loss": 0.0243, "step": 5000 }, { "epoch": 1.64, "eval_accuracy": 0.9901315789473685, "eval_loss": 0.06605446338653564, "eval_runtime": 113.6185, "eval_samples_per_second": 53.512, "eval_steps_per_second": 6.689, "step": 5000 }, { "epoch": 1.81, "learning_rate": 2.7384868421052633e-05, "loss": 0.0232, "step": 5500 }, { "epoch": 1.81, "eval_accuracy": 0.9932565789473684, "eval_loss": 0.037462268024683, "eval_runtime": 112.9475, "eval_samples_per_second": 53.83, "eval_steps_per_second": 6.729, "step": 5500 }, { "epoch": 1.97, "learning_rate": 2.5328947368421052e-05, "loss": 0.0198, "step": 6000 }, { "epoch": 1.97, "eval_accuracy": 0.9945723684210527, "eval_loss": 0.034890029579401016, "eval_runtime": 112.9353, "eval_samples_per_second": 53.836, "eval_steps_per_second": 6.73, "step": 6000 }, { "epoch": 2.14, "learning_rate": 2.3273026315789475e-05, "loss": 0.0122, "step": 6500 }, { "epoch": 2.14, "eval_accuracy": 0.9962171052631579, "eval_loss": 0.029607130214571953, "eval_runtime": 112.9987, "eval_samples_per_second": 53.806, "eval_steps_per_second": 6.726, "step": 6500 } ], "logging_steps": 500, "max_steps": 12160, "num_train_epochs": 4, "save_steps": 500, "total_flos": 1.368177487872e+16, "trial_name": null, "trial_params": null }