{ "best_metric": null, "best_model_checkpoint": null, "epoch": 6.0, "global_step": 9564, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.31, "learning_rate": 4.7386030949393564e-05, "loss": 1.4207, "step": 500 }, { "epoch": 0.31, "eval_accuracy": 0.9008888888888889, "eval_loss": 0.7066789269447327, "eval_runtime": 2.6965, "eval_samples_per_second": 3337.621, "eval_steps_per_second": 417.203, "step": 500 }, { "epoch": 0.63, "learning_rate": 4.477206189878712e-05, "loss": 0.5086, "step": 1000 }, { "epoch": 0.63, "eval_accuracy": 0.9516666666666667, "eval_loss": 0.3055577874183655, "eval_runtime": 2.6576, "eval_samples_per_second": 3386.509, "eval_steps_per_second": 423.314, "step": 1000 }, { "epoch": 0.94, "learning_rate": 4.215809284818068e-05, "loss": 0.2731, "step": 1500 }, { "epoch": 0.94, "eval_accuracy": 0.9648888888888889, "eval_loss": 0.18555375933647156, "eval_runtime": 2.6597, "eval_samples_per_second": 3383.793, "eval_steps_per_second": 422.974, "step": 1500 }, { "epoch": 1.25, "learning_rate": 3.954412379757424e-05, "loss": 0.1976, "step": 2000 }, { "epoch": 1.25, "eval_accuracy": 0.9701111111111111, "eval_loss": 0.14159560203552246, "eval_runtime": 2.715, "eval_samples_per_second": 3314.86, "eval_steps_per_second": 414.357, "step": 2000 }, { "epoch": 1.57, "learning_rate": 3.69301547469678e-05, "loss": 0.1565, "step": 2500 }, { "epoch": 1.57, "eval_accuracy": 0.9738888888888889, "eval_loss": 0.11081045866012573, "eval_runtime": 2.6963, "eval_samples_per_second": 3337.905, "eval_steps_per_second": 417.238, "step": 2500 }, { "epoch": 1.88, "learning_rate": 3.431618569636136e-05, "loss": 0.128, "step": 3000 }, { "epoch": 1.88, "eval_accuracy": 0.976, "eval_loss": 0.09747562557458878, "eval_runtime": 2.6961, "eval_samples_per_second": 3338.209, "eval_steps_per_second": 417.276, "step": 3000 }, { "epoch": 2.2, "learning_rate": 3.170221664575492e-05, "loss": 0.1133, "step": 3500 }, { "epoch": 2.2, "eval_accuracy": 0.9788888888888889, "eval_loss": 0.08474569022655487, "eval_runtime": 2.7245, "eval_samples_per_second": 3303.375, "eval_steps_per_second": 412.922, "step": 3500 }, { "epoch": 2.51, "learning_rate": 2.9088247595148475e-05, "loss": 0.1031, "step": 4000 }, { "epoch": 2.51, "eval_accuracy": 0.9804444444444445, "eval_loss": 0.07724875211715698, "eval_runtime": 2.6363, "eval_samples_per_second": 3413.847, "eval_steps_per_second": 426.731, "step": 4000 }, { "epoch": 2.82, "learning_rate": 2.6474278544542037e-05, "loss": 0.09, "step": 4500 }, { "epoch": 2.82, "eval_accuracy": 0.9818888888888889, "eval_loss": 0.0697416290640831, "eval_runtime": 2.6295, "eval_samples_per_second": 3422.689, "eval_steps_per_second": 427.836, "step": 4500 }, { "epoch": 3.14, "learning_rate": 2.386030949393559e-05, "loss": 0.0871, "step": 5000 }, { "epoch": 3.14, "eval_accuracy": 0.9815555555555555, "eval_loss": 0.066066212952137, "eval_runtime": 2.6946, "eval_samples_per_second": 3340.06, "eval_steps_per_second": 417.507, "step": 5000 }, { "epoch": 3.45, "learning_rate": 2.1246340443329153e-05, "loss": 0.0733, "step": 5500 }, { "epoch": 3.45, "eval_accuracy": 0.9822222222222222, "eval_loss": 0.06342040002346039, "eval_runtime": 2.6897, "eval_samples_per_second": 3346.09, "eval_steps_per_second": 418.261, "step": 5500 }, { "epoch": 3.76, "learning_rate": 1.863237139272271e-05, "loss": 0.0761, "step": 6000 }, { "epoch": 3.76, "eval_accuracy": 0.983, "eval_loss": 0.06072380393743515, "eval_runtime": 2.6938, "eval_samples_per_second": 3340.98, "eval_steps_per_second": 417.623, "step": 6000 }, { "epoch": 4.08, "learning_rate": 1.601840234211627e-05, "loss": 0.0739, "step": 6500 }, { "epoch": 4.08, "eval_accuracy": 0.9832222222222222, "eval_loss": 0.05795769765973091, "eval_runtime": 2.6767, "eval_samples_per_second": 3362.391, "eval_steps_per_second": 420.299, "step": 6500 }, { "epoch": 4.39, "learning_rate": 1.340443329150983e-05, "loss": 0.0643, "step": 7000 }, { "epoch": 4.39, "eval_accuracy": 0.9844444444444445, "eval_loss": 0.05685265362262726, "eval_runtime": 2.6876, "eval_samples_per_second": 3348.672, "eval_steps_per_second": 418.584, "step": 7000 }, { "epoch": 4.71, "learning_rate": 1.0790464240903388e-05, "loss": 0.0678, "step": 7500 }, { "epoch": 4.71, "eval_accuracy": 0.984, "eval_loss": 0.05617769435048103, "eval_runtime": 2.6484, "eval_samples_per_second": 3398.278, "eval_steps_per_second": 424.785, "step": 7500 }, { "epoch": 5.02, "learning_rate": 8.176495190296946e-06, "loss": 0.0617, "step": 8000 }, { "epoch": 5.02, "eval_accuracy": 0.9853333333333333, "eval_loss": 0.053985536098480225, "eval_runtime": 2.672, "eval_samples_per_second": 3368.244, "eval_steps_per_second": 421.03, "step": 8000 }, { "epoch": 5.33, "learning_rate": 5.562526139690506e-06, "loss": 0.0571, "step": 8500 }, { "epoch": 5.33, "eval_accuracy": 0.9847777777777778, "eval_loss": 0.05352585390210152, "eval_runtime": 2.7082, "eval_samples_per_second": 3323.274, "eval_steps_per_second": 415.409, "step": 8500 }, { "epoch": 5.65, "learning_rate": 2.9485570890840656e-06, "loss": 0.0608, "step": 9000 }, { "epoch": 5.65, "eval_accuracy": 0.9851111111111112, "eval_loss": 0.053133774548769, "eval_runtime": 2.6753, "eval_samples_per_second": 3364.134, "eval_steps_per_second": 420.517, "step": 9000 }, { "epoch": 5.96, "learning_rate": 3.345880384776244e-07, "loss": 0.0571, "step": 9500 }, { "epoch": 5.96, "eval_accuracy": 0.9847777777777778, "eval_loss": 0.05344167724251747, "eval_runtime": 2.6425, "eval_samples_per_second": 3405.863, "eval_steps_per_second": 425.733, "step": 9500 }, { "epoch": 6.0, "step": 9564, "total_flos": 264960533376000.0, "train_loss": 0.1922683648263396, "train_runtime": 134.4457, "train_samples_per_second": 2276.012, "train_steps_per_second": 71.137 } ], "max_steps": 9564, "num_train_epochs": 6, "total_flos": 264960533376000.0, "trial_name": null, "trial_params": null }