{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "learning_rate": 9.55e-06, "loss": 0.1171, "step": 10 }, { "epoch": 0.05, "eval_accuracy": 0.9800000190734863, "eval_loss": 0.1231689453125, "eval_runtime": 25.5746, "eval_samples_per_second": 7.82, "eval_steps_per_second": 1.955, "step": 10 }, { "epoch": 0.1, "learning_rate": 9.100000000000001e-06, "loss": 0.2892, "step": 20 }, { "epoch": 0.1, "eval_accuracy": 0.9800000190734863, "eval_loss": 0.1242372915148735, "eval_runtime": 11.4531, "eval_samples_per_second": 17.463, "eval_steps_per_second": 4.366, "step": 20 }, { "epoch": 0.15, "learning_rate": 8.65e-06, "loss": 0.1402, "step": 30 }, { "epoch": 0.15, "eval_accuracy": 0.9800000190734863, "eval_loss": 0.12265077233314514, "eval_runtime": 11.5293, "eval_samples_per_second": 17.347, "eval_steps_per_second": 4.337, "step": 30 }, { "epoch": 0.2, "learning_rate": 8.15e-06, "loss": 0.0031, "step": 40 }, { "epoch": 0.2, "eval_accuracy": 0.9800000190734863, "eval_loss": 0.11888507753610611, "eval_runtime": 10.4745, "eval_samples_per_second": 19.094, "eval_steps_per_second": 4.774, "step": 40 }, { "epoch": 0.25, "learning_rate": 7.650000000000001e-06, "loss": 0.0396, "step": 50 }, { "epoch": 0.25, "eval_accuracy": 0.9800000190734863, "eval_loss": 0.114516980946064, "eval_runtime": 12.1068, "eval_samples_per_second": 16.52, "eval_steps_per_second": 4.13, "step": 50 }, { "epoch": 0.3, "learning_rate": 7.15e-06, "loss": 0.002, "step": 60 }, { "epoch": 0.3, "eval_accuracy": 0.9850000143051147, "eval_loss": 0.0992085188627243, "eval_runtime": 14.5419, "eval_samples_per_second": 13.753, "eval_steps_per_second": 3.438, "step": 60 }, { "epoch": 0.35, "learning_rate": 6.650000000000001e-06, "loss": 0.0017, "step": 70 }, { "epoch": 0.35, "eval_accuracy": 0.9850000143051147, "eval_loss": 0.0962493047118187, "eval_runtime": 11.2088, "eval_samples_per_second": 17.843, "eval_steps_per_second": 4.461, "step": 70 }, { "epoch": 0.4, "learning_rate": 6.15e-06, "loss": 0.2019, "step": 80 }, { "epoch": 0.4, "eval_accuracy": 0.9850000143051147, "eval_loss": 0.09745711088180542, "eval_runtime": 11.6004, "eval_samples_per_second": 17.241, "eval_steps_per_second": 4.31, "step": 80 }, { "epoch": 0.45, "learning_rate": 5.65e-06, "loss": 0.3375, "step": 90 }, { "epoch": 0.45, "eval_accuracy": 0.9850000143051147, "eval_loss": 0.10359522700309753, "eval_runtime": 11.2369, "eval_samples_per_second": 17.798, "eval_steps_per_second": 4.45, "step": 90 }, { "epoch": 0.5, "learning_rate": 5.150000000000001e-06, "loss": 0.1627, "step": 100 }, { "epoch": 0.5, "eval_accuracy": 0.9850000143051147, "eval_loss": 0.10511761903762817, "eval_runtime": 16.5862, "eval_samples_per_second": 12.058, "eval_steps_per_second": 3.015, "step": 100 }, { "epoch": 0.55, "learning_rate": 4.65e-06, "loss": 0.0135, "step": 110 }, { "epoch": 0.55, "eval_accuracy": 0.9800000190734863, "eval_loss": 0.10832720994949341, "eval_runtime": 12.361, "eval_samples_per_second": 16.18, "eval_steps_per_second": 4.045, "step": 110 }, { "epoch": 0.6, "learning_rate": 4.15e-06, "loss": 0.3365, "step": 120 }, { "epoch": 0.6, "eval_accuracy": 0.9800000190734863, "eval_loss": 0.10196716338396072, "eval_runtime": 11.3569, "eval_samples_per_second": 17.61, "eval_steps_per_second": 4.403, "step": 120 }, { "epoch": 0.65, "learning_rate": 3.65e-06, "loss": 0.0401, "step": 130 }, { "epoch": 0.65, "eval_accuracy": 0.9850000143051147, "eval_loss": 0.0968363955616951, "eval_runtime": 11.6857, "eval_samples_per_second": 17.115, "eval_steps_per_second": 4.279, "step": 130 }, { "epoch": 0.7, "learning_rate": 3.1500000000000003e-06, "loss": 0.5678, "step": 140 }, { "epoch": 0.7, "eval_accuracy": 0.9850000143051147, "eval_loss": 0.09780912101268768, "eval_runtime": 11.6042, "eval_samples_per_second": 17.235, "eval_steps_per_second": 4.309, "step": 140 }, { "epoch": 0.75, "learning_rate": 2.6500000000000005e-06, "loss": 0.2519, "step": 150 }, { "epoch": 0.75, "eval_accuracy": 0.9800000190734863, "eval_loss": 0.10378458350896835, "eval_runtime": 11.6217, "eval_samples_per_second": 17.209, "eval_steps_per_second": 4.302, "step": 150 }, { "epoch": 0.8, "learning_rate": 2.15e-06, "loss": 0.0373, "step": 160 }, { "epoch": 0.8, "eval_accuracy": 0.9800000190734863, "eval_loss": 0.11212433129549026, "eval_runtime": 11.5639, "eval_samples_per_second": 17.295, "eval_steps_per_second": 4.324, "step": 160 }, { "epoch": 0.85, "learning_rate": 1.6500000000000003e-06, "loss": 0.2023, "step": 170 }, { "epoch": 0.85, "eval_accuracy": 0.9800000190734863, "eval_loss": 0.1162676215171814, "eval_runtime": 10.3566, "eval_samples_per_second": 19.311, "eval_steps_per_second": 4.828, "step": 170 }, { "epoch": 0.9, "learning_rate": 1.1500000000000002e-06, "loss": 0.0061, "step": 180 }, { "epoch": 0.9, "eval_accuracy": 0.9800000190734863, "eval_loss": 0.11709693819284439, "eval_runtime": 10.4736, "eval_samples_per_second": 19.096, "eval_steps_per_second": 4.774, "step": 180 }, { "epoch": 0.95, "learning_rate": 6.5e-07, "loss": 0.0844, "step": 190 }, { "epoch": 0.95, "eval_accuracy": 0.9800000190734863, "eval_loss": 0.115566685795784, "eval_runtime": 11.1708, "eval_samples_per_second": 17.904, "eval_steps_per_second": 4.476, "step": 190 }, { "epoch": 1.0, "learning_rate": 1.5000000000000002e-07, "loss": 0.0854, "step": 200 }, { "epoch": 1.0, "eval_accuracy": 0.9800000190734863, "eval_loss": 0.1153903380036354, "eval_runtime": 11.4553, "eval_samples_per_second": 17.459, "eval_steps_per_second": 4.365, "step": 200 }, { "epoch": 1.0, "step": 200, "total_flos": 5.769435596456942e+16, "train_loss": 0.14601652294397355, "train_runtime": 502.1897, "train_samples_per_second": 1.593, "train_steps_per_second": 0.398 } ], "max_steps": 200, "num_train_epochs": 1, "total_flos": 5.769435596456942e+16, "trial_name": null, "trial_params": null }