{ "best_metric": 0.4382284382284382, "best_model_checkpoint": "convnextv2-base-1k-224-for-pre_evaluation/checkpoint-268", "epoch": 27.906976744186046, "eval_steps": 500, "global_step": 300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.93, "learning_rate": 1.6666666666666667e-05, "loss": 1.5952, "step": 10 }, { "epoch": 0.93, "eval_accuracy": 0.29603729603729606, "eval_loss": 1.5510554313659668, "eval_runtime": 8.4324, "eval_samples_per_second": 50.875, "eval_steps_per_second": 1.66, "step": 10 }, { "epoch": 1.86, "learning_rate": 3.3333333333333335e-05, "loss": 1.5238, "step": 20 }, { "epoch": 1.95, "eval_accuracy": 0.34265734265734266, "eval_loss": 1.5091006755828857, "eval_runtime": 8.9811, "eval_samples_per_second": 47.767, "eval_steps_per_second": 1.559, "step": 21 }, { "epoch": 2.79, "learning_rate": 5e-05, "loss": 1.4881, "step": 30 }, { "epoch": 2.98, "eval_accuracy": 0.34498834498834496, "eval_loss": 1.4853538274765015, "eval_runtime": 8.6991, "eval_samples_per_second": 49.315, "eval_steps_per_second": 1.609, "step": 32 }, { "epoch": 3.72, "learning_rate": 4.814814814814815e-05, "loss": 1.4708, "step": 40 }, { "epoch": 4.0, "eval_accuracy": 0.3473193473193473, "eval_loss": 1.4616328477859497, "eval_runtime": 8.3701, "eval_samples_per_second": 51.254, "eval_steps_per_second": 1.673, "step": 43 }, { "epoch": 4.65, "learning_rate": 4.62962962962963e-05, "loss": 1.4361, "step": 50 }, { "epoch": 4.93, "eval_accuracy": 0.34498834498834496, "eval_loss": 1.4416619539260864, "eval_runtime": 8.7032, "eval_samples_per_second": 49.292, "eval_steps_per_second": 1.609, "step": 53 }, { "epoch": 5.58, "learning_rate": 4.4444444444444447e-05, "loss": 1.3764, "step": 60 }, { "epoch": 5.95, "eval_accuracy": 0.3752913752913753, "eval_loss": 1.4134629964828491, "eval_runtime": 8.9398, "eval_samples_per_second": 47.988, "eval_steps_per_second": 1.566, "step": 64 }, { "epoch": 6.51, "learning_rate": 4.259259259259259e-05, "loss": 1.3333, "step": 70 }, { "epoch": 6.98, "eval_accuracy": 0.3986013986013986, "eval_loss": 1.3822472095489502, "eval_runtime": 8.7499, "eval_samples_per_second": 49.029, "eval_steps_per_second": 1.6, "step": 75 }, { "epoch": 7.44, "learning_rate": 4.074074074074074e-05, "loss": 1.3296, "step": 80 }, { "epoch": 8.0, "eval_accuracy": 0.36363636363636365, "eval_loss": 1.4111592769622803, "eval_runtime": 8.8496, "eval_samples_per_second": 48.477, "eval_steps_per_second": 1.582, "step": 86 }, { "epoch": 8.37, "learning_rate": 3.888888888888889e-05, "loss": 1.2798, "step": 90 }, { "epoch": 8.93, "eval_accuracy": 0.38927738927738925, "eval_loss": 1.4038037061691284, "eval_runtime": 8.5853, "eval_samples_per_second": 49.969, "eval_steps_per_second": 1.631, "step": 96 }, { "epoch": 9.3, "learning_rate": 3.7037037037037037e-05, "loss": 1.3129, "step": 100 }, { "epoch": 9.95, "eval_accuracy": 0.3776223776223776, "eval_loss": 1.424072265625, "eval_runtime": 8.6578, "eval_samples_per_second": 49.551, "eval_steps_per_second": 1.617, "step": 107 }, { "epoch": 10.23, "learning_rate": 3.518518518518519e-05, "loss": 1.3014, "step": 110 }, { "epoch": 10.98, "eval_accuracy": 0.38927738927738925, "eval_loss": 1.356952428817749, "eval_runtime": 8.5422, "eval_samples_per_second": 50.221, "eval_steps_per_second": 1.639, "step": 118 }, { "epoch": 11.16, "learning_rate": 3.3333333333333335e-05, "loss": 1.2332, "step": 120 }, { "epoch": 12.0, "eval_accuracy": 0.38927738927738925, "eval_loss": 1.4072706699371338, "eval_runtime": 8.3607, "eval_samples_per_second": 51.312, "eval_steps_per_second": 1.675, "step": 129 }, { "epoch": 12.09, "learning_rate": 3.148148148148148e-05, "loss": 1.212, "step": 130 }, { "epoch": 12.93, "eval_accuracy": 0.40326340326340326, "eval_loss": 1.376956820487976, "eval_runtime": 8.3675, "eval_samples_per_second": 51.27, "eval_steps_per_second": 1.673, "step": 139 }, { "epoch": 13.02, "learning_rate": 2.962962962962963e-05, "loss": 1.1844, "step": 140 }, { "epoch": 13.95, "learning_rate": 2.777777777777778e-05, "loss": 1.1763, "step": 150 }, { "epoch": 13.95, "eval_accuracy": 0.3962703962703963, "eval_loss": 1.3891488313674927, "eval_runtime": 8.5717, "eval_samples_per_second": 50.049, "eval_steps_per_second": 1.633, "step": 150 }, { "epoch": 14.88, "learning_rate": 2.5925925925925925e-05, "loss": 1.124, "step": 160 }, { "epoch": 14.98, "eval_accuracy": 0.4125874125874126, "eval_loss": 1.3915237188339233, "eval_runtime": 8.5638, "eval_samples_per_second": 50.095, "eval_steps_per_second": 1.635, "step": 161 }, { "epoch": 15.81, "learning_rate": 2.4074074074074074e-05, "loss": 1.0963, "step": 170 }, { "epoch": 16.0, "eval_accuracy": 0.4149184149184149, "eval_loss": 1.4098657369613647, "eval_runtime": 8.8116, "eval_samples_per_second": 48.686, "eval_steps_per_second": 1.589, "step": 172 }, { "epoch": 16.74, "learning_rate": 2.2222222222222223e-05, "loss": 1.0547, "step": 180 }, { "epoch": 16.93, "eval_accuracy": 0.40326340326340326, "eval_loss": 1.4206278324127197, "eval_runtime": 8.7717, "eval_samples_per_second": 48.907, "eval_steps_per_second": 1.596, "step": 182 }, { "epoch": 17.67, "learning_rate": 2.037037037037037e-05, "loss": 1.0631, "step": 190 }, { "epoch": 17.95, "eval_accuracy": 0.4195804195804196, "eval_loss": 1.4040827751159668, "eval_runtime": 8.3983, "eval_samples_per_second": 51.082, "eval_steps_per_second": 1.667, "step": 193 }, { "epoch": 18.6, "learning_rate": 1.8518518518518518e-05, "loss": 0.9911, "step": 200 }, { "epoch": 18.98, "eval_accuracy": 0.4149184149184149, "eval_loss": 1.4271957874298096, "eval_runtime": 8.2919, "eval_samples_per_second": 51.737, "eval_steps_per_second": 1.688, "step": 204 }, { "epoch": 19.53, "learning_rate": 1.6666666666666667e-05, "loss": 1.005, "step": 210 }, { "epoch": 20.0, "eval_accuracy": 0.4219114219114219, "eval_loss": 1.42105233669281, "eval_runtime": 8.2769, "eval_samples_per_second": 51.831, "eval_steps_per_second": 1.691, "step": 215 }, { "epoch": 20.47, "learning_rate": 1.4814814814814815e-05, "loss": 0.9663, "step": 220 }, { "epoch": 20.93, "eval_accuracy": 0.40093240093240096, "eval_loss": 1.466171145439148, "eval_runtime": 9.4718, "eval_samples_per_second": 45.292, "eval_steps_per_second": 1.478, "step": 225 }, { "epoch": 21.4, "learning_rate": 1.2962962962962962e-05, "loss": 0.9533, "step": 230 }, { "epoch": 21.95, "eval_accuracy": 0.43356643356643354, "eval_loss": 1.428614616394043, "eval_runtime": 8.343, "eval_samples_per_second": 51.42, "eval_steps_per_second": 1.678, "step": 236 }, { "epoch": 22.33, "learning_rate": 1.1111111111111112e-05, "loss": 0.9506, "step": 240 }, { "epoch": 22.98, "eval_accuracy": 0.43123543123543123, "eval_loss": 1.413465976715088, "eval_runtime": 8.7694, "eval_samples_per_second": 48.92, "eval_steps_per_second": 1.596, "step": 247 }, { "epoch": 23.26, "learning_rate": 9.259259259259259e-06, "loss": 0.8973, "step": 250 }, { "epoch": 24.0, "eval_accuracy": 0.42657342657342656, "eval_loss": 1.442847728729248, "eval_runtime": 8.7464, "eval_samples_per_second": 49.049, "eval_steps_per_second": 1.601, "step": 258 }, { "epoch": 24.19, "learning_rate": 7.4074074074074075e-06, "loss": 0.8807, "step": 260 }, { "epoch": 24.93, "eval_accuracy": 0.4382284382284382, "eval_loss": 1.447946548461914, "eval_runtime": 8.3339, "eval_samples_per_second": 51.476, "eval_steps_per_second": 1.68, "step": 268 }, { "epoch": 25.12, "learning_rate": 5.555555555555556e-06, "loss": 0.8731, "step": 270 }, { "epoch": 25.95, "eval_accuracy": 0.4289044289044289, "eval_loss": 1.4429428577423096, "eval_runtime": 8.7998, "eval_samples_per_second": 48.751, "eval_steps_per_second": 1.591, "step": 279 }, { "epoch": 26.05, "learning_rate": 3.7037037037037037e-06, "loss": 0.8366, "step": 280 }, { "epoch": 26.98, "learning_rate": 1.8518518518518519e-06, "loss": 0.8472, "step": 290 }, { "epoch": 26.98, "eval_accuracy": 0.43123543123543123, "eval_loss": 1.4461231231689453, "eval_runtime": 8.7766, "eval_samples_per_second": 48.88, "eval_steps_per_second": 1.595, "step": 290 }, { "epoch": 27.91, "learning_rate": 0.0, "loss": 0.8348, "step": 300 }, { "epoch": 27.91, "eval_accuracy": 0.43356643356643354, "eval_loss": 1.453087568283081, "eval_runtime": 8.3523, "eval_samples_per_second": 51.363, "eval_steps_per_second": 1.676, "step": 300 }, { "epoch": 27.91, "step": 300, "total_flos": 3.007291871298355e+18, "train_loss": 1.1542485936482747, "train_runtime": 2419.37, "train_samples_per_second": 16.864, "train_steps_per_second": 0.124 } ], "logging_steps": 10, "max_steps": 300, "num_train_epochs": 30, "save_steps": 500, "total_flos": 3.007291871298355e+18, "trial_name": null, "trial_params": null }