|
{ |
|
"best_metric": 0.4382284382284382, |
|
"best_model_checkpoint": "convnextv2-base-1k-224-for-pre_evaluation/checkpoint-268", |
|
"epoch": 27.906976744186046, |
|
"eval_steps": 500, |
|
"global_step": 300, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 1.5952, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_accuracy": 0.29603729603729606, |
|
"eval_loss": 1.5510554313659668, |
|
"eval_runtime": 8.4324, |
|
"eval_samples_per_second": 50.875, |
|
"eval_steps_per_second": 1.66, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 1.5238, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"eval_accuracy": 0.34265734265734266, |
|
"eval_loss": 1.5091006755828857, |
|
"eval_runtime": 8.9811, |
|
"eval_samples_per_second": 47.767, |
|
"eval_steps_per_second": 1.559, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 5e-05, |
|
"loss": 1.4881, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"eval_accuracy": 0.34498834498834496, |
|
"eval_loss": 1.4853538274765015, |
|
"eval_runtime": 8.6991, |
|
"eval_samples_per_second": 49.315, |
|
"eval_steps_per_second": 1.609, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"learning_rate": 4.814814814814815e-05, |
|
"loss": 1.4708, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.3473193473193473, |
|
"eval_loss": 1.4616328477859497, |
|
"eval_runtime": 8.3701, |
|
"eval_samples_per_second": 51.254, |
|
"eval_steps_per_second": 1.673, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 4.62962962962963e-05, |
|
"loss": 1.4361, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"eval_accuracy": 0.34498834498834496, |
|
"eval_loss": 1.4416619539260864, |
|
"eval_runtime": 8.7032, |
|
"eval_samples_per_second": 49.292, |
|
"eval_steps_per_second": 1.609, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 5.58, |
|
"learning_rate": 4.4444444444444447e-05, |
|
"loss": 1.3764, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 5.95, |
|
"eval_accuracy": 0.3752913752913753, |
|
"eval_loss": 1.4134629964828491, |
|
"eval_runtime": 8.9398, |
|
"eval_samples_per_second": 47.988, |
|
"eval_steps_per_second": 1.566, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 6.51, |
|
"learning_rate": 4.259259259259259e-05, |
|
"loss": 1.3333, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 6.98, |
|
"eval_accuracy": 0.3986013986013986, |
|
"eval_loss": 1.3822472095489502, |
|
"eval_runtime": 8.7499, |
|
"eval_samples_per_second": 49.029, |
|
"eval_steps_per_second": 1.6, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 7.44, |
|
"learning_rate": 4.074074074074074e-05, |
|
"loss": 1.3296, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.36363636363636365, |
|
"eval_loss": 1.4111592769622803, |
|
"eval_runtime": 8.8496, |
|
"eval_samples_per_second": 48.477, |
|
"eval_steps_per_second": 1.582, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 8.37, |
|
"learning_rate": 3.888888888888889e-05, |
|
"loss": 1.2798, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 8.93, |
|
"eval_accuracy": 0.38927738927738925, |
|
"eval_loss": 1.4038037061691284, |
|
"eval_runtime": 8.5853, |
|
"eval_samples_per_second": 49.969, |
|
"eval_steps_per_second": 1.631, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 9.3, |
|
"learning_rate": 3.7037037037037037e-05, |
|
"loss": 1.3129, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 9.95, |
|
"eval_accuracy": 0.3776223776223776, |
|
"eval_loss": 1.424072265625, |
|
"eval_runtime": 8.6578, |
|
"eval_samples_per_second": 49.551, |
|
"eval_steps_per_second": 1.617, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 10.23, |
|
"learning_rate": 3.518518518518519e-05, |
|
"loss": 1.3014, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 10.98, |
|
"eval_accuracy": 0.38927738927738925, |
|
"eval_loss": 1.356952428817749, |
|
"eval_runtime": 8.5422, |
|
"eval_samples_per_second": 50.221, |
|
"eval_steps_per_second": 1.639, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 11.16, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 1.2332, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.38927738927738925, |
|
"eval_loss": 1.4072706699371338, |
|
"eval_runtime": 8.3607, |
|
"eval_samples_per_second": 51.312, |
|
"eval_steps_per_second": 1.675, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 12.09, |
|
"learning_rate": 3.148148148148148e-05, |
|
"loss": 1.212, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 12.93, |
|
"eval_accuracy": 0.40326340326340326, |
|
"eval_loss": 1.376956820487976, |
|
"eval_runtime": 8.3675, |
|
"eval_samples_per_second": 51.27, |
|
"eval_steps_per_second": 1.673, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 13.02, |
|
"learning_rate": 2.962962962962963e-05, |
|
"loss": 1.1844, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 13.95, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 1.1763, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 13.95, |
|
"eval_accuracy": 0.3962703962703963, |
|
"eval_loss": 1.3891488313674927, |
|
"eval_runtime": 8.5717, |
|
"eval_samples_per_second": 50.049, |
|
"eval_steps_per_second": 1.633, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 14.88, |
|
"learning_rate": 2.5925925925925925e-05, |
|
"loss": 1.124, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 14.98, |
|
"eval_accuracy": 0.4125874125874126, |
|
"eval_loss": 1.3915237188339233, |
|
"eval_runtime": 8.5638, |
|
"eval_samples_per_second": 50.095, |
|
"eval_steps_per_second": 1.635, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 15.81, |
|
"learning_rate": 2.4074074074074074e-05, |
|
"loss": 1.0963, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.4149184149184149, |
|
"eval_loss": 1.4098657369613647, |
|
"eval_runtime": 8.8116, |
|
"eval_samples_per_second": 48.686, |
|
"eval_steps_per_second": 1.589, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 16.74, |
|
"learning_rate": 2.2222222222222223e-05, |
|
"loss": 1.0547, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 16.93, |
|
"eval_accuracy": 0.40326340326340326, |
|
"eval_loss": 1.4206278324127197, |
|
"eval_runtime": 8.7717, |
|
"eval_samples_per_second": 48.907, |
|
"eval_steps_per_second": 1.596, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 17.67, |
|
"learning_rate": 2.037037037037037e-05, |
|
"loss": 1.0631, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 17.95, |
|
"eval_accuracy": 0.4195804195804196, |
|
"eval_loss": 1.4040827751159668, |
|
"eval_runtime": 8.3983, |
|
"eval_samples_per_second": 51.082, |
|
"eval_steps_per_second": 1.667, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 18.6, |
|
"learning_rate": 1.8518518518518518e-05, |
|
"loss": 0.9911, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 18.98, |
|
"eval_accuracy": 0.4149184149184149, |
|
"eval_loss": 1.4271957874298096, |
|
"eval_runtime": 8.2919, |
|
"eval_samples_per_second": 51.737, |
|
"eval_steps_per_second": 1.688, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 19.53, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 1.005, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.4219114219114219, |
|
"eval_loss": 1.42105233669281, |
|
"eval_runtime": 8.2769, |
|
"eval_samples_per_second": 51.831, |
|
"eval_steps_per_second": 1.691, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 20.47, |
|
"learning_rate": 1.4814814814814815e-05, |
|
"loss": 0.9663, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 20.93, |
|
"eval_accuracy": 0.40093240093240096, |
|
"eval_loss": 1.466171145439148, |
|
"eval_runtime": 9.4718, |
|
"eval_samples_per_second": 45.292, |
|
"eval_steps_per_second": 1.478, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 21.4, |
|
"learning_rate": 1.2962962962962962e-05, |
|
"loss": 0.9533, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 21.95, |
|
"eval_accuracy": 0.43356643356643354, |
|
"eval_loss": 1.428614616394043, |
|
"eval_runtime": 8.343, |
|
"eval_samples_per_second": 51.42, |
|
"eval_steps_per_second": 1.678, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 22.33, |
|
"learning_rate": 1.1111111111111112e-05, |
|
"loss": 0.9506, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 22.98, |
|
"eval_accuracy": 0.43123543123543123, |
|
"eval_loss": 1.413465976715088, |
|
"eval_runtime": 8.7694, |
|
"eval_samples_per_second": 48.92, |
|
"eval_steps_per_second": 1.596, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 23.26, |
|
"learning_rate": 9.259259259259259e-06, |
|
"loss": 0.8973, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.42657342657342656, |
|
"eval_loss": 1.442847728729248, |
|
"eval_runtime": 8.7464, |
|
"eval_samples_per_second": 49.049, |
|
"eval_steps_per_second": 1.601, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 24.19, |
|
"learning_rate": 7.4074074074074075e-06, |
|
"loss": 0.8807, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 24.93, |
|
"eval_accuracy": 0.4382284382284382, |
|
"eval_loss": 1.447946548461914, |
|
"eval_runtime": 8.3339, |
|
"eval_samples_per_second": 51.476, |
|
"eval_steps_per_second": 1.68, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 25.12, |
|
"learning_rate": 5.555555555555556e-06, |
|
"loss": 0.8731, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 25.95, |
|
"eval_accuracy": 0.4289044289044289, |
|
"eval_loss": 1.4429428577423096, |
|
"eval_runtime": 8.7998, |
|
"eval_samples_per_second": 48.751, |
|
"eval_steps_per_second": 1.591, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 26.05, |
|
"learning_rate": 3.7037037037037037e-06, |
|
"loss": 0.8366, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 26.98, |
|
"learning_rate": 1.8518518518518519e-06, |
|
"loss": 0.8472, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 26.98, |
|
"eval_accuracy": 0.43123543123543123, |
|
"eval_loss": 1.4461231231689453, |
|
"eval_runtime": 8.7766, |
|
"eval_samples_per_second": 48.88, |
|
"eval_steps_per_second": 1.595, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 27.91, |
|
"learning_rate": 0.0, |
|
"loss": 0.8348, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 27.91, |
|
"eval_accuracy": 0.43356643356643354, |
|
"eval_loss": 1.453087568283081, |
|
"eval_runtime": 8.3523, |
|
"eval_samples_per_second": 51.363, |
|
"eval_steps_per_second": 1.676, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 27.91, |
|
"step": 300, |
|
"total_flos": 3.007291871298355e+18, |
|
"train_loss": 1.1542485936482747, |
|
"train_runtime": 2419.37, |
|
"train_samples_per_second": 16.864, |
|
"train_steps_per_second": 0.124 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 300, |
|
"num_train_epochs": 30, |
|
"save_steps": 500, |
|
"total_flos": 3.007291871298355e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|