|
{ |
|
"best_metric": 0.22444555163383484, |
|
"best_model_checkpoint": "vit-base-tarsh/checkpoint-500", |
|
"epoch": 3.937007874015748, |
|
"global_step": 500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019606299212598428, |
|
"loss": 1.623, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0001921259842519685, |
|
"loss": 1.2641, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00018818897637795277, |
|
"loss": 0.9268, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.000184251968503937, |
|
"loss": 0.7366, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00018031496062992125, |
|
"loss": 0.7007, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00017637795275590552, |
|
"loss": 0.557, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00017244094488188977, |
|
"loss": 0.6111, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.000168503937007874, |
|
"loss": 0.6436, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00016456692913385828, |
|
"loss": 0.4778, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00016062992125984252, |
|
"loss": 0.4692, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_accuracy": 0.9018567639257294, |
|
"eval_loss": 0.4172811806201935, |
|
"eval_runtime": 347.1671, |
|
"eval_samples_per_second": 1.086, |
|
"eval_steps_per_second": 0.138, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.0001566929133858268, |
|
"loss": 0.2606, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.00015275590551181104, |
|
"loss": 0.2834, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.00014881889763779528, |
|
"loss": 0.2804, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.00014488188976377955, |
|
"loss": 0.2504, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.00014094488188976377, |
|
"loss": 0.184, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.00013700787401574804, |
|
"loss": 0.213, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.0001330708661417323, |
|
"loss": 0.2614, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.00012913385826771653, |
|
"loss": 0.2567, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.0001251968503937008, |
|
"loss": 0.1094, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.00012125984251968505, |
|
"loss": 0.2064, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"eval_accuracy": 0.9257294429708223, |
|
"eval_loss": 0.29030969738960266, |
|
"eval_runtime": 349.8071, |
|
"eval_samples_per_second": 1.078, |
|
"eval_steps_per_second": 0.137, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.00011732283464566928, |
|
"loss": 0.1707, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 0.00011338582677165355, |
|
"loss": 0.2581, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.00010944881889763781, |
|
"loss": 0.0885, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.00010551181102362204, |
|
"loss": 0.1996, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.0001015748031496063, |
|
"loss": 0.1614, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 9.763779527559055e-05, |
|
"loss": 0.1308, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 9.370078740157481e-05, |
|
"loss": 0.0973, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 8.976377952755905e-05, |
|
"loss": 0.0764, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 8.582677165354331e-05, |
|
"loss": 0.039, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 8.188976377952757e-05, |
|
"loss": 0.0748, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"eval_accuracy": 0.9363395225464191, |
|
"eval_loss": 0.2725047767162323, |
|
"eval_runtime": 349.7967, |
|
"eval_samples_per_second": 1.078, |
|
"eval_steps_per_second": 0.137, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 7.795275590551181e-05, |
|
"loss": 0.0508, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 7.401574803149607e-05, |
|
"loss": 0.0781, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 7.007874015748031e-05, |
|
"loss": 0.1238, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 6.614173228346457e-05, |
|
"loss": 0.0506, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 6.220472440944882e-05, |
|
"loss": 0.05, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 5.826771653543307e-05, |
|
"loss": 0.0748, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 5.433070866141733e-05, |
|
"loss": 0.0257, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 5.0393700787401575e-05, |
|
"loss": 0.0861, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 4.645669291338583e-05, |
|
"loss": 0.029, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 4.251968503937008e-05, |
|
"loss": 0.0343, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"eval_accuracy": 0.9283819628647215, |
|
"eval_loss": 0.2931118607521057, |
|
"eval_runtime": 343.908, |
|
"eval_samples_per_second": 1.096, |
|
"eval_steps_per_second": 0.14, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 3.858267716535433e-05, |
|
"loss": 0.0234, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 3.464566929133858e-05, |
|
"loss": 0.0208, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 3.070866141732284e-05, |
|
"loss": 0.0186, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 2.677165354330709e-05, |
|
"loss": 0.0398, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 2.283464566929134e-05, |
|
"loss": 0.0262, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 1.889763779527559e-05, |
|
"loss": 0.0482, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 1.4960629921259845e-05, |
|
"loss": 0.0191, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 1.1023622047244095e-05, |
|
"loss": 0.02, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 7.086614173228347e-06, |
|
"loss": 0.0501, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 3.1496062992125985e-06, |
|
"loss": 0.0176, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"eval_accuracy": 0.9442970822281167, |
|
"eval_loss": 0.22444555163383484, |
|
"eval_runtime": 342.5831, |
|
"eval_samples_per_second": 1.1, |
|
"eval_steps_per_second": 0.14, |
|
"step": 500 |
|
} |
|
], |
|
"max_steps": 508, |
|
"num_train_epochs": 4, |
|
"total_flos": 6.169413753348895e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|