|
{ |
|
"best_metric": 0.8833333333333333, |
|
"best_model_checkpoint": "vit-base-patch16-224-dmae-va-U5-20-45-5e-05\\checkpoint-116", |
|
"epoch": 18.06451612903226, |
|
"eval_steps": 500, |
|
"global_step": 140, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.9, |
|
"eval_accuracy": 0.48333333333333334, |
|
"eval_loss": 1.360709547996521, |
|
"eval_runtime": 1.0052, |
|
"eval_samples_per_second": 59.688, |
|
"eval_steps_per_second": 1.99, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 5.37593984962406e-05, |
|
"loss": 1.3752, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"eval_accuracy": 0.5833333333333334, |
|
"eval_loss": 1.2550396919250488, |
|
"eval_runtime": 0.9591, |
|
"eval_samples_per_second": 62.556, |
|
"eval_steps_per_second": 2.085, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 4.9624060150375936e-05, |
|
"loss": 1.2809, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 1.1435749530792236, |
|
"eval_runtime": 0.9632, |
|
"eval_samples_per_second": 62.294, |
|
"eval_steps_per_second": 2.076, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 4.548872180451128e-05, |
|
"loss": 1.1273, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.5833333333333334, |
|
"eval_loss": 1.038071870803833, |
|
"eval_runtime": 0.9749, |
|
"eval_samples_per_second": 61.545, |
|
"eval_steps_per_second": 2.052, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"eval_accuracy": 0.6833333333333333, |
|
"eval_loss": 0.9619849920272827, |
|
"eval_runtime": 0.947, |
|
"eval_samples_per_second": 63.359, |
|
"eval_steps_per_second": 2.112, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 5.16, |
|
"learning_rate": 4.135338345864661e-05, |
|
"loss": 0.9919, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.9154461622238159, |
|
"eval_runtime": 1.0081, |
|
"eval_samples_per_second": 59.516, |
|
"eval_steps_per_second": 1.984, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"learning_rate": 3.7218045112781954e-05, |
|
"loss": 0.8971, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"eval_accuracy": 0.7666666666666667, |
|
"eval_loss": 0.8501840829849243, |
|
"eval_runtime": 0.9769, |
|
"eval_samples_per_second": 61.421, |
|
"eval_steps_per_second": 2.047, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 7.74, |
|
"learning_rate": 3.3082706766917295e-05, |
|
"loss": 0.8049, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.8643815517425537, |
|
"eval_runtime": 0.9995, |
|
"eval_samples_per_second": 60.028, |
|
"eval_steps_per_second": 2.001, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 8.9, |
|
"eval_accuracy": 0.7833333333333333, |
|
"eval_loss": 0.8009711503982544, |
|
"eval_runtime": 0.9482, |
|
"eval_samples_per_second": 63.276, |
|
"eval_steps_per_second": 2.109, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"learning_rate": 2.894736842105263e-05, |
|
"loss": 0.7119, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 9.94, |
|
"eval_accuracy": 0.8333333333333334, |
|
"eval_loss": 0.7275562882423401, |
|
"eval_runtime": 0.9716, |
|
"eval_samples_per_second": 61.757, |
|
"eval_steps_per_second": 2.059, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 10.32, |
|
"learning_rate": 2.4812030075187968e-05, |
|
"loss": 0.6172, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 10.97, |
|
"eval_accuracy": 0.8166666666666667, |
|
"eval_loss": 0.6698881983757019, |
|
"eval_runtime": 0.9511, |
|
"eval_samples_per_second": 63.087, |
|
"eval_steps_per_second": 2.103, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 11.61, |
|
"learning_rate": 2.0676691729323306e-05, |
|
"loss": 0.5294, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8166666666666667, |
|
"eval_loss": 0.6532084941864014, |
|
"eval_runtime": 0.9965, |
|
"eval_samples_per_second": 60.209, |
|
"eval_steps_per_second": 2.007, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 12.9, |
|
"learning_rate": 1.6541353383458648e-05, |
|
"loss": 0.4696, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 12.9, |
|
"eval_accuracy": 0.85, |
|
"eval_loss": 0.6264522671699524, |
|
"eval_runtime": 0.9786, |
|
"eval_samples_per_second": 61.314, |
|
"eval_steps_per_second": 2.044, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 13.94, |
|
"eval_accuracy": 0.85, |
|
"eval_loss": 0.6011604070663452, |
|
"eval_runtime": 1.0534, |
|
"eval_samples_per_second": 56.959, |
|
"eval_steps_per_second": 1.899, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 14.19, |
|
"learning_rate": 1.2406015037593984e-05, |
|
"loss": 0.4074, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 14.97, |
|
"eval_accuracy": 0.8833333333333333, |
|
"eval_loss": 0.5799916982650757, |
|
"eval_runtime": 1.0374, |
|
"eval_samples_per_second": 57.836, |
|
"eval_steps_per_second": 1.928, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 15.48, |
|
"learning_rate": 8.270676691729324e-06, |
|
"loss": 0.3822, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8666666666666667, |
|
"eval_loss": 0.5691779851913452, |
|
"eval_runtime": 1.0446, |
|
"eval_samples_per_second": 57.439, |
|
"eval_steps_per_second": 1.915, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 16.77, |
|
"learning_rate": 4.135338345864662e-06, |
|
"loss": 0.3651, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 16.9, |
|
"eval_accuracy": 0.8, |
|
"eval_loss": 0.6064590811729431, |
|
"eval_runtime": 0.9789, |
|
"eval_samples_per_second": 61.29, |
|
"eval_steps_per_second": 2.043, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 17.94, |
|
"eval_accuracy": 0.8666666666666667, |
|
"eval_loss": 0.5680865049362183, |
|
"eval_runtime": 0.9558, |
|
"eval_samples_per_second": 62.773, |
|
"eval_steps_per_second": 2.092, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 18.06, |
|
"learning_rate": 0.0, |
|
"loss": 0.3731, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 18.06, |
|
"eval_accuracy": 0.8666666666666667, |
|
"eval_loss": 0.5674660205841064, |
|
"eval_runtime": 1.0121, |
|
"eval_samples_per_second": 59.281, |
|
"eval_steps_per_second": 1.976, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 18.06, |
|
"step": 140, |
|
"total_flos": 1.3635734913760297e+18, |
|
"train_loss": 0.738073068005698, |
|
"train_runtime": 291.1702, |
|
"train_samples_per_second": 66.902, |
|
"train_steps_per_second": 0.481 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 140, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 1.3635734913760297e+18, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|