|
{ |
|
"best_metric": 0.6333333333333333, |
|
"best_model_checkpoint": "swinv2-tiny-patch4-window8-256-dmae-va-U5-42B\\checkpoint-193", |
|
"epoch": 37.935483870967744, |
|
"eval_steps": 500, |
|
"global_step": 294, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.9, |
|
"eval_accuracy": 0.11666666666666667, |
|
"eval_loss": 7.866283893585205, |
|
"eval_runtime": 1.0387, |
|
"eval_samples_per_second": 57.764, |
|
"eval_steps_per_second": 1.925, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"loss": 6.936, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"eval_accuracy": 0.11666666666666667, |
|
"eval_loss": 7.757172584533691, |
|
"eval_runtime": 0.9676, |
|
"eval_samples_per_second": 62.01, |
|
"eval_steps_per_second": 2.067, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"eval_accuracy": 0.11666666666666667, |
|
"eval_loss": 7.17894983291626, |
|
"eval_runtime": 1.0595, |
|
"eval_samples_per_second": 56.633, |
|
"eval_steps_per_second": 1.888, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 3.2000000000000005e-05, |
|
"loss": 6.7016, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.11666666666666667, |
|
"eval_loss": 5.914210796356201, |
|
"eval_runtime": 1.1717, |
|
"eval_samples_per_second": 51.21, |
|
"eval_steps_per_second": 1.707, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 3.909090909090909e-05, |
|
"loss": 5.5418, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"eval_accuracy": 0.11666666666666667, |
|
"eval_loss": 4.606503486633301, |
|
"eval_runtime": 0.9707, |
|
"eval_samples_per_second": 61.813, |
|
"eval_steps_per_second": 2.06, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"eval_accuracy": 0.11666666666666667, |
|
"eval_loss": 3.2782602310180664, |
|
"eval_runtime": 1.0414, |
|
"eval_samples_per_second": 57.616, |
|
"eval_steps_per_second": 1.921, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 6.19, |
|
"learning_rate": 3.7272727272727276e-05, |
|
"loss": 3.6439, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"eval_accuracy": 0.11666666666666667, |
|
"eval_loss": 2.1983864307403564, |
|
"eval_runtime": 1.0791, |
|
"eval_samples_per_second": 55.604, |
|
"eval_steps_per_second": 1.853, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 7.74, |
|
"learning_rate": 3.545454545454546e-05, |
|
"loss": 2.2477, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.45, |
|
"eval_loss": 1.5630935430526733, |
|
"eval_runtime": 1.2494, |
|
"eval_samples_per_second": 48.021, |
|
"eval_steps_per_second": 1.601, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 8.9, |
|
"eval_accuracy": 0.45, |
|
"eval_loss": 1.315753698348999, |
|
"eval_runtime": 1.0403, |
|
"eval_samples_per_second": 57.675, |
|
"eval_steps_per_second": 1.923, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 9.29, |
|
"learning_rate": 3.363636363636364e-05, |
|
"loss": 1.5076, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 9.94, |
|
"eval_accuracy": 0.3333333333333333, |
|
"eval_loss": 1.3664613962173462, |
|
"eval_runtime": 1.076, |
|
"eval_samples_per_second": 55.76, |
|
"eval_steps_per_second": 1.859, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 10.84, |
|
"learning_rate": 3.181818181818182e-05, |
|
"loss": 1.3865, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 10.97, |
|
"eval_accuracy": 0.35, |
|
"eval_loss": 1.3560707569122314, |
|
"eval_runtime": 1.2112, |
|
"eval_samples_per_second": 49.538, |
|
"eval_steps_per_second": 1.651, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.4666666666666667, |
|
"eval_loss": 1.2673075199127197, |
|
"eval_runtime": 0.9395, |
|
"eval_samples_per_second": 63.867, |
|
"eval_steps_per_second": 2.129, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 12.39, |
|
"learning_rate": 3.0000000000000004e-05, |
|
"loss": 1.3436, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 12.9, |
|
"eval_accuracy": 0.2833333333333333, |
|
"eval_loss": 1.5020203590393066, |
|
"eval_runtime": 1.0828, |
|
"eval_samples_per_second": 55.411, |
|
"eval_steps_per_second": 1.847, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 13.94, |
|
"learning_rate": 2.8181818181818185e-05, |
|
"loss": 1.3187, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 13.94, |
|
"eval_accuracy": 0.4666666666666667, |
|
"eval_loss": 1.3018145561218262, |
|
"eval_runtime": 0.9637, |
|
"eval_samples_per_second": 62.261, |
|
"eval_steps_per_second": 2.075, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 14.97, |
|
"eval_accuracy": 0.48333333333333334, |
|
"eval_loss": 1.2581952810287476, |
|
"eval_runtime": 0.968, |
|
"eval_samples_per_second": 61.985, |
|
"eval_steps_per_second": 2.066, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 15.48, |
|
"learning_rate": 2.6363636363636365e-05, |
|
"loss": 1.2132, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.5, |
|
"eval_loss": 1.2250442504882812, |
|
"eval_runtime": 1.0265, |
|
"eval_samples_per_second": 58.453, |
|
"eval_steps_per_second": 1.948, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 16.9, |
|
"eval_accuracy": 0.5166666666666667, |
|
"eval_loss": 1.2070510387420654, |
|
"eval_runtime": 0.8885, |
|
"eval_samples_per_second": 67.53, |
|
"eval_steps_per_second": 2.251, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 17.03, |
|
"learning_rate": 2.454545454545455e-05, |
|
"loss": 1.2041, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 17.94, |
|
"eval_accuracy": 0.5166666666666667, |
|
"eval_loss": 1.1806073188781738, |
|
"eval_runtime": 1.0077, |
|
"eval_samples_per_second": 59.543, |
|
"eval_steps_per_second": 1.985, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 18.58, |
|
"learning_rate": 2.2727272727272733e-05, |
|
"loss": 1.1756, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 18.97, |
|
"eval_accuracy": 0.55, |
|
"eval_loss": 1.1720372438430786, |
|
"eval_runtime": 1.0797, |
|
"eval_samples_per_second": 55.573, |
|
"eval_steps_per_second": 1.852, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.5333333333333333, |
|
"eval_loss": 1.1318727731704712, |
|
"eval_runtime": 0.8755, |
|
"eval_samples_per_second": 68.529, |
|
"eval_steps_per_second": 2.284, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 20.13, |
|
"learning_rate": 2.090909090909091e-05, |
|
"loss": 1.1107, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 20.9, |
|
"eval_accuracy": 0.5833333333333334, |
|
"eval_loss": 1.0850528478622437, |
|
"eval_runtime": 0.9417, |
|
"eval_samples_per_second": 63.716, |
|
"eval_steps_per_second": 2.124, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 21.68, |
|
"learning_rate": 1.9090909090909094e-05, |
|
"loss": 1.0651, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 21.94, |
|
"eval_accuracy": 0.5833333333333334, |
|
"eval_loss": 1.0553667545318604, |
|
"eval_runtime": 0.9498, |
|
"eval_samples_per_second": 63.168, |
|
"eval_steps_per_second": 2.106, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 22.97, |
|
"eval_accuracy": 0.5, |
|
"eval_loss": 1.0579936504364014, |
|
"eval_runtime": 0.9211, |
|
"eval_samples_per_second": 65.141, |
|
"eval_steps_per_second": 2.171, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 23.23, |
|
"learning_rate": 1.7272727272727274e-05, |
|
"loss": 1.0419, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.5833333333333334, |
|
"eval_loss": 1.1471267938613892, |
|
"eval_runtime": 0.9539, |
|
"eval_samples_per_second": 62.901, |
|
"eval_steps_per_second": 2.097, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 24.77, |
|
"learning_rate": 1.5454545454545454e-05, |
|
"loss": 0.9804, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 24.9, |
|
"eval_accuracy": 0.6333333333333333, |
|
"eval_loss": 1.0633025169372559, |
|
"eval_runtime": 0.9613, |
|
"eval_samples_per_second": 62.413, |
|
"eval_steps_per_second": 2.08, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 25.94, |
|
"eval_accuracy": 0.5833333333333334, |
|
"eval_loss": 1.0597257614135742, |
|
"eval_runtime": 1.9689, |
|
"eval_samples_per_second": 30.474, |
|
"eval_steps_per_second": 1.016, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 26.32, |
|
"learning_rate": 1.3636363636363637e-05, |
|
"loss": 0.9195, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 26.97, |
|
"eval_accuracy": 0.6333333333333333, |
|
"eval_loss": 0.9563372135162354, |
|
"eval_runtime": 0.9494, |
|
"eval_samples_per_second": 63.199, |
|
"eval_steps_per_second": 2.107, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 27.87, |
|
"learning_rate": 1.181818181818182e-05, |
|
"loss": 0.9053, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.5166666666666667, |
|
"eval_loss": 0.9838621020317078, |
|
"eval_runtime": 0.8939, |
|
"eval_samples_per_second": 67.124, |
|
"eval_steps_per_second": 2.237, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 28.9, |
|
"eval_accuracy": 0.5166666666666667, |
|
"eval_loss": 0.9913681745529175, |
|
"eval_runtime": 0.8926, |
|
"eval_samples_per_second": 67.223, |
|
"eval_steps_per_second": 2.241, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 29.42, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8645, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 29.94, |
|
"eval_accuracy": 0.5833333333333334, |
|
"eval_loss": 0.9519664645195007, |
|
"eval_runtime": 0.9221, |
|
"eval_samples_per_second": 65.066, |
|
"eval_steps_per_second": 2.169, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 30.97, |
|
"learning_rate": 8.181818181818183e-06, |
|
"loss": 0.8139, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 30.97, |
|
"eval_accuracy": 0.5333333333333333, |
|
"eval_loss": 0.9572514295578003, |
|
"eval_runtime": 0.8817, |
|
"eval_samples_per_second": 68.051, |
|
"eval_steps_per_second": 2.268, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.5166666666666667, |
|
"eval_loss": 0.9509603977203369, |
|
"eval_runtime": 0.9232, |
|
"eval_samples_per_second": 64.991, |
|
"eval_steps_per_second": 2.166, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 32.52, |
|
"learning_rate": 6.363636363636364e-06, |
|
"loss": 0.8151, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 32.9, |
|
"eval_accuracy": 0.5166666666666667, |
|
"eval_loss": 0.9468602538108826, |
|
"eval_runtime": 0.8657, |
|
"eval_samples_per_second": 69.308, |
|
"eval_steps_per_second": 2.31, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 33.94, |
|
"eval_accuracy": 0.5333333333333333, |
|
"eval_loss": 0.9872100353240967, |
|
"eval_runtime": 0.9376, |
|
"eval_samples_per_second": 63.99, |
|
"eval_steps_per_second": 2.133, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 34.06, |
|
"learning_rate": 4.5454545454545455e-06, |
|
"loss": 0.7837, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 34.97, |
|
"eval_accuracy": 0.5833333333333334, |
|
"eval_loss": 0.9278779625892639, |
|
"eval_runtime": 1.3601, |
|
"eval_samples_per_second": 44.113, |
|
"eval_steps_per_second": 1.47, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 35.61, |
|
"learning_rate": 2.7272727272727272e-06, |
|
"loss": 0.7659, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.5666666666666667, |
|
"eval_loss": 0.9173651337623596, |
|
"eval_runtime": 0.9252, |
|
"eval_samples_per_second": 64.854, |
|
"eval_steps_per_second": 2.162, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 36.9, |
|
"eval_accuracy": 0.5666666666666667, |
|
"eval_loss": 0.934675931930542, |
|
"eval_runtime": 1.2264, |
|
"eval_samples_per_second": 48.923, |
|
"eval_steps_per_second": 1.631, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 37.16, |
|
"learning_rate": 9.090909090909091e-07, |
|
"loss": 0.7835, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 37.94, |
|
"eval_accuracy": 0.5666666666666667, |
|
"eval_loss": 0.9372119903564453, |
|
"eval_runtime": 0.9199, |
|
"eval_samples_per_second": 65.224, |
|
"eval_steps_per_second": 2.174, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 37.94, |
|
"step": 294, |
|
"total_flos": 1.2027310550050406e+18, |
|
"train_loss": 1.8558632776039798, |
|
"train_runtime": 524.2652, |
|
"train_samples_per_second": 78.029, |
|
"train_steps_per_second": 0.561 |
|
} |
|
], |
|
"logging_steps": 12, |
|
"max_steps": 294, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 42, |
|
"save_steps": 500, |
|
"total_flos": 1.2027310550050406e+18, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|