|
{ |
|
"best_metric": 0.9411764705882353, |
|
"best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-fish/checkpoint-21", |
|
"epoch": 60.0, |
|
"eval_steps": 500, |
|
"global_step": 75, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.8, |
|
"eval_accuracy": 0.29411764705882354, |
|
"eval_loss": 1.803544521331787, |
|
"eval_runtime": 0.4219, |
|
"eval_samples_per_second": 40.297, |
|
"eval_steps_per_second": 2.37, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_accuracy": 0.29411764705882354, |
|
"eval_loss": 1.786075234413147, |
|
"eval_runtime": 0.425, |
|
"eval_samples_per_second": 39.998, |
|
"eval_steps_per_second": 2.353, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_accuracy": 0.29411764705882354, |
|
"eval_loss": 1.7553783655166626, |
|
"eval_runtime": 0.409, |
|
"eval_samples_per_second": 41.562, |
|
"eval_steps_per_second": 2.445, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.35294117647058826, |
|
"eval_loss": 1.6953703165054321, |
|
"eval_runtime": 0.4264, |
|
"eval_samples_per_second": 39.865, |
|
"eval_steps_per_second": 2.345, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"eval_accuracy": 0.4117647058823529, |
|
"eval_loss": 1.678010106086731, |
|
"eval_runtime": 0.4097, |
|
"eval_samples_per_second": 41.492, |
|
"eval_steps_per_second": 2.441, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"eval_accuracy": 0.4117647058823529, |
|
"eval_loss": 1.65355384349823, |
|
"eval_runtime": 0.3958, |
|
"eval_samples_per_second": 42.953, |
|
"eval_steps_per_second": 2.527, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"eval_accuracy": 0.4117647058823529, |
|
"eval_loss": 1.6221718788146973, |
|
"eval_runtime": 0.4077, |
|
"eval_samples_per_second": 41.701, |
|
"eval_steps_per_second": 2.453, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 8.47060489654541, |
|
"learning_rate": 4.850746268656717e-05, |
|
"loss": 1.6467, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.5294117647058824, |
|
"eval_loss": 1.4681596755981445, |
|
"eval_runtime": 0.4106, |
|
"eval_samples_per_second": 41.405, |
|
"eval_steps_per_second": 2.436, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"eval_accuracy": 0.5294117647058824, |
|
"eval_loss": 1.3260958194732666, |
|
"eval_runtime": 0.4111, |
|
"eval_samples_per_second": 41.351, |
|
"eval_steps_per_second": 2.432, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"eval_accuracy": 0.5294117647058824, |
|
"eval_loss": 1.1887730360031128, |
|
"eval_runtime": 0.4034, |
|
"eval_samples_per_second": 42.144, |
|
"eval_steps_per_second": 2.479, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 10.4, |
|
"eval_accuracy": 0.5294117647058824, |
|
"eval_loss": 1.0433154106140137, |
|
"eval_runtime": 0.4207, |
|
"eval_samples_per_second": 40.405, |
|
"eval_steps_per_second": 2.377, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.5882352941176471, |
|
"eval_loss": 0.8211753368377686, |
|
"eval_runtime": 0.4003, |
|
"eval_samples_per_second": 42.464, |
|
"eval_steps_per_second": 2.498, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 12.8, |
|
"eval_accuracy": 0.7058823529411765, |
|
"eval_loss": 0.7240034341812134, |
|
"eval_runtime": 0.4119, |
|
"eval_samples_per_second": 41.269, |
|
"eval_steps_per_second": 2.428, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 13.6, |
|
"eval_accuracy": 0.8235294117647058, |
|
"eval_loss": 0.6390114426612854, |
|
"eval_runtime": 0.4132, |
|
"eval_samples_per_second": 41.145, |
|
"eval_steps_per_second": 2.42, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 14.4, |
|
"eval_accuracy": 0.8823529411764706, |
|
"eval_loss": 0.5593705177307129, |
|
"eval_runtime": 0.413, |
|
"eval_samples_per_second": 41.162, |
|
"eval_steps_per_second": 2.421, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 6.233682155609131, |
|
"learning_rate": 4.104477611940299e-05, |
|
"loss": 0.782, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8235294117647058, |
|
"eval_loss": 0.46473824977874756, |
|
"eval_runtime": 0.3967, |
|
"eval_samples_per_second": 42.853, |
|
"eval_steps_per_second": 2.521, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 16.8, |
|
"eval_accuracy": 0.9411764705882353, |
|
"eval_loss": 0.42642152309417725, |
|
"eval_runtime": 0.42, |
|
"eval_samples_per_second": 40.479, |
|
"eval_steps_per_second": 2.381, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 17.6, |
|
"eval_accuracy": 0.9411764705882353, |
|
"eval_loss": 0.3983331024646759, |
|
"eval_runtime": 0.4396, |
|
"eval_samples_per_second": 38.669, |
|
"eval_steps_per_second": 2.275, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 18.4, |
|
"eval_accuracy": 0.9411764705882353, |
|
"eval_loss": 0.3759661912918091, |
|
"eval_runtime": 0.4039, |
|
"eval_samples_per_second": 42.09, |
|
"eval_steps_per_second": 2.476, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8823529411764706, |
|
"eval_loss": 0.3751123547554016, |
|
"eval_runtime": 0.4128, |
|
"eval_samples_per_second": 41.182, |
|
"eval_steps_per_second": 2.422, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 20.8, |
|
"eval_accuracy": 0.8823529411764706, |
|
"eval_loss": 0.35528773069381714, |
|
"eval_runtime": 0.3998, |
|
"eval_samples_per_second": 42.522, |
|
"eval_steps_per_second": 2.501, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 21.6, |
|
"eval_accuracy": 0.8823529411764706, |
|
"eval_loss": 0.31613630056381226, |
|
"eval_runtime": 0.4018, |
|
"eval_samples_per_second": 42.307, |
|
"eval_steps_per_second": 2.489, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 22.4, |
|
"eval_accuracy": 0.9411764705882353, |
|
"eval_loss": 0.270626962184906, |
|
"eval_runtime": 0.3978, |
|
"eval_samples_per_second": 42.736, |
|
"eval_steps_per_second": 2.514, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"grad_norm": 7.092751979827881, |
|
"learning_rate": 3.358208955223881e-05, |
|
"loss": 0.3228, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.9411764705882353, |
|
"eval_loss": 0.21002241969108582, |
|
"eval_runtime": 0.423, |
|
"eval_samples_per_second": 40.194, |
|
"eval_steps_per_second": 2.364, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 24.8, |
|
"eval_accuracy": 0.9411764705882353, |
|
"eval_loss": 0.18847431242465973, |
|
"eval_runtime": 0.3992, |
|
"eval_samples_per_second": 42.583, |
|
"eval_steps_per_second": 2.505, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 25.6, |
|
"eval_accuracy": 0.9411764705882353, |
|
"eval_loss": 0.17268314957618713, |
|
"eval_runtime": 0.3893, |
|
"eval_samples_per_second": 43.663, |
|
"eval_steps_per_second": 2.568, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 26.4, |
|
"eval_accuracy": 0.9411764705882353, |
|
"eval_loss": 0.18181481957435608, |
|
"eval_runtime": 0.4253, |
|
"eval_samples_per_second": 39.969, |
|
"eval_steps_per_second": 2.351, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.8823529411764706, |
|
"eval_loss": 0.1958554983139038, |
|
"eval_runtime": 0.4047, |
|
"eval_samples_per_second": 42.006, |
|
"eval_steps_per_second": 2.471, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 28.8, |
|
"eval_accuracy": 0.9411764705882353, |
|
"eval_loss": 0.18888713419437408, |
|
"eval_runtime": 0.4156, |
|
"eval_samples_per_second": 40.908, |
|
"eval_steps_per_second": 2.406, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 29.6, |
|
"eval_accuracy": 0.8823529411764706, |
|
"eval_loss": 0.1994781792163849, |
|
"eval_runtime": 0.4146, |
|
"eval_samples_per_second": 41.008, |
|
"eval_steps_per_second": 2.412, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 30.4, |
|
"eval_accuracy": 0.8823529411764706, |
|
"eval_loss": 0.20932523906230927, |
|
"eval_runtime": 0.4033, |
|
"eval_samples_per_second": 42.151, |
|
"eval_steps_per_second": 2.479, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"grad_norm": 5.438349723815918, |
|
"learning_rate": 2.6119402985074626e-05, |
|
"loss": 0.2375, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.9411764705882353, |
|
"eval_loss": 0.18688271939754486, |
|
"eval_runtime": 0.4352, |
|
"eval_samples_per_second": 39.059, |
|
"eval_steps_per_second": 2.298, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 32.8, |
|
"eval_accuracy": 0.9411764705882353, |
|
"eval_loss": 0.16475379467010498, |
|
"eval_runtime": 0.4369, |
|
"eval_samples_per_second": 38.912, |
|
"eval_steps_per_second": 2.289, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 33.6, |
|
"eval_accuracy": 0.9411764705882353, |
|
"eval_loss": 0.15764710307121277, |
|
"eval_runtime": 0.4055, |
|
"eval_samples_per_second": 41.923, |
|
"eval_steps_per_second": 2.466, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 34.4, |
|
"eval_accuracy": 0.9411764705882353, |
|
"eval_loss": 0.17089737951755524, |
|
"eval_runtime": 0.4229, |
|
"eval_samples_per_second": 40.197, |
|
"eval_steps_per_second": 2.365, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.9411764705882353, |
|
"eval_loss": 0.1716768890619278, |
|
"eval_runtime": 0.4195, |
|
"eval_samples_per_second": 40.52, |
|
"eval_steps_per_second": 2.384, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 36.8, |
|
"eval_accuracy": 0.9411764705882353, |
|
"eval_loss": 0.178259015083313, |
|
"eval_runtime": 0.4058, |
|
"eval_samples_per_second": 41.888, |
|
"eval_steps_per_second": 2.464, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 37.6, |
|
"eval_accuracy": 0.8823529411764706, |
|
"eval_loss": 0.19927644729614258, |
|
"eval_runtime": 0.418, |
|
"eval_samples_per_second": 40.668, |
|
"eval_steps_per_second": 2.392, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 38.4, |
|
"eval_accuracy": 0.8823529411764706, |
|
"eval_loss": 0.20850598812103271, |
|
"eval_runtime": 0.4378, |
|
"eval_samples_per_second": 38.829, |
|
"eval_steps_per_second": 2.284, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"grad_norm": 6.9101080894470215, |
|
"learning_rate": 1.865671641791045e-05, |
|
"loss": 0.1897, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.8823529411764706, |
|
"eval_loss": 0.2028307318687439, |
|
"eval_runtime": 0.3953, |
|
"eval_samples_per_second": 43.001, |
|
"eval_steps_per_second": 2.529, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 40.8, |
|
"eval_accuracy": 0.9411764705882353, |
|
"eval_loss": 0.1704062819480896, |
|
"eval_runtime": 0.3961, |
|
"eval_samples_per_second": 42.921, |
|
"eval_steps_per_second": 2.525, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 41.6, |
|
"eval_accuracy": 0.9411764705882353, |
|
"eval_loss": 0.15204329788684845, |
|
"eval_runtime": 0.4075, |
|
"eval_samples_per_second": 41.72, |
|
"eval_steps_per_second": 2.454, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 42.4, |
|
"eval_accuracy": 0.9411764705882353, |
|
"eval_loss": 0.13252082467079163, |
|
"eval_runtime": 0.4039, |
|
"eval_samples_per_second": 42.088, |
|
"eval_steps_per_second": 2.476, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.9411764705882353, |
|
"eval_loss": 0.14505437016487122, |
|
"eval_runtime": 0.4043, |
|
"eval_samples_per_second": 42.048, |
|
"eval_steps_per_second": 2.473, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 44.8, |
|
"eval_accuracy": 0.9411764705882353, |
|
"eval_loss": 0.16641516983509064, |
|
"eval_runtime": 0.4361, |
|
"eval_samples_per_second": 38.986, |
|
"eval_steps_per_second": 2.293, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 45.6, |
|
"eval_accuracy": 0.8823529411764706, |
|
"eval_loss": 0.19269916415214539, |
|
"eval_runtime": 0.4099, |
|
"eval_samples_per_second": 41.472, |
|
"eval_steps_per_second": 2.44, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 46.4, |
|
"eval_accuracy": 0.8823529411764706, |
|
"eval_loss": 0.22021445631980896, |
|
"eval_runtime": 0.4008, |
|
"eval_samples_per_second": 42.416, |
|
"eval_steps_per_second": 2.495, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"grad_norm": 5.389186382293701, |
|
"learning_rate": 1.119402985074627e-05, |
|
"loss": 0.1676, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.8823529411764706, |
|
"eval_loss": 0.25689512491226196, |
|
"eval_runtime": 0.4197, |
|
"eval_samples_per_second": 40.504, |
|
"eval_steps_per_second": 2.383, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 48.8, |
|
"eval_accuracy": 0.8823529411764706, |
|
"eval_loss": 0.27476173639297485, |
|
"eval_runtime": 0.4298, |
|
"eval_samples_per_second": 39.551, |
|
"eval_steps_per_second": 2.327, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 49.6, |
|
"eval_accuracy": 0.8823529411764706, |
|
"eval_loss": 0.26124510169029236, |
|
"eval_runtime": 0.3885, |
|
"eval_samples_per_second": 43.756, |
|
"eval_steps_per_second": 2.574, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 50.4, |
|
"eval_accuracy": 0.8823529411764706, |
|
"eval_loss": 0.24144531786441803, |
|
"eval_runtime": 0.4017, |
|
"eval_samples_per_second": 42.318, |
|
"eval_steps_per_second": 2.489, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.8823529411764706, |
|
"eval_loss": 0.18416434526443481, |
|
"eval_runtime": 0.3993, |
|
"eval_samples_per_second": 42.577, |
|
"eval_steps_per_second": 2.505, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 52.8, |
|
"eval_accuracy": 0.8823529411764706, |
|
"eval_loss": 0.15971186757087708, |
|
"eval_runtime": 0.4143, |
|
"eval_samples_per_second": 41.033, |
|
"eval_steps_per_second": 2.414, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 53.6, |
|
"eval_accuracy": 0.8823529411764706, |
|
"eval_loss": 0.1446918547153473, |
|
"eval_runtime": 0.3964, |
|
"eval_samples_per_second": 42.886, |
|
"eval_steps_per_second": 2.523, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 54.4, |
|
"eval_accuracy": 0.9411764705882353, |
|
"eval_loss": 0.13591152429580688, |
|
"eval_runtime": 0.392, |
|
"eval_samples_per_second": 43.363, |
|
"eval_steps_per_second": 2.551, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"grad_norm": 13.033480644226074, |
|
"learning_rate": 3.7313432835820893e-06, |
|
"loss": 0.1452, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.9411764705882353, |
|
"eval_loss": 0.13674524426460266, |
|
"eval_runtime": 0.4048, |
|
"eval_samples_per_second": 41.997, |
|
"eval_steps_per_second": 2.47, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 56.8, |
|
"eval_accuracy": 0.9411764705882353, |
|
"eval_loss": 0.1401834785938263, |
|
"eval_runtime": 0.4238, |
|
"eval_samples_per_second": 40.11, |
|
"eval_steps_per_second": 2.359, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 57.6, |
|
"eval_accuracy": 0.8823529411764706, |
|
"eval_loss": 0.14624573290348053, |
|
"eval_runtime": 0.4222, |
|
"eval_samples_per_second": 40.266, |
|
"eval_steps_per_second": 2.369, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 58.4, |
|
"eval_accuracy": 0.8823529411764706, |
|
"eval_loss": 0.1515001356601715, |
|
"eval_runtime": 0.4044, |
|
"eval_samples_per_second": 42.039, |
|
"eval_steps_per_second": 2.473, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.8823529411764706, |
|
"eval_loss": 0.1584557294845581, |
|
"eval_runtime": 0.3957, |
|
"eval_samples_per_second": 42.96, |
|
"eval_steps_per_second": 2.527, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"step": 75, |
|
"total_flos": 2.1926045190131712e+17, |
|
"train_loss": 0.4769280139605204, |
|
"train_runtime": 264.8978, |
|
"train_samples_per_second": 41.62, |
|
"train_steps_per_second": 0.283 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 75, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 75, |
|
"save_steps": 500, |
|
"total_flos": 2.1926045190131712e+17, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|