|
{ |
|
"best_metric": 0.5454545454545454, |
|
"best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-isic217/checkpoint-245", |
|
"epoch": 48.97959183673469, |
|
"eval_steps": 500, |
|
"global_step": 1200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.40816326530612246, |
|
"grad_norm": 20.10276222229004, |
|
"learning_rate": 4.166666666666667e-06, |
|
"loss": 2.1681, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.8163265306122449, |
|
"grad_norm": 18.530778884887695, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 2.2404, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.9795918367346939, |
|
"eval_accuracy": 0.13636363636363635, |
|
"eval_loss": 2.1526713371276855, |
|
"eval_runtime": 3.7709, |
|
"eval_samples_per_second": 5.834, |
|
"eval_steps_per_second": 2.917, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 1.2244897959183674, |
|
"grad_norm": 19.15616798400879, |
|
"learning_rate": 1.25e-05, |
|
"loss": 2.1402, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.6326530612244898, |
|
"grad_norm": 18.088478088378906, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 2.0749, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.13636363636363635, |
|
"eval_loss": 2.115939140319824, |
|
"eval_runtime": 4.2644, |
|
"eval_samples_per_second": 5.159, |
|
"eval_steps_per_second": 2.58, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 2.0408163265306123, |
|
"grad_norm": 28.553932189941406, |
|
"learning_rate": 2.0833333333333336e-05, |
|
"loss": 2.0201, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 2.4489795918367347, |
|
"grad_norm": 21.90468406677246, |
|
"learning_rate": 2.5e-05, |
|
"loss": 1.8697, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 2.857142857142857, |
|
"grad_norm": 29.221223831176758, |
|
"learning_rate": 2.916666666666667e-05, |
|
"loss": 1.947, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 2.979591836734694, |
|
"eval_accuracy": 0.13636363636363635, |
|
"eval_loss": 2.172260284423828, |
|
"eval_runtime": 3.7285, |
|
"eval_samples_per_second": 5.9, |
|
"eval_steps_per_second": 2.95, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 3.2653061224489797, |
|
"grad_norm": 26.88852882385254, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 1.8989, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 3.673469387755102, |
|
"grad_norm": 29.26166343688965, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 1.732, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.09090909090909091, |
|
"eval_loss": 2.154468536376953, |
|
"eval_runtime": 4.2209, |
|
"eval_samples_per_second": 5.212, |
|
"eval_steps_per_second": 2.606, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 4.081632653061225, |
|
"grad_norm": 39.108612060546875, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 1.6138, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 4.489795918367347, |
|
"grad_norm": 23.49874496459961, |
|
"learning_rate": 4.5833333333333334e-05, |
|
"loss": 1.4534, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 4.8979591836734695, |
|
"grad_norm": 26.28005599975586, |
|
"learning_rate": 5e-05, |
|
"loss": 1.446, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 4.979591836734694, |
|
"eval_accuracy": 0.18181818181818182, |
|
"eval_loss": 2.2917678356170654, |
|
"eval_runtime": 3.8303, |
|
"eval_samples_per_second": 5.744, |
|
"eval_steps_per_second": 2.872, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 5.3061224489795915, |
|
"grad_norm": 38.8511962890625, |
|
"learning_rate": 4.9537037037037035e-05, |
|
"loss": 1.2513, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 5.714285714285714, |
|
"grad_norm": 29.820043563842773, |
|
"learning_rate": 4.9074074074074075e-05, |
|
"loss": 1.1175, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.3181818181818182, |
|
"eval_loss": 1.9290556907653809, |
|
"eval_runtime": 4.301, |
|
"eval_samples_per_second": 5.115, |
|
"eval_steps_per_second": 2.558, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 6.122448979591836, |
|
"grad_norm": 36.40614700317383, |
|
"learning_rate": 4.8611111111111115e-05, |
|
"loss": 1.3955, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 6.530612244897959, |
|
"grad_norm": 31.91864776611328, |
|
"learning_rate": 4.814814814814815e-05, |
|
"loss": 1.0427, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 6.938775510204081, |
|
"grad_norm": 35.82953643798828, |
|
"learning_rate": 4.768518518518519e-05, |
|
"loss": 1.1069, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 6.979591836734694, |
|
"eval_accuracy": 0.36363636363636365, |
|
"eval_loss": 1.9551143646240234, |
|
"eval_runtime": 4.268, |
|
"eval_samples_per_second": 5.155, |
|
"eval_steps_per_second": 2.577, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 7.346938775510204, |
|
"grad_norm": 30.75468635559082, |
|
"learning_rate": 4.722222222222222e-05, |
|
"loss": 0.9718, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 7.755102040816326, |
|
"grad_norm": 33.49174880981445, |
|
"learning_rate": 4.675925925925926e-05, |
|
"loss": 0.7932, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.4090909090909091, |
|
"eval_loss": 2.0534465312957764, |
|
"eval_runtime": 4.3072, |
|
"eval_samples_per_second": 5.108, |
|
"eval_steps_per_second": 2.554, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 8.16326530612245, |
|
"grad_norm": 45.86684799194336, |
|
"learning_rate": 4.62962962962963e-05, |
|
"loss": 1.0077, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 8.571428571428571, |
|
"grad_norm": 34.891082763671875, |
|
"learning_rate": 4.5833333333333334e-05, |
|
"loss": 0.7286, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 8.979591836734693, |
|
"grad_norm": 30.408119201660156, |
|
"learning_rate": 4.5370370370370374e-05, |
|
"loss": 0.5994, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 8.979591836734693, |
|
"eval_accuracy": 0.36363636363636365, |
|
"eval_loss": 1.8135310411453247, |
|
"eval_runtime": 3.8949, |
|
"eval_samples_per_second": 5.648, |
|
"eval_steps_per_second": 2.824, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 9.387755102040817, |
|
"grad_norm": 20.37044906616211, |
|
"learning_rate": 4.490740740740741e-05, |
|
"loss": 0.7078, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 9.795918367346939, |
|
"grad_norm": 14.344355583190918, |
|
"learning_rate": 4.4444444444444447e-05, |
|
"loss": 0.4671, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.5454545454545454, |
|
"eval_loss": 1.7822338342666626, |
|
"eval_runtime": 4.1682, |
|
"eval_samples_per_second": 5.278, |
|
"eval_steps_per_second": 2.639, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 10.204081632653061, |
|
"grad_norm": 39.557579040527344, |
|
"learning_rate": 4.3981481481481486e-05, |
|
"loss": 0.5962, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 10.612244897959183, |
|
"grad_norm": 7.858915328979492, |
|
"learning_rate": 4.351851851851852e-05, |
|
"loss": 0.4612, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 10.979591836734693, |
|
"eval_accuracy": 0.5, |
|
"eval_loss": 2.2529866695404053, |
|
"eval_runtime": 3.7854, |
|
"eval_samples_per_second": 5.812, |
|
"eval_steps_per_second": 2.906, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 11.020408163265307, |
|
"grad_norm": 28.87532615661621, |
|
"learning_rate": 4.305555555555556e-05, |
|
"loss": 0.4548, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 11.428571428571429, |
|
"grad_norm": 40.782554626464844, |
|
"learning_rate": 4.259259259259259e-05, |
|
"loss": 0.5538, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 11.83673469387755, |
|
"grad_norm": 17.471019744873047, |
|
"learning_rate": 4.212962962962963e-05, |
|
"loss": 0.4016, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.4090909090909091, |
|
"eval_loss": 1.8437938690185547, |
|
"eval_runtime": 4.198, |
|
"eval_samples_per_second": 5.241, |
|
"eval_steps_per_second": 2.62, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 12.244897959183673, |
|
"grad_norm": 38.50642395019531, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 0.4786, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 12.653061224489797, |
|
"grad_norm": 30.62510871887207, |
|
"learning_rate": 4.1203703703703705e-05, |
|
"loss": 0.3947, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 12.979591836734693, |
|
"eval_accuracy": 0.5, |
|
"eval_loss": 1.8609530925750732, |
|
"eval_runtime": 3.8332, |
|
"eval_samples_per_second": 5.739, |
|
"eval_steps_per_second": 2.87, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 13.061224489795919, |
|
"grad_norm": 1.0034515857696533, |
|
"learning_rate": 4.074074074074074e-05, |
|
"loss": 0.3527, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 13.46938775510204, |
|
"grad_norm": 11.846328735351562, |
|
"learning_rate": 4.027777777777778e-05, |
|
"loss": 0.3415, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 13.877551020408163, |
|
"grad_norm": 9.749055862426758, |
|
"learning_rate": 3.981481481481482e-05, |
|
"loss": 0.5033, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.45454545454545453, |
|
"eval_loss": 1.8614706993103027, |
|
"eval_runtime": 4.1423, |
|
"eval_samples_per_second": 5.311, |
|
"eval_steps_per_second": 2.656, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 14.285714285714286, |
|
"grad_norm": 74.4493637084961, |
|
"learning_rate": 3.935185185185186e-05, |
|
"loss": 0.2709, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 14.693877551020408, |
|
"grad_norm": 34.6863899230957, |
|
"learning_rate": 3.888888888888889e-05, |
|
"loss": 0.2846, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 14.979591836734693, |
|
"eval_accuracy": 0.5, |
|
"eval_loss": 1.5478615760803223, |
|
"eval_runtime": 3.7136, |
|
"eval_samples_per_second": 5.924, |
|
"eval_steps_per_second": 2.962, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 15.10204081632653, |
|
"grad_norm": 21.048105239868164, |
|
"learning_rate": 3.8425925925925924e-05, |
|
"loss": 0.361, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 15.510204081632653, |
|
"grad_norm": 27.45090103149414, |
|
"learning_rate": 3.7962962962962964e-05, |
|
"loss": 0.1265, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 15.918367346938776, |
|
"grad_norm": 2.5599782466888428, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.2828, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.5454545454545454, |
|
"eval_loss": 1.6410338878631592, |
|
"eval_runtime": 4.2352, |
|
"eval_samples_per_second": 5.195, |
|
"eval_steps_per_second": 2.597, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 16.3265306122449, |
|
"grad_norm": 44.01292037963867, |
|
"learning_rate": 3.7037037037037037e-05, |
|
"loss": 0.4138, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 16.73469387755102, |
|
"grad_norm": 49.22306823730469, |
|
"learning_rate": 3.6574074074074076e-05, |
|
"loss": 0.3426, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 16.979591836734695, |
|
"eval_accuracy": 0.36363636363636365, |
|
"eval_loss": 1.9146943092346191, |
|
"eval_runtime": 3.73, |
|
"eval_samples_per_second": 5.898, |
|
"eval_steps_per_second": 2.949, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 17.142857142857142, |
|
"grad_norm": 20.12368392944336, |
|
"learning_rate": 3.611111111111111e-05, |
|
"loss": 0.3658, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 17.551020408163264, |
|
"grad_norm": 5.72607946395874, |
|
"learning_rate": 3.564814814814815e-05, |
|
"loss": 0.0983, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 17.959183673469386, |
|
"grad_norm": 127.7773208618164, |
|
"learning_rate": 3.518518518518519e-05, |
|
"loss": 0.3108, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.5454545454545454, |
|
"eval_loss": 1.4793992042541504, |
|
"eval_runtime": 4.4219, |
|
"eval_samples_per_second": 4.975, |
|
"eval_steps_per_second": 2.488, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 18.367346938775512, |
|
"grad_norm": 2.2349815368652344, |
|
"learning_rate": 3.472222222222222e-05, |
|
"loss": 0.08, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 18.775510204081634, |
|
"grad_norm": 84.97185516357422, |
|
"learning_rate": 3.425925925925926e-05, |
|
"loss": 0.2129, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 18.979591836734695, |
|
"eval_accuracy": 0.4090909090909091, |
|
"eval_loss": 1.776501178741455, |
|
"eval_runtime": 3.7003, |
|
"eval_samples_per_second": 5.946, |
|
"eval_steps_per_second": 2.973, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 19.183673469387756, |
|
"grad_norm": 6.593123912811279, |
|
"learning_rate": 3.3796296296296295e-05, |
|
"loss": 0.1714, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 19.591836734693878, |
|
"grad_norm": 2.1411988735198975, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.1612, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 7.131197929382324, |
|
"learning_rate": 3.2870370370370375e-05, |
|
"loss": 0.1946, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.45454545454545453, |
|
"eval_loss": 2.16646671295166, |
|
"eval_runtime": 4.4203, |
|
"eval_samples_per_second": 4.977, |
|
"eval_steps_per_second": 2.489, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 20.408163265306122, |
|
"grad_norm": 4.898344039916992, |
|
"learning_rate": 3.240740740740741e-05, |
|
"loss": 0.2809, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 20.816326530612244, |
|
"grad_norm": 0.8991552591323853, |
|
"learning_rate": 3.194444444444444e-05, |
|
"loss": 0.1255, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 20.979591836734695, |
|
"eval_accuracy": 0.5454545454545454, |
|
"eval_loss": 1.8521960973739624, |
|
"eval_runtime": 4.0094, |
|
"eval_samples_per_second": 5.487, |
|
"eval_steps_per_second": 2.744, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 21.224489795918366, |
|
"grad_norm": 0.9338099956512451, |
|
"learning_rate": 3.148148148148148e-05, |
|
"loss": 0.0503, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 21.632653061224488, |
|
"grad_norm": 1.708648920059204, |
|
"learning_rate": 3.101851851851852e-05, |
|
"loss": 0.2301, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.45454545454545453, |
|
"eval_loss": 2.028917074203491, |
|
"eval_runtime": 4.3302, |
|
"eval_samples_per_second": 5.081, |
|
"eval_steps_per_second": 2.54, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 22.040816326530614, |
|
"grad_norm": 68.97888946533203, |
|
"learning_rate": 3.055555555555556e-05, |
|
"loss": 0.1664, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 22.448979591836736, |
|
"grad_norm": 8.795487403869629, |
|
"learning_rate": 3.0092592592592593e-05, |
|
"loss": 0.0495, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 22.857142857142858, |
|
"grad_norm": 1.032072901725769, |
|
"learning_rate": 2.962962962962963e-05, |
|
"loss": 0.0909, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 22.979591836734695, |
|
"eval_accuracy": 0.4090909090909091, |
|
"eval_loss": 2.051244020462036, |
|
"eval_runtime": 4.0425, |
|
"eval_samples_per_second": 5.442, |
|
"eval_steps_per_second": 2.721, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 23.26530612244898, |
|
"grad_norm": 28.04718589782715, |
|
"learning_rate": 2.916666666666667e-05, |
|
"loss": 0.1032, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 23.6734693877551, |
|
"grad_norm": 24.228317260742188, |
|
"learning_rate": 2.8703703703703706e-05, |
|
"loss": 0.1724, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.4090909090909091, |
|
"eval_loss": 2.3410568237304688, |
|
"eval_runtime": 4.2445, |
|
"eval_samples_per_second": 5.183, |
|
"eval_steps_per_second": 2.592, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 24.081632653061224, |
|
"grad_norm": 51.32829284667969, |
|
"learning_rate": 2.824074074074074e-05, |
|
"loss": 0.1376, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 24.489795918367346, |
|
"grad_norm": 8.50900936126709, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.0941, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 24.897959183673468, |
|
"grad_norm": 53.41023254394531, |
|
"learning_rate": 2.7314814814814816e-05, |
|
"loss": 0.2256, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 24.979591836734695, |
|
"eval_accuracy": 0.5, |
|
"eval_loss": 2.162550687789917, |
|
"eval_runtime": 3.7513, |
|
"eval_samples_per_second": 5.865, |
|
"eval_steps_per_second": 2.932, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 25.306122448979593, |
|
"grad_norm": 20.35555076599121, |
|
"learning_rate": 2.6851851851851855e-05, |
|
"loss": 0.2228, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 25.714285714285715, |
|
"grad_norm": 5.79302978515625, |
|
"learning_rate": 2.6388888888888892e-05, |
|
"loss": 0.2471, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.4090909090909091, |
|
"eval_loss": 2.0551793575286865, |
|
"eval_runtime": 4.2279, |
|
"eval_samples_per_second": 5.204, |
|
"eval_steps_per_second": 2.602, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 26.122448979591837, |
|
"grad_norm": 57.79982376098633, |
|
"learning_rate": 2.5925925925925925e-05, |
|
"loss": 0.1514, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 26.53061224489796, |
|
"grad_norm": 9.047138214111328, |
|
"learning_rate": 2.5462962962962965e-05, |
|
"loss": 0.056, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 26.93877551020408, |
|
"grad_norm": 8.557161331176758, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.0671, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 26.979591836734695, |
|
"eval_accuracy": 0.5454545454545454, |
|
"eval_loss": 1.9339114427566528, |
|
"eval_runtime": 3.8162, |
|
"eval_samples_per_second": 5.765, |
|
"eval_steps_per_second": 2.882, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 27.346938775510203, |
|
"grad_norm": 0.14792662858963013, |
|
"learning_rate": 2.4537037037037038e-05, |
|
"loss": 0.1409, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 27.755102040816325, |
|
"grad_norm": 112.82161712646484, |
|
"learning_rate": 2.4074074074074074e-05, |
|
"loss": 0.2563, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.45454545454545453, |
|
"eval_loss": 2.2506721019744873, |
|
"eval_runtime": 4.1968, |
|
"eval_samples_per_second": 5.242, |
|
"eval_steps_per_second": 2.621, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 28.163265306122447, |
|
"grad_norm": 0.717983067035675, |
|
"learning_rate": 2.361111111111111e-05, |
|
"loss": 0.0499, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 28.571428571428573, |
|
"grad_norm": 47.17780303955078, |
|
"learning_rate": 2.314814814814815e-05, |
|
"loss": 0.1607, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 28.979591836734695, |
|
"grad_norm": 102.77259063720703, |
|
"learning_rate": 2.2685185185185187e-05, |
|
"loss": 0.1865, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 28.979591836734695, |
|
"eval_accuracy": 0.5, |
|
"eval_loss": 2.0703585147857666, |
|
"eval_runtime": 3.7043, |
|
"eval_samples_per_second": 5.939, |
|
"eval_steps_per_second": 2.969, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 29.387755102040817, |
|
"grad_norm": 0.42554885149002075, |
|
"learning_rate": 2.2222222222222223e-05, |
|
"loss": 0.1905, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 29.79591836734694, |
|
"grad_norm": 92.93157196044922, |
|
"learning_rate": 2.175925925925926e-05, |
|
"loss": 0.0477, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.3181818181818182, |
|
"eval_loss": 2.839505195617676, |
|
"eval_runtime": 4.1643, |
|
"eval_samples_per_second": 5.283, |
|
"eval_steps_per_second": 2.642, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 30.20408163265306, |
|
"grad_norm": 10.255499839782715, |
|
"learning_rate": 2.1296296296296296e-05, |
|
"loss": 0.2298, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 30.612244897959183, |
|
"grad_norm": 32.9160041809082, |
|
"learning_rate": 2.0833333333333336e-05, |
|
"loss": 0.0931, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 30.979591836734695, |
|
"eval_accuracy": 0.36363636363636365, |
|
"eval_loss": 2.9483964443206787, |
|
"eval_runtime": 3.6962, |
|
"eval_samples_per_second": 5.952, |
|
"eval_steps_per_second": 2.976, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 31.020408163265305, |
|
"grad_norm": 3.2979772090911865, |
|
"learning_rate": 2.037037037037037e-05, |
|
"loss": 0.0643, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 31.428571428571427, |
|
"grad_norm": 1.9711153507232666, |
|
"learning_rate": 1.990740740740741e-05, |
|
"loss": 0.1815, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 31.836734693877553, |
|
"grad_norm": 39.575653076171875, |
|
"learning_rate": 1.9444444444444445e-05, |
|
"loss": 0.047, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.45454545454545453, |
|
"eval_loss": 2.548579216003418, |
|
"eval_runtime": 4.2179, |
|
"eval_samples_per_second": 5.216, |
|
"eval_steps_per_second": 2.608, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 32.244897959183675, |
|
"grad_norm": 0.1791549175977707, |
|
"learning_rate": 1.8981481481481482e-05, |
|
"loss": 0.1034, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 32.6530612244898, |
|
"grad_norm": 18.35671043395996, |
|
"learning_rate": 1.8518518518518518e-05, |
|
"loss": 0.165, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 32.97959183673469, |
|
"eval_accuracy": 0.45454545454545453, |
|
"eval_loss": 2.601107120513916, |
|
"eval_runtime": 3.894, |
|
"eval_samples_per_second": 5.65, |
|
"eval_steps_per_second": 2.825, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 33.06122448979592, |
|
"grad_norm": 88.95061492919922, |
|
"learning_rate": 1.8055555555555555e-05, |
|
"loss": 0.0639, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 33.46938775510204, |
|
"grad_norm": 0.11740878969430923, |
|
"learning_rate": 1.7592592592592595e-05, |
|
"loss": 0.032, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 33.87755102040816, |
|
"grad_norm": 8.803248405456543, |
|
"learning_rate": 1.712962962962963e-05, |
|
"loss": 0.0203, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.5, |
|
"eval_loss": 2.3598248958587646, |
|
"eval_runtime": 4.1023, |
|
"eval_samples_per_second": 5.363, |
|
"eval_steps_per_second": 2.681, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 34.285714285714285, |
|
"grad_norm": 3.050877571105957, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.0173, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 34.69387755102041, |
|
"grad_norm": 10.551043510437012, |
|
"learning_rate": 1.6203703703703704e-05, |
|
"loss": 0.0143, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 34.97959183673469, |
|
"eval_accuracy": 0.4090909090909091, |
|
"eval_loss": 2.589207649230957, |
|
"eval_runtime": 4.2169, |
|
"eval_samples_per_second": 5.217, |
|
"eval_steps_per_second": 2.609, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 35.10204081632653, |
|
"grad_norm": 2.492408514022827, |
|
"learning_rate": 1.574074074074074e-05, |
|
"loss": 0.1669, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 35.51020408163265, |
|
"grad_norm": 28.920665740966797, |
|
"learning_rate": 1.527777777777778e-05, |
|
"loss": 0.0546, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 35.91836734693877, |
|
"grad_norm": 0.1343473196029663, |
|
"learning_rate": 1.4814814814814815e-05, |
|
"loss": 0.0248, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.4090909090909091, |
|
"eval_loss": 2.8362252712249756, |
|
"eval_runtime": 4.252, |
|
"eval_samples_per_second": 5.174, |
|
"eval_steps_per_second": 2.587, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 36.326530612244895, |
|
"grad_norm": 0.00863693282008171, |
|
"learning_rate": 1.4351851851851853e-05, |
|
"loss": 0.0432, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 36.734693877551024, |
|
"grad_norm": 1.786009430885315, |
|
"learning_rate": 1.388888888888889e-05, |
|
"loss": 0.0812, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 36.97959183673469, |
|
"eval_accuracy": 0.4090909090909091, |
|
"eval_loss": 2.4658091068267822, |
|
"eval_runtime": 4.0409, |
|
"eval_samples_per_second": 5.444, |
|
"eval_steps_per_second": 2.722, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 37.142857142857146, |
|
"grad_norm": 5.971357822418213, |
|
"learning_rate": 1.3425925925925928e-05, |
|
"loss": 0.1312, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 37.55102040816327, |
|
"grad_norm": 9.33790111541748, |
|
"learning_rate": 1.2962962962962962e-05, |
|
"loss": 0.0453, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 37.95918367346939, |
|
"grad_norm": 13.613574028015137, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.0662, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.4090909090909091, |
|
"eval_loss": 2.640266180038452, |
|
"eval_runtime": 4.0647, |
|
"eval_samples_per_second": 5.412, |
|
"eval_steps_per_second": 2.706, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 38.36734693877551, |
|
"grad_norm": 17.910255432128906, |
|
"learning_rate": 1.2037037037037037e-05, |
|
"loss": 0.1061, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 38.775510204081634, |
|
"grad_norm": 5.941530227661133, |
|
"learning_rate": 1.1574074074074075e-05, |
|
"loss": 0.1855, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 38.97959183673469, |
|
"eval_accuracy": 0.45454545454545453, |
|
"eval_loss": 2.604184150695801, |
|
"eval_runtime": 3.8038, |
|
"eval_samples_per_second": 5.784, |
|
"eval_steps_per_second": 2.892, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 39.183673469387756, |
|
"grad_norm": 0.057647667825222015, |
|
"learning_rate": 1.1111111111111112e-05, |
|
"loss": 0.0598, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 39.59183673469388, |
|
"grad_norm": 1.981195092201233, |
|
"learning_rate": 1.0648148148148148e-05, |
|
"loss": 0.0107, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"grad_norm": 74.54513549804688, |
|
"learning_rate": 1.0185185185185185e-05, |
|
"loss": 0.03, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.5, |
|
"eval_loss": 2.5595407485961914, |
|
"eval_runtime": 4.2131, |
|
"eval_samples_per_second": 5.222, |
|
"eval_steps_per_second": 2.611, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 40.40816326530612, |
|
"grad_norm": 3.7874374389648438, |
|
"learning_rate": 9.722222222222223e-06, |
|
"loss": 0.0882, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 40.816326530612244, |
|
"grad_norm": 0.011240988969802856, |
|
"learning_rate": 9.259259259259259e-06, |
|
"loss": 0.1117, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 40.97959183673469, |
|
"eval_accuracy": 0.5, |
|
"eval_loss": 2.4715521335601807, |
|
"eval_runtime": 3.7805, |
|
"eval_samples_per_second": 5.819, |
|
"eval_steps_per_second": 2.91, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 41.224489795918366, |
|
"grad_norm": 30.458051681518555, |
|
"learning_rate": 8.796296296296297e-06, |
|
"loss": 0.0819, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 41.63265306122449, |
|
"grad_norm": 0.2596540153026581, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 0.0466, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.5, |
|
"eval_loss": 2.325749397277832, |
|
"eval_runtime": 4.0793, |
|
"eval_samples_per_second": 5.393, |
|
"eval_steps_per_second": 2.697, |
|
"step": 1029 |
|
}, |
|
{ |
|
"epoch": 42.04081632653061, |
|
"grad_norm": 0.8128257393836975, |
|
"learning_rate": 7.87037037037037e-06, |
|
"loss": 0.2931, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 42.44897959183673, |
|
"grad_norm": 1.8079004287719727, |
|
"learning_rate": 7.4074074074074075e-06, |
|
"loss": 0.0667, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 42.857142857142854, |
|
"grad_norm": 13.098880767822266, |
|
"learning_rate": 6.944444444444445e-06, |
|
"loss": 0.1349, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 42.97959183673469, |
|
"eval_accuracy": 0.45454545454545453, |
|
"eval_loss": 2.5544846057891846, |
|
"eval_runtime": 3.7821, |
|
"eval_samples_per_second": 5.817, |
|
"eval_steps_per_second": 2.908, |
|
"step": 1053 |
|
}, |
|
{ |
|
"epoch": 43.265306122448976, |
|
"grad_norm": 0.03145942464470863, |
|
"learning_rate": 6.481481481481481e-06, |
|
"loss": 0.0184, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 43.673469387755105, |
|
"grad_norm": 0.008914113976061344, |
|
"learning_rate": 6.0185185185185185e-06, |
|
"loss": 0.0069, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.5, |
|
"eval_loss": 2.5814802646636963, |
|
"eval_runtime": 4.2458, |
|
"eval_samples_per_second": 5.182, |
|
"eval_steps_per_second": 2.591, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 44.08163265306123, |
|
"grad_norm": 0.0782785564661026, |
|
"learning_rate": 5.555555555555556e-06, |
|
"loss": 0.1017, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 44.48979591836735, |
|
"grad_norm": 0.24598410725593567, |
|
"learning_rate": 5.092592592592592e-06, |
|
"loss": 0.0344, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 44.89795918367347, |
|
"grad_norm": 0.017518645152449608, |
|
"learning_rate": 4.6296296296296296e-06, |
|
"loss": 0.0468, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 44.97959183673469, |
|
"eval_accuracy": 0.5, |
|
"eval_loss": 2.3666675090789795, |
|
"eval_runtime": 3.9709, |
|
"eval_samples_per_second": 5.54, |
|
"eval_steps_per_second": 2.77, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 45.30612244897959, |
|
"grad_norm": 3.6144909858703613, |
|
"learning_rate": 4.166666666666667e-06, |
|
"loss": 0.0372, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 45.714285714285715, |
|
"grad_norm": 0.018579425290226936, |
|
"learning_rate": 3.7037037037037037e-06, |
|
"loss": 0.1807, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.5, |
|
"eval_loss": 2.472862482070923, |
|
"eval_runtime": 4.5348, |
|
"eval_samples_per_second": 4.851, |
|
"eval_steps_per_second": 2.426, |
|
"step": 1127 |
|
}, |
|
{ |
|
"epoch": 46.12244897959184, |
|
"grad_norm": 6.9941864013671875, |
|
"learning_rate": 3.2407407407407406e-06, |
|
"loss": 0.0091, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 46.53061224489796, |
|
"grad_norm": 1.61014986038208, |
|
"learning_rate": 2.777777777777778e-06, |
|
"loss": 0.0388, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 46.93877551020408, |
|
"grad_norm": 0.021990543231368065, |
|
"learning_rate": 2.3148148148148148e-06, |
|
"loss": 0.0667, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 46.97959183673469, |
|
"eval_accuracy": 0.5, |
|
"eval_loss": 2.496938467025757, |
|
"eval_runtime": 3.9012, |
|
"eval_samples_per_second": 5.639, |
|
"eval_steps_per_second": 2.82, |
|
"step": 1151 |
|
}, |
|
{ |
|
"epoch": 47.3469387755102, |
|
"grad_norm": 0.5512164235115051, |
|
"learning_rate": 1.8518518518518519e-06, |
|
"loss": 0.0191, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 47.755102040816325, |
|
"grad_norm": 0.2976542115211487, |
|
"learning_rate": 1.388888888888889e-06, |
|
"loss": 0.0199, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.5, |
|
"eval_loss": 2.5520730018615723, |
|
"eval_runtime": 4.4614, |
|
"eval_samples_per_second": 4.931, |
|
"eval_steps_per_second": 2.466, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 48.16326530612245, |
|
"grad_norm": 0.37567588686943054, |
|
"learning_rate": 9.259259259259259e-07, |
|
"loss": 0.0806, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 48.57142857142857, |
|
"grad_norm": 2.0820977687835693, |
|
"learning_rate": 4.6296296296296297e-07, |
|
"loss": 0.1011, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 48.97959183673469, |
|
"grad_norm": 0.018370352685451508, |
|
"learning_rate": 0.0, |
|
"loss": 0.2716, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 48.97959183673469, |
|
"eval_accuracy": 0.5, |
|
"eval_loss": 2.5371408462524414, |
|
"eval_runtime": 3.8373, |
|
"eval_samples_per_second": 5.733, |
|
"eval_steps_per_second": 2.867, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 48.97959183673469, |
|
"step": 1200, |
|
"total_flos": 2.374708462608384e+17, |
|
"train_loss": 0.41171714147552846, |
|
"train_runtime": 1608.8411, |
|
"train_samples_per_second": 6.06, |
|
"train_steps_per_second": 0.746 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.374708462608384e+17, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|