|
{
  "best_metric": 0.811172604560852,
  "best_model_checkpoint": "/media/mldrive/kcardenas/limb_classification_person_crop/beit-large-patch16-384/4_7.5e-5_5e-3_0.1/checkpoint-3843",
  "epoch": 10.0,
  "eval_steps": 500,
  "global_step": 4270,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0585480093676815,
      "grad_norm": 1740110.625,
      "learning_rate": 3.7499999999999997e-06,
      "loss": 1.5859,
      "step": 25
    },
    {
      "epoch": 0.117096018735363,
      "grad_norm": 1590622.875,
      "learning_rate": 7.499999999999999e-06,
      "loss": 1.5379,
      "step": 50
    },
    {
      "epoch": 0.1756440281030445,
      "grad_norm": 751530.75,
      "learning_rate": 1.1249999999999999e-05,
      "loss": 1.4594,
      "step": 75
    },
    {
      "epoch": 0.234192037470726,
      "grad_norm": 514042.71875,
      "learning_rate": 1.4999999999999999e-05,
      "loss": 1.4151,
      "step": 100
    },
    {
      "epoch": 0.2927400468384075,
      "grad_norm": 732141.0,
      "learning_rate": 1.875e-05,
      "loss": 1.3797,
      "step": 125
    },
    {
      "epoch": 0.351288056206089,
      "grad_norm": 797659.1875,
      "learning_rate": 2.2499999999999998e-05,
      "loss": 1.3951,
      "step": 150
    },
    {
      "epoch": 0.4098360655737705,
      "grad_norm": 665249.0625,
      "learning_rate": 2.6249999999999998e-05,
      "loss": 1.4141,
      "step": 175
    },
    {
      "epoch": 0.468384074941452,
      "grad_norm": 531666.3125,
      "learning_rate": 2.9999999999999997e-05,
      "loss": 1.3914,
      "step": 200
    },
    {
      "epoch": 0.5269320843091335,
      "grad_norm": 361343.0625,
      "learning_rate": 3.375e-05,
      "loss": 1.4053,
      "step": 225
    },
    {
      "epoch": 0.585480093676815,
      "grad_norm": 353420.25,
      "learning_rate": 3.75e-05,
      "loss": 1.381,
      "step": 250
    },
    {
      "epoch": 0.6440281030444965,
      "grad_norm": 112823.484375,
      "learning_rate": 4.125e-05,
      "loss": 1.4151,
      "step": 275
    },
    {
      "epoch": 0.702576112412178,
      "grad_norm": 224759.484375,
      "learning_rate": 4.4999999999999996e-05,
      "loss": 1.368,
      "step": 300
    },
    {
      "epoch": 0.7611241217798594,
      "grad_norm": 506533.09375,
      "learning_rate": 4.875e-05,
      "loss": 1.3292,
      "step": 325
    },
    {
      "epoch": 0.819672131147541,
      "grad_norm": 541720.6875,
      "learning_rate": 5.2499999999999995e-05,
      "loss": 1.3662,
      "step": 350
    },
    {
      "epoch": 0.8782201405152225,
      "grad_norm": 185037.796875,
      "learning_rate": 5.625e-05,
      "loss": 1.373,
      "step": 375
    },
    {
      "epoch": 0.936768149882904,
      "grad_norm": 610310.125,
      "learning_rate": 5.9999999999999995e-05,
      "loss": 1.2948,
      "step": 400
    },
    {
      "epoch": 0.9953161592505855,
      "grad_norm": 212896.328125,
      "learning_rate": 6.374999999999999e-05,
      "loss": 1.3391,
      "step": 425
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.3963515754560531,
      "eval_loss": 1.3011518716812134,
      "eval_runtime": 17.4109,
      "eval_samples_per_second": 69.267,
      "eval_steps_per_second": 4.365,
      "step": 427
    },
    {
      "epoch": 1.053864168618267,
      "grad_norm": 471487.15625,
      "learning_rate": 6.75e-05,
      "loss": 1.3088,
      "step": 450
    },
    {
      "epoch": 1.1124121779859484,
      "grad_norm": 444536.53125,
      "learning_rate": 7.125e-05,
      "loss": 1.2885,
      "step": 475
    },
    {
      "epoch": 1.17096018735363,
      "grad_norm": 201403.28125,
      "learning_rate": 7.5e-05,
      "loss": 1.3382,
      "step": 500
    },
    {
      "epoch": 1.2295081967213115,
      "grad_norm": 197585.203125,
      "learning_rate": 7.45026525198939e-05,
      "loss": 1.2741,
      "step": 525
    },
    {
      "epoch": 1.288056206088993,
      "grad_norm": 210764.71875,
      "learning_rate": 7.400530503978779e-05,
      "loss": 1.2963,
      "step": 550
    },
    {
      "epoch": 1.3466042154566744,
      "grad_norm": 355732.40625,
      "learning_rate": 7.350795755968169e-05,
      "loss": 1.2772,
      "step": 575
    },
    {
      "epoch": 1.405152224824356,
      "grad_norm": 250851.15625,
      "learning_rate": 7.301061007957558e-05,
      "loss": 1.2881,
      "step": 600
    },
    {
      "epoch": 1.4637002341920375,
      "grad_norm": 192076.140625,
      "learning_rate": 7.251326259946949e-05,
      "loss": 1.2894,
      "step": 625
    },
    {
      "epoch": 1.5222482435597189,
      "grad_norm": 514407.875,
      "learning_rate": 7.201591511936338e-05,
      "loss": 1.2762,
      "step": 650
    },
    {
      "epoch": 1.5807962529274004,
      "grad_norm": 221643.765625,
      "learning_rate": 7.151856763925728e-05,
      "loss": 1.2343,
      "step": 675
    },
    {
      "epoch": 1.639344262295082,
      "grad_norm": 209969.375,
      "learning_rate": 7.102122015915119e-05,
      "loss": 1.2508,
      "step": 700
    },
    {
      "epoch": 1.6978922716627634,
      "grad_norm": 305884.65625,
      "learning_rate": 7.052387267904509e-05,
      "loss": 1.2753,
      "step": 725
    },
    {
      "epoch": 1.756440281030445,
      "grad_norm": 248356.125,
      "learning_rate": 7.002652519893898e-05,
      "loss": 1.241,
      "step": 750
    },
    {
      "epoch": 1.8149882903981265,
      "grad_norm": 290317.125,
      "learning_rate": 6.952917771883289e-05,
      "loss": 1.1727,
      "step": 775
    },
    {
      "epoch": 1.8735362997658078,
      "grad_norm": 292209.90625,
      "learning_rate": 6.903183023872679e-05,
      "loss": 1.1803,
      "step": 800
    },
    {
      "epoch": 1.9320843091334896,
      "grad_norm": 379956.96875,
      "learning_rate": 6.853448275862068e-05,
      "loss": 1.219,
      "step": 825
    },
    {
      "epoch": 1.990632318501171,
      "grad_norm": 473929.34375,
      "learning_rate": 6.803713527851459e-05,
      "loss": 1.1728,
      "step": 850
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.5613598673300166,
      "eval_loss": 1.0953155755996704,
      "eval_runtime": 16.2998,
      "eval_samples_per_second": 73.989,
      "eval_steps_per_second": 4.663,
      "step": 854
    },
    {
      "epoch": 2.0491803278688523,
      "grad_norm": 373919.96875,
      "learning_rate": 6.753978779840849e-05,
      "loss": 1.2129,
      "step": 875
    },
    {
      "epoch": 2.107728337236534,
      "grad_norm": 232941.75,
      "learning_rate": 6.704244031830238e-05,
      "loss": 1.1575,
      "step": 900
    },
    {
      "epoch": 2.1662763466042154,
      "grad_norm": 274719.65625,
      "learning_rate": 6.654509283819629e-05,
      "loss": 1.2407,
      "step": 925
    },
    {
      "epoch": 2.2248243559718968,
      "grad_norm": 238487.640625,
      "learning_rate": 6.604774535809018e-05,
      "loss": 1.1643,
      "step": 950
    },
    {
      "epoch": 2.2833723653395785,
      "grad_norm": 416771.28125,
      "learning_rate": 6.555039787798408e-05,
      "loss": 1.164,
      "step": 975
    },
    {
      "epoch": 2.34192037470726,
      "grad_norm": 333086.4375,
      "learning_rate": 6.505305039787798e-05,
      "loss": 1.2429,
      "step": 1000
    },
    {
      "epoch": 2.4004683840749417,
      "grad_norm": 286217.46875,
      "learning_rate": 6.455570291777188e-05,
      "loss": 1.2015,
      "step": 1025
    },
    {
      "epoch": 2.459016393442623,
      "grad_norm": 560869.25,
      "learning_rate": 6.405835543766578e-05,
      "loss": 1.1366,
      "step": 1050
    },
    {
      "epoch": 2.5175644028103044,
      "grad_norm": 381247.375,
      "learning_rate": 6.356100795755967e-05,
      "loss": 1.1934,
      "step": 1075
    },
    {
      "epoch": 2.576112412177986,
      "grad_norm": 338792.46875,
      "learning_rate": 6.306366047745357e-05,
      "loss": 1.1613,
      "step": 1100
    },
    {
      "epoch": 2.6346604215456675,
      "grad_norm": 193875.453125,
      "learning_rate": 6.256631299734748e-05,
      "loss": 1.2336,
      "step": 1125
    },
    {
      "epoch": 2.693208430913349,
      "grad_norm": 545943.5625,
      "learning_rate": 6.206896551724137e-05,
      "loss": 1.1853,
      "step": 1150
    },
    {
      "epoch": 2.7517564402810306,
      "grad_norm": 297349.4375,
      "learning_rate": 6.157161803713527e-05,
      "loss": 1.169,
      "step": 1175
    },
    {
      "epoch": 2.810304449648712,
      "grad_norm": 328393.5,
      "learning_rate": 6.107427055702916e-05,
      "loss": 1.1806,
      "step": 1200
    },
    {
      "epoch": 2.8688524590163933,
      "grad_norm": 1007124.0625,
      "learning_rate": 6.0576923076923076e-05,
      "loss": 1.1695,
      "step": 1225
    },
    {
      "epoch": 2.927400468384075,
      "grad_norm": 297399.3125,
      "learning_rate": 6.0079575596816967e-05,
      "loss": 1.163,
      "step": 1250
    },
    {
      "epoch": 2.9859484777517564,
      "grad_norm": 289016.09375,
      "learning_rate": 5.958222811671087e-05,
      "loss": 1.1865,
      "step": 1275
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.6243781094527363,
      "eval_loss": 0.9964272379875183,
      "eval_runtime": 17.0213,
      "eval_samples_per_second": 70.852,
      "eval_steps_per_second": 4.465,
      "step": 1281
    },
    {
      "epoch": 3.0444964871194378,
      "grad_norm": 159062.4375,
      "learning_rate": 5.9084880636604775e-05,
      "loss": 1.1395,
      "step": 1300
    },
    {
      "epoch": 3.1030444964871196,
      "grad_norm": 234732.515625,
      "learning_rate": 5.8587533156498666e-05,
      "loss": 1.1499,
      "step": 1325
    },
    {
      "epoch": 3.161592505854801,
      "grad_norm": 196042.09375,
      "learning_rate": 5.809018567639257e-05,
      "loss": 1.0882,
      "step": 1350
    },
    {
      "epoch": 3.2201405152224822,
      "grad_norm": 138379.390625,
      "learning_rate": 5.759283819628647e-05,
      "loss": 1.1631,
      "step": 1375
    },
    {
      "epoch": 3.278688524590164,
      "grad_norm": 185450.734375,
      "learning_rate": 5.7095490716180365e-05,
      "loss": 1.1566,
      "step": 1400
    },
    {
      "epoch": 3.3372365339578454,
      "grad_norm": 373123.96875,
      "learning_rate": 5.659814323607426e-05,
      "loss": 1.1316,
      "step": 1425
    },
    {
      "epoch": 3.3957845433255267,
      "grad_norm": 266897.21875,
      "learning_rate": 5.610079575596817e-05,
      "loss": 1.1585,
      "step": 1450
    },
    {
      "epoch": 3.4543325526932085,
      "grad_norm": 273406.1875,
      "learning_rate": 5.5603448275862065e-05,
      "loss": 1.1306,
      "step": 1475
    },
    {
      "epoch": 3.51288056206089,
      "grad_norm": 250563.671875,
      "learning_rate": 5.510610079575596e-05,
      "loss": 1.1024,
      "step": 1500
    },
    {
      "epoch": 3.571428571428571,
      "grad_norm": 269341.34375,
      "learning_rate": 5.4608753315649866e-05,
      "loss": 1.128,
      "step": 1525
    },
    {
      "epoch": 3.629976580796253,
      "grad_norm": 201319.734375,
      "learning_rate": 5.411140583554376e-05,
      "loss": 1.119,
      "step": 1550
    },
    {
      "epoch": 3.6885245901639343,
      "grad_norm": 210587.78125,
      "learning_rate": 5.361405835543766e-05,
      "loss": 1.1127,
      "step": 1575
    },
    {
      "epoch": 3.747072599531616,
      "grad_norm": 275135.53125,
      "learning_rate": 5.3116710875331566e-05,
      "loss": 1.146,
      "step": 1600
    },
    {
      "epoch": 3.8056206088992974,
      "grad_norm": 425411.6875,
      "learning_rate": 5.2619363395225456e-05,
      "loss": 1.0964,
      "step": 1625
    },
    {
      "epoch": 3.8641686182669788,
      "grad_norm": 215899.6875,
      "learning_rate": 5.212201591511936e-05,
      "loss": 1.1322,
      "step": 1650
    },
    {
      "epoch": 3.9227166276346606,
      "grad_norm": 432225.5625,
      "learning_rate": 5.162466843501326e-05,
      "loss": 1.0799,
      "step": 1675
    },
    {
      "epoch": 3.981264637002342,
      "grad_norm": 257739.796875,
      "learning_rate": 5.1127320954907156e-05,
      "loss": 1.1112,
      "step": 1700
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.6807628524046434,
      "eval_loss": 0.9268983006477356,
      "eval_runtime": 16.1942,
      "eval_samples_per_second": 74.471,
      "eval_steps_per_second": 4.693,
      "step": 1708
    },
    {
      "epoch": 4.039812646370024,
      "grad_norm": 165557.71875,
      "learning_rate": 5.062997347480105e-05,
      "loss": 1.1045,
      "step": 1725
    },
    {
      "epoch": 4.098360655737705,
      "grad_norm": 242673.453125,
      "learning_rate": 5.013262599469496e-05,
      "loss": 1.1158,
      "step": 1750
    },
    {
      "epoch": 4.156908665105386,
      "grad_norm": 238596.03125,
      "learning_rate": 4.9635278514588855e-05,
      "loss": 1.0793,
      "step": 1775
    },
    {
      "epoch": 4.215456674473068,
      "grad_norm": 299998.21875,
      "learning_rate": 4.913793103448275e-05,
      "loss": 1.0823,
      "step": 1800
    },
    {
      "epoch": 4.274004683840749,
      "grad_norm": 426185.8125,
      "learning_rate": 4.864058355437666e-05,
      "loss": 1.0567,
      "step": 1825
    },
    {
      "epoch": 4.332552693208431,
      "grad_norm": 298885.8125,
      "learning_rate": 4.814323607427055e-05,
      "loss": 1.0354,
      "step": 1850
    },
    {
      "epoch": 4.391100702576113,
      "grad_norm": 264261.46875,
      "learning_rate": 4.764588859416445e-05,
      "loss": 1.0675,
      "step": 1875
    },
    {
      "epoch": 4.4496487119437935,
      "grad_norm": 290900.5625,
      "learning_rate": 4.7148541114058356e-05,
      "loss": 1.0601,
      "step": 1900
    },
    {
      "epoch": 4.508196721311475,
      "grad_norm": 244591.890625,
      "learning_rate": 4.665119363395225e-05,
      "loss": 1.0654,
      "step": 1925
    },
    {
      "epoch": 4.566744730679157,
      "grad_norm": 374488.9375,
      "learning_rate": 4.615384615384615e-05,
      "loss": 1.0583,
      "step": 1950
    },
    {
      "epoch": 4.625292740046838,
      "grad_norm": 306982.75,
      "learning_rate": 4.565649867374005e-05,
      "loss": 1.0611,
      "step": 1975
    },
    {
      "epoch": 4.68384074941452,
      "grad_norm": 290747.125,
      "learning_rate": 4.5159151193633946e-05,
      "loss": 1.0348,
      "step": 2000
    },
    {
      "epoch": 4.742388758782202,
      "grad_norm": 321678.875,
      "learning_rate": 4.4661803713527844e-05,
      "loss": 1.0607,
      "step": 2025
    },
    {
      "epoch": 4.800936768149883,
      "grad_norm": 256100.3125,
      "learning_rate": 4.416445623342175e-05,
      "loss": 1.079,
      "step": 2050
    },
    {
      "epoch": 4.859484777517564,
      "grad_norm": 220711.328125,
      "learning_rate": 4.366710875331565e-05,
      "loss": 1.0379,
      "step": 2075
    },
    {
      "epoch": 4.918032786885246,
      "grad_norm": 368012.65625,
      "learning_rate": 4.316976127320954e-05,
      "loss": 1.0855,
      "step": 2100
    },
    {
      "epoch": 4.976580796252927,
      "grad_norm": 228352.640625,
      "learning_rate": 4.267241379310345e-05,
      "loss": 1.0695,
      "step": 2125
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.7006633499170812,
      "eval_loss": 0.8837149739265442,
      "eval_runtime": 16.6116,
      "eval_samples_per_second": 72.6,
      "eval_steps_per_second": 4.575,
      "step": 2135
    },
    {
      "epoch": 5.035128805620609,
      "grad_norm": 299516.875,
      "learning_rate": 4.217506631299734e-05,
      "loss": 1.0534,
      "step": 2150
    },
    {
      "epoch": 5.0936768149882905,
      "grad_norm": 278051.8125,
      "learning_rate": 4.167771883289124e-05,
      "loss": 1.0196,
      "step": 2175
    },
    {
      "epoch": 5.152224824355972,
      "grad_norm": 250583.34375,
      "learning_rate": 4.118037135278515e-05,
      "loss": 1.0269,
      "step": 2200
    },
    {
      "epoch": 5.210772833723653,
      "grad_norm": 263307.125,
      "learning_rate": 4.068302387267904e-05,
      "loss": 1.0053,
      "step": 2225
    },
    {
      "epoch": 5.269320843091335,
      "grad_norm": 315005.5,
      "learning_rate": 4.018567639257294e-05,
      "loss": 1.0084,
      "step": 2250
    },
    {
      "epoch": 5.327868852459017,
      "grad_norm": 283217.6875,
      "learning_rate": 3.968832891246684e-05,
      "loss": 1.0222,
      "step": 2275
    },
    {
      "epoch": 5.386416861826698,
      "grad_norm": 187646.140625,
      "learning_rate": 3.9190981432360744e-05,
      "loss": 1.0519,
      "step": 2300
    },
    {
      "epoch": 5.444964871194379,
      "grad_norm": 359555.5625,
      "learning_rate": 3.8693633952254635e-05,
      "loss": 0.9909,
      "step": 2325
    },
    {
      "epoch": 5.503512880562061,
      "grad_norm": 162633.640625,
      "learning_rate": 3.819628647214854e-05,
      "loss": 1.0314,
      "step": 2350
    },
    {
      "epoch": 5.562060889929742,
      "grad_norm": 297492.15625,
      "learning_rate": 3.769893899204244e-05,
      "loss": 1.0072,
      "step": 2375
    },
    {
      "epoch": 5.620608899297424,
      "grad_norm": 348473.5625,
      "learning_rate": 3.7201591511936334e-05,
      "loss": 1.0567,
      "step": 2400
    },
    {
      "epoch": 5.679156908665106,
      "grad_norm": 211456.953125,
      "learning_rate": 3.670424403183023e-05,
      "loss": 1.0665,
      "step": 2425
    },
    {
      "epoch": 5.737704918032787,
      "grad_norm": 226591.765625,
      "learning_rate": 3.6206896551724136e-05,
      "loss": 1.0109,
      "step": 2450
    },
    {
      "epoch": 5.796252927400468,
      "grad_norm": 204899.03125,
      "learning_rate": 3.570954907161803e-05,
      "loss": 1.0153,
      "step": 2475
    },
    {
      "epoch": 5.85480093676815,
      "grad_norm": 327535.0625,
      "learning_rate": 3.521220159151193e-05,
      "loss": 0.958,
      "step": 2500
    },
    {
      "epoch": 5.913348946135831,
      "grad_norm": 255086.546875,
      "learning_rate": 3.4714854111405835e-05,
      "loss": 0.9752,
      "step": 2525
    },
    {
      "epoch": 5.971896955503513,
      "grad_norm": 669239.1875,
      "learning_rate": 3.421750663129973e-05,
      "loss": 1.0618,
      "step": 2550
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.7305140961857379,
      "eval_loss": 0.8650264739990234,
      "eval_runtime": 17.099,
      "eval_samples_per_second": 70.53,
      "eval_steps_per_second": 4.445,
      "step": 2562
    },
    {
      "epoch": 6.030444964871195,
      "grad_norm": 375855.8125,
      "learning_rate": 3.372015915119363e-05,
      "loss": 0.9571,
      "step": 2575
    },
    {
      "epoch": 6.0889929742388755,
      "grad_norm": 235724.953125,
      "learning_rate": 3.3222811671087534e-05,
      "loss": 0.9994,
      "step": 2600
    },
    {
      "epoch": 6.147540983606557,
      "grad_norm": 152826.78125,
      "learning_rate": 3.272546419098143e-05,
      "loss": 0.9421,
      "step": 2625
    },
    {
      "epoch": 6.206088992974239,
      "grad_norm": 251024.53125,
      "learning_rate": 3.222811671087533e-05,
      "loss": 0.9617,
      "step": 2650
    },
    {
      "epoch": 6.26463700234192,
      "grad_norm": 204976.96875,
      "learning_rate": 3.173076923076923e-05,
      "loss": 0.9922,
      "step": 2675
    },
    {
      "epoch": 6.323185011709602,
      "grad_norm": 237483.46875,
      "learning_rate": 3.1233421750663124e-05,
      "loss": 0.9958,
      "step": 2700
    },
    {
      "epoch": 6.381733021077284,
      "grad_norm": 342116.09375,
      "learning_rate": 3.073607427055702e-05,
      "loss": 0.909,
      "step": 2725
    },
    {
      "epoch": 6.4402810304449645,
      "grad_norm": 360569.84375,
      "learning_rate": 3.0238726790450926e-05,
      "loss": 0.9662,
      "step": 2750
    },
    {
      "epoch": 6.498829039812646,
      "grad_norm": 226234.515625,
      "learning_rate": 2.9741379310344827e-05,
      "loss": 0.9662,
      "step": 2775
    },
    {
      "epoch": 6.557377049180328,
      "grad_norm": 332145.3125,
      "learning_rate": 2.9244031830238725e-05,
      "loss": 0.9835,
      "step": 2800
    },
    {
      "epoch": 6.61592505854801,
      "grad_norm": 265223.03125,
      "learning_rate": 2.8746684350132622e-05,
      "loss": 0.9827,
      "step": 2825
    },
    {
      "epoch": 6.674473067915691,
      "grad_norm": 622789.5,
      "learning_rate": 2.8249336870026523e-05,
      "loss": 0.9694,
      "step": 2850
    },
    {
      "epoch": 6.7330210772833725,
      "grad_norm": 216138.46875,
      "learning_rate": 2.775198938992042e-05,
      "loss": 0.9772,
      "step": 2875
    },
    {
      "epoch": 6.791569086651053,
      "grad_norm": 287519.5625,
      "learning_rate": 2.725464190981432e-05,
      "loss": 0.9552,
      "step": 2900
    },
    {
      "epoch": 6.850117096018735,
      "grad_norm": 356344.1875,
      "learning_rate": 2.6757294429708222e-05,
      "loss": 0.9777,
      "step": 2925
    },
    {
      "epoch": 6.908665105386417,
      "grad_norm": 240447.515625,
      "learning_rate": 2.625994694960212e-05,
      "loss": 0.9473,
      "step": 2950
    },
    {
      "epoch": 6.967213114754099,
      "grad_norm": 268753.3125,
      "learning_rate": 2.5762599469496018e-05,
      "loss": 0.978,
      "step": 2975
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.7388059701492538,
      "eval_loss": 0.8298783302307129,
      "eval_runtime": 17.0775,
      "eval_samples_per_second": 70.619,
      "eval_steps_per_second": 4.45,
      "step": 2989
    },
    {
      "epoch": 7.02576112412178,
      "grad_norm": 146248.125,
      "learning_rate": 2.526525198938992e-05,
      "loss": 0.92,
      "step": 3000
    },
    {
      "epoch": 7.0843091334894615,
      "grad_norm": 447062.75,
      "learning_rate": 2.4767904509283816e-05,
      "loss": 0.8693,
      "step": 3025
    },
    {
      "epoch": 7.142857142857143,
      "grad_norm": 442032.96875,
      "learning_rate": 2.427055702917772e-05,
      "loss": 0.9494,
      "step": 3050
    },
    {
      "epoch": 7.201405152224824,
      "grad_norm": 358304.59375,
      "learning_rate": 2.3773209549071618e-05,
      "loss": 0.9677,
      "step": 3075
    },
    {
      "epoch": 7.259953161592506,
      "grad_norm": 344184.375,
      "learning_rate": 2.3275862068965515e-05,
      "loss": 0.9526,
      "step": 3100
    },
    {
      "epoch": 7.318501170960188,
      "grad_norm": 299834.84375,
      "learning_rate": 2.2778514588859413e-05,
      "loss": 0.9318,
      "step": 3125
    },
    {
      "epoch": 7.377049180327869,
      "grad_norm": 471871.3125,
      "learning_rate": 2.2281167108753314e-05,
      "loss": 0.8812,
      "step": 3150
    },
    {
      "epoch": 7.43559718969555,
      "grad_norm": 343672.65625,
      "learning_rate": 2.178381962864721e-05,
      "loss": 0.9122,
      "step": 3175
    },
    {
      "epoch": 7.494145199063232,
      "grad_norm": 292307.5,
      "learning_rate": 2.128647214854111e-05,
      "loss": 0.891,
      "step": 3200
    },
    {
      "epoch": 7.552693208430913,
      "grad_norm": 369647.6875,
      "learning_rate": 2.0789124668435013e-05,
      "loss": 0.9503,
      "step": 3225
    },
    {
      "epoch": 7.611241217798595,
      "grad_norm": 267383.53125,
      "learning_rate": 2.029177718832891e-05,
      "loss": 0.921,
      "step": 3250
    },
    {
      "epoch": 7.669789227166277,
      "grad_norm": 204631.796875,
      "learning_rate": 1.979442970822281e-05,
      "loss": 0.9479,
      "step": 3275
    },
    {
      "epoch": 7.7283372365339575,
      "grad_norm": 463962.78125,
      "learning_rate": 1.929708222811671e-05,
      "loss": 0.9359,
      "step": 3300
    },
    {
      "epoch": 7.786885245901639,
      "grad_norm": 269463.9375,
      "learning_rate": 1.8799734748010607e-05,
      "loss": 0.9056,
      "step": 3325
    },
    {
      "epoch": 7.845433255269321,
      "grad_norm": 268737.5625,
      "learning_rate": 1.8302387267904507e-05,
      "loss": 0.8957,
      "step": 3350
    },
    {
      "epoch": 7.903981264637002,
      "grad_norm": 223022.0,
      "learning_rate": 1.7805039787798405e-05,
      "loss": 0.9156,
      "step": 3375
    },
    {
      "epoch": 7.962529274004684,
      "grad_norm": 374429.5,
      "learning_rate": 1.7307692307692306e-05,
      "loss": 0.9225,
      "step": 3400
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.746268656716418,
      "eval_loss": 0.8513649106025696,
      "eval_runtime": 17.1518,
      "eval_samples_per_second": 70.313,
      "eval_steps_per_second": 4.431,
      "step": 3416
    },
    {
      "epoch": 8.021077283372366,
      "grad_norm": 350031.15625,
      "learning_rate": 1.6810344827586207e-05,
      "loss": 0.897,
      "step": 3425
    },
    {
      "epoch": 8.079625292740047,
      "grad_norm": 408048.34375,
      "learning_rate": 1.6312997347480104e-05,
      "loss": 0.8868,
      "step": 3450
    },
    {
      "epoch": 8.13817330210773,
      "grad_norm": 246055.984375,
      "learning_rate": 1.5815649867374005e-05,
      "loss": 0.8633,
      "step": 3475
    },
    {
      "epoch": 8.19672131147541,
      "grad_norm": 299428.96875,
      "learning_rate": 1.5318302387267903e-05,
      "loss": 0.8216,
      "step": 3500
    },
    {
      "epoch": 8.255269320843091,
      "grad_norm": 327893.78125,
      "learning_rate": 1.4820954907161802e-05,
      "loss": 0.9055,
      "step": 3525
    },
    {
      "epoch": 8.313817330210773,
      "grad_norm": 304376.03125,
      "learning_rate": 1.4323607427055701e-05,
      "loss": 0.8963,
      "step": 3550
    },
    {
      "epoch": 8.372365339578455,
      "grad_norm": 353551.28125,
      "learning_rate": 1.3826259946949602e-05,
      "loss": 0.8697,
      "step": 3575
    },
    {
      "epoch": 8.430913348946136,
      "grad_norm": 269133.59375,
      "learning_rate": 1.33289124668435e-05,
      "loss": 0.8524,
      "step": 3600
    },
    {
      "epoch": 8.489461358313818,
      "grad_norm": 285575.21875,
      "learning_rate": 1.2831564986737399e-05,
      "loss": 0.8774,
      "step": 3625
    },
    {
      "epoch": 8.548009367681498,
      "grad_norm": 308519.40625,
      "learning_rate": 1.23342175066313e-05,
      "loss": 0.9149,
      "step": 3650
    },
    {
      "epoch": 8.60655737704918,
      "grad_norm": 315740.9375,
      "learning_rate": 1.1836870026525197e-05,
      "loss": 0.9201,
      "step": 3675
    },
    {
      "epoch": 8.665105386416862,
      "grad_norm": 276186.9375,
      "learning_rate": 1.1339522546419097e-05,
      "loss": 0.8658,
      "step": 3700
    },
    {
      "epoch": 8.723653395784543,
      "grad_norm": 265033.71875,
      "learning_rate": 1.0842175066312997e-05,
      "loss": 0.8278,
      "step": 3725
    },
    {
      "epoch": 8.782201405152225,
      "grad_norm": 267537.15625,
      "learning_rate": 1.0344827586206895e-05,
      "loss": 0.8673,
      "step": 3750
    },
    {
      "epoch": 8.840749414519907,
      "grad_norm": 284098.4375,
      "learning_rate": 9.847480106100794e-06,
      "loss": 0.882,
      "step": 3775
    },
    {
      "epoch": 8.899297423887587,
      "grad_norm": 378377.4375,
      "learning_rate": 9.350132625994693e-06,
      "loss": 0.9042,
      "step": 3800
    },
    {
      "epoch": 8.957845433255269,
      "grad_norm": 339395.0,
      "learning_rate": 8.852785145888593e-06,
      "loss": 0.8603,
      "step": 3825
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.7587064676616916,
      "eval_loss": 0.811172604560852,
      "eval_runtime": 16.4729,
      "eval_samples_per_second": 73.211,
      "eval_steps_per_second": 4.614,
      "step": 3843
    },
    {
      "epoch": 9.01639344262295,
      "grad_norm": 264327.84375,
      "learning_rate": 8.355437665782494e-06,
      "loss": 0.8228,
      "step": 3850
    },
    {
      "epoch": 9.074941451990632,
      "grad_norm": 197204.109375,
      "learning_rate": 7.858090185676391e-06,
      "loss": 0.8017,
      "step": 3875
    },
    {
      "epoch": 9.133489461358314,
      "grad_norm": 274553.625,
      "learning_rate": 7.360742705570291e-06,
      "loss": 0.8603,
      "step": 3900
    },
    {
      "epoch": 9.192037470725996,
      "grad_norm": 303570.9375,
      "learning_rate": 6.86339522546419e-06,
      "loss": 0.8836,
      "step": 3925
    },
    {
      "epoch": 9.250585480093676,
      "grad_norm": 300032.3125,
      "learning_rate": 6.3660477453580895e-06,
      "loss": 0.8269,
      "step": 3950
    },
    {
      "epoch": 9.309133489461358,
      "grad_norm": 274956.25,
      "learning_rate": 5.868700265251989e-06,
      "loss": 0.8442,
      "step": 3975
    },
    {
      "epoch": 9.36768149882904,
      "grad_norm": 346913.625,
      "learning_rate": 5.371352785145889e-06,
      "loss": 0.8217,
      "step": 4000
    },
    {
      "epoch": 9.426229508196721,
      "grad_norm": 272469.28125,
      "learning_rate": 4.874005305039787e-06,
      "loss": 0.7651,
      "step": 4025
    },
    {
      "epoch": 9.484777517564403,
      "grad_norm": 278612.375,
      "learning_rate": 4.376657824933686e-06,
      "loss": 0.7951,
      "step": 4050
    },
    {
      "epoch": 9.543325526932085,
      "grad_norm": 290784.125,
      "learning_rate": 3.879310344827586e-06,
      "loss": 0.8942,
      "step": 4075
    },
    {
      "epoch": 9.601873536299767,
      "grad_norm": 234786.703125,
      "learning_rate": 3.381962864721485e-06,
      "loss": 0.8875,
      "step": 4100
    },
    {
      "epoch": 9.660421545667447,
      "grad_norm": 173186.40625,
      "learning_rate": 2.8846153846153845e-06,
      "loss": 0.8097,
      "step": 4125
    },
    {
      "epoch": 9.718969555035128,
      "grad_norm": 251768.390625,
      "learning_rate": 2.3872679045092837e-06,
      "loss": 0.9162,
      "step": 4150
    },
    {
      "epoch": 9.77751756440281,
      "grad_norm": 341676.25,
      "learning_rate": 1.8899204244031829e-06,
      "loss": 0.8171,
      "step": 4175
    },
    {
      "epoch": 9.836065573770492,
      "grad_norm": 303664.0,
      "learning_rate": 1.3925729442970821e-06,
      "loss": 0.8194,
      "step": 4200
    },
    {
      "epoch": 9.894613583138174,
      "grad_norm": 224924.5625,
      "learning_rate": 8.952254641909813e-07,
      "loss": 0.8319,
      "step": 4225
    },
    {
      "epoch": 9.953161592505854,
      "grad_norm": 348901.71875,
      "learning_rate": 3.978779840848806e-07,
      "loss": 0.8756,
      "step": 4250
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.7545605306799337,
      "eval_loss": 0.8203858733177185,
      "eval_runtime": 17.0537,
      "eval_samples_per_second": 70.718,
      "eval_steps_per_second": 4.456,
      "step": 4270
    },
    {
      "epoch": 10.0,
      "step": 4270,
      "total_flos": 5.510586115727032e+19,
      "train_loss": 1.0657175841320314,
      "train_runtime": 2342.3552,
      "train_samples_per_second": 29.159,
      "train_steps_per_second": 1.823
    }
  ],
  "logging_steps": 25,
  "max_steps": 4270,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 5.510586115727032e+19,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}