|
{ |
|
"best_metric": 0.9034653465346535, |
|
"best_model_checkpoint": "Electrcical-IMAGE-finetuned-eurosat/checkpoint-171", |
|
"epoch": 19.649122807017545, |
|
"eval_steps": 500, |
|
"global_step": 560, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.3508771929824561, |
|
"grad_norm": 6.915553569793701, |
|
"learning_rate": 8.92857142857143e-06, |
|
"loss": 0.2535, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.7017543859649122, |
|
"grad_norm": 6.068195819854736, |
|
"learning_rate": 1.785714285714286e-05, |
|
"loss": 0.2536, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.9824561403508771, |
|
"eval_accuracy": 0.8985148514851485, |
|
"eval_loss": 0.3073369264602661, |
|
"eval_runtime": 4.5045, |
|
"eval_samples_per_second": 89.687, |
|
"eval_steps_per_second": 2.886, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 1.0526315789473684, |
|
"grad_norm": 4.642627716064453, |
|
"learning_rate": 2.6785714285714288e-05, |
|
"loss": 0.2506, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.4035087719298245, |
|
"grad_norm": 5.193023681640625, |
|
"learning_rate": 3.571428571428572e-05, |
|
"loss": 0.2254, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.7543859649122808, |
|
"grad_norm": 6.378445148468018, |
|
"learning_rate": 4.464285714285715e-05, |
|
"loss": 0.1702, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8811881188118812, |
|
"eval_loss": 0.32594048976898193, |
|
"eval_runtime": 4.1579, |
|
"eval_samples_per_second": 97.165, |
|
"eval_steps_per_second": 3.127, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 2.1052631578947367, |
|
"grad_norm": 6.762551307678223, |
|
"learning_rate": 4.960317460317461e-05, |
|
"loss": 0.2028, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 2.456140350877193, |
|
"grad_norm": 9.539087295532227, |
|
"learning_rate": 4.8611111111111115e-05, |
|
"loss": 0.166, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 2.807017543859649, |
|
"grad_norm": 5.8399810791015625, |
|
"learning_rate": 4.761904761904762e-05, |
|
"loss": 0.1528, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 2.982456140350877, |
|
"eval_accuracy": 0.8836633663366337, |
|
"eval_loss": 0.3369888365268707, |
|
"eval_runtime": 4.4971, |
|
"eval_samples_per_second": 89.837, |
|
"eval_steps_per_second": 2.891, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 3.1578947368421053, |
|
"grad_norm": 2.9166860580444336, |
|
"learning_rate": 4.662698412698413e-05, |
|
"loss": 0.1767, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 3.5087719298245617, |
|
"grad_norm": 3.477200984954834, |
|
"learning_rate": 4.563492063492064e-05, |
|
"loss": 0.1722, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 3.8596491228070176, |
|
"grad_norm": 3.7534408569335938, |
|
"learning_rate": 4.464285714285715e-05, |
|
"loss": 0.157, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8811881188118812, |
|
"eval_loss": 0.3372877538204193, |
|
"eval_runtime": 4.1256, |
|
"eval_samples_per_second": 97.926, |
|
"eval_steps_per_second": 3.151, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 4.2105263157894735, |
|
"grad_norm": 5.6130452156066895, |
|
"learning_rate": 4.3650793650793655e-05, |
|
"loss": 0.1275, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 4.56140350877193, |
|
"grad_norm": 6.323825836181641, |
|
"learning_rate": 4.265873015873016e-05, |
|
"loss": 0.1678, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 4.912280701754386, |
|
"grad_norm": 8.424270629882812, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 0.1577, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 4.982456140350877, |
|
"eval_accuracy": 0.8861386138613861, |
|
"eval_loss": 0.3574221134185791, |
|
"eval_runtime": 4.1778, |
|
"eval_samples_per_second": 96.701, |
|
"eval_steps_per_second": 3.112, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 5.2631578947368425, |
|
"grad_norm": 7.5651936531066895, |
|
"learning_rate": 4.067460317460318e-05, |
|
"loss": 0.1328, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 5.614035087719298, |
|
"grad_norm": 7.6120195388793945, |
|
"learning_rate": 3.968253968253968e-05, |
|
"loss": 0.1428, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 5.964912280701754, |
|
"grad_norm": 9.945347785949707, |
|
"learning_rate": 3.8690476190476195e-05, |
|
"loss": 0.1485, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9034653465346535, |
|
"eval_loss": 0.33881905674934387, |
|
"eval_runtime": 4.2693, |
|
"eval_samples_per_second": 94.629, |
|
"eval_steps_per_second": 3.045, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 6.315789473684211, |
|
"grad_norm": 5.8488969802856445, |
|
"learning_rate": 3.76984126984127e-05, |
|
"loss": 0.1419, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 6.666666666666667, |
|
"grad_norm": 5.49446964263916, |
|
"learning_rate": 3.6706349206349205e-05, |
|
"loss": 0.1715, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 6.982456140350877, |
|
"eval_accuracy": 0.8935643564356436, |
|
"eval_loss": 0.3339557945728302, |
|
"eval_runtime": 4.2324, |
|
"eval_samples_per_second": 95.453, |
|
"eval_steps_per_second": 3.072, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 7.017543859649122, |
|
"grad_norm": 7.122160911560059, |
|
"learning_rate": 3.571428571428572e-05, |
|
"loss": 0.2154, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 7.368421052631579, |
|
"grad_norm": 7.540256977081299, |
|
"learning_rate": 3.472222222222222e-05, |
|
"loss": 0.2314, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 7.719298245614035, |
|
"grad_norm": 6.774203777313232, |
|
"learning_rate": 3.3730158730158734e-05, |
|
"loss": 0.2079, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8960396039603961, |
|
"eval_loss": 0.3254563808441162, |
|
"eval_runtime": 4.306, |
|
"eval_samples_per_second": 93.823, |
|
"eval_steps_per_second": 3.019, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 8.070175438596491, |
|
"grad_norm": 8.224860191345215, |
|
"learning_rate": 3.273809523809524e-05, |
|
"loss": 0.223, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 8.421052631578947, |
|
"grad_norm": 4.823063850402832, |
|
"learning_rate": 3.1746031746031745e-05, |
|
"loss": 0.1889, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 8.771929824561404, |
|
"grad_norm": 6.559729099273682, |
|
"learning_rate": 3.075396825396826e-05, |
|
"loss": 0.1846, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 8.982456140350877, |
|
"eval_accuracy": 0.8985148514851485, |
|
"eval_loss": 0.33677831292152405, |
|
"eval_runtime": 4.1555, |
|
"eval_samples_per_second": 97.221, |
|
"eval_steps_per_second": 3.128, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 9.12280701754386, |
|
"grad_norm": 4.693236827850342, |
|
"learning_rate": 2.9761904761904762e-05, |
|
"loss": 0.1898, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 9.473684210526315, |
|
"grad_norm": 7.4912261962890625, |
|
"learning_rate": 2.876984126984127e-05, |
|
"loss": 0.2097, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 9.824561403508772, |
|
"grad_norm": 5.2636213302612305, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.1829, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8985148514851485, |
|
"eval_loss": 0.317474901676178, |
|
"eval_runtime": 4.3916, |
|
"eval_samples_per_second": 91.994, |
|
"eval_steps_per_second": 2.96, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 10.175438596491228, |
|
"grad_norm": 4.9916558265686035, |
|
"learning_rate": 2.6785714285714288e-05, |
|
"loss": 0.1684, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 10.526315789473685, |
|
"grad_norm": 6.921645641326904, |
|
"learning_rate": 2.5793650793650796e-05, |
|
"loss": 0.1897, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 10.87719298245614, |
|
"grad_norm": 9.2024507522583, |
|
"learning_rate": 2.4801587301587305e-05, |
|
"loss": 0.1874, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 10.982456140350877, |
|
"eval_accuracy": 0.8910891089108911, |
|
"eval_loss": 0.3384435176849365, |
|
"eval_runtime": 4.1888, |
|
"eval_samples_per_second": 96.449, |
|
"eval_steps_per_second": 3.104, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 11.228070175438596, |
|
"grad_norm": 3.4831368923187256, |
|
"learning_rate": 2.380952380952381e-05, |
|
"loss": 0.1468, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 11.578947368421053, |
|
"grad_norm": 30.23830795288086, |
|
"learning_rate": 2.281746031746032e-05, |
|
"loss": 0.1723, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 11.929824561403509, |
|
"grad_norm": 6.060935974121094, |
|
"learning_rate": 2.1825396825396827e-05, |
|
"loss": 0.1826, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8985148514851485, |
|
"eval_loss": 0.32836616039276123, |
|
"eval_runtime": 4.5565, |
|
"eval_samples_per_second": 88.664, |
|
"eval_steps_per_second": 2.853, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 12.280701754385966, |
|
"grad_norm": 8.074810981750488, |
|
"learning_rate": 2.0833333333333336e-05, |
|
"loss": 0.1798, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 12.631578947368421, |
|
"grad_norm": 6.300619125366211, |
|
"learning_rate": 1.984126984126984e-05, |
|
"loss": 0.1516, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 12.982456140350877, |
|
"grad_norm": 8.272747993469238, |
|
"learning_rate": 1.884920634920635e-05, |
|
"loss": 0.1911, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 12.982456140350877, |
|
"eval_accuracy": 0.8910891089108911, |
|
"eval_loss": 0.3503906726837158, |
|
"eval_runtime": 4.1749, |
|
"eval_samples_per_second": 96.769, |
|
"eval_steps_per_second": 3.114, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 13.333333333333334, |
|
"grad_norm": 6.439492702484131, |
|
"learning_rate": 1.785714285714286e-05, |
|
"loss": 0.1755, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 13.68421052631579, |
|
"grad_norm": 10.013567924499512, |
|
"learning_rate": 1.6865079365079367e-05, |
|
"loss": 0.1552, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8935643564356436, |
|
"eval_loss": 0.31156373023986816, |
|
"eval_runtime": 4.4403, |
|
"eval_samples_per_second": 90.985, |
|
"eval_steps_per_second": 2.928, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 14.035087719298245, |
|
"grad_norm": 4.990965843200684, |
|
"learning_rate": 1.5873015873015872e-05, |
|
"loss": 0.1737, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 14.385964912280702, |
|
"grad_norm": 5.88447380065918, |
|
"learning_rate": 1.4880952380952381e-05, |
|
"loss": 0.1589, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 14.736842105263158, |
|
"grad_norm": 7.048079490661621, |
|
"learning_rate": 1.388888888888889e-05, |
|
"loss": 0.1855, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 14.982456140350877, |
|
"eval_accuracy": 0.8960396039603961, |
|
"eval_loss": 0.3131696581840515, |
|
"eval_runtime": 4.1515, |
|
"eval_samples_per_second": 97.315, |
|
"eval_steps_per_second": 3.131, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 15.087719298245615, |
|
"grad_norm": 5.223410129547119, |
|
"learning_rate": 1.2896825396825398e-05, |
|
"loss": 0.1491, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 15.43859649122807, |
|
"grad_norm": 5.402275562286377, |
|
"learning_rate": 1.1904761904761905e-05, |
|
"loss": 0.1627, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 15.789473684210526, |
|
"grad_norm": 8.536218643188477, |
|
"learning_rate": 1.0912698412698414e-05, |
|
"loss": 0.1537, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.900990099009901, |
|
"eval_loss": 0.30002227425575256, |
|
"eval_runtime": 4.5205, |
|
"eval_samples_per_second": 89.37, |
|
"eval_steps_per_second": 2.876, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 16.140350877192983, |
|
"grad_norm": 6.947802543640137, |
|
"learning_rate": 9.92063492063492e-06, |
|
"loss": 0.1511, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 16.49122807017544, |
|
"grad_norm": 2.9147555828094482, |
|
"learning_rate": 8.92857142857143e-06, |
|
"loss": 0.1232, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 16.842105263157894, |
|
"grad_norm": 4.058919906616211, |
|
"learning_rate": 7.936507936507936e-06, |
|
"loss": 0.118, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 16.982456140350877, |
|
"eval_accuracy": 0.8985148514851485, |
|
"eval_loss": 0.326092928647995, |
|
"eval_runtime": 4.138, |
|
"eval_samples_per_second": 97.632, |
|
"eval_steps_per_second": 3.142, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 17.19298245614035, |
|
"grad_norm": 8.858957290649414, |
|
"learning_rate": 6.944444444444445e-06, |
|
"loss": 0.1479, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 17.54385964912281, |
|
"grad_norm": 6.7740373611450195, |
|
"learning_rate": 5.9523809523809525e-06, |
|
"loss": 0.1512, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 17.894736842105264, |
|
"grad_norm": 5.1415534019470215, |
|
"learning_rate": 4.96031746031746e-06, |
|
"loss": 0.1483, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8960396039603961, |
|
"eval_loss": 0.31427323818206787, |
|
"eval_runtime": 4.8063, |
|
"eval_samples_per_second": 84.056, |
|
"eval_steps_per_second": 2.705, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 18.24561403508772, |
|
"grad_norm": 5.084235668182373, |
|
"learning_rate": 3.968253968253968e-06, |
|
"loss": 0.1392, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 18.596491228070175, |
|
"grad_norm": 4.290576934814453, |
|
"learning_rate": 2.9761904761904763e-06, |
|
"loss": 0.1191, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 18.94736842105263, |
|
"grad_norm": 5.261678218841553, |
|
"learning_rate": 1.984126984126984e-06, |
|
"loss": 0.1169, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 18.982456140350877, |
|
"eval_accuracy": 0.9034653465346535, |
|
"eval_loss": 0.3101979196071625, |
|
"eval_runtime": 4.2151, |
|
"eval_samples_per_second": 95.846, |
|
"eval_steps_per_second": 3.084, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 19.29824561403509, |
|
"grad_norm": 5.632396221160889, |
|
"learning_rate": 9.92063492063492e-07, |
|
"loss": 0.1521, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 19.649122807017545, |
|
"grad_norm": 6.291007995605469, |
|
"learning_rate": 0.0, |
|
"loss": 0.1268, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 19.649122807017545, |
|
"eval_accuracy": 0.8985148514851485, |
|
"eval_loss": 0.3104223906993866, |
|
"eval_runtime": 4.1976, |
|
"eval_samples_per_second": 96.244, |
|
"eval_steps_per_second": 3.097, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 19.649122807017545, |
|
"step": 560, |
|
"total_flos": 1.7752639958663823e+18, |
|
"train_loss": 0.17112446086747304, |
|
"train_runtime": 1440.5794, |
|
"train_samples_per_second": 50.452, |
|
"train_steps_per_second": 0.389 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 560, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 1.7752639958663823e+18, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|