|
{ |
|
"best_metric": 1.0, |
|
"best_model_checkpoint": "beit-base-patch16-224-hasta-75-fold4/checkpoint-30", |
|
"epoch": 100.0, |
|
"eval_steps": 500, |
|
"global_step": 100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.08333333333333333, |
|
"eval_loss": 2.3240678310394287, |
|
"eval_runtime": 0.1712, |
|
"eval_samples_per_second": 70.075, |
|
"eval_steps_per_second": 5.84, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.08333333333333333, |
|
"eval_loss": 1.8900293111801147, |
|
"eval_runtime": 0.1855, |
|
"eval_samples_per_second": 64.684, |
|
"eval_steps_per_second": 5.39, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.3333333333333333, |
|
"eval_loss": 1.1748418807983398, |
|
"eval_runtime": 0.1814, |
|
"eval_samples_per_second": 66.134, |
|
"eval_steps_per_second": 5.511, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.5751119256019592, |
|
"eval_runtime": 0.1769, |
|
"eval_samples_per_second": 67.841, |
|
"eval_steps_per_second": 5.653, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.42243918776512146, |
|
"eval_runtime": 0.1835, |
|
"eval_samples_per_second": 65.396, |
|
"eval_steps_per_second": 5.45, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.3931402266025543, |
|
"eval_runtime": 0.176, |
|
"eval_samples_per_second": 68.184, |
|
"eval_steps_per_second": 5.682, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.36350491642951965, |
|
"eval_runtime": 0.1804, |
|
"eval_samples_per_second": 66.514, |
|
"eval_steps_per_second": 5.543, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8333333333333334, |
|
"eval_loss": 0.5636505484580994, |
|
"eval_runtime": 0.1786, |
|
"eval_samples_per_second": 67.173, |
|
"eval_steps_per_second": 5.598, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.4256010949611664, |
|
"eval_runtime": 0.1803, |
|
"eval_samples_per_second": 66.56, |
|
"eval_steps_per_second": 5.547, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 2.9971117973327637, |
|
"learning_rate": 5e-05, |
|
"loss": 0.4996, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.2829968333244324, |
|
"eval_runtime": 0.1808, |
|
"eval_samples_per_second": 66.372, |
|
"eval_steps_per_second": 5.531, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.27432435750961304, |
|
"eval_runtime": 0.1821, |
|
"eval_samples_per_second": 65.883, |
|
"eval_steps_per_second": 5.49, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.4504832327365875, |
|
"eval_runtime": 0.1832, |
|
"eval_samples_per_second": 65.49, |
|
"eval_steps_per_second": 5.458, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.3551560342311859, |
|
"eval_runtime": 0.1799, |
|
"eval_samples_per_second": 66.701, |
|
"eval_steps_per_second": 5.558, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.24533440172672272, |
|
"eval_runtime": 0.1786, |
|
"eval_samples_per_second": 67.184, |
|
"eval_steps_per_second": 5.599, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.25282785296440125, |
|
"eval_runtime": 0.1754, |
|
"eval_samples_per_second": 68.41, |
|
"eval_steps_per_second": 5.701, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.292589396238327, |
|
"eval_runtime": 0.1736, |
|
"eval_samples_per_second": 69.116, |
|
"eval_steps_per_second": 5.76, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.8333333333333334, |
|
"eval_loss": 0.3253059685230255, |
|
"eval_runtime": 0.1798, |
|
"eval_samples_per_second": 66.746, |
|
"eval_steps_per_second": 5.562, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8333333333333334, |
|
"eval_loss": 0.33665966987609863, |
|
"eval_runtime": 0.1802, |
|
"eval_samples_per_second": 66.592, |
|
"eval_steps_per_second": 5.549, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.8333333333333334, |
|
"eval_loss": 0.3681429326534271, |
|
"eval_runtime": 0.1762, |
|
"eval_samples_per_second": 68.114, |
|
"eval_steps_per_second": 5.676, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 3.239417314529419, |
|
"learning_rate": 4.4444444444444447e-05, |
|
"loss": 0.1796, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.26769375801086426, |
|
"eval_runtime": 0.1824, |
|
"eval_samples_per_second": 65.804, |
|
"eval_steps_per_second": 5.484, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.2703898251056671, |
|
"eval_runtime": 0.1838, |
|
"eval_samples_per_second": 65.272, |
|
"eval_steps_per_second": 5.439, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.3115863502025604, |
|
"eval_runtime": 0.1833, |
|
"eval_samples_per_second": 65.477, |
|
"eval_steps_per_second": 5.456, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.36495253443717957, |
|
"eval_runtime": 0.179, |
|
"eval_samples_per_second": 67.023, |
|
"eval_steps_per_second": 5.585, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.21704982221126556, |
|
"eval_runtime": 0.1764, |
|
"eval_samples_per_second": 68.023, |
|
"eval_steps_per_second": 5.669, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.21135270595550537, |
|
"eval_runtime": 0.1851, |
|
"eval_samples_per_second": 64.832, |
|
"eval_steps_per_second": 5.403, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.1976069211959839, |
|
"eval_runtime": 0.1808, |
|
"eval_samples_per_second": 66.38, |
|
"eval_steps_per_second": 5.532, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.16190174221992493, |
|
"eval_runtime": 0.1803, |
|
"eval_samples_per_second": 66.549, |
|
"eval_steps_per_second": 5.546, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.1646103709936142, |
|
"eval_runtime": 0.1783, |
|
"eval_samples_per_second": 67.31, |
|
"eval_steps_per_second": 5.609, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.1431630551815033, |
|
"eval_runtime": 0.1822, |
|
"eval_samples_per_second": 65.851, |
|
"eval_steps_per_second": 5.488, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"grad_norm": 2.7249929904937744, |
|
"learning_rate": 3.888888888888889e-05, |
|
"loss": 0.1179, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.08116643875837326, |
|
"eval_runtime": 0.1826, |
|
"eval_samples_per_second": 65.716, |
|
"eval_steps_per_second": 5.476, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.11628057807683945, |
|
"eval_runtime": 0.1856, |
|
"eval_samples_per_second": 64.665, |
|
"eval_steps_per_second": 5.389, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.08982894569635391, |
|
"eval_runtime": 0.1763, |
|
"eval_samples_per_second": 68.058, |
|
"eval_steps_per_second": 5.672, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.11901328712701797, |
|
"eval_runtime": 0.1805, |
|
"eval_samples_per_second": 66.477, |
|
"eval_steps_per_second": 5.54, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.14636912941932678, |
|
"eval_runtime": 0.1814, |
|
"eval_samples_per_second": 66.154, |
|
"eval_steps_per_second": 5.513, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.11360140889883041, |
|
"eval_runtime": 0.1941, |
|
"eval_samples_per_second": 61.837, |
|
"eval_steps_per_second": 5.153, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.2270035743713379, |
|
"eval_runtime": 0.1821, |
|
"eval_samples_per_second": 65.904, |
|
"eval_steps_per_second": 5.492, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.22648875415325165, |
|
"eval_runtime": 0.1802, |
|
"eval_samples_per_second": 66.606, |
|
"eval_steps_per_second": 5.551, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.09951093047857285, |
|
"eval_runtime": 0.1797, |
|
"eval_samples_per_second": 66.76, |
|
"eval_steps_per_second": 5.563, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.08532660454511642, |
|
"eval_runtime": 0.1843, |
|
"eval_samples_per_second": 65.1, |
|
"eval_steps_per_second": 5.425, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"grad_norm": 3.2059624195098877, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.1084, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.08578302711248398, |
|
"eval_runtime": 0.1756, |
|
"eval_samples_per_second": 68.341, |
|
"eval_steps_per_second": 5.695, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.09704094380140305, |
|
"eval_runtime": 0.1802, |
|
"eval_samples_per_second": 66.602, |
|
"eval_steps_per_second": 5.55, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.09492971748113632, |
|
"eval_runtime": 0.1799, |
|
"eval_samples_per_second": 66.718, |
|
"eval_steps_per_second": 5.56, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.07086266577243805, |
|
"eval_runtime": 0.1808, |
|
"eval_samples_per_second": 66.372, |
|
"eval_steps_per_second": 5.531, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.08071430027484894, |
|
"eval_runtime": 0.1867, |
|
"eval_samples_per_second": 64.263, |
|
"eval_steps_per_second": 5.355, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.10515321046113968, |
|
"eval_runtime": 0.1877, |
|
"eval_samples_per_second": 63.943, |
|
"eval_steps_per_second": 5.329, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.06287472695112228, |
|
"eval_runtime": 0.176, |
|
"eval_samples_per_second": 68.165, |
|
"eval_steps_per_second": 5.68, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.027201684191823006, |
|
"eval_runtime": 0.189, |
|
"eval_samples_per_second": 63.503, |
|
"eval_steps_per_second": 5.292, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.07752759009599686, |
|
"eval_runtime": 0.1794, |
|
"eval_samples_per_second": 66.892, |
|
"eval_steps_per_second": 5.574, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.11133003234863281, |
|
"eval_runtime": 0.1852, |
|
"eval_samples_per_second": 64.795, |
|
"eval_steps_per_second": 5.4, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"grad_norm": 2.9116437435150146, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.0591, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.11887478828430176, |
|
"eval_runtime": 0.1815, |
|
"eval_samples_per_second": 66.12, |
|
"eval_steps_per_second": 5.51, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.0526016540825367, |
|
"eval_runtime": 0.1779, |
|
"eval_samples_per_second": 67.464, |
|
"eval_steps_per_second": 5.622, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.026192912831902504, |
|
"eval_runtime": 0.1811, |
|
"eval_samples_per_second": 66.253, |
|
"eval_steps_per_second": 5.521, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.10351991653442383, |
|
"eval_runtime": 0.1826, |
|
"eval_samples_per_second": 65.712, |
|
"eval_steps_per_second": 5.476, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.1508234441280365, |
|
"eval_runtime": 0.1903, |
|
"eval_samples_per_second": 63.065, |
|
"eval_steps_per_second": 5.255, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.12798932194709778, |
|
"eval_runtime": 0.1781, |
|
"eval_samples_per_second": 67.382, |
|
"eval_steps_per_second": 5.615, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.06516266614198685, |
|
"eval_runtime": 0.1865, |
|
"eval_samples_per_second": 64.335, |
|
"eval_steps_per_second": 5.361, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.03566673770546913, |
|
"eval_runtime": 0.1767, |
|
"eval_samples_per_second": 67.903, |
|
"eval_steps_per_second": 5.659, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.04070502147078514, |
|
"eval_runtime": 0.1785, |
|
"eval_samples_per_second": 67.241, |
|
"eval_steps_per_second": 5.603, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.04303682968020439, |
|
"eval_runtime": 0.1828, |
|
"eval_samples_per_second": 65.641, |
|
"eval_steps_per_second": 5.47, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"grad_norm": 3.849208354949951, |
|
"learning_rate": 2.2222222222222223e-05, |
|
"loss": 0.0637, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.04676436260342598, |
|
"eval_runtime": 0.1745, |
|
"eval_samples_per_second": 68.757, |
|
"eval_steps_per_second": 5.73, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.09966019541025162, |
|
"eval_runtime": 0.1901, |
|
"eval_samples_per_second": 63.116, |
|
"eval_steps_per_second": 5.26, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.21995067596435547, |
|
"eval_runtime": 0.1812, |
|
"eval_samples_per_second": 66.207, |
|
"eval_steps_per_second": 5.517, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.2979114353656769, |
|
"eval_runtime": 0.1817, |
|
"eval_samples_per_second": 66.049, |
|
"eval_steps_per_second": 5.504, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.31669190526008606, |
|
"eval_runtime": 0.1788, |
|
"eval_samples_per_second": 67.1, |
|
"eval_steps_per_second": 5.592, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.2610935568809509, |
|
"eval_runtime": 0.1781, |
|
"eval_samples_per_second": 67.365, |
|
"eval_steps_per_second": 5.614, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.16972792148590088, |
|
"eval_runtime": 0.1832, |
|
"eval_samples_per_second": 65.49, |
|
"eval_steps_per_second": 5.457, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.06688845902681351, |
|
"eval_runtime": 0.182, |
|
"eval_samples_per_second": 65.951, |
|
"eval_steps_per_second": 5.496, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.03132214397192001, |
|
"eval_runtime": 0.1795, |
|
"eval_samples_per_second": 66.867, |
|
"eval_steps_per_second": 5.572, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.025468146428465843, |
|
"eval_runtime": 0.1766, |
|
"eval_samples_per_second": 67.966, |
|
"eval_steps_per_second": 5.664, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"grad_norm": 1.297845482826233, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.0446, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.02426406554877758, |
|
"eval_runtime": 0.1784, |
|
"eval_samples_per_second": 67.269, |
|
"eval_steps_per_second": 5.606, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.01883484609425068, |
|
"eval_runtime": 0.1851, |
|
"eval_samples_per_second": 64.842, |
|
"eval_steps_per_second": 5.404, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.020957307890057564, |
|
"eval_runtime": 0.1781, |
|
"eval_samples_per_second": 67.372, |
|
"eval_steps_per_second": 5.614, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.026050806045532227, |
|
"eval_runtime": 0.1795, |
|
"eval_samples_per_second": 66.857, |
|
"eval_steps_per_second": 5.571, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.037800516933202744, |
|
"eval_runtime": 0.1914, |
|
"eval_samples_per_second": 62.71, |
|
"eval_steps_per_second": 5.226, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.04915888234972954, |
|
"eval_runtime": 0.1801, |
|
"eval_samples_per_second": 66.632, |
|
"eval_steps_per_second": 5.553, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.06793534010648727, |
|
"eval_runtime": 0.1788, |
|
"eval_samples_per_second": 67.109, |
|
"eval_steps_per_second": 5.592, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.09575098752975464, |
|
"eval_runtime": 0.1842, |
|
"eval_samples_per_second": 65.143, |
|
"eval_steps_per_second": 5.429, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.08034069091081619, |
|
"eval_runtime": 0.1823, |
|
"eval_samples_per_second": 65.842, |
|
"eval_steps_per_second": 5.487, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.04547484591603279, |
|
"eval_runtime": 0.187, |
|
"eval_samples_per_second": 64.181, |
|
"eval_steps_per_second": 5.348, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"grad_norm": 2.749316692352295, |
|
"learning_rate": 1.1111111111111112e-05, |
|
"loss": 0.0489, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.019428091123700142, |
|
"eval_runtime": 0.1818, |
|
"eval_samples_per_second": 66.004, |
|
"eval_steps_per_second": 5.5, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.01414876151829958, |
|
"eval_runtime": 0.1804, |
|
"eval_samples_per_second": 66.536, |
|
"eval_steps_per_second": 5.545, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.010926567018032074, |
|
"eval_runtime": 0.184, |
|
"eval_samples_per_second": 65.226, |
|
"eval_steps_per_second": 5.435, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.010399104095995426, |
|
"eval_runtime": 0.1795, |
|
"eval_samples_per_second": 66.842, |
|
"eval_steps_per_second": 5.57, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.010837866924703121, |
|
"eval_runtime": 0.1826, |
|
"eval_samples_per_second": 65.7, |
|
"eval_steps_per_second": 5.475, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.012089508585631847, |
|
"eval_runtime": 0.1809, |
|
"eval_samples_per_second": 66.338, |
|
"eval_steps_per_second": 5.528, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.011834497563540936, |
|
"eval_runtime": 0.1796, |
|
"eval_samples_per_second": 66.811, |
|
"eval_steps_per_second": 5.568, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 87.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.01087136659771204, |
|
"eval_runtime": 0.1832, |
|
"eval_samples_per_second": 65.487, |
|
"eval_steps_per_second": 5.457, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.010681281797587872, |
|
"eval_runtime": 0.1822, |
|
"eval_samples_per_second": 65.858, |
|
"eval_steps_per_second": 5.488, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 89.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.01070683915168047, |
|
"eval_runtime": 0.1833, |
|
"eval_samples_per_second": 65.471, |
|
"eval_steps_per_second": 5.456, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"grad_norm": 2.0978522300720215, |
|
"learning_rate": 5.555555555555556e-06, |
|
"loss": 0.0322, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.010707174427807331, |
|
"eval_runtime": 0.1844, |
|
"eval_samples_per_second": 65.093, |
|
"eval_steps_per_second": 5.424, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 91.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.010700911283493042, |
|
"eval_runtime": 0.1814, |
|
"eval_samples_per_second": 66.149, |
|
"eval_steps_per_second": 5.512, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.010619781911373138, |
|
"eval_runtime": 0.1869, |
|
"eval_samples_per_second": 64.213, |
|
"eval_steps_per_second": 5.351, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 93.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.010527804493904114, |
|
"eval_runtime": 0.1824, |
|
"eval_samples_per_second": 65.774, |
|
"eval_steps_per_second": 5.481, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 94.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.01050546020269394, |
|
"eval_runtime": 0.1744, |
|
"eval_samples_per_second": 68.793, |
|
"eval_steps_per_second": 5.733, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.010545703582465649, |
|
"eval_runtime": 0.1786, |
|
"eval_samples_per_second": 67.174, |
|
"eval_steps_per_second": 5.598, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.010599258355796337, |
|
"eval_runtime": 0.1836, |
|
"eval_samples_per_second": 65.353, |
|
"eval_steps_per_second": 5.446, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 97.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.010553344152867794, |
|
"eval_runtime": 0.176, |
|
"eval_samples_per_second": 68.182, |
|
"eval_steps_per_second": 5.682, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 98.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.010620239190757275, |
|
"eval_runtime": 0.1846, |
|
"eval_samples_per_second": 65.001, |
|
"eval_steps_per_second": 5.417, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 99.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.010768108069896698, |
|
"eval_runtime": 0.179, |
|
"eval_samples_per_second": 67.058, |
|
"eval_steps_per_second": 5.588, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"grad_norm": 1.8535388708114624, |
|
"learning_rate": 0.0, |
|
"loss": 0.0405, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.010857379995286465, |
|
"eval_runtime": 0.1769, |
|
"eval_samples_per_second": 67.841, |
|
"eval_steps_per_second": 5.653, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"step": 100, |
|
"total_flos": 4.802492916781056e+17, |
|
"train_loss": 0.11944382786750793, |
|
"train_runtime": 434.603, |
|
"train_samples_per_second": 14.266, |
|
"train_steps_per_second": 0.23 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.08116643875837326, |
|
"eval_runtime": 0.2193, |
|
"eval_samples_per_second": 54.708, |
|
"eval_steps_per_second": 4.559, |
|
"step": 100 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 100, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 100, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.802492916781056e+17, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|