{ "best_metric": 1.0, "best_model_checkpoint": "beit-base-patch16-224-hasta-75-fold1/checkpoint-18", "epoch": 100.0, "eval_steps": 500, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.0, "eval_loss": 2.2077012062072754, "eval_runtime": 0.1832, "eval_samples_per_second": 65.496, "eval_steps_per_second": 5.458, "step": 1 }, { "epoch": 2.0, "eval_accuracy": 0.0, "eval_loss": 1.7828108072280884, "eval_runtime": 0.181, "eval_samples_per_second": 66.304, "eval_steps_per_second": 5.525, "step": 2 }, { "epoch": 3.0, "eval_accuracy": 0.3333333333333333, "eval_loss": 1.0542601346969604, "eval_runtime": 0.181, "eval_samples_per_second": 66.296, "eval_steps_per_second": 5.525, "step": 3 }, { "epoch": 4.0, "eval_accuracy": 0.9166666666666666, "eval_loss": 0.4304567277431488, "eval_runtime": 0.1804, "eval_samples_per_second": 66.504, "eval_steps_per_second": 5.542, "step": 4 }, { "epoch": 5.0, "eval_accuracy": 0.9166666666666666, "eval_loss": 0.26718705892562866, "eval_runtime": 0.1816, "eval_samples_per_second": 66.078, "eval_steps_per_second": 5.506, "step": 5 }, { "epoch": 6.0, "eval_accuracy": 0.9166666666666666, "eval_loss": 0.3206009566783905, "eval_runtime": 0.1785, "eval_samples_per_second": 67.21, "eval_steps_per_second": 5.601, "step": 6 }, { "epoch": 7.0, "eval_accuracy": 0.9166666666666666, "eval_loss": 0.3317834734916687, "eval_runtime": 0.1781, "eval_samples_per_second": 67.376, "eval_steps_per_second": 5.615, "step": 7 }, { "epoch": 8.0, "eval_accuracy": 0.9166666666666666, "eval_loss": 0.30066391825675964, "eval_runtime": 0.1822, "eval_samples_per_second": 65.88, "eval_steps_per_second": 5.49, "step": 8 }, { "epoch": 9.0, "eval_accuracy": 0.9166666666666666, "eval_loss": 0.32681936025619507, "eval_runtime": 0.1797, "eval_samples_per_second": 66.776, "eval_steps_per_second": 5.565, "step": 9 }, { "epoch": 10.0, "grad_norm": 4.443905830383301, "learning_rate": 5e-05, "loss": 0.4863, "step": 10 }, { "epoch": 10.0, "eval_accuracy": 0.9166666666666666, "eval_loss": 0.3424707353115082, "eval_runtime": 0.1796, "eval_samples_per_second": 66.82, "eval_steps_per_second": 5.568, "step": 10 }, { "epoch": 11.0, "eval_accuracy": 0.9166666666666666, "eval_loss": 0.2905902564525604, "eval_runtime": 0.1813, "eval_samples_per_second": 66.201, "eval_steps_per_second": 5.517, "step": 11 }, { "epoch": 12.0, "eval_accuracy": 0.9166666666666666, "eval_loss": 0.26393598318099976, "eval_runtime": 0.1806, "eval_samples_per_second": 66.428, "eval_steps_per_second": 5.536, "step": 12 }, { "epoch": 13.0, "eval_accuracy": 0.9166666666666666, "eval_loss": 0.29621657729148865, "eval_runtime": 0.1827, "eval_samples_per_second": 65.665, "eval_steps_per_second": 5.472, "step": 13 }, { "epoch": 14.0, "eval_accuracy": 0.8333333333333334, "eval_loss": 0.444223016500473, "eval_runtime": 0.1832, "eval_samples_per_second": 65.485, "eval_steps_per_second": 5.457, "step": 14 }, { "epoch": 15.0, "eval_accuracy": 0.8333333333333334, "eval_loss": 0.3107624351978302, "eval_runtime": 0.1828, "eval_samples_per_second": 65.656, "eval_steps_per_second": 5.471, "step": 15 }, { "epoch": 16.0, "eval_accuracy": 0.9166666666666666, "eval_loss": 0.2321174144744873, "eval_runtime": 0.18, "eval_samples_per_second": 66.659, "eval_steps_per_second": 5.555, "step": 16 }, { "epoch": 17.0, "eval_accuracy": 0.9166666666666666, "eval_loss": 0.23090867698192596, "eval_runtime": 0.1791, "eval_samples_per_second": 67.013, "eval_steps_per_second": 5.584, "step": 17 }, { "epoch": 18.0, "eval_accuracy": 1.0, "eval_loss": 0.24317891895771027, "eval_runtime": 0.1826, "eval_samples_per_second": 65.703, "eval_steps_per_second": 5.475, "step": 18 }, { "epoch": 19.0, "eval_accuracy": 1.0, "eval_loss": 0.22404837608337402, "eval_runtime": 0.1858, "eval_samples_per_second": 64.581, "eval_steps_per_second": 5.382, "step": 19 }, { "epoch": 20.0, "grad_norm": 2.4318888187408447, "learning_rate": 4.4444444444444447e-05, "loss": 0.1603, "step": 20 }, { "epoch": 20.0, "eval_accuracy": 0.9166666666666666, "eval_loss": 0.16075477004051208, "eval_runtime": 0.1738, "eval_samples_per_second": 69.059, "eval_steps_per_second": 5.755, "step": 20 }, { "epoch": 21.0, "eval_accuracy": 0.9166666666666666, "eval_loss": 0.1274556964635849, "eval_runtime": 0.1768, "eval_samples_per_second": 67.881, "eval_steps_per_second": 5.657, "step": 21 }, { "epoch": 22.0, "eval_accuracy": 0.9166666666666666, "eval_loss": 0.11907526105642319, "eval_runtime": 0.1807, "eval_samples_per_second": 66.408, "eval_steps_per_second": 5.534, "step": 22 }, { "epoch": 23.0, "eval_accuracy": 0.9166666666666666, "eval_loss": 0.10295626521110535, "eval_runtime": 0.1785, "eval_samples_per_second": 67.233, "eval_steps_per_second": 5.603, "step": 23 }, { "epoch": 24.0, "eval_accuracy": 1.0, "eval_loss": 0.10096978396177292, "eval_runtime": 0.184, "eval_samples_per_second": 65.212, "eval_steps_per_second": 5.434, "step": 24 }, { "epoch": 25.0, "eval_accuracy": 1.0, "eval_loss": 0.08157830685377121, "eval_runtime": 0.1815, "eval_samples_per_second": 66.129, "eval_steps_per_second": 5.511, "step": 25 }, { "epoch": 26.0, "eval_accuracy": 0.9166666666666666, "eval_loss": 0.1813768893480301, "eval_runtime": 0.1773, "eval_samples_per_second": 67.673, "eval_steps_per_second": 5.639, "step": 26 }, { "epoch": 27.0, "eval_accuracy": 0.9166666666666666, "eval_loss": 0.16538240015506744, "eval_runtime": 0.1785, "eval_samples_per_second": 67.215, "eval_steps_per_second": 5.601, "step": 27 }, { "epoch": 28.0, "eval_accuracy": 1.0, "eval_loss": 0.09447892755270004, "eval_runtime": 0.1866, "eval_samples_per_second": 64.315, "eval_steps_per_second": 5.36, "step": 28 }, { "epoch": 29.0, "eval_accuracy": 1.0, "eval_loss": 0.08471832424402237, "eval_runtime": 0.178, "eval_samples_per_second": 67.43, "eval_steps_per_second": 5.619, "step": 29 }, { "epoch": 30.0, "grad_norm": 2.647221088409424, "learning_rate": 3.888888888888889e-05, "loss": 0.1007, "step": 30 }, { "epoch": 30.0, "eval_accuracy": 1.0, "eval_loss": 0.15657053887844086, "eval_runtime": 0.1773, "eval_samples_per_second": 67.666, "eval_steps_per_second": 5.639, "step": 30 }, { "epoch": 31.0, "eval_accuracy": 1.0, "eval_loss": 0.0818849727511406, "eval_runtime": 0.1805, "eval_samples_per_second": 66.482, "eval_steps_per_second": 5.54, "step": 31 }, { "epoch": 32.0, "eval_accuracy": 1.0, "eval_loss": 0.07821951806545258, "eval_runtime": 0.1819, "eval_samples_per_second": 65.982, "eval_steps_per_second": 5.498, "step": 32 }, { "epoch": 33.0, "eval_accuracy": 1.0, "eval_loss": 0.0781416967511177, "eval_runtime": 0.1816, "eval_samples_per_second": 66.073, "eval_steps_per_second": 5.506, "step": 33 }, { "epoch": 34.0, "eval_accuracy": 1.0, "eval_loss": 0.06351720541715622, "eval_runtime": 0.1762, "eval_samples_per_second": 68.092, "eval_steps_per_second": 5.674, "step": 34 }, { "epoch": 35.0, "eval_accuracy": 1.0, "eval_loss": 0.06752466410398483, "eval_runtime": 0.1843, "eval_samples_per_second": 65.122, "eval_steps_per_second": 5.427, "step": 35 }, { "epoch": 36.0, "eval_accuracy": 1.0, "eval_loss": 0.1137080118060112, "eval_runtime": 0.1789, "eval_samples_per_second": 67.069, "eval_steps_per_second": 5.589, "step": 36 }, { "epoch": 37.0, "eval_accuracy": 0.9166666666666666, "eval_loss": 0.12665808200836182, "eval_runtime": 0.1849, "eval_samples_per_second": 64.894, "eval_steps_per_second": 5.408, "step": 37 }, { "epoch": 38.0, "eval_accuracy": 0.9166666666666666, "eval_loss": 0.1438184678554535, "eval_runtime": 0.1772, "eval_samples_per_second": 67.727, "eval_steps_per_second": 5.644, "step": 38 }, { "epoch": 39.0, "eval_accuracy": 0.9166666666666666, "eval_loss": 0.1301492154598236, "eval_runtime": 0.1815, "eval_samples_per_second": 66.113, "eval_steps_per_second": 5.509, "step": 39 }, { "epoch": 40.0, "grad_norm": 2.206031560897827, "learning_rate": 3.3333333333333335e-05, "loss": 0.0573, "step": 40 }, { "epoch": 40.0, "eval_accuracy": 0.9166666666666666, "eval_loss": 0.11228451877832413, "eval_runtime": 0.1755, "eval_samples_per_second": 68.382, "eval_steps_per_second": 5.699, "step": 40 }, { "epoch": 41.0, "eval_accuracy": 1.0, "eval_loss": 0.0673147514462471, "eval_runtime": 0.1793, "eval_samples_per_second": 66.913, "eval_steps_per_second": 5.576, "step": 41 }, { "epoch": 42.0, "eval_accuracy": 1.0, "eval_loss": 0.026470685377717018, "eval_runtime": 0.1912, "eval_samples_per_second": 62.752, "eval_steps_per_second": 5.229, "step": 42 }, { "epoch": 43.0, "eval_accuracy": 1.0, "eval_loss": 0.031682901084423065, "eval_runtime": 0.1799, "eval_samples_per_second": 66.702, "eval_steps_per_second": 5.558, "step": 43 }, { "epoch": 44.0, "eval_accuracy": 1.0, "eval_loss": 0.04610493779182434, "eval_runtime": 0.1797, "eval_samples_per_second": 66.786, "eval_steps_per_second": 5.566, "step": 44 }, { "epoch": 45.0, "eval_accuracy": 1.0, "eval_loss": 0.03257732838392258, "eval_runtime": 0.1852, "eval_samples_per_second": 64.791, "eval_steps_per_second": 5.399, "step": 45 }, { "epoch": 46.0, "eval_accuracy": 1.0, "eval_loss": 0.022052617743611336, "eval_runtime": 0.1804, "eval_samples_per_second": 66.535, "eval_steps_per_second": 5.545, "step": 46 }, { "epoch": 47.0, "eval_accuracy": 1.0, "eval_loss": 0.022693796083331108, "eval_runtime": 0.1857, "eval_samples_per_second": 64.616, "eval_steps_per_second": 5.385, "step": 47 }, { "epoch": 48.0, "eval_accuracy": 1.0, "eval_loss": 0.021375102922320366, "eval_runtime": 0.1811, "eval_samples_per_second": 66.277, "eval_steps_per_second": 5.523, "step": 48 }, { "epoch": 49.0, "eval_accuracy": 1.0, "eval_loss": 0.017641151323914528, "eval_runtime": 0.1792, "eval_samples_per_second": 66.969, "eval_steps_per_second": 5.581, "step": 49 }, { "epoch": 50.0, "grad_norm": 2.6644582748413086, "learning_rate": 2.777777777777778e-05, "loss": 0.0566, "step": 50 }, { "epoch": 50.0, "eval_accuracy": 1.0, "eval_loss": 0.014967289753258228, "eval_runtime": 0.1798, "eval_samples_per_second": 66.749, "eval_steps_per_second": 5.562, "step": 50 }, { "epoch": 51.0, "eval_accuracy": 1.0, "eval_loss": 0.015390962362289429, "eval_runtime": 0.1849, "eval_samples_per_second": 64.898, "eval_steps_per_second": 5.408, "step": 51 }, { "epoch": 52.0, "eval_accuracy": 1.0, "eval_loss": 0.013894349336624146, "eval_runtime": 0.1828, "eval_samples_per_second": 65.644, "eval_steps_per_second": 5.47, "step": 52 }, { "epoch": 53.0, "eval_accuracy": 1.0, "eval_loss": 0.009721777401864529, "eval_runtime": 0.183, "eval_samples_per_second": 65.56, "eval_steps_per_second": 5.463, "step": 53 }, { "epoch": 54.0, "eval_accuracy": 1.0, "eval_loss": 0.01429061684757471, "eval_runtime": 0.1805, "eval_samples_per_second": 66.473, "eval_steps_per_second": 5.539, "step": 54 }, { "epoch": 55.0, "eval_accuracy": 1.0, "eval_loss": 0.027167662978172302, "eval_runtime": 0.1829, "eval_samples_per_second": 65.604, "eval_steps_per_second": 5.467, "step": 55 }, { "epoch": 56.0, "eval_accuracy": 1.0, "eval_loss": 0.04265804961323738, "eval_runtime": 0.1773, "eval_samples_per_second": 67.682, "eval_steps_per_second": 5.64, "step": 56 }, { "epoch": 57.0, "eval_accuracy": 1.0, "eval_loss": 0.03430110216140747, "eval_runtime": 0.1781, "eval_samples_per_second": 67.383, "eval_steps_per_second": 5.615, "step": 57 }, { "epoch": 58.0, "eval_accuracy": 1.0, "eval_loss": 0.028985222801566124, "eval_runtime": 0.1797, "eval_samples_per_second": 66.762, "eval_steps_per_second": 5.563, "step": 58 }, { "epoch": 59.0, "eval_accuracy": 1.0, "eval_loss": 0.05569181963801384, "eval_runtime": 0.1795, "eval_samples_per_second": 66.845, "eval_steps_per_second": 5.57, "step": 59 }, { "epoch": 60.0, "grad_norm": 1.0558639764785767, "learning_rate": 2.2222222222222223e-05, "loss": 0.0242, "step": 60 }, { "epoch": 60.0, "eval_accuracy": 1.0, "eval_loss": 0.0904507040977478, "eval_runtime": 0.1778, "eval_samples_per_second": 67.489, "eval_steps_per_second": 5.624, "step": 60 }, { "epoch": 61.0, "eval_accuracy": 0.9166666666666666, "eval_loss": 0.1373545080423355, "eval_runtime": 0.1803, "eval_samples_per_second": 66.553, "eval_steps_per_second": 5.546, "step": 61 }, { "epoch": 62.0, "eval_accuracy": 0.9166666666666666, "eval_loss": 0.17625844478607178, "eval_runtime": 0.1822, "eval_samples_per_second": 65.851, "eval_steps_per_second": 5.488, "step": 62 }, { "epoch": 63.0, "eval_accuracy": 0.9166666666666666, "eval_loss": 0.179295614361763, "eval_runtime": 0.1819, "eval_samples_per_second": 65.963, "eval_steps_per_second": 5.497, "step": 63 }, { "epoch": 64.0, "eval_accuracy": 0.9166666666666666, "eval_loss": 0.1639540046453476, "eval_runtime": 0.1821, "eval_samples_per_second": 65.893, "eval_steps_per_second": 5.491, "step": 64 }, { "epoch": 65.0, "eval_accuracy": 0.9166666666666666, "eval_loss": 0.14449164271354675, "eval_runtime": 0.1825, "eval_samples_per_second": 65.738, "eval_steps_per_second": 5.478, "step": 65 }, { "epoch": 66.0, "eval_accuracy": 1.0, "eval_loss": 0.10922203212976456, "eval_runtime": 0.1776, "eval_samples_per_second": 67.567, "eval_steps_per_second": 5.631, "step": 66 }, { "epoch": 67.0, "eval_accuracy": 1.0, "eval_loss": 0.09147190302610397, "eval_runtime": 0.1777, "eval_samples_per_second": 67.527, "eval_steps_per_second": 5.627, "step": 67 }, { "epoch": 68.0, "eval_accuracy": 1.0, "eval_loss": 0.06401555240154266, "eval_runtime": 0.1867, "eval_samples_per_second": 64.265, "eval_steps_per_second": 5.355, "step": 68 }, { "epoch": 69.0, "eval_accuracy": 1.0, "eval_loss": 0.03757796436548233, "eval_runtime": 0.1798, "eval_samples_per_second": 66.739, "eval_steps_per_second": 5.562, "step": 69 }, { "epoch": 70.0, "grad_norm": 1.6899287700653076, "learning_rate": 1.6666666666666667e-05, "loss": 0.0339, "step": 70 }, { "epoch": 70.0, "eval_accuracy": 1.0, "eval_loss": 0.02968785911798477, "eval_runtime": 0.1748, "eval_samples_per_second": 68.654, "eval_steps_per_second": 5.721, "step": 70 }, { "epoch": 71.0, "eval_accuracy": 1.0, "eval_loss": 0.023778622969985008, "eval_runtime": 0.1792, "eval_samples_per_second": 66.955, "eval_steps_per_second": 5.58, "step": 71 }, { "epoch": 72.0, "eval_accuracy": 1.0, "eval_loss": 0.01778542995452881, "eval_runtime": 0.1836, "eval_samples_per_second": 65.367, "eval_steps_per_second": 5.447, "step": 72 }, { "epoch": 73.0, "eval_accuracy": 1.0, "eval_loss": 0.01042032241821289, "eval_runtime": 0.1791, "eval_samples_per_second": 67.01, "eval_steps_per_second": 5.584, "step": 73 }, { "epoch": 74.0, "eval_accuracy": 1.0, "eval_loss": 0.0063457731157541275, "eval_runtime": 0.178, "eval_samples_per_second": 67.432, "eval_steps_per_second": 5.619, "step": 74 }, { "epoch": 75.0, "eval_accuracy": 1.0, "eval_loss": 0.004192787688225508, "eval_runtime": 0.18, "eval_samples_per_second": 66.67, "eval_steps_per_second": 5.556, "step": 75 }, { "epoch": 76.0, "eval_accuracy": 1.0, "eval_loss": 0.003145643277093768, "eval_runtime": 0.1809, "eval_samples_per_second": 66.32, "eval_steps_per_second": 5.527, "step": 76 }, { "epoch": 77.0, "eval_accuracy": 1.0, "eval_loss": 0.0028521008789539337, "eval_runtime": 0.1833, "eval_samples_per_second": 65.45, "eval_steps_per_second": 5.454, "step": 77 }, { "epoch": 78.0, "eval_accuracy": 1.0, "eval_loss": 0.003350273473188281, "eval_runtime": 0.1814, "eval_samples_per_second": 66.157, "eval_steps_per_second": 5.513, "step": 78 }, { "epoch": 79.0, "eval_accuracy": 1.0, "eval_loss": 0.0035124036949127913, "eval_runtime": 0.1822, "eval_samples_per_second": 65.852, "eval_steps_per_second": 5.488, "step": 79 }, { "epoch": 80.0, "grad_norm": 5.092667579650879, "learning_rate": 1.1111111111111112e-05, "loss": 0.0532, "step": 80 }, { "epoch": 80.0, "eval_accuracy": 1.0, "eval_loss": 0.0035463341046124697, "eval_runtime": 0.1807, "eval_samples_per_second": 66.392, "eval_steps_per_second": 5.533, "step": 80 }, { "epoch": 81.0, "eval_accuracy": 1.0, "eval_loss": 0.0038894719909876585, "eval_runtime": 0.1814, "eval_samples_per_second": 66.14, "eval_steps_per_second": 5.512, "step": 81 }, { "epoch": 82.0, "eval_accuracy": 1.0, "eval_loss": 0.005399493966251612, "eval_runtime": 0.1807, "eval_samples_per_second": 66.412, "eval_steps_per_second": 5.534, "step": 82 }, { "epoch": 83.0, "eval_accuracy": 1.0, "eval_loss": 0.010995940305292606, "eval_runtime": 0.1792, "eval_samples_per_second": 66.972, "eval_steps_per_second": 5.581, "step": 83 }, { "epoch": 84.0, "eval_accuracy": 1.0, "eval_loss": 0.025468939915299416, "eval_runtime": 0.1831, "eval_samples_per_second": 65.53, "eval_steps_per_second": 5.461, "step": 84 }, { "epoch": 85.0, "eval_accuracy": 1.0, "eval_loss": 0.05003314092755318, "eval_runtime": 0.1803, "eval_samples_per_second": 66.549, "eval_steps_per_second": 5.546, "step": 85 }, { "epoch": 86.0, "eval_accuracy": 0.9166666666666666, "eval_loss": 0.08439437299966812, "eval_runtime": 0.1773, "eval_samples_per_second": 67.674, "eval_steps_per_second": 5.64, "step": 86 }, { "epoch": 87.0, "eval_accuracy": 0.9166666666666666, "eval_loss": 0.11905407905578613, "eval_runtime": 0.1855, "eval_samples_per_second": 64.702, "eval_steps_per_second": 5.392, "step": 87 }, { "epoch": 88.0, "eval_accuracy": 0.9166666666666666, "eval_loss": 0.14367732405662537, "eval_runtime": 0.1795, "eval_samples_per_second": 66.857, "eval_steps_per_second": 5.571, "step": 88 }, { "epoch": 89.0, "eval_accuracy": 0.9166666666666666, "eval_loss": 0.15639398992061615, "eval_runtime": 0.1833, "eval_samples_per_second": 65.456, "eval_steps_per_second": 5.455, "step": 89 }, { "epoch": 90.0, "grad_norm": 1.2882177829742432, "learning_rate": 5.555555555555556e-06, "loss": 0.0316, "step": 90 }, { "epoch": 90.0, "eval_accuracy": 0.9166666666666666, "eval_loss": 0.15441086888313293, "eval_runtime": 0.1825, "eval_samples_per_second": 65.747, "eval_steps_per_second": 5.479, "step": 90 }, { "epoch": 91.0, "eval_accuracy": 0.9166666666666666, "eval_loss": 0.1454831212759018, "eval_runtime": 0.1777, "eval_samples_per_second": 67.533, "eval_steps_per_second": 5.628, "step": 91 }, { "epoch": 92.0, "eval_accuracy": 0.9166666666666666, "eval_loss": 0.13830962777137756, "eval_runtime": 0.1799, "eval_samples_per_second": 66.706, "eval_steps_per_second": 5.559, "step": 92 }, { "epoch": 93.0, "eval_accuracy": 0.9166666666666666, "eval_loss": 0.11936721205711365, "eval_runtime": 0.1803, "eval_samples_per_second": 66.542, "eval_steps_per_second": 5.545, "step": 93 }, { "epoch": 94.0, "eval_accuracy": 0.9166666666666666, "eval_loss": 0.10268055647611618, "eval_runtime": 0.1808, "eval_samples_per_second": 66.384, "eval_steps_per_second": 5.532, "step": 94 }, { "epoch": 95.0, "eval_accuracy": 0.9166666666666666, "eval_loss": 0.08746868371963501, "eval_runtime": 0.1911, "eval_samples_per_second": 62.805, "eval_steps_per_second": 5.234, "step": 95 }, { "epoch": 96.0, "eval_accuracy": 1.0, "eval_loss": 0.07148657739162445, "eval_runtime": 0.1759, "eval_samples_per_second": 68.224, "eval_steps_per_second": 5.685, "step": 96 }, { "epoch": 97.0, "eval_accuracy": 1.0, "eval_loss": 0.060756634920835495, "eval_runtime": 0.1821, "eval_samples_per_second": 65.899, "eval_steps_per_second": 5.492, "step": 97 }, { "epoch": 98.0, "eval_accuracy": 1.0, "eval_loss": 0.05189364030957222, "eval_runtime": 0.1845, "eval_samples_per_second": 65.035, "eval_steps_per_second": 5.42, "step": 98 }, { "epoch": 99.0, "eval_accuracy": 1.0, "eval_loss": 0.04678298905491829, "eval_runtime": 0.182, "eval_samples_per_second": 65.937, "eval_steps_per_second": 5.495, "step": 99 }, { "epoch": 100.0, "grad_norm": 3.1393415927886963, "learning_rate": 0.0, "loss": 0.0299, "step": 100 }, { "epoch": 100.0, "eval_accuracy": 1.0, "eval_loss": 0.04420096054673195, "eval_runtime": 0.1774, "eval_samples_per_second": 67.659, "eval_steps_per_second": 5.638, "step": 100 }, { "epoch": 100.0, "step": 100, "total_flos": 4.802492916781056e+17, "train_loss": 0.10338099300861359, "train_runtime": 420.0817, "train_samples_per_second": 14.759, "train_steps_per_second": 0.238 }, { "epoch": 100.0, "eval_accuracy": 1.0, "eval_loss": 0.24317891895771027, "eval_runtime": 0.2414, "eval_samples_per_second": 49.711, "eval_steps_per_second": 4.143, "step": 100 } ], "logging_steps": 10, "max_steps": 100, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.802492916781056e+17, "train_batch_size": 32, "trial_name": null, "trial_params": null }