{ "best_metric": 1.0, "best_model_checkpoint": "./swin-soiral/checkpoint-72", "epoch": 100.0, "eval_steps": 500, "global_step": 400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.5, "eval_loss": 0.7685546875, "eval_runtime": 0.2552, "eval_samples_per_second": 47.013, "eval_steps_per_second": 7.835, "step": 4 }, { "epoch": 1.25, "learning_rate": 1.0000000000000002e-06, "loss": 0.7776, "step": 5 }, { "epoch": 2.0, "eval_accuracy": 0.5, "eval_loss": 0.7459920048713684, "eval_runtime": 0.2163, "eval_samples_per_second": 55.485, "eval_steps_per_second": 9.247, "step": 8 }, { "epoch": 2.5, "learning_rate": 2.25e-06, "loss": 0.7839, "step": 10 }, { "epoch": 3.0, "eval_accuracy": 0.5, "eval_loss": 0.71142578125, "eval_runtime": 0.2223, "eval_samples_per_second": 53.982, "eval_steps_per_second": 8.997, "step": 12 }, { "epoch": 3.75, "learning_rate": 3.5e-06, "loss": 0.6965, "step": 15 }, { "epoch": 4.0, "eval_accuracy": 0.6666666666666666, "eval_loss": 0.6708170771598816, "eval_runtime": 0.1987, "eval_samples_per_second": 60.404, "eval_steps_per_second": 10.067, "step": 16 }, { "epoch": 5.0, "learning_rate": 4.75e-06, "loss": 0.6778, "step": 20 }, { "epoch": 5.0, "eval_accuracy": 0.6666666666666666, "eval_loss": 0.62982177734375, "eval_runtime": 0.1998, "eval_samples_per_second": 60.045, "eval_steps_per_second": 10.008, "step": 20 }, { "epoch": 6.0, "eval_accuracy": 0.6666666666666666, "eval_loss": 0.5903117060661316, "eval_runtime": 0.2941, "eval_samples_per_second": 40.798, "eval_steps_per_second": 6.8, "step": 24 }, { "epoch": 6.25, "learning_rate": 6e-06, "loss": 0.6297, "step": 25 }, { "epoch": 7.0, "eval_accuracy": 0.75, "eval_loss": 0.5383097529411316, "eval_runtime": 0.2066, "eval_samples_per_second": 58.089, "eval_steps_per_second": 9.682, "step": 28 }, { "epoch": 7.5, "learning_rate": 7.25e-06, "loss": 0.5812, "step": 30 }, { "epoch": 8.0, "eval_accuracy": 0.75, "eval_loss": 0.4876708984375, "eval_runtime": 0.2154, "eval_samples_per_second": 55.709, "eval_steps_per_second": 9.285, "step": 32 }, { "epoch": 8.75, "learning_rate": 8.5e-06, "loss": 0.5141, "step": 35 }, { "epoch": 9.0, "eval_accuracy": 0.9166666666666666, "eval_loss": 0.42340087890625, "eval_runtime": 0.2045, "eval_samples_per_second": 58.685, "eval_steps_per_second": 9.781, "step": 36 }, { "epoch": 10.0, "learning_rate": 9.75e-06, "loss": 0.4186, "step": 40 }, { "epoch": 10.0, "eval_accuracy": 0.9166666666666666, "eval_loss": 0.3495737612247467, "eval_runtime": 0.2143, "eval_samples_per_second": 55.994, "eval_steps_per_second": 9.332, "step": 40 }, { "epoch": 11.0, "eval_accuracy": 0.8333333333333334, "eval_loss": 0.2956085205078125, "eval_runtime": 0.204, "eval_samples_per_second": 58.832, "eval_steps_per_second": 9.805, "step": 44 }, { "epoch": 11.25, "learning_rate": 9.88888888888889e-06, "loss": 0.3791, "step": 45 }, { "epoch": 12.0, "eval_accuracy": 0.9166666666666666, "eval_loss": 0.26324209570884705, "eval_runtime": 0.2276, "eval_samples_per_second": 52.73, "eval_steps_per_second": 8.788, "step": 48 }, { "epoch": 12.5, "learning_rate": 9.75e-06, "loss": 0.2906, "step": 50 }, { "epoch": 13.0, "eval_accuracy": 0.9166666666666666, "eval_loss": 0.21923191845417023, "eval_runtime": 0.2866, "eval_samples_per_second": 41.866, "eval_steps_per_second": 6.978, "step": 52 }, { "epoch": 13.75, "learning_rate": 9.611111111111112e-06, "loss": 0.2247, "step": 55 }, { "epoch": 14.0, "eval_accuracy": 0.9166666666666666, "eval_loss": 0.18271827697753906, "eval_runtime": 0.2073, "eval_samples_per_second": 57.879, "eval_steps_per_second": 9.647, "step": 56 }, { "epoch": 15.0, "learning_rate": 9.472222222222223e-06, "loss": 0.1724, "step": 60 }, { "epoch": 15.0, "eval_accuracy": 0.9166666666666666, "eval_loss": 0.1536579132080078, "eval_runtime": 0.3096, "eval_samples_per_second": 38.763, "eval_steps_per_second": 6.461, "step": 60 }, { "epoch": 16.0, "eval_accuracy": 0.9166666666666666, "eval_loss": 0.11898771673440933, "eval_runtime": 0.207, "eval_samples_per_second": 57.977, "eval_steps_per_second": 9.663, "step": 64 }, { "epoch": 16.25, "learning_rate": 9.333333333333334e-06, "loss": 0.1077, "step": 65 }, { "epoch": 17.0, "eval_accuracy": 0.9166666666666666, "eval_loss": 0.12817128002643585, "eval_runtime": 0.2101, "eval_samples_per_second": 57.105, "eval_steps_per_second": 9.517, "step": 68 }, { "epoch": 17.5, "learning_rate": 9.194444444444445e-06, "loss": 0.1336, "step": 70 }, { "epoch": 18.0, "eval_accuracy": 1.0, "eval_loss": 0.0798807144165039, "eval_runtime": 0.3334, "eval_samples_per_second": 35.996, "eval_steps_per_second": 5.999, "step": 72 }, { "epoch": 18.75, "learning_rate": 9.083333333333333e-06, "loss": 0.0656, "step": 75 }, { "epoch": 19.0, "eval_accuracy": 1.0, "eval_loss": 0.05580584332346916, "eval_runtime": 0.206, "eval_samples_per_second": 58.24, "eval_steps_per_second": 9.707, "step": 76 }, { "epoch": 20.0, "learning_rate": 8.944444444444446e-06, "loss": 0.0564, "step": 80 }, { "epoch": 20.0, "eval_accuracy": 1.0, "eval_loss": 0.046213943511247635, "eval_runtime": 0.3162, "eval_samples_per_second": 37.95, "eval_steps_per_second": 6.325, "step": 80 }, { "epoch": 21.0, "eval_accuracy": 1.0, "eval_loss": 0.05256287381052971, "eval_runtime": 0.2134, "eval_samples_per_second": 56.23, "eval_steps_per_second": 9.372, "step": 84 }, { "epoch": 21.25, "learning_rate": 8.805555555555557e-06, "loss": 0.0703, "step": 85 }, { "epoch": 22.0, "eval_accuracy": 1.0, "eval_loss": 0.03809436038136482, "eval_runtime": 0.2, "eval_samples_per_second": 60.004, "eval_steps_per_second": 10.001, "step": 88 }, { "epoch": 22.5, "learning_rate": 8.666666666666668e-06, "loss": 0.044, "step": 90 }, { "epoch": 23.0, "eval_accuracy": 1.0, "eval_loss": 0.01835465431213379, "eval_runtime": 0.1962, "eval_samples_per_second": 61.174, "eval_steps_per_second": 10.196, "step": 92 }, { "epoch": 23.75, "learning_rate": 8.527777777777779e-06, "loss": 0.0239, "step": 95 }, { "epoch": 24.0, "eval_accuracy": 1.0, "eval_loss": 0.020169338211417198, "eval_runtime": 0.2151, "eval_samples_per_second": 55.779, "eval_steps_per_second": 9.297, "step": 96 }, { "epoch": 25.0, "learning_rate": 8.38888888888889e-06, "loss": 0.0214, "step": 100 }, { "epoch": 25.0, "eval_accuracy": 1.0, "eval_loss": 0.024330079555511475, "eval_runtime": 0.2292, "eval_samples_per_second": 52.363, "eval_steps_per_second": 8.727, "step": 100 }, { "epoch": 26.0, "eval_accuracy": 1.0, "eval_loss": 0.011685073375701904, "eval_runtime": 0.2099, "eval_samples_per_second": 57.169, "eval_steps_per_second": 9.528, "step": 104 }, { "epoch": 26.25, "learning_rate": 8.25e-06, "loss": 0.031, "step": 105 }, { "epoch": 27.0, "eval_accuracy": 1.0, "eval_loss": 0.008951862342655659, "eval_runtime": 0.2024, "eval_samples_per_second": 59.277, "eval_steps_per_second": 9.88, "step": 108 }, { "epoch": 27.5, "learning_rate": 8.111111111111112e-06, "loss": 0.0334, "step": 110 }, { "epoch": 28.0, "eval_accuracy": 1.0, "eval_loss": 0.03853602334856987, "eval_runtime": 0.2061, "eval_samples_per_second": 58.221, "eval_steps_per_second": 9.703, "step": 112 }, { "epoch": 28.75, "learning_rate": 7.972222222222224e-06, "loss": 0.0046, "step": 115 }, { "epoch": 29.0, "eval_accuracy": 1.0, "eval_loss": 0.03924691677093506, "eval_runtime": 0.3171, "eval_samples_per_second": 37.847, "eval_steps_per_second": 6.308, "step": 116 }, { "epoch": 30.0, "learning_rate": 7.833333333333333e-06, "loss": 0.0051, "step": 120 }, { "epoch": 30.0, "eval_accuracy": 1.0, "eval_loss": 0.0031117696780711412, "eval_runtime": 0.217, "eval_samples_per_second": 55.303, "eval_steps_per_second": 9.217, "step": 120 }, { "epoch": 31.0, "eval_accuracy": 1.0, "eval_loss": 0.0025899012107402086, "eval_runtime": 0.318, "eval_samples_per_second": 37.734, "eval_steps_per_second": 6.289, "step": 124 }, { "epoch": 31.25, "learning_rate": 7.694444444444446e-06, "loss": 0.0045, "step": 125 }, { "epoch": 32.0, "eval_accuracy": 1.0, "eval_loss": 0.002510249614715576, "eval_runtime": 0.2034, "eval_samples_per_second": 58.986, "eval_steps_per_second": 9.831, "step": 128 }, { "epoch": 32.5, "learning_rate": 7.555555555555556e-06, "loss": 0.0133, "step": 130 }, { "epoch": 33.0, "eval_accuracy": 0.9166666666666666, "eval_loss": 0.07476559281349182, "eval_runtime": 0.2934, "eval_samples_per_second": 40.893, "eval_steps_per_second": 6.816, "step": 132 }, { "epoch": 33.75, "learning_rate": 7.416666666666668e-06, "loss": 0.0014, "step": 135 }, { "epoch": 34.0, "eval_accuracy": 0.9166666666666666, "eval_loss": 0.16315531730651855, "eval_runtime": 0.2053, "eval_samples_per_second": 58.463, "eval_steps_per_second": 9.744, "step": 136 }, { "epoch": 35.0, "learning_rate": 7.277777777777778e-06, "loss": 0.0134, "step": 140 }, { "epoch": 35.0, "eval_accuracy": 1.0, "eval_loss": 0.006152550224214792, "eval_runtime": 0.2135, "eval_samples_per_second": 56.219, "eval_steps_per_second": 9.37, "step": 140 }, { "epoch": 36.0, "eval_accuracy": 1.0, "eval_loss": 0.005291670560836792, "eval_runtime": 0.2054, "eval_samples_per_second": 58.422, "eval_steps_per_second": 9.737, "step": 144 }, { "epoch": 36.25, "learning_rate": 7.13888888888889e-06, "loss": 0.0365, "step": 145 }, { "epoch": 37.0, "eval_accuracy": 1.0, "eval_loss": 0.0365130789577961, "eval_runtime": 0.2268, "eval_samples_per_second": 52.911, "eval_steps_per_second": 8.819, "step": 148 }, { "epoch": 37.5, "learning_rate": 7e-06, "loss": 0.001, "step": 150 }, { "epoch": 38.0, "eval_accuracy": 1.0, "eval_loss": 0.0017459988594055176, "eval_runtime": 0.2143, "eval_samples_per_second": 56.005, "eval_steps_per_second": 9.334, "step": 152 }, { "epoch": 38.75, "learning_rate": 6.88888888888889e-06, "loss": 0.0503, "step": 155 }, { "epoch": 39.0, "eval_accuracy": 1.0, "eval_loss": 0.05090367794036865, "eval_runtime": 0.208, "eval_samples_per_second": 57.695, "eval_steps_per_second": 9.616, "step": 156 }, { "epoch": 40.0, "learning_rate": 6.750000000000001e-06, "loss": 0.0094, "step": 160 }, { "epoch": 40.0, "eval_accuracy": 0.9166666666666666, "eval_loss": 0.07808921486139297, "eval_runtime": 0.2086, "eval_samples_per_second": 57.523, "eval_steps_per_second": 9.587, "step": 160 }, { "epoch": 41.0, "eval_accuracy": 0.9166666666666666, "eval_loss": 0.22892427444458008, "eval_runtime": 0.2158, "eval_samples_per_second": 55.599, "eval_steps_per_second": 9.267, "step": 164 }, { "epoch": 41.25, "learning_rate": 6.6111111111111115e-06, "loss": 0.0231, "step": 165 }, { "epoch": 42.0, "eval_accuracy": 0.9166666666666666, "eval_loss": 0.16741518676280975, "eval_runtime": 0.2199, "eval_samples_per_second": 54.565, "eval_steps_per_second": 9.094, "step": 168 }, { "epoch": 42.5, "learning_rate": 6.472222222222223e-06, "loss": 0.0002, "step": 170 }, { "epoch": 43.0, "eval_accuracy": 0.9166666666666666, "eval_loss": 0.25974369049072266, "eval_runtime": 0.297, "eval_samples_per_second": 40.411, "eval_steps_per_second": 6.735, "step": 172 }, { "epoch": 43.75, "learning_rate": 6.333333333333333e-06, "loss": 0.0785, "step": 175 }, { "epoch": 44.0, "eval_accuracy": 0.9166666666666666, "eval_loss": 0.2814592123031616, "eval_runtime": 0.1943, "eval_samples_per_second": 61.754, "eval_steps_per_second": 10.292, "step": 176 }, { "epoch": 45.0, "learning_rate": 6.194444444444445e-06, "loss": 0.0009, "step": 180 }, { "epoch": 45.0, "eval_accuracy": 0.9166666666666666, "eval_loss": 0.2058378905057907, "eval_runtime": 0.3052, "eval_samples_per_second": 39.313, "eval_steps_per_second": 6.552, "step": 180 }, { "epoch": 46.0, "eval_accuracy": 1.0, "eval_loss": 0.000926971435546875, "eval_runtime": 0.2098, "eval_samples_per_second": 57.192, "eval_steps_per_second": 9.532, "step": 184 }, { "epoch": 46.25, "learning_rate": 6.055555555555555e-06, "loss": 0.0395, "step": 185 }, { "epoch": 47.0, "eval_accuracy": 1.0, "eval_loss": 0.0010882416972890496, "eval_runtime": 0.3081, "eval_samples_per_second": 38.948, "eval_steps_per_second": 6.491, "step": 188 }, { "epoch": 47.5, "learning_rate": 5.916666666666667e-06, "loss": 0.0158, "step": 190 }, { "epoch": 48.0, "eval_accuracy": 1.0, "eval_loss": 0.000389377266401425, "eval_runtime": 0.2087, "eval_samples_per_second": 57.493, "eval_steps_per_second": 9.582, "step": 192 }, { "epoch": 48.75, "learning_rate": 5.777777777777778e-06, "loss": 0.001, "step": 195 }, { "epoch": 49.0, "eval_accuracy": 1.0, "eval_loss": 0.0034777026157826185, "eval_runtime": 0.2113, "eval_samples_per_second": 56.801, "eval_steps_per_second": 9.467, "step": 196 }, { "epoch": 50.0, "learning_rate": 5.638888888888889e-06, "loss": 0.0027, "step": 200 }, { "epoch": 50.0, "eval_accuracy": 1.0, "eval_loss": 0.0018774966010823846, "eval_runtime": 0.1971, "eval_samples_per_second": 60.883, "eval_steps_per_second": 10.147, "step": 200 }, { "epoch": 51.0, "eval_accuracy": 1.0, "eval_loss": 0.0005501409177668393, "eval_runtime": 0.2093, "eval_samples_per_second": 57.332, "eval_steps_per_second": 9.555, "step": 204 }, { "epoch": 51.25, "learning_rate": 5.500000000000001e-06, "loss": 0.0005, "step": 205 }, { "epoch": 52.0, "eval_accuracy": 1.0, "eval_loss": 0.0013666549930348992, "eval_runtime": 0.2027, "eval_samples_per_second": 59.215, "eval_steps_per_second": 9.869, "step": 208 }, { "epoch": 52.5, "learning_rate": 5.361111111111112e-06, "loss": 0.003, "step": 210 }, { "epoch": 53.0, "eval_accuracy": 1.0, "eval_loss": 0.0018307765712961555, "eval_runtime": 0.2153, "eval_samples_per_second": 55.739, "eval_steps_per_second": 9.29, "step": 212 }, { "epoch": 53.75, "learning_rate": 5.2222222222222226e-06, "loss": 0.0039, "step": 215 }, { "epoch": 54.0, "eval_accuracy": 1.0, "eval_loss": 0.01105162501335144, "eval_runtime": 0.2005, "eval_samples_per_second": 59.849, "eval_steps_per_second": 9.975, "step": 216 }, { "epoch": 55.0, "learning_rate": 5.0833333333333335e-06, "loss": 0.0081, "step": 220 }, { "epoch": 55.0, "eval_accuracy": 1.0, "eval_loss": 0.021254947409033775, "eval_runtime": 0.2174, "eval_samples_per_second": 55.192, "eval_steps_per_second": 9.199, "step": 220 }, { "epoch": 56.0, "eval_accuracy": 1.0, "eval_loss": 0.014328837394714355, "eval_runtime": 0.2399, "eval_samples_per_second": 50.017, "eval_steps_per_second": 8.336, "step": 224 }, { "epoch": 56.25, "learning_rate": 4.944444444444445e-06, "loss": 0.0001, "step": 225 }, { "epoch": 57.0, "eval_accuracy": 1.0, "eval_loss": 0.00964482594281435, "eval_runtime": 0.2188, "eval_samples_per_second": 54.838, "eval_steps_per_second": 9.14, "step": 228 }, { "epoch": 57.5, "learning_rate": 4.805555555555556e-06, "loss": 0.0002, "step": 230 }, { "epoch": 58.0, "eval_accuracy": 1.0, "eval_loss": 0.0017709335079416633, "eval_runtime": 0.2916, "eval_samples_per_second": 41.158, "eval_steps_per_second": 6.86, "step": 232 }, { "epoch": 58.75, "learning_rate": 4.666666666666667e-06, "loss": 0.0035, "step": 235 }, { "epoch": 59.0, "eval_accuracy": 1.0, "eval_loss": 0.0005045731668360531, "eval_runtime": 0.2036, "eval_samples_per_second": 58.93, "eval_steps_per_second": 9.822, "step": 236 }, { "epoch": 60.0, "learning_rate": 4.527777777777778e-06, "loss": 0.0049, "step": 240 }, { "epoch": 60.0, "eval_accuracy": 1.0, "eval_loss": 0.0014440218219533563, "eval_runtime": 0.3276, "eval_samples_per_second": 36.631, "eval_steps_per_second": 6.105, "step": 240 }, { "epoch": 61.0, "eval_accuracy": 1.0, "eval_loss": 0.0020179252605885267, "eval_runtime": 0.2179, "eval_samples_per_second": 55.076, "eval_steps_per_second": 9.179, "step": 244 }, { "epoch": 61.25, "learning_rate": 4.388888888888889e-06, "loss": 0.0006, "step": 245 }, { "epoch": 62.0, "eval_accuracy": 1.0, "eval_loss": 0.0003785987792070955, "eval_runtime": 0.2267, "eval_samples_per_second": 52.937, "eval_steps_per_second": 8.823, "step": 248 }, { "epoch": 62.5, "learning_rate": 4.25e-06, "loss": 0.0001, "step": 250 }, { "epoch": 63.0, "eval_accuracy": 1.0, "eval_loss": 0.00021861989807803184, "eval_runtime": 0.2108, "eval_samples_per_second": 56.934, "eval_steps_per_second": 9.489, "step": 252 }, { "epoch": 63.75, "learning_rate": 4.111111111111111e-06, "loss": 0.0272, "step": 255 }, { "epoch": 64.0, "eval_accuracy": 1.0, "eval_loss": 0.0002175172121496871, "eval_runtime": 0.2182, "eval_samples_per_second": 55.001, "eval_steps_per_second": 9.167, "step": 256 }, { "epoch": 65.0, "learning_rate": 3.972222222222223e-06, "loss": 0.0016, "step": 260 }, { "epoch": 65.0, "eval_accuracy": 1.0, "eval_loss": 0.0003321866097394377, "eval_runtime": 0.1986, "eval_samples_per_second": 60.421, "eval_steps_per_second": 10.07, "step": 260 }, { "epoch": 66.0, "eval_accuracy": 1.0, "eval_loss": 0.0012783011188730597, "eval_runtime": 0.2135, "eval_samples_per_second": 56.201, "eval_steps_per_second": 9.367, "step": 264 }, { "epoch": 66.25, "learning_rate": 3.833333333333334e-06, "loss": 0.0002, "step": 265 }, { "epoch": 67.0, "eval_accuracy": 1.0, "eval_loss": 0.004446456674486399, "eval_runtime": 0.2127, "eval_samples_per_second": 56.422, "eval_steps_per_second": 9.404, "step": 268 }, { "epoch": 67.5, "learning_rate": 3.694444444444445e-06, "loss": 0.0001, "step": 270 }, { "epoch": 68.0, "eval_accuracy": 1.0, "eval_loss": 0.013481984846293926, "eval_runtime": 0.2043, "eval_samples_per_second": 58.732, "eval_steps_per_second": 9.789, "step": 272 }, { "epoch": 68.75, "learning_rate": 3.555555555555556e-06, "loss": 0.0002, "step": 275 }, { "epoch": 69.0, "eval_accuracy": 1.0, "eval_loss": 0.012796193361282349, "eval_runtime": 0.2261, "eval_samples_per_second": 53.079, "eval_steps_per_second": 8.846, "step": 276 }, { "epoch": 70.0, "learning_rate": 3.416666666666667e-06, "loss": 0.0002, "step": 280 }, { "epoch": 70.0, "eval_accuracy": 1.0, "eval_loss": 0.01079349685460329, "eval_runtime": 0.1991, "eval_samples_per_second": 60.277, "eval_steps_per_second": 10.046, "step": 280 }, { "epoch": 71.0, "eval_accuracy": 1.0, "eval_loss": 0.008993417024612427, "eval_runtime": 0.2933, "eval_samples_per_second": 40.92, "eval_steps_per_second": 6.82, "step": 284 }, { "epoch": 71.25, "learning_rate": 3.277777777777778e-06, "loss": 0.0001, "step": 285 }, { "epoch": 72.0, "eval_accuracy": 1.0, "eval_loss": 0.006850639823824167, "eval_runtime": 0.2074, "eval_samples_per_second": 57.85, "eval_steps_per_second": 9.642, "step": 288 }, { "epoch": 72.5, "learning_rate": 3.138888888888889e-06, "loss": 0.0001, "step": 290 }, { "epoch": 73.0, "eval_accuracy": 1.0, "eval_loss": 0.0049311816692352295, "eval_runtime": 0.2813, "eval_samples_per_second": 42.663, "eval_steps_per_second": 7.111, "step": 292 }, { "epoch": 73.75, "learning_rate": 3e-06, "loss": 0.0002, "step": 295 }, { "epoch": 74.0, "eval_accuracy": 1.0, "eval_loss": 0.0031823813915252686, "eval_runtime": 0.2076, "eval_samples_per_second": 57.79, "eval_steps_per_second": 9.632, "step": 296 }, { "epoch": 75.0, "learning_rate": 2.861111111111111e-06, "loss": 0.0, "step": 300 }, { "epoch": 75.0, "eval_accuracy": 1.0, "eval_loss": 0.0018174747237935662, "eval_runtime": 0.1974, "eval_samples_per_second": 60.778, "eval_steps_per_second": 10.13, "step": 300 }, { "epoch": 76.0, "eval_accuracy": 1.0, "eval_loss": 0.0033631324768066406, "eval_runtime": 0.2244, "eval_samples_per_second": 53.472, "eval_steps_per_second": 8.912, "step": 304 }, { "epoch": 76.25, "learning_rate": 2.7222222222222224e-06, "loss": 0.0003, "step": 305 }, { "epoch": 77.0, "eval_accuracy": 1.0, "eval_loss": 0.005034655332565308, "eval_runtime": 0.2054, "eval_samples_per_second": 58.436, "eval_steps_per_second": 9.739, "step": 308 }, { "epoch": 77.5, "learning_rate": 2.5833333333333337e-06, "loss": 0.0001, "step": 310 }, { "epoch": 78.0, "eval_accuracy": 1.0, "eval_loss": 0.0060021779499948025, "eval_runtime": 0.2165, "eval_samples_per_second": 55.438, "eval_steps_per_second": 9.24, "step": 312 }, { "epoch": 78.75, "learning_rate": 2.4444444444444447e-06, "loss": 0.0, "step": 315 }, { "epoch": 79.0, "eval_accuracy": 1.0, "eval_loss": 0.006388425827026367, "eval_runtime": 0.2076, "eval_samples_per_second": 57.793, "eval_steps_per_second": 9.632, "step": 316 }, { "epoch": 80.0, "learning_rate": 2.305555555555556e-06, "loss": 0.0001, "step": 320 }, { "epoch": 80.0, "eval_accuracy": 1.0, "eval_loss": 0.0061742267571389675, "eval_runtime": 0.2188, "eval_samples_per_second": 54.852, "eval_steps_per_second": 9.142, "step": 320 }, { "epoch": 81.0, "eval_accuracy": 1.0, "eval_loss": 0.005842983722686768, "eval_runtime": 0.2181, "eval_samples_per_second": 55.023, "eval_steps_per_second": 9.171, "step": 324 }, { "epoch": 81.25, "learning_rate": 2.166666666666667e-06, "loss": 0.0001, "step": 325 }, { "epoch": 82.0, "eval_accuracy": 1.0, "eval_loss": 0.0059954822063446045, "eval_runtime": 0.2026, "eval_samples_per_second": 59.232, "eval_steps_per_second": 9.872, "step": 328 }, { "epoch": 82.5, "learning_rate": 2.027777777777778e-06, "loss": 0.0001, "step": 330 }, { "epoch": 83.0, "eval_accuracy": 1.0, "eval_loss": 0.006137927528470755, "eval_runtime": 0.2087, "eval_samples_per_second": 57.509, "eval_steps_per_second": 9.585, "step": 332 }, { "epoch": 83.75, "learning_rate": 1.888888888888889e-06, "loss": 0.0, "step": 335 }, { "epoch": 84.0, "eval_accuracy": 1.0, "eval_loss": 0.007910847663879395, "eval_runtime": 0.2136, "eval_samples_per_second": 56.177, "eval_steps_per_second": 9.363, "step": 336 }, { "epoch": 85.0, "learning_rate": 1.75e-06, "loss": 0.0005, "step": 340 }, { "epoch": 85.0, "eval_accuracy": 1.0, "eval_loss": 0.012745578773319721, "eval_runtime": 0.2199, "eval_samples_per_second": 54.578, "eval_steps_per_second": 9.096, "step": 340 }, { "epoch": 86.0, "eval_accuracy": 1.0, "eval_loss": 0.021893838420510292, "eval_runtime": 0.2149, "eval_samples_per_second": 55.845, "eval_steps_per_second": 9.307, "step": 344 }, { "epoch": 86.25, "learning_rate": 1.6111111111111113e-06, "loss": 0.0002, "step": 345 }, { "epoch": 87.0, "eval_accuracy": 1.0, "eval_loss": 0.031531721353530884, "eval_runtime": 0.2233, "eval_samples_per_second": 53.743, "eval_steps_per_second": 8.957, "step": 348 }, { "epoch": 87.5, "learning_rate": 1.4722222222222225e-06, "loss": 0.0001, "step": 350 }, { "epoch": 88.0, "eval_accuracy": 1.0, "eval_loss": 0.03811268135905266, "eval_runtime": 0.2096, "eval_samples_per_second": 57.255, "eval_steps_per_second": 9.542, "step": 352 }, { "epoch": 88.75, "learning_rate": 1.3333333333333334e-06, "loss": 0.0001, "step": 355 }, { "epoch": 89.0, "eval_accuracy": 1.0, "eval_loss": 0.04021235182881355, "eval_runtime": 0.2105, "eval_samples_per_second": 57.008, "eval_steps_per_second": 9.501, "step": 356 }, { "epoch": 90.0, "learning_rate": 1.1944444444444446e-06, "loss": 0.0, "step": 360 }, { "epoch": 90.0, "eval_accuracy": 1.0, "eval_loss": 0.040413498878479004, "eval_runtime": 0.2044, "eval_samples_per_second": 58.696, "eval_steps_per_second": 9.783, "step": 360 }, { "epoch": 91.0, "eval_accuracy": 1.0, "eval_loss": 0.0374186746776104, "eval_runtime": 0.316, "eval_samples_per_second": 37.973, "eval_steps_per_second": 6.329, "step": 364 }, { "epoch": 91.25, "learning_rate": 1.0555555555555557e-06, "loss": 0.0, "step": 365 }, { "epoch": 92.0, "eval_accuracy": 1.0, "eval_loss": 0.03140836954116821, "eval_runtime": 0.2142, "eval_samples_per_second": 56.034, "eval_steps_per_second": 9.339, "step": 368 }, { "epoch": 92.5, "learning_rate": 9.166666666666666e-07, "loss": 0.0003, "step": 370 }, { "epoch": 93.0, "eval_accuracy": 1.0, "eval_loss": 0.01769857667386532, "eval_runtime": 0.2018, "eval_samples_per_second": 59.478, "eval_steps_per_second": 9.913, "step": 372 }, { "epoch": 93.75, "learning_rate": 7.777777777777779e-07, "loss": 0.0001, "step": 375 }, { "epoch": 94.0, "eval_accuracy": 1.0, "eval_loss": 0.011687318794429302, "eval_runtime": 0.2027, "eval_samples_per_second": 59.211, "eval_steps_per_second": 9.869, "step": 376 }, { "epoch": 95.0, "learning_rate": 6.388888888888889e-07, "loss": 0.0001, "step": 380 }, { "epoch": 95.0, "eval_accuracy": 1.0, "eval_loss": 0.0088284807279706, "eval_runtime": 0.2065, "eval_samples_per_second": 58.106, "eval_steps_per_second": 9.684, "step": 380 }, { "epoch": 96.0, "eval_accuracy": 1.0, "eval_loss": 0.007670491933822632, "eval_runtime": 0.2024, "eval_samples_per_second": 59.303, "eval_steps_per_second": 9.884, "step": 384 }, { "epoch": 96.25, "learning_rate": 5.000000000000001e-07, "loss": 0.0, "step": 385 }, { "epoch": 97.0, "eval_accuracy": 1.0, "eval_loss": 0.00695762038230896, "eval_runtime": 0.2135, "eval_samples_per_second": 56.218, "eval_steps_per_second": 9.37, "step": 388 }, { "epoch": 97.5, "learning_rate": 3.611111111111111e-07, "loss": 0.0001, "step": 390 }, { "epoch": 98.0, "eval_accuracy": 1.0, "eval_loss": 0.0064276158809661865, "eval_runtime": 0.2059, "eval_samples_per_second": 58.292, "eval_steps_per_second": 9.715, "step": 392 }, { "epoch": 98.75, "learning_rate": 2.2222222222222224e-07, "loss": 0.0001, "step": 395 }, { "epoch": 99.0, "eval_accuracy": 1.0, "eval_loss": 0.006080567836761475, "eval_runtime": 0.2091, "eval_samples_per_second": 57.386, "eval_steps_per_second": 9.564, "step": 396 }, { "epoch": 100.0, "learning_rate": 8.333333333333334e-08, "loss": 0.0001, "step": 400 }, { "epoch": 100.0, "eval_accuracy": 1.0, "eval_loss": 0.005917658563703299, "eval_runtime": 0.2205, "eval_samples_per_second": 54.429, "eval_steps_per_second": 9.072, "step": 400 } ], "logging_steps": 5, "max_steps": 400, "num_train_epochs": 100, "save_steps": 500, "total_flos": 4.70073333768192e+17, "trial_name": null, "trial_params": null }