{ "best_metric": 1.0, "best_model_checkpoint": "bit-50-Pharyngitis\\checkpoint-36", "epoch": 95.23809523809524, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.95, "eval_accuracy": 0.7027027027027027, "eval_loss": 0.6438681483268738, "eval_runtime": 0.5883, "eval_samples_per_second": 62.898, "eval_steps_per_second": 5.1, "step": 5 }, { "epoch": 1.9, "learning_rate": 1e-05, "loss": 0.6378, "step": 10 }, { "epoch": 1.9, "eval_accuracy": 0.8918918918918919, "eval_loss": 0.4361162483692169, "eval_runtime": 0.3774, "eval_samples_per_second": 98.042, "eval_steps_per_second": 7.949, "step": 10 }, { "epoch": 2.86, "eval_accuracy": 0.972972972972973, "eval_loss": 0.2754928469657898, "eval_runtime": 0.3737, "eval_samples_per_second": 99.02, "eval_steps_per_second": 8.029, "step": 15 }, { "epoch": 3.81, "learning_rate": 2e-05, "loss": 0.4237, "step": 20 }, { "epoch": 4.0, "eval_accuracy": 0.918918918918919, "eval_loss": 0.21373549103736877, "eval_runtime": 0.4065, "eval_samples_per_second": 91.026, "eval_steps_per_second": 7.381, "step": 21 }, { "epoch": 4.95, "eval_accuracy": 0.9459459459459459, "eval_loss": 0.1247631162405014, "eval_runtime": 0.3977, "eval_samples_per_second": 93.032, "eval_steps_per_second": 7.543, "step": 26 }, { "epoch": 5.71, "learning_rate": 3e-05, "loss": 0.2592, "step": 30 }, { "epoch": 5.9, "eval_accuracy": 0.972972972972973, "eval_loss": 0.08167269825935364, "eval_runtime": 0.3971, "eval_samples_per_second": 93.166, "eval_steps_per_second": 7.554, "step": 31 }, { "epoch": 6.86, "eval_accuracy": 1.0, "eval_loss": 0.06954346597194672, "eval_runtime": 0.3842, "eval_samples_per_second": 96.305, "eval_steps_per_second": 7.809, "step": 36 }, { "epoch": 7.62, "learning_rate": 4e-05, "loss": 0.1775, "step": 40 }, { "epoch": 8.0, "eval_accuracy": 0.918918918918919, "eval_loss": 0.23191875219345093, "eval_runtime": 0.3999, "eval_samples_per_second": 92.518, "eval_steps_per_second": 7.501, "step": 42 }, { "epoch": 8.95, "eval_accuracy": 0.972972972972973, "eval_loss": 0.052055422216653824, "eval_runtime": 0.4011, "eval_samples_per_second": 92.257, "eval_steps_per_second": 7.48, "step": 47 }, { "epoch": 9.52, "learning_rate": 5e-05, "loss": 0.1805, "step": 50 }, { "epoch": 9.9, "eval_accuracy": 1.0, "eval_loss": 0.047812674194574356, "eval_runtime": 0.3975, "eval_samples_per_second": 93.076, "eval_steps_per_second": 7.547, "step": 52 }, { "epoch": 10.86, "eval_accuracy": 1.0, "eval_loss": 0.04274846613407135, "eval_runtime": 0.4052, "eval_samples_per_second": 91.313, "eval_steps_per_second": 7.404, "step": 57 }, { "epoch": 11.43, "learning_rate": 4.888888888888889e-05, "loss": 0.171, "step": 60 }, { "epoch": 12.0, "eval_accuracy": 0.8918918918918919, "eval_loss": 0.1753544956445694, "eval_runtime": 0.4139, "eval_samples_per_second": 89.385, "eval_steps_per_second": 7.247, "step": 63 }, { "epoch": 12.95, "eval_accuracy": 0.9459459459459459, "eval_loss": 0.06655322760343552, "eval_runtime": 0.3887, "eval_samples_per_second": 95.191, "eval_steps_per_second": 7.718, "step": 68 }, { "epoch": 13.33, "learning_rate": 4.7777777777777784e-05, "loss": 0.089, "step": 70 }, { "epoch": 13.9, "eval_accuracy": 1.0, "eval_loss": 0.02494250051677227, "eval_runtime": 0.4123, "eval_samples_per_second": 89.751, "eval_steps_per_second": 7.277, "step": 73 }, { "epoch": 14.86, "eval_accuracy": 1.0, "eval_loss": 0.019040122628211975, "eval_runtime": 0.4396, "eval_samples_per_second": 84.171, "eval_steps_per_second": 6.825, "step": 78 }, { "epoch": 15.24, "learning_rate": 4.666666666666667e-05, "loss": 0.1093, "step": 80 }, { "epoch": 16.0, "eval_accuracy": 0.9459459459459459, "eval_loss": 0.06111358851194382, "eval_runtime": 0.4317, "eval_samples_per_second": 85.699, "eval_steps_per_second": 6.949, "step": 84 }, { "epoch": 16.95, "eval_accuracy": 0.918918918918919, "eval_loss": 0.16683633625507355, "eval_runtime": 0.394, "eval_samples_per_second": 93.907, "eval_steps_per_second": 7.614, "step": 89 }, { "epoch": 17.14, "learning_rate": 4.555555555555556e-05, "loss": 0.1025, "step": 90 }, { "epoch": 17.9, "eval_accuracy": 0.972972972972973, "eval_loss": 0.05101567134261131, "eval_runtime": 0.3847, "eval_samples_per_second": 96.177, "eval_steps_per_second": 7.798, "step": 94 }, { "epoch": 18.86, "eval_accuracy": 1.0, "eval_loss": 0.010445931926369667, "eval_runtime": 0.3987, "eval_samples_per_second": 92.805, "eval_steps_per_second": 7.525, "step": 99 }, { "epoch": 19.05, "learning_rate": 4.4444444444444447e-05, "loss": 0.12, "step": 100 }, { "epoch": 20.0, "eval_accuracy": 0.8918918918918919, "eval_loss": 0.18418262898921967, "eval_runtime": 0.391, "eval_samples_per_second": 94.632, "eval_steps_per_second": 7.673, "step": 105 }, { "epoch": 20.95, "learning_rate": 4.3333333333333334e-05, "loss": 0.0996, "step": 110 }, { "epoch": 20.95, "eval_accuracy": 1.0, "eval_loss": 0.011865493841469288, "eval_runtime": 0.3899, "eval_samples_per_second": 94.901, "eval_steps_per_second": 7.695, "step": 110 }, { "epoch": 21.9, "eval_accuracy": 0.972972972972973, "eval_loss": 0.03581492602825165, "eval_runtime": 0.4047, "eval_samples_per_second": 91.415, "eval_steps_per_second": 7.412, "step": 115 }, { "epoch": 22.86, "learning_rate": 4.222222222222222e-05, "loss": 0.0933, "step": 120 }, { "epoch": 22.86, "eval_accuracy": 0.9459459459459459, "eval_loss": 0.11507910490036011, "eval_runtime": 0.4091, "eval_samples_per_second": 90.451, "eval_steps_per_second": 7.334, "step": 120 }, { "epoch": 24.0, "eval_accuracy": 0.972972972972973, "eval_loss": 0.08254072815179825, "eval_runtime": 0.3864, "eval_samples_per_second": 95.759, "eval_steps_per_second": 7.764, "step": 126 }, { "epoch": 24.76, "learning_rate": 4.111111111111111e-05, "loss": 0.1118, "step": 130 }, { "epoch": 24.95, "eval_accuracy": 0.972972972972973, "eval_loss": 0.08668244630098343, "eval_runtime": 0.3898, "eval_samples_per_second": 94.932, "eval_steps_per_second": 7.697, "step": 131 }, { "epoch": 25.9, "eval_accuracy": 1.0, "eval_loss": 0.009897518903017044, "eval_runtime": 0.4036, "eval_samples_per_second": 91.68, "eval_steps_per_second": 7.434, "step": 136 }, { "epoch": 26.67, "learning_rate": 4e-05, "loss": 0.0471, "step": 140 }, { "epoch": 26.86, "eval_accuracy": 1.0, "eval_loss": 0.019395073875784874, "eval_runtime": 0.3954, "eval_samples_per_second": 93.584, "eval_steps_per_second": 7.588, "step": 141 }, { "epoch": 28.0, "eval_accuracy": 1.0, "eval_loss": 0.0031759734265506268, "eval_runtime": 0.3912, "eval_samples_per_second": 94.57, "eval_steps_per_second": 7.668, "step": 147 }, { "epoch": 28.57, "learning_rate": 3.888888888888889e-05, "loss": 0.0686, "step": 150 }, { "epoch": 28.95, "eval_accuracy": 1.0, "eval_loss": 0.0057038371451199055, "eval_runtime": 0.3906, "eval_samples_per_second": 94.728, "eval_steps_per_second": 7.681, "step": 152 }, { "epoch": 29.9, "eval_accuracy": 1.0, "eval_loss": 0.0014269119128584862, "eval_runtime": 0.3905, "eval_samples_per_second": 94.751, "eval_steps_per_second": 7.682, "step": 157 }, { "epoch": 30.48, "learning_rate": 3.777777777777778e-05, "loss": 0.0692, "step": 160 }, { "epoch": 30.86, "eval_accuracy": 0.918918918918919, "eval_loss": 0.34027108550071716, "eval_runtime": 0.4009, "eval_samples_per_second": 92.295, "eval_steps_per_second": 7.483, "step": 162 }, { "epoch": 32.0, "eval_accuracy": 1.0, "eval_loss": 0.011479969136416912, "eval_runtime": 0.4005, "eval_samples_per_second": 92.395, "eval_steps_per_second": 7.491, "step": 168 }, { "epoch": 32.38, "learning_rate": 3.6666666666666666e-05, "loss": 0.0912, "step": 170 }, { "epoch": 32.95, "eval_accuracy": 0.9459459459459459, "eval_loss": 0.0989471897482872, "eval_runtime": 0.4052, "eval_samples_per_second": 91.323, "eval_steps_per_second": 7.405, "step": 173 }, { "epoch": 33.9, "eval_accuracy": 0.9459459459459459, "eval_loss": 0.10641559958457947, "eval_runtime": 0.3889, "eval_samples_per_second": 95.131, "eval_steps_per_second": 7.713, "step": 178 }, { "epoch": 34.29, "learning_rate": 3.555555555555556e-05, "loss": 0.0994, "step": 180 }, { "epoch": 34.86, "eval_accuracy": 0.972972972972973, "eval_loss": 0.09466935694217682, "eval_runtime": 0.3912, "eval_samples_per_second": 94.569, "eval_steps_per_second": 7.668, "step": 183 }, { "epoch": 36.0, "eval_accuracy": 0.9459459459459459, "eval_loss": 0.10136424005031586, "eval_runtime": 0.4255, "eval_samples_per_second": 86.966, "eval_steps_per_second": 7.051, "step": 189 }, { "epoch": 36.19, "learning_rate": 3.444444444444445e-05, "loss": 0.0561, "step": 190 }, { "epoch": 36.95, "eval_accuracy": 0.972972972972973, "eval_loss": 0.0519096776843071, "eval_runtime": 0.4107, "eval_samples_per_second": 90.096, "eval_steps_per_second": 7.305, "step": 194 }, { "epoch": 37.9, "eval_accuracy": 0.918918918918919, "eval_loss": 0.16669504344463348, "eval_runtime": 0.4065, "eval_samples_per_second": 91.027, "eval_steps_per_second": 7.381, "step": 199 }, { "epoch": 38.1, "learning_rate": 3.3333333333333335e-05, "loss": 0.0516, "step": 200 }, { "epoch": 38.86, "eval_accuracy": 0.918918918918919, "eval_loss": 0.3604719936847687, "eval_runtime": 0.3971, "eval_samples_per_second": 93.183, "eval_steps_per_second": 7.555, "step": 204 }, { "epoch": 40.0, "learning_rate": 3.222222222222223e-05, "loss": 0.0535, "step": 210 }, { "epoch": 40.0, "eval_accuracy": 0.972972972972973, "eval_loss": 0.044684119522571564, "eval_runtime": 0.3775, "eval_samples_per_second": 98.002, "eval_steps_per_second": 7.946, "step": 210 }, { "epoch": 40.95, "eval_accuracy": 0.918918918918919, "eval_loss": 0.1711174100637436, "eval_runtime": 0.3876, "eval_samples_per_second": 95.468, "eval_steps_per_second": 7.741, "step": 215 }, { "epoch": 41.9, "learning_rate": 3.111111111111111e-05, "loss": 0.0475, "step": 220 }, { "epoch": 41.9, "eval_accuracy": 0.9459459459459459, "eval_loss": 0.10387804359197617, "eval_runtime": 0.3953, "eval_samples_per_second": 93.597, "eval_steps_per_second": 7.589, "step": 220 }, { "epoch": 42.86, "eval_accuracy": 1.0, "eval_loss": 0.029212407767772675, "eval_runtime": 0.3915, "eval_samples_per_second": 94.5, "eval_steps_per_second": 7.662, "step": 225 }, { "epoch": 43.81, "learning_rate": 3e-05, "loss": 0.0504, "step": 230 }, { "epoch": 44.0, "eval_accuracy": 1.0, "eval_loss": 0.014002898707985878, "eval_runtime": 0.3889, "eval_samples_per_second": 95.145, "eval_steps_per_second": 7.714, "step": 231 }, { "epoch": 44.95, "eval_accuracy": 0.8918918918918919, "eval_loss": 0.27451202273368835, "eval_runtime": 0.4255, "eval_samples_per_second": 86.962, "eval_steps_per_second": 7.051, "step": 236 }, { "epoch": 45.71, "learning_rate": 2.8888888888888888e-05, "loss": 0.0432, "step": 240 }, { "epoch": 45.9, "eval_accuracy": 1.0, "eval_loss": 0.002892308635637164, "eval_runtime": 0.3957, "eval_samples_per_second": 93.507, "eval_steps_per_second": 7.582, "step": 241 }, { "epoch": 46.86, "eval_accuracy": 0.8648648648648649, "eval_loss": 0.4316161274909973, "eval_runtime": 0.4242, "eval_samples_per_second": 87.218, "eval_steps_per_second": 7.072, "step": 246 }, { "epoch": 47.62, "learning_rate": 2.777777777777778e-05, "loss": 0.0992, "step": 250 }, { "epoch": 48.0, "eval_accuracy": 1.0, "eval_loss": 0.015067849308252335, "eval_runtime": 0.3902, "eval_samples_per_second": 94.825, "eval_steps_per_second": 7.688, "step": 252 }, { "epoch": 48.95, "eval_accuracy": 1.0, "eval_loss": 0.00749516487121582, "eval_runtime": 0.3915, "eval_samples_per_second": 94.506, "eval_steps_per_second": 7.663, "step": 257 }, { "epoch": 49.52, "learning_rate": 2.6666666666666667e-05, "loss": 0.1531, "step": 260 }, { "epoch": 49.9, "eval_accuracy": 0.8648648648648649, "eval_loss": 0.3624305725097656, "eval_runtime": 0.4028, "eval_samples_per_second": 91.854, "eval_steps_per_second": 7.448, "step": 262 }, { "epoch": 50.86, "eval_accuracy": 0.972972972972973, "eval_loss": 0.036536574363708496, "eval_runtime": 0.3932, "eval_samples_per_second": 94.091, "eval_steps_per_second": 7.629, "step": 267 }, { "epoch": 51.43, "learning_rate": 2.5555555555555554e-05, "loss": 0.0622, "step": 270 }, { "epoch": 52.0, "eval_accuracy": 0.972972972972973, "eval_loss": 0.20016101002693176, "eval_runtime": 0.4002, "eval_samples_per_second": 92.444, "eval_steps_per_second": 7.495, "step": 273 }, { "epoch": 52.95, "eval_accuracy": 0.972972972972973, "eval_loss": 0.12544459104537964, "eval_runtime": 0.4119, "eval_samples_per_second": 89.837, "eval_steps_per_second": 7.284, "step": 278 }, { "epoch": 53.33, "learning_rate": 2.4444444444444445e-05, "loss": 0.0432, "step": 280 }, { "epoch": 53.9, "eval_accuracy": 0.9459459459459459, "eval_loss": 0.0909779816865921, "eval_runtime": 0.3955, "eval_samples_per_second": 93.545, "eval_steps_per_second": 7.585, "step": 283 }, { "epoch": 54.86, "eval_accuracy": 0.8918918918918919, "eval_loss": 0.3727685809135437, "eval_runtime": 0.3993, "eval_samples_per_second": 92.669, "eval_steps_per_second": 7.514, "step": 288 }, { "epoch": 55.24, "learning_rate": 2.3333333333333336e-05, "loss": 0.0531, "step": 290 }, { "epoch": 56.0, "eval_accuracy": 0.9459459459459459, "eval_loss": 0.10307420045137405, "eval_runtime": 0.3958, "eval_samples_per_second": 93.478, "eval_steps_per_second": 7.579, "step": 294 }, { "epoch": 56.95, "eval_accuracy": 1.0, "eval_loss": 0.002981973346322775, "eval_runtime": 0.3986, "eval_samples_per_second": 92.827, "eval_steps_per_second": 7.526, "step": 299 }, { "epoch": 57.14, "learning_rate": 2.2222222222222223e-05, "loss": 0.0731, "step": 300 }, { "epoch": 57.9, "eval_accuracy": 0.9459459459459459, "eval_loss": 0.2001199871301651, "eval_runtime": 0.4153, "eval_samples_per_second": 89.082, "eval_steps_per_second": 7.223, "step": 304 }, { "epoch": 58.86, "eval_accuracy": 0.918918918918919, "eval_loss": 0.2389511615037918, "eval_runtime": 0.3911, "eval_samples_per_second": 94.615, "eval_steps_per_second": 7.672, "step": 309 }, { "epoch": 59.05, "learning_rate": 2.111111111111111e-05, "loss": 0.0529, "step": 310 }, { "epoch": 60.0, "eval_accuracy": 0.9459459459459459, "eval_loss": 0.13383528590202332, "eval_runtime": 0.3898, "eval_samples_per_second": 94.919, "eval_steps_per_second": 7.696, "step": 315 }, { "epoch": 60.95, "learning_rate": 2e-05, "loss": 0.0203, "step": 320 }, { "epoch": 60.95, "eval_accuracy": 0.9459459459459459, "eval_loss": 0.22631436586380005, "eval_runtime": 0.3946, "eval_samples_per_second": 93.757, "eval_steps_per_second": 7.602, "step": 320 }, { "epoch": 61.9, "eval_accuracy": 0.9459459459459459, "eval_loss": 0.16640439629554749, "eval_runtime": 0.3813, "eval_samples_per_second": 97.045, "eval_steps_per_second": 7.868, "step": 325 }, { "epoch": 62.86, "learning_rate": 1.888888888888889e-05, "loss": 0.0345, "step": 330 }, { "epoch": 62.86, "eval_accuracy": 1.0, "eval_loss": 0.006216500885784626, "eval_runtime": 0.3909, "eval_samples_per_second": 94.661, "eval_steps_per_second": 7.675, "step": 330 }, { "epoch": 64.0, "eval_accuracy": 1.0, "eval_loss": 0.0056242975406348705, "eval_runtime": 0.4048, "eval_samples_per_second": 91.408, "eval_steps_per_second": 7.411, "step": 336 }, { "epoch": 64.76, "learning_rate": 1.777777777777778e-05, "loss": 0.0595, "step": 340 }, { "epoch": 64.95, "eval_accuracy": 1.0, "eval_loss": 0.022064007818698883, "eval_runtime": 0.3988, "eval_samples_per_second": 92.779, "eval_steps_per_second": 7.523, "step": 341 }, { "epoch": 65.9, "eval_accuracy": 1.0, "eval_loss": 0.01646520011126995, "eval_runtime": 0.4012, "eval_samples_per_second": 92.225, "eval_steps_per_second": 7.478, "step": 346 }, { "epoch": 66.67, "learning_rate": 1.6666666666666667e-05, "loss": 0.0278, "step": 350 }, { "epoch": 66.86, "eval_accuracy": 1.0, "eval_loss": 0.01749444752931595, "eval_runtime": 0.404, "eval_samples_per_second": 91.579, "eval_steps_per_second": 7.425, "step": 351 }, { "epoch": 68.0, "eval_accuracy": 1.0, "eval_loss": 0.017633311450481415, "eval_runtime": 0.3933, "eval_samples_per_second": 94.074, "eval_steps_per_second": 7.628, "step": 357 }, { "epoch": 68.57, "learning_rate": 1.5555555555555555e-05, "loss": 0.1035, "step": 360 }, { "epoch": 68.95, "eval_accuracy": 0.918918918918919, "eval_loss": 0.15788349509239197, "eval_runtime": 0.3978, "eval_samples_per_second": 93.022, "eval_steps_per_second": 7.542, "step": 362 }, { "epoch": 69.9, "eval_accuracy": 0.972972972972973, "eval_loss": 0.06550092250108719, "eval_runtime": 0.3834, "eval_samples_per_second": 96.515, "eval_steps_per_second": 7.826, "step": 367 }, { "epoch": 70.48, "learning_rate": 1.4444444444444444e-05, "loss": 0.0466, "step": 370 }, { "epoch": 70.86, "eval_accuracy": 1.0, "eval_loss": 0.00981216412037611, "eval_runtime": 0.4023, "eval_samples_per_second": 91.96, "eval_steps_per_second": 7.456, "step": 372 }, { "epoch": 72.0, "eval_accuracy": 0.9459459459459459, "eval_loss": 0.0761876329779625, "eval_runtime": 0.3968, "eval_samples_per_second": 93.24, "eval_steps_per_second": 7.56, "step": 378 }, { "epoch": 72.38, "learning_rate": 1.3333333333333333e-05, "loss": 0.0719, "step": 380 }, { "epoch": 72.95, "eval_accuracy": 0.9459459459459459, "eval_loss": 0.26816752552986145, "eval_runtime": 0.4037, "eval_samples_per_second": 91.659, "eval_steps_per_second": 7.432, "step": 383 }, { "epoch": 73.9, "eval_accuracy": 0.972972972972973, "eval_loss": 0.03098950907588005, "eval_runtime": 0.3999, "eval_samples_per_second": 92.524, "eval_steps_per_second": 7.502, "step": 388 }, { "epoch": 74.29, "learning_rate": 1.2222222222222222e-05, "loss": 0.0144, "step": 390 }, { "epoch": 74.86, "eval_accuracy": 1.0, "eval_loss": 0.0038737300783395767, "eval_runtime": 0.4143, "eval_samples_per_second": 89.311, "eval_steps_per_second": 7.241, "step": 393 }, { "epoch": 76.0, "eval_accuracy": 0.972972972972973, "eval_loss": 0.055370986461639404, "eval_runtime": 0.387, "eval_samples_per_second": 95.603, "eval_steps_per_second": 7.752, "step": 399 }, { "epoch": 76.19, "learning_rate": 1.1111111111111112e-05, "loss": 0.0613, "step": 400 }, { "epoch": 76.95, "eval_accuracy": 0.972972972972973, "eval_loss": 0.05363788455724716, "eval_runtime": 0.3953, "eval_samples_per_second": 93.59, "eval_steps_per_second": 7.588, "step": 404 }, { "epoch": 77.9, "eval_accuracy": 0.972972972972973, "eval_loss": 0.03543579578399658, "eval_runtime": 0.4021, "eval_samples_per_second": 92.01, "eval_steps_per_second": 7.46, "step": 409 }, { "epoch": 78.1, "learning_rate": 1e-05, "loss": 0.0307, "step": 410 }, { "epoch": 78.86, "eval_accuracy": 0.972972972972973, "eval_loss": 0.02702740766108036, "eval_runtime": 0.4203, "eval_samples_per_second": 88.032, "eval_steps_per_second": 7.138, "step": 414 }, { "epoch": 80.0, "learning_rate": 8.88888888888889e-06, "loss": 0.0253, "step": 420 }, { "epoch": 80.0, "eval_accuracy": 1.0, "eval_loss": 0.005909389816224575, "eval_runtime": 0.3856, "eval_samples_per_second": 95.942, "eval_steps_per_second": 7.779, "step": 420 }, { "epoch": 80.95, "eval_accuracy": 1.0, "eval_loss": 0.01845603436231613, "eval_runtime": 0.3966, "eval_samples_per_second": 93.298, "eval_steps_per_second": 7.565, "step": 425 }, { "epoch": 81.9, "learning_rate": 7.777777777777777e-06, "loss": 0.0311, "step": 430 }, { "epoch": 81.9, "eval_accuracy": 0.972972972972973, "eval_loss": 0.02441835217177868, "eval_runtime": 0.4215, "eval_samples_per_second": 87.778, "eval_steps_per_second": 7.117, "step": 430 }, { "epoch": 82.86, "eval_accuracy": 0.972972972972973, "eval_loss": 0.030215006321668625, "eval_runtime": 0.4009, "eval_samples_per_second": 92.283, "eval_steps_per_second": 7.482, "step": 435 }, { "epoch": 83.81, "learning_rate": 6.666666666666667e-06, "loss": 0.0189, "step": 440 }, { "epoch": 84.0, "eval_accuracy": 1.0, "eval_loss": 0.00978150311857462, "eval_runtime": 0.4039, "eval_samples_per_second": 91.607, "eval_steps_per_second": 7.428, "step": 441 }, { "epoch": 84.95, "eval_accuracy": 1.0, "eval_loss": 0.004685988184064627, "eval_runtime": 0.3893, "eval_samples_per_second": 95.039, "eval_steps_per_second": 7.706, "step": 446 }, { "epoch": 85.71, "learning_rate": 5.555555555555556e-06, "loss": 0.0235, "step": 450 }, { "epoch": 85.9, "eval_accuracy": 1.0, "eval_loss": 0.010029388591647148, "eval_runtime": 0.4017, "eval_samples_per_second": 92.114, "eval_steps_per_second": 7.469, "step": 451 }, { "epoch": 86.86, "eval_accuracy": 1.0, "eval_loss": 0.01385235320776701, "eval_runtime": 0.409, "eval_samples_per_second": 90.473, "eval_steps_per_second": 7.336, "step": 456 }, { "epoch": 87.62, "learning_rate": 4.444444444444445e-06, "loss": 0.014, "step": 460 }, { "epoch": 88.0, "eval_accuracy": 1.0, "eval_loss": 0.013605994172394276, "eval_runtime": 0.4002, "eval_samples_per_second": 92.454, "eval_steps_per_second": 7.496, "step": 462 }, { "epoch": 88.95, "eval_accuracy": 0.972972972972973, "eval_loss": 0.030411923304200172, "eval_runtime": 0.3929, "eval_samples_per_second": 94.163, "eval_steps_per_second": 7.635, "step": 467 }, { "epoch": 89.52, "learning_rate": 3.3333333333333333e-06, "loss": 0.0197, "step": 470 }, { "epoch": 89.9, "eval_accuracy": 0.972972972972973, "eval_loss": 0.04400445148348808, "eval_runtime": 0.4094, "eval_samples_per_second": 90.379, "eval_steps_per_second": 7.328, "step": 472 }, { "epoch": 90.86, "eval_accuracy": 0.972972972972973, "eval_loss": 0.059370577335357666, "eval_runtime": 0.4037, "eval_samples_per_second": 91.649, "eval_steps_per_second": 7.431, "step": 477 }, { "epoch": 91.43, "learning_rate": 2.2222222222222225e-06, "loss": 0.0309, "step": 480 }, { "epoch": 92.0, "eval_accuracy": 0.972972972972973, "eval_loss": 0.048442043364048004, "eval_runtime": 0.3924, "eval_samples_per_second": 94.293, "eval_steps_per_second": 7.645, "step": 483 }, { "epoch": 92.95, "eval_accuracy": 0.972972972972973, "eval_loss": 0.031032495200634003, "eval_runtime": 0.3937, "eval_samples_per_second": 93.991, "eval_steps_per_second": 7.621, "step": 488 }, { "epoch": 93.33, "learning_rate": 1.1111111111111112e-06, "loss": 0.0197, "step": 490 }, { "epoch": 93.9, "eval_accuracy": 1.0, "eval_loss": 0.019568899646401405, "eval_runtime": 0.4189, "eval_samples_per_second": 88.319, "eval_steps_per_second": 7.161, "step": 493 }, { "epoch": 94.86, "eval_accuracy": 1.0, "eval_loss": 0.014562019146978855, "eval_runtime": 0.3925, "eval_samples_per_second": 94.275, "eval_steps_per_second": 7.644, "step": 498 }, { "epoch": 95.24, "learning_rate": 0.0, "loss": 0.0106, "step": 500 }, { "epoch": 95.24, "eval_accuracy": 1.0, "eval_loss": 0.014045949093997478, "eval_runtime": 0.394, "eval_samples_per_second": 93.898, "eval_steps_per_second": 7.613, "step": 500 } ], "max_steps": 500, "num_train_epochs": 100, "total_flos": 2.628508796024832e+18, "trial_name": null, "trial_params": null }