diff --git "a/checkpoint-3131/trainer_state.json" "b/checkpoint-3131/trainer_state.json" --- "a/checkpoint-3131/trainer_state.json" +++ "b/checkpoint-3131/trainer_state.json" @@ -11,1605 +11,1605 @@ { "epoch": 0.0, "learning_rate": 3.0000000000000004e-07, - "loss": 3.6419, + "loss": 6.0535, "step": 1 }, { "epoch": 0.0, - "eval_loss": 3.714031457901001, - "eval_runtime": 28.1012, - "eval_samples_per_second": 187.643, - "eval_steps_per_second": 5.872, + "eval_loss": 6.236783504486084, + "eval_runtime": 28.6757, + "eval_samples_per_second": 183.884, + "eval_steps_per_second": 11.508, "step": 1 }, { "epoch": 0.0, "learning_rate": 6.000000000000001e-07, - "loss": 3.8158, + "loss": 6.1527, "step": 2 }, { "epoch": 0.0, "learning_rate": 9e-07, - "loss": 3.6399, + "loss": 6.1004, "step": 3 }, { "epoch": 0.0, "learning_rate": 1.2000000000000002e-06, - "loss": 3.4312, + "loss": 5.6802, "step": 4 }, { "epoch": 0.0, "learning_rate": 1.5e-06, - "loss": 3.6597, + "loss": 6.105, "step": 5 }, { "epoch": 0.0, "learning_rate": 1.8e-06, - "loss": 3.5666, + "loss": 5.8833, "step": 6 }, { "epoch": 0.0, "learning_rate": 2.1000000000000002e-06, - "loss": 3.7699, + "loss": 6.0386, "step": 7 }, { "epoch": 0.0, "learning_rate": 2.4000000000000003e-06, - "loss": 3.5036, + "loss": 5.6186, "step": 8 }, { "epoch": 0.0, "learning_rate": 2.7e-06, - "loss": 3.4987, + "loss": 5.6635, "step": 9 }, { "epoch": 0.0, "learning_rate": 3e-06, - "loss": 3.5354, + "loss": 5.5451, "step": 10 }, { "epoch": 0.0, "learning_rate": 3.3e-06, - "loss": 3.6143, + "loss": 5.8442, "step": 11 }, { "epoch": 0.0, "learning_rate": 3.6e-06, - "loss": 3.3061, + "loss": 5.2233, "step": 12 }, { "epoch": 0.0, "learning_rate": 3.9e-06, - "loss": 3.2166, + "loss": 5.2118, "step": 13 }, { "epoch": 0.0, "learning_rate": 4.2000000000000004e-06, - "loss": 3.0829, + "loss": 5.0138, "step": 14 }, { "epoch": 0.0, "learning_rate": 4.5e-06, - "loss": 3.2437, + "loss": 4.8483, "step": 15 }, { "epoch": 0.01, "learning_rate": 4.800000000000001e-06, - "loss": 3.0641, + "loss": 4.5374, "step": 16 }, { "epoch": 0.01, "learning_rate": 5.1e-06, - "loss": 2.8823, + "loss": 4.4414, "step": 17 }, { "epoch": 0.01, "learning_rate": 5.4e-06, - "loss": 2.6965, + "loss": 4.0476, "step": 18 }, { "epoch": 0.01, "learning_rate": 5.7000000000000005e-06, - "loss": 2.8709, + "loss": 4.1573, "step": 19 }, { "epoch": 0.01, "learning_rate": 6e-06, - "loss": 2.7647, + "loss": 3.9995, "step": 20 }, { "epoch": 0.01, "learning_rate": 6.3e-06, - "loss": 2.6207, + "loss": 3.6553, "step": 21 }, { "epoch": 0.01, "learning_rate": 6.6e-06, - "loss": 2.3536, + "loss": 3.4045, "step": 22 }, { "epoch": 0.01, "learning_rate": 6.900000000000001e-06, - "loss": 2.3904, + "loss": 3.3223, "step": 23 }, { "epoch": 0.01, "learning_rate": 7.2e-06, - "loss": 2.194, + "loss": 3.0992, "step": 24 }, { "epoch": 0.01, "learning_rate": 7.5e-06, - "loss": 2.0802, + "loss": 2.9959, "step": 25 }, { "epoch": 0.01, "learning_rate": 7.8e-06, - "loss": 1.8686, + "loss": 2.8946, "step": 26 }, { "epoch": 0.01, "learning_rate": 8.1e-06, - "loss": 1.2353, + "loss": 2.9426, "step": 27 }, { "epoch": 0.01, "learning_rate": 8.400000000000001e-06, - "loss": 0.8353, + "loss": 2.5529, "step": 28 }, { "epoch": 0.01, "learning_rate": 8.7e-06, - "loss": 0.6401, + "loss": 2.2701, "step": 29 }, { "epoch": 0.01, "learning_rate": 9e-06, - "loss": 0.7808, + "loss": 2.0024, "step": 30 }, { "epoch": 0.01, "learning_rate": 9.3e-06, - "loss": 0.7208, + "loss": 1.7888, "step": 31 }, { "epoch": 0.01, "learning_rate": 9.600000000000001e-06, - "loss": 0.556, + "loss": 1.1965, "step": 32 }, { "epoch": 0.01, "learning_rate": 9.9e-06, - "loss": 0.5941, + "loss": 1.1853, "step": 33 }, { "epoch": 0.01, "learning_rate": 1.02e-05, - "loss": 0.5093, + "loss": 0.9086, "step": 34 }, { "epoch": 0.01, "learning_rate": 1.05e-05, - "loss": 0.5861, + "loss": 0.7273, "step": 35 }, { "epoch": 0.01, "learning_rate": 1.08e-05, - "loss": 0.4627, + "loss": 0.6449, "step": 36 }, { "epoch": 0.01, "learning_rate": 1.11e-05, - "loss": 0.4513, + "loss": 0.5931, "step": 37 }, { "epoch": 0.01, "learning_rate": 1.1400000000000001e-05, - "loss": 0.3879, + "loss": 0.4779, "step": 38 }, { "epoch": 0.01, "learning_rate": 1.1700000000000001e-05, - "loss": 0.3766, + "loss": 0.4716, "step": 39 }, { "epoch": 0.01, "learning_rate": 1.2e-05, - "loss": 0.3213, + "loss": 0.4238, "step": 40 }, { "epoch": 0.01, "learning_rate": 1.2299999999999999e-05, - "loss": 0.3489, + "loss": 0.4042, "step": 41 }, { "epoch": 0.01, "learning_rate": 1.26e-05, - "loss": 0.2836, + "loss": 0.3709, "step": 42 }, { "epoch": 0.01, "learning_rate": 1.29e-05, - "loss": 0.3482, + "loss": 0.4412, "step": 43 }, { "epoch": 0.01, "learning_rate": 1.32e-05, - "loss": 0.2433, + "loss": 0.2741, "step": 44 }, { "epoch": 0.01, "learning_rate": 1.3500000000000001e-05, - "loss": 0.2226, + "loss": 0.2435, "step": 45 }, { "epoch": 0.01, "learning_rate": 1.3800000000000002e-05, - "loss": 0.2462, + "loss": 0.2673, "step": 46 }, { "epoch": 0.02, "learning_rate": 1.4099999999999999e-05, - "loss": 0.2534, + "loss": 0.2521, "step": 47 }, { "epoch": 0.02, "learning_rate": 1.44e-05, - "loss": 0.2298, + "loss": 0.2228, "step": 48 }, { "epoch": 0.02, "learning_rate": 1.47e-05, - "loss": 0.215, + "loss": 0.2301, "step": 49 }, { "epoch": 0.02, "learning_rate": 1.5e-05, - "loss": 0.2234, + "loss": 0.2527, "step": 50 }, { "epoch": 0.02, "learning_rate": 1.53e-05, - "loss": 0.1899, + "loss": 0.2003, "step": 51 }, { "epoch": 0.02, "learning_rate": 1.56e-05, - "loss": 0.1684, + "loss": 0.2002, "step": 52 }, { "epoch": 0.02, "learning_rate": 1.59e-05, - "loss": 0.138, + "loss": 0.1483, "step": 53 }, { "epoch": 0.02, "learning_rate": 1.62e-05, - "loss": 0.1327, + "loss": 0.1477, "step": 54 }, { "epoch": 0.02, "learning_rate": 1.65e-05, - "loss": 0.1816, + "loss": 0.203, "step": 55 }, { "epoch": 0.02, "learning_rate": 1.6800000000000002e-05, - "loss": 0.1412, + "loss": 0.1541, "step": 56 }, { "epoch": 0.02, "learning_rate": 1.71e-05, - "loss": 0.18, + "loss": 0.2026, "step": 57 }, { "epoch": 0.02, "learning_rate": 1.74e-05, - "loss": 0.1254, + "loss": 0.1226, "step": 58 }, { "epoch": 0.02, "learning_rate": 1.77e-05, - "loss": 0.1552, + "loss": 0.2942, "step": 59 }, { "epoch": 0.02, "learning_rate": 1.8e-05, - "loss": 0.1943, + "loss": 0.1515, "step": 60 }, { "epoch": 0.02, "learning_rate": 1.83e-05, - "loss": 0.1746, + "loss": 0.1453, "step": 61 }, { "epoch": 0.02, "learning_rate": 1.86e-05, - "loss": 0.1195, + "loss": 0.1109, "step": 62 }, { "epoch": 0.02, "learning_rate": 1.8900000000000002e-05, - "loss": 0.1555, + "loss": 0.1693, "step": 63 }, { "epoch": 0.02, "learning_rate": 1.9200000000000003e-05, - "loss": 0.1173, + "loss": 0.1435, "step": 64 }, { "epoch": 0.02, "learning_rate": 1.95e-05, - "loss": 0.0829, + "loss": 0.1054, "step": 65 }, { "epoch": 0.02, "learning_rate": 1.98e-05, - "loss": 0.1057, + "loss": 0.1141, "step": 66 }, { "epoch": 0.02, "learning_rate": 2.01e-05, - "loss": 0.1058, + "loss": 0.1126, "step": 67 }, { "epoch": 0.02, "learning_rate": 2.04e-05, - "loss": 0.1056, + "loss": 0.0971, "step": 68 }, { "epoch": 0.02, "learning_rate": 2.07e-05, - "loss": 0.0856, + "loss": 0.0835, "step": 69 }, { "epoch": 0.02, "learning_rate": 2.1e-05, - "loss": 0.1165, + "loss": 0.0912, "step": 70 }, { "epoch": 0.02, "learning_rate": 2.13e-05, - "loss": 0.0946, + "loss": 0.0894, "step": 71 }, { "epoch": 0.02, "learning_rate": 2.16e-05, - "loss": 0.0911, + "loss": 0.0879, "step": 72 }, { "epoch": 0.02, "learning_rate": 2.19e-05, - "loss": 0.0712, + "loss": 0.0721, "step": 73 }, { "epoch": 0.02, "learning_rate": 2.22e-05, - "loss": 0.0583, + "loss": 0.078, "step": 74 }, { "epoch": 0.02, "learning_rate": 2.25e-05, - "loss": 0.0931, + "loss": 0.0975, "step": 75 }, { "epoch": 0.02, "learning_rate": 2.2800000000000002e-05, - "loss": 0.0844, + "loss": 0.0823, "step": 76 }, { "epoch": 0.02, "learning_rate": 2.3100000000000002e-05, - "loss": 0.13, + "loss": 0.1045, "step": 77 }, { "epoch": 0.02, "learning_rate": 2.3400000000000003e-05, - "loss": 0.09, + "loss": 0.0814, "step": 78 }, { "epoch": 0.03, "learning_rate": 2.37e-05, - "loss": 0.1056, + "loss": 0.0841, "step": 79 }, { "epoch": 0.03, "learning_rate": 2.4e-05, - "loss": 0.0711, + "loss": 0.0564, "step": 80 }, { "epoch": 0.03, "learning_rate": 2.43e-05, - "loss": 0.0853, + "loss": 0.0791, "step": 81 }, { "epoch": 0.03, "learning_rate": 2.4599999999999998e-05, - "loss": 0.1072, + "loss": 0.1116, "step": 82 }, { "epoch": 0.03, "learning_rate": 2.49e-05, - "loss": 0.0877, + "loss": 0.0904, "step": 83 }, { "epoch": 0.03, "learning_rate": 2.52e-05, - "loss": 0.0563, + "loss": 0.0594, "step": 84 }, { "epoch": 0.03, "learning_rate": 2.55e-05, - "loss": 0.0686, + "loss": 0.0668, "step": 85 }, { "epoch": 0.03, "learning_rate": 2.58e-05, - "loss": 0.0587, + "loss": 0.0762, "step": 86 }, { "epoch": 0.03, "learning_rate": 2.61e-05, - "loss": 0.0599, + "loss": 0.0609, "step": 87 }, { "epoch": 0.03, "learning_rate": 2.64e-05, - "loss": 0.0516, + "loss": 0.0735, "step": 88 }, { "epoch": 0.03, "learning_rate": 2.6700000000000002e-05, - "loss": 0.0403, + "loss": 0.0629, "step": 89 }, { "epoch": 0.03, "learning_rate": 2.7000000000000002e-05, - "loss": 0.054, + "loss": 0.0607, "step": 90 }, { "epoch": 0.03, "learning_rate": 2.7300000000000003e-05, - "loss": 0.0513, + "loss": 0.0571, "step": 91 }, { "epoch": 0.03, "learning_rate": 2.7600000000000003e-05, - "loss": 0.0581, + "loss": 0.0704, "step": 92 }, { "epoch": 0.03, "learning_rate": 2.79e-05, - "loss": 0.0563, + "loss": 0.0817, "step": 93 }, { "epoch": 0.03, "learning_rate": 2.8199999999999998e-05, - "loss": 0.0392, + "loss": 0.0452, "step": 94 }, { "epoch": 0.03, "learning_rate": 2.8499999999999998e-05, - "loss": 0.06, + "loss": 0.0467, "step": 95 }, { "epoch": 0.03, "learning_rate": 2.88e-05, - "loss": 0.065, + "loss": 0.0532, "step": 96 }, { "epoch": 0.03, "learning_rate": 2.91e-05, - "loss": 0.0489, + "loss": 0.0515, "step": 97 }, { "epoch": 0.03, "learning_rate": 2.94e-05, - "loss": 0.0729, + "loss": 0.0583, "step": 98 }, { "epoch": 0.03, "learning_rate": 2.97e-05, - "loss": 0.0617, + "loss": 0.0603, "step": 99 }, { "epoch": 0.03, "learning_rate": 3e-05, - "loss": 0.0513, + "loss": 0.0545, "step": 100 }, { "epoch": 0.03, "learning_rate": 2.9999991942708107e-05, - "loss": 0.0567, + "loss": 0.0683, "step": 101 }, { "epoch": 0.03, "learning_rate": 2.9999967770841087e-05, - "loss": 0.0494, + "loss": 0.0468, "step": 102 }, { "epoch": 0.03, "learning_rate": 2.9999927484424903e-05, - "loss": 0.0631, + "loss": 0.0847, "step": 103 }, { "epoch": 0.03, "learning_rate": 2.9999871083502844e-05, - "loss": 0.0511, + "loss": 0.0513, "step": 104 }, { "epoch": 0.03, "learning_rate": 2.999979856813549e-05, - "loss": 0.0804, + "loss": 0.0589, "step": 105 }, { "epoch": 0.03, "learning_rate": 2.9999709938400753e-05, - "loss": 0.0539, + "loss": 0.0577, "step": 106 }, { "epoch": 0.03, "learning_rate": 2.9999605194393848e-05, - "loss": 0.0434, + "loss": 0.0405, "step": 107 }, { "epoch": 0.03, "learning_rate": 2.999948433622729e-05, - "loss": 0.0561, + "loss": 0.0559, "step": 108 }, { "epoch": 0.03, "learning_rate": 2.999934736403094e-05, - "loss": 0.0588, + "loss": 0.0502, "step": 109 }, { "epoch": 0.04, "learning_rate": 2.9999194277951926e-05, - "loss": 0.0858, + "loss": 0.0711, "step": 110 }, { "epoch": 0.04, "learning_rate": 2.9999025078154727e-05, - "loss": 0.0351, + "loss": 0.0359, "step": 111 }, { "epoch": 0.04, "learning_rate": 2.9998839764821103e-05, - "loss": 0.044, + "loss": 0.0572, "step": 112 }, { "epoch": 0.04, "learning_rate": 2.999863833815014e-05, - "loss": 0.0394, + "loss": 0.0377, "step": 113 }, { "epoch": 0.04, "learning_rate": 2.9998420798358235e-05, - "loss": 0.0569, + "loss": 0.0546, "step": 114 }, { "epoch": 0.04, "learning_rate": 2.999818714567909e-05, - "loss": 0.0497, + "loss": 0.0439, "step": 115 }, { "epoch": 0.04, "learning_rate": 2.999793738036372e-05, - "loss": 0.0935, + "loss": 0.0686, "step": 116 }, { "epoch": 0.04, "learning_rate": 2.9997671502680447e-05, - "loss": 0.0571, + "loss": 0.0653, "step": 117 }, { "epoch": 0.04, "learning_rate": 2.999738951291491e-05, - "loss": 0.0513, + "loss": 0.0451, "step": 118 }, { "epoch": 0.04, "learning_rate": 2.9997091411370048e-05, - "loss": 0.0547, + "loss": 0.0441, "step": 119 }, { "epoch": 0.04, "learning_rate": 2.9996777198366115e-05, - "loss": 0.0661, + "loss": 0.073, "step": 120 }, { "epoch": 0.04, "learning_rate": 2.999644687424067e-05, - "loss": 0.0441, + "loss": 0.0439, "step": 121 }, { "epoch": 0.04, "learning_rate": 2.9996100439348582e-05, - "loss": 0.0459, + "loss": 0.0425, "step": 122 }, { "epoch": 0.04, "learning_rate": 2.999573789406203e-05, - "loss": 0.0433, + "loss": 0.0375, "step": 123 }, { "epoch": 0.04, "learning_rate": 2.99953592387705e-05, - "loss": 0.06, + "loss": 0.0405, "step": 124 }, { "epoch": 0.04, "learning_rate": 2.9994964473880776e-05, - "loss": 0.0458, + "loss": 0.0481, "step": 125 }, { "epoch": 0.04, "learning_rate": 2.9994553599816963e-05, - "loss": 0.051, + "loss": 0.0527, "step": 126 }, { "epoch": 0.04, "learning_rate": 2.9994126617020462e-05, - "loss": 0.0558, + "loss": 0.0509, "step": 127 }, { "epoch": 0.04, "learning_rate": 2.9993683525949985e-05, - "loss": 0.0334, + "loss": 0.0476, "step": 128 }, { "epoch": 0.04, "learning_rate": 2.9993224327081544e-05, - "loss": 0.0557, + "loss": 0.0646, "step": 129 }, { "epoch": 0.04, "learning_rate": 2.9992749020908465e-05, - "loss": 0.0272, + "loss": 0.035, "step": 130 }, { "epoch": 0.04, "learning_rate": 2.999225760794137e-05, - "loss": 0.0475, + "loss": 0.0769, "step": 131 }, { "epoch": 0.04, "learning_rate": 2.9991750088708185e-05, - "loss": 0.0385, + "loss": 0.0521, "step": 132 }, { "epoch": 0.04, "learning_rate": 2.9991226463754136e-05, - "loss": 0.0545, + "loss": 0.0789, "step": 133 }, { "epoch": 0.04, "learning_rate": 2.9990686733641767e-05, - "loss": 0.0463, + "loss": 0.0498, "step": 134 }, { "epoch": 0.04, "learning_rate": 2.9990130898950905e-05, - "loss": 0.0491, + "loss": 0.0568, "step": 135 }, { "epoch": 0.04, "learning_rate": 2.9989558960278684e-05, - "loss": 0.0381, + "loss": 0.0403, "step": 136 }, { "epoch": 0.04, "learning_rate": 2.9988970918239553e-05, - "loss": 0.0427, + "loss": 0.0638, "step": 137 }, { "epoch": 0.04, "learning_rate": 2.9988366773465235e-05, - "loss": 0.0351, + "loss": 0.0348, "step": 138 }, { "epoch": 0.04, "learning_rate": 2.9987746526604774e-05, - "loss": 0.0318, + "loss": 0.0336, "step": 139 }, { "epoch": 0.04, "learning_rate": 2.9987110178324505e-05, - "loss": 0.0783, + "loss": 0.0693, "step": 140 }, { "epoch": 0.05, "learning_rate": 2.998645772930806e-05, - "loss": 0.0405, + "loss": 0.0417, "step": 141 }, { "epoch": 0.05, "learning_rate": 2.9985789180256363e-05, - "loss": 0.047, + "loss": 0.0548, "step": 142 }, { "epoch": 0.05, "learning_rate": 2.9985104531887647e-05, - "loss": 0.0467, + "loss": 0.0431, "step": 143 }, { "epoch": 0.05, "learning_rate": 2.9984403784937428e-05, - "loss": 0.0421, + "loss": 0.0409, "step": 144 }, { "epoch": 0.05, "learning_rate": 2.998368694015853e-05, - "loss": 0.0414, + "loss": 0.0375, "step": 145 }, { "epoch": 0.05, "learning_rate": 2.9982953998321054e-05, - "loss": 0.0325, + "loss": 0.0366, "step": 146 }, { "epoch": 0.05, "learning_rate": 2.9982204960212412e-05, - "loss": 0.0308, + "loss": 0.0332, "step": 147 }, { "epoch": 0.05, "learning_rate": 2.998143982663729e-05, - "loss": 0.0598, + "loss": 0.0604, "step": 148 }, { "epoch": 0.05, "learning_rate": 2.9980658598417685e-05, - "loss": 0.0414, + "loss": 0.044, "step": 149 }, { "epoch": 0.05, "learning_rate": 2.997986127639287e-05, - "loss": 0.0437, + "loss": 0.0364, "step": 150 }, { "epoch": 0.05, "learning_rate": 2.997904786141941e-05, - "loss": 0.122, + "loss": 0.0614, "step": 151 }, { "epoch": 0.05, "learning_rate": 2.997821835437117e-05, - "loss": 0.0243, + "loss": 0.0333, "step": 152 }, { "epoch": 0.05, "learning_rate": 2.9977372756139287e-05, - "loss": 0.0841, + "loss": 0.0386, "step": 153 }, { "epoch": 0.05, "learning_rate": 2.9976511067632193e-05, - "loss": 0.0478, + "loss": 0.0415, "step": 154 }, { "epoch": 0.05, "learning_rate": 2.997563328977561e-05, - "loss": 0.0586, + "loss": 0.0488, "step": 155 }, { "epoch": 0.05, "learning_rate": 2.997473942351253e-05, - "loss": 0.0345, + "loss": 0.0311, "step": 156 }, { "epoch": 0.05, "learning_rate": 2.9973829469803243e-05, - "loss": 0.0341, + "loss": 0.0382, "step": 157 }, { "epoch": 0.05, "learning_rate": 2.997290342962532e-05, - "loss": 0.0518, + "loss": 0.0657, "step": 158 }, { "epoch": 0.05, "learning_rate": 2.9971961303973606e-05, - "loss": 0.0557, + "loss": 0.0697, "step": 159 }, { "epoch": 0.05, "learning_rate": 2.997100309386024e-05, - "loss": 0.0346, + "loss": 0.0342, "step": 160 }, { "epoch": 0.05, "learning_rate": 2.9970028800314624e-05, - "loss": 0.035, + "loss": 0.0317, "step": 161 }, { "epoch": 0.05, "learning_rate": 2.9969038424383454e-05, - "loss": 0.0318, + "loss": 0.032, "step": 162 }, { "epoch": 0.05, "learning_rate": 2.9968031967130686e-05, - "loss": 0.0257, + "loss": 0.0349, "step": 163 }, { "epoch": 0.05, "learning_rate": 2.9967009429637575e-05, - "loss": 0.0413, + "loss": 0.036, "step": 164 }, { "epoch": 0.05, "learning_rate": 2.996597081300263e-05, - "loss": 0.0321, + "loss": 0.0289, "step": 165 }, { "epoch": 0.05, "learning_rate": 2.996491611834165e-05, - "loss": 0.0581, + "loss": 0.0308, "step": 166 }, { "epoch": 0.05, "learning_rate": 2.996384534678769e-05, - "loss": 0.0432, + "loss": 0.0305, "step": 167 }, { "epoch": 0.05, "learning_rate": 2.9962758499491093e-05, - "loss": 0.0337, + "loss": 0.0314, "step": 168 }, { "epoch": 0.05, "learning_rate": 2.9961655577619466e-05, - "loss": 0.04, + "loss": 0.0335, "step": 169 }, { "epoch": 0.05, "learning_rate": 2.996053658235768e-05, - "loss": 0.0582, + "loss": 0.0466, "step": 170 }, { "epoch": 0.05, "learning_rate": 2.995940151490788e-05, - "loss": 0.034, + "loss": 0.0363, "step": 171 }, { "epoch": 0.05, "learning_rate": 2.995825037648947e-05, - "loss": 0.0374, + "loss": 0.0358, "step": 172 }, { "epoch": 0.06, "learning_rate": 2.995708316833913e-05, - "loss": 0.0383, + "loss": 0.0453, "step": 173 }, { "epoch": 0.06, "learning_rate": 2.99558998917108e-05, - "loss": 0.0329, + "loss": 0.0344, "step": 174 }, { "epoch": 0.06, "learning_rate": 2.9954700547875675e-05, - "loss": 0.0296, + "loss": 0.0267, "step": 175 }, { "epoch": 0.06, "learning_rate": 2.995348513812222e-05, - "loss": 0.025, + "loss": 0.0248, "step": 176 }, { "epoch": 0.06, "learning_rate": 2.995225366375616e-05, - "loss": 0.038, + "loss": 0.0423, "step": 177 }, { "epoch": 0.06, "learning_rate": 2.9951006126100466e-05, - "loss": 0.0183, + "loss": 0.0172, "step": 178 }, { "epoch": 0.06, "learning_rate": 2.9949742526495382e-05, - "loss": 0.0243, + "loss": 0.0275, "step": 179 }, { "epoch": 0.06, "learning_rate": 2.99484628662984e-05, - "loss": 0.0314, + "loss": 0.0256, "step": 180 }, { "epoch": 0.06, "learning_rate": 2.994716714688426e-05, - "loss": 0.0416, + "loss": 0.0415, "step": 181 }, { "epoch": 0.06, "learning_rate": 2.994585536964497e-05, - "loss": 0.0354, + "loss": 0.0389, "step": 182 }, { "epoch": 0.06, "learning_rate": 2.9944527535989765e-05, - "loss": 0.0348, + "loss": 0.0401, "step": 183 }, { "epoch": 0.06, "learning_rate": 2.994318364734516e-05, - "loss": 0.0303, + "loss": 0.0417, "step": 184 }, { "epoch": 0.06, "learning_rate": 2.9941823705154893e-05, - "loss": 0.0376, + "loss": 0.0497, "step": 185 }, { "epoch": 0.06, "learning_rate": 2.9940447710879957e-05, - "loss": 0.0301, + "loss": 0.0305, "step": 186 }, { "epoch": 0.06, "learning_rate": 2.9939055665998594e-05, - "loss": 0.0261, + "loss": 0.0241, "step": 187 }, { "epoch": 0.06, "learning_rate": 2.9937647572006287e-05, - "loss": 0.0314, + "loss": 0.0202, "step": 188 }, { "epoch": 0.06, "learning_rate": 2.9936223430415756e-05, - "loss": 0.0208, + "loss": 0.0264, "step": 189 }, { "epoch": 0.06, "learning_rate": 2.9934783242756963e-05, - "loss": 0.0325, + "loss": 0.0337, "step": 190 }, { "epoch": 0.06, "learning_rate": 2.9933327010577115e-05, - "loss": 0.031, + "loss": 0.0415, "step": 191 }, { "epoch": 0.06, "learning_rate": 2.9931854735440647e-05, - "loss": 0.0364, + "loss": 0.0406, "step": 192 }, { "epoch": 0.06, "learning_rate": 2.9930366418929225e-05, - "loss": 0.0241, + "loss": 0.0346, "step": 193 }, { "epoch": 0.06, "learning_rate": 2.9928862062641773e-05, - "loss": 0.0159, + "loss": 0.0243, "step": 194 }, { "epoch": 0.06, "learning_rate": 2.992734166819442e-05, - "loss": 0.0412, + "loss": 0.032, "step": 195 }, { "epoch": 0.06, "learning_rate": 2.9925805237220526e-05, - "loss": 0.0299, + "loss": 0.0321, "step": 196 }, { "epoch": 0.06, "learning_rate": 2.99242527713707e-05, - "loss": 0.0298, + "loss": 0.0301, "step": 197 }, { "epoch": 0.06, "learning_rate": 2.9922684272312758e-05, - "loss": 0.0312, + "loss": 0.0379, "step": 198 }, { "epoch": 0.06, "learning_rate": 2.992109974173175e-05, - "loss": 0.0247, + "loss": 0.0242, "step": 199 }, { "epoch": 0.06, "learning_rate": 2.9919499181329948e-05, - "loss": 0.0404, + "loss": 0.0337, "step": 200 }, { "epoch": 0.06, "learning_rate": 2.9917882592826838e-05, - "loss": 0.0324, + "loss": 0.022, "step": 201 }, { "epoch": 0.06, "learning_rate": 2.991624997795913e-05, - "loss": 0.0273, + "loss": 0.031, "step": 202 }, { "epoch": 0.06, "learning_rate": 2.991460133848076e-05, - "loss": 0.0294, + "loss": 0.0229, "step": 203 }, { "epoch": 0.07, "learning_rate": 2.9912936676162856e-05, - "loss": 0.0319, + "loss": 0.0255, "step": 204 }, { "epoch": 0.07, "learning_rate": 2.9911255992793788e-05, - "loss": 0.0331, + "loss": 0.0339, "step": 205 }, { "epoch": 0.07, "learning_rate": 2.9909559290179112e-05, - "loss": 0.0251, + "loss": 0.0272, "step": 206 }, { "epoch": 0.07, "learning_rate": 2.990784657014162e-05, - "loss": 0.0266, + "loss": 0.0297, "step": 207 }, { "epoch": 0.07, "learning_rate": 2.9906117834521283e-05, - "loss": 0.0342, + "loss": 0.033, "step": 208 }, { "epoch": 0.07, "learning_rate": 2.9904373085175293e-05, - "loss": 0.0394, + "loss": 0.0328, "step": 209 }, { "epoch": 0.07, "learning_rate": 2.990261232397805e-05, - "loss": 0.0522, + "loss": 0.0357, "step": 210 }, { "epoch": 0.07, "learning_rate": 2.9900835552821145e-05, - "loss": 0.039, + "loss": 0.0307, "step": 211 }, { "epoch": 0.07, "learning_rate": 2.9899042773613376e-05, - "loss": 0.0291, + "loss": 0.0347, "step": 212 }, { "epoch": 0.07, "learning_rate": 2.9897233988280735e-05, - "loss": 0.0299, + "loss": 0.0289, "step": 213 }, { "epoch": 0.07, "learning_rate": 2.989540919876641e-05, - "loss": 0.0232, + "loss": 0.0224, "step": 214 }, { "epoch": 0.07, "learning_rate": 2.989356840703078e-05, - "loss": 0.0246, + "loss": 0.0231, "step": 215 }, { "epoch": 0.07, "learning_rate": 2.9891711615051417e-05, - "loss": 0.0282, + "loss": 0.0207, "step": 216 }, { "epoch": 0.07, "learning_rate": 2.988983882482309e-05, - "loss": 0.0335, + "loss": 0.0437, "step": 217 }, { "epoch": 0.07, "learning_rate": 2.9887950038357747e-05, - "loss": 0.0315, + "loss": 0.0295, "step": 218 }, { "epoch": 0.07, "learning_rate": 2.9886045257684514e-05, - "loss": 0.0673, + "loss": 0.0549, "step": 219 }, { "epoch": 0.07, "learning_rate": 2.9884124484849716e-05, - "loss": 0.0171, + "loss": 0.0135, "step": 220 }, { "epoch": 0.07, "learning_rate": 2.988218772191684e-05, - "loss": 0.0242, + "loss": 0.0252, "step": 221 }, { "epoch": 0.07, "learning_rate": 2.988023497096657e-05, - "loss": 0.0148, + "loss": 0.0204, "step": 222 }, { "epoch": 0.07, "learning_rate": 2.987826623409676e-05, - "loss": 0.0182, + "loss": 0.0202, "step": 223 }, { "epoch": 0.07, "learning_rate": 2.9876281513422426e-05, - "loss": 0.0249, + "loss": 0.0227, "step": 224 }, { "epoch": 0.07, "learning_rate": 2.9874280811075767e-05, - "loss": 0.0452, + "loss": 0.0178, "step": 225 }, { "epoch": 0.07, "learning_rate": 2.9872264129206155e-05, - "loss": 0.0172, + "loss": 0.0222, "step": 226 }, { "epoch": 0.07, "learning_rate": 2.9870231469980112e-05, - "loss": 0.0296, + "loss": 0.0278, "step": 227 }, { "epoch": 0.07, "learning_rate": 2.9868182835581344e-05, - "loss": 0.0288, + "loss": 0.0362, "step": 228 }, { "epoch": 0.07, "learning_rate": 2.9866118228210704e-05, - "loss": 0.0312, + "loss": 0.0455, "step": 229 }, { "epoch": 0.07, "learning_rate": 2.986403765008622e-05, - "loss": 0.0235, + "loss": 0.0291, "step": 230 }, { "epoch": 0.07, "learning_rate": 2.9861941103443057e-05, - "loss": 0.0274, + "loss": 0.0258, "step": 231 }, { "epoch": 0.07, "learning_rate": 2.9859828590533554e-05, - "loss": 0.028, + "loss": 0.0206, "step": 232 }, { "epoch": 0.07, "learning_rate": 2.98577001136272e-05, - "loss": 0.032, + "loss": 0.0354, "step": 233 }, { "epoch": 0.07, "learning_rate": 2.9855555675010618e-05, - "loss": 0.0339, + "loss": 0.0358, "step": 234 }, { "epoch": 0.08, "learning_rate": 2.9853395276987598e-05, - "loss": 0.0301, + "loss": 0.0186, "step": 235 }, { "epoch": 0.08, "learning_rate": 2.9851218921879064e-05, - "loss": 0.0351, + "loss": 0.0388, "step": 236 }, { "epoch": 0.08, "learning_rate": 2.9849026612023093e-05, - "loss": 0.0246, + "loss": 0.0176, "step": 237 }, { "epoch": 0.08, "learning_rate": 2.984681834977489e-05, - "loss": 0.0215, + "loss": 0.0172, "step": 238 }, { "epoch": 0.08, "learning_rate": 2.9844594137506803e-05, - "loss": 0.035, + "loss": 0.0294, "step": 239 }, { "epoch": 0.08, "learning_rate": 2.9842353977608316e-05, - "loss": 0.0197, + "loss": 0.0321, "step": 240 }, { "epoch": 0.08, "learning_rate": 2.9840097872486048e-05, - "loss": 0.0222, + "loss": 0.0187, "step": 241 }, { "epoch": 0.08, "learning_rate": 2.9837825824563744e-05, - "loss": 0.0192, + "loss": 0.0195, "step": 242 }, { "epoch": 0.08, "learning_rate": 2.9835537836282277e-05, - "loss": 0.0189, + "loss": 0.0166, "step": 243 }, { "epoch": 0.08, "learning_rate": 2.9833233910099647e-05, - "loss": 0.0296, + "loss": 0.0283, "step": 244 }, { "epoch": 0.08, "learning_rate": 2.9830914048490977e-05, - "loss": 0.0222, + "loss": 0.0284, "step": 245 }, { "epoch": 0.08, "learning_rate": 2.9828578253948498e-05, - "loss": 0.0104, + "loss": 0.0158, "step": 246 }, { "epoch": 0.08, "learning_rate": 2.982622652898158e-05, - "loss": 0.0239, + "loss": 0.0315, "step": 247 }, { "epoch": 0.08, "learning_rate": 2.982385887611668e-05, - "loss": 0.0167, + "loss": 0.0121, "step": 248 }, { "epoch": 0.08, "learning_rate": 2.9821475297897395e-05, - "loss": 0.0143, + "loss": 0.0182, "step": 249 }, { "epoch": 0.08, "learning_rate": 2.981907579688441e-05, - "loss": 0.0158, + "loss": 0.0227, "step": 250 }, { "epoch": 0.08, "learning_rate": 2.9816660375655517e-05, - "loss": 0.0352, + "loss": 0.0497, "step": 251 }, { "epoch": 0.08, "learning_rate": 2.9814229036805624e-05, - "loss": 0.0179, + "loss": 0.0186, "step": 252 }, { "epoch": 0.08, "learning_rate": 2.9811781782946732e-05, - "loss": 0.0276, + "loss": 0.0299, "step": 253 }, { "epoch": 0.08, "learning_rate": 2.9809318616707933e-05, - "loss": 0.0148, + "loss": 0.017, "step": 254 }, { "epoch": 0.08, "learning_rate": 2.9806839540735426e-05, - "loss": 0.0119, + "loss": 0.0139, "step": 255 }, { "epoch": 0.08, "learning_rate": 2.9804344557692492e-05, - "loss": 0.0194, + "loss": 0.0208, "step": 256 }, { "epoch": 0.08, "learning_rate": 2.9801833670259516e-05, - "loss": 0.0399, + "loss": 0.0302, "step": 257 }, { "epoch": 0.08, "learning_rate": 2.9799306881133942e-05, - "loss": 0.0643, + "loss": 0.0457, "step": 258 }, { "epoch": 0.08, "learning_rate": 2.9796764193030325e-05, - "loss": 0.0269, + "loss": 0.0312, "step": 259 }, { "epoch": 0.08, "learning_rate": 2.9794205608680285e-05, - "loss": 0.0381, + "loss": 0.0328, "step": 260 }, { "epoch": 0.08, "learning_rate": 2.9791631130832525e-05, - "loss": 0.0184, + "loss": 0.0291, "step": 261 }, { "epoch": 0.08, "learning_rate": 2.9789040762252818e-05, - "loss": 0.0105, + "loss": 0.0171, "step": 262 }, { "epoch": 0.08, "learning_rate": 2.978643450572402e-05, - "loss": 0.0327, + "loss": 0.0384, "step": 263 }, { "epoch": 0.08, "learning_rate": 2.978381236404603e-05, - "loss": 0.0607, + "loss": 0.0136, "step": 264 }, { "epoch": 0.08, "learning_rate": 2.9781174340035845e-05, - "loss": 0.0203, + "loss": 0.025, "step": 265 }, { "epoch": 0.08, "learning_rate": 2.9778520436527504e-05, - "loss": 0.0147, + "loss": 0.0163, "step": 266 }, { @@ -1621,61 +1621,61 @@ { "epoch": 0.09, "learning_rate": 2.9773165002437824e-05, - "loss": 0.0197, + "loss": 0.016, "step": 268 }, { "epoch": 0.09, "learning_rate": 2.9770463477609862e-05, - "loss": 0.0243, + "loss": 0.0195, "step": 269 }, { "epoch": 0.09, "learning_rate": 2.976774608479048e-05, - "loss": 0.0142, + "loss": 0.0145, "step": 270 }, { "epoch": 0.09, "learning_rate": 2.976501282689899e-05, - "loss": 0.0225, + "loss": 0.0196, "step": 271 }, { "epoch": 0.09, "learning_rate": 2.9762263706871746e-05, - "loss": 0.0156, + "loss": 0.0087, "step": 272 }, { "epoch": 0.09, "learning_rate": 2.975949872766215e-05, - "loss": 0.0308, + "loss": 0.0213, "step": 273 }, { "epoch": 0.09, "learning_rate": 2.975671789224063e-05, - "loss": 0.0255, + "loss": 0.0301, "step": 274 }, { "epoch": 0.09, "learning_rate": 2.9753921203594646e-05, - "loss": 0.0151, + "loss": 0.008, "step": 275 }, { "epoch": 0.09, "learning_rate": 2.9751108664728705e-05, - "loss": 0.015, + "loss": 0.0115, "step": 276 }, { "epoch": 0.09, "learning_rate": 2.9748280278664332e-05, - "loss": 0.0423, + "loss": 0.0288, "step": 277 }, { @@ -1687,301 +1687,301 @@ { "epoch": 0.09, "learning_rate": 2.9742575977111513e-05, - "loss": 0.026, + "loss": 0.0207, "step": 279 }, { "epoch": 0.09, "learning_rate": 2.9739700067751224e-05, - "loss": 0.0166, + "loss": 0.0132, "step": 280 }, { "epoch": 0.09, "learning_rate": 2.973680832344883e-05, - "loss": 0.0566, + "loss": 0.0368, "step": 281 }, { "epoch": 0.09, "learning_rate": 2.9733900747310935e-05, - "loss": 0.0295, + "loss": 0.0173, "step": 282 }, { "epoch": 0.09, "learning_rate": 2.9730977342461168e-05, - "loss": 0.0444, + "loss": 0.0315, "step": 283 }, { "epoch": 0.09, "learning_rate": 2.972803811204016e-05, - "loss": 0.0211, + "loss": 0.0176, "step": 284 }, { "epoch": 0.09, "learning_rate": 2.9725083059205544e-05, - "loss": 0.0189, + "loss": 0.0151, "step": 285 }, { "epoch": 0.09, "learning_rate": 2.972211218713195e-05, - "loss": 0.03, + "loss": 0.0287, "step": 286 }, { "epoch": 0.09, "learning_rate": 2.9719125499010995e-05, - "loss": 0.0435, + "loss": 0.0446, "step": 287 }, { "epoch": 0.09, "learning_rate": 2.9716122998051304e-05, - "loss": 0.025, + "loss": 0.0296, "step": 288 }, { "epoch": 0.09, "learning_rate": 2.971310468747848e-05, - "loss": 0.0349, + "loss": 0.0354, "step": 289 }, { "epoch": 0.09, "learning_rate": 2.97100705705351e-05, - "loss": 0.0243, + "loss": 0.0291, "step": 290 }, { "epoch": 0.09, "learning_rate": 2.9707020650480744e-05, - "loss": 0.0421, + "loss": 0.0287, "step": 291 }, { "epoch": 0.09, "learning_rate": 2.9703954930591958e-05, - "loss": 0.0177, + "loss": 0.014, "step": 292 }, { "epoch": 0.09, "learning_rate": 2.9700873414162257e-05, - "loss": 0.0231, + "loss": 0.0144, "step": 293 }, { "epoch": 0.09, "learning_rate": 2.969777610450213e-05, - "loss": 0.0305, + "loss": 0.029, "step": 294 }, { "epoch": 0.09, "learning_rate": 2.9694663004939043e-05, - "loss": 0.0276, + "loss": 0.0399, "step": 295 }, { "epoch": 0.09, "learning_rate": 2.9691534118817406e-05, - "loss": 0.0193, + "loss": 0.0232, "step": 296 }, { "epoch": 0.09, "learning_rate": 2.9688389449498605e-05, - "loss": 0.0192, + "loss": 0.0171, "step": 297 }, { "epoch": 0.1, "learning_rate": 2.9685229000360976e-05, - "loss": 0.0144, + "loss": 0.0158, "step": 298 }, { "epoch": 0.1, "learning_rate": 2.96820527747998e-05, - "loss": 0.0172, + "loss": 0.0184, "step": 299 }, { "epoch": 0.1, "learning_rate": 2.9678860776227327e-05, - "loss": 0.0116, + "loss": 0.0202, "step": 300 }, { "epoch": 0.1, "learning_rate": 2.9675653008072725e-05, - "loss": 0.0165, + "loss": 0.0221, "step": 301 }, { "epoch": 0.1, "learning_rate": 2.9672429473782123e-05, - "loss": 0.0247, + "loss": 0.025, "step": 302 }, { "epoch": 0.1, "learning_rate": 2.9669190176818585e-05, - "loss": 0.0458, + "loss": 0.044, "step": 303 }, { "epoch": 0.1, "learning_rate": 2.9665935120662102e-05, - "loss": 0.0223, + "loss": 0.0203, "step": 304 }, { "epoch": 0.1, "learning_rate": 2.96626643088096e-05, - "loss": 0.0159, + "loss": 0.0196, "step": 305 }, { "epoch": 0.1, "learning_rate": 2.965937774477493e-05, - "loss": 0.0162, + "loss": 0.0228, "step": 306 }, { "epoch": 0.1, "learning_rate": 2.9656075432088868e-05, - "loss": 0.016, + "loss": 0.0225, "step": 307 }, { "epoch": 0.1, "learning_rate": 2.9652757374299103e-05, - "loss": 0.0143, + "loss": 0.0138, "step": 308 }, { "epoch": 0.1, "learning_rate": 2.9649423574970244e-05, - "loss": 0.0457, + "loss": 0.0498, "step": 309 }, { "epoch": 0.1, "learning_rate": 2.9646074037683818e-05, - "loss": 0.0225, + "loss": 0.0244, "step": 310 }, { "epoch": 0.1, "learning_rate": 2.964270876603824e-05, - "loss": 0.0302, + "loss": 0.0249, "step": 311 }, { "epoch": 0.1, "learning_rate": 2.963932776364885e-05, - "loss": 0.0255, + "loss": 0.0154, "step": 312 }, { "epoch": 0.1, "learning_rate": 2.963593103414787e-05, - "loss": 0.0224, + "loss": 0.0217, "step": 313 }, { "epoch": 0.1, "learning_rate": 2.963251858118443e-05, - "loss": 0.0226, + "loss": 0.0217, "step": 314 }, { "epoch": 0.1, "learning_rate": 2.9629090408424542e-05, - "loss": 0.0304, + "loss": 0.0164, "step": 315 }, { "epoch": 0.1, "learning_rate": 2.9625646519551117e-05, - "loss": 0.029, + "loss": 0.0254, "step": 316 }, { "epoch": 0.1, "learning_rate": 2.9622186918263947e-05, - "loss": 0.0179, + "loss": 0.0244, "step": 317 }, { "epoch": 0.1, "learning_rate": 2.961871160827969e-05, - "loss": 0.0253, + "loss": 0.0484, "step": 318 }, { "epoch": 0.1, "learning_rate": 2.96152205933319e-05, - "loss": 0.0106, + "loss": 0.0123, "step": 319 }, { "epoch": 0.1, "learning_rate": 2.9611713877170995e-05, - "loss": 0.023, + "loss": 0.0163, "step": 320 }, { "epoch": 0.1, "learning_rate": 2.960819146356425e-05, - "loss": 0.0167, + "loss": 0.0166, "step": 321 }, { "epoch": 0.1, "learning_rate": 2.9604653356295823e-05, - "loss": 0.0303, + "loss": 0.0351, "step": 322 }, { "epoch": 0.1, "learning_rate": 2.9601099559166714e-05, - "loss": 0.0204, + "loss": 0.0193, "step": 323 }, { "epoch": 0.1, "learning_rate": 2.9597530075994798e-05, - "loss": 0.0166, + "loss": 0.0154, "step": 324 }, { "epoch": 0.1, "learning_rate": 2.959394491061478e-05, - "loss": 0.0278, + "loss": 0.0265, "step": 325 }, { "epoch": 0.1, "learning_rate": 2.959034406687823e-05, - "loss": 0.0129, + "loss": 0.0149, "step": 326 }, { "epoch": 0.1, "learning_rate": 2.958672754865355e-05, - "loss": 0.0117, + "loss": 0.0154, "step": 327 }, { "epoch": 0.1, "learning_rate": 2.958309535982599e-05, - "loss": 0.033, + "loss": 0.0233, "step": 328 }, { @@ -1993,1213 +1993,1213 @@ { "epoch": 0.11, "learning_rate": 2.9575783985987377e-05, - "loss": 0.0182, + "loss": 0.0165, "step": 330 }, { "epoch": 0.11, "learning_rate": 2.957210480883097e-05, - "loss": 0.0168, + "loss": 0.0197, "step": 331 }, { "epoch": 0.11, "learning_rate": 2.9568409976780977e-05, - "loss": 0.0146, + "loss": 0.0135, "step": 332 }, { "epoch": 0.11, "learning_rate": 2.9564699493806773e-05, - "loss": 0.0174, + "loss": 0.0197, "step": 333 }, { "epoch": 0.11, "learning_rate": 2.956097336389455e-05, - "loss": 0.0275, + "loss": 0.028, "step": 334 }, { "epoch": 0.11, "learning_rate": 2.9557231591047302e-05, - "loss": 0.0256, + "loss": 0.0195, "step": 335 }, { "epoch": 0.11, "learning_rate": 2.955347417928485e-05, - "loss": 0.0201, + "loss": 0.0238, "step": 336 }, { "epoch": 0.11, "learning_rate": 2.9549701132643796e-05, - "loss": 0.0219, + "loss": 0.0254, "step": 337 }, { "epoch": 0.11, "learning_rate": 2.9545912455177546e-05, - "loss": 0.0126, + "loss": 0.0119, "step": 338 }, { "epoch": 0.11, "learning_rate": 2.9542108150956296e-05, - "loss": 0.0214, + "loss": 0.0176, "step": 339 }, { "epoch": 0.11, "learning_rate": 2.953828822406703e-05, - "loss": 0.0318, + "loss": 0.0144, "step": 340 }, { "epoch": 0.11, "learning_rate": 2.9534452678613522e-05, - "loss": 0.0164, + "loss": 0.0097, "step": 341 }, { "epoch": 0.11, "learning_rate": 2.9530601518716317e-05, - "loss": 0.033, + "loss": 0.0245, "step": 342 }, { "epoch": 0.11, "learning_rate": 2.952673474851274e-05, - "loss": 0.0262, + "loss": 0.0207, "step": 343 }, { "epoch": 0.11, "learning_rate": 2.9522852372156875e-05, - "loss": 0.0066, + "loss": 0.0084, "step": 344 }, { "epoch": 0.11, "learning_rate": 2.9518954393819587e-05, - "loss": 0.017, + "loss": 0.0195, "step": 345 }, { "epoch": 0.11, "learning_rate": 2.9515040817688504e-05, - "loss": 0.0161, + "loss": 0.0189, "step": 346 }, { "epoch": 0.11, "learning_rate": 2.9511111647967987e-05, - "loss": 0.023, + "loss": 0.0297, "step": 347 }, { "epoch": 0.11, "learning_rate": 2.9507166888879175e-05, - "loss": 0.0146, + "loss": 0.0153, "step": 348 }, { "epoch": 0.11, "learning_rate": 2.950320654465994e-05, - "loss": 0.0222, + "loss": 0.0125, "step": 349 }, { "epoch": 0.11, "learning_rate": 2.9499230619564908e-05, - "loss": 0.0227, + "loss": 0.0207, "step": 350 }, { "epoch": 0.11, "learning_rate": 2.9495239117865433e-05, - "loss": 0.0193, + "loss": 0.0239, "step": 351 }, { "epoch": 0.11, "learning_rate": 2.9491232043849605e-05, - "loss": 0.0201, + "loss": 0.0242, "step": 352 }, { "epoch": 0.11, "learning_rate": 2.948720940182225e-05, - "loss": 0.0164, + "loss": 0.0145, "step": 353 }, { "epoch": 0.11, "learning_rate": 2.9483171196104915e-05, - "loss": 0.0312, + "loss": 0.0393, "step": 354 }, { "epoch": 0.11, "learning_rate": 2.9479117431035867e-05, - "loss": 0.0164, + "loss": 0.0172, "step": 355 }, { "epoch": 0.11, "learning_rate": 2.9475048110970087e-05, - "loss": 0.018, + "loss": 0.0226, "step": 356 }, { "epoch": 0.11, "learning_rate": 2.947096324027927e-05, - "loss": 0.0202, + "loss": 0.0174, "step": 357 }, { "epoch": 0.11, "learning_rate": 2.9466862823351814e-05, - "loss": 0.0167, + "loss": 0.0177, "step": 358 }, { "epoch": 0.11, "learning_rate": 2.946274686459282e-05, - "loss": 0.016, + "loss": 0.023, "step": 359 }, { "epoch": 0.11, "learning_rate": 2.9458615368424082e-05, - "loss": 0.0278, + "loss": 0.0323, "step": 360 }, { "epoch": 0.12, "learning_rate": 2.9454468339284095e-05, - "loss": 0.0166, + "loss": 0.0238, "step": 361 }, { "epoch": 0.12, "learning_rate": 2.9450305781628033e-05, - "loss": 0.0171, + "loss": 0.0186, "step": 362 }, { "epoch": 0.12, "learning_rate": 2.9446127699927752e-05, - "loss": 0.0171, + "loss": 0.0169, "step": 363 }, { "epoch": 0.12, "learning_rate": 2.9441934098671796e-05, - "loss": 0.0122, + "loss": 0.0147, "step": 364 }, { "epoch": 0.12, "learning_rate": 2.943772498236537e-05, - "loss": 0.0386, + "loss": 0.0349, "step": 365 }, { "epoch": 0.12, "learning_rate": 2.9433500355530352e-05, - "loss": 0.0223, + "loss": 0.0208, "step": 366 }, { "epoch": 0.12, "learning_rate": 2.942926022270528e-05, - "loss": 0.0271, + "loss": 0.0174, "step": 367 }, { "epoch": 0.12, "learning_rate": 2.9425004588445353e-05, - "loss": 0.0113, + "loss": 0.0105, "step": 368 }, { "epoch": 0.12, "learning_rate": 2.9420733457322427e-05, - "loss": 0.0129, + "loss": 0.0144, "step": 369 }, { "epoch": 0.12, "learning_rate": 2.9416446833924995e-05, - "loss": 0.0389, + "loss": 0.0246, "step": 370 }, { "epoch": 0.12, "learning_rate": 2.9412144722858205e-05, - "loss": 0.0182, + "loss": 0.018, "step": 371 }, { "epoch": 0.12, "learning_rate": 2.9407827128743838e-05, - "loss": 0.0447, + "loss": 0.0373, "step": 372 }, { "epoch": 0.12, "learning_rate": 2.9403494056220312e-05, - "loss": 0.0295, + "loss": 0.0207, "step": 373 }, { "epoch": 0.12, "learning_rate": 2.9399145509942665e-05, - "loss": 0.0112, + "loss": 0.0127, "step": 374 }, { "epoch": 0.12, "learning_rate": 2.9394781494582568e-05, - "loss": 0.0186, + "loss": 0.0151, "step": 375 }, { "epoch": 0.12, "learning_rate": 2.939040201482831e-05, - "loss": 0.0116, + "loss": 0.0124, "step": 376 }, { "epoch": 0.12, "learning_rate": 2.9386007075384786e-05, - "loss": 0.0233, + "loss": 0.0184, "step": 377 }, { "epoch": 0.12, "learning_rate": 2.9381596680973506e-05, - "loss": 0.0071, + "loss": 0.0192, "step": 378 }, { "epoch": 0.12, "learning_rate": 2.937717083633258e-05, - "loss": 0.0209, + "loss": 0.0383, "step": 379 }, { "epoch": 0.12, "learning_rate": 2.9372729546216718e-05, - "loss": 0.0357, + "loss": 0.0283, "step": 380 }, { "epoch": 0.12, "learning_rate": 2.9368272815397224e-05, - "loss": 0.0235, + "loss": 0.0258, "step": 381 }, { "epoch": 0.12, "learning_rate": 2.9363800648661987e-05, - "loss": 0.0191, + "loss": 0.0203, "step": 382 }, { "epoch": 0.12, "learning_rate": 2.9359313050815486e-05, - "loss": 0.0191, + "loss": 0.02, "step": 383 }, { "epoch": 0.12, "learning_rate": 2.9354810026678766e-05, - "loss": 0.0222, + "loss": 0.0273, "step": 384 }, { "epoch": 0.12, "learning_rate": 2.935029158108945e-05, - "loss": 0.0173, + "loss": 0.0226, "step": 385 }, { "epoch": 0.12, "learning_rate": 2.9345757718901736e-05, - "loss": 0.0184, + "loss": 0.0422, "step": 386 }, { "epoch": 0.12, "learning_rate": 2.9341208444986375e-05, - "loss": 0.0195, + "loss": 0.0187, "step": 387 }, { "epoch": 0.12, "learning_rate": 2.9336643764230676e-05, - "loss": 0.0154, + "loss": 0.0132, "step": 388 }, { "epoch": 0.12, "learning_rate": 2.93320636815385e-05, - "loss": 0.0195, + "loss": 0.0175, "step": 389 }, { "epoch": 0.12, "learning_rate": 2.9327468201830258e-05, - "loss": 0.0225, + "loss": 0.0291, "step": 390 }, { "epoch": 0.12, "learning_rate": 2.93228573300429e-05, - "loss": 0.0175, + "loss": 0.0225, "step": 391 }, { "epoch": 0.13, "learning_rate": 2.9318231071129905e-05, - "loss": 0.0157, + "loss": 0.0171, "step": 392 }, { "epoch": 0.13, "learning_rate": 2.9313589430061297e-05, - "loss": 0.0107, + "loss": 0.0177, "step": 393 }, { "epoch": 0.13, "learning_rate": 2.9308932411823615e-05, - "loss": 0.0179, + "loss": 0.0127, "step": 394 }, { "epoch": 0.13, "learning_rate": 2.9304260021419912e-05, - "loss": 0.0186, + "loss": 0.0238, "step": 395 }, { "epoch": 0.13, "learning_rate": 2.9299572263869777e-05, - "loss": 0.01, + "loss": 0.0117, "step": 396 }, { "epoch": 0.13, "learning_rate": 2.9294869144209282e-05, - "loss": 0.0135, + "loss": 0.0158, "step": 397 }, { "epoch": 0.13, "learning_rate": 2.9290150667491022e-05, - "loss": 0.023, + "loss": 0.0222, "step": 398 }, { "epoch": 0.13, "learning_rate": 2.9285416838784076e-05, - "loss": 0.0162, + "loss": 0.019, "step": 399 }, { "epoch": 0.13, "learning_rate": 2.9280667663174026e-05, - "loss": 0.0113, + "loss": 0.008, "step": 400 }, { "epoch": 0.13, "learning_rate": 2.9275903145762942e-05, - "loss": 0.0239, + "loss": 0.0261, "step": 401 }, { "epoch": 0.13, "learning_rate": 2.927112329166937e-05, - "loss": 0.0236, + "loss": 0.0457, "step": 402 }, { "epoch": 0.13, "learning_rate": 2.926632810602833e-05, - "loss": 0.0143, + "loss": 0.0169, "step": 403 }, { "epoch": 0.13, "learning_rate": 2.9261517593991315e-05, - "loss": 0.0097, + "loss": 0.015, "step": 404 }, { "epoch": 0.13, "learning_rate": 2.9256691760726296e-05, - "loss": 0.0248, + "loss": 0.0222, "step": 405 }, { "epoch": 0.13, "learning_rate": 2.925185061141768e-05, - "loss": 0.014, + "loss": 0.0111, "step": 406 }, { "epoch": 0.13, "learning_rate": 2.924699415126635e-05, - "loss": 0.0189, + "loss": 0.0179, "step": 407 }, { "epoch": 0.13, "learning_rate": 2.924212238548962e-05, - "loss": 0.0217, + "loss": 0.0224, "step": 408 }, { "epoch": 0.13, "learning_rate": 2.9237235319321264e-05, - "loss": 0.0261, + "loss": 0.0294, "step": 409 }, { "epoch": 0.13, "learning_rate": 2.9232332958011477e-05, - "loss": 0.0269, + "loss": 0.0225, "step": 410 }, { "epoch": 0.13, "learning_rate": 2.92274153068269e-05, - "loss": 0.0228, + "loss": 0.0207, "step": 411 }, { "epoch": 0.13, "learning_rate": 2.9222482371050586e-05, - "loss": 0.0251, + "loss": 0.0222, "step": 412 }, { "epoch": 0.13, "learning_rate": 2.921753415598202e-05, - "loss": 0.0162, + "loss": 0.0169, "step": 413 }, { "epoch": 0.13, "learning_rate": 2.9212570666937097e-05, - "loss": 0.0132, + "loss": 0.031, "step": 414 }, { "epoch": 0.13, "learning_rate": 2.9207591909248117e-05, - "loss": 0.0196, + "loss": 0.0249, "step": 415 }, { "epoch": 0.13, "learning_rate": 2.9202597888263793e-05, - "loss": 0.017, + "loss": 0.0137, "step": 416 }, { "epoch": 0.13, "learning_rate": 2.9197588609349224e-05, - "loss": 0.0134, + "loss": 0.0128, "step": 417 }, { "epoch": 0.13, "learning_rate": 2.919256407788591e-05, - "loss": 0.0115, + "loss": 0.0189, "step": 418 }, { "epoch": 0.13, "learning_rate": 2.9187524299271736e-05, - "loss": 0.0098, + "loss": 0.009, "step": 419 }, { "epoch": 0.13, "learning_rate": 2.918246927892096e-05, - "loss": 0.031, + "loss": 0.0196, "step": 420 }, { "epoch": 0.13, "learning_rate": 2.9177399022264214e-05, - "loss": 0.0106, + "loss": 0.0113, "step": 421 }, { "epoch": 0.13, "learning_rate": 2.9172313534748512e-05, - "loss": 0.0095, + "loss": 0.0129, "step": 422 }, { "epoch": 0.14, "learning_rate": 2.916721282183722e-05, - "loss": 0.0139, + "loss": 0.0156, "step": 423 }, { "epoch": 0.14, "learning_rate": 2.9162096889010056e-05, - "loss": 0.0209, + "loss": 0.0185, "step": 424 }, { "epoch": 0.14, "learning_rate": 2.91569657417631e-05, - "loss": 0.0134, + "loss": 0.0131, "step": 425 }, { "epoch": 0.14, "learning_rate": 2.9151819385608774e-05, - "loss": 0.013, + "loss": 0.0209, "step": 426 }, { "epoch": 0.14, "learning_rate": 2.9146657826075833e-05, - "loss": 0.0145, + "loss": 0.0131, "step": 427 }, { "epoch": 0.14, "learning_rate": 2.9141481068709375e-05, - "loss": 0.0177, + "loss": 0.0164, "step": 428 }, { "epoch": 0.14, "learning_rate": 2.913628911907081e-05, - "loss": 0.0077, + "loss": 0.0145, "step": 429 }, { "epoch": 0.14, "learning_rate": 2.913108198273789e-05, - "loss": 0.0166, + "loss": 0.0154, "step": 430 }, { "epoch": 0.14, "learning_rate": 2.912585966530466e-05, - "loss": 0.0289, + "loss": 0.0119, "step": 431 }, { "epoch": 0.14, "learning_rate": 2.9120622172381485e-05, - "loss": 0.0125, + "loss": 0.0104, "step": 432 }, { "epoch": 0.14, "learning_rate": 2.9115369509595046e-05, - "loss": 0.0127, + "loss": 0.0068, "step": 433 }, { "epoch": 0.14, "learning_rate": 2.9110101682588294e-05, - "loss": 0.0208, + "loss": 0.028, "step": 434 }, { "epoch": 0.14, "learning_rate": 2.9104818697020494e-05, - "loss": 0.0051, + "loss": 0.0086, "step": 435 }, { "epoch": 0.14, "learning_rate": 2.909952055856718e-05, - "loss": 0.0125, + "loss": 0.0153, "step": 436 }, { "epoch": 0.14, "learning_rate": 2.9094207272920174e-05, - "loss": 0.0151, + "loss": 0.0205, "step": 437 }, { "epoch": 0.14, "learning_rate": 2.9088878845787573e-05, - "loss": 0.0102, + "loss": 0.0157, "step": 438 }, { "epoch": 0.14, "learning_rate": 2.9083535282893733e-05, - "loss": 0.0255, + "loss": 0.0228, "step": 439 }, { "epoch": 0.14, "learning_rate": 2.9078176589979268e-05, - "loss": 0.0203, + "loss": 0.0149, "step": 440 }, { "epoch": 0.14, "learning_rate": 2.9072802772801056e-05, - "loss": 0.0205, + "loss": 0.0173, "step": 441 }, { "epoch": 0.14, "learning_rate": 2.9067413837132228e-05, - "loss": 0.014, + "loss": 0.0139, "step": 442 }, { "epoch": 0.14, "learning_rate": 2.906200978876213e-05, - "loss": 0.0126, + "loss": 0.0113, "step": 443 }, { "epoch": 0.14, "learning_rate": 2.9056590633496377e-05, - "loss": 0.0136, + "loss": 0.0102, "step": 444 }, { "epoch": 0.14, "learning_rate": 2.9051156377156782e-05, - "loss": 0.0143, + "loss": 0.0072, "step": 445 }, { "epoch": 0.14, "learning_rate": 2.9045707025581413e-05, - "loss": 0.0123, + "loss": 0.0157, "step": 446 }, { "epoch": 0.14, "learning_rate": 2.904024258462453e-05, - "loss": 0.0176, + "loss": 0.0172, "step": 447 }, { "epoch": 0.14, "learning_rate": 2.9034763060156616e-05, - "loss": 0.0052, + "loss": 0.0058, "step": 448 }, { "epoch": 0.14, "learning_rate": 2.902926845806435e-05, - "loss": 0.0088, + "loss": 0.009, "step": 449 }, { "epoch": 0.14, "learning_rate": 2.902375878425062e-05, - "loss": 0.0175, + "loss": 0.009, "step": 450 }, { "epoch": 0.14, "learning_rate": 2.90182340446345e-05, - "loss": 0.0181, + "loss": 0.0185, "step": 451 }, { "epoch": 0.14, "learning_rate": 2.9012694245151238e-05, - "loss": 0.0307, + "loss": 0.0236, "step": 452 }, { "epoch": 0.14, "learning_rate": 2.900713939175228e-05, - "loss": 0.0203, + "loss": 0.0166, "step": 453 }, { "epoch": 0.15, "learning_rate": 2.9001569490405234e-05, - "loss": 0.0178, + "loss": 0.0212, "step": 454 }, { "epoch": 0.15, "learning_rate": 2.8995984547093876e-05, - "loss": 0.0062, + "loss": 0.0061, "step": 455 }, { "epoch": 0.15, "learning_rate": 2.8990384567818144e-05, - "loss": 0.0202, + "loss": 0.0222, "step": 456 }, { "epoch": 0.15, "learning_rate": 2.8984769558594128e-05, - "loss": 0.0114, + "loss": 0.0131, "step": 457 }, { "epoch": 0.15, "learning_rate": 2.8979139525454058e-05, - "loss": 0.0152, + "loss": 0.0161, "step": 458 }, { "epoch": 0.15, "learning_rate": 2.8973494474446308e-05, - "loss": 0.0197, + "loss": 0.0145, "step": 459 }, { "epoch": 0.15, "learning_rate": 2.89678344116354e-05, - "loss": 0.0057, + "loss": 0.0085, "step": 460 }, { "epoch": 0.15, "learning_rate": 2.896215934310196e-05, - "loss": 0.0188, + "loss": 0.0205, "step": 461 }, { "epoch": 0.15, "learning_rate": 2.8956469274942753e-05, - "loss": 0.0157, + "loss": 0.0083, "step": 462 }, { "epoch": 0.15, "learning_rate": 2.8950764213270647e-05, - "loss": 0.0159, + "loss": 0.0116, "step": 463 }, { "epoch": 0.15, "learning_rate": 2.894504416421462e-05, - "loss": 0.0336, + "loss": 0.034, "step": 464 }, { "epoch": 0.15, "learning_rate": 2.8939309133919762e-05, - "loss": 0.0158, + "loss": 0.0209, "step": 465 }, { "epoch": 0.15, "learning_rate": 2.8933559128547236e-05, - "loss": 0.011, + "loss": 0.0205, "step": 466 }, { "epoch": 0.15, "learning_rate": 2.8927794154274313e-05, - "loss": 0.0174, + "loss": 0.0167, "step": 467 }, { "epoch": 0.15, "learning_rate": 2.8922014217294333e-05, - "loss": 0.0249, + "loss": 0.0326, "step": 468 }, { "epoch": 0.15, "learning_rate": 2.8916219323816716e-05, - "loss": 0.0119, + "loss": 0.0073, "step": 469 }, { "epoch": 0.15, "learning_rate": 2.8910409480066943e-05, - "loss": 0.0128, + "loss": 0.0192, "step": 470 }, { "epoch": 0.15, "learning_rate": 2.890458469228657e-05, - "loss": 0.0078, + "loss": 0.0079, "step": 471 }, { "epoch": 0.15, "learning_rate": 2.8898744966733196e-05, - "loss": 0.0122, + "loss": 0.0097, "step": 472 }, { "epoch": 0.15, "learning_rate": 2.8892890309680475e-05, - "loss": 0.0193, + "loss": 0.0091, "step": 473 }, { "epoch": 0.15, "learning_rate": 2.888702072741809e-05, - "loss": 0.013, + "loss": 0.0112, "step": 474 }, { "epoch": 0.15, "learning_rate": 2.888113622625177e-05, - "loss": 0.0118, + "loss": 0.0088, "step": 475 }, { "epoch": 0.15, "learning_rate": 2.8875236812503266e-05, - "loss": 0.0192, + "loss": 0.013, "step": 476 }, { "epoch": 0.15, "learning_rate": 2.8869322492510348e-05, - "loss": 0.011, + "loss": 0.0137, "step": 477 }, { "epoch": 0.15, "learning_rate": 2.886339327262681e-05, - "loss": 0.0167, + "loss": 0.0164, "step": 478 }, { "epoch": 0.15, "learning_rate": 2.8857449159222448e-05, - "loss": 0.013, + "loss": 0.0152, "step": 479 }, { "epoch": 0.15, "learning_rate": 2.8851490158683046e-05, - "loss": 0.01, + "loss": 0.0109, "step": 480 }, { "epoch": 0.15, "learning_rate": 2.88455162774104e-05, - "loss": 0.0113, + "loss": 0.0132, "step": 481 }, { "epoch": 0.15, "learning_rate": 2.883952752182228e-05, - "loss": 0.0334, + "loss": 0.0222, "step": 482 }, { "epoch": 0.15, "learning_rate": 2.8833523898352438e-05, - "loss": 0.0246, + "loss": 0.0159, "step": 483 }, { "epoch": 0.15, "learning_rate": 2.88275054134506e-05, - "loss": 0.0086, + "loss": 0.0154, "step": 484 }, { "epoch": 0.15, "learning_rate": 2.882147207358247e-05, - "loss": 0.0216, + "loss": 0.0214, "step": 485 }, { "epoch": 0.16, "learning_rate": 2.8815423885229683e-05, - "loss": 0.02, + "loss": 0.0159, "step": 486 }, { "epoch": 0.16, "learning_rate": 2.8809360854889848e-05, - "loss": 0.0172, + "loss": 0.0199, "step": 487 }, { "epoch": 0.16, "learning_rate": 2.8803282989076513e-05, - "loss": 0.015, + "loss": 0.0126, "step": 488 }, { "epoch": 0.16, "learning_rate": 2.8797190294319164e-05, - "loss": 0.015, + "loss": 0.0128, "step": 489 }, { "epoch": 0.16, "learning_rate": 2.8791082777163216e-05, - "loss": 0.0063, + "loss": 0.005, "step": 490 }, { "epoch": 0.16, "learning_rate": 2.8784960444170004e-05, - "loss": 0.0221, + "loss": 0.0202, "step": 491 }, { "epoch": 0.16, "learning_rate": 2.877882330191679e-05, - "loss": 0.0114, + "loss": 0.0127, "step": 492 }, { "epoch": 0.16, "learning_rate": 2.8772671356996744e-05, - "loss": 0.0234, + "loss": 0.0361, "step": 493 }, { "epoch": 0.16, "learning_rate": 2.8766504616018925e-05, - "loss": 0.0191, + "loss": 0.0122, "step": 494 }, { "epoch": 0.16, "learning_rate": 2.8760323085608305e-05, - "loss": 0.0091, + "loss": 0.0086, "step": 495 }, { "epoch": 0.16, "learning_rate": 2.875412677240573e-05, - "loss": 0.0292, + "loss": 0.0221, "step": 496 }, { "epoch": 0.16, "learning_rate": 2.874791568306794e-05, - "loss": 0.0136, + "loss": 0.0061, "step": 497 }, { "epoch": 0.16, "learning_rate": 2.874168982426754e-05, - "loss": 0.013, + "loss": 0.0103, "step": 498 }, { "epoch": 0.16, "learning_rate": 2.8735449202693002e-05, - "loss": 0.0137, + "loss": 0.015, "step": 499 }, { "epoch": 0.16, "learning_rate": 2.872919382504867e-05, - "loss": 0.0141, + "loss": 0.0191, "step": 500 }, { "epoch": 0.16, "learning_rate": 2.872292369805472e-05, - "loss": 0.0172, + "loss": 0.0231, "step": 501 }, { "epoch": 0.16, "learning_rate": 2.8716638828447197e-05, - "loss": 0.0166, + "loss": 0.0142, "step": 502 }, { "epoch": 0.16, "learning_rate": 2.8710339222977956e-05, - "loss": 0.0108, + "loss": 0.0071, "step": 503 }, { "epoch": 0.16, "learning_rate": 2.8704024888414712e-05, - "loss": 0.0161, + "loss": 0.021, "step": 504 }, { "epoch": 0.16, "learning_rate": 2.869769583154098e-05, - "loss": 0.0136, + "loss": 0.0191, "step": 505 }, { "epoch": 0.16, "learning_rate": 2.869135205915611e-05, - "loss": 0.011, + "loss": 0.0154, "step": 506 }, { "epoch": 0.16, "learning_rate": 2.8684993578075248e-05, - "loss": 0.0127, + "loss": 0.0101, "step": 507 }, { "epoch": 0.16, "learning_rate": 2.8678620395129338e-05, - "loss": 0.0146, + "loss": 0.0103, "step": 508 }, { "epoch": 0.16, "learning_rate": 2.8672232517165138e-05, - "loss": 0.0225, + "loss": 0.0168, "step": 509 }, { "epoch": 0.16, "learning_rate": 2.8665829951045176e-05, - "loss": 0.0114, + "loss": 0.0117, "step": 510 }, { "epoch": 0.16, "learning_rate": 2.8659412703647767e-05, - "loss": 0.0249, + "loss": 0.0167, "step": 511 }, { "epoch": 0.16, "learning_rate": 2.8652980781866993e-05, - "loss": 0.0144, + "loss": 0.0089, "step": 512 }, { "epoch": 0.16, "learning_rate": 2.8646534192612704e-05, - "loss": 0.0113, + "loss": 0.0108, "step": 513 }, { "epoch": 0.16, "learning_rate": 2.8640072942810507e-05, - "loss": 0.0174, + "loss": 0.0133, "step": 514 }, { "epoch": 0.16, "learning_rate": 2.863359703940176e-05, - "loss": 0.0081, + "loss": 0.0104, "step": 515 }, { "epoch": 0.16, "learning_rate": 2.8627106489343557e-05, - "loss": 0.0302, + "loss": 0.0212, "step": 516 }, { "epoch": 0.17, "learning_rate": 2.8620601299608738e-05, - "loss": 0.0193, + "loss": 0.0167, "step": 517 }, { "epoch": 0.17, "learning_rate": 2.8614081477185866e-05, - "loss": 0.0102, + "loss": 0.0133, "step": 518 }, { "epoch": 0.17, "learning_rate": 2.8607547029079216e-05, - "loss": 0.0068, + "loss": 0.0074, "step": 519 }, { "epoch": 0.17, "learning_rate": 2.8600997962308786e-05, - "loss": 0.0109, + "loss": 0.0123, "step": 520 }, { "epoch": 0.17, "learning_rate": 2.8594434283910277e-05, - "loss": 0.014, + "loss": 0.0182, "step": 521 }, { "epoch": 0.17, "learning_rate": 2.8587856000935078e-05, - "loss": 0.018, + "loss": 0.0162, "step": 522 }, { "epoch": 0.17, "learning_rate": 2.8581263120450286e-05, - "loss": 0.0128, + "loss": 0.0125, "step": 523 }, { "epoch": 0.17, "learning_rate": 2.857465564953866e-05, - "loss": 0.0199, + "loss": 0.0143, "step": 524 }, { "epoch": 0.17, "learning_rate": 2.8568033595298643e-05, - "loss": 0.0143, + "loss": 0.01, "step": 525 }, { "epoch": 0.17, "learning_rate": 2.8561396964844354e-05, - "loss": 0.0192, + "loss": 0.0161, "step": 526 }, { "epoch": 0.17, "learning_rate": 2.8554745765305553e-05, - "loss": 0.0132, + "loss": 0.0095, "step": 527 }, { "epoch": 0.17, "learning_rate": 2.8548080003827668e-05, - "loss": 0.0213, + "loss": 0.0211, "step": 528 }, { "epoch": 0.17, "learning_rate": 2.8541399687571753e-05, - "loss": 0.0138, + "loss": 0.0147, "step": 529 }, { "epoch": 0.17, "learning_rate": 2.8534704823714524e-05, - "loss": 0.0187, + "loss": 0.0229, "step": 530 }, { "epoch": 0.17, "learning_rate": 2.8527995419448298e-05, - "loss": 0.0137, + "loss": 0.0088, "step": 531 }, { @@ -3211,727 +3211,727 @@ { "epoch": 0.17, "learning_rate": 2.8514533018536286e-05, - "loss": 0.0059, + "loss": 0.0124, "step": 533 }, { "epoch": 0.17, "learning_rate": 2.8507780036353235e-05, - "loss": 0.0126, + "loss": 0.0082, "step": 534 }, { "epoch": 0.17, "learning_rate": 2.8501012542686637e-05, - "loss": 0.0115, + "loss": 0.0086, "step": 535 }, { "epoch": 0.17, "learning_rate": 2.8494230544806853e-05, - "loss": 0.0118, + "loss": 0.013, "step": 536 }, { "epoch": 0.17, "learning_rate": 2.8487434049999824e-05, - "loss": 0.0112, + "loss": 0.0082, "step": 537 }, { "epoch": 0.17, "learning_rate": 2.848062306556705e-05, - "loss": 0.0091, + "loss": 0.0098, "step": 538 }, { "epoch": 0.17, "learning_rate": 2.847379759882563e-05, - "loss": 0.0151, + "loss": 0.0121, "step": 539 }, { "epoch": 0.17, "learning_rate": 2.846695765710818e-05, - "loss": 0.0211, + "loss": 0.0244, "step": 540 }, { "epoch": 0.17, "learning_rate": 2.8460103247762904e-05, - "loss": 0.011, + "loss": 0.0098, "step": 541 }, { "epoch": 0.17, "learning_rate": 2.845323437815352e-05, - "loss": 0.0119, + "loss": 0.0112, "step": 542 }, { "epoch": 0.17, "learning_rate": 2.8446351055659303e-05, - "loss": 0.0119, + "loss": 0.0083, "step": 543 }, { "epoch": 0.17, "learning_rate": 2.8439453287675034e-05, - "loss": 0.0155, + "loss": 0.0107, "step": 544 }, { "epoch": 0.17, "learning_rate": 2.8432541081611036e-05, - "loss": 0.0174, + "loss": 0.0242, "step": 545 }, { "epoch": 0.17, "learning_rate": 2.842561444489312e-05, - "loss": 0.0325, + "loss": 0.0244, "step": 546 }, { "epoch": 0.17, "learning_rate": 2.841867338496262e-05, - "loss": 0.0076, + "loss": 0.0058, "step": 547 }, { "epoch": 0.18, "learning_rate": 2.8411717909276348e-05, - "loss": 0.0147, + "loss": 0.0165, "step": 548 }, { "epoch": 0.18, "learning_rate": 2.8404748025306614e-05, - "loss": 0.0185, + "loss": 0.0169, "step": 549 }, { "epoch": 0.18, "learning_rate": 2.83977637405412e-05, - "loss": 0.018, + "loss": 0.0154, "step": 550 }, { "epoch": 0.18, "learning_rate": 2.8390765062483364e-05, - "loss": 0.0121, + "loss": 0.0094, "step": 551 }, { "epoch": 0.18, "learning_rate": 2.8383751998651828e-05, - "loss": 0.0148, + "loss": 0.0189, "step": 552 }, { "epoch": 0.18, "learning_rate": 2.837672455658076e-05, - "loss": 0.0096, + "loss": 0.0104, "step": 553 }, { "epoch": 0.18, "learning_rate": 2.836968274381978e-05, - "loss": 0.0466, + "loss": 0.053, "step": 554 }, { "epoch": 0.18, "learning_rate": 2.8362626567933958e-05, - "loss": 0.0218, + "loss": 0.0136, "step": 555 }, { "epoch": 0.18, "learning_rate": 2.835555603650377e-05, - "loss": 0.025, + "loss": 0.0148, "step": 556 }, { "epoch": 0.18, "learning_rate": 2.8348471157125132e-05, - "loss": 0.0253, + "loss": 0.0193, "step": 557 }, { "epoch": 0.18, "learning_rate": 2.834137193740937e-05, - "loss": 0.0301, + "loss": 0.0151, "step": 558 }, { "epoch": 0.18, "learning_rate": 2.833425838498322e-05, - "loss": 0.0182, + "loss": 0.021, "step": 559 }, { "epoch": 0.18, "learning_rate": 2.83271305074888e-05, - "loss": 0.0093, + "loss": 0.0072, "step": 560 }, { "epoch": 0.18, "learning_rate": 2.831998831258364e-05, - "loss": 0.0092, + "loss": 0.0108, "step": 561 }, { "epoch": 0.18, "learning_rate": 2.8312831807940634e-05, - "loss": 0.0127, + "loss": 0.0168, "step": 562 }, { "epoch": 0.18, "learning_rate": 2.830566100124805e-05, - "loss": 0.0158, + "loss": 0.0181, "step": 563 }, { "epoch": 0.18, "learning_rate": 2.8298475900209542e-05, - "loss": 0.0208, + "loss": 0.0085, "step": 564 }, { "epoch": 0.18, "learning_rate": 2.8291276512544084e-05, - "loss": 0.0065, + "loss": 0.0089, "step": 565 }, { "epoch": 0.18, "learning_rate": 2.8284062845986033e-05, - "loss": 0.0122, + "loss": 0.0115, "step": 566 }, { "epoch": 0.18, "learning_rate": 2.8276834908285064e-05, - "loss": 0.0122, + "loss": 0.0067, "step": 567 }, { "epoch": 0.18, "learning_rate": 2.8269592707206198e-05, - "loss": 0.0156, + "loss": 0.0172, "step": 568 }, { "epoch": 0.18, "learning_rate": 2.826233625052976e-05, - "loss": 0.0137, + "loss": 0.0189, "step": 569 }, { "epoch": 0.18, "learning_rate": 2.825506554605141e-05, - "loss": 0.0112, + "loss": 0.0101, "step": 570 }, { "epoch": 0.18, "learning_rate": 2.8247780601582114e-05, - "loss": 0.0105, + "loss": 0.0109, "step": 571 }, { "epoch": 0.18, "learning_rate": 2.8240481424948115e-05, - "loss": 0.0165, + "loss": 0.0116, "step": 572 }, { "epoch": 0.18, "learning_rate": 2.8233168023990963e-05, - "loss": 0.0155, + "loss": 0.0147, "step": 573 }, { "epoch": 0.18, "learning_rate": 2.8225840406567486e-05, - "loss": 0.0188, + "loss": 0.0121, "step": 574 }, { "epoch": 0.18, "learning_rate": 2.821849858054979e-05, - "loss": 0.0262, + "loss": 0.0184, "step": 575 }, { "epoch": 0.18, "learning_rate": 2.821114255382523e-05, - "loss": 0.0166, + "loss": 0.0179, "step": 576 }, { "epoch": 0.18, "learning_rate": 2.8203772334296436e-05, - "loss": 0.0074, + "loss": 0.0064, "step": 577 }, { "epoch": 0.18, "learning_rate": 2.819638792988127e-05, - "loss": 0.0223, + "loss": 0.0119, "step": 578 }, { "epoch": 0.18, "learning_rate": 2.8188989348512842e-05, - "loss": 0.0219, + "loss": 0.0249, "step": 579 }, { "epoch": 0.19, "learning_rate": 2.8181576598139484e-05, - "loss": 0.013, + "loss": 0.0128, "step": 580 }, { "epoch": 0.19, "learning_rate": 2.8174149686724758e-05, - "loss": 0.0112, + "loss": 0.0114, "step": 581 }, { "epoch": 0.19, "learning_rate": 2.8166708622247443e-05, - "loss": 0.0107, + "loss": 0.0091, "step": 582 }, { "epoch": 0.19, "learning_rate": 2.8159253412701507e-05, - "loss": 0.0159, + "loss": 0.0168, "step": 583 }, { "epoch": 0.19, "learning_rate": 2.8151784066096126e-05, - "loss": 0.0153, + "loss": 0.027, "step": 584 }, { "epoch": 0.19, "learning_rate": 2.814430059045566e-05, - "loss": 0.017, + "loss": 0.0091, "step": 585 }, { "epoch": 0.19, "learning_rate": 2.8136802993819654e-05, - "loss": 0.0213, + "loss": 0.0113, "step": 586 }, { "epoch": 0.19, "learning_rate": 2.8129291284242814e-05, - "loss": 0.0116, + "loss": 0.0109, "step": 587 }, { "epoch": 0.19, "learning_rate": 2.8121765469795006e-05, - "loss": 0.0203, + "loss": 0.0189, "step": 588 }, { "epoch": 0.19, "learning_rate": 2.8114225558561265e-05, - "loss": 0.0133, + "loss": 0.0125, "step": 589 }, { "epoch": 0.19, "learning_rate": 2.8106671558641753e-05, - "loss": 0.0178, + "loss": 0.0124, "step": 590 }, { "epoch": 0.19, "learning_rate": 2.8099103478151777e-05, - "loss": 0.0063, + "loss": 0.0071, "step": 591 }, { "epoch": 0.19, "learning_rate": 2.809152132522177e-05, - "loss": 0.0059, + "loss": 0.0063, "step": 592 }, { "epoch": 0.19, "learning_rate": 2.8083925107997276e-05, - "loss": 0.0122, + "loss": 0.0098, "step": 593 }, { "epoch": 0.19, "learning_rate": 2.8076314834638954e-05, - "loss": 0.0141, + "loss": 0.016, "step": 594 }, { "epoch": 0.19, "learning_rate": 2.8068690513322566e-05, - "loss": 0.0086, + "loss": 0.0084, "step": 595 }, { "epoch": 0.19, "learning_rate": 2.806105215223897e-05, - "loss": 0.0175, + "loss": 0.007, "step": 596 }, { "epoch": 0.19, "learning_rate": 2.8053399759594084e-05, - "loss": 0.0126, + "loss": 0.0143, "step": 597 }, { "epoch": 0.19, "learning_rate": 2.804573334360893e-05, - "loss": 0.0237, + "loss": 0.0219, "step": 598 }, { "epoch": 0.19, "learning_rate": 2.803805291251957e-05, - "loss": 0.0091, + "loss": 0.0079, "step": 599 }, { "epoch": 0.19, "learning_rate": 2.803035847457714e-05, - "loss": 0.0137, + "loss": 0.0179, "step": 600 }, { "epoch": 0.19, "learning_rate": 2.802265003804782e-05, - "loss": 0.0301, + "loss": 0.0359, "step": 601 }, { "epoch": 0.19, "learning_rate": 2.8014927611212824e-05, - "loss": 0.0145, + "loss": 0.0094, "step": 602 }, { "epoch": 0.19, "learning_rate": 2.80071912023684e-05, - "loss": 0.0164, + "loss": 0.0127, "step": 603 }, { "epoch": 0.19, "learning_rate": 2.7999440819825813e-05, - "loss": 0.0155, + "loss": 0.0174, "step": 604 }, { "epoch": 0.19, "learning_rate": 2.799167647191134e-05, - "loss": 0.0191, + "loss": 0.0113, "step": 605 }, { "epoch": 0.19, "learning_rate": 2.798389816696627e-05, - "loss": 0.0108, + "loss": 0.0169, "step": 606 }, { "epoch": 0.19, "learning_rate": 2.7976105913346874e-05, - "loss": 0.0075, + "loss": 0.012, "step": 607 }, { "epoch": 0.19, "learning_rate": 2.7968299719424417e-05, - "loss": 0.0109, + "loss": 0.01, "step": 608 }, { "epoch": 0.19, "learning_rate": 2.796047959358513e-05, - "loss": 0.0083, + "loss": 0.0112, "step": 609 }, { "epoch": 0.19, "learning_rate": 2.7952645544230226e-05, - "loss": 0.0108, + "loss": 0.0121, "step": 610 }, { "epoch": 0.2, "learning_rate": 2.7944797579775864e-05, - "loss": 0.0187, + "loss": 0.017, "step": 611 }, { "epoch": 0.2, "learning_rate": 2.793693570865316e-05, - "loss": 0.022, + "loss": 0.0183, "step": 612 }, { "epoch": 0.2, "learning_rate": 2.792905993930816e-05, - "loss": 0.0155, + "loss": 0.0154, "step": 613 }, { "epoch": 0.2, "learning_rate": 2.7921170280201847e-05, - "loss": 0.0117, + "loss": 0.0102, "step": 614 }, { "epoch": 0.2, "learning_rate": 2.7913266739810136e-05, - "loss": 0.0101, + "loss": 0.0113, "step": 615 }, { "epoch": 0.2, "learning_rate": 2.7905349326623832e-05, - "loss": 0.0293, + "loss": 0.0249, "step": 616 }, { "epoch": 0.2, "learning_rate": 2.7897418049148664e-05, - "loss": 0.011, + "loss": 0.0191, "step": 617 }, { "epoch": 0.2, "learning_rate": 2.788947291590524e-05, - "loss": 0.0128, + "loss": 0.0142, "step": 618 }, { "epoch": 0.2, "learning_rate": 2.7881513935429075e-05, - "loss": 0.0161, + "loss": 0.0171, "step": 619 }, { "epoch": 0.2, "learning_rate": 2.7873541116270534e-05, - "loss": 0.0087, + "loss": 0.0082, "step": 620 }, { "epoch": 0.2, "learning_rate": 2.7865554466994865e-05, - "loss": 0.0155, + "loss": 0.0165, "step": 621 }, { "epoch": 0.2, "learning_rate": 2.7857553996182165e-05, - "loss": 0.0131, + "loss": 0.0159, "step": 622 }, { "epoch": 0.2, "learning_rate": 2.7849539712427396e-05, - "loss": 0.0069, + "loss": 0.0059, "step": 623 }, { "epoch": 0.2, "learning_rate": 2.7841511624340333e-05, - "loss": 0.0151, + "loss": 0.0269, "step": 624 }, { "epoch": 0.2, "learning_rate": 2.783346974054561e-05, - "loss": 0.0192, + "loss": 0.0198, "step": 625 }, { "epoch": 0.2, "learning_rate": 2.782541406968266e-05, - "loss": 0.0076, + "loss": 0.0074, "step": 626 }, { "epoch": 0.2, "learning_rate": 2.7817344620405734e-05, - "loss": 0.012, + "loss": 0.0094, "step": 627 }, { "epoch": 0.2, "learning_rate": 2.7809261401383894e-05, - "loss": 0.0086, + "loss": 0.008, "step": 628 }, { "epoch": 0.2, "learning_rate": 2.7801164421300977e-05, - "loss": 0.0131, + "loss": 0.0152, "step": 629 }, { "epoch": 0.2, "learning_rate": 2.779305368885562e-05, - "loss": 0.0109, + "loss": 0.0172, "step": 630 }, { "epoch": 0.2, "learning_rate": 2.7784929212761227e-05, - "loss": 0.0152, + "loss": 0.0181, "step": 631 }, { "epoch": 0.2, "learning_rate": 2.7776791001745972e-05, - "loss": 0.0155, + "loss": 0.0163, "step": 632 }, { "epoch": 0.2, "learning_rate": 2.7768639064552778e-05, - "loss": 0.0135, + "loss": 0.0118, "step": 633 }, { "epoch": 0.2, "learning_rate": 2.776047340993931e-05, - "loss": 0.0088, + "loss": 0.0123, "step": 634 }, { "epoch": 0.2, "learning_rate": 2.775229404667799e-05, - "loss": 0.0176, + "loss": 0.0179, "step": 635 }, { "epoch": 0.2, "learning_rate": 2.774410098355594e-05, - "loss": 0.0167, + "loss": 0.0123, "step": 636 }, { "epoch": 0.2, "learning_rate": 2.7735894229375018e-05, - "loss": 0.0085, + "loss": 0.0114, "step": 637 }, { "epoch": 0.2, "learning_rate": 2.7727673792951793e-05, - "loss": 0.0117, + "loss": 0.0167, "step": 638 }, { "epoch": 0.2, "learning_rate": 2.7719439683117514e-05, - "loss": 0.0063, + "loss": 0.0055, "step": 639 }, { "epoch": 0.2, "learning_rate": 2.7711191908718137e-05, - "loss": 0.0101, + "loss": 0.0047, "step": 640 }, { "epoch": 0.2, "learning_rate": 2.7702930478614297e-05, - "loss": 0.0158, + "loss": 0.02, "step": 641 }, { "epoch": 0.21, "learning_rate": 2.7694655401681283e-05, - "loss": 0.0093, + "loss": 0.0101, "step": 642 }, { "epoch": 0.21, "learning_rate": 2.768636668680907e-05, - "loss": 0.0073, + "loss": 0.0111, "step": 643 }, { "epoch": 0.21, "learning_rate": 2.7678064342902258e-05, - "loss": 0.0082, + "loss": 0.0105, "step": 644 }, { "epoch": 0.21, "learning_rate": 2.7669748378880112e-05, - "loss": 0.012, + "loss": 0.0124, "step": 645 }, { "epoch": 0.21, "learning_rate": 2.766141880367651e-05, - "loss": 0.0159, + "loss": 0.0164, "step": 646 }, { "epoch": 0.21, "learning_rate": 2.7653075626239965e-05, - "loss": 0.0144, + "loss": 0.0148, "step": 647 }, { "epoch": 0.21, "learning_rate": 2.76447188555336e-05, - "loss": 0.0152, + "loss": 0.0154, "step": 648 }, { "epoch": 0.21, "learning_rate": 2.7636348500535143e-05, - "loss": 0.0108, + "loss": 0.0093, "step": 649 }, { "epoch": 0.21, "learning_rate": 2.7627964570236907e-05, - "loss": 0.0161, + "loss": 0.0197, "step": 650 }, { "epoch": 0.21, "learning_rate": 2.7619567073645793e-05, - "loss": 0.0092, + "loss": 0.0102, "step": 651 }, { "epoch": 0.21, "learning_rate": 2.7611156019783292e-05, - "loss": 0.012, + "loss": 0.0144, "step": 652 }, { "epoch": 0.21, "learning_rate": 2.760273141768543e-05, - "loss": 0.0191, + "loss": 0.0113, "step": 653 }, { @@ -3943,331 +3943,331 @@ { "epoch": 0.21, "learning_rate": 2.7585841605000582e-05, - "loss": 0.0239, + "loss": 0.0122, "step": 655 }, { "epoch": 0.21, "learning_rate": 2.7577376412558417e-05, - "loss": 0.0072, + "loss": 0.0075, "step": 656 }, { "epoch": 0.21, "learning_rate": 2.7568897708170516e-05, - "loss": 0.0157, + "loss": 0.0144, "step": 657 }, { "epoch": 0.21, "learning_rate": 2.75604055009456e-05, - "loss": 0.0131, + "loss": 0.011, "step": 658 }, { "epoch": 0.21, "learning_rate": 2.75518998000069e-05, - "loss": 0.016, + "loss": 0.0104, "step": 659 }, { "epoch": 0.21, "learning_rate": 2.7543380614492134e-05, - "loss": 0.0123, + "loss": 0.0085, "step": 660 }, { "epoch": 0.21, "learning_rate": 2.753484795355351e-05, - "loss": 0.0124, + "loss": 0.0102, "step": 661 }, { "epoch": 0.21, "learning_rate": 2.7526301826357714e-05, - "loss": 0.0046, + "loss": 0.0043, "step": 662 }, { "epoch": 0.21, "learning_rate": 2.75177422420859e-05, - "loss": 0.0095, + "loss": 0.0083, "step": 663 }, { "epoch": 0.21, "learning_rate": 2.7509169209933673e-05, - "loss": 0.0224, + "loss": 0.0212, "step": 664 }, { "epoch": 0.21, "learning_rate": 2.7500582739111097e-05, - "loss": 0.0166, + "loss": 0.0162, "step": 665 }, { "epoch": 0.21, "learning_rate": 2.7491982838842655e-05, - "loss": 0.0132, + "loss": 0.0121, "step": 666 }, { "epoch": 0.21, "learning_rate": 2.7483369518367274e-05, - "loss": 0.0071, + "loss": 0.0077, "step": 667 }, { "epoch": 0.21, "learning_rate": 2.7474742786938292e-05, - "loss": 0.0093, + "loss": 0.008, "step": 668 }, { "epoch": 0.21, "learning_rate": 2.7466102653823455e-05, - "loss": 0.0068, + "loss": 0.0066, "step": 669 }, { "epoch": 0.21, "learning_rate": 2.7457449128304905e-05, - "loss": 0.0073, + "loss": 0.038, "step": 670 }, { "epoch": 0.21, "learning_rate": 2.7448782219679172e-05, - "loss": 0.0051, + "loss": 0.0046, "step": 671 }, { "epoch": 0.21, "learning_rate": 2.7440101937257167e-05, - "loss": 0.0156, + "loss": 0.01, "step": 672 }, { "epoch": 0.21, "learning_rate": 2.743140829036417e-05, - "loss": 0.0131, + "loss": 0.0149, "step": 673 }, { "epoch": 0.22, "learning_rate": 2.7422701288339804e-05, - "loss": 0.0185, + "loss": 0.0146, "step": 674 }, { "epoch": 0.22, "learning_rate": 2.741398094053805e-05, - "loss": 0.009, + "loss": 0.0104, "step": 675 }, { "epoch": 0.22, "learning_rate": 2.740524725632724e-05, - "loss": 0.011, + "loss": 0.0073, "step": 676 }, { "epoch": 0.22, "learning_rate": 2.7396500245090008e-05, - "loss": 0.0061, + "loss": 0.0055, "step": 677 }, { "epoch": 0.22, "learning_rate": 2.738773991622332e-05, - "loss": 0.0119, + "loss": 0.0107, "step": 678 }, { "epoch": 0.22, "learning_rate": 2.7378966279138448e-05, - "loss": 0.0084, + "loss": 0.0093, "step": 679 }, { "epoch": 0.22, "learning_rate": 2.7370179343260952e-05, - "loss": 0.0083, + "loss": 0.009, "step": 680 }, { "epoch": 0.22, "learning_rate": 2.73613791180307e-05, - "loss": 0.0156, + "loss": 0.0154, "step": 681 }, { "epoch": 0.22, "learning_rate": 2.7352565612901806e-05, - "loss": 0.0084, + "loss": 0.0103, "step": 682 }, { "epoch": 0.22, "learning_rate": 2.7343738837342685e-05, - "loss": 0.0129, + "loss": 0.0104, "step": 683 }, { "epoch": 0.22, "learning_rate": 2.733489880083598e-05, - "loss": 0.012, + "loss": 0.011, "step": 684 }, { "epoch": 0.22, "learning_rate": 2.7326045512878594e-05, - "loss": 0.0172, + "loss": 0.0213, "step": 685 }, { "epoch": 0.22, "learning_rate": 2.7317178982981664e-05, - "loss": 0.005, + "loss": 0.0034, "step": 686 }, { "epoch": 0.22, "learning_rate": 2.7308299220670553e-05, - "loss": 0.0094, + "loss": 0.009, "step": 687 }, { "epoch": 0.22, "learning_rate": 2.7299406235484844e-05, - "loss": 0.0153, + "loss": 0.0198, "step": 688 }, { "epoch": 0.22, "learning_rate": 2.729050003697831e-05, - "loss": 0.0077, + "loss": 0.0049, "step": 689 }, { "epoch": 0.22, "learning_rate": 2.728158063471894e-05, - "loss": 0.0146, + "loss": 0.0111, "step": 690 }, { "epoch": 0.22, "learning_rate": 2.7272648038288893e-05, - "loss": 0.0108, + "loss": 0.0186, "step": 691 }, { "epoch": 0.22, "learning_rate": 2.7263702257284503e-05, - "loss": 0.0071, + "loss": 0.0083, "step": 692 }, { "epoch": 0.22, "learning_rate": 2.725474330131628e-05, - "loss": 0.0148, + "loss": 0.0128, "step": 693 }, { "epoch": 0.22, "learning_rate": 2.7245771180008877e-05, - "loss": 0.0458, + "loss": 0.024, "step": 694 }, { "epoch": 0.22, "learning_rate": 2.7236785903001095e-05, - "loss": 0.0126, + "loss": 0.0196, "step": 695 }, { "epoch": 0.22, "learning_rate": 2.722778747994587e-05, - "loss": 0.025, + "loss": 0.0248, "step": 696 }, { "epoch": 0.22, "learning_rate": 2.721877592051025e-05, - "loss": 0.0062, + "loss": 0.0052, "step": 697 }, { "epoch": 0.22, "learning_rate": 2.7209751234375417e-05, - "loss": 0.0127, + "loss": 0.0195, "step": 698 }, { "epoch": 0.22, "learning_rate": 2.7200713431236624e-05, - "loss": 0.0099, + "loss": 0.0091, "step": 699 }, { "epoch": 0.22, "learning_rate": 2.7191662520803248e-05, - "loss": 0.0118, + "loss": 0.0076, "step": 700 }, { "epoch": 0.22, "learning_rate": 2.7182598512798728e-05, - "loss": 0.0126, + "loss": 0.0081, "step": 701 }, { "epoch": 0.22, "learning_rate": 2.7173521416960575e-05, - "loss": 0.0069, + "loss": 0.0066, "step": 702 }, { "epoch": 0.22, "learning_rate": 2.7164431243040368e-05, - "loss": 0.0099, + "loss": 0.0109, "step": 703 }, { "epoch": 0.22, "learning_rate": 2.715532800080373e-05, - "loss": 0.0129, + "loss": 0.0141, "step": 704 }, { "epoch": 0.23, "learning_rate": 2.7146211700030323e-05, - "loss": 0.0082, + "loss": 0.0077, "step": 705 }, { "epoch": 0.23, "learning_rate": 2.7137082350513844e-05, - "loss": 0.0247, + "loss": 0.017, "step": 706 }, { "epoch": 0.23, "learning_rate": 2.7127939962062002e-05, - "loss": 0.0227, + "loss": 0.0173, "step": 707 }, { "epoch": 0.23, "learning_rate": 2.711878454449652e-05, - "loss": 0.0114, + "loss": 0.012, "step": 708 }, { "epoch": 0.23, "learning_rate": 2.710961610765311e-05, - "loss": 0.0095, + "loss": 0.0123, "step": 709 }, { @@ -4279,507 +4279,507 @@ { "epoch": 0.23, "learning_rate": 2.7091240215545282e-05, - "loss": 0.005, + "loss": 0.0064, "step": 711 }, { "epoch": 0.23, "learning_rate": 2.7082032780022202e-05, - "loss": 0.0059, + "loss": 0.005, "step": 712 }, { "epoch": 0.23, "learning_rate": 2.7072812364703817e-05, - "loss": 0.0178, + "loss": 0.0215, "step": 713 }, { "epoch": 0.23, "learning_rate": 2.7063578979495672e-05, - "loss": 0.0095, + "loss": 0.0071, "step": 714 }, { "epoch": 0.23, "learning_rate": 2.705433263431725e-05, - "loss": 0.0147, + "loss": 0.0123, "step": 715 }, { "epoch": 0.23, "learning_rate": 2.7045073339101945e-05, - "loss": 0.0203, + "loss": 0.0152, "step": 716 }, { "epoch": 0.23, "learning_rate": 2.703580110379707e-05, - "loss": 0.0088, + "loss": 0.008, "step": 717 }, { "epoch": 0.23, "learning_rate": 2.7026515938363845e-05, - "loss": 0.0279, + "loss": 0.0178, "step": 718 }, { "epoch": 0.23, "learning_rate": 2.7017217852777368e-05, - "loss": 0.0217, + "loss": 0.0142, "step": 719 }, { "epoch": 0.23, "learning_rate": 2.700790685702663e-05, - "loss": 0.011, + "loss": 0.0125, "step": 720 }, { "epoch": 0.23, "learning_rate": 2.6998582961114485e-05, - "loss": 0.0122, + "loss": 0.0143, "step": 721 }, { "epoch": 0.23, "learning_rate": 2.6989246175057646e-05, - "loss": 0.0187, + "loss": 0.0132, "step": 722 }, { "epoch": 0.23, "learning_rate": 2.697989650888667e-05, - "loss": 0.0131, + "loss": 0.0129, "step": 723 }, { "epoch": 0.23, "learning_rate": 2.6970533972645957e-05, - "loss": 0.0066, + "loss": 0.006, "step": 724 }, { "epoch": 0.23, "learning_rate": 2.6961158576393737e-05, - "loss": 0.0195, + "loss": 0.0254, "step": 725 }, { "epoch": 0.23, "learning_rate": 2.695177033020205e-05, - "loss": 0.0117, + "loss": 0.0151, "step": 726 }, { "epoch": 0.23, "learning_rate": 2.6942369244156732e-05, - "loss": 0.0179, + "loss": 0.0212, "step": 727 }, { "epoch": 0.23, "learning_rate": 2.6932955328357433e-05, - "loss": 0.0087, + "loss": 0.0102, "step": 728 }, { "epoch": 0.23, "learning_rate": 2.692352859291757e-05, - "loss": 0.0142, + "loss": 0.0129, "step": 729 }, { "epoch": 0.23, "learning_rate": 2.6914089047964342e-05, - "loss": 0.0136, + "loss": 0.0082, "step": 730 }, { "epoch": 0.23, "learning_rate": 2.69046367036387e-05, - "loss": 0.0181, + "loss": 0.0212, "step": 731 }, { "epoch": 0.23, "learning_rate": 2.689517157009535e-05, - "loss": 0.0166, + "loss": 0.0207, "step": 732 }, { "epoch": 0.23, "learning_rate": 2.6885693657502742e-05, - "loss": 0.0128, + "loss": 0.0109, "step": 733 }, { "epoch": 0.23, "learning_rate": 2.687620297604305e-05, - "loss": 0.02, + "loss": 0.0191, "step": 734 }, { "epoch": 0.23, "learning_rate": 2.6866699535912164e-05, - "loss": 0.0177, + "loss": 0.0208, "step": 735 }, { "epoch": 0.24, "learning_rate": 2.685718334731968e-05, - "loss": 0.014, + "loss": 0.0111, "step": 736 }, { "epoch": 0.24, "learning_rate": 2.6847654420488895e-05, - "loss": 0.0084, + "loss": 0.0125, "step": 737 }, { "epoch": 0.24, "learning_rate": 2.6838112765656794e-05, - "loss": 0.0105, + "loss": 0.0094, "step": 738 }, { "epoch": 0.24, "learning_rate": 2.6828558393074022e-05, - "loss": 0.0154, + "loss": 0.0069, "step": 739 }, { "epoch": 0.24, "learning_rate": 2.68189913130049e-05, - "loss": 0.0042, + "loss": 0.0052, "step": 740 }, { "epoch": 0.24, "learning_rate": 2.6809411535727388e-05, - "loss": 0.0083, + "loss": 0.0082, "step": 741 }, { "epoch": 0.24, "learning_rate": 2.6799819071533104e-05, - "loss": 0.0125, + "loss": 0.0104, "step": 742 }, { "epoch": 0.24, "learning_rate": 2.679021393072729e-05, - "loss": 0.0105, + "loss": 0.0119, "step": 743 }, { "epoch": 0.24, "learning_rate": 2.6780596123628783e-05, - "loss": 0.0118, + "loss": 0.0143, "step": 744 }, { "epoch": 0.24, "learning_rate": 2.677096566057006e-05, - "loss": 0.0077, + "loss": 0.0071, "step": 745 }, { "epoch": 0.24, "learning_rate": 2.6761322551897182e-05, - "loss": 0.0143, + "loss": 0.0124, "step": 746 }, { "epoch": 0.24, "learning_rate": 2.675166680796979e-05, - "loss": 0.0135, + "loss": 0.012, "step": 747 }, { "epoch": 0.24, "learning_rate": 2.6741998439161105e-05, - "loss": 0.0122, + "loss": 0.0132, "step": 748 }, { "epoch": 0.24, "learning_rate": 2.6732317455857907e-05, - "loss": 0.0138, + "loss": 0.0078, "step": 749 }, { "epoch": 0.24, "learning_rate": 2.672262386846054e-05, - "loss": 0.0125, + "loss": 0.0088, "step": 750 }, { "epoch": 0.24, "learning_rate": 2.6712917687382872e-05, - "loss": 0.0074, + "loss": 0.0042, "step": 751 }, { "epoch": 0.24, "learning_rate": 2.6703198923052304e-05, - "loss": 0.0149, + "loss": 0.0108, "step": 752 }, { "epoch": 0.24, "learning_rate": 2.6693467585909767e-05, - "loss": 0.0096, + "loss": 0.0084, "step": 753 }, { "epoch": 0.24, "learning_rate": 2.6683723686409686e-05, - "loss": 0.0088, + "loss": 0.0074, "step": 754 }, { "epoch": 0.24, "learning_rate": 2.6673967235019986e-05, - "loss": 0.0171, + "loss": 0.0157, "step": 755 }, { "epoch": 0.24, "learning_rate": 2.6664198242222077e-05, - "loss": 0.0552, + "loss": 0.0261, "step": 756 }, { "epoch": 0.24, "learning_rate": 2.6654416718510846e-05, - "loss": 0.0101, + "loss": 0.0099, "step": 757 }, { "epoch": 0.24, "learning_rate": 2.6644622674394635e-05, - "loss": 0.0089, + "loss": 0.0097, "step": 758 }, { "epoch": 0.24, "learning_rate": 2.6634816120395242e-05, - "loss": 0.0148, + "loss": 0.0151, "step": 759 }, { "epoch": 0.24, "learning_rate": 2.6624997067047908e-05, - "loss": 0.0113, + "loss": 0.0106, "step": 760 }, { "epoch": 0.24, "learning_rate": 2.6615165524901286e-05, - "loss": 0.0091, + "loss": 0.0086, "step": 761 }, { "epoch": 0.24, "learning_rate": 2.6605321504517464e-05, - "loss": 0.0113, + "loss": 0.0111, "step": 762 }, { "epoch": 0.24, "learning_rate": 2.659546501647193e-05, - "loss": 0.0068, + "loss": 0.0091, "step": 763 }, { "epoch": 0.24, "learning_rate": 2.6585596071353555e-05, - "loss": 0.0187, + "loss": 0.0105, "step": 764 }, { "epoch": 0.24, "learning_rate": 2.6575714679764613e-05, - "loss": 0.0077, + "loss": 0.0058, "step": 765 }, { "epoch": 0.24, "learning_rate": 2.6565820852320727e-05, - "loss": 0.0168, + "loss": 0.0197, "step": 766 }, { "epoch": 0.24, "learning_rate": 2.6555914599650903e-05, - "loss": 0.0338, + "loss": 0.0277, "step": 767 }, { "epoch": 0.25, "learning_rate": 2.6545995932397466e-05, - "loss": 0.0112, + "loss": 0.012, "step": 768 }, { "epoch": 0.25, "learning_rate": 2.6536064861216117e-05, - "loss": 0.0118, + "loss": 0.0146, "step": 769 }, { "epoch": 0.25, "learning_rate": 2.6526121396775848e-05, - "loss": 0.0082, + "loss": 0.0067, "step": 770 }, { "epoch": 0.25, "learning_rate": 2.651616554975898e-05, - "loss": 0.0088, + "loss": 0.009, "step": 771 }, { "epoch": 0.25, "learning_rate": 2.6506197330861138e-05, - "loss": 0.0061, + "loss": 0.0064, "step": 772 }, { "epoch": 0.25, "learning_rate": 2.6496216750791237e-05, - "loss": 0.0179, + "loss": 0.0127, "step": 773 }, { "epoch": 0.25, "learning_rate": 2.648622382027146e-05, - "loss": 0.0052, + "loss": 0.0044, "step": 774 }, { "epoch": 0.25, "learning_rate": 2.647621855003728e-05, - "loss": 0.0088, + "loss": 0.0077, "step": 775 }, { "epoch": 0.25, "learning_rate": 2.646620095083741e-05, - "loss": 0.0203, + "loss": 0.0184, "step": 776 }, { "epoch": 0.25, "learning_rate": 2.6456171033433807e-05, - "loss": 0.0113, + "loss": 0.0192, "step": 777 }, { "epoch": 0.25, "learning_rate": 2.6446128808601677e-05, - "loss": 0.0066, + "loss": 0.0061, "step": 778 }, { "epoch": 0.25, "learning_rate": 2.6436074287129432e-05, - "loss": 0.0082, + "loss": 0.0084, "step": 779 }, { "epoch": 0.25, "learning_rate": 2.6426007479818703e-05, - "loss": 0.0225, + "loss": 0.0136, "step": 780 }, { "epoch": 0.25, "learning_rate": 2.6415928397484314e-05, - "loss": 0.0123, + "loss": 0.0102, "step": 781 }, { "epoch": 0.25, "learning_rate": 2.6405837050954285e-05, - "loss": 0.0101, + "loss": 0.0076, "step": 782 }, { "epoch": 0.25, "learning_rate": 2.6395733451069804e-05, - "loss": 0.0111, + "loss": 0.0107, "step": 783 }, { "epoch": 0.25, - "eval_loss": 0.014288174919784069, - "eval_runtime": 28.1407, - "eval_samples_per_second": 187.38, - "eval_steps_per_second": 5.863, + "eval_loss": 0.013721341267228127, + "eval_runtime": 28.4521, + "eval_samples_per_second": 185.329, + "eval_steps_per_second": 11.598, "step": 783 }, { "epoch": 0.25, "learning_rate": 2.638561760868522e-05, - "loss": 0.0074, + "loss": 0.0058, "step": 784 }, { "epoch": 0.25, "learning_rate": 2.637548953466805e-05, - "loss": 0.0172, + "loss": 0.0105, "step": 785 }, { "epoch": 0.25, "learning_rate": 2.636534923989893e-05, - "loss": 0.0066, + "loss": 0.0162, "step": 786 }, { "epoch": 0.25, "learning_rate": 2.6355196735271636e-05, - "loss": 0.0108, + "loss": 0.0125, "step": 787 }, { "epoch": 0.25, "learning_rate": 2.6345032031693063e-05, - "loss": 0.0093, + "loss": 0.0069, "step": 788 }, { "epoch": 0.25, "learning_rate": 2.6334855140083208e-05, - "loss": 0.0078, + "loss": 0.0129, "step": 789 }, { "epoch": 0.25, "learning_rate": 2.6324666071375166e-05, - "loss": 0.0169, + "loss": 0.0152, "step": 790 }, { "epoch": 0.25, "learning_rate": 2.6314464836515107e-05, - "loss": 0.0134, + "loss": 0.0136, "step": 791 }, { "epoch": 0.25, "learning_rate": 2.630425144646228e-05, - "loss": 0.0113, + "loss": 0.0145, "step": 792 }, { "epoch": 0.25, "learning_rate": 2.6294025912188976e-05, - "loss": 0.0102, + "loss": 0.0127, "step": 793 }, { @@ -4791,1147 +4791,1147 @@ { "epoch": 0.25, "learning_rate": 2.627353845493539e-05, - "loss": 0.0193, + "loss": 0.0176, "step": 795 }, { "epoch": 0.25, "learning_rate": 2.6263276553964895e-05, - "loss": 0.0247, + "loss": 0.0143, "step": 796 }, { "epoch": 0.25, "learning_rate": 2.6253002552793484e-05, - "loss": 0.0141, + "loss": 0.014, "step": 797 }, { "epoch": 0.25, "learning_rate": 2.624271646245858e-05, - "loss": 0.008, + "loss": 0.0096, "step": 798 }, { "epoch": 0.26, "learning_rate": 2.623241829401058e-05, - "loss": 0.0055, + "loss": 0.0052, "step": 799 }, { "epoch": 0.26, "learning_rate": 2.6222108058512866e-05, - "loss": 0.0133, + "loss": 0.0176, "step": 800 }, { "epoch": 0.26, "learning_rate": 2.621178576704178e-05, - "loss": 0.0175, + "loss": 0.0271, "step": 801 }, { "epoch": 0.26, "learning_rate": 2.6201451430686617e-05, - "loss": 0.0083, + "loss": 0.0064, "step": 802 }, { "epoch": 0.26, "learning_rate": 2.619110506054962e-05, - "loss": 0.0191, + "loss": 0.0186, "step": 803 }, { "epoch": 0.26, "learning_rate": 2.6180746667745946e-05, - "loss": 0.0119, + "loss": 0.0103, "step": 804 }, { "epoch": 0.26, "learning_rate": 2.6170376263403676e-05, - "loss": 0.0058, + "loss": 0.0053, "step": 805 }, { "epoch": 0.26, "learning_rate": 2.615999385866379e-05, - "loss": 0.0229, + "loss": 0.0249, "step": 806 }, { "epoch": 0.26, "learning_rate": 2.6149599464680166e-05, - "loss": 0.0096, + "loss": 0.0109, "step": 807 }, { "epoch": 0.26, "learning_rate": 2.613919309261956e-05, - "loss": 0.0176, + "loss": 0.0228, "step": 808 }, { "epoch": 0.26, "learning_rate": 2.6128774753661594e-05, - "loss": 0.0277, + "loss": 0.0269, "step": 809 }, { "epoch": 0.26, "learning_rate": 2.6118344458998745e-05, - "loss": 0.0112, + "loss": 0.0077, "step": 810 }, { "epoch": 0.26, "learning_rate": 2.6107902219836346e-05, - "loss": 0.0064, + "loss": 0.0056, "step": 811 }, { "epoch": 0.26, "learning_rate": 2.6097448047392544e-05, - "loss": 0.0074, + "loss": 0.0047, "step": 812 }, { "epoch": 0.26, "learning_rate": 2.6086981952898324e-05, - "loss": 0.0128, + "loss": 0.013, "step": 813 }, { "epoch": 0.26, "learning_rate": 2.6076503947597457e-05, - "loss": 0.0169, + "loss": 0.0136, "step": 814 }, { "epoch": 0.26, "learning_rate": 2.6066014042746533e-05, - "loss": 0.0092, + "loss": 0.0085, "step": 815 }, { "epoch": 0.26, "learning_rate": 2.6055512249614915e-05, - "loss": 0.0197, + "loss": 0.014, "step": 816 }, { "epoch": 0.26, "learning_rate": 2.604499857948473e-05, - "loss": 0.0095, + "loss": 0.0083, "step": 817 }, { "epoch": 0.26, "learning_rate": 2.603447304365088e-05, - "loss": 0.0069, + "loss": 0.0075, "step": 818 }, { "epoch": 0.26, "learning_rate": 2.6023935653421e-05, - "loss": 0.024, + "loss": 0.0122, "step": 819 }, { "epoch": 0.26, "learning_rate": 2.6013386420115476e-05, - "loss": 0.0122, + "loss": 0.0113, "step": 820 }, { "epoch": 0.26, "learning_rate": 2.6002825355067402e-05, - "loss": 0.0146, + "loss": 0.019, "step": 821 }, { "epoch": 0.26, "learning_rate": 2.599225246962259e-05, - "loss": 0.0051, + "loss": 0.02, "step": 822 }, { "epoch": 0.26, "learning_rate": 2.5981667775139554e-05, - "loss": 0.0094, + "loss": 0.0089, "step": 823 }, { "epoch": 0.26, "learning_rate": 2.597107128298949e-05, - "loss": 0.0091, + "loss": 0.0125, "step": 824 }, { "epoch": 0.26, "learning_rate": 2.5960463004556257e-05, - "loss": 0.0146, + "loss": 0.0123, "step": 825 }, { "epoch": 0.26, "learning_rate": 2.5949842951236402e-05, - "loss": 0.0056, + "loss": 0.0058, "step": 826 }, { "epoch": 0.26, "learning_rate": 2.5939211134439105e-05, - "loss": 0.016, + "loss": 0.0192, "step": 827 }, { "epoch": 0.26, "learning_rate": 2.592856756558618e-05, - "loss": 0.0095, + "loss": 0.0081, "step": 828 }, { "epoch": 0.26, "learning_rate": 2.5917912256112074e-05, - "loss": 0.0166, + "loss": 0.0178, "step": 829 }, { "epoch": 0.27, "learning_rate": 2.5907245217463852e-05, - "loss": 0.008, + "loss": 0.0117, "step": 830 }, { "epoch": 0.27, "learning_rate": 2.5896566461101168e-05, - "loss": 0.0271, + "loss": 0.02, "step": 831 }, { "epoch": 0.27, "learning_rate": 2.588587599849627e-05, - "loss": 0.0119, + "loss": 0.0141, "step": 832 }, { "epoch": 0.27, "learning_rate": 2.5875173841133985e-05, - "loss": 0.0107, + "loss": 0.0085, "step": 833 }, { "epoch": 0.27, "learning_rate": 2.5864460000511693e-05, - "loss": 0.0059, + "loss": 0.0067, "step": 834 }, { "epoch": 0.27, "learning_rate": 2.5853734488139337e-05, - "loss": 0.0123, + "loss": 0.0174, "step": 835 }, { "epoch": 0.27, "learning_rate": 2.5842997315539395e-05, - "loss": 0.0084, + "loss": 0.0068, "step": 836 }, { "epoch": 0.27, "learning_rate": 2.5832248494246873e-05, - "loss": 0.0148, + "loss": 0.0196, "step": 837 }, { "epoch": 0.27, "learning_rate": 2.582148803580929e-05, - "loss": 0.0093, + "loss": 0.0106, "step": 838 }, { "epoch": 0.27, "learning_rate": 2.5810715951786665e-05, - "loss": 0.0171, + "loss": 0.0115, "step": 839 }, { "epoch": 0.27, "learning_rate": 2.5799932253751506e-05, - "loss": 0.0124, + "loss": 0.0114, "step": 840 }, { "epoch": 0.27, "learning_rate": 2.57891369532888e-05, - "loss": 0.0095, + "loss": 0.0087, "step": 841 }, { "epoch": 0.27, "learning_rate": 2.5778330061996007e-05, - "loss": 0.02, + "loss": 0.0133, "step": 842 }, { "epoch": 0.27, "learning_rate": 2.5767511591483016e-05, - "loss": 0.0099, + "loss": 0.0071, "step": 843 }, { "epoch": 0.27, "learning_rate": 2.575668155337219e-05, - "loss": 0.0141, + "loss": 0.0122, "step": 844 }, { "epoch": 0.27, "learning_rate": 2.5745839959298277e-05, - "loss": 0.0098, + "loss": 0.0043, "step": 845 }, { "epoch": 0.27, "learning_rate": 2.5734986820908478e-05, - "loss": 0.0101, + "loss": 0.0091, "step": 846 }, { "epoch": 0.27, "learning_rate": 2.572412214986238e-05, - "loss": 0.0094, + "loss": 0.0125, "step": 847 }, { "epoch": 0.27, "learning_rate": 2.5713245957831948e-05, - "loss": 0.0082, + "loss": 0.0128, "step": 848 }, { "epoch": 0.27, "learning_rate": 2.5702358256501544e-05, - "loss": 0.0124, + "loss": 0.0117, "step": 849 }, { "epoch": 0.27, "learning_rate": 2.5691459057567887e-05, - "loss": 0.0066, + "loss": 0.0059, "step": 850 }, { "epoch": 0.27, "learning_rate": 2.5680548372740047e-05, - "loss": 0.0093, + "loss": 0.013, "step": 851 }, { "epoch": 0.27, "learning_rate": 2.5669626213739427e-05, - "loss": 0.0083, + "loss": 0.006, "step": 852 }, { "epoch": 0.27, "learning_rate": 2.5658692592299772e-05, - "loss": 0.0069, + "loss": 0.0074, "step": 853 }, { "epoch": 0.27, "learning_rate": 2.5647747520167135e-05, - "loss": 0.0062, + "loss": 0.0082, "step": 854 }, { "epoch": 0.27, "learning_rate": 2.5636791009099855e-05, - "loss": 0.0108, + "loss": 0.0111, "step": 855 }, { "epoch": 0.27, "learning_rate": 2.562582307086858e-05, - "loss": 0.0074, + "loss": 0.0065, "step": 856 }, { "epoch": 0.27, "learning_rate": 2.5614843717256233e-05, - "loss": 0.0104, + "loss": 0.0101, "step": 857 }, { "epoch": 0.27, "learning_rate": 2.5603852960057984e-05, - "loss": 0.0055, + "loss": 0.0074, "step": 858 }, { "epoch": 0.27, "learning_rate": 2.5592850811081274e-05, - "loss": 0.0068, + "loss": 0.0081, "step": 859 }, { "epoch": 0.27, "learning_rate": 2.558183728214577e-05, - "loss": 0.0104, + "loss": 0.0099, "step": 860 }, { "epoch": 0.27, "learning_rate": 2.5570812385083365e-05, - "loss": 0.0094, + "loss": 0.0063, "step": 861 }, { "epoch": 0.28, "learning_rate": 2.5559776131738167e-05, - "loss": 0.0078, + "loss": 0.012, "step": 862 }, { "epoch": 0.28, "learning_rate": 2.5548728533966495e-05, - "loss": 0.0084, + "loss": 0.0088, "step": 863 }, { "epoch": 0.28, "learning_rate": 2.5537669603636834e-05, - "loss": 0.0118, + "loss": 0.0127, "step": 864 }, { "epoch": 0.28, "learning_rate": 2.552659935262986e-05, - "loss": 0.0138, + "loss": 0.0155, "step": 865 }, { "epoch": 0.28, "learning_rate": 2.5515517792838397e-05, - "loss": 0.0069, + "loss": 0.0049, "step": 866 }, { "epoch": 0.28, "learning_rate": 2.550442493616744e-05, - "loss": 0.0195, + "loss": 0.019, "step": 867 }, { "epoch": 0.28, "learning_rate": 2.5493320794534096e-05, - "loss": 0.0149, + "loss": 0.0192, "step": 868 }, { "epoch": 0.28, "learning_rate": 2.548220537986761e-05, - "loss": 0.0181, + "loss": 0.0164, "step": 869 }, { "epoch": 0.28, "learning_rate": 2.5471078704109335e-05, - "loss": 0.0063, + "loss": 0.0054, "step": 870 }, { "epoch": 0.28, "learning_rate": 2.5459940779212726e-05, - "loss": 0.0112, + "loss": 0.0108, "step": 871 }, { "epoch": 0.28, "learning_rate": 2.5448791617143304e-05, - "loss": 0.0156, + "loss": 0.0184, "step": 872 }, { "epoch": 0.28, "learning_rate": 2.543763122987869e-05, - "loss": 0.0118, + "loss": 0.0158, "step": 873 }, { "epoch": 0.28, "learning_rate": 2.5426459629408546e-05, - "loss": 0.0059, + "loss": 0.0067, "step": 874 }, { "epoch": 0.28, "learning_rate": 2.5415276827734577e-05, - "loss": 0.0126, + "loss": 0.0113, "step": 875 }, { "epoch": 0.28, "learning_rate": 2.540408283687054e-05, - "loss": 0.0057, + "loss": 0.0051, "step": 876 }, { "epoch": 0.28, "learning_rate": 2.53928776688422e-05, - "loss": 0.0091, + "loss": 0.0104, "step": 877 }, { "epoch": 0.28, "learning_rate": 2.5381661335687324e-05, - "loss": 0.0097, + "loss": 0.0078, "step": 878 }, { "epoch": 0.28, "learning_rate": 2.5370433849455686e-05, - "loss": 0.0085, + "loss": 0.0067, "step": 879 }, { "epoch": 0.28, "learning_rate": 2.5359195222209044e-05, - "loss": 0.0076, + "loss": 0.0057, "step": 880 }, { "epoch": 0.28, "learning_rate": 2.5347945466021107e-05, - "loss": 0.0106, + "loss": 0.015, "step": 881 }, { "epoch": 0.28, "learning_rate": 2.5336684592977558e-05, - "loss": 0.0109, + "loss": 0.0064, "step": 882 }, { "epoch": 0.28, "learning_rate": 2.5325412615176017e-05, - "loss": 0.0077, + "loss": 0.0087, "step": 883 }, { "epoch": 0.28, "learning_rate": 2.531412954472602e-05, - "loss": 0.0026, + "loss": 0.0046, "step": 884 }, { "epoch": 0.28, "learning_rate": 2.530283539374905e-05, - "loss": 0.0084, + "loss": 0.0098, "step": 885 }, { "epoch": 0.28, "learning_rate": 2.5291530174378465e-05, - "loss": 0.0138, + "loss": 0.0149, "step": 886 }, { "epoch": 0.28, "learning_rate": 2.528021389875953e-05, - "loss": 0.0139, + "loss": 0.0156, "step": 887 }, { "epoch": 0.28, "learning_rate": 2.5268886579049378e-05, - "loss": 0.0119, + "loss": 0.0161, "step": 888 }, { "epoch": 0.28, "learning_rate": 2.525754822741702e-05, - "loss": 0.0097, + "loss": 0.0116, "step": 889 }, { "epoch": 0.28, "learning_rate": 2.5246198856043304e-05, - "loss": 0.0235, + "loss": 0.0327, "step": 890 }, { "epoch": 0.28, "learning_rate": 2.5234838477120928e-05, - "loss": 0.0081, + "loss": 0.0085, "step": 891 }, { "epoch": 0.28, "learning_rate": 2.5223467102854404e-05, - "loss": 0.0078, + "loss": 0.0074, "step": 892 }, { "epoch": 0.29, "learning_rate": 2.5212084745460068e-05, - "loss": 0.0117, + "loss": 0.0108, "step": 893 }, { "epoch": 0.29, "learning_rate": 2.520069141716605e-05, - "loss": 0.0058, + "loss": 0.0068, "step": 894 }, { "epoch": 0.29, "learning_rate": 2.5189287130212265e-05, - "loss": 0.018, + "loss": 0.0151, "step": 895 }, { "epoch": 0.29, "learning_rate": 2.51778718968504e-05, - "loss": 0.0094, + "loss": 0.0138, "step": 896 }, { "epoch": 0.29, "learning_rate": 2.516644572934391e-05, - "loss": 0.0103, + "loss": 0.0102, "step": 897 }, { "epoch": 0.29, "learning_rate": 2.5155008639967982e-05, - "loss": 0.0097, + "loss": 0.0141, "step": 898 }, { "epoch": 0.29, "learning_rate": 2.5143560641009552e-05, - "loss": 0.0179, + "loss": 0.0217, "step": 899 }, { "epoch": 0.29, "learning_rate": 2.5132101744767267e-05, - "loss": 0.0124, + "loss": 0.014, "step": 900 }, { "epoch": 0.29, "learning_rate": 2.512063196355148e-05, - "loss": 0.0096, + "loss": 0.0116, "step": 901 }, { "epoch": 0.29, "learning_rate": 2.5109151309684247e-05, - "loss": 0.0148, + "loss": 0.0111, "step": 902 }, { "epoch": 0.29, "learning_rate": 2.5097659795499297e-05, - "loss": 0.0104, + "loss": 0.0088, "step": 903 }, { "epoch": 0.29, "learning_rate": 2.5086157433342028e-05, - "loss": 0.011, + "loss": 0.0085, "step": 904 }, { "epoch": 0.29, "learning_rate": 2.5074644235569487e-05, - "loss": 0.0054, + "loss": 0.0075, "step": 905 }, { "epoch": 0.29, "learning_rate": 2.506312021455037e-05, - "loss": 0.0114, + "loss": 0.0103, "step": 906 }, { "epoch": 0.29, "learning_rate": 2.5051585382665e-05, - "loss": 0.0113, + "loss": 0.0104, "step": 907 }, { "epoch": 0.29, "learning_rate": 2.504003975230531e-05, - "loss": 0.01, + "loss": 0.0101, "step": 908 }, { "epoch": 0.29, "learning_rate": 2.5028483335874828e-05, - "loss": 0.0077, + "loss": 0.0117, "step": 909 }, { "epoch": 0.29, "learning_rate": 2.501691614578869e-05, - "loss": 0.0062, + "loss": 0.0044, "step": 910 }, { "epoch": 0.29, "learning_rate": 2.5005338194473577e-05, - "loss": 0.0142, + "loss": 0.0126, "step": 911 }, { "epoch": 0.29, "learning_rate": 2.4993749494367762e-05, - "loss": 0.0034, + "loss": 0.0059, "step": 912 }, { "epoch": 0.29, "learning_rate": 2.4982150057921044e-05, - "loss": 0.0136, + "loss": 0.0138, "step": 913 }, { "epoch": 0.29, "learning_rate": 2.4970539897594757e-05, - "loss": 0.0156, + "loss": 0.0081, "step": 914 }, { "epoch": 0.29, "learning_rate": 2.4958919025861764e-05, - "loss": 0.0041, + "loss": 0.0087, "step": 915 }, { "epoch": 0.29, "learning_rate": 2.494728745520644e-05, - "loss": 0.0165, + "loss": 0.0134, "step": 916 }, { "epoch": 0.29, "learning_rate": 2.493564519812464e-05, - "loss": 0.0151, + "loss": 0.0083, "step": 917 }, { "epoch": 0.29, "learning_rate": 2.4923992267123707e-05, - "loss": 0.0084, + "loss": 0.0101, "step": 918 }, { "epoch": 0.29, "learning_rate": 2.4912328674722446e-05, - "loss": 0.0133, + "loss": 0.0148, "step": 919 }, { "epoch": 0.29, "learning_rate": 2.4900654433451128e-05, - "loss": 0.0081, + "loss": 0.0088, "step": 920 }, { "epoch": 0.29, "learning_rate": 2.4888969555851447e-05, - "loss": 0.011, + "loss": 0.0097, "step": 921 }, { "epoch": 0.29, "learning_rate": 2.4877274054476536e-05, - "loss": 0.0067, + "loss": 0.0071, "step": 922 }, { "epoch": 0.29, "learning_rate": 2.4865567941890938e-05, - "loss": 0.0117, + "loss": 0.0148, "step": 923 }, { "epoch": 0.3, "learning_rate": 2.4853851230670594e-05, - "loss": 0.0122, + "loss": 0.016, "step": 924 }, { "epoch": 0.3, "learning_rate": 2.484212393340283e-05, - "loss": 0.0176, + "loss": 0.0137, "step": 925 }, { "epoch": 0.3, "learning_rate": 2.4830386062686358e-05, - "loss": 0.0095, + "loss": 0.01, "step": 926 }, { "epoch": 0.3, "learning_rate": 2.481863763113122e-05, - "loss": 0.0097, + "loss": 0.017, "step": 927 }, { "epoch": 0.3, "learning_rate": 2.4806878651358834e-05, - "loss": 0.0158, + "loss": 0.0147, "step": 928 }, { "epoch": 0.3, "learning_rate": 2.4795109136001938e-05, - "loss": 0.0096, + "loss": 0.0087, "step": 929 }, { "epoch": 0.3, "learning_rate": 2.4783329097704583e-05, - "loss": 0.0078, + "loss": 0.0092, "step": 930 }, { "epoch": 0.3, "learning_rate": 2.477153854912213e-05, - "loss": 0.0181, + "loss": 0.0134, "step": 931 }, { "epoch": 0.3, "learning_rate": 2.475973750292124e-05, - "loss": 0.007, + "loss": 0.0077, "step": 932 }, { "epoch": 0.3, "learning_rate": 2.474792597177983e-05, - "loss": 0.0054, + "loss": 0.0056, "step": 933 }, { "epoch": 0.3, "learning_rate": 2.47361039683871e-05, - "loss": 0.0084, + "loss": 0.0063, "step": 934 }, { "epoch": 0.3, "learning_rate": 2.4724271505443496e-05, - "loss": 0.0121, + "loss": 0.0157, "step": 935 }, { "epoch": 0.3, "learning_rate": 2.4712428595660698e-05, - "loss": 0.007, + "loss": 0.0061, "step": 936 }, { "epoch": 0.3, "learning_rate": 2.470057525176161e-05, - "loss": 0.0105, + "loss": 0.009, "step": 937 }, { "epoch": 0.3, "learning_rate": 2.4688711486480342e-05, - "loss": 0.0074, + "loss": 0.0088, "step": 938 }, { "epoch": 0.3, "learning_rate": 2.4676837312562207e-05, - "loss": 0.0065, + "loss": 0.0067, "step": 939 }, { "epoch": 0.3, "learning_rate": 2.4664952742763697e-05, - "loss": 0.0107, + "loss": 0.0116, "step": 940 }, { "epoch": 0.3, "learning_rate": 2.4653057789852465e-05, - "loss": 0.008, + "loss": 0.0077, "step": 941 }, { "epoch": 0.3, "learning_rate": 2.4641152466607336e-05, - "loss": 0.0088, + "loss": 0.0064, "step": 942 }, { "epoch": 0.3, "learning_rate": 2.4629236785818262e-05, - "loss": 0.0056, + "loss": 0.0087, "step": 943 }, { "epoch": 0.3, "learning_rate": 2.461731076028632e-05, - "loss": 0.0286, + "loss": 0.02, "step": 944 }, { "epoch": 0.3, "learning_rate": 2.460537440282371e-05, - "loss": 0.014, + "loss": 0.0117, "step": 945 }, { "epoch": 0.3, "learning_rate": 2.4593427726253724e-05, - "loss": 0.0073, + "loss": 0.007, "step": 946 }, { "epoch": 0.3, "learning_rate": 2.4581470743410755e-05, - "loss": 0.0122, + "loss": 0.0061, "step": 947 }, { "epoch": 0.3, "learning_rate": 2.4569503467140236e-05, - "loss": 0.0138, + "loss": 0.0089, "step": 948 }, { "epoch": 0.3, "learning_rate": 2.4557525910298697e-05, - "loss": 0.0117, + "loss": 0.0142, "step": 949 }, { "epoch": 0.3, "learning_rate": 2.4545538085753685e-05, - "loss": 0.0132, + "loss": 0.0108, "step": 950 }, { "epoch": 0.3, "learning_rate": 2.4533540006383793e-05, - "loss": 0.0091, + "loss": 0.0071, "step": 951 }, { "epoch": 0.3, "learning_rate": 2.4521531685078622e-05, - "loss": 0.008, + "loss": 0.0072, "step": 952 }, { "epoch": 0.3, "learning_rate": 2.4509513134738773e-05, - "loss": 0.0138, + "loss": 0.0153, "step": 953 }, { "epoch": 0.3, "learning_rate": 2.4497484368275844e-05, - "loss": 0.0053, + "loss": 0.0045, "step": 954 }, { "epoch": 0.31, "learning_rate": 2.4485445398612415e-05, - "loss": 0.0135, + "loss": 0.0107, "step": 955 }, { "epoch": 0.31, "learning_rate": 2.447339623868201e-05, - "loss": 0.0233, + "loss": 0.0109, "step": 956 }, { "epoch": 0.31, "learning_rate": 2.4461336901429106e-05, - "loss": 0.0195, + "loss": 0.0136, "step": 957 }, { "epoch": 0.31, "learning_rate": 2.444926739980913e-05, - "loss": 0.0142, + "loss": 0.0115, "step": 958 }, { "epoch": 0.31, "learning_rate": 2.4437187746788395e-05, - "loss": 0.0067, + "loss": 0.0083, "step": 959 }, { "epoch": 0.31, "learning_rate": 2.4425097955344158e-05, - "loss": 0.0093, + "loss": 0.0111, "step": 960 }, { "epoch": 0.31, "learning_rate": 2.441299803846454e-05, - "loss": 0.0095, + "loss": 0.0088, "step": 961 }, { "epoch": 0.31, "learning_rate": 2.440088800914855e-05, - "loss": 0.0138, + "loss": 0.0131, "step": 962 }, { "epoch": 0.31, "learning_rate": 2.4388767880406068e-05, - "loss": 0.0103, + "loss": 0.0098, "step": 963 }, { "epoch": 0.31, "learning_rate": 2.4376637665257804e-05, - "loss": 0.0099, + "loss": 0.0068, "step": 964 }, { "epoch": 0.31, "learning_rate": 2.4364497376735324e-05, - "loss": 0.0066, + "loss": 0.0085, "step": 965 }, { "epoch": 0.31, "learning_rate": 2.4352347027881003e-05, - "loss": 0.0099, + "loss": 0.0069, "step": 966 }, { "epoch": 0.31, "learning_rate": 2.4340186631748034e-05, - "loss": 0.0088, + "loss": 0.0099, "step": 967 }, { "epoch": 0.31, "learning_rate": 2.4328016201400395e-05, - "loss": 0.008, + "loss": 0.0056, "step": 968 }, { "epoch": 0.31, "learning_rate": 2.431583574991284e-05, - "loss": 0.0095, + "loss": 0.008, "step": 969 }, { "epoch": 0.31, "learning_rate": 2.4303645290370913e-05, - "loss": 0.0062, + "loss": 0.0067, "step": 970 }, { "epoch": 0.31, "learning_rate": 2.4291444835870882e-05, - "loss": 0.0118, + "loss": 0.0156, "step": 971 }, { "epoch": 0.31, "learning_rate": 2.4279234399519765e-05, - "loss": 0.0144, + "loss": 0.0145, "step": 972 }, { "epoch": 0.31, "learning_rate": 2.42670139944353e-05, - "loss": 0.006, + "loss": 0.0063, "step": 973 }, { "epoch": 0.31, "learning_rate": 2.4254783633745942e-05, - "loss": 0.0071, + "loss": 0.0085, "step": 974 }, { "epoch": 0.31, "learning_rate": 2.4242543330590828e-05, - "loss": 0.0103, + "loss": 0.0084, "step": 975 }, { "epoch": 0.31, "learning_rate": 2.4230293098119787e-05, - "loss": 0.0132, + "loss": 0.0094, "step": 976 }, { "epoch": 0.31, "learning_rate": 2.4218032949493317e-05, - "loss": 0.0079, + "loss": 0.0065, "step": 977 }, { "epoch": 0.31, "learning_rate": 2.420576289788256e-05, - "loss": 0.0101, + "loss": 0.0114, "step": 978 }, { "epoch": 0.31, "learning_rate": 2.4193482956469305e-05, - "loss": 0.0101, + "loss": 0.0076, "step": 979 }, { "epoch": 0.31, "learning_rate": 2.4181193138445952e-05, - "loss": 0.0075, + "loss": 0.0058, "step": 980 }, { "epoch": 0.31, "learning_rate": 2.4168893457015528e-05, - "loss": 0.0084, + "loss": 0.0076, "step": 981 }, { "epoch": 0.31, "learning_rate": 2.4156583925391652e-05, - "loss": 0.0103, + "loss": 0.0075, "step": 982 }, { "epoch": 0.31, "learning_rate": 2.414426455679852e-05, - "loss": 0.0051, + "loss": 0.0059, "step": 983 }, { "epoch": 0.31, "learning_rate": 2.4131935364470895e-05, - "loss": 0.004, + "loss": 0.0131, "step": 984 }, { "epoch": 0.31, "learning_rate": 2.41195963616541e-05, - "loss": 0.0059, + "loss": 0.0065, "step": 985 }, { @@ -5943,229 +5943,229 @@ { "epoch": 0.32, "learning_rate": 2.4094888977586968e-05, - "loss": 0.0137, + "loss": 0.0156, "step": 987 }, { "epoch": 0.32, "learning_rate": 2.408252062287991e-05, - "loss": 0.0129, + "loss": 0.0132, "step": 988 }, { "epoch": 0.32, "learning_rate": 2.4070142510770212e-05, - "loss": 0.0113, + "loss": 0.0118, "step": 989 }, { "epoch": 0.32, "learning_rate": 2.4057754654555755e-05, - "loss": 0.0086, + "loss": 0.0083, "step": 990 }, { "epoch": 0.32, "learning_rate": 2.4045357067544875e-05, - "loss": 0.0207, + "loss": 0.0143, "step": 991 }, { "epoch": 0.32, "learning_rate": 2.403294976305638e-05, - "loss": 0.0097, + "loss": 0.0119, "step": 992 }, { "epoch": 0.32, "learning_rate": 2.4020532754419492e-05, - "loss": 0.0145, + "loss": 0.0116, "step": 993 }, { "epoch": 0.32, "learning_rate": 2.4008106054973883e-05, - "loss": 0.0158, + "loss": 0.0104, "step": 994 }, { "epoch": 0.32, "learning_rate": 2.3995669678069627e-05, - "loss": 0.0132, + "loss": 0.0107, "step": 995 }, { "epoch": 0.32, "learning_rate": 2.398322363706719e-05, - "loss": 0.0125, + "loss": 0.0098, "step": 996 }, { "epoch": 0.32, "learning_rate": 2.3970767945337417e-05, - "loss": 0.0088, + "loss": 0.0084, "step": 997 }, { "epoch": 0.32, "learning_rate": 2.395830261626153e-05, - "loss": 0.0062, + "loss": 0.0068, "step": 998 }, { "epoch": 0.32, "learning_rate": 2.394582766323111e-05, - "loss": 0.0109, + "loss": 0.0141, "step": 999 }, { "epoch": 0.32, "learning_rate": 2.393334309964806e-05, - "loss": 0.0107, + "loss": 0.0106, "step": 1000 }, { "epoch": 0.32, "learning_rate": 2.3920848938924625e-05, - "loss": 0.0155, + "loss": 0.0091, "step": 1001 }, { "epoch": 0.32, "learning_rate": 2.3908345194483345e-05, - "loss": 0.0096, + "loss": 0.0126, "step": 1002 }, { "epoch": 0.32, "learning_rate": 2.389583187975706e-05, - "loss": 0.013, + "loss": 0.0089, "step": 1003 }, { "epoch": 0.32, "learning_rate": 2.3883309008188898e-05, - "loss": 0.0086, + "loss": 0.0094, "step": 1004 }, { "epoch": 0.32, "learning_rate": 2.387077659323225e-05, - "loss": 0.006, + "loss": 0.0077, "step": 1005 }, { "epoch": 0.32, "learning_rate": 2.385823464835076e-05, - "loss": 0.0093, + "loss": 0.0054, "step": 1006 }, { "epoch": 0.32, "learning_rate": 2.3845683187018303e-05, - "loss": 0.0181, + "loss": 0.0102, "step": 1007 }, { "epoch": 0.32, "learning_rate": 2.3833122222718993e-05, - "loss": 0.0124, + "loss": 0.0128, "step": 1008 }, { "epoch": 0.32, "learning_rate": 2.382055176894714e-05, - "loss": 0.0068, + "loss": 0.0063, "step": 1009 }, { "epoch": 0.32, "learning_rate": 2.3807971839207258e-05, - "loss": 0.0294, + "loss": 0.0367, "step": 1010 }, { "epoch": 0.32, "learning_rate": 2.3795382447014025e-05, - "loss": 0.0168, + "loss": 0.0173, "step": 1011 }, { "epoch": 0.32, "learning_rate": 2.37827836058923e-05, - "loss": 0.0084, + "loss": 0.0142, "step": 1012 }, { "epoch": 0.32, "learning_rate": 2.377017532937709e-05, - "loss": 0.0126, + "loss": 0.0095, "step": 1013 }, { "epoch": 0.32, "learning_rate": 2.375755763101354e-05, - "loss": 0.0111, + "loss": 0.0167, "step": 1014 }, { "epoch": 0.32, "learning_rate": 2.374493052435691e-05, - "loss": 0.0065, + "loss": 0.0072, "step": 1015 }, { "epoch": 0.32, "learning_rate": 2.373229402297257e-05, - "loss": 0.0071, + "loss": 0.0048, "step": 1016 }, { "epoch": 0.32, "learning_rate": 2.3719648140435986e-05, - "loss": 0.0101, + "loss": 0.0074, "step": 1017 }, { "epoch": 0.33, "learning_rate": 2.37069928903327e-05, - "loss": 0.0132, + "loss": 0.012, "step": 1018 }, { "epoch": 0.33, "learning_rate": 2.369432828625832e-05, - "loss": 0.0087, + "loss": 0.0079, "step": 1019 }, { "epoch": 0.33, "learning_rate": 2.3681654341818494e-05, - "loss": 0.0291, + "loss": 0.0124, "step": 1020 }, { "epoch": 0.33, "learning_rate": 2.366897107062892e-05, - "loss": 0.0081, + "loss": 0.0115, "step": 1021 }, { "epoch": 0.33, "learning_rate": 2.3656278486315302e-05, - "loss": 0.0084, + "loss": 0.0111, "step": 1022 }, { "epoch": 0.33, "learning_rate": 2.364357660251335e-05, - "loss": 0.0096, + "loss": 0.0073, "step": 1023 }, { "epoch": 0.33, "learning_rate": 2.3630865432868776e-05, - "loss": 0.0312, + "loss": 0.025, "step": 1024 }, { @@ -6177,19 +6177,19 @@ { "epoch": 0.33, "learning_rate": 2.3605415290684442e-05, - "loss": 0.0098, + "loss": 0.0109, "step": 1026 }, { "epoch": 0.33, "learning_rate": 2.359267634548591e-05, - "loss": 0.0066, + "loss": 0.0059, "step": 1027 }, { "epoch": 0.33, "learning_rate": 2.3579928169127188e-05, - "loss": 0.0106, + "loss": 0.0079, "step": 1028 }, { @@ -6201,151 +6201,151 @@ { "epoch": 0.33, "learning_rate": 2.3554404177720808e-05, - "loss": 0.006, + "loss": 0.0068, "step": 1030 }, { "epoch": 0.33, "learning_rate": 2.354162839009372e-05, - "loss": 0.0097, + "loss": 0.0064, "step": 1031 }, { "epoch": 0.33, "learning_rate": 2.352884342614754e-05, - "loss": 0.0116, + "loss": 0.0115, "step": 1032 }, { "epoch": 0.33, "learning_rate": 2.351604929961723e-05, - "loss": 0.0111, + "loss": 0.0105, "step": 1033 }, { "epoch": 0.33, "learning_rate": 2.3503246024247583e-05, - "loss": 0.0087, + "loss": 0.0088, "step": 1034 }, { "epoch": 0.33, "learning_rate": 2.3490433613793245e-05, - "loss": 0.0116, + "loss": 0.0108, "step": 1035 }, { "epoch": 0.33, "learning_rate": 2.347761208201865e-05, - "loss": 0.0088, + "loss": 0.0127, "step": 1036 }, { "epoch": 0.33, "learning_rate": 2.3464781442698037e-05, - "loss": 0.008, + "loss": 0.0067, "step": 1037 }, { "epoch": 0.33, "learning_rate": 2.3451941709615442e-05, - "loss": 0.0088, + "loss": 0.0092, "step": 1038 }, { "epoch": 0.33, "learning_rate": 2.3439092896564658e-05, - "loss": 0.0114, + "loss": 0.01, "step": 1039 }, { "epoch": 0.33, "learning_rate": 2.342623501734924e-05, - "loss": 0.0124, + "loss": 0.0125, "step": 1040 }, { "epoch": 0.33, "learning_rate": 2.3413368085782473e-05, - "loss": 0.0079, + "loss": 0.0053, "step": 1041 }, { "epoch": 0.33, "learning_rate": 2.340049211568738e-05, - "loss": 0.0094, + "loss": 0.0087, "step": 1042 }, { "epoch": 0.33, "learning_rate": 2.3387607120896683e-05, - "loss": 0.0094, + "loss": 0.0101, "step": 1043 }, { "epoch": 0.33, "learning_rate": 2.3374713115252806e-05, - "loss": 0.0192, + "loss": 0.0203, "step": 1044 }, { "epoch": 0.33, "learning_rate": 2.336181011260785e-05, - "loss": 0.0127, + "loss": 0.0126, "step": 1045 }, { "epoch": 0.33, "learning_rate": 2.3348898126823584e-05, - "loss": 0.0092, + "loss": 0.0054, "step": 1046 }, { "epoch": 0.33, "learning_rate": 2.3335977171771426e-05, - "loss": 0.0055, + "loss": 0.0076, "step": 1047 }, { "epoch": 0.33, "learning_rate": 2.3323047261332438e-05, - "loss": 0.0201, + "loss": 0.0249, "step": 1048 }, { "epoch": 0.34, "learning_rate": 2.3310108409397276e-05, - "loss": 0.0138, + "loss": 0.0134, "step": 1049 }, { "epoch": 0.34, "learning_rate": 2.329716062986624e-05, - "loss": 0.0151, + "loss": 0.0127, "step": 1050 }, { "epoch": 0.34, "learning_rate": 2.3284203936649194e-05, - "loss": 0.0086, + "loss": 0.0071, "step": 1051 }, { "epoch": 0.34, "learning_rate": 2.3271238343665582e-05, - "loss": 0.0111, + "loss": 0.0084, "step": 1052 }, { "epoch": 0.34, "learning_rate": 2.325826386484442e-05, - "loss": 0.0082, + "loss": 0.0067, "step": 1053 }, { "epoch": 0.34, "learning_rate": 2.324528051412426e-05, - "loss": 0.0051, + "loss": 0.0055, "step": 1054 }, { @@ -6357,109 +6357,109 @@ { "epoch": 0.34, "learning_rate": 2.3219287252788805e-05, - "loss": 0.0101, + "loss": 0.0072, "step": 1056 }, { "epoch": 0.34, "learning_rate": 2.3206277370098218e-05, - "loss": 0.0087, + "loss": 0.0112, "step": 1057 }, { "epoch": 0.34, "learning_rate": 2.3193258671358018e-05, - "loss": 0.0046, + "loss": 0.0053, "step": 1058 }, { "epoch": 0.34, "learning_rate": 2.318023117055426e-05, - "loss": 0.0069, + "loss": 0.0072, "step": 1059 }, { "epoch": 0.34, "learning_rate": 2.3167194881682457e-05, - "loss": 0.0077, + "loss": 0.005, "step": 1060 }, { "epoch": 0.34, "learning_rate": 2.3154149818747583e-05, - "loss": 0.0231, + "loss": 0.0151, "step": 1061 }, { "epoch": 0.34, "learning_rate": 2.314109599576401e-05, - "loss": 0.0128, + "loss": 0.0111, "step": 1062 }, { "epoch": 0.34, "learning_rate": 2.3128033426755534e-05, - "loss": 0.0014, + "loss": 0.0023, "step": 1063 }, { "epoch": 0.34, "learning_rate": 2.3114962125755347e-05, - "loss": 0.0106, + "loss": 0.0115, "step": 1064 }, { "epoch": 0.34, "learning_rate": 2.310188210680602e-05, - "loss": 0.0105, + "loss": 0.0106, "step": 1065 }, { "epoch": 0.34, "learning_rate": 2.3088793383959492e-05, - "loss": 0.0071, + "loss": 0.0067, "step": 1066 }, { "epoch": 0.34, "learning_rate": 2.3075695971277054e-05, - "loss": 0.0091, + "loss": 0.0122, "step": 1067 }, { "epoch": 0.34, "learning_rate": 2.306258988282932e-05, - "loss": 0.0088, + "loss": 0.009, "step": 1068 }, { "epoch": 0.34, "learning_rate": 2.3049475132696244e-05, - "loss": 0.0126, + "loss": 0.0111, "step": 1069 }, { "epoch": 0.34, "learning_rate": 2.303635173496707e-05, - "loss": 0.0046, + "loss": 0.0053, "step": 1070 }, { "epoch": 0.34, "learning_rate": 2.302321970374034e-05, - "loss": 0.0098, + "loss": 0.0087, "step": 1071 }, { "epoch": 0.34, "learning_rate": 2.3010079053123864e-05, - "loss": 0.0123, + "loss": 0.0079, "step": 1072 }, { "epoch": 0.34, "learning_rate": 2.2996929797234724e-05, - "loss": 0.0042, + "loss": 0.0057, "step": 1073 }, { @@ -6471,7 +6471,7 @@ { "epoch": 0.34, "learning_rate": 2.297060552615294e-05, - "loss": 0.012, + "loss": 0.0127, "step": 1075 }, { @@ -6483,79 +6483,79 @@ { "epoch": 0.34, "learning_rate": 2.2944247003616205e-05, - "loss": 0.0133, + "loss": 0.0123, "step": 1077 }, { "epoch": 0.34, "learning_rate": 2.2931054933442867e-05, - "loss": 0.0077, + "loss": 0.0081, "step": 1078 }, { "epoch": 0.34, "learning_rate": 2.291785434289292e-05, - "loss": 0.0139, + "loss": 0.0117, "step": 1079 }, { "epoch": 0.34, "learning_rate": 2.2904645246147823e-05, - "loss": 0.0146, + "loss": 0.0228, "step": 1080 }, { "epoch": 0.35, "learning_rate": 2.2891427657398184e-05, - "loss": 0.0148, + "loss": 0.0156, "step": 1081 }, { "epoch": 0.35, "learning_rate": 2.2878201590843736e-05, - "loss": 0.0102, + "loss": 0.0095, "step": 1082 }, { "epoch": 0.35, "learning_rate": 2.286496706069331e-05, - "loss": 0.0078, + "loss": 0.0066, "step": 1083 }, { "epoch": 0.35, "learning_rate": 2.2851724081164844e-05, - "loss": 0.0068, + "loss": 0.0074, "step": 1084 }, { "epoch": 0.35, "learning_rate": 2.2838472666485342e-05, - "loss": 0.0079, + "loss": 0.0064, "step": 1085 }, { "epoch": 0.35, "learning_rate": 2.282521283089087e-05, - "loss": 0.0045, + "loss": 0.0039, "step": 1086 }, { "epoch": 0.35, "learning_rate": 2.2811944588626537e-05, - "loss": 0.0055, + "loss": 0.0058, "step": 1087 }, { "epoch": 0.35, "learning_rate": 2.279866795394651e-05, - "loss": 0.0069, + "loss": 0.0088, "step": 1088 }, { "epoch": 0.35, "learning_rate": 2.2785382941113928e-05, - "loss": 0.0101, + "loss": 0.0086, "step": 1089 }, { @@ -6567,397 +6567,397 @@ { "epoch": 0.35, "learning_rate": 2.2758787838088785e-05, - "loss": 0.0045, + "loss": 0.0042, "step": 1091 }, { "epoch": 0.35, "learning_rate": 2.2745477776467484e-05, - "loss": 0.0162, + "loss": 0.016, "step": 1092 }, { "epoch": 0.35, "learning_rate": 2.2732159393836145e-05, - "loss": 0.0151, + "loss": 0.0112, "step": 1093 }, { "epoch": 0.35, "learning_rate": 2.2718832704502782e-05, - "loss": 0.0063, + "loss": 0.0119, "step": 1094 }, { "epoch": 0.35, "learning_rate": 2.2705497722784327e-05, - "loss": 0.0084, + "loss": 0.0064, "step": 1095 }, { "epoch": 0.35, "learning_rate": 2.2692154463006627e-05, - "loss": 0.0056, + "loss": 0.004, "step": 1096 }, { "epoch": 0.35, "learning_rate": 2.2678802939504424e-05, - "loss": 0.008, + "loss": 0.0083, "step": 1097 }, { "epoch": 0.35, "learning_rate": 2.266544316662133e-05, - "loss": 0.0138, + "loss": 0.0099, "step": 1098 }, { "epoch": 0.35, "learning_rate": 2.2652075158709828e-05, - "loss": 0.0073, + "loss": 0.0153, "step": 1099 }, { "epoch": 0.35, "learning_rate": 2.263869893013124e-05, - "loss": 0.0057, + "loss": 0.0056, "step": 1100 }, { "epoch": 0.35, "learning_rate": 2.2625314495255726e-05, - "loss": 0.0214, + "loss": 0.0143, "step": 1101 }, { "epoch": 0.35, "learning_rate": 2.2611921868462254e-05, - "loss": 0.0088, + "loss": 0.0126, "step": 1102 }, { "epoch": 0.35, "learning_rate": 2.2598521064138603e-05, - "loss": 0.0069, + "loss": 0.0098, "step": 1103 }, { "epoch": 0.35, "learning_rate": 2.2585112096681328e-05, - "loss": 0.0042, + "loss": 0.0067, "step": 1104 }, { "epoch": 0.35, "learning_rate": 2.257169498049576e-05, - "loss": 0.0087, + "loss": 0.0083, "step": 1105 }, { "epoch": 0.35, "learning_rate": 2.2558269729995985e-05, - "loss": 0.0062, + "loss": 0.0085, "step": 1106 }, { "epoch": 0.35, "learning_rate": 2.2544836359604822e-05, - "loss": 0.0034, + "loss": 0.0088, "step": 1107 }, { "epoch": 0.35, "learning_rate": 2.2531394883753814e-05, - "loss": 0.01, + "loss": 0.0124, "step": 1108 }, { "epoch": 0.35, "learning_rate": 2.251794531688321e-05, - "loss": 0.005, + "loss": 0.0091, "step": 1109 }, { "epoch": 0.35, "learning_rate": 2.2504487673441963e-05, - "loss": 0.0104, + "loss": 0.0085, "step": 1110 }, { "epoch": 0.35, "learning_rate": 2.249102196788769e-05, - "loss": 0.0078, + "loss": 0.0056, "step": 1111 }, { "epoch": 0.36, "learning_rate": 2.2477548214686678e-05, - "loss": 0.0076, + "loss": 0.0068, "step": 1112 }, { "epoch": 0.36, "learning_rate": 2.2464066428313846e-05, - "loss": 0.0069, + "loss": 0.0047, "step": 1113 }, { "epoch": 0.36, "learning_rate": 2.2450576623252757e-05, - "loss": 0.0138, + "loss": 0.0091, "step": 1114 }, { "epoch": 0.36, "learning_rate": 2.243707881399559e-05, - "loss": 0.0048, + "loss": 0.0051, "step": 1115 }, { "epoch": 0.36, "learning_rate": 2.2423573015043115e-05, - "loss": 0.0057, + "loss": 0.006, "step": 1116 }, { "epoch": 0.36, "learning_rate": 2.241005924090468e-05, - "loss": 0.0108, + "loss": 0.0098, "step": 1117 }, { "epoch": 0.36, "learning_rate": 2.239653750609821e-05, - "loss": 0.0112, + "loss": 0.0101, "step": 1118 }, { "epoch": 0.36, "learning_rate": 2.2383007825150188e-05, - "loss": 0.011, + "loss": 0.0073, "step": 1119 }, { "epoch": 0.36, "learning_rate": 2.2369470212595618e-05, - "loss": 0.0075, + "loss": 0.0079, "step": 1120 }, { "epoch": 0.36, "learning_rate": 2.2355924682978037e-05, - "loss": 0.0128, + "loss": 0.0119, "step": 1121 }, { "epoch": 0.36, "learning_rate": 2.234237125084948e-05, - "loss": 0.0084, + "loss": 0.0075, "step": 1122 }, { "epoch": 0.36, "learning_rate": 2.232880993077048e-05, - "loss": 0.013, + "loss": 0.0119, "step": 1123 }, { "epoch": 0.36, "learning_rate": 2.231524073731003e-05, - "loss": 0.0066, + "loss": 0.01, "step": 1124 }, { "epoch": 0.36, "learning_rate": 2.2301663685045596e-05, - "loss": 0.0052, + "loss": 0.0049, "step": 1125 }, { "epoch": 0.36, "learning_rate": 2.228807878856308e-05, - "loss": 0.0066, + "loss": 0.0072, "step": 1126 }, { "epoch": 0.36, "learning_rate": 2.2274486062456815e-05, - "loss": 0.0083, + "loss": 0.0046, "step": 1127 }, { "epoch": 0.36, "learning_rate": 2.2260885521329542e-05, - "loss": 0.011, + "loss": 0.0151, "step": 1128 }, { "epoch": 0.36, "learning_rate": 2.2247277179792392e-05, - "loss": 0.0159, + "loss": 0.0204, "step": 1129 }, { "epoch": 0.36, "learning_rate": 2.2233661052464892e-05, - "loss": 0.012, + "loss": 0.0156, "step": 1130 }, { "epoch": 0.36, "learning_rate": 2.2220037153974915e-05, - "loss": 0.0018, + "loss": 0.0017, "step": 1131 }, { "epoch": 0.36, "learning_rate": 2.2206405498958698e-05, - "loss": 0.0172, + "loss": 0.0152, "step": 1132 }, { "epoch": 0.36, "learning_rate": 2.2192766102060798e-05, - "loss": 0.0062, + "loss": 0.0061, "step": 1133 }, { "epoch": 0.36, "learning_rate": 2.2179118977934098e-05, - "loss": 0.0087, + "loss": 0.0088, "step": 1134 }, { "epoch": 0.36, "learning_rate": 2.216546414123978e-05, - "loss": 0.0113, + "loss": 0.0128, "step": 1135 }, { "epoch": 0.36, "learning_rate": 2.2151801606647314e-05, - "loss": 0.006, + "loss": 0.0043, "step": 1136 }, { "epoch": 0.36, "learning_rate": 2.2138131388834434e-05, - "loss": 0.0135, + "loss": 0.0116, "step": 1137 }, { "epoch": 0.36, "learning_rate": 2.2124453502487134e-05, - "loss": 0.0124, + "loss": 0.0097, "step": 1138 }, { "epoch": 0.36, "learning_rate": 2.2110767962299635e-05, - "loss": 0.0126, + "loss": 0.0122, "step": 1139 }, { "epoch": 0.36, "learning_rate": 2.2097074782974394e-05, - "loss": 0.0099, + "loss": 0.0085, "step": 1140 }, { "epoch": 0.36, "learning_rate": 2.2083373979222077e-05, - "loss": 0.0057, + "loss": 0.0071, "step": 1141 }, { "epoch": 0.36, "learning_rate": 2.2069665565761533e-05, - "loss": 0.0109, + "loss": 0.0186, "step": 1142 }, { "epoch": 0.37, "learning_rate": 2.2055949557319775e-05, - "loss": 0.0118, + "loss": 0.0122, "step": 1143 }, { "epoch": 0.37, "learning_rate": 2.2042225968631997e-05, - "loss": 0.0124, + "loss": 0.0081, "step": 1144 }, { "epoch": 0.37, "learning_rate": 2.202849481444153e-05, - "loss": 0.0075, + "loss": 0.007, "step": 1145 }, { "epoch": 0.37, "learning_rate": 2.2014756109499823e-05, - "loss": 0.0079, + "loss": 0.0062, "step": 1146 }, { "epoch": 0.37, "learning_rate": 2.2001009868566442e-05, - "loss": 0.0051, + "loss": 0.0065, "step": 1147 }, { "epoch": 0.37, "learning_rate": 2.1987256106409058e-05, - "loss": 0.006, + "loss": 0.0055, "step": 1148 }, { "epoch": 0.37, "learning_rate": 2.1973494837803408e-05, - "loss": 0.0119, + "loss": 0.0161, "step": 1149 }, { "epoch": 0.37, "learning_rate": 2.1959726077533307e-05, - "loss": 0.0053, + "loss": 0.0101, "step": 1150 }, { "epoch": 0.37, "learning_rate": 2.1945949840390603e-05, - "loss": 0.0064, + "loss": 0.008, "step": 1151 }, { "epoch": 0.37, "learning_rate": 2.1932166141175188e-05, - "loss": 0.0115, + "loss": 0.0105, "step": 1152 }, { "epoch": 0.37, "learning_rate": 2.1918374994694963e-05, - "loss": 0.0092, + "loss": 0.0077, "step": 1153 }, { "epoch": 0.37, "learning_rate": 2.1904576415765846e-05, - "loss": 0.0093, + "loss": 0.0098, "step": 1154 }, { "epoch": 0.37, "learning_rate": 2.1890770419211712e-05, - "loss": 0.0112, + "loss": 0.0096, "step": 1155 }, { "epoch": 0.37, "learning_rate": 2.187695701986443e-05, - "loss": 0.0081, + "loss": 0.012, "step": 1156 }, { @@ -6969,121 +6969,121 @@ { "epoch": 0.37, "learning_rate": 2.1849308072157595e-05, - "loss": 0.0122, + "loss": 0.0142, "step": 1158 }, { "epoch": 0.37, "learning_rate": 2.1835472553501463e-05, - "loss": 0.01, + "loss": 0.0101, "step": 1159 }, { "epoch": 0.37, "learning_rate": 2.1821629691458986e-05, - "loss": 0.0079, + "loss": 0.0077, "step": 1160 }, { "epoch": 0.37, "learning_rate": 2.180777950090163e-05, - "loss": 0.0113, + "loss": 0.0109, "step": 1161 }, { "epoch": 0.37, "learning_rate": 2.179392199670873e-05, - "loss": 0.007, + "loss": 0.0078, "step": 1162 }, { "epoch": 0.37, "learning_rate": 2.1780057193767478e-05, - "loss": 0.0093, + "loss": 0.0076, "step": 1163 }, { "epoch": 0.37, "learning_rate": 2.1766185106972913e-05, - "loss": 0.0156, + "loss": 0.0215, "step": 1164 }, { "epoch": 0.37, "learning_rate": 2.17523057512279e-05, - "loss": 0.007, + "loss": 0.0076, "step": 1165 }, { "epoch": 0.37, "learning_rate": 2.1738419141443095e-05, - "loss": 0.0114, + "loss": 0.0115, "step": 1166 }, { "epoch": 0.37, "learning_rate": 2.1724525292536967e-05, - "loss": 0.0078, + "loss": 0.0088, "step": 1167 }, { "epoch": 0.37, "learning_rate": 2.171062421943576e-05, - "loss": 0.0042, + "loss": 0.0041, "step": 1168 }, { "epoch": 0.37, "learning_rate": 2.169671593707347e-05, - "loss": 0.0069, + "loss": 0.0068, "step": 1169 }, { "epoch": 0.37, "learning_rate": 2.168280046039184e-05, - "loss": 0.0047, + "loss": 0.0068, "step": 1170 }, { "epoch": 0.37, "learning_rate": 2.1668877804340354e-05, - "loss": 0.0046, + "loss": 0.0053, "step": 1171 }, { "epoch": 0.37, "learning_rate": 2.165494798387619e-05, - "loss": 0.0063, + "loss": 0.0076, "step": 1172 }, { "epoch": 0.37, "learning_rate": 2.1641011013964234e-05, - "loss": 0.0086, + "loss": 0.0093, "step": 1173 }, { "epoch": 0.37, "learning_rate": 2.1627066909577052e-05, - "loss": 0.0083, + "loss": 0.009, "step": 1174 }, { "epoch": 0.38, "learning_rate": 2.1613115685694868e-05, - "loss": 0.0129, + "loss": 0.0086, "step": 1175 }, { "epoch": 0.38, "learning_rate": 2.1599157357305572e-05, - "loss": 0.0104, + "loss": 0.0101, "step": 1176 }, { "epoch": 0.38, "learning_rate": 2.158519193940466e-05, - "loss": 0.0085, + "loss": 0.0067, "step": 1177 }, { @@ -7095,13 +7095,13 @@ { "epoch": 0.38, "learning_rate": 2.155723989508812e-05, - "loss": 0.0107, + "loss": 0.0071, "step": 1179 }, { "epoch": 0.38, "learning_rate": 2.1543253298701526e-05, - "loss": 0.0068, + "loss": 0.0077, "step": 1180 }, { @@ -7113,43 +7113,43 @@ { "epoch": 0.38, "learning_rate": 2.1515259032601067e-05, - "loss": 0.0089, + "loss": 0.0118, "step": 1182 }, { "epoch": 0.38, "learning_rate": 2.15012513929616e-05, - "loss": 0.0083, + "loss": 0.0079, "step": 1183 }, { "epoch": 0.38, "learning_rate": 2.1487236768991443e-05, - "loss": 0.017, + "loss": 0.0144, "step": 1184 }, { "epoch": 0.38, "learning_rate": 2.147321517574659e-05, - "loss": 0.0075, + "loss": 0.0107, "step": 1185 }, { "epoch": 0.38, "learning_rate": 2.1459186628290522e-05, - "loss": 0.0062, + "loss": 0.0067, "step": 1186 }, { "epoch": 0.38, "learning_rate": 2.1445151141694174e-05, - "loss": 0.0073, + "loss": 0.0119, "step": 1187 }, { "epoch": 0.38, "learning_rate": 2.1431108731035953e-05, - "loss": 0.0094, + "loss": 0.0093, "step": 1188 }, { @@ -7161,31 +7161,31 @@ { "epoch": 0.38, "learning_rate": 2.1403003197884687e-05, - "loss": 0.0074, + "loss": 0.0096, "step": 1190 }, { "epoch": 0.38, "learning_rate": 2.138894010558557e-05, - "loss": 0.0077, + "loss": 0.0097, "step": 1191 }, { "epoch": 0.38, "learning_rate": 2.137487014961241e-05, - "loss": 0.0191, + "loss": 0.0184, "step": 1192 }, { "epoch": 0.38, "learning_rate": 2.136079334508064e-05, - "loss": 0.0072, + "loss": 0.007, "step": 1193 }, { "epoch": 0.38, "learning_rate": 2.1346709707113053e-05, - "loss": 0.0079, + "loss": 0.0074, "step": 1194 }, { @@ -7197,337 +7197,337 @@ { "epoch": 0.38, "learning_rate": 2.1318521991398265e-05, - "loss": 0.0109, + "loss": 0.0088, "step": 1196 }, { "epoch": 0.38, "learning_rate": 2.130441794393328e-05, - "loss": 0.0236, + "loss": 0.0094, "step": 1197 }, { "epoch": 0.38, "learning_rate": 2.129030712359689e-05, - "loss": 0.0038, + "loss": 0.0033, "step": 1198 }, { "epoch": 0.38, "learning_rate": 2.1276189545548424e-05, - "loss": 0.0068, + "loss": 0.007, "step": 1199 }, { "epoch": 0.38, "learning_rate": 2.1262065224954467e-05, - "loss": 0.0114, + "loss": 0.0088, "step": 1200 }, { "epoch": 0.38, "learning_rate": 2.124793417698886e-05, - "loss": 0.0071, + "loss": 0.0081, "step": 1201 }, { "epoch": 0.38, "learning_rate": 2.123379641683267e-05, - "loss": 0.0124, + "loss": 0.0114, "step": 1202 }, { "epoch": 0.38, "learning_rate": 2.1219651959674172e-05, - "loss": 0.0083, + "loss": 0.01, "step": 1203 }, { "epoch": 0.38, "learning_rate": 2.1205500820708832e-05, - "loss": 0.0038, + "loss": 0.0036, "step": 1204 }, { "epoch": 0.38, "learning_rate": 2.1191343015139297e-05, - "loss": 0.0186, + "loss": 0.0184, "step": 1205 }, { "epoch": 0.39, "learning_rate": 2.117717855817538e-05, - "loss": 0.0153, + "loss": 0.0071, "step": 1206 }, { "epoch": 0.39, "learning_rate": 2.1163007465034037e-05, - "loss": 0.0112, + "loss": 0.0107, "step": 1207 }, { "epoch": 0.39, "learning_rate": 2.1148829750939342e-05, - "loss": 0.0073, + "loss": 0.006, "step": 1208 }, { "epoch": 0.39, "learning_rate": 2.1134645431122507e-05, - "loss": 0.0161, + "loss": 0.0114, "step": 1209 }, { "epoch": 0.39, "learning_rate": 2.1120454520821812e-05, - "loss": 0.0081, + "loss": 0.0141, "step": 1210 }, { "epoch": 0.39, "learning_rate": 2.1106257035282646e-05, - "loss": 0.0057, + "loss": 0.006, "step": 1211 }, { "epoch": 0.39, "learning_rate": 2.1092052989757436e-05, - "loss": 0.0072, + "loss": 0.0067, "step": 1212 }, { "epoch": 0.39, "learning_rate": 2.1077842399505673e-05, - "loss": 0.0028, + "loss": 0.0025, "step": 1213 }, { "epoch": 0.39, "learning_rate": 2.1063625279793872e-05, - "loss": 0.0118, + "loss": 0.0079, "step": 1214 }, { "epoch": 0.39, "learning_rate": 2.1049401645895556e-05, - "loss": 0.0084, + "loss": 0.007, "step": 1215 }, { "epoch": 0.39, "learning_rate": 2.103517151309127e-05, - "loss": 0.0053, + "loss": 0.0069, "step": 1216 }, { "epoch": 0.39, "learning_rate": 2.1020934896668516e-05, - "loss": 0.0092, + "loss": 0.0138, "step": 1217 }, { "epoch": 0.39, "learning_rate": 2.100669181192177e-05, - "loss": 0.0086, + "loss": 0.0085, "step": 1218 }, { "epoch": 0.39, "learning_rate": 2.099244227415246e-05, - "loss": 0.0169, + "loss": 0.0171, "step": 1219 }, { "epoch": 0.39, "learning_rate": 2.0978186298668944e-05, - "loss": 0.0072, + "loss": 0.0051, "step": 1220 }, { "epoch": 0.39, "learning_rate": 2.0963923900786492e-05, - "loss": 0.0154, + "loss": 0.0133, "step": 1221 }, { "epoch": 0.39, "learning_rate": 2.0949655095827288e-05, - "loss": 0.0106, + "loss": 0.0112, "step": 1222 }, { "epoch": 0.39, "learning_rate": 2.0935379899120373e-05, - "loss": 0.0097, + "loss": 0.0103, "step": 1223 }, { "epoch": 0.39, "learning_rate": 2.0921098326001683e-05, - "loss": 0.0107, + "loss": 0.0127, "step": 1224 }, { "epoch": 0.39, "learning_rate": 2.0906810391813987e-05, - "loss": 0.0054, + "loss": 0.0094, "step": 1225 }, { "epoch": 0.39, "learning_rate": 2.0892516111906897e-05, - "loss": 0.0077, + "loss": 0.0076, "step": 1226 }, { "epoch": 0.39, "learning_rate": 2.0878215501636832e-05, - "loss": 0.0053, + "loss": 0.0052, "step": 1227 }, { "epoch": 0.39, "learning_rate": 2.0863908576367015e-05, - "loss": 0.0109, + "loss": 0.01, "step": 1228 }, { "epoch": 0.39, "learning_rate": 2.084959535146747e-05, - "loss": 0.0089, + "loss": 0.0088, "step": 1229 }, { "epoch": 0.39, "learning_rate": 2.083527584231496e-05, - "loss": 0.009, + "loss": 0.0097, "step": 1230 }, { "epoch": 0.39, "learning_rate": 2.0820950064293022e-05, - "loss": 0.0097, + "loss": 0.0113, "step": 1231 }, { "epoch": 0.39, "learning_rate": 2.0806618032791916e-05, - "loss": 0.0093, + "loss": 0.0102, "step": 1232 }, { "epoch": 0.39, "learning_rate": 2.079227976320862e-05, - "loss": 0.0269, + "loss": 0.0416, "step": 1233 }, { "epoch": 0.39, "learning_rate": 2.0777935270946824e-05, - "loss": 0.0082, + "loss": 0.0067, "step": 1234 }, { "epoch": 0.39, "learning_rate": 2.0763584571416888e-05, - "loss": 0.0186, + "loss": 0.0135, "step": 1235 }, { "epoch": 0.39, "learning_rate": 2.074922768003586e-05, - "loss": 0.0108, + "loss": 0.0074, "step": 1236 }, { "epoch": 0.4, "learning_rate": 2.0734864612227417e-05, - "loss": 0.0146, + "loss": 0.0148, "step": 1237 }, { "epoch": 0.4, "learning_rate": 2.0720495383421887e-05, - "loss": 0.0118, + "loss": 0.0078, "step": 1238 }, { "epoch": 0.4, "learning_rate": 2.070612000905622e-05, - "loss": 0.0128, + "loss": 0.0216, "step": 1239 }, { "epoch": 0.4, "learning_rate": 2.069173850457395e-05, - "loss": 0.0075, + "loss": 0.0067, "step": 1240 }, { "epoch": 0.4, "learning_rate": 2.067735088542521e-05, - "loss": 0.0084, + "loss": 0.0069, "step": 1241 }, { "epoch": 0.4, "learning_rate": 2.0662957167066706e-05, - "loss": 0.0058, + "loss": 0.0059, "step": 1242 }, { "epoch": 0.4, "learning_rate": 2.064855736496169e-05, - "loss": 0.0071, + "loss": 0.0095, "step": 1243 }, { "epoch": 0.4, "learning_rate": 2.063415149457994e-05, - "loss": 0.0152, + "loss": 0.0101, "step": 1244 }, { "epoch": 0.4, "learning_rate": 2.061973957139777e-05, - "loss": 0.0083, + "loss": 0.0091, "step": 1245 }, { "epoch": 0.4, "learning_rate": 2.0605321610897988e-05, - "loss": 0.0149, + "loss": 0.012, "step": 1246 }, { "epoch": 0.4, "learning_rate": 2.0590897628569894e-05, - "loss": 0.007, + "loss": 0.0077, "step": 1247 }, { "epoch": 0.4, "learning_rate": 2.057646763990925e-05, - "loss": 0.0084, + "loss": 0.0092, "step": 1248 }, { "epoch": 0.4, "learning_rate": 2.056203166041827e-05, - "loss": 0.0062, + "loss": 0.0066, "step": 1249 }, { "epoch": 0.4, "learning_rate": 2.0547589705605604e-05, - "loss": 0.0099, + "loss": 0.0148, "step": 1250 }, { "epoch": 0.4, "learning_rate": 2.0533141790986333e-05, - "loss": 0.0113, + "loss": 0.0083, "step": 1251 }, { @@ -7539,157 +7539,157 @@ { "epoch": 0.4, "learning_rate": 2.050422814442026e-05, - "loss": 0.0025, + "loss": 0.0051, "step": 1253 }, { "epoch": 0.4, "learning_rate": 2.0489762443535543e-05, - "loss": 0.0146, + "loss": 0.0119, "step": 1254 }, { "epoch": 0.4, "learning_rate": 2.0475290844968372e-05, - "loss": 0.0105, + "loss": 0.0149, "step": 1255 }, { "epoch": 0.4, "learning_rate": 2.0460813364265666e-05, - "loss": 0.0172, + "loss": 0.0174, "step": 1256 }, { "epoch": 0.4, "learning_rate": 2.044633001698066e-05, - "loss": 0.0121, + "loss": 0.0075, "step": 1257 }, { "epoch": 0.4, "learning_rate": 2.0431840818672894e-05, - "loss": 0.012, + "loss": 0.009, "step": 1258 }, { "epoch": 0.4, "learning_rate": 2.0417345784908195e-05, - "loss": 0.0065, + "loss": 0.0069, "step": 1259 }, { "epoch": 0.4, "learning_rate": 2.0402844931258662e-05, - "loss": 0.0091, + "loss": 0.0102, "step": 1260 }, { "epoch": 0.4, "learning_rate": 2.0388338273302638e-05, - "loss": 0.0049, + "loss": 0.005, "step": 1261 }, { "epoch": 0.4, "learning_rate": 2.037382582662471e-05, - "loss": 0.0116, + "loss": 0.0124, "step": 1262 }, { "epoch": 0.4, "learning_rate": 2.0359307606815683e-05, - "loss": 0.0118, + "loss": 0.0078, "step": 1263 }, { "epoch": 0.4, "learning_rate": 2.034478362947256e-05, - "loss": 0.0122, + "loss": 0.0117, "step": 1264 }, { "epoch": 0.4, "learning_rate": 2.0330253910198527e-05, - "loss": 0.0097, + "loss": 0.0089, "step": 1265 }, { "epoch": 0.4, "learning_rate": 2.0315718464602947e-05, - "loss": 0.0116, + "loss": 0.0102, "step": 1266 }, { "epoch": 0.4, "learning_rate": 2.0301177308301323e-05, - "loss": 0.0097, + "loss": 0.0124, "step": 1267 }, { "epoch": 0.4, "learning_rate": 2.028663045691531e-05, - "loss": 0.0072, + "loss": 0.0086, "step": 1268 }, { "epoch": 0.41, "learning_rate": 2.0272077926072667e-05, - "loss": 0.0108, + "loss": 0.0096, "step": 1269 }, { "epoch": 0.41, "learning_rate": 2.0257519731407264e-05, - "loss": 0.0109, + "loss": 0.0119, "step": 1270 }, { "epoch": 0.41, "learning_rate": 2.0242955888559042e-05, - "loss": 0.0112, + "loss": 0.0147, "step": 1271 }, { "epoch": 0.41, "learning_rate": 2.0228386413174025e-05, - "loss": 0.0054, + "loss": 0.0103, "step": 1272 }, { "epoch": 0.41, "learning_rate": 2.0213811320904283e-05, - "loss": 0.0081, + "loss": 0.0076, "step": 1273 }, { "epoch": 0.41, "learning_rate": 2.0199230627407912e-05, - "loss": 0.0069, + "loss": 0.0078, "step": 1274 }, { "epoch": 0.41, "learning_rate": 2.018464434834904e-05, - "loss": 0.0062, + "loss": 0.0059, "step": 1275 }, { "epoch": 0.41, "learning_rate": 2.0170052499397785e-05, - "loss": 0.0117, + "loss": 0.0135, "step": 1276 }, { "epoch": 0.41, "learning_rate": 2.0155455096230248e-05, - "loss": 0.0084, + "loss": 0.0117, "step": 1277 }, { "epoch": 0.41, "learning_rate": 2.014085215452851e-05, - "loss": 0.0078, + "loss": 0.0076, "step": 1278 }, { @@ -7701,205 +7701,205 @@ { "epoch": 0.41, "learning_rate": 2.011162971828043e-05, - "loss": 0.0039, + "loss": 0.0036, "step": 1280 }, { "epoch": 0.41, "learning_rate": 2.0097010255127914e-05, - "loss": 0.0031, + "loss": 0.0045, "step": 1281 }, { "epoch": 0.41, "learning_rate": 2.0082385316228816e-05, - "loss": 0.013, + "loss": 0.0111, "step": 1282 }, { "epoch": 0.41, "learning_rate": 2.0067754917294783e-05, - "loss": 0.0048, + "loss": 0.004, "step": 1283 }, { "epoch": 0.41, "learning_rate": 2.0053119074043336e-05, - "loss": 0.0073, + "loss": 0.0055, "step": 1284 }, { "epoch": 0.41, "learning_rate": 2.003847780219784e-05, - "loss": 0.0114, + "loss": 0.0101, "step": 1285 }, { "epoch": 0.41, "learning_rate": 2.0023831117487496e-05, - "loss": 0.0067, + "loss": 0.0072, "step": 1286 }, { "epoch": 0.41, "learning_rate": 2.0009179035647325e-05, - "loss": 0.0111, + "loss": 0.0112, "step": 1287 }, { "epoch": 0.41, "learning_rate": 1.9994521572418137e-05, - "loss": 0.0066, + "loss": 0.0075, "step": 1288 }, { "epoch": 0.41, "learning_rate": 1.9979858743546524e-05, - "loss": 0.0082, + "loss": 0.0092, "step": 1289 }, { "epoch": 0.41, "learning_rate": 1.9965190564784848e-05, - "loss": 0.0125, + "loss": 0.0136, "step": 1290 }, { "epoch": 0.41, "learning_rate": 1.995051705189122e-05, - "loss": 0.0102, + "loss": 0.0136, "step": 1291 }, { "epoch": 0.41, "learning_rate": 1.9935838220629464e-05, - "loss": 0.0102, + "loss": 0.0146, "step": 1292 }, { "epoch": 0.41, "learning_rate": 1.992115408676914e-05, - "loss": 0.0143, + "loss": 0.0152, "step": 1293 }, { "epoch": 0.41, "learning_rate": 1.9906464666085488e-05, - "loss": 0.0123, + "loss": 0.0133, "step": 1294 }, { "epoch": 0.41, "learning_rate": 1.989176997435944e-05, - "loss": 0.0077, + "loss": 0.0079, "step": 1295 }, { "epoch": 0.41, "learning_rate": 1.9877070027377593e-05, - "loss": 0.0128, + "loss": 0.0119, "step": 1296 }, { "epoch": 0.41, "learning_rate": 1.986236484093217e-05, - "loss": 0.008, + "loss": 0.0065, "step": 1297 }, { "epoch": 0.41, "learning_rate": 1.9847654430821036e-05, - "loss": 0.0118, + "loss": 0.0115, "step": 1298 }, { "epoch": 0.41, "learning_rate": 1.9832938812847668e-05, - "loss": 0.0098, + "loss": 0.0085, "step": 1299 }, { "epoch": 0.42, "learning_rate": 1.9818218002821144e-05, - "loss": 0.0056, + "loss": 0.0059, "step": 1300 }, { "epoch": 0.42, "learning_rate": 1.9803492016556105e-05, - "loss": 0.0085, + "loss": 0.0068, "step": 1301 }, { "epoch": 0.42, "learning_rate": 1.978876086987276e-05, - "loss": 0.009, + "loss": 0.0092, "step": 1302 }, { "epoch": 0.42, "learning_rate": 1.9774024578596866e-05, - "loss": 0.011, + "loss": 0.0085, "step": 1303 }, { "epoch": 0.42, "learning_rate": 1.97592831585597e-05, - "loss": 0.0157, + "loss": 0.0114, "step": 1304 }, { "epoch": 0.42, "learning_rate": 1.9744536625598056e-05, - "loss": 0.0055, + "loss": 0.0047, "step": 1305 }, { "epoch": 0.42, "learning_rate": 1.9729784995554213e-05, - "loss": 0.0122, + "loss": 0.0132, "step": 1306 }, { "epoch": 0.42, "learning_rate": 1.9715028284275926e-05, - "loss": 0.0061, + "loss": 0.0046, "step": 1307 }, { "epoch": 0.42, "learning_rate": 1.970026650761642e-05, - "loss": 0.0081, + "loss": 0.008, "step": 1308 }, { "epoch": 0.42, "learning_rate": 1.968549968143435e-05, - "loss": 0.0054, + "loss": 0.0038, "step": 1309 }, { "epoch": 0.42, "learning_rate": 1.9670727821593802e-05, - "loss": 0.0123, + "loss": 0.013, "step": 1310 }, { "epoch": 0.42, "learning_rate": 1.9655950943964267e-05, - "loss": 0.0181, + "loss": 0.0161, "step": 1311 }, { "epoch": 0.42, "learning_rate": 1.964116906442062e-05, - "loss": 0.0056, + "loss": 0.0062, "step": 1312 }, { "epoch": 0.42, "learning_rate": 1.962638219884313e-05, - "loss": 0.011, + "loss": 0.0084, "step": 1313 }, { @@ -7911,205 +7911,205 @@ { "epoch": 0.42, "learning_rate": 1.9596793573134384e-05, - "loss": 0.0074, + "loss": 0.0077, "step": 1315 }, { "epoch": 0.42, "learning_rate": 1.958199184479036e-05, - "loss": 0.0131, + "loss": 0.0152, "step": 1316 }, { "epoch": 0.42, "learning_rate": 1.9567185193986896e-05, - "loss": 0.0114, + "loss": 0.0099, "step": 1317 }, { "epoch": 0.42, "learning_rate": 1.9552373636630873e-05, - "loss": 0.0092, + "loss": 0.0116, "step": 1318 }, { "epoch": 0.42, "learning_rate": 1.9537557188634424e-05, - "loss": 0.0078, + "loss": 0.008, "step": 1319 }, { "epoch": 0.42, "learning_rate": 1.952273586591494e-05, - "loss": 0.0117, + "loss": 0.0119, "step": 1320 }, { "epoch": 0.42, "learning_rate": 1.9507909684395054e-05, - "loss": 0.0084, + "loss": 0.008, "step": 1321 }, { "epoch": 0.42, "learning_rate": 1.949307866000261e-05, - "loss": 0.0081, + "loss": 0.0089, "step": 1322 }, { "epoch": 0.42, "learning_rate": 1.947824280867067e-05, - "loss": 0.0093, + "loss": 0.0137, "step": 1323 }, { "epoch": 0.42, "learning_rate": 1.9463402146337467e-05, - "loss": 0.0049, + "loss": 0.0052, "step": 1324 }, { "epoch": 0.42, "learning_rate": 1.9448556688946403e-05, - "loss": 0.0109, + "loss": 0.0085, "step": 1325 }, { "epoch": 0.42, "learning_rate": 1.943370645244604e-05, - "loss": 0.0059, + "loss": 0.0062, "step": 1326 }, { "epoch": 0.42, "learning_rate": 1.9418851452790075e-05, - "loss": 0.0102, + "loss": 0.0071, "step": 1327 }, { "epoch": 0.42, "learning_rate": 1.940399170593731e-05, - "loss": 0.0069, + "loss": 0.008, "step": 1328 }, { "epoch": 0.42, "learning_rate": 1.9389127227851654e-05, - "loss": 0.0173, + "loss": 0.0195, "step": 1329 }, { "epoch": 0.42, "learning_rate": 1.937425803450211e-05, - "loss": 0.0107, + "loss": 0.0108, "step": 1330 }, { "epoch": 0.43, "learning_rate": 1.9359384141862717e-05, - "loss": 0.007, + "loss": 0.0103, "step": 1331 }, { "epoch": 0.43, "learning_rate": 1.9344505565912593e-05, - "loss": 0.0128, + "loss": 0.0099, "step": 1332 }, { "epoch": 0.43, "learning_rate": 1.9329622322635872e-05, - "loss": 0.0063, + "loss": 0.0066, "step": 1333 }, { "epoch": 0.43, "learning_rate": 1.9314734428021703e-05, - "loss": 0.0109, + "loss": 0.0086, "step": 1334 }, { "epoch": 0.43, "learning_rate": 1.929984189806424e-05, - "loss": 0.0054, + "loss": 0.0052, "step": 1335 }, { "epoch": 0.43, "learning_rate": 1.928494474876261e-05, - "loss": 0.0125, + "loss": 0.0137, "step": 1336 }, { "epoch": 0.43, "learning_rate": 1.92700429961209e-05, - "loss": 0.0068, + "loss": 0.0083, "step": 1337 }, { "epoch": 0.43, "learning_rate": 1.925513665614815e-05, - "loss": 0.0105, + "loss": 0.0082, "step": 1338 }, { "epoch": 0.43, "learning_rate": 1.9240225744858318e-05, - "loss": 0.0101, + "loss": 0.0105, "step": 1339 }, { "epoch": 0.43, "learning_rate": 1.922531027827029e-05, - "loss": 0.0094, + "loss": 0.0153, "step": 1340 }, { "epoch": 0.43, "learning_rate": 1.9210390272407826e-05, - "loss": 0.0097, + "loss": 0.0079, "step": 1341 }, { "epoch": 0.43, "learning_rate": 1.9195465743299574e-05, - "loss": 0.046, + "loss": 0.0398, "step": 1342 }, { "epoch": 0.43, "learning_rate": 1.9180536706979043e-05, - "loss": 0.009, + "loss": 0.0124, "step": 1343 }, { "epoch": 0.43, "learning_rate": 1.9165603179484574e-05, - "loss": 0.0089, + "loss": 0.0145, "step": 1344 }, { "epoch": 0.43, "learning_rate": 1.9150665176859345e-05, - "loss": 0.0119, + "loss": 0.0102, "step": 1345 }, { "epoch": 0.43, "learning_rate": 1.913572271515133e-05, - "loss": 0.0059, + "loss": 0.0037, "step": 1346 }, { "epoch": 0.43, "learning_rate": 1.9120775810413297e-05, - "loss": 0.0124, + "loss": 0.0135, "step": 1347 }, { "epoch": 0.43, "learning_rate": 1.91058244787028e-05, - "loss": 0.0042, + "loss": 0.0044, "step": 1348 }, { @@ -8121,13 +8121,13 @@ { "epoch": 0.43, "learning_rate": 1.9075908598618326e-05, - "loss": 0.0071, + "loss": 0.0096, "step": 1350 }, { "epoch": 0.43, "learning_rate": 1.9060944082383142e-05, - "loss": 0.0123, + "loss": 0.0095, "step": 1351 }, { @@ -8139,241 +8139,241 @@ { "epoch": 0.43, "learning_rate": 1.9031001977909205e-05, - "loss": 0.0079, + "loss": 0.0201, "step": 1353 }, { "epoch": 0.43, "learning_rate": 1.901602442183742e-05, - "loss": 0.0113, + "loss": 0.0121, "step": 1354 }, { "epoch": 0.43, "learning_rate": 1.900104255132816e-05, - "loss": 0.0037, + "loss": 0.0032, "step": 1355 }, { "epoch": 0.43, "learning_rate": 1.8986056382476535e-05, - "loss": 0.0101, + "loss": 0.0182, "step": 1356 }, { "epoch": 0.43, "learning_rate": 1.897106593138228e-05, - "loss": 0.0059, + "loss": 0.0057, "step": 1357 }, { "epoch": 0.43, "learning_rate": 1.8956071214149716e-05, - "loss": 0.0117, + "loss": 0.0135, "step": 1358 }, { "epoch": 0.43, "learning_rate": 1.8941072246887747e-05, - "loss": 0.0082, + "loss": 0.0088, "step": 1359 }, { "epoch": 0.43, "learning_rate": 1.8926069045709845e-05, - "loss": 0.0116, + "loss": 0.0102, "step": 1360 }, { "epoch": 0.43, "learning_rate": 1.8911061626734043e-05, - "loss": 0.0065, + "loss": 0.0064, "step": 1361 }, { "epoch": 0.44, "learning_rate": 1.8896050006082886e-05, - "loss": 0.0079, + "loss": 0.0076, "step": 1362 }, { "epoch": 0.44, "learning_rate": 1.888103419988345e-05, - "loss": 0.0105, + "loss": 0.0091, "step": 1363 }, { "epoch": 0.44, "learning_rate": 1.886601422426729e-05, - "loss": 0.0047, + "loss": 0.0066, "step": 1364 }, { "epoch": 0.44, "learning_rate": 1.8850990095370457e-05, - "loss": 0.0083, + "loss": 0.0076, "step": 1365 }, { "epoch": 0.44, "learning_rate": 1.8835961829333452e-05, - "loss": 0.0083, + "loss": 0.009, "step": 1366 }, { "epoch": 0.44, "learning_rate": 1.882092944230123e-05, - "loss": 0.0054, + "loss": 0.005, "step": 1367 }, { "epoch": 0.44, "learning_rate": 1.8805892950423165e-05, - "loss": 0.008, + "loss": 0.0079, "step": 1368 }, { "epoch": 0.44, "learning_rate": 1.8790852369853042e-05, - "loss": 0.0112, + "loss": 0.0101, "step": 1369 }, { "epoch": 0.44, "learning_rate": 1.877580771674905e-05, - "loss": 0.0062, + "loss": 0.0059, "step": 1370 }, { "epoch": 0.44, "learning_rate": 1.8760759007273732e-05, - "loss": 0.0086, + "loss": 0.0115, "step": 1371 }, { "epoch": 0.44, "learning_rate": 1.8745706257594007e-05, - "loss": 0.0058, + "loss": 0.0064, "step": 1372 }, { "epoch": 0.44, "learning_rate": 1.873064948388113e-05, - "loss": 0.0101, + "loss": 0.0114, "step": 1373 }, { "epoch": 0.44, "learning_rate": 1.8715588702310675e-05, - "loss": 0.0055, + "loss": 0.0056, "step": 1374 }, { "epoch": 0.44, "learning_rate": 1.870052392906252e-05, - "loss": 0.0088, + "loss": 0.009, "step": 1375 }, { "epoch": 0.44, "learning_rate": 1.8685455180320842e-05, - "loss": 0.0041, + "loss": 0.0044, "step": 1376 }, { "epoch": 0.44, "learning_rate": 1.8670382472274076e-05, - "loss": 0.0064, + "loss": 0.0068, "step": 1377 }, { "epoch": 0.44, "learning_rate": 1.8655305821114916e-05, - "loss": 0.0125, + "loss": 0.0058, "step": 1378 }, { "epoch": 0.44, "learning_rate": 1.8640225243040295e-05, - "loss": 0.0056, + "loss": 0.0078, "step": 1379 }, { "epoch": 0.44, "learning_rate": 1.8625140754251366e-05, - "loss": 0.0036, + "loss": 0.0045, "step": 1380 }, { "epoch": 0.44, "learning_rate": 1.8610052370953473e-05, - "loss": 0.0022, + "loss": 0.0018, "step": 1381 }, { "epoch": 0.44, "learning_rate": 1.8594960109356153e-05, - "loss": 0.0109, + "loss": 0.0082, "step": 1382 }, { "epoch": 0.44, "learning_rate": 1.857986398567311e-05, - "loss": 0.0121, + "loss": 0.0108, "step": 1383 }, { "epoch": 0.44, "learning_rate": 1.856476401612219e-05, - "loss": 0.0064, + "loss": 0.0053, "step": 1384 }, { "epoch": 0.44, "learning_rate": 1.8549660216925375e-05, - "loss": 0.0055, + "loss": 0.0056, "step": 1385 }, { "epoch": 0.44, "learning_rate": 1.8534552604308767e-05, - "loss": 0.0117, + "loss": 0.0102, "step": 1386 }, { "epoch": 0.44, "learning_rate": 1.851944119450255e-05, - "loss": 0.0052, + "loss": 0.0033, "step": 1387 }, { "epoch": 0.44, "learning_rate": 1.8504326003740995e-05, - "loss": 0.0104, + "loss": 0.0121, "step": 1388 }, { "epoch": 0.44, "learning_rate": 1.8489207048262448e-05, - "loss": 0.0146, + "loss": 0.012, "step": 1389 }, { "epoch": 0.44, "learning_rate": 1.8474084344309274e-05, - "loss": 0.0091, + "loss": 0.0078, "step": 1390 }, { "epoch": 0.44, "learning_rate": 1.8458957908127885e-05, - "loss": 0.0096, + "loss": 0.0111, "step": 1391 }, { "epoch": 0.44, "learning_rate": 1.8443827755968694e-05, - "loss": 0.0124, + "loss": 0.0058, "step": 1392 }, { @@ -8385,595 +8385,595 @@ { "epoch": 0.45, "learning_rate": 1.8413556368738513e-05, - "loss": 0.0044, + "loss": 0.0057, "step": 1394 }, { "epoch": 0.45, "learning_rate": 1.839841516618824e-05, - "loss": 0.0063, + "loss": 0.0089, "step": 1395 }, { "epoch": 0.45, "learning_rate": 1.8383270312701574e-05, - "loss": 0.0065, + "loss": 0.0064, "step": 1396 }, { "epoch": 0.45, "learning_rate": 1.8368121824548714e-05, - "loss": 0.0045, + "loss": 0.0044, "step": 1397 }, { "epoch": 0.45, "learning_rate": 1.8352969718003765e-05, - "loss": 0.013, + "loss": 0.015, "step": 1398 }, { "epoch": 0.45, "learning_rate": 1.8337814009344716e-05, - "loss": 0.0085, + "loss": 0.0095, "step": 1399 }, { "epoch": 0.45, "learning_rate": 1.8322654714853436e-05, - "loss": 0.0076, + "loss": 0.0057, "step": 1400 }, { "epoch": 0.45, "learning_rate": 1.830749185081564e-05, - "loss": 0.0062, + "loss": 0.0055, "step": 1401 }, { "epoch": 0.45, "learning_rate": 1.829232543352087e-05, - "loss": 0.0057, + "loss": 0.0059, "step": 1402 }, { "epoch": 0.45, "learning_rate": 1.8277155479262496e-05, - "loss": 0.0074, + "loss": 0.0085, "step": 1403 }, { "epoch": 0.45, "learning_rate": 1.826198200433768e-05, - "loss": 0.0104, + "loss": 0.0098, "step": 1404 }, { "epoch": 0.45, "learning_rate": 1.8246805025047388e-05, - "loss": 0.0081, + "loss": 0.0099, "step": 1405 }, { "epoch": 0.45, "learning_rate": 1.8231624557696314e-05, - "loss": 0.0077, + "loss": 0.007, "step": 1406 }, { "epoch": 0.45, "learning_rate": 1.8216440618592932e-05, - "loss": 0.0146, + "loss": 0.0147, "step": 1407 }, { "epoch": 0.45, "learning_rate": 1.8201253224049423e-05, - "loss": 0.0097, + "loss": 0.0129, "step": 1408 }, { "epoch": 0.45, "learning_rate": 1.8186062390381698e-05, - "loss": 0.0065, + "loss": 0.0077, "step": 1409 }, { "epoch": 0.45, "learning_rate": 1.8170868133909348e-05, - "loss": 0.0046, + "loss": 0.0084, "step": 1410 }, { "epoch": 0.45, "learning_rate": 1.8155670470955653e-05, - "loss": 0.0069, + "loss": 0.0088, "step": 1411 }, { "epoch": 0.45, "learning_rate": 1.8140469417847545e-05, - "loss": 0.0063, + "loss": 0.0055, "step": 1412 }, { "epoch": 0.45, "learning_rate": 1.8125264990915597e-05, - "loss": 0.0086, + "loss": 0.0106, "step": 1413 }, { "epoch": 0.45, "learning_rate": 1.8110057206494012e-05, - "loss": 0.0025, + "loss": 0.0021, "step": 1414 }, { "epoch": 0.45, "learning_rate": 1.8094846080920602e-05, - "loss": 0.0059, + "loss": 0.007, "step": 1415 }, { "epoch": 0.45, "learning_rate": 1.8079631630536757e-05, - "loss": 0.0033, + "loss": 0.0027, "step": 1416 }, { "epoch": 0.45, "learning_rate": 1.8064413871687452e-05, - "loss": 0.0058, + "loss": 0.0062, "step": 1417 }, { "epoch": 0.45, "learning_rate": 1.8049192820721205e-05, - "loss": 0.0127, + "loss": 0.0125, "step": 1418 }, { "epoch": 0.45, "learning_rate": 1.8033968493990082e-05, - "loss": 0.0117, + "loss": 0.0141, "step": 1419 }, { "epoch": 0.45, "learning_rate": 1.801874090784966e-05, - "loss": 0.0104, + "loss": 0.0075, "step": 1420 }, { "epoch": 0.45, "learning_rate": 1.8003510078659015e-05, - "loss": 0.0087, + "loss": 0.0083, "step": 1421 }, { "epoch": 0.45, "learning_rate": 1.7988276022780717e-05, - "loss": 0.0061, + "loss": 0.0066, "step": 1422 }, { "epoch": 0.45, "learning_rate": 1.79730387565808e-05, - "loss": 0.0088, + "loss": 0.0083, "step": 1423 }, { "epoch": 0.45, "learning_rate": 1.7957798296428735e-05, - "loss": 0.0046, + "loss": 0.0039, "step": 1424 }, { "epoch": 0.46, "learning_rate": 1.7942554658697446e-05, - "loss": 0.0062, + "loss": 0.0061, "step": 1425 }, { "epoch": 0.46, "learning_rate": 1.792730785976324e-05, - "loss": 0.0081, + "loss": 0.0103, "step": 1426 }, { "epoch": 0.46, "learning_rate": 1.7912057916005857e-05, - "loss": 0.0069, + "loss": 0.0081, "step": 1427 }, { "epoch": 0.46, "learning_rate": 1.7896804843808393e-05, - "loss": 0.0066, + "loss": 0.0076, "step": 1428 }, { "epoch": 0.46, "learning_rate": 1.78815486595573e-05, - "loss": 0.0051, + "loss": 0.0067, "step": 1429 }, { "epoch": 0.46, "learning_rate": 1.7866289379642388e-05, - "loss": 0.006, + "loss": 0.0068, "step": 1430 }, { "epoch": 0.46, "learning_rate": 1.785102702045678e-05, - "loss": 0.0093, + "loss": 0.0066, "step": 1431 }, { "epoch": 0.46, "learning_rate": 1.783576159839692e-05, - "loss": 0.0129, + "loss": 0.0124, "step": 1432 }, { "epoch": 0.46, "learning_rate": 1.7820493129862542e-05, - "loss": 0.0128, + "loss": 0.0113, "step": 1433 }, { "epoch": 0.46, "learning_rate": 1.7805221631256638e-05, - "loss": 0.0072, + "loss": 0.0094, "step": 1434 }, { "epoch": 0.46, "learning_rate": 1.778994711898546e-05, - "loss": 0.0176, + "loss": 0.0212, "step": 1435 }, { "epoch": 0.46, "learning_rate": 1.7774669609458515e-05, - "loss": 0.0055, + "loss": 0.0073, "step": 1436 }, { "epoch": 0.46, "learning_rate": 1.775938911908851e-05, - "loss": 0.005, + "loss": 0.0054, "step": 1437 }, { "epoch": 0.46, "learning_rate": 1.774410566429136e-05, - "loss": 0.0036, + "loss": 0.0031, "step": 1438 }, { "epoch": 0.46, "learning_rate": 1.7728819261486166e-05, - "loss": 0.0061, + "loss": 0.0062, "step": 1439 }, { "epoch": 0.46, "learning_rate": 1.7713529927095198e-05, - "loss": 0.0031, + "loss": 0.0036, "step": 1440 }, { "epoch": 0.46, "learning_rate": 1.769823767754387e-05, - "loss": 0.0107, + "loss": 0.0078, "step": 1441 }, { "epoch": 0.46, "learning_rate": 1.768294252926074e-05, - "loss": 0.0083, + "loss": 0.0079, "step": 1442 }, { "epoch": 0.46, "learning_rate": 1.7667644498677465e-05, - "loss": 0.0147, + "loss": 0.0154, "step": 1443 }, { "epoch": 0.46, "learning_rate": 1.76523436022288e-05, - "loss": 0.0095, + "loss": 0.0081, "step": 1444 }, { "epoch": 0.46, "learning_rate": 1.7637039856352587e-05, - "loss": 0.0054, + "loss": 0.0063, "step": 1445 }, { "epoch": 0.46, "learning_rate": 1.762173327748973e-05, - "loss": 0.0052, + "loss": 0.0057, "step": 1446 }, { "epoch": 0.46, "learning_rate": 1.7606423882084168e-05, - "loss": 0.0076, + "loss": 0.0078, "step": 1447 }, { "epoch": 0.46, "learning_rate": 1.7591111686582866e-05, - "loss": 0.0055, + "loss": 0.0065, "step": 1448 }, { "epoch": 0.46, "learning_rate": 1.757579670743581e-05, - "loss": 0.013, + "loss": 0.0105, "step": 1449 }, { "epoch": 0.46, "learning_rate": 1.7560478961095965e-05, - "loss": 0.0088, + "loss": 0.0089, "step": 1450 }, { "epoch": 0.46, "learning_rate": 1.7545158464019273e-05, - "loss": 0.0059, + "loss": 0.0068, "step": 1451 }, { "epoch": 0.46, "learning_rate": 1.752983523266462e-05, - "loss": 0.0113, + "loss": 0.0105, "step": 1452 }, { "epoch": 0.46, "learning_rate": 1.7514509283493848e-05, - "loss": 0.0046, + "loss": 0.0055, "step": 1453 }, { "epoch": 0.46, "learning_rate": 1.7499180632971708e-05, - "loss": 0.0075, + "loss": 0.006, "step": 1454 }, { "epoch": 0.46, "learning_rate": 1.7483849297565853e-05, - "loss": 0.0104, + "loss": 0.0093, "step": 1455 }, { "epoch": 0.47, "learning_rate": 1.7468515293746826e-05, - "loss": 0.0112, + "loss": 0.0078, "step": 1456 }, { "epoch": 0.47, "learning_rate": 1.7453178637988032e-05, - "loss": 0.0077, + "loss": 0.0074, "step": 1457 }, { "epoch": 0.47, "learning_rate": 1.743783934676572e-05, - "loss": 0.022, + "loss": 0.0232, "step": 1458 }, { "epoch": 0.47, "learning_rate": 1.742249743655899e-05, - "loss": 0.0073, + "loss": 0.0066, "step": 1459 }, { "epoch": 0.47, "learning_rate": 1.7407152923849725e-05, - "loss": 0.01, + "loss": 0.0087, "step": 1460 }, { "epoch": 0.47, "learning_rate": 1.739180582512263e-05, - "loss": 0.0058, + "loss": 0.0054, "step": 1461 }, { "epoch": 0.47, "learning_rate": 1.7376456156865174e-05, - "loss": 0.0091, + "loss": 0.0088, "step": 1462 }, { "epoch": 0.47, "learning_rate": 1.7361103935567596e-05, - "loss": 0.0101, + "loss": 0.0113, "step": 1463 }, { "epoch": 0.47, "learning_rate": 1.7345749177722874e-05, - "loss": 0.0114, + "loss": 0.0118, "step": 1464 }, { "epoch": 0.47, "learning_rate": 1.7330391899826706e-05, - "loss": 0.0066, + "loss": 0.0077, "step": 1465 }, { "epoch": 0.47, "learning_rate": 1.7315032118377503e-05, - "loss": 0.0053, + "loss": 0.005, "step": 1466 }, { "epoch": 0.47, "learning_rate": 1.7299669849876367e-05, - "loss": 0.0124, + "loss": 0.0057, "step": 1467 }, { "epoch": 0.47, "learning_rate": 1.7284305110827065e-05, - "loss": 0.0051, + "loss": 0.0055, "step": 1468 }, { "epoch": 0.47, "learning_rate": 1.7268937917736025e-05, - "loss": 0.006, + "loss": 0.0057, "step": 1469 }, { "epoch": 0.47, "learning_rate": 1.7253568287112303e-05, - "loss": 0.0084, + "loss": 0.0108, "step": 1470 }, { "epoch": 0.47, "learning_rate": 1.7238196235467583e-05, - "loss": 0.0065, + "loss": 0.0064, "step": 1471 }, { "epoch": 0.47, "learning_rate": 1.7222821779316147e-05, - "loss": 0.0049, + "loss": 0.0041, "step": 1472 }, { "epoch": 0.47, "learning_rate": 1.7207444935174858e-05, - "loss": 0.0027, + "loss": 0.0083, "step": 1473 }, { "epoch": 0.47, "learning_rate": 1.7192065719563144e-05, - "loss": 0.0067, + "loss": 0.006, "step": 1474 }, { "epoch": 0.47, "learning_rate": 1.717668414900298e-05, - "loss": 0.0094, + "loss": 0.0087, "step": 1475 }, { "epoch": 0.47, "learning_rate": 1.716130024001888e-05, - "loss": 0.0099, + "loss": 0.0098, "step": 1476 }, { "epoch": 0.47, "learning_rate": 1.714591400913786e-05, - "loss": 0.0066, + "loss": 0.0059, "step": 1477 }, { "epoch": 0.47, "learning_rate": 1.7130525472889434e-05, - "loss": 0.0074, + "loss": 0.0045, "step": 1478 }, { "epoch": 0.47, "learning_rate": 1.7115134647805585e-05, - "loss": 0.0149, + "loss": 0.01, "step": 1479 }, { "epoch": 0.47, "learning_rate": 1.7099741550420776e-05, - "loss": 0.0131, + "loss": 0.0111, "step": 1480 }, { "epoch": 0.47, "learning_rate": 1.7084346197271894e-05, - "loss": 0.0078, + "loss": 0.0079, "step": 1481 }, { "epoch": 0.47, "learning_rate": 1.706894860489825e-05, - "loss": 0.0064, + "loss": 0.0071, "step": 1482 }, { "epoch": 0.47, "learning_rate": 1.7053548789841557e-05, - "loss": 0.0062, + "loss": 0.0052, "step": 1483 }, { "epoch": 0.47, "learning_rate": 1.703814676864593e-05, - "loss": 0.0101, + "loss": 0.0058, "step": 1484 }, { "epoch": 0.47, "learning_rate": 1.702274255785785e-05, - "loss": 0.0072, + "loss": 0.0102, "step": 1485 }, { "epoch": 0.47, "learning_rate": 1.7007336174026135e-05, - "loss": 0.0068, + "loss": 0.0128, "step": 1486 }, { "epoch": 0.47, "learning_rate": 1.699192763370196e-05, - "loss": 0.0052, + "loss": 0.0053, "step": 1487 }, { "epoch": 0.48, "learning_rate": 1.6976516953438798e-05, - "loss": 0.011, + "loss": 0.0119, "step": 1488 }, { "epoch": 0.48, "learning_rate": 1.6961104149792437e-05, - "loss": 0.0081, + "loss": 0.0079, "step": 1489 }, { "epoch": 0.48, "learning_rate": 1.6945689239320936e-05, - "loss": 0.0093, + "loss": 0.0133, "step": 1490 }, { "epoch": 0.48, "learning_rate": 1.6930272238584613e-05, - "loss": 0.0093, + "loss": 0.0112, "step": 1491 }, { "epoch": 0.48, "learning_rate": 1.691485316414604e-05, - "loss": 0.0069, + "loss": 0.0076, "step": 1492 }, { @@ -8985,115 +8985,115 @@ { "epoch": 0.48, "learning_rate": 1.688400886042356e-05, - "loss": 0.014, + "loss": 0.0154, "step": 1494 }, { "epoch": 0.48, "learning_rate": 1.6868583664275855e-05, - "loss": 0.0089, + "loss": 0.0092, "step": 1495 }, { "epoch": 0.48, "learning_rate": 1.6853156460698282e-05, - "loss": 0.0115, + "loss": 0.0095, "step": 1496 }, { "epoch": 0.48, "learning_rate": 1.6837727266264377e-05, - "loss": 0.0039, + "loss": 0.0048, "step": 1497 }, { "epoch": 0.48, "learning_rate": 1.6822296097549802e-05, - "loss": 0.011, + "loss": 0.0094, "step": 1498 }, { "epoch": 0.48, "learning_rate": 1.680686297113235e-05, - "loss": 0.0052, + "loss": 0.0058, "step": 1499 }, { "epoch": 0.48, "learning_rate": 1.6791427903591916e-05, - "loss": 0.0084, + "loss": 0.0069, "step": 1500 }, { "epoch": 0.48, "learning_rate": 1.6775990911510473e-05, - "loss": 0.01, + "loss": 0.011, "step": 1501 }, { "epoch": 0.48, "learning_rate": 1.6760552011472077e-05, - "loss": 0.0091, + "loss": 0.0097, "step": 1502 }, { "epoch": 0.48, "learning_rate": 1.674511122006283e-05, - "loss": 0.0062, + "loss": 0.0069, "step": 1503 }, { "epoch": 0.48, "learning_rate": 1.6729668553870845e-05, - "loss": 0.0093, + "loss": 0.0083, "step": 1504 }, { "epoch": 0.48, "learning_rate": 1.671422402948627e-05, - "loss": 0.0046, + "loss": 0.0062, "step": 1505 }, { "epoch": 0.48, "learning_rate": 1.669877766350124e-05, - "loss": 0.0178, + "loss": 0.0151, "step": 1506 }, { "epoch": 0.48, "learning_rate": 1.6683329472509885e-05, - "loss": 0.0069, + "loss": 0.0066, "step": 1507 }, { "epoch": 0.48, "learning_rate": 1.6667879473108277e-05, - "loss": 0.007, + "loss": 0.0068, "step": 1508 }, { "epoch": 0.48, "learning_rate": 1.6652427681894432e-05, - "loss": 0.0086, + "loss": 0.0132, "step": 1509 }, { "epoch": 0.48, "learning_rate": 1.66369741154683e-05, - "loss": 0.0062, + "loss": 0.0073, "step": 1510 }, { "epoch": 0.48, "learning_rate": 1.6621518790431728e-05, - "loss": 0.0029, + "loss": 0.003, "step": 1511 }, { "epoch": 0.48, "learning_rate": 1.6606061723388465e-05, - "loss": 0.007, + "loss": 0.009, "step": 1512 }, { @@ -9105,25 +9105,25 @@ { "epoch": 0.48, "learning_rate": 1.6575142429706165e-05, - "loss": 0.0062, + "loss": 0.01, "step": 1514 }, { "epoch": 0.48, "learning_rate": 1.655968023628389e-05, - "loss": 0.0033, + "loss": 0.0064, "step": 1515 }, { "epoch": 0.48, "learning_rate": 1.6544216367288435e-05, - "loss": 0.0121, + "loss": 0.0116, "step": 1516 }, { "epoch": 0.48, "learning_rate": 1.652875083933271e-05, - "loss": 0.0038, + "loss": 0.0044, "step": 1517 }, { @@ -9135,393 +9135,393 @@ { "epoch": 0.49, "learning_rate": 1.6497814873001027e-05, - "loss": 0.0043, + "loss": 0.0047, "step": 1519 }, { "epoch": 0.49, "learning_rate": 1.6482344467859753e-05, - "loss": 0.0117, + "loss": 0.011, "step": 1520 }, { "epoch": 0.49, "learning_rate": 1.6466872470227535e-05, - "loss": 0.0057, + "loss": 0.0085, "step": 1521 }, { "epoch": 0.49, "learning_rate": 1.6451398896726033e-05, - "loss": 0.0058, + "loss": 0.0108, "step": 1522 }, { "epoch": 0.49, "learning_rate": 1.6435923763978584e-05, - "loss": 0.0053, + "loss": 0.0095, "step": 1523 }, { "epoch": 0.49, "learning_rate": 1.642044708861022e-05, - "loss": 0.011, + "loss": 0.0106, "step": 1524 }, { "epoch": 0.49, "learning_rate": 1.640496888724761e-05, - "loss": 0.0109, + "loss": 0.0102, "step": 1525 }, { "epoch": 0.49, "learning_rate": 1.6389489176519084e-05, - "loss": 0.0065, + "loss": 0.0074, "step": 1526 }, { "epoch": 0.49, "learning_rate": 1.637400797305457e-05, - "loss": 0.0068, + "loss": 0.0061, "step": 1527 }, { "epoch": 0.49, "learning_rate": 1.635852529348561e-05, - "loss": 0.0056, + "loss": 0.0057, "step": 1528 }, { "epoch": 0.49, "learning_rate": 1.6343041154445343e-05, - "loss": 0.0091, + "loss": 0.0106, "step": 1529 }, { "epoch": 0.49, "learning_rate": 1.6327555572568465e-05, - "loss": 0.0044, + "loss": 0.0051, "step": 1530 }, { "epoch": 0.49, "learning_rate": 1.631206856449122e-05, - "loss": 0.0101, + "loss": 0.0098, "step": 1531 }, { "epoch": 0.49, "learning_rate": 1.6296580146851384e-05, - "loss": 0.0104, + "loss": 0.0086, "step": 1532 }, { "epoch": 0.49, "learning_rate": 1.6281090336288257e-05, - "loss": 0.0053, + "loss": 0.0063, "step": 1533 }, { "epoch": 0.49, "learning_rate": 1.626559914944262e-05, - "loss": 0.0077, + "loss": 0.0049, "step": 1534 }, { "epoch": 0.49, "learning_rate": 1.6250106602956753e-05, - "loss": 0.0082, + "loss": 0.008, "step": 1535 }, { "epoch": 0.49, "learning_rate": 1.6234612713474383e-05, - "loss": 0.0077, + "loss": 0.0058, "step": 1536 }, { "epoch": 0.49, "learning_rate": 1.621911749764067e-05, - "loss": 0.0094, + "loss": 0.0108, "step": 1537 }, { "epoch": 0.49, "learning_rate": 1.6203620972102224e-05, - "loss": 0.0074, + "loss": 0.0071, "step": 1538 }, { "epoch": 0.49, "learning_rate": 1.6188123153507044e-05, - "loss": 0.0093, + "loss": 0.0099, "step": 1539 }, { "epoch": 0.49, "learning_rate": 1.6172624058504526e-05, - "loss": 0.0119, + "loss": 0.0105, "step": 1540 }, { "epoch": 0.49, "learning_rate": 1.6157123703745433e-05, - "loss": 0.0064, + "loss": 0.0058, "step": 1541 }, { "epoch": 0.49, "learning_rate": 1.614162210588188e-05, - "loss": 0.01, + "loss": 0.0082, "step": 1542 }, { "epoch": 0.49, "learning_rate": 1.6126119281567324e-05, - "loss": 0.0065, + "loss": 0.0056, "step": 1543 }, { "epoch": 0.49, "learning_rate": 1.611061524745653e-05, - "loss": 0.0083, + "loss": 0.0087, "step": 1544 }, { "epoch": 0.49, "learning_rate": 1.609511002020557e-05, - "loss": 0.0071, + "loss": 0.0054, "step": 1545 }, { "epoch": 0.49, "learning_rate": 1.60796036164718e-05, - "loss": 0.0026, + "loss": 0.0027, "step": 1546 }, { "epoch": 0.49, "learning_rate": 1.606409605291384e-05, - "loss": 0.0057, + "loss": 0.0052, "step": 1547 }, { "epoch": 0.49, "learning_rate": 1.6048587346191543e-05, - "loss": 0.0075, + "loss": 0.0084, "step": 1548 }, { "epoch": 0.49, "learning_rate": 1.6033077512966003e-05, - "loss": 0.0055, + "loss": 0.0069, "step": 1549 }, { "epoch": 0.5, "learning_rate": 1.601756656989952e-05, - "loss": 0.0058, + "loss": 0.0055, "step": 1550 }, { "epoch": 0.5, "learning_rate": 1.6002054533655583e-05, - "loss": 0.0114, + "loss": 0.0142, "step": 1551 }, { "epoch": 0.5, "learning_rate": 1.5986541420898873e-05, - "loss": 0.0048, + "loss": 0.0047, "step": 1552 }, { "epoch": 0.5, "learning_rate": 1.5971027248295197e-05, - "loss": 0.0071, + "loss": 0.0087, "step": 1553 }, { "epoch": 0.5, "learning_rate": 1.5955512032511526e-05, - "loss": 0.0084, + "loss": 0.0061, "step": 1554 }, { "epoch": 0.5, "learning_rate": 1.5939995790215942e-05, - "loss": 0.0086, + "loss": 0.0068, "step": 1555 }, { "epoch": 0.5, "learning_rate": 1.5924478538077632e-05, - "loss": 0.0067, + "loss": 0.0062, "step": 1556 }, { "epoch": 0.5, "learning_rate": 1.5908960292766862e-05, - "loss": 0.0066, + "loss": 0.0092, "step": 1557 }, { "epoch": 0.5, "learning_rate": 1.5893441070954976e-05, - "loss": 0.0089, + "loss": 0.0075, "step": 1558 }, { "epoch": 0.5, "learning_rate": 1.587792088931435e-05, - "loss": 0.0057, + "loss": 0.004, "step": 1559 }, { "epoch": 0.5, "learning_rate": 1.586239976451841e-05, - "loss": 0.0099, + "loss": 0.0074, "step": 1560 }, { "epoch": 0.5, "learning_rate": 1.5846877713241592e-05, - "loss": 0.0149, + "loss": 0.0137, "step": 1561 }, { "epoch": 0.5, "learning_rate": 1.583135475215931e-05, - "loss": 0.0087, + "loss": 0.0084, "step": 1562 }, { "epoch": 0.5, "learning_rate": 1.5815830897947976e-05, - "loss": 0.011, + "loss": 0.0111, "step": 1563 }, { "epoch": 0.5, "learning_rate": 1.5800306167284953e-05, - "loss": 0.0086, + "loss": 0.008, "step": 1564 }, { "epoch": 0.5, "learning_rate": 1.5784780576848543e-05, - "loss": 0.0098, + "loss": 0.0093, "step": 1565 }, { "epoch": 0.5, "learning_rate": 1.5769254143317977e-05, - "loss": 0.0121, + "loss": 0.013, "step": 1566 }, { "epoch": 0.5, - "eval_loss": 0.01018073782324791, - "eval_runtime": 28.108, - "eval_samples_per_second": 187.598, - "eval_steps_per_second": 5.87, + "eval_loss": 0.009752409532666206, + "eval_runtime": 28.4298, + "eval_samples_per_second": 185.474, + "eval_steps_per_second": 11.608, "step": 1566 }, { "epoch": 0.5, "learning_rate": 1.5753726883373387e-05, - "loss": 0.0048, + "loss": 0.0059, "step": 1567 }, { "epoch": 0.5, "learning_rate": 1.57381988136958e-05, - "loss": 0.0064, + "loss": 0.0066, "step": 1568 }, { "epoch": 0.5, "learning_rate": 1.57226699509671e-05, - "loss": 0.0116, + "loss": 0.0118, "step": 1569 }, { "epoch": 0.5, "learning_rate": 1.5707140311870037e-05, - "loss": 0.0047, + "loss": 0.0034, "step": 1570 }, { "epoch": 0.5, "learning_rate": 1.5691609913088187e-05, - "loss": 0.01, + "loss": 0.0113, "step": 1571 }, { "epoch": 0.5, "learning_rate": 1.567607877130594e-05, - "loss": 0.0117, + "loss": 0.0088, "step": 1572 }, { "epoch": 0.5, "learning_rate": 1.5660546903208494e-05, - "loss": 0.0128, + "loss": 0.0146, "step": 1573 }, { "epoch": 0.5, "learning_rate": 1.5645014325481824e-05, - "loss": 0.0054, + "loss": 0.0072, "step": 1574 }, { "epoch": 0.5, "learning_rate": 1.562948105481266e-05, - "loss": 0.0076, + "loss": 0.005, "step": 1575 }, { "epoch": 0.5, "learning_rate": 1.5613947107888475e-05, - "loss": 0.0173, + "loss": 0.022, "step": 1576 }, { "epoch": 0.5, "learning_rate": 1.559841250139749e-05, - "loss": 0.0077, + "loss": 0.0071, "step": 1577 }, { "epoch": 0.5, "learning_rate": 1.5582877252028616e-05, - "loss": 0.009, + "loss": 0.0093, "step": 1578 }, { "epoch": 0.5, "learning_rate": 1.556734137647145e-05, - "loss": 0.0071, + "loss": 0.0063, "step": 1579 }, { "epoch": 0.5, "learning_rate": 1.5551804891416274e-05, - "loss": 0.0044, + "loss": 0.0041, "step": 1580 }, { "epoch": 0.5, "learning_rate": 1.5536267813554023e-05, - "loss": 0.0085, + "loss": 0.0097, "step": 1581 }, { "epoch": 0.51, "learning_rate": 1.5520730159576265e-05, - "loss": 0.0065, + "loss": 0.0075, "step": 1582 }, { @@ -9533,307 +9533,307 @@ { "epoch": 0.51, "learning_rate": 1.5489653190043578e-05, - "loss": 0.0046, + "loss": 0.0045, "step": 1584 }, { "epoch": 0.51, "learning_rate": 1.5474113907874806e-05, - "loss": 0.0079, + "loss": 0.0067, "step": 1585 }, { "epoch": 0.51, "learning_rate": 1.5458574116362825e-05, - "loss": 0.0157, + "loss": 0.0179, "step": 1586 }, { "epoch": 0.51, "learning_rate": 1.5443033832202106e-05, - "loss": 0.0098, + "loss": 0.0075, "step": 1587 }, { "epoch": 0.51, "learning_rate": 1.5427493072087662e-05, - "loss": 0.0051, + "loss": 0.004, "step": 1588 }, { "epoch": 0.51, "learning_rate": 1.541195185271503e-05, - "loss": 0.0073, + "loss": 0.0075, "step": 1589 }, { "epoch": 0.51, "learning_rate": 1.5396410190780217e-05, - "loss": 0.0037, + "loss": 0.0036, "step": 1590 }, { "epoch": 0.51, "learning_rate": 1.538086810297972e-05, - "loss": 0.0096, + "loss": 0.0052, "step": 1591 }, { "epoch": 0.51, "learning_rate": 1.5365325606010496e-05, - "loss": 0.0121, + "loss": 0.0117, "step": 1592 }, { "epoch": 0.51, "learning_rate": 1.5349782716569935e-05, - "loss": 0.0078, + "loss": 0.0079, "step": 1593 }, { "epoch": 0.51, "learning_rate": 1.5334239451355844e-05, - "loss": 0.0052, + "loss": 0.0049, "step": 1594 }, { "epoch": 0.51, "learning_rate": 1.5318695827066445e-05, - "loss": 0.0038, + "loss": 0.0036, "step": 1595 }, { "epoch": 0.51, "learning_rate": 1.530315186040034e-05, - "loss": 0.009, + "loss": 0.0098, "step": 1596 }, { "epoch": 0.51, "learning_rate": 1.5287607568056498e-05, - "loss": 0.0073, + "loss": 0.0076, "step": 1597 }, { "epoch": 0.51, "learning_rate": 1.5272062966734232e-05, - "loss": 0.0044, + "loss": 0.0047, "step": 1598 }, { "epoch": 0.51, "learning_rate": 1.5256518073133209e-05, - "loss": 0.0134, + "loss": 0.0115, "step": 1599 }, { "epoch": 0.51, "learning_rate": 1.5240972903953384e-05, - "loss": 0.0087, + "loss": 0.0076, "step": 1600 }, { "epoch": 0.51, "learning_rate": 1.5225427475895025e-05, - "loss": 0.0065, + "loss": 0.0062, "step": 1601 }, { "epoch": 0.51, "learning_rate": 1.520988180565867e-05, - "loss": 0.0142, + "loss": 0.0122, "step": 1602 }, { "epoch": 0.51, "learning_rate": 1.5194335909945112e-05, - "loss": 0.0066, + "loss": 0.0092, "step": 1603 }, { "epoch": 0.51, "learning_rate": 1.5178789805455408e-05, - "loss": 0.0038, + "loss": 0.0025, "step": 1604 }, { "epoch": 0.51, "learning_rate": 1.5163243508890813e-05, - "loss": 0.0053, + "loss": 0.0051, "step": 1605 }, { "epoch": 0.51, "learning_rate": 1.5147697036952808e-05, - "loss": 0.0115, + "loss": 0.0109, "step": 1606 }, { "epoch": 0.51, "learning_rate": 1.5132150406343045e-05, - "loss": 0.0091, + "loss": 0.0107, "step": 1607 }, { "epoch": 0.51, "learning_rate": 1.5116603633763372e-05, - "loss": 0.0031, + "loss": 0.0035, "step": 1608 }, { "epoch": 0.51, "learning_rate": 1.5101056735915758e-05, - "loss": 0.0093, + "loss": 0.0075, "step": 1609 }, { "epoch": 0.51, "learning_rate": 1.5085509729502328e-05, - "loss": 0.0059, + "loss": 0.0073, "step": 1610 }, { "epoch": 0.51, "learning_rate": 1.5069962631225321e-05, - "loss": 0.0038, + "loss": 0.0035, "step": 1611 }, { "epoch": 0.51, "learning_rate": 1.5054415457787065e-05, - "loss": 0.0109, + "loss": 0.0089, "step": 1612 }, { "epoch": 0.52, "learning_rate": 1.5038868225889979e-05, - "loss": 0.0111, + "loss": 0.0108, "step": 1613 }, { "epoch": 0.52, "learning_rate": 1.5023320952236542e-05, - "loss": 0.0102, + "loss": 0.0097, "step": 1614 }, { "epoch": 0.52, "learning_rate": 1.5007773653529277e-05, - "loss": 0.0094, + "loss": 0.01, "step": 1615 }, { "epoch": 0.52, "learning_rate": 1.4992226346470727e-05, - "loss": 0.004, + "loss": 0.0049, "step": 1616 }, { "epoch": 0.52, "learning_rate": 1.4976679047763464e-05, - "loss": 0.0113, + "loss": 0.0122, "step": 1617 }, { "epoch": 0.52, "learning_rate": 1.4961131774110024e-05, - "loss": 0.0103, + "loss": 0.0123, "step": 1618 }, { "epoch": 0.52, "learning_rate": 1.4945584542212936e-05, - "loss": 0.0078, + "loss": 0.0075, "step": 1619 }, { "epoch": 0.52, "learning_rate": 1.4930037368774686e-05, - "loss": 0.0082, + "loss": 0.0083, "step": 1620 }, { "epoch": 0.52, "learning_rate": 1.4914490270497673e-05, - "loss": 0.0048, + "loss": 0.0043, "step": 1621 }, { "epoch": 0.52, "learning_rate": 1.4898943264084248e-05, - "loss": 0.0115, + "loss": 0.0161, "step": 1622 }, { "epoch": 0.52, "learning_rate": 1.4883396366236632e-05, - "loss": 0.0108, + "loss": 0.0103, "step": 1623 }, { "epoch": 0.52, "learning_rate": 1.4867849593656954e-05, - "loss": 0.0068, + "loss": 0.0089, "step": 1624 }, { "epoch": 0.52, "learning_rate": 1.48523029630472e-05, - "loss": 0.0041, + "loss": 0.0038, "step": 1625 }, { "epoch": 0.52, "learning_rate": 1.4836756491109189e-05, - "loss": 0.0095, + "loss": 0.0064, "step": 1626 }, { "epoch": 0.52, "learning_rate": 1.4821210194544593e-05, - "loss": 0.0059, + "loss": 0.0062, "step": 1627 }, { "epoch": 0.52, "learning_rate": 1.480566409005489e-05, - "loss": 0.0092, + "loss": 0.009, "step": 1628 }, { "epoch": 0.52, "learning_rate": 1.4790118194341334e-05, - "loss": 0.0054, + "loss": 0.0061, "step": 1629 }, { "epoch": 0.52, "learning_rate": 1.477457252410498e-05, - "loss": 0.0021, + "loss": 0.0027, "step": 1630 }, { "epoch": 0.52, "learning_rate": 1.4759027096046617e-05, - "loss": 0.0061, + "loss": 0.0066, "step": 1631 }, { "epoch": 0.52, "learning_rate": 1.474348192686679e-05, - "loss": 0.011, + "loss": 0.0083, "step": 1632 }, { "epoch": 0.52, "learning_rate": 1.4727937033265768e-05, - "loss": 0.0074, + "loss": 0.0078, "step": 1633 }, { "epoch": 0.52, "learning_rate": 1.4712392431943506e-05, - "loss": 0.0039, + "loss": 0.0033, "step": 1634 }, { @@ -9845,139 +9845,139 @@ { "epoch": 0.52, "learning_rate": 1.4681304172933557e-05, - "loss": 0.003, + "loss": 0.0033, "step": 1636 }, { "epoch": 0.52, "learning_rate": 1.4665760548644158e-05, - "loss": 0.0156, + "loss": 0.0143, "step": 1637 }, { "epoch": 0.52, "learning_rate": 1.465021728343007e-05, - "loss": 0.0055, + "loss": 0.006, "step": 1638 }, { "epoch": 0.52, "learning_rate": 1.4634674393989504e-05, - "loss": 0.0067, + "loss": 0.0066, "step": 1639 }, { "epoch": 0.52, "learning_rate": 1.4619131897020278e-05, - "loss": 0.0082, + "loss": 0.0063, "step": 1640 }, { "epoch": 0.52, "learning_rate": 1.4603589809219789e-05, - "loss": 0.0039, + "loss": 0.0036, "step": 1641 }, { "epoch": 0.52, "learning_rate": 1.4588048147284973e-05, - "loss": 0.0098, + "loss": 0.0102, "step": 1642 }, { "epoch": 0.52, "learning_rate": 1.4572506927912342e-05, - "loss": 0.012, + "loss": 0.0109, "step": 1643 }, { "epoch": 0.53, "learning_rate": 1.4556966167797898e-05, - "loss": 0.0044, + "loss": 0.0045, "step": 1644 }, { "epoch": 0.53, "learning_rate": 1.4541425883637176e-05, - "loss": 0.0053, + "loss": 0.0064, "step": 1645 }, { "epoch": 0.53, "learning_rate": 1.4525886092125195e-05, - "loss": 0.0097, + "loss": 0.0091, "step": 1646 }, { "epoch": 0.53, "learning_rate": 1.4510346809956426e-05, - "loss": 0.0094, + "loss": 0.0048, "step": 1647 }, { "epoch": 0.53, "learning_rate": 1.4494808053824821e-05, - "loss": 0.0036, + "loss": 0.003, "step": 1648 }, { "epoch": 0.53, "learning_rate": 1.447926984042374e-05, - "loss": 0.0082, + "loss": 0.0086, "step": 1649 }, { "epoch": 0.53, "learning_rate": 1.446373218644598e-05, - "loss": 0.009, + "loss": 0.006, "step": 1650 }, { "epoch": 0.53, "learning_rate": 1.4448195108583728e-05, - "loss": 0.0099, + "loss": 0.0167, "step": 1651 }, { "epoch": 0.53, "learning_rate": 1.4432658623528554e-05, - "loss": 0.0122, + "loss": 0.0111, "step": 1652 }, { "epoch": 0.53, "learning_rate": 1.4417122747971388e-05, - "loss": 0.0102, + "loss": 0.0116, "step": 1653 }, { "epoch": 0.53, "learning_rate": 1.440158749860251e-05, - "loss": 0.0109, + "loss": 0.0133, "step": 1654 }, { "epoch": 0.53, "learning_rate": 1.4386052892111522e-05, - "loss": 0.0054, + "loss": 0.01, "step": 1655 }, { "epoch": 0.53, "learning_rate": 1.4370518945187349e-05, - "loss": 0.0129, + "loss": 0.0115, "step": 1656 }, { "epoch": 0.53, "learning_rate": 1.435498567451818e-05, - "loss": 0.0046, + "loss": 0.0055, "step": 1657 }, { "epoch": 0.53, "learning_rate": 1.4339453096791505e-05, - "loss": 0.0071, + "loss": 0.008, "step": 1658 }, { @@ -9989,97 +9989,97 @@ { "epoch": 0.53, "learning_rate": 1.4308390086911819e-05, - "loss": 0.0066, + "loss": 0.0049, "step": 1660 }, { "epoch": 0.53, "learning_rate": 1.429285968812997e-05, - "loss": 0.0121, + "loss": 0.0117, "step": 1661 }, { "epoch": 0.53, "learning_rate": 1.4277330049032904e-05, - "loss": 0.0057, + "loss": 0.0063, "step": 1662 }, { "epoch": 0.53, "learning_rate": 1.4261801186304205e-05, - "loss": 0.0076, + "loss": 0.0071, "step": 1663 }, { "epoch": 0.53, "learning_rate": 1.4246273116626615e-05, - "loss": 0.005, + "loss": 0.0053, "step": 1664 }, { "epoch": 0.53, "learning_rate": 1.4230745856682027e-05, - "loss": 0.0119, + "loss": 0.0141, "step": 1665 }, { "epoch": 0.53, "learning_rate": 1.421521942315146e-05, - "loss": 0.0042, + "loss": 0.004, "step": 1666 }, { "epoch": 0.53, "learning_rate": 1.4199693832715051e-05, - "loss": 0.0083, + "loss": 0.0087, "step": 1667 }, { "epoch": 0.53, "learning_rate": 1.4184169102052023e-05, - "loss": 0.0052, + "loss": 0.0065, "step": 1668 }, { "epoch": 0.53, "learning_rate": 1.4168645247840695e-05, - "loss": 0.0065, + "loss": 0.006, "step": 1669 }, { "epoch": 0.53, "learning_rate": 1.4153122286758414e-05, - "loss": 0.005, + "loss": 0.0054, "step": 1670 }, { "epoch": 0.53, "learning_rate": 1.4137600235481588e-05, - "loss": 0.0072, + "loss": 0.0071, "step": 1671 }, { "epoch": 0.53, "learning_rate": 1.4122079110685655e-05, - "loss": 0.0047, + "loss": 0.005, "step": 1672 }, { "epoch": 0.53, "learning_rate": 1.4106558929045028e-05, - "loss": 0.0054, + "loss": 0.0052, "step": 1673 }, { "epoch": 0.53, "learning_rate": 1.4091039707233144e-05, - "loss": 0.007, + "loss": 0.0075, "step": 1674 }, { "epoch": 0.53, "learning_rate": 1.4075521461922372e-05, - "loss": 0.0022, + "loss": 0.0021, "step": 1675 }, { @@ -10091,145 +10091,145 @@ { "epoch": 0.54, "learning_rate": 1.4044487967488475e-05, - "loss": 0.0112, + "loss": 0.0107, "step": 1677 }, { "epoch": 0.54, "learning_rate": 1.4028972751704806e-05, - "loss": 0.0052, + "loss": 0.0041, "step": 1678 }, { "epoch": 0.54, "learning_rate": 1.4013458579101131e-05, - "loss": 0.006, + "loss": 0.0059, "step": 1679 }, { "epoch": 0.54, "learning_rate": 1.3997945466344416e-05, - "loss": 0.0094, + "loss": 0.0096, "step": 1680 }, { "epoch": 0.54, "learning_rate": 1.3982433430100481e-05, - "loss": 0.0043, + "loss": 0.0024, "step": 1681 }, { "epoch": 0.54, "learning_rate": 1.3966922487034004e-05, - "loss": 0.007, + "loss": 0.0073, "step": 1682 }, { "epoch": 0.54, "learning_rate": 1.395141265380846e-05, - "loss": 0.0075, + "loss": 0.0078, "step": 1683 }, { "epoch": 0.54, "learning_rate": 1.393590394708616e-05, - "loss": 0.0078, + "loss": 0.0071, "step": 1684 }, { "epoch": 0.54, "learning_rate": 1.39203963835282e-05, - "loss": 0.0088, + "loss": 0.0107, "step": 1685 }, { "epoch": 0.54, "learning_rate": 1.3904889979794428e-05, - "loss": 0.0036, + "loss": 0.0069, "step": 1686 }, { "epoch": 0.54, "learning_rate": 1.3889384752543477e-05, - "loss": 0.0053, + "loss": 0.0048, "step": 1687 }, { "epoch": 0.54, "learning_rate": 1.3873880718432681e-05, - "loss": 0.0036, + "loss": 0.003, "step": 1688 }, { "epoch": 0.54, "learning_rate": 1.385837789411812e-05, - "loss": 0.0183, + "loss": 0.0152, "step": 1689 }, { "epoch": 0.54, "learning_rate": 1.3842876296254571e-05, - "loss": 0.0036, + "loss": 0.0034, "step": 1690 }, { "epoch": 0.54, "learning_rate": 1.3827375941495475e-05, - "loss": 0.011, + "loss": 0.0104, "step": 1691 }, { "epoch": 0.54, "learning_rate": 1.3811876846492955e-05, - "loss": 0.0023, + "loss": 0.0025, "step": 1692 }, { "epoch": 0.54, "learning_rate": 1.3796379027897777e-05, - "loss": 0.0138, + "loss": 0.0113, "step": 1693 }, { "epoch": 0.54, "learning_rate": 1.378088250235933e-05, - "loss": 0.0054, + "loss": 0.005, "step": 1694 }, { "epoch": 0.54, "learning_rate": 1.3765387286525625e-05, - "loss": 0.009, + "loss": 0.0098, "step": 1695 }, { "epoch": 0.54, "learning_rate": 1.3749893397043248e-05, - "loss": 0.0046, + "loss": 0.0044, "step": 1696 }, { "epoch": 0.54, "learning_rate": 1.3734400850557384e-05, - "loss": 0.0086, + "loss": 0.0091, "step": 1697 }, { "epoch": 0.54, "learning_rate": 1.3718909663711749e-05, - "loss": 0.0023, + "loss": 0.0024, "step": 1698 }, { "epoch": 0.54, "learning_rate": 1.3703419853148617e-05, - "loss": 0.009, + "loss": 0.0095, "step": 1699 }, { "epoch": 0.54, "learning_rate": 1.3687931435508788e-05, - "loss": 0.008, + "loss": 0.0078, "step": 1700 }, { @@ -10241,79 +10241,79 @@ { "epoch": 0.54, "learning_rate": 1.3656958845554653e-05, - "loss": 0.007, + "loss": 0.0075, "step": 1702 }, { "epoch": 0.54, "learning_rate": 1.3641474706514393e-05, - "loss": 0.0075, + "loss": 0.0069, "step": 1703 }, { "epoch": 0.54, "learning_rate": 1.3625992026945434e-05, - "loss": 0.0081, + "loss": 0.0054, "step": 1704 }, { "epoch": 0.54, "learning_rate": 1.3610510823480924e-05, - "loss": 0.0077, + "loss": 0.0066, "step": 1705 }, { "epoch": 0.54, "learning_rate": 1.3595031112752394e-05, - "loss": 0.0094, + "loss": 0.009, "step": 1706 }, { "epoch": 0.55, "learning_rate": 1.3579552911389782e-05, - "loss": 0.0051, + "loss": 0.0042, "step": 1707 }, { "epoch": 0.55, "learning_rate": 1.356407623602142e-05, - "loss": 0.0127, + "loss": 0.0141, "step": 1708 }, { "epoch": 0.55, "learning_rate": 1.3548601103273969e-05, - "loss": 0.0044, + "loss": 0.0043, "step": 1709 }, { "epoch": 0.55, "learning_rate": 1.3533127529772469e-05, - "loss": 0.0071, + "loss": 0.0075, "step": 1710 }, { "epoch": 0.55, "learning_rate": 1.3517655532140251e-05, - "loss": 0.0053, + "loss": 0.0055, "step": 1711 }, { "epoch": 0.55, "learning_rate": 1.3502185126998973e-05, - "loss": 0.0057, + "loss": 0.0064, "step": 1712 }, { "epoch": 0.55, "learning_rate": 1.3486716330968586e-05, - "loss": 0.0088, + "loss": 0.0085, "step": 1713 }, { "epoch": 0.55, "learning_rate": 1.3471249160667291e-05, - "loss": 0.0078, + "loss": 0.0129, "step": 1714 }, { @@ -10325,67 +10325,67 @@ { "epoch": 0.55, "learning_rate": 1.3440319763716108e-05, - "loss": 0.0072, + "loss": 0.0078, "step": 1716 }, { "epoch": 0.55, "learning_rate": 1.3424857570293837e-05, - "loss": 0.0049, + "loss": 0.0053, "step": 1717 }, { "epoch": 0.55, "learning_rate": 1.3409397069055885e-05, - "loss": 0.0051, + "loss": 0.0049, "step": 1718 }, { "epoch": 0.55, "learning_rate": 1.3393938276611538e-05, - "loss": 0.0077, + "loss": 0.0054, "step": 1719 }, { "epoch": 0.55, "learning_rate": 1.3378481209568276e-05, - "loss": 0.0025, + "loss": 0.0032, "step": 1720 }, { "epoch": 0.55, "learning_rate": 1.3363025884531705e-05, - "loss": 0.0109, + "loss": 0.0088, "step": 1721 }, { "epoch": 0.55, "learning_rate": 1.3347572318105572e-05, - "loss": 0.0035, + "loss": 0.0047, "step": 1722 }, { "epoch": 0.55, "learning_rate": 1.3332120526891729e-05, - "loss": 0.0027, + "loss": 0.0035, "step": 1723 }, { "epoch": 0.55, "learning_rate": 1.3316670527490116e-05, - "loss": 0.0081, + "loss": 0.0069, "step": 1724 }, { "epoch": 0.55, "learning_rate": 1.3301222336498755e-05, - "loss": 0.0055, + "loss": 0.0061, "step": 1725 }, { "epoch": 0.55, "learning_rate": 1.3285775970513738e-05, - "loss": 0.0065, + "loss": 0.006, "step": 1726 }, { @@ -10397,31 +10397,31 @@ { "epoch": 0.55, "learning_rate": 1.3254888779937178e-05, - "loss": 0.0096, + "loss": 0.0094, "step": 1728 }, { "epoch": 0.55, "learning_rate": 1.3239447988527922e-05, - "loss": 0.005, + "loss": 0.0048, "step": 1729 }, { "epoch": 0.55, "learning_rate": 1.3224009088489526e-05, - "loss": 0.0117, + "loss": 0.0048, "step": 1730 }, { "epoch": 0.55, "learning_rate": 1.3208572096408092e-05, - "loss": 0.0085, + "loss": 0.0084, "step": 1731 }, { "epoch": 0.55, "learning_rate": 1.3193137028867654e-05, - "loss": 0.0076, + "loss": 0.0068, "step": 1732 }, { @@ -10433,223 +10433,223 @@ { "epoch": 0.55, "learning_rate": 1.3162272733735627e-05, - "loss": 0.0094, + "loss": 0.0105, "step": 1734 }, { "epoch": 0.55, "learning_rate": 1.3146843539301719e-05, - "loss": 0.009, + "loss": 0.0089, "step": 1735 }, { "epoch": 0.55, "learning_rate": 1.3131416335724148e-05, - "loss": 0.0042, + "loss": 0.0038, "step": 1736 }, { "epoch": 0.55, "learning_rate": 1.3115991139576444e-05, - "loss": 0.0072, + "loss": 0.0083, "step": 1737 }, { "epoch": 0.56, "learning_rate": 1.310056796742998e-05, - "loss": 0.0095, + "loss": 0.0083, "step": 1738 }, { "epoch": 0.56, "learning_rate": 1.3085146835853963e-05, - "loss": 0.0126, + "loss": 0.0106, "step": 1739 }, { "epoch": 0.56, "learning_rate": 1.3069727761415389e-05, - "loss": 0.0093, + "loss": 0.0088, "step": 1740 }, { "epoch": 0.56, "learning_rate": 1.3054310760679072e-05, - "loss": 0.0044, + "loss": 0.0074, "step": 1741 }, { "epoch": 0.56, "learning_rate": 1.3038895850207565e-05, - "loss": 0.0061, + "loss": 0.0067, "step": 1742 }, { "epoch": 0.56, "learning_rate": 1.3023483046561198e-05, - "loss": 0.0059, + "loss": 0.0074, "step": 1743 }, { "epoch": 0.56, "learning_rate": 1.3008072366298045e-05, - "loss": 0.0076, + "loss": 0.0064, "step": 1744 }, { "epoch": 0.56, "learning_rate": 1.2992663825973866e-05, - "loss": 0.005, + "loss": 0.0048, "step": 1745 }, { "epoch": 0.56, "learning_rate": 1.2977257442142155e-05, - "loss": 0.0067, + "loss": 0.0075, "step": 1746 }, { "epoch": 0.56, "learning_rate": 1.2961853231354071e-05, - "loss": 0.0107, + "loss": 0.0139, "step": 1747 }, { "epoch": 0.56, "learning_rate": 1.2946451210158447e-05, - "loss": 0.0101, + "loss": 0.0134, "step": 1748 }, { "epoch": 0.56, "learning_rate": 1.2931051395101755e-05, - "loss": 0.0088, + "loss": 0.0087, "step": 1749 }, { "epoch": 0.56, "learning_rate": 1.2915653802728108e-05, - "loss": 0.0046, + "loss": 0.0103, "step": 1750 }, { "epoch": 0.56, "learning_rate": 1.290025844957922e-05, - "loss": 0.0074, + "loss": 0.0066, "step": 1751 }, { "epoch": 0.56, "learning_rate": 1.2884865352194414e-05, - "loss": 0.0125, + "loss": 0.0104, "step": 1752 }, { "epoch": 0.56, "learning_rate": 1.286947452711057e-05, - "loss": 0.0054, + "loss": 0.006, "step": 1753 }, { "epoch": 0.56, "learning_rate": 1.2854085990862145e-05, - "loss": 0.0118, + "loss": 0.0104, "step": 1754 }, { "epoch": 0.56, "learning_rate": 1.2838699759981123e-05, - "loss": 0.0045, + "loss": 0.0036, "step": 1755 }, { "epoch": 0.56, "learning_rate": 1.282331585099702e-05, - "loss": 0.0065, + "loss": 0.0045, "step": 1756 }, { "epoch": 0.56, "learning_rate": 1.2807934280436862e-05, - "loss": 0.0303, + "loss": 0.023, "step": 1757 }, { "epoch": 0.56, "learning_rate": 1.2792555064825145e-05, - "loss": 0.0029, + "loss": 0.0026, "step": 1758 }, { "epoch": 0.56, "learning_rate": 1.2777178220683857e-05, - "loss": 0.0073, + "loss": 0.0083, "step": 1759 }, { "epoch": 0.56, "learning_rate": 1.2761803764532418e-05, - "loss": 0.0114, + "loss": 0.0165, "step": 1760 }, { "epoch": 0.56, "learning_rate": 1.2746431712887701e-05, - "loss": 0.0064, + "loss": 0.0055, "step": 1761 }, { "epoch": 0.56, "learning_rate": 1.273106208226398e-05, - "loss": 0.006, + "loss": 0.01, "step": 1762 }, { "epoch": 0.56, "learning_rate": 1.271569488917294e-05, - "loss": 0.0093, + "loss": 0.0119, "step": 1763 }, { "epoch": 0.56, "learning_rate": 1.2700330150123633e-05, - "loss": 0.0098, + "loss": 0.0103, "step": 1764 }, { "epoch": 0.56, "learning_rate": 1.2684967881622498e-05, - "loss": 0.005, + "loss": 0.0055, "step": 1765 }, { "epoch": 0.56, "learning_rate": 1.2669608100173293e-05, - "loss": 0.0064, + "loss": 0.0074, "step": 1766 }, { "epoch": 0.56, "learning_rate": 1.265425082227713e-05, - "loss": 0.0087, + "loss": 0.0101, "step": 1767 }, { "epoch": 0.56, "learning_rate": 1.2638896064432405e-05, - "loss": 0.0083, + "loss": 0.0081, "step": 1768 }, { "epoch": 0.56, "learning_rate": 1.2623543843134825e-05, - "loss": 0.0057, + "loss": 0.0065, "step": 1769 }, { "epoch": 0.57, "learning_rate": 1.2608194174877376e-05, - "loss": 0.0063, + "loss": 0.0061, "step": 1770 }, { @@ -10661,199 +10661,199 @@ { "epoch": 0.57, "learning_rate": 1.2577502563441019e-05, - "loss": 0.0091, + "loss": 0.0102, "step": 1772 }, { "epoch": 0.57, "learning_rate": 1.2562160653234278e-05, - "loss": 0.0105, + "loss": 0.0117, "step": 1773 }, { "epoch": 0.57, "learning_rate": 1.2546821362011969e-05, - "loss": 0.0109, + "loss": 0.01, "step": 1774 }, { "epoch": 0.57, "learning_rate": 1.2531484706253173e-05, - "loss": 0.0077, + "loss": 0.005, "step": 1775 }, { "epoch": 0.57, "learning_rate": 1.2516150702434147e-05, - "loss": 0.0119, + "loss": 0.0108, "step": 1776 }, { "epoch": 0.57, "learning_rate": 1.2500819367028291e-05, - "loss": 0.0156, + "loss": 0.009, "step": 1777 }, { "epoch": 0.57, "learning_rate": 1.2485490716506156e-05, - "loss": 0.0043, + "loss": 0.004, "step": 1778 }, { "epoch": 0.57, "learning_rate": 1.2470164767335381e-05, - "loss": 0.0102, + "loss": 0.0092, "step": 1779 }, { "epoch": 0.57, "learning_rate": 1.2454841535980735e-05, - "loss": 0.0062, + "loss": 0.0077, "step": 1780 }, { "epoch": 0.57, "learning_rate": 1.2439521038904036e-05, - "loss": 0.0059, + "loss": 0.0156, "step": 1781 }, { "epoch": 0.57, "learning_rate": 1.2424203292564188e-05, - "loss": 0.0018, + "loss": 0.0021, "step": 1782 }, { "epoch": 0.57, "learning_rate": 1.2408888313417135e-05, - "loss": 0.0071, + "loss": 0.0089, "step": 1783 }, { "epoch": 0.57, "learning_rate": 1.2393576117915835e-05, - "loss": 0.0075, + "loss": 0.0091, "step": 1784 }, { "epoch": 0.57, "learning_rate": 1.2378266722510276e-05, - "loss": 0.006, + "loss": 0.0067, "step": 1785 }, { "epoch": 0.57, "learning_rate": 1.2362960143647415e-05, - "loss": 0.0074, + "loss": 0.0068, "step": 1786 }, { "epoch": 0.57, "learning_rate": 1.2347656397771202e-05, - "loss": 0.0082, + "loss": 0.0072, "step": 1787 }, { "epoch": 0.57, "learning_rate": 1.2332355501322541e-05, - "loss": 0.0078, + "loss": 0.0071, "step": 1788 }, { "epoch": 0.57, "learning_rate": 1.2317057470739262e-05, - "loss": 0.0048, + "loss": 0.0047, "step": 1789 }, { "epoch": 0.57, "learning_rate": 1.2301762322456126e-05, - "loss": 0.0083, + "loss": 0.0078, "step": 1790 }, { "epoch": 0.57, "learning_rate": 1.2286470072904805e-05, - "loss": 0.0153, + "loss": 0.006, "step": 1791 }, { "epoch": 0.57, "learning_rate": 1.2271180738513834e-05, - "loss": 0.0056, + "loss": 0.0058, "step": 1792 }, { "epoch": 0.57, "learning_rate": 1.2255894335708647e-05, - "loss": 0.0071, + "loss": 0.0074, "step": 1793 }, { "epoch": 0.57, "learning_rate": 1.2240610880911494e-05, - "loss": 0.0084, + "loss": 0.0113, "step": 1794 }, { "epoch": 0.57, "learning_rate": 1.2225330390541483e-05, - "loss": 0.0075, + "loss": 0.007, "step": 1795 }, { "epoch": 0.57, "learning_rate": 1.2210052881014543e-05, - "loss": 0.0036, + "loss": 0.0073, "step": 1796 }, { "epoch": 0.57, "learning_rate": 1.2194778368743366e-05, - "loss": 0.003, + "loss": 0.0031, "step": 1797 }, { "epoch": 0.57, "learning_rate": 1.2179506870137464e-05, - "loss": 0.0138, + "loss": 0.0148, "step": 1798 }, { "epoch": 0.57, "learning_rate": 1.2164238401603078e-05, - "loss": 0.0051, + "loss": 0.0039, "step": 1799 }, { "epoch": 0.57, "learning_rate": 1.2148972979543219e-05, - "loss": 0.004, + "loss": 0.0041, "step": 1800 }, { "epoch": 0.58, "learning_rate": 1.213371062035762e-05, - "loss": 0.0152, + "loss": 0.012, "step": 1801 }, { "epoch": 0.58, "learning_rate": 1.2118451340442703e-05, - "loss": 0.0104, + "loss": 0.0113, "step": 1802 }, { "epoch": 0.58, "learning_rate": 1.2103195156191613e-05, - "loss": 0.0046, + "loss": 0.0058, "step": 1803 }, { "epoch": 0.58, "learning_rate": 1.2087942083994145e-05, - "loss": 0.0065, + "loss": 0.0043, "step": 1804 }, { @@ -10865,259 +10865,259 @@ { "epoch": 0.58, "learning_rate": 1.2057445341302564e-05, - "loss": 0.0065, + "loss": 0.0052, "step": 1806 }, { "epoch": 0.58, "learning_rate": 1.2042201703571267e-05, - "loss": 0.0049, + "loss": 0.0046, "step": 1807 }, { "epoch": 0.58, "learning_rate": 1.2026961243419202e-05, - "loss": 0.0128, + "loss": 0.012, "step": 1808 }, { "epoch": 0.58, "learning_rate": 1.2011723977219286e-05, - "loss": 0.0088, + "loss": 0.0083, "step": 1809 }, { "epoch": 0.58, "learning_rate": 1.1996489921340985e-05, - "loss": 0.0134, + "loss": 0.0119, "step": 1810 }, { "epoch": 0.58, "learning_rate": 1.1981259092150347e-05, - "loss": 0.0069, + "loss": 0.0097, "step": 1811 }, { "epoch": 0.58, "learning_rate": 1.196603150600992e-05, - "loss": 0.0086, + "loss": 0.0087, "step": 1812 }, { "epoch": 0.58, "learning_rate": 1.1950807179278794e-05, - "loss": 0.0104, + "loss": 0.0091, "step": 1813 }, { "epoch": 0.58, "learning_rate": 1.1935586128312554e-05, - "loss": 0.0035, + "loss": 0.0036, "step": 1814 }, { "epoch": 0.58, "learning_rate": 1.1920368369463245e-05, - "loss": 0.0048, + "loss": 0.008, "step": 1815 }, { "epoch": 0.58, "learning_rate": 1.1905153919079404e-05, - "loss": 0.0022, + "loss": 0.0024, "step": 1816 }, { "epoch": 0.58, "learning_rate": 1.188994279350599e-05, - "loss": 0.0069, + "loss": 0.0075, "step": 1817 }, { "epoch": 0.58, "learning_rate": 1.1874735009084407e-05, - "loss": 0.0071, + "loss": 0.0072, "step": 1818 }, { "epoch": 0.58, "learning_rate": 1.1859530582152464e-05, - "loss": 0.0098, + "loss": 0.0072, "step": 1819 }, { "epoch": 0.58, "learning_rate": 1.1844329529044352e-05, - "loss": 0.0097, + "loss": 0.0103, "step": 1820 }, { "epoch": 0.58, "learning_rate": 1.1829131866090651e-05, - "loss": 0.0081, + "loss": 0.0071, "step": 1821 }, { "epoch": 0.58, "learning_rate": 1.1813937609618308e-05, - "loss": 0.0036, + "loss": 0.004, "step": 1822 }, { "epoch": 0.58, "learning_rate": 1.1798746775950578e-05, - "loss": 0.0069, + "loss": 0.0062, "step": 1823 }, { "epoch": 0.58, "learning_rate": 1.1783559381407074e-05, - "loss": 0.0084, + "loss": 0.0055, "step": 1824 }, { "epoch": 0.58, "learning_rate": 1.1768375442303687e-05, - "loss": 0.0078, + "loss": 0.0101, "step": 1825 }, { "epoch": 0.58, "learning_rate": 1.1753194974952615e-05, - "loss": 0.0124, + "loss": 0.0139, "step": 1826 }, { "epoch": 0.58, "learning_rate": 1.173801799566232e-05, - "loss": 0.0091, + "loss": 0.0072, "step": 1827 }, { "epoch": 0.58, "learning_rate": 1.1722844520737509e-05, - "loss": 0.0108, + "loss": 0.0106, "step": 1828 }, { "epoch": 0.58, "learning_rate": 1.1707674566479139e-05, - "loss": 0.008, + "loss": 0.0087, "step": 1829 }, { "epoch": 0.58, "learning_rate": 1.1692508149184366e-05, - "loss": 0.0066, + "loss": 0.0067, "step": 1830 }, { "epoch": 0.58, "learning_rate": 1.1677345285146566e-05, - "loss": 0.0089, + "loss": 0.0056, "step": 1831 }, { "epoch": 0.59, "learning_rate": 1.1662185990655285e-05, - "loss": 0.0053, + "loss": 0.005, "step": 1832 }, { "epoch": 0.59, "learning_rate": 1.164703028199624e-05, - "loss": 0.006, + "loss": 0.0061, "step": 1833 }, { "epoch": 0.59, "learning_rate": 1.1631878175451285e-05, - "loss": 0.0025, + "loss": 0.0027, "step": 1834 }, { "epoch": 0.59, "learning_rate": 1.161672968729843e-05, - "loss": 0.0079, + "loss": 0.008, "step": 1835 }, { "epoch": 0.59, "learning_rate": 1.160158483381176e-05, - "loss": 0.0074, + "loss": 0.0078, "step": 1836 }, { "epoch": 0.59, "learning_rate": 1.1586443631261493e-05, - "loss": 0.0128, + "loss": 0.0086, "step": 1837 }, { "epoch": 0.59, "learning_rate": 1.1571306095913895e-05, - "loss": 0.0058, + "loss": 0.0063, "step": 1838 }, { "epoch": 0.59, "learning_rate": 1.1556172244031306e-05, - "loss": 0.008, + "loss": 0.0075, "step": 1839 }, { "epoch": 0.59, "learning_rate": 1.1541042091872119e-05, - "loss": 0.0144, + "loss": 0.0111, "step": 1840 }, { "epoch": 0.59, "learning_rate": 1.1525915655690727e-05, - "loss": 0.0037, + "loss": 0.0036, "step": 1841 }, { "epoch": 0.59, "learning_rate": 1.151079295173756e-05, - "loss": 0.0034, + "loss": 0.0037, "step": 1842 }, { "epoch": 0.59, "learning_rate": 1.1495673996259006e-05, - "loss": 0.0059, + "loss": 0.0055, "step": 1843 }, { "epoch": 0.59, "learning_rate": 1.1480558805497455e-05, - "loss": 0.0108, + "loss": 0.0099, "step": 1844 }, { "epoch": 0.59, "learning_rate": 1.1465447395691237e-05, - "loss": 0.0079, + "loss": 0.0112, "step": 1845 }, { "epoch": 0.59, "learning_rate": 1.1450339783074626e-05, - "loss": 0.005, + "loss": 0.0049, "step": 1846 }, { "epoch": 0.59, "learning_rate": 1.1435235983877812e-05, - "loss": 0.006, + "loss": 0.0059, "step": 1847 }, { "epoch": 0.59, "learning_rate": 1.1420136014326893e-05, - "loss": 0.0064, + "loss": 0.0075, "step": 1848 }, { @@ -11129,571 +11129,571 @@ { "epoch": 0.59, "learning_rate": 1.138994762904653e-05, - "loss": 0.0138, + "loss": 0.0107, "step": 1850 }, { "epoch": 0.59, "learning_rate": 1.1374859245748637e-05, - "loss": 0.0118, + "loss": 0.0082, "step": 1851 }, { "epoch": 0.59, "learning_rate": 1.1359774756959704e-05, - "loss": 0.0062, + "loss": 0.0061, "step": 1852 }, { "epoch": 0.59, "learning_rate": 1.1344694178885088e-05, - "loss": 0.0061, + "loss": 0.0062, "step": 1853 }, { "epoch": 0.59, "learning_rate": 1.1329617527725926e-05, - "loss": 0.0095, + "loss": 0.0117, "step": 1854 }, { "epoch": 0.59, "learning_rate": 1.1314544819679165e-05, - "loss": 0.0092, + "loss": 0.0071, "step": 1855 }, { "epoch": 0.59, "learning_rate": 1.129947607093748e-05, - "loss": 0.0038, + "loss": 0.0037, "step": 1856 }, { "epoch": 0.59, "learning_rate": 1.1284411297689326e-05, - "loss": 0.0093, + "loss": 0.007, "step": 1857 }, { "epoch": 0.59, "learning_rate": 1.1269350516118871e-05, - "loss": 0.0034, + "loss": 0.0035, "step": 1858 }, { "epoch": 0.59, "learning_rate": 1.1254293742405992e-05, - "loss": 0.0041, + "loss": 0.0044, "step": 1859 }, { "epoch": 0.59, "learning_rate": 1.123924099272627e-05, - "loss": 0.0206, + "loss": 0.0197, "step": 1860 }, { "epoch": 0.59, "learning_rate": 1.1224192283250955e-05, - "loss": 0.0033, + "loss": 0.0041, "step": 1861 }, { "epoch": 0.59, "learning_rate": 1.1209147630146955e-05, - "loss": 0.0151, + "loss": 0.0164, "step": 1862 }, { "epoch": 0.6, "learning_rate": 1.119410704957684e-05, - "loss": 0.0049, + "loss": 0.0047, "step": 1863 }, { "epoch": 0.6, "learning_rate": 1.1179070557698772e-05, - "loss": 0.0054, + "loss": 0.0048, "step": 1864 }, { "epoch": 0.6, "learning_rate": 1.1164038170666547e-05, - "loss": 0.0103, + "loss": 0.0092, "step": 1865 }, { "epoch": 0.6, "learning_rate": 1.1149009904629549e-05, - "loss": 0.0099, + "loss": 0.0087, "step": 1866 }, { "epoch": 0.6, "learning_rate": 1.113398577573271e-05, - "loss": 0.0075, + "loss": 0.0078, "step": 1867 }, { "epoch": 0.6, "learning_rate": 1.1118965800116555e-05, - "loss": 0.0049, + "loss": 0.0042, "step": 1868 }, { "epoch": 0.6, "learning_rate": 1.1103949993917115e-05, - "loss": 0.0085, + "loss": 0.013, "step": 1869 }, { "epoch": 0.6, "learning_rate": 1.1088938373265956e-05, - "loss": 0.0057, + "loss": 0.0073, "step": 1870 }, { "epoch": 0.6, "learning_rate": 1.1073930954290157e-05, - "loss": 0.0108, + "loss": 0.0106, "step": 1871 }, { "epoch": 0.6, "learning_rate": 1.105892775311226e-05, - "loss": 0.0069, + "loss": 0.0108, "step": 1872 }, { "epoch": 0.6, "learning_rate": 1.104392878585029e-05, - "loss": 0.011, + "loss": 0.0094, "step": 1873 }, { "epoch": 0.6, "learning_rate": 1.1028934068617722e-05, - "loss": 0.0043, + "loss": 0.0044, "step": 1874 }, { "epoch": 0.6, "learning_rate": 1.1013943617523462e-05, - "loss": 0.0061, + "loss": 0.0043, "step": 1875 }, { "epoch": 0.6, "learning_rate": 1.0998957448671848e-05, - "loss": 0.0074, + "loss": 0.0067, "step": 1876 }, { "epoch": 0.6, "learning_rate": 1.0983975578162586e-05, - "loss": 0.0054, + "loss": 0.0059, "step": 1877 }, { "epoch": 0.6, "learning_rate": 1.0968998022090798e-05, - "loss": 0.0078, + "loss": 0.0081, "step": 1878 }, { "epoch": 0.6, "learning_rate": 1.0954024796546947e-05, - "loss": 0.0147, + "loss": 0.0148, "step": 1879 }, { "epoch": 0.6, "learning_rate": 1.0939055917616855e-05, - "loss": 0.0126, + "loss": 0.0132, "step": 1880 }, { "epoch": 0.6, "learning_rate": 1.092409140138168e-05, - "loss": 0.005, + "loss": 0.0069, "step": 1881 }, { "epoch": 0.6, "learning_rate": 1.0909131263917874e-05, - "loss": 0.0066, + "loss": 0.0063, "step": 1882 }, { "epoch": 0.6, "learning_rate": 1.08941755212972e-05, - "loss": 0.0026, + "loss": 0.0025, "step": 1883 }, { "epoch": 0.6, "learning_rate": 1.0879224189586705e-05, - "loss": 0.0052, + "loss": 0.0037, "step": 1884 }, { "epoch": 0.6, "learning_rate": 1.0864277284848672e-05, - "loss": 0.0054, + "loss": 0.0058, "step": 1885 }, { "epoch": 0.6, "learning_rate": 1.0849334823140661e-05, - "loss": 0.0062, + "loss": 0.006, "step": 1886 }, { "epoch": 0.6, "learning_rate": 1.083439682051543e-05, - "loss": 0.0061, + "loss": 0.0062, "step": 1887 }, { "epoch": 0.6, "learning_rate": 1.0819463293020957e-05, - "loss": 0.0107, + "loss": 0.0109, "step": 1888 }, { "epoch": 0.6, "learning_rate": 1.0804534256700429e-05, - "loss": 0.0051, + "loss": 0.0044, "step": 1889 }, { "epoch": 0.6, "learning_rate": 1.0789609727592175e-05, - "loss": 0.0073, + "loss": 0.0059, "step": 1890 }, { "epoch": 0.6, "learning_rate": 1.0774689721729715e-05, - "loss": 0.0056, + "loss": 0.0067, "step": 1891 }, { "epoch": 0.6, "learning_rate": 1.0759774255141681e-05, - "loss": 0.0058, + "loss": 0.0064, "step": 1892 }, { "epoch": 0.6, "learning_rate": 1.0744863343851853e-05, - "loss": 0.0049, + "loss": 0.0059, "step": 1893 }, { "epoch": 0.6, "learning_rate": 1.0729957003879104e-05, - "loss": 0.0065, + "loss": 0.0073, "step": 1894 }, { "epoch": 0.61, "learning_rate": 1.0715055251237393e-05, - "loss": 0.0107, + "loss": 0.0087, "step": 1895 }, { "epoch": 0.61, "learning_rate": 1.070015810193576e-05, - "loss": 0.0042, + "loss": 0.0045, "step": 1896 }, { "epoch": 0.61, "learning_rate": 1.0685265571978302e-05, - "loss": 0.003, + "loss": 0.0034, "step": 1897 }, { "epoch": 0.61, "learning_rate": 1.067037767736413e-05, - "loss": 0.0036, + "loss": 0.0047, "step": 1898 }, { "epoch": 0.61, "learning_rate": 1.0655494434087414e-05, - "loss": 0.0077, + "loss": 0.0084, "step": 1899 }, { "epoch": 0.61, "learning_rate": 1.0640615858137289e-05, - "loss": 0.0093, + "loss": 0.0091, "step": 1900 }, { "epoch": 0.61, "learning_rate": 1.0625741965497897e-05, - "loss": 0.0103, + "loss": 0.0102, "step": 1901 }, { "epoch": 0.61, "learning_rate": 1.061087277214835e-05, - "loss": 0.0056, + "loss": 0.0073, "step": 1902 }, { "epoch": 0.61, "learning_rate": 1.0596008294062692e-05, - "loss": 0.0064, + "loss": 0.007, "step": 1903 }, { "epoch": 0.61, "learning_rate": 1.058114854720993e-05, - "loss": 0.0047, + "loss": 0.0048, "step": 1904 }, { "epoch": 0.61, "learning_rate": 1.056629354755396e-05, - "loss": 0.0041, + "loss": 0.0047, "step": 1905 }, { "epoch": 0.61, "learning_rate": 1.0551443311053596e-05, - "loss": 0.008, + "loss": 0.0085, "step": 1906 }, { "epoch": 0.61, "learning_rate": 1.0536597853662539e-05, - "loss": 0.006, + "loss": 0.0059, "step": 1907 }, { "epoch": 0.61, "learning_rate": 1.052175719132933e-05, - "loss": 0.0152, + "loss": 0.0162, "step": 1908 }, { "epoch": 0.61, "learning_rate": 1.0506921339997387e-05, - "loss": 0.0119, + "loss": 0.0096, "step": 1909 }, { "epoch": 0.61, "learning_rate": 1.0492090315604952e-05, - "loss": 0.0039, + "loss": 0.0043, "step": 1910 }, { "epoch": 0.61, "learning_rate": 1.0477264134085063e-05, - "loss": 0.0095, + "loss": 0.0107, "step": 1911 }, { "epoch": 0.61, "learning_rate": 1.0462442811365582e-05, - "loss": 0.0106, + "loss": 0.0105, "step": 1912 }, { "epoch": 0.61, "learning_rate": 1.044762636336913e-05, - "loss": 0.0084, + "loss": 0.0104, "step": 1913 }, { "epoch": 0.61, "learning_rate": 1.0432814806013104e-05, - "loss": 0.004, + "loss": 0.0034, "step": 1914 }, { "epoch": 0.61, "learning_rate": 1.0418008155209646e-05, - "loss": 0.0156, + "loss": 0.0132, "step": 1915 }, { "epoch": 0.61, "learning_rate": 1.0403206426865619e-05, - "loss": 0.0072, + "loss": 0.0084, "step": 1916 }, { "epoch": 0.61, "learning_rate": 1.0388409636882606e-05, - "loss": 0.0042, + "loss": 0.0036, "step": 1917 }, { "epoch": 0.61, "learning_rate": 1.0373617801156872e-05, - "loss": 0.0071, + "loss": 0.0045, "step": 1918 }, { "epoch": 0.61, "learning_rate": 1.0358830935579378e-05, - "loss": 0.0027, + "loss": 0.0033, "step": 1919 }, { "epoch": 0.61, "learning_rate": 1.0344049056035741e-05, - "loss": 0.0098, + "loss": 0.0088, "step": 1920 }, { "epoch": 0.61, "learning_rate": 1.03292721784062e-05, - "loss": 0.009, + "loss": 0.0096, "step": 1921 }, { "epoch": 0.61, "learning_rate": 1.0314500318565653e-05, - "loss": 0.0058, + "loss": 0.0051, "step": 1922 }, { "epoch": 0.61, "learning_rate": 1.0299733492383582e-05, - "loss": 0.0029, + "loss": 0.0033, "step": 1923 }, { "epoch": 0.61, "learning_rate": 1.0284971715724073e-05, - "loss": 0.0068, + "loss": 0.0072, "step": 1924 }, { "epoch": 0.61, "learning_rate": 1.0270215004445793e-05, - "loss": 0.0053, + "loss": 0.0061, "step": 1925 }, { "epoch": 0.62, "learning_rate": 1.0255463374401945e-05, - "loss": 0.0061, + "loss": 0.0067, "step": 1926 }, { "epoch": 0.62, "learning_rate": 1.0240716841440301e-05, - "loss": 0.0153, + "loss": 0.0163, "step": 1927 }, { "epoch": 0.62, "learning_rate": 1.0225975421403135e-05, - "loss": 0.0053, + "loss": 0.0056, "step": 1928 }, { "epoch": 0.62, "learning_rate": 1.021123913012724e-05, - "loss": 0.0072, + "loss": 0.0067, "step": 1929 }, { "epoch": 0.62, "learning_rate": 1.0196507983443899e-05, - "loss": 0.0061, + "loss": 0.0052, "step": 1930 }, { "epoch": 0.62, "learning_rate": 1.0181781997178859e-05, - "loss": 0.0068, + "loss": 0.0069, "step": 1931 }, { "epoch": 0.62, "learning_rate": 1.016706118715233e-05, - "loss": 0.0036, + "loss": 0.0048, "step": 1932 }, { "epoch": 0.62, "learning_rate": 1.015234556917897e-05, - "loss": 0.0083, + "loss": 0.0095, "step": 1933 }, { "epoch": 0.62, "learning_rate": 1.0137635159067834e-05, - "loss": 0.0095, + "loss": 0.0065, "step": 1934 }, { "epoch": 0.62, "learning_rate": 1.0122929972622414e-05, - "loss": 0.0087, + "loss": 0.0074, "step": 1935 }, { "epoch": 0.62, "learning_rate": 1.010823002564056e-05, - "loss": 0.0078, + "loss": 0.0094, "step": 1936 }, { "epoch": 0.62, "learning_rate": 1.0093535333914513e-05, - "loss": 0.006, + "loss": 0.0058, "step": 1937 }, { "epoch": 0.62, "learning_rate": 1.0078845913230868e-05, - "loss": 0.0066, + "loss": 0.0081, "step": 1938 }, { "epoch": 0.62, "learning_rate": 1.006416177937054e-05, - "loss": 0.0134, + "loss": 0.0081, "step": 1939 }, { "epoch": 0.62, "learning_rate": 1.0049482948108787e-05, - "loss": 0.0168, + "loss": 0.016, "step": 1940 }, { "epoch": 0.62, "learning_rate": 1.0034809435215154e-05, - "loss": 0.0092, + "loss": 0.0085, "step": 1941 }, { "epoch": 0.62, "learning_rate": 1.0020141256453477e-05, - "loss": 0.0129, + "loss": 0.0121, "step": 1942 }, { "epoch": 0.62, "learning_rate": 1.0005478427581865e-05, - "loss": 0.0038, + "loss": 0.0039, "step": 1943 }, { "epoch": 0.62, "learning_rate": 9.990820964352678e-06, - "loss": 0.004, + "loss": 0.005, "step": 1944 }, { @@ -11705,79 +11705,79 @@ { "epoch": 0.62, "learning_rate": 9.961522197802166e-06, - "loss": 0.0054, + "loss": 0.0044, "step": 1946 }, { "epoch": 0.62, "learning_rate": 9.946880925956668e-06, - "loss": 0.0246, + "loss": 0.0071, "step": 1947 }, { "epoch": 0.62, "learning_rate": 9.932245082705223e-06, - "loss": 0.0114, + "loss": 0.01, "step": 1948 }, { "epoch": 0.62, "learning_rate": 9.917614683771187e-06, - "loss": 0.0038, + "loss": 0.0036, "step": 1949 }, { "epoch": 0.62, "learning_rate": 9.902989744872083e-06, - "loss": 0.0052, + "loss": 0.0049, "step": 1950 }, { "epoch": 0.62, "learning_rate": 9.888370281719573e-06, - "loss": 0.0068, + "loss": 0.0061, "step": 1951 }, { "epoch": 0.62, "learning_rate": 9.873756310019417e-06, - "loss": 0.0123, + "loss": 0.0117, "step": 1952 }, { "epoch": 0.62, "learning_rate": 9.859147845471495e-06, - "loss": 0.0041, + "loss": 0.0044, "step": 1953 }, { "epoch": 0.62, "learning_rate": 9.84454490376975e-06, - "loss": 0.0115, + "loss": 0.0156, "step": 1954 }, { "epoch": 0.62, "learning_rate": 9.829947500602217e-06, - "loss": 0.0082, + "loss": 0.0106, "step": 1955 }, { "epoch": 0.62, "learning_rate": 9.815355651650962e-06, - "loss": 0.0045, + "loss": 0.0044, "step": 1956 }, { "epoch": 0.63, "learning_rate": 9.800769372592088e-06, - "loss": 0.0098, + "loss": 0.009, "step": 1957 }, { "epoch": 0.63, "learning_rate": 9.78618867909572e-06, - "loss": 0.0041, + "loss": 0.0042, "step": 1958 }, { @@ -11789,187 +11789,187 @@ { "epoch": 0.63, "learning_rate": 9.757044111440957e-06, - "loss": 0.0064, + "loss": 0.0059, "step": 1960 }, { "epoch": 0.63, "learning_rate": 9.742480268592742e-06, - "loss": 0.0057, + "loss": 0.0055, "step": 1961 }, { "epoch": 0.63, "learning_rate": 9.727922073927334e-06, - "loss": 0.0057, + "loss": 0.006, "step": 1962 }, { "epoch": 0.63, "learning_rate": 9.713369543084689e-06, - "loss": 0.0068, + "loss": 0.0059, "step": 1963 }, { "epoch": 0.63, "learning_rate": 9.69882269169868e-06, - "loss": 0.0104, + "loss": 0.0115, "step": 1964 }, { "epoch": 0.63, "learning_rate": 9.684281535397057e-06, - "loss": 0.0067, + "loss": 0.0058, "step": 1965 }, { "epoch": 0.63, "learning_rate": 9.669746089801478e-06, - "loss": 0.0043, + "loss": 0.004, "step": 1966 }, { "epoch": 0.63, "learning_rate": 9.655216370527445e-06, - "loss": 0.006, + "loss": 0.0051, "step": 1967 }, { "epoch": 0.63, "learning_rate": 9.640692393184316e-06, - "loss": 0.0055, + "loss": 0.006, "step": 1968 }, { "epoch": 0.63, "learning_rate": 9.62617417337529e-06, - "loss": 0.0083, + "loss": 0.0086, "step": 1969 }, { "epoch": 0.63, "learning_rate": 9.611661726697365e-06, - "loss": 0.0093, + "loss": 0.0087, "step": 1970 }, { "epoch": 0.63, "learning_rate": 9.59715506874134e-06, - "loss": 0.0051, + "loss": 0.0061, "step": 1971 }, { "epoch": 0.63, "learning_rate": 9.58265421509181e-06, - "loss": 0.0045, + "loss": 0.0058, "step": 1972 }, { "epoch": 0.63, "learning_rate": 9.568159181327108e-06, - "loss": 0.0097, + "loss": 0.0099, "step": 1973 }, { "epoch": 0.63, "learning_rate": 9.553669983019345e-06, - "loss": 0.009, + "loss": 0.0076, "step": 1974 }, { "epoch": 0.63, "learning_rate": 9.539186635734336e-06, - "loss": 0.0051, + "loss": 0.0048, "step": 1975 }, { "epoch": 0.63, "learning_rate": 9.524709155031624e-06, - "loss": 0.0057, + "loss": 0.0069, "step": 1976 }, { "epoch": 0.63, "learning_rate": 9.510237556464458e-06, - "loss": 0.0093, + "loss": 0.0102, "step": 1977 }, { "epoch": 0.63, "learning_rate": 9.495771855579742e-06, - "loss": 0.0148, + "loss": 0.0149, "step": 1978 }, { "epoch": 0.63, "learning_rate": 9.481312067918074e-06, - "loss": 0.0115, + "loss": 0.0079, "step": 1979 }, { "epoch": 0.63, "learning_rate": 9.466858209013668e-06, - "loss": 0.0119, + "loss": 0.0094, "step": 1980 }, { "epoch": 0.63, "learning_rate": 9.452410294394399e-06, - "loss": 0.009, + "loss": 0.0073, "step": 1981 }, { "epoch": 0.63, "learning_rate": 9.43796833958174e-06, - "loss": 0.0066, + "loss": 0.0101, "step": 1982 }, { "epoch": 0.63, "learning_rate": 9.423532360090755e-06, - "loss": 0.0065, + "loss": 0.0094, "step": 1983 }, { "epoch": 0.63, "learning_rate": 9.409102371430107e-06, - "loss": 0.0047, + "loss": 0.0038, "step": 1984 }, { "epoch": 0.63, "learning_rate": 9.394678389102014e-06, - "loss": 0.0073, + "loss": 0.0076, "step": 1985 }, { "epoch": 0.63, "learning_rate": 9.38026042860223e-06, - "loss": 0.0063, + "loss": 0.0074, "step": 1986 }, { "epoch": 0.63, "learning_rate": 9.365848505420066e-06, - "loss": 0.0092, + "loss": 0.0089, "step": 1987 }, { "epoch": 0.63, "learning_rate": 9.351442635038315e-06, - "loss": 0.0073, + "loss": 0.0083, "step": 1988 }, { "epoch": 0.64, "learning_rate": 9.337042832933291e-06, - "loss": 0.0045, + "loss": 0.0059, "step": 1989 }, { "epoch": 0.64, "learning_rate": 9.322649114574791e-06, - "loss": 0.004, + "loss": 0.0039, "step": 1990 }, { @@ -11981,373 +11981,373 @@ { "epoch": 0.64, "learning_rate": 9.293879990943788e-06, - "loss": 0.0203, + "loss": 0.0183, "step": 1992 }, { "epoch": 0.64, "learning_rate": 9.279504616578114e-06, - "loss": 0.0055, + "loss": 0.0062, "step": 1993 }, { "epoch": 0.64, "learning_rate": 9.265135387772586e-06, - "loss": 0.0064, + "loss": 0.0073, "step": 1994 }, { "epoch": 0.64, "learning_rate": 9.250772319964148e-06, - "loss": 0.0089, + "loss": 0.0079, "step": 1995 }, { "epoch": 0.64, "learning_rate": 9.236415428583113e-06, - "loss": 0.0039, + "loss": 0.0029, "step": 1996 }, { "epoch": 0.64, "learning_rate": 9.222064729053185e-06, - "loss": 0.0128, + "loss": 0.012, "step": 1997 }, { "epoch": 0.64, "learning_rate": 9.207720236791382e-06, - "loss": 0.0072, + "loss": 0.0086, "step": 1998 }, { "epoch": 0.64, "learning_rate": 9.19338196720809e-06, - "loss": 0.0057, + "loss": 0.0084, "step": 1999 }, { "epoch": 0.64, "learning_rate": 9.179049935706984e-06, - "loss": 0.0099, + "loss": 0.0121, "step": 2000 }, { "epoch": 0.64, "learning_rate": 9.164724157685042e-06, - "loss": 0.0073, + "loss": 0.0071, "step": 2001 }, { "epoch": 0.64, "learning_rate": 9.15040464853253e-06, - "loss": 0.0063, + "loss": 0.0062, "step": 2002 }, { "epoch": 0.64, "learning_rate": 9.136091423632984e-06, - "loss": 0.008, + "loss": 0.0081, "step": 2003 }, { "epoch": 0.64, "learning_rate": 9.121784498363169e-06, - "loss": 0.0101, + "loss": 0.0123, "step": 2004 }, { "epoch": 0.64, "learning_rate": 9.107483888093109e-06, - "loss": 0.0144, + "loss": 0.0092, "step": 2005 }, { "epoch": 0.64, "learning_rate": 9.093189608186015e-06, - "loss": 0.0038, + "loss": 0.0043, "step": 2006 }, { "epoch": 0.64, "learning_rate": 9.078901673998317e-06, - "loss": 0.0087, + "loss": 0.0048, "step": 2007 }, { "epoch": 0.64, "learning_rate": 9.064620100879631e-06, - "loss": 0.0047, + "loss": 0.0037, "step": 2008 }, { "epoch": 0.64, "learning_rate": 9.050344904172718e-06, - "loss": 0.0165, + "loss": 0.0138, "step": 2009 }, { "epoch": 0.64, "learning_rate": 9.036076099213512e-06, - "loss": 0.014, + "loss": 0.0127, "step": 2010 }, { "epoch": 0.64, "learning_rate": 9.021813701331059e-06, - "loss": 0.006, + "loss": 0.0074, "step": 2011 }, { "epoch": 0.64, "learning_rate": 9.007557725847543e-06, - "loss": 0.0086, + "loss": 0.0096, "step": 2012 }, { "epoch": 0.64, "learning_rate": 8.993308188078232e-06, - "loss": 0.0092, + "loss": 0.0057, "step": 2013 }, { "epoch": 0.64, "learning_rate": 8.979065103331487e-06, - "loss": 0.0052, + "loss": 0.0048, "step": 2014 }, { "epoch": 0.64, "learning_rate": 8.964828486908729e-06, - "loss": 0.0089, + "loss": 0.0091, "step": 2015 }, { "epoch": 0.64, "learning_rate": 8.950598354104444e-06, - "loss": 0.0048, + "loss": 0.0035, "step": 2016 }, { "epoch": 0.64, "learning_rate": 8.936374720206133e-06, - "loss": 0.0043, + "loss": 0.0055, "step": 2017 }, { "epoch": 0.64, "learning_rate": 8.922157600494331e-06, - "loss": 0.0095, + "loss": 0.0054, "step": 2018 }, { "epoch": 0.64, "learning_rate": 8.907947010242566e-06, - "loss": 0.0054, + "loss": 0.0064, "step": 2019 }, { "epoch": 0.65, "learning_rate": 8.893742964717354e-06, - "loss": 0.0093, + "loss": 0.0094, "step": 2020 }, { "epoch": 0.65, "learning_rate": 8.879545479178188e-06, - "loss": 0.0081, + "loss": 0.0096, "step": 2021 }, { "epoch": 0.65, "learning_rate": 8.865354568877496e-06, - "loss": 0.0034, + "loss": 0.0039, "step": 2022 }, { "epoch": 0.65, "learning_rate": 8.85117024906066e-06, - "loss": 0.0052, + "loss": 0.0051, "step": 2023 }, { "epoch": 0.65, "learning_rate": 8.836992534965969e-06, - "loss": 0.0137, + "loss": 0.0101, "step": 2024 }, { "epoch": 0.65, "learning_rate": 8.822821441824622e-06, - "loss": 0.0039, + "loss": 0.005, "step": 2025 }, { "epoch": 0.65, "learning_rate": 8.808656984860704e-06, - "loss": 0.005, + "loss": 0.0054, "step": 2026 }, { "epoch": 0.65, "learning_rate": 8.79449917929117e-06, - "loss": 0.0097, + "loss": 0.0108, "step": 2027 }, { "epoch": 0.65, "learning_rate": 8.78034804032583e-06, - "loss": 0.0084, + "loss": 0.0093, "step": 2028 }, { "epoch": 0.65, "learning_rate": 8.766203583167331e-06, - "loss": 0.006, + "loss": 0.0074, "step": 2029 }, { "epoch": 0.65, "learning_rate": 8.752065823011135e-06, - "loss": 0.0116, + "loss": 0.0115, "step": 2030 }, { "epoch": 0.65, "learning_rate": 8.737934775045536e-06, - "loss": 0.0057, + "loss": 0.0044, "step": 2031 }, { "epoch": 0.65, "learning_rate": 8.723810454451579e-06, - "loss": 0.0057, + "loss": 0.0055, "step": 2032 }, { "epoch": 0.65, "learning_rate": 8.70969287640311e-06, - "loss": 0.0058, + "loss": 0.0064, "step": 2033 }, { "epoch": 0.65, "learning_rate": 8.695582056066723e-06, - "loss": 0.0063, + "loss": 0.0109, "step": 2034 }, { "epoch": 0.65, "learning_rate": 8.681478008601741e-06, - "loss": 0.0073, + "loss": 0.0077, "step": 2035 }, { "epoch": 0.65, "learning_rate": 8.667380749160231e-06, - "loss": 0.0038, + "loss": 0.0035, "step": 2036 }, { "epoch": 0.65, "learning_rate": 8.653290292886953e-06, - "loss": 0.0083, + "loss": 0.0077, "step": 2037 }, { "epoch": 0.65, "learning_rate": 8.63920665491936e-06, - "loss": 0.0052, + "loss": 0.0051, "step": 2038 }, { "epoch": 0.65, "learning_rate": 8.625129850387593e-06, - "loss": 0.0062, + "loss": 0.0054, "step": 2039 }, { "epoch": 0.65, "learning_rate": 8.611059894414431e-06, - "loss": 0.0028, + "loss": 0.0034, "step": 2040 }, { "epoch": 0.65, "learning_rate": 8.596996802115315e-06, - "loss": 0.0079, + "loss": 0.0081, "step": 2041 }, { "epoch": 0.65, "learning_rate": 8.582940588598296e-06, - "loss": 0.0111, + "loss": 0.0092, "step": 2042 }, { "epoch": 0.65, "learning_rate": 8.568891268964044e-06, - "loss": 0.0115, + "loss": 0.0144, "step": 2043 }, { "epoch": 0.65, "learning_rate": 8.55484885830583e-06, - "loss": 0.0048, + "loss": 0.0051, "step": 2044 }, { "epoch": 0.65, "learning_rate": 8.540813371709484e-06, - "loss": 0.0034, + "loss": 0.003, "step": 2045 }, { "epoch": 0.65, "learning_rate": 8.52678482425341e-06, - "loss": 0.0057, + "loss": 0.0062, "step": 2046 }, { "epoch": 0.65, "learning_rate": 8.512763231008563e-06, - "loss": 0.0109, + "loss": 0.0124, "step": 2047 }, { "epoch": 0.65, "learning_rate": 8.498748607038406e-06, - "loss": 0.0056, + "loss": 0.0055, "step": 2048 }, { "epoch": 0.65, "learning_rate": 8.48474096739894e-06, - "loss": 0.0036, + "loss": 0.0054, "step": 2049 }, { "epoch": 0.65, "learning_rate": 8.470740327138637e-06, - "loss": 0.0074, + "loss": 0.0088, "step": 2050 }, { "epoch": 0.66, "learning_rate": 8.456746701298473e-06, - "loss": 0.0061, + "loss": 0.0062, "step": 2051 }, { "epoch": 0.66, "learning_rate": 8.442760104911882e-06, - "loss": 0.0053, + "loss": 0.0052, "step": 2052 }, { "epoch": 0.66, "learning_rate": 8.42878055300473e-06, - "loss": 0.0091, + "loss": 0.0093, "step": 2053 }, { @@ -12359,301 +12359,301 @@ { "epoch": 0.66, "learning_rate": 8.400842642694429e-06, - "loss": 0.0063, + "loss": 0.0059, "step": 2055 }, { "epoch": 0.66, "learning_rate": 8.386884314305126e-06, - "loss": 0.0089, + "loss": 0.0083, "step": 2056 }, { "epoch": 0.66, "learning_rate": 8.37293309042295e-06, - "loss": 0.0033, + "loss": 0.0042, "step": 2057 }, { "epoch": 0.66, "learning_rate": 8.358988986035765e-06, - "loss": 0.0042, + "loss": 0.0048, "step": 2058 }, { "epoch": 0.66, "learning_rate": 8.345052016123812e-06, - "loss": 0.0083, + "loss": 0.0091, "step": 2059 }, { "epoch": 0.66, "learning_rate": 8.331122195659646e-06, - "loss": 0.0061, + "loss": 0.0031, "step": 2060 }, { "epoch": 0.66, "learning_rate": 8.31719953960816e-06, - "loss": 0.0061, + "loss": 0.006, "step": 2061 }, { "epoch": 0.66, "learning_rate": 8.30328406292653e-06, - "loss": 0.0094, + "loss": 0.0132, "step": 2062 }, { "epoch": 0.66, "learning_rate": 8.289375780564245e-06, - "loss": 0.0062, + "loss": 0.006, "step": 2063 }, { "epoch": 0.66, "learning_rate": 8.275474707463034e-06, - "loss": 0.0055, + "loss": 0.0062, "step": 2064 }, { "epoch": 0.66, "learning_rate": 8.261580858556911e-06, - "loss": 0.0029, + "loss": 0.0053, "step": 2065 }, { "epoch": 0.66, "learning_rate": 8.247694248772105e-06, - "loss": 0.0047, + "loss": 0.0048, "step": 2066 }, { "epoch": 0.66, "learning_rate": 8.23381489302709e-06, - "loss": 0.0055, + "loss": 0.0073, "step": 2067 }, { "epoch": 0.66, "learning_rate": 8.219942806232524e-06, - "loss": 0.0073, + "loss": 0.0093, "step": 2068 }, { "epoch": 0.66, "learning_rate": 8.206078003291273e-06, - "loss": 0.0061, + "loss": 0.0069, "step": 2069 }, { "epoch": 0.66, "learning_rate": 8.192220499098374e-06, - "loss": 0.006, + "loss": 0.0054, "step": 2070 }, { "epoch": 0.66, "learning_rate": 8.178370308541015e-06, - "loss": 0.0048, + "loss": 0.0046, "step": 2071 }, { "epoch": 0.66, "learning_rate": 8.164527446498541e-06, - "loss": 0.0025, + "loss": 0.0038, "step": 2072 }, { "epoch": 0.66, "learning_rate": 8.150691927842408e-06, - "loss": 0.0073, + "loss": 0.0088, "step": 2073 }, { "epoch": 0.66, "learning_rate": 8.136863767436193e-06, - "loss": 0.0097, + "loss": 0.0062, "step": 2074 }, { "epoch": 0.66, "learning_rate": 8.123042980135576e-06, - "loss": 0.0036, + "loss": 0.0038, "step": 2075 }, { "epoch": 0.66, "learning_rate": 8.10922958078829e-06, - "loss": 0.0042, + "loss": 0.0039, "step": 2076 }, { "epoch": 0.66, "learning_rate": 8.095423584234157e-06, - "loss": 0.0041, + "loss": 0.005, "step": 2077 }, { "epoch": 0.66, "learning_rate": 8.081625005305036e-06, - "loss": 0.0066, + "loss": 0.0073, "step": 2078 }, { "epoch": 0.66, "learning_rate": 8.067833858824814e-06, - "loss": 0.0061, + "loss": 0.0055, "step": 2079 }, { "epoch": 0.66, "learning_rate": 8.054050159609401e-06, - "loss": 0.0045, + "loss": 0.0049, "step": 2080 }, { "epoch": 0.66, "learning_rate": 8.040273922466696e-06, - "loss": 0.0063, + "loss": 0.0058, "step": 2081 }, { "epoch": 0.66, "learning_rate": 8.02650516219659e-06, - "loss": 0.0037, + "loss": 0.0023, "step": 2082 }, { "epoch": 0.67, "learning_rate": 8.012743893590943e-06, - "loss": 0.0075, + "loss": 0.0061, "step": 2083 }, { "epoch": 0.67, "learning_rate": 7.998990131433554e-06, - "loss": 0.0027, + "loss": 0.0026, "step": 2084 }, { "epoch": 0.67, "learning_rate": 7.985243890500181e-06, - "loss": 0.0025, + "loss": 0.0028, "step": 2085 }, { "epoch": 0.67, "learning_rate": 7.971505185558471e-06, - "loss": 0.0092, + "loss": 0.0089, "step": 2086 }, { "epoch": 0.67, "learning_rate": 7.957774031368004e-06, - "loss": 0.005, + "loss": 0.0062, "step": 2087 }, { "epoch": 0.67, "learning_rate": 7.944050442680225e-06, - "loss": 0.0069, + "loss": 0.0091, "step": 2088 }, { "epoch": 0.67, "learning_rate": 7.930334434238475e-06, - "loss": 0.0068, + "loss": 0.0067, "step": 2089 }, { "epoch": 0.67, "learning_rate": 7.916626020777924e-06, - "loss": 0.0115, + "loss": 0.0116, "step": 2090 }, { "epoch": 0.67, "learning_rate": 7.90292521702561e-06, - "loss": 0.0105, + "loss": 0.0112, "step": 2091 }, { "epoch": 0.67, "learning_rate": 7.88923203770037e-06, - "loss": 0.005, + "loss": 0.0051, "step": 2092 }, { "epoch": 0.67, "learning_rate": 7.875546497512879e-06, - "loss": 0.0048, + "loss": 0.0055, "step": 2093 }, { "epoch": 0.67, "learning_rate": 7.861868611165572e-06, - "loss": 0.0034, + "loss": 0.0037, "step": 2094 }, { "epoch": 0.67, "learning_rate": 7.848198393352689e-06, - "loss": 0.0038, + "loss": 0.0035, "step": 2095 }, { "epoch": 0.67, "learning_rate": 7.834535858760225e-06, - "loss": 0.0101, + "loss": 0.0107, "step": 2096 }, { "epoch": 0.67, "learning_rate": 7.820881022065902e-06, - "loss": 0.009, + "loss": 0.0067, "step": 2097 }, { "epoch": 0.67, "learning_rate": 7.80723389793921e-06, - "loss": 0.0062, + "loss": 0.0096, "step": 2098 }, { "epoch": 0.67, "learning_rate": 7.793594501041306e-06, - "loss": 0.01, + "loss": 0.008, "step": 2099 }, { "epoch": 0.67, "learning_rate": 7.779962846025086e-06, - "loss": 0.0119, + "loss": 0.0115, "step": 2100 }, { "epoch": 0.67, "learning_rate": 7.766338947535113e-06, - "loss": 0.0118, + "loss": 0.0068, "step": 2101 }, { "epoch": 0.67, "learning_rate": 7.752722820207607e-06, - "loss": 0.0084, + "loss": 0.009, "step": 2102 }, { "epoch": 0.67, "learning_rate": 7.739114478670463e-06, - "loss": 0.009, + "loss": 0.008, "step": 2103 }, { "epoch": 0.67, "learning_rate": 7.725513937543184e-06, - "loss": 0.0039, + "loss": 0.0042, "step": 2104 }, { @@ -12665,61 +12665,61 @@ { "epoch": 0.67, "learning_rate": 7.698336314954408e-06, - "loss": 0.0085, + "loss": 0.0114, "step": 2106 }, { "epoch": 0.67, "learning_rate": 7.684759262689971e-06, - "loss": 0.0028, + "loss": 0.0029, "step": 2107 }, { "epoch": 0.67, "learning_rate": 7.67119006922952e-06, - "loss": 0.0062, + "loss": 0.0063, "step": 2108 }, { "epoch": 0.67, "learning_rate": 7.65762874915052e-06, - "loss": 0.0065, + "loss": 0.0048, "step": 2109 }, { "epoch": 0.67, "learning_rate": 7.644075317021962e-06, - "loss": 0.0026, + "loss": 0.0036, "step": 2110 }, { "epoch": 0.67, "learning_rate": 7.630529787404383e-06, - "loss": 0.0062, + "loss": 0.007, "step": 2111 }, { "epoch": 0.67, "learning_rate": 7.616992174849811e-06, - "loss": 0.0132, + "loss": 0.0112, "step": 2112 }, { "epoch": 0.67, "learning_rate": 7.603462493901785e-06, - "loss": 0.0077, + "loss": 0.0084, "step": 2113 }, { "epoch": 0.68, "learning_rate": 7.589940759095322e-06, - "loss": 0.0098, + "loss": 0.0091, "step": 2114 }, { "epoch": 0.68, "learning_rate": 7.576426984956885e-06, - "loss": 0.0029, + "loss": 0.003, "step": 2115 }, { @@ -12731,133 +12731,133 @@ { "epoch": 0.68, "learning_rate": 7.549423376747245e-06, - "loss": 0.0078, + "loss": 0.0088, "step": 2117 }, { "epoch": 0.68, "learning_rate": 7.5359335716861586e-06, - "loss": 0.0058, + "loss": 0.0065, "step": 2118 }, { "epoch": 0.68, "learning_rate": 7.522451785313331e-06, - "loss": 0.0066, + "loss": 0.0076, "step": 2119 }, { "epoch": 0.68, "learning_rate": 7.5089780321123136e-06, - "loss": 0.0054, + "loss": 0.0055, "step": 2120 }, { "epoch": 0.68, "learning_rate": 7.49551232655804e-06, - "loss": 0.0089, + "loss": 0.0082, "step": 2121 }, { "epoch": 0.68, "learning_rate": 7.482054683116795e-06, - "loss": 0.0071, + "loss": 0.009, "step": 2122 }, { "epoch": 0.68, "learning_rate": 7.468605116246191e-06, - "loss": 0.0058, + "loss": 0.007, "step": 2123 }, { "epoch": 0.68, "learning_rate": 7.4551636403951845e-06, - "loss": 0.0037, + "loss": 0.0038, "step": 2124 }, { "epoch": 0.68, "learning_rate": 7.441730270004016e-06, - "loss": 0.0063, + "loss": 0.0066, "step": 2125 }, { "epoch": 0.68, "learning_rate": 7.428305019504237e-06, - "loss": 0.0098, + "loss": 0.0114, "step": 2126 }, { "epoch": 0.68, "learning_rate": 7.414887903318674e-06, - "loss": 0.005, + "loss": 0.0037, "step": 2127 }, { "epoch": 0.68, "learning_rate": 7.401478935861399e-06, - "loss": 0.0038, + "loss": 0.0033, "step": 2128 }, { "epoch": 0.68, "learning_rate": 7.388078131537751e-06, - "loss": 0.0031, + "loss": 0.0033, "step": 2129 }, { "epoch": 0.68, "learning_rate": 7.374685504744278e-06, - "loss": 0.006, + "loss": 0.0059, "step": 2130 }, { "epoch": 0.68, "learning_rate": 7.361301069868759e-06, - "loss": 0.0074, + "loss": 0.0083, "step": 2131 }, { "epoch": 0.68, "learning_rate": 7.347924841290174e-06, - "loss": 0.0047, + "loss": 0.0058, "step": 2132 }, { "epoch": 0.68, "learning_rate": 7.33455683337867e-06, - "loss": 0.011, + "loss": 0.0115, "step": 2133 }, { "epoch": 0.68, "learning_rate": 7.321197060495575e-06, - "loss": 0.0047, + "loss": 0.0052, "step": 2134 }, { "epoch": 0.68, "learning_rate": 7.3078455369933746e-06, - "loss": 0.008, + "loss": 0.0048, "step": 2135 }, { "epoch": 0.68, "learning_rate": 7.294502277215674e-06, - "loss": 0.0021, + "loss": 0.0024, "step": 2136 }, { "epoch": 0.68, "learning_rate": 7.281167295497223e-06, - "loss": 0.0077, + "loss": 0.0132, "step": 2137 }, { "epoch": 0.68, "learning_rate": 7.2678406061638545e-06, - "loss": 0.0149, + "loss": 0.0113, "step": 2138 }, { @@ -12869,115 +12869,115 @@ { "epoch": 0.68, "learning_rate": 7.241212161911217e-06, - "loss": 0.0052, + "loss": 0.0063, "step": 2140 }, { "epoch": 0.68, "learning_rate": 7.2279104355990265e-06, - "loss": 0.0046, + "loss": 0.0048, "step": 2141 }, { "epoch": 0.68, "learning_rate": 7.214617058886073e-06, - "loss": 0.005, + "loss": 0.0039, "step": 2142 }, { "epoch": 0.68, "learning_rate": 7.201332046053492e-06, - "loss": 0.0053, + "loss": 0.0065, "step": 2143 }, { "epoch": 0.68, "learning_rate": 7.188055411373463e-06, - "loss": 0.0046, + "loss": 0.0048, "step": 2144 }, { "epoch": 0.69, "learning_rate": 7.174787169109135e-06, - "loss": 0.0076, + "loss": 0.0065, "step": 2145 }, { "epoch": 0.69, "learning_rate": 7.161527333514664e-06, - "loss": 0.0115, + "loss": 0.0099, "step": 2146 }, { "epoch": 0.69, "learning_rate": 7.148275918835162e-06, - "loss": 0.0059, + "loss": 0.0065, "step": 2147 }, { "epoch": 0.69, "learning_rate": 7.1350329393066925e-06, - "loss": 0.004, + "loss": 0.0042, "step": 2148 }, { "epoch": 0.69, "learning_rate": 7.121798409156267e-06, - "loss": 0.0025, + "loss": 0.0027, "step": 2149 }, { "epoch": 0.69, "learning_rate": 7.108572342601821e-06, - "loss": 0.0078, + "loss": 0.0051, "step": 2150 }, { "epoch": 0.69, "learning_rate": 7.095354753852181e-06, - "loss": 0.0057, + "loss": 0.0054, "step": 2151 }, { "epoch": 0.69, "learning_rate": 7.082145657107082e-06, - "loss": 0.0064, + "loss": 0.0065, "step": 2152 }, { "epoch": 0.69, "learning_rate": 7.068945066557135e-06, - "loss": 0.0065, + "loss": 0.0044, "step": 2153 }, { "epoch": 0.69, "learning_rate": 7.055752996383797e-06, - "loss": 0.0029, + "loss": 0.0026, "step": 2154 }, { "epoch": 0.69, "learning_rate": 7.0425694607593946e-06, - "loss": 0.0126, + "loss": 0.0106, "step": 2155 }, { "epoch": 0.69, "learning_rate": 7.029394473847063e-06, - "loss": 0.0059, + "loss": 0.0107, "step": 2156 }, { "epoch": 0.69, "learning_rate": 7.016228049800768e-06, - "loss": 0.003, + "loss": 0.0033, "step": 2157 }, { "epoch": 0.69, "learning_rate": 7.003070202765279e-06, - "loss": 0.011, + "loss": 0.0122, "step": 2158 }, { @@ -12989,67 +12989,67 @@ { "epoch": 0.69, "learning_rate": 6.976780296259665e-06, - "loss": 0.0082, + "loss": 0.0095, "step": 2160 }, { "epoch": 0.69, "learning_rate": 6.9636482650329316e-06, - "loss": 0.0082, + "loss": 0.0076, "step": 2161 }, { "epoch": 0.69, "learning_rate": 6.950524867303756e-06, - "loss": 0.0081, + "loss": 0.0078, "step": 2162 }, { "epoch": 0.69, "learning_rate": 6.937410117170683e-06, - "loss": 0.006, + "loss": 0.0066, "step": 2163 }, { "epoch": 0.69, "learning_rate": 6.924304028722949e-06, - "loss": 0.0062, + "loss": 0.0055, "step": 2164 }, { "epoch": 0.69, "learning_rate": 6.911206616040505e-06, - "loss": 0.0132, + "loss": 0.0142, "step": 2165 }, { "epoch": 0.69, "learning_rate": 6.89811789319398e-06, - "loss": 0.0049, + "loss": 0.0063, "step": 2166 }, { "epoch": 0.69, "learning_rate": 6.885037874244654e-06, - "loss": 0.0144, + "loss": 0.0148, "step": 2167 }, { "epoch": 0.69, "learning_rate": 6.871966573244469e-06, - "loss": 0.0032, + "loss": 0.0034, "step": 2168 }, { "epoch": 0.69, "learning_rate": 6.858904004235991e-06, - "loss": 0.0058, + "loss": 0.0065, "step": 2169 }, { "epoch": 0.69, "learning_rate": 6.84585018125242e-06, - "loss": 0.0028, + "loss": 0.0026, "step": 2170 }, { @@ -13061,169 +13061,169 @@ { "epoch": 0.69, "learning_rate": 6.819768829445746e-06, - "loss": 0.0053, + "loss": 0.0069, "step": 2172 }, { "epoch": 0.69, "learning_rate": 6.806741328641985e-06, - "loss": 0.0078, + "loss": 0.0049, "step": 2173 }, { "epoch": 0.69, "learning_rate": 6.793722629901785e-06, - "loss": 0.0063, + "loss": 0.0064, "step": 2174 }, { "epoch": 0.69, "learning_rate": 6.780712747211196e-06, - "loss": 0.0049, + "loss": 0.0044, "step": 2175 }, { "epoch": 0.69, "learning_rate": 6.767711694546816e-06, - "loss": 0.0114, + "loss": 0.0125, "step": 2176 }, { "epoch": 0.7, "learning_rate": 6.754719485875743e-06, - "loss": 0.0067, + "loss": 0.0068, "step": 2177 }, { "epoch": 0.7, "learning_rate": 6.741736135155584e-06, - "loss": 0.0064, + "loss": 0.0069, "step": 2178 }, { "epoch": 0.7, "learning_rate": 6.728761656334419e-06, - "loss": 0.0047, + "loss": 0.0048, "step": 2179 }, { "epoch": 0.7, "learning_rate": 6.715796063350809e-06, - "loss": 0.0044, + "loss": 0.0034, "step": 2180 }, { "epoch": 0.7, "learning_rate": 6.702839370133763e-06, - "loss": 0.0049, + "loss": 0.0051, "step": 2181 }, { "epoch": 0.7, "learning_rate": 6.689891590602722e-06, - "loss": 0.0084, + "loss": 0.0089, "step": 2182 }, { "epoch": 0.7, "learning_rate": 6.676952738667564e-06, - "loss": 0.0059, + "loss": 0.0107, "step": 2183 }, { "epoch": 0.7, "learning_rate": 6.664022828228573e-06, - "loss": 0.0074, + "loss": 0.0081, "step": 2184 }, { "epoch": 0.7, "learning_rate": 6.651101873176413e-06, - "loss": 0.01, + "loss": 0.0076, "step": 2185 }, { "epoch": 0.7, "learning_rate": 6.638189887392152e-06, - "loss": 0.0068, + "loss": 0.0059, "step": 2186 }, { "epoch": 0.7, "learning_rate": 6.625286884747197e-06, - "loss": 0.0079, + "loss": 0.0068, "step": 2187 }, { "epoch": 0.7, "learning_rate": 6.6123928791033165e-06, - "loss": 0.0078, + "loss": 0.0077, "step": 2188 }, { "epoch": 0.7, "learning_rate": 6.599507884312624e-06, - "loss": 0.0082, + "loss": 0.0147, "step": 2189 }, { "epoch": 0.7, "learning_rate": 6.586631914217527e-06, - "loss": 0.0087, + "loss": 0.0104, "step": 2190 }, { "epoch": 0.7, "learning_rate": 6.573764982650766e-06, - "loss": 0.0064, + "loss": 0.0076, "step": 2191 }, { "epoch": 0.7, "learning_rate": 6.560907103435343e-06, - "loss": 0.0093, + "loss": 0.0057, "step": 2192 }, { "epoch": 0.7, "learning_rate": 6.548058290384556e-06, - "loss": 0.0038, + "loss": 0.004, "step": 2193 }, { "epoch": 0.7, "learning_rate": 6.535218557301965e-06, - "loss": 0.0058, + "loss": 0.0065, "step": 2194 }, { "epoch": 0.7, "learning_rate": 6.522387917981352e-06, - "loss": 0.0069, + "loss": 0.0051, "step": 2195 }, { "epoch": 0.7, "learning_rate": 6.509566386206752e-06, - "loss": 0.0056, + "loss": 0.005, "step": 2196 }, { "epoch": 0.7, "learning_rate": 6.496753975752415e-06, - "loss": 0.0034, + "loss": 0.0054, "step": 2197 }, { "epoch": 0.7, "learning_rate": 6.48395070038277e-06, - "loss": 0.0057, + "loss": 0.0064, "step": 2198 }, { "epoch": 0.7, "learning_rate": 6.471156573852462e-06, - "loss": 0.0086, + "loss": 0.0075, "step": 2199 }, { @@ -13235,133 +13235,133 @@ { "epoch": 0.7, "learning_rate": 6.445595822279195e-06, - "loss": 0.0111, + "loss": 0.013, "step": 2201 }, { "epoch": 0.7, "learning_rate": 6.4328292246963e-06, - "loss": 0.0039, + "loss": 0.0054, "step": 2202 }, { "epoch": 0.7, "learning_rate": 6.420071830872816e-06, - "loss": 0.0112, + "loss": 0.0097, "step": 2203 }, { "epoch": 0.7, "learning_rate": 6.407323654514093e-06, - "loss": 0.0058, + "loss": 0.0061, "step": 2204 }, { "epoch": 0.7, "learning_rate": 6.394584709315559e-06, - "loss": 0.0069, + "loss": 0.0053, "step": 2205 }, { "epoch": 0.7, "learning_rate": 6.38185500896274e-06, - "loss": 0.0042, + "loss": 0.0043, "step": 2206 }, { "epoch": 0.7, "learning_rate": 6.369134567131226e-06, - "loss": 0.0045, + "loss": 0.0062, "step": 2207 }, { "epoch": 0.71, "learning_rate": 6.356423397486652e-06, - "loss": 0.0061, + "loss": 0.0058, "step": 2208 }, { "epoch": 0.71, "learning_rate": 6.343721513684701e-06, - "loss": 0.0056, + "loss": 0.0059, "step": 2209 }, { "epoch": 0.71, "learning_rate": 6.331028929371086e-06, - "loss": 0.0098, + "loss": 0.0106, "step": 2210 }, { "epoch": 0.71, "learning_rate": 6.318345658181506e-06, - "loss": 0.0107, + "loss": 0.0079, "step": 2211 }, { "epoch": 0.71, "learning_rate": 6.305671713741686e-06, - "loss": 0.0036, + "loss": 0.0042, "step": 2212 }, { "epoch": 0.71, "learning_rate": 6.293007109667301e-06, - "loss": 0.0109, + "loss": 0.0091, "step": 2213 }, { "epoch": 0.71, "learning_rate": 6.280351859564013e-06, - "loss": 0.0054, + "loss": 0.0058, "step": 2214 }, { "epoch": 0.71, "learning_rate": 6.267705977027432e-06, - "loss": 0.0051, + "loss": 0.0053, "step": 2215 }, { "epoch": 0.71, "learning_rate": 6.255069475643089e-06, - "loss": 0.0088, + "loss": 0.0071, "step": 2216 }, { "epoch": 0.71, "learning_rate": 6.242442368986462e-06, - "loss": 0.0064, + "loss": 0.0085, "step": 2217 }, { "epoch": 0.71, "learning_rate": 6.229824670622908e-06, - "loss": 0.0042, + "loss": 0.0038, "step": 2218 }, { "epoch": 0.71, "learning_rate": 6.217216394107697e-06, - "loss": 0.006, + "loss": 0.0053, "step": 2219 }, { "epoch": 0.71, "learning_rate": 6.2046175529859785e-06, - "loss": 0.0121, + "loss": 0.0098, "step": 2220 }, { "epoch": 0.71, "learning_rate": 6.192028160792746e-06, - "loss": 0.0037, + "loss": 0.0042, "step": 2221 }, { "epoch": 0.71, "learning_rate": 6.1794482310528614e-06, - "loss": 0.0045, + "loss": 0.0047, "step": 2222 }, { @@ -13373,55 +13373,55 @@ { "epoch": 0.71, "learning_rate": 6.154316812981694e-06, - "loss": 0.0069, + "loss": 0.0039, "step": 2224 }, { "epoch": 0.71, "learning_rate": 6.141765351649245e-06, - "loss": 0.0058, + "loss": 0.0085, "step": 2225 }, { "epoch": 0.71, "learning_rate": 6.12922340676775e-06, - "loss": 0.0098, + "loss": 0.0092, "step": 2226 }, { "epoch": 0.71, "learning_rate": 6.1166909918111055e-06, - "loss": 0.0055, + "loss": 0.0059, "step": 2227 }, { "epoch": 0.71, "learning_rate": 6.104168120242943e-06, - "loss": 0.0151, + "loss": 0.014, "step": 2228 }, { "epoch": 0.71, "learning_rate": 6.091654805516661e-06, - "loss": 0.0117, + "loss": 0.0124, "step": 2229 }, { "epoch": 0.71, "learning_rate": 6.079151061075382e-06, - "loss": 0.0038, + "loss": 0.0132, "step": 2230 }, { "epoch": 0.71, "learning_rate": 6.06665690035194e-06, - "loss": 0.0017, + "loss": 0.0021, "step": 2231 }, { "epoch": 0.71, "learning_rate": 6.05417233676889e-06, - "loss": 0.0036, + "loss": 0.0038, "step": 2232 }, { @@ -13433,67 +13433,67 @@ { "epoch": 0.71, "learning_rate": 6.0292320546625875e-06, - "loss": 0.0057, + "loss": 0.005, "step": 2234 }, { "epoch": 0.71, "learning_rate": 6.016776362932819e-06, - "loss": 0.0048, + "loss": 0.0042, "step": 2235 }, { "epoch": 0.71, "learning_rate": 6.0043303219303755e-06, - "loss": 0.0082, + "loss": 0.0065, "step": 2236 }, { "epoch": 0.71, "learning_rate": 5.9918939450261154e-06, - "loss": 0.004, + "loss": 0.0045, "step": 2237 }, { "epoch": 0.71, "learning_rate": 5.9794672455805095e-06, - "loss": 0.0079, + "loss": 0.0074, "step": 2238 }, { "epoch": 0.72, "learning_rate": 5.967050236943624e-06, - "loss": 0.0065, + "loss": 0.0059, "step": 2239 }, { "epoch": 0.72, "learning_rate": 5.954642932455122e-06, - "loss": 0.0045, + "loss": 0.0049, "step": 2240 }, { "epoch": 0.72, "learning_rate": 5.942245345444248e-06, - "loss": 0.0064, + "loss": 0.0078, "step": 2241 }, { "epoch": 0.72, "learning_rate": 5.929857489229788e-06, - "loss": 0.0064, + "loss": 0.0056, "step": 2242 }, { "epoch": 0.72, "learning_rate": 5.9174793771200954e-06, - "loss": 0.0045, + "loss": 0.0039, "step": 2243 }, { "epoch": 0.72, "learning_rate": 5.9051110224130336e-06, - "loss": 0.0071, + "loss": 0.0051, "step": 2244 }, { @@ -13505,61 +13505,61 @@ { "epoch": 0.72, "learning_rate": 5.880403638345901e-06, - "loss": 0.0089, + "loss": 0.0072, "step": 2246 }, { "epoch": 0.72, "learning_rate": 5.868064635529105e-06, - "loss": 0.0096, + "loss": 0.0101, "step": 2247 }, { "epoch": 0.72, "learning_rate": 5.855735443201485e-06, - "loss": 0.0077, + "loss": 0.0092, "step": 2248 }, { "epoch": 0.72, "learning_rate": 5.843416074608348e-06, - "loss": 0.01, + "loss": 0.0105, "step": 2249 }, { "epoch": 0.72, "learning_rate": 5.831106542984469e-06, - "loss": 0.0116, + "loss": 0.0107, "step": 2250 }, { "epoch": 0.72, "learning_rate": 5.8188068615540505e-06, - "loss": 0.0039, + "loss": 0.0048, "step": 2251 }, { "epoch": 0.72, "learning_rate": 5.806517043530697e-06, - "loss": 0.0065, + "loss": 0.005, "step": 2252 }, { "epoch": 0.72, "learning_rate": 5.794237102117442e-06, - "loss": 0.0062, + "loss": 0.007, "step": 2253 }, { "epoch": 0.72, "learning_rate": 5.781967050506683e-06, - "loss": 0.0057, + "loss": 0.0077, "step": 2254 }, { "epoch": 0.72, "learning_rate": 5.769706901880214e-06, - "loss": 0.0074, + "loss": 0.008, "step": 2255 }, { @@ -13571,295 +13571,295 @@ { "epoch": 0.72, "learning_rate": 5.745216366254065e-06, - "loss": 0.0072, + "loss": 0.0068, "step": 2257 }, { "epoch": 0.72, "learning_rate": 5.732986005564702e-06, - "loss": 0.0078, + "loss": 0.0086, "step": 2258 }, { "epoch": 0.72, "learning_rate": 5.720765600480241e-06, - "loss": 0.0048, + "loss": 0.0042, "step": 2259 }, { "epoch": 0.72, "learning_rate": 5.70855516412912e-06, - "loss": 0.0073, + "loss": 0.0055, "step": 2260 }, { "epoch": 0.72, "learning_rate": 5.6963547096290915e-06, - "loss": 0.0072, + "loss": 0.0073, "step": 2261 }, { "epoch": 0.72, "learning_rate": 5.68416425008716e-06, - "loss": 0.0034, + "loss": 0.0032, "step": 2262 }, { "epoch": 0.72, "learning_rate": 5.671983798599609e-06, - "loss": 0.0077, + "loss": 0.0083, "step": 2263 }, { "epoch": 0.72, "learning_rate": 5.65981336825197e-06, - "loss": 0.0066, + "loss": 0.0059, "step": 2264 }, { "epoch": 0.72, "learning_rate": 5.647652972118998e-06, - "loss": 0.0047, + "loss": 0.0049, "step": 2265 }, { "epoch": 0.72, "learning_rate": 5.635502623264681e-06, - "loss": 0.0071, + "loss": 0.0091, "step": 2266 }, { "epoch": 0.72, "learning_rate": 5.623362334742198e-06, - "loss": 0.0069, + "loss": 0.0072, "step": 2267 }, { "epoch": 0.72, "learning_rate": 5.611232119593934e-06, - "loss": 0.0049, + "loss": 0.0046, "step": 2268 }, { "epoch": 0.72, "learning_rate": 5.59911199085145e-06, - "loss": 0.01, + "loss": 0.0087, "step": 2269 }, { "epoch": 0.73, "learning_rate": 5.58700196153546e-06, - "loss": 0.0069, + "loss": 0.0061, "step": 2270 }, { "epoch": 0.73, "learning_rate": 5.5749020446558415e-06, - "loss": 0.0073, + "loss": 0.0064, "step": 2271 }, { "epoch": 0.73, "learning_rate": 5.562812253211606e-06, - "loss": 0.0055, + "loss": 0.0065, "step": 2272 }, { "epoch": 0.73, "learning_rate": 5.5507326001908735e-06, - "loss": 0.0053, + "loss": 0.0056, "step": 2273 }, { "epoch": 0.73, "learning_rate": 5.538663098570895e-06, - "loss": 0.0041, + "loss": 0.003, "step": 2274 }, { "epoch": 0.73, "learning_rate": 5.526603761317994e-06, - "loss": 0.0085, + "loss": 0.0098, "step": 2275 }, { "epoch": 0.73, "learning_rate": 5.514554601387585e-06, - "loss": 0.0043, + "loss": 0.0027, "step": 2276 }, { "epoch": 0.73, "learning_rate": 5.5025156317241555e-06, - "loss": 0.0106, + "loss": 0.0093, "step": 2277 }, { "epoch": 0.73, "learning_rate": 5.490486865261229e-06, - "loss": 0.0095, + "loss": 0.0098, "step": 2278 }, { "epoch": 0.73, "learning_rate": 5.478468314921384e-06, - "loss": 0.0074, + "loss": 0.0089, "step": 2279 }, { "epoch": 0.73, "learning_rate": 5.4664599936162064e-06, - "loss": 0.0106, + "loss": 0.011, "step": 2280 }, { "epoch": 0.73, "learning_rate": 5.454461914246316e-06, - "loss": 0.0115, + "loss": 0.0107, "step": 2281 }, { "epoch": 0.73, "learning_rate": 5.442474089701302e-06, - "loss": 0.0076, + "loss": 0.0081, "step": 2282 }, { "epoch": 0.73, "learning_rate": 5.430496532859765e-06, - "loss": 0.0068, + "loss": 0.0069, "step": 2283 }, { "epoch": 0.73, "learning_rate": 5.418529256589249e-06, - "loss": 0.0032, + "loss": 0.0043, "step": 2284 }, { "epoch": 0.73, "learning_rate": 5.406572273746277e-06, - "loss": 0.0114, + "loss": 0.0101, "step": 2285 }, { "epoch": 0.73, "learning_rate": 5.394625597176292e-06, - "loss": 0.0085, + "loss": 0.0093, "step": 2286 }, { "epoch": 0.73, "learning_rate": 5.382689239713684e-06, - "loss": 0.0068, + "loss": 0.0044, "step": 2287 }, { "epoch": 0.73, "learning_rate": 5.37076321418174e-06, - "loss": 0.0081, + "loss": 0.0086, "step": 2288 }, { "epoch": 0.73, "learning_rate": 5.358847533392661e-06, - "loss": 0.008, + "loss": 0.0083, "step": 2289 }, { "epoch": 0.73, "learning_rate": 5.346942210147535e-06, - "loss": 0.0037, + "loss": 0.0039, "step": 2290 }, { "epoch": 0.73, "learning_rate": 5.3350472572363065e-06, - "loss": 0.0039, + "loss": 0.0046, "step": 2291 }, { "epoch": 0.73, "learning_rate": 5.323162687437798e-06, - "loss": 0.0101, + "loss": 0.01, "step": 2292 }, { "epoch": 0.73, "learning_rate": 5.311288513519661e-06, - "loss": 0.0085, + "loss": 0.0065, "step": 2293 }, { "epoch": 0.73, "learning_rate": 5.299424748238392e-06, - "loss": 0.0072, + "loss": 0.0126, "step": 2294 }, { "epoch": 0.73, "learning_rate": 5.287571404339307e-06, - "loss": 0.0033, + "loss": 0.0037, "step": 2295 }, { "epoch": 0.73, "learning_rate": 5.275728494556506e-06, - "loss": 0.0075, + "loss": 0.0089, "step": 2296 }, { "epoch": 0.73, "learning_rate": 5.263896031612904e-06, - "loss": 0.0045, + "loss": 0.005, "step": 2297 }, { "epoch": 0.73, "learning_rate": 5.252074028220174e-06, - "loss": 0.0041, + "loss": 0.0051, "step": 2298 }, { "epoch": 0.73, "learning_rate": 5.2402624970787625e-06, - "loss": 0.006, + "loss": 0.0059, "step": 2299 }, { "epoch": 0.73, "learning_rate": 5.2284614508778714e-06, - "loss": 0.0058, + "loss": 0.0053, "step": 2300 }, { "epoch": 0.73, "learning_rate": 5.216670902295419e-06, - "loss": 0.0058, + "loss": 0.0061, "step": 2301 }, { "epoch": 0.74, "learning_rate": 5.204890863998063e-06, - "loss": 0.0048, + "loss": 0.0051, "step": 2302 }, { "epoch": 0.74, "learning_rate": 5.193121348641167e-06, - "loss": 0.0081, + "loss": 0.0083, "step": 2303 }, { "epoch": 0.74, "learning_rate": 5.1813623688687804e-06, - "loss": 0.0077, + "loss": 0.0072, "step": 2304 }, { "epoch": 0.74, "learning_rate": 5.169613937313649e-06, - "loss": 0.0083, + "loss": 0.0065, "step": 2305 }, { @@ -13871,169 +13871,169 @@ { "epoch": 0.74, "learning_rate": 5.146148769329405e-06, - "loss": 0.0048, + "loss": 0.0047, "step": 2307 }, { "epoch": 0.74, "learning_rate": 5.134432058109064e-06, - "loss": 0.006, + "loss": 0.0045, "step": 2308 }, { "epoch": 0.74, "learning_rate": 5.122725945523465e-06, - "loss": 0.0096, + "loss": 0.0108, "step": 2309 }, { "epoch": 0.74, "learning_rate": 5.111030444148556e-06, - "loss": 0.0083, + "loss": 0.0073, "step": 2310 }, { "epoch": 0.74, "learning_rate": 5.099345566548874e-06, - "loss": 0.0082, + "loss": 0.0088, "step": 2311 }, { "epoch": 0.74, "learning_rate": 5.087671325277556e-06, - "loss": 0.0128, + "loss": 0.0088, "step": 2312 }, { "epoch": 0.74, "learning_rate": 5.0760077328763e-06, - "loss": 0.0072, + "loss": 0.0067, "step": 2313 }, { "epoch": 0.74, "learning_rate": 5.064354801875364e-06, - "loss": 0.0049, + "loss": 0.0052, "step": 2314 }, { "epoch": 0.74, "learning_rate": 5.05271254479356e-06, - "loss": 0.0053, + "loss": 0.0051, "step": 2315 }, { "epoch": 0.74, "learning_rate": 5.041080974138237e-06, - "loss": 0.0144, + "loss": 0.0138, "step": 2316 }, { "epoch": 0.74, "learning_rate": 5.029460102405248e-06, - "loss": 0.0065, + "loss": 0.0067, "step": 2317 }, { "epoch": 0.74, "learning_rate": 5.017849942078964e-06, - "loss": 0.0076, + "loss": 0.0068, "step": 2318 }, { "epoch": 0.74, "learning_rate": 5.0062505056322425e-06, - "loss": 0.0147, + "loss": 0.0146, "step": 2319 }, { "epoch": 0.74, "learning_rate": 4.994661805526423e-06, - "loss": 0.003, + "loss": 0.0036, "step": 2320 }, { "epoch": 0.74, "learning_rate": 4.983083854211317e-06, - "loss": 0.0089, + "loss": 0.0079, "step": 2321 }, { "epoch": 0.74, "learning_rate": 4.971516664125174e-06, - "loss": 0.0058, + "loss": 0.0051, "step": 2322 }, { "epoch": 0.74, "learning_rate": 4.959960247694698e-06, - "loss": 0.0064, + "loss": 0.0066, "step": 2323 }, { "epoch": 0.74, "learning_rate": 4.9484146173350014e-06, - "loss": 0.008, + "loss": 0.0077, "step": 2324 }, { "epoch": 0.74, "learning_rate": 4.936879785449629e-06, - "loss": 0.0054, + "loss": 0.0053, "step": 2325 }, { "epoch": 0.74, "learning_rate": 4.925355764430516e-06, - "loss": 0.0052, + "loss": 0.0061, "step": 2326 }, { "epoch": 0.74, "learning_rate": 4.913842566657974e-06, - "loss": 0.0053, + "loss": 0.0049, "step": 2327 }, { "epoch": 0.74, "learning_rate": 4.902340204500705e-06, - "loss": 0.0082, + "loss": 0.0079, "step": 2328 }, { "epoch": 0.74, "learning_rate": 4.890848690315752e-06, - "loss": 0.0124, + "loss": 0.0091, "step": 2329 }, { "epoch": 0.74, "learning_rate": 4.879368036448517e-06, - "loss": 0.0088, + "loss": 0.01, "step": 2330 }, { "epoch": 0.74, "learning_rate": 4.8678982552327356e-06, - "loss": 0.0043, + "loss": 0.005, "step": 2331 }, { "epoch": 0.74, "learning_rate": 4.856439358990448e-06, - "loss": 0.0075, + "loss": 0.0078, "step": 2332 }, { "epoch": 0.75, "learning_rate": 4.844991360032016e-06, - "loss": 0.0081, + "loss": 0.0084, "step": 2333 }, { "epoch": 0.75, "learning_rate": 4.833554270656093e-06, - "loss": 0.0057, + "loss": 0.0044, "step": 2334 }, { @@ -14045,73 +14045,73 @@ { "epoch": 0.75, "learning_rate": 4.8107128697877385e-06, - "loss": 0.0091, + "loss": 0.0078, "step": 2336 }, { "epoch": 0.75, "learning_rate": 4.79930858283395e-06, - "loss": 0.008, + "loss": 0.0081, "step": 2337 }, { "epoch": 0.75, "learning_rate": 4.787915254539933e-06, - "loss": 0.0055, + "loss": 0.0056, "step": 2338 }, { "epoch": 0.75, "learning_rate": 4.776532897145597e-06, - "loss": 0.0104, + "loss": 0.0083, "step": 2339 }, { "epoch": 0.75, "learning_rate": 4.765161522879077e-06, - "loss": 0.005, + "loss": 0.0053, "step": 2340 }, { "epoch": 0.75, "learning_rate": 4.753801143956697e-06, - "loss": 0.007, + "loss": 0.0076, "step": 2341 }, { "epoch": 0.75, "learning_rate": 4.742451772582984e-06, - "loss": 0.0055, + "loss": 0.0081, "step": 2342 }, { "epoch": 0.75, "learning_rate": 4.731113420950624e-06, - "loss": 0.0037, + "loss": 0.004, "step": 2343 }, { "epoch": 0.75, "learning_rate": 4.719786101240477e-06, - "loss": 0.0041, + "loss": 0.0043, "step": 2344 }, { "epoch": 0.75, "learning_rate": 4.708469825621538e-06, - "loss": 0.0069, + "loss": 0.0061, "step": 2345 }, { "epoch": 0.75, "learning_rate": 4.697164606250952e-06, - "loss": 0.0061, + "loss": 0.0058, "step": 2346 }, { "epoch": 0.75, "learning_rate": 4.6858704552739825e-06, - "loss": 0.0072, + "loss": 0.0058, "step": 2347 }, { @@ -14123,231 +14123,231 @@ { "epoch": 0.75, "learning_rate": 4.663315407022446e-06, - "loss": 0.0047, + "loss": 0.0077, "step": 2349 }, { "epoch": 0.75, - "eval_loss": 0.008719976991415024, - "eval_runtime": 28.1817, - "eval_samples_per_second": 187.108, - "eval_steps_per_second": 5.855, + "eval_loss": 0.008244536817073822, + "eval_runtime": 28.4604, + "eval_samples_per_second": 185.275, + "eval_steps_per_second": 11.595, "step": 2349 }, { "epoch": 0.75, "learning_rate": 4.652054533978895e-06, - "loss": 0.0049, + "loss": 0.0054, "step": 2350 }, { "epoch": 0.75, "learning_rate": 4.640804777790957e-06, - "loss": 0.0046, + "loss": 0.0042, "step": 2351 }, { "epoch": 0.75, "learning_rate": 4.629566150544315e-06, - "loss": 0.0093, + "loss": 0.007, "step": 2352 }, { "epoch": 0.75, "learning_rate": 4.618338664312678e-06, - "loss": 0.0054, + "loss": 0.007, "step": 2353 }, { "epoch": 0.75, "learning_rate": 4.607122331157806e-06, - "loss": 0.0047, + "loss": 0.0044, "step": 2354 }, { "epoch": 0.75, "learning_rate": 4.5959171631294605e-06, - "loss": 0.0084, + "loss": 0.008, "step": 2355 }, { "epoch": 0.75, "learning_rate": 4.584723172265421e-06, - "loss": 0.0049, + "loss": 0.0059, "step": 2356 }, { "epoch": 0.75, "learning_rate": 4.5735403705914594e-06, - "loss": 0.0071, + "loss": 0.0066, "step": 2357 }, { "epoch": 0.75, "learning_rate": 4.562368770121308e-06, - "loss": 0.0078, + "loss": 0.0055, "step": 2358 }, { "epoch": 0.75, "learning_rate": 4.551208382856692e-06, - "loss": 0.0083, + "loss": 0.0084, "step": 2359 }, { "epoch": 0.75, "learning_rate": 4.5400592207872754e-06, - "loss": 0.0072, + "loss": 0.0069, "step": 2360 }, { "epoch": 0.75, "learning_rate": 4.528921295890659e-06, - "loss": 0.0044, + "loss": 0.0047, "step": 2361 }, { "epoch": 0.75, "learning_rate": 4.517794620132389e-06, - "loss": 0.0096, + "loss": 0.0081, "step": 2362 }, { "epoch": 0.75, "learning_rate": 4.506679205465903e-06, - "loss": 0.0067, + "loss": 0.0065, "step": 2363 }, { "epoch": 0.76, "learning_rate": 4.495575063832563e-06, - "loss": 0.0031, + "loss": 0.0056, "step": 2364 }, { "epoch": 0.76, "learning_rate": 4.4844822071616015e-06, - "loss": 0.0069, + "loss": 0.0075, "step": 2365 }, { "epoch": 0.76, "learning_rate": 4.4734006473701455e-06, - "loss": 0.013, + "loss": 0.0179, "step": 2366 }, { "epoch": 0.76, "learning_rate": 4.462330396363166e-06, - "loss": 0.008, + "loss": 0.0075, "step": 2367 }, { "epoch": 0.76, "learning_rate": 4.451271466033508e-06, - "loss": 0.0125, + "loss": 0.0116, "step": 2368 }, { "epoch": 0.76, "learning_rate": 4.440223868261831e-06, - "loss": 0.0095, + "loss": 0.0096, "step": 2369 }, { "epoch": 0.76, "learning_rate": 4.42918761491664e-06, - "loss": 0.0043, + "loss": 0.0066, "step": 2370 }, { "epoch": 0.76, "learning_rate": 4.418162717854234e-06, - "loss": 0.0043, + "loss": 0.0038, "step": 2371 }, { "epoch": 0.76, "learning_rate": 4.4071491889187316e-06, - "loss": 0.0119, + "loss": 0.0109, "step": 2372 }, { "epoch": 0.76, "learning_rate": 4.3961470399420194e-06, - "loss": 0.008, + "loss": 0.0086, "step": 2373 }, { "epoch": 0.76, "learning_rate": 4.3851562827437716e-06, - "loss": 0.0058, + "loss": 0.0061, "step": 2374 }, { "epoch": 0.76, "learning_rate": 4.374176929131424e-06, - "loss": 0.0076, + "loss": 0.0078, "step": 2375 }, { "epoch": 0.76, "learning_rate": 4.36320899090015e-06, - "loss": 0.005, + "loss": 0.0052, "step": 2376 }, { "epoch": 0.76, "learning_rate": 4.35225247983287e-06, - "loss": 0.0071, + "loss": 0.0066, "step": 2377 }, { "epoch": 0.76, "learning_rate": 4.34130740770023e-06, - "loss": 0.0066, + "loss": 0.0071, "step": 2378 }, { "epoch": 0.76, "learning_rate": 4.330373786260573e-06, - "loss": 0.007, + "loss": 0.0064, "step": 2379 }, { "epoch": 0.76, "learning_rate": 4.3194516272599585e-06, - "loss": 0.0081, + "loss": 0.0069, "step": 2380 }, { "epoch": 0.76, "learning_rate": 4.308540942432115e-06, - "loss": 0.0067, + "loss": 0.0074, "step": 2381 }, { "epoch": 0.76, "learning_rate": 4.2976417434984565e-06, - "loss": 0.0039, + "loss": 0.0041, "step": 2382 }, { "epoch": 0.76, "learning_rate": 4.2867540421680566e-06, - "loss": 0.0063, + "loss": 0.0064, "step": 2383 }, { "epoch": 0.76, "learning_rate": 4.275877850137624e-06, - "loss": 0.0064, + "loss": 0.0066, "step": 2384 }, { "epoch": 0.76, "learning_rate": 4.265013179091525e-06, - "loss": 0.0053, + "loss": 0.005, "step": 2385 }, { @@ -14359,19 +14359,19 @@ { "epoch": 0.76, "learning_rate": 4.243318446627813e-06, - "loss": 0.006, + "loss": 0.0051, "step": 2387 }, { "epoch": 0.76, "learning_rate": 4.2324884085169815e-06, - "loss": 0.0025, + "loss": 0.0024, "step": 2388 }, { "epoch": 0.76, "learning_rate": 4.221669938003995e-06, - "loss": 0.0039, + "loss": 0.0048, "step": 2389 }, { @@ -14383,13 +14383,13 @@ { "epoch": 0.76, "learning_rate": 4.200067746248495e-06, - "loss": 0.0027, + "loss": 0.0025, "step": 2391 }, { "epoch": 0.76, "learning_rate": 4.189284048213336e-06, - "loss": 0.0048, + "loss": 0.0057, "step": 2392 }, { @@ -14401,319 +14401,319 @@ { "epoch": 0.76, "learning_rate": 4.16775150575313e-06, - "loss": 0.008, + "loss": 0.0075, "step": 2394 }, { "epoch": 0.76, "learning_rate": 4.157002684460606e-06, - "loss": 0.012, + "loss": 0.0121, "step": 2395 }, { "epoch": 0.77, "learning_rate": 4.146265511860669e-06, - "loss": 0.0079, + "loss": 0.0081, "step": 2396 }, { "epoch": 0.77, "learning_rate": 4.135539999488312e-06, - "loss": 0.0099, + "loss": 0.0083, "step": 2397 }, { "epoch": 0.77, "learning_rate": 4.124826158866024e-06, - "loss": 0.0032, + "loss": 0.003, "step": 2398 }, { "epoch": 0.77, "learning_rate": 4.114124001503733e-06, - "loss": 0.0036, + "loss": 0.0034, "step": 2399 }, { "epoch": 0.77, "learning_rate": 4.103433538898834e-06, - "loss": 0.0046, + "loss": 0.0044, "step": 2400 }, { "epoch": 0.77, "learning_rate": 4.092754782536151e-06, - "loss": 0.0071, + "loss": 0.0064, "step": 2401 }, { "epoch": 0.77, "learning_rate": 4.0820877438879255e-06, - "loss": 0.0079, + "loss": 0.0082, "step": 2402 }, { "epoch": 0.77, "learning_rate": 4.071432434413826e-06, - "loss": 0.0044, + "loss": 0.0042, "step": 2403 }, { "epoch": 0.77, "learning_rate": 4.060788865560899e-06, - "loss": 0.0072, + "loss": 0.0074, "step": 2404 }, { "epoch": 0.77, "learning_rate": 4.0501570487635975e-06, - "loss": 0.0048, + "loss": 0.0043, "step": 2405 }, { "epoch": 0.77, "learning_rate": 4.039536995443745e-06, - "loss": 0.0114, + "loss": 0.0151, "step": 2406 }, { "epoch": 0.77, "learning_rate": 4.028928717010515e-06, - "loss": 0.0057, + "loss": 0.0041, "step": 2407 }, { "epoch": 0.77, "learning_rate": 4.018332224860446e-06, - "loss": 0.0045, + "loss": 0.0048, "step": 2408 }, { "epoch": 0.77, "learning_rate": 4.00774753037741e-06, - "loss": 0.0043, + "loss": 0.0034, "step": 2409 }, { "epoch": 0.77, "learning_rate": 3.997174644932599e-06, - "loss": 0.008, + "loss": 0.0074, "step": 2410 }, { "epoch": 0.77, "learning_rate": 3.986613579884528e-06, - "loss": 0.0067, + "loss": 0.01, "step": 2411 }, { "epoch": 0.77, "learning_rate": 3.976064346579e-06, - "loss": 0.0024, + "loss": 0.0028, "step": 2412 }, { "epoch": 0.77, "learning_rate": 3.965526956349123e-06, - "loss": 0.0062, + "loss": 0.0071, "step": 2413 }, { "epoch": 0.77, "learning_rate": 3.955001420515273e-06, - "loss": 0.008, + "loss": 0.0071, "step": 2414 }, { "epoch": 0.77, "learning_rate": 3.944487750385088e-06, - "loss": 0.0077, + "loss": 0.0061, "step": 2415 }, { "epoch": 0.77, "learning_rate": 3.933985957253469e-06, - "loss": 0.0047, + "loss": 0.0059, "step": 2416 }, { "epoch": 0.77, "learning_rate": 3.923496052402542e-06, - "loss": 0.0063, + "loss": 0.0057, "step": 2417 }, { "epoch": 0.77, "learning_rate": 3.913018047101675e-06, - "loss": 0.0055, + "loss": 0.0052, "step": 2418 }, { "epoch": 0.77, "learning_rate": 3.902551952607455e-06, - "loss": 0.0069, + "loss": 0.0077, "step": 2419 }, { "epoch": 0.77, "learning_rate": 3.892097780163652e-06, - "loss": 0.0105, + "loss": 0.0097, "step": 2420 }, { "epoch": 0.77, "learning_rate": 3.8816555410012545e-06, - "loss": 0.0105, + "loss": 0.0124, "step": 2421 }, { "epoch": 0.77, "learning_rate": 3.871225246338408e-06, - "loss": 0.0052, + "loss": 0.0049, "step": 2422 }, { "epoch": 0.77, "learning_rate": 3.860806907380444e-06, - "loss": 0.0071, + "loss": 0.0058, "step": 2423 }, { "epoch": 0.77, "learning_rate": 3.850400535319835e-06, - "loss": 0.0087, + "loss": 0.0097, "step": 2424 }, { "epoch": 0.77, "learning_rate": 3.840006141336215e-06, - "loss": 0.0062, + "loss": 0.008, "step": 2425 }, { "epoch": 0.77, "learning_rate": 3.8296237365963276e-06, - "loss": 0.0045, + "loss": 0.0043, "step": 2426 }, { "epoch": 0.78, "learning_rate": 3.819253332254059e-06, - "loss": 0.0187, + "loss": 0.012, "step": 2427 }, { "epoch": 0.78, "learning_rate": 3.808894939450382e-06, - "loss": 0.0046, + "loss": 0.0044, "step": 2428 }, { "epoch": 0.78, "learning_rate": 3.7985485693133844e-06, - "loss": 0.0059, + "loss": 0.0065, "step": 2429 }, { "epoch": 0.78, "learning_rate": 3.788214232958224e-06, - "loss": 0.0082, + "loss": 0.0083, "step": 2430 }, { "epoch": 0.78, "learning_rate": 3.7778919414871377e-06, - "loss": 0.0074, + "loss": 0.0055, "step": 2431 }, { "epoch": 0.78, "learning_rate": 3.7675817059894257e-06, - "loss": 0.0081, + "loss": 0.0085, "step": 2432 }, { "epoch": 0.78, "learning_rate": 3.7572835375414234e-06, - "loss": 0.0129, + "loss": 0.013, "step": 2433 }, { "epoch": 0.78, "learning_rate": 3.746997447206514e-06, - "loss": 0.0112, + "loss": 0.0094, "step": 2434 }, { "epoch": 0.78, "learning_rate": 3.736723446035108e-06, - "loss": 0.0085, + "loss": 0.0081, "step": 2435 }, { "epoch": 0.78, "learning_rate": 3.7264615450646116e-06, - "loss": 0.0065, + "loss": 0.0051, "step": 2436 }, { "epoch": 0.78, "learning_rate": 3.7162117553194513e-06, - "loss": 0.0055, + "loss": 0.0064, "step": 2437 }, { "epoch": 0.78, "learning_rate": 3.7059740878110255e-06, - "loss": 0.0083, + "loss": 0.0086, "step": 2438 }, { "epoch": 0.78, "learning_rate": 3.6957485535377226e-06, - "loss": 0.0069, + "loss": 0.008, "step": 2439 }, { "epoch": 0.78, "learning_rate": 3.6855351634848928e-06, - "loss": 0.0103, + "loss": 0.0092, "step": 2440 }, { "epoch": 0.78, "learning_rate": 3.6753339286248334e-06, - "loss": 0.0076, + "loss": 0.007, "step": 2441 }, { "epoch": 0.78, "learning_rate": 3.665144859916793e-06, - "loss": 0.0055, + "loss": 0.0067, "step": 2442 }, { "epoch": 0.78, "learning_rate": 3.6549679683069393e-06, - "loss": 0.006, + "loss": 0.0052, "step": 2443 }, { "epoch": 0.78, "learning_rate": 3.6448032647283647e-06, - "loss": 0.0038, + "loss": 0.0039, "step": 2444 }, { "epoch": 0.78, "learning_rate": 3.634650760101076e-06, - "loss": 0.0029, + "loss": 0.0041, "step": 2445 }, { "epoch": 0.78, "learning_rate": 3.6245104653319517e-06, - "loss": 0.004, + "loss": 0.0073, "step": 2446 }, { @@ -14725,229 +14725,229 @@ { "epoch": 0.78, "learning_rate": 3.6042665489301956e-06, - "loss": 0.0075, + "loss": 0.0084, "step": 2448 }, { "epoch": 0.78, "learning_rate": 3.5941629490457173e-06, - "loss": 0.0055, + "loss": 0.007, "step": 2449 }, { "epoch": 0.78, "learning_rate": 3.584071602515685e-06, - "loss": 0.0023, + "loss": 0.0029, "step": 2450 }, { "epoch": 0.78, "learning_rate": 3.573992520181302e-06, - "loss": 0.0068, + "loss": 0.0069, "step": 2451 }, { "epoch": 0.78, "learning_rate": 3.5639257128705694e-06, - "loss": 0.0057, + "loss": 0.007, "step": 2452 }, { "epoch": 0.78, "learning_rate": 3.5538711913983284e-06, - "loss": 0.0109, + "loss": 0.0083, "step": 2453 }, { "epoch": 0.78, "learning_rate": 3.5438289665661955e-06, - "loss": 0.0112, + "loss": 0.0115, "step": 2454 }, { "epoch": 0.78, "learning_rate": 3.533799049162597e-06, - "loss": 0.0086, + "loss": 0.0089, "step": 2455 }, { "epoch": 0.78, "learning_rate": 3.523781449962724e-06, - "loss": 0.0049, + "loss": 0.0051, "step": 2456 }, { "epoch": 0.78, "learning_rate": 3.5137761797285396e-06, - "loss": 0.0076, + "loss": 0.0066, "step": 2457 }, { "epoch": 0.79, "learning_rate": 3.5037832492087686e-06, - "loss": 0.0065, + "loss": 0.0076, "step": 2458 }, { "epoch": 0.79, "learning_rate": 3.4938026691388636e-06, - "loss": 0.0037, + "loss": 0.0033, "step": 2459 }, { "epoch": 0.79, "learning_rate": 3.483834450241023e-06, - "loss": 0.0083, + "loss": 0.0082, "step": 2460 }, { "epoch": 0.79, "learning_rate": 3.4738786032241553e-06, - "loss": 0.0079, + "loss": 0.0068, "step": 2461 }, { "epoch": 0.79, "learning_rate": 3.463935138783885e-06, - "loss": 0.0073, + "loss": 0.0062, "step": 2462 }, { "epoch": 0.79, "learning_rate": 3.4540040676025343e-06, - "loss": 0.0063, + "loss": 0.0057, "step": 2463 }, { "epoch": 0.79, "learning_rate": 3.444085400349103e-06, - "loss": 0.0071, + "loss": 0.0051, "step": 2464 }, { "epoch": 0.79, "learning_rate": 3.434179147679273e-06, - "loss": 0.0075, + "loss": 0.0074, "step": 2465 }, { "epoch": 0.79, "learning_rate": 3.4242853202353922e-06, - "loss": 0.0046, + "loss": 0.005, "step": 2466 }, { "epoch": 0.79, "learning_rate": 3.4144039286464447e-06, - "loss": 0.009, + "loss": 0.0093, "step": 2467 }, { "epoch": 0.79, "learning_rate": 3.4045349835280744e-06, - "loss": 0.0055, + "loss": 0.0059, "step": 2468 }, { "epoch": 0.79, "learning_rate": 3.3946784954825355e-06, - "loss": 0.0119, + "loss": 0.0138, "step": 2469 }, { "epoch": 0.79, "learning_rate": 3.3848344750987115e-06, - "loss": 0.0053, + "loss": 0.0046, "step": 2470 }, { "epoch": 0.79, "learning_rate": 3.375002932952094e-06, - "loss": 0.0054, + "loss": 0.0066, "step": 2471 }, { "epoch": 0.79, "learning_rate": 3.3651838796047553e-06, - "loss": 0.0102, + "loss": 0.0076, "step": 2472 }, { "epoch": 0.79, "learning_rate": 3.355377325605366e-06, - "loss": 0.0099, + "loss": 0.0062, "step": 2473 }, { "epoch": 0.79, "learning_rate": 3.3455832814891548e-06, - "loss": 0.0108, + "loss": 0.0079, "step": 2474 }, { "epoch": 0.79, "learning_rate": 3.3358017577779225e-06, - "loss": 0.0058, + "loss": 0.0071, "step": 2475 }, { "epoch": 0.79, "learning_rate": 3.326032764980018e-06, - "loss": 0.0077, + "loss": 0.0081, "step": 2476 }, { "epoch": 0.79, "learning_rate": 3.3162763135903196e-06, - "loss": 0.0053, + "loss": 0.0096, "step": 2477 }, { "epoch": 0.79, "learning_rate": 3.3065324140902358e-06, - "loss": 0.0059, + "loss": 0.0052, "step": 2478 }, { "epoch": 0.79, "learning_rate": 3.2968010769477e-06, - "loss": 0.0051, + "loss": 0.006, "step": 2479 }, { "epoch": 0.79, "learning_rate": 3.2870823126171317e-06, - "loss": 0.007, + "loss": 0.0064, "step": 2480 }, { "epoch": 0.79, "learning_rate": 3.2773761315394635e-06, - "loss": 0.0124, + "loss": 0.0132, "step": 2481 }, { "epoch": 0.79, "learning_rate": 3.267682544142092e-06, - "loss": 0.0057, + "loss": 0.0065, "step": 2482 }, { "epoch": 0.79, "learning_rate": 3.2580015608388956e-06, - "loss": 0.0042, + "loss": 0.0039, "step": 2483 }, { "epoch": 0.79, "learning_rate": 3.248333192030215e-06, - "loss": 0.0064, + "loss": 0.0057, "step": 2484 }, { "epoch": 0.79, "learning_rate": 3.2386774481028213e-06, - "loss": 0.009, + "loss": 0.0092, "step": 2485 }, { @@ -14959,103 +14959,103 @@ { "epoch": 0.79, "learning_rate": 3.2194038763712204e-06, - "loss": 0.0095, + "loss": 0.0079, "step": 2487 }, { "epoch": 0.79, "learning_rate": 3.209786069272715e-06, - "loss": 0.0048, + "loss": 0.0041, "step": 2488 }, { "epoch": 0.79, "learning_rate": 3.2001809284668957e-06, - "loss": 0.0043, + "loss": 0.0075, "step": 2489 }, { "epoch": 0.8, "learning_rate": 3.190588464272611e-06, - "loss": 0.0037, + "loss": 0.0032, "step": 2490 }, { "epoch": 0.8, "learning_rate": 3.1810086869951062e-06, - "loss": 0.0047, + "loss": 0.0054, "step": 2491 }, { "epoch": 0.8, "learning_rate": 3.1714416069259813e-06, - "loss": 0.005, + "loss": 0.0051, "step": 2492 }, { "epoch": 0.8, "learning_rate": 3.1618872343432074e-06, - "loss": 0.0034, + "loss": 0.0035, "step": 2493 }, { "epoch": 0.8, "learning_rate": 3.1523455795111066e-06, - "loss": 0.0029, + "loss": 0.003, "step": 2494 }, { "epoch": 0.8, "learning_rate": 3.142816652680322e-06, - "loss": 0.0049, + "loss": 0.0057, "step": 2495 }, { "epoch": 0.8, "learning_rate": 3.1333004640878383e-06, - "loss": 0.0089, + "loss": 0.0083, "step": 2496 }, { "epoch": 0.8, "learning_rate": 3.123797023956954e-06, - "loss": 0.0118, + "loss": 0.0111, "step": 2497 }, { "epoch": 0.8, "learning_rate": 3.114306342497258e-06, - "loss": 0.0057, + "loss": 0.0063, "step": 2498 }, { "epoch": 0.8, "learning_rate": 3.104828429904652e-06, - "loss": 0.0062, + "loss": 0.0059, "step": 2499 }, { "epoch": 0.8, "learning_rate": 3.0953632963613014e-06, - "loss": 0.0079, + "loss": 0.0067, "step": 2500 }, { "epoch": 0.8, "learning_rate": 3.085910952035658e-06, - "loss": 0.0048, + "loss": 0.0032, "step": 2501 }, { "epoch": 0.8, "learning_rate": 3.0764714070824305e-06, - "loss": 0.0065, + "loss": 0.0071, "step": 2502 }, { "epoch": 0.8, "learning_rate": 3.067044671642567e-06, - "loss": 0.006, + "loss": 0.0067, "step": 2503 }, { @@ -15067,163 +15067,163 @@ { "epoch": 0.8, "learning_rate": 3.048229669797954e-06, - "loss": 0.0115, + "loss": 0.0091, "step": 2505 }, { "epoch": 0.8, "learning_rate": 3.0388414236062656e-06, - "loss": 0.0028, + "loss": 0.0031, "step": 2506 }, { "epoch": 0.8, "learning_rate": 3.029466027354043e-06, - "loss": 0.006, + "loss": 0.0074, "step": 2507 }, { "epoch": 0.8, "learning_rate": 3.0201034911133344e-06, - "loss": 0.0073, + "loss": 0.0078, "step": 2508 }, { "epoch": 0.8, "learning_rate": 3.010753824942358e-06, - "loss": 0.0092, + "loss": 0.011, "step": 2509 }, { "epoch": 0.8, "learning_rate": 3.0014170388855195e-06, - "loss": 0.0055, + "loss": 0.0068, "step": 2510 }, { "epoch": 0.8, "learning_rate": 2.9920931429733713e-06, - "loss": 0.0065, + "loss": 0.0059, "step": 2511 }, { "epoch": 0.8, "learning_rate": 2.982782147222635e-06, - "loss": 0.0069, + "loss": 0.0055, "step": 2512 }, { "epoch": 0.8, "learning_rate": 2.9734840616361587e-06, - "loss": 0.0081, + "loss": 0.0075, "step": 2513 }, { "epoch": 0.8, "learning_rate": 2.964198896202931e-06, - "loss": 0.008, + "loss": 0.0075, "step": 2514 }, { "epoch": 0.8, "learning_rate": 2.9549266608980595e-06, - "loss": 0.0094, + "loss": 0.0086, "step": 2515 }, { "epoch": 0.8, "learning_rate": 2.945667365682753e-06, - "loss": 0.005, + "loss": 0.0044, "step": 2516 }, { "epoch": 0.8, "learning_rate": 2.9364210205043314e-06, - "loss": 0.0043, + "loss": 0.0038, "step": 2517 }, { "epoch": 0.8, "learning_rate": 2.927187635296186e-06, - "loss": 0.0061, + "loss": 0.0067, "step": 2518 }, { "epoch": 0.8, "learning_rate": 2.9179672199777984e-06, - "loss": 0.0047, + "loss": 0.0046, "step": 2519 }, { "epoch": 0.8, "learning_rate": 2.9087597844547176e-06, - "loss": 0.0086, + "loss": 0.0081, "step": 2520 }, { "epoch": 0.81, "learning_rate": 2.8995653386185327e-06, - "loss": 0.0059, + "loss": 0.0062, "step": 2521 }, { "epoch": 0.81, "learning_rate": 2.890383892346898e-06, - "loss": 0.0048, + "loss": 0.0052, "step": 2522 }, { "epoch": 0.81, "learning_rate": 2.8812154555034843e-06, - "loss": 0.0053, + "loss": 0.0059, "step": 2523 }, { "epoch": 0.81, "learning_rate": 2.8720600379379964e-06, - "loss": 0.0081, + "loss": 0.0065, "step": 2524 }, { "epoch": 0.81, "learning_rate": 2.862917649486158e-06, - "loss": 0.0097, + "loss": 0.0069, "step": 2525 }, { "epoch": 0.81, "learning_rate": 2.8537882999696762e-06, - "loss": 0.009, + "loss": 0.0101, "step": 2526 }, { "epoch": 0.81, "learning_rate": 2.8446719991962705e-06, - "loss": 0.0094, + "loss": 0.0116, "step": 2527 }, { "epoch": 0.81, "learning_rate": 2.835568756959635e-06, - "loss": 0.0076, + "loss": 0.0062, "step": 2528 }, { "epoch": 0.81, "learning_rate": 2.826478583039425e-06, - "loss": 0.0042, + "loss": 0.0036, "step": 2529 }, { "epoch": 0.81, "learning_rate": 2.817401487201276e-06, - "loss": 0.0077, + "loss": 0.0079, "step": 2530 }, { "epoch": 0.81, "learning_rate": 2.808337479196752e-06, - "loss": 0.0053, + "loss": 0.0061, "step": 2531 }, { @@ -15235,19 +15235,19 @@ { "epoch": 0.81, "learning_rate": 2.7902487656245874e-06, - "loss": 0.004, + "loss": 0.0045, "step": 2533 }, { "epoch": 0.81, "learning_rate": 2.78122407948975e-06, - "loss": 0.0066, + "loss": 0.0053, "step": 2534 }, { "epoch": 0.81, "learning_rate": 2.772212520054131e-06, - "loss": 0.0098, + "loss": 0.0114, "step": 2535 }, { @@ -15259,289 +15259,289 @@ { "epoch": 0.81, "learning_rate": 2.7542288199911233e-06, - "loss": 0.0048, + "loss": 0.0045, "step": 2537 }, { "epoch": 0.81, "learning_rate": 2.7452566986837233e-06, - "loss": 0.0053, + "loss": 0.0045, "step": 2538 }, { "epoch": 0.81, "learning_rate": 2.7362977427154993e-06, - "loss": 0.0065, + "loss": 0.0073, "step": 2539 }, { "epoch": 0.81, "learning_rate": 2.727351961711112e-06, - "loss": 0.008, + "loss": 0.0074, "step": 2540 }, { "epoch": 0.81, "learning_rate": 2.7184193652810654e-06, - "loss": 0.003, + "loss": 0.0039, "step": 2541 }, { "epoch": 0.81, "learning_rate": 2.709499963021691e-06, - "loss": 0.0084, + "loss": 0.008, "step": 2542 }, { "epoch": 0.81, "learning_rate": 2.7005937645151628e-06, - "loss": 0.0087, + "loss": 0.0083, "step": 2543 }, { "epoch": 0.81, "learning_rate": 2.6917007793294475e-06, - "loss": 0.0049, + "loss": 0.0046, "step": 2544 }, { "epoch": 0.81, "learning_rate": 2.682821017018337e-06, - "loss": 0.0039, + "loss": 0.0038, "step": 2545 }, { "epoch": 0.81, "learning_rate": 2.6739544871214094e-06, - "loss": 0.0025, + "loss": 0.0027, "step": 2546 }, { "epoch": 0.81, "learning_rate": 2.665101199164023e-06, - "loss": 0.0088, + "loss": 0.0079, "step": 2547 }, { "epoch": 0.81, "learning_rate": 2.6562611626573207e-06, - "loss": 0.0038, + "loss": 0.0049, "step": 2548 }, { "epoch": 0.81, "learning_rate": 2.647434387098195e-06, - "loss": 0.0044, + "loss": 0.0045, "step": 2549 }, { "epoch": 0.81, "learning_rate": 2.6386208819693037e-06, - "loss": 0.0055, + "loss": 0.0052, "step": 2550 }, { "epoch": 0.81, "learning_rate": 2.62982065673905e-06, - "loss": 0.005, + "loss": 0.0071, "step": 2551 }, { "epoch": 0.82, "learning_rate": 2.621033720861556e-06, - "loss": 0.01, + "loss": 0.0087, "step": 2552 }, { "epoch": 0.82, "learning_rate": 2.612260083776684e-06, - "loss": 0.0053, + "loss": 0.0046, "step": 2553 }, { "epoch": 0.82, "learning_rate": 2.603499754909995e-06, - "loss": 0.0055, + "loss": 0.0051, "step": 2554 }, { "epoch": 0.82, "learning_rate": 2.5947527436727596e-06, - "loss": 0.0047, + "loss": 0.0063, "step": 2555 }, { "epoch": 0.82, "learning_rate": 2.5860190594619486e-06, - "loss": 0.0148, + "loss": 0.015, "step": 2556 }, { "epoch": 0.82, "learning_rate": 2.577298711660198e-06, - "loss": 0.0069, + "loss": 0.0062, "step": 2557 }, { "epoch": 0.82, "learning_rate": 2.5685917096358326e-06, - "loss": 0.0029, + "loss": 0.003, "step": 2558 }, { "epoch": 0.82, "learning_rate": 2.5598980627428324e-06, - "loss": 0.0035, + "loss": 0.0029, "step": 2559 }, { "epoch": 0.82, "learning_rate": 2.5512177803208293e-06, - "loss": 0.0076, + "loss": 0.0088, "step": 2560 }, { "epoch": 0.82, "learning_rate": 2.542550871695096e-06, - "loss": 0.0024, + "loss": 0.003, "step": 2561 }, { "epoch": 0.82, "learning_rate": 2.533897346176549e-06, - "loss": 0.0053, + "loss": 0.0043, "step": 2562 }, { "epoch": 0.82, "learning_rate": 2.5252572130617113e-06, - "loss": 0.0161, + "loss": 0.0112, "step": 2563 }, { "epoch": 0.82, "learning_rate": 2.516630481632731e-06, - "loss": 0.0071, + "loss": 0.0058, "step": 2564 }, { "epoch": 0.82, "learning_rate": 2.508017161157349e-06, - "loss": 0.0047, + "loss": 0.0042, "step": 2565 }, { "epoch": 0.82, "learning_rate": 2.4994172608889103e-06, - "loss": 0.0076, + "loss": 0.0066, "step": 2566 }, { "epoch": 0.82, "learning_rate": 2.490830790066329e-06, - "loss": 0.0081, + "loss": 0.0074, "step": 2567 }, { "epoch": 0.82, "learning_rate": 2.482257757914101e-06, - "loss": 0.0038, + "loss": 0.0041, "step": 2568 }, { "epoch": 0.82, "learning_rate": 2.4736981736422887e-06, - "loss": 0.0062, + "loss": 0.005, "step": 2569 }, { "epoch": 0.82, "learning_rate": 2.4651520464464914e-06, - "loss": 0.0034, + "loss": 0.0054, "step": 2570 }, { "epoch": 0.82, "learning_rate": 2.4566193855078655e-06, - "loss": 0.0042, + "loss": 0.0043, "step": 2571 }, { "epoch": 0.82, "learning_rate": 2.448100199993101e-06, - "loss": 0.0053, + "loss": 0.0045, "step": 2572 }, { "epoch": 0.82, "learning_rate": 2.4395944990543994e-06, - "loss": 0.005, + "loss": 0.0049, "step": 2573 }, { "epoch": 0.82, "learning_rate": 2.4311022918294874e-06, - "loss": 0.0114, + "loss": 0.0102, "step": 2574 }, { "epoch": 0.82, "learning_rate": 2.422623587441587e-06, - "loss": 0.0039, + "loss": 0.0103, "step": 2575 }, { "epoch": 0.82, "learning_rate": 2.414158394999419e-06, - "loss": 0.0089, + "loss": 0.0083, "step": 2576 }, { "epoch": 0.82, "learning_rate": 2.40570672359719e-06, - "loss": 0.0056, + "loss": 0.0055, "step": 2577 }, { "epoch": 0.82, "learning_rate": 2.397268582314573e-06, - "loss": 0.0125, + "loss": 0.0141, "step": 2578 }, { "epoch": 0.82, "learning_rate": 2.3888439802167157e-06, - "loss": 0.0065, + "loss": 0.0068, "step": 2579 }, { "epoch": 0.82, "learning_rate": 2.3804329263542066e-06, - "loss": 0.0103, + "loss": 0.0111, "step": 2580 }, { "epoch": 0.82, "learning_rate": 2.3720354297630954e-06, - "loss": 0.0088, + "loss": 0.0048, "step": 2581 }, { "epoch": 0.82, "learning_rate": 2.3636514994648613e-06, - "loss": 0.0053, + "loss": 0.0047, "step": 2582 }, { "epoch": 0.82, "learning_rate": 2.3552811444663996e-06, - "loss": 0.0081, + "loss": 0.0078, "step": 2583 }, { "epoch": 0.83, "learning_rate": 2.3469243737600326e-06, - "loss": 0.0046, + "loss": 0.005, "step": 2584 }, { @@ -15553,25 +15553,25 @@ { "epoch": 0.83, "learning_rate": 2.3302516211198883e-06, - "loss": 0.0027, + "loss": 0.0033, "step": 2586 }, { "epoch": 0.83, "learning_rate": 2.3219356570977428e-06, - "loss": 0.0062, + "loss": 0.006, "step": 2587 }, { "epoch": 0.83, "learning_rate": 2.3136333131909316e-06, - "loss": 0.006, + "loss": 0.0058, "step": 2588 }, { "epoch": 0.83, "learning_rate": 2.3053445983187177e-06, - "loss": 0.0097, + "loss": 0.0098, "step": 2589 }, { @@ -15583,205 +15583,205 @@ { "epoch": 0.83, "learning_rate": 2.2888080912818644e-06, - "loss": 0.0087, + "loss": 0.0089, "step": 2591 }, { "epoch": 0.83, "learning_rate": 2.280560316882492e-06, - "loss": 0.0051, + "loss": 0.005, "step": 2592 }, { "epoch": 0.83, "learning_rate": 2.2723262070482132e-06, - "loss": 0.007, + "loss": 0.0077, "step": 2593 }, { "epoch": 0.83, "learning_rate": 2.2641057706249824e-06, - "loss": 0.0049, + "loss": 0.0038, "step": 2594 }, { "epoch": 0.83, "learning_rate": 2.2558990164440656e-06, - "loss": 0.0064, + "loss": 0.0098, "step": 2595 }, { "epoch": 0.83, "learning_rate": 2.247705953322014e-06, - "loss": 0.0064, + "loss": 0.0063, "step": 2596 }, { "epoch": 0.83, "learning_rate": 2.2395265900606916e-06, - "loss": 0.0056, + "loss": 0.008, "step": 2597 }, { "epoch": 0.83, "learning_rate": 2.231360935447226e-06, - "loss": 0.0069, + "loss": 0.0071, "step": 2598 }, { "epoch": 0.83, "learning_rate": 2.223208998254028e-06, - "loss": 0.0096, + "loss": 0.0091, "step": 2599 }, { "epoch": 0.83, "learning_rate": 2.2150707872387733e-06, - "loss": 0.0038, + "loss": 0.0039, "step": 2600 }, { "epoch": 0.83, "learning_rate": 2.2069463111443805e-06, - "loss": 0.0074, + "loss": 0.0044, "step": 2601 }, { "epoch": 0.83, "learning_rate": 2.1988355786990234e-06, - "loss": 0.0062, + "loss": 0.0063, "step": 2602 }, { "epoch": 0.83, "learning_rate": 2.19073859861611e-06, - "loss": 0.0057, + "loss": 0.0048, "step": 2603 }, { "epoch": 0.83, "learning_rate": 2.1826553795942655e-06, - "loss": 0.0092, + "loss": 0.0121, "step": 2604 }, { "epoch": 0.83, "learning_rate": 2.1745859303173438e-06, - "loss": 0.0041, + "loss": 0.0048, "step": 2605 }, { "epoch": 0.83, "learning_rate": 2.1665302594543918e-06, - "loss": 0.0075, + "loss": 0.0067, "step": 2606 }, { "epoch": 0.83, "learning_rate": 2.158488375659665e-06, - "loss": 0.0091, + "loss": 0.0082, "step": 2607 }, { "epoch": 0.83, "learning_rate": 2.1504602875726083e-06, - "loss": 0.0162, + "loss": 0.0114, "step": 2608 }, { "epoch": 0.83, "learning_rate": 2.1424460038178333e-06, - "loss": 0.0049, + "loss": 0.0067, "step": 2609 }, { "epoch": 0.83, "learning_rate": 2.1344455330051387e-06, - "loss": 0.0124, + "loss": 0.0086, "step": 2610 }, { "epoch": 0.83, "learning_rate": 2.1264588837294672e-06, - "loss": 0.0029, + "loss": 0.0024, "step": 2611 }, { "epoch": 0.83, "learning_rate": 2.1184860645709243e-06, - "loss": 0.0084, + "loss": 0.006, "step": 2612 }, { "epoch": 0.83, "learning_rate": 2.110527084094756e-06, - "loss": 0.0095, + "loss": 0.0084, "step": 2613 }, { "epoch": 0.83, "learning_rate": 2.102581950851335e-06, - "loss": 0.0079, + "loss": 0.0081, "step": 2614 }, { "epoch": 0.84, "learning_rate": 2.0946506733761683e-06, - "loss": 0.0074, + "loss": 0.0078, "step": 2615 }, { "epoch": 0.84, "learning_rate": 2.0867332601898635e-06, - "loss": 0.0095, + "loss": 0.0105, "step": 2616 }, { "epoch": 0.84, "learning_rate": 2.078829719798152e-06, - "loss": 0.0091, + "loss": 0.0064, "step": 2617 }, { "epoch": 0.84, "learning_rate": 2.07094006069184e-06, - "loss": 0.0072, + "loss": 0.007, "step": 2618 }, { "epoch": 0.84, "learning_rate": 2.063064291346844e-06, - "loss": 0.004, + "loss": 0.0041, "step": 2619 }, { "epoch": 0.84, "learning_rate": 2.055202420224136e-06, - "loss": 0.0043, + "loss": 0.0046, "step": 2620 }, { "epoch": 0.84, "learning_rate": 2.0473544557697765e-06, - "loss": 0.0079, + "loss": 0.0064, "step": 2621 }, { "epoch": 0.84, "learning_rate": 2.0395204064148698e-06, - "loss": 0.0087, + "loss": 0.0082, "step": 2622 }, { "epoch": 0.84, "learning_rate": 2.0317002805755887e-06, - "loss": 0.004, + "loss": 0.0041, "step": 2623 }, { "epoch": 0.84, "learning_rate": 2.023894086653129e-06, - "loss": 0.0039, + "loss": 0.004, "step": 2624 }, { @@ -15793,7 +15793,7 @@ { "epoch": 0.84, "learning_rate": 2.008323528088661e-06, - "loss": 0.0094, + "loss": 0.0099, "step": 2626 }, { @@ -15805,43 +15805,43 @@ { "epoch": 0.84, "learning_rate": 1.992808797631604e-06, - "loss": 0.0044, + "loss": 0.0042, "step": 2628 }, { "epoch": 0.84, "learning_rate": 1.9850723887871757e-06, - "loss": 0.0055, + "loss": 0.0059, "step": 2629 }, { "epoch": 0.84, "learning_rate": 1.9773499619521794e-06, - "loss": 0.0052, + "loss": 0.0055, "step": 2630 }, { "epoch": 0.84, "learning_rate": 1.969641525422862e-06, - "loss": 0.0113, + "loss": 0.0134, "step": 2631 }, { "epoch": 0.84, "learning_rate": 1.961947087480433e-06, - "loss": 0.0022, + "loss": 0.0023, "step": 2632 }, { "epoch": 0.84, "learning_rate": 1.9542666563910745e-06, - "loss": 0.0082, + "loss": 0.0069, "step": 2633 }, { "epoch": 0.84, "learning_rate": 1.946600240405918e-06, - "loss": 0.0104, + "loss": 0.0105, "step": 2634 }, { @@ -15853,97 +15853,97 @@ { "epoch": 0.84, "learning_rate": 1.9313094866774327e-06, - "loss": 0.0121, + "loss": 0.0124, "step": 2636 }, { "epoch": 0.84, "learning_rate": 1.9236851653610454e-06, - "loss": 0.0025, + "loss": 0.0027, "step": 2637 }, { "epoch": 0.84, "learning_rate": 1.916074892002725e-06, - "loss": 0.0076, + "loss": 0.0071, "step": 2638 }, { "epoch": 0.84, "learning_rate": 1.908478674778233e-06, - "loss": 0.0078, + "loss": 0.0086, "step": 2639 }, { "epoch": 0.84, "learning_rate": 1.9008965218482216e-06, - "loss": 0.0047, + "loss": 0.0046, "step": 2640 }, { "epoch": 0.84, "learning_rate": 1.893328441358247e-06, - "loss": 0.003, + "loss": 0.0036, "step": 2641 }, { "epoch": 0.84, "learning_rate": 1.885774441438735e-06, - "loss": 0.0034, + "loss": 0.0028, "step": 2642 }, { "epoch": 0.84, "learning_rate": 1.8782345302049947e-06, - "loss": 0.0042, + "loss": 0.0167, "step": 2643 }, { "epoch": 0.84, "learning_rate": 1.8707087157571895e-06, - "loss": 0.0114, + "loss": 0.0092, "step": 2644 }, { "epoch": 0.84, "learning_rate": 1.8631970061803504e-06, - "loss": 0.0044, + "loss": 0.0053, "step": 2645 }, { "epoch": 0.85, "learning_rate": 1.8556994095443408e-06, - "loss": 0.0077, + "loss": 0.0073, "step": 2646 }, { "epoch": 0.85, "learning_rate": 1.848215933903879e-06, - "loss": 0.0051, + "loss": 0.0062, "step": 2647 }, { "epoch": 0.85, "learning_rate": 1.8407465872984975e-06, - "loss": 0.0082, + "loss": 0.0065, "step": 2648 }, { "epoch": 0.85, "learning_rate": 1.8332913777525623e-06, - "loss": 0.0018, + "loss": 0.0023, "step": 2649 }, { "epoch": 0.85, "learning_rate": 1.8258503132752436e-06, - "loss": 0.0041, + "loss": 0.0054, "step": 2650 }, { "epoch": 0.85, "learning_rate": 1.8184234018605185e-06, - "loss": 0.008, + "loss": 0.0074, "step": 2651 }, { @@ -15961,175 +15961,175 @@ { "epoch": 0.85, "learning_rate": 1.7962276657035665e-06, - "loss": 0.0128, + "loss": 0.011, "step": 2654 }, { "epoch": 0.85, "learning_rate": 1.7888574461747687e-06, - "loss": 0.0075, + "loss": 0.0077, "step": 2655 }, { "epoch": 0.85, "learning_rate": 1.7815014194502111e-06, - "loss": 0.0111, + "loss": 0.0107, "step": 2656 }, { "epoch": 0.85, "learning_rate": 1.7741595934325138e-06, - "loss": 0.004, + "loss": 0.0033, "step": 2657 }, { "epoch": 0.85, "learning_rate": 1.766831976009038e-06, - "loss": 0.0117, + "loss": 0.0109, "step": 2658 }, { "epoch": 0.85, "learning_rate": 1.7595185750518844e-06, - "loss": 0.0055, + "loss": 0.0069, "step": 2659 }, { "epoch": 0.85, "learning_rate": 1.7522193984178886e-06, - "loss": 0.0061, + "loss": 0.0071, "step": 2660 }, { "epoch": 0.85, "learning_rate": 1.7449344539485845e-06, - "loss": 0.0057, + "loss": 0.0068, "step": 2661 }, { "epoch": 0.85, "learning_rate": 1.7376637494702403e-06, - "loss": 0.0076, + "loss": 0.0087, "step": 2662 }, { "epoch": 0.85, "learning_rate": 1.730407292793804e-06, - "loss": 0.003, + "loss": 0.0031, "step": 2663 }, { "epoch": 0.85, "learning_rate": 1.723165091714934e-06, - "loss": 0.0141, + "loss": 0.0111, "step": 2664 }, { "epoch": 0.85, "learning_rate": 1.7159371540139674e-06, - "loss": 0.0023, + "loss": 0.0026, "step": 2665 }, { "epoch": 0.85, "learning_rate": 1.7087234874559149e-06, - "loss": 0.005, + "loss": 0.0043, "step": 2666 }, { "epoch": 0.85, "learning_rate": 1.7015240997904624e-06, - "loss": 0.0034, + "loss": 0.0032, "step": 2667 }, { "epoch": 0.85, "learning_rate": 1.694338998751948e-06, - "loss": 0.0072, + "loss": 0.0078, "step": 2668 }, { "epoch": 0.85, "learning_rate": 1.6871681920593674e-06, - "loss": 0.0062, + "loss": 0.0059, "step": 2669 }, { "epoch": 0.85, "learning_rate": 1.6800116874163613e-06, - "loss": 0.0042, + "loss": 0.005, "step": 2670 }, { "epoch": 0.85, "learning_rate": 1.6728694925111981e-06, - "loss": 0.0222, + "loss": 0.0241, "step": 2671 }, { "epoch": 0.85, "learning_rate": 1.6657416150167836e-06, - "loss": 0.0093, + "loss": 0.0087, "step": 2672 }, { "epoch": 0.85, "learning_rate": 1.6586280625906308e-06, - "loss": 0.0064, + "loss": 0.0055, "step": 2673 }, { "epoch": 0.85, "learning_rate": 1.6515288428748698e-06, - "loss": 0.0045, + "loss": 0.0043, "step": 2674 }, { "epoch": 0.85, "learning_rate": 1.6444439634962349e-06, - "loss": 0.0089, + "loss": 0.0099, "step": 2675 }, { "epoch": 0.85, "learning_rate": 1.637373432066046e-06, - "loss": 0.0134, + "loss": 0.0079, "step": 2676 }, { "epoch": 0.85, "learning_rate": 1.6303172561802188e-06, - "loss": 0.0063, + "loss": 0.0061, "step": 2677 }, { "epoch": 0.86, "learning_rate": 1.6232754434192442e-06, - "loss": 0.0104, + "loss": 0.0118, "step": 2678 }, { "epoch": 0.86, "learning_rate": 1.6162480013481761e-06, - "loss": 0.0029, + "loss": 0.0028, "step": 2679 }, { "epoch": 0.86, "learning_rate": 1.6092349375166403e-06, - "loss": 0.0042, + "loss": 0.0044, "step": 2680 }, { "epoch": 0.86, "learning_rate": 1.6022362594588037e-06, - "loss": 0.0098, + "loss": 0.0078, "step": 2681 }, { "epoch": 0.86, "learning_rate": 1.5952519746933897e-06, - "loss": 0.0102, + "loss": 0.0109, "step": 2682 }, { @@ -16141,79 +16141,79 @@ { "epoch": 0.86, "learning_rate": 1.5813266150373818e-06, - "loss": 0.0056, + "loss": 0.0039, "step": 2684 }, { "epoch": 0.86, "learning_rate": 1.5743855551068809e-06, - "loss": 0.0117, + "loss": 0.0109, "step": 2685 }, { "epoch": 0.86, "learning_rate": 1.5674589183889659e-06, - "loss": 0.0053, + "loss": 0.0106, "step": 2686 }, { "epoch": 0.86, "learning_rate": 1.5605467123249646e-06, - "loss": 0.006, + "loss": 0.0059, "step": 2687 }, { "epoch": 0.86, "learning_rate": 1.5536489443407016e-06, - "loss": 0.008, + "loss": 0.01, "step": 2688 }, { "epoch": 0.86, "learning_rate": 1.546765621846481e-06, - "loss": 0.0069, + "loss": 0.0067, "step": 2689 }, { "epoch": 0.86, "learning_rate": 1.5398967522370978e-06, - "loss": 0.0054, + "loss": 0.0049, "step": 2690 }, { "epoch": 0.86, "learning_rate": 1.5330423428918205e-06, - "loss": 0.0061, + "loss": 0.006, "step": 2691 }, { "epoch": 0.86, "learning_rate": 1.5262024011743742e-06, - "loss": 0.0084, + "loss": 0.0066, "step": 2692 }, { "epoch": 0.86, "learning_rate": 1.5193769344329488e-06, - "loss": 0.0055, + "loss": 0.0056, "step": 2693 }, { "epoch": 0.86, "learning_rate": 1.512565950000181e-06, - "loss": 0.0057, + "loss": 0.0058, "step": 2694 }, { "epoch": 0.86, "learning_rate": 1.5057694551931473e-06, - "loss": 0.0054, + "loss": 0.0057, "step": 2695 }, { "epoch": 0.86, "learning_rate": 1.4989874573133654e-06, - "loss": 0.0052, + "loss": 0.0051, "step": 2696 }, { @@ -16225,43 +16225,43 @@ { "epoch": 0.86, "learning_rate": 1.4854669814637145e-06, - "loss": 0.0101, + "loss": 0.007, "step": 2698 }, { "epoch": 0.86, "learning_rate": 1.478728518018968e-06, - "loss": 0.0043, + "loss": 0.0032, "step": 2699 }, { "epoch": 0.86, "learning_rate": 1.4720045805517024e-06, - "loss": 0.0052, + "loss": 0.0049, "step": 2700 }, { "epoch": 0.86, "learning_rate": 1.465295176285476e-06, - "loss": 0.0099, + "loss": 0.0072, "step": 2701 }, { "epoch": 0.86, "learning_rate": 1.4586003124282465e-06, - "loss": 0.0051, + "loss": 0.0046, "step": 2702 }, { "epoch": 0.86, "learning_rate": 1.4519199961723372e-06, - "loss": 0.0043, + "loss": 0.0063, "step": 2703 }, { "epoch": 0.86, "learning_rate": 1.4452542346944492e-06, - "loss": 0.0087, + "loss": 0.0062, "step": 2704 }, { @@ -16273,25 +16273,25 @@ { "epoch": 0.86, "learning_rate": 1.4319664047013593e-06, - "loss": 0.0051, + "loss": 0.0053, "step": 2706 }, { "epoch": 0.86, "learning_rate": 1.4253443504613434e-06, - "loss": 0.0033, + "loss": 0.0044, "step": 2707 }, { "epoch": 0.86, "learning_rate": 1.4187368795497175e-06, - "loss": 0.0059, + "loss": 0.0275, "step": 2708 }, { "epoch": 0.87, "learning_rate": 1.4121439990649232e-06, - "loss": 0.0044, + "loss": 0.0045, "step": 2709 }, { @@ -16303,67 +16303,67 @@ { "epoch": 0.87, "learning_rate": 1.3990020376912165e-06, - "loss": 0.0062, + "loss": 0.0051, "step": 2711 }, { "epoch": 0.87, "learning_rate": 1.3924529709207874e-06, - "loss": 0.008, + "loss": 0.0074, "step": 2712 }, { "epoch": 0.87, "learning_rate": 1.3859185228141359e-06, - "loss": 0.0172, + "loss": 0.0139, "step": 2713 }, { "epoch": 0.87, "learning_rate": 1.3793987003912634e-06, - "loss": 0.0059, + "loss": 0.0061, "step": 2714 }, { "epoch": 0.87, "learning_rate": 1.372893510656444e-06, - "loss": 0.0041, + "loss": 0.004, "step": 2715 }, { "epoch": 0.87, "learning_rate": 1.3664029605982453e-06, - "loss": 0.0135, + "loss": 0.011, "step": 2716 }, { "epoch": 0.87, "learning_rate": 1.3599270571894951e-06, - "loss": 0.0075, + "loss": 0.0072, "step": 2717 }, { "epoch": 0.87, "learning_rate": 1.3534658073872963e-06, - "loss": 0.0046, + "loss": 0.0044, "step": 2718 }, { "epoch": 0.87, "learning_rate": 1.3470192181330081e-06, - "loss": 0.0086, + "loss": 0.0076, "step": 2719 }, { "epoch": 0.87, "learning_rate": 1.3405872963522325e-06, - "loss": 0.0092, + "loss": 0.0106, "step": 2720 }, { "epoch": 0.87, "learning_rate": 1.334170048954822e-06, - "loss": 0.0057, + "loss": 0.007, "step": 2721 }, { @@ -16381,115 +16381,115 @@ { "epoch": 0.87, "learning_rate": 1.3150064219247571e-06, - "loss": 0.0051, + "loss": 0.004, "step": 2724 }, { "epoch": 0.87, "learning_rate": 1.3086479408438912e-06, - "loss": 0.008, + "loss": 0.0079, "step": 2725 }, { "epoch": 0.87, "learning_rate": 1.3023041684590214e-06, - "loss": 0.0068, + "loss": 0.0067, "step": 2726 }, { "epoch": 0.87, "learning_rate": 1.2959751115852896e-06, - "loss": 0.0114, + "loss": 0.0094, "step": 2727 }, { "epoch": 0.87, "learning_rate": 1.2896607770220453e-06, - "loss": 0.0046, + "loss": 0.0044, "step": 2728 }, { "epoch": 0.87, "learning_rate": 1.283361171552807e-06, - "loss": 0.0152, + "loss": 0.0127, "step": 2729 }, { "epoch": 0.87, "learning_rate": 1.2770763019452813e-06, - "loss": 0.0036, + "loss": 0.0041, "step": 2730 }, { "epoch": 0.87, "learning_rate": 1.2708061749513323e-06, - "loss": 0.0059, + "loss": 0.006, "step": 2731 }, { "epoch": 0.87, "learning_rate": 1.2645507973069997e-06, - "loss": 0.0048, + "loss": 0.0053, "step": 2732 }, { "epoch": 0.87, "learning_rate": 1.2583101757324633e-06, - "loss": 0.0067, + "loss": 0.0059, "step": 2733 }, { "epoch": 0.87, "learning_rate": 1.2520843169320616e-06, - "loss": 0.0058, + "loss": 0.0053, "step": 2734 }, { "epoch": 0.87, "learning_rate": 1.245873227594273e-06, - "loss": 0.0029, + "loss": 0.003, "step": 2735 }, { "epoch": 0.87, "learning_rate": 1.239676914391698e-06, - "loss": 0.0096, + "loss": 0.0097, "step": 2736 }, { "epoch": 0.87, "learning_rate": 1.2334953839810787e-06, - "loss": 0.0037, + "loss": 0.0033, "step": 2737 }, { "epoch": 0.87, "learning_rate": 1.2273286430032593e-06, - "loss": 0.0066, + "loss": 0.0077, "step": 2738 }, { "epoch": 0.87, "learning_rate": 1.221176698083209e-06, - "loss": 0.0096, + "loss": 0.0088, "step": 2739 }, { "epoch": 0.88, "learning_rate": 1.2150395558299977e-06, - "loss": 0.0088, + "loss": 0.0087, "step": 2740 }, { "epoch": 0.88, "learning_rate": 1.2089172228367862e-06, - "loss": 0.0159, + "loss": 0.017, "step": 2741 }, { "epoch": 0.88, "learning_rate": 1.2028097056808384e-06, - "loss": 0.0042, + "loss": 0.0044, "step": 2742 }, { @@ -16501,43 +16501,43 @@ { "epoch": 0.88, "learning_rate": 1.1906391451101512e-06, - "loss": 0.0051, + "loss": 0.0043, "step": 2744 }, { "epoch": 0.88, "learning_rate": 1.1845761147703193e-06, - "loss": 0.0105, + "loss": 0.0099, "step": 2745 }, { "epoch": 0.88, "learning_rate": 1.1785279264175319e-06, - "loss": 0.0052, + "loss": 0.0048, "step": 2746 }, { "epoch": 0.88, "learning_rate": 1.172494586549398e-06, - "loss": 0.0091, + "loss": 0.0089, "step": 2747 }, { "epoch": 0.88, "learning_rate": 1.1664761016475645e-06, - "loss": 0.0076, + "loss": 0.0063, "step": 2748 }, { "epoch": 0.88, "learning_rate": 1.1604724781777226e-06, - "loss": 0.0055, + "loss": 0.006, "step": 2749 }, { "epoch": 0.88, "learning_rate": 1.1544837225896033e-06, - "loss": 0.0116, + "loss": 0.0112, "step": 2750 }, { @@ -16549,253 +16549,253 @@ { "epoch": 0.88, "learning_rate": 1.142550840777552e-06, - "loss": 0.0074, + "loss": 0.0075, "step": 2752 }, { "epoch": 0.88, "learning_rate": 1.136606727373189e-06, - "loss": 0.0047, + "loss": 0.0064, "step": 2753 }, { "epoch": 0.88, "learning_rate": 1.1306775074896508e-06, - "loss": 0.0075, + "loss": 0.0076, "step": 2754 }, { "epoch": 0.88, "learning_rate": 1.1247631874967374e-06, - "loss": 0.0107, + "loss": 0.01, "step": 2755 }, { "epoch": 0.88, "learning_rate": 1.1188637737482344e-06, - "loss": 0.007, + "loss": 0.0073, "step": 2756 }, { "epoch": 0.88, "learning_rate": 1.1129792725819137e-06, - "loss": 0.0036, + "loss": 0.0051, "step": 2757 }, { "epoch": 0.88, "learning_rate": 1.1071096903195287e-06, - "loss": 0.012, + "loss": 0.0115, "step": 2758 }, { "epoch": 0.88, "learning_rate": 1.1012550332668037e-06, - "loss": 0.0034, + "loss": 0.003, "step": 2759 }, { "epoch": 0.88, "learning_rate": 1.0954153077134322e-06, - "loss": 0.006, + "loss": 0.0058, "step": 2760 }, { "epoch": 0.88, "learning_rate": 1.0895905199330592e-06, - "loss": 0.0049, + "loss": 0.0038, "step": 2761 }, { "epoch": 0.88, "learning_rate": 1.0837806761832891e-06, - "loss": 0.0073, + "loss": 0.0064, "step": 2762 }, { "epoch": 0.88, "learning_rate": 1.077985782705671e-06, - "loss": 0.0049, + "loss": 0.005, "step": 2763 }, { "epoch": 0.88, "learning_rate": 1.07220584572569e-06, - "loss": 0.004, + "loss": 0.0043, "step": 2764 }, { "epoch": 0.88, "learning_rate": 1.066440871452764e-06, - "loss": 0.0074, + "loss": 0.0068, "step": 2765 }, { "epoch": 0.88, "learning_rate": 1.0606908660802407e-06, - "loss": 0.0099, + "loss": 0.0122, "step": 2766 }, { "epoch": 0.88, "learning_rate": 1.0549558357853766e-06, - "loss": 0.0094, + "loss": 0.0075, "step": 2767 }, { "epoch": 0.88, "learning_rate": 1.0492357867293522e-06, - "loss": 0.0085, + "loss": 0.01, "step": 2768 }, { "epoch": 0.88, "learning_rate": 1.043530725057245e-06, - "loss": 0.0055, + "loss": 0.0059, "step": 2769 }, { "epoch": 0.88, "learning_rate": 1.0378406568980376e-06, - "loss": 0.006, + "loss": 0.0051, "step": 2770 }, { "epoch": 0.89, "learning_rate": 1.0321655883645998e-06, - "loss": 0.0085, + "loss": 0.01, "step": 2771 }, { "epoch": 0.89, "learning_rate": 1.0265055255536898e-06, - "loss": 0.0055, + "loss": 0.0056, "step": 2772 }, { "epoch": 0.89, "learning_rate": 1.0208604745459454e-06, - "loss": 0.0159, + "loss": 0.0123, "step": 2773 }, { "epoch": 0.89, "learning_rate": 1.0152304414058756e-06, - "loss": 0.0061, + "loss": 0.0076, "step": 2774 }, { "epoch": 0.89, "learning_rate": 1.0096154321818551e-06, - "loss": 0.0026, + "loss": 0.0031, "step": 2775 }, { "epoch": 0.89, "learning_rate": 1.004015452906124e-06, - "loss": 0.006, + "loss": 0.0069, "step": 2776 }, { "epoch": 0.89, "learning_rate": 9.984305095947648e-07, - "loss": 0.0032, + "loss": 0.0029, "step": 2777 }, { "epoch": 0.89, "learning_rate": 9.928606082477214e-07, - "loss": 0.0041, + "loss": 0.0039, "step": 2778 }, { "epoch": 0.89, "learning_rate": 9.873057548487623e-07, - "loss": 0.0061, + "loss": 0.0062, "step": 2779 }, { "epoch": 0.89, "learning_rate": 9.817659553655023e-07, - "loss": 0.0101, + "loss": 0.0074, "step": 2780 }, { "epoch": 0.89, "learning_rate": 9.76241215749379e-07, - "loss": 0.0039, + "loss": 0.0044, "step": 2781 }, { "epoch": 0.89, "learning_rate": 9.707315419356477e-07, - "loss": 0.0113, + "loss": 0.0121, "step": 2782 }, { "epoch": 0.89, "learning_rate": 9.65236939843384e-07, - "loss": 0.0092, + "loss": 0.0072, "step": 2783 }, { "epoch": 0.89, "learning_rate": 9.59757415375468e-07, - "loss": 0.0058, + "loss": 0.0064, "step": 2784 }, { "epoch": 0.89, "learning_rate": 9.542929744185885e-07, - "loss": 0.0104, + "loss": 0.0119, "step": 2785 }, { "epoch": 0.89, "learning_rate": 9.488436228432184e-07, - "loss": 0.0051, + "loss": 0.0054, "step": 2786 }, { "epoch": 0.89, "learning_rate": 9.43409366503628e-07, - "loss": 0.0152, + "loss": 0.0146, "step": 2787 }, { "epoch": 0.89, "learning_rate": 9.379902112378708e-07, - "loss": 0.0069, + "loss": 0.0066, "step": 2788 }, { "epoch": 0.89, "learning_rate": 9.325861628677779e-07, - "loss": 0.0104, + "loss": 0.0092, "step": 2789 }, { "epoch": 0.89, "learning_rate": 9.271972271989438e-07, - "loss": 0.0094, + "loss": 0.0098, "step": 2790 }, { "epoch": 0.89, "learning_rate": 9.218234100207357e-07, - "loss": 0.005, + "loss": 0.0046, "step": 2791 }, { "epoch": 0.89, "learning_rate": 9.164647171062723e-07, - "loss": 0.0052, + "loss": 0.0051, "step": 2792 }, { "epoch": 0.89, "learning_rate": 9.111211542124287e-07, - "loss": 0.0061, + "loss": 0.008, "step": 2793 }, { @@ -16807,169 +16807,169 @@ { "epoch": 0.89, "learning_rate": 9.004794414328221e-07, - "loss": 0.0036, + "loss": 0.0047, "step": 2795 }, { "epoch": 0.89, "learning_rate": 8.951813029795075e-07, - "loss": 0.0058, + "loss": 0.0064, "step": 2796 }, { "epoch": 0.89, "learning_rate": 8.898983174117075e-07, - "loss": 0.0062, + "loss": 0.007, "step": 2797 }, { "epoch": 0.89, "learning_rate": 8.846304904049557e-07, - "loss": 0.0068, + "loss": 0.0049, "step": 2798 }, { "epoch": 0.89, "learning_rate": 8.793778276185149e-07, - "loss": 0.0048, + "loss": 0.0034, "step": 2799 }, { "epoch": 0.89, "learning_rate": 8.74140334695343e-07, - "loss": 0.0068, + "loss": 0.0069, "step": 2800 }, { "epoch": 0.89, "learning_rate": 8.689180172621142e-07, - "loss": 0.0117, + "loss": 0.0126, "step": 2801 }, { "epoch": 0.89, "learning_rate": 8.637108809291904e-07, - "loss": 0.0082, + "loss": 0.0092, "step": 2802 }, { "epoch": 0.9, "learning_rate": 8.585189312906272e-07, - "loss": 0.0043, + "loss": 0.0035, "step": 2803 }, { "epoch": 0.9, "learning_rate": 8.533421739241676e-07, - "loss": 0.0076, + "loss": 0.0056, "step": 2804 }, { "epoch": 0.9, "learning_rate": 8.481806143912263e-07, - "loss": 0.0057, + "loss": 0.0058, "step": 2805 }, { "epoch": 0.9, "learning_rate": 8.430342582368994e-07, - "loss": 0.0039, + "loss": 0.0069, "step": 2806 }, { "epoch": 0.9, "learning_rate": 8.379031109899459e-07, - "loss": 0.0045, + "loss": 0.0048, "step": 2807 }, { "epoch": 0.9, "learning_rate": 8.327871781627816e-07, - "loss": 0.0094, + "loss": 0.0111, "step": 2808 }, { "epoch": 0.9, "learning_rate": 8.276864652514882e-07, - "loss": 0.0127, + "loss": 0.0142, "step": 2809 }, { "epoch": 0.9, "learning_rate": 8.226009777357857e-07, - "loss": 0.0085, + "loss": 0.0088, "step": 2810 }, { "epoch": 0.9, "learning_rate": 8.175307210790439e-07, - "loss": 0.0109, + "loss": 0.0102, "step": 2811 }, { "epoch": 0.9, "learning_rate": 8.124757007282657e-07, - "loss": 0.005, + "loss": 0.0068, "step": 2812 }, { "epoch": 0.9, "learning_rate": 8.074359221140903e-07, - "loss": 0.0047, + "loss": 0.0032, "step": 2813 }, { "epoch": 0.9, "learning_rate": 8.024113906507752e-07, - "loss": 0.0069, + "loss": 0.0054, "step": 2814 }, { "epoch": 0.9, "learning_rate": 7.974021117362091e-07, - "loss": 0.0057, + "loss": 0.0064, "step": 2815 }, { "epoch": 0.9, "learning_rate": 7.924080907518838e-07, - "loss": 0.0096, + "loss": 0.0095, "step": 2816 }, { "epoch": 0.9, "learning_rate": 7.874293330629078e-07, - "loss": 0.0043, + "loss": 0.0042, "step": 2817 }, { "epoch": 0.9, "learning_rate": 7.824658440179827e-07, - "loss": 0.0047, + "loss": 0.004, "step": 2818 }, { "epoch": 0.9, "learning_rate": 7.775176289494146e-07, - "loss": 0.0062, + "loss": 0.0067, "step": 2819 }, { "epoch": 0.9, "learning_rate": 7.725846931731029e-07, - "loss": 0.0064, + "loss": 0.0061, "step": 2820 }, { "epoch": 0.9, "learning_rate": 7.676670419885234e-07, - "loss": 0.002, + "loss": 0.0024, "step": 2821 }, { "epoch": 0.9, "learning_rate": 7.627646806787386e-07, - "loss": 0.0051, + "loss": 0.0052, "step": 2822 }, { @@ -16981,97 +16981,97 @@ { "epoch": 0.9, "learning_rate": 7.530058487336511e-07, - "loss": 0.0081, + "loss": 0.0069, "step": 2824 }, { "epoch": 0.9, "learning_rate": 7.481493885823204e-07, - "loss": 0.0077, + "loss": 0.0089, "step": 2825 }, { "epoch": 0.9, "learning_rate": 7.433082392737067e-07, - "loss": 0.0055, + "loss": 0.0049, "step": 2826 }, { "epoch": 0.9, "learning_rate": 7.384824060086831e-07, - "loss": 0.0056, + "loss": 0.0052, "step": 2827 }, { "epoch": 0.9, "learning_rate": 7.336718939716724e-07, - "loss": 0.0097, + "loss": 0.0083, "step": 2828 }, { "epoch": 0.9, "learning_rate": 7.288767083306308e-07, - "loss": 0.012, + "loss": 0.0122, "step": 2829 }, { "epoch": 0.9, "learning_rate": 7.240968542370574e-07, - "loss": 0.0067, + "loss": 0.0065, "step": 2830 }, { "epoch": 0.9, "learning_rate": 7.19332336825973e-07, - "loss": 0.0117, + "loss": 0.0107, "step": 2831 }, { "epoch": 0.9, "learning_rate": 7.145831612159248e-07, - "loss": 0.0062, + "loss": 0.0077, "step": 2832 }, { "epoch": 0.9, "learning_rate": 7.098493325089817e-07, - "loss": 0.0119, + "loss": 0.0098, "step": 2833 }, { "epoch": 0.91, "learning_rate": 7.051308557907187e-07, - "loss": 0.0073, + "loss": 0.0076, "step": 2834 }, { "epoch": 0.91, "learning_rate": 7.004277361302264e-07, - "loss": 0.0065, + "loss": 0.0062, "step": 2835 }, { "epoch": 0.91, "learning_rate": 6.957399785800861e-07, - "loss": 0.0074, + "loss": 0.0078, "step": 2836 }, { "epoch": 0.91, "learning_rate": 6.910675881763861e-07, - "loss": 0.0029, + "loss": 0.003, "step": 2837 }, { "epoch": 0.91, "learning_rate": 6.86410569938703e-07, - "loss": 0.0067, + "loss": 0.0086, "step": 2838 }, { "epoch": 0.91, "learning_rate": 6.817689288700962e-07, - "loss": 0.0064, + "loss": 0.0054, "step": 2839 }, { @@ -17083,43 +17083,43 @@ { "epoch": 0.91, "learning_rate": 6.725317981697448e-07, - "loss": 0.0068, + "loss": 0.0054, "step": 2841 }, { "epoch": 0.91, "learning_rate": 6.679363184615006e-07, - "loss": 0.0076, + "loss": 0.003, "step": 2842 }, { "epoch": 0.91, "learning_rate": 6.633562357693268e-07, - "loss": 0.0067, + "loss": 0.0073, "step": 2843 }, { "epoch": 0.91, "learning_rate": 6.587915550136259e-07, - "loss": 0.0095, + "loss": 0.0059, "step": 2844 }, { "epoch": 0.91, "learning_rate": 6.542422810982634e-07, - "loss": 0.007, + "loss": 0.0066, "step": 2845 }, { "epoch": 0.91, "learning_rate": 6.497084189105501e-07, - "loss": 0.0059, + "loss": 0.0054, "step": 2846 }, { "epoch": 0.91, "learning_rate": 6.451899733212363e-07, - "loss": 0.0084, + "loss": 0.0061, "step": 2847 }, { @@ -17131,79 +17131,79 @@ { "epoch": 0.91, "learning_rate": 6.361993513380121e-07, - "loss": 0.0089, + "loss": 0.0092, "step": 2849 }, { "epoch": 0.91, "learning_rate": 6.317271846027772e-07, - "loss": 0.0077, + "loss": 0.0082, "step": 2850 }, { "epoch": 0.91, "learning_rate": 6.272704537832846e-07, - "loss": 0.0092, + "loss": 0.0082, "step": 2851 }, { "epoch": 0.91, "learning_rate": 6.228291636674227e-07, - "loss": 0.0051, + "loss": 0.0044, "step": 2852 }, { "epoch": 0.91, "learning_rate": 6.184033190264982e-07, - "loss": 0.0028, + "loss": 0.003, "step": 2853 }, { "epoch": 0.91, "learning_rate": 6.139929246152159e-07, - "loss": 0.0057, + "loss": 0.0052, "step": 2854 }, { "epoch": 0.91, "learning_rate": 6.09597985171691e-07, - "loss": 0.0036, + "loss": 0.004, "step": 2855 }, { "epoch": 0.91, "learning_rate": 6.052185054174314e-07, - "loss": 0.0083, + "loss": 0.009, "step": 2856 }, { "epoch": 0.91, "learning_rate": 6.008544900573354e-07, - "loss": 0.0033, + "loss": 0.0043, "step": 2857 }, { "epoch": 0.91, "learning_rate": 5.965059437796894e-07, - "loss": 0.0113, + "loss": 0.0104, "step": 2858 }, { "epoch": 0.91, "learning_rate": 5.921728712561614e-07, - "loss": 0.0141, + "loss": 0.0123, "step": 2859 }, { "epoch": 0.91, "learning_rate": 5.878552771417945e-07, - "loss": 0.0502, + "loss": 0.0094, "step": 2860 }, { "epoch": 0.91, "learning_rate": 5.835531660750048e-07, - "loss": 0.0055, + "loss": 0.005, "step": 2861 }, { @@ -17215,223 +17215,223 @@ { "epoch": 0.91, "learning_rate": 5.749954115546447e-07, - "loss": 0.0114, + "loss": 0.01, "step": 2863 }, { "epoch": 0.91, "learning_rate": 5.707397772947198e-07, - "loss": 0.0051, + "loss": 0.0059, "step": 2864 }, { "epoch": 0.92, "learning_rate": 5.664996444696475e-07, - "loss": 0.0051, + "loss": 0.0052, "step": 2865 }, { "epoch": 0.92, "learning_rate": 5.622750176346286e-07, - "loss": 0.0046, + "loss": 0.0053, "step": 2866 }, { "epoch": 0.92, "learning_rate": 5.58065901328203e-07, - "loss": 0.0069, + "loss": 0.0067, "step": 2867 }, { "epoch": 0.92, "learning_rate": 5.538723000722467e-07, - "loss": 0.0064, + "loss": 0.0065, "step": 2868 }, { "epoch": 0.92, "learning_rate": 5.496942183719711e-07, - "loss": 0.0032, + "loss": 0.0028, "step": 2869 }, { "epoch": 0.92, "learning_rate": 5.455316607159072e-07, - "loss": 0.0084, + "loss": 0.0076, "step": 2870 }, { "epoch": 0.92, "learning_rate": 5.413846315759197e-07, - "loss": 0.0078, + "loss": 0.0079, "step": 2871 }, { "epoch": 0.92, "learning_rate": 5.372531354071847e-07, - "loss": 0.0068, + "loss": 0.0064, "step": 2872 }, { "epoch": 0.92, "learning_rate": 5.331371766481885e-07, - "loss": 0.0035, + "loss": 0.0039, "step": 2873 }, { "epoch": 0.92, "learning_rate": 5.290367597207324e-07, - "loss": 0.0074, + "loss": 0.0047, "step": 2874 }, { "epoch": 0.92, "learning_rate": 5.249518890299143e-07, - "loss": 0.0044, + "loss": 0.0043, "step": 2875 }, { "epoch": 0.92, "learning_rate": 5.208825689641322e-07, - "loss": 0.0106, + "loss": 0.0096, "step": 2876 }, { "epoch": 0.92, "learning_rate": 5.168288038950841e-07, - "loss": 0.0032, + "loss": 0.0031, "step": 2877 }, { "epoch": 0.92, "learning_rate": 5.127905981777497e-07, - "loss": 0.0059, + "loss": 0.0035, "step": 2878 }, { "epoch": 0.92, "learning_rate": 5.087679561503972e-07, - "loss": 0.0036, + "loss": 0.0033, "step": 2879 }, { "epoch": 0.92, "learning_rate": 5.047608821345695e-07, - "loss": 0.0088, + "loss": 0.0083, "step": 2880 }, { "epoch": 0.92, "learning_rate": 5.007693804350932e-07, - "loss": 0.0058, + "loss": 0.005, "step": 2881 }, { "epoch": 0.92, "learning_rate": 4.967934553400599e-07, - "loss": 0.0059, + "loss": 0.007, "step": 2882 }, { "epoch": 0.92, "learning_rate": 4.928331111208262e-07, - "loss": 0.0097, + "loss": 0.0098, "step": 2883 }, { "epoch": 0.92, "learning_rate": 4.888883520320137e-07, - "loss": 0.0055, + "loss": 0.0062, "step": 2884 }, { "epoch": 0.92, "learning_rate": 4.849591823114991e-07, - "loss": 0.0025, + "loss": 0.0029, "step": 2885 }, { "epoch": 0.92, "learning_rate": 4.810456061804108e-07, - "loss": 0.0036, + "loss": 0.0054, "step": 2886 }, { "epoch": 0.92, "learning_rate": 4.771476278431258e-07, - "loss": 0.0056, + "loss": 0.0053, "step": 2887 }, { "epoch": 0.92, "learning_rate": 4.732652514872643e-07, - "loss": 0.0077, + "loss": 0.0072, "step": 2888 }, { "epoch": 0.92, "learning_rate": 4.6939848128368325e-07, - "loss": 0.0049, + "loss": 0.0057, "step": 2889 }, { "epoch": 0.92, "learning_rate": 4.6554732138647806e-07, - "loss": 0.0085, + "loss": 0.0058, "step": 2890 }, { "epoch": 0.92, "learning_rate": 4.617117759329675e-07, - "loss": 0.0084, + "loss": 0.0079, "step": 2891 }, { "epoch": 0.92, "learning_rate": 4.5789184904370565e-07, - "loss": 0.0067, + "loss": 0.0065, "step": 2892 }, { "epoch": 0.92, "learning_rate": 4.5408754482245464e-07, - "loss": 0.0063, + "loss": 0.0048, "step": 2893 }, { "epoch": 0.92, "learning_rate": 4.502988673562053e-07, - "loss": 0.0085, + "loss": 0.009, "step": 2894 }, { "epoch": 0.92, "learning_rate": 4.465258207151501e-07, - "loss": 0.0059, + "loss": 0.0056, "step": 2895 }, { "epoch": 0.92, "learning_rate": 4.4276840895269674e-07, - "loss": 0.006, + "loss": 0.0057, "step": 2896 }, { "epoch": 0.93, "learning_rate": 4.390266361054529e-07, - "loss": 0.0101, + "loss": 0.0093, "step": 2897 }, { "epoch": 0.93, "learning_rate": 4.353005061932297e-07, - "loss": 0.0101, + "loss": 0.0107, "step": 2898 }, { "epoch": 0.93, "learning_rate": 4.3159002321902175e-07, - "loss": 0.0049, + "loss": 0.006, "step": 2899 }, { @@ -17443,43 +17443,43 @@ { "epoch": 0.93, "learning_rate": 4.2421601401262557e-07, - "loss": 0.0071, + "loss": 0.0091, "step": 2901 }, { "epoch": 0.93, "learning_rate": 4.2055249570237374e-07, - "loss": 0.0043, + "loss": 0.0041, "step": 2902 }, { "epoch": 0.93, "learning_rate": 4.169046401740134e-07, - "loss": 0.0066, + "loss": 0.0062, "step": 2903 }, { "epoch": 0.93, "learning_rate": 4.132724513464514e-07, - "loss": 0.0052, + "loss": 0.0051, "step": 2904 }, { "epoch": 0.93, "learning_rate": 4.0965593312177474e-07, - "loss": 0.0037, + "loss": 0.0032, "step": 2905 }, { "epoch": 0.93, "learning_rate": 4.0605508938522216e-07, - "loss": 0.0124, + "loss": 0.0121, "step": 2906 }, { "epoch": 0.93, "learning_rate": 4.0246992400520435e-07, - "loss": 0.0061, + "loss": 0.0063, "step": 2907 }, { @@ -17491,109 +17491,109 @@ { "epoch": 0.93, "learning_rate": 3.9534664370418084e-07, - "loss": 0.007, + "loss": 0.0085, "step": 2909 }, { "epoch": 0.93, "learning_rate": 3.9180853643575207e-07, - "loss": 0.0059, + "loss": 0.0048, "step": 2910 }, { "epoch": 0.93, "learning_rate": 3.8828612282900857e-07, - "loss": 0.0053, + "loss": 0.0049, "step": 2911 }, { "epoch": 0.93, "learning_rate": 3.847794066680982e-07, - "loss": 0.0104, + "loss": 0.008, "step": 2912 }, { "epoch": 0.93, "learning_rate": 3.812883917203092e-07, - "loss": 0.0039, + "loss": 0.0057, "step": 2913 }, { "epoch": 0.93, "learning_rate": 3.7781308173605477e-07, - "loss": 0.011, + "loss": 0.0103, "step": 2914 }, { "epoch": 0.93, "learning_rate": 3.7435348044888187e-07, - "loss": 0.0076, + "loss": 0.007, "step": 2915 }, { "epoch": 0.93, "learning_rate": 3.70909591575459e-07, - "loss": 0.0064, + "loss": 0.0066, "step": 2916 }, { "epoch": 0.93, "learning_rate": 3.674814188155734e-07, - "loss": 0.0123, + "loss": 0.0114, "step": 2917 }, { "epoch": 0.93, "learning_rate": 3.64068965852134e-07, - "loss": 0.0043, + "loss": 0.0045, "step": 2918 }, { "epoch": 0.93, "learning_rate": 3.606722363511533e-07, - "loss": 0.0064, + "loss": 0.0051, "step": 2919 }, { "epoch": 0.93, "learning_rate": 3.572912339617623e-07, - "loss": 0.0089, + "loss": 0.0085, "step": 2920 }, { "epoch": 0.93, "learning_rate": 3.5392596231618555e-07, - "loss": 0.0085, + "loss": 0.0097, "step": 2921 }, { "epoch": 0.93, "learning_rate": 3.5057642502975606e-07, - "loss": 0.0056, + "loss": 0.0053, "step": 2922 }, { "epoch": 0.93, "learning_rate": 3.4724262570090036e-07, - "loss": 0.006, + "loss": 0.0064, "step": 2923 }, { "epoch": 0.93, "learning_rate": 3.439245679111369e-07, - "loss": 0.0092, + "loss": 0.0118, "step": 2924 }, { "epoch": 0.93, "learning_rate": 3.406222552250726e-07, - "loss": 0.0096, + "loss": 0.012, "step": 2925 }, { "epoch": 0.93, "learning_rate": 3.3733569119040296e-07, - "loss": 0.0083, + "loss": 0.007, "step": 2926 }, { @@ -17605,151 +17605,151 @@ { "epoch": 0.94, "learning_rate": 3.308098231814188e-07, - "loss": 0.0044, + "loss": 0.0042, "step": 2928 }, { "epoch": 0.94, "learning_rate": 3.2757052621787796e-07, - "loss": 0.0039, + "loss": 0.0031, "step": 2929 }, { "epoch": 0.94, "learning_rate": 3.2434699192727736e-07, - "loss": 0.004, + "loss": 0.005, "step": 2930 }, { "epoch": 0.94, "learning_rate": 3.2113922377267845e-07, - "loss": 0.0095, + "loss": 0.0083, "step": 2931 }, { "epoch": 0.94, "learning_rate": 3.179472252001997e-07, - "loss": 0.0083, + "loss": 0.0089, "step": 2932 }, { "epoch": 0.94, "learning_rate": 3.1477099963902635e-07, - "loss": 0.0092, + "loss": 0.0089, "step": 2933 }, { "epoch": 0.94, "learning_rate": 3.1161055050139555e-07, - "loss": 0.0075, + "loss": 0.0167, "step": 2934 }, { "epoch": 0.94, "learning_rate": 3.084658811825947e-07, - "loss": 0.0058, + "loss": 0.0044, "step": 2935 }, { "epoch": 0.94, "learning_rate": 3.053369950609597e-07, - "loss": 0.0047, + "loss": 0.0057, "step": 2936 }, { "epoch": 0.94, "learning_rate": 3.0222389549786845e-07, - "loss": 0.0083, + "loss": 0.008, "step": 2937 }, { "epoch": 0.94, "learning_rate": 2.99126585837744e-07, - "loss": 0.0062, + "loss": 0.0057, "step": 2938 }, { "epoch": 0.94, "learning_rate": 2.96045069408043e-07, - "loss": 0.0024, + "loss": 0.0019, "step": 2939 }, { "epoch": 0.94, "learning_rate": 2.9297934951925564e-07, - "loss": 0.0048, + "loss": 0.0051, "step": 2940 }, { "epoch": 0.94, "learning_rate": 2.8992942946490075e-07, - "loss": 0.0029, + "loss": 0.0063, "step": 2941 }, { "epoch": 0.94, "learning_rate": 2.86895312521524e-07, - "loss": 0.007, + "loss": 0.0069, "step": 2942 }, { "epoch": 0.94, "learning_rate": 2.838770019486964e-07, - "loss": 0.0037, + "loss": 0.0034, "step": 2943 }, { "epoch": 0.94, "learning_rate": 2.808745009890057e-07, - "loss": 0.0104, + "loss": 0.0115, "step": 2944 }, { "epoch": 0.94, "learning_rate": 2.778878128680534e-07, - "loss": 0.0081, + "loss": 0.0067, "step": 2945 }, { "epoch": 0.94, "learning_rate": 2.749169407944563e-07, - "loss": 0.0056, + "loss": 0.006, "step": 2946 }, { "epoch": 0.94, "learning_rate": 2.7196188795983955e-07, - "loss": 0.004, + "loss": 0.0037, "step": 2947 }, { "epoch": 0.94, "learning_rate": 2.6902265753883213e-07, - "loss": 0.0071, + "loss": 0.0062, "step": 2948 }, { "epoch": 0.94, "learning_rate": 2.6609925268906656e-07, - "loss": 0.0107, + "loss": 0.0099, "step": 2949 }, { "epoch": 0.94, "learning_rate": 2.631916765511722e-07, - "loss": 0.0058, + "loss": 0.0065, "step": 2950 }, { "epoch": 0.94, "learning_rate": 2.6029993224877367e-07, - "loss": 0.0064, + "loss": 0.0068, "step": 2951 }, { "epoch": 0.94, "learning_rate": 2.574240228884911e-07, - "loss": 0.0082, + "loss": 0.0072, "step": 2952 }, { @@ -17761,145 +17761,145 @@ { "epoch": 0.94, "learning_rate": 2.5171972133566976e-07, - "loss": 0.0067, + "loss": 0.0062, "step": 2954 }, { "epoch": 0.94, "learning_rate": 2.4889133527129573e-07, - "loss": 0.0066, + "loss": 0.0061, "step": 2955 }, { "epoch": 0.94, "learning_rate": 2.4607879640535515e-07, - "loss": 0.0037, + "loss": 0.0043, "step": 2956 }, { "epoch": 0.94, "learning_rate": 2.432821077593744e-07, - "loss": 0.0052, + "loss": 0.0049, "step": 2957 }, { "epoch": 0.94, "learning_rate": 2.4050127233785193e-07, - "loss": 0.0093, + "loss": 0.0091, "step": 2958 }, { "epoch": 0.95, "learning_rate": 2.3773629312825284e-07, - "loss": 0.0084, + "loss": 0.007, "step": 2959 }, { "epoch": 0.95, "learning_rate": 2.3498717310101114e-07, - "loss": 0.0051, + "loss": 0.0054, "step": 2960 }, { "epoch": 0.95, "learning_rate": 2.32253915209521e-07, - "loss": 0.009, + "loss": 0.0075, "step": 2961 }, { "epoch": 0.95, "learning_rate": 2.295365223901402e-07, - "loss": 0.0069, + "loss": 0.0068, "step": 2962 }, { "epoch": 0.95, "learning_rate": 2.2683499756217362e-07, - "loss": 0.004, + "loss": 0.0043, "step": 2963 }, { "epoch": 0.95, "learning_rate": 2.241493436278863e-07, - "loss": 0.0072, + "loss": 0.007, "step": 2964 }, { "epoch": 0.95, "learning_rate": 2.2147956347249366e-07, - "loss": 0.0066, + "loss": 0.0051, "step": 2965 }, { "epoch": 0.95, "learning_rate": 2.1882565996415305e-07, - "loss": 0.0089, + "loss": 0.0093, "step": 2966 }, { "epoch": 0.95, "learning_rate": 2.1618763595397052e-07, - "loss": 0.0058, + "loss": 0.0048, "step": 2967 }, { "epoch": 0.95, "learning_rate": 2.1356549427598571e-07, - "loss": 0.007, + "loss": 0.0063, "step": 2968 }, { "epoch": 0.95, "learning_rate": 2.1095923774718197e-07, - "loss": 0.0074, + "loss": 0.0067, "step": 2969 }, { "epoch": 0.95, "learning_rate": 2.083688691674762e-07, - "loss": 0.0058, + "loss": 0.0051, "step": 2970 }, { "epoch": 0.95, "learning_rate": 2.0579439131971577e-07, - "loss": 0.0075, + "loss": 0.0045, "step": 2971 }, { "epoch": 0.95, "learning_rate": 2.0323580696967659e-07, - "loss": 0.0076, + "loss": 0.0069, "step": 2972 }, { "epoch": 0.95, "learning_rate": 2.0069311886605822e-07, - "loss": 0.0055, + "loss": 0.0051, "step": 2973 }, { "epoch": 0.95, "learning_rate": 1.9816632974048566e-07, - "loss": 0.0054, + "loss": 0.0064, "step": 2974 }, { "epoch": 0.95, "learning_rate": 1.9565544230750576e-07, - "loss": 0.0067, + "loss": 0.0071, "step": 2975 }, { "epoch": 0.95, "learning_rate": 1.9316045926457414e-07, - "loss": 0.0081, + "loss": 0.0075, "step": 2976 }, { "epoch": 0.95, "learning_rate": 1.9068138329206842e-07, - "loss": 0.0048, + "loss": 0.0045, "step": 2977 }, { @@ -17911,13 +17911,13 @@ { "epoch": 0.95, "learning_rate": 1.857709631943766e-07, - "loss": 0.0048, + "loss": 0.0053, "step": 2979 }, { "epoch": 0.95, "learning_rate": 1.8333962434448402e-07, - "loss": 0.0087, + "loss": 0.0065, "step": 2980 }, { @@ -17929,43 +17929,43 @@ { "epoch": 0.95, "learning_rate": 1.7852470210260708e-07, - "loss": 0.0096, + "loss": 0.0086, "step": 2982 }, { "epoch": 0.95, "learning_rate": 1.7614112388331993e-07, - "loss": 0.0039, + "loss": 0.0062, "step": 2983 }, { "epoch": 0.95, "learning_rate": 1.73773471018423e-07, - "loss": 0.005, + "loss": 0.006, "step": 2984 }, { "epoch": 0.95, "learning_rate": 1.714217460515033e-07, - "loss": 0.003, + "loss": 0.0025, "step": 2985 }, { "epoch": 0.95, "learning_rate": 1.690859515090265e-07, - "loss": 0.0059, + "loss": 0.006, "step": 2986 }, { "epoch": 0.95, "learning_rate": 1.667660899003537e-07, - "loss": 0.0044, + "loss": 0.0039, "step": 2987 }, { "epoch": 0.95, "learning_rate": 1.6446216371772293e-07, - "loss": 0.0093, + "loss": 0.0129, "step": 2988 }, { @@ -17977,25 +17977,25 @@ { "epoch": 0.95, "learning_rate": 1.5990212751395173e-07, - "loss": 0.0074, + "loss": 0.006, "step": 2990 }, { "epoch": 0.96, "learning_rate": 1.576460223916859e-07, - "loss": 0.0071, + "loss": 0.0049, "step": 2991 }, { "epoch": 0.96, "learning_rate": 1.5540586249319977e-07, - "loss": 0.006, + "loss": 0.0061, "step": 2992 }, { "epoch": 0.96, "learning_rate": 1.5318165022511322e-07, - "loss": 0.0052, + "loss": 0.0068, "step": 2993 }, { @@ -18007,31 +18007,31 @@ { "epoch": 0.96, "learning_rate": 1.4878107812093368e-07, - "loss": 0.0057, + "loss": 0.0066, "step": 2995 }, { "epoch": 0.96, "learning_rate": 1.4660472301240235e-07, - "loss": 0.0066, + "loss": 0.0061, "step": 2996 }, { "epoch": 0.96, "learning_rate": 1.4444432498938232e-07, - "loss": 0.0045, + "loss": 0.0035, "step": 2997 }, { "epoch": 0.96, "learning_rate": 1.4229988637280367e-07, - "loss": 0.0085, + "loss": 0.0086, "step": 2998 }, { "epoch": 0.96, "learning_rate": 1.4017140946644357e-07, - "loss": 0.0029, + "loss": 0.0036, "step": 2999 }, { @@ -18043,19 +18043,19 @@ { "epoch": 0.96, "learning_rate": 1.35962349913783e-07, - "loss": 0.0054, + "loss": 0.0061, "step": 3001 }, { "epoch": 0.96, "learning_rate": 1.338817717892954e-07, - "loss": 0.0036, + "loss": 0.0042, "step": 3002 }, { "epoch": 0.96, "learning_rate": 1.318171644186572e-07, - "loss": 0.0102, + "loss": 0.014, "step": 3003 }, { @@ -18067,13 +18067,13 @@ { "epoch": 0.96, "learning_rate": 1.2773587079384751e-07, - "loss": 0.0078, + "loss": 0.0094, "step": 3005 }, { "epoch": 0.96, "learning_rate": 1.2571918892423207e-07, - "loss": 0.0066, + "loss": 0.0082, "step": 3006 }, { @@ -18085,97 +18085,97 @@ { "epoch": 0.96, "learning_rate": 1.2173376590324136e-07, - "loss": 0.0067, + "loss": 0.006, "step": 3008 }, { "epoch": 0.96, "learning_rate": 1.197650290334279e-07, - "loss": 0.0068, + "loss": 0.0064, "step": 3009 }, { "epoch": 0.96, "learning_rate": 1.1781227808315919e-07, - "loss": 0.0054, + "loss": 0.0057, "step": 3010 }, { "epoch": 0.96, "learning_rate": 1.1587551515028716e-07, - "loss": 0.006, + "loss": 0.0053, "step": 3011 }, { "epoch": 0.96, "learning_rate": 1.1395474231548742e-07, - "loss": 0.008, + "loss": 0.0058, "step": 3012 }, { "epoch": 0.96, "learning_rate": 1.1204996164225433e-07, - "loss": 0.007, + "loss": 0.0067, "step": 3013 }, { "epoch": 0.96, "learning_rate": 1.1016117517690938e-07, - "loss": 0.0103, + "loss": 0.0112, "step": 3014 }, { "epoch": 0.96, "learning_rate": 1.0828838494858107e-07, - "loss": 0.0078, + "loss": 0.0084, "step": 3015 }, { "epoch": 0.96, "learning_rate": 1.0643159296922334e-07, - "loss": 0.0066, + "loss": 0.0109, "step": 3016 }, { "epoch": 0.96, "learning_rate": 1.0459080123359221e-07, - "loss": 0.0117, + "loss": 0.0108, "step": 3017 }, { "epoch": 0.96, "learning_rate": 1.027660117192658e-07, - "loss": 0.0098, + "loss": 0.0109, "step": 3018 }, { "epoch": 0.96, "learning_rate": 1.009572263866243e-07, - "loss": 0.0107, + "loss": 0.0088, "step": 3019 }, { "epoch": 0.96, "learning_rate": 9.916444717885497e-08, - "loss": 0.0115, + "loss": 0.0136, "step": 3020 }, { "epoch": 0.96, "learning_rate": 9.738767602195053e-08, - "loss": 0.0069, + "loss": 0.0078, "step": 3021 }, { "epoch": 0.97, "learning_rate": 9.562691482470743e-08, - "loss": 0.0125, + "loss": 0.0099, "step": 3022 }, { "epoch": 0.97, "learning_rate": 9.38821654787192e-08, - "loss": 0.0073, + "loss": 0.0077, "step": 3023 }, { @@ -18187,61 +18187,61 @@ { "epoch": 0.97, "learning_rate": 9.044070982088537e-08, - "loss": 0.0071, + "loss": 0.007, "step": 3025 }, { "epoch": 0.97, "learning_rate": 8.874400720621401e-08, - "loss": 0.0087, + "loss": 0.011, "step": 3026 }, { "epoch": 0.97, "learning_rate": 8.706332383714432e-08, - "loss": 0.0098, + "loss": 0.0086, "step": 3027 }, { "epoch": 0.97, "learning_rate": 8.539866151924369e-08, - "loss": 0.0046, + "loss": 0.0041, "step": 3028 }, { "epoch": 0.97, "learning_rate": 8.37500220408699e-08, - "loss": 0.0102, + "loss": 0.0099, "step": 3029 }, { "epoch": 0.97, "learning_rate": 8.211740717316452e-08, - "loss": 0.0055, + "loss": 0.0057, "step": 3030 }, { "epoch": 0.97, "learning_rate": 8.050081867005455e-08, - "loss": 0.007, + "loss": 0.0071, "step": 3031 }, { "epoch": 0.97, "learning_rate": 7.89002582682491e-08, - "loss": 0.0134, + "loss": 0.013, "step": 3032 }, { "epoch": 0.97, "learning_rate": 7.731572768724105e-08, - "loss": 0.0124, + "loss": 0.012, "step": 3033 }, { "epoch": 0.97, "learning_rate": 7.574722862930206e-08, - "loss": 0.0068, + "loss": 0.0118, "step": 3034 }, { @@ -18253,19 +18253,19 @@ { "epoch": 0.97, "learning_rate": 7.265833180558512e-08, - "loss": 0.0048, + "loss": 0.0054, "step": 3036 }, { "epoch": 0.97, "learning_rate": 7.113793735822771e-08, - "loss": 0.0053, + "loss": 0.0067, "step": 3037 }, { "epoch": 0.97, "learning_rate": 6.96335810707721e-08, - "loss": 0.0054, + "loss": 0.0042, "step": 3038 }, { @@ -18277,19 +18277,19 @@ { "epoch": 0.97, "learning_rate": 6.667298942288736e-08, - "loss": 0.0087, + "loss": 0.0117, "step": 3040 }, { "epoch": 0.97, "learning_rate": 6.521675724303732e-08, - "loss": 0.0092, + "loss": 0.009, "step": 3041 }, { "epoch": 0.97, "learning_rate": 6.377656958424571e-08, - "loss": 0.0084, + "loss": 0.0077, "step": 3042 }, { @@ -18301,43 +18301,43 @@ { "epoch": 0.97, "learning_rate": 6.094433400140598e-08, - "loss": 0.0059, + "loss": 0.0048, "step": 3044 }, { "epoch": 0.97, "learning_rate": 5.955228912004396e-08, - "loss": 0.0089, + "loss": 0.0083, "step": 3045 }, { "epoch": 0.97, "learning_rate": 5.817629484511089e-08, - "loss": 0.0095, + "loss": 0.011, "step": 3046 }, { "epoch": 0.97, "learning_rate": 5.6816352654844305e-08, - "loss": 0.0052, + "loss": 0.0055, "step": 3047 }, { "epoch": 0.97, "learning_rate": 5.547246401023554e-08, - "loss": 0.0096, + "loss": 0.0077, "step": 3048 }, { "epoch": 0.97, "learning_rate": 5.4144630355034694e-08, - "loss": 0.0025, + "loss": 0.0026, "step": 3049 }, { "epoch": 0.97, "learning_rate": 5.283285311574071e-08, - "loss": 0.0059, + "loss": 0.0049, "step": 3050 }, { @@ -18349,43 +18349,43 @@ { "epoch": 0.97, "learning_rate": 5.025747350461629e-08, - "loss": 0.0131, + "loss": 0.0135, "step": 3052 }, { "epoch": 0.98, "learning_rate": 4.8993873899532696e-08, - "loss": 0.0036, + "loss": 0.0039, "step": 3053 }, { "epoch": 0.98, "learning_rate": 4.7746336243839614e-08, - "loss": 0.0044, + "loss": 0.0048, "step": 3054 }, { "epoch": 0.98, "learning_rate": 4.651486187777831e-08, - "loss": 0.004, + "loss": 0.0069, "step": 3055 }, { "epoch": 0.98, "learning_rate": 4.529945212432385e-08, - "loss": 0.0068, + "loss": 0.0065, "step": 3056 }, { "epoch": 0.98, "learning_rate": 4.410010828920008e-08, - "loss": 0.0095, + "loss": 0.0079, "step": 3057 }, { "epoch": 0.98, "learning_rate": 4.291683166086968e-08, - "loss": 0.0062, + "loss": 0.006, "step": 3058 }, { @@ -18397,31 +18397,31 @@ { "epoch": 0.98, "learning_rate": 4.0598485092123717e-08, - "loss": 0.0068, + "loss": 0.0078, "step": 3060 }, { "epoch": 0.98, "learning_rate": 3.9463417642320865e-08, - "loss": 0.0077, + "loss": 0.0072, "step": 3061 }, { "epoch": 0.98, "learning_rate": 3.834442238053515e-08, - "loss": 0.0077, + "loss": 0.0074, "step": 3062 }, { "epoch": 0.98, "learning_rate": 3.7241500508904955e-08, - "loss": 0.0069, + "loss": 0.0067, "step": 3063 }, { "epoch": 0.98, "learning_rate": 3.615465321230915e-08, - "loss": 0.0063, + "loss": 0.0061, "step": 3064 }, { @@ -18433,301 +18433,301 @@ { "epoch": 0.98, "learning_rate": 3.402918699736857e-08, - "loss": 0.0082, + "loss": 0.0084, "step": 3066 }, { "epoch": 0.98, "learning_rate": 3.299057036242559e-08, - "loss": 0.01, + "loss": 0.0098, "step": 3067 }, { "epoch": 0.98, "learning_rate": 3.196803286931393e-08, - "loss": 0.0089, + "loss": 0.0088, "step": 3068 }, { "epoch": 0.98, "learning_rate": 3.0961575616549886e-08, - "loss": 0.0071, + "loss": 0.0078, "step": 3069 }, { "epoch": 0.98, "learning_rate": 2.997119968537687e-08, - "loss": 0.0039, + "loss": 0.0054, "step": 3070 }, { "epoch": 0.98, "learning_rate": 2.899690613976047e-08, - "loss": 0.0037, + "loss": 0.0034, "step": 3071 }, { "epoch": 0.98, "learning_rate": 2.8038696026391753e-08, - "loss": 0.0094, + "loss": 0.0085, "step": 3072 }, { "epoch": 0.98, "learning_rate": 2.7096570374680607e-08, - "loss": 0.0086, + "loss": 0.0072, "step": 3073 }, { "epoch": 0.98, "learning_rate": 2.6170530196757415e-08, - "loss": 0.0066, + "loss": 0.0069, "step": 3074 }, { "epoch": 0.98, "learning_rate": 2.5260576487471376e-08, - "loss": 0.0043, + "loss": 0.0036, "step": 3075 }, { "epoch": 0.98, "learning_rate": 2.436671022439385e-08, - "loss": 0.0042, + "loss": 0.0044, "step": 3076 }, { "epoch": 0.98, "learning_rate": 2.3488932367806694e-08, - "loss": 0.0074, + "loss": 0.0053, "step": 3077 }, { "epoch": 0.98, "learning_rate": 2.2627243860712244e-08, - "loss": 0.0132, + "loss": 0.014, "step": 3078 }, { "epoch": 0.98, "learning_rate": 2.1781645628830006e-08, - "loss": 0.0027, + "loss": 0.0031, "step": 3079 }, { "epoch": 0.98, "learning_rate": 2.0952138580586643e-08, - "loss": 0.0089, + "loss": 0.0088, "step": 3080 }, { "epoch": 0.98, "learning_rate": 2.0138723607130982e-08, - "loss": 0.0054, + "loss": 0.0041, "step": 3081 }, { "epoch": 0.98, "learning_rate": 1.9341401582315676e-08, - "loss": 0.0044, + "loss": 0.0042, "step": 3082 }, { "epoch": 0.98, "learning_rate": 1.856017336271054e-08, - "loss": 0.0074, + "loss": 0.0076, "step": 3083 }, { "epoch": 0.98, "learning_rate": 1.7795039787590896e-08, - "loss": 0.0078, + "loss": 0.0073, "step": 3084 }, { "epoch": 0.99, "learning_rate": 1.7046001678947543e-08, - "loss": 0.0055, + "loss": 0.0059, "step": 3085 }, { "epoch": 0.99, "learning_rate": 1.63130598414718e-08, - "loss": 0.0142, + "loss": 0.0111, "step": 3086 }, { "epoch": 0.99, "learning_rate": 1.5596215062570475e-08, - "loss": 0.0058, + "loss": 0.0066, "step": 3087 }, { "epoch": 0.99, "learning_rate": 1.4895468112354205e-08, - "loss": 0.0068, + "loss": 0.0064, "step": 3088 }, { "epoch": 0.99, "learning_rate": 1.4210819743637471e-08, - "loss": 0.0086, + "loss": 0.0111, "step": 3089 }, { "epoch": 0.99, "learning_rate": 1.3542270691941916e-08, - "loss": 0.0125, + "loss": 0.0109, "step": 3090 }, { "epoch": 0.99, "learning_rate": 1.2889821675494684e-08, - "loss": 0.0067, + "loss": 0.0042, "step": 3091 }, { "epoch": 0.99, "learning_rate": 1.225347339522509e-08, - "loss": 0.0025, + "loss": 0.0028, "step": 3092 }, { "epoch": 0.99, "learning_rate": 1.1633226534764618e-08, - "loss": 0.0131, + "loss": 0.0098, "step": 3093 }, { "epoch": 0.99, "learning_rate": 1.1029081760450255e-08, - "loss": 0.0048, + "loss": 0.0062, "step": 3094 }, { "epoch": 0.99, "learning_rate": 1.0441039721314494e-08, - "loss": 0.0065, + "loss": 0.0061, "step": 3095 }, { "epoch": 0.99, "learning_rate": 9.869101049096996e-09, - "loss": 0.0135, + "loss": 0.0129, "step": 3096 }, { "epoch": 0.99, "learning_rate": 9.313266358234596e-09, - "loss": 0.0152, + "loss": 0.0118, "step": 3097 }, { "epoch": 0.99, "learning_rate": 8.773536245862967e-09, - "loss": 0.0026, + "loss": 0.0031, "step": 3098 }, { "epoch": 0.99, "learning_rate": 8.249911291816625e-09, - "loss": 0.0076, + "loss": 0.0068, "step": 3099 }, { "epoch": 0.99, "learning_rate": 7.742392058630587e-09, - "loss": 0.0118, + "loss": 0.0127, "step": 3100 }, { "epoch": 0.99, "learning_rate": 7.250979091533716e-09, - "loss": 0.0063, + "loss": 0.0066, "step": 3101 }, { "epoch": 0.99, "learning_rate": 6.775672918453713e-09, - "loss": 0.0094, + "loss": 0.0099, "step": 3102 }, { "epoch": 0.99, "learning_rate": 6.316474050015452e-09, - "loss": 0.005, + "loss": 0.0057, "step": 3103 }, { "epoch": 0.99, "learning_rate": 5.8733829795393215e-09, - "loss": 0.0106, + "loss": 0.011, "step": 3104 }, { "epoch": 0.99, "learning_rate": 5.446400183039546e-09, - "loss": 0.0049, + "loss": 0.0056, "step": 3105 }, { "epoch": 0.99, "learning_rate": 5.0355261192258635e-09, - "loss": 0.006, + "loss": 0.0061, "step": 3106 }, { "epoch": 0.99, "learning_rate": 4.640761229503521e-09, - "loss": 0.0088, + "loss": 0.0084, "step": 3107 }, { "epoch": 0.99, "learning_rate": 4.2621059379716055e-09, - "loss": 0.0029, + "loss": 0.0025, "step": 3108 }, { "epoch": 0.99, "learning_rate": 3.899560651418055e-09, - "loss": 0.0046, + "loss": 0.0042, "step": 3109 }, { "epoch": 0.99, "learning_rate": 3.5531257593313103e-09, - "loss": 0.0153, + "loss": 0.0157, "step": 3110 }, { "epoch": 0.99, "learning_rate": 3.2228016338853307e-09, - "loss": 0.0067, + "loss": 0.0084, "step": 3111 }, { "epoch": 0.99, "learning_rate": 2.9085886299512477e-09, - "loss": 0.0055, + "loss": 0.0052, "step": 3112 }, { "epoch": 0.99, "learning_rate": 2.6104870850890417e-09, - "loss": 0.0088, + "loss": 0.0069, "step": 3113 }, { "epoch": 0.99, "learning_rate": 2.3284973195508707e-09, - "loss": 0.0026, + "loss": 0.0031, "step": 3114 }, { "epoch": 0.99, "learning_rate": 2.062619636279406e-09, - "loss": 0.0096, + "loss": 0.0078, "step": 3115 }, { @@ -18739,55 +18739,55 @@ { "epoch": 1.0, "learning_rate": 1.579201641764838e-09, - "loss": 0.0045, + "loss": 0.0047, "step": 3117 }, { "epoch": 1.0, "learning_rate": 1.361661849859641e-09, - "loss": 0.006, + "loss": 0.0054, "step": 3118 }, { "epoch": 1.0, "learning_rate": 1.1602351788986276e-09, - "loss": 0.0051, + "loss": 0.0048, "step": 3119 }, { "epoch": 1.0, "learning_rate": 9.749218452753672e-10, - "loss": 0.0065, + "loss": 0.0057, "step": 3120 }, { "epoch": 1.0, "learning_rate": 8.057220480722771e-10, - "loss": 0.0051, + "loss": 0.0052, "step": 3121 }, { "epoch": 1.0, "learning_rate": 6.526359690622873e-10, - "loss": 0.0136, + "loss": 0.0131, "step": 3122 }, { "epoch": 1.0, "learning_rate": 5.156637727071756e-10, - "loss": 0.0061, + "loss": 0.0059, "step": 3123 }, { "epoch": 1.0, "learning_rate": 3.9480560615756665e-10, - "loss": 0.0042, + "loss": 0.0043, "step": 3124 }, { "epoch": 1.0, "learning_rate": 2.900615992496025e-10, - "loss": 0.0078, + "loss": 0.0082, "step": 3125 }, { @@ -18799,31 +18799,31 @@ { "epoch": 1.0, "learning_rate": 1.2891649715907062e-10, - "loss": 0.006, + "loss": 0.0066, "step": 3127 }, { "epoch": 1.0, "learning_rate": 7.251557509635465e-11, - "loss": 0.011, + "loss": 0.0059, "step": 3128 }, { "epoch": 1.0, "learning_rate": 3.2229158913321676e-11, - "loss": 0.0066, + "loss": 0.0099, "step": 3129 }, { "epoch": 1.0, "learning_rate": 8.057291892016316e-12, - "loss": 0.0066, + "loss": 0.0069, "step": 3130 }, { "epoch": 1.0, "learning_rate": 0.0, - "loss": 0.0085, + "loss": 0.0092, "step": 3131 } ], @@ -18832,8 +18832,8 @@ "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, - "total_flos": 1.978219893526364e+17, - "train_batch_size": 32, + "total_flos": 1.806664471459922e+17, + "train_batch_size": 16, "trial_name": null, "trial_params": null }