{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9998934772573944, "eval_steps": 500, "global_step": 1760, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 0.0, "loss": 5.2482, "step": 1 }, { "epoch": 0.0, "learning_rate": 0.0, "loss": 5.0467, "step": 2 }, { "epoch": 0.0, "learning_rate": 1.8867924528301888e-05, "loss": 4.9564, "step": 3 }, { "epoch": 0.0, "learning_rate": 1.8867924528301888e-05, "loss": 5.1658, "step": 4 }, { "epoch": 0.0, "learning_rate": 1.8867924528301888e-05, "loss": 4.9956, "step": 5 }, { "epoch": 0.0, "learning_rate": 3.7735849056603776e-05, "loss": 4.9814, "step": 6 }, { "epoch": 0.0, "learning_rate": 5.660377358490566e-05, "loss": 4.2625, "step": 7 }, { "epoch": 0.0, "learning_rate": 7.547169811320755e-05, "loss": 3.9414, "step": 8 }, { "epoch": 0.01, "learning_rate": 9.433962264150944e-05, "loss": 3.7966, "step": 9 }, { "epoch": 0.01, "learning_rate": 0.00011320754716981132, "loss": 3.6802, "step": 10 }, { "epoch": 0.01, "learning_rate": 0.0001320754716981132, "loss": 3.6004, "step": 11 }, { "epoch": 0.01, "learning_rate": 0.0001509433962264151, "loss": 3.5606, "step": 12 }, { "epoch": 0.01, "learning_rate": 0.00016981132075471697, "loss": 3.5021, "step": 13 }, { "epoch": 0.01, "learning_rate": 0.00018867924528301889, "loss": 3.5367, "step": 14 }, { "epoch": 0.01, "learning_rate": 0.00020754716981132078, "loss": 3.4328, "step": 15 }, { "epoch": 0.01, "learning_rate": 0.00022641509433962264, "loss": 3.4319, "step": 16 }, { "epoch": 0.01, "learning_rate": 0.00024528301886792453, "loss": 3.4415, "step": 17 }, { "epoch": 0.01, "learning_rate": 0.0002641509433962264, "loss": 3.4164, "step": 18 }, { "epoch": 0.01, "learning_rate": 0.0002830188679245283, "loss": 3.5059, "step": 19 }, { "epoch": 0.01, "learning_rate": 0.0003018867924528302, "loss": 3.3674, "step": 20 }, { "epoch": 0.01, "learning_rate": 0.00032075471698113204, "loss": 3.4163, "step": 21 }, { "epoch": 0.01, "learning_rate": 0.00033962264150943393, "loss": 3.3629, "step": 22 }, { "epoch": 0.01, "learning_rate": 0.0003584905660377358, "loss": 3.4262, "step": 23 }, { "epoch": 0.01, "learning_rate": 0.00037735849056603777, "loss": 3.4067, "step": 24 }, { "epoch": 0.01, "learning_rate": 0.00039622641509433966, "loss": 3.2488, "step": 25 }, { "epoch": 0.01, "learning_rate": 0.00041509433962264155, "loss": 3.4274, "step": 26 }, { "epoch": 0.02, "learning_rate": 0.00043396226415094345, "loss": 3.2567, "step": 27 }, { "epoch": 0.02, "learning_rate": 0.0004528301886792453, "loss": 3.2996, "step": 28 }, { "epoch": 0.02, "learning_rate": 0.0004716981132075472, "loss": 3.2675, "step": 29 }, { "epoch": 0.02, "learning_rate": 0.0004905660377358491, "loss": 3.166, "step": 30 }, { "epoch": 0.02, "learning_rate": 0.000509433962264151, "loss": 3.1844, "step": 31 }, { "epoch": 0.02, "learning_rate": 0.0005283018867924528, "loss": 3.1868, "step": 32 }, { "epoch": 0.02, "learning_rate": 0.0005471698113207547, "loss": 3.2863, "step": 33 }, { "epoch": 0.02, "learning_rate": 0.0005660377358490566, "loss": 3.1815, "step": 34 }, { "epoch": 0.02, "learning_rate": 0.0005849056603773585, "loss": 3.2916, "step": 35 }, { "epoch": 0.02, "learning_rate": 0.0006037735849056604, "loss": 3.1167, "step": 36 }, { "epoch": 0.02, "learning_rate": 0.0006226415094339623, "loss": 3.1742, "step": 37 }, { "epoch": 0.02, "learning_rate": 0.0006415094339622641, "loss": 3.1693, "step": 38 }, { "epoch": 0.02, "learning_rate": 0.000660377358490566, "loss": 3.1958, "step": 39 }, { "epoch": 0.02, "learning_rate": 0.0006792452830188679, "loss": 3.1499, "step": 40 }, { "epoch": 0.02, "learning_rate": 0.0006981132075471698, "loss": 3.1234, "step": 41 }, { "epoch": 0.02, "learning_rate": 0.0007169811320754717, "loss": 3.0571, "step": 42 }, { "epoch": 0.02, "learning_rate": 0.0007358490566037735, "loss": 3.0833, "step": 43 }, { "epoch": 0.02, "learning_rate": 0.0007547169811320755, "loss": 3.1011, "step": 44 }, { "epoch": 0.03, "learning_rate": 0.0007735849056603774, "loss": 3.0498, "step": 45 }, { "epoch": 0.03, "learning_rate": 0.0007924528301886793, "loss": 3.1238, "step": 46 }, { "epoch": 0.03, "learning_rate": 0.0008113207547169812, "loss": 3.061, "step": 47 }, { "epoch": 0.03, "learning_rate": 0.0008301886792452831, "loss": 3.0054, "step": 48 }, { "epoch": 0.03, "learning_rate": 0.000849056603773585, "loss": 3.0881, "step": 49 }, { "epoch": 0.03, "learning_rate": 0.0008679245283018869, "loss": 3.1222, "step": 50 }, { "epoch": 0.03, "learning_rate": 0.0008867924528301887, "loss": 3.1099, "step": 51 }, { "epoch": 0.03, "learning_rate": 0.0009056603773584906, "loss": 2.9995, "step": 52 }, { "epoch": 0.03, "learning_rate": 0.0009245283018867925, "loss": 3.0292, "step": 53 }, { "epoch": 0.03, "learning_rate": 0.0009433962264150943, "loss": 3.1197, "step": 54 }, { "epoch": 0.03, "learning_rate": 0.0009622641509433962, "loss": 3.0159, "step": 55 }, { "epoch": 0.03, "learning_rate": 0.0009811320754716981, "loss": 2.93, "step": 56 }, { "epoch": 0.03, "learning_rate": 0.001, "loss": 2.9495, "step": 57 }, { "epoch": 0.03, "learning_rate": 0.0009999991532161057, "loss": 2.9419, "step": 58 }, { "epoch": 0.03, "learning_rate": 0.0009999966128672907, "loss": 3.027, "step": 59 }, { "epoch": 0.03, "learning_rate": 0.0009999923789621597, "loss": 3.0178, "step": 60 }, { "epoch": 0.03, "learning_rate": 0.0009999864515150534, "loss": 2.9705, "step": 61 }, { "epoch": 0.04, "learning_rate": 0.0009999788305460491, "loss": 2.927, "step": 62 }, { "epoch": 0.04, "learning_rate": 0.0009999695160809597, "loss": 3.0041, "step": 63 }, { "epoch": 0.04, "learning_rate": 0.0009999585081513348, "loss": 3.0589, "step": 64 }, { "epoch": 0.04, "learning_rate": 0.0009999458067944597, "loss": 2.9795, "step": 65 }, { "epoch": 0.04, "learning_rate": 0.0009999314120533555, "loss": 2.8439, "step": 66 }, { "epoch": 0.04, "learning_rate": 0.0009999153239767793, "loss": 3.0651, "step": 67 }, { "epoch": 0.04, "learning_rate": 0.0009998975426192237, "loss": 3.0304, "step": 68 }, { "epoch": 0.04, "learning_rate": 0.000999878068040916, "loss": 2.9077, "step": 69 }, { "epoch": 0.04, "learning_rate": 0.00099985690030782, "loss": 2.9534, "step": 70 }, { "epoch": 0.04, "learning_rate": 0.0009998340394916333, "loss": 2.9447, "step": 71 }, { "epoch": 0.04, "learning_rate": 0.0009998094856697884, "loss": 2.8214, "step": 72 }, { "epoch": 0.04, "learning_rate": 0.0009997832389254527, "loss": 3.0377, "step": 73 }, { "epoch": 0.04, "learning_rate": 0.0009997552993475274, "loss": 3.0556, "step": 74 }, { "epoch": 0.04, "learning_rate": 0.0009997256670306479, "loss": 2.9563, "step": 75 }, { "epoch": 0.04, "learning_rate": 0.0009996943420751824, "loss": 2.94, "step": 76 }, { "epoch": 0.04, "learning_rate": 0.000999661324587233, "loss": 2.9925, "step": 77 }, { "epoch": 0.04, "learning_rate": 0.0009996266146786344, "loss": 2.9317, "step": 78 }, { "epoch": 0.04, "learning_rate": 0.0009995902124669538, "loss": 3.021, "step": 79 }, { "epoch": 0.05, "learning_rate": 0.0009995521180754905, "loss": 2.9763, "step": 80 }, { "epoch": 0.05, "learning_rate": 0.000999512331633275, "loss": 2.9475, "step": 81 }, { "epoch": 0.05, "learning_rate": 0.0009994708532750699, "loss": 2.9488, "step": 82 }, { "epoch": 0.05, "learning_rate": 0.0009994276831413675, "loss": 2.7804, "step": 83 }, { "epoch": 0.05, "learning_rate": 0.0009993828213783913, "loss": 2.93, "step": 84 }, { "epoch": 0.05, "learning_rate": 0.0009993362681380942, "loss": 2.8958, "step": 85 }, { "epoch": 0.05, "learning_rate": 0.000999288023578158, "loss": 2.9788, "step": 86 }, { "epoch": 0.05, "learning_rate": 0.0009992380878619937, "loss": 2.9375, "step": 87 }, { "epoch": 0.05, "learning_rate": 0.0009991864611587405, "loss": 2.9048, "step": 88 }, { "epoch": 0.05, "learning_rate": 0.0009991331436432647, "loss": 2.8672, "step": 89 }, { "epoch": 0.05, "learning_rate": 0.0009990781354961605, "loss": 2.8791, "step": 90 }, { "epoch": 0.05, "learning_rate": 0.0009990214369037474, "loss": 2.8839, "step": 91 }, { "epoch": 0.05, "learning_rate": 0.0009989630480580713, "loss": 2.988, "step": 92 }, { "epoch": 0.05, "learning_rate": 0.0009989029691569037, "loss": 2.8999, "step": 93 }, { "epoch": 0.05, "learning_rate": 0.000998841200403739, "loss": 2.9038, "step": 94 }, { "epoch": 0.05, "learning_rate": 0.0009987777420077974, "loss": 2.9074, "step": 95 }, { "epoch": 0.05, "learning_rate": 0.0009987125941840205, "loss": 2.8319, "step": 96 }, { "epoch": 0.06, "learning_rate": 0.0009986457571530727, "loss": 2.9062, "step": 97 }, { "epoch": 0.06, "learning_rate": 0.0009985772311413403, "loss": 2.8477, "step": 98 }, { "epoch": 0.06, "learning_rate": 0.0009985070163809305, "loss": 2.8691, "step": 99 }, { "epoch": 0.06, "learning_rate": 0.0009984351131096696, "loss": 2.9212, "step": 100 }, { "epoch": 0.06, "learning_rate": 0.0009983615215711041, "loss": 2.9209, "step": 101 }, { "epoch": 0.06, "learning_rate": 0.0009982862420144984, "loss": 2.9573, "step": 102 }, { "epoch": 0.06, "learning_rate": 0.0009982092746948347, "loss": 2.8668, "step": 103 }, { "epoch": 0.06, "learning_rate": 0.0009981306198728116, "loss": 2.8374, "step": 104 }, { "epoch": 0.06, "learning_rate": 0.0009980502778148437, "loss": 2.8702, "step": 105 }, { "epoch": 0.06, "learning_rate": 0.0009979682487930605, "loss": 2.799, "step": 106 }, { "epoch": 0.06, "learning_rate": 0.0009978845330853054, "loss": 2.8704, "step": 107 }, { "epoch": 0.06, "learning_rate": 0.0009977991309751346, "loss": 2.8714, "step": 108 }, { "epoch": 0.06, "learning_rate": 0.000997712042751817, "loss": 2.8934, "step": 109 }, { "epoch": 0.06, "learning_rate": 0.0009976232687103322, "loss": 2.8024, "step": 110 }, { "epoch": 0.06, "learning_rate": 0.0009975328091513696, "loss": 2.8448, "step": 111 }, { "epoch": 0.06, "learning_rate": 0.000997440664381328, "loss": 2.8474, "step": 112 }, { "epoch": 0.06, "learning_rate": 0.0009973468347123148, "loss": 2.8874, "step": 113 }, { "epoch": 0.06, "learning_rate": 0.000997251320462143, "loss": 2.8774, "step": 114 }, { "epoch": 0.07, "learning_rate": 0.000997154121954333, "loss": 2.8746, "step": 115 }, { "epoch": 0.07, "learning_rate": 0.0009970552395181089, "loss": 2.9059, "step": 116 }, { "epoch": 0.07, "learning_rate": 0.000996954673488399, "loss": 2.8659, "step": 117 }, { "epoch": 0.07, "learning_rate": 0.0009968524242058342, "loss": 2.8242, "step": 118 }, { "epoch": 0.07, "learning_rate": 0.0009967484920167466, "loss": 2.7469, "step": 119 }, { "epoch": 0.07, "learning_rate": 0.0009966428772731689, "loss": 2.9072, "step": 120 }, { "epoch": 0.07, "learning_rate": 0.000996535580332832, "loss": 2.8385, "step": 121 }, { "epoch": 0.07, "learning_rate": 0.0009964266015591655, "loss": 2.8843, "step": 122 }, { "epoch": 0.07, "learning_rate": 0.0009963159413212952, "loss": 2.8877, "step": 123 }, { "epoch": 0.07, "learning_rate": 0.0009962035999940425, "loss": 2.8456, "step": 124 }, { "epoch": 0.07, "learning_rate": 0.0009960895779579228, "loss": 2.8527, "step": 125 }, { "epoch": 0.07, "learning_rate": 0.0009959738755991436, "loss": 2.8255, "step": 126 }, { "epoch": 0.07, "learning_rate": 0.000995856493309605, "loss": 2.8471, "step": 127 }, { "epoch": 0.07, "learning_rate": 0.0009957374314868966, "loss": 2.847, "step": 128 }, { "epoch": 0.07, "learning_rate": 0.000995616690534297, "loss": 2.7666, "step": 129 }, { "epoch": 0.07, "learning_rate": 0.000995494270860772, "loss": 2.855, "step": 130 }, { "epoch": 0.07, "learning_rate": 0.0009953701728809736, "loss": 2.871, "step": 131 }, { "epoch": 0.07, "learning_rate": 0.000995244397015239, "loss": 2.7651, "step": 132 }, { "epoch": 0.08, "learning_rate": 0.0009951169436895875, "loss": 2.8782, "step": 133 }, { "epoch": 0.08, "learning_rate": 0.000994987813335721, "loss": 2.7868, "step": 134 }, { "epoch": 0.08, "learning_rate": 0.0009948570063910214, "loss": 2.821, "step": 135 }, { "epoch": 0.08, "learning_rate": 0.00099472452329855, "loss": 2.7883, "step": 136 }, { "epoch": 0.08, "learning_rate": 0.0009945903645070446, "loss": 2.8112, "step": 137 }, { "epoch": 0.08, "learning_rate": 0.0009944545304709192, "loss": 2.8404, "step": 138 }, { "epoch": 0.08, "learning_rate": 0.000994317021650262, "loss": 2.8906, "step": 139 }, { "epoch": 0.08, "learning_rate": 0.0009941778385108347, "loss": 2.8831, "step": 140 }, { "epoch": 0.08, "learning_rate": 0.0009940369815240688, "loss": 2.7353, "step": 141 }, { "epoch": 0.08, "learning_rate": 0.000993894451167066, "loss": 2.8234, "step": 142 }, { "epoch": 0.08, "learning_rate": 0.0009937502479225964, "loss": 2.7929, "step": 143 }, { "epoch": 0.08, "learning_rate": 0.0009936043722790955, "loss": 2.7202, "step": 144 }, { "epoch": 0.08, "learning_rate": 0.0009934568247306642, "loss": 2.8444, "step": 145 }, { "epoch": 0.08, "learning_rate": 0.0009933076057770658, "loss": 2.8307, "step": 146 }, { "epoch": 0.08, "learning_rate": 0.000993156715923725, "loss": 2.8203, "step": 147 }, { "epoch": 0.08, "learning_rate": 0.0009930041556817269, "loss": 2.7556, "step": 148 }, { "epoch": 0.08, "learning_rate": 0.000992849925567813, "loss": 2.8758, "step": 149 }, { "epoch": 0.09, "learning_rate": 0.000992694026104382, "loss": 2.7549, "step": 150 }, { "epoch": 0.09, "learning_rate": 0.0009925364578194861, "loss": 2.8526, "step": 151 }, { "epoch": 0.09, "learning_rate": 0.000992377221246831, "loss": 2.781, "step": 152 }, { "epoch": 0.09, "learning_rate": 0.000992216316925772, "loss": 2.8254, "step": 153 }, { "epoch": 0.09, "learning_rate": 0.0009920537454013144, "loss": 2.7487, "step": 154 }, { "epoch": 0.09, "learning_rate": 0.0009918895072241096, "loss": 2.7887, "step": 155 }, { "epoch": 0.09, "learning_rate": 0.0009917236029504547, "loss": 2.8059, "step": 156 }, { "epoch": 0.09, "learning_rate": 0.00099155603314229, "loss": 2.8325, "step": 157 }, { "epoch": 0.09, "learning_rate": 0.000991386798367197, "loss": 2.8345, "step": 158 }, { "epoch": 0.09, "learning_rate": 0.000991215899198397, "loss": 2.8227, "step": 159 }, { "epoch": 0.09, "learning_rate": 0.0009910433362147488, "loss": 2.8391, "step": 160 }, { "epoch": 0.09, "learning_rate": 0.0009908691100007463, "loss": 2.8131, "step": 161 }, { "epoch": 0.09, "learning_rate": 0.0009906932211465173, "loss": 2.7959, "step": 162 }, { "epoch": 0.09, "learning_rate": 0.0009905156702478214, "loss": 2.8631, "step": 163 }, { "epoch": 0.09, "learning_rate": 0.0009903364579060476, "loss": 2.7267, "step": 164 }, { "epoch": 0.09, "learning_rate": 0.0009901555847282122, "loss": 2.7919, "step": 165 }, { "epoch": 0.09, "learning_rate": 0.0009899730513269573, "loss": 2.711, "step": 166 }, { "epoch": 0.09, "learning_rate": 0.0009897888583205482, "loss": 2.8072, "step": 167 }, { "epoch": 0.1, "learning_rate": 0.0009896030063328717, "loss": 2.7619, "step": 168 }, { "epoch": 0.1, "learning_rate": 0.0009894154959934336, "loss": 2.7956, "step": 169 }, { "epoch": 0.1, "learning_rate": 0.0009892263279373567, "loss": 2.7723, "step": 170 }, { "epoch": 0.1, "learning_rate": 0.0009890355028053792, "loss": 2.7653, "step": 171 }, { "epoch": 0.1, "learning_rate": 0.0009888430212438514, "loss": 2.7335, "step": 172 }, { "epoch": 0.1, "learning_rate": 0.0009886488839047347, "loss": 2.7963, "step": 173 }, { "epoch": 0.1, "learning_rate": 0.0009884530914455983, "loss": 2.8076, "step": 174 }, { "epoch": 0.1, "learning_rate": 0.000988255644529618, "loss": 2.7262, "step": 175 }, { "epoch": 0.1, "learning_rate": 0.0009880565438255733, "loss": 2.8753, "step": 176 }, { "epoch": 0.1, "learning_rate": 0.0009878557900078449, "loss": 2.7464, "step": 177 }, { "epoch": 0.1, "learning_rate": 0.0009876533837564138, "loss": 2.7399, "step": 178 }, { "epoch": 0.1, "learning_rate": 0.0009874493257568569, "loss": 2.7399, "step": 179 }, { "epoch": 0.1, "learning_rate": 0.0009872436167003467, "loss": 2.7471, "step": 180 }, { "epoch": 0.1, "learning_rate": 0.0009870362572836472, "loss": 2.7681, "step": 181 }, { "epoch": 0.1, "learning_rate": 0.0009868272482091134, "loss": 2.8374, "step": 182 }, { "epoch": 0.1, "learning_rate": 0.000986616590184687, "loss": 2.7107, "step": 183 }, { "epoch": 0.1, "learning_rate": 0.0009864042839238954, "loss": 2.8556, "step": 184 }, { "epoch": 0.11, "learning_rate": 0.0009861903301458487, "loss": 2.7352, "step": 185 }, { "epoch": 0.11, "learning_rate": 0.0009859747295752372, "loss": 2.8007, "step": 186 }, { "epoch": 0.11, "learning_rate": 0.0009857574829423297, "loss": 2.7395, "step": 187 }, { "epoch": 0.11, "learning_rate": 0.0009855385909829696, "loss": 2.8226, "step": 188 }, { "epoch": 0.11, "learning_rate": 0.0009853180544385737, "loss": 2.7514, "step": 189 }, { "epoch": 0.11, "learning_rate": 0.000985095874056129, "loss": 2.7679, "step": 190 }, { "epoch": 0.11, "learning_rate": 0.0009848720505881909, "loss": 2.805, "step": 191 }, { "epoch": 0.11, "learning_rate": 0.00098464658479288, "loss": 2.7142, "step": 192 }, { "epoch": 0.11, "learning_rate": 0.0009844194774338792, "loss": 2.7042, "step": 193 }, { "epoch": 0.11, "learning_rate": 0.0009841907292804316, "loss": 2.7734, "step": 194 }, { "epoch": 0.11, "learning_rate": 0.0009839603411073389, "loss": 2.7753, "step": 195 }, { "epoch": 0.11, "learning_rate": 0.0009837283136949566, "loss": 2.7156, "step": 196 }, { "epoch": 0.11, "learning_rate": 0.0009834946478291932, "loss": 2.6806, "step": 197 }, { "epoch": 0.11, "learning_rate": 0.0009832593443015066, "loss": 2.7494, "step": 198 }, { "epoch": 0.11, "learning_rate": 0.000983022403908902, "loss": 2.6956, "step": 199 }, { "epoch": 0.11, "learning_rate": 0.000982783827453928, "loss": 2.7452, "step": 200 }, { "epoch": 0.11, "learning_rate": 0.0009825436157446762, "loss": 2.6981, "step": 201 }, { "epoch": 0.11, "learning_rate": 0.0009823017695947756, "loss": 2.8186, "step": 202 }, { "epoch": 0.12, "learning_rate": 0.0009820582898233921, "loss": 2.6943, "step": 203 }, { "epoch": 0.12, "learning_rate": 0.0009818131772552248, "loss": 2.7198, "step": 204 }, { "epoch": 0.12, "learning_rate": 0.0009815664327205032, "loss": 2.7194, "step": 205 }, { "epoch": 0.12, "learning_rate": 0.0009813180570549841, "loss": 2.834, "step": 206 }, { "epoch": 0.12, "learning_rate": 0.0009810680510999503, "loss": 2.7756, "step": 207 }, { "epoch": 0.12, "learning_rate": 0.0009808164157022053, "loss": 2.7234, "step": 208 }, { "epoch": 0.12, "learning_rate": 0.0009805631517140723, "loss": 2.7346, "step": 209 }, { "epoch": 0.12, "learning_rate": 0.000980308259993391, "loss": 2.6711, "step": 210 }, { "epoch": 0.12, "learning_rate": 0.0009800517414035141, "loss": 2.7579, "step": 211 }, { "epoch": 0.12, "learning_rate": 0.000979793596813305, "loss": 2.7716, "step": 212 }, { "epoch": 0.12, "learning_rate": 0.000979533827097134, "loss": 2.7547, "step": 213 }, { "epoch": 0.12, "learning_rate": 0.0009792724331348767, "loss": 2.7107, "step": 214 }, { "epoch": 0.12, "learning_rate": 0.00097900941581191, "loss": 2.7257, "step": 215 }, { "epoch": 0.12, "learning_rate": 0.0009787447760191091, "loss": 2.7289, "step": 216 }, { "epoch": 0.12, "learning_rate": 0.0009784785146528447, "loss": 2.6947, "step": 217 }, { "epoch": 0.12, "learning_rate": 0.0009782106326149801, "loss": 2.769, "step": 218 }, { "epoch": 0.12, "learning_rate": 0.0009779411308128685, "loss": 2.7123, "step": 219 }, { "epoch": 0.12, "learning_rate": 0.0009776700101593488, "loss": 2.7408, "step": 220 }, { "epoch": 0.13, "learning_rate": 0.0009773972715727433, "loss": 2.7125, "step": 221 }, { "epoch": 0.13, "learning_rate": 0.0009771229159768547, "loss": 2.6776, "step": 222 }, { "epoch": 0.13, "learning_rate": 0.0009768469443009625, "loss": 2.7475, "step": 223 }, { "epoch": 0.13, "learning_rate": 0.0009765693574798203, "loss": 2.7532, "step": 224 }, { "epoch": 0.13, "learning_rate": 0.0009762901564536521, "loss": 2.6687, "step": 225 }, { "epoch": 0.13, "learning_rate": 0.00097600934216815, "loss": 2.783, "step": 226 }, { "epoch": 0.13, "learning_rate": 0.0009757269155744696, "loss": 2.7581, "step": 227 }, { "epoch": 0.13, "learning_rate": 0.0009754428776292286, "loss": 2.7305, "step": 228 }, { "epoch": 0.13, "learning_rate": 0.0009751572292945015, "loss": 2.7487, "step": 229 }, { "epoch": 0.13, "learning_rate": 0.0009748699715378182, "loss": 2.7499, "step": 230 }, { "epoch": 0.13, "learning_rate": 0.0009745811053321596, "loss": 2.6904, "step": 231 }, { "epoch": 0.13, "learning_rate": 0.0009742906316559548, "loss": 2.7082, "step": 232 }, { "epoch": 0.13, "learning_rate": 0.0009739985514930775, "loss": 2.7261, "step": 233 }, { "epoch": 0.13, "learning_rate": 0.0009737048658328426, "loss": 2.6158, "step": 234 }, { "epoch": 0.13, "learning_rate": 0.0009734095756700037, "loss": 2.6389, "step": 235 }, { "epoch": 0.13, "learning_rate": 0.0009731126820047481, "loss": 2.6846, "step": 236 }, { "epoch": 0.13, "learning_rate": 0.0009728141858426952, "loss": 2.7424, "step": 237 }, { "epoch": 0.14, "learning_rate": 0.000972514088194892, "loss": 2.7509, "step": 238 }, { "epoch": 0.14, "learning_rate": 0.0009722123900778096, "loss": 2.722, "step": 239 }, { "epoch": 0.14, "learning_rate": 0.0009719090925133407, "loss": 2.6854, "step": 240 }, { "epoch": 0.14, "learning_rate": 0.0009716041965287953, "loss": 2.7199, "step": 241 }, { "epoch": 0.14, "learning_rate": 0.0009712977031568973, "loss": 2.6969, "step": 242 }, { "epoch": 0.14, "learning_rate": 0.0009709896134357815, "loss": 2.7208, "step": 243 }, { "epoch": 0.14, "learning_rate": 0.0009706799284089891, "loss": 2.647, "step": 244 }, { "epoch": 0.14, "learning_rate": 0.0009703686491254658, "loss": 2.6342, "step": 245 }, { "epoch": 0.14, "learning_rate": 0.0009700557766395565, "loss": 2.7401, "step": 246 }, { "epoch": 0.14, "learning_rate": 0.0009697413120110028, "loss": 2.701, "step": 247 }, { "epoch": 0.14, "learning_rate": 0.000969425256304939, "loss": 2.6749, "step": 248 }, { "epoch": 0.14, "learning_rate": 0.0009691076105918884, "loss": 2.7221, "step": 249 }, { "epoch": 0.14, "learning_rate": 0.0009687883759477605, "loss": 2.7285, "step": 250 }, { "epoch": 0.14, "learning_rate": 0.0009684675534538461, "loss": 2.704, "step": 251 }, { "epoch": 0.14, "learning_rate": 0.0009681451441968143, "loss": 2.7219, "step": 252 }, { "epoch": 0.14, "learning_rate": 0.0009678211492687095, "loss": 2.6817, "step": 253 }, { "epoch": 0.14, "learning_rate": 0.0009674955697669458, "loss": 2.6934, "step": 254 }, { "epoch": 0.14, "learning_rate": 0.0009671684067943056, "loss": 2.6851, "step": 255 }, { "epoch": 0.15, "learning_rate": 0.0009668396614589341, "loss": 2.6716, "step": 256 }, { "epoch": 0.15, "learning_rate": 0.0009665093348743361, "loss": 2.7255, "step": 257 }, { "epoch": 0.15, "learning_rate": 0.0009661774281593729, "loss": 2.659, "step": 258 }, { "epoch": 0.15, "learning_rate": 0.0009658439424382575, "loss": 2.6577, "step": 259 }, { "epoch": 0.15, "learning_rate": 0.0009655088788405508, "loss": 2.7046, "step": 260 }, { "epoch": 0.15, "learning_rate": 0.0009651722385011592, "loss": 2.6396, "step": 261 }, { "epoch": 0.15, "learning_rate": 0.0009648340225603287, "loss": 2.7347, "step": 262 }, { "epoch": 0.15, "learning_rate": 0.000964494232163643, "loss": 2.6952, "step": 263 }, { "epoch": 0.15, "learning_rate": 0.0009641528684620179, "loss": 2.7599, "step": 264 }, { "epoch": 0.15, "learning_rate": 0.0009638099326116987, "loss": 2.7123, "step": 265 }, { "epoch": 0.15, "learning_rate": 0.0009634654257742554, "loss": 2.6239, "step": 266 }, { "epoch": 0.15, "learning_rate": 0.0009631193491165797, "loss": 2.6331, "step": 267 }, { "epoch": 0.15, "learning_rate": 0.0009627717038108799, "loss": 2.6387, "step": 268 }, { "epoch": 0.15, "learning_rate": 0.000962422491034678, "loss": 2.7134, "step": 269 }, { "epoch": 0.15, "learning_rate": 0.0009620717119708047, "loss": 2.6042, "step": 270 }, { "epoch": 0.15, "learning_rate": 0.0009617193678073965, "loss": 2.7081, "step": 271 }, { "epoch": 0.15, "learning_rate": 0.0009613654597378909, "loss": 2.6387, "step": 272 }, { "epoch": 0.16, "learning_rate": 0.0009610099889610223, "loss": 2.6194, "step": 273 }, { "epoch": 0.16, "learning_rate": 0.0009606529566808186, "loss": 2.6575, "step": 274 }, { "epoch": 0.16, "learning_rate": 0.0009602943641065964, "loss": 2.6457, "step": 275 }, { "epoch": 0.16, "learning_rate": 0.0009599342124529575, "loss": 2.5716, "step": 276 }, { "epoch": 0.16, "learning_rate": 0.0009595725029397841, "loss": 2.6692, "step": 277 }, { "epoch": 0.16, "learning_rate": 0.0009592092367922358, "loss": 2.6708, "step": 278 }, { "epoch": 0.16, "learning_rate": 0.0009588444152407441, "loss": 2.686, "step": 279 }, { "epoch": 0.16, "learning_rate": 0.0009584780395210088, "loss": 2.5811, "step": 280 }, { "epoch": 0.16, "learning_rate": 0.0009581101108739944, "loss": 2.5678, "step": 281 }, { "epoch": 0.16, "learning_rate": 0.000957740630545925, "loss": 2.6341, "step": 282 }, { "epoch": 0.16, "learning_rate": 0.0009573695997882808, "loss": 2.6191, "step": 283 }, { "epoch": 0.16, "learning_rate": 0.000956997019857793, "loss": 2.7044, "step": 284 }, { "epoch": 0.16, "learning_rate": 0.0009566228920164405, "loss": 2.6047, "step": 285 }, { "epoch": 0.16, "learning_rate": 0.0009562472175314449, "loss": 2.6376, "step": 286 }, { "epoch": 0.16, "learning_rate": 0.0009558699976752668, "loss": 2.686, "step": 287 }, { "epoch": 0.16, "learning_rate": 0.0009554912337256007, "loss": 2.6306, "step": 288 }, { "epoch": 0.16, "learning_rate": 0.0009551109269653718, "loss": 2.6262, "step": 289 }, { "epoch": 0.16, "learning_rate": 0.0009547290786827303, "loss": 2.6516, "step": 290 }, { "epoch": 0.17, "learning_rate": 0.0009543456901710483, "loss": 2.6135, "step": 291 }, { "epoch": 0.17, "learning_rate": 0.0009539607627289146, "loss": 2.7414, "step": 292 }, { "epoch": 0.17, "learning_rate": 0.0009535742976601309, "loss": 2.6305, "step": 293 }, { "epoch": 0.17, "learning_rate": 0.0009531862962737065, "loss": 2.6181, "step": 294 }, { "epoch": 0.17, "learning_rate": 0.0009527967598838547, "loss": 2.6371, "step": 295 }, { "epoch": 0.17, "learning_rate": 0.0009524056898099881, "loss": 2.6824, "step": 296 }, { "epoch": 0.17, "learning_rate": 0.0009520130873767141, "loss": 2.5707, "step": 297 }, { "epoch": 0.17, "learning_rate": 0.0009516189539138305, "loss": 2.5983, "step": 298 }, { "epoch": 0.17, "learning_rate": 0.0009512232907563206, "loss": 2.6824, "step": 299 }, { "epoch": 0.17, "learning_rate": 0.0009508260992443492, "loss": 2.6618, "step": 300 }, { "epoch": 0.17, "learning_rate": 0.0009504273807232576, "loss": 2.6094, "step": 301 }, { "epoch": 0.17, "learning_rate": 0.0009500271365435599, "loss": 2.5858, "step": 302 }, { "epoch": 0.17, "learning_rate": 0.000949625368060937, "loss": 2.6308, "step": 303 }, { "epoch": 0.17, "learning_rate": 0.0009492220766362336, "loss": 2.5783, "step": 304 }, { "epoch": 0.17, "learning_rate": 0.0009488172636354521, "loss": 2.6703, "step": 305 }, { "epoch": 0.17, "learning_rate": 0.0009484109304297492, "loss": 2.6737, "step": 306 }, { "epoch": 0.17, "learning_rate": 0.0009480030783954306, "loss": 2.5912, "step": 307 }, { "epoch": 0.17, "learning_rate": 0.0009475937089139463, "loss": 2.5694, "step": 308 }, { "epoch": 0.18, "learning_rate": 0.0009471828233718863, "loss": 2.6827, "step": 309 }, { "epoch": 0.18, "learning_rate": 0.0009467704231609755, "loss": 2.6466, "step": 310 }, { "epoch": 0.18, "learning_rate": 0.0009463565096780695, "loss": 2.6016, "step": 311 }, { "epoch": 0.18, "learning_rate": 0.0009459410843251495, "loss": 2.5613, "step": 312 }, { "epoch": 0.18, "learning_rate": 0.0009455241485093171, "loss": 2.5848, "step": 313 }, { "epoch": 0.18, "learning_rate": 0.0009451057036427909, "loss": 2.6192, "step": 314 }, { "epoch": 0.18, "learning_rate": 0.0009446857511428998, "loss": 2.6521, "step": 315 }, { "epoch": 0.18, "learning_rate": 0.0009442642924320806, "loss": 2.6224, "step": 316 }, { "epoch": 0.18, "learning_rate": 0.0009438413289378705, "loss": 2.5611, "step": 317 }, { "epoch": 0.18, "learning_rate": 0.0009434168620929044, "loss": 2.602, "step": 318 }, { "epoch": 0.18, "learning_rate": 0.0009429908933349091, "loss": 2.5021, "step": 319 }, { "epoch": 0.18, "learning_rate": 0.0009425634241066985, "loss": 2.5542, "step": 320 }, { "epoch": 0.18, "learning_rate": 0.0009421344558561687, "loss": 2.5592, "step": 321 }, { "epoch": 0.18, "learning_rate": 0.0009417039900362936, "loss": 2.6289, "step": 322 }, { "epoch": 0.18, "learning_rate": 0.000941272028105119, "loss": 2.5938, "step": 323 }, { "epoch": 0.18, "learning_rate": 0.0009408385715257588, "loss": 2.5195, "step": 324 }, { "epoch": 0.18, "learning_rate": 0.000940403621766389, "loss": 2.6641, "step": 325 }, { "epoch": 0.19, "learning_rate": 0.0009399671803002434, "loss": 2.5733, "step": 326 }, { "epoch": 0.19, "learning_rate": 0.0009395292486056086, "loss": 2.5534, "step": 327 }, { "epoch": 0.19, "learning_rate": 0.0009390898281658184, "loss": 2.6198, "step": 328 }, { "epoch": 0.19, "learning_rate": 0.0009386489204692497, "loss": 2.6807, "step": 329 }, { "epoch": 0.19, "learning_rate": 0.0009382065270093163, "loss": 2.5644, "step": 330 }, { "epoch": 0.19, "learning_rate": 0.0009377626492844649, "loss": 2.6815, "step": 331 }, { "epoch": 0.19, "learning_rate": 0.0009373172887981699, "loss": 2.5921, "step": 332 }, { "epoch": 0.19, "learning_rate": 0.0009368704470589271, "loss": 2.5429, "step": 333 }, { "epoch": 0.19, "learning_rate": 0.0009364221255802504, "loss": 2.5444, "step": 334 }, { "epoch": 0.19, "learning_rate": 0.0009359723258806654, "loss": 2.5487, "step": 335 }, { "epoch": 0.19, "learning_rate": 0.0009355210494837045, "loss": 2.5282, "step": 336 }, { "epoch": 0.19, "learning_rate": 0.0009350682979179024, "loss": 2.6438, "step": 337 }, { "epoch": 0.19, "learning_rate": 0.0009346140727167896, "loss": 2.5692, "step": 338 }, { "epoch": 0.19, "learning_rate": 0.0009341583754188887, "loss": 2.5261, "step": 339 }, { "epoch": 0.19, "learning_rate": 0.000933701207567708, "loss": 2.6031, "step": 340 }, { "epoch": 0.19, "learning_rate": 0.0009332425707117373, "loss": 2.5488, "step": 341 }, { "epoch": 0.19, "learning_rate": 0.0009327824664044417, "loss": 2.6051, "step": 342 }, { "epoch": 0.19, "learning_rate": 0.0009323208962042569, "loss": 2.6468, "step": 343 }, { "epoch": 0.2, "learning_rate": 0.0009318578616745834, "loss": 2.4675, "step": 344 }, { "epoch": 0.2, "learning_rate": 0.0009313933643837825, "loss": 2.6519, "step": 345 }, { "epoch": 0.2, "learning_rate": 0.0009309274059051691, "loss": 2.5038, "step": 346 }, { "epoch": 0.2, "learning_rate": 0.0009304599878170078, "loss": 2.6157, "step": 347 }, { "epoch": 0.2, "learning_rate": 0.0009299911117025071, "loss": 2.6254, "step": 348 }, { "epoch": 0.2, "learning_rate": 0.000929520779149814, "loss": 2.5462, "step": 349 }, { "epoch": 0.2, "learning_rate": 0.0009290489917520087, "loss": 2.5864, "step": 350 }, { "epoch": 0.2, "learning_rate": 0.0009285757511070987, "loss": 2.5915, "step": 351 }, { "epoch": 0.2, "learning_rate": 0.0009281010588180146, "loss": 2.5718, "step": 352 }, { "epoch": 0.2, "learning_rate": 0.0009276249164926034, "loss": 2.6059, "step": 353 }, { "epoch": 0.2, "learning_rate": 0.0009271473257436238, "loss": 2.5459, "step": 354 }, { "epoch": 0.2, "learning_rate": 0.0009266682881887403, "loss": 2.5749, "step": 355 }, { "epoch": 0.2, "learning_rate": 0.0009261878054505181, "loss": 2.5975, "step": 356 }, { "epoch": 0.2, "learning_rate": 0.0009257058791564173, "loss": 2.5799, "step": 357 }, { "epoch": 0.2, "learning_rate": 0.0009252225109387879, "loss": 2.5351, "step": 358 }, { "epoch": 0.2, "learning_rate": 0.0009247377024348631, "loss": 2.6105, "step": 359 }, { "epoch": 0.2, "learning_rate": 0.0009242514552867555, "loss": 2.6401, "step": 360 }, { "epoch": 0.21, "learning_rate": 0.0009237637711414497, "loss": 2.5392, "step": 361 }, { "epoch": 0.21, "learning_rate": 0.0009232746516507984, "loss": 2.589, "step": 362 }, { "epoch": 0.21, "learning_rate": 0.0009227840984715153, "loss": 2.5614, "step": 363 }, { "epoch": 0.21, "learning_rate": 0.0009222921132651708, "loss": 2.5614, "step": 364 }, { "epoch": 0.21, "learning_rate": 0.0009217986976981854, "loss": 2.5427, "step": 365 }, { "epoch": 0.21, "learning_rate": 0.0009213038534418243, "loss": 2.5695, "step": 366 }, { "epoch": 0.21, "learning_rate": 0.0009208075821721926, "loss": 2.5442, "step": 367 }, { "epoch": 0.21, "learning_rate": 0.0009203098855702277, "loss": 2.6164, "step": 368 }, { "epoch": 0.21, "learning_rate": 0.0009198107653216961, "loss": 2.5373, "step": 369 }, { "epoch": 0.21, "learning_rate": 0.0009193102231171852, "loss": 2.5858, "step": 370 }, { "epoch": 0.21, "learning_rate": 0.0009188082606520997, "loss": 2.5471, "step": 371 }, { "epoch": 0.21, "learning_rate": 0.0009183048796266546, "loss": 2.5069, "step": 372 }, { "epoch": 0.21, "learning_rate": 0.0009178000817458694, "loss": 2.5816, "step": 373 }, { "epoch": 0.21, "learning_rate": 0.0009172938687195629, "loss": 2.4701, "step": 374 }, { "epoch": 0.21, "learning_rate": 0.0009167862422623474, "loss": 2.6111, "step": 375 }, { "epoch": 0.21, "learning_rate": 0.0009162772040936227, "loss": 2.6355, "step": 376 }, { "epoch": 0.21, "learning_rate": 0.0009157667559375697, "loss": 2.5845, "step": 377 }, { "epoch": 0.21, "learning_rate": 0.000915254899523146, "loss": 2.6657, "step": 378 }, { "epoch": 0.22, "learning_rate": 0.0009147416365840783, "loss": 2.5875, "step": 379 }, { "epoch": 0.22, "learning_rate": 0.0009142269688588578, "loss": 2.5423, "step": 380 }, { "epoch": 0.22, "learning_rate": 0.000913710898090734, "loss": 2.5814, "step": 381 }, { "epoch": 0.22, "learning_rate": 0.0009131934260277084, "loss": 2.5892, "step": 382 }, { "epoch": 0.22, "learning_rate": 0.0009126745544225292, "loss": 2.6292, "step": 383 }, { "epoch": 0.22, "learning_rate": 0.0009121542850326848, "loss": 2.5795, "step": 384 }, { "epoch": 0.22, "learning_rate": 0.0009116326196203981, "loss": 2.5144, "step": 385 }, { "epoch": 0.22, "learning_rate": 0.0009111095599526206, "loss": 2.5801, "step": 386 }, { "epoch": 0.22, "learning_rate": 0.0009105851078010266, "loss": 2.5736, "step": 387 }, { "epoch": 0.22, "learning_rate": 0.0009100592649420061, "loss": 2.56, "step": 388 }, { "epoch": 0.22, "learning_rate": 0.0009095320331566606, "loss": 2.5927, "step": 389 }, { "epoch": 0.22, "learning_rate": 0.0009090034142307954, "loss": 2.5959, "step": 390 }, { "epoch": 0.22, "learning_rate": 0.0009084734099549146, "loss": 2.5392, "step": 391 }, { "epoch": 0.22, "learning_rate": 0.0009079420221242145, "loss": 2.6631, "step": 392 }, { "epoch": 0.22, "learning_rate": 0.0009074092525385777, "loss": 2.5413, "step": 393 }, { "epoch": 0.22, "learning_rate": 0.000906875103002567, "loss": 2.5866, "step": 394 }, { "epoch": 0.22, "learning_rate": 0.0009063395753254193, "loss": 2.5817, "step": 395 }, { "epoch": 0.22, "learning_rate": 0.0009058026713210397, "loss": 2.6183, "step": 396 }, { "epoch": 0.23, "learning_rate": 0.0009052643928079945, "loss": 2.5762, "step": 397 }, { "epoch": 0.23, "learning_rate": 0.0009047247416095059, "loss": 2.5657, "step": 398 }, { "epoch": 0.23, "learning_rate": 0.0009041837195534462, "loss": 2.4247, "step": 399 }, { "epoch": 0.23, "learning_rate": 0.0009036413284723301, "loss": 2.6515, "step": 400 }, { "epoch": 0.23, "learning_rate": 0.0009030975702033098, "loss": 2.5671, "step": 401 }, { "epoch": 0.23, "learning_rate": 0.0009025524465881683, "loss": 2.5696, "step": 402 }, { "epoch": 0.23, "learning_rate": 0.000902005959473313, "loss": 2.5915, "step": 403 }, { "epoch": 0.23, "learning_rate": 0.0009014581107097702, "loss": 2.5173, "step": 404 }, { "epoch": 0.23, "learning_rate": 0.0009009089021531776, "loss": 2.5242, "step": 405 }, { "epoch": 0.23, "learning_rate": 0.0009003583356637793, "loss": 2.5752, "step": 406 }, { "epoch": 0.23, "learning_rate": 0.0008998064131064185, "loss": 2.4775, "step": 407 }, { "epoch": 0.23, "learning_rate": 0.0008992531363505318, "loss": 2.5739, "step": 408 }, { "epoch": 0.23, "learning_rate": 0.0008986985072701425, "loss": 2.4938, "step": 409 }, { "epoch": 0.23, "learning_rate": 0.0008981425277438546, "loss": 2.4801, "step": 410 }, { "epoch": 0.23, "learning_rate": 0.0008975851996548459, "loss": 2.4974, "step": 411 }, { "epoch": 0.23, "learning_rate": 0.0008970265248908626, "loss": 2.5897, "step": 412 }, { "epoch": 0.23, "learning_rate": 0.0008964665053442116, "loss": 2.5122, "step": 413 }, { "epoch": 0.24, "learning_rate": 0.000895905142911755, "loss": 2.5803, "step": 414 }, { "epoch": 0.24, "learning_rate": 0.0008953424394949035, "loss": 2.6401, "step": 415 }, { "epoch": 0.24, "learning_rate": 0.00089477839699961, "loss": 2.561, "step": 416 }, { "epoch": 0.24, "learning_rate": 0.0008942130173363627, "loss": 2.6039, "step": 417 }, { "epoch": 0.24, "learning_rate": 0.0008936463024201794, "loss": 2.5148, "step": 418 }, { "epoch": 0.24, "learning_rate": 0.0008930782541706002, "loss": 2.5421, "step": 419 }, { "epoch": 0.24, "learning_rate": 0.0008925088745116816, "loss": 2.4854, "step": 420 }, { "epoch": 0.24, "learning_rate": 0.0008919381653719895, "loss": 2.5535, "step": 421 }, { "epoch": 0.24, "learning_rate": 0.0008913661286845936, "loss": 2.576, "step": 422 }, { "epoch": 0.24, "learning_rate": 0.0008907927663870592, "loss": 2.6122, "step": 423 }, { "epoch": 0.24, "learning_rate": 0.0008902180804214423, "loss": 2.6166, "step": 424 }, { "epoch": 0.24, "learning_rate": 0.0008896420727342823, "loss": 2.5119, "step": 425 }, { "epoch": 0.24, "learning_rate": 0.0008890647452765953, "loss": 2.5047, "step": 426 }, { "epoch": 0.24, "learning_rate": 0.0008884861000038676, "loss": 2.5731, "step": 427 }, { "epoch": 0.24, "learning_rate": 0.0008879061388760492, "loss": 2.576, "step": 428 }, { "epoch": 0.24, "learning_rate": 0.0008873248638575471, "loss": 2.5827, "step": 429 }, { "epoch": 0.24, "learning_rate": 0.0008867422769172187, "loss": 2.5291, "step": 430 }, { "epoch": 0.24, "learning_rate": 0.0008861583800283646, "loss": 2.4942, "step": 431 }, { "epoch": 0.25, "learning_rate": 0.0008855731751687232, "loss": 2.5566, "step": 432 }, { "epoch": 0.25, "learning_rate": 0.0008849866643204625, "loss": 2.5337, "step": 433 }, { "epoch": 0.25, "learning_rate": 0.000884398849470174, "loss": 2.5165, "step": 434 }, { "epoch": 0.25, "learning_rate": 0.0008838097326088666, "loss": 2.6361, "step": 435 }, { "epoch": 0.25, "learning_rate": 0.000883219315731959, "loss": 2.5504, "step": 436 }, { "epoch": 0.25, "learning_rate": 0.0008826276008392731, "loss": 2.5573, "step": 437 }, { "epoch": 0.25, "learning_rate": 0.0008820345899350273, "loss": 2.562, "step": 438 }, { "epoch": 0.25, "learning_rate": 0.0008814402850278303, "loss": 2.4775, "step": 439 }, { "epoch": 0.25, "learning_rate": 0.0008808446881306731, "loss": 2.5607, "step": 440 }, { "epoch": 0.25, "learning_rate": 0.0008802478012609234, "loss": 2.5578, "step": 441 }, { "epoch": 0.25, "learning_rate": 0.0008796496264403175, "loss": 2.5214, "step": 442 }, { "epoch": 0.25, "learning_rate": 0.0008790501656949552, "loss": 2.519, "step": 443 }, { "epoch": 0.25, "learning_rate": 0.0008784494210552909, "loss": 2.5888, "step": 444 }, { "epoch": 0.25, "learning_rate": 0.0008778473945561283, "loss": 2.487, "step": 445 }, { "epoch": 0.25, "learning_rate": 0.0008772440882366126, "loss": 2.5737, "step": 446 }, { "epoch": 0.25, "learning_rate": 0.0008766395041402244, "loss": 2.5556, "step": 447 }, { "epoch": 0.25, "learning_rate": 0.0008760336443147718, "loss": 2.5803, "step": 448 }, { "epoch": 0.26, "learning_rate": 0.0008754265108123843, "loss": 2.5664, "step": 449 }, { "epoch": 0.26, "learning_rate": 0.0008748181056895051, "loss": 2.5422, "step": 450 }, { "epoch": 0.26, "learning_rate": 0.0008742084310068851, "loss": 2.5137, "step": 451 }, { "epoch": 0.26, "learning_rate": 0.0008735974888295753, "loss": 2.4922, "step": 452 }, { "epoch": 0.26, "learning_rate": 0.0008729852812269191, "loss": 2.5818, "step": 453 }, { "epoch": 0.26, "learning_rate": 0.0008723718102725471, "loss": 2.5574, "step": 454 }, { "epoch": 0.26, "learning_rate": 0.0008717570780443685, "loss": 2.582, "step": 455 }, { "epoch": 0.26, "learning_rate": 0.0008711410866245647, "loss": 2.4976, "step": 456 }, { "epoch": 0.26, "learning_rate": 0.0008705238380995821, "loss": 2.5415, "step": 457 }, { "epoch": 0.26, "learning_rate": 0.0008699053345601252, "loss": 2.548, "step": 458 }, { "epoch": 0.26, "learning_rate": 0.0008692855781011493, "loss": 2.4905, "step": 459 }, { "epoch": 0.26, "learning_rate": 0.0008686645708218535, "loss": 2.5447, "step": 460 }, { "epoch": 0.26, "learning_rate": 0.0008680423148256737, "loss": 2.5469, "step": 461 }, { "epoch": 0.26, "learning_rate": 0.0008674188122202755, "loss": 2.5667, "step": 462 }, { "epoch": 0.26, "learning_rate": 0.0008667940651175465, "loss": 2.5645, "step": 463 }, { "epoch": 0.26, "learning_rate": 0.00086616807563359, "loss": 2.4549, "step": 464 }, { "epoch": 0.26, "learning_rate": 0.0008655408458887171, "loss": 2.4675, "step": 465 }, { "epoch": 0.26, "learning_rate": 0.0008649123780074401, "loss": 2.4899, "step": 466 }, { "epoch": 0.27, "learning_rate": 0.000864282674118465, "loss": 2.4976, "step": 467 }, { "epoch": 0.27, "learning_rate": 0.0008636517363546838, "loss": 2.5422, "step": 468 }, { "epoch": 0.27, "learning_rate": 0.0008630195668531689, "loss": 2.5581, "step": 469 }, { "epoch": 0.27, "learning_rate": 0.0008623861677551637, "loss": 2.5215, "step": 470 }, { "epoch": 0.27, "learning_rate": 0.000861751541206077, "loss": 2.583, "step": 471 }, { "epoch": 0.27, "learning_rate": 0.0008611156893554748, "loss": 2.5575, "step": 472 }, { "epoch": 0.27, "learning_rate": 0.0008604786143570735, "loss": 2.4754, "step": 473 }, { "epoch": 0.27, "learning_rate": 0.0008598403183687328, "loss": 2.5841, "step": 474 }, { "epoch": 0.27, "learning_rate": 0.0008592008035524474, "loss": 2.5482, "step": 475 }, { "epoch": 0.27, "learning_rate": 0.0008585600720743409, "loss": 2.4925, "step": 476 }, { "epoch": 0.27, "learning_rate": 0.0008579181261046576, "loss": 2.5339, "step": 477 }, { "epoch": 0.27, "learning_rate": 0.0008572749678177555, "loss": 2.5182, "step": 478 }, { "epoch": 0.27, "learning_rate": 0.000856630599392099, "loss": 2.4739, "step": 479 }, { "epoch": 0.27, "learning_rate": 0.0008559850230102512, "loss": 2.5473, "step": 480 }, { "epoch": 0.27, "learning_rate": 0.000855338240858867, "loss": 2.5086, "step": 481 }, { "epoch": 0.27, "learning_rate": 0.0008546902551286853, "loss": 2.5512, "step": 482 }, { "epoch": 0.27, "learning_rate": 0.0008540410680145211, "loss": 2.4758, "step": 483 }, { "epoch": 0.27, "learning_rate": 0.0008533906817152599, "loss": 2.5077, "step": 484 }, { "epoch": 0.28, "learning_rate": 0.0008527390984338477, "loss": 2.5646, "step": 485 }, { "epoch": 0.28, "learning_rate": 0.0008520863203772857, "loss": 2.555, "step": 486 }, { "epoch": 0.28, "learning_rate": 0.0008514323497566216, "loss": 2.4723, "step": 487 }, { "epoch": 0.28, "learning_rate": 0.0008507771887869426, "loss": 2.5635, "step": 488 }, { "epoch": 0.28, "learning_rate": 0.0008501208396873676, "loss": 2.503, "step": 489 }, { "epoch": 0.28, "learning_rate": 0.0008494633046810402, "loss": 2.4948, "step": 490 }, { "epoch": 0.28, "learning_rate": 0.0008488045859951206, "loss": 2.5435, "step": 491 }, { "epoch": 0.28, "learning_rate": 0.000848144685860778, "loss": 2.5013, "step": 492 }, { "epoch": 0.28, "learning_rate": 0.0008474836065131838, "loss": 2.5691, "step": 493 }, { "epoch": 0.28, "learning_rate": 0.0008468213501915034, "loss": 2.5194, "step": 494 }, { "epoch": 0.28, "learning_rate": 0.0008461579191388889, "loss": 2.5856, "step": 495 }, { "epoch": 0.28, "learning_rate": 0.0008454933156024709, "loss": 2.6139, "step": 496 }, { "epoch": 0.28, "learning_rate": 0.0008448275418333518, "loss": 2.4525, "step": 497 }, { "epoch": 0.28, "learning_rate": 0.0008441606000865978, "loss": 2.5658, "step": 498 }, { "epoch": 0.28, "learning_rate": 0.0008434924926212307, "loss": 2.4961, "step": 499 }, { "epoch": 0.28, "learning_rate": 0.0008428232217002214, "loss": 2.546, "step": 500 }, { "epoch": 0.28, "learning_rate": 0.0008421527895904811, "loss": 2.496, "step": 501 }, { "epoch": 0.29, "learning_rate": 0.0008414811985628544, "loss": 2.4998, "step": 502 }, { "epoch": 0.29, "learning_rate": 0.0008408084508921106, "loss": 2.4938, "step": 503 }, { "epoch": 0.29, "learning_rate": 0.000840134548856938, "loss": 2.5078, "step": 504 }, { "epoch": 0.29, "learning_rate": 0.0008394594947399337, "loss": 2.5869, "step": 505 }, { "epoch": 0.29, "learning_rate": 0.0008387832908275977, "loss": 2.5255, "step": 506 }, { "epoch": 0.29, "learning_rate": 0.0008381059394103243, "loss": 2.5458, "step": 507 }, { "epoch": 0.29, "learning_rate": 0.0008374274427823946, "loss": 2.5222, "step": 508 }, { "epoch": 0.29, "learning_rate": 0.0008367478032419686, "loss": 2.5068, "step": 509 }, { "epoch": 0.29, "learning_rate": 0.0008360670230910777, "loss": 2.511, "step": 510 }, { "epoch": 0.29, "learning_rate": 0.0008353851046356163, "loss": 2.4518, "step": 511 }, { "epoch": 0.29, "learning_rate": 0.0008347020501853348, "loss": 2.404, "step": 512 }, { "epoch": 0.29, "learning_rate": 0.0008340178620538315, "loss": 2.5601, "step": 513 }, { "epoch": 0.29, "learning_rate": 0.0008333325425585439, "loss": 2.4297, "step": 514 }, { "epoch": 0.29, "learning_rate": 0.0008326460940207423, "loss": 2.5419, "step": 515 }, { "epoch": 0.29, "learning_rate": 0.000831958518765521, "loss": 2.5175, "step": 516 }, { "epoch": 0.29, "learning_rate": 0.0008312698191217906, "loss": 2.5704, "step": 517 }, { "epoch": 0.29, "learning_rate": 0.0008305799974222701, "loss": 2.5524, "step": 518 }, { "epoch": 0.29, "learning_rate": 0.0008298890560034791, "loss": 2.5907, "step": 519 }, { "epoch": 0.3, "learning_rate": 0.0008291969972057298, "loss": 2.5519, "step": 520 }, { "epoch": 0.3, "learning_rate": 0.0008285038233731193, "loss": 2.3889, "step": 521 }, { "epoch": 0.3, "learning_rate": 0.0008278095368535215, "loss": 2.4414, "step": 522 }, { "epoch": 0.3, "learning_rate": 0.0008271141399985787, "loss": 2.4874, "step": 523 }, { "epoch": 0.3, "learning_rate": 0.0008264176351636943, "loss": 2.4667, "step": 524 }, { "epoch": 0.3, "learning_rate": 0.0008257200247080247, "loss": 2.4855, "step": 525 }, { "epoch": 0.3, "learning_rate": 0.0008250213109944712, "loss": 2.4921, "step": 526 }, { "epoch": 0.3, "learning_rate": 0.0008243214963896718, "loss": 2.4261, "step": 527 }, { "epoch": 0.3, "learning_rate": 0.0008236205832639935, "loss": 2.4202, "step": 528 }, { "epoch": 0.3, "learning_rate": 0.000822918573991524, "loss": 2.5184, "step": 529 }, { "epoch": 0.3, "learning_rate": 0.0008222154709500637, "loss": 2.5094, "step": 530 }, { "epoch": 0.3, "learning_rate": 0.0008215112765211184, "loss": 2.4613, "step": 531 }, { "epoch": 0.3, "learning_rate": 0.0008208059930898898, "loss": 2.4444, "step": 532 }, { "epoch": 0.3, "learning_rate": 0.0008200996230452684, "loss": 2.5802, "step": 533 }, { "epoch": 0.3, "learning_rate": 0.0008193921687798256, "loss": 2.5125, "step": 534 }, { "epoch": 0.3, "learning_rate": 0.0008186836326898047, "loss": 2.4846, "step": 535 }, { "epoch": 0.3, "learning_rate": 0.0008179740171751135, "loss": 2.4259, "step": 536 }, { "epoch": 0.31, "learning_rate": 0.000817263324639316, "loss": 2.4611, "step": 537 }, { "epoch": 0.31, "learning_rate": 0.0008165515574896243, "loss": 2.5396, "step": 538 }, { "epoch": 0.31, "learning_rate": 0.0008158387181368901, "loss": 2.5483, "step": 539 }, { "epoch": 0.31, "learning_rate": 0.0008151248089955969, "loss": 2.4982, "step": 540 }, { "epoch": 0.31, "learning_rate": 0.0008144098324838518, "loss": 2.4895, "step": 541 }, { "epoch": 0.31, "learning_rate": 0.0008136937910233772, "loss": 2.4413, "step": 542 }, { "epoch": 0.31, "learning_rate": 0.0008129766870395026, "loss": 2.5752, "step": 543 }, { "epoch": 0.31, "learning_rate": 0.0008122585229611565, "loss": 2.4546, "step": 544 }, { "epoch": 0.31, "learning_rate": 0.0008115393012208578, "loss": 2.475, "step": 545 }, { "epoch": 0.31, "learning_rate": 0.0008108190242547082, "loss": 2.4423, "step": 546 }, { "epoch": 0.31, "learning_rate": 0.0008100976945023834, "loss": 2.5039, "step": 547 }, { "epoch": 0.31, "learning_rate": 0.000809375314407125, "loss": 2.4312, "step": 548 }, { "epoch": 0.31, "learning_rate": 0.0008086518864157324, "loss": 2.4996, "step": 549 }, { "epoch": 0.31, "learning_rate": 0.0008079274129785543, "loss": 2.5001, "step": 550 }, { "epoch": 0.31, "learning_rate": 0.0008072018965494804, "loss": 2.5265, "step": 551 }, { "epoch": 0.31, "learning_rate": 0.0008064753395859332, "loss": 2.4214, "step": 552 }, { "epoch": 0.31, "learning_rate": 0.0008057477445488597, "loss": 2.4731, "step": 553 }, { "epoch": 0.31, "learning_rate": 0.000805019113902723, "loss": 2.4975, "step": 554 }, { "epoch": 0.32, "learning_rate": 0.0008042894501154937, "loss": 2.4966, "step": 555 }, { "epoch": 0.32, "learning_rate": 0.0008035587556586421, "loss": 2.4733, "step": 556 }, { "epoch": 0.32, "learning_rate": 0.0008028270330071292, "loss": 2.5611, "step": 557 }, { "epoch": 0.32, "learning_rate": 0.0008020942846393992, "loss": 2.4482, "step": 558 }, { "epoch": 0.32, "learning_rate": 0.0008013605130373697, "loss": 2.4835, "step": 559 }, { "epoch": 0.32, "learning_rate": 0.0008006257206864252, "loss": 2.5224, "step": 560 }, { "epoch": 0.32, "learning_rate": 0.0007998899100754064, "loss": 2.5073, "step": 561 }, { "epoch": 0.32, "learning_rate": 0.0007991530836966039, "loss": 2.3737, "step": 562 }, { "epoch": 0.32, "learning_rate": 0.0007984152440457485, "loss": 2.5418, "step": 563 }, { "epoch": 0.32, "learning_rate": 0.000797676393622003, "loss": 2.4813, "step": 564 }, { "epoch": 0.32, "learning_rate": 0.0007969365349279544, "loss": 2.515, "step": 565 }, { "epoch": 0.32, "learning_rate": 0.0007961956704696039, "loss": 2.5198, "step": 566 }, { "epoch": 0.32, "learning_rate": 0.0007954538027563601, "loss": 2.5001, "step": 567 }, { "epoch": 0.32, "learning_rate": 0.0007947109343010295, "loss": 2.4378, "step": 568 }, { "epoch": 0.32, "learning_rate": 0.0007939670676198083, "loss": 2.4959, "step": 569 }, { "epoch": 0.32, "learning_rate": 0.0007932222052322737, "loss": 2.493, "step": 570 }, { "epoch": 0.32, "learning_rate": 0.0007924763496613756, "loss": 2.4409, "step": 571 }, { "epoch": 0.32, "learning_rate": 0.000791729503433428, "loss": 2.5195, "step": 572 }, { "epoch": 0.33, "learning_rate": 0.0007909816690781004, "loss": 2.5452, "step": 573 }, { "epoch": 0.33, "learning_rate": 0.000790232849128409, "loss": 2.49, "step": 574 }, { "epoch": 0.33, "learning_rate": 0.0007894830461207085, "loss": 2.5596, "step": 575 }, { "epoch": 0.33, "learning_rate": 0.0007887322625946835, "loss": 2.503, "step": 576 }, { "epoch": 0.33, "learning_rate": 0.0007879805010933396, "loss": 2.5275, "step": 577 }, { "epoch": 0.33, "learning_rate": 0.0007872277641629948, "loss": 2.4316, "step": 578 }, { "epoch": 0.33, "learning_rate": 0.0007864740543532711, "loss": 2.4647, "step": 579 }, { "epoch": 0.33, "learning_rate": 0.0007857193742170859, "loss": 2.4628, "step": 580 }, { "epoch": 0.33, "learning_rate": 0.0007849637263106431, "loss": 2.4359, "step": 581 }, { "epoch": 0.33, "learning_rate": 0.0007842071131934246, "loss": 2.3986, "step": 582 }, { "epoch": 0.33, "learning_rate": 0.0007834495374281816, "loss": 2.459, "step": 583 }, { "epoch": 0.33, "learning_rate": 0.0007826910015809261, "loss": 2.4605, "step": 584 }, { "epoch": 0.33, "learning_rate": 0.0007819315082209217, "loss": 2.4607, "step": 585 }, { "epoch": 0.33, "learning_rate": 0.0007811710599206752, "loss": 2.4803, "step": 586 }, { "epoch": 0.33, "learning_rate": 0.0007804096592559284, "loss": 2.5248, "step": 587 }, { "epoch": 0.33, "learning_rate": 0.0007796473088056487, "loss": 2.4919, "step": 588 }, { "epoch": 0.33, "learning_rate": 0.0007788840111520197, "loss": 2.5144, "step": 589 }, { "epoch": 0.34, "learning_rate": 0.0007781197688804349, "loss": 2.513, "step": 590 }, { "epoch": 0.34, "learning_rate": 0.000777354584579486, "loss": 2.4596, "step": 591 }, { "epoch": 0.34, "learning_rate": 0.0007765884608409561, "loss": 2.4682, "step": 592 }, { "epoch": 0.34, "learning_rate": 0.0007758214002598103, "loss": 2.3991, "step": 593 }, { "epoch": 0.34, "learning_rate": 0.0007750534054341866, "loss": 2.4929, "step": 594 }, { "epoch": 0.34, "learning_rate": 0.0007742844789653875, "loss": 2.4513, "step": 595 }, { "epoch": 0.34, "learning_rate": 0.0007735146234578716, "loss": 2.49, "step": 596 }, { "epoch": 0.34, "learning_rate": 0.0007727438415192434, "loss": 2.4734, "step": 597 }, { "epoch": 0.34, "learning_rate": 0.0007719721357602461, "loss": 2.4681, "step": 598 }, { "epoch": 0.34, "learning_rate": 0.0007711995087947517, "loss": 2.4802, "step": 599 }, { "epoch": 0.34, "learning_rate": 0.0007704259632397525, "loss": 2.5405, "step": 600 }, { "epoch": 0.34, "learning_rate": 0.0007696515017153522, "loss": 2.5218, "step": 601 }, { "epoch": 0.34, "learning_rate": 0.0007688761268447569, "loss": 2.52, "step": 602 }, { "epoch": 0.34, "learning_rate": 0.0007680998412542664, "loss": 2.5314, "step": 603 }, { "epoch": 0.34, "learning_rate": 0.0007673226475732652, "loss": 2.3808, "step": 604 }, { "epoch": 0.34, "learning_rate": 0.0007665445484342137, "loss": 2.4448, "step": 605 }, { "epoch": 0.34, "learning_rate": 0.0007657655464726394, "loss": 2.4814, "step": 606 }, { "epoch": 0.34, "learning_rate": 0.0007649856443271273, "loss": 2.4774, "step": 607 }, { "epoch": 0.35, "learning_rate": 0.0007642048446393117, "loss": 2.4356, "step": 608 }, { "epoch": 0.35, "learning_rate": 0.0007634231500538671, "loss": 2.4701, "step": 609 }, { "epoch": 0.35, "learning_rate": 0.0007626405632184992, "loss": 2.4459, "step": 610 }, { "epoch": 0.35, "learning_rate": 0.0007618570867839354, "loss": 2.5487, "step": 611 }, { "epoch": 0.35, "learning_rate": 0.0007610727234039167, "loss": 2.4338, "step": 612 }, { "epoch": 0.35, "learning_rate": 0.0007602874757351883, "loss": 2.4479, "step": 613 }, { "epoch": 0.35, "learning_rate": 0.0007595013464374904, "loss": 2.4928, "step": 614 }, { "epoch": 0.35, "learning_rate": 0.0007587143381735498, "loss": 2.423, "step": 615 }, { "epoch": 0.35, "learning_rate": 0.0007579264536090697, "loss": 2.4827, "step": 616 }, { "epoch": 0.35, "learning_rate": 0.0007571376954127223, "loss": 2.5261, "step": 617 }, { "epoch": 0.35, "learning_rate": 0.0007563480662561386, "loss": 2.47, "step": 618 }, { "epoch": 0.35, "learning_rate": 0.0007555575688138993, "loss": 2.4221, "step": 619 }, { "epoch": 0.35, "learning_rate": 0.0007547662057635266, "loss": 2.4059, "step": 620 }, { "epoch": 0.35, "learning_rate": 0.0007539739797854746, "loss": 2.381, "step": 621 }, { "epoch": 0.35, "learning_rate": 0.0007531808935631197, "loss": 2.5162, "step": 622 }, { "epoch": 0.35, "learning_rate": 0.0007523869497827527, "loss": 2.4406, "step": 623 }, { "epoch": 0.35, "learning_rate": 0.0007515921511335688, "loss": 2.4417, "step": 624 }, { "epoch": 0.36, "learning_rate": 0.0007507965003076589, "loss": 2.4449, "step": 625 }, { "epoch": 0.36, "learning_rate": 0.00075, "loss": 2.4725, "step": 626 }, { "epoch": 0.36, "learning_rate": 0.0007492026529084468, "loss": 2.4522, "step": 627 }, { "epoch": 0.36, "learning_rate": 0.000748404461733722, "loss": 2.5874, "step": 628 }, { "epoch": 0.36, "learning_rate": 0.000747605429179407, "loss": 2.5147, "step": 629 }, { "epoch": 0.36, "learning_rate": 0.0007468055579519338, "loss": 2.4721, "step": 630 }, { "epoch": 0.36, "learning_rate": 0.0007460048507605746, "loss": 2.4702, "step": 631 }, { "epoch": 0.36, "learning_rate": 0.0007452033103174332, "loss": 2.3927, "step": 632 }, { "epoch": 0.36, "learning_rate": 0.0007444009393374356, "loss": 2.4879, "step": 633 }, { "epoch": 0.36, "learning_rate": 0.0007435977405383212, "loss": 2.4123, "step": 634 }, { "epoch": 0.36, "learning_rate": 0.0007427937166406332, "loss": 2.4447, "step": 635 }, { "epoch": 0.36, "learning_rate": 0.0007419888703677097, "loss": 2.4302, "step": 636 }, { "epoch": 0.36, "learning_rate": 0.0007411832044456739, "loss": 2.4721, "step": 637 }, { "epoch": 0.36, "learning_rate": 0.0007403767216034257, "loss": 2.4133, "step": 638 }, { "epoch": 0.36, "learning_rate": 0.0007395694245726318, "loss": 2.3523, "step": 639 }, { "epoch": 0.36, "learning_rate": 0.0007387613160877165, "loss": 2.4691, "step": 640 }, { "epoch": 0.36, "learning_rate": 0.0007379523988858532, "loss": 2.447, "step": 641 }, { "epoch": 0.36, "learning_rate": 0.0007371426757069537, "loss": 2.5446, "step": 642 }, { "epoch": 0.37, "learning_rate": 0.0007363321492936604, "loss": 2.4753, "step": 643 }, { "epoch": 0.37, "learning_rate": 0.0007355208223913362, "loss": 2.4594, "step": 644 }, { "epoch": 0.37, "learning_rate": 0.0007347086977480551, "loss": 2.4352, "step": 645 }, { "epoch": 0.37, "learning_rate": 0.0007338957781145936, "loss": 2.442, "step": 646 }, { "epoch": 0.37, "learning_rate": 0.0007330820662444207, "loss": 2.3776, "step": 647 }, { "epoch": 0.37, "learning_rate": 0.0007322675648936887, "loss": 2.3646, "step": 648 }, { "epoch": 0.37, "learning_rate": 0.0007314522768212241, "loss": 2.4233, "step": 649 }, { "epoch": 0.37, "learning_rate": 0.0007306362047885182, "loss": 2.4675, "step": 650 }, { "epoch": 0.37, "learning_rate": 0.0007298193515597177, "loss": 2.5046, "step": 651 }, { "epoch": 0.37, "learning_rate": 0.0007290017199016152, "loss": 2.548, "step": 652 }, { "epoch": 0.37, "learning_rate": 0.0007281833125836397, "loss": 2.4224, "step": 653 }, { "epoch": 0.37, "learning_rate": 0.0007273641323778481, "loss": 2.4324, "step": 654 }, { "epoch": 0.37, "learning_rate": 0.0007265441820589145, "loss": 2.4377, "step": 655 }, { "epoch": 0.37, "learning_rate": 0.0007257234644041222, "loss": 2.4007, "step": 656 }, { "epoch": 0.37, "learning_rate": 0.0007249019821933529, "loss": 2.4604, "step": 657 }, { "epoch": 0.37, "learning_rate": 0.0007240797382090783, "loss": 2.483, "step": 658 }, { "epoch": 0.37, "learning_rate": 0.0007232567352363501, "loss": 2.4878, "step": 659 }, { "epoch": 0.37, "learning_rate": 0.0007224329760627911, "loss": 2.5135, "step": 660 }, { "epoch": 0.38, "learning_rate": 0.0007216084634785854, "loss": 2.495, "step": 661 }, { "epoch": 0.38, "learning_rate": 0.0007207832002764687, "loss": 2.4743, "step": 662 }, { "epoch": 0.38, "learning_rate": 0.0007199571892517193, "loss": 2.4359, "step": 663 }, { "epoch": 0.38, "learning_rate": 0.0007191304332021489, "loss": 2.4334, "step": 664 }, { "epoch": 0.38, "learning_rate": 0.0007183029349280921, "loss": 2.4952, "step": 665 }, { "epoch": 0.38, "learning_rate": 0.0007174746972323975, "loss": 2.439, "step": 666 }, { "epoch": 0.38, "learning_rate": 0.0007166457229204189, "loss": 2.5289, "step": 667 }, { "epoch": 0.38, "learning_rate": 0.0007158160148000044, "loss": 2.4211, "step": 668 }, { "epoch": 0.38, "learning_rate": 0.000714985575681488, "loss": 2.4422, "step": 669 }, { "epoch": 0.38, "learning_rate": 0.0007141544083776795, "loss": 2.4207, "step": 670 }, { "epoch": 0.38, "learning_rate": 0.0007133225157038554, "loss": 2.4256, "step": 671 }, { "epoch": 0.38, "learning_rate": 0.0007124899004777489, "loss": 2.4774, "step": 672 }, { "epoch": 0.38, "learning_rate": 0.0007116565655195406, "loss": 2.3843, "step": 673 }, { "epoch": 0.38, "learning_rate": 0.000710822513651849, "loss": 2.5185, "step": 674 }, { "epoch": 0.38, "learning_rate": 0.000709987747699721, "loss": 2.4379, "step": 675 }, { "epoch": 0.38, "learning_rate": 0.0007091522704906218, "loss": 2.4733, "step": 676 }, { "epoch": 0.38, "learning_rate": 0.0007083160848544261, "loss": 2.4462, "step": 677 }, { "epoch": 0.39, "learning_rate": 0.0007074791936234082, "loss": 2.3959, "step": 678 }, { "epoch": 0.39, "learning_rate": 0.0007066415996322318, "loss": 2.4609, "step": 679 }, { "epoch": 0.39, "learning_rate": 0.0007058033057179415, "loss": 2.4024, "step": 680 }, { "epoch": 0.39, "learning_rate": 0.0007049643147199524, "loss": 2.5052, "step": 681 }, { "epoch": 0.39, "learning_rate": 0.0007041246294800411, "loss": 2.4512, "step": 682 }, { "epoch": 0.39, "learning_rate": 0.0007032842528423348, "loss": 2.4629, "step": 683 }, { "epoch": 0.39, "learning_rate": 0.0007024431876533035, "loss": 2.3917, "step": 684 }, { "epoch": 0.39, "learning_rate": 0.0007016014367617487, "loss": 2.4708, "step": 685 }, { "epoch": 0.39, "learning_rate": 0.0007007590030187952, "loss": 2.4183, "step": 686 }, { "epoch": 0.39, "learning_rate": 0.0006999158892778798, "loss": 2.4237, "step": 687 }, { "epoch": 0.39, "learning_rate": 0.0006990720983947435, "loss": 2.45, "step": 688 }, { "epoch": 0.39, "learning_rate": 0.0006982276332274202, "loss": 2.4202, "step": 689 }, { "epoch": 0.39, "learning_rate": 0.0006973824966362281, "loss": 2.4131, "step": 690 }, { "epoch": 0.39, "learning_rate": 0.0006965366914837591, "loss": 2.4219, "step": 691 }, { "epoch": 0.39, "learning_rate": 0.0006956902206348702, "loss": 2.4663, "step": 692 }, { "epoch": 0.39, "learning_rate": 0.0006948430869566728, "loss": 2.5585, "step": 693 }, { "epoch": 0.39, "learning_rate": 0.0006939952933185234, "loss": 2.4702, "step": 694 }, { "epoch": 0.39, "learning_rate": 0.0006931468425920142, "loss": 2.4307, "step": 695 }, { "epoch": 0.4, "learning_rate": 0.0006922977376509629, "loss": 2.4803, "step": 696 }, { "epoch": 0.4, "learning_rate": 0.000691447981371403, "loss": 2.4817, "step": 697 }, { "epoch": 0.4, "learning_rate": 0.0006905975766315739, "loss": 2.4975, "step": 698 }, { "epoch": 0.4, "learning_rate": 0.0006897465263119122, "loss": 2.5327, "step": 699 }, { "epoch": 0.4, "learning_rate": 0.0006888948332950405, "loss": 2.4044, "step": 700 }, { "epoch": 0.4, "learning_rate": 0.0006880425004657586, "loss": 2.5197, "step": 701 }, { "epoch": 0.4, "learning_rate": 0.0006871895307110332, "loss": 2.4461, "step": 702 }, { "epoch": 0.4, "learning_rate": 0.0006863359269199886, "loss": 2.4614, "step": 703 }, { "epoch": 0.4, "learning_rate": 0.0006854816919838967, "loss": 2.3401, "step": 704 }, { "epoch": 0.4, "learning_rate": 0.0006846268287961667, "loss": 2.4542, "step": 705 }, { "epoch": 0.4, "learning_rate": 0.0006837713402523364, "loss": 2.4437, "step": 706 }, { "epoch": 0.4, "learning_rate": 0.0006829152292500613, "loss": 2.4147, "step": 707 }, { "epoch": 0.4, "learning_rate": 0.0006820584986891058, "loss": 2.512, "step": 708 }, { "epoch": 0.4, "learning_rate": 0.0006812011514713321, "loss": 2.4996, "step": 709 }, { "epoch": 0.4, "learning_rate": 0.0006803431905006916, "loss": 2.4634, "step": 710 }, { "epoch": 0.4, "learning_rate": 0.0006794846186832143, "loss": 2.4315, "step": 711 }, { "epoch": 0.4, "learning_rate": 0.0006786254389269994, "loss": 2.4492, "step": 712 }, { "epoch": 0.41, "learning_rate": 0.0006777656541422055, "loss": 2.4084, "step": 713 }, { "epoch": 0.41, "learning_rate": 0.0006769052672410398, "loss": 2.4165, "step": 714 }, { "epoch": 0.41, "learning_rate": 0.0006760442811377499, "loss": 2.4669, "step": 715 }, { "epoch": 0.41, "learning_rate": 0.0006751826987486118, "loss": 2.3723, "step": 716 }, { "epoch": 0.41, "learning_rate": 0.0006743205229919224, "loss": 2.4004, "step": 717 }, { "epoch": 0.41, "learning_rate": 0.0006734577567879876, "loss": 2.4222, "step": 718 }, { "epoch": 0.41, "learning_rate": 0.0006725944030591135, "loss": 2.4204, "step": 719 }, { "epoch": 0.41, "learning_rate": 0.0006717304647295964, "loss": 2.4191, "step": 720 }, { "epoch": 0.41, "learning_rate": 0.0006708659447257126, "loss": 2.4486, "step": 721 }, { "epoch": 0.41, "learning_rate": 0.0006700008459757083, "loss": 2.3486, "step": 722 }, { "epoch": 0.41, "learning_rate": 0.0006691351714097905, "loss": 2.3805, "step": 723 }, { "epoch": 0.41, "learning_rate": 0.0006682689239601161, "loss": 2.402, "step": 724 }, { "epoch": 0.41, "learning_rate": 0.0006674021065607829, "loss": 2.367, "step": 725 }, { "epoch": 0.41, "learning_rate": 0.0006665347221478187, "loss": 2.4574, "step": 726 }, { "epoch": 0.41, "learning_rate": 0.0006656667736591724, "loss": 2.4072, "step": 727 }, { "epoch": 0.41, "learning_rate": 0.0006647982640347029, "loss": 2.4673, "step": 728 }, { "epoch": 0.41, "learning_rate": 0.0006639291962161702, "loss": 2.4676, "step": 729 }, { "epoch": 0.41, "learning_rate": 0.0006630595731472249, "loss": 2.4421, "step": 730 }, { "epoch": 0.42, "learning_rate": 0.000662189397773398, "loss": 2.4604, "step": 731 }, { "epoch": 0.42, "learning_rate": 0.0006613186730420917, "loss": 2.4199, "step": 732 }, { "epoch": 0.42, "learning_rate": 0.0006604474019025686, "loss": 2.4471, "step": 733 }, { "epoch": 0.42, "learning_rate": 0.0006595755873059422, "loss": 2.4323, "step": 734 }, { "epoch": 0.42, "learning_rate": 0.0006587032322051667, "loss": 2.3491, "step": 735 }, { "epoch": 0.42, "learning_rate": 0.0006578303395550271, "loss": 2.4445, "step": 736 }, { "epoch": 0.42, "learning_rate": 0.0006569569123121294, "loss": 2.5522, "step": 737 }, { "epoch": 0.42, "learning_rate": 0.0006560829534348897, "loss": 2.393, "step": 738 }, { "epoch": 0.42, "learning_rate": 0.0006552084658835254, "loss": 2.4933, "step": 739 }, { "epoch": 0.42, "learning_rate": 0.0006543334526200445, "loss": 2.4027, "step": 740 }, { "epoch": 0.42, "learning_rate": 0.0006534579166082354, "loss": 2.4971, "step": 741 }, { "epoch": 0.42, "learning_rate": 0.0006525818608136572, "loss": 2.4315, "step": 742 }, { "epoch": 0.42, "learning_rate": 0.0006517052882036298, "loss": 2.4696, "step": 743 }, { "epoch": 0.42, "learning_rate": 0.0006508282017472235, "loss": 2.4609, "step": 744 }, { "epoch": 0.42, "learning_rate": 0.000649950604415249, "loss": 2.4614, "step": 745 }, { "epoch": 0.42, "learning_rate": 0.0006490724991802474, "loss": 2.4132, "step": 746 }, { "epoch": 0.42, "learning_rate": 0.00064819388901648, "loss": 2.5059, "step": 747 }, { "epoch": 0.42, "learning_rate": 0.000647314776899919, "loss": 2.4084, "step": 748 }, { "epoch": 0.43, "learning_rate": 0.000646435165808236, "loss": 2.4937, "step": 749 }, { "epoch": 0.43, "learning_rate": 0.000645555058720793, "loss": 2.5148, "step": 750 }, { "epoch": 0.43, "learning_rate": 0.0006446744586186322, "loss": 2.4094, "step": 751 }, { "epoch": 0.43, "learning_rate": 0.0006437933684844655, "loss": 2.4952, "step": 752 }, { "epoch": 0.43, "learning_rate": 0.0006429117913026646, "loss": 2.4557, "step": 753 }, { "epoch": 0.43, "learning_rate": 0.000642029730059251, "loss": 2.5387, "step": 754 }, { "epoch": 0.43, "learning_rate": 0.0006411471877418855, "loss": 2.4937, "step": 755 }, { "epoch": 0.43, "learning_rate": 0.000640264167339859, "loss": 2.4658, "step": 756 }, { "epoch": 0.43, "learning_rate": 0.0006393806718440806, "loss": 2.4209, "step": 757 }, { "epoch": 0.43, "learning_rate": 0.0006384967042470702, "loss": 2.4134, "step": 758 }, { "epoch": 0.43, "learning_rate": 0.0006376122675429452, "loss": 2.4574, "step": 759 }, { "epoch": 0.43, "learning_rate": 0.0006367273647274131, "loss": 2.3942, "step": 760 }, { "epoch": 0.43, "learning_rate": 0.0006358419987977595, "loss": 2.4388, "step": 761 }, { "epoch": 0.43, "learning_rate": 0.0006349561727528388, "loss": 2.3744, "step": 762 }, { "epoch": 0.43, "learning_rate": 0.0006340698895930638, "loss": 2.4443, "step": 763 }, { "epoch": 0.43, "learning_rate": 0.0006331831523203963, "loss": 2.4567, "step": 764 }, { "epoch": 0.43, "learning_rate": 0.000632295963938335, "loss": 2.498, "step": 765 }, { "epoch": 0.44, "learning_rate": 0.0006314083274519076, "loss": 2.5225, "step": 766 }, { "epoch": 0.44, "learning_rate": 0.000630520245867659, "loss": 2.4169, "step": 767 }, { "epoch": 0.44, "learning_rate": 0.0006296317221936421, "loss": 2.4793, "step": 768 }, { "epoch": 0.44, "learning_rate": 0.0006287427594394069, "loss": 2.4272, "step": 769 }, { "epoch": 0.44, "learning_rate": 0.000627853360615991, "loss": 2.5425, "step": 770 }, { "epoch": 0.44, "learning_rate": 0.0006269635287359086, "loss": 2.3694, "step": 771 }, { "epoch": 0.44, "learning_rate": 0.0006260732668131409, "loss": 2.4553, "step": 772 }, { "epoch": 0.44, "learning_rate": 0.0006251825778631258, "loss": 2.4705, "step": 773 }, { "epoch": 0.44, "learning_rate": 0.0006242914649027476, "loss": 2.416, "step": 774 }, { "epoch": 0.44, "learning_rate": 0.0006233999309503267, "loss": 2.3962, "step": 775 }, { "epoch": 0.44, "learning_rate": 0.0006225079790256094, "loss": 2.4481, "step": 776 }, { "epoch": 0.44, "learning_rate": 0.0006216156121497578, "loss": 2.4229, "step": 777 }, { "epoch": 0.44, "learning_rate": 0.0006207228333453396, "loss": 2.3921, "step": 778 }, { "epoch": 0.44, "learning_rate": 0.0006198296456363174, "loss": 2.4722, "step": 779 }, { "epoch": 0.44, "learning_rate": 0.0006189360520480393, "loss": 2.4749, "step": 780 }, { "epoch": 0.44, "learning_rate": 0.0006180420556072279, "loss": 2.482, "step": 781 }, { "epoch": 0.44, "learning_rate": 0.0006171476593419703, "loss": 2.4231, "step": 782 }, { "epoch": 0.44, "learning_rate": 0.000616252866281708, "loss": 2.4011, "step": 783 }, { "epoch": 0.45, "learning_rate": 0.0006153576794572262, "loss": 2.4231, "step": 784 }, { "epoch": 0.45, "learning_rate": 0.0006144621019006443, "loss": 2.478, "step": 785 }, { "epoch": 0.45, "learning_rate": 0.0006135661366454045, "loss": 2.4443, "step": 786 }, { "epoch": 0.45, "learning_rate": 0.0006126697867262632, "loss": 2.4758, "step": 787 }, { "epoch": 0.45, "learning_rate": 0.0006117730551792786, "loss": 2.4348, "step": 788 }, { "epoch": 0.45, "learning_rate": 0.0006108759450418022, "loss": 2.4317, "step": 789 }, { "epoch": 0.45, "learning_rate": 0.0006099784593524678, "loss": 2.4133, "step": 790 }, { "epoch": 0.45, "learning_rate": 0.0006090806011511808, "loss": 2.4218, "step": 791 }, { "epoch": 0.45, "learning_rate": 0.0006081823734791089, "loss": 2.3959, "step": 792 }, { "epoch": 0.45, "learning_rate": 0.000607283779378671, "loss": 2.436, "step": 793 }, { "epoch": 0.45, "learning_rate": 0.000606384821893527, "loss": 2.4626, "step": 794 }, { "epoch": 0.45, "learning_rate": 0.0006054855040685679, "loss": 2.4465, "step": 795 }, { "epoch": 0.45, "learning_rate": 0.000604585828949905, "loss": 2.4273, "step": 796 }, { "epoch": 0.45, "learning_rate": 0.00060368579958486, "loss": 2.4659, "step": 797 }, { "epoch": 0.45, "learning_rate": 0.0006027854190219544, "loss": 2.3485, "step": 798 }, { "epoch": 0.45, "learning_rate": 0.0006018846903108991, "loss": 2.4722, "step": 799 }, { "epoch": 0.45, "learning_rate": 0.0006009836165025845, "loss": 2.372, "step": 800 }, { "epoch": 0.46, "learning_rate": 0.0006000822006490696, "loss": 2.4584, "step": 801 }, { "epoch": 0.46, "learning_rate": 0.0005991804458035724, "loss": 2.5084, "step": 802 }, { "epoch": 0.46, "learning_rate": 0.0005982783550204585, "loss": 2.3997, "step": 803 }, { "epoch": 0.46, "learning_rate": 0.0005973759313552318, "loss": 2.4404, "step": 804 }, { "epoch": 0.46, "learning_rate": 0.0005964731778645237, "loss": 2.4222, "step": 805 }, { "epoch": 0.46, "learning_rate": 0.0005955700976060826, "loss": 2.4262, "step": 806 }, { "epoch": 0.46, "learning_rate": 0.0005946666936387637, "loss": 2.4388, "step": 807 }, { "epoch": 0.46, "learning_rate": 0.0005937629690225188, "loss": 2.4265, "step": 808 }, { "epoch": 0.46, "learning_rate": 0.0005928589268183856, "loss": 2.3676, "step": 809 }, { "epoch": 0.46, "learning_rate": 0.0005919545700884778, "loss": 2.3852, "step": 810 }, { "epoch": 0.46, "learning_rate": 0.0005910499018959741, "loss": 2.3914, "step": 811 }, { "epoch": 0.46, "learning_rate": 0.0005901449253051084, "loss": 2.4019, "step": 812 }, { "epoch": 0.46, "learning_rate": 0.0005892396433811589, "loss": 2.4193, "step": 813 }, { "epoch": 0.46, "learning_rate": 0.0005883340591904385, "loss": 2.343, "step": 814 }, { "epoch": 0.46, "learning_rate": 0.0005874281758002835, "loss": 2.4708, "step": 815 }, { "epoch": 0.46, "learning_rate": 0.0005865219962790438, "loss": 2.4664, "step": 816 }, { "epoch": 0.46, "learning_rate": 0.0005856155236960723, "loss": 2.3792, "step": 817 }, { "epoch": 0.46, "learning_rate": 0.0005847087611217145, "loss": 2.4771, "step": 818 }, { "epoch": 0.47, "learning_rate": 0.0005838017116272981, "loss": 2.3939, "step": 819 }, { "epoch": 0.47, "learning_rate": 0.0005828943782851228, "loss": 2.4681, "step": 820 }, { "epoch": 0.47, "learning_rate": 0.0005819867641684497, "loss": 2.5123, "step": 821 }, { "epoch": 0.47, "learning_rate": 0.0005810788723514908, "loss": 2.478, "step": 822 }, { "epoch": 0.47, "learning_rate": 0.0005801707059093988, "loss": 2.4284, "step": 823 }, { "epoch": 0.47, "learning_rate": 0.0005792622679182565, "loss": 2.3805, "step": 824 }, { "epoch": 0.47, "learning_rate": 0.0005783535614550666, "loss": 2.4595, "step": 825 }, { "epoch": 0.47, "learning_rate": 0.000577444589597741, "loss": 2.4835, "step": 826 }, { "epoch": 0.47, "learning_rate": 0.0005765353554250907, "loss": 2.4444, "step": 827 }, { "epoch": 0.47, "learning_rate": 0.0005756258620168151, "loss": 2.3644, "step": 828 }, { "epoch": 0.47, "learning_rate": 0.0005747161124534918, "loss": 2.3951, "step": 829 }, { "epoch": 0.47, "learning_rate": 0.0005738061098165657, "loss": 2.3674, "step": 830 }, { "epoch": 0.47, "learning_rate": 0.0005728958571883392, "loss": 2.3816, "step": 831 }, { "epoch": 0.47, "learning_rate": 0.0005719853576519614, "loss": 2.4328, "step": 832 }, { "epoch": 0.47, "learning_rate": 0.0005710746142914177, "loss": 2.4554, "step": 833 }, { "epoch": 0.47, "learning_rate": 0.0005701636301915191, "loss": 2.4233, "step": 834 }, { "epoch": 0.47, "learning_rate": 0.0005692524084378926, "loss": 2.3548, "step": 835 }, { "epoch": 0.47, "learning_rate": 0.0005683409521169697, "loss": 2.3324, "step": 836 }, { "epoch": 0.48, "learning_rate": 0.0005674292643159764, "loss": 2.4403, "step": 837 }, { "epoch": 0.48, "learning_rate": 0.000566517348122923, "loss": 2.4564, "step": 838 }, { "epoch": 0.48, "learning_rate": 0.0005656052066265931, "loss": 2.3844, "step": 839 }, { "epoch": 0.48, "learning_rate": 0.0005646928429165339, "loss": 2.3731, "step": 840 }, { "epoch": 0.48, "learning_rate": 0.0005637802600830449, "loss": 2.4234, "step": 841 }, { "epoch": 0.48, "learning_rate": 0.0005628674612171679, "loss": 2.3916, "step": 842 }, { "epoch": 0.48, "learning_rate": 0.0005619544494106761, "loss": 2.3402, "step": 843 }, { "epoch": 0.48, "learning_rate": 0.0005610412277560646, "loss": 2.5129, "step": 844 }, { "epoch": 0.48, "learning_rate": 0.000560127799346539, "loss": 2.3983, "step": 845 }, { "epoch": 0.48, "learning_rate": 0.0005592141672760049, "loss": 2.4103, "step": 846 }, { "epoch": 0.48, "learning_rate": 0.0005583003346390582, "loss": 2.3486, "step": 847 }, { "epoch": 0.48, "learning_rate": 0.0005573863045309738, "loss": 2.4649, "step": 848 }, { "epoch": 0.48, "learning_rate": 0.0005564720800476958, "loss": 2.4571, "step": 849 }, { "epoch": 0.48, "learning_rate": 0.0005555576642858263, "loss": 2.3613, "step": 850 }, { "epoch": 0.48, "learning_rate": 0.0005546430603426154, "loss": 2.4394, "step": 851 }, { "epoch": 0.48, "learning_rate": 0.0005537282713159507, "loss": 2.4288, "step": 852 }, { "epoch": 0.48, "learning_rate": 0.0005528133003043468, "loss": 2.4392, "step": 853 }, { "epoch": 0.49, "learning_rate": 0.0005518981504069345, "loss": 2.4756, "step": 854 }, { "epoch": 0.49, "learning_rate": 0.0005509828247234505, "loss": 2.4553, "step": 855 }, { "epoch": 0.49, "learning_rate": 0.0005500673263542272, "loss": 2.4203, "step": 856 }, { "epoch": 0.49, "learning_rate": 0.0005491516584001814, "loss": 2.3956, "step": 857 }, { "epoch": 0.49, "learning_rate": 0.0005482358239628047, "loss": 2.3655, "step": 858 }, { "epoch": 0.49, "learning_rate": 0.0005473198261441526, "loss": 2.4502, "step": 859 }, { "epoch": 0.49, "learning_rate": 0.0005464036680468339, "loss": 2.346, "step": 860 }, { "epoch": 0.49, "learning_rate": 0.0005454873527740002, "loss": 2.4168, "step": 861 }, { "epoch": 0.49, "learning_rate": 0.0005445708834293356, "loss": 2.4072, "step": 862 }, { "epoch": 0.49, "learning_rate": 0.0005436542631170459, "loss": 2.4012, "step": 863 }, { "epoch": 0.49, "learning_rate": 0.0005427374949418486, "loss": 2.4058, "step": 864 }, { "epoch": 0.49, "learning_rate": 0.0005418205820089616, "loss": 2.4099, "step": 865 }, { "epoch": 0.49, "learning_rate": 0.0005409035274240935, "loss": 2.4391, "step": 866 }, { "epoch": 0.49, "learning_rate": 0.0005399863342934323, "loss": 2.412, "step": 867 }, { "epoch": 0.49, "learning_rate": 0.0005390690057236359, "loss": 2.3586, "step": 868 }, { "epoch": 0.49, "learning_rate": 0.00053815154482182, "loss": 2.4768, "step": 869 }, { "epoch": 0.49, "learning_rate": 0.0005372339546955492, "loss": 2.3788, "step": 870 }, { "epoch": 0.49, "learning_rate": 0.000536316238452826, "loss": 2.263, "step": 871 }, { "epoch": 0.5, "learning_rate": 0.0005353983992020794, "loss": 2.4776, "step": 872 }, { "epoch": 0.5, "learning_rate": 0.0005344804400521554, "loss": 2.4101, "step": 873 }, { "epoch": 0.5, "learning_rate": 0.0005335623641123062, "loss": 2.3999, "step": 874 }, { "epoch": 0.5, "learning_rate": 0.0005326441744921795, "loss": 2.4281, "step": 875 }, { "epoch": 0.5, "learning_rate": 0.000531725874301808, "loss": 2.443, "step": 876 }, { "epoch": 0.5, "learning_rate": 0.0005308074666515989, "loss": 2.3822, "step": 877 }, { "epoch": 0.5, "learning_rate": 0.0005298889546523233, "loss": 2.4589, "step": 878 }, { "epoch": 0.5, "learning_rate": 0.0005289703414151062, "loss": 2.4423, "step": 879 }, { "epoch": 0.5, "learning_rate": 0.000528051630051415, "loss": 2.4234, "step": 880 }, { "epoch": 0.5, "learning_rate": 0.0005271328236730496, "loss": 2.4683, "step": 881 }, { "epoch": 0.5, "learning_rate": 0.0005262139253921318, "loss": 2.3599, "step": 882 }, { "epoch": 0.5, "learning_rate": 0.0005252949383210948, "loss": 2.4579, "step": 883 }, { "epoch": 0.5, "learning_rate": 0.0005243758655726721, "loss": 2.3679, "step": 884 }, { "epoch": 0.5, "learning_rate": 0.0005234567102598881, "loss": 2.3544, "step": 885 }, { "epoch": 0.5, "learning_rate": 0.0005225374754960462, "loss": 2.3712, "step": 886 }, { "epoch": 0.5, "learning_rate": 0.0005216181643947191, "loss": 2.4384, "step": 887 }, { "epoch": 0.5, "learning_rate": 0.0005206987800697383, "loss": 2.3886, "step": 888 }, { "epoch": 0.51, "learning_rate": 0.0005197793256351832, "loss": 2.4095, "step": 889 }, { "epoch": 0.51, "learning_rate": 0.0005188598042053704, "loss": 2.404, "step": 890 }, { "epoch": 0.51, "learning_rate": 0.0005179402188948438, "loss": 2.4051, "step": 891 }, { "epoch": 0.51, "learning_rate": 0.0005170205728183636, "loss": 2.4318, "step": 892 }, { "epoch": 0.51, "learning_rate": 0.0005161008690908956, "loss": 2.3231, "step": 893 }, { "epoch": 0.51, "learning_rate": 0.0005151811108276011, "loss": 2.4394, "step": 894 }, { "epoch": 0.51, "learning_rate": 0.000514261301143826, "loss": 2.3758, "step": 895 }, { "epoch": 0.51, "learning_rate": 0.0005133414431550905, "loss": 2.3559, "step": 896 }, { "epoch": 0.51, "learning_rate": 0.0005124215399770782, "loss": 2.4378, "step": 897 }, { "epoch": 0.51, "learning_rate": 0.0005115015947256259, "loss": 2.3401, "step": 898 }, { "epoch": 0.51, "learning_rate": 0.0005105816105167129, "loss": 2.4664, "step": 899 }, { "epoch": 0.51, "learning_rate": 0.0005096615904664505, "loss": 2.308, "step": 900 }, { "epoch": 0.51, "learning_rate": 0.0005087415376910712, "loss": 2.316, "step": 901 }, { "epoch": 0.51, "learning_rate": 0.0005078214553069186, "loss": 2.4194, "step": 902 }, { "epoch": 0.51, "learning_rate": 0.0005069013464304365, "loss": 2.3844, "step": 903 }, { "epoch": 0.51, "learning_rate": 0.0005059812141781584, "loss": 2.4863, "step": 904 }, { "epoch": 0.51, "learning_rate": 0.0005050610616666968, "loss": 2.3552, "step": 905 }, { "epoch": 0.51, "learning_rate": 0.0005041408920127332, "loss": 2.4535, "step": 906 }, { "epoch": 0.52, "learning_rate": 0.0005032207083330071, "loss": 2.3841, "step": 907 }, { "epoch": 0.52, "learning_rate": 0.0005023005137443051, "loss": 2.4558, "step": 908 }, { "epoch": 0.52, "learning_rate": 0.0005013803113634514, "loss": 2.4689, "step": 909 }, { "epoch": 0.52, "learning_rate": 0.0005004601043072958, "loss": 2.4366, "step": 910 }, { "epoch": 0.52, "learning_rate": 0.0004995398956927044, "loss": 2.3713, "step": 911 }, { "epoch": 0.52, "learning_rate": 0.0004986196886365487, "loss": 2.4485, "step": 912 }, { "epoch": 0.52, "learning_rate": 0.0004976994862556949, "loss": 2.41, "step": 913 }, { "epoch": 0.52, "learning_rate": 0.0004967792916669929, "loss": 2.4275, "step": 914 }, { "epoch": 0.52, "learning_rate": 0.0004958591079872668, "loss": 2.4909, "step": 915 }, { "epoch": 0.52, "learning_rate": 0.0004949389383333032, "loss": 2.4397, "step": 916 }, { "epoch": 0.52, "learning_rate": 0.0004940187858218417, "loss": 2.495, "step": 917 }, { "epoch": 0.52, "learning_rate": 0.0004930986535695636, "loss": 2.3796, "step": 918 }, { "epoch": 0.52, "learning_rate": 0.0004921785446930815, "loss": 2.4329, "step": 919 }, { "epoch": 0.52, "learning_rate": 0.0004912584623089289, "loss": 2.3956, "step": 920 }, { "epoch": 0.52, "learning_rate": 0.0004903384095335496, "loss": 2.4484, "step": 921 }, { "epoch": 0.52, "learning_rate": 0.0004894183894832872, "loss": 2.3731, "step": 922 }, { "epoch": 0.52, "learning_rate": 0.0004884984052743741, "loss": 2.3831, "step": 923 }, { "epoch": 0.52, "learning_rate": 0.0004875784600229219, "loss": 2.4037, "step": 924 }, { "epoch": 0.53, "learning_rate": 0.00048665855684490954, "loss": 2.4096, "step": 925 }, { "epoch": 0.53, "learning_rate": 0.000485738698856174, "loss": 2.3367, "step": 926 }, { "epoch": 0.53, "learning_rate": 0.000484818889172399, "loss": 2.4187, "step": 927 }, { "epoch": 0.53, "learning_rate": 0.0004838991309091045, "loss": 2.4407, "step": 928 }, { "epoch": 0.53, "learning_rate": 0.0004829794271816365, "loss": 2.4333, "step": 929 }, { "epoch": 0.53, "learning_rate": 0.0004820597811051563, "loss": 2.4117, "step": 930 }, { "epoch": 0.53, "learning_rate": 0.00048114019579462977, "loss": 2.4389, "step": 931 }, { "epoch": 0.53, "learning_rate": 0.00048022067436481703, "loss": 2.4473, "step": 932 }, { "epoch": 0.53, "learning_rate": 0.0004793012199302619, "loss": 2.3529, "step": 933 }, { "epoch": 0.53, "learning_rate": 0.00047838183560528115, "loss": 2.4897, "step": 934 }, { "epoch": 0.53, "learning_rate": 0.0004774625245039541, "loss": 2.3568, "step": 935 }, { "epoch": 0.53, "learning_rate": 0.0004765432897401121, "loss": 2.4079, "step": 936 }, { "epoch": 0.53, "learning_rate": 0.00047562413442732784, "loss": 2.4042, "step": 937 }, { "epoch": 0.53, "learning_rate": 0.0004747050616789052, "loss": 2.3854, "step": 938 }, { "epoch": 0.53, "learning_rate": 0.00047378607460786814, "loss": 2.3854, "step": 939 }, { "epoch": 0.53, "learning_rate": 0.00047286717632695035, "loss": 2.4135, "step": 940 }, { "epoch": 0.53, "learning_rate": 0.00047194836994858503, "loss": 2.4586, "step": 941 }, { "epoch": 0.54, "learning_rate": 0.0004710296585848938, "loss": 2.4829, "step": 942 }, { "epoch": 0.54, "learning_rate": 0.0004701110453476767, "loss": 2.4407, "step": 943 }, { "epoch": 0.54, "learning_rate": 0.00046919253334840125, "loss": 2.3109, "step": 944 }, { "epoch": 0.54, "learning_rate": 0.00046827412569819217, "loss": 2.3722, "step": 945 }, { "epoch": 0.54, "learning_rate": 0.00046735582550782066, "loss": 2.3676, "step": 946 }, { "epoch": 0.54, "learning_rate": 0.0004664376358876939, "loss": 2.4991, "step": 947 }, { "epoch": 0.54, "learning_rate": 0.0004655195599478448, "loss": 2.3987, "step": 948 }, { "epoch": 0.54, "learning_rate": 0.0004646016007979207, "loss": 2.3807, "step": 949 }, { "epoch": 0.54, "learning_rate": 0.0004636837615471741, "loss": 2.3651, "step": 950 }, { "epoch": 0.54, "learning_rate": 0.0004627660453044508, "loss": 2.4073, "step": 951 }, { "epoch": 0.54, "learning_rate": 0.00046184845517818014, "loss": 2.4485, "step": 952 }, { "epoch": 0.54, "learning_rate": 0.00046093099427636433, "loss": 2.4144, "step": 953 }, { "epoch": 0.54, "learning_rate": 0.0004600136657065677, "loss": 2.3495, "step": 954 }, { "epoch": 0.54, "learning_rate": 0.0004590964725759066, "loss": 2.4432, "step": 955 }, { "epoch": 0.54, "learning_rate": 0.00045817941799103845, "loss": 2.3367, "step": 956 }, { "epoch": 0.54, "learning_rate": 0.0004572625050581516, "loss": 2.4404, "step": 957 }, { "epoch": 0.54, "learning_rate": 0.0004563457368829542, "loss": 2.4057, "step": 958 }, { "epoch": 0.54, "learning_rate": 0.00045542911657066467, "loss": 2.4284, "step": 959 }, { "epoch": 0.55, "learning_rate": 0.0004545126472260001, "loss": 2.3815, "step": 960 }, { "epoch": 0.55, "learning_rate": 0.0004535963319531663, "loss": 2.3614, "step": 961 }, { "epoch": 0.55, "learning_rate": 0.0004526801738558476, "loss": 2.4519, "step": 962 }, { "epoch": 0.55, "learning_rate": 0.0004517641760371955, "loss": 2.3831, "step": 963 }, { "epoch": 0.55, "learning_rate": 0.00045084834159981864, "loss": 2.346, "step": 964 }, { "epoch": 0.55, "learning_rate": 0.00044993267364577285, "loss": 2.4427, "step": 965 }, { "epoch": 0.55, "learning_rate": 0.0004490171752765494, "loss": 2.391, "step": 966 }, { "epoch": 0.55, "learning_rate": 0.00044810184959306545, "loss": 2.451, "step": 967 }, { "epoch": 0.55, "learning_rate": 0.00044718669969565317, "loss": 2.3838, "step": 968 }, { "epoch": 0.55, "learning_rate": 0.00044627172868404923, "loss": 2.4119, "step": 969 }, { "epoch": 0.55, "learning_rate": 0.00044535693965738467, "loss": 2.4274, "step": 970 }, { "epoch": 0.55, "learning_rate": 0.0004444423357141738, "loss": 2.3029, "step": 971 }, { "epoch": 0.55, "learning_rate": 0.0004435279199523043, "loss": 2.3777, "step": 972 }, { "epoch": 0.55, "learning_rate": 0.0004426136954690262, "loss": 2.3987, "step": 973 }, { "epoch": 0.55, "learning_rate": 0.00044169966536094184, "loss": 2.3581, "step": 974 }, { "epoch": 0.55, "learning_rate": 0.00044078583272399513, "loss": 2.3907, "step": 975 }, { "epoch": 0.55, "learning_rate": 0.00043987220065346114, "loss": 2.4248, "step": 976 }, { "epoch": 0.56, "learning_rate": 0.00043895877224393545, "loss": 2.3192, "step": 977 }, { "epoch": 0.56, "learning_rate": 0.000438045550589324, "loss": 2.358, "step": 978 }, { "epoch": 0.56, "learning_rate": 0.00043713253878283233, "loss": 2.391, "step": 979 }, { "epoch": 0.56, "learning_rate": 0.00043621973991695524, "loss": 2.4907, "step": 980 }, { "epoch": 0.56, "learning_rate": 0.0004353071570834662, "loss": 2.4146, "step": 981 }, { "epoch": 0.56, "learning_rate": 0.00043439479337340696, "loss": 2.3712, "step": 982 }, { "epoch": 0.56, "learning_rate": 0.0004334826518770771, "loss": 2.4449, "step": 983 }, { "epoch": 0.56, "learning_rate": 0.0004325707356840237, "loss": 2.3753, "step": 984 }, { "epoch": 0.56, "learning_rate": 0.0004316590478830304, "loss": 2.3247, "step": 985 }, { "epoch": 0.56, "learning_rate": 0.0004307475915621074, "loss": 2.3548, "step": 986 }, { "epoch": 0.56, "learning_rate": 0.0004298363698084809, "loss": 2.3644, "step": 987 }, { "epoch": 0.56, "learning_rate": 0.0004289253857085824, "loss": 2.4098, "step": 988 }, { "epoch": 0.56, "learning_rate": 0.00042801464234803864, "loss": 2.3517, "step": 989 }, { "epoch": 0.56, "learning_rate": 0.00042710414281166075, "loss": 2.439, "step": 990 }, { "epoch": 0.56, "learning_rate": 0.00042619389018343435, "loss": 2.4223, "step": 991 }, { "epoch": 0.56, "learning_rate": 0.0004252838875465083, "loss": 2.3211, "step": 992 }, { "epoch": 0.56, "learning_rate": 0.0004243741379831848, "loss": 2.401, "step": 993 }, { "epoch": 0.56, "learning_rate": 0.0004234646445749093, "loss": 2.4459, "step": 994 }, { "epoch": 0.57, "learning_rate": 0.000422555410402259, "loss": 2.2829, "step": 995 }, { "epoch": 0.57, "learning_rate": 0.0004216464385449335, "loss": 2.3647, "step": 996 }, { "epoch": 0.57, "learning_rate": 0.0004207377320817436, "loss": 2.4144, "step": 997 }, { "epoch": 0.57, "learning_rate": 0.00041982929409060135, "loss": 2.3446, "step": 998 }, { "epoch": 0.57, "learning_rate": 0.00041892112764850927, "loss": 2.3324, "step": 999 }, { "epoch": 0.57, "learning_rate": 0.00041801323583155037, "loss": 2.2845, "step": 1000 }, { "epoch": 0.57, "learning_rate": 0.00041710562171487725, "loss": 2.3375, "step": 1001 }, { "epoch": 0.57, "learning_rate": 0.000416198288372702, "loss": 2.3582, "step": 1002 }, { "epoch": 0.57, "learning_rate": 0.0004152912388782856, "loss": 2.2841, "step": 1003 }, { "epoch": 0.57, "learning_rate": 0.00041438447630392784, "loss": 2.3542, "step": 1004 }, { "epoch": 0.57, "learning_rate": 0.00041347800372095627, "loss": 2.4362, "step": 1005 }, { "epoch": 0.57, "learning_rate": 0.00041257182419971654, "loss": 2.4016, "step": 1006 }, { "epoch": 0.57, "learning_rate": 0.0004116659408095616, "loss": 2.4134, "step": 1007 }, { "epoch": 0.57, "learning_rate": 0.0004107603566188412, "loss": 2.3195, "step": 1008 }, { "epoch": 0.57, "learning_rate": 0.0004098550746948918, "loss": 2.3902, "step": 1009 }, { "epoch": 0.57, "learning_rate": 0.0004089500981040261, "loss": 2.4356, "step": 1010 }, { "epoch": 0.57, "learning_rate": 0.0004080454299115224, "loss": 2.4718, "step": 1011 }, { "epoch": 0.57, "learning_rate": 0.00040714107318161455, "loss": 2.3165, "step": 1012 }, { "epoch": 0.58, "learning_rate": 0.0004062370309774814, "loss": 2.3676, "step": 1013 }, { "epoch": 0.58, "learning_rate": 0.00040533330636123647, "loss": 2.334, "step": 1014 }, { "epoch": 0.58, "learning_rate": 0.00040442990239391766, "loss": 2.4078, "step": 1015 }, { "epoch": 0.58, "learning_rate": 0.00040352682213547626, "loss": 2.3833, "step": 1016 }, { "epoch": 0.58, "learning_rate": 0.0004026240686447681, "loss": 2.3571, "step": 1017 }, { "epoch": 0.58, "learning_rate": 0.0004017216449795415, "loss": 2.3641, "step": 1018 }, { "epoch": 0.58, "learning_rate": 0.00040081955419642764, "loss": 2.367, "step": 1019 }, { "epoch": 0.58, "learning_rate": 0.0003999177993509303, "loss": 2.3368, "step": 1020 }, { "epoch": 0.58, "learning_rate": 0.00039901638349741555, "loss": 2.4514, "step": 1021 }, { "epoch": 0.58, "learning_rate": 0.0003981153096891009, "loss": 2.3297, "step": 1022 }, { "epoch": 0.58, "learning_rate": 0.0003972145809780457, "loss": 2.3843, "step": 1023 }, { "epoch": 0.58, "learning_rate": 0.0003963142004151401, "loss": 2.4125, "step": 1024 }, { "epoch": 0.58, "learning_rate": 0.00039541417105009506, "loss": 2.3168, "step": 1025 }, { "epoch": 0.58, "learning_rate": 0.00039451449593143215, "loss": 2.4065, "step": 1026 }, { "epoch": 0.58, "learning_rate": 0.0003936151781064731, "loss": 2.3478, "step": 1027 }, { "epoch": 0.58, "learning_rate": 0.0003927162206213291, "loss": 2.3989, "step": 1028 }, { "epoch": 0.58, "learning_rate": 0.00039181762652089114, "loss": 2.4372, "step": 1029 }, { "epoch": 0.59, "learning_rate": 0.0003909193988488192, "loss": 2.3873, "step": 1030 }, { "epoch": 0.59, "learning_rate": 0.0003900215406475324, "loss": 2.4046, "step": 1031 }, { "epoch": 0.59, "learning_rate": 0.00038912405495819785, "loss": 2.3524, "step": 1032 }, { "epoch": 0.59, "learning_rate": 0.00038822694482072153, "loss": 2.3564, "step": 1033 }, { "epoch": 0.59, "learning_rate": 0.00038733021327373696, "loss": 2.369, "step": 1034 }, { "epoch": 0.59, "learning_rate": 0.0003864338633545956, "loss": 2.3578, "step": 1035 }, { "epoch": 0.59, "learning_rate": 0.000385537898099356, "loss": 2.3602, "step": 1036 }, { "epoch": 0.59, "learning_rate": 0.000384642320542774, "loss": 2.4313, "step": 1037 }, { "epoch": 0.59, "learning_rate": 0.0003837471337182923, "loss": 2.3291, "step": 1038 }, { "epoch": 0.59, "learning_rate": 0.0003828523406580299, "loss": 2.3555, "step": 1039 }, { "epoch": 0.59, "learning_rate": 0.00038195794439277225, "loss": 2.3163, "step": 1040 }, { "epoch": 0.59, "learning_rate": 0.00038106394795196086, "loss": 2.3385, "step": 1041 }, { "epoch": 0.59, "learning_rate": 0.00038017035436368254, "loss": 2.4187, "step": 1042 }, { "epoch": 0.59, "learning_rate": 0.00037927716665466047, "loss": 2.4404, "step": 1043 }, { "epoch": 0.59, "learning_rate": 0.00037838438785024216, "loss": 2.397, "step": 1044 }, { "epoch": 0.59, "learning_rate": 0.00037749202097439057, "loss": 2.3734, "step": 1045 }, { "epoch": 0.59, "learning_rate": 0.00037660006904967325, "loss": 2.4009, "step": 1046 }, { "epoch": 0.59, "learning_rate": 0.00037570853509725236, "loss": 2.4276, "step": 1047 }, { "epoch": 0.6, "learning_rate": 0.0003748174221368742, "loss": 2.3821, "step": 1048 }, { "epoch": 0.6, "learning_rate": 0.00037392673318685916, "loss": 2.321, "step": 1049 }, { "epoch": 0.6, "learning_rate": 0.00037303647126409154, "loss": 2.3743, "step": 1050 }, { "epoch": 0.6, "learning_rate": 0.00037214663938400913, "loss": 2.3692, "step": 1051 }, { "epoch": 0.6, "learning_rate": 0.0003712572405605932, "loss": 2.383, "step": 1052 }, { "epoch": 0.6, "learning_rate": 0.0003703682778063581, "loss": 2.3707, "step": 1053 }, { "epoch": 0.6, "learning_rate": 0.00036947975413234113, "loss": 2.3864, "step": 1054 }, { "epoch": 0.6, "learning_rate": 0.00036859167254809256, "loss": 2.3399, "step": 1055 }, { "epoch": 0.6, "learning_rate": 0.00036770403606166516, "loss": 2.469, "step": 1056 }, { "epoch": 0.6, "learning_rate": 0.00036681684767960387, "loss": 2.4053, "step": 1057 }, { "epoch": 0.6, "learning_rate": 0.00036593011040693613, "loss": 2.4008, "step": 1058 }, { "epoch": 0.6, "learning_rate": 0.00036504382724716135, "loss": 2.3744, "step": 1059 }, { "epoch": 0.6, "learning_rate": 0.00036415800120224057, "loss": 2.3632, "step": 1060 }, { "epoch": 0.6, "learning_rate": 0.0003632726352725869, "loss": 2.3286, "step": 1061 }, { "epoch": 0.6, "learning_rate": 0.0003623877324570548, "loss": 2.4072, "step": 1062 }, { "epoch": 0.6, "learning_rate": 0.00036150329575292996, "loss": 2.2996, "step": 1063 }, { "epoch": 0.6, "learning_rate": 0.0003606193281559194, "loss": 2.4821, "step": 1064 }, { "epoch": 0.61, "learning_rate": 0.00035973583266014133, "loss": 2.3883, "step": 1065 }, { "epoch": 0.61, "learning_rate": 0.0003588528122581146, "loss": 2.3579, "step": 1066 }, { "epoch": 0.61, "learning_rate": 0.0003579702699407492, "loss": 2.3023, "step": 1067 }, { "epoch": 0.61, "learning_rate": 0.0003570882086973355, "loss": 2.4152, "step": 1068 }, { "epoch": 0.61, "learning_rate": 0.0003562066315155345, "loss": 2.3282, "step": 1069 }, { "epoch": 0.61, "learning_rate": 0.0003553255413813676, "loss": 2.2976, "step": 1070 }, { "epoch": 0.61, "learning_rate": 0.0003544449412792069, "loss": 2.4261, "step": 1071 }, { "epoch": 0.61, "learning_rate": 0.000353564834191764, "loss": 2.4305, "step": 1072 }, { "epoch": 0.61, "learning_rate": 0.000352685223100081, "loss": 2.3566, "step": 1073 }, { "epoch": 0.61, "learning_rate": 0.00035180611098351987, "loss": 2.3931, "step": 1074 }, { "epoch": 0.61, "learning_rate": 0.00035092750081975265, "loss": 2.4228, "step": 1075 }, { "epoch": 0.61, "learning_rate": 0.00035004939558475105, "loss": 2.3614, "step": 1076 }, { "epoch": 0.61, "learning_rate": 0.0003491717982527765, "loss": 2.3207, "step": 1077 }, { "epoch": 0.61, "learning_rate": 0.0003482947117963702, "loss": 2.3927, "step": 1078 }, { "epoch": 0.61, "learning_rate": 0.0003474181391863428, "loss": 2.3219, "step": 1079 }, { "epoch": 0.61, "learning_rate": 0.0003465420833917647, "loss": 2.4248, "step": 1080 }, { "epoch": 0.61, "learning_rate": 0.00034566654737995555, "loss": 2.393, "step": 1081 }, { "epoch": 0.61, "learning_rate": 0.00034479153411647457, "loss": 2.3521, "step": 1082 }, { "epoch": 0.62, "learning_rate": 0.00034391704656511034, "loss": 2.3803, "step": 1083 }, { "epoch": 0.62, "learning_rate": 0.00034304308768787076, "loss": 2.3621, "step": 1084 }, { "epoch": 0.62, "learning_rate": 0.0003421696604449729, "loss": 2.365, "step": 1085 }, { "epoch": 0.62, "learning_rate": 0.00034129676779483346, "loss": 2.3525, "step": 1086 }, { "epoch": 0.62, "learning_rate": 0.000340424412694058, "loss": 2.3504, "step": 1087 }, { "epoch": 0.62, "learning_rate": 0.00033955259809743157, "loss": 2.3886, "step": 1088 }, { "epoch": 0.62, "learning_rate": 0.0003386813269579085, "loss": 2.4235, "step": 1089 }, { "epoch": 0.62, "learning_rate": 0.0003378106022266022, "loss": 2.3589, "step": 1090 }, { "epoch": 0.62, "learning_rate": 0.0003369404268527754, "loss": 2.3473, "step": 1091 }, { "epoch": 0.62, "learning_rate": 0.00033607080378383, "loss": 2.3863, "step": 1092 }, { "epoch": 0.62, "learning_rate": 0.00033520173596529725, "loss": 2.3328, "step": 1093 }, { "epoch": 0.62, "learning_rate": 0.0003343332263408277, "loss": 2.3727, "step": 1094 }, { "epoch": 0.62, "learning_rate": 0.0003334652778521813, "loss": 2.3475, "step": 1095 }, { "epoch": 0.62, "learning_rate": 0.0003325978934392171, "loss": 2.3711, "step": 1096 }, { "epoch": 0.62, "learning_rate": 0.0003317310760398839, "loss": 2.2961, "step": 1097 }, { "epoch": 0.62, "learning_rate": 0.00033086482859020957, "loss": 2.3632, "step": 1098 }, { "epoch": 0.62, "learning_rate": 0.00032999915402429173, "loss": 2.3933, "step": 1099 }, { "epoch": 0.62, "learning_rate": 0.0003291340552742875, "loss": 2.3845, "step": 1100 }, { "epoch": 0.63, "learning_rate": 0.0003282695352704036, "loss": 2.3837, "step": 1101 }, { "epoch": 0.63, "learning_rate": 0.00032740559694088655, "loss": 2.3652, "step": 1102 }, { "epoch": 0.63, "learning_rate": 0.0003265422432120125, "loss": 2.5405, "step": 1103 }, { "epoch": 0.63, "learning_rate": 0.0003256794770080778, "loss": 2.3815, "step": 1104 }, { "epoch": 0.63, "learning_rate": 0.00032481730125138823, "loss": 2.4564, "step": 1105 }, { "epoch": 0.63, "learning_rate": 0.0003239557188622503, "loss": 2.367, "step": 1106 }, { "epoch": 0.63, "learning_rate": 0.0003230947327589602, "loss": 2.4146, "step": 1107 }, { "epoch": 0.63, "learning_rate": 0.0003222343458577945, "loss": 2.3144, "step": 1108 }, { "epoch": 0.63, "learning_rate": 0.00032137456107300054, "loss": 2.3881, "step": 1109 }, { "epoch": 0.63, "learning_rate": 0.0003205153813167858, "loss": 2.3861, "step": 1110 }, { "epoch": 0.63, "learning_rate": 0.00031965680949930854, "loss": 2.292, "step": 1111 }, { "epoch": 0.63, "learning_rate": 0.00031879884852866803, "loss": 2.418, "step": 1112 }, { "epoch": 0.63, "learning_rate": 0.00031794150131089435, "loss": 2.4622, "step": 1113 }, { "epoch": 0.63, "learning_rate": 0.0003170847707499387, "loss": 2.3496, "step": 1114 }, { "epoch": 0.63, "learning_rate": 0.0003162286597476638, "loss": 2.3934, "step": 1115 }, { "epoch": 0.63, "learning_rate": 0.0003153731712038335, "loss": 2.3758, "step": 1116 }, { "epoch": 0.63, "learning_rate": 0.0003145183080161036, "loss": 2.3755, "step": 1117 }, { "epoch": 0.64, "learning_rate": 0.0003136640730800116, "loss": 2.339, "step": 1118 }, { "epoch": 0.64, "learning_rate": 0.000312810469288967, "loss": 2.3153, "step": 1119 }, { "epoch": 0.64, "learning_rate": 0.00031195749953424163, "loss": 2.4115, "step": 1120 }, { "epoch": 0.64, "learning_rate": 0.00031110516670495946, "loss": 2.3596, "step": 1121 }, { "epoch": 0.64, "learning_rate": 0.00031025347368808775, "loss": 2.3859, "step": 1122 }, { "epoch": 0.64, "learning_rate": 0.000309402423368426, "loss": 2.3421, "step": 1123 }, { "epoch": 0.64, "learning_rate": 0.00030855201862859706, "loss": 2.4224, "step": 1124 }, { "epoch": 0.64, "learning_rate": 0.0003077022623490371, "loss": 2.4121, "step": 1125 }, { "epoch": 0.64, "learning_rate": 0.0003068531574079857, "loss": 2.3836, "step": 1126 }, { "epoch": 0.64, "learning_rate": 0.00030600470668147654, "loss": 2.3476, "step": 1127 }, { "epoch": 0.64, "learning_rate": 0.00030515691304332725, "loss": 2.3695, "step": 1128 }, { "epoch": 0.64, "learning_rate": 0.0003043097793651299, "loss": 2.4237, "step": 1129 }, { "epoch": 0.64, "learning_rate": 0.00030346330851624095, "loss": 2.3247, "step": 1130 }, { "epoch": 0.64, "learning_rate": 0.00030261750336377203, "loss": 2.2692, "step": 1131 }, { "epoch": 0.64, "learning_rate": 0.0003017723667725798, "loss": 2.3942, "step": 1132 }, { "epoch": 0.64, "learning_rate": 0.00030092790160525653, "loss": 2.4238, "step": 1133 }, { "epoch": 0.64, "learning_rate": 0.00030008411072212027, "loss": 2.3459, "step": 1134 }, { "epoch": 0.64, "learning_rate": 0.000299240996981205, "loss": 2.3385, "step": 1135 }, { "epoch": 0.65, "learning_rate": 0.0002983985632382514, "loss": 2.4455, "step": 1136 }, { "epoch": 0.65, "learning_rate": 0.0002975568123466966, "loss": 2.3829, "step": 1137 }, { "epoch": 0.65, "learning_rate": 0.00029671574715766524, "loss": 2.2501, "step": 1138 }, { "epoch": 0.65, "learning_rate": 0.000295875370519959, "loss": 2.3536, "step": 1139 }, { "epoch": 0.65, "learning_rate": 0.0002950356852800475, "loss": 2.2893, "step": 1140 }, { "epoch": 0.65, "learning_rate": 0.00029419669428205853, "loss": 2.3131, "step": 1141 }, { "epoch": 0.65, "learning_rate": 0.00029335840036776836, "loss": 2.3992, "step": 1142 }, { "epoch": 0.65, "learning_rate": 0.000292520806376592, "loss": 2.4069, "step": 1143 }, { "epoch": 0.65, "learning_rate": 0.000291683915145574, "loss": 2.3701, "step": 1144 }, { "epoch": 0.65, "learning_rate": 0.0002908477295093784, "loss": 2.4106, "step": 1145 }, { "epoch": 0.65, "learning_rate": 0.0002900122523002792, "loss": 2.3763, "step": 1146 }, { "epoch": 0.65, "learning_rate": 0.000289177486348151, "loss": 2.385, "step": 1147 }, { "epoch": 0.65, "learning_rate": 0.00028834343448045936, "loss": 2.433, "step": 1148 }, { "epoch": 0.65, "learning_rate": 0.00028751009952225116, "loss": 2.338, "step": 1149 }, { "epoch": 0.65, "learning_rate": 0.0002866774842961445, "loss": 2.3869, "step": 1150 }, { "epoch": 0.65, "learning_rate": 0.00028584559162232054, "loss": 2.4237, "step": 1151 }, { "epoch": 0.65, "learning_rate": 0.000285014424318512, "loss": 2.4219, "step": 1152 }, { "epoch": 0.66, "learning_rate": 0.0002841839851999958, "loss": 2.3598, "step": 1153 }, { "epoch": 0.66, "learning_rate": 0.00028335427707958116, "loss": 2.3515, "step": 1154 }, { "epoch": 0.66, "learning_rate": 0.0002825253027676026, "loss": 2.37, "step": 1155 }, { "epoch": 0.66, "learning_rate": 0.00028169706507190806, "loss": 2.3696, "step": 1156 }, { "epoch": 0.66, "learning_rate": 0.00028086956679785127, "loss": 2.396, "step": 1157 }, { "epoch": 0.66, "learning_rate": 0.0002800428107482806, "loss": 2.4012, "step": 1158 }, { "epoch": 0.66, "learning_rate": 0.0002792167997235313, "loss": 2.3801, "step": 1159 }, { "epoch": 0.66, "learning_rate": 0.0002783915365214147, "loss": 2.3561, "step": 1160 }, { "epoch": 0.66, "learning_rate": 0.00027756702393720876, "loss": 2.3585, "step": 1161 }, { "epoch": 0.66, "learning_rate": 0.00027674326476364995, "loss": 2.394, "step": 1162 }, { "epoch": 0.66, "learning_rate": 0.00027592026179092176, "loss": 2.3956, "step": 1163 }, { "epoch": 0.66, "learning_rate": 0.0002750980178066472, "loss": 2.3338, "step": 1164 }, { "epoch": 0.66, "learning_rate": 0.00027427653559587776, "loss": 2.4041, "step": 1165 }, { "epoch": 0.66, "learning_rate": 0.00027345581794108555, "loss": 2.2587, "step": 1166 }, { "epoch": 0.66, "learning_rate": 0.000272635867622152, "loss": 2.3868, "step": 1167 }, { "epoch": 0.66, "learning_rate": 0.00027181668741636046, "loss": 2.354, "step": 1168 }, { "epoch": 0.66, "learning_rate": 0.000270998280098385, "loss": 2.2642, "step": 1169 }, { "epoch": 0.66, "learning_rate": 0.00027018064844028244, "loss": 2.4138, "step": 1170 }, { "epoch": 0.67, "learning_rate": 0.00026936379521148184, "loss": 2.3258, "step": 1171 }, { "epoch": 0.67, "learning_rate": 0.0002685477231787761, "loss": 2.4207, "step": 1172 }, { "epoch": 0.67, "learning_rate": 0.00026773243510631146, "loss": 2.3677, "step": 1173 }, { "epoch": 0.67, "learning_rate": 0.0002669179337555794, "loss": 2.3905, "step": 1174 }, { "epoch": 0.67, "learning_rate": 0.0002661042218854063, "loss": 2.3724, "step": 1175 }, { "epoch": 0.67, "learning_rate": 0.0002652913022519449, "loss": 2.3516, "step": 1176 }, { "epoch": 0.67, "learning_rate": 0.0002644791776086638, "loss": 2.3496, "step": 1177 }, { "epoch": 0.67, "learning_rate": 0.0002636678507063397, "loss": 2.307, "step": 1178 }, { "epoch": 0.67, "learning_rate": 0.0002628573242930463, "loss": 2.402, "step": 1179 }, { "epoch": 0.67, "learning_rate": 0.000262047601114147, "loss": 2.3445, "step": 1180 }, { "epoch": 0.67, "learning_rate": 0.0002612386839122834, "loss": 2.3665, "step": 1181 }, { "epoch": 0.67, "learning_rate": 0.0002604305754273684, "loss": 2.3387, "step": 1182 }, { "epoch": 0.67, "learning_rate": 0.00025962327839657435, "loss": 2.3526, "step": 1183 }, { "epoch": 0.67, "learning_rate": 0.00025881679555432623, "loss": 2.4376, "step": 1184 }, { "epoch": 0.67, "learning_rate": 0.0002580111296322904, "loss": 2.4212, "step": 1185 }, { "epoch": 0.67, "learning_rate": 0.0002572062833593669, "loss": 2.3326, "step": 1186 }, { "epoch": 0.67, "learning_rate": 0.00025640225946167895, "loss": 2.3383, "step": 1187 }, { "epoch": 0.67, "learning_rate": 0.00025559906066256467, "loss": 2.4154, "step": 1188 }, { "epoch": 0.68, "learning_rate": 0.000254796689682567, "loss": 2.3926, "step": 1189 }, { "epoch": 0.68, "learning_rate": 0.0002539951492394256, "loss": 2.3123, "step": 1190 }, { "epoch": 0.68, "learning_rate": 0.0002531944420480662, "loss": 2.2899, "step": 1191 }, { "epoch": 0.68, "learning_rate": 0.00025239457082059297, "loss": 2.3844, "step": 1192 }, { "epoch": 0.68, "learning_rate": 0.00025159553826627825, "loss": 2.3151, "step": 1193 }, { "epoch": 0.68, "learning_rate": 0.00025079734709155323, "loss": 2.4088, "step": 1194 }, { "epoch": 0.68, "learning_rate": 0.0002500000000000001, "loss": 2.4768, "step": 1195 }, { "epoch": 0.68, "learning_rate": 0.00024920349969234114, "loss": 2.3978, "step": 1196 }, { "epoch": 0.68, "learning_rate": 0.0002484078488664313, "loss": 2.3491, "step": 1197 }, { "epoch": 0.68, "learning_rate": 0.00024761305021724734, "loss": 2.3269, "step": 1198 }, { "epoch": 0.68, "learning_rate": 0.0002468191064368805, "loss": 2.3558, "step": 1199 }, { "epoch": 0.68, "learning_rate": 0.0002460260202145256, "loss": 2.3755, "step": 1200 }, { "epoch": 0.68, "learning_rate": 0.00024523379423647333, "loss": 2.3858, "step": 1201 }, { "epoch": 0.68, "learning_rate": 0.0002444424311861006, "loss": 2.3476, "step": 1202 }, { "epoch": 0.68, "learning_rate": 0.00024365193374386148, "loss": 2.3317, "step": 1203 }, { "epoch": 0.68, "learning_rate": 0.00024286230458727754, "loss": 2.369, "step": 1204 }, { "epoch": 0.68, "learning_rate": 0.00024207354639093026, "loss": 2.277, "step": 1205 }, { "epoch": 0.69, "learning_rate": 0.00024128566182645024, "loss": 2.3561, "step": 1206 }, { "epoch": 0.69, "learning_rate": 0.00024049865356250955, "loss": 2.3518, "step": 1207 }, { "epoch": 0.69, "learning_rate": 0.00023971252426481166, "loss": 2.3853, "step": 1208 }, { "epoch": 0.69, "learning_rate": 0.00023892727659608338, "loss": 2.3574, "step": 1209 }, { "epoch": 0.69, "learning_rate": 0.00023814291321606462, "loss": 2.2977, "step": 1210 }, { "epoch": 0.69, "learning_rate": 0.00023735943678150097, "loss": 2.4343, "step": 1211 }, { "epoch": 0.69, "learning_rate": 0.00023657684994613282, "loss": 2.3817, "step": 1212 }, { "epoch": 0.69, "learning_rate": 0.00023579515536068836, "loss": 2.4122, "step": 1213 }, { "epoch": 0.69, "learning_rate": 0.00023501435567287277, "loss": 2.3791, "step": 1214 }, { "epoch": 0.69, "learning_rate": 0.0002342344535273608, "loss": 2.3868, "step": 1215 }, { "epoch": 0.69, "learning_rate": 0.0002334554515657863, "loss": 2.4097, "step": 1216 }, { "epoch": 0.69, "learning_rate": 0.000232677352426735, "loss": 2.316, "step": 1217 }, { "epoch": 0.69, "learning_rate": 0.00023190015874573373, "loss": 2.3906, "step": 1218 }, { "epoch": 0.69, "learning_rate": 0.00023112387315524336, "loss": 2.3863, "step": 1219 }, { "epoch": 0.69, "learning_rate": 0.00023034849828464787, "loss": 2.3612, "step": 1220 }, { "epoch": 0.69, "learning_rate": 0.00022957403676024762, "loss": 2.3062, "step": 1221 }, { "epoch": 0.69, "learning_rate": 0.00022880049120524833, "loss": 2.3007, "step": 1222 }, { "epoch": 0.69, "learning_rate": 0.0002280278642397541, "loss": 2.2701, "step": 1223 }, { "epoch": 0.7, "learning_rate": 0.0002272561584807567, "loss": 2.4067, "step": 1224 }, { "epoch": 0.7, "learning_rate": 0.0002264853765421287, "loss": 2.339, "step": 1225 }, { "epoch": 0.7, "learning_rate": 0.0002257155210346124, "loss": 2.314, "step": 1226 }, { "epoch": 0.7, "learning_rate": 0.0002249465945658135, "loss": 2.3595, "step": 1227 }, { "epoch": 0.7, "learning_rate": 0.00022417859974018967, "loss": 2.2792, "step": 1228 }, { "epoch": 0.7, "learning_rate": 0.00022341153915904387, "loss": 2.3722, "step": 1229 }, { "epoch": 0.7, "learning_rate": 0.00022264541542051398, "loss": 2.3774, "step": 1230 }, { "epoch": 0.7, "learning_rate": 0.00022188023111956517, "loss": 2.2806, "step": 1231 }, { "epoch": 0.7, "learning_rate": 0.00022111598884798022, "loss": 2.3556, "step": 1232 }, { "epoch": 0.7, "learning_rate": 0.00022035269119435164, "loss": 2.3964, "step": 1233 }, { "epoch": 0.7, "learning_rate": 0.00021959034074407163, "loss": 2.3563, "step": 1234 }, { "epoch": 0.7, "learning_rate": 0.00021882894007932492, "loss": 2.3775, "step": 1235 }, { "epoch": 0.7, "learning_rate": 0.00021806849177907845, "loss": 2.3476, "step": 1236 }, { "epoch": 0.7, "learning_rate": 0.0002173089984190739, "loss": 2.3872, "step": 1237 }, { "epoch": 0.7, "learning_rate": 0.00021655046257181842, "loss": 2.2585, "step": 1238 }, { "epoch": 0.7, "learning_rate": 0.00021579288680657538, "loss": 2.3441, "step": 1239 }, { "epoch": 0.7, "learning_rate": 0.00021503627368935703, "loss": 2.3755, "step": 1240 }, { "epoch": 0.71, "learning_rate": 0.00021428062578291414, "loss": 2.3433, "step": 1241 }, { "epoch": 0.71, "learning_rate": 0.00021352594564672905, "loss": 2.3281, "step": 1242 }, { "epoch": 0.71, "learning_rate": 0.00021277223583700527, "loss": 2.3531, "step": 1243 }, { "epoch": 0.71, "learning_rate": 0.0002120194989066605, "loss": 2.3857, "step": 1244 }, { "epoch": 0.71, "learning_rate": 0.0002112677374053164, "loss": 2.3441, "step": 1245 }, { "epoch": 0.71, "learning_rate": 0.00021051695387929153, "loss": 2.3564, "step": 1246 }, { "epoch": 0.71, "learning_rate": 0.00020976715087159104, "loss": 2.396, "step": 1247 }, { "epoch": 0.71, "learning_rate": 0.00020901833092189982, "loss": 2.3913, "step": 1248 }, { "epoch": 0.71, "learning_rate": 0.0002082704965665721, "loss": 2.3628, "step": 1249 }, { "epoch": 0.71, "learning_rate": 0.00020752365033862463, "loss": 2.288, "step": 1250 }, { "epoch": 0.71, "learning_rate": 0.00020677779476772651, "loss": 2.3476, "step": 1251 }, { "epoch": 0.71, "learning_rate": 0.00020603293238019183, "loss": 2.3533, "step": 1252 }, { "epoch": 0.71, "learning_rate": 0.00020528906569897043, "loss": 2.281, "step": 1253 }, { "epoch": 0.71, "learning_rate": 0.00020454619724363994, "loss": 2.4064, "step": 1254 }, { "epoch": 0.71, "learning_rate": 0.00020380432953039603, "loss": 2.4079, "step": 1255 }, { "epoch": 0.71, "learning_rate": 0.00020306346507204564, "loss": 2.3546, "step": 1256 }, { "epoch": 0.71, "learning_rate": 0.00020232360637799685, "loss": 2.3198, "step": 1257 }, { "epoch": 0.71, "learning_rate": 0.00020158475595425158, "loss": 2.3119, "step": 1258 }, { "epoch": 0.72, "learning_rate": 0.00020084691630339614, "loss": 2.3479, "step": 1259 }, { "epoch": 0.72, "learning_rate": 0.00020011008992459374, "loss": 2.3278, "step": 1260 }, { "epoch": 0.72, "learning_rate": 0.0001993742793135749, "loss": 2.3611, "step": 1261 }, { "epoch": 0.72, "learning_rate": 0.0001986394869626303, "loss": 2.2788, "step": 1262 }, { "epoch": 0.72, "learning_rate": 0.00019790571536060087, "loss": 2.3928, "step": 1263 }, { "epoch": 0.72, "learning_rate": 0.00019717296699287086, "loss": 2.3541, "step": 1264 }, { "epoch": 0.72, "learning_rate": 0.00019644124434135802, "loss": 2.3972, "step": 1265 }, { "epoch": 0.72, "learning_rate": 0.00019571054988450648, "loss": 2.4321, "step": 1266 }, { "epoch": 0.72, "learning_rate": 0.0001949808860972771, "loss": 2.3616, "step": 1267 }, { "epoch": 0.72, "learning_rate": 0.0001942522554511404, "loss": 2.3426, "step": 1268 }, { "epoch": 0.72, "learning_rate": 0.00019352466041406684, "loss": 2.309, "step": 1269 }, { "epoch": 0.72, "learning_rate": 0.00019279810345051957, "loss": 2.3185, "step": 1270 }, { "epoch": 0.72, "learning_rate": 0.0001920725870214458, "loss": 2.3605, "step": 1271 }, { "epoch": 0.72, "learning_rate": 0.00019134811358426757, "loss": 2.2825, "step": 1272 }, { "epoch": 0.72, "learning_rate": 0.0001906246855928751, "loss": 2.3287, "step": 1273 }, { "epoch": 0.72, "learning_rate": 0.00018990230549761666, "loss": 2.3759, "step": 1274 }, { "epoch": 0.72, "learning_rate": 0.0001891809757452919, "loss": 2.3883, "step": 1275 }, { "epoch": 0.72, "learning_rate": 0.00018846069877914224, "loss": 2.3734, "step": 1276 }, { "epoch": 0.73, "learning_rate": 0.00018774147703884365, "loss": 2.3386, "step": 1277 }, { "epoch": 0.73, "learning_rate": 0.00018702331296049742, "loss": 2.3961, "step": 1278 }, { "epoch": 0.73, "learning_rate": 0.00018630620897662275, "loss": 2.305, "step": 1279 }, { "epoch": 0.73, "learning_rate": 0.0001855901675161481, "loss": 2.2302, "step": 1280 }, { "epoch": 0.73, "learning_rate": 0.00018487519100440314, "loss": 2.3607, "step": 1281 }, { "epoch": 0.73, "learning_rate": 0.00018416128186310988, "loss": 2.405, "step": 1282 }, { "epoch": 0.73, "learning_rate": 0.00018344844251037574, "loss": 2.3226, "step": 1283 }, { "epoch": 0.73, "learning_rate": 0.0001827366753606839, "loss": 2.4195, "step": 1284 }, { "epoch": 0.73, "learning_rate": 0.00018202598282488652, "loss": 2.3763, "step": 1285 }, { "epoch": 0.73, "learning_rate": 0.0001813163673101953, "loss": 2.3062, "step": 1286 }, { "epoch": 0.73, "learning_rate": 0.00018060783122017448, "loss": 2.3832, "step": 1287 }, { "epoch": 0.73, "learning_rate": 0.00017990037695473154, "loss": 2.2896, "step": 1288 }, { "epoch": 0.73, "learning_rate": 0.00017919400691011035, "loss": 2.3387, "step": 1289 }, { "epoch": 0.73, "learning_rate": 0.00017848872347888162, "loss": 2.357, "step": 1290 }, { "epoch": 0.73, "learning_rate": 0.0001777845290499363, "loss": 2.2424, "step": 1291 }, { "epoch": 0.73, "learning_rate": 0.0001770814260084761, "loss": 2.4304, "step": 1292 }, { "epoch": 0.73, "learning_rate": 0.00017637941673600665, "loss": 2.2946, "step": 1293 }, { "epoch": 0.74, "learning_rate": 0.0001756785036103282, "loss": 2.3486, "step": 1294 }, { "epoch": 0.74, "learning_rate": 0.00017497868900552888, "loss": 2.3464, "step": 1295 }, { "epoch": 0.74, "learning_rate": 0.0001742799752919753, "loss": 2.3703, "step": 1296 }, { "epoch": 0.74, "learning_rate": 0.0001735823648363059, "loss": 2.3988, "step": 1297 }, { "epoch": 0.74, "learning_rate": 0.00017288586000142149, "loss": 2.4077, "step": 1298 }, { "epoch": 0.74, "learning_rate": 0.00017219046314647873, "loss": 2.2285, "step": 1299 }, { "epoch": 0.74, "learning_rate": 0.00017149617662688072, "loss": 2.3668, "step": 1300 }, { "epoch": 0.74, "learning_rate": 0.00017080300279427035, "loss": 2.3386, "step": 1301 }, { "epoch": 0.74, "learning_rate": 0.00017011094399652104, "loss": 2.3591, "step": 1302 }, { "epoch": 0.74, "learning_rate": 0.00016942000257772998, "loss": 2.2742, "step": 1303 }, { "epoch": 0.74, "learning_rate": 0.0001687301808782095, "loss": 2.2877, "step": 1304 }, { "epoch": 0.74, "learning_rate": 0.00016804148123447898, "loss": 2.3301, "step": 1305 }, { "epoch": 0.74, "learning_rate": 0.00016735390597925765, "loss": 2.2378, "step": 1306 }, { "epoch": 0.74, "learning_rate": 0.00016666745744145617, "loss": 2.3547, "step": 1307 }, { "epoch": 0.74, "learning_rate": 0.00016598213794616863, "loss": 2.3288, "step": 1308 }, { "epoch": 0.74, "learning_rate": 0.00016529794981466527, "loss": 2.3687, "step": 1309 }, { "epoch": 0.74, "learning_rate": 0.00016461489536438378, "loss": 2.3908, "step": 1310 }, { "epoch": 0.74, "learning_rate": 0.00016393297690892256, "loss": 2.4032, "step": 1311 }, { "epoch": 0.75, "learning_rate": 0.0001632521967580315, "loss": 2.3021, "step": 1312 }, { "epoch": 0.75, "learning_rate": 0.00016257255721760538, "loss": 2.2437, "step": 1313 }, { "epoch": 0.75, "learning_rate": 0.00016189406058967577, "loss": 2.4009, "step": 1314 }, { "epoch": 0.75, "learning_rate": 0.00016121670917240228, "loss": 2.4062, "step": 1315 }, { "epoch": 0.75, "learning_rate": 0.00016054050526006635, "loss": 2.2743, "step": 1316 }, { "epoch": 0.75, "learning_rate": 0.000159865451143062, "loss": 2.3105, "step": 1317 }, { "epoch": 0.75, "learning_rate": 0.0001591915491078894, "loss": 2.369, "step": 1318 }, { "epoch": 0.75, "learning_rate": 0.00015851880143714575, "loss": 2.3745, "step": 1319 }, { "epoch": 0.75, "learning_rate": 0.00015784721040951899, "loss": 2.3623, "step": 1320 }, { "epoch": 0.75, "learning_rate": 0.0001571767782997786, "loss": 2.3515, "step": 1321 }, { "epoch": 0.75, "learning_rate": 0.00015650750737876935, "loss": 2.3739, "step": 1322 }, { "epoch": 0.75, "learning_rate": 0.0001558393999134023, "loss": 2.371, "step": 1323 }, { "epoch": 0.75, "learning_rate": 0.00015517245816664834, "loss": 2.2585, "step": 1324 }, { "epoch": 0.75, "learning_rate": 0.00015450668439752924, "loss": 2.3359, "step": 1325 }, { "epoch": 0.75, "learning_rate": 0.0001538420808611114, "loss": 2.4117, "step": 1326 }, { "epoch": 0.75, "learning_rate": 0.00015317864980849666, "loss": 2.3056, "step": 1327 }, { "epoch": 0.75, "learning_rate": 0.0001525163934868164, "loss": 2.3315, "step": 1328 }, { "epoch": 0.76, "learning_rate": 0.00015185531413922215, "loss": 2.3256, "step": 1329 }, { "epoch": 0.76, "learning_rate": 0.0001511954140048797, "loss": 2.3463, "step": 1330 }, { "epoch": 0.76, "learning_rate": 0.0001505366953189597, "loss": 2.3142, "step": 1331 }, { "epoch": 0.76, "learning_rate": 0.00014987916031263232, "loss": 2.317, "step": 1332 }, { "epoch": 0.76, "learning_rate": 0.00014922281121305736, "loss": 2.4335, "step": 1333 }, { "epoch": 0.76, "learning_rate": 0.00014856765024337843, "loss": 2.3708, "step": 1334 }, { "epoch": 0.76, "learning_rate": 0.00014791367962271423, "loss": 2.299, "step": 1335 }, { "epoch": 0.76, "learning_rate": 0.00014726090156615235, "loss": 2.3126, "step": 1336 }, { "epoch": 0.76, "learning_rate": 0.0001466093182847401, "loss": 2.3929, "step": 1337 }, { "epoch": 0.76, "learning_rate": 0.00014595893198547888, "loss": 2.3254, "step": 1338 }, { "epoch": 0.76, "learning_rate": 0.00014530974487131482, "loss": 2.3279, "step": 1339 }, { "epoch": 0.76, "learning_rate": 0.00014466175914113304, "loss": 2.3223, "step": 1340 }, { "epoch": 0.76, "learning_rate": 0.00014401497698974874, "loss": 2.3211, "step": 1341 }, { "epoch": 0.76, "learning_rate": 0.00014336940060790117, "loss": 2.3453, "step": 1342 }, { "epoch": 0.76, "learning_rate": 0.00014272503218224454, "loss": 2.4381, "step": 1343 }, { "epoch": 0.76, "learning_rate": 0.00014208187389534255, "loss": 2.3102, "step": 1344 }, { "epoch": 0.76, "learning_rate": 0.00014143992792565917, "loss": 2.3664, "step": 1345 }, { "epoch": 0.76, "learning_rate": 0.00014079919644755258, "loss": 2.3132, "step": 1346 }, { "epoch": 0.77, "learning_rate": 0.0001401596816312673, "loss": 2.3598, "step": 1347 }, { "epoch": 0.77, "learning_rate": 0.00013952138564292643, "loss": 2.404, "step": 1348 }, { "epoch": 0.77, "learning_rate": 0.00013888431064452527, "loss": 2.4006, "step": 1349 }, { "epoch": 0.77, "learning_rate": 0.00013824845879392301, "loss": 2.3513, "step": 1350 }, { "epoch": 0.77, "learning_rate": 0.0001376138322448363, "loss": 2.2951, "step": 1351 }, { "epoch": 0.77, "learning_rate": 0.00013698043314683107, "loss": 2.3619, "step": 1352 }, { "epoch": 0.77, "learning_rate": 0.00013634826364531617, "loss": 2.3567, "step": 1353 }, { "epoch": 0.77, "learning_rate": 0.00013571732588153512, "loss": 2.324, "step": 1354 }, { "epoch": 0.77, "learning_rate": 0.00013508762199256004, "loss": 2.2771, "step": 1355 }, { "epoch": 0.77, "learning_rate": 0.00013445915411128295, "loss": 2.3974, "step": 1356 }, { "epoch": 0.77, "learning_rate": 0.00013383192436641, "loss": 2.4326, "step": 1357 }, { "epoch": 0.77, "learning_rate": 0.00013320593488245336, "loss": 2.3954, "step": 1358 }, { "epoch": 0.77, "learning_rate": 0.0001325811877797245, "loss": 2.3697, "step": 1359 }, { "epoch": 0.77, "learning_rate": 0.00013195768517432611, "loss": 2.4528, "step": 1360 }, { "epoch": 0.77, "learning_rate": 0.0001313354291781465, "loss": 2.2912, "step": 1361 }, { "epoch": 0.77, "learning_rate": 0.0001307144218988507, "loss": 2.2383, "step": 1362 }, { "epoch": 0.77, "learning_rate": 0.00013009466543987487, "loss": 2.3279, "step": 1363 }, { "epoch": 0.77, "learning_rate": 0.00012947616190041783, "loss": 2.3716, "step": 1364 }, { "epoch": 0.78, "learning_rate": 0.00012885891337543538, "loss": 2.3219, "step": 1365 }, { "epoch": 0.78, "learning_rate": 0.00012824292195563148, "loss": 2.4057, "step": 1366 }, { "epoch": 0.78, "learning_rate": 0.00012762818972745292, "loss": 2.3609, "step": 1367 }, { "epoch": 0.78, "learning_rate": 0.0001270147187730809, "loss": 2.3141, "step": 1368 }, { "epoch": 0.78, "learning_rate": 0.0001264025111704249, "loss": 2.3393, "step": 1369 }, { "epoch": 0.78, "learning_rate": 0.00012579156899311485, "loss": 2.291, "step": 1370 }, { "epoch": 0.78, "learning_rate": 0.00012518189431049497, "loss": 2.3709, "step": 1371 }, { "epoch": 0.78, "learning_rate": 0.00012457348918761585, "loss": 2.2204, "step": 1372 }, { "epoch": 0.78, "learning_rate": 0.00012396635568522835, "loss": 2.3543, "step": 1373 }, { "epoch": 0.78, "learning_rate": 0.00012336049585977567, "loss": 2.3487, "step": 1374 }, { "epoch": 0.78, "learning_rate": 0.00012275591176338753, "loss": 2.3581, "step": 1375 }, { "epoch": 0.78, "learning_rate": 0.00012215260544387187, "loss": 2.3577, "step": 1376 }, { "epoch": 0.78, "learning_rate": 0.00012155057894470928, "loss": 2.3512, "step": 1377 }, { "epoch": 0.78, "learning_rate": 0.00012094983430504491, "loss": 2.2911, "step": 1378 }, { "epoch": 0.78, "learning_rate": 0.00012035037355968259, "loss": 2.3251, "step": 1379 }, { "epoch": 0.78, "learning_rate": 0.00011975219873907678, "loss": 2.2616, "step": 1380 }, { "epoch": 0.78, "learning_rate": 0.00011915531186932688, "loss": 2.3803, "step": 1381 }, { "epoch": 0.79, "learning_rate": 0.00011855971497216983, "loss": 2.3612, "step": 1382 }, { "epoch": 0.79, "learning_rate": 0.00011796541006497269, "loss": 2.3583, "step": 1383 }, { "epoch": 0.79, "learning_rate": 0.00011737239916072695, "loss": 2.3789, "step": 1384 }, { "epoch": 0.79, "learning_rate": 0.00011678068426804106, "loss": 2.457, "step": 1385 }, { "epoch": 0.79, "learning_rate": 0.00011619026739113331, "loss": 2.2434, "step": 1386 }, { "epoch": 0.79, "learning_rate": 0.00011560115052982606, "loss": 2.2616, "step": 1387 }, { "epoch": 0.79, "learning_rate": 0.00011501333567953758, "loss": 2.3304, "step": 1388 }, { "epoch": 0.79, "learning_rate": 0.00011442682483127687, "loss": 2.3475, "step": 1389 }, { "epoch": 0.79, "learning_rate": 0.00011384161997163533, "loss": 2.2778, "step": 1390 }, { "epoch": 0.79, "learning_rate": 0.00011325772308278132, "loss": 2.3904, "step": 1391 }, { "epoch": 0.79, "learning_rate": 0.00011267513614245289, "loss": 2.3907, "step": 1392 }, { "epoch": 0.79, "learning_rate": 0.00011209386112395076, "loss": 2.3336, "step": 1393 }, { "epoch": 0.79, "learning_rate": 0.00011151389999613248, "loss": 2.3793, "step": 1394 }, { "epoch": 0.79, "learning_rate": 0.00011093525472340472, "loss": 2.3641, "step": 1395 }, { "epoch": 0.79, "learning_rate": 0.00011035792726571776, "loss": 2.3915, "step": 1396 }, { "epoch": 0.79, "learning_rate": 0.00010978191957855771, "loss": 2.3244, "step": 1397 }, { "epoch": 0.79, "learning_rate": 0.00010920723361294099, "loss": 2.2922, "step": 1398 }, { "epoch": 0.79, "learning_rate": 0.00010863387131540653, "loss": 2.4013, "step": 1399 }, { "epoch": 0.8, "learning_rate": 0.0001080618346280105, "loss": 2.3676, "step": 1400 }, { "epoch": 0.8, "learning_rate": 0.00010749112548831846, "loss": 2.3596, "step": 1401 }, { "epoch": 0.8, "learning_rate": 0.0001069217458293999, "loss": 2.3525, "step": 1402 }, { "epoch": 0.8, "learning_rate": 0.00010635369757982061, "loss": 2.3417, "step": 1403 }, { "epoch": 0.8, "learning_rate": 0.00010578698266363734, "loss": 2.358, "step": 1404 }, { "epoch": 0.8, "learning_rate": 0.00010522160300039007, "loss": 2.3709, "step": 1405 }, { "epoch": 0.8, "learning_rate": 0.00010465756050509661, "loss": 2.4521, "step": 1406 }, { "epoch": 0.8, "learning_rate": 0.00010409485708824506, "loss": 2.4123, "step": 1407 }, { "epoch": 0.8, "learning_rate": 0.00010353349465578859, "loss": 2.3052, "step": 1408 }, { "epoch": 0.8, "learning_rate": 0.00010297347510913746, "loss": 2.3366, "step": 1409 }, { "epoch": 0.8, "learning_rate": 0.00010241480034515404, "loss": 2.3183, "step": 1410 }, { "epoch": 0.8, "learning_rate": 0.00010185747225614539, "loss": 2.3082, "step": 1411 }, { "epoch": 0.8, "learning_rate": 0.00010130149272985756, "loss": 2.37, "step": 1412 }, { "epoch": 0.8, "learning_rate": 0.00010074686364946823, "loss": 2.399, "step": 1413 }, { "epoch": 0.8, "learning_rate": 0.00010019358689358154, "loss": 2.4144, "step": 1414 }, { "epoch": 0.8, "learning_rate": 9.964166433622068e-05, "loss": 2.3796, "step": 1415 }, { "epoch": 0.8, "learning_rate": 9.90910978468224e-05, "loss": 2.3717, "step": 1416 }, { "epoch": 0.81, "learning_rate": 9.854188929022984e-05, "loss": 2.3963, "step": 1417 }, { "epoch": 0.81, "learning_rate": 9.799404052668704e-05, "loss": 2.3167, "step": 1418 }, { "epoch": 0.81, "learning_rate": 9.744755341183181e-05, "loss": 2.4056, "step": 1419 }, { "epoch": 0.81, "learning_rate": 9.690242979669035e-05, "loss": 2.3068, "step": 1420 }, { "epoch": 0.81, "learning_rate": 9.635867152766997e-05, "loss": 2.3482, "step": 1421 }, { "epoch": 0.81, "learning_rate": 9.581628044655394e-05, "loss": 2.3084, "step": 1422 }, { "epoch": 0.81, "learning_rate": 9.527525839049406e-05, "loss": 2.3281, "step": 1423 }, { "epoch": 0.81, "learning_rate": 9.473560719200564e-05, "loss": 2.3111, "step": 1424 }, { "epoch": 0.81, "learning_rate": 9.419732867896047e-05, "loss": 2.3346, "step": 1425 }, { "epoch": 0.81, "learning_rate": 9.366042467458064e-05, "loss": 2.3117, "step": 1426 }, { "epoch": 0.81, "learning_rate": 9.312489699743309e-05, "loss": 2.3598, "step": 1427 }, { "epoch": 0.81, "learning_rate": 9.259074746142238e-05, "loss": 2.3327, "step": 1428 }, { "epoch": 0.81, "learning_rate": 9.205797787578563e-05, "loss": 2.3157, "step": 1429 }, { "epoch": 0.81, "learning_rate": 9.152659004508545e-05, "loss": 2.4291, "step": 1430 }, { "epoch": 0.81, "learning_rate": 9.099658576920466e-05, "loss": 2.3179, "step": 1431 }, { "epoch": 0.81, "learning_rate": 9.046796684333947e-05, "loss": 2.3465, "step": 1432 }, { "epoch": 0.81, "learning_rate": 8.994073505799399e-05, "loss": 2.395, "step": 1433 }, { "epoch": 0.81, "learning_rate": 8.941489219897354e-05, "loss": 2.3563, "step": 1434 }, { "epoch": 0.82, "learning_rate": 8.889044004737939e-05, "loss": 2.3259, "step": 1435 }, { "epoch": 0.82, "learning_rate": 8.836738037960179e-05, "loss": 2.3736, "step": 1436 }, { "epoch": 0.82, "learning_rate": 8.78457149673152e-05, "loss": 2.3209, "step": 1437 }, { "epoch": 0.82, "learning_rate": 8.732544557747073e-05, "loss": 2.3401, "step": 1438 }, { "epoch": 0.82, "learning_rate": 8.680657397229158e-05, "loss": 2.3871, "step": 1439 }, { "epoch": 0.82, "learning_rate": 8.628910190926598e-05, "loss": 2.3419, "step": 1440 }, { "epoch": 0.82, "learning_rate": 8.577303114114227e-05, "loss": 2.3418, "step": 1441 }, { "epoch": 0.82, "learning_rate": 8.525836341592175e-05, "loss": 2.4486, "step": 1442 }, { "epoch": 0.82, "learning_rate": 8.474510047685408e-05, "loss": 2.3244, "step": 1443 }, { "epoch": 0.82, "learning_rate": 8.423324406243016e-05, "loss": 2.3141, "step": 1444 }, { "epoch": 0.82, "learning_rate": 8.372279590637738e-05, "loss": 2.321, "step": 1445 }, { "epoch": 0.82, "learning_rate": 8.321375773765255e-05, "loss": 2.2767, "step": 1446 }, { "epoch": 0.82, "learning_rate": 8.27061312804372e-05, "loss": 2.3465, "step": 1447 }, { "epoch": 0.82, "learning_rate": 8.21999182541307e-05, "loss": 2.3883, "step": 1448 }, { "epoch": 0.82, "learning_rate": 8.169512037334554e-05, "loss": 2.3339, "step": 1449 }, { "epoch": 0.82, "learning_rate": 8.119173934790019e-05, "loss": 2.3466, "step": 1450 }, { "epoch": 0.82, "learning_rate": 8.068977688281482e-05, "loss": 2.3427, "step": 1451 }, { "epoch": 0.82, "learning_rate": 8.018923467830403e-05, "loss": 2.3627, "step": 1452 }, { "epoch": 0.83, "learning_rate": 7.969011442977237e-05, "loss": 2.3335, "step": 1453 }, { "epoch": 0.83, "learning_rate": 7.919241782780756e-05, "loss": 2.2868, "step": 1454 }, { "epoch": 0.83, "learning_rate": 7.869614655817576e-05, "loss": 2.3564, "step": 1455 }, { "epoch": 0.83, "learning_rate": 7.820130230181472e-05, "loss": 2.333, "step": 1456 }, { "epoch": 0.83, "learning_rate": 7.770788673482921e-05, "loss": 2.2877, "step": 1457 }, { "epoch": 0.83, "learning_rate": 7.721590152848473e-05, "loss": 2.3721, "step": 1458 }, { "epoch": 0.83, "learning_rate": 7.672534834920164e-05, "loss": 2.3642, "step": 1459 }, { "epoch": 0.83, "learning_rate": 7.623622885855036e-05, "loss": 2.3208, "step": 1460 }, { "epoch": 0.83, "learning_rate": 7.574854471324461e-05, "loss": 2.3572, "step": 1461 }, { "epoch": 0.83, "learning_rate": 7.526229756513686e-05, "loss": 2.3688, "step": 1462 }, { "epoch": 0.83, "learning_rate": 7.477748906121223e-05, "loss": 2.3688, "step": 1463 }, { "epoch": 0.83, "learning_rate": 7.429412084358261e-05, "loss": 2.319, "step": 1464 }, { "epoch": 0.83, "learning_rate": 7.381219454948196e-05, "loss": 2.3633, "step": 1465 }, { "epoch": 0.83, "learning_rate": 7.333171181125975e-05, "loss": 2.3446, "step": 1466 }, { "epoch": 0.83, "learning_rate": 7.285267425637621e-05, "loss": 2.295, "step": 1467 }, { "epoch": 0.83, "learning_rate": 7.237508350739663e-05, "loss": 2.3123, "step": 1468 }, { "epoch": 0.83, "learning_rate": 7.189894118198542e-05, "loss": 2.3276, "step": 1469 }, { "epoch": 0.84, "learning_rate": 7.142424889290139e-05, "loss": 2.3237, "step": 1470 }, { "epoch": 0.84, "learning_rate": 7.095100824799145e-05, "loss": 2.3258, "step": 1471 }, { "epoch": 0.84, "learning_rate": 7.047922085018604e-05, "loss": 2.3153, "step": 1472 }, { "epoch": 0.84, "learning_rate": 7.00088882974929e-05, "loss": 2.3657, "step": 1473 }, { "epoch": 0.84, "learning_rate": 6.954001218299227e-05, "loss": 2.3485, "step": 1474 }, { "epoch": 0.84, "learning_rate": 6.907259409483097e-05, "loss": 2.3716, "step": 1475 }, { "epoch": 0.84, "learning_rate": 6.860663561621766e-05, "loss": 2.3926, "step": 1476 }, { "epoch": 0.84, "learning_rate": 6.814213832541655e-05, "loss": 2.3702, "step": 1477 }, { "epoch": 0.84, "learning_rate": 6.767910379574332e-05, "loss": 2.3756, "step": 1478 }, { "epoch": 0.84, "learning_rate": 6.721753359555833e-05, "loss": 2.3763, "step": 1479 }, { "epoch": 0.84, "learning_rate": 6.675742928826273e-05, "loss": 2.373, "step": 1480 }, { "epoch": 0.84, "learning_rate": 6.629879243229198e-05, "loss": 2.2572, "step": 1481 }, { "epoch": 0.84, "learning_rate": 6.584162458111148e-05, "loss": 2.3411, "step": 1482 }, { "epoch": 0.84, "learning_rate": 6.538592728321053e-05, "loss": 2.3552, "step": 1483 }, { "epoch": 0.84, "learning_rate": 6.493170208209781e-05, "loss": 2.4024, "step": 1484 }, { "epoch": 0.84, "learning_rate": 6.447895051629549e-05, "loss": 2.3138, "step": 1485 }, { "epoch": 0.84, "learning_rate": 6.402767411933475e-05, "loss": 2.3135, "step": 1486 }, { "epoch": 0.84, "learning_rate": 6.357787441974967e-05, "loss": 2.271, "step": 1487 }, { "epoch": 0.85, "learning_rate": 6.312955294107303e-05, "loss": 2.3715, "step": 1488 }, { "epoch": 0.85, "learning_rate": 6.268271120183022e-05, "loss": 2.3508, "step": 1489 }, { "epoch": 0.85, "learning_rate": 6.22373507155351e-05, "loss": 2.3682, "step": 1490 }, { "epoch": 0.85, "learning_rate": 6.179347299068378e-05, "loss": 2.361, "step": 1491 }, { "epoch": 0.85, "learning_rate": 6.135107953075048e-05, "loss": 2.2785, "step": 1492 }, { "epoch": 0.85, "learning_rate": 6.091017183418157e-05, "loss": 2.3612, "step": 1493 }, { "epoch": 0.85, "learning_rate": 6.047075139439151e-05, "loss": 2.3511, "step": 1494 }, { "epoch": 0.85, "learning_rate": 6.00328196997566e-05, "loss": 2.3548, "step": 1495 }, { "epoch": 0.85, "learning_rate": 5.9596378233611135e-05, "loss": 2.257, "step": 1496 }, { "epoch": 0.85, "learning_rate": 5.9161428474241274e-05, "loss": 2.2737, "step": 1497 }, { "epoch": 0.85, "learning_rate": 5.872797189488105e-05, "loss": 2.3026, "step": 1498 }, { "epoch": 0.85, "learning_rate": 5.829600996370649e-05, "loss": 2.2523, "step": 1499 }, { "epoch": 0.85, "learning_rate": 5.7865544143831274e-05, "loss": 2.2814, "step": 1500 }, { "epoch": 0.85, "learning_rate": 5.743657589330159e-05, "loss": 2.2883, "step": 1501 }, { "epoch": 0.85, "learning_rate": 5.700910666509096e-05, "loss": 2.2735, "step": 1502 }, { "epoch": 0.85, "learning_rate": 5.6583137907095686e-05, "loss": 2.3183, "step": 1503 }, { "epoch": 0.85, "learning_rate": 5.615867106212957e-05, "loss": 2.3637, "step": 1504 }, { "epoch": 0.86, "learning_rate": 5.573570756791957e-05, "loss": 2.2044, "step": 1505 }, { "epoch": 0.86, "learning_rate": 5.531424885710012e-05, "loss": 2.3988, "step": 1506 }, { "epoch": 0.86, "learning_rate": 5.4894296357209274e-05, "loss": 2.3839, "step": 1507 }, { "epoch": 0.86, "learning_rate": 5.4475851490682904e-05, "loss": 2.3843, "step": 1508 }, { "epoch": 0.86, "learning_rate": 5.4058915674850664e-05, "loss": 2.3209, "step": 1509 }, { "epoch": 0.86, "learning_rate": 5.36434903219305e-05, "loss": 2.3541, "step": 1510 }, { "epoch": 0.86, "learning_rate": 5.322957683902457e-05, "loss": 2.3996, "step": 1511 }, { "epoch": 0.86, "learning_rate": 5.2817176628113807e-05, "loss": 2.3179, "step": 1512 }, { "epoch": 0.86, "learning_rate": 5.2406291086053826e-05, "loss": 2.3432, "step": 1513 }, { "epoch": 0.86, "learning_rate": 5.199692160456948e-05, "loss": 2.367, "step": 1514 }, { "epoch": 0.86, "learning_rate": 5.158906957025078e-05, "loss": 2.368, "step": 1515 }, { "epoch": 0.86, "learning_rate": 5.118273636454784e-05, "loss": 2.3652, "step": 1516 }, { "epoch": 0.86, "learning_rate": 5.077792336376647e-05, "loss": 2.3421, "step": 1517 }, { "epoch": 0.86, "learning_rate": 5.0374631939062943e-05, "loss": 2.3421, "step": 1518 }, { "epoch": 0.86, "learning_rate": 4.99728634564402e-05, "loss": 2.2835, "step": 1519 }, { "epoch": 0.86, "learning_rate": 4.9572619276742393e-05, "loss": 2.3785, "step": 1520 }, { "epoch": 0.86, "learning_rate": 4.917390075565098e-05, "loss": 2.3257, "step": 1521 }, { "epoch": 0.86, "learning_rate": 4.8776709243679495e-05, "loss": 2.3212, "step": 1522 }, { "epoch": 0.87, "learning_rate": 4.838104608616961e-05, "loss": 2.3864, "step": 1523 }, { "epoch": 0.87, "learning_rate": 4.798691262328586e-05, "loss": 2.3235, "step": 1524 }, { "epoch": 0.87, "learning_rate": 4.759431019001198e-05, "loss": 2.3822, "step": 1525 }, { "epoch": 0.87, "learning_rate": 4.720324011614535e-05, "loss": 2.3212, "step": 1526 }, { "epoch": 0.87, "learning_rate": 4.6813703726293676e-05, "loss": 2.451, "step": 1527 }, { "epoch": 0.87, "learning_rate": 4.642570233986915e-05, "loss": 2.3598, "step": 1528 }, { "epoch": 0.87, "learning_rate": 4.6039237271085386e-05, "loss": 2.2875, "step": 1529 }, { "epoch": 0.87, "learning_rate": 4.565430982895175e-05, "loss": 2.4021, "step": 1530 }, { "epoch": 0.87, "learning_rate": 4.52709213172699e-05, "loss": 2.3524, "step": 1531 }, { "epoch": 0.87, "learning_rate": 4.488907303462841e-05, "loss": 2.3779, "step": 1532 }, { "epoch": 0.87, "learning_rate": 4.4508766274399424e-05, "loss": 2.3639, "step": 1533 }, { "epoch": 0.87, "learning_rate": 4.4130002324733355e-05, "loss": 2.3627, "step": 1534 }, { "epoch": 0.87, "learning_rate": 4.375278246855513e-05, "loss": 2.3944, "step": 1535 }, { "epoch": 0.87, "learning_rate": 4.3377107983559596e-05, "loss": 2.2929, "step": 1536 }, { "epoch": 0.87, "learning_rate": 4.300298014220699e-05, "loss": 2.3835, "step": 1537 }, { "epoch": 0.87, "learning_rate": 4.2630400211719264e-05, "loss": 2.2726, "step": 1538 }, { "epoch": 0.87, "learning_rate": 4.2259369454074935e-05, "loss": 2.312, "step": 1539 }, { "epoch": 0.87, "learning_rate": 4.188988912600566e-05, "loss": 2.3564, "step": 1540 }, { "epoch": 0.88, "learning_rate": 4.152196047899126e-05, "loss": 2.4055, "step": 1541 }, { "epoch": 0.88, "learning_rate": 4.115558475925601e-05, "loss": 2.3959, "step": 1542 }, { "epoch": 0.88, "learning_rate": 4.079076320776426e-05, "loss": 2.3602, "step": 1543 }, { "epoch": 0.88, "learning_rate": 4.042749706021581e-05, "loss": 2.362, "step": 1544 }, { "epoch": 0.88, "learning_rate": 4.0065787547042544e-05, "loss": 2.2404, "step": 1545 }, { "epoch": 0.88, "learning_rate": 3.9705635893403594e-05, "loss": 2.3495, "step": 1546 }, { "epoch": 0.88, "learning_rate": 3.934704331918137e-05, "loss": 2.3613, "step": 1547 }, { "epoch": 0.88, "learning_rate": 3.89900110389777e-05, "loss": 2.255, "step": 1548 }, { "epoch": 0.88, "learning_rate": 3.863454026210911e-05, "loss": 2.3231, "step": 1549 }, { "epoch": 0.88, "learning_rate": 3.828063219260347e-05, "loss": 2.3597, "step": 1550 }, { "epoch": 0.88, "learning_rate": 3.792828802919529e-05, "loss": 2.4335, "step": 1551 }, { "epoch": 0.88, "learning_rate": 3.757750896532214e-05, "loss": 2.3403, "step": 1552 }, { "epoch": 0.88, "learning_rate": 3.72282961891201e-05, "loss": 2.4333, "step": 1553 }, { "epoch": 0.88, "learning_rate": 3.688065088342041e-05, "loss": 2.377, "step": 1554 }, { "epoch": 0.88, "learning_rate": 3.653457422574458e-05, "loss": 2.3642, "step": 1555 }, { "epoch": 0.88, "learning_rate": 3.619006738830144e-05, "loss": 2.4349, "step": 1556 }, { "epoch": 0.88, "learning_rate": 3.584713153798214e-05, "loss": 2.3163, "step": 1557 }, { "epoch": 0.89, "learning_rate": 3.550576783635712e-05, "loss": 2.3527, "step": 1558 }, { "epoch": 0.89, "learning_rate": 3.516597743967126e-05, "loss": 2.3009, "step": 1559 }, { "epoch": 0.89, "learning_rate": 3.4827761498840924e-05, "loss": 2.3623, "step": 1560 }, { "epoch": 0.89, "learning_rate": 3.449112115944919e-05, "loss": 2.2798, "step": 1561 }, { "epoch": 0.89, "learning_rate": 3.415605756174267e-05, "loss": 2.3023, "step": 1562 }, { "epoch": 0.89, "learning_rate": 3.382257184062709e-05, "loss": 2.4368, "step": 1563 }, { "epoch": 0.89, "learning_rate": 3.34906651256639e-05, "loss": 2.3222, "step": 1564 }, { "epoch": 0.89, "learning_rate": 3.316033854106604e-05, "loss": 2.2483, "step": 1565 }, { "epoch": 0.89, "learning_rate": 3.283159320569451e-05, "loss": 2.2451, "step": 1566 }, { "epoch": 0.89, "learning_rate": 3.2504430233054274e-05, "loss": 2.3724, "step": 1567 }, { "epoch": 0.89, "learning_rate": 3.217885073129067e-05, "loss": 2.3633, "step": 1568 }, { "epoch": 0.89, "learning_rate": 3.1854855803185645e-05, "loss": 2.408, "step": 1569 }, { "epoch": 0.89, "learning_rate": 3.153244654615406e-05, "loss": 2.3808, "step": 1570 }, { "epoch": 0.89, "learning_rate": 3.121162405223954e-05, "loss": 2.4206, "step": 1571 }, { "epoch": 0.89, "learning_rate": 3.089238940811162e-05, "loss": 2.3333, "step": 1572 }, { "epoch": 0.89, "learning_rate": 3.057474369506108e-05, "loss": 2.306, "step": 1573 }, { "epoch": 0.89, "learning_rate": 3.025868798899728e-05, "loss": 2.2793, "step": 1574 }, { "epoch": 0.89, "learning_rate": 2.9944223360443446e-05, "loss": 2.4081, "step": 1575 }, { "epoch": 0.9, "learning_rate": 2.9631350874534225e-05, "loss": 2.3331, "step": 1576 }, { "epoch": 0.9, "learning_rate": 2.9320071591010854e-05, "loss": 2.3505, "step": 1577 }, { "epoch": 0.9, "learning_rate": 2.9010386564218615e-05, "loss": 2.2429, "step": 1578 }, { "epoch": 0.9, "learning_rate": 2.8702296843102715e-05, "loss": 2.2545, "step": 1579 }, { "epoch": 0.9, "learning_rate": 2.8395803471204684e-05, "loss": 2.3952, "step": 1580 }, { "epoch": 0.9, "learning_rate": 2.8090907486659322e-05, "loss": 2.3529, "step": 1581 }, { "epoch": 0.9, "learning_rate": 2.7787609922190427e-05, "loss": 2.2856, "step": 1582 }, { "epoch": 0.9, "learning_rate": 2.7485911805108178e-05, "loss": 2.3722, "step": 1583 }, { "epoch": 0.9, "learning_rate": 2.718581415730481e-05, "loss": 2.3342, "step": 1584 }, { "epoch": 0.9, "learning_rate": 2.688731799525196e-05, "loss": 2.3142, "step": 1585 }, { "epoch": 0.9, "learning_rate": 2.6590424329996367e-05, "loss": 2.2973, "step": 1586 }, { "epoch": 0.9, "learning_rate": 2.6295134167157342e-05, "loss": 2.3004, "step": 1587 }, { "epoch": 0.9, "learning_rate": 2.6001448506922544e-05, "loss": 2.3562, "step": 1588 }, { "epoch": 0.9, "learning_rate": 2.5709368344045248e-05, "loss": 2.2182, "step": 1589 }, { "epoch": 0.9, "learning_rate": 2.5418894667840364e-05, "loss": 2.2891, "step": 1590 }, { "epoch": 0.9, "learning_rate": 2.5130028462181876e-05, "loss": 2.2727, "step": 1591 }, { "epoch": 0.9, "learning_rate": 2.4842770705498563e-05, "loss": 2.3455, "step": 1592 }, { "epoch": 0.91, "learning_rate": 2.455712237077157e-05, "loss": 2.3612, "step": 1593 }, { "epoch": 0.91, "learning_rate": 2.427308442553028e-05, "loss": 2.2578, "step": 1594 }, { "epoch": 0.91, "learning_rate": 2.399065783185006e-05, "loss": 2.2582, "step": 1595 }, { "epoch": 0.91, "learning_rate": 2.3709843546347864e-05, "loss": 2.3917, "step": 1596 }, { "epoch": 0.91, "learning_rate": 2.3430642520179847e-05, "loss": 2.2371, "step": 1597 }, { "epoch": 0.91, "learning_rate": 2.3153055699037583e-05, "loss": 2.3884, "step": 1598 }, { "epoch": 0.91, "learning_rate": 2.2877084023145424e-05, "loss": 2.2932, "step": 1599 }, { "epoch": 0.91, "learning_rate": 2.26027284272568e-05, "loss": 2.3436, "step": 1600 }, { "epoch": 0.91, "learning_rate": 2.232998984065132e-05, "loss": 2.3251, "step": 1601 }, { "epoch": 0.91, "learning_rate": 2.2058869187131515e-05, "loss": 2.3943, "step": 1602 }, { "epoch": 0.91, "learning_rate": 2.1789367385019854e-05, "loss": 2.3229, "step": 1603 }, { "epoch": 0.91, "learning_rate": 2.152148534715537e-05, "loss": 2.3256, "step": 1604 }, { "epoch": 0.91, "learning_rate": 2.1255223980891027e-05, "loss": 2.3357, "step": 1605 }, { "epoch": 0.91, "learning_rate": 2.0990584188089966e-05, "loss": 2.36, "step": 1606 }, { "epoch": 0.91, "learning_rate": 2.0727566865123228e-05, "loss": 2.4062, "step": 1607 }, { "epoch": 0.91, "learning_rate": 2.046617290286601e-05, "loss": 2.3019, "step": 1608 }, { "epoch": 0.91, "learning_rate": 2.0206403186695198e-05, "loss": 2.3427, "step": 1609 }, { "epoch": 0.91, "learning_rate": 1.994825859648597e-05, "loss": 2.3014, "step": 1610 }, { "epoch": 0.92, "learning_rate": 1.969174000660906e-05, "loss": 2.3167, "step": 1611 }, { "epoch": 0.92, "learning_rate": 1.9436848285927787e-05, "loss": 2.2969, "step": 1612 }, { "epoch": 0.92, "learning_rate": 1.918358429779482e-05, "loss": 2.3046, "step": 1613 }, { "epoch": 0.92, "learning_rate": 1.893194890004979e-05, "loss": 2.3401, "step": 1614 }, { "epoch": 0.92, "learning_rate": 1.868194294501574e-05, "loss": 2.2342, "step": 1615 }, { "epoch": 0.92, "learning_rate": 1.843356727949691e-05, "loss": 2.3481, "step": 1616 }, { "epoch": 0.92, "learning_rate": 1.8186822744775234e-05, "loss": 2.2885, "step": 1617 }, { "epoch": 0.92, "learning_rate": 1.794171017660795e-05, "loss": 2.3167, "step": 1618 }, { "epoch": 0.92, "learning_rate": 1.7698230405224504e-05, "loss": 2.3262, "step": 1619 }, { "epoch": 0.92, "learning_rate": 1.745638425532392e-05, "loss": 2.358, "step": 1620 }, { "epoch": 0.92, "learning_rate": 1.7216172546071885e-05, "loss": 2.3507, "step": 1621 }, { "epoch": 0.92, "learning_rate": 1.6977596091098113e-05, "loss": 2.3153, "step": 1622 }, { "epoch": 0.92, "learning_rate": 1.6740655698493313e-05, "loss": 2.2792, "step": 1623 }, { "epoch": 0.92, "learning_rate": 1.6505352170806787e-05, "loss": 2.2654, "step": 1624 }, { "epoch": 0.92, "learning_rate": 1.627168630504339e-05, "loss": 2.3947, "step": 1625 }, { "epoch": 0.92, "learning_rate": 1.603965889266118e-05, "loss": 2.3774, "step": 1626 }, { "epoch": 0.92, "learning_rate": 1.5809270719568335e-05, "loss": 2.3866, "step": 1627 }, { "epoch": 0.92, "learning_rate": 1.5580522566121024e-05, "loss": 2.3637, "step": 1628 }, { "epoch": 0.93, "learning_rate": 1.535341520712008e-05, "loss": 2.3164, "step": 1629 }, { "epoch": 0.93, "learning_rate": 1.512794941180906e-05, "loss": 2.279, "step": 1630 }, { "epoch": 0.93, "learning_rate": 1.4904125943871028e-05, "loss": 2.3181, "step": 1631 }, { "epoch": 0.93, "learning_rate": 1.4681945561426547e-05, "loss": 2.3144, "step": 1632 }, { "epoch": 0.93, "learning_rate": 1.446140901703058e-05, "loss": 2.4334, "step": 1633 }, { "epoch": 0.93, "learning_rate": 1.4242517057670434e-05, "loss": 2.364, "step": 1634 }, { "epoch": 0.93, "learning_rate": 1.402527042476276e-05, "loss": 2.3259, "step": 1635 }, { "epoch": 0.93, "learning_rate": 1.380966985415144e-05, "loss": 2.3583, "step": 1636 }, { "epoch": 0.93, "learning_rate": 1.3595716076104658e-05, "loss": 2.3553, "step": 1637 }, { "epoch": 0.93, "learning_rate": 1.3383409815313108e-05, "loss": 2.2715, "step": 1638 }, { "epoch": 0.93, "learning_rate": 1.3172751790886672e-05, "loss": 2.3486, "step": 1639 }, { "epoch": 0.93, "learning_rate": 1.2963742716352812e-05, "loss": 2.3027, "step": 1640 }, { "epoch": 0.93, "learning_rate": 1.2756383299653452e-05, "loss": 2.3052, "step": 1641 }, { "epoch": 0.93, "learning_rate": 1.2550674243143156e-05, "loss": 2.3633, "step": 1642 }, { "epoch": 0.93, "learning_rate": 1.2346616243586294e-05, "loss": 2.3606, "step": 1643 }, { "epoch": 0.93, "learning_rate": 1.2144209992155042e-05, "loss": 2.3093, "step": 1644 }, { "epoch": 0.93, "learning_rate": 1.1943456174426825e-05, "loss": 2.364, "step": 1645 }, { "epoch": 0.94, "learning_rate": 1.1744355470381996e-05, "loss": 2.4326, "step": 1646 }, { "epoch": 0.94, "learning_rate": 1.1546908554401658e-05, "loss": 2.3876, "step": 1647 }, { "epoch": 0.94, "learning_rate": 1.135111609526529e-05, "loss": 2.3325, "step": 1648 }, { "epoch": 0.94, "learning_rate": 1.1156978756148462e-05, "loss": 2.3742, "step": 1649 }, { "epoch": 0.94, "learning_rate": 1.0964497194620726e-05, "loss": 2.3834, "step": 1650 }, { "epoch": 0.94, "learning_rate": 1.077367206264318e-05, "loss": 2.3848, "step": 1651 }, { "epoch": 0.94, "learning_rate": 1.0584504006566465e-05, "loss": 2.3192, "step": 1652 }, { "epoch": 0.94, "learning_rate": 1.0396993667128318e-05, "loss": 2.4382, "step": 1653 }, { "epoch": 0.94, "learning_rate": 1.0211141679451753e-05, "loss": 2.2944, "step": 1654 }, { "epoch": 0.94, "learning_rate": 1.0026948673042768e-05, "loss": 2.3364, "step": 1655 }, { "epoch": 0.94, "learning_rate": 9.84441527178781e-06, "loss": 2.368, "step": 1656 }, { "epoch": 0.94, "learning_rate": 9.663542093952483e-06, "loss": 2.3153, "step": 1657 }, { "epoch": 0.94, "learning_rate": 9.484329752178555e-06, "loss": 2.3558, "step": 1658 }, { "epoch": 0.94, "learning_rate": 9.306778853482744e-06, "loss": 2.348, "step": 1659 }, { "epoch": 0.94, "learning_rate": 9.130889999253822e-06, "loss": 2.3848, "step": 1660 }, { "epoch": 0.94, "learning_rate": 8.95666378525134e-06, "loss": 2.3773, "step": 1661 }, { "epoch": 0.94, "learning_rate": 8.784100801602912e-06, "loss": 2.3582, "step": 1662 }, { "epoch": 0.94, "learning_rate": 8.613201632802992e-06, "loss": 2.3574, "step": 1663 }, { "epoch": 0.95, "learning_rate": 8.443966857710095e-06, "loss": 2.3734, "step": 1664 }, { "epoch": 0.95, "learning_rate": 8.276397049545359e-06, "loss": 2.291, "step": 1665 }, { "epoch": 0.95, "learning_rate": 8.110492775890433e-06, "loss": 2.3514, "step": 1666 }, { "epoch": 0.95, "learning_rate": 7.946254598685643e-06, "loss": 2.3549, "step": 1667 }, { "epoch": 0.95, "learning_rate": 7.783683074227943e-06, "loss": 2.3524, "step": 1668 }, { "epoch": 0.95, "learning_rate": 7.622778753169079e-06, "loss": 2.3111, "step": 1669 }, { "epoch": 0.95, "learning_rate": 7.463542180513871e-06, "loss": 2.323, "step": 1670 }, { "epoch": 0.95, "learning_rate": 7.305973895618157e-06, "loss": 2.3242, "step": 1671 }, { "epoch": 0.95, "learning_rate": 7.150074432187015e-06, "loss": 2.3144, "step": 1672 }, { "epoch": 0.95, "learning_rate": 6.995844318273104e-06, "loss": 2.3182, "step": 1673 }, { "epoch": 0.95, "learning_rate": 6.8432840762747695e-06, "loss": 2.3319, "step": 1674 }, { "epoch": 0.95, "learning_rate": 6.692394222934272e-06, "loss": 2.3763, "step": 1675 }, { "epoch": 0.95, "learning_rate": 6.543175269335844e-06, "loss": 2.3976, "step": 1676 }, { "epoch": 0.95, "learning_rate": 6.395627720904518e-06, "loss": 2.341, "step": 1677 }, { "epoch": 0.95, "learning_rate": 6.249752077403636e-06, "loss": 2.3106, "step": 1678 }, { "epoch": 0.95, "learning_rate": 6.10554883293396e-06, "loss": 2.3996, "step": 1679 }, { "epoch": 0.95, "learning_rate": 5.963018475931281e-06, "loss": 2.3723, "step": 1680 }, { "epoch": 0.96, "learning_rate": 5.8221614891653115e-06, "loss": 2.3411, "step": 1681 }, { "epoch": 0.96, "learning_rate": 5.682978349737799e-06, "loss": 2.2581, "step": 1682 }, { "epoch": 0.96, "learning_rate": 5.54546952908086e-06, "loss": 2.3132, "step": 1683 }, { "epoch": 0.96, "learning_rate": 5.409635492955478e-06, "loss": 2.2714, "step": 1684 }, { "epoch": 0.96, "learning_rate": 5.275476701450066e-06, "loss": 2.3827, "step": 1685 }, { "epoch": 0.96, "learning_rate": 5.142993608978519e-06, "loss": 2.3675, "step": 1686 }, { "epoch": 0.96, "learning_rate": 5.012186664279106e-06, "loss": 2.3512, "step": 1687 }, { "epoch": 0.96, "learning_rate": 4.883056310412581e-06, "loss": 2.2707, "step": 1688 }, { "epoch": 0.96, "learning_rate": 4.755602984761132e-06, "loss": 2.3824, "step": 1689 }, { "epoch": 0.96, "learning_rate": 4.62982711902632e-06, "loss": 2.307, "step": 1690 }, { "epoch": 0.96, "learning_rate": 4.505729139228087e-06, "loss": 2.403, "step": 1691 }, { "epoch": 0.96, "learning_rate": 4.383309465703145e-06, "loss": 2.2792, "step": 1692 }, { "epoch": 0.96, "learning_rate": 4.262568513103415e-06, "loss": 2.2435, "step": 1693 }, { "epoch": 0.96, "learning_rate": 4.143506690395038e-06, "loss": 2.3489, "step": 1694 }, { "epoch": 0.96, "learning_rate": 4.026124400856479e-06, "loss": 2.3703, "step": 1695 }, { "epoch": 0.96, "learning_rate": 3.910422042077421e-06, "loss": 2.3224, "step": 1696 }, { "epoch": 0.96, "learning_rate": 3.79640000595749e-06, "loss": 2.4044, "step": 1697 }, { "epoch": 0.96, "learning_rate": 3.6840586787048045e-06, "loss": 2.3773, "step": 1698 }, { "epoch": 0.97, "learning_rate": 3.573398440834541e-06, "loss": 2.3467, "step": 1699 }, { "epoch": 0.97, "learning_rate": 3.464419667168095e-06, "loss": 2.3637, "step": 1700 }, { "epoch": 0.97, "learning_rate": 3.357122726831252e-06, "loss": 2.3537, "step": 1701 }, { "epoch": 0.97, "learning_rate": 3.2515079832533543e-06, "loss": 2.3654, "step": 1702 }, { "epoch": 0.97, "learning_rate": 3.1475757941658023e-06, "loss": 2.329, "step": 1703 }, { "epoch": 0.97, "learning_rate": 3.0453265116009987e-06, "loss": 2.3691, "step": 1704 }, { "epoch": 0.97, "learning_rate": 2.9447604818911844e-06, "loss": 2.3862, "step": 1705 }, { "epoch": 0.97, "learning_rate": 2.845878045667161e-06, "loss": 2.3251, "step": 1706 }, { "epoch": 0.97, "learning_rate": 2.748679537857013e-06, "loss": 2.3459, "step": 1707 }, { "epoch": 0.97, "learning_rate": 2.6531652876853884e-06, "loss": 2.2902, "step": 1708 }, { "epoch": 0.97, "learning_rate": 2.559335618671943e-06, "loss": 2.3358, "step": 1709 }, { "epoch": 0.97, "learning_rate": 2.4671908486305073e-06, "loss": 2.4531, "step": 1710 }, { "epoch": 0.97, "learning_rate": 2.3767312896679214e-06, "loss": 2.3742, "step": 1711 }, { "epoch": 0.97, "learning_rate": 2.2879572481829814e-06, "loss": 2.4129, "step": 1712 }, { "epoch": 0.97, "learning_rate": 2.200869024865326e-06, "loss": 2.2343, "step": 1713 }, { "epoch": 0.97, "learning_rate": 2.115466914694719e-06, "loss": 2.414, "step": 1714 }, { "epoch": 0.97, "learning_rate": 2.0317512069394917e-06, "loss": 2.3059, "step": 1715 }, { "epoch": 0.97, "learning_rate": 1.949722185156266e-06, "loss": 2.2761, "step": 1716 }, { "epoch": 0.98, "learning_rate": 1.8693801271884026e-06, "loss": 2.3328, "step": 1717 }, { "epoch": 0.98, "learning_rate": 1.7907253051653305e-06, "loss": 2.3489, "step": 1718 }, { "epoch": 0.98, "learning_rate": 1.7137579855016072e-06, "loss": 2.2278, "step": 1719 }, { "epoch": 0.98, "learning_rate": 1.6384784288959175e-06, "loss": 2.2491, "step": 1720 }, { "epoch": 0.98, "learning_rate": 1.5648868903304637e-06, "loss": 2.3181, "step": 1721 }, { "epoch": 0.98, "learning_rate": 1.4929836190696322e-06, "loss": 2.3198, "step": 1722 }, { "epoch": 0.98, "learning_rate": 1.4227688586596066e-06, "loss": 2.3803, "step": 1723 }, { "epoch": 0.98, "learning_rate": 1.3542428469273116e-06, "loss": 2.3766, "step": 1724 }, { "epoch": 0.98, "learning_rate": 1.2874058159796366e-06, "loss": 2.4288, "step": 1725 }, { "epoch": 0.98, "learning_rate": 1.2222579922026577e-06, "loss": 2.4122, "step": 1726 }, { "epoch": 0.98, "learning_rate": 1.1587995962608623e-06, "loss": 2.3623, "step": 1727 }, { "epoch": 0.98, "learning_rate": 1.0970308430964805e-06, "loss": 2.381, "step": 1728 }, { "epoch": 0.98, "learning_rate": 1.0369519419286543e-06, "loss": 2.4029, "step": 1729 }, { "epoch": 0.98, "learning_rate": 9.785630962527159e-07, "loss": 2.3803, "step": 1730 }, { "epoch": 0.98, "learning_rate": 9.218645038396312e-07, "loss": 2.3646, "step": 1731 }, { "epoch": 0.98, "learning_rate": 8.668563567352238e-07, "loss": 2.2831, "step": 1732 }, { "epoch": 0.98, "learning_rate": 8.135388412595646e-07, "loss": 2.3445, "step": 1733 }, { "epoch": 0.99, "learning_rate": 7.619121380063043e-07, "loss": 2.3601, "step": 1734 }, { "epoch": 0.99, "learning_rate": 7.119764218420644e-07, "loss": 2.299, "step": 1735 }, { "epoch": 0.99, "learning_rate": 6.637318619058807e-07, "loss": 2.3585, "step": 1736 }, { "epoch": 0.99, "learning_rate": 6.17178621608594e-07, "loss": 2.2829, "step": 1737 }, { "epoch": 0.99, "learning_rate": 5.723168586324046e-07, "loss": 2.3443, "step": 1738 }, { "epoch": 0.99, "learning_rate": 5.291467249302074e-07, "loss": 2.2769, "step": 1739 }, { "epoch": 0.99, "learning_rate": 4.876683667249804e-07, "loss": 2.31, "step": 1740 }, { "epoch": 0.99, "learning_rate": 4.4788192450961886e-07, "loss": 2.335, "step": 1741 }, { "epoch": 0.99, "learning_rate": 4.097875330462131e-07, "loss": 2.3004, "step": 1742 }, { "epoch": 0.99, "learning_rate": 3.733853213656047e-07, "loss": 2.3632, "step": 1743 }, { "epoch": 0.99, "learning_rate": 3.386754127670533e-07, "loss": 2.3078, "step": 1744 }, { "epoch": 0.99, "learning_rate": 3.056579248176816e-07, "loss": 2.3236, "step": 1745 }, { "epoch": 0.99, "learning_rate": 2.7433296935219785e-07, "loss": 2.3353, "step": 1746 }, { "epoch": 0.99, "learning_rate": 2.4470065247250706e-07, "loss": 2.4458, "step": 1747 }, { "epoch": 0.99, "learning_rate": 2.16761074547267e-07, "loss": 2.3492, "step": 1748 }, { "epoch": 0.99, "learning_rate": 1.905143302116108e-07, "loss": 2.3494, "step": 1749 }, { "epoch": 0.99, "learning_rate": 1.6596050836681365e-07, "loss": 2.2391, "step": 1750 }, { "epoch": 0.99, "learning_rate": 1.4309969218001538e-07, "loss": 2.3532, "step": 1751 }, { "epoch": 1.0, "learning_rate": 1.2193195908388744e-07, "loss": 2.294, "step": 1752 }, { "epoch": 1.0, "learning_rate": 1.0245738077641065e-07, "loss": 2.2277, "step": 1753 }, { "epoch": 1.0, "learning_rate": 8.467602322065338e-08, "loss": 2.3152, "step": 1754 }, { "epoch": 1.0, "learning_rate": 6.858794664449386e-08, "loss": 2.2876, "step": 1755 }, { "epoch": 1.0, "learning_rate": 5.419320554034268e-08, "loss": 2.3495, "step": 1756 }, { "epoch": 1.0, "learning_rate": 4.149184866519828e-08, "loss": 2.2936, "step": 1757 }, { "epoch": 1.0, "learning_rate": 3.048391904031389e-08, "loss": 2.2783, "step": 1758 }, { "epoch": 1.0, "learning_rate": 2.1169453950975472e-08, "loss": 2.3406, "step": 1759 }, { "epoch": 1.0, "learning_rate": 1.3548484946501737e-08, "loss": 2.3872, "step": 1760 }, { "epoch": 1.0, "step": 1760, "total_flos": 1.4654118649502106e+17, "train_loss": 2.4962065879594197, "train_runtime": 20089.3142, "train_samples_per_second": 5.607, "train_steps_per_second": 0.088 } ], "logging_steps": 1.0, "max_steps": 1760, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 20, "total_flos": 1.4654118649502106e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }