{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 6202, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 5.3475935828877e-06, "loss": 5.9486, "step": 1 }, { "epoch": 0.0, "learning_rate": 1.06951871657754e-05, "loss": 5.8951, "step": 2 }, { "epoch": 0.0, "learning_rate": 1.6042780748663105e-05, "loss": 5.9756, "step": 3 }, { "epoch": 0.0, "learning_rate": 2.13903743315508e-05, "loss": 5.7782, "step": 4 }, { "epoch": 0.0, "learning_rate": 2.67379679144385e-05, "loss": 5.959, "step": 5 }, { "epoch": 0.0, "learning_rate": 3.208556149732621e-05, "loss": 5.9055, "step": 6 }, { "epoch": 0.0, "learning_rate": 3.74331550802139e-05, "loss": 6.2998, "step": 7 }, { "epoch": 0.0, "learning_rate": 4.27807486631016e-05, "loss": 6.1521, "step": 8 }, { "epoch": 0.0, "learning_rate": 4.8128342245989304e-05, "loss": 6.4036, "step": 9 }, { "epoch": 0.0, "learning_rate": 5.3475935828877e-05, "loss": 6.501, "step": 10 }, { "epoch": 0.0, "learning_rate": 5.882352941176471e-05, "loss": 6.3223, "step": 11 }, { "epoch": 0.0, "learning_rate": 6.417112299465242e-05, "loss": 6.3368, "step": 12 }, { "epoch": 0.0, "learning_rate": 6.951871657754011e-05, "loss": 6.2648, "step": 13 }, { "epoch": 0.0, "learning_rate": 7.48663101604278e-05, "loss": 6.1554, "step": 14 }, { "epoch": 0.0, "learning_rate": 8.021390374331551e-05, "loss": 5.8798, "step": 15 }, { "epoch": 0.0, "learning_rate": 8.55614973262032e-05, "loss": 5.9567, "step": 16 }, { "epoch": 0.0, "learning_rate": 9.090909090909092e-05, "loss": 5.9103, "step": 17 }, { "epoch": 0.0, "learning_rate": 9.625668449197861e-05, "loss": 5.4526, "step": 18 }, { "epoch": 0.0, "learning_rate": 0.00010160427807486631, "loss": 5.367, "step": 19 }, { "epoch": 0.0, "learning_rate": 0.000106951871657754, "loss": 5.4023, "step": 20 }, { "epoch": 0.0, "learning_rate": 0.00011229946524064172, "loss": 5.3629, "step": 21 }, { "epoch": 0.0, "learning_rate": 0.00011764705882352942, "loss": 5.0887, "step": 22 }, { "epoch": 0.0, "learning_rate": 0.00012299465240641713, "loss": 5.1178, "step": 23 }, { "epoch": 0.0, "learning_rate": 0.00012834224598930484, "loss": 4.8329, "step": 24 }, { "epoch": 0.0, "learning_rate": 0.00013368983957219252, "loss": 4.849, "step": 25 }, { "epoch": 0.0, "learning_rate": 0.00013903743315508022, "loss": 4.8322, "step": 26 }, { "epoch": 0.0, "learning_rate": 0.0001443850267379679, "loss": 4.9571, "step": 27 }, { "epoch": 0.0, "learning_rate": 0.0001497326203208556, "loss": 4.8369, "step": 28 }, { "epoch": 0.0, "learning_rate": 0.00015508021390374334, "loss": 4.8279, "step": 29 }, { "epoch": 0.0, "learning_rate": 0.00016042780748663101, "loss": 4.8095, "step": 30 }, { "epoch": 0.0, "learning_rate": 0.00016577540106951872, "loss": 4.8473, "step": 31 }, { "epoch": 0.01, "learning_rate": 0.0001711229946524064, "loss": 4.6431, "step": 32 }, { "epoch": 0.01, "learning_rate": 0.00017647058823529413, "loss": 4.7465, "step": 33 }, { "epoch": 0.01, "learning_rate": 0.00018181818181818183, "loss": 4.7161, "step": 34 }, { "epoch": 0.01, "learning_rate": 0.0001871657754010695, "loss": 4.705, "step": 35 }, { "epoch": 0.01, "learning_rate": 0.00019251336898395722, "loss": 4.6889, "step": 36 }, { "epoch": 0.01, "learning_rate": 0.00019786096256684495, "loss": 4.5606, "step": 37 }, { "epoch": 0.01, "learning_rate": 0.00020320855614973263, "loss": 4.6834, "step": 38 }, { "epoch": 0.01, "learning_rate": 0.00020855614973262033, "loss": 4.5475, "step": 39 }, { "epoch": 0.01, "learning_rate": 0.000213903743315508, "loss": 4.7062, "step": 40 }, { "epoch": 0.01, "learning_rate": 0.00021925133689839572, "loss": 4.555, "step": 41 }, { "epoch": 0.01, "learning_rate": 0.00022459893048128345, "loss": 4.4943, "step": 42 }, { "epoch": 0.01, "learning_rate": 0.00022994652406417113, "loss": 4.6637, "step": 43 }, { "epoch": 0.01, "learning_rate": 0.00023529411764705883, "loss": 4.7428, "step": 44 }, { "epoch": 0.01, "learning_rate": 0.0002406417112299465, "loss": 4.5246, "step": 45 }, { "epoch": 0.01, "learning_rate": 0.00024598930481283427, "loss": 4.5559, "step": 46 }, { "epoch": 0.01, "learning_rate": 0.0002513368983957219, "loss": 4.3376, "step": 47 }, { "epoch": 0.01, "learning_rate": 0.0002566844919786097, "loss": 4.5093, "step": 48 }, { "epoch": 0.01, "learning_rate": 0.00026203208556149733, "loss": 4.3749, "step": 49 }, { "epoch": 0.01, "learning_rate": 0.00026737967914438503, "loss": 4.4069, "step": 50 }, { "epoch": 0.01, "learning_rate": 0.00027272727272727274, "loss": 4.5086, "step": 51 }, { "epoch": 0.01, "learning_rate": 0.00027807486631016044, "loss": 4.3684, "step": 52 }, { "epoch": 0.01, "learning_rate": 0.00028342245989304815, "loss": 4.4441, "step": 53 }, { "epoch": 0.01, "learning_rate": 0.0002887700534759358, "loss": 4.439, "step": 54 }, { "epoch": 0.01, "learning_rate": 0.00029411764705882356, "loss": 4.4707, "step": 55 }, { "epoch": 0.01, "learning_rate": 0.0002994652406417112, "loss": 4.2591, "step": 56 }, { "epoch": 0.01, "learning_rate": 0.0003048128342245989, "loss": 4.3509, "step": 57 }, { "epoch": 0.01, "learning_rate": 0.00031016042780748667, "loss": 4.4297, "step": 58 }, { "epoch": 0.01, "learning_rate": 0.0003155080213903743, "loss": 4.4056, "step": 59 }, { "epoch": 0.01, "learning_rate": 0.00032085561497326203, "loss": 4.398, "step": 60 }, { "epoch": 0.01, "learning_rate": 0.0003262032085561498, "loss": 4.2179, "step": 61 }, { "epoch": 0.01, "learning_rate": 0.00033155080213903744, "loss": 4.2304, "step": 62 }, { "epoch": 0.01, "learning_rate": 0.00033689839572192514, "loss": 4.3654, "step": 63 }, { "epoch": 0.01, "learning_rate": 0.0003422459893048128, "loss": 4.4312, "step": 64 }, { "epoch": 0.01, "learning_rate": 0.00034759358288770055, "loss": 4.2155, "step": 65 }, { "epoch": 0.01, "learning_rate": 0.00035294117647058826, "loss": 4.2967, "step": 66 }, { "epoch": 0.01, "learning_rate": 0.0003582887700534759, "loss": 4.2588, "step": 67 }, { "epoch": 0.01, "learning_rate": 0.00036363636363636367, "loss": 4.2611, "step": 68 }, { "epoch": 0.01, "learning_rate": 0.0003689839572192513, "loss": 4.2833, "step": 69 }, { "epoch": 0.01, "learning_rate": 0.000374331550802139, "loss": 4.343, "step": 70 }, { "epoch": 0.01, "learning_rate": 0.0003796791443850268, "loss": 4.3456, "step": 71 }, { "epoch": 0.01, "learning_rate": 0.00038502673796791443, "loss": 4.141, "step": 72 }, { "epoch": 0.01, "learning_rate": 0.00039037433155080214, "loss": 4.4074, "step": 73 }, { "epoch": 0.01, "learning_rate": 0.0003957219251336899, "loss": 4.247, "step": 74 }, { "epoch": 0.01, "learning_rate": 0.00040106951871657755, "loss": 4.3275, "step": 75 }, { "epoch": 0.01, "learning_rate": 0.00040641711229946525, "loss": 4.1954, "step": 76 }, { "epoch": 0.01, "learning_rate": 0.0004117647058823529, "loss": 4.134, "step": 77 }, { "epoch": 0.01, "learning_rate": 0.00041711229946524066, "loss": 4.3486, "step": 78 }, { "epoch": 0.01, "learning_rate": 0.00042245989304812837, "loss": 4.3122, "step": 79 }, { "epoch": 0.01, "learning_rate": 0.000427807486631016, "loss": 4.2094, "step": 80 }, { "epoch": 0.01, "learning_rate": 0.0004331550802139038, "loss": 4.3217, "step": 81 }, { "epoch": 0.01, "learning_rate": 0.00043850267379679143, "loss": 4.102, "step": 82 }, { "epoch": 0.01, "learning_rate": 0.00044385026737967914, "loss": 4.1534, "step": 83 }, { "epoch": 0.01, "learning_rate": 0.0004491978609625669, "loss": 4.2219, "step": 84 }, { "epoch": 0.01, "learning_rate": 0.00045454545454545455, "loss": 4.2487, "step": 85 }, { "epoch": 0.01, "learning_rate": 0.00045989304812834225, "loss": 4.2481, "step": 86 }, { "epoch": 0.01, "learning_rate": 0.00046524064171123, "loss": 4.2309, "step": 87 }, { "epoch": 0.01, "learning_rate": 0.00047058823529411766, "loss": 4.0941, "step": 88 }, { "epoch": 0.01, "learning_rate": 0.00047593582887700537, "loss": 4.0096, "step": 89 }, { "epoch": 0.01, "learning_rate": 0.000481283422459893, "loss": 4.0631, "step": 90 }, { "epoch": 0.01, "learning_rate": 0.0004866310160427808, "loss": 4.1869, "step": 91 }, { "epoch": 0.01, "learning_rate": 0.0004919786096256685, "loss": 4.1159, "step": 92 }, { "epoch": 0.01, "learning_rate": 0.0004973262032085562, "loss": 4.1178, "step": 93 }, { "epoch": 0.02, "learning_rate": 0.0005026737967914438, "loss": 4.1284, "step": 94 }, { "epoch": 0.02, "learning_rate": 0.0005080213903743316, "loss": 4.0345, "step": 95 }, { "epoch": 0.02, "learning_rate": 0.0005133689839572194, "loss": 4.0867, "step": 96 }, { "epoch": 0.02, "learning_rate": 0.0005187165775401069, "loss": 4.057, "step": 97 }, { "epoch": 0.02, "learning_rate": 0.0005240641711229947, "loss": 3.937, "step": 98 }, { "epoch": 0.02, "learning_rate": 0.0005294117647058824, "loss": 3.9765, "step": 99 }, { "epoch": 0.02, "learning_rate": 0.0005347593582887701, "loss": 3.9608, "step": 100 }, { "epoch": 0.02, "learning_rate": 0.0005401069518716578, "loss": 4.1594, "step": 101 }, { "epoch": 0.02, "learning_rate": 0.0005454545454545455, "loss": 4.1302, "step": 102 }, { "epoch": 0.02, "learning_rate": 0.0005508021390374331, "loss": 4.1312, "step": 103 }, { "epoch": 0.02, "learning_rate": 0.0005561497326203209, "loss": 4.0223, "step": 104 }, { "epoch": 0.02, "learning_rate": 0.0005614973262032086, "loss": 4.0161, "step": 105 }, { "epoch": 0.02, "learning_rate": 0.0005668449197860963, "loss": 4.0451, "step": 106 }, { "epoch": 0.02, "learning_rate": 0.000572192513368984, "loss": 4.1083, "step": 107 }, { "epoch": 0.02, "learning_rate": 0.0005775401069518716, "loss": 4.1682, "step": 108 }, { "epoch": 0.02, "learning_rate": 0.0005828877005347594, "loss": 4.0605, "step": 109 }, { "epoch": 0.02, "learning_rate": 0.0005882352941176471, "loss": 4.0765, "step": 110 }, { "epoch": 0.02, "learning_rate": 0.0005935828877005348, "loss": 3.8291, "step": 111 }, { "epoch": 0.02, "learning_rate": 0.0005989304812834224, "loss": 4.0857, "step": 112 }, { "epoch": 0.02, "learning_rate": 0.0006042780748663102, "loss": 4.077, "step": 113 }, { "epoch": 0.02, "learning_rate": 0.0006096256684491978, "loss": 4.0375, "step": 114 }, { "epoch": 0.02, "learning_rate": 0.0006149732620320856, "loss": 3.8772, "step": 115 }, { "epoch": 0.02, "learning_rate": 0.0006203208556149733, "loss": 3.9426, "step": 116 }, { "epoch": 0.02, "learning_rate": 0.0006256684491978609, "loss": 4.0139, "step": 117 }, { "epoch": 0.02, "learning_rate": 0.0006310160427807486, "loss": 4.0414, "step": 118 }, { "epoch": 0.02, "learning_rate": 0.0006363636363636364, "loss": 4.0246, "step": 119 }, { "epoch": 0.02, "learning_rate": 0.0006417112299465241, "loss": 3.9336, "step": 120 }, { "epoch": 0.02, "learning_rate": 0.0006470588235294118, "loss": 4.0677, "step": 121 }, { "epoch": 0.02, "learning_rate": 0.0006524064171122996, "loss": 4.0379, "step": 122 }, { "epoch": 0.02, "learning_rate": 0.0006577540106951871, "loss": 3.8605, "step": 123 }, { "epoch": 0.02, "learning_rate": 0.0006631016042780749, "loss": 3.8841, "step": 124 }, { "epoch": 0.02, "learning_rate": 0.0006684491978609626, "loss": 3.837, "step": 125 }, { "epoch": 0.02, "learning_rate": 0.0006737967914438503, "loss": 3.8734, "step": 126 }, { "epoch": 0.02, "learning_rate": 0.000679144385026738, "loss": 3.8831, "step": 127 }, { "epoch": 0.02, "learning_rate": 0.0006844919786096256, "loss": 3.8443, "step": 128 }, { "epoch": 0.02, "learning_rate": 0.0006898395721925133, "loss": 3.8587, "step": 129 }, { "epoch": 0.02, "learning_rate": 0.0006951871657754011, "loss": 4.0822, "step": 130 }, { "epoch": 0.02, "learning_rate": 0.0007005347593582888, "loss": 3.779, "step": 131 }, { "epoch": 0.02, "learning_rate": 0.0007058823529411765, "loss": 3.7747, "step": 132 }, { "epoch": 0.02, "learning_rate": 0.0007112299465240642, "loss": 3.8958, "step": 133 }, { "epoch": 0.02, "learning_rate": 0.0007165775401069518, "loss": 3.8679, "step": 134 }, { "epoch": 0.02, "learning_rate": 0.0007219251336898396, "loss": 3.6683, "step": 135 }, { "epoch": 0.02, "learning_rate": 0.0007272727272727273, "loss": 3.8482, "step": 136 }, { "epoch": 0.02, "learning_rate": 0.000732620320855615, "loss": 3.92, "step": 137 }, { "epoch": 0.02, "learning_rate": 0.0007379679144385026, "loss": 3.8979, "step": 138 }, { "epoch": 0.02, "learning_rate": 0.0007433155080213904, "loss": 3.8828, "step": 139 }, { "epoch": 0.02, "learning_rate": 0.000748663101604278, "loss": 3.9528, "step": 140 }, { "epoch": 0.02, "learning_rate": 0.0007540106951871658, "loss": 3.9246, "step": 141 }, { "epoch": 0.02, "learning_rate": 0.0007593582887700536, "loss": 3.8685, "step": 142 }, { "epoch": 0.02, "learning_rate": 0.0007647058823529411, "loss": 3.9712, "step": 143 }, { "epoch": 0.02, "learning_rate": 0.0007700534759358289, "loss": 4.0532, "step": 144 }, { "epoch": 0.02, "learning_rate": 0.0007754010695187166, "loss": 3.9034, "step": 145 }, { "epoch": 0.02, "learning_rate": 0.0007807486631016043, "loss": 3.8289, "step": 146 }, { "epoch": 0.02, "learning_rate": 0.000786096256684492, "loss": 3.9289, "step": 147 }, { "epoch": 0.02, "learning_rate": 0.0007914438502673798, "loss": 4.0015, "step": 148 }, { "epoch": 0.02, "learning_rate": 0.0007967914438502673, "loss": 3.8183, "step": 149 }, { "epoch": 0.02, "learning_rate": 0.0008021390374331551, "loss": 3.8087, "step": 150 }, { "epoch": 0.02, "learning_rate": 0.0008074866310160429, "loss": 3.8361, "step": 151 }, { "epoch": 0.02, "learning_rate": 0.0008128342245989305, "loss": 3.839, "step": 152 }, { "epoch": 0.02, "learning_rate": 0.0008181818181818183, "loss": 3.8806, "step": 153 }, { "epoch": 0.02, "learning_rate": 0.0008235294117647058, "loss": 3.8447, "step": 154 }, { "epoch": 0.02, "learning_rate": 0.0008288770053475936, "loss": 3.77, "step": 155 }, { "epoch": 0.03, "learning_rate": 0.0008342245989304813, "loss": 3.9146, "step": 156 }, { "epoch": 0.03, "learning_rate": 0.000839572192513369, "loss": 3.845, "step": 157 }, { "epoch": 0.03, "learning_rate": 0.0008449197860962567, "loss": 3.8684, "step": 158 }, { "epoch": 0.03, "learning_rate": 0.0008502673796791444, "loss": 4.0146, "step": 159 }, { "epoch": 0.03, "learning_rate": 0.000855614973262032, "loss": 3.7876, "step": 160 }, { "epoch": 0.03, "learning_rate": 0.0008609625668449198, "loss": 3.8683, "step": 161 }, { "epoch": 0.03, "learning_rate": 0.0008663101604278076, "loss": 3.9205, "step": 162 }, { "epoch": 0.03, "learning_rate": 0.0008716577540106952, "loss": 4.0075, "step": 163 }, { "epoch": 0.03, "learning_rate": 0.0008770053475935829, "loss": 3.7433, "step": 164 }, { "epoch": 0.03, "learning_rate": 0.0008823529411764706, "loss": 3.7349, "step": 165 }, { "epoch": 0.03, "learning_rate": 0.0008877005347593583, "loss": 3.8098, "step": 166 }, { "epoch": 0.03, "learning_rate": 0.000893048128342246, "loss": 3.885, "step": 167 }, { "epoch": 0.03, "learning_rate": 0.0008983957219251338, "loss": 3.8511, "step": 168 }, { "epoch": 0.03, "learning_rate": 0.0009037433155080213, "loss": 3.6956, "step": 169 }, { "epoch": 0.03, "learning_rate": 0.0009090909090909091, "loss": 3.8744, "step": 170 }, { "epoch": 0.03, "learning_rate": 0.0009144385026737968, "loss": 3.7744, "step": 171 }, { "epoch": 0.03, "learning_rate": 0.0009197860962566845, "loss": 3.9944, "step": 172 }, { "epoch": 0.03, "learning_rate": 0.0009251336898395723, "loss": 3.8654, "step": 173 }, { "epoch": 0.03, "learning_rate": 0.00093048128342246, "loss": 3.8064, "step": 174 }, { "epoch": 0.03, "learning_rate": 0.0009358288770053476, "loss": 3.8453, "step": 175 }, { "epoch": 0.03, "learning_rate": 0.0009411764705882353, "loss": 3.6996, "step": 176 }, { "epoch": 0.03, "learning_rate": 0.000946524064171123, "loss": 3.9487, "step": 177 }, { "epoch": 0.03, "learning_rate": 0.0009518716577540107, "loss": 3.7477, "step": 178 }, { "epoch": 0.03, "learning_rate": 0.0009572192513368985, "loss": 3.7293, "step": 179 }, { "epoch": 0.03, "learning_rate": 0.000962566844919786, "loss": 3.8472, "step": 180 }, { "epoch": 0.03, "learning_rate": 0.0009679144385026738, "loss": 3.7662, "step": 181 }, { "epoch": 0.03, "learning_rate": 0.0009732620320855616, "loss": 3.8273, "step": 182 }, { "epoch": 0.03, "learning_rate": 0.0009786096256684492, "loss": 3.7248, "step": 183 }, { "epoch": 0.03, "learning_rate": 0.000983957219251337, "loss": 3.8215, "step": 184 }, { "epoch": 0.03, "learning_rate": 0.0009893048128342245, "loss": 3.8897, "step": 185 }, { "epoch": 0.03, "learning_rate": 0.0009946524064171124, "loss": 3.7155, "step": 186 }, { "epoch": 0.03, "learning_rate": 0.001, "loss": 4.0417, "step": 187 }, { "epoch": 0.03, "learning_rate": 0.000999999931802496, "loss": 3.7861, "step": 188 }, { "epoch": 0.03, "learning_rate": 0.0009999997272100022, "loss": 3.8133, "step": 189 }, { "epoch": 0.03, "learning_rate": 0.0009999993862225744, "loss": 3.623, "step": 190 }, { "epoch": 0.03, "learning_rate": 0.000999998908840306, "loss": 3.7479, "step": 191 }, { "epoch": 0.03, "learning_rate": 0.0009999982950633268, "loss": 3.8731, "step": 192 }, { "epoch": 0.03, "learning_rate": 0.0009999975448918043, "loss": 3.7765, "step": 193 }, { "epoch": 0.03, "learning_rate": 0.0009999966583259434, "loss": 3.8732, "step": 194 }, { "epoch": 0.03, "learning_rate": 0.000999995635365986, "loss": 3.7426, "step": 195 }, { "epoch": 0.03, "learning_rate": 0.0009999944760122107, "loss": 3.88, "step": 196 }, { "epoch": 0.03, "learning_rate": 0.000999993180264934, "loss": 3.7482, "step": 197 }, { "epoch": 0.03, "learning_rate": 0.0009999917481245096, "loss": 3.9271, "step": 198 }, { "epoch": 0.03, "learning_rate": 0.0009999901795913278, "loss": 3.8027, "step": 199 }, { "epoch": 0.03, "learning_rate": 0.0009999884746658165, "loss": 3.8586, "step": 200 }, { "epoch": 0.03, "learning_rate": 0.000999986633348441, "loss": 3.8988, "step": 201 }, { "epoch": 0.03, "learning_rate": 0.0009999846556397038, "loss": 3.7926, "step": 202 }, { "epoch": 0.03, "learning_rate": 0.0009999825415401438, "loss": 3.7898, "step": 203 }, { "epoch": 0.03, "learning_rate": 0.0009999802910503383, "loss": 3.7323, "step": 204 }, { "epoch": 0.03, "learning_rate": 0.0009999779041709005, "loss": 3.7795, "step": 205 }, { "epoch": 0.03, "learning_rate": 0.0009999753809024823, "loss": 3.73, "step": 206 }, { "epoch": 0.03, "learning_rate": 0.0009999727212457715, "loss": 3.7879, "step": 207 }, { "epoch": 0.03, "learning_rate": 0.0009999699252014938, "loss": 3.6848, "step": 208 }, { "epoch": 0.03, "learning_rate": 0.000999966992770412, "loss": 3.7039, "step": 209 }, { "epoch": 0.03, "learning_rate": 0.0009999639239533257, "loss": 3.8482, "step": 210 }, { "epoch": 0.03, "learning_rate": 0.0009999607187510726, "loss": 3.7943, "step": 211 }, { "epoch": 0.03, "learning_rate": 0.0009999573771645267, "loss": 3.7842, "step": 212 }, { "epoch": 0.03, "learning_rate": 0.0009999538991945997, "loss": 3.9321, "step": 213 }, { "epoch": 0.03, "learning_rate": 0.0009999502848422402, "loss": 3.8289, "step": 214 }, { "epoch": 0.03, "learning_rate": 0.000999946534108434, "loss": 3.7996, "step": 215 }, { "epoch": 0.03, "learning_rate": 0.0009999426469942047, "loss": 3.7495, "step": 216 }, { "epoch": 0.03, "learning_rate": 0.0009999386235006124, "loss": 3.7358, "step": 217 }, { "epoch": 0.04, "learning_rate": 0.0009999344636287548, "loss": 3.7833, "step": 218 }, { "epoch": 0.04, "learning_rate": 0.0009999301673797665, "loss": 3.7257, "step": 219 }, { "epoch": 0.04, "learning_rate": 0.0009999257347548195, "loss": 3.7508, "step": 220 }, { "epoch": 0.04, "learning_rate": 0.0009999211657551234, "loss": 3.8274, "step": 221 }, { "epoch": 0.04, "learning_rate": 0.0009999164603819238, "loss": 3.8082, "step": 222 }, { "epoch": 0.04, "learning_rate": 0.0009999116186365047, "loss": 3.7433, "step": 223 }, { "epoch": 0.04, "learning_rate": 0.000999906640520187, "loss": 3.7989, "step": 224 }, { "epoch": 0.04, "learning_rate": 0.0009999015260343286, "loss": 3.5308, "step": 225 }, { "epoch": 0.04, "learning_rate": 0.0009998962751803246, "loss": 3.7875, "step": 226 }, { "epoch": 0.04, "learning_rate": 0.0009998908879596076, "loss": 3.932, "step": 227 }, { "epoch": 0.04, "learning_rate": 0.0009998853643736468, "loss": 3.8212, "step": 228 }, { "epoch": 0.04, "learning_rate": 0.000999879704423949, "loss": 3.789, "step": 229 }, { "epoch": 0.04, "learning_rate": 0.0009998739081120585, "loss": 3.7505, "step": 230 }, { "epoch": 0.04, "learning_rate": 0.0009998679754395566, "loss": 3.8381, "step": 231 }, { "epoch": 0.04, "learning_rate": 0.0009998619064080611, "loss": 3.8199, "step": 232 }, { "epoch": 0.04, "learning_rate": 0.000999855701019228, "loss": 3.7177, "step": 233 }, { "epoch": 0.04, "learning_rate": 0.0009998493592747498, "loss": 3.8603, "step": 234 }, { "epoch": 0.04, "learning_rate": 0.0009998428811763566, "loss": 3.7391, "step": 235 }, { "epoch": 0.04, "learning_rate": 0.000999836266725816, "loss": 3.9236, "step": 236 }, { "epoch": 0.04, "learning_rate": 0.0009998295159249315, "loss": 3.7047, "step": 237 }, { "epoch": 0.04, "learning_rate": 0.0009998226287755451, "loss": 3.7526, "step": 238 }, { "epoch": 0.04, "learning_rate": 0.0009998156052795355, "loss": 3.6538, "step": 239 }, { "epoch": 0.04, "learning_rate": 0.000999808445438819, "loss": 3.815, "step": 240 }, { "epoch": 0.04, "learning_rate": 0.0009998011492553481, "loss": 3.7717, "step": 241 }, { "epoch": 0.04, "learning_rate": 0.0009997937167311134, "loss": 3.767, "step": 242 }, { "epoch": 0.04, "learning_rate": 0.0009997861478681425, "loss": 3.7465, "step": 243 }, { "epoch": 0.04, "learning_rate": 0.0009997784426685002, "loss": 3.6345, "step": 244 }, { "epoch": 0.04, "learning_rate": 0.000999770601134288, "loss": 3.7467, "step": 245 }, { "epoch": 0.04, "learning_rate": 0.0009997626232676454, "loss": 3.7458, "step": 246 }, { "epoch": 0.04, "learning_rate": 0.0009997545090707485, "loss": 3.6346, "step": 247 }, { "epoch": 0.04, "learning_rate": 0.0009997462585458108, "loss": 3.7477, "step": 248 }, { "epoch": 0.04, "learning_rate": 0.000999737871695083, "loss": 3.6396, "step": 249 }, { "epoch": 0.04, "learning_rate": 0.0009997293485208528, "loss": 3.7122, "step": 250 }, { "epoch": 0.04, "learning_rate": 0.0009997206890254454, "loss": 3.7103, "step": 251 }, { "epoch": 0.04, "learning_rate": 0.0009997118932112228, "loss": 3.8014, "step": 252 }, { "epoch": 0.04, "learning_rate": 0.0009997029610805848, "loss": 3.7202, "step": 253 }, { "epoch": 0.04, "learning_rate": 0.0009996938926359676, "loss": 3.6884, "step": 254 }, { "epoch": 0.04, "learning_rate": 0.0009996846878798453, "loss": 3.868, "step": 255 }, { "epoch": 0.04, "learning_rate": 0.0009996753468147286, "loss": 3.6932, "step": 256 }, { "epoch": 0.04, "learning_rate": 0.000999665869443166, "loss": 3.7192, "step": 257 }, { "epoch": 0.04, "learning_rate": 0.0009996562557677425, "loss": 3.7406, "step": 258 }, { "epoch": 0.04, "learning_rate": 0.0009996465057910805, "loss": 3.778, "step": 259 }, { "epoch": 0.04, "learning_rate": 0.0009996366195158402, "loss": 3.7166, "step": 260 }, { "epoch": 0.04, "learning_rate": 0.000999626596944718, "loss": 3.7759, "step": 261 }, { "epoch": 0.04, "learning_rate": 0.0009996164380804483, "loss": 3.774, "step": 262 }, { "epoch": 0.04, "learning_rate": 0.0009996061429258022, "loss": 3.7561, "step": 263 }, { "epoch": 0.04, "learning_rate": 0.0009995957114835878, "loss": 3.837, "step": 264 }, { "epoch": 0.04, "learning_rate": 0.0009995851437566514, "loss": 3.6542, "step": 265 }, { "epoch": 0.04, "learning_rate": 0.000999574439747875, "loss": 3.6925, "step": 266 }, { "epoch": 0.04, "learning_rate": 0.0009995635994601792, "loss": 3.6578, "step": 267 }, { "epoch": 0.04, "learning_rate": 0.0009995526228965205, "loss": 3.7383, "step": 268 }, { "epoch": 0.04, "learning_rate": 0.000999541510059894, "loss": 3.7585, "step": 269 }, { "epoch": 0.04, "learning_rate": 0.0009995302609533303, "loss": 3.8008, "step": 270 }, { "epoch": 0.04, "learning_rate": 0.0009995188755798985, "loss": 3.6539, "step": 271 }, { "epoch": 0.04, "learning_rate": 0.0009995073539427047, "loss": 3.8845, "step": 272 }, { "epoch": 0.04, "learning_rate": 0.0009994956960448912, "loss": 3.5674, "step": 273 }, { "epoch": 0.04, "learning_rate": 0.0009994839018896387, "loss": 3.6741, "step": 274 }, { "epoch": 0.04, "learning_rate": 0.0009994719714801641, "loss": 3.7959, "step": 275 }, { "epoch": 0.04, "learning_rate": 0.0009994599048197223, "loss": 3.7257, "step": 276 }, { "epoch": 0.04, "learning_rate": 0.0009994477019116049, "loss": 3.6391, "step": 277 }, { "epoch": 0.04, "learning_rate": 0.0009994353627591403, "loss": 3.6798, "step": 278 }, { "epoch": 0.04, "learning_rate": 0.000999422887365695, "loss": 3.6971, "step": 279 }, { "epoch": 0.05, "learning_rate": 0.0009994102757346721, "loss": 3.6488, "step": 280 }, { "epoch": 0.05, "learning_rate": 0.0009993975278695117, "loss": 3.7403, "step": 281 }, { "epoch": 0.05, "learning_rate": 0.0009993846437736913, "loss": 3.6619, "step": 282 }, { "epoch": 0.05, "learning_rate": 0.000999371623450726, "loss": 3.6132, "step": 283 }, { "epoch": 0.05, "learning_rate": 0.000999358466904167, "loss": 3.7227, "step": 284 }, { "epoch": 0.05, "learning_rate": 0.0009993451741376034, "loss": 3.8561, "step": 285 }, { "epoch": 0.05, "learning_rate": 0.0009993317451546617, "loss": 3.6644, "step": 286 }, { "epoch": 0.05, "learning_rate": 0.000999318179959005, "loss": 3.7534, "step": 287 }, { "epoch": 0.05, "learning_rate": 0.0009993044785543337, "loss": 3.7825, "step": 288 }, { "epoch": 0.05, "learning_rate": 0.0009992906409443854, "loss": 3.7098, "step": 289 }, { "epoch": 0.05, "learning_rate": 0.000999276667132935, "loss": 3.684, "step": 290 }, { "epoch": 0.05, "learning_rate": 0.000999262557123794, "loss": 3.6203, "step": 291 }, { "epoch": 0.05, "learning_rate": 0.0009992483109208122, "loss": 3.5992, "step": 292 }, { "epoch": 0.05, "learning_rate": 0.000999233928527875, "loss": 3.6556, "step": 293 }, { "epoch": 0.05, "learning_rate": 0.0009992194099489064, "loss": 3.7577, "step": 294 }, { "epoch": 0.05, "learning_rate": 0.0009992047551878667, "loss": 3.6756, "step": 295 }, { "epoch": 0.05, "learning_rate": 0.0009991899642487535, "loss": 3.8233, "step": 296 }, { "epoch": 0.05, "learning_rate": 0.0009991750371356016, "loss": 3.7358, "step": 297 }, { "epoch": 0.05, "learning_rate": 0.0009991599738524831, "loss": 3.6257, "step": 298 }, { "epoch": 0.05, "learning_rate": 0.0009991447744035072, "loss": 3.6915, "step": 299 }, { "epoch": 0.05, "learning_rate": 0.00099912943879282, "loss": 3.731, "step": 300 }, { "epoch": 0.05, "learning_rate": 0.0009991139670246047, "loss": 3.7566, "step": 301 }, { "epoch": 0.05, "learning_rate": 0.0009990983591030825, "loss": 3.6893, "step": 302 }, { "epoch": 0.05, "learning_rate": 0.0009990826150325103, "loss": 3.6207, "step": 303 }, { "epoch": 0.05, "learning_rate": 0.0009990667348171833, "loss": 3.6676, "step": 304 }, { "epoch": 0.05, "learning_rate": 0.0009990507184614336, "loss": 3.6716, "step": 305 }, { "epoch": 0.05, "learning_rate": 0.00099903456596963, "loss": 3.86, "step": 306 }, { "epoch": 0.05, "learning_rate": 0.0009990182773461792, "loss": 3.8431, "step": 307 }, { "epoch": 0.05, "learning_rate": 0.000999001852595524, "loss": 3.6791, "step": 308 }, { "epoch": 0.05, "learning_rate": 0.0009989852917221451, "loss": 3.717, "step": 309 }, { "epoch": 0.05, "learning_rate": 0.0009989685947305602, "loss": 3.6599, "step": 310 }, { "epoch": 0.05, "learning_rate": 0.0009989517616253243, "loss": 3.6526, "step": 311 }, { "epoch": 0.05, "learning_rate": 0.000998934792411029, "loss": 3.6757, "step": 312 }, { "epoch": 0.05, "learning_rate": 0.0009989176870923034, "loss": 3.6942, "step": 313 }, { "epoch": 0.05, "learning_rate": 0.0009989004456738137, "loss": 3.7508, "step": 314 }, { "epoch": 0.05, "learning_rate": 0.0009988830681602631, "loss": 3.601, "step": 315 }, { "epoch": 0.05, "learning_rate": 0.000998865554556392, "loss": 3.5761, "step": 316 }, { "epoch": 0.05, "learning_rate": 0.0009988479048669782, "loss": 3.6358, "step": 317 }, { "epoch": 0.05, "learning_rate": 0.0009988301190968362, "loss": 3.7138, "step": 318 }, { "epoch": 0.05, "learning_rate": 0.0009988121972508176, "loss": 3.6576, "step": 319 }, { "epoch": 0.05, "learning_rate": 0.0009987941393338115, "loss": 3.572, "step": 320 }, { "epoch": 0.05, "learning_rate": 0.0009987759453507438, "loss": 3.7492, "step": 321 }, { "epoch": 0.05, "learning_rate": 0.000998757615306578, "loss": 3.6342, "step": 322 }, { "epoch": 0.05, "learning_rate": 0.0009987391492063138, "loss": 3.5609, "step": 323 }, { "epoch": 0.05, "learning_rate": 0.000998720547054989, "loss": 3.5772, "step": 324 }, { "epoch": 0.05, "learning_rate": 0.0009987018088576777, "loss": 3.751, "step": 325 }, { "epoch": 0.05, "learning_rate": 0.0009986829346194917, "loss": 3.6777, "step": 326 }, { "epoch": 0.05, "learning_rate": 0.00099866392434558, "loss": 3.7342, "step": 327 }, { "epoch": 0.05, "learning_rate": 0.0009986447780411278, "loss": 3.6687, "step": 328 }, { "epoch": 0.05, "learning_rate": 0.0009986254957113586, "loss": 3.5875, "step": 329 }, { "epoch": 0.05, "learning_rate": 0.0009986060773615319, "loss": 3.6872, "step": 330 }, { "epoch": 0.05, "learning_rate": 0.0009985865229969453, "loss": 3.5409, "step": 331 }, { "epoch": 0.05, "learning_rate": 0.0009985668326229328, "loss": 3.597, "step": 332 }, { "epoch": 0.05, "learning_rate": 0.0009985470062448658, "loss": 3.6946, "step": 333 }, { "epoch": 0.05, "learning_rate": 0.0009985270438681525, "loss": 3.7482, "step": 334 }, { "epoch": 0.05, "learning_rate": 0.0009985069454982389, "loss": 3.7012, "step": 335 }, { "epoch": 0.05, "learning_rate": 0.0009984867111406076, "loss": 3.5765, "step": 336 }, { "epoch": 0.05, "learning_rate": 0.0009984663408007777, "loss": 3.5477, "step": 337 }, { "epoch": 0.05, "learning_rate": 0.0009984458344843067, "loss": 3.8007, "step": 338 }, { "epoch": 0.05, "learning_rate": 0.000998425192196788, "loss": 3.6531, "step": 339 }, { "epoch": 0.05, "learning_rate": 0.000998404413943853, "loss": 3.6274, "step": 340 }, { "epoch": 0.05, "learning_rate": 0.0009983834997311697, "loss": 3.7646, "step": 341 }, { "epoch": 0.06, "learning_rate": 0.0009983624495644432, "loss": 3.7375, "step": 342 }, { "epoch": 0.06, "learning_rate": 0.0009983412634494157, "loss": 3.6213, "step": 343 }, { "epoch": 0.06, "learning_rate": 0.000998319941391867, "loss": 3.646, "step": 344 }, { "epoch": 0.06, "learning_rate": 0.000998298483397613, "loss": 3.7726, "step": 345 }, { "epoch": 0.06, "learning_rate": 0.0009982768894725076, "loss": 3.6982, "step": 346 }, { "epoch": 0.06, "learning_rate": 0.0009982551596224411, "loss": 3.5818, "step": 347 }, { "epoch": 0.06, "learning_rate": 0.0009982332938533417, "loss": 3.7393, "step": 348 }, { "epoch": 0.06, "learning_rate": 0.0009982112921711735, "loss": 3.6548, "step": 349 }, { "epoch": 0.06, "learning_rate": 0.0009981891545819388, "loss": 3.6664, "step": 350 }, { "epoch": 0.06, "learning_rate": 0.0009981668810916764, "loss": 3.5646, "step": 351 }, { "epoch": 0.06, "learning_rate": 0.0009981444717064622, "loss": 3.5343, "step": 352 }, { "epoch": 0.06, "learning_rate": 0.0009981219264324093, "loss": 3.8004, "step": 353 }, { "epoch": 0.06, "learning_rate": 0.000998099245275668, "loss": 3.6435, "step": 354 }, { "epoch": 0.06, "learning_rate": 0.0009980764282424251, "loss": 3.5236, "step": 355 }, { "epoch": 0.06, "learning_rate": 0.000998053475338905, "loss": 3.7884, "step": 356 }, { "epoch": 0.06, "learning_rate": 0.0009980303865713693, "loss": 3.6316, "step": 357 }, { "epoch": 0.06, "learning_rate": 0.0009980071619461162, "loss": 3.6923, "step": 358 }, { "epoch": 0.06, "learning_rate": 0.0009979838014694813, "loss": 3.6906, "step": 359 }, { "epoch": 0.06, "learning_rate": 0.0009979603051478367, "loss": 3.7143, "step": 360 }, { "epoch": 0.06, "learning_rate": 0.0009979366729875924, "loss": 3.6242, "step": 361 }, { "epoch": 0.06, "learning_rate": 0.0009979129049951948, "loss": 3.6223, "step": 362 }, { "epoch": 0.06, "learning_rate": 0.0009978890011771275, "loss": 3.7303, "step": 363 }, { "epoch": 0.06, "learning_rate": 0.0009978649615399113, "loss": 3.7574, "step": 364 }, { "epoch": 0.06, "learning_rate": 0.0009978407860901043, "loss": 3.5757, "step": 365 }, { "epoch": 0.06, "learning_rate": 0.0009978164748343007, "loss": 3.5999, "step": 366 }, { "epoch": 0.06, "learning_rate": 0.000997792027779133, "loss": 3.6576, "step": 367 }, { "epoch": 0.06, "learning_rate": 0.0009977674449312692, "loss": 3.6334, "step": 368 }, { "epoch": 0.06, "learning_rate": 0.0009977427262974163, "loss": 3.5762, "step": 369 }, { "epoch": 0.06, "learning_rate": 0.0009977178718843169, "loss": 3.6162, "step": 370 }, { "epoch": 0.06, "learning_rate": 0.000997692881698751, "loss": 3.6593, "step": 371 }, { "epoch": 0.06, "learning_rate": 0.0009976677557475353, "loss": 3.623, "step": 372 }, { "epoch": 0.06, "learning_rate": 0.0009976424940375246, "loss": 3.7449, "step": 373 }, { "epoch": 0.06, "learning_rate": 0.0009976170965756098, "loss": 3.5766, "step": 374 }, { "epoch": 0.06, "learning_rate": 0.0009975915633687188, "loss": 3.4851, "step": 375 }, { "epoch": 0.06, "learning_rate": 0.0009975658944238169, "loss": 3.5713, "step": 376 }, { "epoch": 0.06, "learning_rate": 0.0009975400897479065, "loss": 3.6258, "step": 377 }, { "epoch": 0.06, "learning_rate": 0.000997514149348027, "loss": 3.6143, "step": 378 }, { "epoch": 0.06, "learning_rate": 0.0009974880732312541, "loss": 3.8064, "step": 379 }, { "epoch": 0.06, "learning_rate": 0.0009974618614047017, "loss": 3.7325, "step": 380 }, { "epoch": 0.06, "learning_rate": 0.0009974355138755197, "loss": 3.5524, "step": 381 }, { "epoch": 0.06, "learning_rate": 0.0009974090306508958, "loss": 3.674, "step": 382 }, { "epoch": 0.06, "learning_rate": 0.0009973824117380542, "loss": 3.5691, "step": 383 }, { "epoch": 0.06, "learning_rate": 0.0009973556571442564, "loss": 3.5528, "step": 384 }, { "epoch": 0.06, "learning_rate": 0.0009973287668768004, "loss": 3.7901, "step": 385 }, { "epoch": 0.06, "learning_rate": 0.000997301740943022, "loss": 3.4705, "step": 386 }, { "epoch": 0.06, "learning_rate": 0.0009972745793502933, "loss": 3.6123, "step": 387 }, { "epoch": 0.06, "learning_rate": 0.0009972472821060238, "loss": 3.6343, "step": 388 }, { "epoch": 0.06, "learning_rate": 0.0009972198492176603, "loss": 3.5437, "step": 389 }, { "epoch": 0.06, "learning_rate": 0.0009971922806926857, "loss": 3.6402, "step": 390 }, { "epoch": 0.06, "learning_rate": 0.0009971645765386207, "loss": 3.7182, "step": 391 }, { "epoch": 0.06, "learning_rate": 0.0009971367367630226, "loss": 3.5644, "step": 392 }, { "epoch": 0.06, "learning_rate": 0.0009971087613734858, "loss": 3.653, "step": 393 }, { "epoch": 0.06, "learning_rate": 0.000997080650377642, "loss": 3.6866, "step": 394 }, { "epoch": 0.06, "learning_rate": 0.000997052403783159, "loss": 3.6885, "step": 395 }, { "epoch": 0.06, "learning_rate": 0.0009970240215977427, "loss": 3.5455, "step": 396 }, { "epoch": 0.06, "learning_rate": 0.0009969955038291352, "loss": 3.6225, "step": 397 }, { "epoch": 0.06, "learning_rate": 0.000996966850485116, "loss": 3.6166, "step": 398 }, { "epoch": 0.06, "learning_rate": 0.0009969380615735017, "loss": 3.6286, "step": 399 }, { "epoch": 0.06, "learning_rate": 0.0009969091371021452, "loss": 3.5394, "step": 400 }, { "epoch": 0.06, "learning_rate": 0.000996880077078937, "loss": 3.6536, "step": 401 }, { "epoch": 0.06, "learning_rate": 0.0009968508815118042, "loss": 3.5967, "step": 402 }, { "epoch": 0.06, "learning_rate": 0.0009968215504087114, "loss": 3.5914, "step": 403 }, { "epoch": 0.07, "learning_rate": 0.0009967920837776595, "loss": 3.6483, "step": 404 }, { "epoch": 0.07, "learning_rate": 0.0009967624816266869, "loss": 3.7576, "step": 405 }, { "epoch": 0.07, "learning_rate": 0.0009967327439638688, "loss": 3.6956, "step": 406 }, { "epoch": 0.07, "learning_rate": 0.000996702870797317, "loss": 3.6324, "step": 407 }, { "epoch": 0.07, "learning_rate": 0.000996672862135181, "loss": 3.5627, "step": 408 }, { "epoch": 0.07, "learning_rate": 0.0009966427179856467, "loss": 3.8247, "step": 409 }, { "epoch": 0.07, "learning_rate": 0.0009966124383569371, "loss": 3.5794, "step": 410 }, { "epoch": 0.07, "learning_rate": 0.0009965820232573125, "loss": 3.7232, "step": 411 }, { "epoch": 0.07, "learning_rate": 0.000996551472695069, "loss": 3.6576, "step": 412 }, { "epoch": 0.07, "learning_rate": 0.0009965207866785414, "loss": 3.5436, "step": 413 }, { "epoch": 0.07, "learning_rate": 0.0009964899652161003, "loss": 3.51, "step": 414 }, { "epoch": 0.07, "learning_rate": 0.0009964590083161532, "loss": 3.6684, "step": 415 }, { "epoch": 0.07, "learning_rate": 0.0009964279159871449, "loss": 3.5939, "step": 416 }, { "epoch": 0.07, "learning_rate": 0.0009963966882375575, "loss": 3.7163, "step": 417 }, { "epoch": 0.07, "learning_rate": 0.000996365325075909, "loss": 3.6201, "step": 418 }, { "epoch": 0.07, "learning_rate": 0.0009963338265107553, "loss": 3.6707, "step": 419 }, { "epoch": 0.07, "learning_rate": 0.000996302192550689, "loss": 3.5358, "step": 420 }, { "epoch": 0.07, "learning_rate": 0.0009962704232043392, "loss": 3.6123, "step": 421 }, { "epoch": 0.07, "learning_rate": 0.0009962385184803726, "loss": 3.6365, "step": 422 }, { "epoch": 0.07, "learning_rate": 0.0009962064783874923, "loss": 3.6887, "step": 423 }, { "epoch": 0.07, "learning_rate": 0.0009961743029344382, "loss": 3.4337, "step": 424 }, { "epoch": 0.07, "learning_rate": 0.0009961419921299883, "loss": 3.4868, "step": 425 }, { "epoch": 0.07, "learning_rate": 0.000996109545982956, "loss": 3.6961, "step": 426 }, { "epoch": 0.07, "learning_rate": 0.0009960769645021923, "loss": 3.785, "step": 427 }, { "epoch": 0.07, "learning_rate": 0.0009960442476965852, "loss": 3.4476, "step": 428 }, { "epoch": 0.07, "learning_rate": 0.00099601139557506, "loss": 3.6732, "step": 429 }, { "epoch": 0.07, "learning_rate": 0.0009959784081465775, "loss": 3.5934, "step": 430 }, { "epoch": 0.07, "learning_rate": 0.000995945285420137, "loss": 3.5392, "step": 431 }, { "epoch": 0.07, "learning_rate": 0.0009959120274047738, "loss": 3.4776, "step": 432 }, { "epoch": 0.07, "learning_rate": 0.0009958786341095604, "loss": 3.7242, "step": 433 }, { "epoch": 0.07, "learning_rate": 0.0009958451055436064, "loss": 3.5389, "step": 434 }, { "epoch": 0.07, "learning_rate": 0.0009958114417160577, "loss": 3.5481, "step": 435 }, { "epoch": 0.07, "learning_rate": 0.0009957776426360975, "loss": 3.5544, "step": 436 }, { "epoch": 0.07, "learning_rate": 0.0009957437083129464, "loss": 3.6186, "step": 437 }, { "epoch": 0.07, "learning_rate": 0.0009957096387558605, "loss": 3.4458, "step": 438 }, { "epoch": 0.07, "learning_rate": 0.000995675433974134, "loss": 3.5658, "step": 439 }, { "epoch": 0.07, "learning_rate": 0.0009956410939770978, "loss": 3.5481, "step": 440 }, { "epoch": 0.07, "learning_rate": 0.0009956066187741194, "loss": 3.4711, "step": 441 }, { "epoch": 0.07, "learning_rate": 0.0009955720083746034, "loss": 3.5488, "step": 442 }, { "epoch": 0.07, "learning_rate": 0.000995537262787991, "loss": 3.5963, "step": 443 }, { "epoch": 0.07, "learning_rate": 0.0009955023820237602, "loss": 3.6598, "step": 444 }, { "epoch": 0.07, "learning_rate": 0.0009954673660914266, "loss": 3.6826, "step": 445 }, { "epoch": 0.07, "learning_rate": 0.000995432215000542, "loss": 3.5356, "step": 446 }, { "epoch": 0.07, "learning_rate": 0.000995396928760695, "loss": 3.6369, "step": 447 }, { "epoch": 0.07, "learning_rate": 0.000995361507381512, "loss": 3.7795, "step": 448 }, { "epoch": 0.07, "learning_rate": 0.000995325950872655, "loss": 3.5622, "step": 449 }, { "epoch": 0.07, "learning_rate": 0.0009952902592438237, "loss": 3.6422, "step": 450 }, { "epoch": 0.07, "learning_rate": 0.0009952544325047543, "loss": 3.6076, "step": 451 }, { "epoch": 0.07, "learning_rate": 0.00099521847066522, "loss": 3.7241, "step": 452 }, { "epoch": 0.07, "learning_rate": 0.0009951823737350313, "loss": 3.5244, "step": 453 }, { "epoch": 0.07, "learning_rate": 0.0009951461417240341, "loss": 3.5843, "step": 454 }, { "epoch": 0.07, "learning_rate": 0.0009951097746421131, "loss": 3.6061, "step": 455 }, { "epoch": 0.07, "learning_rate": 0.0009950732724991885, "loss": 3.6687, "step": 456 }, { "epoch": 0.07, "learning_rate": 0.0009950366353052174, "loss": 3.6813, "step": 457 }, { "epoch": 0.07, "learning_rate": 0.0009949998630701946, "loss": 3.4376, "step": 458 }, { "epoch": 0.07, "learning_rate": 0.000994962955804151, "loss": 3.6877, "step": 459 }, { "epoch": 0.07, "learning_rate": 0.0009949259135171543, "loss": 3.5971, "step": 460 }, { "epoch": 0.07, "learning_rate": 0.0009948887362193095, "loss": 3.5688, "step": 461 }, { "epoch": 0.07, "learning_rate": 0.000994851423920758, "loss": 3.5673, "step": 462 }, { "epoch": 0.07, "learning_rate": 0.0009948139766316785, "loss": 3.5814, "step": 463 }, { "epoch": 0.07, "learning_rate": 0.0009947763943622865, "loss": 3.6455, "step": 464 }, { "epoch": 0.07, "learning_rate": 0.0009947386771228333, "loss": 3.4994, "step": 465 }, { "epoch": 0.08, "learning_rate": 0.0009947008249236082, "loss": 3.5484, "step": 466 }, { "epoch": 0.08, "learning_rate": 0.000994662837774937, "loss": 3.4904, "step": 467 }, { "epoch": 0.08, "learning_rate": 0.000994624715687182, "loss": 3.6338, "step": 468 }, { "epoch": 0.08, "learning_rate": 0.0009945864586707427, "loss": 3.5473, "step": 469 }, { "epoch": 0.08, "learning_rate": 0.0009945480667360549, "loss": 3.6766, "step": 470 }, { "epoch": 0.08, "learning_rate": 0.000994509539893592, "loss": 3.5347, "step": 471 }, { "epoch": 0.08, "learning_rate": 0.0009944708781538635, "loss": 3.5936, "step": 472 }, { "epoch": 0.08, "learning_rate": 0.000994432081527416, "loss": 3.6759, "step": 473 }, { "epoch": 0.08, "learning_rate": 0.000994393150024833, "loss": 3.4666, "step": 474 }, { "epoch": 0.08, "learning_rate": 0.000994354083656734, "loss": 3.6506, "step": 475 }, { "epoch": 0.08, "learning_rate": 0.0009943148824337765, "loss": 3.5375, "step": 476 }, { "epoch": 0.08, "learning_rate": 0.0009942755463666542, "loss": 3.4339, "step": 477 }, { "epoch": 0.08, "learning_rate": 0.000994236075466097, "loss": 3.5631, "step": 478 }, { "epoch": 0.08, "learning_rate": 0.000994196469742873, "loss": 3.5491, "step": 479 }, { "epoch": 0.08, "learning_rate": 0.0009941567292077857, "loss": 3.5338, "step": 480 }, { "epoch": 0.08, "learning_rate": 0.000994116853871676, "loss": 3.6489, "step": 481 }, { "epoch": 0.08, "learning_rate": 0.0009940768437454218, "loss": 3.6265, "step": 482 }, { "epoch": 0.08, "learning_rate": 0.0009940366988399369, "loss": 3.487, "step": 483 }, { "epoch": 0.08, "learning_rate": 0.000993996419166173, "loss": 3.5466, "step": 484 }, { "epoch": 0.08, "learning_rate": 0.0009939560047351175, "loss": 3.6031, "step": 485 }, { "epoch": 0.08, "learning_rate": 0.0009939154555577954, "loss": 3.5594, "step": 486 }, { "epoch": 0.08, "learning_rate": 0.0009938747716452681, "loss": 3.6552, "step": 487 }, { "epoch": 0.08, "learning_rate": 0.0009938339530086336, "loss": 3.571, "step": 488 }, { "epoch": 0.08, "learning_rate": 0.0009937929996590266, "loss": 3.5924, "step": 489 }, { "epoch": 0.08, "learning_rate": 0.0009937519116076194, "loss": 3.7261, "step": 490 }, { "epoch": 0.08, "learning_rate": 0.00099371068886562, "loss": 3.5448, "step": 491 }, { "epoch": 0.08, "learning_rate": 0.0009936693314442737, "loss": 3.5326, "step": 492 }, { "epoch": 0.08, "learning_rate": 0.000993627839354862, "loss": 3.4735, "step": 493 }, { "epoch": 0.08, "learning_rate": 0.0009935862126087042, "loss": 3.5275, "step": 494 }, { "epoch": 0.08, "learning_rate": 0.000993544451217155, "loss": 3.7592, "step": 495 }, { "epoch": 0.08, "learning_rate": 0.0009935025551916067, "loss": 3.5127, "step": 496 }, { "epoch": 0.08, "learning_rate": 0.0009934605245434886, "loss": 3.6119, "step": 497 }, { "epoch": 0.08, "learning_rate": 0.0009934183592842656, "loss": 3.3895, "step": 498 }, { "epoch": 0.08, "learning_rate": 0.0009933760594254403, "loss": 3.493, "step": 499 }, { "epoch": 0.08, "learning_rate": 0.0009933336249785514, "loss": 3.6469, "step": 500 }, { "epoch": 0.08, "learning_rate": 0.0009932910559551748, "loss": 3.5954, "step": 501 }, { "epoch": 0.08, "learning_rate": 0.000993248352366923, "loss": 3.5558, "step": 502 }, { "epoch": 0.08, "learning_rate": 0.000993205514225445, "loss": 3.4001, "step": 503 }, { "epoch": 0.08, "learning_rate": 0.0009931625415424265, "loss": 3.6699, "step": 504 }, { "epoch": 0.08, "learning_rate": 0.00099311943432959, "loss": 3.4861, "step": 505 }, { "epoch": 0.08, "learning_rate": 0.0009930761925986953, "loss": 3.5784, "step": 506 }, { "epoch": 0.08, "learning_rate": 0.0009930328163615374, "loss": 3.6346, "step": 507 }, { "epoch": 0.08, "learning_rate": 0.0009929893056299496, "loss": 3.5611, "step": 508 }, { "epoch": 0.08, "learning_rate": 0.000992945660415801, "loss": 3.5395, "step": 509 }, { "epoch": 0.08, "learning_rate": 0.0009929018807309975, "loss": 3.6389, "step": 510 }, { "epoch": 0.08, "learning_rate": 0.0009928579665874817, "loss": 3.4937, "step": 511 }, { "epoch": 0.08, "learning_rate": 0.000992813917997233, "loss": 3.4859, "step": 512 }, { "epoch": 0.08, "learning_rate": 0.0009927697349722676, "loss": 3.4833, "step": 513 }, { "epoch": 0.08, "learning_rate": 0.000992725417524638, "loss": 3.5376, "step": 514 }, { "epoch": 0.08, "learning_rate": 0.0009926809656664334, "loss": 3.5175, "step": 515 }, { "epoch": 0.08, "learning_rate": 0.0009926363794097804, "loss": 3.6341, "step": 516 }, { "epoch": 0.08, "learning_rate": 0.000992591658766841, "loss": 3.5758, "step": 517 }, { "epoch": 0.08, "learning_rate": 0.000992546803749815, "loss": 3.5091, "step": 518 }, { "epoch": 0.08, "learning_rate": 0.000992501814370938, "loss": 3.4318, "step": 519 }, { "epoch": 0.08, "learning_rate": 0.0009924566906424832, "loss": 3.4936, "step": 520 }, { "epoch": 0.08, "learning_rate": 0.0009924114325767595, "loss": 3.5361, "step": 521 }, { "epoch": 0.08, "learning_rate": 0.000992366040186113, "loss": 3.6106, "step": 522 }, { "epoch": 0.08, "learning_rate": 0.000992320513482926, "loss": 3.4672, "step": 523 }, { "epoch": 0.08, "learning_rate": 0.0009922748524796184, "loss": 3.4872, "step": 524 }, { "epoch": 0.08, "learning_rate": 0.0009922290571886452, "loss": 3.6643, "step": 525 }, { "epoch": 0.08, "learning_rate": 0.0009921831276224996, "loss": 3.5878, "step": 526 }, { "epoch": 0.08, "learning_rate": 0.0009921370637937105, "loss": 3.5023, "step": 527 }, { "epoch": 0.09, "learning_rate": 0.0009920908657148435, "loss": 3.5572, "step": 528 }, { "epoch": 0.09, "learning_rate": 0.000992044533398501, "loss": 3.4962, "step": 529 }, { "epoch": 0.09, "learning_rate": 0.0009919980668573222, "loss": 3.5311, "step": 530 }, { "epoch": 0.09, "learning_rate": 0.0009919514661039827, "loss": 3.5491, "step": 531 }, { "epoch": 0.09, "learning_rate": 0.0009919047311511945, "loss": 3.5165, "step": 532 }, { "epoch": 0.09, "learning_rate": 0.0009918578620117067, "loss": 3.4012, "step": 533 }, { "epoch": 0.09, "learning_rate": 0.0009918108586983044, "loss": 3.3695, "step": 534 }, { "epoch": 0.09, "learning_rate": 0.00099176372122381, "loss": 3.5244, "step": 535 }, { "epoch": 0.09, "learning_rate": 0.0009917164496010818, "loss": 3.6515, "step": 536 }, { "epoch": 0.09, "learning_rate": 0.000991669043843015, "loss": 3.5018, "step": 537 }, { "epoch": 0.09, "learning_rate": 0.000991621503962542, "loss": 3.4892, "step": 538 }, { "epoch": 0.09, "learning_rate": 0.0009915738299726305, "loss": 3.6485, "step": 539 }, { "epoch": 0.09, "learning_rate": 0.000991526021886286, "loss": 3.5323, "step": 540 }, { "epoch": 0.09, "learning_rate": 0.0009914780797165496, "loss": 3.4927, "step": 541 }, { "epoch": 0.09, "learning_rate": 0.0009914300034764999, "loss": 3.4527, "step": 542 }, { "epoch": 0.09, "learning_rate": 0.0009913817931792512, "loss": 3.6255, "step": 543 }, { "epoch": 0.09, "learning_rate": 0.0009913334488379551, "loss": 3.605, "step": 544 }, { "epoch": 0.09, "learning_rate": 0.0009912849704657996, "loss": 3.6616, "step": 545 }, { "epoch": 0.09, "learning_rate": 0.0009912363580760085, "loss": 3.428, "step": 546 }, { "epoch": 0.09, "learning_rate": 0.0009911876116818433, "loss": 3.599, "step": 547 }, { "epoch": 0.09, "learning_rate": 0.0009911387312966013, "loss": 3.444, "step": 548 }, { "epoch": 0.09, "learning_rate": 0.0009910897169336168, "loss": 3.5013, "step": 549 }, { "epoch": 0.09, "learning_rate": 0.0009910405686062603, "loss": 3.5512, "step": 550 }, { "epoch": 0.09, "learning_rate": 0.0009909912863279389, "loss": 3.5392, "step": 551 }, { "epoch": 0.09, "learning_rate": 0.0009909418701120965, "loss": 3.4871, "step": 552 }, { "epoch": 0.09, "learning_rate": 0.0009908923199722132, "loss": 3.4046, "step": 553 }, { "epoch": 0.09, "learning_rate": 0.0009908426359218055, "loss": 3.5671, "step": 554 }, { "epoch": 0.09, "learning_rate": 0.0009907928179744272, "loss": 3.6767, "step": 555 }, { "epoch": 0.09, "learning_rate": 0.0009907428661436678, "loss": 3.5237, "step": 556 }, { "epoch": 0.09, "learning_rate": 0.0009906927804431542, "loss": 3.5909, "step": 557 }, { "epoch": 0.09, "learning_rate": 0.0009906425608865487, "loss": 3.585, "step": 558 }, { "epoch": 0.09, "learning_rate": 0.0009905922074875508, "loss": 3.4669, "step": 559 }, { "epoch": 0.09, "learning_rate": 0.0009905417202598968, "loss": 3.6374, "step": 560 }, { "epoch": 0.09, "learning_rate": 0.0009904910992173585, "loss": 3.5054, "step": 561 }, { "epoch": 0.09, "learning_rate": 0.0009904403443737452, "loss": 3.5301, "step": 562 }, { "epoch": 0.09, "learning_rate": 0.0009903894557429023, "loss": 3.4528, "step": 563 }, { "epoch": 0.09, "learning_rate": 0.0009903384333387116, "loss": 3.4843, "step": 564 }, { "epoch": 0.09, "learning_rate": 0.0009902872771750915, "loss": 3.4409, "step": 565 }, { "epoch": 0.09, "learning_rate": 0.0009902359872659972, "loss": 3.4456, "step": 566 }, { "epoch": 0.09, "learning_rate": 0.0009901845636254196, "loss": 3.7117, "step": 567 }, { "epoch": 0.09, "learning_rate": 0.0009901330062673868, "loss": 3.5793, "step": 568 }, { "epoch": 0.09, "learning_rate": 0.000990081315205963, "loss": 3.5781, "step": 569 }, { "epoch": 0.09, "learning_rate": 0.0009900294904552492, "loss": 3.5186, "step": 570 }, { "epoch": 0.09, "learning_rate": 0.0009899775320293827, "loss": 3.5342, "step": 571 }, { "epoch": 0.09, "learning_rate": 0.000989925439942537, "loss": 3.5156, "step": 572 }, { "epoch": 0.09, "learning_rate": 0.0009898732142089224, "loss": 3.5947, "step": 573 }, { "epoch": 0.09, "learning_rate": 0.0009898208548427855, "loss": 3.625, "step": 574 }, { "epoch": 0.09, "learning_rate": 0.0009897683618584098, "loss": 3.5469, "step": 575 }, { "epoch": 0.09, "learning_rate": 0.0009897157352701143, "loss": 3.502, "step": 576 }, { "epoch": 0.09, "learning_rate": 0.0009896629750922555, "loss": 3.5049, "step": 577 }, { "epoch": 0.09, "learning_rate": 0.0009896100813392254, "loss": 3.5742, "step": 578 }, { "epoch": 0.09, "learning_rate": 0.0009895570540254532, "loss": 3.5773, "step": 579 }, { "epoch": 0.09, "learning_rate": 0.0009895038931654041, "loss": 3.5103, "step": 580 }, { "epoch": 0.09, "learning_rate": 0.00098945059877358, "loss": 3.6207, "step": 581 }, { "epoch": 0.09, "learning_rate": 0.0009893971708645189, "loss": 3.5775, "step": 582 }, { "epoch": 0.09, "learning_rate": 0.0009893436094527953, "loss": 3.5351, "step": 583 }, { "epoch": 0.09, "learning_rate": 0.0009892899145530205, "loss": 3.5495, "step": 584 }, { "epoch": 0.09, "learning_rate": 0.0009892360861798418, "loss": 3.5029, "step": 585 }, { "epoch": 0.09, "learning_rate": 0.000989182124347943, "loss": 3.5773, "step": 586 }, { "epoch": 0.09, "learning_rate": 0.0009891280290720446, "loss": 3.6013, "step": 587 }, { "epoch": 0.09, "learning_rate": 0.0009890738003669028, "loss": 3.6577, "step": 588 }, { "epoch": 0.09, "learning_rate": 0.000989019438247311, "loss": 3.5552, "step": 589 }, { "epoch": 0.1, "learning_rate": 0.0009889649427280987, "loss": 3.4023, "step": 590 }, { "epoch": 0.1, "learning_rate": 0.0009889103138241314, "loss": 3.7719, "step": 591 }, { "epoch": 0.1, "learning_rate": 0.0009888555515503116, "loss": 3.6188, "step": 592 }, { "epoch": 0.1, "learning_rate": 0.0009888006559215777, "loss": 3.5023, "step": 593 }, { "epoch": 0.1, "learning_rate": 0.0009887456269529048, "loss": 3.4684, "step": 594 }, { "epoch": 0.1, "learning_rate": 0.000988690464659304, "loss": 3.5242, "step": 595 }, { "epoch": 0.1, "learning_rate": 0.0009886351690558237, "loss": 3.588, "step": 596 }, { "epoch": 0.1, "learning_rate": 0.0009885797401575473, "loss": 3.5065, "step": 597 }, { "epoch": 0.1, "learning_rate": 0.0009885241779795954, "loss": 3.6716, "step": 598 }, { "epoch": 0.1, "learning_rate": 0.000988468482537125, "loss": 3.5966, "step": 599 }, { "epoch": 0.1, "learning_rate": 0.0009884126538453294, "loss": 3.4471, "step": 600 }, { "epoch": 0.1, "learning_rate": 0.0009883566919194375, "loss": 3.5159, "step": 601 }, { "epoch": 0.1, "learning_rate": 0.0009883005967747157, "loss": 3.651, "step": 602 }, { "epoch": 0.1, "learning_rate": 0.000988244368426466, "loss": 3.5517, "step": 603 }, { "epoch": 0.1, "learning_rate": 0.000988188006890027, "loss": 3.4959, "step": 604 }, { "epoch": 0.1, "learning_rate": 0.0009881315121807734, "loss": 3.5669, "step": 605 }, { "epoch": 0.1, "learning_rate": 0.0009880748843141168, "loss": 3.6149, "step": 606 }, { "epoch": 0.1, "learning_rate": 0.000988018123305504, "loss": 3.5215, "step": 607 }, { "epoch": 0.1, "learning_rate": 0.0009879612291704197, "loss": 3.5427, "step": 608 }, { "epoch": 0.1, "learning_rate": 0.0009879042019243834, "loss": 3.4381, "step": 609 }, { "epoch": 0.1, "learning_rate": 0.0009878470415829517, "loss": 3.6386, "step": 610 }, { "epoch": 0.1, "learning_rate": 0.0009877897481617175, "loss": 3.4823, "step": 611 }, { "epoch": 0.1, "learning_rate": 0.0009877323216763098, "loss": 3.5773, "step": 612 }, { "epoch": 0.1, "learning_rate": 0.0009876747621423941, "loss": 3.4238, "step": 613 }, { "epoch": 0.1, "learning_rate": 0.000987617069575672, "loss": 3.5172, "step": 614 }, { "epoch": 0.1, "learning_rate": 0.0009875592439918812, "loss": 3.5516, "step": 615 }, { "epoch": 0.1, "learning_rate": 0.0009875012854067965, "loss": 3.4397, "step": 616 }, { "epoch": 0.1, "learning_rate": 0.0009874431938362276, "loss": 3.4421, "step": 617 }, { "epoch": 0.1, "learning_rate": 0.0009873849692960221, "loss": 3.7115, "step": 618 }, { "epoch": 0.1, "learning_rate": 0.0009873266118020627, "loss": 3.5163, "step": 619 }, { "epoch": 0.1, "learning_rate": 0.0009872681213702688, "loss": 3.4918, "step": 620 }, { "epoch": 0.1, "learning_rate": 0.000987209498016596, "loss": 3.4362, "step": 621 }, { "epoch": 0.1, "learning_rate": 0.0009871507417570362, "loss": 3.45, "step": 622 }, { "epoch": 0.1, "learning_rate": 0.0009870918526076176, "loss": 3.5738, "step": 623 }, { "epoch": 0.1, "learning_rate": 0.0009870328305844043, "loss": 3.4739, "step": 624 }, { "epoch": 0.1, "learning_rate": 0.0009869736757034972, "loss": 3.4735, "step": 625 }, { "epoch": 0.1, "learning_rate": 0.000986914387981033, "loss": 3.4475, "step": 626 }, { "epoch": 0.1, "learning_rate": 0.0009868549674331847, "loss": 3.4443, "step": 627 }, { "epoch": 0.1, "learning_rate": 0.000986795414076162, "loss": 3.4398, "step": 628 }, { "epoch": 0.1, "learning_rate": 0.00098673572792621, "loss": 3.5642, "step": 629 }, { "epoch": 0.1, "learning_rate": 0.0009866759089996108, "loss": 3.4707, "step": 630 }, { "epoch": 0.1, "learning_rate": 0.0009866159573126824, "loss": 3.4429, "step": 631 }, { "epoch": 0.1, "learning_rate": 0.0009865558728817789, "loss": 3.5282, "step": 632 }, { "epoch": 0.1, "learning_rate": 0.0009864956557232908, "loss": 3.5258, "step": 633 }, { "epoch": 0.1, "learning_rate": 0.0009864353058536446, "loss": 3.4504, "step": 634 }, { "epoch": 0.1, "learning_rate": 0.0009863748232893034, "loss": 3.532, "step": 635 }, { "epoch": 0.1, "learning_rate": 0.0009863142080467663, "loss": 3.5252, "step": 636 }, { "epoch": 0.1, "learning_rate": 0.000986253460142568, "loss": 3.3678, "step": 637 }, { "epoch": 0.1, "learning_rate": 0.0009861925795932804, "loss": 3.5199, "step": 638 }, { "epoch": 0.1, "learning_rate": 0.000986131566415511, "loss": 3.558, "step": 639 }, { "epoch": 0.1, "learning_rate": 0.0009860704206259034, "loss": 3.4935, "step": 640 }, { "epoch": 0.1, "learning_rate": 0.0009860091422411379, "loss": 3.5435, "step": 641 }, { "epoch": 0.1, "learning_rate": 0.0009859477312779302, "loss": 3.5333, "step": 642 }, { "epoch": 0.1, "learning_rate": 0.000985886187753033, "loss": 3.5841, "step": 643 }, { "epoch": 0.1, "learning_rate": 0.0009858245116832346, "loss": 3.5702, "step": 644 }, { "epoch": 0.1, "learning_rate": 0.0009857627030853595, "loss": 3.5011, "step": 645 }, { "epoch": 0.1, "learning_rate": 0.0009857007619762687, "loss": 3.5814, "step": 646 }, { "epoch": 0.1, "learning_rate": 0.000985638688372859, "loss": 3.4576, "step": 647 }, { "epoch": 0.1, "learning_rate": 0.0009855764822920634, "loss": 3.5555, "step": 648 }, { "epoch": 0.1, "learning_rate": 0.0009855141437508513, "loss": 3.4445, "step": 649 }, { "epoch": 0.1, "learning_rate": 0.0009854516727662277, "loss": 3.591, "step": 650 }, { "epoch": 0.1, "learning_rate": 0.0009853890693552344, "loss": 3.5724, "step": 651 }, { "epoch": 0.11, "learning_rate": 0.0009853263335349487, "loss": 3.4615, "step": 652 }, { "epoch": 0.11, "learning_rate": 0.0009852634653224846, "loss": 3.4789, "step": 653 }, { "epoch": 0.11, "learning_rate": 0.0009852004647349917, "loss": 3.4826, "step": 654 }, { "epoch": 0.11, "learning_rate": 0.000985137331789656, "loss": 3.5983, "step": 655 }, { "epoch": 0.11, "learning_rate": 0.0009850740665036993, "loss": 3.5091, "step": 656 }, { "epoch": 0.11, "learning_rate": 0.0009850106688943803, "loss": 3.5704, "step": 657 }, { "epoch": 0.11, "learning_rate": 0.0009849471389789927, "loss": 3.458, "step": 658 }, { "epoch": 0.11, "learning_rate": 0.0009848834767748672, "loss": 3.5689, "step": 659 }, { "epoch": 0.11, "learning_rate": 0.0009848196822993698, "loss": 3.4767, "step": 660 }, { "epoch": 0.11, "learning_rate": 0.0009847557555699034, "loss": 3.5393, "step": 661 }, { "epoch": 0.11, "learning_rate": 0.0009846916966039063, "loss": 3.5799, "step": 662 }, { "epoch": 0.11, "learning_rate": 0.0009846275054188533, "loss": 3.5126, "step": 663 }, { "epoch": 0.11, "learning_rate": 0.000984563182032255, "loss": 3.4894, "step": 664 }, { "epoch": 0.11, "learning_rate": 0.0009844987264616585, "loss": 3.5242, "step": 665 }, { "epoch": 0.11, "learning_rate": 0.0009844341387246463, "loss": 3.5015, "step": 666 }, { "epoch": 0.11, "learning_rate": 0.0009843694188388374, "loss": 3.4522, "step": 667 }, { "epoch": 0.11, "learning_rate": 0.0009843045668218866, "loss": 3.4047, "step": 668 }, { "epoch": 0.11, "learning_rate": 0.000984239582691485, "loss": 3.3793, "step": 669 }, { "epoch": 0.11, "learning_rate": 0.0009841744664653596, "loss": 3.6124, "step": 670 }, { "epoch": 0.11, "learning_rate": 0.0009841092181612737, "loss": 3.4762, "step": 671 }, { "epoch": 0.11, "learning_rate": 0.000984043837797026, "loss": 3.5768, "step": 672 }, { "epoch": 0.11, "learning_rate": 0.0009839783253904517, "loss": 3.4672, "step": 673 }, { "epoch": 0.11, "learning_rate": 0.000983912680959422, "loss": 3.5286, "step": 674 }, { "epoch": 0.11, "learning_rate": 0.0009838469045218442, "loss": 3.5863, "step": 675 }, { "epoch": 0.11, "learning_rate": 0.000983780996095661, "loss": 3.5107, "step": 676 }, { "epoch": 0.11, "learning_rate": 0.0009837149556988524, "loss": 3.6364, "step": 677 }, { "epoch": 0.11, "learning_rate": 0.0009836487833494328, "loss": 3.5758, "step": 678 }, { "epoch": 0.11, "learning_rate": 0.0009835824790654535, "loss": 3.4237, "step": 679 }, { "epoch": 0.11, "learning_rate": 0.0009835160428650017, "loss": 3.3763, "step": 680 }, { "epoch": 0.11, "learning_rate": 0.0009834494747662007, "loss": 3.6611, "step": 681 }, { "epoch": 0.11, "learning_rate": 0.0009833827747872093, "loss": 3.3836, "step": 682 }, { "epoch": 0.11, "learning_rate": 0.0009833159429462229, "loss": 3.3775, "step": 683 }, { "epoch": 0.11, "learning_rate": 0.0009832489792614722, "loss": 3.5003, "step": 684 }, { "epoch": 0.11, "learning_rate": 0.0009831818837512248, "loss": 3.5504, "step": 685 }, { "epoch": 0.11, "learning_rate": 0.000983114656433783, "loss": 3.4241, "step": 686 }, { "epoch": 0.11, "learning_rate": 0.0009830472973274861, "loss": 3.5479, "step": 687 }, { "epoch": 0.11, "learning_rate": 0.000982979806450709, "loss": 3.4593, "step": 688 }, { "epoch": 0.11, "learning_rate": 0.000982912183821863, "loss": 3.5261, "step": 689 }, { "epoch": 0.11, "learning_rate": 0.0009828444294593937, "loss": 3.4643, "step": 690 }, { "epoch": 0.11, "learning_rate": 0.0009827765433817848, "loss": 3.6277, "step": 691 }, { "epoch": 0.11, "learning_rate": 0.0009827085256075545, "loss": 3.6039, "step": 692 }, { "epoch": 0.11, "learning_rate": 0.0009826403761552579, "loss": 3.4462, "step": 693 }, { "epoch": 0.11, "learning_rate": 0.0009825720950434847, "loss": 3.6047, "step": 694 }, { "epoch": 0.11, "learning_rate": 0.0009825036822908616, "loss": 3.3855, "step": 695 }, { "epoch": 0.11, "learning_rate": 0.0009824351379160514, "loss": 3.5885, "step": 696 }, { "epoch": 0.11, "learning_rate": 0.0009823664619377516, "loss": 3.5475, "step": 697 }, { "epoch": 0.11, "learning_rate": 0.000982297654374697, "loss": 3.5979, "step": 698 }, { "epoch": 0.11, "learning_rate": 0.0009822287152456571, "loss": 3.5535, "step": 699 }, { "epoch": 0.11, "learning_rate": 0.000982159644569438, "loss": 3.5769, "step": 700 }, { "epoch": 0.11, "learning_rate": 0.0009820904423648816, "loss": 3.5396, "step": 701 }, { "epoch": 0.11, "learning_rate": 0.0009820211086508652, "loss": 3.427, "step": 702 }, { "epoch": 0.11, "learning_rate": 0.0009819516434463028, "loss": 3.4448, "step": 703 }, { "epoch": 0.11, "learning_rate": 0.0009818820467701434, "loss": 3.4626, "step": 704 }, { "epoch": 0.11, "learning_rate": 0.0009818123186413725, "loss": 3.3334, "step": 705 }, { "epoch": 0.11, "learning_rate": 0.0009817424590790114, "loss": 3.4315, "step": 706 }, { "epoch": 0.11, "learning_rate": 0.0009816724681021168, "loss": 3.5492, "step": 707 }, { "epoch": 0.11, "learning_rate": 0.0009816023457297819, "loss": 3.5175, "step": 708 }, { "epoch": 0.11, "learning_rate": 0.0009815320919811348, "loss": 3.4223, "step": 709 }, { "epoch": 0.11, "learning_rate": 0.0009814617068753405, "loss": 3.5122, "step": 710 }, { "epoch": 0.11, "learning_rate": 0.0009813911904315992, "loss": 3.5805, "step": 711 }, { "epoch": 0.11, "learning_rate": 0.000981320542669147, "loss": 3.5595, "step": 712 }, { "epoch": 0.11, "learning_rate": 0.0009812497636072562, "loss": 3.4128, "step": 713 }, { "epoch": 0.12, "learning_rate": 0.0009811788532652342, "loss": 3.4413, "step": 714 }, { "epoch": 0.12, "learning_rate": 0.000981107811662425, "loss": 3.54, "step": 715 }, { "epoch": 0.12, "learning_rate": 0.0009810366388182078, "loss": 3.6898, "step": 716 }, { "epoch": 0.12, "learning_rate": 0.000980965334751998, "loss": 3.5101, "step": 717 }, { "epoch": 0.12, "learning_rate": 0.0009808938994832464, "loss": 3.5096, "step": 718 }, { "epoch": 0.12, "learning_rate": 0.0009808223330314402, "loss": 3.4713, "step": 719 }, { "epoch": 0.12, "learning_rate": 0.0009807506354161017, "loss": 3.3966, "step": 720 }, { "epoch": 0.12, "learning_rate": 0.0009806788066567896, "loss": 3.4621, "step": 721 }, { "epoch": 0.12, "learning_rate": 0.0009806068467730976, "loss": 3.5022, "step": 722 }, { "epoch": 0.12, "learning_rate": 0.000980534755784656, "loss": 3.392, "step": 723 }, { "epoch": 0.12, "learning_rate": 0.0009804625337111305, "loss": 3.3015, "step": 724 }, { "epoch": 0.12, "learning_rate": 0.0009803901805722224, "loss": 3.5334, "step": 725 }, { "epoch": 0.12, "learning_rate": 0.0009803176963876692, "loss": 3.3377, "step": 726 }, { "epoch": 0.12, "learning_rate": 0.0009802450811772433, "loss": 3.5432, "step": 727 }, { "epoch": 0.12, "learning_rate": 0.0009801723349607539, "loss": 3.3393, "step": 728 }, { "epoch": 0.12, "learning_rate": 0.0009800994577580453, "loss": 3.4604, "step": 729 }, { "epoch": 0.12, "learning_rate": 0.0009800264495889977, "loss": 3.4806, "step": 730 }, { "epoch": 0.12, "learning_rate": 0.000979953310473527, "loss": 3.5224, "step": 731 }, { "epoch": 0.12, "learning_rate": 0.0009798800404315848, "loss": 3.347, "step": 732 }, { "epoch": 0.12, "learning_rate": 0.0009798066394831584, "loss": 3.4114, "step": 733 }, { "epoch": 0.12, "learning_rate": 0.000979733107648271, "loss": 3.4063, "step": 734 }, { "epoch": 0.12, "learning_rate": 0.0009796594449469809, "loss": 3.4683, "step": 735 }, { "epoch": 0.12, "learning_rate": 0.000979585651399383, "loss": 3.4805, "step": 736 }, { "epoch": 0.12, "learning_rate": 0.0009795117270256074, "loss": 3.3007, "step": 737 }, { "epoch": 0.12, "learning_rate": 0.0009794376718458197, "loss": 3.3117, "step": 738 }, { "epoch": 0.12, "learning_rate": 0.000979363485880222, "loss": 3.3804, "step": 739 }, { "epoch": 0.12, "learning_rate": 0.0009792891691490506, "loss": 3.4172, "step": 740 }, { "epoch": 0.12, "learning_rate": 0.0009792147216725789, "loss": 3.5357, "step": 741 }, { "epoch": 0.12, "learning_rate": 0.0009791401434711153, "loss": 3.4155, "step": 742 }, { "epoch": 0.12, "learning_rate": 0.000979065434565004, "loss": 3.483, "step": 743 }, { "epoch": 0.12, "learning_rate": 0.0009789905949746252, "loss": 3.4455, "step": 744 }, { "epoch": 0.12, "learning_rate": 0.0009789156247203938, "loss": 3.4546, "step": 745 }, { "epoch": 0.12, "learning_rate": 0.0009788405238227611, "loss": 3.5603, "step": 746 }, { "epoch": 0.12, "learning_rate": 0.000978765292302214, "loss": 3.435, "step": 747 }, { "epoch": 0.12, "learning_rate": 0.0009786899301792752, "loss": 3.4021, "step": 748 }, { "epoch": 0.12, "learning_rate": 0.0009786144374745022, "loss": 3.3085, "step": 749 }, { "epoch": 0.12, "learning_rate": 0.000978538814208489, "loss": 3.7027, "step": 750 }, { "epoch": 0.12, "learning_rate": 0.0009784630604018647, "loss": 3.4932, "step": 751 }, { "epoch": 0.12, "learning_rate": 0.000978387176075294, "loss": 3.5908, "step": 752 }, { "epoch": 0.12, "learning_rate": 0.000978311161249478, "loss": 3.3883, "step": 753 }, { "epoch": 0.12, "learning_rate": 0.0009782350159451521, "loss": 3.5029, "step": 754 }, { "epoch": 0.12, "learning_rate": 0.0009781587401830885, "loss": 3.4665, "step": 755 }, { "epoch": 0.12, "learning_rate": 0.000978082333984094, "loss": 3.483, "step": 756 }, { "epoch": 0.12, "learning_rate": 0.000978005797369012, "loss": 3.476, "step": 757 }, { "epoch": 0.12, "learning_rate": 0.0009779291303587203, "loss": 3.431, "step": 758 }, { "epoch": 0.12, "learning_rate": 0.0009778523329741335, "loss": 3.4099, "step": 759 }, { "epoch": 0.12, "learning_rate": 0.0009777754052362007, "loss": 3.534, "step": 760 }, { "epoch": 0.12, "learning_rate": 0.0009776983471659073, "loss": 3.4808, "step": 761 }, { "epoch": 0.12, "learning_rate": 0.0009776211587842738, "loss": 3.5587, "step": 762 }, { "epoch": 0.12, "learning_rate": 0.0009775438401123566, "loss": 3.4687, "step": 763 }, { "epoch": 0.12, "learning_rate": 0.000977466391171247, "loss": 3.4258, "step": 764 }, { "epoch": 0.12, "learning_rate": 0.000977388811982073, "loss": 3.4678, "step": 765 }, { "epoch": 0.12, "learning_rate": 0.0009773111025659972, "loss": 3.4692, "step": 766 }, { "epoch": 0.12, "learning_rate": 0.0009772332629442175, "loss": 3.4615, "step": 767 }, { "epoch": 0.12, "learning_rate": 0.000977155293137968, "loss": 3.4073, "step": 768 }, { "epoch": 0.12, "learning_rate": 0.0009770771931685185, "loss": 3.613, "step": 769 }, { "epoch": 0.12, "learning_rate": 0.0009769989630571737, "loss": 3.6737, "step": 770 }, { "epoch": 0.12, "learning_rate": 0.0009769206028252735, "loss": 3.5279, "step": 771 }, { "epoch": 0.12, "learning_rate": 0.0009768421124941944, "loss": 3.5014, "step": 772 }, { "epoch": 0.12, "learning_rate": 0.0009767634920853474, "loss": 3.4013, "step": 773 }, { "epoch": 0.12, "learning_rate": 0.0009766847416201796, "loss": 3.4528, "step": 774 }, { "epoch": 0.12, "learning_rate": 0.0009766058611201732, "loss": 3.395, "step": 775 }, { "epoch": 0.13, "learning_rate": 0.0009765268506068459, "loss": 3.4454, "step": 776 }, { "epoch": 0.13, "learning_rate": 0.0009764477101017514, "loss": 3.4507, "step": 777 }, { "epoch": 0.13, "learning_rate": 0.000976368439626478, "loss": 3.4283, "step": 778 }, { "epoch": 0.13, "learning_rate": 0.0009762890392026502, "loss": 3.4319, "step": 779 }, { "epoch": 0.13, "learning_rate": 0.0009762095088519274, "loss": 3.3616, "step": 780 }, { "epoch": 0.13, "learning_rate": 0.0009761298485960048, "loss": 3.3573, "step": 781 }, { "epoch": 0.13, "learning_rate": 0.000976050058456613, "loss": 3.3781, "step": 782 }, { "epoch": 0.13, "learning_rate": 0.0009759701384555177, "loss": 3.537, "step": 783 }, { "epoch": 0.13, "learning_rate": 0.0009758900886145205, "loss": 3.5197, "step": 784 }, { "epoch": 0.13, "learning_rate": 0.0009758099089554584, "loss": 3.5167, "step": 785 }, { "epoch": 0.13, "learning_rate": 0.0009757295995002031, "loss": 3.5496, "step": 786 }, { "epoch": 0.13, "learning_rate": 0.0009756491602706625, "loss": 3.4181, "step": 787 }, { "epoch": 0.13, "learning_rate": 0.0009755685912887798, "loss": 3.3405, "step": 788 }, { "epoch": 0.13, "learning_rate": 0.0009754878925765329, "loss": 3.3718, "step": 789 }, { "epoch": 0.13, "learning_rate": 0.0009754070641559362, "loss": 3.5102, "step": 790 }, { "epoch": 0.13, "learning_rate": 0.0009753261060490384, "loss": 3.4478, "step": 791 }, { "epoch": 0.13, "learning_rate": 0.0009752450182779243, "loss": 3.4663, "step": 792 }, { "epoch": 0.13, "learning_rate": 0.0009751638008647139, "loss": 3.4649, "step": 793 }, { "epoch": 0.13, "learning_rate": 0.0009750824538315623, "loss": 3.4826, "step": 794 }, { "epoch": 0.13, "learning_rate": 0.0009750009772006603, "loss": 3.6714, "step": 795 }, { "epoch": 0.13, "learning_rate": 0.0009749193709942339, "loss": 3.4362, "step": 796 }, { "epoch": 0.13, "learning_rate": 0.0009748376352345443, "loss": 3.5046, "step": 797 }, { "epoch": 0.13, "learning_rate": 0.0009747557699438884, "loss": 3.4076, "step": 798 }, { "epoch": 0.13, "learning_rate": 0.000974673775144598, "loss": 3.608, "step": 799 }, { "epoch": 0.13, "learning_rate": 0.0009745916508590408, "loss": 3.4245, "step": 800 }, { "epoch": 0.13, "learning_rate": 0.0009745093971096193, "loss": 3.362, "step": 801 }, { "epoch": 0.13, "learning_rate": 0.0009744270139187716, "loss": 3.5268, "step": 802 }, { "epoch": 0.13, "learning_rate": 0.0009743445013089707, "loss": 3.5477, "step": 803 }, { "epoch": 0.13, "learning_rate": 0.0009742618593027255, "loss": 3.5585, "step": 804 }, { "epoch": 0.13, "learning_rate": 0.00097417908792258, "loss": 3.5237, "step": 805 }, { "epoch": 0.13, "learning_rate": 0.000974096187191113, "loss": 3.4769, "step": 806 }, { "epoch": 0.13, "learning_rate": 0.0009740131571309394, "loss": 3.5084, "step": 807 }, { "epoch": 0.13, "learning_rate": 0.000973929997764709, "loss": 3.361, "step": 808 }, { "epoch": 0.13, "learning_rate": 0.0009738467091151063, "loss": 3.4713, "step": 809 }, { "epoch": 0.13, "learning_rate": 0.0009737632912048522, "loss": 3.3813, "step": 810 }, { "epoch": 0.13, "learning_rate": 0.000973679744056702, "loss": 3.5314, "step": 811 }, { "epoch": 0.13, "learning_rate": 0.0009735960676934465, "loss": 3.5343, "step": 812 }, { "epoch": 0.13, "learning_rate": 0.000973512262137912, "loss": 3.5633, "step": 813 }, { "epoch": 0.13, "learning_rate": 0.0009734283274129595, "loss": 3.4784, "step": 814 }, { "epoch": 0.13, "learning_rate": 0.0009733442635414858, "loss": 3.5808, "step": 815 }, { "epoch": 0.13, "learning_rate": 0.0009732600705464228, "loss": 3.4311, "step": 816 }, { "epoch": 0.13, "learning_rate": 0.0009731757484507371, "loss": 3.4009, "step": 817 }, { "epoch": 0.13, "learning_rate": 0.0009730912972774311, "loss": 3.4182, "step": 818 }, { "epoch": 0.13, "learning_rate": 0.0009730067170495425, "loss": 3.3137, "step": 819 }, { "epoch": 0.13, "learning_rate": 0.0009729220077901435, "loss": 3.4959, "step": 820 }, { "epoch": 0.13, "learning_rate": 0.0009728371695223424, "loss": 3.4695, "step": 821 }, { "epoch": 0.13, "learning_rate": 0.0009727522022692817, "loss": 3.5425, "step": 822 }, { "epoch": 0.13, "learning_rate": 0.0009726671060541401, "loss": 3.5181, "step": 823 }, { "epoch": 0.13, "learning_rate": 0.0009725818809001308, "loss": 3.4539, "step": 824 }, { "epoch": 0.13, "learning_rate": 0.0009724965268305024, "loss": 3.4355, "step": 825 }, { "epoch": 0.13, "learning_rate": 0.0009724110438685386, "loss": 3.4231, "step": 826 }, { "epoch": 0.13, "learning_rate": 0.0009723254320375583, "loss": 3.3532, "step": 827 }, { "epoch": 0.13, "learning_rate": 0.0009722396913609157, "loss": 3.5464, "step": 828 }, { "epoch": 0.13, "learning_rate": 0.0009721538218619997, "loss": 3.4366, "step": 829 }, { "epoch": 0.13, "learning_rate": 0.000972067823564235, "loss": 3.3732, "step": 830 }, { "epoch": 0.13, "learning_rate": 0.0009719816964910809, "loss": 3.3369, "step": 831 }, { "epoch": 0.13, "learning_rate": 0.0009718954406660317, "loss": 3.4348, "step": 832 }, { "epoch": 0.13, "learning_rate": 0.0009718090561126176, "loss": 3.4791, "step": 833 }, { "epoch": 0.13, "learning_rate": 0.0009717225428544034, "loss": 3.3915, "step": 834 }, { "epoch": 0.13, "learning_rate": 0.0009716359009149889, "loss": 3.5801, "step": 835 }, { "epoch": 0.13, "learning_rate": 0.000971549130318009, "loss": 3.2591, "step": 836 }, { "epoch": 0.13, "learning_rate": 0.0009714622310871342, "loss": 3.4877, "step": 837 }, { "epoch": 0.14, "learning_rate": 0.0009713752032460694, "loss": 3.5312, "step": 838 }, { "epoch": 0.14, "learning_rate": 0.0009712880468185552, "loss": 3.3617, "step": 839 }, { "epoch": 0.14, "learning_rate": 0.000971200761828367, "loss": 3.5226, "step": 840 }, { "epoch": 0.14, "learning_rate": 0.0009711133482993151, "loss": 3.3586, "step": 841 }, { "epoch": 0.14, "learning_rate": 0.0009710258062552451, "loss": 3.579, "step": 842 }, { "epoch": 0.14, "learning_rate": 0.0009709381357200376, "loss": 3.489, "step": 843 }, { "epoch": 0.14, "learning_rate": 0.0009708503367176083, "loss": 3.4277, "step": 844 }, { "epoch": 0.14, "learning_rate": 0.0009707624092719078, "loss": 3.3935, "step": 845 }, { "epoch": 0.14, "learning_rate": 0.000970674353406922, "loss": 3.5132, "step": 846 }, { "epoch": 0.14, "learning_rate": 0.0009705861691466716, "loss": 3.3905, "step": 847 }, { "epoch": 0.14, "learning_rate": 0.000970497856515212, "loss": 3.4973, "step": 848 }, { "epoch": 0.14, "learning_rate": 0.0009704094155366345, "loss": 3.4043, "step": 849 }, { "epoch": 0.14, "learning_rate": 0.0009703208462350649, "loss": 3.4351, "step": 850 }, { "epoch": 0.14, "learning_rate": 0.0009702321486346637, "loss": 3.5083, "step": 851 }, { "epoch": 0.14, "learning_rate": 0.000970143322759627, "loss": 3.4914, "step": 852 }, { "epoch": 0.14, "learning_rate": 0.0009700543686341854, "loss": 3.5055, "step": 853 }, { "epoch": 0.14, "learning_rate": 0.0009699652862826049, "loss": 3.3615, "step": 854 }, { "epoch": 0.14, "learning_rate": 0.000969876075729186, "loss": 3.4662, "step": 855 }, { "epoch": 0.14, "learning_rate": 0.0009697867369982647, "loss": 3.4596, "step": 856 }, { "epoch": 0.14, "learning_rate": 0.0009696972701142118, "loss": 3.4821, "step": 857 }, { "epoch": 0.14, "learning_rate": 0.0009696076751014327, "loss": 3.4982, "step": 858 }, { "epoch": 0.14, "learning_rate": 0.000969517951984368, "loss": 3.4358, "step": 859 }, { "epoch": 0.14, "learning_rate": 0.0009694281007874935, "loss": 3.4106, "step": 860 }, { "epoch": 0.14, "learning_rate": 0.0009693381215353196, "loss": 3.4424, "step": 861 }, { "epoch": 0.14, "learning_rate": 0.0009692480142523918, "loss": 3.5152, "step": 862 }, { "epoch": 0.14, "learning_rate": 0.0009691577789632904, "loss": 3.4181, "step": 863 }, { "epoch": 0.14, "learning_rate": 0.0009690674156926308, "loss": 3.4328, "step": 864 }, { "epoch": 0.14, "learning_rate": 0.0009689769244650631, "loss": 3.4373, "step": 865 }, { "epoch": 0.14, "learning_rate": 0.0009688863053052722, "loss": 3.4083, "step": 866 }, { "epoch": 0.14, "learning_rate": 0.0009687955582379785, "loss": 3.5364, "step": 867 }, { "epoch": 0.14, "learning_rate": 0.0009687046832879367, "loss": 3.4056, "step": 868 }, { "epoch": 0.14, "learning_rate": 0.0009686136804799365, "loss": 3.5085, "step": 869 }, { "epoch": 0.14, "learning_rate": 0.0009685225498388026, "loss": 3.4764, "step": 870 }, { "epoch": 0.14, "learning_rate": 0.0009684312913893947, "loss": 3.3951, "step": 871 }, { "epoch": 0.14, "learning_rate": 0.000968339905156607, "loss": 3.5044, "step": 872 }, { "epoch": 0.14, "learning_rate": 0.000968248391165369, "loss": 3.3641, "step": 873 }, { "epoch": 0.14, "learning_rate": 0.0009681567494406445, "loss": 3.4339, "step": 874 }, { "epoch": 0.14, "learning_rate": 0.0009680649800074323, "loss": 3.5734, "step": 875 }, { "epoch": 0.14, "learning_rate": 0.0009679730828907669, "loss": 3.5639, "step": 876 }, { "epoch": 0.14, "learning_rate": 0.0009678810581157162, "loss": 3.4469, "step": 877 }, { "epoch": 0.14, "learning_rate": 0.000967788905707384, "loss": 3.559, "step": 878 }, { "epoch": 0.14, "learning_rate": 0.0009676966256909085, "loss": 3.5335, "step": 879 }, { "epoch": 0.14, "learning_rate": 0.0009676042180914626, "loss": 3.5049, "step": 880 }, { "epoch": 0.14, "learning_rate": 0.0009675116829342543, "loss": 3.4148, "step": 881 }, { "epoch": 0.14, "learning_rate": 0.0009674190202445264, "loss": 3.2746, "step": 882 }, { "epoch": 0.14, "learning_rate": 0.0009673262300475562, "loss": 3.4149, "step": 883 }, { "epoch": 0.14, "learning_rate": 0.000967233312368656, "loss": 3.4076, "step": 884 }, { "epoch": 0.14, "learning_rate": 0.0009671402672331727, "loss": 3.3937, "step": 885 }, { "epoch": 0.14, "learning_rate": 0.0009670470946664883, "loss": 3.4546, "step": 886 }, { "epoch": 0.14, "learning_rate": 0.000966953794694019, "loss": 3.5755, "step": 887 }, { "epoch": 0.14, "learning_rate": 0.0009668603673412164, "loss": 3.3033, "step": 888 }, { "epoch": 0.14, "learning_rate": 0.0009667668126335665, "loss": 3.4493, "step": 889 }, { "epoch": 0.14, "learning_rate": 0.0009666731305965901, "loss": 3.4355, "step": 890 }, { "epoch": 0.14, "learning_rate": 0.0009665793212558427, "loss": 3.4193, "step": 891 }, { "epoch": 0.14, "learning_rate": 0.0009664853846369143, "loss": 3.4251, "step": 892 }, { "epoch": 0.14, "learning_rate": 0.0009663913207654303, "loss": 3.4735, "step": 893 }, { "epoch": 0.14, "learning_rate": 0.0009662971296670502, "loss": 3.5888, "step": 894 }, { "epoch": 0.14, "learning_rate": 0.0009662028113674683, "loss": 3.2766, "step": 895 }, { "epoch": 0.14, "learning_rate": 0.0009661083658924139, "loss": 3.4096, "step": 896 }, { "epoch": 0.14, "learning_rate": 0.0009660137932676505, "loss": 3.4307, "step": 897 }, { "epoch": 0.14, "learning_rate": 0.0009659190935189768, "loss": 3.3639, "step": 898 }, { "epoch": 0.14, "learning_rate": 0.0009658242666722258, "loss": 3.4123, "step": 899 }, { "epoch": 0.15, "learning_rate": 0.0009657293127532654, "loss": 3.3955, "step": 900 }, { "epoch": 0.15, "learning_rate": 0.000965634231787998, "loss": 3.3262, "step": 901 }, { "epoch": 0.15, "learning_rate": 0.0009655390238023609, "loss": 3.3778, "step": 902 }, { "epoch": 0.15, "learning_rate": 0.000965443688822326, "loss": 3.5448, "step": 903 }, { "epoch": 0.15, "learning_rate": 0.0009653482268738991, "loss": 3.5188, "step": 904 }, { "epoch": 0.15, "learning_rate": 0.000965252637983122, "loss": 3.3605, "step": 905 }, { "epoch": 0.15, "learning_rate": 0.0009651569221760699, "loss": 3.4631, "step": 906 }, { "epoch": 0.15, "learning_rate": 0.0009650610794788533, "loss": 3.3609, "step": 907 }, { "epoch": 0.15, "learning_rate": 0.0009649651099176172, "loss": 3.2997, "step": 908 }, { "epoch": 0.15, "learning_rate": 0.000964869013518541, "loss": 3.5119, "step": 909 }, { "epoch": 0.15, "learning_rate": 0.000964772790307839, "loss": 3.443, "step": 910 }, { "epoch": 0.15, "learning_rate": 0.0009646764403117598, "loss": 3.4665, "step": 911 }, { "epoch": 0.15, "learning_rate": 0.0009645799635565869, "loss": 3.4322, "step": 912 }, { "epoch": 0.15, "learning_rate": 0.0009644833600686378, "loss": 3.3975, "step": 913 }, { "epoch": 0.15, "learning_rate": 0.0009643866298742654, "loss": 3.3586, "step": 914 }, { "epoch": 0.15, "learning_rate": 0.0009642897729998563, "loss": 3.4003, "step": 915 }, { "epoch": 0.15, "learning_rate": 0.0009641927894718325, "loss": 3.4892, "step": 916 }, { "epoch": 0.15, "learning_rate": 0.0009640956793166501, "loss": 3.5207, "step": 917 }, { "epoch": 0.15, "learning_rate": 0.0009639984425607995, "loss": 3.4757, "step": 918 }, { "epoch": 0.15, "learning_rate": 0.000963901079230806, "loss": 3.5099, "step": 919 }, { "epoch": 0.15, "learning_rate": 0.0009638035893532297, "loss": 3.4433, "step": 920 }, { "epoch": 0.15, "learning_rate": 0.0009637059729546645, "loss": 3.4504, "step": 921 }, { "epoch": 0.15, "learning_rate": 0.000963608230061739, "loss": 3.4371, "step": 922 }, { "epoch": 0.15, "learning_rate": 0.000963510360701117, "loss": 3.4395, "step": 923 }, { "epoch": 0.15, "learning_rate": 0.0009634123648994959, "loss": 3.4227, "step": 924 }, { "epoch": 0.15, "learning_rate": 0.0009633142426836081, "loss": 3.4331, "step": 925 }, { "epoch": 0.15, "learning_rate": 0.0009632159940802205, "loss": 3.4253, "step": 926 }, { "epoch": 0.15, "learning_rate": 0.0009631176191161341, "loss": 3.5397, "step": 927 }, { "epoch": 0.15, "learning_rate": 0.0009630191178181848, "loss": 3.4202, "step": 928 }, { "epoch": 0.15, "learning_rate": 0.0009629204902132425, "loss": 3.4039, "step": 929 }, { "epoch": 0.15, "learning_rate": 0.0009628217363282123, "loss": 3.3886, "step": 930 }, { "epoch": 0.15, "learning_rate": 0.0009627228561900328, "loss": 3.4761, "step": 931 }, { "epoch": 0.15, "learning_rate": 0.0009626238498256776, "loss": 3.4573, "step": 932 }, { "epoch": 0.15, "learning_rate": 0.0009625247172621549, "loss": 3.4066, "step": 933 }, { "epoch": 0.15, "learning_rate": 0.0009624254585265068, "loss": 3.4431, "step": 934 }, { "epoch": 0.15, "learning_rate": 0.0009623260736458104, "loss": 3.3837, "step": 935 }, { "epoch": 0.15, "learning_rate": 0.0009622265626471766, "loss": 3.3437, "step": 936 }, { "epoch": 0.15, "learning_rate": 0.0009621269255577512, "loss": 3.462, "step": 937 }, { "epoch": 0.15, "learning_rate": 0.0009620271624047141, "loss": 3.589, "step": 938 }, { "epoch": 0.15, "learning_rate": 0.0009619272732152796, "loss": 3.4145, "step": 939 }, { "epoch": 0.15, "learning_rate": 0.0009618272580166967, "loss": 3.4257, "step": 940 }, { "epoch": 0.15, "learning_rate": 0.0009617271168362485, "loss": 3.5116, "step": 941 }, { "epoch": 0.15, "learning_rate": 0.0009616268497012523, "loss": 3.4112, "step": 942 }, { "epoch": 0.15, "learning_rate": 0.0009615264566390602, "loss": 3.2934, "step": 943 }, { "epoch": 0.15, "learning_rate": 0.0009614259376770584, "loss": 3.4571, "step": 944 }, { "epoch": 0.15, "learning_rate": 0.0009613252928426674, "loss": 3.3936, "step": 945 }, { "epoch": 0.15, "learning_rate": 0.000961224522163342, "loss": 3.4374, "step": 946 }, { "epoch": 0.15, "learning_rate": 0.0009611236256665718, "loss": 3.5991, "step": 947 }, { "epoch": 0.15, "learning_rate": 0.0009610226033798799, "loss": 3.5437, "step": 948 }, { "epoch": 0.15, "learning_rate": 0.0009609214553308246, "loss": 3.305, "step": 949 }, { "epoch": 0.15, "learning_rate": 0.0009608201815469977, "loss": 3.2431, "step": 950 }, { "epoch": 0.15, "learning_rate": 0.0009607187820560258, "loss": 3.3745, "step": 951 }, { "epoch": 0.15, "learning_rate": 0.0009606172568855698, "loss": 3.52, "step": 952 }, { "epoch": 0.15, "learning_rate": 0.0009605156060633246, "loss": 3.3794, "step": 953 }, { "epoch": 0.15, "learning_rate": 0.0009604138296170197, "loss": 3.5553, "step": 954 }, { "epoch": 0.15, "learning_rate": 0.0009603119275744188, "loss": 3.3365, "step": 955 }, { "epoch": 0.15, "learning_rate": 0.0009602098999633192, "loss": 3.2153, "step": 956 }, { "epoch": 0.15, "learning_rate": 0.0009601077468115535, "loss": 3.3288, "step": 957 }, { "epoch": 0.15, "learning_rate": 0.000960005468146988, "loss": 3.4794, "step": 958 }, { "epoch": 0.15, "learning_rate": 0.000959903063997523, "loss": 3.4947, "step": 959 }, { "epoch": 0.15, "learning_rate": 0.0009598005343910938, "loss": 3.4697, "step": 960 }, { "epoch": 0.15, "learning_rate": 0.0009596978793556693, "loss": 3.5168, "step": 961 }, { "epoch": 0.16, "learning_rate": 0.0009595950989192524, "loss": 3.3613, "step": 962 }, { "epoch": 0.16, "learning_rate": 0.000959492193109881, "loss": 3.476, "step": 963 }, { "epoch": 0.16, "learning_rate": 0.0009593891619556265, "loss": 3.4309, "step": 964 }, { "epoch": 0.16, "learning_rate": 0.000959286005484595, "loss": 3.1764, "step": 965 }, { "epoch": 0.16, "learning_rate": 0.0009591827237249264, "loss": 3.3749, "step": 966 }, { "epoch": 0.16, "learning_rate": 0.0009590793167047951, "loss": 3.4264, "step": 967 }, { "epoch": 0.16, "learning_rate": 0.0009589757844524094, "loss": 3.5059, "step": 968 }, { "epoch": 0.16, "learning_rate": 0.0009588721269960118, "loss": 3.3735, "step": 969 }, { "epoch": 0.16, "learning_rate": 0.000958768344363879, "loss": 3.3721, "step": 970 }, { "epoch": 0.16, "learning_rate": 0.0009586644365843221, "loss": 3.4304, "step": 971 }, { "epoch": 0.16, "learning_rate": 0.0009585604036856859, "loss": 3.2022, "step": 972 }, { "epoch": 0.16, "learning_rate": 0.0009584562456963496, "loss": 3.3411, "step": 973 }, { "epoch": 0.16, "learning_rate": 0.0009583519626447265, "loss": 3.5984, "step": 974 }, { "epoch": 0.16, "learning_rate": 0.0009582475545592637, "loss": 3.4496, "step": 975 }, { "epoch": 0.16, "learning_rate": 0.0009581430214684431, "loss": 3.4124, "step": 976 }, { "epoch": 0.16, "learning_rate": 0.0009580383634007801, "loss": 3.3729, "step": 977 }, { "epoch": 0.16, "learning_rate": 0.0009579335803848244, "loss": 3.4016, "step": 978 }, { "epoch": 0.16, "learning_rate": 0.0009578286724491595, "loss": 3.4017, "step": 979 }, { "epoch": 0.16, "learning_rate": 0.0009577236396224036, "loss": 3.3784, "step": 980 }, { "epoch": 0.16, "learning_rate": 0.0009576184819332085, "loss": 3.5411, "step": 981 }, { "epoch": 0.16, "learning_rate": 0.0009575131994102601, "loss": 3.3887, "step": 982 }, { "epoch": 0.16, "learning_rate": 0.0009574077920822784, "loss": 3.2414, "step": 983 }, { "epoch": 0.16, "learning_rate": 0.0009573022599780177, "loss": 3.4576, "step": 984 }, { "epoch": 0.16, "learning_rate": 0.0009571966031262657, "loss": 3.4938, "step": 985 }, { "epoch": 0.16, "learning_rate": 0.0009570908215558449, "loss": 3.483, "step": 986 }, { "epoch": 0.16, "learning_rate": 0.0009569849152956114, "loss": 3.3474, "step": 987 }, { "epoch": 0.16, "learning_rate": 0.0009568788843744552, "loss": 3.3386, "step": 988 }, { "epoch": 0.16, "learning_rate": 0.0009567727288213005, "loss": 3.4209, "step": 989 }, { "epoch": 0.16, "learning_rate": 0.0009566664486651055, "loss": 3.6608, "step": 990 }, { "epoch": 0.16, "learning_rate": 0.0009565600439348625, "loss": 3.5509, "step": 991 }, { "epoch": 0.16, "learning_rate": 0.0009564535146595976, "loss": 3.2398, "step": 992 }, { "epoch": 0.16, "learning_rate": 0.0009563468608683709, "loss": 3.3412, "step": 993 }, { "epoch": 0.16, "learning_rate": 0.0009562400825902764, "loss": 3.4999, "step": 994 }, { "epoch": 0.16, "learning_rate": 0.0009561331798544422, "loss": 3.5068, "step": 995 }, { "epoch": 0.16, "learning_rate": 0.0009560261526900303, "loss": 3.4056, "step": 996 }, { "epoch": 0.16, "learning_rate": 0.0009559190011262369, "loss": 3.4596, "step": 997 }, { "epoch": 0.16, "learning_rate": 0.0009558117251922913, "loss": 3.283, "step": 998 }, { "epoch": 0.16, "learning_rate": 0.0009557043249174578, "loss": 3.4376, "step": 999 }, { "epoch": 0.16, "learning_rate": 0.000955596800331034, "loss": 3.3436, "step": 1000 }, { "epoch": 0.16, "learning_rate": 0.0009554891514623514, "loss": 3.362, "step": 1001 }, { "epoch": 0.16, "learning_rate": 0.0009553813783407756, "loss": 3.5383, "step": 1002 }, { "epoch": 0.16, "learning_rate": 0.0009552734809957062, "loss": 3.4304, "step": 1003 }, { "epoch": 0.16, "learning_rate": 0.0009551654594565763, "loss": 3.378, "step": 1004 }, { "epoch": 0.16, "learning_rate": 0.0009550573137528532, "loss": 3.2956, "step": 1005 }, { "epoch": 0.16, "learning_rate": 0.000954949043914038, "loss": 3.4008, "step": 1006 }, { "epoch": 0.16, "learning_rate": 0.0009548406499696653, "loss": 3.1183, "step": 1007 }, { "epoch": 0.16, "learning_rate": 0.0009547321319493045, "loss": 3.4021, "step": 1008 }, { "epoch": 0.16, "learning_rate": 0.0009546234898825578, "loss": 3.4357, "step": 1009 }, { "epoch": 0.16, "learning_rate": 0.0009545147237990617, "loss": 3.4669, "step": 1010 }, { "epoch": 0.16, "learning_rate": 0.0009544058337284867, "loss": 3.4969, "step": 1011 }, { "epoch": 0.16, "learning_rate": 0.0009542968197005366, "loss": 3.3618, "step": 1012 }, { "epoch": 0.16, "learning_rate": 0.0009541876817449498, "loss": 3.481, "step": 1013 }, { "epoch": 0.16, "learning_rate": 0.0009540784198914976, "loss": 3.3194, "step": 1014 }, { "epoch": 0.16, "learning_rate": 0.0009539690341699857, "loss": 3.5061, "step": 1015 }, { "epoch": 0.16, "learning_rate": 0.0009538595246102535, "loss": 3.3064, "step": 1016 }, { "epoch": 0.16, "learning_rate": 0.0009537498912421741, "loss": 3.4014, "step": 1017 }, { "epoch": 0.16, "learning_rate": 0.0009536401340956542, "loss": 3.5274, "step": 1018 }, { "epoch": 0.16, "learning_rate": 0.0009535302532006348, "loss": 3.4335, "step": 1019 }, { "epoch": 0.16, "learning_rate": 0.00095342024858709, "loss": 3.3156, "step": 1020 }, { "epoch": 0.16, "learning_rate": 0.0009533101202850282, "loss": 3.3732, "step": 1021 }, { "epoch": 0.16, "learning_rate": 0.0009531998683244911, "loss": 3.3095, "step": 1022 }, { "epoch": 0.16, "learning_rate": 0.0009530894927355545, "loss": 3.4049, "step": 1023 }, { "epoch": 0.17, "learning_rate": 0.0009529789935483275, "loss": 3.533, "step": 1024 }, { "epoch": 0.17, "learning_rate": 0.0009528683707929535, "loss": 3.4015, "step": 1025 }, { "epoch": 0.17, "learning_rate": 0.000952757624499609, "loss": 3.561, "step": 1026 }, { "epoch": 0.17, "learning_rate": 0.0009526467546985048, "loss": 3.392, "step": 1027 }, { "epoch": 0.17, "learning_rate": 0.0009525357614198848, "loss": 3.357, "step": 1028 }, { "epoch": 0.17, "learning_rate": 0.000952424644694027, "loss": 3.5334, "step": 1029 }, { "epoch": 0.17, "learning_rate": 0.0009523134045512429, "loss": 3.3527, "step": 1030 }, { "epoch": 0.17, "learning_rate": 0.0009522020410218775, "loss": 3.3857, "step": 1031 }, { "epoch": 0.17, "learning_rate": 0.0009520905541363099, "loss": 3.3311, "step": 1032 }, { "epoch": 0.17, "learning_rate": 0.0009519789439249527, "loss": 3.4424, "step": 1033 }, { "epoch": 0.17, "learning_rate": 0.0009518672104182517, "loss": 3.4414, "step": 1034 }, { "epoch": 0.17, "learning_rate": 0.0009517553536466872, "loss": 3.5108, "step": 1035 }, { "epoch": 0.17, "learning_rate": 0.000951643373640772, "loss": 3.3373, "step": 1036 }, { "epoch": 0.17, "learning_rate": 0.0009515312704310535, "loss": 3.4415, "step": 1037 }, { "epoch": 0.17, "learning_rate": 0.0009514190440481123, "loss": 3.397, "step": 1038 }, { "epoch": 0.17, "learning_rate": 0.0009513066945225626, "loss": 3.3632, "step": 1039 }, { "epoch": 0.17, "learning_rate": 0.0009511942218850523, "loss": 3.4813, "step": 1040 }, { "epoch": 0.17, "learning_rate": 0.0009510816261662627, "loss": 3.3691, "step": 1041 }, { "epoch": 0.17, "learning_rate": 0.0009509689073969088, "loss": 3.296, "step": 1042 }, { "epoch": 0.17, "learning_rate": 0.0009508560656077392, "loss": 3.394, "step": 1043 }, { "epoch": 0.17, "learning_rate": 0.000950743100829536, "loss": 3.426, "step": 1044 }, { "epoch": 0.17, "learning_rate": 0.0009506300130931149, "loss": 3.5267, "step": 1045 }, { "epoch": 0.17, "learning_rate": 0.0009505168024293249, "loss": 3.4061, "step": 1046 }, { "epoch": 0.17, "learning_rate": 0.000950403468869049, "loss": 3.4177, "step": 1047 }, { "epoch": 0.17, "learning_rate": 0.0009502900124432035, "loss": 3.383, "step": 1048 }, { "epoch": 0.17, "learning_rate": 0.0009501764331827378, "loss": 3.3181, "step": 1049 }, { "epoch": 0.17, "learning_rate": 0.0009500627311186356, "loss": 3.3809, "step": 1050 }, { "epoch": 0.17, "learning_rate": 0.0009499489062819136, "loss": 3.4403, "step": 1051 }, { "epoch": 0.17, "learning_rate": 0.0009498349587036219, "loss": 3.4243, "step": 1052 }, { "epoch": 0.17, "learning_rate": 0.0009497208884148444, "loss": 3.3877, "step": 1053 }, { "epoch": 0.17, "learning_rate": 0.0009496066954466983, "loss": 3.4378, "step": 1054 }, { "epoch": 0.17, "learning_rate": 0.0009494923798303343, "loss": 3.4132, "step": 1055 }, { "epoch": 0.17, "learning_rate": 0.0009493779415969365, "loss": 3.4078, "step": 1056 }, { "epoch": 0.17, "learning_rate": 0.0009492633807777227, "loss": 3.5369, "step": 1057 }, { "epoch": 0.17, "learning_rate": 0.0009491486974039438, "loss": 3.4259, "step": 1058 }, { "epoch": 0.17, "learning_rate": 0.0009490338915068841, "loss": 3.5392, "step": 1059 }, { "epoch": 0.17, "learning_rate": 0.0009489189631178618, "loss": 3.4965, "step": 1060 }, { "epoch": 0.17, "learning_rate": 0.0009488039122682283, "loss": 3.4576, "step": 1061 }, { "epoch": 0.17, "learning_rate": 0.0009486887389893679, "loss": 3.3752, "step": 1062 }, { "epoch": 0.17, "learning_rate": 0.0009485734433126991, "loss": 3.4839, "step": 1063 }, { "epoch": 0.17, "learning_rate": 0.0009484580252696733, "loss": 3.4843, "step": 1064 }, { "epoch": 0.17, "learning_rate": 0.0009483424848917753, "loss": 3.3902, "step": 1065 }, { "epoch": 0.17, "learning_rate": 0.0009482268222105234, "loss": 3.3114, "step": 1066 }, { "epoch": 0.17, "learning_rate": 0.0009481110372574693, "loss": 3.4135, "step": 1067 }, { "epoch": 0.17, "learning_rate": 0.000947995130064198, "loss": 3.4888, "step": 1068 }, { "epoch": 0.17, "learning_rate": 0.0009478791006623276, "loss": 3.4122, "step": 1069 }, { "epoch": 0.17, "learning_rate": 0.00094776294908351, "loss": 3.3093, "step": 1070 }, { "epoch": 0.17, "learning_rate": 0.0009476466753594302, "loss": 3.3862, "step": 1071 }, { "epoch": 0.17, "learning_rate": 0.0009475302795218061, "loss": 3.423, "step": 1072 }, { "epoch": 0.17, "learning_rate": 0.0009474137616023899, "loss": 3.4152, "step": 1073 }, { "epoch": 0.17, "learning_rate": 0.0009472971216329662, "loss": 3.3902, "step": 1074 }, { "epoch": 0.17, "learning_rate": 0.0009471803596453535, "loss": 3.2852, "step": 1075 }, { "epoch": 0.17, "learning_rate": 0.0009470634756714027, "loss": 3.3719, "step": 1076 }, { "epoch": 0.17, "learning_rate": 0.0009469464697429992, "loss": 3.3613, "step": 1077 }, { "epoch": 0.17, "learning_rate": 0.0009468293418920608, "loss": 3.4722, "step": 1078 }, { "epoch": 0.17, "learning_rate": 0.0009467120921505388, "loss": 3.3449, "step": 1079 }, { "epoch": 0.17, "learning_rate": 0.0009465947205504178, "loss": 3.2864, "step": 1080 }, { "epoch": 0.17, "learning_rate": 0.0009464772271237155, "loss": 3.4195, "step": 1081 }, { "epoch": 0.17, "learning_rate": 0.0009463596119024831, "loss": 3.3884, "step": 1082 }, { "epoch": 0.17, "learning_rate": 0.0009462418749188048, "loss": 3.5345, "step": 1083 }, { "epoch": 0.17, "learning_rate": 0.000946124016204798, "loss": 3.4601, "step": 1084 }, { "epoch": 0.17, "learning_rate": 0.0009460060357926134, "loss": 3.3562, "step": 1085 }, { "epoch": 0.18, "learning_rate": 0.000945887933714435, "loss": 3.2827, "step": 1086 }, { "epoch": 0.18, "learning_rate": 0.0009457697100024798, "loss": 3.5575, "step": 1087 }, { "epoch": 0.18, "learning_rate": 0.000945651364688998, "loss": 3.3943, "step": 1088 }, { "epoch": 0.18, "learning_rate": 0.000945532897806273, "loss": 3.4887, "step": 1089 }, { "epoch": 0.18, "learning_rate": 0.0009454143093866216, "loss": 3.4859, "step": 1090 }, { "epoch": 0.18, "learning_rate": 0.0009452955994623932, "loss": 3.3214, "step": 1091 }, { "epoch": 0.18, "learning_rate": 0.0009451767680659709, "loss": 3.287, "step": 1092 }, { "epoch": 0.18, "learning_rate": 0.0009450578152297706, "loss": 3.4778, "step": 1093 }, { "epoch": 0.18, "learning_rate": 0.0009449387409862415, "loss": 3.3501, "step": 1094 }, { "epoch": 0.18, "learning_rate": 0.0009448195453678661, "loss": 3.3924, "step": 1095 }, { "epoch": 0.18, "learning_rate": 0.0009447002284071593, "loss": 3.4862, "step": 1096 }, { "epoch": 0.18, "learning_rate": 0.0009445807901366699, "loss": 3.4371, "step": 1097 }, { "epoch": 0.18, "learning_rate": 0.0009444612305889795, "loss": 3.5001, "step": 1098 }, { "epoch": 0.18, "learning_rate": 0.0009443415497967026, "loss": 3.3799, "step": 1099 }, { "epoch": 0.18, "learning_rate": 0.000944221747792487, "loss": 3.428, "step": 1100 }, { "epoch": 0.18, "learning_rate": 0.0009441018246090134, "loss": 3.4268, "step": 1101 }, { "epoch": 0.18, "learning_rate": 0.0009439817802789957, "loss": 3.4337, "step": 1102 }, { "epoch": 0.18, "learning_rate": 0.0009438616148351809, "loss": 3.3007, "step": 1103 }, { "epoch": 0.18, "learning_rate": 0.0009437413283103486, "loss": 3.3873, "step": 1104 }, { "epoch": 0.18, "learning_rate": 0.0009436209207373123, "loss": 3.4679, "step": 1105 }, { "epoch": 0.18, "learning_rate": 0.0009435003921489176, "loss": 3.4424, "step": 1106 }, { "epoch": 0.18, "learning_rate": 0.0009433797425780435, "loss": 3.4173, "step": 1107 }, { "epoch": 0.18, "learning_rate": 0.0009432589720576021, "loss": 3.433, "step": 1108 }, { "epoch": 0.18, "learning_rate": 0.0009431380806205385, "loss": 3.4495, "step": 1109 }, { "epoch": 0.18, "learning_rate": 0.0009430170682998305, "loss": 3.5405, "step": 1110 }, { "epoch": 0.18, "learning_rate": 0.000942895935128489, "loss": 3.3693, "step": 1111 }, { "epoch": 0.18, "learning_rate": 0.0009427746811395581, "loss": 3.3385, "step": 1112 }, { "epoch": 0.18, "learning_rate": 0.0009426533063661147, "loss": 3.3986, "step": 1113 }, { "epoch": 0.18, "learning_rate": 0.0009425318108412684, "loss": 3.4789, "step": 1114 }, { "epoch": 0.18, "learning_rate": 0.000942410194598162, "loss": 3.4064, "step": 1115 }, { "epoch": 0.18, "learning_rate": 0.0009422884576699715, "loss": 3.4204, "step": 1116 }, { "epoch": 0.18, "learning_rate": 0.0009421666000899052, "loss": 3.3817, "step": 1117 }, { "epoch": 0.18, "learning_rate": 0.0009420446218912047, "loss": 3.3647, "step": 1118 }, { "epoch": 0.18, "learning_rate": 0.0009419225231071446, "loss": 3.2934, "step": 1119 }, { "epoch": 0.18, "learning_rate": 0.0009418003037710321, "loss": 3.4512, "step": 1120 }, { "epoch": 0.18, "learning_rate": 0.0009416779639162072, "loss": 3.2144, "step": 1121 }, { "epoch": 0.18, "learning_rate": 0.0009415555035760434, "loss": 3.4466, "step": 1122 }, { "epoch": 0.18, "learning_rate": 0.0009414329227839464, "loss": 3.5094, "step": 1123 }, { "epoch": 0.18, "learning_rate": 0.0009413102215733553, "loss": 3.4368, "step": 1124 }, { "epoch": 0.18, "learning_rate": 0.0009411873999777414, "loss": 3.352, "step": 1125 }, { "epoch": 0.18, "learning_rate": 0.0009410644580306092, "loss": 3.2927, "step": 1126 }, { "epoch": 0.18, "learning_rate": 0.0009409413957654964, "loss": 3.3452, "step": 1127 }, { "epoch": 0.18, "learning_rate": 0.0009408182132159728, "loss": 3.4336, "step": 1128 }, { "epoch": 0.18, "learning_rate": 0.0009406949104156417, "loss": 3.3229, "step": 1129 }, { "epoch": 0.18, "learning_rate": 0.0009405714873981386, "loss": 3.4584, "step": 1130 }, { "epoch": 0.18, "learning_rate": 0.0009404479441971321, "loss": 3.5503, "step": 1131 }, { "epoch": 0.18, "learning_rate": 0.0009403242808463236, "loss": 3.3838, "step": 1132 }, { "epoch": 0.18, "learning_rate": 0.0009402004973794474, "loss": 3.3587, "step": 1133 }, { "epoch": 0.18, "learning_rate": 0.00094007659383027, "loss": 3.3125, "step": 1134 }, { "epoch": 0.18, "learning_rate": 0.0009399525702325915, "loss": 3.3735, "step": 1135 }, { "epoch": 0.18, "learning_rate": 0.0009398284266202439, "loss": 3.514, "step": 1136 }, { "epoch": 0.18, "learning_rate": 0.0009397041630270926, "loss": 3.4996, "step": 1137 }, { "epoch": 0.18, "learning_rate": 0.0009395797794870354, "loss": 3.328, "step": 1138 }, { "epoch": 0.18, "learning_rate": 0.0009394552760340029, "loss": 3.4831, "step": 1139 }, { "epoch": 0.18, "learning_rate": 0.0009393306527019584, "loss": 3.3892, "step": 1140 }, { "epoch": 0.18, "learning_rate": 0.0009392059095248978, "loss": 3.3798, "step": 1141 }, { "epoch": 0.18, "learning_rate": 0.0009390810465368499, "loss": 3.3717, "step": 1142 }, { "epoch": 0.18, "learning_rate": 0.000938956063771876, "loss": 3.3771, "step": 1143 }, { "epoch": 0.18, "learning_rate": 0.0009388309612640703, "loss": 3.476, "step": 1144 }, { "epoch": 0.18, "learning_rate": 0.0009387057390475593, "loss": 3.3482, "step": 1145 }, { "epoch": 0.18, "learning_rate": 0.0009385803971565025, "loss": 3.343, "step": 1146 }, { "epoch": 0.18, "learning_rate": 0.000938454935625092, "loss": 3.3992, "step": 1147 }, { "epoch": 0.19, "learning_rate": 0.0009383293544875523, "loss": 3.2775, "step": 1148 }, { "epoch": 0.19, "learning_rate": 0.0009382036537781408, "loss": 3.4508, "step": 1149 }, { "epoch": 0.19, "learning_rate": 0.0009380778335311472, "loss": 3.3439, "step": 1150 }, { "epoch": 0.19, "learning_rate": 0.0009379518937808941, "loss": 3.452, "step": 1151 }, { "epoch": 0.19, "learning_rate": 0.0009378258345617368, "loss": 3.3057, "step": 1152 }, { "epoch": 0.19, "learning_rate": 0.0009376996559080628, "loss": 3.6207, "step": 1153 }, { "epoch": 0.19, "learning_rate": 0.0009375733578542925, "loss": 3.4744, "step": 1154 }, { "epoch": 0.19, "learning_rate": 0.0009374469404348784, "loss": 3.3499, "step": 1155 }, { "epoch": 0.19, "learning_rate": 0.0009373204036843064, "loss": 3.2892, "step": 1156 }, { "epoch": 0.19, "learning_rate": 0.0009371937476370942, "loss": 3.4037, "step": 1157 }, { "epoch": 0.19, "learning_rate": 0.0009370669723277922, "loss": 3.1885, "step": 1158 }, { "epoch": 0.19, "learning_rate": 0.0009369400777909838, "loss": 3.2903, "step": 1159 }, { "epoch": 0.19, "learning_rate": 0.0009368130640612842, "loss": 3.4753, "step": 1160 }, { "epoch": 0.19, "learning_rate": 0.0009366859311733418, "loss": 3.3379, "step": 1161 }, { "epoch": 0.19, "learning_rate": 0.0009365586791618368, "loss": 3.2838, "step": 1162 }, { "epoch": 0.19, "learning_rate": 0.0009364313080614826, "loss": 3.4207, "step": 1163 }, { "epoch": 0.19, "learning_rate": 0.0009363038179070245, "loss": 3.4738, "step": 1164 }, { "epoch": 0.19, "learning_rate": 0.0009361762087332408, "loss": 3.3545, "step": 1165 }, { "epoch": 0.19, "learning_rate": 0.0009360484805749418, "loss": 3.269, "step": 1166 }, { "epoch": 0.19, "learning_rate": 0.0009359206334669707, "loss": 3.2842, "step": 1167 }, { "epoch": 0.19, "learning_rate": 0.0009357926674442027, "loss": 3.2509, "step": 1168 }, { "epoch": 0.19, "learning_rate": 0.0009356645825415459, "loss": 3.4129, "step": 1169 }, { "epoch": 0.19, "learning_rate": 0.0009355363787939404, "loss": 3.5417, "step": 1170 }, { "epoch": 0.19, "learning_rate": 0.0009354080562363588, "loss": 3.4689, "step": 1171 }, { "epoch": 0.19, "learning_rate": 0.0009352796149038063, "loss": 3.3206, "step": 1172 }, { "epoch": 0.19, "learning_rate": 0.0009351510548313204, "loss": 3.3774, "step": 1173 }, { "epoch": 0.19, "learning_rate": 0.0009350223760539713, "loss": 3.4419, "step": 1174 }, { "epoch": 0.19, "learning_rate": 0.0009348935786068608, "loss": 3.453, "step": 1175 }, { "epoch": 0.19, "learning_rate": 0.0009347646625251238, "loss": 3.3074, "step": 1176 }, { "epoch": 0.19, "learning_rate": 0.0009346356278439274, "loss": 3.4048, "step": 1177 }, { "epoch": 0.19, "learning_rate": 0.0009345064745984709, "loss": 3.3332, "step": 1178 }, { "epoch": 0.19, "learning_rate": 0.0009343772028239858, "loss": 3.4451, "step": 1179 }, { "epoch": 0.19, "learning_rate": 0.0009342478125557366, "loss": 3.3869, "step": 1180 }, { "epoch": 0.19, "learning_rate": 0.0009341183038290193, "loss": 3.3183, "step": 1181 }, { "epoch": 0.19, "learning_rate": 0.0009339886766791628, "loss": 3.4331, "step": 1182 }, { "epoch": 0.19, "learning_rate": 0.000933858931141528, "loss": 3.3845, "step": 1183 }, { "epoch": 0.19, "learning_rate": 0.0009337290672515081, "loss": 3.299, "step": 1184 }, { "epoch": 0.19, "learning_rate": 0.0009335990850445288, "loss": 3.5026, "step": 1185 }, { "epoch": 0.19, "learning_rate": 0.0009334689845560479, "loss": 3.434, "step": 1186 }, { "epoch": 0.19, "learning_rate": 0.0009333387658215555, "loss": 3.3191, "step": 1187 }, { "epoch": 0.19, "learning_rate": 0.000933208428876574, "loss": 3.3561, "step": 1188 }, { "epoch": 0.19, "learning_rate": 0.0009330779737566581, "loss": 3.2574, "step": 1189 }, { "epoch": 0.19, "learning_rate": 0.0009329474004973945, "loss": 3.2602, "step": 1190 }, { "epoch": 0.19, "learning_rate": 0.0009328167091344024, "loss": 3.2468, "step": 1191 }, { "epoch": 0.19, "learning_rate": 0.0009326858997033329, "loss": 3.4466, "step": 1192 }, { "epoch": 0.19, "learning_rate": 0.0009325549722398698, "loss": 3.3555, "step": 1193 }, { "epoch": 0.19, "learning_rate": 0.0009324239267797287, "loss": 3.2234, "step": 1194 }, { "epoch": 0.19, "learning_rate": 0.0009322927633586575, "loss": 3.3295, "step": 1195 }, { "epoch": 0.19, "learning_rate": 0.000932161482012436, "loss": 3.272, "step": 1196 }, { "epoch": 0.19, "learning_rate": 0.0009320300827768769, "loss": 3.4187, "step": 1197 }, { "epoch": 0.19, "learning_rate": 0.0009318985656878243, "loss": 3.4064, "step": 1198 }, { "epoch": 0.19, "learning_rate": 0.0009317669307811547, "loss": 3.3096, "step": 1199 }, { "epoch": 0.19, "learning_rate": 0.000931635178092777, "loss": 3.393, "step": 1200 }, { "epoch": 0.19, "learning_rate": 0.0009315033076586319, "loss": 3.2821, "step": 1201 }, { "epoch": 0.19, "learning_rate": 0.0009313713195146923, "loss": 3.3809, "step": 1202 }, { "epoch": 0.19, "learning_rate": 0.0009312392136969634, "loss": 3.3547, "step": 1203 }, { "epoch": 0.19, "learning_rate": 0.0009311069902414821, "loss": 3.5618, "step": 1204 }, { "epoch": 0.19, "learning_rate": 0.0009309746491843181, "loss": 3.3222, "step": 1205 }, { "epoch": 0.19, "learning_rate": 0.0009308421905615722, "loss": 3.3861, "step": 1206 }, { "epoch": 0.19, "learning_rate": 0.000930709614409378, "loss": 3.5047, "step": 1207 }, { "epoch": 0.19, "learning_rate": 0.000930576920763901, "loss": 3.4651, "step": 1208 }, { "epoch": 0.19, "learning_rate": 0.0009304441096613386, "loss": 3.4263, "step": 1209 }, { "epoch": 0.2, "learning_rate": 0.0009303111811379206, "loss": 3.3901, "step": 1210 }, { "epoch": 0.2, "learning_rate": 0.0009301781352299081, "loss": 3.3603, "step": 1211 }, { "epoch": 0.2, "learning_rate": 0.000930044971973595, "loss": 3.3481, "step": 1212 }, { "epoch": 0.2, "learning_rate": 0.0009299116914053071, "loss": 3.2465, "step": 1213 }, { "epoch": 0.2, "learning_rate": 0.0009297782935614017, "loss": 3.4738, "step": 1214 }, { "epoch": 0.2, "learning_rate": 0.0009296447784782684, "loss": 3.3136, "step": 1215 }, { "epoch": 0.2, "learning_rate": 0.0009295111461923289, "loss": 3.2506, "step": 1216 }, { "epoch": 0.2, "learning_rate": 0.0009293773967400368, "loss": 3.2414, "step": 1217 }, { "epoch": 0.2, "learning_rate": 0.0009292435301578773, "loss": 3.3469, "step": 1218 }, { "epoch": 0.2, "learning_rate": 0.0009291095464823683, "loss": 3.3221, "step": 1219 }, { "epoch": 0.2, "learning_rate": 0.000928975445750059, "loss": 3.357, "step": 1220 }, { "epoch": 0.2, "learning_rate": 0.0009288412279975307, "loss": 3.3918, "step": 1221 }, { "epoch": 0.2, "learning_rate": 0.0009287068932613967, "loss": 3.4859, "step": 1222 }, { "epoch": 0.2, "learning_rate": 0.0009285724415783023, "loss": 3.4007, "step": 1223 }, { "epoch": 0.2, "learning_rate": 0.0009284378729849243, "loss": 3.2551, "step": 1224 }, { "epoch": 0.2, "learning_rate": 0.000928303187517972, "loss": 3.3322, "step": 1225 }, { "epoch": 0.2, "learning_rate": 0.000928168385214186, "loss": 3.2775, "step": 1226 }, { "epoch": 0.2, "learning_rate": 0.0009280334661103391, "loss": 3.4409, "step": 1227 }, { "epoch": 0.2, "learning_rate": 0.0009278984302432358, "loss": 3.3964, "step": 1228 }, { "epoch": 0.2, "learning_rate": 0.0009277632776497129, "loss": 3.3164, "step": 1229 }, { "epoch": 0.2, "learning_rate": 0.0009276280083666382, "loss": 3.3247, "step": 1230 }, { "epoch": 0.2, "learning_rate": 0.0009274926224309121, "loss": 3.2771, "step": 1231 }, { "epoch": 0.2, "learning_rate": 0.0009273571198794663, "loss": 3.3696, "step": 1232 }, { "epoch": 0.2, "learning_rate": 0.000927221500749265, "loss": 3.2238, "step": 1233 }, { "epoch": 0.2, "learning_rate": 0.0009270857650773032, "loss": 3.2579, "step": 1234 }, { "epoch": 0.2, "learning_rate": 0.0009269499129006085, "loss": 3.4019, "step": 1235 }, { "epoch": 0.2, "learning_rate": 0.00092681394425624, "loss": 3.4263, "step": 1236 }, { "epoch": 0.2, "learning_rate": 0.0009266778591812885, "loss": 3.4831, "step": 1237 }, { "epoch": 0.2, "learning_rate": 0.0009265416577128769, "loss": 3.5093, "step": 1238 }, { "epoch": 0.2, "learning_rate": 0.0009264053398881593, "loss": 3.4835, "step": 1239 }, { "epoch": 0.2, "learning_rate": 0.000926268905744322, "loss": 3.473, "step": 1240 }, { "epoch": 0.2, "learning_rate": 0.000926132355318583, "loss": 3.3827, "step": 1241 }, { "epoch": 0.2, "learning_rate": 0.0009259956886481916, "loss": 3.4095, "step": 1242 }, { "epoch": 0.2, "learning_rate": 0.0009258589057704291, "loss": 3.2897, "step": 1243 }, { "epoch": 0.2, "learning_rate": 0.0009257220067226088, "loss": 3.4457, "step": 1244 }, { "epoch": 0.2, "learning_rate": 0.0009255849915420754, "loss": 3.3444, "step": 1245 }, { "epoch": 0.2, "learning_rate": 0.0009254478602662049, "loss": 3.345, "step": 1246 }, { "epoch": 0.2, "learning_rate": 0.0009253106129324056, "loss": 3.2801, "step": 1247 }, { "epoch": 0.2, "learning_rate": 0.0009251732495781171, "loss": 3.4944, "step": 1248 }, { "epoch": 0.2, "learning_rate": 0.000925035770240811, "loss": 3.3267, "step": 1249 }, { "epoch": 0.2, "learning_rate": 0.0009248981749579899, "loss": 3.4009, "step": 1250 }, { "epoch": 0.2, "learning_rate": 0.0009247604637671887, "loss": 3.4202, "step": 1251 }, { "epoch": 0.2, "learning_rate": 0.0009246226367059736, "loss": 3.2489, "step": 1252 }, { "epoch": 0.2, "learning_rate": 0.0009244846938119422, "loss": 3.4814, "step": 1253 }, { "epoch": 0.2, "learning_rate": 0.0009243466351227243, "loss": 3.416, "step": 1254 }, { "epoch": 0.2, "learning_rate": 0.0009242084606759809, "loss": 3.3531, "step": 1255 }, { "epoch": 0.2, "learning_rate": 0.0009240701705094044, "loss": 3.3162, "step": 1256 }, { "epoch": 0.2, "learning_rate": 0.000923931764660719, "loss": 3.2706, "step": 1257 }, { "epoch": 0.2, "learning_rate": 0.0009237932431676806, "loss": 3.3022, "step": 1258 }, { "epoch": 0.2, "learning_rate": 0.0009236546060680761, "loss": 3.3965, "step": 1259 }, { "epoch": 0.2, "learning_rate": 0.0009235158533997249, "loss": 3.4596, "step": 1260 }, { "epoch": 0.2, "learning_rate": 0.0009233769852004769, "loss": 3.3808, "step": 1261 }, { "epoch": 0.2, "learning_rate": 0.0009232380015082143, "loss": 3.3786, "step": 1262 }, { "epoch": 0.2, "learning_rate": 0.00092309890236085, "loss": 3.4302, "step": 1263 }, { "epoch": 0.2, "learning_rate": 0.0009229596877963292, "loss": 3.3764, "step": 1264 }, { "epoch": 0.2, "learning_rate": 0.0009228203578526281, "loss": 3.4333, "step": 1265 }, { "epoch": 0.2, "learning_rate": 0.0009226809125677548, "loss": 3.5228, "step": 1266 }, { "epoch": 0.2, "learning_rate": 0.0009225413519797482, "loss": 3.2941, "step": 1267 }, { "epoch": 0.2, "learning_rate": 0.0009224016761266793, "loss": 3.3698, "step": 1268 }, { "epoch": 0.2, "learning_rate": 0.00092226188504665, "loss": 3.3965, "step": 1269 }, { "epoch": 0.2, "learning_rate": 0.0009221219787777942, "loss": 3.3558, "step": 1270 }, { "epoch": 0.2, "learning_rate": 0.0009219819573582768, "loss": 3.5112, "step": 1271 }, { "epoch": 0.21, "learning_rate": 0.0009218418208262944, "loss": 3.3175, "step": 1272 }, { "epoch": 0.21, "learning_rate": 0.0009217015692200745, "loss": 3.3449, "step": 1273 }, { "epoch": 0.21, "learning_rate": 0.0009215612025778767, "loss": 3.3651, "step": 1274 }, { "epoch": 0.21, "learning_rate": 0.0009214207209379914, "loss": 3.4455, "step": 1275 }, { "epoch": 0.21, "learning_rate": 0.0009212801243387406, "loss": 3.4602, "step": 1276 }, { "epoch": 0.21, "learning_rate": 0.0009211394128184778, "loss": 3.5385, "step": 1277 }, { "epoch": 0.21, "learning_rate": 0.0009209985864155877, "loss": 3.2499, "step": 1278 }, { "epoch": 0.21, "learning_rate": 0.000920857645168486, "loss": 3.4178, "step": 1279 }, { "epoch": 0.21, "learning_rate": 0.0009207165891156205, "loss": 3.452, "step": 1280 }, { "epoch": 0.21, "learning_rate": 0.0009205754182954696, "loss": 3.3734, "step": 1281 }, { "epoch": 0.21, "learning_rate": 0.0009204341327465434, "loss": 3.4437, "step": 1282 }, { "epoch": 0.21, "learning_rate": 0.0009202927325073832, "loss": 3.4694, "step": 1283 }, { "epoch": 0.21, "learning_rate": 0.0009201512176165615, "loss": 3.4712, "step": 1284 }, { "epoch": 0.21, "learning_rate": 0.0009200095881126822, "loss": 3.3471, "step": 1285 }, { "epoch": 0.21, "learning_rate": 0.0009198678440343804, "loss": 3.4713, "step": 1286 }, { "epoch": 0.21, "learning_rate": 0.0009197259854203226, "loss": 3.3782, "step": 1287 }, { "epoch": 0.21, "learning_rate": 0.000919584012309206, "loss": 3.4582, "step": 1288 }, { "epoch": 0.21, "learning_rate": 0.00091944192473976, "loss": 3.4302, "step": 1289 }, { "epoch": 0.21, "learning_rate": 0.0009192997227507443, "loss": 3.3561, "step": 1290 }, { "epoch": 0.21, "learning_rate": 0.0009191574063809504, "loss": 3.41, "step": 1291 }, { "epoch": 0.21, "learning_rate": 0.0009190149756692006, "loss": 3.3291, "step": 1292 }, { "epoch": 0.21, "learning_rate": 0.0009188724306543486, "loss": 3.4251, "step": 1293 }, { "epoch": 0.21, "learning_rate": 0.0009187297713752794, "loss": 3.4574, "step": 1294 }, { "epoch": 0.21, "learning_rate": 0.0009185869978709087, "loss": 3.2576, "step": 1295 }, { "epoch": 0.21, "learning_rate": 0.0009184441101801841, "loss": 3.5057, "step": 1296 }, { "epoch": 0.21, "learning_rate": 0.0009183011083420838, "loss": 3.2183, "step": 1297 }, { "epoch": 0.21, "learning_rate": 0.0009181579923956171, "loss": 3.336, "step": 1298 }, { "epoch": 0.21, "learning_rate": 0.0009180147623798249, "loss": 3.3807, "step": 1299 }, { "epoch": 0.21, "learning_rate": 0.0009178714183337787, "loss": 3.3969, "step": 1300 }, { "epoch": 0.21, "learning_rate": 0.0009177279602965813, "loss": 3.4602, "step": 1301 }, { "epoch": 0.21, "learning_rate": 0.0009175843883073667, "loss": 3.2714, "step": 1302 }, { "epoch": 0.21, "learning_rate": 0.0009174407024053, "loss": 3.3338, "step": 1303 }, { "epoch": 0.21, "learning_rate": 0.0009172969026295769, "loss": 3.175, "step": 1304 }, { "epoch": 0.21, "learning_rate": 0.0009171529890194252, "loss": 3.3754, "step": 1305 }, { "epoch": 0.21, "learning_rate": 0.0009170089616141026, "loss": 3.4411, "step": 1306 }, { "epoch": 0.21, "learning_rate": 0.0009168648204528983, "loss": 3.258, "step": 1307 }, { "epoch": 0.21, "learning_rate": 0.0009167205655751329, "loss": 3.2787, "step": 1308 }, { "epoch": 0.21, "learning_rate": 0.0009165761970201574, "loss": 3.401, "step": 1309 }, { "epoch": 0.21, "learning_rate": 0.0009164317148273543, "loss": 3.4217, "step": 1310 }, { "epoch": 0.21, "learning_rate": 0.0009162871190361369, "loss": 3.3705, "step": 1311 }, { "epoch": 0.21, "learning_rate": 0.0009161424096859492, "loss": 3.4489, "step": 1312 }, { "epoch": 0.21, "learning_rate": 0.0009159975868162668, "loss": 3.3931, "step": 1313 }, { "epoch": 0.21, "learning_rate": 0.0009158526504665957, "loss": 3.2986, "step": 1314 }, { "epoch": 0.21, "learning_rate": 0.0009157076006764733, "loss": 3.3457, "step": 1315 }, { "epoch": 0.21, "learning_rate": 0.0009155624374854676, "loss": 3.4797, "step": 1316 }, { "epoch": 0.21, "learning_rate": 0.0009154171609331778, "loss": 3.3956, "step": 1317 }, { "epoch": 0.21, "learning_rate": 0.0009152717710592336, "loss": 3.3376, "step": 1318 }, { "epoch": 0.21, "learning_rate": 0.0009151262679032963, "loss": 3.3388, "step": 1319 }, { "epoch": 0.21, "learning_rate": 0.0009149806515050573, "loss": 3.4063, "step": 1320 }, { "epoch": 0.21, "learning_rate": 0.0009148349219042396, "loss": 3.5079, "step": 1321 }, { "epoch": 0.21, "learning_rate": 0.0009146890791405966, "loss": 3.2323, "step": 1322 }, { "epoch": 0.21, "learning_rate": 0.000914543123253913, "loss": 3.3223, "step": 1323 }, { "epoch": 0.21, "learning_rate": 0.0009143970542840039, "loss": 3.3247, "step": 1324 }, { "epoch": 0.21, "learning_rate": 0.0009142508722707154, "loss": 3.4344, "step": 1325 }, { "epoch": 0.21, "learning_rate": 0.0009141045772539247, "loss": 3.4103, "step": 1326 }, { "epoch": 0.21, "learning_rate": 0.0009139581692735395, "loss": 3.2872, "step": 1327 }, { "epoch": 0.21, "learning_rate": 0.0009138116483694984, "loss": 3.3249, "step": 1328 }, { "epoch": 0.21, "learning_rate": 0.0009136650145817708, "loss": 3.3037, "step": 1329 }, { "epoch": 0.21, "learning_rate": 0.0009135182679503573, "loss": 3.2295, "step": 1330 }, { "epoch": 0.21, "learning_rate": 0.0009133714085152884, "loss": 3.2756, "step": 1331 }, { "epoch": 0.21, "learning_rate": 0.0009132244363166262, "loss": 3.2556, "step": 1332 }, { "epoch": 0.21, "learning_rate": 0.0009130773513944632, "loss": 3.4014, "step": 1333 }, { "epoch": 0.22, "learning_rate": 0.0009129301537889227, "loss": 3.2324, "step": 1334 }, { "epoch": 0.22, "learning_rate": 0.0009127828435401587, "loss": 3.2639, "step": 1335 }, { "epoch": 0.22, "learning_rate": 0.000912635420688356, "loss": 3.5575, "step": 1336 }, { "epoch": 0.22, "learning_rate": 0.00091248788527373, "loss": 3.5006, "step": 1337 }, { "epoch": 0.22, "learning_rate": 0.0009123402373365269, "loss": 3.4526, "step": 1338 }, { "epoch": 0.22, "learning_rate": 0.0009121924769170237, "loss": 3.3776, "step": 1339 }, { "epoch": 0.22, "learning_rate": 0.0009120446040555279, "loss": 3.4802, "step": 1340 }, { "epoch": 0.22, "learning_rate": 0.0009118966187923777, "loss": 3.4072, "step": 1341 }, { "epoch": 0.22, "learning_rate": 0.000911748521167942, "loss": 3.5645, "step": 1342 }, { "epoch": 0.22, "learning_rate": 0.0009116003112226205, "loss": 3.3619, "step": 1343 }, { "epoch": 0.22, "learning_rate": 0.0009114519889968434, "loss": 3.3224, "step": 1344 }, { "epoch": 0.22, "learning_rate": 0.000911303554531071, "loss": 3.249, "step": 1345 }, { "epoch": 0.22, "learning_rate": 0.0009111550078657956, "loss": 3.1902, "step": 1346 }, { "epoch": 0.22, "learning_rate": 0.0009110063490415385, "loss": 3.4587, "step": 1347 }, { "epoch": 0.22, "learning_rate": 0.0009108575780988527, "loss": 3.3273, "step": 1348 }, { "epoch": 0.22, "learning_rate": 0.0009107086950783215, "loss": 3.3583, "step": 1349 }, { "epoch": 0.22, "learning_rate": 0.0009105597000205585, "loss": 3.4856, "step": 1350 }, { "epoch": 0.22, "learning_rate": 0.0009104105929662081, "loss": 3.3383, "step": 1351 }, { "epoch": 0.22, "learning_rate": 0.0009102613739559452, "loss": 3.4071, "step": 1352 }, { "epoch": 0.22, "learning_rate": 0.0009101120430304755, "loss": 3.3558, "step": 1353 }, { "epoch": 0.22, "learning_rate": 0.0009099626002305346, "loss": 3.4049, "step": 1354 }, { "epoch": 0.22, "learning_rate": 0.0009098130455968892, "loss": 3.3784, "step": 1355 }, { "epoch": 0.22, "learning_rate": 0.0009096633791703364, "loss": 3.2711, "step": 1356 }, { "epoch": 0.22, "learning_rate": 0.0009095136009917036, "loss": 3.3998, "step": 1357 }, { "epoch": 0.22, "learning_rate": 0.0009093637111018487, "loss": 3.3993, "step": 1358 }, { "epoch": 0.22, "learning_rate": 0.0009092137095416604, "loss": 3.5481, "step": 1359 }, { "epoch": 0.22, "learning_rate": 0.0009090635963520575, "loss": 3.4415, "step": 1360 }, { "epoch": 0.22, "learning_rate": 0.0009089133715739893, "loss": 3.3139, "step": 1361 }, { "epoch": 0.22, "learning_rate": 0.0009087630352484357, "loss": 3.2524, "step": 1362 }, { "epoch": 0.22, "learning_rate": 0.000908612587416407, "loss": 3.2122, "step": 1363 }, { "epoch": 0.22, "learning_rate": 0.0009084620281189438, "loss": 3.4643, "step": 1364 }, { "epoch": 0.22, "learning_rate": 0.0009083113573971171, "loss": 3.4022, "step": 1365 }, { "epoch": 0.22, "learning_rate": 0.0009081605752920285, "loss": 3.3301, "step": 1366 }, { "epoch": 0.22, "learning_rate": 0.00090800968184481, "loss": 3.4332, "step": 1367 }, { "epoch": 0.22, "learning_rate": 0.0009078586770966236, "loss": 3.269, "step": 1368 }, { "epoch": 0.22, "learning_rate": 0.0009077075610886616, "loss": 3.3823, "step": 1369 }, { "epoch": 0.22, "learning_rate": 0.0009075563338621476, "loss": 3.2805, "step": 1370 }, { "epoch": 0.22, "learning_rate": 0.0009074049954583344, "loss": 3.2919, "step": 1371 }, { "epoch": 0.22, "learning_rate": 0.0009072535459185056, "loss": 3.2892, "step": 1372 }, { "epoch": 0.22, "learning_rate": 0.0009071019852839755, "loss": 3.3164, "step": 1373 }, { "epoch": 0.22, "learning_rate": 0.000906950313596088, "loss": 3.4243, "step": 1374 }, { "epoch": 0.22, "learning_rate": 0.0009067985308962176, "loss": 3.5045, "step": 1375 }, { "epoch": 0.22, "learning_rate": 0.0009066466372257694, "loss": 3.3844, "step": 1376 }, { "epoch": 0.22, "learning_rate": 0.0009064946326261784, "loss": 3.3357, "step": 1377 }, { "epoch": 0.22, "learning_rate": 0.0009063425171389097, "loss": 3.3924, "step": 1378 }, { "epoch": 0.22, "learning_rate": 0.0009061902908054589, "loss": 3.3363, "step": 1379 }, { "epoch": 0.22, "learning_rate": 0.0009060379536673521, "loss": 3.1473, "step": 1380 }, { "epoch": 0.22, "learning_rate": 0.0009058855057661452, "loss": 3.3926, "step": 1381 }, { "epoch": 0.22, "learning_rate": 0.0009057329471434244, "loss": 3.3823, "step": 1382 }, { "epoch": 0.22, "learning_rate": 0.0009055802778408063, "loss": 3.379, "step": 1383 }, { "epoch": 0.22, "learning_rate": 0.0009054274978999373, "loss": 3.3205, "step": 1384 }, { "epoch": 0.22, "learning_rate": 0.0009052746073624947, "loss": 3.4581, "step": 1385 }, { "epoch": 0.22, "learning_rate": 0.000905121606270185, "loss": 3.4159, "step": 1386 }, { "epoch": 0.22, "learning_rate": 0.0009049684946647458, "loss": 3.4125, "step": 1387 }, { "epoch": 0.22, "learning_rate": 0.0009048152725879442, "loss": 3.3618, "step": 1388 }, { "epoch": 0.22, "learning_rate": 0.0009046619400815777, "loss": 3.2907, "step": 1389 }, { "epoch": 0.22, "learning_rate": 0.0009045084971874737, "loss": 3.341, "step": 1390 }, { "epoch": 0.22, "learning_rate": 0.0009043549439474903, "loss": 3.3855, "step": 1391 }, { "epoch": 0.22, "learning_rate": 0.000904201280403515, "loss": 3.4016, "step": 1392 }, { "epoch": 0.22, "learning_rate": 0.0009040475065974656, "loss": 3.3087, "step": 1393 }, { "epoch": 0.22, "learning_rate": 0.0009038936225712901, "loss": 3.3458, "step": 1394 }, { "epoch": 0.22, "learning_rate": 0.0009037396283669667, "loss": 3.3492, "step": 1395 }, { "epoch": 0.23, "learning_rate": 0.0009035855240265037, "loss": 3.2055, "step": 1396 }, { "epoch": 0.23, "learning_rate": 0.0009034313095919386, "loss": 3.2368, "step": 1397 }, { "epoch": 0.23, "learning_rate": 0.0009032769851053399, "loss": 3.5413, "step": 1398 }, { "epoch": 0.23, "learning_rate": 0.0009031225506088057, "loss": 3.3277, "step": 1399 }, { "epoch": 0.23, "learning_rate": 0.0009029680061444645, "loss": 3.4812, "step": 1400 }, { "epoch": 0.23, "learning_rate": 0.0009028133517544741, "loss": 3.4095, "step": 1401 }, { "epoch": 0.23, "learning_rate": 0.0009026585874810227, "loss": 3.3131, "step": 1402 }, { "epoch": 0.23, "learning_rate": 0.0009025037133663287, "loss": 3.3154, "step": 1403 }, { "epoch": 0.23, "learning_rate": 0.00090234872945264, "loss": 3.3992, "step": 1404 }, { "epoch": 0.23, "learning_rate": 0.0009021936357822347, "loss": 3.3944, "step": 1405 }, { "epoch": 0.23, "learning_rate": 0.0009020384323974209, "loss": 3.4425, "step": 1406 }, { "epoch": 0.23, "learning_rate": 0.0009018831193405365, "loss": 3.2933, "step": 1407 }, { "epoch": 0.23, "learning_rate": 0.0009017276966539491, "loss": 3.3987, "step": 1408 }, { "epoch": 0.23, "learning_rate": 0.000901572164380057, "loss": 3.4202, "step": 1409 }, { "epoch": 0.23, "learning_rate": 0.0009014165225612874, "loss": 3.3525, "step": 1410 }, { "epoch": 0.23, "learning_rate": 0.000901260771240098, "loss": 3.3778, "step": 1411 }, { "epoch": 0.23, "learning_rate": 0.0009011049104589759, "loss": 3.3594, "step": 1412 }, { "epoch": 0.23, "learning_rate": 0.0009009489402604389, "loss": 3.2807, "step": 1413 }, { "epoch": 0.23, "learning_rate": 0.0009007928606870339, "loss": 3.4698, "step": 1414 }, { "epoch": 0.23, "learning_rate": 0.0009006366717813377, "loss": 3.321, "step": 1415 }, { "epoch": 0.23, "learning_rate": 0.0009004803735859571, "loss": 3.4248, "step": 1416 }, { "epoch": 0.23, "learning_rate": 0.0009003239661435289, "loss": 3.4691, "step": 1417 }, { "epoch": 0.23, "learning_rate": 0.0009001674494967191, "loss": 3.2513, "step": 1418 }, { "epoch": 0.23, "learning_rate": 0.0009000108236882245, "loss": 3.3327, "step": 1419 }, { "epoch": 0.23, "learning_rate": 0.0008998540887607705, "loss": 3.4059, "step": 1420 }, { "epoch": 0.23, "learning_rate": 0.000899697244757113, "loss": 3.3752, "step": 1421 }, { "epoch": 0.23, "learning_rate": 0.0008995402917200373, "loss": 3.3514, "step": 1422 }, { "epoch": 0.23, "learning_rate": 0.0008993832296923591, "loss": 3.3647, "step": 1423 }, { "epoch": 0.23, "learning_rate": 0.000899226058716923, "loss": 3.2287, "step": 1424 }, { "epoch": 0.23, "learning_rate": 0.0008990687788366037, "loss": 3.3707, "step": 1425 }, { "epoch": 0.23, "learning_rate": 0.0008989113900943056, "loss": 3.3156, "step": 1426 }, { "epoch": 0.23, "learning_rate": 0.0008987538925329628, "loss": 3.4474, "step": 1427 }, { "epoch": 0.23, "learning_rate": 0.000898596286195539, "loss": 3.3224, "step": 1428 }, { "epoch": 0.23, "learning_rate": 0.0008984385711250277, "loss": 3.4184, "step": 1429 }, { "epoch": 0.23, "learning_rate": 0.0008982807473644521, "loss": 3.3285, "step": 1430 }, { "epoch": 0.23, "learning_rate": 0.0008981228149568647, "loss": 3.348, "step": 1431 }, { "epoch": 0.23, "learning_rate": 0.000897964773945348, "loss": 3.3918, "step": 1432 }, { "epoch": 0.23, "learning_rate": 0.000897806624373014, "loss": 3.3156, "step": 1433 }, { "epoch": 0.23, "learning_rate": 0.0008976483662830046, "loss": 3.4797, "step": 1434 }, { "epoch": 0.23, "learning_rate": 0.0008974899997184906, "loss": 3.3313, "step": 1435 }, { "epoch": 0.23, "learning_rate": 0.000897331524722673, "loss": 3.2881, "step": 1436 }, { "epoch": 0.23, "learning_rate": 0.0008971729413387822, "loss": 3.3171, "step": 1437 }, { "epoch": 0.23, "learning_rate": 0.0008970142496100782, "loss": 3.3463, "step": 1438 }, { "epoch": 0.23, "learning_rate": 0.0008968554495798505, "loss": 3.1659, "step": 1439 }, { "epoch": 0.23, "learning_rate": 0.000896696541291418, "loss": 3.2594, "step": 1440 }, { "epoch": 0.23, "learning_rate": 0.0008965375247881296, "loss": 3.3362, "step": 1441 }, { "epoch": 0.23, "learning_rate": 0.000896378400113363, "loss": 3.3815, "step": 1442 }, { "epoch": 0.23, "learning_rate": 0.0008962191673105263, "loss": 3.3254, "step": 1443 }, { "epoch": 0.23, "learning_rate": 0.0008960598264230563, "loss": 3.306, "step": 1444 }, { "epoch": 0.23, "learning_rate": 0.0008959003774944198, "loss": 3.3453, "step": 1445 }, { "epoch": 0.23, "learning_rate": 0.0008957408205681128, "loss": 3.4091, "step": 1446 }, { "epoch": 0.23, "learning_rate": 0.0008955811556876605, "loss": 3.3559, "step": 1447 }, { "epoch": 0.23, "learning_rate": 0.0008954213828966185, "loss": 3.4489, "step": 1448 }, { "epoch": 0.23, "learning_rate": 0.0008952615022385709, "loss": 3.5211, "step": 1449 }, { "epoch": 0.23, "learning_rate": 0.0008951015137571314, "loss": 3.338, "step": 1450 }, { "epoch": 0.23, "learning_rate": 0.0008949414174959434, "loss": 3.4531, "step": 1451 }, { "epoch": 0.23, "learning_rate": 0.0008947812134986797, "loss": 3.3413, "step": 1452 }, { "epoch": 0.23, "learning_rate": 0.0008946209018090422, "loss": 3.2718, "step": 1453 }, { "epoch": 0.23, "learning_rate": 0.0008944604824707623, "loss": 3.2736, "step": 1454 }, { "epoch": 0.23, "learning_rate": 0.000894299955527601, "loss": 3.3943, "step": 1455 }, { "epoch": 0.23, "learning_rate": 0.0008941393210233482, "loss": 3.1605, "step": 1456 }, { "epoch": 0.23, "learning_rate": 0.0008939785790018235, "loss": 3.3159, "step": 1457 }, { "epoch": 0.24, "learning_rate": 0.0008938177295068757, "loss": 3.2299, "step": 1458 }, { "epoch": 0.24, "learning_rate": 0.000893656772582383, "loss": 3.3505, "step": 1459 }, { "epoch": 0.24, "learning_rate": 0.0008934957082722528, "loss": 3.1961, "step": 1460 }, { "epoch": 0.24, "learning_rate": 0.0008933345366204218, "loss": 3.5531, "step": 1461 }, { "epoch": 0.24, "learning_rate": 0.000893173257670856, "loss": 3.4106, "step": 1462 }, { "epoch": 0.24, "learning_rate": 0.0008930118714675508, "loss": 3.5009, "step": 1463 }, { "epoch": 0.24, "learning_rate": 0.0008928503780545307, "loss": 3.3692, "step": 1464 }, { "epoch": 0.24, "learning_rate": 0.0008926887774758493, "loss": 3.3384, "step": 1465 }, { "epoch": 0.24, "learning_rate": 0.00089252706977559, "loss": 3.2305, "step": 1466 }, { "epoch": 0.24, "learning_rate": 0.0008923652549978647, "loss": 3.3736, "step": 1467 }, { "epoch": 0.24, "learning_rate": 0.0008922033331868149, "loss": 3.3896, "step": 1468 }, { "epoch": 0.24, "learning_rate": 0.0008920413043866116, "loss": 3.3662, "step": 1469 }, { "epoch": 0.24, "learning_rate": 0.0008918791686414543, "loss": 3.4063, "step": 1470 }, { "epoch": 0.24, "learning_rate": 0.000891716925995572, "loss": 3.2955, "step": 1471 }, { "epoch": 0.24, "learning_rate": 0.000891554576493223, "loss": 3.2443, "step": 1472 }, { "epoch": 0.24, "learning_rate": 0.0008913921201786947, "loss": 3.3375, "step": 1473 }, { "epoch": 0.24, "learning_rate": 0.0008912295570963033, "loss": 3.3762, "step": 1474 }, { "epoch": 0.24, "learning_rate": 0.0008910668872903946, "loss": 3.4337, "step": 1475 }, { "epoch": 0.24, "learning_rate": 0.0008909041108053433, "loss": 3.2725, "step": 1476 }, { "epoch": 0.24, "learning_rate": 0.0008907412276855532, "loss": 3.3808, "step": 1477 }, { "epoch": 0.24, "learning_rate": 0.000890578237975457, "loss": 3.2695, "step": 1478 }, { "epoch": 0.24, "learning_rate": 0.0008904151417195167, "loss": 3.2181, "step": 1479 }, { "epoch": 0.24, "learning_rate": 0.0008902519389622237, "loss": 3.2118, "step": 1480 }, { "epoch": 0.24, "learning_rate": 0.0008900886297480977, "loss": 3.4683, "step": 1481 }, { "epoch": 0.24, "learning_rate": 0.0008899252141216879, "loss": 3.3104, "step": 1482 }, { "epoch": 0.24, "learning_rate": 0.0008897616921275724, "loss": 3.1985, "step": 1483 }, { "epoch": 0.24, "learning_rate": 0.0008895980638103587, "loss": 3.3502, "step": 1484 }, { "epoch": 0.24, "learning_rate": 0.0008894343292146825, "loss": 3.246, "step": 1485 }, { "epoch": 0.24, "learning_rate": 0.0008892704883852092, "loss": 3.3688, "step": 1486 }, { "epoch": 0.24, "learning_rate": 0.000889106541366633, "loss": 3.2304, "step": 1487 }, { "epoch": 0.24, "learning_rate": 0.0008889424882036769, "loss": 3.3946, "step": 1488 }, { "epoch": 0.24, "learning_rate": 0.0008887783289410931, "loss": 3.3653, "step": 1489 }, { "epoch": 0.24, "learning_rate": 0.0008886140636236624, "loss": 3.4106, "step": 1490 }, { "epoch": 0.24, "learning_rate": 0.0008884496922961949, "loss": 3.2781, "step": 1491 }, { "epoch": 0.24, "learning_rate": 0.0008882852150035295, "loss": 3.3197, "step": 1492 }, { "epoch": 0.24, "learning_rate": 0.0008881206317905337, "loss": 3.3429, "step": 1493 }, { "epoch": 0.24, "learning_rate": 0.0008879559427021044, "loss": 3.3133, "step": 1494 }, { "epoch": 0.24, "learning_rate": 0.0008877911477831671, "loss": 3.3206, "step": 1495 }, { "epoch": 0.24, "learning_rate": 0.000887626247078676, "loss": 3.2705, "step": 1496 }, { "epoch": 0.24, "learning_rate": 0.0008874612406336147, "loss": 3.2695, "step": 1497 }, { "epoch": 0.24, "learning_rate": 0.0008872961284929952, "loss": 3.329, "step": 1498 }, { "epoch": 0.24, "learning_rate": 0.0008871309107018584, "loss": 3.2673, "step": 1499 }, { "epoch": 0.24, "learning_rate": 0.0008869655873052738, "loss": 3.315, "step": 1500 }, { "epoch": 0.24, "learning_rate": 0.0008868001583483405, "loss": 3.3725, "step": 1501 }, { "epoch": 0.24, "learning_rate": 0.0008866346238761855, "loss": 3.2297, "step": 1502 }, { "epoch": 0.24, "learning_rate": 0.0008864689839339652, "loss": 3.3058, "step": 1503 }, { "epoch": 0.24, "learning_rate": 0.0008863032385668641, "loss": 3.4595, "step": 1504 }, { "epoch": 0.24, "learning_rate": 0.0008861373878200964, "loss": 3.3501, "step": 1505 }, { "epoch": 0.24, "learning_rate": 0.0008859714317389041, "loss": 3.3853, "step": 1506 }, { "epoch": 0.24, "learning_rate": 0.0008858053703685587, "loss": 3.4266, "step": 1507 }, { "epoch": 0.24, "learning_rate": 0.0008856392037543599, "loss": 3.2929, "step": 1508 }, { "epoch": 0.24, "learning_rate": 0.0008854729319416363, "loss": 3.3024, "step": 1509 }, { "epoch": 0.24, "learning_rate": 0.0008853065549757451, "loss": 3.302, "step": 1510 }, { "epoch": 0.24, "learning_rate": 0.0008851400729020726, "loss": 3.2701, "step": 1511 }, { "epoch": 0.24, "learning_rate": 0.0008849734857660331, "loss": 3.2209, "step": 1512 }, { "epoch": 0.24, "learning_rate": 0.0008848067936130701, "loss": 3.2898, "step": 1513 }, { "epoch": 0.24, "learning_rate": 0.0008846399964886555, "loss": 3.4593, "step": 1514 }, { "epoch": 0.24, "learning_rate": 0.0008844730944382899, "loss": 3.1545, "step": 1515 }, { "epoch": 0.24, "learning_rate": 0.0008843060875075024, "loss": 3.3224, "step": 1516 }, { "epoch": 0.24, "learning_rate": 0.000884138975741851, "loss": 3.3049, "step": 1517 }, { "epoch": 0.24, "learning_rate": 0.0008839717591869221, "loss": 3.3861, "step": 1518 }, { "epoch": 0.24, "learning_rate": 0.0008838044378883305, "loss": 3.5483, "step": 1519 }, { "epoch": 0.25, "learning_rate": 0.0008836370118917201, "loss": 3.2823, "step": 1520 }, { "epoch": 0.25, "learning_rate": 0.0008834694812427629, "loss": 3.432, "step": 1521 }, { "epoch": 0.25, "learning_rate": 0.0008833018459871593, "loss": 3.4038, "step": 1522 }, { "epoch": 0.25, "learning_rate": 0.000883134106170639, "loss": 3.3181, "step": 1523 }, { "epoch": 0.25, "learning_rate": 0.0008829662618389595, "loss": 3.3551, "step": 1524 }, { "epoch": 0.25, "learning_rate": 0.0008827983130379072, "loss": 3.233, "step": 1525 }, { "epoch": 0.25, "learning_rate": 0.0008826302598132964, "loss": 3.4419, "step": 1526 }, { "epoch": 0.25, "learning_rate": 0.000882462102210971, "loss": 3.3194, "step": 1527 }, { "epoch": 0.25, "learning_rate": 0.0008822938402768024, "loss": 3.2747, "step": 1528 }, { "epoch": 0.25, "learning_rate": 0.0008821254740566906, "loss": 3.2828, "step": 1529 }, { "epoch": 0.25, "learning_rate": 0.0008819570035965645, "loss": 3.3737, "step": 1530 }, { "epoch": 0.25, "learning_rate": 0.0008817884289423812, "loss": 3.2664, "step": 1531 }, { "epoch": 0.25, "learning_rate": 0.000881619750140126, "loss": 3.267, "step": 1532 }, { "epoch": 0.25, "learning_rate": 0.0008814509672358126, "loss": 3.5011, "step": 1533 }, { "epoch": 0.25, "learning_rate": 0.0008812820802754837, "loss": 3.4976, "step": 1534 }, { "epoch": 0.25, "learning_rate": 0.0008811130893052099, "loss": 3.4919, "step": 1535 }, { "epoch": 0.25, "learning_rate": 0.00088094399437109, "loss": 3.2727, "step": 1536 }, { "epoch": 0.25, "learning_rate": 0.0008807747955192517, "loss": 3.2272, "step": 1537 }, { "epoch": 0.25, "learning_rate": 0.0008806054927958506, "loss": 3.2577, "step": 1538 }, { "epoch": 0.25, "learning_rate": 0.0008804360862470709, "loss": 3.4307, "step": 1539 }, { "epoch": 0.25, "learning_rate": 0.0008802665759191247, "loss": 3.4068, "step": 1540 }, { "epoch": 0.25, "learning_rate": 0.000880096961858253, "loss": 3.3839, "step": 1541 }, { "epoch": 0.25, "learning_rate": 0.0008799272441107249, "loss": 3.428, "step": 1542 }, { "epoch": 0.25, "learning_rate": 0.0008797574227228375, "loss": 3.3235, "step": 1543 }, { "epoch": 0.25, "learning_rate": 0.0008795874977409167, "loss": 3.4361, "step": 1544 }, { "epoch": 0.25, "learning_rate": 0.0008794174692113158, "loss": 3.3737, "step": 1545 }, { "epoch": 0.25, "learning_rate": 0.0008792473371804174, "loss": 3.2859, "step": 1546 }, { "epoch": 0.25, "learning_rate": 0.0008790771016946315, "loss": 3.3732, "step": 1547 }, { "epoch": 0.25, "learning_rate": 0.0008789067628003968, "loss": 3.4734, "step": 1548 }, { "epoch": 0.25, "learning_rate": 0.0008787363205441799, "loss": 3.2824, "step": 1549 }, { "epoch": 0.25, "learning_rate": 0.000878565774972476, "loss": 3.471, "step": 1550 }, { "epoch": 0.25, "learning_rate": 0.0008783951261318079, "loss": 3.3632, "step": 1551 }, { "epoch": 0.25, "learning_rate": 0.0008782243740687272, "loss": 3.2654, "step": 1552 }, { "epoch": 0.25, "learning_rate": 0.0008780535188298134, "loss": 3.2931, "step": 1553 }, { "epoch": 0.25, "learning_rate": 0.0008778825604616737, "loss": 3.3474, "step": 1554 }, { "epoch": 0.25, "learning_rate": 0.0008777114990109442, "loss": 3.3624, "step": 1555 }, { "epoch": 0.25, "learning_rate": 0.0008775403345242885, "loss": 3.2995, "step": 1556 }, { "epoch": 0.25, "learning_rate": 0.0008773690670483988, "loss": 3.4203, "step": 1557 }, { "epoch": 0.25, "learning_rate": 0.000877197696629995, "loss": 3.3728, "step": 1558 }, { "epoch": 0.25, "learning_rate": 0.0008770262233158253, "loss": 3.3482, "step": 1559 }, { "epoch": 0.25, "learning_rate": 0.0008768546471526659, "loss": 3.1999, "step": 1560 }, { "epoch": 0.25, "learning_rate": 0.0008766829681873212, "loss": 3.3516, "step": 1561 }, { "epoch": 0.25, "learning_rate": 0.0008765111864666231, "loss": 3.3693, "step": 1562 }, { "epoch": 0.25, "learning_rate": 0.0008763393020374324, "loss": 3.3647, "step": 1563 }, { "epoch": 0.25, "learning_rate": 0.0008761673149466373, "loss": 3.376, "step": 1564 }, { "epoch": 0.25, "learning_rate": 0.000875995225241154, "loss": 3.2895, "step": 1565 }, { "epoch": 0.25, "learning_rate": 0.0008758230329679271, "loss": 3.2676, "step": 1566 }, { "epoch": 0.25, "learning_rate": 0.0008756507381739287, "loss": 3.3312, "step": 1567 }, { "epoch": 0.25, "learning_rate": 0.0008754783409061593, "loss": 3.2945, "step": 1568 }, { "epoch": 0.25, "learning_rate": 0.000875305841211647, "loss": 3.3013, "step": 1569 }, { "epoch": 0.25, "learning_rate": 0.0008751332391374482, "loss": 3.322, "step": 1570 }, { "epoch": 0.25, "learning_rate": 0.0008749605347306467, "loss": 3.3417, "step": 1571 }, { "epoch": 0.25, "learning_rate": 0.0008747877280383548, "loss": 3.3849, "step": 1572 }, { "epoch": 0.25, "learning_rate": 0.0008746148191077123, "loss": 3.2775, "step": 1573 }, { "epoch": 0.25, "learning_rate": 0.0008744418079858873, "loss": 3.2642, "step": 1574 }, { "epoch": 0.25, "learning_rate": 0.0008742686947200751, "loss": 3.4024, "step": 1575 }, { "epoch": 0.25, "learning_rate": 0.0008740954793574995, "loss": 3.2527, "step": 1576 }, { "epoch": 0.25, "learning_rate": 0.0008739221619454121, "loss": 3.3368, "step": 1577 }, { "epoch": 0.25, "learning_rate": 0.0008737487425310917, "loss": 3.2765, "step": 1578 }, { "epoch": 0.25, "learning_rate": 0.0008735752211618456, "loss": 3.3055, "step": 1579 }, { "epoch": 0.25, "learning_rate": 0.000873401597885009, "loss": 3.3211, "step": 1580 }, { "epoch": 0.25, "learning_rate": 0.0008732278727479441, "loss": 3.3662, "step": 1581 }, { "epoch": 0.26, "learning_rate": 0.0008730540457980417, "loss": 3.2515, "step": 1582 }, { "epoch": 0.26, "learning_rate": 0.00087288011708272, "loss": 3.4034, "step": 1583 }, { "epoch": 0.26, "learning_rate": 0.0008727060866494249, "loss": 3.3333, "step": 1584 }, { "epoch": 0.26, "learning_rate": 0.0008725319545456303, "loss": 3.4605, "step": 1585 }, { "epoch": 0.26, "learning_rate": 0.0008723577208188377, "loss": 3.3403, "step": 1586 }, { "epoch": 0.26, "learning_rate": 0.0008721833855165761, "loss": 3.2783, "step": 1587 }, { "epoch": 0.26, "learning_rate": 0.0008720089486864029, "loss": 3.1317, "step": 1588 }, { "epoch": 0.26, "learning_rate": 0.0008718344103759021, "loss": 3.2456, "step": 1589 }, { "epoch": 0.26, "learning_rate": 0.0008716597706326867, "loss": 3.3759, "step": 1590 }, { "epoch": 0.26, "learning_rate": 0.0008714850295043961, "loss": 3.2944, "step": 1591 }, { "epoch": 0.26, "learning_rate": 0.0008713101870386981, "loss": 3.2833, "step": 1592 }, { "epoch": 0.26, "learning_rate": 0.0008711352432832881, "loss": 3.2855, "step": 1593 }, { "epoch": 0.26, "learning_rate": 0.0008709601982858891, "loss": 3.3516, "step": 1594 }, { "epoch": 0.26, "learning_rate": 0.0008707850520942512, "loss": 3.3091, "step": 1595 }, { "epoch": 0.26, "learning_rate": 0.0008706098047561529, "loss": 3.4651, "step": 1596 }, { "epoch": 0.26, "learning_rate": 0.0008704344563193998, "loss": 3.3412, "step": 1597 }, { "epoch": 0.26, "learning_rate": 0.0008702590068318252, "loss": 3.3547, "step": 1598 }, { "epoch": 0.26, "learning_rate": 0.0008700834563412902, "loss": 3.2924, "step": 1599 }, { "epoch": 0.26, "learning_rate": 0.0008699078048956828, "loss": 3.363, "step": 1600 }, { "epoch": 0.26, "learning_rate": 0.0008697320525429194, "loss": 3.4448, "step": 1601 }, { "epoch": 0.26, "learning_rate": 0.0008695561993309431, "loss": 3.5214, "step": 1602 }, { "epoch": 0.26, "learning_rate": 0.0008693802453077251, "loss": 3.4056, "step": 1603 }, { "epoch": 0.26, "learning_rate": 0.0008692041905212637, "loss": 3.5342, "step": 1604 }, { "epoch": 0.26, "learning_rate": 0.0008690280350195853, "loss": 3.4405, "step": 1605 }, { "epoch": 0.26, "learning_rate": 0.000868851778850743, "loss": 3.3173, "step": 1606 }, { "epoch": 0.26, "learning_rate": 0.0008686754220628179, "loss": 3.3101, "step": 1607 }, { "epoch": 0.26, "learning_rate": 0.0008684989647039183, "loss": 3.2712, "step": 1608 }, { "epoch": 0.26, "learning_rate": 0.00086832240682218, "loss": 3.3881, "step": 1609 }, { "epoch": 0.26, "learning_rate": 0.0008681457484657662, "loss": 3.4118, "step": 1610 }, { "epoch": 0.26, "learning_rate": 0.0008679689896828677, "loss": 3.3789, "step": 1611 }, { "epoch": 0.26, "learning_rate": 0.0008677921305217022, "loss": 3.252, "step": 1612 }, { "epoch": 0.26, "learning_rate": 0.0008676151710305156, "loss": 3.1559, "step": 1613 }, { "epoch": 0.26, "learning_rate": 0.0008674381112575802, "loss": 3.4185, "step": 1614 }, { "epoch": 0.26, "learning_rate": 0.0008672609512511964, "loss": 3.265, "step": 1615 }, { "epoch": 0.26, "learning_rate": 0.0008670836910596917, "loss": 3.2801, "step": 1616 }, { "epoch": 0.26, "learning_rate": 0.0008669063307314207, "loss": 3.2141, "step": 1617 }, { "epoch": 0.26, "learning_rate": 0.0008667288703147658, "loss": 3.3782, "step": 1618 }, { "epoch": 0.26, "learning_rate": 0.0008665513098581363, "loss": 3.307, "step": 1619 }, { "epoch": 0.26, "learning_rate": 0.0008663736494099688, "loss": 3.2769, "step": 1620 }, { "epoch": 0.26, "learning_rate": 0.0008661958890187276, "loss": 3.3125, "step": 1621 }, { "epoch": 0.26, "learning_rate": 0.0008660180287329036, "loss": 3.2936, "step": 1622 }, { "epoch": 0.26, "learning_rate": 0.0008658400686010155, "loss": 3.3425, "step": 1623 }, { "epoch": 0.26, "learning_rate": 0.000865662008671609, "loss": 3.2615, "step": 1624 }, { "epoch": 0.26, "learning_rate": 0.0008654838489932573, "loss": 3.3733, "step": 1625 }, { "epoch": 0.26, "learning_rate": 0.0008653055896145602, "loss": 3.4423, "step": 1626 }, { "epoch": 0.26, "learning_rate": 0.0008651272305841454, "loss": 3.3877, "step": 1627 }, { "epoch": 0.26, "learning_rate": 0.0008649487719506671, "loss": 3.3195, "step": 1628 }, { "epoch": 0.26, "learning_rate": 0.0008647702137628074, "loss": 3.322, "step": 1629 }, { "epoch": 0.26, "learning_rate": 0.000864591556069275, "loss": 3.2975, "step": 1630 }, { "epoch": 0.26, "learning_rate": 0.000864412798918806, "loss": 3.2645, "step": 1631 }, { "epoch": 0.26, "learning_rate": 0.0008642339423601636, "loss": 3.4851, "step": 1632 }, { "epoch": 0.26, "learning_rate": 0.0008640549864421381, "loss": 3.164, "step": 1633 }, { "epoch": 0.26, "learning_rate": 0.0008638759312135467, "loss": 3.114, "step": 1634 }, { "epoch": 0.26, "learning_rate": 0.000863696776723234, "loss": 3.3485, "step": 1635 }, { "epoch": 0.26, "learning_rate": 0.0008635175230200715, "loss": 3.4216, "step": 1636 }, { "epoch": 0.26, "learning_rate": 0.000863338170152958, "loss": 3.2377, "step": 1637 }, { "epoch": 0.26, "learning_rate": 0.000863158718170819, "loss": 3.4166, "step": 1638 }, { "epoch": 0.26, "learning_rate": 0.0008629791671226072, "loss": 3.1966, "step": 1639 }, { "epoch": 0.26, "learning_rate": 0.0008627995170573025, "loss": 3.4511, "step": 1640 }, { "epoch": 0.26, "learning_rate": 0.0008626197680239115, "loss": 3.3347, "step": 1641 }, { "epoch": 0.26, "learning_rate": 0.000862439920071468, "loss": 3.1548, "step": 1642 }, { "epoch": 0.26, "learning_rate": 0.0008622599732490326, "loss": 3.2822, "step": 1643 }, { "epoch": 0.27, "learning_rate": 0.0008620799276056932, "loss": 3.4061, "step": 1644 }, { "epoch": 0.27, "learning_rate": 0.0008618997831905644, "loss": 3.2086, "step": 1645 }, { "epoch": 0.27, "learning_rate": 0.0008617195400527877, "loss": 3.2865, "step": 1646 }, { "epoch": 0.27, "learning_rate": 0.0008615391982415316, "loss": 3.2712, "step": 1647 }, { "epoch": 0.27, "learning_rate": 0.0008613587578059916, "loss": 3.3824, "step": 1648 }, { "epoch": 0.27, "learning_rate": 0.0008611782187953903, "loss": 3.3385, "step": 1649 }, { "epoch": 0.27, "learning_rate": 0.0008609975812589766, "loss": 3.3252, "step": 1650 }, { "epoch": 0.27, "learning_rate": 0.0008608168452460265, "loss": 3.312, "step": 1651 }, { "epoch": 0.27, "learning_rate": 0.0008606360108058435, "loss": 3.2779, "step": 1652 }, { "epoch": 0.27, "learning_rate": 0.0008604550779877571, "loss": 3.3992, "step": 1653 }, { "epoch": 0.27, "learning_rate": 0.000860274046841124, "loss": 3.3449, "step": 1654 }, { "epoch": 0.27, "learning_rate": 0.0008600929174153275, "loss": 3.2202, "step": 1655 }, { "epoch": 0.27, "learning_rate": 0.0008599116897597784, "loss": 3.3143, "step": 1656 }, { "epoch": 0.27, "learning_rate": 0.0008597303639239133, "loss": 3.2843, "step": 1657 }, { "epoch": 0.27, "learning_rate": 0.0008595489399571964, "loss": 3.4379, "step": 1658 }, { "epoch": 0.27, "learning_rate": 0.0008593674179091182, "loss": 3.5006, "step": 1659 }, { "epoch": 0.27, "learning_rate": 0.000859185797829196, "loss": 3.2389, "step": 1660 }, { "epoch": 0.27, "learning_rate": 0.0008590040797669741, "loss": 3.3283, "step": 1661 }, { "epoch": 0.27, "learning_rate": 0.0008588222637720233, "loss": 3.2288, "step": 1662 }, { "epoch": 0.27, "learning_rate": 0.0008586403498939414, "loss": 3.2369, "step": 1663 }, { "epoch": 0.27, "learning_rate": 0.0008584583381823523, "loss": 3.3218, "step": 1664 }, { "epoch": 0.27, "learning_rate": 0.0008582762286869073, "loss": 3.2937, "step": 1665 }, { "epoch": 0.27, "learning_rate": 0.0008580940214572841, "loss": 3.2713, "step": 1666 }, { "epoch": 0.27, "learning_rate": 0.0008579117165431867, "loss": 3.3341, "step": 1667 }, { "epoch": 0.27, "learning_rate": 0.000857729313994346, "loss": 3.3384, "step": 1668 }, { "epoch": 0.27, "learning_rate": 0.0008575468138605203, "loss": 3.3807, "step": 1669 }, { "epoch": 0.27, "learning_rate": 0.000857364216191493, "loss": 3.4699, "step": 1670 }, { "epoch": 0.27, "learning_rate": 0.0008571815210370753, "loss": 3.3673, "step": 1671 }, { "epoch": 0.27, "learning_rate": 0.0008569987284471046, "loss": 3.3418, "step": 1672 }, { "epoch": 0.27, "learning_rate": 0.0008568158384714448, "loss": 3.2977, "step": 1673 }, { "epoch": 0.27, "learning_rate": 0.0008566328511599864, "loss": 3.3425, "step": 1674 }, { "epoch": 0.27, "learning_rate": 0.0008564497665626468, "loss": 3.293, "step": 1675 }, { "epoch": 0.27, "learning_rate": 0.0008562665847293695, "loss": 3.2654, "step": 1676 }, { "epoch": 0.27, "learning_rate": 0.0008560833057101246, "loss": 3.3123, "step": 1677 }, { "epoch": 0.27, "learning_rate": 0.0008558999295549088, "loss": 3.3273, "step": 1678 }, { "epoch": 0.27, "learning_rate": 0.0008557164563137454, "loss": 3.3569, "step": 1679 }, { "epoch": 0.27, "learning_rate": 0.000855532886036684, "loss": 3.2548, "step": 1680 }, { "epoch": 0.27, "learning_rate": 0.0008553492187738005, "loss": 3.1176, "step": 1681 }, { "epoch": 0.27, "learning_rate": 0.0008551654545751981, "loss": 3.3414, "step": 1682 }, { "epoch": 0.27, "learning_rate": 0.0008549815934910052, "loss": 3.2832, "step": 1683 }, { "epoch": 0.27, "learning_rate": 0.0008547976355713776, "loss": 3.3259, "step": 1684 }, { "epoch": 0.27, "learning_rate": 0.0008546135808664972, "loss": 3.3804, "step": 1685 }, { "epoch": 0.27, "learning_rate": 0.0008544294294265722, "loss": 3.4757, "step": 1686 }, { "epoch": 0.27, "learning_rate": 0.0008542451813018372, "loss": 3.2337, "step": 1687 }, { "epoch": 0.27, "learning_rate": 0.0008540608365425532, "loss": 3.3022, "step": 1688 }, { "epoch": 0.27, "learning_rate": 0.000853876395199008, "loss": 3.3891, "step": 1689 }, { "epoch": 0.27, "learning_rate": 0.000853691857321515, "loss": 3.4109, "step": 1690 }, { "epoch": 0.27, "learning_rate": 0.0008535072229604144, "loss": 3.2647, "step": 1691 }, { "epoch": 0.27, "learning_rate": 0.0008533224921660725, "loss": 3.3116, "step": 1692 }, { "epoch": 0.27, "learning_rate": 0.0008531376649888822, "loss": 3.3762, "step": 1693 }, { "epoch": 0.27, "learning_rate": 0.0008529527414792623, "loss": 3.2327, "step": 1694 }, { "epoch": 0.27, "learning_rate": 0.0008527677216876584, "loss": 3.2331, "step": 1695 }, { "epoch": 0.27, "learning_rate": 0.0008525826056645418, "loss": 3.405, "step": 1696 }, { "epoch": 0.27, "learning_rate": 0.0008523973934604104, "loss": 3.2509, "step": 1697 }, { "epoch": 0.27, "learning_rate": 0.0008522120851257881, "loss": 3.354, "step": 1698 }, { "epoch": 0.27, "learning_rate": 0.0008520266807112252, "loss": 3.3016, "step": 1699 }, { "epoch": 0.27, "learning_rate": 0.0008518411802672984, "loss": 3.3542, "step": 1700 }, { "epoch": 0.27, "learning_rate": 0.0008516555838446101, "loss": 3.3469, "step": 1701 }, { "epoch": 0.27, "learning_rate": 0.0008514698914937894, "loss": 3.3224, "step": 1702 }, { "epoch": 0.27, "learning_rate": 0.0008512841032654911, "loss": 3.3695, "step": 1703 }, { "epoch": 0.27, "learning_rate": 0.0008510982192103965, "loss": 3.353, "step": 1704 }, { "epoch": 0.27, "learning_rate": 0.0008509122393792129, "loss": 3.3689, "step": 1705 }, { "epoch": 0.28, "learning_rate": 0.0008507261638226735, "loss": 3.3009, "step": 1706 }, { "epoch": 0.28, "learning_rate": 0.0008505399925915382, "loss": 3.2994, "step": 1707 }, { "epoch": 0.28, "learning_rate": 0.0008503537257365925, "loss": 3.2778, "step": 1708 }, { "epoch": 0.28, "learning_rate": 0.0008501673633086482, "loss": 3.4669, "step": 1709 }, { "epoch": 0.28, "learning_rate": 0.000849980905358543, "loss": 3.4403, "step": 1710 }, { "epoch": 0.28, "learning_rate": 0.000849794351937141, "loss": 3.3289, "step": 1711 }, { "epoch": 0.28, "learning_rate": 0.0008496077030953318, "loss": 3.3796, "step": 1712 }, { "epoch": 0.28, "learning_rate": 0.0008494209588840313, "loss": 3.3422, "step": 1713 }, { "epoch": 0.28, "learning_rate": 0.000849234119354182, "loss": 3.337, "step": 1714 }, { "epoch": 0.28, "learning_rate": 0.0008490471845567513, "loss": 3.2886, "step": 1715 }, { "epoch": 0.28, "learning_rate": 0.0008488601545427333, "loss": 3.2523, "step": 1716 }, { "epoch": 0.28, "learning_rate": 0.0008486730293631482, "loss": 3.2376, "step": 1717 }, { "epoch": 0.28, "learning_rate": 0.0008484858090690415, "loss": 3.3598, "step": 1718 }, { "epoch": 0.28, "learning_rate": 0.000848298493711485, "loss": 3.3352, "step": 1719 }, { "epoch": 0.28, "learning_rate": 0.0008481110833415769, "loss": 3.316, "step": 1720 }, { "epoch": 0.28, "learning_rate": 0.0008479235780104405, "loss": 3.3215, "step": 1721 }, { "epoch": 0.28, "learning_rate": 0.0008477359777692255, "loss": 3.2302, "step": 1722 }, { "epoch": 0.28, "learning_rate": 0.0008475482826691075, "loss": 3.299, "step": 1723 }, { "epoch": 0.28, "learning_rate": 0.0008473604927612874, "loss": 3.1687, "step": 1724 }, { "epoch": 0.28, "learning_rate": 0.000847172608096993, "loss": 3.2639, "step": 1725 }, { "epoch": 0.28, "learning_rate": 0.000846984628727477, "loss": 3.4036, "step": 1726 }, { "epoch": 0.28, "learning_rate": 0.0008467965547040184, "loss": 3.2366, "step": 1727 }, { "epoch": 0.28, "learning_rate": 0.0008466083860779219, "loss": 3.2039, "step": 1728 }, { "epoch": 0.28, "learning_rate": 0.0008464201229005182, "loss": 3.3338, "step": 1729 }, { "epoch": 0.28, "learning_rate": 0.0008462317652231631, "loss": 3.3872, "step": 1730 }, { "epoch": 0.28, "learning_rate": 0.0008460433130972392, "loss": 3.2931, "step": 1731 }, { "epoch": 0.28, "learning_rate": 0.0008458547665741542, "loss": 3.4087, "step": 1732 }, { "epoch": 0.28, "learning_rate": 0.0008456661257053418, "loss": 3.3458, "step": 1733 }, { "epoch": 0.28, "learning_rate": 0.000845477390542261, "loss": 3.3708, "step": 1734 }, { "epoch": 0.28, "learning_rate": 0.0008452885611363974, "loss": 3.3202, "step": 1735 }, { "epoch": 0.28, "learning_rate": 0.0008450996375392613, "loss": 3.3844, "step": 1736 }, { "epoch": 0.28, "learning_rate": 0.0008449106198023892, "loss": 3.1221, "step": 1737 }, { "epoch": 0.28, "learning_rate": 0.0008447215079773438, "loss": 3.1685, "step": 1738 }, { "epoch": 0.28, "learning_rate": 0.0008445323021157123, "loss": 3.2708, "step": 1739 }, { "epoch": 0.28, "learning_rate": 0.0008443430022691085, "loss": 3.3997, "step": 1740 }, { "epoch": 0.28, "learning_rate": 0.0008441536084891714, "loss": 3.3485, "step": 1741 }, { "epoch": 0.28, "learning_rate": 0.0008439641208275657, "loss": 3.5026, "step": 1742 }, { "epoch": 0.28, "learning_rate": 0.0008437745393359817, "loss": 3.4389, "step": 1743 }, { "epoch": 0.28, "learning_rate": 0.0008435848640661356, "loss": 3.2722, "step": 1744 }, { "epoch": 0.28, "learning_rate": 0.0008433950950697686, "loss": 3.2661, "step": 1745 }, { "epoch": 0.28, "learning_rate": 0.000843205232398648, "loss": 3.4215, "step": 1746 }, { "epoch": 0.28, "learning_rate": 0.0008430152761045664, "loss": 3.345, "step": 1747 }, { "epoch": 0.28, "learning_rate": 0.0008428252262393419, "loss": 3.3567, "step": 1748 }, { "epoch": 0.28, "learning_rate": 0.0008426350828548182, "loss": 3.3495, "step": 1749 }, { "epoch": 0.28, "learning_rate": 0.0008424448460028647, "loss": 3.244, "step": 1750 }, { "epoch": 0.28, "learning_rate": 0.0008422545157353759, "loss": 3.373, "step": 1751 }, { "epoch": 0.28, "learning_rate": 0.0008420640921042721, "loss": 3.2599, "step": 1752 }, { "epoch": 0.28, "learning_rate": 0.000841873575161499, "loss": 3.3489, "step": 1753 }, { "epoch": 0.28, "learning_rate": 0.0008416829649590277, "loss": 3.5301, "step": 1754 }, { "epoch": 0.28, "learning_rate": 0.0008414922615488546, "loss": 3.2543, "step": 1755 }, { "epoch": 0.28, "learning_rate": 0.000841301464983002, "loss": 3.4799, "step": 1756 }, { "epoch": 0.28, "learning_rate": 0.0008411105753135169, "loss": 3.2386, "step": 1757 }, { "epoch": 0.28, "learning_rate": 0.0008409195925924725, "loss": 3.3996, "step": 1758 }, { "epoch": 0.28, "learning_rate": 0.0008407285168719667, "loss": 3.2367, "step": 1759 }, { "epoch": 0.28, "learning_rate": 0.0008405373482041232, "loss": 3.2706, "step": 1760 }, { "epoch": 0.28, "learning_rate": 0.0008403460866410908, "loss": 3.2546, "step": 1761 }, { "epoch": 0.28, "learning_rate": 0.0008401547322350438, "loss": 3.3229, "step": 1762 }, { "epoch": 0.28, "learning_rate": 0.0008399632850381817, "loss": 3.3638, "step": 1763 }, { "epoch": 0.28, "learning_rate": 0.0008397717451027296, "loss": 3.3542, "step": 1764 }, { "epoch": 0.28, "learning_rate": 0.0008395801124809374, "loss": 3.2764, "step": 1765 }, { "epoch": 0.28, "learning_rate": 0.0008393883872250807, "loss": 3.3415, "step": 1766 }, { "epoch": 0.28, "learning_rate": 0.0008391965693874602, "loss": 3.2087, "step": 1767 }, { "epoch": 0.29, "learning_rate": 0.000839004659020402, "loss": 3.3824, "step": 1768 }, { "epoch": 0.29, "learning_rate": 0.0008388126561762572, "loss": 3.2754, "step": 1769 }, { "epoch": 0.29, "learning_rate": 0.0008386205609074024, "loss": 3.3948, "step": 1770 }, { "epoch": 0.29, "learning_rate": 0.0008384283732662391, "loss": 3.3137, "step": 1771 }, { "epoch": 0.29, "learning_rate": 0.0008382360933051943, "loss": 3.2394, "step": 1772 }, { "epoch": 0.29, "learning_rate": 0.00083804372107672, "loss": 3.3858, "step": 1773 }, { "epoch": 0.29, "learning_rate": 0.0008378512566332934, "loss": 3.2368, "step": 1774 }, { "epoch": 0.29, "learning_rate": 0.0008376587000274169, "loss": 3.2317, "step": 1775 }, { "epoch": 0.29, "learning_rate": 0.0008374660513116181, "loss": 3.37, "step": 1776 }, { "epoch": 0.29, "learning_rate": 0.0008372733105384496, "loss": 3.3567, "step": 1777 }, { "epoch": 0.29, "learning_rate": 0.000837080477760489, "loss": 3.4351, "step": 1778 }, { "epoch": 0.29, "learning_rate": 0.0008368875530303394, "loss": 3.2047, "step": 1779 }, { "epoch": 0.29, "learning_rate": 0.0008366945364006287, "loss": 3.3192, "step": 1780 }, { "epoch": 0.29, "learning_rate": 0.0008365014279240096, "loss": 3.1908, "step": 1781 }, { "epoch": 0.29, "learning_rate": 0.0008363082276531607, "loss": 3.3268, "step": 1782 }, { "epoch": 0.29, "learning_rate": 0.0008361149356407845, "loss": 3.3165, "step": 1783 }, { "epoch": 0.29, "learning_rate": 0.0008359215519396096, "loss": 3.337, "step": 1784 }, { "epoch": 0.29, "learning_rate": 0.000835728076602389, "loss": 3.2683, "step": 1785 }, { "epoch": 0.29, "learning_rate": 0.0008355345096819008, "loss": 3.1733, "step": 1786 }, { "epoch": 0.29, "learning_rate": 0.0008353408512309482, "loss": 3.3719, "step": 1787 }, { "epoch": 0.29, "learning_rate": 0.0008351471013023591, "loss": 3.4096, "step": 1788 }, { "epoch": 0.29, "learning_rate": 0.0008349532599489869, "loss": 3.3843, "step": 1789 }, { "epoch": 0.29, "learning_rate": 0.0008347593272237092, "loss": 3.3216, "step": 1790 }, { "epoch": 0.29, "learning_rate": 0.0008345653031794292, "loss": 3.4051, "step": 1791 }, { "epoch": 0.29, "learning_rate": 0.0008343711878690746, "loss": 3.4066, "step": 1792 }, { "epoch": 0.29, "learning_rate": 0.0008341769813455979, "loss": 3.3974, "step": 1793 }, { "epoch": 0.29, "learning_rate": 0.0008339826836619771, "loss": 3.1576, "step": 1794 }, { "epoch": 0.29, "learning_rate": 0.0008337882948712146, "loss": 3.2871, "step": 1795 }, { "epoch": 0.29, "learning_rate": 0.0008335938150263373, "loss": 3.3098, "step": 1796 }, { "epoch": 0.29, "learning_rate": 0.000833399244180398, "loss": 3.1953, "step": 1797 }, { "epoch": 0.29, "learning_rate": 0.0008332045823864733, "loss": 3.2946, "step": 1798 }, { "epoch": 0.29, "learning_rate": 0.000833009829697665, "loss": 3.3054, "step": 1799 }, { "epoch": 0.29, "learning_rate": 0.0008328149861670997, "loss": 3.3388, "step": 1800 }, { "epoch": 0.29, "learning_rate": 0.000832620051847929, "loss": 3.4077, "step": 1801 }, { "epoch": 0.29, "learning_rate": 0.0008324250267933285, "loss": 3.3008, "step": 1802 }, { "epoch": 0.29, "learning_rate": 0.0008322299110564997, "loss": 3.2172, "step": 1803 }, { "epoch": 0.29, "learning_rate": 0.000832034704690668, "loss": 3.2018, "step": 1804 }, { "epoch": 0.29, "learning_rate": 0.0008318394077490835, "loss": 3.4325, "step": 1805 }, { "epoch": 0.29, "learning_rate": 0.0008316440202850216, "loss": 3.23, "step": 1806 }, { "epoch": 0.29, "learning_rate": 0.0008314485423517819, "loss": 3.4174, "step": 1807 }, { "epoch": 0.29, "learning_rate": 0.0008312529740026888, "loss": 3.4263, "step": 1808 }, { "epoch": 0.29, "learning_rate": 0.0008310573152910916, "loss": 3.4368, "step": 1809 }, { "epoch": 0.29, "learning_rate": 0.0008308615662703638, "loss": 3.2422, "step": 1810 }, { "epoch": 0.29, "learning_rate": 0.0008306657269939037, "loss": 3.243, "step": 1811 }, { "epoch": 0.29, "learning_rate": 0.0008304697975151347, "loss": 3.4144, "step": 1812 }, { "epoch": 0.29, "learning_rate": 0.0008302737778875041, "loss": 3.2929, "step": 1813 }, { "epoch": 0.29, "learning_rate": 0.0008300776681644841, "loss": 3.2921, "step": 1814 }, { "epoch": 0.29, "learning_rate": 0.0008298814683995715, "loss": 3.2962, "step": 1815 }, { "epoch": 0.29, "learning_rate": 0.0008296851786462877, "loss": 3.3011, "step": 1816 }, { "epoch": 0.29, "learning_rate": 0.0008294887989581786, "loss": 3.3418, "step": 1817 }, { "epoch": 0.29, "learning_rate": 0.0008292923293888145, "loss": 3.3993, "step": 1818 }, { "epoch": 0.29, "learning_rate": 0.0008290957699917905, "loss": 3.3173, "step": 1819 }, { "epoch": 0.29, "learning_rate": 0.000828899120820726, "loss": 3.3227, "step": 1820 }, { "epoch": 0.29, "learning_rate": 0.0008287023819292648, "loss": 3.5335, "step": 1821 }, { "epoch": 0.29, "learning_rate": 0.0008285055533710752, "loss": 3.2822, "step": 1822 }, { "epoch": 0.29, "learning_rate": 0.0008283086351998505, "loss": 3.3104, "step": 1823 }, { "epoch": 0.29, "learning_rate": 0.0008281116274693078, "loss": 3.3159, "step": 1824 }, { "epoch": 0.29, "learning_rate": 0.0008279145302331888, "loss": 3.2211, "step": 1825 }, { "epoch": 0.29, "learning_rate": 0.0008277173435452597, "loss": 3.2887, "step": 1826 }, { "epoch": 0.29, "learning_rate": 0.0008275200674593108, "loss": 3.2633, "step": 1827 }, { "epoch": 0.29, "learning_rate": 0.0008273227020291574, "loss": 3.3126, "step": 1828 }, { "epoch": 0.29, "learning_rate": 0.0008271252473086388, "loss": 3.2822, "step": 1829 }, { "epoch": 0.3, "learning_rate": 0.0008269277033516184, "loss": 3.2569, "step": 1830 }, { "epoch": 0.3, "learning_rate": 0.0008267300702119845, "loss": 3.2969, "step": 1831 }, { "epoch": 0.3, "learning_rate": 0.0008265323479436493, "loss": 3.4729, "step": 1832 }, { "epoch": 0.3, "learning_rate": 0.0008263345366005494, "loss": 3.4062, "step": 1833 }, { "epoch": 0.3, "learning_rate": 0.000826136636236646, "loss": 3.3166, "step": 1834 }, { "epoch": 0.3, "learning_rate": 0.0008259386469059242, "loss": 3.3158, "step": 1835 }, { "epoch": 0.3, "learning_rate": 0.0008257405686623936, "loss": 3.2941, "step": 1836 }, { "epoch": 0.3, "learning_rate": 0.0008255424015600877, "loss": 3.4495, "step": 1837 }, { "epoch": 0.3, "learning_rate": 0.0008253441456530647, "loss": 3.2866, "step": 1838 }, { "epoch": 0.3, "learning_rate": 0.0008251458009954069, "loss": 3.4028, "step": 1839 }, { "epoch": 0.3, "learning_rate": 0.0008249473676412207, "loss": 3.3452, "step": 1840 }, { "epoch": 0.3, "learning_rate": 0.0008247488456446366, "loss": 3.3798, "step": 1841 }, { "epoch": 0.3, "learning_rate": 0.0008245502350598096, "loss": 3.4339, "step": 1842 }, { "epoch": 0.3, "learning_rate": 0.0008243515359409185, "loss": 3.2506, "step": 1843 }, { "epoch": 0.3, "learning_rate": 0.0008241527483421665, "loss": 3.3814, "step": 1844 }, { "epoch": 0.3, "learning_rate": 0.000823953872317781, "loss": 3.3459, "step": 1845 }, { "epoch": 0.3, "learning_rate": 0.0008237549079220135, "loss": 3.4041, "step": 1846 }, { "epoch": 0.3, "learning_rate": 0.0008235558552091391, "loss": 3.358, "step": 1847 }, { "epoch": 0.3, "learning_rate": 0.0008233567142334576, "loss": 3.382, "step": 1848 }, { "epoch": 0.3, "learning_rate": 0.0008231574850492926, "loss": 3.2565, "step": 1849 }, { "epoch": 0.3, "learning_rate": 0.0008229581677109921, "loss": 3.2941, "step": 1850 }, { "epoch": 0.3, "learning_rate": 0.0008227587622729275, "loss": 3.4547, "step": 1851 }, { "epoch": 0.3, "learning_rate": 0.0008225592687894949, "loss": 3.4747, "step": 1852 }, { "epoch": 0.3, "learning_rate": 0.0008223596873151142, "loss": 3.2097, "step": 1853 }, { "epoch": 0.3, "learning_rate": 0.0008221600179042288, "loss": 3.1926, "step": 1854 }, { "epoch": 0.3, "learning_rate": 0.0008219602606113068, "loss": 3.3898, "step": 1855 }, { "epoch": 0.3, "learning_rate": 0.00082176041549084, "loss": 3.2946, "step": 1856 }, { "epoch": 0.3, "learning_rate": 0.0008215604825973442, "loss": 3.2356, "step": 1857 }, { "epoch": 0.3, "learning_rate": 0.0008213604619853591, "loss": 3.2952, "step": 1858 }, { "epoch": 0.3, "learning_rate": 0.000821160353709448, "loss": 3.2517, "step": 1859 }, { "epoch": 0.3, "learning_rate": 0.0008209601578241989, "loss": 3.4935, "step": 1860 }, { "epoch": 0.3, "learning_rate": 0.000820759874384223, "loss": 3.296, "step": 1861 }, { "epoch": 0.3, "learning_rate": 0.0008205595034441556, "loss": 3.2877, "step": 1862 }, { "epoch": 0.3, "learning_rate": 0.000820359045058656, "loss": 3.3523, "step": 1863 }, { "epoch": 0.3, "learning_rate": 0.0008201584992824072, "loss": 3.242, "step": 1864 }, { "epoch": 0.3, "learning_rate": 0.0008199578661701161, "loss": 3.3313, "step": 1865 }, { "epoch": 0.3, "learning_rate": 0.0008197571457765134, "loss": 3.1903, "step": 1866 }, { "epoch": 0.3, "learning_rate": 0.0008195563381563535, "loss": 3.276, "step": 1867 }, { "epoch": 0.3, "learning_rate": 0.0008193554433644149, "loss": 3.3687, "step": 1868 }, { "epoch": 0.3, "learning_rate": 0.0008191544614554996, "loss": 3.3583, "step": 1869 }, { "epoch": 0.3, "learning_rate": 0.0008189533924844335, "loss": 3.2527, "step": 1870 }, { "epoch": 0.3, "learning_rate": 0.000818752236506066, "loss": 3.1176, "step": 1871 }, { "epoch": 0.3, "learning_rate": 0.0008185509935752708, "loss": 3.3722, "step": 1872 }, { "epoch": 0.3, "learning_rate": 0.0008183496637469449, "loss": 3.3747, "step": 1873 }, { "epoch": 0.3, "learning_rate": 0.0008181482470760086, "loss": 3.3236, "step": 1874 }, { "epoch": 0.3, "learning_rate": 0.0008179467436174071, "loss": 3.3958, "step": 1875 }, { "epoch": 0.3, "learning_rate": 0.000817745153426108, "loss": 3.4579, "step": 1876 }, { "epoch": 0.3, "learning_rate": 0.0008175434765571031, "loss": 3.2303, "step": 1877 }, { "epoch": 0.3, "learning_rate": 0.0008173417130654081, "loss": 3.3073, "step": 1878 }, { "epoch": 0.3, "learning_rate": 0.000817139863006062, "loss": 3.2312, "step": 1879 }, { "epoch": 0.3, "learning_rate": 0.0008169379264341273, "loss": 3.4417, "step": 1880 }, { "epoch": 0.3, "learning_rate": 0.0008167359034046906, "loss": 3.3127, "step": 1881 }, { "epoch": 0.3, "learning_rate": 0.0008165337939728615, "loss": 3.282, "step": 1882 }, { "epoch": 0.3, "learning_rate": 0.0008163315981937734, "loss": 3.3903, "step": 1883 }, { "epoch": 0.3, "learning_rate": 0.0008161293161225834, "loss": 3.299, "step": 1884 }, { "epoch": 0.3, "learning_rate": 0.000815926947814472, "loss": 3.3398, "step": 1885 }, { "epoch": 0.3, "learning_rate": 0.0008157244933246433, "loss": 3.2728, "step": 1886 }, { "epoch": 0.3, "learning_rate": 0.0008155219527083248, "loss": 3.2947, "step": 1887 }, { "epoch": 0.3, "learning_rate": 0.0008153193260207677, "loss": 3.4153, "step": 1888 }, { "epoch": 0.3, "learning_rate": 0.0008151166133172464, "loss": 3.2443, "step": 1889 }, { "epoch": 0.3, "learning_rate": 0.0008149138146530588, "loss": 3.134, "step": 1890 }, { "epoch": 0.3, "learning_rate": 0.0008147109300835268, "loss": 3.3258, "step": 1891 }, { "epoch": 0.31, "learning_rate": 0.0008145079596639947, "loss": 3.373, "step": 1892 }, { "epoch": 0.31, "learning_rate": 0.0008143049034498311, "loss": 3.5346, "step": 1893 }, { "epoch": 0.31, "learning_rate": 0.0008141017614964278, "loss": 3.4896, "step": 1894 }, { "epoch": 0.31, "learning_rate": 0.0008138985338591998, "loss": 3.3701, "step": 1895 }, { "epoch": 0.31, "learning_rate": 0.0008136952205935854, "loss": 3.1809, "step": 1896 }, { "epoch": 0.31, "learning_rate": 0.0008134918217550467, "loss": 3.1437, "step": 1897 }, { "epoch": 0.31, "learning_rate": 0.0008132883373990688, "loss": 3.2098, "step": 1898 }, { "epoch": 0.31, "learning_rate": 0.0008130847675811599, "loss": 3.2158, "step": 1899 }, { "epoch": 0.31, "learning_rate": 0.0008128811123568522, "loss": 3.3247, "step": 1900 }, { "epoch": 0.31, "learning_rate": 0.0008126773717817007, "loss": 3.3004, "step": 1901 }, { "epoch": 0.31, "learning_rate": 0.0008124735459112837, "loss": 3.4167, "step": 1902 }, { "epoch": 0.31, "learning_rate": 0.000812269634801203, "loss": 3.3174, "step": 1903 }, { "epoch": 0.31, "learning_rate": 0.0008120656385070837, "loss": 3.2891, "step": 1904 }, { "epoch": 0.31, "learning_rate": 0.0008118615570845734, "loss": 3.3537, "step": 1905 }, { "epoch": 0.31, "learning_rate": 0.0008116573905893439, "loss": 3.2605, "step": 1906 }, { "epoch": 0.31, "learning_rate": 0.0008114531390770896, "loss": 3.2651, "step": 1907 }, { "epoch": 0.31, "learning_rate": 0.0008112488026035284, "loss": 3.2966, "step": 1908 }, { "epoch": 0.31, "learning_rate": 0.000811044381224401, "loss": 3.3765, "step": 1909 }, { "epoch": 0.31, "learning_rate": 0.000810839874995472, "loss": 3.2007, "step": 1910 }, { "epoch": 0.31, "learning_rate": 0.0008106352839725282, "loss": 3.3245, "step": 1911 }, { "epoch": 0.31, "learning_rate": 0.0008104306082113801, "loss": 3.3985, "step": 1912 }, { "epoch": 0.31, "learning_rate": 0.0008102258477678613, "loss": 3.2951, "step": 1913 }, { "epoch": 0.31, "learning_rate": 0.0008100210026978283, "loss": 3.3456, "step": 1914 }, { "epoch": 0.31, "learning_rate": 0.0008098160730571609, "loss": 3.0625, "step": 1915 }, { "epoch": 0.31, "learning_rate": 0.0008096110589017617, "loss": 3.3031, "step": 1916 }, { "epoch": 0.31, "learning_rate": 0.0008094059602875567, "loss": 3.3961, "step": 1917 }, { "epoch": 0.31, "learning_rate": 0.0008092007772704948, "loss": 3.2407, "step": 1918 }, { "epoch": 0.31, "learning_rate": 0.0008089955099065476, "loss": 3.2703, "step": 1919 }, { "epoch": 0.31, "learning_rate": 0.0008087901582517101, "loss": 3.2769, "step": 1920 }, { "epoch": 0.31, "learning_rate": 0.0008085847223620002, "loss": 3.1523, "step": 1921 }, { "epoch": 0.31, "learning_rate": 0.0008083792022934589, "loss": 3.3082, "step": 1922 }, { "epoch": 0.31, "learning_rate": 0.0008081735981021499, "loss": 3.2886, "step": 1923 }, { "epoch": 0.31, "learning_rate": 0.0008079679098441599, "loss": 3.1875, "step": 1924 }, { "epoch": 0.31, "learning_rate": 0.0008077621375755987, "loss": 3.2664, "step": 1925 }, { "epoch": 0.31, "learning_rate": 0.000807556281352599, "loss": 3.1982, "step": 1926 }, { "epoch": 0.31, "learning_rate": 0.0008073503412313159, "loss": 3.2889, "step": 1927 }, { "epoch": 0.31, "learning_rate": 0.0008071443172679285, "loss": 3.3018, "step": 1928 }, { "epoch": 0.31, "learning_rate": 0.0008069382095186375, "loss": 3.2903, "step": 1929 }, { "epoch": 0.31, "learning_rate": 0.0008067320180396672, "loss": 3.3355, "step": 1930 }, { "epoch": 0.31, "learning_rate": 0.0008065257428872647, "loss": 3.2915, "step": 1931 }, { "epoch": 0.31, "learning_rate": 0.0008063193841176999, "loss": 3.4211, "step": 1932 }, { "epoch": 0.31, "learning_rate": 0.000806112941787265, "loss": 3.3267, "step": 1933 }, { "epoch": 0.31, "learning_rate": 0.0008059064159522757, "loss": 3.261, "step": 1934 }, { "epoch": 0.31, "learning_rate": 0.0008056998066690702, "loss": 3.2882, "step": 1935 }, { "epoch": 0.31, "learning_rate": 0.0008054931139940092, "loss": 3.3267, "step": 1936 }, { "epoch": 0.31, "learning_rate": 0.0008052863379834767, "loss": 3.2724, "step": 1937 }, { "epoch": 0.31, "learning_rate": 0.0008050794786938791, "loss": 3.354, "step": 1938 }, { "epoch": 0.31, "learning_rate": 0.0008048725361816454, "loss": 3.1559, "step": 1939 }, { "epoch": 0.31, "learning_rate": 0.0008046655105032273, "loss": 3.379, "step": 1940 }, { "epoch": 0.31, "learning_rate": 0.0008044584017150997, "loss": 3.1259, "step": 1941 }, { "epoch": 0.31, "learning_rate": 0.0008042512098737597, "loss": 3.333, "step": 1942 }, { "epoch": 0.31, "learning_rate": 0.0008040439350357272, "loss": 3.3844, "step": 1943 }, { "epoch": 0.31, "learning_rate": 0.0008038365772575444, "loss": 3.2899, "step": 1944 }, { "epoch": 0.31, "learning_rate": 0.0008036291365957768, "loss": 3.2145, "step": 1945 }, { "epoch": 0.31, "learning_rate": 0.000803421613107012, "loss": 3.2227, "step": 1946 }, { "epoch": 0.31, "learning_rate": 0.0008032140068478603, "loss": 3.1662, "step": 1947 }, { "epoch": 0.31, "learning_rate": 0.0008030063178749548, "loss": 3.3444, "step": 1948 }, { "epoch": 0.31, "learning_rate": 0.0008027985462449505, "loss": 3.1388, "step": 1949 }, { "epoch": 0.31, "learning_rate": 0.000802590692014526, "loss": 3.2995, "step": 1950 }, { "epoch": 0.31, "learning_rate": 0.0008023827552403815, "loss": 3.3751, "step": 1951 }, { "epoch": 0.31, "learning_rate": 0.0008021747359792403, "loss": 3.3041, "step": 1952 }, { "epoch": 0.31, "learning_rate": 0.0008019666342878479, "loss": 3.3719, "step": 1953 }, { "epoch": 0.32, "learning_rate": 0.0008017584502229723, "loss": 3.2941, "step": 1954 }, { "epoch": 0.32, "learning_rate": 0.0008015501838414038, "loss": 3.2941, "step": 1955 }, { "epoch": 0.32, "learning_rate": 0.0008013418351999561, "loss": 3.3175, "step": 1956 }, { "epoch": 0.32, "learning_rate": 0.0008011334043554639, "loss": 3.2465, "step": 1957 }, { "epoch": 0.32, "learning_rate": 0.0008009248913647855, "loss": 3.2828, "step": 1958 }, { "epoch": 0.32, "learning_rate": 0.0008007162962848009, "loss": 3.2684, "step": 1959 }, { "epoch": 0.32, "learning_rate": 0.0008005076191724128, "loss": 3.4251, "step": 1960 }, { "epoch": 0.32, "learning_rate": 0.0008002988600845464, "loss": 3.3131, "step": 1961 }, { "epoch": 0.32, "learning_rate": 0.0008000900190781489, "loss": 3.2152, "step": 1962 }, { "epoch": 0.32, "learning_rate": 0.0007998810962101902, "loss": 3.3632, "step": 1963 }, { "epoch": 0.32, "learning_rate": 0.0007996720915376623, "loss": 3.3837, "step": 1964 }, { "epoch": 0.32, "learning_rate": 0.0007994630051175795, "loss": 3.2259, "step": 1965 }, { "epoch": 0.32, "learning_rate": 0.0007992538370069787, "loss": 3.2372, "step": 1966 }, { "epoch": 0.32, "learning_rate": 0.0007990445872629188, "loss": 3.2951, "step": 1967 }, { "epoch": 0.32, "learning_rate": 0.0007988352559424808, "loss": 3.3707, "step": 1968 }, { "epoch": 0.32, "learning_rate": 0.0007986258431027684, "loss": 3.302, "step": 1969 }, { "epoch": 0.32, "learning_rate": 0.0007984163488009076, "loss": 3.2188, "step": 1970 }, { "epoch": 0.32, "learning_rate": 0.0007982067730940457, "loss": 3.1943, "step": 1971 }, { "epoch": 0.32, "learning_rate": 0.0007979971160393534, "loss": 3.2798, "step": 1972 }, { "epoch": 0.32, "learning_rate": 0.000797787377694023, "loss": 3.3238, "step": 1973 }, { "epoch": 0.32, "learning_rate": 0.0007975775581152687, "loss": 3.4319, "step": 1974 }, { "epoch": 0.32, "learning_rate": 0.0007973676573603274, "loss": 3.3202, "step": 1975 }, { "epoch": 0.32, "learning_rate": 0.000797157675486458, "loss": 3.3417, "step": 1976 }, { "epoch": 0.32, "learning_rate": 0.0007969476125509414, "loss": 3.4365, "step": 1977 }, { "epoch": 0.32, "learning_rate": 0.0007967374686110807, "loss": 3.3685, "step": 1978 }, { "epoch": 0.32, "learning_rate": 0.000796527243724201, "loss": 3.2208, "step": 1979 }, { "epoch": 0.32, "learning_rate": 0.0007963169379476495, "loss": 3.1208, "step": 1980 }, { "epoch": 0.32, "learning_rate": 0.0007961065513387956, "loss": 3.2225, "step": 1981 }, { "epoch": 0.32, "learning_rate": 0.0007958960839550307, "loss": 3.2271, "step": 1982 }, { "epoch": 0.32, "learning_rate": 0.0007956855358537682, "loss": 3.2955, "step": 1983 }, { "epoch": 0.32, "learning_rate": 0.0007954749070924434, "loss": 3.2951, "step": 1984 }, { "epoch": 0.32, "learning_rate": 0.0007952641977285137, "loss": 3.2528, "step": 1985 }, { "epoch": 0.32, "learning_rate": 0.0007950534078194589, "loss": 3.3092, "step": 1986 }, { "epoch": 0.32, "learning_rate": 0.0007948425374227799, "loss": 3.1577, "step": 1987 }, { "epoch": 0.32, "learning_rate": 0.0007946315865960004, "loss": 3.4106, "step": 1988 }, { "epoch": 0.32, "learning_rate": 0.0007944205553966653, "loss": 3.1926, "step": 1989 }, { "epoch": 0.32, "learning_rate": 0.0007942094438823421, "loss": 3.2895, "step": 1990 }, { "epoch": 0.32, "learning_rate": 0.0007939982521106198, "loss": 3.2897, "step": 1991 }, { "epoch": 0.32, "learning_rate": 0.0007937869801391095, "loss": 3.3033, "step": 1992 }, { "epoch": 0.32, "learning_rate": 0.000793575628025444, "loss": 3.244, "step": 1993 }, { "epoch": 0.32, "learning_rate": 0.0007933641958272782, "loss": 3.2956, "step": 1994 }, { "epoch": 0.32, "learning_rate": 0.0007931526836022884, "loss": 3.3539, "step": 1995 }, { "epoch": 0.32, "learning_rate": 0.000792941091408173, "loss": 3.2385, "step": 1996 }, { "epoch": 0.32, "learning_rate": 0.0007927294193026529, "loss": 3.2053, "step": 1997 }, { "epoch": 0.32, "learning_rate": 0.0007925176673434693, "loss": 3.3122, "step": 1998 }, { "epoch": 0.32, "learning_rate": 0.0007923058355883864, "loss": 3.3112, "step": 1999 }, { "epoch": 0.32, "learning_rate": 0.0007920939240951899, "loss": 3.3024, "step": 2000 }, { "epoch": 0.32, "learning_rate": 0.0007918819329216871, "loss": 3.2744, "step": 2001 }, { "epoch": 0.32, "learning_rate": 0.0007916698621257068, "loss": 3.4105, "step": 2002 }, { "epoch": 0.32, "learning_rate": 0.0007914577117651001, "loss": 3.3182, "step": 2003 }, { "epoch": 0.32, "learning_rate": 0.0007912454818977393, "loss": 3.424, "step": 2004 }, { "epoch": 0.32, "learning_rate": 0.0007910331725815185, "loss": 3.1176, "step": 2005 }, { "epoch": 0.32, "learning_rate": 0.0007908207838743541, "loss": 3.2679, "step": 2006 }, { "epoch": 0.32, "learning_rate": 0.0007906083158341831, "loss": 3.3404, "step": 2007 }, { "epoch": 0.32, "learning_rate": 0.0007903957685189649, "loss": 3.2217, "step": 2008 }, { "epoch": 0.32, "learning_rate": 0.0007901831419866799, "loss": 3.354, "step": 2009 }, { "epoch": 0.32, "learning_rate": 0.000789970436295331, "loss": 3.1425, "step": 2010 }, { "epoch": 0.32, "learning_rate": 0.0007897576515029421, "loss": 3.3965, "step": 2011 }, { "epoch": 0.32, "learning_rate": 0.0007895447876675583, "loss": 3.2766, "step": 2012 }, { "epoch": 0.32, "learning_rate": 0.0007893318448472474, "loss": 3.4282, "step": 2013 }, { "epoch": 0.32, "learning_rate": 0.0007891188231000977, "loss": 3.2903, "step": 2014 }, { "epoch": 0.32, "learning_rate": 0.0007889057224842193, "loss": 3.3053, "step": 2015 }, { "epoch": 0.33, "learning_rate": 0.0007886925430577443, "loss": 3.4105, "step": 2016 }, { "epoch": 0.33, "learning_rate": 0.0007884792848788256, "loss": 3.2865, "step": 2017 }, { "epoch": 0.33, "learning_rate": 0.000788265948005638, "loss": 3.3477, "step": 2018 }, { "epoch": 0.33, "learning_rate": 0.0007880525324963776, "loss": 3.1795, "step": 2019 }, { "epoch": 0.33, "learning_rate": 0.0007878390384092622, "loss": 3.2196, "step": 2020 }, { "epoch": 0.33, "learning_rate": 0.0007876254658025307, "loss": 3.2707, "step": 2021 }, { "epoch": 0.33, "learning_rate": 0.0007874118147344436, "loss": 3.2618, "step": 2022 }, { "epoch": 0.33, "learning_rate": 0.0007871980852632829, "loss": 3.3785, "step": 2023 }, { "epoch": 0.33, "learning_rate": 0.0007869842774473517, "loss": 3.3158, "step": 2024 }, { "epoch": 0.33, "learning_rate": 0.0007867703913449747, "loss": 3.3477, "step": 2025 }, { "epoch": 0.33, "learning_rate": 0.0007865564270144978, "loss": 3.04, "step": 2026 }, { "epoch": 0.33, "learning_rate": 0.0007863423845142886, "loss": 3.3698, "step": 2027 }, { "epoch": 0.33, "learning_rate": 0.0007861282639027354, "loss": 3.4027, "step": 2028 }, { "epoch": 0.33, "learning_rate": 0.0007859140652382486, "loss": 3.2539, "step": 2029 }, { "epoch": 0.33, "learning_rate": 0.0007856997885792589, "loss": 3.2992, "step": 2030 }, { "epoch": 0.33, "learning_rate": 0.0007854854339842195, "loss": 3.2591, "step": 2031 }, { "epoch": 0.33, "learning_rate": 0.0007852710015116036, "loss": 3.4617, "step": 2032 }, { "epoch": 0.33, "learning_rate": 0.0007850564912199066, "loss": 3.2176, "step": 2033 }, { "epoch": 0.33, "learning_rate": 0.0007848419031676445, "loss": 3.2108, "step": 2034 }, { "epoch": 0.33, "learning_rate": 0.0007846272374133549, "loss": 3.2394, "step": 2035 }, { "epoch": 0.33, "learning_rate": 0.0007844124940155967, "loss": 3.2863, "step": 2036 }, { "epoch": 0.33, "learning_rate": 0.0007841976730329494, "loss": 3.533, "step": 2037 }, { "epoch": 0.33, "learning_rate": 0.0007839827745240143, "loss": 3.3082, "step": 2038 }, { "epoch": 0.33, "learning_rate": 0.0007837677985474132, "loss": 3.4071, "step": 2039 }, { "epoch": 0.33, "learning_rate": 0.0007835527451617899, "loss": 3.2687, "step": 2040 }, { "epoch": 0.33, "learning_rate": 0.0007833376144258085, "loss": 3.2481, "step": 2041 }, { "epoch": 0.33, "learning_rate": 0.0007831224063981543, "loss": 3.1558, "step": 2042 }, { "epoch": 0.33, "learning_rate": 0.0007829071211375345, "loss": 3.3342, "step": 2043 }, { "epoch": 0.33, "learning_rate": 0.0007826917587026763, "loss": 3.3242, "step": 2044 }, { "epoch": 0.33, "learning_rate": 0.0007824763191523285, "loss": 3.1648, "step": 2045 }, { "epoch": 0.33, "learning_rate": 0.000782260802545261, "loss": 3.3914, "step": 2046 }, { "epoch": 0.33, "learning_rate": 0.0007820452089402645, "loss": 3.2113, "step": 2047 }, { "epoch": 0.33, "learning_rate": 0.0007818295383961507, "loss": 3.2991, "step": 2048 }, { "epoch": 0.33, "learning_rate": 0.0007816137909717526, "loss": 3.3813, "step": 2049 }, { "epoch": 0.33, "learning_rate": 0.0007813979667259238, "loss": 3.4531, "step": 2050 }, { "epoch": 0.33, "learning_rate": 0.0007811820657175389, "loss": 3.1524, "step": 2051 }, { "epoch": 0.33, "learning_rate": 0.0007809660880054936, "loss": 3.291, "step": 2052 }, { "epoch": 0.33, "learning_rate": 0.0007807500336487046, "loss": 3.314, "step": 2053 }, { "epoch": 0.33, "learning_rate": 0.0007805339027061093, "loss": 3.3191, "step": 2054 }, { "epoch": 0.33, "learning_rate": 0.000780317695236666, "loss": 3.2411, "step": 2055 }, { "epoch": 0.33, "learning_rate": 0.0007801014112993539, "loss": 3.3343, "step": 2056 }, { "epoch": 0.33, "learning_rate": 0.0007798850509531732, "loss": 3.3003, "step": 2057 }, { "epoch": 0.33, "learning_rate": 0.000779668614257145, "loss": 3.3541, "step": 2058 }, { "epoch": 0.33, "learning_rate": 0.0007794521012703106, "loss": 3.1875, "step": 2059 }, { "epoch": 0.33, "learning_rate": 0.000779235512051733, "loss": 3.3534, "step": 2060 }, { "epoch": 0.33, "learning_rate": 0.0007790188466604955, "loss": 3.3571, "step": 2061 }, { "epoch": 0.33, "learning_rate": 0.0007788021051557022, "loss": 3.3341, "step": 2062 }, { "epoch": 0.33, "learning_rate": 0.0007785852875964778, "loss": 3.1624, "step": 2063 }, { "epoch": 0.33, "learning_rate": 0.0007783683940419684, "loss": 3.3886, "step": 2064 }, { "epoch": 0.33, "learning_rate": 0.0007781514245513401, "loss": 3.2168, "step": 2065 }, { "epoch": 0.33, "learning_rate": 0.0007779343791837803, "loss": 3.2119, "step": 2066 }, { "epoch": 0.33, "learning_rate": 0.0007777172579984963, "loss": 3.2798, "step": 2067 }, { "epoch": 0.33, "learning_rate": 0.0007775000610547169, "loss": 3.3222, "step": 2068 }, { "epoch": 0.33, "learning_rate": 0.0007772827884116915, "loss": 3.3426, "step": 2069 }, { "epoch": 0.33, "learning_rate": 0.0007770654401286896, "loss": 3.177, "step": 2070 }, { "epoch": 0.33, "learning_rate": 0.0007768480162650016, "loss": 3.2953, "step": 2071 }, { "epoch": 0.33, "learning_rate": 0.0007766305168799385, "loss": 3.2255, "step": 2072 }, { "epoch": 0.33, "learning_rate": 0.0007764129420328326, "loss": 3.2328, "step": 2073 }, { "epoch": 0.33, "learning_rate": 0.0007761952917830353, "loss": 3.3376, "step": 2074 }, { "epoch": 0.33, "learning_rate": 0.0007759775661899198, "loss": 3.3519, "step": 2075 }, { "epoch": 0.33, "learning_rate": 0.0007757597653128796, "loss": 3.3258, "step": 2076 }, { "epoch": 0.33, "learning_rate": 0.0007755418892113282, "loss": 3.2734, "step": 2077 }, { "epoch": 0.34, "learning_rate": 0.0007753239379447005, "loss": 3.266, "step": 2078 }, { "epoch": 0.34, "learning_rate": 0.0007751059115724511, "loss": 3.3698, "step": 2079 }, { "epoch": 0.34, "learning_rate": 0.0007748878101540556, "loss": 3.4313, "step": 2080 }, { "epoch": 0.34, "learning_rate": 0.0007746696337490098, "loss": 3.2772, "step": 2081 }, { "epoch": 0.34, "learning_rate": 0.0007744513824168301, "loss": 3.2807, "step": 2082 }, { "epoch": 0.34, "learning_rate": 0.0007742330562170532, "loss": 3.3006, "step": 2083 }, { "epoch": 0.34, "learning_rate": 0.0007740146552092364, "loss": 3.2362, "step": 2084 }, { "epoch": 0.34, "learning_rate": 0.0007737961794529573, "loss": 3.4251, "step": 2085 }, { "epoch": 0.34, "learning_rate": 0.0007735776290078138, "loss": 3.3316, "step": 2086 }, { "epoch": 0.34, "learning_rate": 0.0007733590039334242, "loss": 3.3105, "step": 2087 }, { "epoch": 0.34, "learning_rate": 0.0007731403042894276, "loss": 3.2811, "step": 2088 }, { "epoch": 0.34, "learning_rate": 0.0007729215301354828, "loss": 3.3068, "step": 2089 }, { "epoch": 0.34, "learning_rate": 0.0007727026815312694, "loss": 3.4135, "step": 2090 }, { "epoch": 0.34, "learning_rate": 0.0007724837585364868, "loss": 3.2548, "step": 2091 }, { "epoch": 0.34, "learning_rate": 0.0007722647612108554, "loss": 3.4309, "step": 2092 }, { "epoch": 0.34, "learning_rate": 0.000772045689614115, "loss": 3.1607, "step": 2093 }, { "epoch": 0.34, "learning_rate": 0.0007718265438060266, "loss": 3.2631, "step": 2094 }, { "epoch": 0.34, "learning_rate": 0.0007716073238463709, "loss": 3.1701, "step": 2095 }, { "epoch": 0.34, "learning_rate": 0.0007713880297949488, "loss": 3.3182, "step": 2096 }, { "epoch": 0.34, "learning_rate": 0.0007711686617115814, "loss": 3.4139, "step": 2097 }, { "epoch": 0.34, "learning_rate": 0.0007709492196561103, "loss": 3.2526, "step": 2098 }, { "epoch": 0.34, "learning_rate": 0.0007707297036883972, "loss": 3.4242, "step": 2099 }, { "epoch": 0.34, "learning_rate": 0.0007705101138683237, "loss": 3.2089, "step": 2100 }, { "epoch": 0.34, "learning_rate": 0.0007702904502557919, "loss": 3.3798, "step": 2101 }, { "epoch": 0.34, "learning_rate": 0.0007700707129107234, "loss": 3.3468, "step": 2102 }, { "epoch": 0.34, "learning_rate": 0.000769850901893061, "loss": 3.2908, "step": 2103 }, { "epoch": 0.34, "learning_rate": 0.0007696310172627664, "loss": 3.352, "step": 2104 }, { "epoch": 0.34, "learning_rate": 0.000769411059079822, "loss": 3.1928, "step": 2105 }, { "epoch": 0.34, "learning_rate": 0.0007691910274042306, "loss": 3.3302, "step": 2106 }, { "epoch": 0.34, "learning_rate": 0.0007689709222960144, "loss": 3.2059, "step": 2107 }, { "epoch": 0.34, "learning_rate": 0.0007687507438152157, "loss": 3.2943, "step": 2108 }, { "epoch": 0.34, "learning_rate": 0.0007685304920218972, "loss": 3.2335, "step": 2109 }, { "epoch": 0.34, "learning_rate": 0.0007683101669761412, "loss": 3.2784, "step": 2110 }, { "epoch": 0.34, "learning_rate": 0.0007680897687380504, "loss": 3.4007, "step": 2111 }, { "epoch": 0.34, "learning_rate": 0.0007678692973677472, "loss": 3.3147, "step": 2112 }, { "epoch": 0.34, "learning_rate": 0.000767648752925374, "loss": 3.165, "step": 2113 }, { "epoch": 0.34, "learning_rate": 0.000767428135471093, "loss": 3.4647, "step": 2114 }, { "epoch": 0.34, "learning_rate": 0.0007672074450650863, "loss": 3.2766, "step": 2115 }, { "epoch": 0.34, "learning_rate": 0.0007669866817675564, "loss": 3.5702, "step": 2116 }, { "epoch": 0.34, "learning_rate": 0.0007667658456387251, "loss": 3.3298, "step": 2117 }, { "epoch": 0.34, "learning_rate": 0.0007665449367388346, "loss": 3.3949, "step": 2118 }, { "epoch": 0.34, "learning_rate": 0.0007663239551281461, "loss": 3.2642, "step": 2119 }, { "epoch": 0.34, "learning_rate": 0.0007661029008669417, "loss": 3.3703, "step": 2120 }, { "epoch": 0.34, "learning_rate": 0.0007658817740155224, "loss": 3.2931, "step": 2121 }, { "epoch": 0.34, "learning_rate": 0.0007656605746342096, "loss": 3.2367, "step": 2122 }, { "epoch": 0.34, "learning_rate": 0.0007654393027833443, "loss": 3.2409, "step": 2123 }, { "epoch": 0.34, "learning_rate": 0.0007652179585232871, "loss": 3.3024, "step": 2124 }, { "epoch": 0.34, "learning_rate": 0.0007649965419144187, "loss": 3.4153, "step": 2125 }, { "epoch": 0.34, "learning_rate": 0.0007647750530171393, "loss": 3.3285, "step": 2126 }, { "epoch": 0.34, "learning_rate": 0.0007645534918918687, "loss": 3.2449, "step": 2127 }, { "epoch": 0.34, "learning_rate": 0.0007643318585990465, "loss": 3.3336, "step": 2128 }, { "epoch": 0.34, "learning_rate": 0.0007641101531991324, "loss": 3.2487, "step": 2129 }, { "epoch": 0.34, "learning_rate": 0.0007638883757526052, "loss": 3.2595, "step": 2130 }, { "epoch": 0.34, "learning_rate": 0.0007636665263199637, "loss": 3.1952, "step": 2131 }, { "epoch": 0.34, "learning_rate": 0.0007634446049617261, "loss": 3.3774, "step": 2132 }, { "epoch": 0.34, "learning_rate": 0.0007632226117384302, "loss": 3.3321, "step": 2133 }, { "epoch": 0.34, "learning_rate": 0.0007630005467106338, "loss": 3.3728, "step": 2134 }, { "epoch": 0.34, "learning_rate": 0.000762778409938914, "loss": 3.2154, "step": 2135 }, { "epoch": 0.34, "learning_rate": 0.0007625562014838673, "loss": 3.1877, "step": 2136 }, { "epoch": 0.34, "learning_rate": 0.0007623339214061101, "loss": 3.2775, "step": 2137 }, { "epoch": 0.34, "learning_rate": 0.0007621115697662782, "loss": 3.299, "step": 2138 }, { "epoch": 0.34, "learning_rate": 0.0007618891466250267, "loss": 3.2165, "step": 2139 }, { "epoch": 0.35, "learning_rate": 0.0007616666520430304, "loss": 3.3403, "step": 2140 }, { "epoch": 0.35, "learning_rate": 0.0007614440860809841, "loss": 3.3165, "step": 2141 }, { "epoch": 0.35, "learning_rate": 0.000761221448799601, "loss": 3.2329, "step": 2142 }, { "epoch": 0.35, "learning_rate": 0.0007609987402596146, "loss": 3.3127, "step": 2143 }, { "epoch": 0.35, "learning_rate": 0.0007607759605217777, "loss": 3.3359, "step": 2144 }, { "epoch": 0.35, "learning_rate": 0.000760553109646862, "loss": 3.368, "step": 2145 }, { "epoch": 0.35, "learning_rate": 0.0007603301876956592, "loss": 3.3018, "step": 2146 }, { "epoch": 0.35, "learning_rate": 0.0007601071947289803, "loss": 3.2157, "step": 2147 }, { "epoch": 0.35, "learning_rate": 0.0007598841308076553, "loss": 3.2489, "step": 2148 }, { "epoch": 0.35, "learning_rate": 0.0007596609959925342, "loss": 3.3598, "step": 2149 }, { "epoch": 0.35, "learning_rate": 0.0007594377903444856, "loss": 3.2148, "step": 2150 }, { "epoch": 0.35, "learning_rate": 0.0007592145139243977, "loss": 3.2342, "step": 2151 }, { "epoch": 0.35, "learning_rate": 0.0007589911667931785, "loss": 3.4123, "step": 2152 }, { "epoch": 0.35, "learning_rate": 0.0007587677490117546, "loss": 3.1564, "step": 2153 }, { "epoch": 0.35, "learning_rate": 0.0007585442606410721, "loss": 3.2405, "step": 2154 }, { "epoch": 0.35, "learning_rate": 0.0007583207017420966, "loss": 3.3394, "step": 2155 }, { "epoch": 0.35, "learning_rate": 0.0007580970723758123, "loss": 3.1147, "step": 2156 }, { "epoch": 0.35, "learning_rate": 0.0007578733726032234, "loss": 3.2013, "step": 2157 }, { "epoch": 0.35, "learning_rate": 0.0007576496024853532, "loss": 3.3885, "step": 2158 }, { "epoch": 0.35, "learning_rate": 0.0007574257620832435, "loss": 3.0919, "step": 2159 }, { "epoch": 0.35, "learning_rate": 0.0007572018514579557, "loss": 3.2372, "step": 2160 }, { "epoch": 0.35, "learning_rate": 0.0007569778706705708, "loss": 3.1643, "step": 2161 }, { "epoch": 0.35, "learning_rate": 0.0007567538197821882, "loss": 3.3225, "step": 2162 }, { "epoch": 0.35, "learning_rate": 0.000756529698853927, "loss": 3.2682, "step": 2163 }, { "epoch": 0.35, "learning_rate": 0.000756305507946925, "loss": 3.3258, "step": 2164 }, { "epoch": 0.35, "learning_rate": 0.000756081247122339, "loss": 3.156, "step": 2165 }, { "epoch": 0.35, "learning_rate": 0.0007558569164413456, "loss": 3.3805, "step": 2166 }, { "epoch": 0.35, "learning_rate": 0.0007556325159651399, "loss": 3.2488, "step": 2167 }, { "epoch": 0.35, "learning_rate": 0.0007554080457549355, "loss": 3.2599, "step": 2168 }, { "epoch": 0.35, "learning_rate": 0.0007551835058719663, "loss": 3.3745, "step": 2169 }, { "epoch": 0.35, "learning_rate": 0.0007549588963774843, "loss": 3.3421, "step": 2170 }, { "epoch": 0.35, "learning_rate": 0.0007547342173327606, "loss": 3.2082, "step": 2171 }, { "epoch": 0.35, "learning_rate": 0.0007545094687990857, "loss": 3.3218, "step": 2172 }, { "epoch": 0.35, "learning_rate": 0.0007542846508377687, "loss": 3.3511, "step": 2173 }, { "epoch": 0.35, "learning_rate": 0.0007540597635101371, "loss": 3.1408, "step": 2174 }, { "epoch": 0.35, "learning_rate": 0.000753834806877539, "loss": 3.1676, "step": 2175 }, { "epoch": 0.35, "learning_rate": 0.0007536097810013395, "loss": 3.3126, "step": 2176 }, { "epoch": 0.35, "learning_rate": 0.0007533846859429234, "loss": 3.0507, "step": 2177 }, { "epoch": 0.35, "learning_rate": 0.000753159521763695, "loss": 3.4192, "step": 2178 }, { "epoch": 0.35, "learning_rate": 0.0007529342885250763, "loss": 3.2173, "step": 2179 }, { "epoch": 0.35, "learning_rate": 0.0007527089862885088, "loss": 3.3216, "step": 2180 }, { "epoch": 0.35, "learning_rate": 0.0007524836151154528, "loss": 3.2036, "step": 2181 }, { "epoch": 0.35, "learning_rate": 0.0007522581750673871, "loss": 3.1914, "step": 2182 }, { "epoch": 0.35, "learning_rate": 0.0007520326662058099, "loss": 3.2593, "step": 2183 }, { "epoch": 0.35, "learning_rate": 0.0007518070885922374, "loss": 3.2114, "step": 2184 }, { "epoch": 0.35, "learning_rate": 0.0007515814422882051, "loss": 3.2457, "step": 2185 }, { "epoch": 0.35, "learning_rate": 0.0007513557273552669, "loss": 3.0602, "step": 2186 }, { "epoch": 0.35, "learning_rate": 0.0007511299438549957, "loss": 3.3307, "step": 2187 }, { "epoch": 0.35, "learning_rate": 0.0007509040918489829, "loss": 3.3034, "step": 2188 }, { "epoch": 0.35, "learning_rate": 0.0007506781713988389, "loss": 3.1123, "step": 2189 }, { "epoch": 0.35, "learning_rate": 0.0007504521825661923, "loss": 3.0999, "step": 2190 }, { "epoch": 0.35, "learning_rate": 0.0007502261254126905, "loss": 3.0838, "step": 2191 }, { "epoch": 0.35, "learning_rate": 0.00075, "loss": 3.3044, "step": 2192 }, { "epoch": 0.35, "learning_rate": 0.0007497738063898053, "loss": 3.3323, "step": 2193 }, { "epoch": 0.35, "learning_rate": 0.0007495475446438098, "loss": 3.3135, "step": 2194 }, { "epoch": 0.35, "learning_rate": 0.0007493212148237356, "loss": 3.2394, "step": 2195 }, { "epoch": 0.35, "learning_rate": 0.000749094816991323, "loss": 3.3311, "step": 2196 }, { "epoch": 0.35, "learning_rate": 0.0007488683512083309, "loss": 3.2943, "step": 2197 }, { "epoch": 0.35, "learning_rate": 0.0007486418175365376, "loss": 3.2907, "step": 2198 }, { "epoch": 0.35, "learning_rate": 0.0007484152160377385, "loss": 3.2355, "step": 2199 }, { "epoch": 0.35, "learning_rate": 0.0007481885467737486, "loss": 3.337, "step": 2200 }, { "epoch": 0.35, "learning_rate": 0.0007479618098064009, "loss": 3.2714, "step": 2201 }, { "epoch": 0.36, "learning_rate": 0.0007477350051975468, "loss": 3.3108, "step": 2202 }, { "epoch": 0.36, "learning_rate": 0.0007475081330090567, "loss": 3.1877, "step": 2203 }, { "epoch": 0.36, "learning_rate": 0.0007472811933028191, "loss": 3.2094, "step": 2204 }, { "epoch": 0.36, "learning_rate": 0.0007470541861407404, "loss": 3.1647, "step": 2205 }, { "epoch": 0.36, "learning_rate": 0.0007468271115847463, "loss": 3.2426, "step": 2206 }, { "epoch": 0.36, "learning_rate": 0.0007465999696967804, "loss": 3.2321, "step": 2207 }, { "epoch": 0.36, "learning_rate": 0.0007463727605388044, "loss": 3.2257, "step": 2208 }, { "epoch": 0.36, "learning_rate": 0.0007461454841727992, "loss": 3.3371, "step": 2209 }, { "epoch": 0.36, "learning_rate": 0.0007459181406607633, "loss": 3.2265, "step": 2210 }, { "epoch": 0.36, "learning_rate": 0.0007456907300647137, "loss": 3.2353, "step": 2211 }, { "epoch": 0.36, "learning_rate": 0.0007454632524466858, "loss": 3.2555, "step": 2212 }, { "epoch": 0.36, "learning_rate": 0.0007452357078687332, "loss": 3.2839, "step": 2213 }, { "epoch": 0.36, "learning_rate": 0.0007450080963929277, "loss": 3.1861, "step": 2214 }, { "epoch": 0.36, "learning_rate": 0.0007447804180813596, "loss": 3.2644, "step": 2215 }, { "epoch": 0.36, "learning_rate": 0.0007445526729961372, "loss": 3.3463, "step": 2216 }, { "epoch": 0.36, "learning_rate": 0.0007443248611993872, "loss": 3.3303, "step": 2217 }, { "epoch": 0.36, "learning_rate": 0.000744096982753254, "loss": 3.3225, "step": 2218 }, { "epoch": 0.36, "learning_rate": 0.0007438690377199011, "loss": 3.2817, "step": 2219 }, { "epoch": 0.36, "learning_rate": 0.0007436410261615092, "loss": 3.2854, "step": 2220 }, { "epoch": 0.36, "learning_rate": 0.0007434129481402777, "loss": 3.4136, "step": 2221 }, { "epoch": 0.36, "learning_rate": 0.0007431848037184242, "loss": 3.2205, "step": 2222 }, { "epoch": 0.36, "learning_rate": 0.000742956592958184, "loss": 3.368, "step": 2223 }, { "epoch": 0.36, "learning_rate": 0.0007427283159218107, "loss": 3.1307, "step": 2224 }, { "epoch": 0.36, "learning_rate": 0.0007424999726715763, "loss": 3.377, "step": 2225 }, { "epoch": 0.36, "learning_rate": 0.0007422715632697702, "loss": 3.3145, "step": 2226 }, { "epoch": 0.36, "learning_rate": 0.0007420430877787003, "loss": 3.3853, "step": 2227 }, { "epoch": 0.36, "learning_rate": 0.0007418145462606925, "loss": 3.1315, "step": 2228 }, { "epoch": 0.36, "learning_rate": 0.0007415859387780906, "loss": 3.1234, "step": 2229 }, { "epoch": 0.36, "learning_rate": 0.0007413572653932566, "loss": 3.4594, "step": 2230 }, { "epoch": 0.36, "learning_rate": 0.0007411285261685701, "loss": 3.3256, "step": 2231 }, { "epoch": 0.36, "learning_rate": 0.000740899721166429, "loss": 3.301, "step": 2232 }, { "epoch": 0.36, "learning_rate": 0.000740670850449249, "loss": 3.3586, "step": 2233 }, { "epoch": 0.36, "learning_rate": 0.0007404419140794634, "loss": 3.2743, "step": 2234 }, { "epoch": 0.36, "learning_rate": 0.0007402129121195245, "loss": 3.221, "step": 2235 }, { "epoch": 0.36, "learning_rate": 0.0007399838446319011, "loss": 3.1987, "step": 2236 }, { "epoch": 0.36, "learning_rate": 0.0007397547116790807, "loss": 3.1278, "step": 2237 }, { "epoch": 0.36, "learning_rate": 0.0007395255133235686, "loss": 3.3863, "step": 2238 }, { "epoch": 0.36, "learning_rate": 0.0007392962496278879, "loss": 3.2034, "step": 2239 }, { "epoch": 0.36, "learning_rate": 0.0007390669206545791, "loss": 3.2001, "step": 2240 }, { "epoch": 0.36, "learning_rate": 0.0007388375264662011, "loss": 3.3175, "step": 2241 }, { "epoch": 0.36, "learning_rate": 0.0007386080671253305, "loss": 3.1897, "step": 2242 }, { "epoch": 0.36, "learning_rate": 0.0007383785426945611, "loss": 3.0812, "step": 2243 }, { "epoch": 0.36, "learning_rate": 0.0007381489532365051, "loss": 3.3124, "step": 2244 }, { "epoch": 0.36, "learning_rate": 0.0007379192988137924, "loss": 3.0756, "step": 2245 }, { "epoch": 0.36, "learning_rate": 0.0007376895794890699, "loss": 3.2923, "step": 2246 }, { "epoch": 0.36, "learning_rate": 0.0007374597953250034, "loss": 3.3694, "step": 2247 }, { "epoch": 0.36, "learning_rate": 0.0007372299463842753, "loss": 3.2766, "step": 2248 }, { "epoch": 0.36, "learning_rate": 0.000737000032729586, "loss": 3.2082, "step": 2249 }, { "epoch": 0.36, "learning_rate": 0.0007367700544236541, "loss": 3.3897, "step": 2250 }, { "epoch": 0.36, "learning_rate": 0.0007365400115292151, "loss": 3.151, "step": 2251 }, { "epoch": 0.36, "learning_rate": 0.0007363099041090223, "loss": 3.2438, "step": 2252 }, { "epoch": 0.36, "learning_rate": 0.000736079732225847, "loss": 3.2622, "step": 2253 }, { "epoch": 0.36, "learning_rate": 0.0007358494959424775, "loss": 3.3762, "step": 2254 }, { "epoch": 0.36, "learning_rate": 0.0007356191953217202, "loss": 3.244, "step": 2255 }, { "epoch": 0.36, "learning_rate": 0.0007353888304263987, "loss": 3.2934, "step": 2256 }, { "epoch": 0.36, "learning_rate": 0.0007351584013193543, "loss": 3.2493, "step": 2257 }, { "epoch": 0.36, "learning_rate": 0.0007349279080634456, "loss": 3.2075, "step": 2258 }, { "epoch": 0.36, "learning_rate": 0.0007346973507215491, "loss": 3.1229, "step": 2259 }, { "epoch": 0.36, "learning_rate": 0.0007344667293565582, "loss": 3.4161, "step": 2260 }, { "epoch": 0.36, "learning_rate": 0.0007342360440313845, "loss": 3.4066, "step": 2261 }, { "epoch": 0.36, "learning_rate": 0.0007340052948089564, "loss": 3.1915, "step": 2262 }, { "epoch": 0.36, "learning_rate": 0.00073377448175222, "loss": 3.3072, "step": 2263 }, { "epoch": 0.37, "learning_rate": 0.0007335436049241391, "loss": 3.3976, "step": 2264 }, { "epoch": 0.37, "learning_rate": 0.000733312664387694, "loss": 3.2397, "step": 2265 }, { "epoch": 0.37, "learning_rate": 0.0007330816602058835, "loss": 3.2002, "step": 2266 }, { "epoch": 0.37, "learning_rate": 0.0007328505924417231, "loss": 3.176, "step": 2267 }, { "epoch": 0.37, "learning_rate": 0.0007326194611582457, "loss": 3.3226, "step": 2268 }, { "epoch": 0.37, "learning_rate": 0.0007323882664185016, "loss": 3.2193, "step": 2269 }, { "epoch": 0.37, "learning_rate": 0.0007321570082855584, "loss": 3.1747, "step": 2270 }, { "epoch": 0.37, "learning_rate": 0.0007319256868225011, "loss": 3.211, "step": 2271 }, { "epoch": 0.37, "learning_rate": 0.000731694302092432, "loss": 3.3152, "step": 2272 }, { "epoch": 0.37, "learning_rate": 0.0007314628541584703, "loss": 3.3018, "step": 2273 }, { "epoch": 0.37, "learning_rate": 0.0007312313430837528, "loss": 3.2751, "step": 2274 }, { "epoch": 0.37, "learning_rate": 0.0007309997689314333, "loss": 3.3193, "step": 2275 }, { "epoch": 0.37, "learning_rate": 0.0007307681317646831, "loss": 3.2053, "step": 2276 }, { "epoch": 0.37, "learning_rate": 0.0007305364316466902, "loss": 3.313, "step": 2277 }, { "epoch": 0.37, "learning_rate": 0.0007303046686406606, "loss": 3.4025, "step": 2278 }, { "epoch": 0.37, "learning_rate": 0.0007300728428098165, "loss": 3.2414, "step": 2279 }, { "epoch": 0.37, "learning_rate": 0.0007298409542173979, "loss": 3.2211, "step": 2280 }, { "epoch": 0.37, "learning_rate": 0.0007296090029266613, "loss": 3.2886, "step": 2281 }, { "epoch": 0.37, "learning_rate": 0.0007293769890008813, "loss": 3.3432, "step": 2282 }, { "epoch": 0.37, "learning_rate": 0.0007291449125033485, "loss": 3.3046, "step": 2283 }, { "epoch": 0.37, "learning_rate": 0.0007289127734973713, "loss": 3.28, "step": 2284 }, { "epoch": 0.37, "learning_rate": 0.0007286805720462749, "loss": 3.197, "step": 2285 }, { "epoch": 0.37, "learning_rate": 0.0007284483082134013, "loss": 3.1771, "step": 2286 }, { "epoch": 0.37, "learning_rate": 0.0007282159820621101, "loss": 3.2587, "step": 2287 }, { "epoch": 0.37, "learning_rate": 0.0007279835936557773, "loss": 3.1347, "step": 2288 }, { "epoch": 0.37, "learning_rate": 0.0007277511430577961, "loss": 3.2187, "step": 2289 }, { "epoch": 0.37, "learning_rate": 0.0007275186303315769, "loss": 3.3697, "step": 2290 }, { "epoch": 0.37, "learning_rate": 0.0007272860555405468, "loss": 3.3054, "step": 2291 }, { "epoch": 0.37, "learning_rate": 0.0007270534187481498, "loss": 3.2354, "step": 2292 }, { "epoch": 0.37, "learning_rate": 0.000726820720017847, "loss": 3.3119, "step": 2293 }, { "epoch": 0.37, "learning_rate": 0.0007265879594131161, "loss": 3.2453, "step": 2294 }, { "epoch": 0.37, "learning_rate": 0.0007263551369974522, "loss": 3.3854, "step": 2295 }, { "epoch": 0.37, "learning_rate": 0.0007261222528343665, "loss": 3.157, "step": 2296 }, { "epoch": 0.37, "learning_rate": 0.0007258893069873878, "loss": 3.1915, "step": 2297 }, { "epoch": 0.37, "learning_rate": 0.0007256562995200614, "loss": 3.2659, "step": 2298 }, { "epoch": 0.37, "learning_rate": 0.0007254232304959491, "loss": 3.1568, "step": 2299 }, { "epoch": 0.37, "learning_rate": 0.0007251900999786303, "loss": 3.2489, "step": 2300 }, { "epoch": 0.37, "learning_rate": 0.0007249569080317002, "loss": 3.3377, "step": 2301 }, { "epoch": 0.37, "learning_rate": 0.0007247236547187715, "loss": 3.2611, "step": 2302 }, { "epoch": 0.37, "learning_rate": 0.0007244903401034733, "loss": 3.2788, "step": 2303 }, { "epoch": 0.37, "learning_rate": 0.0007242569642494516, "loss": 3.3779, "step": 2304 }, { "epoch": 0.37, "learning_rate": 0.0007240235272203689, "loss": 3.2913, "step": 2305 }, { "epoch": 0.37, "learning_rate": 0.0007237900290799044, "loss": 3.2563, "step": 2306 }, { "epoch": 0.37, "learning_rate": 0.0007235564698917542, "loss": 3.1967, "step": 2307 }, { "epoch": 0.37, "learning_rate": 0.0007233228497196309, "loss": 3.3739, "step": 2308 }, { "epoch": 0.37, "learning_rate": 0.0007230891686272636, "loss": 3.3081, "step": 2309 }, { "epoch": 0.37, "learning_rate": 0.0007228554266783985, "loss": 3.1338, "step": 2310 }, { "epoch": 0.37, "learning_rate": 0.0007226216239367977, "loss": 3.1505, "step": 2311 }, { "epoch": 0.37, "learning_rate": 0.0007223877604662403, "loss": 3.2499, "step": 2312 }, { "epoch": 0.37, "learning_rate": 0.0007221538363305223, "loss": 3.2739, "step": 2313 }, { "epoch": 0.37, "learning_rate": 0.0007219198515934556, "loss": 3.3302, "step": 2314 }, { "epoch": 0.37, "learning_rate": 0.0007216858063188687, "loss": 3.4127, "step": 2315 }, { "epoch": 0.37, "learning_rate": 0.0007214517005706073, "loss": 3.4279, "step": 2316 }, { "epoch": 0.37, "learning_rate": 0.0007212175344125327, "loss": 3.2479, "step": 2317 }, { "epoch": 0.37, "learning_rate": 0.0007209833079085231, "loss": 3.3085, "step": 2318 }, { "epoch": 0.37, "learning_rate": 0.0007207490211224736, "loss": 3.2179, "step": 2319 }, { "epoch": 0.37, "learning_rate": 0.0007205146741182946, "loss": 3.457, "step": 2320 }, { "epoch": 0.37, "learning_rate": 0.0007202802669599143, "loss": 3.1284, "step": 2321 }, { "epoch": 0.37, "learning_rate": 0.0007200457997112761, "loss": 3.2329, "step": 2322 }, { "epoch": 0.37, "learning_rate": 0.0007198112724363407, "loss": 3.226, "step": 2323 }, { "epoch": 0.37, "learning_rate": 0.0007195766851990846, "loss": 3.2632, "step": 2324 }, { "epoch": 0.37, "learning_rate": 0.000719342038063501, "loss": 3.2964, "step": 2325 }, { "epoch": 0.38, "learning_rate": 0.000719107331093599, "loss": 3.2298, "step": 2326 }, { "epoch": 0.38, "learning_rate": 0.0007188725643534047, "loss": 3.1379, "step": 2327 }, { "epoch": 0.38, "learning_rate": 0.00071863773790696, "loss": 3.1072, "step": 2328 }, { "epoch": 0.38, "learning_rate": 0.000718402851818323, "loss": 3.2268, "step": 2329 }, { "epoch": 0.38, "learning_rate": 0.0007181679061515684, "loss": 3.2713, "step": 2330 }, { "epoch": 0.38, "learning_rate": 0.0007179329009707872, "loss": 3.2938, "step": 2331 }, { "epoch": 0.38, "learning_rate": 0.0007176978363400864, "loss": 3.3029, "step": 2332 }, { "epoch": 0.38, "learning_rate": 0.0007174627123235891, "loss": 3.2513, "step": 2333 }, { "epoch": 0.38, "learning_rate": 0.0007172275289854349, "loss": 3.4377, "step": 2334 }, { "epoch": 0.38, "learning_rate": 0.0007169922863897795, "loss": 3.3368, "step": 2335 }, { "epoch": 0.38, "learning_rate": 0.0007167569846007946, "loss": 3.1661, "step": 2336 }, { "epoch": 0.38, "learning_rate": 0.0007165216236826684, "loss": 3.3538, "step": 2337 }, { "epoch": 0.38, "learning_rate": 0.0007162862036996048, "loss": 3.2639, "step": 2338 }, { "epoch": 0.38, "learning_rate": 0.0007160507247158243, "loss": 3.2069, "step": 2339 }, { "epoch": 0.38, "learning_rate": 0.0007158151867955629, "loss": 3.2416, "step": 2340 }, { "epoch": 0.38, "learning_rate": 0.000715579590003073, "loss": 3.1853, "step": 2341 }, { "epoch": 0.38, "learning_rate": 0.0007153439344026233, "loss": 3.3641, "step": 2342 }, { "epoch": 0.38, "learning_rate": 0.0007151082200584982, "loss": 3.173, "step": 2343 }, { "epoch": 0.38, "learning_rate": 0.0007148724470349981, "loss": 3.2297, "step": 2344 }, { "epoch": 0.38, "learning_rate": 0.0007146366153964399, "loss": 3.1906, "step": 2345 }, { "epoch": 0.38, "learning_rate": 0.0007144007252071555, "loss": 3.227, "step": 2346 }, { "epoch": 0.38, "learning_rate": 0.0007141647765314938, "loss": 3.3371, "step": 2347 }, { "epoch": 0.38, "learning_rate": 0.0007139287694338192, "loss": 3.2033, "step": 2348 }, { "epoch": 0.38, "learning_rate": 0.000713692703978512, "loss": 3.2108, "step": 2349 }, { "epoch": 0.38, "learning_rate": 0.0007134565802299686, "loss": 3.309, "step": 2350 }, { "epoch": 0.38, "learning_rate": 0.0007132203982526012, "loss": 3.2436, "step": 2351 }, { "epoch": 0.38, "learning_rate": 0.0007129841581108376, "loss": 3.4498, "step": 2352 }, { "epoch": 0.38, "learning_rate": 0.0007127478598691222, "loss": 3.1987, "step": 2353 }, { "epoch": 0.38, "learning_rate": 0.0007125115035919147, "loss": 3.2257, "step": 2354 }, { "epoch": 0.38, "learning_rate": 0.0007122750893436904, "loss": 3.2829, "step": 2355 }, { "epoch": 0.38, "learning_rate": 0.0007120386171889409, "loss": 3.4217, "step": 2356 }, { "epoch": 0.38, "learning_rate": 0.0007118020871921736, "loss": 3.211, "step": 2357 }, { "epoch": 0.38, "learning_rate": 0.0007115654994179115, "loss": 3.4212, "step": 2358 }, { "epoch": 0.38, "learning_rate": 0.0007113288539306932, "loss": 3.2172, "step": 2359 }, { "epoch": 0.38, "learning_rate": 0.0007110921507950733, "loss": 3.2645, "step": 2360 }, { "epoch": 0.38, "learning_rate": 0.0007108553900756222, "loss": 3.2434, "step": 2361 }, { "epoch": 0.38, "learning_rate": 0.0007106185718369258, "loss": 3.3187, "step": 2362 }, { "epoch": 0.38, "learning_rate": 0.0007103816961435858, "loss": 3.3843, "step": 2363 }, { "epoch": 0.38, "learning_rate": 0.0007101447630602192, "loss": 3.2455, "step": 2364 }, { "epoch": 0.38, "learning_rate": 0.0007099077726514592, "loss": 3.2321, "step": 2365 }, { "epoch": 0.38, "learning_rate": 0.0007096707249819545, "loss": 3.2395, "step": 2366 }, { "epoch": 0.38, "learning_rate": 0.0007094336201163692, "loss": 3.1378, "step": 2367 }, { "epoch": 0.38, "learning_rate": 0.0007091964581193833, "loss": 3.2701, "step": 2368 }, { "epoch": 0.38, "learning_rate": 0.0007089592390556919, "loss": 3.2501, "step": 2369 }, { "epoch": 0.38, "learning_rate": 0.0007087219629900066, "loss": 3.1448, "step": 2370 }, { "epoch": 0.38, "learning_rate": 0.0007084846299870532, "loss": 3.2279, "step": 2371 }, { "epoch": 0.38, "learning_rate": 0.0007082472401115742, "loss": 3.2373, "step": 2372 }, { "epoch": 0.38, "learning_rate": 0.0007080097934283274, "loss": 3.1273, "step": 2373 }, { "epoch": 0.38, "learning_rate": 0.0007077722900020853, "loss": 3.2881, "step": 2374 }, { "epoch": 0.38, "learning_rate": 0.0007075347298976369, "loss": 3.2207, "step": 2375 }, { "epoch": 0.38, "learning_rate": 0.0007072971131797861, "loss": 3.4982, "step": 2376 }, { "epoch": 0.38, "learning_rate": 0.0007070594399133524, "loss": 3.1521, "step": 2377 }, { "epoch": 0.38, "learning_rate": 0.0007068217101631705, "loss": 3.2129, "step": 2378 }, { "epoch": 0.38, "learning_rate": 0.0007065839239940911, "loss": 3.2518, "step": 2379 }, { "epoch": 0.38, "learning_rate": 0.0007063460814709795, "loss": 3.3066, "step": 2380 }, { "epoch": 0.38, "learning_rate": 0.0007061081826587169, "loss": 3.2251, "step": 2381 }, { "epoch": 0.38, "learning_rate": 0.0007058702276221998, "loss": 3.2024, "step": 2382 }, { "epoch": 0.38, "learning_rate": 0.00070563221642634, "loss": 3.3269, "step": 2383 }, { "epoch": 0.38, "learning_rate": 0.0007053941491360642, "loss": 3.1851, "step": 2384 }, { "epoch": 0.38, "learning_rate": 0.0007051560258163152, "loss": 3.1468, "step": 2385 }, { "epoch": 0.38, "learning_rate": 0.0007049178465320506, "loss": 3.2766, "step": 2386 }, { "epoch": 0.38, "learning_rate": 0.0007046796113482431, "loss": 3.2584, "step": 2387 }, { "epoch": 0.39, "learning_rate": 0.0007044413203298812, "loss": 3.2294, "step": 2388 }, { "epoch": 0.39, "learning_rate": 0.0007042029735419681, "loss": 3.3003, "step": 2389 }, { "epoch": 0.39, "learning_rate": 0.0007039645710495224, "loss": 3.2018, "step": 2390 }, { "epoch": 0.39, "learning_rate": 0.0007037261129175781, "loss": 3.1521, "step": 2391 }, { "epoch": 0.39, "learning_rate": 0.0007034875992111839, "loss": 3.3059, "step": 2392 }, { "epoch": 0.39, "learning_rate": 0.0007032490299954041, "loss": 3.2848, "step": 2393 }, { "epoch": 0.39, "learning_rate": 0.0007030104053353184, "loss": 3.2664, "step": 2394 }, { "epoch": 0.39, "learning_rate": 0.0007027717252960205, "loss": 3.318, "step": 2395 }, { "epoch": 0.39, "learning_rate": 0.0007025329899426205, "loss": 3.2827, "step": 2396 }, { "epoch": 0.39, "learning_rate": 0.0007022941993402428, "loss": 3.1678, "step": 2397 }, { "epoch": 0.39, "learning_rate": 0.000702055353554027, "loss": 3.2703, "step": 2398 }, { "epoch": 0.39, "learning_rate": 0.0007018164526491281, "loss": 3.1449, "step": 2399 }, { "epoch": 0.39, "learning_rate": 0.0007015774966907157, "loss": 3.2486, "step": 2400 }, { "epoch": 0.39, "learning_rate": 0.0007013384857439746, "loss": 3.2396, "step": 2401 }, { "epoch": 0.39, "learning_rate": 0.0007010994198741046, "loss": 3.2147, "step": 2402 }, { "epoch": 0.39, "learning_rate": 0.0007008602991463207, "loss": 3.3983, "step": 2403 }, { "epoch": 0.39, "learning_rate": 0.0007006211236258523, "loss": 3.1631, "step": 2404 }, { "epoch": 0.39, "learning_rate": 0.0007003818933779444, "loss": 3.1418, "step": 2405 }, { "epoch": 0.39, "learning_rate": 0.0007001426084678563, "loss": 3.2172, "step": 2406 }, { "epoch": 0.39, "learning_rate": 0.0006999032689608629, "loss": 3.1305, "step": 2407 }, { "epoch": 0.39, "learning_rate": 0.0006996638749222534, "loss": 3.2526, "step": 2408 }, { "epoch": 0.39, "learning_rate": 0.0006994244264173321, "loss": 3.2125, "step": 2409 }, { "epoch": 0.39, "learning_rate": 0.0006991849235114183, "loss": 3.3355, "step": 2410 }, { "epoch": 0.39, "learning_rate": 0.0006989453662698458, "loss": 3.2209, "step": 2411 }, { "epoch": 0.39, "learning_rate": 0.0006987057547579636, "loss": 3.2027, "step": 2412 }, { "epoch": 0.39, "learning_rate": 0.0006984660890411353, "loss": 3.131, "step": 2413 }, { "epoch": 0.39, "learning_rate": 0.0006982263691847393, "loss": 3.2785, "step": 2414 }, { "epoch": 0.39, "learning_rate": 0.0006979865952541687, "loss": 3.2359, "step": 2415 }, { "epoch": 0.39, "learning_rate": 0.0006977467673148315, "loss": 3.2334, "step": 2416 }, { "epoch": 0.39, "learning_rate": 0.0006975068854321504, "loss": 3.3546, "step": 2417 }, { "epoch": 0.39, "learning_rate": 0.000697266949671563, "loss": 3.2672, "step": 2418 }, { "epoch": 0.39, "learning_rate": 0.0006970269600985208, "loss": 3.2393, "step": 2419 }, { "epoch": 0.39, "learning_rate": 0.0006967869167784909, "loss": 3.2753, "step": 2420 }, { "epoch": 0.39, "learning_rate": 0.0006965468197769547, "loss": 3.321, "step": 2421 }, { "epoch": 0.39, "learning_rate": 0.0006963066691594084, "loss": 3.1718, "step": 2422 }, { "epoch": 0.39, "learning_rate": 0.0006960664649913628, "loss": 3.1256, "step": 2423 }, { "epoch": 0.39, "learning_rate": 0.0006958262073383424, "loss": 3.1845, "step": 2424 }, { "epoch": 0.39, "learning_rate": 0.0006955858962658881, "loss": 3.2462, "step": 2425 }, { "epoch": 0.39, "learning_rate": 0.0006953455318395538, "loss": 3.3439, "step": 2426 }, { "epoch": 0.39, "learning_rate": 0.0006951051141249086, "loss": 3.2748, "step": 2427 }, { "epoch": 0.39, "learning_rate": 0.0006948646431875362, "loss": 3.0447, "step": 2428 }, { "epoch": 0.39, "learning_rate": 0.0006946241190930345, "loss": 3.4248, "step": 2429 }, { "epoch": 0.39, "learning_rate": 0.0006943835419070161, "loss": 3.3493, "step": 2430 }, { "epoch": 0.39, "learning_rate": 0.0006941429116951081, "loss": 3.1887, "step": 2431 }, { "epoch": 0.39, "learning_rate": 0.000693902228522952, "loss": 3.2811, "step": 2432 }, { "epoch": 0.39, "learning_rate": 0.0006936614924562038, "loss": 3.1866, "step": 2433 }, { "epoch": 0.39, "learning_rate": 0.0006934207035605338, "loss": 3.0892, "step": 2434 }, { "epoch": 0.39, "learning_rate": 0.0006931798619016269, "loss": 3.2421, "step": 2435 }, { "epoch": 0.39, "learning_rate": 0.0006929389675451823, "loss": 3.3325, "step": 2436 }, { "epoch": 0.39, "learning_rate": 0.0006926980205569135, "loss": 3.3181, "step": 2437 }, { "epoch": 0.39, "learning_rate": 0.0006924570210025485, "loss": 3.1922, "step": 2438 }, { "epoch": 0.39, "learning_rate": 0.0006922159689478297, "loss": 3.0082, "step": 2439 }, { "epoch": 0.39, "learning_rate": 0.0006919748644585133, "loss": 3.1829, "step": 2440 }, { "epoch": 0.39, "learning_rate": 0.0006917337076003706, "loss": 3.1297, "step": 2441 }, { "epoch": 0.39, "learning_rate": 0.0006914924984391866, "loss": 3.1293, "step": 2442 }, { "epoch": 0.39, "learning_rate": 0.0006912512370407608, "loss": 3.2214, "step": 2443 }, { "epoch": 0.39, "learning_rate": 0.0006910099234709069, "loss": 3.2349, "step": 2444 }, { "epoch": 0.39, "learning_rate": 0.0006907685577954528, "loss": 3.2974, "step": 2445 }, { "epoch": 0.39, "learning_rate": 0.0006905271400802405, "loss": 3.3025, "step": 2446 }, { "epoch": 0.39, "learning_rate": 0.0006902856703911266, "loss": 3.2207, "step": 2447 }, { "epoch": 0.39, "learning_rate": 0.0006900441487939817, "loss": 3.1736, "step": 2448 }, { "epoch": 0.39, "learning_rate": 0.0006898025753546902, "loss": 3.2478, "step": 2449 }, { "epoch": 0.4, "learning_rate": 0.0006895609501391509, "loss": 3.3099, "step": 2450 }, { "epoch": 0.4, "learning_rate": 0.000689319273213277, "loss": 3.3642, "step": 2451 }, { "epoch": 0.4, "learning_rate": 0.0006890775446429955, "loss": 3.2618, "step": 2452 }, { "epoch": 0.4, "learning_rate": 0.0006888357644942472, "loss": 3.0745, "step": 2453 }, { "epoch": 0.4, "learning_rate": 0.0006885939328329878, "loss": 3.2057, "step": 2454 }, { "epoch": 0.4, "learning_rate": 0.0006883520497251861, "loss": 3.2935, "step": 2455 }, { "epoch": 0.4, "learning_rate": 0.0006881101152368259, "loss": 3.366, "step": 2456 }, { "epoch": 0.4, "learning_rate": 0.0006878681294339042, "loss": 3.2241, "step": 2457 }, { "epoch": 0.4, "learning_rate": 0.0006876260923824322, "loss": 3.2848, "step": 2458 }, { "epoch": 0.4, "learning_rate": 0.0006873840041484356, "loss": 3.275, "step": 2459 }, { "epoch": 0.4, "learning_rate": 0.0006871418647979532, "loss": 3.1394, "step": 2460 }, { "epoch": 0.4, "learning_rate": 0.0006868996743970386, "loss": 3.2407, "step": 2461 }, { "epoch": 0.4, "learning_rate": 0.0006866574330117585, "loss": 3.1837, "step": 2462 }, { "epoch": 0.4, "learning_rate": 0.0006864151407081943, "loss": 3.2849, "step": 2463 }, { "epoch": 0.4, "learning_rate": 0.0006861727975524407, "loss": 3.2006, "step": 2464 }, { "epoch": 0.4, "learning_rate": 0.0006859304036106067, "loss": 3.2295, "step": 2465 }, { "epoch": 0.4, "learning_rate": 0.0006856879589488147, "loss": 3.3242, "step": 2466 }, { "epoch": 0.4, "learning_rate": 0.0006854454636332014, "loss": 3.1532, "step": 2467 }, { "epoch": 0.4, "learning_rate": 0.0006852029177299169, "loss": 3.2337, "step": 2468 }, { "epoch": 0.4, "learning_rate": 0.0006849603213051255, "loss": 3.3271, "step": 2469 }, { "epoch": 0.4, "learning_rate": 0.0006847176744250049, "loss": 3.2234, "step": 2470 }, { "epoch": 0.4, "learning_rate": 0.000684474977155747, "loss": 3.2485, "step": 2471 }, { "epoch": 0.4, "learning_rate": 0.0006842322295635569, "loss": 3.279, "step": 2472 }, { "epoch": 0.4, "learning_rate": 0.0006839894317146538, "loss": 3.0611, "step": 2473 }, { "epoch": 0.4, "learning_rate": 0.0006837465836752706, "loss": 3.2719, "step": 2474 }, { "epoch": 0.4, "learning_rate": 0.0006835036855116539, "loss": 3.2059, "step": 2475 }, { "epoch": 0.4, "learning_rate": 0.0006832607372900636, "loss": 3.2122, "step": 2476 }, { "epoch": 0.4, "learning_rate": 0.000683017739076774, "loss": 3.3313, "step": 2477 }, { "epoch": 0.4, "learning_rate": 0.0006827746909380722, "loss": 3.074, "step": 2478 }, { "epoch": 0.4, "learning_rate": 0.0006825315929402594, "loss": 3.2372, "step": 2479 }, { "epoch": 0.4, "learning_rate": 0.0006822884451496505, "loss": 3.153, "step": 2480 }, { "epoch": 0.4, "learning_rate": 0.0006820452476325733, "loss": 3.2201, "step": 2481 }, { "epoch": 0.4, "learning_rate": 0.0006818020004553702, "loss": 3.2218, "step": 2482 }, { "epoch": 0.4, "learning_rate": 0.0006815587036843964, "loss": 3.2461, "step": 2483 }, { "epoch": 0.4, "learning_rate": 0.0006813153573860209, "loss": 3.3799, "step": 2484 }, { "epoch": 0.4, "learning_rate": 0.0006810719616266259, "loss": 3.4, "step": 2485 }, { "epoch": 0.4, "learning_rate": 0.0006808285164726076, "loss": 3.1708, "step": 2486 }, { "epoch": 0.4, "learning_rate": 0.0006805850219903751, "loss": 3.2029, "step": 2487 }, { "epoch": 0.4, "learning_rate": 0.0006803414782463516, "loss": 3.131, "step": 2488 }, { "epoch": 0.4, "learning_rate": 0.0006800978853069733, "loss": 3.3197, "step": 2489 }, { "epoch": 0.4, "learning_rate": 0.0006798542432386897, "loss": 3.3108, "step": 2490 }, { "epoch": 0.4, "learning_rate": 0.0006796105521079643, "loss": 3.1706, "step": 2491 }, { "epoch": 0.4, "learning_rate": 0.0006793668119812735, "loss": 3.3288, "step": 2492 }, { "epoch": 0.4, "learning_rate": 0.0006791230229251067, "loss": 3.2508, "step": 2493 }, { "epoch": 0.4, "learning_rate": 0.0006788791850059679, "loss": 3.1876, "step": 2494 }, { "epoch": 0.4, "learning_rate": 0.0006786352982903732, "loss": 3.2854, "step": 2495 }, { "epoch": 0.4, "learning_rate": 0.0006783913628448523, "loss": 3.3168, "step": 2496 }, { "epoch": 0.4, "learning_rate": 0.0006781473787359488, "loss": 3.2681, "step": 2497 }, { "epoch": 0.4, "learning_rate": 0.0006779033460302189, "loss": 3.2489, "step": 2498 }, { "epoch": 0.4, "learning_rate": 0.0006776592647942322, "loss": 3.0535, "step": 2499 }, { "epoch": 0.4, "learning_rate": 0.0006774151350945717, "loss": 3.2502, "step": 2500 }, { "epoch": 0.4, "learning_rate": 0.0006771709569978337, "loss": 3.1901, "step": 2501 }, { "epoch": 0.4, "learning_rate": 0.0006769267305706272, "loss": 3.1329, "step": 2502 }, { "epoch": 0.4, "learning_rate": 0.000676682455879575, "loss": 3.2133, "step": 2503 }, { "epoch": 0.4, "learning_rate": 0.000676438132991313, "loss": 3.1973, "step": 2504 }, { "epoch": 0.4, "learning_rate": 0.0006761937619724894, "loss": 3.3134, "step": 2505 }, { "epoch": 0.4, "learning_rate": 0.0006759493428897667, "loss": 3.3521, "step": 2506 }, { "epoch": 0.4, "learning_rate": 0.0006757048758098199, "loss": 3.2364, "step": 2507 }, { "epoch": 0.4, "learning_rate": 0.000675460360799337, "loss": 3.3886, "step": 2508 }, { "epoch": 0.4, "learning_rate": 0.0006752157979250195, "loss": 3.308, "step": 2509 }, { "epoch": 0.4, "learning_rate": 0.0006749711872535816, "loss": 3.0457, "step": 2510 }, { "epoch": 0.4, "learning_rate": 0.0006747265288517505, "loss": 3.2852, "step": 2511 }, { "epoch": 0.41, "learning_rate": 0.0006744818227862668, "loss": 3.2673, "step": 2512 }, { "epoch": 0.41, "learning_rate": 0.0006742370691238836, "loss": 3.2516, "step": 2513 }, { "epoch": 0.41, "learning_rate": 0.0006739922679313677, "loss": 3.2373, "step": 2514 }, { "epoch": 0.41, "learning_rate": 0.0006737474192754979, "loss": 3.2346, "step": 2515 }, { "epoch": 0.41, "learning_rate": 0.0006735025232230668, "loss": 3.0965, "step": 2516 }, { "epoch": 0.41, "learning_rate": 0.0006732575798408797, "loss": 3.2551, "step": 2517 }, { "epoch": 0.41, "learning_rate": 0.0006730125891957543, "loss": 3.1609, "step": 2518 }, { "epoch": 0.41, "learning_rate": 0.0006727675513545219, "loss": 3.1434, "step": 2519 }, { "epoch": 0.41, "learning_rate": 0.0006725224663840264, "loss": 3.3367, "step": 2520 }, { "epoch": 0.41, "learning_rate": 0.0006722773343511244, "loss": 3.1853, "step": 2521 }, { "epoch": 0.41, "learning_rate": 0.0006720321553226854, "loss": 3.3621, "step": 2522 }, { "epoch": 0.41, "learning_rate": 0.0006717869293655919, "loss": 3.2767, "step": 2523 }, { "epoch": 0.41, "learning_rate": 0.0006715416565467392, "loss": 3.3447, "step": 2524 }, { "epoch": 0.41, "learning_rate": 0.0006712963369330351, "loss": 3.1636, "step": 2525 }, { "epoch": 0.41, "learning_rate": 0.0006710509705914005, "loss": 3.2909, "step": 2526 }, { "epoch": 0.41, "learning_rate": 0.0006708055575887687, "loss": 3.2877, "step": 2527 }, { "epoch": 0.41, "learning_rate": 0.0006705600979920861, "loss": 3.3157, "step": 2528 }, { "epoch": 0.41, "learning_rate": 0.0006703145918683116, "loss": 3.2956, "step": 2529 }, { "epoch": 0.41, "learning_rate": 0.0006700690392844166, "loss": 3.3955, "step": 2530 }, { "epoch": 0.41, "learning_rate": 0.0006698234403073856, "loss": 3.3652, "step": 2531 }, { "epoch": 0.41, "learning_rate": 0.0006695777950042155, "loss": 3.3426, "step": 2532 }, { "epoch": 0.41, "learning_rate": 0.000669332103441916, "loss": 3.336, "step": 2533 }, { "epoch": 0.41, "learning_rate": 0.000669086365687509, "loss": 3.2321, "step": 2534 }, { "epoch": 0.41, "learning_rate": 0.0006688405818080298, "loss": 3.3056, "step": 2535 }, { "epoch": 0.41, "learning_rate": 0.0006685947518705253, "loss": 3.1641, "step": 2536 }, { "epoch": 0.41, "learning_rate": 0.0006683488759420555, "loss": 3.2236, "step": 2537 }, { "epoch": 0.41, "learning_rate": 0.0006681029540896934, "loss": 3.2956, "step": 2538 }, { "epoch": 0.41, "learning_rate": 0.0006678569863805234, "loss": 3.2407, "step": 2539 }, { "epoch": 0.41, "learning_rate": 0.0006676109728816434, "loss": 3.1761, "step": 2540 }, { "epoch": 0.41, "learning_rate": 0.0006673649136601634, "loss": 3.2503, "step": 2541 }, { "epoch": 0.41, "learning_rate": 0.0006671188087832056, "loss": 3.1361, "step": 2542 }, { "epoch": 0.41, "learning_rate": 0.0006668726583179054, "loss": 3.2491, "step": 2543 }, { "epoch": 0.41, "learning_rate": 0.00066662646233141, "loss": 3.3543, "step": 2544 }, { "epoch": 0.41, "learning_rate": 0.000666380220890879, "loss": 3.165, "step": 2545 }, { "epoch": 0.41, "learning_rate": 0.0006661339340634848, "loss": 3.2246, "step": 2546 }, { "epoch": 0.41, "learning_rate": 0.0006658876019164122, "loss": 3.1885, "step": 2547 }, { "epoch": 0.41, "learning_rate": 0.0006656412245168578, "loss": 3.1899, "step": 2548 }, { "epoch": 0.41, "learning_rate": 0.0006653948019320309, "loss": 3.3509, "step": 2549 }, { "epoch": 0.41, "learning_rate": 0.0006651483342291535, "loss": 3.2327, "step": 2550 }, { "epoch": 0.41, "learning_rate": 0.0006649018214754591, "loss": 3.2743, "step": 2551 }, { "epoch": 0.41, "learning_rate": 0.0006646552637381942, "loss": 3.316, "step": 2552 }, { "epoch": 0.41, "learning_rate": 0.0006644086610846171, "loss": 3.2112, "step": 2553 }, { "epoch": 0.41, "learning_rate": 0.0006641620135819985, "loss": 3.1752, "step": 2554 }, { "epoch": 0.41, "learning_rate": 0.0006639153212976217, "loss": 3.2628, "step": 2555 }, { "epoch": 0.41, "learning_rate": 0.0006636685842987816, "loss": 3.3566, "step": 2556 }, { "epoch": 0.41, "learning_rate": 0.0006634218026527856, "loss": 3.2893, "step": 2557 }, { "epoch": 0.41, "learning_rate": 0.0006631749764269536, "loss": 3.2105, "step": 2558 }, { "epoch": 0.41, "learning_rate": 0.0006629281056886167, "loss": 3.2792, "step": 2559 }, { "epoch": 0.41, "learning_rate": 0.0006626811905051194, "loss": 3.292, "step": 2560 }, { "epoch": 0.41, "learning_rate": 0.0006624342309438176, "loss": 3.3569, "step": 2561 }, { "epoch": 0.41, "learning_rate": 0.0006621872270720789, "loss": 3.0615, "step": 2562 }, { "epoch": 0.41, "learning_rate": 0.0006619401789572841, "loss": 3.3385, "step": 2563 }, { "epoch": 0.41, "learning_rate": 0.0006616930866668252, "loss": 3.3599, "step": 2564 }, { "epoch": 0.41, "learning_rate": 0.0006614459502681062, "loss": 3.1784, "step": 2565 }, { "epoch": 0.41, "learning_rate": 0.000661198769828544, "loss": 3.2052, "step": 2566 }, { "epoch": 0.41, "learning_rate": 0.0006609515454155668, "loss": 3.1359, "step": 2567 }, { "epoch": 0.41, "learning_rate": 0.0006607042770966147, "loss": 3.1847, "step": 2568 }, { "epoch": 0.41, "learning_rate": 0.0006604569649391402, "loss": 3.2265, "step": 2569 }, { "epoch": 0.41, "learning_rate": 0.0006602096090106077, "loss": 3.2389, "step": 2570 }, { "epoch": 0.41, "learning_rate": 0.000659962209378493, "loss": 3.232, "step": 2571 }, { "epoch": 0.41, "learning_rate": 0.000659714766110285, "loss": 3.3135, "step": 2572 }, { "epoch": 0.41, "learning_rate": 0.0006594672792734831, "loss": 3.3107, "step": 2573 }, { "epoch": 0.42, "learning_rate": 0.0006592197489355993, "loss": 3.2807, "step": 2574 }, { "epoch": 0.42, "learning_rate": 0.0006589721751641578, "loss": 3.3102, "step": 2575 }, { "epoch": 0.42, "learning_rate": 0.0006587245580266937, "loss": 3.2197, "step": 2576 }, { "epoch": 0.42, "learning_rate": 0.0006584768975907552, "loss": 3.3875, "step": 2577 }, { "epoch": 0.42, "learning_rate": 0.0006582291939239008, "loss": 3.2672, "step": 2578 }, { "epoch": 0.42, "learning_rate": 0.0006579814470937021, "loss": 3.2274, "step": 2579 }, { "epoch": 0.42, "learning_rate": 0.000657733657167742, "loss": 3.2129, "step": 2580 }, { "epoch": 0.42, "learning_rate": 0.0006574858242136146, "loss": 3.3573, "step": 2581 }, { "epoch": 0.42, "learning_rate": 0.0006572379482989269, "loss": 3.3165, "step": 2582 }, { "epoch": 0.42, "learning_rate": 0.0006569900294912968, "loss": 3.2058, "step": 2583 }, { "epoch": 0.42, "learning_rate": 0.0006567420678583536, "loss": 3.2085, "step": 2584 }, { "epoch": 0.42, "learning_rate": 0.0006564940634677392, "loss": 3.2628, "step": 2585 }, { "epoch": 0.42, "learning_rate": 0.0006562460163871068, "loss": 3.3337, "step": 2586 }, { "epoch": 0.42, "learning_rate": 0.0006559979266841209, "loss": 3.1905, "step": 2587 }, { "epoch": 0.42, "learning_rate": 0.0006557497944264578, "loss": 3.2537, "step": 2588 }, { "epoch": 0.42, "learning_rate": 0.0006555016196818062, "loss": 3.3512, "step": 2589 }, { "epoch": 0.42, "learning_rate": 0.0006552534025178647, "loss": 3.2943, "step": 2590 }, { "epoch": 0.42, "learning_rate": 0.0006550051430023452, "loss": 3.2082, "step": 2591 }, { "epoch": 0.42, "learning_rate": 0.0006547568412029701, "loss": 3.1531, "step": 2592 }, { "epoch": 0.42, "learning_rate": 0.0006545084971874737, "loss": 3.1906, "step": 2593 }, { "epoch": 0.42, "learning_rate": 0.0006542601110236018, "loss": 3.2806, "step": 2594 }, { "epoch": 0.42, "learning_rate": 0.0006540116827791118, "loss": 3.3476, "step": 2595 }, { "epoch": 0.42, "learning_rate": 0.0006537632125217722, "loss": 3.2024, "step": 2596 }, { "epoch": 0.42, "learning_rate": 0.0006535147003193634, "loss": 3.2417, "step": 2597 }, { "epoch": 0.42, "learning_rate": 0.0006532661462396768, "loss": 3.2972, "step": 2598 }, { "epoch": 0.42, "learning_rate": 0.0006530175503505157, "loss": 3.1001, "step": 2599 }, { "epoch": 0.42, "learning_rate": 0.0006527689127196945, "loss": 3.2191, "step": 2600 }, { "epoch": 0.42, "learning_rate": 0.0006525202334150392, "loss": 3.3669, "step": 2601 }, { "epoch": 0.42, "learning_rate": 0.0006522715125043868, "loss": 3.3027, "step": 2602 }, { "epoch": 0.42, "learning_rate": 0.0006520227500555858, "loss": 3.2313, "step": 2603 }, { "epoch": 0.42, "learning_rate": 0.0006517739461364965, "loss": 3.3859, "step": 2604 }, { "epoch": 0.42, "learning_rate": 0.00065152510081499, "loss": 3.1639, "step": 2605 }, { "epoch": 0.42, "learning_rate": 0.0006512762141589486, "loss": 3.175, "step": 2606 }, { "epoch": 0.42, "learning_rate": 0.0006510272862362662, "loss": 3.1064, "step": 2607 }, { "epoch": 0.42, "learning_rate": 0.0006507783171148479, "loss": 3.3029, "step": 2608 }, { "epoch": 0.42, "learning_rate": 0.00065052930686261, "loss": 3.2576, "step": 2609 }, { "epoch": 0.42, "learning_rate": 0.0006502802555474801, "loss": 3.2599, "step": 2610 }, { "epoch": 0.42, "learning_rate": 0.0006500311632373967, "loss": 3.1116, "step": 2611 }, { "epoch": 0.42, "learning_rate": 0.0006497820300003098, "loss": 3.2654, "step": 2612 }, { "epoch": 0.42, "learning_rate": 0.0006495328559041805, "loss": 3.166, "step": 2613 }, { "epoch": 0.42, "learning_rate": 0.0006492836410169809, "loss": 3.3195, "step": 2614 }, { "epoch": 0.42, "learning_rate": 0.0006490343854066945, "loss": 3.186, "step": 2615 }, { "epoch": 0.42, "learning_rate": 0.0006487850891413157, "loss": 3.2709, "step": 2616 }, { "epoch": 0.42, "learning_rate": 0.0006485357522888496, "loss": 3.2833, "step": 2617 }, { "epoch": 0.42, "learning_rate": 0.0006482863749173134, "loss": 3.1862, "step": 2618 }, { "epoch": 0.42, "learning_rate": 0.0006480369570947347, "loss": 3.2558, "step": 2619 }, { "epoch": 0.42, "learning_rate": 0.0006477874988891518, "loss": 3.2192, "step": 2620 }, { "epoch": 0.42, "learning_rate": 0.0006475380003686147, "loss": 3.3221, "step": 2621 }, { "epoch": 0.42, "learning_rate": 0.000647288461601184, "loss": 3.1782, "step": 2622 }, { "epoch": 0.42, "learning_rate": 0.0006470388826549314, "loss": 3.3355, "step": 2623 }, { "epoch": 0.42, "learning_rate": 0.0006467892635979396, "loss": 3.0389, "step": 2624 }, { "epoch": 0.42, "learning_rate": 0.0006465396044983023, "loss": 3.3462, "step": 2625 }, { "epoch": 0.42, "learning_rate": 0.0006462899054241237, "loss": 3.2048, "step": 2626 }, { "epoch": 0.42, "learning_rate": 0.0006460401664435195, "loss": 3.2257, "step": 2627 }, { "epoch": 0.42, "learning_rate": 0.0006457903876246156, "loss": 3.2329, "step": 2628 }, { "epoch": 0.42, "learning_rate": 0.0006455405690355497, "loss": 3.2402, "step": 2629 }, { "epoch": 0.42, "learning_rate": 0.0006452907107444696, "loss": 3.0645, "step": 2630 }, { "epoch": 0.42, "learning_rate": 0.0006450408128195338, "loss": 3.3536, "step": 2631 }, { "epoch": 0.42, "learning_rate": 0.0006447908753289127, "loss": 3.0372, "step": 2632 }, { "epoch": 0.42, "learning_rate": 0.0006445408983407861, "loss": 3.1113, "step": 2633 }, { "epoch": 0.42, "learning_rate": 0.0006442908819233453, "loss": 3.1191, "step": 2634 }, { "epoch": 0.42, "learning_rate": 0.0006440408261447927, "loss": 3.2081, "step": 2635 }, { "epoch": 0.43, "learning_rate": 0.0006437907310733406, "loss": 3.3645, "step": 2636 }, { "epoch": 0.43, "learning_rate": 0.0006435405967772126, "loss": 3.2132, "step": 2637 }, { "epoch": 0.43, "learning_rate": 0.0006432904233246428, "loss": 3.2448, "step": 2638 }, { "epoch": 0.43, "learning_rate": 0.0006430402107838762, "loss": 3.1957, "step": 2639 }, { "epoch": 0.43, "learning_rate": 0.000642789959223168, "loss": 3.1748, "step": 2640 }, { "epoch": 0.43, "learning_rate": 0.0006425396687107845, "loss": 3.1541, "step": 2641 }, { "epoch": 0.43, "learning_rate": 0.0006422893393150024, "loss": 3.1863, "step": 2642 }, { "epoch": 0.43, "learning_rate": 0.0006420389711041091, "loss": 3.1469, "step": 2643 }, { "epoch": 0.43, "learning_rate": 0.0006417885641464026, "loss": 3.1419, "step": 2644 }, { "epoch": 0.43, "learning_rate": 0.000641538118510191, "loss": 3.238, "step": 2645 }, { "epoch": 0.43, "learning_rate": 0.000641287634263794, "loss": 3.1513, "step": 2646 }, { "epoch": 0.43, "learning_rate": 0.0006410371114755409, "loss": 3.3119, "step": 2647 }, { "epoch": 0.43, "learning_rate": 0.0006407865502137717, "loss": 3.2069, "step": 2648 }, { "epoch": 0.43, "learning_rate": 0.0006405359505468372, "loss": 3.3763, "step": 2649 }, { "epoch": 0.43, "learning_rate": 0.0006402853125430985, "loss": 3.2821, "step": 2650 }, { "epoch": 0.43, "learning_rate": 0.000640034636270927, "loss": 3.2266, "step": 2651 }, { "epoch": 0.43, "learning_rate": 0.000639783921798705, "loss": 3.1684, "step": 2652 }, { "epoch": 0.43, "learning_rate": 0.0006395331691948243, "loss": 3.119, "step": 2653 }, { "epoch": 0.43, "learning_rate": 0.0006392823785276882, "loss": 3.2322, "step": 2654 }, { "epoch": 0.43, "learning_rate": 0.0006390315498657098, "loss": 3.2483, "step": 2655 }, { "epoch": 0.43, "learning_rate": 0.0006387806832773125, "loss": 3.3167, "step": 2656 }, { "epoch": 0.43, "learning_rate": 0.0006385297788309303, "loss": 3.1301, "step": 2657 }, { "epoch": 0.43, "learning_rate": 0.0006382788365950076, "loss": 3.3424, "step": 2658 }, { "epoch": 0.43, "learning_rate": 0.0006380278566379987, "loss": 3.2576, "step": 2659 }, { "epoch": 0.43, "learning_rate": 0.0006377768390283683, "loss": 3.2985, "step": 2660 }, { "epoch": 0.43, "learning_rate": 0.0006375257838345921, "loss": 3.1934, "step": 2661 }, { "epoch": 0.43, "learning_rate": 0.0006372746911251548, "loss": 3.1864, "step": 2662 }, { "epoch": 0.43, "learning_rate": 0.0006370235609685522, "loss": 3.3021, "step": 2663 }, { "epoch": 0.43, "learning_rate": 0.0006367723934332903, "loss": 3.4077, "step": 2664 }, { "epoch": 0.43, "learning_rate": 0.0006365211885878848, "loss": 3.3459, "step": 2665 }, { "epoch": 0.43, "learning_rate": 0.0006362699465008619, "loss": 3.2732, "step": 2666 }, { "epoch": 0.43, "learning_rate": 0.0006360186672407583, "loss": 3.3125, "step": 2667 }, { "epoch": 0.43, "learning_rate": 0.00063576735087612, "loss": 3.2195, "step": 2668 }, { "epoch": 0.43, "learning_rate": 0.0006355159974755039, "loss": 3.2436, "step": 2669 }, { "epoch": 0.43, "learning_rate": 0.0006352646071074767, "loss": 3.2048, "step": 2670 }, { "epoch": 0.43, "learning_rate": 0.0006350131798406149, "loss": 3.2943, "step": 2671 }, { "epoch": 0.43, "learning_rate": 0.0006347617157435057, "loss": 3.2185, "step": 2672 }, { "epoch": 0.43, "learning_rate": 0.0006345102148847458, "loss": 3.2188, "step": 2673 }, { "epoch": 0.43, "learning_rate": 0.0006342586773329421, "loss": 3.2676, "step": 2674 }, { "epoch": 0.43, "learning_rate": 0.0006340071031567115, "loss": 3.117, "step": 2675 }, { "epoch": 0.43, "learning_rate": 0.0006337554924246812, "loss": 3.3023, "step": 2676 }, { "epoch": 0.43, "learning_rate": 0.0006335038452054877, "loss": 3.258, "step": 2677 }, { "epoch": 0.43, "learning_rate": 0.0006332521615677782, "loss": 3.345, "step": 2678 }, { "epoch": 0.43, "learning_rate": 0.0006330004415802095, "loss": 3.0818, "step": 2679 }, { "epoch": 0.43, "learning_rate": 0.0006327486853114478, "loss": 3.3204, "step": 2680 }, { "epoch": 0.43, "learning_rate": 0.0006324968928301702, "loss": 3.3297, "step": 2681 }, { "epoch": 0.43, "learning_rate": 0.000632245064205063, "loss": 3.0557, "step": 2682 }, { "epoch": 0.43, "learning_rate": 0.0006319931995048226, "loss": 3.208, "step": 2683 }, { "epoch": 0.43, "learning_rate": 0.000631741298798155, "loss": 3.3319, "step": 2684 }, { "epoch": 0.43, "learning_rate": 0.0006314893621537765, "loss": 3.4352, "step": 2685 }, { "epoch": 0.43, "learning_rate": 0.0006312373896404126, "loss": 2.9215, "step": 2686 }, { "epoch": 0.43, "learning_rate": 0.0006309853813267989, "loss": 3.3112, "step": 2687 }, { "epoch": 0.43, "learning_rate": 0.0006307333372816811, "loss": 3.203, "step": 2688 }, { "epoch": 0.43, "learning_rate": 0.000630481257573814, "loss": 3.2017, "step": 2689 }, { "epoch": 0.43, "learning_rate": 0.0006302291422719623, "loss": 3.2631, "step": 2690 }, { "epoch": 0.43, "learning_rate": 0.000629976991444901, "loss": 3.3584, "step": 2691 }, { "epoch": 0.43, "learning_rate": 0.0006297248051614137, "loss": 3.1867, "step": 2692 }, { "epoch": 0.43, "learning_rate": 0.0006294725834902948, "loss": 3.1731, "step": 2693 }, { "epoch": 0.43, "learning_rate": 0.0006292203265003479, "loss": 3.1717, "step": 2694 }, { "epoch": 0.43, "learning_rate": 0.0006289680342603858, "loss": 3.2928, "step": 2695 }, { "epoch": 0.43, "learning_rate": 0.0006287157068392315, "loss": 3.2615, "step": 2696 }, { "epoch": 0.43, "learning_rate": 0.0006284633443057175, "loss": 3.2823, "step": 2697 }, { "epoch": 0.44, "learning_rate": 0.0006282109467286856, "loss": 3.3115, "step": 2698 }, { "epoch": 0.44, "learning_rate": 0.0006279585141769874, "loss": 3.3554, "step": 2699 }, { "epoch": 0.44, "learning_rate": 0.0006277060467194841, "loss": 3.3139, "step": 2700 }, { "epoch": 0.44, "learning_rate": 0.0006274535444250462, "loss": 3.2183, "step": 2701 }, { "epoch": 0.44, "learning_rate": 0.0006272010073625538, "loss": 3.2839, "step": 2702 }, { "epoch": 0.44, "learning_rate": 0.0006269484356008963, "loss": 3.2324, "step": 2703 }, { "epoch": 0.44, "learning_rate": 0.0006266958292089732, "loss": 3.1865, "step": 2704 }, { "epoch": 0.44, "learning_rate": 0.0006264431882556927, "loss": 3.1163, "step": 2705 }, { "epoch": 0.44, "learning_rate": 0.0006261905128099726, "loss": 3.2799, "step": 2706 }, { "epoch": 0.44, "learning_rate": 0.0006259378029407406, "loss": 3.2576, "step": 2707 }, { "epoch": 0.44, "learning_rate": 0.0006256850587169334, "loss": 3.2953, "step": 2708 }, { "epoch": 0.44, "learning_rate": 0.0006254322802074965, "loss": 3.2036, "step": 2709 }, { "epoch": 0.44, "learning_rate": 0.0006251794674813862, "loss": 3.2261, "step": 2710 }, { "epoch": 0.44, "learning_rate": 0.0006249266206075668, "loss": 3.2979, "step": 2711 }, { "epoch": 0.44, "learning_rate": 0.0006246737396550124, "loss": 3.1635, "step": 2712 }, { "epoch": 0.44, "learning_rate": 0.0006244208246927066, "loss": 3.2663, "step": 2713 }, { "epoch": 0.44, "learning_rate": 0.0006241678757896419, "loss": 3.3069, "step": 2714 }, { "epoch": 0.44, "learning_rate": 0.0006239148930148203, "loss": 3.2022, "step": 2715 }, { "epoch": 0.44, "learning_rate": 0.000623661876437253, "loss": 3.3226, "step": 2716 }, { "epoch": 0.44, "learning_rate": 0.0006234088261259604, "loss": 3.1836, "step": 2717 }, { "epoch": 0.44, "learning_rate": 0.000623155742149972, "loss": 3.476, "step": 2718 }, { "epoch": 0.44, "learning_rate": 0.0006229026245783269, "loss": 3.2331, "step": 2719 }, { "epoch": 0.44, "learning_rate": 0.0006226494734800725, "loss": 3.2609, "step": 2720 }, { "epoch": 0.44, "learning_rate": 0.0006223962889242663, "loss": 3.282, "step": 2721 }, { "epoch": 0.44, "learning_rate": 0.0006221430709799745, "loss": 3.2676, "step": 2722 }, { "epoch": 0.44, "learning_rate": 0.0006218898197162723, "loss": 3.3403, "step": 2723 }, { "epoch": 0.44, "learning_rate": 0.0006216365352022441, "loss": 3.1192, "step": 2724 }, { "epoch": 0.44, "learning_rate": 0.0006213832175069837, "loss": 3.2273, "step": 2725 }, { "epoch": 0.44, "learning_rate": 0.0006211298666995932, "loss": 3.1297, "step": 2726 }, { "epoch": 0.44, "learning_rate": 0.0006208764828491844, "loss": 3.3427, "step": 2727 }, { "epoch": 0.44, "learning_rate": 0.0006206230660248777, "loss": 3.3057, "step": 2728 }, { "epoch": 0.44, "learning_rate": 0.0006203696162958029, "loss": 3.1609, "step": 2729 }, { "epoch": 0.44, "learning_rate": 0.0006201161337310986, "loss": 3.2403, "step": 2730 }, { "epoch": 0.44, "learning_rate": 0.0006198626183999121, "loss": 3.1572, "step": 2731 }, { "epoch": 0.44, "learning_rate": 0.0006196090703713998, "loss": 3.2182, "step": 2732 }, { "epoch": 0.44, "learning_rate": 0.0006193554897147275, "loss": 3.2134, "step": 2733 }, { "epoch": 0.44, "learning_rate": 0.0006191018764990689, "loss": 3.4599, "step": 2734 }, { "epoch": 0.44, "learning_rate": 0.0006188482307936076, "loss": 3.1646, "step": 2735 }, { "epoch": 0.44, "learning_rate": 0.0006185945526675355, "loss": 3.2858, "step": 2736 }, { "epoch": 0.44, "learning_rate": 0.0006183408421900533, "loss": 3.2918, "step": 2737 }, { "epoch": 0.44, "learning_rate": 0.0006180870994303707, "loss": 3.2482, "step": 2738 }, { "epoch": 0.44, "learning_rate": 0.0006178333244577066, "loss": 3.371, "step": 2739 }, { "epoch": 0.44, "learning_rate": 0.0006175795173412877, "loss": 3.418, "step": 2740 }, { "epoch": 0.44, "learning_rate": 0.0006173256781503505, "loss": 3.0703, "step": 2741 }, { "epoch": 0.44, "learning_rate": 0.0006170718069541395, "loss": 3.3643, "step": 2742 }, { "epoch": 0.44, "learning_rate": 0.0006168179038219083, "loss": 3.1886, "step": 2743 }, { "epoch": 0.44, "learning_rate": 0.0006165639688229193, "loss": 3.1264, "step": 2744 }, { "epoch": 0.44, "learning_rate": 0.0006163100020264434, "loss": 3.1647, "step": 2745 }, { "epoch": 0.44, "learning_rate": 0.00061605600350176, "loss": 3.3302, "step": 2746 }, { "epoch": 0.44, "learning_rate": 0.0006158019733181573, "loss": 3.3022, "step": 2747 }, { "epoch": 0.44, "learning_rate": 0.0006155479115449328, "loss": 3.1757, "step": 2748 }, { "epoch": 0.44, "learning_rate": 0.0006152938182513912, "loss": 3.3729, "step": 2749 }, { "epoch": 0.44, "learning_rate": 0.0006150396935068471, "loss": 3.251, "step": 2750 }, { "epoch": 0.44, "learning_rate": 0.0006147855373806233, "loss": 3.1539, "step": 2751 }, { "epoch": 0.44, "learning_rate": 0.0006145313499420506, "loss": 3.2478, "step": 2752 }, { "epoch": 0.44, "learning_rate": 0.0006142771312604692, "loss": 3.2511, "step": 2753 }, { "epoch": 0.44, "learning_rate": 0.0006140228814052273, "loss": 3.2792, "step": 2754 }, { "epoch": 0.44, "learning_rate": 0.0006137686004456816, "loss": 3.1717, "step": 2755 }, { "epoch": 0.44, "learning_rate": 0.0006135142884511975, "loss": 3.2796, "step": 2756 }, { "epoch": 0.44, "learning_rate": 0.0006132599454911489, "loss": 3.179, "step": 2757 }, { "epoch": 0.44, "learning_rate": 0.0006130055716349178, "loss": 3.2311, "step": 2758 }, { "epoch": 0.44, "learning_rate": 0.0006127511669518949, "loss": 3.1269, "step": 2759 }, { "epoch": 0.45, "learning_rate": 0.0006124967315114795, "loss": 3.1684, "step": 2760 }, { "epoch": 0.45, "learning_rate": 0.0006122422653830786, "loss": 3.2389, "step": 2761 }, { "epoch": 0.45, "learning_rate": 0.0006119877686361084, "loss": 3.1949, "step": 2762 }, { "epoch": 0.45, "learning_rate": 0.000611733241339993, "loss": 3.211, "step": 2763 }, { "epoch": 0.45, "learning_rate": 0.0006114786835641647, "loss": 3.2839, "step": 2764 }, { "epoch": 0.45, "learning_rate": 0.0006112240953780644, "loss": 3.4735, "step": 2765 }, { "epoch": 0.45, "learning_rate": 0.0006109694768511414, "loss": 3.2524, "step": 2766 }, { "epoch": 0.45, "learning_rate": 0.000610714828052853, "loss": 3.2904, "step": 2767 }, { "epoch": 0.45, "learning_rate": 0.0006104601490526649, "loss": 3.2764, "step": 2768 }, { "epoch": 0.45, "learning_rate": 0.0006102054399200507, "loss": 3.1725, "step": 2769 }, { "epoch": 0.45, "learning_rate": 0.0006099507007244929, "loss": 3.2126, "step": 2770 }, { "epoch": 0.45, "learning_rate": 0.0006096959315354815, "loss": 3.263, "step": 2771 }, { "epoch": 0.45, "learning_rate": 0.0006094411324225152, "loss": 3.1602, "step": 2772 }, { "epoch": 0.45, "learning_rate": 0.0006091863034551006, "loss": 3.2716, "step": 2773 }, { "epoch": 0.45, "learning_rate": 0.0006089314447027525, "loss": 3.1168, "step": 2774 }, { "epoch": 0.45, "learning_rate": 0.0006086765562349937, "loss": 3.2818, "step": 2775 }, { "epoch": 0.45, "learning_rate": 0.0006084216381213553, "loss": 3.1495, "step": 2776 }, { "epoch": 0.45, "learning_rate": 0.0006081666904313766, "loss": 3.2694, "step": 2777 }, { "epoch": 0.45, "learning_rate": 0.0006079117132346043, "loss": 3.2119, "step": 2778 }, { "epoch": 0.45, "learning_rate": 0.0006076567066005944, "loss": 3.2002, "step": 2779 }, { "epoch": 0.45, "learning_rate": 0.0006074016705989094, "loss": 3.2411, "step": 2780 }, { "epoch": 0.45, "learning_rate": 0.0006071466052991209, "loss": 3.1096, "step": 2781 }, { "epoch": 0.45, "learning_rate": 0.0006068915107708084, "loss": 3.2159, "step": 2782 }, { "epoch": 0.45, "learning_rate": 0.0006066363870835587, "loss": 3.3051, "step": 2783 }, { "epoch": 0.45, "learning_rate": 0.0006063812343069672, "loss": 3.236, "step": 2784 }, { "epoch": 0.45, "learning_rate": 0.0006061260525106372, "loss": 3.0784, "step": 2785 }, { "epoch": 0.45, "learning_rate": 0.0006058708417641795, "loss": 3.2405, "step": 2786 }, { "epoch": 0.45, "learning_rate": 0.0006056156021372131, "loss": 3.3285, "step": 2787 }, { "epoch": 0.45, "learning_rate": 0.0006053603336993649, "loss": 3.2654, "step": 2788 }, { "epoch": 0.45, "learning_rate": 0.0006051050365202695, "loss": 3.3196, "step": 2789 }, { "epoch": 0.45, "learning_rate": 0.0006048497106695694, "loss": 3.1975, "step": 2790 }, { "epoch": 0.45, "learning_rate": 0.0006045943562169151, "loss": 3.2366, "step": 2791 }, { "epoch": 0.45, "learning_rate": 0.0006043389732319645, "loss": 3.4659, "step": 2792 }, { "epoch": 0.45, "learning_rate": 0.0006040835617843836, "loss": 3.2157, "step": 2793 }, { "epoch": 0.45, "learning_rate": 0.0006038281219438463, "loss": 3.3577, "step": 2794 }, { "epoch": 0.45, "learning_rate": 0.0006035726537800339, "loss": 3.3213, "step": 2795 }, { "epoch": 0.45, "learning_rate": 0.0006033171573626355, "loss": 3.1815, "step": 2796 }, { "epoch": 0.45, "learning_rate": 0.0006030616327613482, "loss": 3.2646, "step": 2797 }, { "epoch": 0.45, "learning_rate": 0.0006028060800458762, "loss": 3.2252, "step": 2798 }, { "epoch": 0.45, "learning_rate": 0.0006025504992859321, "loss": 3.1211, "step": 2799 }, { "epoch": 0.45, "learning_rate": 0.0006022948905512355, "loss": 3.3619, "step": 2800 }, { "epoch": 0.45, "learning_rate": 0.0006020392539115138, "loss": 3.089, "step": 2801 }, { "epoch": 0.45, "learning_rate": 0.0006017835894365027, "loss": 3.2625, "step": 2802 }, { "epoch": 0.45, "learning_rate": 0.0006015278971959444, "loss": 3.1854, "step": 2803 }, { "epoch": 0.45, "learning_rate": 0.0006012721772595893, "loss": 3.2827, "step": 2804 }, { "epoch": 0.45, "learning_rate": 0.0006010164296971953, "loss": 3.3445, "step": 2805 }, { "epoch": 0.45, "learning_rate": 0.0006007606545785278, "loss": 3.2105, "step": 2806 }, { "epoch": 0.45, "learning_rate": 0.0006005048519733596, "loss": 3.2327, "step": 2807 }, { "epoch": 0.45, "learning_rate": 0.0006002490219514712, "loss": 3.3197, "step": 2808 }, { "epoch": 0.45, "learning_rate": 0.0005999931645826505, "loss": 3.1442, "step": 2809 }, { "epoch": 0.45, "learning_rate": 0.0005997372799366927, "loss": 3.2893, "step": 2810 }, { "epoch": 0.45, "learning_rate": 0.0005994813680834008, "loss": 3.1903, "step": 2811 }, { "epoch": 0.45, "learning_rate": 0.0005992254290925847, "loss": 3.3819, "step": 2812 }, { "epoch": 0.45, "learning_rate": 0.0005989694630340622, "loss": 3.248, "step": 2813 }, { "epoch": 0.45, "learning_rate": 0.0005987134699776583, "loss": 3.1616, "step": 2814 }, { "epoch": 0.45, "learning_rate": 0.0005984574499932052, "loss": 3.2481, "step": 2815 }, { "epoch": 0.45, "learning_rate": 0.0005982014031505427, "loss": 3.1431, "step": 2816 }, { "epoch": 0.45, "learning_rate": 0.0005979453295195179, "loss": 3.256, "step": 2817 }, { "epoch": 0.45, "learning_rate": 0.0005976892291699848, "loss": 3.2701, "step": 2818 }, { "epoch": 0.45, "learning_rate": 0.0005974331021718056, "loss": 3.3866, "step": 2819 }, { "epoch": 0.45, "learning_rate": 0.0005971769485948488, "loss": 3.1709, "step": 2820 }, { "epoch": 0.45, "learning_rate": 0.0005969207685089901, "loss": 3.2827, "step": 2821 }, { "epoch": 0.46, "learning_rate": 0.0005966645619841139, "loss": 3.3235, "step": 2822 }, { "epoch": 0.46, "learning_rate": 0.0005964083290901101, "loss": 3.3103, "step": 2823 }, { "epoch": 0.46, "learning_rate": 0.0005961520698968766, "loss": 3.4071, "step": 2824 }, { "epoch": 0.46, "learning_rate": 0.0005958957844743182, "loss": 3.3132, "step": 2825 }, { "epoch": 0.46, "learning_rate": 0.0005956394728923474, "loss": 3.2692, "step": 2826 }, { "epoch": 0.46, "learning_rate": 0.0005953831352208831, "loss": 3.2395, "step": 2827 }, { "epoch": 0.46, "learning_rate": 0.0005951267715298516, "loss": 3.3205, "step": 2828 }, { "epoch": 0.46, "learning_rate": 0.0005948703818891869, "loss": 3.2166, "step": 2829 }, { "epoch": 0.46, "learning_rate": 0.0005946139663688288, "loss": 3.2329, "step": 2830 }, { "epoch": 0.46, "learning_rate": 0.0005943575250387253, "loss": 3.3753, "step": 2831 }, { "epoch": 0.46, "learning_rate": 0.0005941010579688312, "loss": 3.2393, "step": 2832 }, { "epoch": 0.46, "learning_rate": 0.0005938445652291078, "loss": 3.2916, "step": 2833 }, { "epoch": 0.46, "learning_rate": 0.0005935880468895239, "loss": 3.2894, "step": 2834 }, { "epoch": 0.46, "learning_rate": 0.000593331503020055, "loss": 3.1294, "step": 2835 }, { "epoch": 0.46, "learning_rate": 0.0005930749336906841, "loss": 3.1199, "step": 2836 }, { "epoch": 0.46, "learning_rate": 0.0005928183389714004, "loss": 3.3385, "step": 2837 }, { "epoch": 0.46, "learning_rate": 0.0005925617189322004, "loss": 3.154, "step": 2838 }, { "epoch": 0.46, "learning_rate": 0.0005923050736430876, "loss": 3.1568, "step": 2839 }, { "epoch": 0.46, "learning_rate": 0.0005920484031740722, "loss": 3.242, "step": 2840 }, { "epoch": 0.46, "learning_rate": 0.0005917917075951714, "loss": 3.1087, "step": 2841 }, { "epoch": 0.46, "learning_rate": 0.0005915349869764092, "loss": 3.2438, "step": 2842 }, { "epoch": 0.46, "learning_rate": 0.0005912782413878164, "loss": 3.3033, "step": 2843 }, { "epoch": 0.46, "learning_rate": 0.0005910214708994306, "loss": 3.411, "step": 2844 }, { "epoch": 0.46, "learning_rate": 0.0005907646755812963, "loss": 3.2507, "step": 2845 }, { "epoch": 0.46, "learning_rate": 0.0005905078555034645, "loss": 3.2788, "step": 2846 }, { "epoch": 0.46, "learning_rate": 0.0005902510107359934, "loss": 3.1932, "step": 2847 }, { "epoch": 0.46, "learning_rate": 0.0005899941413489477, "loss": 3.2109, "step": 2848 }, { "epoch": 0.46, "learning_rate": 0.0005897372474123985, "loss": 3.3343, "step": 2849 }, { "epoch": 0.46, "learning_rate": 0.0005894803289964242, "loss": 3.2959, "step": 2850 }, { "epoch": 0.46, "learning_rate": 0.0005892233861711094, "loss": 3.2676, "step": 2851 }, { "epoch": 0.46, "learning_rate": 0.0005889664190065456, "loss": 3.3498, "step": 2852 }, { "epoch": 0.46, "learning_rate": 0.0005887094275728309, "loss": 3.2089, "step": 2853 }, { "epoch": 0.46, "learning_rate": 0.00058845241194007, "loss": 3.2367, "step": 2854 }, { "epoch": 0.46, "learning_rate": 0.0005881953721783743, "loss": 3.09, "step": 2855 }, { "epoch": 0.46, "learning_rate": 0.0005879383083578614, "loss": 3.234, "step": 2856 }, { "epoch": 0.46, "learning_rate": 0.000587681220548656, "loss": 3.3249, "step": 2857 }, { "epoch": 0.46, "learning_rate": 0.000587424108820889, "loss": 3.1046, "step": 2858 }, { "epoch": 0.46, "learning_rate": 0.0005871669732446978, "loss": 3.2977, "step": 2859 }, { "epoch": 0.46, "learning_rate": 0.0005869098138902265, "loss": 3.1625, "step": 2860 }, { "epoch": 0.46, "learning_rate": 0.0005866526308276258, "loss": 3.2562, "step": 2861 }, { "epoch": 0.46, "learning_rate": 0.0005863954241270524, "loss": 3.3571, "step": 2862 }, { "epoch": 0.46, "learning_rate": 0.0005861381938586699, "loss": 3.3367, "step": 2863 }, { "epoch": 0.46, "learning_rate": 0.000585880940092648, "loss": 3.1936, "step": 2864 }, { "epoch": 0.46, "learning_rate": 0.000585623662899163, "loss": 3.2304, "step": 2865 }, { "epoch": 0.46, "learning_rate": 0.0005853663623483977, "loss": 3.2672, "step": 2866 }, { "epoch": 0.46, "learning_rate": 0.0005851090385105409, "loss": 3.0635, "step": 2867 }, { "epoch": 0.46, "learning_rate": 0.0005848516914557881, "loss": 3.2419, "step": 2868 }, { "epoch": 0.46, "learning_rate": 0.000584594321254341, "loss": 3.284, "step": 2869 }, { "epoch": 0.46, "learning_rate": 0.0005843369279764076, "loss": 3.113, "step": 2870 }, { "epoch": 0.46, "learning_rate": 0.0005840795116922022, "loss": 3.2406, "step": 2871 }, { "epoch": 0.46, "learning_rate": 0.0005838220724719454, "loss": 3.2539, "step": 2872 }, { "epoch": 0.46, "learning_rate": 0.0005835646103858641, "loss": 3.2641, "step": 2873 }, { "epoch": 0.46, "learning_rate": 0.0005833071255041914, "loss": 3.1373, "step": 2874 }, { "epoch": 0.46, "learning_rate": 0.0005830496178971665, "loss": 3.2945, "step": 2875 }, { "epoch": 0.46, "learning_rate": 0.0005827920876350349, "loss": 3.3018, "step": 2876 }, { "epoch": 0.46, "learning_rate": 0.0005825345347880484, "loss": 3.1548, "step": 2877 }, { "epoch": 0.46, "learning_rate": 0.0005822769594264649, "loss": 3.4063, "step": 2878 }, { "epoch": 0.46, "learning_rate": 0.000582019361620548, "loss": 3.2583, "step": 2879 }, { "epoch": 0.46, "learning_rate": 0.0005817617414405684, "loss": 3.3856, "step": 2880 }, { "epoch": 0.46, "learning_rate": 0.0005815040989568018, "loss": 3.1103, "step": 2881 }, { "epoch": 0.46, "learning_rate": 0.0005812464342395307, "loss": 3.1975, "step": 2882 }, { "epoch": 0.46, "learning_rate": 0.0005809887473590435, "loss": 3.3603, "step": 2883 }, { "epoch": 0.47, "learning_rate": 0.0005807310383856345, "loss": 3.2184, "step": 2884 }, { "epoch": 0.47, "learning_rate": 0.0005804733073896044, "loss": 3.2268, "step": 2885 }, { "epoch": 0.47, "learning_rate": 0.0005802155544412593, "loss": 3.191, "step": 2886 }, { "epoch": 0.47, "learning_rate": 0.0005799577796109117, "loss": 3.1723, "step": 2887 }, { "epoch": 0.47, "learning_rate": 0.0005796999829688802, "loss": 3.0655, "step": 2888 }, { "epoch": 0.47, "learning_rate": 0.000579442164585489, "loss": 3.1588, "step": 2889 }, { "epoch": 0.47, "learning_rate": 0.0005791843245310683, "loss": 3.3678, "step": 2890 }, { "epoch": 0.47, "learning_rate": 0.0005789264628759545, "loss": 3.0815, "step": 2891 }, { "epoch": 0.47, "learning_rate": 0.0005786685796904897, "loss": 3.2768, "step": 2892 }, { "epoch": 0.47, "learning_rate": 0.0005784106750450215, "loss": 3.391, "step": 2893 }, { "epoch": 0.47, "learning_rate": 0.0005781527490099042, "loss": 3.2984, "step": 2894 }, { "epoch": 0.47, "learning_rate": 0.0005778948016554971, "loss": 3.1969, "step": 2895 }, { "epoch": 0.47, "learning_rate": 0.0005776368330521656, "loss": 3.2379, "step": 2896 }, { "epoch": 0.47, "learning_rate": 0.0005773788432702814, "loss": 3.3042, "step": 2897 }, { "epoch": 0.47, "learning_rate": 0.0005771208323802213, "loss": 3.2272, "step": 2898 }, { "epoch": 0.47, "learning_rate": 0.0005768628004523678, "loss": 3.2949, "step": 2899 }, { "epoch": 0.47, "learning_rate": 0.0005766047475571098, "loss": 3.3118, "step": 2900 }, { "epoch": 0.47, "learning_rate": 0.0005763466737648415, "loss": 3.2878, "step": 2901 }, { "epoch": 0.47, "learning_rate": 0.0005760885791459627, "loss": 3.2965, "step": 2902 }, { "epoch": 0.47, "learning_rate": 0.0005758304637708791, "loss": 3.1834, "step": 2903 }, { "epoch": 0.47, "learning_rate": 0.0005755723277100021, "loss": 3.2982, "step": 2904 }, { "epoch": 0.47, "learning_rate": 0.0005753141710337486, "loss": 3.2503, "step": 2905 }, { "epoch": 0.47, "learning_rate": 0.0005750559938125411, "loss": 3.2491, "step": 2906 }, { "epoch": 0.47, "learning_rate": 0.0005747977961168078, "loss": 3.2223, "step": 2907 }, { "epoch": 0.47, "learning_rate": 0.0005745395780169826, "loss": 3.1516, "step": 2908 }, { "epoch": 0.47, "learning_rate": 0.0005742813395835045, "loss": 3.339, "step": 2909 }, { "epoch": 0.47, "learning_rate": 0.0005740230808868185, "loss": 3.105, "step": 2910 }, { "epoch": 0.47, "learning_rate": 0.0005737648019973753, "loss": 3.2541, "step": 2911 }, { "epoch": 0.47, "learning_rate": 0.0005735065029856304, "loss": 3.4053, "step": 2912 }, { "epoch": 0.47, "learning_rate": 0.0005732481839220453, "loss": 3.1824, "step": 2913 }, { "epoch": 0.47, "learning_rate": 0.0005729898448770869, "loss": 3.2087, "step": 2914 }, { "epoch": 0.47, "learning_rate": 0.0005727314859212275, "loss": 3.1342, "step": 2915 }, { "epoch": 0.47, "learning_rate": 0.0005724731071249449, "loss": 3.0929, "step": 2916 }, { "epoch": 0.47, "learning_rate": 0.0005722147085587223, "loss": 3.2516, "step": 2917 }, { "epoch": 0.47, "learning_rate": 0.0005719562902930479, "loss": 3.4588, "step": 2918 }, { "epoch": 0.47, "learning_rate": 0.000571697852398416, "loss": 3.0319, "step": 2919 }, { "epoch": 0.47, "learning_rate": 0.0005714393949453258, "loss": 3.1345, "step": 2920 }, { "epoch": 0.47, "learning_rate": 0.0005711809180042819, "loss": 3.2252, "step": 2921 }, { "epoch": 0.47, "learning_rate": 0.0005709224216457941, "loss": 3.3952, "step": 2922 }, { "epoch": 0.47, "learning_rate": 0.0005706639059403778, "loss": 3.0421, "step": 2923 }, { "epoch": 0.47, "learning_rate": 0.0005704053709585533, "loss": 3.3343, "step": 2924 }, { "epoch": 0.47, "learning_rate": 0.0005701468167708467, "loss": 3.2704, "step": 2925 }, { "epoch": 0.47, "learning_rate": 0.0005698882434477887, "loss": 3.1814, "step": 2926 }, { "epoch": 0.47, "learning_rate": 0.0005696296510599156, "loss": 3.289, "step": 2927 }, { "epoch": 0.47, "learning_rate": 0.0005693710396777687, "loss": 3.3108, "step": 2928 }, { "epoch": 0.47, "learning_rate": 0.000569112409371895, "loss": 3.0992, "step": 2929 }, { "epoch": 0.47, "learning_rate": 0.0005688537602128458, "loss": 3.159, "step": 2930 }, { "epoch": 0.47, "learning_rate": 0.0005685950922711782, "loss": 3.1889, "step": 2931 }, { "epoch": 0.47, "learning_rate": 0.0005683364056174545, "loss": 3.2205, "step": 2932 }, { "epoch": 0.47, "learning_rate": 0.0005680777003222413, "loss": 3.3713, "step": 2933 }, { "epoch": 0.47, "learning_rate": 0.0005678189764561113, "loss": 3.2425, "step": 2934 }, { "epoch": 0.47, "learning_rate": 0.0005675602340896415, "loss": 3.3798, "step": 2935 }, { "epoch": 0.47, "learning_rate": 0.0005673014732934143, "loss": 3.4012, "step": 2936 }, { "epoch": 0.47, "learning_rate": 0.0005670426941380172, "loss": 3.2409, "step": 2937 }, { "epoch": 0.47, "learning_rate": 0.0005667838966940423, "loss": 3.2229, "step": 2938 }, { "epoch": 0.47, "learning_rate": 0.0005665250810320871, "loss": 3.2976, "step": 2939 }, { "epoch": 0.47, "learning_rate": 0.000566266247222754, "loss": 3.1528, "step": 2940 }, { "epoch": 0.47, "learning_rate": 0.0005660073953366503, "loss": 3.188, "step": 2941 }, { "epoch": 0.47, "learning_rate": 0.0005657485254443881, "loss": 3.3084, "step": 2942 }, { "epoch": 0.47, "learning_rate": 0.0005654896376165846, "loss": 3.1222, "step": 2943 }, { "epoch": 0.47, "learning_rate": 0.0005652307319238617, "loss": 3.1588, "step": 2944 }, { "epoch": 0.47, "learning_rate": 0.0005649718084368463, "loss": 3.2015, "step": 2945 }, { "epoch": 0.48, "learning_rate": 0.0005647128672261702, "loss": 3.1567, "step": 2946 }, { "epoch": 0.48, "learning_rate": 0.0005644539083624701, "loss": 3.1609, "step": 2947 }, { "epoch": 0.48, "learning_rate": 0.0005641949319163872, "loss": 3.1626, "step": 2948 }, { "epoch": 0.48, "learning_rate": 0.0005639359379585679, "loss": 3.0694, "step": 2949 }, { "epoch": 0.48, "learning_rate": 0.0005636769265596628, "loss": 3.2203, "step": 2950 }, { "epoch": 0.48, "learning_rate": 0.000563417897790328, "loss": 3.3084, "step": 2951 }, { "epoch": 0.48, "learning_rate": 0.0005631588517212238, "loss": 3.2316, "step": 2952 }, { "epoch": 0.48, "learning_rate": 0.0005628997884230154, "loss": 3.1063, "step": 2953 }, { "epoch": 0.48, "learning_rate": 0.0005626407079663726, "loss": 3.1869, "step": 2954 }, { "epoch": 0.48, "learning_rate": 0.0005623816104219701, "loss": 3.227, "step": 2955 }, { "epoch": 0.48, "learning_rate": 0.0005621224958604868, "loss": 3.2836, "step": 2956 }, { "epoch": 0.48, "learning_rate": 0.0005618633643526072, "loss": 3.243, "step": 2957 }, { "epoch": 0.48, "learning_rate": 0.0005616042159690193, "loss": 3.2233, "step": 2958 }, { "epoch": 0.48, "learning_rate": 0.0005613450507804161, "loss": 3.1671, "step": 2959 }, { "epoch": 0.48, "learning_rate": 0.0005610858688574958, "loss": 3.1712, "step": 2960 }, { "epoch": 0.48, "learning_rate": 0.0005608266702709602, "loss": 3.2488, "step": 2961 }, { "epoch": 0.48, "learning_rate": 0.0005605674550915161, "loss": 3.2241, "step": 2962 }, { "epoch": 0.48, "learning_rate": 0.0005603082233898751, "loss": 3.163, "step": 2963 }, { "epoch": 0.48, "learning_rate": 0.0005600489752367528, "loss": 3.2125, "step": 2964 }, { "epoch": 0.48, "learning_rate": 0.0005597897107028694, "loss": 3.1831, "step": 2965 }, { "epoch": 0.48, "learning_rate": 0.00055953042985895, "loss": 3.1, "step": 2966 }, { "epoch": 0.48, "learning_rate": 0.0005592711327757236, "loss": 2.9714, "step": 2967 }, { "epoch": 0.48, "learning_rate": 0.0005590118195239238, "loss": 3.3674, "step": 2968 }, { "epoch": 0.48, "learning_rate": 0.0005587524901742889, "loss": 3.3734, "step": 2969 }, { "epoch": 0.48, "learning_rate": 0.0005584931447975613, "loss": 3.194, "step": 2970 }, { "epoch": 0.48, "learning_rate": 0.0005582337834644875, "loss": 3.2746, "step": 2971 }, { "epoch": 0.48, "learning_rate": 0.0005579744062458191, "loss": 3.1882, "step": 2972 }, { "epoch": 0.48, "learning_rate": 0.0005577150132123115, "loss": 3.1259, "step": 2973 }, { "epoch": 0.48, "learning_rate": 0.0005574556044347245, "loss": 3.2761, "step": 2974 }, { "epoch": 0.48, "learning_rate": 0.0005571961799838223, "loss": 3.2351, "step": 2975 }, { "epoch": 0.48, "learning_rate": 0.000556936739930373, "loss": 3.1233, "step": 2976 }, { "epoch": 0.48, "learning_rate": 0.0005566772843451495, "loss": 3.1026, "step": 2977 }, { "epoch": 0.48, "learning_rate": 0.0005564178132989288, "loss": 3.3462, "step": 2978 }, { "epoch": 0.48, "learning_rate": 0.0005561583268624917, "loss": 3.1032, "step": 2979 }, { "epoch": 0.48, "learning_rate": 0.000555898825106624, "loss": 3.2915, "step": 2980 }, { "epoch": 0.48, "learning_rate": 0.0005556393081021145, "loss": 3.0523, "step": 2981 }, { "epoch": 0.48, "learning_rate": 0.0005553797759197574, "loss": 3.2151, "step": 2982 }, { "epoch": 0.48, "learning_rate": 0.0005551202286303504, "loss": 3.1518, "step": 2983 }, { "epoch": 0.48, "learning_rate": 0.0005548606663046951, "loss": 3.2045, "step": 2984 }, { "epoch": 0.48, "learning_rate": 0.0005546010890135979, "loss": 3.1733, "step": 2985 }, { "epoch": 0.48, "learning_rate": 0.0005543414968278687, "loss": 3.2085, "step": 2986 }, { "epoch": 0.48, "learning_rate": 0.0005540818898183216, "loss": 3.2113, "step": 2987 }, { "epoch": 0.48, "learning_rate": 0.0005538222680557749, "loss": 3.0879, "step": 2988 }, { "epoch": 0.48, "learning_rate": 0.000553562631611051, "loss": 3.3749, "step": 2989 }, { "epoch": 0.48, "learning_rate": 0.0005533029805549756, "loss": 3.232, "step": 2990 }, { "epoch": 0.48, "learning_rate": 0.0005530433149583795, "loss": 3.3065, "step": 2991 }, { "epoch": 0.48, "learning_rate": 0.0005527836348920965, "loss": 3.2166, "step": 2992 }, { "epoch": 0.48, "learning_rate": 0.000552523940426965, "loss": 3.0705, "step": 2993 }, { "epoch": 0.48, "learning_rate": 0.0005522642316338268, "loss": 3.3184, "step": 2994 }, { "epoch": 0.48, "learning_rate": 0.000552004508583528, "loss": 3.216, "step": 2995 }, { "epoch": 0.48, "learning_rate": 0.0005517447713469185, "loss": 3.2676, "step": 2996 }, { "epoch": 0.48, "learning_rate": 0.0005514850199948519, "loss": 3.3614, "step": 2997 }, { "epoch": 0.48, "learning_rate": 0.000551225254598186, "loss": 3.2384, "step": 2998 }, { "epoch": 0.48, "learning_rate": 0.0005509654752277819, "loss": 3.2557, "step": 2999 }, { "epoch": 0.48, "learning_rate": 0.0005507056819545049, "loss": 3.3166, "step": 3000 }, { "epoch": 0.48, "learning_rate": 0.0005504458748492243, "loss": 3.1723, "step": 3001 }, { "epoch": 0.48, "learning_rate": 0.0005501860539828125, "loss": 3.1236, "step": 3002 }, { "epoch": 0.48, "learning_rate": 0.0005499262194261462, "loss": 3.2639, "step": 3003 }, { "epoch": 0.48, "learning_rate": 0.0005496663712501057, "loss": 3.3128, "step": 3004 }, { "epoch": 0.48, "learning_rate": 0.0005494065095255751, "loss": 3.2574, "step": 3005 }, { "epoch": 0.48, "learning_rate": 0.0005491466343234418, "loss": 3.1444, "step": 3006 }, { "epoch": 0.48, "learning_rate": 0.0005488867457145974, "loss": 3.2533, "step": 3007 }, { "epoch": 0.49, "learning_rate": 0.0005486268437699368, "loss": 3.2704, "step": 3008 }, { "epoch": 0.49, "learning_rate": 0.0005483669285603585, "loss": 3.2861, "step": 3009 }, { "epoch": 0.49, "learning_rate": 0.0005481070001567653, "loss": 3.3433, "step": 3010 }, { "epoch": 0.49, "learning_rate": 0.0005478470586300625, "loss": 3.2027, "step": 3011 }, { "epoch": 0.49, "learning_rate": 0.0005475871040511597, "loss": 3.1678, "step": 3012 }, { "epoch": 0.49, "learning_rate": 0.0005473271364909702, "loss": 3.3192, "step": 3013 }, { "epoch": 0.49, "learning_rate": 0.0005470671560204104, "loss": 3.199, "step": 3014 }, { "epoch": 0.49, "learning_rate": 0.0005468071627104001, "loss": 3.3145, "step": 3015 }, { "epoch": 0.49, "learning_rate": 0.0005465471566318633, "loss": 3.195, "step": 3016 }, { "epoch": 0.49, "learning_rate": 0.0005462871378557267, "loss": 3.218, "step": 3017 }, { "epoch": 0.49, "learning_rate": 0.0005460271064529211, "loss": 3.1642, "step": 3018 }, { "epoch": 0.49, "learning_rate": 0.0005457670624943805, "loss": 3.1151, "step": 3019 }, { "epoch": 0.49, "learning_rate": 0.0005455070060510419, "loss": 3.3324, "step": 3020 }, { "epoch": 0.49, "learning_rate": 0.0005452469371938464, "loss": 3.1659, "step": 3021 }, { "epoch": 0.49, "learning_rate": 0.0005449868559937385, "loss": 3.1428, "step": 3022 }, { "epoch": 0.49, "learning_rate": 0.000544726762521665, "loss": 3.3396, "step": 3023 }, { "epoch": 0.49, "learning_rate": 0.0005444666568485774, "loss": 3.1478, "step": 3024 }, { "epoch": 0.49, "learning_rate": 0.0005442065390454296, "loss": 3.1821, "step": 3025 }, { "epoch": 0.49, "learning_rate": 0.0005439464091831795, "loss": 3.0375, "step": 3026 }, { "epoch": 0.49, "learning_rate": 0.0005436862673327877, "loss": 3.3832, "step": 3027 }, { "epoch": 0.49, "learning_rate": 0.000543426113565218, "loss": 3.2798, "step": 3028 }, { "epoch": 0.49, "learning_rate": 0.0005431659479514383, "loss": 3.219, "step": 3029 }, { "epoch": 0.49, "learning_rate": 0.0005429057705624191, "loss": 3.2747, "step": 3030 }, { "epoch": 0.49, "learning_rate": 0.0005426455814691337, "loss": 3.165, "step": 3031 }, { "epoch": 0.49, "learning_rate": 0.0005423853807425596, "loss": 3.0539, "step": 3032 }, { "epoch": 0.49, "learning_rate": 0.0005421251684536769, "loss": 3.2583, "step": 3033 }, { "epoch": 0.49, "learning_rate": 0.0005418649446734684, "loss": 3.1761, "step": 3034 }, { "epoch": 0.49, "learning_rate": 0.0005416047094729214, "loss": 3.2972, "step": 3035 }, { "epoch": 0.49, "learning_rate": 0.0005413444629230248, "loss": 3.1143, "step": 3036 }, { "epoch": 0.49, "learning_rate": 0.0005410842050947714, "loss": 3.1442, "step": 3037 }, { "epoch": 0.49, "learning_rate": 0.0005408239360591572, "loss": 3.3191, "step": 3038 }, { "epoch": 0.49, "learning_rate": 0.0005405636558871808, "loss": 3.3667, "step": 3039 }, { "epoch": 0.49, "learning_rate": 0.0005403033646498439, "loss": 3.2234, "step": 3040 }, { "epoch": 0.49, "learning_rate": 0.0005400430624181516, "loss": 3.1733, "step": 3041 }, { "epoch": 0.49, "learning_rate": 0.0005397827492631116, "loss": 3.104, "step": 3042 }, { "epoch": 0.49, "learning_rate": 0.0005395224252557347, "loss": 3.1591, "step": 3043 }, { "epoch": 0.49, "learning_rate": 0.0005392620904670349, "loss": 3.0896, "step": 3044 }, { "epoch": 0.49, "learning_rate": 0.0005390017449680287, "loss": 3.3104, "step": 3045 }, { "epoch": 0.49, "learning_rate": 0.0005387413888297359, "loss": 3.2191, "step": 3046 }, { "epoch": 0.49, "learning_rate": 0.0005384810221231789, "loss": 3.2861, "step": 3047 }, { "epoch": 0.49, "learning_rate": 0.0005382206449193833, "loss": 3.0633, "step": 3048 }, { "epoch": 0.49, "learning_rate": 0.0005379602572893774, "loss": 3.2495, "step": 3049 }, { "epoch": 0.49, "learning_rate": 0.0005376998593041921, "loss": 3.2491, "step": 3050 }, { "epoch": 0.49, "learning_rate": 0.0005374394510348617, "loss": 3.1534, "step": 3051 }, { "epoch": 0.49, "learning_rate": 0.0005371790325524228, "loss": 3.2734, "step": 3052 }, { "epoch": 0.49, "learning_rate": 0.000536918603927915, "loss": 3.2062, "step": 3053 }, { "epoch": 0.49, "learning_rate": 0.0005366581652323807, "loss": 3.2644, "step": 3054 }, { "epoch": 0.49, "learning_rate": 0.0005363977165368649, "loss": 3.188, "step": 3055 }, { "epoch": 0.49, "learning_rate": 0.0005361372579124152, "loss": 3.1875, "step": 3056 }, { "epoch": 0.49, "learning_rate": 0.0005358767894300824, "loss": 3.303, "step": 3057 }, { "epoch": 0.49, "learning_rate": 0.0005356163111609198, "loss": 3.1102, "step": 3058 }, { "epoch": 0.49, "learning_rate": 0.0005353558231759828, "loss": 3.2855, "step": 3059 }, { "epoch": 0.49, "learning_rate": 0.0005350953255463304, "loss": 3.2437, "step": 3060 }, { "epoch": 0.49, "learning_rate": 0.0005348348183430236, "loss": 3.3033, "step": 3061 }, { "epoch": 0.49, "learning_rate": 0.0005345743016371261, "loss": 3.2723, "step": 3062 }, { "epoch": 0.49, "learning_rate": 0.0005343137754997042, "loss": 3.1542, "step": 3063 }, { "epoch": 0.49, "learning_rate": 0.0005340532400018269, "loss": 3.2267, "step": 3064 }, { "epoch": 0.49, "learning_rate": 0.0005337926952145658, "loss": 3.3288, "step": 3065 }, { "epoch": 0.49, "learning_rate": 0.0005335321412089947, "loss": 3.2769, "step": 3066 }, { "epoch": 0.49, "learning_rate": 0.0005332715780561904, "loss": 3.1983, "step": 3067 }, { "epoch": 0.49, "learning_rate": 0.0005330110058272316, "loss": 3.356, "step": 3068 }, { "epoch": 0.49, "learning_rate": 0.0005327504245932, "loss": 3.3256, "step": 3069 }, { "epoch": 0.5, "learning_rate": 0.0005324898344251796, "loss": 3.2436, "step": 3070 }, { "epoch": 0.5, "learning_rate": 0.0005322292353942568, "loss": 3.1418, "step": 3071 }, { "epoch": 0.5, "learning_rate": 0.0005319686275715202, "loss": 3.3643, "step": 3072 }, { "epoch": 0.5, "learning_rate": 0.0005317080110280613, "loss": 3.0293, "step": 3073 }, { "epoch": 0.5, "learning_rate": 0.0005314473858349733, "loss": 3.2588, "step": 3074 }, { "epoch": 0.5, "learning_rate": 0.0005311867520633526, "loss": 3.2127, "step": 3075 }, { "epoch": 0.5, "learning_rate": 0.0005309261097842974, "loss": 3.1018, "step": 3076 }, { "epoch": 0.5, "learning_rate": 0.0005306654590689079, "loss": 3.3698, "step": 3077 }, { "epoch": 0.5, "learning_rate": 0.0005304047999882874, "loss": 3.2209, "step": 3078 }, { "epoch": 0.5, "learning_rate": 0.0005301441326135412, "loss": 3.3358, "step": 3079 }, { "epoch": 0.5, "learning_rate": 0.0005298834570157763, "loss": 3.1811, "step": 3080 }, { "epoch": 0.5, "learning_rate": 0.0005296227732661028, "loss": 3.3593, "step": 3081 }, { "epoch": 0.5, "learning_rate": 0.0005293620814356326, "loss": 3.1968, "step": 3082 }, { "epoch": 0.5, "learning_rate": 0.0005291013815954795, "loss": 3.2258, "step": 3083 }, { "epoch": 0.5, "learning_rate": 0.0005288406738167601, "loss": 3.3108, "step": 3084 }, { "epoch": 0.5, "learning_rate": 0.000528579958170593, "loss": 3.1465, "step": 3085 }, { "epoch": 0.5, "learning_rate": 0.0005283192347280984, "loss": 3.1123, "step": 3086 }, { "epoch": 0.5, "learning_rate": 0.0005280585035603993, "loss": 3.3332, "step": 3087 }, { "epoch": 0.5, "learning_rate": 0.0005277977647386207, "loss": 3.1474, "step": 3088 }, { "epoch": 0.5, "learning_rate": 0.0005275370183338893, "loss": 3.1415, "step": 3089 }, { "epoch": 0.5, "learning_rate": 0.0005272762644173342, "loss": 3.1128, "step": 3090 }, { "epoch": 0.5, "learning_rate": 0.0005270155030600866, "loss": 3.105, "step": 3091 }, { "epoch": 0.5, "learning_rate": 0.0005267547343332793, "loss": 3.1629, "step": 3092 }, { "epoch": 0.5, "learning_rate": 0.0005264939583080477, "loss": 3.2143, "step": 3093 }, { "epoch": 0.5, "learning_rate": 0.0005262331750555287, "loss": 3.2167, "step": 3094 }, { "epoch": 0.5, "learning_rate": 0.0005259723846468615, "loss": 3.3578, "step": 3095 }, { "epoch": 0.5, "learning_rate": 0.0005257115871531873, "loss": 3.2177, "step": 3096 }, { "epoch": 0.5, "learning_rate": 0.0005254507826456483, "loss": 3.1446, "step": 3097 }, { "epoch": 0.5, "learning_rate": 0.0005251899711953903, "loss": 3.2223, "step": 3098 }, { "epoch": 0.5, "learning_rate": 0.0005249291528735594, "loss": 3.1324, "step": 3099 }, { "epoch": 0.5, "learning_rate": 0.0005246683277513044, "loss": 3.2095, "step": 3100 }, { "epoch": 0.5, "learning_rate": 0.0005244074958997762, "loss": 3.2255, "step": 3101 }, { "epoch": 0.5, "learning_rate": 0.0005241466573901267, "loss": 3.2232, "step": 3102 }, { "epoch": 0.5, "learning_rate": 0.0005238858122935099, "loss": 3.2391, "step": 3103 }, { "epoch": 0.5, "learning_rate": 0.0005236249606810821, "loss": 3.1954, "step": 3104 }, { "epoch": 0.5, "learning_rate": 0.000523364102624001, "loss": 3.3927, "step": 3105 }, { "epoch": 0.5, "learning_rate": 0.0005231032381934256, "loss": 3.2091, "step": 3106 }, { "epoch": 0.5, "learning_rate": 0.0005228423674605179, "loss": 3.1559, "step": 3107 }, { "epoch": 0.5, "learning_rate": 0.0005225814904964402, "loss": 3.2105, "step": 3108 }, { "epoch": 0.5, "learning_rate": 0.0005223206073723573, "loss": 3.3645, "step": 3109 }, { "epoch": 0.5, "learning_rate": 0.0005220597181594356, "loss": 3.1628, "step": 3110 }, { "epoch": 0.5, "learning_rate": 0.0005217988229288431, "loss": 3.1492, "step": 3111 }, { "epoch": 0.5, "learning_rate": 0.0005215379217517492, "loss": 3.1273, "step": 3112 }, { "epoch": 0.5, "learning_rate": 0.0005212770146993254, "loss": 3.1202, "step": 3113 }, { "epoch": 0.5, "learning_rate": 0.0005210161018427443, "loss": 3.1059, "step": 3114 }, { "epoch": 0.5, "learning_rate": 0.0005207551832531803, "loss": 3.2784, "step": 3115 }, { "epoch": 0.5, "learning_rate": 0.0005204942590018098, "loss": 3.3378, "step": 3116 }, { "epoch": 0.5, "learning_rate": 0.0005202333291598099, "loss": 3.1373, "step": 3117 }, { "epoch": 0.5, "learning_rate": 0.0005199723937983599, "loss": 3.1663, "step": 3118 }, { "epoch": 0.5, "learning_rate": 0.0005197114529886402, "loss": 3.3453, "step": 3119 }, { "epoch": 0.5, "learning_rate": 0.0005194505068018328, "loss": 3.0295, "step": 3120 }, { "epoch": 0.5, "learning_rate": 0.0005191895553091216, "loss": 3.4432, "step": 3121 }, { "epoch": 0.5, "learning_rate": 0.000518928598581691, "loss": 3.2766, "step": 3122 }, { "epoch": 0.5, "learning_rate": 0.0005186676366907278, "loss": 3.105, "step": 3123 }, { "epoch": 0.5, "learning_rate": 0.0005184066697074197, "loss": 3.1081, "step": 3124 }, { "epoch": 0.5, "learning_rate": 0.000518145697702956, "loss": 3.3497, "step": 3125 }, { "epoch": 0.5, "learning_rate": 0.0005178847207485268, "loss": 3.2287, "step": 3126 }, { "epoch": 0.5, "learning_rate": 0.0005176237389153246, "loss": 3.159, "step": 3127 }, { "epoch": 0.5, "learning_rate": 0.0005173627522745422, "loss": 3.2109, "step": 3128 }, { "epoch": 0.5, "learning_rate": 0.0005171017608973744, "loss": 3.1443, "step": 3129 }, { "epoch": 0.5, "learning_rate": 0.000516840764855017, "loss": 3.2476, "step": 3130 }, { "epoch": 0.5, "learning_rate": 0.0005165797642186671, "loss": 3.0899, "step": 3131 }, { "epoch": 0.5, "learning_rate": 0.0005163187590595229, "loss": 3.2179, "step": 3132 }, { "epoch": 0.51, "learning_rate": 0.0005160577494487843, "loss": 3.2804, "step": 3133 }, { "epoch": 0.51, "learning_rate": 0.0005157967354576518, "loss": 3.3433, "step": 3134 }, { "epoch": 0.51, "learning_rate": 0.0005155357171573276, "loss": 3.3916, "step": 3135 }, { "epoch": 0.51, "learning_rate": 0.000515274694619015, "loss": 3.2238, "step": 3136 }, { "epoch": 0.51, "learning_rate": 0.000515013667913918, "loss": 3.1708, "step": 3137 }, { "epoch": 0.51, "learning_rate": 0.0005147526371132424, "loss": 3.1294, "step": 3138 }, { "epoch": 0.51, "learning_rate": 0.0005144916022881949, "loss": 3.2582, "step": 3139 }, { "epoch": 0.51, "learning_rate": 0.0005142305635099826, "loss": 3.2925, "step": 3140 }, { "epoch": 0.51, "learning_rate": 0.0005139695208498147, "loss": 3.245, "step": 3141 }, { "epoch": 0.51, "learning_rate": 0.0005137084743789013, "loss": 3.1815, "step": 3142 }, { "epoch": 0.51, "learning_rate": 0.0005134474241684526, "loss": 3.3845, "step": 3143 }, { "epoch": 0.51, "learning_rate": 0.0005131863702896809, "loss": 3.1579, "step": 3144 }, { "epoch": 0.51, "learning_rate": 0.0005129253128137992, "loss": 3.1075, "step": 3145 }, { "epoch": 0.51, "learning_rate": 0.000512664251812021, "loss": 3.1062, "step": 3146 }, { "epoch": 0.51, "learning_rate": 0.0005124031873555613, "loss": 3.1656, "step": 3147 }, { "epoch": 0.51, "learning_rate": 0.0005121421195156361, "loss": 3.0344, "step": 3148 }, { "epoch": 0.51, "learning_rate": 0.0005118810483634619, "loss": 3.2101, "step": 3149 }, { "epoch": 0.51, "learning_rate": 0.0005116199739702563, "loss": 3.24, "step": 3150 }, { "epoch": 0.51, "learning_rate": 0.0005113588964072378, "loss": 3.1664, "step": 3151 }, { "epoch": 0.51, "learning_rate": 0.0005110978157456257, "loss": 3.3337, "step": 3152 }, { "epoch": 0.51, "learning_rate": 0.0005108367320566403, "loss": 3.2461, "step": 3153 }, { "epoch": 0.51, "learning_rate": 0.0005105756454115027, "loss": 3.0171, "step": 3154 }, { "epoch": 0.51, "learning_rate": 0.0005103145558814344, "loss": 3.1236, "step": 3155 }, { "epoch": 0.51, "learning_rate": 0.0005100534635376583, "loss": 3.1632, "step": 3156 }, { "epoch": 0.51, "learning_rate": 0.0005097923684513979, "loss": 3.1155, "step": 3157 }, { "epoch": 0.51, "learning_rate": 0.0005095312706938768, "loss": 3.1794, "step": 3158 }, { "epoch": 0.51, "learning_rate": 0.0005092701703363205, "loss": 3.209, "step": 3159 }, { "epoch": 0.51, "learning_rate": 0.0005090090674499542, "loss": 3.1605, "step": 3160 }, { "epoch": 0.51, "learning_rate": 0.0005087479621060042, "loss": 3.1132, "step": 3161 }, { "epoch": 0.51, "learning_rate": 0.0005084868543756974, "loss": 3.2622, "step": 3162 }, { "epoch": 0.51, "learning_rate": 0.0005082257443302615, "loss": 3.0867, "step": 3163 }, { "epoch": 0.51, "learning_rate": 0.0005079646320409249, "loss": 3.05, "step": 3164 }, { "epoch": 0.51, "learning_rate": 0.000507703517578916, "loss": 3.3022, "step": 3165 }, { "epoch": 0.51, "learning_rate": 0.0005074424010154643, "loss": 3.3712, "step": 3166 }, { "epoch": 0.51, "learning_rate": 0.0005071812824218001, "loss": 3.2997, "step": 3167 }, { "epoch": 0.51, "learning_rate": 0.0005069201618691537, "loss": 3.2909, "step": 3168 }, { "epoch": 0.51, "learning_rate": 0.0005066590394287562, "loss": 3.3085, "step": 3169 }, { "epoch": 0.51, "learning_rate": 0.0005063979151718393, "loss": 3.1279, "step": 3170 }, { "epoch": 0.51, "learning_rate": 0.000506136789169635, "loss": 3.277, "step": 3171 }, { "epoch": 0.51, "learning_rate": 0.0005058756614933757, "loss": 3.3897, "step": 3172 }, { "epoch": 0.51, "learning_rate": 0.0005056145322142948, "loss": 3.1298, "step": 3173 }, { "epoch": 0.51, "learning_rate": 0.0005053534014036256, "loss": 3.197, "step": 3174 }, { "epoch": 0.51, "learning_rate": 0.0005050922691326017, "loss": 3.2081, "step": 3175 }, { "epoch": 0.51, "learning_rate": 0.0005048311354724578, "loss": 3.2495, "step": 3176 }, { "epoch": 0.51, "learning_rate": 0.0005045700004944283, "loss": 3.273, "step": 3177 }, { "epoch": 0.51, "learning_rate": 0.0005043088642697482, "loss": 3.2238, "step": 3178 }, { "epoch": 0.51, "learning_rate": 0.0005040477268696532, "loss": 3.1562, "step": 3179 }, { "epoch": 0.51, "learning_rate": 0.0005037865883653785, "loss": 3.342, "step": 3180 }, { "epoch": 0.51, "learning_rate": 0.0005035254488281604, "loss": 3.3034, "step": 3181 }, { "epoch": 0.51, "learning_rate": 0.0005032643083292351, "loss": 3.2256, "step": 3182 }, { "epoch": 0.51, "learning_rate": 0.0005030031669398389, "loss": 3.2956, "step": 3183 }, { "epoch": 0.51, "learning_rate": 0.0005027420247312089, "loss": 3.1793, "step": 3184 }, { "epoch": 0.51, "learning_rate": 0.000502480881774582, "loss": 3.1414, "step": 3185 }, { "epoch": 0.51, "learning_rate": 0.0005022197381411951, "loss": 3.1477, "step": 3186 }, { "epoch": 0.51, "learning_rate": 0.0005019585939022859, "loss": 3.2471, "step": 3187 }, { "epoch": 0.51, "learning_rate": 0.0005016974491290918, "loss": 3.1409, "step": 3188 }, { "epoch": 0.51, "learning_rate": 0.0005014363038928506, "loss": 3.3576, "step": 3189 }, { "epoch": 0.51, "learning_rate": 0.0005011751582648, "loss": 3.0862, "step": 3190 }, { "epoch": 0.51, "learning_rate": 0.000500914012316178, "loss": 3.2099, "step": 3191 }, { "epoch": 0.51, "learning_rate": 0.0005006528661182225, "loss": 3.2117, "step": 3192 }, { "epoch": 0.51, "learning_rate": 0.0005003917197421717, "loss": 3.1058, "step": 3193 }, { "epoch": 0.51, "learning_rate": 0.0005001305732592636, "loss": 3.3008, "step": 3194 }, { "epoch": 0.52, "learning_rate": 0.0004998694267407364, "loss": 3.1614, "step": 3195 }, { "epoch": 0.52, "learning_rate": 0.0004996082802578285, "loss": 3.2821, "step": 3196 }, { "epoch": 0.52, "learning_rate": 0.0004993471338817775, "loss": 3.2097, "step": 3197 }, { "epoch": 0.52, "learning_rate": 0.000499085987683822, "loss": 3.3142, "step": 3198 }, { "epoch": 0.52, "learning_rate": 0.0004988248417352, "loss": 3.237, "step": 3199 }, { "epoch": 0.52, "learning_rate": 0.0004985636961071495, "loss": 2.9857, "step": 3200 }, { "epoch": 0.52, "learning_rate": 0.0004983025508709082, "loss": 3.1696, "step": 3201 }, { "epoch": 0.52, "learning_rate": 0.0004980414060977142, "loss": 3.2288, "step": 3202 }, { "epoch": 0.52, "learning_rate": 0.0004977802618588049, "loss": 3.1491, "step": 3203 }, { "epoch": 0.52, "learning_rate": 0.000497519118225418, "loss": 3.1794, "step": 3204 }, { "epoch": 0.52, "learning_rate": 0.000497257975268791, "loss": 3.365, "step": 3205 }, { "epoch": 0.52, "learning_rate": 0.0004969968330601611, "loss": 3.1326, "step": 3206 }, { "epoch": 0.52, "learning_rate": 0.000496735691670765, "loss": 3.2382, "step": 3207 }, { "epoch": 0.52, "learning_rate": 0.0004964745511718397, "loss": 3.1295, "step": 3208 }, { "epoch": 0.52, "learning_rate": 0.0004962134116346215, "loss": 3.1542, "step": 3209 }, { "epoch": 0.52, "learning_rate": 0.000495952273130347, "loss": 3.1851, "step": 3210 }, { "epoch": 0.52, "learning_rate": 0.0004956911357302517, "loss": 3.2107, "step": 3211 }, { "epoch": 0.52, "learning_rate": 0.0004954299995055719, "loss": 3.1284, "step": 3212 }, { "epoch": 0.52, "learning_rate": 0.0004951688645275423, "loss": 3.0539, "step": 3213 }, { "epoch": 0.52, "learning_rate": 0.0004949077308673984, "loss": 3.0944, "step": 3214 }, { "epoch": 0.52, "learning_rate": 0.0004946465985963746, "loss": 3.2093, "step": 3215 }, { "epoch": 0.52, "learning_rate": 0.0004943854677857054, "loss": 3.1784, "step": 3216 }, { "epoch": 0.52, "learning_rate": 0.0004941243385066243, "loss": 3.186, "step": 3217 }, { "epoch": 0.52, "learning_rate": 0.0004938632108303651, "loss": 3.1289, "step": 3218 }, { "epoch": 0.52, "learning_rate": 0.0004936020848281609, "loss": 3.252, "step": 3219 }, { "epoch": 0.52, "learning_rate": 0.000493340960571244, "loss": 3.3504, "step": 3220 }, { "epoch": 0.52, "learning_rate": 0.0004930798381308464, "loss": 3.3246, "step": 3221 }, { "epoch": 0.52, "learning_rate": 0.0004928187175782001, "loss": 3.3063, "step": 3222 }, { "epoch": 0.52, "learning_rate": 0.0004925575989845356, "loss": 3.2332, "step": 3223 }, { "epoch": 0.52, "learning_rate": 0.000492296482421084, "loss": 3.1433, "step": 3224 }, { "epoch": 0.52, "learning_rate": 0.0004920353679590753, "loss": 3.2586, "step": 3225 }, { "epoch": 0.52, "learning_rate": 0.0004917742556697385, "loss": 3.163, "step": 3226 }, { "epoch": 0.52, "learning_rate": 0.0004915131456243026, "loss": 3.1047, "step": 3227 }, { "epoch": 0.52, "learning_rate": 0.000491252037893996, "loss": 3.2494, "step": 3228 }, { "epoch": 0.52, "learning_rate": 0.0004909909325500459, "loss": 3.1794, "step": 3229 }, { "epoch": 0.52, "learning_rate": 0.0004907298296636795, "loss": 3.0601, "step": 3230 }, { "epoch": 0.52, "learning_rate": 0.0004904687293061232, "loss": 3.3469, "step": 3231 }, { "epoch": 0.52, "learning_rate": 0.0004902076315486023, "loss": 3.2351, "step": 3232 }, { "epoch": 0.52, "learning_rate": 0.0004899465364623418, "loss": 3.3783, "step": 3233 }, { "epoch": 0.52, "learning_rate": 0.0004896854441185658, "loss": 3.1779, "step": 3234 }, { "epoch": 0.52, "learning_rate": 0.0004894243545884976, "loss": 3.1827, "step": 3235 }, { "epoch": 0.52, "learning_rate": 0.0004891632679433597, "loss": 3.1433, "step": 3236 }, { "epoch": 0.52, "learning_rate": 0.0004889021842543744, "loss": 3.2206, "step": 3237 }, { "epoch": 0.52, "learning_rate": 0.0004886411035927622, "loss": 3.2492, "step": 3238 }, { "epoch": 0.52, "learning_rate": 0.0004883800260297439, "loss": 3.3205, "step": 3239 }, { "epoch": 0.52, "learning_rate": 0.0004881189516365383, "loss": 3.1091, "step": 3240 }, { "epoch": 0.52, "learning_rate": 0.00048785788048436404, "loss": 3.2434, "step": 3241 }, { "epoch": 0.52, "learning_rate": 0.00048759681264443864, "loss": 3.2045, "step": 3242 }, { "epoch": 0.52, "learning_rate": 0.0004873357481879791, "loss": 3.1103, "step": 3243 }, { "epoch": 0.52, "learning_rate": 0.00048707468718620095, "loss": 3.2026, "step": 3244 }, { "epoch": 0.52, "learning_rate": 0.0004868136297103192, "loss": 3.1947, "step": 3245 }, { "epoch": 0.52, "learning_rate": 0.00048655257583154755, "loss": 3.2089, "step": 3246 }, { "epoch": 0.52, "learning_rate": 0.000486291525621099, "loss": 3.1493, "step": 3247 }, { "epoch": 0.52, "learning_rate": 0.0004860304791501852, "loss": 3.1183, "step": 3248 }, { "epoch": 0.52, "learning_rate": 0.00048576943649001746, "loss": 3.1506, "step": 3249 }, { "epoch": 0.52, "learning_rate": 0.00048550839771180533, "loss": 3.209, "step": 3250 }, { "epoch": 0.52, "learning_rate": 0.00048524736288675766, "loss": 3.0394, "step": 3251 }, { "epoch": 0.52, "learning_rate": 0.000484986332086082, "loss": 3.1961, "step": 3252 }, { "epoch": 0.52, "learning_rate": 0.0004847253053809852, "loss": 3.0844, "step": 3253 }, { "epoch": 0.52, "learning_rate": 0.00048446428284267234, "loss": 3.2307, "step": 3254 }, { "epoch": 0.52, "learning_rate": 0.0004842032645423483, "loss": 3.0909, "step": 3255 }, { "epoch": 0.52, "learning_rate": 0.0004839422505512158, "loss": 3.2183, "step": 3256 }, { "epoch": 0.53, "learning_rate": 0.0004836812409404772, "loss": 3.2456, "step": 3257 }, { "epoch": 0.53, "learning_rate": 0.0004834202357813331, "loss": 3.1626, "step": 3258 }, { "epoch": 0.53, "learning_rate": 0.00048315923514498317, "loss": 2.9725, "step": 3259 }, { "epoch": 0.53, "learning_rate": 0.0004828982391026258, "loss": 3.2855, "step": 3260 }, { "epoch": 0.53, "learning_rate": 0.0004826372477254579, "loss": 3.2049, "step": 3261 }, { "epoch": 0.53, "learning_rate": 0.0004823762610846755, "loss": 3.1008, "step": 3262 }, { "epoch": 0.53, "learning_rate": 0.00048211527925147326, "loss": 3.1315, "step": 3263 }, { "epoch": 0.53, "learning_rate": 0.0004818543022970442, "loss": 3.228, "step": 3264 }, { "epoch": 0.53, "learning_rate": 0.0004815933302925804, "loss": 3.3829, "step": 3265 }, { "epoch": 0.53, "learning_rate": 0.00048133236330927227, "loss": 3.1658, "step": 3266 }, { "epoch": 0.53, "learning_rate": 0.00048107140141830893, "loss": 3.0786, "step": 3267 }, { "epoch": 0.53, "learning_rate": 0.00048081044469087855, "loss": 3.2617, "step": 3268 }, { "epoch": 0.53, "learning_rate": 0.0004805494931981672, "loss": 3.2626, "step": 3269 }, { "epoch": 0.53, "learning_rate": 0.00048028854701135987, "loss": 3.2426, "step": 3270 }, { "epoch": 0.53, "learning_rate": 0.00048002760620164027, "loss": 3.3305, "step": 3271 }, { "epoch": 0.53, "learning_rate": 0.00047976667084019016, "loss": 3.1159, "step": 3272 }, { "epoch": 0.53, "learning_rate": 0.00047950574099819013, "loss": 3.3174, "step": 3273 }, { "epoch": 0.53, "learning_rate": 0.00047924481674681957, "loss": 3.2665, "step": 3274 }, { "epoch": 0.53, "learning_rate": 0.0004789838981572558, "loss": 3.153, "step": 3275 }, { "epoch": 0.53, "learning_rate": 0.0004787229853006747, "loss": 3.1022, "step": 3276 }, { "epoch": 0.53, "learning_rate": 0.0004784620782482509, "loss": 3.1786, "step": 3277 }, { "epoch": 0.53, "learning_rate": 0.00047820117707115704, "loss": 2.9901, "step": 3278 }, { "epoch": 0.53, "learning_rate": 0.00047794028184056434, "loss": 3.2805, "step": 3279 }, { "epoch": 0.53, "learning_rate": 0.00047767939262764264, "loss": 3.2803, "step": 3280 }, { "epoch": 0.53, "learning_rate": 0.0004774185095035599, "loss": 3.3162, "step": 3281 }, { "epoch": 0.53, "learning_rate": 0.0004771576325394822, "loss": 3.2557, "step": 3282 }, { "epoch": 0.53, "learning_rate": 0.00047689676180657435, "loss": 3.1711, "step": 3283 }, { "epoch": 0.53, "learning_rate": 0.00047663589737599924, "loss": 3.1379, "step": 3284 }, { "epoch": 0.53, "learning_rate": 0.0004763750393189181, "loss": 3.2339, "step": 3285 }, { "epoch": 0.53, "learning_rate": 0.00047611418770649014, "loss": 3.2799, "step": 3286 }, { "epoch": 0.53, "learning_rate": 0.00047585334260987345, "loss": 3.3689, "step": 3287 }, { "epoch": 0.53, "learning_rate": 0.0004755925041002239, "loss": 3.2719, "step": 3288 }, { "epoch": 0.53, "learning_rate": 0.00047533167224869563, "loss": 3.2448, "step": 3289 }, { "epoch": 0.53, "learning_rate": 0.00047507084712644074, "loss": 3.1812, "step": 3290 }, { "epoch": 0.53, "learning_rate": 0.00047481002880461, "loss": 3.1863, "step": 3291 }, { "epoch": 0.53, "learning_rate": 0.0004745492173543517, "loss": 3.3076, "step": 3292 }, { "epoch": 0.53, "learning_rate": 0.0004742884128468129, "loss": 3.3589, "step": 3293 }, { "epoch": 0.53, "learning_rate": 0.00047402761535313853, "loss": 3.1985, "step": 3294 }, { "epoch": 0.53, "learning_rate": 0.0004737668249444714, "loss": 3.1538, "step": 3295 }, { "epoch": 0.53, "learning_rate": 0.00047350604169195236, "loss": 3.3158, "step": 3296 }, { "epoch": 0.53, "learning_rate": 0.00047324526566672084, "loss": 3.2434, "step": 3297 }, { "epoch": 0.53, "learning_rate": 0.00047298449693991345, "loss": 3.1582, "step": 3298 }, { "epoch": 0.53, "learning_rate": 0.0004727237355826657, "loss": 3.227, "step": 3299 }, { "epoch": 0.53, "learning_rate": 0.00047246298166611075, "loss": 3.1504, "step": 3300 }, { "epoch": 0.53, "learning_rate": 0.00047220223526137935, "loss": 3.112, "step": 3301 }, { "epoch": 0.53, "learning_rate": 0.0004719414964396007, "loss": 3.3051, "step": 3302 }, { "epoch": 0.53, "learning_rate": 0.00047168076527190177, "loss": 3.1122, "step": 3303 }, { "epoch": 0.53, "learning_rate": 0.00047142004182940714, "loss": 3.2138, "step": 3304 }, { "epoch": 0.53, "learning_rate": 0.00047115932618323984, "loss": 3.1657, "step": 3305 }, { "epoch": 0.53, "learning_rate": 0.00047089861840452055, "loss": 3.1503, "step": 3306 }, { "epoch": 0.53, "learning_rate": 0.0004706379185643675, "loss": 3.3249, "step": 3307 }, { "epoch": 0.53, "learning_rate": 0.00047037722673389724, "loss": 3.0006, "step": 3308 }, { "epoch": 0.53, "learning_rate": 0.00047011654298422385, "loss": 3.2079, "step": 3309 }, { "epoch": 0.53, "learning_rate": 0.000469855867386459, "loss": 3.2883, "step": 3310 }, { "epoch": 0.53, "learning_rate": 0.0004695952000117125, "loss": 3.0878, "step": 3311 }, { "epoch": 0.53, "learning_rate": 0.0004693345409310922, "loss": 3.2655, "step": 3312 }, { "epoch": 0.53, "learning_rate": 0.0004690738902157028, "loss": 3.3669, "step": 3313 }, { "epoch": 0.53, "learning_rate": 0.0004688132479366475, "loss": 3.0848, "step": 3314 }, { "epoch": 0.53, "learning_rate": 0.0004685526141650267, "loss": 3.0766, "step": 3315 }, { "epoch": 0.53, "learning_rate": 0.0004682919889719389, "loss": 3.1671, "step": 3316 }, { "epoch": 0.53, "learning_rate": 0.0004680313724284798, "loss": 3.1254, "step": 3317 }, { "epoch": 0.53, "learning_rate": 0.00046777076460574336, "loss": 3.1751, "step": 3318 }, { "epoch": 0.54, "learning_rate": 0.0004675101655748204, "loss": 3.2538, "step": 3319 }, { "epoch": 0.54, "learning_rate": 0.0004672495754068001, "loss": 3.1657, "step": 3320 }, { "epoch": 0.54, "learning_rate": 0.00046698899417276846, "loss": 3.3485, "step": 3321 }, { "epoch": 0.54, "learning_rate": 0.00046672842194380983, "loss": 3.251, "step": 3322 }, { "epoch": 0.54, "learning_rate": 0.0004664678587910053, "loss": 3.2865, "step": 3323 }, { "epoch": 0.54, "learning_rate": 0.0004662073047854343, "loss": 3.2871, "step": 3324 }, { "epoch": 0.54, "learning_rate": 0.00046594675999817313, "loss": 3.3369, "step": 3325 }, { "epoch": 0.54, "learning_rate": 0.00046568622450029594, "loss": 3.1117, "step": 3326 }, { "epoch": 0.54, "learning_rate": 0.0004654256983628741, "loss": 3.2471, "step": 3327 }, { "epoch": 0.54, "learning_rate": 0.0004651651816569766, "loss": 3.2726, "step": 3328 }, { "epoch": 0.54, "learning_rate": 0.0004649046744536696, "loss": 3.2909, "step": 3329 }, { "epoch": 0.54, "learning_rate": 0.00046464417682401724, "loss": 3.2507, "step": 3330 }, { "epoch": 0.54, "learning_rate": 0.00046438368883908036, "loss": 3.2136, "step": 3331 }, { "epoch": 0.54, "learning_rate": 0.00046412321056991766, "loss": 3.2698, "step": 3332 }, { "epoch": 0.54, "learning_rate": 0.0004638627420875849, "loss": 3.2456, "step": 3333 }, { "epoch": 0.54, "learning_rate": 0.00046360228346313537, "loss": 3.1461, "step": 3334 }, { "epoch": 0.54, "learning_rate": 0.0004633418347676195, "loss": 3.2197, "step": 3335 }, { "epoch": 0.54, "learning_rate": 0.0004630813960720849, "loss": 3.2401, "step": 3336 }, { "epoch": 0.54, "learning_rate": 0.00046282096744757725, "loss": 2.9968, "step": 3337 }, { "epoch": 0.54, "learning_rate": 0.0004625605489651384, "loss": 3.1799, "step": 3338 }, { "epoch": 0.54, "learning_rate": 0.00046230014069580794, "loss": 3.1964, "step": 3339 }, { "epoch": 0.54, "learning_rate": 0.0004620397427106229, "loss": 3.3756, "step": 3340 }, { "epoch": 0.54, "learning_rate": 0.00046177935508061687, "loss": 3.2419, "step": 3341 }, { "epoch": 0.54, "learning_rate": 0.0004615189778768211, "loss": 3.3482, "step": 3342 }, { "epoch": 0.54, "learning_rate": 0.0004612586111702642, "loss": 3.2245, "step": 3343 }, { "epoch": 0.54, "learning_rate": 0.00046099825503197146, "loss": 3.2362, "step": 3344 }, { "epoch": 0.54, "learning_rate": 0.00046073790953296524, "loss": 3.0774, "step": 3345 }, { "epoch": 0.54, "learning_rate": 0.00046047757474426546, "loss": 3.0815, "step": 3346 }, { "epoch": 0.54, "learning_rate": 0.00046021725073688857, "loss": 3.2171, "step": 3347 }, { "epoch": 0.54, "learning_rate": 0.00045995693758184845, "loss": 3.2208, "step": 3348 }, { "epoch": 0.54, "learning_rate": 0.00045969663535015616, "loss": 3.1965, "step": 3349 }, { "epoch": 0.54, "learning_rate": 0.00045943634411281926, "loss": 3.1778, "step": 3350 }, { "epoch": 0.54, "learning_rate": 0.0004591760639408429, "loss": 3.3007, "step": 3351 }, { "epoch": 0.54, "learning_rate": 0.00045891579490522867, "loss": 3.0539, "step": 3352 }, { "epoch": 0.54, "learning_rate": 0.0004586555370769753, "loss": 3.2757, "step": 3353 }, { "epoch": 0.54, "learning_rate": 0.00045839529052707864, "loss": 3.2285, "step": 3354 }, { "epoch": 0.54, "learning_rate": 0.0004581350553265315, "loss": 3.2487, "step": 3355 }, { "epoch": 0.54, "learning_rate": 0.0004578748315463232, "loss": 3.2526, "step": 3356 }, { "epoch": 0.54, "learning_rate": 0.0004576146192574405, "loss": 3.1446, "step": 3357 }, { "epoch": 0.54, "learning_rate": 0.0004573544185308664, "loss": 3.1207, "step": 3358 }, { "epoch": 0.54, "learning_rate": 0.0004570942294375811, "loss": 3.1406, "step": 3359 }, { "epoch": 0.54, "learning_rate": 0.00045683405204856184, "loss": 3.0804, "step": 3360 }, { "epoch": 0.54, "learning_rate": 0.0004565738864347819, "loss": 3.2368, "step": 3361 }, { "epoch": 0.54, "learning_rate": 0.00045631373266721246, "loss": 3.1443, "step": 3362 }, { "epoch": 0.54, "learning_rate": 0.00045605359081682064, "loss": 3.2279, "step": 3363 }, { "epoch": 0.54, "learning_rate": 0.0004557934609545705, "loss": 3.1636, "step": 3364 }, { "epoch": 0.54, "learning_rate": 0.00045553334315142274, "loss": 3.061, "step": 3365 }, { "epoch": 0.54, "learning_rate": 0.00045527323747833514, "loss": 3.1988, "step": 3366 }, { "epoch": 0.54, "learning_rate": 0.0004550131440062617, "loss": 3.2118, "step": 3367 }, { "epoch": 0.54, "learning_rate": 0.00045475306280615347, "loss": 3.173, "step": 3368 }, { "epoch": 0.54, "learning_rate": 0.00045449299394895816, "loss": 3.0572, "step": 3369 }, { "epoch": 0.54, "learning_rate": 0.0004542329375056197, "loss": 3.0886, "step": 3370 }, { "epoch": 0.54, "learning_rate": 0.000453972893547079, "loss": 3.2411, "step": 3371 }, { "epoch": 0.54, "learning_rate": 0.00045371286214427345, "loss": 3.3076, "step": 3372 }, { "epoch": 0.54, "learning_rate": 0.0004534528433681368, "loss": 3.3572, "step": 3373 }, { "epoch": 0.54, "learning_rate": 0.00045319283728959984, "loss": 3.2711, "step": 3374 }, { "epoch": 0.54, "learning_rate": 0.0004529328439795898, "loss": 3.2829, "step": 3375 }, { "epoch": 0.54, "learning_rate": 0.0004526728635090298, "loss": 3.2396, "step": 3376 }, { "epoch": 0.54, "learning_rate": 0.0004524128959488403, "loss": 3.318, "step": 3377 }, { "epoch": 0.54, "learning_rate": 0.0004521529413699377, "loss": 3.3001, "step": 3378 }, { "epoch": 0.54, "learning_rate": 0.00045189299984323486, "loss": 3.1894, "step": 3379 }, { "epoch": 0.54, "learning_rate": 0.0004516330714396414, "loss": 3.2905, "step": 3380 }, { "epoch": 0.55, "learning_rate": 0.00045137315623006337, "loss": 3.1209, "step": 3381 }, { "epoch": 0.55, "learning_rate": 0.0004511132542854027, "loss": 3.3547, "step": 3382 }, { "epoch": 0.55, "learning_rate": 0.00045085336567655834, "loss": 3.2633, "step": 3383 }, { "epoch": 0.55, "learning_rate": 0.000450593490474425, "loss": 3.1031, "step": 3384 }, { "epoch": 0.55, "learning_rate": 0.00045033362874989435, "loss": 3.1281, "step": 3385 }, { "epoch": 0.55, "learning_rate": 0.00045007378057385376, "loss": 3.2569, "step": 3386 }, { "epoch": 0.55, "learning_rate": 0.0004498139460171876, "loss": 3.1097, "step": 3387 }, { "epoch": 0.55, "learning_rate": 0.0004495541251507758, "loss": 3.2054, "step": 3388 }, { "epoch": 0.55, "learning_rate": 0.00044929431804549514, "loss": 3.1308, "step": 3389 }, { "epoch": 0.55, "learning_rate": 0.00044903452477221826, "loss": 2.993, "step": 3390 }, { "epoch": 0.55, "learning_rate": 0.00044877474540181426, "loss": 3.2576, "step": 3391 }, { "epoch": 0.55, "learning_rate": 0.00044851498000514806, "loss": 3.1196, "step": 3392 }, { "epoch": 0.55, "learning_rate": 0.0004482552286530816, "loss": 3.2188, "step": 3393 }, { "epoch": 0.55, "learning_rate": 0.00044799549141647205, "loss": 3.2312, "step": 3394 }, { "epoch": 0.55, "learning_rate": 0.00044773576836617336, "loss": 3.0987, "step": 3395 }, { "epoch": 0.55, "learning_rate": 0.0004474760595730352, "loss": 3.0337, "step": 3396 }, { "epoch": 0.55, "learning_rate": 0.00044721636510790366, "loss": 3.3296, "step": 3397 }, { "epoch": 0.55, "learning_rate": 0.0004469566850416206, "loss": 3.1772, "step": 3398 }, { "epoch": 0.55, "learning_rate": 0.00044669701944502444, "loss": 3.1926, "step": 3399 }, { "epoch": 0.55, "learning_rate": 0.0004464373683889492, "loss": 3.2088, "step": 3400 }, { "epoch": 0.55, "learning_rate": 0.0004461777319442252, "loss": 3.148, "step": 3401 }, { "epoch": 0.55, "learning_rate": 0.0004459181101816785, "loss": 3.1503, "step": 3402 }, { "epoch": 0.55, "learning_rate": 0.00044565850317213155, "loss": 3.0292, "step": 3403 }, { "epoch": 0.55, "learning_rate": 0.00044539891098640215, "loss": 3.2398, "step": 3404 }, { "epoch": 0.55, "learning_rate": 0.00044513933369530484, "loss": 3.1354, "step": 3405 }, { "epoch": 0.55, "learning_rate": 0.00044487977136964976, "loss": 3.2474, "step": 3406 }, { "epoch": 0.55, "learning_rate": 0.0004446202240802427, "loss": 3.2825, "step": 3407 }, { "epoch": 0.55, "learning_rate": 0.00044436069189788554, "loss": 3.2114, "step": 3408 }, { "epoch": 0.55, "learning_rate": 0.00044410117489337633, "loss": 3.1577, "step": 3409 }, { "epoch": 0.55, "learning_rate": 0.00044384167313750835, "loss": 3.1737, "step": 3410 }, { "epoch": 0.55, "learning_rate": 0.0004435821867010712, "loss": 3.1093, "step": 3411 }, { "epoch": 0.55, "learning_rate": 0.0004433227156548505, "loss": 3.207, "step": 3412 }, { "epoch": 0.55, "learning_rate": 0.0004430632600696272, "loss": 3.2113, "step": 3413 }, { "epoch": 0.55, "learning_rate": 0.0004428038200161779, "loss": 3.0631, "step": 3414 }, { "epoch": 0.55, "learning_rate": 0.00044254439556527564, "loss": 3.2515, "step": 3415 }, { "epoch": 0.55, "learning_rate": 0.0004422849867876886, "loss": 3.2185, "step": 3416 }, { "epoch": 0.55, "learning_rate": 0.0004420255937541808, "loss": 3.1429, "step": 3417 }, { "epoch": 0.55, "learning_rate": 0.00044176621653551246, "loss": 3.4016, "step": 3418 }, { "epoch": 0.55, "learning_rate": 0.00044150685520243885, "loss": 3.1982, "step": 3419 }, { "epoch": 0.55, "learning_rate": 0.00044124750982571113, "loss": 3.273, "step": 3420 }, { "epoch": 0.55, "learning_rate": 0.0004409881804760763, "loss": 3.1384, "step": 3421 }, { "epoch": 0.55, "learning_rate": 0.0004407288672242766, "loss": 3.1849, "step": 3422 }, { "epoch": 0.55, "learning_rate": 0.00044046957014105007, "loss": 3.0956, "step": 3423 }, { "epoch": 0.55, "learning_rate": 0.00044021028929713065, "loss": 3.4236, "step": 3424 }, { "epoch": 0.55, "learning_rate": 0.00043995102476324734, "loss": 3.1925, "step": 3425 }, { "epoch": 0.55, "learning_rate": 0.00043969177661012503, "loss": 3.1946, "step": 3426 }, { "epoch": 0.55, "learning_rate": 0.000439432544908484, "loss": 3.2926, "step": 3427 }, { "epoch": 0.55, "learning_rate": 0.00043917332972904, "loss": 3.1886, "step": 3428 }, { "epoch": 0.55, "learning_rate": 0.00043891413114250424, "loss": 3.2028, "step": 3429 }, { "epoch": 0.55, "learning_rate": 0.0004386549492195838, "loss": 3.2794, "step": 3430 }, { "epoch": 0.55, "learning_rate": 0.00043839578403098077, "loss": 3.2413, "step": 3431 }, { "epoch": 0.55, "learning_rate": 0.0004381366356473929, "loss": 3.2096, "step": 3432 }, { "epoch": 0.55, "learning_rate": 0.0004378775041395132, "loss": 3.2268, "step": 3433 }, { "epoch": 0.55, "learning_rate": 0.0004376183895780301, "loss": 3.095, "step": 3434 }, { "epoch": 0.55, "learning_rate": 0.00043735929203362765, "loss": 3.1742, "step": 3435 }, { "epoch": 0.55, "learning_rate": 0.00043710021157698477, "loss": 3.2431, "step": 3436 }, { "epoch": 0.55, "learning_rate": 0.0004368411482787763, "loss": 3.1533, "step": 3437 }, { "epoch": 0.55, "learning_rate": 0.00043658210220967207, "loss": 3.477, "step": 3438 }, { "epoch": 0.55, "learning_rate": 0.0004363230734403372, "loss": 3.2855, "step": 3439 }, { "epoch": 0.55, "learning_rate": 0.00043606406204143227, "loss": 3.2613, "step": 3440 }, { "epoch": 0.55, "learning_rate": 0.00043580506808361296, "loss": 3.1322, "step": 3441 }, { "epoch": 0.55, "learning_rate": 0.0004355460916375299, "loss": 3.2311, "step": 3442 }, { "epoch": 0.56, "learning_rate": 0.0004352871327738297, "loss": 3.2778, "step": 3443 }, { "epoch": 0.56, "learning_rate": 0.00043502819156315375, "loss": 3.1577, "step": 3444 }, { "epoch": 0.56, "learning_rate": 0.0004347692680761384, "loss": 3.2708, "step": 3445 }, { "epoch": 0.56, "learning_rate": 0.0004345103623834156, "loss": 3.2704, "step": 3446 }, { "epoch": 0.56, "learning_rate": 0.00043425147455561206, "loss": 3.1493, "step": 3447 }, { "epoch": 0.56, "learning_rate": 0.00043399260466334973, "loss": 3.2697, "step": 3448 }, { "epoch": 0.56, "learning_rate": 0.0004337337527772459, "loss": 3.1715, "step": 3449 }, { "epoch": 0.56, "learning_rate": 0.00043347491896791293, "loss": 3.2397, "step": 3450 }, { "epoch": 0.56, "learning_rate": 0.0004332161033059578, "loss": 3.1042, "step": 3451 }, { "epoch": 0.56, "learning_rate": 0.000432957305861983, "loss": 3.2512, "step": 3452 }, { "epoch": 0.56, "learning_rate": 0.00043269852670658576, "loss": 3.2441, "step": 3453 }, { "epoch": 0.56, "learning_rate": 0.0004324397659103586, "loss": 3.0104, "step": 3454 }, { "epoch": 0.56, "learning_rate": 0.00043218102354388875, "loss": 3.2199, "step": 3455 }, { "epoch": 0.56, "learning_rate": 0.00043192229967775874, "loss": 3.3412, "step": 3456 }, { "epoch": 0.56, "learning_rate": 0.0004316635943825456, "loss": 3.1961, "step": 3457 }, { "epoch": 0.56, "learning_rate": 0.0004314049077288218, "loss": 3.0986, "step": 3458 }, { "epoch": 0.56, "learning_rate": 0.00043114623978715425, "loss": 3.0902, "step": 3459 }, { "epoch": 0.56, "learning_rate": 0.00043088759062810515, "loss": 3.2653, "step": 3460 }, { "epoch": 0.56, "learning_rate": 0.0004306289603222312, "loss": 2.9927, "step": 3461 }, { "epoch": 0.56, "learning_rate": 0.0004303703489400845, "loss": 3.2053, "step": 3462 }, { "epoch": 0.56, "learning_rate": 0.00043011175655221134, "loss": 3.1761, "step": 3463 }, { "epoch": 0.56, "learning_rate": 0.0004298531832291535, "loss": 3.33, "step": 3464 }, { "epoch": 0.56, "learning_rate": 0.0004295946290414467, "loss": 3.1031, "step": 3465 }, { "epoch": 0.56, "learning_rate": 0.0004293360940596224, "loss": 3.2573, "step": 3466 }, { "epoch": 0.56, "learning_rate": 0.00042907757835420593, "loss": 3.1434, "step": 3467 }, { "epoch": 0.56, "learning_rate": 0.00042881908199571825, "loss": 3.2677, "step": 3468 }, { "epoch": 0.56, "learning_rate": 0.0004285606050546742, "loss": 3.1098, "step": 3469 }, { "epoch": 0.56, "learning_rate": 0.0004283021476015841, "loss": 3.1216, "step": 3470 }, { "epoch": 0.56, "learning_rate": 0.0004280437097069522, "loss": 3.1876, "step": 3471 }, { "epoch": 0.56, "learning_rate": 0.000427785291441278, "loss": 3.3451, "step": 3472 }, { "epoch": 0.56, "learning_rate": 0.0004275268928750551, "loss": 3.2029, "step": 3473 }, { "epoch": 0.56, "learning_rate": 0.0004272685140787724, "loss": 3.254, "step": 3474 }, { "epoch": 0.56, "learning_rate": 0.0004270101551229131, "loss": 3.1195, "step": 3475 }, { "epoch": 0.56, "learning_rate": 0.0004267518160779549, "loss": 3.2315, "step": 3476 }, { "epoch": 0.56, "learning_rate": 0.0004264934970143697, "loss": 3.1434, "step": 3477 }, { "epoch": 0.56, "learning_rate": 0.0004262351980026249, "loss": 3.0753, "step": 3478 }, { "epoch": 0.56, "learning_rate": 0.00042597691911318134, "loss": 3.1686, "step": 3479 }, { "epoch": 0.56, "learning_rate": 0.0004257186604164955, "loss": 3.2603, "step": 3480 }, { "epoch": 0.56, "learning_rate": 0.0004254604219830175, "loss": 3.2399, "step": 3481 }, { "epoch": 0.56, "learning_rate": 0.00042520220388319224, "loss": 3.1753, "step": 3482 }, { "epoch": 0.56, "learning_rate": 0.00042494400618745893, "loss": 3.0867, "step": 3483 }, { "epoch": 0.56, "learning_rate": 0.0004246858289662515, "loss": 3.2331, "step": 3484 }, { "epoch": 0.56, "learning_rate": 0.00042442767228999804, "loss": 3.3531, "step": 3485 }, { "epoch": 0.56, "learning_rate": 0.00042416953622912085, "loss": 3.2081, "step": 3486 }, { "epoch": 0.56, "learning_rate": 0.0004239114208540374, "loss": 3.0914, "step": 3487 }, { "epoch": 0.56, "learning_rate": 0.00042365332623515864, "loss": 3.3112, "step": 3488 }, { "epoch": 0.56, "learning_rate": 0.00042339525244289034, "loss": 3.1849, "step": 3489 }, { "epoch": 0.56, "learning_rate": 0.0004231371995476324, "loss": 3.21, "step": 3490 }, { "epoch": 0.56, "learning_rate": 0.000422879167619779, "loss": 3.3596, "step": 3491 }, { "epoch": 0.56, "learning_rate": 0.0004226211567297186, "loss": 3.0654, "step": 3492 }, { "epoch": 0.56, "learning_rate": 0.00042236316694783434, "loss": 3.2633, "step": 3493 }, { "epoch": 0.56, "learning_rate": 0.00042210519834450297, "loss": 3.1809, "step": 3494 }, { "epoch": 0.56, "learning_rate": 0.00042184725099009594, "loss": 3.2025, "step": 3495 }, { "epoch": 0.56, "learning_rate": 0.0004215893249549786, "loss": 3.285, "step": 3496 }, { "epoch": 0.56, "learning_rate": 0.00042133142030951044, "loss": 3.2173, "step": 3497 }, { "epoch": 0.56, "learning_rate": 0.0004210735371240454, "loss": 3.3562, "step": 3498 }, { "epoch": 0.56, "learning_rate": 0.00042081567546893166, "loss": 3.2351, "step": 3499 }, { "epoch": 0.56, "learning_rate": 0.000420557835414511, "loss": 3.103, "step": 3500 }, { "epoch": 0.56, "learning_rate": 0.0004203000170311199, "loss": 3.204, "step": 3501 }, { "epoch": 0.56, "learning_rate": 0.00042004222038908846, "loss": 3.0736, "step": 3502 }, { "epoch": 0.56, "learning_rate": 0.0004197844455587409, "loss": 3.1707, "step": 3503 }, { "epoch": 0.56, "learning_rate": 0.0004195266926103959, "loss": 3.2559, "step": 3504 }, { "epoch": 0.57, "learning_rate": 0.0004192689616143655, "loss": 3.1687, "step": 3505 }, { "epoch": 0.57, "learning_rate": 0.0004190112526409565, "loss": 3.216, "step": 3506 }, { "epoch": 0.57, "learning_rate": 0.0004187535657604694, "loss": 3.3032, "step": 3507 }, { "epoch": 0.57, "learning_rate": 0.00041849590104319834, "loss": 3.155, "step": 3508 }, { "epoch": 0.57, "learning_rate": 0.0004182382585594318, "loss": 3.1065, "step": 3509 }, { "epoch": 0.57, "learning_rate": 0.00041798063837945214, "loss": 3.2242, "step": 3510 }, { "epoch": 0.57, "learning_rate": 0.00041772304057353525, "loss": 3.357, "step": 3511 }, { "epoch": 0.57, "learning_rate": 0.0004174654652119516, "loss": 3.1096, "step": 3512 }, { "epoch": 0.57, "learning_rate": 0.0004172079123649652, "loss": 3.1568, "step": 3513 }, { "epoch": 0.57, "learning_rate": 0.0004169503821028336, "loss": 3.0949, "step": 3514 }, { "epoch": 0.57, "learning_rate": 0.0004166928744958088, "loss": 3.0884, "step": 3515 }, { "epoch": 0.57, "learning_rate": 0.0004164353896141361, "loss": 3.2861, "step": 3516 }, { "epoch": 0.57, "learning_rate": 0.00041617792752805463, "loss": 3.0788, "step": 3517 }, { "epoch": 0.57, "learning_rate": 0.0004159204883077978, "loss": 3.1022, "step": 3518 }, { "epoch": 0.57, "learning_rate": 0.0004156630720235925, "loss": 3.1972, "step": 3519 }, { "epoch": 0.57, "learning_rate": 0.00041540567874565905, "loss": 3.0318, "step": 3520 }, { "epoch": 0.57, "learning_rate": 0.00041514830854421205, "loss": 3.191, "step": 3521 }, { "epoch": 0.57, "learning_rate": 0.00041489096148945913, "loss": 3.1712, "step": 3522 }, { "epoch": 0.57, "learning_rate": 0.0004146336376516024, "loss": 3.1524, "step": 3523 }, { "epoch": 0.57, "learning_rate": 0.000414376337100837, "loss": 3.3731, "step": 3524 }, { "epoch": 0.57, "learning_rate": 0.0004141190599073521, "loss": 3.3135, "step": 3525 }, { "epoch": 0.57, "learning_rate": 0.0004138618061413302, "loss": 3.2335, "step": 3526 }, { "epoch": 0.57, "learning_rate": 0.0004136045758729477, "loss": 3.1564, "step": 3527 }, { "epoch": 0.57, "learning_rate": 0.00041334736917237426, "loss": 3.1694, "step": 3528 }, { "epoch": 0.57, "learning_rate": 0.00041309018610977355, "loss": 3.1674, "step": 3529 }, { "epoch": 0.57, "learning_rate": 0.0004128330267553022, "loss": 3.1642, "step": 3530 }, { "epoch": 0.57, "learning_rate": 0.0004125758911791112, "loss": 3.164, "step": 3531 }, { "epoch": 0.57, "learning_rate": 0.00041231877945134413, "loss": 2.9476, "step": 3532 }, { "epoch": 0.57, "learning_rate": 0.0004120616916421387, "loss": 3.1589, "step": 3533 }, { "epoch": 0.57, "learning_rate": 0.00041180462782162584, "loss": 3.2706, "step": 3534 }, { "epoch": 0.57, "learning_rate": 0.0004115475880599301, "loss": 3.2866, "step": 3535 }, { "epoch": 0.57, "learning_rate": 0.00041129057242716906, "loss": 3.1801, "step": 3536 }, { "epoch": 0.57, "learning_rate": 0.00041103358099345446, "loss": 3.2429, "step": 3537 }, { "epoch": 0.57, "learning_rate": 0.00041077661382889067, "loss": 3.162, "step": 3538 }, { "epoch": 0.57, "learning_rate": 0.000410519671003576, "loss": 3.1663, "step": 3539 }, { "epoch": 0.57, "learning_rate": 0.0004102627525876016, "loss": 3.2447, "step": 3540 }, { "epoch": 0.57, "learning_rate": 0.00041000585865105257, "loss": 3.0776, "step": 3541 }, { "epoch": 0.57, "learning_rate": 0.0004097489892640066, "loss": 3.3406, "step": 3542 }, { "epoch": 0.57, "learning_rate": 0.0004094921444965354, "loss": 3.1378, "step": 3543 }, { "epoch": 0.57, "learning_rate": 0.0004092353244187038, "loss": 3.0408, "step": 3544 }, { "epoch": 0.57, "learning_rate": 0.0004089785291005695, "loss": 3.0257, "step": 3545 }, { "epoch": 0.57, "learning_rate": 0.0004087217586121836, "loss": 3.1976, "step": 3546 }, { "epoch": 0.57, "learning_rate": 0.0004084650130235909, "loss": 3.2189, "step": 3547 }, { "epoch": 0.57, "learning_rate": 0.00040820829240482856, "loss": 3.2109, "step": 3548 }, { "epoch": 0.57, "learning_rate": 0.00040795159682592775, "loss": 3.1772, "step": 3549 }, { "epoch": 0.57, "learning_rate": 0.00040769492635691246, "loss": 3.2018, "step": 3550 }, { "epoch": 0.57, "learning_rate": 0.00040743828106779977, "loss": 3.3782, "step": 3551 }, { "epoch": 0.57, "learning_rate": 0.00040718166102859975, "loss": 3.0642, "step": 3552 }, { "epoch": 0.57, "learning_rate": 0.00040692506630931615, "loss": 3.1237, "step": 3553 }, { "epoch": 0.57, "learning_rate": 0.00040666849697994505, "loss": 3.0363, "step": 3554 }, { "epoch": 0.57, "learning_rate": 0.0004064119531104762, "loss": 3.182, "step": 3555 }, { "epoch": 0.57, "learning_rate": 0.00040615543477089236, "loss": 3.2085, "step": 3556 }, { "epoch": 0.57, "learning_rate": 0.0004058989420311689, "loss": 3.1791, "step": 3557 }, { "epoch": 0.57, "learning_rate": 0.0004056424749612747, "loss": 3.2958, "step": 3558 }, { "epoch": 0.57, "learning_rate": 0.0004053860336311714, "loss": 3.2603, "step": 3559 }, { "epoch": 0.57, "learning_rate": 0.0004051296181108134, "loss": 3.1953, "step": 3560 }, { "epoch": 0.57, "learning_rate": 0.0004048732284701483, "loss": 3.2972, "step": 3561 }, { "epoch": 0.57, "learning_rate": 0.0004046168647791171, "loss": 3.1484, "step": 3562 }, { "epoch": 0.57, "learning_rate": 0.0004043605271076527, "loss": 3.24, "step": 3563 }, { "epoch": 0.57, "learning_rate": 0.0004041042155256819, "loss": 3.1752, "step": 3564 }, { "epoch": 0.57, "learning_rate": 0.00040384793010312363, "loss": 3.2012, "step": 3565 }, { "epoch": 0.57, "learning_rate": 0.00040359167090989005, "loss": 3.3491, "step": 3566 }, { "epoch": 0.58, "learning_rate": 0.00040333543801588615, "loss": 3.0724, "step": 3567 }, { "epoch": 0.58, "learning_rate": 0.00040307923149100977, "loss": 3.06, "step": 3568 }, { "epoch": 0.58, "learning_rate": 0.0004028230514051514, "loss": 3.3532, "step": 3569 }, { "epoch": 0.58, "learning_rate": 0.00040256689782819453, "loss": 3.2151, "step": 3570 }, { "epoch": 0.58, "learning_rate": 0.0004023107708300153, "loss": 3.1697, "step": 3571 }, { "epoch": 0.58, "learning_rate": 0.0004020546704804823, "loss": 3.1717, "step": 3572 }, { "epoch": 0.58, "learning_rate": 0.0004017985968494573, "loss": 3.042, "step": 3573 }, { "epoch": 0.58, "learning_rate": 0.00040154255000679484, "loss": 3.146, "step": 3574 }, { "epoch": 0.58, "learning_rate": 0.00040128653002234176, "loss": 3.4314, "step": 3575 }, { "epoch": 0.58, "learning_rate": 0.0004010305369659379, "loss": 3.2727, "step": 3576 }, { "epoch": 0.58, "learning_rate": 0.00040077457090741537, "loss": 3.1477, "step": 3577 }, { "epoch": 0.58, "learning_rate": 0.00040051863191659944, "loss": 3.1486, "step": 3578 }, { "epoch": 0.58, "learning_rate": 0.00040026272006330745, "loss": 3.2335, "step": 3579 }, { "epoch": 0.58, "learning_rate": 0.00040000683541734956, "loss": 3.2524, "step": 3580 }, { "epoch": 0.58, "learning_rate": 0.00039975097804852876, "loss": 3.2035, "step": 3581 }, { "epoch": 0.58, "learning_rate": 0.0003994951480266405, "loss": 3.2949, "step": 3582 }, { "epoch": 0.58, "learning_rate": 0.0003992393454214723, "loss": 3.1868, "step": 3583 }, { "epoch": 0.58, "learning_rate": 0.00039898357030280487, "loss": 3.0569, "step": 3584 }, { "epoch": 0.58, "learning_rate": 0.000398727822740411, "loss": 3.1112, "step": 3585 }, { "epoch": 0.58, "learning_rate": 0.00039847210280405577, "loss": 3.2703, "step": 3586 }, { "epoch": 0.58, "learning_rate": 0.0003982164105634974, "loss": 3.1283, "step": 3587 }, { "epoch": 0.58, "learning_rate": 0.0003979607460884862, "loss": 3.1639, "step": 3588 }, { "epoch": 0.58, "learning_rate": 0.0003977051094487647, "loss": 3.3186, "step": 3589 }, { "epoch": 0.58, "learning_rate": 0.00039744950071406816, "loss": 3.1536, "step": 3590 }, { "epoch": 0.58, "learning_rate": 0.0003971939199541239, "loss": 3.0353, "step": 3591 }, { "epoch": 0.58, "learning_rate": 0.00039693836723865176, "loss": 3.0424, "step": 3592 }, { "epoch": 0.58, "learning_rate": 0.00039668284263736443, "loss": 3.1944, "step": 3593 }, { "epoch": 0.58, "learning_rate": 0.00039642734621996614, "loss": 3.3091, "step": 3594 }, { "epoch": 0.58, "learning_rate": 0.0003961718780561537, "loss": 3.2013, "step": 3595 }, { "epoch": 0.58, "learning_rate": 0.0003959164382156164, "loss": 3.2532, "step": 3596 }, { "epoch": 0.58, "learning_rate": 0.0003956610267680356, "loss": 3.0643, "step": 3597 }, { "epoch": 0.58, "learning_rate": 0.00039540564378308493, "loss": 3.0848, "step": 3598 }, { "epoch": 0.58, "learning_rate": 0.00039515028933043064, "loss": 3.2112, "step": 3599 }, { "epoch": 0.58, "learning_rate": 0.00039489496347973064, "loss": 3.1576, "step": 3600 }, { "epoch": 0.58, "learning_rate": 0.0003946396663006352, "loss": 3.1201, "step": 3601 }, { "epoch": 0.58, "learning_rate": 0.000394384397862787, "loss": 3.2159, "step": 3602 }, { "epoch": 0.58, "learning_rate": 0.00039412915823582056, "loss": 3.1391, "step": 3603 }, { "epoch": 0.58, "learning_rate": 0.0003938739474893629, "loss": 3.1118, "step": 3604 }, { "epoch": 0.58, "learning_rate": 0.00039361876569303267, "loss": 3.0487, "step": 3605 }, { "epoch": 0.58, "learning_rate": 0.0003933636129164413, "loss": 3.1398, "step": 3606 }, { "epoch": 0.58, "learning_rate": 0.0003931084892291917, "loss": 3.1341, "step": 3607 }, { "epoch": 0.58, "learning_rate": 0.0003928533947008791, "loss": 3.162, "step": 3608 }, { "epoch": 0.58, "learning_rate": 0.0003925983294010907, "loss": 3.2357, "step": 3609 }, { "epoch": 0.58, "learning_rate": 0.00039234329339940587, "loss": 3.1344, "step": 3610 }, { "epoch": 0.58, "learning_rate": 0.00039208828676539557, "loss": 3.2706, "step": 3611 }, { "epoch": 0.58, "learning_rate": 0.0003918333095686235, "loss": 3.0659, "step": 3612 }, { "epoch": 0.58, "learning_rate": 0.00039157836187864474, "loss": 3.3347, "step": 3613 }, { "epoch": 0.58, "learning_rate": 0.00039132344376500647, "loss": 3.2705, "step": 3614 }, { "epoch": 0.58, "learning_rate": 0.00039106855529724764, "loss": 3.1605, "step": 3615 }, { "epoch": 0.58, "learning_rate": 0.00039081369654489956, "loss": 3.2678, "step": 3616 }, { "epoch": 0.58, "learning_rate": 0.0003905588675774848, "loss": 3.1127, "step": 3617 }, { "epoch": 0.58, "learning_rate": 0.0003903040684645184, "loss": 3.264, "step": 3618 }, { "epoch": 0.58, "learning_rate": 0.00039004929927550716, "loss": 2.9704, "step": 3619 }, { "epoch": 0.58, "learning_rate": 0.0003897945600799493, "loss": 3.1316, "step": 3620 }, { "epoch": 0.58, "learning_rate": 0.0003895398509473352, "loss": 3.3078, "step": 3621 }, { "epoch": 0.58, "learning_rate": 0.00038928517194714707, "loss": 3.4036, "step": 3622 }, { "epoch": 0.58, "learning_rate": 0.00038903052314885855, "loss": 3.279, "step": 3623 }, { "epoch": 0.58, "learning_rate": 0.0003887759046219355, "loss": 3.5028, "step": 3624 }, { "epoch": 0.58, "learning_rate": 0.0003885213164358354, "loss": 3.0752, "step": 3625 }, { "epoch": 0.58, "learning_rate": 0.0003882667586600071, "loss": 3.2094, "step": 3626 }, { "epoch": 0.58, "learning_rate": 0.00038801223136389167, "loss": 3.2529, "step": 3627 }, { "epoch": 0.58, "learning_rate": 0.00038775773461692157, "loss": 3.2987, "step": 3628 }, { "epoch": 0.59, "learning_rate": 0.00038750326848852073, "loss": 3.1633, "step": 3629 }, { "epoch": 0.59, "learning_rate": 0.00038724883304810503, "loss": 3.2604, "step": 3630 }, { "epoch": 0.59, "learning_rate": 0.00038699442836508227, "loss": 3.1565, "step": 3631 }, { "epoch": 0.59, "learning_rate": 0.00038674005450885115, "loss": 3.2225, "step": 3632 }, { "epoch": 0.59, "learning_rate": 0.0003864857115488025, "loss": 3.3834, "step": 3633 }, { "epoch": 0.59, "learning_rate": 0.00038623139955431857, "loss": 3.174, "step": 3634 }, { "epoch": 0.59, "learning_rate": 0.00038597711859477286, "loss": 3.2362, "step": 3635 }, { "epoch": 0.59, "learning_rate": 0.0003857228687395308, "loss": 3.2945, "step": 3636 }, { "epoch": 0.59, "learning_rate": 0.0003854686500579494, "loss": 3.1396, "step": 3637 }, { "epoch": 0.59, "learning_rate": 0.0003852144626193768, "loss": 3.1494, "step": 3638 }, { "epoch": 0.59, "learning_rate": 0.00038496030649315296, "loss": 3.2615, "step": 3639 }, { "epoch": 0.59, "learning_rate": 0.000384706181748609, "loss": 3.0965, "step": 3640 }, { "epoch": 0.59, "learning_rate": 0.0003844520884550675, "loss": 3.1378, "step": 3641 }, { "epoch": 0.59, "learning_rate": 0.0003841980266818426, "loss": 3.1798, "step": 3642 }, { "epoch": 0.59, "learning_rate": 0.0003839439964982402, "loss": 3.1281, "step": 3643 }, { "epoch": 0.59, "learning_rate": 0.00038368999797355676, "loss": 3.1302, "step": 3644 }, { "epoch": 0.59, "learning_rate": 0.0003834360311770808, "loss": 3.0452, "step": 3645 }, { "epoch": 0.59, "learning_rate": 0.0003831820961780917, "loss": 3.1079, "step": 3646 }, { "epoch": 0.59, "learning_rate": 0.0003829281930458607, "loss": 3.3207, "step": 3647 }, { "epoch": 0.59, "learning_rate": 0.0003826743218496496, "loss": 3.2192, "step": 3648 }, { "epoch": 0.59, "learning_rate": 0.00038242048265871233, "loss": 3.1293, "step": 3649 }, { "epoch": 0.59, "learning_rate": 0.0003821666755422935, "loss": 3.024, "step": 3650 }, { "epoch": 0.59, "learning_rate": 0.00038191290056962927, "loss": 3.1154, "step": 3651 }, { "epoch": 0.59, "learning_rate": 0.0003816591578099468, "loss": 3.1187, "step": 3652 }, { "epoch": 0.59, "learning_rate": 0.0003814054473324647, "loss": 3.1588, "step": 3653 }, { "epoch": 0.59, "learning_rate": 0.00038115176920639263, "loss": 3.1915, "step": 3654 }, { "epoch": 0.59, "learning_rate": 0.0003808981235009311, "loss": 3.1378, "step": 3655 }, { "epoch": 0.59, "learning_rate": 0.00038064451028527267, "loss": 3.0514, "step": 3656 }, { "epoch": 0.59, "learning_rate": 0.0003803909296286002, "loss": 3.156, "step": 3657 }, { "epoch": 0.59, "learning_rate": 0.000380137381600088, "loss": 3.3135, "step": 3658 }, { "epoch": 0.59, "learning_rate": 0.0003798838662689016, "loss": 3.1641, "step": 3659 }, { "epoch": 0.59, "learning_rate": 0.00037963038370419715, "loss": 3.106, "step": 3660 }, { "epoch": 0.59, "learning_rate": 0.00037937693397512224, "loss": 3.0765, "step": 3661 }, { "epoch": 0.59, "learning_rate": 0.0003791235171508157, "loss": 3.2622, "step": 3662 }, { "epoch": 0.59, "learning_rate": 0.0003788701333004069, "loss": 3.1377, "step": 3663 }, { "epoch": 0.59, "learning_rate": 0.0003786167824930164, "loss": 3.2103, "step": 3664 }, { "epoch": 0.59, "learning_rate": 0.0003783634647977559, "loss": 3.2589, "step": 3665 }, { "epoch": 0.59, "learning_rate": 0.00037811018028372775, "loss": 3.0918, "step": 3666 }, { "epoch": 0.59, "learning_rate": 0.0003778569290200254, "loss": 3.219, "step": 3667 }, { "epoch": 0.59, "learning_rate": 0.0003776037110757336, "loss": 3.2095, "step": 3668 }, { "epoch": 0.59, "learning_rate": 0.0003773505265199275, "loss": 3.4172, "step": 3669 }, { "epoch": 0.59, "learning_rate": 0.0003770973754216732, "loss": 3.2771, "step": 3670 }, { "epoch": 0.59, "learning_rate": 0.000376844257850028, "loss": 3.1861, "step": 3671 }, { "epoch": 0.59, "learning_rate": 0.0003765911738740397, "loss": 3.2049, "step": 3672 }, { "epoch": 0.59, "learning_rate": 0.00037633812356274694, "loss": 3.225, "step": 3673 }, { "epoch": 0.59, "learning_rate": 0.00037608510698517974, "loss": 3.2639, "step": 3674 }, { "epoch": 0.59, "learning_rate": 0.00037583212421035824, "loss": 3.1461, "step": 3675 }, { "epoch": 0.59, "learning_rate": 0.0003755791753072935, "loss": 3.1747, "step": 3676 }, { "epoch": 0.59, "learning_rate": 0.00037532626034498775, "loss": 3.2179, "step": 3677 }, { "epoch": 0.59, "learning_rate": 0.00037507337939243334, "loss": 3.2245, "step": 3678 }, { "epoch": 0.59, "learning_rate": 0.000374820532518614, "loss": 3.1879, "step": 3679 }, { "epoch": 0.59, "learning_rate": 0.00037456771979250334, "loss": 3.1053, "step": 3680 }, { "epoch": 0.59, "learning_rate": 0.00037431494128306673, "loss": 3.2661, "step": 3681 }, { "epoch": 0.59, "learning_rate": 0.0003740621970592594, "loss": 3.15, "step": 3682 }, { "epoch": 0.59, "learning_rate": 0.00037380948719002743, "loss": 3.1301, "step": 3683 }, { "epoch": 0.59, "learning_rate": 0.00037355681174430745, "loss": 3.2726, "step": 3684 }, { "epoch": 0.59, "learning_rate": 0.00037330417079102697, "loss": 3.2972, "step": 3685 }, { "epoch": 0.59, "learning_rate": 0.00037305156439910363, "loss": 3.2773, "step": 3686 }, { "epoch": 0.59, "learning_rate": 0.0003727989926374463, "loss": 3.0526, "step": 3687 }, { "epoch": 0.59, "learning_rate": 0.0003725464555749539, "loss": 3.1856, "step": 3688 }, { "epoch": 0.59, "learning_rate": 0.00037229395328051604, "loss": 3.108, "step": 3689 }, { "epoch": 0.59, "learning_rate": 0.0003720414858230126, "loss": 3.247, "step": 3690 }, { "epoch": 0.6, "learning_rate": 0.0003717890532713145, "loss": 3.2252, "step": 3691 }, { "epoch": 0.6, "learning_rate": 0.0003715366556942825, "loss": 3.23, "step": 3692 }, { "epoch": 0.6, "learning_rate": 0.00037128429316076844, "loss": 3.2128, "step": 3693 }, { "epoch": 0.6, "learning_rate": 0.0003710319657396143, "loss": 3.1424, "step": 3694 }, { "epoch": 0.6, "learning_rate": 0.0003707796734996522, "loss": 3.1431, "step": 3695 }, { "epoch": 0.6, "learning_rate": 0.00037052741650970525, "loss": 3.1507, "step": 3696 }, { "epoch": 0.6, "learning_rate": 0.0003702751948385864, "loss": 3.0993, "step": 3697 }, { "epoch": 0.6, "learning_rate": 0.0003700230085550992, "loss": 3.0506, "step": 3698 }, { "epoch": 0.6, "learning_rate": 0.0003697708577280377, "loss": 3.1036, "step": 3699 }, { "epoch": 0.6, "learning_rate": 0.0003695187424261862, "loss": 3.2103, "step": 3700 }, { "epoch": 0.6, "learning_rate": 0.000369266662718319, "loss": 3.1875, "step": 3701 }, { "epoch": 0.6, "learning_rate": 0.0003690146186732012, "loss": 3.0278, "step": 3702 }, { "epoch": 0.6, "learning_rate": 0.00036876261035958765, "loss": 3.2181, "step": 3703 }, { "epoch": 0.6, "learning_rate": 0.00036851063784622376, "loss": 3.2134, "step": 3704 }, { "epoch": 0.6, "learning_rate": 0.00036825870120184496, "loss": 3.0586, "step": 3705 }, { "epoch": 0.6, "learning_rate": 0.00036800680049517756, "loss": 3.3617, "step": 3706 }, { "epoch": 0.6, "learning_rate": 0.000367754935794937, "loss": 3.1478, "step": 3707 }, { "epoch": 0.6, "learning_rate": 0.00036750310716982996, "loss": 3.1093, "step": 3708 }, { "epoch": 0.6, "learning_rate": 0.0003672513146885523, "loss": 3.1908, "step": 3709 }, { "epoch": 0.6, "learning_rate": 0.0003669995584197908, "loss": 3.1753, "step": 3710 }, { "epoch": 0.6, "learning_rate": 0.00036674783843222177, "loss": 3.3199, "step": 3711 }, { "epoch": 0.6, "learning_rate": 0.0003664961547945123, "loss": 3.1907, "step": 3712 }, { "epoch": 0.6, "learning_rate": 0.0003662445075753189, "loss": 3.0726, "step": 3713 }, { "epoch": 0.6, "learning_rate": 0.0003659928968432886, "loss": 3.132, "step": 3714 }, { "epoch": 0.6, "learning_rate": 0.00036574132266705805, "loss": 3.3089, "step": 3715 }, { "epoch": 0.6, "learning_rate": 0.0003654897851152544, "loss": 3.2814, "step": 3716 }, { "epoch": 0.6, "learning_rate": 0.0003652382842564943, "loss": 3.0118, "step": 3717 }, { "epoch": 0.6, "learning_rate": 0.0003649868201593851, "loss": 3.133, "step": 3718 }, { "epoch": 0.6, "learning_rate": 0.0003647353928925234, "loss": 3.2615, "step": 3719 }, { "epoch": 0.6, "learning_rate": 0.00036448400252449617, "loss": 3.1365, "step": 3720 }, { "epoch": 0.6, "learning_rate": 0.00036423264912388, "loss": 3.0687, "step": 3721 }, { "epoch": 0.6, "learning_rate": 0.0003639813327592419, "loss": 3.2587, "step": 3722 }, { "epoch": 0.6, "learning_rate": 0.000363730053499138, "loss": 3.2239, "step": 3723 }, { "epoch": 0.6, "learning_rate": 0.0003634788114121154, "loss": 3.183, "step": 3724 }, { "epoch": 0.6, "learning_rate": 0.0003632276065667098, "loss": 3.0569, "step": 3725 }, { "epoch": 0.6, "learning_rate": 0.00036297643903144796, "loss": 3.3382, "step": 3726 }, { "epoch": 0.6, "learning_rate": 0.00036272530887484535, "loss": 3.4445, "step": 3727 }, { "epoch": 0.6, "learning_rate": 0.00036247421616540816, "loss": 3.1898, "step": 3728 }, { "epoch": 0.6, "learning_rate": 0.0003622231609716317, "loss": 3.3619, "step": 3729 }, { "epoch": 0.6, "learning_rate": 0.00036197214336200137, "loss": 3.177, "step": 3730 }, { "epoch": 0.6, "learning_rate": 0.0003617211634049925, "loss": 3.2446, "step": 3731 }, { "epoch": 0.6, "learning_rate": 0.0003614702211690698, "loss": 3.1348, "step": 3732 }, { "epoch": 0.6, "learning_rate": 0.0003612193167226876, "loss": 3.0741, "step": 3733 }, { "epoch": 0.6, "learning_rate": 0.0003609684501342905, "loss": 3.167, "step": 3734 }, { "epoch": 0.6, "learning_rate": 0.00036071762147231196, "loss": 3.0626, "step": 3735 }, { "epoch": 0.6, "learning_rate": 0.00036046683080517573, "loss": 3.1588, "step": 3736 }, { "epoch": 0.6, "learning_rate": 0.0003602160782012952, "loss": 3.1651, "step": 3737 }, { "epoch": 0.6, "learning_rate": 0.0003599653637290731, "loss": 3.1758, "step": 3738 }, { "epoch": 0.6, "learning_rate": 0.0003597146874569015, "loss": 3.2, "step": 3739 }, { "epoch": 0.6, "learning_rate": 0.0003594640494531628, "loss": 3.1079, "step": 3740 }, { "epoch": 0.6, "learning_rate": 0.0003592134497862283, "loss": 3.1873, "step": 3741 }, { "epoch": 0.6, "learning_rate": 0.00035896288852445903, "loss": 3.1328, "step": 3742 }, { "epoch": 0.6, "learning_rate": 0.000358712365736206, "loss": 3.2796, "step": 3743 }, { "epoch": 0.6, "learning_rate": 0.00035846188148980896, "loss": 3.2468, "step": 3744 }, { "epoch": 0.6, "learning_rate": 0.0003582114358535976, "loss": 3.425, "step": 3745 }, { "epoch": 0.6, "learning_rate": 0.0003579610288958911, "loss": 3.2191, "step": 3746 }, { "epoch": 0.6, "learning_rate": 0.00035771066068499767, "loss": 3.1012, "step": 3747 }, { "epoch": 0.6, "learning_rate": 0.0003574603312892155, "loss": 3.0041, "step": 3748 }, { "epoch": 0.6, "learning_rate": 0.00035721004077683206, "loss": 3.1309, "step": 3749 }, { "epoch": 0.6, "learning_rate": 0.00035695978921612383, "loss": 3.1496, "step": 3750 }, { "epoch": 0.6, "learning_rate": 0.0003567095766753572, "loss": 3.2345, "step": 3751 }, { "epoch": 0.6, "learning_rate": 0.0003564594032227875, "loss": 3.1334, "step": 3752 }, { "epoch": 0.61, "learning_rate": 0.0003562092689266595, "loss": 3.1289, "step": 3753 }, { "epoch": 0.61, "learning_rate": 0.00035595917385520756, "loss": 3.1095, "step": 3754 }, { "epoch": 0.61, "learning_rate": 0.0003557091180766547, "loss": 3.0637, "step": 3755 }, { "epoch": 0.61, "learning_rate": 0.000355459101659214, "loss": 3.27, "step": 3756 }, { "epoch": 0.61, "learning_rate": 0.00035520912467108747, "loss": 3.078, "step": 3757 }, { "epoch": 0.61, "learning_rate": 0.00035495918718046624, "loss": 3.2856, "step": 3758 }, { "epoch": 0.61, "learning_rate": 0.0003547092892555306, "loss": 3.1458, "step": 3759 }, { "epoch": 0.61, "learning_rate": 0.00035445943096445044, "loss": 3.2951, "step": 3760 }, { "epoch": 0.61, "learning_rate": 0.00035420961237538437, "loss": 3.348, "step": 3761 }, { "epoch": 0.61, "learning_rate": 0.00035395983355648067, "loss": 3.2594, "step": 3762 }, { "epoch": 0.61, "learning_rate": 0.00035371009457587645, "loss": 3.293, "step": 3763 }, { "epoch": 0.61, "learning_rate": 0.00035346039550169785, "loss": 3.2117, "step": 3764 }, { "epoch": 0.61, "learning_rate": 0.0003532107364020605, "loss": 3.1945, "step": 3765 }, { "epoch": 0.61, "learning_rate": 0.00035296111734506877, "loss": 3.1235, "step": 3766 }, { "epoch": 0.61, "learning_rate": 0.00035271153839881605, "loss": 3.2138, "step": 3767 }, { "epoch": 0.61, "learning_rate": 0.00035246199963138536, "loss": 3.137, "step": 3768 }, { "epoch": 0.61, "learning_rate": 0.00035221250111084837, "loss": 3.1597, "step": 3769 }, { "epoch": 0.61, "learning_rate": 0.00035196304290526545, "loss": 3.1508, "step": 3770 }, { "epoch": 0.61, "learning_rate": 0.00035171362508268665, "loss": 3.1114, "step": 3771 }, { "epoch": 0.61, "learning_rate": 0.0003514642477111505, "loss": 3.0672, "step": 3772 }, { "epoch": 0.61, "learning_rate": 0.0003512149108586845, "loss": 3.3381, "step": 3773 }, { "epoch": 0.61, "learning_rate": 0.00035096561459330557, "loss": 3.1665, "step": 3774 }, { "epoch": 0.61, "learning_rate": 0.00035071635898301914, "loss": 3.0854, "step": 3775 }, { "epoch": 0.61, "learning_rate": 0.0003504671440958195, "loss": 3.2345, "step": 3776 }, { "epoch": 0.61, "learning_rate": 0.0003502179699996903, "loss": 3.1385, "step": 3777 }, { "epoch": 0.61, "learning_rate": 0.00034996883676260347, "loss": 3.0541, "step": 3778 }, { "epoch": 0.61, "learning_rate": 0.0003497197444525201, "loss": 3.2339, "step": 3779 }, { "epoch": 0.61, "learning_rate": 0.00034947069313738993, "loss": 3.3471, "step": 3780 }, { "epoch": 0.61, "learning_rate": 0.0003492216828851521, "loss": 3.0803, "step": 3781 }, { "epoch": 0.61, "learning_rate": 0.00034897271376373385, "loss": 3.2488, "step": 3782 }, { "epoch": 0.61, "learning_rate": 0.0003487237858410516, "loss": 3.2869, "step": 3783 }, { "epoch": 0.61, "learning_rate": 0.0003484748991850102, "loss": 3.1695, "step": 3784 }, { "epoch": 0.61, "learning_rate": 0.0003482260538635036, "loss": 3.2834, "step": 3785 }, { "epoch": 0.61, "learning_rate": 0.00034797724994441414, "loss": 2.9703, "step": 3786 }, { "epoch": 0.61, "learning_rate": 0.0003477284874956134, "loss": 3.1104, "step": 3787 }, { "epoch": 0.61, "learning_rate": 0.00034747976658496097, "loss": 3.1409, "step": 3788 }, { "epoch": 0.61, "learning_rate": 0.00034723108728030563, "loss": 3.2163, "step": 3789 }, { "epoch": 0.61, "learning_rate": 0.00034698244964948433, "loss": 3.2635, "step": 3790 }, { "epoch": 0.61, "learning_rate": 0.00034673385376032335, "loss": 3.0444, "step": 3791 }, { "epoch": 0.61, "learning_rate": 0.0003464852996806367, "loss": 3.0745, "step": 3792 }, { "epoch": 0.61, "learning_rate": 0.00034623678747822785, "loss": 3.1054, "step": 3793 }, { "epoch": 0.61, "learning_rate": 0.00034598831722088826, "loss": 3.29, "step": 3794 }, { "epoch": 0.61, "learning_rate": 0.0003457398889763982, "loss": 3.0743, "step": 3795 }, { "epoch": 0.61, "learning_rate": 0.00034549150281252633, "loss": 3.1037, "step": 3796 }, { "epoch": 0.61, "learning_rate": 0.00034524315879703006, "loss": 3.1515, "step": 3797 }, { "epoch": 0.61, "learning_rate": 0.00034499485699765486, "loss": 3.3224, "step": 3798 }, { "epoch": 0.61, "learning_rate": 0.0003447465974821352, "loss": 3.1269, "step": 3799 }, { "epoch": 0.61, "learning_rate": 0.00034449838031819403, "loss": 3.0513, "step": 3800 }, { "epoch": 0.61, "learning_rate": 0.0003442502055735421, "loss": 3.1829, "step": 3801 }, { "epoch": 0.61, "learning_rate": 0.00034400207331587924, "loss": 3.1715, "step": 3802 }, { "epoch": 0.61, "learning_rate": 0.0003437539836128935, "loss": 3.3458, "step": 3803 }, { "epoch": 0.61, "learning_rate": 0.00034350593653226095, "loss": 3.2647, "step": 3804 }, { "epoch": 0.61, "learning_rate": 0.0003432579321416464, "loss": 3.1644, "step": 3805 }, { "epoch": 0.61, "learning_rate": 0.0003430099705087034, "loss": 3.1215, "step": 3806 }, { "epoch": 0.61, "learning_rate": 0.0003427620517010732, "loss": 3.2238, "step": 3807 }, { "epoch": 0.61, "learning_rate": 0.0003425141757863854, "loss": 3.2699, "step": 3808 }, { "epoch": 0.61, "learning_rate": 0.0003422663428322582, "loss": 3.1745, "step": 3809 }, { "epoch": 0.61, "learning_rate": 0.000342018552906298, "loss": 3.1356, "step": 3810 }, { "epoch": 0.61, "learning_rate": 0.0003417708060760992, "loss": 3.1268, "step": 3811 }, { "epoch": 0.61, "learning_rate": 0.00034152310240924503, "loss": 3.1308, "step": 3812 }, { "epoch": 0.61, "learning_rate": 0.00034127544197330636, "loss": 3.0954, "step": 3813 }, { "epoch": 0.61, "learning_rate": 0.00034102782483584235, "loss": 3.2691, "step": 3814 }, { "epoch": 0.62, "learning_rate": 0.0003407802510644008, "loss": 3.141, "step": 3815 }, { "epoch": 0.62, "learning_rate": 0.0003405327207265171, "loss": 3.2359, "step": 3816 }, { "epoch": 0.62, "learning_rate": 0.0003402852338897151, "loss": 3.1622, "step": 3817 }, { "epoch": 0.62, "learning_rate": 0.0003400377906215069, "loss": 3.111, "step": 3818 }, { "epoch": 0.62, "learning_rate": 0.0003397903909893924, "loss": 3.1455, "step": 3819 }, { "epoch": 0.62, "learning_rate": 0.00033954303506085985, "loss": 3.1247, "step": 3820 }, { "epoch": 0.62, "learning_rate": 0.0003392957229033855, "loss": 3.222, "step": 3821 }, { "epoch": 0.62, "learning_rate": 0.0003390484545844334, "loss": 3.2638, "step": 3822 }, { "epoch": 0.62, "learning_rate": 0.0003388012301714559, "loss": 3.1266, "step": 3823 }, { "epoch": 0.62, "learning_rate": 0.00033855404973189376, "loss": 3.0327, "step": 3824 }, { "epoch": 0.62, "learning_rate": 0.00033830691333317496, "loss": 3.3177, "step": 3825 }, { "epoch": 0.62, "learning_rate": 0.00033805982104271606, "loss": 3.0392, "step": 3826 }, { "epoch": 0.62, "learning_rate": 0.0003378127729279212, "loss": 3.1377, "step": 3827 }, { "epoch": 0.62, "learning_rate": 0.0003375657690561826, "loss": 3.1744, "step": 3828 }, { "epoch": 0.62, "learning_rate": 0.0003373188094948807, "loss": 3.2019, "step": 3829 }, { "epoch": 0.62, "learning_rate": 0.00033707189431138323, "loss": 3.0339, "step": 3830 }, { "epoch": 0.62, "learning_rate": 0.0003368250235730466, "loss": 3.1042, "step": 3831 }, { "epoch": 0.62, "learning_rate": 0.0003365781973472144, "loss": 3.0907, "step": 3832 }, { "epoch": 0.62, "learning_rate": 0.0003363314157012185, "loss": 3.2083, "step": 3833 }, { "epoch": 0.62, "learning_rate": 0.0003360846787023785, "loss": 3.1847, "step": 3834 }, { "epoch": 0.62, "learning_rate": 0.00033583798641800166, "loss": 3.2091, "step": 3835 }, { "epoch": 0.62, "learning_rate": 0.000335591338915383, "loss": 3.3147, "step": 3836 }, { "epoch": 0.62, "learning_rate": 0.00033534473626180587, "loss": 3.2378, "step": 3837 }, { "epoch": 0.62, "learning_rate": 0.00033509817852454094, "loss": 3.0241, "step": 3838 }, { "epoch": 0.62, "learning_rate": 0.0003348516657708466, "loss": 3.2588, "step": 3839 }, { "epoch": 0.62, "learning_rate": 0.00033460519806796906, "loss": 3.0929, "step": 3840 }, { "epoch": 0.62, "learning_rate": 0.0003343587754831424, "loss": 3.2418, "step": 3841 }, { "epoch": 0.62, "learning_rate": 0.00033411239808358787, "loss": 3.302, "step": 3842 }, { "epoch": 0.62, "learning_rate": 0.00033386606593651503, "loss": 3.0182, "step": 3843 }, { "epoch": 0.62, "learning_rate": 0.00033361977910912103, "loss": 3.1076, "step": 3844 }, { "epoch": 0.62, "learning_rate": 0.0003333735376685901, "loss": 3.1627, "step": 3845 }, { "epoch": 0.62, "learning_rate": 0.0003331273416820947, "loss": 3.0565, "step": 3846 }, { "epoch": 0.62, "learning_rate": 0.0003328811912167945, "loss": 3.2785, "step": 3847 }, { "epoch": 0.62, "learning_rate": 0.00033263508633983677, "loss": 3.1276, "step": 3848 }, { "epoch": 0.62, "learning_rate": 0.0003323890271183566, "loss": 3.1734, "step": 3849 }, { "epoch": 0.62, "learning_rate": 0.0003321430136194766, "loss": 3.3011, "step": 3850 }, { "epoch": 0.62, "learning_rate": 0.00033189704591030675, "loss": 3.2495, "step": 3851 }, { "epoch": 0.62, "learning_rate": 0.0003316511240579445, "loss": 3.139, "step": 3852 }, { "epoch": 0.62, "learning_rate": 0.0003314052481294748, "loss": 3.3046, "step": 3853 }, { "epoch": 0.62, "learning_rate": 0.00033115941819197045, "loss": 3.2697, "step": 3854 }, { "epoch": 0.62, "learning_rate": 0.00033091363431249094, "loss": 3.0877, "step": 3855 }, { "epoch": 0.62, "learning_rate": 0.00033066789655808416, "loss": 3.1205, "step": 3856 }, { "epoch": 0.62, "learning_rate": 0.00033042220499578454, "loss": 3.1523, "step": 3857 }, { "epoch": 0.62, "learning_rate": 0.0003301765596926145, "loss": 3.0972, "step": 3858 }, { "epoch": 0.62, "learning_rate": 0.0003299309607155835, "loss": 2.9794, "step": 3859 }, { "epoch": 0.62, "learning_rate": 0.0003296854081316887, "loss": 3.2037, "step": 3860 }, { "epoch": 0.62, "learning_rate": 0.00032943990200791395, "loss": 3.1062, "step": 3861 }, { "epoch": 0.62, "learning_rate": 0.00032919444241123134, "loss": 3.3388, "step": 3862 }, { "epoch": 0.62, "learning_rate": 0.0003289490294085996, "loss": 3.3456, "step": 3863 }, { "epoch": 0.62, "learning_rate": 0.00032870366306696495, "loss": 3.1253, "step": 3864 }, { "epoch": 0.62, "learning_rate": 0.00032845834345326085, "loss": 3.0594, "step": 3865 }, { "epoch": 0.62, "learning_rate": 0.0003282130706344082, "loss": 3.205, "step": 3866 }, { "epoch": 0.62, "learning_rate": 0.00032796784467731466, "loss": 3.2603, "step": 3867 }, { "epoch": 0.62, "learning_rate": 0.00032772266564887566, "loss": 3.3066, "step": 3868 }, { "epoch": 0.62, "learning_rate": 0.00032747753361597363, "loss": 3.086, "step": 3869 }, { "epoch": 0.62, "learning_rate": 0.0003272324486454782, "loss": 3.249, "step": 3870 }, { "epoch": 0.62, "learning_rate": 0.00032698741080424576, "loss": 3.0652, "step": 3871 }, { "epoch": 0.62, "learning_rate": 0.0003267424201591205, "loss": 3.218, "step": 3872 }, { "epoch": 0.62, "learning_rate": 0.00032649747677693307, "loss": 3.12, "step": 3873 }, { "epoch": 0.62, "learning_rate": 0.00032625258072450203, "loss": 3.1962, "step": 3874 }, { "epoch": 0.62, "learning_rate": 0.00032600773206863245, "loss": 3.1178, "step": 3875 }, { "epoch": 0.62, "learning_rate": 0.0003257629308761164, "loss": 3.239, "step": 3876 }, { "epoch": 0.63, "learning_rate": 0.00032551817721373333, "loss": 3.1471, "step": 3877 }, { "epoch": 0.63, "learning_rate": 0.0003252734711482497, "loss": 3.1145, "step": 3878 }, { "epoch": 0.63, "learning_rate": 0.0003250288127464186, "loss": 3.2499, "step": 3879 }, { "epoch": 0.63, "learning_rate": 0.0003247842020749805, "loss": 3.063, "step": 3880 }, { "epoch": 0.63, "learning_rate": 0.000324539639200663, "loss": 3.1301, "step": 3881 }, { "epoch": 0.63, "learning_rate": 0.00032429512419018027, "loss": 3.2871, "step": 3882 }, { "epoch": 0.63, "learning_rate": 0.0003240506571102334, "loss": 3.0852, "step": 3883 }, { "epoch": 0.63, "learning_rate": 0.00032380623802751073, "loss": 3.2337, "step": 3884 }, { "epoch": 0.63, "learning_rate": 0.00032356186700868727, "loss": 3.1967, "step": 3885 }, { "epoch": 0.63, "learning_rate": 0.0003233175441204249, "loss": 3.2311, "step": 3886 }, { "epoch": 0.63, "learning_rate": 0.0003230732694293728, "loss": 3.2081, "step": 3887 }, { "epoch": 0.63, "learning_rate": 0.0003228290430021664, "loss": 3.1216, "step": 3888 }, { "epoch": 0.63, "learning_rate": 0.00032258486490542836, "loss": 3.1377, "step": 3889 }, { "epoch": 0.63, "learning_rate": 0.000322340735205768, "loss": 3.1451, "step": 3890 }, { "epoch": 0.63, "learning_rate": 0.0003220966539697813, "loss": 3.0131, "step": 3891 }, { "epoch": 0.63, "learning_rate": 0.00032185262126405113, "loss": 3.2489, "step": 3892 }, { "epoch": 0.63, "learning_rate": 0.00032160863715514763, "loss": 3.2848, "step": 3893 }, { "epoch": 0.63, "learning_rate": 0.00032136470170962686, "loss": 3.3124, "step": 3894 }, { "epoch": 0.63, "learning_rate": 0.0003211208149940321, "loss": 3.3503, "step": 3895 }, { "epoch": 0.63, "learning_rate": 0.00032087697707489327, "loss": 3.198, "step": 3896 }, { "epoch": 0.63, "learning_rate": 0.0003206331880187267, "loss": 3.1232, "step": 3897 }, { "epoch": 0.63, "learning_rate": 0.0003203894478920356, "loss": 3.1928, "step": 3898 }, { "epoch": 0.63, "learning_rate": 0.0003201457567613102, "loss": 3.2272, "step": 3899 }, { "epoch": 0.63, "learning_rate": 0.0003199021146930268, "loss": 3.2764, "step": 3900 }, { "epoch": 0.63, "learning_rate": 0.0003196585217536485, "loss": 3.1136, "step": 3901 }, { "epoch": 0.63, "learning_rate": 0.00031941497800962496, "loss": 3.1489, "step": 3902 }, { "epoch": 0.63, "learning_rate": 0.0003191714835273927, "loss": 3.0662, "step": 3903 }, { "epoch": 0.63, "learning_rate": 0.00031892803837337436, "loss": 3.1778, "step": 3904 }, { "epoch": 0.63, "learning_rate": 0.00031868464261397924, "loss": 3.0942, "step": 3905 }, { "epoch": 0.63, "learning_rate": 0.0003184412963156036, "loss": 3.1304, "step": 3906 }, { "epoch": 0.63, "learning_rate": 0.0003181979995446298, "loss": 3.1426, "step": 3907 }, { "epoch": 0.63, "learning_rate": 0.00031795475236742667, "loss": 3.26, "step": 3908 }, { "epoch": 0.63, "learning_rate": 0.00031771155485034973, "loss": 3.2953, "step": 3909 }, { "epoch": 0.63, "learning_rate": 0.0003174684070597408, "loss": 3.2263, "step": 3910 }, { "epoch": 0.63, "learning_rate": 0.0003172253090619279, "loss": 3.1705, "step": 3911 }, { "epoch": 0.63, "learning_rate": 0.000316982260923226, "loss": 3.072, "step": 3912 }, { "epoch": 0.63, "learning_rate": 0.00031673926270993634, "loss": 3.2148, "step": 3913 }, { "epoch": 0.63, "learning_rate": 0.00031649631448834616, "loss": 3.1464, "step": 3914 }, { "epoch": 0.63, "learning_rate": 0.0003162534163247295, "loss": 3.0728, "step": 3915 }, { "epoch": 0.63, "learning_rate": 0.00031601056828534633, "loss": 3.1059, "step": 3916 }, { "epoch": 0.63, "learning_rate": 0.00031576777043644316, "loss": 3.319, "step": 3917 }, { "epoch": 0.63, "learning_rate": 0.00031552502284425306, "loss": 3.195, "step": 3918 }, { "epoch": 0.63, "learning_rate": 0.00031528232557499514, "loss": 3.0563, "step": 3919 }, { "epoch": 0.63, "learning_rate": 0.00031503967869487453, "loss": 3.2843, "step": 3920 }, { "epoch": 0.63, "learning_rate": 0.0003147970822700832, "loss": 3.1263, "step": 3921 }, { "epoch": 0.63, "learning_rate": 0.00031455453636679867, "loss": 3.2415, "step": 3922 }, { "epoch": 0.63, "learning_rate": 0.00031431204105118515, "loss": 3.0898, "step": 3923 }, { "epoch": 0.63, "learning_rate": 0.0003140695963893933, "loss": 3.1328, "step": 3924 }, { "epoch": 0.63, "learning_rate": 0.0003138272024475593, "loss": 3.2403, "step": 3925 }, { "epoch": 0.63, "learning_rate": 0.0003135848592918057, "loss": 3.2497, "step": 3926 }, { "epoch": 0.63, "learning_rate": 0.0003133425669882416, "loss": 3.2576, "step": 3927 }, { "epoch": 0.63, "learning_rate": 0.00031310032560296155, "loss": 3.1809, "step": 3928 }, { "epoch": 0.63, "learning_rate": 0.0003128581352020469, "loss": 3.1822, "step": 3929 }, { "epoch": 0.63, "learning_rate": 0.00031261599585156443, "loss": 3.1691, "step": 3930 }, { "epoch": 0.63, "learning_rate": 0.0003123739076175678, "loss": 3.2921, "step": 3931 }, { "epoch": 0.63, "learning_rate": 0.0003121318705660959, "loss": 3.2874, "step": 3932 }, { "epoch": 0.63, "learning_rate": 0.0003118898847631742, "loss": 3.2254, "step": 3933 }, { "epoch": 0.63, "learning_rate": 0.00031164795027481383, "loss": 3.1937, "step": 3934 }, { "epoch": 0.63, "learning_rate": 0.0003114060671670124, "loss": 3.0856, "step": 3935 }, { "epoch": 0.63, "learning_rate": 0.0003111642355057528, "loss": 3.2967, "step": 3936 }, { "epoch": 0.63, "learning_rate": 0.00031092245535700464, "loss": 3.1986, "step": 3937 }, { "epoch": 0.63, "learning_rate": 0.0003106807267867231, "loss": 3.2297, "step": 3938 }, { "epoch": 0.64, "learning_rate": 0.00031043904986084926, "loss": 3.1417, "step": 3939 }, { "epoch": 0.64, "learning_rate": 0.00031019742464531, "loss": 3.0704, "step": 3940 }, { "epoch": 0.64, "learning_rate": 0.00030995585120601854, "loss": 3.1322, "step": 3941 }, { "epoch": 0.64, "learning_rate": 0.00030971432960887334, "loss": 3.1744, "step": 3942 }, { "epoch": 0.64, "learning_rate": 0.0003094728599197595, "loss": 3.1892, "step": 3943 }, { "epoch": 0.64, "learning_rate": 0.0003092314422045474, "loss": 2.9808, "step": 3944 }, { "epoch": 0.64, "learning_rate": 0.00030899007652909326, "loss": 3.1168, "step": 3945 }, { "epoch": 0.64, "learning_rate": 0.0003087487629592393, "loss": 3.0731, "step": 3946 }, { "epoch": 0.64, "learning_rate": 0.0003085075015608135, "loss": 3.1754, "step": 3947 }, { "epoch": 0.64, "learning_rate": 0.00030826629239962943, "loss": 3.2921, "step": 3948 }, { "epoch": 0.64, "learning_rate": 0.00030802513554148664, "loss": 3.2581, "step": 3949 }, { "epoch": 0.64, "learning_rate": 0.00030778403105217046, "loss": 3.1403, "step": 3950 }, { "epoch": 0.64, "learning_rate": 0.0003075429789974515, "loss": 3.1663, "step": 3951 }, { "epoch": 0.64, "learning_rate": 0.0003073019794430866, "loss": 3.1007, "step": 3952 }, { "epoch": 0.64, "learning_rate": 0.0003070610324548179, "loss": 2.9963, "step": 3953 }, { "epoch": 0.64, "learning_rate": 0.00030682013809837325, "loss": 3.2805, "step": 3954 }, { "epoch": 0.64, "learning_rate": 0.0003065792964394662, "loss": 3.2961, "step": 3955 }, { "epoch": 0.64, "learning_rate": 0.00030633850754379635, "loss": 3.2686, "step": 3956 }, { "epoch": 0.64, "learning_rate": 0.00030609777147704806, "loss": 3.2565, "step": 3957 }, { "epoch": 0.64, "learning_rate": 0.000305857088304892, "loss": 3.2475, "step": 3958 }, { "epoch": 0.64, "learning_rate": 0.0003056164580929841, "loss": 3.17, "step": 3959 }, { "epoch": 0.64, "learning_rate": 0.0003053758809069657, "loss": 3.2915, "step": 3960 }, { "epoch": 0.64, "learning_rate": 0.0003051353568124638, "loss": 3.0746, "step": 3961 }, { "epoch": 0.64, "learning_rate": 0.0003048948858750914, "loss": 3.0767, "step": 3962 }, { "epoch": 0.64, "learning_rate": 0.0003046544681604462, "loss": 3.2315, "step": 3963 }, { "epoch": 0.64, "learning_rate": 0.00030441410373411193, "loss": 3.3243, "step": 3964 }, { "epoch": 0.64, "learning_rate": 0.0003041737926616576, "loss": 3.1168, "step": 3965 }, { "epoch": 0.64, "learning_rate": 0.00030393353500863754, "loss": 3.1595, "step": 3966 }, { "epoch": 0.64, "learning_rate": 0.0003036933308405915, "loss": 3.2305, "step": 3967 }, { "epoch": 0.64, "learning_rate": 0.0003034531802230452, "loss": 3.0983, "step": 3968 }, { "epoch": 0.64, "learning_rate": 0.0003032130832215091, "loss": 3.1213, "step": 3969 }, { "epoch": 0.64, "learning_rate": 0.0003029730399014794, "loss": 3.1739, "step": 3970 }, { "epoch": 0.64, "learning_rate": 0.00030273305032843724, "loss": 3.0616, "step": 3971 }, { "epoch": 0.64, "learning_rate": 0.00030249311456784965, "loss": 3.1848, "step": 3972 }, { "epoch": 0.64, "learning_rate": 0.0003022532326851685, "loss": 3.2177, "step": 3973 }, { "epoch": 0.64, "learning_rate": 0.00030201340474583137, "loss": 3.2138, "step": 3974 }, { "epoch": 0.64, "learning_rate": 0.0003017736308152608, "loss": 3.2187, "step": 3975 }, { "epoch": 0.64, "learning_rate": 0.0003015339109588648, "loss": 3.1413, "step": 3976 }, { "epoch": 0.64, "learning_rate": 0.0003012942452420364, "loss": 3.0542, "step": 3977 }, { "epoch": 0.64, "learning_rate": 0.00030105463373015427, "loss": 3.1738, "step": 3978 }, { "epoch": 0.64, "learning_rate": 0.000300815076488582, "loss": 3.3473, "step": 3979 }, { "epoch": 0.64, "learning_rate": 0.00030057557358266794, "loss": 3.1101, "step": 3980 }, { "epoch": 0.64, "learning_rate": 0.00030033612507774667, "loss": 3.1499, "step": 3981 }, { "epoch": 0.64, "learning_rate": 0.0003000967310391373, "loss": 3.0242, "step": 3982 }, { "epoch": 0.64, "learning_rate": 0.00029985739153214373, "loss": 3.1692, "step": 3983 }, { "epoch": 0.64, "learning_rate": 0.0002996181066220558, "loss": 3.1013, "step": 3984 }, { "epoch": 0.64, "learning_rate": 0.0002993788763741479, "loss": 3.1263, "step": 3985 }, { "epoch": 0.64, "learning_rate": 0.0002991397008536794, "loss": 3.1486, "step": 3986 }, { "epoch": 0.64, "learning_rate": 0.0002989005801258954, "loss": 3.3533, "step": 3987 }, { "epoch": 0.64, "learning_rate": 0.0002986615142560255, "loss": 3.0696, "step": 3988 }, { "epoch": 0.64, "learning_rate": 0.0002984225033092844, "loss": 3.2083, "step": 3989 }, { "epoch": 0.64, "learning_rate": 0.0002981835473508721, "loss": 3.2213, "step": 3990 }, { "epoch": 0.64, "learning_rate": 0.00029794464644597305, "loss": 3.1267, "step": 3991 }, { "epoch": 0.64, "learning_rate": 0.0002977058006597572, "loss": 3.2145, "step": 3992 }, { "epoch": 0.64, "learning_rate": 0.0002974670100573795, "loss": 3.1627, "step": 3993 }, { "epoch": 0.64, "learning_rate": 0.00029722827470397953, "loss": 3.0899, "step": 3994 }, { "epoch": 0.64, "learning_rate": 0.0002969895946646818, "loss": 3.1466, "step": 3995 }, { "epoch": 0.64, "learning_rate": 0.00029675097000459594, "loss": 3.2256, "step": 3996 }, { "epoch": 0.64, "learning_rate": 0.0002965124007888163, "loss": 3.1676, "step": 3997 }, { "epoch": 0.64, "learning_rate": 0.00029627388708242195, "loss": 3.2112, "step": 3998 }, { "epoch": 0.64, "learning_rate": 0.0002960354289504776, "loss": 3.1592, "step": 3999 }, { "epoch": 0.64, "learning_rate": 0.0002957970264580321, "loss": 3.2051, "step": 4000 }, { "epoch": 0.65, "learning_rate": 0.00029555867967011887, "loss": 2.9887, "step": 4001 }, { "epoch": 0.65, "learning_rate": 0.00029532038865175695, "loss": 3.1266, "step": 4002 }, { "epoch": 0.65, "learning_rate": 0.0002950821534679495, "loss": 3.1663, "step": 4003 }, { "epoch": 0.65, "learning_rate": 0.00029484397418368493, "loss": 3.1082, "step": 4004 }, { "epoch": 0.65, "learning_rate": 0.00029460585086393576, "loss": 3.0419, "step": 4005 }, { "epoch": 0.65, "learning_rate": 0.00029436778357366014, "loss": 3.1903, "step": 4006 }, { "epoch": 0.65, "learning_rate": 0.00029412977237780024, "loss": 3.1059, "step": 4007 }, { "epoch": 0.65, "learning_rate": 0.0002938918173412832, "loss": 3.2009, "step": 4008 }, { "epoch": 0.65, "learning_rate": 0.0002936539185290206, "loss": 3.132, "step": 4009 }, { "epoch": 0.65, "learning_rate": 0.0002934160760059091, "loss": 3.2861, "step": 4010 }, { "epoch": 0.65, "learning_rate": 0.0002931782898368294, "loss": 3.2226, "step": 4011 }, { "epoch": 0.65, "learning_rate": 0.0002929405600866476, "loss": 3.2351, "step": 4012 }, { "epoch": 0.65, "learning_rate": 0.0002927028868202139, "loss": 3.0392, "step": 4013 }, { "epoch": 0.65, "learning_rate": 0.0002924652701023631, "loss": 3.0767, "step": 4014 }, { "epoch": 0.65, "learning_rate": 0.00029222770999791473, "loss": 3.0831, "step": 4015 }, { "epoch": 0.65, "learning_rate": 0.0002919902065716728, "loss": 3.2114, "step": 4016 }, { "epoch": 0.65, "learning_rate": 0.0002917527598884256, "loss": 3.3294, "step": 4017 }, { "epoch": 0.65, "learning_rate": 0.0002915153700129468, "loss": 3.2263, "step": 4018 }, { "epoch": 0.65, "learning_rate": 0.00029127803700999355, "loss": 3.2039, "step": 4019 }, { "epoch": 0.65, "learning_rate": 0.000291040760944308, "loss": 3.2107, "step": 4020 }, { "epoch": 0.65, "learning_rate": 0.000290803541880617, "loss": 3.2709, "step": 4021 }, { "epoch": 0.65, "learning_rate": 0.000290566379883631, "loss": 3.13, "step": 4022 }, { "epoch": 0.65, "learning_rate": 0.00029032927501804553, "loss": 3.2467, "step": 4023 }, { "epoch": 0.65, "learning_rate": 0.0002900922273485409, "loss": 3.2125, "step": 4024 }, { "epoch": 0.65, "learning_rate": 0.000289855236939781, "loss": 3.0606, "step": 4025 }, { "epoch": 0.65, "learning_rate": 0.0002896183038564144, "loss": 3.146, "step": 4026 }, { "epoch": 0.65, "learning_rate": 0.0002893814281630744, "loss": 3.335, "step": 4027 }, { "epoch": 0.65, "learning_rate": 0.00028914460992437784, "loss": 3.2915, "step": 4028 }, { "epoch": 0.65, "learning_rate": 0.00028890784920492673, "loss": 3.148, "step": 4029 }, { "epoch": 0.65, "learning_rate": 0.0002886711460693069, "loss": 3.1647, "step": 4030 }, { "epoch": 0.65, "learning_rate": 0.00028843450058208865, "loss": 3.1223, "step": 4031 }, { "epoch": 0.65, "learning_rate": 0.0002881979128078264, "loss": 3.1942, "step": 4032 }, { "epoch": 0.65, "learning_rate": 0.0002879613828110591, "loss": 3.2868, "step": 4033 }, { "epoch": 0.65, "learning_rate": 0.0002877249106563099, "loss": 3.1564, "step": 4034 }, { "epoch": 0.65, "learning_rate": 0.0002874884964080856, "loss": 3.3212, "step": 4035 }, { "epoch": 0.65, "learning_rate": 0.00028725214013087787, "loss": 3.1215, "step": 4036 }, { "epoch": 0.65, "learning_rate": 0.00028701584188916234, "loss": 3.1798, "step": 4037 }, { "epoch": 0.65, "learning_rate": 0.0002867796017473989, "loss": 3.2662, "step": 4038 }, { "epoch": 0.65, "learning_rate": 0.0002865434197700314, "loss": 3.0221, "step": 4039 }, { "epoch": 0.65, "learning_rate": 0.00028630729602148816, "loss": 3.2844, "step": 4040 }, { "epoch": 0.65, "learning_rate": 0.00028607123056618094, "loss": 3.268, "step": 4041 }, { "epoch": 0.65, "learning_rate": 0.0002858352234685063, "loss": 3.0573, "step": 4042 }, { "epoch": 0.65, "learning_rate": 0.0002855992747928446, "loss": 3.2745, "step": 4043 }, { "epoch": 0.65, "learning_rate": 0.0002853633846035603, "loss": 3.1147, "step": 4044 }, { "epoch": 0.65, "learning_rate": 0.0002851275529650018, "loss": 3.1835, "step": 4045 }, { "epoch": 0.65, "learning_rate": 0.00028489177994150196, "loss": 3.1462, "step": 4046 }, { "epoch": 0.65, "learning_rate": 0.00028465606559737675, "loss": 3.3475, "step": 4047 }, { "epoch": 0.65, "learning_rate": 0.00028442040999692705, "loss": 3.252, "step": 4048 }, { "epoch": 0.65, "learning_rate": 0.0002841848132044372, "loss": 3.1954, "step": 4049 }, { "epoch": 0.65, "learning_rate": 0.0002839492752841758, "loss": 3.2729, "step": 4050 }, { "epoch": 0.65, "learning_rate": 0.0002837137963003952, "loss": 3.0788, "step": 4051 }, { "epoch": 0.65, "learning_rate": 0.0002834783763173318, "loss": 3.2465, "step": 4052 }, { "epoch": 0.65, "learning_rate": 0.0002832430153992055, "loss": 3.1525, "step": 4053 }, { "epoch": 0.65, "learning_rate": 0.0002830077136102207, "loss": 3.2683, "step": 4054 }, { "epoch": 0.65, "learning_rate": 0.0002827724710145653, "loss": 3.122, "step": 4055 }, { "epoch": 0.65, "learning_rate": 0.00028253728767641104, "loss": 3.1916, "step": 4056 }, { "epoch": 0.65, "learning_rate": 0.0002823021636599137, "loss": 3.025, "step": 4057 }, { "epoch": 0.65, "learning_rate": 0.00028206709902921294, "loss": 3.2628, "step": 4058 }, { "epoch": 0.65, "learning_rate": 0.00028183209384843167, "loss": 3.2141, "step": 4059 }, { "epoch": 0.65, "learning_rate": 0.00028159714818167713, "loss": 3.1428, "step": 4060 }, { "epoch": 0.65, "learning_rate": 0.00028136226209304015, "loss": 3.258, "step": 4061 }, { "epoch": 0.65, "learning_rate": 0.00028112743564659534, "loss": 2.9183, "step": 4062 }, { "epoch": 0.66, "learning_rate": 0.0002808926689064009, "loss": 3.0787, "step": 4063 }, { "epoch": 0.66, "learning_rate": 0.00028065796193649917, "loss": 3.0015, "step": 4064 }, { "epoch": 0.66, "learning_rate": 0.0002804233148009155, "loss": 3.1877, "step": 4065 }, { "epoch": 0.66, "learning_rate": 0.0002801887275636594, "loss": 3.2151, "step": 4066 }, { "epoch": 0.66, "learning_rate": 0.0002799542002887239, "loss": 3.2113, "step": 4067 }, { "epoch": 0.66, "learning_rate": 0.0002797197330400858, "loss": 3.0123, "step": 4068 }, { "epoch": 0.66, "learning_rate": 0.0002794853258817053, "loss": 3.1978, "step": 4069 }, { "epoch": 0.66, "learning_rate": 0.00027925097887752666, "loss": 3.2574, "step": 4070 }, { "epoch": 0.66, "learning_rate": 0.0002790166920914769, "loss": 3.0455, "step": 4071 }, { "epoch": 0.66, "learning_rate": 0.0002787824655874674, "loss": 3.13, "step": 4072 }, { "epoch": 0.66, "learning_rate": 0.00027854829942939273, "loss": 3.174, "step": 4073 }, { "epoch": 0.66, "learning_rate": 0.0002783141936811312, "loss": 3.2969, "step": 4074 }, { "epoch": 0.66, "learning_rate": 0.00027808014840654437, "loss": 3.2942, "step": 4075 }, { "epoch": 0.66, "learning_rate": 0.0002778461636694778, "loss": 3.0967, "step": 4076 }, { "epoch": 0.66, "learning_rate": 0.0002776122395337597, "loss": 3.1659, "step": 4077 }, { "epoch": 0.66, "learning_rate": 0.00027737837606320244, "loss": 3.2761, "step": 4078 }, { "epoch": 0.66, "learning_rate": 0.0002771445733216017, "loss": 3.2108, "step": 4079 }, { "epoch": 0.66, "learning_rate": 0.00027691083137273645, "loss": 3.1669, "step": 4080 }, { "epoch": 0.66, "learning_rate": 0.0002766771502803692, "loss": 3.2361, "step": 4081 }, { "epoch": 0.66, "learning_rate": 0.000276443530108246, "loss": 3.1721, "step": 4082 }, { "epoch": 0.66, "learning_rate": 0.0002762099709200958, "loss": 3.2816, "step": 4083 }, { "epoch": 0.66, "learning_rate": 0.0002759764727796313, "loss": 3.1641, "step": 4084 }, { "epoch": 0.66, "learning_rate": 0.00027574303575054847, "loss": 3.1325, "step": 4085 }, { "epoch": 0.66, "learning_rate": 0.00027550965989652664, "loss": 3.0987, "step": 4086 }, { "epoch": 0.66, "learning_rate": 0.0002752763452812285, "loss": 3.2683, "step": 4087 }, { "epoch": 0.66, "learning_rate": 0.00027504309196829966, "loss": 3.2042, "step": 4088 }, { "epoch": 0.66, "learning_rate": 0.00027480990002136987, "loss": 3.1936, "step": 4089 }, { "epoch": 0.66, "learning_rate": 0.0002745767695040509, "loss": 3.2831, "step": 4090 }, { "epoch": 0.66, "learning_rate": 0.0002743437004799387, "loss": 3.1679, "step": 4091 }, { "epoch": 0.66, "learning_rate": 0.00027411069301261213, "loss": 3.1541, "step": 4092 }, { "epoch": 0.66, "learning_rate": 0.00027387774716563346, "loss": 3.1048, "step": 4093 }, { "epoch": 0.66, "learning_rate": 0.00027364486300254787, "loss": 3.2002, "step": 4094 }, { "epoch": 0.66, "learning_rate": 0.000273412040586884, "loss": 3.1037, "step": 4095 }, { "epoch": 0.66, "learning_rate": 0.0002731792799821532, "loss": 3.1418, "step": 4096 }, { "epoch": 0.66, "learning_rate": 0.0002729465812518503, "loss": 3.3329, "step": 4097 }, { "epoch": 0.66, "learning_rate": 0.00027271394445945346, "loss": 3.0457, "step": 4098 }, { "epoch": 0.66, "learning_rate": 0.0002724813696684231, "loss": 3.2609, "step": 4099 }, { "epoch": 0.66, "learning_rate": 0.0002722488569422039, "loss": 3.0507, "step": 4100 }, { "epoch": 0.66, "learning_rate": 0.0002720164063442229, "loss": 3.0593, "step": 4101 }, { "epoch": 0.66, "learning_rate": 0.0002717840179378901, "loss": 3.1612, "step": 4102 }, { "epoch": 0.66, "learning_rate": 0.00027155169178659874, "loss": 3.3641, "step": 4103 }, { "epoch": 0.66, "learning_rate": 0.00027131942795372536, "loss": 3.2591, "step": 4104 }, { "epoch": 0.66, "learning_rate": 0.0002710872265026286, "loss": 3.1082, "step": 4105 }, { "epoch": 0.66, "learning_rate": 0.00027085508749665144, "loss": 3.2559, "step": 4106 }, { "epoch": 0.66, "learning_rate": 0.0002706230109991188, "loss": 3.0895, "step": 4107 }, { "epoch": 0.66, "learning_rate": 0.0002703909970733387, "loss": 3.3199, "step": 4108 }, { "epoch": 0.66, "learning_rate": 0.0002701590457826023, "loss": 3.0681, "step": 4109 }, { "epoch": 0.66, "learning_rate": 0.0002699271571901837, "loss": 3.1658, "step": 4110 }, { "epoch": 0.66, "learning_rate": 0.00026969533135933946, "loss": 3.0492, "step": 4111 }, { "epoch": 0.66, "learning_rate": 0.0002694635683533096, "loss": 3.1969, "step": 4112 }, { "epoch": 0.66, "learning_rate": 0.00026923186823531707, "loss": 3.1702, "step": 4113 }, { "epoch": 0.66, "learning_rate": 0.0002690002310685669, "loss": 3.1702, "step": 4114 }, { "epoch": 0.66, "learning_rate": 0.0002687686569162474, "loss": 2.9403, "step": 4115 }, { "epoch": 0.66, "learning_rate": 0.0002685371458415298, "loss": 3.1884, "step": 4116 }, { "epoch": 0.66, "learning_rate": 0.00026830569790756804, "loss": 3.1893, "step": 4117 }, { "epoch": 0.66, "learning_rate": 0.0002680743131774987, "loss": 3.1849, "step": 4118 }, { "epoch": 0.66, "learning_rate": 0.0002678429917144417, "loss": 3.1613, "step": 4119 }, { "epoch": 0.66, "learning_rate": 0.0002676117335814985, "loss": 3.1853, "step": 4120 }, { "epoch": 0.66, "learning_rate": 0.00026738053884175437, "loss": 3.2112, "step": 4121 }, { "epoch": 0.66, "learning_rate": 0.00026714940755827695, "loss": 3.1344, "step": 4122 }, { "epoch": 0.66, "learning_rate": 0.0002669183397941166, "loss": 3.2412, "step": 4123 }, { "epoch": 0.66, "learning_rate": 0.0002666873356123059, "loss": 3.2354, "step": 4124 }, { "epoch": 0.67, "learning_rate": 0.0002664563950758611, "loss": 3.1895, "step": 4125 }, { "epoch": 0.67, "learning_rate": 0.00026622551824778, "loss": 3.2522, "step": 4126 }, { "epoch": 0.67, "learning_rate": 0.0002659947051910436, "loss": 3.2679, "step": 4127 }, { "epoch": 0.67, "learning_rate": 0.00026576395596861554, "loss": 3.1023, "step": 4128 }, { "epoch": 0.67, "learning_rate": 0.0002655332706434419, "loss": 3.1887, "step": 4129 }, { "epoch": 0.67, "learning_rate": 0.0002653026492784509, "loss": 2.9679, "step": 4130 }, { "epoch": 0.67, "learning_rate": 0.00026507209193655444, "loss": 3.2127, "step": 4131 }, { "epoch": 0.67, "learning_rate": 0.00026484159868064584, "loss": 3.0054, "step": 4132 }, { "epoch": 0.67, "learning_rate": 0.0002646111695736013, "loss": 3.3069, "step": 4133 }, { "epoch": 0.67, "learning_rate": 0.0002643808046782797, "loss": 3.2137, "step": 4134 }, { "epoch": 0.67, "learning_rate": 0.0002641505040575226, "loss": 3.0031, "step": 4135 }, { "epoch": 0.67, "learning_rate": 0.0002639202677741529, "loss": 3.1172, "step": 4136 }, { "epoch": 0.67, "learning_rate": 0.0002636900958909776, "loss": 3.1372, "step": 4137 }, { "epoch": 0.67, "learning_rate": 0.000263459988470785, "loss": 3.0422, "step": 4138 }, { "epoch": 0.67, "learning_rate": 0.0002632299455763459, "loss": 3.3007, "step": 4139 }, { "epoch": 0.67, "learning_rate": 0.0002629999672704139, "loss": 2.9857, "step": 4140 }, { "epoch": 0.67, "learning_rate": 0.00026277005361572493, "loss": 3.0287, "step": 4141 }, { "epoch": 0.67, "learning_rate": 0.00026254020467499664, "loss": 3.0796, "step": 4142 }, { "epoch": 0.67, "learning_rate": 0.0002623104205109299, "loss": 3.1754, "step": 4143 }, { "epoch": 0.67, "learning_rate": 0.00026208070118620777, "loss": 2.9953, "step": 4144 }, { "epoch": 0.67, "learning_rate": 0.00026185104676349494, "loss": 3.1351, "step": 4145 }, { "epoch": 0.67, "learning_rate": 0.000261621457305439, "loss": 3.2094, "step": 4146 }, { "epoch": 0.67, "learning_rate": 0.0002613919328746698, "loss": 3.2105, "step": 4147 }, { "epoch": 0.67, "learning_rate": 0.000261162473533799, "loss": 3.2661, "step": 4148 }, { "epoch": 0.67, "learning_rate": 0.0002609330793454208, "loss": 3.1949, "step": 4149 }, { "epoch": 0.67, "learning_rate": 0.00026070375037211225, "loss": 3.2443, "step": 4150 }, { "epoch": 0.67, "learning_rate": 0.0002604744866764314, "loss": 3.2915, "step": 4151 }, { "epoch": 0.67, "learning_rate": 0.00026024528832091926, "loss": 3.0836, "step": 4152 }, { "epoch": 0.67, "learning_rate": 0.00026001615536809915, "loss": 3.1686, "step": 4153 }, { "epoch": 0.67, "learning_rate": 0.0002597870878804758, "loss": 3.1607, "step": 4154 }, { "epoch": 0.67, "learning_rate": 0.00025955808592053643, "loss": 3.179, "step": 4155 }, { "epoch": 0.67, "learning_rate": 0.00025932914955075127, "loss": 3.1991, "step": 4156 }, { "epoch": 0.67, "learning_rate": 0.0002591002788335711, "loss": 3.27, "step": 4157 }, { "epoch": 0.67, "learning_rate": 0.00025887147383142997, "loss": 3.1548, "step": 4158 }, { "epoch": 0.67, "learning_rate": 0.0002586427346067436, "loss": 3.1206, "step": 4159 }, { "epoch": 0.67, "learning_rate": 0.0002584140612219095, "loss": 3.2813, "step": 4160 }, { "epoch": 0.67, "learning_rate": 0.00025818545373930743, "loss": 3.2768, "step": 4161 }, { "epoch": 0.67, "learning_rate": 0.0002579569122212998, "loss": 3.1401, "step": 4162 }, { "epoch": 0.67, "learning_rate": 0.0002577284367302299, "loss": 3.4463, "step": 4163 }, { "epoch": 0.67, "learning_rate": 0.00025750002732842384, "loss": 3.3511, "step": 4164 }, { "epoch": 0.67, "learning_rate": 0.0002572716840781894, "loss": 3.0053, "step": 4165 }, { "epoch": 0.67, "learning_rate": 0.00025704340704181614, "loss": 3.0359, "step": 4166 }, { "epoch": 0.67, "learning_rate": 0.0002568151962815757, "loss": 3.0519, "step": 4167 }, { "epoch": 0.67, "learning_rate": 0.0002565870518597223, "loss": 3.1631, "step": 4168 }, { "epoch": 0.67, "learning_rate": 0.00025635897383849095, "loss": 3.0362, "step": 4169 }, { "epoch": 0.67, "learning_rate": 0.000256130962280099, "loss": 3.242, "step": 4170 }, { "epoch": 0.67, "learning_rate": 0.00025590301724674595, "loss": 3.0901, "step": 4171 }, { "epoch": 0.67, "learning_rate": 0.0002556751388006131, "loss": 3.1802, "step": 4172 }, { "epoch": 0.67, "learning_rate": 0.0002554473270038629, "loss": 3.2809, "step": 4173 }, { "epoch": 0.67, "learning_rate": 0.0002552195819186405, "loss": 3.1039, "step": 4174 }, { "epoch": 0.67, "learning_rate": 0.00025499190360707234, "loss": 3.05, "step": 4175 }, { "epoch": 0.67, "learning_rate": 0.0002547642921312669, "loss": 3.1561, "step": 4176 }, { "epoch": 0.67, "learning_rate": 0.00025453674755331426, "loss": 3.1667, "step": 4177 }, { "epoch": 0.67, "learning_rate": 0.00025430926993528645, "loss": 3.1744, "step": 4178 }, { "epoch": 0.67, "learning_rate": 0.0002540818593392369, "loss": 3.2322, "step": 4179 }, { "epoch": 0.67, "learning_rate": 0.00025385451582720086, "loss": 3.1559, "step": 4180 }, { "epoch": 0.67, "learning_rate": 0.0002536272394611956, "loss": 3.1086, "step": 4181 }, { "epoch": 0.67, "learning_rate": 0.00025340003030321977, "loss": 3.0673, "step": 4182 }, { "epoch": 0.67, "learning_rate": 0.00025317288841525377, "loss": 3.1716, "step": 4183 }, { "epoch": 0.67, "learning_rate": 0.00025294581385925974, "loss": 3.2887, "step": 4184 }, { "epoch": 0.67, "learning_rate": 0.0002527188066971811, "loss": 3.1829, "step": 4185 }, { "epoch": 0.67, "learning_rate": 0.00025249186699094326, "loss": 3.0651, "step": 4186 }, { "epoch": 0.68, "learning_rate": 0.0002522649948024531, "loss": 3.1439, "step": 4187 }, { "epoch": 0.68, "learning_rate": 0.0002520381901935992, "loss": 3.1099, "step": 4188 }, { "epoch": 0.68, "learning_rate": 0.00025181145322625146, "loss": 3.1224, "step": 4189 }, { "epoch": 0.68, "learning_rate": 0.0002515847839622617, "loss": 3.0274, "step": 4190 }, { "epoch": 0.68, "learning_rate": 0.0002513581824634626, "loss": 3.1903, "step": 4191 }, { "epoch": 0.68, "learning_rate": 0.00025113164879166886, "loss": 3.2002, "step": 4192 }, { "epoch": 0.68, "learning_rate": 0.00025090518300867717, "loss": 3.1281, "step": 4193 }, { "epoch": 0.68, "learning_rate": 0.00025067878517626445, "loss": 3.1395, "step": 4194 }, { "epoch": 0.68, "learning_rate": 0.0002504524553561901, "loss": 3.2152, "step": 4195 }, { "epoch": 0.68, "learning_rate": 0.0002502261936101948, "loss": 3.1826, "step": 4196 }, { "epoch": 0.68, "learning_rate": 0.0002500000000000001, "loss": 3.2082, "step": 4197 }, { "epoch": 0.68, "learning_rate": 0.00024977387458730954, "loss": 3.1214, "step": 4198 }, { "epoch": 0.68, "learning_rate": 0.00024954781743380785, "loss": 3.1125, "step": 4199 }, { "epoch": 0.68, "learning_rate": 0.00024932182860116115, "loss": 3.0731, "step": 4200 }, { "epoch": 0.68, "learning_rate": 0.000249095908151017, "loss": 2.8992, "step": 4201 }, { "epoch": 0.68, "learning_rate": 0.00024887005614500444, "loss": 3.2321, "step": 4202 }, { "epoch": 0.68, "learning_rate": 0.0002486442726447332, "loss": 3.1698, "step": 4203 }, { "epoch": 0.68, "learning_rate": 0.0002484185577117951, "loss": 3.2945, "step": 4204 }, { "epoch": 0.68, "learning_rate": 0.0002481929114077626, "loss": 3.2349, "step": 4205 }, { "epoch": 0.68, "learning_rate": 0.00024796733379419013, "loss": 3.2539, "step": 4206 }, { "epoch": 0.68, "learning_rate": 0.0002477418249326128, "loss": 3.1775, "step": 4207 }, { "epoch": 0.68, "learning_rate": 0.0002475163848845474, "loss": 3.1465, "step": 4208 }, { "epoch": 0.68, "learning_rate": 0.0002472910137114914, "loss": 3.2704, "step": 4209 }, { "epoch": 0.68, "learning_rate": 0.0002470657114749238, "loss": 3.1569, "step": 4210 }, { "epoch": 0.68, "learning_rate": 0.0002468404782363051, "loss": 3.0388, "step": 4211 }, { "epoch": 0.68, "learning_rate": 0.00024661531405707656, "loss": 3.1313, "step": 4212 }, { "epoch": 0.68, "learning_rate": 0.0002463902189986606, "loss": 3.2158, "step": 4213 }, { "epoch": 0.68, "learning_rate": 0.00024616519312246123, "loss": 3.2347, "step": 4214 }, { "epoch": 0.68, "learning_rate": 0.00024594023648986284, "loss": 3.1266, "step": 4215 }, { "epoch": 0.68, "learning_rate": 0.00024571534916223155, "loss": 3.2098, "step": 4216 }, { "epoch": 0.68, "learning_rate": 0.0002454905312009144, "loss": 3.2493, "step": 4217 }, { "epoch": 0.68, "learning_rate": 0.0002452657826672394, "loss": 3.0256, "step": 4218 }, { "epoch": 0.68, "learning_rate": 0.0002450411036225158, "loss": 3.2188, "step": 4219 }, { "epoch": 0.68, "learning_rate": 0.0002448164941280337, "loss": 3.1295, "step": 4220 }, { "epoch": 0.68, "learning_rate": 0.00024459195424506464, "loss": 3.1127, "step": 4221 }, { "epoch": 0.68, "learning_rate": 0.00024436748403486037, "loss": 3.1494, "step": 4222 }, { "epoch": 0.68, "learning_rate": 0.0002441430835586544, "loss": 3.1721, "step": 4223 }, { "epoch": 0.68, "learning_rate": 0.0002439187528776609, "loss": 3.0297, "step": 4224 }, { "epoch": 0.68, "learning_rate": 0.00024369449205307504, "loss": 3.2309, "step": 4225 }, { "epoch": 0.68, "learning_rate": 0.00024347030114607295, "loss": 3.0079, "step": 4226 }, { "epoch": 0.68, "learning_rate": 0.00024324618021781186, "loss": 3.052, "step": 4227 }, { "epoch": 0.68, "learning_rate": 0.0002430221293294293, "loss": 3.2588, "step": 4228 }, { "epoch": 0.68, "learning_rate": 0.00024279814854204435, "loss": 3.2389, "step": 4229 }, { "epoch": 0.68, "learning_rate": 0.0002425742379167567, "loss": 3.1093, "step": 4230 }, { "epoch": 0.68, "learning_rate": 0.00024235039751464694, "loss": 3.3073, "step": 4231 }, { "epoch": 0.68, "learning_rate": 0.00024212662739677655, "loss": 3.0591, "step": 4232 }, { "epoch": 0.68, "learning_rate": 0.00024190292762418786, "loss": 3.1953, "step": 4233 }, { "epoch": 0.68, "learning_rate": 0.0002416792982579037, "loss": 3.3459, "step": 4234 }, { "epoch": 0.68, "learning_rate": 0.00024145573935892802, "loss": 3.2357, "step": 4235 }, { "epoch": 0.68, "learning_rate": 0.00024123225098824548, "loss": 3.0579, "step": 4236 }, { "epoch": 0.68, "learning_rate": 0.00024100883320682148, "loss": 3.1938, "step": 4237 }, { "epoch": 0.68, "learning_rate": 0.00024078548607560214, "loss": 3.2051, "step": 4238 }, { "epoch": 0.68, "learning_rate": 0.00024056220965551457, "loss": 3.1117, "step": 4239 }, { "epoch": 0.68, "learning_rate": 0.00024033900400746589, "loss": 3.0689, "step": 4240 }, { "epoch": 0.68, "learning_rate": 0.00024011586919234462, "loss": 3.1101, "step": 4241 }, { "epoch": 0.68, "learning_rate": 0.0002398928052710197, "loss": 3.0901, "step": 4242 }, { "epoch": 0.68, "learning_rate": 0.0002396698123043407, "loss": 3.1744, "step": 4243 }, { "epoch": 0.68, "learning_rate": 0.000239446890353138, "loss": 3.227, "step": 4244 }, { "epoch": 0.68, "learning_rate": 0.00023922403947822252, "loss": 3.3615, "step": 4245 }, { "epoch": 0.68, "learning_rate": 0.00023900125974038545, "loss": 3.2235, "step": 4246 }, { "epoch": 0.68, "learning_rate": 0.00023877855120039905, "loss": 3.2234, "step": 4247 }, { "epoch": 0.68, "learning_rate": 0.0002385559139190162, "loss": 3.4154, "step": 4248 }, { "epoch": 0.69, "learning_rate": 0.00023833334795696955, "loss": 3.2047, "step": 4249 }, { "epoch": 0.69, "learning_rate": 0.0002381108533749734, "loss": 3.2145, "step": 4250 }, { "epoch": 0.69, "learning_rate": 0.00023788843023372209, "loss": 3.1631, "step": 4251 }, { "epoch": 0.69, "learning_rate": 0.00023766607859389006, "loss": 3.1779, "step": 4252 }, { "epoch": 0.69, "learning_rate": 0.0002374437985161328, "loss": 3.171, "step": 4253 }, { "epoch": 0.69, "learning_rate": 0.00023722159006108606, "loss": 3.3265, "step": 4254 }, { "epoch": 0.69, "learning_rate": 0.0002369994532893661, "loss": 3.2081, "step": 4255 }, { "epoch": 0.69, "learning_rate": 0.0002367773882615697, "loss": 3.1658, "step": 4256 }, { "epoch": 0.69, "learning_rate": 0.00023655539503827407, "loss": 3.0066, "step": 4257 }, { "epoch": 0.69, "learning_rate": 0.00023633347368003639, "loss": 3.1828, "step": 4258 }, { "epoch": 0.69, "learning_rate": 0.0002361116242473948, "loss": 3.2217, "step": 4259 }, { "epoch": 0.69, "learning_rate": 0.00023588984680086755, "loss": 3.2927, "step": 4260 }, { "epoch": 0.69, "learning_rate": 0.00023566814140095344, "loss": 3.1804, "step": 4261 }, { "epoch": 0.69, "learning_rate": 0.00023544650810813138, "loss": 3.2444, "step": 4262 }, { "epoch": 0.69, "learning_rate": 0.00023522494698286097, "loss": 2.9836, "step": 4263 }, { "epoch": 0.69, "learning_rate": 0.00023500345808558144, "loss": 3.2735, "step": 4264 }, { "epoch": 0.69, "learning_rate": 0.00023478204147671294, "loss": 3.2904, "step": 4265 }, { "epoch": 0.69, "learning_rate": 0.0002345606972166558, "loss": 3.1513, "step": 4266 }, { "epoch": 0.69, "learning_rate": 0.00023433942536579038, "loss": 3.2664, "step": 4267 }, { "epoch": 0.69, "learning_rate": 0.00023411822598447756, "loss": 3.2827, "step": 4268 }, { "epoch": 0.69, "learning_rate": 0.0002338970991330585, "loss": 3.0973, "step": 4269 }, { "epoch": 0.69, "learning_rate": 0.00023367604487185394, "loss": 3.0368, "step": 4270 }, { "epoch": 0.69, "learning_rate": 0.0002334550632611655, "loss": 3.0457, "step": 4271 }, { "epoch": 0.69, "learning_rate": 0.00023323415436127482, "loss": 3.082, "step": 4272 }, { "epoch": 0.69, "learning_rate": 0.0002330133182324437, "loss": 3.153, "step": 4273 }, { "epoch": 0.69, "learning_rate": 0.0002327925549349136, "loss": 3.0703, "step": 4274 }, { "epoch": 0.69, "learning_rate": 0.00023257186452890706, "loss": 3.1036, "step": 4275 }, { "epoch": 0.69, "learning_rate": 0.0002323512470746262, "loss": 3.3104, "step": 4276 }, { "epoch": 0.69, "learning_rate": 0.00023213070263225282, "loss": 3.0323, "step": 4277 }, { "epoch": 0.69, "learning_rate": 0.00023191023126194955, "loss": 3.063, "step": 4278 }, { "epoch": 0.69, "learning_rate": 0.00023168983302385894, "loss": 3.0741, "step": 4279 }, { "epoch": 0.69, "learning_rate": 0.00023146950797810285, "loss": 3.0996, "step": 4280 }, { "epoch": 0.69, "learning_rate": 0.00023124925618478432, "loss": 3.1348, "step": 4281 }, { "epoch": 0.69, "learning_rate": 0.0002310290777039858, "loss": 3.1846, "step": 4282 }, { "epoch": 0.69, "learning_rate": 0.00023080897259576943, "loss": 3.1313, "step": 4283 }, { "epoch": 0.69, "learning_rate": 0.0002305889409201779, "loss": 3.1428, "step": 4284 }, { "epoch": 0.69, "learning_rate": 0.00023036898273723382, "loss": 3.228, "step": 4285 }, { "epoch": 0.69, "learning_rate": 0.00023014909810693907, "loss": 3.0927, "step": 4286 }, { "epoch": 0.69, "learning_rate": 0.00022992928708927645, "loss": 3.1049, "step": 4287 }, { "epoch": 0.69, "learning_rate": 0.00022970954974420828, "loss": 3.051, "step": 4288 }, { "epoch": 0.69, "learning_rate": 0.00022948988613167632, "loss": 3.287, "step": 4289 }, { "epoch": 0.69, "learning_rate": 0.00022927029631160278, "loss": 2.9917, "step": 4290 }, { "epoch": 0.69, "learning_rate": 0.00022905078034388983, "loss": 3.2523, "step": 4291 }, { "epoch": 0.69, "learning_rate": 0.00022883133828841858, "loss": 3.1499, "step": 4292 }, { "epoch": 0.69, "learning_rate": 0.00022861197020505126, "loss": 3.0912, "step": 4293 }, { "epoch": 0.69, "learning_rate": 0.00022839267615362924, "loss": 3.1126, "step": 4294 }, { "epoch": 0.69, "learning_rate": 0.00022817345619397338, "loss": 3.1517, "step": 4295 }, { "epoch": 0.69, "learning_rate": 0.00022795431038588498, "loss": 3.1545, "step": 4296 }, { "epoch": 0.69, "learning_rate": 0.00022773523878914494, "loss": 3.2291, "step": 4297 }, { "epoch": 0.69, "learning_rate": 0.00022751624146351336, "loss": 3.2285, "step": 4298 }, { "epoch": 0.69, "learning_rate": 0.00022729731846873064, "loss": 3.1711, "step": 4299 }, { "epoch": 0.69, "learning_rate": 0.0002270784698645173, "loss": 3.1627, "step": 4300 }, { "epoch": 0.69, "learning_rate": 0.00022685969571057248, "loss": 3.1063, "step": 4301 }, { "epoch": 0.69, "learning_rate": 0.00022664099606657578, "loss": 3.3439, "step": 4302 }, { "epoch": 0.69, "learning_rate": 0.00022642237099218648, "loss": 3.0449, "step": 4303 }, { "epoch": 0.69, "learning_rate": 0.00022620382054704298, "loss": 3.0033, "step": 4304 }, { "epoch": 0.69, "learning_rate": 0.0002259853447907636, "loss": 3.1899, "step": 4305 }, { "epoch": 0.69, "learning_rate": 0.0002257669437829469, "loss": 3.1683, "step": 4306 }, { "epoch": 0.69, "learning_rate": 0.00022554861758316996, "loss": 3.2902, "step": 4307 }, { "epoch": 0.69, "learning_rate": 0.0002253303662509902, "loss": 3.1518, "step": 4308 }, { "epoch": 0.69, "learning_rate": 0.00022511218984594438, "loss": 3.1665, "step": 4309 }, { "epoch": 0.69, "learning_rate": 0.00022489408842754898, "loss": 3.057, "step": 4310 }, { "epoch": 0.7, "learning_rate": 0.00022467606205529945, "loss": 3.3021, "step": 4311 }, { "epoch": 0.7, "learning_rate": 0.00022445811078867185, "loss": 3.2694, "step": 4312 }, { "epoch": 0.7, "learning_rate": 0.00022424023468712058, "loss": 3.2542, "step": 4313 }, { "epoch": 0.7, "learning_rate": 0.00022402243381008024, "loss": 3.1886, "step": 4314 }, { "epoch": 0.7, "learning_rate": 0.00022380470821696476, "loss": 3.1301, "step": 4315 }, { "epoch": 0.7, "learning_rate": 0.0002235870579671677, "loss": 3.1813, "step": 4316 }, { "epoch": 0.7, "learning_rate": 0.00022336948312006127, "loss": 3.2524, "step": 4317 }, { "epoch": 0.7, "learning_rate": 0.0002231519837349985, "loss": 3.2157, "step": 4318 }, { "epoch": 0.7, "learning_rate": 0.00022293455987131052, "loss": 3.0789, "step": 4319 }, { "epoch": 0.7, "learning_rate": 0.00022271721158830855, "loss": 3.061, "step": 4320 }, { "epoch": 0.7, "learning_rate": 0.000222499938945283, "loss": 3.0671, "step": 4321 }, { "epoch": 0.7, "learning_rate": 0.00022228274200150383, "loss": 3.1777, "step": 4322 }, { "epoch": 0.7, "learning_rate": 0.00022206562081621996, "loss": 3.1716, "step": 4323 }, { "epoch": 0.7, "learning_rate": 0.00022184857544865994, "loss": 3.1554, "step": 4324 }, { "epoch": 0.7, "learning_rate": 0.0002216316059580316, "loss": 3.1727, "step": 4325 }, { "epoch": 0.7, "learning_rate": 0.00022141471240352212, "loss": 3.0826, "step": 4326 }, { "epoch": 0.7, "learning_rate": 0.00022119789484429785, "loss": 3.1011, "step": 4327 }, { "epoch": 0.7, "learning_rate": 0.0002209811533395047, "loss": 3.1726, "step": 4328 }, { "epoch": 0.7, "learning_rate": 0.00022076448794826708, "loss": 3.2459, "step": 4329 }, { "epoch": 0.7, "learning_rate": 0.00022054789872968928, "loss": 3.1833, "step": 4330 }, { "epoch": 0.7, "learning_rate": 0.00022033138574285515, "loss": 3.1993, "step": 4331 }, { "epoch": 0.7, "learning_rate": 0.00022011494904682682, "loss": 3.1781, "step": 4332 }, { "epoch": 0.7, "learning_rate": 0.0002198985887006461, "loss": 3.1976, "step": 4333 }, { "epoch": 0.7, "learning_rate": 0.00021968230476333424, "loss": 3.1598, "step": 4334 }, { "epoch": 0.7, "learning_rate": 0.00021946609729389088, "loss": 3.0296, "step": 4335 }, { "epoch": 0.7, "learning_rate": 0.0002192499663512953, "loss": 3.1073, "step": 4336 }, { "epoch": 0.7, "learning_rate": 0.0002190339119945064, "loss": 3.2129, "step": 4337 }, { "epoch": 0.7, "learning_rate": 0.00021881793428246116, "loss": 3.0633, "step": 4338 }, { "epoch": 0.7, "learning_rate": 0.00021860203327407624, "loss": 3.1796, "step": 4339 }, { "epoch": 0.7, "learning_rate": 0.00021838620902824758, "loss": 3.1979, "step": 4340 }, { "epoch": 0.7, "learning_rate": 0.00021817046160384934, "loss": 3.0947, "step": 4341 }, { "epoch": 0.7, "learning_rate": 0.00021795479105973542, "loss": 3.133, "step": 4342 }, { "epoch": 0.7, "learning_rate": 0.00021773919745473908, "loss": 3.2023, "step": 4343 }, { "epoch": 0.7, "learning_rate": 0.00021752368084767155, "loss": 3.2168, "step": 4344 }, { "epoch": 0.7, "learning_rate": 0.0002173082412973238, "loss": 3.1128, "step": 4345 }, { "epoch": 0.7, "learning_rate": 0.00021709287886246577, "loss": 3.357, "step": 4346 }, { "epoch": 0.7, "learning_rate": 0.00021687759360184577, "loss": 3.208, "step": 4347 }, { "epoch": 0.7, "learning_rate": 0.00021666238557419178, "loss": 3.1786, "step": 4348 }, { "epoch": 0.7, "learning_rate": 0.00021644725483821025, "loss": 3.1689, "step": 4349 }, { "epoch": 0.7, "learning_rate": 0.00021623220145258683, "loss": 3.0795, "step": 4350 }, { "epoch": 0.7, "learning_rate": 0.0002160172254759858, "loss": 3.1103, "step": 4351 }, { "epoch": 0.7, "learning_rate": 0.00021580232696705076, "loss": 3.2286, "step": 4352 }, { "epoch": 0.7, "learning_rate": 0.00021558750598440346, "loss": 3.2055, "step": 4353 }, { "epoch": 0.7, "learning_rate": 0.00021537276258664511, "loss": 3.1061, "step": 4354 }, { "epoch": 0.7, "learning_rate": 0.0002151580968323556, "loss": 3.1011, "step": 4355 }, { "epoch": 0.7, "learning_rate": 0.0002149435087800935, "loss": 3.1195, "step": 4356 }, { "epoch": 0.7, "learning_rate": 0.00021472899848839645, "loss": 3.3282, "step": 4357 }, { "epoch": 0.7, "learning_rate": 0.00021451456601578056, "loss": 3.0536, "step": 4358 }, { "epoch": 0.7, "learning_rate": 0.00021430021142074113, "loss": 3.302, "step": 4359 }, { "epoch": 0.7, "learning_rate": 0.0002140859347617516, "loss": 2.993, "step": 4360 }, { "epoch": 0.7, "learning_rate": 0.00021387173609726463, "loss": 3.0391, "step": 4361 }, { "epoch": 0.7, "learning_rate": 0.0002136576154857115, "loss": 3.2748, "step": 4362 }, { "epoch": 0.7, "learning_rate": 0.0002134435729855022, "loss": 3.2154, "step": 4363 }, { "epoch": 0.7, "learning_rate": 0.00021322960865502533, "loss": 3.1719, "step": 4364 }, { "epoch": 0.7, "learning_rate": 0.0002130157225526485, "loss": 3.1404, "step": 4365 }, { "epoch": 0.7, "learning_rate": 0.0002128019147367173, "loss": 3.2187, "step": 4366 }, { "epoch": 0.7, "learning_rate": 0.00021258818526555647, "loss": 3.3178, "step": 4367 }, { "epoch": 0.7, "learning_rate": 0.00021237453419746934, "loss": 3.1361, "step": 4368 }, { "epoch": 0.7, "learning_rate": 0.00021216096159073784, "loss": 3.0917, "step": 4369 }, { "epoch": 0.7, "learning_rate": 0.00021194746750362236, "loss": 3.2578, "step": 4370 }, { "epoch": 0.7, "learning_rate": 0.00021173405199436217, "loss": 3.1958, "step": 4371 }, { "epoch": 0.7, "learning_rate": 0.00021152071512117455, "loss": 3.0908, "step": 4372 }, { "epoch": 0.71, "learning_rate": 0.00021130745694225578, "loss": 3.1061, "step": 4373 }, { "epoch": 0.71, "learning_rate": 0.00021109427751578064, "loss": 3.1406, "step": 4374 }, { "epoch": 0.71, "learning_rate": 0.00021088117689990234, "loss": 2.9409, "step": 4375 }, { "epoch": 0.71, "learning_rate": 0.00021066815515275255, "loss": 3.176, "step": 4376 }, { "epoch": 0.71, "learning_rate": 0.00021045521233244169, "loss": 3.2612, "step": 4377 }, { "epoch": 0.71, "learning_rate": 0.00021024234849705809, "loss": 3.2818, "step": 4378 }, { "epoch": 0.71, "learning_rate": 0.00021002956370466902, "loss": 3.1456, "step": 4379 }, { "epoch": 0.71, "learning_rate": 0.00020981685801332013, "loss": 3.0804, "step": 4380 }, { "epoch": 0.71, "learning_rate": 0.00020960423148103525, "loss": 3.2385, "step": 4381 }, { "epoch": 0.71, "learning_rate": 0.00020939168416581695, "loss": 3.1754, "step": 4382 }, { "epoch": 0.71, "learning_rate": 0.00020917921612564606, "loss": 3.1608, "step": 4383 }, { "epoch": 0.71, "learning_rate": 0.00020896682741848143, "loss": 3.1386, "step": 4384 }, { "epoch": 0.71, "learning_rate": 0.00020875451810226081, "loss": 3.1207, "step": 4385 }, { "epoch": 0.71, "learning_rate": 0.00020854228823490001, "loss": 3.2492, "step": 4386 }, { "epoch": 0.71, "learning_rate": 0.00020833013787429323, "loss": 3.2126, "step": 4387 }, { "epoch": 0.71, "learning_rate": 0.00020811806707831299, "loss": 3.202, "step": 4388 }, { "epoch": 0.71, "learning_rate": 0.00020790607590481019, "loss": 3.202, "step": 4389 }, { "epoch": 0.71, "learning_rate": 0.00020769416441161366, "loss": 3.2607, "step": 4390 }, { "epoch": 0.71, "learning_rate": 0.00020748233265653084, "loss": 3.1455, "step": 4391 }, { "epoch": 0.71, "learning_rate": 0.0002072705806973473, "loss": 3.1702, "step": 4392 }, { "epoch": 0.71, "learning_rate": 0.00020705890859182692, "loss": 3.1232, "step": 4393 }, { "epoch": 0.71, "learning_rate": 0.0002068473163977117, "loss": 3.2072, "step": 4394 }, { "epoch": 0.71, "learning_rate": 0.0002066358041727221, "loss": 3.213, "step": 4395 }, { "epoch": 0.71, "learning_rate": 0.00020642437197455617, "loss": 3.1671, "step": 4396 }, { "epoch": 0.71, "learning_rate": 0.00020621301986089064, "loss": 3.1399, "step": 4397 }, { "epoch": 0.71, "learning_rate": 0.00020600174788938026, "loss": 3.1052, "step": 4398 }, { "epoch": 0.71, "learning_rate": 0.00020579055611765797, "loss": 3.1146, "step": 4399 }, { "epoch": 0.71, "learning_rate": 0.00020557944460333478, "loss": 3.1345, "step": 4400 }, { "epoch": 0.71, "learning_rate": 0.00020536841340399988, "loss": 3.0773, "step": 4401 }, { "epoch": 0.71, "learning_rate": 0.00020515746257722024, "loss": 3.1409, "step": 4402 }, { "epoch": 0.71, "learning_rate": 0.00020494659218054124, "loss": 3.009, "step": 4403 }, { "epoch": 0.71, "learning_rate": 0.00020473580227148625, "loss": 3.301, "step": 4404 }, { "epoch": 0.71, "learning_rate": 0.00020452509290755667, "loss": 3.0811, "step": 4405 }, { "epoch": 0.71, "learning_rate": 0.00020431446414623185, "loss": 3.1263, "step": 4406 }, { "epoch": 0.71, "learning_rate": 0.00020410391604496947, "loss": 3.2726, "step": 4407 }, { "epoch": 0.71, "learning_rate": 0.0002038934486612045, "loss": 3.1985, "step": 4408 }, { "epoch": 0.71, "learning_rate": 0.0002036830620523506, "loss": 3.217, "step": 4409 }, { "epoch": 0.71, "learning_rate": 0.00020347275627579915, "loss": 3.0197, "step": 4410 }, { "epoch": 0.71, "learning_rate": 0.00020326253138891936, "loss": 3.1509, "step": 4411 }, { "epoch": 0.71, "learning_rate": 0.00020305238744905857, "loss": 3.2235, "step": 4412 }, { "epoch": 0.71, "learning_rate": 0.00020284232451354189, "loss": 3.056, "step": 4413 }, { "epoch": 0.71, "learning_rate": 0.00020263234263967266, "loss": 3.1263, "step": 4414 }, { "epoch": 0.71, "learning_rate": 0.00020242244188473142, "loss": 3.051, "step": 4415 }, { "epoch": 0.71, "learning_rate": 0.00020221262230597716, "loss": 3.2285, "step": 4416 }, { "epoch": 0.71, "learning_rate": 0.00020200288396064658, "loss": 3.3223, "step": 4417 }, { "epoch": 0.71, "learning_rate": 0.00020179322690595426, "loss": 3.3599, "step": 4418 }, { "epoch": 0.71, "learning_rate": 0.0002015836511990925, "loss": 3.2719, "step": 4419 }, { "epoch": 0.71, "learning_rate": 0.0002013741568972316, "loss": 3.2008, "step": 4420 }, { "epoch": 0.71, "learning_rate": 0.00020116474405751922, "loss": 3.0324, "step": 4421 }, { "epoch": 0.71, "learning_rate": 0.00020095541273708128, "loss": 3.2074, "step": 4422 }, { "epoch": 0.71, "learning_rate": 0.00020074616299302144, "loss": 3.0828, "step": 4423 }, { "epoch": 0.71, "learning_rate": 0.0002005369948824204, "loss": 3.3061, "step": 4424 }, { "epoch": 0.71, "learning_rate": 0.00020032790846233762, "loss": 3.1345, "step": 4425 }, { "epoch": 0.71, "learning_rate": 0.00020011890378980983, "loss": 3.1573, "step": 4426 }, { "epoch": 0.71, "learning_rate": 0.00019990998092185108, "loss": 3.1563, "step": 4427 }, { "epoch": 0.71, "learning_rate": 0.00019970113991545364, "loss": 3.1301, "step": 4428 }, { "epoch": 0.71, "learning_rate": 0.00019949238082758735, "loss": 3.1447, "step": 4429 }, { "epoch": 0.71, "learning_rate": 0.0001992837037151991, "loss": 3.1626, "step": 4430 }, { "epoch": 0.71, "learning_rate": 0.00019907510863521449, "loss": 3.1769, "step": 4431 }, { "epoch": 0.71, "learning_rate": 0.00019886659564453618, "loss": 3.037, "step": 4432 }, { "epoch": 0.71, "learning_rate": 0.000198658164800044, "loss": 3.1764, "step": 4433 }, { "epoch": 0.71, "learning_rate": 0.0001984498161585961, "loss": 3.0076, "step": 4434 }, { "epoch": 0.72, "learning_rate": 0.00019824154977702795, "loss": 3.2423, "step": 4435 }, { "epoch": 0.72, "learning_rate": 0.00019803336571215212, "loss": 3.1217, "step": 4436 }, { "epoch": 0.72, "learning_rate": 0.00019782526402075963, "loss": 3.1258, "step": 4437 }, { "epoch": 0.72, "learning_rate": 0.0001976172447596185, "loss": 3.3219, "step": 4438 }, { "epoch": 0.72, "learning_rate": 0.00019740930798547407, "loss": 3.1885, "step": 4439 }, { "epoch": 0.72, "learning_rate": 0.0001972014537550495, "loss": 3.1222, "step": 4440 }, { "epoch": 0.72, "learning_rate": 0.00019699368212504554, "loss": 3.0396, "step": 4441 }, { "epoch": 0.72, "learning_rate": 0.00019678599315213968, "loss": 3.2, "step": 4442 }, { "epoch": 0.72, "learning_rate": 0.00019657838689298796, "loss": 3.1946, "step": 4443 }, { "epoch": 0.72, "learning_rate": 0.00019637086340422327, "loss": 3.1946, "step": 4444 }, { "epoch": 0.72, "learning_rate": 0.00019616342274245564, "loss": 2.9829, "step": 4445 }, { "epoch": 0.72, "learning_rate": 0.00019595606496427293, "loss": 3.2215, "step": 4446 }, { "epoch": 0.72, "learning_rate": 0.0001957487901262403, "loss": 3.1195, "step": 4447 }, { "epoch": 0.72, "learning_rate": 0.0001955415982849004, "loss": 3.1998, "step": 4448 }, { "epoch": 0.72, "learning_rate": 0.00019533448949677262, "loss": 3.2898, "step": 4449 }, { "epoch": 0.72, "learning_rate": 0.00019512746381835483, "loss": 3.1303, "step": 4450 }, { "epoch": 0.72, "learning_rate": 0.00019492052130612103, "loss": 3.1352, "step": 4451 }, { "epoch": 0.72, "learning_rate": 0.00019471366201652328, "loss": 3.2404, "step": 4452 }, { "epoch": 0.72, "learning_rate": 0.00019450688600599077, "loss": 3.3935, "step": 4453 }, { "epoch": 0.72, "learning_rate": 0.00019430019333093002, "loss": 3.1514, "step": 4454 }, { "epoch": 0.72, "learning_rate": 0.00019409358404772425, "loss": 3.2582, "step": 4455 }, { "epoch": 0.72, "learning_rate": 0.00019388705821273512, "loss": 3.2168, "step": 4456 }, { "epoch": 0.72, "learning_rate": 0.0001936806158823003, "loss": 3.1718, "step": 4457 }, { "epoch": 0.72, "learning_rate": 0.00019347425711273525, "loss": 3.0814, "step": 4458 }, { "epoch": 0.72, "learning_rate": 0.00019326798196033274, "loss": 3.1877, "step": 4459 }, { "epoch": 0.72, "learning_rate": 0.0001930617904813627, "loss": 3.2138, "step": 4460 }, { "epoch": 0.72, "learning_rate": 0.00019285568273207156, "loss": 3.2122, "step": 4461 }, { "epoch": 0.72, "learning_rate": 0.00019264965876868395, "loss": 3.1165, "step": 4462 }, { "epoch": 0.72, "learning_rate": 0.00019244371864740118, "loss": 3.1591, "step": 4463 }, { "epoch": 0.72, "learning_rate": 0.0001922378624244014, "loss": 3.1526, "step": 4464 }, { "epoch": 0.72, "learning_rate": 0.00019203209015584012, "loss": 3.3269, "step": 4465 }, { "epoch": 0.72, "learning_rate": 0.0001918264018978503, "loss": 3.1421, "step": 4466 }, { "epoch": 0.72, "learning_rate": 0.00019162079770654105, "loss": 3.1378, "step": 4467 }, { "epoch": 0.72, "learning_rate": 0.00019141527763799965, "loss": 3.0, "step": 4468 }, { "epoch": 0.72, "learning_rate": 0.00019120984174828999, "loss": 3.3038, "step": 4469 }, { "epoch": 0.72, "learning_rate": 0.0001910044900934525, "loss": 3.0214, "step": 4470 }, { "epoch": 0.72, "learning_rate": 0.0001907992227295053, "loss": 3.158, "step": 4471 }, { "epoch": 0.72, "learning_rate": 0.0001905940397124434, "loss": 2.9944, "step": 4472 }, { "epoch": 0.72, "learning_rate": 0.00019038894109823834, "loss": 3.2414, "step": 4473 }, { "epoch": 0.72, "learning_rate": 0.00019018392694283905, "loss": 3.1629, "step": 4474 }, { "epoch": 0.72, "learning_rate": 0.00018997899730217177, "loss": 3.3381, "step": 4475 }, { "epoch": 0.72, "learning_rate": 0.0001897741522321388, "loss": 3.0663, "step": 4476 }, { "epoch": 0.72, "learning_rate": 0.00018956939178861994, "loss": 3.0867, "step": 4477 }, { "epoch": 0.72, "learning_rate": 0.000189364716027472, "loss": 3.3121, "step": 4478 }, { "epoch": 0.72, "learning_rate": 0.00018916012500452817, "loss": 3.1079, "step": 4479 }, { "epoch": 0.72, "learning_rate": 0.0001889556187755988, "loss": 3.3009, "step": 4480 }, { "epoch": 0.72, "learning_rate": 0.00018875119739647172, "loss": 3.1446, "step": 4481 }, { "epoch": 0.72, "learning_rate": 0.00018854686092291046, "loss": 3.2041, "step": 4482 }, { "epoch": 0.72, "learning_rate": 0.00018834260941065611, "loss": 3.1649, "step": 4483 }, { "epoch": 0.72, "learning_rate": 0.00018813844291542677, "loss": 3.1978, "step": 4484 }, { "epoch": 0.72, "learning_rate": 0.00018793436149291647, "loss": 3.1597, "step": 4485 }, { "epoch": 0.72, "learning_rate": 0.00018773036519879676, "loss": 3.1231, "step": 4486 }, { "epoch": 0.72, "learning_rate": 0.00018752645408871626, "loss": 3.292, "step": 4487 }, { "epoch": 0.72, "learning_rate": 0.00018732262821829931, "loss": 3.1241, "step": 4488 }, { "epoch": 0.72, "learning_rate": 0.00018711888764314777, "loss": 3.1956, "step": 4489 }, { "epoch": 0.72, "learning_rate": 0.00018691523241884029, "loss": 3.1702, "step": 4490 }, { "epoch": 0.72, "learning_rate": 0.00018671166260093147, "loss": 2.9963, "step": 4491 }, { "epoch": 0.72, "learning_rate": 0.0001865081782449533, "loss": 3.2701, "step": 4492 }, { "epoch": 0.72, "learning_rate": 0.00018630477940641472, "loss": 3.2058, "step": 4493 }, { "epoch": 0.72, "learning_rate": 0.00018610146614080037, "loss": 3.0433, "step": 4494 }, { "epoch": 0.72, "learning_rate": 0.00018589823850357224, "loss": 3.0629, "step": 4495 }, { "epoch": 0.72, "learning_rate": 0.00018569509655016886, "loss": 3.2751, "step": 4496 }, { "epoch": 0.73, "learning_rate": 0.00018549204033600547, "loss": 3.2882, "step": 4497 }, { "epoch": 0.73, "learning_rate": 0.00018528906991647343, "loss": 3.2119, "step": 4498 }, { "epoch": 0.73, "learning_rate": 0.00018508618534694121, "loss": 3.1328, "step": 4499 }, { "epoch": 0.73, "learning_rate": 0.00018488338668275369, "loss": 3.1851, "step": 4500 }, { "epoch": 0.73, "learning_rate": 0.00018468067397923231, "loss": 3.1355, "step": 4501 }, { "epoch": 0.73, "learning_rate": 0.00018447804729167518, "loss": 3.1353, "step": 4502 }, { "epoch": 0.73, "learning_rate": 0.00018427550667535686, "loss": 3.0431, "step": 4503 }, { "epoch": 0.73, "learning_rate": 0.00018407305218552815, "loss": 3.0502, "step": 4504 }, { "epoch": 0.73, "learning_rate": 0.00018387068387741679, "loss": 3.0584, "step": 4505 }, { "epoch": 0.73, "learning_rate": 0.00018366840180622675, "loss": 3.2285, "step": 4506 }, { "epoch": 0.73, "learning_rate": 0.00018346620602713865, "loss": 3.0916, "step": 4507 }, { "epoch": 0.73, "learning_rate": 0.00018326409659530945, "loss": 3.1314, "step": 4508 }, { "epoch": 0.73, "learning_rate": 0.00018306207356587274, "loss": 3.3056, "step": 4509 }, { "epoch": 0.73, "learning_rate": 0.00018286013699393805, "loss": 3.2546, "step": 4510 }, { "epoch": 0.73, "learning_rate": 0.0001826582869345919, "loss": 3.2522, "step": 4511 }, { "epoch": 0.73, "learning_rate": 0.00018245652344289686, "loss": 3.1361, "step": 4512 }, { "epoch": 0.73, "learning_rate": 0.00018225484657389206, "loss": 3.2023, "step": 4513 }, { "epoch": 0.73, "learning_rate": 0.0001820532563825929, "loss": 3.1521, "step": 4514 }, { "epoch": 0.73, "learning_rate": 0.00018185175292399143, "loss": 3.0769, "step": 4515 }, { "epoch": 0.73, "learning_rate": 0.00018165033625305526, "loss": 3.104, "step": 4516 }, { "epoch": 0.73, "learning_rate": 0.00018144900642472905, "loss": 3.1297, "step": 4517 }, { "epoch": 0.73, "learning_rate": 0.000181247763493934, "loss": 3.0331, "step": 4518 }, { "epoch": 0.73, "learning_rate": 0.0001810466075155666, "loss": 3.1865, "step": 4519 }, { "epoch": 0.73, "learning_rate": 0.00018084553854450048, "loss": 2.997, "step": 4520 }, { "epoch": 0.73, "learning_rate": 0.00018064455663558532, "loss": 3.3253, "step": 4521 }, { "epoch": 0.73, "learning_rate": 0.0001804436618436467, "loss": 3.2662, "step": 4522 }, { "epoch": 0.73, "learning_rate": 0.00018024285422348679, "loss": 3.2048, "step": 4523 }, { "epoch": 0.73, "learning_rate": 0.00018004213382988405, "loss": 3.1431, "step": 4524 }, { "epoch": 0.73, "learning_rate": 0.00017984150071759288, "loss": 3.0896, "step": 4525 }, { "epoch": 0.73, "learning_rate": 0.00017964095494134403, "loss": 3.113, "step": 4526 }, { "epoch": 0.73, "learning_rate": 0.00017944049655584454, "loss": 3.1699, "step": 4527 }, { "epoch": 0.73, "learning_rate": 0.00017924012561577714, "loss": 3.3035, "step": 4528 }, { "epoch": 0.73, "learning_rate": 0.00017903984217580116, "loss": 3.225, "step": 4529 }, { "epoch": 0.73, "learning_rate": 0.000178839646290552, "loss": 3.1091, "step": 4530 }, { "epoch": 0.73, "learning_rate": 0.000178639538014641, "loss": 3.1416, "step": 4531 }, { "epoch": 0.73, "learning_rate": 0.00017843951740265578, "loss": 3.1746, "step": 4532 }, { "epoch": 0.73, "learning_rate": 0.0001782395845091601, "loss": 3.1843, "step": 4533 }, { "epoch": 0.73, "learning_rate": 0.0001780397393886933, "loss": 3.2838, "step": 4534 }, { "epoch": 0.73, "learning_rate": 0.00017783998209577135, "loss": 3.1648, "step": 4535 }, { "epoch": 0.73, "learning_rate": 0.00017764031268488595, "loss": 3.1085, "step": 4536 }, { "epoch": 0.73, "learning_rate": 0.00017744073121050508, "loss": 3.1781, "step": 4537 }, { "epoch": 0.73, "learning_rate": 0.0001772412377270724, "loss": 3.2897, "step": 4538 }, { "epoch": 0.73, "learning_rate": 0.00017704183228900805, "loss": 3.1062, "step": 4539 }, { "epoch": 0.73, "learning_rate": 0.0001768425149507074, "loss": 3.1701, "step": 4540 }, { "epoch": 0.73, "learning_rate": 0.0001766432857665425, "loss": 3.1701, "step": 4541 }, { "epoch": 0.73, "learning_rate": 0.00017644414479086102, "loss": 3.1457, "step": 4542 }, { "epoch": 0.73, "learning_rate": 0.00017624509207798662, "loss": 3.0751, "step": 4543 }, { "epoch": 0.73, "learning_rate": 0.0001760461276822189, "loss": 3.1069, "step": 4544 }, { "epoch": 0.73, "learning_rate": 0.00017584725165783354, "loss": 3.1885, "step": 4545 }, { "epoch": 0.73, "learning_rate": 0.00017564846405908164, "loss": 3.0473, "step": 4546 }, { "epoch": 0.73, "learning_rate": 0.00017544976494019056, "loss": 3.057, "step": 4547 }, { "epoch": 0.73, "learning_rate": 0.00017525115435536347, "loss": 3.2509, "step": 4548 }, { "epoch": 0.73, "learning_rate": 0.00017505263235877944, "loss": 3.1918, "step": 4549 }, { "epoch": 0.73, "learning_rate": 0.0001748541990045931, "loss": 3.0851, "step": 4550 }, { "epoch": 0.73, "learning_rate": 0.00017465585434693525, "loss": 3.047, "step": 4551 }, { "epoch": 0.73, "learning_rate": 0.00017445759843991244, "loss": 3.2151, "step": 4552 }, { "epoch": 0.73, "learning_rate": 0.00017425943133760657, "loss": 3.0513, "step": 4553 }, { "epoch": 0.73, "learning_rate": 0.00017406135309407583, "loss": 3.148, "step": 4554 }, { "epoch": 0.73, "learning_rate": 0.00017386336376335398, "loss": 3.1805, "step": 4555 }, { "epoch": 0.73, "learning_rate": 0.0001736654633994505, "loss": 3.1815, "step": 4556 }, { "epoch": 0.73, "learning_rate": 0.00017346765205635068, "loss": 3.1679, "step": 4557 }, { "epoch": 0.73, "learning_rate": 0.00017326992978801563, "loss": 3.1974, "step": 4558 }, { "epoch": 0.74, "learning_rate": 0.0001730722966483817, "loss": 3.1465, "step": 4559 }, { "epoch": 0.74, "learning_rate": 0.00017287475269136133, "loss": 3.1986, "step": 4560 }, { "epoch": 0.74, "learning_rate": 0.00017267729797084265, "loss": 3.129, "step": 4561 }, { "epoch": 0.74, "learning_rate": 0.00017247993254068921, "loss": 3.1412, "step": 4562 }, { "epoch": 0.74, "learning_rate": 0.0001722826564547404, "loss": 3.1509, "step": 4563 }, { "epoch": 0.74, "learning_rate": 0.00017208546976681138, "loss": 3.1893, "step": 4564 }, { "epoch": 0.74, "learning_rate": 0.00017188837253069228, "loss": 3.1064, "step": 4565 }, { "epoch": 0.74, "learning_rate": 0.00017169136480014947, "loss": 3.0647, "step": 4566 }, { "epoch": 0.74, "learning_rate": 0.00017149444662892472, "loss": 3.2034, "step": 4567 }, { "epoch": 0.74, "learning_rate": 0.0001712976180707353, "loss": 3.1724, "step": 4568 }, { "epoch": 0.74, "learning_rate": 0.00017110087917927407, "loss": 2.9871, "step": 4569 }, { "epoch": 0.74, "learning_rate": 0.00017090423000820965, "loss": 3.1078, "step": 4570 }, { "epoch": 0.74, "learning_rate": 0.0001707076706111856, "loss": 3.1918, "step": 4571 }, { "epoch": 0.74, "learning_rate": 0.0001705112010418215, "loss": 3.1108, "step": 4572 }, { "epoch": 0.74, "learning_rate": 0.00017031482135371252, "loss": 3.251, "step": 4573 }, { "epoch": 0.74, "learning_rate": 0.00017011853160042855, "loss": 3.3603, "step": 4574 }, { "epoch": 0.74, "learning_rate": 0.00016992233183551596, "loss": 3.0229, "step": 4575 }, { "epoch": 0.74, "learning_rate": 0.0001697262221124961, "loss": 3.0078, "step": 4576 }, { "epoch": 0.74, "learning_rate": 0.00016953020248486545, "loss": 3.1344, "step": 4577 }, { "epoch": 0.74, "learning_rate": 0.00016933427300609626, "loss": 3.0572, "step": 4578 }, { "epoch": 0.74, "learning_rate": 0.00016913843372963645, "loss": 3.1619, "step": 4579 }, { "epoch": 0.74, "learning_rate": 0.00016894268470890844, "loss": 3.0956, "step": 4580 }, { "epoch": 0.74, "learning_rate": 0.00016874702599731106, "loss": 3.0275, "step": 4581 }, { "epoch": 0.74, "learning_rate": 0.00016855145764821817, "loss": 3.1206, "step": 4582 }, { "epoch": 0.74, "learning_rate": 0.00016835597971497847, "loss": 3.1665, "step": 4583 }, { "epoch": 0.74, "learning_rate": 0.0001681605922509165, "loss": 3.2499, "step": 4584 }, { "epoch": 0.74, "learning_rate": 0.0001679652953093321, "loss": 3.2056, "step": 4585 }, { "epoch": 0.74, "learning_rate": 0.00016777008894350027, "loss": 3.1612, "step": 4586 }, { "epoch": 0.74, "learning_rate": 0.00016757497320667137, "loss": 3.1377, "step": 4587 }, { "epoch": 0.74, "learning_rate": 0.00016737994815207126, "loss": 3.0569, "step": 4588 }, { "epoch": 0.74, "learning_rate": 0.00016718501383290035, "loss": 3.0067, "step": 4589 }, { "epoch": 0.74, "learning_rate": 0.00016699017030233509, "loss": 3.0986, "step": 4590 }, { "epoch": 0.74, "learning_rate": 0.00016679541761352673, "loss": 3.1474, "step": 4591 }, { "epoch": 0.74, "learning_rate": 0.00016660075581960198, "loss": 3.0609, "step": 4592 }, { "epoch": 0.74, "learning_rate": 0.00016640618497366255, "loss": 3.1537, "step": 4593 }, { "epoch": 0.74, "learning_rate": 0.00016621170512878559, "loss": 3.1354, "step": 4594 }, { "epoch": 0.74, "learning_rate": 0.0001660173163380229, "loss": 3.0581, "step": 4595 }, { "epoch": 0.74, "learning_rate": 0.00016582301865440213, "loss": 3.3011, "step": 4596 }, { "epoch": 0.74, "learning_rate": 0.00016562881213092557, "loss": 3.0772, "step": 4597 }, { "epoch": 0.74, "learning_rate": 0.00016543469682057105, "loss": 3.1538, "step": 4598 }, { "epoch": 0.74, "learning_rate": 0.0001652406727762908, "loss": 3.0836, "step": 4599 }, { "epoch": 0.74, "learning_rate": 0.0001650467400510131, "loss": 3.0759, "step": 4600 }, { "epoch": 0.74, "learning_rate": 0.0001648528986976409, "loss": 3.1682, "step": 4601 }, { "epoch": 0.74, "learning_rate": 0.00016465914876905192, "loss": 3.1532, "step": 4602 }, { "epoch": 0.74, "learning_rate": 0.0001644654903180992, "loss": 3.1793, "step": 4603 }, { "epoch": 0.74, "learning_rate": 0.00016427192339761115, "loss": 3.1389, "step": 4604 }, { "epoch": 0.74, "learning_rate": 0.00016407844806039034, "loss": 3.2438, "step": 4605 }, { "epoch": 0.74, "learning_rate": 0.00016388506435921542, "loss": 3.1604, "step": 4606 }, { "epoch": 0.74, "learning_rate": 0.00016369177234683952, "loss": 3.2712, "step": 4607 }, { "epoch": 0.74, "learning_rate": 0.00016349857207599046, "loss": 3.1298, "step": 4608 }, { "epoch": 0.74, "learning_rate": 0.00016330546359937144, "loss": 3.0876, "step": 4609 }, { "epoch": 0.74, "learning_rate": 0.00016311244696966075, "loss": 3.1551, "step": 4610 }, { "epoch": 0.74, "learning_rate": 0.00016291952223951094, "loss": 3.056, "step": 4611 }, { "epoch": 0.74, "learning_rate": 0.00016272668946155038, "loss": 3.0544, "step": 4612 }, { "epoch": 0.74, "learning_rate": 0.00016253394868838195, "loss": 3.0717, "step": 4613 }, { "epoch": 0.74, "learning_rate": 0.00016234129997258308, "loss": 3.0896, "step": 4614 }, { "epoch": 0.74, "learning_rate": 0.0001621487433667066, "loss": 3.0597, "step": 4615 }, { "epoch": 0.74, "learning_rate": 0.00016195627892328014, "loss": 3.1656, "step": 4616 }, { "epoch": 0.74, "learning_rate": 0.00016176390669480568, "loss": 3.0709, "step": 4617 }, { "epoch": 0.74, "learning_rate": 0.0001615716267337608, "loss": 3.0979, "step": 4618 }, { "epoch": 0.74, "learning_rate": 0.00016137943909259766, "loss": 3.1539, "step": 4619 }, { "epoch": 0.74, "learning_rate": 0.00016118734382374278, "loss": 3.1911, "step": 4620 }, { "epoch": 0.75, "learning_rate": 0.000160995340979598, "loss": 3.1721, "step": 4621 }, { "epoch": 0.75, "learning_rate": 0.00016080343061253993, "loss": 3.2664, "step": 4622 }, { "epoch": 0.75, "learning_rate": 0.0001606116127749194, "loss": 3.1529, "step": 4623 }, { "epoch": 0.75, "learning_rate": 0.0001604198875190626, "loss": 3.0861, "step": 4624 }, { "epoch": 0.75, "learning_rate": 0.00016022825489727055, "loss": 2.9629, "step": 4625 }, { "epoch": 0.75, "learning_rate": 0.00016003671496181833, "loss": 3.1157, "step": 4626 }, { "epoch": 0.75, "learning_rate": 0.00015984526776495627, "loss": 3.2161, "step": 4627 }, { "epoch": 0.75, "learning_rate": 0.0001596539133589094, "loss": 3.1533, "step": 4628 }, { "epoch": 0.75, "learning_rate": 0.00015946265179587698, "loss": 3.0633, "step": 4629 }, { "epoch": 0.75, "learning_rate": 0.00015927148312803324, "loss": 3.0741, "step": 4630 }, { "epoch": 0.75, "learning_rate": 0.0001590804074075276, "loss": 3.2475, "step": 4631 }, { "epoch": 0.75, "learning_rate": 0.00015888942468648309, "loss": 3.0978, "step": 4632 }, { "epoch": 0.75, "learning_rate": 0.0001586985350169981, "loss": 3.2043, "step": 4633 }, { "epoch": 0.75, "learning_rate": 0.00015850773845114534, "loss": 3.178, "step": 4634 }, { "epoch": 0.75, "learning_rate": 0.0001583170350409725, "loss": 3.0867, "step": 4635 }, { "epoch": 0.75, "learning_rate": 0.00015812642483850094, "loss": 3.1799, "step": 4636 }, { "epoch": 0.75, "learning_rate": 0.00015793590789572792, "loss": 2.9196, "step": 4637 }, { "epoch": 0.75, "learning_rate": 0.00015774548426462416, "loss": 3.3066, "step": 4638 }, { "epoch": 0.75, "learning_rate": 0.00015755515399713533, "loss": 3.2982, "step": 4639 }, { "epoch": 0.75, "learning_rate": 0.00015736491714518175, "loss": 3.1342, "step": 4640 }, { "epoch": 0.75, "learning_rate": 0.00015717477376065824, "loss": 3.2029, "step": 4641 }, { "epoch": 0.75, "learning_rate": 0.0001569847238954336, "loss": 3.1513, "step": 4642 }, { "epoch": 0.75, "learning_rate": 0.00015679476760135209, "loss": 3.3632, "step": 4643 }, { "epoch": 0.75, "learning_rate": 0.00015660490493023144, "loss": 3.0427, "step": 4644 }, { "epoch": 0.75, "learning_rate": 0.00015641513593386448, "loss": 3.0676, "step": 4645 }, { "epoch": 0.75, "learning_rate": 0.00015622546066401826, "loss": 3.1386, "step": 4646 }, { "epoch": 0.75, "learning_rate": 0.0001560358791724345, "loss": 3.2193, "step": 4647 }, { "epoch": 0.75, "learning_rate": 0.00015584639151082873, "loss": 3.126, "step": 4648 }, { "epoch": 0.75, "learning_rate": 0.0001556569977308916, "loss": 3.0454, "step": 4649 }, { "epoch": 0.75, "learning_rate": 0.00015546769788428772, "loss": 3.0527, "step": 4650 }, { "epoch": 0.75, "learning_rate": 0.00015527849202265625, "loss": 3.1509, "step": 4651 }, { "epoch": 0.75, "learning_rate": 0.00015508938019761066, "loss": 3.0953, "step": 4652 }, { "epoch": 0.75, "learning_rate": 0.0001549003624607389, "loss": 3.111, "step": 4653 }, { "epoch": 0.75, "learning_rate": 0.00015471143886360277, "loss": 3.1105, "step": 4654 }, { "epoch": 0.75, "learning_rate": 0.0001545226094577389, "loss": 2.9829, "step": 4655 }, { "epoch": 0.75, "learning_rate": 0.00015433387429465833, "loss": 3.1464, "step": 4656 }, { "epoch": 0.75, "learning_rate": 0.0001541452334258458, "loss": 3.3385, "step": 4657 }, { "epoch": 0.75, "learning_rate": 0.00015395668690276076, "loss": 3.0799, "step": 4658 }, { "epoch": 0.75, "learning_rate": 0.00015376823477683698, "loss": 3.0852, "step": 4659 }, { "epoch": 0.75, "learning_rate": 0.000153579877099482, "loss": 3.1752, "step": 4660 }, { "epoch": 0.75, "learning_rate": 0.00015339161392207795, "loss": 3.116, "step": 4661 }, { "epoch": 0.75, "learning_rate": 0.00015320344529598162, "loss": 3.1615, "step": 4662 }, { "epoch": 0.75, "learning_rate": 0.00015301537127252295, "loss": 3.2399, "step": 4663 }, { "epoch": 0.75, "learning_rate": 0.00015282739190300697, "loss": 3.2792, "step": 4664 }, { "epoch": 0.75, "learning_rate": 0.0001526395072387126, "loss": 3.1532, "step": 4665 }, { "epoch": 0.75, "learning_rate": 0.0001524517173308927, "loss": 3.0397, "step": 4666 }, { "epoch": 0.75, "learning_rate": 0.0001522640222307744, "loss": 3.0568, "step": 4667 }, { "epoch": 0.75, "learning_rate": 0.00015207642198955957, "loss": 3.1239, "step": 4668 }, { "epoch": 0.75, "learning_rate": 0.00015188891665842314, "loss": 3.0319, "step": 4669 }, { "epoch": 0.75, "learning_rate": 0.00015170150628851493, "loss": 3.0856, "step": 4670 }, { "epoch": 0.75, "learning_rate": 0.00015151419093095876, "loss": 3.1884, "step": 4671 }, { "epoch": 0.75, "learning_rate": 0.00015132697063685203, "loss": 3.0688, "step": 4672 }, { "epoch": 0.75, "learning_rate": 0.00015113984545726673, "loss": 3.2576, "step": 4673 }, { "epoch": 0.75, "learning_rate": 0.0001509528154432488, "loss": 3.242, "step": 4674 }, { "epoch": 0.75, "learning_rate": 0.0001507658806458181, "loss": 3.2179, "step": 4675 }, { "epoch": 0.75, "learning_rate": 0.0001505790411159686, "loss": 3.2161, "step": 4676 }, { "epoch": 0.75, "learning_rate": 0.00015039229690466845, "loss": 3.2918, "step": 4677 }, { "epoch": 0.75, "learning_rate": 0.00015020564806285925, "loss": 3.0003, "step": 4678 }, { "epoch": 0.75, "learning_rate": 0.00015001909464145707, "loss": 3.082, "step": 4679 }, { "epoch": 0.75, "learning_rate": 0.00014983263669135187, "loss": 3.3381, "step": 4680 }, { "epoch": 0.75, "learning_rate": 0.00014964627426340755, "loss": 3.1087, "step": 4681 }, { "epoch": 0.75, "learning_rate": 0.00014946000740846183, "loss": 3.2561, "step": 4682 }, { "epoch": 0.76, "learning_rate": 0.00014927383617732666, "loss": 3.3689, "step": 4683 }, { "epoch": 0.76, "learning_rate": 0.00014908776062078733, "loss": 3.2829, "step": 4684 }, { "epoch": 0.76, "learning_rate": 0.00014890178078960366, "loss": 3.2179, "step": 4685 }, { "epoch": 0.76, "learning_rate": 0.00014871589673450897, "loss": 3.1368, "step": 4686 }, { "epoch": 0.76, "learning_rate": 0.00014853010850621062, "loss": 3.1453, "step": 4687 }, { "epoch": 0.76, "learning_rate": 0.00014834441615538984, "loss": 3.0794, "step": 4688 }, { "epoch": 0.76, "learning_rate": 0.00014815881973270157, "loss": 3.2073, "step": 4689 }, { "epoch": 0.76, "learning_rate": 0.00014797331928877484, "loss": 3.072, "step": 4690 }, { "epoch": 0.76, "learning_rate": 0.000147787914874212, "loss": 3.0164, "step": 4691 }, { "epoch": 0.76, "learning_rate": 0.00014760260653958973, "loss": 3.0295, "step": 4692 }, { "epoch": 0.76, "learning_rate": 0.00014741739433545825, "loss": 3.1035, "step": 4693 }, { "epoch": 0.76, "learning_rate": 0.00014723227831234159, "loss": 3.2157, "step": 4694 }, { "epoch": 0.76, "learning_rate": 0.0001470472585207376, "loss": 3.1863, "step": 4695 }, { "epoch": 0.76, "learning_rate": 0.0001468623350111179, "loss": 3.125, "step": 4696 }, { "epoch": 0.76, "learning_rate": 0.00014667750783392753, "loss": 3.0151, "step": 4697 }, { "epoch": 0.76, "learning_rate": 0.0001464927770395857, "loss": 3.2317, "step": 4698 }, { "epoch": 0.76, "learning_rate": 0.00014630814267848502, "loss": 3.179, "step": 4699 }, { "epoch": 0.76, "learning_rate": 0.000146123604800992, "loss": 3.1452, "step": 4700 }, { "epoch": 0.76, "learning_rate": 0.00014593916345744667, "loss": 3.1667, "step": 4701 }, { "epoch": 0.76, "learning_rate": 0.00014575481869816292, "loss": 3.1468, "step": 4702 }, { "epoch": 0.76, "learning_rate": 0.00014557057057342792, "loss": 3.0361, "step": 4703 }, { "epoch": 0.76, "learning_rate": 0.00014538641913350287, "loss": 3.0457, "step": 4704 }, { "epoch": 0.76, "learning_rate": 0.00014520236442862238, "loss": 3.218, "step": 4705 }, { "epoch": 0.76, "learning_rate": 0.00014501840650899477, "loss": 3.2558, "step": 4706 }, { "epoch": 0.76, "learning_rate": 0.0001448345454248019, "loss": 3.2224, "step": 4707 }, { "epoch": 0.76, "learning_rate": 0.00014465078122619945, "loss": 3.0929, "step": 4708 }, { "epoch": 0.76, "learning_rate": 0.00014446711396331614, "loss": 3.2538, "step": 4709 }, { "epoch": 0.76, "learning_rate": 0.00014428354368625467, "loss": 3.0254, "step": 4710 }, { "epoch": 0.76, "learning_rate": 0.00014410007044509123, "loss": 3.0677, "step": 4711 }, { "epoch": 0.76, "learning_rate": 0.0001439166942898754, "loss": 3.1195, "step": 4712 }, { "epoch": 0.76, "learning_rate": 0.0001437334152706305, "loss": 3.243, "step": 4713 }, { "epoch": 0.76, "learning_rate": 0.0001435502334373533, "loss": 3.1226, "step": 4714 }, { "epoch": 0.76, "learning_rate": 0.00014336714884001362, "loss": 3.1822, "step": 4715 }, { "epoch": 0.76, "learning_rate": 0.00014318416152855535, "loss": 3.2075, "step": 4716 }, { "epoch": 0.76, "learning_rate": 0.00014300127155289572, "loss": 3.0089, "step": 4717 }, { "epoch": 0.76, "learning_rate": 0.00014281847896292484, "loss": 3.3334, "step": 4718 }, { "epoch": 0.76, "learning_rate": 0.00014263578380850706, "loss": 3.0239, "step": 4719 }, { "epoch": 0.76, "learning_rate": 0.00014245318613947998, "loss": 3.0748, "step": 4720 }, { "epoch": 0.76, "learning_rate": 0.00014227068600565395, "loss": 3.1174, "step": 4721 }, { "epoch": 0.76, "learning_rate": 0.00014208828345681345, "loss": 3.019, "step": 4722 }, { "epoch": 0.76, "learning_rate": 0.00014190597854271602, "loss": 3.2834, "step": 4723 }, { "epoch": 0.76, "learning_rate": 0.00014172377131309267, "loss": 3.1406, "step": 4724 }, { "epoch": 0.76, "learning_rate": 0.0001415416618176476, "loss": 3.1112, "step": 4725 }, { "epoch": 0.76, "learning_rate": 0.00014135965010605878, "loss": 3.1568, "step": 4726 }, { "epoch": 0.76, "learning_rate": 0.0001411777362279767, "loss": 3.0292, "step": 4727 }, { "epoch": 0.76, "learning_rate": 0.00014099592023302594, "loss": 3.1026, "step": 4728 }, { "epoch": 0.76, "learning_rate": 0.00014081420217080403, "loss": 3.1332, "step": 4729 }, { "epoch": 0.76, "learning_rate": 0.00014063258209088186, "loss": 3.119, "step": 4730 }, { "epoch": 0.76, "learning_rate": 0.00014045106004280362, "loss": 3.0669, "step": 4731 }, { "epoch": 0.76, "learning_rate": 0.0001402696360760868, "loss": 3.2539, "step": 4732 }, { "epoch": 0.76, "learning_rate": 0.00014008831024022166, "loss": 3.3096, "step": 4733 }, { "epoch": 0.76, "learning_rate": 0.00013990708258467243, "loss": 3.2309, "step": 4734 }, { "epoch": 0.76, "learning_rate": 0.0001397259531588761, "loss": 3.0476, "step": 4735 }, { "epoch": 0.76, "learning_rate": 0.00013954492201224289, "loss": 3.0703, "step": 4736 }, { "epoch": 0.76, "learning_rate": 0.00013936398919415645, "loss": 3.0459, "step": 4737 }, { "epoch": 0.76, "learning_rate": 0.00013918315475397336, "loss": 3.1504, "step": 4738 }, { "epoch": 0.76, "learning_rate": 0.0001390024187410236, "loss": 3.1693, "step": 4739 }, { "epoch": 0.76, "learning_rate": 0.0001388217812046098, "loss": 3.2382, "step": 4740 }, { "epoch": 0.76, "learning_rate": 0.00013864124219400837, "loss": 3.1005, "step": 4741 }, { "epoch": 0.76, "learning_rate": 0.0001384608017584686, "loss": 3.0691, "step": 4742 }, { "epoch": 0.76, "learning_rate": 0.00013828045994721238, "loss": 3.193, "step": 4743 }, { "epoch": 0.76, "learning_rate": 0.00013810021680943563, "loss": 3.2702, "step": 4744 }, { "epoch": 0.77, "learning_rate": 0.00013792007239430687, "loss": 3.0813, "step": 4745 }, { "epoch": 0.77, "learning_rate": 0.00013774002675096743, "loss": 3.175, "step": 4746 }, { "epoch": 0.77, "learning_rate": 0.00013756007992853209, "loss": 3.1572, "step": 4747 }, { "epoch": 0.77, "learning_rate": 0.00013738023197608866, "loss": 3.1296, "step": 4748 }, { "epoch": 0.77, "learning_rate": 0.0001372004829426975, "loss": 3.2493, "step": 4749 }, { "epoch": 0.77, "learning_rate": 0.0001370208328773927, "loss": 3.227, "step": 4750 }, { "epoch": 0.77, "learning_rate": 0.0001368412818291811, "loss": 3.2034, "step": 4751 }, { "epoch": 0.77, "learning_rate": 0.00013666182984704207, "loss": 3.1294, "step": 4752 }, { "epoch": 0.77, "learning_rate": 0.00013648247697992848, "loss": 3.1629, "step": 4753 }, { "epoch": 0.77, "learning_rate": 0.0001363032232767662, "loss": 3.1215, "step": 4754 }, { "epoch": 0.77, "learning_rate": 0.00013612406878645334, "loss": 3.1865, "step": 4755 }, { "epoch": 0.77, "learning_rate": 0.00013594501355786197, "loss": 3.1684, "step": 4756 }, { "epoch": 0.77, "learning_rate": 0.0001357660576398365, "loss": 3.3268, "step": 4757 }, { "epoch": 0.77, "learning_rate": 0.00013558720108119404, "loss": 3.1857, "step": 4758 }, { "epoch": 0.77, "learning_rate": 0.00013540844393072505, "loss": 3.1692, "step": 4759 }, { "epoch": 0.77, "learning_rate": 0.00013522978623719279, "loss": 3.1543, "step": 4760 }, { "epoch": 0.77, "learning_rate": 0.0001350512280493329, "loss": 3.2336, "step": 4761 }, { "epoch": 0.77, "learning_rate": 0.0001348727694158547, "loss": 3.1581, "step": 4762 }, { "epoch": 0.77, "learning_rate": 0.00013469441038543994, "loss": 3.1, "step": 4763 }, { "epoch": 0.77, "learning_rate": 0.00013451615100674286, "loss": 3.195, "step": 4764 }, { "epoch": 0.77, "learning_rate": 0.00013433799132839098, "loss": 3.0023, "step": 4765 }, { "epoch": 0.77, "learning_rate": 0.0001341599313989847, "loss": 3.1391, "step": 4766 }, { "epoch": 0.77, "learning_rate": 0.0001339819712670966, "loss": 3.1696, "step": 4767 }, { "epoch": 0.77, "learning_rate": 0.00013380411098127244, "loss": 2.9665, "step": 4768 }, { "epoch": 0.77, "learning_rate": 0.00013362635059003126, "loss": 3.1726, "step": 4769 }, { "epoch": 0.77, "learning_rate": 0.00013344869014186378, "loss": 3.1854, "step": 4770 }, { "epoch": 0.77, "learning_rate": 0.0001332711296852342, "loss": 3.0604, "step": 4771 }, { "epoch": 0.77, "learning_rate": 0.00013309366926857923, "loss": 3.1276, "step": 4772 }, { "epoch": 0.77, "learning_rate": 0.0001329163089403085, "loss": 3.1862, "step": 4773 }, { "epoch": 0.77, "learning_rate": 0.00013273904874880354, "loss": 3.0583, "step": 4774 }, { "epoch": 0.77, "learning_rate": 0.00013256188874241986, "loss": 3.2679, "step": 4775 }, { "epoch": 0.77, "learning_rate": 0.0001323848289694845, "loss": 3.1481, "step": 4776 }, { "epoch": 0.77, "learning_rate": 0.00013220786947829778, "loss": 3.1598, "step": 4777 }, { "epoch": 0.77, "learning_rate": 0.00013203101031713239, "loss": 3.2316, "step": 4778 }, { "epoch": 0.77, "learning_rate": 0.00013185425153423391, "loss": 3.264, "step": 4779 }, { "epoch": 0.77, "learning_rate": 0.00013167759317782003, "loss": 3.1103, "step": 4780 }, { "epoch": 0.77, "learning_rate": 0.00013150103529608183, "loss": 3.0893, "step": 4781 }, { "epoch": 0.77, "learning_rate": 0.00013132457793718218, "loss": 3.1284, "step": 4782 }, { "epoch": 0.77, "learning_rate": 0.00013114822114925701, "loss": 3.2, "step": 4783 }, { "epoch": 0.77, "learning_rate": 0.00013097196498041474, "loss": 3.1236, "step": 4784 }, { "epoch": 0.77, "learning_rate": 0.00013079580947873638, "loss": 3.1939, "step": 4785 }, { "epoch": 0.77, "learning_rate": 0.00013061975469227493, "loss": 3.1774, "step": 4786 }, { "epoch": 0.77, "learning_rate": 0.0001304438006690571, "loss": 3.1344, "step": 4787 }, { "epoch": 0.77, "learning_rate": 0.00013026794745708077, "loss": 3.1989, "step": 4788 }, { "epoch": 0.77, "learning_rate": 0.0001300921951043172, "loss": 3.1928, "step": 4789 }, { "epoch": 0.77, "learning_rate": 0.00012991654365870987, "loss": 3.1758, "step": 4790 }, { "epoch": 0.77, "learning_rate": 0.00012974099316817483, "loss": 3.2121, "step": 4791 }, { "epoch": 0.77, "learning_rate": 0.0001295655436806003, "loss": 3.1831, "step": 4792 }, { "epoch": 0.77, "learning_rate": 0.00012939019524384703, "loss": 3.1475, "step": 4793 }, { "epoch": 0.77, "learning_rate": 0.0001292149479057489, "loss": 3.1872, "step": 4794 }, { "epoch": 0.77, "learning_rate": 0.00012903980171411107, "loss": 3.3097, "step": 4795 }, { "epoch": 0.77, "learning_rate": 0.00012886475671671188, "loss": 3.1364, "step": 4796 }, { "epoch": 0.77, "learning_rate": 0.000128689812961302, "loss": 3.1821, "step": 4797 }, { "epoch": 0.77, "learning_rate": 0.00012851497049560406, "loss": 3.2509, "step": 4798 }, { "epoch": 0.77, "learning_rate": 0.00012834022936731332, "loss": 3.0814, "step": 4799 }, { "epoch": 0.77, "learning_rate": 0.00012816558962409785, "loss": 3.1439, "step": 4800 }, { "epoch": 0.77, "learning_rate": 0.00012799105131359719, "loss": 3.2606, "step": 4801 }, { "epoch": 0.77, "learning_rate": 0.0001278166144834238, "loss": 3.1709, "step": 4802 }, { "epoch": 0.77, "learning_rate": 0.00012764227918116245, "loss": 3.215, "step": 4803 }, { "epoch": 0.77, "learning_rate": 0.0001274680454543698, "loss": 3.1228, "step": 4804 }, { "epoch": 0.77, "learning_rate": 0.000127293913350575, "loss": 3.2092, "step": 4805 }, { "epoch": 0.77, "learning_rate": 0.0001271198829172801, "loss": 3.1427, "step": 4806 }, { "epoch": 0.78, "learning_rate": 0.00012694595420195836, "loss": 3.0412, "step": 4807 }, { "epoch": 0.78, "learning_rate": 0.00012677212725205594, "loss": 3.0979, "step": 4808 }, { "epoch": 0.78, "learning_rate": 0.00012659840211499124, "loss": 3.1395, "step": 4809 }, { "epoch": 0.78, "learning_rate": 0.00012642477883815446, "loss": 3.2791, "step": 4810 }, { "epoch": 0.78, "learning_rate": 0.0001262512574689083, "loss": 3.0985, "step": 4811 }, { "epoch": 0.78, "learning_rate": 0.00012607783805458812, "loss": 3.1743, "step": 4812 }, { "epoch": 0.78, "learning_rate": 0.00012590452064250053, "loss": 3.1178, "step": 4813 }, { "epoch": 0.78, "learning_rate": 0.00012573130527992492, "loss": 3.2321, "step": 4814 }, { "epoch": 0.78, "learning_rate": 0.0001255581920141129, "loss": 2.9761, "step": 4815 }, { "epoch": 0.78, "learning_rate": 0.00012538518089228774, "loss": 3.1456, "step": 4816 }, { "epoch": 0.78, "learning_rate": 0.0001252122719616453, "loss": 3.2656, "step": 4817 }, { "epoch": 0.78, "learning_rate": 0.0001250394652693534, "loss": 3.2394, "step": 4818 }, { "epoch": 0.78, "learning_rate": 0.00012486676086255195, "loss": 3.1443, "step": 4819 }, { "epoch": 0.78, "learning_rate": 0.000124694158788353, "loss": 3.0552, "step": 4820 }, { "epoch": 0.78, "learning_rate": 0.0001245216590938409, "loss": 3.1, "step": 4821 }, { "epoch": 0.78, "learning_rate": 0.00012434926182607143, "loss": 3.0593, "step": 4822 }, { "epoch": 0.78, "learning_rate": 0.00012417696703207303, "loss": 3.1442, "step": 4823 }, { "epoch": 0.78, "learning_rate": 0.00012400477475884604, "loss": 3.1415, "step": 4824 }, { "epoch": 0.78, "learning_rate": 0.00012383268505336277, "loss": 3.1526, "step": 4825 }, { "epoch": 0.78, "learning_rate": 0.00012366069796256756, "loss": 3.1983, "step": 4826 }, { "epoch": 0.78, "learning_rate": 0.0001234888135333768, "loss": 3.1195, "step": 4827 }, { "epoch": 0.78, "learning_rate": 0.000123317031812679, "loss": 3.0281, "step": 4828 }, { "epoch": 0.78, "learning_rate": 0.00012314535284733413, "loss": 3.1642, "step": 4829 }, { "epoch": 0.78, "learning_rate": 0.00012297377668417476, "loss": 3.2705, "step": 4830 }, { "epoch": 0.78, "learning_rate": 0.00012280230337000504, "loss": 3.2031, "step": 4831 }, { "epoch": 0.78, "learning_rate": 0.0001226309329516012, "loss": 3.2556, "step": 4832 }, { "epoch": 0.78, "learning_rate": 0.00012245966547571146, "loss": 3.203, "step": 4833 }, { "epoch": 0.78, "learning_rate": 0.00012228850098905598, "loss": 3.052, "step": 4834 }, { "epoch": 0.78, "learning_rate": 0.00012211743953832637, "loss": 3.1725, "step": 4835 }, { "epoch": 0.78, "learning_rate": 0.00012194648117018676, "loss": 3.173, "step": 4836 }, { "epoch": 0.78, "learning_rate": 0.00012177562593127272, "loss": 3.1276, "step": 4837 }, { "epoch": 0.78, "learning_rate": 0.00012160487386819202, "loss": 3.0708, "step": 4838 }, { "epoch": 0.78, "learning_rate": 0.00012143422502752399, "loss": 3.1303, "step": 4839 }, { "epoch": 0.78, "learning_rate": 0.00012126367945582017, "loss": 3.1881, "step": 4840 }, { "epoch": 0.78, "learning_rate": 0.00012109323719960336, "loss": 3.0731, "step": 4841 }, { "epoch": 0.78, "learning_rate": 0.00012092289830536857, "loss": 3.144, "step": 4842 }, { "epoch": 0.78, "learning_rate": 0.00012075266281958269, "loss": 3.2424, "step": 4843 }, { "epoch": 0.78, "learning_rate": 0.0001205825307886842, "loss": 3.3722, "step": 4844 }, { "epoch": 0.78, "learning_rate": 0.0001204125022590834, "loss": 3.1274, "step": 4845 }, { "epoch": 0.78, "learning_rate": 0.00012024257727716253, "loss": 3.1859, "step": 4846 }, { "epoch": 0.78, "learning_rate": 0.00012007275588927519, "loss": 3.228, "step": 4847 }, { "epoch": 0.78, "learning_rate": 0.00011990303814174702, "loss": 3.2129, "step": 4848 }, { "epoch": 0.78, "learning_rate": 0.0001197334240808754, "loss": 3.084, "step": 4849 }, { "epoch": 0.78, "learning_rate": 0.0001195639137529293, "loss": 3.1734, "step": 4850 }, { "epoch": 0.78, "learning_rate": 0.00011939450720414946, "loss": 3.2095, "step": 4851 }, { "epoch": 0.78, "learning_rate": 0.00011922520448074841, "loss": 3.0224, "step": 4852 }, { "epoch": 0.78, "learning_rate": 0.00011905600562891006, "loss": 3.1346, "step": 4853 }, { "epoch": 0.78, "learning_rate": 0.00011888691069479018, "loss": 3.0641, "step": 4854 }, { "epoch": 0.78, "learning_rate": 0.00011871791972451628, "loss": 3.0481, "step": 4855 }, { "epoch": 0.78, "learning_rate": 0.00011854903276418738, "loss": 3.1751, "step": 4856 }, { "epoch": 0.78, "learning_rate": 0.00011838024985987411, "loss": 3.0715, "step": 4857 }, { "epoch": 0.78, "learning_rate": 0.00011821157105761898, "loss": 3.0544, "step": 4858 }, { "epoch": 0.78, "learning_rate": 0.00011804299640343551, "loss": 3.1181, "step": 4859 }, { "epoch": 0.78, "learning_rate": 0.00011787452594330944, "loss": 3.4344, "step": 4860 }, { "epoch": 0.78, "learning_rate": 0.00011770615972319771, "loss": 3.0966, "step": 4861 }, { "epoch": 0.78, "learning_rate": 0.00011753789778902902, "loss": 3.2174, "step": 4862 }, { "epoch": 0.78, "learning_rate": 0.0001173697401867035, "loss": 3.1807, "step": 4863 }, { "epoch": 0.78, "learning_rate": 0.00011720168696209305, "loss": 3.0867, "step": 4864 }, { "epoch": 0.78, "learning_rate": 0.00011703373816104062, "loss": 3.3051, "step": 4865 }, { "epoch": 0.78, "learning_rate": 0.00011686589382936108, "loss": 3.0291, "step": 4866 }, { "epoch": 0.78, "learning_rate": 0.00011669815401284073, "loss": 3.2071, "step": 4867 }, { "epoch": 0.78, "learning_rate": 0.00011653051875723725, "loss": 3.0775, "step": 4868 }, { "epoch": 0.79, "learning_rate": 0.00011636298810827994, "loss": 3.2901, "step": 4869 }, { "epoch": 0.79, "learning_rate": 0.0001161955621116696, "loss": 3.1684, "step": 4870 }, { "epoch": 0.79, "learning_rate": 0.00011602824081307805, "loss": 3.0837, "step": 4871 }, { "epoch": 0.79, "learning_rate": 0.0001158610242581491, "loss": 2.9655, "step": 4872 }, { "epoch": 0.79, "learning_rate": 0.00011569391249249766, "loss": 3.1339, "step": 4873 }, { "epoch": 0.79, "learning_rate": 0.00011552690556171019, "loss": 3.1537, "step": 4874 }, { "epoch": 0.79, "learning_rate": 0.00011536000351134457, "loss": 3.3432, "step": 4875 }, { "epoch": 0.79, "learning_rate": 0.00011519320638692993, "loss": 3.1176, "step": 4876 }, { "epoch": 0.79, "learning_rate": 0.00011502651423396698, "loss": 3.2177, "step": 4877 }, { "epoch": 0.79, "learning_rate": 0.00011485992709792753, "loss": 3.1806, "step": 4878 }, { "epoch": 0.79, "learning_rate": 0.00011469344502425488, "loss": 3.1027, "step": 4879 }, { "epoch": 0.79, "learning_rate": 0.00011452706805836376, "loss": 3.0263, "step": 4880 }, { "epoch": 0.79, "learning_rate": 0.00011436079624564011, "loss": 3.0376, "step": 4881 }, { "epoch": 0.79, "learning_rate": 0.00011419462963144129, "loss": 3.2135, "step": 4882 }, { "epoch": 0.79, "learning_rate": 0.00011402856826109592, "loss": 3.2435, "step": 4883 }, { "epoch": 0.79, "learning_rate": 0.00011386261217990368, "loss": 3.1854, "step": 4884 }, { "epoch": 0.79, "learning_rate": 0.0001136967614331359, "loss": 3.3745, "step": 4885 }, { "epoch": 0.79, "learning_rate": 0.00011353101606603489, "loss": 3.1521, "step": 4886 }, { "epoch": 0.79, "learning_rate": 0.00011336537612381448, "loss": 2.9812, "step": 4887 }, { "epoch": 0.79, "learning_rate": 0.00011319984165165947, "loss": 3.1545, "step": 4888 }, { "epoch": 0.79, "learning_rate": 0.0001130344126947262, "loss": 3.121, "step": 4889 }, { "epoch": 0.79, "learning_rate": 0.00011286908929814176, "loss": 3.2406, "step": 4890 }, { "epoch": 0.79, "learning_rate": 0.00011270387150700489, "loss": 3.1395, "step": 4891 }, { "epoch": 0.79, "learning_rate": 0.00011253875936638542, "loss": 3.1566, "step": 4892 }, { "epoch": 0.79, "learning_rate": 0.00011237375292132395, "loss": 3.1297, "step": 4893 }, { "epoch": 0.79, "learning_rate": 0.00011220885221683297, "loss": 3.0869, "step": 4894 }, { "epoch": 0.79, "learning_rate": 0.00011204405729789574, "loss": 3.2534, "step": 4895 }, { "epoch": 0.79, "learning_rate": 0.00011187936820946642, "loss": 3.2705, "step": 4896 }, { "epoch": 0.79, "learning_rate": 0.00011171478499647064, "loss": 3.0887, "step": 4897 }, { "epoch": 0.79, "learning_rate": 0.00011155030770380525, "loss": 3.0535, "step": 4898 }, { "epoch": 0.79, "learning_rate": 0.0001113859363763376, "loss": 3.0782, "step": 4899 }, { "epoch": 0.79, "learning_rate": 0.0001112216710589069, "loss": 3.0684, "step": 4900 }, { "epoch": 0.79, "learning_rate": 0.00011105751179632318, "loss": 3.0915, "step": 4901 }, { "epoch": 0.79, "learning_rate": 0.00011089345863336703, "loss": 3.1393, "step": 4902 }, { "epoch": 0.79, "learning_rate": 0.00011072951161479083, "loss": 3.0918, "step": 4903 }, { "epoch": 0.79, "learning_rate": 0.00011056567078531771, "loss": 3.1304, "step": 4904 }, { "epoch": 0.79, "learning_rate": 0.0001104019361896414, "loss": 3.1763, "step": 4905 }, { "epoch": 0.79, "learning_rate": 0.0001102383078724275, "loss": 3.2028, "step": 4906 }, { "epoch": 0.79, "learning_rate": 0.00011007478587831222, "loss": 3.2097, "step": 4907 }, { "epoch": 0.79, "learning_rate": 0.00010991137025190239, "loss": 3.0088, "step": 4908 }, { "epoch": 0.79, "learning_rate": 0.00010974806103777635, "loss": 3.0237, "step": 4909 }, { "epoch": 0.79, "learning_rate": 0.0001095848582804832, "loss": 3.1475, "step": 4910 }, { "epoch": 0.79, "learning_rate": 0.00010942176202454302, "loss": 3.2141, "step": 4911 }, { "epoch": 0.79, "learning_rate": 0.00010925877231444687, "loss": 3.0692, "step": 4912 }, { "epoch": 0.79, "learning_rate": 0.0001090958891946568, "loss": 3.2114, "step": 4913 }, { "epoch": 0.79, "learning_rate": 0.00010893311270960542, "loss": 3.2398, "step": 4914 }, { "epoch": 0.79, "learning_rate": 0.0001087704429036967, "loss": 3.1053, "step": 4915 }, { "epoch": 0.79, "learning_rate": 0.00010860787982130538, "loss": 3.217, "step": 4916 }, { "epoch": 0.79, "learning_rate": 0.0001084454235067771, "loss": 3.0064, "step": 4917 }, { "epoch": 0.79, "learning_rate": 0.000108283074004428, "loss": 3.1373, "step": 4918 }, { "epoch": 0.79, "learning_rate": 0.00010812083135854589, "loss": 3.1779, "step": 4919 }, { "epoch": 0.79, "learning_rate": 0.0001079586956133885, "loss": 3.0326, "step": 4920 }, { "epoch": 0.79, "learning_rate": 0.00010779666681318507, "loss": 3.078, "step": 4921 }, { "epoch": 0.79, "learning_rate": 0.00010763474500213539, "loss": 3.0738, "step": 4922 }, { "epoch": 0.79, "learning_rate": 0.00010747293022441024, "loss": 3.1753, "step": 4923 }, { "epoch": 0.79, "learning_rate": 0.00010731122252415065, "loss": 3.1369, "step": 4924 }, { "epoch": 0.79, "learning_rate": 0.0001071496219454695, "loss": 3.1852, "step": 4925 }, { "epoch": 0.79, "learning_rate": 0.00010698812853244932, "loss": 3.0958, "step": 4926 }, { "epoch": 0.79, "learning_rate": 0.00010682674232914407, "loss": 3.0674, "step": 4927 }, { "epoch": 0.79, "learning_rate": 0.00010666546337957828, "loss": 3.1281, "step": 4928 }, { "epoch": 0.79, "learning_rate": 0.0001065042917277474, "loss": 3.2644, "step": 4929 }, { "epoch": 0.79, "learning_rate": 0.00010634322741761699, "loss": 3.0315, "step": 4930 }, { "epoch": 0.8, "learning_rate": 0.00010618227049312423, "loss": 3.2259, "step": 4931 }, { "epoch": 0.8, "learning_rate": 0.00010602142099817658, "loss": 3.0497, "step": 4932 }, { "epoch": 0.8, "learning_rate": 0.00010586067897665186, "loss": 3.1686, "step": 4933 }, { "epoch": 0.8, "learning_rate": 0.00010570004447239906, "loss": 3.0573, "step": 4934 }, { "epoch": 0.8, "learning_rate": 0.00010553951752923779, "loss": 3.1807, "step": 4935 }, { "epoch": 0.8, "learning_rate": 0.00010537909819095781, "loss": 3.3023, "step": 4936 }, { "epoch": 0.8, "learning_rate": 0.00010521878650132027, "loss": 3.1906, "step": 4937 }, { "epoch": 0.8, "learning_rate": 0.00010505858250405664, "loss": 3.2826, "step": 4938 }, { "epoch": 0.8, "learning_rate": 0.00010489848624286868, "loss": 3.0047, "step": 4939 }, { "epoch": 0.8, "learning_rate": 0.00010473849776142924, "loss": 3.2077, "step": 4940 }, { "epoch": 0.8, "learning_rate": 0.00010457861710338167, "loss": 3.1718, "step": 4941 }, { "epoch": 0.8, "learning_rate": 0.0001044188443123395, "loss": 3.1703, "step": 4942 }, { "epoch": 0.8, "learning_rate": 0.00010425917943188728, "loss": 3.1818, "step": 4943 }, { "epoch": 0.8, "learning_rate": 0.00010409962250558025, "loss": 3.0449, "step": 4944 }, { "epoch": 0.8, "learning_rate": 0.00010394017357694368, "loss": 3.0185, "step": 4945 }, { "epoch": 0.8, "learning_rate": 0.00010378083268947369, "loss": 3.1774, "step": 4946 }, { "epoch": 0.8, "learning_rate": 0.00010362159988663699, "loss": 3.0863, "step": 4947 }, { "epoch": 0.8, "learning_rate": 0.00010346247521187058, "loss": 3.0478, "step": 4948 }, { "epoch": 0.8, "learning_rate": 0.00010330345870858194, "loss": 2.9725, "step": 4949 }, { "epoch": 0.8, "learning_rate": 0.00010314455042014964, "loss": 3.0298, "step": 4950 }, { "epoch": 0.8, "learning_rate": 0.00010298575038992186, "loss": 3.1567, "step": 4951 }, { "epoch": 0.8, "learning_rate": 0.00010282705866121778, "loss": 3.0787, "step": 4952 }, { "epoch": 0.8, "learning_rate": 0.00010266847527732714, "loss": 3.2465, "step": 4953 }, { "epoch": 0.8, "learning_rate": 0.00010251000028150954, "loss": 3.0574, "step": 4954 }, { "epoch": 0.8, "learning_rate": 0.00010235163371699541, "loss": 3.1112, "step": 4955 }, { "epoch": 0.8, "learning_rate": 0.00010219337562698594, "loss": 3.1573, "step": 4956 }, { "epoch": 0.8, "learning_rate": 0.00010203522605465204, "loss": 3.094, "step": 4957 }, { "epoch": 0.8, "learning_rate": 0.00010187718504313537, "loss": 3.1952, "step": 4958 }, { "epoch": 0.8, "learning_rate": 0.00010171925263554815, "loss": 3.1114, "step": 4959 }, { "epoch": 0.8, "learning_rate": 0.00010156142887497244, "loss": 3.099, "step": 4960 }, { "epoch": 0.8, "learning_rate": 0.000101403713804461, "loss": 3.211, "step": 4961 }, { "epoch": 0.8, "learning_rate": 0.00010124610746703738, "loss": 3.1278, "step": 4962 }, { "epoch": 0.8, "learning_rate": 0.00010108860990569452, "loss": 3.1937, "step": 4963 }, { "epoch": 0.8, "learning_rate": 0.00010093122116339642, "loss": 3.2154, "step": 4964 }, { "epoch": 0.8, "learning_rate": 0.00010077394128307704, "loss": 3.1238, "step": 4965 }, { "epoch": 0.8, "learning_rate": 0.00010061677030764105, "loss": 3.1657, "step": 4966 }, { "epoch": 0.8, "learning_rate": 0.0001004597082799627, "loss": 3.0789, "step": 4967 }, { "epoch": 0.8, "learning_rate": 0.00010030275524288718, "loss": 3.1792, "step": 4968 }, { "epoch": 0.8, "learning_rate": 0.00010014591123922962, "loss": 3.237, "step": 4969 }, { "epoch": 0.8, "learning_rate": 9.998917631177557e-05, "loss": 3.006, "step": 4970 }, { "epoch": 0.8, "learning_rate": 9.983255050328077e-05, "loss": 3.1297, "step": 4971 }, { "epoch": 0.8, "learning_rate": 9.967603385647128e-05, "loss": 3.1067, "step": 4972 }, { "epoch": 0.8, "learning_rate": 9.9519626414043e-05, "loss": 3.2081, "step": 4973 }, { "epoch": 0.8, "learning_rate": 9.936332821866239e-05, "loss": 3.2674, "step": 4974 }, { "epoch": 0.8, "learning_rate": 9.92071393129662e-05, "loss": 3.0694, "step": 4975 }, { "epoch": 0.8, "learning_rate": 9.905105973956107e-05, "loss": 3.2677, "step": 4976 }, { "epoch": 0.8, "learning_rate": 9.8895089541024e-05, "loss": 3.1751, "step": 4977 }, { "epoch": 0.8, "learning_rate": 9.87392287599022e-05, "loss": 3.0518, "step": 4978 }, { "epoch": 0.8, "learning_rate": 9.858347743871277e-05, "loss": 3.1192, "step": 4979 }, { "epoch": 0.8, "learning_rate": 9.842783561994295e-05, "loss": 3.2888, "step": 4980 }, { "epoch": 0.8, "learning_rate": 9.827230334605086e-05, "loss": 3.1125, "step": 4981 }, { "epoch": 0.8, "learning_rate": 9.811688065946361e-05, "loss": 3.3754, "step": 4982 }, { "epoch": 0.8, "learning_rate": 9.796156760257912e-05, "loss": 3.1284, "step": 4983 }, { "epoch": 0.8, "learning_rate": 9.78063642177654e-05, "loss": 3.0273, "step": 4984 }, { "epoch": 0.8, "learning_rate": 9.765127054736011e-05, "loss": 3.1209, "step": 4985 }, { "epoch": 0.8, "learning_rate": 9.749628663367127e-05, "loss": 2.9797, "step": 4986 }, { "epoch": 0.8, "learning_rate": 9.734141251897733e-05, "loss": 3.3267, "step": 4987 }, { "epoch": 0.8, "learning_rate": 9.718664824552604e-05, "loss": 3.1869, "step": 4988 }, { "epoch": 0.8, "learning_rate": 9.703199385553558e-05, "loss": 3.1266, "step": 4989 }, { "epoch": 0.8, "learning_rate": 9.68774493911943e-05, "loss": 3.0127, "step": 4990 }, { "epoch": 0.8, "learning_rate": 9.672301489466023e-05, "loss": 3.1857, "step": 4991 }, { "epoch": 0.8, "learning_rate": 9.656869040806155e-05, "loss": 3.2729, "step": 4992 }, { "epoch": 0.81, "learning_rate": 9.641447597349651e-05, "loss": 3.2776, "step": 4993 }, { "epoch": 0.81, "learning_rate": 9.626037163303319e-05, "loss": 3.0817, "step": 4994 }, { "epoch": 0.81, "learning_rate": 9.610637742870987e-05, "loss": 3.0875, "step": 4995 }, { "epoch": 0.81, "learning_rate": 9.595249340253459e-05, "loss": 3.168, "step": 4996 }, { "epoch": 0.81, "learning_rate": 9.579871959648523e-05, "loss": 2.8711, "step": 4997 }, { "epoch": 0.81, "learning_rate": 9.564505605250984e-05, "loss": 2.9325, "step": 4998 }, { "epoch": 0.81, "learning_rate": 9.549150281252633e-05, "loss": 3.0769, "step": 4999 }, { "epoch": 0.81, "learning_rate": 9.533805991842242e-05, "loss": 3.1756, "step": 5000 }, { "epoch": 0.81, "learning_rate": 9.518472741205586e-05, "loss": 3.1727, "step": 5001 }, { "epoch": 0.81, "learning_rate": 9.503150533525435e-05, "loss": 3.1319, "step": 5002 }, { "epoch": 0.81, "learning_rate": 9.487839372981505e-05, "loss": 3.1683, "step": 5003 }, { "epoch": 0.81, "learning_rate": 9.472539263750546e-05, "loss": 3.1614, "step": 5004 }, { "epoch": 0.81, "learning_rate": 9.457250210006274e-05, "loss": 3.0727, "step": 5005 }, { "epoch": 0.81, "learning_rate": 9.441972215919387e-05, "loss": 3.1181, "step": 5006 }, { "epoch": 0.81, "learning_rate": 9.426705285657567e-05, "loss": 3.1921, "step": 5007 }, { "epoch": 0.81, "learning_rate": 9.411449423385499e-05, "loss": 3.108, "step": 5008 }, { "epoch": 0.81, "learning_rate": 9.396204633264798e-05, "loss": 3.0903, "step": 5009 }, { "epoch": 0.81, "learning_rate": 9.380970919454113e-05, "loss": 3.0797, "step": 5010 }, { "epoch": 0.81, "learning_rate": 9.365748286109044e-05, "loss": 3.0541, "step": 5011 }, { "epoch": 0.81, "learning_rate": 9.35053673738217e-05, "loss": 3.1747, "step": 5012 }, { "epoch": 0.81, "learning_rate": 9.335336277423051e-05, "loss": 3.0877, "step": 5013 }, { "epoch": 0.81, "learning_rate": 9.320146910378224e-05, "loss": 3.2224, "step": 5014 }, { "epoch": 0.81, "learning_rate": 9.30496864039121e-05, "loss": 3.2575, "step": 5015 }, { "epoch": 0.81, "learning_rate": 9.289801471602455e-05, "loss": 3.0143, "step": 5016 }, { "epoch": 0.81, "learning_rate": 9.274645408149435e-05, "loss": 3.1051, "step": 5017 }, { "epoch": 0.81, "learning_rate": 9.25950045416657e-05, "loss": 3.1506, "step": 5018 }, { "epoch": 0.81, "learning_rate": 9.244366613785249e-05, "loss": 3.1688, "step": 5019 }, { "epoch": 0.81, "learning_rate": 9.229243891133832e-05, "loss": 3.0318, "step": 5020 }, { "epoch": 0.81, "learning_rate": 9.214132290337663e-05, "loss": 3.1099, "step": 5021 }, { "epoch": 0.81, "learning_rate": 9.19903181551901e-05, "loss": 3.1499, "step": 5022 }, { "epoch": 0.81, "learning_rate": 9.183942470797141e-05, "loss": 3.1946, "step": 5023 }, { "epoch": 0.81, "learning_rate": 9.168864260288285e-05, "loss": 3.2483, "step": 5024 }, { "epoch": 0.81, "learning_rate": 9.153797188105623e-05, "loss": 3.1258, "step": 5025 }, { "epoch": 0.81, "learning_rate": 9.138741258359295e-05, "loss": 2.9609, "step": 5026 }, { "epoch": 0.81, "learning_rate": 9.123696475156434e-05, "loss": 2.9939, "step": 5027 }, { "epoch": 0.81, "learning_rate": 9.108662842601079e-05, "loss": 3.0845, "step": 5028 }, { "epoch": 0.81, "learning_rate": 9.093640364794258e-05, "loss": 3.0058, "step": 5029 }, { "epoch": 0.81, "learning_rate": 9.078629045833964e-05, "loss": 3.0417, "step": 5030 }, { "epoch": 0.81, "learning_rate": 9.063628889815128e-05, "loss": 3.0204, "step": 5031 }, { "epoch": 0.81, "learning_rate": 9.048639900829642e-05, "loss": 3.0949, "step": 5032 }, { "epoch": 0.81, "learning_rate": 9.03366208296637e-05, "loss": 3.0728, "step": 5033 }, { "epoch": 0.81, "learning_rate": 9.018695440311087e-05, "loss": 3.1579, "step": 5034 }, { "epoch": 0.81, "learning_rate": 9.003739976946552e-05, "loss": 3.1137, "step": 5035 }, { "epoch": 0.81, "learning_rate": 8.988795696952462e-05, "loss": 3.0077, "step": 5036 }, { "epoch": 0.81, "learning_rate": 8.97386260440548e-05, "loss": 3.1329, "step": 5037 }, { "epoch": 0.81, "learning_rate": 8.958940703379192e-05, "loss": 3.3283, "step": 5038 }, { "epoch": 0.81, "learning_rate": 8.944029997944165e-05, "loss": 3.1693, "step": 5039 }, { "epoch": 0.81, "learning_rate": 8.929130492167864e-05, "loss": 3.1059, "step": 5040 }, { "epoch": 0.81, "learning_rate": 8.91424219011473e-05, "loss": 3.0857, "step": 5041 }, { "epoch": 0.81, "learning_rate": 8.899365095846167e-05, "loss": 3.2368, "step": 5042 }, { "epoch": 0.81, "learning_rate": 8.88449921342045e-05, "loss": 3.0072, "step": 5043 }, { "epoch": 0.81, "learning_rate": 8.869644546892891e-05, "loss": 3.053, "step": 5044 }, { "epoch": 0.81, "learning_rate": 8.854801100315685e-05, "loss": 3.1801, "step": 5045 }, { "epoch": 0.81, "learning_rate": 8.839968877737958e-05, "loss": 3.201, "step": 5046 }, { "epoch": 0.81, "learning_rate": 8.825147883205804e-05, "loss": 3.121, "step": 5047 }, { "epoch": 0.81, "learning_rate": 8.810338120762235e-05, "loss": 3.1162, "step": 5048 }, { "epoch": 0.81, "learning_rate": 8.795539594447216e-05, "loss": 3.1618, "step": 5049 }, { "epoch": 0.81, "learning_rate": 8.780752308297629e-05, "loss": 3.1336, "step": 5050 }, { "epoch": 0.81, "learning_rate": 8.76597626634732e-05, "loss": 3.0342, "step": 5051 }, { "epoch": 0.81, "learning_rate": 8.751211472627014e-05, "loss": 3.1021, "step": 5052 }, { "epoch": 0.81, "learning_rate": 8.736457931164416e-05, "loss": 3.2496, "step": 5053 }, { "epoch": 0.81, "learning_rate": 8.721715645984135e-05, "loss": 2.9835, "step": 5054 }, { "epoch": 0.82, "learning_rate": 8.706984621107733e-05, "loss": 3.2831, "step": 5055 }, { "epoch": 0.82, "learning_rate": 8.692264860553673e-05, "loss": 3.249, "step": 5056 }, { "epoch": 0.82, "learning_rate": 8.677556368337386e-05, "loss": 2.9502, "step": 5057 }, { "epoch": 0.82, "learning_rate": 8.662859148471164e-05, "loss": 3.0645, "step": 5058 }, { "epoch": 0.82, "learning_rate": 8.648173204964277e-05, "loss": 3.3553, "step": 5059 }, { "epoch": 0.82, "learning_rate": 8.633498541822909e-05, "loss": 3.0614, "step": 5060 }, { "epoch": 0.82, "learning_rate": 8.61883516305016e-05, "loss": 2.975, "step": 5061 }, { "epoch": 0.82, "learning_rate": 8.604183072646055e-05, "loss": 3.1529, "step": 5062 }, { "epoch": 0.82, "learning_rate": 8.589542274607543e-05, "loss": 3.2732, "step": 5063 }, { "epoch": 0.82, "learning_rate": 8.574912772928461e-05, "loss": 3.1847, "step": 5064 }, { "epoch": 0.82, "learning_rate": 8.56029457159962e-05, "loss": 3.0164, "step": 5065 }, { "epoch": 0.82, "learning_rate": 8.545687674608704e-05, "loss": 3.0532, "step": 5066 }, { "epoch": 0.82, "learning_rate": 8.531092085940345e-05, "loss": 3.1011, "step": 5067 }, { "epoch": 0.82, "learning_rate": 8.516507809576041e-05, "loss": 3.01, "step": 5068 }, { "epoch": 0.82, "learning_rate": 8.50193484949427e-05, "loss": 3.2182, "step": 5069 }, { "epoch": 0.82, "learning_rate": 8.487373209670391e-05, "loss": 3.0786, "step": 5070 }, { "epoch": 0.82, "learning_rate": 8.472822894076643e-05, "loss": 3.1203, "step": 5071 }, { "epoch": 0.82, "learning_rate": 8.458283906682229e-05, "loss": 3.1015, "step": 5072 }, { "epoch": 0.82, "learning_rate": 8.443756251453249e-05, "loss": 3.2932, "step": 5073 }, { "epoch": 0.82, "learning_rate": 8.429239932352666e-05, "loss": 3.1192, "step": 5074 }, { "epoch": 0.82, "learning_rate": 8.414734953340419e-05, "loss": 3.2206, "step": 5075 }, { "epoch": 0.82, "learning_rate": 8.400241318373331e-05, "loss": 3.1854, "step": 5076 }, { "epoch": 0.82, "learning_rate": 8.385759031405082e-05, "loss": 3.3214, "step": 5077 }, { "epoch": 0.82, "learning_rate": 8.371288096386321e-05, "loss": 3.2587, "step": 5078 }, { "epoch": 0.82, "learning_rate": 8.35682851726458e-05, "loss": 3.1495, "step": 5079 }, { "epoch": 0.82, "learning_rate": 8.342380297984253e-05, "loss": 3.0329, "step": 5080 }, { "epoch": 0.82, "learning_rate": 8.327943442486708e-05, "loss": 3.114, "step": 5081 }, { "epoch": 0.82, "learning_rate": 8.31351795471017e-05, "loss": 3.0755, "step": 5082 }, { "epoch": 0.82, "learning_rate": 8.29910383858975e-05, "loss": 3.2667, "step": 5083 }, { "epoch": 0.82, "learning_rate": 8.284701098057485e-05, "loss": 3.1781, "step": 5084 }, { "epoch": 0.82, "learning_rate": 8.270309737042308e-05, "loss": 3.3113, "step": 5085 }, { "epoch": 0.82, "learning_rate": 8.255929759470004e-05, "loss": 3.1095, "step": 5086 }, { "epoch": 0.82, "learning_rate": 8.241561169263329e-05, "loss": 3.2347, "step": 5087 }, { "epoch": 0.82, "learning_rate": 8.227203970341879e-05, "loss": 3.0496, "step": 5088 }, { "epoch": 0.82, "learning_rate": 8.21285816662214e-05, "loss": 3.0478, "step": 5089 }, { "epoch": 0.82, "learning_rate": 8.198523762017512e-05, "loss": 3.002, "step": 5090 }, { "epoch": 0.82, "learning_rate": 8.184200760438298e-05, "loss": 3.2932, "step": 5091 }, { "epoch": 0.82, "learning_rate": 8.169889165791633e-05, "loss": 3.1013, "step": 5092 }, { "epoch": 0.82, "learning_rate": 8.155588981981583e-05, "loss": 3.1271, "step": 5093 }, { "epoch": 0.82, "learning_rate": 8.141300212909131e-05, "loss": 3.2487, "step": 5094 }, { "epoch": 0.82, "learning_rate": 8.127022862472077e-05, "loss": 3.1907, "step": 5095 }, { "epoch": 0.82, "learning_rate": 8.112756934565146e-05, "loss": 2.9575, "step": 5096 }, { "epoch": 0.82, "learning_rate": 8.098502433079963e-05, "loss": 3.2369, "step": 5097 }, { "epoch": 0.82, "learning_rate": 8.084259361904977e-05, "loss": 3.0907, "step": 5098 }, { "epoch": 0.82, "learning_rate": 8.070027724925565e-05, "loss": 3.0067, "step": 5099 }, { "epoch": 0.82, "learning_rate": 8.055807526024005e-05, "loss": 3.1305, "step": 5100 }, { "epoch": 0.82, "learning_rate": 8.041598769079395e-05, "loss": 3.1364, "step": 5101 }, { "epoch": 0.82, "learning_rate": 8.027401457967748e-05, "loss": 3.2659, "step": 5102 }, { "epoch": 0.82, "learning_rate": 8.013215596561957e-05, "loss": 3.1394, "step": 5103 }, { "epoch": 0.82, "learning_rate": 7.999041188731787e-05, "loss": 3.1791, "step": 5104 }, { "epoch": 0.82, "learning_rate": 7.984878238343846e-05, "loss": 3.0557, "step": 5105 }, { "epoch": 0.82, "learning_rate": 7.970726749261687e-05, "loss": 2.9985, "step": 5106 }, { "epoch": 0.82, "learning_rate": 7.95658672534566e-05, "loss": 3.1085, "step": 5107 }, { "epoch": 0.82, "learning_rate": 7.942458170453043e-05, "loss": 3.0313, "step": 5108 }, { "epoch": 0.82, "learning_rate": 7.928341088437952e-05, "loss": 3.0882, "step": 5109 }, { "epoch": 0.82, "learning_rate": 7.914235483151405e-05, "loss": 3.1335, "step": 5110 }, { "epoch": 0.82, "learning_rate": 7.900141358441232e-05, "loss": 3.1116, "step": 5111 }, { "epoch": 0.82, "learning_rate": 7.886058718152222e-05, "loss": 3.0612, "step": 5112 }, { "epoch": 0.82, "learning_rate": 7.871987566125938e-05, "loss": 3.1548, "step": 5113 }, { "epoch": 0.82, "learning_rate": 7.857927906200863e-05, "loss": 3.0652, "step": 5114 }, { "epoch": 0.82, "learning_rate": 7.843879742212334e-05, "loss": 3.3375, "step": 5115 }, { "epoch": 0.82, "learning_rate": 7.82984307799256e-05, "loss": 3.1351, "step": 5116 }, { "epoch": 0.83, "learning_rate": 7.815817917370577e-05, "loss": 3.4055, "step": 5117 }, { "epoch": 0.83, "learning_rate": 7.801804264172313e-05, "loss": 3.0711, "step": 5118 }, { "epoch": 0.83, "learning_rate": 7.787802122220583e-05, "loss": 2.9377, "step": 5119 }, { "epoch": 0.83, "learning_rate": 7.773811495334999e-05, "loss": 3.3156, "step": 5120 }, { "epoch": 0.83, "learning_rate": 7.759832387332078e-05, "loss": 3.1588, "step": 5121 }, { "epoch": 0.83, "learning_rate": 7.745864802025194e-05, "loss": 3.1414, "step": 5122 }, { "epoch": 0.83, "learning_rate": 7.73190874322453e-05, "loss": 3.1297, "step": 5123 }, { "epoch": 0.83, "learning_rate": 7.717964214737178e-05, "loss": 3.1544, "step": 5124 }, { "epoch": 0.83, "learning_rate": 7.704031220367086e-05, "loss": 3.1058, "step": 5125 }, { "epoch": 0.83, "learning_rate": 7.690109763915004e-05, "loss": 3.2174, "step": 5126 }, { "epoch": 0.83, "learning_rate": 7.676199849178583e-05, "loss": 3.2697, "step": 5127 }, { "epoch": 0.83, "learning_rate": 7.662301479952317e-05, "loss": 3.0165, "step": 5128 }, { "epoch": 0.83, "learning_rate": 7.64841466002752e-05, "loss": 3.0727, "step": 5129 }, { "epoch": 0.83, "learning_rate": 7.63453939319238e-05, "loss": 3.1139, "step": 5130 }, { "epoch": 0.83, "learning_rate": 7.620675683231959e-05, "loss": 3.0211, "step": 5131 }, { "epoch": 0.83, "learning_rate": 7.60682353392811e-05, "loss": 3.2531, "step": 5132 }, { "epoch": 0.83, "learning_rate": 7.592982949059567e-05, "loss": 3.1231, "step": 5133 }, { "epoch": 0.83, "learning_rate": 7.579153932401928e-05, "loss": 3.2348, "step": 5134 }, { "epoch": 0.83, "learning_rate": 7.56533648772757e-05, "loss": 3.1262, "step": 5135 }, { "epoch": 0.83, "learning_rate": 7.551530618805768e-05, "loss": 3.1831, "step": 5136 }, { "epoch": 0.83, "learning_rate": 7.53773632940265e-05, "loss": 3.192, "step": 5137 }, { "epoch": 0.83, "learning_rate": 7.523953623281132e-05, "loss": 3.1427, "step": 5138 }, { "epoch": 0.83, "learning_rate": 7.510182504201013e-05, "loss": 3.112, "step": 5139 }, { "epoch": 0.83, "learning_rate": 7.496422975918915e-05, "loss": 3.0814, "step": 5140 }, { "epoch": 0.83, "learning_rate": 7.482675042188292e-05, "loss": 3.0157, "step": 5141 }, { "epoch": 0.83, "learning_rate": 7.468938706759448e-05, "loss": 3.1684, "step": 5142 }, { "epoch": 0.83, "learning_rate": 7.455213973379516e-05, "loss": 2.9667, "step": 5143 }, { "epoch": 0.83, "learning_rate": 7.441500845792471e-05, "loss": 3.2422, "step": 5144 }, { "epoch": 0.83, "learning_rate": 7.42779932773911e-05, "loss": 2.9819, "step": 5145 }, { "epoch": 0.83, "learning_rate": 7.414109422957088e-05, "loss": 3.1231, "step": 5146 }, { "epoch": 0.83, "learning_rate": 7.400431135180852e-05, "loss": 3.0518, "step": 5147 }, { "epoch": 0.83, "learning_rate": 7.38676446814171e-05, "loss": 3.061, "step": 5148 }, { "epoch": 0.83, "learning_rate": 7.373109425567797e-05, "loss": 3.0037, "step": 5149 }, { "epoch": 0.83, "learning_rate": 7.359466011184068e-05, "loss": 3.1321, "step": 5150 }, { "epoch": 0.83, "learning_rate": 7.345834228712311e-05, "loss": 3.102, "step": 5151 }, { "epoch": 0.83, "learning_rate": 7.332214081871141e-05, "loss": 3.0638, "step": 5152 }, { "epoch": 0.83, "learning_rate": 7.318605574376014e-05, "loss": 3.0269, "step": 5153 }, { "epoch": 0.83, "learning_rate": 7.30500870993916e-05, "loss": 3.1972, "step": 5154 }, { "epoch": 0.83, "learning_rate": 7.291423492269694e-05, "loss": 3.1706, "step": 5155 }, { "epoch": 0.83, "learning_rate": 7.277849925073515e-05, "loss": 3.2558, "step": 5156 }, { "epoch": 0.83, "learning_rate": 7.264288012053366e-05, "loss": 3.2528, "step": 5157 }, { "epoch": 0.83, "learning_rate": 7.250737756908794e-05, "loss": 3.181, "step": 5158 }, { "epoch": 0.83, "learning_rate": 7.237199163336189e-05, "loss": 3.0513, "step": 5159 }, { "epoch": 0.83, "learning_rate": 7.223672235028727e-05, "loss": 3.2261, "step": 5160 }, { "epoch": 0.83, "learning_rate": 7.210156975676418e-05, "loss": 3.0933, "step": 5161 }, { "epoch": 0.83, "learning_rate": 7.196653388966095e-05, "loss": 3.195, "step": 5162 }, { "epoch": 0.83, "learning_rate": 7.183161478581406e-05, "loss": 3.2378, "step": 5163 }, { "epoch": 0.83, "learning_rate": 7.169681248202808e-05, "loss": 3.1512, "step": 5164 }, { "epoch": 0.83, "learning_rate": 7.15621270150758e-05, "loss": 3.1431, "step": 5165 }, { "epoch": 0.83, "learning_rate": 7.142755842169785e-05, "loss": 3.2168, "step": 5166 }, { "epoch": 0.83, "learning_rate": 7.129310673860334e-05, "loss": 2.9561, "step": 5167 }, { "epoch": 0.83, "learning_rate": 7.11587720024694e-05, "loss": 3.1838, "step": 5168 }, { "epoch": 0.83, "learning_rate": 7.102455424994108e-05, "loss": 3.1856, "step": 5169 }, { "epoch": 0.83, "learning_rate": 7.089045351763174e-05, "loss": 3.112, "step": 5170 }, { "epoch": 0.83, "learning_rate": 7.075646984212275e-05, "loss": 3.0103, "step": 5171 }, { "epoch": 0.83, "learning_rate": 7.062260325996339e-05, "loss": 3.0474, "step": 5172 }, { "epoch": 0.83, "learning_rate": 7.048885380767123e-05, "loss": 3.2045, "step": 5173 }, { "epoch": 0.83, "learning_rate": 7.035522152173168e-05, "loss": 3.1105, "step": 5174 }, { "epoch": 0.83, "learning_rate": 7.022170643859838e-05, "loss": 3.2056, "step": 5175 }, { "epoch": 0.83, "learning_rate": 7.008830859469296e-05, "loss": 3.1943, "step": 5176 }, { "epoch": 0.83, "learning_rate": 6.995502802640497e-05, "loss": 3.2604, "step": 5177 }, { "epoch": 0.83, "learning_rate": 6.982186477009194e-05, "loss": 3.2211, "step": 5178 }, { "epoch": 0.84, "learning_rate": 6.968881886207956e-05, "loss": 3.1681, "step": 5179 }, { "epoch": 0.84, "learning_rate": 6.955589033866139e-05, "loss": 3.0491, "step": 5180 }, { "epoch": 0.84, "learning_rate": 6.942307923609904e-05, "loss": 3.2136, "step": 5181 }, { "epoch": 0.84, "learning_rate": 6.929038559062201e-05, "loss": 3.2473, "step": 5182 }, { "epoch": 0.84, "learning_rate": 6.915780943842792e-05, "loss": 3.1467, "step": 5183 }, { "epoch": 0.84, "learning_rate": 6.9025350815682e-05, "loss": 3.1693, "step": 5184 }, { "epoch": 0.84, "learning_rate": 6.889300975851781e-05, "loss": 3.158, "step": 5185 }, { "epoch": 0.84, "learning_rate": 6.876078630303661e-05, "loss": 3.3385, "step": 5186 }, { "epoch": 0.84, "learning_rate": 6.862868048530768e-05, "loss": 3.1976, "step": 5187 }, { "epoch": 0.84, "learning_rate": 6.849669234136813e-05, "loss": 3.1504, "step": 5188 }, { "epoch": 0.84, "learning_rate": 6.836482190722309e-05, "loss": 2.9868, "step": 5189 }, { "epoch": 0.84, "learning_rate": 6.823306921884537e-05, "loss": 3.0692, "step": 5190 }, { "epoch": 0.84, "learning_rate": 6.810143431217585e-05, "loss": 3.1256, "step": 5191 }, { "epoch": 0.84, "learning_rate": 6.796991722312318e-05, "loss": 3.2791, "step": 5192 }, { "epoch": 0.84, "learning_rate": 6.783851798756396e-05, "loss": 3.0654, "step": 5193 }, { "epoch": 0.84, "learning_rate": 6.770723664134254e-05, "loss": 3.1216, "step": 5194 }, { "epoch": 0.84, "learning_rate": 6.757607322027132e-05, "loss": 3.2263, "step": 5195 }, { "epoch": 0.84, "learning_rate": 6.744502776013018e-05, "loss": 3.2164, "step": 5196 }, { "epoch": 0.84, "learning_rate": 6.731410029666701e-05, "loss": 3.2069, "step": 5197 }, { "epoch": 0.84, "learning_rate": 6.71832908655976e-05, "loss": 3.1535, "step": 5198 }, { "epoch": 0.84, "learning_rate": 6.70525995026055e-05, "loss": 3.1078, "step": 5199 }, { "epoch": 0.84, "learning_rate": 6.692202624334187e-05, "loss": 3.1529, "step": 5200 }, { "epoch": 0.84, "learning_rate": 6.679157112342604e-05, "loss": 3.1202, "step": 5201 }, { "epoch": 0.84, "learning_rate": 6.666123417844456e-05, "loss": 3.0805, "step": 5202 }, { "epoch": 0.84, "learning_rate": 6.653101544395218e-05, "loss": 3.2423, "step": 5203 }, { "epoch": 0.84, "learning_rate": 6.640091495547129e-05, "loss": 3.0891, "step": 5204 }, { "epoch": 0.84, "learning_rate": 6.627093274849194e-05, "loss": 3.1721, "step": 5205 }, { "epoch": 0.84, "learning_rate": 6.61410688584721e-05, "loss": 3.1488, "step": 5206 }, { "epoch": 0.84, "learning_rate": 6.60113233208372e-05, "loss": 2.9562, "step": 5207 }, { "epoch": 0.84, "learning_rate": 6.588169617098071e-05, "loss": 3.169, "step": 5208 }, { "epoch": 0.84, "learning_rate": 6.575218744426348e-05, "loss": 3.1151, "step": 5209 }, { "epoch": 0.84, "learning_rate": 6.562279717601415e-05, "loss": 3.0651, "step": 5210 }, { "epoch": 0.84, "learning_rate": 6.54935254015292e-05, "loss": 3.168, "step": 5211 }, { "epoch": 0.84, "learning_rate": 6.536437215607261e-05, "loss": 3.1644, "step": 5212 }, { "epoch": 0.84, "learning_rate": 6.52353374748762e-05, "loss": 3.2229, "step": 5213 }, { "epoch": 0.84, "learning_rate": 6.510642139313933e-05, "loss": 3.3354, "step": 5214 }, { "epoch": 0.84, "learning_rate": 6.49776239460289e-05, "loss": 2.9036, "step": 5215 }, { "epoch": 0.84, "learning_rate": 6.484894516867962e-05, "loss": 3.2851, "step": 5216 }, { "epoch": 0.84, "learning_rate": 6.472038509619388e-05, "loss": 3.3044, "step": 5217 }, { "epoch": 0.84, "learning_rate": 6.45919437636413e-05, "loss": 3.1882, "step": 5218 }, { "epoch": 0.84, "learning_rate": 6.446362120605969e-05, "loss": 3.0779, "step": 5219 }, { "epoch": 0.84, "learning_rate": 6.433541745845417e-05, "loss": 3.2151, "step": 5220 }, { "epoch": 0.84, "learning_rate": 6.42073325557972e-05, "loss": 3.1287, "step": 5221 }, { "epoch": 0.84, "learning_rate": 6.407936653302926e-05, "loss": 3.0623, "step": 5222 }, { "epoch": 0.84, "learning_rate": 6.395151942505822e-05, "loss": 3.2403, "step": 5223 }, { "epoch": 0.84, "learning_rate": 6.382379126675919e-05, "loss": 3.1908, "step": 5224 }, { "epoch": 0.84, "learning_rate": 6.369618209297546e-05, "loss": 3.1351, "step": 5225 }, { "epoch": 0.84, "learning_rate": 6.356869193851755e-05, "loss": 3.2411, "step": 5226 }, { "epoch": 0.84, "learning_rate": 6.344132083816328e-05, "loss": 3.1305, "step": 5227 }, { "epoch": 0.84, "learning_rate": 6.331406882665836e-05, "loss": 3.0891, "step": 5228 }, { "epoch": 0.84, "learning_rate": 6.318693593871593e-05, "loss": 3.2478, "step": 5229 }, { "epoch": 0.84, "learning_rate": 6.305992220901624e-05, "loss": 3.1533, "step": 5230 }, { "epoch": 0.84, "learning_rate": 6.293302767220771e-05, "loss": 3.1216, "step": 5231 }, { "epoch": 0.84, "learning_rate": 6.280625236290593e-05, "loss": 3.1764, "step": 5232 }, { "epoch": 0.84, "learning_rate": 6.26795963156937e-05, "loss": 3.2167, "step": 5233 }, { "epoch": 0.84, "learning_rate": 6.255305956512159e-05, "loss": 3.0672, "step": 5234 }, { "epoch": 0.84, "learning_rate": 6.242664214570776e-05, "loss": 3.258, "step": 5235 }, { "epoch": 0.84, "learning_rate": 6.230034409193724e-05, "loss": 3.2261, "step": 5236 }, { "epoch": 0.84, "learning_rate": 6.21741654382632e-05, "loss": 3.2774, "step": 5237 }, { "epoch": 0.84, "learning_rate": 6.20481062191059e-05, "loss": 3.0319, "step": 5238 }, { "epoch": 0.84, "learning_rate": 6.19221664688529e-05, "loss": 3.3377, "step": 5239 }, { "epoch": 0.84, "learning_rate": 6.179634622185932e-05, "loss": 3.2308, "step": 5240 }, { "epoch": 0.85, "learning_rate": 6.167064551244772e-05, "loss": 3.0471, "step": 5241 }, { "epoch": 0.85, "learning_rate": 6.15450643749081e-05, "loss": 3.0981, "step": 5242 }, { "epoch": 0.85, "learning_rate": 6.14196028434974e-05, "loss": 3.2595, "step": 5243 }, { "epoch": 0.85, "learning_rate": 6.129426095244073e-05, "loss": 3.0728, "step": 5244 }, { "epoch": 0.85, "learning_rate": 6.116903873592977e-05, "loss": 3.2367, "step": 5245 }, { "epoch": 0.85, "learning_rate": 6.104393622812399e-05, "loss": 3.112, "step": 5246 }, { "epoch": 0.85, "learning_rate": 6.091895346315013e-05, "loss": 2.9946, "step": 5247 }, { "epoch": 0.85, "learning_rate": 6.079409047510231e-05, "loss": 3.1833, "step": 5248 }, { "epoch": 0.85, "learning_rate": 6.066934729804158e-05, "loss": 3.0505, "step": 5249 }, { "epoch": 0.85, "learning_rate": 6.054472396599714e-05, "loss": 3.0997, "step": 5250 }, { "epoch": 0.85, "learning_rate": 6.0420220512964585e-05, "loss": 3.132, "step": 5251 }, { "epoch": 0.85, "learning_rate": 6.029583697290736e-05, "loss": 3.0132, "step": 5252 }, { "epoch": 0.85, "learning_rate": 6.017157337975609e-05, "loss": 3.1636, "step": 5253 }, { "epoch": 0.85, "learning_rate": 6.004742976740868e-05, "loss": 3.1478, "step": 5254 }, { "epoch": 0.85, "learning_rate": 5.9923406169729966e-05, "loss": 3.0455, "step": 5255 }, { "epoch": 0.85, "learning_rate": 5.979950262055267e-05, "loss": 3.077, "step": 5256 }, { "epoch": 0.85, "learning_rate": 5.967571915367642e-05, "loss": 3.056, "step": 5257 }, { "epoch": 0.85, "learning_rate": 5.955205580286799e-05, "loss": 3.1825, "step": 5258 }, { "epoch": 0.85, "learning_rate": 5.94285126018615e-05, "loss": 3.2461, "step": 5259 }, { "epoch": 0.85, "learning_rate": 5.930508958435848e-05, "loss": 3.0501, "step": 5260 }, { "epoch": 0.85, "learning_rate": 5.918178678402714e-05, "loss": 3.1652, "step": 5261 }, { "epoch": 0.85, "learning_rate": 5.90586042345036e-05, "loss": 3.0504, "step": 5262 }, { "epoch": 0.85, "learning_rate": 5.8935541969390825e-05, "loss": 3.1661, "step": 5263 }, { "epoch": 0.85, "learning_rate": 5.8812600022258745e-05, "loss": 3.2866, "step": 5264 }, { "epoch": 0.85, "learning_rate": 5.8689778426644805e-05, "loss": 3.1115, "step": 5265 }, { "epoch": 0.85, "learning_rate": 5.856707721605359e-05, "loss": 3.0553, "step": 5266 }, { "epoch": 0.85, "learning_rate": 5.844449642395666e-05, "loss": 3.1275, "step": 5267 }, { "epoch": 0.85, "learning_rate": 5.832203608379272e-05, "loss": 3.0551, "step": 5268 }, { "epoch": 0.85, "learning_rate": 5.8199696228968036e-05, "loss": 3.1618, "step": 5269 }, { "epoch": 0.85, "learning_rate": 5.807747689285547e-05, "loss": 3.1199, "step": 5270 }, { "epoch": 0.85, "learning_rate": 5.795537810879531e-05, "loss": 3.149, "step": 5271 }, { "epoch": 0.85, "learning_rate": 5.7833399910094955e-05, "loss": 3.3431, "step": 5272 }, { "epoch": 0.85, "learning_rate": 5.771154233002862e-05, "loss": 3.1143, "step": 5273 }, { "epoch": 0.85, "learning_rate": 5.7589805401837894e-05, "loss": 3.1932, "step": 5274 }, { "epoch": 0.85, "learning_rate": 5.74681891587317e-05, "loss": 3.2348, "step": 5275 }, { "epoch": 0.85, "learning_rate": 5.7346693633885446e-05, "loss": 3.1473, "step": 5276 }, { "epoch": 0.85, "learning_rate": 5.722531886044191e-05, "loss": 3.0744, "step": 5277 }, { "epoch": 0.85, "learning_rate": 5.71040648715111e-05, "loss": 3.1193, "step": 5278 }, { "epoch": 0.85, "learning_rate": 5.698293170016966e-05, "loss": 3.1755, "step": 5279 }, { "epoch": 0.85, "learning_rate": 5.68619193794615e-05, "loss": 3.1354, "step": 5280 }, { "epoch": 0.85, "learning_rate": 5.6741027942397885e-05, "loss": 2.9966, "step": 5281 }, { "epoch": 0.85, "learning_rate": 5.662025742195654e-05, "loss": 3.2396, "step": 5282 }, { "epoch": 0.85, "learning_rate": 5.649960785108244e-05, "loss": 3.199, "step": 5283 }, { "epoch": 0.85, "learning_rate": 5.6379079262687813e-05, "loss": 3.0118, "step": 5284 }, { "epoch": 0.85, "learning_rate": 5.625867168965138e-05, "loss": 3.1244, "step": 5285 }, { "epoch": 0.85, "learning_rate": 5.613838516481912e-05, "loss": 3.0405, "step": 5286 }, { "epoch": 0.85, "learning_rate": 5.601821972100435e-05, "loss": 3.1956, "step": 5287 }, { "epoch": 0.85, "learning_rate": 5.5898175390986686e-05, "loss": 3.171, "step": 5288 }, { "epoch": 0.85, "learning_rate": 5.5778252207513094e-05, "loss": 3.063, "step": 5289 }, { "epoch": 0.85, "learning_rate": 5.565845020329741e-05, "loss": 3.0836, "step": 5290 }, { "epoch": 0.85, "learning_rate": 5.5538769411020596e-05, "loss": 3.0852, "step": 5291 }, { "epoch": 0.85, "learning_rate": 5.54192098633301e-05, "loss": 3.1848, "step": 5292 }, { "epoch": 0.85, "learning_rate": 5.529977159284072e-05, "loss": 2.9598, "step": 5293 }, { "epoch": 0.85, "learning_rate": 5.5180454632134006e-05, "loss": 2.9875, "step": 5294 }, { "epoch": 0.85, "learning_rate": 5.506125901375847e-05, "loss": 3.0159, "step": 5295 }, { "epoch": 0.85, "learning_rate": 5.494218477022939e-05, "loss": 3.1757, "step": 5296 }, { "epoch": 0.85, "learning_rate": 5.482323193402921e-05, "loss": 3.2564, "step": 5297 }, { "epoch": 0.85, "learning_rate": 5.470440053760689e-05, "loss": 3.0884, "step": 5298 }, { "epoch": 0.85, "learning_rate": 5.458569061337854e-05, "loss": 3.1643, "step": 5299 }, { "epoch": 0.85, "learning_rate": 5.446710219372697e-05, "loss": 3.0618, "step": 5300 }, { "epoch": 0.85, "learning_rate": 5.434863531100198e-05, "loss": 3.161, "step": 5301 }, { "epoch": 0.85, "learning_rate": 5.4230289997520166e-05, "loss": 3.1078, "step": 5302 }, { "epoch": 0.86, "learning_rate": 5.4112066285564975e-05, "loss": 3.2769, "step": 5303 }, { "epoch": 0.86, "learning_rate": 5.399396420738656e-05, "loss": 2.9889, "step": 5304 }, { "epoch": 0.86, "learning_rate": 5.387598379520203e-05, "loss": 3.0165, "step": 5305 }, { "epoch": 0.86, "learning_rate": 5.375812508119521e-05, "loss": 3.0288, "step": 5306 }, { "epoch": 0.86, "learning_rate": 5.3640388097516866e-05, "loss": 3.3053, "step": 5307 }, { "epoch": 0.86, "learning_rate": 5.352277287628449e-05, "loss": 3.0723, "step": 5308 }, { "epoch": 0.86, "learning_rate": 5.3405279449582345e-05, "loss": 3.1045, "step": 5309 }, { "epoch": 0.86, "learning_rate": 5.3287907849461304e-05, "loss": 3.1271, "step": 5310 }, { "epoch": 0.86, "learning_rate": 5.317065810793931e-05, "loss": 3.0603, "step": 5311 }, { "epoch": 0.86, "learning_rate": 5.3053530257000826e-05, "loss": 3.0677, "step": 5312 }, { "epoch": 0.86, "learning_rate": 5.29365243285973e-05, "loss": 3.2249, "step": 5313 }, { "epoch": 0.86, "learning_rate": 5.2819640354646645e-05, "loss": 3.0821, "step": 5314 }, { "epoch": 0.86, "learning_rate": 5.27028783670338e-05, "loss": 3.0314, "step": 5315 }, { "epoch": 0.86, "learning_rate": 5.258623839761012e-05, "loss": 3.1329, "step": 5316 }, { "epoch": 0.86, "learning_rate": 5.246972047819387e-05, "loss": 3.1326, "step": 5317 }, { "epoch": 0.86, "learning_rate": 5.2353324640569965e-05, "loss": 3.2606, "step": 5318 }, { "epoch": 0.86, "learning_rate": 5.2237050916490006e-05, "loss": 3.2651, "step": 5319 }, { "epoch": 0.86, "learning_rate": 5.212089933767239e-05, "loss": 3.0767, "step": 5320 }, { "epoch": 0.86, "learning_rate": 5.200486993580211e-05, "loss": 3.1629, "step": 5321 }, { "epoch": 0.86, "learning_rate": 5.1888962742530745e-05, "loss": 3.282, "step": 5322 }, { "epoch": 0.86, "learning_rate": 5.17731777894766e-05, "loss": 3.1028, "step": 5323 }, { "epoch": 0.86, "learning_rate": 5.1657515108224694e-05, "loss": 3.2119, "step": 5324 }, { "epoch": 0.86, "learning_rate": 5.154197473032668e-05, "loss": 3.1275, "step": 5325 }, { "epoch": 0.86, "learning_rate": 5.142655668730084e-05, "loss": 3.0484, "step": 5326 }, { "epoch": 0.86, "learning_rate": 5.1311261010632104e-05, "loss": 3.2182, "step": 5327 }, { "epoch": 0.86, "learning_rate": 5.1196087731771815e-05, "loss": 3.191, "step": 5328 }, { "epoch": 0.86, "learning_rate": 5.1081036882138166e-05, "loss": 3.0079, "step": 5329 }, { "epoch": 0.86, "learning_rate": 5.096610849311589e-05, "loss": 3.0101, "step": 5330 }, { "epoch": 0.86, "learning_rate": 5.08513025960563e-05, "loss": 3.0847, "step": 5331 }, { "epoch": 0.86, "learning_rate": 5.0736619222277346e-05, "loss": 3.03, "step": 5332 }, { "epoch": 0.86, "learning_rate": 5.062205840306355e-05, "loss": 3.146, "step": 5333 }, { "epoch": 0.86, "learning_rate": 5.0507620169665814e-05, "loss": 2.9684, "step": 5334 }, { "epoch": 0.86, "learning_rate": 5.0393304553301825e-05, "loss": 3.0812, "step": 5335 }, { "epoch": 0.86, "learning_rate": 5.027911158515569e-05, "loss": 3.2237, "step": 5336 }, { "epoch": 0.86, "learning_rate": 5.016504129637817e-05, "loss": 3.1383, "step": 5337 }, { "epoch": 0.86, "learning_rate": 5.005109371808647e-05, "loss": 3.1091, "step": 5338 }, { "epoch": 0.86, "learning_rate": 4.993726888136446e-05, "loss": 3.024, "step": 5339 }, { "epoch": 0.86, "learning_rate": 4.982356681726219e-05, "loss": 3.2634, "step": 5340 }, { "epoch": 0.86, "learning_rate": 4.970998755679662e-05, "loss": 3.178, "step": 5341 }, { "epoch": 0.86, "learning_rate": 4.9596531130951026e-05, "loss": 3.2132, "step": 5342 }, { "epoch": 0.86, "learning_rate": 4.94831975706751e-05, "loss": 3.197, "step": 5343 }, { "epoch": 0.86, "learning_rate": 4.936998690688521e-05, "loss": 3.0834, "step": 5344 }, { "epoch": 0.86, "learning_rate": 4.9256899170464056e-05, "loss": 3.1816, "step": 5345 }, { "epoch": 0.86, "learning_rate": 4.9143934392260946e-05, "loss": 3.169, "step": 5346 }, { "epoch": 0.86, "learning_rate": 4.90310926030913e-05, "loss": 3.209, "step": 5347 }, { "epoch": 0.86, "learning_rate": 4.891837383373737e-05, "loss": 3.0332, "step": 5348 }, { "epoch": 0.86, "learning_rate": 4.8805778114947744e-05, "loss": 3.1495, "step": 5349 }, { "epoch": 0.86, "learning_rate": 4.8693305477437335e-05, "loss": 3.179, "step": 5350 }, { "epoch": 0.86, "learning_rate": 4.858095595188766e-05, "loss": 3.1582, "step": 5351 }, { "epoch": 0.86, "learning_rate": 4.846872956894649e-05, "loss": 3.2267, "step": 5352 }, { "epoch": 0.86, "learning_rate": 4.835662635922805e-05, "loss": 3.2132, "step": 5353 }, { "epoch": 0.86, "learning_rate": 4.824464635331294e-05, "loss": 3.1774, "step": 5354 }, { "epoch": 0.86, "learning_rate": 4.8132789581748216e-05, "loss": 3.0654, "step": 5355 }, { "epoch": 0.86, "learning_rate": 4.802105607504731e-05, "loss": 2.9699, "step": 5356 }, { "epoch": 0.86, "learning_rate": 4.790944586369e-05, "loss": 3.309, "step": 5357 }, { "epoch": 0.86, "learning_rate": 4.7797958978122555e-05, "loss": 3.1813, "step": 5358 }, { "epoch": 0.86, "learning_rate": 4.7686595448757254e-05, "loss": 3.1829, "step": 5359 }, { "epoch": 0.86, "learning_rate": 4.757535530597307e-05, "loss": 3.2379, "step": 5360 }, { "epoch": 0.86, "learning_rate": 4.746423858011534e-05, "loss": 3.1527, "step": 5361 }, { "epoch": 0.86, "learning_rate": 4.735324530149521e-05, "loss": 3.1598, "step": 5362 }, { "epoch": 0.86, "learning_rate": 4.72423755003909e-05, "loss": 3.1183, "step": 5363 }, { "epoch": 0.86, "learning_rate": 4.713162920704656e-05, "loss": 3.1192, "step": 5364 }, { "epoch": 0.87, "learning_rate": 4.702100645167251e-05, "loss": 2.943, "step": 5365 }, { "epoch": 0.87, "learning_rate": 4.691050726444562e-05, "loss": 3.2065, "step": 5366 }, { "epoch": 0.87, "learning_rate": 4.6800131675509e-05, "loss": 3.0614, "step": 5367 }, { "epoch": 0.87, "learning_rate": 4.668987971497185e-05, "loss": 3.0328, "step": 5368 }, { "epoch": 0.87, "learning_rate": 4.657975141290993e-05, "loss": 3.2564, "step": 5369 }, { "epoch": 0.87, "learning_rate": 4.646974679936527e-05, "loss": 3.022, "step": 5370 }, { "epoch": 0.87, "learning_rate": 4.6359865904345765e-05, "loss": 3.086, "step": 5371 }, { "epoch": 0.87, "learning_rate": 4.625010875782598e-05, "loss": 3.2474, "step": 5372 }, { "epoch": 0.87, "learning_rate": 4.614047538974664e-05, "loss": 3.3224, "step": 5373 }, { "epoch": 0.87, "learning_rate": 4.603096583001432e-05, "loss": 3.2119, "step": 5374 }, { "epoch": 0.87, "learning_rate": 4.592158010850245e-05, "loss": 3.1726, "step": 5375 }, { "epoch": 0.87, "learning_rate": 4.581231825505033e-05, "loss": 3.1038, "step": 5376 }, { "epoch": 0.87, "learning_rate": 4.570318029946341e-05, "loss": 3.1587, "step": 5377 }, { "epoch": 0.87, "learning_rate": 4.559416627151336e-05, "loss": 3.1657, "step": 5378 }, { "epoch": 0.87, "learning_rate": 4.548527620093828e-05, "loss": 3.1365, "step": 5379 }, { "epoch": 0.87, "learning_rate": 4.5376510117442205e-05, "loss": 3.1016, "step": 5380 }, { "epoch": 0.87, "learning_rate": 4.526786805069549e-05, "loss": 3.1437, "step": 5381 }, { "epoch": 0.87, "learning_rate": 4.5159350030334665e-05, "loss": 3.38, "step": 5382 }, { "epoch": 0.87, "learning_rate": 4.505095608596216e-05, "loss": 3.0378, "step": 5383 }, { "epoch": 0.87, "learning_rate": 4.494268624714687e-05, "loss": 3.1409, "step": 5384 }, { "epoch": 0.87, "learning_rate": 4.483454054342373e-05, "loss": 3.0254, "step": 5385 }, { "epoch": 0.87, "learning_rate": 4.472651900429392e-05, "loss": 3.1104, "step": 5386 }, { "epoch": 0.87, "learning_rate": 4.461862165922437e-05, "loss": 3.0587, "step": 5387 }, { "epoch": 0.87, "learning_rate": 4.4510848537648694e-05, "loss": 3.1152, "step": 5388 }, { "epoch": 0.87, "learning_rate": 4.440319966896611e-05, "loss": 3.1375, "step": 5389 }, { "epoch": 0.87, "learning_rate": 4.429567508254223e-05, "loss": 3.0635, "step": 5390 }, { "epoch": 0.87, "learning_rate": 4.4188274807708705e-05, "loss": 3.1113, "step": 5391 }, { "epoch": 0.87, "learning_rate": 4.408099887376332e-05, "loss": 3.1863, "step": 5392 }, { "epoch": 0.87, "learning_rate": 4.397384730996962e-05, "loss": 3.1055, "step": 5393 }, { "epoch": 0.87, "learning_rate": 4.386682014555776e-05, "loss": 3.2366, "step": 5394 }, { "epoch": 0.87, "learning_rate": 4.375991740972368e-05, "loss": 3.0813, "step": 5395 }, { "epoch": 0.87, "learning_rate": 4.365313913162916e-05, "loss": 3.1199, "step": 5396 }, { "epoch": 0.87, "learning_rate": 4.3546485340402395e-05, "loss": 3.0969, "step": 5397 }, { "epoch": 0.87, "learning_rate": 4.343995606513751e-05, "loss": 3.0795, "step": 5398 }, { "epoch": 0.87, "learning_rate": 4.333355133489442e-05, "loss": 3.1548, "step": 5399 }, { "epoch": 0.87, "learning_rate": 4.322727117869951e-05, "loss": 3.094, "step": 5400 }, { "epoch": 0.87, "learning_rate": 4.3121115625544935e-05, "loss": 3.0508, "step": 5401 }, { "epoch": 0.87, "learning_rate": 4.301508470438869e-05, "loss": 3.0741, "step": 5402 }, { "epoch": 0.87, "learning_rate": 4.2909178444155094e-05, "loss": 3.1428, "step": 5403 }, { "epoch": 0.87, "learning_rate": 4.280339687373436e-05, "loss": 3.2297, "step": 5404 }, { "epoch": 0.87, "learning_rate": 4.269774002198235e-05, "loss": 2.9579, "step": 5405 }, { "epoch": 0.87, "learning_rate": 4.259220791772156e-05, "loss": 3.1745, "step": 5406 }, { "epoch": 0.87, "learning_rate": 4.248680058973997e-05, "loss": 3.3474, "step": 5407 }, { "epoch": 0.87, "learning_rate": 4.238151806679158e-05, "loss": 3.1116, "step": 5408 }, { "epoch": 0.87, "learning_rate": 4.227636037759641e-05, "loss": 3.1529, "step": 5409 }, { "epoch": 0.87, "learning_rate": 4.217132755084058e-05, "loss": 3.1089, "step": 5410 }, { "epoch": 0.87, "learning_rate": 4.2066419615175824e-05, "loss": 3.1472, "step": 5411 }, { "epoch": 0.87, "learning_rate": 4.196163659921992e-05, "loss": 3.2151, "step": 5412 }, { "epoch": 0.87, "learning_rate": 4.18569785315569e-05, "loss": 3.1856, "step": 5413 }, { "epoch": 0.87, "learning_rate": 4.1752445440736254e-05, "loss": 3.0456, "step": 5414 }, { "epoch": 0.87, "learning_rate": 4.164803735527356e-05, "loss": 3.096, "step": 5415 }, { "epoch": 0.87, "learning_rate": 4.1543754303650484e-05, "loss": 3.3045, "step": 5416 }, { "epoch": 0.87, "learning_rate": 4.143959631431415e-05, "loss": 2.9486, "step": 5417 }, { "epoch": 0.87, "learning_rate": 4.1335563415677844e-05, "loss": 3.1236, "step": 5418 }, { "epoch": 0.87, "learning_rate": 4.123165563612097e-05, "loss": 3.3154, "step": 5419 }, { "epoch": 0.87, "learning_rate": 4.1127873003988233e-05, "loss": 3.054, "step": 5420 }, { "epoch": 0.87, "learning_rate": 4.102421554759061e-05, "loss": 3.0125, "step": 5421 }, { "epoch": 0.87, "learning_rate": 4.092068329520493e-05, "loss": 3.0724, "step": 5422 }, { "epoch": 0.87, "learning_rate": 4.081727627507359e-05, "loss": 3.0123, "step": 5423 }, { "epoch": 0.87, "learning_rate": 4.071399451540497e-05, "loss": 3.1354, "step": 5424 }, { "epoch": 0.87, "learning_rate": 4.061083804437355e-05, "loss": 3.0101, "step": 5425 }, { "epoch": 0.87, "learning_rate": 4.050780689011912e-05, "loss": 3.0255, "step": 5426 }, { "epoch": 0.88, "learning_rate": 4.040490108074768e-05, "loss": 3.2069, "step": 5427 }, { "epoch": 0.88, "learning_rate": 4.0302120644330864e-05, "loss": 3.0411, "step": 5428 }, { "epoch": 0.88, "learning_rate": 4.019946560890625e-05, "loss": 3.0705, "step": 5429 }, { "epoch": 0.88, "learning_rate": 4.009693600247688e-05, "loss": 3.2786, "step": 5430 }, { "epoch": 0.88, "learning_rate": 3.999453185301211e-05, "loss": 2.9021, "step": 5431 }, { "epoch": 0.88, "learning_rate": 3.9892253188446524e-05, "loss": 3.276, "step": 5432 }, { "epoch": 0.88, "learning_rate": 3.979010003668082e-05, "loss": 3.0801, "step": 5433 }, { "epoch": 0.88, "learning_rate": 3.968807242558131e-05, "loss": 3.3216, "step": 5434 }, { "epoch": 0.88, "learning_rate": 3.958617038298029e-05, "loss": 3.2306, "step": 5435 }, { "epoch": 0.88, "learning_rate": 3.948439393667536e-05, "loss": 3.1317, "step": 5436 }, { "epoch": 0.88, "learning_rate": 3.938274311443018e-05, "loss": 3.206, "step": 5437 }, { "epoch": 0.88, "learning_rate": 3.9281217943974214e-05, "loss": 3.1811, "step": 5438 }, { "epoch": 0.88, "learning_rate": 3.917981845300239e-05, "loss": 3.1741, "step": 5439 }, { "epoch": 0.88, "learning_rate": 3.907854466917554e-05, "loss": 3.0861, "step": 5440 }, { "epoch": 0.88, "learning_rate": 3.897739662012017e-05, "loss": 3.1293, "step": 5441 }, { "epoch": 0.88, "learning_rate": 3.8876374333428335e-05, "loss": 3.0845, "step": 5442 }, { "epoch": 0.88, "learning_rate": 3.8775477836658026e-05, "loss": 3.1251, "step": 5443 }, { "epoch": 0.88, "learning_rate": 3.867470715733268e-05, "loss": 3.1721, "step": 5444 }, { "epoch": 0.88, "learning_rate": 3.857406232294164e-05, "loss": 3.0012, "step": 5445 }, { "epoch": 0.88, "learning_rate": 3.8473543360939775e-05, "loss": 3.2365, "step": 5446 }, { "epoch": 0.88, "learning_rate": 3.837315029874777e-05, "loss": 3.2745, "step": 5447 }, { "epoch": 0.88, "learning_rate": 3.8272883163751605e-05, "loss": 3.1103, "step": 5448 }, { "epoch": 0.88, "learning_rate": 3.817274198330323e-05, "loss": 3.3087, "step": 5449 }, { "epoch": 0.88, "learning_rate": 3.80727267847204e-05, "loss": 3.1715, "step": 5450 }, { "epoch": 0.88, "learning_rate": 3.797283759528597e-05, "loss": 3.1065, "step": 5451 }, { "epoch": 0.88, "learning_rate": 3.78730744422488e-05, "loss": 3.1389, "step": 5452 }, { "epoch": 0.88, "learning_rate": 3.777343735282346e-05, "loss": 3.1071, "step": 5453 }, { "epoch": 0.88, "learning_rate": 3.767392635418965e-05, "loss": 3.0999, "step": 5454 }, { "epoch": 0.88, "learning_rate": 3.757454147349304e-05, "loss": 3.1647, "step": 5455 }, { "epoch": 0.88, "learning_rate": 3.7475282737845116e-05, "loss": 2.9632, "step": 5456 }, { "epoch": 0.88, "learning_rate": 3.737615017432239e-05, "loss": 3.104, "step": 5457 }, { "epoch": 0.88, "learning_rate": 3.7277143809967274e-05, "loss": 3.2383, "step": 5458 }, { "epoch": 0.88, "learning_rate": 3.717826367178789e-05, "loss": 2.9692, "step": 5459 }, { "epoch": 0.88, "learning_rate": 3.70795097867575e-05, "loss": 3.0111, "step": 5460 }, { "epoch": 0.88, "learning_rate": 3.698088218181533e-05, "loss": 3.0743, "step": 5461 }, { "epoch": 0.88, "learning_rate": 3.688238088386592e-05, "loss": 3.0761, "step": 5462 }, { "epoch": 0.88, "learning_rate": 3.678400591977954e-05, "loss": 3.1577, "step": 5463 }, { "epoch": 0.88, "learning_rate": 3.668575731639184e-05, "loss": 2.9539, "step": 5464 }, { "epoch": 0.88, "learning_rate": 3.658763510050417e-05, "loss": 3.1983, "step": 5465 }, { "epoch": 0.88, "learning_rate": 3.64896392988831e-05, "loss": 3.0652, "step": 5466 }, { "epoch": 0.88, "learning_rate": 3.6391769938261047e-05, "loss": 3.1895, "step": 5467 }, { "epoch": 0.88, "learning_rate": 3.6294027045335686e-05, "loss": 3.083, "step": 5468 }, { "epoch": 0.88, "learning_rate": 3.619641064677037e-05, "loss": 3.1826, "step": 5469 }, { "epoch": 0.88, "learning_rate": 3.609892076919391e-05, "loss": 3.0434, "step": 5470 }, { "epoch": 0.88, "learning_rate": 3.600155743920058e-05, "loss": 3.2173, "step": 5471 }, { "epoch": 0.88, "learning_rate": 3.590432068334998e-05, "loss": 3.163, "step": 5472 }, { "epoch": 0.88, "learning_rate": 3.5807210528167434e-05, "loss": 3.1514, "step": 5473 }, { "epoch": 0.88, "learning_rate": 3.5710227000143646e-05, "loss": 3.205, "step": 5474 }, { "epoch": 0.88, "learning_rate": 3.5613370125734734e-05, "loss": 3.2162, "step": 5475 }, { "epoch": 0.88, "learning_rate": 3.5516639931362224e-05, "loss": 3.1297, "step": 5476 }, { "epoch": 0.88, "learning_rate": 3.542003644341329e-05, "loss": 3.2072, "step": 5477 }, { "epoch": 0.88, "learning_rate": 3.5323559688240246e-05, "loss": 3.1577, "step": 5478 }, { "epoch": 0.88, "learning_rate": 3.5227209692161035e-05, "loss": 3.262, "step": 5479 }, { "epoch": 0.88, "learning_rate": 3.5130986481458956e-05, "loss": 3.1636, "step": 5480 }, { "epoch": 0.88, "learning_rate": 3.503489008238281e-05, "loss": 3.2934, "step": 5481 }, { "epoch": 0.88, "learning_rate": 3.493892052114666e-05, "loss": 3.1579, "step": 5482 }, { "epoch": 0.88, "learning_rate": 3.4843077823930146e-05, "loss": 3.2004, "step": 5483 }, { "epoch": 0.88, "learning_rate": 3.474736201687817e-05, "loss": 3.1963, "step": 5484 }, { "epoch": 0.88, "learning_rate": 3.4651773126100904e-05, "loss": 3.0756, "step": 5485 }, { "epoch": 0.88, "learning_rate": 3.455631117767422e-05, "loss": 3.0818, "step": 5486 }, { "epoch": 0.88, "learning_rate": 3.4460976197639047e-05, "loss": 3.0706, "step": 5487 }, { "epoch": 0.88, "learning_rate": 3.436576821200193e-05, "loss": 3.1925, "step": 5488 }, { "epoch": 0.89, "learning_rate": 3.42706872467346e-05, "loss": 3.2538, "step": 5489 }, { "epoch": 0.89, "learning_rate": 3.4175733327774296e-05, "loss": 3.091, "step": 5490 }, { "epoch": 0.89, "learning_rate": 3.40809064810233e-05, "loss": 2.994, "step": 5491 }, { "epoch": 0.89, "learning_rate": 3.398620673234953e-05, "loss": 3.0751, "step": 5492 }, { "epoch": 0.89, "learning_rate": 3.389163410758622e-05, "loss": 3.1426, "step": 5493 }, { "epoch": 0.89, "learning_rate": 3.3797188632531675e-05, "loss": 2.9936, "step": 5494 }, { "epoch": 0.89, "learning_rate": 3.3702870332949776e-05, "loss": 3.279, "step": 5495 }, { "epoch": 0.89, "learning_rate": 3.360867923456973e-05, "loss": 3.1055, "step": 5496 }, { "epoch": 0.89, "learning_rate": 3.351461536308564e-05, "loss": 3.1975, "step": 5497 }, { "epoch": 0.89, "learning_rate": 3.3420678744157384e-05, "loss": 3.173, "step": 5498 }, { "epoch": 0.89, "learning_rate": 3.33268694034099e-05, "loss": 2.9665, "step": 5499 }, { "epoch": 0.89, "learning_rate": 3.3233187366433436e-05, "loss": 3.1158, "step": 5500 }, { "epoch": 0.89, "learning_rate": 3.313963265878356e-05, "loss": 2.9923, "step": 5501 }, { "epoch": 0.89, "learning_rate": 3.3046205305981066e-05, "loss": 3.1909, "step": 5502 }, { "epoch": 0.89, "learning_rate": 3.2952905333511865e-05, "loss": 3.0754, "step": 5503 }, { "epoch": 0.89, "learning_rate": 3.285973276682736e-05, "loss": 3.1634, "step": 5504 }, { "epoch": 0.89, "learning_rate": 3.276668763134405e-05, "loss": 3.1021, "step": 5505 }, { "epoch": 0.89, "learning_rate": 3.267376995244381e-05, "loss": 3.1038, "step": 5506 }, { "epoch": 0.89, "learning_rate": 3.2580979755473586e-05, "loss": 3.0182, "step": 5507 }, { "epoch": 0.89, "learning_rate": 3.248831706574568e-05, "loss": 3.2036, "step": 5508 }, { "epoch": 0.89, "learning_rate": 3.239578190853748e-05, "loss": 3.0976, "step": 5509 }, { "epoch": 0.89, "learning_rate": 3.2303374309091635e-05, "loss": 3.1029, "step": 5510 }, { "epoch": 0.89, "learning_rate": 3.221109429261615e-05, "loss": 3.0883, "step": 5511 }, { "epoch": 0.89, "learning_rate": 3.2118941884283824e-05, "loss": 3.1463, "step": 5512 }, { "epoch": 0.89, "learning_rate": 3.202691710923317e-05, "loss": 2.9521, "step": 5513 }, { "epoch": 0.89, "learning_rate": 3.193501999256765e-05, "loss": 3.132, "step": 5514 }, { "epoch": 0.89, "learning_rate": 3.1843250559355666e-05, "loss": 3.1626, "step": 5515 }, { "epoch": 0.89, "learning_rate": 3.175160883463113e-05, "loss": 3.1504, "step": 5516 }, { "epoch": 0.89, "learning_rate": 3.166009484339294e-05, "loss": 3.0446, "step": 5517 }, { "epoch": 0.89, "learning_rate": 3.156870861060529e-05, "loss": 3.214, "step": 5518 }, { "epoch": 0.89, "learning_rate": 3.1477450161197297e-05, "loss": 3.0483, "step": 5519 }, { "epoch": 0.89, "learning_rate": 3.13863195200636e-05, "loss": 3.159, "step": 5520 }, { "epoch": 0.89, "learning_rate": 3.1295316712063426e-05, "loss": 2.9099, "step": 5521 }, { "epoch": 0.89, "learning_rate": 3.120444176202153e-05, "loss": 2.9941, "step": 5522 }, { "epoch": 0.89, "learning_rate": 3.11136946947278e-05, "loss": 3.1232, "step": 5523 }, { "epoch": 0.89, "learning_rate": 3.102307553493699e-05, "loss": 3.1427, "step": 5524 }, { "epoch": 0.89, "learning_rate": 3.093258430736923e-05, "loss": 3.1802, "step": 5525 }, { "epoch": 0.89, "learning_rate": 3.084222103670964e-05, "loss": 3.2014, "step": 5526 }, { "epoch": 0.89, "learning_rate": 3.075198574760823e-05, "loss": 3.175, "step": 5527 }, { "epoch": 0.89, "learning_rate": 3.066187846468038e-05, "loss": 3.1835, "step": 5528 }, { "epoch": 0.89, "learning_rate": 3.057189921250653e-05, "loss": 3.2351, "step": 5529 }, { "epoch": 0.89, "learning_rate": 3.0482048015632036e-05, "loss": 3.2159, "step": 5530 }, { "epoch": 0.89, "learning_rate": 3.0392324898567414e-05, "loss": 3.1304, "step": 5531 }, { "epoch": 0.89, "learning_rate": 3.030272988578825e-05, "loss": 3.0428, "step": 5532 }, { "epoch": 0.89, "learning_rate": 3.0213263001735326e-05, "loss": 3.0456, "step": 5533 }, { "epoch": 0.89, "learning_rate": 3.0123924270814008e-05, "loss": 2.9054, "step": 5534 }, { "epoch": 0.89, "learning_rate": 3.003471371739519e-05, "loss": 3.1082, "step": 5535 }, { "epoch": 0.89, "learning_rate": 2.994563136581474e-05, "loss": 3.1387, "step": 5536 }, { "epoch": 0.89, "learning_rate": 2.9856677240373055e-05, "loss": 3.2325, "step": 5537 }, { "epoch": 0.89, "learning_rate": 2.9767851365336273e-05, "loss": 3.1436, "step": 5538 }, { "epoch": 0.89, "learning_rate": 2.9679153764935184e-05, "loss": 3.1744, "step": 5539 }, { "epoch": 0.89, "learning_rate": 2.9590584463365434e-05, "loss": 3.107, "step": 5540 }, { "epoch": 0.89, "learning_rate": 2.9502143484787968e-05, "loss": 3.083, "step": 5541 }, { "epoch": 0.89, "learning_rate": 2.9413830853328606e-05, "loss": 3.1019, "step": 5542 }, { "epoch": 0.89, "learning_rate": 2.9325646593078014e-05, "loss": 3.1257, "step": 5543 }, { "epoch": 0.89, "learning_rate": 2.9237590728092122e-05, "loss": 3.1544, "step": 5544 }, { "epoch": 0.89, "learning_rate": 2.9149663282391713e-05, "loss": 3.0667, "step": 5545 }, { "epoch": 0.89, "learning_rate": 2.906186427996238e-05, "loss": 3.2552, "step": 5546 }, { "epoch": 0.89, "learning_rate": 2.8974193744754907e-05, "loss": 3.1815, "step": 5547 }, { "epoch": 0.89, "learning_rate": 2.8886651700684995e-05, "loss": 3.252, "step": 5548 }, { "epoch": 0.89, "learning_rate": 2.8799238171633047e-05, "loss": 3.1829, "step": 5549 }, { "epoch": 0.89, "learning_rate": 2.8711953181444707e-05, "loss": 3.1914, "step": 5550 }, { "epoch": 0.9, "learning_rate": 2.8624796753930592e-05, "loss": 3.0172, "step": 5551 }, { "epoch": 0.9, "learning_rate": 2.8537768912865912e-05, "loss": 3.048, "step": 5552 }, { "epoch": 0.9, "learning_rate": 2.845086968199101e-05, "loss": 3.0889, "step": 5553 }, { "epoch": 0.9, "learning_rate": 2.836409908501131e-05, "loss": 3.1821, "step": 5554 }, { "epoch": 0.9, "learning_rate": 2.8277457145596653e-05, "loss": 3.1015, "step": 5555 }, { "epoch": 0.9, "learning_rate": 2.8190943887382303e-05, "loss": 2.9738, "step": 5556 }, { "epoch": 0.9, "learning_rate": 2.8104559333968326e-05, "loss": 3.0788, "step": 5557 }, { "epoch": 0.9, "learning_rate": 2.8018303508919262e-05, "loss": 3.1068, "step": 5558 }, { "epoch": 0.9, "learning_rate": 2.7932176435765066e-05, "loss": 3.239, "step": 5559 }, { "epoch": 0.9, "learning_rate": 2.7846178138000333e-05, "loss": 3.0694, "step": 5560 }, { "epoch": 0.9, "learning_rate": 2.7760308639084408e-05, "loss": 3.0002, "step": 5561 }, { "epoch": 0.9, "learning_rate": 2.767456796244161e-05, "loss": 3.1365, "step": 5562 }, { "epoch": 0.9, "learning_rate": 2.7588956131461396e-05, "loss": 3.1457, "step": 5563 }, { "epoch": 0.9, "learning_rate": 2.750347316949764e-05, "loss": 3.1996, "step": 5564 }, { "epoch": 0.9, "learning_rate": 2.741811909986919e-05, "loss": 3.0402, "step": 5565 }, { "epoch": 0.9, "learning_rate": 2.733289394585986e-05, "loss": 3.0723, "step": 5566 }, { "epoch": 0.9, "learning_rate": 2.7247797730718338e-05, "loss": 3.1055, "step": 5567 }, { "epoch": 0.9, "learning_rate": 2.7162830477657717e-05, "loss": 3.2497, "step": 5568 }, { "epoch": 0.9, "learning_rate": 2.7077992209856518e-05, "loss": 3.1498, "step": 5569 }, { "epoch": 0.9, "learning_rate": 2.6993282950457664e-05, "loss": 3.2538, "step": 5570 }, { "epoch": 0.9, "learning_rate": 2.69087027225689e-05, "loss": 3.1881, "step": 5571 }, { "epoch": 0.9, "learning_rate": 2.682425154926299e-05, "loss": 3.1852, "step": 5572 }, { "epoch": 0.9, "learning_rate": 2.6739929453577396e-05, "loss": 2.9833, "step": 5573 }, { "epoch": 0.9, "learning_rate": 2.665573645851416e-05, "loss": 2.9415, "step": 5574 }, { "epoch": 0.9, "learning_rate": 2.657167258704052e-05, "loss": 3.0489, "step": 5575 }, { "epoch": 0.9, "learning_rate": 2.6487737862088136e-05, "loss": 3.0258, "step": 5576 }, { "epoch": 0.9, "learning_rate": 2.6403932306553523e-05, "loss": 3.1672, "step": 5577 }, { "epoch": 0.9, "learning_rate": 2.6320255943298065e-05, "loss": 3.0145, "step": 5578 }, { "epoch": 0.9, "learning_rate": 2.6236708795147945e-05, "loss": 3.1491, "step": 5579 }, { "epoch": 0.9, "learning_rate": 2.615329088489371e-05, "loss": 3.1024, "step": 5580 }, { "epoch": 0.9, "learning_rate": 2.6070002235291212e-05, "loss": 3.2109, "step": 5581 }, { "epoch": 0.9, "learning_rate": 2.5986842869060613e-05, "loss": 3.0365, "step": 5582 }, { "epoch": 0.9, "learning_rate": 2.5903812808886985e-05, "loss": 3.0836, "step": 5583 }, { "epoch": 0.9, "learning_rate": 2.58209120774201e-05, "loss": 3.0877, "step": 5584 }, { "epoch": 0.9, "learning_rate": 2.5738140697274537e-05, "loss": 3.2719, "step": 5585 }, { "epoch": 0.9, "learning_rate": 2.5655498691029343e-05, "loss": 3.1799, "step": 5586 }, { "epoch": 0.9, "learning_rate": 2.557298608122849e-05, "loss": 3.0924, "step": 5587 }, { "epoch": 0.9, "learning_rate": 2.5490602890380687e-05, "loss": 3.1799, "step": 5588 }, { "epoch": 0.9, "learning_rate": 2.540834914095913e-05, "loss": 3.1245, "step": 5589 }, { "epoch": 0.9, "learning_rate": 2.5326224855401925e-05, "loss": 3.1834, "step": 5590 }, { "epoch": 0.9, "learning_rate": 2.5244230056111704e-05, "loss": 3.2579, "step": 5591 }, { "epoch": 0.9, "learning_rate": 2.5162364765455804e-05, "loss": 3.1161, "step": 5592 }, { "epoch": 0.9, "learning_rate": 2.5080629005766188e-05, "loss": 3.1203, "step": 5593 }, { "epoch": 0.9, "learning_rate": 2.4999022799339798e-05, "loss": 3.1735, "step": 5594 }, { "epoch": 0.9, "learning_rate": 2.4917546168437722e-05, "loss": 3.2784, "step": 5595 }, { "epoch": 0.9, "learning_rate": 2.483619913528612e-05, "loss": 3.085, "step": 5596 }, { "epoch": 0.9, "learning_rate": 2.4754981722075744e-05, "loss": 3.017, "step": 5597 }, { "epoch": 0.9, "learning_rate": 2.4673893950961645e-05, "loss": 3.2078, "step": 5598 }, { "epoch": 0.9, "learning_rate": 2.4592935844063858e-05, "loss": 3.1349, "step": 5599 }, { "epoch": 0.9, "learning_rate": 2.4512107423467045e-05, "loss": 3.0338, "step": 5600 }, { "epoch": 0.9, "learning_rate": 2.4431408711220294e-05, "loss": 3.0361, "step": 5601 }, { "epoch": 0.9, "learning_rate": 2.435083972933744e-05, "loss": 3.268, "step": 5602 }, { "epoch": 0.9, "learning_rate": 2.427040049979695e-05, "loss": 3.0204, "step": 5603 }, { "epoch": 0.9, "learning_rate": 2.419009104454173e-05, "loss": 3.1422, "step": 5604 }, { "epoch": 0.9, "learning_rate": 2.4109911385479356e-05, "loss": 3.0914, "step": 5605 }, { "epoch": 0.9, "learning_rate": 2.4029861544482277e-05, "loss": 3.0345, "step": 5606 }, { "epoch": 0.9, "learning_rate": 2.3949941543387087e-05, "loss": 3.2027, "step": 5607 }, { "epoch": 0.9, "learning_rate": 2.3870151403995234e-05, "loss": 3.1112, "step": 5608 }, { "epoch": 0.9, "learning_rate": 2.379049114807269e-05, "loss": 3.1418, "step": 5609 }, { "epoch": 0.9, "learning_rate": 2.3710960797349912e-05, "loss": 3.0829, "step": 5610 }, { "epoch": 0.9, "learning_rate": 2.3631560373522044e-05, "loss": 3.1345, "step": 5611 }, { "epoch": 0.9, "learning_rate": 2.35522898982487e-05, "loss": 3.2449, "step": 5612 }, { "epoch": 0.91, "learning_rate": 2.3473149393154037e-05, "loss": 3.1377, "step": 5613 }, { "epoch": 0.91, "learning_rate": 2.3394138879826886e-05, "loss": 3.2888, "step": 5614 }, { "epoch": 0.91, "learning_rate": 2.3315258379820516e-05, "loss": 3.2535, "step": 5615 }, { "epoch": 0.91, "learning_rate": 2.323650791465265e-05, "loss": 3.1639, "step": 5616 }, { "epoch": 0.91, "learning_rate": 2.315788750580572e-05, "loss": 3.1812, "step": 5617 }, { "epoch": 0.91, "learning_rate": 2.3079397174726512e-05, "loss": 3.033, "step": 5618 }, { "epoch": 0.91, "learning_rate": 2.300103694282646e-05, "loss": 3.105, "step": 5619 }, { "epoch": 0.91, "learning_rate": 2.2922806831481402e-05, "loss": 3.1516, "step": 5620 }, { "epoch": 0.91, "learning_rate": 2.284470686203177e-05, "loss": 3.1781, "step": 5621 }, { "epoch": 0.91, "learning_rate": 2.2766737055782583e-05, "loss": 3.1672, "step": 5622 }, { "epoch": 0.91, "learning_rate": 2.2688897434002986e-05, "loss": 3.4466, "step": 5623 }, { "epoch": 0.91, "learning_rate": 2.2611188017926943e-05, "loss": 3.1497, "step": 5624 }, { "epoch": 0.91, "learning_rate": 2.2533608828752827e-05, "loss": 3.1041, "step": 5625 }, { "epoch": 0.91, "learning_rate": 2.245615988764349e-05, "loss": 3.1792, "step": 5626 }, { "epoch": 0.91, "learning_rate": 2.23788412157262e-05, "loss": 3.1931, "step": 5627 }, { "epoch": 0.91, "learning_rate": 2.23016528340928e-05, "loss": 2.9503, "step": 5628 }, { "epoch": 0.91, "learning_rate": 2.2224594763799344e-05, "loss": 3.2422, "step": 5629 }, { "epoch": 0.91, "learning_rate": 2.2147667025866568e-05, "loss": 3.1883, "step": 5630 }, { "epoch": 0.91, "learning_rate": 2.2070869641279633e-05, "loss": 3.2113, "step": 5631 }, { "epoch": 0.91, "learning_rate": 2.1994202630988113e-05, "loss": 3.175, "step": 5632 }, { "epoch": 0.91, "learning_rate": 2.1917666015905946e-05, "loss": 3.1057, "step": 5633 }, { "epoch": 0.91, "learning_rate": 2.184125981691165e-05, "loss": 3.0862, "step": 5634 }, { "epoch": 0.91, "learning_rate": 2.1764984054847947e-05, "loss": 3.2033, "step": 5635 }, { "epoch": 0.91, "learning_rate": 2.1688838750522134e-05, "loss": 3.148, "step": 5636 }, { "epoch": 0.91, "learning_rate": 2.1612823924705927e-05, "loss": 3.1849, "step": 5637 }, { "epoch": 0.91, "learning_rate": 2.1536939598135406e-05, "loss": 3.1379, "step": 5638 }, { "epoch": 0.91, "learning_rate": 2.1461185791511072e-05, "loss": 3.1795, "step": 5639 }, { "epoch": 0.91, "learning_rate": 2.1385562525497838e-05, "loss": 3.0553, "step": 5640 }, { "epoch": 0.91, "learning_rate": 2.1310069820724866e-05, "loss": 3.1972, "step": 5641 }, { "epoch": 0.91, "learning_rate": 2.1234707697785848e-05, "loss": 3.1765, "step": 5642 }, { "epoch": 0.91, "learning_rate": 2.1159476177238846e-05, "loss": 3.062, "step": 5643 }, { "epoch": 0.91, "learning_rate": 2.1084375279606273e-05, "loss": 3.172, "step": 5644 }, { "epoch": 0.91, "learning_rate": 2.1009405025374904e-05, "loss": 3.1337, "step": 5645 }, { "epoch": 0.91, "learning_rate": 2.0934565434995944e-05, "loss": 3.0838, "step": 5646 }, { "epoch": 0.91, "learning_rate": 2.0859856528884726e-05, "loss": 3.0862, "step": 5647 }, { "epoch": 0.91, "learning_rate": 2.0785278327421218e-05, "loss": 3.1934, "step": 5648 }, { "epoch": 0.91, "learning_rate": 2.0710830850949547e-05, "loss": 3.1889, "step": 5649 }, { "epoch": 0.91, "learning_rate": 2.0636514119778238e-05, "loss": 3.1315, "step": 5650 }, { "epoch": 0.91, "learning_rate": 2.0562328154180188e-05, "loss": 3.3087, "step": 5651 }, { "epoch": 0.91, "learning_rate": 2.0488272974392654e-05, "loss": 3.1141, "step": 5652 }, { "epoch": 0.91, "learning_rate": 2.041434860061697e-05, "loss": 3.1611, "step": 5653 }, { "epoch": 0.91, "learning_rate": 2.0340555053019126e-05, "loss": 3.149, "step": 5654 }, { "epoch": 0.91, "learning_rate": 2.0266892351729183e-05, "loss": 3.0845, "step": 5655 }, { "epoch": 0.91, "learning_rate": 2.0193360516841618e-05, "loss": 3.1973, "step": 5656 }, { "epoch": 0.91, "learning_rate": 2.011995956841517e-05, "loss": 2.975, "step": 5657 }, { "epoch": 0.91, "learning_rate": 2.004668952647298e-05, "loss": 3.0875, "step": 5658 }, { "epoch": 0.91, "learning_rate": 1.997355041100224e-05, "loss": 2.9051, "step": 5659 }, { "epoch": 0.91, "learning_rate": 1.9900542241954645e-05, "loss": 3.085, "step": 5660 }, { "epoch": 0.91, "learning_rate": 1.9827665039246046e-05, "loss": 3.2426, "step": 5661 }, { "epoch": 0.91, "learning_rate": 1.975491882275665e-05, "loss": 3.024, "step": 5662 }, { "epoch": 0.91, "learning_rate": 1.968230361233092e-05, "loss": 2.9406, "step": 5663 }, { "epoch": 0.91, "learning_rate": 1.960981942777762e-05, "loss": 3.163, "step": 5664 }, { "epoch": 0.91, "learning_rate": 1.9537466288869542e-05, "loss": 3.0246, "step": 5665 }, { "epoch": 0.91, "learning_rate": 1.946524421534396e-05, "loss": 3.1116, "step": 5666 }, { "epoch": 0.91, "learning_rate": 1.9393153226902384e-05, "loss": 3.1128, "step": 5667 }, { "epoch": 0.91, "learning_rate": 1.932119334321053e-05, "loss": 3.1036, "step": 5668 }, { "epoch": 0.91, "learning_rate": 1.9249364583898253e-05, "loss": 3.1381, "step": 5669 }, { "epoch": 0.91, "learning_rate": 1.9177666968559825e-05, "loss": 3.0413, "step": 5670 }, { "epoch": 0.91, "learning_rate": 1.9106100516753598e-05, "loss": 3.0466, "step": 5671 }, { "epoch": 0.91, "learning_rate": 1.9034665248002126e-05, "loss": 3.0609, "step": 5672 }, { "epoch": 0.91, "learning_rate": 1.896336118179226e-05, "loss": 3.125, "step": 5673 }, { "epoch": 0.91, "learning_rate": 1.889218833757511e-05, "loss": 3.2234, "step": 5674 }, { "epoch": 0.92, "learning_rate": 1.882114673476587e-05, "loss": 3.1876, "step": 5675 }, { "epoch": 0.92, "learning_rate": 1.8750236392743923e-05, "loss": 3.209, "step": 5676 }, { "epoch": 0.92, "learning_rate": 1.8679457330853077e-05, "loss": 3.1199, "step": 5677 }, { "epoch": 0.92, "learning_rate": 1.8608809568400943e-05, "loss": 3.1794, "step": 5678 }, { "epoch": 0.92, "learning_rate": 1.85382931246596e-05, "loss": 3.0941, "step": 5679 }, { "epoch": 0.92, "learning_rate": 1.8467908018865275e-05, "loss": 3.0452, "step": 5680 }, { "epoch": 0.92, "learning_rate": 1.8397654270218278e-05, "loss": 3.1724, "step": 5681 }, { "epoch": 0.92, "learning_rate": 1.8327531897883166e-05, "loss": 3.0051, "step": 5682 }, { "epoch": 0.92, "learning_rate": 1.8257540920988636e-05, "loss": 3.154, "step": 5683 }, { "epoch": 0.92, "learning_rate": 1.8187681358627418e-05, "loss": 3.1737, "step": 5684 }, { "epoch": 0.92, "learning_rate": 1.81179532298566e-05, "loss": 3.207, "step": 5685 }, { "epoch": 0.92, "learning_rate": 1.804835655369741e-05, "loss": 3.2448, "step": 5686 }, { "epoch": 0.92, "learning_rate": 1.7978891349134828e-05, "loss": 3.1649, "step": 5687 }, { "epoch": 0.92, "learning_rate": 1.7909557635118536e-05, "loss": 3.2022, "step": 5688 }, { "epoch": 0.92, "learning_rate": 1.784035543056206e-05, "loss": 3.1362, "step": 5689 }, { "epoch": 0.92, "learning_rate": 1.777128475434292e-05, "loss": 3.2726, "step": 5690 }, { "epoch": 0.92, "learning_rate": 1.770234562530304e-05, "loss": 3.0861, "step": 5691 }, { "epoch": 0.92, "learning_rate": 1.7633538062248323e-05, "loss": 3.2239, "step": 5692 }, { "epoch": 0.92, "learning_rate": 1.756486208394864e-05, "loss": 3.0944, "step": 5693 }, { "epoch": 0.92, "learning_rate": 1.7496317709138234e-05, "loss": 3.1261, "step": 5694 }, { "epoch": 0.92, "learning_rate": 1.7427904956515416e-05, "loss": 3.0781, "step": 5695 }, { "epoch": 0.92, "learning_rate": 1.735962384474232e-05, "loss": 3.125, "step": 5696 }, { "epoch": 0.92, "learning_rate": 1.729147439244538e-05, "loss": 3.269, "step": 5697 }, { "epoch": 0.92, "learning_rate": 1.7223456618215228e-05, "loss": 3.1959, "step": 5698 }, { "epoch": 0.92, "learning_rate": 1.7155570540606236e-05, "loss": 3.2378, "step": 5699 }, { "epoch": 0.92, "learning_rate": 1.7087816178137205e-05, "loss": 3.1817, "step": 5700 }, { "epoch": 0.92, "learning_rate": 1.7020193549290852e-05, "loss": 2.9893, "step": 5701 }, { "epoch": 0.92, "learning_rate": 1.695270267251381e-05, "loss": 2.9859, "step": 5702 }, { "epoch": 0.92, "learning_rate": 1.6885343566217016e-05, "loss": 3.1793, "step": 5703 }, { "epoch": 0.92, "learning_rate": 1.681811624877533e-05, "loss": 3.0596, "step": 5704 }, { "epoch": 0.92, "learning_rate": 1.6751020738527746e-05, "loss": 3.2552, "step": 5705 }, { "epoch": 0.92, "learning_rate": 1.6684057053777235e-05, "loss": 3.0322, "step": 5706 }, { "epoch": 0.92, "learning_rate": 1.66172252127908e-05, "loss": 3.1178, "step": 5707 }, { "epoch": 0.92, "learning_rate": 1.6550525233799462e-05, "loss": 3.3634, "step": 5708 }, { "epoch": 0.92, "learning_rate": 1.6483957134998396e-05, "loss": 3.1225, "step": 5709 }, { "epoch": 0.92, "learning_rate": 1.6417520934546627e-05, "loss": 3.2244, "step": 5710 }, { "epoch": 0.92, "learning_rate": 1.635121665056738e-05, "loss": 3.0655, "step": 5711 }, { "epoch": 0.92, "learning_rate": 1.6285044301147634e-05, "loss": 3.1764, "step": 5712 }, { "epoch": 0.92, "learning_rate": 1.621900390433878e-05, "loss": 3.1152, "step": 5713 }, { "epoch": 0.92, "learning_rate": 1.61530954781558e-05, "loss": 3.053, "step": 5714 }, { "epoch": 0.92, "learning_rate": 1.608731904057792e-05, "loss": 3.0433, "step": 5715 }, { "epoch": 0.92, "learning_rate": 1.6021674609548285e-05, "loss": 3.1546, "step": 5716 }, { "epoch": 0.92, "learning_rate": 1.5956162202974133e-05, "loss": 3.1169, "step": 5717 }, { "epoch": 0.92, "learning_rate": 1.5890781838726388e-05, "loss": 3.1892, "step": 5718 }, { "epoch": 0.92, "learning_rate": 1.582553353464039e-05, "loss": 3.2242, "step": 5719 }, { "epoch": 0.92, "learning_rate": 1.576041730851502e-05, "loss": 3.0256, "step": 5720 }, { "epoch": 0.92, "learning_rate": 1.569543317811345e-05, "loss": 3.1503, "step": 5721 }, { "epoch": 0.92, "learning_rate": 1.5630581161162672e-05, "loss": 3.1163, "step": 5722 }, { "epoch": 0.92, "learning_rate": 1.5565861275353755e-05, "loss": 3.1321, "step": 5723 }, { "epoch": 0.92, "learning_rate": 1.5501273538341466e-05, "loss": 3.0619, "step": 5724 }, { "epoch": 0.92, "learning_rate": 1.543681796774482e-05, "loss": 3.1392, "step": 5725 }, { "epoch": 0.92, "learning_rate": 1.5372494581146702e-05, "loss": 3.1779, "step": 5726 }, { "epoch": 0.92, "learning_rate": 1.5308303396093682e-05, "loss": 3.1552, "step": 5727 }, { "epoch": 0.92, "learning_rate": 1.5244244430096699e-05, "loss": 3.2945, "step": 5728 }, { "epoch": 0.92, "learning_rate": 1.5180317700630274e-05, "loss": 2.857, "step": 5729 }, { "epoch": 0.92, "learning_rate": 1.5116523225132961e-05, "loss": 3.1144, "step": 5730 }, { "epoch": 0.92, "learning_rate": 1.5052861021007336e-05, "loss": 3.0732, "step": 5731 }, { "epoch": 0.92, "learning_rate": 1.498933110561984e-05, "loss": 3.2471, "step": 5732 }, { "epoch": 0.92, "learning_rate": 1.492593349630067e-05, "loss": 3.153, "step": 5733 }, { "epoch": 0.92, "learning_rate": 1.4862668210344154e-05, "loss": 3.1576, "step": 5734 }, { "epoch": 0.92, "learning_rate": 1.4799535265008435e-05, "loss": 3.2363, "step": 5735 }, { "epoch": 0.92, "learning_rate": 1.4736534677515512e-05, "loss": 3.0556, "step": 5736 }, { "epoch": 0.93, "learning_rate": 1.467366646505125e-05, "loss": 3.1518, "step": 5737 }, { "epoch": 0.93, "learning_rate": 1.4610930644765652e-05, "loss": 3.0887, "step": 5738 }, { "epoch": 0.93, "learning_rate": 1.4548327233772308e-05, "loss": 3.0697, "step": 5739 }, { "epoch": 0.93, "learning_rate": 1.4485856249148776e-05, "loss": 3.0453, "step": 5740 }, { "epoch": 0.93, "learning_rate": 1.4423517707936595e-05, "loss": 3.1049, "step": 5741 }, { "epoch": 0.93, "learning_rate": 1.4361311627140995e-05, "loss": 3.2221, "step": 5742 }, { "epoch": 0.93, "learning_rate": 1.4299238023731231e-05, "loss": 3.0702, "step": 5743 }, { "epoch": 0.93, "learning_rate": 1.423729691464043e-05, "loss": 3.1126, "step": 5744 }, { "epoch": 0.93, "learning_rate": 1.4175488316765407e-05, "loss": 3.1977, "step": 5745 }, { "epoch": 0.93, "learning_rate": 1.4113812246967006e-05, "loss": 3.1111, "step": 5746 }, { "epoch": 0.93, "learning_rate": 1.4052268722069828e-05, "loss": 3.1497, "step": 5747 }, { "epoch": 0.93, "learning_rate": 1.3990857758862275e-05, "loss": 3.122, "step": 5748 }, { "epoch": 0.93, "learning_rate": 1.3929579374096613e-05, "loss": 3.1329, "step": 5749 }, { "epoch": 0.93, "learning_rate": 1.3868433584489137e-05, "loss": 3.1188, "step": 5750 }, { "epoch": 0.93, "learning_rate": 1.3807420406719674e-05, "loss": 3.1085, "step": 5751 }, { "epoch": 0.93, "learning_rate": 1.3746539857432016e-05, "loss": 3.1243, "step": 5752 }, { "epoch": 0.93, "learning_rate": 1.3685791953233883e-05, "loss": 3.2276, "step": 5753 }, { "epoch": 0.93, "learning_rate": 1.3625176710696519e-05, "loss": 3.1399, "step": 5754 }, { "epoch": 0.93, "learning_rate": 1.356469414635525e-05, "loss": 3.1616, "step": 5755 }, { "epoch": 0.93, "learning_rate": 1.3504344276709214e-05, "loss": 3.0259, "step": 5756 }, { "epoch": 0.93, "learning_rate": 1.3444127118221128e-05, "loss": 3.0629, "step": 5757 }, { "epoch": 0.93, "learning_rate": 1.3384042687317632e-05, "loss": 3.1944, "step": 5758 }, { "epoch": 0.93, "learning_rate": 1.3324091000389171e-05, "loss": 3.1641, "step": 5759 }, { "epoch": 0.93, "learning_rate": 1.326427207379005e-05, "loss": 3.0947, "step": 5760 }, { "epoch": 0.93, "learning_rate": 1.3204585923838164e-05, "loss": 3.0785, "step": 5761 }, { "epoch": 0.93, "learning_rate": 1.3145032566815318e-05, "loss": 3.0591, "step": 5762 }, { "epoch": 0.93, "learning_rate": 1.308561201896713e-05, "loss": 3.0596, "step": 5763 }, { "epoch": 0.93, "learning_rate": 1.3026324296502857e-05, "loss": 3.1481, "step": 5764 }, { "epoch": 0.93, "learning_rate": 1.296716941559567e-05, "loss": 3.1903, "step": 5765 }, { "epoch": 0.93, "learning_rate": 1.2908147392382496e-05, "loss": 3.1231, "step": 5766 }, { "epoch": 0.93, "learning_rate": 1.2849258242963734e-05, "loss": 3.0665, "step": 5767 }, { "epoch": 0.93, "learning_rate": 1.2790501983403924e-05, "loss": 3.2479, "step": 5768 }, { "epoch": 0.93, "learning_rate": 1.2731878629731131e-05, "loss": 3.1062, "step": 5769 }, { "epoch": 0.93, "learning_rate": 1.2673388197937231e-05, "loss": 3.1694, "step": 5770 }, { "epoch": 0.93, "learning_rate": 1.2615030703977848e-05, "loss": 3.0937, "step": 5771 }, { "epoch": 0.93, "learning_rate": 1.2556806163772361e-05, "loss": 3.2145, "step": 5772 }, { "epoch": 0.93, "learning_rate": 1.2498714593203674e-05, "loss": 3.1417, "step": 5773 }, { "epoch": 0.93, "learning_rate": 1.2440756008118725e-05, "loss": 2.958, "step": 5774 }, { "epoch": 0.93, "learning_rate": 1.2382930424328087e-05, "loss": 3.0864, "step": 5775 }, { "epoch": 0.93, "learning_rate": 1.232523785760592e-05, "loss": 3.1011, "step": 5776 }, { "epoch": 0.93, "learning_rate": 1.2267678323690135e-05, "loss": 3.1926, "step": 5777 }, { "epoch": 0.93, "learning_rate": 1.221025183828256e-05, "loss": 3.0757, "step": 5778 }, { "epoch": 0.93, "learning_rate": 1.2152958417048388e-05, "loss": 3.1547, "step": 5779 }, { "epoch": 0.93, "learning_rate": 1.209579807561667e-05, "loss": 3.1484, "step": 5780 }, { "epoch": 0.93, "learning_rate": 1.2038770829580437e-05, "loss": 3.0722, "step": 5781 }, { "epoch": 0.93, "learning_rate": 1.1981876694495907e-05, "loss": 3.2802, "step": 5782 }, { "epoch": 0.93, "learning_rate": 1.1925115685883336e-05, "loss": 3.0681, "step": 5783 }, { "epoch": 0.93, "learning_rate": 1.186848781922656e-05, "loss": 3.1523, "step": 5784 }, { "epoch": 0.93, "learning_rate": 1.1811993109973051e-05, "loss": 3.0257, "step": 5785 }, { "epoch": 0.93, "learning_rate": 1.1755631573533987e-05, "loss": 3.2309, "step": 5786 }, { "epoch": 0.93, "learning_rate": 1.1699403225284289e-05, "loss": 3.1925, "step": 5787 }, { "epoch": 0.93, "learning_rate": 1.1643308080562465e-05, "loss": 3.1396, "step": 5788 }, { "epoch": 0.93, "learning_rate": 1.158734615467072e-05, "loss": 3.01, "step": 5789 }, { "epoch": 0.93, "learning_rate": 1.1531517462874952e-05, "loss": 3.2535, "step": 5790 }, { "epoch": 0.93, "learning_rate": 1.1475822020404591e-05, "loss": 3.0723, "step": 5791 }, { "epoch": 0.93, "learning_rate": 1.1420259842452817e-05, "loss": 2.9622, "step": 5792 }, { "epoch": 0.93, "learning_rate": 1.1364830944176453e-05, "loss": 2.8851, "step": 5793 }, { "epoch": 0.93, "learning_rate": 1.1309535340695897e-05, "loss": 3.0864, "step": 5794 }, { "epoch": 0.93, "learning_rate": 1.1254373047095367e-05, "loss": 3.0258, "step": 5795 }, { "epoch": 0.93, "learning_rate": 1.1199344078422491e-05, "loss": 3.1879, "step": 5796 }, { "epoch": 0.93, "learning_rate": 1.1144448449688593e-05, "loss": 2.9528, "step": 5797 }, { "epoch": 0.93, "learning_rate": 1.1089686175868697e-05, "loss": 3.0667, "step": 5798 }, { "epoch": 0.94, "learning_rate": 1.103505727190146e-05, "loss": 2.96, "step": 5799 }, { "epoch": 0.94, "learning_rate": 1.0980561752688967e-05, "loss": 3.1021, "step": 5800 }, { "epoch": 0.94, "learning_rate": 1.0926199633097156e-05, "loss": 3.0839, "step": 5801 }, { "epoch": 0.94, "learning_rate": 1.0871970927955498e-05, "loss": 2.8838, "step": 5802 }, { "epoch": 0.94, "learning_rate": 1.081787565205694e-05, "loss": 3.2359, "step": 5803 }, { "epoch": 0.94, "learning_rate": 1.0763913820158233e-05, "loss": 3.0948, "step": 5804 }, { "epoch": 0.94, "learning_rate": 1.071008544697949e-05, "loss": 3.1211, "step": 5805 }, { "epoch": 0.94, "learning_rate": 1.0656390547204686e-05, "loss": 3.1752, "step": 5806 }, { "epoch": 0.94, "learning_rate": 1.0602829135481162e-05, "loss": 3.0305, "step": 5807 }, { "epoch": 0.94, "learning_rate": 1.0549401226420064e-05, "loss": 3.0043, "step": 5808 }, { "epoch": 0.94, "learning_rate": 1.0496106834595897e-05, "loss": 3.1328, "step": 5809 }, { "epoch": 0.94, "learning_rate": 1.0442945974546813e-05, "loss": 3.3263, "step": 5810 }, { "epoch": 0.94, "learning_rate": 1.03899186607746e-05, "loss": 3.2463, "step": 5811 }, { "epoch": 0.94, "learning_rate": 1.0337024907744574e-05, "loss": 2.9378, "step": 5812 }, { "epoch": 0.94, "learning_rate": 1.028426472988564e-05, "loss": 3.0917, "step": 5813 }, { "epoch": 0.94, "learning_rate": 1.0231638141590227e-05, "loss": 3.2037, "step": 5814 }, { "epoch": 0.94, "learning_rate": 1.017914515721441e-05, "loss": 3.1373, "step": 5815 }, { "epoch": 0.94, "learning_rate": 1.012678579107762e-05, "loss": 3.1917, "step": 5816 }, { "epoch": 0.94, "learning_rate": 1.0074560057463044e-05, "loss": 2.9772, "step": 5817 }, { "epoch": 0.94, "learning_rate": 1.00224679706174e-05, "loss": 3.194, "step": 5818 }, { "epoch": 0.94, "learning_rate": 9.970509544750761e-06, "loss": 3.0528, "step": 5819 }, { "epoch": 0.94, "learning_rate": 9.918684794037015e-06, "loss": 3.0101, "step": 5820 }, { "epoch": 0.94, "learning_rate": 9.86699373261335e-06, "loss": 3.1465, "step": 5821 }, { "epoch": 0.94, "learning_rate": 9.815436374580544e-06, "loss": 3.0981, "step": 5822 }, { "epoch": 0.94, "learning_rate": 9.764012734002958e-06, "loss": 3.1508, "step": 5823 }, { "epoch": 0.94, "learning_rate": 9.712722824908426e-06, "loss": 3.1847, "step": 5824 }, { "epoch": 0.94, "learning_rate": 9.661566661288423e-06, "loss": 3.1078, "step": 5825 }, { "epoch": 0.94, "learning_rate": 9.610544257097731e-06, "loss": 2.9871, "step": 5826 }, { "epoch": 0.94, "learning_rate": 9.559655626254826e-06, "loss": 3.2144, "step": 5827 }, { "epoch": 0.94, "learning_rate": 9.50890078264155e-06, "loss": 3.2244, "step": 5828 }, { "epoch": 0.94, "learning_rate": 9.458279740103327e-06, "loss": 3.0702, "step": 5829 }, { "epoch": 0.94, "learning_rate": 9.407792512449109e-06, "loss": 3.2553, "step": 5830 }, { "epoch": 0.94, "learning_rate": 9.357439113451327e-06, "loss": 3.1375, "step": 5831 }, { "epoch": 0.94, "learning_rate": 9.307219556845826e-06, "loss": 3.1523, "step": 5832 }, { "epoch": 0.94, "learning_rate": 9.257133856332034e-06, "loss": 3.1633, "step": 5833 }, { "epoch": 0.94, "learning_rate": 9.207182025572802e-06, "loss": 3.1104, "step": 5834 }, { "epoch": 0.94, "learning_rate": 9.157364078194509e-06, "loss": 3.063, "step": 5835 }, { "epoch": 0.94, "learning_rate": 9.107680027787058e-06, "loss": 3.2492, "step": 5836 }, { "epoch": 0.94, "learning_rate": 9.05812988790361e-06, "loss": 3.0196, "step": 5837 }, { "epoch": 0.94, "learning_rate": 9.008713672061075e-06, "loss": 3.2821, "step": 5838 }, { "epoch": 0.94, "learning_rate": 8.959431393739726e-06, "loss": 3.0367, "step": 5839 }, { "epoch": 0.94, "learning_rate": 8.91028306638314e-06, "loss": 3.0892, "step": 5840 }, { "epoch": 0.94, "learning_rate": 8.861268703398596e-06, "loss": 3.2131, "step": 5841 }, { "epoch": 0.94, "learning_rate": 8.812388318156672e-06, "loss": 3.2442, "step": 5842 }, { "epoch": 0.94, "learning_rate": 8.763641923991483e-06, "loss": 3.1646, "step": 5843 }, { "epoch": 0.94, "learning_rate": 8.715029534200503e-06, "loss": 3.2161, "step": 5844 }, { "epoch": 0.94, "learning_rate": 8.666551162044845e-06, "loss": 3.1733, "step": 5845 }, { "epoch": 0.94, "learning_rate": 8.618206820748764e-06, "loss": 3.0776, "step": 5846 }, { "epoch": 0.94, "learning_rate": 8.569996523500212e-06, "loss": 3.2024, "step": 5847 }, { "epoch": 0.94, "learning_rate": 8.52192028345039e-06, "loss": 3.0798, "step": 5848 }, { "epoch": 0.94, "learning_rate": 8.473978113714143e-06, "loss": 3.1359, "step": 5849 }, { "epoch": 0.94, "learning_rate": 8.426170027369451e-06, "loss": 3.1325, "step": 5850 }, { "epoch": 0.94, "learning_rate": 8.378496037458106e-06, "loss": 3.1476, "step": 5851 }, { "epoch": 0.94, "learning_rate": 8.33095615698487e-06, "loss": 3.0812, "step": 5852 }, { "epoch": 0.94, "learning_rate": 8.28355039891826e-06, "loss": 3.0894, "step": 5853 }, { "epoch": 0.94, "learning_rate": 8.236278776190097e-06, "loss": 3.0367, "step": 5854 }, { "epoch": 0.94, "learning_rate": 8.189141301695568e-06, "loss": 3.2506, "step": 5855 }, { "epoch": 0.94, "learning_rate": 8.14213798829333e-06, "loss": 3.1903, "step": 5856 }, { "epoch": 0.94, "learning_rate": 8.095268848805404e-06, "loss": 3.2277, "step": 5857 }, { "epoch": 0.94, "learning_rate": 8.04853389601723e-06, "loss": 3.1115, "step": 5858 }, { "epoch": 0.94, "learning_rate": 8.001933142677664e-06, "loss": 3.2206, "step": 5859 }, { "epoch": 0.94, "learning_rate": 7.955466601498873e-06, "loss": 3.2783, "step": 5860 }, { "epoch": 0.95, "learning_rate": 7.90913428515655e-06, "loss": 3.2288, "step": 5861 }, { "epoch": 0.95, "learning_rate": 7.862936206289529e-06, "loss": 3.1356, "step": 5862 }, { "epoch": 0.95, "learning_rate": 7.816872377500395e-06, "loss": 3.0367, "step": 5863 }, { "epoch": 0.95, "learning_rate": 7.770942811354764e-06, "loss": 3.1318, "step": 5864 }, { "epoch": 0.95, "learning_rate": 7.725147520381781e-06, "loss": 3.0605, "step": 5865 }, { "epoch": 0.95, "learning_rate": 7.679486517073953e-06, "loss": 3.27, "step": 5866 }, { "epoch": 0.95, "learning_rate": 7.633959813887148e-06, "loss": 3.1798, "step": 5867 }, { "epoch": 0.95, "learning_rate": 7.588567423240544e-06, "loss": 3.1512, "step": 5868 }, { "epoch": 0.95, "learning_rate": 7.543309357516848e-06, "loss": 3.0595, "step": 5869 }, { "epoch": 0.95, "learning_rate": 7.4981856290619595e-06, "loss": 3.087, "step": 5870 }, { "epoch": 0.95, "learning_rate": 7.453196250185146e-06, "loss": 3.1736, "step": 5871 }, { "epoch": 0.95, "learning_rate": 7.408341233159088e-06, "loss": 2.906, "step": 5872 }, { "epoch": 0.95, "learning_rate": 7.3636205902197776e-06, "loss": 3.0359, "step": 5873 }, { "epoch": 0.95, "learning_rate": 7.31903433356651e-06, "loss": 3.2488, "step": 5874 }, { "epoch": 0.95, "learning_rate": 7.274582475362057e-06, "loss": 3.2926, "step": 5875 }, { "epoch": 0.95, "learning_rate": 7.230265027732441e-06, "loss": 3.0448, "step": 5876 }, { "epoch": 0.95, "learning_rate": 7.186082002766991e-06, "loss": 3.1184, "step": 5877 }, { "epoch": 0.95, "learning_rate": 7.142033412518345e-06, "loss": 3.1051, "step": 5878 }, { "epoch": 0.95, "learning_rate": 7.098119269002612e-06, "loss": 3.1055, "step": 5879 }, { "epoch": 0.95, "learning_rate": 7.0543395841989875e-06, "loss": 3.1803, "step": 5880 }, { "epoch": 0.95, "learning_rate": 7.010694370050308e-06, "loss": 3.1889, "step": 5881 }, { "epoch": 0.95, "learning_rate": 6.967183638462493e-06, "loss": 3.1123, "step": 5882 }, { "epoch": 0.95, "learning_rate": 6.9238074013047715e-06, "loss": 3.0366, "step": 5883 }, { "epoch": 0.95, "learning_rate": 6.880565670409789e-06, "loss": 3.0802, "step": 5884 }, { "epoch": 0.95, "learning_rate": 6.837458457573498e-06, "loss": 3.0471, "step": 5885 }, { "epoch": 0.95, "learning_rate": 6.794485774555048e-06, "loss": 2.945, "step": 5886 }, { "epoch": 0.95, "learning_rate": 6.7516476330769514e-06, "loss": 3.3361, "step": 5887 }, { "epoch": 0.95, "learning_rate": 6.708944044825138e-06, "loss": 3.0841, "step": 5888 }, { "epoch": 0.95, "learning_rate": 6.666375021448623e-06, "loss": 3.254, "step": 5889 }, { "epoch": 0.95, "learning_rate": 6.623940574559839e-06, "loss": 3.2136, "step": 5890 }, { "epoch": 0.95, "learning_rate": 6.581640715734472e-06, "loss": 3.1139, "step": 5891 }, { "epoch": 0.95, "learning_rate": 6.539475456511512e-06, "loss": 3.1468, "step": 5892 }, { "epoch": 0.95, "learning_rate": 6.497444808393149e-06, "loss": 3.0984, "step": 5893 }, { "epoch": 0.95, "learning_rate": 6.4555487828450445e-06, "loss": 3.0068, "step": 5894 }, { "epoch": 0.95, "learning_rate": 6.413787391295944e-06, "loss": 2.9969, "step": 5895 }, { "epoch": 0.95, "learning_rate": 6.372160645137903e-06, "loss": 3.12, "step": 5896 }, { "epoch": 0.95, "learning_rate": 6.330668555726393e-06, "loss": 2.9608, "step": 5897 }, { "epoch": 0.95, "learning_rate": 6.289311134380027e-06, "loss": 2.9508, "step": 5898 }, { "epoch": 0.95, "learning_rate": 6.248088392380502e-06, "loss": 3.1603, "step": 5899 }, { "epoch": 0.95, "learning_rate": 6.207000340973268e-06, "loss": 3.1351, "step": 5900 }, { "epoch": 0.95, "learning_rate": 6.166046991366525e-06, "loss": 3.1398, "step": 5901 }, { "epoch": 0.95, "learning_rate": 6.1252283547320046e-06, "loss": 3.0199, "step": 5902 }, { "epoch": 0.95, "learning_rate": 6.084544442204576e-06, "loss": 3.1157, "step": 5903 }, { "epoch": 0.95, "learning_rate": 6.043995264882529e-06, "loss": 3.1596, "step": 5904 }, { "epoch": 0.95, "learning_rate": 6.003580833827127e-06, "loss": 3.1201, "step": 5905 }, { "epoch": 0.95, "learning_rate": 5.963301160063106e-06, "loss": 3.2139, "step": 5906 }, { "epoch": 0.95, "learning_rate": 5.923156254578343e-06, "loss": 3.0825, "step": 5907 }, { "epoch": 0.95, "learning_rate": 5.883146128323913e-06, "loss": 3.23, "step": 5908 }, { "epoch": 0.95, "learning_rate": 5.843270792214306e-06, "loss": 3.1292, "step": 5909 }, { "epoch": 0.95, "learning_rate": 5.8035302571270435e-06, "loss": 3.1517, "step": 5910 }, { "epoch": 0.95, "learning_rate": 5.763924533902898e-06, "loss": 3.1377, "step": 5911 }, { "epoch": 0.95, "learning_rate": 5.724453633345949e-06, "loss": 3.0958, "step": 5912 }, { "epoch": 0.95, "learning_rate": 5.68511756622353e-06, "loss": 3.1083, "step": 5913 }, { "epoch": 0.95, "learning_rate": 5.645916343266056e-06, "loss": 3.1886, "step": 5914 }, { "epoch": 0.95, "learning_rate": 5.606849975167194e-06, "loss": 3.0474, "step": 5915 }, { "epoch": 0.95, "learning_rate": 5.567918472584032e-06, "loss": 3.2674, "step": 5916 }, { "epoch": 0.95, "learning_rate": 5.529121846136465e-06, "loss": 3.2918, "step": 5917 }, { "epoch": 0.95, "learning_rate": 5.4904601064079126e-06, "loss": 3.0303, "step": 5918 }, { "epoch": 0.95, "learning_rate": 5.451933263945053e-06, "loss": 3.1391, "step": 5919 }, { "epoch": 0.95, "learning_rate": 5.413541329257421e-06, "loss": 3.2609, "step": 5920 }, { "epoch": 0.95, "learning_rate": 5.3752843128180855e-06, "loss": 3.1735, "step": 5921 }, { "epoch": 0.95, "learning_rate": 5.337162225063141e-06, "loss": 3.1581, "step": 5922 }, { "epoch": 0.96, "learning_rate": 5.299175076391827e-06, "loss": 3.1932, "step": 5923 }, { "epoch": 0.96, "learning_rate": 5.261322877166741e-06, "loss": 3.148, "step": 5924 }, { "epoch": 0.96, "learning_rate": 5.22360563771368e-06, "loss": 3.0843, "step": 5925 }, { "epoch": 0.96, "learning_rate": 5.1860233683213576e-06, "loss": 3.1876, "step": 5926 }, { "epoch": 0.96, "learning_rate": 5.148576079241907e-06, "loss": 3.1561, "step": 5927 }, { "epoch": 0.96, "learning_rate": 5.1112637806906e-06, "loss": 3.2201, "step": 5928 }, { "epoch": 0.96, "learning_rate": 5.074086482845852e-06, "loss": 3.1928, "step": 5929 }, { "epoch": 0.96, "learning_rate": 5.037044195849216e-06, "loss": 3.2039, "step": 5930 }, { "epoch": 0.96, "learning_rate": 5.000136929805443e-06, "loss": 3.1518, "step": 5931 }, { "epoch": 0.96, "learning_rate": 4.9633646947825905e-06, "loss": 3.1451, "step": 5932 }, { "epoch": 0.96, "learning_rate": 4.926727500811634e-06, "loss": 2.879, "step": 5933 }, { "epoch": 0.96, "learning_rate": 4.890225357886913e-06, "loss": 3.1746, "step": 5934 }, { "epoch": 0.96, "learning_rate": 4.853858275965794e-06, "loss": 3.0657, "step": 5935 }, { "epoch": 0.96, "learning_rate": 4.8176262649689e-06, "loss": 3.1246, "step": 5936 }, { "epoch": 0.96, "learning_rate": 4.781529334779877e-06, "loss": 3.0573, "step": 5937 }, { "epoch": 0.96, "learning_rate": 4.74556749524574e-06, "loss": 3.2034, "step": 5938 }, { "epoch": 0.96, "learning_rate": 4.709740756176362e-06, "loss": 3.2719, "step": 5939 }, { "epoch": 0.96, "learning_rate": 4.674049127345093e-06, "loss": 3.0877, "step": 5940 }, { "epoch": 0.96, "learning_rate": 4.638492618488088e-06, "loss": 2.9772, "step": 5941 }, { "epoch": 0.96, "learning_rate": 4.603071239304924e-06, "loss": 3.247, "step": 5942 }, { "epoch": 0.96, "learning_rate": 4.567784999458147e-06, "loss": 3.017, "step": 5943 }, { "epoch": 0.96, "learning_rate": 4.5326339085735025e-06, "loss": 3.204, "step": 5944 }, { "epoch": 0.96, "learning_rate": 4.49761797623982e-06, "loss": 3.1675, "step": 5945 }, { "epoch": 0.96, "learning_rate": 4.462737212009182e-06, "loss": 3.1458, "step": 5946 }, { "epoch": 0.96, "learning_rate": 4.427991625396644e-06, "loss": 3.0829, "step": 5947 }, { "epoch": 0.96, "learning_rate": 4.39338122588051e-06, "loss": 3.2146, "step": 5948 }, { "epoch": 0.96, "learning_rate": 4.358906022902065e-06, "loss": 3.1524, "step": 5949 }, { "epoch": 0.96, "learning_rate": 4.324566025865895e-06, "loss": 3.144, "step": 5950 }, { "epoch": 0.96, "learning_rate": 4.290361244139506e-06, "loss": 3.1683, "step": 5951 }, { "epoch": 0.96, "learning_rate": 4.256291687053715e-06, "loss": 3.2288, "step": 5952 }, { "epoch": 0.96, "learning_rate": 4.222357363902363e-06, "loss": 3.2062, "step": 5953 }, { "epoch": 0.96, "learning_rate": 4.188558283942323e-06, "loss": 3.0767, "step": 5954 }, { "epoch": 0.96, "learning_rate": 4.154894456393665e-06, "loss": 3.1157, "step": 5955 }, { "epoch": 0.96, "learning_rate": 4.121365890439544e-06, "loss": 3.1962, "step": 5956 }, { "epoch": 0.96, "learning_rate": 4.087972595226252e-06, "loss": 3.3033, "step": 5957 }, { "epoch": 0.96, "learning_rate": 4.054714579863117e-06, "loss": 3.1311, "step": 5958 }, { "epoch": 0.96, "learning_rate": 4.021591853422601e-06, "loss": 3.0704, "step": 5959 }, { "epoch": 0.96, "learning_rate": 3.988604424940257e-06, "loss": 3.2803, "step": 5960 }, { "epoch": 0.96, "learning_rate": 3.955752303414717e-06, "loss": 3.1347, "step": 5961 }, { "epoch": 0.96, "learning_rate": 3.9230354978077584e-06, "loss": 3.1817, "step": 5962 }, { "epoch": 0.96, "learning_rate": 3.890454017044076e-06, "loss": 3.012, "step": 5963 }, { "epoch": 0.96, "learning_rate": 3.8580078700117264e-06, "loss": 2.9873, "step": 5964 }, { "epoch": 0.96, "learning_rate": 3.825697065561629e-06, "loss": 3.2002, "step": 5965 }, { "epoch": 0.96, "learning_rate": 3.7935216125078443e-06, "loss": 3.1818, "step": 5966 }, { "epoch": 0.96, "learning_rate": 3.7614815196274633e-06, "loss": 3.127, "step": 5967 }, { "epoch": 0.96, "learning_rate": 3.7295767956607717e-06, "loss": 3.1516, "step": 5968 }, { "epoch": 0.96, "learning_rate": 3.6978074493110303e-06, "loss": 3.1372, "step": 5969 }, { "epoch": 0.96, "learning_rate": 3.6661734892446396e-06, "loss": 3.1127, "step": 5970 }, { "epoch": 0.96, "learning_rate": 3.6346749240910304e-06, "loss": 2.9377, "step": 5971 }, { "epoch": 0.96, "learning_rate": 3.603311762442607e-06, "loss": 3.2213, "step": 5972 }, { "epoch": 0.96, "learning_rate": 3.5720840128550256e-06, "loss": 3.1827, "step": 5973 }, { "epoch": 0.96, "learning_rate": 3.540991683846806e-06, "loss": 3.0245, "step": 5974 }, { "epoch": 0.96, "learning_rate": 3.5100347838997183e-06, "loss": 3.1961, "step": 5975 }, { "epoch": 0.96, "learning_rate": 3.4792133214585074e-06, "loss": 3.1742, "step": 5976 }, { "epoch": 0.96, "learning_rate": 3.448527304930893e-06, "loss": 3.0325, "step": 5977 }, { "epoch": 0.96, "learning_rate": 3.4179767426876784e-06, "loss": 3.1653, "step": 5978 }, { "epoch": 0.96, "learning_rate": 3.387561643062864e-06, "loss": 3.1377, "step": 5979 }, { "epoch": 0.96, "learning_rate": 3.3572820143533133e-06, "loss": 3.1327, "step": 5980 }, { "epoch": 0.96, "learning_rate": 3.3271378648190296e-06, "loss": 3.0636, "step": 5981 }, { "epoch": 0.96, "learning_rate": 3.2971292026829915e-06, "loss": 3.1783, "step": 5982 }, { "epoch": 0.96, "learning_rate": 3.267256036131372e-06, "loss": 3.3819, "step": 5983 }, { "epoch": 0.96, "learning_rate": 3.237518373313153e-06, "loss": 3.1377, "step": 5984 }, { "epoch": 0.97, "learning_rate": 3.2079162223405123e-06, "loss": 3.2663, "step": 5985 }, { "epoch": 0.97, "learning_rate": 3.178449591288657e-06, "loss": 3.12, "step": 5986 }, { "epoch": 0.97, "learning_rate": 3.149118488195768e-06, "loss": 3.1093, "step": 5987 }, { "epoch": 0.97, "learning_rate": 3.1199229210630565e-06, "loss": 3.1641, "step": 5988 }, { "epoch": 0.97, "learning_rate": 3.090862897854818e-06, "loss": 3.2216, "step": 5989 }, { "epoch": 0.97, "learning_rate": 3.061938426498323e-06, "loss": 2.9737, "step": 5990 }, { "epoch": 0.97, "learning_rate": 3.033149514883815e-06, "loss": 3.285, "step": 5991 }, { "epoch": 0.97, "learning_rate": 3.0044961708647343e-06, "loss": 2.9358, "step": 5992 }, { "epoch": 0.97, "learning_rate": 2.975978402257329e-06, "loss": 3.2881, "step": 5993 }, { "epoch": 0.97, "learning_rate": 2.947596216841042e-06, "loss": 3.2222, "step": 5994 }, { "epoch": 0.97, "learning_rate": 2.9193496223581805e-06, "loss": 3.1006, "step": 5995 }, { "epoch": 0.97, "learning_rate": 2.8912386265141923e-06, "loss": 2.9476, "step": 5996 }, { "epoch": 0.97, "learning_rate": 2.8632632369774424e-06, "loss": 3.0084, "step": 5997 }, { "epoch": 0.97, "learning_rate": 2.8354234613793275e-06, "loss": 3.2239, "step": 5998 }, { "epoch": 0.97, "learning_rate": 2.8077193073142713e-06, "loss": 3.1365, "step": 5999 }, { "epoch": 0.97, "learning_rate": 2.7801507823397297e-06, "loss": 3.1141, "step": 6000 }, { "epoch": 0.97, "learning_rate": 2.7527178939760754e-06, "loss": 3.1217, "step": 6001 }, { "epoch": 0.97, "learning_rate": 2.7254206497067114e-06, "loss": 3.0611, "step": 6002 }, { "epoch": 0.97, "learning_rate": 2.698259056978125e-06, "loss": 3.2002, "step": 6003 }, { "epoch": 0.97, "learning_rate": 2.671233123199668e-06, "loss": 3.0797, "step": 6004 }, { "epoch": 0.97, "learning_rate": 2.6443428557437753e-06, "loss": 3.0632, "step": 6005 }, { "epoch": 0.97, "learning_rate": 2.617588261945747e-06, "loss": 3.1875, "step": 6006 }, { "epoch": 0.97, "learning_rate": 2.590969349104133e-06, "loss": 3.2222, "step": 6007 }, { "epoch": 0.97, "learning_rate": 2.5644861244802366e-06, "loss": 3.2405, "step": 6008 }, { "epoch": 0.97, "learning_rate": 2.5381385952983337e-06, "loss": 3.0538, "step": 6009 }, { "epoch": 0.97, "learning_rate": 2.511926768745898e-06, "loss": 3.045, "step": 6010 }, { "epoch": 0.97, "learning_rate": 2.4858506519732095e-06, "loss": 3.2631, "step": 6011 }, { "epoch": 0.97, "learning_rate": 2.459910252093467e-06, "loss": 3.1434, "step": 6012 }, { "epoch": 0.97, "learning_rate": 2.434105576183121e-06, "loss": 3.1258, "step": 6013 }, { "epoch": 0.97, "learning_rate": 2.408436631281319e-06, "loss": 3.2163, "step": 6014 }, { "epoch": 0.97, "learning_rate": 2.382903424390348e-06, "loss": 3.1349, "step": 6015 }, { "epoch": 0.97, "learning_rate": 2.3575059624754147e-06, "loss": 3.0948, "step": 6016 }, { "epoch": 0.97, "learning_rate": 2.3322442524646436e-06, "loss": 3.1551, "step": 6017 }, { "epoch": 0.97, "learning_rate": 2.3071183012491337e-06, "loss": 3.1125, "step": 6018 }, { "epoch": 0.97, "learning_rate": 2.282128115683124e-06, "loss": 3.1164, "step": 6019 }, { "epoch": 0.97, "learning_rate": 2.257273702583662e-06, "loss": 3.0705, "step": 6020 }, { "epoch": 0.97, "learning_rate": 2.232555068730713e-06, "loss": 3.3095, "step": 6021 }, { "epoch": 0.97, "learning_rate": 2.2079722208672714e-06, "loss": 3.1459, "step": 6022 }, { "epoch": 0.97, "learning_rate": 2.183525165699418e-06, "loss": 3.0908, "step": 6023 }, { "epoch": 0.97, "learning_rate": 2.159213909895874e-06, "loss": 3.0842, "step": 6024 }, { "epoch": 0.97, "learning_rate": 2.1350384600886675e-06, "loss": 3.1292, "step": 6025 }, { "epoch": 0.97, "learning_rate": 2.1109988228725786e-06, "loss": 3.1083, "step": 6026 }, { "epoch": 0.97, "learning_rate": 2.087095004805306e-06, "loss": 3.0559, "step": 6027 }, { "epoch": 0.97, "learning_rate": 2.063327012407634e-06, "loss": 3.1821, "step": 6028 }, { "epoch": 0.97, "learning_rate": 2.0396948521632653e-06, "loss": 3.1748, "step": 6029 }, { "epoch": 0.97, "learning_rate": 2.0161985305187647e-06, "loss": 3.1428, "step": 6030 }, { "epoch": 0.97, "learning_rate": 1.9928380538837278e-06, "loss": 3.141, "step": 6031 }, { "epoch": 0.97, "learning_rate": 1.969613428630668e-06, "loss": 3.0225, "step": 6032 }, { "epoch": 0.97, "learning_rate": 1.9465246610949616e-06, "loss": 3.2515, "step": 6033 }, { "epoch": 0.97, "learning_rate": 1.9235717575750155e-06, "loss": 3.2018, "step": 6034 }, { "epoch": 0.97, "learning_rate": 1.9007547243322099e-06, "loss": 3.0552, "step": 6035 }, { "epoch": 0.97, "learning_rate": 1.878073567590788e-06, "loss": 3.0947, "step": 6036 }, { "epoch": 0.97, "learning_rate": 1.8555282935378559e-06, "loss": 3.2657, "step": 6037 }, { "epoch": 0.97, "learning_rate": 1.8331189083237166e-06, "loss": 3.0422, "step": 6038 }, { "epoch": 0.97, "learning_rate": 1.8108454180612577e-06, "loss": 3.1003, "step": 6039 }, { "epoch": 0.97, "learning_rate": 1.788707828826508e-06, "loss": 2.9106, "step": 6040 }, { "epoch": 0.97, "learning_rate": 1.7667061466584144e-06, "loss": 3.107, "step": 6041 }, { "epoch": 0.97, "learning_rate": 1.7448403775587873e-06, "loss": 3.0328, "step": 6042 }, { "epoch": 0.97, "learning_rate": 1.7231105274924108e-06, "loss": 3.1148, "step": 6043 }, { "epoch": 0.97, "learning_rate": 1.7015166023869878e-06, "loss": 2.977, "step": 6044 }, { "epoch": 0.97, "learning_rate": 1.6800586081330282e-06, "loss": 3.2733, "step": 6045 }, { "epoch": 0.97, "learning_rate": 1.6587365505841833e-06, "loss": 3.2722, "step": 6046 }, { "epoch": 0.98, "learning_rate": 1.637550435556856e-06, "loss": 2.9988, "step": 6047 }, { "epoch": 0.98, "learning_rate": 1.616500268830423e-06, "loss": 3.1488, "step": 6048 }, { "epoch": 0.98, "learning_rate": 1.5955860561470691e-06, "loss": 3.2635, "step": 6049 }, { "epoch": 0.98, "learning_rate": 1.5748078032120638e-06, "loss": 3.0553, "step": 6050 }, { "epoch": 0.98, "learning_rate": 1.5541655156934841e-06, "loss": 3.2884, "step": 6051 }, { "epoch": 0.98, "learning_rate": 1.5336591992223814e-06, "loss": 3.1941, "step": 6052 }, { "epoch": 0.98, "learning_rate": 1.5132888593925586e-06, "loss": 3.0921, "step": 6053 }, { "epoch": 0.98, "learning_rate": 1.4930545017610152e-06, "loss": 3.0898, "step": 6054 }, { "epoch": 0.98, "learning_rate": 1.472956131847336e-06, "loss": 3.0606, "step": 6055 }, { "epoch": 0.98, "learning_rate": 1.452993755134191e-06, "loss": 3.0345, "step": 6056 }, { "epoch": 0.98, "learning_rate": 1.4331673770671683e-06, "loss": 3.134, "step": 6057 }, { "epoch": 0.98, "learning_rate": 1.413477003054664e-06, "loss": 3.2434, "step": 6058 }, { "epoch": 0.98, "learning_rate": 1.393922638468048e-06, "loss": 2.9231, "step": 6059 }, { "epoch": 0.98, "learning_rate": 1.3745042886414983e-06, "loss": 3.0694, "step": 6060 }, { "epoch": 0.98, "learning_rate": 1.3552219588721659e-06, "loss": 3.147, "step": 6061 }, { "epoch": 0.98, "learning_rate": 1.3360756544201213e-06, "loss": 3.1625, "step": 6062 }, { "epoch": 0.98, "learning_rate": 1.3170653805082423e-06, "loss": 3.0558, "step": 6063 }, { "epoch": 0.98, "learning_rate": 1.2981911423223248e-06, "loss": 3.1825, "step": 6064 }, { "epoch": 0.98, "learning_rate": 1.2794529450111392e-06, "loss": 3.0919, "step": 6065 }, { "epoch": 0.98, "learning_rate": 1.2608507936862635e-06, "loss": 3.0549, "step": 6066 }, { "epoch": 0.98, "learning_rate": 1.2423846934220829e-06, "loss": 3.1738, "step": 6067 }, { "epoch": 0.98, "learning_rate": 1.2240546492560678e-06, "loss": 3.1592, "step": 6068 }, { "epoch": 0.98, "learning_rate": 1.2058606661884964e-06, "loss": 3.1206, "step": 6069 }, { "epoch": 0.98, "learning_rate": 1.1878027491823984e-06, "loss": 3.2168, "step": 6070 }, { "epoch": 0.98, "learning_rate": 1.1698809031638336e-06, "loss": 3.1052, "step": 6071 }, { "epoch": 0.98, "learning_rate": 1.15209513302178e-06, "loss": 3.0692, "step": 6072 }, { "epoch": 0.98, "learning_rate": 1.134445443607912e-06, "loss": 3.0483, "step": 6073 }, { "epoch": 0.98, "learning_rate": 1.1169318397369344e-06, "loss": 3.0828, "step": 6074 }, { "epoch": 0.98, "learning_rate": 1.0995543261863584e-06, "loss": 3.2017, "step": 6075 }, { "epoch": 0.98, "learning_rate": 1.0823129076966142e-06, "loss": 3.2117, "step": 6076 }, { "epoch": 0.98, "learning_rate": 1.065207588971051e-06, "loss": 3.0492, "step": 6077 }, { "epoch": 0.98, "learning_rate": 1.048238374675714e-06, "loss": 3.2024, "step": 6078 }, { "epoch": 0.98, "learning_rate": 1.0314052694397335e-06, "loss": 3.0089, "step": 6079 }, { "epoch": 0.98, "learning_rate": 1.0147082778549367e-06, "loss": 3.1902, "step": 6080 }, { "epoch": 0.98, "learning_rate": 9.98147404476124e-07, "loss": 3.2471, "step": 6081 }, { "epoch": 0.98, "learning_rate": 9.8172265382096e-07, "loss": 3.2112, "step": 6082 }, { "epoch": 0.98, "learning_rate": 9.654340303699161e-07, "loss": 3.1345, "step": 6083 }, { "epoch": 0.98, "learning_rate": 9.492815385664377e-07, "loss": 3.1793, "step": 6084 }, { "epoch": 0.98, "learning_rate": 9.332651828166672e-07, "loss": 2.969, "step": 6085 }, { "epoch": 0.98, "learning_rate": 9.173849674897206e-07, "loss": 3.2883, "step": 6086 }, { "epoch": 0.98, "learning_rate": 9.016408969176326e-07, "loss": 3.2068, "step": 6087 }, { "epoch": 0.98, "learning_rate": 8.860329753951901e-07, "loss": 3.2056, "step": 6088 }, { "epoch": 0.98, "learning_rate": 8.705612071800429e-07, "loss": 2.9873, "step": 6089 }, { "epoch": 0.98, "learning_rate": 8.552255964928146e-07, "loss": 3.0509, "step": 6090 }, { "epoch": 0.98, "learning_rate": 8.400261475168258e-07, "loss": 3.2052, "step": 6091 }, { "epoch": 0.98, "learning_rate": 8.249628643983708e-07, "loss": 3.1778, "step": 6092 }, { "epoch": 0.98, "learning_rate": 8.100357512466072e-07, "loss": 2.9292, "step": 6093 }, { "epoch": 0.98, "learning_rate": 7.952448121333888e-07, "loss": 3.1732, "step": 6094 }, { "epoch": 0.98, "learning_rate": 7.805900510936547e-07, "loss": 2.97, "step": 6095 }, { "epoch": 0.98, "learning_rate": 7.66071472124985e-07, "loss": 2.967, "step": 6096 }, { "epoch": 0.98, "learning_rate": 7.516890791879339e-07, "loss": 3.1864, "step": 6097 }, { "epoch": 0.98, "learning_rate": 7.374428762059182e-07, "loss": 3.1148, "step": 6098 }, { "epoch": 0.98, "learning_rate": 7.233328670651073e-07, "loss": 3.08, "step": 6099 }, { "epoch": 0.98, "learning_rate": 7.093590556145891e-07, "loss": 3.1764, "step": 6100 }, { "epoch": 0.98, "learning_rate": 6.955214456663139e-07, "loss": 2.9644, "step": 6101 }, { "epoch": 0.98, "learning_rate": 6.818200409949849e-07, "loss": 3.0514, "step": 6102 }, { "epoch": 0.98, "learning_rate": 6.682548453382231e-07, "loss": 3.2485, "step": 6103 }, { "epoch": 0.98, "learning_rate": 6.548258623965131e-07, "loss": 3.1937, "step": 6104 }, { "epoch": 0.98, "learning_rate": 6.415330958330912e-07, "loss": 3.1076, "step": 6105 }, { "epoch": 0.98, "learning_rate": 6.283765492741678e-07, "loss": 2.9434, "step": 6106 }, { "epoch": 0.98, "learning_rate": 6.153562263086498e-07, "loss": 3.0277, "step": 6107 }, { "epoch": 0.98, "learning_rate": 6.02472130488363e-07, "loss": 3.2009, "step": 6108 }, { "epoch": 0.99, "learning_rate": 5.897242653279955e-07, "loss": 3.1393, "step": 6109 }, { "epoch": 0.99, "learning_rate": 5.771126343049881e-07, "loss": 3.0085, "step": 6110 }, { "epoch": 0.99, "learning_rate": 5.646372408597001e-07, "loss": 3.1107, "step": 6111 }, { "epoch": 0.99, "learning_rate": 5.522980883952422e-07, "loss": 3.2047, "step": 6112 }, { "epoch": 0.99, "learning_rate": 5.400951802777553e-07, "loss": 3.2768, "step": 6113 }, { "epoch": 0.99, "learning_rate": 5.280285198359103e-07, "loss": 3.1898, "step": 6114 }, { "epoch": 0.99, "learning_rate": 5.160981103614626e-07, "loss": 3.0722, "step": 6115 }, { "epoch": 0.99, "learning_rate": 5.043039551088646e-07, "loss": 3.2404, "step": 6116 }, { "epoch": 0.99, "learning_rate": 4.926460572954317e-07, "loss": 3.098, "step": 6117 }, { "epoch": 0.99, "learning_rate": 4.811244201013976e-07, "loss": 3.1392, "step": 6118 }, { "epoch": 0.99, "learning_rate": 4.697390466696372e-07, "loss": 3.0957, "step": 6119 }, { "epoch": 0.99, "learning_rate": 4.5848994010611043e-07, "loss": 3.2409, "step": 6120 }, { "epoch": 0.99, "learning_rate": 4.473771034793628e-07, "loss": 3.1426, "step": 6121 }, { "epoch": 0.99, "learning_rate": 4.3640053982085816e-07, "loss": 3.0202, "step": 6122 }, { "epoch": 0.99, "learning_rate": 4.2556025212492356e-07, "loss": 3.0489, "step": 6123 }, { "epoch": 0.99, "learning_rate": 4.1485624334869353e-07, "loss": 3.0799, "step": 6124 }, { "epoch": 0.99, "learning_rate": 4.0428851641211016e-07, "loss": 2.7815, "step": 6125 }, { "epoch": 0.99, "learning_rate": 3.9385707419792305e-07, "loss": 3.1315, "step": 6126 }, { "epoch": 0.99, "learning_rate": 3.835619195517448e-07, "loss": 3.1249, "step": 6127 }, { "epoch": 0.99, "learning_rate": 3.7340305528194006e-07, "loss": 3.0916, "step": 6128 }, { "epoch": 0.99, "learning_rate": 3.633804841598476e-07, "loss": 2.9497, "step": 6129 }, { "epoch": 0.99, "learning_rate": 3.5349420891939157e-07, "loss": 2.9506, "step": 6130 }, { "epoch": 0.99, "learning_rate": 3.437442322575812e-07, "loss": 3.2029, "step": 6131 }, { "epoch": 0.99, "learning_rate": 3.341305568340114e-07, "loss": 3.1657, "step": 6132 }, { "epoch": 0.99, "learning_rate": 3.246531852713064e-07, "loss": 2.9914, "step": 6133 }, { "epoch": 0.99, "learning_rate": 3.1531212015467605e-07, "loss": 3.1542, "step": 6134 }, { "epoch": 0.99, "learning_rate": 3.0610736403235973e-07, "loss": 3.2101, "step": 6135 }, { "epoch": 0.99, "learning_rate": 2.9703891941523785e-07, "loss": 3.1066, "step": 6136 }, { "epoch": 0.99, "learning_rate": 2.881067887771649e-07, "loss": 3.2224, "step": 6137 }, { "epoch": 0.99, "learning_rate": 2.793109745547473e-07, "loss": 3.1484, "step": 6138 }, { "epoch": 0.99, "learning_rate": 2.706514791473436e-07, "loss": 3.1242, "step": 6139 }, { "epoch": 0.99, "learning_rate": 2.621283049171752e-07, "loss": 3.1083, "step": 6140 }, { "epoch": 0.99, "learning_rate": 2.537414541893268e-07, "loss": 3.2423, "step": 6141 }, { "epoch": 0.99, "learning_rate": 2.4549092925157947e-07, "loss": 3.1174, "step": 6142 }, { "epoch": 0.99, "learning_rate": 2.3737673235468827e-07, "loss": 3.1476, "step": 6143 }, { "epoch": 0.99, "learning_rate": 2.2939886571204937e-07, "loss": 3.0187, "step": 6144 }, { "epoch": 0.99, "learning_rate": 2.2155733149992196e-07, "loss": 3.0417, "step": 6145 }, { "epoch": 0.99, "learning_rate": 2.1385213185748375e-07, "loss": 3.1507, "step": 6146 }, { "epoch": 0.99, "learning_rate": 2.06283268886609e-07, "loss": 3.163, "step": 6147 }, { "epoch": 0.99, "learning_rate": 1.9885074465197937e-07, "loss": 3.0262, "step": 6148 }, { "epoch": 0.99, "learning_rate": 1.915545611811398e-07, "loss": 3.0834, "step": 6149 }, { "epoch": 0.99, "learning_rate": 1.843947204644425e-07, "loss": 3.0183, "step": 6150 }, { "epoch": 0.99, "learning_rate": 1.7737122445493637e-07, "loss": 3.1082, "step": 6151 }, { "epoch": 0.99, "learning_rate": 1.7048407506858877e-07, "loss": 3.1886, "step": 6152 }, { "epoch": 0.99, "learning_rate": 1.6373327418423013e-07, "loss": 3.2377, "step": 6153 }, { "epoch": 0.99, "learning_rate": 1.5711882364327635e-07, "loss": 3.0895, "step": 6154 }, { "epoch": 0.99, "learning_rate": 1.5064072525017292e-07, "loss": 3.2126, "step": 6155 }, { "epoch": 0.99, "learning_rate": 1.4429898077211735e-07, "loss": 3.144, "step": 6156 }, { "epoch": 0.99, "learning_rate": 1.380935919389481e-07, "loss": 3.1136, "step": 6157 }, { "epoch": 0.99, "learning_rate": 1.3202456044353328e-07, "loss": 3.0606, "step": 6158 }, { "epoch": 0.99, "learning_rate": 1.2609188794143744e-07, "loss": 3.1164, "step": 6159 }, { "epoch": 0.99, "learning_rate": 1.2029557605097717e-07, "loss": 3.0261, "step": 6160 }, { "epoch": 0.99, "learning_rate": 1.1463562635333213e-07, "loss": 3.1631, "step": 6161 }, { "epoch": 0.99, "learning_rate": 1.0911204039254497e-07, "loss": 3.1024, "step": 6162 }, { "epoch": 0.99, "learning_rate": 1.0372481967541036e-07, "loss": 3.0692, "step": 6163 }, { "epoch": 0.99, "learning_rate": 9.847396567141953e-08, "loss": 3.0852, "step": 6164 }, { "epoch": 0.99, "learning_rate": 9.335947981298221e-08, "loss": 3.1154, "step": 6165 }, { "epoch": 0.99, "learning_rate": 8.838136349526016e-08, "loss": 3.1759, "step": 6166 }, { "epoch": 0.99, "learning_rate": 8.353961807627819e-08, "loss": 3.1643, "step": 6167 }, { "epoch": 0.99, "learning_rate": 7.883424487681312e-08, "loss": 3.1862, "step": 6168 }, { "epoch": 0.99, "learning_rate": 7.42652451804493e-08, "loss": 3.2032, "step": 6169 }, { "epoch": 0.99, "learning_rate": 6.98326202335231e-08, "loss": 3.0515, "step": 6170 }, { "epoch": 1.0, "learning_rate": 6.553637124523393e-08, "loss": 3.0979, "step": 6171 }, { "epoch": 1.0, "learning_rate": 6.137649938758871e-08, "loss": 3.2015, "step": 6172 }, { "epoch": 1.0, "learning_rate": 5.73530057952909e-08, "loss": 3.0306, "step": 6173 }, { "epoch": 1.0, "learning_rate": 5.3465891565962486e-08, "loss": 3.1208, "step": 6174 }, { "epoch": 1.0, "learning_rate": 4.971515775992197e-08, "loss": 3.0952, "step": 6175 }, { "epoch": 1.0, "learning_rate": 4.610080540035089e-08, "loss": 3.1886, "step": 6176 }, { "epoch": 1.0, "learning_rate": 4.262283547323831e-08, "loss": 3.1066, "step": 6177 }, { "epoch": 1.0, "learning_rate": 3.928124892732532e-08, "loss": 3.1035, "step": 6178 }, { "epoch": 1.0, "learning_rate": 3.607604667416054e-08, "loss": 2.9709, "step": 6179 }, { "epoch": 1.0, "learning_rate": 3.300722958810009e-08, "loss": 3.1927, "step": 6180 }, { "epoch": 1.0, "learning_rate": 3.007479850625217e-08, "loss": 3.0113, "step": 6181 }, { "epoch": 1.0, "learning_rate": 2.7278754228587944e-08, "loss": 3.1605, "step": 6182 }, { "epoch": 1.0, "learning_rate": 2.4619097517830646e-08, "loss": 3.1529, "step": 6183 }, { "epoch": 1.0, "learning_rate": 2.209582909945551e-08, "loss": 3.0735, "step": 6184 }, { "epoch": 1.0, "learning_rate": 1.9708949661911834e-08, "loss": 3.174, "step": 6185 }, { "epoch": 1.0, "learning_rate": 1.745845985617889e-08, "loss": 3.1061, "step": 6186 }, { "epoch": 1.0, "learning_rate": 1.5344360296265513e-08, "loss": 3.1935, "step": 6187 }, { "epoch": 1.0, "learning_rate": 1.3366651558877063e-08, "loss": 3.1264, "step": 6188 }, { "epoch": 1.0, "learning_rate": 1.1525334183415392e-08, "loss": 3.1197, "step": 6189 }, { "epoch": 1.0, "learning_rate": 9.82040867225642e-09, "loss": 3.0783, "step": 6190 }, { "epoch": 1.0, "learning_rate": 8.251875490472572e-09, "loss": 3.1003, "step": 6191 }, { "epoch": 1.0, "learning_rate": 6.8197350659437995e-09, "loss": 3.2202, "step": 6192 }, { "epoch": 1.0, "learning_rate": 5.523987789302076e-09, "loss": 2.8782, "step": 6193 }, { "epoch": 1.0, "learning_rate": 4.364634014097923e-09, "loss": 3.1175, "step": 6194 }, { "epoch": 1.0, "learning_rate": 3.3416740565228586e-09, "loss": 3.1302, "step": 6195 }, { "epoch": 1.0, "learning_rate": 2.4551081956314392e-09, "loss": 3.117, "step": 6196 }, { "epoch": 1.0, "learning_rate": 1.7049366732857508e-09, "loss": 3.1732, "step": 6197 }, { "epoch": 1.0, "learning_rate": 1.091159694155408e-09, "loss": 3.056, "step": 6198 }, { "epoch": 1.0, "learning_rate": 6.137774256065321e-10, "loss": 3.0067, "step": 6199 }, { "epoch": 1.0, "learning_rate": 2.727899979793058e-10, "loss": 3.1415, "step": 6200 }, { "epoch": 1.0, "learning_rate": 6.819750414388537e-11, "loss": 2.839, "step": 6201 }, { "epoch": 1.0, "learning_rate": 0.0, "loss": 3.2192, "step": 6202 }, { "epoch": 1.0, "step": 6202, "total_flos": 1.977746612331479e+17, "train_loss": 3.291977916983242, "train_runtime": 15293.3269, "train_samples_per_second": 38.93, "train_steps_per_second": 0.406 } ], "logging_steps": 1.0, "max_steps": 6202, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 2000, "total_flos": 1.977746612331479e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }