{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "global_step": 3079, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2.1505376344086024e-05, "loss": 3.0025, "step": 1 }, { "epoch": 0.0, "learning_rate": 4.301075268817205e-05, "loss": 3.0823, "step": 2 }, { "epoch": 0.0, "learning_rate": 6.451612903225807e-05, "loss": 2.9825, "step": 3 }, { "epoch": 0.0, "learning_rate": 8.60215053763441e-05, "loss": 2.9615, "step": 4 }, { "epoch": 0.0, "learning_rate": 0.00010752688172043011, "loss": 3.0319, "step": 5 }, { "epoch": 0.0, "learning_rate": 0.00012903225806451613, "loss": 3.0456, "step": 6 }, { "epoch": 0.0, "learning_rate": 0.00015053763440860216, "loss": 2.9875, "step": 7 }, { "epoch": 0.0, "learning_rate": 0.0001720430107526882, "loss": 3.0439, "step": 8 }, { "epoch": 0.0, "learning_rate": 0.0001935483870967742, "loss": 2.9133, "step": 9 }, { "epoch": 0.0, "learning_rate": 0.00021505376344086021, "loss": 2.9772, "step": 10 }, { "epoch": 0.0, "learning_rate": 0.00023655913978494624, "loss": 2.9256, "step": 11 }, { "epoch": 0.0, "learning_rate": 0.00025806451612903227, "loss": 2.9884, "step": 12 }, { "epoch": 0.0, "learning_rate": 0.00027956989247311827, "loss": 2.8851, "step": 13 }, { "epoch": 0.0, "learning_rate": 0.0003010752688172043, "loss": 2.9326, "step": 14 }, { "epoch": 0.0, "learning_rate": 0.0003225806451612903, "loss": 2.9037, "step": 15 }, { "epoch": 0.01, "learning_rate": 0.0003440860215053764, "loss": 2.8716, "step": 16 }, { "epoch": 0.01, "learning_rate": 0.0003655913978494624, "loss": 2.8873, "step": 17 }, { "epoch": 0.01, "learning_rate": 0.0003870967741935484, "loss": 2.9048, "step": 18 }, { "epoch": 0.01, "learning_rate": 0.00040860215053763443, "loss": 2.7661, "step": 19 }, { "epoch": 0.01, "learning_rate": 0.00043010752688172043, "loss": 2.7424, "step": 20 }, { "epoch": 0.01, "learning_rate": 0.00045161290322580643, "loss": 2.7219, "step": 21 }, { "epoch": 0.01, "learning_rate": 0.0004731182795698925, "loss": 2.7913, "step": 22 }, { "epoch": 0.01, "learning_rate": 0.0004946236559139785, "loss": 2.7095, "step": 23 }, { "epoch": 0.01, "learning_rate": 0.0005161290322580645, "loss": 2.7516, "step": 24 }, { "epoch": 0.01, "learning_rate": 0.0005376344086021505, "loss": 2.7181, "step": 25 }, { "epoch": 0.01, "learning_rate": 0.0005591397849462365, "loss": 2.5637, "step": 26 }, { "epoch": 0.01, "learning_rate": 0.0005806451612903226, "loss": 2.6443, "step": 27 }, { "epoch": 0.01, "learning_rate": 0.0006021505376344086, "loss": 2.6627, "step": 28 }, { "epoch": 0.01, "learning_rate": 0.0006236559139784946, "loss": 2.6059, "step": 29 }, { "epoch": 0.01, "learning_rate": 0.0006451612903225806, "loss": 2.5745, "step": 30 }, { "epoch": 0.01, "learning_rate": 0.0006666666666666666, "loss": 2.4728, "step": 31 }, { "epoch": 0.01, "learning_rate": 0.0006881720430107528, "loss": 2.54, "step": 32 }, { "epoch": 0.01, "learning_rate": 0.0007096774193548388, "loss": 2.4709, "step": 33 }, { "epoch": 0.01, "learning_rate": 0.0007311827956989248, "loss": 2.4182, "step": 34 }, { "epoch": 0.01, "learning_rate": 0.0007526881720430108, "loss": 2.409, "step": 35 }, { "epoch": 0.01, "learning_rate": 0.0007741935483870968, "loss": 2.4798, "step": 36 }, { "epoch": 0.01, "learning_rate": 0.0007956989247311828, "loss": 2.2774, "step": 37 }, { "epoch": 0.01, "learning_rate": 0.0008172043010752689, "loss": 2.3242, "step": 38 }, { "epoch": 0.01, "learning_rate": 0.0008387096774193549, "loss": 2.4111, "step": 39 }, { "epoch": 0.01, "learning_rate": 0.0008602150537634409, "loss": 2.3048, "step": 40 }, { "epoch": 0.01, "learning_rate": 0.0008817204301075269, "loss": 2.3662, "step": 41 }, { "epoch": 0.01, "learning_rate": 0.0009032258064516129, "loss": 2.3347, "step": 42 }, { "epoch": 0.01, "learning_rate": 0.0009247311827956989, "loss": 2.2308, "step": 43 }, { "epoch": 0.01, "learning_rate": 0.000946236559139785, "loss": 2.2967, "step": 44 }, { "epoch": 0.01, "learning_rate": 0.000967741935483871, "loss": 2.3626, "step": 45 }, { "epoch": 0.01, "learning_rate": 0.000989247311827957, "loss": 2.2981, "step": 46 }, { "epoch": 0.02, "learning_rate": 0.001010752688172043, "loss": 2.3351, "step": 47 }, { "epoch": 0.02, "learning_rate": 0.001032258064516129, "loss": 2.2606, "step": 48 }, { "epoch": 0.02, "learning_rate": 0.001053763440860215, "loss": 2.2589, "step": 49 }, { "epoch": 0.02, "learning_rate": 0.001075268817204301, "loss": 2.2418, "step": 50 }, { "epoch": 0.02, "learning_rate": 0.001096774193548387, "loss": 2.1605, "step": 51 }, { "epoch": 0.02, "learning_rate": 0.001118279569892473, "loss": 2.238, "step": 52 }, { "epoch": 0.02, "learning_rate": 0.0011397849462365592, "loss": 2.2403, "step": 53 }, { "epoch": 0.02, "learning_rate": 0.0011612903225806453, "loss": 2.2488, "step": 54 }, { "epoch": 0.02, "learning_rate": 0.0011827956989247312, "loss": 2.1954, "step": 55 }, { "epoch": 0.02, "learning_rate": 0.0012043010752688173, "loss": 2.1484, "step": 56 }, { "epoch": 0.02, "learning_rate": 0.0012258064516129032, "loss": 2.252, "step": 57 }, { "epoch": 0.02, "learning_rate": 0.0012473118279569893, "loss": 2.1787, "step": 58 }, { "epoch": 0.02, "learning_rate": 0.0012688172043010752, "loss": 2.1972, "step": 59 }, { "epoch": 0.02, "learning_rate": 0.0012903225806451613, "loss": 2.2227, "step": 60 }, { "epoch": 0.02, "learning_rate": 0.0013118279569892472, "loss": 2.2323, "step": 61 }, { "epoch": 0.02, "learning_rate": 0.0013333333333333333, "loss": 2.1606, "step": 62 }, { "epoch": 0.02, "learning_rate": 0.0013548387096774194, "loss": 2.2158, "step": 63 }, { "epoch": 0.02, "learning_rate": 0.0013763440860215055, "loss": 2.2307, "step": 64 }, { "epoch": 0.02, "learning_rate": 0.0013978494623655916, "loss": 2.1308, "step": 65 }, { "epoch": 0.02, "learning_rate": 0.0014193548387096775, "loss": 2.194, "step": 66 }, { "epoch": 0.02, "learning_rate": 0.0014408602150537636, "loss": 2.168, "step": 67 }, { "epoch": 0.02, "learning_rate": 0.0014623655913978495, "loss": 2.104, "step": 68 }, { "epoch": 0.02, "learning_rate": 0.0014838709677419356, "loss": 2.1663, "step": 69 }, { "epoch": 0.02, "learning_rate": 0.0015053763440860215, "loss": 2.1705, "step": 70 }, { "epoch": 0.02, "learning_rate": 0.0015268817204301076, "loss": 2.1728, "step": 71 }, { "epoch": 0.02, "learning_rate": 0.0015483870967741935, "loss": 2.1167, "step": 72 }, { "epoch": 0.02, "learning_rate": 0.0015698924731182796, "loss": 2.1928, "step": 73 }, { "epoch": 0.02, "learning_rate": 0.0015913978494623655, "loss": 2.1517, "step": 74 }, { "epoch": 0.02, "learning_rate": 0.0016129032258064516, "loss": 2.1611, "step": 75 }, { "epoch": 0.02, "learning_rate": 0.0016344086021505377, "loss": 2.0837, "step": 76 }, { "epoch": 0.03, "learning_rate": 0.0016559139784946238, "loss": 2.0847, "step": 77 }, { "epoch": 0.03, "learning_rate": 0.0016774193548387097, "loss": 2.0815, "step": 78 }, { "epoch": 0.03, "learning_rate": 0.0016989247311827958, "loss": 2.0861, "step": 79 }, { "epoch": 0.03, "learning_rate": 0.0017204301075268817, "loss": 2.1896, "step": 80 }, { "epoch": 0.03, "learning_rate": 0.0017419354838709678, "loss": 2.1261, "step": 81 }, { "epoch": 0.03, "learning_rate": 0.0017634408602150537, "loss": 2.1223, "step": 82 }, { "epoch": 0.03, "learning_rate": 0.0017849462365591398, "loss": 2.0469, "step": 83 }, { "epoch": 0.03, "learning_rate": 0.0018064516129032257, "loss": 2.0909, "step": 84 }, { "epoch": 0.03, "learning_rate": 0.0018279569892473118, "loss": 2.1279, "step": 85 }, { "epoch": 0.03, "learning_rate": 0.0018494623655913977, "loss": 2.0685, "step": 86 }, { "epoch": 0.03, "learning_rate": 0.0018709677419354838, "loss": 2.0632, "step": 87 }, { "epoch": 0.03, "learning_rate": 0.00189247311827957, "loss": 2.1188, "step": 88 }, { "epoch": 0.03, "learning_rate": 0.001913978494623656, "loss": 2.1488, "step": 89 }, { "epoch": 0.03, "learning_rate": 0.001935483870967742, "loss": 2.1614, "step": 90 }, { "epoch": 0.03, "learning_rate": 0.001956989247311828, "loss": 2.0829, "step": 91 }, { "epoch": 0.03, "learning_rate": 0.001978494623655914, "loss": 2.1734, "step": 92 }, { "epoch": 0.03, "learning_rate": 0.002, "loss": 2.0562, "step": 93 }, { "epoch": 0.03, "learning_rate": 0.0019999994465350755, "loss": 2.179, "step": 94 }, { "epoch": 0.03, "learning_rate": 0.0019999977861409153, "loss": 2.1083, "step": 95 }, { "epoch": 0.03, "learning_rate": 0.001999995018819356, "loss": 2.1141, "step": 96 }, { "epoch": 0.03, "learning_rate": 0.001999991144573462, "loss": 2.1289, "step": 97 }, { "epoch": 0.03, "learning_rate": 0.0019999861634075215, "loss": 2.1735, "step": 98 }, { "epoch": 0.03, "learning_rate": 0.0019999800753270486, "loss": 2.092, "step": 99 }, { "epoch": 0.03, "learning_rate": 0.0019999728803387815, "loss": 2.0592, "step": 100 }, { "epoch": 0.03, "learning_rate": 0.0019999645784506853, "loss": 2.0804, "step": 101 }, { "epoch": 0.03, "learning_rate": 0.0019999551696719495, "loss": 2.1273, "step": 102 }, { "epoch": 0.03, "learning_rate": 0.0019999446540129888, "loss": 2.129, "step": 103 }, { "epoch": 0.03, "learning_rate": 0.0019999330314854438, "loss": 2.1476, "step": 104 }, { "epoch": 0.03, "learning_rate": 0.001999920302102179, "loss": 2.0677, "step": 105 }, { "epoch": 0.03, "learning_rate": 0.001999906465877285, "loss": 2.2254, "step": 106 }, { "epoch": 0.03, "learning_rate": 0.001999891522826078, "loss": 2.0805, "step": 107 }, { "epoch": 0.04, "learning_rate": 0.0019998754729650993, "loss": 2.128, "step": 108 }, { "epoch": 0.04, "learning_rate": 0.001999858316312114, "loss": 2.1241, "step": 109 }, { "epoch": 0.04, "learning_rate": 0.001999840052886114, "loss": 2.1237, "step": 110 }, { "epoch": 0.04, "learning_rate": 0.001999820682707315, "loss": 2.1126, "step": 111 }, { "epoch": 0.04, "learning_rate": 0.0019998002057971588, "loss": 2.1263, "step": 112 }, { "epoch": 0.04, "learning_rate": 0.001999778622178312, "loss": 2.0781, "step": 113 }, { "epoch": 0.04, "learning_rate": 0.001999755931874666, "loss": 2.126, "step": 114 }, { "epoch": 0.04, "learning_rate": 0.001999732134911338, "loss": 2.1251, "step": 115 }, { "epoch": 0.04, "learning_rate": 0.0019997072313146687, "loss": 2.2221, "step": 116 }, { "epoch": 0.04, "learning_rate": 0.001999681221112225, "loss": 2.0657, "step": 117 }, { "epoch": 0.04, "learning_rate": 0.001999654104332798, "loss": 2.0495, "step": 118 }, { "epoch": 0.04, "learning_rate": 0.001999625881006404, "loss": 2.1597, "step": 119 }, { "epoch": 0.04, "learning_rate": 0.0019995965511642854, "loss": 2.0698, "step": 120 }, { "epoch": 0.04, "learning_rate": 0.0019995661148389074, "loss": 2.0925, "step": 121 }, { "epoch": 0.04, "learning_rate": 0.0019995345720639607, "loss": 2.115, "step": 122 }, { "epoch": 0.04, "learning_rate": 0.0019995019228743614, "loss": 2.0844, "step": 123 }, { "epoch": 0.04, "learning_rate": 0.0019994681673062495, "loss": 2.015, "step": 124 }, { "epoch": 0.04, "learning_rate": 0.001999433305396991, "loss": 2.0416, "step": 125 }, { "epoch": 0.04, "learning_rate": 0.001999397337185174, "loss": 2.0867, "step": 126 }, { "epoch": 0.04, "learning_rate": 0.0019993602627106136, "loss": 2.0684, "step": 127 }, { "epoch": 0.04, "learning_rate": 0.0019993220820143484, "loss": 2.086, "step": 128 }, { "epoch": 0.04, "learning_rate": 0.0019992827951386427, "loss": 2.1396, "step": 129 }, { "epoch": 0.04, "learning_rate": 0.0019992424021269832, "loss": 2.1644, "step": 130 }, { "epoch": 0.04, "learning_rate": 0.0019992009030240826, "loss": 2.0832, "step": 131 }, { "epoch": 0.04, "learning_rate": 0.0019991582978758776, "loss": 2.124, "step": 132 }, { "epoch": 0.04, "learning_rate": 0.0019991145867295284, "loss": 2.0195, "step": 133 }, { "epoch": 0.04, "learning_rate": 0.0019990697696334212, "loss": 2.1491, "step": 134 }, { "epoch": 0.04, "learning_rate": 0.0019990238466371653, "loss": 2.072, "step": 135 }, { "epoch": 0.04, "learning_rate": 0.001998976817791593, "loss": 2.0953, "step": 136 }, { "epoch": 0.04, "learning_rate": 0.0019989286831487633, "loss": 2.1112, "step": 137 }, { "epoch": 0.04, "learning_rate": 0.001998879442761957, "loss": 2.0882, "step": 138 }, { "epoch": 0.05, "learning_rate": 0.0019988290966856804, "loss": 2.0942, "step": 139 }, { "epoch": 0.05, "learning_rate": 0.001998777644975663, "loss": 2.0613, "step": 140 }, { "epoch": 0.05, "learning_rate": 0.0019987250876888573, "loss": 2.1476, "step": 141 }, { "epoch": 0.05, "learning_rate": 0.001998671424883442, "loss": 2.0381, "step": 142 }, { "epoch": 0.05, "learning_rate": 0.0019986166566188165, "loss": 2.0897, "step": 143 }, { "epoch": 0.05, "learning_rate": 0.0019985607829556065, "loss": 2.0352, "step": 144 }, { "epoch": 0.05, "learning_rate": 0.0019985038039556603, "loss": 2.1212, "step": 145 }, { "epoch": 0.05, "learning_rate": 0.001998445719682049, "loss": 2.1528, "step": 146 }, { "epoch": 0.05, "learning_rate": 0.0019983865301990682, "loss": 2.0831, "step": 147 }, { "epoch": 0.05, "learning_rate": 0.0019983262355722366, "loss": 2.1559, "step": 148 }, { "epoch": 0.05, "learning_rate": 0.0019982648358682956, "loss": 2.1353, "step": 149 }, { "epoch": 0.05, "learning_rate": 0.001998202331155211, "loss": 2.0933, "step": 150 }, { "epoch": 0.05, "learning_rate": 0.0019981387215021705, "loss": 2.118, "step": 151 }, { "epoch": 0.05, "learning_rate": 0.001998074006979586, "loss": 2.0517, "step": 152 }, { "epoch": 0.05, "learning_rate": 0.0019980081876590917, "loss": 2.1393, "step": 153 }, { "epoch": 0.05, "learning_rate": 0.0019979412636135455, "loss": 2.0884, "step": 154 }, { "epoch": 0.05, "learning_rate": 0.001997873234917027, "loss": 2.0733, "step": 155 }, { "epoch": 0.05, "learning_rate": 0.001997804101644839, "loss": 2.0472, "step": 156 }, { "epoch": 0.05, "learning_rate": 0.001997733863873508, "loss": 2.0553, "step": 157 }, { "epoch": 0.05, "learning_rate": 0.0019976625216807818, "loss": 2.045, "step": 158 }, { "epoch": 0.05, "learning_rate": 0.001997590075145631, "loss": 2.0846, "step": 159 }, { "epoch": 0.05, "learning_rate": 0.0019975165243482497, "loss": 2.0594, "step": 160 }, { "epoch": 0.05, "learning_rate": 0.0019974418693700525, "loss": 2.1156, "step": 161 }, { "epoch": 0.05, "learning_rate": 0.0019973661102936774, "loss": 2.078, "step": 162 }, { "epoch": 0.05, "learning_rate": 0.001997289247202985, "loss": 2.049, "step": 163 }, { "epoch": 0.05, "learning_rate": 0.0019972112801830565, "loss": 2.0087, "step": 164 }, { "epoch": 0.05, "learning_rate": 0.0019971322093201966, "loss": 2.0108, "step": 165 }, { "epoch": 0.05, "learning_rate": 0.001997052034701931, "loss": 1.9798, "step": 166 }, { "epoch": 0.05, "learning_rate": 0.001996970756417007, "loss": 2.025, "step": 167 }, { "epoch": 0.05, "learning_rate": 0.0019968883745553946, "loss": 2.0775, "step": 168 }, { "epoch": 0.05, "learning_rate": 0.0019968048892082843, "loss": 2.1539, "step": 169 }, { "epoch": 0.06, "learning_rate": 0.0019967203004680888, "loss": 2.1089, "step": 170 }, { "epoch": 0.06, "learning_rate": 0.0019966346084284415, "loss": 2.0814, "step": 171 }, { "epoch": 0.06, "learning_rate": 0.001996547813184198, "loss": 2.0793, "step": 172 }, { "epoch": 0.06, "learning_rate": 0.001996459914831434, "loss": 1.9996, "step": 173 }, { "epoch": 0.06, "learning_rate": 0.0019963709134674474, "loss": 2.0676, "step": 174 }, { "epoch": 0.06, "learning_rate": 0.0019962808091907557, "loss": 2.0625, "step": 175 }, { "epoch": 0.06, "learning_rate": 0.0019961896021010984, "loss": 2.0999, "step": 176 }, { "epoch": 0.06, "learning_rate": 0.001996097292299436, "loss": 2.0801, "step": 177 }, { "epoch": 0.06, "learning_rate": 0.001996003879887948, "loss": 2.0613, "step": 178 }, { "epoch": 0.06, "learning_rate": 0.001995909364970036, "loss": 2.0745, "step": 179 }, { "epoch": 0.06, "learning_rate": 0.001995813747650321, "loss": 2.0409, "step": 180 }, { "epoch": 0.06, "learning_rate": 0.0019957170280346446, "loss": 2.0792, "step": 181 }, { "epoch": 0.06, "learning_rate": 0.001995619206230069, "loss": 2.0212, "step": 182 }, { "epoch": 0.06, "learning_rate": 0.0019955202823448764, "loss": 2.0337, "step": 183 }, { "epoch": 0.06, "learning_rate": 0.0019954202564885677, "loss": 2.0714, "step": 184 }, { "epoch": 0.06, "learning_rate": 0.001995319128771865, "loss": 2.0505, "step": 185 }, { "epoch": 0.06, "learning_rate": 0.0019952168993067096, "loss": 2.0501, "step": 186 }, { "epoch": 0.06, "learning_rate": 0.001995113568206262, "loss": 2.0334, "step": 187 }, { "epoch": 0.06, "learning_rate": 0.001995009135584903, "loss": 2.1424, "step": 188 }, { "epoch": 0.06, "learning_rate": 0.0019949036015582317, "loss": 2.1002, "step": 189 }, { "epoch": 0.06, "learning_rate": 0.0019947969662430674, "loss": 2.096, "step": 190 }, { "epoch": 0.06, "learning_rate": 0.0019946892297574475, "loss": 2.0934, "step": 191 }, { "epoch": 0.06, "learning_rate": 0.001994580392220629, "loss": 2.1075, "step": 192 }, { "epoch": 0.06, "learning_rate": 0.0019944704537530867, "loss": 2.1223, "step": 193 }, { "epoch": 0.06, "learning_rate": 0.001994359414476516, "loss": 2.0245, "step": 194 }, { "epoch": 0.06, "learning_rate": 0.0019942472745138285, "loss": 2.0902, "step": 195 }, { "epoch": 0.06, "learning_rate": 0.001994134033989156, "loss": 2.129, "step": 196 }, { "epoch": 0.06, "learning_rate": 0.001994019693027847, "loss": 2.0232, "step": 197 }, { "epoch": 0.06, "learning_rate": 0.00199390425175647, "loss": 2.0766, "step": 198 }, { "epoch": 0.06, "learning_rate": 0.0019937877103028095, "loss": 2.0884, "step": 199 }, { "epoch": 0.06, "learning_rate": 0.001993670068795869, "loss": 2.0765, "step": 200 }, { "epoch": 0.07, "learning_rate": 0.0019935513273658697, "loss": 2.0611, "step": 201 }, { "epoch": 0.07, "learning_rate": 0.0019934314861442494, "loss": 2.0507, "step": 202 }, { "epoch": 0.07, "learning_rate": 0.001993310545263665, "loss": 2.0921, "step": 203 }, { "epoch": 0.07, "learning_rate": 0.0019931885048579882, "loss": 2.0731, "step": 204 }, { "epoch": 0.07, "learning_rate": 0.00199306536506231, "loss": 2.0434, "step": 205 }, { "epoch": 0.07, "learning_rate": 0.001992941126012937, "loss": 2.1057, "step": 206 }, { "epoch": 0.07, "learning_rate": 0.001992815787847394, "loss": 2.0512, "step": 207 }, { "epoch": 0.07, "learning_rate": 0.001992689350704421, "loss": 2.1049, "step": 208 }, { "epoch": 0.07, "learning_rate": 0.001992561814723975, "loss": 2.0961, "step": 209 }, { "epoch": 0.07, "learning_rate": 0.001992433180047229, "loss": 2.0657, "step": 210 }, { "epoch": 0.07, "learning_rate": 0.0019923034468165733, "loss": 2.1074, "step": 211 }, { "epoch": 0.07, "learning_rate": 0.001992172615175613, "loss": 2.0553, "step": 212 }, { "epoch": 0.07, "learning_rate": 0.00199204068526917, "loss": 2.0443, "step": 213 }, { "epoch": 0.07, "learning_rate": 0.0019919076572432813, "loss": 2.0007, "step": 214 }, { "epoch": 0.07, "learning_rate": 0.0019917735312451994, "loss": 2.1088, "step": 215 }, { "epoch": 0.07, "learning_rate": 0.0019916383074233927, "loss": 2.0499, "step": 216 }, { "epoch": 0.07, "learning_rate": 0.0019915019859275437, "loss": 2.0655, "step": 217 }, { "epoch": 0.07, "learning_rate": 0.0019913645669085516, "loss": 2.1092, "step": 218 }, { "epoch": 0.07, "learning_rate": 0.001991226050518529, "loss": 2.0206, "step": 219 }, { "epoch": 0.07, "learning_rate": 0.0019910864369108044, "loss": 2.1384, "step": 220 }, { "epoch": 0.07, "learning_rate": 0.00199094572623992, "loss": 2.077, "step": 221 }, { "epoch": 0.07, "learning_rate": 0.0019908039186616326, "loss": 2.0799, "step": 222 }, { "epoch": 0.07, "learning_rate": 0.0019906610143329132, "loss": 2.037, "step": 223 }, { "epoch": 0.07, "learning_rate": 0.0019905170134119467, "loss": 2.1598, "step": 224 }, { "epoch": 0.07, "learning_rate": 0.0019903719160581325, "loss": 2.0714, "step": 225 }, { "epoch": 0.07, "learning_rate": 0.0019902257224320833, "loss": 2.0904, "step": 226 }, { "epoch": 0.07, "learning_rate": 0.0019900784326956243, "loss": 2.103, "step": 227 }, { "epoch": 0.07, "learning_rate": 0.0019899300470117955, "loss": 2.0663, "step": 228 }, { "epoch": 0.07, "learning_rate": 0.0019897805655448497, "loss": 2.0921, "step": 229 }, { "epoch": 0.07, "learning_rate": 0.0019896299884602517, "loss": 2.1032, "step": 230 }, { "epoch": 0.08, "learning_rate": 0.0019894783159246803, "loss": 2.1365, "step": 231 }, { "epoch": 0.08, "learning_rate": 0.001989325548106026, "loss": 2.0476, "step": 232 }, { "epoch": 0.08, "learning_rate": 0.0019891716851733925, "loss": 2.0533, "step": 233 }, { "epoch": 0.08, "learning_rate": 0.001989016727297095, "loss": 2.0276, "step": 234 }, { "epoch": 0.08, "learning_rate": 0.0019888606746486614, "loss": 2.1047, "step": 235 }, { "epoch": 0.08, "learning_rate": 0.00198870352740083, "loss": 2.0429, "step": 236 }, { "epoch": 0.08, "learning_rate": 0.001988545285727553, "loss": 2.0694, "step": 237 }, { "epoch": 0.08, "learning_rate": 0.001988385949803992, "loss": 2.0591, "step": 238 }, { "epoch": 0.08, "learning_rate": 0.001988225519806521, "loss": 2.0747, "step": 239 }, { "epoch": 0.08, "learning_rate": 0.0019880639959127247, "loss": 2.0278, "step": 240 }, { "epoch": 0.08, "learning_rate": 0.0019879013783013987, "loss": 2.0023, "step": 241 }, { "epoch": 0.08, "learning_rate": 0.001987737667152549, "loss": 2.0752, "step": 242 }, { "epoch": 0.08, "learning_rate": 0.0019875728626473936, "loss": 2.0716, "step": 243 }, { "epoch": 0.08, "learning_rate": 0.001987406964968358, "loss": 1.969, "step": 244 }, { "epoch": 0.08, "learning_rate": 0.00198723997429908, "loss": 2.0557, "step": 245 }, { "epoch": 0.08, "learning_rate": 0.0019870718908244064, "loss": 2.0906, "step": 246 }, { "epoch": 0.08, "learning_rate": 0.001986902714730394, "loss": 2.1311, "step": 247 }, { "epoch": 0.08, "learning_rate": 0.0019867324462043085, "loss": 2.0179, "step": 248 }, { "epoch": 0.08, "learning_rate": 0.0019865610854346255, "loss": 2.0583, "step": 249 }, { "epoch": 0.08, "learning_rate": 0.0019863886326110295, "loss": 2.0916, "step": 250 }, { "epoch": 0.08, "learning_rate": 0.001986215087924414, "loss": 2.2303, "step": 251 }, { "epoch": 0.08, "learning_rate": 0.0019860404515668795, "loss": 2.081, "step": 252 }, { "epoch": 0.08, "learning_rate": 0.0019858647237317372, "loss": 2.0494, "step": 253 }, { "epoch": 0.08, "learning_rate": 0.001985687904613505, "loss": 2.0448, "step": 254 }, { "epoch": 0.08, "learning_rate": 0.00198550999440791, "loss": 2.0633, "step": 255 }, { "epoch": 0.08, "learning_rate": 0.0019853309933118854, "loss": 2.0759, "step": 256 }, { "epoch": 0.08, "learning_rate": 0.0019851509015235734, "loss": 2.0599, "step": 257 }, { "epoch": 0.08, "learning_rate": 0.0019849697192423233, "loss": 2.0558, "step": 258 }, { "epoch": 0.08, "learning_rate": 0.0019847874466686903, "loss": 2.0938, "step": 259 }, { "epoch": 0.08, "learning_rate": 0.001984604084004438, "loss": 1.9067, "step": 260 }, { "epoch": 0.08, "learning_rate": 0.001984419631452536, "loss": 2.0445, "step": 261 }, { "epoch": 0.09, "learning_rate": 0.00198423408921716, "loss": 2.162, "step": 262 }, { "epoch": 0.09, "learning_rate": 0.001984047457503692, "loss": 2.1467, "step": 263 }, { "epoch": 0.09, "learning_rate": 0.001983859736518721, "loss": 2.1907, "step": 264 }, { "epoch": 0.09, "learning_rate": 0.0019836709264700403, "loss": 2.0638, "step": 265 }, { "epoch": 0.09, "learning_rate": 0.0019834810275666495, "loss": 2.1193, "step": 266 }, { "epoch": 0.09, "learning_rate": 0.0019832900400187543, "loss": 2.3763, "step": 267 }, { "epoch": 0.09, "learning_rate": 0.001983097964037763, "loss": 2.2975, "step": 268 }, { "epoch": 0.09, "learning_rate": 0.001982904799836291, "loss": 2.302, "step": 269 }, { "epoch": 0.09, "learning_rate": 0.001982710547628158, "loss": 2.2486, "step": 270 }, { "epoch": 0.09, "learning_rate": 0.0019825152076283865, "loss": 2.3138, "step": 271 }, { "epoch": 0.09, "learning_rate": 0.001982318780053205, "loss": 2.2677, "step": 272 }, { "epoch": 0.09, "learning_rate": 0.0019821212651200443, "loss": 2.4078, "step": 273 }, { "epoch": 0.09, "learning_rate": 0.0019819226630475406, "loss": 2.4565, "step": 274 }, { "epoch": 0.09, "learning_rate": 0.001981722974055532, "loss": 2.6142, "step": 275 }, { "epoch": 0.09, "learning_rate": 0.0019815221983650595, "loss": 2.6623, "step": 276 }, { "epoch": 0.09, "learning_rate": 0.0019813203361983686, "loss": 2.6342, "step": 277 }, { "epoch": 0.09, "learning_rate": 0.0019811173877789063, "loss": 2.6934, "step": 278 }, { "epoch": 0.09, "learning_rate": 0.001980913353331322, "loss": 2.666, "step": 279 }, { "epoch": 0.09, "learning_rate": 0.001980708233081468, "loss": 2.6922, "step": 280 }, { "epoch": 0.09, "learning_rate": 0.001980502027256398, "loss": 2.6918, "step": 281 }, { "epoch": 0.09, "learning_rate": 0.0019802947360843667, "loss": 2.6234, "step": 282 }, { "epoch": 0.09, "learning_rate": 0.0019800863597948313, "loss": 2.6921, "step": 283 }, { "epoch": 0.09, "learning_rate": 0.0019798768986184503, "loss": 2.565, "step": 284 }, { "epoch": 0.09, "learning_rate": 0.001979666352787082, "loss": 2.6179, "step": 285 }, { "epoch": 0.09, "learning_rate": 0.0019794547225337853, "loss": 2.6638, "step": 286 }, { "epoch": 0.09, "learning_rate": 0.001979242008092821, "loss": 2.6022, "step": 287 }, { "epoch": 0.09, "learning_rate": 0.001979028209699649, "loss": 2.6106, "step": 288 }, { "epoch": 0.09, "learning_rate": 0.001978813327590928, "loss": 2.5023, "step": 289 }, { "epoch": 0.09, "learning_rate": 0.0019785973620045187, "loss": 2.6142, "step": 290 }, { "epoch": 0.09, "learning_rate": 0.0019783803131794794, "loss": 2.5806, "step": 291 }, { "epoch": 0.09, "learning_rate": 0.0019781621813560676, "loss": 2.5048, "step": 292 }, { "epoch": 0.1, "learning_rate": 0.001977942966775741, "loss": 2.5324, "step": 293 }, { "epoch": 0.1, "learning_rate": 0.0019777226696811533, "loss": 2.5818, "step": 294 }, { "epoch": 0.1, "learning_rate": 0.0019775012903161587, "loss": 2.5093, "step": 295 }, { "epoch": 0.1, "learning_rate": 0.001977278828925809, "loss": 2.5315, "step": 296 }, { "epoch": 0.1, "learning_rate": 0.0019770552857563523, "loss": 2.4867, "step": 297 }, { "epoch": 0.1, "learning_rate": 0.001976830661055236, "loss": 2.436, "step": 298 }, { "epoch": 0.1, "learning_rate": 0.0019766049550711037, "loss": 2.468, "step": 299 }, { "epoch": 0.1, "learning_rate": 0.001976378168053796, "loss": 2.416, "step": 300 }, { "epoch": 0.1, "learning_rate": 0.0019761503002543508, "loss": 2.4602, "step": 301 }, { "epoch": 0.1, "learning_rate": 0.0019759213519250007, "loss": 2.4497, "step": 302 }, { "epoch": 0.1, "learning_rate": 0.0019756913233191765, "loss": 2.4976, "step": 303 }, { "epoch": 0.1, "learning_rate": 0.001975460214691503, "loss": 2.4928, "step": 304 }, { "epoch": 0.1, "learning_rate": 0.0019752280262978015, "loss": 2.4358, "step": 305 }, { "epoch": 0.1, "learning_rate": 0.0019749947583950884, "loss": 2.4631, "step": 306 }, { "epoch": 0.1, "learning_rate": 0.0019747604112415745, "loss": 2.445, "step": 307 }, { "epoch": 0.1, "learning_rate": 0.0019745249850966658, "loss": 2.4122, "step": 308 }, { "epoch": 0.1, "learning_rate": 0.001974288480220963, "loss": 2.4289, "step": 309 }, { "epoch": 0.1, "learning_rate": 0.0019740508968762597, "loss": 2.4031, "step": 310 }, { "epoch": 0.1, "learning_rate": 0.0019738122353255443, "loss": 2.4523, "step": 311 }, { "epoch": 0.1, "learning_rate": 0.001973572495832999, "loss": 2.4728, "step": 312 }, { "epoch": 0.1, "learning_rate": 0.0019733316786639974, "loss": 2.4023, "step": 313 }, { "epoch": 0.1, "learning_rate": 0.0019730897840851084, "loss": 2.4529, "step": 314 }, { "epoch": 0.1, "learning_rate": 0.0019728468123640917, "loss": 2.4618, "step": 315 }, { "epoch": 0.1, "learning_rate": 0.0019726027637698997, "loss": 2.4451, "step": 316 }, { "epoch": 0.1, "learning_rate": 0.0019723576385726775, "loss": 2.471, "step": 317 }, { "epoch": 0.1, "learning_rate": 0.0019721114370437613, "loss": 2.4193, "step": 318 }, { "epoch": 0.1, "learning_rate": 0.0019718641594556794, "loss": 2.4481, "step": 319 }, { "epoch": 0.1, "learning_rate": 0.0019716158060821497, "loss": 2.432, "step": 320 }, { "epoch": 0.1, "learning_rate": 0.001971366377198083, "loss": 2.3644, "step": 321 }, { "epoch": 0.1, "learning_rate": 0.0019711158730795794, "loss": 2.4362, "step": 322 }, { "epoch": 0.1, "learning_rate": 0.0019708642940039294, "loss": 2.3846, "step": 323 }, { "epoch": 0.11, "learning_rate": 0.0019706116402496126, "loss": 2.3606, "step": 324 }, { "epoch": 0.11, "learning_rate": 0.0019703579120963, "loss": 2.3842, "step": 325 }, { "epoch": 0.11, "learning_rate": 0.00197010310982485, "loss": 2.4227, "step": 326 }, { "epoch": 0.11, "learning_rate": 0.0019698472337173114, "loss": 2.4753, "step": 327 }, { "epoch": 0.11, "learning_rate": 0.001969590284056921, "loss": 2.5037, "step": 328 }, { "epoch": 0.11, "learning_rate": 0.0019693322611281042, "loss": 2.3915, "step": 329 }, { "epoch": 0.11, "learning_rate": 0.0019690731652164738, "loss": 2.4047, "step": 330 }, { "epoch": 0.11, "learning_rate": 0.0019688129966088314, "loss": 2.4123, "step": 331 }, { "epoch": 0.11, "learning_rate": 0.0019685517555931647, "loss": 2.438, "step": 332 }, { "epoch": 0.11, "learning_rate": 0.00196828944245865, "loss": 2.3101, "step": 333 }, { "epoch": 0.11, "learning_rate": 0.0019680260574956488, "loss": 2.3534, "step": 334 }, { "epoch": 0.11, "learning_rate": 0.00196776160099571, "loss": 2.3386, "step": 335 }, { "epoch": 0.11, "learning_rate": 0.001967496073251569, "loss": 2.2228, "step": 336 }, { "epoch": 0.11, "learning_rate": 0.0019672294745571453, "loss": 2.3746, "step": 337 }, { "epoch": 0.11, "learning_rate": 0.0019669618052075457, "loss": 2.3131, "step": 338 }, { "epoch": 0.11, "learning_rate": 0.0019666930654990613, "loss": 2.2764, "step": 339 }, { "epoch": 0.11, "learning_rate": 0.001966423255729168, "loss": 2.3374, "step": 340 }, { "epoch": 0.11, "learning_rate": 0.0019661523761965263, "loss": 2.2712, "step": 341 }, { "epoch": 0.11, "learning_rate": 0.001965880427200981, "loss": 2.2847, "step": 342 }, { "epoch": 0.11, "learning_rate": 0.0019656074090435597, "loss": 2.3434, "step": 343 }, { "epoch": 0.11, "learning_rate": 0.001965333322026476, "loss": 2.3203, "step": 344 }, { "epoch": 0.11, "learning_rate": 0.0019650581664531234, "loss": 2.1935, "step": 345 }, { "epoch": 0.11, "learning_rate": 0.0019647819426280807, "loss": 2.2213, "step": 346 }, { "epoch": 0.11, "learning_rate": 0.001964504650857108, "loss": 2.2762, "step": 347 }, { "epoch": 0.11, "learning_rate": 0.001964226291447148, "loss": 2.2515, "step": 348 }, { "epoch": 0.11, "learning_rate": 0.0019639468647063246, "loss": 2.2773, "step": 349 }, { "epoch": 0.11, "learning_rate": 0.0019636663709439446, "loss": 2.3161, "step": 350 }, { "epoch": 0.11, "learning_rate": 0.0019633848104704935, "loss": 2.2455, "step": 351 }, { "epoch": 0.11, "learning_rate": 0.0019631021835976406, "loss": 2.236, "step": 352 }, { "epoch": 0.11, "learning_rate": 0.0019628184906382326, "loss": 2.2653, "step": 353 }, { "epoch": 0.11, "learning_rate": 0.0019625337319062984, "loss": 2.3332, "step": 354 }, { "epoch": 0.12, "learning_rate": 0.0019622479077170455, "loss": 2.3303, "step": 355 }, { "epoch": 0.12, "learning_rate": 0.001961961018386862, "loss": 2.2416, "step": 356 }, { "epoch": 0.12, "learning_rate": 0.0019616730642333137, "loss": 2.2743, "step": 357 }, { "epoch": 0.12, "learning_rate": 0.0019613840455751457, "loss": 2.3369, "step": 358 }, { "epoch": 0.12, "learning_rate": 0.0019610939627322816, "loss": 2.2322, "step": 359 }, { "epoch": 0.12, "learning_rate": 0.001960802816025822, "loss": 2.2351, "step": 360 }, { "epoch": 0.12, "learning_rate": 0.0019605106057780475, "loss": 2.1948, "step": 361 }, { "epoch": 0.12, "learning_rate": 0.0019602173323124126, "loss": 2.2395, "step": 362 }, { "epoch": 0.12, "learning_rate": 0.001959922995953551, "loss": 2.2546, "step": 363 }, { "epoch": 0.12, "learning_rate": 0.0019596275970272733, "loss": 2.1739, "step": 364 }, { "epoch": 0.12, "learning_rate": 0.001959331135860564, "loss": 2.2841, "step": 365 }, { "epoch": 0.12, "learning_rate": 0.0019590336127815857, "loss": 2.2876, "step": 366 }, { "epoch": 0.12, "learning_rate": 0.0019587350281196755, "loss": 2.2651, "step": 367 }, { "epoch": 0.12, "learning_rate": 0.001958435382205345, "loss": 2.2302, "step": 368 }, { "epoch": 0.12, "learning_rate": 0.0019581346753702825, "loss": 2.198, "step": 369 }, { "epoch": 0.12, "learning_rate": 0.0019578329079473484, "loss": 2.2241, "step": 370 }, { "epoch": 0.12, "learning_rate": 0.001957530080270578, "loss": 2.2068, "step": 371 }, { "epoch": 0.12, "learning_rate": 0.0019572261926751808, "loss": 2.3018, "step": 372 }, { "epoch": 0.12, "learning_rate": 0.001956921245497539, "loss": 2.2838, "step": 373 }, { "epoch": 0.12, "learning_rate": 0.001956615239075207, "loss": 2.2613, "step": 374 }, { "epoch": 0.12, "learning_rate": 0.0019563081737469135, "loss": 2.1727, "step": 375 }, { "epoch": 0.12, "learning_rate": 0.0019560000498525573, "loss": 2.28, "step": 376 }, { "epoch": 0.12, "learning_rate": 0.0019556908677332108, "loss": 2.1828, "step": 377 }, { "epoch": 0.12, "learning_rate": 0.0019553806277311163, "loss": 2.1922, "step": 378 }, { "epoch": 0.12, "learning_rate": 0.001955069330189688, "loss": 2.153, "step": 379 }, { "epoch": 0.12, "learning_rate": 0.0019547569754535103, "loss": 2.2754, "step": 380 }, { "epoch": 0.12, "learning_rate": 0.001954443563868338, "loss": 2.2508, "step": 381 }, { "epoch": 0.12, "learning_rate": 0.001954129095781096, "loss": 2.2654, "step": 382 }, { "epoch": 0.12, "learning_rate": 0.0019538135715398783, "loss": 2.2628, "step": 383 }, { "epoch": 0.12, "learning_rate": 0.0019534969914939476, "loss": 2.2951, "step": 384 }, { "epoch": 0.13, "learning_rate": 0.001953179355993737, "loss": 2.2696, "step": 385 }, { "epoch": 0.13, "learning_rate": 0.0019528606653908451, "loss": 2.2702, "step": 386 }, { "epoch": 0.13, "learning_rate": 0.0019525409200380412, "loss": 2.245, "step": 387 }, { "epoch": 0.13, "learning_rate": 0.0019522201202892607, "loss": 2.2476, "step": 388 }, { "epoch": 0.13, "learning_rate": 0.0019518982664996063, "loss": 2.2282, "step": 389 }, { "epoch": 0.13, "learning_rate": 0.0019515753590253477, "loss": 2.2384, "step": 390 }, { "epoch": 0.13, "learning_rate": 0.0019512513982239206, "loss": 2.1096, "step": 391 }, { "epoch": 0.13, "learning_rate": 0.0019509263844539274, "loss": 2.2024, "step": 392 }, { "epoch": 0.13, "learning_rate": 0.0019506003180751348, "loss": 2.203, "step": 393 }, { "epoch": 0.13, "learning_rate": 0.001950273199448476, "loss": 2.244, "step": 394 }, { "epoch": 0.13, "learning_rate": 0.001949945028936048, "loss": 2.2105, "step": 395 }, { "epoch": 0.13, "learning_rate": 0.0019496158069011128, "loss": 2.2775, "step": 396 }, { "epoch": 0.13, "learning_rate": 0.0019492855337080962, "loss": 2.2232, "step": 397 }, { "epoch": 0.13, "learning_rate": 0.0019489542097225869, "loss": 2.2179, "step": 398 }, { "epoch": 0.13, "learning_rate": 0.0019486218353113377, "loss": 2.2082, "step": 399 }, { "epoch": 0.13, "learning_rate": 0.001948288410842264, "loss": 2.259, "step": 400 }, { "epoch": 0.13, "learning_rate": 0.0019479539366844426, "loss": 2.2092, "step": 401 }, { "epoch": 0.13, "learning_rate": 0.0019476184132081137, "loss": 2.2653, "step": 402 }, { "epoch": 0.13, "learning_rate": 0.0019472818407846777, "loss": 2.2636, "step": 403 }, { "epoch": 0.13, "learning_rate": 0.0019469442197866966, "loss": 2.2524, "step": 404 }, { "epoch": 0.13, "learning_rate": 0.001946605550587894, "loss": 2.2221, "step": 405 }, { "epoch": 0.13, "learning_rate": 0.001946265833563152, "loss": 2.1822, "step": 406 }, { "epoch": 0.13, "learning_rate": 0.001945925069088514, "loss": 2.1763, "step": 407 }, { "epoch": 0.13, "learning_rate": 0.0019455832575411823, "loss": 2.2288, "step": 408 }, { "epoch": 0.13, "learning_rate": 0.001945240399299518, "loss": 2.1712, "step": 409 }, { "epoch": 0.13, "learning_rate": 0.0019448964947430415, "loss": 2.2069, "step": 410 }, { "epoch": 0.13, "learning_rate": 0.0019445515442524308, "loss": 2.1561, "step": 411 }, { "epoch": 0.13, "learning_rate": 0.001944205548209522, "loss": 2.1803, "step": 412 }, { "epoch": 0.13, "learning_rate": 0.0019438585069973086, "loss": 2.1821, "step": 413 }, { "epoch": 0.13, "learning_rate": 0.0019435104209999405, "loss": 2.1603, "step": 414 }, { "epoch": 0.13, "learning_rate": 0.001943161290602725, "loss": 2.1016, "step": 415 }, { "epoch": 0.14, "learning_rate": 0.0019428111161921243, "loss": 2.1675, "step": 416 }, { "epoch": 0.14, "learning_rate": 0.0019424598981557573, "loss": 2.2184, "step": 417 }, { "epoch": 0.14, "learning_rate": 0.001942107636882398, "loss": 2.1275, "step": 418 }, { "epoch": 0.14, "learning_rate": 0.0019417543327619742, "loss": 2.1577, "step": 419 }, { "epoch": 0.14, "learning_rate": 0.0019413999861855693, "loss": 2.1846, "step": 420 }, { "epoch": 0.14, "learning_rate": 0.00194104459754542, "loss": 2.1391, "step": 421 }, { "epoch": 0.14, "learning_rate": 0.0019406881672349165, "loss": 2.1704, "step": 422 }, { "epoch": 0.14, "learning_rate": 0.0019403306956486024, "loss": 2.1646, "step": 423 }, { "epoch": 0.14, "learning_rate": 0.0019399721831821735, "loss": 2.1891, "step": 424 }, { "epoch": 0.14, "learning_rate": 0.0019396126302324776, "loss": 2.1448, "step": 425 }, { "epoch": 0.14, "learning_rate": 0.0019392520371975153, "loss": 2.1243, "step": 426 }, { "epoch": 0.14, "learning_rate": 0.0019388904044764373, "loss": 2.2401, "step": 427 }, { "epoch": 0.14, "learning_rate": 0.0019385277324695455, "loss": 2.1384, "step": 428 }, { "epoch": 0.14, "learning_rate": 0.001938164021578293, "loss": 2.1652, "step": 429 }, { "epoch": 0.14, "learning_rate": 0.001937799272205282, "loss": 2.0959, "step": 430 }, { "epoch": 0.14, "learning_rate": 0.001937433484754264, "loss": 2.0891, "step": 431 }, { "epoch": 0.14, "learning_rate": 0.0019370666596301405, "loss": 2.1236, "step": 432 }, { "epoch": 0.14, "learning_rate": 0.0019366987972389614, "loss": 2.2146, "step": 433 }, { "epoch": 0.14, "learning_rate": 0.001936329897987924, "loss": 2.154, "step": 434 }, { "epoch": 0.14, "learning_rate": 0.0019359599622853741, "loss": 2.1548, "step": 435 }, { "epoch": 0.14, "learning_rate": 0.0019355889905408046, "loss": 2.1639, "step": 436 }, { "epoch": 0.14, "learning_rate": 0.001935216983164855, "loss": 2.0704, "step": 437 }, { "epoch": 0.14, "learning_rate": 0.0019348439405693122, "loss": 2.1747, "step": 438 }, { "epoch": 0.14, "learning_rate": 0.0019344698631671069, "loss": 2.1035, "step": 439 }, { "epoch": 0.14, "learning_rate": 0.0019340947513723178, "loss": 2.1711, "step": 440 }, { "epoch": 0.14, "learning_rate": 0.0019337186056001662, "loss": 2.1059, "step": 441 }, { "epoch": 0.14, "learning_rate": 0.0019333414262670198, "loss": 2.1624, "step": 442 }, { "epoch": 0.14, "learning_rate": 0.0019329632137903895, "loss": 2.1371, "step": 443 }, { "epoch": 0.14, "learning_rate": 0.00193258396858893, "loss": 2.1443, "step": 444 }, { "epoch": 0.14, "learning_rate": 0.0019322036910824393, "loss": 2.1459, "step": 445 }, { "epoch": 0.14, "learning_rate": 0.0019318223816918575, "loss": 2.1783, "step": 446 }, { "epoch": 0.15, "learning_rate": 0.0019314400408392675, "loss": 2.1633, "step": 447 }, { "epoch": 0.15, "learning_rate": 0.0019310566689478938, "loss": 2.1449, "step": 448 }, { "epoch": 0.15, "learning_rate": 0.0019306722664421026, "loss": 2.0984, "step": 449 }, { "epoch": 0.15, "learning_rate": 0.0019302868337474, "loss": 2.1321, "step": 450 }, { "epoch": 0.15, "learning_rate": 0.001929900371290433, "loss": 2.15, "step": 451 }, { "epoch": 0.15, "learning_rate": 0.0019295128794989886, "loss": 2.2113, "step": 452 }, { "epoch": 0.15, "learning_rate": 0.0019291243588019933, "loss": 2.1699, "step": 453 }, { "epoch": 0.15, "learning_rate": 0.0019287348096295117, "loss": 2.1483, "step": 454 }, { "epoch": 0.15, "learning_rate": 0.0019283442324127477, "loss": 2.1784, "step": 455 }, { "epoch": 0.15, "learning_rate": 0.0019279526275840428, "loss": 2.1405, "step": 456 }, { "epoch": 0.15, "learning_rate": 0.001927559995576876, "loss": 2.1672, "step": 457 }, { "epoch": 0.15, "learning_rate": 0.0019271663368258635, "loss": 2.1635, "step": 458 }, { "epoch": 0.15, "learning_rate": 0.001926771651766758, "loss": 2.1517, "step": 459 }, { "epoch": 0.15, "learning_rate": 0.0019263759408364482, "loss": 2.1726, "step": 460 }, { "epoch": 0.15, "learning_rate": 0.001925979204472958, "loss": 2.1924, "step": 461 }, { "epoch": 0.15, "learning_rate": 0.001925581443115447, "loss": 2.0985, "step": 462 }, { "epoch": 0.15, "learning_rate": 0.0019251826572042093, "loss": 2.0917, "step": 463 }, { "epoch": 0.15, "learning_rate": 0.0019247828471806723, "loss": 2.1681, "step": 464 }, { "epoch": 0.15, "learning_rate": 0.0019243820134873984, "loss": 2.1743, "step": 465 }, { "epoch": 0.15, "learning_rate": 0.001923980156568082, "loss": 2.0513, "step": 466 }, { "epoch": 0.15, "learning_rate": 0.00192357727686755, "loss": 2.117, "step": 467 }, { "epoch": 0.15, "learning_rate": 0.001923173374831763, "loss": 2.1436, "step": 468 }, { "epoch": 0.15, "learning_rate": 0.0019227684509078116, "loss": 2.177, "step": 469 }, { "epoch": 0.15, "learning_rate": 0.0019223625055439185, "loss": 2.1096, "step": 470 }, { "epoch": 0.15, "learning_rate": 0.0019219555391894363, "loss": 2.1981, "step": 471 }, { "epoch": 0.15, "learning_rate": 0.0019215475522948485, "loss": 2.1588, "step": 472 }, { "epoch": 0.15, "learning_rate": 0.001921138545311768, "loss": 2.2012, "step": 473 }, { "epoch": 0.15, "learning_rate": 0.0019207285186929367, "loss": 2.1444, "step": 474 }, { "epoch": 0.15, "learning_rate": 0.0019203174728922253, "loss": 2.1795, "step": 475 }, { "epoch": 0.15, "learning_rate": 0.0019199054083646329, "loss": 2.1323, "step": 476 }, { "epoch": 0.15, "learning_rate": 0.001919492325566286, "loss": 2.1902, "step": 477 }, { "epoch": 0.16, "learning_rate": 0.0019190782249544375, "loss": 2.1342, "step": 478 }, { "epoch": 0.16, "learning_rate": 0.001918663106987469, "loss": 2.2216, "step": 479 }, { "epoch": 0.16, "learning_rate": 0.001918246972124886, "loss": 2.1363, "step": 480 }, { "epoch": 0.16, "learning_rate": 0.001917829820827321, "loss": 2.0614, "step": 481 }, { "epoch": 0.16, "learning_rate": 0.001917411653556531, "loss": 2.1501, "step": 482 }, { "epoch": 0.16, "learning_rate": 0.0019169924707753982, "loss": 2.128, "step": 483 }, { "epoch": 0.16, "learning_rate": 0.0019165722729479284, "loss": 2.1603, "step": 484 }, { "epoch": 0.16, "learning_rate": 0.0019161510605392512, "loss": 2.079, "step": 485 }, { "epoch": 0.16, "learning_rate": 0.0019157288340156185, "loss": 2.0859, "step": 486 }, { "epoch": 0.16, "learning_rate": 0.0019153055938444061, "loss": 2.1403, "step": 487 }, { "epoch": 0.16, "learning_rate": 0.0019148813404941114, "loss": 2.0705, "step": 488 }, { "epoch": 0.16, "learning_rate": 0.0019144560744343525, "loss": 2.1252, "step": 489 }, { "epoch": 0.16, "learning_rate": 0.0019140297961358693, "loss": 2.1532, "step": 490 }, { "epoch": 0.16, "learning_rate": 0.001913602506070522, "loss": 2.1791, "step": 491 }, { "epoch": 0.16, "learning_rate": 0.0019131742047112908, "loss": 2.1478, "step": 492 }, { "epoch": 0.16, "learning_rate": 0.0019127448925322752, "loss": 2.1109, "step": 493 }, { "epoch": 0.16, "learning_rate": 0.0019123145700086935, "loss": 2.191, "step": 494 }, { "epoch": 0.16, "learning_rate": 0.001911883237616883, "loss": 2.1955, "step": 495 }, { "epoch": 0.16, "learning_rate": 0.0019114508958342977, "loss": 2.1231, "step": 496 }, { "epoch": 0.16, "learning_rate": 0.0019110175451395107, "loss": 2.1253, "step": 497 }, { "epoch": 0.16, "learning_rate": 0.0019105831860122095, "loss": 2.1387, "step": 498 }, { "epoch": 0.16, "learning_rate": 0.0019101478189332002, "loss": 2.1932, "step": 499 }, { "epoch": 0.16, "learning_rate": 0.0019097114443844033, "loss": 2.1489, "step": 500 }, { "epoch": 0.16, "learning_rate": 0.0019092740628488545, "loss": 2.1627, "step": 501 }, { "epoch": 0.16, "learning_rate": 0.0019088356748107047, "loss": 2.0619, "step": 502 }, { "epoch": 0.16, "learning_rate": 0.001908396280755219, "loss": 2.1345, "step": 503 }, { "epoch": 0.16, "learning_rate": 0.0019079558811687751, "loss": 2.1053, "step": 504 }, { "epoch": 0.16, "learning_rate": 0.0019075144765388654, "loss": 2.1282, "step": 505 }, { "epoch": 0.16, "learning_rate": 0.0019070720673540933, "loss": 2.1785, "step": 506 }, { "epoch": 0.16, "learning_rate": 0.0019066286541041743, "loss": 2.1153, "step": 507 }, { "epoch": 0.16, "learning_rate": 0.0019061842372799368, "loss": 2.0267, "step": 508 }, { "epoch": 0.17, "learning_rate": 0.0019057388173733183, "loss": 2.1919, "step": 509 }, { "epoch": 0.17, "learning_rate": 0.0019052923948773674, "loss": 2.1288, "step": 510 }, { "epoch": 0.17, "learning_rate": 0.0019048449702862428, "loss": 2.1042, "step": 511 }, { "epoch": 0.17, "learning_rate": 0.0019043965440952123, "loss": 2.1246, "step": 512 }, { "epoch": 0.17, "learning_rate": 0.0019039471168006514, "loss": 2.0712, "step": 513 }, { "epoch": 0.17, "learning_rate": 0.0019034966889000455, "loss": 2.1907, "step": 514 }, { "epoch": 0.17, "learning_rate": 0.0019030452608919859, "loss": 2.1392, "step": 515 }, { "epoch": 0.17, "learning_rate": 0.0019025928332761727, "loss": 2.1789, "step": 516 }, { "epoch": 0.17, "learning_rate": 0.0019021394065534105, "loss": 2.126, "step": 517 }, { "epoch": 0.17, "learning_rate": 0.0019016849812256117, "loss": 2.1303, "step": 518 }, { "epoch": 0.17, "learning_rate": 0.0019012295577957928, "loss": 2.1111, "step": 519 }, { "epoch": 0.17, "learning_rate": 0.0019007731367680755, "loss": 2.2, "step": 520 }, { "epoch": 0.17, "learning_rate": 0.0019003157186476864, "loss": 2.0795, "step": 521 }, { "epoch": 0.17, "learning_rate": 0.0018998573039409547, "loss": 2.1403, "step": 522 }, { "epoch": 0.17, "learning_rate": 0.0018993978931553137, "loss": 2.0947, "step": 523 }, { "epoch": 0.17, "learning_rate": 0.0018989374867992986, "loss": 2.0896, "step": 524 }, { "epoch": 0.17, "learning_rate": 0.0018984760853825472, "loss": 2.123, "step": 525 }, { "epoch": 0.17, "learning_rate": 0.0018980136894157984, "loss": 2.0766, "step": 526 }, { "epoch": 0.17, "learning_rate": 0.001897550299410892, "loss": 2.1781, "step": 527 }, { "epoch": 0.17, "learning_rate": 0.0018970859158807685, "loss": 2.0749, "step": 528 }, { "epoch": 0.17, "learning_rate": 0.0018966205393394677, "loss": 1.9679, "step": 529 }, { "epoch": 0.17, "learning_rate": 0.0018961541703021287, "loss": 2.0662, "step": 530 }, { "epoch": 0.17, "learning_rate": 0.0018956868092849894, "loss": 2.0647, "step": 531 }, { "epoch": 0.17, "learning_rate": 0.0018952184568053858, "loss": 2.1379, "step": 532 }, { "epoch": 0.17, "learning_rate": 0.001894749113381751, "loss": 2.116, "step": 533 }, { "epoch": 0.17, "learning_rate": 0.0018942787795336153, "loss": 1.9847, "step": 534 }, { "epoch": 0.17, "learning_rate": 0.0018938074557816056, "loss": 2.1033, "step": 535 }, { "epoch": 0.17, "learning_rate": 0.0018933351426474439, "loss": 2.0621, "step": 536 }, { "epoch": 0.17, "learning_rate": 0.0018928618406539477, "loss": 2.1301, "step": 537 }, { "epoch": 0.17, "learning_rate": 0.001892387550325029, "loss": 2.0771, "step": 538 }, { "epoch": 0.18, "learning_rate": 0.0018919122721856943, "loss": 2.0607, "step": 539 }, { "epoch": 0.18, "learning_rate": 0.001891436006762043, "loss": 1.9934, "step": 540 }, { "epoch": 0.18, "learning_rate": 0.0018909587545812671, "loss": 2.1134, "step": 541 }, { "epoch": 0.18, "learning_rate": 0.0018904805161716522, "loss": 2.1036, "step": 542 }, { "epoch": 0.18, "learning_rate": 0.0018900012920625737, "loss": 2.1144, "step": 543 }, { "epoch": 0.18, "learning_rate": 0.0018895210827844997, "loss": 2.0211, "step": 544 }, { "epoch": 0.18, "learning_rate": 0.0018890398888689879, "loss": 2.0266, "step": 545 }, { "epoch": 0.18, "learning_rate": 0.0018885577108486862, "loss": 2.0803, "step": 546 }, { "epoch": 0.18, "learning_rate": 0.0018880745492573321, "loss": 2.0821, "step": 547 }, { "epoch": 0.18, "learning_rate": 0.0018875904046297513, "loss": 2.178, "step": 548 }, { "epoch": 0.18, "learning_rate": 0.0018871052775018581, "loss": 2.0534, "step": 549 }, { "epoch": 0.18, "learning_rate": 0.001886619168410654, "loss": 2.0869, "step": 550 }, { "epoch": 0.18, "learning_rate": 0.0018861320778942282, "loss": 2.0644, "step": 551 }, { "epoch": 0.18, "learning_rate": 0.001885644006491755, "loss": 2.1137, "step": 552 }, { "epoch": 0.18, "learning_rate": 0.0018851549547434958, "loss": 2.0914, "step": 553 }, { "epoch": 0.18, "learning_rate": 0.0018846649231907962, "loss": 1.9875, "step": 554 }, { "epoch": 0.18, "learning_rate": 0.0018841739123760868, "loss": 2.1565, "step": 555 }, { "epoch": 0.18, "learning_rate": 0.0018836819228428823, "loss": 2.1218, "step": 556 }, { "epoch": 0.18, "learning_rate": 0.0018831889551357804, "loss": 2.0831, "step": 557 }, { "epoch": 0.18, "learning_rate": 0.001882695009800462, "loss": 2.1312, "step": 558 }, { "epoch": 0.18, "learning_rate": 0.0018822000873836898, "loss": 2.0955, "step": 559 }, { "epoch": 0.18, "learning_rate": 0.0018817041884333081, "loss": 2.095, "step": 560 }, { "epoch": 0.18, "learning_rate": 0.0018812073134982425, "loss": 2.1001, "step": 561 }, { "epoch": 0.18, "learning_rate": 0.0018807094631284986, "loss": 2.036, "step": 562 }, { "epoch": 0.18, "learning_rate": 0.0018802106378751615, "loss": 2.0238, "step": 563 }, { "epoch": 0.18, "learning_rate": 0.0018797108382903964, "loss": 2.122, "step": 564 }, { "epoch": 0.18, "learning_rate": 0.0018792100649274457, "loss": 2.0612, "step": 565 }, { "epoch": 0.18, "learning_rate": 0.001878708318340631, "loss": 2.0584, "step": 566 }, { "epoch": 0.18, "learning_rate": 0.00187820559908535, "loss": 2.1101, "step": 567 }, { "epoch": 0.18, "learning_rate": 0.0018777019077180783, "loss": 2.0248, "step": 568 }, { "epoch": 0.18, "learning_rate": 0.0018771972447963665, "loss": 2.0237, "step": 569 }, { "epoch": 0.19, "learning_rate": 0.001876691610878841, "loss": 2.0771, "step": 570 }, { "epoch": 0.19, "learning_rate": 0.0018761850065252032, "loss": 2.0118, "step": 571 }, { "epoch": 0.19, "learning_rate": 0.0018756774322962282, "loss": 2.1014, "step": 572 }, { "epoch": 0.19, "learning_rate": 0.0018751688887537657, "loss": 2.1533, "step": 573 }, { "epoch": 0.19, "learning_rate": 0.0018746593764607377, "loss": 2.0737, "step": 574 }, { "epoch": 0.19, "learning_rate": 0.001874148895981138, "loss": 2.0823, "step": 575 }, { "epoch": 0.19, "learning_rate": 0.001873637447880033, "loss": 2.1064, "step": 576 }, { "epoch": 0.19, "learning_rate": 0.0018731250327235598, "loss": 2.061, "step": 577 }, { "epoch": 0.19, "learning_rate": 0.0018726116510789264, "loss": 1.9748, "step": 578 }, { "epoch": 0.19, "learning_rate": 0.0018720973035144099, "loss": 2.0707, "step": 579 }, { "epoch": 0.19, "learning_rate": 0.0018715819905993566, "loss": 2.1104, "step": 580 }, { "epoch": 0.19, "learning_rate": 0.0018710657129041826, "loss": 1.9724, "step": 581 }, { "epoch": 0.19, "learning_rate": 0.0018705484710003705, "loss": 2.0259, "step": 582 }, { "epoch": 0.19, "learning_rate": 0.001870030265460471, "loss": 2.1052, "step": 583 }, { "epoch": 0.19, "learning_rate": 0.0018695110968581013, "loss": 2.135, "step": 584 }, { "epoch": 0.19, "learning_rate": 0.0018689909657679444, "loss": 2.0598, "step": 585 }, { "epoch": 0.19, "learning_rate": 0.0018684698727657494, "loss": 2.0408, "step": 586 }, { "epoch": 0.19, "learning_rate": 0.0018679478184283292, "loss": 2.0174, "step": 587 }, { "epoch": 0.19, "learning_rate": 0.0018674248033335617, "loss": 2.0058, "step": 588 }, { "epoch": 0.19, "learning_rate": 0.0018669008280603877, "loss": 2.0425, "step": 589 }, { "epoch": 0.19, "learning_rate": 0.001866375893188811, "loss": 2.114, "step": 590 }, { "epoch": 0.19, "learning_rate": 0.0018658499992998978, "loss": 2.0742, "step": 591 }, { "epoch": 0.19, "learning_rate": 0.0018653231469757756, "loss": 1.9674, "step": 592 }, { "epoch": 0.19, "learning_rate": 0.0018647953367996336, "loss": 2.0053, "step": 593 }, { "epoch": 0.19, "learning_rate": 0.0018642665693557197, "loss": 2.1688, "step": 594 }, { "epoch": 0.19, "learning_rate": 0.0018637368452293428, "loss": 2.0674, "step": 595 }, { "epoch": 0.19, "learning_rate": 0.0018632061650068709, "loss": 2.0846, "step": 596 }, { "epoch": 0.19, "learning_rate": 0.0018626745292757286, "loss": 2.0896, "step": 597 }, { "epoch": 0.19, "learning_rate": 0.0018621419386244003, "loss": 1.9572, "step": 598 }, { "epoch": 0.19, "learning_rate": 0.0018616083936424262, "loss": 2.1027, "step": 599 }, { "epoch": 0.19, "learning_rate": 0.001861073894920403, "loss": 2.0241, "step": 600 }, { "epoch": 0.2, "learning_rate": 0.0018605384430499833, "loss": 2.0724, "step": 601 }, { "epoch": 0.2, "learning_rate": 0.0018600020386238753, "loss": 1.9851, "step": 602 }, { "epoch": 0.2, "learning_rate": 0.0018594646822358402, "loss": 2.0904, "step": 603 }, { "epoch": 0.2, "learning_rate": 0.0018589263744806948, "loss": 2.0285, "step": 604 }, { "epoch": 0.2, "learning_rate": 0.0018583871159543071, "loss": 1.9982, "step": 605 }, { "epoch": 0.2, "learning_rate": 0.001857846907253599, "loss": 2.0522, "step": 606 }, { "epoch": 0.2, "learning_rate": 0.0018573057489765436, "loss": 2.0341, "step": 607 }, { "epoch": 0.2, "learning_rate": 0.0018567636417221652, "loss": 2.0398, "step": 608 }, { "epoch": 0.2, "learning_rate": 0.0018562205860905379, "loss": 2.1015, "step": 609 }, { "epoch": 0.2, "learning_rate": 0.0018556765826827871, "loss": 2.0336, "step": 610 }, { "epoch": 0.2, "learning_rate": 0.0018551316321010858, "loss": 2.0453, "step": 611 }, { "epoch": 0.2, "learning_rate": 0.0018545857349486564, "loss": 2.1177, "step": 612 }, { "epoch": 0.2, "learning_rate": 0.0018540388918297681, "loss": 2.0428, "step": 613 }, { "epoch": 0.2, "learning_rate": 0.0018534911033497386, "loss": 2.04, "step": 614 }, { "epoch": 0.2, "learning_rate": 0.0018529423701149313, "loss": 2.089, "step": 615 }, { "epoch": 0.2, "learning_rate": 0.0018523926927327549, "loss": 2.0528, "step": 616 }, { "epoch": 0.2, "learning_rate": 0.001851842071811664, "loss": 2.1072, "step": 617 }, { "epoch": 0.2, "learning_rate": 0.0018512905079611573, "loss": 2.1288, "step": 618 }, { "epoch": 0.2, "learning_rate": 0.0018507380017917772, "loss": 2.0465, "step": 619 }, { "epoch": 0.2, "learning_rate": 0.0018501845539151097, "loss": 2.0475, "step": 620 }, { "epoch": 0.2, "learning_rate": 0.001849630164943782, "loss": 2.0834, "step": 621 }, { "epoch": 0.2, "learning_rate": 0.0018490748354914647, "loss": 2.0578, "step": 622 }, { "epoch": 0.2, "learning_rate": 0.0018485185661728677, "loss": 2.0583, "step": 623 }, { "epoch": 0.2, "learning_rate": 0.0018479613576037426, "loss": 2.0998, "step": 624 }, { "epoch": 0.2, "learning_rate": 0.00184740321040088, "loss": 2.0542, "step": 625 }, { "epoch": 0.2, "learning_rate": 0.00184684412518211, "loss": 2.0458, "step": 626 }, { "epoch": 0.2, "learning_rate": 0.0018462841025663002, "loss": 2.0536, "step": 627 }, { "epoch": 0.2, "learning_rate": 0.001845723143173357, "loss": 1.9684, "step": 628 }, { "epoch": 0.2, "learning_rate": 0.0018451612476242225, "loss": 2.0439, "step": 629 }, { "epoch": 0.2, "learning_rate": 0.001844598416540876, "loss": 2.0858, "step": 630 }, { "epoch": 0.2, "learning_rate": 0.0018440346505463321, "loss": 2.0949, "step": 631 }, { "epoch": 0.21, "learning_rate": 0.0018434699502646397, "loss": 2.0807, "step": 632 }, { "epoch": 0.21, "learning_rate": 0.001842904316320883, "loss": 2.1028, "step": 633 }, { "epoch": 0.21, "learning_rate": 0.0018423377493411788, "loss": 2.1203, "step": 634 }, { "epoch": 0.21, "learning_rate": 0.001841770249952677, "loss": 2.0576, "step": 635 }, { "epoch": 0.21, "learning_rate": 0.0018412018187835596, "loss": 2.0445, "step": 636 }, { "epoch": 0.21, "learning_rate": 0.0018406324564630401, "loss": 2.0403, "step": 637 }, { "epoch": 0.21, "learning_rate": 0.0018400621636213625, "loss": 2.0406, "step": 638 }, { "epoch": 0.21, "learning_rate": 0.0018394909408898012, "loss": 2.075, "step": 639 }, { "epoch": 0.21, "learning_rate": 0.0018389187889006596, "loss": 2.0461, "step": 640 }, { "epoch": 0.21, "learning_rate": 0.0018383457082872699, "loss": 2.1189, "step": 641 }, { "epoch": 0.21, "learning_rate": 0.001837771699683992, "loss": 2.0012, "step": 642 }, { "epoch": 0.21, "learning_rate": 0.001837196763726213, "loss": 2.0922, "step": 643 }, { "epoch": 0.21, "learning_rate": 0.001836620901050347, "loss": 2.0066, "step": 644 }, { "epoch": 0.21, "learning_rate": 0.0018360441122938333, "loss": 2.027, "step": 645 }, { "epoch": 0.21, "learning_rate": 0.001835466398095137, "loss": 1.9155, "step": 646 }, { "epoch": 0.21, "learning_rate": 0.0018348877590937467, "loss": 2.0399, "step": 647 }, { "epoch": 0.21, "learning_rate": 0.0018343081959301757, "loss": 2.1007, "step": 648 }, { "epoch": 0.21, "learning_rate": 0.0018337277092459591, "loss": 2.0416, "step": 649 }, { "epoch": 0.21, "learning_rate": 0.0018331462996836554, "loss": 2.0681, "step": 650 }, { "epoch": 0.21, "learning_rate": 0.0018325639678868444, "loss": 2.0375, "step": 651 }, { "epoch": 0.21, "learning_rate": 0.001831980714500126, "loss": 2.0371, "step": 652 }, { "epoch": 0.21, "learning_rate": 0.0018313965401691213, "loss": 2.1078, "step": 653 }, { "epoch": 0.21, "learning_rate": 0.0018308114455404696, "loss": 2.0314, "step": 654 }, { "epoch": 0.21, "learning_rate": 0.0018302254312618303, "loss": 2.0954, "step": 655 }, { "epoch": 0.21, "learning_rate": 0.0018296384979818799, "loss": 2.0807, "step": 656 }, { "epoch": 0.21, "learning_rate": 0.0018290506463503125, "loss": 2.0138, "step": 657 }, { "epoch": 0.21, "learning_rate": 0.0018284618770178383, "loss": 2.0349, "step": 658 }, { "epoch": 0.21, "learning_rate": 0.0018278721906361834, "loss": 2.0406, "step": 659 }, { "epoch": 0.21, "learning_rate": 0.0018272815878580902, "loss": 2.0662, "step": 660 }, { "epoch": 0.21, "learning_rate": 0.0018266900693373138, "loss": 2.0533, "step": 661 }, { "epoch": 0.22, "learning_rate": 0.0018260976357286239, "loss": 2.0415, "step": 662 }, { "epoch": 0.22, "learning_rate": 0.0018255042876878028, "loss": 2.0553, "step": 663 }, { "epoch": 0.22, "learning_rate": 0.0018249100258716454, "loss": 2.0603, "step": 664 }, { "epoch": 0.22, "learning_rate": 0.0018243148509379577, "loss": 2.0532, "step": 665 }, { "epoch": 0.22, "learning_rate": 0.0018237187635455566, "loss": 2.048, "step": 666 }, { "epoch": 0.22, "learning_rate": 0.0018231217643542691, "loss": 2.0076, "step": 667 }, { "epoch": 0.22, "learning_rate": 0.0018225238540249317, "loss": 2.0231, "step": 668 }, { "epoch": 0.22, "learning_rate": 0.0018219250332193885, "loss": 2.0706, "step": 669 }, { "epoch": 0.22, "learning_rate": 0.0018213253026004926, "loss": 2.0822, "step": 670 }, { "epoch": 0.22, "learning_rate": 0.0018207246628321038, "loss": 2.0346, "step": 671 }, { "epoch": 0.22, "learning_rate": 0.0018201231145790879, "loss": 2.0709, "step": 672 }, { "epoch": 0.22, "learning_rate": 0.0018195206585073164, "loss": 2.0654, "step": 673 }, { "epoch": 0.22, "learning_rate": 0.0018189172952836668, "loss": 1.9906, "step": 674 }, { "epoch": 0.22, "learning_rate": 0.001818313025576019, "loss": 2.0052, "step": 675 }, { "epoch": 0.22, "learning_rate": 0.0018177078500532574, "loss": 2.0532, "step": 676 }, { "epoch": 0.22, "learning_rate": 0.001817101769385269, "loss": 2.1184, "step": 677 }, { "epoch": 0.22, "learning_rate": 0.0018164947842429426, "loss": 2.033, "step": 678 }, { "epoch": 0.22, "learning_rate": 0.001815886895298168, "loss": 2.0501, "step": 679 }, { "epoch": 0.22, "learning_rate": 0.001815278103223836, "loss": 2.0147, "step": 680 }, { "epoch": 0.22, "learning_rate": 0.001814668408693836, "loss": 2.0444, "step": 681 }, { "epoch": 0.22, "learning_rate": 0.0018140578123830577, "loss": 2.1034, "step": 682 }, { "epoch": 0.22, "learning_rate": 0.0018134463149673881, "loss": 1.9986, "step": 683 }, { "epoch": 0.22, "learning_rate": 0.001812833917123712, "loss": 2.0067, "step": 684 }, { "epoch": 0.22, "learning_rate": 0.001812220619529911, "loss": 2.0035, "step": 685 }, { "epoch": 0.22, "learning_rate": 0.0018116064228648622, "loss": 2.0901, "step": 686 }, { "epoch": 0.22, "learning_rate": 0.0018109913278084387, "loss": 2.0863, "step": 687 }, { "epoch": 0.22, "learning_rate": 0.001810375335041507, "loss": 2.0204, "step": 688 }, { "epoch": 0.22, "learning_rate": 0.001809758445245928, "loss": 2.0573, "step": 689 }, { "epoch": 0.22, "learning_rate": 0.001809140659104556, "loss": 2.1379, "step": 690 }, { "epoch": 0.22, "learning_rate": 0.0018085219773012365, "loss": 2.0386, "step": 691 }, { "epoch": 0.22, "learning_rate": 0.0018079024005208067, "loss": 1.9666, "step": 692 }, { "epoch": 0.23, "learning_rate": 0.0018072819294490947, "loss": 2.1035, "step": 693 }, { "epoch": 0.23, "learning_rate": 0.0018066605647729188, "loss": 2.0549, "step": 694 }, { "epoch": 0.23, "learning_rate": 0.0018060383071800859, "loss": 2.0504, "step": 695 }, { "epoch": 0.23, "learning_rate": 0.0018054151573593912, "loss": 2.1283, "step": 696 }, { "epoch": 0.23, "learning_rate": 0.0018047911160006182, "loss": 2.0588, "step": 697 }, { "epoch": 0.23, "learning_rate": 0.001804166183794537, "loss": 2.1567, "step": 698 }, { "epoch": 0.23, "learning_rate": 0.001803540361432903, "loss": 2.0114, "step": 699 }, { "epoch": 0.23, "learning_rate": 0.0018029136496084583, "loss": 2.072, "step": 700 }, { "epoch": 0.23, "learning_rate": 0.001802286049014929, "loss": 2.0697, "step": 701 }, { "epoch": 0.23, "learning_rate": 0.001801657560347025, "loss": 2.1085, "step": 702 }, { "epoch": 0.23, "learning_rate": 0.0018010281843004385, "loss": 2.1032, "step": 703 }, { "epoch": 0.23, "learning_rate": 0.0018003979215718451, "loss": 2.096, "step": 704 }, { "epoch": 0.23, "learning_rate": 0.0017997667728589014, "loss": 2.1098, "step": 705 }, { "epoch": 0.23, "learning_rate": 0.0017991347388602447, "loss": 2.11, "step": 706 }, { "epoch": 0.23, "learning_rate": 0.0017985018202754922, "loss": 1.9995, "step": 707 }, { "epoch": 0.23, "learning_rate": 0.0017978680178052403, "loss": 1.9648, "step": 708 }, { "epoch": 0.23, "learning_rate": 0.0017972333321510643, "loss": 2.0677, "step": 709 }, { "epoch": 0.23, "learning_rate": 0.0017965977640155165, "loss": 2.0654, "step": 710 }, { "epoch": 0.23, "learning_rate": 0.001795961314102126, "loss": 2.109, "step": 711 }, { "epoch": 0.23, "learning_rate": 0.0017953239831153987, "loss": 2.0635, "step": 712 }, { "epoch": 0.23, "learning_rate": 0.0017946857717608144, "loss": 2.0827, "step": 713 }, { "epoch": 0.23, "learning_rate": 0.0017940466807448293, "loss": 1.9763, "step": 714 }, { "epoch": 0.23, "learning_rate": 0.001793406710774872, "loss": 2.0237, "step": 715 }, { "epoch": 0.23, "learning_rate": 0.0017927658625593438, "loss": 2.0455, "step": 716 }, { "epoch": 0.23, "learning_rate": 0.0017921241368076195, "loss": 2.0798, "step": 717 }, { "epoch": 0.23, "learning_rate": 0.001791481534230044, "loss": 2.0623, "step": 718 }, { "epoch": 0.23, "learning_rate": 0.0017908380555379337, "loss": 2.0078, "step": 719 }, { "epoch": 0.23, "learning_rate": 0.0017901937014435738, "loss": 2.0683, "step": 720 }, { "epoch": 0.23, "learning_rate": 0.0017895484726602195, "loss": 2.0991, "step": 721 }, { "epoch": 0.23, "learning_rate": 0.0017889023699020937, "loss": 2.0288, "step": 722 }, { "epoch": 0.23, "learning_rate": 0.0017882553938843868, "loss": 2.0175, "step": 723 }, { "epoch": 0.24, "learning_rate": 0.001787607545323256, "loss": 2.0567, "step": 724 }, { "epoch": 0.24, "learning_rate": 0.0017869588249358242, "loss": 2.0658, "step": 725 }, { "epoch": 0.24, "learning_rate": 0.001786309233440179, "loss": 2.023, "step": 726 }, { "epoch": 0.24, "learning_rate": 0.001785658771555373, "loss": 1.9744, "step": 727 }, { "epoch": 0.24, "learning_rate": 0.0017850074400014217, "loss": 2.0452, "step": 728 }, { "epoch": 0.24, "learning_rate": 0.0017843552394993035, "loss": 2.0626, "step": 729 }, { "epoch": 0.24, "learning_rate": 0.0017837021707709586, "loss": 2.0739, "step": 730 }, { "epoch": 0.24, "learning_rate": 0.0017830482345392881, "loss": 2.089, "step": 731 }, { "epoch": 0.24, "learning_rate": 0.001782393431528154, "loss": 2.0068, "step": 732 }, { "epoch": 0.24, "learning_rate": 0.0017817377624623767, "loss": 2.1089, "step": 733 }, { "epoch": 0.24, "learning_rate": 0.0017810812280677365, "loss": 2.0301, "step": 734 }, { "epoch": 0.24, "learning_rate": 0.0017804238290709702, "loss": 2.1516, "step": 735 }, { "epoch": 0.24, "learning_rate": 0.0017797655661997728, "loss": 2.0661, "step": 736 }, { "epoch": 0.24, "learning_rate": 0.0017791064401827953, "loss": 2.1082, "step": 737 }, { "epoch": 0.24, "learning_rate": 0.0017784464517496435, "loss": 2.0286, "step": 738 }, { "epoch": 0.24, "learning_rate": 0.0017777856016308786, "loss": 2.0662, "step": 739 }, { "epoch": 0.24, "learning_rate": 0.0017771238905580154, "loss": 2.086, "step": 740 }, { "epoch": 0.24, "learning_rate": 0.001776461319263521, "loss": 1.9633, "step": 741 }, { "epoch": 0.24, "learning_rate": 0.0017757978884808164, "loss": 2.0705, "step": 742 }, { "epoch": 0.24, "learning_rate": 0.001775133598944272, "loss": 2.0439, "step": 743 }, { "epoch": 0.24, "learning_rate": 0.0017744684513892103, "loss": 2.1094, "step": 744 }, { "epoch": 0.24, "learning_rate": 0.0017738024465519025, "loss": 2.0532, "step": 745 }, { "epoch": 0.24, "learning_rate": 0.0017731355851695696, "loss": 2.0011, "step": 746 }, { "epoch": 0.24, "learning_rate": 0.0017724678679803803, "loss": 2.0671, "step": 747 }, { "epoch": 0.24, "learning_rate": 0.0017717992957234506, "loss": 2.0308, "step": 748 }, { "epoch": 0.24, "learning_rate": 0.0017711298691388431, "loss": 1.9415, "step": 749 }, { "epoch": 0.24, "learning_rate": 0.0017704595889675662, "loss": 2.0606, "step": 750 }, { "epoch": 0.24, "learning_rate": 0.001769788455951573, "loss": 2.0442, "step": 751 }, { "epoch": 0.24, "learning_rate": 0.0017691164708337603, "loss": 2.0792, "step": 752 }, { "epoch": 0.24, "learning_rate": 0.0017684436343579691, "loss": 2.0979, "step": 753 }, { "epoch": 0.24, "learning_rate": 0.0017677699472689816, "loss": 2.0941, "step": 754 }, { "epoch": 0.25, "learning_rate": 0.0017670954103125225, "loss": 2.1527, "step": 755 }, { "epoch": 0.25, "learning_rate": 0.0017664200242352571, "loss": 2.0311, "step": 756 }, { "epoch": 0.25, "learning_rate": 0.0017657437897847896, "loss": 2.0873, "step": 757 }, { "epoch": 0.25, "learning_rate": 0.0017650667077096652, "loss": 1.9802, "step": 758 }, { "epoch": 0.25, "learning_rate": 0.0017643887787593654, "loss": 2.0394, "step": 759 }, { "epoch": 0.25, "learning_rate": 0.00176371000368431, "loss": 2.0801, "step": 760 }, { "epoch": 0.25, "learning_rate": 0.0017630303832358559, "loss": 1.9778, "step": 761 }, { "epoch": 0.25, "learning_rate": 0.001762349918166295, "loss": 2.0216, "step": 762 }, { "epoch": 0.25, "learning_rate": 0.0017616686092288543, "loss": 2.0338, "step": 763 }, { "epoch": 0.25, "learning_rate": 0.0017609864571776952, "loss": 2.0074, "step": 764 }, { "epoch": 0.25, "learning_rate": 0.001760303462767912, "loss": 2.0378, "step": 765 }, { "epoch": 0.25, "learning_rate": 0.0017596196267555318, "loss": 1.9877, "step": 766 }, { "epoch": 0.25, "learning_rate": 0.0017589349498975128, "loss": 2.0811, "step": 767 }, { "epoch": 0.25, "learning_rate": 0.0017582494329517445, "loss": 2.0613, "step": 768 }, { "epoch": 0.25, "learning_rate": 0.0017575630766770458, "loss": 2.0206, "step": 769 }, { "epoch": 0.25, "learning_rate": 0.0017568758818331657, "loss": 2.0058, "step": 770 }, { "epoch": 0.25, "learning_rate": 0.0017561878491807797, "loss": 2.1282, "step": 771 }, { "epoch": 0.25, "learning_rate": 0.001755498979481492, "loss": 1.9862, "step": 772 }, { "epoch": 0.25, "learning_rate": 0.0017548092734978333, "loss": 2.0444, "step": 773 }, { "epoch": 0.25, "learning_rate": 0.0017541187319932594, "loss": 2.1242, "step": 774 }, { "epoch": 0.25, "learning_rate": 0.0017534273557321515, "loss": 2.0656, "step": 775 }, { "epoch": 0.25, "learning_rate": 0.0017527351454798149, "loss": 2.0131, "step": 776 }, { "epoch": 0.25, "learning_rate": 0.0017520421020024773, "loss": 1.985, "step": 777 }, { "epoch": 0.25, "learning_rate": 0.0017513482260672895, "loss": 2.0947, "step": 778 }, { "epoch": 0.25, "learning_rate": 0.0017506535184423232, "loss": 2.0473, "step": 779 }, { "epoch": 0.25, "learning_rate": 0.0017499579798965713, "loss": 2.0392, "step": 780 }, { "epoch": 0.25, "learning_rate": 0.001749261611199946, "loss": 2.0844, "step": 781 }, { "epoch": 0.25, "learning_rate": 0.0017485644131232786, "loss": 2.0488, "step": 782 }, { "epoch": 0.25, "learning_rate": 0.0017478663864383185, "loss": 2.0629, "step": 783 }, { "epoch": 0.25, "learning_rate": 0.0017471675319177323, "loss": 2.0638, "step": 784 }, { "epoch": 0.25, "learning_rate": 0.0017464678503351031, "loss": 2.0639, "step": 785 }, { "epoch": 0.26, "learning_rate": 0.001745767342464929, "loss": 2.067, "step": 786 }, { "epoch": 0.26, "learning_rate": 0.0017450660090826233, "loss": 2.0737, "step": 787 }, { "epoch": 0.26, "learning_rate": 0.0017443638509645127, "loss": 2.0523, "step": 788 }, { "epoch": 0.26, "learning_rate": 0.0017436608688878373, "loss": 2.0402, "step": 789 }, { "epoch": 0.26, "learning_rate": 0.0017429570636307483, "loss": 1.9324, "step": 790 }, { "epoch": 0.26, "learning_rate": 0.0017422524359723096, "loss": 2.0161, "step": 791 }, { "epoch": 0.26, "learning_rate": 0.0017415469866924938, "loss": 2.0163, "step": 792 }, { "epoch": 0.26, "learning_rate": 0.0017408407165721842, "loss": 2.0858, "step": 793 }, { "epoch": 0.26, "learning_rate": 0.0017401336263931722, "loss": 2.0924, "step": 794 }, { "epoch": 0.26, "learning_rate": 0.0017394257169381568, "loss": 2.0344, "step": 795 }, { "epoch": 0.26, "learning_rate": 0.0017387169889907445, "loss": 2.1021, "step": 796 }, { "epoch": 0.26, "learning_rate": 0.001738007443335447, "loss": 1.9772, "step": 797 }, { "epoch": 0.26, "learning_rate": 0.0017372970807576823, "loss": 2.0394, "step": 798 }, { "epoch": 0.26, "learning_rate": 0.0017365859020437712, "loss": 2.0795, "step": 799 }, { "epoch": 0.26, "learning_rate": 0.0017358739079809387, "loss": 2.0934, "step": 800 }, { "epoch": 0.26, "learning_rate": 0.0017351610993573123, "loss": 2.0364, "step": 801 }, { "epoch": 0.26, "learning_rate": 0.0017344474769619219, "loss": 2.023, "step": 802 }, { "epoch": 0.26, "learning_rate": 0.0017337330415846963, "loss": 2.1036, "step": 803 }, { "epoch": 0.26, "learning_rate": 0.0017330177940164662, "loss": 2.0484, "step": 804 }, { "epoch": 0.26, "learning_rate": 0.00173230173504896, "loss": 2.0979, "step": 805 }, { "epoch": 0.26, "learning_rate": 0.0017315848654748049, "loss": 2.0145, "step": 806 }, { "epoch": 0.26, "learning_rate": 0.001730867186087525, "loss": 2.0225, "step": 807 }, { "epoch": 0.26, "learning_rate": 0.0017301486976815418, "loss": 2.0018, "step": 808 }, { "epoch": 0.26, "learning_rate": 0.0017294294010521706, "loss": 1.9747, "step": 809 }, { "epoch": 0.26, "learning_rate": 0.0017287092969956231, "loss": 2.0119, "step": 810 }, { "epoch": 0.26, "learning_rate": 0.0017279883863090034, "loss": 2.0334, "step": 811 }, { "epoch": 0.26, "learning_rate": 0.0017272666697903092, "loss": 1.9868, "step": 812 }, { "epoch": 0.26, "learning_rate": 0.00172654414823843, "loss": 2.1112, "step": 813 }, { "epoch": 0.26, "learning_rate": 0.0017258208224531471, "loss": 2.0737, "step": 814 }, { "epoch": 0.26, "learning_rate": 0.0017250966932351305, "loss": 2.0365, "step": 815 }, { "epoch": 0.27, "learning_rate": 0.001724371761385941, "loss": 2.0486, "step": 816 }, { "epoch": 0.27, "learning_rate": 0.001723646027708027, "loss": 2.0915, "step": 817 }, { "epoch": 0.27, "learning_rate": 0.0017229194930047252, "loss": 2.0019, "step": 818 }, { "epoch": 0.27, "learning_rate": 0.001722192158080258, "loss": 2.0255, "step": 819 }, { "epoch": 0.27, "learning_rate": 0.0017214640237397348, "loss": 2.0989, "step": 820 }, { "epoch": 0.27, "learning_rate": 0.0017207350907891483, "loss": 2.0892, "step": 821 }, { "epoch": 0.27, "learning_rate": 0.001720005360035377, "loss": 2.0169, "step": 822 }, { "epoch": 0.27, "learning_rate": 0.0017192748322861814, "loss": 1.9828, "step": 823 }, { "epoch": 0.27, "learning_rate": 0.001718543508350204, "loss": 2.0409, "step": 824 }, { "epoch": 0.27, "learning_rate": 0.00171781138903697, "loss": 2.0528, "step": 825 }, { "epoch": 0.27, "learning_rate": 0.0017170784751568836, "loss": 2.0066, "step": 826 }, { "epoch": 0.27, "learning_rate": 0.0017163447675212287, "loss": 2.0489, "step": 827 }, { "epoch": 0.27, "learning_rate": 0.001715610266942169, "loss": 2.0423, "step": 828 }, { "epoch": 0.27, "learning_rate": 0.0017148749742327443, "loss": 2.0851, "step": 829 }, { "epoch": 0.27, "learning_rate": 0.0017141388902068724, "loss": 2.0734, "step": 830 }, { "epoch": 0.27, "learning_rate": 0.001713402015679347, "loss": 1.9922, "step": 831 }, { "epoch": 0.27, "learning_rate": 0.001712664351465836, "loss": 2.0418, "step": 832 }, { "epoch": 0.27, "learning_rate": 0.0017119258983828822, "loss": 2.0579, "step": 833 }, { "epoch": 0.27, "learning_rate": 0.0017111866572479012, "loss": 2.0888, "step": 834 }, { "epoch": 0.27, "learning_rate": 0.0017104466288791814, "loss": 2.0849, "step": 835 }, { "epoch": 0.27, "learning_rate": 0.0017097058140958818, "loss": 1.9614, "step": 836 }, { "epoch": 0.27, "learning_rate": 0.001708964213718033, "loss": 2.0426, "step": 837 }, { "epoch": 0.27, "learning_rate": 0.0017082218285665342, "loss": 2.0164, "step": 838 }, { "epoch": 0.27, "learning_rate": 0.0017074786594631535, "loss": 2.0245, "step": 839 }, { "epoch": 0.27, "learning_rate": 0.0017067347072305275, "loss": 1.9837, "step": 840 }, { "epoch": 0.27, "learning_rate": 0.0017059899726921586, "loss": 1.9614, "step": 841 }, { "epoch": 0.27, "learning_rate": 0.001705244456672416, "loss": 1.9895, "step": 842 }, { "epoch": 0.27, "learning_rate": 0.0017044981599965336, "loss": 2.0404, "step": 843 }, { "epoch": 0.27, "learning_rate": 0.0017037510834906095, "loss": 1.9788, "step": 844 }, { "epoch": 0.27, "learning_rate": 0.0017030032279816048, "loss": 1.9829, "step": 845 }, { "epoch": 0.27, "learning_rate": 0.0017022545942973434, "loss": 2.0822, "step": 846 }, { "epoch": 0.28, "learning_rate": 0.00170150518326651, "loss": 2.0808, "step": 847 }, { "epoch": 0.28, "learning_rate": 0.0017007549957186497, "loss": 1.9744, "step": 848 }, { "epoch": 0.28, "learning_rate": 0.001700004032484168, "loss": 1.9659, "step": 849 }, { "epoch": 0.28, "learning_rate": 0.0016992522943943288, "loss": 2.0823, "step": 850 }, { "epoch": 0.28, "learning_rate": 0.001698499782281253, "loss": 2.0644, "step": 851 }, { "epoch": 0.28, "learning_rate": 0.0016977464969779189, "loss": 1.9693, "step": 852 }, { "epoch": 0.28, "learning_rate": 0.00169699243931816, "loss": 1.9847, "step": 853 }, { "epoch": 0.28, "learning_rate": 0.0016962376101366658, "loss": 2.0562, "step": 854 }, { "epoch": 0.28, "learning_rate": 0.001695482010268979, "loss": 2.05, "step": 855 }, { "epoch": 0.28, "learning_rate": 0.0016947256405514955, "loss": 1.9587, "step": 856 }, { "epoch": 0.28, "learning_rate": 0.0016939685018214638, "loss": 2.0701, "step": 857 }, { "epoch": 0.28, "learning_rate": 0.0016932105949169836, "loss": 2.0174, "step": 858 }, { "epoch": 0.28, "learning_rate": 0.001692451920677004, "loss": 2.0718, "step": 859 }, { "epoch": 0.28, "learning_rate": 0.0016916924799413246, "loss": 2.0063, "step": 860 }, { "epoch": 0.28, "learning_rate": 0.001690932273550593, "loss": 2.1098, "step": 861 }, { "epoch": 0.28, "learning_rate": 0.0016901713023463039, "loss": 2.0831, "step": 862 }, { "epoch": 0.28, "learning_rate": 0.0016894095671707997, "loss": 2.033, "step": 863 }, { "epoch": 0.28, "learning_rate": 0.0016886470688672675, "loss": 2.0062, "step": 864 }, { "epoch": 0.28, "learning_rate": 0.0016878838082797394, "loss": 2.0567, "step": 865 }, { "epoch": 0.28, "learning_rate": 0.0016871197862530917, "loss": 2.1234, "step": 866 }, { "epoch": 0.28, "learning_rate": 0.0016863550036330423, "loss": 1.9374, "step": 867 }, { "epoch": 0.28, "learning_rate": 0.0016855894612661526, "loss": 1.9981, "step": 868 }, { "epoch": 0.28, "learning_rate": 0.0016848231599998244, "loss": 1.9936, "step": 869 }, { "epoch": 0.28, "learning_rate": 0.001684056100682299, "loss": 2.0159, "step": 870 }, { "epoch": 0.28, "learning_rate": 0.0016832882841626576, "loss": 2.0351, "step": 871 }, { "epoch": 0.28, "learning_rate": 0.001682519711290819, "loss": 2.011, "step": 872 }, { "epoch": 0.28, "learning_rate": 0.0016817503829175396, "loss": 2.0628, "step": 873 }, { "epoch": 0.28, "learning_rate": 0.001680980299894412, "loss": 2.0178, "step": 874 }, { "epoch": 0.28, "learning_rate": 0.0016802094630738638, "loss": 2.0679, "step": 875 }, { "epoch": 0.28, "learning_rate": 0.0016794378733091574, "loss": 2.0575, "step": 876 }, { "epoch": 0.28, "learning_rate": 0.0016786655314543885, "loss": 2.0209, "step": 877 }, { "epoch": 0.29, "learning_rate": 0.0016778924383644856, "loss": 2.0074, "step": 878 }, { "epoch": 0.29, "learning_rate": 0.0016771185948952084, "loss": 1.9977, "step": 879 }, { "epoch": 0.29, "learning_rate": 0.0016763440019031473, "loss": 2.0499, "step": 880 }, { "epoch": 0.29, "learning_rate": 0.0016755686602457224, "loss": 2.1481, "step": 881 }, { "epoch": 0.29, "learning_rate": 0.0016747925707811821, "loss": 2.0516, "step": 882 }, { "epoch": 0.29, "learning_rate": 0.001674015734368604, "loss": 2.0164, "step": 883 }, { "epoch": 0.29, "learning_rate": 0.0016732381518678905, "loss": 2.0543, "step": 884 }, { "epoch": 0.29, "learning_rate": 0.0016724598241397715, "loss": 2.1101, "step": 885 }, { "epoch": 0.29, "learning_rate": 0.001671680752045801, "loss": 1.9813, "step": 886 }, { "epoch": 0.29, "learning_rate": 0.0016709009364483571, "loss": 1.9541, "step": 887 }, { "epoch": 0.29, "learning_rate": 0.0016701203782106414, "loss": 1.9764, "step": 888 }, { "epoch": 0.29, "learning_rate": 0.0016693390781966763, "loss": 1.9972, "step": 889 }, { "epoch": 0.29, "learning_rate": 0.0016685570372713067, "loss": 2.0543, "step": 890 }, { "epoch": 0.29, "learning_rate": 0.0016677742563001967, "loss": 2.0023, "step": 891 }, { "epoch": 0.29, "learning_rate": 0.0016669907361498303, "loss": 2.0858, "step": 892 }, { "epoch": 0.29, "learning_rate": 0.001666206477687509, "loss": 2.0117, "step": 893 }, { "epoch": 0.29, "learning_rate": 0.001665421481781352, "loss": 1.998, "step": 894 }, { "epoch": 0.29, "learning_rate": 0.0016646357493002948, "loss": 2.0418, "step": 895 }, { "epoch": 0.29, "learning_rate": 0.0016638492811140882, "loss": 2.0105, "step": 896 }, { "epoch": 0.29, "learning_rate": 0.0016630620780932973, "loss": 2.0061, "step": 897 }, { "epoch": 0.29, "learning_rate": 0.0016622741411093003, "loss": 1.9732, "step": 898 }, { "epoch": 0.29, "learning_rate": 0.0016614854710342883, "loss": 2.1028, "step": 899 }, { "epoch": 0.29, "learning_rate": 0.001660696068741264, "loss": 1.9858, "step": 900 }, { "epoch": 0.29, "learning_rate": 0.0016599059351040401, "loss": 2.0482, "step": 901 }, { "epoch": 0.29, "learning_rate": 0.0016591150709972392, "loss": 2.0773, "step": 902 }, { "epoch": 0.29, "learning_rate": 0.0016583234772962925, "loss": 2.0712, "step": 903 }, { "epoch": 0.29, "learning_rate": 0.0016575311548774386, "loss": 2.0349, "step": 904 }, { "epoch": 0.29, "learning_rate": 0.0016567381046177225, "loss": 1.9795, "step": 905 }, { "epoch": 0.29, "learning_rate": 0.0016559443273949956, "loss": 1.9657, "step": 906 }, { "epoch": 0.29, "learning_rate": 0.0016551498240879137, "loss": 2.0245, "step": 907 }, { "epoch": 0.29, "learning_rate": 0.001654354595575936, "loss": 2.0728, "step": 908 }, { "epoch": 0.3, "learning_rate": 0.0016535586427393247, "loss": 2.1016, "step": 909 }, { "epoch": 0.3, "learning_rate": 0.0016527619664591438, "loss": 2.0334, "step": 910 }, { "epoch": 0.3, "learning_rate": 0.0016519645676172578, "loss": 2.0691, "step": 911 }, { "epoch": 0.3, "learning_rate": 0.0016511664470963317, "loss": 2.0981, "step": 912 }, { "epoch": 0.3, "learning_rate": 0.0016503676057798287, "loss": 2.0655, "step": 913 }, { "epoch": 0.3, "learning_rate": 0.0016495680445520102, "loss": 1.9478, "step": 914 }, { "epoch": 0.3, "learning_rate": 0.0016487677642979342, "loss": 2.0232, "step": 915 }, { "epoch": 0.3, "learning_rate": 0.001647966765903455, "loss": 2.0313, "step": 916 }, { "epoch": 0.3, "learning_rate": 0.0016471650502552216, "loss": 2.0185, "step": 917 }, { "epoch": 0.3, "learning_rate": 0.001646362618240677, "loss": 2.059, "step": 918 }, { "epoch": 0.3, "learning_rate": 0.001645559470748057, "loss": 1.9495, "step": 919 }, { "epoch": 0.3, "learning_rate": 0.0016447556086663896, "loss": 2.0281, "step": 920 }, { "epoch": 0.3, "learning_rate": 0.0016439510328854939, "loss": 2.0314, "step": 921 }, { "epoch": 0.3, "learning_rate": 0.0016431457442959783, "loss": 2.0735, "step": 922 }, { "epoch": 0.3, "learning_rate": 0.0016423397437892416, "loss": 2.016, "step": 923 }, { "epoch": 0.3, "learning_rate": 0.0016415330322574688, "loss": 1.9945, "step": 924 }, { "epoch": 0.3, "learning_rate": 0.001640725610593634, "loss": 1.9628, "step": 925 }, { "epoch": 0.3, "learning_rate": 0.001639917479691496, "loss": 2.1126, "step": 926 }, { "epoch": 0.3, "learning_rate": 0.0016391086404455987, "loss": 2.0264, "step": 927 }, { "epoch": 0.3, "learning_rate": 0.0016382990937512708, "loss": 2.063, "step": 928 }, { "epoch": 0.3, "learning_rate": 0.0016374888405046232, "loss": 1.9318, "step": 929 }, { "epoch": 0.3, "learning_rate": 0.0016366778816025503, "loss": 2.0521, "step": 930 }, { "epoch": 0.3, "learning_rate": 0.0016358662179427256, "loss": 2.0269, "step": 931 }, { "epoch": 0.3, "learning_rate": 0.0016350538504236046, "loss": 1.9622, "step": 932 }, { "epoch": 0.3, "learning_rate": 0.001634240779944421, "loss": 1.952, "step": 933 }, { "epoch": 0.3, "learning_rate": 0.0016334270074051867, "loss": 1.9759, "step": 934 }, { "epoch": 0.3, "learning_rate": 0.001632612533706691, "loss": 2.0239, "step": 935 }, { "epoch": 0.3, "learning_rate": 0.0016317973597504984, "loss": 2.0278, "step": 936 }, { "epoch": 0.3, "learning_rate": 0.0016309814864389502, "loss": 2.0246, "step": 937 }, { "epoch": 0.3, "learning_rate": 0.0016301649146751606, "loss": 2.0563, "step": 938 }, { "epoch": 0.3, "learning_rate": 0.0016293476453630173, "loss": 2.0648, "step": 939 }, { "epoch": 0.31, "learning_rate": 0.0016285296794071797, "loss": 2.0096, "step": 940 }, { "epoch": 0.31, "learning_rate": 0.0016277110177130797, "loss": 2.0187, "step": 941 }, { "epoch": 0.31, "learning_rate": 0.0016268916611869174, "loss": 2.0163, "step": 942 }, { "epoch": 0.31, "learning_rate": 0.001626071610735663, "loss": 2.0691, "step": 943 }, { "epoch": 0.31, "learning_rate": 0.0016252508672670557, "loss": 2.0106, "step": 944 }, { "epoch": 0.31, "learning_rate": 0.0016244294316896002, "loss": 2.0341, "step": 945 }, { "epoch": 0.31, "learning_rate": 0.001623607304912568, "loss": 2.0082, "step": 946 }, { "epoch": 0.31, "learning_rate": 0.0016227844878459964, "loss": 2.0171, "step": 947 }, { "epoch": 0.31, "learning_rate": 0.0016219609814006857, "loss": 2.0542, "step": 948 }, { "epoch": 0.31, "learning_rate": 0.0016211367864881999, "loss": 1.9994, "step": 949 }, { "epoch": 0.31, "learning_rate": 0.0016203119040208646, "loss": 1.9499, "step": 950 }, { "epoch": 0.31, "learning_rate": 0.0016194863349117674, "loss": 2.0921, "step": 951 }, { "epoch": 0.31, "learning_rate": 0.001618660080074755, "loss": 2.0784, "step": 952 }, { "epoch": 0.31, "learning_rate": 0.001617833140424434, "loss": 1.9804, "step": 953 }, { "epoch": 0.31, "learning_rate": 0.0016170055168761677, "loss": 1.9987, "step": 954 }, { "epoch": 0.31, "learning_rate": 0.0016161772103460783, "loss": 2.0856, "step": 955 }, { "epoch": 0.31, "learning_rate": 0.0016153482217510424, "loss": 1.9937, "step": 956 }, { "epoch": 0.31, "learning_rate": 0.0016145185520086926, "loss": 2.0427, "step": 957 }, { "epoch": 0.31, "learning_rate": 0.0016136882020374146, "loss": 2.0539, "step": 958 }, { "epoch": 0.31, "learning_rate": 0.001612857172756348, "loss": 1.995, "step": 959 }, { "epoch": 0.31, "learning_rate": 0.0016120254650853838, "loss": 1.9856, "step": 960 }, { "epoch": 0.31, "learning_rate": 0.001611193079945164, "loss": 2.0195, "step": 961 }, { "epoch": 0.31, "learning_rate": 0.0016103600182570809, "loss": 2.006, "step": 962 }, { "epoch": 0.31, "learning_rate": 0.0016095262809432747, "loss": 2.0073, "step": 963 }, { "epoch": 0.31, "learning_rate": 0.0016086918689266346, "loss": 1.9973, "step": 964 }, { "epoch": 0.31, "learning_rate": 0.0016078567831307958, "loss": 2.0317, "step": 965 }, { "epoch": 0.31, "learning_rate": 0.0016070210244801403, "loss": 2.0073, "step": 966 }, { "epoch": 0.31, "learning_rate": 0.0016061845938997934, "loss": 2.0555, "step": 967 }, { "epoch": 0.31, "learning_rate": 0.0016053474923156262, "loss": 2.0758, "step": 968 }, { "epoch": 0.31, "learning_rate": 0.0016045097206542503, "loss": 2.0127, "step": 969 }, { "epoch": 0.32, "learning_rate": 0.0016036712798430206, "loss": 2.0527, "step": 970 }, { "epoch": 0.32, "learning_rate": 0.0016028321708100328, "loss": 2.0459, "step": 971 }, { "epoch": 0.32, "learning_rate": 0.0016019923944841214, "loss": 1.9874, "step": 972 }, { "epoch": 0.32, "learning_rate": 0.0016011519517948592, "loss": 2.0796, "step": 973 }, { "epoch": 0.32, "learning_rate": 0.0016003108436725582, "loss": 1.9989, "step": 974 }, { "epoch": 0.32, "learning_rate": 0.0015994690710482656, "loss": 2.072, "step": 975 }, { "epoch": 0.32, "learning_rate": 0.001598626634853765, "loss": 1.9538, "step": 976 }, { "epoch": 0.32, "learning_rate": 0.001597783536021574, "loss": 1.9828, "step": 977 }, { "epoch": 0.32, "learning_rate": 0.0015969397754849435, "loss": 2.059, "step": 978 }, { "epoch": 0.32, "learning_rate": 0.0015960953541778577, "loss": 2.0429, "step": 979 }, { "epoch": 0.32, "learning_rate": 0.0015952502730350315, "loss": 2.1043, "step": 980 }, { "epoch": 0.32, "learning_rate": 0.0015944045329919108, "loss": 2.0179, "step": 981 }, { "epoch": 0.32, "learning_rate": 0.00159355813498467, "loss": 2.045, "step": 982 }, { "epoch": 0.32, "learning_rate": 0.0015927110799502126, "loss": 2.0576, "step": 983 }, { "epoch": 0.32, "learning_rate": 0.0015918633688261693, "loss": 2.0109, "step": 984 }, { "epoch": 0.32, "learning_rate": 0.001591015002550896, "loss": 2.0253, "step": 985 }, { "epoch": 0.32, "learning_rate": 0.0015901659820634758, "loss": 2.0242, "step": 986 }, { "epoch": 0.32, "learning_rate": 0.0015893163083037138, "loss": 2.0606, "step": 987 }, { "epoch": 0.32, "learning_rate": 0.0015884659822121402, "loss": 2.0031, "step": 988 }, { "epoch": 0.32, "learning_rate": 0.0015876150047300058, "loss": 1.9624, "step": 989 }, { "epoch": 0.32, "learning_rate": 0.001586763376799283, "loss": 1.8883, "step": 990 }, { "epoch": 0.32, "learning_rate": 0.0015859110993626639, "loss": 2.0278, "step": 991 }, { "epoch": 0.32, "learning_rate": 0.0015850581733635604, "loss": 2.0463, "step": 992 }, { "epoch": 0.32, "learning_rate": 0.0015842045997461014, "loss": 2.0236, "step": 993 }, { "epoch": 0.32, "learning_rate": 0.001583350379455133, "loss": 2.0282, "step": 994 }, { "epoch": 0.32, "learning_rate": 0.0015824955134362173, "loss": 2.049, "step": 995 }, { "epoch": 0.32, "learning_rate": 0.0015816400026356313, "loss": 2.0532, "step": 996 }, { "epoch": 0.32, "learning_rate": 0.0015807838480003647, "loss": 1.9822, "step": 997 }, { "epoch": 0.32, "learning_rate": 0.001579927050478121, "loss": 1.9679, "step": 998 }, { "epoch": 0.32, "learning_rate": 0.0015790696110173153, "loss": 1.9687, "step": 999 }, { "epoch": 0.32, "learning_rate": 0.0015782115305670722, "loss": 1.9639, "step": 1000 }, { "epoch": 0.33, "learning_rate": 0.001577352810077227, "loss": 2.0357, "step": 1001 }, { "epoch": 0.33, "learning_rate": 0.0015764934504983233, "loss": 1.98, "step": 1002 }, { "epoch": 0.33, "learning_rate": 0.0015756334527816111, "loss": 2.0457, "step": 1003 }, { "epoch": 0.33, "learning_rate": 0.0015747728178790487, "loss": 1.9957, "step": 1004 }, { "epoch": 0.33, "learning_rate": 0.0015739115467432977, "loss": 2.0154, "step": 1005 }, { "epoch": 0.33, "learning_rate": 0.0015730496403277246, "loss": 2.0913, "step": 1006 }, { "epoch": 0.33, "learning_rate": 0.0015721870995864, "loss": 2.0419, "step": 1007 }, { "epoch": 0.33, "learning_rate": 0.0015713239254740957, "loss": 1.9615, "step": 1008 }, { "epoch": 0.33, "learning_rate": 0.0015704601189462848, "loss": 1.9704, "step": 1009 }, { "epoch": 0.33, "learning_rate": 0.0015695956809591407, "loss": 2.053, "step": 1010 }, { "epoch": 0.33, "learning_rate": 0.0015687306124695358, "loss": 2.0259, "step": 1011 }, { "epoch": 0.33, "learning_rate": 0.0015678649144350398, "loss": 2.0405, "step": 1012 }, { "epoch": 0.33, "learning_rate": 0.00156699858781392, "loss": 2.0716, "step": 1013 }, { "epoch": 0.33, "learning_rate": 0.001566131633565139, "loss": 2.0552, "step": 1014 }, { "epoch": 0.33, "learning_rate": 0.0015652640526483542, "loss": 2.0201, "step": 1015 }, { "epoch": 0.33, "learning_rate": 0.0015643958460239175, "loss": 2.0713, "step": 1016 }, { "epoch": 0.33, "learning_rate": 0.001563527014652872, "loss": 2.0092, "step": 1017 }, { "epoch": 0.33, "learning_rate": 0.0015626575594969534, "loss": 2.0398, "step": 1018 }, { "epoch": 0.33, "learning_rate": 0.0015617874815185876, "loss": 1.9917, "step": 1019 }, { "epoch": 0.33, "learning_rate": 0.0015609167816808896, "loss": 1.9813, "step": 1020 }, { "epoch": 0.33, "learning_rate": 0.0015600454609476632, "loss": 1.9978, "step": 1021 }, { "epoch": 0.33, "learning_rate": 0.0015591735202833993, "loss": 2.0162, "step": 1022 }, { "epoch": 0.33, "learning_rate": 0.0015583009606532753, "loss": 1.9905, "step": 1023 }, { "epoch": 0.33, "learning_rate": 0.0015574277830231536, "loss": 2.0013, "step": 1024 }, { "epoch": 0.33, "learning_rate": 0.0015565539883595795, "loss": 1.9829, "step": 1025 }, { "epoch": 0.33, "learning_rate": 0.001555679577629784, "loss": 1.984, "step": 1026 }, { "epoch": 0.33, "learning_rate": 0.0015548045518016772, "loss": 2.0158, "step": 1027 }, { "epoch": 0.33, "learning_rate": 0.001553928911843852, "loss": 2.0539, "step": 1028 }, { "epoch": 0.33, "learning_rate": 0.00155305265872558, "loss": 2.0014, "step": 1029 }, { "epoch": 0.33, "learning_rate": 0.0015521757934168117, "loss": 2.0275, "step": 1030 }, { "epoch": 0.33, "learning_rate": 0.0015512983168881763, "loss": 2.0362, "step": 1031 }, { "epoch": 0.34, "learning_rate": 0.001550420230110978, "loss": 2.0073, "step": 1032 }, { "epoch": 0.34, "learning_rate": 0.0015495415340571978, "loss": 1.985, "step": 1033 }, { "epoch": 0.34, "learning_rate": 0.0015486622296994902, "loss": 2.0294, "step": 1034 }, { "epoch": 0.34, "learning_rate": 0.001547782318011184, "loss": 2.0242, "step": 1035 }, { "epoch": 0.34, "learning_rate": 0.0015469017999662786, "loss": 2.1605, "step": 1036 }, { "epoch": 0.34, "learning_rate": 0.001546020676539447, "loss": 2.0412, "step": 1037 }, { "epoch": 0.34, "learning_rate": 0.0015451389487060303, "loss": 2.0022, "step": 1038 }, { "epoch": 0.34, "learning_rate": 0.0015442566174420397, "loss": 1.8891, "step": 1039 }, { "epoch": 0.34, "learning_rate": 0.0015433736837241535, "loss": 1.9468, "step": 1040 }, { "epoch": 0.34, "learning_rate": 0.001542490148529718, "loss": 2.0402, "step": 1041 }, { "epoch": 0.34, "learning_rate": 0.0015416060128367439, "loss": 2.0142, "step": 1042 }, { "epoch": 0.34, "learning_rate": 0.0015407212776239083, "loss": 1.9723, "step": 1043 }, { "epoch": 0.34, "learning_rate": 0.0015398359438705503, "loss": 2.0277, "step": 1044 }, { "epoch": 0.34, "learning_rate": 0.0015389500125566726, "loss": 1.9853, "step": 1045 }, { "epoch": 0.34, "learning_rate": 0.001538063484662939, "loss": 2.0016, "step": 1046 }, { "epoch": 0.34, "learning_rate": 0.0015371763611706738, "loss": 2.0508, "step": 1047 }, { "epoch": 0.34, "learning_rate": 0.00153628864306186, "loss": 1.9874, "step": 1048 }, { "epoch": 0.34, "learning_rate": 0.00153540033131914, "loss": 1.9831, "step": 1049 }, { "epoch": 0.34, "learning_rate": 0.0015345114269258119, "loss": 2.0246, "step": 1050 }, { "epoch": 0.34, "learning_rate": 0.0015336219308658308, "loss": 2.0445, "step": 1051 }, { "epoch": 0.34, "learning_rate": 0.0015327318441238065, "loss": 2.0632, "step": 1052 }, { "epoch": 0.34, "learning_rate": 0.0015318411676850025, "loss": 1.9734, "step": 1053 }, { "epoch": 0.34, "learning_rate": 0.001530949902535335, "loss": 2.0183, "step": 1054 }, { "epoch": 0.34, "learning_rate": 0.0015300580496613726, "loss": 2.0284, "step": 1055 }, { "epoch": 0.34, "learning_rate": 0.0015291656100503328, "loss": 1.9821, "step": 1056 }, { "epoch": 0.34, "learning_rate": 0.0015282725846900848, "loss": 2.0234, "step": 1057 }, { "epoch": 0.34, "learning_rate": 0.0015273789745691442, "loss": 2.0154, "step": 1058 }, { "epoch": 0.34, "learning_rate": 0.0015264847806766748, "loss": 2.0215, "step": 1059 }, { "epoch": 0.34, "learning_rate": 0.0015255900040024868, "loss": 2.04, "step": 1060 }, { "epoch": 0.34, "learning_rate": 0.0015246946455370351, "loss": 2.0196, "step": 1061 }, { "epoch": 0.34, "learning_rate": 0.0015237987062714188, "loss": 1.9766, "step": 1062 }, { "epoch": 0.35, "learning_rate": 0.0015229021871973796, "loss": 1.9926, "step": 1063 }, { "epoch": 0.35, "learning_rate": 0.0015220050893073017, "loss": 2.0742, "step": 1064 }, { "epoch": 0.35, "learning_rate": 0.0015211074135942089, "loss": 2.009, "step": 1065 }, { "epoch": 0.35, "learning_rate": 0.0015202091610517655, "loss": 2.0281, "step": 1066 }, { "epoch": 0.35, "learning_rate": 0.0015193103326742743, "loss": 2.0399, "step": 1067 }, { "epoch": 0.35, "learning_rate": 0.0015184109294566748, "loss": 1.9765, "step": 1068 }, { "epoch": 0.35, "learning_rate": 0.0015175109523945436, "loss": 2.0569, "step": 1069 }, { "epoch": 0.35, "learning_rate": 0.0015166104024840921, "loss": 2.0083, "step": 1070 }, { "epoch": 0.35, "learning_rate": 0.001515709280722166, "loss": 2.0653, "step": 1071 }, { "epoch": 0.35, "learning_rate": 0.0015148075881062436, "loss": 2.0948, "step": 1072 }, { "epoch": 0.35, "learning_rate": 0.0015139053256344353, "loss": 2.0503, "step": 1073 }, { "epoch": 0.35, "learning_rate": 0.0015130024943054829, "loss": 2.0199, "step": 1074 }, { "epoch": 0.35, "learning_rate": 0.0015120990951187569, "loss": 2.0135, "step": 1075 }, { "epoch": 0.35, "learning_rate": 0.001511195129074257, "loss": 1.99, "step": 1076 }, { "epoch": 0.35, "learning_rate": 0.00151029059717261, "loss": 2.0824, "step": 1077 }, { "epoch": 0.35, "learning_rate": 0.0015093855004150694, "loss": 1.9533, "step": 1078 }, { "epoch": 0.35, "learning_rate": 0.0015084798398035137, "loss": 2.0089, "step": 1079 }, { "epoch": 0.35, "learning_rate": 0.001507573616340446, "loss": 1.9756, "step": 1080 }, { "epoch": 0.35, "learning_rate": 0.001506666831028992, "loss": 2.0406, "step": 1081 }, { "epoch": 0.35, "learning_rate": 0.001505759484872899, "loss": 1.9923, "step": 1082 }, { "epoch": 0.35, "learning_rate": 0.0015048515788765363, "loss": 2.0256, "step": 1083 }, { "epoch": 0.35, "learning_rate": 0.0015039431140448914, "loss": 1.9454, "step": 1084 }, { "epoch": 0.35, "learning_rate": 0.0015030340913835714, "loss": 1.8891, "step": 1085 }, { "epoch": 0.35, "learning_rate": 0.0015021245118988006, "loss": 2.0098, "step": 1086 }, { "epoch": 0.35, "learning_rate": 0.00150121437659742, "loss": 1.9624, "step": 1087 }, { "epoch": 0.35, "learning_rate": 0.0015003036864868848, "loss": 2.0595, "step": 1088 }, { "epoch": 0.35, "learning_rate": 0.001499392442575266, "loss": 2.0075, "step": 1089 }, { "epoch": 0.35, "learning_rate": 0.0014984806458712462, "loss": 2.0028, "step": 1090 }, { "epoch": 0.35, "learning_rate": 0.0014975682973841198, "loss": 2.0614, "step": 1091 }, { "epoch": 0.35, "learning_rate": 0.0014966553981237939, "loss": 1.9329, "step": 1092 }, { "epoch": 0.35, "learning_rate": 0.0014957419491007826, "loss": 1.9608, "step": 1093 }, { "epoch": 0.36, "learning_rate": 0.0014948279513262112, "loss": 2.0344, "step": 1094 }, { "epoch": 0.36, "learning_rate": 0.00149391340581181, "loss": 2.0103, "step": 1095 }, { "epoch": 0.36, "learning_rate": 0.0014929983135699171, "loss": 1.9721, "step": 1096 }, { "epoch": 0.36, "learning_rate": 0.0014920826756134758, "loss": 2.0514, "step": 1097 }, { "epoch": 0.36, "learning_rate": 0.0014911664929560323, "loss": 1.9663, "step": 1098 }, { "epoch": 0.36, "learning_rate": 0.0014902497666117373, "loss": 2.021, "step": 1099 }, { "epoch": 0.36, "learning_rate": 0.0014893324975953424, "loss": 1.9644, "step": 1100 }, { "epoch": 0.36, "learning_rate": 0.0014884146869221994, "loss": 2.0091, "step": 1101 }, { "epoch": 0.36, "learning_rate": 0.0014874963356082613, "loss": 1.9863, "step": 1102 }, { "epoch": 0.36, "learning_rate": 0.0014865774446700779, "loss": 2.0192, "step": 1103 }, { "epoch": 0.36, "learning_rate": 0.0014856580151247974, "loss": 2.0566, "step": 1104 }, { "epoch": 0.36, "learning_rate": 0.0014847380479901635, "loss": 2.0313, "step": 1105 }, { "epoch": 0.36, "learning_rate": 0.0014838175442845151, "loss": 1.9868, "step": 1106 }, { "epoch": 0.36, "learning_rate": 0.0014828965050267858, "loss": 1.9982, "step": 1107 }, { "epoch": 0.36, "learning_rate": 0.0014819749312365012, "loss": 2.0108, "step": 1108 }, { "epoch": 0.36, "learning_rate": 0.001481052823933779, "loss": 2.051, "step": 1109 }, { "epoch": 0.36, "learning_rate": 0.0014801301841393267, "loss": 2.0192, "step": 1110 }, { "epoch": 0.36, "learning_rate": 0.0014792070128744426, "loss": 2.0647, "step": 1111 }, { "epoch": 0.36, "learning_rate": 0.0014782833111610118, "loss": 2.0251, "step": 1112 }, { "epoch": 0.36, "learning_rate": 0.0014773590800215078, "loss": 2.0373, "step": 1113 }, { "epoch": 0.36, "learning_rate": 0.0014764343204789897, "loss": 2.0194, "step": 1114 }, { "epoch": 0.36, "learning_rate": 0.0014755090335571012, "loss": 1.9797, "step": 1115 }, { "epoch": 0.36, "learning_rate": 0.0014745832202800698, "loss": 2.0239, "step": 1116 }, { "epoch": 0.36, "learning_rate": 0.001473656881672706, "loss": 1.965, "step": 1117 }, { "epoch": 0.36, "learning_rate": 0.0014727300187604023, "loss": 2.0354, "step": 1118 }, { "epoch": 0.36, "learning_rate": 0.00147180263256913, "loss": 1.982, "step": 1119 }, { "epoch": 0.36, "learning_rate": 0.001470874724125441, "loss": 2.0237, "step": 1120 }, { "epoch": 0.36, "learning_rate": 0.001469946294456465, "loss": 2.0158, "step": 1121 }, { "epoch": 0.36, "learning_rate": 0.0014690173445899078, "loss": 2.0874, "step": 1122 }, { "epoch": 0.36, "learning_rate": 0.0014680878755540525, "loss": 2.0106, "step": 1123 }, { "epoch": 0.37, "learning_rate": 0.001467157888377756, "loss": 1.9722, "step": 1124 }, { "epoch": 0.37, "learning_rate": 0.0014662273840904488, "loss": 1.9701, "step": 1125 }, { "epoch": 0.37, "learning_rate": 0.0014652963637221336, "loss": 1.9799, "step": 1126 }, { "epoch": 0.37, "learning_rate": 0.001464364828303385, "loss": 1.9885, "step": 1127 }, { "epoch": 0.37, "learning_rate": 0.001463432778865347, "loss": 1.9631, "step": 1128 }, { "epoch": 0.37, "learning_rate": 0.0014625002164397331, "loss": 2.0582, "step": 1129 }, { "epoch": 0.37, "learning_rate": 0.0014615671420588248, "loss": 2.038, "step": 1130 }, { "epoch": 0.37, "learning_rate": 0.0014606335567554697, "loss": 1.9949, "step": 1131 }, { "epoch": 0.37, "learning_rate": 0.001459699461563081, "loss": 2.0568, "step": 1132 }, { "epoch": 0.37, "learning_rate": 0.0014587648575156368, "loss": 2.0067, "step": 1133 }, { "epoch": 0.37, "learning_rate": 0.0014578297456476783, "loss": 1.9899, "step": 1134 }, { "epoch": 0.37, "learning_rate": 0.0014568941269943085, "loss": 2.0568, "step": 1135 }, { "epoch": 0.37, "learning_rate": 0.001455958002591192, "loss": 2.0517, "step": 1136 }, { "epoch": 0.37, "learning_rate": 0.0014550213734745524, "loss": 1.9748, "step": 1137 }, { "epoch": 0.37, "learning_rate": 0.0014540842406811727, "loss": 2.0441, "step": 1138 }, { "epoch": 0.37, "learning_rate": 0.0014531466052483933, "loss": 2.028, "step": 1139 }, { "epoch": 0.37, "learning_rate": 0.0014522084682141102, "loss": 1.9636, "step": 1140 }, { "epoch": 0.37, "learning_rate": 0.001451269830616776, "loss": 2.057, "step": 1141 }, { "epoch": 0.37, "learning_rate": 0.0014503306934953964, "loss": 1.9922, "step": 1142 }, { "epoch": 0.37, "learning_rate": 0.0014493910578895304, "loss": 1.9878, "step": 1143 }, { "epoch": 0.37, "learning_rate": 0.0014484509248392885, "loss": 2.0342, "step": 1144 }, { "epoch": 0.37, "learning_rate": 0.001447510295385332, "loss": 2.0465, "step": 1145 }, { "epoch": 0.37, "learning_rate": 0.0014465691705688722, "loss": 2.0625, "step": 1146 }, { "epoch": 0.37, "learning_rate": 0.0014456275514316676, "loss": 1.9494, "step": 1147 }, { "epoch": 0.37, "learning_rate": 0.0014446854390160252, "loss": 1.9777, "step": 1148 }, { "epoch": 0.37, "learning_rate": 0.0014437428343647967, "loss": 2.0052, "step": 1149 }, { "epoch": 0.37, "learning_rate": 0.0014427997385213797, "loss": 2.0681, "step": 1150 }, { "epoch": 0.37, "learning_rate": 0.0014418561525297152, "loss": 1.963, "step": 1151 }, { "epoch": 0.37, "learning_rate": 0.0014409120774342863, "loss": 2.0661, "step": 1152 }, { "epoch": 0.37, "learning_rate": 0.0014399675142801184, "loss": 2.0275, "step": 1153 }, { "epoch": 0.37, "learning_rate": 0.001439022464112776, "loss": 2.0133, "step": 1154 }, { "epoch": 0.38, "learning_rate": 0.0014380769279783642, "loss": 1.8741, "step": 1155 }, { "epoch": 0.38, "learning_rate": 0.0014371309069235246, "loss": 2.0688, "step": 1156 }, { "epoch": 0.38, "learning_rate": 0.0014361844019954361, "loss": 1.9904, "step": 1157 }, { "epoch": 0.38, "learning_rate": 0.0014352374142418138, "loss": 2.0021, "step": 1158 }, { "epoch": 0.38, "learning_rate": 0.0014342899447109061, "loss": 1.9533, "step": 1159 }, { "epoch": 0.38, "learning_rate": 0.0014333419944514956, "loss": 1.9781, "step": 1160 }, { "epoch": 0.38, "learning_rate": 0.0014323935645128964, "loss": 1.9872, "step": 1161 }, { "epoch": 0.38, "learning_rate": 0.0014314446559449549, "loss": 1.9809, "step": 1162 }, { "epoch": 0.38, "learning_rate": 0.001430495269798045, "loss": 1.9728, "step": 1163 }, { "epoch": 0.38, "learning_rate": 0.001429545407123071, "loss": 1.9641, "step": 1164 }, { "epoch": 0.38, "learning_rate": 0.001428595068971465, "loss": 1.9972, "step": 1165 }, { "epoch": 0.38, "learning_rate": 0.0014276442563951838, "loss": 1.9798, "step": 1166 }, { "epoch": 0.38, "learning_rate": 0.0014266929704467104, "loss": 1.9755, "step": 1167 }, { "epoch": 0.38, "learning_rate": 0.0014257412121790518, "loss": 1.9785, "step": 1168 }, { "epoch": 0.38, "learning_rate": 0.0014247889826457373, "loss": 1.8916, "step": 1169 }, { "epoch": 0.38, "learning_rate": 0.0014238362829008188, "loss": 1.9922, "step": 1170 }, { "epoch": 0.38, "learning_rate": 0.0014228831139988679, "loss": 2.0204, "step": 1171 }, { "epoch": 0.38, "learning_rate": 0.0014219294769949751, "loss": 1.9211, "step": 1172 }, { "epoch": 0.38, "learning_rate": 0.00142097537294475, "loss": 2.015, "step": 1173 }, { "epoch": 0.38, "learning_rate": 0.0014200208029043196, "loss": 2.0603, "step": 1174 }, { "epoch": 0.38, "learning_rate": 0.001419065767930325, "loss": 1.9147, "step": 1175 }, { "epoch": 0.38, "learning_rate": 0.001418110269079923, "loss": 1.9916, "step": 1176 }, { "epoch": 0.38, "learning_rate": 0.0014171543074107844, "loss": 1.99, "step": 1177 }, { "epoch": 0.38, "learning_rate": 0.0014161978839810911, "loss": 2.0503, "step": 1178 }, { "epoch": 0.38, "learning_rate": 0.001415240999849537, "loss": 1.9902, "step": 1179 }, { "epoch": 0.38, "learning_rate": 0.001414283656075326, "loss": 2.0639, "step": 1180 }, { "epoch": 0.38, "learning_rate": 0.00141332585371817, "loss": 1.9997, "step": 1181 }, { "epoch": 0.38, "learning_rate": 0.001412367593838289, "loss": 2.0602, "step": 1182 }, { "epoch": 0.38, "learning_rate": 0.0014114088774964098, "loss": 2.013, "step": 1183 }, { "epoch": 0.38, "learning_rate": 0.001410449705753764, "loss": 2.0103, "step": 1184 }, { "epoch": 0.38, "learning_rate": 0.0014094900796720875, "loss": 2.0447, "step": 1185 }, { "epoch": 0.39, "learning_rate": 0.0014085300003136192, "loss": 1.9033, "step": 1186 }, { "epoch": 0.39, "learning_rate": 0.0014075694687410991, "loss": 2.088, "step": 1187 }, { "epoch": 0.39, "learning_rate": 0.0014066084860177685, "loss": 2.0176, "step": 1188 }, { "epoch": 0.39, "learning_rate": 0.001405647053207368, "loss": 2.0587, "step": 1189 }, { "epoch": 0.39, "learning_rate": 0.0014046851713741366, "loss": 2.0514, "step": 1190 }, { "epoch": 0.39, "learning_rate": 0.0014037228415828093, "loss": 2.0531, "step": 1191 }, { "epoch": 0.39, "learning_rate": 0.0014027600648986178, "loss": 2.0269, "step": 1192 }, { "epoch": 0.39, "learning_rate": 0.0014017968423872891, "loss": 2.0331, "step": 1193 }, { "epoch": 0.39, "learning_rate": 0.001400833175115042, "loss": 2.025, "step": 1194 }, { "epoch": 0.39, "learning_rate": 0.001399869064148589, "loss": 2.0136, "step": 1195 }, { "epoch": 0.39, "learning_rate": 0.0013989045105551332, "loss": 2.0268, "step": 1196 }, { "epoch": 0.39, "learning_rate": 0.0013979395154023679, "loss": 2.0279, "step": 1197 }, { "epoch": 0.39, "learning_rate": 0.0013969740797584746, "loss": 2.0766, "step": 1198 }, { "epoch": 0.39, "learning_rate": 0.0013960082046921237, "loss": 1.9513, "step": 1199 }, { "epoch": 0.39, "learning_rate": 0.0013950418912724704, "loss": 1.9501, "step": 1200 }, { "epoch": 0.39, "learning_rate": 0.001394075140569156, "loss": 1.9918, "step": 1201 }, { "epoch": 0.39, "learning_rate": 0.0013931079536523062, "loss": 1.9786, "step": 1202 }, { "epoch": 0.39, "learning_rate": 0.0013921403315925282, "loss": 2.0267, "step": 1203 }, { "epoch": 0.39, "learning_rate": 0.0013911722754609125, "loss": 1.9994, "step": 1204 }, { "epoch": 0.39, "learning_rate": 0.001390203786329029, "loss": 2.0059, "step": 1205 }, { "epoch": 0.39, "learning_rate": 0.0013892348652689275, "loss": 2.0365, "step": 1206 }, { "epoch": 0.39, "learning_rate": 0.001388265513353135, "loss": 2.0477, "step": 1207 }, { "epoch": 0.39, "learning_rate": 0.0013872957316546568, "loss": 2.0306, "step": 1208 }, { "epoch": 0.39, "learning_rate": 0.001386325521246973, "loss": 2.0996, "step": 1209 }, { "epoch": 0.39, "learning_rate": 0.001385354883204038, "loss": 1.9904, "step": 1210 }, { "epoch": 0.39, "learning_rate": 0.0013843838186002803, "loss": 2.0014, "step": 1211 }, { "epoch": 0.39, "learning_rate": 0.0013834123285106008, "loss": 2.0599, "step": 1212 }, { "epoch": 0.39, "learning_rate": 0.00138244041401037, "loss": 2.047, "step": 1213 }, { "epoch": 0.39, "learning_rate": 0.0013814680761754297, "loss": 2.0283, "step": 1214 }, { "epoch": 0.39, "learning_rate": 0.0013804953160820896, "loss": 1.9967, "step": 1215 }, { "epoch": 0.39, "learning_rate": 0.0013795221348071268, "loss": 1.9557, "step": 1216 }, { "epoch": 0.4, "learning_rate": 0.0013785485334277844, "loss": 2.0205, "step": 1217 }, { "epoch": 0.4, "learning_rate": 0.0013775745130217717, "loss": 2.0258, "step": 1218 }, { "epoch": 0.4, "learning_rate": 0.0013766000746672597, "loss": 2.0265, "step": 1219 }, { "epoch": 0.4, "learning_rate": 0.0013756252194428841, "loss": 2.0056, "step": 1220 }, { "epoch": 0.4, "learning_rate": 0.0013746499484277416, "loss": 1.9276, "step": 1221 }, { "epoch": 0.4, "learning_rate": 0.001373674262701388, "loss": 1.957, "step": 1222 }, { "epoch": 0.4, "learning_rate": 0.0013726981633438392, "loss": 2.0044, "step": 1223 }, { "epoch": 0.4, "learning_rate": 0.0013717216514355691, "loss": 2.0336, "step": 1224 }, { "epoch": 0.4, "learning_rate": 0.001370744728057507, "loss": 1.9889, "step": 1225 }, { "epoch": 0.4, "learning_rate": 0.0013697673942910393, "loss": 2.0155, "step": 1226 }, { "epoch": 0.4, "learning_rate": 0.0013687896512180058, "loss": 2.0261, "step": 1227 }, { "epoch": 0.4, "learning_rate": 0.0013678114999206995, "loss": 2.0001, "step": 1228 }, { "epoch": 0.4, "learning_rate": 0.001366832941481865, "loss": 1.9393, "step": 1229 }, { "epoch": 0.4, "learning_rate": 0.0013658539769846982, "loss": 2.0375, "step": 1230 }, { "epoch": 0.4, "learning_rate": 0.001364874607512844, "loss": 2.0222, "step": 1231 }, { "epoch": 0.4, "learning_rate": 0.0013638948341503953, "loss": 2.0127, "step": 1232 }, { "epoch": 0.4, "learning_rate": 0.0013629146579818932, "loss": 1.9215, "step": 1233 }, { "epoch": 0.4, "learning_rate": 0.0013619340800923236, "loss": 1.9821, "step": 1234 }, { "epoch": 0.4, "learning_rate": 0.0013609531015671172, "loss": 1.9702, "step": 1235 }, { "epoch": 0.4, "learning_rate": 0.0013599717234921488, "loss": 2.0184, "step": 1236 }, { "epoch": 0.4, "learning_rate": 0.0013589899469537347, "loss": 1.9623, "step": 1237 }, { "epoch": 0.4, "learning_rate": 0.0013580077730386334, "loss": 1.953, "step": 1238 }, { "epoch": 0.4, "learning_rate": 0.0013570252028340412, "loss": 1.9951, "step": 1239 }, { "epoch": 0.4, "learning_rate": 0.001356042237427596, "loss": 2.0159, "step": 1240 }, { "epoch": 0.4, "learning_rate": 0.0013550588779073702, "loss": 2.0313, "step": 1241 }, { "epoch": 0.4, "learning_rate": 0.0013540751253618747, "loss": 1.9782, "step": 1242 }, { "epoch": 0.4, "learning_rate": 0.001353090980880054, "loss": 1.9998, "step": 1243 }, { "epoch": 0.4, "learning_rate": 0.0013521064455512871, "loss": 1.9969, "step": 1244 }, { "epoch": 0.4, "learning_rate": 0.0013511215204653855, "loss": 2.0083, "step": 1245 }, { "epoch": 0.4, "learning_rate": 0.0013501362067125928, "loss": 2.0131, "step": 1246 }, { "epoch": 0.41, "learning_rate": 0.0013491505053835813, "loss": 2.0603, "step": 1247 }, { "epoch": 0.41, "learning_rate": 0.0013481644175694538, "loss": 2.0423, "step": 1248 }, { "epoch": 0.41, "learning_rate": 0.0013471779443617405, "loss": 1.9505, "step": 1249 }, { "epoch": 0.41, "learning_rate": 0.0013461910868523973, "loss": 1.9587, "step": 1250 }, { "epoch": 0.41, "learning_rate": 0.0013452038461338068, "loss": 1.9823, "step": 1251 }, { "epoch": 0.41, "learning_rate": 0.001344216223298775, "loss": 1.9609, "step": 1252 }, { "epoch": 0.41, "learning_rate": 0.0013432282194405314, "loss": 2.0465, "step": 1253 }, { "epoch": 0.41, "learning_rate": 0.0013422398356527263, "loss": 2.0799, "step": 1254 }, { "epoch": 0.41, "learning_rate": 0.001341251073029432, "loss": 2.0065, "step": 1255 }, { "epoch": 0.41, "learning_rate": 0.001340261932665139, "loss": 2.0182, "step": 1256 }, { "epoch": 0.41, "learning_rate": 0.0013392724156547561, "loss": 1.9894, "step": 1257 }, { "epoch": 0.41, "learning_rate": 0.0013382825230936098, "loss": 1.9858, "step": 1258 }, { "epoch": 0.41, "learning_rate": 0.0013372922560774408, "loss": 1.9982, "step": 1259 }, { "epoch": 0.41, "learning_rate": 0.0013363016157024062, "loss": 1.9088, "step": 1260 }, { "epoch": 0.41, "learning_rate": 0.0013353106030650746, "loss": 2.0551, "step": 1261 }, { "epoch": 0.41, "learning_rate": 0.0013343192192624281, "loss": 1.9906, "step": 1262 }, { "epoch": 0.41, "learning_rate": 0.0013333274653918586, "loss": 2.0496, "step": 1263 }, { "epoch": 0.41, "learning_rate": 0.0013323353425511685, "loss": 2.0294, "step": 1264 }, { "epoch": 0.41, "learning_rate": 0.0013313428518385677, "loss": 1.9883, "step": 1265 }, { "epoch": 0.41, "learning_rate": 0.001330349994352674, "loss": 1.9851, "step": 1266 }, { "epoch": 0.41, "learning_rate": 0.0013293567711925107, "loss": 1.9623, "step": 1267 }, { "epoch": 0.41, "learning_rate": 0.001328363183457507, "loss": 2.0594, "step": 1268 }, { "epoch": 0.41, "learning_rate": 0.001327369232247494, "loss": 1.967, "step": 1269 }, { "epoch": 0.41, "learning_rate": 0.0013263749186627062, "loss": 1.9893, "step": 1270 }, { "epoch": 0.41, "learning_rate": 0.001325380243803779, "loss": 2.0666, "step": 1271 }, { "epoch": 0.41, "learning_rate": 0.0013243852087717477, "loss": 1.9702, "step": 1272 }, { "epoch": 0.41, "learning_rate": 0.0013233898146680462, "loss": 2.052, "step": 1273 }, { "epoch": 0.41, "learning_rate": 0.0013223940625945061, "loss": 2.0043, "step": 1274 }, { "epoch": 0.41, "learning_rate": 0.001321397953653355, "loss": 2.0051, "step": 1275 }, { "epoch": 0.41, "learning_rate": 0.0013204014889472155, "loss": 2.0256, "step": 1276 }, { "epoch": 0.41, "learning_rate": 0.0013194046695791046, "loss": 1.9546, "step": 1277 }, { "epoch": 0.42, "learning_rate": 0.0013184074966524312, "loss": 2.002, "step": 1278 }, { "epoch": 0.42, "learning_rate": 0.0013174099712709953, "loss": 1.9202, "step": 1279 }, { "epoch": 0.42, "learning_rate": 0.0013164120945389876, "loss": 2.0351, "step": 1280 }, { "epoch": 0.42, "learning_rate": 0.0013154138675609886, "loss": 2.004, "step": 1281 }, { "epoch": 0.42, "learning_rate": 0.0013144152914419645, "loss": 1.9978, "step": 1282 }, { "epoch": 0.42, "learning_rate": 0.0013134163672872692, "loss": 2.063, "step": 1283 }, { "epoch": 0.42, "learning_rate": 0.0013124170962026422, "loss": 2.0069, "step": 1284 }, { "epoch": 0.42, "learning_rate": 0.001311417479294206, "loss": 1.9839, "step": 1285 }, { "epoch": 0.42, "learning_rate": 0.0013104175176684666, "loss": 2.0162, "step": 1286 }, { "epoch": 0.42, "learning_rate": 0.0013094172124323111, "loss": 1.9585, "step": 1287 }, { "epoch": 0.42, "learning_rate": 0.0013084165646930074, "loss": 2.103, "step": 1288 }, { "epoch": 0.42, "learning_rate": 0.0013074155755582021, "loss": 1.9457, "step": 1289 }, { "epoch": 0.42, "learning_rate": 0.0013064142461359208, "loss": 1.9966, "step": 1290 }, { "epoch": 0.42, "learning_rate": 0.0013054125775345637, "loss": 1.959, "step": 1291 }, { "epoch": 0.42, "learning_rate": 0.0013044105708629088, "loss": 1.9806, "step": 1292 }, { "epoch": 0.42, "learning_rate": 0.0013034082272301067, "loss": 1.9473, "step": 1293 }, { "epoch": 0.42, "learning_rate": 0.0013024055477456812, "loss": 2.0249, "step": 1294 }, { "epoch": 0.42, "learning_rate": 0.0013014025335195286, "loss": 1.9659, "step": 1295 }, { "epoch": 0.42, "learning_rate": 0.0013003991856619154, "loss": 1.9157, "step": 1296 }, { "epoch": 0.42, "learning_rate": 0.0012993955052834766, "loss": 2.0018, "step": 1297 }, { "epoch": 0.42, "learning_rate": 0.0012983914934952163, "loss": 2.0223, "step": 1298 }, { "epoch": 0.42, "learning_rate": 0.0012973871514085056, "loss": 1.9569, "step": 1299 }, { "epoch": 0.42, "learning_rate": 0.0012963824801350804, "loss": 2.0319, "step": 1300 }, { "epoch": 0.42, "learning_rate": 0.001295377480787041, "loss": 2.0382, "step": 1301 }, { "epoch": 0.42, "learning_rate": 0.0012943721544768512, "loss": 1.9724, "step": 1302 }, { "epoch": 0.42, "learning_rate": 0.0012933665023173373, "loss": 1.942, "step": 1303 }, { "epoch": 0.42, "learning_rate": 0.0012923605254216853, "loss": 2.0411, "step": 1304 }, { "epoch": 0.42, "learning_rate": 0.001291354224903441, "loss": 1.991, "step": 1305 }, { "epoch": 0.42, "learning_rate": 0.0012903476018765083, "loss": 1.9746, "step": 1306 }, { "epoch": 0.42, "learning_rate": 0.0012893406574551488, "loss": 1.968, "step": 1307 }, { "epoch": 0.42, "learning_rate": 0.0012883333927539793, "loss": 2.0716, "step": 1308 }, { "epoch": 0.43, "learning_rate": 0.0012873258088879707, "loss": 1.9628, "step": 1309 }, { "epoch": 0.43, "learning_rate": 0.001286317906972448, "loss": 2.0546, "step": 1310 }, { "epoch": 0.43, "learning_rate": 0.0012853096881230877, "loss": 1.9878, "step": 1311 }, { "epoch": 0.43, "learning_rate": 0.0012843011534559175, "loss": 2.0741, "step": 1312 }, { "epoch": 0.43, "learning_rate": 0.0012832923040873143, "loss": 1.9711, "step": 1313 }, { "epoch": 0.43, "learning_rate": 0.0012822831411340042, "loss": 1.9684, "step": 1314 }, { "epoch": 0.43, "learning_rate": 0.001281273665713059, "loss": 2.0649, "step": 1315 }, { "epoch": 0.43, "learning_rate": 0.0012802638789418976, "loss": 1.9661, "step": 1316 }, { "epoch": 0.43, "learning_rate": 0.0012792537819382827, "loss": 2.0227, "step": 1317 }, { "epoch": 0.43, "learning_rate": 0.0012782433758203216, "loss": 1.9672, "step": 1318 }, { "epoch": 0.43, "learning_rate": 0.0012772326617064627, "loss": 2.0463, "step": 1319 }, { "epoch": 0.43, "learning_rate": 0.001276221640715495, "loss": 1.9925, "step": 1320 }, { "epoch": 0.43, "learning_rate": 0.0012752103139665484, "loss": 1.9457, "step": 1321 }, { "epoch": 0.43, "learning_rate": 0.0012741986825790907, "loss": 2.0159, "step": 1322 }, { "epoch": 0.43, "learning_rate": 0.0012731867476729265, "loss": 1.961, "step": 1323 }, { "epoch": 0.43, "learning_rate": 0.0012721745103681976, "loss": 1.9534, "step": 1324 }, { "epoch": 0.43, "learning_rate": 0.0012711619717853785, "loss": 1.9441, "step": 1325 }, { "epoch": 0.43, "learning_rate": 0.0012701491330452792, "loss": 1.9739, "step": 1326 }, { "epoch": 0.43, "learning_rate": 0.0012691359952690408, "loss": 2.0727, "step": 1327 }, { "epoch": 0.43, "learning_rate": 0.001268122559578136, "loss": 2.0166, "step": 1328 }, { "epoch": 0.43, "learning_rate": 0.0012671088270943666, "loss": 2.0199, "step": 1329 }, { "epoch": 0.43, "learning_rate": 0.0012660947989398636, "loss": 2.0352, "step": 1330 }, { "epoch": 0.43, "learning_rate": 0.0012650804762370854, "loss": 2.0114, "step": 1331 }, { "epoch": 0.43, "learning_rate": 0.0012640658601088153, "loss": 1.9894, "step": 1332 }, { "epoch": 0.43, "learning_rate": 0.0012630509516781626, "loss": 1.9212, "step": 1333 }, { "epoch": 0.43, "learning_rate": 0.0012620357520685599, "loss": 2.0245, "step": 1334 }, { "epoch": 0.43, "learning_rate": 0.0012610202624037618, "loss": 2.0623, "step": 1335 }, { "epoch": 0.43, "learning_rate": 0.0012600044838078437, "loss": 1.9993, "step": 1336 }, { "epoch": 0.43, "learning_rate": 0.001258988417405202, "loss": 1.8532, "step": 1337 }, { "epoch": 0.43, "learning_rate": 0.0012579720643205506, "loss": 1.9156, "step": 1338 }, { "epoch": 0.43, "learning_rate": 0.0012569554256789206, "loss": 1.9607, "step": 1339 }, { "epoch": 0.44, "learning_rate": 0.0012559385026056607, "loss": 2.0242, "step": 1340 }, { "epoch": 0.44, "learning_rate": 0.0012549212962264325, "loss": 2.0328, "step": 1341 }, { "epoch": 0.44, "learning_rate": 0.0012539038076672122, "loss": 2.0363, "step": 1342 }, { "epoch": 0.44, "learning_rate": 0.0012528860380542888, "loss": 1.9909, "step": 1343 }, { "epoch": 0.44, "learning_rate": 0.0012518679885142614, "loss": 2.0149, "step": 1344 }, { "epoch": 0.44, "learning_rate": 0.0012508496601740396, "loss": 2.0567, "step": 1345 }, { "epoch": 0.44, "learning_rate": 0.001249831054160841, "loss": 2.0063, "step": 1346 }, { "epoch": 0.44, "learning_rate": 0.0012488121716021918, "loss": 2.1157, "step": 1347 }, { "epoch": 0.44, "learning_rate": 0.001247793013625923, "loss": 2.0076, "step": 1348 }, { "epoch": 0.44, "learning_rate": 0.001246773581360171, "loss": 1.9399, "step": 1349 }, { "epoch": 0.44, "learning_rate": 0.001245753875933376, "loss": 1.9105, "step": 1350 }, { "epoch": 0.44, "learning_rate": 0.00124473389847428, "loss": 1.8673, "step": 1351 }, { "epoch": 0.44, "learning_rate": 0.0012437136501119268, "loss": 1.9691, "step": 1352 }, { "epoch": 0.44, "learning_rate": 0.00124269313197566, "loss": 2.0618, "step": 1353 }, { "epoch": 0.44, "learning_rate": 0.001241672345195121, "loss": 2.0361, "step": 1354 }, { "epoch": 0.44, "learning_rate": 0.0012406512909002496, "loss": 2.0295, "step": 1355 }, { "epoch": 0.44, "learning_rate": 0.0012396299702212815, "loss": 1.9439, "step": 1356 }, { "epoch": 0.44, "learning_rate": 0.0012386083842887462, "loss": 1.9732, "step": 1357 }, { "epoch": 0.44, "learning_rate": 0.0012375865342334682, "loss": 1.9754, "step": 1358 }, { "epoch": 0.44, "learning_rate": 0.0012365644211865638, "loss": 1.9725, "step": 1359 }, { "epoch": 0.44, "learning_rate": 0.0012355420462794406, "loss": 2.023, "step": 1360 }, { "epoch": 0.44, "learning_rate": 0.0012345194106437956, "loss": 1.9284, "step": 1361 }, { "epoch": 0.44, "learning_rate": 0.0012334965154116148, "loss": 1.9968, "step": 1362 }, { "epoch": 0.44, "learning_rate": 0.0012324733617151713, "loss": 1.9491, "step": 1363 }, { "epoch": 0.44, "learning_rate": 0.001231449950687025, "loss": 2.0288, "step": 1364 }, { "epoch": 0.44, "learning_rate": 0.0012304262834600195, "loss": 2.0097, "step": 1365 }, { "epoch": 0.44, "learning_rate": 0.0012294023611672828, "loss": 2.0175, "step": 1366 }, { "epoch": 0.44, "learning_rate": 0.0012283781849422254, "loss": 1.9664, "step": 1367 }, { "epoch": 0.44, "learning_rate": 0.001227353755918538, "loss": 1.978, "step": 1368 }, { "epoch": 0.44, "learning_rate": 0.001226329075230192, "loss": 1.9899, "step": 1369 }, { "epoch": 0.44, "learning_rate": 0.0012253041440114373, "loss": 2.0075, "step": 1370 }, { "epoch": 0.45, "learning_rate": 0.0012242789633968001, "loss": 1.9994, "step": 1371 }, { "epoch": 0.45, "learning_rate": 0.001223253534521084, "loss": 2.0328, "step": 1372 }, { "epoch": 0.45, "learning_rate": 0.0012222278585193667, "loss": 2.0019, "step": 1373 }, { "epoch": 0.45, "learning_rate": 0.0012212019365269995, "loss": 2.0032, "step": 1374 }, { "epoch": 0.45, "learning_rate": 0.0012201757696796063, "loss": 1.9528, "step": 1375 }, { "epoch": 0.45, "learning_rate": 0.0012191493591130816, "loss": 1.9897, "step": 1376 }, { "epoch": 0.45, "learning_rate": 0.0012181227059635898, "loss": 2.0043, "step": 1377 }, { "epoch": 0.45, "learning_rate": 0.0012170958113675641, "loss": 1.9801, "step": 1378 }, { "epoch": 0.45, "learning_rate": 0.0012160686764617048, "loss": 1.9816, "step": 1379 }, { "epoch": 0.45, "learning_rate": 0.001215041302382978, "loss": 2.0285, "step": 1380 }, { "epoch": 0.45, "learning_rate": 0.0012140136902686147, "loss": 2.0051, "step": 1381 }, { "epoch": 0.45, "learning_rate": 0.0012129858412561099, "loss": 1.9222, "step": 1382 }, { "epoch": 0.45, "learning_rate": 0.0012119577564832199, "loss": 1.9954, "step": 1383 }, { "epoch": 0.45, "learning_rate": 0.0012109294370879627, "loss": 2.0013, "step": 1384 }, { "epoch": 0.45, "learning_rate": 0.0012099008842086155, "loss": 2.0537, "step": 1385 }, { "epoch": 0.45, "learning_rate": 0.0012088720989837142, "loss": 1.9854, "step": 1386 }, { "epoch": 0.45, "learning_rate": 0.001207843082552052, "loss": 1.9511, "step": 1387 }, { "epoch": 0.45, "learning_rate": 0.0012068138360526778, "loss": 2.0438, "step": 1388 }, { "epoch": 0.45, "learning_rate": 0.0012057843606248953, "loss": 1.9865, "step": 1389 }, { "epoch": 0.45, "learning_rate": 0.0012047546574082614, "loss": 2.0002, "step": 1390 }, { "epoch": 0.45, "learning_rate": 0.001203724727542586, "loss": 1.9154, "step": 1391 }, { "epoch": 0.45, "learning_rate": 0.0012026945721679283, "loss": 1.9399, "step": 1392 }, { "epoch": 0.45, "learning_rate": 0.0012016641924245987, "loss": 2.0533, "step": 1393 }, { "epoch": 0.45, "learning_rate": 0.0012006335894531553, "loss": 1.9953, "step": 1394 }, { "epoch": 0.45, "learning_rate": 0.0011996027643944025, "loss": 1.9712, "step": 1395 }, { "epoch": 0.45, "learning_rate": 0.0011985717183893923, "loss": 2.0011, "step": 1396 }, { "epoch": 0.45, "learning_rate": 0.0011975404525794197, "loss": 2.0834, "step": 1397 }, { "epoch": 0.45, "learning_rate": 0.0011965089681060235, "loss": 1.9319, "step": 1398 }, { "epoch": 0.45, "learning_rate": 0.0011954772661109852, "loss": 1.9598, "step": 1399 }, { "epoch": 0.45, "learning_rate": 0.0011944453477363264, "loss": 1.975, "step": 1400 }, { "epoch": 0.46, "learning_rate": 0.001193413214124308, "loss": 1.9437, "step": 1401 }, { "epoch": 0.46, "learning_rate": 0.0011923808664174298, "loss": 2.0537, "step": 1402 }, { "epoch": 0.46, "learning_rate": 0.0011913483057584284, "loss": 1.9711, "step": 1403 }, { "epoch": 0.46, "learning_rate": 0.0011903155332902758, "loss": 2.0134, "step": 1404 }, { "epoch": 0.46, "learning_rate": 0.0011892825501561783, "loss": 1.9182, "step": 1405 }, { "epoch": 0.46, "learning_rate": 0.0011882493574995768, "loss": 2.0218, "step": 1406 }, { "epoch": 0.46, "learning_rate": 0.001187215956464142, "loss": 2.0552, "step": 1407 }, { "epoch": 0.46, "learning_rate": 0.0011861823481937768, "loss": 2.0052, "step": 1408 }, { "epoch": 0.46, "learning_rate": 0.0011851485338326131, "loss": 1.9477, "step": 1409 }, { "epoch": 0.46, "learning_rate": 0.0011841145145250111, "loss": 2.0248, "step": 1410 }, { "epoch": 0.46, "learning_rate": 0.001183080291415557, "loss": 1.9217, "step": 1411 }, { "epoch": 0.46, "learning_rate": 0.0011820458656490638, "loss": 1.9956, "step": 1412 }, { "epoch": 0.46, "learning_rate": 0.0011810112383705681, "loss": 2.0064, "step": 1413 }, { "epoch": 0.46, "learning_rate": 0.0011799764107253293, "loss": 1.9566, "step": 1414 }, { "epoch": 0.46, "learning_rate": 0.00117894138385883, "loss": 2.0471, "step": 1415 }, { "epoch": 0.46, "learning_rate": 0.001177906158916771, "loss": 1.9784, "step": 1416 }, { "epoch": 0.46, "learning_rate": 0.001176870737045075, "loss": 2.0043, "step": 1417 }, { "epoch": 0.46, "learning_rate": 0.0011758351193898802, "loss": 1.9036, "step": 1418 }, { "epoch": 0.46, "learning_rate": 0.0011747993070975438, "loss": 2.0428, "step": 1419 }, { "epoch": 0.46, "learning_rate": 0.0011737633013146364, "loss": 1.9913, "step": 1420 }, { "epoch": 0.46, "learning_rate": 0.0011727271031879444, "loss": 1.9987, "step": 1421 }, { "epoch": 0.46, "learning_rate": 0.0011716907138644664, "loss": 2.0077, "step": 1422 }, { "epoch": 0.46, "learning_rate": 0.001170654134491412, "loss": 2.0187, "step": 1423 }, { "epoch": 0.46, "learning_rate": 0.0011696173662162025, "loss": 1.9233, "step": 1424 }, { "epoch": 0.46, "learning_rate": 0.0011685804101864677, "loss": 2.0219, "step": 1425 }, { "epoch": 0.46, "learning_rate": 0.0011675432675500447, "loss": 2.0214, "step": 1426 }, { "epoch": 0.46, "learning_rate": 0.0011665059394549773, "loss": 1.904, "step": 1427 }, { "epoch": 0.46, "learning_rate": 0.0011654684270495162, "loss": 1.9755, "step": 1428 }, { "epoch": 0.46, "learning_rate": 0.001164430731482114, "loss": 1.9633, "step": 1429 }, { "epoch": 0.46, "learning_rate": 0.0011633928539014269, "loss": 2.0211, "step": 1430 }, { "epoch": 0.46, "learning_rate": 0.001162354795456313, "loss": 1.9328, "step": 1431 }, { "epoch": 0.47, "learning_rate": 0.0011613165572958298, "loss": 1.9341, "step": 1432 }, { "epoch": 0.47, "learning_rate": 0.0011602781405692343, "loss": 2.0498, "step": 1433 }, { "epoch": 0.47, "learning_rate": 0.001159239546425981, "loss": 1.9738, "step": 1434 }, { "epoch": 0.47, "learning_rate": 0.00115820077601572, "loss": 1.9881, "step": 1435 }, { "epoch": 0.47, "learning_rate": 0.0011571618304882984, "loss": 2.1258, "step": 1436 }, { "epoch": 0.47, "learning_rate": 0.0011561227109937553, "loss": 1.9673, "step": 1437 }, { "epoch": 0.47, "learning_rate": 0.0011550834186823237, "loss": 1.9841, "step": 1438 }, { "epoch": 0.47, "learning_rate": 0.0011540439547044266, "loss": 2.038, "step": 1439 }, { "epoch": 0.47, "learning_rate": 0.001153004320210678, "loss": 1.9143, "step": 1440 }, { "epoch": 0.47, "learning_rate": 0.0011519645163518806, "loss": 1.9961, "step": 1441 }, { "epoch": 0.47, "learning_rate": 0.0011509245442790238, "loss": 2.0483, "step": 1442 }, { "epoch": 0.47, "learning_rate": 0.001149884405143284, "loss": 1.9781, "step": 1443 }, { "epoch": 0.47, "learning_rate": 0.0011488441000960226, "loss": 2.0336, "step": 1444 }, { "epoch": 0.47, "learning_rate": 0.0011478036302887836, "loss": 1.9546, "step": 1445 }, { "epoch": 0.47, "learning_rate": 0.0011467629968732946, "loss": 1.9828, "step": 1446 }, { "epoch": 0.47, "learning_rate": 0.0011457222010014638, "loss": 1.933, "step": 1447 }, { "epoch": 0.47, "learning_rate": 0.0011446812438253788, "loss": 1.9919, "step": 1448 }, { "epoch": 0.47, "learning_rate": 0.0011436401264973066, "loss": 1.9725, "step": 1449 }, { "epoch": 0.47, "learning_rate": 0.001142598850169691, "loss": 1.9327, "step": 1450 }, { "epoch": 0.47, "learning_rate": 0.0011415574159951516, "loss": 2.0204, "step": 1451 }, { "epoch": 0.47, "learning_rate": 0.0011405158251264831, "loss": 1.9923, "step": 1452 }, { "epoch": 0.47, "learning_rate": 0.0011394740787166538, "loss": 1.9641, "step": 1453 }, { "epoch": 0.47, "learning_rate": 0.0011384321779188034, "loss": 2.0454, "step": 1454 }, { "epoch": 0.47, "learning_rate": 0.001137390123886243, "loss": 1.9596, "step": 1455 }, { "epoch": 0.47, "learning_rate": 0.0011363479177724537, "loss": 1.9667, "step": 1456 }, { "epoch": 0.47, "learning_rate": 0.0011353055607310845, "loss": 2.0499, "step": 1457 }, { "epoch": 0.47, "learning_rate": 0.001134263053915951, "loss": 2.0997, "step": 1458 }, { "epoch": 0.47, "learning_rate": 0.0011332203984810362, "loss": 1.9242, "step": 1459 }, { "epoch": 0.47, "learning_rate": 0.0011321775955804854, "loss": 1.9777, "step": 1460 }, { "epoch": 0.47, "learning_rate": 0.0011311346463686088, "loss": 2.0677, "step": 1461 }, { "epoch": 0.47, "learning_rate": 0.0011300915519998778, "loss": 1.9902, "step": 1462 }, { "epoch": 0.48, "learning_rate": 0.001129048313628925, "loss": 1.9275, "step": 1463 }, { "epoch": 0.48, "learning_rate": 0.001128004932410542, "loss": 1.9599, "step": 1464 }, { "epoch": 0.48, "learning_rate": 0.001126961409499678, "loss": 1.9988, "step": 1465 }, { "epoch": 0.48, "learning_rate": 0.0011259177460514408, "loss": 1.9869, "step": 1466 }, { "epoch": 0.48, "learning_rate": 0.0011248739432210917, "loss": 2.0322, "step": 1467 }, { "epoch": 0.48, "learning_rate": 0.0011238300021640475, "loss": 1.9729, "step": 1468 }, { "epoch": 0.48, "learning_rate": 0.0011227859240358778, "loss": 1.9967, "step": 1469 }, { "epoch": 0.48, "learning_rate": 0.0011217417099923038, "loss": 1.9835, "step": 1470 }, { "epoch": 0.48, "learning_rate": 0.0011206973611891967, "loss": 2.0131, "step": 1471 }, { "epoch": 0.48, "learning_rate": 0.0011196528787825782, "loss": 2.0445, "step": 1472 }, { "epoch": 0.48, "learning_rate": 0.0011186082639286167, "loss": 1.9923, "step": 1473 }, { "epoch": 0.48, "learning_rate": 0.0011175635177836274, "loss": 1.9708, "step": 1474 }, { "epoch": 0.48, "learning_rate": 0.0011165186415040714, "loss": 2.0711, "step": 1475 }, { "epoch": 0.48, "learning_rate": 0.0011154736362465527, "loss": 2.0144, "step": 1476 }, { "epoch": 0.48, "learning_rate": 0.0011144285031678197, "loss": 2.0364, "step": 1477 }, { "epoch": 0.48, "learning_rate": 0.0011133832434247607, "loss": 1.9648, "step": 1478 }, { "epoch": 0.48, "learning_rate": 0.0011123378581744054, "loss": 1.9815, "step": 1479 }, { "epoch": 0.48, "learning_rate": 0.0011112923485739217, "loss": 1.9404, "step": 1480 }, { "epoch": 0.48, "learning_rate": 0.0011102467157806156, "loss": 1.9733, "step": 1481 }, { "epoch": 0.48, "learning_rate": 0.0011092009609519288, "loss": 1.9713, "step": 1482 }, { "epoch": 0.48, "learning_rate": 0.0011081550852454387, "loss": 1.9535, "step": 1483 }, { "epoch": 0.48, "learning_rate": 0.0011071090898188566, "loss": 2.0495, "step": 1484 }, { "epoch": 0.48, "learning_rate": 0.001106062975830026, "loss": 1.9729, "step": 1485 }, { "epoch": 0.48, "learning_rate": 0.0011050167444369215, "loss": 1.9804, "step": 1486 }, { "epoch": 0.48, "learning_rate": 0.0011039703967976478, "loss": 2.024, "step": 1487 }, { "epoch": 0.48, "learning_rate": 0.0011029239340704384, "loss": 2.0717, "step": 1488 }, { "epoch": 0.48, "learning_rate": 0.0011018773574136545, "loss": 2.0129, "step": 1489 }, { "epoch": 0.48, "learning_rate": 0.0011008306679857827, "loss": 1.9889, "step": 1490 }, { "epoch": 0.48, "learning_rate": 0.001099783866945435, "loss": 1.985, "step": 1491 }, { "epoch": 0.48, "learning_rate": 0.0010987369554513462, "loss": 2.0752, "step": 1492 }, { "epoch": 0.48, "learning_rate": 0.0010976899346623744, "loss": 1.9381, "step": 1493 }, { "epoch": 0.49, "learning_rate": 0.0010966428057374979, "loss": 1.9904, "step": 1494 }, { "epoch": 0.49, "learning_rate": 0.001095595569835815, "loss": 2.0128, "step": 1495 }, { "epoch": 0.49, "learning_rate": 0.0010945482281165427, "loss": 1.9579, "step": 1496 }, { "epoch": 0.49, "learning_rate": 0.0010935007817390145, "loss": 2.0238, "step": 1497 }, { "epoch": 0.49, "learning_rate": 0.0010924532318626798, "loss": 1.8935, "step": 1498 }, { "epoch": 0.49, "learning_rate": 0.0010914055796471035, "loss": 2.0481, "step": 1499 }, { "epoch": 0.49, "learning_rate": 0.0010903578262519626, "loss": 1.9333, "step": 1500 }, { "epoch": 0.49, "learning_rate": 0.0010893099728370465, "loss": 1.9122, "step": 1501 }, { "epoch": 0.49, "learning_rate": 0.0010882620205622559, "loss": 1.9202, "step": 1502 }, { "epoch": 0.49, "learning_rate": 0.0010872139705876002, "loss": 1.9836, "step": 1503 }, { "epoch": 0.49, "learning_rate": 0.001086165824073197, "loss": 2.0492, "step": 1504 }, { "epoch": 0.49, "learning_rate": 0.0010851175821792714, "loss": 2.0268, "step": 1505 }, { "epoch": 0.49, "learning_rate": 0.0010840692460661536, "loss": 1.981, "step": 1506 }, { "epoch": 0.49, "learning_rate": 0.0010830208168942778, "loss": 1.9633, "step": 1507 }, { "epoch": 0.49, "learning_rate": 0.0010819722958241816, "loss": 1.971, "step": 1508 }, { "epoch": 0.49, "learning_rate": 0.0010809236840165042, "loss": 2.0367, "step": 1509 }, { "epoch": 0.49, "learning_rate": 0.001079874982631986, "loss": 2.0085, "step": 1510 }, { "epoch": 0.49, "learning_rate": 0.001078826192831465, "loss": 2.0053, "step": 1511 }, { "epoch": 0.49, "learning_rate": 0.0010777773157758784, "loss": 1.9646, "step": 1512 }, { "epoch": 0.49, "learning_rate": 0.0010767283526262595, "loss": 1.9678, "step": 1513 }, { "epoch": 0.49, "learning_rate": 0.0010756793045437365, "loss": 2.031, "step": 1514 }, { "epoch": 0.49, "learning_rate": 0.0010746301726895324, "loss": 2.0045, "step": 1515 }, { "epoch": 0.49, "learning_rate": 0.001073580958224963, "loss": 1.986, "step": 1516 }, { "epoch": 0.49, "learning_rate": 0.0010725316623114343, "loss": 1.9799, "step": 1517 }, { "epoch": 0.49, "learning_rate": 0.0010714822861104432, "loss": 1.9548, "step": 1518 }, { "epoch": 0.49, "learning_rate": 0.0010704328307835765, "loss": 1.9639, "step": 1519 }, { "epoch": 0.49, "learning_rate": 0.001069383297492507, "loss": 1.9937, "step": 1520 }, { "epoch": 0.49, "learning_rate": 0.0010683336873989941, "loss": 1.9835, "step": 1521 }, { "epoch": 0.49, "learning_rate": 0.0010672840016648834, "loss": 2.012, "step": 1522 }, { "epoch": 0.49, "learning_rate": 0.0010662342414521028, "loss": 2.0561, "step": 1523 }, { "epoch": 0.49, "learning_rate": 0.0010651844079226631, "loss": 1.9337, "step": 1524 }, { "epoch": 0.5, "learning_rate": 0.0010641345022386571, "loss": 1.9762, "step": 1525 }, { "epoch": 0.5, "learning_rate": 0.0010630845255622556, "loss": 1.9506, "step": 1526 }, { "epoch": 0.5, "learning_rate": 0.0010620344790557101, "loss": 1.9668, "step": 1527 }, { "epoch": 0.5, "learning_rate": 0.0010609843638813479, "loss": 1.9873, "step": 1528 }, { "epoch": 0.5, "learning_rate": 0.0010599341812015733, "loss": 2.0113, "step": 1529 }, { "epoch": 0.5, "learning_rate": 0.0010588839321788643, "loss": 2.0119, "step": 1530 }, { "epoch": 0.5, "learning_rate": 0.0010578336179757732, "loss": 2.0328, "step": 1531 }, { "epoch": 0.5, "learning_rate": 0.0010567832397549242, "loss": 1.9681, "step": 1532 }, { "epoch": 0.5, "learning_rate": 0.001055732798679012, "loss": 1.9323, "step": 1533 }, { "epoch": 0.5, "learning_rate": 0.0010546822959108016, "loss": 2.0237, "step": 1534 }, { "epoch": 0.5, "learning_rate": 0.0010536317326131255, "loss": 2.0133, "step": 1535 }, { "epoch": 0.5, "learning_rate": 0.0010525811099488837, "loss": 1.9234, "step": 1536 }, { "epoch": 0.5, "learning_rate": 0.001051530429081042, "loss": 1.9643, "step": 1537 }, { "epoch": 0.5, "learning_rate": 0.0010504796911726302, "loss": 2.0206, "step": 1538 }, { "epoch": 0.5, "learning_rate": 0.0010494288973867411, "loss": 1.96, "step": 1539 }, { "epoch": 0.5, "learning_rate": 0.0010483780488865304, "loss": 1.9971, "step": 1540 }, { "epoch": 0.5, "learning_rate": 0.0010473271468352137, "loss": 2.0204, "step": 1541 }, { "epoch": 0.5, "learning_rate": 0.001046276192396065, "loss": 2.0258, "step": 1542 }, { "epoch": 0.5, "learning_rate": 0.0010452251867324176, "loss": 1.9346, "step": 1543 }, { "epoch": 0.5, "learning_rate": 0.0010441741310076615, "loss": 2.015, "step": 1544 }, { "epoch": 0.5, "learning_rate": 0.001043123026385241, "loss": 1.9739, "step": 1545 }, { "epoch": 0.5, "learning_rate": 0.0010420718740286555, "loss": 2.0023, "step": 1546 }, { "epoch": 0.5, "learning_rate": 0.0010410206751014566, "loss": 2.0228, "step": 1547 }, { "epoch": 0.5, "learning_rate": 0.0010399694307672482, "loss": 1.9363, "step": 1548 }, { "epoch": 0.5, "learning_rate": 0.0010389181421896839, "loss": 1.9665, "step": 1549 }, { "epoch": 0.5, "learning_rate": 0.0010378668105324662, "loss": 1.926, "step": 1550 }, { "epoch": 0.5, "learning_rate": 0.0010368154369593456, "loss": 1.989, "step": 1551 }, { "epoch": 0.5, "learning_rate": 0.0010357640226341192, "loss": 1.9684, "step": 1552 }, { "epoch": 0.5, "learning_rate": 0.0010347125687206287, "loss": 1.9626, "step": 1553 }, { "epoch": 0.5, "learning_rate": 0.0010336610763827594, "loss": 1.9729, "step": 1554 }, { "epoch": 0.51, "learning_rate": 0.0010326095467844399, "loss": 1.9802, "step": 1555 }, { "epoch": 0.51, "learning_rate": 0.0010315579810896399, "loss": 2.0125, "step": 1556 }, { "epoch": 0.51, "learning_rate": 0.0010305063804623687, "loss": 1.9968, "step": 1557 }, { "epoch": 0.51, "learning_rate": 0.0010294547460666743, "loss": 2.0094, "step": 1558 }, { "epoch": 0.51, "learning_rate": 0.0010284030790666421, "loss": 1.9932, "step": 1559 }, { "epoch": 0.51, "learning_rate": 0.0010273513806263943, "loss": 1.9948, "step": 1560 }, { "epoch": 0.51, "learning_rate": 0.0010262996519100867, "loss": 1.9315, "step": 1561 }, { "epoch": 0.51, "learning_rate": 0.0010252478940819092, "loss": 1.9959, "step": 1562 }, { "epoch": 0.51, "learning_rate": 0.0010241961083060844, "loss": 2.0812, "step": 1563 }, { "epoch": 0.51, "learning_rate": 0.0010231442957468646, "loss": 1.936, "step": 1564 }, { "epoch": 0.51, "learning_rate": 0.0010220924575685332, "loss": 1.9334, "step": 1565 }, { "epoch": 0.51, "learning_rate": 0.0010210405949354015, "loss": 2.0743, "step": 1566 }, { "epoch": 0.51, "learning_rate": 0.001019988709011807, "loss": 1.9782, "step": 1567 }, { "epoch": 0.51, "learning_rate": 0.0010189368009621137, "loss": 2.0199, "step": 1568 }, { "epoch": 0.51, "learning_rate": 0.0010178848719507106, "loss": 1.926, "step": 1569 }, { "epoch": 0.51, "learning_rate": 0.0010168329231420084, "loss": 2.001, "step": 1570 }, { "epoch": 0.51, "learning_rate": 0.001015780955700441, "loss": 1.9569, "step": 1571 }, { "epoch": 0.51, "learning_rate": 0.0010147289707904635, "loss": 2.0103, "step": 1572 }, { "epoch": 0.51, "learning_rate": 0.001013676969576548, "loss": 2.0018, "step": 1573 }, { "epoch": 0.51, "learning_rate": 0.0010126249532231865, "loss": 1.9848, "step": 1574 }, { "epoch": 0.51, "learning_rate": 0.0010115729228948878, "loss": 1.9725, "step": 1575 }, { "epoch": 0.51, "learning_rate": 0.0010105208797561755, "loss": 2.0296, "step": 1576 }, { "epoch": 0.51, "learning_rate": 0.0010094688249715866, "loss": 1.9522, "step": 1577 }, { "epoch": 0.51, "learning_rate": 0.0010084167597056732, "loss": 1.9606, "step": 1578 }, { "epoch": 0.51, "learning_rate": 0.0010073646851229971, "loss": 2.003, "step": 1579 }, { "epoch": 0.51, "learning_rate": 0.0010063126023881312, "loss": 1.9781, "step": 1580 }, { "epoch": 0.51, "learning_rate": 0.0010052605126656573, "loss": 1.9846, "step": 1581 }, { "epoch": 0.51, "learning_rate": 0.0010042084171201654, "loss": 1.9346, "step": 1582 }, { "epoch": 0.51, "learning_rate": 0.0010031563169162502, "loss": 2.0157, "step": 1583 }, { "epoch": 0.51, "learning_rate": 0.001002104213218514, "loss": 1.9656, "step": 1584 }, { "epoch": 0.51, "learning_rate": 0.0010010521071915617, "loss": 2.0394, "step": 1585 }, { "epoch": 0.52, "learning_rate": 0.001, "loss": 2.1115, "step": 1586 }, { "epoch": 0.52, "learning_rate": 0.0009989478928084385, "loss": 2.0265, "step": 1587 }, { "epoch": 0.52, "learning_rate": 0.000997895786781486, "loss": 1.9647, "step": 1588 }, { "epoch": 0.52, "learning_rate": 0.0009968436830837498, "loss": 1.9512, "step": 1589 }, { "epoch": 0.52, "learning_rate": 0.0009957915828798349, "loss": 1.9189, "step": 1590 }, { "epoch": 0.52, "learning_rate": 0.0009947394873343427, "loss": 1.9728, "step": 1591 }, { "epoch": 0.52, "learning_rate": 0.0009936873976118688, "loss": 2.0013, "step": 1592 }, { "epoch": 0.52, "learning_rate": 0.000992635314877003, "loss": 1.9386, "step": 1593 }, { "epoch": 0.52, "learning_rate": 0.0009915832402943268, "loss": 2.0413, "step": 1594 }, { "epoch": 0.52, "learning_rate": 0.0009905311750284136, "loss": 2.0455, "step": 1595 }, { "epoch": 0.52, "learning_rate": 0.0009894791202438252, "loss": 1.9944, "step": 1596 }, { "epoch": 0.52, "learning_rate": 0.0009884270771051125, "loss": 1.9801, "step": 1597 }, { "epoch": 0.52, "learning_rate": 0.0009873750467768137, "loss": 1.9845, "step": 1598 }, { "epoch": 0.52, "learning_rate": 0.0009863230304234525, "loss": 1.9974, "step": 1599 }, { "epoch": 0.52, "learning_rate": 0.000985271029209537, "loss": 1.9149, "step": 1600 }, { "epoch": 0.52, "learning_rate": 0.0009842190442995592, "loss": 2.0311, "step": 1601 }, { "epoch": 0.52, "learning_rate": 0.0009831670768579916, "loss": 1.9615, "step": 1602 }, { "epoch": 0.52, "learning_rate": 0.0009821151280492897, "loss": 2.0104, "step": 1603 }, { "epoch": 0.52, "learning_rate": 0.0009810631990378861, "loss": 1.9678, "step": 1604 }, { "epoch": 0.52, "learning_rate": 0.000980011290988193, "loss": 1.9849, "step": 1605 }, { "epoch": 0.52, "learning_rate": 0.0009789594050645986, "loss": 1.9993, "step": 1606 }, { "epoch": 0.52, "learning_rate": 0.0009779075424314666, "loss": 1.9363, "step": 1607 }, { "epoch": 0.52, "learning_rate": 0.0009768557042531354, "loss": 2.016, "step": 1608 }, { "epoch": 0.52, "learning_rate": 0.0009758038916939157, "loss": 2.0202, "step": 1609 }, { "epoch": 0.52, "learning_rate": 0.000974752105918091, "loss": 1.904, "step": 1610 }, { "epoch": 0.52, "learning_rate": 0.0009737003480899136, "loss": 1.9898, "step": 1611 }, { "epoch": 0.52, "learning_rate": 0.0009726486193736058, "loss": 2.024, "step": 1612 }, { "epoch": 0.52, "learning_rate": 0.0009715969209333579, "loss": 1.9486, "step": 1613 }, { "epoch": 0.52, "learning_rate": 0.0009705452539333259, "loss": 1.9851, "step": 1614 }, { "epoch": 0.52, "learning_rate": 0.0009694936195376314, "loss": 1.9982, "step": 1615 }, { "epoch": 0.52, "learning_rate": 0.0009684420189103603, "loss": 1.9514, "step": 1616 }, { "epoch": 0.53, "learning_rate": 0.0009673904532155602, "loss": 1.9297, "step": 1617 }, { "epoch": 0.53, "learning_rate": 0.0009663389236172407, "loss": 1.9525, "step": 1618 }, { "epoch": 0.53, "learning_rate": 0.0009652874312793716, "loss": 2.0181, "step": 1619 }, { "epoch": 0.53, "learning_rate": 0.000964235977365881, "loss": 1.9276, "step": 1620 }, { "epoch": 0.53, "learning_rate": 0.0009631845630406544, "loss": 2.0593, "step": 1621 }, { "epoch": 0.53, "learning_rate": 0.0009621331894675339, "loss": 1.9457, "step": 1622 }, { "epoch": 0.53, "learning_rate": 0.0009610818578103164, "loss": 1.9651, "step": 1623 }, { "epoch": 0.53, "learning_rate": 0.000960030569232752, "loss": 1.9265, "step": 1624 }, { "epoch": 0.53, "learning_rate": 0.0009589793248985436, "loss": 1.9734, "step": 1625 }, { "epoch": 0.53, "learning_rate": 0.0009579281259713449, "loss": 1.9525, "step": 1626 }, { "epoch": 0.53, "learning_rate": 0.0009568769736147593, "loss": 2.0336, "step": 1627 }, { "epoch": 0.53, "learning_rate": 0.0009558258689923388, "loss": 1.9424, "step": 1628 }, { "epoch": 0.53, "learning_rate": 0.0009547748132675826, "loss": 2.0631, "step": 1629 }, { "epoch": 0.53, "learning_rate": 0.0009537238076039354, "loss": 2.0071, "step": 1630 }, { "epoch": 0.53, "learning_rate": 0.0009526728531647868, "loss": 1.9346, "step": 1631 }, { "epoch": 0.53, "learning_rate": 0.0009516219511134695, "loss": 2.0024, "step": 1632 }, { "epoch": 0.53, "learning_rate": 0.0009505711026132586, "loss": 2.0097, "step": 1633 }, { "epoch": 0.53, "learning_rate": 0.0009495203088273698, "loss": 2.034, "step": 1634 }, { "epoch": 0.53, "learning_rate": 0.0009484695709189581, "loss": 1.9967, "step": 1635 }, { "epoch": 0.53, "learning_rate": 0.0009474188900511163, "loss": 1.9834, "step": 1636 }, { "epoch": 0.53, "learning_rate": 0.0009463682673868744, "loss": 1.9937, "step": 1637 }, { "epoch": 0.53, "learning_rate": 0.0009453177040891986, "loss": 2.0226, "step": 1638 }, { "epoch": 0.53, "learning_rate": 0.0009442672013209881, "loss": 1.9485, "step": 1639 }, { "epoch": 0.53, "learning_rate": 0.0009432167602450759, "loss": 1.9673, "step": 1640 }, { "epoch": 0.53, "learning_rate": 0.000942166382024227, "loss": 1.9638, "step": 1641 }, { "epoch": 0.53, "learning_rate": 0.000941116067821136, "loss": 2.0197, "step": 1642 }, { "epoch": 0.53, "learning_rate": 0.0009400658187984269, "loss": 1.8942, "step": 1643 }, { "epoch": 0.53, "learning_rate": 0.0009390156361186523, "loss": 1.9804, "step": 1644 }, { "epoch": 0.53, "learning_rate": 0.0009379655209442901, "loss": 1.9793, "step": 1645 }, { "epoch": 0.53, "learning_rate": 0.0009369154744377445, "loss": 2.0104, "step": 1646 }, { "epoch": 0.53, "learning_rate": 0.0009358654977613432, "loss": 2.0287, "step": 1647 }, { "epoch": 0.54, "learning_rate": 0.000934815592077337, "loss": 2.0093, "step": 1648 }, { "epoch": 0.54, "learning_rate": 0.0009337657585478974, "loss": 1.9822, "step": 1649 }, { "epoch": 0.54, "learning_rate": 0.0009327159983351167, "loss": 2.0142, "step": 1650 }, { "epoch": 0.54, "learning_rate": 0.000931666312601006, "loss": 1.8852, "step": 1651 }, { "epoch": 0.54, "learning_rate": 0.0009306167025074933, "loss": 1.9687, "step": 1652 }, { "epoch": 0.54, "learning_rate": 0.0009295671692164237, "loss": 1.9465, "step": 1653 }, { "epoch": 0.54, "learning_rate": 0.000928517713889557, "loss": 1.9656, "step": 1654 }, { "epoch": 0.54, "learning_rate": 0.0009274683376885662, "loss": 1.9282, "step": 1655 }, { "epoch": 0.54, "learning_rate": 0.0009264190417750375, "loss": 1.9892, "step": 1656 }, { "epoch": 0.54, "learning_rate": 0.0009253698273104676, "loss": 1.9753, "step": 1657 }, { "epoch": 0.54, "learning_rate": 0.0009243206954562638, "loss": 2.0136, "step": 1658 }, { "epoch": 0.54, "learning_rate": 0.000923271647373741, "loss": 1.97, "step": 1659 }, { "epoch": 0.54, "learning_rate": 0.0009222226842241219, "loss": 2.0307, "step": 1660 }, { "epoch": 0.54, "learning_rate": 0.0009211738071685354, "loss": 1.9554, "step": 1661 }, { "epoch": 0.54, "learning_rate": 0.0009201250173680145, "loss": 2.0446, "step": 1662 }, { "epoch": 0.54, "learning_rate": 0.0009190763159834956, "loss": 1.9999, "step": 1663 }, { "epoch": 0.54, "learning_rate": 0.0009180277041758185, "loss": 1.9338, "step": 1664 }, { "epoch": 0.54, "learning_rate": 0.0009169791831057223, "loss": 1.9689, "step": 1665 }, { "epoch": 0.54, "learning_rate": 0.0009159307539338466, "loss": 1.9873, "step": 1666 }, { "epoch": 0.54, "learning_rate": 0.0009148824178207284, "loss": 1.9856, "step": 1667 }, { "epoch": 0.54, "learning_rate": 0.0009138341759268028, "loss": 1.939, "step": 1668 }, { "epoch": 0.54, "learning_rate": 0.0009127860294124, "loss": 1.9604, "step": 1669 }, { "epoch": 0.54, "learning_rate": 0.0009117379794377443, "loss": 1.993, "step": 1670 }, { "epoch": 0.54, "learning_rate": 0.0009106900271629536, "loss": 1.9469, "step": 1671 }, { "epoch": 0.54, "learning_rate": 0.0009096421737480376, "loss": 1.9814, "step": 1672 }, { "epoch": 0.54, "learning_rate": 0.0009085944203528968, "loss": 1.9983, "step": 1673 }, { "epoch": 0.54, "learning_rate": 0.0009075467681373202, "loss": 2.0085, "step": 1674 }, { "epoch": 0.54, "learning_rate": 0.0009064992182609857, "loss": 1.9776, "step": 1675 }, { "epoch": 0.54, "learning_rate": 0.0009054517718834575, "loss": 1.97, "step": 1676 }, { "epoch": 0.54, "learning_rate": 0.0009044044301641849, "loss": 1.9797, "step": 1677 }, { "epoch": 0.54, "learning_rate": 0.0009033571942625022, "loss": 1.967, "step": 1678 }, { "epoch": 0.55, "learning_rate": 0.000902310065337626, "loss": 1.9823, "step": 1679 }, { "epoch": 0.55, "learning_rate": 0.0009012630445486541, "loss": 1.9771, "step": 1680 }, { "epoch": 0.55, "learning_rate": 0.0009002161330545653, "loss": 2.0161, "step": 1681 }, { "epoch": 0.55, "learning_rate": 0.0008991693320142176, "loss": 1.9691, "step": 1682 }, { "epoch": 0.55, "learning_rate": 0.0008981226425863457, "loss": 1.9369, "step": 1683 }, { "epoch": 0.55, "learning_rate": 0.0008970760659295616, "loss": 2.0251, "step": 1684 }, { "epoch": 0.55, "learning_rate": 0.0008960296032023523, "loss": 1.9637, "step": 1685 }, { "epoch": 0.55, "learning_rate": 0.0008949832555630791, "loss": 1.9951, "step": 1686 }, { "epoch": 0.55, "learning_rate": 0.0008939370241699743, "loss": 1.924, "step": 1687 }, { "epoch": 0.55, "learning_rate": 0.0008928909101811437, "loss": 2.0118, "step": 1688 }, { "epoch": 0.55, "learning_rate": 0.0008918449147545616, "loss": 1.9914, "step": 1689 }, { "epoch": 0.55, "learning_rate": 0.0008907990390480716, "loss": 1.9667, "step": 1690 }, { "epoch": 0.55, "learning_rate": 0.0008897532842193849, "loss": 1.9452, "step": 1691 }, { "epoch": 0.55, "learning_rate": 0.0008887076514260786, "loss": 1.9628, "step": 1692 }, { "epoch": 0.55, "learning_rate": 0.0008876621418255945, "loss": 2.007, "step": 1693 }, { "epoch": 0.55, "learning_rate": 0.0008866167565752392, "loss": 1.9892, "step": 1694 }, { "epoch": 0.55, "learning_rate": 0.0008855714968321803, "loss": 1.9835, "step": 1695 }, { "epoch": 0.55, "learning_rate": 0.0008845263637534471, "loss": 1.9784, "step": 1696 }, { "epoch": 0.55, "learning_rate": 0.0008834813584959289, "loss": 2.0278, "step": 1697 }, { "epoch": 0.55, "learning_rate": 0.0008824364822163726, "loss": 2.0235, "step": 1698 }, { "epoch": 0.55, "learning_rate": 0.0008813917360713833, "loss": 1.98, "step": 1699 }, { "epoch": 0.55, "learning_rate": 0.0008803471212174217, "loss": 1.9704, "step": 1700 }, { "epoch": 0.55, "learning_rate": 0.0008793026388108033, "loss": 1.933, "step": 1701 }, { "epoch": 0.55, "learning_rate": 0.0008782582900076965, "loss": 1.9212, "step": 1702 }, { "epoch": 0.55, "learning_rate": 0.0008772140759641224, "loss": 2.0121, "step": 1703 }, { "epoch": 0.55, "learning_rate": 0.0008761699978359527, "loss": 1.9687, "step": 1704 }, { "epoch": 0.55, "learning_rate": 0.0008751260567789086, "loss": 1.9705, "step": 1705 }, { "epoch": 0.55, "learning_rate": 0.0008740822539485593, "loss": 2.0596, "step": 1706 }, { "epoch": 0.55, "learning_rate": 0.000873038590500322, "loss": 1.8859, "step": 1707 }, { "epoch": 0.55, "learning_rate": 0.0008719950675894583, "loss": 1.9996, "step": 1708 }, { "epoch": 0.56, "learning_rate": 0.000870951686371075, "loss": 2.029, "step": 1709 }, { "epoch": 0.56, "learning_rate": 0.0008699084480001223, "loss": 2.0221, "step": 1710 }, { "epoch": 0.56, "learning_rate": 0.0008688653536313914, "loss": 1.9635, "step": 1711 }, { "epoch": 0.56, "learning_rate": 0.0008678224044195148, "loss": 1.9739, "step": 1712 }, { "epoch": 0.56, "learning_rate": 0.000866779601518964, "loss": 1.9892, "step": 1713 }, { "epoch": 0.56, "learning_rate": 0.000865736946084049, "loss": 1.999, "step": 1714 }, { "epoch": 0.56, "learning_rate": 0.0008646944392689158, "loss": 2.0473, "step": 1715 }, { "epoch": 0.56, "learning_rate": 0.0008636520822275464, "loss": 1.9934, "step": 1716 }, { "epoch": 0.56, "learning_rate": 0.0008626098761137572, "loss": 2.0001, "step": 1717 }, { "epoch": 0.56, "learning_rate": 0.0008615678220811971, "loss": 2.0263, "step": 1718 }, { "epoch": 0.56, "learning_rate": 0.0008605259212833466, "loss": 1.9561, "step": 1719 }, { "epoch": 0.56, "learning_rate": 0.0008594841748735171, "loss": 1.9826, "step": 1720 }, { "epoch": 0.56, "learning_rate": 0.0008584425840048487, "loss": 1.9698, "step": 1721 }, { "epoch": 0.56, "learning_rate": 0.0008574011498303092, "loss": 1.9259, "step": 1722 }, { "epoch": 0.56, "learning_rate": 0.0008563598735026934, "loss": 2.0179, "step": 1723 }, { "epoch": 0.56, "learning_rate": 0.000855318756174621, "loss": 2.0285, "step": 1724 }, { "epoch": 0.56, "learning_rate": 0.0008542777989985364, "loss": 2.0327, "step": 1725 }, { "epoch": 0.56, "learning_rate": 0.0008532370031267053, "loss": 1.9617, "step": 1726 }, { "epoch": 0.56, "learning_rate": 0.0008521963697112162, "loss": 2.0022, "step": 1727 }, { "epoch": 0.56, "learning_rate": 0.0008511558999039774, "loss": 2.0173, "step": 1728 }, { "epoch": 0.56, "learning_rate": 0.000850115594856716, "loss": 1.9174, "step": 1729 }, { "epoch": 0.56, "learning_rate": 0.0008490754557209762, "loss": 2.0466, "step": 1730 }, { "epoch": 0.56, "learning_rate": 0.0008480354836481194, "loss": 1.9969, "step": 1731 }, { "epoch": 0.56, "learning_rate": 0.0008469956797893221, "loss": 1.9836, "step": 1732 }, { "epoch": 0.56, "learning_rate": 0.0008459560452955734, "loss": 1.9434, "step": 1733 }, { "epoch": 0.56, "learning_rate": 0.0008449165813176764, "loss": 2.0549, "step": 1734 }, { "epoch": 0.56, "learning_rate": 0.0008438772890062448, "loss": 1.9915, "step": 1735 }, { "epoch": 0.56, "learning_rate": 0.0008428381695117018, "loss": 1.8869, "step": 1736 }, { "epoch": 0.56, "learning_rate": 0.00084179922398428, "loss": 1.8743, "step": 1737 }, { "epoch": 0.56, "learning_rate": 0.0008407604535740195, "loss": 2.0145, "step": 1738 }, { "epoch": 0.56, "learning_rate": 0.0008397218594307661, "loss": 1.9042, "step": 1739 }, { "epoch": 0.57, "learning_rate": 0.0008386834427041704, "loss": 1.9218, "step": 1740 }, { "epoch": 0.57, "learning_rate": 0.000837645204543687, "loss": 1.9682, "step": 1741 }, { "epoch": 0.57, "learning_rate": 0.0008366071460985733, "loss": 1.8635, "step": 1742 }, { "epoch": 0.57, "learning_rate": 0.0008355692685178862, "loss": 1.9915, "step": 1743 }, { "epoch": 0.57, "learning_rate": 0.0008345315729504838, "loss": 1.9664, "step": 1744 }, { "epoch": 0.57, "learning_rate": 0.0008334940605450228, "loss": 1.9627, "step": 1745 }, { "epoch": 0.57, "learning_rate": 0.0008324567324499559, "loss": 1.9971, "step": 1746 }, { "epoch": 0.57, "learning_rate": 0.0008314195898135328, "loss": 1.9748, "step": 1747 }, { "epoch": 0.57, "learning_rate": 0.0008303826337837979, "loss": 1.9482, "step": 1748 }, { "epoch": 0.57, "learning_rate": 0.0008293458655085884, "loss": 2.0236, "step": 1749 }, { "epoch": 0.57, "learning_rate": 0.000828309286135534, "loss": 1.9953, "step": 1750 }, { "epoch": 0.57, "learning_rate": 0.0008272728968120557, "loss": 1.973, "step": 1751 }, { "epoch": 0.57, "learning_rate": 0.0008262366986853637, "loss": 2.0457, "step": 1752 }, { "epoch": 0.57, "learning_rate": 0.0008252006929024565, "loss": 1.9848, "step": 1753 }, { "epoch": 0.57, "learning_rate": 0.0008241648806101196, "loss": 1.9285, "step": 1754 }, { "epoch": 0.57, "learning_rate": 0.0008231292629549252, "loss": 1.9616, "step": 1755 }, { "epoch": 0.57, "learning_rate": 0.0008220938410832289, "loss": 2.1081, "step": 1756 }, { "epoch": 0.57, "learning_rate": 0.0008210586161411704, "loss": 1.9339, "step": 1757 }, { "epoch": 0.57, "learning_rate": 0.0008200235892746705, "loss": 2.0079, "step": 1758 }, { "epoch": 0.57, "learning_rate": 0.000818988761629432, "loss": 2.001, "step": 1759 }, { "epoch": 0.57, "learning_rate": 0.0008179541343509365, "loss": 2.0024, "step": 1760 }, { "epoch": 0.57, "learning_rate": 0.0008169197085844431, "loss": 1.9783, "step": 1761 }, { "epoch": 0.57, "learning_rate": 0.0008158854854749891, "loss": 2.0527, "step": 1762 }, { "epoch": 0.57, "learning_rate": 0.000814851466167387, "loss": 1.9876, "step": 1763 }, { "epoch": 0.57, "learning_rate": 0.0008138176518062234, "loss": 2.0189, "step": 1764 }, { "epoch": 0.57, "learning_rate": 0.0008127840435358582, "loss": 1.9691, "step": 1765 }, { "epoch": 0.57, "learning_rate": 0.0008117506425004234, "loss": 2.0089, "step": 1766 }, { "epoch": 0.57, "learning_rate": 0.0008107174498438217, "loss": 2.0181, "step": 1767 }, { "epoch": 0.57, "learning_rate": 0.0008096844667097244, "loss": 2.0392, "step": 1768 }, { "epoch": 0.57, "learning_rate": 0.0008086516942415716, "loss": 2.0529, "step": 1769 }, { "epoch": 0.57, "learning_rate": 0.0008076191335825702, "loss": 1.9907, "step": 1770 }, { "epoch": 0.58, "learning_rate": 0.000806586785875692, "loss": 1.9291, "step": 1771 }, { "epoch": 0.58, "learning_rate": 0.0008055546522636736, "loss": 1.9969, "step": 1772 }, { "epoch": 0.58, "learning_rate": 0.000804522733889015, "loss": 1.9671, "step": 1773 }, { "epoch": 0.58, "learning_rate": 0.0008034910318939766, "loss": 1.9188, "step": 1774 }, { "epoch": 0.58, "learning_rate": 0.0008024595474205807, "loss": 1.9888, "step": 1775 }, { "epoch": 0.58, "learning_rate": 0.000801428281610608, "loss": 1.9667, "step": 1776 }, { "epoch": 0.58, "learning_rate": 0.0008003972356055977, "loss": 1.955, "step": 1777 }, { "epoch": 0.58, "learning_rate": 0.0007993664105468452, "loss": 1.9612, "step": 1778 }, { "epoch": 0.58, "learning_rate": 0.0007983358075754015, "loss": 1.9994, "step": 1779 }, { "epoch": 0.58, "learning_rate": 0.000797305427832072, "loss": 1.995, "step": 1780 }, { "epoch": 0.58, "learning_rate": 0.0007962752724574144, "loss": 1.9729, "step": 1781 }, { "epoch": 0.58, "learning_rate": 0.0007952453425917386, "loss": 1.9578, "step": 1782 }, { "epoch": 0.58, "learning_rate": 0.0007942156393751052, "loss": 2.0136, "step": 1783 }, { "epoch": 0.58, "learning_rate": 0.0007931861639473226, "loss": 1.9178, "step": 1784 }, { "epoch": 0.58, "learning_rate": 0.0007921569174479481, "loss": 1.9162, "step": 1785 }, { "epoch": 0.58, "learning_rate": 0.000791127901016286, "loss": 2.0047, "step": 1786 }, { "epoch": 0.58, "learning_rate": 0.0007900991157913846, "loss": 1.9636, "step": 1787 }, { "epoch": 0.58, "learning_rate": 0.0007890705629120375, "loss": 2.0034, "step": 1788 }, { "epoch": 0.58, "learning_rate": 0.0007880422435167801, "loss": 1.929, "step": 1789 }, { "epoch": 0.58, "learning_rate": 0.00078701415874389, "loss": 1.8884, "step": 1790 }, { "epoch": 0.58, "learning_rate": 0.0007859863097313851, "loss": 2.0243, "step": 1791 }, { "epoch": 0.58, "learning_rate": 0.0007849586976170222, "loss": 1.9691, "step": 1792 }, { "epoch": 0.58, "learning_rate": 0.0007839313235382954, "loss": 1.9572, "step": 1793 }, { "epoch": 0.58, "learning_rate": 0.0007829041886324359, "loss": 1.9209, "step": 1794 }, { "epoch": 0.58, "learning_rate": 0.0007818772940364105, "loss": 1.9938, "step": 1795 }, { "epoch": 0.58, "learning_rate": 0.0007808506408869187, "loss": 1.9695, "step": 1796 }, { "epoch": 0.58, "learning_rate": 0.0007798242303203939, "loss": 1.9271, "step": 1797 }, { "epoch": 0.58, "learning_rate": 0.0007787980634730007, "loss": 2.0286, "step": 1798 }, { "epoch": 0.58, "learning_rate": 0.0007777721414806333, "loss": 1.9787, "step": 1799 }, { "epoch": 0.58, "learning_rate": 0.0007767464654789161, "loss": 2.0081, "step": 1800 }, { "epoch": 0.58, "learning_rate": 0.0007757210366032001, "loss": 1.9589, "step": 1801 }, { "epoch": 0.59, "learning_rate": 0.000774695855988563, "loss": 1.9344, "step": 1802 }, { "epoch": 0.59, "learning_rate": 0.0007736709247698079, "loss": 2.0335, "step": 1803 }, { "epoch": 0.59, "learning_rate": 0.000772646244081462, "loss": 1.9429, "step": 1804 }, { "epoch": 0.59, "learning_rate": 0.000771621815057775, "loss": 2.0139, "step": 1805 }, { "epoch": 0.59, "learning_rate": 0.0007705976388327173, "loss": 1.9585, "step": 1806 }, { "epoch": 0.59, "learning_rate": 0.0007695737165399807, "loss": 1.9613, "step": 1807 }, { "epoch": 0.59, "learning_rate": 0.0007685500493129755, "loss": 1.9306, "step": 1808 }, { "epoch": 0.59, "learning_rate": 0.0007675266382848289, "loss": 2.0499, "step": 1809 }, { "epoch": 0.59, "learning_rate": 0.0007665034845883856, "loss": 1.8765, "step": 1810 }, { "epoch": 0.59, "learning_rate": 0.0007654805893562049, "loss": 1.9346, "step": 1811 }, { "epoch": 0.59, "learning_rate": 0.0007644579537205598, "loss": 1.9605, "step": 1812 }, { "epoch": 0.59, "learning_rate": 0.0007634355788134364, "loss": 1.9722, "step": 1813 }, { "epoch": 0.59, "learning_rate": 0.000762413465766532, "loss": 1.9239, "step": 1814 }, { "epoch": 0.59, "learning_rate": 0.0007613916157112537, "loss": 1.9108, "step": 1815 }, { "epoch": 0.59, "learning_rate": 0.0007603700297787187, "loss": 1.9092, "step": 1816 }, { "epoch": 0.59, "learning_rate": 0.0007593487090997501, "loss": 2.0477, "step": 1817 }, { "epoch": 0.59, "learning_rate": 0.0007583276548048788, "loss": 1.9884, "step": 1818 }, { "epoch": 0.59, "learning_rate": 0.0007573068680243399, "loss": 1.9824, "step": 1819 }, { "epoch": 0.59, "learning_rate": 0.000756286349888073, "loss": 1.9528, "step": 1820 }, { "epoch": 0.59, "learning_rate": 0.00075526610152572, "loss": 1.9898, "step": 1821 }, { "epoch": 0.59, "learning_rate": 0.000754246124066624, "loss": 1.8995, "step": 1822 }, { "epoch": 0.59, "learning_rate": 0.0007532264186398293, "loss": 1.9548, "step": 1823 }, { "epoch": 0.59, "learning_rate": 0.0007522069863740771, "loss": 1.9914, "step": 1824 }, { "epoch": 0.59, "learning_rate": 0.0007511878283978082, "loss": 1.9912, "step": 1825 }, { "epoch": 0.59, "learning_rate": 0.0007501689458391591, "loss": 1.9875, "step": 1826 }, { "epoch": 0.59, "learning_rate": 0.0007491503398259607, "loss": 1.931, "step": 1827 }, { "epoch": 0.59, "learning_rate": 0.0007481320114857388, "loss": 1.9449, "step": 1828 }, { "epoch": 0.59, "learning_rate": 0.0007471139619457112, "loss": 1.979, "step": 1829 }, { "epoch": 0.59, "learning_rate": 0.0007460961923327878, "loss": 1.9493, "step": 1830 }, { "epoch": 0.59, "learning_rate": 0.0007450787037735677, "loss": 2.0071, "step": 1831 }, { "epoch": 0.59, "learning_rate": 0.0007440614973943393, "loss": 2.0383, "step": 1832 }, { "epoch": 0.6, "learning_rate": 0.0007430445743210794, "loss": 1.9463, "step": 1833 }, { "epoch": 0.6, "learning_rate": 0.0007420279356794497, "loss": 1.9887, "step": 1834 }, { "epoch": 0.6, "learning_rate": 0.0007410115825947981, "loss": 1.9817, "step": 1835 }, { "epoch": 0.6, "learning_rate": 0.0007399955161921564, "loss": 2.0475, "step": 1836 }, { "epoch": 0.6, "learning_rate": 0.0007389797375962386, "loss": 1.9644, "step": 1837 }, { "epoch": 0.6, "learning_rate": 0.0007379642479314404, "loss": 1.9929, "step": 1838 }, { "epoch": 0.6, "learning_rate": 0.0007369490483218377, "loss": 1.9816, "step": 1839 }, { "epoch": 0.6, "learning_rate": 0.000735934139891185, "loss": 1.89, "step": 1840 }, { "epoch": 0.6, "learning_rate": 0.0007349195237629151, "loss": 1.99, "step": 1841 }, { "epoch": 0.6, "learning_rate": 0.0007339052010601365, "loss": 1.8957, "step": 1842 }, { "epoch": 0.6, "learning_rate": 0.0007328911729056338, "loss": 2.0506, "step": 1843 }, { "epoch": 0.6, "learning_rate": 0.0007318774404218645, "loss": 1.954, "step": 1844 }, { "epoch": 0.6, "learning_rate": 0.0007308640047309591, "loss": 1.9688, "step": 1845 }, { "epoch": 0.6, "learning_rate": 0.0007298508669547208, "loss": 1.9913, "step": 1846 }, { "epoch": 0.6, "learning_rate": 0.0007288380282146213, "loss": 1.967, "step": 1847 }, { "epoch": 0.6, "learning_rate": 0.0007278254896318026, "loss": 1.9979, "step": 1848 }, { "epoch": 0.6, "learning_rate": 0.0007268132523270732, "loss": 1.9144, "step": 1849 }, { "epoch": 0.6, "learning_rate": 0.0007258013174209092, "loss": 2.0022, "step": 1850 }, { "epoch": 0.6, "learning_rate": 0.0007247896860334515, "loss": 1.9511, "step": 1851 }, { "epoch": 0.6, "learning_rate": 0.000723778359284505, "loss": 1.9859, "step": 1852 }, { "epoch": 0.6, "learning_rate": 0.0007227673382935376, "loss": 1.9202, "step": 1853 }, { "epoch": 0.6, "learning_rate": 0.0007217566241796785, "loss": 1.9959, "step": 1854 }, { "epoch": 0.6, "learning_rate": 0.0007207462180617172, "loss": 1.9857, "step": 1855 }, { "epoch": 0.6, "learning_rate": 0.0007197361210581026, "loss": 2.0757, "step": 1856 }, { "epoch": 0.6, "learning_rate": 0.0007187263342869412, "loss": 1.9412, "step": 1857 }, { "epoch": 0.6, "learning_rate": 0.0007177168588659961, "loss": 1.9479, "step": 1858 }, { "epoch": 0.6, "learning_rate": 0.0007167076959126857, "loss": 2.0021, "step": 1859 }, { "epoch": 0.6, "learning_rate": 0.0007156988465440825, "loss": 1.9694, "step": 1860 }, { "epoch": 0.6, "learning_rate": 0.0007146903118769126, "loss": 2.0025, "step": 1861 }, { "epoch": 0.6, "learning_rate": 0.0007136820930275522, "loss": 1.929, "step": 1862 }, { "epoch": 0.61, "learning_rate": 0.0007126741911120294, "loss": 2.0295, "step": 1863 }, { "epoch": 0.61, "learning_rate": 0.000711666607246021, "loss": 1.9505, "step": 1864 }, { "epoch": 0.61, "learning_rate": 0.0007106593425448512, "loss": 1.9346, "step": 1865 }, { "epoch": 0.61, "learning_rate": 0.0007096523981234917, "loss": 2.005, "step": 1866 }, { "epoch": 0.61, "learning_rate": 0.0007086457750965591, "loss": 2.0177, "step": 1867 }, { "epoch": 0.61, "learning_rate": 0.000707639474578315, "loss": 1.9959, "step": 1868 }, { "epoch": 0.61, "learning_rate": 0.0007066334976826629, "loss": 1.9343, "step": 1869 }, { "epoch": 0.61, "learning_rate": 0.0007056278455231488, "loss": 1.9856, "step": 1870 }, { "epoch": 0.61, "learning_rate": 0.0007046225192129595, "loss": 1.9548, "step": 1871 }, { "epoch": 0.61, "learning_rate": 0.00070361751986492, "loss": 1.9328, "step": 1872 }, { "epoch": 0.61, "learning_rate": 0.0007026128485914946, "loss": 1.8947, "step": 1873 }, { "epoch": 0.61, "learning_rate": 0.0007016085065047839, "loss": 1.9552, "step": 1874 }, { "epoch": 0.61, "learning_rate": 0.0007006044947165238, "loss": 2.0229, "step": 1875 }, { "epoch": 0.61, "learning_rate": 0.0006996008143380848, "loss": 1.9534, "step": 1876 }, { "epoch": 0.61, "learning_rate": 0.0006985974664804712, "loss": 1.9784, "step": 1877 }, { "epoch": 0.61, "learning_rate": 0.0006975944522543187, "loss": 1.9063, "step": 1878 }, { "epoch": 0.61, "learning_rate": 0.0006965917727698934, "loss": 2.0572, "step": 1879 }, { "epoch": 0.61, "learning_rate": 0.0006955894291370912, "loss": 2.0055, "step": 1880 }, { "epoch": 0.61, "learning_rate": 0.0006945874224654361, "loss": 1.9697, "step": 1881 }, { "epoch": 0.61, "learning_rate": 0.0006935857538640793, "loss": 1.9551, "step": 1882 }, { "epoch": 0.61, "learning_rate": 0.0006925844244417978, "loss": 2.0453, "step": 1883 }, { "epoch": 0.61, "learning_rate": 0.0006915834353069928, "loss": 2.0172, "step": 1884 }, { "epoch": 0.61, "learning_rate": 0.0006905827875676891, "loss": 2.0414, "step": 1885 }, { "epoch": 0.61, "learning_rate": 0.0006895824823315338, "loss": 2.0015, "step": 1886 }, { "epoch": 0.61, "learning_rate": 0.0006885825207057942, "loss": 1.9625, "step": 1887 }, { "epoch": 0.61, "learning_rate": 0.0006875829037973579, "loss": 2.0789, "step": 1888 }, { "epoch": 0.61, "learning_rate": 0.0006865836327127308, "loss": 1.9478, "step": 1889 }, { "epoch": 0.61, "learning_rate": 0.0006855847085580356, "loss": 2.0178, "step": 1890 }, { "epoch": 0.61, "learning_rate": 0.0006845861324390116, "loss": 1.9612, "step": 1891 }, { "epoch": 0.61, "learning_rate": 0.0006835879054610123, "loss": 2.0498, "step": 1892 }, { "epoch": 0.61, "learning_rate": 0.000682590028729005, "loss": 1.9245, "step": 1893 }, { "epoch": 0.62, "learning_rate": 0.0006815925033475692, "loss": 1.9177, "step": 1894 }, { "epoch": 0.62, "learning_rate": 0.0006805953304208954, "loss": 1.9323, "step": 1895 }, { "epoch": 0.62, "learning_rate": 0.0006795985110527845, "loss": 1.9406, "step": 1896 }, { "epoch": 0.62, "learning_rate": 0.0006786020463466451, "loss": 1.9764, "step": 1897 }, { "epoch": 0.62, "learning_rate": 0.000677605937405494, "loss": 1.98, "step": 1898 }, { "epoch": 0.62, "learning_rate": 0.0006766101853319542, "loss": 1.9083, "step": 1899 }, { "epoch": 0.62, "learning_rate": 0.0006756147912282527, "loss": 1.934, "step": 1900 }, { "epoch": 0.62, "learning_rate": 0.0006746197561962213, "loss": 2.0014, "step": 1901 }, { "epoch": 0.62, "learning_rate": 0.0006736250813372944, "loss": 1.9195, "step": 1902 }, { "epoch": 0.62, "learning_rate": 0.0006726307677525066, "loss": 2.0019, "step": 1903 }, { "epoch": 0.62, "learning_rate": 0.0006716368165424933, "loss": 1.9826, "step": 1904 }, { "epoch": 0.62, "learning_rate": 0.0006706432288074895, "loss": 1.9464, "step": 1905 }, { "epoch": 0.62, "learning_rate": 0.0006696500056473261, "loss": 1.9775, "step": 1906 }, { "epoch": 0.62, "learning_rate": 0.0006686571481614326, "loss": 1.9764, "step": 1907 }, { "epoch": 0.62, "learning_rate": 0.0006676646574488317, "loss": 1.9529, "step": 1908 }, { "epoch": 0.62, "learning_rate": 0.0006666725346081413, "loss": 2.0103, "step": 1909 }, { "epoch": 0.62, "learning_rate": 0.0006656807807375718, "loss": 1.9286, "step": 1910 }, { "epoch": 0.62, "learning_rate": 0.0006646893969349253, "loss": 2.0195, "step": 1911 }, { "epoch": 0.62, "learning_rate": 0.0006636983842975939, "loss": 2.0011, "step": 1912 }, { "epoch": 0.62, "learning_rate": 0.0006627077439225591, "loss": 1.9679, "step": 1913 }, { "epoch": 0.62, "learning_rate": 0.0006617174769063905, "loss": 2.0344, "step": 1914 }, { "epoch": 0.62, "learning_rate": 0.0006607275843452439, "loss": 2.0023, "step": 1915 }, { "epoch": 0.62, "learning_rate": 0.0006597380673348611, "loss": 2.06, "step": 1916 }, { "epoch": 0.62, "learning_rate": 0.0006587489269705682, "loss": 1.934, "step": 1917 }, { "epoch": 0.62, "learning_rate": 0.0006577601643472737, "loss": 1.9861, "step": 1918 }, { "epoch": 0.62, "learning_rate": 0.0006567717805594688, "loss": 1.9286, "step": 1919 }, { "epoch": 0.62, "learning_rate": 0.0006557837767012252, "loss": 1.9544, "step": 1920 }, { "epoch": 0.62, "learning_rate": 0.0006547961538661935, "loss": 1.9427, "step": 1921 }, { "epoch": 0.62, "learning_rate": 0.0006538089131476029, "loss": 2.0148, "step": 1922 }, { "epoch": 0.62, "learning_rate": 0.0006528220556382598, "loss": 2.011, "step": 1923 }, { "epoch": 0.62, "learning_rate": 0.0006518355824305463, "loss": 1.9691, "step": 1924 }, { "epoch": 0.63, "learning_rate": 0.0006508494946164188, "loss": 1.9386, "step": 1925 }, { "epoch": 0.63, "learning_rate": 0.0006498637932874074, "loss": 2.0333, "step": 1926 }, { "epoch": 0.63, "learning_rate": 0.0006488784795346147, "loss": 1.9429, "step": 1927 }, { "epoch": 0.63, "learning_rate": 0.0006478935544487133, "loss": 1.874, "step": 1928 }, { "epoch": 0.63, "learning_rate": 0.0006469090191199464, "loss": 2.0387, "step": 1929 }, { "epoch": 0.63, "learning_rate": 0.0006459248746381259, "loss": 2.0645, "step": 1930 }, { "epoch": 0.63, "learning_rate": 0.0006449411220926301, "loss": 1.9916, "step": 1931 }, { "epoch": 0.63, "learning_rate": 0.0006439577625724044, "loss": 2.0048, "step": 1932 }, { "epoch": 0.63, "learning_rate": 0.0006429747971659587, "loss": 1.9118, "step": 1933 }, { "epoch": 0.63, "learning_rate": 0.0006419922269613672, "loss": 1.9683, "step": 1934 }, { "epoch": 0.63, "learning_rate": 0.0006410100530462654, "loss": 1.9608, "step": 1935 }, { "epoch": 0.63, "learning_rate": 0.0006400282765078514, "loss": 1.9116, "step": 1936 }, { "epoch": 0.63, "learning_rate": 0.0006390468984328828, "loss": 1.9739, "step": 1937 }, { "epoch": 0.63, "learning_rate": 0.0006380659199076763, "loss": 2.0504, "step": 1938 }, { "epoch": 0.63, "learning_rate": 0.0006370853420181069, "loss": 2.0353, "step": 1939 }, { "epoch": 0.63, "learning_rate": 0.0006361051658496045, "loss": 1.9374, "step": 1940 }, { "epoch": 0.63, "learning_rate": 0.0006351253924871561, "loss": 1.9498, "step": 1941 }, { "epoch": 0.63, "learning_rate": 0.0006341460230153019, "loss": 2.0407, "step": 1942 }, { "epoch": 0.63, "learning_rate": 0.0006331670585181349, "loss": 1.9505, "step": 1943 }, { "epoch": 0.63, "learning_rate": 0.0006321885000793006, "loss": 1.909, "step": 1944 }, { "epoch": 0.63, "learning_rate": 0.0006312103487819943, "loss": 1.9952, "step": 1945 }, { "epoch": 0.63, "learning_rate": 0.0006302326057089606, "loss": 1.9768, "step": 1946 }, { "epoch": 0.63, "learning_rate": 0.000629255271942493, "loss": 1.9929, "step": 1947 }, { "epoch": 0.63, "learning_rate": 0.0006282783485644312, "loss": 2.1206, "step": 1948 }, { "epoch": 0.63, "learning_rate": 0.0006273018366561609, "loss": 1.9114, "step": 1949 }, { "epoch": 0.63, "learning_rate": 0.0006263257372986122, "loss": 1.9661, "step": 1950 }, { "epoch": 0.63, "learning_rate": 0.0006253500515722585, "loss": 1.9564, "step": 1951 }, { "epoch": 0.63, "learning_rate": 0.0006243747805571158, "loss": 1.9813, "step": 1952 }, { "epoch": 0.63, "learning_rate": 0.0006233999253327403, "loss": 2.0061, "step": 1953 }, { "epoch": 0.63, "learning_rate": 0.0006224254869782285, "loss": 2.0107, "step": 1954 }, { "epoch": 0.63, "learning_rate": 0.0006214514665722156, "loss": 1.9915, "step": 1955 }, { "epoch": 0.64, "learning_rate": 0.0006204778651928733, "loss": 1.9369, "step": 1956 }, { "epoch": 0.64, "learning_rate": 0.0006195046839179105, "loss": 1.9516, "step": 1957 }, { "epoch": 0.64, "learning_rate": 0.0006185319238245705, "loss": 2.0036, "step": 1958 }, { "epoch": 0.64, "learning_rate": 0.0006175595859896301, "loss": 1.924, "step": 1959 }, { "epoch": 0.64, "learning_rate": 0.0006165876714893995, "loss": 1.9906, "step": 1960 }, { "epoch": 0.64, "learning_rate": 0.0006156161813997197, "loss": 2.0461, "step": 1961 }, { "epoch": 0.64, "learning_rate": 0.0006146451167959625, "loss": 1.942, "step": 1962 }, { "epoch": 0.64, "learning_rate": 0.0006136744787530275, "loss": 1.9873, "step": 1963 }, { "epoch": 0.64, "learning_rate": 0.0006127042683453434, "loss": 2.0403, "step": 1964 }, { "epoch": 0.64, "learning_rate": 0.0006117344866468652, "loss": 1.9754, "step": 1965 }, { "epoch": 0.64, "learning_rate": 0.000610765134731073, "loss": 1.9962, "step": 1966 }, { "epoch": 0.64, "learning_rate": 0.0006097962136709709, "loss": 1.9959, "step": 1967 }, { "epoch": 0.64, "learning_rate": 0.0006088277245390874, "loss": 1.9715, "step": 1968 }, { "epoch": 0.64, "learning_rate": 0.0006078596684074715, "loss": 2.0092, "step": 1969 }, { "epoch": 0.64, "learning_rate": 0.000606892046347694, "loss": 1.9923, "step": 1970 }, { "epoch": 0.64, "learning_rate": 0.0006059248594308439, "loss": 2.0592, "step": 1971 }, { "epoch": 0.64, "learning_rate": 0.0006049581087275295, "loss": 1.9615, "step": 1972 }, { "epoch": 0.64, "learning_rate": 0.0006039917953078764, "loss": 2.0036, "step": 1973 }, { "epoch": 0.64, "learning_rate": 0.0006030259202415253, "loss": 1.953, "step": 1974 }, { "epoch": 0.64, "learning_rate": 0.0006020604845976323, "loss": 2.0293, "step": 1975 }, { "epoch": 0.64, "learning_rate": 0.000601095489444867, "loss": 1.9422, "step": 1976 }, { "epoch": 0.64, "learning_rate": 0.0006001309358514113, "loss": 1.9994, "step": 1977 }, { "epoch": 0.64, "learning_rate": 0.0005991668248849582, "loss": 1.9715, "step": 1978 }, { "epoch": 0.64, "learning_rate": 0.0005982031576127111, "loss": 1.9922, "step": 1979 }, { "epoch": 0.64, "learning_rate": 0.0005972399351013822, "loss": 1.9757, "step": 1980 }, { "epoch": 0.64, "learning_rate": 0.0005962771584171909, "loss": 1.997, "step": 1981 }, { "epoch": 0.64, "learning_rate": 0.0005953148286258635, "loss": 1.9769, "step": 1982 }, { "epoch": 0.64, "learning_rate": 0.000594352946792632, "loss": 1.9745, "step": 1983 }, { "epoch": 0.64, "learning_rate": 0.0005933915139822314, "loss": 1.9556, "step": 1984 }, { "epoch": 0.64, "learning_rate": 0.0005924305312589011, "loss": 2.0122, "step": 1985 }, { "epoch": 0.65, "learning_rate": 0.000591469999686381, "loss": 1.9885, "step": 1986 }, { "epoch": 0.65, "learning_rate": 0.0005905099203279127, "loss": 1.9786, "step": 1987 }, { "epoch": 0.65, "learning_rate": 0.0005895502942462361, "loss": 1.978, "step": 1988 }, { "epoch": 0.65, "learning_rate": 0.0005885911225035904, "loss": 1.9825, "step": 1989 }, { "epoch": 0.65, "learning_rate": 0.0005876324061617113, "loss": 2.0069, "step": 1990 }, { "epoch": 0.65, "learning_rate": 0.0005866741462818304, "loss": 2.028, "step": 1991 }, { "epoch": 0.65, "learning_rate": 0.0005857163439246745, "loss": 2.0266, "step": 1992 }, { "epoch": 0.65, "learning_rate": 0.0005847590001504632, "loss": 1.9267, "step": 1993 }, { "epoch": 0.65, "learning_rate": 0.0005838021160189092, "loss": 1.9746, "step": 1994 }, { "epoch": 0.65, "learning_rate": 0.0005828456925892159, "loss": 1.9193, "step": 1995 }, { "epoch": 0.65, "learning_rate": 0.0005818897309200771, "loss": 1.9905, "step": 1996 }, { "epoch": 0.65, "learning_rate": 0.0005809342320696753, "loss": 1.971, "step": 1997 }, { "epoch": 0.65, "learning_rate": 0.0005799791970956805, "loss": 2.027, "step": 1998 }, { "epoch": 0.65, "learning_rate": 0.0005790246270552498, "loss": 1.9842, "step": 1999 }, { "epoch": 0.65, "learning_rate": 0.0005780705230050248, "loss": 1.9517, "step": 2000 }, { "epoch": 0.65, "learning_rate": 0.0005771168860011322, "loss": 1.9458, "step": 2001 }, { "epoch": 0.65, "learning_rate": 0.0005761637170991809, "loss": 1.9974, "step": 2002 }, { "epoch": 0.65, "learning_rate": 0.0005752110173542626, "loss": 1.9545, "step": 2003 }, { "epoch": 0.65, "learning_rate": 0.0005742587878209485, "loss": 1.8983, "step": 2004 }, { "epoch": 0.65, "learning_rate": 0.0005733070295532898, "loss": 1.9746, "step": 2005 }, { "epoch": 0.65, "learning_rate": 0.0005723557436048164, "loss": 1.9829, "step": 2006 }, { "epoch": 0.65, "learning_rate": 0.0005714049310285352, "loss": 1.9973, "step": 2007 }, { "epoch": 0.65, "learning_rate": 0.0005704545928769288, "loss": 1.9644, "step": 2008 }, { "epoch": 0.65, "learning_rate": 0.0005695047302019551, "loss": 1.9381, "step": 2009 }, { "epoch": 0.65, "learning_rate": 0.0005685553440550456, "loss": 1.9262, "step": 2010 }, { "epoch": 0.65, "learning_rate": 0.0005676064354871035, "loss": 1.9782, "step": 2011 }, { "epoch": 0.65, "learning_rate": 0.0005666580055485046, "loss": 1.9408, "step": 2012 }, { "epoch": 0.65, "learning_rate": 0.000565710055289094, "loss": 1.9279, "step": 2013 }, { "epoch": 0.65, "learning_rate": 0.0005647625857581863, "loss": 2.0392, "step": 2014 }, { "epoch": 0.65, "learning_rate": 0.0005638155980045637, "loss": 1.979, "step": 2015 }, { "epoch": 0.65, "learning_rate": 0.0005628690930764757, "loss": 1.9294, "step": 2016 }, { "epoch": 0.66, "learning_rate": 0.000561923072021636, "loss": 2.0319, "step": 2017 }, { "epoch": 0.66, "learning_rate": 0.0005609775358872241, "loss": 2.027, "step": 2018 }, { "epoch": 0.66, "learning_rate": 0.000560032485719882, "loss": 1.9103, "step": 2019 }, { "epoch": 0.66, "learning_rate": 0.0005590879225657139, "loss": 1.9534, "step": 2020 }, { "epoch": 0.66, "learning_rate": 0.0005581438474702851, "loss": 1.9715, "step": 2021 }, { "epoch": 0.66, "learning_rate": 0.0005572002614786204, "loss": 2.0133, "step": 2022 }, { "epoch": 0.66, "learning_rate": 0.0005562571656352038, "loss": 1.9995, "step": 2023 }, { "epoch": 0.66, "learning_rate": 0.0005553145609839754, "loss": 1.9175, "step": 2024 }, { "epoch": 0.66, "learning_rate": 0.0005543724485683326, "loss": 1.9044, "step": 2025 }, { "epoch": 0.66, "learning_rate": 0.0005534308294311281, "loss": 1.9704, "step": 2026 }, { "epoch": 0.66, "learning_rate": 0.0005524897046146681, "loss": 1.9641, "step": 2027 }, { "epoch": 0.66, "learning_rate": 0.0005515490751607118, "loss": 1.9853, "step": 2028 }, { "epoch": 0.66, "learning_rate": 0.0005506089421104698, "loss": 1.9861, "step": 2029 }, { "epoch": 0.66, "learning_rate": 0.0005496693065046035, "loss": 2.0724, "step": 2030 }, { "epoch": 0.66, "learning_rate": 0.0005487301693832238, "loss": 1.9681, "step": 2031 }, { "epoch": 0.66, "learning_rate": 0.0005477915317858897, "loss": 2.0475, "step": 2032 }, { "epoch": 0.66, "learning_rate": 0.0005468533947516068, "loss": 1.9518, "step": 2033 }, { "epoch": 0.66, "learning_rate": 0.0005459157593188271, "loss": 1.9621, "step": 2034 }, { "epoch": 0.66, "learning_rate": 0.0005449786265254477, "loss": 1.988, "step": 2035 }, { "epoch": 0.66, "learning_rate": 0.0005440419974088082, "loss": 2.0184, "step": 2036 }, { "epoch": 0.66, "learning_rate": 0.0005431058730056915, "loss": 1.9712, "step": 2037 }, { "epoch": 0.66, "learning_rate": 0.0005421702543523218, "loss": 1.9376, "step": 2038 }, { "epoch": 0.66, "learning_rate": 0.0005412351424843632, "loss": 2.0893, "step": 2039 }, { "epoch": 0.66, "learning_rate": 0.000540300538436919, "loss": 1.979, "step": 2040 }, { "epoch": 0.66, "learning_rate": 0.0005393664432445308, "loss": 1.9524, "step": 2041 }, { "epoch": 0.66, "learning_rate": 0.0005384328579411755, "loss": 2.0182, "step": 2042 }, { "epoch": 0.66, "learning_rate": 0.0005374997835602669, "loss": 1.9625, "step": 2043 }, { "epoch": 0.66, "learning_rate": 0.0005365672211346531, "loss": 2.0133, "step": 2044 }, { "epoch": 0.66, "learning_rate": 0.0005356351716966151, "loss": 1.9215, "step": 2045 }, { "epoch": 0.66, "learning_rate": 0.0005347036362778663, "loss": 2.0577, "step": 2046 }, { "epoch": 0.66, "learning_rate": 0.0005337726159095511, "loss": 1.9706, "step": 2047 }, { "epoch": 0.67, "learning_rate": 0.0005328421116222442, "loss": 2.0002, "step": 2048 }, { "epoch": 0.67, "learning_rate": 0.0005319121244459475, "loss": 2.004, "step": 2049 }, { "epoch": 0.67, "learning_rate": 0.0005309826554100922, "loss": 1.9836, "step": 2050 }, { "epoch": 0.67, "learning_rate": 0.0005300537055435352, "loss": 1.9888, "step": 2051 }, { "epoch": 0.67, "learning_rate": 0.0005291252758745591, "loss": 1.9487, "step": 2052 }, { "epoch": 0.67, "learning_rate": 0.0005281973674308701, "loss": 1.8988, "step": 2053 }, { "epoch": 0.67, "learning_rate": 0.0005272699812395983, "loss": 2.0093, "step": 2054 }, { "epoch": 0.67, "learning_rate": 0.0005263431183272942, "loss": 1.9528, "step": 2055 }, { "epoch": 0.67, "learning_rate": 0.0005254167797199306, "loss": 1.9388, "step": 2056 }, { "epoch": 0.67, "learning_rate": 0.0005244909664428993, "loss": 1.9626, "step": 2057 }, { "epoch": 0.67, "learning_rate": 0.0005235656795210107, "loss": 1.8999, "step": 2058 }, { "epoch": 0.67, "learning_rate": 0.0005226409199784923, "loss": 1.9746, "step": 2059 }, { "epoch": 0.67, "learning_rate": 0.0005217166888389883, "loss": 2.0194, "step": 2060 }, { "epoch": 0.67, "learning_rate": 0.0005207929871255577, "loss": 1.9622, "step": 2061 }, { "epoch": 0.67, "learning_rate": 0.0005198698158606734, "loss": 1.9599, "step": 2062 }, { "epoch": 0.67, "learning_rate": 0.0005189471760662212, "loss": 1.9329, "step": 2063 }, { "epoch": 0.67, "learning_rate": 0.0005180250687634988, "loss": 1.977, "step": 2064 }, { "epoch": 0.67, "learning_rate": 0.0005171034949732141, "loss": 1.9651, "step": 2065 }, { "epoch": 0.67, "learning_rate": 0.0005161824557154851, "loss": 1.9183, "step": 2066 }, { "epoch": 0.67, "learning_rate": 0.000515261952009837, "loss": 1.9841, "step": 2067 }, { "epoch": 0.67, "learning_rate": 0.0005143419848752028, "loss": 2.0135, "step": 2068 }, { "epoch": 0.67, "learning_rate": 0.000513422555329922, "loss": 1.9802, "step": 2069 }, { "epoch": 0.67, "learning_rate": 0.0005125036643917387, "loss": 1.9752, "step": 2070 }, { "epoch": 0.67, "learning_rate": 0.0005115853130778003, "loss": 1.8947, "step": 2071 }, { "epoch": 0.67, "learning_rate": 0.0005106675024046576, "loss": 1.8249, "step": 2072 }, { "epoch": 0.67, "learning_rate": 0.0005097502333882628, "loss": 1.8596, "step": 2073 }, { "epoch": 0.67, "learning_rate": 0.0005088335070439678, "loss": 1.9211, "step": 2074 }, { "epoch": 0.67, "learning_rate": 0.0005079173243865245, "loss": 1.9401, "step": 2075 }, { "epoch": 0.67, "learning_rate": 0.0005070016864300829, "loss": 2.0237, "step": 2076 }, { "epoch": 0.67, "learning_rate": 0.0005060865941881902, "loss": 2.0019, "step": 2077 }, { "epoch": 0.67, "learning_rate": 0.000505172048673789, "loss": 1.9252, "step": 2078 }, { "epoch": 0.68, "learning_rate": 0.0005042580508992176, "loss": 1.9137, "step": 2079 }, { "epoch": 0.68, "learning_rate": 0.0005033446018762066, "loss": 1.9629, "step": 2080 }, { "epoch": 0.68, "learning_rate": 0.0005024317026158803, "loss": 1.9597, "step": 2081 }, { "epoch": 0.68, "learning_rate": 0.0005015193541287544, "loss": 1.9732, "step": 2082 }, { "epoch": 0.68, "learning_rate": 0.0005006075574247343, "loss": 1.9463, "step": 2083 }, { "epoch": 0.68, "learning_rate": 0.0004996963135131153, "loss": 1.9986, "step": 2084 }, { "epoch": 0.68, "learning_rate": 0.0004987856234025803, "loss": 2.0478, "step": 2085 }, { "epoch": 0.68, "learning_rate": 0.0004978754881011999, "loss": 2.0484, "step": 2086 }, { "epoch": 0.68, "learning_rate": 0.0004969659086164291, "loss": 2.03, "step": 2087 }, { "epoch": 0.68, "learning_rate": 0.0004960568859551091, "loss": 1.979, "step": 2088 }, { "epoch": 0.68, "learning_rate": 0.0004951484211234637, "loss": 1.9906, "step": 2089 }, { "epoch": 0.68, "learning_rate": 0.0004942405151271007, "loss": 1.9414, "step": 2090 }, { "epoch": 0.68, "learning_rate": 0.0004933331689710082, "loss": 1.8849, "step": 2091 }, { "epoch": 0.68, "learning_rate": 0.0004924263836595539, "loss": 1.9667, "step": 2092 }, { "epoch": 0.68, "learning_rate": 0.0004915201601964861, "loss": 1.9723, "step": 2093 }, { "epoch": 0.68, "learning_rate": 0.0004906144995849306, "loss": 1.9783, "step": 2094 }, { "epoch": 0.68, "learning_rate": 0.00048970940282739, "loss": 2.0199, "step": 2095 }, { "epoch": 0.68, "learning_rate": 0.000488804870925743, "loss": 2.0032, "step": 2096 }, { "epoch": 0.68, "learning_rate": 0.0004879009048812434, "loss": 1.9077, "step": 2097 }, { "epoch": 0.68, "learning_rate": 0.00048699750569451727, "loss": 1.962, "step": 2098 }, { "epoch": 0.68, "learning_rate": 0.00048609467436556476, "loss": 1.9662, "step": 2099 }, { "epoch": 0.68, "learning_rate": 0.00048519241189375664, "loss": 1.9683, "step": 2100 }, { "epoch": 0.68, "learning_rate": 0.0004842907192778342, "loss": 2.0214, "step": 2101 }, { "epoch": 0.68, "learning_rate": 0.00048338959751590793, "loss": 1.9241, "step": 2102 }, { "epoch": 0.68, "learning_rate": 0.0004824890476054563, "loss": 1.9346, "step": 2103 }, { "epoch": 0.68, "learning_rate": 0.00048158907054332545, "loss": 2.0518, "step": 2104 }, { "epoch": 0.68, "learning_rate": 0.00048068966732572606, "loss": 1.9824, "step": 2105 }, { "epoch": 0.68, "learning_rate": 0.00047979083894823473, "loss": 1.9504, "step": 2106 }, { "epoch": 0.68, "learning_rate": 0.00047889258640579135, "loss": 1.9931, "step": 2107 }, { "epoch": 0.68, "learning_rate": 0.00047799491069269853, "loss": 1.9542, "step": 2108 }, { "epoch": 0.68, "learning_rate": 0.0004770978128026203, "loss": 1.9004, "step": 2109 }, { "epoch": 0.69, "learning_rate": 0.0004762012937285812, "loss": 1.9214, "step": 2110 }, { "epoch": 0.69, "learning_rate": 0.0004753053544629653, "loss": 1.9789, "step": 2111 }, { "epoch": 0.69, "learning_rate": 0.00047440999599751354, "loss": 1.9777, "step": 2112 }, { "epoch": 0.69, "learning_rate": 0.00047351521932332554, "loss": 2.0267, "step": 2113 }, { "epoch": 0.69, "learning_rate": 0.00047262102543085626, "loss": 1.9341, "step": 2114 }, { "epoch": 0.69, "learning_rate": 0.0004717274153099156, "loss": 2.0125, "step": 2115 }, { "epoch": 0.69, "learning_rate": 0.00047083438994966723, "loss": 1.9983, "step": 2116 }, { "epoch": 0.69, "learning_rate": 0.000469941950338628, "loss": 2.0019, "step": 2117 }, { "epoch": 0.69, "learning_rate": 0.0004690500974646652, "loss": 2.0896, "step": 2118 }, { "epoch": 0.69, "learning_rate": 0.0004681588323149978, "loss": 1.9337, "step": 2119 }, { "epoch": 0.69, "learning_rate": 0.0004672681558761933, "loss": 1.9514, "step": 2120 }, { "epoch": 0.69, "learning_rate": 0.0004663780691341689, "loss": 1.9796, "step": 2121 }, { "epoch": 0.69, "learning_rate": 0.0004654885730741882, "loss": 2.0195, "step": 2122 }, { "epoch": 0.69, "learning_rate": 0.00046459966868086014, "loss": 1.9899, "step": 2123 }, { "epoch": 0.69, "learning_rate": 0.0004637113569381399, "loss": 2.0053, "step": 2124 }, { "epoch": 0.69, "learning_rate": 0.0004628236388293262, "loss": 2.0316, "step": 2125 }, { "epoch": 0.69, "learning_rate": 0.0004619365153370608, "loss": 1.9939, "step": 2126 }, { "epoch": 0.69, "learning_rate": 0.00046104998744332724, "loss": 2.0152, "step": 2127 }, { "epoch": 0.69, "learning_rate": 0.00046016405612944955, "loss": 2.0263, "step": 2128 }, { "epoch": 0.69, "learning_rate": 0.000459278722376092, "loss": 1.9936, "step": 2129 }, { "epoch": 0.69, "learning_rate": 0.00045839398716325633, "loss": 2.015, "step": 2130 }, { "epoch": 0.69, "learning_rate": 0.00045750985147028245, "loss": 1.9798, "step": 2131 }, { "epoch": 0.69, "learning_rate": 0.00045662631627584673, "loss": 1.9956, "step": 2132 }, { "epoch": 0.69, "learning_rate": 0.00045574338255796056, "loss": 1.9987, "step": 2133 }, { "epoch": 0.69, "learning_rate": 0.0004548610512939697, "loss": 1.9597, "step": 2134 }, { "epoch": 0.69, "learning_rate": 0.00045397932346055324, "loss": 1.9754, "step": 2135 }, { "epoch": 0.69, "learning_rate": 0.0004530982000337215, "loss": 1.9557, "step": 2136 }, { "epoch": 0.69, "learning_rate": 0.0004522176819888164, "loss": 1.9541, "step": 2137 }, { "epoch": 0.69, "learning_rate": 0.0004513377703005099, "loss": 1.9493, "step": 2138 }, { "epoch": 0.69, "learning_rate": 0.0004504584659428024, "loss": 1.9544, "step": 2139 }, { "epoch": 0.7, "learning_rate": 0.00044957976988902203, "loss": 1.9649, "step": 2140 }, { "epoch": 0.7, "learning_rate": 0.0004487016831118239, "loss": 2.0201, "step": 2141 }, { "epoch": 0.7, "learning_rate": 0.0004478242065831887, "loss": 1.9699, "step": 2142 }, { "epoch": 0.7, "learning_rate": 0.0004469473412744206, "loss": 1.9589, "step": 2143 }, { "epoch": 0.7, "learning_rate": 0.0004460710881561485, "loss": 1.9648, "step": 2144 }, { "epoch": 0.7, "learning_rate": 0.00044519544819832303, "loss": 1.9418, "step": 2145 }, { "epoch": 0.7, "learning_rate": 0.0004443204223702163, "loss": 1.9355, "step": 2146 }, { "epoch": 0.7, "learning_rate": 0.0004434460116404204, "loss": 2.019, "step": 2147 }, { "epoch": 0.7, "learning_rate": 0.00044257221697684666, "loss": 1.9866, "step": 2148 }, { "epoch": 0.7, "learning_rate": 0.000441699039346725, "loss": 1.8919, "step": 2149 }, { "epoch": 0.7, "learning_rate": 0.0004408264797166005, "loss": 1.9654, "step": 2150 }, { "epoch": 0.7, "learning_rate": 0.0004399545390523367, "loss": 1.9453, "step": 2151 }, { "epoch": 0.7, "learning_rate": 0.00043908321831911045, "loss": 1.9685, "step": 2152 }, { "epoch": 0.7, "learning_rate": 0.0004382125184814124, "loss": 2.0329, "step": 2153 }, { "epoch": 0.7, "learning_rate": 0.0004373424405030468, "loss": 2.0196, "step": 2154 }, { "epoch": 0.7, "learning_rate": 0.0004364729853471282, "loss": 1.9575, "step": 2155 }, { "epoch": 0.7, "learning_rate": 0.00043560415397608267, "loss": 1.9448, "step": 2156 }, { "epoch": 0.7, "learning_rate": 0.0004347359473516457, "loss": 1.9914, "step": 2157 }, { "epoch": 0.7, "learning_rate": 0.00043386836643486115, "loss": 1.979, "step": 2158 }, { "epoch": 0.7, "learning_rate": 0.00043300141218608005, "loss": 1.9443, "step": 2159 }, { "epoch": 0.7, "learning_rate": 0.0004321350855649605, "loss": 1.9945, "step": 2160 }, { "epoch": 0.7, "learning_rate": 0.0004312693875304645, "loss": 1.9386, "step": 2161 }, { "epoch": 0.7, "learning_rate": 0.00043040431904085944, "loss": 2.0018, "step": 2162 }, { "epoch": 0.7, "learning_rate": 0.00042953988105371533, "loss": 1.9771, "step": 2163 }, { "epoch": 0.7, "learning_rate": 0.0004286760745259045, "loss": 2.0156, "step": 2164 }, { "epoch": 0.7, "learning_rate": 0.0004278129004136001, "loss": 2.0211, "step": 2165 }, { "epoch": 0.7, "learning_rate": 0.00042695035967227547, "loss": 1.9831, "step": 2166 }, { "epoch": 0.7, "learning_rate": 0.00042608845325670287, "loss": 2.0233, "step": 2167 }, { "epoch": 0.7, "learning_rate": 0.0004252271821209516, "loss": 1.9886, "step": 2168 }, { "epoch": 0.7, "learning_rate": 0.00042436654721838873, "loss": 1.9618, "step": 2169 }, { "epoch": 0.7, "learning_rate": 0.0004235065495016769, "loss": 2.0095, "step": 2170 }, { "epoch": 0.71, "learning_rate": 0.00042264718992277294, "loss": 1.9501, "step": 2171 }, { "epoch": 0.71, "learning_rate": 0.0004217884694329278, "loss": 1.967, "step": 2172 }, { "epoch": 0.71, "learning_rate": 0.0004209303889826852, "loss": 1.9366, "step": 2173 }, { "epoch": 0.71, "learning_rate": 0.00042007294952187927, "loss": 1.9847, "step": 2174 }, { "epoch": 0.71, "learning_rate": 0.0004192161519996357, "loss": 1.9376, "step": 2175 }, { "epoch": 0.71, "learning_rate": 0.00041835999736436914, "loss": 1.9567, "step": 2176 }, { "epoch": 0.71, "learning_rate": 0.00041750448656378283, "loss": 1.9888, "step": 2177 }, { "epoch": 0.71, "learning_rate": 0.0004166496205448671, "loss": 1.9446, "step": 2178 }, { "epoch": 0.71, "learning_rate": 0.0004157954002538987, "loss": 1.9706, "step": 2179 }, { "epoch": 0.71, "learning_rate": 0.00041494182663644007, "loss": 1.9826, "step": 2180 }, { "epoch": 0.71, "learning_rate": 0.00041408890063733604, "loss": 1.9696, "step": 2181 }, { "epoch": 0.71, "learning_rate": 0.0004132366232007172, "loss": 2.0033, "step": 2182 }, { "epoch": 0.71, "learning_rate": 0.0004123849952699942, "loss": 2.05, "step": 2183 }, { "epoch": 0.71, "learning_rate": 0.0004115340177878596, "loss": 1.9722, "step": 2184 }, { "epoch": 0.71, "learning_rate": 0.00041068369169628614, "loss": 1.9586, "step": 2185 }, { "epoch": 0.71, "learning_rate": 0.0004098340179365244, "loss": 1.9695, "step": 2186 }, { "epoch": 0.71, "learning_rate": 0.000408984997449104, "loss": 1.9793, "step": 2187 }, { "epoch": 0.71, "learning_rate": 0.00040813663117383105, "loss": 1.9709, "step": 2188 }, { "epoch": 0.71, "learning_rate": 0.00040728892004978745, "loss": 1.9271, "step": 2189 }, { "epoch": 0.71, "learning_rate": 0.00040644186501533, "loss": 1.9936, "step": 2190 }, { "epoch": 0.71, "learning_rate": 0.00040559546700808923, "loss": 1.9505, "step": 2191 }, { "epoch": 0.71, "learning_rate": 0.0004047497269649687, "loss": 1.9424, "step": 2192 }, { "epoch": 0.71, "learning_rate": 0.00040390464582214247, "loss": 1.9882, "step": 2193 }, { "epoch": 0.71, "learning_rate": 0.0004030602245150566, "loss": 2.0265, "step": 2194 }, { "epoch": 0.71, "learning_rate": 0.00040221646397842626, "loss": 1.9459, "step": 2195 }, { "epoch": 0.71, "learning_rate": 0.00040137336514623503, "loss": 1.9267, "step": 2196 }, { "epoch": 0.71, "learning_rate": 0.00040053092895173424, "loss": 1.9712, "step": 2197 }, { "epoch": 0.71, "learning_rate": 0.0003996891563274421, "loss": 1.8862, "step": 2198 }, { "epoch": 0.71, "learning_rate": 0.000398848048205141, "loss": 1.9396, "step": 2199 }, { "epoch": 0.71, "learning_rate": 0.000398007605515879, "loss": 1.9479, "step": 2200 }, { "epoch": 0.71, "learning_rate": 0.0003971678291899673, "loss": 1.9833, "step": 2201 }, { "epoch": 0.72, "learning_rate": 0.0003963287201569793, "loss": 1.9612, "step": 2202 }, { "epoch": 0.72, "learning_rate": 0.0003954902793457499, "loss": 1.9223, "step": 2203 }, { "epoch": 0.72, "learning_rate": 0.0003946525076843741, "loss": 1.9657, "step": 2204 }, { "epoch": 0.72, "learning_rate": 0.00039381540610020695, "loss": 1.9456, "step": 2205 }, { "epoch": 0.72, "learning_rate": 0.0003929789755198603, "loss": 1.9493, "step": 2206 }, { "epoch": 0.72, "learning_rate": 0.0003921432168692045, "loss": 1.9387, "step": 2207 }, { "epoch": 0.72, "learning_rate": 0.0003913081310733658, "loss": 1.9728, "step": 2208 }, { "epoch": 0.72, "learning_rate": 0.0003904737190567256, "loss": 2.0324, "step": 2209 }, { "epoch": 0.72, "learning_rate": 0.0003896399817429195, "loss": 1.9883, "step": 2210 }, { "epoch": 0.72, "learning_rate": 0.000388806920054836, "loss": 1.955, "step": 2211 }, { "epoch": 0.72, "learning_rate": 0.0003879745349146162, "loss": 2.0185, "step": 2212 }, { "epoch": 0.72, "learning_rate": 0.00038714282724365204, "loss": 1.9183, "step": 2213 }, { "epoch": 0.72, "learning_rate": 0.0003863117979625853, "loss": 1.9189, "step": 2214 }, { "epoch": 0.72, "learning_rate": 0.0003854814479913075, "loss": 1.9507, "step": 2215 }, { "epoch": 0.72, "learning_rate": 0.0003846517782489575, "loss": 2.0088, "step": 2216 }, { "epoch": 0.72, "learning_rate": 0.0003838227896539219, "loss": 2.0226, "step": 2217 }, { "epoch": 0.72, "learning_rate": 0.00038299448312383233, "loss": 1.9815, "step": 2218 }, { "epoch": 0.72, "learning_rate": 0.0003821668595755663, "loss": 1.9582, "step": 2219 }, { "epoch": 0.72, "learning_rate": 0.000381339919925245, "loss": 1.9636, "step": 2220 }, { "epoch": 0.72, "learning_rate": 0.0003805136650882326, "loss": 1.9876, "step": 2221 }, { "epoch": 0.72, "learning_rate": 0.0003796880959791353, "loss": 2.0374, "step": 2222 }, { "epoch": 0.72, "learning_rate": 0.00037886321351180053, "loss": 1.9742, "step": 2223 }, { "epoch": 0.72, "learning_rate": 0.00037803901859931457, "loss": 1.9888, "step": 2224 }, { "epoch": 0.72, "learning_rate": 0.00037721551215400375, "loss": 1.9456, "step": 2225 }, { "epoch": 0.72, "learning_rate": 0.000376392695087432, "loss": 1.9349, "step": 2226 }, { "epoch": 0.72, "learning_rate": 0.0003755705683103999, "loss": 1.9491, "step": 2227 }, { "epoch": 0.72, "learning_rate": 0.00037474913273294444, "loss": 1.9306, "step": 2228 }, { "epoch": 0.72, "learning_rate": 0.0003739283892643368, "loss": 1.9115, "step": 2229 }, { "epoch": 0.72, "learning_rate": 0.00037310833881308304, "loss": 1.9544, "step": 2230 }, { "epoch": 0.72, "learning_rate": 0.00037228898228692066, "loss": 1.9816, "step": 2231 }, { "epoch": 0.72, "learning_rate": 0.00037147032059282024, "loss": 1.9918, "step": 2232 }, { "epoch": 0.73, "learning_rate": 0.0003706523546369829, "loss": 1.9651, "step": 2233 }, { "epoch": 0.73, "learning_rate": 0.0003698350853248394, "loss": 1.9651, "step": 2234 }, { "epoch": 0.73, "learning_rate": 0.0003690185135610499, "loss": 2.0082, "step": 2235 }, { "epoch": 0.73, "learning_rate": 0.0003682026402495019, "loss": 1.9773, "step": 2236 }, { "epoch": 0.73, "learning_rate": 0.0003673874662933097, "loss": 1.874, "step": 2237 }, { "epoch": 0.73, "learning_rate": 0.0003665729925948137, "loss": 2.0721, "step": 2238 }, { "epoch": 0.73, "learning_rate": 0.0003657592200555793, "loss": 1.8913, "step": 2239 }, { "epoch": 0.73, "learning_rate": 0.0003649461495763956, "loss": 2.0257, "step": 2240 }, { "epoch": 0.73, "learning_rate": 0.00036413378205727456, "loss": 2.022, "step": 2241 }, { "epoch": 0.73, "learning_rate": 0.00036332211839745, "loss": 1.9702, "step": 2242 }, { "epoch": 0.73, "learning_rate": 0.00036251115949537674, "loss": 1.9877, "step": 2243 }, { "epoch": 0.73, "learning_rate": 0.00036170090624872934, "loss": 2.0161, "step": 2244 }, { "epoch": 0.73, "learning_rate": 0.00036089135955440124, "loss": 1.9354, "step": 2245 }, { "epoch": 0.73, "learning_rate": 0.000360082520308504, "loss": 1.8824, "step": 2246 }, { "epoch": 0.73, "learning_rate": 0.0003592743894063658, "loss": 1.9692, "step": 2247 }, { "epoch": 0.73, "learning_rate": 0.0003584669677425313, "loss": 1.9413, "step": 2248 }, { "epoch": 0.73, "learning_rate": 0.0003576602562107588, "loss": 1.9325, "step": 2249 }, { "epoch": 0.73, "learning_rate": 0.00035685425570402185, "loss": 2.0259, "step": 2250 }, { "epoch": 0.73, "learning_rate": 0.0003560489671145064, "loss": 1.904, "step": 2251 }, { "epoch": 0.73, "learning_rate": 0.00035524439133361054, "loss": 1.9496, "step": 2252 }, { "epoch": 0.73, "learning_rate": 0.000354440529251943, "loss": 1.9668, "step": 2253 }, { "epoch": 0.73, "learning_rate": 0.0003536373817593234, "loss": 1.9451, "step": 2254 }, { "epoch": 0.73, "learning_rate": 0.0003528349497447786, "loss": 1.9436, "step": 2255 }, { "epoch": 0.73, "learning_rate": 0.00035203323409654507, "loss": 1.9109, "step": 2256 }, { "epoch": 0.73, "learning_rate": 0.00035123223570206587, "loss": 1.9922, "step": 2257 }, { "epoch": 0.73, "learning_rate": 0.0003504319554479899, "loss": 1.9541, "step": 2258 }, { "epoch": 0.73, "learning_rate": 0.0003496323942201712, "loss": 2.0256, "step": 2259 }, { "epoch": 0.73, "learning_rate": 0.00034883355290366827, "loss": 1.9635, "step": 2260 }, { "epoch": 0.73, "learning_rate": 0.00034803543238274236, "loss": 1.9784, "step": 2261 }, { "epoch": 0.73, "learning_rate": 0.0003472380335408566, "loss": 1.9938, "step": 2262 }, { "epoch": 0.73, "learning_rate": 0.0003464413572606756, "loss": 1.9577, "step": 2263 }, { "epoch": 0.74, "learning_rate": 0.0003456454044240642, "loss": 1.9367, "step": 2264 }, { "epoch": 0.74, "learning_rate": 0.00034485017591208645, "loss": 1.9984, "step": 2265 }, { "epoch": 0.74, "learning_rate": 0.00034405567260500437, "loss": 1.9536, "step": 2266 }, { "epoch": 0.74, "learning_rate": 0.0003432618953822776, "loss": 1.926, "step": 2267 }, { "epoch": 0.74, "learning_rate": 0.00034246884512256195, "loss": 2.014, "step": 2268 }, { "epoch": 0.74, "learning_rate": 0.00034167652270370786, "loss": 1.9619, "step": 2269 }, { "epoch": 0.74, "learning_rate": 0.000340884929002761, "loss": 1.971, "step": 2270 }, { "epoch": 0.74, "learning_rate": 0.0003400940648959602, "loss": 1.9645, "step": 2271 }, { "epoch": 0.74, "learning_rate": 0.0003393039312587359, "loss": 1.9634, "step": 2272 }, { "epoch": 0.74, "learning_rate": 0.0003385145289657118, "loss": 1.983, "step": 2273 }, { "epoch": 0.74, "learning_rate": 0.00033772585889069986, "loss": 2.0076, "step": 2274 }, { "epoch": 0.74, "learning_rate": 0.00033693792190670293, "loss": 1.9193, "step": 2275 }, { "epoch": 0.74, "learning_rate": 0.00033615071888591175, "loss": 1.9535, "step": 2276 }, { "epoch": 0.74, "learning_rate": 0.00033536425069970513, "loss": 1.9234, "step": 2277 }, { "epoch": 0.74, "learning_rate": 0.0003345785182186478, "loss": 1.9578, "step": 2278 }, { "epoch": 0.74, "learning_rate": 0.00033379352231249125, "loss": 1.9482, "step": 2279 }, { "epoch": 0.74, "learning_rate": 0.00033300926385016995, "loss": 1.9942, "step": 2280 }, { "epoch": 0.74, "learning_rate": 0.0003322257436998034, "loss": 1.9828, "step": 2281 }, { "epoch": 0.74, "learning_rate": 0.0003314429627286936, "loss": 1.8799, "step": 2282 }, { "epoch": 0.74, "learning_rate": 0.00033066092180332387, "loss": 1.989, "step": 2283 }, { "epoch": 0.74, "learning_rate": 0.0003298796217893588, "loss": 1.9485, "step": 2284 }, { "epoch": 0.74, "learning_rate": 0.00032909906355164275, "loss": 1.9741, "step": 2285 }, { "epoch": 0.74, "learning_rate": 0.0003283192479541992, "loss": 1.9581, "step": 2286 }, { "epoch": 0.74, "learning_rate": 0.00032754017586022864, "loss": 1.9329, "step": 2287 }, { "epoch": 0.74, "learning_rate": 0.00032676184813210964, "loss": 1.9574, "step": 2288 }, { "epoch": 0.74, "learning_rate": 0.00032598426563139625, "loss": 1.9696, "step": 2289 }, { "epoch": 0.74, "learning_rate": 0.00032520742921881787, "loss": 1.88, "step": 2290 }, { "epoch": 0.74, "learning_rate": 0.00032443133975427785, "loss": 1.9967, "step": 2291 }, { "epoch": 0.74, "learning_rate": 0.0003236559980968532, "loss": 1.9805, "step": 2292 }, { "epoch": 0.74, "learning_rate": 0.0003228814051047919, "loss": 1.9903, "step": 2293 }, { "epoch": 0.75, "learning_rate": 0.0003221075616355147, "loss": 1.9859, "step": 2294 }, { "epoch": 0.75, "learning_rate": 0.00032133446854561165, "loss": 1.9283, "step": 2295 }, { "epoch": 0.75, "learning_rate": 0.00032056212669084286, "loss": 2.0057, "step": 2296 }, { "epoch": 0.75, "learning_rate": 0.00031979053692613645, "loss": 1.897, "step": 2297 }, { "epoch": 0.75, "learning_rate": 0.0003190197001055882, "loss": 1.9636, "step": 2298 }, { "epoch": 0.75, "learning_rate": 0.0003182496170824608, "loss": 1.9436, "step": 2299 }, { "epoch": 0.75, "learning_rate": 0.0003174802887091813, "loss": 2.0081, "step": 2300 }, { "epoch": 0.75, "learning_rate": 0.0003167117158373427, "loss": 2.0254, "step": 2301 }, { "epoch": 0.75, "learning_rate": 0.00031594389931770115, "loss": 1.9506, "step": 2302 }, { "epoch": 0.75, "learning_rate": 0.00031517684000017545, "loss": 1.9189, "step": 2303 }, { "epoch": 0.75, "learning_rate": 0.0003144105387338474, "loss": 1.9885, "step": 2304 }, { "epoch": 0.75, "learning_rate": 0.00031364499636695777, "loss": 1.9837, "step": 2305 }, { "epoch": 0.75, "learning_rate": 0.00031288021374690865, "loss": 2.0128, "step": 2306 }, { "epoch": 0.75, "learning_rate": 0.00031211619172026063, "loss": 1.9322, "step": 2307 }, { "epoch": 0.75, "learning_rate": 0.0003113529311327324, "loss": 2.0118, "step": 2308 }, { "epoch": 0.75, "learning_rate": 0.0003105904328292002, "loss": 1.9332, "step": 2309 }, { "epoch": 0.75, "learning_rate": 0.00030982869765369594, "loss": 1.9741, "step": 2310 }, { "epoch": 0.75, "learning_rate": 0.0003090677264494074, "loss": 1.9624, "step": 2311 }, { "epoch": 0.75, "learning_rate": 0.00030830752005867555, "loss": 1.9682, "step": 2312 }, { "epoch": 0.75, "learning_rate": 0.00030754807932299613, "loss": 2.0543, "step": 2313 }, { "epoch": 0.75, "learning_rate": 0.0003067894050830166, "loss": 1.9313, "step": 2314 }, { "epoch": 0.75, "learning_rate": 0.00030603149817853603, "loss": 1.9484, "step": 2315 }, { "epoch": 0.75, "learning_rate": 0.00030527435944850445, "loss": 1.9766, "step": 2316 }, { "epoch": 0.75, "learning_rate": 0.0003045179897310213, "loss": 2.0646, "step": 2317 }, { "epoch": 0.75, "learning_rate": 0.0003037623898633345, "loss": 1.9794, "step": 2318 }, { "epoch": 0.75, "learning_rate": 0.0003030075606818402, "loss": 1.9555, "step": 2319 }, { "epoch": 0.75, "learning_rate": 0.00030225350302208134, "loss": 2.0476, "step": 2320 }, { "epoch": 0.75, "learning_rate": 0.000301500217718747, "loss": 2.0197, "step": 2321 }, { "epoch": 0.75, "learning_rate": 0.0003007477056056711, "loss": 1.9362, "step": 2322 }, { "epoch": 0.75, "learning_rate": 0.00029999596751583167, "loss": 1.9652, "step": 2323 }, { "epoch": 0.75, "learning_rate": 0.00029924500428135046, "loss": 2.022, "step": 2324 }, { "epoch": 0.76, "learning_rate": 0.0002984948167334904, "loss": 1.9751, "step": 2325 }, { "epoch": 0.76, "learning_rate": 0.00029774540570265694, "loss": 2.0562, "step": 2326 }, { "epoch": 0.76, "learning_rate": 0.0002969967720183954, "loss": 1.9915, "step": 2327 }, { "epoch": 0.76, "learning_rate": 0.00029624891650939067, "loss": 1.9913, "step": 2328 }, { "epoch": 0.76, "learning_rate": 0.00029550184000346646, "loss": 1.9735, "step": 2329 }, { "epoch": 0.76, "learning_rate": 0.00029475554332758437, "loss": 1.9508, "step": 2330 }, { "epoch": 0.76, "learning_rate": 0.0002940100273078418, "loss": 1.9459, "step": 2331 }, { "epoch": 0.76, "learning_rate": 0.0002932652927694729, "loss": 1.9865, "step": 2332 }, { "epoch": 0.76, "learning_rate": 0.0002925213405368463, "loss": 1.95, "step": 2333 }, { "epoch": 0.76, "learning_rate": 0.00029177817143346575, "loss": 1.9544, "step": 2334 }, { "epoch": 0.76, "learning_rate": 0.0002910357862819668, "loss": 1.9385, "step": 2335 }, { "epoch": 0.76, "learning_rate": 0.00029029418590411813, "loss": 2.0197, "step": 2336 }, { "epoch": 0.76, "learning_rate": 0.0002895533711208187, "loss": 2.0131, "step": 2337 }, { "epoch": 0.76, "learning_rate": 0.0002888133427520987, "loss": 1.9476, "step": 2338 }, { "epoch": 0.76, "learning_rate": 0.0002880741016171178, "loss": 1.933, "step": 2339 }, { "epoch": 0.76, "learning_rate": 0.0002873356485341639, "loss": 1.8911, "step": 2340 }, { "epoch": 0.76, "learning_rate": 0.00028659798432065296, "loss": 1.935, "step": 2341 }, { "epoch": 0.76, "learning_rate": 0.00028586110979312776, "loss": 1.8985, "step": 2342 }, { "epoch": 0.76, "learning_rate": 0.00028512502576725585, "loss": 1.9375, "step": 2343 }, { "epoch": 0.76, "learning_rate": 0.00028438973305783136, "loss": 1.9293, "step": 2344 }, { "epoch": 0.76, "learning_rate": 0.0002836552324787713, "loss": 1.9747, "step": 2345 }, { "epoch": 0.76, "learning_rate": 0.0002829215248431166, "loss": 2.0264, "step": 2346 }, { "epoch": 0.76, "learning_rate": 0.00028218861096302995, "loss": 1.9441, "step": 2347 }, { "epoch": 0.76, "learning_rate": 0.0002814564916497957, "loss": 1.9944, "step": 2348 }, { "epoch": 0.76, "learning_rate": 0.00028072516771381897, "loss": 2.0109, "step": 2349 }, { "epoch": 0.76, "learning_rate": 0.00027999463996462317, "loss": 1.9221, "step": 2350 }, { "epoch": 0.76, "learning_rate": 0.0002792649092108518, "loss": 1.9733, "step": 2351 }, { "epoch": 0.76, "learning_rate": 0.0002785359762602655, "loss": 1.9998, "step": 2352 }, { "epoch": 0.76, "learning_rate": 0.00027780784191974196, "loss": 1.9558, "step": 2353 }, { "epoch": 0.76, "learning_rate": 0.0002770805069952749, "loss": 1.9665, "step": 2354 }, { "epoch": 0.76, "learning_rate": 0.00027635397229197324, "loss": 1.9463, "step": 2355 }, { "epoch": 0.77, "learning_rate": 0.00027562823861405927, "loss": 1.9321, "step": 2356 }, { "epoch": 0.77, "learning_rate": 0.00027490330676486975, "loss": 1.9446, "step": 2357 }, { "epoch": 0.77, "learning_rate": 0.0002741791775468532, "loss": 1.9433, "step": 2358 }, { "epoch": 0.77, "learning_rate": 0.00027345585176156994, "loss": 2.0034, "step": 2359 }, { "epoch": 0.77, "learning_rate": 0.0002727333302096909, "loss": 1.9508, "step": 2360 }, { "epoch": 0.77, "learning_rate": 0.0002720116136909968, "loss": 1.9914, "step": 2361 }, { "epoch": 0.77, "learning_rate": 0.0002712907030043774, "loss": 1.9442, "step": 2362 }, { "epoch": 0.77, "learning_rate": 0.0002705705989478293, "loss": 2.0221, "step": 2363 }, { "epoch": 0.77, "learning_rate": 0.00026985130231845823, "loss": 1.92, "step": 2364 }, { "epoch": 0.77, "learning_rate": 0.0002691328139124747, "loss": 2.0059, "step": 2365 }, { "epoch": 0.77, "learning_rate": 0.00026841513452519506, "loss": 1.9739, "step": 2366 }, { "epoch": 0.77, "learning_rate": 0.0002676982649510402, "loss": 1.9855, "step": 2367 }, { "epoch": 0.77, "learning_rate": 0.0002669822059835341, "loss": 1.9393, "step": 2368 }, { "epoch": 0.77, "learning_rate": 0.0002662669584153038, "loss": 1.961, "step": 2369 }, { "epoch": 0.77, "learning_rate": 0.0002655525230380783, "loss": 1.8938, "step": 2370 }, { "epoch": 0.77, "learning_rate": 0.0002648389006426876, "loss": 1.9559, "step": 2371 }, { "epoch": 0.77, "learning_rate": 0.00026412609201906137, "loss": 1.9746, "step": 2372 }, { "epoch": 0.77, "learning_rate": 0.00026341409795622897, "loss": 2.0059, "step": 2373 }, { "epoch": 0.77, "learning_rate": 0.00026270291924231803, "loss": 1.9679, "step": 2374 }, { "epoch": 0.77, "learning_rate": 0.000261992556664553, "loss": 1.9359, "step": 2375 }, { "epoch": 0.77, "learning_rate": 0.0002612830110092557, "loss": 1.9798, "step": 2376 }, { "epoch": 0.77, "learning_rate": 0.0002605742830618433, "loss": 1.9714, "step": 2377 }, { "epoch": 0.77, "learning_rate": 0.00025986637360682796, "loss": 1.8982, "step": 2378 }, { "epoch": 0.77, "learning_rate": 0.0002591592834278159, "loss": 2.0296, "step": 2379 }, { "epoch": 0.77, "learning_rate": 0.0002584530133075066, "loss": 2.0043, "step": 2380 }, { "epoch": 0.77, "learning_rate": 0.00025774756402769083, "loss": 1.9727, "step": 2381 }, { "epoch": 0.77, "learning_rate": 0.0002570429363692518, "loss": 1.8878, "step": 2382 }, { "epoch": 0.77, "learning_rate": 0.000256339131112163, "loss": 2.0252, "step": 2383 }, { "epoch": 0.77, "learning_rate": 0.00025563614903548735, "loss": 1.9763, "step": 2384 }, { "epoch": 0.77, "learning_rate": 0.0002549339909173768, "loss": 1.9351, "step": 2385 }, { "epoch": 0.77, "learning_rate": 0.00025423265753507095, "loss": 1.9967, "step": 2386 }, { "epoch": 0.78, "learning_rate": 0.0002535321496648972, "loss": 1.9446, "step": 2387 }, { "epoch": 0.78, "learning_rate": 0.0002528324680822679, "loss": 1.9801, "step": 2388 }, { "epoch": 0.78, "learning_rate": 0.00025213361356168175, "loss": 2.0671, "step": 2389 }, { "epoch": 0.78, "learning_rate": 0.00025143558687672164, "loss": 1.9332, "step": 2390 }, { "epoch": 0.78, "learning_rate": 0.0002507383888000543, "loss": 1.9618, "step": 2391 }, { "epoch": 0.78, "learning_rate": 0.0002500420201034289, "loss": 2.0158, "step": 2392 }, { "epoch": 0.78, "learning_rate": 0.00024934648155767726, "loss": 1.9798, "step": 2393 }, { "epoch": 0.78, "learning_rate": 0.00024865177393271063, "loss": 1.9496, "step": 2394 }, { "epoch": 0.78, "learning_rate": 0.0002479578979975227, "loss": 1.9786, "step": 2395 }, { "epoch": 0.78, "learning_rate": 0.0002472648545201851, "loss": 1.9872, "step": 2396 }, { "epoch": 0.78, "learning_rate": 0.0002465726442678482, "loss": 1.939, "step": 2397 }, { "epoch": 0.78, "learning_rate": 0.0002458812680067407, "loss": 1.9868, "step": 2398 }, { "epoch": 0.78, "learning_rate": 0.0002451907265021669, "loss": 1.9149, "step": 2399 }, { "epoch": 0.78, "learning_rate": 0.0002445010205185081, "loss": 1.9196, "step": 2400 }, { "epoch": 0.78, "learning_rate": 0.0002438121508192205, "loss": 1.9319, "step": 2401 }, { "epoch": 0.78, "learning_rate": 0.00024312411816683455, "loss": 1.9896, "step": 2402 }, { "epoch": 0.78, "learning_rate": 0.00024243692332295396, "loss": 1.9429, "step": 2403 }, { "epoch": 0.78, "learning_rate": 0.00024175056704825549, "loss": 1.945, "step": 2404 }, { "epoch": 0.78, "learning_rate": 0.00024106505010248737, "loss": 1.9208, "step": 2405 }, { "epoch": 0.78, "learning_rate": 0.00024038037324446848, "loss": 1.973, "step": 2406 }, { "epoch": 0.78, "learning_rate": 0.00023969653723208818, "loss": 1.9296, "step": 2407 }, { "epoch": 0.78, "learning_rate": 0.00023901354282230492, "loss": 1.8546, "step": 2408 }, { "epoch": 0.78, "learning_rate": 0.00023833139077114574, "loss": 1.9356, "step": 2409 }, { "epoch": 0.78, "learning_rate": 0.00023765008183370507, "loss": 1.974, "step": 2410 }, { "epoch": 0.78, "learning_rate": 0.0002369696167641442, "loss": 1.9648, "step": 2411 }, { "epoch": 0.78, "learning_rate": 0.00023628999631569026, "loss": 1.9527, "step": 2412 }, { "epoch": 0.78, "learning_rate": 0.00023561122124063504, "loss": 1.9482, "step": 2413 }, { "epoch": 0.78, "learning_rate": 0.0002349332922903351, "loss": 1.9346, "step": 2414 }, { "epoch": 0.78, "learning_rate": 0.00023425621021521038, "loss": 1.9868, "step": 2415 }, { "epoch": 0.78, "learning_rate": 0.0002335799757647431, "loss": 2.0049, "step": 2416 }, { "epoch": 0.78, "learning_rate": 0.0002329045896874774, "loss": 1.9323, "step": 2417 }, { "epoch": 0.79, "learning_rate": 0.0002322300527310186, "loss": 1.9298, "step": 2418 }, { "epoch": 0.79, "learning_rate": 0.0002315563656420312, "loss": 1.9352, "step": 2419 }, { "epoch": 0.79, "learning_rate": 0.00023088352916623988, "loss": 1.9547, "step": 2420 }, { "epoch": 0.79, "learning_rate": 0.00023021154404842737, "loss": 1.9246, "step": 2421 }, { "epoch": 0.79, "learning_rate": 0.00022954041103243395, "loss": 1.9129, "step": 2422 }, { "epoch": 0.79, "learning_rate": 0.00022887013086115706, "loss": 2.0028, "step": 2423 }, { "epoch": 0.79, "learning_rate": 0.00022820070427654959, "loss": 1.9481, "step": 2424 }, { "epoch": 0.79, "learning_rate": 0.00022753213201961986, "loss": 1.8996, "step": 2425 }, { "epoch": 0.79, "learning_rate": 0.00022686441483043052, "loss": 1.913, "step": 2426 }, { "epoch": 0.79, "learning_rate": 0.00022619755344809755, "loss": 2.0204, "step": 2427 }, { "epoch": 0.79, "learning_rate": 0.00022553154861078983, "loss": 1.9541, "step": 2428 }, { "epoch": 0.79, "learning_rate": 0.000224866401055728, "loss": 1.9751, "step": 2429 }, { "epoch": 0.79, "learning_rate": 0.0002242021115191839, "loss": 1.9468, "step": 2430 }, { "epoch": 0.79, "learning_rate": 0.00022353868073647897, "loss": 2.0636, "step": 2431 }, { "epoch": 0.79, "learning_rate": 0.00022287610944198488, "loss": 1.9422, "step": 2432 }, { "epoch": 0.79, "learning_rate": 0.0002222143983691215, "loss": 2.0488, "step": 2433 }, { "epoch": 0.79, "learning_rate": 0.00022155354825035656, "loss": 1.8892, "step": 2434 }, { "epoch": 0.79, "learning_rate": 0.00022089355981720472, "loss": 1.953, "step": 2435 }, { "epoch": 0.79, "learning_rate": 0.00022023443380022735, "loss": 1.8845, "step": 2436 }, { "epoch": 0.79, "learning_rate": 0.00021957617092903004, "loss": 1.9564, "step": 2437 }, { "epoch": 0.79, "learning_rate": 0.00021891877193226384, "loss": 1.9575, "step": 2438 }, { "epoch": 0.79, "learning_rate": 0.0002182622375376233, "loss": 1.991, "step": 2439 }, { "epoch": 0.79, "learning_rate": 0.0002176065684718461, "loss": 1.974, "step": 2440 }, { "epoch": 0.79, "learning_rate": 0.00021695176546071183, "loss": 1.9516, "step": 2441 }, { "epoch": 0.79, "learning_rate": 0.0002162978292290414, "loss": 1.9349, "step": 2442 }, { "epoch": 0.79, "learning_rate": 0.0002156447605006967, "loss": 1.9743, "step": 2443 }, { "epoch": 0.79, "learning_rate": 0.00021499255999857846, "loss": 1.9504, "step": 2444 }, { "epoch": 0.79, "learning_rate": 0.00021434122844462723, "loss": 1.94, "step": 2445 }, { "epoch": 0.79, "learning_rate": 0.00021369076655982112, "loss": 2.0271, "step": 2446 }, { "epoch": 0.79, "learning_rate": 0.00021304117506417596, "loss": 1.9661, "step": 2447 }, { "epoch": 0.8, "learning_rate": 0.00021239245467674395, "loss": 1.9354, "step": 2448 }, { "epoch": 0.8, "learning_rate": 0.0002117446061156133, "loss": 1.9588, "step": 2449 }, { "epoch": 0.8, "learning_rate": 0.0002110976300979065, "loss": 1.9377, "step": 2450 }, { "epoch": 0.8, "learning_rate": 0.00021045152733978068, "loss": 1.9711, "step": 2451 }, { "epoch": 0.8, "learning_rate": 0.0002098062985564263, "loss": 2.0114, "step": 2452 }, { "epoch": 0.8, "learning_rate": 0.0002091619444620666, "loss": 1.925, "step": 2453 }, { "epoch": 0.8, "learning_rate": 0.00020851846576995604, "loss": 1.99, "step": 2454 }, { "epoch": 0.8, "learning_rate": 0.0002078758631923806, "loss": 2.0181, "step": 2455 }, { "epoch": 0.8, "learning_rate": 0.00020723413744065623, "loss": 2.0098, "step": 2456 }, { "epoch": 0.8, "learning_rate": 0.0002065932892251282, "loss": 1.9453, "step": 2457 }, { "epoch": 0.8, "learning_rate": 0.00020595331925517068, "loss": 2.0416, "step": 2458 }, { "epoch": 0.8, "learning_rate": 0.00020531422823918545, "loss": 1.9654, "step": 2459 }, { "epoch": 0.8, "learning_rate": 0.00020467601688460148, "loss": 1.9308, "step": 2460 }, { "epoch": 0.8, "learning_rate": 0.00020403868589787412, "loss": 2.0584, "step": 2461 }, { "epoch": 0.8, "learning_rate": 0.0002034022359844837, "loss": 2.0132, "step": 2462 }, { "epoch": 0.8, "learning_rate": 0.00020276666784893572, "loss": 1.9088, "step": 2463 }, { "epoch": 0.8, "learning_rate": 0.00020213198219475959, "loss": 1.9319, "step": 2464 }, { "epoch": 0.8, "learning_rate": 0.00020149817972450791, "loss": 1.9288, "step": 2465 }, { "epoch": 0.8, "learning_rate": 0.00020086526113975546, "loss": 2.0215, "step": 2466 }, { "epoch": 0.8, "learning_rate": 0.0002002332271410986, "loss": 1.9604, "step": 2467 }, { "epoch": 0.8, "learning_rate": 0.00019960207842815514, "loss": 1.9136, "step": 2468 }, { "epoch": 0.8, "learning_rate": 0.00019897181569956168, "loss": 1.9763, "step": 2469 }, { "epoch": 0.8, "learning_rate": 0.00019834243965297527, "loss": 2.0198, "step": 2470 }, { "epoch": 0.8, "learning_rate": 0.00019771395098507082, "loss": 1.929, "step": 2471 }, { "epoch": 0.8, "learning_rate": 0.00019708635039154154, "loss": 1.9819, "step": 2472 }, { "epoch": 0.8, "learning_rate": 0.00019645963856709692, "loss": 2.0372, "step": 2473 }, { "epoch": 0.8, "learning_rate": 0.00019583381620546348, "loss": 1.9635, "step": 2474 }, { "epoch": 0.8, "learning_rate": 0.00019520888399938207, "loss": 1.9447, "step": 2475 }, { "epoch": 0.8, "learning_rate": 0.00019458484264060904, "loss": 1.9752, "step": 2476 }, { "epoch": 0.8, "learning_rate": 0.00019396169281991438, "loss": 1.9081, "step": 2477 }, { "epoch": 0.8, "learning_rate": 0.00019333943522708132, "loss": 2.0289, "step": 2478 }, { "epoch": 0.81, "learning_rate": 0.0001927180705509053, "loss": 1.9835, "step": 2479 }, { "epoch": 0.81, "learning_rate": 0.00019209759947919335, "loss": 1.9456, "step": 2480 }, { "epoch": 0.81, "learning_rate": 0.0001914780226987638, "loss": 1.9096, "step": 2481 }, { "epoch": 0.81, "learning_rate": 0.0001908593408954441, "loss": 1.9806, "step": 2482 }, { "epoch": 0.81, "learning_rate": 0.000190241554754072, "loss": 2.0027, "step": 2483 }, { "epoch": 0.81, "learning_rate": 0.0001896246649584932, "loss": 1.954, "step": 2484 }, { "epoch": 0.81, "learning_rate": 0.00018900867219156126, "loss": 1.9592, "step": 2485 }, { "epoch": 0.81, "learning_rate": 0.00018839357713513783, "loss": 2.0379, "step": 2486 }, { "epoch": 0.81, "learning_rate": 0.00018777938047008914, "loss": 1.9395, "step": 2487 }, { "epoch": 0.81, "learning_rate": 0.000187166082876288, "loss": 1.8718, "step": 2488 }, { "epoch": 0.81, "learning_rate": 0.00018655368503261194, "loss": 1.9868, "step": 2489 }, { "epoch": 0.81, "learning_rate": 0.00018594218761694238, "loss": 2.0125, "step": 2490 }, { "epoch": 0.81, "learning_rate": 0.00018533159130616395, "loss": 1.9741, "step": 2491 }, { "epoch": 0.81, "learning_rate": 0.00018472189677616415, "loss": 1.9644, "step": 2492 }, { "epoch": 0.81, "learning_rate": 0.00018411310470183207, "loss": 1.9811, "step": 2493 }, { "epoch": 0.81, "learning_rate": 0.00018350521575705747, "loss": 1.938, "step": 2494 }, { "epoch": 0.81, "learning_rate": 0.00018289823061473098, "loss": 1.9413, "step": 2495 }, { "epoch": 0.81, "learning_rate": 0.0001822921499467427, "loss": 1.9771, "step": 2496 }, { "epoch": 0.81, "learning_rate": 0.0001816869744239812, "loss": 1.9173, "step": 2497 }, { "epoch": 0.81, "learning_rate": 0.00018108270471633336, "loss": 1.9519, "step": 2498 }, { "epoch": 0.81, "learning_rate": 0.00018047934149268376, "loss": 2.0085, "step": 2499 }, { "epoch": 0.81, "learning_rate": 0.00017987688542091252, "loss": 1.9597, "step": 2500 }, { "epoch": 0.81, "learning_rate": 0.00017927533716789645, "loss": 1.9292, "step": 2501 }, { "epoch": 0.81, "learning_rate": 0.00017867469739950748, "loss": 1.9077, "step": 2502 }, { "epoch": 0.81, "learning_rate": 0.00017807496678061164, "loss": 2.0079, "step": 2503 }, { "epoch": 0.81, "learning_rate": 0.00017747614597506844, "loss": 1.8984, "step": 2504 }, { "epoch": 0.81, "learning_rate": 0.0001768782356457308, "loss": 2.0036, "step": 2505 }, { "epoch": 0.81, "learning_rate": 0.00017628123645444348, "loss": 1.9819, "step": 2506 }, { "epoch": 0.81, "learning_rate": 0.00017568514906204246, "loss": 1.957, "step": 2507 }, { "epoch": 0.81, "learning_rate": 0.00017508997412835482, "loss": 1.9369, "step": 2508 }, { "epoch": 0.81, "learning_rate": 0.0001744957123121973, "loss": 1.9608, "step": 2509 }, { "epoch": 0.82, "learning_rate": 0.00017390236427137628, "loss": 1.9574, "step": 2510 }, { "epoch": 0.82, "learning_rate": 0.0001733099306626863, "loss": 1.9941, "step": 2511 }, { "epoch": 0.82, "learning_rate": 0.00017271841214191007, "loss": 1.9517, "step": 2512 }, { "epoch": 0.82, "learning_rate": 0.00017212780936381666, "loss": 1.9929, "step": 2513 }, { "epoch": 0.82, "learning_rate": 0.0001715381229821621, "loss": 2.0394, "step": 2514 }, { "epoch": 0.82, "learning_rate": 0.0001709493536496878, "loss": 1.9705, "step": 2515 }, { "epoch": 0.82, "learning_rate": 0.00017036150201811984, "loss": 1.9897, "step": 2516 }, { "epoch": 0.82, "learning_rate": 0.00016977456873816966, "loss": 1.9434, "step": 2517 }, { "epoch": 0.82, "learning_rate": 0.00016918855445953042, "loss": 1.9678, "step": 2518 }, { "epoch": 0.82, "learning_rate": 0.00016860345983087888, "loss": 1.93, "step": 2519 }, { "epoch": 0.82, "learning_rate": 0.000168019285499874, "loss": 1.9051, "step": 2520 }, { "epoch": 0.82, "learning_rate": 0.00016743603211315561, "loss": 1.9492, "step": 2521 }, { "epoch": 0.82, "learning_rate": 0.0001668537003163444, "loss": 1.988, "step": 2522 }, { "epoch": 0.82, "learning_rate": 0.00016627229075404082, "loss": 1.9203, "step": 2523 }, { "epoch": 0.82, "learning_rate": 0.00016569180406982465, "loss": 2.0124, "step": 2524 }, { "epoch": 0.82, "learning_rate": 0.0001651122409062533, "loss": 1.9533, "step": 2525 }, { "epoch": 0.82, "learning_rate": 0.0001645336019048631, "loss": 1.9816, "step": 2526 }, { "epoch": 0.82, "learning_rate": 0.00016395588770616666, "loss": 1.9175, "step": 2527 }, { "epoch": 0.82, "learning_rate": 0.00016337909894965309, "loss": 1.892, "step": 2528 }, { "epoch": 0.82, "learning_rate": 0.00016280323627378701, "loss": 1.8977, "step": 2529 }, { "epoch": 0.82, "learning_rate": 0.0001622283003160081, "loss": 1.9744, "step": 2530 }, { "epoch": 0.82, "learning_rate": 0.0001616542917127304, "loss": 1.9323, "step": 2531 }, { "epoch": 0.82, "learning_rate": 0.0001610812110993405, "loss": 1.9163, "step": 2532 }, { "epoch": 0.82, "learning_rate": 0.00016050905911019886, "loss": 1.9435, "step": 2533 }, { "epoch": 0.82, "learning_rate": 0.00015993783637863757, "loss": 1.9758, "step": 2534 }, { "epoch": 0.82, "learning_rate": 0.00015936754353696004, "loss": 2.0101, "step": 2535 }, { "epoch": 0.82, "learning_rate": 0.00015879818121644051, "loss": 2.0003, "step": 2536 }, { "epoch": 0.82, "learning_rate": 0.00015822975004732331, "loss": 1.9678, "step": 2537 }, { "epoch": 0.82, "learning_rate": 0.00015766225065882145, "loss": 1.9131, "step": 2538 }, { "epoch": 0.82, "learning_rate": 0.0001570956836791172, "loss": 1.9847, "step": 2539 }, { "epoch": 0.82, "learning_rate": 0.00015653004973536033, "loss": 2.0067, "step": 2540 }, { "epoch": 0.83, "learning_rate": 0.00015596534945366814, "loss": 1.9516, "step": 2541 }, { "epoch": 0.83, "learning_rate": 0.00015540158345912403, "loss": 1.9593, "step": 2542 }, { "epoch": 0.83, "learning_rate": 0.00015483875237577748, "loss": 1.9871, "step": 2543 }, { "epoch": 0.83, "learning_rate": 0.00015427685682664329, "loss": 1.9421, "step": 2544 }, { "epoch": 0.83, "learning_rate": 0.00015371589743369996, "loss": 1.995, "step": 2545 }, { "epoch": 0.83, "learning_rate": 0.00015315587481789006, "loss": 1.9499, "step": 2546 }, { "epoch": 0.83, "learning_rate": 0.00015259678959911993, "loss": 1.9086, "step": 2547 }, { "epoch": 0.83, "learning_rate": 0.00015203864239625732, "loss": 1.9316, "step": 2548 }, { "epoch": 0.83, "learning_rate": 0.00015148143382713242, "loss": 2.0392, "step": 2549 }, { "epoch": 0.83, "learning_rate": 0.00015092516450853556, "loss": 1.9784, "step": 2550 }, { "epoch": 0.83, "learning_rate": 0.00015036983505621794, "loss": 1.9129, "step": 2551 }, { "epoch": 0.83, "learning_rate": 0.00014981544608489038, "loss": 1.9916, "step": 2552 }, { "epoch": 0.83, "learning_rate": 0.00014926199820822274, "loss": 2.0029, "step": 2553 }, { "epoch": 0.83, "learning_rate": 0.00014870949203884266, "loss": 1.995, "step": 2554 }, { "epoch": 0.83, "learning_rate": 0.00014815792818833617, "loss": 1.9598, "step": 2555 }, { "epoch": 0.83, "learning_rate": 0.00014760730726724524, "loss": 2.0044, "step": 2556 }, { "epoch": 0.83, "learning_rate": 0.00014705762988506888, "loss": 2.0195, "step": 2557 }, { "epoch": 0.83, "learning_rate": 0.00014650889665026134, "loss": 1.9166, "step": 2558 }, { "epoch": 0.83, "learning_rate": 0.0001459611081702319, "loss": 1.9468, "step": 2559 }, { "epoch": 0.83, "learning_rate": 0.00014541426505134382, "loss": 1.9916, "step": 2560 }, { "epoch": 0.83, "learning_rate": 0.00014486836789891422, "loss": 1.9183, "step": 2561 }, { "epoch": 0.83, "learning_rate": 0.00014432341731721311, "loss": 2.0009, "step": 2562 }, { "epoch": 0.83, "learning_rate": 0.00014377941390946215, "loss": 1.94, "step": 2563 }, { "epoch": 0.83, "learning_rate": 0.0001432363582778351, "loss": 1.9126, "step": 2564 }, { "epoch": 0.83, "learning_rate": 0.00014269425102345646, "loss": 1.97, "step": 2565 }, { "epoch": 0.83, "learning_rate": 0.00014215309274640098, "loss": 1.9434, "step": 2566 }, { "epoch": 0.83, "learning_rate": 0.00014161288404569296, "loss": 1.8925, "step": 2567 }, { "epoch": 0.83, "learning_rate": 0.00014107362551930535, "loss": 1.9809, "step": 2568 }, { "epoch": 0.83, "learning_rate": 0.00014053531776415996, "loss": 1.942, "step": 2569 }, { "epoch": 0.83, "learning_rate": 0.00013999796137612508, "loss": 2.0158, "step": 2570 }, { "epoch": 0.84, "learning_rate": 0.00013946155695001683, "loss": 1.9768, "step": 2571 }, { "epoch": 0.84, "learning_rate": 0.00013892610507959725, "loss": 1.9459, "step": 2572 }, { "epoch": 0.84, "learning_rate": 0.0001383916063575741, "loss": 1.9826, "step": 2573 }, { "epoch": 0.84, "learning_rate": 0.00013785806137559987, "loss": 1.9974, "step": 2574 }, { "epoch": 0.84, "learning_rate": 0.0001373254707242717, "loss": 1.9418, "step": 2575 }, { "epoch": 0.84, "learning_rate": 0.0001367938349931296, "loss": 2.0014, "step": 2576 }, { "epoch": 0.84, "learning_rate": 0.00013626315477065709, "loss": 1.9758, "step": 2577 }, { "epoch": 0.84, "learning_rate": 0.00013573343064428035, "loss": 1.9215, "step": 2578 }, { "epoch": 0.84, "learning_rate": 0.00013520466320036652, "loss": 1.9666, "step": 2579 }, { "epoch": 0.84, "learning_rate": 0.00013467685302422439, "loss": 1.9703, "step": 2580 }, { "epoch": 0.84, "learning_rate": 0.0001341500007001023, "loss": 1.9413, "step": 2581 }, { "epoch": 0.84, "learning_rate": 0.0001336241068111892, "loss": 1.9649, "step": 2582 }, { "epoch": 0.84, "learning_rate": 0.00013309917193961253, "loss": 1.97, "step": 2583 }, { "epoch": 0.84, "learning_rate": 0.00013257519666643835, "loss": 2.0128, "step": 2584 }, { "epoch": 0.84, "learning_rate": 0.00013205218157167077, "loss": 1.8954, "step": 2585 }, { "epoch": 0.84, "learning_rate": 0.00013153012723425052, "loss": 1.9999, "step": 2586 }, { "epoch": 0.84, "learning_rate": 0.00013100903423205558, "loss": 1.9044, "step": 2587 }, { "epoch": 0.84, "learning_rate": 0.00013048890314189875, "loss": 1.9527, "step": 2588 }, { "epoch": 0.84, "learning_rate": 0.0001299697345395291, "loss": 1.9298, "step": 2589 }, { "epoch": 0.84, "learning_rate": 0.00012945152899962952, "loss": 1.9749, "step": 2590 }, { "epoch": 0.84, "learning_rate": 0.00012893428709581745, "loss": 2.0421, "step": 2591 }, { "epoch": 0.84, "learning_rate": 0.0001284180094006433, "loss": 1.9592, "step": 2592 }, { "epoch": 0.84, "learning_rate": 0.0001279026964855905, "loss": 1.9187, "step": 2593 }, { "epoch": 0.84, "learning_rate": 0.00012738834892107387, "loss": 1.9967, "step": 2594 }, { "epoch": 0.84, "learning_rate": 0.00012687496727644022, "loss": 1.921, "step": 2595 }, { "epoch": 0.84, "learning_rate": 0.00012636255211996716, "loss": 2.0146, "step": 2596 }, { "epoch": 0.84, "learning_rate": 0.00012585110401886214, "loss": 1.9966, "step": 2597 }, { "epoch": 0.84, "learning_rate": 0.00012534062353926245, "loss": 1.9378, "step": 2598 }, { "epoch": 0.84, "learning_rate": 0.00012483111124623425, "loss": 1.9276, "step": 2599 }, { "epoch": 0.84, "learning_rate": 0.00012432256770377194, "loss": 1.971, "step": 2600 }, { "epoch": 0.84, "learning_rate": 0.0001238149934747972, "loss": 1.9307, "step": 2601 }, { "epoch": 0.85, "learning_rate": 0.00012330838912115926, "loss": 1.9472, "step": 2602 }, { "epoch": 0.85, "learning_rate": 0.0001228027552036337, "loss": 1.9212, "step": 2603 }, { "epoch": 0.85, "learning_rate": 0.00012229809228192178, "loss": 1.9391, "step": 2604 }, { "epoch": 0.85, "learning_rate": 0.00012179440091464989, "loss": 1.9403, "step": 2605 }, { "epoch": 0.85, "learning_rate": 0.000121291681659369, "loss": 2.0057, "step": 2606 }, { "epoch": 0.85, "learning_rate": 0.0001207899350725542, "loss": 1.9326, "step": 2607 }, { "epoch": 0.85, "learning_rate": 0.00012028916170960358, "loss": 1.9497, "step": 2608 }, { "epoch": 0.85, "learning_rate": 0.00011978936212483838, "loss": 2.0538, "step": 2609 }, { "epoch": 0.85, "learning_rate": 0.00011929053687150138, "loss": 1.9669, "step": 2610 }, { "epoch": 0.85, "learning_rate": 0.00011879268650175735, "loss": 1.9206, "step": 2611 }, { "epoch": 0.85, "learning_rate": 0.00011829581156669189, "loss": 1.8806, "step": 2612 }, { "epoch": 0.85, "learning_rate": 0.0001177999126163103, "loss": 1.9562, "step": 2613 }, { "epoch": 0.85, "learning_rate": 0.00011730499019953799, "loss": 2.0012, "step": 2614 }, { "epoch": 0.85, "learning_rate": 0.00011681104486421956, "loss": 1.9862, "step": 2615 }, { "epoch": 0.85, "learning_rate": 0.00011631807715711772, "loss": 1.9657, "step": 2616 }, { "epoch": 0.85, "learning_rate": 0.00011582608762391322, "loss": 1.9119, "step": 2617 }, { "epoch": 0.85, "learning_rate": 0.00011533507680920408, "loss": 1.8177, "step": 2618 }, { "epoch": 0.85, "learning_rate": 0.00011484504525650441, "loss": 1.9775, "step": 2619 }, { "epoch": 0.85, "learning_rate": 0.00011435599350824499, "loss": 1.9729, "step": 2620 }, { "epoch": 0.85, "learning_rate": 0.0001138679221057719, "loss": 1.9679, "step": 2621 }, { "epoch": 0.85, "learning_rate": 0.00011338083158934586, "loss": 1.9864, "step": 2622 }, { "epoch": 0.85, "learning_rate": 0.00011289472249814193, "loss": 1.9068, "step": 2623 }, { "epoch": 0.85, "learning_rate": 0.00011240959537024864, "loss": 1.9753, "step": 2624 }, { "epoch": 0.85, "learning_rate": 0.00011192545074266803, "loss": 2.0497, "step": 2625 }, { "epoch": 0.85, "learning_rate": 0.00011144228915131382, "loss": 1.86, "step": 2626 }, { "epoch": 0.85, "learning_rate": 0.00011096011113101224, "loss": 2.0205, "step": 2627 }, { "epoch": 0.85, "learning_rate": 0.0001104789172155004, "loss": 1.9394, "step": 2628 }, { "epoch": 0.85, "learning_rate": 0.00010999870793742628, "loss": 1.9734, "step": 2629 }, { "epoch": 0.85, "learning_rate": 0.00010951948382834797, "loss": 2.0051, "step": 2630 }, { "epoch": 0.85, "learning_rate": 0.00010904124541873295, "loss": 1.982, "step": 2631 }, { "epoch": 0.85, "learning_rate": 0.00010856399323795729, "loss": 1.9807, "step": 2632 }, { "epoch": 0.86, "learning_rate": 0.00010808772781430586, "loss": 1.962, "step": 2633 }, { "epoch": 0.86, "learning_rate": 0.00010761244967497109, "loss": 1.933, "step": 2634 }, { "epoch": 0.86, "learning_rate": 0.00010713815934605254, "loss": 2.0031, "step": 2635 }, { "epoch": 0.86, "learning_rate": 0.00010666485735255627, "loss": 1.9763, "step": 2636 }, { "epoch": 0.86, "learning_rate": 0.00010619254421839442, "loss": 1.9772, "step": 2637 }, { "epoch": 0.86, "learning_rate": 0.00010572122046638456, "loss": 1.9692, "step": 2638 }, { "epoch": 0.86, "learning_rate": 0.00010525088661824888, "loss": 1.9354, "step": 2639 }, { "epoch": 0.86, "learning_rate": 0.00010478154319461419, "loss": 2.0075, "step": 2640 }, { "epoch": 0.86, "learning_rate": 0.00010431319071501044, "loss": 1.9242, "step": 2641 }, { "epoch": 0.86, "learning_rate": 0.00010384582969787126, "loss": 1.9957, "step": 2642 }, { "epoch": 0.86, "learning_rate": 0.00010337946066053238, "loss": 1.9888, "step": 2643 }, { "epoch": 0.86, "learning_rate": 0.00010291408411923153, "loss": 2.0269, "step": 2644 }, { "epoch": 0.86, "learning_rate": 0.00010244970058910796, "loss": 1.9963, "step": 2645 }, { "epoch": 0.86, "learning_rate": 0.00010198631058420161, "loss": 1.9741, "step": 2646 }, { "epoch": 0.86, "learning_rate": 0.0001015239146174528, "loss": 1.9234, "step": 2647 }, { "epoch": 0.86, "learning_rate": 0.00010106251320070137, "loss": 1.8878, "step": 2648 }, { "epoch": 0.86, "learning_rate": 0.0001006021068446863, "loss": 2.0332, "step": 2649 }, { "epoch": 0.86, "learning_rate": 0.00010014269605904546, "loss": 1.9637, "step": 2650 }, { "epoch": 0.86, "learning_rate": 9.968428135231378e-05, "loss": 1.9624, "step": 2651 }, { "epoch": 0.86, "learning_rate": 9.922686323192454e-05, "loss": 2.0193, "step": 2652 }, { "epoch": 0.86, "learning_rate": 9.877044220420739e-05, "loss": 1.9107, "step": 2653 }, { "epoch": 0.86, "learning_rate": 9.831501877438843e-05, "loss": 1.9675, "step": 2654 }, { "epoch": 0.86, "learning_rate": 9.786059344658948e-05, "loss": 1.9073, "step": 2655 }, { "epoch": 0.86, "learning_rate": 9.740716672382765e-05, "loss": 1.9017, "step": 2656 }, { "epoch": 0.86, "learning_rate": 9.695473910801411e-05, "loss": 1.9545, "step": 2657 }, { "epoch": 0.86, "learning_rate": 9.65033110999548e-05, "loss": 2.0305, "step": 2658 }, { "epoch": 0.86, "learning_rate": 9.605288319934868e-05, "loss": 2.0049, "step": 2659 }, { "epoch": 0.86, "learning_rate": 9.560345590478803e-05, "loss": 1.9788, "step": 2660 }, { "epoch": 0.86, "learning_rate": 9.515502971375722e-05, "loss": 2.0044, "step": 2661 }, { "epoch": 0.86, "learning_rate": 9.470760512263254e-05, "loss": 1.9245, "step": 2662 }, { "epoch": 0.86, "learning_rate": 9.426118262668204e-05, "loss": 2.0141, "step": 2663 }, { "epoch": 0.87, "learning_rate": 9.381576272006343e-05, "loss": 1.8707, "step": 2664 }, { "epoch": 0.87, "learning_rate": 9.337134589582564e-05, "loss": 2.0109, "step": 2665 }, { "epoch": 0.87, "learning_rate": 9.292793264590693e-05, "loss": 1.9906, "step": 2666 }, { "epoch": 0.87, "learning_rate": 9.24855234611347e-05, "loss": 1.9607, "step": 2667 }, { "epoch": 0.87, "learning_rate": 9.204411883122477e-05, "loss": 2.0099, "step": 2668 }, { "epoch": 0.87, "learning_rate": 9.160371924478118e-05, "loss": 1.9039, "step": 2669 }, { "epoch": 0.87, "learning_rate": 9.116432518929529e-05, "loss": 1.9966, "step": 2670 }, { "epoch": 0.87, "learning_rate": 9.072593715114564e-05, "loss": 1.9326, "step": 2671 }, { "epoch": 0.87, "learning_rate": 9.028855561559691e-05, "loss": 1.9561, "step": 2672 }, { "epoch": 0.87, "learning_rate": 8.985218106679983e-05, "loss": 1.9271, "step": 2673 }, { "epoch": 0.87, "learning_rate": 8.941681398779055e-05, "loss": 1.9791, "step": 2674 }, { "epoch": 0.87, "learning_rate": 8.898245486048962e-05, "loss": 1.9487, "step": 2675 }, { "epoch": 0.87, "learning_rate": 8.854910416570217e-05, "loss": 1.9229, "step": 2676 }, { "epoch": 0.87, "learning_rate": 8.811676238311705e-05, "loss": 1.9026, "step": 2677 }, { "epoch": 0.87, "learning_rate": 8.768542999130646e-05, "loss": 1.9977, "step": 2678 }, { "epoch": 0.87, "learning_rate": 8.725510746772491e-05, "loss": 2.0328, "step": 2679 }, { "epoch": 0.87, "learning_rate": 8.682579528870926e-05, "loss": 1.9329, "step": 2680 }, { "epoch": 0.87, "learning_rate": 8.639749392947815e-05, "loss": 1.9179, "step": 2681 }, { "epoch": 0.87, "learning_rate": 8.59702038641309e-05, "loss": 1.9613, "step": 2682 }, { "epoch": 0.87, "learning_rate": 8.554392556564772e-05, "loss": 1.9194, "step": 2683 }, { "epoch": 0.87, "learning_rate": 8.511865950588882e-05, "loss": 1.9542, "step": 2684 }, { "epoch": 0.87, "learning_rate": 8.469440615559387e-05, "loss": 1.9502, "step": 2685 }, { "epoch": 0.87, "learning_rate": 8.427116598438156e-05, "loss": 1.9604, "step": 2686 }, { "epoch": 0.87, "learning_rate": 8.384893946074901e-05, "loss": 1.9564, "step": 2687 }, { "epoch": 0.87, "learning_rate": 8.342772705207169e-05, "loss": 2.019, "step": 2688 }, { "epoch": 0.87, "learning_rate": 8.300752922460175e-05, "loss": 1.9495, "step": 2689 }, { "epoch": 0.87, "learning_rate": 8.258834644346891e-05, "loss": 2.0014, "step": 2690 }, { "epoch": 0.87, "learning_rate": 8.21701791726791e-05, "loss": 1.9631, "step": 2691 }, { "epoch": 0.87, "learning_rate": 8.175302787511407e-05, "loss": 1.9068, "step": 2692 }, { "epoch": 0.87, "learning_rate": 8.13368930125311e-05, "loss": 1.9955, "step": 2693 }, { "epoch": 0.87, "learning_rate": 8.092177504556253e-05, "loss": 1.9375, "step": 2694 }, { "epoch": 0.88, "learning_rate": 8.050767443371442e-05, "loss": 2.0251, "step": 2695 }, { "epoch": 0.88, "learning_rate": 8.009459163536725e-05, "loss": 1.8723, "step": 2696 }, { "epoch": 0.88, "learning_rate": 7.968252710777479e-05, "loss": 1.9554, "step": 2697 }, { "epoch": 0.88, "learning_rate": 7.927148130706341e-05, "loss": 1.9461, "step": 2698 }, { "epoch": 0.88, "learning_rate": 7.886145468823214e-05, "loss": 1.9604, "step": 2699 }, { "epoch": 0.88, "learning_rate": 7.845244770515158e-05, "loss": 1.9491, "step": 2700 }, { "epoch": 0.88, "learning_rate": 7.804446081056371e-05, "loss": 1.9623, "step": 2701 }, { "epoch": 0.88, "learning_rate": 7.763749445608159e-05, "loss": 1.8987, "step": 2702 }, { "epoch": 0.88, "learning_rate": 7.723154909218832e-05, "loss": 2.0214, "step": 2703 }, { "epoch": 0.88, "learning_rate": 7.682662516823691e-05, "loss": 1.9959, "step": 2704 }, { "epoch": 0.88, "learning_rate": 7.64227231324498e-05, "loss": 1.9553, "step": 2705 }, { "epoch": 0.88, "learning_rate": 7.601984343191837e-05, "loss": 1.9982, "step": 2706 }, { "epoch": 0.88, "learning_rate": 7.561798651260177e-05, "loss": 1.972, "step": 2707 }, { "epoch": 0.88, "learning_rate": 7.521715281932773e-05, "loss": 1.988, "step": 2708 }, { "epoch": 0.88, "learning_rate": 7.481734279579088e-05, "loss": 1.8983, "step": 2709 }, { "epoch": 0.88, "learning_rate": 7.441855688455302e-05, "loss": 1.9617, "step": 2710 }, { "epoch": 0.88, "learning_rate": 7.4020795527042e-05, "loss": 1.9713, "step": 2711 }, { "epoch": 0.88, "learning_rate": 7.362405916355208e-05, "loss": 1.9556, "step": 2712 }, { "epoch": 0.88, "learning_rate": 7.32283482332421e-05, "loss": 1.9672, "step": 2713 }, { "epoch": 0.88, "learning_rate": 7.283366317413654e-05, "loss": 1.9956, "step": 2714 }, { "epoch": 0.88, "learning_rate": 7.244000442312404e-05, "loss": 1.9825, "step": 2715 }, { "epoch": 0.88, "learning_rate": 7.204737241595738e-05, "loss": 1.9452, "step": 2716 }, { "epoch": 0.88, "learning_rate": 7.165576758725246e-05, "loss": 2.0359, "step": 2717 }, { "epoch": 0.88, "learning_rate": 7.126519037048828e-05, "loss": 2.0188, "step": 2718 }, { "epoch": 0.88, "learning_rate": 7.087564119800694e-05, "loss": 2.0544, "step": 2719 }, { "epoch": 0.88, "learning_rate": 7.048712050101135e-05, "loss": 1.9853, "step": 2720 }, { "epoch": 0.88, "learning_rate": 7.009962870956699e-05, "loss": 1.9886, "step": 2721 }, { "epoch": 0.88, "learning_rate": 6.971316625260016e-05, "loss": 2.0083, "step": 2722 }, { "epoch": 0.88, "learning_rate": 6.932773355789746e-05, "loss": 1.9832, "step": 2723 }, { "epoch": 0.88, "learning_rate": 6.894333105210615e-05, "loss": 1.954, "step": 2724 }, { "epoch": 0.89, "learning_rate": 6.855995916073255e-05, "loss": 1.9741, "step": 2725 }, { "epoch": 0.89, "learning_rate": 6.817761830814284e-05, "loss": 1.863, "step": 2726 }, { "epoch": 0.89, "learning_rate": 6.779630891756106e-05, "loss": 2.007, "step": 2727 }, { "epoch": 0.89, "learning_rate": 6.741603141107011e-05, "loss": 1.9566, "step": 2728 }, { "epoch": 0.89, "learning_rate": 6.70367862096104e-05, "loss": 1.9721, "step": 2729 }, { "epoch": 0.89, "learning_rate": 6.665857373298012e-05, "loss": 1.9892, "step": 2730 }, { "epoch": 0.89, "learning_rate": 6.628139439983394e-05, "loss": 2.0165, "step": 2731 }, { "epoch": 0.89, "learning_rate": 6.590524862768254e-05, "loss": 1.9325, "step": 2732 }, { "epoch": 0.89, "learning_rate": 6.553013683289311e-05, "loss": 1.9109, "step": 2733 }, { "epoch": 0.89, "learning_rate": 6.515605943068803e-05, "loss": 2.0098, "step": 2734 }, { "epoch": 0.89, "learning_rate": 6.478301683514487e-05, "loss": 1.9236, "step": 2735 }, { "epoch": 0.89, "learning_rate": 6.441100945919542e-05, "loss": 2.003, "step": 2736 }, { "epoch": 0.89, "learning_rate": 6.404003771462618e-05, "loss": 1.9595, "step": 2737 }, { "epoch": 0.89, "learning_rate": 6.367010201207624e-05, "loss": 1.9531, "step": 2738 }, { "epoch": 0.89, "learning_rate": 6.330120276103879e-05, "loss": 1.9962, "step": 2739 }, { "epoch": 0.89, "learning_rate": 6.293334036985943e-05, "loss": 1.9816, "step": 2740 }, { "epoch": 0.89, "learning_rate": 6.256651524573598e-05, "loss": 2.0402, "step": 2741 }, { "epoch": 0.89, "learning_rate": 6.220072779471808e-05, "loss": 2.0014, "step": 2742 }, { "epoch": 0.89, "learning_rate": 6.183597842170685e-05, "loss": 1.9177, "step": 2743 }, { "epoch": 0.89, "learning_rate": 6.147226753045442e-05, "loss": 2.0132, "step": 2744 }, { "epoch": 0.89, "learning_rate": 6.110959552356288e-05, "loss": 1.9822, "step": 2745 }, { "epoch": 0.89, "learning_rate": 6.0747962802484846e-05, "loss": 1.9436, "step": 2746 }, { "epoch": 0.89, "learning_rate": 6.038736976752235e-05, "loss": 1.9709, "step": 2747 }, { "epoch": 0.89, "learning_rate": 6.0027816817826653e-05, "loss": 2.0005, "step": 2748 }, { "epoch": 0.89, "learning_rate": 5.9669304351397614e-05, "loss": 2.0105, "step": 2749 }, { "epoch": 0.89, "learning_rate": 5.9311832765083564e-05, "loss": 1.9153, "step": 2750 }, { "epoch": 0.89, "learning_rate": 5.8955402454580086e-05, "loss": 1.9567, "step": 2751 }, { "epoch": 0.89, "learning_rate": 5.860001381443081e-05, "loss": 1.92, "step": 2752 }, { "epoch": 0.89, "learning_rate": 5.8245667238025935e-05, "loss": 1.9766, "step": 2753 }, { "epoch": 0.89, "learning_rate": 5.7892363117602265e-05, "loss": 1.9901, "step": 2754 }, { "epoch": 0.89, "learning_rate": 5.754010184424263e-05, "loss": 1.9029, "step": 2755 }, { "epoch": 0.9, "learning_rate": 5.718888380787579e-05, "loss": 2.0237, "step": 2756 }, { "epoch": 0.9, "learning_rate": 5.683870939727531e-05, "loss": 1.8976, "step": 2757 }, { "epoch": 0.9, "learning_rate": 5.6489579000059575e-05, "loss": 1.9614, "step": 2758 }, { "epoch": 0.9, "learning_rate": 5.614149300269156e-05, "loss": 1.9701, "step": 2759 }, { "epoch": 0.9, "learning_rate": 5.579445179047793e-05, "loss": 1.9479, "step": 2760 }, { "epoch": 0.9, "learning_rate": 5.544845574756918e-05, "loss": 1.9062, "step": 2761 }, { "epoch": 0.9, "learning_rate": 5.510350525695862e-05, "loss": 1.9703, "step": 2762 }, { "epoch": 0.9, "learning_rate": 5.4759600700482136e-05, "loss": 1.987, "step": 2763 }, { "epoch": 0.9, "learning_rate": 5.441674245881789e-05, "loss": 1.9827, "step": 2764 }, { "epoch": 0.9, "learning_rate": 5.407493091148608e-05, "loss": 1.9745, "step": 2765 }, { "epoch": 0.9, "learning_rate": 5.373416643684803e-05, "loss": 1.8856, "step": 2766 }, { "epoch": 0.9, "learning_rate": 5.339444941210614e-05, "loss": 1.9656, "step": 2767 }, { "epoch": 0.9, "learning_rate": 5.305578021330315e-05, "loss": 1.9058, "step": 2768 }, { "epoch": 0.9, "learning_rate": 5.2718159215322525e-05, "loss": 1.9433, "step": 2769 }, { "epoch": 0.9, "learning_rate": 5.238158679188654e-05, "loss": 1.9865, "step": 2770 }, { "epoch": 0.9, "learning_rate": 5.20460633155575e-05, "loss": 1.9979, "step": 2771 }, { "epoch": 0.9, "learning_rate": 5.1711589157736215e-05, "loss": 2.0405, "step": 2772 }, { "epoch": 0.9, "learning_rate": 5.13781646886623e-05, "loss": 1.9185, "step": 2773 }, { "epoch": 0.9, "learning_rate": 5.104579027741318e-05, "loss": 1.9911, "step": 2774 }, { "epoch": 0.9, "learning_rate": 5.0714466291904126e-05, "loss": 2.0158, "step": 2775 }, { "epoch": 0.9, "learning_rate": 5.038419309888731e-05, "loss": 1.915, "step": 2776 }, { "epoch": 0.9, "learning_rate": 5.0054971063952073e-05, "loss": 1.9042, "step": 2777 }, { "epoch": 0.9, "learning_rate": 4.972680055152412e-05, "loss": 1.988, "step": 2778 }, { "epoch": 0.9, "learning_rate": 4.9399681924865214e-05, "loss": 1.9804, "step": 2779 }, { "epoch": 0.9, "learning_rate": 4.9073615546072815e-05, "loss": 1.8797, "step": 2780 }, { "epoch": 0.9, "learning_rate": 4.8748601776079314e-05, "loss": 1.9306, "step": 2781 }, { "epoch": 0.9, "learning_rate": 4.842464097465249e-05, "loss": 2.0169, "step": 2782 }, { "epoch": 0.9, "learning_rate": 4.810173350039382e-05, "loss": 1.9419, "step": 2783 }, { "epoch": 0.9, "learning_rate": 4.7779879710739385e-05, "loss": 1.934, "step": 2784 }, { "epoch": 0.9, "learning_rate": 4.745907996195886e-05, "loss": 1.9162, "step": 2785 }, { "epoch": 0.9, "learning_rate": 4.713933460915498e-05, "loss": 1.9396, "step": 2786 }, { "epoch": 0.91, "learning_rate": 4.68206440062634e-05, "loss": 1.9558, "step": 2787 }, { "epoch": 0.91, "learning_rate": 4.650300850605238e-05, "loss": 1.94, "step": 2788 }, { "epoch": 0.91, "learning_rate": 4.6186428460122e-05, "loss": 1.9313, "step": 2789 }, { "epoch": 0.91, "learning_rate": 4.587090421890405e-05, "loss": 1.9383, "step": 2790 }, { "epoch": 0.91, "learning_rate": 4.5556436131661936e-05, "loss": 1.9954, "step": 2791 }, { "epoch": 0.91, "learning_rate": 4.5243024546489767e-05, "loss": 1.9535, "step": 2792 }, { "epoch": 0.91, "learning_rate": 4.493066981031213e-05, "loss": 1.9766, "step": 2793 }, { "epoch": 0.91, "learning_rate": 4.4619372268884014e-05, "loss": 1.95, "step": 2794 }, { "epoch": 0.91, "learning_rate": 4.430913226678957e-05, "loss": 1.9343, "step": 2795 }, { "epoch": 0.91, "learning_rate": 4.3999950147442844e-05, "loss": 1.9701, "step": 2796 }, { "epoch": 0.91, "learning_rate": 4.369182625308688e-05, "loss": 2.0039, "step": 2797 }, { "epoch": 0.91, "learning_rate": 4.3384760924793156e-05, "loss": 1.9338, "step": 2798 }, { "epoch": 0.91, "learning_rate": 4.3078754502461346e-05, "loss": 1.9252, "step": 2799 }, { "epoch": 0.91, "learning_rate": 4.2773807324819394e-05, "loss": 1.9254, "step": 2800 }, { "epoch": 0.91, "learning_rate": 4.2469919729422046e-05, "loss": 1.9186, "step": 2801 }, { "epoch": 0.91, "learning_rate": 4.216709205265179e-05, "loss": 1.9861, "step": 2802 }, { "epoch": 0.91, "learning_rate": 4.186532462971748e-05, "loss": 2.0021, "step": 2803 }, { "epoch": 0.91, "learning_rate": 4.156461779465459e-05, "loss": 2.0414, "step": 2804 }, { "epoch": 0.91, "learning_rate": 4.126497188032452e-05, "loss": 1.9514, "step": 2805 }, { "epoch": 0.91, "learning_rate": 4.0966387218414167e-05, "loss": 1.9484, "step": 2806 }, { "epoch": 0.91, "learning_rate": 4.0668864139436044e-05, "loss": 1.9023, "step": 2807 }, { "epoch": 0.91, "learning_rate": 4.037240297272693e-05, "loss": 1.9755, "step": 2808 }, { "epoch": 0.91, "learning_rate": 4.0077004046448875e-05, "loss": 2.0122, "step": 2809 }, { "epoch": 0.91, "learning_rate": 3.978266768758754e-05, "loss": 1.9247, "step": 2810 }, { "epoch": 0.91, "learning_rate": 3.9489394221952745e-05, "loss": 1.9427, "step": 2811 }, { "epoch": 0.91, "learning_rate": 3.919718397417771e-05, "loss": 1.9143, "step": 2812 }, { "epoch": 0.91, "learning_rate": 3.8906037267718686e-05, "loss": 1.8401, "step": 2813 }, { "epoch": 0.91, "learning_rate": 3.861595442485444e-05, "loss": 1.8935, "step": 2814 }, { "epoch": 0.91, "learning_rate": 3.832693576668644e-05, "loss": 1.9562, "step": 2815 }, { "epoch": 0.91, "learning_rate": 3.803898161313812e-05, "loss": 1.9451, "step": 2816 }, { "epoch": 0.91, "learning_rate": 3.77520922829544e-05, "loss": 2.0206, "step": 2817 }, { "epoch": 0.92, "learning_rate": 3.7466268093701797e-05, "loss": 1.9718, "step": 2818 }, { "epoch": 0.92, "learning_rate": 3.718150936176756e-05, "loss": 1.9156, "step": 2819 }, { "epoch": 0.92, "learning_rate": 3.689781640235979e-05, "loss": 1.9306, "step": 2820 }, { "epoch": 0.92, "learning_rate": 3.6615189529506375e-05, "loss": 1.9077, "step": 2821 }, { "epoch": 0.92, "learning_rate": 3.633362905605564e-05, "loss": 1.9141, "step": 2822 }, { "epoch": 0.92, "learning_rate": 3.6053135293675265e-05, "loss": 1.995, "step": 2823 }, { "epoch": 0.92, "learning_rate": 3.577370855285211e-05, "loss": 2.0011, "step": 2824 }, { "epoch": 0.92, "learning_rate": 3.54953491428921e-05, "loss": 1.9562, "step": 2825 }, { "epoch": 0.92, "learning_rate": 3.521805737191941e-05, "loss": 1.9034, "step": 2826 }, { "epoch": 0.92, "learning_rate": 3.494183354687675e-05, "loss": 2.0163, "step": 2827 }, { "epoch": 0.92, "learning_rate": 3.4666677973524296e-05, "loss": 1.955, "step": 2828 }, { "epoch": 0.92, "learning_rate": 3.439259095644009e-05, "loss": 1.9377, "step": 2829 }, { "epoch": 0.92, "learning_rate": 3.411957279901934e-05, "loss": 1.9349, "step": 2830 }, { "epoch": 0.92, "learning_rate": 3.384762380347384e-05, "loss": 1.9118, "step": 2831 }, { "epoch": 0.92, "learning_rate": 3.3576744270832124e-05, "loss": 1.9248, "step": 2832 }, { "epoch": 0.92, "learning_rate": 3.33069345009388e-05, "loss": 2.0115, "step": 2833 }, { "epoch": 0.92, "learning_rate": 3.30381947924544e-05, "loss": 1.887, "step": 2834 }, { "epoch": 0.92, "learning_rate": 3.2770525442854747e-05, "loss": 1.9422, "step": 2835 }, { "epoch": 0.92, "learning_rate": 3.2503926748431275e-05, "loss": 1.952, "step": 2836 }, { "epoch": 0.92, "learning_rate": 3.2238399004289934e-05, "loss": 1.9094, "step": 2837 }, { "epoch": 0.92, "learning_rate": 3.197394250435137e-05, "loss": 1.9267, "step": 2838 }, { "epoch": 0.92, "learning_rate": 3.171055754135022e-05, "loss": 2.0696, "step": 2839 }, { "epoch": 0.92, "learning_rate": 3.144824440683536e-05, "loss": 2.0267, "step": 2840 }, { "epoch": 0.92, "learning_rate": 3.118700339116887e-05, "loss": 1.9677, "step": 2841 }, { "epoch": 0.92, "learning_rate": 3.09268347835262e-05, "loss": 1.9343, "step": 2842 }, { "epoch": 0.92, "learning_rate": 3.0667738871896e-05, "loss": 2.0085, "step": 2843 }, { "epoch": 0.92, "learning_rate": 3.0409715943078954e-05, "loss": 1.9525, "step": 2844 }, { "epoch": 0.92, "learning_rate": 3.0152766282688705e-05, "loss": 1.9444, "step": 2845 }, { "epoch": 0.92, "learning_rate": 2.9896890175150182e-05, "loss": 1.9569, "step": 2846 }, { "epoch": 0.92, "learning_rate": 2.9642087903700264e-05, "loss": 1.9472, "step": 2847 }, { "epoch": 0.92, "learning_rate": 2.938835975038745e-05, "loss": 2.0479, "step": 2848 }, { "epoch": 0.93, "learning_rate": 2.913570599607085e-05, "loss": 1.945, "step": 2849 }, { "epoch": 0.93, "learning_rate": 2.8884126920420527e-05, "loss": 2.0412, "step": 2850 }, { "epoch": 0.93, "learning_rate": 2.8633622801916727e-05, "loss": 1.958, "step": 2851 }, { "epoch": 0.93, "learning_rate": 2.8384193917850078e-05, "loss": 1.9877, "step": 2852 }, { "epoch": 0.93, "learning_rate": 2.8135840544320725e-05, "loss": 1.9885, "step": 2853 }, { "epoch": 0.93, "learning_rate": 2.788856295623854e-05, "loss": 1.9838, "step": 2854 }, { "epoch": 0.93, "learning_rate": 2.764236142732246e-05, "loss": 1.9523, "step": 2855 }, { "epoch": 0.93, "learning_rate": 2.7397236230100486e-05, "loss": 1.9038, "step": 2856 }, { "epoch": 0.93, "learning_rate": 2.7153187635908573e-05, "loss": 1.9218, "step": 2857 }, { "epoch": 0.93, "learning_rate": 2.6910215914891623e-05, "loss": 1.9446, "step": 2858 }, { "epoch": 0.93, "learning_rate": 2.666832133600239e-05, "loss": 1.9376, "step": 2859 }, { "epoch": 0.93, "learning_rate": 2.6427504167001014e-05, "loss": 1.9269, "step": 2860 }, { "epoch": 0.93, "learning_rate": 2.6187764674455383e-05, "loss": 1.9506, "step": 2861 }, { "epoch": 0.93, "learning_rate": 2.5949103123740214e-05, "loss": 1.9913, "step": 2862 }, { "epoch": 0.93, "learning_rate": 2.5711519779037186e-05, "loss": 1.9349, "step": 2863 }, { "epoch": 0.93, "learning_rate": 2.5475014903334147e-05, "loss": 1.9399, "step": 2864 }, { "epoch": 0.93, "learning_rate": 2.523958875842569e-05, "loss": 1.8954, "step": 2865 }, { "epoch": 0.93, "learning_rate": 2.500524160491191e-05, "loss": 2.0251, "step": 2866 }, { "epoch": 0.93, "learning_rate": 2.4771973702198637e-05, "loss": 1.9348, "step": 2867 }, { "epoch": 0.93, "learning_rate": 2.4539785308497098e-05, "loss": 1.9591, "step": 2868 }, { "epoch": 0.93, "learning_rate": 2.4308676680823593e-05, "loss": 1.962, "step": 2869 }, { "epoch": 0.93, "learning_rate": 2.4078648074999155e-05, "loss": 2.0041, "step": 2870 }, { "epoch": 0.93, "learning_rate": 2.384969974564932e-05, "loss": 1.9846, "step": 2871 }, { "epoch": 0.93, "learning_rate": 2.362183194620382e-05, "loss": 1.9603, "step": 2872 }, { "epoch": 0.93, "learning_rate": 2.3395044928896214e-05, "loss": 1.9369, "step": 2873 }, { "epoch": 0.93, "learning_rate": 2.3169338944764028e-05, "loss": 1.9638, "step": 2874 }, { "epoch": 0.93, "learning_rate": 2.2944714243647746e-05, "loss": 1.8804, "step": 2875 }, { "epoch": 0.93, "learning_rate": 2.272117107419136e-05, "loss": 1.9629, "step": 2876 }, { "epoch": 0.93, "learning_rate": 2.2498709683841377e-05, "loss": 1.8778, "step": 2877 }, { "epoch": 0.93, "learning_rate": 2.2277330318846823e-05, "loss": 1.9294, "step": 2878 }, { "epoch": 0.94, "learning_rate": 2.2057033224259338e-05, "loss": 1.9699, "step": 2879 }, { "epoch": 0.94, "learning_rate": 2.1837818643932196e-05, "loss": 1.9177, "step": 2880 }, { "epoch": 0.94, "learning_rate": 2.1619686820520624e-05, "loss": 1.837, "step": 2881 }, { "epoch": 0.94, "learning_rate": 2.1402637995481257e-05, "loss": 1.9743, "step": 2882 }, { "epoch": 0.94, "learning_rate": 2.1186672409071904e-05, "loss": 1.9803, "step": 2883 }, { "epoch": 0.94, "learning_rate": 2.0971790300351345e-05, "loss": 1.9381, "step": 2884 }, { "epoch": 0.94, "learning_rate": 2.0757991907178974e-05, "loss": 2.0757, "step": 2885 }, { "epoch": 0.94, "learning_rate": 2.0545277466214596e-05, "loss": 1.9608, "step": 2886 }, { "epoch": 0.94, "learning_rate": 2.0333647212918194e-05, "loss": 1.9697, "step": 2887 }, { "epoch": 0.94, "learning_rate": 2.0123101381549715e-05, "loss": 1.9546, "step": 2888 }, { "epoch": 0.94, "learning_rate": 1.9913640205168505e-05, "loss": 1.8987, "step": 2889 }, { "epoch": 0.94, "learning_rate": 1.970526391563332e-05, "loss": 1.9115, "step": 2890 }, { "epoch": 0.94, "learning_rate": 1.949797274360221e-05, "loss": 1.9835, "step": 2891 }, { "epoch": 0.94, "learning_rate": 1.929176691853196e-05, "loss": 1.9922, "step": 2892 }, { "epoch": 0.94, "learning_rate": 1.9086646668677875e-05, "loss": 1.9453, "step": 2893 }, { "epoch": 0.94, "learning_rate": 1.8882612221093886e-05, "loss": 1.9429, "step": 2894 }, { "epoch": 0.94, "learning_rate": 1.8679663801631553e-05, "loss": 1.999, "step": 2895 }, { "epoch": 0.94, "learning_rate": 1.8477801634940618e-05, "loss": 1.993, "step": 2896 }, { "epoch": 0.94, "learning_rate": 1.827702594446845e-05, "loss": 1.9524, "step": 2897 }, { "epoch": 0.94, "learning_rate": 1.80773369524595e-05, "loss": 1.8849, "step": 2898 }, { "epoch": 0.94, "learning_rate": 1.7878734879955615e-05, "loss": 1.9341, "step": 2899 }, { "epoch": 0.94, "learning_rate": 1.7681219946795168e-05, "loss": 2.0376, "step": 2900 }, { "epoch": 0.94, "learning_rate": 1.7484792371613602e-05, "loss": 1.9532, "step": 2901 }, { "epoch": 0.94, "learning_rate": 1.7289452371842317e-05, "loss": 2.0101, "step": 2902 }, { "epoch": 0.94, "learning_rate": 1.7095200163708912e-05, "loss": 1.9608, "step": 2903 }, { "epoch": 0.94, "learning_rate": 1.690203596223705e-05, "loss": 1.9451, "step": 2904 }, { "epoch": 0.94, "learning_rate": 1.6709959981245916e-05, "loss": 1.8723, "step": 2905 }, { "epoch": 0.94, "learning_rate": 1.651897243335021e-05, "loss": 2.0041, "step": 2906 }, { "epoch": 0.94, "learning_rate": 1.6329073529959715e-05, "loss": 1.9579, "step": 2907 }, { "epoch": 0.94, "learning_rate": 1.614026348127917e-05, "loss": 1.8972, "step": 2908 }, { "epoch": 0.94, "learning_rate": 1.5952542496308064e-05, "loss": 1.917, "step": 2909 }, { "epoch": 0.95, "learning_rate": 1.5765910782840286e-05, "loss": 1.9624, "step": 2910 }, { "epoch": 0.95, "learning_rate": 1.558036854746414e-05, "loss": 1.8962, "step": 2911 }, { "epoch": 0.95, "learning_rate": 1.5395915995561893e-05, "loss": 2.0248, "step": 2912 }, { "epoch": 0.95, "learning_rate": 1.5212553331309553e-05, "loss": 2.0107, "step": 2913 }, { "epoch": 0.95, "learning_rate": 1.5030280757676763e-05, "loss": 1.9053, "step": 2914 }, { "epoch": 0.95, "learning_rate": 1.4849098476426349e-05, "loss": 1.9775, "step": 2915 }, { "epoch": 0.95, "learning_rate": 1.466900668811444e-05, "loss": 2.0041, "step": 2916 }, { "epoch": 0.95, "learning_rate": 1.4490005592090017e-05, "loss": 2.0069, "step": 2917 }, { "epoch": 0.95, "learning_rate": 1.4312095386494806e-05, "loss": 1.941, "step": 2918 }, { "epoch": 0.95, "learning_rate": 1.4135276268262942e-05, "loss": 2.0025, "step": 2919 }, { "epoch": 0.95, "learning_rate": 1.3959548433120638e-05, "loss": 1.9607, "step": 2920 }, { "epoch": 0.95, "learning_rate": 1.3784912075586408e-05, "loss": 1.953, "step": 2921 }, { "epoch": 0.95, "learning_rate": 1.3611367388970285e-05, "loss": 1.9768, "step": 2922 }, { "epoch": 0.95, "learning_rate": 1.3438914565374271e-05, "loss": 1.948, "step": 2923 }, { "epoch": 0.95, "learning_rate": 1.3267553795691334e-05, "loss": 2.0325, "step": 2924 }, { "epoch": 0.95, "learning_rate": 1.3097285269606074e-05, "loss": 1.9075, "step": 2925 }, { "epoch": 0.95, "learning_rate": 1.2928109175593617e-05, "loss": 1.9279, "step": 2926 }, { "epoch": 0.95, "learning_rate": 1.2760025700920164e-05, "loss": 1.9428, "step": 2927 }, { "epoch": 0.95, "learning_rate": 1.2593035031642109e-05, "loss": 1.9207, "step": 2928 }, { "epoch": 0.95, "learning_rate": 1.2427137352606588e-05, "loss": 1.9958, "step": 2929 }, { "epoch": 0.95, "learning_rate": 1.2262332847450708e-05, "loss": 1.9147, "step": 2930 }, { "epoch": 0.95, "learning_rate": 1.209862169860132e-05, "loss": 1.9149, "step": 2931 }, { "epoch": 0.95, "learning_rate": 1.1936004087275464e-05, "loss": 1.9741, "step": 2932 }, { "epoch": 0.95, "learning_rate": 1.1774480193479265e-05, "loss": 1.966, "step": 2933 }, { "epoch": 0.95, "learning_rate": 1.1614050196008253e-05, "loss": 1.917, "step": 2934 }, { "epoch": 0.95, "learning_rate": 1.1454714272447264e-05, "loss": 1.9195, "step": 2935 }, { "epoch": 0.95, "learning_rate": 1.1296472599169993e-05, "loss": 1.9314, "step": 2936 }, { "epoch": 0.95, "learning_rate": 1.113932535133888e-05, "loss": 1.9046, "step": 2937 }, { "epoch": 0.95, "learning_rate": 1.098327270290489e-05, "loss": 1.8829, "step": 2938 }, { "epoch": 0.95, "learning_rate": 1.0828314826607511e-05, "loss": 1.9226, "step": 2939 }, { "epoch": 0.95, "learning_rate": 1.067445189397398e-05, "loss": 2.0185, "step": 2940 }, { "epoch": 0.96, "learning_rate": 1.0521684075319837e-05, "loss": 1.903, "step": 2941 }, { "epoch": 0.96, "learning_rate": 1.037001153974837e-05, "loss": 1.9517, "step": 2942 }, { "epoch": 0.96, "learning_rate": 1.02194344551505e-05, "loss": 2.0394, "step": 2943 }, { "epoch": 0.96, "learning_rate": 1.0069952988204566e-05, "loss": 1.9713, "step": 2944 }, { "epoch": 0.96, "learning_rate": 9.921567304375878e-06, "loss": 1.9695, "step": 2945 }, { "epoch": 0.96, "learning_rate": 9.774277567916934e-06, "loss": 1.9287, "step": 2946 }, { "epoch": 0.96, "learning_rate": 9.628083941867427e-06, "loss": 1.9801, "step": 2947 }, { "epoch": 0.96, "learning_rate": 9.482986588053132e-06, "loss": 2.0046, "step": 2948 }, { "epoch": 0.96, "learning_rate": 9.338985667086909e-06, "loss": 2.005, "step": 2949 }, { "epoch": 0.96, "learning_rate": 9.196081338367468e-06, "loss": 1.8992, "step": 2950 }, { "epoch": 0.96, "learning_rate": 9.054273760080057e-06, "loss": 1.946, "step": 2951 }, { "epoch": 0.96, "learning_rate": 8.913563089195554e-06, "loss": 1.9397, "step": 2952 }, { "epoch": 0.96, "learning_rate": 8.773949481470922e-06, "loss": 1.9271, "step": 2953 }, { "epoch": 0.96, "learning_rate": 8.63543309144843e-06, "loss": 2.0136, "step": 2954 }, { "epoch": 0.96, "learning_rate": 8.498014072456317e-06, "loss": 1.9567, "step": 2955 }, { "epoch": 0.96, "learning_rate": 8.361692576607572e-06, "loss": 1.9173, "step": 2956 }, { "epoch": 0.96, "learning_rate": 8.226468754800598e-06, "loss": 1.9936, "step": 2957 }, { "epoch": 0.96, "learning_rate": 8.092342756718663e-06, "loss": 1.9795, "step": 2958 }, { "epoch": 0.96, "learning_rate": 7.959314730829781e-06, "loss": 1.9711, "step": 2959 }, { "epoch": 0.96, "learning_rate": 7.827384824386719e-06, "loss": 2.006, "step": 2960 }, { "epoch": 0.96, "learning_rate": 7.696553183426658e-06, "loss": 1.9886, "step": 2961 }, { "epoch": 0.96, "learning_rate": 7.566819952770976e-06, "loss": 1.9043, "step": 2962 }, { "epoch": 0.96, "learning_rate": 7.438185276025245e-06, "loss": 1.968, "step": 2963 }, { "epoch": 0.96, "learning_rate": 7.3106492955792305e-06, "loss": 2.0478, "step": 2964 }, { "epoch": 0.96, "learning_rate": 7.184212152606007e-06, "loss": 1.9047, "step": 2965 }, { "epoch": 0.96, "learning_rate": 7.0588739870628414e-06, "loss": 1.9509, "step": 2966 }, { "epoch": 0.96, "learning_rate": 6.9346349376901985e-06, "loss": 1.9693, "step": 2967 }, { "epoch": 0.96, "learning_rate": 6.811495142011959e-06, "loss": 1.9863, "step": 2968 }, { "epoch": 0.96, "learning_rate": 6.68945473633531e-06, "loss": 1.8908, "step": 2969 }, { "epoch": 0.96, "learning_rate": 6.568513855750524e-06, "loss": 1.9813, "step": 2970 }, { "epoch": 0.96, "learning_rate": 6.4486726341304035e-06, "loss": 1.9178, "step": 2971 }, { "epoch": 0.97, "learning_rate": 6.329931204130946e-06, "loss": 1.9719, "step": 2972 }, { "epoch": 0.97, "learning_rate": 6.2122896971905655e-06, "loss": 2.0168, "step": 2973 }, { "epoch": 0.97, "learning_rate": 6.095748243530097e-06, "loss": 1.933, "step": 2974 }, { "epoch": 0.97, "learning_rate": 5.980306972152904e-06, "loss": 1.9662, "step": 2975 }, { "epoch": 0.97, "learning_rate": 5.8659660108442144e-06, "loss": 1.9866, "step": 2976 }, { "epoch": 0.97, "learning_rate": 5.752725486171562e-06, "loss": 1.9492, "step": 2977 }, { "epoch": 0.97, "learning_rate": 5.640585523484232e-06, "loss": 1.8969, "step": 2978 }, { "epoch": 0.97, "learning_rate": 5.529546246913153e-06, "loss": 1.9138, "step": 2979 }, { "epoch": 0.97, "learning_rate": 5.4196077793712276e-06, "loss": 1.9064, "step": 2980 }, { "epoch": 0.97, "learning_rate": 5.310770242552554e-06, "loss": 1.9509, "step": 2981 }, { "epoch": 0.97, "learning_rate": 5.203033756932651e-06, "loss": 1.9821, "step": 2982 }, { "epoch": 0.97, "learning_rate": 5.096398441768235e-06, "loss": 1.9687, "step": 2983 }, { "epoch": 0.97, "learning_rate": 4.990864415097107e-06, "loss": 1.9153, "step": 2984 }, { "epoch": 0.97, "learning_rate": 4.886431793737933e-06, "loss": 1.9426, "step": 2985 }, { "epoch": 0.97, "learning_rate": 4.783100693290576e-06, "loss": 2.0313, "step": 2986 }, { "epoch": 0.97, "learning_rate": 4.680871228135097e-06, "loss": 2.048, "step": 2987 }, { "epoch": 0.97, "learning_rate": 4.579743511432311e-06, "loss": 1.9879, "step": 2988 }, { "epoch": 0.97, "learning_rate": 4.479717655123783e-06, "loss": 1.9707, "step": 2989 }, { "epoch": 0.97, "learning_rate": 4.380793769930724e-06, "loss": 1.959, "step": 2990 }, { "epoch": 0.97, "learning_rate": 4.282971965355209e-06, "loss": 1.9527, "step": 2991 }, { "epoch": 0.97, "learning_rate": 4.186252349679065e-06, "loss": 1.9886, "step": 2992 }, { "epoch": 0.97, "learning_rate": 4.090635029964096e-06, "loss": 1.9049, "step": 2993 }, { "epoch": 0.97, "learning_rate": 3.99612011205197e-06, "loss": 1.9113, "step": 2994 }, { "epoch": 0.97, "learning_rate": 3.902707700564112e-06, "loss": 1.9994, "step": 2995 }, { "epoch": 0.97, "learning_rate": 3.810397898901363e-06, "loss": 1.9225, "step": 2996 }, { "epoch": 0.97, "learning_rate": 3.7191908092444324e-06, "loss": 2.0016, "step": 2997 }, { "epoch": 0.97, "learning_rate": 3.6290865325528944e-06, "loss": 1.9885, "step": 2998 }, { "epoch": 0.97, "learning_rate": 3.540085168566076e-06, "loss": 2.0084, "step": 2999 }, { "epoch": 0.97, "learning_rate": 3.452186815802172e-06, "loss": 1.9864, "step": 3000 }, { "epoch": 0.97, "learning_rate": 3.3653915715585738e-06, "loss": 1.9354, "step": 3001 }, { "epoch": 0.97, "learning_rate": 3.2796995319113175e-06, "loss": 2.0404, "step": 3002 }, { "epoch": 0.98, "learning_rate": 3.1951107917156386e-06, "loss": 1.928, "step": 3003 }, { "epoch": 0.98, "learning_rate": 3.111625444605415e-06, "loss": 1.9209, "step": 3004 }, { "epoch": 0.98, "learning_rate": 3.0292435829928353e-06, "loss": 1.9178, "step": 3005 }, { "epoch": 0.98, "learning_rate": 2.9479652980690663e-06, "loss": 1.9522, "step": 3006 }, { "epoch": 0.98, "learning_rate": 2.8677906798033617e-06, "loss": 1.8929, "step": 3007 }, { "epoch": 0.98, "learning_rate": 2.788719816943397e-06, "loss": 1.9711, "step": 3008 }, { "epoch": 0.98, "learning_rate": 2.710752797015159e-06, "loss": 1.9309, "step": 3009 }, { "epoch": 0.98, "learning_rate": 2.633889706322501e-06, "loss": 1.9784, "step": 3010 }, { "epoch": 0.98, "learning_rate": 2.558130629947586e-06, "loss": 1.982, "step": 3011 }, { "epoch": 0.98, "learning_rate": 2.483475651750333e-06, "loss": 1.9541, "step": 3012 }, { "epoch": 0.98, "learning_rate": 2.40992485436875e-06, "loss": 1.9719, "step": 3013 }, { "epoch": 0.98, "learning_rate": 2.3374783192181558e-06, "loss": 1.8735, "step": 3014 }, { "epoch": 0.98, "learning_rate": 2.2661361264919578e-06, "loss": 1.9724, "step": 3015 }, { "epoch": 0.98, "learning_rate": 2.1958983551608745e-06, "loss": 1.8659, "step": 3016 }, { "epoch": 0.98, "learning_rate": 2.126765082973159e-06, "loss": 1.96, "step": 3017 }, { "epoch": 0.98, "learning_rate": 2.058736386454596e-06, "loss": 1.9726, "step": 3018 }, { "epoch": 0.98, "learning_rate": 1.9918123409081722e-06, "loss": 1.9338, "step": 3019 }, { "epoch": 0.98, "learning_rate": 1.925993020414074e-06, "loss": 1.965, "step": 3020 }, { "epoch": 0.98, "learning_rate": 1.8612784978295772e-06, "loss": 1.9482, "step": 3021 }, { "epoch": 0.98, "learning_rate": 1.7976688447892687e-06, "loss": 1.9501, "step": 3022 }, { "epoch": 0.98, "learning_rate": 1.7351641317044918e-06, "loss": 1.9503, "step": 3023 }, { "epoch": 0.98, "learning_rate": 1.6737644277636798e-06, "loss": 1.9754, "step": 3024 }, { "epoch": 0.98, "learning_rate": 1.6134698009317994e-06, "loss": 2.0034, "step": 3025 }, { "epoch": 0.98, "learning_rate": 1.5542803179510178e-06, "loss": 1.9807, "step": 3026 }, { "epoch": 0.98, "learning_rate": 1.4961960443398148e-06, "loss": 1.9134, "step": 3027 }, { "epoch": 0.98, "learning_rate": 1.439217044393315e-06, "loss": 2.0051, "step": 3028 }, { "epoch": 0.98, "learning_rate": 1.3833433811833994e-06, "loss": 1.944, "step": 3029 }, { "epoch": 0.98, "learning_rate": 1.3285751165583726e-06, "loss": 1.9237, "step": 3030 }, { "epoch": 0.98, "learning_rate": 1.2749123111426286e-06, "loss": 1.9679, "step": 3031 }, { "epoch": 0.98, "learning_rate": 1.2223550243372073e-06, "loss": 1.9734, "step": 3032 }, { "epoch": 0.99, "learning_rate": 1.1709033143195714e-06, "loss": 2.0292, "step": 3033 }, { "epoch": 0.99, "learning_rate": 1.1205572380428298e-06, "loss": 1.9776, "step": 3034 }, { "epoch": 0.99, "learning_rate": 1.0713168512366257e-06, "loss": 1.9633, "step": 3035 }, { "epoch": 0.99, "learning_rate": 1.0231822084068033e-06, "loss": 1.9171, "step": 3036 }, { "epoch": 0.99, "learning_rate": 9.76153362834853e-07, "loss": 1.9936, "step": 3037 }, { "epoch": 0.99, "learning_rate": 9.302303665785772e-07, "loss": 1.9881, "step": 3038 }, { "epoch": 0.99, "learning_rate": 8.854132704713136e-07, "loss": 2.0263, "step": 3039 }, { "epoch": 0.99, "learning_rate": 8.417021241224899e-07, "loss": 1.9965, "step": 3040 }, { "epoch": 0.99, "learning_rate": 7.990969759174016e-07, "loss": 1.9417, "step": 3041 }, { "epoch": 0.99, "learning_rate": 7.575978730167688e-07, "loss": 2.0792, "step": 3042 }, { "epoch": 0.99, "learning_rate": 7.172048613574012e-07, "loss": 1.8986, "step": 3043 }, { "epoch": 0.99, "learning_rate": 6.779179856514217e-07, "loss": 1.9614, "step": 3044 }, { "epoch": 0.99, "learning_rate": 6.397372893865993e-07, "loss": 1.944, "step": 3045 }, { "epoch": 0.99, "learning_rate": 6.026628148263491e-07, "loss": 2.0125, "step": 3046 }, { "epoch": 0.99, "learning_rate": 5.666946030095099e-07, "loss": 1.8974, "step": 3047 }, { "epoch": 0.99, "learning_rate": 5.318326937503448e-07, "loss": 1.9212, "step": 3048 }, { "epoch": 0.99, "learning_rate": 4.980771256385408e-07, "loss": 2.0016, "step": 3049 }, { "epoch": 0.99, "learning_rate": 4.654279360392089e-07, "loss": 1.9803, "step": 3050 }, { "epoch": 0.99, "learning_rate": 4.3388516109266196e-07, "loss": 2.0401, "step": 3051 }, { "epoch": 0.99, "learning_rate": 4.0344883571452606e-07, "loss": 1.9063, "step": 3052 }, { "epoch": 0.99, "learning_rate": 3.741189935956291e-07, "loss": 1.9398, "step": 3053 }, { "epoch": 0.99, "learning_rate": 3.4589566720211185e-07, "loss": 2.0023, "step": 3054 }, { "epoch": 0.99, "learning_rate": 3.1877888777531725e-07, "loss": 1.9799, "step": 3055 }, { "epoch": 0.99, "learning_rate": 2.9276868533145706e-07, "loss": 1.9627, "step": 3056 }, { "epoch": 0.99, "learning_rate": 2.6786508866205593e-07, "loss": 1.9419, "step": 3057 }, { "epoch": 0.99, "learning_rate": 2.4406812533361855e-07, "loss": 1.9866, "step": 3058 }, { "epoch": 0.99, "learning_rate": 2.2137782168785147e-07, "loss": 1.9632, "step": 3059 }, { "epoch": 0.99, "learning_rate": 1.9979420284110815e-07, "loss": 1.9604, "step": 3060 }, { "epoch": 0.99, "learning_rate": 1.7931729268505504e-07, "loss": 1.9185, "step": 3061 }, { "epoch": 0.99, "learning_rate": 1.599471138862274e-07, "loss": 1.9139, "step": 3062 }, { "epoch": 0.99, "learning_rate": 1.416836878861405e-07, "loss": 2.0092, "step": 3063 }, { "epoch": 1.0, "learning_rate": 1.2452703490084538e-07, "loss": 1.9322, "step": 3064 }, { "epoch": 1.0, "learning_rate": 1.0847717392181711e-07, "loss": 1.8879, "step": 3065 }, { "epoch": 1.0, "learning_rate": 9.353412271495554e-08, "loss": 1.9738, "step": 3066 }, { "epoch": 1.0, "learning_rate": 7.969789782125147e-08, "loss": 1.8862, "step": 3067 }, { "epoch": 1.0, "learning_rate": 6.696851455645359e-08, "loss": 1.9788, "step": 3068 }, { "epoch": 1.0, "learning_rate": 5.534598701106841e-08, "loss": 1.8941, "step": 3069 }, { "epoch": 1.0, "learning_rate": 4.483032805047138e-08, "loss": 1.8734, "step": 3070 }, { "epoch": 1.0, "learning_rate": 3.542154931457375e-08, "loss": 1.994, "step": 3071 }, { "epoch": 1.0, "learning_rate": 2.7119661218488746e-08, "loss": 1.9776, "step": 3072 }, { "epoch": 1.0, "learning_rate": 1.9924672951643352e-08, "loss": 1.9884, "step": 3073 }, { "epoch": 1.0, "learning_rate": 1.3836592478444488e-08, "loss": 1.9403, "step": 3074 }, { "epoch": 1.0, "learning_rate": 8.85542653794591e-09, "loss": 1.9776, "step": 3075 }, { "epoch": 1.0, "learning_rate": 4.981180643959249e-09, "loss": 1.9452, "step": 3076 }, { "epoch": 1.0, "learning_rate": 2.213859085054004e-09, "loss": 1.9349, "step": 3077 }, { "epoch": 1.0, "learning_rate": 5.534649244465229e-10, "loss": 2.0038, "step": 3078 }, { "epoch": 1.0, "learning_rate": 0.0, "loss": 1.8175, "step": 3079 }, { "epoch": 1.0, "step": 3079, "total_flos": 2.140960418532478e+19, "train_loss": 2.030868922049599, "train_runtime": 25336.8024, "train_samples_per_second": 15.551, "train_steps_per_second": 0.122 } ], "max_steps": 3079, "num_train_epochs": 1, "total_flos": 2.140960418532478e+19, "trial_name": null, "trial_params": null }